Enable arf stack based reference frame selection

Use the arf/lst/gld stack based reference frame tracking system
to select and maintain the reference frames for each coding frame.

The coding performance remains neutral for this change, since we
need to change the reference frame ops in tpl model as well.

STATS_CHANGED

Change-Id: Iec583d676320d0ea86e59ffb50a61346cd6b2677
diff --git a/av1/encoder/encode_strategy.c b/av1/encoder/encode_strategy.c
index 8b36b8c..b82ae83 100644
--- a/av1/encoder/encode_strategy.c
+++ b/av1/encoder/encode_strategy.c
@@ -836,7 +836,7 @@
         stack_push(cpi->arf_stack, &cpi->arf_stack_size, ref_map_index);
         break;
       case OVERLAY_UPDATE:
-        if (cpi->preserve_arf_as_gld) {
+        if (cpi->preserve_arf_as_gld || cm->show_existing_frame) {
           ref_map_index = stack_pop(cpi->arf_stack, &cpi->arf_stack_size);
           stack_push(cpi->gld_stack, &cpi->gld_stack_size, ref_map_index);
         } else {
@@ -847,15 +847,14 @@
         break;
       case INTNL_OVERLAY_UPDATE:
         ref_map_index = stack_pop(cpi->arf_stack, &cpi->arf_stack_size);
-        if (cpi->gf_group.layer_depth[cpi->gf_group.index] == 2)
-          stack_push(cpi->gld_stack, &cpi->gld_stack_size, ref_map_index);
-        else
-          stack_push(cpi->lst_stack, &cpi->lst_stack_size, ref_map_index);
+        stack_push(cpi->lst_stack, &cpi->lst_stack_size, ref_map_index);
         break;
       default: assert(0 && "unknown type");
     }
   }
 
+  if (!cpi->ext_refresh_frame_flags_pending) return;
+
   // If check_frame_refs_short_signaling() decided to set
   // frame_refs_short_signaling=1 then we update remapped_ref_idx[] here.  Every
   // reference will still map to the same RefCntBuffer (through ref_frame_map[])
@@ -962,6 +961,35 @@
 #endif  // DUMP_REF_FRAME_IMAGES
 }
 
+static int get_free_ref_map_index(const AV1_COMP *cpi) {
+  for (int idx = 0; idx < REF_FRAMES; ++idx) {
+    int is_free = 1;
+    for (int i = 0; i < cpi->arf_stack_size; ++i) {
+      if (cpi->arf_stack[i] == idx) {
+        is_free = 0;
+        break;
+      }
+    }
+
+    for (int i = 0; i < cpi->lst_stack_size; ++i) {
+      if (cpi->lst_stack[i] == idx) {
+        is_free = 0;
+        break;
+      }
+    }
+
+    for (int i = 0; i < cpi->gld_stack_size; ++i) {
+      if (cpi->gld_stack[i] == idx) {
+        is_free = 0;
+        break;
+      }
+    }
+
+    if (is_free) return idx;
+  }
+  return INVALID_IDX;
+}
+
 static int get_refresh_frame_flags(const AV1_COMP *const cpi,
                                    const EncodeFrameParams *const frame_params,
                                    FRAME_UPDATE_TYPE frame_update_type) {
@@ -986,22 +1014,32 @@
     // Unfortunately the encoder interface reflects the old refresh_*_frame
     // flags so we have to replicate the old refresh_frame_flags logic here in
     // order to preserve the behaviour of the flag overrides.
-    refresh_mask |= cpi->ext_refresh_last_frame
-                    << get_ref_frame_map_idx(cm, LAST3_FRAME);
-    refresh_mask |= cpi->ext_refresh_bwd_ref_frame
-                    << get_ref_frame_map_idx(cm, EXTREF_FRAME);
-    refresh_mask |= cpi->ext_refresh_alt2_ref_frame
-                    << get_ref_frame_map_idx(cm, ALTREF2_FRAME);
+    int ref_frame_map_idx = get_ref_frame_map_idx(cm, LAST3_FRAME);
+    if (ref_frame_map_idx != INVALID_IDX)
+      refresh_mask |= cpi->ext_refresh_last_frame << ref_frame_map_idx;
+
+    ref_frame_map_idx = get_ref_frame_map_idx(cm, EXTREF_FRAME);
+    if (ref_frame_map_idx != INVALID_IDX)
+      refresh_mask |= cpi->ext_refresh_bwd_ref_frame << ref_frame_map_idx;
+
+    ref_frame_map_idx = get_ref_frame_map_idx(cm, ALTREF2_FRAME);
+    if (ref_frame_map_idx != INVALID_IDX)
+      refresh_mask |= cpi->ext_refresh_alt2_ref_frame << ref_frame_map_idx;
+
     if (frame_update_type == OVERLAY_UPDATE) {
       if (!cpi->preserve_arf_as_gld) {
-        refresh_mask |= cpi->ext_refresh_golden_frame
-                        << get_ref_frame_map_idx(cm, ALTREF_FRAME);
+        ref_frame_map_idx = get_ref_frame_map_idx(cm, ALTREF_FRAME);
+        if (ref_frame_map_idx != INVALID_IDX)
+          refresh_mask |= cpi->ext_refresh_golden_frame << ref_frame_map_idx;
       }
     } else {
-      refresh_mask |= cpi->ext_refresh_golden_frame
-                      << get_ref_frame_map_idx(cm, GOLDEN_FRAME);
-      refresh_mask |= cpi->ext_refresh_alt_ref_frame
-                      << get_ref_frame_map_idx(cm, ALTREF_FRAME);
+      ref_frame_map_idx = get_ref_frame_map_idx(cm, GOLDEN_FRAME);
+      if (ref_frame_map_idx != INVALID_IDX)
+        refresh_mask |= cpi->ext_refresh_golden_frame << ref_frame_map_idx;
+
+      ref_frame_map_idx = get_ref_frame_map_idx(cm, ALTREF_FRAME);
+      if (ref_frame_map_idx != INVALID_IDX)
+        refresh_mask |= cpi->ext_refresh_alt_ref_frame << ref_frame_map_idx;
     }
     return refresh_mask;
   }
@@ -1010,6 +1048,67 @@
   // buffer management strategy currently in use.  This function just decides
   // which buffers should be refreshed.
 
+  int free_fb_index = get_free_ref_map_index(cpi);
+  switch (frame_update_type) {
+    case KF_UPDATE:
+    case GF_UPDATE:
+      if (free_fb_index != INVALID_IDX) {
+        refresh_mask = 1 << free_fb_index;
+      } else {
+        if (cpi->gld_stack_size)
+          refresh_mask = 1 << cpi->gld_stack[cpi->gld_stack_size - 1];
+        else
+          refresh_mask = 1 << cpi->lst_stack[cpi->lst_stack_size - 1];
+      }
+      break;
+    case LF_UPDATE:
+      if (free_fb_index != INVALID_IDX) {
+        refresh_mask = 1 << free_fb_index;
+      } else {
+        if (cpi->lst_stack_size >= 2)
+          refresh_mask = 1 << cpi->lst_stack[cpi->lst_stack_size - 1];
+        else
+          assert(0 && "No ref map index found");
+      }
+      break;
+    case ARF_UPDATE:
+      if (free_fb_index != INVALID_IDX) {
+        refresh_mask = 1 << free_fb_index;
+      } else {
+        if (cpi->gld_stack_size >= 3)
+          refresh_mask = 1 << cpi->gld_stack[cpi->gld_stack_size - 1];
+        else if (cpi->lst_stack_size >= 2)
+          refresh_mask = 1 << cpi->lst_stack[cpi->lst_stack_size - 1];
+        else
+          assert(0 && "No ref map index found");
+      }
+      break;
+    case INTNL_ARF_UPDATE:
+      if (free_fb_index != INVALID_IDX) {
+        refresh_mask = 1 << free_fb_index;
+      } else {
+        refresh_mask = 1 << cpi->lst_stack[cpi->lst_stack_size - 1];
+      }
+      break;
+    case OVERLAY_UPDATE:
+      if (!cpi->preserve_arf_as_gld) {
+        if (free_fb_index != INVALID_IDX) {
+          refresh_mask = 1 << free_fb_index;
+        } else if (cpi->arf_stack_size) {
+          refresh_mask = 1 << cpi->arf_stack[cpi->arf_stack_size - 1];
+        } else {
+          refresh_mask = 1 << cpi->lst_stack[cpi->lst_stack_size - 1];
+        }
+      }
+      break;
+    case INTNL_OVERLAY_UPDATE: break;
+    default: assert(0); break;
+  }
+
+  return refresh_mask;
+
+  refresh_mask = 0;
+
   switch (frame_update_type) {
     case KF_UPDATE:
       // Note that a real shown key-frame or S-frame refreshes every buffer,
@@ -1149,6 +1248,99 @@
 }
 #endif  // !CONFIG_REALTIME_ONLY
 
+static void get_ref_frames(AV1_COMP *const cpi,
+                           FRAME_UPDATE_TYPE frame_update_type) {
+  AV1_COMMON *cm = &cpi->common;
+  (void)frame_update_type;
+
+  const int arf_stack_size = cpi->arf_stack_size;
+  const int lst_stack_size = cpi->lst_stack_size;
+  const int gld_stack_size = cpi->gld_stack_size;
+
+  // Initialization
+  for (int i = 0; i < REF_FRAMES; ++i) cm->remapped_ref_idx[i] = INVALID_IDX;
+
+  if (arf_stack_size) {
+    cm->remapped_ref_idx[ALTREF_FRAME - LAST_FRAME] =
+        cpi->arf_stack[arf_stack_size - 1];
+
+    if (arf_stack_size > 1)
+      cm->remapped_ref_idx[BWDREF_FRAME - LAST_FRAME] = cpi->arf_stack[0];
+
+    if (arf_stack_size > 2)
+      cm->remapped_ref_idx[ALTREF2_FRAME - LAST_FRAME] = cpi->arf_stack[1];
+  }
+
+  if (lst_stack_size) {
+    cm->remapped_ref_idx[LAST_FRAME - LAST_FRAME] = cpi->lst_stack[0];
+
+    if (lst_stack_size > 1)
+      cm->remapped_ref_idx[LAST2_FRAME - LAST_FRAME] = cpi->lst_stack[1];
+  }
+
+  if (gld_stack_size) {
+    cm->remapped_ref_idx[GOLDEN_FRAME - LAST_FRAME] =
+        cpi->gld_stack[gld_stack_size - 1];
+
+    if (gld_stack_size > 1) {
+      if (arf_stack_size <= 1)
+        cm->remapped_ref_idx[BWDREF_FRAME - LAST_FRAME] = cpi->gld_stack[0];
+      else
+        cm->remapped_ref_idx[LAST3_FRAME - LAST_FRAME] = cpi->gld_stack[0];
+    }
+  }
+
+  for (int idx = ALTREF_FRAME - LAST_FRAME; idx >= 0; --idx) {
+    int ref_map_index = cm->remapped_ref_idx[idx];
+
+    if (ref_map_index != INVALID_IDX) continue;
+
+    for (int i = 0; i < cpi->arf_stack_size && ref_map_index == INVALID_IDX;
+         ++i) {
+      int ref_idx = 0;
+      for (ref_idx = 0; ref_idx <= ALTREF_FRAME - LAST_FRAME; ++ref_idx)
+        if (cpi->arf_stack[i] == cm->remapped_ref_idx[ref_idx]) break;
+
+      // not in use
+      if (ref_idx > ALTREF_FRAME - LAST_FRAME) {
+        ref_map_index = cpi->arf_stack[i];
+        break;
+      }
+    }
+
+    for (int i = 0; i < cpi->gld_stack_size && ref_map_index == INVALID_IDX;
+         ++i) {
+      int ref_idx = 0;
+      for (ref_idx = 0; ref_idx <= ALTREF_FRAME - LAST_FRAME; ++ref_idx)
+        if (cpi->gld_stack[i] == cm->remapped_ref_idx[ref_idx]) break;
+
+      // not in use
+      if (ref_idx > ALTREF_FRAME - LAST_FRAME) {
+        ref_map_index = cpi->gld_stack[i];
+        break;
+      }
+    }
+
+    for (int i = 0; i < cpi->lst_stack_size && ref_map_index == INVALID_IDX;
+         ++i) {
+      int ref_idx = 0;
+      for (ref_idx = 0; ref_idx <= ALTREF_FRAME - LAST_FRAME; ++ref_idx)
+        if (cpi->lst_stack[i] == cm->remapped_ref_idx[ref_idx]) break;
+
+      // not in use
+      if (ref_idx > ALTREF_FRAME - LAST_FRAME) {
+        ref_map_index = cpi->lst_stack[i];
+        break;
+      }
+    }
+
+    if (ref_map_index != INVALID_IDX)
+      cm->remapped_ref_idx[idx] = ref_map_index;
+    else
+      cm->remapped_ref_idx[idx] = cpi->gld_stack[0];
+  }
+}
+
 int av1_encode_strategy(AV1_COMP *const cpi, size_t *const size,
                         uint8_t *const dest, unsigned int *frame_flags,
                         int64_t *const time_stamp, int64_t *const time_end,
@@ -1310,6 +1502,9 @@
                                force_refresh_all);
 
   if (oxcf->pass == 0 || oxcf->pass == 2) {
+    if (!cpi->ext_refresh_frame_flags_pending)
+      get_ref_frames(cpi, frame_update_type);
+
     // Work out which reference frame slots may be used.
     frame_params.ref_frame_flags = get_ref_frame_flags(cpi);
 
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 72f0757..f50b7f3 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -1145,7 +1145,7 @@
 // TODO(jingning): Move these functions as primitive members for the new cpi
 // class.
 static INLINE void stack_push(int *stack, int *stack_size, int item) {
-  for (int i = 0; i < *stack_size; ++i) stack[i + 1] = stack[i];
+  for (int i = *stack_size - 1; i >= 0; --i) stack[i + 1] = stack[i];
   stack[0] = item;
   ++*stack_size;
 }
diff --git a/av1/encoder/temporal_filter.c b/av1/encoder/temporal_filter.c
index 67402d8..b1cc2fd 100644
--- a/av1/encoder/temporal_filter.c
+++ b/av1/encoder/temporal_filter.c
@@ -1506,12 +1506,12 @@
   }
 
   int which_arf = gf_group->arf_update_idx[gf_group->index];
-
   // Set the temporal filtering status for the corresponding OVERLAY frame
   if (strength == 0 && frames_to_blur == 1)
     cpi->is_arf_filter_off[which_arf] = 1;
   else
     cpi->is_arf_filter_off[which_arf] = 0;
+
   cpi->common.showable_frame = cpi->is_arf_filter_off[which_arf];
 
   if (distance == -1) {