Integrate the arf stack buffer control with tpl

Rework the reference frame mapping in the tpl model using the arf
stack based reference frame control scheme. The stack based reference
frame management system provides more flexibility in frame selection.
It allows the codec to support multiple long term reference frames.
The compression performance is improved:

speed 0 150 frames
          avg PSNR         ovr PSNR        SSIM
lowres    -0.55%           -0.60%         -0.51%
midres    -0.39%           -0.45%         -0.30%
ugc       -0.54%           -0.52%         -0.41%

speed 1 150 frames
          avg PSNR         ovr PSNR        SSIM
lowres    -0.50%           -0.55%         -0.43%
midres    -0.44%           -0.45%         -0.38%
ugc       -0.60%           -0.55%         -0.35%

STATS_CHANGED

Change-Id: Id6397a2417acbf4c1af027a4794df10e9dd74c25
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 70264d0..b5a8b1d 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -3499,7 +3499,7 @@
   AV1_COMMON *const cm = &cpi->common;
   assert(IMPLIES(cpi->gf_group.size > 0,
                  cpi->gf_group.index < cpi->gf_group.size));
-  const int tpl_idx = cpi->gf_group.frame_disp_idx[cpi->gf_group.index];
+  const int tpl_idx = cpi->gf_group.index;
   TplDepFrame *tpl_frame = &cpi->tpl_frame[tpl_idx];
   TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
   int tpl_stride = tpl_frame->stride;
@@ -3576,7 +3576,7 @@
   AV1_COMMON *const cm = &cpi->common;
   assert(IMPLIES(cpi->gf_group.size > 0,
                  cpi->gf_group.index < cpi->gf_group.size));
-  const int tpl_idx = cpi->gf_group.frame_disp_idx[cpi->gf_group.index];
+  const int tpl_idx = cpi->gf_group.index;
   TplDepFrame *tpl_frame = &cpi->tpl_frame[tpl_idx];
   TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
   int tpl_stride = tpl_frame->stride;
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 8016404..33a50cf 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -2800,7 +2800,7 @@
     cpi->tpl_stats_buffer[frame].mi_rows = cm->mi_rows;
     cpi->tpl_stats_buffer[frame].mi_cols = cm->mi_cols;
   }
-  cpi->tpl_frame = &cpi->tpl_stats_buffer[REF_FRAMES];
+  cpi->tpl_frame = &cpi->tpl_stats_buffer[REF_FRAMES + 1];
 
 #if CONFIG_COLLECT_PARTITION_STATS == 2
   av1_zero(cpi->partition_stats);
@@ -3636,7 +3636,8 @@
   AV1_COMMON *const cm = &cpi->common;
 
   assert(IMPLIES(gf_group->size > 0, gf_group->index < gf_group->size));
-  const int tpl_idx = gf_group->frame_disp_idx[gf_group->index];
+
+  const int tpl_idx = gf_group->index;
   TplDepFrame *tpl_frame = &cpi->tpl_frame[tpl_idx];
   TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
 
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index d73799d..92fed66 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -157,7 +157,7 @@
   SS_CFG_TOTAL = 2
 } UENUM1BYTE(SS_CFG_OFFSET);
 
-#define MAX_LENGTH_TPL_FRAME_STATS (27 + 8)
+#define MAX_LENGTH_TPL_FRAME_STATS (27 + 9)
 
 typedef struct TplDepStats {
   int64_t intra_cost;
@@ -176,6 +176,7 @@
   uint8_t is_valid;
   TplDepStats *tpl_stats_ptr;
   const YV12_BUFFER_CONFIG *gf_picture;
+  int ref_map_index[REF_FRAMES];
   int stride;
   int width;
   int height;
diff --git a/av1/encoder/tpl_model.c b/av1/encoder/tpl_model.c
index 27ce86e..fc59c52 100644
--- a/av1/encoder/tpl_model.c
+++ b/av1/encoder/tpl_model.c
@@ -106,7 +106,8 @@
                             struct scale_factors *sf, int frame_idx,
                             int16_t *src_diff, tran_low_t *coeff, int use_satd,
                             int mi_row, int mi_col, BLOCK_SIZE bsize,
-                            TX_SIZE tx_size, YV12_BUFFER_CONFIG *ref_frame[],
+                            TX_SIZE tx_size,
+                            const YV12_BUFFER_CONFIG *ref_frame[],
                             uint8_t *predictor, TplDepStats *tpl_stats) {
   AV1_COMMON *cm = &cpi->common;
   const GF_GROUP *gf_group = &cpi->gf_group;
@@ -194,7 +195,10 @@
   for (rf_idx = 0; rf_idx < INTER_REFS_PER_FRAME; ++rf_idx) {
     if (ref_frame[rf_idx] == NULL) continue;
 
-    int q_ref = gf_group->q_val[gf_group->ref_frame_gop_idx[frame_idx][rf_idx]];
+    int q_ref =
+        cpi->tpl_frame[frame_idx].ref_map_index[rf_idx] > 0
+            ? gf_group->q_val[cpi->tpl_frame[frame_idx].ref_map_index[rf_idx]]
+            : 0;
 
     const int16_t qstep_ref =
         ROUND_POWER_OF_TWO(av1_ac_quant_QTX(q_ref, 0, xd->bd), xd->bd - 8);
@@ -262,10 +266,9 @@
   tpl_stats->inter_cost = best_inter_cost << TPL_DEP_COST_SCALE_LOG2;
   tpl_stats->intra_cost = best_intra_cost << TPL_DEP_COST_SCALE_LOG2;
 
-  if (frame_idx) {
-    const int idx = gf_group->ref_frame_gop_idx[frame_idx][best_rf_idx];
-    tpl_stats->ref_frame_index = idx;
-    tpl_stats->ref_disp_frame_index = gf_group->frame_disp_idx[idx];
+  if (frame_idx && best_rf_idx != -1) {
+    tpl_stats->ref_frame_index =
+        cpi->tpl_frame[frame_idx].ref_map_index[best_rf_idx];
     tpl_stats->mv.as_int = best_mv.as_int;
   }
 }
@@ -319,7 +322,7 @@
 static void tpl_model_update_b(TplDepFrame *tpl_frame,
                                TplDepStats *tpl_stats_ptr, int mi_row,
                                int mi_col, const BLOCK_SIZE bsize) {
-  TplDepFrame *ref_tpl_frame = &tpl_frame[tpl_stats_ptr->ref_disp_frame_index];
+  TplDepFrame *ref_tpl_frame = &tpl_frame[tpl_stats_ptr->ref_frame_index];
   TplDepStats *ref_stats_ptr = ref_tpl_frame->tpl_stats_ptr;
   MV mv = tpl_stats_ptr->mv.as_mv;
   int mv_row = mv.row >> 3;
@@ -415,7 +418,6 @@
       tpl_ptr->inter_cost = inter_cost;
       tpl_ptr->mc_dep_cost = tpl_ptr->intra_cost + tpl_ptr->mc_flow;
       tpl_ptr->ref_frame_index = src_stats->ref_frame_index;
-      tpl_ptr->ref_disp_frame_index = src_stats->ref_disp_frame_index;
       tpl_ptr->mv.as_int = src_stats->mv.as_int;
       ++tpl_ptr;
     }
@@ -442,12 +444,12 @@
                               int frame_idx) {
   const GF_GROUP *gf_group = &cpi->gf_group;
   if (frame_idx == gf_group->size) return;
-  int tpl_idx = gf_group->frame_disp_idx[frame_idx];
-  TplDepFrame *tpl_frame = &cpi->tpl_frame[tpl_idx];
-  YV12_BUFFER_CONFIG *this_frame = gf_picture[frame_idx];
-  YV12_BUFFER_CONFIG *ref_frame[7] = {
-    NULL, NULL, NULL, NULL, NULL, NULL, NULL
-  };
+  TplDepFrame *tpl_frame = &cpi->tpl_frame[frame_idx];
+  const YV12_BUFFER_CONFIG *this_frame = tpl_frame->gf_picture;
+  const YV12_BUFFER_CONFIG *ref_frame[7] = { NULL, NULL, NULL, NULL,
+                                             NULL, NULL, NULL };
+
+  (void)gf_picture;
 
   AV1_COMMON *cm = &cpi->common;
   struct scale_factors sf;
@@ -493,22 +495,9 @@
   else
     predictor = predictor8;
 
-  // Prepare reference frame pointers. If any reference frame slot is
-  // unavailable, the pointer will be set to Null.
-  for (idx = 0; idx < INTER_REFS_PER_FRAME; ++idx) {
-    const int rf_idx = gf_group->ref_frame_gop_idx[frame_idx][idx];
-    if (rf_idx != -1) {
-      int duplicate = 0;
-      for (int idx2 = 0; idx2 < idx; ++idx2) {
-        const int rf_idx2 = gf_group->ref_frame_gop_idx[frame_idx][idx2];
-        if (rf_idx2 != -1 && gf_picture[rf_idx] == gf_picture[rf_idx2]) {
-          duplicate = 1;
-          break;
-        }
-      }
-      if (!duplicate) ref_frame[idx] = gf_picture[rf_idx];
-    }
-  }
+  // TODO(jingning): remove the duplicate frames.
+  for (idx = 0; idx < INTER_REFS_PER_FRAME; ++idx)
+    ref_frame[idx] = cpi->tpl_frame[tpl_frame->ref_map_index[idx]].gf_picture;
 
   xd->mi = cm->mi_grid_base;
   xd->mi[0] = cm->mi;
@@ -566,7 +555,6 @@
   AV1_COMMON *cm = &cpi->common;
   const SequenceHeader *const seq_params = &cm->seq_params;
   int frame_idx = 0;
-  int frame_disp_idx = 0;
   RefCntBuffer *frame_bufs = cm->buffer_pool->frame_bufs;
   int pframe_qindex = 0;
   int cur_frame_idx = gf_group->index;
@@ -574,6 +562,9 @@
   RefBufferStack ref_buffer_stack = cpi->ref_buffer_stack;
   EncodeFrameParams frame_params = *init_frame_params;
 
+  int ref_picture_map[REF_FRAMES];
+  (void)gf_picture;
+
   for (int i = 0; i < FRAME_BUFFERS && frame_idx < INTER_REFS_PER_FRAME + 1;
        ++i) {
     if (frame_bufs[i].ref_count == 0) {
@@ -589,138 +580,114 @@
     }
   }
 
-  for (int i = LAST_FRAME; i < REF_FRAMES; ++i)
-    cpi->tpl_frame[-i].gf_picture = get_ref_frame_yv12_buf(cm, i);
+  for (int i = 0; i < REF_FRAMES; ++i) {
+    if (frame_params.frame_type == KEY_FRAME)
+      cpi->tpl_frame[-i - 1].gf_picture = NULL;
+    else
+      cpi->tpl_frame[-i - 1].gf_picture = &cm->ref_frame_map[i]->buf;
 
-  for (int gf_index = gf_group->index; gf_index < gf_group->size; ++gf_index) {
+    ref_picture_map[i] = -i - 1;
+  }
+
+  *tpl_group_frames = 0;
+
+  int gf_index;
+  int use_arf = gf_group->update_type[1] == ARF_UPDATE;
+  const int gop_length =
+      AOMMIN(gf_group->size - 1 + use_arf, MAX_LENGTH_TPL_FRAME_STATS - 1);
+  for (gf_index = gf_group->index; gf_index <= gop_length; ++gf_index) {
+    TplDepFrame *tpl_frame = &cpi->tpl_frame[gf_index];
     FRAME_UPDATE_TYPE frame_update_type = gf_group->update_type[gf_index];
+    if (gf_index == gf_group->size) {
+      frame_update_type = INTNL_OVERLAY_UPDATE;
+      gf_group->update_type[gf_index] = frame_update_type;
+      gf_group->q_val[gf_index] = gf_group->q_val[1];
+    }
+
     frame_params.show_frame = frame_update_type != ARF_UPDATE &&
                               frame_update_type != INTNL_ARF_UPDATE;
     frame_params.show_existing_frame =
-        frame_update_type == INTNL_OVERLAY_UPDATE;
+        frame_update_type == INTNL_OVERLAY_UPDATE ||
+        frame_update_type == OVERLAY_UPDATE;
     frame_params.frame_type =
         frame_update_type == KF_UPDATE ? KEY_FRAME : INTER_FRAME;
 
+    if (frame_update_type == LF_UPDATE)
+      pframe_qindex = gf_group->q_val[gf_index];
+
+    if (gf_index == cur_frame_idx) {
+      tpl_frame->gf_picture = frame_input->source;
+    } else {
+      int frame_display_index = gf_index == gf_group->size
+                                    ? cpi->rc.baseline_gf_interval
+                                    : gf_group->frame_disp_idx[gf_index];
+      struct lookahead_entry *buf =
+          av1_lookahead_peek(cpi->lookahead, frame_display_index - 1);
+      if (buf == NULL) break;
+      tpl_frame->gf_picture = &buf->img;
+    }
+
     av1_get_ref_frames(cpi, frame_update_type, &ref_buffer_stack);
     int refresh_mask = av1_get_refresh_frame_flags(
         cpi, &frame_params, frame_update_type, &ref_buffer_stack);
-    int ref_map_index = av1_get_refresh_ref_frame_map(refresh_mask);
-    av1_update_ref_frame_map(cpi, frame_update_type, ref_map_index,
+    int refresh_frame_map_index = av1_get_refresh_ref_frame_map(refresh_mask);
+    av1_update_ref_frame_map(cpi, frame_update_type, refresh_frame_map_index,
                              &ref_buffer_stack);
-  }
 
-  av1_get_ref_frames(cpi, gf_group->update_type[gf_group->index],
-                     &cpi->ref_buffer_stack);
+    for (int i = LAST_FRAME; i <= ALTREF_FRAME; ++i)
+      tpl_frame->ref_map_index[i - LAST_FRAME] =
+          ref_picture_map[cm->remapped_ref_idx[i - LAST_FRAME]];
 
-  *tpl_group_frames = 0;
-
-  if (cur_frame_idx > 0) {
-    // Initialize Golden reference frame.
-    RefCntBuffer *ref_buf = get_ref_frame_buf(cm, GOLDEN_FRAME);
-    gf_picture[0] = &ref_buf->buf;
-    ++*tpl_group_frames;
-  }
-
-  if (cur_frame_idx > 1) {
-    // Initialize Alt reference frame.
-    RefCntBuffer *ref_buf = get_ref_frame_buf(cm, ALTREF_FRAME);
-    gf_picture[1] = &ref_buf->buf;
-    ++*tpl_group_frames;
-  }
-
-  // Initialize frames in the GF group
-  for (frame_idx = cur_frame_idx;
-       frame_idx <= AOMMIN(gf_group->size, MAX_LENGTH_TPL_FRAME_STATS - 1);
-       ++frame_idx) {
-    if (frame_idx == cur_frame_idx) {
-      gf_picture[frame_idx] = frame_input->source;
-      frame_disp_idx = gf_group->frame_disp_idx[frame_idx];
-    } else {
-      frame_disp_idx = frame_idx == gf_group->size
-                           ? gf_group->frame_disp_idx[1]
-                           : gf_group->frame_disp_idx[frame_idx];
-      struct lookahead_entry *buf =
-          av1_lookahead_peek(cpi->lookahead, frame_disp_idx - 1);
-
-      if (buf == NULL) break;
-
-      gf_picture[frame_idx] = &buf->img;
-      if (frame_idx == gf_group->size) {
-        gf_group->frame_disp_idx[frame_idx] = frame_disp_idx;
-        gf_group->q_val[frame_idx] = gf_group->q_val[1];
-        gf_group->update_type[frame_idx] = OVERLAY_UPDATE;
-      }
-    }
-
-    if (gf_group->update_type[frame_idx] == LF_UPDATE)
-      pframe_qindex = gf_group->q_val[frame_idx];
+    if (refresh_mask) ref_picture_map[refresh_frame_map_index] = gf_index;
 
     ++*tpl_group_frames;
   }
 
   if (cur_frame_idx == 0) return;
 
-  if (frame_idx < MAX_LENGTH_TPL_FRAME_STATS) {
-    ++frame_disp_idx;
-    int extend_frame_count = 0;
-    const int gld_idx_next_gop = gf_group->size;
-    const int lst_idx_next_gop =
-        gf_group->ref_frame_gop_idx[gld_idx_next_gop][REF_IDX(LAST_FRAME)];
-    const int lst2_idx_next_gop =
-        gf_group->ref_frame_gop_idx[gld_idx_next_gop][REF_IDX(LAST2_FRAME)];
-    const int lst3_idx_next_gop =
-        gf_group->ref_frame_gop_idx[gld_idx_next_gop][REF_IDX(LAST3_FRAME)];
+  int extend_frame_count = 0;
+  int frame_display_index = cpi->rc.baseline_gf_interval + 1;
 
-    // Extend two frames outside the current gf group.
-    for (; frame_idx < MAX_LENGTH_TPL_FRAME_STATS && extend_frame_count < 2;
-         ++frame_idx) {
-      struct lookahead_entry *buf =
-          av1_lookahead_peek(cpi->lookahead, frame_disp_idx - 1);
+  for (; gf_index < MAX_LENGTH_TPL_FRAME_STATS && extend_frame_count < 2;
+       ++gf_index) {
+    TplDepFrame *tpl_frame = &cpi->tpl_frame[gf_index];
+    FRAME_UPDATE_TYPE frame_update_type = LF_UPDATE;
+    frame_params.show_frame = frame_update_type != ARF_UPDATE &&
+                              frame_update_type != INTNL_ARF_UPDATE;
+    frame_params.show_existing_frame =
+        frame_update_type == INTNL_OVERLAY_UPDATE;
+    frame_params.frame_type = INTER_FRAME;
 
-      if (buf == NULL) break;
+    struct lookahead_entry *buf =
+        av1_lookahead_peek(cpi->lookahead, frame_display_index - 1);
 
-      gf_picture[frame_idx] = &buf->img;
-      gf_group->q_val[frame_idx] = pframe_qindex;
-      gf_group->frame_disp_idx[frame_idx] = frame_disp_idx;
-      gf_group->update_type[frame_idx] = LF_UPDATE;
+    if (buf == NULL) break;
 
-      gf_group->ref_frame_gop_idx[frame_idx][REF_IDX(GOLDEN_FRAME)] =
-          gld_idx_next_gop;
-      gf_group->ref_frame_gop_idx[frame_idx][REF_IDX(LAST_FRAME)] =
-          lst_idx_next_gop;
-      gf_group->ref_frame_gop_idx[frame_idx][REF_IDX(LAST2_FRAME)] =
-          lst2_idx_next_gop;
-      gf_group->ref_frame_gop_idx[frame_idx][REF_IDX(LAST3_FRAME)] =
-          lst3_idx_next_gop;
+    tpl_frame->gf_picture = &buf->img;
 
-      ++*tpl_group_frames;
-      ++extend_frame_count;
-      ++frame_disp_idx;
-    }
+    gf_group->update_type[gf_index] = LF_UPDATE;
+    gf_group->q_val[gf_index] = pframe_qindex;
+
+    av1_get_ref_frames(cpi, frame_update_type, &ref_buffer_stack);
+    int refresh_mask = av1_get_refresh_frame_flags(
+        cpi, &frame_params, frame_update_type, &ref_buffer_stack);
+    int refresh_frame_map_index = av1_get_refresh_ref_frame_map(refresh_mask);
+    av1_update_ref_frame_map(cpi, frame_update_type, refresh_frame_map_index,
+                             &ref_buffer_stack);
+
+    for (int i = LAST_FRAME; i <= ALTREF_FRAME; ++i)
+      tpl_frame->ref_map_index[i - LAST_FRAME] =
+          ref_picture_map[cm->remapped_ref_idx[i - LAST_FRAME]];
+
+    if (refresh_mask) ref_picture_map[refresh_frame_map_index] = gf_index;
+
+    ++*tpl_group_frames;
+    ++extend_frame_count;
+    ++frame_display_index;
   }
 
-  for (frame_idx = 0; frame_idx < *tpl_group_frames; ++frame_idx) {
-    assert(gf_picture[frame_idx] == get_framebuf(cpi, frame_input, frame_idx));
-  }
-  /*
-  for (frame_idx = 0; frame_idx < *tpl_group_frames; ++frame_idx) {
-    printf("frame_idx:%d -> %d [ %d ]\n", frame_idx,
-           gf_group->frame_disp_idx[frame_idx],
-           gf_group->q_val[frame_idx]);
-    for (int i = 0; i < INTER_REFS_PER_FRAME; ++i)
-      printf("%d, ", gf_group->ref_frame_gop_idx[frame_idx][i]);
-    printf(" -> ");
-    for (int i = 0; i < INTER_REFS_PER_FRAME; ++i)
-      printf(
-          "%d, ",
-          gf_group->frame_disp_idx[gf_group->ref_frame_gop_idx[frame_idx][i]]);
-    printf(" -> ");
-    for (int i = 0; i < INTER_REFS_PER_FRAME; ++i)
-      printf("%d, ",
-             gf_group->q_val[gf_group->ref_frame_gop_idx[frame_idx][i]]);
-    printf("\n");
-  }
-  */
+  av1_get_ref_frames(cpi, gf_group->update_type[gf_group->index],
+                     &cpi->ref_buffer_stack);
 }
 
 static void init_tpl_stats(AV1_COMP *cpi) {
@@ -750,10 +717,14 @@
     // Backward propagation from tpl_group_frames to 1.
     for (int frame_idx = cpi->tpl_gf_group_frames - 1;
          frame_idx >= gf_group->index; --frame_idx) {
-      if (gf_group->update_type[frame_idx] == OVERLAY_UPDATE ||
-          gf_group->update_type[frame_idx] == INTNL_OVERLAY_UPDATE)
-        continue;
-      mc_flow_dispenser(cpi, gf_picture, frame_idx);
+      int is_process = 1;
+
+      if (frame_idx == gf_group->size) is_process = 0;
+      if (frame_idx < gf_group->size)
+        if (gf_group->update_type[frame_idx] == INTNL_OVERLAY_UPDATE)
+          is_process = 0;
+
+      if (is_process) mc_flow_dispenser(cpi, gf_picture, frame_idx);
     }
   }
 }