Prune reference frames in tpl

Introduced the speed feature 'prune_ref_frames_in_tpl' for
pruning reference frames in tpl mode estimation. This speed
feature is enabled for speed 3 and above.

            Encode Time          BD-Rate Loss(%)
cpu-used    Reduction(%)   avg.psnr  ovr.psnr   ssim
   3          0.990        -0.0556   -0.0528   -0.1450
   4          0.618        -0.0442   -0.0428   -0.0638
   5          0.985        -0.0500   -0.0463   -0.0673
   6          0.312        -0.0394   -0.0468   -0.0163

STATS_CHANGED

Change-Id: I5e5c9c7b9d2f24156f738c2458e005602caf66e9
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 1be87e5..ec0fbdb 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -337,7 +337,7 @@
   av1_zero(x->tpl_keep_ref_frame);
 
   if (tpl_frame->is_valid == 0) return;
-  if (!is_frame_tpl_eligible(gf_group)) return;
+  if (!is_frame_tpl_eligible(gf_group, gf_group->index)) return;
   if (frame_idx >= MAX_TPL_FRAME_IDX) return;
   if (cpi->superres_mode != AOM_SUPERRES_NONE) return;
   if (cpi->oxcf.q_cfg.aq_mode != NO_AQ) return;
@@ -1277,7 +1277,8 @@
     // frames currently.
     const GF_GROUP *gf_group = &cpi->gf_group;
     if (cm->delta_q_info.delta_q_present_flag) {
-      if (deltaq_mode == DELTA_Q_OBJECTIVE && !is_frame_tpl_eligible(gf_group))
+      if (deltaq_mode == DELTA_Q_OBJECTIVE &&
+          !is_frame_tpl_eligible(gf_group, gf_group->index))
         cm->delta_q_info.delta_q_present_flag = 0;
     }
 
diff --git a/av1/encoder/encodeframe_utils.c b/av1/encoder/encodeframe_utils.c
index 216d748e..695084f 100644
--- a/av1/encoder/encodeframe_utils.c
+++ b/av1/encoder/encodeframe_utils.c
@@ -73,7 +73,7 @@
   const TplDepFrame *tpl_frame = &cpi->tpl_data.tpl_frame[tpl_idx];
   const int deltaq_rdmult = set_deltaq_rdmult(cpi, x);
   if (tpl_frame->is_valid == 0) return deltaq_rdmult;
-  if (!is_frame_tpl_eligible(gf_group)) return deltaq_rdmult;
+  if (!is_frame_tpl_eligible(gf_group, gf_group->index)) return deltaq_rdmult;
   if (tpl_idx >= MAX_TPL_FRAME_IDX) return deltaq_rdmult;
   if (cpi->superres_mode != AOM_SUPERRES_NONE) return deltaq_rdmult;
   if (cpi->oxcf.q_cfg.aq_mode != NO_AQ) return deltaq_rdmult;
@@ -662,7 +662,7 @@
 
   if (tpl_frame->is_valid == 0) return orig_rdmult;
 
-  if (!is_frame_tpl_eligible(gf_group)) return orig_rdmult;
+  if (!is_frame_tpl_eligible(gf_group, gf_group->index)) return orig_rdmult;
 
   if (cpi->gf_group.index >= MAX_TPL_FRAME_IDX) return orig_rdmult;
 
@@ -876,7 +876,7 @@
 
   if (tpl_frame->is_valid == 0) return base_qindex;
 
-  if (!is_frame_tpl_eligible(gf_group)) return base_qindex;
+  if (!is_frame_tpl_eligible(gf_group, gf_group->index)) return base_qindex;
 
   if (cpi->gf_group.index >= MAX_TPL_FRAME_IDX) return base_qindex;
 
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index dc970e2..2cc96f3 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -3084,12 +3084,21 @@
 #define MAX_GFUBOOST_FACTOR 10.0
 #define MIN_GFUBOOST_FACTOR 4.0
 
-static INLINE int is_frame_tpl_eligible(const GF_GROUP *const gf_group) {
-  const FRAME_UPDATE_TYPE update_type = gf_group->update_type[gf_group->index];
+static INLINE int is_frame_tpl_eligible(const GF_GROUP *const gf_group,
+                                        uint8_t index) {
+  const FRAME_UPDATE_TYPE update_type = gf_group->update_type[index];
   return update_type == ARF_UPDATE || update_type == GF_UPDATE ||
          update_type == KF_UPDATE;
 }
 
+static INLINE int is_frame_eligible_for_ref_pruning(const GF_GROUP *gf_group,
+                                                    int selective_ref_frame,
+                                                    int prune_ref_frames_in_tpl,
+                                                    int gf_index) {
+  return (selective_ref_frame > 0) && (prune_ref_frames_in_tpl > 0) &&
+         !is_frame_tpl_eligible(gf_group, gf_index);
+}
+
 // Get update type of the current frame.
 static INLINE FRAME_UPDATE_TYPE
 get_frame_update_type(const GF_GROUP *gf_group) {
diff --git a/av1/encoder/encoder_utils.c b/av1/encoder/encoder_utils.c
index 1028649..22ddc58 100644
--- a/av1/encoder/encoder_utils.c
+++ b/av1/encoder/encoder_utils.c
@@ -519,7 +519,7 @@
     } else {
       aom_clear_system_state();
       cpi->rd.r0 = (double)intra_cost_base / mc_dep_cost_base;
-      if (is_frame_tpl_eligible(gf_group)) {
+      if (is_frame_tpl_eligible(gf_group, gf_group->index)) {
         cpi->rd.arf_r0 = cpi->rd.r0;
         if (cpi->lap_enabled) {
           double min_boost_factor = sqrt(cpi->rc.baseline_gf_interval);
@@ -557,7 +557,8 @@
 
 #if !CONFIG_REALTIME_ONLY
   GF_GROUP *gf_group = &cpi->gf_group;
-  if (cpi->oxcf.algo_cfg.enable_tpl_model && is_frame_tpl_eligible(gf_group)) {
+  if (cpi->oxcf.algo_cfg.enable_tpl_model &&
+      is_frame_tpl_eligible(gf_group, gf_group->index)) {
     process_tpl_stats_frame(cpi);
     av1_tpl_rdmult_setup(cpi);
   }
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index b254899..8bbd2d5 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -503,6 +503,7 @@
     sf->tpl_sf.reduce_first_step_size = 6;
     sf->tpl_sf.subpel_force_stop = QUARTER_PEL;
     sf->tpl_sf.search_method = DIAMOND;
+    sf->tpl_sf.prune_ref_frames_in_tpl = 1;
 
     sf->tx_sf.adaptive_txb_search_level = boosted ? 2 : 3;
     sf->tx_sf.tx_type_search.use_skip_flag_prediction = 2;
@@ -950,6 +951,7 @@
   tpl_sf->subpel_force_stop = EIGHTH_PEL;
   tpl_sf->search_method = NSTEP;
   tpl_sf->disable_filtered_key_tpl = 0;
+  tpl_sf->prune_ref_frames_in_tpl = 0;
 }
 
 static AOM_INLINE void init_gm_sf(GLOBAL_MOTION_SPEED_FEATURES *gm_sf) {
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 39440c7..c415b17 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -345,6 +345,9 @@
 
   // Not run TPL for filtered Key frame.
   int disable_filtered_key_tpl;
+
+  // Prune reference frames in TPL.
+  int prune_ref_frames_in_tpl;
 } TPL_SPEED_FEATURES;
 
 typedef struct GLOBAL_MOTION_SPEED_FEATURES {
diff --git a/av1/encoder/tpl_model.c b/av1/encoder/tpl_model.c
index d1cfb45..9bc1d1b 100644
--- a/av1/encoder/tpl_model.c
+++ b/av1/encoder/tpl_model.c
@@ -739,6 +739,12 @@
   }
 }
 
+static AOM_INLINE int get_gop_length(const GF_GROUP *gf_group) {
+  int use_arf = gf_group->arf_index >= 0;
+  int gop_length = AOMMIN(gf_group->size - 1 + use_arf, MAX_TPL_FRAME_IDX - 1);
+  return gop_length;
+}
+
 // Initialize the mc_flow parameters used in computing tpl data.
 static AOM_INLINE void init_mc_flow_dispenser(AV1_COMP *cpi, int frame_idx,
                                               int pframe_qindex) {
@@ -746,6 +752,12 @@
   TplDepFrame *tpl_frame = &tpl_data->tpl_frame[frame_idx];
   const YV12_BUFFER_CONFIG *this_frame = tpl_frame->gf_picture;
   const YV12_BUFFER_CONFIG *ref_frames_ordered[INTER_REFS_PER_FRAME];
+  uint32_t ref_frame_display_indices[INTER_REFS_PER_FRAME];
+  GF_GROUP *gf_group = &cpi->gf_group;
+  int ref_pruning_enabled = is_frame_eligible_for_ref_pruning(
+      gf_group, cpi->sf.inter_sf.selective_ref_frame,
+      cpi->sf.tpl_sf.prune_ref_frames_in_tpl, frame_idx);
+  int gop_length = get_gop_length(gf_group);
   int ref_frame_flags;
   AV1_COMMON *cm = &cpi->common;
   int rdmult, idx;
@@ -764,10 +776,11 @@
   xd->cur_buf = this_frame;
 
   for (idx = 0; idx < INTER_REFS_PER_FRAME; ++idx) {
-    tpl_data->ref_frame[idx] =
-        tpl_data->tpl_frame[tpl_frame->ref_map_index[idx]].rec_picture;
-    tpl_data->src_ref_frame[idx] =
-        tpl_data->tpl_frame[tpl_frame->ref_map_index[idx]].gf_picture;
+    TplDepFrame *tpl_ref_frame =
+        &tpl_data->tpl_frame[tpl_frame->ref_map_index[idx]];
+    tpl_data->ref_frame[idx] = tpl_ref_frame->rec_picture;
+    tpl_data->src_ref_frame[idx] = tpl_ref_frame->gf_picture;
+    ref_frame_display_indices[idx] = tpl_ref_frame->frame_display_index;
   }
 
   // Store the reference frames based on priority order
@@ -789,6 +802,21 @@
     }
   }
 
+  // Skip motion estimation w.r.t. reference frames which are not
+  // considered in RD search, using "selective_ref_frame" speed feature.
+  // The reference frame pruning is not enabled for frames beyond the gop
+  // length, as there are fewer reference frames and the reference frames
+  // differ from the frames considered during RD search.
+  if (ref_pruning_enabled && (frame_idx <= gop_length)) {
+    for (idx = 0; idx < INTER_REFS_PER_FRAME; ++idx) {
+      const MV_REFERENCE_FRAME refs[2] = { idx + 1, NONE_FRAME };
+      if (prune_ref_by_selective_ref_frame(cpi, NULL, refs,
+                                           ref_frame_display_indices)) {
+        tpl_data->ref_frame[idx] = NULL;
+      }
+    }
+  }
+
   // Make a temporary mbmi for tpl model
   MB_MODE_INFO mbmi;
   memset(&mbmi, 0, sizeof(mbmi));
@@ -910,9 +938,12 @@
     if (frame_params.frame_type == KEY_FRAME || gop_eval) {
       tpl_data->tpl_frame[-i - 1].gf_picture = NULL;
       tpl_data->tpl_frame[-1 - 1].rec_picture = NULL;
+      tpl_data->tpl_frame[-i - 1].frame_display_index = 0;
     } else {
       tpl_data->tpl_frame[-i - 1].gf_picture = &cm->ref_frame_map[i]->buf;
       tpl_data->tpl_frame[-i - 1].rec_picture = &cm->ref_frame_map[i]->buf;
+      tpl_data->tpl_frame[-i - 1].frame_display_index =
+          cm->ref_frame_map[i]->display_order_hint;
     }
 
     ref_picture_map[i] = -i - 1;
@@ -921,15 +952,17 @@
   *tpl_group_frames = cur_frame_idx;
 
   int gf_index;
-  int use_arf = gf_group->arf_index >= 0;
   int anc_frame_offset = gf_group->cur_frame_idx[cur_frame_idx];
   int process_frame_count = 0;
-  const int gop_length =
-      AOMMIN(gf_group->size - 1 + use_arf, MAX_TPL_FRAME_IDX - 1);
+  const int gop_length = get_gop_length(gf_group);
 
   for (gf_index = cur_frame_idx; gf_index <= gop_length; ++gf_index) {
     TplDepFrame *tpl_frame = &tpl_data->tpl_frame[gf_index];
     FRAME_UPDATE_TYPE frame_update_type = gf_group->update_type[gf_index];
+    int frame_display_index = gf_index == gf_group->size
+                                  ? cpi->rc.baseline_gf_interval
+                                  : gf_group->cur_frame_idx[gf_index] +
+                                        gf_group->arf_src_offset[gf_index];
 
     frame_params.show_frame = frame_update_type != ARF_UPDATE &&
                               frame_update_type != INTNL_ARF_UPDATE;
@@ -945,16 +978,22 @@
     if (gf_index == cur_frame_idx) {
       tpl_frame->gf_picture = frame_input->source;
     } else {
-      int frame_display_index = gf_index == gf_group->size
-                                    ? cpi->rc.baseline_gf_interval
-                                    : gf_group->cur_frame_idx[gf_index] +
-                                          gf_group->arf_src_offset[gf_index];
       struct lookahead_entry *buf = av1_lookahead_peek(
           cpi->lookahead, frame_display_index - anc_frame_offset,
           cpi->compressor_stage);
       if (buf == NULL) break;
       tpl_frame->gf_picture = &buf->img;
     }
+    // 'cm->current_frame.frame_number' is the display number
+    // of the current frame.
+    // 'anc_frame_offset' is the number of frames displayed so
+    // far within the gf group. 'cm->current_frame.frame_number -
+    // anc_frame_offset' is the offset of the first frame in the gf group.
+    // 'frame display index' is frame offset within the gf group.
+    // 'frame_display_index + cm->current_frame.frame_number - anc_frame_offset'
+    // is the display index of the frame.
+    tpl_frame->frame_display_index =
+        frame_display_index + cm->current_frame.frame_number - anc_frame_offset;
 
     if (frame_update_type != OVERLAY_UPDATE &&
         frame_update_type != INTNL_OVERLAY_UPDATE) {
@@ -1008,6 +1047,17 @@
     tpl_frame->gf_picture = &buf->img;
     tpl_frame->rec_picture = &tpl_data->tpl_rec_pool[process_frame_count];
     tpl_frame->tpl_stats_ptr = tpl_data->tpl_stats_pool[process_frame_count];
+    // 'cm->current_frame.frame_number' is the display number
+    // of the current frame.
+    // 'anc_frame_offset' is the number of frames displayed so
+    // far within the gf group. 'cm->current_frame.frame_number -
+    // anc_frame_offset' is the offset of the first frame in the gf group.
+    // 'frame display index' is frame offset within the gf group.
+    // 'frame_display_index + cm->current_frame.frame_number - anc_frame_offset'
+    // is the display index of the frame.
+    tpl_frame->frame_display_index =
+        frame_display_index + cm->current_frame.frame_number - anc_frame_offset;
+
     ++process_frame_count;
 
     gf_group->update_type[gf_index] = LF_UPDATE;
@@ -1229,7 +1279,7 @@
   TplDepFrame *tpl_frame = &cpi->tpl_data.tpl_frame[tpl_idx];
 
   if (tpl_frame->is_valid == 0) return;
-  if (!is_frame_tpl_eligible(gf_group)) return;
+  if (!is_frame_tpl_eligible(gf_group, gf_group->index)) return;
   if (tpl_idx >= MAX_TPL_FRAME_IDX) return;
   if (cpi->superres_mode != AOM_SUPERRES_NONE) return;
   if (cpi->oxcf.q_cfg.aq_mode != NO_AQ) return;
diff --git a/av1/encoder/tpl_model.h b/av1/encoder/tpl_model.h
index 1407b1a..1557c50 100644
--- a/av1/encoder/tpl_model.h
+++ b/av1/encoder/tpl_model.h
@@ -110,6 +110,7 @@
   int mi_rows;
   int mi_cols;
   int base_rdmult;
+  uint32_t frame_display_index;
 } TplDepFrame;
 
 /*!\endcond */