Introduce reuse_compound_type_decision speed feature

Reuse compound type decision when match is found.
This speed feature is enabled for preset >= 3.

          Instruction Count
cpu-used       Reduction        Quality Loss
   3             0.50              0.0045%
   4             0.55             -0.0184%

STATS_CHANGED

Change-Id: Ide80d68742e011b0e0bc973e1861778bbc72d2a2
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index 3a162a8..de61c56 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -193,12 +193,14 @@
   int64_t dist[COMPOUND_TYPES];
   int32_t model_rate[COMPOUND_TYPES];
   int64_t model_dist[COMPOUND_TYPES];
+  int comp_rs2[COMPOUND_TYPES];
   int_mv mv[2];
   MV_REFERENCE_FRAME ref_frames[2];
   PREDICTION_MODE mode;
   int_interpfilters filter;
   int ref_mv_idx;
   int is_global[2];
+  INTERINTER_COMPOUND_DATA interinter_comp;
 } COMP_RD_STATS;
 
 // Struct for buffers used by compound_type_rd() function.
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 0a82562..81536ab 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -7537,11 +7537,13 @@
 static INLINE void backup_stats(COMPOUND_TYPE cur_type, int32_t *comp_rate,
                                 int64_t *comp_dist, int32_t *comp_model_rate,
                                 int64_t *comp_model_dist, int rate_sum,
-                                int64_t dist_sum, RD_STATS *rd_stats) {
+                                int64_t dist_sum, RD_STATS *rd_stats,
+                                int *comp_rs2, int rs2) {
   comp_rate[cur_type] = rd_stats->rate;
   comp_dist[cur_type] = rd_stats->dist;
   comp_model_rate[cur_type] = rate_sum;
   comp_model_dist[cur_type] = dist_sum;
+  comp_rs2[cur_type] = rs2;
 }
 
 static int64_t masked_compound_type_rd(
@@ -7552,7 +7554,7 @@
     int mode_rate, int64_t rd_thresh, int *calc_pred_masked_compound,
     int32_t *comp_rate, int64_t *comp_dist, int32_t *comp_model_rate,
     int64_t *comp_model_dist, const int64_t comp_best_model_rd,
-    int64_t *const comp_model_rd_cur) {
+    int64_t *const comp_model_rd_cur, int *comp_rs2) {
   const AV1_COMMON *const cm = &cpi->common;
   MACROBLOCKD *xd = &x->e_mbd;
   MB_MODE_INFO *const mbmi = xd->mi[0];
@@ -7699,7 +7701,8 @@
           RDCOST(x->rdmult, *rs2 + *out_rate_mv + rd_stats.rate, rd_stats.dist);
       // Backup rate and distortion for future reuse
       backup_stats(compound_type, comp_rate, comp_dist, comp_model_rate,
-                   comp_model_dist, rate_sum, dist_sum, &rd_stats);
+                   comp_model_dist, rate_sum, dist_sum, &rd_stats, comp_rs2,
+                   *rs2);
     }
   } else {
     // Reuse data as matching record is found
@@ -8365,10 +8368,13 @@
 }
 
 // Checks if characteristics of search match
-static INLINE int is_comp_rd_match(
-    const AV1_COMP *const cpi, const MACROBLOCK *const x,
-    const COMP_RD_STATS *st, const MB_MODE_INFO *const mi, int32_t *comp_rate,
-    int64_t *comp_dist, int32_t *comp_model_rate, int64_t *comp_model_dist) {
+static INLINE int is_comp_rd_match(const AV1_COMP *const cpi,
+                                   const MACROBLOCK *const x,
+                                   const COMP_RD_STATS *st,
+                                   const MB_MODE_INFO *const mi,
+                                   int32_t *comp_rate, int64_t *comp_dist,
+                                   int32_t *comp_model_rate,
+                                   int64_t *comp_model_dist, int *comp_rs2) {
   // TODO(ranjit): Ensure that compound type search use regular filter always
   // and check if following check can be removed
   // Check if interp filter matches with previous case
@@ -8392,6 +8398,7 @@
     comp_dist[comp_type] = st->dist[comp_type];
     comp_model_rate[comp_type] = st->model_rate[comp_type];
     comp_model_dist[comp_type] = st->model_dist[comp_type];
+    comp_rs2[comp_type] = st->comp_rs2[comp_type];
   }
 
   // For compound wedge/segment, reuse data only if NEWMV is not present in
@@ -8407,6 +8414,8 @@
            sizeof(comp_model_rate[COMPOUND_WEDGE]) * 2);
     memcpy(&comp_model_dist[COMPOUND_WEDGE], &st->model_dist[COMPOUND_WEDGE],
            sizeof(comp_model_dist[COMPOUND_WEDGE]) * 2);
+    memcpy(&comp_rs2[COMPOUND_WEDGE], &st->comp_rs2[COMPOUND_WEDGE],
+           sizeof(comp_rs2[COMPOUND_WEDGE]) * 2);
   }
   return 1;
 }
@@ -8449,10 +8458,13 @@
                                         const MB_MODE_INFO *const mbmi,
                                         int32_t *comp_rate, int64_t *comp_dist,
                                         int32_t *comp_model_rate,
-                                        int64_t *comp_model_dist) {
+                                        int64_t *comp_model_dist, int *comp_rs2,
+                                        int *match_index) {
   for (int j = 0; j < x->comp_rd_stats_idx; ++j) {
     if (is_comp_rd_match(cpi, x, &x->comp_rd_stats[j], mbmi, comp_rate,
-                         comp_dist, comp_model_rate, comp_model_dist)) {
+                         comp_dist, comp_model_rate, comp_model_dist,
+                         comp_rs2)) {
+      *match_index = j;
       return 1;
     }
   }
@@ -8481,7 +8493,7 @@
 static INLINE void save_comp_rd_search_stat(
     MACROBLOCK *x, const MB_MODE_INFO *const mbmi, const int32_t *comp_rate,
     const int64_t *comp_dist, const int32_t *comp_model_rate,
-    const int64_t *comp_model_dist, const int_mv *cur_mv) {
+    const int64_t *comp_model_dist, const int_mv *cur_mv, const int *comp_rs2) {
   const int offset = x->comp_rd_stats_idx;
   if (offset < MAX_COMP_RD_STATS) {
     COMP_RD_STATS *const rd_stats = x->comp_rd_stats + offset;
@@ -8489,6 +8501,7 @@
     memcpy(rd_stats->dist, comp_dist, sizeof(rd_stats->dist));
     memcpy(rd_stats->model_rate, comp_model_rate, sizeof(rd_stats->model_rate));
     memcpy(rd_stats->model_dist, comp_model_dist, sizeof(rd_stats->model_dist));
+    memcpy(rd_stats->comp_rs2, comp_rs2, sizeof(rd_stats->comp_rs2));
     memcpy(rd_stats->mv, cur_mv, sizeof(rd_stats->mv));
     memcpy(rd_stats->ref_frames, mbmi->ref_frame, sizeof(rd_stats->ref_frames));
     rd_stats->mode = mbmi->mode;
@@ -8500,6 +8513,8 @@
           &xd->global_motion[mbmi->ref_frame[i]];
       rd_stats->is_global[i] = is_global_mv_block(mbmi, wm->wmtype);
     }
+    memcpy(&rd_stats->interinter_comp, &mbmi->interinter_comp,
+           sizeof(rd_stats->interinter_comp));
     ++x->comp_rd_stats_idx;
   }
 }
@@ -9697,6 +9712,29 @@
   mbmi->compound_idx = (cur_type != COMPOUND_DISTWTD);
 }
 
+// When match is found, populate the compound type data
+// and calculate the rd cost using the stored stats and
+// update the mbmi appropriately.
+static INLINE int populate_reuse_comp_type_data(
+    const MACROBLOCK *x, MB_MODE_INFO *mbmi,
+    BEST_COMP_TYPE_STATS *best_type_stats, int_mv *cur_mv, int32_t *comp_rate,
+    int64_t *comp_dist, int *comp_rs2, int *rate_mv, int64_t *rd,
+    int match_index) {
+  const int winner_comp_type =
+      x->comp_rd_stats[match_index].interinter_comp.type;
+  if (comp_rate[winner_comp_type] == INT_MAX)
+    return best_type_stats->best_compmode_interinter_cost;
+  update_mbmi_for_compound_type(mbmi, winner_comp_type);
+  mbmi->interinter_comp = x->comp_rd_stats[match_index].interinter_comp;
+  *rd = RDCOST(
+      x->rdmult,
+      comp_rs2[winner_comp_type] + *rate_mv + comp_rate[winner_comp_type],
+      comp_dist[winner_comp_type]);
+  mbmi->mv[0].as_int = cur_mv[0].as_int;
+  mbmi->mv[1].as_int = cur_mv[1].as_int;
+  return comp_rs2[winner_comp_type];
+}
+
 // Updates rd cost and relevant compound type data for the best compound type
 static INLINE void update_best_info(const MB_MODE_INFO *const mbmi, int64_t *rd,
                                     BEST_COMP_TYPE_STATS *best_type_stats,
@@ -9822,12 +9860,15 @@
   int64_t comp_dist[COMPOUND_TYPES] = { INT64_MAX, INT64_MAX, INT64_MAX,
                                         INT64_MAX };
   int32_t comp_rate[COMPOUND_TYPES] = { INT_MAX, INT_MAX, INT_MAX, INT_MAX };
+  int comp_rs2[COMPOUND_TYPES] = { INT_MAX, INT_MAX, INT_MAX, INT_MAX };
   int32_t comp_model_rate[COMPOUND_TYPES] = { INT_MAX, INT_MAX, INT_MAX,
                                               INT_MAX };
   int64_t comp_model_dist[COMPOUND_TYPES] = { INT64_MAX, INT64_MAX, INT64_MAX,
                                               INT64_MAX };
-  const int match_found = find_comp_rd_in_stats(
-      cpi, x, mbmi, comp_rate, comp_dist, comp_model_rate, comp_model_dist);
+  int match_index = 0;
+  const int match_found =
+      find_comp_rd_in_stats(cpi, x, mbmi, comp_rate, comp_dist, comp_model_rate,
+                            comp_model_dist, comp_rs2, &match_index);
   best_mv[0].as_int = cur_mv[0].as_int;
   best_mv[1].as_int = cur_mv[1].as_int;
   *rd = INT64_MAX;
@@ -9861,6 +9902,13 @@
   const int mi_row = xd->mi_row;
   const int mi_col = xd->mi_col;
 
+  // If the match is found, calculate the rd cost using the
+  // stored stats and update the mbmi appropriately.
+  if (match_found && cpi->sf.inter_sf.reuse_compound_type_decision) {
+    return populate_reuse_comp_type_data(x, mbmi, &best_type_stats, cur_mv,
+                                         comp_rate, comp_dist, comp_rs2,
+                                         rate_mv, rd, match_index);
+  }
   // Special handling if both compound_average and compound_distwtd
   // are to be searched. In this case, first estimate between the two
   // modes and then call estimate_yrd_for_sb() only for the better of
@@ -9928,7 +9976,7 @@
         // Backup rate and distortion for future reuse
         backup_stats(best_type, comp_rate, comp_dist, comp_model_rate,
                      comp_model_dist, est_rate[best_type], est_dist[best_type],
-                     &est_rd_stats);
+                     &est_rd_stats, comp_rs2, rs2);
         comp_model_rd_cur = est_rd;
       }
       if (best_type == COMPOUND_AVERAGE) restore_dst_buf(xd, *tmp_dst, 1);
@@ -9978,7 +10026,8 @@
 
             // Backup rate and distortion for future reuse
             backup_stats(cur_type, comp_rate, comp_dist, comp_model_rate,
-                         comp_model_dist, rate_sum, dist_sum, &est_rd_stats);
+                         comp_model_dist, rate_sum, dist_sum, &est_rd_stats,
+                         comp_rs2, rs2);
           }
         } else {
           // Calculate RD cost based on stored stats
@@ -10009,7 +10058,7 @@
             &tmp_rate_mv, preds0, preds1, buffers->residual1, buffers->diff10,
             strides, rd_stats->rate, tmp_rd_thresh, &calc_pred_masked_compound,
             comp_rate, comp_dist, comp_model_rate, comp_model_dist,
-            best_type_stats.comp_best_model_rd, &comp_model_rd_cur);
+            best_type_stats.comp_best_model_rd, &comp_model_rd_cur, comp_rs2);
       }
     }
     // Update stats for best compound type
@@ -10046,7 +10095,7 @@
   restore_dst_buf(xd, *orig_dst, 1);
   if (!match_found)
     save_comp_rd_search_stat(x, mbmi, comp_rate, comp_dist, comp_model_rate,
-                             comp_model_dist, cur_mv);
+                             comp_model_dist, cur_mv, comp_rs2);
   return best_type_stats.best_compmode_interinter_cost;
 }
 
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index fa90d88..afc7174 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -435,6 +435,7 @@
     sf->inter_sf.prune_motion_mode_level = boosted ? 2 : 3;
     if (cpi->oxcf.enable_smooth_interintra)
       sf->inter_sf.disable_smooth_interintra = boosted ? 0 : 1;
+    sf->inter_sf.reuse_compound_type_decision = 1;
 
     sf->intra_sf.prune_palette_search_level = 2;
 
@@ -941,6 +942,7 @@
   inter_sf->disable_interinter_wedge = 0;
   inter_sf->prune_ref_mv_idx_search = 0;
   inter_sf->prune_warped_prob_thresh = 0;
+  inter_sf->reuse_compound_type_decision = 0;
 }
 
 static AOM_INLINE void init_interp_sf(INTERP_FILTER_SPEED_FEATURES *interp_sf) {
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 84066ff..4cf387c 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -628,6 +628,11 @@
   // 0: no breakout
   // 1: use model based rd breakout
   int model_based_post_interp_filter_breakout;
+
+  // Reuse compound type rd decision when exact match is found
+  // 0: No reuse
+  // 1: Reuse the compound type decision
+  int reuse_compound_type_decision;
 } INTER_MODE_SPEED_FEATURES;
 
 typedef struct INTERP_FILTER_SPEED_FEATURES {