Limit the number of best inter mode candidates

In tx_search_best_inter_candidates(), limit the number of best inter
mode candidates considered for transform search if the first turns
out to be valid and transform skip or compound.

          Instruction Count       BD-Rate Loss(%)
cpu-used    Reduction(%)     avg.psnr  ovr.psnr   ssim
   2          3.519           0.0447    0.0440   0.0388
   3          1.771           0.0386    0.0413   0.0250

STATS_CHANGED

Change-Id: Iddbf73833965a689b0045daea67637b988dcad02
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 853be0f..e5f29bc 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -4909,8 +4909,9 @@
           : INT64_MAX;
   *yrd = INT64_MAX;
   int64_t best_rd_in_this_partition = INT64_MAX;
+  int num_inter_mode_cands = inter_modes_info->num;
   // Iterate over best inter mode candidates and perform tx search
-  for (int j = 0; j < inter_modes_info->num; ++j) {
+  for (int j = 0; j < num_inter_mode_cands; ++j) {
     const int data_idx = inter_modes_info->rd_idx_pair_arr[j].idx;
     *mbmi = inter_modes_info->mbmi_arr[data_idx];
     int64_t curr_est_rd = inter_modes_info->est_rd_arr[data_idx];
@@ -4988,6 +4989,27 @@
       update_search_state(search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
                           &rd_stats_uv, mode_enum, x, txfm_search_done);
       search_state->best_skip_rd[0] = skip_rd;
+      // Limit the total number of modes to be evaluated if the first is valid
+      // and transform skip or compound
+      if (cpi->sf.inter_sf.inter_mode_txfm_breakout) {
+        if (!j && (search_state->best_mbmode.skip_txfm || rd_stats.skip_txfm)) {
+          // Evaluate more candidates at high quantizers where occurrence of
+          // transform skip is high.
+          const int max_cands_cap[5] = { 2, 3, 5, 7, 9 };
+          const int qindex_band = (5 * x->qindex) >> QINDEX_BITS;
+          num_inter_mode_cands =
+              AOMMIN(max_cands_cap[qindex_band], inter_modes_info->num);
+        } else if (!j && has_second_ref(&search_state->best_mbmode)) {
+          const int aggr = cpi->sf.inter_sf.inter_mode_txfm_breakout - 1;
+          // Evaluate more candidates at low quantizers where occurrence of
+          // single reference mode is high.
+          const int max_cands_cap_cmp[2][4] = { { 10, 7, 5, 4 },
+                                                { 10, 7, 5, 3 } };
+          const int qindex_band_cmp = (4 * x->qindex) >> QINDEX_BITS;
+          num_inter_mode_cands = AOMMIN(
+              max_cands_cap_cmp[aggr][qindex_band_cmp], inter_modes_info->num);
+        }
+      }
     }
   }
 }
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 3dd78ea..ab55901 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -872,6 +872,7 @@
     sf->inter_sf.txfm_rd_gate_level = boosted ? 0 : 1;
     sf->inter_sf.disable_interinter_wedge_newmv_search =
         is_boosted_arf2_bwd_type ? 0 : 1;
+    sf->inter_sf.inter_mode_txfm_breakout = boosted ? 0 : 1;
 
     // TODO(Sachin): Enable/Enhance this speed feature for speed 2 & 3
     sf->interp_sf.adaptive_interp_filter_search = 1;
@@ -929,6 +930,7 @@
     sf->inter_sf.txfm_rd_gate_level =
         boosted ? 0 : (is_boosted_arf2_bwd_type ? 1 : 2);
     sf->inter_sf.enable_fast_wedge_mask_search = 1;
+    sf->inter_sf.inter_mode_txfm_breakout = boosted ? 0 : 2;
 
     sf->interp_sf.adaptive_interp_filter_search = 2;
 
@@ -1617,6 +1619,7 @@
   inter_sf->enable_fast_compound_mode_search = 0;
   inter_sf->reuse_mask_search_results = 0;
   inter_sf->enable_fast_wedge_mask_search = 0;
+  inter_sf->inter_mode_txfm_breakout = 0;
 }
 
 static AOM_INLINE void init_interp_sf(INTERP_FILTER_SPEED_FEATURES *interp_sf) {
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 750c6c6..21c510d 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -802,6 +802,9 @@
 
   // Enable/disable fast search for wedge masks
   int enable_fast_wedge_mask_search;
+
+  // Early breakout from transform search of inter modes
+  int inter_mode_txfm_breakout;
 } INTER_MODE_SPEED_FEATURES;
 
 typedef struct INTERP_FILTER_SPEED_FEATURES {