AV1 RT: Limit the number of modes with TX search

8-10% speedup on speed 6 QVGA.
1.5% BDRate loss overall but 6% on one outlier.
The feature is turned off for now.

Change-Id: Ieb82b82243e577a922ca50a06a1b2219be6170c4
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index faea6de..7ebef47 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -11882,27 +11882,30 @@
           AOMMIN(x->best_pred_mv_sad, x->pred_mv_sad[ref_frame]);
   }
   // ref_frame = ALTREF_FRAME
-  for (; ref_frame < MODE_CTX_REF_FRAMES; ++ref_frame) {
-    x->mbmi_ext->mode_context[ref_frame] = 0;
-    mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
-    const MV_REFERENCE_FRAME *rf = ref_frame_map[ref_frame - REF_FRAMES];
-    if (!((cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[0]]) &&
-          (cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[1]]))) {
-      continue;
-    }
-
-    if (mbmi->partition != PARTITION_NONE &&
-        mbmi->partition != PARTITION_SPLIT) {
-      if (skip_ref_frame_mask & (1 << ref_frame)) {
+  if (!cpi->sf.use_real_time_ref_set) {  // No second reference on RT ref set,
+                                         // so no need to initialize
+    for (; ref_frame < MODE_CTX_REF_FRAMES; ++ref_frame) {
+      x->mbmi_ext->mode_context[ref_frame] = 0;
+      mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
+      const MV_REFERENCE_FRAME *rf = ref_frame_map[ref_frame - REF_FRAMES];
+      if (!((cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[0]]) &&
+            (cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[1]]))) {
         continue;
       }
+
+      if (mbmi->partition != PARTITION_NONE &&
+          mbmi->partition != PARTITION_SPLIT) {
+        if (skip_ref_frame_mask & (1 << ref_frame)) {
+          continue;
+        }
+      }
+      av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
+                       xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
+                       mi_row, mi_col, mbmi_ext->mode_context);
+      // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
+      // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
+      av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
     }
-    av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
-                     xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
-                     mi_row, mi_col, mbmi_ext->mode_context);
-    // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
-    // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
-    av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
   }
 
   av1_count_overlappable_neighbors(cm, xd, mi_row, mi_col);
@@ -12966,7 +12969,8 @@
   const int do_tx_search =
       !((cpi->sf.inter_mode_rd_model_estimation == 1 && md->ready) ||
         (cpi->sf.inter_mode_rd_model_estimation == 2 &&
-         num_pels_log2_lookup[bsize] > 8));
+         num_pels_log2_lookup[bsize] > 8) ||
+        cpi->sf.force_tx_search_off);
   InterModesInfo *inter_modes_info = x->inter_modes_info;
   inter_modes_info->num = 0;
 
@@ -13211,7 +13215,10 @@
     inter_modes_info_sort(inter_modes_info, inter_modes_info->rd_idx_pair_arr);
     search_state.best_rd = best_rd_so_far;
     search_state.best_mode_index = THR_INVALID;
-
+    inter_modes_info->num =
+        inter_modes_info->num < cpi->sf.num_inter_modes_for_tx_search
+            ? inter_modes_info->num
+            : cpi->sf.num_inter_modes_for_tx_search;
     const int64_t top_est_rd =
         inter_modes_info->num > 0
             ? inter_modes_info
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 4f6109b..7addf4d 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -648,6 +648,11 @@
     sf->max_intra_bsize = BLOCK_16X16;
     sf->use_inter_txb_hash = 0;
     sf->skip_interp_filter_search = 1;
+#if 0
+    // Turning this off until we agree that tradeoff quality vs speed is good.
+    sf->force_tx_search_off = 1;
+    sf->num_inter_modes_for_tx_search = 2;
+#endif
   }
   if (speed >= 7) {
     sf->lpf_pick = LPF_PICK_FROM_Q;
@@ -825,6 +830,8 @@
   // TODO(yunqing): turn it on for speed 0 if there is gain.
   sf->adaptive_overlay_encoding = 0;
   sf->skip_interp_filter_search = 0;
+  sf->force_tx_search_off = 0;
+  sf->num_inter_modes_for_tx_search = INT_MAX;
 
   for (i = 0; i < TX_SIZES; i++) {
     sf->intra_y_mode_mask[i] = INTRA_ALL;
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 6b1e0bc..2478d97 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -798,6 +798,12 @@
 
   // For nonrd: use block_yrd for rd cost in interpolation filter search.
   int nonrd_use_blockyrd_interp_filter;
+
+  // Forces TX search off for RDCost calculation.
+  int force_tx_search_off;
+
+  // Number of best inter modes to search transform. INT_MAX - search all.
+  int num_inter_modes_for_tx_search;
 } SPEED_FEATURES;
 
 struct AV1_COMP;