Tune txfm_rd_gate_level for motion mode rd

Increase the aggressiveness of skipping the transform search based on
skip RD in the mode evaluation stage of the motion mode search.

     Instruction Count        BD-Rate Loss(%)
cpu    Reduction(%)    avg.psnr   ovr.psnr    ssim
 5        1.325         0.0496     0.0479    0.0488
 6        1.411         0.0404     0.0431    0.0583

STATS_CHANGED for Good preset, speed 5 and 6

Change-Id: I344c5e15bcf165d7e52b64236d0300aa7ac2377b
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index ee7486d..67f5e41 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -1321,6 +1321,16 @@
   const int mi_row = xd->mi_row;
   const int mi_col = xd->mi_col;
   int mode_index_start, mode_index_end;
+  int txfm_rd_gate_level = cpi->sf.inter_sf.txfm_rd_gate_level;
+
+  // Use a more aggressive transform rd gating level for mode evaluation stage.
+  if (cpi->sf.inter_sf.motion_mode_txfm_rd_gating_offset &&
+      txfm_rd_gate_level && !eval_motion_mode &&
+      num_pels_log2_lookup[bsize] > 8) {
+    txfm_rd_gate_level += cpi->sf.inter_sf.motion_mode_txfm_rd_gating_offset;
+    txfm_rd_gate_level = AOMMIN(txfm_rd_gate_level, MAX_TX_RD_GATE_LEVEL);
+  }
+
   // Modify the start and end index according to speed features. For example,
   // if SIMPLE_TRANSLATION has already been searched according to
   // the motion_mode_for_winner_cand speed feature, update the mode_index_start
@@ -1524,7 +1534,7 @@
       if (rd_stats->rdcost < *best_est_rd) {
         *best_est_rd = rd_stats->rdcost;
         assert(sse_y >= 0);
-        ref_skip_rd[1] = cpi->sf.inter_sf.txfm_rd_gate_level
+        ref_skip_rd[1] = txfm_rd_gate_level
                              ? RDCOST(x->rdmult, mode_rate, (sse_y << 4))
                              : INT64_MAX;
       }
@@ -1546,14 +1556,14 @@
       // Perform full transform search
       int64_t skip_rd = INT64_MAX;
       int64_t skip_rdy = INT64_MAX;
-      if (cpi->sf.inter_sf.txfm_rd_gate_level) {
+      if (txfm_rd_gate_level) {
         // Check if the mode is good enough based on skip RD
         int64_t sse_y = INT64_MAX;
         int64_t curr_sse = get_sse(cpi, x, &sse_y);
         skip_rd = RDCOST(x->rdmult, rd_stats->rate, curr_sse);
         skip_rdy = RDCOST(x->rdmult, rd_stats->rate, (sse_y << 4));
         int eval_txfm = check_txfm_eval(x, bsize, ref_skip_rd[0], skip_rd,
-                                        cpi->sf.inter_sf.txfm_rd_gate_level, 0);
+                                        txfm_rd_gate_level, 0);
         if (!eval_txfm) continue;
       }
 
diff --git a/av1/encoder/rdopt_utils.h b/av1/encoder/rdopt_utils.h
index 91823d8..17e76fb 100644
--- a/av1/encoder/rdopt_utils.h
+++ b/av1/encoder/rdopt_utils.h
@@ -23,6 +23,7 @@
 #endif
 
 #define MAX_REF_MV_SEARCH 3
+#define MAX_TX_RD_GATE_LEVEL 5
 #define INTER_INTRA_RD_THRESH_SCALE 9
 #define INTER_INTRA_RD_THRESH_SHIFT 4
 
@@ -352,10 +353,12 @@
   // Derive aggressiveness factor for gating the transform search
   // Lower value indicates more aggressiveness. Be more conservative (high
   // value) for (i) low quantizers (ii) regions where prediction is poor
-  const int scale[5] = { INT_MAX, 4, 3, 2, 2 };
+  const int scale[MAX_TX_RD_GATE_LEVEL + 1] = { INT_MAX, 4, 3, 2, 2, 1 };
   const int qslope = 2 * (!is_luma_only);
-  const int level_to_qindex_map[5] = { 0, 0, 0, 80, 100 };
+  const int level_to_qindex_map[MAX_TX_RD_GATE_LEVEL + 1] = { 0,  0,   0,
+                                                              80, 100, 140 };
   int aggr_factor = 4;
+  assert(level <= MAX_TX_RD_GATE_LEVEL);
   const int pred_qindex_thresh = level_to_qindex_map[level];
   if (!is_luma_only && level <= 2) {
     aggr_factor = 4 * AOMMAX(1, ROUND_POWER_OF_TWO((MAXQ - x->qindex) * qslope,
@@ -374,7 +377,9 @@
   // since best_skip_rd is computed after and skip_rd is computed (with 8-bit
   // prediction signals blended for WEDGE/DIFFWTD rather than 16-bit) before
   // interpolation filter search
-  const int luma_mul[5] = { INT_MAX, 32, 29, 17, 17 };
+  const int luma_mul[MAX_TX_RD_GATE_LEVEL + 1] = {
+    INT_MAX, 32, 29, 17, 17, 17
+  };
   int mul_factor = is_luma_only ? luma_mul[level] : 16;
   int64_t rd_thresh =
       (best_skip_rd == INT64_MAX)
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 36bc645..9bef51c 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -1210,6 +1210,7 @@
 
     sf->inter_sf.prune_inter_modes_if_skippable = 1;
     sf->inter_sf.txfm_rd_gate_level = boosted ? 0 : 4;
+    sf->inter_sf.motion_mode_txfm_rd_gating_offset = boosted ? 0 : 1;
     sf->inter_sf.enable_fast_compound_mode_search = 2;
 
     sf->intra_sf.chroma_intra_pruning_with_hog = 3;
@@ -1998,6 +1999,7 @@
   inter_sf->prune_warped_prob_thresh = 0;
   inter_sf->reuse_compound_type_decision = 0;
   inter_sf->txfm_rd_gate_level = 0;
+  inter_sf->motion_mode_txfm_rd_gating_offset = 0;
   inter_sf->prune_inter_modes_if_skippable = 0;
   inter_sf->disable_masked_comp = 0;
   inter_sf->enable_fast_compound_mode_search = 0;
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 6ce7e35..f351ed5 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -843,6 +843,10 @@
   // Bypass transform search based on skip rd
   int txfm_rd_gate_level;
 
+  // Set transform rd gating offset used in mode evaluation stage of motion
+  // mode. This sf is applicable only if txfm_rd_gate_level is enabled.
+  int motion_mode_txfm_rd_gating_offset;
+
   // Limit the inter mode tested in the RD loop
   int reduce_inter_modes;