rtc: Early termination of NEWMV mode evaluation

This CL introduces a speed feature that terminates NEWMV mode
evaluation early based on the SSE of the best mode found so far.

            Instruction Count          BD-Rate Loss(%)
cpu-used      Reduction(%)      avg.psnr    ovr.psnr    ssim
   5             1.652          -0.0222     -0.0280    -0.0763
   6             1.311           0.0274      0.0276     0.0162

STATS_CHANGED for rt speed 5 and 6

Change-Id: Ia5ee51329a2addb2676dc03c75b3644cbfbb4904
diff --git a/av1/encoder/interp_search.h b/av1/encoder/interp_search.h
index 902b699..e25d44d 100644
--- a/av1/encoder/interp_search.h
+++ b/av1/encoder/interp_search.h
@@ -149,6 +149,11 @@
    *    MACROBLOCK::pred_sse due to different interpolation filter used.
    */
   unsigned int best_single_sse_in_refs[REF_FRAMES];
+  /*!
+   * Holds the sse of the best mode so far in the mode evaluation process.
+   * This is used for early termination of NEWMV mode evaluation.
+   */
+  unsigned int best_pred_sse;
 } HandleInterModeArgs;
 
 /*!\cond */
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 0da6596..c346c92 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -2757,6 +2757,15 @@
         if (this_sse < args->best_single_sse_in_refs[ref]) {
           args->best_single_sse_in_refs[ref] = this_sse;
         }
+
+        if (cpi->sf.rt_sf.skip_newmv_mode_based_on_sse) {
+          const double scale_factor[11] = { 0.7, 0.7, 0.7, 0.7, 0.7, 0.8,
+                                            0.8, 0.9, 0.9, 0.9, 0.9 };
+          assert(num_pels_log2_lookup[bsize] >= 4);
+          if (args->best_pred_sse <
+              scale_factor[num_pels_log2_lookup[bsize] - 4] * this_sse)
+            continue;
+        }
       }
 
       rd_stats->rate += rate_mv;
@@ -5399,7 +5408,8 @@
                                -1,
                                -1,
                                { 0 },
-                               { 0 } };
+                               { 0 },
+                               UINT_MAX };
   for (i = 0; i < MODE_CTX_REF_FRAMES; ++i) args.cmp_mode[i] = -1;
   // Indicates the appropriate number of simple translation winner modes for
   // exhaustive motion mode evaluation
@@ -5618,6 +5628,7 @@
     args.single_newmv_valid = search_state.single_newmv_valid;
     args.single_comp_cost = real_compmode_cost;
     args.ref_frame_cost = ref_frame_cost;
+    args.best_pred_sse = search_state.best_pred_sse;
 
     int64_t skip_rd[2] = { search_state.best_skip_rd[0],
                            search_state.best_skip_rd[1] };
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 9d5b4de..4b2f8e7 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -1415,6 +1415,7 @@
         cm->width * cm->height > 640 * 480)
       sf->rt_sf.use_temporal_noise_estimate = 1;
     sf->rt_sf.skip_tx_no_split_var_based_partition = 1;
+    sf->rt_sf.skip_newmv_mode_based_on_sse = 1;
 
     // For SVC: use better mv search on base temporal layers, and only
     // on base spatial layer if highest resolution is above 640x360.
@@ -1840,6 +1841,7 @@
   rt_sf->skip_cdef_sb = 0;
   rt_sf->force_large_partition_blocks_intra = 0;
   rt_sf->skip_tx_no_split_var_based_partition = 0;
+  rt_sf->skip_newmv_mode_based_on_sse = 0;
 }
 
 void av1_set_speed_features_framesize_dependent(AV1_COMP *cpi, int speed) {
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 954c512..77a2d01 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -1305,6 +1305,10 @@
 
   // Skip evaluation of no split in tx size selection for merge partition
   int skip_tx_no_split_var_based_partition;
+
+  // Early termination of NEWMV mode evaluation based on the sse of the best
+  // mode so far
+  int skip_newmv_mode_based_on_sse;
 } REAL_TIME_SPEED_FEATURES;
 
 /*!\endcond */