Prune ref_mv_idx search

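Prune the ref_mv_idx search based on the MVs already found: when no
earlier ref_mv_idx has improved the best RD cost, a ref_mv_idx whose MVs
are identical to those saved for an earlier ref_mv_idx is skipped. This
change only covers compound modes and is enabled at speed 4.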

Borg test results at speed 4:
         avg_psnr:  ovr_psnr:  ssim:    avg speedup:
lowres:  0.044      0.046      0.016    0.8%
midres:  0.024      0.021      -0.015   0.5%

STATS_CHANGED

Change-Id: I5feb5f48ba9c31cf48ab4c3c6a9fff45988214d0
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 5002d7e..10b1377 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -10912,6 +10912,9 @@
   // First, perform a simple translation search for each of the indices. If
   // an index performs well, it will be fully searched here.
   const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode);
+  // Save MV results from first 2 ref_mv_idx.
+  int_mv save_mv[MAX_REF_MV_SEARCH - 1][2] = { { { 0 } } };
+  int best_ref_mv_idx = -1;
   int idx_mask = ref_mv_idx_to_search(cpi, x, rd_stats, args, ref_best_rd,
                                       mode_info, bsize, ref_set);
   for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
@@ -11042,6 +11045,34 @@
       continue;
     }
 
+    if (cpi->sf.prune_ref_mv_idx_search && is_comp_pred) {
+      // TODO(yunqing): Move this part to a separate function when it is done.
+      // Store MV result.
+      if (ref_mv_idx < MAX_REF_MV_SEARCH - 1) {
+        for (i = 0; i < is_comp_pred + 1; ++i)
+          save_mv[ref_mv_idx][i].as_int = mbmi->mv[i].as_int;
+      }
+      // Skip the evaluation if an MV match is found.
+      if (ref_mv_idx > 0) {
+        int match = 0;
+        for (int idx = 0; idx < ref_mv_idx; ++idx) {
+          int mv_diff = 0;
+          for (i = 0; i < 1 + is_comp_pred; ++i) {
+            mv_diff += abs(save_mv[idx][i].as_mv.row - mbmi->mv[i].as_mv.row) +
+                       abs(save_mv[idx][i].as_mv.col - mbmi->mv[i].as_mv.col);
+          }
+
+          // If no earlier index has improved ref_best_rd and the current MVs
+          // exactly match a stored set (mv_diff == 0), skip this ref_mv_idx.
+          if (best_ref_mv_idx == -1 && mv_diff < 1) {
+            match = 1;
+            break;
+          }
+        }
+        if (match == 1) continue;
+      }
+    }
+
 #if CONFIG_COLLECT_COMPONENT_TIMING
     start_timing(cpi, compound_type_rd_time);
 #endif
@@ -11163,6 +11194,7 @@
 
       if (tmp_rd < ref_best_rd) {
         ref_best_rd = tmp_rd;
+        best_ref_mv_idx = ref_mv_idx;
       }
     }
     restore_dst_buf(xd, orig_dst, num_planes);
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 95b4296..2b432c1 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -464,6 +464,7 @@
     sf->tx_domain_dist_thres_level = 2;
     sf->simple_motion_search_prune_agg = 2;
     sf->skip_repeat_interpolation_filter_search = 2;
+    sf->prune_ref_mv_idx_search = 1;
   }
 
   if (speed >= 5) {
@@ -932,6 +933,7 @@
   sf->disable_obmc = 0;
   sf->nonrd_merge_partition = 0;
   sf->disable_interinter_wedge = 0;
+  sf->prune_ref_mv_idx_search = 0;
 
   if (oxcf->mode == GOOD)
     set_good_speed_features_framesize_independent(cpi, sf, speed);
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 2aaf859..7433eb5 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -809,6 +809,9 @@
   // If set forces interpolation filter to EIGHTTAP_REGULAR
   int skip_interp_filter_search;
 
+  // Prune later ref_mv_idx searches using earlier MV results (compound only).
+  int prune_ref_mv_idx_search;
+
   // For nonrd: use block_yrd for rd cost in interpolation filter search.
   int nonrd_use_blockyrd_interp_filter;