Bypass multiple motion search runs on diff wtd compound mode

Allow the encoder to reuse the diff wtd mask index estimated for the
first reference motion vector when evaluating the later ones. For
NEWMV modes, this skips the repeated motion estimation runs that the
weighting index search would otherwise perform. Local tests show a
minor speed-up, and the overall coding performance change is minor too.

STATS_CHANGED

Change-Id: I9c3d35081c7c0f4c173e1f27f5c4fc45266f2551
diff --git a/av1/encoder/compound_type.c b/av1/encoder/compound_type.c
index 8fc29f3..6f9cb56 100644
--- a/av1/encoder/compound_type.c
+++ b/av1/encoder/compound_type.c
@@ -1429,43 +1429,71 @@
       best_rd_cur = RDCOST(x->rdmult, rs2 + tmp_rate_mv + est_rd_stats.rate,
                            est_rd_stats.dist);
     } else if (cur_type == COMPOUND_DIFFWTD) {
+      int_mv tmp_mv[2];
+      int best_mask_index = 0;
       rs2 += get_interinter_compound_mask_rate(&x->mode_costs, mbmi);
-      int mask_value = 38;
-      memset(xd->seg_mask, mask_value, sizeof(xd->seg_mask));
-      tmp_rate_mv = *rate_mv;
-      mbmi->interinter_comp.mask_type = DIFFWTD_38;
 
-      if (have_newmv_in_inter_mode(this_mode))
-        tmp_rate_mv = av1_interinter_compound_motion_search(cpi, x, cur_mv,
-                                                            bsize, this_mode);
-      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
-                                    AOM_PLANE_Y, AOM_PLANE_Y);
-      RD_STATS est_rd_stats;
-      estimate_yrd_for_sb(cpi, bsize, x, INT64_MAX, &est_rd_stats);
-      best_rd_cur = RDCOST(x->rdmult, rs2 + tmp_rate_mv + est_rd_stats.rate,
-                           est_rd_stats.dist);
-      int_mv tmp_mv[2] = { mbmi->mv[0], mbmi->mv[1] };
+      int need_mask_search =
+          args->diffwtd_index == -1 || !have_newmv_in_inter_mode(this_mode);
 
-      mask_value = 26;
-      memset(xd->seg_mask, mask_value, sizeof(xd->seg_mask));
-      tmp_rate_mv = *rate_mv;
-      mbmi->interinter_comp.mask_type = DIFFWTD_38_INV;
+      for (int mask_index = 0; mask_index < 2 && need_mask_search;
+           ++mask_index) {
+        // hard coded number for diff wtd
+        int mask_value = mask_index == 0 ? 38 : 26;
+        memset(xd->seg_mask, mask_value, sizeof(xd->seg_mask));
+        tmp_rate_mv = *rate_mv;
+        mbmi->interinter_comp.mask_type =
+            mask_index == 0 ? DIFFWTD_38 : DIFFWTD_38_INV;
+        if (have_newmv_in_inter_mode(this_mode))
+          tmp_rate_mv = av1_interinter_compound_motion_search(cpi, x, cur_mv,
+                                                              bsize, this_mode);
+        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
+                                      AOM_PLANE_Y, AOM_PLANE_Y);
+        RD_STATS est_rd_stats;
+        int64_t this_rd_cur =
+            estimate_yrd_for_sb(cpi, bsize, x, INT64_MAX, &est_rd_stats);
+        if (this_rd_cur < INT64_MAX) {
+          this_rd_cur = RDCOST(x->rdmult, rs2 + tmp_rate_mv + est_rd_stats.rate,
+                               est_rd_stats.dist);
+        }
 
-      if (have_newmv_in_inter_mode(this_mode))
-        tmp_rate_mv = av1_interinter_compound_motion_search(cpi, x, cur_mv,
-                                                            bsize, this_mode);
-      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
-                                    AOM_PLANE_Y, AOM_PLANE_Y);
-      estimate_yrd_for_sb(cpi, bsize, x, INT64_MAX, &est_rd_stats);
-      int64_t this_rd_cur = RDCOST(
-          x->rdmult, rs2 + tmp_rate_mv + est_rd_stats.rate, est_rd_stats.dist);
-      if (this_rd_cur < best_rd_cur) {
-        best_rd_cur = this_rd_cur;
-      } else {
-        mbmi->interinter_comp.mask_type = DIFFWTD_38;
-        mbmi->mv[0] = tmp_mv[0];
-        mbmi->mv[1] = tmp_mv[1];
+        if (this_rd_cur < best_rd_cur) {
+          best_rd_cur = this_rd_cur;
+          best_mask_index = mbmi->interinter_comp.mask_type;
+          tmp_mv[0] = mbmi->mv[0];
+          tmp_mv[1] = mbmi->mv[1];
+        }
       }
+
+      if (need_mask_search) {
+        args->diffwtd_index = best_mask_index;
+      } else {
+        mbmi->interinter_comp.mask_type = args->diffwtd_index;
+        rs2 = masked_type_cost[cur_type];
+        rs2 += get_interinter_compound_mask_rate(&x->mode_costs, mbmi);
+
+        int mask_value = mbmi->interinter_comp.mask_type == 0 ? 38 : 26;
+        memset(xd->seg_mask, mask_value, sizeof(xd->seg_mask));
+
+        tmp_rate_mv = av1_interinter_compound_motion_search(cpi, x, cur_mv,
+                                                            bsize, this_mode);
+        best_mask_index = mbmi->interinter_comp.mask_type;
+        tmp_mv[0] = mbmi->mv[0];
+        tmp_mv[1] = mbmi->mv[1];
+        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
+                                      AOM_PLANE_Y, AOM_PLANE_Y);
+        RD_STATS est_rd_stats;
+        int64_t this_rd_cur =
+            estimate_yrd_for_sb(cpi, bsize, x, INT64_MAX, &est_rd_stats);
+        if (this_rd_cur < INT64_MAX) {
+          best_rd_cur = RDCOST(x->rdmult, rs2 + tmp_rate_mv + est_rd_stats.rate,
+                               est_rd_stats.dist);
+        }
+      }
+
+      mbmi->interinter_comp.mask_type = best_mask_index;
+      mbmi->mv[0] = tmp_mv[0];
+      mbmi->mv[1] = tmp_mv[1];
     } else {
       // Handle masked compound types
       // Factors to control gating of compound type selection based on best
diff --git a/av1/encoder/interp_search.h b/av1/encoder/interp_search.h
index be57872..9a6dcfa 100644
--- a/av1/encoder/interp_search.h
+++ b/av1/encoder/interp_search.h
@@ -131,6 +131,10 @@
    * Estimated wedge sign.
    */
   int wedge_sign;
+  /*!
+   * Estimated diff wtd index.
+   */
+  int diffwtd_index;
 } HandleInterModeArgs;
 
 /*!\cond */
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index c81f213..c53fb53 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -2768,6 +2768,7 @@
 
   args->wedge_index = -1;
   args->wedge_sign = -1;
+  args->diffwtd_index = -1;
 
   // Main loop of this function. This will  iterate over all of the ref mvs
   // in the dynamic reference list and do the following:
@@ -5351,6 +5352,7 @@
                                { { { 0 }, { { 0 } }, { 0 }, 0, 0, 0, 0 } },
                                0,
                                0,
+                               0,
                                0 };
   // Indicates the appropriate number of simple translation winner modes for
   // exhaustive motion mode evaluation