[NORMATIVE] Compound mode ref mv construction

Redesign the compound mode reference motion vector fetch. Use
a single run to build all the compound ref mvs, which saves up to
two additional ref mv search runs on the single reference frames.

Tested on 50 frames of night_720p at 800 kbps. The average time
spent in find_mv_refs calls is reduced by 15% (average 69875 us ->
60473 us). The overall change in compression performance is less
than 0.01%.

BUG=aomedia:1373

Change-Id: I388b9cf36817d10613cd2c9d0bd8865b43324009
diff --git a/av1/common/mvref_common.c b/av1/common/mvref_common.c
index 481c9d3..e7fe0e3 100644
--- a/av1/common/mvref_common.c
+++ b/av1/common/mvref_common.c
@@ -1001,6 +1001,121 @@
   }
 
   if (rf[1] > NONE_FRAME) {
+#if CONFIG_OPT_REF_MV
+    // TODO(jingning, yunqing): Refactor and consolidate the compound and
+    // single reference frame modes. Reduce unnecessary redundancy.
+    if (refmv_count[ref_frame] < 2) {
+      int_mv ref_id[2][2], ref_diff[2][2];
+      int ref_id_count[2] = { 0 }, ref_diff_count[2] = { 0 };
+
+      int mi_width = AOMMIN(mi_size_wide[BLOCK_64X64], xd->n8_w);
+      mi_width = AOMMIN(mi_width, cm->mi_cols - mi_col);
+      int mi_height = AOMMIN(mi_size_high[BLOCK_64X64], xd->n8_h);
+      mi_height = AOMMIN(mi_height, cm->mi_rows - mi_row);
+      int mi_size = AOMMIN(mi_width, mi_height);
+
+      for (int idx = 0; abs(max_row_offset) >= 1 && idx < mi_size;) {
+        const MODE_INFO *const candidate_mi = xd->mi[-xd->mi_stride + idx];
+        const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
+        const int candidate_bsize = candidate->sb_type;
+
+        for (int rf_idx = 0; rf_idx < 2; ++rf_idx) {
+          MV_REFERENCE_FRAME can_rf = candidate->ref_frame[rf_idx];
+
+          for (int cmp_idx = 0; cmp_idx < 2; ++cmp_idx) {
+            if (can_rf == rf[cmp_idx] && ref_id_count[cmp_idx] < 2) {
+              ref_id[cmp_idx][ref_id_count[cmp_idx]] = candidate->mv[rf_idx];
+              ++ref_id_count[cmp_idx];
+            } else if (can_rf > INTRA_FRAME && ref_diff_count[cmp_idx] < 2) {
+              int_mv this_mv = candidate->mv[rf_idx];
+              if (cm->ref_frame_sign_bias[can_rf] !=
+                  cm->ref_frame_sign_bias[rf[cmp_idx]]) {
+                this_mv.as_mv.row = -this_mv.as_mv.row;
+                this_mv.as_mv.col = -this_mv.as_mv.col;
+              }
+              ref_diff[cmp_idx][ref_diff_count[cmp_idx]] = this_mv;
+              ++ref_diff_count[cmp_idx];
+            }
+          }
+        }
+        idx += mi_size_wide[candidate_bsize];
+      }
+
+      for (int idx = 0; abs(max_col_offset) >= 1 && idx < mi_size;) {
+        const MODE_INFO *const candidate_mi = xd->mi[idx * xd->mi_stride - 1];
+        const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
+        const int candidate_bsize = candidate->sb_type;
+
+        for (int rf_idx = 0; rf_idx < 2; ++rf_idx) {
+          MV_REFERENCE_FRAME can_rf = candidate->ref_frame[rf_idx];
+
+          for (int cmp_idx = 0; cmp_idx < 2; ++cmp_idx) {
+            if (can_rf == rf[cmp_idx] && ref_id_count[cmp_idx] < 2) {
+              ref_id[cmp_idx][ref_id_count[cmp_idx]] = candidate->mv[rf_idx];
+              ++ref_id_count[cmp_idx];
+            } else if (can_rf > INTRA_FRAME && ref_diff_count[cmp_idx] < 2) {
+              int_mv this_mv = candidate->mv[rf_idx];
+              if (cm->ref_frame_sign_bias[can_rf] !=
+                  cm->ref_frame_sign_bias[rf[cmp_idx]]) {
+                this_mv.as_mv.row = -this_mv.as_mv.row;
+                this_mv.as_mv.col = -this_mv.as_mv.col;
+              }
+              ref_diff[cmp_idx][ref_diff_count[cmp_idx]] = this_mv;
+              ++ref_diff_count[cmp_idx];
+            }
+          }
+        }
+        idx += mi_size_high[candidate_bsize];
+      }
+
+      // Build up the compound mv predictor
+      int_mv comp_list[3][2];
+
+      for (int idx = 0; idx < 2; ++idx) {
+        int comp_idx = 0;
+        for (int list_idx = 0; list_idx < ref_id_count[idx] && comp_idx < 3;
+             ++list_idx, ++comp_idx)
+          comp_list[comp_idx][idx] = ref_id[idx][list_idx];
+        for (int list_idx = 0; list_idx < ref_diff_count[idx] && comp_idx < 3;
+             ++list_idx, ++comp_idx)
+          comp_list[comp_idx][idx] = ref_diff[idx][list_idx];
+        for (; comp_idx < 3; ++comp_idx)
+          comp_list[comp_idx][idx] = gm_mv_candidates[idx];
+      }
+
+      if (refmv_count[ref_frame]) {
+        assert(refmv_count[ref_frame] == 1);
+        if (comp_list[0][0].as_int ==
+                ref_mv_stack[ref_frame][0].this_mv.as_int &&
+            comp_list[0][1].as_int ==
+                ref_mv_stack[ref_frame][0].comp_mv.as_int) {
+          ref_mv_stack[ref_frame][refmv_count[ref_frame]].this_mv =
+              comp_list[1][0];
+          ref_mv_stack[ref_frame][refmv_count[ref_frame]].comp_mv =
+              comp_list[1][1];
+        } else {
+          ref_mv_stack[ref_frame][refmv_count[ref_frame]].this_mv =
+              comp_list[0][0];
+          ref_mv_stack[ref_frame][refmv_count[ref_frame]].comp_mv =
+              comp_list[0][1];
+        }
+        ref_mv_stack[ref_frame][refmv_count[ref_frame]].weight = 2;
+        ++refmv_count[ref_frame];
+      } else {
+        for (int idx = 0; idx < MAX_MV_REF_CANDIDATES; ++idx) {
+          ref_mv_stack[ref_frame][refmv_count[ref_frame]].this_mv =
+              comp_list[idx][0];
+          ref_mv_stack[ref_frame][refmv_count[ref_frame]].comp_mv =
+              comp_list[idx][1];
+          ref_mv_stack[ref_frame][refmv_count[ref_frame]].weight = 2;
+          ++refmv_count[ref_frame];
+        }
+      }
+    }
+
+    assert(refmv_count[ref_frame] >= 2);
+#endif
+
     for (int idx = 0; idx < refmv_count[ref_frame]; ++idx) {
       clamp_mv_ref(&ref_mv_stack[ref_frame][idx].this_mv.as_mv,
                    xd->n8_w << MI_SIZE_LOG2, xd->n8_h << MI_SIZE_LOG2, xd);