Speed up motion estimation in tpl

Introduce a speed feature in TPL motion estimation that prunes
center MVs which are duplicates at full-pixel precision. The
feature is enabled for cpu-used >= 3.

          Instruction Count
cpu-used       Reduction        BD-Rate impact
   3            3.03%               -0.01%
   4            3.39%               -0.03%

STATS_CHANGED

Change-Id: I22d1eb0e3a9474f923fcdfc945212cc3f0bac30b
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index d13e4d3..52a6a8a 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -460,6 +460,8 @@
         cm->allow_screen_content_tools ? 0 : 2;
     sf->lpf_sf.reduce_wiener_window_size = is_boosted_arf2_bwd_type ? 0 : 1;
     sf->hl_sf.second_alt_ref_filtering = 0;
+
+    sf->tpl_sf.skip_repeated_mv_level = 1;
   }
 
   if (speed >= 4) {
@@ -843,6 +845,7 @@
 static AOM_INLINE void init_tpl_sf(TPL_SPEED_FEATURES *tpl_sf) {
   tpl_sf->prune_intra_modes = 0;
   tpl_sf->reduce_first_step_size = 0;
+  tpl_sf->skip_repeated_mv_level = 0;
 }
 
 static AOM_INLINE void init_gm_sf(GLOBAL_MOTION_SPEED_FEATURES *gm_sf) {
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 4cf387c..f5e7677 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -309,6 +309,11 @@
   int prune_intra_modes;
   // This parameter controls which step in the n-step process we start at.
   int reduce_first_step_size;
+  // Controls the MV precision used to detect repeated centre MVs, for which
+  // motion estimation is skipped. 0 (default): skip only centre MVs that are
+  // exact duplicates. 1: also skip centre MVs that are duplicates once
+  // reduced to full-pixel precision.
+  int skip_repeated_mv_level;
 } TPL_SPEED_FEATURES;
 
 typedef struct GLOBAL_MOTION_SPEED_FEATURES {
diff --git a/av1/encoder/tpl_model.c b/av1/encoder/tpl_model.c
index 7600f09..79406ec 100644
--- a/av1/encoder/tpl_model.c
+++ b/av1/encoder/tpl_model.c
@@ -180,6 +180,29 @@
   return bestsme;
 }
 
+// Returns 1 if candidate_mv repeats one of the first center_mvs_count entries
+// of center_mvs, else 0. Both MV components are right-shifted before the
+// comparison: by 0 for skip_repeated_mv_level 0, by 3 (full-pixel) for 1.
+static int is_duplicate_mv(int_mv candidate_mv, const int_mv *center_mvs,
+                           int center_mvs_count, int skip_repeated_mv_level) {
+  // Read-only shift table indexed by level; only levels 0 and 1 are valid.
+  static const int mv_shift_lookup[2] = { 0, 3 };
+  const int shift = mv_shift_lookup[skip_repeated_mv_level];
+  const int candidate_row = candidate_mv.as_mv.row >> shift;
+  const int candidate_col = candidate_mv.as_mv.col >> shift;
+
+  for (int i = 0; i < center_mvs_count; i++) {
+    // Compare at the reduced precision without modifying the stored MV.
+    const int center_row = center_mvs[i].as_mv.row >> shift;
+    const int center_col = center_mvs[i].as_mv.col >> shift;
+    if (candidate_row == center_row && candidate_col == center_col) {
+      return 1;
+    }
+  }
+
+  return 0;
+}
+
 static AOM_INLINE void mode_estimation(
     AV1_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, struct scale_factors *sf,
     int frame_idx, int mi_row, int mi_col, BLOCK_SIZE bsize, TX_SIZE tx_size,
@@ -307,7 +330,8 @@
     if (xd->up_available) {
       TplDepStats *ref_tpl_stats = &tpl_frame->tpl_stats_ptr[av1_tpl_ptr_pos(
           cpi, mi_row - mi_height, mi_col, tpl_frame->stride)];
-      if (ref_tpl_stats->mv[rf_idx].as_int != 0) {
+      if (!is_duplicate_mv(ref_tpl_stats->mv[rf_idx], center_mvs, refmv_count,
+                           cpi->sf.tpl_sf.skip_repeated_mv_level)) {
         center_mvs[refmv_count].as_int = ref_tpl_stats->mv[rf_idx].as_int;
         ++refmv_count;
       }
@@ -316,8 +340,8 @@
     if (xd->left_available) {
       TplDepStats *ref_tpl_stats = &tpl_frame->tpl_stats_ptr[av1_tpl_ptr_pos(
           cpi, mi_row, mi_col - mi_width, tpl_frame->stride)];
-      if (ref_tpl_stats->mv[rf_idx].as_int != 0 &&
-          ref_tpl_stats->mv[rf_idx].as_int != center_mvs[1].as_int) {
+      if (!is_duplicate_mv(ref_tpl_stats->mv[rf_idx], center_mvs, refmv_count,
+                           cpi->sf.tpl_sf.skip_repeated_mv_level)) {
         center_mvs[refmv_count].as_int = ref_tpl_stats->mv[rf_idx].as_int;
         ++refmv_count;
       }
@@ -326,9 +350,8 @@
     if (xd->up_available && mi_col + mi_width < xd->tile.mi_col_end) {
       TplDepStats *ref_tpl_stats = &tpl_frame->tpl_stats_ptr[av1_tpl_ptr_pos(
           cpi, mi_row - mi_height, mi_col + mi_width, tpl_frame->stride)];
-      if (ref_tpl_stats->mv[rf_idx].as_int != 0 &&
-          ref_tpl_stats->mv[rf_idx].as_int != center_mvs[1].as_int &&
-          ref_tpl_stats->mv[rf_idx].as_int != center_mvs[2].as_int) {
+      if (!is_duplicate_mv(ref_tpl_stats->mv[rf_idx], center_mvs, refmv_count,
+                           cpi->sf.tpl_sf.skip_repeated_mv_level)) {
         center_mvs[refmv_count].as_int = ref_tpl_stats->mv[rf_idx].as_int;
         ++refmv_count;
       }