Reduce full pixel motion search range

1. Added a speed 5 feature that reduces the single motion search range
based on how close the ref_mvs are to each other and on the mv result
found for the prior ref_mv.
2. Corrected how set_mv_search_params is called: it is now invoked
unconditionally instead of only when the frame size changes. This also
gave some speedups at lower speeds.
3. More work will follow.

Borg test results (150f):
          avg_psnr:  ovr_psnr:  ssim:  speedup:
speed 5:
lowres:   -0.018     -0.025    -0.015   0.9%
midres:    0.029      0.027     0.025   1.0%

speed 2:
lowres:   -0.003     -0.007     0.023   0.6%
midres:   -0.001      0.002     0.032   0.6%

STATS_CHANGED

Change-Id: I8d0688ebcc10c417145aaf784f478001b577db68
diff --git a/av1/encoder/encodemv.c b/av1/encoder/encodemv.c
index 1b24f88..d764ac1 100644
--- a/av1/encoder/encodemv.c
+++ b/av1/encoder/encodemv.c
@@ -192,7 +192,7 @@
   // If auto_mv_step_size is enabled then keep track of the largest
   // motion vector component used.
   if (cpi->sf.mv_sf.auto_mv_step_size) {
-    unsigned int maxv = AOMMAX(abs(mv->row), abs(mv->col)) >> 3;
+    int maxv = AOMMAX(abs(mv->row), abs(mv->col)) >> 3;
     cpi->max_mv_magnitude = AOMMAX(maxv, cpi->max_mv_magnitude);
   }
 }
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 64d0513..d178056e 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -3906,7 +3906,7 @@
 
 static void set_mv_search_params(AV1_COMP *cpi) {
   const AV1_COMMON *const cm = &cpi->common;
-  const unsigned int max_mv_def = AOMMIN(cm->width, cm->height);
+  const int max_mv_def = AOMMAX(cm->width, cm->height);
 
   // Default based on max resolution.
   cpi->mv_step_param = av1_init_search_range(max_mv_def);
@@ -3917,14 +3917,15 @@
       // after a key/intra-only frame.
       cpi->max_mv_magnitude = max_mv_def;
     } else {
-      if (cm->show_frame) {
+      // Use cpi->max_mv_magnitude == -1 to exclude first pass case.
+      if (cm->show_frame && cpi->max_mv_magnitude != -1) {
         // Allow mv_steps to correspond to twice the max mv magnitude found
         // in the previous frame, capped by the default max_mv_magnitude based
         // on resolution.
         cpi->mv_step_param = av1_init_search_range(
             AOMMIN(max_mv_def, 2 * cpi->max_mv_magnitude));
       }
-      cpi->max_mv_magnitude = 0;
+      cpi->max_mv_magnitude = -1;
     }
   }
 }
@@ -4271,13 +4272,14 @@
   MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
   int ref_frame;
 
+  // TODO(yunqing): The following condition seems not to work. Investigate.
   if (width != cm->width || height != cm->height) {
     // There has been a change in the encoded frame size
     av1_set_size_literal(cpi, width, height);
-    set_mv_search_params(cpi);
     // Recalculate 'all_lossless' in case super-resolution was (un)selected.
     cm->all_lossless = cm->coded_lossless && !av1_superres_scaled(cm);
   }
+  set_mv_search_params(cpi);
 
   if (is_stat_consumption_stage(cpi)) {
     av1_set_target_rate(cpi, cm->width, cm->height);
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 0c9cc7c..ce16c7f 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -901,7 +901,7 @@
   // sf contains fine-grained config set internally based on speed
   SPEED_FEATURES sf;
 
-  unsigned int max_mv_magnitude;
+  int max_mv_magnitude;
   int mv_step_param;
 
   int all_one_sided_refs;
diff --git a/av1/encoder/mcomp.c b/av1/encoder/mcomp.c
index 884a44d..031c7cd 100644
--- a/av1/encoder/mcomp.c
+++ b/av1/encoder/mcomp.c
@@ -1891,8 +1891,7 @@
               point as the best match, we will do a final 1-away diamond
               refining search  */
 static int full_pixel_diamond(MACROBLOCK *x, MV *mvp_full, int step_param,
-                              int use_var, int sadpb, int further_steps,
-                              int *cost_list,
+                              int use_var, int sadpb, int *cost_list,
                               const aom_variance_fn_ptr_t *fn_ptr,
                               const MV *ref_mv, const search_site_config *cfg,
                               uint8_t *second_pred, uint8_t *mask,
@@ -1920,6 +1919,7 @@
 
   // If there won't be more n-step search, check to see if refining search is
   // needed.
+  const int further_steps = cfg->ss_count - 1 - step_param;
   while (n < further_steps) {
     ++n;
 
@@ -2379,9 +2379,9 @@
       break;
     case NSTEP:
     case DIAMOND:
-      var = full_pixel_diamond(x, mvp_full, step_param, use_var, error_per_bit,
-                               cfg->ss_count - 1 - step_param, cost_list,
-                               fn_ptr, ref_mv, cfg, NULL, NULL, 0, 0);
+      var =
+          full_pixel_diamond(x, mvp_full, step_param, use_var, error_per_bit,
+                             cost_list, fn_ptr, ref_mv, cfg, NULL, NULL, 0, 0);
       break;
     default: assert(0 && "Invalid search method.");
   }
diff --git a/av1/encoder/motion_search_facade.c b/av1/encoder/motion_search_facade.c
index 3f383fd..0063ac1 100644
--- a/av1/encoder/motion_search_facade.c
+++ b/av1/encoder/motion_search_facade.c
@@ -15,7 +15,8 @@
 #include "av1/encoder/reconinter_enc.h"
 
 void av1_single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
-                              BLOCK_SIZE bsize, int ref_idx, int *rate_mv) {
+                              BLOCK_SIZE bsize, int ref_idx, int *rate_mv,
+                              int search_range) {
   MACROBLOCKD *xd = &x->e_mbd;
   const AV1_COMMON *cm = &cpi->common;
   const int num_planes = av1_num_planes(cm);
@@ -93,6 +94,21 @@
   }
 
   const MV ref_mv = av1_get_ref_mv(x, ref_idx).as_mv;
+
+  // Further reduce the search range.
+  if (search_range < INT_MAX) {
+    const search_site_config *ss_cfg = &cpi->ss_cfg[SS_CFG_SRC];
+    // Max step_param is ss_cfg->ss_count.
+    if (search_range < 1) {
+      step_param = ss_cfg->ss_count;
+    } else {
+      while (ss_cfg->radius[ss_cfg->ss_count - step_param - 1] >
+                 (search_range << 1) &&
+             ss_cfg->ss_count - step_param - 1 > 0)
+        step_param++;
+    }
+  }
+
   // Note: MV limits are modified here. Always restore the original values
   // after full-pixel motion search.
   av1_set_mv_search_range(&x->mv_limits, &ref_mv);
@@ -631,4 +647,4 @@
     mbmi->mv[which].as_int = tmp_mv[which].as_int;
   }
   return tmp_rate_mv;
-}
\ No newline at end of file
+}
diff --git a/av1/encoder/motion_search_facade.h b/av1/encoder/motion_search_facade.h
index fda7a1f..4bfe06d 100644
--- a/av1/encoder/motion_search_facade.h
+++ b/av1/encoder/motion_search_facade.h
@@ -19,7 +19,8 @@
 #endif
 
 void av1_single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
-                              BLOCK_SIZE bsize, int ref_idx, int *rate_mv);
+                              BLOCK_SIZE bsize, int ref_idx, int *rate_mv,
+                              int search_range);
 
 void av1_joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
                              BLOCK_SIZE bsize, int_mv *cur_mv,
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index fe1c359..bd1a800 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -1499,7 +1499,43 @@
       }
     }
   } else {
-    av1_single_motion_search(cpi, x, bsize, 0, rate_mv);
+    // Single ref case.
+    const int ref_idx = 0;
+    int search_range = INT_MAX;
+
+    if (cpi->sf.mv_sf.reduce_search_range && mbmi->ref_mv_idx > 0) {
+      const MV ref_mv = av1_get_ref_mv(x, ref_idx).as_mv;
+      int min_mv_diff = INT_MAX;
+      int best_match = -1;
+      MV prev_ref_mv[2] = { { 0 } };
+      for (int idx = 0; idx < mbmi->ref_mv_idx; ++idx) {
+        prev_ref_mv[idx] = av1_get_ref_mv_from_stack(ref_idx, mbmi->ref_frame,
+                                                     idx, x->mbmi_ext)
+                               .as_mv;
+        const int ref_mv_diff = AOMMAX(abs(ref_mv.row - prev_ref_mv[idx].row),
+                                       abs(ref_mv.col - prev_ref_mv[idx].col));
+
+        if (min_mv_diff > ref_mv_diff) {
+          min_mv_diff = ref_mv_diff;
+          best_match = idx;
+        }
+      }
+
+      if (min_mv_diff < (16 << 3)) {
+        if (args->single_newmv_valid[best_match][refs[0]]) {
+          search_range = min_mv_diff;
+          search_range +=
+              AOMMAX(abs(args->single_newmv[best_match][refs[0]].as_mv.row -
+                         prev_ref_mv[best_match].row),
+                     abs(args->single_newmv[best_match][refs[0]].as_mv.col -
+                         prev_ref_mv[best_match].col));
+          // Get full pixel search range.
+          search_range = (search_range + 4) >> 3;
+        }
+      }
+    }
+
+    av1_single_motion_search(cpi, x, bsize, ref_idx, rate_mv, search_range);
     if (x->best_mv.as_int == INVALID_MV) return INT64_MAX;
 
     args->single_newmv[ref_mv_idx][refs[0]] = x->best_mv;
@@ -1671,7 +1707,7 @@
       const uint32_t cur_mv = mbmi->mv[0].as_int;
       assert(!is_comp_pred);
       if (have_newmv_in_inter_mode(this_mode)) {
-        av1_single_motion_search(cpi, x, bsize, 0, &tmp_rate_mv);
+        av1_single_motion_search(cpi, x, bsize, 0, &tmp_rate_mv, INT_MAX);
         mbmi->mv[0].as_int = x->best_mv.as_int;
         tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
       }
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index d80914e..ce1ecd1 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -558,6 +558,8 @@
     sf->inter_sf.disable_onesided_comp = 1;
 
     sf->lpf_sf.disable_lr_filter = 1;
+
+    sf->mv_sf.reduce_search_range = 1;
   }
 }
 
@@ -920,6 +922,7 @@
   mv_sf->adaptive_motion_search = 0;
   mv_sf->use_accurate_subpel_search = USE_8_TAPS;
   mv_sf->disable_hash_me = 0;
+  mv_sf->reduce_search_range = 0;
 }
 
 static AOM_INLINE void init_inter_sf(INTER_MODE_SPEED_FEATURES *inter_sf) {
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 08ba89c..e6b2f29 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -480,6 +480,9 @@
   // Use to control hash generation and use of the same
   // Applicable only for screen contents
   int disable_hash_me;
+
+  // Reduce single motion search range based on MV result of prior ref_mv_idx.
+  int reduce_search_range;
 } MV_SPEED_FEATURES;
 
 typedef struct INTER_MODE_SPEED_FEATURES {
diff --git a/av1/encoder/temporal_filter.c b/av1/encoder/temporal_filter.c
index 20b9d4d..943b0fa 100644
--- a/av1/encoder/temporal_filter.c
+++ b/av1/encoder/temporal_filter.c
@@ -89,7 +89,7 @@
   const search_site_config ss_cfg = cpi->ss_cfg[SS_CFG_LOOKAHEAD];
   const SEARCH_METHODS full_search_method = NSTEP;
   const int step_param = av1_init_search_range(
-      AOMMIN(frame_to_filter->y_crop_width, frame_to_filter->y_crop_height));
+      AOMMAX(frame_to_filter->y_crop_width, frame_to_filter->y_crop_height));
   const SUBPEL_SEARCH_TYPE subpel_search_type = USE_8_TAPS;
   const int allow_high_precision_mv = cpi->common.allow_high_precision_mv;
   const int subpel_iters_per_step = cpi->sf.mv_sf.subpel_iters_per_step;