Use small interval in exhaustive search for screen content.

Summary:
Use a smaller search step/interval for exhaustive search.
BUG=aomedia:2650

Results:
(1). Clip "Wikipedia" gains -12.1% (AWCY, 1 pass, fixed arf mode)
(2). Screen content sets gain -2%. (2 pass, q mode)
"sc_web_browsing" and "chromoting_numbers" gain -10% each.

Speed:
30-40%, or even double the encoding time due to more motion search.

This is only applied for the ARF, when the video is detected as
screen content type on low speed setting (speed <= 2).

For screen content videos, our NSTEP/Diamond motion search sometimes
fails to find the best matching block.
CL [1] (https://aomedia-review.googlesource.com/c/aom/+/108861)
reduces the threshold to allow more exhaustive search and improves
motion search.

This CL is based on [1], and forces a smaller search interval in the
exhaustive search, based on the observation that the current
exhaustive search still misses the best candidate block.
The reason is that the first iteration of exhaustive search uses a
large search interval, so a local minimum is found that is far away
from the true matching block. The best candidate block position
is used as the center for the following searches. Both search interval
and range are reduced in the following iterations. Since the initial
candidate is far away from the truth, these following iterations
still can't find the global minimum, leading to suboptimal prediction
quality. This happens frequently for videos with a lot of text,
where the initial search easily gets stuck in a local minimum due to
the large search interval.

The best example is clip "Wikipedia". CL [1] gains 5-6% by using
more exhaustive search. This CL is based on it and gains 12%.

Using smaller interval results in more searches. In order to
reduce speed overhead, this CL only applies on the ARF.
Local tests show less than 10% more encoding time as compared to [1].

STATS_CHANGED

Change-Id: I10f37ec073d94ee464552bf3870d3efe6e45a8fb
diff --git a/av1/encoder/firstpass.c b/av1/encoder/firstpass.c
index 724e4ac..706c24d 100644
--- a/av1/encoder/firstpass.c
+++ b/av1/encoder/firstpass.c
@@ -228,7 +228,8 @@
       &cpi->mv_search_params.ss_cfg[SS_CFG_FPF];
   FULLPEL_MOTION_SEARCH_PARAMS ms_params;
   av1_make_default_fullpel_ms_params(&ms_params, cpi, x, bsize, ref_mv,
-                                     first_pass_search_sites);
+                                     first_pass_search_sites,
+                                     /*fine_search_interval=*/0);
   ms_params.search_method = NSTEP;
 
   FULLPEL_MV this_best_mv;
diff --git a/av1/encoder/mcomp.c b/av1/encoder/mcomp.c
index 43f7f5c..46737bf 100644
--- a/av1/encoder/mcomp.c
+++ b/av1/encoder/mcomp.c
@@ -54,10 +54,12 @@
   ms_buffers->obmc_mask = x->mask_buf;
 }
 
-void av1_make_default_fullpel_ms_params(
-    FULLPEL_MOTION_SEARCH_PARAMS *ms_params, const struct AV1_COMP *cpi,
-    const MACROBLOCK *x, BLOCK_SIZE bsize, const MV *ref_mv,
-    const search_site_config *search_sites) {
+void av1_make_default_fullpel_ms_params(FULLPEL_MOTION_SEARCH_PARAMS *ms_params,
+                                        const struct AV1_COMP *cpi,
+                                        const MACROBLOCK *x, BLOCK_SIZE bsize,
+                                        const MV *ref_mv,
+                                        const search_site_config *search_sites,
+                                        const int fine_search_interval) {
   // High level params
   ms_params->bsize = bsize;
   ms_params->vfp = &cpi->fn_ptr[bsize];
@@ -72,6 +74,7 @@
   ms_params->force_mesh_thresh = cpi->sf.mv_sf.exhaustive_searches_thresh;
   ms_params->prune_mesh_search = cpi->sf.mv_sf.prune_mesh_search;
   ms_params->run_mesh_search = 0;
+  ms_params->fine_search_interval = fine_search_interval;
 
   ms_params->is_intra_mode = 0;
 
@@ -1295,6 +1298,15 @@
   range = AOMMAX(range, (5 * AOMMAX(abs(best_mv->row), abs(best_mv->col))) / 4);
   range = AOMMIN(range, kMaxRange);
   interval = AOMMAX(interval, range / baseline_interval_divisor);
+  // Use a small search step/interval for certain kinds of clips.
+  // For example, screen content clips with a lot of text.
+  // Large interval could lead to a false matching position, and it can't find
+  // the best global candidate in following iterations due to reduced search
+  // range. The solution here is to use a small search interval in the beginning
+  // and thus reduce the chance of missing the best candidate.
+  if (ms_params->fine_search_interval) {
+    interval = AOMMIN(interval, 4);
+  }
 
   // initial search
   bestsme = exhaustive_mesh_search(*best_mv, ms_params, range, interval,
@@ -1461,11 +1473,11 @@
 
   // Should we allow a follow on exhaustive search?
   if (!run_mesh_search && search_method == NSTEP) {
-    int exhuastive_thr = ms_params->force_mesh_thresh;
-    exhuastive_thr >>=
+    int exhaustive_thr = ms_params->force_mesh_thresh;
+    exhaustive_thr >>=
         10 - (mi_size_wide_log2[bsize] + mi_size_high_log2[bsize]);
     // Threshold variance for an exhaustive full search.
-    if (var > exhuastive_thr) run_mesh_search = 1;
+    if (var > exhaustive_thr) run_mesh_search = 1;
   }
 
   // TODO(yunqing): the following is used to reduce mesh search in temporal
diff --git a/av1/encoder/mcomp.h b/av1/encoder/mcomp.h
index 73135d8..7d1b3f0 100644
--- a/av1/encoder/mcomp.h
+++ b/av1/encoder/mcomp.h
@@ -145,6 +145,10 @@
                           // higher than the threshold.
   const struct MESH_PATTERN *mesh_patterns[2];
 
+  // Use maximum search interval of 4 if true. This helps motion search to find
+  // the best motion vector for screen content types.
+  int fine_search_interval;
+
   int is_intra_mode;
 
   int fast_obmc_search;
@@ -157,7 +161,8 @@
                                         const struct AV1_COMP *cpi,
                                         const MACROBLOCK *x, BLOCK_SIZE bsize,
                                         const MV *ref_mv,
-                                        const search_site_config *search_sites);
+                                        const search_site_config *search_sites,
+                                        int fine_search_interval);
 
 // Sets up configs for fullpixel diamond search
 void av1_init_dsmotion_compensation(search_site_config *cfg, int stride);
diff --git a/av1/encoder/motion_search_facade.c b/av1/encoder/motion_search_facade.c
index 8db1423..207cb40 100644
--- a/av1/encoder/motion_search_facade.c
+++ b/av1/encoder/motion_search_facade.c
@@ -37,6 +37,13 @@
   return 0;
 }
 
+// Use a fine mesh search interval for screen content on the ARF (speed <= 2).
+static int use_fine_search_interval(const AV1_COMP *const cpi) {
+  return cpi->is_screen_content_type &&
+         cpi->gf_group.update_type[cpi->gf_group.index] == ARF_UPDATE &&
+         cpi->oxcf.speed <= 2;
+}
+
 void av1_single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
                               BLOCK_SIZE bsize, int ref_idx, int *rate_mv,
                               int search_range, inter_mode_info *mode_info,
@@ -206,11 +213,13 @@
   int_mv second_best_mv;
   best_mv->as_int = second_best_mv.as_int = INVALID_MV;
 
+  // Allow more mesh searches for screen content type on the ARF.
+  const int fine_search_interval = use_fine_search_interval(cpi);
   const search_site_config *src_search_sites =
       &mv_search_params->ss_cfg[SS_CFG_SRC];
   FULLPEL_MOTION_SEARCH_PARAMS full_ms_params;
   av1_make_default_fullpel_ms_params(&full_ms_params, cpi, x, bsize, &ref_mv,
-                                     src_search_sites);
+                                     src_search_sites, fine_search_interval);
 
   switch (mbmi->motion_mode) {
     case SIMPLE_TRANSLATION: {
@@ -453,7 +462,8 @@
     // Make motion search params
     FULLPEL_MOTION_SEARCH_PARAMS full_ms_params;
     av1_make_default_fullpel_ms_params(&full_ms_params, cpi, x, bsize,
-                                       &ref_mv[id].as_mv, NULL);
+                                       &ref_mv[id].as_mv, NULL,
+                                       /*fine_search_interval=*/0);
     av1_set_ms_compound_refs(&full_ms_params.ms_buffers, second_pred, mask,
                              mask_stride, id);
 
@@ -576,7 +586,8 @@
   // Make motion search params
   FULLPEL_MOTION_SEARCH_PARAMS full_ms_params;
   av1_make_default_fullpel_ms_params(&full_ms_params, cpi, x, bsize,
-                                     &ref_mv.as_mv, NULL);
+                                     &ref_mv.as_mv, NULL,
+                                     /*fine_search_interval=*/0);
   av1_set_ms_compound_refs(&full_ms_params.ms_buffers, second_pred, mask,
                            mask_stride, ref_idx);
 
@@ -790,9 +801,11 @@
                          num_planes);
   }
 
+  // Allow more mesh searches for screen content type on the ARF.
+  const int fine_search_interval = use_fine_search_interval(cpi);
   FULLPEL_MOTION_SEARCH_PARAMS full_ms_params;
   av1_make_default_fullpel_ms_params(&full_ms_params, cpi, x, bsize, &ref_mv,
-                                     src_search_sites);
+                                     src_search_sites, fine_search_interval);
 
   var = av1_full_pixel_search(start_mv, &full_ms_params, step_param,
                               cond_cost_list(cpi, cost_list),
diff --git a/av1/encoder/nonrd_pickmode.c b/av1/encoder/nonrd_pickmode.c
index d3880cd..a442b3b 100644
--- a/av1/encoder/nonrd_pickmode.c
+++ b/av1/encoder/nonrd_pickmode.c
@@ -157,7 +157,8 @@
       &cpi->mv_search_params.ss_cfg[SS_CFG_SRC];
   FULLPEL_MOTION_SEARCH_PARAMS full_ms_params;
   av1_make_default_fullpel_ms_params(&full_ms_params, cpi, x, bsize, &center_mv,
-                                     src_search_sites);
+                                     src_search_sites,
+                                     /*fine_search_interval=*/0);
 
   av1_full_pixel_search(start_mv, &full_ms_params, step_param,
                         cond_cost_list(cpi, cost_list), &tmp_mv->as_fullmv,
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 79cf166..3c3fccd 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -2703,7 +2703,8 @@
   const search_site_config *lookahead_search_sites =
       &cpi->mv_search_params.ss_cfg[SS_CFG_LOOKAHEAD];
   av1_make_default_fullpel_ms_params(&fullms_params, cpi, x, bsize,
-                                     &dv_ref.as_mv, lookahead_search_sites);
+                                     &dv_ref.as_mv, lookahead_search_sites,
+                                     /*fine_search_interval=*/0);
   fullms_params.is_intra_mode = 1;
 
   for (enum IntrabcMotionDirection dir = IBC_MOTION_ABOVE;
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 387e63b..f1833b6 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -327,10 +327,12 @@
   sf->rt_sf.use_nonrd_pick_mode = 0;
   sf->rt_sf.use_real_time_ref_set = 0;
 
-  if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION)
-    sf->mv_sf.exhaustive_searches_thresh = (1 << 24);
-  else
+  if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION ||
+      cpi->is_screen_content_type) {
+    sf->mv_sf.exhaustive_searches_thresh = (1 << 20);
+  } else {
     sf->mv_sf.exhaustive_searches_thresh = (1 << 25);
+  }
 
   sf->rd_sf.perform_coeff_opt = 1;
 
diff --git a/av1/encoder/temporal_filter.c b/av1/encoder/temporal_filter.c
index 606adeb..4562024 100644
--- a/av1/encoder/temporal_filter.c
+++ b/av1/encoder/temporal_filter.c
@@ -130,7 +130,8 @@
   mb->mv_cost_type = mv_cost_type;
 
   av1_make_default_fullpel_ms_params(&full_ms_params, cpi, mb, block_size,
-                                     &baseline_mv, &ss_cfg);
+                                     &baseline_mv, &ss_cfg,
+                                     /*fine_search_interval=*/0);
   full_ms_params.run_mesh_search = 1;
   full_ms_params.search_method = full_search_method;
   av1_full_pixel_search(start_mv, &full_ms_params, step_param,
@@ -178,8 +179,9 @@
         mbd->plane[0].pre[0].buf = ref_frame->y_buffer + y_offset + offset;
         mb->mv_cost_type = mv_cost_type;
 
-        av1_make_default_fullpel_ms_params(
-            &full_ms_params, cpi, mb, subblock_size, &baseline_mv, &ss_cfg);
+        av1_make_default_fullpel_ms_params(&full_ms_params, cpi, mb,
+                                           subblock_size, &baseline_mv, &ss_cfg,
+                                           /*fine_search_interval=*/0);
         full_ms_params.run_mesh_search = 1;
         full_ms_params.search_method = full_search_method;
         av1_full_pixel_search(start_mv, &full_ms_params, step_param,
diff --git a/av1/encoder/tpl_model.c b/av1/encoder/tpl_model.c
index e267830..db8956e 100644
--- a/av1/encoder/tpl_model.c
+++ b/av1/encoder/tpl_model.c
@@ -150,7 +150,7 @@
 
   FULLPEL_MOTION_SEARCH_PARAMS full_ms_params;
   av1_make_default_fullpel_ms_params(&full_ms_params, cpi, x, bsize, &center_mv,
-                                     ss_cfg);
+                                     ss_cfg, /*fine_search_interval=*/0);
 
   av1_full_pixel_search(start_mv, &full_ms_params, step_param,
                         cond_cost_list(cpi, cost_list), &best_mv->as_fullmv,