Add speed feature to switch search pattern

Use 8-point motion estimation at speed 3 and above. At speed 5, the
encoding time for bus_cif at 200 kbps drops from 314s to 281s, i.e. ~10%
faster. The corresponding compression performance loss is 0.5%,
which is consistent with what was observed with the speed 1 settings.

STATS_CHANGED

Change-Id: I784c8c41d1efd612cc7ece9a36dea9c52a70b07b
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 8875287..47b90ac 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -4054,14 +4054,9 @@
     av1_init_dsmotion_compensation(&cpi->ss_cfg[SS_CFG_LOOKAHEAD],
                                    y_stride_src);
   } else {
-    // Update the offsets in search_sites as y_stride can change due to scaled
-    // references. This update allows NSTEP to be used on scaled references as
-    // long as sf.mv.search_method is not DIAMOND. Currently in the codebae,
-    // sf.mv.search_method is never set to DIAMOND.
     av1_init3smotion_compensation(&cpi->ss_cfg[SS_CFG_SRC], y_stride);
     av1_init3smotion_compensation(&cpi->ss_cfg[SS_CFG_LOOKAHEAD], y_stride_src);
   }
-
   av1_init_motion_fpf(&cpi->ss_cfg[SS_CFG_FPF], y_stride);
 }
 
@@ -4116,6 +4111,9 @@
     seq_params->subsampling_y = subsampling_y;
     seq_params->use_highbitdepth = use_highbitdepth;
 
+    av1_set_speed_features_framesize_independent(cpi, cpi->oxcf.speed);
+    av1_set_speed_features_framesize_dependent(cpi, cpi->oxcf.speed);
+
     alloc_altref_frame_buffer(cpi);
     init_ref_frame_bufs(cpi);
     alloc_util_frame_buffers(cpi);
diff --git a/av1/encoder/mcomp.c b/av1/encoder/mcomp.c
index 893ef9d..1a4fd61 100644
--- a/av1/encoder/mcomp.c
+++ b/av1/encoder/mcomp.c
@@ -104,30 +104,33 @@
 }
 
 void av1_init_dsmotion_compensation(search_site_config *cfg, int stride) {
-  int len, ss_count = 0;
+  int ss_count = 0;
   int stage_index = MAX_MVSEARCH_STEPS - 1;
 
   cfg->ss[stage_index][0].mv.col = cfg->ss[stage_index][0].mv.row = 0;
   cfg->ss[stage_index][0].offset = 0;
   cfg->stride = stride;
 
-  for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
-    // Generate offsets for 4 search sites per step.
-    const MV ss_mvs[5] = {
-      { 0, 0 }, { -len, 0 }, { len, 0 }, { 0, -len }, { 0, len }
+  for (int radius = MAX_FIRST_STEP; radius > 0; radius /= 2) {
+    int num_search_pts = 8;
+
+    const MV ss_mvs[13] = {
+      { 0, 0 },           { -radius, 0 },      { radius, 0 },
+      { 0, -radius },     { 0, radius },       { -radius, -radius },
+      { radius, radius }, { -radius, radius }, { radius, -radius },
     };
+
     int i;
-    for (i = 0; i < 5; ++i) {
+    for (i = 0; i <= num_search_pts; ++i) {
       search_site *const ss = &cfg->ss[stage_index][i];
       ss->mv = ss_mvs[i];
       ss->offset = ss->mv.row * stride + ss->mv.col;
     }
-    cfg->searches_per_step[stage_index] = 4;
-    cfg->radius[stage_index] = len;
+    cfg->searches_per_step[stage_index] = num_search_pts;
+    cfg->radius[stage_index] = radius;
     --stage_index;
     ++ss_count;
   }
-
   cfg->ss_count = ss_count;
 }
 
@@ -180,7 +183,7 @@
   int stage_index = 0;
   cfg->stride = stride;
   int radius = 1;
-  for (stage_index = 0; stage_index < 15; ++stage_index) {
+  for (stage_index = 0; stage_index < 12; ++stage_index) {
     int tan_radius = AOMMAX((int)(0.41 * radius), 1);
     int num_search_pts = 12;
     if (radius == 1) num_search_pts = 8;
@@ -208,7 +211,7 @@
     cfg->searches_per_step[stage_index] = num_search_pts;
     cfg->radius[stage_index] = radius;
     ++ss_count;
-    radius = (int)AOMMAX((radius * 1.4 + 0.5), radius + 1);
+    radius = (int)AOMMAX((radius * 1.5 + 0.5), radius + 1);
   }
   cfg->ss_count = ss_count;
 }
@@ -2336,6 +2339,7 @@
                           fn_ptr, 1, ref_mv);
       break;
     case NSTEP:
+    case DIAMOND:
       var = full_pixel_diamond(x, mvp_full, step_param, use_var, error_per_bit,
                                cfg->ss_count - 1 - step_param, cost_list,
                                fn_ptr, ref_mv, cfg, NULL, NULL, 0, 0);
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 06be216..9d192f9 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -422,7 +422,7 @@
     // sf->mv_sf.adaptive_motion_search = 1;
     sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED;
     sf->mv_sf.use_accurate_subpel_search = USE_2_TAPS;
-
+    sf->mv_sf.search_method = DIAMOND;
     sf->inter_sf.disable_sb_level_mv_cost_upd = 1;
     // TODO(yunqing): evaluate this speed feature for speed 1 & 2, and combine
     // it with cpi->sf.disable_wedge_search_var_thresh.