Forward step motion search

Allow forward step motion search at highest search scale. This
improves the motion search quality. In speed 1, the coding performance
of vbr mode at 150 frames changes:

      avg PSNR     overall PSNR     SSIM
low   -0.051	    -0.048	   -0.032
mid   -0.091	    -0.079	   -0.100
hd    -0.117	    -0.175	   -0.149

Most clips remain neutral, several gain between 1 - 4% in PSNR.

STATS_CHANGED

Change-Id: Ia197d5394c4847436d05925639676c3d01a0c3b1
diff --git a/av1/encoder/mcomp.c b/av1/encoder/mcomp.c
index e95041a..0eabacb 100644
--- a/av1/encoder/mcomp.c
+++ b/av1/encoder/mcomp.c
@@ -183,10 +183,11 @@
   int stage_index = 0;
   cfg->stride = stride;
   int radius = 1;
-  for (stage_index = 0; stage_index < 12; ++stage_index) {
+  for (stage_index = 0; stage_index < 15; ++stage_index) {
     int tan_radius = AOMMAX((int)(0.41 * radius), 1);
     int num_search_pts = 12;
     if (radius == 1) num_search_pts = 8;
+
     const MV ss_mvs[13] = {
       { 0, 0 },
       { -radius, 0 },
@@ -211,7 +212,8 @@
     cfg->searches_per_step[stage_index] = num_search_pts;
     cfg->radius[stage_index] = radius;
     ++ss_count;
-    radius = (int)AOMMAX((radius * 1.5 + 0.5), radius + 1);
+    if (stage_index < 12)
+      radius = (int)AOMMAX((radius * 1.5 + 0.5), radius + 1);
   }
   cfg->ss_count = ss_count;
 }
@@ -1792,9 +1794,11 @@
 
   bestsad += mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
 
+  int next_step_size = tot_steps > 2 ? cfg->radius[tot_steps - 2] : 1;
   for (int step = tot_steps - 1; step >= 0; --step) {
     const search_site *ss = cfg->ss[step];
     best_site = 0;
+    if (step > 0) next_step_size = cfg->radius[step - 1];
 
     int all_in = 1, j;
     // Trap illegal vectors
@@ -1866,6 +1870,14 @@
     }
 
     if (is_off_center == 0) (*num00)++;
+
+    if (best_site == 0) {
+      while (next_step_size == cfg->radius[step] && step > 2) {
+        ++(*num00);
+        --step;
+        next_step_size = cfg->radius[step - 1];
+      }
+    }
   }
 
   return bestsad;