Extend sf skip_fullpel_search_using_startmv to cpu5

This CL improves and extends the speed feature
skip_fullpel_search_using_startmv to cpu 5. The full pixel
search is skipped by comparing the start mv candidate with the
start mvs corresponding to previous ref_mv_idx values, under the
following conditions:

- The sum of the absolute mv differences (row + column) is <= 1
- A complete motion search was performed for the start mv
  used for comparison

single_newmv_valid check is also extended to cpu 6.

cpu-used  Instruction Count      BD-Rate Loss(%)
            Reduction(%)    avg.psnr  ovr.psnr  ssim
    5         0.938         0.0233    0.0136   -0.0002
    6        -0.074        -0.0078   -0.0104    0.0069

STATS_CHANGED

Change-Id: I14ef0be4854dba3eee144e254d47b324d56cdd82
diff --git a/av1/encoder/interp_search.h b/av1/encoder/interp_search.h
index 8eba483..bce494e 100644
--- a/av1/encoder/interp_search.h
+++ b/av1/encoder/interp_search.h
@@ -126,6 +126,11 @@
   FULLPEL_MV start_mv_stack[(MAX_REF_MV_SEARCH - 1) * 2];
 
   /*!
+   * Stack to store ref_mv_idx of NEWMV mode.
+   */
+  uint8_t ref_mv_idx_stack[(MAX_REF_MV_SEARCH - 1) * 2];
+
+  /*!
    * Count of mvs in start mv stack.
    */
   int start_mv_cnt;
diff --git a/av1/encoder/motion_search_facade.c b/av1/encoder/motion_search_facade.c
index 4b6136d..da6729c 100644
--- a/av1/encoder/motion_search_facade.c
+++ b/av1/encoder/motion_search_facade.c
@@ -192,23 +192,42 @@
 
       // Check difference between mvs in the stack and candidate mv.
       for (int stack_idx = 0; stack_idx < stack_size; stack_idx++) {
-        FULLPEL_MV *fmv_stack = &args->start_mv_stack[stack_idx];
-        const int row = abs(fmv_stack->row - fmv_cand->as_fullmv.row);
-        const int col = abs(fmv_stack->col - fmv_cand->as_fullmv.col);
+        const uint8_t this_ref_mv_idx = args->ref_mv_idx_stack[stack_idx];
+        const FULLPEL_MV *fmv_stack = &args->start_mv_stack[stack_idx];
+        const int this_newmv_valid =
+            args->single_newmv_valid[this_ref_mv_idx][ref];
+        const int row_diff = abs(fmv_stack->row - fmv_cand->as_fullmv.row);
+        const int col_diff = abs(fmv_stack->col - fmv_cand->as_fullmv.col);
 
-        if (row <= 1 && col <= 1) {
-          skip_cand_mv = 1;
-          break;
+        if (!this_newmv_valid) continue;
+
+        if (cpi->sf.mv_sf.skip_fullpel_search_using_startmv >= 2) {
+          // Prunes the current start_mv candidate, if the absolute mv
+          // difference of both row and column are <= 1.
+          if (row_diff <= 1 && col_diff <= 1) {
+            skip_cand_mv = 1;
+            break;
+          }
+        } else if (cpi->sf.mv_sf.skip_fullpel_search_using_startmv >= 1) {
+          // Prunes the current start_mv candidate, if the sum of the absolute
+          // mv difference of row and column is <= 1.
+          if (row_diff + col_diff <= 1) {
+            skip_cand_mv = 1;
+            break;
+          }
         }
       }
       if (skip_cand_mv) {
+        // Ensure at least one full-pel motion search is not pruned.
+        assert(mbmi->ref_mv_idx != 0);
         // Mark the candidate mv as invalid so that motion search gets skipped.
         cand[cand_idx].fmv.as_int = INVALID_MV;
       } else {
-        // Store start mv candidate of full-pel search in the mv stack (except
-        // last ref_mv_idx).
+        // Store start_mv candidate and corresponding ref_mv_idx of full-pel
+        // search in the mv stack (except last ref_mv_idx).
         if (mbmi->ref_mv_idx != MAX_REF_MV_SEARCH - 1) {
           args->start_mv_stack[args->start_mv_cnt] = fmv_cand->as_fullmv;
+          args->ref_mv_idx_stack[args->start_mv_cnt] = mbmi->ref_mv_idx;
           args->start_mv_cnt++;
           assert(args->start_mv_cnt <= (MAX_REF_MV_SEARCH - 1) * 2);
         }
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 67f5e41..7ce91a9 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -5725,6 +5725,7 @@
                                interintra_modes,
                                { { { 0 }, { { 0 } }, { 0 }, 0, 0, 0, 0 } },
                                { { 0, 0 } },
+                               { 0 },
                                0,
                                0,
                                -1,
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index cf402f1..c3b00ff 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -1208,6 +1208,7 @@
                                                                           : 2;
 
     sf->mv_sf.warp_search_method = WARP_SEARCH_DIAMOND;
+    sf->mv_sf.skip_fullpel_search_using_startmv = boosted ? 0 : 1;
 
     sf->inter_sf.prune_inter_modes_if_skippable = 1;
     sf->inter_sf.txfm_rd_gate_level = boosted ? 0 : 4;
@@ -1260,7 +1261,7 @@
 
     sf->mv_sf.simple_motion_subpel_force_stop = FULL_PEL;
     sf->mv_sf.use_bsize_dependent_search_method = 1;
-    sf->mv_sf.skip_fullpel_search_using_startmv = boosted ? 0 : 1;
+    sf->mv_sf.skip_fullpel_search_using_startmv = boosted ? 0 : 2;
 
     sf->tpl_sf.gop_length_decision_method = 3;
 
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 2692088..ae9b99e 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -821,6 +821,9 @@
   int disable_second_mv;
 
   // Skips full pixel search based on start mv of prior ref_mv_idx.
+  // 0: Disabled
+  // 1: Skips the full pixel search up to 4 neighbor full-pel MV positions.
+  // 2: Skips the full pixel search up to 8 neighbor full-pel MV positions.
   int skip_fullpel_search_using_startmv;
 
   // Method to use for refining WARPED_CAUSAL motion vectors