RTC: Adaptively downgrade subpixel search

This commit adds the use_adaptive_subpel_search speed feature, which
downgrades the subpel search to av1_find_best_sub_pixel_tree_pruned_more
when either x->content_state_sb.source_sad_nonrd <= kLowSad, or the
corresponding fullpel search returns low residue variance.

For reference, if the encoder always uses
av1_find_best_sub_pixel_tree_pruned_more, encoding time drops by about
1.97% at the cost of a 0.366% BD-rate loss.

| SPD_SET | TESTSET  | AVG_PSNR | OVR_PSNR |  SSIM   | ENC_T |
|---------|----------|----------|----------|---------|-------|
|    7    |   rtc    | +0.015%  | +0.039%  | +0.085% | -1.2% |
|    7    | rtc_derf | +0.018%  | +0.051%  | -0.008% | -1.1% |
|---------|----------|----------|----------|---------|-------|
|    8    |   rtc    | -0.027%  | -0.051%  | -0.073% | -1.3% |
|    8    | rtc_derf | -0.286%  | -0.293%  | -0.310% | -1.1% |

STATS_CHANGED

Change-Id: I623f138694c1e85b1ac8112027d6adbaec6517d7
diff --git a/av1/encoder/nonrd_pickmode.c b/av1/encoder/nonrd_pickmode.c
index 47dbf87..39c2a87 100644
--- a/av1/encoder/nonrd_pickmode.c
+++ b/av1/encoder/nonrd_pickmode.c
@@ -266,10 +266,11 @@
   MACROBLOCKD *xd = &x->e_mbd;
   const AV1_COMMON *cm = &cpi->common;
   const int num_planes = av1_num_planes(cm);
+  const SPEED_FEATURES *sf = &cpi->sf;
   MB_MODE_INFO *mi = xd->mi[0];
   struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0, 0, 0, 0 } };
-  int step_param = (cpi->sf.rt_sf.fullpel_search_step_param)
-                       ? cpi->sf.rt_sf.fullpel_search_step_param
+  int step_param = (sf->rt_sf.fullpel_search_step_param)
+                       ? sf->rt_sf.fullpel_search_step_param
                        : cpi->mv_search_params.mv_step_param;
   FULLPEL_MV start_mv;
   const int ref = mi->ref_frame[0];
@@ -299,7 +300,7 @@
   else
     center_mv = tmp_mv->as_mv;
 
-  const SEARCH_METHODS search_method = cpi->sf.mv_sf.search_method;
+  const SEARCH_METHODS search_method = sf->mv_sf.search_method;
   const MotionVectorSearchParams *mv_search_params = &cpi->mv_search_params;
   const int ref_stride = xd->plane[0].pre[0].stride;
   const search_site_config *src_search_sites = av1_get_search_site_config(
@@ -326,27 +327,37 @@
     SUBPEL_MOTION_SEARCH_PARAMS ms_params;
     av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize, &ref_mv,
                                       cost_list);
-    if (cpi->sf.rt_sf.reduce_mv_pel_precision &&
-        cpi->sf.mv_sf.subpel_force_stop < HALF_PEL)
+    if (sf->rt_sf.reduce_mv_pel_precision &&
+        sf->mv_sf.subpel_force_stop < HALF_PEL)
       ms_params.forced_stop = subpel_select(cpi, x, bsize, tmp_mv);
-    if (cpi->sf.rt_sf.reduce_zeromv_mvres && ref_mv.row == 0 &&
-        ref_mv.col == 0 && start_mv.row == 0 && start_mv.col == 0) {
+    const bool fullpel_performed_well =
+        (bsize == BLOCK_64X64 && full_var_rd * 40 < 62267 * 7) ||
+        (bsize == BLOCK_32X32 && full_var_rd * 8 < 42380) ||
+        (bsize == BLOCK_16X16 && full_var_rd * 8 < 10127);
+    if (sf->rt_sf.reduce_zeromv_mvres && ref_mv.row == 0 && ref_mv.col == 0 &&
+        start_mv.row == 0 && start_mv.col == 0) {
       // If both the refmv and the fullpel results show zero mv, then there is
       // high likelihood that the current block is static. So we can try to
       // reduce the mv resolution here.
       // These thresholds are the mean var rd collected from multiple encoding
       // runs.
-      if ((bsize == BLOCK_64X64 && full_var_rd * 40 < 62267 * 7) ||
-          (bsize == BLOCK_32X32 && full_var_rd * 8 < 42380) ||
-          (bsize == BLOCK_16X16 && full_var_rd * 8 < 10127)) {
+      if (fullpel_performed_well) {
         ms_params.forced_stop = HALF_PEL;
       }
     }
 
     MV subpel_start_mv = get_mv_from_fullmv(&tmp_mv->as_fullmv);
-    cpi->mv_search_params.find_fractional_mv_step(
-        xd, cm, &ms_params, subpel_start_mv, &tmp_mv->as_mv, &dis,
-        &x->pred_sse[ref], NULL);
+    if (sf->rt_sf.use_adaptive_subpel_search &&
+        (fullpel_performed_well ||
+         x->content_state_sb.source_sad_nonrd <= kLowSad)) {
+      av1_find_best_sub_pixel_tree_pruned_more(xd, cm, &ms_params,
+                                               subpel_start_mv, &tmp_mv->as_mv,
+                                               &dis, &x->pred_sse[ref], NULL);
+    } else {
+      cpi->mv_search_params.find_fractional_mv_step(
+          xd, cm, &ms_params, subpel_start_mv, &tmp_mv->as_mv, &dis,
+          &x->pred_sse[ref], NULL);
+    }
 
     *rate_mv =
         av1_mv_bit_cost(&tmp_mv->as_mv, &ref_mv, x->mv_costs->nmv_joint_cost,
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index f6fc32a..95340cc 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -1679,6 +1679,7 @@
     sf->rt_sf.prune_compoundmode_with_singlecompound_var = true;
     sf->rt_sf.prune_compoundmode_with_singlemode_var = true;
     sf->rt_sf.skip_compound_based_on_var = true;
+    sf->rt_sf.use_adaptive_subpel_search = true;
   }
 
   if (speed >= 8) {
@@ -1709,6 +1710,7 @@
     sf->rt_sf.frame_level_mode_cost_update = true;
     sf->rt_sf.check_only_zero_zeromv_on_large_blocks = true;
     sf->rt_sf.reduce_mv_pel_precision = 0;
+    sf->rt_sf.use_adaptive_subpel_search = false;
     // For multi-thread use case with row_mt enabled, enable top right
     // dependency wait of threads at mi level.
     if ((cpi->oxcf.row_mt == 1) && (cpi->mt_info.num_workers > 1)) {
@@ -2062,6 +2064,7 @@
   rt_sf->skip_compound_based_on_var = false;
   rt_sf->top_right_sync_wait_in_mis = false;
   rt_sf->set_zeromv_skip_based_on_source_sad = 1;
+  rt_sf->use_adaptive_subpel_search = false;
 }
 
 // Populate appropriate sub-pel search method based on speed feature and user
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 0c23572..6261b4c 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -1623,6 +1623,10 @@
   // 1: If source sad is kZeroSad
   // 2: If source sad <= kVeryLowSad
   int set_zeromv_skip_based_on_source_sad;
+
+  // Downgrades the subpel search to av1_find_best_sub_pixel_tree_pruned_more
+  // when either the fullpel search performed well or source sad <= kLowSad.
+  bool use_adaptive_subpel_search;
 } REAL_TIME_SPEED_FEATURES;
 
 /*!\endcond */