rtc: refactor reduce_mv_pel_precision and reduce_zeromv_mvres * The sf reduce_mv_pel_precision is refactored into two sfs: reduce_mv_pel_precision_highmotion and reduce_mv_pel_precision_lowcomplex. * The sf reduce_zeromv_mvres is made a part of reduce_mv_pel_precision_lowcomplex as level 1. This change is bit-exact. Change-Id: I35fb604b1c31100fc29104c7129357d9e139f6f6

commit: 14b2f3f4710e909aec3e04e2772b07d01bb2d547 [log] [tgz]
author: Neeraj Gadgil <neeraj.gadgil@ittiam.com> Mon Aug 29 09:42:17 2022 +0530
committer: ranjit tulabandu <ranjit.tulabandu@ittiam.com> Sun Sep 04 07:02:37 2022 +0000
tree: ace876b78558b416f1f44999187da7e2482f59d7
parent: 34f2a6eaf5d0c49bd55dddfe8be62789043fb396 [diff]
diff --git a/av1/encoder/nonrd_pickmode.c b/av1/encoder/nonrd_pickmode.c
index df12354..ce5d67d 100644
--- a/av1/encoder/nonrd_pickmode.c
+++ b/av1/encoder/nonrd_pickmode.c

@@ -175,9 +175,10 @@
 }
 
 static INLINE int subpel_select(AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
-                                int_mv *mv) {
-  assert(cpi->sf.rt_sf.reduce_mv_pel_precision);
-  if (cpi->sf.rt_sf.reduce_mv_pel_precision == 2) {
+                                int_mv *mv, MV ref_mv, FULLPEL_MV start_mv,
+                                bool fullpel_performed_well) {
+  // Reduce MV precision to halfpel for higher int MV value & frame-level motion
+  if (cpi->sf.rt_sf.reduce_mv_pel_precision_highmotion == 1) {
     int mv_thresh = 4;
     const int is_low_resoln =
         (cpi->common.width * cpi->common.height <= 320 * 240);
@@ -188,9 +189,10 @@
     if (abs(mv->as_fullmv.row) >= mv_thresh ||
         abs(mv->as_fullmv.col) >= mv_thresh)
       return HALF_PEL;
-  } else if (cpi->sf.rt_sf.reduce_mv_pel_precision == 1) {
-    // Reduce MV precision for relatively static (e.g. background), low-complex
-    // large areas
+  }
+  // Reduce MV precision for relatively static (e.g. background), low-complex
+  // large areas
+  if (cpi->sf.rt_sf.reduce_mv_pel_precision_lowcomplex >= 2) {
     const int qband = x->qindex >> (QINDEX_BITS - 2);
     assert(qband < 4);
     if (x->content_state_sb.source_sad_nonrd <= kVeryLowSad &&
@@ -200,6 +202,10 @@
       else if (x->source_variance < 5000)
         return HALF_PEL;
     }
+  } else if (cpi->sf.rt_sf.reduce_mv_pel_precision_lowcomplex >= 1) {
+    if (fullpel_performed_well && ref_mv.row == 0 && ref_mv.col == 0 &&
+        start_mv.row == 0 && start_mv.col == 0)
+      return HALF_PEL;
   }
   return cpi->sf.mv_sf.subpel_force_stop;
 }
@@ -296,24 +302,14 @@
     SUBPEL_MOTION_SEARCH_PARAMS ms_params;
     av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize, &ref_mv,
                                       cost_list);
-    if (sf->rt_sf.reduce_mv_pel_precision &&
-        sf->mv_sf.subpel_force_stop < HALF_PEL)
-      ms_params.forced_stop = subpel_select(cpi, x, bsize, tmp_mv);
     const bool fullpel_performed_well =
         (bsize == BLOCK_64X64 && full_var_rd * 40 < 62267 * 7) ||
         (bsize == BLOCK_32X32 && full_var_rd * 8 < 42380) ||
         (bsize == BLOCK_16X16 && full_var_rd * 8 < 10127);
-    if (sf->rt_sf.reduce_zeromv_mvres && ref_mv.row == 0 && ref_mv.col == 0 &&
-        start_mv.row == 0 && start_mv.col == 0) {
-      // If both the refmv and the fullpel results show zero mv, then there is
-      // high likelihood that the current block is static. So we can try to
-      // reduce the mv resolution here.
-      // These thresholds are the mean var rd collected from multiple encoding
-      // runs.
-      if (fullpel_performed_well) {
-        ms_params.forced_stop = HALF_PEL;
-      }
-    }
+    if (sf->rt_sf.reduce_mv_pel_precision_highmotion ||
+        sf->rt_sf.reduce_mv_pel_precision_lowcomplex)
+      ms_params.forced_stop = subpel_select(cpi, x, bsize, tmp_mv, ref_mv,
+                                            start_mv, fullpel_performed_well);
 
     MV subpel_start_mv = get_mv_from_fullmv(&tmp_mv->as_fullmv);
     if (sf->rt_sf.use_adaptive_subpel_search &&
@@ -402,9 +398,12 @@
 
     SUBPEL_MOTION_SEARCH_PARAMS ms_params;
     av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize, &ref_mv, NULL);
-    if (cpi->sf.rt_sf.reduce_mv_pel_precision &&
-        cpi->sf.mv_sf.subpel_force_stop < HALF_PEL)
-      ms_params.forced_stop = subpel_select(cpi, x, bsize, &best_mv);
+    if (cpi->sf.rt_sf.reduce_mv_pel_precision_highmotion ||
+        cpi->sf.rt_sf.reduce_mv_pel_precision_lowcomplex) {
+      FULLPEL_MV start_mv = { .row = 0, .col = 0 };
+      ms_params.forced_stop =
+          subpel_select(cpi, x, bsize, &best_mv, ref_mv, start_mv, false);
+    }
     MV start_mv = get_mv_from_fullmv(&best_mv.as_fullmv);
     cpi->mv_search_params.find_fractional_mv_step(
         xd, cm, &ms_params, start_mv, &best_mv.as_mv, &dis,

diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 64b5ee0..80457a2 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c

@@ -1292,7 +1292,7 @@
     if (speed >= 10) {
       sf->rt_sf.skip_intra_pred = 2;
       sf->rt_sf.hybrid_intra_pickmode = 3;
-      sf->rt_sf.reduce_zeromv_mvres = true;
+      sf->rt_sf.reduce_mv_pel_precision_lowcomplex = 1;
     }
   } else {
     sf->rt_sf.prune_intra_mode_based_on_mv_range = 2;
@@ -1344,14 +1344,14 @@
   } else {
     if (speed >= 6) sf->rt_sf.skip_newmv_mode_based_on_sse = 3;
     if (speed == 7) sf->rt_sf.prefer_large_partition_blocks = 0;
-    if (speed >= 7) sf->rt_sf.reduce_mv_pel_precision = 1;
+    if (speed >= 7) sf->rt_sf.reduce_mv_pel_precision_lowcomplex = 2;
     if (speed >= 9) {
       sf->rt_sf.sad_based_adp_altref_lag = 1;
-      sf->rt_sf.reduce_mv_pel_precision = 0;
+      sf->rt_sf.reduce_mv_pel_precision_lowcomplex = 0;
     }
     if (speed >= 10) {
       sf->rt_sf.sad_based_adp_altref_lag = 3;
-      sf->rt_sf.reduce_mv_pel_precision = 2;
+      sf->rt_sf.reduce_mv_pel_precision_highmotion = 1;
     }
   }
   // Setting for SVC, or when the ref_frame_config control is
@@ -1369,11 +1369,11 @@
       sf->mv_sf.search_method = NSTEP;
       sf->mv_sf.subpel_search_method = SUBPEL_TREE;
       sf->rt_sf.fullpel_search_step_param = 6;
-      sf->rt_sf.reduce_mv_pel_precision = 0;
+      sf->rt_sf.reduce_mv_pel_precision_highmotion = 0;
     }
     if (speed >= 8) {
       sf->rt_sf.disable_cdf_update_non_reference_frame = true;
-      sf->rt_sf.reduce_mv_pel_precision = 2;
+      sf->rt_sf.reduce_mv_pel_precision_highmotion = 1;
       if (rtc_ref->non_reference_frame) {
         sf->rt_sf.nonrd_agressive_skip = 1;
         sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
@@ -1411,8 +1411,8 @@
       sf->rt_sf.nonrd_prune_ref_frame_search = 3;
       sf->rt_sf.var_part_split_threshold_shift = 10;
       sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
-      sf->rt_sf.reduce_mv_pel_precision = 2;
-      sf->rt_sf.reduce_zeromv_mvres = true;
+      sf->rt_sf.reduce_mv_pel_precision_highmotion = 1;
+      sf->rt_sf.reduce_mv_pel_precision_lowcomplex = 1;
       sf->rt_sf.screen_content_cdef_filter_qindex_thresh = 20;
     }
     if (speed >= 10) {
@@ -1720,7 +1720,7 @@
     sf->rt_sf.var_part_based_on_qidx = 0;
     sf->rt_sf.frame_level_mode_cost_update = true;
     sf->rt_sf.check_only_zero_zeromv_on_large_blocks = true;
-    sf->rt_sf.reduce_mv_pel_precision = 0;
+    sf->rt_sf.reduce_mv_pel_precision_highmotion = 0;
     sf->rt_sf.use_adaptive_subpel_search = false;
     // For multi-thread use case with row_mt enabled, enable top right
     // dependency wait of threads at mi level.
@@ -1737,7 +1737,7 @@
     sf->rt_sf.nonrd_prune_ref_frame_search = 3;
     sf->rt_sf.var_part_split_threshold_shift = 10;
     sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
-    sf->rt_sf.reduce_mv_pel_precision = 2;
+    sf->rt_sf.reduce_mv_pel_precision_highmotion = 1;
   }
 }
 
@@ -2049,7 +2049,8 @@
   rt_sf->prune_inter_modes_with_golden_ref = 0;
   rt_sf->prune_inter_modes_wrt_gf_arf_based_on_sad = 0;
   rt_sf->prune_inter_modes_using_temp_var = 0;
-  rt_sf->reduce_mv_pel_precision = 0;
+  rt_sf->reduce_mv_pel_precision_highmotion = 0;
+  rt_sf->reduce_mv_pel_precision_lowcomplex = 0;
   rt_sf->prune_intra_mode_based_on_mv_range = 0;
   rt_sf->var_part_split_threshold_shift = 7;
   rt_sf->gf_refresh_based_on_qp = 0;
@@ -2062,7 +2063,6 @@
   rt_sf->partition_direct_merging = 0;
   rt_sf->var_part_based_on_qidx = 0;
   rt_sf->tx_size_level_based_on_qstep = 0;
-  rt_sf->reduce_zeromv_mvres = false;
   rt_sf->vbp_prune_16x16_split_using_min_max_sub_blk_var = false;
   rt_sf->prune_compoundmode_with_singlecompound_var = false;
   rt_sf->frame_level_mode_cost_update = false;

diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 64e2b6c..4cbce7d 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h

@@ -1505,10 +1505,15 @@
   // variance wrt LAST reference.
   int prune_inter_modes_using_temp_var;
 
-  // Reduce MV precision at block level, represents various algos (0: disabled)
-  // 1: switch to halfpel, fullpel based on blk SAD, source var, bsize and qp
-  // 2: switch to halfpel based on integer mv size, bsize, frame-level motion
-  int reduce_mv_pel_precision;
+  // Reduce MV precision to halfpel for higher int MV value & frame-level motion
+  int reduce_mv_pel_precision_highmotion;
+
+  // Reduce MV precision for low complexity blocks
+  // 0: disabled
+  // 1: Reduce the mv resolution for zero mv if the variance is low
+  // 2: Switch to halfpel, fullpel based on low block spatial-temporal
+  // complexity.
+  int reduce_mv_pel_precision_lowcomplex;
 
   // Prune intra mode evaluation in inter frames based on mv range.
   BLOCK_SIZE prune_intra_mode_based_on_mv_range;
@@ -1556,9 +1561,6 @@
   // Level of aggressiveness for obtaining tx size based on qstep
   int tx_size_level_based_on_qstep;
 
-  // Reduce the mv resolution for zero mv if the variance is low.
-  bool reduce_zeromv_mvres;
-
   // Avoid the partitioning of a 16x16 block in variance based partitioning
   // (VBP) by making use of minimum and maximum sub-block variances.
   // For allintra encode, this speed feature reduces instruction count by 5.39%
commit	14b2f3f4710e909aec3e04e2772b07d01bb2d547	[log] [tgz]
author	Neeraj Gadgil <neeraj.gadgil@ittiam.com>	Mon Aug 29 09:42:17 2022 +0530
committer	ranjit tulabandu <ranjit.tulabandu@ittiam.com>	Sun Sep 04 07:02:37 2022 +0000
tree	ace876b78558b416f1f44999187da7e2482f59d7
parent	34f2a6eaf5d0c49bd55dddfe8be62789043fb396 [diff]