rtc: Tune force_large_partition_blocks sf to speed 8

Introduced new levels for the sf: force_large_partition_blocks
to act conservatively for speed 8 for variance based
partitions.
* Changed the name of sf to prefer_large_partition_blocks to
accommodate speeds other than speed 9.
* Refactored code to introduce a new function
tune_thresh_based_on_qindex_window()

 ------------------------------------------------------------
|cpu| Res    |Instr. Count |    BD-Rate Drop (%)             |
|   |        | Reduction(%)| avg. psnr | ovr. psnr |  ssim   |
 ------------------------------------------------------------
| 8 |rtc     |   2.262     | -0.2182   |   0.1719  | -0.1653 |
| 8 |rtc-derf|   4.752     | -0.3534   |  -0.2707  | -0.3229 |
| 8 |Average |   3.476     | -0.2745   |  -0.0125  | -0.2310 |
 ------------------------------------------------------------

No changes to speed 7, 9 and 10.

STATS_CHANGED

Change-Id: I7e505d8c41b6ac0f66dd3f4d3a59ae81d0ad1d6e
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 6255259..bfbf417 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -1323,11 +1323,13 @@
     }
   }
   if (!is_720p_or_larger) {
+    if (speed == 8) sf->rt_sf.prefer_large_partition_blocks = 2;
     if (speed >= 9) {
       sf->rt_sf.force_large_partition_blocks_intra = 1;
     }
   } else {
     if (speed >= 6) sf->rt_sf.skip_newmv_mode_based_on_sse = 3;
+    if (speed == 8) sf->rt_sf.prefer_large_partition_blocks = 1;
     if (speed >= 9) {
       sf->rt_sf.sad_based_adp_altref_lag = 1;
       sf->rt_sf.sad_based_comp_prune = 1;
@@ -1392,7 +1394,7 @@
     sf->rt_sf.sad_based_comp_prune = 0;
     sf->rt_sf.source_metrics_sb_nonrd = 1;
     if (cpi->rc.high_source_sad == 1) {
-      sf->rt_sf.force_large_partition_blocks = 0;
+      sf->rt_sf.prefer_large_partition_blocks = 0;
       sf->part_sf.max_intra_bsize = BLOCK_128X128;
       for (int i = 0; i < BLOCK_SIZES; ++i) {
         if (i > BLOCK_32X32)
@@ -1678,7 +1680,7 @@
     sf->rt_sf.sse_early_term_inter_search = EARLY_TERM_IDX_3;
     sf->rt_sf.screen_content_cdef_filter_qindex_thresh = 20;
     sf->rt_sf.estimate_motion_for_var_based_partition = 0;
-    sf->rt_sf.force_large_partition_blocks = 1;
+    sf->rt_sf.prefer_large_partition_blocks = 3;
     sf->rt_sf.skip_intra_pred = 2;
     sf->rt_sf.var_part_split_threshold_shift = 9;
     for (int i = 0; i < BLOCK_SIZES; ++i)
@@ -1994,7 +1996,7 @@
   rt_sf->source_metrics_sb_nonrd = 0;
   rt_sf->overshoot_detection_cbr = NO_DETECTION;
   rt_sf->check_scene_detection = 0;
-  rt_sf->force_large_partition_blocks = 0;
+  rt_sf->prefer_large_partition_blocks = 0;
   rt_sf->use_temporal_noise_estimate = 0;
   rt_sf->fullpel_search_step_param = 0;
   for (int i = 0; i < BLOCK_SIZES; ++i)
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 7ba118b..3c53b53 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -1456,8 +1456,9 @@
   // Check for scene/content change detection on every frame before encoding.
   int check_scene_detection;
 
-  // Forces larger partition blocks in variance based partitioning
-  int force_large_partition_blocks;
+  // For nonrd mode: Prefer larger partition blks in variance based partitioning
+  // 0: disabled, 1-3: increasing aggressiveness
+  int prefer_large_partition_blocks;
 
   // uses results of temporal noise estimate
   int use_temporal_noise_estimate;
diff --git a/av1/encoder/var_based_part.c b/av1/encoder/var_based_part.c
index 2c88427..75533bd 100644
--- a/av1/encoder/var_based_part.c
+++ b/av1/encoder/var_based_part.c
@@ -428,6 +428,25 @@
   return threshold;
 }
 
+// Blends thresholds[1], [2], [3] toward 2x, 2x, 4x as qindex rises through
+// [th - 45, th + 45]; below that window they are left unchanged.
+static AOM_INLINE void tune_thresh_based_on_qindex_window(
+    int qindex, int th, int64_t thresholds[]) {
+  const int win = 45;
+  double weight;
+  if (qindex < th - win)
+    weight = 1.0;
+  else if (qindex > th + win)
+    weight = 0.0;
+  else  // Cast so the ramp is fractional; int division yields only 0 or 1.
+    weight = 1.0 - (double)(qindex - th + win) / (2 * win);
+  for (int i = 1; i <= 3; ++i) {
+    const int shift = (i == 3) ? 2 : 1;
+    thresholds[i] = (int)((1 - weight) * (thresholds[i] << shift) +
+                          weight * thresholds[i]);
+  }
+}
+
 static AOM_INLINE void set_vbp_thresholds(AV1_COMP *cpi, int64_t thresholds[],
                                           int q, int content_lowsumdiff,
                                           int source_sad_nonrd,
@@ -477,7 +496,7 @@
     if (noise_level == kHigh)
       threshold_base = (5 * threshold_base) >> 1;
     else if (noise_level == kMedium &&
-             !cpi->sf.rt_sf.force_large_partition_blocks)
+             !cpi->sf.rt_sf.prefer_large_partition_blocks)
       threshold_base = (5 * threshold_base) >> 2;
   }
   // TODO(kyslov) Enable var based partition adjusment on temporal denoising
@@ -548,7 +567,8 @@
   } else {
     thresholds[2] = (5 * threshold_base) >> 1;
   }
-  if (cpi->sf.rt_sf.force_large_partition_blocks) {
+  // Tune thresholds less or more aggressively to prefer larger partitions
+  if (cpi->sf.rt_sf.prefer_large_partition_blocks >= 3) {
     double weight;
     const int win = 20;
     if (current_qindex < QINDEX_LARGE_BLOCK_THR - win)
@@ -597,6 +617,13 @@
           (int)((1 - weight) * (thresholds[2] << 4) + weight * thresholds[2]);
       thresholds[3] = INT32_MAX;
     }
+  } else if (cpi->sf.rt_sf.prefer_large_partition_blocks >= 2) {
+    tune_thresh_based_on_qindex_window(current_qindex, QINDEX_LARGE_BLOCK_THR,
+                                       thresholds);
+  } else if (cpi->sf.rt_sf.prefer_large_partition_blocks >= 1) {
+    thresholds[3] <<= 2;
+    thresholds[1] <<= (source_sad_nonrd == kLowSad) ? 1 : 0;
+    thresholds[2] <<= (source_sad_nonrd == kLowSad) ? 1 : 0;
   }
   if (cpi->sf.part_sf.disable_8x8_part_based_on_qidx && (current_qindex < 128))
     thresholds[3] = INT64_MAX;
@@ -1379,7 +1406,7 @@
                    (((maxvar_16x16[m][i] - minvar_16x16[m][i]) >
                          (thresholds[2] >> 1) &&
                      maxvar_16x16[m][i] > thresholds[2]) ||
-                    (cpi->sf.rt_sf.force_large_partition_blocks &&
+                    (cpi->sf.rt_sf.prefer_large_partition_blocks &&
                      x->content_state_sb.source_sad_nonrd > kLowSad &&
                      cpi->rc.frame_source_sad < 20000 &&
                      maxvar_16x16[m][i] > (thresholds[2] >> 4) &&
@@ -1405,7 +1432,7 @@
           (max_var_32x32[m] - min_var_32x32[m]) > 3 * (thresholds[1] >> 3) &&
           max_var_32x32[m] > thresholds[1] >> 1 &&
           (noise_level >= kMedium || cpi->ppi->use_svc ||
-           cpi->sf.rt_sf.force_large_partition_blocks)) {
+           cpi->sf.rt_sf.prefer_large_partition_blocks)) {
         force_split[1 + m] = PART_EVAL_ONLY_SPLIT;
         force_split[0] = PART_EVAL_ONLY_SPLIT;
       }
diff --git a/test/rt_end_to_end_test.cc b/test/rt_end_to_end_test.cc
index a6f39c1..6e9711e 100644
--- a/test/rt_end_to_end_test.cc
+++ b/test/rt_end_to_end_test.cc
@@ -40,9 +40,9 @@
                            { 10, { { 0, 34.7 }, { 3, 35.3 } } } } },
                        { "paris_352_288_30.y4m",
                          { { 5, { { 0, 36.2 }, { 3, 36.7 } } },
-                           { 6, { { 0, 36.1 }, { 3, 36.5 } } },
+                           { 6, { { 0, 36.1 }, { 3, 36.48 } } },
                            { 7, { { 0, 35.5 }, { 3, 36.0 } } },
-                           { 8, { { 0, 36.0 }, { 3, 36.5 } } },
+                           { 8, { { 0, 35.98 }, { 3, 36.48 } } },
                            { 9, { { 0, 35.5 }, { 3, 36.0 } } },
                            { 10, { { 0, 35.3 }, { 3, 35.9 } } } } },
                        { "niklas_1280_720_30.y4m",