rtc: Adjust tune_thresh_based_on_qindex_window conservatively

Fixed visual artifacts in the desktopqvga clip by using source_sad in the
prefer_large_partition_blocks sf. The speed and BD-Rate impact:

 ---------------------------------------------------------
|cpu| Test set |Instr. Count|    BD-Rate Drop (%)         |
|   |          |Reduction(%)|avg. psnr |ovr. psnr|  ssim  |
 ---------------------------------------------------------
| 8 | rtc-derf |  -1.248    | -0.2538  | -0.2606 | -0.2099|
 ---------------------------------------------------------

The best- and worst-case quality impact is:
 ---------------------------------------------------------
|         Clip                 |     BD Rate Drop (%)     |
|                              |(-ve: Gain, +ve: Loss)    |
|                             ----------------------------|
|                              |avg.PSNR |ovr.PSNR| SSIM  |
|---------------------------------------------------------|
|Best  | jimred_320_240        | -2.17  | -2.10  | -1.96  |
|      | desktop1_320_180      | -0.83  | -1.08  | -2.80  |
|---------------------------------------------------------|
|Worst | street180p            |  0.51  |  0.57  |  0.46  |
|      | apprtc_pixel3_320_240 | -0.08  |  0.25  |  0.95  |
 ---------------------------------------------------------
Other speeds and resolutions are not impacted.

STATS_CHANGED

Change-Id: Icc43160deafc823a0bc6f6fbea47c35f37d8ce9c
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index bfbf417..221e9d6 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -1258,6 +1258,7 @@
       sf->lpf_sf.cdef_pick_method = CDEF_PICK_FROM_Q;
       sf->rt_sf.check_only_zero_zeromv_on_large_blocks = true;
     }
+    if (speed == 8) sf->rt_sf.prefer_large_partition_blocks = 2;
     if (speed >= 8) {
       sf->rt_sf.use_nonrd_filter_search = 0;
       sf->rt_sf.tx_size_level_based_on_qstep = 1;
@@ -1294,6 +1295,7 @@
       sf->rt_sf.short_circuit_low_temp_var = 0;
       sf->rt_sf.use_nonrd_altref_frame = 1;
     }
+    if (speed == 8) sf->rt_sf.prefer_large_partition_blocks = 3;
     if (speed >= 8) sf->rt_sf.tx_size_level_based_on_qstep = 2;
     if (speed >= 9) {
       sf->rt_sf.gf_length_lvl = 1;
@@ -1323,7 +1325,6 @@
     }
   }
   if (!is_720p_or_larger) {
-    if (speed == 8) sf->rt_sf.prefer_large_partition_blocks = 2;
     if (speed >= 9) {
       sf->rt_sf.force_large_partition_blocks_intra = 1;
     }
@@ -1680,7 +1681,7 @@
     sf->rt_sf.sse_early_term_inter_search = EARLY_TERM_IDX_3;
     sf->rt_sf.screen_content_cdef_filter_qindex_thresh = 20;
     sf->rt_sf.estimate_motion_for_var_based_partition = 0;
-    sf->rt_sf.prefer_large_partition_blocks = 3;
+    sf->rt_sf.prefer_large_partition_blocks = 4;
     sf->rt_sf.skip_intra_pred = 2;
     sf->rt_sf.var_part_split_threshold_shift = 9;
     for (int i = 0; i < BLOCK_SIZES; ++i)
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 3c53b53..19b190a4 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -1457,7 +1457,7 @@
   int check_scene_detection;
 
   // For nonrd mode: Prefer larger partition blks in variance based partitioning
-  // 0: disabled, 1-2: increasing aggressiveness
+  // 0: disabled, 1-4: increasing aggressiveness
   int prefer_large_partition_blocks;
 
   // uses results of temporal noise estimate
diff --git a/av1/encoder/var_based_part.c b/av1/encoder/var_based_part.c
index 75533bd..c5e2edd 100644
--- a/av1/encoder/var_based_part.c
+++ b/av1/encoder/var_based_part.c
@@ -429,7 +429,7 @@
 }
 
 static AOM_INLINE void tune_thresh_based_on_qindex_window(
-    int qindex, int th, int64_t thresholds[]) {
+    int qindex, int th, int source_sad, int ag_idx, int64_t thresholds[]) {
   const int win = 45;
   double weight;
 
@@ -443,8 +443,9 @@
       (int)((1 - weight) * (thresholds[1] << 1) + weight * thresholds[1]);
   thresholds[2] =
       (int)((1 - weight) * (thresholds[2] << 1) + weight * thresholds[2]);
+  const int fac = (!ag_idx && source_sad != kLowSad) ? 1 : 2;
   thresholds[3] =
-      (int)((1 - weight) * (thresholds[3] << 2) + weight * thresholds[3]);
+      (int)((1 - weight) * (thresholds[3] << fac) + weight * thresholds[3]);
 }
 
 static AOM_INLINE void set_vbp_thresholds(AV1_COMP *cpi, int64_t thresholds[],
@@ -568,7 +569,7 @@
     thresholds[2] = (5 * threshold_base) >> 1;
   }
   // Tune thresholds less or more aggressively to prefer larger partitions
-  if (cpi->sf.rt_sf.prefer_large_partition_blocks >= 3) {
+  if (cpi->sf.rt_sf.prefer_large_partition_blocks >= 4) {
     double weight;
     const int win = 20;
     if (current_qindex < QINDEX_LARGE_BLOCK_THR - win)
@@ -618,8 +619,9 @@
       thresholds[3] = INT32_MAX;
     }
   } else if (cpi->sf.rt_sf.prefer_large_partition_blocks >= 2) {
-    tune_thresh_based_on_qindex_window(current_qindex, QINDEX_LARGE_BLOCK_THR,
-                                       thresholds);
+    tune_thresh_based_on_qindex_window(
+        current_qindex, QINDEX_LARGE_BLOCK_THR, source_sad_nonrd,
+        cpi->sf.rt_sf.prefer_large_partition_blocks - 2, thresholds);
   } else if (cpi->sf.rt_sf.prefer_large_partition_blocks >= 1) {
     thresholds[3] <<= 2;
     thresholds[1] <<= (source_sad_nonrd == kLowSad) ? 1 : 0;