Revert "rtc: Allow for split to 8x8 blocks for speed >=9 low-resoln"

This reverts commit 6fabf7b6afdcede439884b3c890cac95581de864.

Reason for revert: Possible tsan error:
Bug:aomedia:3331


Change-Id: Iecd203096e5c9a368ff4a0ea8f8ee4894d9195be
diff --git a/av1/encoder/var_based_part.c b/av1/encoder/var_based_part.c
index 235a1d9..dcf75f6 100644
--- a/av1/encoder/var_based_part.c
+++ b/av1/encoder/var_based_part.c
@@ -449,8 +449,7 @@
 static AOM_INLINE void set_vbp_thresholds(AV1_COMP *cpi, int64_t thresholds[],
                                           int q, int content_lowsumdiff,
                                           int source_sad_nonrd,
-                                          int source_sad_rd, int segment_id,
-                                          uint64_t blk_sad) {
+                                          int source_sad_rd, int segment_id) {
   AV1_COMMON *const cm = &cpi->common;
   const int is_key_frame = frame_is_intra_only(cm);
   const int threshold_multiplier = is_key_frame ? 120 : 1;
@@ -586,7 +585,7 @@
       }
     }
     if (cm->width * cm->height <= 352 * 288) {
-      thresholds[3] = INT64_MAX;
+      thresholds[3] = INT32_MAX;
       if (segment_id == 0) {
         thresholds[1] <<= 2;
         thresholds[2] <<= (source_sad_nonrd <= kLowSad) ? 5 : 4;
@@ -594,16 +593,6 @@
         thresholds[1] <<= 1;
         thresholds[2] <<= 3;
       }
-      // Allow for split to 8x8 for superblocks where part of it has
-      // moving boudary. So allow for sb with source_sad above threshold,
-      // and avoid very large source_sad or high source content, to avoid
-      // too many 8x8 within superblock.
-      if (cpi->rc.avg_source_sad < 25000 && blk_sad > 20000 &&
-          blk_sad < 60000) {
-        thresholds[2] = thresholds[2] >> 1;
-        thresholds[3] = thresholds[2] << 3;
-      }
-
       // Condition the increase of partition thresholds on the segment
       // and the content. Avoid the increase for superblocks which have
       // high source sad, unless the whole frame has very high motion
@@ -927,7 +916,7 @@
     return;
   } else {
     set_vbp_thresholds(cpi, cpi->vbp_info.thresholds, q, content_lowsumdiff, 0,
-                       0, 0, 0);
+                       0, 0);
     // The threshold below is not changed locally.
     cpi->vbp_info.threshold_minmax = 15 + (q >> 3);
   }
@@ -1011,13 +1000,13 @@
     AV1_COMP *cpi, MACROBLOCK *x, VP128x128 *vt, VP16x16 *vt2,
     PART_EVAL_STATUS *force_split, int avg_16x16[][4], int maxvar_16x16[][4],
     int minvar_16x16[][4], int *variance4x4downsample, int64_t *thresholds,
-    uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride,
-    int use_4x4avg) {
+    uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride) {
   AV1_COMMON *cm = &cpi->common;
   MACROBLOCKD *xd = &x->e_mbd;
   const int is_key_frame = frame_is_intra_only(cm);
   const int is_small_sb = (cm->seq_params->sb_size == BLOCK_64X64);
   const int num_64x64_blocks = is_small_sb ? 1 : 4;
+  // TODO(kyslov) Bring back compute_minmax_variance with content type detection
   const int compute_minmax_variance = 0;
   const int segment_id = xd->mi[0]->segment_id;
   int pixels_wide = 128, pixels_high = 128;
@@ -1067,8 +1056,7 @@
             maxvar_16x16[m][i] =
                 vt->split[m].split[i].split[j].part_variances.none.variance;
           if (vt->split[m].split[i].split[j].part_variances.none.variance >
-                  thresholds[3] &&
-              !use_4x4avg) {
+              thresholds[3]) {
             // 16X16 variance is above threshold for split, so force split to
             // 8x8 for this 16x16 block (this also forces splits for upper
             // levels).
@@ -1100,10 +1088,7 @@
             }
           }
         }
-        if (is_key_frame ||
-            (use_4x4avg &&
-             vt->split[m].split[i].split[j].part_variances.none.variance >
-                 (thresholds[2] << 1))) {
+        if (is_key_frame) {
           force_split[split_index] = PART_EVAL_ALL;
           // Go down to 4x4 down-sampling for variance.
           variance4x4downsample[i2 + j] = 1;
@@ -1203,9 +1188,8 @@
 // Decides whether to split or merge a 16x16 partition block in variance based
 // partitioning based on the 8x8 sub-block variances.
 static AOM_INLINE PART_EVAL_STATUS get_part_eval_based_on_sub_blk_var(
-    VP16x16 *var_16x16_info, int64_t threshold16, int is_key_frame) {
+    VP16x16 *var_16x16_info, int64_t threshold16) {
   int max_8x8_var = 0, min_8x8_var = INT_MAX;
-  int fac_shift = 2;
   for (int k = 0; k < 4; k++) {
     get_variance(&var_16x16_info->split[k].part_variances.none);
     int this_8x8_var = var_16x16_info->split[k].part_variances.none.variance;
@@ -1215,11 +1199,8 @@
   // If the difference between maximum and minimum sub-block variances is high,
   // then only evaluate PARTITION_SPLIT for the 16x16 block. Otherwise, evaluate
   // only PARTITION_NONE. The shift factor for threshold16 has been derived
-  // empirically. For delta frames: add condition that min_8x8_var is small,
-  // this is to target moving boundary withing 16x16 block, so some area should
-  // be small variance/stationary.
-  return ((max_8x8_var - min_8x8_var) > (threshold16 << fac_shift) &&
-          (!is_key_frame && min_8x8_var < AOMMAX(400, (threshold16 >> 3))))
+  // empirically.
+  return ((max_8x8_var - min_8x8_var) > (threshold16 << 2))
              ? PART_EVAL_ONLY_SPLIT
              : PART_EVAL_ONLY_NONE;
 }
@@ -1260,7 +1241,7 @@
   int avg_16x16[4][4];
   int maxvar_16x16[4][4];
   int minvar_16x16[4][4];
-  int use_4x4avg;
+  int64_t threshold_4x4avg;
   uint8_t *s;
   const uint8_t *d;
   int sp;
@@ -1295,36 +1276,27 @@
                             vbp_thresholds[2], vbp_thresholds[3],
                             vbp_thresholds[4] };
 
-  const int low_res = cm->width * cm->height <= 352 * 288;
+  const int low_res = (cm->width <= 352 && cm->height <= 288);
   int variance4x4downsample[64];
   const int segment_id = xd->mi[0]->segment_id;
 
-  uint64_t blk_sad = 0;
-  if (cpi->src_sad_blk_64x64 != NULL) {
-    const int sb_size_by_mb = (cm->seq_params->sb_size == BLOCK_128X128)
-                                  ? (cm->seq_params->mib_size >> 1)
-                                  : cm->seq_params->mib_size;
-    const int sb_cols =
-        (cm->mi_params.mi_cols + sb_size_by_mb - 1) / sb_size_by_mb;
-    const int sbi_col = mi_col / sb_size_by_mb;
-    const int sbi_row = mi_row / sb_size_by_mb;
-    blk_sad = cpi->src_sad_blk_64x64[sbi_col + sbi_row * sb_cols];
-  }
-
   if (cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled &&
       cyclic_refresh_segment_id_boosted(segment_id)) {
     const int q =
         av1_get_qindex(&cm->seg, segment_id, cm->quant_params.base_qindex);
     set_vbp_thresholds(cpi, thresholds, q, x->content_state_sb.low_sumdiff,
                        x->content_state_sb.source_sad_nonrd,
-                       x->content_state_sb.source_sad_rd, 1, blk_sad);
+                       x->content_state_sb.source_sad_rd, 1);
   } else {
     set_vbp_thresholds(cpi, thresholds, cm->quant_params.base_qindex,
                        x->content_state_sb.low_sumdiff,
                        x->content_state_sb.source_sad_nonrd,
-                       x->content_state_sb.source_sad_rd, 0, blk_sad);
+                       x->content_state_sb.source_sad_rd, 0);
   }
 
+  // For non keyframes, disable 4x4 average for low resolution when speed = 8
+  threshold_4x4avg = INT64_MAX;
+
   s = x->plane[0].src.buf;
   sp = x->plane[0].src.stride;
 
@@ -1414,19 +1386,13 @@
   if (cpi->noise_estimate.enabled)
     noise_level = av1_noise_estimate_extract_level(&cpi->noise_estimate);
 
-  if (cpi->sf.rt_sf.prefer_large_partition_blocks >= 3 && low_res &&
-      thresholds[3] < INT64_MAX) {
-    use_4x4avg = 1;
-    CHECK_MEM_ERROR(cm, vt2, aom_calloc(16, sizeof(*vt2)));
-  } else {
-    use_4x4avg = 0;
-  }
-
+  if (low_res && threshold_4x4avg < INT64_MAX)
+    CHECK_MEM_ERROR(cm, vt2, aom_malloc(sizeof(*vt2)));
   // Fill in the entire tree of 8x8 (or 4x4 under some conditions) variances
   // for splits.
   fill_variance_tree_leaves(cpi, x, vt, vt2, force_split, avg_16x16,
                             maxvar_16x16, minvar_16x16, variance4x4downsample,
-                            thresholds, s, sp, d, dp, use_4x4avg);
+                            thresholds, s, sp, d, dp);
 
   avg_64x64 = 0;
   for (m = 0; m < num_64x64_blocks; ++m) {
@@ -1448,10 +1414,8 @@
           get_variance(&vtemp->part_variances.none);
           if (vtemp->part_variances.none.variance > thresholds[3]) {
             force_split[split_index] =
-                cpi->sf.rt_sf.vbp_prune_16x16_split_using_min_max_sub_blk_var ||
-                        (!is_key_frame && use_4x4avg)
-                    ? get_part_eval_based_on_sub_blk_var(vtemp, thresholds[3],
-                                                         is_key_frame)
+                cpi->sf.rt_sf.vbp_prune_16x16_split_using_min_max_sub_blk_var
+                    ? get_part_eval_based_on_sub_blk_var(vtemp, thresholds[3])
                     : PART_EVAL_ONLY_SPLIT;
             force_split[5 + m2 + i] = PART_EVAL_ONLY_SPLIT;
             force_split[m + 1] = PART_EVAL_ONLY_SPLIT;