Allintra: Prune winner mode processing based on source variance

Introduce the speed feature (sf)
prune_winner_mode_processing_using_src_var, which disables winner
mode processing for blocks with low source variance. A block is
pruned when its source variance falls below a threshold derived
from qindex. The sf is enabled for allintra encoding with
cpu-used>=6.

Results for AVIF still-image encoding:

             Encode Time      BD-Rate Loss(%)
cpu-used     Reduction(%)     psnr       ssim
    6           3.804         0.2174     0.3855
    7           5.239         0.0420     0.4382
    8           1.327         0.0077     0.0108
    9           0.000         0.0000     0.0000

STATS_CHANGED for --allintra encode

Change-Id: I2d95ace6c5ab2c72abc79c0358066e76cd23810c
diff --git a/av1/encoder/intra_mode_search.c b/av1/encoder/intra_mode_search.c
index e8bbe11..cc6c0b6 100644
--- a/av1/encoder/intra_mode_search.c
+++ b/av1/encoder/intra_mode_search.c
@@ -1414,7 +1414,7 @@
 
     for (int mode_idx = 0; mode_idx < x->winner_mode_count; mode_idx++) {
       *mbmi = x->winner_mode_stats[mode_idx].mbmi;
-      if (is_winner_mode_processing_enabled(cpi, mbmi, mbmi->mode)) {
+      if (is_winner_mode_processing_enabled(cpi, x, mbmi, mbmi->mode)) {
         // Restore color_map of palette mode before winner mode processing
         if (mbmi->palette_mode_info.palette_size[0] > 0) {
           uint8_t *color_map_src =
@@ -1446,7 +1446,7 @@
     // If previous searches use only the default tx type/no R-D optimization of
     // quantized coeffs, do an extra search for the best tx type/better R-D
     // optimization of quantized coeffs
-    if (is_winner_mode_processing_enabled(cpi, mbmi, best_mbmi.mode)) {
+    if (is_winner_mode_processing_enabled(cpi, x, mbmi, best_mbmi.mode)) {
       // Set params for winner mode evaluation
       set_mode_eval_params(cpi, x, WINNER_MODE_EVAL);
       *mbmi = best_mbmi;
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 05928d0..9acc81d 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -3526,7 +3526,8 @@
   int64_t best_rd;
   const int num_planes = av1_num_planes(cm);
 
-  if (!is_winner_mode_processing_enabled(cpi, best_mbmode, best_mbmode->mode))
+  if (!is_winner_mode_processing_enabled(cpi, x, best_mbmode,
+                                         best_mbmode->mode))
     return;
 
   // Set params for winner mode evaluation
@@ -3550,7 +3551,7 @@
 
     if (xd->lossless[winner_mbmi->segment_id] == 0 &&
         winner_mode_index != THR_INVALID &&
-        is_winner_mode_processing_enabled(cpi, winner_mbmi,
+        is_winner_mode_processing_enabled(cpi, x, winner_mbmi,
                                           winner_mbmi->mode)) {
       RD_STATS rd_stats = *winner_rd_stats;
       int skip_blk = 0;
diff --git a/av1/encoder/rdopt_utils.h b/av1/encoder/rdopt_utils.h
index f52082a..7076f91 100644
--- a/av1/encoder/rdopt_utils.h
+++ b/av1/encoder/rdopt_utils.h
@@ -389,10 +389,18 @@
 }
 // Checks the conditions to enable winner mode processing
 static INLINE int is_winner_mode_processing_enabled(
-    const struct AV1_COMP *cpi, MB_MODE_INFO *const mbmi,
-    const PREDICTION_MODE best_mode) {
+    const struct AV1_COMP *cpi, const MACROBLOCK *const x,
+    MB_MODE_INFO *const mbmi, const PREDICTION_MODE best_mode) {
   const SPEED_FEATURES *sf = &cpi->sf;
 
+  // Disable winner mode processing for blocks with low source variance. The
+  // pruning gets less aggressive as qindex increases: the threshold decreases
+  // linearly from 64 at qindex 0 to 17 at qindex 255.
+  if (sf->winner_mode_sf.prune_winner_mode_processing_using_src_var) {
+    const unsigned int src_var_thresh = 64 - 48 * x->qindex / (MAXQ + 1);
+    if (x->source_variance < src_var_thresh) return 0;
+  }
+
   // TODO(any): Move block independent condition checks to frame level
   if (is_inter_block(mbmi)) {
     if (is_inter_mode(best_mode) &&
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index eb93a6c..2fcd9f7 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -501,6 +501,7 @@
     sf->lpf_sf.lpf_pick = LPF_PICK_FROM_Q;
 
     sf->winner_mode_sf.multi_winner_mode_type = MULTI_WINNER_MODE_OFF;
+    sf->winner_mode_sf.prune_winner_mode_processing_using_src_var = 1;
   }
   // The following should make all-intra mode speed 7 approximately equal
   // to real-time speed 6,
@@ -1827,6 +1828,7 @@
   winner_mode_sf->multi_winner_mode_type = 0;
   winner_mode_sf->dc_blk_pred_level = 0;
   winner_mode_sf->winner_mode_ifs = 0;
+  winner_mode_sf->prune_winner_mode_processing_using_src_var = 0;
 }
 
 static AOM_INLINE void init_lpf_sf(LOOP_FILTER_SPEED_FEATURES *lpf_sf) {
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 690c030..e2e4680 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -1193,6 +1193,10 @@
   // performs it during winner mode processing by \ref
   // tx_search_best_inter_candidates.
   int winner_mode_ifs;
+
+  // Flag used to enable the pruning of winner mode processing for blocks with
+  // low source variance.
+  int prune_winner_mode_processing_using_src_var;
 } WINNER_MODE_SPEED_FEATURES;
 
 typedef struct LOOP_FILTER_SPEED_FEATURES {