Allintra: Introduce sf enable_intra_mode_pruning_using_neighbors

This CL introduces a speed feature
enable_intra_mode_pruning_using_neighbors that prunes specific intra
modes in the nonrd path based on source variance and the best mode so
far. The pruning is applied only when both neighboring blocks
(above/left) are available and the current mode is the winner mode of
neither of them. This speed feature is enabled for allintra, speed 9.

For AVIF still-image encode,

             Encode Time     BD-Rate Loss(%)
cpu-used     Reduction(%)    psnr       ssim
   9           3.462        -0.0577    -0.2393

STATS_CHANGED for speed=9

Change-Id: I73c1e1cd1e2f1843122f40a1aa5d6faf4fc4f845
diff --git a/av1/encoder/nonrd_pickmode.c b/av1/encoder/nonrd_pickmode.c
index 11de850..98d3f5f 100644
--- a/av1/encoder/nonrd_pickmode.c
+++ b/av1/encoder/nonrd_pickmode.c
@@ -2543,6 +2543,20 @@
   pd->dst.buf = dst_buf_base;
 }
 
+static bool should_prune_intra_modes_using_neighbors(
+    const MACROBLOCKD *xd, bool enable_intra_mode_pruning_using_neighbors,
+    PREDICTION_MODE this_mode, PREDICTION_MODE above_mode,
+    PREDICTION_MODE left_mode) {
+  if (!enable_intra_mode_pruning_using_neighbors) return false;
+
+  if (this_mode == DC_PRED) return false;
+
+  // Allow pruning of the current mode only when both neighbors (above/left)
+  // are available and the current mode is the winner mode of neither of them.
+  return xd->up_available && this_mode != above_mode && xd->left_available &&
+         this_mode != left_mode;
+}
+
 void av1_nonrd_pick_intra_mode(AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_cost,
                                BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) {
   AV1_COMMON *const cm = &cpi->common;
@@ -2562,6 +2576,7 @@
   const PREDICTION_MODE L = av1_left_block_mode(left_mi);
   const int above_ctx = intra_mode_context[A];
   const int left_ctx = intra_mode_context[L];
+  const unsigned int source_variance = x->source_variance;
   bmode_costs = x->mode_costs.y_mode_costs[above_ctx][left_ctx];
 
   av1_invalid_rd_stats(&best_rdc);
@@ -2585,6 +2600,20 @@
         this_mode == H_PRED && best_mode == V_PRED)
       continue;
 
+    if (should_prune_intra_modes_using_neighbors(
+            xd, cpi->sf.rt_sf.enable_intra_mode_pruning_using_neighbors,
+            this_mode, A, L)) {
+      // Prune V_PRED and H_PRED if source variance of the block is less than
+      // or equal to 50. The source variance threshold is obtained empirically.
+      if ((this_mode == V_PRED || this_mode == H_PRED) && source_variance <= 50)
+        continue;
+
+      // As per the statistics, probability of SMOOTH_PRED being the winner
+      // is less when best mode so far is DC_PRED (out of DC_PRED, V_PRED and
+      // H_PRED). Hence, SMOOTH_PRED mode is not evaluated.
+      if (best_mode == DC_PRED && this_mode == SMOOTH_PRED) continue;
+    }
+
     this_rdc.dist = this_rdc.rate = 0;
     args.mode = this_mode;
     args.skippable = 1;
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 4efafdd..5d27e96 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -555,6 +555,7 @@
     sf->rt_sf.var_part_split_threshold_shift = 9;
     sf->rt_sf.vbp_prune_16x16_split_using_min_max_sub_blk_var = true;
     sf->rt_sf.prune_h_pred_using_best_mode_so_far = true;
+    sf->rt_sf.enable_intra_mode_pruning_using_neighbors = true;
   }
 
   // As the speed feature prune_chroma_modes_using_luma_winner already
@@ -2115,6 +2116,7 @@
   rt_sf->prune_compoundmode_with_singlecompound_var = false;
   rt_sf->frame_level_mode_cost_update = false;
   rt_sf->prune_h_pred_using_best_mode_so_far = false;
+  rt_sf->enable_intra_mode_pruning_using_neighbors = false;
   rt_sf->check_only_zero_zeromv_on_large_blocks = false;
   rt_sf->disable_cdf_update_non_reference_frame = false;
   rt_sf->prune_compoundmode_with_singlemode_var = false;
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index caa2f30..604412c 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -1637,6 +1637,17 @@
   // 0.08%.
   bool prune_h_pred_using_best_mode_so_far;
 
+  // Enable pruning of intra mode evaluations in nonrd path based on source
+  // variance and best mode so far. A mode is pruned only if both neighboring
+  // blocks (left/top) are available and it is the winner mode of neither.
+  //
+  // For allintra encode, this speed feature reduces instruction count by 3.96%
+  // for speed 9 with coding performance change less than 0.38%.
+  // For AVIF image encode, this speed feature reduces encode time by 3.46% for
+  // speed 9 on a typical image dataset with coding performance change less than
+  // -0.06%.
+  bool enable_intra_mode_pruning_using_neighbors;
+
   // If compound is enabled, and the current block size is \geq BLOCK_16X16,
   // limit the compound modes to GLOBAL_GLOBALMV. This does not apply to the
   // base layer of svc.