Allintra: Introduce sf prune_h_pred_using_best_mode_so_far

This CL introduces a speed feature
prune_h_pred_using_best_mode_so_far to prune H_PRED mode if
V_PRED is the best mode so far. This speed feature is enabled
for allintra, speed 9.

For AVIF still-image encode,

             Encode Time     BD-Rate Loss(%)
cpu-used     Reduction(%)    psnr       ssim
   9           1.034         0.0834     0.0691

STATS_CHANGED

Change-Id: I3a108379b57ce294268549ebaca3abcda3cb7c95
diff --git a/av1/encoder/nonrd_pickmode.c b/av1/encoder/nonrd_pickmode.c
index 95dba0f..c44aef2 100644
--- a/av1/encoder/nonrd_pickmode.c
+++ b/av1/encoder/nonrd_pickmode.c
@@ -2005,6 +2005,17 @@
   // mode tests.
   for (int i = 0; i < 4; ++i) {
     PREDICTION_MODE this_mode = intra_mode_list[i];
+
+    // As per the statistics generated for intra mode evaluation in the nonrd
+    // path, it is found that the probability of H_PRED mode being the winner is
+    // very low when the best mode so far is V_PRED (out of DC_PRED and
+    // V_PRED). If V_PRED is the winner mode out of DC_PRED and V_PRED, it could
+    // imply the presence of a vertically dominant pattern. Hence, H_PRED mode
+    // is not evaluated.
+    if (cpi->sf.rt_sf.prune_h_pred_using_best_mode_so_far &&
+        this_mode == H_PRED && best_mode == V_PRED)
+      continue;
+
     this_rdc.dist = this_rdc.rate = 0;
     args.mode = this_mode;
     args.skippable = 1;
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 9a5e684..0ea12eb 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -554,6 +554,7 @@
     sf->rt_sf.hybrid_intra_pickmode = 0;
     sf->rt_sf.var_part_split_threshold_shift = 9;
     sf->rt_sf.vbp_prune_16x16_split_using_min_max_sub_blk_var = true;
+    sf->rt_sf.prune_h_pred_using_best_mode_so_far = true;
   }
 
   // As the speed feature prune_chroma_modes_using_luma_winner already
@@ -2050,6 +2051,7 @@
   rt_sf->vbp_prune_16x16_split_using_min_max_sub_blk_var = false;
   rt_sf->prune_compoundmode_with_singlecompound_var = false;
   rt_sf->frame_level_mode_cost_update = false;
+  rt_sf->prune_h_pred_using_best_mode_so_far = false;
   rt_sf->check_only_zero_zeromv_on_large_blocks = false;
   rt_sf->disable_cdf_update_non_reference_frame = false;
   rt_sf->prune_compoundmode_with_singlemode_var = false;
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 62d248a..d53d4cf 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -1588,6 +1588,16 @@
   // except on key frame and first delta).
   bool frame_level_mode_cost_update;
 
+  // Prune H_PRED during intra mode evaluation in the nonrd path based on best
+  // mode so far.
+  //
+  // For allintra encode, this speed feature reduces instruction count by 1.10%
+  // for speed 9 with coding performance change less than 0.04%.
+  // For AVIF image encode, this speed feature reduces encode time by 1.03% for
+  // speed 9 on a typical image dataset with coding performance change of about
+  // 0.08%.
+  bool prune_h_pred_using_best_mode_so_far;
+
   // If compound is enabled, and the current block size is \geq BLOCK_16X16,
   // limit the compound modes to GLOBAL_GLOBALMV. This does not apply to the
   // base layer of svc.