Enable intra_cnn_split for screen contents in intra frames

Added levels to the speed feature intra_cnn_split to allow less
aggressive partition pruning for screen content in intra frames.
As screen content tends to choose larger partitions, the sf
intra_cnn_split is extended so that, for screen content, only split
and rectangular partitions are pruned. In this case, PARTITION_NONE
is not pruned.
Partition pruning for screen contents is enabled only for speed presets
5 and 6 in allintra encoding mode.
Also, the name of the speed feature is changed to
intra_cnn_based_part_prune_level to indicate the level of
aggressiveness of pruning.

For allintra video encode (on screen content set),

          Instruction Count        BD-Rate Loss(%)
cpu-used     Reduction(%)   avg.psnr  ovr.psnr    ssim
   5           36.101       4.5634    4.5612      3.7563
   6           13.183       1.1232    1.1270      0.7183

For AVIF still image encode,

          Instruction Count    BD-Rate Loss(%)
cpu-used     Reduction(%)      psnr       ssim
   5           1.565           0.0316     0.0504
   6           0.114          -0.0448    -0.0643

BUG=aomedia:3096

STATS_CHANGED

Change-Id: I3cc72c4ecb159a2922c2f75a4f8a41142662a70d
diff --git a/av1/encoder/partition_search.c b/av1/encoder/partition_search.c
index e184bc1..09cf07e 100644
--- a/av1/encoder/partition_search.c
+++ b/av1/encoder/partition_search.c
@@ -64,7 +64,7 @@
   part_sf->simple_motion_search_prune_rect = 0;
   part_sf->simple_motion_search_early_term_none = 0;
   part_sf->simple_motion_search_reduce_search_steps = 0;
-  part_sf->intra_cnn_split = 0;
+  part_sf->intra_cnn_based_part_prune_level = 0;
   part_sf->ext_partition_eval_thresh = BLOCK_8X8;
   part_sf->prune_ext_part_using_split_info = 0;
   part_sf->prune_rectangular_split_based_on_qidx = 0;
diff --git a/av1/encoder/partition_strategy.c b/av1/encoder/partition_strategy.c
index 9335aad..377defd 100644
--- a/av1/encoder/partition_strategy.c
+++ b/av1/encoder/partition_strategy.c
@@ -134,6 +134,7 @@
 //   -- use chroma pixels in addition to luma pixels
 void av1_intra_mode_cnn_partition(const AV1_COMMON *const cm, MACROBLOCK *x,
                                   int quad_tree_idx,
+                                  int intra_cnn_based_part_prune_level,
                                   PartitionSearchState *part_state) {
   assert(cm->seq_params->sb_size >= BLOCK_64X64 &&
          "Invalid sb_size for intra_cnn!");
@@ -315,7 +316,13 @@
   }
 
   if (logits[0] > split_only_thresh) {
-    av1_set_square_split_only(part_state);
+    // As screen contents tend to choose larger partitions, do not prune
+    // PARTITION_NONE when intra_cnn_based_part_prune_level=1.
+    if (intra_cnn_based_part_prune_level != 1) {
+      part_state->partition_none_allowed = 0;
+    }
+    part_state->do_square_split = 1;
+    av1_disable_rect_partitions(part_state);
   }
 
   if (logits[0] < no_split_thresh) {
@@ -1567,16 +1574,17 @@
 
   // A CNN-based speed feature pruning out either split or all non-split
   // partition in INTRA frame coding.
-  const int try_intra_cnn_split =
-      !cpi->use_screen_content_tools && frame_is_intra_only(cm) &&
-      cpi->sf.part_sf.intra_cnn_split &&
+  const int try_intra_cnn_based_part_prune =
+      frame_is_intra_only(cm) &&
+      cpi->sf.part_sf.intra_cnn_based_part_prune_level &&
       cm->seq_params->sb_size >= BLOCK_64X64 && bsize <= BLOCK_64X64 &&
       blk_params->bsize_at_least_8x8 &&
       av1_is_whole_blk_in_frame(blk_params, mi_params);
 
-  if (try_intra_cnn_split) {
-    av1_intra_mode_cnn_partition(&cpi->common, x,
-                                 x->part_search_info.quad_tree_idx, part_state);
+  if (try_intra_cnn_based_part_prune) {
+    av1_intra_mode_cnn_partition(
+        &cpi->common, x, x->part_search_info.quad_tree_idx,
+        cpi->sf.part_sf.intra_cnn_based_part_prune_level, part_state);
   }
 
   // Use simple motion search to prune out split or non-split partitions. This
diff --git a/av1/encoder/partition_strategy.h b/av1/encoder/partition_strategy.h
index 8030f69..1958abb 100644
--- a/av1/encoder/partition_strategy.h
+++ b/av1/encoder/partition_strategy.h
@@ -19,6 +19,7 @@
 
 void av1_intra_mode_cnn_partition(const AV1_COMMON *const cm, MACROBLOCK *x,
                                   int label_idx,
+                                  int intra_cnn_based_part_prune_level,
                                   PartitionSearchState *part_state);
 
 // Performs a simple_motion_search with a single reference frame and extract
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 23c8396..6f662b3 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -333,7 +333,8 @@
   sf->hl_sf.superres_auto_search_type = SUPERRES_AUTO_DUAL;
 
   if (speed >= 1) {
-    sf->part_sf.intra_cnn_split = 1;
+    sf->part_sf.intra_cnn_based_part_prune_level =
+        allow_screen_content_tools ? 0 : 2;
     sf->part_sf.simple_motion_search_early_term_none = 1;
     // TODO(Venkat): Clean-up frame type dependency for
     // simple_motion_search_split in partition search function and set the
@@ -451,6 +452,8 @@
     sf->part_sf.simple_motion_search_prune_agg = 3;
     sf->part_sf.ext_partition_eval_thresh =
         allow_screen_content_tools ? BLOCK_8X8 : BLOCK_16X16;
+    sf->part_sf.intra_cnn_based_part_prune_level =
+        allow_screen_content_tools ? 1 : 2;
 
     sf->intra_sf.chroma_intra_pruning_with_hog = 3;
 
@@ -824,7 +827,8 @@
     sf->gm_sf.gm_search_type = GM_REDUCED_REF_SEARCH_SKIP_L2_L3_ARF2;
     sf->gm_sf.prune_ref_frame_for_gm_search = boosted ? 0 : 1;
 
-    sf->part_sf.intra_cnn_split = 1;
+    sf->part_sf.intra_cnn_based_part_prune_level =
+        allow_screen_content_tools ? 0 : 2;
     sf->part_sf.simple_motion_search_early_term_none = 1;
     // TODO(Venkat): Clean-up frame type dependency for
     // simple_motion_search_split in partition search function and set the
@@ -1553,7 +1557,7 @@
   part_sf->simple_motion_search_prune_rect = 0;
   part_sf->simple_motion_search_early_term_none = 0;
   part_sf->simple_motion_search_reduce_search_steps = 0;
-  part_sf->intra_cnn_split = 0;
+  part_sf->intra_cnn_based_part_prune_level = 0;
   part_sf->ext_partition_eval_thresh = BLOCK_8X8;
   part_sf->prune_ext_part_using_split_info = 0;
   part_sf->prune_rectangular_split_based_on_qidx = 0;
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index cfc383c..7a6c26a 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -534,8 +534,11 @@
   BLOCK_SIZE max_intra_bsize;
 
   // Use CNN with luma pixels on source frame on each of the 64x64 subblock to
-  // perform split/no_split decision on intra-frames.
-  int intra_cnn_split;
+  // perform partition pruning in intra frames.
+  // 0: No Pruning
+  // 1: Prune split and rectangular partitions only
+  // 2: Prune none, split and rectangular partitions
+  int intra_cnn_based_part_prune_level;
 
   // Disable extended partition search for lower block sizes.
   int ext_partition_eval_thresh;