Gate interintra using first partition pass stats

Added a speed feature for speed 3 that gates evaluation of the interintra
motion mode based on the stats collected during the first partition search pass.

For the speed 3 preset, a BD-rate drop of 0.02% is seen (as per AWCY runs),
with an encode time reduction of 1.5% (averaged across multiple test cases).

STATS_CHANGED

Change-Id: I648897e987eecb15c196ecb9833096b6009ec95c
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index 2458698..2d4f558 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -176,6 +176,8 @@
   uint8_t ref0_counts[REF_FRAMES];  // Counters for ref_frame[0].
   uint8_t ref1_counts[REF_FRAMES];  // Counters for ref_frame[1].
   int sample_counts;                // Number of samples collected.
+  uint8_t interintra_motion_mode_count[REF_FRAMES];  // Counter for interintra
+                                                     // motion mode
 } FIRST_PARTITION_PASS_STATS;
 
 #define MAX_INTERP_FILTER_STATS 64
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 1735657..8b991e3 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -5320,11 +5320,14 @@
 
 // Set all the counters as max.
 static void init_first_partition_pass_stats_tables(
-    FIRST_PARTITION_PASS_STATS *stats) {
+    AV1_COMP *cpi, FIRST_PARTITION_PASS_STATS *stats) {
   for (int i = 0; i < FIRST_PARTITION_PASS_STATS_TABLES; ++i) {
     memset(stats[i].ref0_counts, 0xff, sizeof(stats[i].ref0_counts));
     memset(stats[i].ref1_counts, 0xff, sizeof(stats[i].ref1_counts));
     stats[i].sample_counts = INT_MAX;
+    if (cpi->sf.use_first_partition_pass_interintra_stats)
+      memset(stats[i].interintra_motion_mode_count, 0xff,
+             sizeof(stats[i].interintra_motion_mode_count));
   }
 }
 
@@ -5499,6 +5502,9 @@
       // If there are not enough samples collected, make all available.
       memset(stat->ref0_counts, 0xff, sizeof(stat->ref0_counts));
       memset(stat->ref1_counts, 0xff, sizeof(stat->ref1_counts));
+      if (cpi->sf.use_first_partition_pass_interintra_stats)
+        memset(stat->interintra_motion_mode_count, 0xff,
+               sizeof(stat->interintra_motion_mode_count));
     } else if (sf->selective_ref_frame < 3) {
       // ALTREF2_FRAME and BWDREF_FRAME may be skipped during the
       // initial partition scan, so we don't eliminate them.
@@ -5506,6 +5512,10 @@
       stat->ref1_counts[ALTREF2_FRAME] = 0xff;
       stat->ref0_counts[BWDREF_FRAME] = 0xff;
       stat->ref1_counts[BWDREF_FRAME] = 0xff;
+      if (cpi->sf.use_first_partition_pass_interintra_stats) {
+        stat->interintra_motion_mode_count[ALTREF2_FRAME] = 0xff;
+        stat->interintra_motion_mode_count[BWDREF_FRAME] = 0xff;
+      }
     }
   }
 }
@@ -5864,7 +5874,8 @@
 #if CONFIG_COLLECT_COMPONENT_TIMING
       start_timing(cpi, first_partition_search_pass_time);
 #endif
-      init_first_partition_pass_stats_tables(x->first_partition_pass_stats);
+      init_first_partition_pass_stats_tables(cpi,
+                                             x->first_partition_pass_stats);
       // Do the first pass if we need two pass partition search
       if (cpi->two_pass_partition_search &&
           cpi->sf.use_square_partition_only_threshold > BLOCK_4X4 &&
@@ -6350,7 +6361,7 @@
   av1_zero(rdc->comp_pred_diff);
   // Two pass partition search can be enabled/disabled for different frames.
   // Reset this data at frame level to avoid any incorrect usage.
-  init_first_partition_pass_stats_tables(x->first_partition_pass_stats);
+  init_first_partition_pass_stats_tables(cpi, x->first_partition_pass_stats);
 
   // Reset the flag.
   cpi->intrabc_used = 0;
@@ -6978,6 +6989,13 @@
         if (mbmi->ref_frame[1] >= 0 &&
             stats->ref1_counts[mbmi->ref_frame[1]] < 255)
           ++stats->ref1_counts[mbmi->ref_frame[1]];
+        if (cpi->sf.use_first_partition_pass_interintra_stats) {
+          // Increase the counter for interintra_motion_mode_count
+          if (mbmi->motion_mode == 0 && mbmi->ref_frame[1] == INTRA_FRAME &&
+              stats->interintra_motion_mode_count[mbmi->ref_frame[0]] < 255) {
+            ++stats->interintra_motion_mode_count[mbmi->ref_frame[0]];
+          }
+        }
       }
     }
   }
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 90b8258..c7c724d 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -9259,6 +9259,36 @@
   return mv_field_check_ctxt.mv_field_check_result;
 }
 
+// Returns 1 when the interintra search can be skipped for this block: the
+// first partition pass recorded no interintra use with mbmi->ref_frame[0] in
+// any stats region covered by the block. Returns 0 otherwise.
+static int skip_interintra_based_on_first_pass_stats(const AV1_COMP *const cpi,
+                                                     MACROBLOCK *const x,
+                                                     BLOCK_SIZE bsize,
+                                                     int mi_row, int mi_col) {
+  if (!cpi->two_pass_partition_search ||
+      !cpi->sf.use_first_partition_pass_interintra_stats ||
+      x->cb_partition_scan) {
+    return 0;
+  }
+  const MB_MODE_INFO *const mbmi = x->e_mbd.mi[0];
+  // Rows span the block height; columns span the block width.
+  const int row_end = mi_row + mi_size_high[bsize];
+  const int col_end = mi_col + mi_size_wide[bsize];
+  for (int row = mi_row; row < row_end;
+       row += FIRST_PARTITION_PASS_SAMPLE_REGION) {
+    for (int col = mi_col; col < col_end;
+         col += FIRST_PARTITION_PASS_SAMPLE_REGION) {
+      const int index = av1_first_partition_pass_stats_index(row, col);
+      const FIRST_PARTITION_PASS_STATS *const stats =
+          &x->first_partition_pass_stats[index];
+      // Any non-zero count for this reference keeps interintra enabled.
+      if (stats->interintra_motion_mode_count[mbmi->ref_frame[0]]) return 0;
+    }
+  }
+  return 1;
+}
+
 // TODO(afergs): Refactor the MBMI references in here - there's four
 // TODO(afergs): Refactor optional args - add them to a struct or remove
 static int64_t motion_mode_rd(
@@ -9279,6 +9309,7 @@
   RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
   uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
   const int rate_mv0 = *rate_mv;
+  int skip_interintra_mode = 0;
   const int interintra_allowed = cm->seq_params.enable_interintra_compound &&
                                  is_interintra_allowed(mbmi) &&
                                  mbmi->compound_idx;
@@ -9459,6 +9490,9 @@
         continue;
       }
     } else if (is_interintra_mode) {
+      skip_interintra_mode = skip_interintra_based_on_first_pass_stats(
+          cpi, x, bsize, mi_row, mi_col);
+      if (skip_interintra_mode) continue;
       const int ret = handle_inter_intra_mode(
           cpi, x, bsize, mi_row, mi_col, mbmi, args, ref_best_rd, &tmp_rate_mv,
           &tmp_rate2, orig_dst);
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index ef17681..94ebee6 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -327,6 +327,8 @@
     sf->tx_type_search.prune_mode = PRUNE_2D_FAST;
     sf->gm_search_type = GM_DISABLE_SEARCH;
     sf->prune_comp_search_by_single_result = 2;
+    sf->use_first_partition_pass_interintra_stats =
+        sf->two_pass_partition_search;
     sf->prune_motion_mode_level = boosted ? 2 : 3;
     sf->prune_warp_using_wmtype = 1;
     // TODO(yunqing): evaluate this speed feature for speed 1 & 2, and combine
@@ -713,6 +715,7 @@
   sf->allow_partition_search_skip = 0;
   sf->use_accurate_subpel_search = USE_8_TAPS;
   sf->disable_wedge_search_edge_thresh = 0;
+  sf->use_first_partition_pass_interintra_stats = 0;
   sf->disable_wedge_search_var_thresh = 0;
   sf->disable_loop_restoration_chroma = 0;
   sf->fast_wedge_sign_estimate = 0;
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 0f80774..efc2ab2 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -367,6 +367,10 @@
   // Use a ML model to prune horz and vert partitions
   int ml_prune_rect_partition;
 
+  // Disable/Enable interintra motion mode based on stats collected during
+  // first_partition_search_pass
+  int use_first_partition_pass_interintra_stats;
+
   // Use a ML model to prune horz_a, horz_b, vert_a and vert_b partitions.
   int ml_prune_ab_partition;