Introduce speed feature reuse_inter_intra_mode

The inter-intra mode search for a block may run up to
4 times, once for each of the 4 single ref modes. The
search results are very likely the same, so we can
save the result of the first search and reuse it in
the later ones.
This feature is controlled by sf.reuse_inter_intra_mode.
It is enabled at speed level 1 and above.

The encoder is about 1.7% faster when encoding
20 frames of BasketballDrill_832x480_50.y4m at 800kbps
on speed 1 (215670 ms -> 211931 ms).
The coding performance loss is 0.01% on average.

STATS_CHANGED expected

Change-Id: I5a610ef07007c6c605c71b9487426a46b790bc5e
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 47b9698..00196a8 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -7720,6 +7720,7 @@
   int single_comp_cost;
   int64_t (*simple_rd)[MAX_REF_MV_SERCH][REF_FRAMES];
   int skip_motion_mode;
+  INTERINTRA_MODE inter_intra_mode[REF_FRAMES];
 } HandleInterModeArgs;
 
 static INLINE int clamp_and_check_mv(int_mv *out_mv, int_mv in_mv,
@@ -8387,6 +8388,7 @@
 static int handle_inter_intra_mode(const AV1_COMP *const cpi,
                                    MACROBLOCK *const x, BLOCK_SIZE bsize,
                                    int mi_row, int mi_col, MB_MODE_INFO *mbmi,
+                                   HandleInterModeArgs *args,
                                    int64_t ref_best_rd, int rate_mv,
                                    int *tmp_rate2, BUFFER_SET *orig_dst) {
   const AV1_COMMON *const cm = &cpi->common;
@@ -8397,7 +8399,6 @@
   int64_t rd, best_interintra_rd = INT64_MAX;
   int rmode, rate_sum;
   int64_t dist_sum;
-  int j;
   int tmp_rate_mv = 0;
   int tmp_skip_txfm_sb;
   int bw = block_size_wide[bsize];
@@ -8418,26 +8419,34 @@
   restore_dst_buf(xd, *orig_dst, num_planes);
   mbmi->ref_frame[1] = INTRA_FRAME;
   mbmi->use_wedge_interintra = 0;
-  for (j = 0; j < INTERINTRA_MODES; ++j) {
-    mbmi->interintra_mode = (INTERINTRA_MODE)j;
+  best_interintra_mode = args->inter_intra_mode[mbmi->ref_frame[0]];
+  int j = 0;
+  if (cpi->sf.reuse_inter_intra_mode == 0 ||
+      best_interintra_mode == INTERINTRA_MODES) {
+    for (j = 0; j < INTERINTRA_MODES; ++j) {
+      mbmi->interintra_mode = (INTERINTRA_MODE)j;
+      rmode = interintra_mode_cost[mbmi->interintra_mode];
+      av1_build_intra_predictors_for_interintra(cm, xd, bsize, 0, orig_dst,
+                                                intrapred, bw);
+      av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw);
+      model_rd_fn[MODELRD_LEGACY](cpi, bsize, x, xd, 0, 0, mi_row, mi_col,
+                                  &rate_sum, &dist_sum, &tmp_skip_txfm_sb,
+                                  &tmp_skip_sse_sb, NULL, NULL, NULL);
+      rd = RDCOST(x->rdmult, tmp_rate_mv + rate_sum + rmode, dist_sum);
+      if (rd < best_interintra_rd) {
+        best_interintra_rd = rd;
+        best_interintra_mode = mbmi->interintra_mode;
+      }
+    }
+    args->inter_intra_mode[mbmi->ref_frame[0]] = best_interintra_mode;
+  }
+  if (j == 0 || best_interintra_mode != II_SMOOTH_PRED) {
+    mbmi->interintra_mode = best_interintra_mode;
     rmode = interintra_mode_cost[mbmi->interintra_mode];
     av1_build_intra_predictors_for_interintra(cm, xd, bsize, 0, orig_dst,
                                               intrapred, bw);
     av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw);
-    model_rd_fn[MODELRD_LEGACY](cpi, bsize, x, xd, 0, 0, mi_row, mi_col,
-                                &rate_sum, &dist_sum, &tmp_skip_txfm_sb,
-                                &tmp_skip_sse_sb, NULL, NULL, NULL);
-    rd = RDCOST(x->rdmult, tmp_rate_mv + rate_sum + rmode, dist_sum);
-    if (rd < best_interintra_rd) {
-      best_interintra_rd = rd;
-      best_interintra_mode = mbmi->interintra_mode;
-    }
   }
-  mbmi->interintra_mode = best_interintra_mode;
-  rmode = interintra_mode_cost[mbmi->interintra_mode];
-  av1_build_intra_predictors_for_interintra(cm, xd, bsize, 0, orig_dst,
-                                            intrapred, bw);
-  av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw);
   rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
                            &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
   if (rd != INT64_MAX)
@@ -8677,7 +8686,7 @@
       }
     } else if (is_interintra_mode) {
       const int ret =
-          handle_inter_intra_mode(cpi, x, bsize, mi_row, mi_col, mbmi,
+          handle_inter_intra_mode(cpi, x, bsize, mi_row, mi_col, mbmi, args,
                                   ref_best_rd, rate_mv, &tmp_rate2, orig_dst);
       if (ret < 0) continue;
     }
@@ -11159,13 +11168,21 @@
                                best_rd_so_far);
 
   HandleInterModeArgs args = {
-    { NULL },  { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE },
-    { NULL },  { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1 },
-    NULL,      NULL,
-    NULL,      NULL,
-    { { 0 } }, INT_MAX,
-    INT_MAX,   NULL,
-    0
+    { NULL },
+    { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE },
+    { NULL },
+    { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1 },
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    { { 0 } },
+    INT_MAX,
+    INT_MAX,
+    NULL,
+    0,
+    { INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES,
+      INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES }
   };
   for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
 
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index b7851a0..a3d34ef 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -221,6 +221,7 @@
     sf->optimize_b_precheck = 1;
     sf->dual_sgr_penalty_level = 1;
     sf->use_accurate_subpel_search = 1;
+    sf->reuse_inter_intra_mode = 1;
   }
 
   if (speed >= 2) {
@@ -469,6 +470,7 @@
   sf->optimize_b_precheck = 0;
   sf->jnt_comp_fast_tx_search = 0;
   sf->jnt_comp_skip_mv_search = 0;
+  sf->reuse_inter_intra_mode = 0;
 
   for (i = 0; i < TX_SIZES; i++) {
     sf->intra_y_mode_mask[i] = INTRA_ALL;
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 8ed3c40..b38da06 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -638,6 +638,10 @@
   // Note: The search order might affect the result. It is better to search same
   // single inter mode as a group.
   int prune_comp_search_by_single_result;
+
+  // Reuse the inter_intra_mode search result of NEARESTMV mode for the other
+  // single ref modes.
+  int reuse_inter_intra_mode;
 } SPEED_FEATURES;
 
 struct AV1_COMP;