Use ref_type as skip_ref_frame_mask's index

Indexing the mask by ref_type, instead of mixing
single ref and compound ref bits in one mask, lets
the encoder skip more unwanted combinations.

The encoder is about 1.5% faster when encoding
20 frames of BasketballDrill_832x480_50.y4m at
800kbps on speed 1 (234709 ms -> 231096 ms).

The coding performance loss is about 0.02% on
average.

STATS_CHANGED expected

Change-Id: I0aa0c624994098cc89d46fc698d96f4980bd95f4
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index e9a93ce..05b45f3 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -1731,31 +1731,20 @@
       pc_tree->partitioning = partition;
     }
   }
-
-  pc_tree->horizontal[0].skip_ref_frame_mask = 0;
-  pc_tree->horizontal[1].skip_ref_frame_mask = 0;
-  pc_tree->vertical[0].skip_ref_frame_mask = 0;
-  pc_tree->vertical[1].skip_ref_frame_mask = 0;
-  pc_tree->horizontala[0].skip_ref_frame_mask = 0;
-  pc_tree->horizontala[1].skip_ref_frame_mask = 0;
-  pc_tree->horizontala[2].skip_ref_frame_mask = 0;
-  pc_tree->horizontalb[0].skip_ref_frame_mask = 0;
-  pc_tree->horizontalb[1].skip_ref_frame_mask = 0;
-  pc_tree->horizontalb[2].skip_ref_frame_mask = 0;
-  pc_tree->verticala[0].skip_ref_frame_mask = 0;
-  pc_tree->verticala[1].skip_ref_frame_mask = 0;
-  pc_tree->verticala[2].skip_ref_frame_mask = 0;
-  pc_tree->verticalb[0].skip_ref_frame_mask = 0;
-  pc_tree->verticalb[1].skip_ref_frame_mask = 0;
-  pc_tree->verticalb[2].skip_ref_frame_mask = 0;
-  pc_tree->horizontal4[0].skip_ref_frame_mask = 0;
-  pc_tree->horizontal4[1].skip_ref_frame_mask = 0;
-  pc_tree->horizontal4[2].skip_ref_frame_mask = 0;
-  pc_tree->horizontal4[3].skip_ref_frame_mask = 0;
-  pc_tree->vertical4[0].skip_ref_frame_mask = 0;
-  pc_tree->vertical4[1].skip_ref_frame_mask = 0;
-  pc_tree->vertical4[2].skip_ref_frame_mask = 0;
-  pc_tree->vertical4[3].skip_ref_frame_mask = 0;
+  for (int b = 0; b < 2; ++b) {
+    pc_tree->horizontal[b].skip_ref_frame_mask = 0;
+    pc_tree->vertical[b].skip_ref_frame_mask = 0;
+  }
+  for (int b = 0; b < 3; ++b) {
+    pc_tree->horizontala[b].skip_ref_frame_mask = 0;
+    pc_tree->horizontalb[b].skip_ref_frame_mask = 0;
+    pc_tree->verticala[b].skip_ref_frame_mask = 0;
+    pc_tree->verticalb[b].skip_ref_frame_mask = 0;
+  }
+  for (int b = 0; b < 4; ++b) {
+    pc_tree->horizontal4[b].skip_ref_frame_mask = 0;
+    pc_tree->vertical4[b].skip_ref_frame_mask = 0;
+  }
   switch (partition) {
     case PARTITION_NONE:
       rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
@@ -3321,7 +3310,7 @@
 
   // Ref frames picked in the [i_th] quarter subblock during square partition
   // RD search. It may be used to prune ref frame selection of rect partitions.
-  uint8_t ref_frames_used[4] = {
+  int ref_frames_used[4] = {
     0,
   };
 
@@ -3352,11 +3341,9 @@
     if (none_rd) *none_rd = this_rdc.rdcost;
     if (this_rdc.rate != INT_MAX) {
       if (cpi->sf.prune_ref_frame_for_rect_partitions) {
-        const int ref1 = ctx_none->mic.ref_frame[0];
-        const int ref2 = ctx_none->mic.ref_frame[1];
+        const int ref_type = av1_ref_frame_type(ctx_none->mic.ref_frame);
         for (int i = 0; i < 4; ++i) {
-          ref_frames_used[i] |= (1 << ref1);
-          if (ref2 > 0) ref_frames_used[i] |= (1 << ref2);
+          ref_frames_used[i] |= (1 << ref_type);
         }
       }
       if (bsize_at_least_8x8) {
@@ -3491,10 +3478,9 @@
         sum_rdc.rdcost += this_rdc.rdcost;
         if (cpi->sf.prune_ref_frame_for_rect_partitions &&
             pc_tree->split[idx]->none.rate != INT_MAX) {
-          const int ref1 = pc_tree->split[idx]->none.mic.ref_frame[0];
-          const int ref2 = pc_tree->split[idx]->none.mic.ref_frame[1];
-          ref_frames_used[idx] |= (1 << ref1);
-          if (ref2 > 0) ref_frames_used[idx] |= (1 << ref2);
+          const int ref_type =
+              av1_ref_frame_type(pc_tree->split[idx]->none.mic.ref_frame);
+          ref_frames_used[idx] |= (1 << ref_type);
         }
         if (idx <= 1 && (bsize <= BLOCK_8X8 ||
                          pc_tree->split[idx]->partitioning == PARTITION_NONE)) {
@@ -3532,7 +3518,7 @@
   pc_tree->vertical[0].skip_ref_frame_mask = 0;
   pc_tree->vertical[1].skip_ref_frame_mask = 0;
   if (cpi->sf.prune_ref_frame_for_rect_partitions) {
-    uint8_t used_frames;
+    int used_frames;
     used_frames = ref_frames_used[0] | ref_frames_used[1];
     if (used_frames) pc_tree->horizontal[0].skip_ref_frame_mask = ~used_frames;
     used_frames = ref_frames_used[2] | ref_frames_used[3];
@@ -3789,7 +3775,7 @@
     pc_tree->horizontala[1].skip_ref_frame_mask = 0;
     pc_tree->horizontala[2].skip_ref_frame_mask = 0;
     if (cpi->sf.prune_ref_frame_for_rect_partitions) {
-      uint8_t used_frames;
+      int used_frames;
       used_frames = ref_frames_used[0];
       if (used_frames)
         pc_tree->horizontala[0].skip_ref_frame_mask = ~used_frames;
@@ -3822,7 +3808,7 @@
     pc_tree->horizontalb[1].skip_ref_frame_mask = 0;
     pc_tree->horizontalb[2].skip_ref_frame_mask = 0;
     if (cpi->sf.prune_ref_frame_for_rect_partitions) {
-      uint8_t used_frames;
+      int used_frames;
       used_frames = ref_frames_used[0] | ref_frames_used[1];
       if (used_frames)
         pc_tree->horizontalb[0].skip_ref_frame_mask = ~used_frames;
@@ -3856,7 +3842,7 @@
     pc_tree->verticala[1].skip_ref_frame_mask = 0;
     pc_tree->verticala[2].skip_ref_frame_mask = 0;
     if (cpi->sf.prune_ref_frame_for_rect_partitions) {
-      uint8_t used_frames;
+      int used_frames;
       used_frames = ref_frames_used[0];
       if (used_frames) pc_tree->verticala[0].skip_ref_frame_mask = ~used_frames;
       used_frames = ref_frames_used[2];
@@ -3886,7 +3872,7 @@
     pc_tree->verticalb[1].skip_ref_frame_mask = 0;
     pc_tree->verticalb[2].skip_ref_frame_mask = 0;
     if (cpi->sf.prune_ref_frame_for_rect_partitions) {
-      uint8_t used_frames;
+      int used_frames;
       used_frames = ref_frames_used[0] | ref_frames_used[2];
       if (used_frames) pc_tree->verticalb[0].skip_ref_frame_mask = ~used_frames;
       used_frames = ref_frames_used[1];
@@ -3956,9 +3942,9 @@
       ctx_this->rd_mode_is_ready = 0;
       ctx_this->skip_ref_frame_mask = 0;
       if (cpi->sf.prune_ref_frame_for_rect_partitions) {
-        const uint8_t used_frames =
-            i <= 1 ? (ref_frames_used[0] | ref_frames_used[1])
-                   : (ref_frames_used[2] | ref_frames_used[3]);
+        const int used_frames = i <= 1
+                                    ? (ref_frames_used[0] | ref_frames_used[1])
+                                    : (ref_frames_used[2] | ref_frames_used[3]);
         if (used_frames) ctx_this->skip_ref_frame_mask = ~used_frames;
       }
       if (!rd_try_subblock(cpi, td, tile_data, tp, (i == 0), (i == 3),
@@ -3999,9 +3985,9 @@
       ctx_this->rd_mode_is_ready = 0;
       ctx_this->skip_ref_frame_mask = 0;
       if (cpi->sf.prune_ref_frame_for_rect_partitions) {
-        const uint8_t used_frames =
-            i <= 1 ? (ref_frames_used[0] | ref_frames_used[2])
-                   : (ref_frames_used[1] | ref_frames_used[3]);
+        const int used_frames = i <= 1
+                                    ? (ref_frames_used[0] | ref_frames_used[2])
+                                    : (ref_frames_used[1] | ref_frames_used[3]);
         if (used_frames) ctx_this->skip_ref_frame_mask = ~used_frames;
       }
       if (!rd_try_subblock(cpi, td, tile_data, tp, (i == 0), (i == 3), mi_row,
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index d82833b..7353f4a 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -7719,6 +7719,7 @@
   int ref_frame_cost;
   int single_comp_cost;
   int64_t (*simple_rd)[MAX_REF_MV_SERCH][REF_FRAMES];
+  int skip_motion_mode;
 } HandleInterModeArgs;
 
 static INLINE int clamp_and_check_mv(int_mv *out_mv, int_mv in_mv,
@@ -8409,6 +8410,7 @@
   RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
   MB_MODE_INFO base_mbmi, best_mbmi;
   uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
+
   int interintra_allowed = cm->seq_params.enable_interintra_compound &&
                            is_interintra_allowed(mbmi) && mbmi->compound_idx;
   int pts0[SAMPLES_ARRAY_SIZE], pts_inref0[SAMPLES_ARRAY_SIZE];
@@ -8419,11 +8421,11 @@
   av1_invalid_rd_stats(&best_rd_stats);
   aom_clear_system_state();
   mbmi->num_proj_ref = 1;  // assume num_proj_ref >=1
-  MOTION_MODE last_motion_mode_allowed =
-      cm->switchable_motion_mode
-          ? motion_mode_allowed(xd->global_motion, xd, mbmi,
-                                cm->allow_warped_motion)
-          : SIMPLE_TRANSLATION;
+  MOTION_MODE last_motion_mode_allowed = SIMPLE_TRANSLATION;
+  if (cm->switchable_motion_mode) {
+    last_motion_mode_allowed = motion_mode_allowed(xd->global_motion, xd, mbmi,
+                                                   cm->allow_warped_motion);
+  }
   if (last_motion_mode_allowed == WARPED_CAUSAL) {
     mbmi->num_proj_ref = findSamples(cm, xd, mi_row, mi_col, pts0, pts_inref0);
   }
@@ -8439,6 +8441,7 @@
   for (int mode_index = (int)SIMPLE_TRANSLATION;
        mode_index <= (int)last_motion_mode_allowed + interintra_allowed;
        mode_index++) {
+    if (args->skip_motion_mode && mode_index) continue;
     int64_t tmp_rd = INT64_MAX;
     int tmp_rate2 = rate2_nocoeff;
     int is_interintra_mode = mode_index > (int)last_motion_mode_allowed;
@@ -10113,9 +10116,20 @@
     mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
     if (cpi->ref_frame_flags & ref_frame_flag_list[ref_frame]) {
       if (mbmi->partition != PARTITION_NONE &&
-          mbmi->partition != PARTITION_SPLIT &&
-          (skip_ref_frame_mask & (1 << ref_frame))) {
-        continue;
+          mbmi->partition != PARTITION_SPLIT) {
+        if (skip_ref_frame_mask & (1 << ref_frame)) {
+          int skip = 1;
+          for (int r = ALTREF_FRAME + 1; r < MODE_CTX_REF_FRAMES; ++r) {
+            if (!(skip_ref_frame_mask & (1 << r))) {
+              const MV_REFERENCE_FRAME *rf = ref_frame_map[r - REF_FRAMES];
+              if (rf[0] == ref_frame || rf[1] == ref_frame) {
+                skip = 0;
+                break;
+              }
+            }
+          }
+          if (skip) continue;
+        }
       }
       assert(get_ref_frame_buffer(cpi, ref_frame) != NULL);
       setup_buffer_ref_mvs_inter(cpi, x, ref_frame, bsize, mi_row, mi_col,
@@ -10131,10 +10145,10 @@
           (cpi->ref_frame_flags & ref_frame_flag_list[rf[1]]))) {
       continue;
     }
+
     if (mbmi->partition != PARTITION_NONE &&
         mbmi->partition != PARTITION_SPLIT) {
-      if ((skip_ref_frame_mask & (1 << rf[0])) ||
-          (skip_ref_frame_mask & (1 << rf[1]))) {
+      if (skip_ref_frame_mask & (1 << ref_frame)) {
         continue;
       }
     }
@@ -10448,6 +10462,9 @@
   av1_zero(search_state->single_state_modelled_cnt);
 }
 
+// Case 1: return 0, means don't skip this mode
+// Case 2: return 1, means skip this mode completely
+// Case 3: return 2, means skip motion mode search, try SIMPLE_TRANSLATION only
 static int inter_mode_search_order_independent_skip(
     const AV1_COMP *cpi, const PICK_MODE_CONTEXT *ctx, const MACROBLOCK *x,
     BLOCK_SIZE bsize, int mode_index, int mi_row, int mi_col,
@@ -10460,10 +10477,31 @@
   const unsigned char segment_id = mbmi->segment_id;
   const MV_REFERENCE_FRAME *ref_frame = av1_mode_order[mode_index].ref_frame;
   const PREDICTION_MODE this_mode = av1_mode_order[mode_index].mode;
+  int skip_motion_mode = 0;
   if (mbmi->partition != PARTITION_NONE && mbmi->partition != PARTITION_SPLIT) {
-    if (ctx->skip_ref_frame_mask & (1 << ref_frame[0])) return 1;
-    if (ref_frame[1] > 0 && (ctx->skip_ref_frame_mask & (1 << ref_frame[1])))
-      return 1;
+    const int ref_type = av1_ref_frame_type(ref_frame);
+    int skip_ref = ctx->skip_ref_frame_mask & (1 << ref_type);
+    if (ref_type <= ALTREF_FRAME && skip_ref) {
+      // Since the compound ref modes depend on the motion estimation result of
+      // two single ref modes (best mv of single ref modes as the start point),
+      // if the current single ref mode is marked skip, we need to check if it
+      // will be used in compound ref modes.
+      for (int r = ALTREF_FRAME + 1; r < MODE_CTX_REF_FRAMES; ++r) {
+        if (!(ctx->skip_ref_frame_mask & (1 << r))) {
+          const MV_REFERENCE_FRAME *rf = ref_frame_map[r - REF_FRAMES];
+          if (rf[0] == ref_type || rf[1] == ref_type) {
+            // Found a not skipped compound ref mode which contains current
+            // single ref. So this single ref can't be skipped completely.
+            // Just skip its motion mode search, still try its simple
+            // translation mode.
+            skip_motion_mode = 1;
+            skip_ref = 0;
+            break;
+          }
+        }
+      }
+    }
+    if (skip_ref) return 1;
   }
 
   if (cpi->sf.mode_pruning_based_on_two_pass_partition_search &&
@@ -10580,6 +10618,9 @@
   if (skip_repeated_mv(cm, x, this_mode, ref_frame)) {
     return 1;
   }
+  if (skip_motion_mode) {
+    return 2;
+  }
   return 0;
 }
 
@@ -11116,7 +11157,8 @@
     NULL,      NULL,
     NULL,      NULL,
     { { 0 } }, INT_MAX,
-    INT_MAX,   NULL
+    INT_MAX,   NULL,
+    0
   };
   for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
 
@@ -11170,12 +11212,11 @@
       analyze_single_states(cpi, &search_state);
       reach_first_comp_mode = 1;
     }
-
-    if (inter_mode_search_order_independent_skip(cpi, ctx, x, bsize, mode_index,
-                                                 mi_row, mi_col, mode_skip_mask,
-                                                 ref_frame_skip_mask))
-      continue;
-
+    const int ret = inter_mode_search_order_independent_skip(
+        cpi, ctx, x, bsize, mode_index, mi_row, mi_col, mode_skip_mask,
+        ref_frame_skip_mask);
+    if (ret == 1) continue;
+    args.skip_motion_mode = (ret == 2);
     if (sf->prune_comp_search_by_single_result > 0 &&
         second_ref_frame > INTRA_FRAME) {
       if (compound_skip_by_single_states(cpi, &search_state, this_mode,