Speed up inter frame rate-distortion optimization

The frame marker system allows one to map the reference frame
index into the natural order. This makes it possible to directly
check the efficacy of the reference frames given their locations
relative to the current coding frame.

This commit uses this property to exclude from the rate-distortion
optimization process those reference frames that are less likely
to contribute coding gains. For example, it skips the check on
the LAST2 / LAST3 frames when their actual location in natural
order is at or before that of the golden frame.

The AWCY results show a 0.6% performance regression. The encoding
speed is doubled.

To use the speed-up, one needs to turn on the frame-marker
experiment (until it is turned on by default) and enable the
selective_ref_frame entry in the speed features.

Change-Id: Ifb03ed90acd980bbc7ff1c2e17982e21e68d2588
diff --git a/av1/common/onyxc_int.h b/av1/common/onyxc_int.h
index 46c00d0..50b4904 100644
--- a/av1/common/onyxc_int.h
+++ b/av1/common/onyxc_int.h
@@ -122,14 +122,14 @@
   int ref_count;
 
 #if CONFIG_FRAME_MARKER
-  int cur_frame_offset;
-  int lst_frame_offset;
-  int alt_frame_offset;
-  int gld_frame_offset;
-  int lst2_frame_offset;
-  int lst3_frame_offset;
-  int bwd_frame_offset;
-  int alt2_frame_offset;
+  unsigned int cur_frame_offset;
+  unsigned int lst_frame_offset;
+  unsigned int alt_frame_offset;
+  unsigned int gld_frame_offset;
+  unsigned int lst2_frame_offset;
+  unsigned int lst3_frame_offset;
+  unsigned int bwd_frame_offset;
+  unsigned int alt2_frame_offset;
 #endif  // CONFIG_FRAME_MARKER
 
   MV_REF *mvs;
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 76fc5fa..174d7cf 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -9947,6 +9947,25 @@
     mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
 #endif  // CONFIG_INTERINTRA
 
+#if CONFIG_FRAME_MARKER
+    if (sf->selective_ref_frame) {
+      if (mbmi->ref_frame[0] == ALTREF2_FRAME ||
+          mbmi->ref_frame[1] == ALTREF2_FRAME)
+        if (cm->cur_frame->alt2_frame_offset < cm->frame_offset) continue;
+      if (mbmi->ref_frame[0] == BWDREF_FRAME ||
+          mbmi->ref_frame[1] == BWDREF_FRAME)
+        if (cm->cur_frame->bwd_frame_offset < cm->frame_offset) continue;
+      if (mbmi->ref_frame[0] == LAST3_FRAME ||
+          mbmi->ref_frame[1] == LAST3_FRAME)
+        if (cm->cur_frame->lst3_frame_offset <= cm->cur_frame->gld_frame_offset)
+          continue;
+      if (mbmi->ref_frame[0] == LAST2_FRAME ||
+          mbmi->ref_frame[1] == LAST2_FRAME)
+        if (cm->cur_frame->lst2_frame_offset <= cm->cur_frame->gld_frame_offset)
+          continue;
+    }
+#endif
+
     if (ref_frame == INTRA_FRAME) {
       RD_STATS rd_stats_y;
       TX_SIZE uv_tx;
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 85fa1ae..bfdda60 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -151,6 +151,8 @@
   }
 
   if (speed >= 2) {
+    sf->selective_ref_frame = 1;
+
     if ((cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) ||
         av1_internal_image_edge(cpi)) {
       sf->use_square_partition_only = !frame_is_boosted(cpi);
@@ -398,6 +400,7 @@
   sf->tx_type_search.use_skip_flag_prediction = 1;
   sf->tx_type_search.fast_intra_tx_type_search = 0;
   sf->tx_type_search.fast_inter_tx_type_search = 0;
+  sf->selective_ref_frame = 0;
   sf->less_rectangular_check = 0;
   sf->use_square_partition_only = 0;
   sf->auto_min_max_partition_size = NOT_IN_USE;
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index d2f2e3a..9397898 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -324,6 +324,9 @@
   // Used if partition_search_type = FIXED_SIZE_PARTITION
   BLOCK_SIZE always_this_block_size;
 
+  // Drop less likely picked reference frames in the RD search
+  int selective_ref_frame;
+
   // Skip rectangular partition test when partition type none gives better
   // rd than partition type split.
   int less_rectangular_check;