Enhance selective_ref_frame for use in speed 0

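Add an additional level to the selective_ref_frame speed feature
for use in speed 0; the existing levels 1 and 2 become levels 2 and 3.
In the new level 1, the LAST2_FRAME/LAST3_FRAME pruning is applied
only to compound modes, and the one-sided compound restriction is
not applied.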

About 8-9% speed-up with:
+0.061% loss (lowres, 30 frames, end-usage=q)
+0.093% loss (lowres, 60 frames, end-usage=q)
+0.077% loss (midres, 60 frames, end-usage=q)

STATS_CHANGED

Change-Id: Ie842f48ff8adbd0682ac3e5053cae4a28d14358a
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 6ad4ea0..ab72ca9 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -4764,7 +4764,7 @@
         // If there are not enough samples collected, make all available.
         memset(stat->ref0_counts, 0xff, sizeof(stat->ref0_counts));
         memset(stat->ref1_counts, 0xff, sizeof(stat->ref1_counts));
-      } else if (sf->selective_ref_frame < 2) {
+      } else if (sf->selective_ref_frame < 3) {
         // ALTREF2_FRAME and BWDREF_FRAME may be skipped during the
         // initial partition scan, so we don't eliminate them.
         stat->ref0_counts[ALTREF2_FRAME] = 0xff;
@@ -5792,7 +5792,7 @@
   }
 
   av1_setup_frame_buf_refs(cm);
-  if (cpi->sf.selective_ref_frame >= 2) enforce_max_ref_frames(cpi);
+  if (cpi->sf.selective_ref_frame >= 3) enforce_max_ref_frames(cpi);
   av1_setup_frame_sign_bias(cm);
 
 #if CONFIG_MISMATCH_DEBUG
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index ddca999..aa2aea8 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -11115,7 +11115,7 @@
   }
 
   if (sf->selective_ref_frame) {
-    if (sf->selective_ref_frame >= 2 || x->cb_partition_scan) {
+    if (sf->selective_ref_frame >= 3 || x->cb_partition_scan) {
       if (ref_frame[0] == ALTREF2_FRAME || ref_frame[1] == ALTREF2_FRAME)
         if (get_relative_dist(
                 cm, cm->cur_frame->ref_frame_offset[ALTREF2_FRAME - LAST_FRAME],
@@ -11127,20 +11127,26 @@
                 cm->frame_offset) < 0)
           return 1;
     }
-    if (ref_frame[0] == LAST3_FRAME || ref_frame[1] == LAST3_FRAME)
-      if (get_relative_dist(
-              cm, cm->cur_frame->ref_frame_offset[LAST3_FRAME - LAST_FRAME],
-              cm->cur_frame->ref_frame_offset[GOLDEN_FRAME - LAST_FRAME]) <= 0)
-        return 1;
-    if (ref_frame[0] == LAST2_FRAME || ref_frame[1] == LAST2_FRAME)
-      if (get_relative_dist(
-              cm, cm->cur_frame->ref_frame_offset[LAST2_FRAME - LAST_FRAME],
-              cm->cur_frame->ref_frame_offset[GOLDEN_FRAME - LAST_FRAME]) <= 0)
-        return 1;
+
+    if (sf->selective_ref_frame >= 2 ||
+        (sf->selective_ref_frame == 1 && comp_pred)) {
+      if (ref_frame[0] == LAST3_FRAME || ref_frame[1] == LAST3_FRAME)
+        if (get_relative_dist(
+                cm, cm->cur_frame->ref_frame_offset[LAST3_FRAME - LAST_FRAME],
+                cm->cur_frame->ref_frame_offset[GOLDEN_FRAME - LAST_FRAME]) <=
+            0)
+          return 1;
+      if (ref_frame[0] == LAST2_FRAME || ref_frame[1] == LAST2_FRAME)
+        if (get_relative_dist(
+                cm, cm->cur_frame->ref_frame_offset[LAST2_FRAME - LAST_FRAME],
+                cm->cur_frame->ref_frame_offset[GOLDEN_FRAME - LAST_FRAME]) <=
+            0)
+          return 1;
+    }
   }
 
   // One-sided compound is used only when all reference frames are one-sided.
-  if (sf->selective_ref_frame && comp_pred && !cpi->all_one_sided_refs) {
+  if ((sf->selective_ref_frame >= 2) && comp_pred && !cpi->all_one_sided_refs) {
     unsigned int ref_offsets[2];
     for (int i = 0; i < 2; ++i) {
       const int buf_idx = cm->frame_refs[ref_frame[i] - LAST_FRAME].idx;
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 60a910a..f3c5104 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -198,10 +198,12 @@
   sf->use_fast_interpolation_filter_search = 1;
   sf->intra_tx_size_search_init_depth_sqr = 1;
   sf->intra_angle_estimation = 1;
+  sf->selective_ref_frame = 1;
 
   if (speed >= 1) {
     sf->gm_erroradv_type = GM_ERRORADV_TR_1;
-    sf->selective_ref_frame = 1;
+    sf->selective_ref_frame = 2;
+
     sf->inter_tx_size_search_init_depth_rect = 1;
     sf->inter_tx_size_search_init_depth_sqr = 1;
     sf->intra_tx_size_search_init_depth_rect = 1;
@@ -237,7 +239,7 @@
   if (speed >= 2) {
     sf->gm_erroradv_type = GM_ERRORADV_TR_2;
 
-    sf->selective_ref_frame = 2;
+    sf->selective_ref_frame = 3;
     sf->fast_cdef_search = 1;
 
     sf->adaptive_rd_thresh = 1;
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 7e850f2..8f0f9a6 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -339,7 +339,7 @@
   BLOCK_SIZE always_this_block_size;
 
   // Drop less likely to be picked reference frames in the RD search.
-  // Has three levels for now: 0, 1 and 2, where higher levels prune more
+  // Has four levels for now: 0, 1, 2 and 3, where higher levels prune more
   // aggressively than lower ones. (0 means no pruning).
   int selective_ref_frame;