Speed up by dropping some ref frames in compound search

Record the distortion for each single ref in RD and rank the refs according to their distortions. Then, in compound search, drop the combination of the ref frames with the largest and second-largest distortions.

This patch shows neutral performance on google test using lowres with 20 frames. Local tests show a ~5% speed-up over baseline.

Change-Id: I722fe66a0551f5f8a044c57c55caa74e46db7ee8

commit: c683bf9b50a5d7ac7d63e98e82abc5b2ba871ee7 [log] [tgz]
author: Cheng Chen <chengchen@google.com> Wed Dec 13 09:21:40 2017 -0800
committer: Cheng Chen <chengchen@google.com> Mon Dec 18 01:06:14 2017 +0000
tree: 121f475b52c34d87957fd8649a487910f5cc1718
parent: a25c0300c4ca70e04db240f96ceae87e97671eed [diff]
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index db583d0..9498087 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c

@@ -9284,6 +9284,12 @@
   int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
   int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
 
+  int64_t dist_refs[TOTAL_REFS_PER_FRAME];
+  int dist_order_refs[TOTAL_REFS_PER_FRAME];
+  int num_available_refs = 0;
+  memset(dist_refs, -1, sizeof(dist_refs));
+  memset(dist_order_refs, -1, sizeof(dist_order_refs));
+
 #if CONFIG_HIGHBITDEPTH
   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
     int len = sizeof(uint16_t);
@@ -9554,6 +9560,18 @@
     second_ref_frame = av1_mode_order[mode_index].ref_frame[1];
     mbmi->ref_mv_idx = 0;
 
+    if (sf->drop_ref) {
+      if (ref_frame > INTRA_FRAME && second_ref_frame > INTRA_FRAME) {
+        if (num_available_refs > 2) {
+          if ((ref_frame == dist_order_refs[0] &&
+               second_ref_frame == dist_order_refs[1]) ||
+              (ref_frame == dist_order_refs[1] &&
+               second_ref_frame == dist_order_refs[0]))
+            continue;
+        }
+      }
+    }
+
     if (ref_frame > INTRA_FRAME && second_ref_frame == INTRA_FRAME) {
       // Mode must by compatible
       if (!is_interintra_allowed_mode(this_mode)) continue;
@@ -10422,6 +10440,40 @@
         best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
     }
 
+    if (sf->drop_ref) {
+      if (second_ref_frame == NONE_FRAME) {
+        const int idx = ref_frame - LAST_FRAME;
+        if (idx && distortion2 > dist_refs[idx]) {
+          dist_refs[idx] = distortion2;
+          dist_order_refs[idx] = ref_frame;
+        }
+
+        // Reach the last single ref prediction mode
+        if (ref_frame == ALTREF_FRAME && this_mode == GLOBALMV) {
+          // bubble sort dist_refs and the order index
+          for (i = 0; i < TOTAL_REFS_PER_FRAME; ++i) {
+            for (k = i + 1; k < TOTAL_REFS_PER_FRAME; ++k) {
+              if (dist_refs[i] < dist_refs[k]) {
+                int64_t tmp_dist = dist_refs[i];
+                dist_refs[i] = dist_refs[k];
+                dist_refs[k] = tmp_dist;
+
+                int tmp_idx = dist_order_refs[i];
+                dist_order_refs[i] = dist_order_refs[k];
+                dist_order_refs[k] = tmp_idx;
+              }
+            }
+          }
+
+          for (i = 0; i < TOTAL_REFS_PER_FRAME; ++i) {
+            if (dist_refs[i] == -1) break;
+            num_available_refs = i;
+          }
+          num_available_refs++;
+        }
+      }
+    }
+
     if (x->skip && !comp_pred) break;
   }
 

diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 47f6ce7..3635ce1 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c

@@ -465,6 +465,7 @@
   sf->use_upsampled_references = 1;
   sf->disable_wedge_search_var_thresh = 0;
   sf->fast_wedge_sign_estimate = 0;
+  sf->drop_ref = 0;
 
   for (i = 0; i < TX_SIZES; i++) {
     sf->intra_y_mode_mask[i] = INTRA_ALL;

diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index ba1b31f..9b7d3e6 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h

@@ -519,6 +519,9 @@
   // Do limited interpolation filter search for dual filters, since best choice
   // usually includes EIGHTTAP_REGULAR.
   int use_fast_interpolation_filter_search;
+
+  // flag to drop some ref frames in compound motion search
+  int drop_ref;
 } SPEED_FEATURES;
 
 struct AV1_COMP;
commit	c683bf9b50a5d7ac7d63e98e82abc5b2ba871ee7	[log] [tgz]
author	Cheng Chen <chengchen@google.com>	Wed Dec 13 09:21:40 2017 -0800
committer	Cheng Chen <chengchen@google.com>	Mon Dec 18 01:06:14 2017 +0000
tree	121f475b52c34d87957fd8649a487910f5cc1718
parent	a25c0300c4ca70e04db240f96ceae87e97671eed [diff]