rtc: Add golden as possible reference in variance partition

For variance-based partitioning and nonrd pickmode:
check whether the golden reference is a better
reference for variance-based partitioning and, if so,
do not apply the reduced search/testing of the golden
reference in nonrd_pickmode.

Add frame level speed feature to enable/disable
reduced golden search/testing in nonrd_pickmode.
Enable it only at speed 8.

Average bdrate gain for the speed 8 rtc set is
~0.5%, with several clips over 1%. A speed loss of ~5%
was measured on the clip (720p) with the max bdrate gain.

Change-Id: I4364286356c50f4663f1550dc0caf477671fd9b9
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index 595f32e..f86fcc6 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -459,6 +459,7 @@
   // Strong color activity detection. Used in REALTIME coding mode to enhance
   // the visual quality at the boundary of moving color objects.
   uint8_t color_sensitivity[2];
+  int nonrd_reduce_golden_mode_search;
 
   // Used to control the tx size search evaluation for mode processing
   // (normal/winner mode)
diff --git a/av1/encoder/nonrd_pickmode.c b/av1/encoder/nonrd_pickmode.c
index 3c58d50..99d8ecb 100644
--- a/av1/encoder/nonrd_pickmode.c
+++ b/av1/encoder/nonrd_pickmode.c
@@ -1488,7 +1488,8 @@
     usable_ref_frame = LAST_FRAME;
   }
 
-  if (cpi->sf.short_circuit_low_temp_var) {
+  if (cpi->sf.short_circuit_low_temp_var &&
+      x->nonrd_reduce_golden_mode_search) {
     force_skip_low_temp_var =
         get_force_skip_low_temp_var(&x->variance_low[0], mi_row, mi_col, bsize);
     // If force_skip_low_temp_var is set, and for short circuit mode = 1 and 3,
@@ -1594,11 +1595,14 @@
 
     if (const_motion[ref_frame] && this_mode == NEARMV) continue;
 
-    if (ref_frame != LAST_FRAME &&
-        (bsize > BLOCK_64X64 || (bsize > BLOCK_16X16 && this_mode == NEWMV)))
-      continue;
+    // If this flag is set, skip some modes/block sizes for non-LAST references.
+    if (x->nonrd_reduce_golden_mode_search) {
+      if (ref_frame != LAST_FRAME &&
+          (bsize > BLOCK_64X64 || (bsize > BLOCK_16X16 && this_mode == NEWMV)))
+        continue;
 
-    if (ref_frame != LAST_FRAME && this_mode == NEARMV) continue;
+      if (ref_frame != LAST_FRAME && this_mode == NEARMV) continue;
+    }
 
     // Skip non-zeromv mode search for golden frame if force_skip_low_temp_var
     // is set. If nearestmv for golden frame is 0, zeromv mode will be skipped
@@ -1728,7 +1732,7 @@
 #endif
     if (cpi->sf.use_nonrd_filter_search &&
         ((mi->mv[0].as_mv.row & 0x07) || (mi->mv[0].as_mv.col & 0x07)) &&
-        ref_frame == LAST_FRAME) {
+        (ref_frame == LAST_FRAME || !x->nonrd_reduce_golden_mode_search)) {
       search_filter_ref(cpi, x, &this_rdc, mi_row, mi_col, tmp, bsize,
                         reuse_inter_pred, &this_mode_pred, &var_y, &sse_y,
                         &this_early_term, use_model_yrd_large, &this_sse,
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index bc3e5d5..e7d8c53 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -502,6 +502,7 @@
   sf->check_intra_pred_nonrd = 1;
   sf->use_nonrd_filter_search = 1;
   sf->nonrd_use_blockyrd_interp_filter = 0;
+  sf->nonrd_reduce_golden_mode_search = 0;
 
   if (speed >= 1) {
     sf->gm_erroradv_type = GM_ERRORADV_TR_1;
@@ -693,6 +694,7 @@
     sf->short_circuit_low_temp_var = 1;
     sf->reuse_inter_pred_nonrd = 1;
     sf->max_intra_bsize = BLOCK_32X32;
+    sf->nonrd_reduce_golden_mode_search = 1;
     // This gives ~2% bdrate improvement but with 5-10% slowdown.
     // sf->nonrd_use_blockyrd_interp_filter = 1;
 
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 1345ebf..84b141a 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -810,6 +810,9 @@
   int num_inter_modes_for_tx_search;
 
   int nonrd_merge_partition;
+
+  // For nonrd: Reduces golden mode search/testing in nonrd pickmode.
+  int nonrd_reduce_golden_mode_search;
 } SPEED_FEATURES;
 
 struct AV1_COMP;
diff --git a/av1/encoder/var_based_part.c b/av1/encoder/var_based_part.c
index b25e7c1..0749f7f 100644
--- a/av1/encoder/var_based_part.c
+++ b/av1/encoder/var_based_part.c
@@ -629,6 +629,7 @@
   const int num_64x64_blocks = is_small_sb ? 1 : 4;
 
   unsigned int y_sad = UINT_MAX;
+  unsigned int y_sad_g = UINT_MAX;
   BLOCK_SIZE bsize = is_small_sb ? BLOCK_64X64 : BLOCK_128X128;
 
   // Ref frame used in partitioning.
@@ -680,8 +681,23 @@
     // is!!
     MB_MODE_INFO *mi = xd->mi[0];
     const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, LAST_FRAME);
-
     assert(yv12 != NULL);
+    const YV12_BUFFER_CONFIG *yv12_g = NULL;
+
+    // For non-SVC GOLDEN is another temporal reference. Check if it should be
+    // used as reference for partitioning.
+    if (!cpi->use_svc && (cpi->ref_frame_flags & AOM_GOLD_FLAG) &&
+        cpi->sf.use_fast_nonrd_pick_mode) {
+      yv12_g = get_ref_frame_yv12_buf(cm, GOLDEN_FRAME);
+      if (yv12_g && yv12_g != yv12) {
+        av1_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
+                             get_ref_scale_factors(cm, GOLDEN_FRAME),
+                             num_planes);
+        y_sad_g = cpi->fn_ptr[bsize].sdf(
+            x->plane[0].src.buf, x->plane[0].src.stride,
+            xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride);
+      }
+    }
 
     av1_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
                          get_ref_scale_factors(cm, LAST_FRAME), num_planes);
@@ -703,7 +719,22 @@
           xd->plane[0].pre[0].stride);
     }
 
-    x->pred_mv[LAST_FRAME] = mi->mv[0].as_mv;
+    // Pick the ref frame for partitioning: use the golden frame only if its
+    // SAD (y_sad_g) is below 0.9 * y_sad.
+    if (y_sad_g < 0.9 * y_sad) {
+      av1_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
+                           get_ref_scale_factors(cm, GOLDEN_FRAME), num_planes);
+      mi->ref_frame[0] = GOLDEN_FRAME;
+      mi->mv[0].as_int = 0;
+      y_sad = y_sad_g;
+      ref_frame_partition = GOLDEN_FRAME;
+      x->nonrd_reduce_golden_mode_search = 0;
+    } else {
+      x->pred_mv[LAST_FRAME] = mi->mv[0].as_mv;
+      ref_frame_partition = LAST_FRAME;
+      x->nonrd_reduce_golden_mode_search =
+          cpi->sf.nonrd_reduce_golden_mode_search;
+    }
 
     set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
     av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL,