rtc: Allow golden-ref based on block sad

For the aggressive ref frame pruning mode,
nonrd_prune_ref_frame_search >= 3 (speed >= 10):
the golden ref is often completely skipped, but in
some cases kept based on superblock sad.
Here we add a condition based on the (already computed)
sad of coding block, before mode testing, from the
find_predictors() (sad of nearest/nearmv for last-ref),
to make a local decision to search golden ref.

The old logic based on superblock sad is removed in
favor of the new one based on block sad. The threshold
is chosen to be conservative to avoid slowdown.

This is generally useful for uncovered areas, and shows
visual improvement on face after hand moving
(desktop2_180p).

Stats changed for speed 10 (overall neutral)
             avg_psnr/ovr_psnr/ssim, IC%
rtc_derf:     -0.46/-0.55/-0.55, -0.49

bdrate gains of ~1-2% on desktop2, desktopqvga and jimredvga.

Neutral change for rtc set:
rtc:  -0.08/-0.10/-0.13, 0.02

Disabled for screen.
Change-Id: I57056d3207fba333783ec9686aa25fa34f1f8847
diff --git a/av1/encoder/nonrd_pickmode.c b/av1/encoder/nonrd_pickmode.c
index d696d18..7267f9e 100644
--- a/av1/encoder/nonrd_pickmode.c
+++ b/av1/encoder/nonrd_pickmode.c
@@ -2355,14 +2355,6 @@
        (x->nonrd_prune_ref_frame_search > 1 && bsize > BLOCK_64X64))) {
     use_golden_ref_frame = 0;
     use_alt_ref_frame = 0;
-    // Keep golden (longer-term) reference if sb has high source sad, for
-    // frames whose average source_sad is below threshold. This is to try to
-    // capture case where only part of frame has high motion.
-    // Exclude screen content mode.
-    if (cpi->oxcf.tune_cfg.content != AOM_CONTENT_SCREEN &&
-        x->content_state_sb.source_sad_nonrd >= kHighSad &&
-        bsize <= BLOCK_32X32 && cpi->rc.frame_source_sad < 50000)
-      use_golden_ref_frame = 1;
   }
 
   if (segfeature_active(seg, mi->segment_id, SEG_LVL_REF_FRAME) &&
@@ -2384,6 +2376,22 @@
       (x->color_sensitivity_sb_g[0] == 1 || x->color_sensitivity_sb_g[1] == 1))
     use_golden_ref_frame = 0;
 
+  // For non-screen: if golden and altref are not being selected as references
+  // (use_golden_ref_frame/use_alt_ref_frame = 0) check to allow golden back
+  // based on the sad of nearest/nearmv of LAST ref. If this block sad is large,
+  // keep golden as reference. Only do this for the aggressive pruning mode and
+  // avoid it when color is set for golden reference.
+  if (cpi->oxcf.tune_cfg.content != AOM_CONTENT_SCREEN &&
+      (cpi->ref_frame_flags & AOM_LAST_FLAG) && !use_golden_ref_frame &&
+      !use_alt_ref_frame && x->pred_mv_sad[LAST_FRAME] != INT_MAX &&
+      x->nonrd_prune_ref_frame_search > 2 &&
+      x->color_sensitivity_sb_g[0] == 0 && x->color_sensitivity_sb_g[1] == 0) {
+    int thr = (cm->width * cm->height >= 640 * 360) ? 100 : 150;
+    int pred = x->pred_mv_sad[LAST_FRAME] >>
+               (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
+    if (pred > thr) use_golden_ref_frame = 1;
+  }
+
   use_alt_ref_frame =
       cpi->ref_frame_flags & AOM_ALT_FLAG ? use_alt_ref_frame : 0;
   use_golden_ref_frame =
@@ -3144,6 +3152,10 @@
     svc_mv_row = -4;
   }
 
+  if (cpi->ref_frame_flags & AOM_LAST_FLAG)
+    find_predictors(cpi, x, LAST_FRAME, frame_mv, tile_data, yv12_mb, bsize,
+                    force_skip_low_temp_var, x->force_zeromv_skip_for_blk);
+
   get_ref_frame_use_mask(cpi, x, mi, mi_row, mi_col, bsize, gf_temporal_ref,
                          use_ref_frame_mask, &force_skip_low_temp_var);
 
@@ -3163,7 +3175,8 @@
     tot_num_comp_modes = 0;
   }
 
-  for (MV_REFERENCE_FRAME ref_frame_iter = LAST_FRAME;
+  // Start at LAST_FRAME + 1.
+  for (MV_REFERENCE_FRAME ref_frame_iter = LAST_FRAME + 1;
        ref_frame_iter <= ALTREF_FRAME; ++ref_frame_iter) {
     if (use_ref_frame_mask[ref_frame_iter]) {
       find_predictors(cpi, x, ref_frame_iter, frame_mv, tile_data, yv12_mb,
@@ -3171,10 +3184,12 @@
     }
   }
 
-  thresh_sad_pred = ((int64_t)x->pred_mv_sad[LAST_FRAME]) << 1;
-  // Increase threshold for less aggressive pruning.
-  if (cpi->sf.rt_sf.nonrd_prune_ref_frame_search == 1)
-    thresh_sad_pred += (x->pred_mv_sad[LAST_FRAME] >> 2);
+  if (x->pred_mv_sad[LAST_FRAME] != INT_MAX) {
+    thresh_sad_pred = ((int64_t)x->pred_mv_sad[LAST_FRAME]) << 1;
+    // Increase threshold for less aggressive pruning.
+    if (cpi->sf.rt_sf.nonrd_prune_ref_frame_search == 1)
+      thresh_sad_pred += (x->pred_mv_sad[LAST_FRAME] >> 2);
+  }
 
   const int large_block = bsize >= BLOCK_32X32;
   const int use_model_yrd_large =