rtc: Fix to color artifacts under high motion

For RTC: the golden reference is used as a longer-term
reference, but the color_sensitivity flags are
only defined relative to the last reference.
This can cause color artifacts under high motion
if golden is selected as the best reference and
has a different uv-signal than the source.

The fix here is to define a color_sensitivity for the
golden reference at the superblock level, and for
now skip testing the golden reference if this
color flag is set. The logic is also conditioned on
source_variance and the superblock content state
(source_sad_nonrd), to avoid any regression from too
much/unneeded skipping.

This reduces the color artifacts in the issues listed below.
To reduce them further: the golden-ref logic above can be better
tuned, and defining color_sensitivity for alt-ref would
also be needed, but since alt-ref is closer to last ref,
the impact from that is smaller.

Stat changes are neutral/small:
               avg-psnr  ovr-psnr  ssim   IC-speedup
speed 7 derf:   0.03      0.03     0.05   -0.01
speed 9 derf:   0.15      0.18     0.18   -0.15
speed 9 rtc:    0.25      0.23     0.33    0.03
speed 10 rtc:  -0.01     -0.02    -0.03   -0.14
speed 10 screen:0.26      0.09     0.34    0.04

Bug: aomedia:3308
Bug: aomedia:3313

Change-Id: Ie23ab25d8189540ea11d95c46cf9ea4f549c583d
(cherry picked from commit c6a503f4b15da2ecef856f917d08473ea788ae00)
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index 46a56a5..0ad118d 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -1261,6 +1261,8 @@
    * of moving color objects.
    */
   uint8_t color_sensitivity_sb[2];
+  //! Color sensitivity flag for the superblock for golden reference.
+  uint8_t color_sensitivity_sb_g[2];
   //! Color sensitivity flag for the coding block.
   uint8_t color_sensitivity[2];
   /**@}*/
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 6a36031..2a395aa 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -887,6 +887,8 @@
     // Reset color coding related parameters
     x->color_sensitivity_sb[0] = 0;
     x->color_sensitivity_sb[1] = 0;
+    x->color_sensitivity_sb_g[0] = 0;
+    x->color_sensitivity_sb_g[1] = 0;
     x->color_sensitivity[0] = 0;
     x->color_sensitivity[1] = 0;
     x->content_state_sb.source_sad_nonrd = kMedSad;
diff --git a/av1/encoder/nonrd_pickmode.c b/av1/encoder/nonrd_pickmode.c
index f188f0e..58fc07a 100644
--- a/av1/encoder/nonrd_pickmode.c
+++ b/av1/encoder/nonrd_pickmode.c
@@ -2081,6 +2081,12 @@
     use_alt_ref_frame = 0;
   }
 
+  // Skip golden reference if color is set, on flat blocks with motion.
+  if (x->source_variance < 500 &&
+      x->content_state_sb.source_sad_nonrd > kLowSad &&
+      (x->color_sensitivity_sb_g[0] == 1 || x->color_sensitivity_sb_g[1] == 1))
+    use_golden_ref_frame = 0;
+
   use_alt_ref_frame =
       cpi->ref_frame_flags & AOM_ALT_FLAG ? use_alt_ref_frame : 0;
   use_golden_ref_frame =
diff --git a/av1/encoder/var_based_part.c b/av1/encoder/var_based_part.c
index c5e2edd..ab27d1f 100644
--- a/av1/encoder/var_based_part.c
+++ b/av1/encoder/var_based_part.c
@@ -924,8 +924,8 @@
 
 static AOM_INLINE void chroma_check(AV1_COMP *cpi, MACROBLOCK *x,
                                     BLOCK_SIZE bsize, unsigned int y_sad,
-                                    int is_key_frame, int zero_motion,
-                                    unsigned int *uv_sad) {
+                                    unsigned int y_sad_g, int is_key_frame,
+                                    int zero_motion, unsigned int *uv_sad) {
   int i;
   MACROBLOCKD *xd = &x->e_mbd;
   int shift = 3;
@@ -938,9 +938,11 @@
   MB_MODE_INFO *mi = xd->mi[0];
   const AV1_COMMON *const cm = &cpi->common;
   const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, LAST_FRAME);
+  const YV12_BUFFER_CONFIG *yv12_g = get_ref_frame_yv12_buf(cm, GOLDEN_FRAME);
   const struct scale_factors *const sf =
       get_ref_scale_factors_const(cm, LAST_FRAME);
   struct buf_2d dst;
+  unsigned int uv_sad_g = 0;
 
   for (i = 1; i <= 2; ++i) {
     struct macroblock_plane *p = &x->plane[i];
@@ -949,6 +951,7 @@
         get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
 
     if (bs != BLOCK_INVALID) {
+      // For last:
       if (zero_motion) {
         if (mi->ref_frame[0] == LAST_FRAME) {
           uv_sad[i - 1] = cpi->ppi->fn_ptr[bs].sdf(
@@ -963,9 +966,21 @@
           uv_sad[i - 1] = cpi->ppi->fn_ptr[bs].sdf(p->src.buf, p->src.stride,
                                                    dst.buf, dst.stride);
         }
-      } else
+      } else {
         uv_sad[i - 1] = cpi->ppi->fn_ptr[bs].sdf(p->src.buf, p->src.stride,
                                                  pd->dst.buf, pd->dst.stride);
+      }
+
+      // For golden:
+      if (y_sad_g != UINT_MAX) {
+        uint8_t *src = (i == 1) ? yv12_g->u_buffer : yv12_g->v_buffer;
+        setup_pred_plane(&dst, xd->mi[0]->bsize, src, yv12_g->uv_crop_width,
+                         yv12_g->uv_crop_height, yv12_g->uv_stride, xd->mi_row,
+                         xd->mi_col, sf, xd->plane[i].subsampling_x,
+                         xd->plane[i].subsampling_y);
+        uv_sad_g = cpi->ppi->fn_ptr[bs].sdf(p->src.buf, p->src.stride, dst.buf,
+                                            dst.stride);
+      }
     }
 
     if (uv_sad[i - 1] > (y_sad >> 1))
@@ -976,6 +991,8 @@
     // for coding block size < sb_size.
     else
       x->color_sensitivity_sb[i - 1] = 2;
+
+    x->color_sensitivity_sb_g[i - 1] = uv_sad_g > y_sad_g / 6;
   }
 }
 
@@ -1313,7 +1330,8 @@
 
   uv_sad[0] = 0;
   uv_sad[1] = 0;
-  chroma_check(cpi, x, bsize, y_sad_last, is_key_frame, zero_motion, uv_sad);
+  chroma_check(cpi, x, bsize, y_sad_last, y_sad_g, is_key_frame, zero_motion,
+               uv_sad);
 
   x->force_zeromv_skip = 0;
   const unsigned int thresh_exit_part =