AV1 RT: Add color to RDCost

7% BDRate improvement in UV PSNR. On some clips the improvement of UV
BDRate is 20-30%

Change-Id: Id981340dc4e2f123545362244935c1d2b2198897
diff --git a/av1/encoder/nonrd_pickmode.c b/av1/encoder/nonrd_pickmode.c
index 387537d..850b6c5 100644
--- a/av1/encoder/nonrd_pickmode.c
+++ b/av1/encoder/nonrd_pickmode.c
@@ -968,6 +968,56 @@
   }
 }
 
+static void model_rd_for_sb_uv(AV1_COMP *cpi, BLOCK_SIZE plane_bsize,
+                               MACROBLOCK *x, MACROBLOCKD *xd,
+                               RD_STATS *this_rdc, unsigned int *var_y,
+                               unsigned int *sse_y, int start_plane,
+                               int stop_plane) {
+  // Note our transform coeffs are 8 times an orthogonal transform.
+  // Hence quantizer step is also 8 times. To get effective quantizer
+  // we need to divide by 8 before sending to modeling function.
+  unsigned int sse;
+  int rate;
+  int64_t dist;
+  int i;
+  uint32_t tot_var = *var_y;
+  uint32_t tot_sse = *sse_y;
+
+  this_rdc->rate = 0;
+  this_rdc->dist = 0;
+
+  for (i = start_plane; i <= stop_plane; ++i) {
+    struct macroblock_plane *const p = &x->plane[i];
+    struct macroblockd_plane *const pd = &xd->plane[i];
+    const uint32_t dc_quant = p->dequant_QTX[0];
+    const uint32_t ac_quant = p->dequant_QTX[1];
+    const BLOCK_SIZE bs = plane_bsize;
+    unsigned int var;
+    if (!x->color_sensitivity[i - 1]) continue;
+
+    var = cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf,
+                             pd->dst.stride, &sse);
+    assert(sse >= var);
+    tot_var += var;
+    tot_sse += sse;
+
+    av1_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bs],
+                                 dc_quant >> 3, &rate, &dist);
+
+    this_rdc->rate += rate >> 1;
+    this_rdc->dist += dist << 3;
+
+    av1_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bs], ac_quant >> 3,
+                                 &rate, &dist);
+
+    this_rdc->rate += rate;
+    this_rdc->dist += dist << 4;
+  }
+
+  *var_y = tot_var;
+  *sse_y = tot_sse;
+}
+
 struct estimate_block_intra_args {
   AV1_COMP *cpi;
   MACROBLOCK *x;
@@ -994,20 +1044,21 @@
   RD_STATS this_rdc;
 
   (void)block;
-  (void)plane_bsize;
-  assert(plane == 0);
 
   p->src.buf = &src_buf_base[4 * (row * src_stride + col)];
   pd->dst.buf = &dst_buf_base[4 * (row * dst_stride + col)];
 
-  av1_predict_intra_block_facade(cm, xd, 0, col, row, tx_size);
+  av1_predict_intra_block_facade(cm, xd, plane, col, row, tx_size);
 
   if (plane == 0) {
     int64_t this_sse = INT64_MAX;
     block_yrd(cpi, x, 0, 0, &this_rdc, &args->skippable, &this_sse, bsize_tx,
               AOMMIN(tx_size, TX_16X16));
   } else {
-    return;
+    unsigned int var = 0;
+    unsigned int sse = 0;
+    model_rd_for_sb_uv(cpi, plane_bsize, x, xd, &this_rdc, &var, &sse, plane,
+                       plane);
   }
 
   p->src.buf = src_buf_base;
@@ -1443,6 +1494,11 @@
       }
     }
 
+    if (x->color_sensitivity[0] || x->color_sensitivity[1]) {
+      this_early_term = 0;
+      this_rdc.skip = 0;
+    }
+
     if (ref_frame == LAST_FRAME && frame_mv[this_mode][ref_frame].as_int == 0) {
       sse_zeromv_norm =
           sse_y >> (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
@@ -1487,6 +1543,24 @@
       this_rdc.dist = sse_y << 4;
     }
 
+    if (!this_early_term &&
+        (x->color_sensitivity[0] || x->color_sensitivity[1])) {
+      RD_STATS rdc_uv;
+      const BLOCK_SIZE uv_bsize = get_plane_block_size(
+          bsize, xd->plane[1].subsampling_x, xd->plane[1].subsampling_y);
+      if (x->color_sensitivity[0]) {
+        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize,
+                                      AOM_PLANE_U, AOM_PLANE_U);
+      }
+      if (x->color_sensitivity[1]) {
+        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize,
+                                      AOM_PLANE_V, AOM_PLANE_V);
+      }
+      model_rd_for_sb_uv(cpi, uv_bsize, x, xd, &rdc_uv, &var_y, &sse_y, 1, 2);
+      this_rdc.rate += rdc_uv.rate;
+      this_rdc.dist += rdc_uv.dist;
+    }
+
     // TODO(kyslov) account for UV prediction cost
     this_rdc.rate += rate_mv;
     const int16_t mode_ctx =
@@ -1572,6 +1646,8 @@
                               rd_thresh_freq_fact[mode_index])) {
         continue;
       }
+      const BLOCK_SIZE uv_bsize = get_plane_block_size(
+          bsize, xd->plane[1].subsampling_x, xd->plane[1].subsampling_y);
 
       mi->mode = this_mode;
       mi->ref_frame[0] = INTRA_FRAME;
@@ -1585,6 +1661,13 @@
       av1_foreach_transformed_block_in_plane(xd, bsize, 0, estimate_block_intra,
                                              &args);
       // TODO(kyslov@) Need to account for skippable
+      if (x->color_sensitivity[0])
+        av1_foreach_transformed_block_in_plane(xd, uv_bsize, 1,
+                                               estimate_block_intra, &args);
+      if (x->color_sensitivity[1])
+        av1_foreach_transformed_block_in_plane(xd, uv_bsize, 2,
+                                               estimate_block_intra, &args);
+
       int mode_cost = 0;
       if (av1_is_directional_mode(this_mode) && av1_use_angle_delta(bsize)) {
         mode_cost += x->angle_delta_cost[this_mode - V_PRED]