Skip calculation related to U, V plane in tpl

Add a speed feature, tpl_sf.use_y_only_rate_distortion, that skips
the rate and distortion calculation for the U and V planes during
TPL model estimation, so only the Y plane is evaluated. The speed
feature is enabled for speeds 5 and 6.

          Instruction Count        BD-Rate Loss(%)
cpu-used    Reduction(%)     avg.psnr  ovr.psnr   ssim
   5          1.403           0.0917    0.0842   -0.0917
   6          0.978           0.0807    0.0687   -0.0379

STATS_CHANGED

Change-Id: Idc5fd3292ecd619655be150c4a4cec6b99e36e1d
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 907fcea..9b275fc 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -1006,6 +1006,7 @@
     sf->mv_sf.prune_mesh_search = 1;
 
     sf->tpl_sf.prune_starting_mv = 3;
+    sf->tpl_sf.use_y_only_rate_distortion = 1;
 
     sf->winner_mode_sf.dc_blk_pred_level = 1;
   }
@@ -1386,6 +1387,7 @@
   tpl_sf->disable_filtered_key_tpl = 0;
   tpl_sf->prune_ref_frames_in_tpl = 0;
   tpl_sf->allow_compound_pred = 1;
+  tpl_sf->use_y_only_rate_distortion = 0;
 }
 
 static AOM_INLINE void init_gm_sf(GLOBAL_MOTION_SPEED_FEATURES *gm_sf) {
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 4a7d024..504421c 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -374,6 +374,9 @@
 
   // Support compound predictions.
   int allow_compound_pred;
+
+  // Calculate rate and distortion based on Y plane only.
+  int use_y_only_rate_distortion;
 } TPL_SPEED_FEATURES;
 
 typedef struct GLOBAL_MOTION_SPEED_FEATURES {
diff --git a/av1/encoder/tpl_model.c b/av1/encoder/tpl_model.c
index 38b70c7..e1500ec 100644
--- a/av1/encoder/tpl_model.c
+++ b/av1/encoder/tpl_model.c
@@ -229,12 +229,13 @@
     tran_low_t *qcoeff, tran_low_t *dqcoeff, AV1_COMMON *cm, MACROBLOCK *x,
     const YV12_BUFFER_CONFIG *ref_frame_ptr[2], uint8_t *rec_buffer_pool[3],
     const int rec_stride_pool[3], TX_SIZE tx_size, PREDICTION_MODE best_mode,
-    int mi_row, int mi_col) {
+    int mi_row, int mi_col, int use_y_only_rate_distortion) {
   *rate_cost = 0;
   *recon_error = 1;
 
   MACROBLOCKD *xd = &x->e_mbd;
   int is_compound = (best_mode == NEW_NEWMV);
+  int num_planes = use_y_only_rate_distortion ? 1 : MAX_MB_PLANE;
 
   uint8_t *src_buffer_pool[MAX_MB_PLANE] = {
     xd->cur_buf->y_buffer,
@@ -250,7 +251,7 @@
   const int_interpfilters kernel =
       av1_broadcast_interp_filter(EIGHTTAP_REGULAR);
 
-  for (int plane = 0; plane < MAX_MB_PLANE; ++plane) {
+  for (int plane = 0; plane < num_planes; ++plane) {
     struct macroblockd_plane *pd = &xd->plane[plane];
     BLOCK_SIZE bsize_plane =
         ss_size_lookup[txsize_to_bsize[tx_size]][pd->subsampling_x]
@@ -348,6 +349,7 @@
       mi_row * MI_SIZE * tpl_frame->rec_picture->y_stride + mi_col * MI_SIZE;
   uint8_t *dst_buffer = tpl_frame->rec_picture->y_buffer + dst_mb_offset;
   int dst_buffer_stride = tpl_frame->rec_picture->y_stride;
+  int use_y_only_rate_distortion = cpi->sf.tpl_sf.use_y_only_rate_distortion;
 
   uint8_t *rec_buffer_pool[3] = {
     tpl_frame->rec_picture->y_buffer,
@@ -668,7 +670,8 @@
     int rate_cost = 1;
     get_rate_distortion(&rate_cost, &recon_error, src_diff, coeff, qcoeff,
                         dqcoeff, cm, x, ref_frame_ptr, rec_buffer_pool,
-                        rec_stride_pool, tx_size, best_mode, mi_row, mi_col);
+                        rec_stride_pool, tx_size, best_mode, mi_row, mi_col,
+                        use_y_only_rate_distortion);
     tpl_stats->srcrf_rate = rate_cost << TPL_DEP_COST_SCALE_LOG2;
   }
 
@@ -693,7 +696,8 @@
           : NULL;
   get_rate_distortion(&rate_cost, &recon_error, src_diff, coeff, qcoeff,
                       dqcoeff, cm, x, ref_frame_ptr, rec_buffer_pool,
-                      rec_stride_pool, tx_size, best_mode, mi_row, mi_col);
+                      rec_stride_pool, tx_size, best_mode, mi_row, mi_col,
+                      use_y_only_rate_distortion);
 
   tpl_stats->recrf_dist = recon_error << (TPL_DEP_COST_SCALE_LOG2);
   tpl_stats->recrf_rate = rate_cost << TPL_DEP_COST_SCALE_LOG2;
@@ -711,7 +715,8 @@
         tpl_data->src_ref_frame[comp_ref_frames[best_cmp_rf_idx][1]];
     get_rate_distortion(&rate_cost, &recon_error, src_diff, coeff, qcoeff,
                         dqcoeff, cm, x, ref_frame_ptr, rec_buffer_pool,
-                        rec_stride_pool, tx_size, best_mode, mi_row, mi_col);
+                        rec_stride_pool, tx_size, best_mode, mi_row, mi_col,
+                        use_y_only_rate_distortion);
     tpl_stats->cmp_recrf_dist[0] = recon_error << TPL_DEP_COST_SCALE_LOG2;
     tpl_stats->cmp_recrf_rate[0] = rate_cost << TPL_DEP_COST_SCALE_LOG2;
 
@@ -731,7 +736,8 @@
     ref_frame_ptr[1] = tpl_data->ref_frame[comp_ref_frames[best_cmp_rf_idx][1]];
     get_rate_distortion(&rate_cost, &recon_error, src_diff, coeff, qcoeff,
                         dqcoeff, cm, x, ref_frame_ptr, rec_buffer_pool,
-                        rec_stride_pool, tx_size, best_mode, mi_row, mi_col);
+                        rec_stride_pool, tx_size, best_mode, mi_row, mi_col,
+                        use_y_only_rate_distortion);
     tpl_stats->cmp_recrf_dist[1] = recon_error << TPL_DEP_COST_SCALE_LOG2;
     tpl_stats->cmp_recrf_rate[1] = rate_cost << TPL_DEP_COST_SCALE_LOG2;