Enable R-D optimization of qcoeff for winner mode

For speed >= 3, R-D optimization of qcoeff is performed
conservatively during mode evaluation and is always
enabled for winner mode.

          Encode Time
Preset    Reduction       Quality Loss
  3         1.98%           +0.05%
  4         3.12%           +0.09%

STATS_CHANGED

Change-Id: Iba934b6cfb4e1e45da2a2aec91f8d4d41af356c0
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index 6f7a42c..2ea62d1 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -409,6 +409,10 @@
 
   CB_COEFF_BUFFER *cb_coef_buff;
 
+  // Threshold used to decide the applicability of R-D optimization of
+  // quantized coeffs
+  uint32_t coeff_opt_dist_threshold;
+
 #if !CONFIG_REALTIME_ONLY
   int quad_tree_idx;
   int cnn_output_valid;
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 8bb81bf..20aae15 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -662,6 +662,12 @@
     x->edge_strength_y = ei.y;
   }
 
+  // Default initialization of the threshold for R-D optimization of
+  // coefficients for mode decision
+  x->coeff_opt_dist_threshold =
+      get_rd_opt_coeff_thresh(cpi->coeff_opt_dist_threshold,
+                              cpi->sf.enable_winner_mode_for_coeff_opt, 0);
+
   // Save rdmult before it might be changed, so it can be restored later.
   const int orig_rdmult = x->rdmult;
   x->rdmult = cpi->rd.RDMULT;
diff --git a/av1/encoder/rd.h b/av1/encoder/rd.h
index de8e0b3..ad6dace 100644
--- a/av1/encoder/rd.h
+++ b/av1/encoder/rd.h
@@ -487,6 +487,22 @@
   x->errorperbit += (x->errorperbit == 0);
 }
 
+// Threshold for R-D optimization of coefficients: the input value as is, or
+// UINT32_MAX when is_winner_mode and enable_winner_mode_for_coeff_opt are set.
+static INLINE uint32_t get_rd_opt_coeff_thresh(
+    uint32_t coeff_opt_dist_threshold, int enable_winner_mode_for_coeff_opt,
+    int is_winner_mode) {
+  uint32_t coeff_opt_thresh = coeff_opt_dist_threshold;
+  // TODO(any): Experiment with coeff_opt_dist_threshold values when
+  // enable_winner_mode_for_coeff_opt is ON
+  // TODO(any): Skip the winner mode processing for blocks with lower residual
+  // energy as R-D optimization of coefficients would have been enabled during
+  // mode decision
+  if (is_winner_mode && enable_winner_mode_for_coeff_opt)
+    coeff_opt_thresh = UINT32_MAX;
+  return coeff_opt_thresh;
+}
+
 void av1_setup_pred_block(const MACROBLOCKD *xd,
                           struct buf_2d dst[MAX_MB_PLANE],
                           const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 3bb5c58..a618589 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -3125,7 +3125,7 @@
   // coeffs. For smaller residuals, coeff optimization would be helpful. For
   // larger residuals, R-D optimization may not be effective.
   // TODO(any): Experiment with variance and mean based thresholds
-  perform_block_coeff_opt = (block_mse_q8 <= cpi->coeff_opt_dist_threshold);
+  perform_block_coeff_opt = (block_mse_q8 <= x->coeff_opt_dist_threshold);
 
   assert(IMPLIES(txk_allowed < TX_TYPES, allowed_tx_mask == 1 << txk_allowed));
 
@@ -4710,6 +4710,11 @@
   else
     x->use_default_intra_tx_type = 0;
 
+  // Get the threshold for R-D optimization of coefficients for mode decision
+  x->coeff_opt_dist_threshold =
+      get_rd_opt_coeff_thresh(cpi->coeff_opt_dist_threshold,
+                              cpi->sf.enable_winner_mode_for_coeff_opt, 0);
+
   MB_MODE_INFO best_mbmi = *mbmi;
   /* Y Search for intra prediction mode */
   for (int mode_idx = INTRA_MODE_START; mode_idx < INTRA_MODE_END; ++mode_idx) {
@@ -4787,10 +4792,18 @@
     }
   }
 
-  // If previous searches use only the default tx type, do an extra search for
-  // the best tx type.
-  if (cpi->sf.tx_type_search.fast_intra_tx_type_search &&
-      !cpi->oxcf.use_intra_default_tx_only) {
+  // If previous searches use only the default tx type/no R-D optimization of
+  // quantized coeffs, do an extra search for the best tx type/better R-D
+  // optimization of quantized coeffs
+  if ((cpi->sf.tx_type_search.fast_intra_tx_type_search &&
+       !cpi->oxcf.use_intra_default_tx_only) ||
+      (cpi->sf.enable_winner_mode_for_coeff_opt &&
+       (cpi->optimize_seg_arr[mbmi->segment_id] != NO_TRELLIS_OPT &&
+        cpi->optimize_seg_arr[mbmi->segment_id] != FINAL_PASS_TRELLIS_OPT))) {
+    // Get the threshold for R-D optimization of coefficients for winner mode
+    x->coeff_opt_dist_threshold =
+        get_rd_opt_coeff_thresh(cpi->coeff_opt_dist_threshold,
+                                cpi->sf.enable_winner_mode_for_coeff_opt, 1);
     *mbmi = best_mbmi;
     x->use_default_intra_tx_type = 0;
     intra_block_yrd(cpi, x, bsize, bmode_costs, &best_rd, rate, rate_tokenonly,
@@ -10839,6 +10852,13 @@
     }
     if (num_planes > 1) {
       max_uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd);
+
+      // Get the threshold for R-D optimization of coefficients for mode
+      // decision
+      x->coeff_opt_dist_threshold =
+          get_rd_opt_coeff_thresh(cpi->coeff_opt_dist_threshold,
+                                  cpi->sf.enable_winner_mode_for_coeff_opt, 0);
+
       init_sbuv_mode(mbmi);
       if (!x->skip_chroma_rd)
         rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, &dist_uv,
@@ -11097,7 +11117,10 @@
         !cpi->oxcf.use_inter_dct_only && is_inter_mode(best_mbmode->mode)) ||
        (sf->tx_type_search.fast_intra_tx_type_search &&
         !cpi->oxcf.use_intra_default_tx_only && !cpi->oxcf.use_intra_dct_only &&
-        !is_inter_mode(best_mbmode->mode)))) {
+        !is_inter_mode(best_mbmode->mode)) ||
+       (cpi->sf.enable_winner_mode_for_coeff_opt &&
+        (cpi->optimize_seg_arr[mbmi->segment_id] != NO_TRELLIS_OPT &&
+         cpi->optimize_seg_arr[mbmi->segment_id] != FINAL_PASS_TRELLIS_OPT)))) {
     int skip_blk = 0;
     RD_STATS rd_stats_y, rd_stats_uv;
     const int skip_ctx = av1_get_skip_context(xd);
@@ -11105,6 +11128,11 @@
     x->use_default_inter_tx_type = 0;
     x->use_default_intra_tx_type = 0;
 
+    // Get the threshold for R-D optimization of coefficients for winner mode
+    x->coeff_opt_dist_threshold =
+        get_rd_opt_coeff_thresh(cpi->coeff_opt_dist_threshold,
+                                cpi->sf.enable_winner_mode_for_coeff_opt, 1);
+
     *mbmi = *best_mbmode;
 
     set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
@@ -11487,6 +11515,12 @@
     x->use_default_inter_tx_type = 1;
   else
     x->use_default_inter_tx_type = 0;
+
+  // Get the threshold for R-D optimization of coefficients for mode decision
+  x->coeff_opt_dist_threshold =
+      get_rd_opt_coeff_thresh(cpi->coeff_opt_dist_threshold,
+                              cpi->sf.enable_winner_mode_for_coeff_opt, 0);
+
   if (cpi->sf.skip_repeat_interpolation_filter_search) {
     x->interp_filter_stats_idx[0] = 0;
     x->interp_filter_stats_idx[1] = 0;
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index f08dc2e..84faf6c 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -325,6 +325,11 @@
     sf->prune_comp_type_by_model_rd = boosted ? 0 : 1;
     sf->disable_smooth_intra =
         !frame_is_intra_only(&cpi->common) || (cpi->rc.frames_to_key != 1);
+    sf->perform_coeff_opt = frame_is_intra_only(&cpi->common) ? 2 : 4;
+    // TODO(any): Experiment on the dependency of this speed feature with
+    // use_intra_txb_hash, use_inter_txb_hash and use_mb_rd_hash speed features
+    sf->enable_winner_mode_for_coeff_opt =
+        frame_is_intra_only(&cpi->common) ? 0 : 1;
   }
 
   if (speed >= 4) {
@@ -338,7 +343,8 @@
     sf->adaptive_mode_search = 1;
     sf->alt_ref_search_fp = 1;
     sf->skip_sharp_interp_filter_search = 1;
-    sf->perform_coeff_opt = is_boosted_arf2_bwd_type ? 2 : 4;
+    sf->perform_coeff_opt = 4;
+    sf->enable_winner_mode_for_coeff_opt = 1;
     sf->adaptive_txb_search_level = boosted ? 2 : 3;
   }
 
@@ -777,6 +783,7 @@
   sf->prune_warp_using_wmtype = 0;
   sf->disable_wedge_interintra_search = 0;
   sf->perform_coeff_opt = 0;
+  sf->enable_winner_mode_for_coeff_opt = 0;
   sf->prune_comp_type_by_model_rd = 0;
   sf->disable_smooth_intra = 0;
   sf->perform_best_rd_based_gating_for_chroma = 0;
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 4d02d56..c8a86ab 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -667,6 +667,10 @@
   // Flag used to control the extent of coeff R-D optimization
   int perform_coeff_opt;
 
+  // Flag used to control the winner mode processing for better R-D optimization
+  // of quantized coeffs
+  int enable_winner_mode_for_coeff_opt;
+
   // Flag used to control the speed of the eob selection in trellis.
   int trellis_eob_fast;