Extend DC only txfm block prediction to speed 5

This CL extends the logic that predicts DC-only transform blocks
to cpu-used=5.

         Instruction Count       BD-Rate Loss(%)
cpu-used   Reduction(%)    avg.psnr  ovr.psnr  ssim
   5          0.905        0.0420    0.0361    0.0055

STATS_CHANGED

Change-Id: Id3d1202ca948315ac4651e479b4a0b45f330088d
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index c88c8c0..566f277 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -118,8 +118,9 @@
 // Values indicate the aggressiveness of skip flag prediction.
 // 0 : no early DC block prediction
 // 1 : Early DC block prediction based on error variance
-static unsigned int predict_dc_levels[2][MODE_EVAL_TYPES] = { { 0, 0, 0 },
-                                                              { 1, 1, 0 } };
+static unsigned int predict_dc_levels[3][MODE_EVAL_TYPES] = { { 0, 0, 0 },
+                                                              { 1, 1, 0 },
+                                                              { 1, 1, 1 } };
 
 // This table holds the maximum number of reference frames for global motion.
 // The table is indexed as per the speed feature 'gm_search_type'.
@@ -655,6 +656,8 @@
     sf->mv_sf.prune_mesh_search = 1;
 
     sf->tpl_sf.prune_starting_mv = 3;
+
+    sf->winner_mode_sf.dc_blk_pred_level = 1;
   }
 
   if (speed >= 6) {
@@ -675,7 +678,7 @@
 
     sf->rd_sf.perform_coeff_opt = is_boosted_arf2_bwd_type ? 4 : 6;
 
-    sf->winner_mode_sf.enable_dc_only_blk_pred = 1;
+    sf->winner_mode_sf.dc_blk_pred_level = 2;
     sf->winner_mode_sf.multi_winner_mode_type = MULTI_WINNER_MODE_OFF;
   }
 
@@ -1207,7 +1210,7 @@
   winner_mode_sf->enable_winner_mode_for_tx_size_srch = 0;
   winner_mode_sf->enable_winner_mode_for_use_tx_domain_dist = 0;
   winner_mode_sf->multi_winner_mode_type = 0;
-  winner_mode_sf->enable_dc_only_blk_pred = 0;
+  winner_mode_sf->dc_blk_pred_level = 0;
 }
 
 static AOM_INLINE void init_lpf_sf(LOOP_FILTER_SPEED_FEATURES *lpf_sf) {
@@ -1385,7 +1388,7 @@
          tx_size_search_methods[cpi->sf.winner_mode_sf.tx_size_search_level],
          sizeof(winner_mode_params->tx_size_search_methods));
   memcpy(winner_mode_params->predict_dc_level,
-         predict_dc_levels[cpi->sf.winner_mode_sf.enable_dc_only_blk_pred],
+         predict_dc_levels[cpi->sf.winner_mode_sf.dc_blk_pred_level],
          sizeof(winner_mode_params->predict_dc_level));
 
   if (cpi->oxcf.row_mt == 1 && (cpi->oxcf.max_threads > 1)) {
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index a453c84..eac36d6 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -891,7 +891,9 @@
   int motion_mode_for_winner_cand;
 
   // Early DC only txfm block prediction
-  int enable_dc_only_blk_pred;
+  // 0: speed feature OFF
+  // 1 / 2 : Selects the predict_dc_levels[] row (per mode evaluation type)
+  int dc_blk_pred_level;
 } WINNER_MODE_SPEED_FEATURES;
 
 typedef struct LOOP_FILTER_SPEED_FEATURES {
diff --git a/av1/encoder/tx_search.c b/av1/encoder/tx_search.c
index 0b2ca47..37da269 100644
--- a/av1/encoder/tx_search.c
+++ b/av1/encoder/tx_search.c
@@ -2207,10 +2207,7 @@
     get_txb_ctx(plane_bsize, tx_size, plane, ta, tl, &txb_ctx_tmp);
     const int zero_blk_rate = x->coeff_costs.coeff_costs[txs_ctx][plane_type]
                                   .txb_skip_cost[txb_ctx_tmp.txb_skip_ctx][1];
-    best_rd_stats->rate =
-        zero_blk_rate *
-        (block_size_wide[plane_bsize] >> tx_size_wide_log2[tx_size]) *
-        (block_size_high[plane_bsize] >> tx_size_high_log2[tx_size]);
+    best_rd_stats->rate = zero_blk_rate;
 
     best_rd_stats->rdcost =
         RDCOST(x->rdmult, best_rd_stats->rate, best_rd_stats->sse);
@@ -2307,7 +2304,7 @@
   unsigned int block_mse_q8;
   int dc_only_blk = 0;
   const bool predict_dc_block =
-      cpi->sf.winner_mode_sf.enable_dc_only_blk_pred && txw != 64 && txh != 64;
+      txfm_params->predict_dc_level && txw != 64 && txh != 64;
   int64_t per_px_mean = INT64_MAX;
   if (predict_dc_block) {
     predict_dc_only_block(x, plane, plane_bsize, tx_size, block, blk_row,