Clean ups on coeff optimization

Change-Id: Ia75dded05b6def859a7cb88a8e5e2afd1c06fe70
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index 38b7e71..af463a7 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -50,6 +50,13 @@
   unsigned int var;
 } DIFF;
 
+enum {
+  NO_TRELLIS_OPT,          // No trellis optimization
+  FULL_TRELLIS_OPT,        // Trellis optimization in all stages
+  FINAL_PASS_TRELLIS_OPT,  // Trellis optimization in only the final encode pass
+  NO_ESTIMATE_YRD_TRELLIS_OPT  // Disable trellis in estimate_yrd_for_sb
+} UENUM1BYTE(TRELLIS_OPT_TYPE);
+
 typedef struct macroblock_plane {
   DECLARE_ALIGNED(32, int16_t, src_diff[MAX_SB_SQUARE]);
   tran_low_t *qcoeff;
diff --git a/av1/encoder/compound_type.c b/av1/encoder/compound_type.c
index a7e9efc..ac377d8 100644
--- a/av1/encoder/compound_type.c
+++ b/av1/encoder/compound_type.c
@@ -481,8 +481,8 @@
   if (ref_best_rd < 0) return INT64_MAX;
   av1_subtract_plane(x, bs, 0);
   x->rd_model = LOW_TXFM_RD;
-  int skip_trellis = cpi->optimize_seg_arr[xd->mi[0]->segment_id] ==
-                     NO_ESTIMATE_YRD_TRELLIS_OPT;
+  const int skip_trellis = (cpi->optimize_seg_arr[xd->mi[0]->segment_id] ==
+                            NO_ESTIMATE_YRD_TRELLIS_OPT);
   const int64_t rd =
       av1_txfm_yrd(cpi, x, rd_stats, ref_best_rd, bs,
                    max_txsize_rect_lookup[bs], FTXS_NONE, skip_trellis);
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index dff7cd5..5cf1e41 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -5658,7 +5658,7 @@
     if (xd->lossless[i]) cpi->has_lossless_segment = 1;
     xd->qindex[i] = qindex;
     if (xd->lossless[i]) {
-      cpi->optimize_seg_arr[i] = 0;
+      cpi->optimize_seg_arr[i] = NO_TRELLIS_OPT;
     } else {
       cpi->optimize_seg_arr[i] = cpi->sf.rd_sf.optimize_coefficients;
     }
@@ -6236,7 +6236,7 @@
     xd->cfl.store_y = store_cfl_required(cm, xd);
     mbmi->skip = 1;
     for (int plane = 0; plane < num_planes; ++plane) {
-      av1_encode_intra_block_plane(cpi, x, bsize, plane,
+      av1_encode_intra_block_plane(cpi, x, bsize, plane, dry_run,
                                    cpi->optimize_seg_arr[mbmi->segment_id]);
     }
 
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index 1dce2ba..d855d2f 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -82,22 +82,22 @@
                      pd->dst.buf, pd->dst.stride);
 }
 
-int av1_optimize_b(const struct AV1_COMP *cpi, MACROBLOCK *mb, int plane,
+int av1_optimize_b(const struct AV1_COMP *cpi, MACROBLOCK *x, int plane,
                    int block, TX_SIZE tx_size, TX_TYPE tx_type,
                    const TXB_CTX *const txb_ctx, int fast_mode,
                    int *rate_cost) {
-  MACROBLOCKD *const xd = &mb->e_mbd;
-  struct macroblock_plane *const p = &mb->plane[plane];
+  MACROBLOCKD *const xd = &x->e_mbd;
+  struct macroblock_plane *const p = &x->plane[plane];
   const int eob = p->eobs[block];
   const int segment_id = xd->mi[0]->segment_id;
 
   if (eob == 0 || !cpi->optimize_seg_arr[segment_id] ||
       xd->lossless[segment_id]) {
-    *rate_cost = av1_cost_skip_txb(mb, txb_ctx, plane, tx_size);
+    *rate_cost = av1_cost_skip_txb(x, txb_ctx, plane, tx_size);
     return eob;
   }
 
-  return av1_optimize_txb_new(cpi, mb, plane, block, tx_size, tx_type, txb_ctx,
+  return av1_optimize_txb_new(cpi, x, plane, block, tx_size, tx_type, txb_ctx,
                               rate_cost, cpi->oxcf.sharpness, fast_mode);
 }
 
@@ -386,14 +386,13 @@
                               cm->features.reduced_tx_set_used);
     TxfmParam txfm_param;
     QUANT_PARAM quant_param;
-    int use_trellis = (args->enable_optimize_b != NO_TRELLIS_OPT);
+    const int use_trellis = is_trellis_used(args->enable_optimize_b, dry_run);
     int quant_idx;
-    if (use_trellis && args->enable_optimize_b != FINAL_PASS_TRELLIS_OPT) {
+    if (use_trellis)
       quant_idx = AV1_XFORM_QUANT_FP;
-    } else {
+    else
       quant_idx =
           USE_B_QUANT_NO_TRELLIS ? AV1_XFORM_QUANT_B : AV1_XFORM_QUANT_FP;
-    }
     av1_setup_xform(cm, x, tx_size, tx_type, &txfm_param);
     av1_setup_quant(tx_size, use_trellis, quant_idx, cpi->use_quant_b_adapt,
                     &quant_param);
@@ -617,13 +616,10 @@
   if (x->force_skip) return;
 
   struct optimize_ctx ctx;
-  struct encode_b_args arg = { cpi,
-                               x,
-                               &ctx,
-                               &mbmi->skip,
-                               NULL,
-                               NULL,
-                               cpi->optimize_seg_arr[mbmi->segment_id] };
+  struct encode_b_args arg = {
+    cpi,  x,    &ctx,    &mbmi->skip,
+    NULL, NULL, dry_run, cpi->optimize_seg_arr[mbmi->segment_id]
+  };
   const AV1_COMMON *const cm = &cpi->common;
   const int num_planes = av1_num_planes(cm);
   for (int plane = 0; plane < num_planes; ++plane) {
@@ -718,9 +714,10 @@
                               cm->features.reduced_tx_set_used);
     TxfmParam txfm_param;
     QUANT_PARAM quant_param;
-    int use_trellis = args->enable_optimize_b != NO_TRELLIS_OPT;
+    const int use_trellis =
+        is_trellis_used(args->enable_optimize_b, args->dry_run);
     int quant_idx;
-    if (use_trellis && args->enable_optimize_b != FINAL_PASS_TRELLIS_OPT)
+    if (use_trellis)
       quant_idx = AV1_XFORM_QUANT_FP;
     else
       quant_idx =
@@ -792,8 +789,8 @@
 }
 
 void av1_encode_intra_block_plane(const struct AV1_COMP *cpi, MACROBLOCK *x,
-                                  BLOCK_SIZE bsize, int plane,
-                                  int enable_optimize_b) {
+                                  BLOCK_SIZE bsize, int plane, RUN_TYPE dry_run,
+                                  TRELLIS_OPT_TYPE enable_optimize_b) {
   assert(bsize < BLOCK_SIZES_ALL);
   const MACROBLOCKD *const xd = &x->e_mbd;
   if (plane && !xd->is_chroma_ref) return;
@@ -803,9 +800,8 @@
   const int ss_y = pd->subsampling_y;
   ENTROPY_CONTEXT ta[MAX_MIB_SIZE] = { 0 };
   ENTROPY_CONTEXT tl[MAX_MIB_SIZE] = { 0 };
-  struct encode_b_args arg = {
-    cpi, x, NULL, &(xd->mi[0]->skip), ta, tl, enable_optimize_b
-  };
+  struct encode_b_args arg = { cpi, x,  NULL,    &(xd->mi[0]->skip),
+                               ta,  tl, dry_run, enable_optimize_b };
   const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ss_x, ss_y);
   if (enable_optimize_b) {
     av1_get_entropy_contexts(plane_bsize, pd, ta, tl);
diff --git a/av1/encoder/encodemb.h b/av1/encoder/encodemb.h
index 02f9d35..2abe8ff 100644
--- a/av1/encoder/encodemb.h
+++ b/av1/encoder/encodemb.h
@@ -34,7 +34,8 @@
   int8_t *skip;
   ENTROPY_CONTEXT *ta;
   ENTROPY_CONTEXT *tl;
-  int8_t enable_optimize_b;
+  RUN_TYPE dry_run;
+  TRELLIS_OPT_TYPE enable_optimize_b;
 };
 
 enum {
@@ -126,9 +127,16 @@
                             BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg);
 
 void av1_encode_intra_block_plane(const struct AV1_COMP *cpi, MACROBLOCK *x,
-                                  BLOCK_SIZE bsize, int plane,
-                                  int enable_optimize_b);
+                                  BLOCK_SIZE bsize, int plane, RUN_TYPE dry_run,
+                                  TRELLIS_OPT_TYPE enable_optimize_b);
 
+static INLINE int is_trellis_used(TRELLIS_OPT_TYPE optimize_b,
+                                  RUN_TYPE dry_run) {
+  if (optimize_b == NO_TRELLIS_OPT) return false;
+  if (optimize_b == FINAL_PASS_TRELLIS_OPT && dry_run != OUTPUT_ENABLED)
+    return false;
+  return true;
+}
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index b6eba2a..79bbe43 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -788,7 +788,7 @@
   struct lookahead_entry *alt_ref_source;
   int no_show_kf;
 
-  int optimize_seg_arr[MAX_SEGMENTS];
+  TRELLIS_OPT_TYPE optimize_seg_arr[MAX_SEGMENTS];
   int mi_ext_alloc_size;
 
   YV12_BUFFER_CONFIG *source;
diff --git a/av1/encoder/firstpass.c b/av1/encoder/firstpass.c
index 9183c28..1a32356 100644
--- a/av1/encoder/firstpass.c
+++ b/av1/encoder/firstpass.c
@@ -466,7 +466,7 @@
       xd->mi[0]->mode = DC_PRED;
       xd->mi[0]->tx_size =
           use_dc_pred ? (bsize >= BLOCK_16X16 ? TX_16X16 : TX_8X8) : TX_4X4;
-      av1_encode_intra_block_plane(cpi, x, bsize, 0, 0);
+      av1_encode_intra_block_plane(cpi, x, bsize, 0, DRY_RUN_NORMAL, 0);
       this_intra_error = aom_get_mb_ss(x->plane[0].src_diff);
 
       if (this_intra_error < UL_INTRA_THRESH) {
diff --git a/av1/encoder/intra_mode_search.c b/av1/encoder/intra_mode_search.c
index 0b5fb3e..5a85037 100644
--- a/av1/encoder/intra_mode_search.c
+++ b/av1/encoder/intra_mode_search.c
@@ -1216,6 +1216,7 @@
   if (xd->cfl.store_y) {
     // Restore reconstructed luma values.
     av1_encode_intra_block_plane(cpi, x, mbmi->sb_type, AOM_PLANE_Y,
+                                 DRY_RUN_NORMAL,
                                  cpi->optimize_seg_arr[mbmi->segment_id]);
     xd->cfl.store_y = 0;
   }
@@ -1336,6 +1337,7 @@
   int best_rate_uv[CFL_JOINT_SIGNS][CFL_PRED_PLANES];
 #endif  // CONFIG_DEBUG
 
+  const int skip_trellis = 0;
   for (int plane = 0; plane < CFL_PRED_PLANES; plane++) {
     RD_STATS rd_stats;
     av1_init_rd_stats(&rd_stats);
@@ -1351,9 +1353,9 @@
       if (i == CFL_SIGN_NEG) {
         mbmi->cfl_alpha_idx = 0;
         mbmi->cfl_alpha_signs = joint_sign;
-        av1_txfm_rd_in_plane(x, cpi, &rd_stats, best_rd, 0, plane + 1,
-                             plane_bsize, tx_size,
-                             cpi->sf.rd_sf.use_fast_coef_costing, FTXS_NONE, 0);
+        av1_txfm_rd_in_plane(
+            x, cpi, &rd_stats, best_rd, 0, plane + 1, plane_bsize, tx_size,
+            cpi->sf.rd_sf.use_fast_coef_costing, FTXS_NONE, skip_trellis);
         if (rd_stats.rate == INT_MAX) break;
       }
       const int alpha_rate = x->cfl_cost[joint_sign][plane][0];
@@ -1382,7 +1384,7 @@
             mbmi->cfl_alpha_signs = joint_sign;
             av1_txfm_rd_in_plane(
                 x, cpi, &rd_stats, best_rd, 0, plane + 1, plane_bsize, tx_size,
-                cpi->sf.rd_sf.use_fast_coef_costing, FTXS_NONE, 0);
+                cpi->sf.rd_sf.use_fast_coef_costing, FTXS_NONE, skip_trellis);
             if (rd_stats.rate == INT_MAX) break;
           }
           const int alpha_rate = x->cfl_cost[joint_sign][plane][c];
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index fc7870a..48a9d8b 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -2843,7 +2843,7 @@
       memcpy(x->blk_skip, ctx->blk_skip,
              sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
       av1_copy_array(xd->tx_type_map, ctx->tx_type_map, ctx->num_4x4_blk);
-      av1_encode_intra_block_plane(cpi, x, bsize, AOM_PLANE_Y,
+      av1_encode_intra_block_plane(cpi, x, bsize, AOM_PLANE_Y, DRY_RUN_NORMAL,
                                    cpi->optimize_seg_arr[mbmi->segment_id]);
       av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
       xd->cfl.store_y = 0;
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 923c0a5..45b0164 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -40,13 +40,6 @@
 } UENUM1BYTE(GM_ERRORADV_TYPE);
 
 enum {
-  NO_TRELLIS_OPT,          // No trellis optimization
-  FULL_TRELLIS_OPT,        // Trellis optimization in all stages
-  FINAL_PASS_TRELLIS_OPT,  // Trellis optimization in only the final encode pass
-  NO_ESTIMATE_YRD_TRELLIS_OPT  // Disable trellis in estimate_yrd_for_sb
-} UENUM1BYTE(TRELLIS_OPT_TYPE);
-
-enum {
   FULL_TXFM_RD,
   LOW_TXFM_RD,
 } UENUM1BYTE(TXFM_RD_MODEL);
diff --git a/av1/encoder/tx_search.c b/av1/encoder/tx_search.c
index 0ff71ef..5f905c9 100644
--- a/av1/encoder/tx_search.c
+++ b/av1/encoder/tx_search.c
@@ -2117,9 +2117,9 @@
       (mi_row + mi_size_high[plane_bsize] < xd->tile.mi_row_end) &&
       mi_col >= xd->tile.mi_col_start &&
       (mi_col + mi_size_wide[plane_bsize] < xd->tile.mi_col_end);
-  skip_trellis |=
-      cpi->optimize_seg_arr[mbmi->segment_id] == NO_TRELLIS_OPT ||
-      cpi->optimize_seg_arr[mbmi->segment_id] == FINAL_PASS_TRELLIS_OPT;
+
+  skip_trellis |= !is_trellis_used(cpi->optimize_seg_arr[xd->mi[0]->segment_id],
+                                   DRY_RUN_NORMAL);
   if (is_intra_hash_match(cpi, x, plane, blk_row, blk_col, plane_bsize, tx_size,
                           txb_ctx, &intra_txb_rd_info, within_border,
                           tx_type_map_idx, &cur_joint_ctx)) {
@@ -2417,8 +2417,11 @@
   }
 
   RD_STATS this_rd_stats;
+
+  const int skip_trellis = 0;
   search_txk_type(cpi, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
-                  txb_ctx, ftxs_mode, 0, 0, ref_rdcost, &this_rd_stats);
+                  txb_ctx, ftxs_mode, 0, skip_trellis, ref_rdcost,
+                  &this_rd_stats);
 
   av1_merge_rd_stats(rd_stats, &this_rd_stats);
 
@@ -2692,9 +2695,11 @@
   // Skip RDcost is used only for Inter blocks
   if (is_inter_block(xd->mi[0])) skip_rd = RDCOST(x->rdmult, s1, 0);
 
+  const int skip_trellis = 0;
   av1_txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, AOMMIN(this_rd, skip_rd),
                        AOM_PLANE_Y, bs, mbmi->tx_size,
-                       cpi->sf.rd_sf.use_fast_coef_costing, FTXS_NONE, 0);
+                       cpi->sf.rd_sf.use_fast_coef_costing, FTXS_NONE,
+                       skip_trellis);
 }
 
 static AOM_INLINE void choose_smallest_tx_size(const AV1_COMP *const cpi,
@@ -2707,8 +2712,10 @@
 
   mbmi->tx_size = TX_4X4;
   // TODO(any) : Pass this_rd based on skip/non-skip cost
+  const int skip_trellis = 0;
   av1_txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, 0, 0, bs, mbmi->tx_size,
-                       cpi->sf.rd_sf.use_fast_coef_costing, FTXS_NONE, 0);
+                       cpi->sf.rd_sf.use_fast_coef_costing, FTXS_NONE,
+                       skip_trellis);
 }
 
 static AOM_INLINE void choose_tx_size_type_from_rd(const AV1_COMP *const cpi,
@@ -2737,6 +2744,7 @@
     init_depth = MAX_TX_DEPTH;
   }
 
+  const int skip_trellis = 0;
   uint8_t best_txk_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
   uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
   TX_SIZE best_tx_size = max_rect_tx_size;
@@ -2750,8 +2758,8 @@
     if (!cpi->oxcf.enable_tx64 && txsize_sqr_up_map[n] == TX_64X64) continue;
 
     RD_STATS this_rd_stats;
-    rd[depth] =
-        av1_txfm_yrd(cpi, x, &this_rd_stats, ref_best_rd, bs, n, FTXS_NONE, 0);
+    rd[depth] = av1_txfm_yrd(cpi, x, &this_rd_stats, ref_best_rd, bs, n,
+                             FTXS_NONE, skip_trellis);
 
     if (rd[depth] < best_rd) {
       av1_copy_array(best_blk_skip, x->blk_skip, n4);
@@ -3367,6 +3375,7 @@
       av1_subtract_plane(x, plane_bsize, plane);
   }
 
+  const int skip_trellis = 0;
   if (is_cost_valid) {
     const TX_SIZE uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd);
     for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
@@ -3382,7 +3391,8 @@
         chroma_ref_best_rd = ref_best_rd - AOMMIN(this_rd, skip_rd);
       av1_txfm_rd_in_plane(x, cpi, &pn_rd_stats, chroma_ref_best_rd, 0, plane,
                            plane_bsize, uv_tx_size,
-                           cpi->sf.rd_sf.use_fast_coef_costing, FTXS_NONE, 0);
+                           cpi->sf.rd_sf.use_fast_coef_costing, FTXS_NONE,
+                           skip_trellis);
       if (pn_rd_stats.rate == INT_MAX) {
         is_cost_valid = 0;
         break;