Refactor the setup stage of xform and quant.

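Separate the parameter setup from the av1_xform_quant function into
av1_setup_xform, av1_setup_quant and av1_setup_qmatrix. This avoids
redundantly re-assigning parameters during the tx search and slightly
reduces the instruction count (no performance change).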

Change-Id: I06b94e668aa23ebf0a45e0060a2cbe82b7c54d20
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index fb4fab5..143df94 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -125,21 +125,83 @@
 };
 #endif
 
-void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
-                     int blk_row, int blk_col, BLOCK_SIZE plane_bsize,
-                     TX_SIZE tx_size, TX_TYPE tx_type,
-                     AV1_XFORM_QUANT xform_quant_idx) {
+void av1_xform_quant(MACROBLOCK *x, int plane, int block, int blk_row,
+                     int blk_col, BLOCK_SIZE plane_bsize,
+                     AV1_XFORM_QUANT xform_quant_idx, TxfmParam *txfm_param,
+                     QUANT_PARAM *qparam) {
   MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
   const struct macroblock_plane *const p = &x->plane[plane];
   const struct macroblockd_plane *const pd = &xd->plane[plane];
-  const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type);
+  const SCAN_ORDER *const scan_order =
+      get_scan(txfm_param->tx_size, txfm_param->tx_type);
   const int block_offset = BLOCK_OFFSET(block);
   tran_low_t *const coeff = p->coeff + block_offset;
   tran_low_t *const qcoeff = p->qcoeff + block_offset;
   tran_low_t *const dqcoeff = pd->dqcoeff + block_offset;
   uint16_t *const eob = &p->eobs[block];
   const int diff_stride = block_size_wide[plane_bsize];
+
+  const int src_offset = (blk_row * diff_stride + blk_col);
+  const int16_t *src_diff = &p->src_diff[src_offset << tx_size_wide_log2[0]];
+
+  av1_fwd_txfm(src_diff, coeff, diff_stride, txfm_param);
+
+  if (xform_quant_idx != AV1_XFORM_QUANT_SKIP_QUANT) {
+    const int n_coeffs = av1_get_max_eob(txfm_param->tx_size);
+    if (LIKELY(!x->skip_block)) {
+#if CONFIG_AV1_HIGHBITDEPTH
+      quant_func_list[xform_quant_idx][txfm_param->is_hbd](
+          coeff, n_coeffs, p, qcoeff, dqcoeff, eob, scan_order, qparam);
+#else
+      quant_func_list[xform_quant_idx](coeff, n_coeffs, p, qcoeff, dqcoeff, eob,
+                                       scan_order, qparam);
+#endif
+    } else {
+      av1_quantize_skip(n_coeffs, qcoeff, dqcoeff, eob);
+    }
+  }
+  // NOTE: optimize_b_following being true means av1_optimize_b will be called.
+  // When the condition for doing optimize_b is changed,
+  // this flag needs to be updated simultaneously.
+  const int optimize_b_following =
+      (xform_quant_idx != AV1_XFORM_QUANT_FP) || (txfm_param->lossless);
+  if (optimize_b_following) {
+    p->txb_entropy_ctx[block] =
+        (uint8_t)av1_get_txb_entropy_context(qcoeff, scan_order, *eob);
+  } else {
+    p->txb_entropy_ctx[block] = 0;
+  }
+  return;
+}
+
+void av1_setup_xform(const AV1_COMMON *cm, MACROBLOCK *x, TX_SIZE tx_size,
+                     TX_TYPE tx_type, TxfmParam *txfm_param) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MB_MODE_INFO *const mbmi = xd->mi[0];
+
+  txfm_param->tx_type = tx_type;
+  txfm_param->tx_size = tx_size;
+  txfm_param->lossless = xd->lossless[mbmi->segment_id];
+  txfm_param->tx_set_type = av1_get_ext_tx_set_type(
+      tx_size, is_inter_block(mbmi), cm->reduced_tx_set_used);
+
+  txfm_param->bd = xd->bd;
+  txfm_param->is_hbd = is_cur_buf_hbd(xd);
+}
+void av1_setup_quant(const AV1_COMMON *cm, TX_SIZE tx_size,
+                     QUANT_PARAM *qparam) {
+  qparam->log_scale = av1_get_tx_scale(tx_size);
+  qparam->tx_size = tx_size;
+
+  qparam->use_quant_b_adapt = cm->use_quant_b_adapt;
+  qparam->qmatrix = NULL;
+  qparam->iqmatrix = NULL;
+}
+void av1_setup_qmatrix(const AV1_COMMON *cm, MACROBLOCK *x, int plane,
+                       TX_SIZE tx_size, TX_TYPE tx_type, QUANT_PARAM *qparam) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  const struct macroblockd_plane *const pd = &xd->plane[plane];
+  MB_MODE_INFO *const mbmi = xd->mi[0];
   int seg_id = mbmi->segment_id;
   const TX_SIZE qm_tx_size = av1_get_adjusted_tx_size(tx_size);
   // Use a flat matrix (i.e. no weighting) for 1D and Identity transforms
@@ -150,53 +212,8 @@
       IS_2D_TRANSFORM(tx_type)
           ? pd->seg_iqmatrix[seg_id][qm_tx_size]
           : cm->giqmatrix[NUM_QM_LEVELS - 1][0][qm_tx_size];
-
-  const int src_offset = (blk_row * diff_stride + blk_col);
-  const int16_t *src_diff = &p->src_diff[src_offset << tx_size_wide_log2[0]];
-  QUANT_PARAM qparam;
-  qparam.log_scale = av1_get_tx_scale(tx_size);
-  qparam.tx_size = tx_size;
-  qparam.qmatrix = qmatrix;
-  qparam.iqmatrix = iqmatrix;
-  qparam.use_quant_b_adapt = cm->use_quant_b_adapt;
-  TxfmParam txfm_param;
-  txfm_param.tx_type = tx_type;
-  txfm_param.tx_size = tx_size;
-  txfm_param.lossless = xd->lossless[mbmi->segment_id];
-  txfm_param.tx_set_type = av1_get_ext_tx_set_type(
-      txfm_param.tx_size, is_inter_block(mbmi), cm->reduced_tx_set_used);
-
-  txfm_param.bd = xd->bd;
-  txfm_param.is_hbd = is_cur_buf_hbd(xd);
-
-  av1_fwd_txfm(src_diff, coeff, diff_stride, &txfm_param);
-
-  if (xform_quant_idx != AV1_XFORM_QUANT_SKIP_QUANT) {
-    const int n_coeffs = av1_get_max_eob(tx_size);
-    if (LIKELY(!x->skip_block)) {
-#if CONFIG_AV1_HIGHBITDEPTH
-      quant_func_list[xform_quant_idx][txfm_param.is_hbd](
-          coeff, n_coeffs, p, qcoeff, dqcoeff, eob, scan_order, &qparam);
-#else
-      quant_func_list[xform_quant_idx](coeff, n_coeffs, p, qcoeff, dqcoeff, eob,
-                                       scan_order, &qparam);
-#endif
-    } else {
-      av1_quantize_skip(n_coeffs, qcoeff, dqcoeff, eob);
-    }
-  }
-  // NOTE: optimize_b_following is true means av1_optimze_b will be called
-  // When the condition of doing optimize_b is changed,
-  // this flag need update simultaneously
-  const int optimize_b_following =
-      (xform_quant_idx != AV1_XFORM_QUANT_FP) || (txfm_param.lossless);
-  if (optimize_b_following) {
-    p->txb_entropy_ctx[block] =
-        (uint8_t)av1_get_txb_entropy_context(qcoeff, scan_order, *eob);
-  } else {
-    p->txb_entropy_ctx[block] = 0;
-  }
-  return;
+  qparam->qmatrix = qmatrix;
+  qparam->iqmatrix = iqmatrix;
 }
 
 static void encode_block(int plane, int block, int blk_row, int blk_col,
@@ -228,21 +245,27 @@
   if (!is_blk_skip(x, plane, blk_row * bw + blk_col) && !mbmi->skip_mode) {
     tx_type = av1_get_tx_type(xd, pd->plane_type, blk_row, blk_col, tx_size,
                               cm->reduced_tx_set_used);
+    TxfmParam txfm_param;
+    QUANT_PARAM quant_param;
+    av1_setup_xform(cm, x, tx_size, tx_type, &txfm_param);
+    av1_setup_quant(cm, tx_size, &quant_param);
+    av1_setup_qmatrix(cm, x, plane, tx_size, tx_type, &quant_param);
     if (args->enable_optimize_b != NO_TRELLIS_OPT) {
-      av1_xform_quant(
-          cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size, tx_type,
-          USE_B_QUANT_NO_TRELLIS &&
-                  (args->enable_optimize_b == FINAL_PASS_TRELLIS_OPT)
-              ? AV1_XFORM_QUANT_B
-              : AV1_XFORM_QUANT_FP);
+      av1_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize,
+                      USE_B_QUANT_NO_TRELLIS && (args->enable_optimize_b ==
+                                                 FINAL_PASS_TRELLIS_OPT)
+                          ? AV1_XFORM_QUANT_B
+                          : AV1_XFORM_QUANT_FP,
+                      &txfm_param, &quant_param);
       TXB_CTX txb_ctx;
       get_txb_ctx(plane_bsize, tx_size, plane, a, l, &txb_ctx);
       av1_optimize_b(args->cpi, x, plane, block, tx_size, tx_type, &txb_ctx,
                      args->cpi->sf.trellis_eob_fast, &dummy_rate_cost);
     } else {
       av1_xform_quant(
-          cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size, tx_type,
-          USE_B_QUANT_NO_TRELLIS ? AV1_XFORM_QUANT_B : AV1_XFORM_QUANT_FP);
+          x, plane, block, blk_row, blk_col, plane_bsize,
+          USE_B_QUANT_NO_TRELLIS ? AV1_XFORM_QUANT_B : AV1_XFORM_QUANT_FP,
+          &txfm_param, &quant_param);
     }
   } else {
     p->eobs[block] = 0;
@@ -424,22 +447,23 @@
   struct macroblock_plane *const p = &x->plane[plane];
   struct macroblockd_plane *const pd = &xd->plane[plane];
   tran_low_t *const dqcoeff = pd->dqcoeff + BLOCK_OFFSET(block);
-  TxfmParam txfm_param;
+
   uint8_t *dst;
   dst = &pd->dst
              .buf[(blk_row * pd->dst.stride + blk_col) << tx_size_wide_log2[0]];
-  av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
-                  DCT_DCT, AV1_XFORM_QUANT_B);
+
+  TxfmParam txfm_param;
+  QUANT_PARAM quant_param;
+
+  av1_setup_xform(cm, x, tx_size, DCT_DCT, &txfm_param);
+  av1_setup_quant(cm, tx_size, &quant_param);
+  av1_setup_qmatrix(cm, x, plane, tx_size, DCT_DCT, &quant_param);
+
+  av1_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize,
+                  AV1_XFORM_QUANT_B, &txfm_param, &quant_param);
 
   if (p->eobs[block] > 0) {
-    txfm_param.bd = xd->bd;
-    txfm_param.is_hbd = is_cur_buf_hbd(xd);
-    txfm_param.tx_type = DCT_DCT;
-    txfm_param.tx_size = tx_size;
     txfm_param.eob = p->eobs[block];
-    txfm_param.lossless = xd->lossless[xd->mi[0]->segment_id];
-    txfm_param.tx_set_type = av1_get_ext_tx_set_type(
-        txfm_param.tx_size, is_inter_block(xd->mi[0]), cm->reduced_tx_set_used);
     if (txfm_param.is_hbd) {
       av1_highbd_inv_txfm_add(dqcoeff, dst, pd->dst.stride, &txfm_param);
       return;
@@ -582,21 +606,27 @@
     const ENTROPY_CONTEXT *l = &args->tl[blk_row];
     tx_type = av1_get_tx_type(xd, plane_type, blk_row, blk_col, tx_size,
                               cm->reduced_tx_set_used);
+    TxfmParam txfm_param;
+    QUANT_PARAM quant_param;
+    av1_setup_xform(cm, x, tx_size, tx_type, &txfm_param);
+    av1_setup_quant(cm, tx_size, &quant_param);
+    av1_setup_qmatrix(cm, x, plane, tx_size, tx_type, &quant_param);
     if (args->enable_optimize_b != NO_TRELLIS_OPT) {
-      av1_xform_quant(
-          cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size, tx_type,
-          USE_B_QUANT_NO_TRELLIS &&
-                  (args->enable_optimize_b == FINAL_PASS_TRELLIS_OPT)
-              ? AV1_XFORM_QUANT_B
-              : AV1_XFORM_QUANT_FP);
+      av1_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize,
+                      USE_B_QUANT_NO_TRELLIS && (args->enable_optimize_b ==
+                                                 FINAL_PASS_TRELLIS_OPT)
+                          ? AV1_XFORM_QUANT_B
+                          : AV1_XFORM_QUANT_FP,
+                      &txfm_param, &quant_param);
       TXB_CTX txb_ctx;
       get_txb_ctx(plane_bsize, tx_size, plane, a, l, &txb_ctx);
       av1_optimize_b(args->cpi, x, plane, block, tx_size, tx_type, &txb_ctx,
                      args->cpi->sf.trellis_eob_fast, &dummy_rate_cost);
     } else {
       av1_xform_quant(
-          cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size, tx_type,
-          USE_B_QUANT_NO_TRELLIS ? AV1_XFORM_QUANT_B : AV1_XFORM_QUANT_FP);
+          x, plane, block, blk_row, blk_col, plane_bsize,
+          USE_B_QUANT_NO_TRELLIS ? AV1_XFORM_QUANT_B : AV1_XFORM_QUANT_FP,
+          &txfm_param, &quant_param);
     }
   }
 
diff --git a/av1/encoder/encodemb.h b/av1/encoder/encodemb.h
index efb2314..0a38859 100644
--- a/av1/encoder/encodemb.h
+++ b/av1/encoder/encodemb.h
@@ -59,10 +59,17 @@
 
 void av1_encode_sby_pass1(AV1_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE bsize);
 
-void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
-                     int blk_row, int blk_col, BLOCK_SIZE plane_bsize,
-                     TX_SIZE tx_size, TX_TYPE tx_type,
-                     AV1_XFORM_QUANT xform_quant_idx);
+void av1_setup_xform(const AV1_COMMON *cm, MACROBLOCK *x, TX_SIZE tx_size,
+                     TX_TYPE tx_type, TxfmParam *txfm_param);
+void av1_setup_quant(const AV1_COMMON *cm, TX_SIZE tx_size,
+                     QUANT_PARAM *qparam);
+void av1_setup_qmatrix(const AV1_COMMON *cm, MACROBLOCK *x, int plane,
+                       TX_SIZE tx_size, TX_TYPE tx_type, QUANT_PARAM *qparam);
+
+void av1_xform_quant(MACROBLOCK *x, int plane, int block, int blk_row,
+                     int blk_col, BLOCK_SIZE plane_bsize,
+                     AV1_XFORM_QUANT xform_quant_idx, TxfmParam *txfm_param,
+                     QUANT_PARAM *qparam);
 
 int av1_optimize_b(const struct AV1_COMP *cpi, MACROBLOCK *mb, int plane,
                    int block, TX_SIZE tx_size, TX_TYPE tx_type,
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index a83e161..4140ac7 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -3461,22 +3461,33 @@
 
   assert(IMPLIES(txk_allowed < TX_TYPES, allowed_tx_mask == 1 << txk_allowed));
 
+  TxfmParam txfm_param;
+  QUANT_PARAM quant_param;
+  av1_setup_xform(cm, x, tx_size, DCT_DCT, &txfm_param);
+  av1_setup_quant(cm, tx_size, &quant_param);
+  int use_qm = !(xd->lossless[mbmi->segment_id] || cm->using_qmatrix == 0);
+
   for (int idx = 0; idx < TX_TYPES; ++idx) {
     const TX_TYPE tx_type = (TX_TYPE)txk_map[idx];
     if (!(allowed_tx_mask & (1 << tx_type))) continue;
+    txfm_param.tx_type = tx_type;
+    if (use_qm) {
+      av1_setup_qmatrix(cm, x, plane, tx_size, tx_type, &quant_param);
+    }
     if (plane == 0) xd->tx_type_map[tx_type_map_idx] = tx_type;
     RD_STATS this_rd_stats;
     av1_invalid_rd_stats(&this_rd_stats);
     if (skip_trellis || (!perform_block_coeff_opt)) {
       av1_xform_quant(
-          cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size, tx_type,
-          USE_B_QUANT_NO_TRELLIS ? AV1_XFORM_QUANT_B : AV1_XFORM_QUANT_FP);
+          x, plane, block, blk_row, blk_col, plane_bsize,
+          USE_B_QUANT_NO_TRELLIS ? AV1_XFORM_QUANT_B : AV1_XFORM_QUANT_FP,
+          &txfm_param, &quant_param);
       rate_cost =
           av1_cost_coeffs(x, plane, block, tx_size, tx_type, txb_ctx,
                           use_fast_coef_costing, cm->reduced_tx_set_used);
     } else {
-      av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize,
-                      tx_size, tx_type, AV1_XFORM_QUANT_FP);
+      av1_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize,
+                      AV1_XFORM_QUANT_FP, &txfm_param, &quant_param);
       if (cpi->sf.optimize_b_precheck && best_rd < INT64_MAX &&
           eobs_ptr[block] >= 4) {
         // Calculate distortion quickly in transform domain.
@@ -3646,14 +3657,21 @@
     // if the last search tx_type is the best tx_type, we don't need to
     // do this again
     if (best_tx_type != last_tx_type) {
+      TxfmParam txfm_param_intra;
+      QUANT_PARAM quant_param_intra;
+      av1_setup_xform(cm, x, tx_size, best_tx_type, &txfm_param_intra);
+      av1_setup_quant(cm, tx_size, &quant_param_intra);
+      av1_setup_qmatrix(cm, x, plane, tx_size, best_tx_type,
+                        &quant_param_intra);
       if (skip_trellis || (!perform_block_coeff_opt)) {
         av1_xform_quant(
-            cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
-            best_tx_type,
-            USE_B_QUANT_NO_TRELLIS ? AV1_XFORM_QUANT_B : AV1_XFORM_QUANT_FP);
+            x, plane, block, blk_row, blk_col, plane_bsize,
+            USE_B_QUANT_NO_TRELLIS ? AV1_XFORM_QUANT_B : AV1_XFORM_QUANT_FP,
+            &txfm_param_intra, &quant_param_intra);
       } else {
-        av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize,
-                        tx_size, best_tx_type, AV1_XFORM_QUANT_FP);
+        av1_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize,
+                        AV1_XFORM_QUANT_FP, &txfm_param_intra,
+                        &quant_param_intra);
         av1_optimize_b(cpi, x, plane, block, tx_size, best_tx_type, txb_ctx,
                        cpi->sf.trellis_eob_fast, &rate_cost);
       }