Further changes to new-quant tables

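Refactor to streamline the number of dequantization profiles needed,
in preparation for the next steps. The per-segment qindex is now
stored in xd->qindex[] and passed to get_dq_profile_from_ctx(), the
unused q argument is dropped from av1_get_dequant_val_nuq(), and
tokenize_tx()/av1_tokenize_sb_inter() are renamed to
tokenize_vartx()/av1_tokenize_sb_vartx().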

NO change in performance.

Change-Id: I753b89299897857f3c250c316b4cdc4fedcb90e8
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 12ef33d..6cf25fb 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -844,6 +844,7 @@
 
     if (cyclic_refresh_segment_id_boosted(segment_id)) {
       int q = av1_get_qindex(&cm->seg, segment_id, cm->base_qindex);
+      assert(q == xd->qindex[segment_id]);
       set_vbp_thresholds(cpi, thresholds, q);
     }
   }
@@ -1597,6 +1598,7 @@
   av1_init_plane_quantizers(cpi, x, segment_id);
   aom_clear_system_state();
   segment_qindex = av1_get_qindex(&cm->seg, segment_id, cm->base_qindex);
+  assert(segment_qindex == x->e_mbd.qindex[segment_id]);
   return av1_compute_rd_mult(cpi, segment_qindex + cm->y_dc_delta_q);
 }
 
@@ -4594,6 +4596,7 @@
                            : cm->base_qindex;
     xd->lossless[i] = qindex == 0 && cm->y_dc_delta_q == 0 &&
                       cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0;
+    xd->qindex[i] = qindex;
   }
 
   if (!cm->seg.enabled && xd->lossless[0]) x->optimize = 0;
@@ -5126,7 +5129,7 @@
       av1_tokenize_sb(cpi, td, t, !output_enabled, AOMMAX(bsize, BLOCK_8X8));
     else
 #endif
-      av1_tokenize_sb_inter(cpi, td, t, !output_enabled, mi_row, mi_col,
+      av1_tokenize_sb_vartx(cpi, td, t, !output_enabled, mi_row, mi_col,
                             AOMMAX(bsize, BLOCK_8X8));
 #else
     av1_tokenize_sb(cpi, td, t, !output_enabled, AOMMAX(bsize, BLOCK_8X8));
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index cf37140..c5dfadd 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -80,29 +80,29 @@
   tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
   tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
   const int eob = p->eobs[block];
-  const PLANE_TYPE type = pd->plane_type;
+  const PLANE_TYPE plane_type = pd->plane_type;
   const int default_eob = get_tx2d_size(tx_size);
   const int16_t *const dequant_ptr = pd->dequant;
   const uint8_t *const band_translate = get_band_translate(tx_size);
-  TX_TYPE tx_type = get_tx_type(type, xd, block, tx_size);
+  TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
   const scan_order *const so =
       get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
   const int16_t *const scan = so->scan;
   const int16_t *const nb = so->neighbors;
 #if CONFIG_AOM_QM
   int seg_id = xd->mi[0]->mbmi.segment_id;
-  int is_intra = !is_inter_block(&xd->mi[0]->mbmi);
-  const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][is_intra][tx_size];
+  const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][!ref][tx_size];
 #endif
   const int shift = get_tx_scale(xd, tx_type, tx_size);
 #if CONFIG_NEW_QUANT
-  int dq = get_dq_profile_from_ctx(ctx, ref, type);
+  int dq = get_dq_profile_from_ctx(xd->qindex[xd->mi[0]->mbmi.segment_id], ctx,
+                                   ref, plane_type);
   const dequant_val_type_nuq *dequant_val = pd->dequant_val_nuq[dq];
 #else
   const int dq_step[2] = { dequant_ptr[0] >> shift, dequant_ptr[1] >> shift };
 #endif  // CONFIG_NEW_QUANT
   int next = eob, sz = 0;
-  const int64_t rdmult = (mb->rdmult * plane_rd_mult[ref][type]) >> 1;
+  const int64_t rdmult = (mb->rdmult * plane_rd_mult[ref][plane_type]) >> 1;
   const int64_t rddiv = mb->rddiv;
   int64_t rd_cost0, rd_cost1;
   int rate0, rate1;
@@ -117,15 +117,18 @@
   const int *cat6_high_cost = av1_get_high_cost_table(8);
 #endif
   unsigned int(*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
-      mb->token_costs[txsize_sqr_map[tx_size]][type][ref];
+      mb->token_costs[txsize_sqr_map[tx_size]][plane_type][ref];
   const uint16_t *band_counts = &band_count_table[tx_size][band];
   uint16_t band_left = eob - band_cum_count_table[tx_size][band] + 1;
   int shortcut = 0;
   int next_shortcut = 0;
 
+  assert((xd->qindex[xd->mi[0]->mbmi.segment_id] == 0) ^
+         (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0));
+
   token_costs += band;
 
-  assert((!type && !plane) || (type && plane));
+  assert((!plane_type && !plane) || (plane_type && plane));
   assert(eob <= default_eob);
 
   /* Now set up a Viterbi trellis to evaluate alternative roundings. */
@@ -443,8 +446,8 @@
   const struct macroblockd_plane *const pd = &xd->plane[plane];
   PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
   TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
-  const scan_order *const scan_order =
-      get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
+  const int is_inter = is_inter_block(&xd->mi[0]->mbmi);
+  const scan_order *const scan_order = get_scan(tx_size, tx_type, is_inter);
   tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
   tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
   tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
@@ -452,9 +455,8 @@
   const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
 #if CONFIG_AOM_QM
   int seg_id = xd->mi[0]->mbmi.segment_id;
-  int is_intra = !is_inter_block(&xd->mi[0]->mbmi);
-  const qm_val_t *qmatrix = pd->seg_qmatrix[seg_id][is_intra][tx_size];
-  const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][is_intra][tx_size];
+  const qm_val_t *qmatrix = pd->seg_qmatrix[seg_id][!is_inter][tx_size];
+  const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][!is_inter][tx_size];
 #endif
   const int16_t *src_diff;
   const int tx2d_size = get_tx2d_size(tx_size);
@@ -522,7 +524,8 @@
   tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
   tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
   tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
-  int dq = get_dq_profile_from_ctx(ctx, is_inter, plane_type);
+  int dq = get_dq_profile_from_ctx(xd->qindex[xd->mi[0]->mbmi.segment_id], ctx,
+                                   is_inter, plane_type);
   uint16_t *const eob = &p->eobs[block];
   const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
   const int16_t *src_diff;
@@ -530,6 +533,9 @@
 
   FWD_TXFM_PARAM fwd_txfm_param;
 
+  assert((xd->qindex[xd->mi[0]->mbmi.segment_id] == 0) ^
+         (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0));
+
   fwd_txfm_param.tx_type = tx_type;
   fwd_txfm_param.tx_size = tx_size;
   fwd_txfm_param.fwd_txfm_opt = fwd_txfm_opt_list[AV1_XFORM_QUANT_FP];
@@ -588,7 +594,8 @@
   PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
   TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
   const scan_order *const scan_order = get_scan(tx_size, tx_type, is_inter);
-  int dq = get_dq_profile_from_ctx(ctx, is_inter, plane_type);
+  int dq = get_dq_profile_from_ctx(xd->qindex[xd->mi[0]->mbmi.segment_id], ctx,
+                                   is_inter, plane_type);
   tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
   tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
   tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
@@ -599,6 +606,9 @@
 
   FWD_TXFM_PARAM fwd_txfm_param;
 
+  assert((xd->qindex[xd->mi[0]->mbmi.segment_id] == 0) ^
+         (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0));
+
   fwd_txfm_param.tx_type = tx_type;
   fwd_txfm_param.tx_size = tx_size;
   fwd_txfm_param.fwd_txfm_opt = fwd_txfm_opt_list[AV1_XFORM_QUANT_FP];
@@ -661,10 +671,14 @@
   const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
   const int16_t *src_diff;
   const int is_inter = is_inter_block(&xd->mi[0]->mbmi);
-  int dq = get_dq_profile_from_ctx(ctx, is_inter, plane_type);
+  int dq = get_dq_profile_from_ctx(xd->qindex[xd->mi[0]->mbmi.segment_id], ctx,
+                                   is_inter, plane_type);
 
   FWD_TXFM_PARAM fwd_txfm_param;
 
+  assert((xd->qindex[xd->mi[0]->mbmi.segment_id] == 0) ^
+         (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0));
+
   fwd_txfm_param.tx_type = tx_type;
   fwd_txfm_param.tx_size = tx_size;
   fwd_txfm_param.fwd_txfm_opt = fwd_txfm_opt_list[AV1_XFORM_QUANT_DC];
@@ -722,10 +736,14 @@
   const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
   const int16_t *src_diff;
   const int is_inter = is_inter_block(&xd->mi[0]->mbmi);
-  int dq = get_dq_profile_from_ctx(ctx, is_inter, plane_type);
+  int dq = get_dq_profile_from_ctx(xd->qindex[xd->mi[0]->mbmi.segment_id], ctx,
+                                   is_inter, plane_type);
 
   FWD_TXFM_PARAM fwd_txfm_param;
 
+  assert((xd->qindex[xd->mi[0]->mbmi.segment_id] == 0) ^
+         (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0));
+
   fwd_txfm_param.tx_type = tx_type;
   fwd_txfm_param.tx_size = tx_size;
   fwd_txfm_param.fwd_txfm_opt = fwd_txfm_opt_list[AV1_XFORM_QUANT_DC];
diff --git a/av1/encoder/firstpass.c b/av1/encoder/firstpass.c
index c474e3f..5821d3f 100644
--- a/av1/encoder/firstpass.c
+++ b/av1/encoder/firstpass.c
@@ -492,6 +492,7 @@
   double intra_factor;
   double brightness_factor;
   BufferPool *const pool = cm->buffer_pool;
+  const int qindex = find_fp_qindex(cm->bit_depth);
 
   // First pass code requires valid last and new frame buffers.
   assert(new_yv12 != NULL);
@@ -510,7 +511,7 @@
   neutral_count = 0.0;
 
   set_first_pass_params(cpi);
-  av1_set_quantizer(cm, find_fp_qindex(cm->bit_depth));
+  av1_set_quantizer(cm, qindex);
 
   av1_setup_block_planes(&x->e_mbd, cm->subsampling_x, cm->subsampling_y);
 
@@ -584,6 +585,8 @@
 #if CONFIG_SUPERTX
       xd->mi[0]->mbmi.segment_id_supertx = 0;
 #endif  // CONFIG_SUPERTX
+      xd->qindex[xd->mi[0]->mbmi.segment_id] = qindex;
+      xd->lossless[xd->mi[0]->mbmi.segment_id] = (qindex == 0);
       xd->mi[0]->mbmi.mode = DC_PRED;
       xd->mi[0]->mbmi.tx_size =
           use_dc_pred ? (bsize >= BLOCK_16X16 ? TX_16X16 : TX_8X8) : TX_4X4;
diff --git a/av1/encoder/quantize.c b/av1/encoder/quantize.c
index 73946c2..d3b8c1c 100644
--- a/av1/encoder/quantize.c
+++ b/av1/encoder/quantize.c
@@ -1117,10 +1117,9 @@
       for (i = 0; i < COEF_BANDS; i++) {
         const int quant = cpi->y_dequant[q][i != 0];
         const int uvquant = cpi->uv_dequant[q][i != 0];
-        av1_get_dequant_val_nuq(quant, q, i, cpi->y_dequant_val_nuq[dq][q][i],
+        av1_get_dequant_val_nuq(quant, i, cpi->y_dequant_val_nuq[dq][q][i],
                                 quants->y_cuml_bins_nuq[dq][q][i], dq);
-        av1_get_dequant_val_nuq(uvquant, q, i,
-                                cpi->uv_dequant_val_nuq[dq][q][i],
+        av1_get_dequant_val_nuq(uvquant, i, cpi->uv_dequant_val_nuq[dq][q][i],
                                 quants->uv_cuml_bins_nuq[dq][q][i], dq);
       }
     }
diff --git a/av1/encoder/tokenize.c b/av1/encoder/tokenize.c
index 86c659b..3bf2410 100644
--- a/av1/encoder/tokenize.c
+++ b/av1/encoder/tokenize.c
@@ -560,9 +560,9 @@
 }
 
 #if CONFIG_VAR_TX
-void tokenize_tx(ThreadData *td, TOKENEXTRA **t, int dry_run, TX_SIZE tx_size,
-                 BLOCK_SIZE plane_bsize, int blk_row, int blk_col, int block,
-                 int plane, void *arg) {
+void tokenize_vartx(ThreadData *td, TOKENEXTRA **t, int dry_run,
+                    TX_SIZE tx_size, BLOCK_SIZE plane_bsize, int blk_row,
+                    int blk_col, int block, int plane, void *arg) {
   MACROBLOCK *const x = &td->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
@@ -610,13 +610,13 @@
 
       if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
 
-      tokenize_tx(td, t, dry_run, tx_size - 1, plane_bsize, offsetr, offsetc,
-                  block + i * step, plane, arg);
+      tokenize_vartx(td, t, dry_run, tx_size - 1, plane_bsize, offsetr, offsetc,
+                     block + i * step, plane, arg);
     }
   }
 }
 
-void av1_tokenize_sb_inter(AV1_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
+void av1_tokenize_sb_vartx(AV1_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
                            int dry_run, int mi_row, int mi_col,
                            BLOCK_SIZE bsize) {
   AV1_COMMON *const cm = &cpi->common;
@@ -656,8 +656,8 @@
     int step = num_4x4_blocks_txsize_lookup[max_tx_size];
     for (idy = 0; idy < mi_height; idy += bh) {
       for (idx = 0; idx < mi_width; idx += bh) {
-        tokenize_tx(td, t, dry_run, max_tx_size, plane_bsize, idy, idx, block,
-                    plane, &arg);
+        tokenize_vartx(td, t, dry_run, max_tx_size, plane_bsize, idy, idx,
+                       block, plane, &arg);
         block += step;
       }
     }
diff --git a/av1/encoder/tokenize.h b/av1/encoder/tokenize.h
index 37f2309..a7e30d5 100644
--- a/av1/encoder/tokenize.h
+++ b/av1/encoder/tokenize.h
@@ -57,7 +57,7 @@
 struct ThreadData;
 
 #if CONFIG_VAR_TX
-void av1_tokenize_sb_inter(struct AV1_COMP *cpi, struct ThreadData *td,
+void av1_tokenize_sb_vartx(struct AV1_COMP *cpi, struct ThreadData *td,
                            TOKENEXTRA **t, int dry_run, int mi_row, int mi_col,
                            BLOCK_SIZE bsize);
 #endif