Make new-quant compatible with aom-qm

Change-Id: I4d201631e288ca0f5a0d13583707dff483eb5cac
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index edd5fcb..9f4b26f 100755
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -151,18 +151,18 @@
 }
 
 if (aom_config("CONFIG_NEW_QUANT") eq "yes") {
-  add_proto qw/void quantize_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan";
+  add_proto qw/void quantize_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, int dq, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr";
 
-  add_proto qw/void quantize_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan";
+  add_proto qw/void quantize_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, int dq, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr";
 
-  add_proto qw/void quantize_32x32_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan";
+  add_proto qw/void quantize_32x32_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, int dq, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr";
 
-  add_proto qw/void quantize_32x32_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan";
+  add_proto qw/void quantize_32x32_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, int dq, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr";
 
   if (aom_config("CONFIG_TX64X64") eq "yes") {
-    add_proto qw/void quantize_64x64_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan";
+    add_proto qw/void quantize_64x64_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, int dq, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr";
 
-    add_proto qw/void quantize_64x64_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan";
+    add_proto qw/void quantize_64x64_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, int dq, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr";
   }
 }
 
@@ -415,18 +415,18 @@
 
     # ENCODEMB INVOKE
     if (aom_config("CONFIG_NEW_QUANT") eq "yes") {
-      add_proto qw/void highbd_quantize_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan";
+      add_proto qw/void highbd_quantize_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, int dq, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr";
 
-      add_proto qw/void highbd_quantize_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan";
+      add_proto qw/void highbd_quantize_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, int dq, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr";
 
-      add_proto qw/void highbd_quantize_32x32_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan";
+      add_proto qw/void highbd_quantize_32x32_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, int dq, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr";
 
-      add_proto qw/void highbd_quantize_32x32_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan";
+      add_proto qw/void highbd_quantize_32x32_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, int dq, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr";
 
       if (aom_config("CONFIG_TX64X64") eq "yes") {
-        add_proto qw/void highbd_quantize_64x64_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan";
+        add_proto qw/void highbd_quantize_64x64_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, int dq, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr";
 
-        add_proto qw/void highbd_quantize_64x64_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan";
+        add_proto qw/void highbd_quantize_64x64_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, int dq, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr";
       }
     }
 
diff --git a/av1/common/quant_common.c b/av1/common/quant_common.c
index 12e0f03..c924116 100644
--- a/av1/common/quant_common.c
+++ b/av1/common/quant_common.c
@@ -232,15 +232,35 @@
   dq[2] = ROUND_POWER_OF_TWO_SIGNED(doff * q, 9);
 }
 
-tran_low_t av1_dequant_abscoeff_nuq(int v, int q, const tran_low_t *dq,
+tran_low_t av1_dequant_abscoeff_nuq(int v, int q,
+#if CONFIG_AOM_QM
+                                    int q_profile, int is_ac_coeff,
+#else
+                                    const tran_low_t *dq,
+#endif  // CONFIG_AOM_QM
                                     int shift) {
   if (v == 0) return 0;
+#if CONFIG_AOM_QM
+  const uint8_t doff = quant_to_doff_fixed(is_ac_coeff, q_profile);
+  return ((q * v) >> shift) + ROUND_POWER_OF_TWO_SIGNED(doff * q, 7 + shift);
+#else
   return ((q * v) >> shift) + dq[shift];
+#endif
 }
 
-tran_low_t av1_dequant_coeff_nuq(int v, int q, const tran_low_t *dq,
+tran_low_t av1_dequant_coeff_nuq(int v, int q,
+#if CONFIG_AOM_QM
+                                 int q_profile, int is_ac_coeff,
+#else
+                                 const tran_low_t *dq,
+#endif  // CONFIG_AOM_QM
                                  int shift) {
+#if CONFIG_AOM_QM
+  tran_low_t dqmag =
+      av1_dequant_abscoeff_nuq(abs(v), q, q_profile, is_ac_coeff, shift);
+#else
   tran_low_t dqmag = av1_dequant_abscoeff_nuq(abs(v), q, dq, shift);
+#endif  // CONFIG_AOM_QM
   return (v < 0 ? -dqmag : dqmag);
 }
 #endif  // CONFIG_NEW_QUANT
diff --git a/av1/common/quant_common.h b/av1/common/quant_common.h
index ada7c67..e3b4f84 100644
--- a/av1/common/quant_common.h
+++ b/av1/common/quant_common.h
@@ -75,9 +75,20 @@
                              int dq_off_index);
 void av1_get_cuml_bins_nuq(int q, int is_ac_coeff, tran_low_t *cuml_bins,
                            int q_profile);
-tran_low_t av1_dequant_abscoeff_nuq(int v, int q, const tran_low_t *dq,
+tran_low_t av1_dequant_abscoeff_nuq(int v, int q,
+#if CONFIG_AOM_QM
+                                    int q_profile, int is_ac_coeff,
+#else
+                                    const tran_low_t *dq,
+#endif  // CONFIG_AOM_QM
                                     int shift);
-tran_low_t av1_dequant_coeff_nuq(int v, int q, const tran_low_t *dq, int shift);
+tran_low_t av1_dequant_coeff_nuq(int v, int q,
+#if CONFIG_AOM_QM
+                                 int q_profile, int is_ac_coeff,
+#else
+                                 const tran_low_t *dq,
+#endif  // CONFIG_AOM_QM
+                                 int shift);
 
 static INLINE int qindex_to_qrange(int qindex) {
   return (qindex < 140 ? 1 : 0);
diff --git a/av1/decoder/decodetxb.c b/av1/decoder/decodetxb.c
index ebb9b2d..fe82b5a 100644
--- a/av1/decoder/decodetxb.c
+++ b/av1/decoder/decodetxb.c
@@ -57,7 +57,11 @@
                             aom_reader *const r, const int blk_row,
                             const int blk_col, const int plane,
 #if CONFIG_NEW_QUANT
+#if CONFIG_AOM_QM
+                            int dq_profile,
+#else
                             dequant_val_type_nuq *dq_val,
+#endif  // CONFIG_AOM_QM
 #endif  // CONFIG_NEW_QUANT
                             const TXB_CTX *const txb_ctx, const TX_SIZE tx_size,
                             int16_t *const max_scan_line, int *const eob) {
@@ -78,12 +82,20 @@
   struct macroblockd_plane *const pd = &xd->plane[plane];
   const int16_t *const dequant = pd->seg_dequant_QTX[mbmi->segment_id];
   tran_low_t *const tcoeffs = pd->dqcoeff;
-#if CONFIG_NEW_QUANT
-  const tran_low_t *dqv_val = &dq_val[0][0];
-#endif  // CONFIG_NEW_QUANT
 #if !CONFIG_DAALA_TX
   const int shift = av1_get_tx_scale(tx_size);
 #endif
+#if CONFIG_NEW_QUANT
+#if !CONFIG_AOM_QM
+  const tran_low_t *dqv_val = &dq_val[0][0];
+#endif  // !CONFIG_AOM_QM
+
+#if CONFIG_DAALA_TX
+  const int nq_shift = 0;
+#else
+  const int nq_shift = shift;
+#endif  // CONFIG_DAALA_TX
+#endif  // CONFIG_NEW_QUANT
   const int bwl = get_txb_bwl(tx_size);
   const int width = get_txb_wide(tx_size);
   const int height = get_txb_high(tx_size);
@@ -267,12 +279,13 @@
       if (level < 3) {
         cul_level += level;
 #if CONFIG_NEW_QUANT
-        dqv_val = &dq_val[pos != 0][0];
-#if !CONFIG_DAALA_TX
-        v = av1_dequant_abscoeff_nuq(level, dequant[!!c], dqv_val, shift);
+#if CONFIG_AOM_QM
+        v = av1_dequant_abscoeff_nuq(level, dequant[!!c], dq_profile, !!c,
+                                     nq_shift);
 #else
-        v = av1_dequant_abscoeff_nuq(level, dequant[!!c], dqv_val, 0);
-#endif  // !CONFIG_DAALA_TX
+        dqv_val = &dq_val[pos != 0][0];
+        v = av1_dequant_abscoeff_nuq(level, dequant[!!c], dqv_val, nq_shift);
+#endif  // CONFIG_AOM_QM
 #else
         v = level * dequant[!!c];
 #if !CONFIG_DAALA_TX
@@ -338,12 +351,13 @@
         cul_level += *level;
         tran_low_t t;
 #if CONFIG_NEW_QUANT
-        dqv_val = &dq_val[pos != 0][0];
-#if !CONFIG_DAALA_TX
-        t = av1_dequant_abscoeff_nuq(*level, dequant[!!pos], dqv_val, shift);
+#if CONFIG_AOM_QM
+        t = av1_dequant_abscoeff_nuq(*level, dequant[!!pos], dq_profile, !!pos,
+                                     nq_shift);
 #else
-        t = av1_dequant_abscoeff_nuq(*level, dequant[!!pos], dqv_val, 0);
-#endif  // !CONFIG_DAALA_TX
+        dqv_val = &dq_val[pos != 0][0];
+        t = av1_dequant_abscoeff_nuq(*level, dequant[!!pos], dqv_val, nq_shift);
+#endif  // CONFIG_AOM_QM
 #else
         t = *level * dequant[!!pos];
 #if !CONFIG_DAALA_TX
@@ -360,12 +374,13 @@
       tran_low_t t = *level + read_golomb(xd, r, counts);
       cul_level += (int)t;
 #if CONFIG_NEW_QUANT
-      dqv_val = &dq_val[pos != 0][0];
-#if !CONFIG_DAALA_TX
-      t = av1_dequant_abscoeff_nuq(t, dequant[!!pos], dqv_val, shift);
+#if CONFIG_AOM_QM
+      t = av1_dequant_abscoeff_nuq(t, dequant[!!pos], dq_profile, !!pos,
+                                   nq_shift);
 #else
-      t = av1_dequant_abscoeff_nuq(t, dequant[!!pos], dqv_val, 0);
-#endif  // !CONFIG_DAALA_TX
+      dqv_val = &dq_val[pos != 0][0];
+      t = av1_dequant_abscoeff_nuq(t, dequant[!!pos], dqv_val, nq_shift);
+#endif  // CONFIG_AOM_QM
 #else
       t = t * dequant[!!pos];
 #if !CONFIG_DAALA_TX
@@ -408,7 +423,11 @@
   uint8_t cul_level =
       av1_read_coeffs_txb(cm, xd, r, row, col, plane,
 #if CONFIG_NEW_QUANT
+#if CONFIG_AOM_QM
+                          dq,
+#else
                           pd->seg_dequant_nuq_QTX[seg_id][dq],
+#endif  // CONFIG_AOM_QM
 #endif  // CONFIG_NEW_QUANT
                           &txb_ctx, tx_size, max_scan_line, eob);
   av1_set_contexts(xd, pd, plane, tx_size, cul_level, col, row);
diff --git a/av1/decoder/decodetxb.h b/av1/decoder/decodetxb.h
index 455cbf1..13f089d 100644
--- a/av1/decoder/decodetxb.h
+++ b/av1/decoder/decodetxb.h
@@ -22,7 +22,11 @@
                             aom_reader *const r, const int blk_row,
                             const int blk_col, const int plane,
 #if CONFIG_NEW_QUANT
+#if CONFIG_AOM_QM
+                            int dq_profile,
+#else
                             dequant_val_type_nuq *dq_val,
+#endif  // CONFIG_AOM_QM
 #endif  // CONFIG_NEW_QUANT
                             const TXB_CTX *const txb_ctx, const TX_SIZE tx_size,
                             int16_t *const max_scan_line, int *const eob);
diff --git a/av1/decoder/detokenize.c b/av1/decoder/detokenize.c
index 89743ed..e410fa1 100644
--- a/av1/decoder/detokenize.c
+++ b/av1/decoder/detokenize.c
@@ -72,18 +72,21 @@
 static int decode_coefs(MACROBLOCKD *xd, PLANE_TYPE type, tran_low_t *dqcoeff,
                         TX_SIZE tx_size, TX_TYPE tx_type, const int16_t *dq,
 #if CONFIG_NEW_QUANT
-                        dequant_val_type_nuq *dq_val,
+#if CONFIG_AOM_QM
+                        int dq_profile,
 #else
+                        dequant_val_type_nuq *dq_val,
+#endif  // CONFIG_AOM_QM
+#endif  // CONFIG_NEW_QUANT
 #if CONFIG_AOM_QM
                         qm_val_t *iqm[TX_SIZES_ALL],
 #endif  // CONFIG_AOM_QM
-#endif  // CONFIG_NEW_QUANT
                         int ctx, const int16_t *scan, const int16_t *nb,
                         int16_t *max_scan_line, aom_reader *r) {
   FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
   const int max_eob = av1_get_max_eob(tx_size);
   const int ref = is_inter_block(&xd->mi[0]->mbmi);
-#if CONFIG_AOM_QM && !CONFIG_NEW_QUANT
+#if CONFIG_AOM_QM
   const qm_val_t *iqmatrix = iqm[tx_size];
 #endif  // CONFIG_AOM_QM
   (void)tx_type;
@@ -99,14 +102,22 @@
   const uint8_t *band_translate = get_band_translate(tx_size);
   int v, token;
   int32_t dqv = dq[0];
-#if CONFIG_NEW_QUANT
+#if CONFIG_NEW_QUANT && !CONFIG_AOM_QM
   const tran_low_t *dqv_val = &dq_val[0][0];
-#endif  // CONFIG_NEW_QUANT
+#endif  // CONFIG_NEW_QUANT && !CONFIG_AOM_QM
 
 #if !CONFIG_DAALA_TX
   int dq_shift = av1_get_tx_scale(tx_size);
 #endif
 
+#if CONFIG_NEW_QUANT
+#if CONFIG_DAALA_TX
+  int nq_shift = 0;
+#else
+  int nq_shift = dq_shift;
+#endif  // CONFIG_DAALA_TX
+#endif  // CONFIG_NEW_QUANT
+
   band = *band_translate++;
 
   int more_data = 1;
@@ -115,9 +126,9 @@
     int last_pos = (c + 1 == max_eob);
     int first_pos = (c == 0);
 
-#if CONFIG_NEW_QUANT
+#if CONFIG_NEW_QUANT && !CONFIG_AOM_QM
     dqv_val = &dq_val[band != 0][0];
-#endif  // CONFIG_NEW_QUANT
+#endif  // CONFIG_NEW_QUANT && !CONFIG_AOM_QM
 
     comb_token = last_pos ? 2 * av1_read_record_bit(xd->counts, r, ACCT_STR) + 2
                           : av1_read_record_symbol(
@@ -156,9 +167,9 @@
     if (token > ONE_TOKEN)
       token += av1_read_record_symbol(xd->counts, r, coef_tail_cdfs[band][ctx],
                                       TAIL_TOKENS, ACCT_STR);
-#if CONFIG_NEW_QUANT
+#if CONFIG_NEW_QUANT && !CONFIG_AOM_QM
     dqv_val = &dq_val[band != 0][0];
-#endif  // CONFIG_NEW_QUANT
+#endif  // CONFIG_NEW_QUANT && !CONFIG_AOM_QM
 
     *max_scan_line = AOMMAX(*max_scan_line, scan[c]);
     token_cache[scan[c]] = av1_pt_energy_class[token];
@@ -168,19 +179,19 @@
     av1_record_coeff(xd->counts, val);
 #endif
 
-#if CONFIG_NEW_QUANT
-#if !CONFIG_DAALA_TX
-    v = av1_dequant_abscoeff_nuq(val, dqv, dqv_val, dq_shift);
-#else
-    v = av1_dequant_abscoeff_nuq(val, dqv, dqv_val, 0);
-#endif
-#else
 #if CONFIG_AOM_QM
     // Apply quant matrix only for 2D transforms
     if (IS_2D_TRANSFORM(tx_type) && iqmatrix != NULL)
       dqv = ((iqmatrix[scan[c]] * (int)dqv) + (1 << (AOM_QM_BITS - 1))) >>
             AOM_QM_BITS;
 #endif
+#if CONFIG_NEW_QUANT
+#if CONFIG_AOM_QM
+    v = av1_dequant_abscoeff_nuq(val, dqv, dq_profile, band != 0, nq_shift);
+#else
+    v = av1_dequant_abscoeff_nuq(val, dqv, dqv_val, nq_shift);
+#endif  // CONFIG_AOM_QM
+#else
 #if !CONFIG_DAALA_TX
     v = (int)(((int64_t)val * dqv) >> dq_shift);
 #else
@@ -299,7 +310,11 @@
   const int eob =
       decode_coefs(xd, pd->plane_type, pd->dqcoeff, tx_size, tx_type, dequant,
 #if CONFIG_NEW_QUANT
+#if CONFIG_AOM_QM
+                   dq, pd->seg_iqmatrix[seg_id],
+#else
                    pd->seg_dequant_nuq_QTX[seg_id][dq],
+#endif  // CONFIG_AOM_QM
 #else
 #if CONFIG_AOM_QM
                    pd->seg_iqmatrix[seg_id],
diff --git a/av1/encoder/av1_quantize.c b/av1/encoder/av1_quantize.c
index 9c923e2..412cf42 100644
--- a/av1/encoder/av1_quantize.c
+++ b/av1/encoder/av1_quantize.c
@@ -27,19 +27,27 @@
 #if CONFIG_NEW_QUANT
 static INLINE int quantize_coeff_nuq(
     const tran_low_t coeffv, const int16_t quant, const int16_t quant_shift,
-    const int zbin, const int16_t dequant, const tran_low_t *cuml_bins_ptr,
-    const tran_low_t *dequant_val, tran_low_t *qcoeff_ptr,
-    tran_low_t *dqcoeff_ptr) {
+    const int zbin, const int16_t dequant, int dq, int is_ac_coeff,
+    const tran_low_t *cuml_bins_ptr, const tran_low_t *dequant_val,
+    tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const qm_val_t wt) {
   const int coeff = coeffv;
   const int coeff_sign = (coeff >> 31);
   const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
   int q = 0;
-  int tmp = clamp(abs_coeff, INT16_MIN, INT16_MAX);
-  if (tmp >= AOMMAX(zbin, cuml_bins_ptr[0])) {
+  if (abs_coeff * wt >= (AOMMAX(zbin, cuml_bins_ptr[0]) * (1 << AOM_QM_BITS))) {
+    int tmp = clamp(abs_coeff, INT16_MIN, INT16_MAX);
     tmp -= cuml_bins_ptr[0];
-    q = NUQ_KNOTS + (((((tmp * quant) >> 16) + tmp) * quant_shift) >> 16);
-
+    tmp *= wt;
+    q = NUQ_KNOTS +
+        (((((tmp * quant) >> 16) + tmp) * quant_shift) >> (16 + AOM_QM_BITS));
+#if CONFIG_AOM_QM
+    (void)dequant_val;
+    *dqcoeff_ptr = av1_dequant_abscoeff_nuq(q, dequant, dq, is_ac_coeff, 0);
+#else
+    (void)dq;
+    (void)is_ac_coeff;
     *dqcoeff_ptr = av1_dequant_abscoeff_nuq(q, dequant, dequant_val, 0);
+#endif
     *qcoeff_ptr = (q ^ coeff_sign) - coeff_sign;
     *dqcoeff_ptr = *qcoeff_ptr < 0 ? -*dqcoeff_ptr : *dqcoeff_ptr;
   } else {
@@ -51,23 +59,34 @@
 
 static INLINE int quantize_coeff_bigtx_nuq(
     const tran_low_t coeffv, const int16_t quant, const int16_t quant_shift,
-    const int zbin, const int16_t dequant, const tran_low_t *cuml_bins_ptr,
-    const tran_low_t *dequant_val, tran_low_t *qcoeff_ptr,
-    tran_low_t *dqcoeff_ptr, int logsizeby16) {
+    const int zbin, const int16_t dequant, int dq, int is_ac_coeff,
+    const tran_low_t *cuml_bins_ptr, const tran_low_t *dequant_val,
+    tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const qm_val_t wt,
+    int logsizeby16) {
   const int zbin_val = ROUND_POWER_OF_TWO(zbin, logsizeby16);
   const int coeff = coeffv;
   const int coeff_sign = (coeff >> 31);
   const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
   int q = 0;
-  int tmp = clamp(abs_coeff, INT16_MIN, INT16_MAX);
   const int cuml_bins_ptr_val =
       ROUND_POWER_OF_TWO(cuml_bins_ptr[0], logsizeby16);
-  if (tmp >= AOMMAX(zbin_val, cuml_bins_ptr_val)) {
+  if (abs_coeff * wt >=
+      (AOMMAX(zbin_val, cuml_bins_ptr_val) * (1 << AOM_QM_BITS))) {
+    int tmp = clamp(abs_coeff, INT16_MIN, INT16_MAX);
     tmp -= cuml_bins_ptr_val;
-    q = NUQ_KNOTS +
-        (((((tmp * quant) >> 16) + tmp) * quant_shift) >> (16 - logsizeby16));
+    tmp *= wt;
+    q = NUQ_KNOTS + (((((tmp * quant) >> 16) + tmp) * quant_shift) >>
+                     (16 - logsizeby16 + AOM_QM_BITS));
+#if CONFIG_AOM_QM
+    (void)dequant_val;
+    *dqcoeff_ptr =
+        av1_dequant_abscoeff_nuq(q, dequant, dq, is_ac_coeff, logsizeby16);
+#else
+    (void)dq;
+    (void)is_ac_coeff;
     *dqcoeff_ptr =
         av1_dequant_abscoeff_nuq(q, dequant, dequant_val, logsizeby16);
+#endif
     *qcoeff_ptr = (q ^ coeff_sign) - coeff_sign;
     *dqcoeff_ptr = *qcoeff_ptr < 0 ? -*dqcoeff_ptr : *dqcoeff_ptr;
   } else {
@@ -78,17 +97,27 @@
 }
 
 static INLINE int quantize_coeff_fp_nuq(
-    const tran_low_t coeffv, const int16_t quant, const int16_t dequant,
-    const tran_low_t *cuml_bins_ptr, const tran_low_t *dequant_val,
-    tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr) {
+    const tran_low_t coeffv, const int16_t quant, const int16_t dequant, int dq,
+    int is_ac_coeff, const tran_low_t *cuml_bins_ptr,
+    const tran_low_t *dequant_val, tran_low_t *qcoeff_ptr,
+    tran_low_t *dqcoeff_ptr, const qm_val_t wt) {
   const int coeff = coeffv;
   const int coeff_sign = (coeff >> 31);
   const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
   int q = 0;
-  int tmp = clamp(abs_coeff, INT16_MIN, INT16_MAX);
-  if (tmp > cuml_bins_ptr[0]) {
-    q = NUQ_KNOTS + ((((int64_t)tmp - cuml_bins_ptr[0]) * quant) >> 16);
+
+  if (abs_coeff * wt >= (cuml_bins_ptr[0] * (1 << AOM_QM_BITS))) {
+    int tmp = clamp(abs_coeff, INT16_MIN, INT16_MAX);
+    q = NUQ_KNOTS + ((((int64_t)tmp - cuml_bins_ptr[0]) * wt * quant) >>
+                     (16 + AOM_QM_BITS));
+#if CONFIG_AOM_QM
+    (void)dequant_val;
+    *dqcoeff_ptr = av1_dequant_abscoeff_nuq(q, dequant, dq, is_ac_coeff, 0);
+#else
+    (void)dq;
+    (void)is_ac_coeff;
     *dqcoeff_ptr = av1_dequant_abscoeff_nuq(q, dequant, dequant_val, 0);
+#endif
     *qcoeff_ptr = (q ^ coeff_sign) - coeff_sign;
     *dqcoeff_ptr = *qcoeff_ptr < 0 ? -*dqcoeff_ptr : *dqcoeff_ptr;
   } else {
@@ -99,21 +128,31 @@
 }
 
 static INLINE int quantize_coeff_bigtx_fp_nuq(
-    const tran_low_t coeffv, const int16_t quant, const int16_t dequant,
-    const tran_low_t *cuml_bins_ptr, const tran_low_t *dequant_val,
-    tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, int logsizeby16) {
+    const tran_low_t coeffv, const int16_t quant, const int16_t dequant, int dq,
+    int is_ac_coeff, const tran_low_t *cuml_bins_ptr,
+    const tran_low_t *dequant_val, tran_low_t *qcoeff_ptr,
+    tran_low_t *dqcoeff_ptr, const qm_val_t wt, int logsizeby16) {
   const int coeff = coeffv;
   const int coeff_sign = (coeff >> 31);
   const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
   int q = 0;
-  int tmp = clamp(abs_coeff, INT16_MIN, INT16_MAX);
-  if (tmp > ROUND_POWER_OF_TWO(cuml_bins_ptr[0], logsizeby16)) {
+  if (abs_coeff * wt >=
+      (cuml_bins_ptr[0] * (1 << (AOM_QM_BITS - logsizeby16)))) {
+    int tmp = clamp(abs_coeff, INT16_MIN, INT16_MAX);
     q = NUQ_KNOTS +
         ((((int64_t)tmp - ROUND_POWER_OF_TWO(cuml_bins_ptr[0], logsizeby16)) *
-          quant) >>
-         (16 - logsizeby16));
+          wt * quant) >>
+         (16 - logsizeby16 + AOM_QM_BITS));
+#if CONFIG_AOM_QM
+    (void)dequant_val;
+    *dqcoeff_ptr =
+        av1_dequant_abscoeff_nuq(q, dequant, dq, is_ac_coeff, logsizeby16);
+#else
+    (void)dq;
+    (void)is_ac_coeff;
     *dqcoeff_ptr =
         av1_dequant_abscoeff_nuq(q, dequant, dequant_val, logsizeby16);
+#endif
     *qcoeff_ptr = (q ^ coeff_sign) - coeff_sign;
     *dqcoeff_ptr = *qcoeff_ptr < 0 ? -*dqcoeff_ptr : *dqcoeff_ptr;
   } else {
@@ -126,17 +165,25 @@
 void quantize_dc_nuq(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                      int skip_block, const int16_t *zbin_ptr,
                      const int16_t quant, const int16_t quant_shift,
-                     const int16_t dequant, const tran_low_t *cuml_bins_ptr,
+                     const int16_t dequant, int dq,
+                     const tran_low_t *cuml_bins_ptr,
                      const tran_low_t *dequant_val, tran_low_t *qcoeff_ptr,
-                     tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr) {
+                     tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
+                     const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr) {
   int eob = -1;
   memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
   memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
   if (!skip_block) {
     const int rc = 0;
+    const qm_val_t wt = qm_ptr ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+    const qm_val_t iwt = iqm_ptr ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
+    const int16_t dequant_iwt =
+        CONFIG_AOM_QM
+            ? (dequant * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS
+            : dequant;
     if (quantize_coeff_nuq(coeff_ptr[rc], quant, quant_shift, zbin_ptr[rc],
-                           dequant, cuml_bins_ptr, dequant_val, qcoeff_ptr,
-                           dqcoeff_ptr))
+                           dequant_iwt, dq, rc, cuml_bins_ptr, dequant_val,
+                           qcoeff_ptr, dqcoeff_ptr, wt))
       eob = 0;
   }
   *eob_ptr = eob + 1;
@@ -144,16 +191,25 @@
 
 void quantize_dc_fp_nuq(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                         int skip_block, const int16_t quant,
-                        const int16_t dequant, const tran_low_t *cuml_bins_ptr,
+                        const int16_t dequant, int dq,
+                        const tran_low_t *cuml_bins_ptr,
                         const tran_low_t *dequant_val, tran_low_t *qcoeff_ptr,
-                        tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr) {
+                        tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
+                        const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr) {
   int eob = -1;
   memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
   memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
   if (!skip_block) {
     const int rc = 0;
-    if (quantize_coeff_fp_nuq(coeff_ptr[rc], quant, dequant, cuml_bins_ptr,
-                              dequant_val, qcoeff_ptr, dqcoeff_ptr))
+    const qm_val_t wt = qm_ptr ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+    const qm_val_t iwt = iqm_ptr ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
+    const int16_t dequant_iwt =
+        CONFIG_AOM_QM
+            ? (dequant * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS
+            : dequant;
+    if (quantize_coeff_fp_nuq(coeff_ptr[rc], quant, dequant_iwt, dq, rc,
+                              cuml_bins_ptr, dequant_val, qcoeff_ptr,
+                              dqcoeff_ptr, wt))
       eob = 0;
   }
   *eob_ptr = eob + 1;
@@ -162,20 +218,27 @@
 void quantize_dc_32x32_nuq(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                            int skip_block, const int16_t *zbin_ptr,
                            const int16_t quant, const int16_t quant_shift,
-                           const int16_t dequant,
+                           const int16_t dequant, int dq,
                            const tran_low_t *cuml_bins_ptr,
                            const tran_low_t *dequant_val,
                            tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
-                           uint16_t *eob_ptr) {
+                           uint16_t *eob_ptr, const qm_val_t *qm_ptr,
+                           const qm_val_t *iqm_ptr) {
   int eob = -1;
   memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
   memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
   if (!skip_block) {
     const int rc = 0;
+    const qm_val_t wt = qm_ptr ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+    const qm_val_t iwt = iqm_ptr ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
+    const int16_t dequant_iwt =
+        CONFIG_AOM_QM
+            ? (dequant * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS
+            : dequant;
     if (quantize_coeff_bigtx_nuq(coeff_ptr[rc], quant, quant_shift,
-                                 zbin_ptr[rc], dequant, cuml_bins_ptr,
-                                 dequant_val, qcoeff_ptr, dqcoeff_ptr,
-                                 av1_get_tx_scale(TX_32X32)))
+                                 zbin_ptr[rc], dequant_iwt, dq, rc,
+                                 cuml_bins_ptr, dequant_val, qcoeff_ptr,
+                                 dqcoeff_ptr, wt, av1_get_tx_scale(TX_32X32)))
       eob = 0;
   }
   *eob_ptr = eob + 1;
@@ -183,19 +246,27 @@
 
 void quantize_dc_32x32_fp_nuq(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                               int skip_block, const int16_t quant,
-                              const int16_t dequant,
+                              const int16_t dequant, int dq,
                               const tran_low_t *cuml_bins_ptr,
                               const tran_low_t *dequant_val,
                               tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
-                              uint16_t *eob_ptr) {
+                              uint16_t *eob_ptr, const qm_val_t *qm_ptr,
+                              const qm_val_t *iqm_ptr) {
   int eob = -1;
   memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
   memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
   if (!skip_block) {
     const int rc = 0;
-    if (quantize_coeff_bigtx_fp_nuq(coeff_ptr[rc], quant, dequant,
+    const qm_val_t wt = qm_ptr ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+    const qm_val_t iwt = iqm_ptr ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
+    const int16_t dequant_iwt =  // QM build: dequant * iwt, rounded
+        CONFIG_AOM_QM  // NOTE(review): narrowed to int16_t; confirm range
+            ? (dequant * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS
+            : dequant;
+    if (quantize_coeff_bigtx_fp_nuq(coeff_ptr[rc], quant, dequant_iwt, dq, rc,
                                     cuml_bins_ptr, dequant_val, qcoeff_ptr,
-                                    dqcoeff_ptr, av1_get_tx_scale(TX_32X32)))
+                                    dqcoeff_ptr, wt,
+                                    av1_get_tx_scale(TX_32X32)))
       eob = 0;
   }
   *eob_ptr = eob + 1;
@@ -205,20 +276,27 @@
 void quantize_dc_64x64_nuq(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                            int skip_block, const int16_t *zbin_ptr,
                            const int16_t quant, const int16_t quant_shift,
-                           const int16_t dequant,
+                           const int16_t dequant, int dq,
                            const tran_low_t *cuml_bins_ptr,
                            const tran_low_t *dequant_val,
                            tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
-                           uint16_t *eob_ptr) {
+                           uint16_t *eob_ptr, const qm_val_t *qm_ptr,
+                           const qm_val_t *iqm_ptr) {
   int eob = -1;
   memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
   memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
   if (!skip_block) {
     const int rc = 0;
+    const qm_val_t wt = qm_ptr ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+    const qm_val_t iwt = iqm_ptr ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
+    const int16_t dequant_iwt =  // QM build: dequant * iwt, rounded
+        CONFIG_AOM_QM  // NOTE(review): narrowed to int16_t; confirm range
+            ? (dequant * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS
+            : dequant;
     if (quantize_coeff_bigtx_nuq(coeff_ptr[rc], quant, quant_shift,
-                                 zbin_ptr[rc], dequant, cuml_bins_ptr,
-                                 dequant_val, qcoeff_ptr, dqcoeff_ptr,
-                                 av1_get_tx_scale(TX_64X64)))
+                                 zbin_ptr[rc], dequant_iwt, dq, rc,
+                                 cuml_bins_ptr, dequant_val, qcoeff_ptr,
+                                 dqcoeff_ptr, wt, av1_get_tx_scale(TX_64X64)))
       eob = 0;
   }
   *eob_ptr = eob + 1;
@@ -226,19 +304,27 @@
 
 void quantize_dc_64x64_fp_nuq(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                               int skip_block, const int16_t quant,
-                              const int16_t dequant,
+                              const int16_t dequant, int dq,
                               const tran_low_t *cuml_bins_ptr,
                               const tran_low_t *dequant_val,
                               tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
-                              uint16_t *eob_ptr) {
+                              uint16_t *eob_ptr, const qm_val_t *qm_ptr,
+                              const qm_val_t *iqm_ptr) {
   int eob = -1;
   memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
   memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
   if (!skip_block) {
     const int rc = 0;
-    if (quantize_coeff_bigtx_fp_nuq(coeff_ptr[rc], quant, dequant,
+    const qm_val_t wt = qm_ptr ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+    const qm_val_t iwt = iqm_ptr ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
+    const int16_t dequant_iwt =  // QM build: dequant * iwt, rounded
+        CONFIG_AOM_QM  // NOTE(review): narrowed to int16_t; confirm range
+            ? (dequant * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS
+            : dequant;
+    if (quantize_coeff_bigtx_fp_nuq(coeff_ptr[rc], quant, dequant_iwt, dq, rc,
                                     cuml_bins_ptr, dequant_val, qcoeff_ptr,
-                                    dqcoeff_ptr, av1_get_tx_scale(TX_64X64)))
+                                    dqcoeff_ptr, wt,
+                                    av1_get_tx_scale(TX_64X64)))
       eob = 0;
   }
   *eob_ptr = eob + 1;
@@ -248,11 +334,12 @@
 void quantize_nuq_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                     int skip_block, const int16_t *zbin_ptr,
                     const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
-                    const int16_t *dequant_ptr,
+                    const int16_t *dequant_ptr, int dq,
                     const cuml_bins_type_nuq *cuml_bins_ptr,
                     const dequant_val_type_nuq *dequant_val,
                     tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
-                    uint16_t *eob_ptr, const int16_t *scan) {
+                    uint16_t *eob_ptr, const int16_t *scan,
+                    const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr) {
   int eob = -1;
   memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
   memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
@@ -260,10 +347,18 @@
     int i;
     for (i = 0; i < n_coeffs; i++) {
       const int rc = scan[i];
-      if (quantize_coeff_nuq(
-              coeff_ptr[rc], quant_ptr[rc != 0], quant_shift_ptr[rc != 0],
-              zbin_ptr[rc != 0], dequant_ptr[rc != 0], cuml_bins_ptr[rc != 0],
-              dequant_val[rc != 0], &qcoeff_ptr[rc], &dqcoeff_ptr[rc]))
+      const qm_val_t wt = qm_ptr ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+      const qm_val_t iwt = iqm_ptr ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
+      const int16_t dequant_iwt =  // QM build: dequant * iwt, rounded
+          CONFIG_AOM_QM  // NOTE(review): narrowed to int16_t; confirm range
+              ? (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
+                    AOM_QM_BITS
+              : dequant_ptr[rc != 0];
+      if (quantize_coeff_nuq(coeff_ptr[rc], quant_ptr[rc != 0],
+                             quant_shift_ptr[rc != 0], zbin_ptr[rc != 0],
+                             dequant_iwt, dq, rc != 0, cuml_bins_ptr[rc != 0],
+                             dequant_val[rc != 0], &qcoeff_ptr[rc],
+                             &dqcoeff_ptr[rc], wt))
         eob = i;
     }
   }
@@ -272,11 +367,12 @@
 
 void quantize_fp_nuq_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                        int skip_block, const int16_t *quant_ptr,
-                       const int16_t *dequant_ptr,
+                       const int16_t *dequant_ptr, int dq,
                        const cuml_bins_type_nuq *cuml_bins_ptr,
                        const dequant_val_type_nuq *dequant_val,
                        tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
-                       uint16_t *eob_ptr, const int16_t *scan) {
+                       uint16_t *eob_ptr, const int16_t *scan,
+                       const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr) {
   int eob = -1;
   memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
   memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
@@ -284,10 +380,17 @@
     int i;
     for (i = 0; i < n_coeffs; i++) {
       const int rc = scan[i];
-      if (quantize_coeff_fp_nuq(coeff_ptr[rc], quant_ptr[rc != 0],
-                                dequant_ptr[rc != 0], cuml_bins_ptr[rc != 0],
+      const qm_val_t wt = qm_ptr ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+      const qm_val_t iwt = iqm_ptr ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
+      const int16_t dequant_iwt =  // QM build: dequant * iwt, rounded
+          CONFIG_AOM_QM  // NOTE(review): narrowed to int16_t; confirm range
+              ? (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
+                    AOM_QM_BITS
+              : dequant_ptr[rc != 0];
+      if (quantize_coeff_fp_nuq(coeff_ptr[rc], quant_ptr[rc != 0], dequant_iwt,
+                                dq, rc != 0, cuml_bins_ptr[rc != 0],
                                 dequant_val[rc != 0], &qcoeff_ptr[rc],
-                                &dqcoeff_ptr[rc]))
+                                &dqcoeff_ptr[rc], wt))
         eob = i;
     }
   }
@@ -298,11 +401,12 @@
                           int skip_block, const int16_t *zbin_ptr,
                           const int16_t *quant_ptr,
                           const int16_t *quant_shift_ptr,
-                          const int16_t *dequant_ptr,
+                          const int16_t *dequant_ptr, int dq,
                           const cuml_bins_type_nuq *cuml_bins_ptr,
                           const dequant_val_type_nuq *dequant_val,
                           tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
-                          uint16_t *eob_ptr, const int16_t *scan) {
+                          uint16_t *eob_ptr, const int16_t *scan,
+                          const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr) {
   int eob = -1;
   memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
   memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
@@ -310,11 +414,18 @@
     int i;
     for (i = 0; i < n_coeffs; i++) {
       const int rc = scan[i];
+      const qm_val_t wt = qm_ptr ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+      const qm_val_t iwt = iqm_ptr ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
+      const int16_t dequant_iwt =  // QM build: dequant * iwt, rounded
+          CONFIG_AOM_QM  // NOTE(review): narrowed to int16_t; confirm range
+              ? (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
+                    AOM_QM_BITS
+              : dequant_ptr[rc != 0];
       if (quantize_coeff_bigtx_nuq(
               coeff_ptr[rc], quant_ptr[rc != 0], quant_shift_ptr[rc != 0],
-              zbin_ptr[rc != 0], dequant_ptr[rc != 0], cuml_bins_ptr[rc != 0],
-              dequant_val[rc != 0], &qcoeff_ptr[rc], &dqcoeff_ptr[rc],
-              av1_get_tx_scale(TX_32X32)))
+              zbin_ptr[rc != 0], dequant_iwt, dq, rc != 0,
+              cuml_bins_ptr[rc != 0], dequant_val[rc != 0], &qcoeff_ptr[rc],
+              &dqcoeff_ptr[rc], wt, av1_get_tx_scale(TX_32X32)))
         eob = i;
     }
   }
@@ -323,11 +434,12 @@
 
 void quantize_32x32_fp_nuq_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                              int skip_block, const int16_t *quant_ptr,
-                             const int16_t *dequant_ptr,
+                             const int16_t *dequant_ptr, int dq,
                              const cuml_bins_type_nuq *cuml_bins_ptr,
                              const dequant_val_type_nuq *dequant_val,
                              tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
-                             uint16_t *eob_ptr, const int16_t *scan) {
+                             uint16_t *eob_ptr, const int16_t *scan,
+                             const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr) {
   int eob = -1;
   memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
   memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
@@ -335,10 +447,17 @@
     int i;
     for (i = 0; i < n_coeffs; i++) {
       const int rc = scan[i];
+      const qm_val_t wt = qm_ptr ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+      const qm_val_t iwt = iqm_ptr ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
+      const int16_t dequant_iwt =  // QM build: dequant * iwt, rounded
+          CONFIG_AOM_QM  // NOTE(review): narrowed to int16_t; confirm range
+              ? (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
+                    AOM_QM_BITS
+              : dequant_ptr[rc != 0];
       if (quantize_coeff_bigtx_fp_nuq(
-              coeff_ptr[rc], quant_ptr[rc != 0], dequant_ptr[rc != 0],
+              coeff_ptr[rc], quant_ptr[rc != 0], dequant_iwt, dq, rc != 0,
               cuml_bins_ptr[rc != 0], dequant_val[rc != 0], &qcoeff_ptr[rc],
-              &dqcoeff_ptr[rc], av1_get_tx_scale(TX_32X32)))
+              &dqcoeff_ptr[rc], wt, av1_get_tx_scale(TX_32X32)))
         eob = i;
     }
   }
@@ -350,11 +469,12 @@
                           int skip_block, const int16_t *zbin_ptr,
                           const int16_t *quant_ptr,
                           const int16_t *quant_shift_ptr,
-                          const int16_t *dequant_ptr,
+                          const int16_t *dequant_ptr, int dq,
                           const cuml_bins_type_nuq *cuml_bins_ptr,
                           const dequant_val_type_nuq *dequant_val,
                           tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
-                          uint16_t *eob_ptr, const int16_t *scan) {
+                          uint16_t *eob_ptr, const int16_t *scan,
+                          const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr) {
   int eob = -1;
   memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
   memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
@@ -362,11 +482,18 @@
     int i;
     for (i = 0; i < n_coeffs; i++) {
       const int rc = scan[i];
+      const qm_val_t wt = qm_ptr ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+      const qm_val_t iwt = iqm_ptr ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
+      const int16_t dequant_iwt =  // QM build: dequant * iwt, rounded
+          CONFIG_AOM_QM  // NOTE(review): narrowed to int16_t; confirm range
+              ? (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
+                    AOM_QM_BITS
+              : dequant_ptr[rc != 0];
       if (quantize_coeff_bigtx_nuq(
               coeff_ptr[rc], quant_ptr[rc != 0], quant_shift_ptr[rc != 0],
-              zbin_ptr[rc != 0], dequant_ptr[rc != 0], cuml_bins_ptr[rc != 0],
-              dequant_val[rc != 0], &qcoeff_ptr[rc], &dqcoeff_ptr[rc],
-              av1_get_tx_scale(TX_64X64)))
+              zbin_ptr[rc != 0], dequant_iwt, dq, rc != 0,
+              cuml_bins_ptr[rc != 0], dequant_val[rc != 0], &qcoeff_ptr[rc],
+              &dqcoeff_ptr[rc], wt, av1_get_tx_scale(TX_64X64)))
         eob = i;
     }
   }
@@ -375,11 +502,12 @@
 
 void quantize_64x64_fp_nuq_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                              int skip_block, const int16_t *quant_ptr,
-                             const int16_t *dequant_ptr,
+                             const int16_t *dequant_ptr, int dq,
                              const cuml_bins_type_nuq *cuml_bins_ptr,
                              const dequant_val_type_nuq *dequant_val,
                              tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
-                             uint16_t *eob_ptr, const int16_t *scan) {
+                             uint16_t *eob_ptr, const int16_t *scan,
+                             const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr) {
   int eob = -1;
   memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
   memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
@@ -387,10 +515,17 @@
     int i;
     for (i = 0; i < n_coeffs; i++) {
       const int rc = scan[i];
+      const qm_val_t wt = qm_ptr ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+      const qm_val_t iwt = iqm_ptr ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
+      const int16_t dequant_iwt =  // QM build: dequant * iwt, rounded
+          CONFIG_AOM_QM  // NOTE(review): narrowed to int16_t; confirm range
+              ? (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
+                    AOM_QM_BITS
+              : dequant_ptr[rc != 0];
       if (quantize_coeff_bigtx_fp_nuq(
-              coeff_ptr[rc], quant_ptr[rc != 0], dequant_ptr[rc != 0],
+              coeff_ptr[rc], quant_ptr[rc != 0], dequant_iwt, dq, rc != 0,
               cuml_bins_ptr[rc != 0], dequant_val[rc != 0], &qcoeff_ptr[rc],
-              &dqcoeff_ptr[rc], av1_get_tx_scale(TX_64X64)))
+              &dqcoeff_ptr[rc], wt, av1_get_tx_scale(TX_64X64)))
         eob = i;
     }
   }
@@ -713,31 +848,59 @@
   const int skip_block = 0;
   const int dq = qparam->dq;
   const int x0 = qparam->x0;
+  const qm_val_t *qm_ptr;
+  const qm_val_t *iqm_ptr;
+#if CONFIG_AOM_QM
+  if (qparam->qmatrix != NULL && qparam->iqmatrix != NULL) {
+    qm_ptr = qparam->qmatrix;
+    iqm_ptr = qparam->iqmatrix;
+  } else {  // a matrix is missing: use neither
+    qm_ptr = NULL;
+    iqm_ptr = NULL;
+  }
+#else  // built without CONFIG_AOM_QM: both stay NULL
+  qm_ptr = NULL;
+  iqm_ptr = NULL;
+#endif  // CONFIG_AOM_QM
 
   switch (qparam->log_scale) {
     case 0:
       quantize_nuq(coeff_ptr, n_coeffs, skip_block, p->zbin_QTX, p->quant_QTX,
-                   p->quant_shift_QTX, p->dequant_QTX,
+                   p->quant_shift_QTX, p->dequant_QTX, dq,
                    (const cuml_bins_type_nuq *)p->cuml_bins_nuq[x0],
+#if CONFIG_AOM_QM
+                   NULL,
+#else
                    (const dequant_val_type_nuq *)p->dequant_val_nuq_QTX[dq],
-                   qcoeff_ptr, dqcoeff_ptr, eob_ptr, sc->scan);
+#endif  // CONFIG_AOM_QM
+                   qcoeff_ptr, dqcoeff_ptr, eob_ptr, sc->scan, qm_ptr, iqm_ptr);
       break;
     case 1:
-      quantize_32x32_nuq(
-          coeff_ptr, n_coeffs, skip_block, p->zbin_QTX, p->quant_QTX,
-          p->quant_shift_QTX, p->dequant_QTX,
-          (const cuml_bins_type_nuq *)p->cuml_bins_nuq[x0],
-          (const dequant_val_type_nuq *)p->dequant_val_nuq_QTX[dq], qcoeff_ptr,
-          dqcoeff_ptr, eob_ptr, sc->scan);
+      quantize_32x32_nuq(coeff_ptr, n_coeffs, skip_block, p->zbin_QTX,
+                         p->quant_QTX, p->quant_shift_QTX, p->dequant_QTX, dq,
+                         (const cuml_bins_type_nuq *)p->cuml_bins_nuq[x0],
+#if CONFIG_AOM_QM
+                         NULL,
+#else
+                         (const dequant_val_type_nuq *)
+                             p->dequant_val_nuq_QTX[dq],
+#endif  // CONFIG_AOM_QM
+                         qcoeff_ptr, dqcoeff_ptr, eob_ptr, sc->scan, qm_ptr,
+                         iqm_ptr);
       break;
 #if CONFIG_TX64X64
     case 2:
-      quantize_64x64_nuq(
-          coeff_ptr, n_coeffs, skip_block, p->zbin_QTX, p->quant_QTX,
-          p->quant_shift_QTX, p->dequant_QTX,
-          (const cuml_bins_type_nuq *)p->cuml_bins_nuq[x0],
-          (const dequant_val_type_nuq *)p->dequant_val_nuq_QTX[dq], qcoeff_ptr,
-          dqcoeff_ptr, eob_ptr, sc->scan);
+      quantize_64x64_nuq(coeff_ptr, n_coeffs, skip_block, p->zbin_QTX,
+                         p->quant_QTX, p->quant_shift_QTX, p->dequant_QTX, dq,
+                         (const cuml_bins_type_nuq *)p->cuml_bins_nuq[x0],
+#if CONFIG_AOM_QM
+                         NULL,
+#else
+                         (const dequant_val_type_nuq *)
+                             p->dequant_val_nuq_QTX[dq],
+#endif  // CONFIG_AOM_QM
+                         qcoeff_ptr, dqcoeff_ptr, eob_ptr, sc->scan, qm_ptr,
+                         iqm_ptr);
       break;
 #endif  // CONFIG_TX64X64
     default: assert(0);
@@ -753,29 +916,55 @@
   const int skip_block = 0;
   const int dq = qparam->dq;
   const int x0 = qparam->x0;
+  const qm_val_t *qm_ptr;
+  const qm_val_t *iqm_ptr;
+#if CONFIG_AOM_QM
+  if (qparam->qmatrix != NULL && qparam->iqmatrix != NULL) {
+    qm_ptr = qparam->qmatrix;
+    iqm_ptr = qparam->iqmatrix;
+  } else {  // a matrix is missing: use neither
+    qm_ptr = NULL;
+    iqm_ptr = NULL;
+  }
+#else  // built without CONFIG_AOM_QM: both stay NULL
+  qm_ptr = NULL;
+  iqm_ptr = NULL;
+#endif  // CONFIG_AOM_QM
 
   switch (qparam->log_scale) {
     case 0:
-      quantize_fp_nuq(coeff_ptr, n_coeffs, skip_block, p->quant_fp_QTX,
-                      p->dequant_QTX,
-                      (const cuml_bins_type_nuq *)p->cuml_bins_nuq[x0],
-                      (const dequant_val_type_nuq *)p->dequant_val_nuq_QTX[dq],
-                      qcoeff_ptr, dqcoeff_ptr, eob_ptr, sc->scan);
+      quantize_fp_nuq(
+          coeff_ptr, n_coeffs, skip_block, p->quant_fp_QTX, p->dequant_QTX, dq,
+          (const cuml_bins_type_nuq *)p->cuml_bins_nuq[x0],
+#if CONFIG_AOM_QM
+          NULL,
+#else
+          (const dequant_val_type_nuq *)p->dequant_val_nuq_QTX[dq],
+#endif  // CONFIG_AOM_QM
+          qcoeff_ptr, dqcoeff_ptr, eob_ptr, sc->scan, qm_ptr, iqm_ptr);
       break;
     case 1:
       quantize_32x32_fp_nuq(
-          coeff_ptr, n_coeffs, skip_block, p->quant_fp_QTX, p->dequant_QTX,
+          coeff_ptr, n_coeffs, skip_block, p->quant_fp_QTX, p->dequant_QTX, dq,
           (const cuml_bins_type_nuq *)p->cuml_bins_nuq[x0],
-          (const dequant_val_type_nuq *)p->dequant_val_nuq_QTX[dq], qcoeff_ptr,
-          dqcoeff_ptr, eob_ptr, sc->scan);
+#if CONFIG_AOM_QM
+          NULL,
+#else
+          (const dequant_val_type_nuq *)p->dequant_val_nuq_QTX[dq],
+#endif  // CONFIG_AOM_QM
+          qcoeff_ptr, dqcoeff_ptr, eob_ptr, sc->scan, qm_ptr, iqm_ptr);
       break;
 #if CONFIG_TX64X64
     case 2:
       quantize_64x64_fp_nuq(
-          coeff_ptr, n_coeffs, skip_block, p->quant_fp_QTX, p->dequant_QTX,
+          coeff_ptr, n_coeffs, skip_block, p->quant_fp_QTX, p->dequant_QTX, dq,
           (const cuml_bins_type_nuq *)p->cuml_bins_nuq[x0],
-          (const dequant_val_type_nuq *)p->dequant_val_nuq_QTX[dq], qcoeff_ptr,
-          dqcoeff_ptr, eob_ptr, sc->scan);
+#if CONFIG_AOM_QM
+          NULL,
+#else
+          (const dequant_val_type_nuq *)p->dequant_val_nuq_QTX[dq],
+#endif  // CONFIG_AOM_QM
+          qcoeff_ptr, dqcoeff_ptr, eob_ptr, sc->scan, qm_ptr, iqm_ptr);
       break;
 #endif  // CONFIG_TX64X64
     default: assert(0);
@@ -792,26 +981,54 @@
   const int dq = qparam->dq;
   const int x0 = qparam->x0;
   (void)sc;
+  const qm_val_t *qm_ptr;
+  const qm_val_t *iqm_ptr;
+#if CONFIG_AOM_QM
+  if (qparam->qmatrix != NULL && qparam->iqmatrix != NULL) {
+    qm_ptr = qparam->qmatrix;
+    iqm_ptr = qparam->iqmatrix;
+  } else {  // a matrix is missing: use neither
+    qm_ptr = NULL;
+    iqm_ptr = NULL;
+  }
+#else  // built without CONFIG_AOM_QM: both stay NULL
+  qm_ptr = NULL;
+  iqm_ptr = NULL;
+#endif  // CONFIG_AOM_QM
 
   switch (qparam->log_scale) {
     case 0:
       quantize_dc_fp_nuq(coeff_ptr, n_coeffs, skip_block, p->quant_fp_QTX[0],
-                         p->dequant_QTX[0], p->cuml_bins_nuq[x0][0],
-                         p->dequant_val_nuq_QTX[dq][0], qcoeff_ptr, dqcoeff_ptr,
-                         eob_ptr);
+                         p->dequant_QTX[0], dq, p->cuml_bins_nuq[x0][0],
+#if CONFIG_AOM_QM
+                         NULL,
+#else
+                         p->dequant_val_nuq_QTX[dq][0],
+#endif  // CONFIG_AOM_QM
+                         qcoeff_ptr, dqcoeff_ptr, eob_ptr, qm_ptr, iqm_ptr);
       break;
     case 1:
       quantize_dc_32x32_fp_nuq(
           coeff_ptr, n_coeffs, skip_block, p->quant_fp_QTX[0],
-          p->dequant_QTX[0], p->cuml_bins_nuq[x0][0],
-          p->dequant_val_nuq_QTX[dq][0], qcoeff_ptr, dqcoeff_ptr, eob_ptr);
+          p->dequant_QTX[0], dq, p->cuml_bins_nuq[x0][0],
+#if CONFIG_AOM_QM
+          NULL,
+#else
+          p->dequant_val_nuq_QTX[dq][0],
+#endif  // CONFIG_AOM_QM
+          qcoeff_ptr, dqcoeff_ptr, eob_ptr, qm_ptr, iqm_ptr);
       break;
 #if CONFIG_TX64X64
     case 2:
       quantize_dc_64x64_fp_nuq(
           coeff_ptr, n_coeffs, skip_block, p->quant_fp_QTX[0],
-          p->dequant_QTX[0], p->cuml_bins_nuq[x0][0],
-          p->dequant_val_nuq_QTX[dq][0], qcoeff_ptr, dqcoeff_ptr, eob_ptr);
+          p->dequant_QTX[0], dq, p->cuml_bins_nuq[x0][0],
+#if CONFIG_AOM_QM
+          NULL,
+#else
+          p->dequant_val_nuq_QTX[dq][0],
+#endif  // CONFIG_AOM_QM
+          qcoeff_ptr, dqcoeff_ptr, eob_ptr, qm_ptr, iqm_ptr);
       break;
 #endif  // CONFIG_TX64X64
     default: assert(0);
@@ -972,18 +1189,27 @@
 #if CONFIG_NEW_QUANT
 static INLINE int highbd_quantize_coeff_nuq(
     const tran_low_t coeffv, const int16_t quant, const int16_t quant_shift,
-    const int zbin, const int16_t dequant, const tran_low_t *cuml_bins_ptr,
-    const tran_low_t *dequant_val, tran_low_t *qcoeff_ptr,
-    tran_low_t *dqcoeff_ptr) {
+    const int zbin, const int16_t dequant, int dq, int is_ac_coeff,
+    const tran_low_t *cuml_bins_ptr, const tran_low_t *dequant_val,
+    tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const qm_val_t wt) {
   const int coeff = coeffv;
   const int coeff_sign = (coeff >> 31);
   const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
   int q = 0;
-  int64_t tmp = clamp(abs_coeff, INT32_MIN, INT32_MAX);
-  if (tmp >= AOMMAX(zbin, cuml_bins_ptr[0])) {
+  if (abs_coeff * wt >= (AOMMAX(zbin, cuml_bins_ptr[0]) * (1 << AOM_QM_BITS))) {
+    int64_t tmp = clamp(abs_coeff, INT32_MIN, INT32_MAX);
     tmp -= cuml_bins_ptr[0];
-    q = NUQ_KNOTS + (int)(((((tmp * quant) >> 16) + tmp) * quant_shift) >> 16);
+    tmp *= wt;  // wt's AOM_QM_BITS scale is shifted back out below
+    q = NUQ_KNOTS + (int)(((((tmp * quant) >> 16) + tmp) * quant_shift) >>
+                          (16 + AOM_QM_BITS));
+#if CONFIG_AOM_QM
+    (void)dequant_val;  // unused by the CONFIG_AOM_QM call below
+    *dqcoeff_ptr = av1_dequant_abscoeff_nuq(q, dequant, dq, is_ac_coeff, 0);
+#else
+    (void)dq;  // dq and is_ac_coeff are used only under CONFIG_AOM_QM
+    (void)is_ac_coeff;
     *dqcoeff_ptr = av1_dequant_abscoeff_nuq(q, dequant, dequant_val, 0);
+#endif  // CONFIG_AOM_QM
     *qcoeff_ptr = (q ^ coeff_sign) - coeff_sign;
     *dqcoeff_ptr = *qcoeff_ptr < 0 ? -*dqcoeff_ptr : *dqcoeff_ptr;
   } else {
@@ -994,17 +1220,26 @@
 }
 
 static INLINE int highbd_quantize_coeff_fp_nuq(
-    const tran_low_t coeffv, const int16_t quant, const int16_t dequant,
-    const tran_low_t *cuml_bins_ptr, const tran_low_t *dequant_val,
-    tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr) {
+    const tran_low_t coeffv, const int16_t quant, const int16_t dequant, int dq,
+    int is_ac_coeff, const tran_low_t *cuml_bins_ptr,
+    const tran_low_t *dequant_val, tran_low_t *qcoeff_ptr,
+    tran_low_t *dqcoeff_ptr, const qm_val_t wt) {
   const int coeff = coeffv;
   const int coeff_sign = (coeff >> 31);
   const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
   int q = 0;
-  int64_t tmp = clamp(abs_coeff, INT32_MIN, INT32_MAX);
-  if (tmp > cuml_bins_ptr[0]) {
-    q = NUQ_KNOTS + (int)(((tmp - cuml_bins_ptr[0]) * quant) >> 16);
+  if (abs_coeff * wt >= (cuml_bins_ptr[0] * (1 << AOM_QM_BITS))) {
+    int64_t tmp = clamp(abs_coeff, INT32_MIN, INT32_MAX);
+    q = NUQ_KNOTS +  // NOTE(review): guard is >= but was > before; confirm
+        (int)(((tmp - cuml_bins_ptr[0]) * wt * quant) >> (16 + AOM_QM_BITS));
+#if CONFIG_AOM_QM
+    (void)dequant_val;  // unused by the CONFIG_AOM_QM call below
+    *dqcoeff_ptr = av1_dequant_abscoeff_nuq(q, dequant, dq, is_ac_coeff, 0);
+#else
+    (void)dq;  // dq and is_ac_coeff are used only under CONFIG_AOM_QM
+    (void)is_ac_coeff;
     *dqcoeff_ptr = av1_dequant_abscoeff_nuq(q, dequant, dequant_val, 0);
+#endif  // CONFIG_AOM_QM
     *qcoeff_ptr = (q ^ coeff_sign) - coeff_sign;
     *dqcoeff_ptr = *qcoeff_ptr < 0 ? -*dqcoeff_ptr : *dqcoeff_ptr;
   } else {
@@ -1015,21 +1250,31 @@
 }
 
 static INLINE int highbd_quantize_coeff_bigtx_fp_nuq(
-    const tran_low_t coeffv, const int16_t quant, const int16_t dequant,
-    const tran_low_t *cuml_bins_ptr, const tran_low_t *dequant_val,
-    tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, int logsizeby16) {
+    const tran_low_t coeffv, const int16_t quant, const int16_t dequant, int dq,
+    int is_ac_coeff, const tran_low_t *cuml_bins_ptr,
+    const tran_low_t *dequant_val, tran_low_t *qcoeff_ptr,
+    tran_low_t *dqcoeff_ptr, const qm_val_t wt, int logsizeby16) {
   const int coeff = coeffv;
   const int coeff_sign = (coeff >> 31);
   const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
   int q = 0;
-  int64_t tmp = clamp(abs_coeff, INT32_MIN, INT32_MAX);
-  if (tmp > ROUND_POWER_OF_TWO(cuml_bins_ptr[0], logsizeby16)) {
+  if (abs_coeff * wt >=  // NOTE(review): was '>' on the rounded bin; confirm
+      (cuml_bins_ptr[0] * (1 << (AOM_QM_BITS - logsizeby16)))) {
+    int64_t tmp = clamp(abs_coeff, INT32_MIN, INT32_MAX);
     q = NUQ_KNOTS +
-        (int)(((tmp - ROUND_POWER_OF_TWO(cuml_bins_ptr[0], logsizeby16)) *
+        (int)(((tmp - ROUND_POWER_OF_TWO(cuml_bins_ptr[0], logsizeby16)) * wt *
                quant) >>
-              (16 - logsizeby16));
+              (16 - logsizeby16 + AOM_QM_BITS));  // also removes wt's scale
+#if CONFIG_AOM_QM
+    (void)dequant_val;  // unused by the CONFIG_AOM_QM call below
+    *dqcoeff_ptr =
+        av1_dequant_abscoeff_nuq(q, dequant, dq, is_ac_coeff, logsizeby16);
+#else
+    (void)dq;  // dq and is_ac_coeff are used only under CONFIG_AOM_QM
+    (void)is_ac_coeff;
     *dqcoeff_ptr =
         av1_dequant_abscoeff_nuq(q, dequant, dequant_val, logsizeby16);
+#endif  // CONFIG_AOM_QM
     *qcoeff_ptr = (q ^ coeff_sign) - coeff_sign;
     *dqcoeff_ptr = *qcoeff_ptr < 0 ? -*dqcoeff_ptr : *dqcoeff_ptr;
   } else {
@@ -1041,23 +1286,34 @@
 
 static INLINE int highbd_quantize_coeff_bigtx_nuq(
     const tran_low_t coeffv, const int16_t quant, const int16_t quant_shift,
-    const int zbin, const int16_t dequant, const tran_low_t *cuml_bins_ptr,
-    const tran_low_t *dequant_val, tran_low_t *qcoeff_ptr,
-    tran_low_t *dqcoeff_ptr, int logsizeby16) {
+    const int zbin, const int16_t dequant, int dq, int is_ac_coeff,
+    const tran_low_t *cuml_bins_ptr, const tran_low_t *dequant_val,
+    tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const qm_val_t wt,
+    int logsizeby16) {
   const int zbin_val = ROUND_POWER_OF_TWO(zbin, logsizeby16);
   const int coeff = coeffv;
   const int coeff_sign = (coeff >> 31);
   const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
   int q = 0;
-  int64_t tmp = clamp(abs_coeff, INT32_MIN, INT32_MAX);
   const int cuml_bins_ptr_val =
       ROUND_POWER_OF_TWO(cuml_bins_ptr[0], logsizeby16);
-  if (tmp >= AOMMAX(zbin_val, cuml_bins_ptr_val)) {
+  if (abs_coeff * wt >=  // NOTE(review): int product; confirm no overflow
+      (AOMMAX(zbin_val, cuml_bins_ptr_val) * (1 << AOM_QM_BITS))) {
+    int64_t tmp = clamp(abs_coeff, INT32_MIN, INT32_MAX);
     tmp -= cuml_bins_ptr_val;
+    tmp *= wt;  // wt's AOM_QM_BITS scale is shifted back out below
     q = NUQ_KNOTS + (int)(((((tmp * quant) >> 16) + tmp) * quant_shift) >>
-                          (16 - logsizeby16));
+                          (16 - logsizeby16 + AOM_QM_BITS));
+#if CONFIG_AOM_QM
+    (void)dequant_val;  // unused by the CONFIG_AOM_QM call below
+    *dqcoeff_ptr =
+        av1_dequant_abscoeff_nuq(q, dequant, dq, is_ac_coeff, logsizeby16);
+#else
+    (void)dq;  // dq and is_ac_coeff are used only under CONFIG_AOM_QM
+    (void)is_ac_coeff;
     *dqcoeff_ptr =
         av1_dequant_abscoeff_nuq(q, dequant, dequant_val, logsizeby16);
+#endif  // CONFIG_AOM_QM
     *qcoeff_ptr = (q ^ coeff_sign) - coeff_sign;
     *dqcoeff_ptr = *qcoeff_ptr < 0 ? -*dqcoeff_ptr : *dqcoeff_ptr;
   } else {
@@ -1070,19 +1326,26 @@
 void highbd_quantize_dc_nuq(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                             int skip_block, const int16_t *zbin_ptr,
                             const int16_t quant, const int16_t quant_shift,
-                            const int16_t dequant,
+                            const int16_t dequant, int dq,
                             const tran_low_t *cuml_bins_ptr,
                             const tran_low_t *dequant_val,
                             tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
-                            uint16_t *eob_ptr) {
+                            uint16_t *eob_ptr, const qm_val_t *qm_ptr,
+                            const qm_val_t *iqm_ptr) {
   int eob = -1;
   memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
   memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
   if (!skip_block) {
     const int rc = 0;
-    if (highbd_quantize_coeff_nuq(coeff_ptr[rc], quant, quant_shift,
-                                  zbin_ptr[rc], dequant, cuml_bins_ptr,
-                                  dequant_val, qcoeff_ptr, dqcoeff_ptr))
+    const qm_val_t wt = qm_ptr ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+    const qm_val_t iwt = iqm_ptr ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
+    const int16_t dequant_iwt =  // QM build: dequant * iwt, rounded
+        CONFIG_AOM_QM  // NOTE(review): narrowed to int16_t; confirm range
+            ? (dequant * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS
+            : dequant;
+    if (highbd_quantize_coeff_nuq(
+            coeff_ptr[rc], quant, quant_shift, zbin_ptr[rc], dequant_iwt, dq,
+            rc, cuml_bins_ptr, dequant_val, qcoeff_ptr, dqcoeff_ptr, wt))
       eob = 0;
   }
   *eob_ptr = eob + 1;
@@ -1090,19 +1353,26 @@
 
 void highbd_quantize_dc_fp_nuq(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                                int skip_block, const int16_t quant,
-                               const int16_t dequant,
+                               const int16_t dequant, int dq,
                                const tran_low_t *cuml_bins_ptr,
                                const tran_low_t *dequant_val,
                                tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
-                               uint16_t *eob_ptr) {
+                               uint16_t *eob_ptr, const qm_val_t *qm_ptr,
+                               const qm_val_t *iqm_ptr) {
   int eob = -1;
   memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
   memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
   if (!skip_block) {
     const int rc = 0;
-    if (highbd_quantize_coeff_fp_nuq(coeff_ptr[rc], quant, dequant,
+    const qm_val_t wt = qm_ptr ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+    const qm_val_t iwt = iqm_ptr ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
+    const int16_t dequant_iwt =
+        CONFIG_AOM_QM
+            ? (dequant * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS
+            : dequant;
+    if (highbd_quantize_coeff_fp_nuq(coeff_ptr[rc], quant, dequant_iwt, dq, rc,
                                      cuml_bins_ptr, dequant_val, qcoeff_ptr,
-                                     dqcoeff_ptr))
+                                     dqcoeff_ptr, wt))
       eob = 0;
   }
   *eob_ptr = eob + 1;
@@ -1112,11 +1382,12 @@
                            int skip_block, const int16_t *zbin_ptr,
                            const int16_t *quant_ptr,
                            const int16_t *quant_shift_ptr,
-                           const int16_t *dequant_ptr,
+                           const int16_t *dequant_ptr, int dq,
                            const cuml_bins_type_nuq *cuml_bins_ptr,
                            const dequant_val_type_nuq *dequant_val,
                            tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
-                           uint16_t *eob_ptr, const int16_t *scan) {
+                           uint16_t *eob_ptr, const int16_t *scan,
+                           const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr) {
   int eob = -1;
   memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
   memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
@@ -1124,10 +1395,18 @@
     int i;
     for (i = 0; i < n_coeffs; i++) {
       const int rc = scan[i];
+      const qm_val_t wt = qm_ptr ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+      const qm_val_t iwt = iqm_ptr ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
+      const int16_t dequant_iwt =
+          CONFIG_AOM_QM
+              ? (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
+                    AOM_QM_BITS
+              : dequant_ptr[rc != 0];
       if (highbd_quantize_coeff_nuq(
               coeff_ptr[rc], quant_ptr[rc != 0], quant_shift_ptr[rc != 0],
-              zbin_ptr[rc != 0], dequant_ptr[rc != 0], cuml_bins_ptr[rc != 0],
-              dequant_val[rc != 0], &qcoeff_ptr[rc], &dqcoeff_ptr[rc]))
+              zbin_ptr[rc != 0], dequant_iwt, dq, rc != 0,
+              cuml_bins_ptr[rc != 0], dequant_val[rc != 0], &qcoeff_ptr[rc],
+              &dqcoeff_ptr[rc], wt))
         eob = i;
     }
   }
@@ -1137,10 +1416,11 @@
 void highbd_quantize_32x32_nuq_c(
     const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
     const int16_t *zbin_ptr, const int16_t *quant_ptr,
-    const int16_t *quant_shift_ptr, const int16_t *dequant_ptr,
+    const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, int dq,
     const cuml_bins_type_nuq *cuml_bins_ptr,
     const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr,
-    tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan) {
+    tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan,
+    const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr) {
   int eob = -1;
   memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
   memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
@@ -1148,11 +1428,18 @@
     int i;
     for (i = 0; i < n_coeffs; i++) {
       const int rc = scan[i];
+      const qm_val_t wt = qm_ptr ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+      const qm_val_t iwt = iqm_ptr ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
+      const int16_t dequant_iwt =
+          CONFIG_AOM_QM
+              ? (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
+                    AOM_QM_BITS
+              : dequant_ptr[rc != 0];
       if (highbd_quantize_coeff_bigtx_nuq(
               coeff_ptr[rc], quant_ptr[rc != 0], quant_shift_ptr[rc != 0],
-              zbin_ptr[rc != 0], dequant_ptr[rc != 0], cuml_bins_ptr[rc != 0],
-              dequant_val[rc != 0], &qcoeff_ptr[rc], &dqcoeff_ptr[rc],
-              av1_get_tx_scale(TX_32X32)))
+              zbin_ptr[rc != 0], dequant_iwt, dq, rc != 0,
+              cuml_bins_ptr[rc != 0], dequant_val[rc != 0], &qcoeff_ptr[rc],
+              &dqcoeff_ptr[rc], wt, av1_get_tx_scale(TX_32X32)))
         eob = i;
     }
   }
@@ -1161,10 +1448,11 @@
 
 void highbd_quantize_32x32_fp_nuq_c(
     const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
-    const int16_t *quant_ptr, const int16_t *dequant_ptr,
+    const int16_t *quant_ptr, const int16_t *dequant_ptr, int dq,
     const cuml_bins_type_nuq *cuml_bins_ptr,
     const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr,
-    tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan) {
+    tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan,
+    const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr) {
   int eob = -1;
   memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
   memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
@@ -1172,10 +1460,17 @@
     int i;
     for (i = 0; i < n_coeffs; i++) {
       const int rc = scan[i];
+      const qm_val_t wt = qm_ptr ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+      const qm_val_t iwt = iqm_ptr ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
+      const int16_t dequant_iwt =
+          CONFIG_AOM_QM
+              ? (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
+                    AOM_QM_BITS
+              : dequant_ptr[rc != 0];
       if (highbd_quantize_coeff_bigtx_fp_nuq(
-              coeff_ptr[rc], quant_ptr[rc != 0], dequant_ptr[rc != 0],
+              coeff_ptr[rc], quant_ptr[rc != 0], dequant_iwt, dq, rc != 0,
               cuml_bins_ptr[rc != 0], dequant_val[rc != 0], &qcoeff_ptr[rc],
-              &dqcoeff_ptr[rc], av1_get_tx_scale(TX_32X32)))
+              &dqcoeff_ptr[rc], wt, av1_get_tx_scale(TX_32X32)))
         eob = i;
     }
   }
@@ -1186,10 +1481,11 @@
 void highbd_quantize_64x64_nuq_c(
     const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
     const int16_t *zbin_ptr, const int16_t *quant_ptr,
-    const int16_t *quant_shift_ptr, const int16_t *dequant_ptr,
+    const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, int dq,
     const cuml_bins_type_nuq *cuml_bins_ptr,
     const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr,
-    tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan) {
+    tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan,
+    const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr) {
   int eob = -1;
   memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
   memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
@@ -1197,11 +1493,18 @@
     int i;
     for (i = 0; i < n_coeffs; i++) {
       const int rc = scan[i];
+      const qm_val_t wt = qm_ptr ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+      const qm_val_t iwt = iqm_ptr ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
+      const int16_t dequant_iwt =
+          CONFIG_AOM_QM
+              ? (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
+                    AOM_QM_BITS
+              : dequant_ptr[rc != 0];
       if (highbd_quantize_coeff_bigtx_nuq(
               coeff_ptr[rc], quant_ptr[rc != 0], quant_shift_ptr[rc != 0],
-              zbin_ptr[rc != 0], dequant_ptr[rc != 0], cuml_bins_ptr[rc != 0],
-              dequant_val[rc != 0], &qcoeff_ptr[rc], &dqcoeff_ptr[rc],
-              av1_get_tx_scale(TX_64X64)))
+              zbin_ptr[rc != 0], dequant_iwt, dq, rc != 0,
+              cuml_bins_ptr[rc != 0], dequant_val[rc != 0], &qcoeff_ptr[rc],
+              &dqcoeff_ptr[rc], wt, av1_get_tx_scale(TX_64X64)))
         eob = i;
     }
   }
@@ -1210,10 +1513,11 @@
 
 void highbd_quantize_64x64_fp_nuq_c(
     const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
-    const int16_t *quant_ptr, const int16_t *dequant_ptr,
+    const int16_t *quant_ptr, const int16_t *dequant_ptr, int dq,
     const cuml_bins_type_nuq *cuml_bins_ptr,
     const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr,
-    tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan) {
+    tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan,
+    const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr) {
   int eob = -1;
   memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
   memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
@@ -1221,10 +1525,17 @@
     int i;
     for (i = 0; i < n_coeffs; i++) {
       const int rc = scan[i];
+      const qm_val_t wt = qm_ptr ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+      const qm_val_t iwt = iqm_ptr ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
+      const int16_t dequant_iwt =
+          CONFIG_AOM_QM
+              ? (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
+                    AOM_QM_BITS
+              : dequant_ptr[rc != 0];
       if (highbd_quantize_coeff_bigtx_fp_nuq(
-              coeff_ptr[rc], quant_ptr[rc != 0], dequant_ptr[rc != 0],
+              coeff_ptr[rc], quant_ptr[rc != 0], dequant_iwt, dq, rc != 0,
               cuml_bins_ptr[rc != 0], dequant_val[rc != 0], &qcoeff_ptr[rc],
-              &dqcoeff_ptr[rc], av1_get_tx_scale(TX_64X64)))
+              &dqcoeff_ptr[rc], wt, av1_get_tx_scale(TX_64X64)))
         eob = i;
     }
   }
@@ -1234,11 +1545,12 @@
 
 void highbd_quantize_fp_nuq_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                               int skip_block, const int16_t *quant_ptr,
-                              const int16_t *dequant_ptr,
+                              const int16_t *dequant_ptr, int dq,
                               const cuml_bins_type_nuq *cuml_bins_ptr,
                               const dequant_val_type_nuq *dequant_val,
                               tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
-                              uint16_t *eob_ptr, const int16_t *scan) {
+                              uint16_t *eob_ptr, const int16_t *scan,
+                              const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr) {
   int eob = -1;
   memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
   memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
@@ -1246,10 +1558,17 @@
     int i;
     for (i = 0; i < n_coeffs; i++) {
       const int rc = scan[i];
+      const qm_val_t wt = qm_ptr ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+      const qm_val_t iwt = iqm_ptr ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
+      const int16_t dequant_iwt =
+          CONFIG_AOM_QM
+              ? (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
+                    AOM_QM_BITS
+              : dequant_ptr[rc != 0];
       if (highbd_quantize_coeff_fp_nuq(
-              coeff_ptr[rc], quant_ptr[rc != 0], dequant_ptr[rc != 0],
+              coeff_ptr[rc], quant_ptr[rc != 0], dequant_iwt, dq, rc != 0,
               cuml_bins_ptr[rc != 0], dequant_val[rc != 0], &qcoeff_ptr[rc],
-              &dqcoeff_ptr[rc]))
+              &dqcoeff_ptr[rc], wt))
         eob = i;
     }
   }
@@ -1259,18 +1578,25 @@
 void highbd_quantize_dc_32x32_nuq(
     const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
     const int16_t *zbin_ptr, const int16_t quant, const int16_t quant_shift,
-    const int16_t dequant, const tran_low_t *cuml_bins_ptr,
+    const int16_t dequant, int dq, const tran_low_t *cuml_bins_ptr,
     const tran_low_t *dequant_val, tran_low_t *qcoeff_ptr,
-    tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr) {
+    tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const qm_val_t *qm_ptr,
+    const qm_val_t *iqm_ptr) {
   int eob = -1;
   memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
   memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
   if (!skip_block) {
     const int rc = 0;
-    if (highbd_quantize_coeff_bigtx_nuq(coeff_ptr[rc], quant, quant_shift,
-                                        zbin_ptr[rc], dequant, cuml_bins_ptr,
-                                        dequant_val, qcoeff_ptr, dqcoeff_ptr,
-                                        av1_get_tx_scale(TX_32X32)))
+    const qm_val_t wt = qm_ptr ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+    const qm_val_t iwt = iqm_ptr ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
+    const int16_t dequant_iwt =
+        CONFIG_AOM_QM
+            ? (dequant * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS
+            : dequant;
+    if (highbd_quantize_coeff_bigtx_nuq(
+            coeff_ptr[rc], quant, quant_shift, zbin_ptr[rc], dequant_iwt, dq,
+            rc, cuml_bins_ptr, dequant_val, qcoeff_ptr, dqcoeff_ptr, wt,
+            av1_get_tx_scale(TX_32X32)))
       eob = 0;
   }
   *eob_ptr = eob + 1;
@@ -1278,17 +1604,25 @@
 
 void highbd_quantize_dc_32x32_fp_nuq(
     const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
-    const int16_t quant, const int16_t dequant, const tran_low_t *cuml_bins_ptr,
-    const tran_low_t *dequant_val, tran_low_t *qcoeff_ptr,
-    tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr) {
+    const int16_t quant, const int16_t dequant, int dq,
+    const tran_low_t *cuml_bins_ptr, const tran_low_t *dequant_val,
+    tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
+    const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr) {
   int eob = -1;
   memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
   memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
   if (!skip_block) {
     const int rc = 0;
-    if (highbd_quantize_coeff_bigtx_fp_nuq(
-            coeff_ptr[rc], quant, dequant, cuml_bins_ptr, dequant_val,
-            qcoeff_ptr, dqcoeff_ptr, av1_get_tx_scale(TX_32X32)))
+    const qm_val_t wt = qm_ptr ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+    const qm_val_t iwt = iqm_ptr ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
+    const int16_t dequant_iwt =
+        CONFIG_AOM_QM
+            ? (dequant * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS
+            : dequant;
+    if (highbd_quantize_coeff_bigtx_fp_nuq(coeff_ptr[rc], quant, dequant_iwt,
+                                           dq, rc, cuml_bins_ptr, dequant_val,
+                                           qcoeff_ptr, dqcoeff_ptr, wt,
+                                           av1_get_tx_scale(TX_32X32)))
       eob = 0;
   }
   *eob_ptr = eob + 1;
@@ -1298,18 +1632,25 @@
 void highbd_quantize_dc_64x64_nuq(
     const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
     const int16_t *zbin_ptr, const int16_t quant, const int16_t quant_shift,
-    const int16_t dequant, const tran_low_t *cuml_bins_ptr,
+    const int16_t dequant, int dq, const tran_low_t *cuml_bins_ptr,
     const tran_low_t *dequant_val, tran_low_t *qcoeff_ptr,
-    tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr) {
+    tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const qm_val_t *qm_ptr,
+    const qm_val_t *iqm_ptr) {
   int eob = -1;
   memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
   memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
   if (!skip_block) {
     const int rc = 0;
-    if (highbd_quantize_coeff_bigtx_nuq(coeff_ptr[rc], quant, quant_shift,
-                                        zbin_ptr[rc], dequant, cuml_bins_ptr,
-                                        dequant_val, qcoeff_ptr, dqcoeff_ptr,
-                                        av1_get_tx_scale(TX_64X64)))
+    const qm_val_t wt = qm_ptr ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+    const qm_val_t iwt = iqm_ptr ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
+    const int16_t dequant_iwt =
+        CONFIG_AOM_QM
+            ? (dequant * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS
+            : dequant;
+    if (highbd_quantize_coeff_bigtx_nuq(
+            coeff_ptr[rc], quant, quant_shift, zbin_ptr[rc], dequant_iwt, dq,
+            rc, cuml_bins_ptr, dequant_val, qcoeff_ptr, dqcoeff_ptr, wt,
+            av1_get_tx_scale(TX_64X64)))
       eob = 0;
   }
   *eob_ptr = eob + 1;
@@ -1317,17 +1658,25 @@
 
 void highbd_quantize_dc_64x64_fp_nuq(
     const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
-    const int16_t quant, const int16_t dequant, const tran_low_t *cuml_bins_ptr,
-    const tran_low_t *dequant_val, tran_low_t *qcoeff_ptr,
-    tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr) {
+    const int16_t quant, const int16_t dequant, int dq,
+    const tran_low_t *cuml_bins_ptr, const tran_low_t *dequant_val,
+    tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
+    const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr) {
   int eob = -1;
   memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
   memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
   if (!skip_block) {
     const int rc = 0;
-    if (highbd_quantize_coeff_bigtx_fp_nuq(
-            coeff_ptr[rc], quant, dequant, cuml_bins_ptr, dequant_val,
-            qcoeff_ptr, dqcoeff_ptr, av1_get_tx_scale(TX_64X64)))
+    const qm_val_t wt = qm_ptr ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+    const qm_val_t iwt = iqm_ptr ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
+    const int16_t dequant_iwt =
+        CONFIG_AOM_QM
+            ? (dequant * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS
+            : dequant;
+    if (highbd_quantize_coeff_bigtx_fp_nuq(coeff_ptr[rc], quant, dequant_iwt,
+                                           dq, rc, cuml_bins_ptr, dequant_val,
+                                           qcoeff_ptr, dqcoeff_ptr, wt,
+                                           av1_get_tx_scale(TX_64X64)))
       eob = 0;
   }
   *eob_ptr = eob + 1;
@@ -1342,32 +1691,59 @@
   const int skip_block = 0;
   const int dq = qparam->dq;
   const int x0 = qparam->x0;
+  const qm_val_t *qm_ptr;
+  const qm_val_t *iqm_ptr;
+#if CONFIG_AOM_QM
+  if (qparam->qmatrix != NULL && qparam->iqmatrix != NULL) {
+    qm_ptr = qparam->qmatrix;
+    iqm_ptr = qparam->iqmatrix;
+  } else {
+    qm_ptr = NULL;
+    iqm_ptr = NULL;
+  }
+#else
+  qm_ptr = NULL;
+  iqm_ptr = NULL;
+#endif  // CONFIG_AOM_QM
 
   switch (qparam->log_scale) {
     case 0:
-      highbd_quantize_nuq(
-          coeff_ptr, n_coeffs, skip_block, p->zbin_QTX, p->quant_QTX,
-          p->quant_shift_QTX, p->dequant_QTX,
-          (const cuml_bins_type_nuq *)p->cuml_bins_nuq[x0],
-          (const dequant_val_type_nuq *)p->dequant_val_nuq_QTX[dq], qcoeff_ptr,
-          dqcoeff_ptr, eob_ptr, sc->scan);
+      highbd_quantize_nuq(coeff_ptr, n_coeffs, skip_block, p->zbin_QTX,
+                          p->quant_QTX, p->quant_shift_QTX, p->dequant_QTX, dq,
+                          (const cuml_bins_type_nuq *)p->cuml_bins_nuq[x0],
+#if CONFIG_AOM_QM
+                          NULL,
+#else
+                          (const dequant_val_type_nuq *)
+                              p->dequant_val_nuq_QTX[dq],
+#endif  // CONFIG_AOM_QM
+                          qcoeff_ptr, dqcoeff_ptr, eob_ptr, sc->scan, qm_ptr,
+                          iqm_ptr);
       break;
     case 1:
       highbd_quantize_32x32_nuq(
           coeff_ptr, n_coeffs, skip_block, p->zbin_QTX, p->quant_QTX,
-          p->quant_shift_QTX, p->dequant_QTX,
+          p->quant_shift_QTX, p->dequant_QTX, dq,
           (const cuml_bins_type_nuq *)p->cuml_bins_nuq[x0],
-          (const dequant_val_type_nuq *)p->dequant_val_nuq_QTX[dq], qcoeff_ptr,
-          dqcoeff_ptr, eob_ptr, sc->scan);
+#if CONFIG_AOM_QM
+          NULL,
+#else
+          (const dequant_val_type_nuq *)p->dequant_val_nuq_QTX[dq],
+#endif  // CONFIG_AOM_QM
+          qcoeff_ptr, dqcoeff_ptr, eob_ptr, sc->scan, qm_ptr, iqm_ptr);
       break;
 #if CONFIG_TX64X64
     case 2:
       highbd_quantize_64x64_nuq(
           coeff_ptr, n_coeffs, skip_block, p->zbin_QTX, p->quant_QTX,
-          p->quant_shift_QTX, p->dequant_QTX,
+          p->quant_shift_QTX, p->dequant_QTX, dq,
           (const cuml_bins_type_nuq *)p->cuml_bins_nuq[x0],
-          (const dequant_val_type_nuq *)p->dequant_val_nuq_QTX[dq], qcoeff_ptr,
-          dqcoeff_ptr, eob_ptr, sc->scan);
+#if CONFIG_AOM_QM
+          NULL,
+#else
+          (const dequant_val_type_nuq *)p->dequant_val_nuq_QTX[dq],
+#endif  // CONFIG_AOM_QM
+          qcoeff_ptr, dqcoeff_ptr, eob_ptr, sc->scan, qm_ptr, iqm_ptr);
       break;
 #endif  // CONFIG_TX64X64
     default: assert(0);
@@ -1382,29 +1758,55 @@
   const int skip_block = 0;
   const int dq = qparam->dq;
   const int x0 = qparam->x0;
+  const qm_val_t *qm_ptr;
+  const qm_val_t *iqm_ptr;
+#if CONFIG_AOM_QM
+  if (qparam->qmatrix != NULL && qparam->iqmatrix != NULL) {
+    qm_ptr = qparam->qmatrix;
+    iqm_ptr = qparam->iqmatrix;
+  } else {
+    qm_ptr = NULL;
+    iqm_ptr = NULL;
+  }
+#else
+  qm_ptr = NULL;
+  iqm_ptr = NULL;
+#endif  // CONFIG_AOM_QM
 
   switch (qparam->log_scale) {
     case 0:
       highbd_quantize_fp_nuq(
-          coeff_ptr, n_coeffs, skip_block, p->quant_fp_QTX, p->dequant_QTX,
+          coeff_ptr, n_coeffs, skip_block, p->quant_fp_QTX, p->dequant_QTX, dq,
           (const cuml_bins_type_nuq *)p->cuml_bins_nuq[x0],
-          (const dequant_val_type_nuq *)p->dequant_val_nuq_QTX[dq], qcoeff_ptr,
-          dqcoeff_ptr, eob_ptr, sc->scan);
+#if CONFIG_AOM_QM
+          NULL,
+#else
+          (const dequant_val_type_nuq *)p->dequant_val_nuq_QTX[dq],
+#endif  // CONFIG_AOM_QM
+          qcoeff_ptr, dqcoeff_ptr, eob_ptr, sc->scan, qm_ptr, iqm_ptr);
       break;
     case 1:
       highbd_quantize_32x32_fp_nuq(
-          coeff_ptr, n_coeffs, skip_block, p->quant_fp_QTX, p->dequant_QTX,
+          coeff_ptr, n_coeffs, skip_block, p->quant_fp_QTX, p->dequant_QTX, dq,
           (const cuml_bins_type_nuq *)p->cuml_bins_nuq[x0],
-          (const dequant_val_type_nuq *)p->dequant_val_nuq_QTX[dq], qcoeff_ptr,
-          dqcoeff_ptr, eob_ptr, sc->scan);
+#if CONFIG_AOM_QM
+          NULL,
+#else
+          (const dequant_val_type_nuq *)p->dequant_val_nuq_QTX[dq],
+#endif  // CONFIG_AOM_QM
+          qcoeff_ptr, dqcoeff_ptr, eob_ptr, sc->scan, qm_ptr, iqm_ptr);
       break;
 #if CONFIG_TX64X64
     case 2:
       highbd_quantize_64x64_fp_nuq(
-          coeff_ptr, n_coeffs, skip_block, p->quant_fp_QTX, p->dequant_QTX,
+          coeff_ptr, n_coeffs, skip_block, p->quant_fp_QTX, p->dequant_QTX, dq,
           (const cuml_bins_type_nuq *)p->cuml_bins_nuq[x0],
-          (const dequant_val_type_nuq *)p->dequant_val_nuq_QTX[dq], qcoeff_ptr,
-          dqcoeff_ptr, eob_ptr, sc->scan);
+#if CONFIG_AOM_QM
+          NULL,
+#else
+          (const dequant_val_type_nuq *)p->dequant_val_nuq_QTX[dq],
+#endif  // CONFIG_AOM_QM
+          qcoeff_ptr, dqcoeff_ptr, eob_ptr, sc->scan, qm_ptr, iqm_ptr);
       break;
 #endif  // CONFIG_TX64X64
     default: assert(0);
@@ -1420,26 +1822,55 @@
   const int dq = qparam->dq;
   const int x0 = qparam->x0;
   (void)sc;
+  const qm_val_t *qm_ptr;
+  const qm_val_t *iqm_ptr;
+#if CONFIG_AOM_QM
+  if (qparam->qmatrix != NULL && qparam->iqmatrix != NULL) {
+    qm_ptr = qparam->qmatrix;
+    iqm_ptr = qparam->iqmatrix;
+  } else {
+    qm_ptr = NULL;
+    iqm_ptr = NULL;
+  }
+#else
+  qm_ptr = NULL;
+  iqm_ptr = NULL;
+#endif  // CONFIG_AOM_QM
 
   switch (qparam->log_scale) {
     case 0:
       highbd_quantize_dc_fp_nuq(
           coeff_ptr, n_coeffs, skip_block, p->quant_fp_QTX[0],
-          p->dequant_QTX[0], p->cuml_bins_nuq[x0][0],
-          p->dequant_val_nuq_QTX[dq][0], qcoeff_ptr, dqcoeff_ptr, eob_ptr);
+          p->dequant_QTX[0], dq, p->cuml_bins_nuq[x0][0],
+#if CONFIG_AOM_QM
+          NULL,
+#else
+          p->dequant_val_nuq_QTX[dq][0],
+#endif  // CONFIG_AOM_QM
+          qcoeff_ptr, dqcoeff_ptr, eob_ptr, qm_ptr, iqm_ptr);
       break;
     case 1:
       highbd_quantize_dc_32x32_fp_nuq(
           coeff_ptr, n_coeffs, skip_block, p->quant_fp_QTX[0],
-          p->dequant_QTX[0], p->cuml_bins_nuq[x0][0],
-          p->dequant_val_nuq_QTX[dq][0], qcoeff_ptr, dqcoeff_ptr, eob_ptr);
+          p->dequant_QTX[0], dq, p->cuml_bins_nuq[x0][0],
+#if CONFIG_AOM_QM
+          NULL,
+#else
+          p->dequant_val_nuq_QTX[dq][0],
+#endif  // CONFIG_AOM_QM
+          qcoeff_ptr, dqcoeff_ptr, eob_ptr, qm_ptr, iqm_ptr);
       break;
 #if CONFIG_TX64X64
     case 2:
       highbd_quantize_dc_64x64_fp_nuq(
           coeff_ptr, n_coeffs, skip_block, p->quant_fp_QTX[0],
-          p->dequant_QTX[0], p->cuml_bins_nuq[x0][0],
-          p->dequant_val_nuq_QTX[dq][0], qcoeff_ptr, dqcoeff_ptr, eob_ptr);
+          p->dequant_QTX[0], dq, p->cuml_bins_nuq[x0][0],
+#if CONFIG_AOM_QM
+          NULL,
+#else
+          p->dequant_val_nuq_QTX[dq][0],
+#endif  // CONFIG_AOM_QM
+          qcoeff_ptr, dqcoeff_ptr, eob_ptr, qm_ptr, iqm_ptr);
       break;
 #endif  // CONFIG_TX64X64
     default: assert(0);
@@ -1559,6 +1990,7 @@
     for (dq = 0; dq < QUANT_PROFILES; dq++) {
       // DC and AC coefs
       for (i = 0; i < 2; i++) {
+#if !CONFIG_AOM_QM
         const int y_quant = deq->y_dequant_QTX[q][i != 0];
         const int u_quant = deq->u_dequant_QTX[q][i != 0];
         const int v_quant = deq->v_dequant_QTX[q][i != 0];
@@ -1568,6 +2000,7 @@
                                 deq->u_dequant_val_nuq_QTX[dq][q][i], dq);
         av1_get_dequant_val_nuq(v_quant, i,
                                 deq->v_dequant_val_nuq_QTX[dq][q][i], dq);
+#endif  // !CONFIG_AOM_QM
       }
     }
 #endif  // CONFIG_NEW_QUANT
@@ -1660,8 +2093,10 @@
     x->plane[0].cuml_bins_nuq[x0] = quants->y_cuml_bins_nuq[x0][qindex];
   }
   for (int dq = 0; dq < QUANT_PROFILES; dq++) {
+#if !CONFIG_AOM_QM
     x->plane[0].dequant_val_nuq_QTX[dq] =
         cpi->dequants.y_dequant_val_nuq_QTX[dq][qindex];
+#endif  // !CONFIG_AOM_QM
   }
 #endif  // CONFIG_NEW_QUANT
 
@@ -1687,8 +2122,10 @@
       x->plane[1].cuml_bins_nuq[x0] = quants->u_cuml_bins_nuq[x0][qindex];
     }
     for (int dq = 0; dq < QUANT_PROFILES; dq++) {
+#if !CONFIG_AOM_QM
       x->plane[1].dequant_val_nuq_QTX[dq] =
           cpi->dequants.u_dequant_val_nuq_QTX[dq][qindex];
+#endif  // !CONFIG_AOM_QM
     }
 #endif  // CONFIG_NEW_QUANT
   }
@@ -1714,8 +2151,10 @@
       x->plane[2].cuml_bins_nuq[x0] = quants->v_cuml_bins_nuq[x0][qindex];
     }
     for (int dq = 0; dq < QUANT_PROFILES; dq++) {
+#if !CONFIG_AOM_QM
       x->plane[2].dequant_val_nuq_QTX[dq] =
           cpi->dequants.v_dequant_val_nuq_QTX[dq][qindex];
+#endif  // !CONFIG_AOM_QM
     }
 #endif  // CONFIG_NEW_QUANT
   }
diff --git a/av1/encoder/av1_quantize.h b/av1/encoder/av1_quantize.h
index 349ace4..6bb8c88 100644
--- a/av1/encoder/av1_quantize.h
+++ b/av1/encoder/av1_quantize.h
@@ -92,14 +92,14 @@
   DECLARE_ALIGNED(16, int16_t, y_dequant_Q3[QINDEX_RANGE][8]);  // 8: SIMD width
   DECLARE_ALIGNED(16, int16_t, u_dequant_Q3[QINDEX_RANGE][8]);  // 8: SIMD width
   DECLARE_ALIGNED(16, int16_t, v_dequant_Q3[QINDEX_RANGE][8]);  // 8: SIMD width
-#if CONFIG_NEW_QUANT
+#if CONFIG_NEW_QUANT && !CONFIG_AOM_QM
   DECLARE_ALIGNED(16, dequant_val_type_nuq,
                   y_dequant_val_nuq_QTX[QUANT_PROFILES][QINDEX_RANGE][2]);
   DECLARE_ALIGNED(16, dequant_val_type_nuq,
                   u_dequant_val_nuq_QTX[QUANT_PROFILES][QINDEX_RANGE][2]);
   DECLARE_ALIGNED(16, dequant_val_type_nuq,
                   v_dequant_val_nuq_QTX[QUANT_PROFILES][QINDEX_RANGE][2]);
-#endif  // CONFIG_NEW_QUANT
+#endif  // CONFIG_NEW_QUANT && !CONFIG_AOM_QM
 } Dequants;
 
 struct AV1_COMP;
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index 8459c8c..1940bad 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -162,7 +162,9 @@
 #endif  // CONFIG_AOM_QM
 #if CONFIG_NEW_QUANT
   int dq = get_dq_profile(cm->dq_type, mb->qindex, ref, plane_type);
+#if !CONFIG_AOM_QM
   const dequant_val_type_nuq *dequant_val = p->dequant_val_nuq_QTX[dq];
+#endif  // !CONFIG_AOM_QM
 #endif  // CONFIG_NEW_QUANT
   int64_t rd_cost0, rd_cost1;
   int16_t t0, t1;
@@ -255,16 +257,25 @@
       if (x_a != 0) {
 #if CONFIG_DAALA_TX
 #if CONFIG_NEW_QUANT
+#if CONFIG_AOM_QM
+        dx = av1_dequant_coeff_nuq(x_a, dqv, dq, rc != 0, 0) - coeff[rc];
+#else
         dx = av1_dequant_coeff_nuq(x_a, dqv, dequant_val[rc != 0], 0) -
              coeff[rc];
+#endif  // CONFIG_AOM_QM
 #else   // CONFIG_NEW_QUANT
         dx -= (dqv + sz) ^ sz;
 #endif  // CONFIG_NEW_QUANT
         d2_a = ((int64_t)dx * dx + depth_round) >> depth_shift;
 #else  // CONFIG_DAALA_TX
 #if CONFIG_NEW_QUANT
+#if CONFIG_AOM_QM
+        dx = av1_dequant_coeff_nuq(x_a, dqv, dq, rc != 0, 0) -
+             (coeff[rc] * (1 << shift));
+#else
         dx = av1_dequant_coeff_nuq(x_a, dqv, dequant_val[rc != 0], 0) -
              (coeff[rc] * (1 << shift));
+#endif  // CONFIG_AOM_QM
         dx >>= xd->bd - 8;
 #else   // CONFIG_NEW_QUANT
         dx -= ((dqv >> (xd->bd - 8)) + sz) ^ sz;
@@ -345,16 +356,24 @@
         if (x_a != 0) {
 #if CONFIG_DAALA_TX
 #if CONFIG_NEW_QUANT
+#if CONFIG_AOM_QM
+          dqc_a = av1_dequant_abscoeff_nuq(abs(x_a), dqv, dq, rc != 0, 0);
+#else
           dqc_a =
               av1_dequant_abscoeff_nuq(abs(x_a), dqv, dequant_val[rc != 0], 0);
+#endif  // CONFIG_AOM_QM
           if (sz) dqc_a = -dqc_a;
 #else
           dqc_a = x_a * dqv;
 #endif  // CONFIG_NEW_QUANT
 #else   // CONFIG_DAALA_TX
 #if CONFIG_NEW_QUANT
+#if CONFIG_AOM_QM
+          dqc_a = av1_dequant_abscoeff_nuq(abs(x_a), dqv, dq, rc != 0, shift);
+#else
           dqc_a = av1_dequant_abscoeff_nuq(abs(x_a), dqv, dequant_val[rc != 0],
                                            shift);
+#endif  // CONFIG_AOM_QM
           if (sz) dqc_a = -dqc_a;
 #else
           if (x_a < 0)
diff --git a/av1/encoder/encodetxb.c b/av1/encoder/encodetxb.c
index 437d5b0..49802cd 100644
--- a/av1/encoder/encodetxb.c
+++ b/av1/encoder/encodetxb.c
@@ -134,12 +134,20 @@
 
 static INLINE tran_low_t qcoeff_to_dqcoeff(tran_low_t qc,
 #if CONFIG_NEW_QUANT
+#if CONFIG_AOM_QM
+                                           int dq_idx, int is_ac_coeff,
+#else
                                            const tran_low_t *nq_dq,
+#endif  // CONFIG_AOM_QM
 #endif  // CONFIG_NEW_QUANT
                                            int dqv, int shift) {
   int sgn = qc < 0 ? -1 : 1;
 #if CONFIG_NEW_QUANT
+#if CONFIG_AOM_QM
+  int dqcoeff = av1_dequant_coeff_nuq(abs(qc), dqv, dq_idx, is_ac_coeff, shift);
+#else
   int dqcoeff = av1_dequant_coeff_nuq(abs(qc), dqv, nq_dq, shift);
+#endif  // CONFIG_AOM_QM
   return sgn * dqcoeff;
 #endif  // CONFIG_NEW_QUANT
 
@@ -278,7 +286,11 @@
   const tran_low_t tqc = txb_info->tcoeff[coeff_idx];
   const int dqv = txb_info->dequant[coeff_idx != 0];
 #if CONFIG_NEW_QUANT
+#if CONFIG_AOM_QM
+  const int dq_idx = txb_info->dq_idx;
+#else
   const tran_low_t *nq_dequant_val = txb_info->nq_dequant_vals[coeff_idx != 0];
+#endif  // CONFIG_AOM_QM
 #endif  // CONFIG_NEW_QUANT
 
   const int coeff_ctx =
@@ -293,7 +305,11 @@
   } else {
     const tran_low_t dqc = qcoeff_to_dqcoeff(qc,
 #if CONFIG_NEW_QUANT
+#if CONFIG_AOM_QM
+                                             dq_idx, coeff_idx != 0,
+#else
                                              nq_dequant_val,
+#endif  // CONFIG_AOM_QM
 #endif  // CONFIG_NEW_QUANT
                                              dqv, txb_info->shift);
     const int64_t dqc_dist = get_coeff_dist(tqc, dqc, txb_info->shift);
@@ -301,7 +317,11 @@
     // distortion difference when coefficient is quantized to 0
     const tran_low_t dqc0 = qcoeff_to_dqcoeff(0,
 #if CONFIG_NEW_QUANT
+#if CONFIG_AOM_QM
+                                              dq_idx, coeff_idx != 0,
+#else
                                               nq_dequant_val,
+#endif  // CONFIG_AOM_QM
 #endif  // CONFIG_NEW_QUANT
                                               dqv, txb_info->shift);
 
@@ -322,7 +342,11 @@
     } else {
       stats->low_dqc = qcoeff_to_dqcoeff(stats->low_qc,
 #if CONFIG_NEW_QUANT
+#if CONFIG_AOM_QM
+                                         dq_idx, coeff_idx != 0,
+#else
                                          nq_dequant_val,
+#endif  // CONFIG_AOM_QM
 #endif  // CONFIG_NEW_QUANT
                                          dqv, txb_info->shift);
       const int64_t low_dqc_dist =
@@ -366,11 +390,19 @@
   update_qcoeff(coeff_idx, qc, txb_info);
   const int dqv = txb_info->dequant[coeff_idx != 0];
 #if CONFIG_NEW_QUANT
+#if CONFIG_AOM_QM
+  const int dq_idx = txb_info->dq_idx;
+#else
   const tran_low_t *nq_dequant_val = txb_info->nq_dequant_vals[coeff_idx != 0];
+#endif  // CONFIG_AOM_QM
 #endif  // CONFIG_NEW_QUANT
   txb_info->dqcoeff[coeff_idx] = qcoeff_to_dqcoeff(qc,
 #if CONFIG_NEW_QUANT
+#if CONFIG_AOM_QM
+                                                   dq_idx, coeff_idx != 0,
+#else
                                                    nq_dequant_val,
+#endif  // CONFIG_AOM_QM
 #endif  // CONFIG_NEW_QUANT
                                                    dqv, txb_info->shift);
 }
@@ -2024,7 +2056,9 @@
   const LV_MAP_COEFF_COST txb_costs = x->coeff_costs[txs_ctx][plane_type];
 #if CONFIG_NEW_QUANT
   int dq = get_dq_profile(cm->dq_type, x->qindex, is_inter, plane_type);
+#if !CONFIG_AOM_QM
   const dequant_val_type_nuq *dequant_val = p->dequant_val_nuq_QTX[dq];
+#endif  // !CONFIG_AOM_QM
 #endif  // CONFIG_NEW_QUANT
   const int eob_multi_size = txsize_log2_minus4[tx_size];
   const LV_MAP_EOB_COST txb_eob_costs =
@@ -2050,7 +2084,11 @@
     tcoeff,
     dequant,
 #if CONFIG_NEW_QUANT
+#if CONFIG_AOM_QM
+    dq,
+#else
     dequant_val,
+#endif  // CONFIG_AOM_QM
 #endif  // CONFIG_NEW_QUANT
     shift,
     tx_size,
diff --git a/av1/encoder/encodetxb.h b/av1/encoder/encodetxb.h
index eceb25a..d86c49c 100644
--- a/av1/encoder/encodetxb.h
+++ b/av1/encoder/encodetxb.h
@@ -30,7 +30,11 @@
   const tran_low_t *tcoeff;
   const int16_t *dequant;
 #if CONFIG_NEW_QUANT
+#if CONFIG_AOM_QM
+  const int dq_idx;
+#else
   const dequant_val_type_nuq *nq_dequant_vals;
+#endif  // CONFIG_AOM_QM
 #endif  // CONFIG_NEW_QUANT
   int shift;
   TX_SIZE tx_size;