Refactor quantization C code.

This commit de-duplicates C reference quantization code
and unifies quantization matrix (QM) and non-QM code
paths when there is no SIMD.

The reorganisation also will facilitate re-using SIMD quant
functions for QM when the matrix is flat, as is the
default when AOM_QM is enabled.

Change-Id: Idbfdac9eb9a31adcffe734aac1877d58b86fab77
diff --git a/aom_dsp/aom_dsp_common.h b/aom_dsp/aom_dsp_common.h
index 5b10432..a3cea56 100644
--- a/aom_dsp/aom_dsp_common.h
+++ b/aom_dsp/aom_dsp_common.h
@@ -52,10 +52,9 @@
 #define UNLIKELY(v) (v)
 #endif
 
-#if CONFIG_AOM_QM
 typedef uint16_t qm_val_t;
 #define AOM_QM_BITS 5
-#endif
+
 #if CONFIG_HIGHBITDEPTH
 // Note:
 // tran_low_t  is the datatype used for final transform coefficients.
diff --git a/aom_dsp/aom_dsp_rtcd_defs.pl b/aom_dsp/aom_dsp_rtcd_defs.pl
index 726b4f2..f54b5150 100755
--- a/aom_dsp/aom_dsp_rtcd_defs.pl
+++ b/aom_dsp/aom_dsp_rtcd_defs.pl
@@ -517,23 +517,7 @@
 #
 # Quantization
 #
-if (aom_config("CONFIG_AOM_QM") eq "yes") {
-  if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
-    add_proto qw/void aom_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
-
-    add_proto qw/void aom_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
-
-    add_proto qw/void aom_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
-
-    add_proto qw/void aom_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
-
-    add_proto qw/void aom_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
-
-    add_proto qw/void aom_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
-
-  }  # CONFIG_AV1_ENCODER
-} else {
-  if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
+if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
     add_proto qw/void aom_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
     specialize qw/aom_quantize_b sse2/, "$ssse3_x86_64", "$avx_x86_64";
 
@@ -541,17 +525,18 @@
     specialize qw/aom_quantize_b_32x32/, "$ssse3_x86_64", "$avx_x86_64";
 
     add_proto qw/void aom_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+}  # CONFIG_AV1_ENCODER
 
-    add_proto qw/void aom_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
-    specialize qw/aom_highbd_quantize_b sse2 avx2/;
+if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
+  add_proto qw/void aom_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+  specialize qw/aom_highbd_quantize_b sse2 avx2/;
 
-    add_proto qw/void aom_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
-    specialize qw/aom_highbd_quantize_b_32x32 sse2/;
+  add_proto qw/void aom_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+  specialize qw/aom_highbd_quantize_b_32x32 sse2/;
 
-    add_proto qw/void aom_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+  add_proto qw/void aom_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
 
-  }  # CONFIG_AV1_ENCODER
-} # CONFIG_AOM_QM
+}  # CONFIG_AV1_ENCODER
 if (aom_config("CONFIG_AV1") eq "yes") {
   #
   # Alpha blending with mask
diff --git a/aom_dsp/quantize.c b/aom_dsp/quantize.c
index fe98b60..3e8f0d4 100644
--- a/aom_dsp/quantize.c
+++ b/aom_dsp/quantize.c
@@ -12,18 +12,14 @@
 #include "aom_dsp/quantize.h"
 #include "aom_mem/aom_mem.h"
 
-static void quantize_b_helper_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
-                                int skip_block, const int16_t *zbin_ptr,
-                                const int16_t *round_ptr,
-                                const int16_t *quant_ptr,
-                                const int16_t *quant_shift_ptr,
-                                tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
-                                const int16_t *dequant_ptr, uint16_t *eob_ptr,
-                                const int16_t *scan, const int16_t *iscan,
-#if CONFIG_AOM_QM
-                                const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr,
-#endif
-                                const int log_scale) {
+void quantize_b_helper_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+                         int skip_block, const int16_t *zbin_ptr,
+                         const int16_t *round_ptr, const int16_t *quant_ptr,
+                         const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+                         tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
+                         uint16_t *eob_ptr, const int16_t *scan,
+                         const int16_t *iscan, const qm_val_t *qm_ptr,
+                         const qm_val_t *iqm_ptr, const int log_scale) {
   const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], log_scale),
                          ROUND_POWER_OF_TWO(zbin_ptr[1], log_scale) };
   const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
@@ -37,20 +33,12 @@
     // Pre-scan pass
     for (i = (int)n_coeffs - 1; i >= 0; i--) {
       const int rc = scan[i];
-#if CONFIG_AOM_QM
-      const qm_val_t wt = qm_ptr[rc];
+      const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
       const int coeff = coeff_ptr[rc] * wt;
-#else
-      const int coeff = coeff_ptr[rc];
-#endif  // CONFIG_AOM_QM
 
-#if CONFIG_AOM_QM
       if (coeff < (zbins[rc != 0] << AOM_QM_BITS) &&
           coeff > (nzbins[rc != 0] << AOM_QM_BITS))
         non_zero_count--;
-#else
-      if (coeff < zbins[rc != 0] && coeff > nzbins[rc != 0]) non_zero_count--;
-#endif  // CONFIG_AOM_QM
       else
         break;
     }
@@ -64,35 +52,21 @@
       const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
       int tmp32;
 
-#if CONFIG_AOM_QM
-      const qm_val_t wt = qm_ptr[rc];
+      const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
       if (abs_coeff * wt >= (zbins[rc != 0] << AOM_QM_BITS)) {
-#else
-      if (abs_coeff >= zbins[rc != 0]) {
-#endif  // CONFIG_AOM_QM
         int64_t tmp =
             clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale),
                   INT16_MIN, INT16_MAX);
-#if CONFIG_AOM_QM
         tmp *= wt;
         tmp32 = (int)(((((tmp * quant_ptr[rc != 0]) >> 16) + tmp) *
                        quant_shift_ptr[rc != 0]) >>
                       (16 - log_scale + AOM_QM_BITS));  // quantization
-#else
-        tmp32 = (int)(((((tmp * quant_ptr[rc != 0]) >> 16) + tmp) *
-                       quant_shift_ptr[rc != 0]) >>
-                      (16 - log_scale));  // quantization
-#endif  // CONFIG_AOM_QM
         qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
-#if CONFIG_AOM_QM
+        const int iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
         const int dequant =
-            (dequant_ptr[rc != 0] * iqm_ptr[rc] + (1 << (AOM_QM_BITS - 1))) >>
+            (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
             AOM_QM_BITS;
         dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant / (1 << log_scale);
-#else
-        dqcoeff_ptr[rc] =
-            qcoeff_ptr[rc] * dequant_ptr[rc != 0] / (1 << log_scale);
-#endif  // CONFIG_AOM_QM
 
         if (tmp32) eob = i;
       }
@@ -101,25 +75,111 @@
   *eob_ptr = eob + 1;
 }
 
+void highbd_quantize_b_helper_c(
+    const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
+    const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
+    const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+    tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
+    const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
+    const qm_val_t *iqm_ptr, const int log_scale) {
+  int i, eob = -1;
+  const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], log_scale),
+                         ROUND_POWER_OF_TWO(zbin_ptr[1], log_scale) };
+  const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
+  int dequant;
+#if CONFIG_TX64X64
+  int idx_arr[4096];
+#else
+  int idx_arr[1024];
+#endif
+  (void)iscan;
+  int idx = 0;
+
+  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
+  if (!skip_block) {
+    // Pre-scan pass
+    for (i = 0; i < n_coeffs; i++) {
+      const int rc = scan[i];
+      const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+      const int coeff = coeff_ptr[rc] * wt;
+
+      // If the coefficient is out of the base ZBIN range, keep it for
+      // quantization.
+      if (coeff >= (zbins[rc != 0] << AOM_QM_BITS) ||
+          coeff <= (nzbins[rc != 0] << AOM_QM_BITS))
+        idx_arr[idx++] = i;
+    }
+
+    // Quantization pass: only process the coefficients selected in
+    // pre-scan pass. Note: idx can be zero.
+    for (i = 0; i < idx; i++) {
+      const int rc = scan[idx_arr[i]];
+      const int coeff = coeff_ptr[rc];
+      const int coeff_sign = (coeff >> 31);
+      const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+      const qm_val_t iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
+      const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+      const int64_t tmp1 =
+          abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale);
+      const int64_t tmpw = tmp1 * wt;
+      const int64_t tmp2 = ((tmpw * quant_ptr[rc != 0]) >> 16) + tmpw;
+      const uint32_t abs_qcoeff = (uint32_t)(
+          (tmp2 * quant_shift_ptr[rc != 0]) >> (16 - log_scale + AOM_QM_BITS));
+      qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
+      dequant = (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
+                AOM_QM_BITS;
+      dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / (1 << log_scale);
+      if (abs_qcoeff) eob = idx_arr[i];
+    }
+  }
+  *eob_ptr = eob + 1;
+}
+
+void quantize_dc_helper(const tran_low_t *coeff_ptr, int n_coeffs,
+                        int skip_block, const int16_t *round_ptr,
+                        const int16_t quant, tran_low_t *qcoeff_ptr,
+                        tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr,
+                        uint16_t *eob_ptr, const qm_val_t *qm_ptr,
+                        const qm_val_t *iqm_ptr, const int log_scale) {
+  const int rc = 0;
+  const int coeff = coeff_ptr[rc];
+  const int coeff_sign = (coeff >> 31);
+  const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+  int64_t tmp, eob = -1;
+  int32_t tmp32;
+  int dequant;
+
+  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
+  if (!skip_block) {
+    const int wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+    const int iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
+    tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale),
+                INT16_MIN, INT16_MAX);
+    tmp32 = (int32_t)((tmp * wt * quant) >> (16 - log_scale + AOM_QM_BITS));
+    qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
+    dequant = (dequant_ptr * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
+    dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / (1 << log_scale);
+    if (tmp32) eob = 0;
+  }
+  *eob_ptr = eob + 1;
+}
+
+/* These functions should only be called when quantisation matrices
+   are not used. */
 void aom_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                       int skip_block, const int16_t *zbin_ptr,
                       const int16_t *round_ptr, const int16_t *quant_ptr,
                       const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
                       tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
                       uint16_t *eob_ptr, const int16_t *scan,
-                      const int16_t *iscan
-#if CONFIG_AOM_QM
-                      ,
-                      const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr
-#endif
-                      ) {
+                      const int16_t *iscan) {
   quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr, round_ptr,
                       quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr,
-                      dequant_ptr, eob_ptr, scan, iscan,
-#if CONFIG_AOM_QM
-                      qm_ptr, iqm_ptr,
-#endif
-                      0);
+                      dequant_ptr, eob_ptr, scan, iscan, NULL, NULL, 0);
 }
 
 void aom_quantize_b_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
@@ -128,19 +188,10 @@
                             const int16_t *quant_shift_ptr,
                             tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                             const int16_t *dequant_ptr, uint16_t *eob_ptr,
-                            const int16_t *scan, const int16_t *iscan
-#if CONFIG_AOM_QM
-                            ,
-                            const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr
-#endif
-                            ) {
+                            const int16_t *scan, const int16_t *iscan) {
   quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr, round_ptr,
                       quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr,
-                      dequant_ptr, eob_ptr, scan, iscan,
-#if CONFIG_AOM_QM
-                      qm_ptr, iqm_ptr,
-#endif
-                      1);
+                      dequant_ptr, eob_ptr, scan, iscan, NULL, NULL, 1);
 }
 
 #if CONFIG_TX64X64
@@ -150,427 +201,28 @@
                             const int16_t *quant_shift_ptr,
                             tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                             const int16_t *dequant_ptr, uint16_t *eob_ptr,
-                            const int16_t *scan, const int16_t *iscan
-#if CONFIG_AOM_QM
-                            ,
-                            const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr
-#endif
-                            ) {
+                            const int16_t *scan, const int16_t *iscan) {
   quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr, round_ptr,
                       quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr,
-                      dequant_ptr, eob_ptr, scan, iscan,
-#if CONFIG_AOM_QM
-                      qm_ptr, iqm_ptr,
-#endif
-                      2);
+                      dequant_ptr, eob_ptr, scan, iscan, NULL, NULL, 2);
 }
 #endif  // CONFIG_TX64X64
 
-#if CONFIG_AOM_QM
-void aom_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block,
-                     const int16_t *round_ptr, const int16_t quant,
-                     tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
-                     const int16_t dequant_ptr, uint16_t *eob_ptr,
-                     const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr) {
-  const int rc = 0;
-  const int coeff = coeff_ptr[rc];
-  const int coeff_sign = (coeff >> 31);
-  const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
-  int64_t tmp, eob = -1;
-  int32_t tmp32;
-  int dequant =
-      (dequant_ptr * iqm_ptr[rc] + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
-
-  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
-  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
-  if (!skip_block) {
-    tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
-    tmp32 = (int32_t)((tmp * qm_ptr[rc] * quant) >> (16 + AOM_QM_BITS));
-    qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
-    dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant;
-    if (tmp32) eob = 0;
-  }
-  *eob_ptr = eob + 1;
-}
-
-void aom_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
-                           const int16_t *round_ptr, const int16_t quant,
-                           tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
-                           const int16_t dequant_ptr, uint16_t *eob_ptr,
-                           const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr) {
-  const int n_coeffs = 1024;
-  const int rc = 0;
-  const int coeff = coeff_ptr[rc];
-  const int coeff_sign = (coeff >> 31);
-  const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
-  int64_t tmp, eob = -1;
-  int32_t tmp32;
-  int dequant;
-
-  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
-  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
-  if (!skip_block) {
-    tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1),
-                INT16_MIN, INT16_MAX);
-    tmp32 = (int32_t)((tmp * qm_ptr[rc] * quant) >> (15 + AOM_QM_BITS));
-    qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
-    dequant =
-        (dequant_ptr * iqm_ptr[rc] + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
-    dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / 2;
-    if (tmp32) eob = 0;
-  }
-  *eob_ptr = eob + 1;
-}
-
-#if CONFIG_TX64X64
-void aom_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block,
-                           const int16_t *round_ptr, const int16_t quant,
-                           tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
-                           const int16_t dequant_ptr, uint16_t *eob_ptr,
-                           const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr) {
-  const int n_coeffs = 1024;
-  const int rc = 0;
-  const int coeff = coeff_ptr[rc];
-  const int coeff_sign = (coeff >> 31);
-  const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
-  int64_t tmp, eob = -1;
-  int32_t tmp32;
-  int dequant;
-
-  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
-  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
-  if (!skip_block) {
-    tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 2),
-                INT16_MIN, INT16_MAX);
-    tmp32 = (int32_t)((tmp * qm_ptr[rc] * quant) >> (14 + AOM_QM_BITS));
-    qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
-    dequant =
-        (dequant_ptr * iqm_ptr[rc] + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
-    dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / 4;
-    if (tmp32) eob = 0;
-  }
-  *eob_ptr = eob + 1;
-}
-#endif  // CONFIG_TX64X64
-
-void aom_highbd_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs,
-                            int skip_block, const int16_t *round_ptr,
-                            const int16_t quant, tran_low_t *qcoeff_ptr,
-                            tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr,
-                            uint16_t *eob_ptr, const qm_val_t *qm_ptr,
-                            const qm_val_t *iqm_ptr) {
-  int eob = -1;
-  int dequant =
-      (dequant_ptr * iqm_ptr[0] + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
-
-  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
-  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
-  if (!skip_block) {
-    const int coeff = coeff_ptr[0];
-    const int coeff_sign = (coeff >> 31);
-    const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
-    const int64_t tmp = abs_coeff + round_ptr[0];
-    const uint32_t abs_qcoeff =
-        (uint32_t)((tmp * qm_ptr[0] * quant) >> (16 + AOM_QM_BITS));
-    qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
-    dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant;
-    if (abs_qcoeff) eob = 0;
-  }
-  *eob_ptr = eob + 1;
-}
-
-void aom_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
-                                  const int16_t *round_ptr, const int16_t quant,
-                                  tran_low_t *qcoeff_ptr,
-                                  tran_low_t *dqcoeff_ptr,
-                                  const int16_t dequant_ptr, uint16_t *eob_ptr,
-                                  const qm_val_t *qm_ptr,
-                                  const qm_val_t *iqm_ptr) {
-  const int n_coeffs = 1024;
-  int eob = -1;
-  int dequant;
-
-  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
-  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
-  if (!skip_block) {
-    const int coeff = coeff_ptr[0];
-    const int coeff_sign = (coeff >> 31);
-    const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
-    const int64_t tmp = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[0], 1);
-    const uint32_t abs_qcoeff =
-        (uint32_t)((tmp * qm_ptr[0] * quant) >> (15 + AOM_QM_BITS));
-    qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
-    dequant =
-        (dequant_ptr * iqm_ptr[0] + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
-    dqcoeff_ptr[0] = (qcoeff_ptr[0] * dequant) / 2;
-    if (abs_qcoeff) eob = 0;
-  }
-  *eob_ptr = eob + 1;
-}
-
-#if CONFIG_TX64X64
-void aom_highbd_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block,
-                                  const int16_t *round_ptr, const int16_t quant,
-                                  tran_low_t *qcoeff_ptr,
-                                  tran_low_t *dqcoeff_ptr,
-                                  const int16_t dequant_ptr, uint16_t *eob_ptr,
-                                  const qm_val_t *qm_ptr,
-                                  const qm_val_t *iqm_ptr) {
-  const int n_coeffs = 1024;
-  int eob = -1;
-  int dequant;
-
-  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
-  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
-  if (!skip_block) {
-    const int coeff = coeff_ptr[0];
-    const int coeff_sign = (coeff >> 31);
-    const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
-    const int64_t tmp = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[0], 2);
-    const uint32_t abs_qcoeff =
-        (uint32_t)((tmp * qm_ptr[0] * quant) >> (14 + AOM_QM_BITS));
-    qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
-    dequant =
-        (dequant_ptr * iqm_ptr[0] + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
-    dqcoeff_ptr[0] = (qcoeff_ptr[0] * dequant) / 4;
-    if (abs_qcoeff) eob = 0;
-  }
-  *eob_ptr = eob + 1;
-}
-#endif  // CONFIG_TX64X64
-
-void aom_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
-                             int skip_block, const int16_t *zbin_ptr,
-                             const int16_t *round_ptr, const int16_t *quant_ptr,
-                             const int16_t *quant_shift_ptr,
-                             tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
-                             const int16_t *dequant_ptr, uint16_t *eob_ptr,
-                             const int16_t *scan, const int16_t *iscan,
-                             const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr) {
-  int i, non_zero_count = (int)n_coeffs, eob = -1;
-  const int zbins[2] = { zbin_ptr[0], zbin_ptr[1] };
-  const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
-  int dequant;
-  (void)iscan;
-
-  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
-  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
-  if (!skip_block) {
-    // Pre-scan pass
-    for (i = (int)n_coeffs - 1; i >= 0; i--) {
-      const int rc = scan[i];
-      const qm_val_t wt = qm_ptr[rc];
-      const int coeff = coeff_ptr[rc] * wt;
-
-      if (coeff < (zbins[rc != 0] << AOM_QM_BITS) &&
-          coeff > (nzbins[rc != 0] << AOM_QM_BITS))
-        non_zero_count--;
-      else
-        break;
-    }
-
-    // Quantization pass: All coefficients with index >= zero_flag are
-    // skippable. Note: zero_flag can be zero.
-    for (i = 0; i < non_zero_count; i++) {
-      const int rc = scan[i];
-      const int coeff = coeff_ptr[rc];
-      const qm_val_t wt = qm_ptr[rc];
-      const int coeff_sign = (coeff >> 31);
-      const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
-
-      if (abs_coeff * wt >= (zbins[rc != 0] << AOM_QM_BITS)) {
-        const int64_t tmp1 = abs_coeff + round_ptr[rc != 0];
-        const int64_t tmpw = tmp1 * wt;
-        const int64_t tmp2 = ((tmpw * quant_ptr[rc != 0]) >> 16) + tmpw;
-        const uint32_t abs_qcoeff =
-            (uint32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> (16 + AOM_QM_BITS));
-        qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
-        dequant =
-            (dequant_ptr[rc != 0] * iqm_ptr[rc] + (1 << (AOM_QM_BITS - 1))) >>
-            AOM_QM_BITS;
-        dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant;
-        if (abs_qcoeff) eob = i;
-      }
-    }
-  }
-  *eob_ptr = eob + 1;
-}
-
-void aom_highbd_quantize_b_32x32_c(
-    const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
-    const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
-    const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
-    tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
-    const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
-    const qm_val_t *iqm_ptr) {
-  const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], 1),
-                         ROUND_POWER_OF_TWO(zbin_ptr[1], 1) };
-  const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
-
-  int idx = 0;
-  int idx_arr[1024];
-  int i, eob = -1;
-  int dequant;
-  (void)iscan;
-
-  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
-  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
-  if (!skip_block) {
-    // Pre-scan pass
-    for (i = 0; i < n_coeffs; i++) {
-      const int rc = scan[i];
-      const qm_val_t wt = qm_ptr[rc];
-      const int coeff = coeff_ptr[rc] * wt;
-
-      // If the coefficient is out of the base ZBIN range, keep it for
-      // quantization.
-      if (coeff >= (zbins[rc != 0] << AOM_QM_BITS) ||
-          coeff <= (nzbins[rc != 0] << AOM_QM_BITS))
-        idx_arr[idx++] = i;
-    }
-
-    // Quantization pass: only process the coefficients selected in
-    // pre-scan pass. Note: idx can be zero.
-    for (i = 0; i < idx; i++) {
-      const int rc = scan[idx_arr[i]];
-      const int coeff = coeff_ptr[rc];
-      const int coeff_sign = (coeff >> 31);
-      const qm_val_t wt = qm_ptr[rc];
-      const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
-      const int64_t tmp1 =
-          abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
-      const int64_t tmpw = tmp1 * wt;
-      const int64_t tmp2 = ((tmpw * quant_ptr[rc != 0]) >> 16) + tmpw;
-      const uint32_t abs_qcoeff =
-          (uint32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> (15 + AOM_QM_BITS));
-      qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
-      dequant =
-          (dequant_ptr[rc != 0] * iqm_ptr[rc] + (1 << (AOM_QM_BITS - 1))) >>
-          AOM_QM_BITS;
-      dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / 2;
-      if (abs_qcoeff) eob = idx_arr[i];
-    }
-  }
-  *eob_ptr = eob + 1;
-}
-
-#if CONFIG_TX64X64
-void aom_highbd_quantize_b_64x64_c(
-    const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
-    const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
-    const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
-    tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
-    const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
-    const qm_val_t *iqm_ptr) {
-  const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], 2),
-                         ROUND_POWER_OF_TWO(zbin_ptr[1], 2) };
-  const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
-
-  int idx = 0;
-  int idx_arr[4096];
-  int i, eob = -1;
-  int dequant;
-  (void)iscan;
-
-  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
-  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
-  if (!skip_block) {
-    // Pre-scan pass
-    for (i = 0; i < n_coeffs; i++) {
-      const int rc = scan[i];
-      const qm_val_t wt = qm_ptr[rc];
-      const int coeff = coeff_ptr[rc] * wt;
-
-      // If the coefficient is out of the base ZBIN range, keep it for
-      // quantization.
-      if (coeff >= (zbins[rc != 0] << AOM_QM_BITS) ||
-          coeff <= (nzbins[rc != 0] << AOM_QM_BITS))
-        idx_arr[idx++] = i;
-    }
-
-    // Quantization pass: only process the coefficients selected in
-    // pre-scan pass. Note: idx can be zero.
-    for (i = 0; i < idx; i++) {
-      const int rc = scan[idx_arr[i]];
-      const int coeff = coeff_ptr[rc];
-      const int coeff_sign = (coeff >> 31);
-      const qm_val_t wt = qm_ptr[rc];
-      const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
-      const int64_t tmp1 =
-          abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 2);
-      const int64_t tmpw = tmp1 * wt;
-      const int64_t tmp2 = ((tmpw * quant_ptr[rc != 0]) >> 16) + tmpw;
-      const uint32_t abs_qcoeff =
-          (uint32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> (14 + AOM_QM_BITS));
-      qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
-      dequant =
-          (dequant_ptr[rc != 0] * iqm_ptr[rc] + (1 << (AOM_QM_BITS - 1))) >>
-          AOM_QM_BITS;
-      dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / 4;
-      if (abs_qcoeff) eob = idx_arr[i];
-    }
-  }
-  *eob_ptr = eob + 1;
-}
-#endif  // CONFIG_TX64X64
-
-#else  // CONFIG_AOM_QM
-
 void aom_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block,
                      const int16_t *round_ptr, const int16_t quant,
                      tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                      const int16_t dequant_ptr, uint16_t *eob_ptr) {
-  const int rc = 0;
-  const int coeff = coeff_ptr[rc];
-  const int coeff_sign = (coeff >> 31);
-  const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
-  int tmp, eob = -1;
-
-  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
-  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
-  if (!skip_block) {
-    tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
-    tmp = (tmp * quant) >> 16;
-    qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
-    dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr;
-    if (tmp) eob = 0;
-  }
-  *eob_ptr = eob + 1;
+  quantize_dc_helper(coeff_ptr, n_coeffs, skip_block, round_ptr, quant,
+                     qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, NULL, NULL,
+                     0);
 }
 
 void aom_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
                            const int16_t *round_ptr, const int16_t quant,
                            tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                            const int16_t dequant_ptr, uint16_t *eob_ptr) {
-  const int n_coeffs = 1024;
-  const int rc = 0;
-  const int coeff = coeff_ptr[rc];
-  const int coeff_sign = (coeff >> 31);
-  const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
-  int tmp, eob = -1;
-
-  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
-  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
-  if (!skip_block) {
-    tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1),
-                INT16_MIN, INT16_MAX);
-    tmp = (tmp * quant) >> 15;
-    qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
-    dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr / 2;
-    if (tmp) eob = 0;
-  }
-  *eob_ptr = eob + 1;
+  quantize_dc_helper(coeff_ptr, 1024, skip_block, round_ptr, quant, qcoeff_ptr,
+                     dqcoeff_ptr, dequant_ptr, eob_ptr, NULL, NULL, 1);
 }
 
 #if CONFIG_TX64X64
@@ -578,100 +230,8 @@
                            const int16_t *round_ptr, const int16_t quant,
                            tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                            const int16_t dequant_ptr, uint16_t *eob_ptr) {
-  const int n_coeffs = 4096;
-  const int rc = 0;
-  const int coeff = coeff_ptr[rc];
-  const int coeff_sign = (coeff >> 31);
-  const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
-  int tmp, eob = -1;
-
-  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
-  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
-  if (!skip_block) {
-    tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 2),
-                INT16_MIN, INT16_MAX);
-    tmp = (tmp * quant) >> 14;
-    qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
-    dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr / 4;
-    if (tmp) eob = 0;
-  }
-  *eob_ptr = eob + 1;
-}
-#endif  // CONFIG_TX64X64
-
-void aom_highbd_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs,
-                            int skip_block, const int16_t *round_ptr,
-                            const int16_t quant, tran_low_t *qcoeff_ptr,
-                            tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr,
-                            uint16_t *eob_ptr) {
-  int eob = -1;
-
-  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
-  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
-  if (!skip_block) {
-    const int coeff = coeff_ptr[0];
-    const int coeff_sign = (coeff >> 31);
-    const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
-    const int64_t tmp = abs_coeff + round_ptr[0];
-    const uint32_t abs_qcoeff = (uint32_t)((tmp * quant) >> 16);
-    qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
-    dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr;
-    if (abs_qcoeff) eob = 0;
-  }
-  *eob_ptr = eob + 1;
-}
-
-void aom_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
-                                  const int16_t *round_ptr, const int16_t quant,
-                                  tran_low_t *qcoeff_ptr,
-                                  tran_low_t *dqcoeff_ptr,
-                                  const int16_t dequant_ptr,
-                                  uint16_t *eob_ptr) {
-  const int n_coeffs = 1024;
-  int eob = -1;
-
-  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
-  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
-  if (!skip_block) {
-    const int coeff = coeff_ptr[0];
-    const int coeff_sign = (coeff >> 31);
-    const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
-    const int64_t tmp = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[0], 1);
-    const uint32_t abs_qcoeff = (uint32_t)((tmp * quant) >> 15);
-    qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
-    dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr / 2;
-    if (abs_qcoeff) eob = 0;
-  }
-  *eob_ptr = eob + 1;
-}
-
-#if CONFIG_TX64X64
-void aom_highbd_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block,
-                                  const int16_t *round_ptr, const int16_t quant,
-                                  tran_low_t *qcoeff_ptr,
-                                  tran_low_t *dqcoeff_ptr,
-                                  const int16_t dequant_ptr,
-                                  uint16_t *eob_ptr) {
-  const int n_coeffs = 4096;
-  int eob = -1;
-
-  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
-  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
-  if (!skip_block) {
-    const int coeff = coeff_ptr[0];
-    const int coeff_sign = (coeff >> 31);
-    const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
-    const int64_t tmp = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[0], 2);
-    const uint32_t abs_qcoeff = (uint32_t)((tmp * quant) >> 14);
-    qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
-    dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr / 4;
-    if (abs_qcoeff) eob = 0;
-  }
-  *eob_ptr = eob + 1;
+  quantize_dc_helper(coeff_ptr, 4096, skip_block, round_ptr, quant, qcoeff_ptr,
+                     dqcoeff_ptr, dequant_ptr, eob_ptr, NULL, NULL, 2);
 }
 #endif  // CONFIG_TX64X64
 
@@ -682,45 +242,10 @@
                              tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                              const int16_t *dequant_ptr, uint16_t *eob_ptr,
                              const int16_t *scan, const int16_t *iscan) {
-  int i, non_zero_count = (int)n_coeffs, eob = -1;
-  const int zbins[2] = { zbin_ptr[0], zbin_ptr[1] };
-  const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
-  (void)iscan;
-
-  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
-  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
-  if (!skip_block) {
-    // Pre-scan pass
-    for (i = (int)n_coeffs - 1; i >= 0; i--) {
-      const int rc = scan[i];
-      const int coeff = coeff_ptr[rc];
-
-      if (coeff < zbins[rc != 0] && coeff > nzbins[rc != 0])
-        non_zero_count--;
-      else
-        break;
-    }
-
-    // Quantization pass: All coefficients with index >= zero_flag are
-    // skippable. Note: zero_flag can be zero.
-    for (i = 0; i < non_zero_count; i++) {
-      const int rc = scan[i];
-      const int coeff = coeff_ptr[rc];
-      const int coeff_sign = (coeff >> 31);
-      const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
-
-      if (abs_coeff >= zbins[rc != 0]) {
-        const int64_t tmp1 = abs_coeff + round_ptr[rc != 0];
-        const int64_t tmp2 = ((tmp1 * quant_ptr[rc != 0]) >> 16) + tmp1;
-        const int abs_qcoeff = (int)((tmp2 * quant_shift_ptr[rc != 0]) >> 16);
-        qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
-        dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
-        if (abs_qcoeff) eob = i;
-      }
-    }
-  }
-  *eob_ptr = eob + 1;
+  highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr,
+                             round_ptr, quant_ptr, quant_shift_ptr, qcoeff_ptr,
+                             dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
+                             NULL, NULL, 0);
 }
 
 void aom_highbd_quantize_b_32x32_c(
@@ -729,47 +254,10 @@
     const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
     tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
     const int16_t *scan, const int16_t *iscan) {
-  const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], 1),
-                         ROUND_POWER_OF_TWO(zbin_ptr[1], 1) };
-  const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
-
-  int idx = 0;
-  int idx_arr[1024];
-  int i, eob = -1;
-  (void)iscan;
-
-  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
-  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
-  if (!skip_block) {
-    // Pre-scan pass
-    for (i = 0; i < n_coeffs; i++) {
-      const int rc = scan[i];
-      const int coeff = coeff_ptr[rc];
-
-      // If the coefficient is out of the base ZBIN range, keep it for
-      // quantization.
-      if (coeff >= zbins[rc != 0] || coeff <= nzbins[rc != 0])
-        idx_arr[idx++] = i;
-    }
-
-    // Quantization pass: only process the coefficients selected in
-    // pre-scan pass. Note: idx can be zero.
-    for (i = 0; i < idx; i++) {
-      const int rc = scan[idx_arr[i]];
-      const int coeff = coeff_ptr[rc];
-      const int coeff_sign = (coeff >> 31);
-      const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
-      const int64_t tmp1 =
-          abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
-      const int64_t tmp2 = ((tmp1 * quant_ptr[rc != 0]) >> 16) + tmp1;
-      const int abs_qcoeff = (int)((tmp2 * quant_shift_ptr[rc != 0]) >> 15);
-      qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
-      dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
-      if (abs_qcoeff) eob = idx_arr[i];
-    }
-  }
-  *eob_ptr = eob + 1;
+  highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr,
+                             round_ptr, quant_ptr, quant_shift_ptr, qcoeff_ptr,
+                             dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
+                             NULL, NULL, 1);
 }
 
 #if CONFIG_TX64X64
@@ -779,47 +267,9 @@
     const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
     tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
     const int16_t *scan, const int16_t *iscan) {
-  const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], 2),
-                         ROUND_POWER_OF_TWO(zbin_ptr[1], 2) };
-  const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
-
-  int idx = 0;
-  int idx_arr[4096];
-  int i, eob = -1;
-  (void)iscan;
-
-  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
-  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
-  if (!skip_block) {
-    // Pre-scan pass
-    for (i = 0; i < n_coeffs; i++) {
-      const int rc = scan[i];
-      const int coeff = coeff_ptr[rc];
-
-      // If the coefficient is out of the base ZBIN range, keep it for
-      // quantization.
-      if (coeff >= zbins[rc != 0] || coeff <= nzbins[rc != 0])
-        idx_arr[idx++] = i;
-    }
-
-    // Quantization pass: only process the coefficients selected in
-    // pre-scan pass. Note: idx can be zero.
-    for (i = 0; i < idx; i++) {
-      const int rc = scan[idx_arr[i]];
-      const int coeff = coeff_ptr[rc];
-      const int coeff_sign = (coeff >> 31);
-      const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
-      const int64_t tmp1 =
-          abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 2);
-      const int64_t tmp2 = ((tmp1 * quant_ptr[rc != 0]) >> 16) + tmp1;
-      const int abs_qcoeff = (int)((tmp2 * quant_shift_ptr[rc != 0]) >> 14);
-      qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
-      dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 4;
-      if (abs_qcoeff) eob = idx_arr[i];
-    }
-  }
-  *eob_ptr = eob + 1;
+  highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr,
+                             round_ptr, quant_ptr, quant_shift_ptr, qcoeff_ptr,
+                             dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
+                             NULL, NULL, 2);
 }
 #endif  // CONFIG_TX64X64
-#endif  // CONFIG_AOM_QM
diff --git a/aom_dsp/quantize.h b/aom_dsp/quantize.h
index fe49b83..e4bbfb9 100644
--- a/aom_dsp/quantize.h
+++ b/aom_dsp/quantize.h
@@ -19,32 +19,57 @@
 extern "C" {
 #endif
 
-#if CONFIG_AOM_QM
-void aom_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block,
-                     const int16_t *round_ptr, const int16_t quant_ptr,
-                     tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
-                     const int16_t dequant_ptr, uint16_t *eob_ptr,
-                     const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr);
-void aom_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
-                           const int16_t *round_ptr, const int16_t quant_ptr,
-                           tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
-                           const int16_t dequant_ptr, uint16_t *eob_ptr,
-                           const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr);
-#if CONFIG_TX64X64
-void aom_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block,
-                           const int16_t *round_ptr, const int16_t quant_ptr,
-                           tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
-                           const int16_t dequant_ptr, uint16_t *eob_ptr,
-                           const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr);
-#endif  // CONFIG_TX64X64
+void quantize_b_helper_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+                         int skip_block, const int16_t *zbin_ptr,
+                         const int16_t *round_ptr, const int16_t *quant_ptr,
+                         const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+                         tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
+                         uint16_t *eob_ptr, const int16_t *scan,
+                         const int16_t *iscan, const qm_val_t *qm_ptr,
+                         const qm_val_t *iqm_ptr, const int log_scale);
+
 void aom_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                       int skip_block, const int16_t *zbin_ptr,
                       const int16_t *round_ptr, const int16_t *quant_ptr,
                       const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
                       tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
                       uint16_t *eob_ptr, const int16_t *scan,
-                      const int16_t *iscan, const qm_val_t *qm_ptr,
-                      const qm_val_t *iqm_ptr);
+                      const int16_t *iscan);
+
+#if CONFIG_HIGHBITDEPTH
+void aom_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+                             int skip_block, const int16_t *zbin_ptr,
+                             const int16_t *round_ptr, const int16_t *quant_ptr,
+                             const int16_t *quant_shift_ptr,
+                             tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+                             const int16_t *dequant_ptr, uint16_t *eob_ptr,
+                             const int16_t *scan, const int16_t *iscan);
+
+void highbd_quantize_b_helper_c(
+    const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
+    const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
+    const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+    tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
+    const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
+    const qm_val_t *iqm_ptr, const int log_scale);
+#endif
+
+void aom_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block,
+                     const int16_t *round_ptr, const int16_t quant_ptr,
+                     tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+                     const int16_t dequant_ptr, uint16_t *eob_ptr);
+void aom_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
+                           const int16_t *round_ptr, const int16_t quant_ptr,
+                           tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+                           const int16_t dequant_ptr, uint16_t *eob_ptr);
+#if CONFIG_TX64X64
+void aom_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block,
+                           const int16_t *round_ptr, const int16_t quant_ptr,
+                           tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+                           const int16_t dequant_ptr, uint16_t *eob_ptr);
+#endif  // CONFIG_TX64X64
+
+#if CONFIG_AOM_QM
 #if CONFIG_HIGHBITDEPTH
 void aom_highbd_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs,
                             int skip_block, const int16_t *round_ptr,
@@ -64,32 +89,10 @@
     const int16_t dequant_ptr, uint16_t *eob_ptr, const qm_val_t *qm_ptr,
     const qm_val_t *iqm_ptr);
 #endif  // CONFIG_TX64X64
-void aom_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
-                             int skip_block, const int16_t *zbin_ptr,
-                             const int16_t *round_ptr, const int16_t *quant_ptr,
-                             const int16_t *quant_shift_ptr,
-                             tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
-                             const int16_t *dequant_ptr, uint16_t *eob_ptr,
-                             const int16_t *scan, const int16_t *iscan,
-                             const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr);
 #endif  // CONFIG_HIGHBITDEPTH
 
 #else  // CONFIG_AOM_QM
 
-void aom_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block,
-                     const int16_t *round_ptr, const int16_t quant_ptr,
-                     tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
-                     const int16_t dequant_ptr, uint16_t *eob_ptr);
-void aom_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
-                           const int16_t *round_ptr, const int16_t quant_ptr,
-                           tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
-                           const int16_t dequant_ptr, uint16_t *eob_ptr);
-#if CONFIG_TX64X64
-void aom_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block,
-                           const int16_t *round_ptr, const int16_t quant_ptr,
-                           tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
-                           const int16_t dequant_ptr, uint16_t *eob_ptr);
-#endif  // CONFIG_TX64X64
 #if CONFIG_HIGHBITDEPTH
 void aom_highbd_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs,
                             int skip_block, const int16_t *round_ptr,
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index 91af8dd..edc07f8 100755
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -278,41 +278,7 @@
 
 # ENCODEMB INVOKE
 
-if (aom_config("CONFIG_AOM_QM") eq "yes") {
-  if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
-    # the transform coefficients are held in 32-bit
-    # values, so the assembler code for  av1_block_error can no longer be used.
-    add_proto qw/int64_t av1_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
-    specialize qw/av1_block_error avx2/;
-
-    add_proto qw/void av1_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
-
-    add_proto qw/void av1_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
-
-    if (aom_config("CONFIG_TX64X64") eq "yes") {
-      add_proto qw/void av1_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
-    }
-
-    add_proto qw/void av1_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
-  } else {
-    add_proto qw/int64_t av1_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
-    specialize qw/av1_block_error avx2 msa/, "$sse2_x86inc";
-
-    add_proto qw/int64_t av1_block_error_fp/, "const int16_t *coeff, const int16_t *dqcoeff, int block_size";
-    specialize qw/av1_block_error_fp neon/, "$sse2_x86inc";
-
-    add_proto qw/void av1_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
-
-    add_proto qw/void av1_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
-
-    if (aom_config("CONFIG_TX64X64") eq "yes") {
-      add_proto qw/void av1_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
-    }
-
-    add_proto qw/void av1_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
-  }
-} else {
-  if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
+if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
     # the transform coefficients are held in 32-bit
     # values, so the assembler code for  av1_block_error can no longer be used.
     add_proto qw/int64_t av1_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
@@ -327,9 +293,7 @@
     if (aom_config("CONFIG_TX64X64") eq "yes") {
       add_proto qw/void av1_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
     }
-
-    add_proto qw/void av1_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
-  } else {
+} else {
     add_proto qw/int64_t av1_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
     specialize qw/av1_block_error sse2 avx2 msa/;
 
@@ -346,12 +310,26 @@
       add_proto qw/void av1_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
     }
 
-    add_proto qw/void av1_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
-    specialize qw/av1_fdct8x8_quant sse2 ssse3 neon/;
-  }
-
 }
 
+#Unused
+#if (aom_config("CONFIG_AOM_QM") eq "yes") {
+#  if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
+#    add_proto qw/void av1_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
+#  } else {
+#    add_proto qw/void av1_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
+#  }
+#} else {
+#  if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
+#
+#    add_proto qw/void av1_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+#  } else {
+#    add_proto qw/void av1_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+#    specialize qw/av1_fdct8x8_quant sse2 ssse3 neon/;
+#  }
+#
+#}
+
 # fdct functions
 
 add_proto qw/void av1_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
@@ -488,22 +466,8 @@
 
 }
 
-if (aom_config("CONFIG_AOM_QM") eq "yes") {
-  add_proto qw/void av1_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale";
-
-  add_proto qw/void av1_highbd_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale";
-
-  if (aom_config("CONFIG_TX64X64") eq "yes") {
-    add_proto qw/void av1_highbd_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale";
-  }
-
-  add_proto qw/void av1_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale";
-} else {
-  add_proto qw/void av1_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale";
-  specialize qw/av1_highbd_quantize_fp sse4_1 avx2/;
-
-  add_proto qw/void av1_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale";
-}
+ add_proto qw/void av1_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale";
+ specialize qw/av1_highbd_quantize_fp sse4_1 avx2/;
 
 add_proto qw/void av1_highbd_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
 
diff --git a/av1/encoder/av1_quantize.c b/av1/encoder/av1_quantize.c
index dd53d42..b44e06f 100644
--- a/av1/encoder/av1_quantize.c
+++ b/av1/encoder/av1_quantize.c
@@ -443,11 +443,8 @@
     const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
     const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
     tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
-    const int16_t *scan, const int16_t *iscan,
-#if CONFIG_AOM_QM
-    const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr,
-#endif
-    int log_scale) {
+    const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
+    const qm_val_t *iqm_ptr, int log_scale) {
   int i, eob = -1;
   // TODO(jingning) Decide the need of these arguments after the
   // quantization process is completed.
@@ -464,35 +461,22 @@
     for (i = 0; i < n_coeffs; i++) {
       const int rc = scan[i];
       const int coeff = coeff_ptr[rc];
-#if CONFIG_AOM_QM
-      const qm_val_t wt = qm_ptr[rc];
-      const qm_val_t iwt = iqm_ptr[rc];
+      const qm_val_t wt = qm_ptr ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+      const qm_val_t iwt = iqm_ptr ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
       const int dequant =
           (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
           AOM_QM_BITS;
-#endif
       const int coeff_sign = (coeff >> 31);
-      int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+      int64_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
       int tmp32 = 0;
-#if CONFIG_AOM_QM
       if (abs_coeff * wt >=
           (dequant_ptr[rc != 0] << (AOM_QM_BITS - (1 + log_scale)))) {
-#else
-      if (abs_coeff >= (dequant_ptr[rc != 0] >> (1 + log_scale))) {
-#endif
         abs_coeff += ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale);
-        abs_coeff = clamp(abs_coeff, INT16_MIN, INT16_MAX);
-#if CONFIG_AOM_QM
+        abs_coeff = clamp64(abs_coeff, INT16_MIN, INT16_MAX);
         tmp32 = (int)((abs_coeff * wt * quant_ptr[rc != 0]) >>
-                      ((16 - log_scale) + AOM_QM_BITS));
+                      (16 - log_scale + AOM_QM_BITS));
         qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
         dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant / (1 << log_scale);
-#else
-        tmp32 = (int)((abs_coeff * quant_ptr[rc != 0]) >> (16 - log_scale));
-        qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
-        dqcoeff_ptr[rc] =
-            qcoeff_ptr[rc] * dequant_ptr[rc != 0] / (1 << log_scale);
-#endif
       }
 
       if (tmp32) eob = i;
@@ -501,25 +485,60 @@
   *eob_ptr = eob + 1;
 }
 
+static void highbd_quantize_fp_helper_c(
+    const tran_low_t *coeff_ptr, intptr_t count, int skip_block,
+    const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
+    const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+    tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
+    const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
+    const qm_val_t *iqm_ptr, int log_scale) {
+  int i;
+  int eob = -1;
+  const int scale = 1 << log_scale;
+  const int shift = 16 - log_scale;
+  // TODO(jingning) Decide the need of these arguments after the
+  // quantization process is completed.
+  (void)zbin_ptr;
+  (void)quant_shift_ptr;
+  (void)iscan;
+
+  memset(qcoeff_ptr, 0, count * sizeof(*qcoeff_ptr));
+  memset(dqcoeff_ptr, 0, count * sizeof(*dqcoeff_ptr));
+
+  if (!skip_block) {
+    // Quantization pass: All coefficients with index >= zero_flag are
+    // skippable. Note: zero_flag can be zero.
+    for (i = 0; i < count; i++) {
+      const int rc = scan[i];
+      const int coeff = coeff_ptr[rc];
+      const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+      const qm_val_t iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
+      const int dequant =
+          (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
+          AOM_QM_BITS;
+      const int coeff_sign = (coeff >> 31);
+      const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+      const int64_t tmp = abs_coeff + (round_ptr[rc != 0] >> log_scale);
+      const int abs_qcoeff =
+          (int)((tmp * quant_ptr[rc != 0] * wt) >> (shift + AOM_QM_BITS));
+      qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
+      dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant / scale;
+      if (abs_qcoeff) eob = i;
+    }
+  }
+  *eob_ptr = eob + 1;
+}
+
 void av1_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                        int skip_block, const int16_t *zbin_ptr,
                        const int16_t *round_ptr, const int16_t *quant_ptr,
                        const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
                        tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
                        uint16_t *eob_ptr, const int16_t *scan,
-                       const int16_t *iscan
-#if CONFIG_AOM_QM
-                       ,
-                       const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr
-#endif
-                       ) {
+                       const int16_t *iscan) {
   quantize_fp_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr, round_ptr,
                        quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr,
-                       dequant_ptr, eob_ptr, scan, iscan,
-#if CONFIG_AOM_QM
-                       qm_ptr, iqm_ptr,
-#endif
-                       0);
+                       dequant_ptr, eob_ptr, scan, iscan, NULL, NULL, 0);
 }
 
 void av1_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
@@ -528,19 +547,10 @@
                              const int16_t *quant_shift_ptr,
                              tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                              const int16_t *dequant_ptr, uint16_t *eob_ptr,
-                             const int16_t *scan, const int16_t *iscan
-#if CONFIG_AOM_QM
-                             ,
-                             const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr
-#endif
-                             ) {
+                             const int16_t *scan, const int16_t *iscan) {
   quantize_fp_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr, round_ptr,
                        quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr,
-                       dequant_ptr, eob_ptr, scan, iscan,
-#if CONFIG_AOM_QM
-                       qm_ptr, iqm_ptr,
-#endif
-                       1);
+                       dequant_ptr, eob_ptr, scan, iscan, NULL, NULL, 1);
 }
 
 #if CONFIG_TX64X64
@@ -550,19 +560,10 @@
                              const int16_t *quant_shift_ptr,
                              tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                              const int16_t *dequant_ptr, uint16_t *eob_ptr,
-                             const int16_t *scan, const int16_t *iscan
-#if CONFIG_AOM_QM
-                             ,
-                             const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr
-#endif
-                             ) {
+                             const int16_t *scan, const int16_t *iscan) {
   quantize_fp_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr, round_ptr,
                        quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr,
-                       dequant_ptr, eob_ptr, scan, iscan,
-#if CONFIG_AOM_QM
-                       qm_ptr, iqm_ptr,
-#endif
-                       2);
+                       dequant_ptr, eob_ptr, scan, iscan, NULL, NULL, 2);
 }
 #endif  // CONFIG_TX64X64
 
@@ -576,58 +577,47 @@
 #if CONFIG_AOM_QM
   const qm_val_t *qm_ptr = qparam->qmatrix;
   const qm_val_t *iqm_ptr = qparam->iqmatrix;
-#endif  // CONFIG_AOM_QM
-
-  switch (qparam->log_scale) {
-    case 0:
-      if (n_coeffs < 16) {
-        // TODO(jingning): Need SIMD implementation for smaller block size
-        // quantization.
-        quantize_fp_helper_c(coeff_ptr, n_coeffs, skip_block, p->zbin,
-                             p->round_fp, p->quant_fp, p->quant_shift,
-                             qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr,
-                             sc->scan, sc->iscan,
-#if CONFIG_AOM_QM
-                             qm_ptr, iqm_ptr,
+  if (1 /*qm_ptr != NULL || iqm_ptr != NULL*/) {
+    quantize_fp_helper_c(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round_fp,
+                         p->quant_fp, p->quant_shift, qcoeff_ptr, dqcoeff_ptr,
+                         pd->dequant, eob_ptr, sc->scan, sc->iscan, qm_ptr,
+                         iqm_ptr, qparam->log_scale);
+  } else {
 #endif
-                             qparam->log_scale);
-      } else {
-        av1_quantize_fp(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round_fp,
-                        p->quant_fp, p->quant_shift, qcoeff_ptr, dqcoeff_ptr,
-                        pd->dequant, eob_ptr, sc->scan, sc->iscan
-#if CONFIG_AOM_QM
-                        ,
-                        qm_ptr, iqm_ptr
-#endif
-                        );
-      }
-      break;
-    case 1:
-      av1_quantize_fp_32x32(coeff_ptr, n_coeffs, skip_block, p->zbin,
-                            p->round_fp, p->quant_fp, p->quant_shift,
-                            qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr,
-                            sc->scan, sc->iscan
-#if CONFIG_AOM_QM
-                            ,
-                            qm_ptr, iqm_ptr
-#endif
-                            );
-      break;
+    switch (qparam->log_scale) {
+      case 0:
+        if (n_coeffs < 16) {
+          // TODO(jingning): Need SIMD implementation for smaller block size
+          // quantization.
+          quantize_fp_helper_c(
+              coeff_ptr, n_coeffs, skip_block, p->zbin, p->round_fp,
+              p->quant_fp, p->quant_shift, qcoeff_ptr, dqcoeff_ptr, pd->dequant,
+              eob_ptr, sc->scan, sc->iscan, NULL, NULL, qparam->log_scale);
+        } else {
+          av1_quantize_fp(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round_fp,
+                          p->quant_fp, p->quant_shift, qcoeff_ptr, dqcoeff_ptr,
+                          pd->dequant, eob_ptr, sc->scan, sc->iscan);
+        }
+        break;
+      case 1:
+        av1_quantize_fp_32x32(coeff_ptr, n_coeffs, skip_block, p->zbin,
+                              p->round_fp, p->quant_fp, p->quant_shift,
+                              qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr,
+                              sc->scan, sc->iscan);
+        break;
 #if CONFIG_TX64X64
-    case 2:
-      av1_quantize_fp_64x64(coeff_ptr, n_coeffs, skip_block, p->zbin,
-                            p->round_fp, p->quant_fp, p->quant_shift,
-                            qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr,
-                            sc->scan, sc->iscan
-#if CONFIG_AOM_QM
-                            ,
-                            qm_ptr, iqm_ptr
-#endif
-                            );
-      break;
+      case 2:
+        av1_quantize_fp_64x64(coeff_ptr, n_coeffs, skip_block, p->zbin,
+                              p->round_fp, p->quant_fp, p->quant_shift,
+                              qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr,
+                              sc->scan, sc->iscan);
+        break;
 #endif  // CONFIG_TX64X64
-    default: assert(0);
+      default: assert(0);
+    }
+#if CONFIG_AOM_QM
   }
+#endif
 }
 
 void av1_quantize_b_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
@@ -640,43 +630,68 @@
 #if CONFIG_AOM_QM
   const qm_val_t *qm_ptr = qparam->qmatrix;
   const qm_val_t *iqm_ptr = qparam->iqmatrix;
+  if (1 /*qm_ptr != NULL && iqm_ptr != NULL*/) {
+    quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round,
+                        p->quant, p->quant_shift, qcoeff_ptr, dqcoeff_ptr,
+                        pd->dequant, eob_ptr, sc->scan, sc->iscan, qm_ptr,
+                        iqm_ptr, qparam->log_scale);
+  } else {
 #endif  // CONFIG_AOM_QM
 
-  switch (qparam->log_scale) {
-    case 0:
-      aom_quantize_b(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round,
-                     p->quant, p->quant_shift, qcoeff_ptr, dqcoeff_ptr,
-                     pd->dequant, eob_ptr, sc->scan, sc->iscan
-#if CONFIG_AOM_QM
-                     ,
-                     qm_ptr, iqm_ptr
-#endif
-                     );
-      break;
-    case 1:
-      aom_quantize_b_32x32(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round,
-                           p->quant, p->quant_shift, qcoeff_ptr, dqcoeff_ptr,
-                           pd->dequant, eob_ptr, sc->scan, sc->iscan
-#if CONFIG_AOM_QM
-                           ,
-                           qm_ptr, iqm_ptr
-#endif
-                           );
-      break;
+    switch (qparam->log_scale) {
+      case 0:
+        aom_quantize_b(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round,
+                       p->quant, p->quant_shift, qcoeff_ptr, dqcoeff_ptr,
+                       pd->dequant, eob_ptr, sc->scan, sc->iscan);
+        break;
+      case 1:
+        aom_quantize_b_32x32(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round,
+                             p->quant, p->quant_shift, qcoeff_ptr, dqcoeff_ptr,
+                             pd->dequant, eob_ptr, sc->scan, sc->iscan);
+        break;
 #if CONFIG_TX64X64
-    case 2:
-      aom_quantize_b_64x64(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round,
-                           p->quant, p->quant_shift, qcoeff_ptr, dqcoeff_ptr,
-                           pd->dequant, eob_ptr, sc->scan, sc->iscan
-#if CONFIG_AOM_QM
-                           ,
-                           qm_ptr, iqm_ptr
-#endif
-                           );
-      break;
+      case 2:
+        aom_quantize_b_64x64(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round,
+                             p->quant, p->quant_shift, qcoeff_ptr, dqcoeff_ptr,
+                             pd->dequant, eob_ptr, sc->scan, sc->iscan);
+        break;
 #endif  // CONFIG_TX64X64
-    default: assert(0);
+      default: assert(0);
+    }
+#if CONFIG_AOM_QM
   }
+#endif
+}
+
+static void quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs,
+                        int skip_block, const int16_t *round_ptr,
+                        const int16_t quant, tran_low_t *qcoeff_ptr,
+                        tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr,
+                        uint16_t *eob_ptr, const qm_val_t *qm_ptr,
+                        const qm_val_t *iqm_ptr, const int log_scale) {
+  const int rc = 0;
+  const int coeff = coeff_ptr[rc];
+  const int coeff_sign = (coeff >> 31);
+  const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+  int64_t tmp, eob = -1;
+  int32_t tmp32;
+  int dequant;
+
+  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
+  if (!skip_block) {
+    const int wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+    const int iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
+    tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale),
+                INT16_MIN, INT16_MAX);
+    tmp32 = (int32_t)((tmp * wt * quant) >> (16 - log_scale + AOM_QM_BITS));
+    qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
+    dequant = (dequant_ptr * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
+    dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / (1 << log_scale);
+    if (tmp32) eob = 0;
+  }
+  *eob_ptr = eob + 1;
 }
 
 void av1_quantize_dc_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
@@ -686,45 +701,18 @@
                             const SCAN_ORDER *sc, const QUANT_PARAM *qparam) {
   // obsolete skip_block
   const int skip_block = 0;
+  (void)sc;
+  assert(qparam->log_scale >= 0 && qparam->log_scale < (2 + CONFIG_TX64X64));
 #if CONFIG_AOM_QM
   const qm_val_t *qm_ptr = qparam->qmatrix;
   const qm_val_t *iqm_ptr = qparam->iqmatrix;
-#endif  // CONFIG_AOM_QM
-
-  (void)sc;
-
-  switch (qparam->log_scale) {
-    case 0:
-      aom_quantize_dc(coeff_ptr, (int)n_coeffs, skip_block, p->round,
-                      p->quant_fp[0], qcoeff_ptr, dqcoeff_ptr, pd->dequant[0],
-                      eob_ptr
-#if CONFIG_AOM_QM
-                      ,
-                      qm_ptr, iqm_ptr
+#else
+  const qm_val_t *qm_ptr = NULL;
+  const qm_val_t *iqm_ptr = NULL;
 #endif
-                      );
-      break;
-    case 1:
-      aom_quantize_dc_32x32(coeff_ptr, skip_block, p->round, p->quant_fp[0],
-                            qcoeff_ptr, dqcoeff_ptr, pd->dequant[0], eob_ptr
-#if CONFIG_AOM_QM
-                            ,
-                            qm_ptr, iqm_ptr
-#endif
-                            );
-      break;
-#if CONFIG_TX64X64
-      aom_quantize_dc_64x64(coeff_ptr, skip_block, p->round, p->quant_fp[0],
-                            qcoeff_ptr, dqcoeff_ptr, pd->dequant[0], eob_ptr
-#if CONFIG_AOM_QM
-                            ,
-                            qm_ptr, iqm_ptr
-#endif
-                            );
-    case 2: break;
-#endif  // CONFIG_TX64X64
-    default: assert(0);
-  }
+  quantize_dc(coeff_ptr, (int)n_coeffs, skip_block, p->round, p->quant_fp[0],
+              qcoeff_ptr, dqcoeff_ptr, pd->dequant[0], eob_ptr, qm_ptr, iqm_ptr,
+              qparam->log_scale);
 }
 
 #if CONFIG_NEW_QUANT
@@ -857,29 +845,31 @@
 #if CONFIG_AOM_QM
   const qm_val_t *qm_ptr = qparam->qmatrix;
   const qm_val_t *iqm_ptr = qparam->iqmatrix;
+  if (1 /*qm_ptr != NULL && iqm_ptr != NULL*/) {
+    highbd_quantize_fp_helper_c(
+        coeff_ptr, n_coeffs, skip_block, p->zbin, p->round_fp, p->quant_fp,
+        p->quant_shift, qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr, sc->scan,
+        sc->iscan, qm_ptr, iqm_ptr, qparam->log_scale);
+  } else {
 #endif  // CONFIG_AOM_QM
 
-  if (n_coeffs < 16) {
-    // TODO(jingning): Need SIMD implementation for smaller block size
-    // quantization.
-    av1_highbd_quantize_fp_c(coeff_ptr, n_coeffs, skip_block, p->zbin,
-                             p->round_fp, p->quant_fp, p->quant_shift,
-                             qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr,
-                             sc->scan, sc->iscan,
-#if CONFIG_AOM_QM
-                             qm_ptr, iqm_ptr,
-#endif
-                             qparam->log_scale);
-    return;
-  }
+    if (n_coeffs < 16) {
+      // TODO(jingning): Need SIMD implementation for smaller block size
+      // quantization.
+      av1_highbd_quantize_fp_c(coeff_ptr, n_coeffs, skip_block, p->zbin,
+                               p->round_fp, p->quant_fp, p->quant_shift,
+                               qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr,
+                               sc->scan, sc->iscan, qparam->log_scale);
+      return;
+    }
 
-  av1_highbd_quantize_fp(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round_fp,
-                         p->quant_fp, p->quant_shift, qcoeff_ptr, dqcoeff_ptr,
-                         pd->dequant, eob_ptr, sc->scan, sc->iscan,
+    av1_highbd_quantize_fp(coeff_ptr, n_coeffs, skip_block, p->zbin,
+                           p->round_fp, p->quant_fp, p->quant_shift, qcoeff_ptr,
+                           dqcoeff_ptr, pd->dequant, eob_ptr, sc->scan,
+                           sc->iscan, qparam->log_scale);
 #if CONFIG_AOM_QM
-                         qm_ptr, iqm_ptr,
+  }
 #endif
-                         qparam->log_scale);
 }
 
 void av1_highbd_quantize_b_facade(const tran_low_t *coeff_ptr,
@@ -894,86 +884,76 @@
 #if CONFIG_AOM_QM
   const qm_val_t *qm_ptr = qparam->qmatrix;
   const qm_val_t *iqm_ptr = qparam->iqmatrix;
+  if (1 /*qm_ptr != NULL && iqm_ptr != NULL*/) {
+    highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, p->zbin,
+                               p->round, p->quant, p->quant_shift, qcoeff_ptr,
+                               dqcoeff_ptr, pd->dequant, eob_ptr, sc->scan,
+                               sc->iscan, qm_ptr, iqm_ptr, qparam->log_scale);
+  } else {
 #endif  // CONFIG_AOM_QM
 
-  switch (qparam->log_scale) {
-    case 0:
-      if (LIKELY(n_coeffs >= 8)) {
-        aom_highbd_quantize_b(coeff_ptr, n_coeffs, skip_block, p->zbin,
-                              p->round, p->quant, p->quant_shift, qcoeff_ptr,
-                              dqcoeff_ptr, pd->dequant, eob_ptr, sc->scan,
-                              sc->iscan
-#if CONFIG_AOM_QM
-                              ,
-                              qm_ptr, iqm_ptr
-#endif
-                              );
-      } else {
-        // TODO(luoyi): Need SIMD (e.g. sse2) for smaller block size
-        // quantization
-        aom_highbd_quantize_b_c(coeff_ptr, n_coeffs, skip_block, p->zbin,
+    switch (qparam->log_scale) {
+      case 0:
+        if (LIKELY(n_coeffs >= 8)) {
+          aom_highbd_quantize_b(coeff_ptr, n_coeffs, skip_block, p->zbin,
                                 p->round, p->quant, p->quant_shift, qcoeff_ptr,
                                 dqcoeff_ptr, pd->dequant, eob_ptr, sc->scan,
-                                sc->iscan
-#if CONFIG_AOM_QM
-                                ,
-                                qm_ptr, iqm_ptr
-#endif
-                                );
-      }
-      break;
-    case 1:
-      aom_highbd_quantize_b_32x32(coeff_ptr, n_coeffs, skip_block, p->zbin,
+                                sc->iscan);
+        } else {
+          // TODO(luoyi): Need SIMD (e.g. sse2) for smaller block size
+          // quantization
+          aom_highbd_quantize_b_c(coeff_ptr, n_coeffs, skip_block, p->zbin,
                                   p->round, p->quant, p->quant_shift,
                                   qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr,
-                                  sc->scan, sc->iscan
-#if CONFIG_AOM_QM
-                                  ,
-                                  qm_ptr, iqm_ptr
-#endif
-                                  );
-      break;
+                                  sc->scan, sc->iscan);
+        }
+        break;
+      case 1:
+        aom_highbd_quantize_b_32x32(coeff_ptr, n_coeffs, skip_block, p->zbin,
+                                    p->round, p->quant, p->quant_shift,
+                                    qcoeff_ptr, dqcoeff_ptr, pd->dequant,
+                                    eob_ptr, sc->scan, sc->iscan);
+        break;
 #if CONFIG_TX64X64
-    case 2:
-      aom_highbd_quantize_b_64x64(coeff_ptr, n_coeffs, skip_block, p->zbin,
-                                  p->round, p->quant, p->quant_shift,
-                                  qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr,
-                                  sc->scan, sc->iscan
-#if CONFIG_AOM_QM
-                                  ,
-                                  qm_ptr, iqm_ptr
-#endif
-                                  );
-      break;
+      case 2:
+        aom_highbd_quantize_b_64x64(coeff_ptr, n_coeffs, skip_block, p->zbin,
+                                    p->round, p->quant, p->quant_shift,
+                                    qcoeff_ptr, dqcoeff_ptr, pd->dequant,
+                                    eob_ptr, sc->scan, sc->iscan);
+        break;
 #endif  // CONFIG_TX64X64
-    default: assert(0);
+      default: assert(0);
+    }
+#if CONFIG_AOM_QM
   }
+#endif
 }
 
 static INLINE void highbd_quantize_dc(
     const tran_low_t *coeff_ptr, int n_coeffs, int skip_block,
     const int16_t *round_ptr, const int16_t quant, tran_low_t *qcoeff_ptr,
     tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr, uint16_t *eob_ptr,
-#if CONFIG_AOM_QM
-    const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr,
-#endif
-    const int log_scale) {
+    const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr, const int log_scale) {
   int eob = -1;
 
   memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
   memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-#if CONFIG_AOM_QM
-  (void)qm_ptr;
-  (void)iqm_ptr;
-#endif
+
   if (!skip_block) {
+    const qm_val_t wt = qm_ptr != NULL ? qm_ptr[0] : (1 << AOM_QM_BITS);
+    const qm_val_t iwt = iqm_ptr != NULL ? iqm_ptr[0] : (1 << AOM_QM_BITS);
     const int coeff = coeff_ptr[0];
     const int coeff_sign = (coeff >> 31);
     const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
-    const int64_t tmp = abs_coeff + round_ptr[0];
-    const int abs_qcoeff = (int)((tmp * quant) >> (16 - log_scale));
+    const int64_t tmp = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[0], log_scale);
+    const int64_t tmpw = tmp * wt;
+    const int abs_qcoeff =
+        (int)((tmpw * quant) >> (16 - log_scale + AOM_QM_BITS));
     qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
-    dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr / (1 << log_scale);
+    const int dequant =
+        (dequant_ptr * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
+
+    dqcoeff_ptr[0] = (qcoeff_ptr[0] * dequant) / (1 << log_scale);
     if (abs_qcoeff) eob = 0;
   }
   *eob_ptr = eob + 1;
@@ -991,17 +971,16 @@
 #if CONFIG_AOM_QM
   const qm_val_t *qm_ptr = qparam->qmatrix;
   const qm_val_t *iqm_ptr = qparam->iqmatrix;
+#else
+  const qm_val_t *qm_ptr = NULL;
+  const qm_val_t *iqm_ptr = NULL;
 #endif  // CONFIG_AOM_QM
 
   (void)sc;
 
   highbd_quantize_dc(coeff_ptr, (int)n_coeffs, skip_block, p->round,
                      p->quant_fp[0], qcoeff_ptr, dqcoeff_ptr, pd->dequant[0],
-                     eob_ptr,
-#if CONFIG_AOM_QM
-                     qm_ptr, iqm_ptr,
-#endif
-                     qparam->log_scale);
+                     eob_ptr, qm_ptr, iqm_ptr, qparam->log_scale);
 }
 
 #if CONFIG_NEW_QUANT
@@ -1517,61 +1496,16 @@
 }
 #endif  // CONFIG_NEW_QUANT
 
-void av1_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t count,
-                              int skip_block, const int16_t *zbin_ptr,
-                              const int16_t *round_ptr,
-                              const int16_t *quant_ptr,
-                              const int16_t *quant_shift_ptr,
-                              tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
-                              const int16_t *dequant_ptr, uint16_t *eob_ptr,
-                              const int16_t *scan, const int16_t *iscan,
-#if CONFIG_AOM_QM
-                              const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr,
-#endif
-                              int log_scale) {
-  int i;
-  int eob = -1;
-  const int scale = 1 << log_scale;
-  const int shift = 16 - log_scale;
-  // TODO(jingning) Decide the need of these arguments after the
-  // quantization process is completed.
-  (void)zbin_ptr;
-  (void)quant_shift_ptr;
-  (void)iscan;
-
-  memset(qcoeff_ptr, 0, count * sizeof(*qcoeff_ptr));
-  memset(dqcoeff_ptr, 0, count * sizeof(*dqcoeff_ptr));
-
-  if (!skip_block) {
-    // Quantization pass: All coefficients with index >= zero_flag are
-    // skippable. Note: zero_flag can be zero.
-    for (i = 0; i < count; i++) {
-      const int rc = scan[i];
-      const int coeff = coeff_ptr[rc];
-#if CONFIG_AOM_QM
-      const qm_val_t wt = qm_ptr[rc];
-      const qm_val_t iwt = iqm_ptr[rc];
-      const int dequant =
-          (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
-          AOM_QM_BITS;
-#endif
-      const int coeff_sign = (coeff >> 31);
-      const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
-      const int64_t tmp = abs_coeff + (round_ptr[rc != 0] >> log_scale);
-#if CONFIG_AOM_QM
-      const int abs_qcoeff =
-          (int)((tmp * quant_ptr[rc != 0] * wt) >> (shift + AOM_QM_BITS));
-      qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
-      dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant / scale;
-#else
-      const int abs_qcoeff = (int)((tmp * quant_ptr[rc != 0]) >> shift);
-      qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
-      dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / scale;
-#endif
-      if (abs_qcoeff) eob = i;
-    }
-  }
-  *eob_ptr = eob + 1;
+void av1_highbd_quantize_fp_c(
+    const tran_low_t *coeff_ptr, intptr_t count, int skip_block,
+    const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
+    const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+    tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
+    const int16_t *scan, const int16_t *iscan, int log_scale) {
+  highbd_quantize_fp_helper_c(coeff_ptr, count, skip_block, zbin_ptr, round_ptr,
+                              quant_ptr, quant_shift_ptr, qcoeff_ptr,
+                              dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
+                              NULL, NULL, log_scale);
 }
 
 static void invert_quant(int16_t *quant, int16_t *shift, int d) {
diff --git a/av1/encoder/dct.c b/av1/encoder/dct.c
index 2ca4f34..858ea24 100644
--- a/av1/encoder/dct.c
+++ b/av1/encoder/dct.c
@@ -2087,12 +2087,8 @@
                          const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
                          tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
                          uint16_t *eob_ptr, const int16_t *scan,
-                         const int16_t *iscan
-#if CONFIG_AOM_QM
-                         ,
-                         const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr
-#endif
-                         ) {
+                         const int16_t *iscan, const qm_val_t *qm_ptr,
+                         const qm_val_t *iqm_ptr) {
   int eob = -1;
 
   int i, j;
@@ -2177,27 +2173,19 @@
     for (i = 0; i < n_coeffs; i++) {
       const int rc = scan[i];
       const int coeff = coeff_ptr[rc];
-#if CONFIG_AOM_QM
-      const qm_val_t wt = qm_ptr[rc];
-      const qm_val_t iwt = iqm_ptr[rc];
+      const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+      const qm_val_t iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
       const int dequant =
           (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
           AOM_QM_BITS;
-#endif
       const int coeff_sign = (coeff >> 31);
       const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
 
       int64_t tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
       int tmp32;
-#if CONFIG_AOM_QM
       tmp32 = (int)((tmp * quant_ptr[rc != 0] * wt) >> (16 + AOM_QM_BITS));
       qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
       dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant;
-#else
-      tmp32 = (int)((tmp * quant_ptr[rc != 0]) >> 16);
-      qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
-      dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
-#endif
 
       if (tmp32) eob = i;
     }