AOM_QM: use SIMD for flat matrices and re-enable tests.

When AOM_QM is enabled, quantization matrices are flat by default;
non-flat matrices are used only with --enable-qm=1. Re-use the
existing SIMD functions when a flat matrix is in use, so that
enabling AOM_QM causes no speed deficit.

SIMD for the non-flat case is TBC.

Change-Id: I1bb8da70d3dd5858dac15099610ddf61662e3d0d
diff --git a/av1/common/quant_common.c b/av1/common/quant_common.c
index 5210ba8..2c07941 100644
--- a/av1/common/quant_common.c
+++ b/av1/common/quant_common.c
@@ -368,12 +368,17 @@
         current = 0;
         for (t = 0; t < TX_SIZES_ALL; ++t) {
           size = tx_size_2d[t];
-          cm->gqmatrix[q][c][f][t] = &wt_matrix_ref[AOMMIN(
-              NUM_QM_LEVELS - 1, f == 0 ? q + DEFAULT_QM_INTER_OFFSET : q)][c]
-                                                   [current];
-          cm->giqmatrix[q][c][f][t] = &iwt_matrix_ref[AOMMIN(
-              NUM_QM_LEVELS - 1, f == 0 ? q + DEFAULT_QM_INTER_OFFSET : q)][c]
+          if (q == NUM_QM_LEVELS - 1) {
+            cm->gqmatrix[q][c][f][t] = NULL;
+            cm->giqmatrix[q][c][f][t] = NULL;
+          } else {
+            cm->gqmatrix[q][c][f][t] = &wt_matrix_ref[AOMMIN(
+                NUM_QM_LEVELS - 1, f == 0 ? q + DEFAULT_QM_INTER_OFFSET : q)][c]
                                                      [current];
+            cm->giqmatrix[q][c][f][t] = &iwt_matrix_ref[AOMMIN(
+                NUM_QM_LEVELS - 1, f == 0 ? q + DEFAULT_QM_INTER_OFFSET : q)][c]
+                                                       [current];
+          }
           current += size;
         }
       }
diff --git a/av1/decoder/detokenize.c b/av1/decoder/detokenize.c
index 461494d..3aefc9f 100644
--- a/av1/decoder/detokenize.c
+++ b/av1/decoder/detokenize.c
@@ -195,7 +195,7 @@
 #else
 #if CONFIG_AOM_QM
     // Apply quant matrix only for 2D transforms
-    if (IS_2D_TRANSFORM(tx_type))
+    if (IS_2D_TRANSFORM(tx_type) && iqmatrix != NULL)
       dqv = ((iqmatrix[scan[c]] * (int)dqv) + (1 << (AOM_QM_BITS - 1))) >>
             AOM_QM_BITS;
 #endif
diff --git a/av1/encoder/av1_quantize.c b/av1/encoder/av1_quantize.c
index b44e06f..0265f1f 100644
--- a/av1/encoder/av1_quantize.c
+++ b/av1/encoder/av1_quantize.c
@@ -577,7 +577,7 @@
 #if CONFIG_AOM_QM
   const qm_val_t *qm_ptr = qparam->qmatrix;
   const qm_val_t *iqm_ptr = qparam->iqmatrix;
-  if (1 /*qm_ptr != NULL || iqm_ptr != NULL*/) {
+  if (qm_ptr != NULL && iqm_ptr != NULL) {
     quantize_fp_helper_c(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round_fp,
                          p->quant_fp, p->quant_shift, qcoeff_ptr, dqcoeff_ptr,
                          pd->dequant, eob_ptr, sc->scan, sc->iscan, qm_ptr,
@@ -630,7 +630,7 @@
 #if CONFIG_AOM_QM
   const qm_val_t *qm_ptr = qparam->qmatrix;
   const qm_val_t *iqm_ptr = qparam->iqmatrix;
-  if (1 /*qm_ptr != NULL && iqm_ptr != NULL*/) {
+  if (qm_ptr != NULL && iqm_ptr != NULL) {
     quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round,
                         p->quant, p->quant_shift, qcoeff_ptr, dqcoeff_ptr,
                         pd->dequant, eob_ptr, sc->scan, sc->iscan, qm_ptr,
@@ -845,7 +845,7 @@
 #if CONFIG_AOM_QM
   const qm_val_t *qm_ptr = qparam->qmatrix;
   const qm_val_t *iqm_ptr = qparam->iqmatrix;
-  if (1 /*qm_ptr != NULL && iqm_ptr != NULL*/) {
+  if (qm_ptr != NULL && iqm_ptr != NULL) {
     highbd_quantize_fp_helper_c(
         coeff_ptr, n_coeffs, skip_block, p->zbin, p->round_fp, p->quant_fp,
         p->quant_shift, qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr, sc->scan,
@@ -884,7 +884,7 @@
 #if CONFIG_AOM_QM
   const qm_val_t *qm_ptr = qparam->qmatrix;
   const qm_val_t *iqm_ptr = qparam->iqmatrix;
-  if (1 /*qm_ptr != NULL && iqm_ptr != NULL*/) {
+  if (qm_ptr != NULL && iqm_ptr != NULL) {
     highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, p->zbin,
                                p->round, p->quant, p->quant_shift, qcoeff_ptr,
                                dqcoeff_ptr, pd->dequant, eob_ptr, sc->scan,
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index f2bb213..efb4763 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -214,9 +214,12 @@
       const int x_a = x - 2 * sz - 1;
       int dqv;
 #if CONFIG_AOM_QM
-      int iwt = iqmatrix[rc];
+      int iwt;
       dqv = dequant_ptr[rc != 0];
-      dqv = ((iwt * (int)dqv) + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
+      if (iqmatrix != NULL) {
+        iwt = iqmatrix[rc];
+        dqv = ((iwt * (int)dqv) + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
+      }
 #else
       dqv = dequant_ptr[rc != 0];
 #endif
diff --git a/test/av1_quantize_test.cc b/test/av1_quantize_test.cc
index 239b041..85a5577 100644
--- a/test/av1_quantize_test.cc
+++ b/test/av1_quantize_test.cc
@@ -195,7 +195,6 @@
 TEST_P(AV1QuantizeTest, EobVerify) { RunEobTest(); }
 
 #if HAVE_SSE4_1
-#if !CONFIG_AOM_QM
 const QuantizeFuncParams qfps[4] = {
   QuantizeFuncParams(av1_highbd_quantize_fp_sse4_1, &av1_highbd_quantize_fp_c,
                      16),
@@ -208,6 +207,5 @@
 };
 
 INSTANTIATE_TEST_CASE_P(SSE4_1, AV1QuantizeTest, ::testing::ValuesIn(qfps));
-#endif  // !CONFIG_AOM_QM
 #endif  // HAVE_SSE4_1
 }  // namespace
diff --git a/test/test.mk b/test/test.mk
index 4132e4f..7292575 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -163,11 +163,9 @@
 #LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_quantize_test.cc
 LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += subtract_test.cc
 LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += arf_freq_test.cc
-ifneq ($(CONFIG_AOM_QM), yes)
 ifneq ($(CONFIG_NEW_QUANT), yes)
 LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += quantize_func_test.cc
 endif
-endif
 LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += block_error_test.cc
 
 LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_inv_txfm_test.cc