Fix a bug in av1_quantize_lp AVX2 optimization

In av1_quantize_lp AVX2 optimization, eob is scanned from coeff
results. To get the correct eob, iscan should be used instead of
scan. This CL fixed the bug, and resolved AVX2 annd c mismatch.
The unit test for this function was added.

BUG=aomedia:3156

Change-Id: Id173dc292ca5951ef1f1a75f37fa31f4615e2da0
(cherry picked from commit 984b81aac328a7eb2d5ea93787be295c4e14b66c)
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index 63e5758..231908e 100644
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -330,9 +330,9 @@
   add_proto qw/void av1_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
   specialize qw/av1_quantize_fp sse2 avx2 neon/;
 
-  add_proto qw/void av1_quantize_lp/, "const int16_t *coeff_ptr, intptr_t n_coeffs, const int16_t *round_ptr, const int16_t *quant_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan";
-  specialize qw/av1_quantize_lp avx2 neon/;
-
+  # TODO(any): need to fix the bug in neon optimization and re-enable it.
+  add_proto qw/void av1_quantize_lp/, "const int16_t *coeff_ptr, intptr_t n_coeffs, const int16_t *round_ptr, const int16_t *quant_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+  specialize qw/av1_quantize_lp avx2/;
 
   add_proto qw/void av1_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
   specialize qw/av1_quantize_fp_32x32 neon avx2/;
diff --git a/av1/encoder/av1_quantize.c b/av1/encoder/av1_quantize.c
index de1e1b3..66be3b6 100644
--- a/av1/encoder/av1_quantize.c
+++ b/av1/encoder/av1_quantize.c
@@ -213,7 +213,8 @@
                        const int16_t *round_ptr, const int16_t *quant_ptr,
                        int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
                        const int16_t *dequant_ptr, uint16_t *eob_ptr,
-                       const int16_t *scan) {
+                       const int16_t *scan, const int16_t *iscan) {
+  (void)iscan;
   int eob = -1;
 
   memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
diff --git a/av1/encoder/nonrd_pickmode.c b/av1/encoder/nonrd_pickmode.c
index a3f1216..3d710dd 100644
--- a/av1/encoder/nonrd_pickmode.c
+++ b/av1/encoder/nonrd_pickmode.c
@@ -833,20 +833,21 @@
             aom_hadamard_lp_16x16(src_diff, diff_stride, low_coeff);
             av1_quantize_lp(low_coeff, 16 * 16, p->round_fp_QTX,
                             p->quant_fp_QTX, low_qcoeff, low_dqcoeff,
-                            p->dequant_QTX, eob, scan_order->scan);
+                            p->dequant_QTX, eob, scan_order->scan,
+                            scan_order->iscan);
             break;
           case TX_8X8:
             aom_hadamard_lp_8x8(src_diff, diff_stride, low_coeff);
             av1_quantize_lp(low_coeff, 8 * 8, p->round_fp_QTX, p->quant_fp_QTX,
                             low_qcoeff, low_dqcoeff, p->dequant_QTX, eob,
-                            scan_order->scan);
+                            scan_order->scan, scan_order->iscan);
             break;
           default:
             assert(tx_size == TX_4X4);
             aom_fdct4x4_lp(src_diff, low_coeff, diff_stride);
             av1_quantize_lp(low_coeff, 4 * 4, p->round_fp_QTX, p->quant_fp_QTX,
                             low_qcoeff, low_dqcoeff, p->dequant_QTX, eob,
-                            scan_order->scan);
+                            scan_order->scan, scan_order->iscan);
             break;
 #endif
         }
diff --git a/av1/encoder/x86/av1_quantize_avx2.c b/av1/encoder/x86/av1_quantize_avx2.c
index f5f7ee1..591edd7 100644
--- a/av1/encoder/x86/av1_quantize_avx2.c
+++ b/av1/encoder/x86/av1_quantize_avx2.c
@@ -154,22 +154,18 @@
   return _mm_extract_epi16(eob, 1);
 }
 
-static INLINE void store_zero_tran_low(int16_t *a) {
-  const __m256i zero = _mm256_setzero_si256();
-  _mm256_storeu_si256((__m256i *)(a), zero);
-}
-
 void av1_quantize_lp_avx2(const int16_t *coeff_ptr, intptr_t n_coeffs,
                           const int16_t *round_ptr, const int16_t *quant_ptr,
                           int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
                           const int16_t *dequant_ptr, uint16_t *eob_ptr,
-                          const int16_t *scan) {
+                          const int16_t *scan, const int16_t *iscan) {
+  (void)scan;
   __m128i eob;
   __m256i round256, quant256, dequant256;
-  __m256i eob256, thr256;
+  __m256i eob256;
 
   coeff_ptr += n_coeffs;
-  scan += n_coeffs;
+  iscan += n_coeffs;
   qcoeff_ptr += n_coeffs;
   dqcoeff_ptr += n_coeffs;
   n_coeffs = -n_coeffs;
@@ -205,7 +201,7 @@
       _mm256_storeu_si256((__m256i *)(dqcoeff_ptr + n_coeffs), coeff256);
     }
 
-    eob256 = scan_eob_256((const __m256i *)(scan + n_coeffs), &coeff256);
+    eob256 = scan_eob_256((const __m256i *)(iscan + n_coeffs), &coeff256);
     n_coeffs += 8 * 2;
   }
 
@@ -214,30 +210,22 @@
   quant256 = _mm256_permute2x128_si256(quant256, quant256, 0x31);
   round256 = _mm256_permute2x128_si256(round256, round256, 0x31);
 
-  thr256 = _mm256_srai_epi16(dequant256, 1);
-
   // AC only loop
   while (n_coeffs < 0) {
     __m256i coeff256 =
         _mm256_loadu_si256((const __m256i *)(coeff_ptr + n_coeffs));
     __m256i qcoeff256 = _mm256_abs_epi16(coeff256);
-    int32_t nzflag =
-        _mm256_movemask_epi8(_mm256_cmpgt_epi16(qcoeff256, thr256));
 
-    if (nzflag) {
-      __m256i qtmp256;
-      qcoeff256 = _mm256_adds_epi16(qcoeff256, round256);
-      qtmp256 = _mm256_mulhi_epi16(qcoeff256, quant256);
-      qcoeff256 = _mm256_sign_epi16(qtmp256, coeff256);
-      _mm256_storeu_si256((__m256i *)(qcoeff_ptr + n_coeffs), qcoeff256);
-      coeff256 = _mm256_mullo_epi16(qcoeff256, dequant256);
-      _mm256_storeu_si256((__m256i *)(dqcoeff_ptr + n_coeffs), coeff256);
-      eob256 = _mm256_max_epi16(
-          eob256, scan_eob_256((const __m256i *)(scan + n_coeffs), &coeff256));
-    } else {
-      store_zero_tran_low(qcoeff_ptr + n_coeffs);
-      store_zero_tran_low(dqcoeff_ptr + n_coeffs);
-    }
+    __m256i qtmp256;
+    qcoeff256 = _mm256_adds_epi16(qcoeff256, round256);
+    qtmp256 = _mm256_mulhi_epi16(qcoeff256, quant256);
+    qcoeff256 = _mm256_sign_epi16(qtmp256, coeff256);
+    _mm256_storeu_si256((__m256i *)(qcoeff_ptr + n_coeffs), qcoeff256);
+    coeff256 = _mm256_mullo_epi16(qcoeff256, dequant256);
+    _mm256_storeu_si256((__m256i *)(dqcoeff_ptr + n_coeffs), coeff256);
+    eob256 = _mm256_max_epi16(
+        eob256, scan_eob_256((const __m256i *)(iscan + n_coeffs), &coeff256));
+
     n_coeffs += 8 * 2;
   }
 
diff --git a/test/quantize_lp_func_test.cc b/test/quantize_lp_func_test.cc
new file mode 100644
index 0000000..b02222d
--- /dev/null
+++ b/test/quantize_lp_func_test.cc
@@ -0,0 +1,336 @@
+/*
+ * Copyright (c) 2021, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <tuple>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+#include "config/av1_rtcd.h"
+
+#include "aom/aom_codec.h"
+#include "aom_ports/aom_timer.h"
+#include "av1/encoder/encoder.h"
+#include "av1/common/scan.h"
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+
+namespace {
+using libaom_test::ACMRandom;
+
+#define QUAN_LP_PARAM_LIST                                                 \
+  const int16_t *coeff_ptr, intptr_t n_coeffs, const int16_t *round_ptr,   \
+      const int16_t *quant_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, \
+      const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan,  \
+      const int16_t *iscan
+
+typedef void (*QuantizeFunc)(QUAN_LP_PARAM_LIST);
+
+using std::tuple;
+typedef tuple<QuantizeFunc, QuantizeFunc, TX_SIZE, aom_bit_depth_t>
+    QuantizeParam;
+
+typedef struct {
+  QUANTS quant;
+  Dequants dequant;
+} QuanTable;
+
+const int kTestNum = 1000;
+
+template <typename CoeffType>
+class QuantizeTestBase : public ::testing::TestWithParam<QuantizeParam> {
+ protected:
+  QuantizeTestBase()
+      : quant_ref_(GET_PARAM(0)), quant_(GET_PARAM(1)), tx_size_(GET_PARAM(2)),
+        bd_(GET_PARAM(3)) {}
+
+  virtual ~QuantizeTestBase() {}
+
+  virtual void SetUp() {
+    qtab_ = reinterpret_cast<QuanTable *>(aom_memalign(32, sizeof(*qtab_)));
+    const int n_coeffs = coeff_num();
+    coeff_ = reinterpret_cast<CoeffType *>(
+        aom_memalign(32, 6 * n_coeffs * sizeof(CoeffType)));
+    InitQuantizer();
+  }
+
+  virtual void TearDown() {
+    aom_free(qtab_);
+    qtab_ = NULL;
+    aom_free(coeff_);
+    coeff_ = NULL;
+  }
+
+  void InitQuantizer() {
+    av1_build_quantizer(bd_, 0, 0, 0, 0, 0, &qtab_->quant, &qtab_->dequant);
+  }
+
+  virtual void RunQuantizeFunc(const CoeffType *coeff_ptr, intptr_t n_coeffs,
+                               const int16_t *round_ptr,
+                               const int16_t *quant_ptr, CoeffType *qcoeff_ptr,
+                               CoeffType *qcoeff_ref_ptr,
+                               CoeffType *dqcoeff_ptr,
+                               CoeffType *dqcoeff_ref_ptr,
+                               const int16_t *dequant_ptr,
+                               uint16_t *eob_ref_ptr, uint16_t *eob_ptr,
+                               const int16_t *scan, const int16_t *iscan) = 0;
+
+  void QuantizeRun(bool is_loop, int q = 0, int test_num = 1) {
+    CoeffType *coeff_ptr = coeff_;
+    const intptr_t n_coeffs = coeff_num();
+
+    CoeffType *qcoeff_ref = coeff_ptr + n_coeffs;
+    CoeffType *dqcoeff_ref = qcoeff_ref + n_coeffs;
+
+    CoeffType *qcoeff = dqcoeff_ref + n_coeffs;
+    CoeffType *dqcoeff = qcoeff + n_coeffs;
+    uint16_t *eob = (uint16_t *)(dqcoeff + n_coeffs);
+
+    // Testing uses 2-D DCT scan order table
+    const SCAN_ORDER *const sc = get_default_scan(tx_size_, DCT_DCT);
+
+    // Testing uses luminance quantization table
+    const int16_t *round = 0;
+    const int16_t *quant = 0;
+    round = qtab_->quant.y_round_fp[q];
+    quant = qtab_->quant.y_quant_fp[q];
+
+    const int16_t *dequant = qtab_->dequant.y_dequant_QTX[q];
+
+    for (int i = 0; i < test_num; ++i) {
+      if (is_loop) FillCoeffRandom();
+
+      memset(qcoeff_ref, 0, 5 * n_coeffs * sizeof(*qcoeff_ref));
+
+      RunQuantizeFunc(coeff_ptr, n_coeffs, round, quant, qcoeff, qcoeff_ref,
+                      dqcoeff, dqcoeff_ref, dequant, &eob[0], &eob[1], sc->scan,
+                      sc->iscan);
+
+      quant_ref_(coeff_ptr, n_coeffs, round, quant, qcoeff_ref, dqcoeff_ref,
+                 dequant, &eob[0], sc->scan, sc->iscan);
+
+      API_REGISTER_STATE_CHECK(quant_(coeff_ptr, n_coeffs, round, quant, qcoeff,
+                                      dqcoeff, dequant, &eob[1], sc->scan,
+                                      sc->iscan));
+
+      for (int j = 0; j < n_coeffs; ++j) {
+        ASSERT_EQ(qcoeff_ref[j], qcoeff[j])
+            << "Q mismatch on test: " << i << " at position: " << j
+            << " Q: " << q << " coeff: " << coeff_ptr[j];
+      }
+
+      for (int j = 0; j < n_coeffs; ++j) {
+        ASSERT_EQ(dqcoeff_ref[j], dqcoeff[j])
+            << "Dq mismatch on test: " << i << " at position: " << j
+            << " Q: " << q << " coeff: " << coeff_ptr[j];
+      }
+
+      ASSERT_EQ(eob[0], eob[1])
+          << "eobs mismatch on test: " << i << " Q: " << q;
+    }
+  }
+
+  void CompareResults(const CoeffType *buf_ref, const CoeffType *buf, int size,
+                      const char *text, int q, int number) {
+    int i;
+    for (i = 0; i < size; ++i) {
+      ASSERT_EQ(buf_ref[i], buf[i]) << text << " mismatch on test: " << number
+                                    << " at position: " << i << " Q: " << q;
+    }
+  }
+
+  int coeff_num() const { return av1_get_max_eob(tx_size_); }
+
+  void FillCoeff(CoeffType c) {
+    const int n_coeffs = coeff_num();
+    for (int i = 0; i < n_coeffs; ++i) {
+      coeff_[i] = c;
+    }
+  }
+
+  void FillCoeffRandom() {
+    const int n_coeffs = coeff_num();
+    FillCoeffZero();
+    int num = rnd_.Rand16() % n_coeffs;
+    for (int i = 0; i < num; ++i) {
+      coeff_[i] = GetRandomCoeff();
+    }
+  }
+
+  void FillCoeffRandomRows(int num) {
+    FillCoeffZero();
+    for (int i = 0; i < num; ++i) {
+      coeff_[i] = GetRandomCoeff();
+    }
+  }
+
+  void FillCoeffZero() { FillCoeff(0); }
+
+  void FillCoeffConstant() {
+    CoeffType c = GetRandomCoeff();
+    FillCoeff(c);
+  }
+
+  void FillDcOnly() {
+    FillCoeffZero();
+    coeff_[0] = GetRandomCoeff();
+  }
+
+  void FillDcLargeNegative() {
+    FillCoeffZero();
+    // Generate a qcoeff which contains 512/-512 (0x0100/0xFE00) to catch issues
+    // like BUG=883 where the constant being compared was incorrectly
+    // initialized.
+    coeff_[0] = -8191;
+  }
+
+  CoeffType GetRandomCoeff() {
+    CoeffType coeff;
+    if (bd_ == AOM_BITS_8) {
+      coeff =
+          clamp(static_cast<int16_t>(rnd_.Rand16()), INT16_MIN + 1, INT16_MAX);
+    } else {
+      CoeffType min = -(1 << (7 + bd_));
+      CoeffType max = -min - 1;
+      coeff = clamp(static_cast<CoeffType>(rnd_.Rand31()), min, max);
+    }
+    return coeff;
+  }
+
+  ACMRandom rnd_;
+  QuanTable *qtab_;
+  CoeffType *coeff_;
+  QuantizeFunc quant_ref_;
+  QuantizeFunc quant_;
+  TX_SIZE tx_size_;
+  aom_bit_depth_t bd_;
+};
+
+class FullPrecisionQuantizeLpTest : public QuantizeTestBase<int16_t> {
+  void RunQuantizeFunc(const int16_t *coeff_ptr, intptr_t n_coeffs,
+                       const int16_t *round_ptr, const int16_t *quant_ptr,
+                       int16_t *qcoeff_ptr, int16_t *qcoeff_ref_ptr,
+                       int16_t *dqcoeff_ptr, int16_t *dqcoeff_ref_ptr,
+                       const int16_t *dequant_ptr, uint16_t *eob_ref_ptr,
+                       uint16_t *eob_ptr, const int16_t *scan,
+                       const int16_t *iscan) override {
+    quant_ref_(coeff_ptr, n_coeffs, round_ptr, quant_ptr, qcoeff_ref_ptr,
+               dqcoeff_ref_ptr, dequant_ptr, eob_ref_ptr, scan, iscan);
+
+    API_REGISTER_STATE_CHECK(quant_(coeff_ptr, n_coeffs, round_ptr, quant_ptr,
+                                    qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
+                                    eob_ptr, scan, iscan));
+  }
+};
+
+TEST_P(FullPrecisionQuantizeLpTest, ZeroInput) {
+  FillCoeffZero();
+  QuantizeRun(false);
+}
+
+TEST_P(FullPrecisionQuantizeLpTest, LargeNegativeInput) {
+  FillDcLargeNegative();
+  QuantizeRun(false, 0, 1);
+}
+
+TEST_P(FullPrecisionQuantizeLpTest, DcOnlyInput) {
+  FillDcOnly();
+  QuantizeRun(false, 0, 1);
+}
+
+TEST_P(FullPrecisionQuantizeLpTest, RandomInput) {
+  QuantizeRun(true, 0, kTestNum);
+}
+
+TEST_P(FullPrecisionQuantizeLpTest, MultipleQ) {
+  for (int q = 0; q < QINDEX_RANGE; ++q) {
+    QuantizeRun(true, q, kTestNum);
+  }
+}
+
+// Force the coeff to be half the value of the dequant.  This exposes a
+// mismatch found in av1_quantize_fp_sse2().
+TEST_P(FullPrecisionQuantizeLpTest, CoeffHalfDequant) {
+  FillCoeff(16);
+  QuantizeRun(false, 25, 1);
+}
+
+TEST_P(FullPrecisionQuantizeLpTest, DISABLED_Speed) {
+  int16_t *coeff_ptr = coeff_;
+  const intptr_t n_coeffs = coeff_num();
+
+  int16_t *qcoeff_ref = coeff_ptr + n_coeffs;
+  int16_t *dqcoeff_ref = qcoeff_ref + n_coeffs;
+
+  int16_t *qcoeff = dqcoeff_ref + n_coeffs;
+  int16_t *dqcoeff = qcoeff + n_coeffs;
+  uint16_t *eob = (uint16_t *)(dqcoeff + n_coeffs);
+
+  // Testing uses 2-D DCT scan order table
+  const SCAN_ORDER *const sc = get_default_scan(tx_size_, DCT_DCT);
+
+  // Testing uses luminance quantization table
+  const int q = 22;
+  const int16_t *round_fp = qtab_->quant.y_round_fp[q];
+  const int16_t *quant_fp = qtab_->quant.y_quant_fp[q];
+  const int16_t *dequant = qtab_->dequant.y_dequant_QTX[q];
+  const int kNumTests = 5000000;
+  aom_usec_timer timer, simd_timer;
+  int rows = tx_size_high[tx_size_];
+  int cols = tx_size_wide[tx_size_];
+  rows = AOMMIN(32, rows);
+  cols = AOMMIN(32, cols);
+  for (int cnt = 0; cnt <= rows; cnt++) {
+    FillCoeffRandomRows(cnt * cols);
+
+    aom_usec_timer_start(&timer);
+    for (int n = 0; n < kNumTests; ++n) {
+      quant_ref_(coeff_ptr, n_coeffs, round_fp, quant_fp, qcoeff, dqcoeff,
+                 dequant, eob, sc->scan, sc->iscan);
+    }
+    aom_usec_timer_mark(&timer);
+
+    aom_usec_timer_start(&simd_timer);
+    for (int n = 0; n < kNumTests; ++n) {
+      quant_(coeff_ptr, n_coeffs, round_fp, quant_fp, qcoeff, dqcoeff, dequant,
+             eob, sc->scan, sc->iscan);
+    }
+    aom_usec_timer_mark(&simd_timer);
+
+    const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
+    const int simd_elapsed_time =
+        static_cast<int>(aom_usec_timer_elapsed(&simd_timer));
+    printf("c_time = %d \t simd_time = %d \t Gain = %f \n", elapsed_time,
+           simd_elapsed_time, ((float)elapsed_time / simd_elapsed_time));
+  }
+}
+
+using std::make_tuple;
+
+#if HAVE_AVX2
+const QuantizeParam kQParamArrayAVX2[] = {
+  // av1_quantize_lp is only called in nonrd_pickmode.c, and is used for 16X16,
+  // 8X8, and 4X4.
+  make_tuple(&av1_quantize_lp_c, &av1_quantize_lp_avx2,
+             static_cast<TX_SIZE>(TX_16X16), AOM_BITS_8),
+  make_tuple(&av1_quantize_lp_c, &av1_quantize_lp_avx2,
+             static_cast<TX_SIZE>(TX_8X8), AOM_BITS_8),
+  make_tuple(&av1_quantize_lp_c, &av1_quantize_lp_avx2,
+             static_cast<TX_SIZE>(TX_4X4), AOM_BITS_8)
+};
+
+INSTANTIATE_TEST_SUITE_P(AVX2, FullPrecisionQuantizeLpTest,
+                         ::testing::ValuesIn(kQParamArrayAVX2));
+#endif
+
+}  // namespace
diff --git a/test/test.cmake b/test/test.cmake
index 9f58511..3efcfac 100644
--- a/test/test.cmake
+++ b/test/test.cmake
@@ -277,6 +277,7 @@
               "${AOM_ROOT}/test/obmc_variance_test.cc"
               "${AOM_ROOT}/test/pickrst_test.cc"
               "${AOM_ROOT}/test/quantize_func_test.cc"
+              "${AOM_ROOT}/test/quantize_lp_func_test.cc"
               "${AOM_ROOT}/test/sad_test.cc"
               "${AOM_ROOT}/test/subtract_test.cc"
               "${AOM_ROOT}/test/reconinter_test.cc"
@@ -323,7 +324,8 @@
 
   if(NOT (HAVE_SSE2 OR HAVE_NEON))
     list(REMOVE_ITEM AOM_UNIT_TEST_ENCODER_SOURCES
-                     "${AOM_ROOT}/test/quantize_func_test.cc")
+                     "${AOM_ROOT}/test/quantize_func_test.cc"
+                     "${AOM_ROOT}/test/quantize_lp_func_test.cc")
   endif()
 
   if(HAVE_SSE4_1)