AV1 RT: Fix NEON bug in av1_quantize_lp. Fix the NEON implementation of av1_quantize_lp and re-enable it. Also add a unit test. BUG=aomedia:3162 Change-Id: I8bc62c009bfb8e264a75e7bb67c475d5121bf079

commit: 76d5cfb6e3638a06654733ac62912fef8ea2959a [log] [tgz]
author: Fyodor Kyslov <kyslov@google.com> Mon Oct 11 17:45:59 2021 -0700
committer: Fyodor Kyslov <kyslov@google.com> Tue Oct 12 18:26:55 2021 +0000
tree: 35d168514485195b8e9d957eacf170a1b8453782
parent: 714f6159840b1805c625482ab260a204bb3f4caf [diff]
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index 7f2cd7e..a4e1bbf 100644
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl

@@ -330,9 +330,8 @@
   add_proto qw/void av1_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
   specialize qw/av1_quantize_fp sse2 avx2 neon/;
 
-  # TODO(any): need to fix the bug in neon optimization and re-enable it.
   add_proto qw/void av1_quantize_lp/, "const int16_t *coeff_ptr, intptr_t n_coeffs, const int16_t *round_ptr, const int16_t *quant_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
-  specialize qw/av1_quantize_lp sse2 avx2/;
+  specialize qw/av1_quantize_lp sse2 avx2 neon/;
 
   add_proto qw/void av1_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
   specialize qw/av1_quantize_fp_32x32 neon avx2/;

diff --git a/av1/encoder/arm/neon/quantize_neon.c b/av1/encoder/arm/neon/quantize_neon.c
index b0e8950..7e028f5 100644
--- a/av1/encoder/arm/neon/quantize_neon.c
+++ b/av1/encoder/arm/neon/quantize_neon.c

@@ -133,7 +133,8 @@
                           const int16_t *round_ptr, const int16_t *quant_ptr,
                           int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
                           const int16_t *dequant_ptr, uint16_t *eob_ptr,
-                          const int16_t *scan) {
+                          const int16_t *scan, const int16_t *iscan) {
+  (void)scan;
   // Quantization pass: All coefficients with index >= zero_flag are
   // skippable. Note: zero_flag can be zero.
   const int16x8_t v_zero = vdupq_n_s16(0);
@@ -149,7 +150,7 @@
   v_dequant = vsetq_lane_s16(dequant_ptr[0], v_dequant, 0);
   // process dc and the first seven ac coeffs
   {
-    const int16x8_t v_iscan = vld1q_s16(&scan[0]);
+    const int16x8_t v_iscan = vld1q_s16(&iscan[0]);
     const int16x8_t v_coeff = vld1q_s16(coeff_ptr);
     const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15);
     const int16x8_t v_abs = vabsq_s16(v_coeff);
@@ -174,7 +175,7 @@
   }
   // now process the rest of the ac coeffs
   for (int i = 8; i < count; i += 8) {
-    const int16x8_t v_iscan = vld1q_s16(&scan[i]);
+    const int16x8_t v_iscan = vld1q_s16(&iscan[i]);
     const int16x8_t v_coeff = vld1q_s16(coeff_ptr + i);
     const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15);
     const int16x8_t v_abs = vabsq_s16(v_coeff);

diff --git a/test/quantize_lp_func_test.cc b/test/quantize_lp_func_test.cc
index 898b810..f398623 100644
--- a/test/quantize_lp_func_test.cc
+++ b/test/quantize_lp_func_test.cc

@@ -348,4 +348,18 @@
                          ::testing::ValuesIn(kQParamArraySSE2));
 #endif
 
+#if HAVE_NEON
+const QuantizeParam kQParamArrayNEON[] = {
+  make_tuple(&av1_quantize_lp_c, &av1_quantize_lp_neon,
+             static_cast<TX_SIZE>(TX_16X16), AOM_BITS_8),
+  make_tuple(&av1_quantize_lp_c, &av1_quantize_lp_neon,
+             static_cast<TX_SIZE>(TX_8X8), AOM_BITS_8),
+  make_tuple(&av1_quantize_lp_c, &av1_quantize_lp_neon,
+             static_cast<TX_SIZE>(TX_4X4), AOM_BITS_8)
+};
+
+INSTANTIATE_TEST_SUITE_P(NEON, FullPrecisionQuantizeLpTest,
+                         ::testing::ValuesIn(kQParamArrayNEON));
+#endif
+
 }  // namespace
commit	76d5cfb6e3638a06654733ac62912fef8ea2959a	[log] [tgz]
author	Fyodor Kyslov <kyslov@google.com>	Mon Oct 11 17:45:59 2021 -0700
committer	Fyodor Kyslov <kyslov@google.com>	Tue Oct 12 18:26:55 2021 +0000
tree	35d168514485195b8e9d957eacf170a1b8453782
parent	714f6159840b1805c625482ab260a204bb3f4caf [diff]