Fix int16 overflow in av1_quantize_fp_neon: add the rounding offset with a saturating vqaddq_s16 instead of vabaq_s16. Add NEON tests.
Change-Id: I55e3d9420c6614abf7c72ca7b1a2a34e74b324e2
diff --git a/av1/encoder/arm/neon/quantize_neon.c b/av1/encoder/arm/neon/quantize_neon.c
index 1220402..75ad319 100644
--- a/av1/encoder/arm/neon/quantize_neon.c
+++ b/av1/encoder/arm/neon/quantize_neon.c
@@ -54,7 +54,8 @@
const int16x8_t v_iscan = vld1q_s16(&iscan[0]);
const int16x8_t v_coeff = load_tran_low_to_s16q(&coeff_ptr[0]);
const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15);
- const int16x8_t v_tmp = vabaq_s16(v_round, v_coeff, v_zero);
+ const int16x8_t v_abs = vabsq_s16(v_coeff);
+ const int16x8_t v_tmp = vqaddq_s16(v_abs, v_round);
const int32x4_t v_tmp_lo =
vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant));
const int32x4_t v_tmp_hi =
@@ -79,7 +80,8 @@
const int16x8_t v_iscan = vld1q_s16(&iscan[i]);
const int16x8_t v_coeff = load_tran_low_to_s16q(&coeff_ptr[i]);
const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15);
- const int16x8_t v_tmp = vabaq_s16(v_round, v_coeff, v_zero);
+ const int16x8_t v_abs = vabsq_s16(v_coeff);
+ const int16x8_t v_tmp = vqaddq_s16(v_abs, v_round);
const int32x4_t v_tmp_lo =
vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant));
const int32x4_t v_tmp_hi =
diff --git a/test/quantize_func_test.cc b/test/quantize_func_test.cc
index 2807b91..a8a5058 100644
--- a/test/quantize_func_test.cc
+++ b/test/quantize_func_test.cc
@@ -497,6 +497,24 @@
::testing::ValuesIn(kQParamArraySSE2));
#endif
+#if HAVE_NEON  // parameter table and instantiation below are NEON-only
+const QuantizeParam kQParamArrayNEON[] = {  // C and NEON av1_quantize_fp pairs
+ make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_neon,
+ static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_8),
+ make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_neon,
+ static_cast<TX_SIZE>(TX_4X16), TYPE_FP, AOM_BITS_8),
+ make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_neon,
+ static_cast<TX_SIZE>(TX_16X4), TYPE_FP, AOM_BITS_8),
+ make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_neon,
+ static_cast<TX_SIZE>(TX_8X32), TYPE_FP, AOM_BITS_8),
+ make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_neon,
+ static_cast<TX_SIZE>(TX_32X8), TYPE_FP, AOM_BITS_8)
+};  // every entry uses TYPE_FP and AOM_BITS_8; only the TX_SIZE varies
+
+INSTANTIATE_TEST_CASE_P(NEON, QuantizeTest,
+ ::testing::ValuesIn(kQParamArrayNEON));  // one QuantizeTest case per entry
+#endif  // HAVE_NEON
+
#if HAVE_SSSE3 && ARCH_X86_64
INSTANTIATE_TEST_CASE_P(
SSSE3, QuantizeTest,
diff --git a/test/test.cmake b/test/test.cmake
index 50b5625..edf88a8 100644
--- a/test/test.cmake
+++ b/test/test.cmake
@@ -206,6 +206,7 @@
"${AOM_ROOT}/test/obmc_sad_test.cc"
"${AOM_ROOT}/test/obmc_variance_test.cc"
"${AOM_ROOT}/test/pickrst_test.cc"
+ "${AOM_ROOT}/test/quantize_func_test.cc"
"${AOM_ROOT}/test/sad_test.cc"
"${AOM_ROOT}/test/subtract_test.cc"
"${AOM_ROOT}/test/reconinter_test.cc"
@@ -221,7 +222,6 @@
"${AOM_ROOT}/test/av1_highbd_iht_test.cc"
"${AOM_ROOT}/test/av1_quantize_test.cc"
"${AOM_ROOT}/test/corner_match_test.cc"
- "${AOM_ROOT}/test/quantize_func_test.cc"
"${AOM_ROOT}/test/simd_cmp_sse4.cc")
if(HAVE_SSE4_1)