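Fix overflow in av1_quantize_fp_neon. Add tests.

The rounding step used vabaq_s16(v_round, v_coeff, v_zero), a
non-saturating absolute-difference-and-accumulate, so the 16-bit sum
|coeff| + round could wrap around for large coefficients. Compute the
absolute value with vabsq_s16 and add the rounding term with the
saturating vqaddq_s16 instead, in both the first-block path and the
main loop.

Instantiate QuantizeTest for NEON, checking av1_quantize_fp_neon
against av1_quantize_fp_c for several transform sizes, and move the
quantize_func_test.cc entry within test/test.cmake.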

Change-Id: I55e3d9420c6614abf7c72ca7b1a2a34e74b324e2
diff --git a/av1/encoder/arm/neon/quantize_neon.c b/av1/encoder/arm/neon/quantize_neon.c
index 1220402..75ad319 100644
--- a/av1/encoder/arm/neon/quantize_neon.c
+++ b/av1/encoder/arm/neon/quantize_neon.c
@@ -54,7 +54,8 @@
     const int16x8_t v_iscan = vld1q_s16(&iscan[0]);
     const int16x8_t v_coeff = load_tran_low_to_s16q(&coeff_ptr[0]);
     const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15);
-    const int16x8_t v_tmp = vabaq_s16(v_round, v_coeff, v_zero);
+    const int16x8_t v_abs = vabsq_s16(v_coeff);
+    const int16x8_t v_tmp = vqaddq_s16(v_abs, v_round);
     const int32x4_t v_tmp_lo =
         vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant));
     const int32x4_t v_tmp_hi =
@@ -79,7 +80,8 @@
     const int16x8_t v_iscan = vld1q_s16(&iscan[i]);
     const int16x8_t v_coeff = load_tran_low_to_s16q(&coeff_ptr[i]);
     const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15);
-    const int16x8_t v_tmp = vabaq_s16(v_round, v_coeff, v_zero);
+    const int16x8_t v_abs = vabsq_s16(v_coeff);
+    const int16x8_t v_tmp = vqaddq_s16(v_abs, v_round);
     const int32x4_t v_tmp_lo =
         vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant));
     const int32x4_t v_tmp_hi =
diff --git a/test/quantize_func_test.cc b/test/quantize_func_test.cc
index 2807b91..a8a5058 100644
--- a/test/quantize_func_test.cc
+++ b/test/quantize_func_test.cc
@@ -497,6 +497,24 @@
                         ::testing::ValuesIn(kQParamArraySSE2));
 #endif
 
+#if HAVE_NEON
+const QuantizeParam kQParamArrayNEON[] = {
+  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_neon,
+             static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_8),
+  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_neon,
+             static_cast<TX_SIZE>(TX_4X16), TYPE_FP, AOM_BITS_8),
+  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_neon,
+             static_cast<TX_SIZE>(TX_16X4), TYPE_FP, AOM_BITS_8),
+  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_neon,
+             static_cast<TX_SIZE>(TX_8X32), TYPE_FP, AOM_BITS_8),
+  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_neon,
+             static_cast<TX_SIZE>(TX_32X8), TYPE_FP, AOM_BITS_8)
+};
+
+INSTANTIATE_TEST_CASE_P(NEON, QuantizeTest,
+                        ::testing::ValuesIn(kQParamArrayNEON));
+#endif
+
 #if HAVE_SSSE3 && ARCH_X86_64
 INSTANTIATE_TEST_CASE_P(
     SSSE3, QuantizeTest,
diff --git a/test/test.cmake b/test/test.cmake
index 50b5625..edf88a8 100644
--- a/test/test.cmake
+++ b/test/test.cmake
@@ -206,6 +206,7 @@
               "${AOM_ROOT}/test/obmc_sad_test.cc"
               "${AOM_ROOT}/test/obmc_variance_test.cc"
               "${AOM_ROOT}/test/pickrst_test.cc"
+              "${AOM_ROOT}/test/quantize_func_test.cc"
               "${AOM_ROOT}/test/sad_test.cc"
               "${AOM_ROOT}/test/subtract_test.cc"
               "${AOM_ROOT}/test/reconinter_test.cc"
@@ -221,7 +222,6 @@
               "${AOM_ROOT}/test/av1_highbd_iht_test.cc"
               "${AOM_ROOT}/test/av1_quantize_test.cc"
               "${AOM_ROOT}/test/corner_match_test.cc"
-              "${AOM_ROOT}/test/quantize_func_test.cc"
               "${AOM_ROOT}/test/simd_cmp_sse4.cc")
 
   if(HAVE_SSE4_1)