Update high-bitdepth quantizer SIMD paths and unit tests for CONFIG_EXTQUANT
Change-Id: I6e35237c3045d0b421300ae6f62c3fcc9d1f7e70
diff --git a/aom_dsp/aom_dsp_rtcd_defs.pl b/aom_dsp/aom_dsp_rtcd_defs.pl
index 9ad5d4b..a1be01d 100755
--- a/aom_dsp/aom_dsp_rtcd_defs.pl
+++ b/aom_dsp/aom_dsp_rtcd_defs.pl
@@ -551,7 +551,7 @@
if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
if (aom_config("CONFIG_EXTQUANT") eq "yes") {
add_proto qw/void aom_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int32_t *zbin_ptr, const int32_t *round_ptr, const int32_t *quant_ptr, const int32_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int32_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/aom_highbd_quantize_b sse2 avx2/;
+ specialize qw/aom_highbd_quantize_b sse2/;
add_proto qw/void aom_highbd_quantize_b_adaptive/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int32_t *zbin_ptr, const int32_t *round_ptr, const int32_t *quant_ptr, const int32_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int32_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
diff --git a/aom_dsp/x86/highbd_quantize_intrin_sse2.c b/aom_dsp/x86/highbd_quantize_intrin_sse2.c
index 4f48663..13d6530 100644
--- a/aom_dsp/x86/highbd_quantize_intrin_sse2.c
+++ b/aom_dsp/x86/highbd_quantize_intrin_sse2.c
@@ -89,7 +89,11 @@
int k = 4 * i + j;
const int64_t tmp3 = abs_coeff[j] + round_ptr[k != 0];
const int64_t tmp4 = ((tmp3 * quant_ptr[k != 0]) >> 16) + tmp3;
+#if CONFIG_EXTQUANT
+ const int32_t abs_qcoeff =
+#else
const uint32_t abs_qcoeff =
+#endif
(uint32_t)((tmp4 * quant_shift_ptr[k != 0]) >> 16);
qcoeff_ptr[k] = (int)(abs_qcoeff ^ coeff_sign[j]) - coeff_sign[j];
#if CONFIG_EXTQUANT
diff --git a/av1/encoder/x86/av1_highbd_quantize_avx2.c b/av1/encoder/x86/av1_highbd_quantize_avx2.c
index bed144e..b63a9a6 100644
--- a/av1/encoder/x86/av1_highbd_quantize_avx2.c
+++ b/av1/encoder/x86/av1_highbd_quantize_avx2.c
@@ -74,7 +74,7 @@
__m256i *eob) {
const __m256i abs_coeff = _mm256_abs_epi32(*c);
#if CONFIG_EXTQUANT
- const __m256i round = _mm256_set1_epi64x((1 << QUANT_TABLE_BITS) >> 1);
+ const __m256i round = _mm256_set1_epi32((1 << QUANT_TABLE_BITS) >> 1);
#endif
__m256i q = _mm256_add_epi32(abs_coeff, qp[0]);
diff --git a/test/av1_quantize_test.cc b/test/av1_quantize_test.cc
index 5714ba4..d319f1e 100644
--- a/test/av1_quantize_test.cc
+++ b/test/av1_quantize_test.cc
@@ -21,13 +21,21 @@
#include "av1/common/scan.h"
namespace {
-
+#if CONFIG_EXTQUANT
+typedef void (*QuantizeFpFunc)(
+ const tran_low_t *coeff_ptr, intptr_t count, const int32_t *zbin_ptr,
+ const int32_t *round_ptr, const int32_t *quant_ptr,
+ const int32_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int32_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan, int log_scale);
+#else
typedef void (*QuantizeFpFunc)(
const tran_low_t *coeff_ptr, intptr_t count, const int16_t *zbin_ptr,
const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan, int log_scale);
+#endif
struct QuantizeFuncParams {
QuantizeFuncParams(QuantizeFpFunc qF = NULL, QuantizeFpFunc qRefF = NULL,
@@ -42,14 +50,31 @@
const int numTests = 1000;
const int maxSize = 1024;
+#if CONFIG_EXTQUANT
+const int roundFactorRange = 64;
+const int dequantRange = 1048576;
+#else
const int roundFactorRange = 127;
const int dequantRange = 32768;
+#endif
const int coeffRange = (1 << 20) - 1;
class AV1QuantizeTest : public ::testing::TestWithParam<QuantizeFuncParams> {
public:
void RunQuantizeTest() {
ACMRandom rnd(ACMRandom::DeterministicSeed());
+#if CONFIG_EXTQUANT
+ DECLARE_ALIGNED(16, tran_low_t, coeff_ptr[maxSize]);
+ DECLARE_ALIGNED(16, int32_t, zbin_ptr[8]);
+ DECLARE_ALIGNED(16, int32_t, round_ptr[8]);
+ DECLARE_ALIGNED(16, int32_t, quant_ptr[8]);
+ DECLARE_ALIGNED(16, int32_t, quant_shift_ptr[8]);
+ DECLARE_ALIGNED(16, tran_low_t, qcoeff_ptr[maxSize]);
+ DECLARE_ALIGNED(16, tran_low_t, dqcoeff_ptr[maxSize]);
+ DECLARE_ALIGNED(16, tran_low_t, ref_qcoeff_ptr[maxSize]);
+ DECLARE_ALIGNED(16, tran_low_t, ref_dqcoeff_ptr[maxSize]);
+ DECLARE_ALIGNED(16, int32_t, dequant_ptr[8]);
+#else
DECLARE_ALIGNED(16, tran_low_t, coeff_ptr[maxSize]);
DECLARE_ALIGNED(16, int16_t, zbin_ptr[8]);
DECLARE_ALIGNED(16, int16_t, round_ptr[8]);
@@ -60,6 +85,7 @@
DECLARE_ALIGNED(16, tran_low_t, ref_qcoeff_ptr[maxSize]);
DECLARE_ALIGNED(16, tran_low_t, ref_dqcoeff_ptr[maxSize]);
DECLARE_ALIGNED(16, int16_t, dequant_ptr[8]);
+#endif
uint16_t eob;
uint16_t ref_eob;
int err_count_total = 0;
@@ -79,12 +105,23 @@
}
for (int j = 0; j < 2; j++) {
+#if CONFIG_EXTQUANT
+ zbin_ptr[j] = rnd.Rand31();
+ quant_shift_ptr[j] = rnd.Rand31();
+ // int32_t positive
+ dequant_ptr[j] = abs(rnd(dequantRange));
+ quant_ptr[j] = static_cast<int32_t>(
+ (1 << (16 + QUANT_FP_BITS + QUANT_TABLE_BITS)) / dequant_ptr[j]);
+ round_ptr[j] = (abs(rnd(roundFactorRange)) * dequant_ptr[j]) >>
+ (7 + QUANT_TABLE_BITS);
+#else
zbin_ptr[j] = rnd.Rand16();
quant_shift_ptr[j] = rnd.Rand16();
// int16_t positive
dequant_ptr[j] = abs(rnd(dequantRange));
quant_ptr[j] = static_cast<int16_t>((1 << 16) / dequant_ptr[j]);
round_ptr[j] = (abs(rnd(roundFactorRange)) * dequant_ptr[j]) >> 7;
+#endif
}
for (int j = 2; j < 8; ++j) {
zbin_ptr[j] = zbin_ptr[1];
@@ -125,6 +162,18 @@
void RunEobTest() {
ACMRandom rnd(ACMRandom::DeterministicSeed());
+#if CONFIG_EXTQUANT
+ DECLARE_ALIGNED(16, tran_low_t, coeff_ptr[maxSize]);
+ DECLARE_ALIGNED(16, int32_t, zbin_ptr[8]);
+ DECLARE_ALIGNED(16, int32_t, round_ptr[8]);
+ DECLARE_ALIGNED(16, int32_t, quant_ptr[8]);
+ DECLARE_ALIGNED(16, int32_t, quant_shift_ptr[8]);
+ DECLARE_ALIGNED(16, tran_low_t, qcoeff_ptr[maxSize]);
+ DECLARE_ALIGNED(16, tran_low_t, dqcoeff_ptr[maxSize]);
+ DECLARE_ALIGNED(16, tran_low_t, ref_qcoeff_ptr[maxSize]);
+ DECLARE_ALIGNED(16, tran_low_t, ref_dqcoeff_ptr[maxSize]);
+ DECLARE_ALIGNED(16, int32_t, dequant_ptr[8]);
+#else
DECLARE_ALIGNED(16, tran_low_t, coeff_ptr[maxSize]);
DECLARE_ALIGNED(16, int16_t, zbin_ptr[8]);
DECLARE_ALIGNED(16, int16_t, round_ptr[8]);
@@ -135,6 +184,7 @@
DECLARE_ALIGNED(16, tran_low_t, ref_qcoeff_ptr[maxSize]);
DECLARE_ALIGNED(16, tran_low_t, ref_dqcoeff_ptr[maxSize]);
DECLARE_ALIGNED(16, int16_t, dequant_ptr[8]);
+#endif
uint16_t eob;
uint16_t ref_eob;
int count = params_.coeffCount;
@@ -155,12 +205,23 @@
coeff_ptr[rnd(count)] = rnd(coeffRange);
for (int j = 0; j < 2; j++) {
+#if CONFIG_EXTQUANT
+ zbin_ptr[j] = rnd.Rand31();
+ quant_shift_ptr[j] = rnd.Rand31();
+ // int32_t positive
+ dequant_ptr[j] = abs(rnd(dequantRange));
+ quant_ptr[j] =
+ ((1 << (16 + QUANT_FP_BITS + QUANT_TABLE_BITS)) / dequant_ptr[j]);
+ round_ptr[j] = (abs(rnd(roundFactorRange)) * dequant_ptr[j]) >>
+ (7 + QUANT_TABLE_BITS);
+#else
zbin_ptr[j] = rnd.Rand16();
quant_shift_ptr[j] = rnd.Rand16();
// int16_t positive
dequant_ptr[j] = abs(rnd(dequantRange));
quant_ptr[j] = (1 << 16) / dequant_ptr[j];
round_ptr[j] = (abs(rnd(roundFactorRange)) * dequant_ptr[j]) >> 7;
+#endif
}
for (int j = 2; j < 8; ++j) {
zbin_ptr[j] = zbin_ptr[1];
@@ -207,7 +268,7 @@
TEST_P(AV1QuantizeTest, BitExactCheck) { RunQuantizeTest(); }
TEST_P(AV1QuantizeTest, EobVerify) { RunEobTest(); }
-#if HAVE_SSE4_1 && !CONFIG_EXTQUANT
+#if HAVE_SSE4_1 && CONFIG_EXTQUANT
const QuantizeFuncParams qfps[4] = {
QuantizeFuncParams(&av1_highbd_quantize_fp_sse4_1, &av1_highbd_quantize_fp_c,
16),
@@ -222,7 +283,7 @@
INSTANTIATE_TEST_SUITE_P(SSE4_1, AV1QuantizeTest, ::testing::ValuesIn(qfps));
#endif // HAVE_SSE4_1
-#if HAVE_AVX2 && !CONFIG_EXTQUANT
+#if HAVE_AVX2 && CONFIG_EXTQUANT
const QuantizeFuncParams qfps_avx2[4] = {
QuantizeFuncParams(&av1_highbd_quantize_fp_avx2, &av1_highbd_quantize_fp_c,
16),
diff --git a/test/quantize_func_test.cc b/test/quantize_func_test.cc
index c1e5539..ba2698b 100644
--- a/test/quantize_func_test.cc
+++ b/test/quantize_func_test.cc
@@ -439,6 +439,38 @@
INSTANTIATE_TEST_SUITE_P(AVX2, QuantizeTest,
::testing::ValuesIn(kQParamArrayAvx2));
+#elif HAVE_AVX2 && CONFIG_EXTQUANT
+const QuantizeParam kQParamArrayAvx2[] = {
+ make_tuple(&highbd_quan16x16_wrapper<av1_highbd_quantize_fp_c>,
+ &highbd_quan16x16_wrapper<av1_highbd_quantize_fp_avx2>,
+ static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_8),
+ make_tuple(&highbd_quan16x16_wrapper<av1_highbd_quantize_fp_c>,
+ &highbd_quan16x16_wrapper<av1_highbd_quantize_fp_avx2>,
+ static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_10),
+ make_tuple(&highbd_quan16x16_wrapper<av1_highbd_quantize_fp_c>,
+ &highbd_quan16x16_wrapper<av1_highbd_quantize_fp_avx2>,
+ static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_12),
+ make_tuple(&highbd_quan32x32_wrapper<av1_highbd_quantize_fp_c>,
+ &highbd_quan32x32_wrapper<av1_highbd_quantize_fp_avx2>,
+ static_cast<TX_SIZE>(TX_32X32), TYPE_FP, AOM_BITS_8),
+ make_tuple(&highbd_quan32x32_wrapper<av1_highbd_quantize_fp_c>,
+ &highbd_quan32x32_wrapper<av1_highbd_quantize_fp_avx2>,
+ static_cast<TX_SIZE>(TX_32X32), TYPE_FP, AOM_BITS_10),
+ make_tuple(&highbd_quan32x32_wrapper<av1_highbd_quantize_fp_c>,
+ &highbd_quan32x32_wrapper<av1_highbd_quantize_fp_avx2>,
+ static_cast<TX_SIZE>(TX_32X32), TYPE_FP, AOM_BITS_12),
+ make_tuple(&highbd_quan64x64_wrapper<av1_highbd_quantize_fp_c>,
+ &highbd_quan64x64_wrapper<av1_highbd_quantize_fp_avx2>,
+ static_cast<TX_SIZE>(TX_64X64), TYPE_FP, AOM_BITS_8),
+ make_tuple(&highbd_quan64x64_wrapper<av1_highbd_quantize_fp_c>,
+ &highbd_quan64x64_wrapper<av1_highbd_quantize_fp_avx2>,
+ static_cast<TX_SIZE>(TX_64X64), TYPE_FP, AOM_BITS_10),
+ make_tuple(&highbd_quan64x64_wrapper<av1_highbd_quantize_fp_c>,
+ &highbd_quan64x64_wrapper<av1_highbd_quantize_fp_avx2>,
+ static_cast<TX_SIZE>(TX_64X64), TYPE_FP, AOM_BITS_12),
+};
+INSTANTIATE_TEST_SUITE_P(AVX2, QuantizeTest,
+ ::testing::ValuesIn(kQParamArrayAvx2));
#endif // HAVE_AVX2
#if HAVE_SSE2 && !CONFIG_EXTQUANT
@@ -528,9 +560,33 @@
INSTANTIATE_TEST_SUITE_P(SSE2, QuantizeTest,
::testing::ValuesIn(kQParamArraySSE2));
+#elif HAVE_SSE2 && CONFIG_EXTQUANT
+const QuantizeParam kQParamArraySSE2[] = {
+ make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_sse2,
+ static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8),
+ make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_sse2,
+ static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_10),
+ make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_sse2,
+ static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_12),
+ make_tuple(&aom_highbd_quantize_b_32x32_c, &aom_highbd_quantize_b_32x32_sse2,
+ static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_8),
+ make_tuple(&aom_highbd_quantize_b_32x32_c, &aom_highbd_quantize_b_32x32_sse2,
+ static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_10),
+ make_tuple(&aom_highbd_quantize_b_32x32_c, &aom_highbd_quantize_b_32x32_sse2,
+ static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_12),
+ make_tuple(&aom_highbd_quantize_b_64x64_c, &aom_highbd_quantize_b_64x64_sse2,
+ static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_8),
+ make_tuple(&aom_highbd_quantize_b_64x64_c, &aom_highbd_quantize_b_64x64_sse2,
+ static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_10),
+ make_tuple(&aom_highbd_quantize_b_64x64_c, &aom_highbd_quantize_b_64x64_sse2,
+ static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_12)
+};
+
+INSTANTIATE_TEST_SUITE_P(SSE2, QuantizeTest,
+ ::testing::ValuesIn(kQParamArraySSE2));
#endif
-#if HAVE_NEON
+#if HAVE_NEON && !CONFIG_EXTQUANT
const QuantizeParam kQParamArrayNEON[] = {
make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_neon,
static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_8),