Fix high bit-depth quantization process
Scale the rounding factor by the same scaling factor that is applied
to the quantization step size. This resolves a compression
performance regression for transform sizes of 32x32 and above.
BUG=aomedia:599
Change-Id: Id3fc9a46c4a8843ff5d77ccaa59ee3112b12d7f4
diff --git a/av1/encoder/av1_quantize.c b/av1/encoder/av1_quantize.c
index 63727df..84d4bbf 100644
--- a/av1/encoder/av1_quantize.c
+++ b/av1/encoder/av1_quantize.c
@@ -1547,7 +1547,7 @@
#endif
const int coeff_sign = (coeff >> 31);
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- const int64_t tmp = abs_coeff + round_ptr[rc != 0];
+ const int64_t tmp = abs_coeff + (round_ptr[rc != 0] >> log_scale);
#if CONFIG_AOM_QM
const uint32_t abs_qcoeff =
(uint32_t)((tmp * quant_ptr[rc != 0] * wt) >> (shift + AOM_QM_BITS));
diff --git a/av1/encoder/x86/av1_highbd_quantize_sse4.c b/av1/encoder/x86/av1_highbd_quantize_sse4.c
index fa56260..764c4c5 100644
--- a/av1/encoder/x86/av1_highbd_quantize_sse4.c
+++ b/av1/encoder/x86/av1_highbd_quantize_sse4.c
@@ -133,7 +133,8 @@
coeff[0] = _mm_loadu_si128((__m128i const *)src);
qparam[0] =
- _mm_set_epi32(round_ptr[1], round_ptr[1], round_ptr[1], round_ptr[0]);
+ _mm_set_epi32(round_ptr[1] >> log_scale, round_ptr[1] >> log_scale,
+ round_ptr[1] >> log_scale, round_ptr[0] >> log_scale);
qparam[1] = _mm_set_epi64x(quant_ptr[1], quant_ptr[0]);
qparam[2] = _mm_set_epi64x(dequant_ptr[1], dequant_ptr[0]);