Refactor quantization C code. This commit de-duplicates C reference quantization code and unifies quantization matrix (QM) and non-QM code paths when there is no SIMD. The reorganisation will also facilitate re-using SIMD quant functions for QM when the matrix is flat, as is the default when AOM_QM is enabled. Change-Id: Idbfdac9eb9a31adcffe734aac1877d58b86fab77
diff --git a/aom_dsp/aom_dsp_common.h b/aom_dsp/aom_dsp_common.h index 5b10432..a3cea56 100644 --- a/aom_dsp/aom_dsp_common.h +++ b/aom_dsp/aom_dsp_common.h
@@ -52,10 +52,9 @@ #define UNLIKELY(v) (v) #endif -#if CONFIG_AOM_QM typedef uint16_t qm_val_t; #define AOM_QM_BITS 5 -#endif + #if CONFIG_HIGHBITDEPTH // Note: // tran_low_t is the datatype used for final transform coefficients.
diff --git a/aom_dsp/aom_dsp_rtcd_defs.pl b/aom_dsp/aom_dsp_rtcd_defs.pl index 726b4f2..f54b5150 100755 --- a/aom_dsp/aom_dsp_rtcd_defs.pl +++ b/aom_dsp/aom_dsp_rtcd_defs.pl
@@ -517,23 +517,7 @@ # # Quantization # -if (aom_config("CONFIG_AOM_QM") eq "yes") { - if (aom_config("CONFIG_AV1_ENCODER") eq "yes") { - add_proto qw/void aom_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr"; - - add_proto qw/void aom_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr"; - - add_proto qw/void aom_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr"; - - add_proto qw/void aom_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr"; - - add_proto qw/void aom_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, 
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr"; - - add_proto qw/void aom_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr"; - - } # CONFIG_AV1_ENCODER -} else { - if (aom_config("CONFIG_AV1_ENCODER") eq "yes") { +if (aom_config("CONFIG_AV1_ENCODER") eq "yes") { add_proto qw/void aom_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; specialize qw/aom_quantize_b sse2/, "$ssse3_x86_64", "$avx_x86_64"; @@ -541,17 +525,18 @@ specialize qw/aom_quantize_b_32x32/, "$ssse3_x86_64", "$avx_x86_64"; add_proto qw/void aom_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; +} # CONFIG_AV1_ENCODER - add_proto qw/void aom_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; - 
specialize qw/aom_highbd_quantize_b sse2 avx2/; +if (aom_config("CONFIG_AV1_ENCODER") eq "yes") { + add_proto qw/void aom_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + specialize qw/aom_highbd_quantize_b sse2 avx2/; - add_proto qw/void aom_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; - specialize qw/aom_highbd_quantize_b_32x32 sse2/; + add_proto qw/void aom_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + specialize qw/aom_highbd_quantize_b_32x32 sse2/; - add_proto qw/void aom_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + add_proto qw/void aom_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t 
*dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; - } # CONFIG_AV1_ENCODER -} # CONFIG_AOM_QM +} # CONFIG_AV1_ENCODER if (aom_config("CONFIG_AV1") eq "yes") { # # Alpha blending with mask
diff --git a/aom_dsp/quantize.c b/aom_dsp/quantize.c index fe98b60..3e8f0d4 100644 --- a/aom_dsp/quantize.c +++ b/aom_dsp/quantize.c
@@ -12,18 +12,14 @@ #include "aom_dsp/quantize.h" #include "aom_mem/aom_mem.h" -static void quantize_b_helper_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, - int skip_block, const int16_t *zbin_ptr, - const int16_t *round_ptr, - const int16_t *quant_ptr, - const int16_t *quant_shift_ptr, - tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, - const int16_t *dequant_ptr, uint16_t *eob_ptr, - const int16_t *scan, const int16_t *iscan, -#if CONFIG_AOM_QM - const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr, -#endif - const int log_scale) { +void quantize_b_helper_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, + int skip_block, const int16_t *zbin_ptr, + const int16_t *round_ptr, const int16_t *quant_ptr, + const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, + uint16_t *eob_ptr, const int16_t *scan, + const int16_t *iscan, const qm_val_t *qm_ptr, + const qm_val_t *iqm_ptr, const int log_scale) { const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], log_scale), ROUND_POWER_OF_TWO(zbin_ptr[1], log_scale) }; const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 }; @@ -37,20 +33,12 @@ // Pre-scan pass for (i = (int)n_coeffs - 1; i >= 0; i--) { const int rc = scan[i]; -#if CONFIG_AOM_QM - const qm_val_t wt = qm_ptr[rc]; + const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS); const int coeff = coeff_ptr[rc] * wt; -#else - const int coeff = coeff_ptr[rc]; -#endif // CONFIG_AOM_QM -#if CONFIG_AOM_QM if (coeff < (zbins[rc != 0] << AOM_QM_BITS) && coeff > (nzbins[rc != 0] << AOM_QM_BITS)) non_zero_count--; -#else - if (coeff < zbins[rc != 0] && coeff > nzbins[rc != 0]) non_zero_count--; -#endif // CONFIG_AOM_QM else break; } @@ -64,35 +52,21 @@ const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; int tmp32; -#if CONFIG_AOM_QM - const qm_val_t wt = qm_ptr[rc]; + const qm_val_t wt = qm_ptr != NULL ? 
qm_ptr[rc] : (1 << AOM_QM_BITS); if (abs_coeff * wt >= (zbins[rc != 0] << AOM_QM_BITS)) { -#else - if (abs_coeff >= zbins[rc != 0]) { -#endif // CONFIG_AOM_QM int64_t tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale), INT16_MIN, INT16_MAX); -#if CONFIG_AOM_QM tmp *= wt; tmp32 = (int)(((((tmp * quant_ptr[rc != 0]) >> 16) + tmp) * quant_shift_ptr[rc != 0]) >> (16 - log_scale + AOM_QM_BITS)); // quantization -#else - tmp32 = (int)(((((tmp * quant_ptr[rc != 0]) >> 16) + tmp) * - quant_shift_ptr[rc != 0]) >> - (16 - log_scale)); // quantization -#endif // CONFIG_AOM_QM qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign; -#if CONFIG_AOM_QM + const int iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS); const int dequant = - (dequant_ptr[rc != 0] * iqm_ptr[rc] + (1 << (AOM_QM_BITS - 1))) >> + (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS; dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant / (1 << log_scale); -#else - dqcoeff_ptr[rc] = - qcoeff_ptr[rc] * dequant_ptr[rc != 0] / (1 << log_scale); -#endif // CONFIG_AOM_QM if (tmp32) eob = i; } @@ -101,25 +75,111 @@ *eob_ptr = eob + 1; } +void highbd_quantize_b_helper_c( + const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, + const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, + const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, + const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr, + const qm_val_t *iqm_ptr, const int log_scale) { + int i, eob = -1; + const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], log_scale), + ROUND_POWER_OF_TWO(zbin_ptr[1], log_scale) }; + const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 }; + int dequant; +#if CONFIG_TX64X64 + int idx_arr[4096]; +#else + int idx_arr[1024]; +#endif + (void)iscan; + int idx = 0; + + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * 
sizeof(*dqcoeff_ptr)); + + if (!skip_block) { + // Pre-scan pass + for (i = 0; i < n_coeffs; i++) { + const int rc = scan[i]; + const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS); + const int coeff = coeff_ptr[rc] * wt; + + // If the coefficient is out of the base ZBIN range, keep it for + // quantization. + if (coeff >= (zbins[rc != 0] << AOM_QM_BITS) || + coeff <= (nzbins[rc != 0] << AOM_QM_BITS)) + idx_arr[idx++] = i; + } + + // Quantization pass: only process the coefficients selected in + // pre-scan pass. Note: idx can be zero. + for (i = 0; i < idx; i++) { + const int rc = scan[idx_arr[i]]; + const int coeff = coeff_ptr[rc]; + const int coeff_sign = (coeff >> 31); + const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS); + const qm_val_t iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS); + const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; + const int64_t tmp1 = + abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale); + const int64_t tmpw = tmp1 * wt; + const int64_t tmp2 = ((tmpw * quant_ptr[rc != 0]) >> 16) + tmpw; + const uint32_t abs_qcoeff = (uint32_t)( + (tmp2 * quant_shift_ptr[rc != 0]) >> (16 - log_scale + AOM_QM_BITS)); + qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); + dequant = (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >> + AOM_QM_BITS; + dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / (1 << log_scale); + if (abs_qcoeff) eob = idx_arr[i]; + } + } + *eob_ptr = eob + 1; +} + +void quantize_dc_helper(const tran_low_t *coeff_ptr, int n_coeffs, + int skip_block, const int16_t *round_ptr, + const int16_t quant, tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr, + uint16_t *eob_ptr, const qm_val_t *qm_ptr, + const qm_val_t *iqm_ptr, const int log_scale) { + const int rc = 0; + const int coeff = coeff_ptr[rc]; + const int coeff_sign = (coeff >> 31); + const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; + int64_t tmp, eob = -1; + 
int32_t tmp32; + int dequant; + + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + + if (!skip_block) { + const int wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS); + const int iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS); + tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale), + INT16_MIN, INT16_MAX); + tmp32 = (int32_t)((tmp * wt * quant) >> (16 - log_scale + AOM_QM_BITS)); + qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign; + dequant = (dequant_ptr * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS; + dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / (1 << log_scale); + if (tmp32) eob = 0; + } + *eob_ptr = eob + 1; +} + +/* These functions should only be called when quantisation matrices + are not used. */ void aom_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, - const int16_t *iscan -#if CONFIG_AOM_QM - , - const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr -#endif - ) { + const int16_t *iscan) { quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, - dequant_ptr, eob_ptr, scan, iscan, -#if CONFIG_AOM_QM - qm_ptr, iqm_ptr, -#endif - 0); + dequant_ptr, eob_ptr, scan, iscan, NULL, NULL, 0); } void aom_quantize_b_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, @@ -128,19 +188,10 @@ const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, - const int16_t *scan, const int16_t *iscan -#if CONFIG_AOM_QM - , - const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr -#endif - ) { + const int16_t *scan, const int16_t *iscan) { quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, 
zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, - dequant_ptr, eob_ptr, scan, iscan, -#if CONFIG_AOM_QM - qm_ptr, iqm_ptr, -#endif - 1); + dequant_ptr, eob_ptr, scan, iscan, NULL, NULL, 1); } #if CONFIG_TX64X64 @@ -150,427 +201,28 @@ const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, - const int16_t *scan, const int16_t *iscan -#if CONFIG_AOM_QM - , - const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr -#endif - ) { + const int16_t *scan, const int16_t *iscan) { quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, - dequant_ptr, eob_ptr, scan, iscan, -#if CONFIG_AOM_QM - qm_ptr, iqm_ptr, -#endif - 2); + dequant_ptr, eob_ptr, scan, iscan, NULL, NULL, 2); } #endif // CONFIG_TX64X64 -#if CONFIG_AOM_QM -void aom_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block, - const int16_t *round_ptr, const int16_t quant, - tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, - const int16_t dequant_ptr, uint16_t *eob_ptr, - const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr) { - const int rc = 0; - const int coeff = coeff_ptr[rc]; - const int coeff_sign = (coeff >> 31); - const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; - int64_t tmp, eob = -1; - int32_t tmp32; - int dequant = - (dequant_ptr * iqm_ptr[rc] + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS; - - memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); - memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); - - if (!skip_block) { - tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX); - tmp32 = (int32_t)((tmp * qm_ptr[rc] * quant) >> (16 + AOM_QM_BITS)); - qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign; - dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant; - if (tmp32) eob = 0; - } - *eob_ptr = eob + 1; -} - -void aom_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block, - const int16_t *round_ptr, const int16_t 
quant, - tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, - const int16_t dequant_ptr, uint16_t *eob_ptr, - const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr) { - const int n_coeffs = 1024; - const int rc = 0; - const int coeff = coeff_ptr[rc]; - const int coeff_sign = (coeff >> 31); - const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; - int64_t tmp, eob = -1; - int32_t tmp32; - int dequant; - - memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); - memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); - - if (!skip_block) { - tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1), - INT16_MIN, INT16_MAX); - tmp32 = (int32_t)((tmp * qm_ptr[rc] * quant) >> (15 + AOM_QM_BITS)); - qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign; - dequant = - (dequant_ptr * iqm_ptr[rc] + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS; - dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / 2; - if (tmp32) eob = 0; - } - *eob_ptr = eob + 1; -} - -#if CONFIG_TX64X64 -void aom_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block, - const int16_t *round_ptr, const int16_t quant, - tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, - const int16_t dequant_ptr, uint16_t *eob_ptr, - const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr) { - const int n_coeffs = 1024; - const int rc = 0; - const int coeff = coeff_ptr[rc]; - const int coeff_sign = (coeff >> 31); - const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; - int64_t tmp, eob = -1; - int32_t tmp32; - int dequant; - - memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); - memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); - - if (!skip_block) { - tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 2), - INT16_MIN, INT16_MAX); - tmp32 = (int32_t)((tmp * qm_ptr[rc] * quant) >> (14 + AOM_QM_BITS)); - qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign; - dequant = - (dequant_ptr * iqm_ptr[rc] + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS; - dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / 4; - if (tmp32) eob = 
0; - } - *eob_ptr = eob + 1; -} -#endif // CONFIG_TX64X64 - -void aom_highbd_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, - int skip_block, const int16_t *round_ptr, - const int16_t quant, tran_low_t *qcoeff_ptr, - tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr, - uint16_t *eob_ptr, const qm_val_t *qm_ptr, - const qm_val_t *iqm_ptr) { - int eob = -1; - int dequant = - (dequant_ptr * iqm_ptr[0] + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS; - - memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); - memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); - - if (!skip_block) { - const int coeff = coeff_ptr[0]; - const int coeff_sign = (coeff >> 31); - const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; - const int64_t tmp = abs_coeff + round_ptr[0]; - const uint32_t abs_qcoeff = - (uint32_t)((tmp * qm_ptr[0] * quant) >> (16 + AOM_QM_BITS)); - qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); - dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant; - if (abs_qcoeff) eob = 0; - } - *eob_ptr = eob + 1; -} - -void aom_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block, - const int16_t *round_ptr, const int16_t quant, - tran_low_t *qcoeff_ptr, - tran_low_t *dqcoeff_ptr, - const int16_t dequant_ptr, uint16_t *eob_ptr, - const qm_val_t *qm_ptr, - const qm_val_t *iqm_ptr) { - const int n_coeffs = 1024; - int eob = -1; - int dequant; - - memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); - memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); - - if (!skip_block) { - const int coeff = coeff_ptr[0]; - const int coeff_sign = (coeff >> 31); - const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; - const int64_t tmp = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[0], 1); - const uint32_t abs_qcoeff = - (uint32_t)((tmp * qm_ptr[0] * quant) >> (15 + AOM_QM_BITS)); - qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); - dequant = - (dequant_ptr * iqm_ptr[0] + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS; - dqcoeff_ptr[0] 
= (qcoeff_ptr[0] * dequant) / 2; - if (abs_qcoeff) eob = 0; - } - *eob_ptr = eob + 1; -} - -#if CONFIG_TX64X64 -void aom_highbd_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block, - const int16_t *round_ptr, const int16_t quant, - tran_low_t *qcoeff_ptr, - tran_low_t *dqcoeff_ptr, - const int16_t dequant_ptr, uint16_t *eob_ptr, - const qm_val_t *qm_ptr, - const qm_val_t *iqm_ptr) { - const int n_coeffs = 1024; - int eob = -1; - int dequant; - - memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); - memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); - - if (!skip_block) { - const int coeff = coeff_ptr[0]; - const int coeff_sign = (coeff >> 31); - const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; - const int64_t tmp = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[0], 2); - const uint32_t abs_qcoeff = - (uint32_t)((tmp * qm_ptr[0] * quant) >> (14 + AOM_QM_BITS)); - qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); - dequant = - (dequant_ptr * iqm_ptr[0] + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS; - dqcoeff_ptr[0] = (qcoeff_ptr[0] * dequant) / 4; - if (abs_qcoeff) eob = 0; - } - *eob_ptr = eob + 1; -} -#endif // CONFIG_TX64X64 - -void aom_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, - int skip_block, const int16_t *zbin_ptr, - const int16_t *round_ptr, const int16_t *quant_ptr, - const int16_t *quant_shift_ptr, - tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, - const int16_t *dequant_ptr, uint16_t *eob_ptr, - const int16_t *scan, const int16_t *iscan, - const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr) { - int i, non_zero_count = (int)n_coeffs, eob = -1; - const int zbins[2] = { zbin_ptr[0], zbin_ptr[1] }; - const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 }; - int dequant; - (void)iscan; - - memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); - memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); - - if (!skip_block) { - // Pre-scan pass - for (i = (int)n_coeffs - 1; i >= 0; i--) { - const int 
rc = scan[i]; - const qm_val_t wt = qm_ptr[rc]; - const int coeff = coeff_ptr[rc] * wt; - - if (coeff < (zbins[rc != 0] << AOM_QM_BITS) && - coeff > (nzbins[rc != 0] << AOM_QM_BITS)) - non_zero_count--; - else - break; - } - - // Quantization pass: All coefficients with index >= zero_flag are - // skippable. Note: zero_flag can be zero. - for (i = 0; i < non_zero_count; i++) { - const int rc = scan[i]; - const int coeff = coeff_ptr[rc]; - const qm_val_t wt = qm_ptr[rc]; - const int coeff_sign = (coeff >> 31); - const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; - - if (abs_coeff * wt >= (zbins[rc != 0] << AOM_QM_BITS)) { - const int64_t tmp1 = abs_coeff + round_ptr[rc != 0]; - const int64_t tmpw = tmp1 * wt; - const int64_t tmp2 = ((tmpw * quant_ptr[rc != 0]) >> 16) + tmpw; - const uint32_t abs_qcoeff = - (uint32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> (16 + AOM_QM_BITS)); - qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); - dequant = - (dequant_ptr[rc != 0] * iqm_ptr[rc] + (1 << (AOM_QM_BITS - 1))) >> - AOM_QM_BITS; - dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant; - if (abs_qcoeff) eob = i; - } - } - } - *eob_ptr = eob + 1; -} - -void aom_highbd_quantize_b_32x32_c( - const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, - const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, - const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, - tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, - const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr, - const qm_val_t *iqm_ptr) { - const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], 1), - ROUND_POWER_OF_TWO(zbin_ptr[1], 1) }; - const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 }; - - int idx = 0; - int idx_arr[1024]; - int i, eob = -1; - int dequant; - (void)iscan; - - memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); - memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); - - if (!skip_block) { - // Pre-scan pass - for (i 
= 0; i < n_coeffs; i++) { - const int rc = scan[i]; - const qm_val_t wt = qm_ptr[rc]; - const int coeff = coeff_ptr[rc] * wt; - - // If the coefficient is out of the base ZBIN range, keep it for - // quantization. - if (coeff >= (zbins[rc != 0] << AOM_QM_BITS) || - coeff <= (nzbins[rc != 0] << AOM_QM_BITS)) - idx_arr[idx++] = i; - } - - // Quantization pass: only process the coefficients selected in - // pre-scan pass. Note: idx can be zero. - for (i = 0; i < idx; i++) { - const int rc = scan[idx_arr[i]]; - const int coeff = coeff_ptr[rc]; - const int coeff_sign = (coeff >> 31); - const qm_val_t wt = qm_ptr[rc]; - const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; - const int64_t tmp1 = - abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1); - const int64_t tmpw = tmp1 * wt; - const int64_t tmp2 = ((tmpw * quant_ptr[rc != 0]) >> 16) + tmpw; - const uint32_t abs_qcoeff = - (uint32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> (15 + AOM_QM_BITS)); - qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); - dequant = - (dequant_ptr[rc != 0] * iqm_ptr[rc] + (1 << (AOM_QM_BITS - 1))) >> - AOM_QM_BITS; - dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / 2; - if (abs_qcoeff) eob = idx_arr[i]; - } - } - *eob_ptr = eob + 1; -} - -#if CONFIG_TX64X64 -void aom_highbd_quantize_b_64x64_c( - const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, - const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, - const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, - tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, - const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr, - const qm_val_t *iqm_ptr) { - const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], 2), - ROUND_POWER_OF_TWO(zbin_ptr[1], 2) }; - const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 }; - - int idx = 0; - int idx_arr[4096]; - int i, eob = -1; - int dequant; - (void)iscan; - - memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); - 
memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); - - if (!skip_block) { - // Pre-scan pass - for (i = 0; i < n_coeffs; i++) { - const int rc = scan[i]; - const qm_val_t wt = qm_ptr[rc]; - const int coeff = coeff_ptr[rc] * wt; - - // If the coefficient is out of the base ZBIN range, keep it for - // quantization. - if (coeff >= (zbins[rc != 0] << AOM_QM_BITS) || - coeff <= (nzbins[rc != 0] << AOM_QM_BITS)) - idx_arr[idx++] = i; - } - - // Quantization pass: only process the coefficients selected in - // pre-scan pass. Note: idx can be zero. - for (i = 0; i < idx; i++) { - const int rc = scan[idx_arr[i]]; - const int coeff = coeff_ptr[rc]; - const int coeff_sign = (coeff >> 31); - const qm_val_t wt = qm_ptr[rc]; - const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; - const int64_t tmp1 = - abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 2); - const int64_t tmpw = tmp1 * wt; - const int64_t tmp2 = ((tmpw * quant_ptr[rc != 0]) >> 16) + tmpw; - const uint32_t abs_qcoeff = - (uint32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> (14 + AOM_QM_BITS)); - qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); - dequant = - (dequant_ptr[rc != 0] * iqm_ptr[rc] + (1 << (AOM_QM_BITS - 1))) >> - AOM_QM_BITS; - dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / 4; - if (abs_qcoeff) eob = idx_arr[i]; - } - } - *eob_ptr = eob + 1; -} -#endif // CONFIG_TX64X64 - -#else // CONFIG_AOM_QM - void aom_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t quant, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr, uint16_t *eob_ptr) { - const int rc = 0; - const int coeff = coeff_ptr[rc]; - const int coeff_sign = (coeff >> 31); - const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; - int tmp, eob = -1; - - memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); - memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); - - if (!skip_block) { - tmp = clamp(abs_coeff + round_ptr[rc != 0], 
INT16_MIN, INT16_MAX); - tmp = (tmp * quant) >> 16; - qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign; - dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr; - if (tmp) eob = 0; - } - *eob_ptr = eob + 1; + quantize_dc_helper(coeff_ptr, n_coeffs, skip_block, round_ptr, quant, + qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, NULL, NULL, + 0); } void aom_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block, const int16_t *round_ptr, const int16_t quant, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr, uint16_t *eob_ptr) { - const int n_coeffs = 1024; - const int rc = 0; - const int coeff = coeff_ptr[rc]; - const int coeff_sign = (coeff >> 31); - const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; - int tmp, eob = -1; - - memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); - memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); - - if (!skip_block) { - tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1), - INT16_MIN, INT16_MAX); - tmp = (tmp * quant) >> 15; - qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign; - dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr / 2; - if (tmp) eob = 0; - } - *eob_ptr = eob + 1; + quantize_dc_helper(coeff_ptr, 1024, skip_block, round_ptr, quant, qcoeff_ptr, + dqcoeff_ptr, dequant_ptr, eob_ptr, NULL, NULL, 1); } #if CONFIG_TX64X64 @@ -578,100 +230,8 @@ const int16_t *round_ptr, const int16_t quant, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr, uint16_t *eob_ptr) { - const int n_coeffs = 4096; - const int rc = 0; - const int coeff = coeff_ptr[rc]; - const int coeff_sign = (coeff >> 31); - const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; - int tmp, eob = -1; - - memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); - memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); - - if (!skip_block) { - tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 2), - INT16_MIN, INT16_MAX); - tmp = (tmp * quant) >> 14; - qcoeff_ptr[rc] = (tmp ^ 
coeff_sign) - coeff_sign; - dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr / 4; - if (tmp) eob = 0; - } - *eob_ptr = eob + 1; -} -#endif // CONFIG_TX64X64 - -void aom_highbd_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, - int skip_block, const int16_t *round_ptr, - const int16_t quant, tran_low_t *qcoeff_ptr, - tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr, - uint16_t *eob_ptr) { - int eob = -1; - - memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); - memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); - - if (!skip_block) { - const int coeff = coeff_ptr[0]; - const int coeff_sign = (coeff >> 31); - const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; - const int64_t tmp = abs_coeff + round_ptr[0]; - const uint32_t abs_qcoeff = (uint32_t)((tmp * quant) >> 16); - qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); - dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr; - if (abs_qcoeff) eob = 0; - } - *eob_ptr = eob + 1; -} - -void aom_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block, - const int16_t *round_ptr, const int16_t quant, - tran_low_t *qcoeff_ptr, - tran_low_t *dqcoeff_ptr, - const int16_t dequant_ptr, - uint16_t *eob_ptr) { - const int n_coeffs = 1024; - int eob = -1; - - memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); - memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); - - if (!skip_block) { - const int coeff = coeff_ptr[0]; - const int coeff_sign = (coeff >> 31); - const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; - const int64_t tmp = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[0], 1); - const uint32_t abs_qcoeff = (uint32_t)((tmp * quant) >> 15); - qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); - dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr / 2; - if (abs_qcoeff) eob = 0; - } - *eob_ptr = eob + 1; -} - -#if CONFIG_TX64X64 -void aom_highbd_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block, - const int16_t *round_ptr, const int16_t quant, - 
tran_low_t *qcoeff_ptr, - tran_low_t *dqcoeff_ptr, - const int16_t dequant_ptr, - uint16_t *eob_ptr) { - const int n_coeffs = 4096; - int eob = -1; - - memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); - memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); - - if (!skip_block) { - const int coeff = coeff_ptr[0]; - const int coeff_sign = (coeff >> 31); - const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; - const int64_t tmp = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[0], 2); - const uint32_t abs_qcoeff = (uint32_t)((tmp * quant) >> 14); - qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); - dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr / 4; - if (abs_qcoeff) eob = 0; - } - *eob_ptr = eob + 1; + quantize_dc_helper(coeff_ptr, 4096, skip_block, round_ptr, quant, qcoeff_ptr, + dqcoeff_ptr, dequant_ptr, eob_ptr, NULL, NULL, 2); } #endif // CONFIG_TX64X64 @@ -682,45 +242,10 @@ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { - int i, non_zero_count = (int)n_coeffs, eob = -1; - const int zbins[2] = { zbin_ptr[0], zbin_ptr[1] }; - const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 }; - (void)iscan; - - memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); - memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); - - if (!skip_block) { - // Pre-scan pass - for (i = (int)n_coeffs - 1; i >= 0; i--) { - const int rc = scan[i]; - const int coeff = coeff_ptr[rc]; - - if (coeff < zbins[rc != 0] && coeff > nzbins[rc != 0]) - non_zero_count--; - else - break; - } - - // Quantization pass: All coefficients with index >= zero_flag are - // skippable. Note: zero_flag can be zero. 
- for (i = 0; i < non_zero_count; i++) { - const int rc = scan[i]; - const int coeff = coeff_ptr[rc]; - const int coeff_sign = (coeff >> 31); - const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; - - if (abs_coeff >= zbins[rc != 0]) { - const int64_t tmp1 = abs_coeff + round_ptr[rc != 0]; - const int64_t tmp2 = ((tmp1 * quant_ptr[rc != 0]) >> 16) + tmp1; - const int abs_qcoeff = (int)((tmp2 * quant_shift_ptr[rc != 0]) >> 16); - qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); - dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0]; - if (abs_qcoeff) eob = i; - } - } - } - *eob_ptr = eob + 1; + highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr, + round_ptr, quant_ptr, quant_shift_ptr, qcoeff_ptr, + dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan, + NULL, NULL, 0); } void aom_highbd_quantize_b_32x32_c( @@ -729,47 +254,10 @@ const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { - const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], 1), - ROUND_POWER_OF_TWO(zbin_ptr[1], 1) }; - const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 }; - - int idx = 0; - int idx_arr[1024]; - int i, eob = -1; - (void)iscan; - - memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); - memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); - - if (!skip_block) { - // Pre-scan pass - for (i = 0; i < n_coeffs; i++) { - const int rc = scan[i]; - const int coeff = coeff_ptr[rc]; - - // If the coefficient is out of the base ZBIN range, keep it for - // quantization. - if (coeff >= zbins[rc != 0] || coeff <= nzbins[rc != 0]) - idx_arr[idx++] = i; - } - - // Quantization pass: only process the coefficients selected in - // pre-scan pass. Note: idx can be zero. 
- for (i = 0; i < idx; i++) { - const int rc = scan[idx_arr[i]]; - const int coeff = coeff_ptr[rc]; - const int coeff_sign = (coeff >> 31); - const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; - const int64_t tmp1 = - abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1); - const int64_t tmp2 = ((tmp1 * quant_ptr[rc != 0]) >> 16) + tmp1; - const int abs_qcoeff = (int)((tmp2 * quant_shift_ptr[rc != 0]) >> 15); - qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); - dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2; - if (abs_qcoeff) eob = idx_arr[i]; - } - } - *eob_ptr = eob + 1; + highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr, + round_ptr, quant_ptr, quant_shift_ptr, qcoeff_ptr, + dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan, + NULL, NULL, 1); } #if CONFIG_TX64X64 @@ -779,47 +267,9 @@ const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { - const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], 2), - ROUND_POWER_OF_TWO(zbin_ptr[1], 2) }; - const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 }; - - int idx = 0; - int idx_arr[4096]; - int i, eob = -1; - (void)iscan; - - memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); - memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); - - if (!skip_block) { - // Pre-scan pass - for (i = 0; i < n_coeffs; i++) { - const int rc = scan[i]; - const int coeff = coeff_ptr[rc]; - - // If the coefficient is out of the base ZBIN range, keep it for - // quantization. - if (coeff >= zbins[rc != 0] || coeff <= nzbins[rc != 0]) - idx_arr[idx++] = i; - } - - // Quantization pass: only process the coefficients selected in - // pre-scan pass. Note: idx can be zero. 
- for (i = 0; i < idx; i++) { - const int rc = scan[idx_arr[i]]; - const int coeff = coeff_ptr[rc]; - const int coeff_sign = (coeff >> 31); - const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; - const int64_t tmp1 = - abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 2); - const int64_t tmp2 = ((tmp1 * quant_ptr[rc != 0]) >> 16) + tmp1; - const int abs_qcoeff = (int)((tmp2 * quant_shift_ptr[rc != 0]) >> 14); - qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); - dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 4; - if (abs_qcoeff) eob = idx_arr[i]; - } - } - *eob_ptr = eob + 1; + highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr, + round_ptr, quant_ptr, quant_shift_ptr, qcoeff_ptr, + dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan, + NULL, NULL, 2); } #endif // CONFIG_TX64X64 -#endif // CONFIG_AOM_QM
diff --git a/aom_dsp/quantize.h b/aom_dsp/quantize.h index fe49b83..e4bbfb9 100644 --- a/aom_dsp/quantize.h +++ b/aom_dsp/quantize.h
@@ -19,32 +19,57 @@ extern "C" { #endif -#if CONFIG_AOM_QM -void aom_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block, - const int16_t *round_ptr, const int16_t quant_ptr, - tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, - const int16_t dequant_ptr, uint16_t *eob_ptr, - const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr); -void aom_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block, - const int16_t *round_ptr, const int16_t quant_ptr, - tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, - const int16_t dequant_ptr, uint16_t *eob_ptr, - const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr); -#if CONFIG_TX64X64 -void aom_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block, - const int16_t *round_ptr, const int16_t quant_ptr, - tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, - const int16_t dequant_ptr, uint16_t *eob_ptr, - const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr); -#endif // CONFIG_TX64X64 +void quantize_b_helper_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, + int skip_block, const int16_t *zbin_ptr, + const int16_t *round_ptr, const int16_t *quant_ptr, + const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, + uint16_t *eob_ptr, const int16_t *scan, + const int16_t *iscan, const qm_val_t *qm_ptr, + const qm_val_t *iqm_ptr, const int log_scale); + void aom_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, - const int16_t *iscan, const qm_val_t *qm_ptr, - const qm_val_t *iqm_ptr); + const int16_t *iscan); + +#if CONFIG_HIGHBITDEPTH +void aom_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, + int skip_block, const int16_t *zbin_ptr, + const int16_t *round_ptr, const int16_t *quant_ptr, + const int16_t 
*quant_shift_ptr, + tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, + const int16_t *dequant_ptr, uint16_t *eob_ptr, + const int16_t *scan, const int16_t *iscan); + +void highbd_quantize_b_helper_c( + const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, + const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, + const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, + const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr, + const qm_val_t *iqm_ptr, const int log_scale); +#endif + +void aom_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block, + const int16_t *round_ptr, const int16_t quant_ptr, + tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, + const int16_t dequant_ptr, uint16_t *eob_ptr); +void aom_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block, + const int16_t *round_ptr, const int16_t quant_ptr, + tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, + const int16_t dequant_ptr, uint16_t *eob_ptr); +#if CONFIG_TX64X64 +void aom_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block, + const int16_t *round_ptr, const int16_t quant_ptr, + tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, + const int16_t dequant_ptr, uint16_t *eob_ptr); +#endif // CONFIG_TX64X64 + +#if CONFIG_AOM_QM #if CONFIG_HIGHBITDEPTH void aom_highbd_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block, const int16_t *round_ptr, @@ -64,32 +89,10 @@ const int16_t dequant_ptr, uint16_t *eob_ptr, const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr); #endif // CONFIG_TX64X64 -void aom_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, - int skip_block, const int16_t *zbin_ptr, - const int16_t *round_ptr, const int16_t *quant_ptr, - const int16_t *quant_shift_ptr, - tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, - const int16_t *dequant_ptr, uint16_t *eob_ptr, - const int16_t *scan, const int16_t *iscan, - const 
qm_val_t *qm_ptr, const qm_val_t *iqm_ptr); #endif // CONFIG_HIGHBITDEPTH #else // CONFIG_AOM_QM -void aom_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block, - const int16_t *round_ptr, const int16_t quant_ptr, - tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, - const int16_t dequant_ptr, uint16_t *eob_ptr); -void aom_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block, - const int16_t *round_ptr, const int16_t quant_ptr, - tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, - const int16_t dequant_ptr, uint16_t *eob_ptr); -#if CONFIG_TX64X64 -void aom_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block, - const int16_t *round_ptr, const int16_t quant_ptr, - tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, - const int16_t dequant_ptr, uint16_t *eob_ptr); -#endif // CONFIG_TX64X64 #if CONFIG_HIGHBITDEPTH void aom_highbd_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block, const int16_t *round_ptr,
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl index 91af8dd..edc07f8 100755 --- a/av1/common/av1_rtcd_defs.pl +++ b/av1/common/av1_rtcd_defs.pl
@@ -278,41 +278,7 @@ # ENCODEMB INVOKE -if (aom_config("CONFIG_AOM_QM") eq "yes") { - if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") { - # the transform coefficients are held in 32-bit - # values, so the assembler code for av1_block_error can no longer be used. - add_proto qw/int64_t av1_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz"; - specialize qw/av1_block_error avx2/; - - add_proto qw/void av1_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr"; - - add_proto qw/void av1_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr"; - - if (aom_config("CONFIG_TX64X64") eq "yes") { - add_proto qw/void av1_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr"; - } - - add_proto qw/void av1_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, 
const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr"; - } else { - add_proto qw/int64_t av1_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz"; - specialize qw/av1_block_error avx2 msa/, "$sse2_x86inc"; - - add_proto qw/int64_t av1_block_error_fp/, "const int16_t *coeff, const int16_t *dqcoeff, int block_size"; - specialize qw/av1_block_error_fp neon/, "$sse2_x86inc"; - - add_proto qw/void av1_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr"; - - add_proto qw/void av1_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr"; - - if (aom_config("CONFIG_TX64X64") eq "yes") { - add_proto qw/void av1_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr"; - } - - add_proto qw/void av1_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, 
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr"; - } -} else { - if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") { +if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") { # the transform coefficients are held in 32-bit # values, so the assembler code for av1_block_error can no longer be used. add_proto qw/int64_t av1_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz"; @@ -327,9 +293,7 @@ if (aom_config("CONFIG_TX64X64") eq "yes") { add_proto qw/void av1_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; } - - add_proto qw/void av1_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; - } else { +} else { add_proto qw/int64_t av1_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz"; specialize qw/av1_block_error sse2 avx2 msa/; @@ -346,12 +310,26 @@ add_proto qw/void av1_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; } - add_proto qw/void 
av1_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; - specialize qw/av1_fdct8x8_quant sse2 ssse3 neon/; - } - } +#Unused +#if (aom_config("CONFIG_AOM_QM") eq "yes") { +# if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") { +# add_proto qw/void av1_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr"; +# } else { +# add_proto qw/void av1_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr"; +# } +#} else { +# if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") { +# +# add_proto qw/void av1_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; +# } else { +# add_proto qw/void av1_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int 
skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; +# specialize qw/av1_fdct8x8_quant sse2 ssse3 neon/; +# } +# +#} + # fdct functions add_proto qw/void av1_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param"; @@ -488,22 +466,8 @@ } -if (aom_config("CONFIG_AOM_QM") eq "yes") { - add_proto qw/void av1_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale"; - - add_proto qw/void av1_highbd_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale"; - - if (aom_config("CONFIG_TX64X64") eq "yes") { - add_proto qw/void av1_highbd_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale"; - } - - add_proto qw/void av1_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t 
*zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale"; -} else { - add_proto qw/void av1_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale"; - specialize qw/av1_highbd_quantize_fp sse4_1 avx2/; - - add_proto qw/void av1_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale"; -} + add_proto qw/void av1_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale"; + specialize qw/av1_highbd_quantize_fp sse4_1 avx2/; add_proto qw/void av1_highbd_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
diff --git a/av1/encoder/av1_quantize.c b/av1/encoder/av1_quantize.c index dd53d42..b44e06f 100644 --- a/av1/encoder/av1_quantize.c +++ b/av1/encoder/av1_quantize.c
@@ -443,11 +443,8 @@ const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, - const int16_t *scan, const int16_t *iscan, -#if CONFIG_AOM_QM - const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr, -#endif - int log_scale) { + const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr, + const qm_val_t *iqm_ptr, int log_scale) { int i, eob = -1; // TODO(jingning) Decide the need of these arguments after the // quantization process is completed. @@ -464,35 +461,22 @@ for (i = 0; i < n_coeffs; i++) { const int rc = scan[i]; const int coeff = coeff_ptr[rc]; -#if CONFIG_AOM_QM - const qm_val_t wt = qm_ptr[rc]; - const qm_val_t iwt = iqm_ptr[rc]; + const qm_val_t wt = qm_ptr ? qm_ptr[rc] : (1 << AOM_QM_BITS); + const qm_val_t iwt = iqm_ptr ? iqm_ptr[rc] : (1 << AOM_QM_BITS); const int dequant = (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS; -#endif const int coeff_sign = (coeff >> 31); - int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; + int64_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign; int tmp32 = 0; -#if CONFIG_AOM_QM if (abs_coeff * wt >= (dequant_ptr[rc != 0] << (AOM_QM_BITS - (1 + log_scale)))) { -#else - if (abs_coeff >= (dequant_ptr[rc != 0] >> (1 + log_scale))) { -#endif abs_coeff += ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale); - abs_coeff = clamp(abs_coeff, INT16_MIN, INT16_MAX); -#if CONFIG_AOM_QM + abs_coeff = clamp64(abs_coeff, INT16_MIN, INT16_MAX); tmp32 = (int)((abs_coeff * wt * quant_ptr[rc != 0]) >> - ((16 - log_scale) + AOM_QM_BITS)); + (16 - log_scale + AOM_QM_BITS)); qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign; dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant / (1 << log_scale); -#else - tmp32 = (int)((abs_coeff * quant_ptr[rc != 0]) >> (16 - log_scale)); - qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign; - dqcoeff_ptr[rc] = - qcoeff_ptr[rc] * dequant_ptr[rc != 
0] / (1 << log_scale); -#endif } if (tmp32) eob = i; @@ -501,25 +485,60 @@ *eob_ptr = eob + 1; } +static void highbd_quantize_fp_helper_c( + const tran_low_t *coeff_ptr, intptr_t count, int skip_block, + const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, + const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, + const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr, + const qm_val_t *iqm_ptr, int log_scale) { + int i; + int eob = -1; + const int scale = 1 << log_scale; + const int shift = 16 - log_scale; + // TODO(jingning) Decide the need of these arguments after the + // quantization process is completed. + (void)zbin_ptr; + (void)quant_shift_ptr; + (void)iscan; + + memset(qcoeff_ptr, 0, count * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, count * sizeof(*dqcoeff_ptr)); + + if (!skip_block) { + // Quantization pass: All coefficients with index >= zero_flag are + // skippable. Note: zero_flag can be zero. + for (i = 0; i < count; i++) { + const int rc = scan[i]; + const int coeff = coeff_ptr[rc]; + const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS); + const qm_val_t iwt = iqm_ptr != NULL ? 
iqm_ptr[rc] : (1 << AOM_QM_BITS); + const int dequant = + (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >> + AOM_QM_BITS; + const int coeff_sign = (coeff >> 31); + const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; + const int64_t tmp = abs_coeff + (round_ptr[rc != 0] >> log_scale); + const int abs_qcoeff = + (int)((tmp * quant_ptr[rc != 0] * wt) >> (shift + AOM_QM_BITS)); + qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); + dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant / scale; + if (abs_qcoeff) eob = i; + } + } + *eob_ptr = eob + 1; +} + void av1_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, - const int16_t *iscan -#if CONFIG_AOM_QM - , - const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr -#endif - ) { + const int16_t *iscan) { quantize_fp_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, - dequant_ptr, eob_ptr, scan, iscan, -#if CONFIG_AOM_QM - qm_ptr, iqm_ptr, -#endif - 0); + dequant_ptr, eob_ptr, scan, iscan, NULL, NULL, 0); } void av1_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, @@ -528,19 +547,10 @@ const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, - const int16_t *scan, const int16_t *iscan -#if CONFIG_AOM_QM - , - const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr -#endif - ) { + const int16_t *scan, const int16_t *iscan) { quantize_fp_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, - dequant_ptr, eob_ptr, scan, iscan, -#if CONFIG_AOM_QM - qm_ptr, iqm_ptr, -#endif - 1); + dequant_ptr, eob_ptr, scan, iscan, NULL, NULL, 1); } #if CONFIG_TX64X64 
@@ -550,19 +560,10 @@ const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, - const int16_t *scan, const int16_t *iscan -#if CONFIG_AOM_QM - , - const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr -#endif - ) { + const int16_t *scan, const int16_t *iscan) { quantize_fp_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, - dequant_ptr, eob_ptr, scan, iscan, -#if CONFIG_AOM_QM - qm_ptr, iqm_ptr, -#endif - 2); + dequant_ptr, eob_ptr, scan, iscan, NULL, NULL, 2); } #endif // CONFIG_TX64X64 @@ -576,58 +577,47 @@ #if CONFIG_AOM_QM const qm_val_t *qm_ptr = qparam->qmatrix; const qm_val_t *iqm_ptr = qparam->iqmatrix; -#endif // CONFIG_AOM_QM - - switch (qparam->log_scale) { - case 0: - if (n_coeffs < 16) { - // TODO(jingning): Need SIMD implementation for smaller block size - // quantization. - quantize_fp_helper_c(coeff_ptr, n_coeffs, skip_block, p->zbin, - p->round_fp, p->quant_fp, p->quant_shift, - qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr, - sc->scan, sc->iscan, -#if CONFIG_AOM_QM - qm_ptr, iqm_ptr, + if (1 /*qm_ptr != NULL || iqm_ptr != NULL*/) { + quantize_fp_helper_c(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round_fp, + p->quant_fp, p->quant_shift, qcoeff_ptr, dqcoeff_ptr, + pd->dequant, eob_ptr, sc->scan, sc->iscan, qm_ptr, + iqm_ptr, qparam->log_scale); + } else { #endif - qparam->log_scale); - } else { - av1_quantize_fp(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round_fp, - p->quant_fp, p->quant_shift, qcoeff_ptr, dqcoeff_ptr, - pd->dequant, eob_ptr, sc->scan, sc->iscan -#if CONFIG_AOM_QM - , - qm_ptr, iqm_ptr -#endif - ); - } - break; - case 1: - av1_quantize_fp_32x32(coeff_ptr, n_coeffs, skip_block, p->zbin, - p->round_fp, p->quant_fp, p->quant_shift, - qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr, - sc->scan, sc->iscan -#if CONFIG_AOM_QM - , - qm_ptr, iqm_ptr -#endif - ); - break; + switch (qparam->log_scale) { + case 
0: + if (n_coeffs < 16) { + // TODO(jingning): Need SIMD implementation for smaller block size + // quantization. + quantize_fp_helper_c( + coeff_ptr, n_coeffs, skip_block, p->zbin, p->round_fp, + p->quant_fp, p->quant_shift, qcoeff_ptr, dqcoeff_ptr, pd->dequant, + eob_ptr, sc->scan, sc->iscan, NULL, NULL, qparam->log_scale); + } else { + av1_quantize_fp(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round_fp, + p->quant_fp, p->quant_shift, qcoeff_ptr, dqcoeff_ptr, + pd->dequant, eob_ptr, sc->scan, sc->iscan); + } + break; + case 1: + av1_quantize_fp_32x32(coeff_ptr, n_coeffs, skip_block, p->zbin, + p->round_fp, p->quant_fp, p->quant_shift, + qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr, + sc->scan, sc->iscan); + break; #if CONFIG_TX64X64 - case 2: - av1_quantize_fp_64x64(coeff_ptr, n_coeffs, skip_block, p->zbin, - p->round_fp, p->quant_fp, p->quant_shift, - qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr, - sc->scan, sc->iscan -#if CONFIG_AOM_QM - , - qm_ptr, iqm_ptr -#endif - ); - break; + case 2: + av1_quantize_fp_64x64(coeff_ptr, n_coeffs, skip_block, p->zbin, + p->round_fp, p->quant_fp, p->quant_shift, + qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr, + sc->scan, sc->iscan); + break; #endif // CONFIG_TX64X64 - default: assert(0); + default: assert(0); + } +#if CONFIG_AOM_QM } +#endif } void av1_quantize_b_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs, @@ -640,43 +630,68 @@ #if CONFIG_AOM_QM const qm_val_t *qm_ptr = qparam->qmatrix; const qm_val_t *iqm_ptr = qparam->iqmatrix; + if (1 /*qm_ptr != NULL && iqm_ptr != NULL*/) { + quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round, + p->quant, p->quant_shift, qcoeff_ptr, dqcoeff_ptr, + pd->dequant, eob_ptr, sc->scan, sc->iscan, qm_ptr, + iqm_ptr, qparam->log_scale); + } else { #endif // CONFIG_AOM_QM - switch (qparam->log_scale) { - case 0: - aom_quantize_b(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round, - p->quant, p->quant_shift, qcoeff_ptr, dqcoeff_ptr, - pd->dequant, eob_ptr, 
sc->scan, sc->iscan -#if CONFIG_AOM_QM - , - qm_ptr, iqm_ptr -#endif - ); - break; - case 1: - aom_quantize_b_32x32(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round, - p->quant, p->quant_shift, qcoeff_ptr, dqcoeff_ptr, - pd->dequant, eob_ptr, sc->scan, sc->iscan -#if CONFIG_AOM_QM - , - qm_ptr, iqm_ptr -#endif - ); - break; + switch (qparam->log_scale) { + case 0: + aom_quantize_b(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round, + p->quant, p->quant_shift, qcoeff_ptr, dqcoeff_ptr, + pd->dequant, eob_ptr, sc->scan, sc->iscan); + break; + case 1: + aom_quantize_b_32x32(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round, + p->quant, p->quant_shift, qcoeff_ptr, dqcoeff_ptr, + pd->dequant, eob_ptr, sc->scan, sc->iscan); + break; #if CONFIG_TX64X64 - case 2: - aom_quantize_b_64x64(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round, - p->quant, p->quant_shift, qcoeff_ptr, dqcoeff_ptr, - pd->dequant, eob_ptr, sc->scan, sc->iscan -#if CONFIG_AOM_QM - , - qm_ptr, iqm_ptr -#endif - ); - break; + case 2: + aom_quantize_b_64x64(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round, + p->quant, p->quant_shift, qcoeff_ptr, dqcoeff_ptr, + pd->dequant, eob_ptr, sc->scan, sc->iscan); + break; #endif // CONFIG_TX64X64 - default: assert(0); + default: assert(0); + } +#if CONFIG_AOM_QM } +#endif +} + +static void quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, + int skip_block, const int16_t *round_ptr, + const int16_t quant, tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr, + uint16_t *eob_ptr, const qm_val_t *qm_ptr, + const qm_val_t *iqm_ptr, const int log_scale) { + const int rc = 0; + const int coeff = coeff_ptr[rc]; + const int coeff_sign = (coeff >> 31); + const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; + int64_t tmp, eob = -1; + int32_t tmp32; + int dequant; + + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + + if (!skip_block) { + const int wt = qm_ptr != NULL ? 
qm_ptr[rc] : (1 << AOM_QM_BITS); + const int iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS); + tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale), + INT16_MIN, INT16_MAX); + tmp32 = (int32_t)((tmp * wt * quant) >> (16 - log_scale + AOM_QM_BITS)); + qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign; + dequant = (dequant_ptr * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS; + dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / (1 << log_scale); + if (tmp32) eob = 0; + } + *eob_ptr = eob + 1; } void av1_quantize_dc_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs, @@ -686,45 +701,18 @@ const SCAN_ORDER *sc, const QUANT_PARAM *qparam) { // obsolete skip_block const int skip_block = 0; + (void)sc; + assert(qparam->log_scale >= 0 && qparam->log_scale < (2 + CONFIG_TX64X64)); #if CONFIG_AOM_QM const qm_val_t *qm_ptr = qparam->qmatrix; const qm_val_t *iqm_ptr = qparam->iqmatrix; -#endif // CONFIG_AOM_QM - - (void)sc; - - switch (qparam->log_scale) { - case 0: - aom_quantize_dc(coeff_ptr, (int)n_coeffs, skip_block, p->round, - p->quant_fp[0], qcoeff_ptr, dqcoeff_ptr, pd->dequant[0], - eob_ptr -#if CONFIG_AOM_QM - , - qm_ptr, iqm_ptr +#else + const qm_val_t *qm_ptr = NULL; + const qm_val_t *iqm_ptr = NULL; #endif - ); - break; - case 1: - aom_quantize_dc_32x32(coeff_ptr, skip_block, p->round, p->quant_fp[0], - qcoeff_ptr, dqcoeff_ptr, pd->dequant[0], eob_ptr -#if CONFIG_AOM_QM - , - qm_ptr, iqm_ptr -#endif - ); - break; -#if CONFIG_TX64X64 - aom_quantize_dc_64x64(coeff_ptr, skip_block, p->round, p->quant_fp[0], - qcoeff_ptr, dqcoeff_ptr, pd->dequant[0], eob_ptr -#if CONFIG_AOM_QM - , - qm_ptr, iqm_ptr -#endif - ); - case 2: break; -#endif // CONFIG_TX64X64 - default: assert(0); - } + quantize_dc(coeff_ptr, (int)n_coeffs, skip_block, p->round, p->quant_fp[0], + qcoeff_ptr, dqcoeff_ptr, pd->dequant[0], eob_ptr, qm_ptr, iqm_ptr, + qparam->log_scale); } #if CONFIG_NEW_QUANT @@ -857,29 +845,31 @@ #if CONFIG_AOM_QM const qm_val_t *qm_ptr = 
qparam->qmatrix; const qm_val_t *iqm_ptr = qparam->iqmatrix; + if (1 /*qm_ptr != NULL && iqm_ptr != NULL*/) { + highbd_quantize_fp_helper_c( + coeff_ptr, n_coeffs, skip_block, p->zbin, p->round_fp, p->quant_fp, + p->quant_shift, qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr, sc->scan, + sc->iscan, qm_ptr, iqm_ptr, qparam->log_scale); + } else { #endif // CONFIG_AOM_QM - if (n_coeffs < 16) { - // TODO(jingning): Need SIMD implementation for smaller block size - // quantization. - av1_highbd_quantize_fp_c(coeff_ptr, n_coeffs, skip_block, p->zbin, - p->round_fp, p->quant_fp, p->quant_shift, - qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr, - sc->scan, sc->iscan, -#if CONFIG_AOM_QM - qm_ptr, iqm_ptr, -#endif - qparam->log_scale); - return; - } + if (n_coeffs < 16) { + // TODO(jingning): Need SIMD implementation for smaller block size + // quantization. + av1_highbd_quantize_fp_c(coeff_ptr, n_coeffs, skip_block, p->zbin, + p->round_fp, p->quant_fp, p->quant_shift, + qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr, + sc->scan, sc->iscan, qparam->log_scale); + return; + } - av1_highbd_quantize_fp(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round_fp, - p->quant_fp, p->quant_shift, qcoeff_ptr, dqcoeff_ptr, - pd->dequant, eob_ptr, sc->scan, sc->iscan, + av1_highbd_quantize_fp(coeff_ptr, n_coeffs, skip_block, p->zbin, + p->round_fp, p->quant_fp, p->quant_shift, qcoeff_ptr, + dqcoeff_ptr, pd->dequant, eob_ptr, sc->scan, + sc->iscan, qparam->log_scale); #if CONFIG_AOM_QM - qm_ptr, iqm_ptr, + } #endif - qparam->log_scale); } void av1_highbd_quantize_b_facade(const tran_low_t *coeff_ptr, @@ -894,86 +884,76 @@ #if CONFIG_AOM_QM const qm_val_t *qm_ptr = qparam->qmatrix; const qm_val_t *iqm_ptr = qparam->iqmatrix; + if (1 /*qm_ptr != NULL && iqm_ptr != NULL*/) { + highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, p->zbin, + p->round, p->quant, p->quant_shift, qcoeff_ptr, + dqcoeff_ptr, pd->dequant, eob_ptr, sc->scan, + sc->iscan, qm_ptr, iqm_ptr, qparam->log_scale); + } else 
{ #endif // CONFIG_AOM_QM - switch (qparam->log_scale) { - case 0: - if (LIKELY(n_coeffs >= 8)) { - aom_highbd_quantize_b(coeff_ptr, n_coeffs, skip_block, p->zbin, - p->round, p->quant, p->quant_shift, qcoeff_ptr, - dqcoeff_ptr, pd->dequant, eob_ptr, sc->scan, - sc->iscan -#if CONFIG_AOM_QM - , - qm_ptr, iqm_ptr -#endif - ); - } else { - // TODO(luoyi): Need SIMD (e.g. sse2) for smaller block size - // quantization - aom_highbd_quantize_b_c(coeff_ptr, n_coeffs, skip_block, p->zbin, + switch (qparam->log_scale) { + case 0: + if (LIKELY(n_coeffs >= 8)) { + aom_highbd_quantize_b(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr, sc->scan, - sc->iscan -#if CONFIG_AOM_QM - , - qm_ptr, iqm_ptr -#endif - ); - } - break; - case 1: - aom_highbd_quantize_b_32x32(coeff_ptr, n_coeffs, skip_block, p->zbin, + sc->iscan); + } else { + // TODO(luoyi): Need SIMD (e.g. sse2) for smaller block size + // quantization + aom_highbd_quantize_b_c(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr, - sc->scan, sc->iscan -#if CONFIG_AOM_QM - , - qm_ptr, iqm_ptr -#endif - ); - break; + sc->scan, sc->iscan); + } + break; + case 1: + aom_highbd_quantize_b_32x32(coeff_ptr, n_coeffs, skip_block, p->zbin, + p->round, p->quant, p->quant_shift, + qcoeff_ptr, dqcoeff_ptr, pd->dequant, + eob_ptr, sc->scan, sc->iscan); + break; #if CONFIG_TX64X64 - case 2: - aom_highbd_quantize_b_64x64(coeff_ptr, n_coeffs, skip_block, p->zbin, - p->round, p->quant, p->quant_shift, - qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr, - sc->scan, sc->iscan -#if CONFIG_AOM_QM - , - qm_ptr, iqm_ptr -#endif - ); - break; + case 2: + aom_highbd_quantize_b_64x64(coeff_ptr, n_coeffs, skip_block, p->zbin, + p->round, p->quant, p->quant_shift, + qcoeff_ptr, dqcoeff_ptr, pd->dequant, + eob_ptr, sc->scan, sc->iscan); + break; #endif // CONFIG_TX64X64 - default: assert(0); + default: 
assert(0); + } +#if CONFIG_AOM_QM } +#endif } static INLINE void highbd_quantize_dc( const tran_low_t *coeff_ptr, int n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t quant, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr, uint16_t *eob_ptr, -#if CONFIG_AOM_QM - const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr, -#endif - const int log_scale) { + const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr, const int log_scale) { int eob = -1; memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); -#if CONFIG_AOM_QM - (void)qm_ptr; - (void)iqm_ptr; -#endif + if (!skip_block) { + const qm_val_t wt = qm_ptr != NULL ? qm_ptr[0] : (1 << AOM_QM_BITS); + const qm_val_t iwt = iqm_ptr != NULL ? iqm_ptr[0] : (1 << AOM_QM_BITS); const int coeff = coeff_ptr[0]; const int coeff_sign = (coeff >> 31); const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; - const int64_t tmp = abs_coeff + round_ptr[0]; - const int abs_qcoeff = (int)((tmp * quant) >> (16 - log_scale)); + const int64_t tmp = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[0], log_scale); + const int64_t tmpw = tmp * wt; + const int abs_qcoeff = + (int)((tmpw * quant) >> (16 - log_scale + AOM_QM_BITS)); qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); - dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr / (1 << log_scale); + const int dequant = + (dequant_ptr * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS; + + dqcoeff_ptr[0] = (qcoeff_ptr[0] * dequant) / (1 << log_scale); if (abs_qcoeff) eob = 0; } *eob_ptr = eob + 1; @@ -991,17 +971,16 @@ #if CONFIG_AOM_QM const qm_val_t *qm_ptr = qparam->qmatrix; const qm_val_t *iqm_ptr = qparam->iqmatrix; +#else + const qm_val_t *qm_ptr = NULL; + const qm_val_t *iqm_ptr = NULL; #endif // CONFIG_AOM_QM (void)sc; highbd_quantize_dc(coeff_ptr, (int)n_coeffs, skip_block, p->round, p->quant_fp[0], qcoeff_ptr, dqcoeff_ptr, pd->dequant[0], - eob_ptr, -#if CONFIG_AOM_QM - qm_ptr, iqm_ptr, 
-#endif - qparam->log_scale); + eob_ptr, qm_ptr, iqm_ptr, qparam->log_scale); } #if CONFIG_NEW_QUANT @@ -1517,61 +1496,16 @@ } #endif // CONFIG_NEW_QUANT -void av1_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t count, - int skip_block, const int16_t *zbin_ptr, - const int16_t *round_ptr, - const int16_t *quant_ptr, - const int16_t *quant_shift_ptr, - tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, - const int16_t *dequant_ptr, uint16_t *eob_ptr, - const int16_t *scan, const int16_t *iscan, -#if CONFIG_AOM_QM - const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr, -#endif - int log_scale) { - int i; - int eob = -1; - const int scale = 1 << log_scale; - const int shift = 16 - log_scale; - // TODO(jingning) Decide the need of these arguments after the - // quantization process is completed. - (void)zbin_ptr; - (void)quant_shift_ptr; - (void)iscan; - - memset(qcoeff_ptr, 0, count * sizeof(*qcoeff_ptr)); - memset(dqcoeff_ptr, 0, count * sizeof(*dqcoeff_ptr)); - - if (!skip_block) { - // Quantization pass: All coefficients with index >= zero_flag are - // skippable. Note: zero_flag can be zero. 
- for (i = 0; i < count; i++) { - const int rc = scan[i]; - const int coeff = coeff_ptr[rc]; -#if CONFIG_AOM_QM - const qm_val_t wt = qm_ptr[rc]; - const qm_val_t iwt = iqm_ptr[rc]; - const int dequant = - (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >> - AOM_QM_BITS; -#endif - const int coeff_sign = (coeff >> 31); - const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; - const int64_t tmp = abs_coeff + (round_ptr[rc != 0] >> log_scale); -#if CONFIG_AOM_QM - const int abs_qcoeff = - (int)((tmp * quant_ptr[rc != 0] * wt) >> (shift + AOM_QM_BITS)); - qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); - dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant / scale; -#else - const int abs_qcoeff = (int)((tmp * quant_ptr[rc != 0]) >> shift); - qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); - dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / scale; -#endif - if (abs_qcoeff) eob = i; - } - } - *eob_ptr = eob + 1; +void av1_highbd_quantize_fp_c( + const tran_low_t *coeff_ptr, intptr_t count, int skip_block, + const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, + const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, + const int16_t *scan, const int16_t *iscan, int log_scale) { + highbd_quantize_fp_helper_c(coeff_ptr, count, skip_block, zbin_ptr, round_ptr, + quant_ptr, quant_shift_ptr, qcoeff_ptr, + dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan, + NULL, NULL, log_scale); } static void invert_quant(int16_t *quant, int16_t *shift, int d) {
diff --git a/av1/encoder/dct.c b/av1/encoder/dct.c index 2ca4f34..858ea24 100644 --- a/av1/encoder/dct.c +++ b/av1/encoder/dct.c
@@ -2087,12 +2087,8 @@ const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, - const int16_t *iscan -#if CONFIG_AOM_QM - , - const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr -#endif - ) { + const int16_t *iscan, const qm_val_t *qm_ptr, + const qm_val_t *iqm_ptr) { int eob = -1; int i, j; @@ -2177,27 +2173,19 @@ for (i = 0; i < n_coeffs; i++) { const int rc = scan[i]; const int coeff = coeff_ptr[rc]; -#if CONFIG_AOM_QM - const qm_val_t wt = qm_ptr[rc]; - const qm_val_t iwt = iqm_ptr[rc]; + const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS); + const qm_val_t iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS); const int dequant = (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS; -#endif const int coeff_sign = (coeff >> 31); const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; int64_t tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX); int tmp32; -#if CONFIG_AOM_QM tmp32 = (int)((tmp * quant_ptr[rc != 0] * wt) >> (16 + AOM_QM_BITS)); qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign; dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant; -#else - tmp32 = (int)((tmp * quant_ptr[rc != 0]) >> 16); - qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign; - dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0]; -#endif if (tmp32) eob = i; }