Refactor quantization C code.
This commit de-duplicates C reference quantization code
and unifies quantization matrix (QM) and non-QM code
paths when there is no SIMD.
The reorganisation also will facilitate re-using SIMD quant
functions for QM when the matrix is flat, as is the
default when AOM_QM is enabled.
Change-Id: Idbfdac9eb9a31adcffe734aac1877d58b86fab77
diff --git a/aom_dsp/aom_dsp_common.h b/aom_dsp/aom_dsp_common.h
index 5b10432..a3cea56 100644
--- a/aom_dsp/aom_dsp_common.h
+++ b/aom_dsp/aom_dsp_common.h
@@ -52,10 +52,9 @@
#define UNLIKELY(v) (v)
#endif
-#if CONFIG_AOM_QM
typedef uint16_t qm_val_t;
#define AOM_QM_BITS 5
-#endif
+
#if CONFIG_HIGHBITDEPTH
// Note:
// tran_low_t is the datatype used for final transform coefficients.
diff --git a/aom_dsp/aom_dsp_rtcd_defs.pl b/aom_dsp/aom_dsp_rtcd_defs.pl
index 726b4f2..f54b5150 100755
--- a/aom_dsp/aom_dsp_rtcd_defs.pl
+++ b/aom_dsp/aom_dsp_rtcd_defs.pl
@@ -517,23 +517,7 @@
#
# Quantization
#
-if (aom_config("CONFIG_AOM_QM") eq "yes") {
- if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
- add_proto qw/void aom_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
-
- add_proto qw/void aom_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
-
- add_proto qw/void aom_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
-
- add_proto qw/void aom_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
-
- add_proto qw/void aom_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
-
- add_proto qw/void aom_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
-
- } # CONFIG_AV1_ENCODER
-} else {
- if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
+if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
add_proto qw/void aom_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/aom_quantize_b sse2/, "$ssse3_x86_64", "$avx_x86_64";
@@ -541,17 +525,18 @@
specialize qw/aom_quantize_b_32x32/, "$ssse3_x86_64", "$avx_x86_64";
add_proto qw/void aom_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+} # CONFIG_AV1_ENCODER
- add_proto qw/void aom_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/aom_highbd_quantize_b sse2 avx2/;
+if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
+ add_proto qw/void aom_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ specialize qw/aom_highbd_quantize_b sse2 avx2/;
- add_proto qw/void aom_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/aom_highbd_quantize_b_32x32 sse2/;
+ add_proto qw/void aom_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ specialize qw/aom_highbd_quantize_b_32x32 sse2/;
- add_proto qw/void aom_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ add_proto qw/void aom_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- } # CONFIG_AV1_ENCODER
-} # CONFIG_AOM_QM
+} # CONFIG_AV1_ENCODER
if (aom_config("CONFIG_AV1") eq "yes") {
#
# Alpha blending with mask
diff --git a/aom_dsp/quantize.c b/aom_dsp/quantize.c
index fe98b60..3e8f0d4 100644
--- a/aom_dsp/quantize.c
+++ b/aom_dsp/quantize.c
@@ -12,18 +12,14 @@
#include "aom_dsp/quantize.h"
#include "aom_mem/aom_mem.h"
-static void quantize_b_helper_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
- int skip_block, const int16_t *zbin_ptr,
- const int16_t *round_ptr,
- const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan,
-#if CONFIG_AOM_QM
- const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr,
-#endif
- const int log_scale) {
+void quantize_b_helper_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ int skip_block, const int16_t *zbin_ptr,
+ const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
+ uint16_t *eob_ptr, const int16_t *scan,
+ const int16_t *iscan, const qm_val_t *qm_ptr,
+ const qm_val_t *iqm_ptr, const int log_scale) {
const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], log_scale),
ROUND_POWER_OF_TWO(zbin_ptr[1], log_scale) };
const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
@@ -37,20 +33,12 @@
// Pre-scan pass
for (i = (int)n_coeffs - 1; i >= 0; i--) {
const int rc = scan[i];
-#if CONFIG_AOM_QM
- const qm_val_t wt = qm_ptr[rc];
+ const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
const int coeff = coeff_ptr[rc] * wt;
-#else
- const int coeff = coeff_ptr[rc];
-#endif // CONFIG_AOM_QM
-#if CONFIG_AOM_QM
if (coeff < (zbins[rc != 0] << AOM_QM_BITS) &&
coeff > (nzbins[rc != 0] << AOM_QM_BITS))
non_zero_count--;
-#else
- if (coeff < zbins[rc != 0] && coeff > nzbins[rc != 0]) non_zero_count--;
-#endif // CONFIG_AOM_QM
else
break;
}
@@ -64,35 +52,21 @@
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
int tmp32;
-#if CONFIG_AOM_QM
- const qm_val_t wt = qm_ptr[rc];
+ const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
if (abs_coeff * wt >= (zbins[rc != 0] << AOM_QM_BITS)) {
-#else
- if (abs_coeff >= zbins[rc != 0]) {
-#endif // CONFIG_AOM_QM
int64_t tmp =
clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale),
INT16_MIN, INT16_MAX);
-#if CONFIG_AOM_QM
tmp *= wt;
tmp32 = (int)(((((tmp * quant_ptr[rc != 0]) >> 16) + tmp) *
quant_shift_ptr[rc != 0]) >>
(16 - log_scale + AOM_QM_BITS)); // quantization
-#else
- tmp32 = (int)(((((tmp * quant_ptr[rc != 0]) >> 16) + tmp) *
- quant_shift_ptr[rc != 0]) >>
- (16 - log_scale)); // quantization
-#endif // CONFIG_AOM_QM
qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
-#if CONFIG_AOM_QM
+ const int iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
const int dequant =
- (dequant_ptr[rc != 0] * iqm_ptr[rc] + (1 << (AOM_QM_BITS - 1))) >>
+ (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
AOM_QM_BITS;
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant / (1 << log_scale);
-#else
- dqcoeff_ptr[rc] =
- qcoeff_ptr[rc] * dequant_ptr[rc != 0] / (1 << log_scale);
-#endif // CONFIG_AOM_QM
if (tmp32) eob = i;
}
@@ -101,25 +75,111 @@
*eob_ptr = eob + 1;
}
+void highbd_quantize_b_helper_c(
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
+ const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
+ const qm_val_t *iqm_ptr, const int log_scale) {
+ int i, eob = -1;
+ const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], log_scale),
+ ROUND_POWER_OF_TWO(zbin_ptr[1], log_scale) };
+ const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
+ int dequant;
+#if CONFIG_TX64X64
+ int idx_arr[4096];
+#else
+ int idx_arr[1024];
+#endif
+ (void)iscan;
+ int idx = 0;
+
+ memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+ memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
+ if (!skip_block) {
+ // Pre-scan pass
+ for (i = 0; i < n_coeffs; i++) {
+ const int rc = scan[i];
+ const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+ const int coeff = coeff_ptr[rc] * wt;
+
+ // If the coefficient is out of the base ZBIN range, keep it for
+ // quantization.
+ if (coeff >= (zbins[rc != 0] << AOM_QM_BITS) ||
+ coeff <= (nzbins[rc != 0] << AOM_QM_BITS))
+ idx_arr[idx++] = i;
+ }
+
+ // Quantization pass: only process the coefficients selected in
+ // pre-scan pass. Note: idx can be zero.
+ for (i = 0; i < idx; i++) {
+ const int rc = scan[idx_arr[i]];
+ const int coeff = coeff_ptr[rc];
+ const int coeff_sign = (coeff >> 31);
+ const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+ const qm_val_t iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
+ const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+ const int64_t tmp1 =
+ abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale);
+ const int64_t tmpw = tmp1 * wt;
+ const int64_t tmp2 = ((tmpw * quant_ptr[rc != 0]) >> 16) + tmpw;
+ const uint32_t abs_qcoeff = (uint32_t)(
+ (tmp2 * quant_shift_ptr[rc != 0]) >> (16 - log_scale + AOM_QM_BITS));
+ qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
+ dequant = (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
+ AOM_QM_BITS;
+ dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / (1 << log_scale);
+ if (abs_qcoeff) eob = idx_arr[i];
+ }
+ }
+ *eob_ptr = eob + 1;
+}
+
+void quantize_dc_helper(const tran_low_t *coeff_ptr, int n_coeffs,
+ int skip_block, const int16_t *round_ptr,
+ const int16_t quant, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr,
+ uint16_t *eob_ptr, const qm_val_t *qm_ptr,
+ const qm_val_t *iqm_ptr, const int log_scale) {
+ const int rc = 0;
+ const int coeff = coeff_ptr[rc];
+ const int coeff_sign = (coeff >> 31);
+ const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+ int64_t tmp, eob = -1;
+ int32_t tmp32;
+ int dequant;
+
+ memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+ memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
+ if (!skip_block) {
+ const int wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+ const int iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
+ tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale),
+ INT16_MIN, INT16_MAX);
+ tmp32 = (int32_t)((tmp * wt * quant) >> (16 - log_scale + AOM_QM_BITS));
+ qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
+ dequant = (dequant_ptr * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
+ dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / (1 << log_scale);
+ if (tmp32) eob = 0;
+ }
+ *eob_ptr = eob + 1;
+}
+
+/* These functions should only be called when quantisation matrices
+ are not used. */
void aom_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t *zbin_ptr,
const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
uint16_t *eob_ptr, const int16_t *scan,
- const int16_t *iscan
-#if CONFIG_AOM_QM
- ,
- const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr
-#endif
- ) {
+ const int16_t *iscan) {
quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr, round_ptr,
quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr,
- dequant_ptr, eob_ptr, scan, iscan,
-#if CONFIG_AOM_QM
- qm_ptr, iqm_ptr,
-#endif
- 0);
+ dequant_ptr, eob_ptr, scan, iscan, NULL, NULL, 0);
}
void aom_quantize_b_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
@@ -128,19 +188,10 @@
const int16_t *quant_shift_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan
-#if CONFIG_AOM_QM
- ,
- const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr
-#endif
- ) {
+ const int16_t *scan, const int16_t *iscan) {
quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr, round_ptr,
quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr,
- dequant_ptr, eob_ptr, scan, iscan,
-#if CONFIG_AOM_QM
- qm_ptr, iqm_ptr,
-#endif
- 1);
+ dequant_ptr, eob_ptr, scan, iscan, NULL, NULL, 1);
}
#if CONFIG_TX64X64
@@ -150,427 +201,28 @@
const int16_t *quant_shift_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan
-#if CONFIG_AOM_QM
- ,
- const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr
-#endif
- ) {
+ const int16_t *scan, const int16_t *iscan) {
quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr, round_ptr,
quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr,
- dequant_ptr, eob_ptr, scan, iscan,
-#if CONFIG_AOM_QM
- qm_ptr, iqm_ptr,
-#endif
- 2);
+ dequant_ptr, eob_ptr, scan, iscan, NULL, NULL, 2);
}
#endif // CONFIG_TX64X64
-#if CONFIG_AOM_QM
-void aom_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block,
- const int16_t *round_ptr, const int16_t quant,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t dequant_ptr, uint16_t *eob_ptr,
- const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr) {
- const int rc = 0;
- const int coeff = coeff_ptr[rc];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- int64_t tmp, eob = -1;
- int32_t tmp32;
- int dequant =
- (dequant_ptr * iqm_ptr[rc] + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
- tmp32 = (int32_t)((tmp * qm_ptr[rc] * quant) >> (16 + AOM_QM_BITS));
- qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
- dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant;
- if (tmp32) eob = 0;
- }
- *eob_ptr = eob + 1;
-}
-
-void aom_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
- const int16_t *round_ptr, const int16_t quant,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t dequant_ptr, uint16_t *eob_ptr,
- const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr) {
- const int n_coeffs = 1024;
- const int rc = 0;
- const int coeff = coeff_ptr[rc];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- int64_t tmp, eob = -1;
- int32_t tmp32;
- int dequant;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1),
- INT16_MIN, INT16_MAX);
- tmp32 = (int32_t)((tmp * qm_ptr[rc] * quant) >> (15 + AOM_QM_BITS));
- qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
- dequant =
- (dequant_ptr * iqm_ptr[rc] + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
- dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / 2;
- if (tmp32) eob = 0;
- }
- *eob_ptr = eob + 1;
-}
-
-#if CONFIG_TX64X64
-void aom_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block,
- const int16_t *round_ptr, const int16_t quant,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t dequant_ptr, uint16_t *eob_ptr,
- const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr) {
- const int n_coeffs = 1024;
- const int rc = 0;
- const int coeff = coeff_ptr[rc];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- int64_t tmp, eob = -1;
- int32_t tmp32;
- int dequant;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 2),
- INT16_MIN, INT16_MAX);
- tmp32 = (int32_t)((tmp * qm_ptr[rc] * quant) >> (14 + AOM_QM_BITS));
- qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
- dequant =
- (dequant_ptr * iqm_ptr[rc] + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
- dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / 4;
- if (tmp32) eob = 0;
- }
- *eob_ptr = eob + 1;
-}
-#endif // CONFIG_TX64X64
-
-void aom_highbd_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs,
- int skip_block, const int16_t *round_ptr,
- const int16_t quant, tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr,
- uint16_t *eob_ptr, const qm_val_t *qm_ptr,
- const qm_val_t *iqm_ptr) {
- int eob = -1;
- int dequant =
- (dequant_ptr * iqm_ptr[0] + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- const int coeff = coeff_ptr[0];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- const int64_t tmp = abs_coeff + round_ptr[0];
- const uint32_t abs_qcoeff =
- (uint32_t)((tmp * qm_ptr[0] * quant) >> (16 + AOM_QM_BITS));
- qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
- dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant;
- if (abs_qcoeff) eob = 0;
- }
- *eob_ptr = eob + 1;
-}
-
-void aom_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
- const int16_t *round_ptr, const int16_t quant,
- tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr,
- const int16_t dequant_ptr, uint16_t *eob_ptr,
- const qm_val_t *qm_ptr,
- const qm_val_t *iqm_ptr) {
- const int n_coeffs = 1024;
- int eob = -1;
- int dequant;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- const int coeff = coeff_ptr[0];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- const int64_t tmp = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[0], 1);
- const uint32_t abs_qcoeff =
- (uint32_t)((tmp * qm_ptr[0] * quant) >> (15 + AOM_QM_BITS));
- qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
- dequant =
- (dequant_ptr * iqm_ptr[0] + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
- dqcoeff_ptr[0] = (qcoeff_ptr[0] * dequant) / 2;
- if (abs_qcoeff) eob = 0;
- }
- *eob_ptr = eob + 1;
-}
-
-#if CONFIG_TX64X64
-void aom_highbd_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block,
- const int16_t *round_ptr, const int16_t quant,
- tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr,
- const int16_t dequant_ptr, uint16_t *eob_ptr,
- const qm_val_t *qm_ptr,
- const qm_val_t *iqm_ptr) {
- const int n_coeffs = 1024;
- int eob = -1;
- int dequant;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- const int coeff = coeff_ptr[0];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- const int64_t tmp = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[0], 2);
- const uint32_t abs_qcoeff =
- (uint32_t)((tmp * qm_ptr[0] * quant) >> (14 + AOM_QM_BITS));
- qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
- dequant =
- (dequant_ptr * iqm_ptr[0] + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
- dqcoeff_ptr[0] = (qcoeff_ptr[0] * dequant) / 4;
- if (abs_qcoeff) eob = 0;
- }
- *eob_ptr = eob + 1;
-}
-#endif // CONFIG_TX64X64
-
-void aom_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
- int skip_block, const int16_t *zbin_ptr,
- const int16_t *round_ptr, const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan,
- const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr) {
- int i, non_zero_count = (int)n_coeffs, eob = -1;
- const int zbins[2] = { zbin_ptr[0], zbin_ptr[1] };
- const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
- int dequant;
- (void)iscan;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- // Pre-scan pass
- for (i = (int)n_coeffs - 1; i >= 0; i--) {
- const int rc = scan[i];
- const qm_val_t wt = qm_ptr[rc];
- const int coeff = coeff_ptr[rc] * wt;
-
- if (coeff < (zbins[rc != 0] << AOM_QM_BITS) &&
- coeff > (nzbins[rc != 0] << AOM_QM_BITS))
- non_zero_count--;
- else
- break;
- }
-
- // Quantization pass: All coefficients with index >= zero_flag are
- // skippable. Note: zero_flag can be zero.
- for (i = 0; i < non_zero_count; i++) {
- const int rc = scan[i];
- const int coeff = coeff_ptr[rc];
- const qm_val_t wt = qm_ptr[rc];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
-
- if (abs_coeff * wt >= (zbins[rc != 0] << AOM_QM_BITS)) {
- const int64_t tmp1 = abs_coeff + round_ptr[rc != 0];
- const int64_t tmpw = tmp1 * wt;
- const int64_t tmp2 = ((tmpw * quant_ptr[rc != 0]) >> 16) + tmpw;
- const uint32_t abs_qcoeff =
- (uint32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> (16 + AOM_QM_BITS));
- qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
- dequant =
- (dequant_ptr[rc != 0] * iqm_ptr[rc] + (1 << (AOM_QM_BITS - 1))) >>
- AOM_QM_BITS;
- dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant;
- if (abs_qcoeff) eob = i;
- }
- }
- }
- *eob_ptr = eob + 1;
-}
-
-void aom_highbd_quantize_b_32x32_c(
- const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
- const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
- const qm_val_t *iqm_ptr) {
- const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], 1),
- ROUND_POWER_OF_TWO(zbin_ptr[1], 1) };
- const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
-
- int idx = 0;
- int idx_arr[1024];
- int i, eob = -1;
- int dequant;
- (void)iscan;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- // Pre-scan pass
- for (i = 0; i < n_coeffs; i++) {
- const int rc = scan[i];
- const qm_val_t wt = qm_ptr[rc];
- const int coeff = coeff_ptr[rc] * wt;
-
- // If the coefficient is out of the base ZBIN range, keep it for
- // quantization.
- if (coeff >= (zbins[rc != 0] << AOM_QM_BITS) ||
- coeff <= (nzbins[rc != 0] << AOM_QM_BITS))
- idx_arr[idx++] = i;
- }
-
- // Quantization pass: only process the coefficients selected in
- // pre-scan pass. Note: idx can be zero.
- for (i = 0; i < idx; i++) {
- const int rc = scan[idx_arr[i]];
- const int coeff = coeff_ptr[rc];
- const int coeff_sign = (coeff >> 31);
- const qm_val_t wt = qm_ptr[rc];
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- const int64_t tmp1 =
- abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
- const int64_t tmpw = tmp1 * wt;
- const int64_t tmp2 = ((tmpw * quant_ptr[rc != 0]) >> 16) + tmpw;
- const uint32_t abs_qcoeff =
- (uint32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> (15 + AOM_QM_BITS));
- qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
- dequant =
- (dequant_ptr[rc != 0] * iqm_ptr[rc] + (1 << (AOM_QM_BITS - 1))) >>
- AOM_QM_BITS;
- dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / 2;
- if (abs_qcoeff) eob = idx_arr[i];
- }
- }
- *eob_ptr = eob + 1;
-}
-
-#if CONFIG_TX64X64
-void aom_highbd_quantize_b_64x64_c(
- const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
- const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
- const qm_val_t *iqm_ptr) {
- const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], 2),
- ROUND_POWER_OF_TWO(zbin_ptr[1], 2) };
- const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
-
- int idx = 0;
- int idx_arr[4096];
- int i, eob = -1;
- int dequant;
- (void)iscan;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- // Pre-scan pass
- for (i = 0; i < n_coeffs; i++) {
- const int rc = scan[i];
- const qm_val_t wt = qm_ptr[rc];
- const int coeff = coeff_ptr[rc] * wt;
-
- // If the coefficient is out of the base ZBIN range, keep it for
- // quantization.
- if (coeff >= (zbins[rc != 0] << AOM_QM_BITS) ||
- coeff <= (nzbins[rc != 0] << AOM_QM_BITS))
- idx_arr[idx++] = i;
- }
-
- // Quantization pass: only process the coefficients selected in
- // pre-scan pass. Note: idx can be zero.
- for (i = 0; i < idx; i++) {
- const int rc = scan[idx_arr[i]];
- const int coeff = coeff_ptr[rc];
- const int coeff_sign = (coeff >> 31);
- const qm_val_t wt = qm_ptr[rc];
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- const int64_t tmp1 =
- abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 2);
- const int64_t tmpw = tmp1 * wt;
- const int64_t tmp2 = ((tmpw * quant_ptr[rc != 0]) >> 16) + tmpw;
- const uint32_t abs_qcoeff =
- (uint32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> (14 + AOM_QM_BITS));
- qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
- dequant =
- (dequant_ptr[rc != 0] * iqm_ptr[rc] + (1 << (AOM_QM_BITS - 1))) >>
- AOM_QM_BITS;
- dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / 4;
- if (abs_qcoeff) eob = idx_arr[i];
- }
- }
- *eob_ptr = eob + 1;
-}
-#endif // CONFIG_TX64X64
-
-#else // CONFIG_AOM_QM
-
void aom_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block,
const int16_t *round_ptr, const int16_t quant,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr, uint16_t *eob_ptr) {
- const int rc = 0;
- const int coeff = coeff_ptr[rc];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- int tmp, eob = -1;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
- tmp = (tmp * quant) >> 16;
- qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
- dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr;
- if (tmp) eob = 0;
- }
- *eob_ptr = eob + 1;
+ quantize_dc_helper(coeff_ptr, n_coeffs, skip_block, round_ptr, quant,
+ qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, NULL, NULL,
+ 0);
}
void aom_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
const int16_t *round_ptr, const int16_t quant,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr, uint16_t *eob_ptr) {
- const int n_coeffs = 1024;
- const int rc = 0;
- const int coeff = coeff_ptr[rc];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- int tmp, eob = -1;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1),
- INT16_MIN, INT16_MAX);
- tmp = (tmp * quant) >> 15;
- qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
- dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr / 2;
- if (tmp) eob = 0;
- }
- *eob_ptr = eob + 1;
+ quantize_dc_helper(coeff_ptr, 1024, skip_block, round_ptr, quant, qcoeff_ptr,
+ dqcoeff_ptr, dequant_ptr, eob_ptr, NULL, NULL, 1);
}
#if CONFIG_TX64X64
@@ -578,100 +230,8 @@
const int16_t *round_ptr, const int16_t quant,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr, uint16_t *eob_ptr) {
- const int n_coeffs = 4096;
- const int rc = 0;
- const int coeff = coeff_ptr[rc];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- int tmp, eob = -1;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 2),
- INT16_MIN, INT16_MAX);
- tmp = (tmp * quant) >> 14;
- qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
- dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr / 4;
- if (tmp) eob = 0;
- }
- *eob_ptr = eob + 1;
-}
-#endif // CONFIG_TX64X64
-
-void aom_highbd_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs,
- int skip_block, const int16_t *round_ptr,
- const int16_t quant, tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr,
- uint16_t *eob_ptr) {
- int eob = -1;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- const int coeff = coeff_ptr[0];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- const int64_t tmp = abs_coeff + round_ptr[0];
- const uint32_t abs_qcoeff = (uint32_t)((tmp * quant) >> 16);
- qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
- dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr;
- if (abs_qcoeff) eob = 0;
- }
- *eob_ptr = eob + 1;
-}
-
-void aom_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
- const int16_t *round_ptr, const int16_t quant,
- tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr,
- const int16_t dequant_ptr,
- uint16_t *eob_ptr) {
- const int n_coeffs = 1024;
- int eob = -1;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- const int coeff = coeff_ptr[0];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- const int64_t tmp = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[0], 1);
- const uint32_t abs_qcoeff = (uint32_t)((tmp * quant) >> 15);
- qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
- dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr / 2;
- if (abs_qcoeff) eob = 0;
- }
- *eob_ptr = eob + 1;
-}
-
-#if CONFIG_TX64X64
-void aom_highbd_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block,
- const int16_t *round_ptr, const int16_t quant,
- tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr,
- const int16_t dequant_ptr,
- uint16_t *eob_ptr) {
- const int n_coeffs = 4096;
- int eob = -1;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- const int coeff = coeff_ptr[0];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- const int64_t tmp = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[0], 2);
- const uint32_t abs_qcoeff = (uint32_t)((tmp * quant) >> 14);
- qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
- dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr / 4;
- if (abs_qcoeff) eob = 0;
- }
- *eob_ptr = eob + 1;
+ quantize_dc_helper(coeff_ptr, 4096, skip_block, round_ptr, quant, qcoeff_ptr,
+ dqcoeff_ptr, dequant_ptr, eob_ptr, NULL, NULL, 2);
}
#endif // CONFIG_TX64X64
@@ -682,45 +242,10 @@
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
- int i, non_zero_count = (int)n_coeffs, eob = -1;
- const int zbins[2] = { zbin_ptr[0], zbin_ptr[1] };
- const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
- (void)iscan;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- // Pre-scan pass
- for (i = (int)n_coeffs - 1; i >= 0; i--) {
- const int rc = scan[i];
- const int coeff = coeff_ptr[rc];
-
- if (coeff < zbins[rc != 0] && coeff > nzbins[rc != 0])
- non_zero_count--;
- else
- break;
- }
-
- // Quantization pass: All coefficients with index >= zero_flag are
- // skippable. Note: zero_flag can be zero.
- for (i = 0; i < non_zero_count; i++) {
- const int rc = scan[i];
- const int coeff = coeff_ptr[rc];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
-
- if (abs_coeff >= zbins[rc != 0]) {
- const int64_t tmp1 = abs_coeff + round_ptr[rc != 0];
- const int64_t tmp2 = ((tmp1 * quant_ptr[rc != 0]) >> 16) + tmp1;
- const int abs_qcoeff = (int)((tmp2 * quant_shift_ptr[rc != 0]) >> 16);
- qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
- dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
- if (abs_qcoeff) eob = i;
- }
- }
- }
- *eob_ptr = eob + 1;
+ highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr,
+ round_ptr, quant_ptr, quant_shift_ptr, qcoeff_ptr,
+ dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
+ NULL, NULL, 0);
}
void aom_highbd_quantize_b_32x32_c(
@@ -729,47 +254,10 @@
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
- const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], 1),
- ROUND_POWER_OF_TWO(zbin_ptr[1], 1) };
- const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
-
- int idx = 0;
- int idx_arr[1024];
- int i, eob = -1;
- (void)iscan;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- // Pre-scan pass
- for (i = 0; i < n_coeffs; i++) {
- const int rc = scan[i];
- const int coeff = coeff_ptr[rc];
-
- // If the coefficient is out of the base ZBIN range, keep it for
- // quantization.
- if (coeff >= zbins[rc != 0] || coeff <= nzbins[rc != 0])
- idx_arr[idx++] = i;
- }
-
- // Quantization pass: only process the coefficients selected in
- // pre-scan pass. Note: idx can be zero.
- for (i = 0; i < idx; i++) {
- const int rc = scan[idx_arr[i]];
- const int coeff = coeff_ptr[rc];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- const int64_t tmp1 =
- abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
- const int64_t tmp2 = ((tmp1 * quant_ptr[rc != 0]) >> 16) + tmp1;
- const int abs_qcoeff = (int)((tmp2 * quant_shift_ptr[rc != 0]) >> 15);
- qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
- dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
- if (abs_qcoeff) eob = idx_arr[i];
- }
- }
- *eob_ptr = eob + 1;
+ highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr,
+ round_ptr, quant_ptr, quant_shift_ptr, qcoeff_ptr,
+ dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
+ NULL, NULL, 1);
}
#if CONFIG_TX64X64
@@ -779,47 +267,9 @@
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
- const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], 2),
- ROUND_POWER_OF_TWO(zbin_ptr[1], 2) };
- const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
-
- int idx = 0;
- int idx_arr[4096];
- int i, eob = -1;
- (void)iscan;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- // Pre-scan pass
- for (i = 0; i < n_coeffs; i++) {
- const int rc = scan[i];
- const int coeff = coeff_ptr[rc];
-
- // If the coefficient is out of the base ZBIN range, keep it for
- // quantization.
- if (coeff >= zbins[rc != 0] || coeff <= nzbins[rc != 0])
- idx_arr[idx++] = i;
- }
-
- // Quantization pass: only process the coefficients selected in
- // pre-scan pass. Note: idx can be zero.
- for (i = 0; i < idx; i++) {
- const int rc = scan[idx_arr[i]];
- const int coeff = coeff_ptr[rc];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- const int64_t tmp1 =
- abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 2);
- const int64_t tmp2 = ((tmp1 * quant_ptr[rc != 0]) >> 16) + tmp1;
- const int abs_qcoeff = (int)((tmp2 * quant_shift_ptr[rc != 0]) >> 14);
- qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
- dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 4;
- if (abs_qcoeff) eob = idx_arr[i];
- }
- }
- *eob_ptr = eob + 1;
+ highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr,
+ round_ptr, quant_ptr, quant_shift_ptr, qcoeff_ptr,
+ dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
+ NULL, NULL, 2);
}
#endif // CONFIG_TX64X64
-#endif // CONFIG_AOM_QM
diff --git a/aom_dsp/quantize.h b/aom_dsp/quantize.h
index fe49b83..e4bbfb9 100644
--- a/aom_dsp/quantize.h
+++ b/aom_dsp/quantize.h
@@ -19,32 +19,57 @@
extern "C" {
#endif
-#if CONFIG_AOM_QM
-void aom_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block,
- const int16_t *round_ptr, const int16_t quant_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t dequant_ptr, uint16_t *eob_ptr,
- const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr);
-void aom_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
- const int16_t *round_ptr, const int16_t quant_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t dequant_ptr, uint16_t *eob_ptr,
- const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr);
-#if CONFIG_TX64X64
-void aom_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block,
- const int16_t *round_ptr, const int16_t quant_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t dequant_ptr, uint16_t *eob_ptr,
- const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr);
-#endif // CONFIG_TX64X64
+void quantize_b_helper_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ int skip_block, const int16_t *zbin_ptr,
+ const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
+ uint16_t *eob_ptr, const int16_t *scan,
+ const int16_t *iscan, const qm_val_t *qm_ptr,
+ const qm_val_t *iqm_ptr, const int log_scale);
+
void aom_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t *zbin_ptr,
const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
uint16_t *eob_ptr, const int16_t *scan,
- const int16_t *iscan, const qm_val_t *qm_ptr,
- const qm_val_t *iqm_ptr);
+ const int16_t *iscan);
+
+#if CONFIG_HIGHBITDEPTH
+void aom_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ int skip_block, const int16_t *zbin_ptr,
+ const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ const int16_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan);
+
+void highbd_quantize_b_helper_c(
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
+ const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
+ const qm_val_t *iqm_ptr, const int log_scale);
+#endif
+
+void aom_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block,
+ const int16_t *round_ptr, const int16_t quant_ptr,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ const int16_t dequant_ptr, uint16_t *eob_ptr);
+void aom_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
+ const int16_t *round_ptr, const int16_t quant_ptr,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ const int16_t dequant_ptr, uint16_t *eob_ptr);
+#if CONFIG_TX64X64
+void aom_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block,
+ const int16_t *round_ptr, const int16_t quant_ptr,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ const int16_t dequant_ptr, uint16_t *eob_ptr);
+#endif // CONFIG_TX64X64
+
+#if CONFIG_AOM_QM
#if CONFIG_HIGHBITDEPTH
void aom_highbd_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs,
int skip_block, const int16_t *round_ptr,
@@ -64,32 +89,10 @@
const int16_t dequant_ptr, uint16_t *eob_ptr, const qm_val_t *qm_ptr,
const qm_val_t *iqm_ptr);
#endif // CONFIG_TX64X64
-void aom_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
- int skip_block, const int16_t *zbin_ptr,
- const int16_t *round_ptr, const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan,
- const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr);
#endif // CONFIG_HIGHBITDEPTH
#else // CONFIG_AOM_QM
-void aom_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block,
- const int16_t *round_ptr, const int16_t quant_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t dequant_ptr, uint16_t *eob_ptr);
-void aom_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
- const int16_t *round_ptr, const int16_t quant_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t dequant_ptr, uint16_t *eob_ptr);
-#if CONFIG_TX64X64
-void aom_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block,
- const int16_t *round_ptr, const int16_t quant_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t dequant_ptr, uint16_t *eob_ptr);
-#endif // CONFIG_TX64X64
#if CONFIG_HIGHBITDEPTH
void aom_highbd_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs,
int skip_block, const int16_t *round_ptr,
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index 91af8dd..edc07f8 100755
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -278,41 +278,7 @@
# ENCODEMB INVOKE
-if (aom_config("CONFIG_AOM_QM") eq "yes") {
- if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
- # the transform coefficients are held in 32-bit
- # values, so the assembler code for av1_block_error can no longer be used.
- add_proto qw/int64_t av1_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
- specialize qw/av1_block_error avx2/;
-
- add_proto qw/void av1_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
-
- add_proto qw/void av1_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
-
- if (aom_config("CONFIG_TX64X64") eq "yes") {
- add_proto qw/void av1_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
- }
-
- add_proto qw/void av1_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
- } else {
- add_proto qw/int64_t av1_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
- specialize qw/av1_block_error avx2 msa/, "$sse2_x86inc";
-
- add_proto qw/int64_t av1_block_error_fp/, "const int16_t *coeff, const int16_t *dqcoeff, int block_size";
- specialize qw/av1_block_error_fp neon/, "$sse2_x86inc";
-
- add_proto qw/void av1_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
-
- add_proto qw/void av1_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
-
- if (aom_config("CONFIG_TX64X64") eq "yes") {
- add_proto qw/void av1_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
- }
-
- add_proto qw/void av1_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
- }
-} else {
- if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
+if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
# the transform coefficients are held in 32-bit
# values, so the assembler code for av1_block_error can no longer be used.
add_proto qw/int64_t av1_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
@@ -327,9 +293,7 @@
if (aom_config("CONFIG_TX64X64") eq "yes") {
add_proto qw/void av1_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
}
-
- add_proto qw/void av1_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- } else {
+} else {
add_proto qw/int64_t av1_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
specialize qw/av1_block_error sse2 avx2 msa/;
@@ -346,12 +310,26 @@
add_proto qw/void av1_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
}
- add_proto qw/void av1_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/av1_fdct8x8_quant sse2 ssse3 neon/;
- }
-
}
+#Unused
+#if (aom_config("CONFIG_AOM_QM") eq "yes") {
+# if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
+# add_proto qw/void av1_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
+# } else {
+# add_proto qw/void av1_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
+# }
+#} else {
+# if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
+#
+# add_proto qw/void av1_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+# } else {
+# add_proto qw/void av1_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+# specialize qw/av1_fdct8x8_quant sse2 ssse3 neon/;
+# }
+#
+#}
+
# fdct functions
add_proto qw/void av1_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
@@ -488,22 +466,8 @@
}
-if (aom_config("CONFIG_AOM_QM") eq "yes") {
- add_proto qw/void av1_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale";
-
- add_proto qw/void av1_highbd_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale";
-
- if (aom_config("CONFIG_TX64X64") eq "yes") {
- add_proto qw/void av1_highbd_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale";
- }
-
- add_proto qw/void av1_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale";
-} else {
- add_proto qw/void av1_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale";
- specialize qw/av1_highbd_quantize_fp sse4_1 avx2/;
-
- add_proto qw/void av1_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale";
-}
+ add_proto qw/void av1_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale";
+ specialize qw/av1_highbd_quantize_fp sse4_1 avx2/;
add_proto qw/void av1_highbd_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
diff --git a/av1/encoder/av1_quantize.c b/av1/encoder/av1_quantize.c
index dd53d42..b44e06f 100644
--- a/av1/encoder/av1_quantize.c
+++ b/av1/encoder/av1_quantize.c
@@ -443,11 +443,8 @@
const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan,
-#if CONFIG_AOM_QM
- const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr,
-#endif
- int log_scale) {
+ const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
+ const qm_val_t *iqm_ptr, int log_scale) {
int i, eob = -1;
// TODO(jingning) Decide the need of these arguments after the
// quantization process is completed.
@@ -464,35 +461,22 @@
for (i = 0; i < n_coeffs; i++) {
const int rc = scan[i];
const int coeff = coeff_ptr[rc];
-#if CONFIG_AOM_QM
- const qm_val_t wt = qm_ptr[rc];
- const qm_val_t iwt = iqm_ptr[rc];
+ const qm_val_t wt = qm_ptr ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+ const qm_val_t iwt = iqm_ptr ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
const int dequant =
(dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
AOM_QM_BITS;
-#endif
const int coeff_sign = (coeff >> 31);
- int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+ int64_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
int tmp32 = 0;
-#if CONFIG_AOM_QM
if (abs_coeff * wt >=
(dequant_ptr[rc != 0] << (AOM_QM_BITS - (1 + log_scale)))) {
-#else
- if (abs_coeff >= (dequant_ptr[rc != 0] >> (1 + log_scale))) {
-#endif
abs_coeff += ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale);
- abs_coeff = clamp(abs_coeff, INT16_MIN, INT16_MAX);
-#if CONFIG_AOM_QM
+ abs_coeff = clamp64(abs_coeff, INT16_MIN, INT16_MAX);
tmp32 = (int)((abs_coeff * wt * quant_ptr[rc != 0]) >>
- ((16 - log_scale) + AOM_QM_BITS));
+ (16 - log_scale + AOM_QM_BITS));
qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant / (1 << log_scale);
-#else
- tmp32 = (int)((abs_coeff * quant_ptr[rc != 0]) >> (16 - log_scale));
- qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
- dqcoeff_ptr[rc] =
- qcoeff_ptr[rc] * dequant_ptr[rc != 0] / (1 << log_scale);
-#endif
}
if (tmp32) eob = i;
@@ -501,25 +485,60 @@
*eob_ptr = eob + 1;
}
+static void highbd_quantize_fp_helper_c(
+ const tran_low_t *coeff_ptr, intptr_t count, int skip_block,
+ const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
+ const qm_val_t *iqm_ptr, int log_scale) {
+ int i;
+ int eob = -1;
+ const int scale = 1 << log_scale;
+ const int shift = 16 - log_scale;
+ // TODO(jingning) Decide the need of these arguments after the
+ // quantization process is completed.
+ (void)zbin_ptr;
+ (void)quant_shift_ptr;
+ (void)iscan;
+
+ memset(qcoeff_ptr, 0, count * sizeof(*qcoeff_ptr));
+ memset(dqcoeff_ptr, 0, count * sizeof(*dqcoeff_ptr));
+
+ if (!skip_block) {
+ // Quantization pass: All coefficients with index >= zero_flag are
+ // skippable. Note: zero_flag can be zero.
+ for (i = 0; i < count; i++) {
+ const int rc = scan[i];
+ const int coeff = coeff_ptr[rc];
+ const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+ const qm_val_t iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
+ const int dequant =
+ (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
+ AOM_QM_BITS;
+ const int coeff_sign = (coeff >> 31);
+ const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+ const int64_t tmp = abs_coeff + (round_ptr[rc != 0] >> log_scale);
+ const int abs_qcoeff =
+ (int)((tmp * quant_ptr[rc != 0] * wt) >> (shift + AOM_QM_BITS));
+ qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
+ dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant / scale;
+ if (abs_qcoeff) eob = i;
+ }
+ }
+ *eob_ptr = eob + 1;
+}
+
void av1_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t *zbin_ptr,
const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
uint16_t *eob_ptr, const int16_t *scan,
- const int16_t *iscan
-#if CONFIG_AOM_QM
- ,
- const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr
-#endif
- ) {
+ const int16_t *iscan) {
quantize_fp_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr, round_ptr,
quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr,
- dequant_ptr, eob_ptr, scan, iscan,
-#if CONFIG_AOM_QM
- qm_ptr, iqm_ptr,
-#endif
- 0);
+ dequant_ptr, eob_ptr, scan, iscan, NULL, NULL, 0);
}
void av1_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
@@ -528,19 +547,10 @@
const int16_t *quant_shift_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan
-#if CONFIG_AOM_QM
- ,
- const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr
-#endif
- ) {
+ const int16_t *scan, const int16_t *iscan) {
quantize_fp_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr, round_ptr,
quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr,
- dequant_ptr, eob_ptr, scan, iscan,
-#if CONFIG_AOM_QM
- qm_ptr, iqm_ptr,
-#endif
- 1);
+ dequant_ptr, eob_ptr, scan, iscan, NULL, NULL, 1);
}
#if CONFIG_TX64X64
@@ -550,19 +560,10 @@
const int16_t *quant_shift_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan
-#if CONFIG_AOM_QM
- ,
- const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr
-#endif
- ) {
+ const int16_t *scan, const int16_t *iscan) {
quantize_fp_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr, round_ptr,
quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr,
- dequant_ptr, eob_ptr, scan, iscan,
-#if CONFIG_AOM_QM
- qm_ptr, iqm_ptr,
-#endif
- 2);
+ dequant_ptr, eob_ptr, scan, iscan, NULL, NULL, 2);
}
#endif // CONFIG_TX64X64
@@ -576,58 +577,47 @@
#if CONFIG_AOM_QM
const qm_val_t *qm_ptr = qparam->qmatrix;
const qm_val_t *iqm_ptr = qparam->iqmatrix;
-#endif // CONFIG_AOM_QM
-
- switch (qparam->log_scale) {
- case 0:
- if (n_coeffs < 16) {
- // TODO(jingning): Need SIMD implementation for smaller block size
- // quantization.
- quantize_fp_helper_c(coeff_ptr, n_coeffs, skip_block, p->zbin,
- p->round_fp, p->quant_fp, p->quant_shift,
- qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr,
- sc->scan, sc->iscan,
-#if CONFIG_AOM_QM
- qm_ptr, iqm_ptr,
+ if (1 /*qm_ptr != NULL || iqm_ptr != NULL*/) {
+ quantize_fp_helper_c(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round_fp,
+ p->quant_fp, p->quant_shift, qcoeff_ptr, dqcoeff_ptr,
+ pd->dequant, eob_ptr, sc->scan, sc->iscan, qm_ptr,
+ iqm_ptr, qparam->log_scale);
+ } else {
#endif
- qparam->log_scale);
- } else {
- av1_quantize_fp(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round_fp,
- p->quant_fp, p->quant_shift, qcoeff_ptr, dqcoeff_ptr,
- pd->dequant, eob_ptr, sc->scan, sc->iscan
-#if CONFIG_AOM_QM
- ,
- qm_ptr, iqm_ptr
-#endif
- );
- }
- break;
- case 1:
- av1_quantize_fp_32x32(coeff_ptr, n_coeffs, skip_block, p->zbin,
- p->round_fp, p->quant_fp, p->quant_shift,
- qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr,
- sc->scan, sc->iscan
-#if CONFIG_AOM_QM
- ,
- qm_ptr, iqm_ptr
-#endif
- );
- break;
+ switch (qparam->log_scale) {
+ case 0:
+ if (n_coeffs < 16) {
+ // TODO(jingning): Need SIMD implementation for smaller block size
+ // quantization.
+ quantize_fp_helper_c(
+ coeff_ptr, n_coeffs, skip_block, p->zbin, p->round_fp,
+ p->quant_fp, p->quant_shift, qcoeff_ptr, dqcoeff_ptr, pd->dequant,
+ eob_ptr, sc->scan, sc->iscan, NULL, NULL, qparam->log_scale);
+ } else {
+ av1_quantize_fp(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round_fp,
+ p->quant_fp, p->quant_shift, qcoeff_ptr, dqcoeff_ptr,
+ pd->dequant, eob_ptr, sc->scan, sc->iscan);
+ }
+ break;
+ case 1:
+ av1_quantize_fp_32x32(coeff_ptr, n_coeffs, skip_block, p->zbin,
+ p->round_fp, p->quant_fp, p->quant_shift,
+ qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr,
+ sc->scan, sc->iscan);
+ break;
#if CONFIG_TX64X64
- case 2:
- av1_quantize_fp_64x64(coeff_ptr, n_coeffs, skip_block, p->zbin,
- p->round_fp, p->quant_fp, p->quant_shift,
- qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr,
- sc->scan, sc->iscan
-#if CONFIG_AOM_QM
- ,
- qm_ptr, iqm_ptr
-#endif
- );
- break;
+ case 2:
+ av1_quantize_fp_64x64(coeff_ptr, n_coeffs, skip_block, p->zbin,
+ p->round_fp, p->quant_fp, p->quant_shift,
+ qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr,
+ sc->scan, sc->iscan);
+ break;
#endif // CONFIG_TX64X64
- default: assert(0);
+ default: assert(0);
+ }
+#if CONFIG_AOM_QM
}
+#endif
}
void av1_quantize_b_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
@@ -640,43 +630,68 @@
#if CONFIG_AOM_QM
const qm_val_t *qm_ptr = qparam->qmatrix;
const qm_val_t *iqm_ptr = qparam->iqmatrix;
+ if (1 /*qm_ptr != NULL && iqm_ptr != NULL*/) {
+ quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round,
+ p->quant, p->quant_shift, qcoeff_ptr, dqcoeff_ptr,
+ pd->dequant, eob_ptr, sc->scan, sc->iscan, qm_ptr,
+ iqm_ptr, qparam->log_scale);
+ } else {
#endif // CONFIG_AOM_QM
- switch (qparam->log_scale) {
- case 0:
- aom_quantize_b(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round,
- p->quant, p->quant_shift, qcoeff_ptr, dqcoeff_ptr,
- pd->dequant, eob_ptr, sc->scan, sc->iscan
-#if CONFIG_AOM_QM
- ,
- qm_ptr, iqm_ptr
-#endif
- );
- break;
- case 1:
- aom_quantize_b_32x32(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round,
- p->quant, p->quant_shift, qcoeff_ptr, dqcoeff_ptr,
- pd->dequant, eob_ptr, sc->scan, sc->iscan
-#if CONFIG_AOM_QM
- ,
- qm_ptr, iqm_ptr
-#endif
- );
- break;
+ switch (qparam->log_scale) {
+ case 0:
+ aom_quantize_b(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round,
+ p->quant, p->quant_shift, qcoeff_ptr, dqcoeff_ptr,
+ pd->dequant, eob_ptr, sc->scan, sc->iscan);
+ break;
+ case 1:
+ aom_quantize_b_32x32(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round,
+ p->quant, p->quant_shift, qcoeff_ptr, dqcoeff_ptr,
+ pd->dequant, eob_ptr, sc->scan, sc->iscan);
+ break;
#if CONFIG_TX64X64
- case 2:
- aom_quantize_b_64x64(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round,
- p->quant, p->quant_shift, qcoeff_ptr, dqcoeff_ptr,
- pd->dequant, eob_ptr, sc->scan, sc->iscan
-#if CONFIG_AOM_QM
- ,
- qm_ptr, iqm_ptr
-#endif
- );
- break;
+ case 2:
+ aom_quantize_b_64x64(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round,
+ p->quant, p->quant_shift, qcoeff_ptr, dqcoeff_ptr,
+ pd->dequant, eob_ptr, sc->scan, sc->iscan);
+ break;
#endif // CONFIG_TX64X64
- default: assert(0);
+ default: assert(0);
+ }
+#if CONFIG_AOM_QM
}
+#endif
+}
+
+static void quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs,
+ int skip_block, const int16_t *round_ptr,
+ const int16_t quant, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr,
+ uint16_t *eob_ptr, const qm_val_t *qm_ptr,
+ const qm_val_t *iqm_ptr, const int log_scale) {
+ const int rc = 0;
+ const int coeff = coeff_ptr[rc];
+ const int coeff_sign = (coeff >> 31);
+ const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+ int64_t tmp, eob = -1;
+ int32_t tmp32;
+ int dequant;
+
+ memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+ memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
+ if (!skip_block) {
+ const int wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+ const int iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
+ tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale),
+ INT16_MIN, INT16_MAX);
+ tmp32 = (int32_t)((tmp * wt * quant) >> (16 - log_scale + AOM_QM_BITS));
+ qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
+ dequant = (dequant_ptr * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
+ dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / (1 << log_scale);
+ if (tmp32) eob = 0;
+ }
+ *eob_ptr = eob + 1;
}
void av1_quantize_dc_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
@@ -686,45 +701,18 @@
const SCAN_ORDER *sc, const QUANT_PARAM *qparam) {
// obsolete skip_block
const int skip_block = 0;
+ (void)sc;
+ assert(qparam->log_scale >= 0 && qparam->log_scale < (2 + CONFIG_TX64X64));
#if CONFIG_AOM_QM
const qm_val_t *qm_ptr = qparam->qmatrix;
const qm_val_t *iqm_ptr = qparam->iqmatrix;
-#endif // CONFIG_AOM_QM
-
- (void)sc;
-
- switch (qparam->log_scale) {
- case 0:
- aom_quantize_dc(coeff_ptr, (int)n_coeffs, skip_block, p->round,
- p->quant_fp[0], qcoeff_ptr, dqcoeff_ptr, pd->dequant[0],
- eob_ptr
-#if CONFIG_AOM_QM
- ,
- qm_ptr, iqm_ptr
+#else
+ const qm_val_t *qm_ptr = NULL;
+ const qm_val_t *iqm_ptr = NULL;
#endif
- );
- break;
- case 1:
- aom_quantize_dc_32x32(coeff_ptr, skip_block, p->round, p->quant_fp[0],
- qcoeff_ptr, dqcoeff_ptr, pd->dequant[0], eob_ptr
-#if CONFIG_AOM_QM
- ,
- qm_ptr, iqm_ptr
-#endif
- );
- break;
-#if CONFIG_TX64X64
- aom_quantize_dc_64x64(coeff_ptr, skip_block, p->round, p->quant_fp[0],
- qcoeff_ptr, dqcoeff_ptr, pd->dequant[0], eob_ptr
-#if CONFIG_AOM_QM
- ,
- qm_ptr, iqm_ptr
-#endif
- );
- case 2: break;
-#endif // CONFIG_TX64X64
- default: assert(0);
- }
+ quantize_dc(coeff_ptr, (int)n_coeffs, skip_block, p->round, p->quant_fp[0],
+ qcoeff_ptr, dqcoeff_ptr, pd->dequant[0], eob_ptr, qm_ptr, iqm_ptr,
+ qparam->log_scale);
}
#if CONFIG_NEW_QUANT
@@ -857,29 +845,31 @@
#if CONFIG_AOM_QM
const qm_val_t *qm_ptr = qparam->qmatrix;
const qm_val_t *iqm_ptr = qparam->iqmatrix;
+ if (1 /*qm_ptr != NULL && iqm_ptr != NULL*/) {
+ highbd_quantize_fp_helper_c(
+ coeff_ptr, n_coeffs, skip_block, p->zbin, p->round_fp, p->quant_fp,
+ p->quant_shift, qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr, sc->scan,
+ sc->iscan, qm_ptr, iqm_ptr, qparam->log_scale);
+ } else {
#endif // CONFIG_AOM_QM
- if (n_coeffs < 16) {
- // TODO(jingning): Need SIMD implementation for smaller block size
- // quantization.
- av1_highbd_quantize_fp_c(coeff_ptr, n_coeffs, skip_block, p->zbin,
- p->round_fp, p->quant_fp, p->quant_shift,
- qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr,
- sc->scan, sc->iscan,
-#if CONFIG_AOM_QM
- qm_ptr, iqm_ptr,
-#endif
- qparam->log_scale);
- return;
- }
+ if (n_coeffs < 16) {
+ // TODO(jingning): Need SIMD implementation for smaller block size
+ // quantization.
+ av1_highbd_quantize_fp_c(coeff_ptr, n_coeffs, skip_block, p->zbin,
+ p->round_fp, p->quant_fp, p->quant_shift,
+ qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr,
+ sc->scan, sc->iscan, qparam->log_scale);
+ return;
+ }
- av1_highbd_quantize_fp(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round_fp,
- p->quant_fp, p->quant_shift, qcoeff_ptr, dqcoeff_ptr,
- pd->dequant, eob_ptr, sc->scan, sc->iscan,
+ av1_highbd_quantize_fp(coeff_ptr, n_coeffs, skip_block, p->zbin,
+ p->round_fp, p->quant_fp, p->quant_shift, qcoeff_ptr,
+ dqcoeff_ptr, pd->dequant, eob_ptr, sc->scan,
+ sc->iscan, qparam->log_scale);
#if CONFIG_AOM_QM
- qm_ptr, iqm_ptr,
+ }
#endif
- qparam->log_scale);
}
void av1_highbd_quantize_b_facade(const tran_low_t *coeff_ptr,
@@ -894,86 +884,76 @@
#if CONFIG_AOM_QM
const qm_val_t *qm_ptr = qparam->qmatrix;
const qm_val_t *iqm_ptr = qparam->iqmatrix;
+ if (1 /*qm_ptr != NULL && iqm_ptr != NULL*/) {
+ highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, p->zbin,
+ p->round, p->quant, p->quant_shift, qcoeff_ptr,
+ dqcoeff_ptr, pd->dequant, eob_ptr, sc->scan,
+ sc->iscan, qm_ptr, iqm_ptr, qparam->log_scale);
+ } else {
#endif // CONFIG_AOM_QM
- switch (qparam->log_scale) {
- case 0:
- if (LIKELY(n_coeffs >= 8)) {
- aom_highbd_quantize_b(coeff_ptr, n_coeffs, skip_block, p->zbin,
- p->round, p->quant, p->quant_shift, qcoeff_ptr,
- dqcoeff_ptr, pd->dequant, eob_ptr, sc->scan,
- sc->iscan
-#if CONFIG_AOM_QM
- ,
- qm_ptr, iqm_ptr
-#endif
- );
- } else {
- // TODO(luoyi): Need SIMD (e.g. sse2) for smaller block size
- // quantization
- aom_highbd_quantize_b_c(coeff_ptr, n_coeffs, skip_block, p->zbin,
+ switch (qparam->log_scale) {
+ case 0:
+ if (LIKELY(n_coeffs >= 8)) {
+ aom_highbd_quantize_b(coeff_ptr, n_coeffs, skip_block, p->zbin,
p->round, p->quant, p->quant_shift, qcoeff_ptr,
dqcoeff_ptr, pd->dequant, eob_ptr, sc->scan,
- sc->iscan
-#if CONFIG_AOM_QM
- ,
- qm_ptr, iqm_ptr
-#endif
- );
- }
- break;
- case 1:
- aom_highbd_quantize_b_32x32(coeff_ptr, n_coeffs, skip_block, p->zbin,
+ sc->iscan);
+ } else {
+ // TODO(luoyi): Need SIMD (e.g. sse2) for smaller block size
+ // quantization
+ aom_highbd_quantize_b_c(coeff_ptr, n_coeffs, skip_block, p->zbin,
p->round, p->quant, p->quant_shift,
qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr,
- sc->scan, sc->iscan
-#if CONFIG_AOM_QM
- ,
- qm_ptr, iqm_ptr
-#endif
- );
- break;
+ sc->scan, sc->iscan);
+ }
+ break;
+ case 1:
+ aom_highbd_quantize_b_32x32(coeff_ptr, n_coeffs, skip_block, p->zbin,
+ p->round, p->quant, p->quant_shift,
+ qcoeff_ptr, dqcoeff_ptr, pd->dequant,
+ eob_ptr, sc->scan, sc->iscan);
+ break;
#if CONFIG_TX64X64
- case 2:
- aom_highbd_quantize_b_64x64(coeff_ptr, n_coeffs, skip_block, p->zbin,
- p->round, p->quant, p->quant_shift,
- qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr,
- sc->scan, sc->iscan
-#if CONFIG_AOM_QM
- ,
- qm_ptr, iqm_ptr
-#endif
- );
- break;
+ case 2:
+ aom_highbd_quantize_b_64x64(coeff_ptr, n_coeffs, skip_block, p->zbin,
+ p->round, p->quant, p->quant_shift,
+ qcoeff_ptr, dqcoeff_ptr, pd->dequant,
+ eob_ptr, sc->scan, sc->iscan);
+ break;
#endif // CONFIG_TX64X64
- default: assert(0);
+ default: assert(0);
+ }
+#if CONFIG_AOM_QM
}
+#endif
}
static INLINE void highbd_quantize_dc(
const tran_low_t *coeff_ptr, int n_coeffs, int skip_block,
const int16_t *round_ptr, const int16_t quant, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr, uint16_t *eob_ptr,
-#if CONFIG_AOM_QM
- const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr,
-#endif
- const int log_scale) {
+ const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr, const int log_scale) {
int eob = -1;
memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-#if CONFIG_AOM_QM
- (void)qm_ptr;
- (void)iqm_ptr;
-#endif
+
if (!skip_block) {
+ const qm_val_t wt = qm_ptr != NULL ? qm_ptr[0] : (1 << AOM_QM_BITS);
+ const qm_val_t iwt = iqm_ptr != NULL ? iqm_ptr[0] : (1 << AOM_QM_BITS);
const int coeff = coeff_ptr[0];
const int coeff_sign = (coeff >> 31);
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- const int64_t tmp = abs_coeff + round_ptr[0];
- const int abs_qcoeff = (int)((tmp * quant) >> (16 - log_scale));
+ const int64_t tmp = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[0], log_scale);
+ const int64_t tmpw = tmp * wt;
+ const int abs_qcoeff =
+ (int)((tmpw * quant) >> (16 - log_scale + AOM_QM_BITS));
qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
- dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr / (1 << log_scale);
+ const int dequant =
+ (dequant_ptr * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
+
+ dqcoeff_ptr[0] = (qcoeff_ptr[0] * dequant) / (1 << log_scale);
if (abs_qcoeff) eob = 0;
}
*eob_ptr = eob + 1;
@@ -991,17 +971,16 @@
#if CONFIG_AOM_QM
const qm_val_t *qm_ptr = qparam->qmatrix;
const qm_val_t *iqm_ptr = qparam->iqmatrix;
+#else
+ const qm_val_t *qm_ptr = NULL;
+ const qm_val_t *iqm_ptr = NULL;
#endif // CONFIG_AOM_QM
(void)sc;
highbd_quantize_dc(coeff_ptr, (int)n_coeffs, skip_block, p->round,
p->quant_fp[0], qcoeff_ptr, dqcoeff_ptr, pd->dequant[0],
- eob_ptr,
-#if CONFIG_AOM_QM
- qm_ptr, iqm_ptr,
-#endif
- qparam->log_scale);
+ eob_ptr, qm_ptr, iqm_ptr, qparam->log_scale);
}
#if CONFIG_NEW_QUANT
@@ -1517,61 +1496,16 @@
}
#endif // CONFIG_NEW_QUANT
-void av1_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t count,
- int skip_block, const int16_t *zbin_ptr,
- const int16_t *round_ptr,
- const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan,
-#if CONFIG_AOM_QM
- const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr,
-#endif
- int log_scale) {
- int i;
- int eob = -1;
- const int scale = 1 << log_scale;
- const int shift = 16 - log_scale;
- // TODO(jingning) Decide the need of these arguments after the
- // quantization process is completed.
- (void)zbin_ptr;
- (void)quant_shift_ptr;
- (void)iscan;
-
- memset(qcoeff_ptr, 0, count * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, count * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- // Quantization pass: All coefficients with index >= zero_flag are
- // skippable. Note: zero_flag can be zero.
- for (i = 0; i < count; i++) {
- const int rc = scan[i];
- const int coeff = coeff_ptr[rc];
-#if CONFIG_AOM_QM
- const qm_val_t wt = qm_ptr[rc];
- const qm_val_t iwt = iqm_ptr[rc];
- const int dequant =
- (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
- AOM_QM_BITS;
-#endif
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- const int64_t tmp = abs_coeff + (round_ptr[rc != 0] >> log_scale);
-#if CONFIG_AOM_QM
- const int abs_qcoeff =
- (int)((tmp * quant_ptr[rc != 0] * wt) >> (shift + AOM_QM_BITS));
- qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
- dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant / scale;
-#else
- const int abs_qcoeff = (int)((tmp * quant_ptr[rc != 0]) >> shift);
- qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
- dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / scale;
-#endif
- if (abs_qcoeff) eob = i;
- }
- }
- *eob_ptr = eob + 1;
+void av1_highbd_quantize_fp_c(
+ const tran_low_t *coeff_ptr, intptr_t count, int skip_block,
+ const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan, int log_scale) {
+ highbd_quantize_fp_helper_c(coeff_ptr, count, skip_block, zbin_ptr, round_ptr,
+ quant_ptr, quant_shift_ptr, qcoeff_ptr,
+ dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
+ NULL, NULL, log_scale);
}
static void invert_quant(int16_t *quant, int16_t *shift, int d) {
diff --git a/av1/encoder/dct.c b/av1/encoder/dct.c
index 2ca4f34..858ea24 100644
--- a/av1/encoder/dct.c
+++ b/av1/encoder/dct.c
@@ -2087,12 +2087,8 @@
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
uint16_t *eob_ptr, const int16_t *scan,
- const int16_t *iscan
-#if CONFIG_AOM_QM
- ,
- const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr
-#endif
- ) {
+ const int16_t *iscan, const qm_val_t *qm_ptr,
+ const qm_val_t *iqm_ptr) {
int eob = -1;
int i, j;
@@ -2177,27 +2173,19 @@
for (i = 0; i < n_coeffs; i++) {
const int rc = scan[i];
const int coeff = coeff_ptr[rc];
-#if CONFIG_AOM_QM
- const qm_val_t wt = qm_ptr[rc];
- const qm_val_t iwt = iqm_ptr[rc];
+ const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+ const qm_val_t iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
const int dequant =
(dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
AOM_QM_BITS;
-#endif
const int coeff_sign = (coeff >> 31);
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
int64_t tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
int tmp32;
-#if CONFIG_AOM_QM
tmp32 = (int)((tmp * quant_ptr[rc != 0] * wt) >> (16 + AOM_QM_BITS));
qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant;
-#else
- tmp32 = (int)((tmp * quant_ptr[rc != 0]) >> 16);
- qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
- dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
-#endif
if (tmp32) eob = i;
}