Add improvements to the b quantizer
These are controlled by a macro because they do not currently have
SIMD.
Lowres Compression Gains:
Full Trellis Optimization: 0.02% PSNR Gain
Partial Trellis (disabled in tx search): 0.38% PSNR Gain
Trellis Fully Disabled: 0.68% PSNR Gain
Change-Id: I5a42e6ff9fcca3dc31691c14deb1f0599a0ff894
diff --git a/aom_dsp/quantize.c b/aom_dsp/quantize.c
index 62dbd86..5a28698 100644
--- a/aom_dsp/quantize.c
+++ b/aom_dsp/quantize.c
@@ -12,6 +12,67 @@
#include "aom_dsp/quantize.h"
#include "aom_mem/aom_mem.h"
+void quantize_b_adaptive_helper_c(
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
+ const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
+ const qm_val_t *iqm_ptr, const int log_scale) {
+ const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], log_scale),
+ ROUND_POWER_OF_TWO(zbin_ptr[1], log_scale) };
+ const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
+ int i, non_zero_count = (int)n_coeffs, eob = -1;
+ (void)iscan;
+
+ memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+ memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
+ // Pre-scan pass
+ for (i = (int)n_coeffs - 1; i >= 0; i--) {
+ const int rc = scan[i];
+ const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+ const int coeff = coeff_ptr[rc] * wt;
+
+ int prescan_add = ROUND_POWER_OF_TWO(dequant_ptr[rc != 0] * 325, 7);
+ if (coeff < (zbins[rc != 0] * (1 << AOM_QM_BITS) + prescan_add) &&
+ coeff > (nzbins[rc != 0] * (1 << AOM_QM_BITS) - prescan_add))
+ non_zero_count--;
+ else
+ break;
+ }
+
+ // Quantization pass: All coefficients with index >= zero_flag are
+ // skippable. Note: zero_flag can be zero.
+ for (i = 0; i < non_zero_count; i++) {
+ const int rc = scan[i];
+ const int coeff = coeff_ptr[rc];
+ const int coeff_sign = (coeff >> 31);
+ const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+ int tmp32;
+
+ const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+ if (abs_coeff * wt >= (zbins[rc != 0] << AOM_QM_BITS)) {
+ int64_t tmp =
+ clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale),
+ INT16_MIN, INT16_MAX);
+ tmp *= wt;
+ tmp32 = (int)(((((tmp * quant_ptr[rc != 0]) >> 16) + tmp) *
+ quant_shift_ptr[rc != 0]) >>
+ (16 - log_scale + AOM_QM_BITS)); // quantization
+ qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
+ const int iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
+ const int dequant =
+ (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
+ AOM_QM_BITS;
+ const tran_low_t abs_dqcoeff = (tmp32 * dequant) >> log_scale;
+ dqcoeff_ptr[rc] = (tran_low_t)((abs_dqcoeff ^ coeff_sign) - coeff_sign);
+
+ if (tmp32) eob = i;
+ }
+ }
+ *eob_ptr = eob + 1;
+}
void quantize_b_helper_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
const int16_t *zbin_ptr, const int16_t *round_ptr,
const int16_t *quant_ptr,
@@ -74,6 +135,64 @@
*eob_ptr = eob + 1;
}
+void highbd_quantize_b_adaptive_helper_c(
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
+ const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
+ const qm_val_t *iqm_ptr, const int log_scale) {
+ int i, eob = -1;
+ const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], log_scale),
+ ROUND_POWER_OF_TWO(zbin_ptr[1], log_scale) };
+ const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
+ int dequant;
+ int idx_arr[4096];
+ (void)iscan;
+ int idx = 0;
+
+ memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+ memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
+ // Pre-scan pass
+ for (i = 0; i < n_coeffs; i++) {
+ const int rc = scan[i];
+ const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+ const int coeff = coeff_ptr[rc] * wt;
+
+ // If the coefficient is out of the base ZBIN range, keep it for
+ // quantization.
+ int prescan_add = ROUND_POWER_OF_TWO(dequant_ptr[rc != 0] * 325, 7);
+ if (coeff >= (zbins[rc != 0] * (1 << AOM_QM_BITS) + prescan_add) &&
+ coeff <= (nzbins[rc != 0] * (1 << AOM_QM_BITS) - prescan_add))
+ idx_arr[idx++] = i;
+ }
+
+ // Quantization pass: only process the coefficients selected in
+ // pre-scan pass. Note: idx can be zero.
+ for (i = 0; i < idx; i++) {
+ const int rc = scan[idx_arr[i]];
+ const int coeff = coeff_ptr[rc];
+ const int coeff_sign = (coeff >> 31);
+ const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+ const qm_val_t iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
+ const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+ const int64_t tmp1 =
+ abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale);
+ const int64_t tmpw = tmp1 * wt;
+ const int64_t tmp2 = ((tmpw * quant_ptr[rc != 0]) >> 16) + tmpw;
+ const int abs_qcoeff = (int)((tmp2 * quant_shift_ptr[rc != 0]) >>
+ (16 - log_scale + AOM_QM_BITS));
+ qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
+ dequant =
+ (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
+ const tran_low_t abs_dqcoeff = (abs_qcoeff * dequant) >> log_scale;
+ dqcoeff_ptr[rc] = (tran_low_t)((abs_dqcoeff ^ coeff_sign) - coeff_sign);
+ if (abs_qcoeff) eob = idx_arr[i];
+ }
+ *eob_ptr = eob + 1;
+}
+
void highbd_quantize_b_helper_c(
const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
const int16_t *round_ptr, const int16_t *quant_ptr,
@@ -139,9 +258,17 @@
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
+#if ADAPTIVE_QUANT_B
+ quantize_b_adaptive_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr,
+ quant_ptr, quant_shift_ptr, qcoeff_ptr,
+ dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
+ NULL, NULL, 0);
+
+#else
quantize_b_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr,
quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
eob_ptr, scan, iscan, NULL, NULL, 0);
+#endif
}
void aom_quantize_b_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
@@ -151,9 +278,16 @@
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
+#if ADAPTIVE_QUANT_B
+ quantize_b_adaptive_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr,
+ quant_ptr, quant_shift_ptr, qcoeff_ptr,
+ dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
+ NULL, NULL, 1);
+#else
quantize_b_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr,
quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
eob_ptr, scan, iscan, NULL, NULL, 1);
+#endif
}
void aom_quantize_b_64x64_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
@@ -163,9 +297,16 @@
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
+#if ADAPTIVE_QUANT_B
+ quantize_b_adaptive_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr,
+ quant_ptr, quant_shift_ptr, qcoeff_ptr,
+ dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
+ NULL, NULL, 2);
+#else
quantize_b_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr,
quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
eob_ptr, scan, iscan, NULL, NULL, 2);
+#endif
}
void aom_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
@@ -175,10 +316,17 @@
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
+#if ADAPTIVE_QUANT_B
+ highbd_quantize_b_adaptive_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr,
+ quant_ptr, quant_shift_ptr, qcoeff_ptr,
+ dqcoeff_ptr, dequant_ptr, eob_ptr, scan,
+ iscan, NULL, NULL, 0);
+#else
highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr,
quant_ptr, quant_shift_ptr, qcoeff_ptr,
dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
NULL, NULL, 0);
+#endif
}
void aom_highbd_quantize_b_32x32_c(
@@ -187,10 +335,17 @@
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
+#if ADAPTIVE_QUANT_B
+ highbd_quantize_b_adaptive_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr,
+ quant_ptr, quant_shift_ptr, qcoeff_ptr,
+ dqcoeff_ptr, dequant_ptr, eob_ptr, scan,
+ iscan, NULL, NULL, 1);
+#else
highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr,
quant_ptr, quant_shift_ptr, qcoeff_ptr,
dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
NULL, NULL, 1);
+#endif
}
void aom_highbd_quantize_b_64x64_c(
@@ -199,8 +354,15 @@
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
+#if ADAPTIVE_QUANT_B
+ highbd_quantize_b_adaptive_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr,
+ quant_ptr, quant_shift_ptr, qcoeff_ptr,
+ dqcoeff_ptr, dequant_ptr, eob_ptr, scan,
+ iscan, NULL, NULL, 2);
+#else
highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr,
quant_ptr, quant_shift_ptr, qcoeff_ptr,
dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
NULL, NULL, 2);
+#endif
}
diff --git a/aom_dsp/quantize.h b/aom_dsp/quantize.h
index c55ab23..df7f306 100644
--- a/aom_dsp/quantize.h
+++ b/aom_dsp/quantize.h
@@ -16,6 +16,8 @@
#include "aom_dsp/aom_dsp_common.h"
+#define ADAPTIVE_QUANT_B 0
+
#ifdef __cplusplus
extern "C" {
#endif
diff --git a/av1/encoder/av1_quantize.c b/av1/encoder/av1_quantize.c
index 21ab4db..9e48358 100644
--- a/av1/encoder/av1_quantize.c
+++ b/av1/encoder/av1_quantize.c
@@ -281,6 +281,31 @@
dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
sc->iscan, qm_ptr, iqm_ptr, qparam->log_scale);
} else {
+#if ADAPTIVE_QUANT_B
+ // TODO(sarahparker) These quantize_b optimizations need SIMD
+ // implementations
+ switch (qparam->log_scale) {
+ case 0:
+ aom_quantize_b_c(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
+ p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
+ dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
+ sc->iscan);
+ break;
+ case 1:
+ aom_quantize_b_32x32_c(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
+ p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
+ dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
+ sc->iscan);
+ break;
+ case 2:
+ aom_quantize_b_64x64_c(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
+ p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
+ dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
+ sc->iscan);
+ break;
+ default: assert(0);
+ }
+#else
switch (qparam->log_scale) {
case 0:
aom_quantize_b(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
@@ -302,6 +327,7 @@
break;
default: assert(0);
}
+#endif
}
}