Remove obsoleted skip_block for highbd_quantize_b
1. Remove skip_block from function parameter list
2. Remove branch for skip_block == 1
3. Function list
aom_highbd_quantize_b_{c,sse2,avx2}
aom_highbd_quantize_b_32x32_{c,sse2,avx2}
aom_highbd_quantize_b_64x64_{c,sse2,avx2}
Change-Id: I7aef5798997eaa9997d84ae6d2b48d1cea556ed8
diff --git a/aom_dsp/aom_dsp_rtcd_defs.pl b/aom_dsp/aom_dsp_rtcd_defs.pl
index 50af6fd..81c9b34 100755
--- a/aom_dsp/aom_dsp_rtcd_defs.pl
+++ b/aom_dsp/aom_dsp_rtcd_defs.pl
@@ -529,13 +529,13 @@
} # CONFIG_AV1_ENCODER
if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
- add_proto qw/void aom_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ add_proto qw/void aom_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/aom_highbd_quantize_b sse2 avx2/;
- add_proto qw/void aom_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ add_proto qw/void aom_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/aom_highbd_quantize_b_32x32 sse2/;
- add_proto qw/void aom_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ add_proto qw/void aom_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
} # CONFIG_AV1_ENCODER
diff --git a/aom_dsp/quantize.c b/aom_dsp/quantize.c
index e1601cc..d8151f0 100644
--- a/aom_dsp/quantize.c
+++ b/aom_dsp/quantize.c
@@ -77,8 +77,8 @@
}
void highbd_quantize_b_helper_c(
- const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
- const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
+ const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
@@ -95,42 +95,40 @@
memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
- if (!skip_block) {
- // Pre-scan pass
- for (i = 0; i < n_coeffs; i++) {
- const int rc = scan[i];
- const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
- const int coeff = coeff_ptr[rc] * wt;
+ // Pre-scan pass
+ for (i = 0; i < n_coeffs; i++) {
+ const int rc = scan[i];
+ const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+ const int coeff = coeff_ptr[rc] * wt;
- // If the coefficient is out of the base ZBIN range, keep it for
- // quantization.
- if (coeff >= (zbins[rc != 0] * (1 << AOM_QM_BITS)) ||
- coeff <= (nzbins[rc != 0] * (1 << AOM_QM_BITS)))
- idx_arr[idx++] = i;
- }
+ // If the coefficient is out of the base ZBIN range, keep it for
+ // quantization.
+ if (coeff >= (zbins[rc != 0] * (1 << AOM_QM_BITS)) ||
+ coeff <= (nzbins[rc != 0] * (1 << AOM_QM_BITS)))
+ idx_arr[idx++] = i;
+ }
- // Quantization pass: only process the coefficients selected in
- // pre-scan pass. Note: idx can be zero.
- for (i = 0; i < idx; i++) {
- const int rc = scan[idx_arr[i]];
- const int coeff = coeff_ptr[rc];
- const int coeff_sign = (coeff >> 31);
- const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
- const qm_val_t iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- const int64_t tmp1 =
- abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale);
- const int64_t tmpw = tmp1 * wt;
- const int64_t tmp2 = ((tmpw * quant_ptr[rc != 0]) >> 16) + tmpw;
- const int abs_qcoeff = (int)((tmp2 * quant_shift_ptr[rc != 0]) >>
- (16 - log_scale + AOM_QM_BITS));
- qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
- dequant = (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
- AOM_QM_BITS;
- const tran_low_t abs_dqcoeff = (abs_qcoeff * dequant) >> log_scale;
- dqcoeff_ptr[rc] = (tran_low_t)((abs_dqcoeff ^ coeff_sign) - coeff_sign);
- if (abs_qcoeff) eob = idx_arr[i];
- }
+ // Quantization pass: only process the coefficients selected in
+ // pre-scan pass. Note: idx can be zero.
+ for (i = 0; i < idx; i++) {
+ const int rc = scan[idx_arr[i]];
+ const int coeff = coeff_ptr[rc];
+ const int coeff_sign = (coeff >> 31);
+ const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+ const qm_val_t iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
+ const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+ const int64_t tmp1 =
+ abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale);
+ const int64_t tmpw = tmp1 * wt;
+ const int64_t tmp2 = ((tmpw * quant_ptr[rc != 0]) >> 16) + tmpw;
+ const int abs_qcoeff = (int)((tmp2 * quant_shift_ptr[rc != 0]) >>
+ (16 - log_scale + AOM_QM_BITS));
+ qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
+ dequant =
+ (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
+ const tran_low_t abs_dqcoeff = (abs_qcoeff * dequant) >> log_scale;
+ dqcoeff_ptr[rc] = (tran_low_t)((abs_dqcoeff ^ coeff_sign) - coeff_sign);
+ if (abs_qcoeff) eob = idx_arr[i];
}
*eob_ptr = eob + 1;
}
@@ -174,38 +172,38 @@
}
void aom_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
- int skip_block, const int16_t *zbin_ptr,
- const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *zbin_ptr, const int16_t *round_ptr,
+ const int16_t *quant_ptr,
const int16_t *quant_shift_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
- highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr,
- round_ptr, quant_ptr, quant_shift_ptr, qcoeff_ptr,
+ highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr,
+ quant_ptr, quant_shift_ptr, qcoeff_ptr,
dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
NULL, NULL, 0);
}
void aom_highbd_quantize_b_32x32_c(
- const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
- const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
+ const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
- highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr,
- round_ptr, quant_ptr, quant_shift_ptr, qcoeff_ptr,
+ highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr,
+ quant_ptr, quant_shift_ptr, qcoeff_ptr,
dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
NULL, NULL, 1);
}
void aom_highbd_quantize_b_64x64_c(
- const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
- const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
+ const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
- highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr,
- round_ptr, quant_ptr, quant_shift_ptr, qcoeff_ptr,
+ highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr,
+ quant_ptr, quant_shift_ptr, qcoeff_ptr,
dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
NULL, NULL, 2);
}
diff --git a/aom_dsp/quantize.h b/aom_dsp/quantize.h
index a47bd05..47e78f3 100644
--- a/aom_dsp/quantize.h
+++ b/aom_dsp/quantize.h
@@ -38,16 +38,16 @@
const int16_t *iscan);
void highbd_quantize_b_helper_c(
- const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
- const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
+ const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
const qm_val_t *iqm_ptr, const int log_scale);
void aom_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
- int skip_block, const int16_t *zbin_ptr,
- const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *zbin_ptr, const int16_t *round_ptr,
+ const int16_t *quant_ptr,
const int16_t *quant_shift_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
diff --git a/aom_dsp/x86/highbd_quantize_intrin_avx2.c b/aom_dsp/x86/highbd_quantize_intrin_avx2.c
index dea113a..b968920 100644
--- a/aom_dsp/x86/highbd_quantize_intrin_avx2.c
+++ b/aom_dsp/x86/highbd_quantize_intrin_avx2.c
@@ -110,7 +110,7 @@
}
void aom_highbd_quantize_b_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
- int skip_block, const int16_t *zbin_ptr,
+ const int16_t *zbin_ptr,
const int16_t *round_ptr,
const int16_t *quant_ptr,
const int16_t *quant_shift_ptr,
@@ -120,12 +120,23 @@
(void)scan;
const unsigned int step = 8;
- if (LIKELY(!skip_block)) {
- __m256i qp[5], coeff;
- init_qp(zbin_ptr, round_ptr, quant_ptr, dequant_ptr, quant_shift_ptr, qp);
- coeff = _mm256_loadu_si256((const __m256i *)coeff_ptr);
+ __m256i qp[5], coeff;
+ init_qp(zbin_ptr, round_ptr, quant_ptr, dequant_ptr, quant_shift_ptr, qp);
+ coeff = _mm256_loadu_si256((const __m256i *)coeff_ptr);
- __m256i eob = _mm256_setzero_si256();
+ __m256i eob = _mm256_setzero_si256();
+ quantize(qp, &coeff, iscan, qcoeff_ptr, dqcoeff_ptr, &eob);
+
+ coeff_ptr += step;
+ qcoeff_ptr += step;
+ dqcoeff_ptr += step;
+ iscan += step;
+ n_coeffs -= step;
+
+ update_qp(qp);
+
+ while (n_coeffs > 0) {
+ coeff = _mm256_loadu_si256((const __m256i *)coeff_ptr);
quantize(qp, &coeff, iscan, qcoeff_ptr, dqcoeff_ptr, &eob);
coeff_ptr += step;
@@ -133,40 +144,17 @@
dqcoeff_ptr += step;
iscan += step;
n_coeffs -= step;
-
- update_qp(qp);
-
- while (n_coeffs > 0) {
- coeff = _mm256_loadu_si256((const __m256i *)coeff_ptr);
- quantize(qp, &coeff, iscan, qcoeff_ptr, dqcoeff_ptr, &eob);
-
- coeff_ptr += step;
- qcoeff_ptr += step;
- dqcoeff_ptr += step;
- iscan += step;
- n_coeffs -= step;
- }
- {
- __m256i eob_s;
- eob_s = _mm256_shuffle_epi32(eob, 0xe);
- eob = _mm256_max_epi16(eob, eob_s);
- eob_s = _mm256_shufflelo_epi16(eob, 0xe);
- eob = _mm256_max_epi16(eob, eob_s);
- eob_s = _mm256_shufflelo_epi16(eob, 1);
- eob = _mm256_max_epi16(eob, eob_s);
- const __m128i final_eob = _mm_max_epi16(_mm256_castsi256_si128(eob),
- _mm256_extractf128_si256(eob, 1));
- *eob_ptr = _mm_extract_epi16(final_eob, 0);
- }
- } else {
- do {
- const __m256i zero = _mm256_setzero_si256();
- _mm256_storeu_si256((__m256i *)qcoeff_ptr, zero);
- _mm256_storeu_si256((__m256i *)dqcoeff_ptr, zero);
- qcoeff_ptr += step;
- dqcoeff_ptr += step;
- n_coeffs -= step;
- } while (n_coeffs > 0);
- *eob_ptr = 0;
+ }
+ {
+ __m256i eob_s;
+ eob_s = _mm256_shuffle_epi32(eob, 0xe);
+ eob = _mm256_max_epi16(eob, eob_s);
+ eob_s = _mm256_shufflelo_epi16(eob, 0xe);
+ eob = _mm256_max_epi16(eob, eob_s);
+ eob_s = _mm256_shufflelo_epi16(eob, 1);
+ eob = _mm256_max_epi16(eob, eob_s);
+ const __m128i final_eob = _mm_max_epi16(_mm256_castsi256_si128(eob),
+ _mm256_extractf128_si256(eob, 1));
+ *eob_ptr = _mm_extract_epi16(final_eob, 0);
}
}
diff --git a/aom_dsp/x86/highbd_quantize_intrin_sse2.c b/aom_dsp/x86/highbd_quantize_intrin_sse2.c
index 5570ca5..58e5f98 100644
--- a/aom_dsp/x86/highbd_quantize_intrin_sse2.c
+++ b/aom_dsp/x86/highbd_quantize_intrin_sse2.c
@@ -16,7 +16,7 @@
#include "aom_ports/mem.h"
void aom_highbd_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t count,
- int skip_block, const int16_t *zbin_ptr,
+ const int16_t *zbin_ptr,
const int16_t *round_ptr,
const int16_t *quant_ptr,
const int16_t *quant_shift_ptr,
@@ -41,50 +41,48 @@
memset(qcoeff_ptr, 0, count * sizeof(*qcoeff_ptr));
memset(dqcoeff_ptr, 0, count * sizeof(*dqcoeff_ptr));
- if (!skip_block) {
- // Pre-scan pass
- for (i = ((int)count / 4) - 1; i >= 0; i--) {
- __m128i coeffs, cmp1, cmp2;
- int test;
- coeffs = _mm_load_si128((const __m128i *)(coeff_ptr + i * 4));
- cmp1 = _mm_cmplt_epi32(coeffs, zbins[i != 0]);
- cmp2 = _mm_cmpgt_epi32(coeffs, nzbins[i != 0]);
- cmp1 = _mm_and_si128(cmp1, cmp2);
- test = _mm_movemask_epi8(cmp1);
- if (test == 0xffff)
- non_zero_regs--;
- else
- break;
- }
+ // Pre-scan pass
+ for (i = ((int)count / 4) - 1; i >= 0; i--) {
+ __m128i coeffs, cmp1, cmp2;
+ int test;
+ coeffs = _mm_load_si128((const __m128i *)(coeff_ptr + i * 4));
+ cmp1 = _mm_cmplt_epi32(coeffs, zbins[i != 0]);
+ cmp2 = _mm_cmpgt_epi32(coeffs, nzbins[i != 0]);
+ cmp1 = _mm_and_si128(cmp1, cmp2);
+ test = _mm_movemask_epi8(cmp1);
+ if (test == 0xffff)
+ non_zero_regs--;
+ else
+ break;
+ }
- // Quantization pass:
- for (i = 0; i < non_zero_regs; i++) {
- __m128i coeffs, coeffs_sign, tmp1, tmp2;
- int test;
- int abs_coeff[4];
- int coeff_sign[4];
+ // Quantization pass:
+ for (i = 0; i < non_zero_regs; i++) {
+ __m128i coeffs, coeffs_sign, tmp1, tmp2;
+ int test;
+ int abs_coeff[4];
+ int coeff_sign[4];
- coeffs = _mm_load_si128((const __m128i *)(coeff_ptr + i * 4));
- coeffs_sign = _mm_srai_epi32(coeffs, 31);
- coeffs = _mm_sub_epi32(_mm_xor_si128(coeffs, coeffs_sign), coeffs_sign);
- tmp1 = _mm_cmpgt_epi32(coeffs, zbins[i != 0]);
- tmp2 = _mm_cmpeq_epi32(coeffs, zbins[i != 0]);
- tmp1 = _mm_or_si128(tmp1, tmp2);
- test = _mm_movemask_epi8(tmp1);
- _mm_storeu_si128((__m128i *)abs_coeff, coeffs);
- _mm_storeu_si128((__m128i *)coeff_sign, coeffs_sign);
+ coeffs = _mm_load_si128((const __m128i *)(coeff_ptr + i * 4));
+ coeffs_sign = _mm_srai_epi32(coeffs, 31);
+ coeffs = _mm_sub_epi32(_mm_xor_si128(coeffs, coeffs_sign), coeffs_sign);
+ tmp1 = _mm_cmpgt_epi32(coeffs, zbins[i != 0]);
+ tmp2 = _mm_cmpeq_epi32(coeffs, zbins[i != 0]);
+ tmp1 = _mm_or_si128(tmp1, tmp2);
+ test = _mm_movemask_epi8(tmp1);
+ _mm_storeu_si128((__m128i *)abs_coeff, coeffs);
+ _mm_storeu_si128((__m128i *)coeff_sign, coeffs_sign);
- for (j = 0; j < 4; j++) {
- if (test & (1 << (4 * j))) {
- int k = 4 * i + j;
- const int64_t tmp3 = abs_coeff[j] + round_ptr[k != 0];
- const int64_t tmp4 = ((tmp3 * quant_ptr[k != 0]) >> 16) + tmp3;
- const uint32_t abs_qcoeff =
- (uint32_t)((tmp4 * quant_shift_ptr[k != 0]) >> 16);
- qcoeff_ptr[k] = (int)(abs_qcoeff ^ coeff_sign[j]) - coeff_sign[j];
- dqcoeff_ptr[k] = qcoeff_ptr[k] * dequant_ptr[k != 0];
- if (abs_qcoeff) eob_i = iscan[k] > eob_i ? iscan[k] : eob_i;
- }
+ for (j = 0; j < 4; j++) {
+ if (test & (1 << (4 * j))) {
+ int k = 4 * i + j;
+ const int64_t tmp3 = abs_coeff[j] + round_ptr[k != 0];
+ const int64_t tmp4 = ((tmp3 * quant_ptr[k != 0]) >> 16) + tmp3;
+ const uint32_t abs_qcoeff =
+ (uint32_t)((tmp4 * quant_shift_ptr[k != 0]) >> 16);
+ qcoeff_ptr[k] = (int)(abs_qcoeff ^ coeff_sign[j]) - coeff_sign[j];
+ dqcoeff_ptr[k] = qcoeff_ptr[k] * dequant_ptr[k != 0];
+ if (abs_qcoeff) eob_i = iscan[k] > eob_i ? iscan[k] : eob_i;
}
}
}
@@ -92,8 +90,8 @@
}
void aom_highbd_quantize_b_32x32_sse2(
- const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
- const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
+ const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
@@ -116,38 +114,35 @@
memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
- if (!skip_block) {
- // Pre-scan pass
- for (i = 0; i < n_coeffs / 4; i++) {
- __m128i coeffs, cmp1, cmp2;
- int test;
- coeffs = _mm_load_si128((const __m128i *)(coeff_ptr + i * 4));
- cmp1 = _mm_cmplt_epi32(coeffs, zbins[i != 0]);
- cmp2 = _mm_cmpgt_epi32(coeffs, nzbins[i != 0]);
- cmp1 = _mm_and_si128(cmp1, cmp2);
- test = _mm_movemask_epi8(cmp1);
- if (!(test & 0xf)) idx_arr[idx++] = i * 4;
- if (!(test & 0xf0)) idx_arr[idx++] = i * 4 + 1;
- if (!(test & 0xf00)) idx_arr[idx++] = i * 4 + 2;
- if (!(test & 0xf000)) idx_arr[idx++] = i * 4 + 3;
- }
+ // Pre-scan pass
+ for (i = 0; i < n_coeffs / 4; i++) {
+ __m128i coeffs, cmp1, cmp2;
+ int test;
+ coeffs = _mm_load_si128((const __m128i *)(coeff_ptr + i * 4));
+ cmp1 = _mm_cmplt_epi32(coeffs, zbins[i != 0]);
+ cmp2 = _mm_cmpgt_epi32(coeffs, nzbins[i != 0]);
+ cmp1 = _mm_and_si128(cmp1, cmp2);
+ test = _mm_movemask_epi8(cmp1);
+ if (!(test & 0xf)) idx_arr[idx++] = i * 4;
+ if (!(test & 0xf0)) idx_arr[idx++] = i * 4 + 1;
+ if (!(test & 0xf00)) idx_arr[idx++] = i * 4 + 2;
+ if (!(test & 0xf000)) idx_arr[idx++] = i * 4 + 3;
+ }
- // Quantization pass: only process the coefficients selected in
- // pre-scan pass. Note: idx can be zero.
- for (i = 0; i < idx; i++) {
- const int rc = idx_arr[i];
- const int coeff = coeff_ptr[rc];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- const int64_t tmp1 =
- abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
- const int64_t tmp2 = ((tmp1 * quant_ptr[rc != 0]) >> 16) + tmp1;
- const uint32_t abs_qcoeff =
- (uint32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> 15);
- qcoeff_ptr[rc] = (int)(abs_qcoeff ^ coeff_sign) - coeff_sign;
- dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
- if (abs_qcoeff) eob = iscan[idx_arr[i]] > eob ? iscan[idx_arr[i]] : eob;
- }
+ // Quantization pass: only process the coefficients selected in
+ // pre-scan pass. Note: idx can be zero.
+ for (i = 0; i < idx; i++) {
+ const int rc = idx_arr[i];
+ const int coeff = coeff_ptr[rc];
+ const int coeff_sign = (coeff >> 31);
+ const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+ const int64_t tmp1 = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
+ const int64_t tmp2 = ((tmp1 * quant_ptr[rc != 0]) >> 16) + tmp1;
+ const uint32_t abs_qcoeff =
+ (uint32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> 15);
+ qcoeff_ptr[rc] = (int)(abs_qcoeff ^ coeff_sign) - coeff_sign;
+ dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
+ if (abs_qcoeff) eob = iscan[idx_arr[i]] > eob ? iscan[idx_arr[i]] : eob;
}
*eob_ptr = eob + 1;
}
diff --git a/av1/encoder/av1_quantize.c b/av1/encoder/av1_quantize.c
index d0477b3..f971e56 100644
--- a/av1/encoder/av1_quantize.c
+++ b/av1/encoder/av1_quantize.c
@@ -392,28 +392,25 @@
tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
const SCAN_ORDER *sc,
const QUANT_PARAM *qparam) {
- // obsolete skip_block
- const int skip_block = 0;
const qm_val_t *qm_ptr = qparam->qmatrix;
const qm_val_t *iqm_ptr = qparam->iqmatrix;
if (qm_ptr != NULL && iqm_ptr != NULL) {
- highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, p->zbin_QTX,
- p->round_QTX, p->quant_QTX, p->quant_shift_QTX,
- qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX, eob_ptr,
- sc->scan, sc->iscan, qm_ptr, iqm_ptr,
- qparam->log_scale);
+ highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
+ p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
+ dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
+ sc->iscan, qm_ptr, iqm_ptr, qparam->log_scale);
} else {
switch (qparam->log_scale) {
case 0:
if (LIKELY(n_coeffs >= 8)) {
- aom_highbd_quantize_b(coeff_ptr, n_coeffs, skip_block, p->zbin_QTX,
- p->round_QTX, p->quant_QTX, p->quant_shift_QTX,
- qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX,
- eob_ptr, sc->scan, sc->iscan);
+ aom_highbd_quantize_b(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
+ p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
+ dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
+ sc->iscan);
} else {
// TODO(luoyi): Need SIMD (e.g. sse2) for smaller block size
// quantization
- aom_highbd_quantize_b_c(coeff_ptr, n_coeffs, skip_block, p->zbin_QTX,
+ aom_highbd_quantize_b_c(coeff_ptr, n_coeffs, p->zbin_QTX,
p->round_QTX, p->quant_QTX,
p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr,
p->dequant_QTX, eob_ptr, sc->scan, sc->iscan);
@@ -421,15 +418,15 @@
break;
case 1:
aom_highbd_quantize_b_32x32(
- coeff_ptr, n_coeffs, skip_block, p->zbin_QTX, p->round_QTX,
- p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr,
- p->dequant_QTX, eob_ptr, sc->scan, sc->iscan);
+ coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX,
+ p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX,
+ eob_ptr, sc->scan, sc->iscan);
break;
case 2:
aom_highbd_quantize_b_64x64(
- coeff_ptr, n_coeffs, skip_block, p->zbin_QTX, p->round_QTX,
- p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr,
- p->dequant_QTX, eob_ptr, sc->scan, sc->iscan);
+ coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX,
+ p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX,
+ eob_ptr, sc->scan, sc->iscan);
break;
default: assert(0);
}
diff --git a/test/quantize_func_test.cc b/test/quantize_func_test.cc
index 97e73bf..d1c9ed7 100644
--- a/test/quantize_func_test.cc
+++ b/test/quantize_func_test.cc
@@ -27,15 +27,7 @@
namespace {
using libaom_test::ACMRandom;
-#define QUAN_PARAM_LIST \
- const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, \
- const int16_t *zbin_ptr, const int16_t *round_ptr, \
- const int16_t *quant_ptr, const int16_t *quant_shift_ptr, \
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, \
- const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, \
- const int16_t *iscan
-
-#define QUAN_PARAM_LIST_NO_SKIP \
+#define QUAN_PARAM_LIST \
const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, \
const int16_t *round_ptr, const int16_t *quant_ptr, \
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, \
@@ -43,44 +35,34 @@
const int16_t *scan, const int16_t *iscan
typedef void (*QuantizeFunc)(QUAN_PARAM_LIST);
-typedef void (*QuantizeFuncHbd)(QUAN_PARAM_LIST_NO_SKIP, int log_scale);
-typedef void (*QuantizeFuncNoSkip)(QUAN_PARAM_LIST_NO_SKIP);
+typedef void (*QuantizeFuncHbd)(QUAN_PARAM_LIST, int log_scale);
#define HBD_QUAN_FUNC \
fn(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, \
qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan, log_scale)
-#define LBD_QUAN_FUNC_NO_SKIP \
+#define LBD_QUAN_FUNC \
fn(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, \
qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan)
template <QuantizeFuncHbd fn>
void highbd_quan16x16_wrapper(QUAN_PARAM_LIST) {
- (void)skip_block;
const int log_scale = 0;
HBD_QUAN_FUNC;
}
template <QuantizeFuncHbd fn>
void highbd_quan32x32_wrapper(QUAN_PARAM_LIST) {
- (void)skip_block;
const int log_scale = 1;
HBD_QUAN_FUNC;
}
template <QuantizeFuncHbd fn>
void highbd_quan64x64_wrapper(QUAN_PARAM_LIST) {
- (void)skip_block;
const int log_scale = 2;
HBD_QUAN_FUNC;
}
-template <QuantizeFuncNoSkip fn>
-void lowbd_quan_wrapper(QUAN_PARAM_LIST) {
- (void)skip_block;
- LBD_QUAN_FUNC_NO_SKIP;
-}
-
typedef enum { TYPE_B, TYPE_DC, TYPE_FP } QuantType;
using ::testing::tuple;
@@ -125,7 +107,6 @@
void QuantizeRun(bool is_loop, int q = 0, int test_num = 1) {
tran_low_t *coeff_ptr = coeff_;
const intptr_t n_coeffs = coeff_num();
- const int skip_block = 0;
tran_low_t *qcoeff_ref = coeff_ptr + n_coeffs;
tran_low_t *dqcoeff_ref = qcoeff_ref + n_coeffs;
@@ -158,13 +139,13 @@
memset(qcoeff_ref, 0, 5 * n_coeffs * sizeof(*qcoeff_ref));
- quant_ref_(coeff_ptr, n_coeffs, skip_block, zbin, round, quant,
- quant_shift, qcoeff_ref, dqcoeff_ref, dequant, &eob[0],
- sc->scan, sc->iscan);
+ quant_ref_(coeff_ptr, n_coeffs, zbin, round, quant, quant_shift,
+ qcoeff_ref, dqcoeff_ref, dequant, &eob[0], sc->scan,
+ sc->iscan);
- ASM_REGISTER_STATE_CHECK(quant_(
- coeff_ptr, n_coeffs, skip_block, zbin, round, quant, quant_shift,
- qcoeff, dqcoeff, dequant, &eob[1], sc->scan, sc->iscan));
+ ASM_REGISTER_STATE_CHECK(quant_(coeff_ptr, n_coeffs, zbin, round, quant,
+ quant_shift, qcoeff, dqcoeff, dequant,
+ &eob[1], sc->scan, sc->iscan));
for (int j = 0; j < n_coeffs; ++j) {
ASSERT_EQ(qcoeff_ref[j], qcoeff[j])
@@ -286,7 +267,6 @@
TEST_P(QuantizeTest, DISABLED_Speed) {
tran_low_t *coeff_ptr = coeff_;
const intptr_t n_coeffs = coeff_num();
- const int skip_block = 0;
tran_low_t *qcoeff_ref = coeff_ptr + n_coeffs;
tran_low_t *dqcoeff_ref = qcoeff_ref + n_coeffs;
@@ -312,8 +292,8 @@
aom_usec_timer_start(&timer);
for (int n = 0; n < kNumTests; ++n) {
- quant_(coeff_ptr, n_coeffs, skip_block, zbin, round_fp, quant_fp,
- quant_shift, qcoeff, dqcoeff, dequant, eob, sc->scan, sc->iscan);
+ quant_(coeff_ptr, n_coeffs, zbin, round_fp, quant_fp, quant_shift, qcoeff,
+ dqcoeff, dequant, eob, sc->scan, sc->iscan);
}
aom_usec_timer_mark(&timer);
@@ -325,33 +305,24 @@
#if HAVE_AVX2
const QuantizeParam kQParamArrayAvx2[] = {
- make_tuple(&lowbd_quan_wrapper<av1_quantize_fp_c>,
- &lowbd_quan_wrapper<av1_quantize_fp_avx2>, TX_16X16, TYPE_FP,
+ make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_avx2, TX_16X16, TYPE_FP,
AOM_BITS_8),
- make_tuple(&lowbd_quan_wrapper<av1_quantize_fp_c>,
- &lowbd_quan_wrapper<av1_quantize_fp_avx2>, TX_4X16, TYPE_FP,
+ make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_avx2, TX_4X16, TYPE_FP,
AOM_BITS_8),
- make_tuple(&lowbd_quan_wrapper<av1_quantize_fp_c>,
- &lowbd_quan_wrapper<av1_quantize_fp_avx2>, TX_16X4, TYPE_FP,
+ make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_avx2, TX_16X4, TYPE_FP,
AOM_BITS_8),
- make_tuple(&lowbd_quan_wrapper<av1_quantize_fp_c>,
- &lowbd_quan_wrapper<av1_quantize_fp_avx2>, TX_32X8, TYPE_FP,
+ make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_avx2, TX_32X8, TYPE_FP,
AOM_BITS_8),
- make_tuple(&lowbd_quan_wrapper<av1_quantize_fp_c>,
- &lowbd_quan_wrapper<av1_quantize_fp_avx2>, TX_8X32, TYPE_FP,
+ make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_avx2, TX_8X32, TYPE_FP,
AOM_BITS_8),
- make_tuple(&lowbd_quan_wrapper<av1_quantize_fp_32x32_c>,
- &lowbd_quan_wrapper<av1_quantize_fp_32x32_avx2>, TX_32X32, TYPE_FP,
- AOM_BITS_8),
- make_tuple(&lowbd_quan_wrapper<av1_quantize_fp_32x32_c>,
- &lowbd_quan_wrapper<av1_quantize_fp_32x32_avx2>, TX_16X64, TYPE_FP,
- AOM_BITS_8),
- make_tuple(&lowbd_quan_wrapper<av1_quantize_fp_32x32_c>,
- &lowbd_quan_wrapper<av1_quantize_fp_32x32_avx2>, TX_64X16, TYPE_FP,
- AOM_BITS_8),
- make_tuple(&lowbd_quan_wrapper<av1_quantize_fp_64x64_c>,
- &lowbd_quan_wrapper<av1_quantize_fp_64x64_avx2>, TX_64X64, TYPE_FP,
- AOM_BITS_8),
+ make_tuple(&av1_quantize_fp_32x32_c, &av1_quantize_fp_32x32_avx2, TX_32X32,
+ TYPE_FP, AOM_BITS_8),
+ make_tuple(&av1_quantize_fp_32x32_c, &av1_quantize_fp_32x32_avx2, TX_16X64,
+ TYPE_FP, AOM_BITS_8),
+ make_tuple(&av1_quantize_fp_32x32_c, &av1_quantize_fp_32x32_avx2, TX_64X16,
+ TYPE_FP, AOM_BITS_8),
+ make_tuple(&av1_quantize_fp_64x64_c, &av1_quantize_fp_64x64_avx2, TX_64X64,
+ TYPE_FP, AOM_BITS_8),
make_tuple(&highbd_quan16x16_wrapper<av1_highbd_quantize_fp_c>,
&highbd_quan16x16_wrapper<av1_highbd_quantize_fp_avx2>, TX_16X16,
TYPE_FP, AOM_BITS_8),
@@ -393,20 +364,15 @@
#if HAVE_SSE2
const QuantizeParam kQParamArraySSE2[] = {
- make_tuple(&lowbd_quan_wrapper<av1_quantize_fp_c>,
- &lowbd_quan_wrapper<av1_quantize_fp_sse2>, TX_16X16, TYPE_FP,
+ make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_sse2, TX_16X16, TYPE_FP,
AOM_BITS_8),
- make_tuple(&lowbd_quan_wrapper<av1_quantize_fp_c>,
- &lowbd_quan_wrapper<av1_quantize_fp_sse2>, TX_4X16, TYPE_FP,
+ make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_sse2, TX_4X16, TYPE_FP,
AOM_BITS_8),
- make_tuple(&lowbd_quan_wrapper<av1_quantize_fp_c>,
- &lowbd_quan_wrapper<av1_quantize_fp_sse2>, TX_16X4, TYPE_FP,
+ make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_sse2, TX_16X4, TYPE_FP,
AOM_BITS_8),
- make_tuple(&lowbd_quan_wrapper<av1_quantize_fp_c>,
- &lowbd_quan_wrapper<av1_quantize_fp_sse2>, TX_8X32, TYPE_FP,
+ make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_sse2, TX_8X32, TYPE_FP,
AOM_BITS_8),
- make_tuple(&lowbd_quan_wrapper<av1_quantize_fp_c>,
- &lowbd_quan_wrapper<av1_quantize_fp_sse2>, TX_32X8, TYPE_FP,
+ make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_sse2, TX_32X8, TYPE_FP,
AOM_BITS_8),
make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_sse2, TX_16X16,
TYPE_B, AOM_BITS_8),