Add quant and dequant functions for new quant matrices.
Change-Id: If0ba62428216fa343b9a37a3b349edba4103c00a
diff --git a/vp10/common/vp10_rtcd_defs.pl b/vp10/common/vp10_rtcd_defs.pl
index 9831bdd..6f8900a 100644
--- a/vp10/common/vp10_rtcd_defs.pl
+++ b/vp10/common/vp10_rtcd_defs.pl
@@ -309,37 +309,67 @@
# ENCODEMB INVOKE
-if (vpx_config("CONFIG_VPX_HIGHBITDEPTH") eq "yes") {
-# the transform coefficients are held in 32-bit
-# values, so the assembler code for vp10_block_error can no longer be used.
- add_proto qw/int64_t vp10_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
- specialize qw/vp10_block_error/;
+if (vpx_config("CONFIG_AOM_QM") eq "yes") {
+ if (vpx_config("CONFIG_VPX_HIGHBITDEPTH") eq "yes") {
+ # the transform coefficients are held in 32-bit
+ # values, so the assembler code for vp10_block_error can no longer be used.
+ add_proto qw/int64_t vp10_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
+ specialize qw/vp10_block_error/;
- add_proto qw/void vp10_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/vp10_quantize_fp/;
+ add_proto qw/void vp10_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
- add_proto qw/void vp10_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/vp10_quantize_fp_32x32/;
+ add_proto qw/void vp10_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
- add_proto qw/void vp10_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/vp10_fdct8x8_quant/;
+ add_proto qw/void vp10_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
+ specialize qw/vp10_fdct8x8_quant/;
+ } else {
+ add_proto qw/int64_t vp10_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
+ specialize qw/vp10_block_error avx2 msa/, "$sse2_x86inc";
+
+ add_proto qw/int64_t vp10_block_error_fp/, "const int16_t *coeff, const int16_t *dqcoeff, int block_size";
+ specialize qw/vp10_block_error_fp neon/, "$sse2_x86inc";
+
+ add_proto qw/void vp10_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
+
+ add_proto qw/void vp10_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
+
+ add_proto qw/void vp10_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
+ }
} else {
- add_proto qw/int64_t vp10_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
- specialize qw/vp10_block_error avx2 msa/, "$sse2_x86inc";
+ if (vpx_config("CONFIG_VPX_HIGHBITDEPTH") eq "yes") {
+ # the transform coefficients are held in 32-bit
+ # values, so the assembler code for vp10_block_error can no longer be used.
+ add_proto qw/int64_t vp10_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
+ specialize qw/vp10_block_error/;
- add_proto qw/int64_t vp10_block_error_fp/, "const int16_t *coeff, const int16_t *dqcoeff, int block_size";
- specialize qw/vp10_block_error_fp neon/, "$sse2_x86inc";
+ add_proto qw/void vp10_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ specialize qw/vp10_quantize_fp/;
- add_proto qw/void vp10_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/vp10_quantize_fp neon sse2/, "$ssse3_x86_64_x86inc";
+ add_proto qw/void vp10_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ specialize qw/vp10_quantize_fp_32x32/;
- add_proto qw/void vp10_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/vp10_quantize_fp_32x32/, "$ssse3_x86_64_x86inc";
+ add_proto qw/void vp10_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ specialize qw/vp10_fdct8x8_quant/;
+ } else {
+ add_proto qw/int64_t vp10_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
+ specialize qw/vp10_block_error avx2 msa/, "$sse2_x86inc";
- add_proto qw/void vp10_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/vp10_fdct8x8_quant sse2 ssse3 neon/;
+ add_proto qw/int64_t vp10_block_error_fp/, "const int16_t *coeff, const int16_t *dqcoeff, int block_size";
+ specialize qw/vp10_block_error_fp neon/, "$sse2_x86inc";
+
+ add_proto qw/void vp10_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ specialize qw/vp10_quantize_fp neon sse2/, "$ssse3_x86_64_x86inc";
+
+ add_proto qw/void vp10_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ specialize qw/vp10_quantize_fp_32x32/, "$ssse3_x86_64_x86inc";
+
+ add_proto qw/void vp10_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ specialize qw/vp10_fdct8x8_quant sse2 ssse3 neon/;
+ }
+
}
+
# fdct functions
if (vpx_config("CONFIG_VPX_HIGHBITDEPTH") eq "yes") {
@@ -574,11 +604,18 @@
add_proto qw/int64_t vp10_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd";
specialize qw/vp10_highbd_block_error sse2/;
- add_proto qw/void vp10_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/vp10_highbd_quantize_fp/;
+ if (vpx_config("CONFIG_AOM_QM") eq "yes") {
+ add_proto qw/void vp10_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
- add_proto qw/void vp10_highbd_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/vp10_highbd_quantize_fp_32x32/;
+ add_proto qw/void vp10_highbd_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
+ } else {
+ add_proto qw/void vp10_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ specialize qw/vp10_highbd_quantize_fp/;
+
+ add_proto qw/void vp10_highbd_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ specialize qw/vp10_highbd_quantize_fp_32x32/;
+
+ }
# fdct functions
add_proto qw/void vp10_highbd_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c
index 59e0c9b..3f7f098 100644
--- a/vp10/decoder/decodeframe.c
+++ b/vp10/decoder/decodeframe.c
@@ -712,8 +712,8 @@
}
}
-static INLINE TX_SIZE dec_get_uv_tx_size(const MB_MODE_INFO *mbmi, int n4_wl,
- int n4_hl) {
+static INLINE TX_SIZE
+ dec_get_uv_tx_size(const MB_MODE_INFO *mbmi, int n4_wl, int n4_hl) {
// get minimum log2 num4x4s dimension
const int x = VPXMIN(n4_wl, n4_hl);
return VPXMIN(mbmi->tx_size, x);
@@ -1120,8 +1120,13 @@
static void setup_segmentation_dequant(VP10_COMMON *const cm) {
// Build y/uv dequant values based on segmentation.
+ int i = 0;
+#if CONFIG_AOM_QM
+ int lossless;
+ int j = 0;
+ int qmindex;
+#endif
if (cm->seg.enabled) {
- int i;
for (i = 0; i < MAX_SEGMENTS; ++i) {
const int qindex = vp10_get_qindex(&cm->seg, i, cm->base_qindex);
cm->y_dequant[i][0] =
@@ -1131,6 +1136,19 @@
vp10_dc_quant(qindex, cm->uv_dc_delta_q, cm->bit_depth);
cm->uv_dequant[i][1] =
vp10_ac_quant(qindex, cm->uv_ac_delta_q, cm->bit_depth);
+#if CONFIG_AOM_QM
+ lossless = qindex == 0 && cm->y_dc_delta_q == 0 &&
+ cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0;
+ // NB: uses base_qindex (not the segment qindex), so all non-lossless
+ // segments share one set per frame. No quant weighting when lossless.
+ qmindex = lossless ? QINDEX_RANGE - 1 : cm->base_qindex;
+ for (j = 0; j < TX_SIZES; ++j) {
+ cm->y_iqmatrix[i][1][j] = aom_iqmatrix(cm, qmindex, 0, j, 1);
+ cm->y_iqmatrix[i][0][j] = aom_iqmatrix(cm, qmindex, 0, j, 0);
+ cm->uv_iqmatrix[i][1][j] = aom_iqmatrix(cm, qmindex, 1, j, 1);
+ cm->uv_iqmatrix[i][0][j] = aom_iqmatrix(cm, qmindex, 1, j, 0);
+ }
+#endif
}
} else {
const int qindex = cm->base_qindex;
@@ -1143,6 +1161,18 @@
vp10_dc_quant(qindex, cm->uv_dc_delta_q, cm->bit_depth);
cm->uv_dequant[0][1] =
vp10_ac_quant(qindex, cm->uv_ac_delta_q, cm->bit_depth);
+#if CONFIG_AOM_QM
+ lossless = qindex == 0 && cm->y_dc_delta_q == 0 && cm->uv_dc_delta_q == 0 &&
+ cm->uv_ac_delta_q == 0;
+ // No quant weighting when lossless
+ qmindex = lossless ? QINDEX_RANGE - 1 : cm->base_qindex;
+ for (j = 0; j < TX_SIZES; ++j) {
+ cm->y_iqmatrix[i][1][j] = aom_iqmatrix(cm, qmindex, 0, j, 1);
+ cm->y_iqmatrix[i][0][j] = aom_iqmatrix(cm, qmindex, 0, j, 0);
+ cm->uv_iqmatrix[i][1][j] = aom_iqmatrix(cm, qmindex, 1, j, 1);
+ cm->uv_iqmatrix[i][0][j] = aom_iqmatrix(cm, qmindex, 1, j, 0);
+ }
+#endif
}
}
diff --git a/vp10/decoder/decoder.c b/vp10/decoder/decoder.c
index f979abf..438c2da 100644
--- a/vp10/decoder/decoder.c
+++ b/vp10/decoder/decoder.c
@@ -109,6 +109,10 @@
vp10_loop_filter_init(cm);
+#if CONFIG_AOM_QM
+ aom_qm_init(cm);
+#endif
+
cm->error.setjmp = 0;
vpx_get_worker_interface()->init(&pbi->lf_worker);
diff --git a/vp10/decoder/detokenize.c b/vp10/decoder/detokenize.c
index e57d1ef..d4da4bb 100644
--- a/vp10/decoder/detokenize.c
+++ b/vp10/decoder/detokenize.c
@@ -43,14 +43,25 @@
return val;
}
+#if CONFIG_AOM_QM
static int decode_coefs(const MACROBLOCKD *xd, PLANE_TYPE type,
tran_low_t *dqcoeff, TX_SIZE tx_size, const int16_t *dq,
int ctx, const int16_t *scan, const int16_t *nb,
- vpx_reader *r) {
+ vpx_reader *r, const qm_val_t *iqm[2][TX_SIZES])
+#else
+static int decode_coefs(const MACROBLOCKD *xd, PLANE_TYPE type,
+ tran_low_t *dqcoeff, TX_SIZE tx_size, const int16_t *dq,
+ int ctx, const int16_t *scan, const int16_t *nb,
+ vpx_reader *r)
+#endif
+{
FRAME_COUNTS *counts = xd->counts;
const int max_eob = 16 << (tx_size << 1);
const FRAME_CONTEXT *const fc = xd->fc;
const int ref = is_inter_block(&xd->mi[0]->mbmi);
+#if CONFIG_AOM_QM
+ const qm_val_t *iqmatrix = iqm[!ref][tx_size];
+#endif
int band, c = 0;
const vpx_prob(*coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] =
fc->coef_probs[tx_size][type][ref];
@@ -183,6 +194,10 @@
}
}
}
+#if CONFIG_AOM_QM
+ dqv = ((iqmatrix[scan[c]] * (int)dqv) + (1 << (AOM_QM_BITS - 1))) >>
+ AOM_QM_BITS;
+#endif
v = (val * dqv) >> dq_shift;
#if CONFIG_COEFFICIENT_RANGE_CHECKING
#if CONFIG_VPX_HIGHBITDEPTH
@@ -249,8 +264,16 @@
const int16_t *const dequant = pd->seg_dequant[seg_id];
const int ctx =
get_entropy_context(tx_size, pd->above_context + x, pd->left_context + y);
+#if CONFIG_AOM_QM
+ const int eob =
+ decode_coefs(xd, pd->plane_type, pd->dqcoeff, tx_size, dequant, ctx,
+ sc->scan, sc->neighbors, r, pd->seg_iqmatrix[seg_id]);
+#else
const int eob = decode_coefs(xd, pd->plane_type, pd->dqcoeff, tx_size,
dequant, ctx, sc->scan, sc->neighbors, r);
+#endif
dec_set_contexts(xd, pd, tx_size, eob > 0, x, y);
return eob;
}
+
+
diff --git a/vp10/encoder/dct.c b/vp10/encoder/dct.c
index 2a7ba7e..300a742 100644
--- a/vp10/encoder/dct.c
+++ b/vp10/encoder/dct.c
@@ -1021,7 +1021,12 @@
const int16_t *quant_shift_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan) {
+ const int16_t *scan, const int16_t *iscan
+#if CONFIG_AOM_QM
+ ,
+ const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr
+#endif
+ ) {
int eob = -1;
int i, j;
@@ -1107,16 +1112,29 @@
for (i = 0; i < n_coeffs; i++) {
const int rc = scan[i];
const int coeff = coeff_ptr[rc];
+#if CONFIG_AOM_QM
+ const qm_val_t wt = qm_ptr[rc];
+ const qm_val_t iwt = iqm_ptr[rc];
+ const int dequant =
+ (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
+ AOM_QM_BITS;
+#endif
const int coeff_sign = (coeff >> 31);
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- int tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
- tmp = (tmp * quant_ptr[rc != 0]) >> 16;
-
- qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
+ int64_t tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
+ int tmp32;
+#if CONFIG_AOM_QM
+ tmp32 = (tmp * quant_ptr[rc != 0] * wt) >> (16 + AOM_QM_BITS);
+ qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
+ dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant;
+#else
+ tmp32 = (tmp * quant_ptr[rc != 0]) >> 16;
+ qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
+#endif
- if (tmp) eob = i;
+ if (tmp32) eob = i;
}
}
*eob_ptr = eob + 1;
diff --git a/vp10/encoder/encodemb.c b/vp10/encoder/encodemb.c
index b2fbf13..afd38a3 100644
--- a/vp10/encoder/encodemb.c
+++ b/vp10/encoder/encodemb.c
@@ -99,6 +99,11 @@
const PLANE_TYPE type = pd->plane_type;
const int default_eob = 16 << (tx_size << 1);
const int mul = 1 + (tx_size == TX_32X32);
+#if CONFIG_AOM_QM
+ int seg_id = xd->mi[0]->mbmi.segment_id;
+ int is_intra = !is_inter_block(&xd->mi[0]->mbmi);
+ const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][is_intra][tx_size];
+#endif
const int16_t *dequant_ptr = pd->dequant;
const uint8_t *const band_translate = get_band_translate(tx_size);
TX_TYPE tx_type = get_tx_type(type, xd, block);
@@ -138,7 +143,11 @@
for (i = eob; i-- > 0;) {
int base_bits, d2, dx;
+
const int rc = scan[i];
+#if CONFIG_AOM_QM
+ int iwt = iqmatrix[rc];
+#endif
int x = qcoeff[rc];
/* Only add a trellis state for non-zero coefficients. */
if (x) {
@@ -182,9 +191,16 @@
rate0 = tokens[next][0].rate;
rate1 = tokens[next][1].rate;
+#if CONFIG_AOM_QM
+ if ((abs(x) * dequant_ptr[rc != 0] * iwt >
+ ((abs(coeff[rc]) * mul) << AOM_QM_BITS)) &&
+ (abs(x) * dequant_ptr[rc != 0] * iwt <
+ ((abs(coeff[rc]) * mul + dequant_ptr[rc != 0]) << AOM_QM_BITS)))
+#else
if ((abs(x) * dequant_ptr[rc != 0] > abs(coeff[rc]) * mul) &&
(abs(x) * dequant_ptr[rc != 0] <
abs(coeff[rc]) * mul + dequant_ptr[rc != 0]))
+#endif
shortcut = 1;
else
shortcut = 0;
@@ -239,6 +255,7 @@
#endif // CONFIG_VPX_HIGHBITDEPTH
d2 = dx * dx;
}
+
tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
tokens[i][1].error = d2 + (best ? error1 : error0);
tokens[i][1].next = next;
@@ -288,12 +305,21 @@
for (i = next; i < eob; i = next) {
const int x = tokens[i][best].qc;
const int rc = scan[i];
+#if CONFIG_AOM_QM
+ const int iwt = iqmatrix[rc];
+ const int dequant =
+ (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
+#endif
if (x) {
final_eob = i;
}
qcoeff[rc] = x;
+#if CONFIG_AOM_QM
+ dqcoeff[rc] = (x * dequant) / mul;
+#else
dqcoeff[rc] = (x * dequant_ptr[rc != 0]) / mul;
+#endif
next = tokens[i][best].next;
best = best_index[i][best];
@@ -328,11 +354,17 @@
vp10_fwht4x4(src_diff, coeff, diff_stride);
} else {
switch (tx_type) {
- case DCT_DCT: vpx_fdct4x4(src_diff, coeff, diff_stride); break;
+ case DCT_DCT:
+ vpx_fdct4x4(src_diff, coeff, diff_stride);
+ break;
case ADST_DCT:
case DCT_ADST:
- case ADST_ADST: vp10_fht4x4(src_diff, coeff, diff_stride, tx_type); break;
- default: assert(0); break;
+ case ADST_ADST:
+ vp10_fht4x4(src_diff, coeff, diff_stride, tx_type);
+ break;
+ default:
+ assert(0);
+ break;
}
}
}
@@ -343,8 +375,12 @@
case DCT_DCT:
case ADST_DCT:
case DCT_ADST:
- case ADST_ADST: vp10_fht8x8(src_diff, coeff, diff_stride, tx_type); break;
- default: assert(0); break;
+ case ADST_ADST:
+ vp10_fht8x8(src_diff, coeff, diff_stride, tx_type);
+ break;
+ default:
+ assert(0);
+ break;
}
}
@@ -354,8 +390,12 @@
case DCT_DCT:
case ADST_DCT:
case DCT_ADST:
- case ADST_ADST: vp10_fht16x16(src_diff, coeff, diff_stride, tx_type); break;
- default: assert(0); break;
+ case ADST_ADST:
+ vp10_fht16x16(src_diff, coeff, diff_stride, tx_type);
+ break;
+ default:
+ assert(0);
+ break;
}
}
@@ -363,11 +403,17 @@
tran_low_t *coeff, int diff_stride,
TX_TYPE tx_type) {
switch (tx_type) {
- case DCT_DCT: fdct32x32(rd_transform, src_diff, coeff, diff_stride); break;
+ case DCT_DCT:
+ fdct32x32(rd_transform, src_diff, coeff, diff_stride);
+ break;
case ADST_DCT:
case DCT_ADST:
- case ADST_ADST: assert(0); break;
- default: assert(0); break;
+ case ADST_ADST:
+ assert(0);
+ break;
+ default:
+ assert(0);
+ break;
}
}
@@ -379,13 +425,17 @@
vp10_highbd_fwht4x4(src_diff, coeff, diff_stride);
} else {
switch (tx_type) {
- case DCT_DCT: vpx_highbd_fdct4x4(src_diff, coeff, diff_stride); break;
+ case DCT_DCT:
+ vpx_highbd_fdct4x4(src_diff, coeff, diff_stride);
+ break;
case ADST_DCT:
case DCT_ADST:
case ADST_ADST:
vp10_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type);
break;
- default: assert(0); break;
+ default:
+ assert(0);
+ break;
}
}
}
@@ -393,26 +443,34 @@
static void highbd_fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type) {
switch (tx_type) {
- case DCT_DCT: vpx_highbd_fdct8x8(src_diff, coeff, diff_stride); break;
+ case DCT_DCT:
+ vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
+ break;
case ADST_DCT:
case DCT_ADST:
case ADST_ADST:
vp10_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type);
break;
- default: assert(0); break;
+ default:
+ assert(0);
+ break;
}
}
static void highbd_fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type) {
switch (tx_type) {
- case DCT_DCT: vpx_highbd_fdct16x16(src_diff, coeff, diff_stride); break;
+ case DCT_DCT:
+ vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
+ break;
case ADST_DCT:
case DCT_ADST:
case ADST_ADST:
vp10_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type);
break;
- default: assert(0); break;
+ default:
+ assert(0);
+ break;
}
}
@@ -425,8 +483,12 @@
break;
case ADST_DCT:
case DCT_ADST:
- case ADST_ADST: assert(0); break;
- default: assert(0); break;
+ case ADST_ADST:
+ assert(0);
+ break;
+ default:
+ assert(0);
+ break;
}
}
#endif // CONFIG_VPX_HIGHBITDEPTH
@@ -444,6 +506,12 @@
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
uint16_t *const eob = &p->eobs[block];
const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
+#if CONFIG_AOM_QM
+ int seg_id = xd->mi[0]->mbmi.segment_id;
+ int is_intra = !is_inter_block(&xd->mi[0]->mbmi);
+ const qm_val_t *qmatrix = pd->seg_qmatrix[seg_id][is_intra][tx_size];
+ const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][is_intra][tx_size];
+#endif
const int16_t *src_diff;
src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
@@ -455,21 +523,34 @@
vp10_highbd_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin,
p->round_fp, p->quant_fp, p->quant_shift,
qcoeff, dqcoeff, pd->dequant, eob,
- scan_order->scan, scan_order->iscan);
+ scan_order->scan,
+#if !CONFIG_AOM_QM
+ scan_order->iscan);
+#else
+ scan_order->iscan, qmatrix, iqmatrix);
+#endif
break;
case TX_16X16:
vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
vp10_highbd_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp,
p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob, scan_order->scan,
+#if !CONFIG_AOM_QM
scan_order->iscan);
+#else
+ scan_order->iscan, qmatrix, iqmatrix);
+#endif
break;
case TX_8X8:
vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
vp10_highbd_quantize_fp(coeff, 64, x->skip_block, p->zbin, p->round_fp,
p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob, scan_order->scan,
+#if !CONFIG_AOM_QM
scan_order->iscan);
+#else
+ scan_order->iscan, qmatrix, iqmatrix);
+#endif
break;
case TX_4X4:
if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
@@ -480,9 +561,14 @@
vp10_highbd_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp,
p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob, scan_order->scan,
+#if !CONFIG_AOM_QM
scan_order->iscan);
+#else
+ scan_order->iscan, qmatrix, iqmatrix);
+#endif
break;
- default: assert(0);
+ default:
+ assert(0);
}
return;
}
@@ -494,19 +580,32 @@
vp10_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin, p->round_fp,
p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob, scan_order->scan,
+#if !CONFIG_AOM_QM
scan_order->iscan);
+#else
+ scan_order->iscan, qmatrix, iqmatrix);
+#endif
break;
case TX_16X16:
vpx_fdct16x16(src_diff, coeff, diff_stride);
vp10_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp,
p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
- pd->dequant, eob, scan_order->scan, scan_order->iscan);
+ pd->dequant, eob, scan_order->scan,
+#if !CONFIG_AOM_QM
+ scan_order->iscan);
+#else
+ scan_order->iscan, qmatrix, iqmatrix);
+#endif
break;
case TX_8X8:
vp10_fdct8x8_quant(src_diff, diff_stride, coeff, 64, x->skip_block,
p->zbin, p->round_fp, p->quant_fp, p->quant_shift,
qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan,
+#if !CONFIG_AOM_QM
scan_order->iscan);
+#else
+ scan_order->iscan, qmatrix, iqmatrix);
+#endif
break;
case TX_4X4:
if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
@@ -516,9 +615,16 @@
}
vp10_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp,
p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
- pd->dequant, eob, scan_order->scan, scan_order->iscan);
+ pd->dequant, eob, scan_order->scan,
+#if !CONFIG_AOM_QM
+ scan_order->iscan);
+#else
+ scan_order->iscan, qmatrix, iqmatrix);
+#endif
break;
- default: assert(0); break;
+ default:
+ assert(0);
+ break;
}
}
@@ -532,6 +638,12 @@
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
uint16_t *const eob = &p->eobs[block];
const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
+ int seg_id = xd->mi[0]->mbmi.segment_id;
+#if CONFIG_AOM_QM
+ int is_intra = !is_inter_block(&xd->mi[0]->mbmi);
+ const qm_val_t *qmatrix = pd->seg_qmatrix[seg_id][is_intra][tx_size];
+ const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][is_intra][tx_size];
+#endif
const int16_t *src_diff;
src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
@@ -542,31 +654,49 @@
vpx_highbd_fdct32x32_1(src_diff, coeff, diff_stride);
vpx_highbd_quantize_dc_32x32(coeff, x->skip_block, p->round,
p->quant_fp[0], qcoeff, dqcoeff,
- pd->dequant[0], eob);
+ pd->dequant[0],
+#if !CONFIG_AOM_QM
+ eob);
+#else
+ eob, qmatrix, iqmatrix);
+#endif
break;
case TX_16X16:
vpx_highbd_fdct16x16_1(src_diff, coeff, diff_stride);
vpx_highbd_quantize_dc(coeff, 256, x->skip_block, p->round,
p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0],
+#if !CONFIG_AOM_QM
eob);
+#else
+ eob, qmatrix, iqmatrix);
+#endif
break;
case TX_8X8:
vpx_highbd_fdct8x8_1(src_diff, coeff, diff_stride);
vpx_highbd_quantize_dc(coeff, 64, x->skip_block, p->round,
p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0],
+#if !CONFIG_AOM_QM
eob);
+#else
+ eob, qmatrix, iqmatrix);
+#endif
break;
case TX_4X4:
- if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
+ if (xd->lossless[seg_id]) {
vp10_highbd_fwht4x4(src_diff, coeff, diff_stride);
} else {
vpx_highbd_fdct4x4(src_diff, coeff, diff_stride);
}
vpx_highbd_quantize_dc(coeff, 16, x->skip_block, p->round,
p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0],
+#if !CONFIG_AOM_QM
eob);
+#else
+ eob, qmatrix, iqmatrix);
+#endif
break;
- default: assert(0);
+ default:
+ assert(0);
}
return;
}
@@ -576,28 +706,50 @@
case TX_32X32:
vpx_fdct32x32_1(src_diff, coeff, diff_stride);
vpx_quantize_dc_32x32(coeff, x->skip_block, p->round, p->quant_fp[0],
- qcoeff, dqcoeff, pd->dequant[0], eob);
+ qcoeff, dqcoeff, pd->dequant[0],
+#if !CONFIG_AOM_QM
+ eob);
+#else
+ eob, qmatrix, iqmatrix);
+#endif
break;
case TX_16X16:
vpx_fdct16x16_1(src_diff, coeff, diff_stride);
vpx_quantize_dc(coeff, 256, x->skip_block, p->round, p->quant_fp[0],
- qcoeff, dqcoeff, pd->dequant[0], eob);
+ qcoeff, dqcoeff, pd->dequant[0],
+#if !CONFIG_AOM_QM
+ eob);
+#else
+ eob, qmatrix, iqmatrix);
+#endif
break;
case TX_8X8:
vpx_fdct8x8_1(src_diff, coeff, diff_stride);
vpx_quantize_dc(coeff, 64, x->skip_block, p->round, p->quant_fp[0],
- qcoeff, dqcoeff, pd->dequant[0], eob);
+ qcoeff, dqcoeff, pd->dequant[0],
+#if !CONFIG_AOM_QM
+ eob);
+#else
+ eob, qmatrix, iqmatrix);
+#endif
break;
case TX_4X4:
- if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
+ if (xd->lossless[seg_id]) {
vp10_fwht4x4(src_diff, coeff, diff_stride);
} else {
vpx_fdct4x4(src_diff, coeff, diff_stride);
}
vpx_quantize_dc(coeff, 16, x->skip_block, p->round, p->quant_fp[0],
- qcoeff, dqcoeff, pd->dequant[0], eob);
+ qcoeff, dqcoeff, pd->dequant[0],
+#if !CONFIG_AOM_QM
+ eob);
+#else
+ eob, qmatrix, iqmatrix);
+#endif
break;
- default: assert(0); break;
+ default:
+ assert(0);
+ break;
}
}
@@ -614,6 +766,12 @@
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
uint16_t *const eob = &p->eobs[block];
const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
+ int seg_id = xd->mi[0]->mbmi.segment_id;
+#if CONFIG_AOM_QM
+ int is_intra = !is_inter_block(&xd->mi[0]->mbmi);
+ const qm_val_t *qmatrix = pd->seg_qmatrix[seg_id][is_intra][tx_size];
+ const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][is_intra][tx_size];
+#endif
const int16_t *src_diff;
src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
@@ -626,31 +784,48 @@
vpx_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin,
p->round, p->quant, p->quant_shift, qcoeff,
dqcoeff, pd->dequant, eob, scan_order->scan,
+#if !CONFIG_AOM_QM
scan_order->iscan);
+#else
+ scan_order->iscan, qmatrix, iqmatrix);
+#endif
break;
case TX_16X16:
highbd_fwd_txfm_16x16(src_diff, coeff, diff_stride, tx_type);
vpx_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob, scan_order->scan,
+#if !CONFIG_AOM_QM
scan_order->iscan);
+#else
+ scan_order->iscan, qmatrix, iqmatrix);
+#endif
break;
case TX_8X8:
highbd_fwd_txfm_8x8(src_diff, coeff, diff_stride, tx_type);
vpx_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob, scan_order->scan,
+#if !CONFIG_AOM_QM
scan_order->iscan);
+#else
+ scan_order->iscan, qmatrix, iqmatrix);
+#endif
break;
case TX_4X4:
vp10_highbd_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type,
- xd->lossless[xd->mi[0]->mbmi.segment_id]);
+ xd->lossless[seg_id]);
vpx_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob, scan_order->scan,
+#if !CONFIG_AOM_QM
scan_order->iscan);
+#else
+ scan_order->iscan, qmatrix, iqmatrix);
+#endif
break;
- default: assert(0);
+ default:
+ assert(0);
}
return;
}
@@ -662,28 +837,49 @@
vpx_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob, scan_order->scan,
+#if !CONFIG_AOM_QM
scan_order->iscan);
+#else
+ scan_order->iscan, qmatrix, iqmatrix);
+#endif
break;
case TX_16X16:
fwd_txfm_16x16(src_diff, coeff, diff_stride, tx_type);
vpx_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, p->quant,
p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
- scan_order->scan, scan_order->iscan);
+ scan_order->scan,
+#if !CONFIG_AOM_QM
+ scan_order->iscan);
+#else
+ scan_order->iscan, qmatrix, iqmatrix);
+#endif
break;
case TX_8X8:
fwd_txfm_8x8(src_diff, coeff, diff_stride, tx_type);
vpx_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
- scan_order->scan, scan_order->iscan);
+ scan_order->scan,
+#if !CONFIG_AOM_QM
+ scan_order->iscan);
+#else
+ scan_order->iscan, qmatrix, iqmatrix);
+#endif
break;
case TX_4X4:
vp10_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type,
- xd->lossless[xd->mi[0]->mbmi.segment_id]);
+ xd->lossless[seg_id]);
vpx_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
- scan_order->scan, scan_order->iscan);
+ scan_order->scan,
+#if !CONFIG_AOM_QM
+ scan_order->iscan);
+#else
+ scan_order->iscan, qmatrix, iqmatrix);
+#endif
break;
- default: assert(0); break;
+ default:
+ assert(0);
+ break;
}
}
@@ -780,7 +976,9 @@
p->eobs[block], xd->bd, tx_type,
xd->lossless[xd->mi[0]->mbmi.segment_id]);
break;
- default: assert(0 && "Invalid transform size"); break;
+ default:
+ assert(0 && "Invalid transform size");
+ break;
}
return;
@@ -807,7 +1005,9 @@
vp10_inv_txfm_add_4x4(dqcoeff, dst, pd->dst.stride, p->eobs[block],
tx_type, xd->lossless[xd->mi[0]->mbmi.segment_id]);
break;
- default: assert(0 && "Invalid transform size"); break;
+ default:
+ assert(0 && "Invalid transform size");
+ break;
}
}
@@ -899,6 +1099,12 @@
uint8_t *src, *dst;
int16_t *src_diff;
uint16_t *eob = &p->eobs[block];
+ int seg_id = xd->mi[0]->mbmi.segment_id;
+#if CONFIG_AOM_QM
+ int is_intra = !is_inter_block(&xd->mi[0]->mbmi);
+ const qm_val_t *qmatrix = pd->seg_qmatrix[seg_id][is_intra][tx_size];
+ const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][is_intra][tx_size];
+#endif
const int src_stride = p->src.stride;
const int dst_stride = pd->dst.stride;
dst = &pd->dst.buf[4 * (blk_row * dst_stride + blk_col)];
@@ -921,7 +1127,12 @@
vpx_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin,
p->round, p->quant, p->quant_shift,
qcoeff, dqcoeff, pd->dequant, eob,
- scan_order->scan, scan_order->iscan);
+ scan_order->scan,
+#if !CONFIG_AOM_QM
+ scan_order->iscan);
+#else
+ scan_order->iscan, qmatrix, iqmatrix);
+#endif
}
if (*eob)
vp10_highbd_inv_txfm_add_32x32(dqcoeff, dst, dst_stride, *eob, xd->bd,
@@ -935,7 +1146,11 @@
vpx_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob, scan_order->scan,
+#if !CONFIG_AOM_QM
scan_order->iscan);
+#else
+ scan_order->iscan, qmatrix, iqmatrix);
+#endif
}
if (*eob)
vp10_highbd_inv_txfm_add_16x16(dqcoeff, dst, dst_stride, *eob, xd->bd,
@@ -949,7 +1164,11 @@
vpx_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob, scan_order->scan,
+#if !CONFIG_AOM_QM
scan_order->iscan);
+#else
+ scan_order->iscan, qmatrix, iqmatrix);
+#endif
}
if (*eob)
vp10_highbd_inv_txfm_add_8x8(dqcoeff, dst, dst_stride, *eob, xd->bd,
@@ -960,11 +1179,15 @@
vpx_highbd_subtract_block(4, 4, src_diff, diff_stride, src,
src_stride, dst, dst_stride, xd->bd);
vp10_highbd_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type,
- xd->lossless[mbmi->segment_id]);
+ xd->lossless[seg_id]);
vpx_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob, scan_order->scan,
+#if !CONFIG_AOM_QM
scan_order->iscan);
+#else
+ scan_order->iscan, qmatrix, iqmatrix);
+#endif
}
if (*eob)
@@ -972,9 +1195,11 @@
// eob<=1 which is significant (not just an optimization) for the
// lossless case.
vp10_highbd_inv_txfm_add_4x4(dqcoeff, dst, dst_stride, *eob, xd->bd,
- tx_type, xd->lossless[mbmi->segment_id]);
+ tx_type, xd->lossless[seg_id]);
break;
- default: assert(0); return;
+ default:
+ assert(0);
+ return;
}
if (*eob) *(args->skip) = 0;
return;
@@ -991,7 +1216,11 @@
vpx_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob, scan_order->scan,
+#if !CONFIG_AOM_QM
scan_order->iscan);
+#else
+ scan_order->iscan, qmatrix, iqmatrix);
+#endif
}
if (*eob)
vp10_inv_txfm_add_32x32(dqcoeff, dst, dst_stride, *eob, tx_type);
@@ -1003,7 +1232,12 @@
fwd_txfm_16x16(src_diff, coeff, diff_stride, tx_type);
vpx_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, p->quant,
p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
- scan_order->scan, scan_order->iscan);
+ scan_order->scan,
+#if !CONFIG_AOM_QM
+ scan_order->iscan);
+#else
+ scan_order->iscan, qmatrix, iqmatrix);
+#endif
}
if (*eob)
vp10_inv_txfm_add_16x16(dqcoeff, dst, dst_stride, *eob, tx_type);
@@ -1015,7 +1249,12 @@
fwd_txfm_8x8(src_diff, coeff, diff_stride, tx_type);
vpx_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
- scan_order->scan, scan_order->iscan);
+ scan_order->scan,
+#if !CONFIG_AOM_QM
+ scan_order->iscan);
+#else
+ scan_order->iscan, qmatrix, iqmatrix);
+#endif
}
if (*eob) vp10_inv_txfm_add_8x8(dqcoeff, dst, dst_stride, *eob, tx_type);
break;
@@ -1024,10 +1263,15 @@
vpx_subtract_block(4, 4, src_diff, diff_stride, src, src_stride, dst,
dst_stride);
vp10_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type,
- xd->lossless[mbmi->segment_id]);
+ xd->lossless[seg_id]);
vpx_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
- scan_order->scan, scan_order->iscan);
+ scan_order->scan,
+#if !CONFIG_AOM_QM
+ scan_order->iscan);
+#else
+ scan_order->iscan, qmatrix, iqmatrix);
+#endif
}
if (*eob) {
@@ -1035,10 +1279,12 @@
// which is significant (not just an optimization) for the lossless
// case.
vp10_inv_txfm_add_4x4(dqcoeff, dst, dst_stride, *eob, tx_type,
- xd->lossless[mbmi->segment_id]);
+ xd->lossless[seg_id]);
}
break;
- default: assert(0); break;
+ default:
+ assert(0);
+ break;
}
if (*eob) *(args->skip) = 0;
}
diff --git a/vp10/encoder/encoder.c b/vp10/encoder/encoder.c
index 4231d4b..b7a18cc 100644
--- a/vp10/encoder/encoder.c
+++ b/vp10/encoder/encoder.c
@@ -1613,6 +1613,9 @@
* vp10_init_quantizer() for every frame.
*/
vp10_init_quantizer(cpi);
+#if CONFIG_AOM_QM
+ aom_qm_init(cm);
+#endif
vp10_loop_filter_init(cm);
diff --git a/vp10/encoder/quantize.c b/vp10/encoder/quantize.c
index 0688a69..820dc4a 100644
--- a/vp10/encoder/quantize.c
+++ b/vp10/encoder/quantize.c
@@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#include <math.h>
#include "./vpx_dsp_rtcd.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
@@ -26,7 +25,12 @@
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
uint16_t *eob_ptr, const int16_t *scan,
- const int16_t *iscan) {
+ const int16_t *iscan
+#if CONFIG_AOM_QM
+ ,
+ const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr
+#endif
+ ) {
int i, eob = -1;
// TODO(jingning) Decide the need of these arguments after the
// quantization process is completed.
@@ -43,16 +47,29 @@
for (i = 0; i < n_coeffs; i++) {
const int rc = scan[i];
const int coeff = coeff_ptr[rc];
+#if CONFIG_AOM_QM
+ const qm_val_t wt = qm_ptr[rc];
+ const qm_val_t iwt = iqm_ptr[rc];
+ const int dequant =
+ (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
+ AOM_QM_BITS;
+#endif
const int coeff_sign = (coeff >> 31);
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- int tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
- tmp = (tmp * quant_ptr[rc != 0]) >> 16;
-
- qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
+ int64_t tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
+ int tmp32;
+#if CONFIG_AOM_QM
+ tmp32 = (tmp * wt * quant_ptr[rc != 0]) >> (16 + AOM_QM_BITS);
+ qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
+ dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant;
+#else
+ tmp32 = (tmp * quant_ptr[rc != 0]) >> 16;
+ qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
+#endif
- if (tmp) eob = i;
+ if (tmp32) eob = i;
}
}
*eob_ptr = eob + 1;
@@ -66,7 +83,12 @@
const int16_t *quant_shift_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan) {
+ const int16_t *scan, const int16_t *iscan
+#if CONFIG_AOM_QM
+ ,
+ const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr
+#endif
+ ) {
int i;
int eob = -1;
// TODO(jingning) Decide the need of these arguments after the
@@ -84,12 +106,26 @@
for (i = 0; i < count; i++) {
const int rc = scan[i];
const int coeff = coeff_ptr[rc];
+#if CONFIG_AOM_QM
+ const qm_val_t wt = qm_ptr[rc];
+ const qm_val_t iwt = iqm_ptr[rc];
+ const int dequant =
+ (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
+ AOM_QM_BITS;
+#endif
const int coeff_sign = (coeff >> 31);
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
const int64_t tmp = abs_coeff + round_ptr[rc != 0];
+#if CONFIG_AOM_QM
+ const uint32_t abs_qcoeff =
+ (uint32_t)((tmp * quant_ptr[rc != 0] * wt) >> (16 + AOM_QM_BITS));
+ qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
+ dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant;
+#else
const uint32_t abs_qcoeff = (uint32_t)((tmp * quant_ptr[rc != 0]) >> 16);
qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
+#endif
if (abs_qcoeff) eob = i;
}
}
@@ -106,7 +142,12 @@
const int16_t *quant_shift_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan) {
+ const int16_t *scan, const int16_t *iscan
+#if CONFIG_AOM_QM
+ ,
+ const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr
+#endif
+ ) {
int i, eob = -1;
(void)zbin_ptr;
(void)quant_shift_ptr;
@@ -119,19 +160,38 @@
for (i = 0; i < n_coeffs; i++) {
const int rc = scan[i];
const int coeff = coeff_ptr[rc];
+#if CONFIG_AOM_QM
+ const qm_val_t wt = qm_ptr[rc];
+ const qm_val_t iwt = iqm_ptr[rc];
+ const int dequant =
+ (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
+ AOM_QM_BITS;
+ int64_t tmp = 0;
+#endif
const int coeff_sign = (coeff >> 31);
- int tmp = 0;
+ int tmp32 = 0;
int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+#if CONFIG_AOM_QM
+ if (abs_coeff * wt >= (dequant_ptr[rc != 0] << (AOM_QM_BITS - 2))) {
+#else
if (abs_coeff >= (dequant_ptr[rc != 0] >> 2)) {
+#endif
abs_coeff += ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
abs_coeff = clamp(abs_coeff, INT16_MIN, INT16_MAX);
- tmp = (abs_coeff * quant_ptr[rc != 0]) >> 15;
- qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
- dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
+#if CONFIG_AOM_QM
+ tmp = abs_coeff * wt; // int64_t so the quant multiply below stays in 64 bits
+ tmp32 = (int)((tmp * quant_ptr[rc != 0]) >> (AOM_QM_BITS + 15));
+ qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
+ dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / 2;
+#else
+ tmp32 = (abs_coeff * quant_ptr[rc != 0]) >> 15;
+ qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
+ dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant_ptr[rc != 0]) / 2;
+#endif
}
- if (tmp) eob = i;
+ if (tmp32) eob = i;
}
}
*eob_ptr = eob + 1;
@@ -143,7 +203,12 @@
const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan) {
+ const int16_t *scan, const int16_t *iscan
+#if CONFIG_AOM_QM
+ ,
+ const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr
+#endif
+ ) {
int i, eob = -1;
(void)zbin_ptr;
(void)quant_shift_ptr;
@@ -157,15 +222,32 @@
uint32_t abs_qcoeff = 0;
const int rc = scan[i];
const int coeff = coeff_ptr[rc];
+#if CONFIG_AOM_QM
+ const qm_val_t wt = qm_ptr[rc];
+ const qm_val_t iwt = iqm_ptr[rc];
+ const int dequant =
+ (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
+ AOM_QM_BITS;
+#endif
const int coeff_sign = (coeff >> 31);
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
-
+#if CONFIG_AOM_QM
+ if (abs_coeff * wt >= (dequant_ptr[rc != 0] << (AOM_QM_BITS - 2))) {
+#else
if (abs_coeff >= (dequant_ptr[rc != 0] >> 2)) {
+#endif
const int64_t tmp =
abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
+#if CONFIG_AOM_QM
+ abs_qcoeff =
+ (uint32_t)((tmp * wt * quant_ptr[rc != 0]) >> (AOM_QM_BITS + 15));
+ qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
+ dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / 2;
+#else
abs_qcoeff = (uint32_t)((tmp * quant_ptr[rc != 0]) >> 15);
qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
- dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
+ dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant_ptr[rc != 0]) / 2;
+#endif
}
if (abs_qcoeff) eob = i;
@@ -180,6 +262,12 @@
MACROBLOCKD *const xd = &x->e_mbd;
struct macroblock_plane *p = &x->plane[plane];
struct macroblockd_plane *pd = &xd->plane[plane];
+#if CONFIG_AOM_QM
+ int seg_id = xd->mi[0]->mbmi.segment_id;
+ int is_intra = !is_inter_block(&xd->mi[0]->mbmi); // 1 when not inter-coded
+ const qm_val_t *qmatrix = pd->seg_qmatrix[seg_id][is_intra][0];
+ const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][is_intra][0];
+#endif
#if CONFIG_VPX_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
@@ -187,7 +275,12 @@
p->zbin, p->round, p->quant, p->quant_shift,
BLOCK_OFFSET(p->qcoeff, block),
BLOCK_OFFSET(pd->dqcoeff, block), pd->dequant,
+#if !CONFIG_AOM_QM
&p->eobs[block], scan, iscan);
+#else
+ &p->eobs[block], scan, iscan,
+ qmatrix, iqmatrix);
+#endif
return;
}
#endif
@@ -195,7 +288,12 @@
p->round, p->quant, p->quant_shift,
BLOCK_OFFSET(p->qcoeff, block),
BLOCK_OFFSET(pd->dqcoeff, block), pd->dequant, &p->eobs[block],
- scan, iscan);
+#if !CONFIG_AOM_QM
+ scan, iscan);
+#else
+ scan, iscan,
+ qmatrix, iqmatrix);
+#endif
}
static void invert_quant(int16_t *quant, int16_t *shift, int d) {
@@ -212,9 +310,12 @@
const int quant = vp10_dc_quant(q, 0, bit_depth);
#if CONFIG_VPX_HIGHBITDEPTH
switch (bit_depth) {
- case VPX_BITS_8: return q == 0 ? 64 : (quant < 148 ? 84 : 80);
- case VPX_BITS_10: return q == 0 ? 64 : (quant < 592 ? 84 : 80);
- case VPX_BITS_12: return q == 0 ? 64 : (quant < 2368 ? 84 : 80);
+ case VPX_BITS_8:
+ return q == 0 ? 64 : (quant < 148 ? 84 : 80);
+ case VPX_BITS_10:
+ return q == 0 ? 64 : (quant < 592 ? 84 : 80);
+ case VPX_BITS_12:
+ return q == 0 ? 64 : (quant < 2368 ? 84 : 80);
default:
assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
return -1;
@@ -288,6 +389,11 @@
const int qindex = vp10_get_qindex(&cm->seg, segment_id, cm->base_qindex);
const int rdmult = vp10_compute_rd_mult(cpi, qindex + cm->y_dc_delta_q);
int i;
+#if CONFIG_AOM_QM
+ const int lossless = xd->lossless[segment_id];
+ // Quant matrix only depends on the base QP so there is only one set per frame
+ int qmlevel = lossless ? NUM_QM_LEVELS - 1 : aom_get_qmlevel(cm->base_qindex);
+#endif
// Y
x->plane[0].quant = quants->y_quant[qindex];
@@ -296,6 +402,12 @@
x->plane[0].quant_shift = quants->y_quant_shift[qindex];
x->plane[0].zbin = quants->y_zbin[qindex];
x->plane[0].round = quants->y_round[qindex];
+#if CONFIG_AOM_QM
+ memcpy(&xd->plane[0].seg_qmatrix[segment_id], cm->gqmatrix[qmlevel][0],
+ sizeof(cm->gqmatrix[qmlevel][0]));
+ memcpy(&xd->plane[0].seg_iqmatrix[segment_id], cm->giqmatrix[qmlevel][0],
+ sizeof(cm->giqmatrix[qmlevel][0]));
+#endif
xd->plane[0].dequant = cpi->y_dequant[qindex];
x->plane[0].quant_thred[0] = x->plane[0].zbin[0] * x->plane[0].zbin[0];
@@ -309,6 +421,12 @@
x->plane[i].quant_shift = quants->uv_quant_shift[qindex];
x->plane[i].zbin = quants->uv_zbin[qindex];
x->plane[i].round = quants->uv_round[qindex];
+#if CONFIG_AOM_QM
+ memcpy(&xd->plane[i].seg_qmatrix[segment_id], cm->gqmatrix[qmlevel][1],
+ sizeof(cm->gqmatrix[qmlevel][1]));
+ memcpy(&xd->plane[i].seg_iqmatrix[segment_id], cm->giqmatrix[qmlevel][1],
+ sizeof(cm->giqmatrix[qmlevel][1]));
+#endif
xd->plane[i].dequant = cpi->uv_dequant[qindex];
x->plane[i].quant_thred[0] = x->plane[i].zbin[0] * x->plane[i].zbin[0];