Scale PVQ input to OD_COEFF_SHIFT resolution.
This ensures we operate at the same precision that Daala uses, which matters
when activity masking is enabled, because of the gain companding.
Metrics from Patchset 4 (which had slightly incorrect rounding):
With activity masking (5 frames only):
av1_pvq_AM_ref_5f@2017-02-07T03:37:53.702Z -> av1_pvq_AM_derf_fix2_coeff_scaling_5f@2017-02-07T00:12:24.427Z
PSNR | PSNR Cb | PSNR Cr | PSNR HVS | SSIM | MS SSIM | CIEDE 2000
0.6653 | -12.3177 | -12.1858 | 0.3350 | 4.1013 | 2.0964 | -4.0539
In particular, for Netflix_Crosswalk_1920x1080_60fps_8bit_420_60f.y4m:
-5.0589 | -22.3077 | -21.2188 | -7.0389 | -3.3715 | -5.7794 | -13.1891
I.e., it fixes the large regression with AM on this sequence, and
substantially improves chroma (at a lesser cost to other metrics).
Without activity masking (5 frames only):
av1_pvq_ref_5f@2017-02-07T03:52:51.279Z -> av1_pvq_derf_fix2_coeff_scaling_5f@2017-02-07T00:12:48.873Z
PSNR | PSNR Cb | PSNR Cr | PSNR HVS | SSIM | MS SSIM | CIEDE 2000
0.0989 | -0.0322 | -0.0464 | 0.1883 | 0.0795 | 0.0579 | 0.0923
Change-Id: I46b808b7c8e4733465f8bebc8336dfd5b75783ec
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index fac99f9..5158ae7 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -1129,7 +1129,9 @@
int tx_size, TX_TYPE tx_type, int *rate, int speed, PVQ_INFO *pvq_info) {
const int tx_blk_size = tx_size_wide[tx_size];
PVQ_SKIP_TYPE ac_dc_coded;
- int quant_shift = get_tx_scale(tx_size);
+ /*TODO(tterribe): Handle CONFIG_AOM_HIGHBITDEPTH.*/
+ int coeff_shift = 3 - get_tx_scale(tx_size);
+ int rounding_mask;
int pvq_dc_quant;
int use_activity_masking = daala_enc->use_activity_masking;
int tell;
@@ -1148,15 +1150,16 @@
DECLARE_ALIGNED(16, int32_t, ref_int32[OD_TXSIZE_MAX * OD_TXSIZE_MAX]);
DECLARE_ALIGNED(16, int32_t, out_int32[OD_TXSIZE_MAX * OD_TXSIZE_MAX]);
+ assert(OD_COEFF_SHIFT >= 3);
// DC quantizer for PVQ
if (use_activity_masking)
pvq_dc_quant = OD_MAXI(
- 1, (quant[0] >> quant_shift) *
+ 1, (quant[0] << (OD_COEFF_SHIFT - 3)) *
daala_enc->state.pvq_qm_q4[plane]
[od_qm_get_index(tx_size, 0)] >>
4);
else
- pvq_dc_quant = OD_MAXI(1, quant[0] >> quant_shift);
+ pvq_dc_quant = OD_MAXI(1, quant[0] << (OD_COEFF_SHIFT - 3));
*eob = 0;
@@ -1174,8 +1177,8 @@
// copy int16 inputs to int32
for (i = 0; i < tx_blk_size * tx_blk_size; i++) {
- ref_int32[i] = ref_coeff_pvq[i];
- in_int32[i] = coeff_pvq[i];
+ ref_int32[i] = ref_coeff_pvq[i] << (OD_COEFF_SHIFT - coeff_shift);
+ in_int32[i] = coeff_pvq[i] << (OD_COEFF_SHIFT - coeff_shift);
}
#if PVQ_CHROMA_RD
@@ -1192,8 +1195,8 @@
ac_dc_coded = od_pvq_encode(
daala_enc, ref_int32, in_int32, out_int32,
- (int)quant[0] >> quant_shift, // scale/quantizer
- (int)quant[1] >> quant_shift, // scale/quantizer
+ quant[0] << (OD_COEFF_SHIFT - 3), // scale/quantizer
+ quant[1] << (OD_COEFF_SHIFT - 3), // scale/quantizer
plane, tx_size, OD_PVQ_BETA[use_activity_masking][plane][tx_size],
OD_ROBUST_STREAM,
0, // is_keyframe,
@@ -1220,7 +1223,12 @@
out_int32[0] += ref_int32[0];
// copy int32 result back to int16
- for (i = 0; i < tx_blk_size * tx_blk_size; i++) dqcoeff_pvq[i] = out_int32[i];
+ assert(OD_COEFF_SHIFT > coeff_shift);
+ rounding_mask = (1 << (OD_COEFF_SHIFT - coeff_shift - 1)) - 1;
+ for (i = 0; i < tx_blk_size * tx_blk_size; i++) {
+ dqcoeff_pvq[i] = (out_int32[i] + (out_int32[i] < 0) + rounding_mask) >>
+ (OD_COEFF_SHIFT - coeff_shift);
+ }
// Back to original coefficient order
od_coding_order_to_raster(dqcoeff, tx_blk_size, tx_type, dqcoeff_pvq,