Modify RDO for use with Daala TX constant-depth coeffs
Modify the portions of RDO using TX-domain coeff calculations to deal
with TX_COEFF_DEPTH and constant-depth coefficient scaling. At
present, this represents no functional change.
subset-1:
monty-rest-of-stack-quant-s1@2017-11-13T14:38:43.774Z ->
monty-rest-of-stack-RDO-s1@2017-11-13T14:39:17.093Z
PSNR | PSNR Cb | PSNR Cr | PSNR HVS | SSIM | MS SSIM | CIEDE 2000
0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000
objective-1-fast --limit=4:
monty-rest-of-stack-quant-o1f4@2017-11-13T14:38:28.828Z ->
monty-rest-of-stack-RDO-o1f4@2017-11-13T14:38:57.951Z
PSNR | PSNR Cb | PSNR Cr | PSNR HVS | SSIM | MS SSIM | CIEDE 2000
0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000
Change-Id: I0fbc45e018f565f48e1fc8fdeabfcd6cb6fa62fe
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index 910c1df..aef4fda 100755
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -445,7 +445,9 @@
}
add_proto qw/int64_t av1_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd";
- specialize qw/av1_highbd_block_error sse2/;
+ if (aom_config("CONFIG_DAALA_TX") ne "yes") {
+ specialize qw/av1_highbd_block_error sse2/;
+ }
add_proto qw/void av1_highbd_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index dadea37..ca650a2 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -147,7 +147,14 @@
get_scan(cm, tx_size, tx_type, &xd->mi[0]->mbmi);
const int16_t *const scan = scan_order->scan;
const int16_t *const nb = scan_order->neighbors;
+#if CONFIG_DAALA_TX
+ // This is one of the few places where RDO is done on coeffs; it
+ // expects the coeffs to be in Q3/D11, so we need to scale them.
+ int depth_shift = (TX_COEFF_DEPTH - 11) * 2;
+ int depth_round = depth_shift > 1 ? (1 << depth_shift >> 1) : 0;
+#else
const int shift = av1_get_tx_scale(tx_size);
+#endif
#if CONFIG_AOM_QM
int seg_id = xd->mi[0]->mbmi.segment_id;
// Use a flat matrix (i.e. no weighting) for 1D and Identity transforms
@@ -212,14 +219,19 @@
tail_token_costs[band_cur][ctx_cur]);
// accu_error does not change when x==0
} else {
- /* Computing distortion
- */
- // compute the distortion for the first candidate
- // and the distortion for quantizing to 0.
+/* Computing distortion
+ */
+// compute the distortion for the first candidate
+// and the distortion for quantizing to 0.
+#if CONFIG_DAALA_TX
+ int dx0 = coeff[rc];
+ const int64_t d0 = ((int64_t)dx0 * dx0 + depth_round) >> depth_shift;
+#else
int dx0 = abs(coeff[rc]) * (1 << shift);
dx0 >>= xd->bd - 8;
const int64_t d0 = (int64_t)dx0 * dx0;
+#endif
const int x_a = x - 2 * sz - 1;
int dqv;
#if CONFIG_AOM_QM
@@ -233,15 +245,29 @@
dqv = dequant_ptr[rc != 0];
#endif
+#if CONFIG_DAALA_TX
+ int dx = dqcoeff[rc] - coeff[rc];
+ const int64_t d2 = ((int64_t)dx * dx + depth_round) >> depth_shift;
+#else
int dx = (dqcoeff[rc] - coeff[rc]) * (1 << shift);
dx = signed_shift_right(dx, xd->bd - 8);
const int64_t d2 = (int64_t)dx * dx;
+#endif
/* compute the distortion for the second candidate
* x_a = x - 2 * sz + 1;
*/
int64_t d2_a;
if (x_a != 0) {
+#if CONFIG_DAALA_TX
+#if CONFIG_NEW_QUANT
+ dx = av1_dequant_coeff_nuq(x, dqv, dequant_val[band_translate[i]]) -
+ coeff[rc];
+#else // CONFIG_NEW_QUANT
+ dx -= (dqv + sz) ^ sz;
+#endif // CONFIG_NEW_QUANT
+ d2_a = ((int64_t)dx * dx + depth_round) >> depth_shift;
+#else // CONFIG_DAALA_TX
#if CONFIG_NEW_QUANT
dx = av1_dequant_coeff_nuq(x, dqv, dequant_val[band_translate[i]]) -
(coeff[rc] * (1 << shift));
@@ -250,9 +276,11 @@
dx -= ((dqv >> (xd->bd - 8)) + sz) ^ sz;
#endif // CONFIG_NEW_QUANT
d2_a = (int64_t)dx * dx;
+#endif // CONFIG_DAALA_TX
} else {
d2_a = d0;
}
+
// Computing RD cost
int64_t base_bits;
// rate cost of x
@@ -321,6 +349,15 @@
int dqc_a = 0;
if (best_x || best_eob_x) {
if (x_a != 0) {
+#if CONFIG_DAALA_TX
+#if CONFIG_NEW_QUANT
+ dqc_a = av1_dequant_abscoeff_nuq(abs(x_a), dqv,
+ dequant_val[band_translate[i]]);
+ if (sz) dqc_a = -dqc_a;
+#else
+ dqc_a = x_a * dqv;
+#endif // CONFIG_NEW_QUANT
+#else // CONFIG_DAALA_TX
#if CONFIG_NEW_QUANT
dqc_a = av1_dequant_abscoeff_nuq(abs(x_a), dqv,
dequant_val[band_translate[i]]);
@@ -332,9 +369,10 @@
else
dqc_a = (x_a * dqv) >> shift;
#endif // CONFIG_NEW_QUANT
+#endif // CONFIG_DAALA_TX
} else {
dqc_a = 0;
- } // if (x_a != 0)
+ }
}
// record the better quantized value
diff --git a/av1/encoder/encodetxb.c b/av1/encoder/encodetxb.c
index 2f7481a..6c5f344 100644
--- a/av1/encoder/encodetxb.c
+++ b/av1/encoder/encodetxb.c
@@ -126,8 +126,16 @@
static INLINE int64_t get_coeff_dist(tran_low_t tcoeff, tran_low_t dqcoeff,
int shift) {
+#if CONFIG_DAALA_TX
+ int depth_shift = (TX_COEFF_DEPTH - 11) * 2;
+ int depth_round = depth_shift > 1 ? (1 << (depth_shift - 1)) : 0;
+ const int64_t diff = tcoeff - dqcoeff;
+ const int64_t error = diff * diff + depth_round >> depth_shift;
+ (void)shift;
+#else
const int64_t diff = (tcoeff - dqcoeff) * (1 << shift);
const int64_t error = diff * diff;
+#endif
return error;
}
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 1a56a93..416171e 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -1588,7 +1588,12 @@
int64_t *ssz, int bd) {
int i;
int64_t error = 0, sqcoeff = 0;
+#if CONFIG_DAALA_TX
+ (void)bd;
+ int shift = 2 * (TX_COEFF_DEPTH - 11);
+#else
int shift = 2 * (bd - 8);
+#endif
int rounding = shift > 0 ? 1 << (shift - 1) : 0;
for (i = 0; i < block_size; i++) {
@@ -1926,7 +1931,13 @@
// not involve an inverse transform, but it is less accurate.
const int buffer_length = tx_size_2d[tx_size];
int64_t this_sse;
+// TX-domain results need to shift down to Q2/D10 to match pixel
+// domain distortion values which are in Q2^2
+#if CONFIG_DAALA_TX
+ int shift = (TX_COEFF_DEPTH - 10) * 2;
+#else
int shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2;
+#endif
tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
@@ -2106,7 +2117,13 @@
av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
coeff_ctx, AV1_XFORM_QUANT_FP);
+// TX-domain results need to shift down to Q2/D10 to match pixel
+// domain distortion values which are in Q2^2
+#if CONFIG_DAALA_TX
+ const int shift = (TX_COEFF_DEPTH - 10) * 2;
+#else
const int shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2;
+#endif
tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block);
tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
const int buffer_length = tx_size_2d[tx_size];
@@ -3658,6 +3675,7 @@
const int coeff_ctx_one_byte = combine_entropy_contexts(*a, *l);
const uint8_t cur_joint_ctx = (coeff_ctx << 2) + coeff_ctx_one_byte;
+ // Note: tmp below is pixel distortion, not TX domain
tmp = pixel_diff_dist(x, plane, diff, diff_stride, blk_row, blk_col,
plane_bsize, txm_bsize);
@@ -3714,7 +3732,13 @@
av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
coeff_ctx, AV1_XFORM_QUANT_FP);
+// TX-domain results need to shift down to Q2/D10 to match pixel
+// domain distortion values which are in Q2^2
+#if CONFIG_DAALA_TX
+ const int shift = (TX_COEFF_DEPTH - 10) * 2;
+#else
const int shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2;
+#endif
tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
const int buffer_length = tx_size_2d[tx_size];
int64_t tmp_dist, tmp_sse;
diff --git a/test/error_block_test.cc b/test/error_block_test.cc
index 227065f..d1eac05 100644
--- a/test/error_block_test.cc
+++ b/test/error_block_test.cc
@@ -156,7 +156,7 @@
<< "First failed at test case " << first_failure;
}
-#if HAVE_SSE2 || HAVE_AVX
+#if (HAVE_SSE2 || HAVE_AVX) && !CONFIG_DAALA_TX
using std::tr1::make_tuple;
INSTANTIATE_TEST_CASE_P(