cdef-dist and daala-dist are runtime switchable
Use --tune=[cdef-dist|daala-dist] to enable them.
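For example (illustrative; this assumes the standard aomenc front end,
which forwards --tune to the encoder's tuning metric):

    aomenc --tune=daala-dist -o output.ivf input.y4m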
Also, this commit sets PVQ's use_activity_masking to 0 by default,
which means that PVQ assumes daala-dist is not used by default.
Since we are currently not signaling in the bitstream which metric the
encoder used, the compile-time flag AV1_PVQ_ENABLE_ACTIVITY_MASKING
tells PVQ whether daala-dist is in use.
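For example (illustrative), to build with activity masking enabled for a
daala-dist encode, flip the macro in av1/common/pvq.h:

    /* Set to 1 when encoding with --tune=daala-dist, so that the PVQ
       encoder and decoder agree on applying activity masking. */
    #define AV1_PVQ_ENABLE_ACTIVITY_MASKING (1)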
This commit is the last part of the prep work to remove the DIST_8X8,
CDEF_DIST, and DAALA_DIST experimental flags.
Change-Id: Ia465b4d6fe64aac7f04852c8f9f4bac3409d2435
diff --git a/av1/common/pvq.h b/av1/common/pvq.h
index 17e54d4..4adf22f 100644
--- a/av1/common/pvq.h
+++ b/av1/common/pvq.h
@@ -19,11 +19,7 @@
extern const uint16_t EXP_CDF_TABLE[][16];
extern const uint16_t LAPLACE_OFFSET[];
-#if CONFIG_DAALA_DIST
-#define AV1_PVQ_ENABLE_ACTIVITY_MASKING (1)
-#else
#define AV1_PVQ_ENABLE_ACTIVITY_MASKING (0)
-#endif
# define PVQ_MAX_PARTITIONS (1 + 3*(OD_TXSIZES-1))
diff --git a/av1/common/quant_common.c b/av1/common/quant_common.c
index 2c07941..72f768a 100644
--- a/av1/common/quant_common.c
+++ b/av1/common/quant_common.c
@@ -14044,7 +14044,7 @@
};
#endif
-#if CONFIG_PVQ || CONFIG_DAALA_DIST
+#if CONFIG_PVQ
/* Quantization matrices for 8x8. For other block sizes, we currently just do
resampling. */
/* Flat quantization, i.e. optimize for PSNR. */
diff --git a/av1/common/quant_common.h b/av1/common/quant_common.h
index f28ffe7..2795425 100644
--- a/av1/common/quant_common.h
+++ b/av1/common/quant_common.h
@@ -99,7 +99,7 @@
}
#endif // CONFIG_NEW_QUANT
-#if CONFIG_PVQ || CONFIG_DAALA_DIST
+#if CONFIG_PVQ
extern const int OD_QM8_Q4_FLAT[];
extern const int OD_QM8_Q4_HVS[];
#endif
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index c60d4c6..6994d36 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -3659,10 +3659,6 @@
// TODO(yushin) : activity masking info needs be signaled by a bitstream
daala_dec->use_activity_masking = AV1_PVQ_ENABLE_ACTIVITY_MASKING;
-#if !CONFIG_DAALA_DIST
- daala_dec->use_activity_masking = 0;
-#endif
-
if (daala_dec->use_activity_masking)
daala_dec->qm = OD_HVS_QM;
else
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 36d7a75..9269876 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -3754,7 +3754,7 @@
decoded_8x8 = (uint8_t *)x->decoded_8x8;
dist_8x8 =
- av1_dist_8x8(cpi, xd, x->plane[0].src.buf - 4 * src_stride - 4,
+ av1_dist_8x8(cpi, x, x->plane[0].src.buf - 4 * src_stride - 4,
src_stride, decoded_8x8, 8, BLOCK_8X8, 8, 8, 8, 8,
x->qindex)
<< 4;
@@ -3939,7 +3939,7 @@
#endif
decoded_8x8 = (uint8_t *)x->decoded_8x8;
- dist_8x8 = av1_dist_8x8(cpi, xd, x->plane[0].src.buf - 4 * src_stride,
+ dist_8x8 = av1_dist_8x8(cpi, x, x->plane[0].src.buf - 4 * src_stride,
src_stride, decoded_8x8, 8, BLOCK_8X8, 8, 8, 8,
8, x->qindex)
<< 4;
@@ -4121,7 +4121,7 @@
decoded_8x8 = (uint8_t *)x->decoded_8x8;
dist_8x8 =
- av1_dist_8x8(cpi, xd, x->plane[0].src.buf - 4, src_stride,
+ av1_dist_8x8(cpi, x, x->plane[0].src.buf - 4, src_stride,
decoded_8x8, 8, BLOCK_8X8, 8, 8, 8, 8, x->qindex)
<< 4;
sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + dist_8x8;
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 5400c5b..4f5865d 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -58,8 +58,6 @@
#include "av1/encoder/tokenize.h"
#if CONFIG_PVQ
#include "av1/encoder/pvq_encoder.h"
-#endif // CONFIG_PVQ
-#if CONFIG_PVQ || CONFIG_DAALA_DIST
#include "av1/common/pvq.h"
#endif // CONFIG_PVQ
#if CONFIG_DUAL_FILTER
@@ -669,7 +667,7 @@
#define FAST_EXT_TX_CORR_MARGIN 0.5
#define FAST_EXT_TX_EDST_MARGIN 0.3
-#if CONFIG_CDEF_DIST
+#if CONFIG_DIST_8X8
static uint64_t cdef_dist_8x8_16bit(uint16_t *dst, int dstride, uint16_t *src,
int sstride, int coeff_shift) {
uint64_t svar = 0;
@@ -713,9 +711,7 @@
return dist;
}
-#endif // CONFIG_CDEF_DIST
-#if CONFIG_DAALA_DIST
static int od_compute_var_4x4(uint16_t *x, int stride) {
int sum;
int s2;
@@ -919,103 +915,83 @@
}
return sum;
}
-#endif // CONFIG_DAALA_DIST
-#if CONFIG_DIST_8X8
-#define NEW_FUTURE_DIST 0
-int64_t av1_dist_8x8(const AV1_COMP *const cpi, const MACROBLOCKD *xd,
+int64_t av1_dist_8x8(const AV1_COMP *const cpi, const MACROBLOCK *x,
const uint8_t *src, int src_stride, const uint8_t *dst,
int dst_stride, const BLOCK_SIZE tx_bsize, int bsw,
int bsh, int visible_w, int visible_h, int qindex) {
int64_t d = 0;
-
-#if CONFIG_DAALA_DIST || NEW_FUTURE_DIST || CONFIG_CDEF_DIST
int i, j;
+ const MACROBLOCKD *xd = &x->e_mbd;
DECLARE_ALIGNED(16, uint16_t, orig[MAX_TX_SQUARE]);
DECLARE_ALIGNED(16, uint16_t, rec[MAX_TX_SQUARE]);
- (void)cpi;
- (void)tx_bsize;
-#endif // CONFIG_DAALA_DIST || NEW_FUTURE_DIST
-#if !CONFIG_HIGHBITDEPTH
- (void)xd;
-#endif
-
-#if !CONFIG_DAALA_DIST
- (void)qindex;
-#endif
-
-#if !CONFIG_DAALA_DIST || !NEW_FUTURE_DIST
- (void)xd;
- (void)bsw, (void)bsh;
- (void)visible_w, (void)visible_h;
-#endif
-
-#if CONFIG_DAALA_DIST || NEW_FUTURE_DIST || CONFIG_CDEF_DIST
- assert((bsw & 0x07) == 0);
- assert((bsh & 0x07) == 0);
+ if (x->tune_metric == AOM_TUNE_CDEF_DIST ||
+ x->tune_metric == AOM_TUNE_DAALA_DIST) {
+ assert((bsw & 0x07) == 0);
+ assert((bsh & 0x07) == 0);
#if CONFIG_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- for (j = 0; j < bsh; j++)
- for (i = 0; i < bsw; i++)
- orig[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
-
- if ((bsw == visible_w) && (bsh == visible_h)) {
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
for (j = 0; j < bsh; j++)
for (i = 0; i < bsw; i++)
- rec[j * bsw + i] = CONVERT_TO_SHORTPTR(dst)[j * dst_stride + i];
- } else {
- for (j = 0; j < visible_h; j++)
- for (i = 0; i < visible_w; i++)
- rec[j * bsw + i] = CONVERT_TO_SHORTPTR(dst)[j * dst_stride + i];
+ orig[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
- if (visible_w < bsw) {
+ if ((bsw == visible_w) && (bsh == visible_h)) {
for (j = 0; j < bsh; j++)
- for (i = visible_w; i < bsw; i++)
- rec[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
- }
-
- if (visible_h < bsh) {
- for (j = visible_h; j < bsh; j++)
for (i = 0; i < bsw; i++)
- rec[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
- }
- }
- } else {
-#endif
- for (j = 0; j < bsh; j++)
- for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i];
+ rec[j * bsw + i] = CONVERT_TO_SHORTPTR(dst)[j * dst_stride + i];
+ } else {
+ for (j = 0; j < visible_h; j++)
+ for (i = 0; i < visible_w; i++)
+ rec[j * bsw + i] = CONVERT_TO_SHORTPTR(dst)[j * dst_stride + i];
- if ((bsw == visible_w) && (bsh == visible_h)) {
- for (j = 0; j < bsh; j++)
- for (i = 0; i < bsw; i++) rec[j * bsw + i] = dst[j * dst_stride + i];
+ if (visible_w < bsw) {
+ for (j = 0; j < bsh; j++)
+ for (i = visible_w; i < bsw; i++)
+ rec[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
+ }
+
+ if (visible_h < bsh) {
+ for (j = visible_h; j < bsh; j++)
+ for (i = 0; i < bsw; i++)
+ rec[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
+ }
+ }
} else {
- for (j = 0; j < visible_h; j++)
- for (i = 0; i < visible_w; i++)
- rec[j * bsw + i] = dst[j * dst_stride + i];
+#endif
+ for (j = 0; j < bsh; j++)
+ for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i];
- if (visible_w < bsw) {
+ if ((bsw == visible_w) && (bsh == visible_h)) {
for (j = 0; j < bsh; j++)
- for (i = visible_w; i < bsw; i++)
- rec[j * bsw + i] = src[j * src_stride + i];
- }
+ for (i = 0; i < bsw; i++) rec[j * bsw + i] = dst[j * dst_stride + i];
+ } else {
+ for (j = 0; j < visible_h; j++)
+ for (i = 0; i < visible_w; i++)
+ rec[j * bsw + i] = dst[j * dst_stride + i];
- if (visible_h < bsh) {
- for (j = visible_h; j < bsh; j++)
- for (i = 0; i < bsw; i++) rec[j * bsw + i] = src[j * src_stride + i];
+ if (visible_w < bsw) {
+ for (j = 0; j < bsh; j++)
+ for (i = visible_w; i < bsw; i++)
+ rec[j * bsw + i] = src[j * src_stride + i];
+ }
+
+ if (visible_h < bsh) {
+ for (j = visible_h; j < bsh; j++)
+ for (i = 0; i < bsw; i++)
+ rec[j * bsw + i] = src[j * src_stride + i];
+ }
}
- }
#if CONFIG_HIGHBITDEPTH
- }
+ }
#endif // CONFIG_HIGHBITDEPTH
-#endif // CONFIG_DAALA_DIST || NEW_FUTURE_DIST
+ }
-#if CONFIG_DAALA_DIST
- d = (int64_t)od_compute_dist(orig, rec, bsw, bsh, qindex);
-#elif CONFIG_CDEF_DIST
- {
+ if (x->tune_metric == AOM_TUNE_DAALA_DIST) {
+ d = (int64_t)od_compute_dist(orig, rec, bsw, bsh, qindex);
+ } else if (x->tune_metric == AOM_TUNE_CDEF_DIST) {
int coeff_shift = AOMMAX(xd->bd - 8, 0);
for (i = 0; i < bsh; i += 8) {
@@ -1028,95 +1004,70 @@
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
d = ((uint64_t)d) >> 2 * coeff_shift;
#endif
+ } else {
+ // Otherwise, MSE by default
+ unsigned sse;
+ // TODO(Any): Use even faster function which does not calculate variance
+ cpi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &sse);
+ d = sse;
}
-#elif NEW_FUTURE_DIST
- // Call new 8x8-wise distortion function here, for example
- for (i = 0; i < bsh; i += 8) {
- for (j = 0; j < bsw; j += 8) {
- d +=
- av1_compute_dist_8x8(&orig[i * bsw + j], &rec[i * bsw + j], bsw, bsh);
- }
- }
-#else
- // Otherwise, MSE by default
- unsigned sse;
- // TODO(Any): Use even faster function which does not calculate variance
- cpi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &sse);
- d = sse;
-#endif // CONFIG_DAALA_DIST
return d;
}
-static int64_t av1_dist_8x8_diff(const MACROBLOCKD *xd, const uint8_t *src,
+static int64_t av1_dist_8x8_diff(const MACROBLOCK *x, const uint8_t *src,
int src_stride, const int16_t *diff,
int diff_stride, int bsw, int bsh,
int visible_w, int visible_h, int qindex) {
int64_t d = 0;
-
-#if CONFIG_DAALA_DIST || NEW_FUTURE_DIST || CONFIG_CDEF_DIST
int i, j;
+ const MACROBLOCKD *xd = &x->e_mbd;
DECLARE_ALIGNED(16, uint16_t, orig[MAX_TX_SQUARE]);
DECLARE_ALIGNED(16, int16_t, diff16[MAX_TX_SQUARE]);
-#endif // CONFIG_DAALA_DIST || NEW_FUTURE_DIST
-#if !CONFIG_HIGHBITDEPTH
- (void)xd;
-#endif
-
-#if !CONFIG_DAALA_DIST
- (void)qindex;
-#endif
-
-#if !CONFIG_DAALA_DIST || !NEW_FUTURE_DIST
- (void)xd;
- (void)src, (void)src_stride;
- (void)bsw, (void)bsh;
- (void)visible_w, (void)visible_h;
-#endif
-
-#if CONFIG_DAALA_DIST || NEW_FUTURE_DIST || CONFIG_CDEF_DIST
- assert((bsw & 0x07) == 0);
- assert((bsh & 0x07) == 0);
+ if (x->tune_metric == AOM_TUNE_CDEF_DIST ||
+ x->tune_metric == AOM_TUNE_DAALA_DIST) {
+ assert((bsw & 0x07) == 0);
+ assert((bsh & 0x07) == 0);
#if CONFIG_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- for (j = 0; j < bsh; j++)
- for (i = 0; i < bsw; i++)
- orig[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
- } else {
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ for (j = 0; j < bsh; j++)
+ for (i = 0; i < bsw; i++)
+ orig[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
+ } else {
#endif
- for (j = 0; j < bsh; j++)
- for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i];
+ for (j = 0; j < bsh; j++)
+ for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i];
#if CONFIG_HIGHBITDEPTH
- }
+ }
#endif // CONFIG_HIGHBITDEPTH
- if ((bsw == visible_w) && (bsh == visible_h)) {
- for (j = 0; j < bsh; j++)
- for (i = 0; i < bsw; i++) diff16[j * bsw + i] = diff[j * diff_stride + i];
- } else {
- for (j = 0; j < visible_h; j++)
- for (i = 0; i < visible_w; i++)
- diff16[j * bsw + i] = diff[j * diff_stride + i];
-
- if (visible_w < bsw) {
+ if ((bsw == visible_w) && (bsh == visible_h)) {
for (j = 0; j < bsh; j++)
- for (i = visible_w; i < bsw; i++) diff16[j * bsw + i] = 0;
- }
+ for (i = 0; i < bsw; i++)
+ diff16[j * bsw + i] = diff[j * diff_stride + i];
+ } else {
+ for (j = 0; j < visible_h; j++)
+ for (i = 0; i < visible_w; i++)
+ diff16[j * bsw + i] = diff[j * diff_stride + i];
- if (visible_h < bsh) {
- for (j = visible_h; j < bsh; j++)
- for (i = 0; i < bsw; i++) diff16[j * bsw + i] = 0;
+ if (visible_w < bsw) {
+ for (j = 0; j < bsh; j++)
+ for (i = visible_w; i < bsw; i++) diff16[j * bsw + i] = 0;
+ }
+
+ if (visible_h < bsh) {
+ for (j = visible_h; j < bsh; j++)
+ for (i = 0; i < bsw; i++) diff16[j * bsw + i] = 0;
+ }
}
}
-#endif // CONFIG_DAALA_DIST || NEW_FUTURE_DIST
-#if CONFIG_DAALA_DIST
- d = (int64_t)od_compute_dist_diff(orig, diff16, bsw, bsh, qindex);
-#elif CONFIG_CDEF_DIST
- {
+ if (x->tune_metric == AOM_TUNE_DAALA_DIST) {
+ d = (int64_t)od_compute_dist_diff(orig, diff16, bsw, bsh, qindex);
+ } else if (x->tune_metric == AOM_TUNE_CDEF_DIST) {
int coeff_shift = AOMMAX(xd->bd - 8, 0);
DECLARE_ALIGNED(16, uint16_t, dst16[MAX_TX_SQUARE]);
@@ -1134,19 +1085,10 @@
}
// Don't scale 'd' for HBD since it will be done by caller side for diff
// input
+ } else {
+ // Otherwise, MSE by default
+ d = aom_sum_squares_2d_i16(diff, diff_stride, bsw, bsh);
}
-#elif NEW_FUTURE_DIST
- // Call new 8x8-wise distortion function (with diff input) here, for example
- for (i = 0; i < bsh; i += 8) {
- for (j = 0; j < bsw; j += 8) {
- d += av1_compute_dist_8x8_diff(&orig[i * bsw + j], &diff16[i * bsw + j],
- bsw, bsh);
- }
- }
-#else
- // Otherwise, MSE by default
- d = aom_sum_squares_2d_i16(diff, diff_stride, bsw, bsh);
-#endif // CONFIG_DAALA_DIST
return d;
}
@@ -1802,7 +1744,7 @@
#if CONFIG_DIST_8X8
if (x->using_dist_8x8 && plane == 0 && txb_cols >= 8 && txb_rows >= 8)
- return av1_dist_8x8(cpi, xd, src, src_stride, dst, dst_stride, tx_bsize,
+ return av1_dist_8x8(cpi, x, src, src_stride, dst, dst_stride, tx_bsize,
txb_cols, txb_rows, visible_cols, visible_rows,
x->qindex);
#endif // CONFIG_DIST_8X8
@@ -1851,7 +1793,7 @@
#if CONFIG_DIST_8X8
if (x->using_dist_8x8 && plane == 0 && txb_width >= 8 && txb_height >= 8)
- return av1_dist_8x8_diff(xd, src, src_stride, diff, diff_stride, txb_width,
+ return av1_dist_8x8_diff(x, src, src_stride, diff, diff_stride, txb_width,
txb_height, visible_cols, visible_rows, x->qindex);
else
#endif
@@ -2287,9 +2229,9 @@
}
#endif // CONFIG_HIGHBITDEPTH
- tmp1 = av1_dist_8x8(cpi, xd, src, src_stride, pred8, bw, bsize, bw, bh, bw,
- bh, qindex);
- tmp2 = av1_dist_8x8(cpi, xd, src, src_stride, dst, dst_stride, bsize, bw, bh,
+ tmp1 = av1_dist_8x8(cpi, x, src, src_stride, pred8, bw, bsize, bw, bh, bw, bh,
+ qindex);
+ tmp2 = av1_dist_8x8(cpi, x, src, src_stride, dst, dst_stride, bsize, bw, bh,
bw, bh, qindex);
if (!is_inter_block(mbmi)) {
@@ -3832,7 +3774,7 @@
use_activity_masking = mb->daala_enc.use_activity_masking;
#endif // CONFIG_PVQ
// Daala-defined distortion computed for the block of 8x8 pixels
- total_distortion = av1_dist_8x8(cpi, xd, src, src_stride, dst, dst_stride,
+ total_distortion = av1_dist_8x8(cpi, mb, src, src_stride, dst, dst_stride,
BLOCK_8X8, 8, 8, 8, 8, mb->qindex)
<< 4;
}
@@ -5005,7 +4947,7 @@
DECLARE_ALIGNED(16, uint8_t, pred8[8 * 8]);
#endif // CONFIG_HIGHBITDEPTH
- dist_8x8 = av1_dist_8x8(cpi, xd, src, src_stride, dst, dst_stride,
+ dist_8x8 = av1_dist_8x8(cpi, x, src, src_stride, dst, dst_stride,
BLOCK_8X8, 8, 8, 8, 8, qindex) *
16;
sum_rd_stats.sse = dist_8x8;
@@ -5062,7 +5004,7 @@
#if CONFIG_HIGHBITDEPTH
}
#endif // CONFIG_HIGHBITDEPTH
- dist_8x8 = av1_dist_8x8(cpi, xd, src, src_stride, pred8, 8, BLOCK_8X8, 8,
+ dist_8x8 = av1_dist_8x8(cpi, x, src, src_stride, pred8, 8, BLOCK_8X8, 8,
8, 8, 8, qindex) *
16;
sum_rd_stats.dist = dist_8x8;
diff --git a/av1/encoder/rdopt.h b/av1/encoder/rdopt.h
index e1b5b2d..4923952 100644
--- a/av1/encoder/rdopt.h
+++ b/av1/encoder/rdopt.h
@@ -71,7 +71,7 @@
OUTPUT_STATUS output_status);
#if CONFIG_DIST_8X8
-int64_t av1_dist_8x8(const AV1_COMP *const cpi, const MACROBLOCKD *xd,
+int64_t av1_dist_8x8(const AV1_COMP *const cpi, const MACROBLOCK *x,
const uint8_t *src, int src_stride, const uint8_t *dst,
int dst_stride, const BLOCK_SIZE tx_bsize, int bsw,
int bsh, int visible_w, int visible_h, int qindex);