Introduce runtime switch for dist_8x8
Even if 'dist-8x8' is enabled at configure time, dist-8x8 is not
actually enabled (i.e. no change in encoding behaviour) until the
command-line option '--enable-dist-8x8=1' is used. The cdef-dist and
daala-dist cannot yet be enabled by a command-line option.

This commit is part of the prep work to remove the DIST_8X8,
CDEF_DIST, and DAALA_DIST experimental flags.
Change-Id: I5c2df90f837b32f44e756572a19272dfb4c3dff4
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index 12d2aef..ab4f72f 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -18,6 +18,9 @@
#include "av1/encoder/encint.h"
#endif
#include "av1/common/mvref_common.h"
+#if CONFIG_DIST_8X8
+#include "aom/aomcx.h"
+#endif
#ifdef __cplusplus
extern "C" {
@@ -275,6 +278,8 @@
int pvq_coded; // Indicates whether pvq_info needs be stored to tokenize
#endif
#if CONFIG_DIST_8X8
+ int using_dist_8x8;
+ aom_tune_metric tune_metric;
#if CONFIG_CB4X4
#if CONFIG_HIGHBITDEPTH
DECLARE_ALIGNED(16, uint16_t, decoded_8x8[8 * 8]);
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 9e086d6..e9aa9bb 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -3711,7 +3711,8 @@
#endif // CONFIG_SUPERTX
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- if (bsize == BLOCK_8X8 && this_rdc.rate != INT_MAX) {
+ if (x->using_dist_8x8 && bsize == BLOCK_8X8 &&
+ this_rdc.rate != INT_MAX) {
assert(this_rdc.dist_y < INT64_MAX);
}
#endif
@@ -3729,7 +3730,7 @@
sum_rate_nocoef += this_rate_nocoef;
#endif // CONFIG_SUPERTX
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- if (bsize == BLOCK_8X8) {
+ if (x->using_dist_8x8 && bsize == BLOCK_8X8) {
assert(this_rdc.dist_y < INT64_MAX);
sum_rdc.dist_y += this_rdc.dist_y;
}
@@ -3739,8 +3740,8 @@
reached_last_index = (idx == 4);
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- if (reached_last_index && sum_rdc.rdcost != INT64_MAX &&
- bsize == BLOCK_8X8) {
+ if (x->using_dist_8x8 && reached_last_index &&
+ sum_rdc.rdcost != INT64_MAX && bsize == BLOCK_8X8) {
int64_t dist_8x8;
const int src_stride = x->plane[0].src.stride;
uint8_t *decoded_8x8;
@@ -3900,7 +3901,7 @@
#endif // CONFIG_SUPERTX
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- if (this_rdc.rate != INT_MAX && bsize == BLOCK_8X8) {
+ if (x->using_dist_8x8 && this_rdc.rate != INT_MAX && bsize == BLOCK_8X8) {
update_state(cpi, td, &pc_tree->horizontal[1], mi_row + mi_step, mi_col,
subsize, DRY_RUN_NORMAL);
encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row + mi_step, mi_col,
@@ -3921,11 +3922,12 @@
sum_rate_nocoef += this_rate_nocoef;
#endif // CONFIG_SUPERTX
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- sum_rdc.dist_y += this_rdc.dist_y;
+ if (x->using_dist_8x8) sum_rdc.dist_y += this_rdc.dist_y;
#endif
}
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- if (sum_rdc.rdcost != INT64_MAX && bsize == BLOCK_8X8) {
+ if (x->using_dist_8x8 && sum_rdc.rdcost != INT64_MAX &&
+ bsize == BLOCK_8X8) {
int64_t dist_8x8;
const int src_stride = x->plane[0].src.stride;
uint8_t *decoded_8x8;
@@ -4080,7 +4082,7 @@
#endif // CONFIG_SUPERTX
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- if (this_rdc.rate != INT_MAX && bsize == BLOCK_8X8) {
+ if (x->using_dist_8x8 && this_rdc.rate != INT_MAX && bsize == BLOCK_8X8) {
update_state(cpi, td, &pc_tree->vertical[1], mi_row, mi_col + mi_step,
subsize, DRY_RUN_NORMAL);
encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row, mi_col + mi_step,
@@ -4101,11 +4103,12 @@
sum_rate_nocoef += this_rate_nocoef;
#endif // CONFIG_SUPERTX
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- sum_rdc.dist_y += this_rdc.dist_y;
+ if (x->using_dist_8x8) sum_rdc.dist_y += this_rdc.dist_y;
#endif
}
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- if (sum_rdc.rdcost != INT64_MAX && bsize == BLOCK_8X8) {
+ if (x->using_dist_8x8 && sum_rdc.rdcost != INT64_MAX &&
+ bsize == BLOCK_8X8) {
int64_t dist_8x8;
const int src_stride = x->plane[0].src.stride;
uint8_t *decoded_8x8;
@@ -4377,7 +4380,7 @@
*rd_cost = best_rdc;
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- if (bsize <= BLOCK_8X8 && rd_cost->rate != INT_MAX) {
+ if (x->using_dist_8x8 && bsize <= BLOCK_8X8 && rd_cost->rate != INT_MAX) {
assert(rd_cost->dist_y < INT64_MAX);
}
#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4
@@ -4407,8 +4410,8 @@
#endif
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX &&
- bsize == BLOCK_4X4 && pc_tree->index == 3) {
+ if (x->using_dist_8x8 && best_rdc.rate < INT_MAX &&
+ best_rdc.dist < INT64_MAX && bsize == BLOCK_4X4 && pc_tree->index == 3) {
encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize,
pc_tree, NULL);
}
@@ -5055,6 +5058,10 @@
x->min_partition_size = AOMMIN(x->min_partition_size, cm->sb_size);
x->max_partition_size = AOMMIN(x->max_partition_size, cm->sb_size);
+#if CONFIG_DIST_8X8
+ x->using_dist_8x8 = cpi->oxcf.using_dist_8x8;
+ x->tune_metric = cpi->oxcf.tuning;
+#endif
cm->setup_mi(cm);
xd->mi = cm->mi_grid_visible;
@@ -6031,7 +6038,7 @@
}
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- if (bsize < BLOCK_8X8) {
+ if (x->using_dist_8x8 && bsize < BLOCK_8X8) {
dist_8x8_set_sub8x8_dst(x, (uint8_t *)x->decoded_8x8, bsize,
block_size_wide[bsize], block_size_high[bsize],
mi_row, mi_col);
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index 506a4f0..24236b9 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -572,27 +572,34 @@
#if CONFIG_PVQ || CONFIG_DIST_8X8 || CONFIG_LGT || CONFIG_MRC_TX
dst = &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
+#endif // CONFIG_PVQ || CONFIG_DIST_8X8 || CONFIG_LGT || CONFIG_MRC_TX
+
#if CONFIG_PVQ || CONFIG_DIST_8X8
- pred = &pd->pred[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]];
+ if (CONFIG_PVQ
+#if CONFIG_DIST_8X8
+ || x->using_dist_8x8
+#endif // CONFIG_DIST_8X8
+ ) {
+ pred = &pd->pred[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]];
// copy uint8 orig and predicted block to int16 buffer
// in order to use existing VP10 transform functions
#if CONFIG_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- for (j = 0; j < txh; j++)
- for (i = 0; i < txw; i++)
- pred[diff_stride * j + i] =
- CONVERT_TO_SHORTPTR(dst)[dst_stride * j + i];
- } else {
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ for (j = 0; j < txh; j++)
+ for (i = 0; i < txw; i++)
+ pred[diff_stride * j + i] =
+ CONVERT_TO_SHORTPTR(dst)[dst_stride * j + i];
+ } else {
#endif // CONFIG_HIGHBITDEPTH
- for (j = 0; j < txh; j++)
- for (i = 0; i < txw; i++)
- pred[diff_stride * j + i] = dst[dst_stride * j + i];
+ for (j = 0; j < txh; j++)
+ for (i = 0; i < txw; i++)
+ pred[diff_stride * j + i] = dst[dst_stride * j + i];
#if CONFIG_HIGHBITDEPTH
- }
+ }
#endif // CONFIG_HIGHBITDEPTH
+ }
#endif // CONFIG_PVQ || CONFIG_DIST_8X8
-#endif // CONFIG_PVQ || CONFIG_DIST_8X8 || CONFIG_LGT || CONFIG_MRC_TX
(void)ctx;
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 45f22d4..acc7b10 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -61,7 +61,7 @@
#endif // CONFIG_PVQ
#if CONFIG_PVQ || CONFIG_DAALA_DIST
#include "av1/common/pvq.h"
-#endif // CONFIG_PVQ || CONFIG_DIST_8X8
+#endif // CONFIG_PVQ
#if CONFIG_DUAL_FILTER
#define DUAL_FILTER_SET_SIZE (SWITCHABLE_FILTERS * SWITCHABLE_FILTERS)
#if USE_EXTRA_FILTER
@@ -1801,7 +1801,7 @@
assert(visible_cols > 0);
#if CONFIG_DIST_8X8
- if (plane == 0 && txb_cols >= 8 && txb_rows >= 8)
+ if (x->using_dist_8x8 && plane == 0 && txb_cols >= 8 && txb_rows >= 8)
return av1_dist_8x8(cpi, xd, src, src_stride, dst, dst_stride, tx_bsize,
txb_cols, txb_rows, visible_cols, visible_rows,
x->qindex);
@@ -1850,7 +1850,7 @@
NULL, &visible_cols, &visible_rows);
#if CONFIG_DIST_8X8
- if (plane == 0 && txb_width >= 8 && txb_height >= 8)
+ if (x->using_dist_8x8 && plane == 0 && txb_width >= 8 && txb_height >= 8)
return av1_dist_8x8_diff(xd, src, src_stride, diff, diff_stride, txb_width,
txb_height, visible_cols, visible_rows, x->qindex);
else
@@ -1906,7 +1906,11 @@
const struct macroblockd_plane *const pd = &xd->plane[plane];
#endif // CONFIG_DIST_8X8
- if (cpi->sf.use_transform_domain_distortion && !CONFIG_DIST_8X8) {
+ if (cpi->sf.use_transform_domain_distortion
+#if CONFIG_DIST_8X8
+ && !x->using_dist_8x8
+#endif
+ ) {
// Transform domain distortion computation is more efficient as it does
// not involve an inverse transform, but it is less accurate.
const int buffer_length = tx_size_2d[tx_size];
@@ -2017,7 +2021,7 @@
tx_type, tx_size, recon, MAX_TX_SIZE, eob);
#if CONFIG_DIST_8X8
- if (plane == 0 && (bsw < 8 || bsh < 8)) {
+ if (x->using_dist_8x8 && plane == 0 && (bsw < 8 || bsh < 8)) {
// Save decoded pixels for inter block in pd->pred to avoid
// block_8x8_rd_txfm_daala_dist() need to produce them
// by calling av1_inverse_transform_block() again.
@@ -2133,7 +2137,7 @@
if (
#if CONFIG_DIST_8X8
- sub8x8tx_in_gte8x8blk_in_plane0 ||
+ (x->using_dist_8x8 && sub8x8tx_in_gte8x8blk_in_plane0) ||
#endif
RDCOST(x->rdmult, 0, tmp_dist) + args->this_rd < args->best_rd) {
av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize, tx_size,
@@ -2223,7 +2227,7 @@
args->this_rd += rd;
#if CONFIG_DIST_8X8
- if (!sub8x8tx_in_gte8x8blk_in_plane0) {
+ if (!x->using_dist_8x8 || !sub8x8tx_in_gte8x8blk_in_plane0) {
#endif
if (args->this_rd > args->best_rd) {
args->exit_early = 1;
@@ -2330,7 +2334,8 @@
av1_foreach_transformed_block_in_plane(xd, bsize, plane, block_rd_txfm,
&args);
#if CONFIG_DIST_8X8
- if (!args.exit_early && plane == 0 && bsize >= BLOCK_8X8 &&
+ if (x->using_dist_8x8 && !args.exit_early && plane == 0 &&
+ bsize >= BLOCK_8X8 &&
(tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4))
dist_8x8_sub8x8_txfm_rd(cpi, x, bsize, &args);
#endif
@@ -3803,9 +3808,11 @@
cpi, mb, idy, idx, &best_mode, bmode_costs,
xd->plane[0].above_context + idx, xd->plane[0].left_context + idy, &r,
&ry, &d, bsize, tx_size, y_skip, best_rd - total_rd);
-#if !CONFIG_DIST_8X8
- if (this_rd >= best_rd - total_rd) return INT64_MAX;
-#endif // !CONFIG_DIST_8X8
+#if CONFIG_DIST_8X8
+ if (!cpi->oxcf.using_dist_8x8)
+#endif
+ if (this_rd >= best_rd - total_rd) return INT64_MAX;
+
total_rd += this_rd;
cost += r;
total_distortion += d;
@@ -3823,7 +3830,7 @@
mbmi->mode = mic->bmi[3].as_mode;
#if CONFIG_DIST_8X8
- {
+ if (cpi->oxcf.using_dist_8x8) {
const struct macroblock_plane *p = &mb->plane[0];
const struct macroblockd_plane *pd = &xd->plane[0];
const int src_stride = p->src.stride;
@@ -4620,7 +4627,7 @@
#endif // CONFIG_MRC_TX
if (
#if CONFIG_DIST_8X8
- sub8x8tx_in_gte8x8blk_in_plane0 ||
+ (x->using_dist_8x8 && sub8x8tx_in_gte8x8blk_in_plane0) ||
#endif
RDCOST(x->rdmult, 0, tmp_dist) < rd_stats->ref_rdcost) {
av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize, tx_size,
@@ -4647,7 +4654,7 @@
#endif
if (eob > 0) {
#if CONFIG_DIST_8X8
- if (plane == 0 && (bw < 8 && bh < 8)) {
+ if (x->using_dist_8x8 && plane == 0 && (bw < 8 && bh < 8)) {
// Save sub8x8 luma decoded pixels
// since 8x8 luma decoded pixels are not available for daala-dist
// after recursive split of BLOCK_8x8 is done.
@@ -4967,20 +4974,22 @@
&this_rd_stats, ref_best_rd - tmp_rd, &this_cost_valid,
rd_stats_stack);
#if CONFIG_DIST_8X8
- if (plane == 0 && tx_size == TX_8X8) {
+ if (x->using_dist_8x8 && plane == 0 && tx_size == TX_8X8) {
sub8x8_eob[i] = p->eobs[block];
}
#endif // CONFIG_DIST_8X8
av1_merge_rd_stats(&sum_rd_stats, &this_rd_stats);
tmp_rd = RDCOST(x->rdmult, sum_rd_stats.rate, sum_rd_stats.dist);
-#if !CONFIG_DIST_8X8
- if (this_rd < tmp_rd) break;
+#if CONFIG_DIST_8X8
+ if (!x->using_dist_8x8)
#endif
+ if (this_rd < tmp_rd) break;
block += sub_step;
}
#if CONFIG_DIST_8X8
- if (this_cost_valid && plane == 0 && tx_size == TX_8X8) {
+ if (x->using_dist_8x8 && this_cost_valid && plane == 0 &&
+ tx_size == TX_8X8) {
const int src_stride = p->src.stride;
const int dst_stride = pd->dst.stride;
@@ -9956,7 +9965,7 @@
}
rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- rd_cost->dist_y = dist_y;
+ if (x->using_dist_8x8) rd_cost->dist_y = dist_y;
#endif
} else {
rd_cost->rate = INT_MAX;
@@ -11073,7 +11082,7 @@
rate2 += intra_cost_penalty;
distortion2 = distortion_y + distortion_uv;
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- if (bsize < BLOCK_8X8) distortion2_y = distortion_y;
+ if (x->using_dist_8x8 && bsize < BLOCK_8X8) distortion2_y = distortion_y;
#endif
} else {
int_mv backup_ref_mv[2];
@@ -11182,7 +11191,8 @@
// combined luma and chroma dist and sse.
// This can be seen inside motion_mode_rd(), which is called by
// handle_inter_mode().
- if (bsize < BLOCK_8X8) av1_init_rd_stats(&rd_stats_y);
+ if (x->using_dist_8x8 && bsize < BLOCK_8X8)
+ av1_init_rd_stats(&rd_stats_y);
#endif
rd_stats.rate = rate2;
@@ -11206,7 +11216,7 @@
rate_y = rd_stats_y.rate;
rate_uv = rd_stats_uv.rate;
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- if (bsize < BLOCK_8X8) {
+ if (x->using_dist_8x8 && bsize < BLOCK_8X8) {
if (rd_stats_y.rate != INT_MAX) {
assert(rd_stats_y.sse < INT64_MAX);
assert(rd_stats_y.dist < INT64_MAX);
@@ -11394,7 +11404,8 @@
// tmp_rd_stats.skip = 1 and tmp_rd_stats.dist and .sse
// represent combined luma and chroma .dist and .sse,
// we should initialized tmp_rd_stats_y.
- if (bsize < BLOCK_8X8) av1_init_rd_stats(&tmp_rd_stats_y);
+ if (x->using_dist_8x8 && bsize < BLOCK_8X8)
+ av1_init_rd_stats(&tmp_rd_stats_y);
#endif
// Point to variables that are not maintained between iterations
args.single_newmv = dummy_single_newmv;
@@ -11470,7 +11481,7 @@
backup_mbmi = *mbmi;
backup_skip = x->skip;
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- if (bsize < BLOCK_8X8) {
+ if (x->using_dist_8x8 && bsize < BLOCK_8X8) {
if (tmp_rd_stats_y.rate != INT_MAX) {
assert(tmp_rd_stats_y.sse < INT64_MAX);
assert(tmp_rd_stats_y.dist < INT64_MAX);
@@ -11566,7 +11577,7 @@
rate_y = 0;
rate_uv = 0;
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- if (bsize < BLOCK_8X8) {
+ if (x->using_dist_8x8 && bsize < BLOCK_8X8) {
assert(total_sse_y < INT64_MAX);
distortion2_y = total_sse_y;
}
@@ -11591,9 +11602,8 @@
}
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- if ((bsize < BLOCK_8X8) && (rate2 != INT_MAX)) {
+ if (x->using_dist_8x8 && bsize < BLOCK_8X8 && rate2 != INT_MAX)
assert(distortion2_y < INT64_MAX);
- }
#endif
if (ref_frame == INTRA_FRAME) {
@@ -11672,7 +11682,7 @@
this_skip2 || skippable);
best_rate_uv = rate_uv;
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- if (bsize < BLOCK_8X8) {
+ if (x->using_dist_8x8 && bsize < BLOCK_8X8) {
assert(distortion2_y < INT64_MAX);
rd_cost->dist_y = distortion2_y;
}
@@ -11685,9 +11695,8 @@
}
}
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- if ((bsize < BLOCK_8X8) && (rd_cost->rate != INT_MAX)) {
+ if (x->using_dist_8x8 && bsize < BLOCK_8X8 && rd_cost->rate != INT_MAX)
assert(rd_cost->dist_y < INT64_MAX);
- }
#endif
/* keep record of best compound/single-only prediction */
if (!disable_skip && ref_frame != INTRA_FRAME) {
@@ -11820,7 +11829,7 @@
rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
best_skip2 = skip_blk;
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- if (bsize < BLOCK_8X8) {
+ if (x->using_dist_8x8 && bsize < BLOCK_8X8) {
assert(rd_cost->rate != INT_MAX);
assert(rd_cost->dist_y < INT64_MAX);
rd_cost->dist_y = rd_stats_y.dist;
@@ -11830,9 +11839,8 @@
}
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- if ((bsize < BLOCK_8X8) && (rd_cost->rate != INT_MAX)) {
+ if (x->using_dist_8x8 && bsize < BLOCK_8X8 && rd_cost->rate != INT_MAX)
assert(rd_cost->dist_y < INT64_MAX);
- }
#endif
// Only try palette mode when the best mode so far is an intra mode.
@@ -12366,7 +12374,7 @@
rd_cost->dist = distortion2;
rd_cost->rdcost = this_rd;
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- if (bsize < BLOCK_8X8) rd_cost->dist_y = distortion2;
+ if (x->using_dist_8x8 && bsize < BLOCK_8X8) rd_cost->dist_y = distortion2;
#endif
if (this_rd >= best_rd_so_far) {
rd_cost->rate = INT_MAX;