New experiment DIST_8x8
A framework for computing a distortion at 8x8 luma block level
during RDO-based mode decision search. New 8x8 distortion metric can
be plugged in by way of this tool.
Existing daala_dist now uses this experiment as well.
Other possible applications that can make use of this experiment would be
a distortion metric that applies at the 8x8 pixel level, such as PSNR-HVS, SSIM, etc.
A rd_cost for final coding mode decision for a super block is
computed for a partition size 8x8 or larger. For a block larger than 8x8,
a distortion of each 8x8 block is independently computed then summed up.
The rd_cost for 8x8 block with new 8x8 distortion metric is computed
only when the mode decisions of its sub8x8 blocks are completed.
However, MSE distortion metric is used with sub8x8 mode decision. Thus,
early termination is also determined with the MSE based rd_cost.
Because the best rd_cost (i.e. the reference rd_cost) during sub8x8 prediction
or sub8x8 tx is based on new 8x8 distortion while each sub8x8 uses MSE,
the existing early termination cannot be used (and this can be one of the possible reasons
for the BD-Rate change with this revision).
For a sub8x8 prediction, the prediction mode for each sub8x8 block of an 8x8 block is
decided with existing MSE and then av1_dist_8x8() is applied to the 8x8 pixels.
(There is also av1_dist_8x8_diff, which can input diff signal directly)
For a sub8x8 tx in a block larger than 8x8, instead of computing MSE distortion for
each sub8x8 tx block, we wait until all sub8x8 tx blocks are encoded before av1_dist_8x8()
is applied to 8x8 pixels.
Sub8x8 prediction and transforms were the trickiest parts of this change.
Two kinds of distortion, for a) predicted pixels and b) decoded pixels
(i.e. predicted + possible reconstructed residue), are always computed during RDO.
In order to access those two signals a) and b) for an 8x8 block after
its sub8x8 mode decision is finished, a) and b) need to be properly stored for later retrieval.
The CB4X4 experiment makes the task of accessing the a) and b) signals for a sub8x8 block more difficult,
since the intermediate data (i.e. a and/or b) for sub8x8 block
are not easily accessible outside of the current partition unless reconstructed
with decided coding modes.
Change-Id: If60301a890c0674a3de1d8206965bbd6a6495bb7
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 9531abe..dbce6a8 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -1315,10 +1315,10 @@
return av1_compute_rd_mult(cpi, segment_qindex + cm->y_dc_delta_q);
}
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
-static void daala_dist_set_sub8x8_dst(MACROBLOCK *const x, uint8_t *dst8x8,
- BLOCK_SIZE bsize, int bw, int bh,
- int mi_row, int mi_col) {
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
+static void dist_8x8_set_sub8x8_dst(MACROBLOCK *const x, uint8_t *dst8x8,
+ BLOCK_SIZE bsize, int bw, int bh,
+ int mi_row, int mi_col) {
MACROBLOCKD *const xd = &x->e_mbd;
struct macroblockd_plane *const pd = &xd->plane[0];
const int dst_stride = pd->dst.stride;
@@ -3729,7 +3729,7 @@
&this_rdc, best_rdc.rdcost - sum_rdc.rdcost, pc_tree->split[idx]);
#endif // CONFIG_SUPERTX
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (bsize == BLOCK_8X8 && this_rdc.rate != INT_MAX) {
assert(this_rdc.dist_y < INT64_MAX);
}
@@ -3747,7 +3747,7 @@
#if CONFIG_SUPERTX
sum_rate_nocoef += this_rate_nocoef;
#endif // CONFIG_SUPERTX
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (bsize == BLOCK_8X8) {
assert(this_rdc.dist_y < INT64_MAX);
sum_rdc.dist_y += this_rdc.dist_y;
@@ -3757,11 +3757,10 @@
}
reached_last_index = (idx == 4);
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (reached_last_index && sum_rdc.rdcost != INT64_MAX &&
bsize == BLOCK_8X8) {
- int use_activity_masking = 0;
- int64_t daala_dist;
+ int64_t dist_8x8;
const int src_stride = x->plane[0].src.stride;
uint8_t *decoded_8x8;
@@ -3772,19 +3771,16 @@
#endif
decoded_8x8 = (uint8_t *)x->decoded_8x8;
-#if CONFIG_PVQ
- use_activity_masking = x->daala_enc.use_activity_masking;
-#endif
- daala_dist =
- av1_daala_dist(xd, x->plane[0].src.buf - 4 * src_stride - 4,
- src_stride, decoded_8x8, 8, 8, 8, 8, 8, 1,
- use_activity_masking, x->qindex)
+ dist_8x8 =
+ av1_dist_8x8(cpi, xd, x->plane[0].src.buf - 4 * src_stride - 4,
+ src_stride, decoded_8x8, 8, BLOCK_8X8, 8, 8, 8, 8,
+ x->qindex)
<< 4;
assert(sum_rdc.dist_y < INT64_MAX);
- sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + daala_dist;
+ sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + dist_8x8;
sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
}
-#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4
+#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4
#if CONFIG_SUPERTX
if (supertx_allowed && sum_rdc.rdcost < INT64_MAX && reached_last_index) {
@@ -3922,14 +3918,14 @@
best_rdc.rdcost - sum_rdc.rdcost);
#endif // CONFIG_SUPERTX
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (this_rdc.rate != INT_MAX && bsize == BLOCK_8X8) {
update_state(cpi, td, &pc_tree->horizontal[1], mi_row + mi_step, mi_col,
subsize, DRY_RUN_NORMAL);
encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row + mi_step, mi_col,
subsize, NULL);
}
-#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4
+#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4
if (this_rdc.rate == INT_MAX) {
sum_rdc.rdcost = INT64_MAX;
@@ -3943,14 +3939,13 @@
#if CONFIG_SUPERTX
sum_rate_nocoef += this_rate_nocoef;
#endif // CONFIG_SUPERTX
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
sum_rdc.dist_y += this_rdc.dist_y;
#endif
}
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (sum_rdc.rdcost != INT64_MAX && bsize == BLOCK_8X8) {
- int use_activity_masking = 0;
- int64_t daala_dist;
+ int64_t dist_8x8;
const int src_stride = x->plane[0].src.stride;
uint8_t *decoded_8x8;
@@ -3961,17 +3956,14 @@
#endif
decoded_8x8 = (uint8_t *)x->decoded_8x8;
-#if CONFIG_PVQ
- use_activity_masking = x->daala_enc.use_activity_masking;
-#endif
- daala_dist = av1_daala_dist(xd, x->plane[0].src.buf - 4 * src_stride,
- src_stride, decoded_8x8, 8, 8, 8, 8, 8, 1,
- use_activity_masking, x->qindex)
- << 4;
- sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + daala_dist;
+ dist_8x8 = av1_dist_8x8(cpi, xd, x->plane[0].src.buf - 4 * src_stride,
+ src_stride, decoded_8x8, 8, BLOCK_8X8, 8, 8, 8,
+ 8, x->qindex)
+ << 4;
+ sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + dist_8x8;
sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
}
-#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4
+#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4
}
#if CONFIG_SUPERTX
@@ -4106,14 +4098,14 @@
best_rdc.rdcost - sum_rdc.rdcost);
#endif // CONFIG_SUPERTX
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (this_rdc.rate != INT_MAX && bsize == BLOCK_8X8) {
update_state(cpi, td, &pc_tree->vertical[1], mi_row, mi_col + mi_step,
subsize, DRY_RUN_NORMAL);
encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row, mi_col + mi_step,
subsize, NULL);
}
-#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4
+#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4
if (this_rdc.rate == INT_MAX) {
sum_rdc.rdcost = INT64_MAX;
@@ -4127,14 +4119,13 @@
#if CONFIG_SUPERTX
sum_rate_nocoef += this_rate_nocoef;
#endif // CONFIG_SUPERTX
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
sum_rdc.dist_y += this_rdc.dist_y;
#endif
}
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (sum_rdc.rdcost != INT64_MAX && bsize == BLOCK_8X8) {
- int use_activity_masking = 0;
- int64_t daala_dist;
+ int64_t dist_8x8;
const int src_stride = x->plane[0].src.stride;
uint8_t *decoded_8x8;
@@ -4145,17 +4136,14 @@
#endif
decoded_8x8 = (uint8_t *)x->decoded_8x8;
-#if CONFIG_PVQ
- use_activity_masking = x->daala_enc.use_activity_masking;
-#endif
- daala_dist =
- av1_daala_dist(xd, x->plane[0].src.buf - 4, src_stride, decoded_8x8,
- 8, 8, 8, 8, 8, 1, use_activity_masking, x->qindex)
+ dist_8x8 =
+ av1_dist_8x8(cpi, xd, x->plane[0].src.buf - 4, src_stride,
+ decoded_8x8, 8, BLOCK_8X8, 8, 8, 8, 8, x->qindex)
<< 4;
- sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + daala_dist;
+ sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + dist_8x8;
sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
}
-#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4
+#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4
}
#if CONFIG_SUPERTX
if (supertx_allowed && sum_rdc.rdcost < INT64_MAX && !abort_flag) {
@@ -4405,11 +4393,11 @@
(void)best_rd;
*rd_cost = best_rdc;
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (bsize <= BLOCK_8X8 && rd_cost->rate != INT_MAX) {
assert(rd_cost->dist_y < INT64_MAX);
}
-#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4
+#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4
#if CONFIG_SUPERTX
*rate_nocoef = best_rate_nocoef;
#endif // CONFIG_SUPERTX
@@ -4435,13 +4423,13 @@
x->cfl_store_y = 0;
#endif
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX &&
bsize == BLOCK_4X4 && pc_tree->index == 3) {
encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize,
pc_tree, NULL);
}
-#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4
+#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4
if (bsize == cm->sb_size) {
#if !CONFIG_PVQ && !CONFIG_LV_MAP
@@ -6053,11 +6041,11 @@
#endif
}
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (bsize < BLOCK_8X8) {
- daala_dist_set_sub8x8_dst(x, (uint8_t *)x->decoded_8x8, bsize,
- block_size_wide[bsize], block_size_high[bsize],
- mi_row, mi_col);
+ dist_8x8_set_sub8x8_dst(x, (uint8_t *)x->decoded_8x8, bsize,
+ block_size_wide[bsize], block_size_high[bsize],
+ mi_row, mi_col);
}
#endif