[dist-8x8] Enforce partition and tx size 8x8 or larger
Because the existing code, which is deeply coupled with RDO-based mode
decision, is incomplete, unreliable and difficult to maintain, the
CONFIG_DIST_8X8 experiment now forces RDO mode decision to use only
8x8 or larger partition and transform block sizes.
If --tune=[daala-dist|cdef-dist] is given, the minimum partition size is
forced to 8x8 pixels (4x16 and 16x4 are excluded as well), and transform
sizes smaller than 8x8 are excluded during the RDO search.
To measure the performance change from "--tune=[daala-dist|cdef-dist]",
compare with --enable-dist-8x8, which has the same mode decision options
but uses PSNR as the RDO distortion metric.
Change-Id: If052bd66f82a01e6f3675e9202e71f878b8baf5a
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index cbb23eb..7a57128 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -355,7 +355,6 @@
#if CONFIG_DIST_8X8
int using_dist_8x8;
aom_tune_metric tune_metric;
- DECLARE_ALIGNED(16, int16_t, pred_luma[MAX_SB_SQUARE]);
#endif // CONFIG_DIST_8X8
int comp_idx_cost[COMP_INDEX_CONTEXTS][2];
int comp_group_idx_cost[COMP_GROUP_IDX_CONTEXTS][2];
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 5167d43..a1db574 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -2350,45 +2350,6 @@
#undef RTP_STX_TRY_ARGS
}
-#if CONFIG_DIST_8X8
-static int64_t dist_8x8_yuv(const AV1_COMP *const cpi, MACROBLOCK *const x,
- uint8_t *src_plane_8x8[MAX_MB_PLANE],
- uint8_t *dst_plane_8x8[MAX_MB_PLANE]) {
- const AV1_COMMON *const cm = &cpi->common;
- const int num_planes = av1_num_planes(cm);
- MACROBLOCKD *const xd = &x->e_mbd;
- int64_t dist_8x8, dist_8x8_uv, total_dist;
- const int src_stride = x->plane[0].src.stride;
- int plane;
-
- const int dst_stride = xd->plane[0].dst.stride;
- dist_8x8 =
- av1_dist_8x8(cpi, x, src_plane_8x8[0], src_stride, dst_plane_8x8[0],
- dst_stride, BLOCK_8X8, 8, 8, 8, 8, x->qindex)
- << 4;
-
- // Compute chroma distortion for a luma 8x8 block
- dist_8x8_uv = 0;
-
- if (num_planes > 1) {
- for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
- unsigned sse;
- const int src_stride_uv = x->plane[plane].src.stride;
- const int dst_stride_uv = xd->plane[plane].dst.stride;
- const int ssx = xd->plane[plane].subsampling_x;
- const int ssy = xd->plane[plane].subsampling_y;
- const BLOCK_SIZE plane_bsize = get_plane_block_size(BLOCK_8X8, ssx, ssy);
-
- cpi->fn_ptr[plane_bsize].vf(src_plane_8x8[plane], src_stride_uv,
- dst_plane_8x8[plane], dst_stride_uv, &sse);
- dist_8x8_uv += (int64_t)sse << 4;
- }
- }
-
- return total_dist = dist_8x8 + dist_8x8_uv;
-}
-#endif // CONFIG_DIST_8X8
-
static void reset_partition(PC_TREE *pc_tree, BLOCK_SIZE bsize) {
pc_tree->partitioning = PARTITION_NONE;
pc_tree->cb_search_range = SEARCH_FULL_PLANE;
@@ -2492,6 +2453,11 @@
xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
+#if CONFIG_DIST_8X8
+ if (block_size_high[bsize] <= 8 || block_size_wide[bsize] <= 8)
+ do_square_split = 0;
+#endif
+
// PARTITION_NONE
if (partition_none_allowed) {
if (bsize_at_least_8x8) pc_tree->partitioning = PARTITION_NONE;
@@ -2549,17 +2515,6 @@
int64_t temp_best_rdcost = best_rdc.rdcost;
pn_rdc = best_rdc;
-#if CONFIG_DIST_8X8
- uint8_t *src_plane_8x8[MAX_MB_PLANE], *dst_plane_8x8[MAX_MB_PLANE];
-
- if (x->using_dist_8x8 && bsize == BLOCK_8X8) {
- for (int i = 0; i < MAX_MB_PLANE; i++) {
- src_plane_8x8[i] = x->plane[i].src.buf;
- dst_plane_8x8[i] = xd->plane[i].dst.buf;
- }
- }
-#endif // CONFIG_DIST_8X8
-
// PARTITION_SPLIT
if (do_square_split) {
int reached_last_index = 0;
@@ -2597,14 +2552,6 @@
}
reached_last_index = (idx == 4);
-#if CONFIG_DIST_8X8
- if (x->using_dist_8x8 && reached_last_index &&
- sum_rdc.rdcost != INT64_MAX && bsize == BLOCK_8X8) {
- sum_rdc.dist = dist_8x8_yuv(cpi, x, src_plane_8x8, dst_plane_8x8);
- sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
- }
-#endif // CONFIG_DIST_8X8
-
if (reached_last_index && sum_rdc.rdcost < best_rdc.rdcost) {
sum_rdc.rate += partition_cost[PARTITION_SPLIT];
sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
@@ -2663,14 +2610,6 @@
}
}
-#if CONFIG_DIST_8X8
- if (x->using_dist_8x8 && best_rdc.rate < INT_MAX &&
- best_rdc.dist < INT64_MAX && bsize == BLOCK_4X4 && pc_tree->index == 3) {
- encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize,
- pc_tree, NULL);
- }
-#endif // CONFIG_DIST_8X8
-
if (bsize == cm->seq_params.sb_size) {
assert(best_rdc.rate < INT_MAX);
assert(best_rdc.dist < INT64_MAX);
@@ -3395,6 +3334,15 @@
// Partition block source pixel variance.
unsigned int pb_source_variance = UINT_MAX;
+#if CONFIG_DIST_8X8
+ if (block_size_high[bsize] <= 8) partition_horz_allowed = 0;
+
+ if (block_size_wide[bsize] <= 8) partition_vert_allowed = 0;
+
+ if (block_size_high[bsize] <= 8 || block_size_wide[bsize] <= 8)
+ do_square_split = 0;
+#endif
+
// PARTITION_NONE
if (partition_none_allowed) {
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc,
@@ -3510,17 +3458,6 @@
// store estimated motion vector
if (cpi->sf.adaptive_motion_search) store_pred_mv(x, ctx_none);
-#if CONFIG_DIST_8X8
- uint8_t *src_plane_8x8[MAX_MB_PLANE], *dst_plane_8x8[MAX_MB_PLANE];
-
- if (x->using_dist_8x8 && bsize == BLOCK_8X8) {
- for (int i = 0; i < num_planes; i++) {
- src_plane_8x8[i] = x->plane[i].src.buf;
- dst_plane_8x8[i] = xd->plane[i].dst.buf;
- }
- }
-#endif // CONFIG_DIST_8X8
-
// PARTITION_SPLIT
if (do_square_split) {
av1_init_rd_stats(&sum_rdc);
@@ -3571,21 +3508,6 @@
}
const int reached_last_index = (idx == 4);
-#if CONFIG_DIST_8X8
- if (x->using_dist_8x8 && reached_last_index &&
- sum_rdc.rdcost != INT64_MAX && bsize == BLOCK_8X8) {
- const int64_t dist_8x8 =
- dist_8x8_yuv(cpi, x, src_plane_8x8, dst_plane_8x8);
-#ifdef DEBUG_DIST_8X8
- // TODO(anyone): Fix dist-8x8 assert failure here when CFL is enabled
- if (x->tune_metric == AOM_TUNE_PSNR && xd->bd == 8 && 0 /*!CONFIG_CFL*/)
- assert(sum_rdc.dist == dist_8x8);
-#endif // DEBUG_DIST_8X8
- sum_rdc.dist = dist_8x8;
- sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
- }
-#endif // CONFIG_DIST_8X8
-
if (reached_last_index && sum_rdc.rdcost < best_rdc.rdcost) {
sum_rdc.rate += partition_cost[PARTITION_SPLIT];
sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
@@ -3660,15 +3582,6 @@
best_rdc.rdcost - sum_rdc.rdcost);
horz_rd[1] = this_rdc.rdcost;
-#if CONFIG_DIST_8X8
- if (x->using_dist_8x8 && this_rdc.rate != INT_MAX && bsize == BLOCK_8X8) {
- update_state(cpi, tile_data, td, &pc_tree->horizontal[1],
- mi_row + mi_step, mi_col, subsize, DRY_RUN_NORMAL);
- encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL,
- mi_row + mi_step, mi_col, subsize, NULL);
- }
-#endif // CONFIG_DIST_8X8
-
if (this_rdc.rate == INT_MAX) {
sum_rdc.rdcost = INT64_MAX;
} else {
@@ -3676,20 +3589,6 @@
sum_rdc.dist += this_rdc.dist;
sum_rdc.rdcost += this_rdc.rdcost;
}
-#if CONFIG_DIST_8X8
- if (x->using_dist_8x8 && sum_rdc.rdcost != INT64_MAX &&
- bsize == BLOCK_8X8) {
- const int64_t dist_8x8 =
- dist_8x8_yuv(cpi, x, src_plane_8x8, dst_plane_8x8);
-#ifdef DEBUG_DIST_8X8
- // TODO(anyone): Fix dist-8x8 assert failure here when CFL is enabled
- if (x->tune_metric == AOM_TUNE_PSNR && xd->bd == 8 && 0 /*!CONFIG_CFL*/)
- assert(sum_rdc.dist == dist_8x8);
-#endif // DEBUG_DIST_8X8
- sum_rdc.dist = dist_8x8;
- sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
- }
-#endif // CONFIG_DIST_8X8
}
if (sum_rdc.rdcost < best_rdc.rdcost) {
@@ -3746,15 +3645,6 @@
best_rdc.rdcost - sum_rdc.rdcost);
vert_rd[1] = this_rdc.rdcost;
-#if CONFIG_DIST_8X8
- if (x->using_dist_8x8 && this_rdc.rate != INT_MAX && bsize == BLOCK_8X8) {
- update_state(cpi, tile_data, td, &pc_tree->vertical[1], mi_row,
- mi_col + mi_step, subsize, DRY_RUN_NORMAL);
- encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row,
- mi_col + mi_step, subsize, NULL);
- }
-#endif // CONFIG_DIST_8X8
-
if (this_rdc.rate == INT_MAX) {
sum_rdc.rdcost = INT64_MAX;
} else {
@@ -3762,21 +3652,6 @@
sum_rdc.dist += this_rdc.dist;
sum_rdc.rdcost += this_rdc.rdcost;
}
-#if CONFIG_DIST_8X8
- if (x->using_dist_8x8 && sum_rdc.rdcost != INT64_MAX &&
- bsize == BLOCK_8X8) {
- const int64_t dist_8x8 =
- dist_8x8_yuv(cpi, x, src_plane_8x8, dst_plane_8x8);
-#ifdef DEBUG_DIST_8X8
- // TODO(anyone): Fix dist-8x8 assert failure here when CFL is enabled
- if (x->tune_metric == AOM_TUNE_PSNR && xd->bd == 8 &&
- 0 /* !CONFIG_CFL */)
- assert(sum_rdc.dist == dist_8x8);
-#endif // DEBUG_DIST_8X8
- sum_rdc.dist = dist_8x8;
- sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
- }
-#endif // CONFIG_DIST_8X8
}
if (sum_rdc.rdcost < best_rdc.rdcost) {
@@ -3810,6 +3685,13 @@
int horzab_partition_allowed = ext_partition_allowed;
int vertab_partition_allowed = ext_partition_allowed;
+#if CONFIG_DIST_8X8
+ if (block_size_high[bsize] <= 8 || block_size_wide[bsize] <= 8) {
+ horzab_partition_allowed = 0;
+ vertab_partition_allowed = 0;
+ }
+#endif
+
if (cpi->sf.prune_ext_partition_types_search_level) {
if (cpi->sf.prune_ext_partition_types_search_level == 1) {
// TODO(debargha,huisu@google.com): may need to tune the threshold for
@@ -4046,6 +3928,13 @@
&partition_vert4_allowed);
}
+#if CONFIG_DIST_8X8
+ if (block_size_high[bsize] <= 16 || block_size_wide[bsize] <= 16) {
+ partition_horz4_allowed = 0;
+ partition_vert4_allowed = 0;
+ }
+#endif
+
// PARTITION_HORZ_4
if (partition_horz4_allowed && has_rows &&
(do_rectangular_split || active_h_edge(cpi, mi_row, mi_step))) {
@@ -4158,14 +4047,6 @@
}
}
-#if CONFIG_DIST_8X8
- if (x->using_dist_8x8 && best_rdc.rate < INT_MAX &&
- best_rdc.dist < INT64_MAX && bsize == BLOCK_4X4 && pc_tree->index == 3) {
- encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize,
- pc_tree, NULL);
- }
-#endif // CONFIG_DIST_8X8
-
if (bsize == cm->seq_params.sb_size) {
assert(best_rdc.rate < INT_MAX);
assert(best_rdc.dist < INT64_MAX);
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 5cf61cd..0c50ba1 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -2018,7 +2018,7 @@
assert(visible_cols > 0);
#if CONFIG_DIST_8X8
- if (x->using_dist_8x8 && plane == 0 && txb_cols >= 8 && txb_rows >= 8)
+ if (x->using_dist_8x8 && plane == 0)
return (unsigned)av1_dist_8x8(cpi, x, src, src_stride, dst, dst_stride,
tx_bsize, txb_cols, txb_rows, visible_cols,
visible_rows, x->qindex);
@@ -2036,8 +2036,7 @@
static INLINE int64_t pixel_diff_dist(const MACROBLOCK *x, int plane,
int blk_row, int blk_col,
const BLOCK_SIZE plane_bsize,
- const BLOCK_SIZE tx_bsize,
- int force_sse) {
+ const BLOCK_SIZE tx_bsize) {
int visible_rows, visible_cols;
const MACROBLOCKD *xd = &x->e_mbd;
get_txb_dimensions(xd, plane, plane_bsize, blk_row, blk_col, tx_bsize, NULL,
@@ -2047,8 +2046,7 @@
#if CONFIG_DIST_8X8
int txb_height = block_size_high[tx_bsize];
int txb_width = block_size_wide[tx_bsize];
- if (!force_sse && x->using_dist_8x8 && plane == 0 && txb_width >= 8 &&
- txb_height >= 8) {
+ if (x->using_dist_8x8 && plane == 0) {
const int src_stride = x->plane[plane].src.stride;
const int src_idx = (blk_row * src_stride + blk_col)
<< tx_size_wide_log2[0];
@@ -2059,8 +2057,6 @@
txb_width, txb_height, visible_cols, visible_rows,
x->qindex);
}
-#else
- (void)force_sse;
#endif
diff += ((blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]);
return aom_sum_squares_2d_i16(diff, diff_stride, visible_cols, visible_rows);
@@ -2216,29 +2212,7 @@
av1_inverse_transform_block(xd, dqcoeff, plane, tx_type, tx_size, recon,
MAX_TX_SIZE, eob,
cpi->common.reduced_tx_set_used);
-#if CONFIG_DIST_8X8
- if (x->using_dist_8x8 && plane == 0 && (bsw < 8 || bsh < 8)) {
- // Save decoded pixels for inter block in pd->pred to avoid
- // block_8x8_rd_txfm_daala_dist() need to produce them
- // by calling av1_inverse_transform_block() again.
- const int pred_stride = block_size_wide[plane_bsize];
- const int pred_idx = (blk_row * pred_stride + blk_col)
- << tx_size_wide_log2[0];
- int16_t *pred = &x->pred_luma[pred_idx];
- int i, j;
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- for (j = 0; j < bsh; j++)
- for (i = 0; i < bsw; i++)
- pred[j * pred_stride + i] =
- CONVERT_TO_SHORTPTR(recon)[j * MAX_TX_SIZE + i];
- } else {
- for (j = 0; j < bsh; j++)
- for (i = 0; i < bsw; i++)
- pred[j * pred_stride + i] = recon[j * MAX_TX_SIZE + i];
- }
- }
-#endif // CONFIG_DIST_8X8
return 16 * pixel_dist(cpi, x, plane, src, src_stride, recon, MAX_TX_SIZE,
blk_row, blk_col, plane_bsize, tx_bsize);
}
@@ -3069,7 +3043,6 @@
#if CONFIG_DIST_8X8
if (x->using_dist_8x8) use_transform_domain_distortion = 0;
#endif
-
int calc_pixel_domain_distortion_final =
cpi->sf.use_transform_domain_distortion == 1 &&
use_transform_domain_distortion && x->rd_model != LOW_TXFM_RD &&
@@ -3082,7 +3055,7 @@
const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
int64_t block_sse =
- pixel_diff_dist(x, plane, blk_row, blk_col, plane_bsize, tx_bsize, 1);
+ pixel_diff_dist(x, plane, blk_row, blk_col, plane_bsize, tx_bsize);
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
block_sse = ROUND_POWER_OF_TWO(block_sse, (xd->bd - 8) * 2);
block_sse *= 16;
@@ -3256,21 +3229,6 @@
int64_t rd1, rd2, rd;
RD_STATS this_rd_stats;
-#if CONFIG_DIST_8X8
- // If sub8x8 tx, 8x8 or larger partition, and luma channel,
- // dist-8x8 disables early skip, because the distortion metrics for
- // sub8x8 tx (MSE) and reference distortion from 8x8 or larger partition
- // (new distortion metric) are different.
- // Exception is: dist-8x8 is enabled but still MSE is used,
- // i.e. "--tune=" encoder option is not used.
- int bw = block_size_wide[plane_bsize];
- int bh = block_size_high[plane_bsize];
- int disable_early_skip =
- x->using_dist_8x8 && plane == AOM_PLANE_Y && bw >= 8 && bh >= 8 &&
- (tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4) &&
- x->tune_metric != AOM_TUNE_PSNR;
-#endif // CONFIG_DIST_8X8
-
av1_init_rd_stats(&this_rd_stats);
if (args->exit_early) return;
@@ -3314,100 +3272,11 @@
args->this_rd += rd;
-#if CONFIG_DIST_8X8
- if (!disable_early_skip)
-#endif
- if (args->this_rd > args->best_rd) {
- args->exit_early = 1;
- return;
- }
-}
-
-#if CONFIG_DIST_8X8
-static void dist_8x8_sub8x8_txfm_rd(const AV1_COMP *const cpi, MACROBLOCK *x,
- BLOCK_SIZE bsize,
- struct rdcost_block_args *args) {
- MACROBLOCKD *const xd = &x->e_mbd;
- const struct macroblockd_plane *const pd = &xd->plane[0];
- const struct macroblock_plane *const p = &x->plane[0];
- MB_MODE_INFO *const mbmi = xd->mi[0];
- const int src_stride = p->src.stride;
- const int dst_stride = pd->dst.stride;
- const uint8_t *src = &p->src.buf[0];
- const uint8_t *dst = &pd->dst.buf[0];
- const int16_t *pred = &x->pred_luma[0];
- int bw = block_size_wide[bsize];
- int bh = block_size_high[bsize];
- int visible_w = bw;
- int visible_h = bh;
-
- int i, j;
- int64_t rd, rd1, rd2;
- int64_t sse = INT64_MAX, dist = INT64_MAX;
- int qindex = x->qindex;
-
- assert((bw & 0x07) == 0);
- assert((bh & 0x07) == 0);
-
- get_txb_dimensions(xd, 0, bsize, 0, 0, bsize, &bw, &bh, &visible_w,
- &visible_h);
-
- const int diff_stride = block_size_wide[bsize];
- const int16_t *diff = p->src_diff;
- sse = dist_8x8_diff(x, src, src_stride, diff, diff_stride, bw, bh, visible_w,
- visible_h, qindex);
- sse = ROUND_POWER_OF_TWO(sse, (xd->bd - 8) * 2);
- sse *= 16;
-
- if (!is_inter_block(mbmi)) {
- dist = av1_dist_8x8(cpi, x, src, src_stride, dst, dst_stride, bsize, bw, bh,
- visible_w, visible_h, qindex);
- dist *= 16;
- } else {
- // For inter mode, the decoded pixels are provided in x->pred_luma,
- // while the predicted pixels are in dst.
- uint8_t *pred8;
- DECLARE_ALIGNED(16, uint16_t, pred16[MAX_SB_SQUARE]);
-
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
- pred8 = CONVERT_TO_BYTEPTR(pred16);
- else
- pred8 = (uint8_t *)pred16;
-
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- for (j = 0; j < bh; j++)
- for (i = 0; i < bw; i++)
- CONVERT_TO_SHORTPTR(pred8)[j * bw + i] = pred[j * bw + i];
- } else {
- for (j = 0; j < bh; j++)
- for (i = 0; i < bw; i++) pred8[j * bw + i] = (uint8_t)pred[j * bw + i];
- }
-
- dist = av1_dist_8x8(cpi, x, src, src_stride, pred8, bw, bsize, bw, bh,
- visible_w, visible_h, qindex);
- dist *= 16;
+ if (args->this_rd > args->best_rd) {
+ args->exit_early = 1;
+ return;
}
-
-#ifdef DEBUG_DIST_8X8
- if (x->tune_metric == AOM_TUNE_PSNR && xd->bd == 8) {
- assert(args->rd_stats.sse == sse);
- assert(args->rd_stats.dist == dist);
- }
-#endif // DEBUG_DIST_8X8
-
- args->rd_stats.sse = sse;
- args->rd_stats.dist = dist;
-
- rd1 = RDCOST(x->rdmult, args->rd_stats.rate, args->rd_stats.dist);
- rd2 = RDCOST(x->rdmult, 0, args->rd_stats.sse);
- rd = AOMMIN(rd1, rd2);
-
- args->rd_stats.rdcost = rd;
- args->this_rd = rd;
-
- if (args->this_rd > args->best_rd) args->exit_early = 1;
}
-#endif // CONFIG_DIST_8X8
static void txfm_rd_in_plane(MACROBLOCK *x, const AV1_COMP *cpi,
RD_STATS *rd_stats, int64_t ref_best_rd, int plane,
@@ -3431,15 +3300,6 @@
av1_foreach_transformed_block_in_plane(xd, bsize, plane, block_rd_txfm,
&args);
-#if CONFIG_DIST_8X8
- int bw = block_size_wide[bsize];
- int bh = block_size_high[bsize];
-
- if (x->using_dist_8x8 && !args.exit_early && plane == 0 && bw >= 8 &&
- bh >= 8 && (tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4))
- dist_8x8_sub8x8_txfm_rd(cpi, x, bsize, &args);
-#endif
-
if (args.exit_early) {
av1_invalid_rd_stats(rd_stats);
} else {
@@ -3611,6 +3471,11 @@
prune_tx(cpi, bs, x, xd, EXT_TX_SET_ALL16);
for (n = start_tx; depth <= MAX_TX_DEPTH; depth++, n = sub_tx_size_map[n]) {
+#if CONFIG_DIST_8X8
+ if (x->using_dist_8x8) {
+ if (tx_size_wide[n] < 8 || tx_size_high[n] < 8) continue;
+ }
+#endif
RD_STATS this_rd_stats;
if (mbmi->ref_mv_idx > 0) x->rd_model = LOW_TXFM_RD;
rd = txfm_yrd(cpi, x, &this_rd_stats, ref_best_rd, bs, n, FTXS_NONE);
@@ -4835,11 +4700,7 @@
RD_STATS this_rd_stats;
int this_cost_valid = 1;
int64_t tmp_rd = 0;
-#if CONFIG_DIST_8X8
- int sub8x8_eob[4] = { 0, 0, 0, 0 };
- struct macroblock_plane *const p = &x->plane[0];
- struct macroblockd_plane *const pd = &xd->plane[0];
-#endif
+
split_rd_stats->rate = x->txfm_partition_cost[txfm_partition_ctx][1];
assert(tx_size < TX_SIZES_ALL);
@@ -4857,123 +4718,22 @@
&this_cost_valid, ftxs_mode,
(rd_info_node != NULL) ? rd_info_node->children[blk_idx] : NULL);
-#if CONFIG_DIST_8X8
- if (!x->using_dist_8x8)
-#endif
- if (!this_cost_valid) goto LOOP_EXIT;
-#if CONFIG_DIST_8X8
- if (x->using_dist_8x8 && tx_size == TX_8X8) {
- sub8x8_eob[2 * (r / bsh) + (c / bsw)] = p->eobs[block];
- }
-#endif // CONFIG_DIST_8X8
+ if (!this_cost_valid) goto LOOP_EXIT;
+
av1_merge_rd_stats(split_rd_stats, &this_rd_stats);
tmp_rd = RDCOST(x->rdmult, split_rd_stats->rate, split_rd_stats->dist);
-#if CONFIG_DIST_8X8
- if (!x->using_dist_8x8)
-#endif
- if (no_split_rd < tmp_rd) {
- this_cost_valid = 0;
- goto LOOP_EXIT;
- }
+
+ if (no_split_rd < tmp_rd) {
+ this_cost_valid = 0;
+ goto LOOP_EXIT;
+ }
block += sub_step;
}
}
LOOP_EXIT : {}
-#if CONFIG_DIST_8X8
- if (x->using_dist_8x8 && this_cost_valid && tx_size == TX_8X8) {
- const int src_stride = p->src.stride;
- const int dst_stride = pd->dst.stride;
-
- const uint8_t *src =
- &p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]];
- const uint8_t *dst =
- &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
-
- int64_t dist_8x8;
- const int qindex = x->qindex;
- const int pred_stride = block_size_wide[plane_bsize];
- const int pred_idx = (blk_row * pred_stride + blk_col)
- << tx_size_wide_log2[0];
- const int16_t *pred = &x->pred_luma[pred_idx];
- int i, j;
- int row, col;
-
- uint8_t *pred8;
- DECLARE_ALIGNED(16, uint16_t, pred8_16[8 * 8]);
-
- dist_8x8 = av1_dist_8x8(cpi, x, src, src_stride, dst, dst_stride, BLOCK_8X8,
- 8, 8, 8, 8, qindex) *
- 16;
-
-#ifdef DEBUG_DIST_8X8
- if (x->tune_metric == AOM_TUNE_PSNR && xd->bd == 8)
- assert(sum_rd_stats.sse == dist_8x8);
-#endif // DEBUG_DIST_8X8
-
- split_rd_stats->sse = dist_8x8;
-
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
- pred8 = CONVERT_TO_BYTEPTR(pred8_16);
- else
- pred8 = (uint8_t *)pred8_16;
-
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- for (row = 0; row < 2; ++row) {
- for (col = 0; col < 2; ++col) {
- int idx = row * 2 + col;
- int eob = sub8x8_eob[idx];
-
- if (eob > 0) {
- for (j = 0; j < 4; j++)
- for (i = 0; i < 4; i++)
- CONVERT_TO_SHORTPTR(pred8)
- [(row * 4 + j) * 8 + 4 * col + i] =
- pred[(row * 4 + j) * pred_stride + 4 * col + i];
- } else {
- for (j = 0; j < 4; j++)
- for (i = 0; i < 4; i++)
- CONVERT_TO_SHORTPTR(pred8)
- [(row * 4 + j) * 8 + 4 * col + i] = CONVERT_TO_SHORTPTR(
- dst)[(row * 4 + j) * dst_stride + 4 * col + i];
- }
- }
- }
- } else {
- for (row = 0; row < 2; ++row) {
- for (col = 0; col < 2; ++col) {
- int idx = row * 2 + col;
- int eob = sub8x8_eob[idx];
-
- if (eob > 0) {
- for (j = 0; j < 4; j++)
- for (i = 0; i < 4; i++)
- pred8[(row * 4 + j) * 8 + 4 * col + i] =
- (uint8_t)pred[(row * 4 + j) * pred_stride + 4 * col + i];
- } else {
- for (j = 0; j < 4; j++)
- for (i = 0; i < 4; i++)
- pred8[(row * 4 + j) * 8 + 4 * col + i] =
- dst[(row * 4 + j) * dst_stride + 4 * col + i];
- }
- }
- }
- }
- dist_8x8 = av1_dist_8x8(cpi, x, src, src_stride, pred8, 8, BLOCK_8X8, 8, 8,
- 8, 8, qindex) *
- 16;
-
-#ifdef DEBUG_DIST_8X8
- if (x->tune_metric == AOM_TUNE_PSNR && xd->bd == 8)
- assert(sum_rd_stats.dist == dist_8x8);
-#endif // DEBUG_DIST_8X8
-
- split_rd_stats->dist = dist_8x8;
- tmp_rd = RDCOST(x->rdmult, split_rd_stats->rate, split_rd_stats->dist);
- }
-#endif // CONFIG_DIST_8X8
if (this_cost_valid) *split_rd = tmp_rd;
}
@@ -5006,7 +4766,10 @@
const int try_no_split = 1;
int try_split = tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH;
-
+#if CONFIG_DIST_8X8
+ if (x->using_dist_8x8)
+ try_split &= tx_size_wide[tx_size] >= 16 && tx_size_high[tx_size] >= 16;
+#endif
TxCandidateInfo no_split = { INT64_MAX, 0, TX_TYPES };
// TX no split
@@ -5705,7 +5468,7 @@
const MACROBLOCKD *xd = &x->e_mbd;
const int16_t dc_q = av1_dc_quant_QTX(x->qindex, 0, xd->bd);
- *dist = pixel_diff_dist(x, 0, 0, 0, bsize, bsize, 1);
+ *dist = pixel_diff_dist(x, 0, 0, 0, bsize, bsize);
const int64_t mse = *dist / bw / bh;
// Normalized quantizer takes the transform upscaling factor (8 for tx size
// smaller than 32) into account.
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 1aff10f..b7851a0 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -590,5 +590,7 @@
#if CONFIG_DIST_8X8
if (sf->use_transform_domain_distortion > 0) cpi->oxcf.using_dist_8x8 = 0;
+
+ if (cpi->oxcf.using_dist_8x8) x->min_partition_size = BLOCK_8X8;
#endif // CONFIG_DIST_8X8
}