[dist-8x8] Fix for other chroma formats than 4:2:0
Instead of applying different offsets for each of the quad split, horizontal split,
and vertical split, store the start addresses of the src and dst 8x8 blocks in each of
the YUV planes, then pass them to the dist-8x8 distortion function, dist_8x8_yuv().
Also, don't save the decoded pixels of the 8x8 block in an additional buffer; access
the xd->dst buffer directly. This removes the decoded_8x8[8 * 8] buffer from struct macroblock.
However, this patch does NOT fix the case where dist-8x8's assert fails with
CfL enabled.
Fixes part of the issue (i.e. 4:4:4 chroma input) in
BUG=aomedia:1094
Change-Id: I6399d0eef3e17cacc9ec6467480a7959c85d419b
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index 4843b80..3114a2f 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -343,11 +343,6 @@
#if CONFIG_DIST_8X8
int using_dist_8x8;
aom_tune_metric tune_metric;
-#if CONFIG_HIGHBITDEPTH
- DECLARE_ALIGNED(16, uint16_t, decoded_8x8[8 * 8]);
-#else
- DECLARE_ALIGNED(16, uint8_t, decoded_8x8[8 * 8]);
-#endif
#endif // CONFIG_DIST_8X8
#if CONFIG_JNT_COMP
int comp_idx_cost[COMP_INDEX_CONTEXTS][2];
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 35ee644..e2a401e 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -773,41 +773,6 @@
return av1_compute_rd_mult(cpi, segment_qindex + cm->y_dc_delta_q);
}
-#if CONFIG_DIST_8X8
-static void dist_8x8_set_sub8x8_dst(MACROBLOCK *const x, uint8_t *dst8x8,
- BLOCK_SIZE bsize, int bw, int bh,
- int mi_row, int mi_col) {
- MACROBLOCKD *const xd = &x->e_mbd;
- struct macroblockd_plane *const pd = &xd->plane[0];
- const int dst_stride = pd->dst.stride;
- uint8_t *dst = pd->dst.buf;
-
- assert(bsize < BLOCK_8X8);
-
- if (bsize < BLOCK_8X8) {
- int i, j;
-#if CONFIG_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- uint16_t *dst8x8_16 = (uint16_t *)dst8x8;
- uint16_t *dst_sub8x8 = &dst8x8_16[((mi_row & 1) * 8 + (mi_col & 1)) << 2];
-
- for (j = 0; j < bh; ++j)
- for (i = 0; i < bw; ++i)
- dst_sub8x8[j * 8 + i] = CONVERT_TO_SHORTPTR(dst)[j * dst_stride + i];
- } else {
-#endif
- uint8_t *dst_sub8x8 = &dst8x8[((mi_row & 1) * 8 + (mi_col & 1)) << 2];
-
- for (j = 0; j < bh; ++j)
- for (i = 0; i < bw; ++i)
- dst_sub8x8[j * 8 + i] = dst[j * dst_stride + i];
-#if CONFIG_HIGHBITDEPTH
- }
-#endif
- }
-}
-#endif // CONFIG_DIST_8X8
-
static void rd_pick_sb_modes(const AV1_COMP *const cpi, TileDataEnc *tile_data,
MACROBLOCK *const x, int mi_row, int mi_col,
RD_STATS *rd_cost,
@@ -2388,40 +2353,31 @@
#if CONFIG_DIST_8X8
static int64_t dist_8x8_yuv(const AV1_COMP *const cpi, MACROBLOCK *const x,
- uint8_t *y_src_8x8) {
+ uint8_t *src_plane_8x8[MAX_MB_PLANE],
+ uint8_t *dst_plane_8x8[MAX_MB_PLANE]) {
MACROBLOCKD *const xd = &x->e_mbd;
int64_t dist_8x8, dist_8x8_uv, total_dist;
const int src_stride = x->plane[0].src.stride;
- uint8_t *decoded_8x8;
int plane;
-#if CONFIG_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
- decoded_8x8 = CONVERT_TO_BYTEPTR(x->decoded_8x8);
- else
-#endif
- decoded_8x8 = (uint8_t *)x->decoded_8x8;
-
- dist_8x8 = av1_dist_8x8(cpi, x, y_src_8x8, src_stride, decoded_8x8, 8,
- BLOCK_8X8, 8, 8, 8, 8, x->qindex)
- << 4;
+ const int dst_stride = xd->plane[0].dst.stride;
+ dist_8x8 =
+ av1_dist_8x8(cpi, x, src_plane_8x8[0], src_stride, dst_plane_8x8[0],
+ dst_stride, BLOCK_8X8, 8, 8, 8, 8, x->qindex)
+ << 4;
// Compute chroma distortion for a luma 8x8 block
dist_8x8_uv = 0;
for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
+ unsigned sse;
const int src_stride_uv = x->plane[plane].src.stride;
const int dst_stride_uv = xd->plane[plane].dst.stride;
- // uv buff pointers now (i.e. the last sub8x8 block) is the same
- // to those at the first sub8x8 block because
- // uv buff pointer is set only once at first sub8x8 block in a 8x8.
- uint8_t *src_uv = x->plane[plane].src.buf;
- uint8_t *dst_uv = xd->plane[plane].dst.buf;
- unsigned sse;
const BLOCK_SIZE plane_bsize =
AOMMAX(BLOCK_4X4, get_plane_block_size(BLOCK_8X8, &xd->plane[plane]));
- cpi->fn_ptr[plane_bsize].vf(src_uv, src_stride_uv, dst_uv, dst_stride_uv,
- &sse);
+
+ cpi->fn_ptr[plane_bsize].vf(src_plane_8x8[plane], src_stride_uv,
+ dst_plane_8x8[plane], dst_stride_uv, &sse);
dist_8x8_uv += (int64_t)sse << 4;
}
@@ -2720,6 +2676,17 @@
int64_t temp_best_rdcost = best_rdc.rdcost;
+#if CONFIG_DIST_8X8
+ uint8_t *src_plane_8x8[MAX_MB_PLANE], *dst_plane_8x8[MAX_MB_PLANE];
+
+ if (x->using_dist_8x8 && bsize == BLOCK_8X8) {
+ for (int i = 0; i < MAX_MB_PLANE; i++) {
+ src_plane_8x8[i] = x->plane[i].src.buf;
+ dst_plane_8x8[i] = xd->plane[i].dst.buf;
+ }
+ }
+#endif // CONFIG_DIST_8X8
+
// PARTITION_SPLIT
// TODO(jingning): use the motion vectors given by the above search as
// the starting point of motion search in the following partition type check.
@@ -2727,6 +2694,7 @@
int reached_last_index = 0;
subsize = get_subsize(bsize, PARTITION_SPLIT);
int idx;
+
for (idx = 0; idx < 4 && sum_rdc.rdcost < temp_best_rdcost; ++idx) {
const int x_idx = (idx & 1) * mi_step;
const int y_idx = (idx >> 1) * mi_step;
@@ -2760,9 +2728,8 @@
#if CONFIG_DIST_8X8
if (x->using_dist_8x8 && reached_last_index &&
sum_rdc.rdcost != INT64_MAX && bsize == BLOCK_8X8) {
- const int src_stride = x->plane[0].src.stride;
int64_t dist_8x8;
- dist_8x8 = dist_8x8_yuv(cpi, x, x->plane[0].src.buf - 4 * src_stride - 4);
+ dist_8x8 = dist_8x8_yuv(cpi, x, src_plane_8x8, dst_plane_8x8);
if (x->tune_metric == AOM_TUNE_PSNR && xd->bd == 8)
assert(sum_rdc.dist == dist_8x8);
sum_rdc.dist = dist_8x8;
@@ -2849,9 +2816,8 @@
#if CONFIG_DIST_8X8
if (x->using_dist_8x8 && sum_rdc.rdcost != INT64_MAX &&
bsize == BLOCK_8X8) {
- const int src_stride = x->plane[0].src.stride;
int64_t dist_8x8;
- dist_8x8 = dist_8x8_yuv(cpi, x, x->plane[0].src.buf - 4 * src_stride);
+ dist_8x8 = dist_8x8_yuv(cpi, x, src_plane_8x8, dst_plane_8x8);
if (x->tune_metric == AOM_TUNE_PSNR && xd->bd == 8)
assert(sum_rdc.dist == dist_8x8);
sum_rdc.dist = dist_8x8;
@@ -2936,7 +2902,7 @@
if (x->using_dist_8x8 && sum_rdc.rdcost != INT64_MAX &&
bsize == BLOCK_8X8) {
int64_t dist_8x8;
- dist_8x8 = dist_8x8_yuv(cpi, x, x->plane[0].src.buf - 4);
+ dist_8x8 = dist_8x8_yuv(cpi, x, src_plane_8x8, dst_plane_8x8);
if (x->tune_metric == AOM_TUNE_PSNR && xd->bd == 8)
assert(sum_rdc.dist == dist_8x8);
sum_rdc.dist = dist_8x8;
@@ -4861,14 +4827,6 @@
tile_data->allow_update_cdf);
}
-#if CONFIG_DIST_8X8
- if (x->using_dist_8x8 && bsize < BLOCK_8X8) {
- dist_8x8_set_sub8x8_dst(x, (uint8_t *)x->decoded_8x8, bsize,
- block_size_wide[bsize], block_size_high[bsize],
- mi_row, mi_col);
- }
-#endif // CONFIG_DIST_8X8
-
if (!dry_run) {
#if CONFIG_INTRABC
if (av1_allow_intrabc(bsize, cm))