Make CDEF work with EXT_PARTITION
Make CDEF select the filter strength for every 64x64 block when the
superblock size can be larger than 64x64.
With and without this patch, coding performance on the AWCY and
Google lowres and midres test sets is neutral.
BUG=aomedia:662
Change-Id: Ief82cc51be91fc08a7c6d7e87f6d13bcc4336476
diff --git a/av1/common/cdef.c b/av1/common/cdef.c
index a02126a..ba8abbb 100644
--- a/av1/common/cdef.c
+++ b/av1/common/cdef.c
@@ -27,8 +27,8 @@
maxc = cm->mi_cols - mi_col;
maxr = cm->mi_rows - mi_row;
- maxr = AOMMIN(maxr, cm->mib_size);
- maxc = AOMMIN(maxc, cm->mib_size);
+ maxr = AOMMIN(maxr, MI_SIZE_64X64);
+ maxc = AOMMIN(maxc, MI_SIZE_64X64);
for (r = 0; r < maxr; r++) {
for (c = 0; c < maxc; c++) {
@@ -60,8 +60,8 @@
maxc = cm->mi_cols - mi_col;
maxr = cm->mi_rows - mi_row;
- maxr = AOMMIN(maxr, cm->mib_size);
- maxc = AOMMIN(maxc, cm->mib_size);
+ maxr = AOMMIN(maxr, MI_SIZE_64X64);
+ maxc = AOMMIN(maxc, MI_SIZE_64X64);
const int r_step = mi_size_high[BLOCK_8X8];
const int c_step = mi_size_wide[BLOCK_8X8];
@@ -161,7 +161,7 @@
uint16_t src[OD_DERING_INBUF_SIZE];
uint16_t *linebuf[3];
uint16_t *colbuf[3];
- dering_list dlist[MAX_MIB_SIZE * MAX_MIB_SIZE];
+ dering_list dlist[MI_SIZE_64X64 * MI_SIZE_64X64];
unsigned char *row_dering, *prev_row_dering, *curr_row_dering;
int dering_count;
int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } };
@@ -178,8 +178,8 @@
int chroma_dering =
xd->plane[1].subsampling_x == xd->plane[1].subsampling_y &&
xd->plane[2].subsampling_x == xd->plane[2].subsampling_y;
- nvsb = (cm->mi_rows + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;
- nhsb = (cm->mi_cols + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;
+ nvsb = (cm->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
+ nhsb = (cm->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
av1_setup_dst_planes(xd->plane, cm->sb_size, frame, 0, 0);
row_dering = aom_malloc(sizeof(*row_dering) * (nhsb + 2) * 2);
memset(row_dering, 1, sizeof(*row_dering) * (nhsb + 2) * 2);
@@ -202,7 +202,7 @@
for (sbr = 0; sbr < nvsb; sbr++) {
for (pli = 0; pli < nplanes; pli++) {
const int block_height =
- (MAX_MIB_SIZE << mi_high_l2[pli]) + 2 * OD_FILT_VBORDER;
+ (MI_SIZE_64X64 << mi_high_l2[pli]) + 2 * OD_FILT_VBORDER;
fill_rect(colbuf[pli], OD_FILT_HBORDER, block_height, OD_FILT_HBORDER,
OD_DERING_VERY_LARGE);
}
@@ -213,41 +213,41 @@
int nhb, nvb;
int cstart = 0;
curr_row_dering[sbc] = 0;
- if (cm->mi_grid_visible[MAX_MIB_SIZE * sbr * cm->mi_stride +
- MAX_MIB_SIZE * sbc] == NULL ||
- cm->mi_grid_visible[MAX_MIB_SIZE * sbr * cm->mi_stride +
- MAX_MIB_SIZE * sbc]
+ if (cm->mi_grid_visible[MI_SIZE_64X64 * sbr * cm->mi_stride +
+ MI_SIZE_64X64 * sbc] == NULL ||
+ cm->mi_grid_visible[MI_SIZE_64X64 * sbr * cm->mi_stride +
+ MI_SIZE_64X64 * sbc]
->mbmi.cdef_strength == -1) {
dering_left = 0;
continue;
}
if (!dering_left) cstart = -OD_FILT_HBORDER;
- nhb = AOMMIN(MAX_MIB_SIZE, cm->mi_cols - MAX_MIB_SIZE * sbc);
- nvb = AOMMIN(MAX_MIB_SIZE, cm->mi_rows - MAX_MIB_SIZE * sbr);
+ nhb = AOMMIN(MI_SIZE_64X64, cm->mi_cols - MI_SIZE_64X64 * sbc);
+ nvb = AOMMIN(MI_SIZE_64X64, cm->mi_rows - MI_SIZE_64X64 * sbr);
int tile_top, tile_left, tile_bottom, tile_right;
- int mi_idx = MAX_MIB_SIZE * sbr * cm->mi_stride + MAX_MIB_SIZE * sbc;
+ int mi_idx = MI_SIZE_64X64 * sbr * cm->mi_stride + MI_SIZE_64X64 * sbc;
MODE_INFO *const mi_tl = cm->mi + mi_idx;
BOUNDARY_TYPE boundary_tl = mi_tl->mbmi.boundary_info;
tile_top = boundary_tl & TILE_ABOVE_BOUNDARY;
tile_left = boundary_tl & TILE_LEFT_BOUNDARY;
if (sbr != nvsb - 1 &&
- (&cm->mi[mi_idx + (MAX_MIB_SIZE - 1) * cm->mi_stride]))
- tile_bottom = cm->mi[mi_idx + (MAX_MIB_SIZE - 1) * cm->mi_stride]
+ (&cm->mi[mi_idx + (MI_SIZE_64X64 - 1) * cm->mi_stride]))
+ tile_bottom = cm->mi[mi_idx + (MI_SIZE_64X64 - 1) * cm->mi_stride]
.mbmi.boundary_info &
TILE_BOTTOM_BOUNDARY;
else
tile_bottom = 1;
- if (sbc != nhsb - 1 && (&cm->mi[mi_idx + MAX_MIB_SIZE - 1]))
- tile_right = cm->mi[mi_idx + MAX_MIB_SIZE - 1].mbmi.boundary_info &
+ if (sbc != nhsb - 1 && (&cm->mi[mi_idx + MI_SIZE_64X64 - 1]))
+ tile_right = cm->mi[mi_idx + MI_SIZE_64X64 - 1].mbmi.boundary_info &
TILE_RIGHT_BOUNDARY;
else
tile_right = 1;
const int mbmi_cdef_strength =
- cm->mi_grid_visible[MAX_MIB_SIZE * sbr * cm->mi_stride +
- MAX_MIB_SIZE * sbc]
+ cm->mi_grid_visible[MI_SIZE_64X64 * sbr * cm->mi_stride +
+ MI_SIZE_64X64 * sbc]
->mbmi.cdef_strength;
level = cm->cdef_strengths[mbmi_cdef_strength] / CLPF_STRENGTHS;
clpf_strength = cm->cdef_strengths[mbmi_cdef_strength] % CLPF_STRENGTHS;
@@ -259,7 +259,7 @@
if ((level == 0 && clpf_strength == 0 && uv_level == 0 &&
uv_clpf_strength == 0) ||
(dering_count = sb_compute_dering_list(
- cm, sbr * MAX_MIB_SIZE, sbc * MAX_MIB_SIZE, dlist,
+ cm, sbr * MI_SIZE_64X64, sbc * MI_SIZE_64X64, dlist,
get_filter_skip(level) || get_filter_skip(uv_level))) == 0) {
dering_left = 0;
continue;
@@ -293,7 +293,7 @@
else
rend = vsize + OD_FILT_VBORDER;
- coffset = sbc * MAX_MIB_SIZE << mi_wide_l2[pli];
+ coffset = sbc * MI_SIZE_64X64 << mi_wide_l2[pli];
if (sbc == nhsb - 1) {
/* On the last superblock column, fill in the right border with
OD_DERING_VERY_LARGE to avoid filtering with the outside. */
@@ -314,14 +314,14 @@
cm,
&src[OD_FILT_VBORDER * OD_FILT_BSTRIDE + OD_FILT_HBORDER + cstart],
OD_FILT_BSTRIDE, xd->plane[pli].dst.buf,
- (MAX_MIB_SIZE << mi_high_l2[pli]) * sbr, coffset + cstart,
+ (MI_SIZE_64X64 << mi_high_l2[pli]) * sbr, coffset + cstart,
xd->plane[pli].dst.stride, rend, cend - cstart);
if (!prev_row_dering[sbc]) {
- copy_sb8_16(cm, &src[OD_FILT_HBORDER], OD_FILT_BSTRIDE,
- xd->plane[pli].dst.buf,
- (MAX_MIB_SIZE << mi_high_l2[pli]) * sbr - OD_FILT_VBORDER,
- coffset, xd->plane[pli].dst.stride, OD_FILT_VBORDER,
- hsize);
+ copy_sb8_16(
+ cm, &src[OD_FILT_HBORDER], OD_FILT_BSTRIDE,
+ xd->plane[pli].dst.buf,
+ (MI_SIZE_64X64 << mi_high_l2[pli]) * sbr - OD_FILT_VBORDER,
+ coffset, xd->plane[pli].dst.stride, OD_FILT_VBORDER, hsize);
} else if (sbr > 0) {
copy_rect(&src[OD_FILT_HBORDER], OD_FILT_BSTRIDE,
&linebuf[pli][coffset], stride, OD_FILT_VBORDER, hsize);
@@ -330,10 +330,11 @@
hsize, OD_DERING_VERY_LARGE);
}
if (!prev_row_dering[sbc - 1]) {
- copy_sb8_16(cm, src, OD_FILT_BSTRIDE, xd->plane[pli].dst.buf,
- (MAX_MIB_SIZE << mi_high_l2[pli]) * sbr - OD_FILT_VBORDER,
- coffset - OD_FILT_HBORDER, xd->plane[pli].dst.stride,
- OD_FILT_VBORDER, OD_FILT_HBORDER);
+ copy_sb8_16(
+ cm, src, OD_FILT_BSTRIDE, xd->plane[pli].dst.buf,
+ (MI_SIZE_64X64 << mi_high_l2[pli]) * sbr - OD_FILT_VBORDER,
+ coffset - OD_FILT_HBORDER, xd->plane[pli].dst.stride,
+ OD_FILT_VBORDER, OD_FILT_HBORDER);
} else if (sbr > 0 && sbc > 0) {
copy_rect(src, OD_FILT_BSTRIDE,
&linebuf[pli][coffset - OD_FILT_HBORDER], stride,
@@ -343,11 +344,12 @@
OD_DERING_VERY_LARGE);
}
if (!prev_row_dering[sbc + 1]) {
- copy_sb8_16(cm, &src[OD_FILT_HBORDER + (nhb << mi_wide_l2[pli])],
- OD_FILT_BSTRIDE, xd->plane[pli].dst.buf,
- (MAX_MIB_SIZE << mi_high_l2[pli]) * sbr - OD_FILT_VBORDER,
- coffset + hsize, xd->plane[pli].dst.stride,
- OD_FILT_VBORDER, OD_FILT_HBORDER);
+ copy_sb8_16(
+ cm, &src[OD_FILT_HBORDER + (nhb << mi_wide_l2[pli])],
+ OD_FILT_BSTRIDE, xd->plane[pli].dst.buf,
+ (MI_SIZE_64X64 << mi_high_l2[pli]) * sbr - OD_FILT_VBORDER,
+ coffset + hsize, xd->plane[pli].dst.stride, OD_FILT_VBORDER,
+ OD_FILT_HBORDER);
} else if (sbr > 0 && sbc < nhsb - 1) {
copy_rect(&src[hsize + OD_FILT_HBORDER], OD_FILT_BSTRIDE,
&linebuf[pli][coffset + hsize], stride, OD_FILT_VBORDER,
@@ -368,7 +370,7 @@
rend + OD_FILT_VBORDER, OD_FILT_HBORDER);
copy_sb8_16(
cm, &linebuf[pli][coffset], stride, xd->plane[pli].dst.buf,
- (MAX_MIB_SIZE << mi_high_l2[pli]) * (sbr + 1) - OD_FILT_VBORDER,
+ (MI_SIZE_64X64 << mi_high_l2[pli]) * (sbr + 1) - OD_FILT_VBORDER,
coffset, xd->plane[pli].dst.stride, OD_FILT_VBORDER, hsize);
if (tile_top) {
@@ -395,8 +397,8 @@
(uint8_t *)&CONVERT_TO_SHORTPTR(
xd->plane[pli]
.dst.buf)[xd->plane[pli].dst.stride *
- (MAX_MIB_SIZE * sbr << mi_high_l2[pli]) +
- (sbc * MAX_MIB_SIZE << mi_wide_l2[pli])],
+ (MI_SIZE_64X64 * sbr << mi_high_l2[pli]) +
+ (sbc * MI_SIZE_64X64 << mi_wide_l2[pli])],
xd->plane[pli].dst.stride, dst,
&src[OD_FILT_VBORDER * OD_FILT_BSTRIDE + OD_FILT_HBORDER],
xdec[pli], ydec[pli], dir, NULL, var, pli, dlist, dering_count,
@@ -406,8 +408,8 @@
#endif
od_dering(&xd->plane[pli]
.dst.buf[xd->plane[pli].dst.stride *
- (MAX_MIB_SIZE * sbr << mi_high_l2[pli]) +
- (sbc * MAX_MIB_SIZE << mi_wide_l2[pli])],
+ (MI_SIZE_64X64 * sbr << mi_high_l2[pli]) +
+ (sbc * MI_SIZE_64X64 << mi_wide_l2[pli])],
xd->plane[pli].dst.stride, dst,
&src[OD_FILT_VBORDER * OD_FILT_BSTRIDE + OD_FILT_HBORDER],
xdec[pli], ydec[pli], dir, NULL, var, pli, dlist,
diff --git a/av1/common/enums.h b/av1/common/enums.h
index a4dfbf8..2b18d32 100644
--- a/av1/common/enums.h
+++ b/av1/common/enums.h
@@ -65,6 +65,8 @@
#define MAX_VARTX_DEPTH 2
#endif
+#define MI_SIZE_64X64 (64 >> MI_SIZE_LOG2)
+
// Bitstream profiles indicated by 2-3 bits in the uncompressed header.
// 00: Profile 0. 8-bit 4:2:0 only.
// 10: Profile 1. 8-bit 4:4:4, 4:2:2, and 4:4:0.
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index c887bb5..247e60e 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -2686,12 +2686,21 @@
#if CONFIG_CDEF
if (bsize == cm->sb_size) {
- if (!sb_all_skip(cm, mi_row, mi_col) && !cm->all_lossless) {
- cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col]->mbmi.cdef_strength =
- aom_read_literal(r, cm->cdef_bits, ACCT_STR);
- } else {
- cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col]->mbmi.cdef_strength =
- -1;
+ int width_step = mi_size_wide[BLOCK_64X64];
+ int height_step = mi_size_wide[BLOCK_64X64];
+ int w, h;
+ for (h = 0; (h < mi_size_high[cm->sb_size]) && (mi_row + h < cm->mi_rows);
+ h += height_step) {
+ for (w = 0; (w < mi_size_wide[cm->sb_size]) && (mi_col + w < cm->mi_cols);
+ w += width_step) {
+ if (!cm->all_lossless && !sb_all_skip(cm, mi_row + h, mi_col + w))
+ cm->mi_grid_visible[(mi_row + h) * cm->mi_stride + (mi_col + w)]
+ ->mbmi.cdef_strength =
+ aom_read_literal(r, cm->cdef_bits, ACCT_STR);
+ else
+ cm->mi_grid_visible[(mi_row + h) * cm->mi_stride + (mi_col + w)]
+ ->mbmi.cdef_strength = -1;
+ }
}
}
#endif // CONFIG_CDEF
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index dc46224..2e0abc1 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -3097,11 +3097,24 @@
#endif // CONFIG_EXT_PARTITION_TYPES
#if CONFIG_CDEF
- if (bsize == cm->sb_size && !sb_all_skip(cm, mi_row, mi_col) &&
- cm->cdef_bits != 0 && !cm->all_lossless) {
- aom_write_literal(w, cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col]
- ->mbmi.cdef_strength,
- cm->cdef_bits);
+ if (bsize == cm->sb_size && cm->cdef_bits != 0 && !cm->all_lossless) {
+ int width_step = mi_size_wide[BLOCK_64X64];
+ int height_step = mi_size_high[BLOCK_64X64];
+ int width, height;
+ for (height = 0; (height < mi_size_high[cm->sb_size]) &&
+ (mi_row + height < cm->mi_rows);
+ height += height_step) {
+ for (width = 0; (width < mi_size_wide[cm->sb_size]) &&
+ (mi_col + width < cm->mi_cols);
+ width += width_step) {
+ if (!sb_all_skip(cm, mi_row + height, mi_col + width))
+ aom_write_literal(
+ w, cm->mi_grid_visible[(mi_row + height) * cm->mi_stride +
+ (mi_col + width)]
+ ->mbmi.cdef_strength,
+ cm->cdef_bits);
+ }
+ }
}
#endif
}
diff --git a/av1/encoder/pickcdef.c b/av1/encoder/pickcdef.c
index 50cf8d7..e4ec388 100644
--- a/av1/encoder/pickcdef.c
+++ b/av1/encoder/pickcdef.c
@@ -285,7 +285,7 @@
int sbr, sbc;
uint16_t *src[3];
uint16_t *ref_coeff[3];
- dering_list dlist[MAX_MIB_SIZE * MAX_MIB_SIZE];
+ dering_list dlist[MI_SIZE_64X64 * MI_SIZE_64X64];
int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } };
int var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } };
int stride[3];
@@ -300,8 +300,8 @@
uint64_t best_tot_mse = (uint64_t)1 << 63;
uint64_t tot_mse;
int sb_count;
- int nvsb = (cm->mi_rows + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;
- int nhsb = (cm->mi_cols + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;
+ int nvsb = (cm->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
+ int nhsb = (cm->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
int *sb_index = aom_malloc(nvsb * nhsb * sizeof(*sb_index));
int *selected_strength = aom_malloc(nvsb * nhsb * sizeof(*sb_index));
uint64_t(*mse[2])[TOTAL_STRENGTHS];
@@ -387,14 +387,14 @@
int nvb, nhb;
int gi;
int dirinit = 0;
- nhb = AOMMIN(MAX_MIB_SIZE, cm->mi_cols - MAX_MIB_SIZE * sbc);
- nvb = AOMMIN(MAX_MIB_SIZE, cm->mi_rows - MAX_MIB_SIZE * sbr);
- cm->mi_grid_visible[MAX_MIB_SIZE * sbr * cm->mi_stride +
- MAX_MIB_SIZE * sbc]
+ nhb = AOMMIN(MI_SIZE_64X64, cm->mi_cols - MI_SIZE_64X64 * sbc);
+ nvb = AOMMIN(MI_SIZE_64X64, cm->mi_rows - MI_SIZE_64X64 * sbr);
+ cm->mi_grid_visible[MI_SIZE_64X64 * sbr * cm->mi_stride +
+ MI_SIZE_64X64 * sbc]
->mbmi.cdef_strength = -1;
- if (sb_all_skip(cm, sbr * MAX_MIB_SIZE, sbc * MAX_MIB_SIZE)) continue;
- dering_count = sb_compute_dering_list(cm, sbr * MAX_MIB_SIZE,
- sbc * MAX_MIB_SIZE, dlist, 1);
+ if (sb_all_skip(cm, sbr * MI_SIZE_64X64, sbc * MI_SIZE_64X64)) continue;
+ dering_count = sb_compute_dering_list(cm, sbr * MI_SIZE_64X64,
+ sbc * MI_SIZE_64X64, dlist, 1);
for (pli = 0; pli < nplanes; pli++) {
for (i = 0; i < OD_DERING_INBUF_SIZE; i++)
inbuf[i] = OD_DERING_VERY_LARGE;
@@ -419,8 +419,8 @@
if (clpf_strength == 0)
copy_sb16_16(&in[(-yoff * OD_FILT_BSTRIDE - xoff)], OD_FILT_BSTRIDE,
src[pli],
- (sbr * MAX_MIB_SIZE << mi_high_l2[pli]) - yoff,
- (sbc * MAX_MIB_SIZE << mi_wide_l2[pli]) - xoff,
+ (sbr * MI_SIZE_64X64 << mi_high_l2[pli]) - yoff,
+ (sbc * MI_SIZE_64X64 << mi_wide_l2[pli]) - xoff,
stride[pli], ysize, xsize);
od_dering(clpf_strength ? NULL : (uint8_t *)in, OD_FILT_BSTRIDE,
tmp_dst, in, xdec[pli], ydec[pli], dir, &dirinit, var, pli,
@@ -429,8 +429,8 @@
dering_damping, coeff_shift, clpf_strength != 0, 1);
curr_mse = compute_dering_dist(
ref_coeff[pli] +
- (sbr * MAX_MIB_SIZE << mi_high_l2[pli]) * stride[pli] +
- (sbc * MAX_MIB_SIZE << mi_wide_l2[pli]),
+ (sbr * MI_SIZE_64X64 << mi_high_l2[pli]) * stride[pli] +
+ (sbc * MI_SIZE_64X64 << mi_wide_l2[pli]),
stride[pli], tmp_dst, dlist, dering_count, bsize[pli],
coeff_shift, pli);
if (pli < 2)
@@ -438,7 +438,7 @@
else
mse[1][sb_count][gi] += curr_mse;
sb_index[sb_count] =
- MAX_MIB_SIZE * sbr * cm->mi_stride + MAX_MIB_SIZE * sbc;
+ MI_SIZE_64X64 * sbr * cm->mi_stride + MI_SIZE_64X64 * sbc;
}
}
sb_count++;