Reduce the bandwidth requirement of sub8x8 chroma obmc
HW does not support < 4x4 prediction. To limit the bandwidth
requirement, small blocks are blended with neighbors from one side
only. If the block size of the current plane is 4x4 or 8x4, the above
neighbor (dir = 0) is skipped. If it is 4x8, the left neighbor
(dir = 1) is skipped.
This change keeps the bandwidth requirement of OBMC no higher than
what is required by normal compound inter modes.
Loss of gain (PSNR-Y/PSNR-Cb/PSNR-Cr/CIEDE2000)
AWCY HL: 0.05/0.09/0.14/0.04
AWCY LL: 0.06/0.07/0.10/0.12
Change-Id: I3854afc69c3014da99bde4b19bb726e4c077d59e
diff --git a/av1/common/reconinter.c b/av1/common/reconinter.c
index 1b6cdc0..71c8ebd 100644
--- a/av1/common/reconinter.c
+++ b/av1/common/reconinter.c
@@ -1686,6 +1686,27 @@
}
}
+// HW does not support < 4x4 prediction. To limit the bandwidth requirement,
+// small blocks blend with neighbors from one side only. Returns 1 when the
+// OBMC prediction from the neighbor on side `dir` (0 = above, 1 = left) must
+// be skipped for plane `pd` of a block of size `bsize`: 4x4 and 8x4 planes
+// skip the above neighbor, 4x8 planes skip the left one; otherwise returns 0.
+int skip_u4x4_pred_in_obmc(BLOCK_SIZE bsize, const struct macroblockd_plane *pd,
+                           int dir) {
+  assert(is_motion_variation_allowed_bsize(bsize));
+  const BLOCK_SIZE bsize_plane =
+      ss_size_lookup[bsize][pd->subsampling_x][pd->subsampling_y];
+#if CONFIG_CB4X4
+  if (bsize_plane < BLOCK_4X4) return 1;  // < 4x4: skip both sides.
+#endif
+  switch (bsize_plane) {
+    case BLOCK_4X4:
+    case BLOCK_8X4: return dir == 0;
+    case BLOCK_4X8: return dir == 1;
+    default: return 0;
+  }
+}
+
// This function combines motion compensated predictions that is generated by
// top/left neighboring blocks' inter predictors with the regular inter
// prediction. We assume the original prediction (bmc) is stored in
@@ -1727,6 +1748,9 @@
const struct macroblockd_plane *pd = &xd->plane[plane];
const int bw = (mi_step * MI_SIZE) >> pd->subsampling_x;
const int bh = overlap >> pd->subsampling_y;
+
+ if (skip_u4x4_pred_in_obmc(bsize, pd, 0)) continue;
+
const int dst_stride = pd->dst.stride;
uint8_t *const dst = &pd->dst.buf[(i * MI_SIZE) >> pd->subsampling_x];
const int tmp_stride = above_stride[plane];
@@ -1773,6 +1797,9 @@
const struct macroblockd_plane *pd = &xd->plane[plane];
const int bw = overlap >> pd->subsampling_x;
const int bh = (mi_step * MI_SIZE) >> pd->subsampling_y;
+
+ if (skip_u4x4_pred_in_obmc(bsize, pd, 1)) continue;
+
const int dst_stride = pd->dst.stride;
uint8_t *const dst =
&pd->dst.buf[(i * MI_SIZE * dst_stride) >> pd->subsampling_y];
@@ -1876,6 +1903,8 @@
bh = AOMMAX((num_4x4_blocks_high_lookup[bsize] * 2) >> pd->subsampling_y,
4);
+ if (skip_u4x4_pred_in_obmc(bsize, pd, 0)) continue;
+
#if CONFIG_WARPED_MOTION
if (above_mbmi->motion_mode == WARPED_CAUSAL &&
WARP_NEIGHBORS_WITH_OBMC) {
@@ -1981,6 +2010,8 @@
4);
bh = (mi_step << MI_SIZE_LOG2) >> pd->subsampling_y;
+ if (skip_u4x4_pred_in_obmc(bsize, pd, 1)) continue;
+
#if CONFIG_WARPED_MOTION
if (left_mbmi->motion_mode == WARPED_CAUSAL && WARP_NEIGHBORS_WITH_OBMC) {
assert_motion_mode_valid(WARPED_CAUSAL,