Sub8x8 block chroma component inter prediction. Handle the chroma component of sub8x8 blocks in units of 2x2/4x2/2x4 and use the motion vectors inherited from the luma component. This improves coding performance: lowres 0.4%, midres 0.25%, hdres 0.15%. Change-Id: I34dff4218cfa3e5d55e7ed0341f36f4719389f7e
diff --git a/av1/common/reconinter.c b/av1/common/reconinter.c index c0fc494..b07a8bd 100644 --- a/av1/common/reconinter.c +++ b/av1/common/reconinter.c
@@ -647,6 +647,87 @@ } #endif +#if CONFIG_SUB8X8_MC + if (mi->mbmi.sb_type < BLOCK_8X8 && plane > 0) { + // block size in log2 + const int b4_wl = b_width_log2_lookup[mi->mbmi.sb_type]; + const int b4_hl = b_height_log2_lookup[mi->mbmi.sb_type]; + const int b8_sl = b_width_log2_lookup[BLOCK_8X8]; + + // block size + const int b4_w = 1 << b4_wl; + const int b4_h = 1 << b4_hl; + const int b8_s = 1 << b8_sl; + int idx, idy; + + const int x_base = x; + const int y_base = y; + + // processing unit size + const int x_step = w >> (b8_sl - b4_wl); + const int y_step = h >> (b8_sl - b4_hl); + + for (idy = 0; idy < b8_s; idy += b4_h) { + for (idx = 0; idx < b8_s; idx += b4_w) { + const int chr_idx = (idy * 2) + idx; + for (ref = 0; ref < 1 + is_compound; ++ref) { + const struct scale_factors *const sf = &xd->block_refs[ref]->sf; + struct buf_2d *const pre_buf = &pd->pre[ref]; + struct buf_2d *const dst_buf = &pd->dst; + uint8_t *dst = dst_buf->buf; + const MV mv = mi->bmi[chr_idx].as_mv[ref].as_mv; + const MV mv_q4 = clamp_mv_to_umv_border_sb( + xd, &mv, bw, bh, pd->subsampling_x, pd->subsampling_y); + uint8_t *pre; + MV32 scaled_mv; + int xs, ys, subpel_x, subpel_y; + const int is_scaled = av1_is_scaled(sf); + + x = x_base + idx * x_step; + y = y_base + idy * y_step; + + dst += dst_buf->stride * y + x; + + if (is_scaled) { + pre = + pre_buf->buf + scaled_buffer_offset(x, y, pre_buf->stride, sf); + scaled_mv = av1_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf); + xs = sf->x_step_q4; + ys = sf->y_step_q4; + } else { + pre = pre_buf->buf + y * pre_buf->stride + x; + scaled_mv.row = mv_q4.row; + scaled_mv.col = mv_q4.col; + xs = ys = 16; + } + + subpel_x = scaled_mv.col & SUBPEL_MASK; + subpel_y = scaled_mv.row & SUBPEL_MASK; + pre += (scaled_mv.row >> SUBPEL_BITS) * pre_buf->stride + + (scaled_mv.col >> SUBPEL_BITS); + +#if CONFIG_AOM_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + high_inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, + subpel_x, 
subpel_y, sf, x_step, y_step, ref, + &mi->mbmi.interp_filter, xs, ys, xd->bd); + } else { + inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, + subpel_x, subpel_y, sf, x_step, y_step, ref, + &mi->mbmi.interp_filter, xs, ys); + } +#else + inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, subpel_x, + subpel_y, sf, x_step, y_step, ref, + &mi->mbmi.interp_filter, xs, ys); +#endif + } + } + } + return; + } +#endif + for (ref = 0; ref < 1 + is_compound; ++ref) { const struct scale_factors *const sf = &xd->block_refs[ref]->sf; struct buf_2d *const pre_buf = &pd->pre[ref];
diff --git a/av1/common/reconinter.h b/av1/common/reconinter.h index bfa7e95..5f62f0a 100644 --- a/av1/common/reconinter.h +++ b/av1/common/reconinter.h
@@ -50,7 +50,7 @@ const int16_t *kernel_y = av1_get_interp_filter_subpel_kernel(interp_filter_params_y, subpel_y); #else - if (interp_filter_params.taps == SUBPEL_TAPS) { + if (interp_filter_params.taps == SUBPEL_TAPS && w > 2 && h > 2) { const int16_t *kernel_x = av1_get_interp_filter_subpel_kernel(interp_filter_params, subpel_x); const int16_t *kernel_y = @@ -109,7 +109,7 @@ const int16_t *kernel_y = av1_get_interp_filter_subpel_kernel(interp_filter_params_y, subpel_y); #else - if (interp_filter_params.taps == SUBPEL_TAPS) { + if (interp_filter_params.taps == SUBPEL_TAPS && w > 2 && h > 2) { const int16_t *kernel_x = av1_get_interp_filter_subpel_kernel(interp_filter_params, subpel_x); const int16_t *kernel_y =