Add high precision blending for OBMC

Use high-precision intermediate single-reference predictions for OBMC
blending, as is done for other compound modes.

This feature is behind the CONFIG_OBMC_HIGH_PREC_BLENDING flag, which is
off by default. To turn it on, apply:
-DCONFIG_OBMC_HIGH_PREC_BLENDING=1

Preliminary results (20 frames, speed 1, ext_partition and
ext_partition_types disabled):
-0.068% lowres
-0.053% midres
-0.08% AWCY

BUG=aomedia:1378

Change-Id: I234d6efa8bcd71cd1f0af3aaa1bf682c47ae75b9
diff --git a/av1/common/convolve.h b/av1/common/convolve.h index 99023d3..6500efb 100644 --- a/av1/common/convolve.h +++ b/av1/common/convolve.h
@@ -117,7 +117,8 @@ } static INLINE ConvolveParams get_conv_params_no_round(int ref, int do_average, - int plane, int32_t *dst, + int plane, + CONV_BUF_TYPE *dst, int dst_stride, int is_compound, int bd) { ConvolveParams conv_params;
diff --git a/av1/common/reconinter.c b/av1/common/reconinter.c index 87b859a..18e316a 100644 --- a/av1/common/reconinter.c +++ b/av1/common/reconinter.c
@@ -645,7 +645,6 @@ // if sign requested is 1, we need to return the complement index [1] // instead. wedge_params.signflip[w] = (avg < 32); - // printf("%d[%d] = %d\n", sb_type, w, wedge_params.signflip[w]); } } } @@ -1316,6 +1315,33 @@ av1_build_inter_predictors_sbuv(cm, xd, mi_row, mi_col, ctx, bsize); } +#if CONFIG_OBMC_HIGH_PREC_BLENDING +static void setup_dst_plane(struct macroblockd_plane *planes, BLOCK_SIZE bsize, + const YV12_BUFFER_CONFIG *src, int mi_row, + int mi_col, int plane) { + struct macroblockd_plane *const pd = &planes[plane]; + const int is_uv = plane > 0; + setup_pred_plane(&pd->dst, bsize, src->buffers[plane], + src->crop_widths[is_uv], src->crop_heights[is_uv], + src->strides[is_uv], mi_row, mi_col, NULL, pd->subsampling_x, + pd->subsampling_y); +} + +static void setup_pre_plane(MACROBLOCKD *xd, int idx, + const YV12_BUFFER_CONFIG *src, int mi_row, + int mi_col, const struct scale_factors *sf, + int plane) { + if (src != NULL) { + struct macroblockd_plane *const pd = &xd->plane[plane]; + const int is_uv = plane > 0; + setup_pred_plane(&pd->pre[idx], xd->mi[0]->mbmi.sb_type, + src->buffers[plane], src->crop_widths[is_uv], + src->crop_heights[is_uv], src->strides[is_uv], mi_row, + mi_col, sf, pd->subsampling_x, pd->subsampling_y); + } +} +#endif + void av1_setup_dst_planes(struct macroblockd_plane *planes, BLOCK_SIZE bsize, const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col, const int num_planes) { @@ -1389,6 +1415,41 @@ } } +#if CONFIG_OBMC_HIGH_PREC_BLENDING +// dir = 0: vertical filter, dir = 1: horizontal filter +void av1_get_obmc_mask2d(uint8_t *mask, const int mask_stride, int w, int h, + int dir) { + const uint8_t *mask1d; + + switch (dir ? 
w : h) { + case 1: mask1d = obmc_mask_1; break; + case 2: mask1d = obmc_mask_2; break; + case 4: mask1d = obmc_mask_4; break; + case 8: mask1d = obmc_mask_8; break; + case 16: mask1d = obmc_mask_16; break; + case 32: mask1d = obmc_mask_32; break; +#if CONFIG_EXT_PARTITION + case 64: mask1d = obmc_mask_64; break; +#endif // CONFIG_EXT_PARTITION + default: assert(0); return; + } + + if (dir == 0) { // Vertical filter + for (int r = 0; r < h; ++r) { + memset(mask, mask1d[r], w * sizeof(mask[0])); + mask += mask_stride; + } + } else if (dir == 1) { // Horizontal filter + for (int r = 0; r < h; ++r) { + memcpy(mask, mask1d, w * sizeof(mask[0])); + mask += mask_stride; + } + } else { + assert(0); + } +} +#endif + static INLINE void increment_int_ptr(MACROBLOCKD *xd, int rel_mi_rc, uint8_t mi_hw, MODE_INFO *mi, void *fun_ctxt, const int num_planes) { @@ -1440,6 +1501,381 @@ } } +void modify_neighbor_predictor_for_obmc(MB_MODE_INFO *mbmi) { + if (is_interintra_pred(mbmi)) { + mbmi->ref_frame[1] = NONE_FRAME; + } else if (has_second_ref(mbmi) && + is_masked_compound_type(mbmi->interinter_compound_type)) { + mbmi->interinter_compound_type = COMPOUND_AVERAGE; + mbmi->ref_frame[1] = NONE_FRAME; + } + if (has_second_ref(mbmi)) mbmi->ref_frame[1] = NONE_FRAME; + return; +} + +#if CONFIG_OBMC_HIGH_PREC_BLENDING +struct obmc_inter_pred_plane_no_round_ctxt { + int32_t *base; + int base_stride; + int32_t *adjacent; + int adjacent_stride; + int plane; +}; + +static INLINE void build_obmc_inter_pred_plane_above_no_round( + MACROBLOCKD *xd, int rel_mi_col, uint8_t above_mi_width, + MODE_INFO *above_mi, void *fun_ctxt, int num_planes) { + (void)above_mi; + struct obmc_inter_pred_plane_no_round_ctxt *ctxt = + (struct obmc_inter_pred_plane_no_round_ctxt *)fun_ctxt; + const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; + const int overlap = + AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1; + const int plane = ctxt->plane; + (void)num_planes; + + const struct 
macroblockd_plane *pd = &xd->plane[plane]; + const int bw = (above_mi_width * MI_SIZE) >> pd->subsampling_x; + const int bh = overlap >> pd->subsampling_y; + const int plane_col = (rel_mi_col * MI_SIZE) >> pd->subsampling_x; + + if (skip_u4x4_pred_in_obmc(bsize, pd, 0)) return; + + const int dst_stride = ctxt->base_stride; + int32_t *const dst = &ctxt->base[plane_col]; + const int tmp_stride = ctxt->adjacent_stride; + const int32_t *const tmp = &ctxt->adjacent[plane_col]; + uint8_t mask[MAX_SB_SQUARE]; + const int mask_stride = bw; + + av1_get_obmc_mask2d(mask, mask_stride, bw, bh, 0); + aom_blend_a64_d32_mask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, + mask, mask_stride, bh, bw, 0, 0); +} + +static INLINE void build_obmc_inter_pred_plane_left_no_round( + MACROBLOCKD *xd, int rel_mi_row, uint8_t left_mi_height, MODE_INFO *left_mi, + void *fun_ctxt, int num_planes) { + (void)left_mi; + struct obmc_inter_pred_plane_no_round_ctxt *ctxt = + (struct obmc_inter_pred_plane_no_round_ctxt *)fun_ctxt; + const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; + const int overlap = + AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1; + const int plane = ctxt->plane; + (void)num_planes; + + const struct macroblockd_plane *pd = &xd->plane[plane]; + const int bw = overlap >> pd->subsampling_x; + const int bh = (left_mi_height * MI_SIZE) >> pd->subsampling_y; + const int plane_row = (rel_mi_row * MI_SIZE) >> pd->subsampling_y; + + if (skip_u4x4_pred_in_obmc(bsize, pd, 1)) return; + + const int dst_stride = ctxt->base_stride; + int32_t *const dst = &ctxt->base[plane_row * dst_stride]; + const int tmp_stride = ctxt->adjacent_stride; + const int32_t *const tmp = &ctxt->adjacent[plane_row * tmp_stride]; + uint8_t mask[MAX_SB_SQUARE]; + const int mask_stride = bw; + + av1_get_obmc_mask2d(mask, mask_stride, bw, bh, 1); + aom_blend_a64_d32_mask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, + mask, mask_stride, bh, bw, 0, 0); +} + +void 
av1_build_obmc_inter_prediction_plane( + const AV1_COMMON *cm, MACROBLOCKD *xd, int plane, int mi_row, int mi_col, + CONV_BUF_TYPE *base, int base_stride, CONV_BUF_TYPE *above, + int above_stride, CONV_BUF_TYPE *left, int left_stride) { + const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; + + // handle above row + struct obmc_inter_pred_plane_no_round_ctxt ctxt_above = { + base, base_stride, above, above_stride, plane + }; + foreach_overlappable_nb_above( + cm, xd, mi_col, max_neighbor_obmc[b_width_log2_lookup[bsize]], + build_obmc_inter_pred_plane_above_no_round, &ctxt_above); + + // handle left column + struct obmc_inter_pred_plane_no_round_ctxt ctxt_left = { + base, base_stride, left, left_stride, plane + }; + foreach_overlappable_nb_left( + cm, xd, mi_row, max_neighbor_obmc[b_height_log2_lookup[bsize]], + build_obmc_inter_pred_plane_left_no_round, &ctxt_left); + + setup_dst_plane(xd->plane, xd->mi[0]->mbmi.sb_type, get_frame_new_buffer(cm), + mi_row, mi_col, plane); + + ConvolveParams conv_params = + get_conv_params_no_round(0, 0, plane, NULL, MAX_SB_SIZE, 1, xd->bd); + const int convolve_rounding_bits = + FILTER_BITS * 2 - conv_params.round_0 - conv_params.round_1; + const struct macroblockd_plane *pd = &xd->plane[plane]; + int bw = pd->width; + int bh = pd->height; + + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) + av1_highbd_convolve_rounding(base, base_stride, pd->dst.buf, pd->dst.stride, + bw, bh, convolve_rounding_bits, xd->bd); + else + av1_convolve_rounding(base, base_stride, pd->dst.buf, pd->dst.stride, bw, + bh, convolve_rounding_bits); +} + +struct build_prediction_plane_hp_ctxt { + const AV1_COMMON *cm; + int mi_row; + int mi_col; + CONV_BUF_TYPE *tmp_buf; + int tmp_stride; + int mb_to_far_edge; + int plane; +}; + +static INLINE void build_prediction_plane_by_above_pred_hp( + MACROBLOCKD *xd, int rel_mi_col, uint8_t above_mi_width, + MODE_INFO *above_mi, void *fun_ctxt, const int num_planes) { + MB_MODE_INFO *above_mbmi = &above_mi->mbmi; + const 
BLOCK_SIZE a_bsize = AOMMAX(BLOCK_8X8, above_mbmi->sb_type); + struct build_prediction_plane_hp_ctxt *ctxt = + (struct build_prediction_plane_hp_ctxt *)fun_ctxt; + const int above_mi_col = ctxt->mi_col + rel_mi_col; + int32_t *dst[MAX_MB_PLANE]; + const int plane = ctxt->plane; + (void)num_planes; + + MB_MODE_INFO backup_mbmi = *above_mbmi; + modify_neighbor_predictor_for_obmc(above_mbmi); + + const int num_refs = 1 + has_second_ref(above_mbmi); + + assert(num_refs == 1); + + for (int ref = 0; ref < num_refs; ++ref) { + const MV_REFERENCE_FRAME frame = above_mbmi->ref_frame[ref]; + + const RefBuffer *const ref_buf = &ctxt->cm->frame_refs[frame - LAST_FRAME]; + + xd->block_refs[ref] = ref_buf; + if ((!av1_is_valid_scale(&ref_buf->sf))) + aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM, + "Reference frame has invalid dimensions"); + setup_pre_plane(xd, ref, ref_buf->buf, ctxt->mi_row, above_mi_col, + &ref_buf->sf, plane); + } + + xd->mb_to_left_edge = 8 * MI_SIZE * (-above_mi_col); + xd->mb_to_right_edge = ctxt->mb_to_far_edge + + (xd->n8_w - rel_mi_col - above_mi_width) * MI_SIZE * 8; + + int mi_x = above_mi_col << MI_SIZE_LOG2; + int mi_y = ctxt->mi_row << MI_SIZE_LOG2; + + const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; + + do { + const struct macroblockd_plane *pd = &xd->plane[plane]; + int bw = (above_mi_width * MI_SIZE) >> pd->subsampling_x; + int bh = clamp(block_size_high[bsize] >> (pd->subsampling_y + 1), 4, + block_size_high[BLOCK_64X64] >> (pd->subsampling_y + 1)); + + setup_pred_plane_hp(&dst[plane], a_bsize, ctxt->tmp_buf, ctxt->tmp_stride, + 0, rel_mi_col, NULL, pd->subsampling_x, + pd->subsampling_y); + if (skip_u4x4_pred_in_obmc(bsize, pd, 0)) continue; + av1_build_inter_predictor_hp_sr(xd, plane, above_mi, 1, bw, bh, 0, 0, bw, + bh, mi_x, mi_y, 0, dst[plane], + ctxt->tmp_stride); + } while (0); + *above_mbmi = backup_mbmi; +} + +void av1_build_prediction_plane_by_above_preds_hp(const AV1_COMMON *cm, + MACROBLOCKD *xd, int plane, + int 
mi_row, int mi_col, + int32_t *tmp_buf, + int tmp_stride) { + if (!xd->up_available) return; + + // Adjust mb_to_bottom_edge to have the correct value for the OBMC + // prediction block. This is half the height of the original block, + // except for 128-wide blocks, where we only use a height of 32. + int this_height = xd->n8_h * MI_SIZE; + int pred_height = AOMMIN(this_height / 2, 32); + xd->mb_to_bottom_edge += (this_height - pred_height) * 8; + + struct build_prediction_plane_hp_ctxt ctxt = { + cm, mi_row, mi_col, tmp_buf, tmp_stride, xd->mb_to_right_edge, plane + }; + BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; + foreach_overlappable_nb_above(cm, xd, mi_col, + max_neighbor_obmc[b_width_log2_lookup[bsize]], + build_prediction_plane_by_above_pred_hp, &ctxt); + + xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8); + xd->mb_to_right_edge = ctxt.mb_to_far_edge; + xd->mb_to_bottom_edge -= (this_height - pred_height) * 8; +} + +static INLINE void build_prediction_plane_by_left_pred_hp( + MACROBLOCKD *xd, int rel_mi_row, uint8_t left_mi_height, MODE_INFO *left_mi, + void *fun_ctxt, const int num_planes) { + MB_MODE_INFO *left_mbmi = &left_mi->mbmi; + const BLOCK_SIZE l_bsize = AOMMAX(BLOCK_8X8, left_mbmi->sb_type); + struct build_prediction_plane_hp_ctxt *ctxt = + (struct build_prediction_plane_hp_ctxt *)fun_ctxt; + const int left_mi_row = ctxt->mi_row + rel_mi_row; + int32_t *dst[MAX_MB_PLANE]; + const int plane = ctxt->plane; + (void)num_planes; + + MB_MODE_INFO backup_mbmi = *left_mbmi; + modify_neighbor_predictor_for_obmc(left_mbmi); + + const int num_refs = 1 + has_second_ref(left_mbmi); + + for (int ref = 0; ref < num_refs; ++ref) { + const MV_REFERENCE_FRAME frame = left_mbmi->ref_frame[ref]; + + const RefBuffer *const ref_buf = &ctxt->cm->frame_refs[frame - LAST_FRAME]; + + xd->block_refs[ref] = ref_buf; + if ((!av1_is_valid_scale(&ref_buf->sf))) + aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM, + "Reference frame has invalid dimensions"); + 
setup_pre_plane(xd, ref, ref_buf->buf, left_mi_row, ctxt->mi_col, + &ref_buf->sf, plane); + } + + xd->mb_to_top_edge = 8 * MI_SIZE * (-left_mi_row); + xd->mb_to_bottom_edge = + ctxt->mb_to_far_edge + + (xd->n8_h - rel_mi_row - left_mi_height) * MI_SIZE * 8; + + int mi_x = ctxt->mi_col << MI_SIZE_LOG2; + int mi_y = left_mi_row << MI_SIZE_LOG2; + + const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; + + do { + const struct macroblockd_plane *pd = &xd->plane[plane]; + int bw = clamp(block_size_wide[bsize] >> (pd->subsampling_x + 1), 4, + block_size_wide[BLOCK_64X64] >> (pd->subsampling_x + 1)); + int bh = (left_mi_height << MI_SIZE_LOG2) >> pd->subsampling_y; + + setup_pred_plane_hp(&dst[plane], l_bsize, ctxt->tmp_buf, ctxt->tmp_stride, + rel_mi_row, 0, NULL, pd->subsampling_x, + pd->subsampling_y); + if (skip_u4x4_pred_in_obmc(bsize, pd, 1)) continue; + av1_build_inter_predictor_hp_sr(xd, plane, left_mi, 1, bw, bh, 0, 0, bw, bh, + mi_x, mi_y, 0, dst[plane], + ctxt->tmp_stride); + } while (0); + *left_mbmi = backup_mbmi; +} + +void av1_build_prediction_plane_by_left_preds_hp(const AV1_COMMON *cm, + MACROBLOCKD *xd, int plane, + int mi_row, int mi_col, + int32_t *tmp_buf, + int tmp_stride) { + if (!xd->left_available) return; + + // Adjust mb_to_right_edge to have the correct value for the OBMC + // prediction block. This is half the width of the original block, + // except for 128-wide blocks, where we only use a width of 32. 
+ int this_width = xd->n8_w * MI_SIZE; + int pred_width = AOMMIN(this_width / 2, 32); + xd->mb_to_right_edge += (this_width - pred_width) * 8; + + struct build_prediction_plane_hp_ctxt ctxt = { + cm, mi_row, mi_col, tmp_buf, tmp_stride, xd->mb_to_bottom_edge, plane, + }; + BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; + foreach_overlappable_nb_left(cm, xd, mi_row, + max_neighbor_obmc[b_height_log2_lookup[bsize]], + build_prediction_plane_by_left_pred_hp, &ctxt); + + xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8); + xd->mb_to_right_edge -= (this_width - pred_width) * 8; + xd->mb_to_bottom_edge = ctxt.mb_to_far_edge; +} + +void av1_build_obmc_inter_prediction( + const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col, + CONV_BUF_TYPE *base[MAX_MB_PLANE], int base_stride[MAX_MB_PLANE], + CONV_BUF_TYPE *above[MAX_MB_PLANE], int above_stride[MAX_MB_PLANE], + CONV_BUF_TYPE *left[MAX_MB_PLANE], int left_stride[MAX_MB_PLANE]) { + const int num_planes = av1_num_planes(cm); + + for (int plane = 0; plane < num_planes; ++plane) { + av1_build_obmc_inter_prediction_plane( + cm, xd, plane, mi_row, mi_col, base[plane], base_stride[plane], + above[plane], above_stride[plane], left[plane], left_stride[plane]); + } +} + +void av1_build_prediction_by_above_preds_hp(const AV1_COMMON *cm, + MACROBLOCKD *xd, int mi_row, + int mi_col, + int32_t *tmp_buf[MAX_MB_PLANE], + int tmp_stride[MAX_MB_PLANE]) { + const int num_planes = av1_num_planes(cm); + if (!xd->up_available) return; + + for (int plane = 0; plane < num_planes; ++plane) { + av1_build_prediction_plane_by_above_preds_hp( + cm, xd, plane, mi_row, mi_col, tmp_buf[plane], tmp_stride[plane]); + } +} + +void av1_build_prediction_by_left_preds_hp(const AV1_COMMON *cm, + MACROBLOCKD *xd, int mi_row, + int mi_col, + int32_t *tmp_buf[MAX_MB_PLANE], + int tmp_stride[MAX_MB_PLANE]) { + const int num_planes = av1_num_planes(cm); + if (!xd->left_available) return; + + for (int plane = 0; plane < num_planes; ++plane) { + 
av1_build_prediction_plane_by_left_preds_hp( + cm, xd, plane, mi_row, mi_col, tmp_buf[plane], tmp_stride[plane]); + } +} + +void av1_build_obmc_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd, + int mi_row, int mi_col) { + const int num_planes = av1_num_planes(cm); + DECLARE_ALIGNED(16, CONV_BUF_TYPE, dst_buf0[MAX_SB_SQUARE]); + DECLARE_ALIGNED(16, CONV_BUF_TYPE, dst_buf1[MAX_SB_SQUARE >> 1]); + DECLARE_ALIGNED(16, CONV_BUF_TYPE, dst_buf2[MAX_SB_SQUARE >> 1]); + + int dst_stride0 = MAX_SB_SIZE; + int dst_stride1 = MAX_SB_SIZE; + int dst_stride2 = MAX_SB_SIZE >> 1; + + for (int j = 0; j < num_planes; ++j) { + const struct macroblockd_plane *pd = &xd->plane[j]; + int bw = pd->width; + int bh = pd->height; + const int mi_x = mi_col * MI_SIZE; + const int mi_y = mi_row * MI_SIZE; + + av1_build_inter_predictor_hp_sr(xd, j, xd->mi[0], 0, bw, bh, 0, 0, bw, bh, + mi_x, mi_y, 0, dst_buf0, dst_stride0); + av1_build_prediction_plane_by_above_preds_hp(cm, xd, j, mi_row, mi_col, + dst_buf1, dst_stride1); + av1_build_prediction_plane_by_left_preds_hp(cm, xd, j, mi_row, mi_col, + dst_buf2, dst_stride2); + av1_build_obmc_inter_prediction_plane(cm, xd, j, mi_row, mi_col, dst_buf0, + dst_stride0, dst_buf1, dst_stride1, + dst_buf2, dst_stride2); + } +} +#else struct obmc_inter_pred_ctxt { uint8_t **adjacent; int *adjacent_stride; @@ -1540,18 +1976,6 @@ build_obmc_inter_pred_left, &ctxt_left); } -void modify_neighbor_predictor_for_obmc(MB_MODE_INFO *mbmi) { - if (is_interintra_pred(mbmi)) { - mbmi->ref_frame[1] = NONE_FRAME; - } else if (has_second_ref(mbmi) && - is_masked_compound_type(mbmi->interinter_compound_type)) { - mbmi->interinter_compound_type = COMPOUND_AVERAGE; - mbmi->ref_frame[1] = NONE_FRAME; - } - if (has_second_ref(mbmi)) mbmi->ref_frame[1] = NONE_FRAME; - return; -} - struct build_prediction_ctxt { const AV1_COMMON *cm; int mi_row; @@ -1770,6 +2194,7 @@ av1_build_obmc_inter_prediction(cm, xd, mi_row, mi_col, dst_buf1, dst_stride1, dst_buf2, dst_stride2); } 
+#endif /* clang-format off */ #if CONFIG_EXT_PARTITION @@ -2088,6 +2513,74 @@ } } +// Build a high precision single ref predictor and store the results in ext_dst. +// The high precision prediction will be used in high precision blending for +// compound modes. +void av1_build_inter_predictor_hp_sr(MACROBLOCKD *xd, int plane, + const MODE_INFO *mi, int build_for_obmc, + int bw, int bh, int x, int y, int w, int h, + int mi_x, int mi_y, int ref, + CONV_BUF_TYPE *const ext_dst, + int ext_dst_stride) { + struct macroblockd_plane *const pd = &xd->plane[plane]; + const struct scale_factors *const sf = &xd->block_refs[ref]->sf; + struct buf_2d *const pre_buf = &pd->pre[ref]; + const MV mv = mi->mbmi.mv[ref].as_mv; + uint8_t *pre; + int xs, ys, subpel_x, subpel_y; + const int is_scaled = av1_is_scaled(sf); + // Set is_compound as 1 to enable high precision output + ConvolveParams conv_params = get_conv_params_no_round( + ref, 0, plane, ext_dst, ext_dst_stride, 1, xd->bd); + WarpTypesAllowed warp_types; + const WarpedMotionParams *const wm = + &xd->global_motion[mi->mbmi.ref_frame[ref]]; + warp_types.global_warp_allowed = is_global_mv_block(mi, wm->wmtype); + warp_types.local_warp_allowed = mi->mbmi.motion_mode == WARPED_CAUSAL; + + if (is_scaled) { + int ssx = pd->subsampling_x; + int ssy = pd->subsampling_y; + int orig_pos_y = (mi_y << (SUBPEL_BITS - ssy)) + (y << SUBPEL_BITS); + orig_pos_y += mv.row * (1 << (1 - ssy)); + int orig_pos_x = (mi_x << (SUBPEL_BITS - ssx)) + (x << SUBPEL_BITS); + orig_pos_x += mv.col * (1 << (1 - ssx)); + int pos_y = sf->scale_value_y(orig_pos_y, sf); + int pos_x = sf->scale_value_x(orig_pos_x, sf); + pos_x += SCALE_EXTRA_OFF; + pos_y += SCALE_EXTRA_OFF; + + const int top = -AOM_LEFT_TOP_MARGIN_SCALED(ssy); + const int left = -AOM_LEFT_TOP_MARGIN_SCALED(ssx); + const int bottom = (pre_buf->height + AOM_INTERP_EXTEND) + << SCALE_SUBPEL_BITS; + const int right = (pre_buf->width + AOM_INTERP_EXTEND) << SCALE_SUBPEL_BITS; + pos_y = clamp(pos_y, 
top, bottom); + pos_x = clamp(pos_x, left, right); + + pre = pre_buf->buf0 + (pos_y >> SCALE_SUBPEL_BITS) * pre_buf->stride + + (pos_x >> SCALE_SUBPEL_BITS); + subpel_x = pos_x & SCALE_SUBPEL_MASK; + subpel_y = pos_y & SCALE_SUBPEL_MASK; + xs = sf->x_step_q4; + ys = sf->y_step_q4; + } else { + const MV mv_q4 = clamp_mv_to_umv_border_sb( + xd, &mv, bw, bh, pd->subsampling_x, pd->subsampling_y); + xs = ys = SCALE_SUBPEL_SHIFTS; + subpel_x = (mv_q4.col & SUBPEL_MASK) << SCALE_EXTRA_BITS; + subpel_y = (mv_q4.row & SUBPEL_MASK) << SCALE_EXTRA_BITS; + pre = pre_buf->buf + (y + (mv_q4.row >> SUBPEL_BITS)) * pre_buf->stride + + (x + (mv_q4.col >> SUBPEL_BITS)); + } + + av1_make_inter_predictor(pre, pre_buf->stride, NULL, 0, subpel_x, subpel_y, + sf, w, h, &conv_params, mi->mbmi.interp_filters, + &warp_types, (mi_x >> pd->subsampling_x) + x, + (mi_y >> pd->subsampling_y) + y, plane, ref, mi, + build_for_obmc, xs, ys, xd); +} + static void build_wedge_inter_predictor_from_buf( MACROBLOCKD *xd, int plane, int x, int y, int w, int h, uint8_t *ext_dst0, int ext_dst_stride0, uint8_t *ext_dst1, int ext_dst_stride1) {
diff --git a/av1/common/reconinter.h b/av1/common/reconinter.h index f527b6a..dbeb1d5 100644 --- a/av1/common/reconinter.h +++ b/av1/common/reconinter.h
@@ -417,15 +417,45 @@ return 0; } -const uint8_t *av1_get_obmc_mask(int length); -void av1_count_overlappable_neighbors(const AV1_COMMON *cm, MACROBLOCKD *xd, - int mi_row, int mi_col); -void av1_build_obmc_inter_prediction(const AV1_COMMON *cm, MACROBLOCKD *xd, - int mi_row, int mi_col, - uint8_t *above[MAX_MB_PLANE], - int above_stride[MAX_MB_PLANE], - uint8_t *left[MAX_MB_PLANE], - int left_stride[MAX_MB_PLANE]); +#if CONFIG_OBMC_HIGH_PREC_BLENDING +static INLINE void setup_pred_plane_hp(int32_t **dst_pt, BLOCK_SIZE bsize, + int32_t *dst0, int stride0, int mi_row, + int mi_col, + const struct scale_factors *scale, + int subsampling_x, int subsampling_y) { + // Offset the buffer pointer + if (subsampling_y && (mi_row & 0x01) && (mi_size_high[bsize] == 1)) + mi_row -= 1; + if (subsampling_x && (mi_col & 0x01) && (mi_size_wide[bsize] == 1)) + mi_col -= 1; + + const int x = (MI_SIZE * mi_col) >> subsampling_x; + const int y = (MI_SIZE * mi_row) >> subsampling_y; + *dst_pt = dst0 + scaled_buffer_offset(x, y, stride0, scale); +} + +void av1_build_inter_predictor_hp_sr(MACROBLOCKD *xd, int plane, + const MODE_INFO *mi, int build_for_obmc, + int bw, int bh, int x, int y, int w, int h, + int mi_x, int mi_y, int ref, + int32_t *const ext_dst, + int ext_dst_stride); +void av1_build_prediction_by_above_preds_hp(const AV1_COMMON *cm, + MACROBLOCKD *xd, int mi_row, + int mi_col, + int32_t *tmp_buf[MAX_MB_PLANE], + int tmp_stride[MAX_MB_PLANE]); +void av1_build_prediction_by_left_preds_hp(const AV1_COMMON *cm, + MACROBLOCKD *xd, int mi_row, + int mi_col, + int32_t *tmp_buf[MAX_MB_PLANE], + int tmp_stride[MAX_MB_PLANE]); +void av1_build_obmc_inter_prediction( + const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col, + CONV_BUF_TYPE *base[MAX_MB_PLANE], int base_stride[MAX_MB_PLANE], + CONV_BUF_TYPE *above[MAX_MB_PLANE], int above_stride[MAX_MB_PLANE], + CONV_BUF_TYPE *left[MAX_MB_PLANE], int left_stride[MAX_MB_PLANE]); +#else void 
av1_build_prediction_by_above_preds(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col, uint8_t *tmp_buf[MAX_MB_PLANE], @@ -438,6 +468,17 @@ int tmp_width[MAX_MB_PLANE], int tmp_height[MAX_MB_PLANE], int tmp_stride[MAX_MB_PLANE]); +void av1_build_obmc_inter_prediction(const AV1_COMMON *cm, MACROBLOCKD *xd, + int mi_row, int mi_col, + uint8_t *above[MAX_MB_PLANE], + int above_stride[MAX_MB_PLANE], + uint8_t *left[MAX_MB_PLANE], + int left_stride[MAX_MB_PLANE]); +#endif + +const uint8_t *av1_get_obmc_mask(int length); +void av1_count_overlappable_neighbors(const AV1_COMMON *cm, MACROBLOCKD *xd, + int mi_row, int mi_col); void av1_build_obmc_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col);
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c index 687144a..b884465 100644 --- a/av1/decoder/decodeframe.c +++ b/av1/decoder/decodeframe.c
@@ -422,11 +422,17 @@ } } - av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize); - +#if CONFIG_OBMC_HIGH_PREC_BLENDING if (mbmi->motion_mode == OBMC_CAUSAL) { av1_build_obmc_inter_predictors_sb(cm, xd, mi_row, mi_col); + } else { + av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize); } +#else + av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize); + if (mbmi->motion_mode == OBMC_CAUSAL) + av1_build_obmc_inter_predictors_sb(cm, xd, mi_row, mi_col); +#endif #if CONFIG_MISMATCH_DEBUG for (int plane = 0; plane < num_planes; ++plane) {
diff --git a/av1/encoder/block.h b/av1/encoder/block.h index 3cbb8ad..7579213 100644 --- a/av1/encoder/block.h +++ b/av1/encoder/block.h
@@ -224,8 +224,13 @@ int32_t *wsrc_buf; int32_t *mask_buf; +#if CONFIG_OBMC_HIGH_PREC_BLENDING + CONV_BUF_TYPE *above_pred_hp_buf; + CONV_BUF_TYPE *left_pred_hp_buf; +#else uint8_t *above_pred_buf; uint8_t *left_pred_buf; +#endif PALETTE_BUFFER *palette_buffer;
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c index f039d8a..2c7b87f 100644 --- a/av1/encoder/encodeframe.c +++ b/av1/encoder/encodeframe.c
@@ -4951,11 +4951,17 @@ &xd->block_refs[ref]->sf, num_planes); } - av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize); - +#if CONFIG_OBMC_HIGH_PREC_BLENDING if (mbmi->motion_mode == OBMC_CAUSAL) { av1_build_obmc_inter_predictors_sb(cm, xd, mi_row, mi_col); + } else { + av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize); } +#else + av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize); + if (mbmi->motion_mode == OBMC_CAUSAL) + av1_build_obmc_inter_predictors_sb(cm, xd, mi_row, mi_col); +#endif #if CONFIG_MISMATCH_DEBUG if (dry_run == OUTPUT_ENABLED) {
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c index cc3d679..2ab88ad 100644 --- a/av1/encoder/encoder.c +++ b/av1/encoder/encoder.c
@@ -527,11 +527,19 @@ aom_free(cpi->active_map.map); cpi->active_map.map = NULL; +#if CONFIG_OBMC_HIGH_PREC_BLENDING + aom_free(cpi->td.mb.above_pred_hp_buf); + cpi->td.mb.above_pred_hp_buf = NULL; + + aom_free(cpi->td.mb.left_pred_hp_buf); + cpi->td.mb.left_pred_hp_buf = NULL; +#else aom_free(cpi->td.mb.above_pred_buf); cpi->td.mb.above_pred_buf = NULL; aom_free(cpi->td.mb.left_pred_buf); cpi->td.mb.left_pred_buf = NULL; +#endif aom_free(cpi->td.mb.wsrc_buf); cpi->td.mb.wsrc_buf = NULL; @@ -3419,7 +3427,18 @@ av1_init_second_pass(cpi); } - int buf_scaler = 2; + int buf_scaler = 1; + +#if CONFIG_OBMC_HIGH_PREC_BLENDING + CHECK_MEM_ERROR(cm, cpi->td.mb.above_pred_hp_buf, + (CONV_BUF_TYPE *)aom_memalign( + 16, buf_scaler * MAX_MB_PLANE * MAX_SB_SQUARE * + sizeof(*cpi->td.mb.above_pred_hp_buf))); + CHECK_MEM_ERROR(cm, cpi->td.mb.left_pred_hp_buf, + (CONV_BUF_TYPE *)aom_memalign( + 16, buf_scaler * MAX_MB_PLANE * MAX_SB_SQUARE * + sizeof(*cpi->td.mb.left_pred_hp_buf))); +#else CHECK_MEM_ERROR( cm, cpi->td.mb.above_pred_buf, (uint8_t *)aom_memalign(16, buf_scaler * MAX_MB_PLANE * MAX_SB_SQUARE * @@ -3428,6 +3447,7 @@ cm, cpi->td.mb.left_pred_buf, (uint8_t *)aom_memalign(16, buf_scaler * MAX_MB_PLANE * MAX_SB_SQUARE * sizeof(*cpi->td.mb.left_pred_buf))); +#endif CHECK_MEM_ERROR(cm, cpi->td.mb.wsrc_buf, (int32_t *)aom_memalign( @@ -3945,8 +3965,13 @@ // Deallocate allocated thread data. if (t < cpi->num_workers - 1) { aom_free(thread_data->td->palette_buffer); +#if CONFIG_OBMC_HIGH_PREC_BLENDING + aom_free(thread_data->td->above_pred_hp_buf); + aom_free(thread_data->td->left_pred_hp_buf); +#else aom_free(thread_data->td->above_pred_buf); aom_free(thread_data->td->left_pred_buf); +#endif aom_free(thread_data->td->wsrc_buf); aom_free(thread_data->td->mask_buf); aom_free(thread_data->td->counts);
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h index 0c5af2b..92782a1 100644 --- a/av1/encoder/encoder.h +++ b/av1/encoder/encoder.h
@@ -364,8 +364,13 @@ PC_TREE *pc_root[MAX_MIB_SIZE_LOG2 - MIN_MIB_SIZE_LOG2 + 1]; int32_t *wsrc_buf; int32_t *mask_buf; +#if CONFIG_OBMC_HIGH_PREC_BLENDING + int32_t *above_pred_hp_buf; + int32_t *left_pred_hp_buf; +#else uint8_t *above_pred_buf; uint8_t *left_pred_buf; +#endif PALETTE_BUFFER *palette_buffer; #if CONFIG_INTRABC int intrabc_used_this_tile;
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c index cf7b465..5dbccc7 100644 --- a/av1/encoder/ethread.c +++ b/av1/encoder/ethread.c
@@ -86,6 +86,18 @@ av1_setup_pc_tree(cm, thread_data->td); int buf_scaler = 2; +#if CONFIG_OBMC_HIGH_PREC_BLENDING + CHECK_MEM_ERROR( + cm, thread_data->td->above_pred_hp_buf, + (CONV_BUF_TYPE *)aom_memalign( + 16, buf_scaler * MAX_MB_PLANE * MAX_SB_SQUARE * + sizeof(*thread_data->td->above_pred_hp_buf))); + CHECK_MEM_ERROR( + cm, thread_data->td->left_pred_hp_buf, + (CONV_BUF_TYPE *)aom_memalign( + 16, buf_scaler * MAX_MB_PLANE * MAX_SB_SQUARE * + sizeof(*thread_data->td->left_pred_hp_buf))); +#else CHECK_MEM_ERROR(cm, thread_data->td->above_pred_buf, (uint8_t *)aom_memalign( 16, buf_scaler * MAX_MB_PLANE * MAX_SB_SQUARE * @@ -94,6 +106,8 @@ (uint8_t *)aom_memalign( 16, buf_scaler * MAX_MB_PLANE * MAX_SB_SQUARE * sizeof(*thread_data->td->left_pred_buf))); +#endif + CHECK_MEM_ERROR( cm, thread_data->td->wsrc_buf, (int32_t *)aom_memalign( @@ -137,8 +151,14 @@ if (thread_data->td != &cpi->td) { thread_data->td->mb = cpi->td.mb; thread_data->td->rd_counts = cpi->td.rd_counts; +#if CONFIG_OBMC_HIGH_PREC_BLENDING + thread_data->td->mb.above_pred_hp_buf = + thread_data->td->above_pred_hp_buf; + thread_data->td->mb.left_pred_hp_buf = thread_data->td->left_pred_hp_buf; +#else thread_data->td->mb.above_pred_buf = thread_data->td->above_pred_buf; thread_data->td->mb.left_pred_buf = thread_data->td->left_pred_buf; +#endif thread_data->td->mb.wsrc_buf = thread_data->td->wsrc_buf; thread_data->td->mb.mask_buf = thread_data->td->mask_buf; }
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c index 3dabf35..5b9c392 100644 --- a/av1/encoder/rdopt.c +++ b/av1/encoder/rdopt.c
@@ -7224,11 +7224,18 @@ } typedef struct { - // Inter prediction buffers and respective strides +// OBMC secondary prediction buffers and respective strides +#if CONFIG_OBMC_HIGH_PREC_BLENDING + CONV_BUF_TYPE *above_pred_hp_buf[MAX_MB_PLANE]; + int above_pred_hp_stride[MAX_MB_PLANE]; + CONV_BUF_TYPE *left_pred_hp_buf[MAX_MB_PLANE]; + int left_pred_hp_stride[MAX_MB_PLANE]; +#else uint8_t *above_pred_buf[MAX_MB_PLANE]; int above_pred_stride[MAX_MB_PLANE]; uint8_t *left_pred_buf[MAX_MB_PLANE]; int left_pred_stride[MAX_MB_PLANE]; +#endif int_mv *single_newmv; // Pointer to array of motion vectors to use for each ref and their rates // Should point to first of 2 arrays in 2D array @@ -7582,13 +7589,36 @@ tmp_rate2 = rate2_nocoeff - rate_mv + tmp_rate_mv; mbmi->interp_filters = condition_interp_filters_on_mv(mbmi->interp_filters, xd); - av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize); - } else { - av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize); } +#if CONFIG_OBMC_HIGH_PREC_BLENDING + DECLARE_ALIGNED(16, CONV_BUF_TYPE, tmp_buf[MAX_MB_PLANE * MAX_SB_SQUARE]); + CONV_BUF_TYPE *dst_buf[MAX_MB_PLANE]; + int dst_stride[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE }; + dst_buf[0] = tmp_buf; + dst_buf[1] = tmp_buf + MAX_SB_SQUARE; + dst_buf[2] = tmp_buf + MAX_SB_SQUARE * 2; + + for (int j = 0; j < num_planes; ++j) { + const struct macroblockd_plane *pd = &xd->plane[j]; + int bw = pd->width; + int bh = pd->height; + const int mi_x = mi_col * MI_SIZE; + const int mi_y = mi_row * MI_SIZE; + + av1_build_inter_predictor_hp_sr(xd, j, xd->mi[0], 0, bw, bh, 0, 0, bw, + bh, mi_x, mi_y, 0, dst_buf[j], + dst_stride[j]); + } + av1_build_obmc_inter_prediction( + cm, xd, mi_row, mi_col, dst_buf, dst_stride, args->above_pred_hp_buf, + args->above_pred_hp_stride, args->left_pred_hp_buf, + args->left_pred_hp_stride); +#else + av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize); av1_build_obmc_inter_prediction( cm, xd, 
mi_row, mi_col, args->above_pred_buf, args->above_pred_stride, args->left_pred_buf, args->left_pred_stride); +#endif } // Local warped motion mode @@ -9005,11 +9035,20 @@ plane_block_height); } +#if CONFIG_OBMC_HIGH_PREC_BLENDING +static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x, + const MACROBLOCKD *xd, int mi_row, + int mi_col, const CONV_BUF_TYPE *above, + int above_stride, + const CONV_BUF_TYPE *left, + int left_stride); +#else static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd, int mi_row, int mi_col, const uint8_t *above, int above_stride, const uint8_t *left, int left_stride); +#endif #if CONFIG_EXT_SKIP static void estimate_skip_mode_rdcost( @@ -9212,6 +9251,8 @@ int *mode_map = tile_data->mode_map[bsize]; const int mode_search_skip_flags = sf->mode_search_skip_flags; int skip_intra_modes = 0; + const int rows = block_size_high[bsize]; + const int cols = block_size_wide[bsize]; HandleInterModeArgs args = { { NULL }, { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE }, @@ -9221,19 +9262,19 @@ { { 0 } }, }; - const int rows = block_size_high[bsize]; - const int cols = block_size_wide[bsize]; +#if CONFIG_OBMC_HIGH_PREC_BLENDING + args.above_pred_hp_buf[0] = x->above_pred_hp_buf; + args.above_pred_hp_buf[1] = x->above_pred_hp_buf + MAX_SB_SQUARE; + args.above_pred_hp_buf[2] = x->above_pred_hp_buf + 2 * MAX_SB_SQUARE; + args.left_pred_hp_buf[0] = x->left_pred_hp_buf; + args.left_pred_hp_buf[1] = x->left_pred_hp_buf + MAX_SB_SQUARE; + args.left_pred_hp_buf[2] = x->left_pred_hp_buf + 2 * MAX_SB_SQUARE; +#else int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE }; int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE }; int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE }; int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE }; - int64_t dist_refs[TOTAL_REFS_PER_FRAME]; - int dist_order_refs[TOTAL_REFS_PER_FRAME]; - 
int num_available_refs = 0; - memset(dist_refs, -1, sizeof(dist_refs)); - memset(dist_order_refs, -1, sizeof(dist_order_refs)); - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { int len = sizeof(uint16_t); args.above_pred_buf[0] = CONVERT_TO_BYTEPTR(x->above_pred_buf); @@ -9254,6 +9295,13 @@ args.left_pred_buf[1] = x->left_pred_buf + MAX_SB_SQUARE; args.left_pred_buf[2] = x->left_pred_buf + 2 * MAX_SB_SQUARE; } +#endif + + int64_t dist_refs[TOTAL_REFS_PER_FRAME]; + int dist_order_refs[TOTAL_REFS_PER_FRAME]; + int num_available_refs = 0; + memset(dist_refs, -1, sizeof(dist_refs)); + memset(dist_order_refs, -1, sizeof(dist_order_refs)); av1_zero(best_mbmode); av1_zero(pmi_uv); @@ -9321,6 +9369,20 @@ if (check_num_overlappable_neighbors(mbmi) && is_motion_variation_allowed_bsize(bsize)) { +#if CONFIG_OBMC_HIGH_PREC_BLENDING + av1_build_prediction_by_above_preds_hp(cm, xd, mi_row, mi_col, + args.above_pred_hp_buf, + args.above_pred_hp_stride); + av1_build_prediction_by_left_preds_hp(cm, xd, mi_row, mi_col, + args.left_pred_hp_buf, + args.left_pred_hp_stride); + av1_setup_dst_planes(xd->plane, bsize, get_frame_new_buffer(cm), mi_row, + mi_col, num_planes); + calc_target_weighted_pred( + cm, x, xd, mi_row, mi_col, args.above_pred_hp_buf[0], + args.above_pred_hp_stride[0], args.left_pred_hp_buf[0], + args.left_pred_hp_stride[0]); +#else av1_build_prediction_by_above_preds(cm, xd, mi_row, mi_col, args.above_pred_buf, dst_width1, dst_height1, args.above_pred_stride); @@ -9332,6 +9394,7 @@ calc_target_weighted_pred(cm, x, xd, mi_row, mi_col, args.above_pred_buf[0], args.above_pred_stride[0], args.left_pred_buf[0], args.left_pred_stride[0]); +#endif } for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { @@ -10299,12 +10362,17 @@ } if (is_inter_mode(mbmi->mode)) { - av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize); +#if CONFIG_OBMC_HIGH_PREC_BLENDING if (mbmi->motion_mode == OBMC_CAUSAL) { - av1_build_obmc_inter_prediction( - cm, xd, 
mi_row, mi_col, args.above_pred_buf, args.above_pred_stride, - args.left_pred_buf, args.left_pred_stride); + av1_build_obmc_inter_predictors_sb(cm, xd, mi_row, mi_col); + } else { + av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize); } +#else + av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize); + if (mbmi->motion_mode == OBMC_CAUSAL) + av1_build_obmc_inter_predictors_sb(cm, xd, mi_row, mi_col); +#endif av1_subtract_plane(x, bsize, 0); if (cm->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id]) { // av1_rd_pick_inter_mode_sb @@ -10849,7 +10917,11 @@ struct calc_target_weighted_pred_ctxt { const MACROBLOCK *x; +#if CONFIG_OBMC_HIGH_PREC_BLENDING + const CONV_BUF_TYPE *tmp; +#else const uint8_t *tmp; +#endif int tmp_stride; int overlap; }; @@ -10863,16 +10935,21 @@ struct calc_target_weighted_pred_ctxt *ctxt = (struct calc_target_weighted_pred_ctxt *)fun_ctxt; - const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0; - const int bw = xd->n8_w << MI_SIZE_LOG2; const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap); int32_t *wsrc = ctxt->x->wsrc_buf + (rel_mi_col * MI_SIZE); int32_t *mask = ctxt->x->mask_buf + (rel_mi_col * MI_SIZE); +#if CONFIG_OBMC_HIGH_PREC_BLENDING + const CONV_BUF_TYPE *tmp = ctxt->tmp + rel_mi_col * MI_SIZE; +#else const uint8_t *tmp = ctxt->tmp + rel_mi_col * MI_SIZE; + const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 
1 : 0; +#endif +#if !CONFIG_OBMC_HIGH_PREC_BLENDING if (!is_hbd) { +#endif for (int row = 0; row < ctxt->overlap; ++row) { const uint8_t m0 = mask1d[row]; const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0; @@ -10884,6 +10961,7 @@ mask += bw; tmp += ctxt->tmp_stride; } +#if !CONFIG_OBMC_HIGH_PREC_BLENDING } else { const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp); @@ -10899,6 +10977,7 @@ tmp16 += ctxt->tmp_stride; } } +#endif } static INLINE void calc_target_weighted_pred_left( @@ -10910,16 +10989,22 @@ struct calc_target_weighted_pred_ctxt *ctxt = (struct calc_target_weighted_pred_ctxt *)fun_ctxt; - const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0; - const int bw = xd->n8_w << MI_SIZE_LOG2; const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap); int32_t *wsrc = ctxt->x->wsrc_buf + (rel_mi_row * MI_SIZE * bw); int32_t *mask = ctxt->x->mask_buf + (rel_mi_row * MI_SIZE * bw); +#if CONFIG_OBMC_HIGH_PREC_BLENDING + const CONV_BUF_TYPE *tmp = + ctxt->tmp + (rel_mi_row * MI_SIZE * ctxt->tmp_stride); +#else const uint8_t *tmp = ctxt->tmp + (rel_mi_row * MI_SIZE * ctxt->tmp_stride); + const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 
1 : 0; +#endif +#if !CONFIG_OBMC_HIGH_PREC_BLENDING if (!is_hbd) { +#endif for (int row = 0; row < nb_mi_height * MI_SIZE; ++row) { for (int col = 0; col < ctxt->overlap; ++col) { const uint8_t m0 = mask1d[col]; @@ -10932,6 +11017,7 @@ mask += bw; tmp += ctxt->tmp_stride; } +#if !CONFIG_OBMC_HIGH_PREC_BLENDING } else { const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp); @@ -10948,6 +11034,7 @@ tmp16 += ctxt->tmp_stride; } } +#endif } // This function has a structure similar to av1_build_obmc_inter_prediction @@ -10988,19 +11075,37 @@ // error(x, y) = // wsrc(x, y) - mask(x, y) * P(x, y) / (AOM_BLEND_A64_MAX_ALPHA ** 2) // +#if CONFIG_OBMC_HIGH_PREC_BLENDING +static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x, + const MACROBLOCKD *xd, int mi_row, + int mi_col, const CONV_BUF_TYPE *above, + int above_stride, + const CONV_BUF_TYPE *left, + int left_stride) { +#else static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd, int mi_row, int mi_col, const uint8_t *above, int above_stride, const uint8_t *left, int left_stride) { +#endif const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; const int bw = xd->n8_w << MI_SIZE_LOG2; const int bh = xd->n8_h << MI_SIZE_LOG2; int32_t *mask_buf = x->mask_buf; int32_t *wsrc_buf = x->wsrc_buf; - const int src_scale = AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA; const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 
1 : 0; +#if CONFIG_OBMC_HIGH_PREC_BLENDING + ConvolveParams conv_params = + get_conv_params_no_round(0, 0, 0, NULL, MAX_SB_SIZE, 1, xd->bd); + const int convolve_rounding_bits = + FILTER_BITS * 2 - conv_params.round_0 - conv_params.round_1; + const int src_scale = AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * + (1 << convolve_rounding_bits); +#else + const int src_scale = AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA; +#endif // plane 0 should not be subsampled assert(xd->plane[0].subsampling_x == 0); @@ -11041,7 +11146,12 @@ for (int row = 0; row < bh; ++row) { for (int col = 0; col < bw; ++col) { +#if CONFIG_OBMC_HIGH_PREC_BLENDING + wsrc_buf[col] = ROUND_POWER_OF_TWO_SIGNED( + src[col] * src_scale - wsrc_buf[col], convolve_rounding_bits); +#else wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col]; +#endif } wsrc_buf += bw; src += x->plane[0].src.stride; @@ -11051,7 +11161,12 @@ for (int row = 0; row < bh; ++row) { for (int col = 0; col < bw; ++col) { +#if CONFIG_OBMC_HIGH_PREC_BLENDING + wsrc_buf[col] = ROUND_POWER_OF_TWO_SIGNED( + src[col] * src_scale - wsrc_buf[col], convolve_rounding_bits); +#else wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col]; +#endif } wsrc_buf += bw; src += x->plane[0].src.stride;
diff --git a/build/cmake/aom_config_defaults.cmake b/build/cmake/aom_config_defaults.cmake index 6eb7240..1de2529 100644 --- a/build/cmake/aom_config_defaults.cmake +++ b/build/cmake/aom_config_defaults.cmake
@@ -152,3 +152,4 @@ set(CONFIG_TMV 1 CACHE NUMBER "AV1 experiment flag.") set(CONFIG_TXK_SEL 1 CACHE NUMBER "AV1 experiment flag.") set(CONFIG_FILEOPTIONS 1 CACHE NUMBER "AV1 config option flag.") +set(CONFIG_OBMC_HIGH_PREC_BLENDING 0 CACHE NUMBER "AV1 experiment flag.")