Remove mv_refined from MB_MODE_INFO The array mv_refined is only needed in OPFL_REFINE_CHROMA. This CL removes both mv_refined and OPFL_REFINE_CHROMA, which prevents unnecessary memory copies for mbmi->mv_refined and thus improves decoder complexity. Change-Id: I9056081c9e1547c00f36b4843ebf579a7a0d977d
diff --git a/av1/common/blockd.h b/av1/common/blockd.h index f6f9796..628f0bb 100644 --- a/av1/common/blockd.h +++ b/av1/common/blockd.h
@@ -291,9 +291,6 @@ INTERINTER_COMPOUND_DATA interinter_comp; WarpedMotionParams wm_params; int_mv mv[2]; -#if CONFIG_OPTFLOW_REFINEMENT - int_mv mv_refined[2 * N_OF_OFFSETS]; -#endif // CONFIG_OPTFLOW_REFINEMENT // q index for the current coding block. int current_qindex; // Only for INTER blocks
diff --git a/av1/common/reconinter.c b/av1/common/reconinter.c index 22b5c29..7505b7a 100644 --- a/av1/common/reconinter.c +++ b/av1/common/reconinter.c
@@ -669,10 +669,6 @@ } #if CONFIG_OPTFLOW_REFINEMENT - -// Whether to refine chroma MV or not -#define OPFL_REFINE_CHROMA 0 - // Use second-pass motion compensation or not #define OPFL_SECOND_PASS_MC 1 @@ -806,7 +802,7 @@ // Negative values indicate gradient returned at reduced precision, and // positive values indicate gradient returned at higher precision. void av1_compute_subpel_gradients_mc_highbd( - MACROBLOCKD *xd, MB_MODE_INFO *mi, int bw, int bh, int mi_x, int mi_y, + MACROBLOCKD *xd, const MB_MODE_INFO *mi, int bw, int bh, int mi_x, int mi_y, uint8_t **mc_buf, InterPredParams *inter_pred_params, CalcSubpelParamsFunc calc_subpel_params_func, int ref, int *grad_prec_bits, int16_t *x_grad, int16_t *y_grad) { @@ -872,7 +868,7 @@ // Negative values indicate gradient returned at reduced precision, and // positive values indicate gradient returned at higher precision. void av1_compute_subpel_gradients_mc_lowbd( - MACROBLOCKD *xd, MB_MODE_INFO *mi, int bw, int bh, int mi_x, int mi_y, + MACROBLOCKD *xd, const MB_MODE_INFO *mi, int bw, int bh, int mi_x, int mi_y, uint8_t **mc_buf, InterPredParams *inter_pred_params, CalcSubpelParamsFunc calc_subpel_params_func, int ref, int *grad_prec_bits, int16_t *x_grad, int16_t *y_grad) { @@ -1449,7 +1445,7 @@ static int get_optflow_based_mv_highbd( const AV1_COMMON *cm, MACROBLOCKD *xd, int plane, MB_MODE_INFO *mbmi, - int bw, int bh, int mi_x, int mi_y, uint8_t **mc_buf, + int_mv *mv_refined, int bw, int bh, int mi_x, int mi_y, uint8_t **mc_buf, CalcSubpelParamsFunc calc_subpel_params_func, int16_t *gx0, int16_t *gy0, int16_t *gx1, int16_t *gy1, int *vx0, int *vy0, int *vx1, int *vy1, uint16_t *dst0, uint16_t *dst1) { @@ -1457,10 +1453,10 @@ // Convert output MV to 1/16th pel assert(MV_REFINE_PREC_BITS >= 3); for (int mvi = 0; mvi < N_OF_OFFSETS; mvi++) { - mbmi->mv_refined[mvi * 2].as_mv.row *= 1 << (MV_REFINE_PREC_BITS - 3); - mbmi->mv_refined[mvi * 2].as_mv.col *= 1 << (MV_REFINE_PREC_BITS - 3); - mbmi->mv_refined[mvi * 2 + 1].as_mv.row *= 1 << (MV_REFINE_PREC_BITS - 3); - mbmi->mv_refined[mvi * 2 + 1].as_mv.col *= 1 << (MV_REFINE_PREC_BITS - 3); + mv_refined[mvi * 2].as_mv.row *= 1 << (MV_REFINE_PREC_BITS - 3); + mv_refined[mvi * 2].as_mv.col *= 1 << (MV_REFINE_PREC_BITS - 3); + mv_refined[mvi * 2 + 1].as_mv.row *= 1 << (MV_REFINE_PREC_BITS - 3); + mv_refined[mvi * 2 + 1].as_mv.col *= 1 << (MV_REFINE_PREC_BITS - 3); } // Obtain d0 and d1 @@ -1542,19 +1538,19 @@ for (int i = 0; i < n_blocks; i++) { #if OPFL_CLAMP_MV_DELTA - mbmi->mv_refined[i * 2].as_mv.row += + mv_refined[i * 2].as_mv.row += clamp(vy0[i], -OPFL_MV_DELTA_LIMIT, OPFL_MV_DELTA_LIMIT); - mbmi->mv_refined[i * 2].as_mv.col += + mv_refined[i * 2].as_mv.col += clamp(vx0[i], -OPFL_MV_DELTA_LIMIT, OPFL_MV_DELTA_LIMIT); - mbmi->mv_refined[i * 2 + 1].as_mv.row += + mv_refined[i * 2 + 1].as_mv.row += clamp(vy1[i], -OPFL_MV_DELTA_LIMIT, OPFL_MV_DELTA_LIMIT); - mbmi->mv_refined[i * 2 + 1].as_mv.col += + mv_refined[i * 2 + 1].as_mv.col += clamp(vx1[i], -OPFL_MV_DELTA_LIMIT, OPFL_MV_DELTA_LIMIT); #else - mbmi->mv_refined[i * 2].as_mv.row += vy0[i]; - mbmi->mv_refined[i * 2].as_mv.col += vx0[i]; - mbmi->mv_refined[i * 2 + 1].as_mv.row += vy1[i]; - mbmi->mv_refined[i * 2 + 1].as_mv.col += vx1[i]; + mv_refined[i * 2].as_mv.row += vy0[i]; + mv_refined[i * 2].as_mv.col += vx0[i]; + mv_refined[i * 2 + 1].as_mv.row += vy1[i]; + mv_refined[i * 2 + 1].as_mv.col += vx1[i]; #endif } @@ -1563,7 +1559,7 @@ static int get_optflow_based_mv_lowbd( const AV1_COMMON *cm, MACROBLOCKD *xd, int plane, MB_MODE_INFO *mbmi, - int bw, int bh, int mi_x, int mi_y, uint8_t **mc_buf, + int_mv *mv_refined, int bw, int bh, int mi_x, int mi_y, uint8_t **mc_buf, CalcSubpelParamsFunc calc_subpel_params_func, int16_t *gx0, int16_t *gy0, int16_t *gx1, int16_t *gy1, int *vx0, int *vy0, int *vx1, int *vy1, uint8_t *dst0, uint8_t *dst1) { @@ -1571,10 +1567,10 @@ // Convert output MV to 1/16th pel assert(MV_REFINE_PREC_BITS >= 3); for (int mvi = 0; mvi < N_OF_OFFSETS; mvi++) { - mbmi->mv_refined[mvi * 2].as_mv.row *= 1 << (MV_REFINE_PREC_BITS - 3); - mbmi->mv_refined[mvi * 2].as_mv.col *= 1 << (MV_REFINE_PREC_BITS - 3); - mbmi->mv_refined[mvi * 2 + 1].as_mv.row *= 1 << (MV_REFINE_PREC_BITS - 3); - mbmi->mv_refined[mvi * 2 + 1].as_mv.col *= 1 << (MV_REFINE_PREC_BITS - 3); + mv_refined[mvi * 2].as_mv.row *= 1 << (MV_REFINE_PREC_BITS - 3); + mv_refined[mvi * 2].as_mv.col *= 1 << (MV_REFINE_PREC_BITS - 3); + mv_refined[mvi * 2 + 1].as_mv.row *= 1 << (MV_REFINE_PREC_BITS - 3); + mv_refined[mvi * 2 + 1].as_mv.col *= 1 << (MV_REFINE_PREC_BITS - 3); } // Obtain d0 and d1 @@ -1655,19 +1651,19 @@ for (int i = 0; i < n_blocks; i++) { #if OPFL_CLAMP_MV_DELTA - mbmi->mv_refined[i * 2].as_mv.row += + mv_refined[i * 2].as_mv.row += clamp(vy0[i], -OPFL_MV_DELTA_LIMIT, OPFL_MV_DELTA_LIMIT); - mbmi->mv_refined[i * 2].as_mv.col += + mv_refined[i * 2].as_mv.col += clamp(vx0[i], -OPFL_MV_DELTA_LIMIT, OPFL_MV_DELTA_LIMIT); - mbmi->mv_refined[i * 2 + 1].as_mv.row += + mv_refined[i * 2 + 1].as_mv.row += clamp(vy1[i], -OPFL_MV_DELTA_LIMIT, OPFL_MV_DELTA_LIMIT); - mbmi->mv_refined[i * 2 + 1].as_mv.col += + mv_refined[i * 2 + 1].as_mv.col += clamp(vx1[i], -OPFL_MV_DELTA_LIMIT, OPFL_MV_DELTA_LIMIT); #else - mbmi->mv_refined[i * 2].as_mv.row += vy0[i]; - mbmi->mv_refined[i * 2].as_mv.col += vx0[i]; - mbmi->mv_refined[i * 2 + 1].as_mv.row += vy1[i]; - mbmi->mv_refined[i * 2 + 1].as_mv.col += vx1[i]; + mv_refined[i * 2].as_mv.row += vy0[i]; + mv_refined[i * 2].as_mv.col += vx0[i]; + mv_refined[i * 2 + 1].as_mv.row += vy1[i]; + mv_refined[i * 2 + 1].as_mv.col += vx1[i]; #endif } @@ -2093,6 +2089,7 @@ const int pre_y = (mi_y + MI_SIZE * row_start) >> pd->subsampling_y; #if CONFIG_OPTFLOW_REFINEMENT + int_mv mv_refined[2 * N_OF_OFFSETS]; const int use_optflow_refinement = (mi->mode > NEW_NEWMV) && is_compound && is_opfl_refine_allowed(cm, mi); assert(IMPLIES(use_optflow_refinement, !build_for_obmc)); @@ -2125,8 +2122,8 @@ const MV mv0 = mi->mv[0].as_mv; const MV mv1 = mi->mv[1].as_mv; for (int mvi = 0; mvi < N_OF_OFFSETS; mvi++) { - mi->mv_refined[mvi * 2].as_mv = mv0; - mi->mv_refined[mvi * 2 + 1].as_mv = mv1; + mv_refined[mvi * 2].as_mv = mv0; + mv_refined[mvi * 2 + 1].as_mv = mv1; } // Refine MV using optical flow. The final output MV will be in 1/16 // precision. @@ -2135,18 +2132,18 @@ aom_calloc(1, MAX_SB_SIZE * MAX_SB_SIZE * sizeof(uint16_t))); dst1 = CONVERT_TO_BYTEPTR( aom_calloc(1, MAX_SB_SIZE * MAX_SB_SIZE * sizeof(uint16_t))); - get_optflow_based_mv_highbd(cm, xd, plane, mi, bw, bh, mi_x, mi_y, mc_buf, - calc_subpel_params_func, gx0, gy0, gx1, gy1, - vx0, vy0, vx1, vy1, CONVERT_TO_SHORTPTR(dst0), - CONVERT_TO_SHORTPTR(dst1)); + get_optflow_based_mv_highbd( + cm, xd, plane, mi, mv_refined, bw, bh, mi_x, mi_y, mc_buf, + calc_subpel_params_func, gx0, gy0, gx1, gy1, vx0, vy0, vx1, vy1, + CONVERT_TO_SHORTPTR(dst0), CONVERT_TO_SHORTPTR(dst1)); } else { dst0 = (uint8_t *)aom_calloc(1, MAX_SB_SIZE * MAX_SB_SIZE * sizeof(uint8_t)); dst1 = (uint8_t *)aom_calloc(1, MAX_SB_SIZE * MAX_SB_SIZE * sizeof(uint8_t)); - get_optflow_based_mv_lowbd(cm, xd, plane, mi, bw, bh, mi_x, mi_y, mc_buf, - calc_subpel_params_func, gx0, gy0, gx1, gy1, - vx0, vy0, vx1, vy1, dst0, dst1); + get_optflow_based_mv_lowbd(cm, xd, plane, mi, mv_refined, bw, bh, mi_x, + mi_y, mc_buf, calc_subpel_params_func, gx0, + gy0, gx1, gy1, vx0, vy0, vx1, vy1, dst0, dst1); } } #endif // CONFIG_OPTFLOW_REFINEMENT @@ -2194,14 +2191,7 @@ } #if CONFIG_OPTFLOW_REFINEMENT -#if OPFL_REFINE_CHROMA - // For luma, always apply offset MVs. For chroma, use the MVs derived for - // luma if luma subblock size is 8x8 (i.e., chroma block size > 8x8), - // and otherwise apply normal compound average. - if (use_optflow_refinement && (plane == 0 || bh > 8 || bw > 8)) { -#else if (use_optflow_refinement && plane == 0) { -#endif #if OPFL_SECOND_PASS_MC int n = opfl_get_subblock_size(bw, bh, plane); inter_pred_params.interp_filter_params[0] = @@ -2220,9 +2210,9 @@ mi->interp_filters.as_filters.y_filter, #endif // CONFIG_REMOVE_DUAL_FILTER n); - av1_opfl_rebuild_inter_predictor( - dst, dst_buf->stride, plane, mi->mv_refined, &inter_pred_params, xd, - mi_x, mi_y, ref, mc_buf, calc_subpel_params_func); + av1_opfl_rebuild_inter_predictor(dst, dst_buf->stride, plane, mv_refined, + &inter_pred_params, xd, mi_x, mi_y, ref, + mc_buf, calc_subpel_params_func); #else if (is_cur_buf_hbd(xd)) { if (ref)