Pass ConvolveParams into prediction functions. Those functions include av1_make_inter_predictor, av1_build_inter_predictor, and inter_predictor. Change-Id: Ide3b744277cf30964e8b352fc8de91365d7217a8
diff --git a/av1/common/convolve.c b/av1/common/convolve.c index 7fc5c1f..0294d41 100644 --- a/av1/common/convolve.c +++ b/av1/common/convolve.c
@@ -41,7 +41,7 @@ int k, sum = 0; for (k = 0; k < filter_size; ++k) sum += src_x[k] * x_filter[k]; - if (conv_params->round) + if (conv_params->round == CONVOLVE_OPT_ROUND) sum = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); if (conv_params->ref) @@ -75,7 +75,7 @@ for (k = 0; k < filter_size; ++k) sum += src_y[k * src_stride] * y_filter[k]; - if (conv_params->round) + if (conv_params->round == CONVOLVE_OPT_ROUND) sum = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); if (conv_params->ref) @@ -97,7 +97,7 @@ int r, c; for (r = 0; r < h; ++r) { memcpy(dst, src, w); - if (conv_params->round == 0) + if (conv_params->round == CONVOLVE_OPT_NO_ROUND) for (c = 0; c < w; ++c) dst[c] = dst[c] << FILTER_BITS; src += src_stride; dst += dst_stride; @@ -105,7 +105,7 @@ } else { int r, c; for (r = 0; r < h; ++r) { - if (conv_params->round) + if (conv_params->round == CONVOLVE_OPT_ROUND) for (c = 0; c < w; ++c) dst[c] = clip_pixel(ROUND_POWER_OF_TWO(dst[c] + src[c], 1)); else @@ -123,7 +123,8 @@ const InterpFilterParams filter_params, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params) { - if (filter_params.taps == SUBPEL_TAPS && conv_params->round == 1) { + if (filter_params.taps == SUBPEL_TAPS && + conv_params->round == CONVOLVE_OPT_ROUND) { const int16_t *filter_x = av1_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4); if (conv_params->ref == 0) @@ -133,8 +134,14 @@ aom_convolve8_avg_horiz(src, src_stride, dst, dst_stride, filter_x, x_step_q4, NULL, -1, w, h); } else { - av1_convolve_horiz(src, src_stride, dst, dst_stride, w, h, filter_params, - subpel_x_q4, x_step_q4, conv_params); + if (conv_params->round == CONVOLVE_OPT_ROUND) { + av1_convolve_horiz(src, src_stride, dst, dst_stride, w, h, filter_params, + subpel_x_q4, x_step_q4, conv_params); + } else { + // TODO(angiebird) need SIMD implementation here + av1_convolve_horiz_c(src, src_stride, dst, dst_stride, w, h, + filter_params, subpel_x_q4, x_step_q4, conv_params); + } } } @@ -143,7 +150,8 
@@ const InterpFilterParams filter_params, const int subpel_y_q4, int y_step_q4, ConvolveParams *conv_params) { - if (filter_params.taps == SUBPEL_TAPS && conv_params->round == 1) { + if (filter_params.taps == SUBPEL_TAPS && + conv_params->round == CONVOLVE_OPT_ROUND) { const int16_t *filter_y = av1_get_interp_filter_subpel_kernel(filter_params, subpel_y_q4); if (conv_params->ref == 0) { @@ -154,8 +162,14 @@ filter_y, y_step_q4, w, h); } } else { - av1_convolve_vert(src, src_stride, dst, dst_stride, w, h, filter_params, - subpel_y_q4, y_step_q4, conv_params); + if (conv_params->round == CONVOLVE_OPT_ROUND) { + av1_convolve_vert(src, src_stride, dst, dst_stride, w, h, filter_params, + subpel_y_q4, y_step_q4, conv_params); + } else { + // TODO(angiebird) need SIMD implementation here + av1_convolve_vert_c(src, src_stride, dst, dst_stride, w, h, filter_params, + subpel_y_q4, y_step_q4, conv_params); + } } } @@ -230,7 +244,7 @@ int temp_stride = max_intermediate_size; ConvolveParams temp_conv_params; temp_conv_params.ref = 0; - temp_conv_params.round = 1; + temp_conv_params.round = CONVOLVE_OPT_ROUND; filter_params = filter_params_y; filter_size = filter_params_x.taps; intermediate_width = @@ -257,7 +271,7 @@ int temp_stride = MAX_SB_SIZE; ConvolveParams temp_conv_params; temp_conv_params.ref = 0; - temp_conv_params.round = 1; + temp_conv_params.round = CONVOLVE_OPT_ROUND; #if CONFIG_DUAL_FILTER filter_params = filter_params_x; filter_size = filter_params_y.taps;
diff --git a/av1/common/convolve.h b/av1/common/convolve.h index 673560a..7b4e141 100644 --- a/av1/common/convolve.h +++ b/av1/common/convolve.h
@@ -17,11 +17,24 @@ extern "C" { #endif +typedef enum CONVOLVE_OPT { + // indicate the results in dst buf is rounded by FILTER_BITS or not + CONVOLVE_OPT_ROUND, + CONVOLVE_OPT_NO_ROUND, +} CONVOLVE_OPT; + typedef struct ConvolveParams { int ref; - int round; + CONVOLVE_OPT round; } ConvolveParams; +static INLINE ConvolveParams get_conv_params(int ref) { + ConvolveParams conv_params; + conv_params.ref = ref; + conv_params.round = CONVOLVE_OPT_ROUND; + return conv_params; +} + void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, #if CONFIG_DUAL_FILTER
diff --git a/av1/common/reconinter.c b/av1/common/reconinter.c index 4bf4aa4..ba2ed88 100644 --- a/av1/common/reconinter.c +++ b/av1/common/reconinter.c
@@ -610,13 +610,15 @@ #else InterpFilter tmp_ipf = interp_filter; #endif // CONFIG_DUAL_FILTER + ConvolveParams conv_params = get_conv_params(0); #if CONFIG_AOM_HIGHBITDEPTH DECLARE_ALIGNED(16, uint8_t, tmp_dst_[2 * MAX_SB_SQUARE]); uint8_t *tmp_dst = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? CONVERT_TO_BYTEPTR(tmp_dst_) : tmp_dst_; av1_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE, subpel_x, - subpel_y, sf, w, h, 0, tmp_ipf, xs, ys, xd); + subpel_y, sf, w, h, &conv_params, tmp_ipf, xs, ys, + xd); #if CONFIG_SUPERTX if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) build_masked_compound_wedge_extend_highbd( @@ -652,7 +654,8 @@ #else // CONFIG_AOM_HIGHBITDEPTH DECLARE_ALIGNED(16, uint8_t, tmp_dst[MAX_SB_SQUARE]); av1_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE, subpel_x, - subpel_y, sf, w, h, 0, tmp_ipf, xs, ys, xd); + subpel_y, sf, w, h, &conv_params, tmp_ipf, xs, ys, + xd); #if CONFIG_SUPERTX build_masked_compound_wedge_extend(dst, dst_stride, dst, dst_stride, tmp_dst, MAX_SB_SIZE, comp_data->wedge_index, @@ -700,7 +703,7 @@ void av1_build_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const MV *src_mv, const struct scale_factors *sf, int w, int h, - int ref, + ConvolveParams *conv_params, #if CONFIG_DUAL_FILTER const InterpFilter *interp_filter, #else @@ -717,7 +720,7 @@ src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS); inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y, sf, w, - h, ref, interp_filter, sf->x_step_q4, sf->y_step_q4); + h, conv_params, interp_filter, sf->x_step_q4, sf->y_step_q4); } void build_inter_predictors(MACROBLOCKD *xd, int plane, @@ -797,6 +800,7 @@ MV32 scaled_mv; int xs, ys, subpel_x, subpel_y; const int is_scaled = av1_is_scaled(sf); + ConvolveParams conv_params = get_conv_params(ref); x = x_base + idx * x_step; y = y_base + idy * y_step; @@ -838,7 +842,8 @@ #endif // CONFIG_EXT_INTER av1_make_inter_predictor(pre, pre_buf->stride, 
dst, dst_buf->stride, subpel_x, subpel_y, sf, x_step, y_step, - ref, mi->mbmi.interp_filter, xs, ys, xd); + &conv_params, mi->mbmi.interp_filter, xs, + ys, xd); } } } @@ -876,6 +881,7 @@ MV32 scaled_mv; int xs, ys, subpel_x, subpel_y; const int is_scaled = av1_is_scaled(sf); + ConvolveParams conv_params = get_conv_params(ref); if (is_scaled) { pre = pre_buf->buf + scaled_buffer_offset(x, y, pre_buf->stride, sf); @@ -922,7 +928,7 @@ #endif // CONFIG_GLOBAL_MOTION #endif // CONFIG_EXT_INTER av1_make_inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, - subpel_x, subpel_y, sf, w, h, ref, + subpel_x, subpel_y, sf, w, h, &conv_params, mi->mbmi.interp_filter, xs, ys, xd); } } @@ -939,6 +945,7 @@ const int is_compound = has_second_ref(&mi->mbmi); for (ref = 0; ref < 1 + is_compound; ++ref) { + ConvolveParams conv_params = get_conv_params(ref); const uint8_t *pre = &pd->pre[ref].buf[(ir * pd->pre[ref].stride + ic) << 2]; #if CONFIG_AOM_HIGHBITDEPTH @@ -952,15 +959,15 @@ av1_build_inter_predictor( pre, pd->pre[ref].stride, dst, pd->dst.stride, &mi->bmi[i].as_mv[ref].as_mv, &xd->block_refs[ref]->sf, width, height, - ref, mi->mbmi.interp_filter, MV_PRECISION_Q3, + &conv_params, mi->mbmi.interp_filter, MV_PRECISION_Q3, mi_col * MI_SIZE + 4 * ic, mi_row * MI_SIZE + 4 * ir); } #else av1_build_inter_predictor( pre, pd->pre[ref].stride, dst, pd->dst.stride, &mi->bmi[i].as_mv[ref].as_mv, &xd->block_refs[ref]->sf, width, height, - ref, mi->mbmi.interp_filter, MV_PRECISION_Q3, mi_col * MI_SIZE + 4 * ic, - mi_row * MI_SIZE + 4 * ir); + &conv_params, mi->mbmi.interp_filter, MV_PRECISION_Q3, + mi_col * MI_SIZE + 4 * ic, mi_row * MI_SIZE + 4 * ir); #endif // CONFIG_AOM_HIGHBITDEPTH } } @@ -2747,6 +2754,7 @@ MV32 scaled_mv; int xs, ys, subpel_x, subpel_y; const int is_scaled = av1_is_scaled(sf); + ConvolveParams conv_params = get_conv_params(0); if (is_scaled) { pre = pre_buf->buf + scaled_buffer_offset(x, y, pre_buf->stride, sf); @@ -2766,8 +2774,8 @@ (scaled_mv.col >> 
SUBPEL_BITS); av1_make_inter_predictor(pre, pre_buf->stride, dst, ext_dst_stride, subpel_x, - subpel_y, sf, w, h, 0, mi->mbmi.interp_filter, xs, - ys, xd); + subpel_y, sf, w, h, &conv_params, + mi->mbmi.interp_filter, xs, ys, xd); } void av1_build_inter_predictors_for_planes_single_buf(
diff --git a/av1/common/reconinter.h b/av1/common/reconinter.h index 7bea9ed..19c8032 100644 --- a/av1/common/reconinter.h +++ b/av1/common/reconinter.h
@@ -25,7 +25,7 @@ uint8_t *dst, int dst_stride, const int subpel_x, const int subpel_y, const struct scale_factors *sf, int w, int h, - int ref_idx, + ConvolveParams *conv_params, #if CONFIG_DUAL_FILTER const InterpFilter *interp_filter, #else @@ -34,9 +34,9 @@ int xs, int ys) { #if CONFIG_DUAL_FILTER InterpFilterParams interp_filter_params_x = - av1_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]); + av1_get_interp_filter_params(interp_filter[1 + 2 * conv_params->ref]); InterpFilterParams interp_filter_params_y = - av1_get_interp_filter_params(interp_filter[0 + 2 * ref_idx]); + av1_get_interp_filter_params(interp_filter[0 + 2 * conv_params->ref]); #else InterpFilterParams interp_filter_params = av1_get_interp_filter_params(interp_filter); @@ -56,17 +56,14 @@ const int16_t *kernel_y = av1_get_interp_filter_subpel_kernel(interp_filter_params, subpel_y); #endif - sf->predict[subpel_x != 0][subpel_y != 0][ref_idx]( + sf->predict[subpel_x != 0][subpel_y != 0][conv_params->ref]( src, src_stride, dst, dst_stride, kernel_x, xs, kernel_y, ys, w, h); } else { // ref_idx > 0 means this is the second reference frame // first reference frame's prediction result is already in dst // therefore we need to average the first and second results - ConvolveParams conv_params; - conv_params.round = 1; - conv_params.ref = ref_idx; av1_convolve(src, src_stride, dst, dst_stride, w, h, interp_filter, - subpel_x, xs, subpel_y, ys, &conv_params); + subpel_x, xs, subpel_y, ys, conv_params); } } @@ -219,7 +216,7 @@ static INLINE void av1_make_inter_predictor( const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int subpel_x, const int subpel_y, const struct scale_factors *sf, - int w, int h, int ref, + int w, int h, ConvolveParams *conv_params, #if CONFIG_DUAL_FILTER const InterpFilter *interp_filter, #else @@ -230,11 +227,12 @@ #if CONFIG_AOM_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) highbd_inter_predictor(src, src_stride, dst, dst_stride, 
subpel_x, subpel_y, - sf, w, h, ref, interp_filter, xs, ys, xd->bd); + sf, w, h, conv_params->ref, interp_filter, xs, ys, + xd->bd); else #endif // CONFIG_AOM_HIGHBITDEPTH inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y, sf, w, - h, ref, interp_filter, xs, ys); + h, conv_params, interp_filter, xs, ys); } #if CONFIG_EXT_INTER @@ -365,7 +363,7 @@ void av1_build_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const MV *mv_q3, const struct scale_factors *sf, int w, int h, - int do_avg, + ConvolveParams *conv_params, #if CONFIG_DUAL_FILTER const InterpFilter *interp_filter, #else
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c index ee6cc7f..9418017 100644 --- a/av1/encoder/rdopt.c +++ b/av1/encoder/rdopt.c
@@ -4879,6 +4879,7 @@ int id = ite % 2; // Even iterations search in the first reference frame, // odd iterations search in the second. The predictor // found for the 'other' reference frame is factored in. + ConvolveParams conv_params = get_conv_params(0); // Initialized here because of compiler problem in Visual Studio. ref_yv12[0] = xd->plane[0].pre[0]; @@ -4902,16 +4903,16 @@ MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE, xd->bd); } else { second_pred = (uint8_t *)second_pred_alloc_16; - av1_build_inter_predictor(ref_yv12[!id].buf, ref_yv12[!id].stride, - second_pred, pw, &frame_mv[refs[!id]].as_mv, - &sf, pw, ph, 0, interp_filter, MV_PRECISION_Q3, - mi_col * MI_SIZE, mi_row * MI_SIZE); + av1_build_inter_predictor( + ref_yv12[!id].buf, ref_yv12[!id].stride, second_pred, pw, + &frame_mv[refs[!id]].as_mv, &sf, pw, ph, &conv_params, interp_filter, + MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE); } #else - av1_build_inter_predictor(ref_yv12[!id].buf, ref_yv12[!id].stride, - second_pred, pw, &frame_mv[refs[!id]].as_mv, &sf, - pw, ph, 0, interp_filter, MV_PRECISION_Q3, - mi_col * MI_SIZE, mi_row * MI_SIZE); + av1_build_inter_predictor( + ref_yv12[!id].buf, ref_yv12[!id].stride, second_pred, pw, + &frame_mv[refs[!id]].as_mv, &sf, pw, ph, &conv_params, interp_filter, + MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE); #endif // CONFIG_AOM_HIGHBITDEPTH // Do compound motion search on the current reference frame.
diff --git a/av1/encoder/temporal_filter.c b/av1/encoder/temporal_filter.c index 4804499..c727000 100644 --- a/av1/encoder/temporal_filter.c +++ b/av1/encoder/temporal_filter.c
@@ -39,6 +39,7 @@ const MV mv = { mv_row, mv_col }; enum mv_precision mv_precision_uv; int uv_stride; + ConvolveParams conv_params = get_conv_params(which_mv); #if USE_TEMPORALFILTER_12TAP #if CONFIG_DUAL_FILTER @@ -81,15 +82,15 @@ } #endif // CONFIG_AOM_HIGHBITDEPTH av1_build_inter_predictor(y_mb_ptr, stride, &pred[0], 16, &mv, scale, 16, 16, - which_mv, interp_filter, MV_PRECISION_Q3, x, y); + &conv_params, interp_filter, MV_PRECISION_Q3, x, y); av1_build_inter_predictor(u_mb_ptr, uv_stride, &pred[256], uv_block_width, &mv, scale, uv_block_width, uv_block_height, - which_mv, interp_filter, mv_precision_uv, x, y); + &conv_params, interp_filter, mv_precision_uv, x, y); av1_build_inter_predictor(v_mb_ptr, uv_stride, &pred[512], uv_block_width, &mv, scale, uv_block_width, uv_block_height, - which_mv, interp_filter, mv_precision_uv, x, y); + &conv_params, interp_filter, mv_precision_uv, x, y); } void av1_temporal_filter_apply_c(uint8_t *frame1, unsigned int stride,