Clean up and use do_post_rounding consistently
This patch simplifies how the do_post_rounding flag is set and used,
and makes that handling consistent across all convolve/warp operations.
The flag is now always set to 1 for compound modes, but the masked
blending operation can subsequently turn it off.
To accomplish this, the warped case now uses the conv_params->dst
buffer in the same way as the non-warp convolves: the buffer is used
only when is_compound is 1.
Change-Id: If3a37ffa65c3ca75f1df66ca427e6b5df86ac72f
diff --git a/av1/common/reconinter.c b/av1/common/reconinter.c
index ac18606..1ae7e53 100644
--- a/av1/common/reconinter.c
+++ b/av1/common/reconinter.c
@@ -69,6 +69,8 @@
// Make sure the selected motion mode is valid for this configuration
assert_motion_mode_valid(mi->mbmi.motion_mode, xd->global_motion, xd, mi);
+ assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL));
+
WarpedMotionParams final_warp_params;
const int do_warp =
(w >= 8 && h >= 8 &&
@@ -86,20 +88,17 @@
pre_buf->buf0, pre_buf->width, pre_buf->height,
pre_buf->stride, dst, p_col, p_row, w, h, dst_stride,
pd->subsampling_x, pd->subsampling_y, conv_params);
- assert(IMPLIES(conv_params->dst != NULL, conv_params->do_post_rounding));
- assert(IMPLIES(conv_params->dst == NULL, !conv_params->do_post_rounding));
} else if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y,
sf, w, h, conv_params, interp_filters, xs, ys,
xd->bd);
- assert(IMPLIES(conv_params->is_compound, conv_params->do_post_rounding));
- assert(!(conv_params->is_compound && conv_params->dst == NULL));
} else {
inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y, sf, w,
h, conv_params, interp_filters, xs, ys);
- assert(IMPLIES(conv_params->is_compound, conv_params->do_post_rounding));
- assert(!(conv_params->is_compound && conv_params->dst == NULL));
}
+ // For compound, do_post_rounding is always 1.
+ // For masked compound, this flag will be turned off after the blend stage.
+ conv_params->do_post_rounding = conv_params->is_compound;
}
#define NSMOOTHERS 1
@@ -1244,8 +1243,6 @@
subpel_params[ref].ys, xd);
}
- // if (!is_masked_compound_type(mi->mbmi.interinter_compound_type))
- // assert(conv_params.do_post_rounding);
// TODO(angiebird): This part needs optimization
if (conv_params.do_post_rounding) {
assert(!is_masked_compound_type(mi->mbmi.interinter_compound_type));
diff --git a/av1/common/reconinter.h b/av1/common/reconinter.h
index 839cba8..b7a41b1 100644
--- a/av1/common/reconinter.h
+++ b/av1/common/reconinter.h
@@ -58,10 +58,6 @@
av1_convolve_2d_facade(src, src_stride, dst, dst_stride, w, h,
interp_filters, subpel_x, xs, subpel_y, ys, 1,
conv_params, sf);
- if (conv_params->is_compound)
- conv_params->do_post_rounding = 1;
- else
- conv_params->do_post_rounding = 0;
} else {
assert(conv_params->round == CONVOLVE_OPT_ROUND);
av1_convolve_scale(src, src_stride, dst, dst_stride, w, h, interp_filters,
@@ -81,10 +77,6 @@
interp_filters, subpel_x, xs, subpel_y, ys, 0,
conv_params, sf);
- if (conv_params->is_compound)
- conv_params->do_post_rounding = 1;
- else
- conv_params->do_post_rounding = 0;
} else {
assert(conv_params->round == CONVOLVE_OPT_ROUND);
@@ -132,10 +124,6 @@
av1_highbd_convolve_2d_facade(src, src_stride, dst, dst_stride, w, h,
interp_filters, subpel_x, xs, subpel_y, ys,
1, conv_params, bd);
- if (conv_params->is_compound)
- conv_params->do_post_rounding = 1;
- else
- conv_params->do_post_rounding = 0;
} else {
av1_highbd_convolve_scale(src, src_stride, dst, dst_stride, w, h,
interp_filters, subpel_x, xs, subpel_y, ys, avg,
@@ -154,10 +142,6 @@
av1_highbd_convolve_2d_facade(src, src_stride, dst, dst_stride, w, h,
interp_filters, subpel_x, xs, subpel_y, ys,
0, conv_params, bd);
- if (conv_params->is_compound)
- conv_params->do_post_rounding = 1;
- else
- conv_params->do_post_rounding = 0;
} else {
InterpFilterParams filter_params_x, filter_params_y;
#if CONFIG_SHORT_FILTER
diff --git a/av1/common/warped_motion.c b/av1/common/warped_motion.c
index 0d62acb..fc4d8da 100644
--- a/av1/common/warped_motion.c
+++ b/av1/common/warped_motion.c
@@ -422,21 +422,17 @@
ConvolveParams *conv_params, int16_t alpha,
int16_t beta, int16_t gamma, int16_t delta) {
int32_t tmp[15 * 8];
- const int use_conv_params =
- (conv_params->round == CONVOLVE_OPT_NO_ROUND && conv_params->dst);
- int reduce_bits_horiz = conv_params->round_0;
- if (!use_conv_params && bd + FILTER_BITS + 2 - reduce_bits_horiz > 16)
- reduce_bits_horiz += bd + FILTER_BITS - reduce_bits_horiz - 14;
- const int reduce_bits_vert = use_conv_params
+ const int reduce_bits_horiz =
+ conv_params->round_0 +
+ AOMMAX(bd + FILTER_BITS - conv_params->round_0 - 14, 0);
+ const int reduce_bits_vert = conv_params->is_compound
? conv_params->round_1
: 2 * FILTER_BITS - reduce_bits_horiz;
const int max_bits_horiz = bd + FILTER_BITS + 1 - reduce_bits_horiz;
const int offset_bits_horiz = bd + FILTER_BITS - 1;
const int offset_bits_vert = bd + 2 * FILTER_BITS - reduce_bits_horiz;
- if (use_conv_params) {
- conv_params->do_post_rounding = 1;
- }
(void)max_bits_horiz;
+ assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL));
for (int i = p_row; i < p_row + p_height; i += 8) {
for (int j = p_col; j < p_col + p_width; j += 8) {
@@ -500,7 +496,7 @@
sum += tmp[(k + m + 4) * 8 + (l + 4)] * coeffs[m];
}
- if (use_conv_params) {
+ if (conv_params->is_compound) {
CONV_BUF_TYPE *p =
&conv_params
->dst[(i - p_row + k + 4) * conv_params->dst_stride +
@@ -716,19 +712,15 @@
int16_t gamma, int16_t delta) {
int32_t tmp[15 * 8];
const int bd = 8;
- const int use_conv_params =
- (conv_params->round == CONVOLVE_OPT_NO_ROUND && conv_params->dst);
const int reduce_bits_horiz = conv_params->round_0;
- const int reduce_bits_vert = use_conv_params
+ const int reduce_bits_vert = conv_params->is_compound
? conv_params->round_1
: 2 * FILTER_BITS - reduce_bits_horiz;
const int max_bits_horiz = bd + FILTER_BITS + 1 - reduce_bits_horiz;
const int offset_bits_horiz = bd + FILTER_BITS - 1;
const int offset_bits_vert = bd + 2 * FILTER_BITS - reduce_bits_horiz;
- if (use_conv_params) {
- conv_params->do_post_rounding = 1;
- }
(void)max_bits_horiz;
+ assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL));
for (int i = p_row; i < p_row + p_height; i += 8) {
for (int j = p_col; j < p_col + p_width; j += 8) {
@@ -798,7 +790,7 @@
sum += tmp[(k + m + 4) * 8 + (l + 4)] * coeffs[m];
}
- if (use_conv_params) {
+ if (conv_params->is_compound) {
CONV_BUF_TYPE *p =
&conv_params
->dst[(i - p_row + k + 4) * conv_params->dst_stride +
diff --git a/av1/common/x86/highbd_warp_plane_sse4.c b/av1/common/x86/highbd_warp_plane_sse4.c
index 700d48f..5647eb3 100644
--- a/av1/common/x86/highbd_warp_plane_sse4.c
+++ b/av1/common/x86/highbd_warp_plane_sse4.c
@@ -24,19 +24,14 @@
int comp_avg = conv_params->do_average;
__m128i tmp[15];
int i, j, k;
- const int use_conv_params =
- (conv_params->round == CONVOLVE_OPT_NO_ROUND && conv_params->dst);
- int reduce_bits_horiz = conv_params->round_0;
- if (!use_conv_params && bd + FILTER_BITS + 2 - reduce_bits_horiz > 16)
- reduce_bits_horiz += bd + FILTER_BITS - reduce_bits_horiz - 14;
- const int reduce_bits_vert = use_conv_params
+ const int reduce_bits_horiz =
+ conv_params->round_0 +
+ AOMMAX(bd + FILTER_BITS - conv_params->round_0 - 14, 0);
+ const int reduce_bits_vert = conv_params->is_compound
? conv_params->round_1
: 2 * FILTER_BITS - reduce_bits_horiz;
const int offset_bits_horiz = bd + FILTER_BITS - 1;
- if (use_conv_params) {
- conv_params->do_post_rounding = 1;
- }
- assert(FILTER_BITS == FILTER_BITS);
+ assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL));
assert(!(bd == 12 && reduce_bits_horiz < 5));
#if CONFIG_JNT_COMP
@@ -302,7 +297,7 @@
__m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
__m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
- if (use_conv_params) {
+ if (conv_params->is_compound) {
__m128i *const p =
(__m128i *)&conv_params
->dst[(i + k + 4) * conv_params->dst_stride + j];
diff --git a/av1/common/x86/warp_plane_sse4.c b/av1/common/x86/warp_plane_sse4.c
index ca20e46..1e8ad47 100644
--- a/av1/common/x86/warp_plane_sse4.c
+++ b/av1/common/x86/warp_plane_sse4.c
@@ -212,16 +212,13 @@
__m128i tmp[15];
int i, j, k;
const int bd = 8;
- const int use_conv_params =
- (conv_params->round == CONVOLVE_OPT_NO_ROUND && conv_params->dst);
const int reduce_bits_horiz = conv_params->round_0;
- const int reduce_bits_vert = use_conv_params
+ const int reduce_bits_vert = conv_params->is_compound
? conv_params->round_1
: 2 * FILTER_BITS - reduce_bits_horiz;
const int offset_bits_horiz = bd + FILTER_BITS - 1;
- if (use_conv_params) {
- conv_params->do_post_rounding = 1;
- }
+ assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL));
+
#if CONFIG_JNT_COMP
const int w0 = conv_params->fwd_offset;
const int w1 = conv_params->bck_offset;
@@ -475,7 +472,7 @@
__m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
__m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
- if (use_conv_params) {
+ if (conv_params->is_compound) {
__m128i *const p =
(__m128i *)&conv_params
->dst[(i + k + 4) * conv_params->dst_stride + j];