Add CONVOLVE_POST_ROUNDING flag
When CONVOLVE_POST_ROUNDING is turned on, the FILTER_BITS rounding in
compound inter prediction mode is moved to after the summation of the
two predictions.
Note that post rounding is only applied to non-sub8x8 blocks.
PSNR BDRate
lowres -0.808% -0.673%
Change-Id: Ib91304e6122c24d832a582ab9f5757d33eac876c
diff --git a/av1/common/convolve.c b/av1/common/convolve.c
index 58c6fd3..dbbd40b 100644
--- a/av1/common/convolve.c
+++ b/av1/common/convolve.c
@@ -53,7 +53,8 @@
tmp = ROUND_POWER_OF_TWO(tmp + sum, 1);
else
tmp = sum;
- conv_params->dst[y * conv_params->dst_stride + x] = tmp;
+ conv_params->dst[y * conv_params->dst_stride + x] =
+ clamp(tmp, INT32_MIN, INT32_MAX);
}
x_q4 += x_step_q4;
@@ -95,7 +96,8 @@
tmp = ROUND_POWER_OF_TWO(tmp + sum, 1);
else
tmp = sum;
- conv_params->dst[y * conv_params->dst_stride + x] = tmp;
+ conv_params->dst[y * conv_params->dst_stride + x] =
+ clamp(tmp, INT32_MIN, INT32_MAX);
}
y_q4 += y_step_q4;
@@ -129,9 +131,9 @@
dst[c] = clip_pixel(ROUND_POWER_OF_TWO(dst[c] + src[c], 1));
} else {
int tmp = conv_params->dst[r * conv_params->dst_stride + c];
- tmp =
- ROUND_POWER_OF_TWO(tmp + (((uint16_t)src[c]) << FILTER_BITS), 1);
- conv_params->dst[r * conv_params->dst_stride + c] = tmp;
+ tmp = ROUND_POWER_OF_TWO(tmp + (((int32_t)src[c]) << FILTER_BITS), 1);
+ conv_params->dst[r * conv_params->dst_stride + c] =
+ clamp(tmp, INT32_MIN, INT32_MAX);
}
}
src += src_stride;
@@ -195,6 +197,19 @@
}
}
+#if CONVOLVE_POST_ROUNDING
+void av1_convolve_rounding(const int32_t *src, int src_stride, uint8_t *dst,  // Converts a w x h block of int32_t values in src into uint8_t values in dst.
+ int dst_stride, int w, int h) {  // Each stride is the element offset between consecutive rows of its buffer.
+ int r, c;
+ for (r = 0; r < h; ++r) {  // r indexes the h rows
+ for (c = 0; c < w; ++c) {  // c indexes the w columns of row r
+ dst[r * dst_stride + c] =
+ clip_pixel(ROUND_POWER_OF_TWO(src[r * src_stride + c], FILTER_BITS));  // FILTER_BITS rounding is applied here, then clip_pixel() before the uint8_t store (presumably a clamp to the 8-bit pixel range; confirm against its definition)
+ }
+ }
+}
+#endif // CONVOLVE_POST_ROUNDING
+
void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h,
#if CONFIG_DUAL_FILTER
diff --git a/av1/common/convolve.h b/av1/common/convolve.h
index e62bcea..0f1dd9c 100644
--- a/av1/common/convolve.h
+++ b/av1/common/convolve.h
@@ -17,6 +17,10 @@
extern "C" {
#endif
+#if CONFIG_DUAL_FILTER
+#define CONVOLVE_POST_ROUNDING 1
+#endif
+
typedef enum CONVOLVE_OPT {
// indicate the results in dst buf is rounded by FILTER_BITS or not
CONVOLVE_OPT_ROUND,
@@ -26,7 +30,7 @@
typedef struct ConvolveParams {
int ref;
CONVOLVE_OPT round;
- uint16_t *dst;
+ int32_t *dst;
int dst_stride;
} ConvolveParams;
@@ -37,6 +41,21 @@
return conv_params;
}
+#if CONVOLVE_POST_ROUNDING
+static INLINE ConvolveParams get_conv_params_no_round(int ref, int32_t *dst,  // Builds a ConvolveParams with round set to CONVOLVE_OPT_NO_ROUND and the given dst buffer.
+ int dst_stride) {
+ ConvolveParams conv_params;
+ conv_params.ref = ref;
+ conv_params.round = CONVOLVE_OPT_NO_ROUND;  // per the CONVOLVE_OPT comment, this field records whether results in dst are rounded by FILTER_BITS
+ conv_params.dst = dst;  // caller-provided int32_t buffer; only the pointer is stored, nothing is allocated here
+ conv_params.dst_stride = dst_stride;  // stride that accompanies dst
+ return conv_params;  // returned by value
+}
+
+void av1_convolve_rounding(const int32_t *src, int src_stride, uint8_t *dst,
+ int dst_stride, int w, int h);
+#endif // CONVOLVE_POST_ROUNDING
+
void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h,
#if CONFIG_DUAL_FILTER
diff --git a/av1/common/reconinter.c b/av1/common/reconinter.c
index eaff6c8..be4e886 100644
--- a/av1/common/reconinter.c
+++ b/av1/common/reconinter.c
@@ -866,6 +866,9 @@
uint8_t *pre[2];
MV32 scaled_mv[2];
SubpelParams subpel_params[2];
+#if CONVOLVE_POST_ROUNDING
+ int32_t tmp_dst[MAX_SB_SIZE * MAX_SB_SIZE];
+#endif // CONVOLVE_POST_ROUNDING
for (ref = 0; ref < 1 + is_compound; ++ref) {
const struct scale_factors *const sf = &xd->block_refs[ref]->sf;
@@ -916,8 +919,13 @@
for (ref = 0; ref < 1 + is_compound; ++ref) {
const struct scale_factors *const sf = &xd->block_refs[ref]->sf;
- ConvolveParams conv_params = get_conv_params(ref);
struct buf_2d *const pre_buf = &pd->pre[ref];
+#if CONVOLVE_POST_ROUNDING
+ ConvolveParams conv_params =
+ get_conv_params_no_round(ref, tmp_dst, MAX_SB_SIZE);
+#else
+ ConvolveParams conv_params = get_conv_params(ref);
+#endif // CONVOLVE_POST_ROUNDING
#if CONFIG_EXT_INTER
if (ref &&
is_masked_compound_type(mi->mbmi.interinter_compound_data.type))
@@ -955,6 +963,11 @@
&conv_params, mi->mbmi.interp_filter, subpel_params[ref].xs,
subpel_params[ref].ys, xd);
}
+
+#if CONVOLVE_POST_ROUNDING
+ // TODO(angiebird): This part needs optimization
+ av1_convolve_rounding(tmp_dst, MAX_SB_SIZE, dst, dst_buf->stride, w, h);
+#endif // CONVOLVE_POST_ROUNDING
}
}
diff --git a/av1/common/reconinter.h b/av1/common/reconinter.h
index 19c8032..a5d5422 100644
--- a/av1/common/reconinter.h
+++ b/av1/common/reconinter.h
@@ -44,13 +44,15 @@
#if CONFIG_DUAL_FILTER
if (interp_filter_params_x.taps == SUBPEL_TAPS &&
- interp_filter_params_y.taps == SUBPEL_TAPS && w > 2 && h > 2) {
+ interp_filter_params_y.taps == SUBPEL_TAPS && w > 2 && h > 2 &&
+ conv_params->round == CONVOLVE_OPT_ROUND) {
const int16_t *kernel_x =
av1_get_interp_filter_subpel_kernel(interp_filter_params_x, subpel_x);
const int16_t *kernel_y =
av1_get_interp_filter_subpel_kernel(interp_filter_params_y, subpel_y);
#else
- if (interp_filter_params.taps == SUBPEL_TAPS && w > 2 && h > 2) {
+ if (interp_filter_params.taps == SUBPEL_TAPS && w > 2 && h > 2 &&
+ conv_params->round == CONVOLVE_OPT_ROUND) {
const int16_t *kernel_x =
av1_get_interp_filter_subpel_kernel(interp_filter_params, subpel_x);
const int16_t *kernel_y =