Add av1_convolve_2d_facade
When convolve_round is on, av1_convolve_2d_facade will be used for
interpolation rather than av1_convolve. The convolve_round experiment
code will be removed from av1_convolve in another CL.
So far we use 4-bit rounding in the intermediate stage on top of using
post rounding for compound mode after the last stage.
This will give us roughly 0.45% gain on lowres, 0.39% on midres and
roughly 0.6-0.7% on hdres.
Altogether, the gain is 1.15% on lowres, 0.74% on midres and roughly
1.7-1.8% on hdres.
Note that there is no restriction on the usage of the 12-tap filter in
this CL. Adding that restriction, we will lose roughly 0.1% again on
lowres.
Change-Id: I6332e1d888e28a3b3ddc29711817d66e52cb5cdf
diff --git a/av1/common/convolve.c b/av1/common/convolve.c
index a1a266b..97ce6ba 100644
--- a/av1/common/convolve.c
+++ b/av1/common/convolve.c
@@ -199,15 +199,79 @@
#if CONFIG_CONVOLVE_ROUND
void av1_convolve_rounding(const int32_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int w, int h) {
+ int dst_stride, int w, int h, int bits) {
int r, c;
for (r = 0; r < h; ++r) {
for (c = 0; c < w; ++c) {
dst[r * dst_stride + c] =
- clip_pixel(ROUND_POWER_OF_TWO(src[r * src_stride + c], FILTER_BITS));
+ clip_pixel(ROUND_POWER_OF_TWO_SIGNED(src[r * src_stride + c], bits));
}
}
}
+
+void av1_convolve_2d(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
+ int dst_stride, int w, int h,
+ InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y, const int subpel_x_q4,
+ const int subpel_y_q4, ConvolveParams *conv_params) {
+ int x, y, k;
+ CONV_BUF_TYPE im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
+ int im_h = h + filter_params_y->taps - 1;
+ int im_stride = w;
+ const int fo_vert = filter_params_y->taps / 2 - 1;
+ const int fo_horiz = filter_params_x->taps / 2 - 1;
+ (void)conv_params;
+
+ // horizontal filter
+ const uint8_t *src_horiz = src - fo_vert * src_stride;
+ const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
+ *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
+ for (y = 0; y < im_h; ++y) {
+ for (x = 0; x < w; ++x) {
+ CONV_BUF_TYPE sum = 0;
+ for (k = 0; k < filter_params_x->taps; ++k) {
+ sum += x_filter[k] * src_horiz[y * src_stride + x - fo_horiz + k];
+ }
+ im_block[y * im_stride + x] =
+ ROUND_POWER_OF_TWO_SIGNED(sum, conv_params->round_0);
+ }
+ }
+
+ // vertical filter
+ CONV_BUF_TYPE *src_vert = im_block + fo_vert * im_stride;
+ const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
+ *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
+ for (y = 0; y < h; ++y) {
+ for (x = 0; x < w; ++x) {
+ CONV_BUF_TYPE sum = 0;
+ for (k = 0; k < filter_params_y->taps; ++k) {
+ sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x];
+ }
+ dst[y * dst_stride + x] +=
+ ROUND_POWER_OF_TWO_SIGNED(sum, conv_params->round_1);
+ }
+ }
+}
+
+void av1_convolve_2d_facade(const uint8_t *src, int src_stride, uint8_t *dst,
+ int dst_stride, int w, int h,
+ const InterpFilter *interp_filter,
+ const int subpel_x_q4, int x_step_q4,
+ const int subpel_y_q4, int y_step_q4,
+ ConvolveParams *conv_params) {
+ (void)x_step_q4;
+ (void)y_step_q4;
+ (void)dst;
+ (void)dst_stride;
+ InterpFilterParams filter_params_x =
+ av1_get_interp_filter_params(interp_filter[1 + 2 * conv_params->ref]);
+ InterpFilterParams filter_params_y =
+ av1_get_interp_filter_params(interp_filter[0 + 2 * conv_params->ref]);
+ av1_convolve_2d(src, src_stride, conv_params->dst, conv_params->dst_stride, w,
+ h, &filter_params_x, &filter_params_y, subpel_x_q4,
+ subpel_y_q4, conv_params);
+}
+
#endif // CONFIG_CONVOLVE_ROUND
void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
@@ -294,7 +358,6 @@
filter_params = filter_params_x;
assert(filter_params.taps <= MAX_FILTER_TAP);
-
av1_convolve_horiz_facade(temp + (filter_size / 2 - 1), temp_stride, dst,
dst_stride, w, h, filter_params, subpel_x_q4,
x_step_q4, conv_params);
diff --git a/av1/common/convolve.h b/av1/common/convolve.h
index 349e9ac..88b413d 100644
--- a/av1/common/convolve.h
+++ b/av1/common/convolve.h
@@ -23,11 +23,15 @@
CONVOLVE_OPT_NO_ROUND,
} CONVOLVE_OPT;
+typedef int32_t CONV_BUF_TYPE;
+
typedef struct ConvolveParams {
int ref;
CONVOLVE_OPT round;
- int32_t *dst;
+ CONV_BUF_TYPE *dst;
int dst_stride;
+ int round_0;
+ int round_1;
} ConvolveParams;
static INLINE ConvolveParams get_conv_params(int ref) {
@@ -38,18 +42,33 @@
}
#if CONFIG_CONVOLVE_ROUND
+void av1_convolve_2d(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
+ int dst_stride, int w, int h,
+ InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y, const int subpel_x_q4,
+ const int subpel_y_q4, ConvolveParams *conv_params);
+
+void av1_convolve_2d_facade(const uint8_t *src, int src_stride, uint8_t *dst,
+ int dst_stride, int w, int h,
+ const InterpFilter *interp_filter,
+ const int subpel_x_q4, int x_step_q4,
+ const int subpel_y_q4, int y_step_q4,
+ ConvolveParams *conv_params);
+
static INLINE ConvolveParams get_conv_params_no_round(int ref, int32_t *dst,
int dst_stride) {
ConvolveParams conv_params;
conv_params.ref = ref;
conv_params.round = CONVOLVE_OPT_NO_ROUND;
+ conv_params.round_0 = 5;
+ conv_params.round_1 = 1;
conv_params.dst = dst;
conv_params.dst_stride = dst_stride;
return conv_params;
}
void av1_convolve_rounding(const int32_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int w, int h);
+ int dst_stride, int w, int h, int bits);
#endif // CONFIG_CONVOLVE_ROUND
void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
diff --git a/av1/common/reconinter.c b/av1/common/reconinter.c
index 682d002..566a911 100644
--- a/av1/common/reconinter.c
+++ b/av1/common/reconinter.c
@@ -918,7 +918,8 @@
MV32 scaled_mv[2];
SubpelParams subpel_params[2];
#if CONFIG_CONVOLVE_ROUND
- int32_t tmp_dst[MAX_SB_SIZE * MAX_SB_SIZE];
+ DECLARE_ALIGNED(16, int32_t, tmp_dst[MAX_SB_SIZE * MAX_SB_SIZE]);
+ av1_zero(tmp_dst);
#endif // CONFIG_CONVOLVE_ROUND
for (ref = 0; ref < 1 + is_compound; ++ref) {
@@ -968,15 +969,16 @@
(scaled_mv[ref].col >> SUBPEL_BITS);
}
+#if CONFIG_CONVOLVE_ROUND
+ ConvolveParams conv_params =
+ get_conv_params_no_round(ref, tmp_dst, MAX_SB_SIZE);
+#else
+ ConvolveParams conv_params = get_conv_params(ref);
+#endif // CONFIG_CONVOLVE_ROUND
for (ref = 0; ref < 1 + is_compound; ++ref) {
const struct scale_factors *const sf = &xd->block_refs[ref]->sf;
struct buf_2d *const pre_buf = &pd->pre[ref];
-#if CONFIG_CONVOLVE_ROUND
- ConvolveParams conv_params =
- get_conv_params_no_round(ref, tmp_dst, MAX_SB_SIZE);
-#else
- ConvolveParams conv_params = get_conv_params(ref);
-#endif // CONFIG_CONVOLVE_ROUND
+ conv_params.ref = ref;
#if CONFIG_EXT_INTER
if (ref &&
is_masked_compound_type(mi->mbmi.interinter_compound_data.type))
@@ -1014,7 +1016,9 @@
#if CONFIG_AOM_HIGHBITDEPTH
if (!(xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH))
#endif // CONFIG_AOM_HIGHBITDEPTH
- av1_convolve_rounding(tmp_dst, MAX_SB_SIZE, dst, dst_buf->stride, w, h);
+ av1_convolve_rounding(tmp_dst, MAX_SB_SIZE, dst, dst_buf->stride, w, h,
+ FILTER_BITS * 2 + is_compound -
+ conv_params.round_0 - conv_params.round_1);
#endif // CONFIG_CONVOLVE_ROUND
}
}
diff --git a/av1/common/reconinter.h b/av1/common/reconinter.h
index 519a1e3..cc4c858 100644
--- a/av1/common/reconinter.h
+++ b/av1/common/reconinter.h
@@ -64,11 +64,18 @@
sf->predict[subpel_x != 0][subpel_y != 0][conv_params->ref](
src, src_stride, dst, dst_stride, kernel_x, xs, kernel_y, ys, w, h);
} else {
- // ref_idx > 0 means this is the second reference frame
- // first reference frame's prediction result is already in dst
- // therefore we need to average the first and second results
- av1_convolve(src, src_stride, dst, dst_stride, w, h, interp_filter,
- subpel_x, xs, subpel_y, ys, conv_params);
+// ref_idx > 0 means this is the second reference frame
+// first reference frame's prediction result is already in dst
+// therefore we need to average the first and second results
+#if CONFIG_CONVOLVE_ROUND
+ if (conv_params->round == CONVOLVE_OPT_NO_ROUND)
+ av1_convolve_2d_facade(src, src_stride, dst, dst_stride, w, h,
+ interp_filter, subpel_x, xs, subpel_y, ys,
+ conv_params);
+ else
+#endif
+ av1_convolve(src, src_stride, dst, dst_stride, w, h, interp_filter,
+ subpel_x, xs, subpel_y, ys, conv_params);
}
}