Modify convolve function prototype
This patch modifies the convolve function prototypes. The temporary buffer
used by the convolve functions is already available in conv_params, so it
does not need to be passed in. Instead, pass in the destination buffer so
that the result can be written to the destination directly in the
single-reference case. More patches will follow.
Change-Id: Ib28dc3ba5783a1034c70570d78fa8c8af7cbed7c
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index fd35765..b19fe1c 100755
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -585,26 +585,26 @@
}
# CONVOLVE_ROUND/COMPOUND_ROUND functions
-add_proto qw/void av1_convolve_2d/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
+add_proto qw/void av1_convolve_2d/, "const uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
specialize qw/av1_convolve_2d sse2 avx2/;
add_proto qw/void av1_convolve_rounding/, "const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits";
specialize qw/av1_convolve_rounding avx2/;
-add_proto qw/void av1_convolve_2d_copy/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
+add_proto qw/void av1_convolve_2d_copy/, "const uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
specialize qw/av1_convolve_2d_copy sse2/;
-add_proto qw/void av1_convolve_x/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
+add_proto qw/void av1_convolve_x/, "const uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
specialize qw/av1_convolve_x sse2/;
-add_proto qw/void av1_convolve_y/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
+add_proto qw/void av1_convolve_y/, "const uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
specialize qw/av1_convolve_y sse2/;
add_proto qw/void av1_convolve_2d_scale/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params";
specialize qw/av1_convolve_2d_scale sse4_1/;
if (aom_config("CONFIG_JNT_COMP") eq "yes") {
- add_proto qw/void av1_jnt_convolve_2d/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
+ add_proto qw/void av1_jnt_convolve_2d/, "const uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
specialize qw/av1_jnt_convolve_2d sse4_1/;
- add_proto qw/void av1_jnt_convolve_2d_copy/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
+ add_proto qw/void av1_jnt_convolve_2d_copy/, "const uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
specialize qw/av1_jnt_convolve_2d_copy sse2/;
}
diff --git a/av1/common/convolve.c b/av1/common/convolve.c
index afdb50d..bd5d9ff 100644
--- a/av1/common/convolve.c
+++ b/av1/common/convolve.c
@@ -373,18 +373,22 @@
bit widths for various intermediate values, see the comments above
av1_warp_affine_c.
*/
-void av1_convolve_2d_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
- int dst_stride, int w, int h,
+void av1_convolve_2d_c(const uint8_t *src, int src_stride, const uint8_t *dst0,
+ int dst_stride0, int w, int h,
InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) {
+ CONV_BUF_TYPE *dst = conv_params->dst;
+ int dst_stride = conv_params->dst_stride;
int32_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
int im_h = h + filter_params_y->taps - 1;
int im_stride = w;
const int fo_vert = filter_params_y->taps / 2 - 1;
const int fo_horiz = filter_params_x->taps / 2 - 1;
const int bd = 8;
+ (void)dst0;
+ (void)dst_stride0;
// horizontal filter
const uint8_t *src_horiz = src - fo_vert * src_stride;
@@ -425,16 +429,20 @@
}
}
-void av1_convolve_y_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
- int dst_stride, int w, int h,
+void av1_convolve_y_c(const uint8_t *src, int src_stride, const uint8_t *dst0,
+ int dst_stride0, int w, int h,
InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) {
+ CONV_BUF_TYPE *dst = conv_params->dst;
+ int dst_stride = conv_params->dst_stride;
const int fo_vert = filter_params_y->taps / 2 - 1;
const int bits = FILTER_BITS - conv_params->round_0 - conv_params->round_1;
(void)filter_params_x;
(void)subpel_x_q4;
+ (void)dst0;
+ (void)dst_stride0;
// vertical filter
const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
@@ -454,16 +462,20 @@
}
}
-void av1_convolve_x_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
- int dst_stride, int w, int h,
+void av1_convolve_x_c(const uint8_t *src, int src_stride, const uint8_t *dst0,
+ int dst_stride0, int w, int h,
InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) {
+ CONV_BUF_TYPE *dst = conv_params->dst;
+ int dst_stride = conv_params->dst_stride;
const int fo_horiz = filter_params_x->taps / 2 - 1;
const int bits = FILTER_BITS - conv_params->round_1;
(void)filter_params_y;
(void)subpel_y_q4;
+ (void)dst0;
+ (void)dst_stride0;
// horizontal filter
const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
@@ -484,11 +496,13 @@
}
void av1_convolve_2d_copy_c(const uint8_t *src, int src_stride,
- CONV_BUF_TYPE *dst, int dst_stride, int w, int h,
+ const uint8_t *dst0, int dst_stride0, int w, int h,
InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) {
+ CONV_BUF_TYPE *dst = conv_params->dst;
+ int dst_stride = conv_params->dst_stride;
const int bits =
FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0;
@@ -496,6 +510,8 @@
(void)filter_params_y;
(void)subpel_x_q4;
(void)subpel_y_q4;
+ (void)dst0;
+ (void)dst_stride0;
for (int y = 0; y < h; ++y) {
for (int x = 0; x < w; ++x) {
@@ -510,17 +526,21 @@
#if CONFIG_JNT_COMP
void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride,
- CONV_BUF_TYPE *dst, int dst_stride, int w, int h,
+ const uint8_t *dst0, int dst_stride0, int w, int h,
InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) {
+ CONV_BUF_TYPE *dst = conv_params->dst;
+ int dst_stride = conv_params->dst_stride;
int32_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
int im_h = h + filter_params_y->taps - 1;
int im_stride = w;
const int fo_vert = filter_params_y->taps / 2 - 1;
const int fo_horiz = filter_params_x->taps / 2 - 1;
const int bd = 8;
+ (void)dst0;
+ (void)dst_stride0;
// horizontal filter
const uint8_t *src_horiz = src - fo_vert * src_stride;
@@ -573,11 +593,13 @@
}
void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride,
- CONV_BUF_TYPE *dst, int dst_stride, int w,
+ const uint8_t *dst0, int dst_stride0, int w,
int h, InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) {
+ CONV_BUF_TYPE *dst = conv_params->dst;
+ int dst_stride = conv_params->dst_stride;
const int bits =
FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0;
@@ -585,6 +607,8 @@
(void)filter_params_y;
(void)subpel_x_q4;
(void)subpel_y_q4;
+ (void)dst0;
+ (void)dst_stride0;
for (int y = 0; y < h; ++y) {
for (int x = 0; x < w; ++x) {
@@ -712,111 +736,59 @@
&filter_params_y);
#endif
- if (filter_params_y.taps < filter_params_x.taps) {
- uint8_t tr_src[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) *
- (MAX_SB_SIZE + MAX_FILTER_TAP - 1)];
- int tr_src_stride = MAX_SB_SIZE + MAX_FILTER_TAP - 1;
- CONV_BUF_TYPE tr_dst[MAX_SB_SIZE * MAX_SB_SIZE];
- int tr_dst_stride = MAX_SB_SIZE;
- int fo_vert = filter_params_y.taps / 2 - 1;
- int fo_horiz = filter_params_x.taps / 2 - 1;
-
- transpose_uint8(tr_src, tr_src_stride,
- src - fo_vert * src_stride - fo_horiz, src_stride,
- w + filter_params_x.taps - 1, h + filter_params_y.taps - 1);
- transpose_int32(tr_dst, tr_dst_stride, conv_params->dst,
- conv_params->dst_stride, w, h);
-
-// horizontal and vertical parameters are swapped because of the transpose
#if CONFIG_JNT_COMP
- if (scaled)
- av1_convolve_2d_scale(tr_src + fo_horiz * tr_src_stride + fo_vert,
- tr_src_stride, tr_dst, tr_dst_stride, h, w,
- &filter_params_y, &filter_params_x, subpel_y_q4,
- y_step_q4, subpel_x_q4, x_step_q4, conv_params);
- else
- av1_jnt_convolve_2d(tr_src + fo_horiz * tr_src_stride + fo_vert,
- tr_src_stride, tr_dst, tr_dst_stride, h, w,
- &filter_params_y, &filter_params_x, subpel_y_q4,
- subpel_x_q4, conv_params);
-#else
- if (scaled)
- av1_convolve_2d_scale(tr_src + fo_horiz * tr_src_stride + fo_vert,
- tr_src_stride, tr_dst, tr_dst_stride, h, w,
- &filter_params_y, &filter_params_x, subpel_y_q4,
- y_step_q4, subpel_x_q4, x_step_q4, conv_params);
- else
- av1_convolve_2d(tr_src + fo_horiz * tr_src_stride + fo_vert,
- tr_src_stride, tr_dst, tr_dst_stride, h, w,
- &filter_params_y, &filter_params_x, subpel_y_q4,
- subpel_x_q4, conv_params);
-#endif // CONFIG_JNT_COMP
- transpose_int32(conv_params->dst, conv_params->dst_stride, tr_dst,
- tr_dst_stride, h, w);
+ if (scaled) {
+ av1_convolve_2d_scale(src, src_stride, conv_params->dst,
+ conv_params->dst_stride, w, h, &filter_params_x,
+ &filter_params_y, subpel_x_q4, x_step_q4, subpel_y_q4,
+ y_step_q4, conv_params);
} else {
-#if CONFIG_JNT_COMP
- if (scaled) {
- av1_convolve_2d_scale(src, src_stride, conv_params->dst,
- conv_params->dst_stride, w, h, &filter_params_x,
- &filter_params_y, subpel_x_q4, x_step_q4,
- subpel_y_q4, y_step_q4, conv_params);
+ if (subpel_x_q4 == 0 && subpel_y_q4 == 0) {
+ av1_jnt_convolve_2d_copy(src, src_stride, dst, dst_stride, w, h,
+ &filter_params_x, &filter_params_y, subpel_x_q4,
+ subpel_y_q4, conv_params);
+ } else if (subpel_x_q4 == 0) {
+ // place holder
+ av1_jnt_convolve_2d(src, src_stride, dst, dst_stride, w, h,
+ &filter_params_x, &filter_params_y, subpel_x_q4,
+ subpel_y_q4, conv_params);
+ } else if (subpel_y_q4 == 0) {
+ // place holder
+ av1_jnt_convolve_2d(src, src_stride, dst, dst_stride, w, h,
+ &filter_params_x, &filter_params_y, subpel_x_q4,
+ subpel_y_q4, conv_params);
} else {
- if (subpel_x_q4 == 0 && subpel_y_q4 == 0) {
- av1_jnt_convolve_2d_copy(src, src_stride, conv_params->dst,
- conv_params->dst_stride, w, h,
- &filter_params_x, &filter_params_y,
- subpel_x_q4, subpel_y_q4, conv_params);
- } else if (subpel_x_q4 == 0) {
- // place holder
- av1_jnt_convolve_2d(src, src_stride, conv_params->dst,
- conv_params->dst_stride, w, h, &filter_params_x,
- &filter_params_y, subpel_x_q4, subpel_y_q4,
- conv_params);
- } else if (subpel_y_q4 == 0) {
- // place holder
- av1_jnt_convolve_2d(src, src_stride, conv_params->dst,
- conv_params->dst_stride, w, h, &filter_params_x,
- &filter_params_y, subpel_x_q4, subpel_y_q4,
- conv_params);
- } else {
- av1_jnt_convolve_2d(src, src_stride, conv_params->dst,
- conv_params->dst_stride, w, h, &filter_params_x,
- &filter_params_y, subpel_x_q4, subpel_y_q4,
- conv_params);
- }
+ av1_jnt_convolve_2d(src, src_stride, dst, dst_stride, w, h,
+ &filter_params_x, &filter_params_y, subpel_x_q4,
+ subpel_y_q4, conv_params);
}
-#else
- if (scaled) {
- av1_convolve_2d_scale(src, src_stride, conv_params->dst,
- conv_params->dst_stride, w, h, &filter_params_x,
- &filter_params_y, subpel_x_q4, x_step_q4,
- subpel_y_q4, y_step_q4, conv_params);
- } else {
- // Special case convolve functions should produce the same result as
- // av1_convolve_2d.
- if (subpel_x_q4 == 0 && subpel_y_q4 == 0) {
- av1_convolve_2d_copy(src, src_stride, conv_params->dst,
- conv_params->dst_stride, w, h, &filter_params_x,
- &filter_params_y, subpel_x_q4, subpel_y_q4,
- conv_params);
- } else if (subpel_x_q4 == 0) {
- av1_convolve_y(src, src_stride, conv_params->dst,
- conv_params->dst_stride, w, h, &filter_params_x,
- &filter_params_y, subpel_x_q4, subpel_y_q4, conv_params);
- } else if (subpel_y_q4 == 0) {
- av1_convolve_x(src, src_stride, conv_params->dst,
- conv_params->dst_stride, w, h, &filter_params_x,
- &filter_params_y, subpel_x_q4, subpel_y_q4, conv_params);
- } else {
- // subpel_x_q4 != 0 && subpel_y_q4 != 0
- av1_convolve_2d(src, src_stride, conv_params->dst,
- conv_params->dst_stride, w, h, &filter_params_x,
- &filter_params_y, subpel_x_q4, subpel_y_q4,
- conv_params);
- }
- }
-#endif // CONFIG_JNT_COMP
}
+#else
+ if (scaled) {
+ av1_convolve_2d_scale(src, src_stride, conv_params->dst,
+ conv_params->dst_stride, w, h, &filter_params_x,
+ &filter_params_y, subpel_x_q4, x_step_q4, subpel_y_q4,
+ y_step_q4, conv_params);
+ } else {
+ // Special case convolve functions should produce the same result as
+ // av1_convolve_2d.
+ if (subpel_x_q4 == 0 && subpel_y_q4 == 0) {
+ av1_convolve_2d_copy(src, src_stride, dst, dst_stride, w, h,
+ &filter_params_x, &filter_params_y, subpel_x_q4,
+ subpel_y_q4, conv_params);
+ } else if (subpel_x_q4 == 0) {
+ av1_convolve_y(src, src_stride, dst, dst_stride, w, h, &filter_params_x,
+ &filter_params_y, subpel_x_q4, subpel_y_q4, conv_params);
+ } else if (subpel_y_q4 == 0) {
+ av1_convolve_x(src, src_stride, dst, dst_stride, w, h, &filter_params_x,
+ &filter_params_y, subpel_x_q4, subpel_y_q4, conv_params);
+ } else {
+ // subpel_x_q4 != 0 && subpel_y_q4 != 0
+ av1_convolve_2d(src, src_stride, dst, dst_stride, w, h, &filter_params_x,
+ &filter_params_y, subpel_x_q4, subpel_y_q4, conv_params);
+ }
+ }
+#endif // CONFIG_JNT_COMP
}
#if CONFIG_HIGHBITDEPTH
diff --git a/av1/common/x86/convolve_2d_avx2.c b/av1/common/x86/convolve_2d_avx2.c
index 3c1a24d..4db1380 100644
--- a/av1/common/x86/convolve_2d_avx2.c
+++ b/av1/common/x86/convolve_2d_avx2.c
@@ -18,12 +18,16 @@
#include "av1/common/convolve.h"
void av1_convolve_2d_avx2(const uint8_t *src, int src_stride,
- CONV_BUF_TYPE *dst, int dst_stride, int w, int h,
+ const uint8_t *dst0, int dst_stride0, int w, int h,
InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) {
+ CONV_BUF_TYPE *dst = conv_params->dst;
+ int dst_stride = conv_params->dst_stride;
const int bd = 8;
+ (void)dst0;
+ (void)dst_stride0;
DECLARE_ALIGNED(32, int16_t,
im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]);
diff --git a/av1/common/x86/convolve_2d_sse2.c b/av1/common/x86/convolve_2d_sse2.c
index 34b7dc7..a0b911c 100644
--- a/av1/common/x86/convolve_2d_sse2.c
+++ b/av1/common/x86/convolve_2d_sse2.c
@@ -18,12 +18,16 @@
#include "av1/common/convolve.h"
void av1_convolve_2d_sse2(const uint8_t *src, int src_stride,
- CONV_BUF_TYPE *dst, int dst_stride, int w, int h,
+ const uint8_t *dst0, int dst_stride0, int w, int h,
InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) {
+ CONV_BUF_TYPE *dst = conv_params->dst;
+ int dst_stride = conv_params->dst_stride;
const int bd = 8;
+ (void)dst0;
+ (void)dst_stride0;
DECLARE_ALIGNED(16, int16_t,
im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]);
@@ -201,15 +205,19 @@
}
void av1_convolve_2d_copy_sse2(const uint8_t *src, int src_stride,
- CONV_BUF_TYPE *dst, int dst_stride, int w, int h,
- InterpFilterParams *filter_params_x,
+ const uint8_t *dst0, int dst_stride0, int w,
+ int h, InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) {
+ CONV_BUF_TYPE *dst = conv_params->dst;
+ int dst_stride = conv_params->dst_stride;
(void)filter_params_x;
(void)filter_params_y;
(void)subpel_x_q4;
(void)subpel_y_q4;
+ (void)dst0;
+ (void)dst_stride0;
const int bits =
FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0;
@@ -314,15 +322,19 @@
#if CONFIG_JNT_COMP
void av1_jnt_convolve_2d_copy_sse2(const uint8_t *src, int src_stride,
- CONV_BUF_TYPE *dst, int dst_stride, int w,
+ const uint8_t *dst0, int dst_stride0, int w,
int h, InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) {
+ CONV_BUF_TYPE *dst = conv_params->dst;
+ int dst_stride = conv_params->dst_stride;
(void)filter_params_x;
(void)filter_params_y;
(void)subpel_x_q4;
(void)subpel_y_q4;
+ (void)dst0;
+ (void)dst_stride0;
const int bits =
FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0;
diff --git a/av1/common/x86/convolve_2d_sse4.c b/av1/common/x86/convolve_2d_sse4.c
index ea0811a..4fc946d 100644
--- a/av1/common/x86/convolve_2d_sse4.c
+++ b/av1/common/x86/convolve_2d_sse4.c
@@ -20,12 +20,16 @@
#if CONFIG_JNT_COMP
void av1_jnt_convolve_2d_sse4_1(const uint8_t *src, int src_stride,
- CONV_BUF_TYPE *dst, int dst_stride, int w,
+ const uint8_t *dst0, int dst_stride0, int w,
int h, InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) {
+ CONV_BUF_TYPE *dst = conv_params->dst;
+ int dst_stride = conv_params->dst_stride;
const int bd = 8;
+ (void)dst0;
+ (void)dst_stride0;
DECLARE_ALIGNED(16, int16_t,
im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]);
diff --git a/av1/common/x86/convolve_sse2.c b/av1/common/x86/convolve_sse2.c
index 08ee8c3..b9433ff 100644
--- a/av1/common/x86/convolve_sse2.c
+++ b/av1/common/x86/convolve_sse2.c
@@ -17,12 +17,14 @@
#include "aom_dsp/aom_filter.h"
#include "av1/common/convolve.h"
-void av1_convolve_y_sse2(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
- int dst_stride, int w, int h,
+void av1_convolve_y_sse2(const uint8_t *src, int src_stride,
+ const uint8_t *dst0, int dst_stride0, int w, int h,
InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) {
+ CONV_BUF_TYPE *dst = conv_params->dst;
+ int dst_stride = conv_params->dst_stride;
int i, j;
const int fo_vert = filter_params_y->taps / 2 - 1;
const int do_average = conv_params->do_average;
@@ -33,6 +35,8 @@
(void)filter_params_x;
(void)subpel_x_q4;
+ (void)dst0;
+ (void)dst_stride0;
const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
*filter_params_y, subpel_y_q4 & SUBPEL_MASK);
@@ -114,12 +118,14 @@
}
}
-void av1_convolve_x_sse2(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
- int dst_stride, int w, int h,
+void av1_convolve_x_sse2(const uint8_t *src, int src_stride,
+ const uint8_t *dst0, int dst_stride0, int w, int h,
InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) {
+ CONV_BUF_TYPE *dst = conv_params->dst;
+ int dst_stride = conv_params->dst_stride;
int i, j;
const int fo_horiz = filter_params_x->taps / 2 - 1;
const int do_average = conv_params->do_average;
@@ -130,6 +136,8 @@
(void)filter_params_y;
(void)subpel_y_q4;
+ (void)dst0;
+ (void)dst_stride0;
const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
*filter_params_x, subpel_x_q4 & SUBPEL_MASK);
diff --git a/test/av1_convolve_2d_test_util.cc b/test/av1_convolve_2d_test_util.cc
index e45fd87..a79e866 100644
--- a/test/av1_convolve_2d_test_util.cc
+++ b/test/av1_convolve_2d_test_util.cc
@@ -87,10 +87,10 @@
// Choose random locations within the source block
int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
- av1_convolve_2d_c(input + offset_r * w + offset_c, w, output,
+ av1_convolve_2d_c(input + offset_r * w + offset_c, w, NULL,
MAX_SB_SIZE, out_w, out_h, &filter_params_x,
&filter_params_y, subx, suby, &conv_params1);
- test_impl(input + offset_r * w + offset_c, w, output2, MAX_SB_SIZE,
+ test_impl(input + offset_r * w + offset_c, w, NULL, MAX_SB_SIZE,
out_w, out_h, &filter_params_x, &filter_params_y, subx,
suby, &conv_params2);
@@ -155,10 +155,10 @@
// Choose random locations within the source block
int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
- av1_jnt_convolve_2d_c(input + offset_r * w + offset_c, w, output,
+ av1_jnt_convolve_2d_c(input + offset_r * w + offset_c, w, NULL,
MAX_SB_SIZE, out_w, out_h, &filter_params_x,
&filter_params_y, subx, suby, &conv_params1);
- test_impl(input + offset_r * w + offset_c, w, output2, MAX_SB_SIZE,
+ test_impl(input + offset_r * w + offset_c, w, NULL, MAX_SB_SIZE,
out_w, out_h, &filter_params_x, &filter_params_y, subx,
suby, &conv_params2);
@@ -196,13 +196,13 @@
// Choose random locations within the source block
int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
- av1_jnt_convolve_2d_c(input + offset_r * w + offset_c, w,
- output, MAX_SB_SIZE, out_w, out_h,
+ av1_jnt_convolve_2d_c(input + offset_r * w + offset_c, w, NULL,
+ MAX_SB_SIZE, out_w, out_h,
&filter_params_x, &filter_params_y, subx,
suby, &conv_params1);
- test_impl(input + offset_r * w + offset_c, w, output2,
- MAX_SB_SIZE, out_w, out_h, &filter_params_x,
- &filter_params_y, subx, suby, &conv_params2);
+ test_impl(input + offset_r * w + offset_c, w, NULL, MAX_SB_SIZE,
+ out_w, out_h, &filter_params_x, &filter_params_y,
+ subx, suby, &conv_params2);
for (j = 0; j < out_h; ++j)
for (k = 0; k < out_w; ++k) {
diff --git a/test/av1_convolve_2d_test_util.h b/test/av1_convolve_2d_test_util.h
index 974169c..33aa482 100644
--- a/test/av1_convolve_2d_test_util.h
+++ b/test/av1_convolve_2d_test_util.h
@@ -25,7 +25,7 @@
namespace AV1Convolve2D {
typedef void (*convolve_2d_func)(const uint8_t *src, int src_stride,
- CONV_BUF_TYPE *dst, int dst_stride, int w,
+ const uint8_t *dst, int dst_stride, int w,
int h, InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,