Modify convolve function prototype. This patch modifies the convolve function prototype. The temp buffer used in the convolve functions is already available in conv_params, so it doesn't need to be passed in. Instead, pass in the destination buffer so that the result can be written to the destination directly in the single-ref case. More patches will follow. Change-Id: Ib28dc3ba5783a1034c70570d78fa8c8af7cbed7c

commit: 940c22a244c667df620c9e1dcc0c77ce0b6fec67 [log] [tgz]
author: Yunqing Wang <yunqingwang@google.com> Tue Dec 19 13:45:44 2017 -0800
committer: Yunqing Wang <yunqingwang@google.com> Wed Dec 20 18:19:36 2017 +0000
tree: b4c199d01b5f23f00cb18a3bf89b42792cf3d9ae
parent: 5891f98f75cfdb534d14289fa018bfba77a31447 [diff]
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index fd35765..b19fe1c 100755
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl

@@ -585,26 +585,26 @@
 }
 
 # CONVOLVE_ROUND/COMPOUND_ROUND functions
-add_proto qw/void av1_convolve_2d/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
+add_proto qw/void av1_convolve_2d/, "const uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
 specialize qw/av1_convolve_2d sse2 avx2/;
 add_proto qw/void av1_convolve_rounding/, "const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits";
 specialize qw/av1_convolve_rounding avx2/;
 
-add_proto qw/void av1_convolve_2d_copy/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
+add_proto qw/void av1_convolve_2d_copy/, "const uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
 specialize qw/av1_convolve_2d_copy sse2/;
-add_proto qw/void av1_convolve_x/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
+add_proto qw/void av1_convolve_x/, "const uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
 specialize qw/av1_convolve_x sse2/;
-add_proto qw/void av1_convolve_y/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
+add_proto qw/void av1_convolve_y/, "const uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
 specialize qw/av1_convolve_y sse2/;
 
 add_proto qw/void av1_convolve_2d_scale/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params";
 specialize qw/av1_convolve_2d_scale sse4_1/;
 
 if (aom_config("CONFIG_JNT_COMP") eq "yes") {
-  add_proto qw/void av1_jnt_convolve_2d/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
+  add_proto qw/void av1_jnt_convolve_2d/, "const uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
   specialize qw/av1_jnt_convolve_2d sse4_1/;
 
-  add_proto qw/void av1_jnt_convolve_2d_copy/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
+  add_proto qw/void av1_jnt_convolve_2d_copy/, "const uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
   specialize qw/av1_jnt_convolve_2d_copy sse2/;
 }
 

diff --git a/av1/common/convolve.c b/av1/common/convolve.c
index afdb50d..bd5d9ff 100644
--- a/av1/common/convolve.c
+++ b/av1/common/convolve.c

@@ -373,18 +373,22 @@
    bit widths for various intermediate values, see the comments above
    av1_warp_affine_c.
 */
-void av1_convolve_2d_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
-                       int dst_stride, int w, int h,
+void av1_convolve_2d_c(const uint8_t *src, int src_stride, const uint8_t *dst0,
+                       int dst_stride0, int w, int h,
                        InterpFilterParams *filter_params_x,
                        InterpFilterParams *filter_params_y,
                        const int subpel_x_q4, const int subpel_y_q4,
                        ConvolveParams *conv_params) {
+  CONV_BUF_TYPE *dst = conv_params->dst;
+  int dst_stride = conv_params->dst_stride;
   int32_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
   int im_h = h + filter_params_y->taps - 1;
   int im_stride = w;
   const int fo_vert = filter_params_y->taps / 2 - 1;
   const int fo_horiz = filter_params_x->taps / 2 - 1;
   const int bd = 8;
+  (void)dst0;
+  (void)dst_stride0;
 
   // horizontal filter
   const uint8_t *src_horiz = src - fo_vert * src_stride;
@@ -425,16 +429,20 @@
   }
 }
 
-void av1_convolve_y_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
-                      int dst_stride, int w, int h,
+void av1_convolve_y_c(const uint8_t *src, int src_stride, const uint8_t *dst0,
+                      int dst_stride0, int w, int h,
                       InterpFilterParams *filter_params_x,
                       InterpFilterParams *filter_params_y,
                       const int subpel_x_q4, const int subpel_y_q4,
                       ConvolveParams *conv_params) {
+  CONV_BUF_TYPE *dst = conv_params->dst;
+  int dst_stride = conv_params->dst_stride;
   const int fo_vert = filter_params_y->taps / 2 - 1;
   const int bits = FILTER_BITS - conv_params->round_0 - conv_params->round_1;
   (void)filter_params_x;
   (void)subpel_x_q4;
+  (void)dst0;
+  (void)dst_stride0;
 
   // vertical filter
   const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
@@ -454,16 +462,20 @@
   }
 }
 
-void av1_convolve_x_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
-                      int dst_stride, int w, int h,
+void av1_convolve_x_c(const uint8_t *src, int src_stride, const uint8_t *dst0,
+                      int dst_stride0, int w, int h,
                       InterpFilterParams *filter_params_x,
                       InterpFilterParams *filter_params_y,
                       const int subpel_x_q4, const int subpel_y_q4,
                       ConvolveParams *conv_params) {
+  CONV_BUF_TYPE *dst = conv_params->dst;
+  int dst_stride = conv_params->dst_stride;
   const int fo_horiz = filter_params_x->taps / 2 - 1;
   const int bits = FILTER_BITS - conv_params->round_1;
   (void)filter_params_y;
   (void)subpel_y_q4;
+  (void)dst0;
+  (void)dst_stride0;
 
   // horizontal filter
   const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
@@ -484,11 +496,13 @@
 }
 
 void av1_convolve_2d_copy_c(const uint8_t *src, int src_stride,
-                            CONV_BUF_TYPE *dst, int dst_stride, int w, int h,
+                            const uint8_t *dst0, int dst_stride0, int w, int h,
                             InterpFilterParams *filter_params_x,
                             InterpFilterParams *filter_params_y,
                             const int subpel_x_q4, const int subpel_y_q4,
                             ConvolveParams *conv_params) {
+  CONV_BUF_TYPE *dst = conv_params->dst;
+  int dst_stride = conv_params->dst_stride;
   const int bits =
       FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0;
 
@@ -496,6 +510,8 @@
   (void)filter_params_y;
   (void)subpel_x_q4;
   (void)subpel_y_q4;
+  (void)dst0;
+  (void)dst_stride0;
 
   for (int y = 0; y < h; ++y) {
     for (int x = 0; x < w; ++x) {
@@ -510,17 +526,21 @@
 
 #if CONFIG_JNT_COMP
 void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride,
-                           CONV_BUF_TYPE *dst, int dst_stride, int w, int h,
+                           const uint8_t *dst0, int dst_stride0, int w, int h,
                            InterpFilterParams *filter_params_x,
                            InterpFilterParams *filter_params_y,
                            const int subpel_x_q4, const int subpel_y_q4,
                            ConvolveParams *conv_params) {
+  CONV_BUF_TYPE *dst = conv_params->dst;
+  int dst_stride = conv_params->dst_stride;
   int32_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
   int im_h = h + filter_params_y->taps - 1;
   int im_stride = w;
   const int fo_vert = filter_params_y->taps / 2 - 1;
   const int fo_horiz = filter_params_x->taps / 2 - 1;
   const int bd = 8;
+  (void)dst0;
+  (void)dst_stride0;
 
   // horizontal filter
   const uint8_t *src_horiz = src - fo_vert * src_stride;
@@ -573,11 +593,13 @@
 }
 
 void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride,
-                                CONV_BUF_TYPE *dst, int dst_stride, int w,
+                                const uint8_t *dst0, int dst_stride0, int w,
                                 int h, InterpFilterParams *filter_params_x,
                                 InterpFilterParams *filter_params_y,
                                 const int subpel_x_q4, const int subpel_y_q4,
                                 ConvolveParams *conv_params) {
+  CONV_BUF_TYPE *dst = conv_params->dst;
+  int dst_stride = conv_params->dst_stride;
   const int bits =
       FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0;
 
@@ -585,6 +607,8 @@
   (void)filter_params_y;
   (void)subpel_x_q4;
   (void)subpel_y_q4;
+  (void)dst0;
+  (void)dst_stride0;
 
   for (int y = 0; y < h; ++y) {
     for (int x = 0; x < w; ++x) {
@@ -712,111 +736,59 @@
                                  &filter_params_y);
 #endif
 
-  if (filter_params_y.taps < filter_params_x.taps) {
-    uint8_t tr_src[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) *
-                   (MAX_SB_SIZE + MAX_FILTER_TAP - 1)];
-    int tr_src_stride = MAX_SB_SIZE + MAX_FILTER_TAP - 1;
-    CONV_BUF_TYPE tr_dst[MAX_SB_SIZE * MAX_SB_SIZE];
-    int tr_dst_stride = MAX_SB_SIZE;
-    int fo_vert = filter_params_y.taps / 2 - 1;
-    int fo_horiz = filter_params_x.taps / 2 - 1;
-
-    transpose_uint8(tr_src, tr_src_stride,
-                    src - fo_vert * src_stride - fo_horiz, src_stride,
-                    w + filter_params_x.taps - 1, h + filter_params_y.taps - 1);
-    transpose_int32(tr_dst, tr_dst_stride, conv_params->dst,
-                    conv_params->dst_stride, w, h);
-
-// horizontal and vertical parameters are swapped because of the transpose
 #if CONFIG_JNT_COMP
-    if (scaled)
-      av1_convolve_2d_scale(tr_src + fo_horiz * tr_src_stride + fo_vert,
-                            tr_src_stride, tr_dst, tr_dst_stride, h, w,
-                            &filter_params_y, &filter_params_x, subpel_y_q4,
-                            y_step_q4, subpel_x_q4, x_step_q4, conv_params);
-    else
-      av1_jnt_convolve_2d(tr_src + fo_horiz * tr_src_stride + fo_vert,
-                          tr_src_stride, tr_dst, tr_dst_stride, h, w,
-                          &filter_params_y, &filter_params_x, subpel_y_q4,
-                          subpel_x_q4, conv_params);
-#else
-    if (scaled)
-      av1_convolve_2d_scale(tr_src + fo_horiz * tr_src_stride + fo_vert,
-                            tr_src_stride, tr_dst, tr_dst_stride, h, w,
-                            &filter_params_y, &filter_params_x, subpel_y_q4,
-                            y_step_q4, subpel_x_q4, x_step_q4, conv_params);
-    else
-      av1_convolve_2d(tr_src + fo_horiz * tr_src_stride + fo_vert,
-                      tr_src_stride, tr_dst, tr_dst_stride, h, w,
-                      &filter_params_y, &filter_params_x, subpel_y_q4,
-                      subpel_x_q4, conv_params);
-#endif  // CONFIG_JNT_COMP
-    transpose_int32(conv_params->dst, conv_params->dst_stride, tr_dst,
-                    tr_dst_stride, h, w);
+  if (scaled) {
+    av1_convolve_2d_scale(src, src_stride, conv_params->dst,
+                          conv_params->dst_stride, w, h, &filter_params_x,
+                          &filter_params_y, subpel_x_q4, x_step_q4, subpel_y_q4,
+                          y_step_q4, conv_params);
   } else {
-#if CONFIG_JNT_COMP
-    if (scaled) {
-      av1_convolve_2d_scale(src, src_stride, conv_params->dst,
-                            conv_params->dst_stride, w, h, &filter_params_x,
-                            &filter_params_y, subpel_x_q4, x_step_q4,
-                            subpel_y_q4, y_step_q4, conv_params);
+    if (subpel_x_q4 == 0 && subpel_y_q4 == 0) {
+      av1_jnt_convolve_2d_copy(src, src_stride, dst, dst_stride, w, h,
+                               &filter_params_x, &filter_params_y, subpel_x_q4,
+                               subpel_y_q4, conv_params);
+    } else if (subpel_x_q4 == 0) {
+      // place holder
+      av1_jnt_convolve_2d(src, src_stride, dst, dst_stride, w, h,
+                          &filter_params_x, &filter_params_y, subpel_x_q4,
+                          subpel_y_q4, conv_params);
+    } else if (subpel_y_q4 == 0) {
+      // place holder
+      av1_jnt_convolve_2d(src, src_stride, dst, dst_stride, w, h,
+                          &filter_params_x, &filter_params_y, subpel_x_q4,
+                          subpel_y_q4, conv_params);
     } else {
-      if (subpel_x_q4 == 0 && subpel_y_q4 == 0) {
-        av1_jnt_convolve_2d_copy(src, src_stride, conv_params->dst,
-                                 conv_params->dst_stride, w, h,
-                                 &filter_params_x, &filter_params_y,
-                                 subpel_x_q4, subpel_y_q4, conv_params);
-      } else if (subpel_x_q4 == 0) {
-        // place holder
-        av1_jnt_convolve_2d(src, src_stride, conv_params->dst,
-                            conv_params->dst_stride, w, h, &filter_params_x,
-                            &filter_params_y, subpel_x_q4, subpel_y_q4,
-                            conv_params);
-      } else if (subpel_y_q4 == 0) {
-        // place holder
-        av1_jnt_convolve_2d(src, src_stride, conv_params->dst,
-                            conv_params->dst_stride, w, h, &filter_params_x,
-                            &filter_params_y, subpel_x_q4, subpel_y_q4,
-                            conv_params);
-      } else {
-        av1_jnt_convolve_2d(src, src_stride, conv_params->dst,
-                            conv_params->dst_stride, w, h, &filter_params_x,
-                            &filter_params_y, subpel_x_q4, subpel_y_q4,
-                            conv_params);
-      }
+      av1_jnt_convolve_2d(src, src_stride, dst, dst_stride, w, h,
+                          &filter_params_x, &filter_params_y, subpel_x_q4,
+                          subpel_y_q4, conv_params);
     }
-#else
-    if (scaled) {
-      av1_convolve_2d_scale(src, src_stride, conv_params->dst,
-                            conv_params->dst_stride, w, h, &filter_params_x,
-                            &filter_params_y, subpel_x_q4, x_step_q4,
-                            subpel_y_q4, y_step_q4, conv_params);
-    } else {
-      // Special case convolve functions should produce the same result as
-      // av1_convolve_2d.
-      if (subpel_x_q4 == 0 && subpel_y_q4 == 0) {
-        av1_convolve_2d_copy(src, src_stride, conv_params->dst,
-                             conv_params->dst_stride, w, h, &filter_params_x,
-                             &filter_params_y, subpel_x_q4, subpel_y_q4,
-                             conv_params);
-      } else if (subpel_x_q4 == 0) {
-        av1_convolve_y(src, src_stride, conv_params->dst,
-                       conv_params->dst_stride, w, h, &filter_params_x,
-                       &filter_params_y, subpel_x_q4, subpel_y_q4, conv_params);
-      } else if (subpel_y_q4 == 0) {
-        av1_convolve_x(src, src_stride, conv_params->dst,
-                       conv_params->dst_stride, w, h, &filter_params_x,
-                       &filter_params_y, subpel_x_q4, subpel_y_q4, conv_params);
-      } else {
-        // subpel_x_q4 != 0 && subpel_y_q4 != 0
-        av1_convolve_2d(src, src_stride, conv_params->dst,
-                        conv_params->dst_stride, w, h, &filter_params_x,
-                        &filter_params_y, subpel_x_q4, subpel_y_q4,
-                        conv_params);
-      }
-    }
-#endif  // CONFIG_JNT_COMP
   }
+#else
+  if (scaled) {
+    av1_convolve_2d_scale(src, src_stride, conv_params->dst,
+                          conv_params->dst_stride, w, h, &filter_params_x,
+                          &filter_params_y, subpel_x_q4, x_step_q4, subpel_y_q4,
+                          y_step_q4, conv_params);
+  } else {
+    // Special case convolve functions should produce the same result as
+    // av1_convolve_2d.
+    if (subpel_x_q4 == 0 && subpel_y_q4 == 0) {
+      av1_convolve_2d_copy(src, src_stride, dst, dst_stride, w, h,
+                           &filter_params_x, &filter_params_y, subpel_x_q4,
+                           subpel_y_q4, conv_params);
+    } else if (subpel_x_q4 == 0) {
+      av1_convolve_y(src, src_stride, dst, dst_stride, w, h, &filter_params_x,
+                     &filter_params_y, subpel_x_q4, subpel_y_q4, conv_params);
+    } else if (subpel_y_q4 == 0) {
+      av1_convolve_x(src, src_stride, dst, dst_stride, w, h, &filter_params_x,
+                     &filter_params_y, subpel_x_q4, subpel_y_q4, conv_params);
+    } else {
+      // subpel_x_q4 != 0 && subpel_y_q4 != 0
+      av1_convolve_2d(src, src_stride, dst, dst_stride, w, h, &filter_params_x,
+                      &filter_params_y, subpel_x_q4, subpel_y_q4, conv_params);
+    }
+  }
+#endif  // CONFIG_JNT_COMP
 }
 
 #if CONFIG_HIGHBITDEPTH

diff --git a/av1/common/x86/convolve_2d_avx2.c b/av1/common/x86/convolve_2d_avx2.c
index 3c1a24d..4db1380 100644
--- a/av1/common/x86/convolve_2d_avx2.c
+++ b/av1/common/x86/convolve_2d_avx2.c

@@ -18,12 +18,16 @@
 #include "av1/common/convolve.h"
 
 void av1_convolve_2d_avx2(const uint8_t *src, int src_stride,
-                          CONV_BUF_TYPE *dst, int dst_stride, int w, int h,
+                          const uint8_t *dst0, int dst_stride0, int w, int h,
                           InterpFilterParams *filter_params_x,
                           InterpFilterParams *filter_params_y,
                           const int subpel_x_q4, const int subpel_y_q4,
                           ConvolveParams *conv_params) {
+  CONV_BUF_TYPE *dst = conv_params->dst;
+  int dst_stride = conv_params->dst_stride;
   const int bd = 8;
+  (void)dst0;
+  (void)dst_stride0;
 
   DECLARE_ALIGNED(32, int16_t,
                   im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]);

diff --git a/av1/common/x86/convolve_2d_sse2.c b/av1/common/x86/convolve_2d_sse2.c
index 34b7dc7..a0b911c 100644
--- a/av1/common/x86/convolve_2d_sse2.c
+++ b/av1/common/x86/convolve_2d_sse2.c

@@ -18,12 +18,16 @@
 #include "av1/common/convolve.h"
 
 void av1_convolve_2d_sse2(const uint8_t *src, int src_stride,
-                          CONV_BUF_TYPE *dst, int dst_stride, int w, int h,
+                          const uint8_t *dst0, int dst_stride0, int w, int h,
                           InterpFilterParams *filter_params_x,
                           InterpFilterParams *filter_params_y,
                           const int subpel_x_q4, const int subpel_y_q4,
                           ConvolveParams *conv_params) {
+  CONV_BUF_TYPE *dst = conv_params->dst;
+  int dst_stride = conv_params->dst_stride;
   const int bd = 8;
+  (void)dst0;
+  (void)dst_stride0;
 
   DECLARE_ALIGNED(16, int16_t,
                   im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]);
@@ -201,15 +205,19 @@
 }
 
 void av1_convolve_2d_copy_sse2(const uint8_t *src, int src_stride,
-                               CONV_BUF_TYPE *dst, int dst_stride, int w, int h,
-                               InterpFilterParams *filter_params_x,
+                               const uint8_t *dst0, int dst_stride0, int w,
+                               int h, InterpFilterParams *filter_params_x,
                                InterpFilterParams *filter_params_y,
                                const int subpel_x_q4, const int subpel_y_q4,
                                ConvolveParams *conv_params) {
+  CONV_BUF_TYPE *dst = conv_params->dst;
+  int dst_stride = conv_params->dst_stride;
   (void)filter_params_x;
   (void)filter_params_y;
   (void)subpel_x_q4;
   (void)subpel_y_q4;
+  (void)dst0;
+  (void)dst_stride0;
 
   const int bits =
       FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0;
@@ -314,15 +322,19 @@
 
 #if CONFIG_JNT_COMP
 void av1_jnt_convolve_2d_copy_sse2(const uint8_t *src, int src_stride,
-                                   CONV_BUF_TYPE *dst, int dst_stride, int w,
+                                   const uint8_t *dst0, int dst_stride0, int w,
                                    int h, InterpFilterParams *filter_params_x,
                                    InterpFilterParams *filter_params_y,
                                    const int subpel_x_q4, const int subpel_y_q4,
                                    ConvolveParams *conv_params) {
+  CONV_BUF_TYPE *dst = conv_params->dst;
+  int dst_stride = conv_params->dst_stride;
   (void)filter_params_x;
   (void)filter_params_y;
   (void)subpel_x_q4;
   (void)subpel_y_q4;
+  (void)dst0;
+  (void)dst_stride0;
 
   const int bits =
       FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0;

diff --git a/av1/common/x86/convolve_2d_sse4.c b/av1/common/x86/convolve_2d_sse4.c
index ea0811a..4fc946d 100644
--- a/av1/common/x86/convolve_2d_sse4.c
+++ b/av1/common/x86/convolve_2d_sse4.c

@@ -20,12 +20,16 @@
 
 #if CONFIG_JNT_COMP
 void av1_jnt_convolve_2d_sse4_1(const uint8_t *src, int src_stride,
-                                CONV_BUF_TYPE *dst, int dst_stride, int w,
+                                const uint8_t *dst0, int dst_stride0, int w,
                                 int h, InterpFilterParams *filter_params_x,
                                 InterpFilterParams *filter_params_y,
                                 const int subpel_x_q4, const int subpel_y_q4,
                                 ConvolveParams *conv_params) {
+  CONV_BUF_TYPE *dst = conv_params->dst;
+  int dst_stride = conv_params->dst_stride;
   const int bd = 8;
+  (void)dst0;
+  (void)dst_stride0;
 
   DECLARE_ALIGNED(16, int16_t,
                   im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]);

diff --git a/av1/common/x86/convolve_sse2.c b/av1/common/x86/convolve_sse2.c
index 08ee8c3..b9433ff 100644
--- a/av1/common/x86/convolve_sse2.c
+++ b/av1/common/x86/convolve_sse2.c

@@ -17,12 +17,14 @@
 #include "aom_dsp/aom_filter.h"
 #include "av1/common/convolve.h"
 
-void av1_convolve_y_sse2(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
-                         int dst_stride, int w, int h,
+void av1_convolve_y_sse2(const uint8_t *src, int src_stride,
+                         const uint8_t *dst0, int dst_stride0, int w, int h,
                          InterpFilterParams *filter_params_x,
                          InterpFilterParams *filter_params_y,
                          const int subpel_x_q4, const int subpel_y_q4,
                          ConvolveParams *conv_params) {
+  CONV_BUF_TYPE *dst = conv_params->dst;
+  int dst_stride = conv_params->dst_stride;
   int i, j;
   const int fo_vert = filter_params_y->taps / 2 - 1;
   const int do_average = conv_params->do_average;
@@ -33,6 +35,8 @@
 
   (void)filter_params_x;
   (void)subpel_x_q4;
+  (void)dst0;
+  (void)dst_stride0;
 
   const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
       *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
@@ -114,12 +118,14 @@
   }
 }
 
-void av1_convolve_x_sse2(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
-                         int dst_stride, int w, int h,
+void av1_convolve_x_sse2(const uint8_t *src, int src_stride,
+                         const uint8_t *dst0, int dst_stride0, int w, int h,
                          InterpFilterParams *filter_params_x,
                          InterpFilterParams *filter_params_y,
                          const int subpel_x_q4, const int subpel_y_q4,
                          ConvolveParams *conv_params) {
+  CONV_BUF_TYPE *dst = conv_params->dst;
+  int dst_stride = conv_params->dst_stride;
   int i, j;
   const int fo_horiz = filter_params_x->taps / 2 - 1;
   const int do_average = conv_params->do_average;
@@ -130,6 +136,8 @@
 
   (void)filter_params_y;
   (void)subpel_y_q4;
+  (void)dst0;
+  (void)dst_stride0;
 
   const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
       *filter_params_x, subpel_x_q4 & SUBPEL_MASK);

diff --git a/test/av1_convolve_2d_test_util.cc b/test/av1_convolve_2d_test_util.cc
index e45fd87..a79e866 100644
--- a/test/av1_convolve_2d_test_util.cc
+++ b/test/av1_convolve_2d_test_util.cc

@@ -87,10 +87,10 @@
             // Choose random locations within the source block
             int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
             int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
-            av1_convolve_2d_c(input + offset_r * w + offset_c, w, output,
+            av1_convolve_2d_c(input + offset_r * w + offset_c, w, NULL,
                               MAX_SB_SIZE, out_w, out_h, &filter_params_x,
                               &filter_params_y, subx, suby, &conv_params1);
-            test_impl(input + offset_r * w + offset_c, w, output2, MAX_SB_SIZE,
+            test_impl(input + offset_r * w + offset_c, w, NULL, MAX_SB_SIZE,
                       out_w, out_h, &filter_params_x, &filter_params_y, subx,
                       suby, &conv_params2);
 
@@ -155,10 +155,10 @@
             // Choose random locations within the source block
             int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
             int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
-            av1_jnt_convolve_2d_c(input + offset_r * w + offset_c, w, output,
+            av1_jnt_convolve_2d_c(input + offset_r * w + offset_c, w, NULL,
                                   MAX_SB_SIZE, out_w, out_h, &filter_params_x,
                                   &filter_params_y, subx, suby, &conv_params1);
-            test_impl(input + offset_r * w + offset_c, w, output2, MAX_SB_SIZE,
+            test_impl(input + offset_r * w + offset_c, w, NULL, MAX_SB_SIZE,
                       out_w, out_h, &filter_params_x, &filter_params_y, subx,
                       suby, &conv_params2);
 
@@ -196,13 +196,13 @@
                 // Choose random locations within the source block
                 int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
                 int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
-                av1_jnt_convolve_2d_c(input + offset_r * w + offset_c, w,
-                                      output, MAX_SB_SIZE, out_w, out_h,
+                av1_jnt_convolve_2d_c(input + offset_r * w + offset_c, w, NULL,
+                                      MAX_SB_SIZE, out_w, out_h,
                                       &filter_params_x, &filter_params_y, subx,
                                       suby, &conv_params1);
-                test_impl(input + offset_r * w + offset_c, w, output2,
-                          MAX_SB_SIZE, out_w, out_h, &filter_params_x,
-                          &filter_params_y, subx, suby, &conv_params2);
+                test_impl(input + offset_r * w + offset_c, w, NULL, MAX_SB_SIZE,
+                          out_w, out_h, &filter_params_x, &filter_params_y,
+                          subx, suby, &conv_params2);
 
                 for (j = 0; j < out_h; ++j)
                   for (k = 0; k < out_w; ++k) {

diff --git a/test/av1_convolve_2d_test_util.h b/test/av1_convolve_2d_test_util.h
index 974169c..33aa482 100644
--- a/test/av1_convolve_2d_test_util.h
+++ b/test/av1_convolve_2d_test_util.h

@@ -25,7 +25,7 @@
 namespace AV1Convolve2D {
 
 typedef void (*convolve_2d_func)(const uint8_t *src, int src_stride,
-                                 CONV_BUF_TYPE *dst, int dst_stride, int w,
+                                 const uint8_t *dst, int dst_stride, int w,
                                  int h, InterpFilterParams *filter_params_x,
                                  InterpFilterParams *filter_params_y,
                                  const int subpel_x_q4, const int subpel_y_q4,
commit	940c22a244c667df620c9e1dcc0c77ce0b6fec67	[log] [tgz]
author	Yunqing Wang <yunqingwang@google.com>	Tue Dec 19 13:45:44 2017 -0800
committer	Yunqing Wang <yunqingwang@google.com>	Wed Dec 20 18:19:36 2017 +0000
tree	b4c199d01b5f23f00cb18a3bf89b42792cf3d9ae
parent	5891f98f75cfdb534d14289fa018bfba77a31447 [diff]