JNT_COMP: add convolve_x and convolve_y C functions

Change-Id: Ia448b44ca734fe111422de9afdad97ac48e78b66
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index 7572232..778adce 100755
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -510,6 +510,10 @@
 
   add_proto qw/void av1_jnt_convolve_2d_copy/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
   specialize qw/av1_jnt_convolve_2d_copy sse2/;
+
+  add_proto qw/void av1_jnt_convolve_x/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
+
+  add_proto qw/void av1_jnt_convolve_y/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
 }
 
 add_proto qw/void av1_highbd_convolve_2d/, "const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
diff --git a/av1/common/convolve.c b/av1/common/convolve.c
index 8fa9275..9f4ec26 100644
--- a/av1/common/convolve.c
+++ b/av1/common/convolve.c
@@ -724,6 +724,90 @@
   }
 }
 
+void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst0,
+                          int dst_stride0, int w, int h,
+                          InterpFilterParams *filter_params_x,
+                          InterpFilterParams *filter_params_y,
+                          const int subpel_x_q4, const int subpel_y_q4,
+                          ConvolveParams *conv_params) {
+  CONV_BUF_TYPE *dst = conv_params->dst;  // results go here, not to dst0
+  int dst_stride = conv_params->dst_stride;
+  const int fo_vert = filter_params_y->taps / 2 - 1;  // rows read above y
+  const int bits = FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+  (void)filter_params_x;  // x-direction args and dst0/dst_stride0 are unused
+  (void)subpel_x_q4;
+  (void)dst0;
+  (void)dst_stride0;
+
+  // Vertical filter only: each output sums the taps down one source column.
+  const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
+      *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
+  for (int y = 0; y < h; ++y) {
+    for (int x = 0; x < w; ++x) {
+      CONV_BUF_TYPE res = 0;
+      for (int k = 0; k < filter_params_y->taps; ++k) {
+        res += y_filter[k] * src[(y - fo_vert + k) * src_stride + x];
+      }
+      res *= (1 << bits);  // NOTE(review): assumes bits >= 0 -- confirm
+      if (conv_params->use_jnt_comp_avg) {
+        if (conv_params->do_average) {  // add bck-weighted value, then shift
+          dst[y * dst_stride + x] += res * conv_params->bck_offset;
+          dst[y * dst_stride + x] >>= (DIST_PRECISION_BITS - 1);
+        } else {  // not averaging: overwrite with the fwd-weighted value
+          dst[y * dst_stride + x] = res * conv_params->fwd_offset;
+        }
+      } else {  // unweighted path: accumulate or overwrite with res as-is
+        if (conv_params->do_average)
+          dst[y * dst_stride + x] += res;
+        else
+          dst[y * dst_stride + x] = res;
+      }
+    }
+  }
+}
+
+void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst0,
+                          int dst_stride0, int w, int h,
+                          InterpFilterParams *filter_params_x,
+                          InterpFilterParams *filter_params_y,
+                          const int subpel_x_q4, const int subpel_y_q4,
+                          ConvolveParams *conv_params) {
+  CONV_BUF_TYPE *dst = conv_params->dst;  // results go here, not to dst0
+  int dst_stride = conv_params->dst_stride;
+  const int fo_horiz = filter_params_x->taps / 2 - 1;  // columns read left of x
+  const int bits = FILTER_BITS - conv_params->round_1;  // scale after round_0
+  (void)filter_params_y;  // y-direction args and dst0/dst_stride0 are unused
+  (void)subpel_y_q4;
+  (void)dst0;
+  (void)dst_stride0;
+
+  // Horizontal filter only: each output sums the taps along one source row.
+  const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
+      *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
+  for (int y = 0; y < h; ++y) {
+    for (int x = 0; x < w; ++x) {
+      CONV_BUF_TYPE res = 0;
+      for (int k = 0; k < filter_params_x->taps; ++k) {
+        res += x_filter[k] * src[y * src_stride + x - fo_horiz + k];
+      }
+      res = (1 << bits) * ROUND_POWER_OF_TWO(res, conv_params->round_0);
+      if (conv_params->use_jnt_comp_avg) {
+        if (conv_params->do_average) {  // add bck-weighted value, then shift
+          dst[y * dst_stride + x] += res * conv_params->bck_offset;
+          dst[y * dst_stride + x] >>= (DIST_PRECISION_BITS - 1);
+        } else {  // not averaging: overwrite with the fwd-weighted value
+          dst[y * dst_stride + x] = res * conv_params->fwd_offset;
+        }
+      } else {  // unweighted path: accumulate or overwrite with res as-is
+        if (conv_params->do_average)
+          dst[y * dst_stride + x] += res;
+        else
+          dst[y * dst_stride + x] = res;
+      }
+    }
+  }
+}
+
 void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride,
                                 uint8_t *dst0, int dst_stride0, int w, int h,
                                 InterpFilterParams *filter_params_x,
diff --git a/av1/common/scale.c b/av1/common/scale.c
index b19513e..a334bae 100644
--- a/av1/common/scale.c
+++ b/av1/common/scale.c
@@ -190,11 +190,9 @@
   // subpel_x_q4 == 0 && subpel_y_q4 == 0
   sf->convolve[0][0][1] = av1_jnt_convolve_2d_copy;
   // subpel_x_q4 == 0
-  // place holder
-  sf->convolve[0][1][1] = av1_jnt_convolve_2d;
+  sf->convolve[0][1][1] = av1_jnt_convolve_y;
   // subpel_y_q4 == 0
-  // place holder
-  sf->convolve[1][0][1] = av1_jnt_convolve_2d;
+  sf->convolve[1][0][1] = av1_jnt_convolve_x;
   // subpel_x_q4 != 0 && subpel_y_q4 != 0
   sf->convolve[1][1][1] = av1_jnt_convolve_2d;
 #else