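Enable SIMD for intrabc prediction

Store the IntraBC bilinear filter as full-width InterpKernel entries
(SUBPEL_TAPS coefficients per sub-pixel position, with the two non-zero
taps in the middle) instead of a packed 2-tap table, and report its tap
count as SUBPEL_TAPS. With that layout the 2-tap special case in
convolve.c, which always called the av1_convolve_*_sr_c functions, is
no longer needed and is removed, so IntraBC prediction goes through the
same convolve dispatch as the other interpolation filters.

The table below shows no change in the quality metrics and a 0.3%
reduction in ENC_T for SPD_SET 6 on the screen_c test set.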

| SPD_SET | TESTSET  | AVG_PSNR | OVR_PSNR |  SSIM   | ENC_T |
|---------|----------|----------|----------|---------|-------|
|    6    | screen_c | +0.000%  | +0.000%  | +0.000% | -0.3% |

Change-Id: I0b08b6d4b88248a277ae12bb9f9cdef2fb4b9eaf
diff --git a/av1/common/convolve.c b/av1/common/convolve.c
index 63dda39..ed77e0d 100644
--- a/av1/common/convolve.c
+++ b/av1/common/convolve.c
@@ -567,28 +567,6 @@
   const InterpFilterParams *filter_params_x = interp_filters[0];
   const InterpFilterParams *filter_params_y = interp_filters[1];
 
-  // TODO(jingning, yunqing): Add SIMD support to 2-tap filter case.
-  // Do we have SIMD support to 4-tap case?
-  // 2-tap filter indicates that it is for IntraBC.
-  if (filter_params_x->taps == 2 || filter_params_y->taps == 2) {
-    assert(filter_params_x->taps == 2 && filter_params_y->taps == 2);
-    assert(!scaled);
-    if (subpel_x_qn && subpel_y_qn) {
-      av1_convolve_2d_sr_c(src, src_stride, dst, dst_stride, w, h,
-                           filter_params_x, filter_params_y, subpel_x_qn,
-                           subpel_y_qn, conv_params);
-      return;
-    } else if (subpel_x_qn) {
-      av1_convolve_x_sr_c(src, src_stride, dst, dst_stride, w, h,
-                          filter_params_x, subpel_x_qn, conv_params);
-      return;
-    } else if (subpel_y_qn) {
-      av1_convolve_y_sr_c(src, src_stride, dst, dst_stride, w, h,
-                          filter_params_y, subpel_y_qn);
-      return;
-    }
-  }
-
   if (scaled) {
     convolve_2d_scale_wrapper(src, src_stride, dst, dst_stride, w, h,
                               filter_params_x, filter_params_y, subpel_x_qn,
diff --git a/av1/common/filter.h b/av1/common/filter.h
index ded5ce5..7511c88 100644
--- a/av1/common/filter.h
+++ b/av1/common/filter.h
@@ -192,14 +192,20 @@
 
 // A special 2-tap bilinear filter for IntraBC chroma. IntraBC uses full pixel
 // MV for luma. If sub-sampling exists, chroma may possibly use half-pel MV.
-DECLARE_ALIGNED(256, static const int16_t,
-                av1_intrabc_bilinear_filter[2 * SUBPEL_SHIFTS]) = {
-  128, 0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-  64,  64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+DECLARE_ALIGNED(256, static const InterpKernel,
+                av1_intrabc_bilinear_filter[SUBPEL_SHIFTS]) = {
+  { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0, 0, 0, 0 },
+  { 0, 0, 0, 0, 0, 0, 0, 0 },   { 0, 0, 0, 0, 0, 0, 0, 0 },
+  { 0, 0, 0, 0, 0, 0, 0, 0 },   { 0, 0, 0, 0, 0, 0, 0, 0 },
+  { 0, 0, 0, 0, 0, 0, 0, 0 },   { 0, 0, 0, 0, 0, 0, 0, 0 },
+  { 0, 0, 0, 64, 64, 0, 0, 0 }, { 0, 0, 0, 0, 0, 0, 0, 0 },
+  { 0, 0, 0, 0, 0, 0, 0, 0 },   { 0, 0, 0, 0, 0, 0, 0, 0 },
+  { 0, 0, 0, 0, 0, 0, 0, 0 },   { 0, 0, 0, 0, 0, 0, 0, 0 },
+  { 0, 0, 0, 0, 0, 0, 0, 0 },   { 0, 0, 0, 0, 0, 0, 0, 0 },
 };
 
 static const InterpFilterParams av1_intrabc_filter_params = {
-  av1_intrabc_bilinear_filter, 2, BILINEAR
+  (const int16_t *)av1_intrabc_bilinear_filter, SUBPEL_TAPS, BILINEAR
 };
 
 DECLARE_ALIGNED(256, static const InterpKernel,