Use the 12-tap filter in at most one direction in convolve_round

Performance drops by 0.084% on lowres.

Change-Id: I2bcaae96b68033a0af7a1da988505623bc14ed94
diff --git a/av1/common/convolve.c b/av1/common/convolve.c
index 97ce6ba..6cd24e3 100644
--- a/av1/common/convolve.c
+++ b/av1/common/convolve.c
@@ -221,7 +221,6 @@
   const int fo_vert = filter_params_y->taps / 2 - 1;
   const int fo_horiz = filter_params_x->taps / 2 - 1;
   (void)conv_params;
-
   // horizontal filter
   const uint8_t *src_horiz = src - fo_vert * src_stride;
   const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
@@ -267,6 +266,12 @@
       av1_get_interp_filter_params(interp_filter[1 + 2 * conv_params->ref]);
   InterpFilterParams filter_params_y =
       av1_get_interp_filter_params(interp_filter[0 + 2 * conv_params->ref]);
+  if (filter_params_x.interp_filter == MULTITAP_SHARP &&
+      filter_params_y.interp_filter == MULTITAP_SHARP) {
+    // Avoid using the 12-tap filter in both directions: the y direction falls
+    // back to EIGHTTAP_SHARP, which reduces hardware implementation cost.
+    filter_params_y = av1_get_interp_filter_params(EIGHTTAP_SHARP);
+  }
   av1_convolve_2d(src, src_stride, conv_params->dst, conv_params->dst_stride, w,
                   h, &filter_params_x, &filter_params_y, subpel_x_q4,
                   subpel_y_q4, conv_params);