Use Neon I8MM 8-tap path for 6-tap filters in av1_convolve_y_sr

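The 8-tap Neon I8MM path is faster than the Armv8.0 Neon 6-tap path
that 6-tap filters are currently dispatched to, so route 6-tap filters
through the 8-tap Neon I8MM path instead. Filters with 4 or fewer taps
continue to use the Armv8.0 Neon implementation.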

Change-Id: I0e0235816625d00208cd6acf26069e1276cd7656
diff --git a/av1/common/arm/convolve_neon_i8mm.c b/av1/common/arm/convolve_neon_i8mm.c
index a826316..f22017b 100644
--- a/av1/common/arm/convolve_neon_i8mm.c
+++ b/av1/common/arm/convolve_neon_i8mm.c
@@ -712,26 +712,20 @@
   }
 
   const int y_filter_taps = get_filter_tap(filter_params_y, subpel_y_qn);
-
-  if (y_filter_taps <= 6) {
-    av1_convolve_y_sr_neon(src, src_stride, dst, dst_stride, w, h,
-                           filter_params_y, subpel_y_qn);
-    return;
-  }
-
-  const int vert_offset = y_filter_taps / 2 - 1;
-  src -= vert_offset * src_stride;
-
   const int16_t *y_filter_ptr = av1_get_interp_filter_subpel_kernel(
       filter_params_y, subpel_y_qn & SUBPEL_MASK);
 
-  if (y_filter_taps > 8) {
-    convolve_y_sr_12tap_neon_i8mm(src, src_stride, dst, dst_stride, w, h,
-                                  y_filter_ptr);
-    return;
+  if (y_filter_taps <= 4) {
+    av1_convolve_y_sr_neon(src, src_stride, dst, dst_stride, w, h,
+                           filter_params_y, subpel_y_qn);
+  } else if (y_filter_taps == 12) {
+    convolve_y_sr_12tap_neon_i8mm(src - 5 * src_stride, src_stride, dst,
+                                  dst_stride, w, h, y_filter_ptr);
+  } else {
+    // 6-tap or 8-tap.
+    convolve_y_sr_8tap_neon_i8mm(src - 3 * src_stride, src_stride, dst,
+                                 dst_stride, w, h, y_filter_ptr);
   }
-  convolve_y_sr_8tap_neon_i8mm(src, src_stride, dst, dst_stride, w, h,
-                               y_filter_ptr);
 }
 
 static inline int16x8_t convolve8_8_2d_h(uint8x16_t samples,