Use distinct helper for 8-tap Neon dist_wtd_convolve_y

Move the 8-tap filter implementation of av1_dist_wtd_convolve_y_neon
into a distinct helper function, making the top-level control flow
more readable. Also mark the existing 6-tap helper static INLINE.

Change-Id: I1d7014fbb390dea23a24d35c113a8eb178e9c635
diff --git a/av1/common/arm/jnt_convolve_neon.c b/av1/common/arm/jnt_convolve_neon.c
index 459f885..8cb17fc 100644
--- a/av1/common/arm/jnt_convolve_neon.c
+++ b/av1/common/arm/jnt_convolve_neon.c
@@ -2123,10 +2123,10 @@
   return vshrq_n_s16(sum, ROUND0_BITS - 1);
 }
 
-void dist_wtd_convolve_y_6tap_neon(const uint8_t *src_ptr, int src_stride,
-                                   uint8_t *dst8_ptr, const int dst8_stride,
-                                   int w, int h, const int16x8_t y_filter,
-                                   ConvolveParams *conv_params) {
+static INLINE void dist_wtd_convolve_y_6tap_neon(
+    const uint8_t *src_ptr, int src_stride, uint8_t *dst8_ptr,
+    const int dst8_stride, int w, int h, const int16x8_t y_filter,
+    ConvolveParams *conv_params) {
   CONV_BUF_TYPE *dst_ptr = conv_params->dst;
   const int dst_stride = conv_params->dst_stride;
   const int bd = 8;
@@ -2408,30 +2408,12 @@
   }
 }
 
-void av1_dist_wtd_convolve_y_neon(const uint8_t *src, int src_stride,
-                                  uint8_t *dst8, int dst8_stride, int w, int h,
-                                  const InterpFilterParams *filter_params_y,
-                                  const int subpel_y_qn,
-                                  ConvolveParams *conv_params) {
-  assert(!(w % 4));
-  assert(!(h % 4));
-
-  // vertical filter
-  const int16_t *y_filter_ptr = av1_get_interp_filter_subpel_kernel(
-      filter_params_y, subpel_y_qn & SUBPEL_MASK);
-  // Filter values are even, so downshift by 1 to reduce intermediate
-  // precision requirements.
-  const int16x8_t y_filter = vshrq_n_s16(vld1q_s16(y_filter_ptr), 1);
-
-  const int vert_offset = filter_params_y->taps / 2 - 1;
-  const uint8_t *src_ptr = src - (vert_offset * src_stride);
-
-  if (get_filter_tap(filter_params_y, subpel_y_qn) <= 6) {
-    dist_wtd_convolve_y_6tap_neon(src_ptr + src_stride, src_stride, dst8,
-                                  dst8_stride, w, h, y_filter, conv_params);
-    return;
-  }
-
+static INLINE void dist_wtd_convolve_y_8tap_neon(const uint8_t *src_ptr,
+                                                 int src_stride, uint8_t *dst8,
+                                                 const int dst8_stride, int w,
+                                                 int h,
+                                                 const int16x8_t y_filter,
+                                                 ConvolveParams *conv_params) {
   CONV_BUF_TYPE *dst_ptr = conv_params->dst;
   const int dst_stride = conv_params->dst_stride;
   const int bd = 8;
@@ -2799,3 +2781,30 @@
     } while (width > 0);
   }
 }
+
+void av1_dist_wtd_convolve_y_neon(const uint8_t *src, int src_stride,
+                                  uint8_t *dst8, int dst8_stride, int w, int h,
+                                  const InterpFilterParams *filter_params_y,
+                                  const int subpel_y_qn,
+                                  ConvolveParams *conv_params) {
+  assert(!(w % 4));
+  assert(!(h % 4));
+
+  // Look up the vertical filter kernel for this sub-pixel position.
+  const int16_t *y_filter_ptr = av1_get_interp_filter_subpel_kernel(
+      filter_params_y, subpel_y_qn & SUBPEL_MASK);
+  // Filter values are even, so downshift by 1 to reduce intermediate
+  // precision requirements.
+  const int16x8_t y_filter = vshrq_n_s16(vld1q_s16(y_filter_ptr), 1);
+
+  const int vert_offset = filter_params_y->taps / 2 - 1;  // Rows above src.
+  const uint8_t *src_ptr = src - (vert_offset * src_stride);
+
+  if (get_filter_tap(filter_params_y, subpel_y_qn) <= 6) {  // Skip one row.
+    dist_wtd_convolve_y_6tap_neon(src_ptr + src_stride, src_stride, dst8,
+                                  dst8_stride, w, h, y_filter, conv_params);
+  } else {
+    dist_wtd_convolve_y_8tap_neon(src_ptr, src_stride, dst8, dst8_stride, w, h,
+                                  y_filter, conv_params);
+  }
+}