Use distinct helper for 8-tap Neon dist_wtd_convolve_y
Move the 8-tap filter implementation of av1_dist_wtd_convolve_y_neon
into a distinct helper function, making the top-level control flow more
readable. Also mark the existing 6-tap helper as static INLINE.
Change-Id: I1d7014fbb390dea23a24d35c113a8eb178e9c635
diff --git a/av1/common/arm/jnt_convolve_neon.c b/av1/common/arm/jnt_convolve_neon.c
index 459f885..8cb17fc 100644
--- a/av1/common/arm/jnt_convolve_neon.c
+++ b/av1/common/arm/jnt_convolve_neon.c
@@ -2123,10 +2123,10 @@
return vshrq_n_s16(sum, ROUND0_BITS - 1);
}
-void dist_wtd_convolve_y_6tap_neon(const uint8_t *src_ptr, int src_stride,
- uint8_t *dst8_ptr, const int dst8_stride,
- int w, int h, const int16x8_t y_filter,
- ConvolveParams *conv_params) {
+static INLINE void dist_wtd_convolve_y_6tap_neon(
+ const uint8_t *src_ptr, int src_stride, uint8_t *dst8_ptr,
+ const int dst8_stride, int w, int h, const int16x8_t y_filter,
+ ConvolveParams *conv_params) {
CONV_BUF_TYPE *dst_ptr = conv_params->dst;
const int dst_stride = conv_params->dst_stride;
const int bd = 8;
@@ -2408,30 +2408,12 @@
}
}
-void av1_dist_wtd_convolve_y_neon(const uint8_t *src, int src_stride,
- uint8_t *dst8, int dst8_stride, int w, int h,
- const InterpFilterParams *filter_params_y,
- const int subpel_y_qn,
- ConvolveParams *conv_params) {
- assert(!(w % 4));
- assert(!(h % 4));
-
- // vertical filter
- const int16_t *y_filter_ptr = av1_get_interp_filter_subpel_kernel(
- filter_params_y, subpel_y_qn & SUBPEL_MASK);
- // Filter values are even, so downshift by 1 to reduce intermediate
- // precision requirements.
- const int16x8_t y_filter = vshrq_n_s16(vld1q_s16(y_filter_ptr), 1);
-
- const int vert_offset = filter_params_y->taps / 2 - 1;
- const uint8_t *src_ptr = src - (vert_offset * src_stride);
-
- if (get_filter_tap(filter_params_y, subpel_y_qn) <= 6) {
- dist_wtd_convolve_y_6tap_neon(src_ptr + src_stride, src_stride, dst8,
- dst8_stride, w, h, y_filter, conv_params);
- return;
- }
-
+static INLINE void dist_wtd_convolve_y_8tap_neon(const uint8_t *src_ptr,
+ int src_stride, uint8_t *dst8,
+ const int dst8_stride, int w,
+ int h,
+ const int16x8_t y_filter,
+ ConvolveParams *conv_params) {
CONV_BUF_TYPE *dst_ptr = conv_params->dst;
const int dst_stride = conv_params->dst_stride;
const int bd = 8;
@@ -2799,3 +2781,30 @@
} while (width > 0);
}
}
+
+void av1_dist_wtd_convolve_y_neon(const uint8_t *src, int src_stride,
+ uint8_t *dst8, int dst8_stride, int w, int h,
+ const InterpFilterParams *filter_params_y,
+ const int subpel_y_qn,
+ ConvolveParams *conv_params) {
+ assert(!(w % 4));
+ assert(!(h % 4));
+
+ // Look up the vertical filter kernel for this sub-pixel position.
+ const int16_t *y_filter_ptr = av1_get_interp_filter_subpel_kernel(
+ filter_params_y, subpel_y_qn & SUBPEL_MASK);
+ // All filter taps are even, so halve them up front (shift right by 1) to
+ // reduce the intermediate precision the helpers below must carry.
+ const int16x8_t y_filter = vshrq_n_s16(vld1q_s16(y_filter_ptr), 1);
+
+ const int vert_offset = filter_params_y->taps / 2 - 1;
+ const uint8_t *src_ptr = src - (vert_offset * src_stride);
+
+ if (get_filter_tap(filter_params_y, subpel_y_qn) <= 6) {
+ dist_wtd_convolve_y_6tap_neon(src_ptr + src_stride, src_stride, dst8,
+ dst8_stride, w, h, y_filter, conv_params);
+ } else {
+ dist_wtd_convolve_y_8tap_neon(src_ptr, src_stride, dst8, dst8_stride, w, h,
+ y_filter, conv_params);
+ }
+}