Use distinct helper for 8-tap Neon dist_wtd_convolve_y

Move the 8-tap filter implementation of av1_dist_wtd_convolve_y_neon
into a distinct helper function, making top-level control flow more
readable.

Change-Id: I1d7014fbb390dea23a24d35c113a8eb178e9c635
diff --git a/av1/common/arm/jnt_convolve_neon.c b/av1/common/arm/jnt_convolve_neon.c
index 459f885..8cb17fc 100644
--- a/av1/common/arm/jnt_convolve_neon.c
+++ b/av1/common/arm/jnt_convolve_neon.c
@@ -2123,10 +2123,10 @@ return vshrq_n_s16(sum, ROUND0_BITS - 1); } -void dist_wtd_convolve_y_6tap_neon(const uint8_t *src_ptr, int src_stride, - uint8_t *dst8_ptr, const int dst8_stride, - int w, int h, const int16x8_t y_filter, - ConvolveParams *conv_params) { +static INLINE void dist_wtd_convolve_y_6tap_neon( + const uint8_t *src_ptr, int src_stride, uint8_t *dst8_ptr, + const int dst8_stride, int w, int h, const int16x8_t y_filter, + ConvolveParams *conv_params) { CONV_BUF_TYPE *dst_ptr = conv_params->dst; const int dst_stride = conv_params->dst_stride; const int bd = 8; @@ -2408,30 +2408,12 @@ } } -void av1_dist_wtd_convolve_y_neon(const uint8_t *src, int src_stride, - uint8_t *dst8, int dst8_stride, int w, int h, - const InterpFilterParams *filter_params_y, - const int subpel_y_qn, - ConvolveParams *conv_params) { - assert(!(w % 4)); - assert(!(h % 4)); - - // vertical filter - const int16_t *y_filter_ptr = av1_get_interp_filter_subpel_kernel( - filter_params_y, subpel_y_qn & SUBPEL_MASK); - // Filter values are even, so downshift by 1 to reduce intermediate - // precision requirements. 
- const int16x8_t y_filter = vshrq_n_s16(vld1q_s16(y_filter_ptr), 1); - - const int vert_offset = filter_params_y->taps / 2 - 1; - const uint8_t *src_ptr = src - (vert_offset * src_stride); - - if (get_filter_tap(filter_params_y, subpel_y_qn) <= 6) { - dist_wtd_convolve_y_6tap_neon(src_ptr + src_stride, src_stride, dst8, - dst8_stride, w, h, y_filter, conv_params); - return; - } - +static INLINE void dist_wtd_convolve_y_8tap_neon(const uint8_t *src_ptr, + int src_stride, uint8_t *dst8, + const int dst8_stride, int w, + int h, + const int16x8_t y_filter, + ConvolveParams *conv_params) { CONV_BUF_TYPE *dst_ptr = conv_params->dst; const int dst_stride = conv_params->dst_stride; const int bd = 8; @@ -2799,3 +2781,30 @@ } while (width > 0); } } + +void av1_dist_wtd_convolve_y_neon(const uint8_t *src, int src_stride, + uint8_t *dst8, int dst8_stride, int w, int h, + const InterpFilterParams *filter_params_y, + const int subpel_y_qn, + ConvolveParams *conv_params) { + assert(!(w % 4)); + assert(!(h % 4)); + + // vertical filter + const int16_t *y_filter_ptr = av1_get_interp_filter_subpel_kernel( + filter_params_y, subpel_y_qn & SUBPEL_MASK); + // Filter values are even, so downshift by 1 to reduce intermediate + // precision requirements. + const int16x8_t y_filter = vshrq_n_s16(vld1q_s16(y_filter_ptr), 1); + + const int vert_offset = filter_params_y->taps / 2 - 1; + const uint8_t *src_ptr = src - (vert_offset * src_stride); + + if (get_filter_tap(filter_params_y, subpel_y_qn) <= 6) { + dist_wtd_convolve_y_6tap_neon(src_ptr + src_stride, src_stride, dst8, + dst8_stride, w, h, y_filter, conv_params); + } else { + dist_wtd_convolve_y_8tap_neon(src_ptr, src_stride, dst8, dst8_stride, w, h, + y_filter, conv_params); + } +}