Add ARM Neon optimization of av1_convolve_y_sr
Block size and speedup relative to the C implementation:
8x4 6.88x
8x8 8.54x
16x8 9.56x
16x16 10.53x
32x16 10.80x
32x32 11.73x
64x32 11.51x
64x64 11.86x
128x64 11.35x
128x128 11.89x
Change-Id: Ib998e1cb7a8b4cb2b7b8a7ec8e59f3906fcbd3f2
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index d80b671..3708bda 100755
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -319,7 +319,7 @@
specialize qw/av1_convolve_2d_sr sse2 avx2/;
specialize qw/av1_convolve_2d_copy_sr sse2 avx2/;
specialize qw/av1_convolve_x_sr sse2 avx2 neon/;
- specialize qw/av1_convolve_y_sr sse2 avx2/;
+ specialize qw/av1_convolve_y_sr sse2 avx2 neon/;
specialize qw/av1_convolve_2d_scale sse4_1/;
specialize qw/av1_jnt_convolve_2d ssse3 avx2/;
specialize qw/av1_jnt_convolve_2d_copy sse2 avx2/;