Add ARM Neon optimization of av1_convolve_y_sr

Block size  Speedup relative to the C implementation
8x4         6.88x
8x8         8.54x
16x8        9.56x
16x16      10.53x
32x16      10.80x
32x32      11.73x
64x32      11.51x
64x64      11.86x
128x64     11.35x
128x128    11.89x

Change-Id: Ib998e1cb7a8b4cb2b7b8a7ec8e59f3906fcbd3f2
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index d80b671..3708bda 100755
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -319,7 +319,7 @@
   specialize qw/av1_convolve_2d_sr sse2 avx2/;
   specialize qw/av1_convolve_2d_copy_sr sse2 avx2/;
   specialize qw/av1_convolve_x_sr sse2 avx2 neon/;
-  specialize qw/av1_convolve_y_sr sse2 avx2/;
+  specialize qw/av1_convolve_y_sr sse2 avx2 neon/;
   specialize qw/av1_convolve_2d_scale sse4_1/;
   specialize qw/av1_jnt_convolve_2d ssse3 avx2/;
   specialize qw/av1_jnt_convolve_2d_copy sse2 avx2/;