Add Neon versions of vp9_sub_pixel_variance16x16(),
vp9_variance16x16(), and vp9_get16x16var().

On a Nexus 7, vpxenc (in realtime mode, speed -12)
reported a performance improvement of ~16.7%.

Change-Id: Ib163aa99f56e680194aabe00dacdd7f0899a4ecb
diff --git a/test/variance_test.cc b/test/variance_test.cc
index 9dc7c6a..83b7435 100644
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
@@ -756,6 +756,18 @@
     ::testing::Values(make_tuple(5, 5, subpel_avg_variance32x32_avx2),
                       make_tuple(6, 6, subpel_avg_variance64x64_avx2)));
 #endif  // HAVE_AVX2
+#if HAVE_NEON
+const vp9_variance_fn_t variance16x16_neon = vp9_variance16x16_neon;
+INSTANTIATE_TEST_CASE_P(
+    NEON, VP9VarianceTest,
+    ::testing::Values(make_tuple(4, 4, variance16x16_neon)));
+
+const vp9_subpixvariance_fn_t subpel_variance16x16_neon =
+    vp9_sub_pixel_variance16x16_neon;
+INSTANTIATE_TEST_CASE_P(
+    NEON, VP9SubpelVarianceTest,
+    ::testing::Values(make_tuple(4, 4, subpel_variance16x16_neon)));
+#endif  // HAVE_NEON
 #endif  // CONFIG_VP9_ENCODER
 
 }  // namespace vp9