Neon: Add aom_variance128x128_neon and test

Change-Id: I45b779c14fbe2f0a2679de2d2bf89ce1a3a5adf4
diff --git a/aom_dsp/aom_dsp_rtcd_defs.pl b/aom_dsp/aom_dsp_rtcd_defs.pl
index a6c09e2..eabb9bf 100755
--- a/aom_dsp/aom_dsp_rtcd_defs.pl
+++ b/aom_dsp/aom_dsp_rtcd_defs.pl
@@ -1106,7 +1106,7 @@
     add_proto qw/uint32_t/, "aom_sub_pixel_avg_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
     add_proto qw/uint32_t/, "aom_dist_wtd_sub_pixel_avg_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred, const DIST_WTD_COMP_PARAMS *jcp_param";
   }
-  specialize qw/aom_variance128x128   sse2 avx2         /;
+  specialize qw/aom_variance128x128   sse2 avx2 neon    /;
   specialize qw/aom_variance128x64    sse2 avx2         /;
   specialize qw/aom_variance64x128    sse2 avx2         /;
   specialize qw/aom_variance64x64     sse2 avx2 neon msa/;
diff --git a/aom_dsp/arm/variance_neon.c b/aom_dsp/arm/variance_neon.c
index d94f6e5..82d1113 100644
--- a/aom_dsp/arm/variance_neon.c
+++ b/aom_dsp/arm/variance_neon.c
@@ -130,6 +130,24 @@
   return *sse - (unsigned int)(((int64_t)sum1 * sum1) >> 12);
 }
 
+// 128x128 variance from sixteen 128x8 strips; >> 14 divides by 128 * 128.
+unsigned int aom_variance128x128_neon(const uint8_t *a, int a_stride,
+                                      const uint8_t *b, int b_stride,
+                                      unsigned int *sse) {
+  int total_sum = 0;
+  uint32_t total_sse = 0;
+  for (int row = 0; row < 128; row += 8) {
+    int strip_sum;
+    uint32_t strip_sse;
+    variance_neon_w8(a + row * a_stride, a_stride, b + row * b_stride,
+                     b_stride, 128, 8, &strip_sse, &strip_sum);
+    total_sum += strip_sum;
+    total_sse += strip_sse;
+  }
+  *sse = total_sse;
+  return *sse - (unsigned int)(((int64_t)total_sum * total_sum) >> 14);
+}
+
 unsigned int aom_variance16x8_neon(const unsigned char *src_ptr,
                                    int source_stride,
                                    const unsigned char *ref_ptr,
diff --git a/test/variance_test.cc b/test/variance_test.cc
index ff0889c..d39cd5d 100644
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
@@ -2302,7 +2302,8 @@
 
 INSTANTIATE_TEST_SUITE_P(
     NEON, AvxVarianceTest,
-    ::testing::Values(VarianceParams(6, 6, &aom_variance64x64_neon),
+    ::testing::Values(VarianceParams(7, 7, &aom_variance128x128_neon),
+                      VarianceParams(6, 6, &aom_variance64x64_neon),
                       VarianceParams(6, 5, &aom_variance64x32_neon),
                       VarianceParams(5, 6, &aom_variance32x64_neon),
                       VarianceParams(5, 5, &aom_variance32x32_neon),