Add missing variance tests for 4:1 blocksizes

Also fixes some functions in highbd_variance_sse2.c

Change-Id: Ia10d8fb17075f7d983228773d6ebcc2f7eb83e12
diff --git a/aom_dsp/aom_dsp_rtcd_defs.pl b/aom_dsp/aom_dsp_rtcd_defs.pl
index aa97ef6e..4daadab 100755
--- a/aom_dsp/aom_dsp_rtcd_defs.pl
+++ b/aom_dsp/aom_dsp_rtcd_defs.pl
@@ -1035,8 +1035,8 @@
   specialize qw/aom_variance64x32     sse2 avx2 neon msa/;
   specialize qw/aom_variance32x64     sse2 avx2 neon msa/;
   specialize qw/aom_variance32x32     sse2 avx2 neon msa/;
-  specialize qw/aom_variance32x16     sse2 avx2 msa/;
-  specialize qw/aom_variance16x32     sse2 avx2 msa/;
+  specialize qw/aom_variance32x16     sse2 avx2      msa/;
+  specialize qw/aom_variance16x32     sse2 avx2      msa/;
   specialize qw/aom_variance16x16     sse2 avx2 neon msa/;
   specialize qw/aom_variance16x8      sse2 avx2 neon msa/;
   specialize qw/aom_variance8x16      sse2      neon msa/;
@@ -1085,6 +1085,7 @@
   specialize qw/aom_variance32x8 sse2 avx2/;
   specialize qw/aom_variance16x64 sse2 avx2/;
   specialize qw/aom_variance64x16 sse2 avx2/;
+
   specialize qw/aom_sub_pixel_variance4x16 sse2 ssse3/;
   specialize qw/aom_sub_pixel_variance16x4 sse2 ssse3/;
   specialize qw/aom_sub_pixel_variance8x32 sse2 ssse3/;
diff --git a/aom_dsp/x86/highbd_variance_sse2.c b/aom_dsp/x86/highbd_variance_sse2.c
index 7c80df7..17f3018 100644
--- a/aom_dsp/x86/highbd_variance_sse2.c
+++ b/aom_dsp/x86/highbd_variance_sse2.c
@@ -192,7 +192,6 @@
 VAR_FN(16, 8, 8, 7);
 VAR_FN(8, 16, 8, 7);
 VAR_FN(8, 8, 8, 6);
-VAR_FN(16, 4, 16, 6);
 VAR_FN(8, 32, 8, 8);
 VAR_FN(32, 8, 8, 8);
 VAR_FN(16, 64, 16, 10);
@@ -303,19 +302,19 @@
       sse += sse2;                                                             \
       if (w > wf) {                                                            \
         se2 = aom_highbd_sub_pixel_variance##wf##xh_##opt(                     \
-            src + 16, src_stride, x_offset, y_offset, dst + 16, dst_stride, h, \
+            src + wf, src_stride, x_offset, y_offset, dst + wf, dst_stride, h, \
             &sse2, NULL, NULL);                                                \
         se += se2;                                                             \
         sse += sse2;                                                           \
         if (w > wf * 2) {                                                      \
           se2 = aom_highbd_sub_pixel_variance##wf##xh_##opt(                   \
-              src + 32, src_stride, x_offset, y_offset, dst + 32, dst_stride,  \
-              h, &sse2, NULL, NULL);                                           \
+              src + 2 * wf, src_stride, x_offset, y_offset, dst + 2 * wf,      \
+              dst_stride, h, &sse2, NULL, NULL);                               \
           se += se2;                                                           \
           sse += sse2;                                                         \
           se2 = aom_highbd_sub_pixel_variance##wf##xh_##opt(                   \
-              src + 48, src_stride, x_offset, y_offset, dst + 48, dst_stride,  \
-              h, &sse2, NULL, NULL);                                           \
+              src + 3 * wf, src_stride, x_offset, y_offset, dst + 3 * wf,      \
+              dst_stride, h, &sse2, NULL, NULL);                               \
           se += se2;                                                           \
           sse += sse2;                                                         \
         }                                                                      \
@@ -346,19 +345,19 @@
       if (w > wf) {                                                            \
         uint32_t sse2;                                                         \
         se2 = aom_highbd_sub_pixel_variance##wf##xh_##opt(                     \
-            src + 16, src_stride, x_offset, y_offset, dst + 16, dst_stride, h, \
+            src + wf, src_stride, x_offset, y_offset, dst + wf, dst_stride, h, \
             &sse2, NULL, NULL);                                                \
         se += se2;                                                             \
         long_sse += sse2;                                                      \
         if (w > wf * 2) {                                                      \
           se2 = aom_highbd_sub_pixel_variance##wf##xh_##opt(                   \
-              src + 32, src_stride, x_offset, y_offset, dst + 32, dst_stride,  \
-              h, &sse2, NULL, NULL);                                           \
+              src + 2 * wf, src_stride, x_offset, y_offset, dst + 2 * wf,      \
+              dst_stride, h, &sse2, NULL, NULL);                               \
           se += se2;                                                           \
           long_sse += sse2;                                                    \
           se2 = aom_highbd_sub_pixel_variance##wf##xh_##opt(                   \
-              src + 48, src_stride, x_offset, y_offset, dst + 48, dst_stride,  \
-              h, &sse2, NULL, NULL);                                           \
+              src + 3 * wf, src_stride, x_offset, y_offset, dst + 3 * wf,      \
+              dst_stride, h, &sse2, NULL, NULL);                               \
           se += se2;                                                           \
           long_sse += sse2;                                                    \
         }                                                                      \
@@ -397,19 +396,19 @@
         long_sse += sse2;                                                      \
         if (w > wf) {                                                          \
           se2 = aom_highbd_sub_pixel_variance##wf##xh_##opt(                   \
-              src_tmp + 16, src_stride, x_offset, y_offset, dst_tmp + 16,      \
+              src_tmp + wf, src_stride, x_offset, y_offset, dst_tmp + wf,      \
               dst_stride, height, &sse2, NULL, NULL);                          \
           se += se2;                                                           \
           long_sse += sse2;                                                    \
           if (w > wf * 2) {                                                    \
             se2 = aom_highbd_sub_pixel_variance##wf##xh_##opt(                 \
-                src_tmp + 32, src_stride, x_offset, y_offset, dst_tmp + 32,    \
-                dst_stride, height, &sse2, NULL, NULL);                        \
+                src_tmp + 2 * wf, src_stride, x_offset, y_offset,              \
+                dst_tmp + 2 * wf, dst_stride, height, &sse2, NULL, NULL);      \
             se += se2;                                                         \
             long_sse += sse2;                                                  \
             se2 = aom_highbd_sub_pixel_variance##wf##xh_##opt(                 \
-                src_tmp + 48, src_stride, x_offset, y_offset, dst_tmp + 48,    \
-                dst_stride, height, &sse2, NULL, NULL);                        \
+                src_tmp + 3 * wf, src_stride, x_offset, y_offset,              \
+                dst_tmp + 3 * wf, dst_stride, height, &sse2, NULL, NULL);      \
             se += se2;                                                         \
             long_sse += sse2;                                                  \
           }                                                                    \
@@ -479,19 +478,19 @@
     if (w > wf) {                                                              \
       uint32_t sse2;                                                           \
       int se2 = aom_highbd_sub_pixel_avg_variance##wf##xh_##opt(               \
-          src + 16, src_stride, x_offset, y_offset, dst + 16, dst_stride,      \
-          sec + 16, w, h, &sse2, NULL, NULL);                                  \
+          src + wf, src_stride, x_offset, y_offset, dst + wf, dst_stride,      \
+          sec + wf, w, h, &sse2, NULL, NULL);                                  \
       se += se2;                                                               \
       sse += sse2;                                                             \
       if (w > wf * 2) {                                                        \
         se2 = aom_highbd_sub_pixel_avg_variance##wf##xh_##opt(                 \
-            src + 32, src_stride, x_offset, y_offset, dst + 32, dst_stride,    \
-            sec + 32, w, h, &sse2, NULL, NULL);                                \
+            src + 2 * wf, src_stride, x_offset, y_offset, dst + 2 * wf,        \
+            dst_stride, sec + 2 * wf, w, h, &sse2, NULL, NULL);                \
         se += se2;                                                             \
         sse += sse2;                                                           \
         se2 = aom_highbd_sub_pixel_avg_variance##wf##xh_##opt(                 \
-            src + 48, src_stride, x_offset, y_offset, dst + 48, dst_stride,    \
-            sec + 48, w, h, &sse2, NULL, NULL);                                \
+            src + 3 * wf, src_stride, x_offset, y_offset, dst + 3 * wf,        \
+            dst_stride, sec + 3 * wf, w, h, &sse2, NULL, NULL);                \
         se += se2;                                                             \
         sse += sse2;                                                           \
       }                                                                        \
@@ -515,19 +514,19 @@
     if (w > wf) {                                                              \
       uint32_t sse2;                                                           \
       int se2 = aom_highbd_sub_pixel_avg_variance##wf##xh_##opt(               \
-          src + 16, src_stride, x_offset, y_offset, dst + 16, dst_stride,      \
-          sec + 16, w, h, &sse2, NULL, NULL);                                  \
+          src + wf, src_stride, x_offset, y_offset, dst + wf, dst_stride,      \
+          sec + wf, w, h, &sse2, NULL, NULL);                                  \
       se += se2;                                                               \
       sse += sse2;                                                             \
       if (w > wf * 2) {                                                        \
         se2 = aom_highbd_sub_pixel_avg_variance##wf##xh_##opt(                 \
-            src + 32, src_stride, x_offset, y_offset, dst + 32, dst_stride,    \
-            sec + 32, w, h, &sse2, NULL, NULL);                                \
+            src + 2 * wf, src_stride, x_offset, y_offset, dst + 2 * wf,        \
+            dst_stride, sec + 2 * wf, w, h, &sse2, NULL, NULL);                \
         se += se2;                                                             \
         sse += sse2;                                                           \
         se2 = aom_highbd_sub_pixel_avg_variance##wf##xh_##opt(                 \
-            src + 48, src_stride, x_offset, y_offset, dst + 48, dst_stride,    \
-            sec + 48, w, h, &sse2, NULL, NULL);                                \
+            src + 3 * wf, src_stride, x_offset, y_offset, dst + 3 * wf,        \
+            dst_stride, sec + 3 * wf, w, h, &sse2, NULL, NULL);                \
         se += se2;                                                             \
         sse += sse2;                                                           \
       }                                                                        \
@@ -562,22 +561,22 @@
       long_sse += sse2;                                                        \
       if (w > wf) {                                                            \
         se2 = aom_highbd_sub_pixel_avg_variance##wf##xh_##opt(                 \
-            src + 16 + (start_row * src_stride), src_stride, x_offset,         \
-            y_offset, dst + 16 + (start_row * dst_stride), dst_stride,         \
-            sec + 16 + (start_row * w), w, height, &sse2, NULL, NULL);         \
+            src + wf + (start_row * src_stride), src_stride, x_offset,         \
+            y_offset, dst + wf + (start_row * dst_stride), dst_stride,         \
+            sec + wf + (start_row * w), w, height, &sse2, NULL, NULL);         \
         se += se2;                                                             \
         long_sse += sse2;                                                      \
         if (w > wf * 2) {                                                      \
           se2 = aom_highbd_sub_pixel_avg_variance##wf##xh_##opt(               \
-              src + 32 + (start_row * src_stride), src_stride, x_offset,       \
-              y_offset, dst + 32 + (start_row * dst_stride), dst_stride,       \
-              sec + 32 + (start_row * w), w, height, &sse2, NULL, NULL);       \
+              src + 2 * wf + (start_row * src_stride), src_stride, x_offset,   \
+              y_offset, dst + 2 * wf + (start_row * dst_stride), dst_stride,   \
+              sec + 2 * wf + (start_row * w), w, height, &sse2, NULL, NULL);   \
           se += se2;                                                           \
           long_sse += sse2;                                                    \
           se2 = aom_highbd_sub_pixel_avg_variance##wf##xh_##opt(               \
-              src + 48 + (start_row * src_stride), src_stride, x_offset,       \
-              y_offset, dst + 48 + (start_row * dst_stride), dst_stride,       \
-              sec + 48 + (start_row * w), w, height, &sse2, NULL, NULL);       \
+              src + 3 * wf + (start_row * src_stride), src_stride, x_offset,   \
+              y_offset, dst + 3 * wf + (start_row * dst_stride), dst_stride,   \
+              sec + 3 * wf + (start_row * w), w, height, &sse2, NULL, NULL);   \
           se += se2;                                                           \
           long_sse += sse2;                                                    \
         }                                                                      \
diff --git a/test/variance_test.cc b/test/variance_test.cc
index 1942de0..a679808 100644
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
@@ -1114,7 +1114,14 @@
                       VarianceParams(3, 3, &aom_variance8x8_c),
                       VarianceParams(3, 2, &aom_variance8x4_c),
                       VarianceParams(2, 3, &aom_variance4x8_c),
-                      VarianceParams(2, 2, &aom_variance4x4_c)));
+                      VarianceParams(2, 2, &aom_variance4x4_c),
+
+                      VarianceParams(6, 4, &aom_variance64x16_c),
+                      VarianceParams(4, 6, &aom_variance16x64_c),
+                      VarianceParams(5, 3, &aom_variance32x8_c),
+                      VarianceParams(3, 5, &aom_variance8x32_c),
+                      VarianceParams(4, 2, &aom_variance16x4_c),
+                      VarianceParams(2, 4, &aom_variance4x16_c)));
 
 typedef TestParams<SubpixVarMxNFunc> SubpelVarianceParams;
 INSTANTIATE_TEST_CASE_P(
@@ -1135,7 +1142,14 @@
         SubpelVarianceParams(3, 3, &aom_sub_pixel_variance8x8_c, 0),
         SubpelVarianceParams(3, 2, &aom_sub_pixel_variance8x4_c, 0),
         SubpelVarianceParams(2, 3, &aom_sub_pixel_variance4x8_c, 0),
-        SubpelVarianceParams(2, 2, &aom_sub_pixel_variance4x4_c, 0)));
+        SubpelVarianceParams(2, 2, &aom_sub_pixel_variance4x4_c, 0),
+
+        SubpelVarianceParams(6, 4, &aom_sub_pixel_variance64x16_c, 0),
+        SubpelVarianceParams(4, 6, &aom_sub_pixel_variance16x64_c, 0),
+        SubpelVarianceParams(5, 3, &aom_sub_pixel_variance32x8_c, 0),
+        SubpelVarianceParams(3, 5, &aom_sub_pixel_variance8x32_c, 0),
+        SubpelVarianceParams(4, 2, &aom_sub_pixel_variance16x4_c, 0),
+        SubpelVarianceParams(2, 4, &aom_sub_pixel_variance4x16_c, 0)));
 
 typedef TestParams<SubpixAvgVarMxNFunc> SubpelAvgVarianceParams;
 INSTANTIATE_TEST_CASE_P(
@@ -1156,7 +1170,14 @@
         SubpelAvgVarianceParams(3, 3, &aom_sub_pixel_avg_variance8x8_c, 0),
         SubpelAvgVarianceParams(3, 2, &aom_sub_pixel_avg_variance8x4_c, 0),
         SubpelAvgVarianceParams(2, 3, &aom_sub_pixel_avg_variance4x8_c, 0),
-        SubpelAvgVarianceParams(2, 2, &aom_sub_pixel_avg_variance4x4_c, 0)));
+        SubpelAvgVarianceParams(2, 2, &aom_sub_pixel_avg_variance4x4_c, 0),
+
+        SubpelAvgVarianceParams(6, 4, &aom_sub_pixel_avg_variance64x16_c, 0),
+        SubpelAvgVarianceParams(4, 6, &aom_sub_pixel_avg_variance16x64_c, 0),
+        SubpelAvgVarianceParams(5, 3, &aom_sub_pixel_avg_variance32x8_c, 0),
+        SubpelAvgVarianceParams(3, 5, &aom_sub_pixel_avg_variance8x32_c, 0),
+        SubpelAvgVarianceParams(4, 2, &aom_sub_pixel_avg_variance16x4_c, 0),
+        SubpelAvgVarianceParams(2, 4, &aom_sub_pixel_avg_variance4x16_c, 0)));
 
 typedef TestParams<DistWtdSubpixAvgVarMxNFunc> DistWtdSubpelAvgVarianceParams;
 INSTANTIATE_TEST_CASE_P(
@@ -1186,7 +1207,21 @@
                       DistWtdSubpelAvgVarianceParams(
                           2, 3, &aom_dist_wtd_sub_pixel_avg_variance4x8_c, 0),
                       DistWtdSubpelAvgVarianceParams(
-                          2, 2, &aom_dist_wtd_sub_pixel_avg_variance4x4_c, 0)));
+                          2, 2, &aom_dist_wtd_sub_pixel_avg_variance4x4_c, 0),
+
+                      DistWtdSubpelAvgVarianceParams(
+                          6, 4, &aom_dist_wtd_sub_pixel_avg_variance64x16_c, 0),
+                      DistWtdSubpelAvgVarianceParams(
+                          4, 6, &aom_dist_wtd_sub_pixel_avg_variance16x64_c, 0),
+                      DistWtdSubpelAvgVarianceParams(
+                          5, 3, &aom_dist_wtd_sub_pixel_avg_variance32x8_c, 0),
+                      DistWtdSubpelAvgVarianceParams(
+                          3, 5, &aom_dist_wtd_sub_pixel_avg_variance8x32_c, 0),
+                      DistWtdSubpelAvgVarianceParams(
+                          4, 2, &aom_dist_wtd_sub_pixel_avg_variance16x4_c, 0),
+                      DistWtdSubpelAvgVarianceParams(
+                          2, 4, &aom_dist_wtd_sub_pixel_avg_variance4x16_c,
+                          0)));
 
 INSTANTIATE_TEST_CASE_P(
     C, AvxObmcSubpelVarianceTest,
@@ -1207,7 +1242,14 @@
         ObmcSubpelVarianceParams(3, 3, &aom_obmc_sub_pixel_variance8x8_c, 0),
         ObmcSubpelVarianceParams(3, 2, &aom_obmc_sub_pixel_variance8x4_c, 0),
         ObmcSubpelVarianceParams(2, 3, &aom_obmc_sub_pixel_variance4x8_c, 0),
-        ObmcSubpelVarianceParams(2, 2, &aom_obmc_sub_pixel_variance4x4_c, 0)));
+        ObmcSubpelVarianceParams(2, 2, &aom_obmc_sub_pixel_variance4x4_c, 0),
+
+        ObmcSubpelVarianceParams(6, 4, &aom_obmc_sub_pixel_variance64x16_c, 0),
+        ObmcSubpelVarianceParams(4, 6, &aom_obmc_sub_pixel_variance16x64_c, 0),
+        ObmcSubpelVarianceParams(5, 3, &aom_obmc_sub_pixel_variance32x8_c, 0),
+        ObmcSubpelVarianceParams(3, 5, &aom_obmc_sub_pixel_variance8x32_c, 0),
+        ObmcSubpelVarianceParams(4, 2, &aom_obmc_sub_pixel_variance16x4_c, 0),
+        ObmcSubpelVarianceParams(2, 4, &aom_obmc_sub_pixel_variance4x16_c, 0)));
 
 typedef MainTestClass<VarianceMxNFunc> AvxHBDMseTest;
 typedef MainTestClass<VarianceMxNFunc> AvxHBDVarianceTest;
@@ -1292,7 +1334,26 @@
   VarianceParams(3, 3, &aom_highbd_8_variance8x8_c, 8),
   VarianceParams(3, 2, &aom_highbd_8_variance8x4_c, 8),
   VarianceParams(2, 3, &aom_highbd_8_variance4x8_c, 8),
-  VarianceParams(2, 2, &aom_highbd_8_variance4x4_c, 8)
+  VarianceParams(2, 2, &aom_highbd_8_variance4x4_c, 8),
+
+  VarianceParams(6, 4, &aom_highbd_12_variance64x16_c, 12),
+  VarianceParams(4, 6, &aom_highbd_12_variance16x64_c, 12),
+  VarianceParams(5, 3, &aom_highbd_12_variance32x8_c, 12),
+  VarianceParams(3, 5, &aom_highbd_12_variance8x32_c, 12),
+  VarianceParams(4, 2, &aom_highbd_12_variance16x4_c, 12),
+  VarianceParams(2, 4, &aom_highbd_12_variance4x16_c, 12),
+  VarianceParams(6, 4, &aom_highbd_10_variance64x16_c, 10),
+  VarianceParams(4, 6, &aom_highbd_10_variance16x64_c, 10),
+  VarianceParams(5, 3, &aom_highbd_10_variance32x8_c, 10),
+  VarianceParams(3, 5, &aom_highbd_10_variance8x32_c, 10),
+  VarianceParams(4, 2, &aom_highbd_10_variance16x4_c, 10),
+  VarianceParams(2, 4, &aom_highbd_10_variance4x16_c, 10),
+  VarianceParams(6, 4, &aom_highbd_8_variance64x16_c, 8),
+  VarianceParams(4, 6, &aom_highbd_8_variance16x64_c, 8),
+  VarianceParams(5, 3, &aom_highbd_8_variance32x8_c, 8),
+  VarianceParams(3, 5, &aom_highbd_8_variance8x32_c, 8),
+  VarianceParams(4, 2, &aom_highbd_8_variance16x4_c, 8),
+  VarianceParams(2, 4, &aom_highbd_8_variance4x16_c, 8),
 };
 INSTANTIATE_TEST_CASE_P(C, AvxHBDVarianceTest,
                         ::testing::ValuesIn(kArrayHBDVariance_c));
@@ -1355,6 +1416,25 @@
   SubpelVarianceParams(3, 2, &aom_highbd_12_sub_pixel_variance8x4_c, 12),
   SubpelVarianceParams(2, 3, &aom_highbd_12_sub_pixel_variance4x8_c, 12),
   SubpelVarianceParams(2, 2, &aom_highbd_12_sub_pixel_variance4x4_c, 12),
+
+  SubpelVarianceParams(6, 4, &aom_highbd_8_sub_pixel_variance64x16_c, 8),
+  SubpelVarianceParams(4, 6, &aom_highbd_8_sub_pixel_variance16x64_c, 8),
+  SubpelVarianceParams(5, 3, &aom_highbd_8_sub_pixel_variance32x8_c, 8),
+  SubpelVarianceParams(3, 5, &aom_highbd_8_sub_pixel_variance8x32_c, 8),
+  SubpelVarianceParams(4, 2, &aom_highbd_8_sub_pixel_variance16x4_c, 8),
+  SubpelVarianceParams(2, 4, &aom_highbd_8_sub_pixel_variance4x16_c, 8),
+  SubpelVarianceParams(6, 4, &aom_highbd_10_sub_pixel_variance64x16_c, 10),
+  SubpelVarianceParams(4, 6, &aom_highbd_10_sub_pixel_variance16x64_c, 10),
+  SubpelVarianceParams(5, 3, &aom_highbd_10_sub_pixel_variance32x8_c, 10),
+  SubpelVarianceParams(3, 5, &aom_highbd_10_sub_pixel_variance8x32_c, 10),
+  SubpelVarianceParams(4, 2, &aom_highbd_10_sub_pixel_variance16x4_c, 10),
+  SubpelVarianceParams(2, 4, &aom_highbd_10_sub_pixel_variance4x16_c, 10),
+  SubpelVarianceParams(6, 4, &aom_highbd_12_sub_pixel_variance64x16_c, 12),
+  SubpelVarianceParams(4, 6, &aom_highbd_12_sub_pixel_variance16x64_c, 12),
+  SubpelVarianceParams(5, 3, &aom_highbd_12_sub_pixel_variance32x8_c, 12),
+  SubpelVarianceParams(3, 5, &aom_highbd_12_sub_pixel_variance8x32_c, 12),
+  SubpelVarianceParams(4, 2, &aom_highbd_12_sub_pixel_variance16x4_c, 12),
+  SubpelVarianceParams(2, 4, &aom_highbd_12_sub_pixel_variance4x16_c, 12),
 };
 INSTANTIATE_TEST_CASE_P(C, AvxHBDSubpelVarianceTest,
                         ::testing::ValuesIn(kArrayHBDSubpelVariance_c));
@@ -1434,7 +1514,38 @@
   SubpelAvgVarianceParams(3, 3, &aom_highbd_12_sub_pixel_avg_variance8x8_c, 12),
   SubpelAvgVarianceParams(3, 2, &aom_highbd_12_sub_pixel_avg_variance8x4_c, 12),
   SubpelAvgVarianceParams(2, 3, &aom_highbd_12_sub_pixel_avg_variance4x8_c, 12),
-  SubpelAvgVarianceParams(2, 2, &aom_highbd_12_sub_pixel_avg_variance4x4_c, 12)
+  SubpelAvgVarianceParams(2, 2, &aom_highbd_12_sub_pixel_avg_variance4x4_c, 12),
+
+  SubpelAvgVarianceParams(6, 4, &aom_highbd_8_sub_pixel_avg_variance64x16_c, 8),
+  SubpelAvgVarianceParams(4, 6, &aom_highbd_8_sub_pixel_avg_variance16x64_c, 8),
+  SubpelAvgVarianceParams(5, 3, &aom_highbd_8_sub_pixel_avg_variance32x8_c, 8),
+  SubpelAvgVarianceParams(3, 5, &aom_highbd_8_sub_pixel_avg_variance8x32_c, 8),
+  SubpelAvgVarianceParams(4, 2, &aom_highbd_8_sub_pixel_avg_variance16x4_c, 8),
+  SubpelAvgVarianceParams(2, 4, &aom_highbd_8_sub_pixel_avg_variance4x16_c, 8),
+  SubpelAvgVarianceParams(6, 4, &aom_highbd_10_sub_pixel_avg_variance64x16_c,
+                          10),
+  SubpelAvgVarianceParams(4, 6, &aom_highbd_10_sub_pixel_avg_variance16x64_c,
+                          10),
+  SubpelAvgVarianceParams(5, 3, &aom_highbd_10_sub_pixel_avg_variance32x8_c,
+                          10),
+  SubpelAvgVarianceParams(3, 5, &aom_highbd_10_sub_pixel_avg_variance8x32_c,
+                          10),
+  SubpelAvgVarianceParams(4, 2, &aom_highbd_10_sub_pixel_avg_variance16x4_c,
+                          10),
+  SubpelAvgVarianceParams(2, 4, &aom_highbd_10_sub_pixel_avg_variance4x16_c,
+                          10),
+  SubpelAvgVarianceParams(6, 4, &aom_highbd_12_sub_pixel_avg_variance64x16_c,
+                          12),
+  SubpelAvgVarianceParams(4, 6, &aom_highbd_12_sub_pixel_avg_variance16x64_c,
+                          12),
+  SubpelAvgVarianceParams(5, 3, &aom_highbd_12_sub_pixel_avg_variance32x8_c,
+                          12),
+  SubpelAvgVarianceParams(3, 5, &aom_highbd_12_sub_pixel_avg_variance8x32_c,
+                          12),
+  SubpelAvgVarianceParams(4, 2, &aom_highbd_12_sub_pixel_avg_variance16x4_c,
+                          12),
+  SubpelAvgVarianceParams(2, 4, &aom_highbd_12_sub_pixel_avg_variance4x16_c,
+                          12),
 };
 INSTANTIATE_TEST_CASE_P(C, AvxHBDSubpelAvgVarianceTest,
                         ::testing::ValuesIn(kArrayHBDSubpelAvgVariance_c));
@@ -1522,7 +1633,38 @@
   ObmcSubpelVarianceParams(2, 3, &aom_highbd_12_obmc_sub_pixel_variance4x8_c,
                            12),
   ObmcSubpelVarianceParams(2, 2, &aom_highbd_12_obmc_sub_pixel_variance4x4_c,
-                           12)
+                           12),
+
+  ObmcSubpelVarianceParams(6, 4, &aom_highbd_obmc_sub_pixel_variance64x16_c, 8),
+  ObmcSubpelVarianceParams(4, 6, &aom_highbd_obmc_sub_pixel_variance16x64_c, 8),
+  ObmcSubpelVarianceParams(5, 3, &aom_highbd_obmc_sub_pixel_variance32x8_c, 8),
+  ObmcSubpelVarianceParams(3, 5, &aom_highbd_obmc_sub_pixel_variance8x32_c, 8),
+  ObmcSubpelVarianceParams(4, 2, &aom_highbd_obmc_sub_pixel_variance16x4_c, 8),
+  ObmcSubpelVarianceParams(2, 4, &aom_highbd_obmc_sub_pixel_variance4x16_c, 8),
+  ObmcSubpelVarianceParams(6, 4, &aom_highbd_10_obmc_sub_pixel_variance64x16_c,
+                           10),
+  ObmcSubpelVarianceParams(4, 6, &aom_highbd_10_obmc_sub_pixel_variance16x64_c,
+                           10),
+  ObmcSubpelVarianceParams(5, 3, &aom_highbd_10_obmc_sub_pixel_variance32x8_c,
+                           10),
+  ObmcSubpelVarianceParams(3, 5, &aom_highbd_10_obmc_sub_pixel_variance8x32_c,
+                           10),
+  ObmcSubpelVarianceParams(4, 2, &aom_highbd_10_obmc_sub_pixel_variance16x4_c,
+                           10),
+  ObmcSubpelVarianceParams(2, 4, &aom_highbd_10_obmc_sub_pixel_variance4x16_c,
+                           10),
+  ObmcSubpelVarianceParams(6, 4, &aom_highbd_12_obmc_sub_pixel_variance64x16_c,
+                           12),
+  ObmcSubpelVarianceParams(4, 6, &aom_highbd_12_obmc_sub_pixel_variance16x64_c,
+                           12),
+  ObmcSubpelVarianceParams(5, 3, &aom_highbd_12_obmc_sub_pixel_variance32x8_c,
+                           12),
+  ObmcSubpelVarianceParams(3, 5, &aom_highbd_12_obmc_sub_pixel_variance8x32_c,
+                           12),
+  ObmcSubpelVarianceParams(4, 2, &aom_highbd_12_obmc_sub_pixel_variance16x4_c,
+                           12),
+  ObmcSubpelVarianceParams(2, 4, &aom_highbd_12_obmc_sub_pixel_variance4x16_c,
+                           12),
 };
 INSTANTIATE_TEST_CASE_P(C, AvxHBDObmcSubpelVarianceTest,
                         ::testing::ValuesIn(kArrayHBDObmcSubpelVariance_c));
@@ -1580,7 +1722,14 @@
         SubpelVarianceParams(3, 3, &aom_sub_pixel_variance8x8_sse2, 0),
         SubpelVarianceParams(3, 2, &aom_sub_pixel_variance8x4_sse2, 0),
         SubpelVarianceParams(2, 3, &aom_sub_pixel_variance4x8_sse2, 0),
-        SubpelVarianceParams(2, 2, &aom_sub_pixel_variance4x4_sse2, 0)));
+        SubpelVarianceParams(2, 2, &aom_sub_pixel_variance4x4_sse2, 0),
+
+        SubpelVarianceParams(6, 4, &aom_sub_pixel_variance64x16_sse2, 0),
+        SubpelVarianceParams(4, 6, &aom_sub_pixel_variance16x64_sse2, 0),
+        SubpelVarianceParams(5, 3, &aom_sub_pixel_variance32x8_sse2, 0),
+        SubpelVarianceParams(3, 5, &aom_sub_pixel_variance8x32_sse2, 0),
+        SubpelVarianceParams(4, 2, &aom_sub_pixel_variance16x4_sse2, 0),
+        SubpelVarianceParams(2, 4, &aom_sub_pixel_variance4x16_sse2, 0)));
 
 INSTANTIATE_TEST_CASE_P(
     SSE2, AvxSubpelAvgVarianceTest,
@@ -1603,7 +1752,15 @@
         SubpelAvgVarianceParams(3, 3, &aom_sub_pixel_avg_variance8x8_sse2, 0),
         SubpelAvgVarianceParams(3, 2, &aom_sub_pixel_avg_variance8x4_sse2, 0),
         SubpelAvgVarianceParams(2, 3, &aom_sub_pixel_avg_variance4x8_sse2, 0),
-        SubpelAvgVarianceParams(2, 2, &aom_sub_pixel_avg_variance4x4_sse2, 0)));
+        SubpelAvgVarianceParams(2, 2, &aom_sub_pixel_avg_variance4x4_sse2, 0),
+
+        SubpelAvgVarianceParams(6, 4, &aom_sub_pixel_avg_variance64x16_sse2, 0),
+        SubpelAvgVarianceParams(4, 6, &aom_sub_pixel_avg_variance16x64_sse2, 0),
+        SubpelAvgVarianceParams(5, 3, &aom_sub_pixel_avg_variance32x8_sse2, 0),
+        SubpelAvgVarianceParams(3, 5, &aom_sub_pixel_avg_variance8x32_sse2, 0),
+        SubpelAvgVarianceParams(4, 2, &aom_sub_pixel_avg_variance16x4_sse2, 0),
+        SubpelAvgVarianceParams(2, 4, &aom_sub_pixel_avg_variance4x16_sse2,
+                                0)));
 
 #if HAVE_SSE4_1
 INSTANTIATE_TEST_CASE_P(
@@ -1686,7 +1843,26 @@
   VarianceParams(4, 4, &aom_highbd_8_variance16x16_sse2, 8),
   VarianceParams(4, 3, &aom_highbd_8_variance16x8_sse2, 8),
   VarianceParams(3, 4, &aom_highbd_8_variance8x16_sse2, 8),
-  VarianceParams(3, 3, &aom_highbd_8_variance8x8_sse2, 8)
+  VarianceParams(3, 3, &aom_highbd_8_variance8x8_sse2, 8),
+
+  VarianceParams(6, 4, &aom_highbd_12_variance64x16_sse2, 12),
+  VarianceParams(4, 6, &aom_highbd_12_variance16x64_sse2, 12),
+  VarianceParams(5, 3, &aom_highbd_12_variance32x8_sse2, 12),
+  VarianceParams(3, 5, &aom_highbd_12_variance8x32_sse2, 12),
+  // VarianceParams(4, 2, &aom_highbd_12_variance16x4_sse2, 12),
+  // VarianceParams(2, 4, &aom_highbd_12_variance4x16_sse2, 12),
+  VarianceParams(6, 4, &aom_highbd_10_variance64x16_sse2, 10),
+  VarianceParams(4, 6, &aom_highbd_10_variance16x64_sse2, 10),
+  VarianceParams(5, 3, &aom_highbd_10_variance32x8_sse2, 10),
+  VarianceParams(3, 5, &aom_highbd_10_variance8x32_sse2, 10),
+  // VarianceParams(4, 2, &aom_highbd_10_variance16x4_sse2, 10),
+  // VarianceParams(2, 4, &aom_highbd_10_variance4x16_sse2, 10),
+  VarianceParams(6, 4, &aom_highbd_8_variance64x16_sse2, 8),
+  VarianceParams(4, 6, &aom_highbd_8_variance16x64_sse2, 8),
+  VarianceParams(5, 3, &aom_highbd_8_variance32x8_sse2, 8),
+  VarianceParams(3, 5, &aom_highbd_8_variance8x32_sse2, 8),
+  // VarianceParams(4, 2, &aom_highbd_8_variance16x4_sse2, 8),
+  // VarianceParams(2, 4, &aom_highbd_8_variance4x16_sse2, 8),
 };
 INSTANTIATE_TEST_CASE_P(SSE2, AvxHBDVarianceTest,
                         ::testing::ValuesIn(kArrayHBDVariance_sse2));
@@ -1706,7 +1882,7 @@
   VarianceParams(4, 4, &aom_highbd_10_variance16x16_avx2, 10),
   VarianceParams(4, 3, &aom_highbd_10_variance16x8_avx2, 10),
   VarianceParams(3, 4, &aom_highbd_10_variance8x16_avx2, 10),
-  VarianceParams(3, 3, &aom_highbd_10_variance8x8_avx2, 10)
+  VarianceParams(3, 3, &aom_highbd_10_variance8x8_avx2, 10),
 };
 
 INSTANTIATE_TEST_CASE_P(AVX2, AvxHBDVarianceTest,
@@ -1755,7 +1931,26 @@
   SubpelVarianceParams(4, 3, &aom_highbd_8_sub_pixel_variance16x8_sse2, 8),
   SubpelVarianceParams(3, 4, &aom_highbd_8_sub_pixel_variance8x16_sse2, 8),
   SubpelVarianceParams(3, 3, &aom_highbd_8_sub_pixel_variance8x8_sse2, 8),
-  SubpelVarianceParams(3, 2, &aom_highbd_8_sub_pixel_variance8x4_sse2, 8)
+  SubpelVarianceParams(3, 2, &aom_highbd_8_sub_pixel_variance8x4_sse2, 8),
+
+  SubpelVarianceParams(6, 4, &aom_highbd_12_sub_pixel_variance64x16_sse2, 12),
+  SubpelVarianceParams(4, 6, &aom_highbd_12_sub_pixel_variance16x64_sse2, 12),
+  SubpelVarianceParams(5, 3, &aom_highbd_12_sub_pixel_variance32x8_sse2, 12),
+  SubpelVarianceParams(3, 5, &aom_highbd_12_sub_pixel_variance8x32_sse2, 12),
+  SubpelVarianceParams(4, 2, &aom_highbd_12_sub_pixel_variance16x4_sse2, 12),
+  // SubpelVarianceParams(2, 4, &aom_highbd_12_sub_pixel_variance4x16_sse2, 12),
+  SubpelVarianceParams(6, 4, &aom_highbd_10_sub_pixel_variance64x16_sse2, 10),
+  SubpelVarianceParams(4, 6, &aom_highbd_10_sub_pixel_variance16x64_sse2, 10),
+  SubpelVarianceParams(5, 3, &aom_highbd_10_sub_pixel_variance32x8_sse2, 10),
+  SubpelVarianceParams(3, 5, &aom_highbd_10_sub_pixel_variance8x32_sse2, 10),
+  SubpelVarianceParams(4, 2, &aom_highbd_10_sub_pixel_variance16x4_sse2, 10),
+  // SubpelVarianceParams(2, 4, &aom_highbd_10_sub_pixel_variance4x16_sse2, 10),
+  SubpelVarianceParams(6, 4, &aom_highbd_8_sub_pixel_variance64x16_sse2, 8),
+  SubpelVarianceParams(4, 6, &aom_highbd_8_sub_pixel_variance16x64_sse2, 8),
+  SubpelVarianceParams(5, 3, &aom_highbd_8_sub_pixel_variance32x8_sse2, 8),
+  SubpelVarianceParams(3, 5, &aom_highbd_8_sub_pixel_variance8x32_sse2, 8),
+  SubpelVarianceParams(4, 2, &aom_highbd_8_sub_pixel_variance16x4_sse2, 8),
+  // SubpelVarianceParams(2, 4, &aom_highbd_8_sub_pixel_variance4x16_sse2, 8),
 };
 INSTANTIATE_TEST_CASE_P(SSE2, AvxHBDSubpelVarianceTest,
                         ::testing::ValuesIn(kArrayHBDSubpelVariance_sse2));
@@ -1825,7 +2020,45 @@
                           8),
   SubpelAvgVarianceParams(3, 3, &aom_highbd_8_sub_pixel_avg_variance8x8_sse2,
                           8),
-  SubpelAvgVarianceParams(3, 2, &aom_highbd_8_sub_pixel_avg_variance8x4_sse2, 8)
+  SubpelAvgVarianceParams(3, 2, &aom_highbd_8_sub_pixel_avg_variance8x4_sse2,
+                          8),
+
+  SubpelAvgVarianceParams(6, 4, &aom_highbd_12_sub_pixel_avg_variance64x16_sse2,
+                          12),
+  SubpelAvgVarianceParams(4, 6, &aom_highbd_12_sub_pixel_avg_variance16x64_sse2,
+                          12),
+  SubpelAvgVarianceParams(5, 3, &aom_highbd_12_sub_pixel_avg_variance32x8_sse2,
+                          12),
+  SubpelAvgVarianceParams(3, 5, &aom_highbd_12_sub_pixel_avg_variance8x32_sse2,
+                          12),
+  SubpelAvgVarianceParams(4, 2, &aom_highbd_12_sub_pixel_avg_variance16x4_sse2,
+                          12),
+  // SubpelAvgVarianceParams(2, 4,
+  // &aom_highbd_12_sub_pixel_avg_variance4x16_sse2, 12),
+  SubpelAvgVarianceParams(6, 4, &aom_highbd_10_sub_pixel_avg_variance64x16_sse2,
+                          10),
+  SubpelAvgVarianceParams(4, 6, &aom_highbd_10_sub_pixel_avg_variance16x64_sse2,
+                          10),
+  SubpelAvgVarianceParams(5, 3, &aom_highbd_10_sub_pixel_avg_variance32x8_sse2,
+                          10),
+  SubpelAvgVarianceParams(3, 5, &aom_highbd_10_sub_pixel_avg_variance8x32_sse2,
+                          10),
+  SubpelAvgVarianceParams(4, 2, &aom_highbd_10_sub_pixel_avg_variance16x4_sse2,
+                          10),
+  // SubpelAvgVarianceParams(2, 4,
+  // &aom_highbd_10_sub_pixel_avg_variance4x16_sse2, 10),
+  SubpelAvgVarianceParams(6, 4, &aom_highbd_8_sub_pixel_avg_variance64x16_sse2,
+                          8),
+  SubpelAvgVarianceParams(4, 6, &aom_highbd_8_sub_pixel_avg_variance16x64_sse2,
+                          8),
+  SubpelAvgVarianceParams(5, 3, &aom_highbd_8_sub_pixel_avg_variance32x8_sse2,
+                          8),
+  SubpelAvgVarianceParams(3, 5, &aom_highbd_8_sub_pixel_avg_variance8x32_sse2,
+                          8),
+  SubpelAvgVarianceParams(4, 2, &aom_highbd_8_sub_pixel_avg_variance16x4_sse2,
+                          8),
+  // SubpelAvgVarianceParams(2, 4,
+  // &aom_highbd_8_sub_pixel_avg_variance4x16_sse2, 8),
 };
 
 INSTANTIATE_TEST_CASE_P(SSE2, AvxHBDSubpelAvgVarianceTest,
@@ -1851,7 +2084,14 @@
         SubpelVarianceParams(3, 3, &aom_sub_pixel_variance8x8_ssse3, 0),
         SubpelVarianceParams(3, 2, &aom_sub_pixel_variance8x4_ssse3, 0),
         SubpelVarianceParams(2, 3, &aom_sub_pixel_variance4x8_ssse3, 0),
-        SubpelVarianceParams(2, 2, &aom_sub_pixel_variance4x4_ssse3, 0)));
+        SubpelVarianceParams(2, 2, &aom_sub_pixel_variance4x4_ssse3, 0),
+
+        SubpelVarianceParams(6, 4, &aom_sub_pixel_variance64x16_ssse3, 0),
+        SubpelVarianceParams(4, 6, &aom_sub_pixel_variance16x64_ssse3, 0),
+        SubpelVarianceParams(5, 3, &aom_sub_pixel_variance32x8_ssse3, 0),
+        SubpelVarianceParams(3, 5, &aom_sub_pixel_variance8x32_ssse3, 0),
+        SubpelVarianceParams(4, 2, &aom_sub_pixel_variance16x4_ssse3, 0),
+        SubpelVarianceParams(2, 4, &aom_sub_pixel_variance4x16_ssse3, 0)));
 
 INSTANTIATE_TEST_CASE_P(
     SSSE3, AvxSubpelAvgVarianceTest,
@@ -1881,13 +2121,28 @@
         SubpelAvgVarianceParams(3, 3, &aom_sub_pixel_avg_variance8x8_ssse3, 0),
         SubpelAvgVarianceParams(3, 2, &aom_sub_pixel_avg_variance8x4_ssse3, 0),
         SubpelAvgVarianceParams(2, 3, &aom_sub_pixel_avg_variance4x8_ssse3, 0),
-        SubpelAvgVarianceParams(2, 2, &aom_sub_pixel_avg_variance4x4_ssse3,
+        SubpelAvgVarianceParams(2, 2, &aom_sub_pixel_avg_variance4x4_ssse3, 0),
+
+        SubpelAvgVarianceParams(6, 4, &aom_sub_pixel_avg_variance64x16_ssse3,
+                                0),
+        SubpelAvgVarianceParams(4, 6, &aom_sub_pixel_avg_variance16x64_ssse3,
+                                0),
+        SubpelAvgVarianceParams(5, 3, &aom_sub_pixel_avg_variance32x8_ssse3, 0),
+        SubpelAvgVarianceParams(3, 5, &aom_sub_pixel_avg_variance8x32_ssse3, 0),
+        SubpelAvgVarianceParams(4, 2, &aom_sub_pixel_avg_variance16x4_ssse3, 0),
+        SubpelAvgVarianceParams(2, 4, &aom_sub_pixel_avg_variance4x16_ssse3,
                                 0)));
 
 INSTANTIATE_TEST_CASE_P(
     SSSE3, AvxDistWtdSubpelAvgVarianceTest,
     ::testing::Values(
         DistWtdSubpelAvgVarianceParams(
+            7, 7, &aom_dist_wtd_sub_pixel_avg_variance128x128_ssse3, 0),
+        DistWtdSubpelAvgVarianceParams(
+            7, 6, &aom_dist_wtd_sub_pixel_avg_variance128x64_ssse3, 0),
+        DistWtdSubpelAvgVarianceParams(
+            6, 7, &aom_dist_wtd_sub_pixel_avg_variance64x128_ssse3, 0),
+        DistWtdSubpelAvgVarianceParams(
             6, 6, &aom_dist_wtd_sub_pixel_avg_variance64x64_ssse3, 0),
         DistWtdSubpelAvgVarianceParams(
             6, 5, &aom_dist_wtd_sub_pixel_avg_variance64x32_ssse3, 0),
@@ -1912,7 +2167,20 @@
         DistWtdSubpelAvgVarianceParams(
             2, 3, &aom_dist_wtd_sub_pixel_avg_variance4x8_ssse3, 0),
         DistWtdSubpelAvgVarianceParams(
-            2, 2, &aom_dist_wtd_sub_pixel_avg_variance4x4_ssse3, 0)));
+            2, 2, &aom_dist_wtd_sub_pixel_avg_variance4x4_ssse3, 0),
+
+        DistWtdSubpelAvgVarianceParams(
+            6, 4, &aom_dist_wtd_sub_pixel_avg_variance64x16_ssse3, 0),
+        DistWtdSubpelAvgVarianceParams(
+            4, 6, &aom_dist_wtd_sub_pixel_avg_variance16x64_ssse3, 0),
+        DistWtdSubpelAvgVarianceParams(
+            5, 3, &aom_dist_wtd_sub_pixel_avg_variance32x8_ssse3, 0),
+        DistWtdSubpelAvgVarianceParams(
+            3, 5, &aom_dist_wtd_sub_pixel_avg_variance8x32_ssse3, 0),
+        DistWtdSubpelAvgVarianceParams(
+            4, 2, &aom_dist_wtd_sub_pixel_avg_variance16x4_ssse3, 0),
+        DistWtdSubpelAvgVarianceParams(
+            2, 4, &aom_dist_wtd_sub_pixel_avg_variance4x16_ssse3, 0)));
 #endif  // HAVE_SSSE3
 
 #if HAVE_SSE4_1
@@ -1950,6 +2218,19 @@
         ObmcSubpelVarianceParams(2, 3, &aom_obmc_sub_pixel_variance4x8_sse4_1,
                                  0),
         ObmcSubpelVarianceParams(2, 2, &aom_obmc_sub_pixel_variance4x4_sse4_1,
+                                 0),
+
+        ObmcSubpelVarianceParams(6, 4, &aom_obmc_sub_pixel_variance64x16_sse4_1,
+                                 0),
+        ObmcSubpelVarianceParams(4, 6, &aom_obmc_sub_pixel_variance16x64_sse4_1,
+                                 0),
+        ObmcSubpelVarianceParams(5, 3, &aom_obmc_sub_pixel_variance32x8_sse4_1,
+                                 0),
+        ObmcSubpelVarianceParams(3, 5, &aom_obmc_sub_pixel_variance8x32_sse4_1,
+                                 0),
+        ObmcSubpelVarianceParams(4, 2, &aom_obmc_sub_pixel_variance16x4_sse4_1,
+                                 0),
+        ObmcSubpelVarianceParams(2, 4, &aom_obmc_sub_pixel_variance4x16_sse4_1,
                                  0)));
 #endif  // HAVE_SSE4_1