Add Neon implementations of OBMC sub pixel variance functions
Add a Neon implementation of aom_obmc_sub_pixel_variance<w>x<h> for
all block sizes - where previously only scalar C implementations
existed.
Also add test coverage for the new Neon functions.
Change-Id: Iba428b1edadcb0ac3b4b2549267c4b47259c2158
diff --git a/aom_dsp/aom_dsp_rtcd_defs.pl b/aom_dsp/aom_dsp_rtcd_defs.pl
index dae52e6..e555e1e 100755
--- a/aom_dsp/aom_dsp_rtcd_defs.pl
+++ b/aom_dsp/aom_dsp_rtcd_defs.pl
@@ -1542,7 +1542,7 @@
add_proto qw/unsigned int/, "aom_obmc_variance${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
add_proto qw/unsigned int/, "aom_obmc_sub_pixel_variance${w}x${h}", "const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
specialize "aom_obmc_variance${w}x${h}", qw/sse4_1 avx2 neon/;
- specialize "aom_obmc_sub_pixel_variance${w}x${h}", q/sse4_1/;
+ specialize "aom_obmc_sub_pixel_variance${w}x${h}", qw/sse4_1 neon/;
}
if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
diff --git a/aom_dsp/arm/subpel_variance_neon.c b/aom_dsp/arm/subpel_variance_neon.c
index a058860..7946439 100644
--- a/aom_dsp/arm/subpel_variance_neon.c
+++ b/aom_dsp/arm/subpel_variance_neon.c
@@ -549,3 +549,48 @@
#undef SUBPEL_AVG_VARIANCE_WXH_NEON
#undef SPECIALIZED_SUBPEL_AVG_VARIANCE_WXH_NEON
+
+#if !CONFIG_REALTIME_ONLY
+
+#define OBMC_SUBPEL_VARIANCE_WXH_NEON(w, h, padding) \
+ unsigned int aom_obmc_sub_pixel_variance##w##x##h##_neon( \
+ const uint8_t *pre, int pre_stride, int xoffset, int yoffset, \
+ const int32_t *wsrc, const int32_t *mask, unsigned int *sse) { \
+ uint8_t tmp0[w * (h + padding)]; \
+ uint8_t tmp1[w * h]; \
+ var_filter_block2d_bil_w##w(pre, tmp0, pre_stride, 1, h + padding, \
+ xoffset); \
+ var_filter_block2d_bil_w##w(tmp0, tmp1, w, w, h, yoffset); \
+ return aom_obmc_variance##w##x##h(tmp1, w, wsrc, mask, sse); \
+ }
+
+OBMC_SUBPEL_VARIANCE_WXH_NEON(4, 4, 2)
+OBMC_SUBPEL_VARIANCE_WXH_NEON(4, 8, 2)
+OBMC_SUBPEL_VARIANCE_WXH_NEON(4, 16, 2)
+
+OBMC_SUBPEL_VARIANCE_WXH_NEON(8, 4, 1)
+OBMC_SUBPEL_VARIANCE_WXH_NEON(8, 8, 1)
+OBMC_SUBPEL_VARIANCE_WXH_NEON(8, 16, 1)
+OBMC_SUBPEL_VARIANCE_WXH_NEON(8, 32, 1)
+
+OBMC_SUBPEL_VARIANCE_WXH_NEON(16, 4, 1)
+OBMC_SUBPEL_VARIANCE_WXH_NEON(16, 8, 1)
+OBMC_SUBPEL_VARIANCE_WXH_NEON(16, 16, 1)
+OBMC_SUBPEL_VARIANCE_WXH_NEON(16, 32, 1)
+OBMC_SUBPEL_VARIANCE_WXH_NEON(16, 64, 1)
+
+OBMC_SUBPEL_VARIANCE_WXH_NEON(32, 8, 1)
+OBMC_SUBPEL_VARIANCE_WXH_NEON(32, 16, 1)
+OBMC_SUBPEL_VARIANCE_WXH_NEON(32, 32, 1)
+OBMC_SUBPEL_VARIANCE_WXH_NEON(32, 64, 1)
+
+OBMC_SUBPEL_VARIANCE_WXH_NEON(64, 16, 1)
+OBMC_SUBPEL_VARIANCE_WXH_NEON(64, 32, 1)
+OBMC_SUBPEL_VARIANCE_WXH_NEON(64, 64, 1)
+OBMC_SUBPEL_VARIANCE_WXH_NEON(64, 128, 1)
+
+OBMC_SUBPEL_VARIANCE_WXH_NEON(128, 64, 1)
+OBMC_SUBPEL_VARIANCE_WXH_NEON(128, 128, 1)
+
+#undef OBMC_SUBPEL_VARIANCE_WXH_NEON
+#endif // !CONFIG_REALTIME_ONLY
diff --git a/test/variance_test.cc b/test/variance_test.cc
index ce5afec..2c272f0 100644
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
@@ -3310,6 +3310,35 @@
INSTANTIATE_TEST_SUITE_P(NEON, AvxSubpelAvgVarianceTest,
::testing::ValuesIn(kArraySubpelAvgVariance_neon));
+#if !CONFIG_REALTIME_ONLY
+const ObmcSubpelVarianceParams kArrayObmcSubpelVariance_neon[] = {
+ ObmcSubpelVarianceParams(7, 7, &aom_obmc_sub_pixel_variance128x128_neon, 0),
+ ObmcSubpelVarianceParams(7, 6, &aom_obmc_sub_pixel_variance128x64_neon, 0),
+ ObmcSubpelVarianceParams(6, 7, &aom_obmc_sub_pixel_variance64x128_neon, 0),
+ ObmcSubpelVarianceParams(6, 6, &aom_obmc_sub_pixel_variance64x64_neon, 0),
+ ObmcSubpelVarianceParams(6, 5, &aom_obmc_sub_pixel_variance64x32_neon, 0),
+ ObmcSubpelVarianceParams(5, 6, &aom_obmc_sub_pixel_variance32x64_neon, 0),
+ ObmcSubpelVarianceParams(5, 5, &aom_obmc_sub_pixel_variance32x32_neon, 0),
+ ObmcSubpelVarianceParams(5, 4, &aom_obmc_sub_pixel_variance32x16_neon, 0),
+ ObmcSubpelVarianceParams(4, 5, &aom_obmc_sub_pixel_variance16x32_neon, 0),
+ ObmcSubpelVarianceParams(4, 4, &aom_obmc_sub_pixel_variance16x16_neon, 0),
+ ObmcSubpelVarianceParams(4, 3, &aom_obmc_sub_pixel_variance16x8_neon, 0),
+ ObmcSubpelVarianceParams(3, 4, &aom_obmc_sub_pixel_variance8x16_neon, 0),
+ ObmcSubpelVarianceParams(3, 3, &aom_obmc_sub_pixel_variance8x8_neon, 0),
+ ObmcSubpelVarianceParams(3, 2, &aom_obmc_sub_pixel_variance8x4_neon, 0),
+ ObmcSubpelVarianceParams(2, 3, &aom_obmc_sub_pixel_variance4x8_neon, 0),
+ ObmcSubpelVarianceParams(2, 2, &aom_obmc_sub_pixel_variance4x4_neon, 0),
+ ObmcSubpelVarianceParams(6, 4, &aom_obmc_sub_pixel_variance64x16_neon, 0),
+ ObmcSubpelVarianceParams(4, 6, &aom_obmc_sub_pixel_variance16x64_neon, 0),
+ ObmcSubpelVarianceParams(5, 3, &aom_obmc_sub_pixel_variance32x8_neon, 0),
+ ObmcSubpelVarianceParams(3, 5, &aom_obmc_sub_pixel_variance8x32_neon, 0),
+ ObmcSubpelVarianceParams(4, 2, &aom_obmc_sub_pixel_variance16x4_neon, 0),
+ ObmcSubpelVarianceParams(2, 4, &aom_obmc_sub_pixel_variance4x16_neon, 0),
+};
+INSTANTIATE_TEST_SUITE_P(NEON, AvxObmcSubpelVarianceTest,
+ ::testing::ValuesIn(kArrayObmcSubpelVariance_neon));
+#endif
+
const GetSseSumParams kArrayGetSseSum8x8Quad_neon[] = {
GetSseSumParams(7, 7, &aom_get_var_sse_sum_8x8_quad_neon, 0),
GetSseSumParams(6, 6, &aom_get_var_sse_sum_8x8_quad_neon, 0),