Optimize blend_a64_mask
1. Optimize aom_lowbd_blend_a64_d16_mask_sse4_1
2. Add aom_lowbd_blend_a64_d16_mask_avx2
Speeds up encoding by about 0.5% with no RD (rate-distortion) change.
Test sequence: BasketballDrill_832x480_50.y4m
Test command line: ./aomenc --cpu-used=1 --psnr -D \
-q --end-usage=vbr --target-bitrate=800 --limit=20 \
BasketballDrill_832x480_50.y4m -otest.webm
Change-Id: I3f909105fcdfe7921eff2d7c47ecdc09a2179253
diff --git a/test/blend_a64_mask_test.cc b/test/blend_a64_mask_test.cc
index c9c6795..4d4f070 100644
--- a/test/blend_a64_mask_test.cc
+++ b/test/blend_a64_mask_test.cc
@@ -270,6 +270,13 @@
aom_lowbd_blend_a64_d16_mask_sse4_1)));
#endif // HAVE_SSE4_1
+#if HAVE_AVX2
+INSTANTIATE_TEST_CASE_P(
+ AVX2, BlendA64MaskTest8B_d16,
+ ::testing::Values(TestFuncs_d16(aom_lowbd_blend_a64_d16_mask_c,
+ aom_lowbd_blend_a64_d16_mask_avx2)));
+#endif // HAVE_AVX2
+
#if HAVE_NEON
INSTANTIATE_TEST_CASE_P(
NEON, BlendA64MaskTest8B_d16,