Optimize blend_a64_mask
1. optimize aom_lowbd_blend_a64_d16_mask_sse4_1
2. add aom_lowbd_blend_a64_d16_mask_avx2
Speeds up encoding by about 0.5% with no RD (rate-distortion) change.
Test sequence: BasketballDrill_832x480_50.y4m
Test command line: ./aomenc --cpu-used=1 --psnr -D \
-q --end-usage=vbr --target-bitrate=800 --limit=20 \
BasketballDrill_832x480_50.y4m -otest.webm
Change-Id: I3f909105fcdfe7921eff2d7c47ecdc09a2179253
diff --git a/aom_dsp/aom_dsp.cmake b/aom_dsp/aom_dsp.cmake
index 7c0111a..2d9e775 100644
--- a/aom_dsp/aom_dsp.cmake
+++ b/aom_dsp/aom_dsp.cmake
@@ -76,6 +76,7 @@
"${AOM_ROOT}/aom_dsp/x86/intrapred_ssse3.c")
list(APPEND AOM_DSP_COMMON_INTRIN_SSE4_1
+ "${AOM_ROOT}/aom_dsp/x86/blend_mask_sse4.h"
"${AOM_ROOT}/aom_dsp/x86/blend_a64_hmask_sse4.c"
"${AOM_ROOT}/aom_dsp/x86/blend_a64_mask_sse4.c"
"${AOM_ROOT}/aom_dsp/x86/blend_a64_vmask_sse4.c")
@@ -88,7 +89,8 @@
"${AOM_ROOT}/aom_dsp/x86/fft_avx2.c"
"${AOM_ROOT}/aom_dsp/x86/highbd_convolve_avx2.c"
"${AOM_ROOT}/aom_dsp/x86/highbd_loopfilter_avx2.c"
- "${AOM_ROOT}/aom_dsp/x86/intrapred_avx2.c")
+ "${AOM_ROOT}/aom_dsp/x86/intrapred_avx2.c"
+ "${AOM_ROOT}/aom_dsp/x86/blend_a64_mask_avx2.c")
list(APPEND AOM_DSP_COMMON_INTRIN_NEON
"${AOM_ROOT}/aom_dsp/arm/fwd_txfm_neon.c"