Highbd loop filter AVX2
- Speed test (ms) on i7-6700, Linux x86_64
FUNCTION            SSE2  AVX2
horizontal_edge_16    55    28
vertical_16_dual      84    47
horizontal_4_dual     27    13
horizontal_8_dual     36    15
vertical_4_dual       38    25
vertical_8_dual       44    27
- Decoder frame rate improves by about 1.2% to 2.8%.
Change-Id: I9c4123869bac9b6d32e626173c2a8e7eb0cf49e7
diff --git a/aom_dsp/aom_dsp.cmake b/aom_dsp/aom_dsp.cmake
index 3ce6761..4828345 100644
--- a/aom_dsp/aom_dsp.cmake
+++ b/aom_dsp/aom_dsp.cmake
@@ -46,6 +46,7 @@
"${AOM_ROOT}/aom_dsp/x86/aom_asm_stubs.c"
"${AOM_ROOT}/aom_dsp/x86/convolve.h"
"${AOM_ROOT}/aom_dsp/x86/txfm_common_sse2.h"
+ "${AOM_ROOT}/aom_dsp/x86/lpf_common_sse2.h"
"${AOM_ROOT}/aom_dsp/x86/loopfilter_sse2.c")
set(AOM_DSP_COMMON_ASM_SSSE3
@@ -66,6 +67,7 @@
"${AOM_ROOT}/aom_dsp/x86/aom_subpixel_8t_intrin_avx2.c"
"${AOM_ROOT}/aom_dsp/x86/loopfilter_avx2.c"
"${AOM_ROOT}/aom_dsp/x86/inv_txfm_avx2.c"
+ "${AOM_ROOT}/aom_dsp/x86/common_avx2.h"
"${AOM_ROOT}/aom_dsp/x86/inv_txfm_common_avx2.h"
"${AOM_ROOT}/aom_dsp/x86/txfm_common_avx2.h")
@@ -189,7 +191,8 @@
set(AOM_DSP_COMMON_INTRIN_AVX2
${AOM_DSP_COMMON_INTRIN_AVX2}
- "${AOM_ROOT}/aom_dsp/x86/highbd_convolve_avx2.c")
+ "${AOM_ROOT}/aom_dsp/x86/highbd_convolve_avx2.c"
+ "${AOM_ROOT}/aom_dsp/x86/highbd_loopfilter_avx2.c")
else ()
set(AOM_DSP_COMMON_INTRIN_DSPR2
${AOM_DSP_COMMON_INTRIN_DSPR2}