Do not split 32-byte unaligned loads and stores
Add the compiler options -mno-avx256-split-unaligned-load
and -mno-avx256-split-unaligned-store. These tuning
options prevent GCC from generating two 128-bit
loads/stores plus an insert in place of a single
unaligned 256-bit load/store. They are enabled for
AVX2 modules when the compiler supports them.
Change-Id: I8f499a7f18f365babe9bcfd986bb44a002b3550c
diff --git a/build/cmake/aom_optimization.cmake b/build/cmake/aom_optimization.cmake
index 0b58b91..d8b258f 100644
--- a/build/cmake/aom_optimization.cmake
+++ b/build/cmake/aom_optimization.cmake
@@ -49,6 +49,20 @@
get_msvc_intrinsic_flag(${flag} "flag")
endif()
+ if("${flag}" STREQUAL "-mavx2")
+ # check_c_compiler_flag() caches its result, so each probe needs its own variable.
+ check_c_compiler_flag("-mno-avx256-split-unaligned-load" AOM_HAVE_NO_AVX256_SPLIT_UNALIGNED_LOAD)
+ if(AOM_HAVE_NO_AVX256_SPLIT_UNALIGNED_LOAD)
+ set(flag "${flag} -mno-avx256-split-unaligned-load")
+ endif()
+
+ # Probed independently of the load flag; a plain unset() would not clear a cached result.
+ check_c_compiler_flag("-mno-avx256-split-unaligned-store" AOM_HAVE_NO_AVX256_SPLIT_UNALIGNED_STORE)
+ if(AOM_HAVE_NO_AVX256_SPLIT_UNALIGNED_STORE)
+ set(flag "${flag} -mno-avx256-split-unaligned-store")
+ endif()
+ endif()
+
if(flag)
separate_arguments(flag)
target_compile_options(${target_name} PUBLIC ${flag})