Do not split 32-byte unaligned load and store

Added the compiler options -mno-avx256-split-unaligned-load
and -mno-avx256-split-unaligned-store. These tuning
options prevent the gcc compiler from generating two
128-bit loads/stores and an insert instead of a single
unaligned 256-bit load/store. The options are applied to
AVX2 modules when the compiler supports them.

Change-Id: I8f499a7f18f365babe9bcfd986bb44a002b3550c
diff --git a/build/cmake/aom_optimization.cmake b/build/cmake/aom_optimization.cmake
index 0b58b91..d8b258f 100644
--- a/build/cmake/aom_optimization.cmake
+++ b/build/cmake/aom_optimization.cmake
@@ -49,6 +49,20 @@
     get_msvc_intrinsic_flag(${flag} "flag")
   endif()
 
+  if("${flag}" STREQUAL "-mavx2")
+    # Keep 32-byte unaligned accesses whole. Each probe gets its own result name:
+    # check_c_compiler_flag() caches by name, so a shared one would skip probe two.
+    check_c_compiler_flag("-mno-avx256-split-unaligned-load" AOM_HAVE_NO_AVX256_SPLIT_LOAD)
+    if(AOM_HAVE_NO_AVX256_SPLIT_LOAD)
+      set(flag "${flag} -mno-avx256-split-unaligned-load")
+    endif()
+
+    check_c_compiler_flag("-mno-avx256-split-unaligned-store" AOM_HAVE_NO_AVX256_SPLIT_STORE)
+    if(AOM_HAVE_NO_AVX256_SPLIT_STORE)
+      set(flag "${flag} -mno-avx256-split-unaligned-store")
+    endif()
+  endif()
+
   if(flag)
     separate_arguments(flag)
     target_compile_options(${target_name} PUBLIC ${flag})