Highbd loop filter AVX2

- Speed test (ms) on i7-6700, Linux x86_64
  FUNCTION             SSE2    AVX2
  horizontal_edge_16   55      28
  vertical_16_dual     84      47
  horizontal_4_dual    27      13
  horizontal_8_dual    36      15
  vertical_4_dual      38      25
  vertical_8_dual      44      27
- Decoder frame rate improves by about 1.2% - 2.8%.

Change-Id: I9c4123869bac9b6d32e626173c2a8e7eb0cf49e7
diff --git a/aom_dsp/aom_dsp.cmake b/aom_dsp/aom_dsp.cmake
index 3ce6761..4828345 100644
--- a/aom_dsp/aom_dsp.cmake
+++ b/aom_dsp/aom_dsp.cmake
@@ -46,6 +46,7 @@
     "${AOM_ROOT}/aom_dsp/x86/aom_asm_stubs.c"
     "${AOM_ROOT}/aom_dsp/x86/convolve.h"
     "${AOM_ROOT}/aom_dsp/x86/txfm_common_sse2.h"
+    "${AOM_ROOT}/aom_dsp/x86/lpf_common_sse2.h"
     "${AOM_ROOT}/aom_dsp/x86/loopfilter_sse2.c")
 
 set(AOM_DSP_COMMON_ASM_SSSE3
@@ -66,6 +67,7 @@
     "${AOM_ROOT}/aom_dsp/x86/aom_subpixel_8t_intrin_avx2.c"
     "${AOM_ROOT}/aom_dsp/x86/loopfilter_avx2.c"
     "${AOM_ROOT}/aom_dsp/x86/inv_txfm_avx2.c"
+    "${AOM_ROOT}/aom_dsp/x86/common_avx2.h"
     "${AOM_ROOT}/aom_dsp/x86/inv_txfm_common_avx2.h"
     "${AOM_ROOT}/aom_dsp/x86/txfm_common_avx2.h")
 
@@ -189,7 +191,8 @@
 
   set(AOM_DSP_COMMON_INTRIN_AVX2
       ${AOM_DSP_COMMON_INTRIN_AVX2}
-      "${AOM_ROOT}/aom_dsp/x86/highbd_convolve_avx2.c")
+      "${AOM_ROOT}/aom_dsp/x86/highbd_convolve_avx2.c"
+      "${AOM_ROOT}/aom_dsp/x86/highbd_loopfilter_avx2.c")
 else ()
   set(AOM_DSP_COMMON_INTRIN_DSPR2
       ${AOM_DSP_COMMON_INTRIN_DSPR2}