[intra-edge] Vectorize edge filtering functions

Add SSE4.1 implementations for the intra-edge experiment:
  av1_filter_intra_edge_sse4_1()
  av1_filter_intra_edge_high_sse4_1()

Approximate cycle reduction at qp 20, 1 key frame:
  Enc (lbd) 1.4% to 0.3%
  Dec (lbd) 0.4% to 0.1%
  Enc (hbd) 1.1% to 0.2%
  Dec (hbd) 0.6% to 0.1%

No change to bitstream

Change-Id: I176b2d125424d7d226114c807915c33dde5c3720
diff --git a/av1/av1_common.mk b/av1/av1_common.mk
index 67a7c79..1e8ac30 100644
--- a/av1/av1_common.mk
+++ b/av1/av1_common.mk
@@ -85,6 +85,9 @@
 AV1_COMMON_SRCS-yes += common/restoration.c
 AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/selfguided_sse4.c
 endif
+ifeq ($(CONFIG_INTRA_EDGE),yes)
+AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/intra_edge_sse4.c
+endif
 ifeq (yes,$(filter $(CONFIG_GLOBAL_MOTION) $(CONFIG_WARPED_MOTION),yes))
 AV1_COMMON_SRCS-yes += common/warped_motion.h
 AV1_COMMON_SRCS-yes += common/warped_motion.c