[intra-edge] Vectorize edge filtering functions

Add SSE4.1 versions of the intra-edge experiment's filter functions:
  av1_filter_intra_edge_sse4_1()
  av1_filter_intra_edge_high_sse4_1()

Approximate cycle reduction at qp 20, 1 keyframe:
  Enc (lbd) 1.4% to 0.3%
  Dec (lbd) 0.4% to 0.1%
  Enc (hbd) 1.1% to 0.2%
  Dec (hbd) 0.6% to 0.1%

No change to bitstream

Change-Id: I176b2d125424d7d226114c807915c33dde5c3720
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index 5230aa6..ccce1fc 100755
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -665,4 +665,14 @@
   }
 }
 
+# INTRA_EDGE edge-filter prototypes with sse4_1 specializations; the _high (uint16_t) variant also requires CONFIG_HIGHBITDEPTH
+if (aom_config("CONFIG_INTRA_EDGE") eq "yes") {
+  add_proto qw/void av1_filter_intra_edge/, "uint8_t *p, int sz, int strength";
+  specialize qw/av1_filter_intra_edge sse4_1/;
+  if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
+    add_proto qw/void av1_filter_intra_edge_high/, "uint16_t *p, int sz, int strength";
+    specialize qw/av1_filter_intra_edge_high sse4_1/;
+  }
+}
+
 1;