[CFL] Specific header for RTCD

Allows av1_rtcd.h to use the CfL function pointer types without including
all of cfl.h. Because av1_rtcd.h is included in many files, including cfl.h
from it pulled CfL declarations into many places in the encoder, which
caused issues with gcc on arm64.

Change-Id: Icc7ac7b898da88c1137d4c346a5c31efe2f0ec67
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index a92e048..53c38b3 100755
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -28,10 +28,6 @@
 #include "av1/common/restoration.h"
 #endif
 
-#if CONFIG_CFL
-#include "av1/common/cfl.h"
-#endif
-
 struct macroblockd;
 
 /* Encoder forward decls */
@@ -42,6 +38,25 @@
 struct mv;
 union int_mv;
 struct yv12_buffer_config;
+
+#if CONFIG_CFL
+/* Function pointer types returned by CfL functions */
+typedef void (*cfl_subsample_lbd_fn)(const uint8_t *input, int input_stride,
+                                     int16_t *output_q3, int width, int height);
+
+typedef void (*cfl_subsample_hbd_fn)(const uint16_t *input, int input_stride,
+                                     int16_t *output_q3, int width, int height);
+
+typedef void (*cfl_subtract_average_fn)(int16_t *pred_buf_q3);
+
+typedef void (*cfl_predict_lbd_fn)(const int16_t *pred_buf_q3, uint8_t *dst,
+                                   int dst_stride, TX_SIZE tx_size,
+                                   int alpha_q3);
+
+typedef void (*cfl_predict_hbd_fn)(const int16_t *pred_buf_q3, uint16_t *dst,
+                                   int dst_stride, TX_SIZE tx_size,
+                                   int alpha_q3, int bd);
+#endif
 EOF
 }
 forward_decls qw/av1_common_forward_decls/;
diff --git a/av1/common/cfl.h b/av1/common/cfl.h
index c78095b..f8bd4ef 100644
--- a/av1/common/cfl.h
+++ b/av1/common/cfl.h
@@ -14,22 +14,6 @@
 
 #include "av1/common/blockd.h"
 
-typedef void (*cfl_subsample_lbd_fn)(const uint8_t *input, int input_stride,
-                                     int16_t *output_q3, int width, int height);
-
-typedef void (*cfl_subsample_hbd_fn)(const uint16_t *input, int input_stride,
-                                     int16_t *output_q3, int width, int height);
-
-typedef void (*cfl_subtract_average_fn)(int16_t *pred_buf_q3);
-
-typedef void (*cfl_predict_lbd_fn)(const int16_t *pred_buf_q3, uint8_t *dst,
-                                   int dst_stride, TX_SIZE tx_size,
-                                   int alpha_q3);
-
-typedef void (*cfl_predict_hbd_fn)(const int16_t *pred_buf_q3, uint16_t *dst,
-                                   int dst_stride, TX_SIZE tx_size,
-                                   int alpha_q3, int bd);
-
 static INLINE CFL_ALLOWED_TYPE is_cfl_allowed(const MB_MODE_INFO *mbmi) {
   const BLOCK_SIZE bsize = mbmi->sb_type;
   assert(bsize < BLOCK_SIZES_ALL);
diff --git a/av1/common/x86/cfl_sse2.c b/av1/common/x86/cfl_sse2.c
index 6ff54c0..054715f 100644
--- a/av1/common/x86/cfl_sse2.c
+++ b/av1/common/x86/cfl_sse2.c
@@ -11,6 +11,7 @@
 
 #include <emmintrin.h>
 
+#include "av1/common/cfl.h"
 #include "./av1_rtcd.h"
 
 static INLINE __m128i fill_sum_epi32(__m128i l0) {
diff --git a/test/cfl_test.cc b/test/cfl_test.cc
index c3baa19..d7493ab 100644
--- a/test/cfl_test.cc
+++ b/test/cfl_test.cc
@@ -14,7 +14,6 @@
 #include "./av1_rtcd.h"
 #include "test/util.h"
 #include "test/acm_random.h"
-#include "av1/common/cfl.h"
 
 using std::tr1::make_tuple;