[CFL] Fix typedef-redefinition compiler warnings

Instead of forward-declaring AV1_COMMON and MACROBLOCKD in cfl.h,
move the CFL_CTX struct (to blockd.h) and the cfl_init() prototype
(to onyxc_int.h) closer to where they are used and after these
types are defined.

Change-Id: I75f005b46ef322a6fcbc01377b8dded1637c5f73
diff --git a/av1/common/blockd.h b/av1/common/blockd.h
index 68e59dc..f9dccc0 100644
--- a/av1/common/blockd.h
+++ b/av1/common/blockd.h
@@ -31,9 +31,6 @@
 #include "av1/common/pvq_state.h"
 #include "av1/decoder/decint.h"
 #endif
-#if CONFIG_CFL
-#include "av1/common/cfl.h"
-#endif
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -687,6 +684,59 @@
 } SgrprojInfo;
 #endif  // CONFIG_LOOP_RESTORATION
 
+#if CONFIG_CFL
+typedef struct cfl_ctx {
+  // Pixel buffer containing the luma pixels used as prediction for chroma
+  // TODO(ltrudeau) Convert to uint16 for HBD support
+  uint8_t y_pix[MAX_SB_SQUARE];
+
+  // Pixel buffer containing the downsampled luma pixels used as prediction for
+  // chroma
+  // TODO(ltrudeau) Convert to uint16 for HBD support
+  uint8_t y_down_pix[MAX_SB_SQUARE];
+
+  // Height and width of the luma prediction block currently in the pixel buffer
+  int y_height, y_width;
+
+  // Height and width of the chroma prediction block currently associated with
+  // this context
+  int uv_height, uv_width;
+
+  // Transform level averages of the luma reconstructed values over the entire
+  // prediction unit
+  // Fixed point y_averages is Q12.3:
+  //   * Worst case division is 1/1024
+  //   * Max error will be 1/16th.
+  // Note: 3 is chosen so that y_averages fits in 15 bits when 12 bit input is
+  // used
+  int y_averages_q3[MAX_NUM_TXB];
+  int y_averages_stride;
+
+  int are_parameters_computed;
+
+  // Chroma subsampling
+  int subsampling_x, subsampling_y;
+
+  // Block level DC_PRED for each chromatic plane
+  int dc_pred[CFL_PRED_PLANES];
+
+  int mi_row, mi_col;
+
+  // Whether the reconstructed luma pixels need to be stored
+  int store_y;
+
+#if CONFIG_CB4X4
+  int is_chroma_reference;
+#if CONFIG_CHROMA_SUB8X8 && CONFIG_DEBUG
+  // The prediction used for sub8x8 blocks originates from multiple luma blocks,
+  // this array is used to validate that cfl_store() is called only once for
+  // each luma block
+  uint8_t sub8x8_val[4];
+#endif  // CONFIG_CHROMA_SUB8X8 && CONFIG_DEBUG
+#endif  // CONFIG_CB4X4
+} CFL_CTX;
+#endif  // CONFIG_CFL
+
 typedef struct macroblockd {
   struct macroblockd_plane plane[MAX_MB_PLANE];
   uint8_t bmode_blocks_wl;
diff --git a/av1/common/cfl.h b/av1/common/cfl.h
index d4ce900..5dcde22 100644
--- a/av1/common/cfl.h
+++ b/av1/common/cfl.h
@@ -16,67 +16,9 @@
 #include <string.h>
 
 #include "av1/common/common.h"
+#include "av1/common/blockd.h"
 #include "av1/common/enums.h"
 
-// Forward declaration of AV1_COMMON, in order to avoid creating a cyclic
-// dependency by importing av1/common/onyxc_int.h
-typedef struct AV1Common AV1_COMMON;
-
-// Forward declaration of MACROBLOCK, in order to avoid creating a cyclic
-// dependency by importing av1/common/blockd.h
-typedef struct macroblockd MACROBLOCKD;
-
-typedef struct {
-  // Pixel buffer containing the luma pixels used as prediction for chroma
-  // TODO(ltrudeau) Convert to uint16 for HBD support
-  uint8_t y_pix[MAX_SB_SQUARE];
-
-  // Pixel buffer containing the downsampled luma pixels used as prediction for
-  // chroma
-  // TODO(ltrudeau) Convert to uint16 for HBD support
-  uint8_t y_down_pix[MAX_SB_SQUARE];
-
-  // Height and width of the luma prediction block currently in the pixel buffer
-  int y_height, y_width;
-
-  // Height and width of the chroma prediction block currently associated with
-  // this context
-  int uv_height, uv_width;
-
-  // Transform level averages of the luma reconstructed values over the entire
-  // prediction unit
-  // Fixed point y_averages is Q12.3:
-  //   * Worst case division is 1/1024
-  //   * Max error will be 1/16th.
-  // Note: 3 is chosen so that y_averages fits in 15 bits when 12 bit input is
-  // used
-  int y_averages_q3[MAX_NUM_TXB];
-  int y_averages_stride;
-
-  int are_parameters_computed;
-
-  // Chroma subsampling
-  int subsampling_x, subsampling_y;
-
-  // Block level DC_PRED for each chromatic plane
-  int dc_pred[CFL_PRED_PLANES];
-
-  int mi_row, mi_col;
-
-  // Whether the reconstructed luma pixels need to be stored
-  int store_y;
-
-#if CONFIG_CB4X4
-  int is_chroma_reference;
-#if CONFIG_CHROMA_SUB8X8 && CONFIG_DEBUG
-  // The prediction used for sub8x8 blocks originates from multiple luma blocks,
-  // this array is used to validate that cfl_store() is called only once for
-  // each luma block
-  uint8_t sub8x8_val[4];
-#endif  // CONFIG_CHROMA_SUB8X8 && CONFIG_DEBUG
-#endif  // CONFIG_CB4X4
-} CFL_CTX;
-
 static INLINE int get_scaled_luma_q0(int alpha_q3, int y_pix, int avg_q3) {
   int scaled_luma_q6 = alpha_q3 * ((y_pix << 3) - avg_q3);
   return ROUND_POWER_OF_TWO_SIGNED(scaled_luma_q6, 6);
@@ -88,8 +30,6 @@
 }
 #endif  // CONFIG_CHROMA_SUB8X8 && CONFIG_DEBUG
 
-void cfl_init(CFL_CTX *cfl, AV1_COMMON *cm);
-
 void cfl_predict_block(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride,
                        int row, int col, TX_SIZE tx_size, int plane);
 
diff --git a/av1/common/onyxc_int.h b/av1/common/onyxc_int.h
index ebf7ea4..96d754c 100644
--- a/av1/common/onyxc_int.h
+++ b/av1/common/onyxc_int.h
@@ -35,9 +35,6 @@
 #if CONFIG_PVQ
 #include "av1/common/pvq.h"
 #endif
-#if CONFIG_CFL
-#include "av1/common/cfl.h"
-#endif
 #if CONFIG_HASH_ME
 // TODO(youzhou@microsoft.com): Encoder only. Move it out of common
 #include "av1/encoder/hash_motion.h"
@@ -640,6 +637,10 @@
   return cm->frame_type == KEY_FRAME || cm->intra_only;
 }
 
+#if CONFIG_CFL
+void cfl_init(CFL_CTX *cfl, AV1_COMMON *cm);
+#endif
+
 static INLINE void av1_init_macroblockd(AV1_COMMON *cm, MACROBLOCKD *xd,
 #if CONFIG_PVQ
                                         tran_low_t *pvq_ref_coeff,