[CFL] CfL Initialization Simplification

The CfL context is now embedded directly in MACROBLOCKD, rather than
MACROBLOCKD holding only a pointer to it.
The intent is to avoid race conditions, since MACROBLOCKD is stored
inside ThreadData. This change also simplifies CfL initialization.

Change-Id: I991503716b21fc9aca60caddb2008b8bff397e6d
diff --git a/av1/common/cfl.c b/av1/common/cfl.c
index d930ec2..0d97694 100644
--- a/av1/common/cfl.c
+++ b/av1/common/cfl.c
@@ -75,7 +75,6 @@
   const int block_row_stride = MAX_SB_SIZE << tx_size_high_log2[tx_size];
   const int num_pel_log2 =
       (tx_size_high_log2[tx_size] + tx_size_wide_log2[tx_size]);
-
   int16_t *pred_buf_q3 = cfl->pred_buf_q3;
 
   cfl_pad(cfl, width, height);
@@ -147,7 +146,7 @@
 #endif  // CONFIG_HIGHBITDEPTH
 
 static void cfl_compute_parameters(MACROBLOCKD *const xd, TX_SIZE tx_size) {
-  CFL_CTX *const cfl = xd->cfl;
+  CFL_CTX *const cfl = &xd->cfl;
   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
 
   // Do not call cfl_compute_parameters multiple time on the same values.
@@ -188,7 +187,7 @@
 
 void cfl_predict_block(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride,
                        int row, int col, TX_SIZE tx_size, int plane) {
-  CFL_CTX *const cfl = xd->cfl;
+  CFL_CTX *const cfl = &xd->cfl;
   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
 
   if (!cfl->are_parameters_computed) cfl_compute_parameters(xd, tx_size);
@@ -462,7 +461,7 @@
 
 void cfl_store_tx(MACROBLOCKD *const xd, int row, int col, TX_SIZE tx_size,
                   BLOCK_SIZE bsize) {
-  CFL_CTX *const cfl = xd->cfl;
+  CFL_CTX *const cfl = &xd->cfl;
   struct macroblockd_plane *const pd = &xd->plane[AOM_PLANE_Y];
   uint8_t *dst =
       &pd->dst.buf[(row * pd->dst.stride + col) << tx_size_wide_log2[0]];
@@ -480,7 +479,7 @@
 }
 
 void cfl_store_block(MACROBLOCKD *const xd, BLOCK_SIZE bsize, TX_SIZE tx_size) {
-  CFL_CTX *const cfl = xd->cfl;
+  CFL_CTX *const cfl = &xd->cfl;
   struct macroblockd_plane *const pd = &xd->plane[AOM_PLANE_Y];
   int row = 0;
   int col = 0;