[CFL] Adjust Pixel Buffer for Chroma Sub8x8

Adjust the row and column offsets for sub-8x8 blocks so that CfL prediction
can use all available reconstructed luma pixels.

Results on Subset 1 (Compared to b03c2f44 with CfL)

   PSNR | PSNR Cb | PSNR Cr | PSNR HVS |    SSIM | MS SSIM | CIEDE 2000
-0.1355 | -0.8517 | -0.4481 |  -0.0579 | -0.0237 | -0.0203 |    -0.2765

Change-Id: Ia91f0a078f0ff4f28bb2d272b096f579e0d04dac
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 0729dbe..75c640d 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -1822,7 +1822,9 @@
     const int dst_stride = pd->dst.stride;
     uint8_t *dst =
         &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
-    cfl_store(xd->cfl, dst, dst_stride, blk_row, blk_col, tx_size);
+    // TODO (ltrudeau) Store sub-8x8 inter blocks when bottom right block is
+    // intra predicted.
+    cfl_store(xd->cfl, dst, dst_stride, blk_row, blk_col, tx_size, plane_bsize);
   }
 #endif
 #if CONFIG_DPCM_INTRA
@@ -9107,7 +9109,14 @@
     // so we can store reconstructed luma values
     RD_STATS this_rd_stats;
 
+#if CONFIG_CB4X4
+    // Don't store the luma value if no chroma is associated.
+    // Don't worry, we will store this reconstructed luma in the following
+    // encode dry-run; the chroma plane will never know.
+    x->cfl_store_y = !x->skip_chroma_rd;
+#else
     x->cfl_store_y = 1;
+#endif
 
     txfm_rd_in_plane(x, cpi, &this_rd_stats, INT64_MAX, AOM_PLANE_Y,
                      mbmi->sb_type, mbmi->tx_size,