[CFL] Use separate buffers for reconstructed pixels and AC

This avoids the rare situation where the average is subtracted twice.

Results on Subset1:
   PSNR | PSNR Cb | PSNR Cr | PSNR HVS |    SSIM | MS SSIM | CIEDE 2000
-0.0247 | -0.0212 | -0.0183 |   0.0077 | -0.0491 | -0.0565 |     0.0109

https://arewecompressedyet.com/?job=master%402018-05-10&job=2buf_cfl%402018-05-10T14%3A02%3A12.666Z

BUG=aomedia:1868

Change-Id: I592fbfb42490cc2ee4046a3dbe853f8eca7b91af
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index ccb1af5..41dee5d 100755
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -41,12 +41,12 @@
 typedef void (*cfl_subsample_hbd_fn)(const uint16_t *input, int input_stride,
                                      int16_t *output_q3);
 
-typedef void (*cfl_subtract_average_fn)(int16_t *pred_buf_q3);
+typedef void (*cfl_subtract_average_fn)(const int16_t *src, int16_t *dst);
 
-typedef void (*cfl_predict_lbd_fn)(const int16_t *pred_buf_q3, uint8_t *dst,
+typedef void (*cfl_predict_lbd_fn)(const int16_t *src, uint8_t *dst,
                                    int dst_stride, int alpha_q3);
 
-typedef void (*cfl_predict_hbd_fn)(const int16_t *pred_buf_q3, uint16_t *dst,
+typedef void (*cfl_predict_hbd_fn)(const int16_t *src, uint16_t *dst,
                                    int dst_stride, int alpha_q3, int bd);
 EOF
 }