[CFL] Fixed point implementation for tx average

This change does not impact the bitstream as no loss is incurred by using
a fixed point value for the transform size average.

For low bit depth, the transform size average is stored using Q8.10
fixed point format. Worst case, smallest fraction is 1/1024.

Results on Subset1 (Compared to 366b74 with CfL)

  PSNR | PSNR Cb | PSNR Cr | PSNR HVS |   SSIM | MS SSIM | CIEDE 2000
0.0000 |  0.0000 |  0.0000 |   0.0000 | 0.0000 |  0.0000 |     0.0000

Change-Id: Ia5b046b92a0e4c40e413b16af3394bdc0a8c8cd9
diff --git a/av1/common/cfl.c b/av1/common/cfl.c
index 154df73..5f5e397 100644
--- a/av1/common/cfl.c
+++ b/av1/common/cfl.c
@@ -190,12 +190,14 @@
   const int tx_width = tx_size_wide[tx_size];
   const int stride = width >> tx_size_wide_log2[tx_size];
   const int block_row_stride = MAX_SB_SIZE << tx_size_high_log2[tx_size];
-  const double num_pel = tx_width * tx_height;
+  const int num_pel_log2 =
+      (tx_size_high_log2[tx_size] + tx_size_wide_log2[tx_size]);
+
   // TODO(ltrudeau) Convert to uint16 for HBD support
   const uint8_t *y_pix = cfl->y_down_pix;
   // TODO(ltrudeau) Convert to uint16 for HBD support
   const uint8_t *t_y_pix;
-  double *averages = cfl->y_averages;
+  int *averages_q10 = cfl->y_averages_q10;
 
   cfl_load(cfl, 0, 0, width, height);
 
@@ -210,7 +212,11 @@
         }
         t_y_pix += MAX_SB_SIZE;
       }
-      averages[a++] = sum / num_pel;
+      averages_q10[a++] = (sum << 10) >> num_pel_log2;
+
+      // Assert no loss from fixed point
+      assert((double)averages_q10[a - 1] ==
+             (sum / ((double)(1 << num_pel_log2))) * (1 << 10));
     }
     assert(a % stride == 0);
     y_pix += block_row_stride;
@@ -256,7 +262,7 @@
   const int avg_col =
       (col << tx_size_high_log2[0]) >> tx_size_high_log2[tx_size];
   const double avg =
-      cfl->y_averages[cfl->y_averages_stride * avg_row + avg_col];
+      cfl->y_averages_q10[cfl->y_averages_stride * avg_row + avg_col] / 1024.0;
 
   cfl_load(cfl, row, col, width, height);
   for (int j = 0; j < height; j++) {
diff --git a/av1/common/cfl.h b/av1/common/cfl.h
index 6fbbc14..66e5359 100644
--- a/av1/common/cfl.h
+++ b/av1/common/cfl.h
@@ -43,7 +43,9 @@
 
   // Transform level averages of the luma reconstructed values over the entire
   // prediction unit
-  double y_averages[MAX_NUM_TXB];
+  // Fixed point y_averages is Q12.10:
+  //   * Worst case division is 1/1024
+  int y_averages_q10[MAX_NUM_TXB];
   int y_averages_stride;
 
   int are_parameters_computed;
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index 842c78c..27ab853 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -1429,7 +1429,7 @@
 
 #if CONFIG_CFL
 static int cfl_alpha_dist(const uint8_t *y_pix, int y_stride,
-                          const double y_averages[MAX_NUM_TXB],
+                          const int y_averages_q10[MAX_NUM_TXB],
                           const uint8_t *src, int src_stride, int width,
                           int height, TX_SIZE tx_size, double dc_pred,
                           double alpha, int *dist_neg_out) {
@@ -1464,12 +1464,13 @@
     const int h = b_j + tx_height;
     for (int b_i = 0; b_i < width; b_i += tx_width) {
       const int w = b_i + tx_width;
-      const double tx_avg = y_averages[a++];
+      // TODO(ltrudeau) Remove div when DC_PRED is also fixed point
+      const double tx_avg_q10 = y_averages_q10[a++] / 1024.0;
       t_y_pix = y_pix;
       t_src = src;
       for (int t_j = b_j; t_j < h; t_j++) {
         for (int t_i = b_i; t_i < w; t_i++) {
-          const double scaled_luma = alpha * (t_y_pix[t_i] - tx_avg);
+          const double scaled_luma = alpha * (t_y_pix[t_i] - tx_avg_q10);
           const int uv = t_src[t_i];
 
           // TODO(ltrudeau) add support for HBD.
@@ -1529,7 +1530,7 @@
   const int height = cfl->uv_height;
   const double dc_pred_u = cfl->dc_pred[CFL_PRED_U];
   const double dc_pred_v = cfl->dc_pred[CFL_PRED_V];
-  const double *y_averages = cfl->y_averages;
+  const int *y_averages = cfl->y_averages_q10;
   const uint8_t *y_pix = cfl->y_down_pix;
 
   CFL_SIGN_TYPE *signs = mbmi->cfl_alpha_signs;