[CFL] Better encapsulation
The function cfl_compute_parameters is added and contains the logic
related to building the CfL context parameters. As such, many cfl
functions can now be encapsulated inside of cfl.c and not exposed to the
rest of AV1.
This also allows for supplemental asserts that validate that the CfL
context is properly built.
Results on Subset1 (compared to 9c6f854 with CfL)
PSNR | PSNR Cb | PSNR Cr | PSNR HVS | SSIM | MS SSIM | CIEDE 2000
0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000
Change-Id: I6d14a426416b3af5491bdc145db7281b5e988cae
diff --git a/av1/common/cfl.c b/av1/common/cfl.c
index aa9a557..0feabe4 100644
--- a/av1/common/cfl.c
+++ b/av1/common/cfl.c
@@ -24,11 +24,97 @@
memset(&cfl->y_pix, 0, sizeof(uint8_t) * MAX_SB_SQUARE);
cfl->subsampling_x = cm->subsampling_x;
cfl->subsampling_y = cm->subsampling_y;
+ cfl->are_parameters_computed = 0;
+}
+
+// Load from the CfL pixel buffer into output
+static void cfl_load(CFL_CTX *cfl, int row, int col, int width, int height) {
+ const int sub_x = cfl->subsampling_x;
+ const int sub_y = cfl->subsampling_y;
+ const int off_log2 = tx_size_wide_log2[0];
+
+ // TODO(ltrudeau) convert to uint16 to add HBD support
+ const uint8_t *y_pix;
+ // TODO(ltrudeau) convert to uint16 to add HBD support
+ uint8_t *output = cfl->y_down_pix;
+
+ int pred_row_offset = 0;
+ int output_row_offset = 0;
+
+ // TODO(ltrudeau) should be faster to downsample when we store the values
+ // TODO(ltrudeau) add support for 4:2:2
+ if (sub_y == 0 && sub_x == 0) {
+ y_pix = &cfl->y_pix[(row * MAX_SB_SIZE + col) << off_log2];
+ for (int j = 0; j < height; j++) {
+ for (int i = 0; i < width; i++) {
+ // In 4:4:4, pixels match 1 to 1
+ output[output_row_offset + i] = y_pix[pred_row_offset + i];
+ }
+ pred_row_offset += MAX_SB_SIZE;
+ output_row_offset += MAX_SB_SIZE;
+ }
+ } else if (sub_y == 1 && sub_x == 1) {
+ y_pix = &cfl->y_pix[(row * MAX_SB_SIZE + col) << (off_log2 + sub_y)];
+ for (int j = 0; j < height; j++) {
+ for (int i = 0; i < width; i++) {
+ int top_left = (pred_row_offset + i) << sub_y;
+ int bot_left = top_left + MAX_SB_SIZE;
+ // In 4:2:0, average pixels in 2x2 grid
+ output[output_row_offset + i] = OD_SHR_ROUND(
+ y_pix[top_left] + y_pix[top_left + 1] // Top row
+ + y_pix[bot_left] + y_pix[bot_left + 1] // Bottom row
+ ,
+ 2);
+ }
+ pred_row_offset += MAX_SB_SIZE;
+ output_row_offset += MAX_SB_SIZE;
+ }
+ } else {
+ assert(0); // Unsupported chroma subsampling
+ }
+  // Due to frame boundary issues, it is possible that the total area
+  // covered by Chroma exceeds that of Luma. When this happens, we write over
+  // the broken data by repeating the last columns and/or rows.
+ //
+  // Note that in order to manage the case where both rows and
+  // columns overrun, we apply rows first. This way, when the rows
+  // overrun the bottom of the frame, the columns will be copied over
+  // them.
+ const int uv_width = (col << off_log2) + width;
+ const int uv_height = (row << off_log2) + height;
+
+ const int diff_width = uv_width - (cfl->y_width >> sub_x);
+ const int diff_height = uv_height - (cfl->y_height >> sub_y);
+
+ if (diff_width > 0) {
+ int last_pixel;
+ output_row_offset = width - diff_width;
+
+ for (int j = 0; j < height; j++) {
+ last_pixel = output_row_offset - 1;
+ for (int i = 0; i < diff_width; i++) {
+ output[output_row_offset + i] = output[last_pixel];
+ }
+ output_row_offset += MAX_SB_SIZE;
+ }
+ }
+
+ if (diff_height > 0) {
+ output_row_offset = (height - diff_height) * MAX_SB_SIZE;
+ const int last_row_offset = output_row_offset - MAX_SB_SIZE;
+
+ for (int j = 0; j < diff_height; j++) {
+ for (int i = 0; i < width; i++) {
+ output[output_row_offset + i] = output[last_row_offset + i];
+ }
+ output_row_offset += MAX_SB_SIZE;
+ }
+ }
}
// CfL computes its own block-level DC_PRED. This is required to compute both
// alpha_cb and alpha_cr before the prediction are computed.
-void cfl_dc_pred(MACROBLOCKD *xd, int width, int height) {
+static void cfl_dc_pred(MACROBLOCKD *xd) {
const struct macroblockd_plane *const pd_u = &xd->plane[AOM_PLANE_U];
const struct macroblockd_plane *const pd_v = &xd->plane[AOM_PLANE_V];
@@ -38,6 +124,9 @@
const int dst_u_stride = pd_u->dst.stride;
const int dst_v_stride = pd_v->dst.stride;
+ CFL_CTX *const cfl = xd->cfl;
+ const int width = cfl->uv_width;
+ const int height = cfl->uv_height;
// Number of pixel on the top and left borders.
const double num_pel = width + height;
@@ -83,37 +172,70 @@
sum_v += height * 129;
}
- xd->cfl->dc_pred[CFL_PRED_U] = sum_u / num_pel;
- xd->cfl->dc_pred[CFL_PRED_V] = sum_v / num_pel;
+ cfl->dc_pred[CFL_PRED_U] = sum_u / num_pel;
+ cfl->dc_pred[CFL_PRED_V] = sum_v / num_pel;
}
-double cfl_compute_average(uint8_t *y_pix, int y_stride, int width,
- int height) {
+static void cfl_compute_average(CFL_CTX *cfl) {
+ const int width = cfl->uv_width;
+ const int height = cfl->uv_height;
+ const double num_pel = width * height;
+ // TODO(ltrudeau) Convert to uint16 for HBD support
+ const uint8_t *y_pix = cfl->y_down_pix;
+ // TODO(ltrudeau) Convert to uint16 for HBD support
+
+ cfl_load(cfl, 0, 0, width, height);
+
int sum = 0;
for (int j = 0; j < height; j++) {
for (int i = 0; i < width; i++) {
sum += y_pix[i];
}
- y_pix += y_stride;
+ y_pix += MAX_SB_SIZE;
}
- return sum / (double)(width * height);
+ cfl->y_average = sum / num_pel;
+}
+
+static INLINE double cfl_idx_to_alpha(int alpha_idx, CFL_SIGN_TYPE alpha_sign,
+ CFL_PRED_TYPE pred_type) {
+ const int mag_idx = cfl_alpha_codes[alpha_idx][pred_type];
+ const double abs_alpha = cfl_alpha_mags[mag_idx];
+ if (alpha_sign == CFL_SIGN_POS) {
+ return abs_alpha;
+ } else {
+ assert(abs_alpha != 0.0);
+ assert(cfl_alpha_mags[mag_idx + 1] == -abs_alpha);
+ return -abs_alpha;
+ }
}
// Predict the current transform block using CfL.
-void cfl_predict_block(const CFL_CTX *cfl, uint8_t *dst, int dst_stride,
- int row, int col, TX_SIZE tx_size, double dc_pred,
- double alpha) {
+void cfl_predict_block(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride,
+ int row, int col, TX_SIZE tx_size, int plane) {
+ CFL_CTX *const cfl = xd->cfl;
+ MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+
+ // CfL parameters must be computed before prediction can be done.
+ assert(cfl->are_parameters_computed == 1);
+
const int width = tx_size_wide[tx_size];
const int height = tx_size_high[tx_size];
- const double y_avg = cfl->y_avg;
+ // TODO(ltrudeau) Convert to uint16 to support HBD
+ const uint8_t *y_pix = cfl->y_down_pix;
- cfl_load(cfl, dst, dst_stride, row, col, width, height);
+ const double dc_pred = cfl->dc_pred[plane - 1];
+ const double alpha = cfl_idx_to_alpha(
+ mbmi->cfl_alpha_idx, mbmi->cfl_alpha_signs[plane - 1], plane - 1);
+ const double avg = cfl->y_average;
+
+ cfl_load(cfl, row, col, width, height);
for (int j = 0; j < height; j++) {
for (int i = 0; i < width; i++) {
- dst[i] = (uint8_t)(alpha * (dst[i] - y_avg) + dc_pred + 0.5);
+ dst[i] = (uint8_t)(alpha * (y_pix[i] - avg) + dc_pred + 0.5);
}
dst += dst_stride;
+ y_pix += MAX_SB_SIZE;
}
}
@@ -130,6 +252,7 @@
assert(MAX_SB_SIZE * (row + tx_height - 1) + col + tx_width - 1 <
MAX_SB_SQUARE);
+ // TODO(ltrudeau) Speedup possible by moving the downsampling to cfl_store
for (int j = 0; j < tx_height; j++) {
for (int i = 0; i < tx_width; i++) {
y_pix[i] = input[i];
@@ -148,85 +271,34 @@
cfl->y_width = OD_MAXI((col << tx_off_log2) + tx_width, cfl->y_width);
cfl->y_height = OD_MAXI((row << tx_off_log2) + tx_height, cfl->y_height);
}
+
+ // Invalidate current parameters
+ cfl->are_parameters_computed = 0;
}
-// Load from the CfL pixel buffer into output
-void cfl_load(const CFL_CTX *cfl, uint8_t *output, int output_stride, int row,
- int col, int width, int height) {
- const int sub_x = cfl->subsampling_x;
- const int sub_y = cfl->subsampling_y;
- const int off_log2 = tx_size_wide_log2[0];
+void cfl_compute_parameters(MACROBLOCKD *const xd, TX_SIZE tx_size) {
+ CFL_CTX *const cfl = xd->cfl;
+ MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
- const uint8_t *y_pix;
+  // Do not call cfl_compute_parameters multiple times on the same values.
+ assert(cfl->are_parameters_computed == 0);
- int pred_row_offset = 0;
- int output_row_offset = 0;
+#if CONFIG_CHROMA_SUB8X8
+ const BLOCK_SIZE plane_bsize = AOMMAX(
+ BLOCK_4X4, get_plane_block_size(mbmi->sb_type, &xd->plane[AOM_PLANE_U]));
+#else
+ const BLOCK_SIZE plane_bsize =
+ get_plane_block_size(mbmi->sb_type, &xd->plane[AOM_PLANE_U]);
+#endif
+ // AOM_PLANE_U is used, but both planes will have the same sizes.
+ cfl->uv_width = max_intra_block_width(xd, plane_bsize, AOM_PLANE_U, tx_size);
+ cfl->uv_height =
+ max_intra_block_height(xd, plane_bsize, AOM_PLANE_U, tx_size);
- // TODO(ltrudeau) add support for 4:2:2
- if (sub_y == 0 && sub_x == 0) {
- y_pix = &cfl->y_pix[(row * MAX_SB_SIZE + col) << off_log2];
- for (int j = 0; j < height; j++) {
- for (int i = 0; i < width; i++) {
- // In 4:4:4, pixels match 1 to 1
- output[output_row_offset + i] = y_pix[pred_row_offset + i];
- }
- pred_row_offset += MAX_SB_SIZE;
- output_row_offset += output_stride;
- }
- } else if (sub_y == 1 && sub_x == 1) {
- y_pix = &cfl->y_pix[(row * MAX_SB_SIZE + col) << (off_log2 + sub_y)];
- for (int j = 0; j < height; j++) {
- for (int i = 0; i < width; i++) {
- int top_left = (pred_row_offset + i) << sub_y;
- int bot_left = top_left + MAX_SB_SIZE;
- // In 4:2:0, average pixels in 2x2 grid
- output[output_row_offset + i] = OD_SHR_ROUND(
- y_pix[top_left] + y_pix[top_left + 1] // Top row
- + y_pix[bot_left] + y_pix[bot_left + 1] // Bottom row
- ,
- 2);
- }
- pred_row_offset += MAX_SB_SIZE;
- output_row_offset += output_stride;
- }
- } else {
- assert(0); // Unsupported chroma subsampling
- }
- // Due to frame boundary issues, it is possible that the total area of
- // covered by Chroma exceeds that of Luma. When this happens, we write over
- // the broken data by repeating the last columns and/or rows.
- //
- // Note that in order to manage the case where both rows and columns
- // overrun,
- // we apply rows first. This way, when the rows overrun the bottom of the
- // frame, the columns will be copied over them.
- const int uv_width = (col << off_log2) + width;
- const int uv_height = (row << off_log2) + height;
-
- const int diff_width = uv_width - (cfl->y_width >> sub_x);
- const int diff_height = uv_height - (cfl->y_height >> sub_y);
-
- if (diff_width > 0) {
- int last_pixel;
- output_row_offset = width - diff_width;
-
- for (int j = 0; j < height; j++) {
- last_pixel = output_row_offset - 1;
- for (int i = 0; i < diff_width; i++) {
- output[output_row_offset + i] = output[last_pixel];
- }
- output_row_offset += output_stride;
- }
- }
-
- if (diff_height > 0) {
- output_row_offset = (height - diff_height) * output_stride;
- const int last_row_offset = output_row_offset - output_stride;
- for (int j = 0; j < diff_height; j++) {
- for (int i = 0; i < width; i++) {
- output[output_row_offset + i] = output[last_row_offset + i];
- }
- output_row_offset += output_stride;
- }
- }
+ // Compute block-level DC_PRED for both chromatic planes.
+ // DC_PRED replaces beta in the linear model.
+ cfl_dc_pred(xd);
+ // Compute block-level average on reconstructed luma input.
+ cfl_compute_average(cfl);
+ cfl->are_parameters_computed = 1;
}
diff --git a/av1/common/cfl.h b/av1/common/cfl.h
index dcc896d..7c11c4b 100644
--- a/av1/common/cfl.h
+++ b/av1/common/cfl.h
@@ -26,13 +26,25 @@
typedef struct {
// Pixel buffer containing the luma pixels used as prediction for chroma
+ // TODO(ltrudeau) Convert to uint16 for HBD support
uint8_t y_pix[MAX_SB_SQUARE];
+ // Pixel buffer containing the downsampled luma pixels used as prediction for
+ // chroma
+ // TODO(ltrudeau) Convert to uint16 for HBD support
+ uint8_t y_down_pix[MAX_SB_SQUARE];
+
// Height and width of the luma prediction block currently in the pixel buffer
int y_height, y_width;
+ // Height and width of the chroma prediction block currently associated with
+ // this context
+ int uv_height, uv_width;
+
// Average of the luma reconstructed values over the entire prediction unit
- double y_avg;
+ double y_average;
+
+ int are_parameters_computed;
// Chroma subsampling
int subsampling_x, subsampling_y;
@@ -57,30 +69,12 @@
void cfl_init(CFL_CTX *cfl, AV1_COMMON *cm);
-void cfl_dc_pred(MACROBLOCKD *xd, int width, int height);
-
-double cfl_compute_average(uint8_t *y_pix, int y_stride, int height, int width);
-
-static INLINE double cfl_idx_to_alpha(int alpha_idx, CFL_SIGN_TYPE alpha_sign,
- CFL_PRED_TYPE pred_type) {
- const int mag_idx = cfl_alpha_codes[alpha_idx][pred_type];
- const double abs_alpha = cfl_alpha_mags[mag_idx];
- if (alpha_sign == CFL_SIGN_POS) {
- return abs_alpha;
- } else {
- assert(abs_alpha != 0.0);
- assert(cfl_alpha_mags[mag_idx + 1] == -abs_alpha);
- return -abs_alpha;
- }
-}
-
-void cfl_predict_block(const CFL_CTX *cfl, uint8_t *dst, int dst_stride,
- int row, int col, TX_SIZE tx_size, double dc_pred,
- double alpha);
+void cfl_predict_block(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride,
+ int row, int col, TX_SIZE tx_size, int plane);
void cfl_store(CFL_CTX *cfl, const uint8_t *input, int input_stride, int row,
int col, TX_SIZE tx_size);
-void cfl_load(const CFL_CTX *cfl, uint8_t *output, int output_stride, int row,
- int col, int width, int height);
+void cfl_compute_parameters(MACROBLOCKD *const xd, TX_SIZE tx_size);
+
#endif // AV1_COMMON_CFL_H_
diff --git a/av1/common/reconintra.c b/av1/common/reconintra.c
index f5719be..723d3d9 100644
--- a/av1/common/reconintra.c
+++ b/av1/common/reconintra.c
@@ -2719,37 +2719,16 @@
mode, dst, dst_stride, dst, dst_stride, blk_col,
blk_row, plane);
#if CONFIG_CFL
- CFL_CTX *const cfl = xd->cfl;
if (plane != AOM_PLANE_Y && mbmi->uv_mode == DC_PRED) {
if (plane == AOM_PLANE_U && blk_col == 0 && blk_row == 0) {
-#if CONFIG_CHROMA_SUB8X8
- const BLOCK_SIZE plane_bsize =
- AOMMAX(BLOCK_4X4, get_plane_block_size(mbmi->sb_type, pd));
-#else
- const BLOCK_SIZE plane_bsize = get_plane_block_size(mbmi->sb_type, pd);
-#endif
- const int width =
- max_intra_block_width(xd, plane_bsize, AOM_PLANE_U, tx_size);
- const int height =
- max_intra_block_height(xd, plane_bsize, AOM_PLANE_U, tx_size);
-
- // Temporary pixel buffer used to store the CfL prediction when we compute
- // the average over the reconstructed and downsampled luma pixels
- // TODO(ltrudeau) Convert to uint16 when adding HBD support
- uint8_t tmp_pix[MAX_SB_SQUARE];
-
- // Compute the block-level DC_PRED for both chromatic planes. DC_PRED
- // replaces beta in the linear model.
- cfl_dc_pred(xd, width, height);
- cfl_load(cfl, tmp_pix, MAX_SB_SIZE, 0, 0, width, height);
- cfl->y_avg = cfl_compute_average(tmp_pix, MAX_SB_SIZE, width, height);
+      // Avoid computing the CfL parameters twice if they have already
+      // been computed in av1_predict_intra_block_encoder_facade.
+ if (!xd->cfl->are_parameters_computed)
+ cfl_compute_parameters(xd, tx_size);
}
- cfl_predict_block(
- cfl, dst, pd->dst.stride, blk_row, blk_col, tx_size,
- cfl->dc_pred[plane - 1],
- cfl_idx_to_alpha(mbmi->cfl_alpha_idx, mbmi->cfl_alpha_signs[plane - 1],
- plane - 1));
+ cfl_predict_block(xd, dst, pd->dst.stride, blk_row, blk_col, tx_size,
+ plane);
}
#endif
}
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index 4f8e0cd..1a60985 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -1364,8 +1364,22 @@
FRAME_CONTEXT *const ec_ctx = cm->fc;
#endif // CONFIG_EC_ADAPT
+#if CONFIG_DEBUG
+ const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+// av1_predict_intra_block_facade does not pass plane_bsize, we need to validate
+// that we will get the same value of plane_bsize on the other side.
+#if CONFIG_CHROMA_SUB8X8
+ const BLOCK_SIZE plane_bsize_val =
+ AOMMAX(BLOCK_4X4, get_plane_block_size(mbmi->sb_type, &xd->plane[plane]));
+#else
+ const BLOCK_SIZE plane_bsize_val =
+ get_plane_block_size(mbmi->sb_type, &xd->plane[plane]);
+#endif // CONFIG_CHROMA_SUB8X8
+ assert(plane_bsize == plane_bsize_val);
+#endif // CONFIG_DEBUG
+
av1_predict_intra_block_encoder_facade(x, ec_ctx, plane, block, blk_col,
- blk_row, tx_size, plane_bsize);
+ blk_row, tx_size);
#else
av1_predict_intra_block_facade(xd, plane, block, blk_col, blk_row, tx_size);
#endif
@@ -1418,10 +1432,11 @@
}
#if CONFIG_CFL
-static int cfl_alpha_dist(const uint8_t *y_pix, int y_stride, double y_avg,
- const uint8_t *src, int src_stride, int width,
- int height, TX_SIZE tx_size, double dc_pred,
- double alpha, int *dist_neg_out) {
+static int cfl_alpha_dist(const uint8_t *y_pix, int y_stride,
+ const double y_average, const uint8_t *src,
+ int src_stride, int width, int height,
+ TX_SIZE tx_size, double dc_pred, double alpha,
+ int *dist_neg_out) {
const double dc_pred_bias = dc_pred + 0.5;
int dist = 0;
int diff;
@@ -1444,6 +1459,8 @@
int dist_neg = 0;
const int tx_height = tx_size_high[tx_size];
const int tx_width = tx_size_wide[tx_size];
+ const int y_block_row_off = y_stride * tx_height;
+ const int src_block_row_off = src_stride * tx_height;
const uint8_t *t_y_pix;
const uint8_t *t_src;
for (int b_j = 0; b_j < height; b_j += tx_height) {
@@ -1454,7 +1471,7 @@
t_src = src;
for (int t_j = b_j; t_j < h; t_j++) {
for (int t_i = b_i; t_i < w; t_i++) {
- const double scaled_luma = alpha * (t_y_pix[t_i] - y_avg);
+ const double scaled_luma = alpha * (t_y_pix[t_i] - y_average);
const int uv = t_src[t_i];
diff = uv - (int)(scaled_luma + dc_pred_bias);
dist += diff * diff;
@@ -1465,8 +1482,8 @@
t_src += src_stride;
}
}
- y_pix += y_stride * tx_height;
- src += src_stride * tx_height;
+ y_pix += y_block_row_off;
+ src += src_block_row_off;
}
if (dist_neg_out) *dist_neg_out = dist_neg;
@@ -1474,73 +1491,6 @@
return dist;
}
-static int cfl_compute_alpha_ind(MACROBLOCK *const x, const CFL_CTX *const cfl,
- int width, int height, TX_SIZE tx_size,
- uint8_t y_pix[MAX_SB_SQUARE],
- CFL_SIGN_TYPE signs_out[CFL_SIGNS]) {
- const struct macroblock_plane *const p_u = &x->plane[AOM_PLANE_U];
- const struct macroblock_plane *const p_v = &x->plane[AOM_PLANE_V];
- const uint8_t *const src_u = p_u->src.buf;
- const uint8_t *const src_v = p_v->src.buf;
- const int src_stride_u = p_u->src.stride;
- const int src_stride_v = p_v->src.stride;
- const double dc_pred_u = cfl->dc_pred[CFL_PRED_U];
- const double dc_pred_v = cfl->dc_pred[CFL_PRED_V];
- const double y_avg = cfl->y_avg;
-
- int sse[CFL_PRED_PLANES][CFL_MAGS_SIZE];
- sse[CFL_PRED_U][0] =
- cfl_alpha_dist(y_pix, MAX_SB_SIZE, y_avg, src_u, src_stride_u, width,
- height, tx_size, dc_pred_u, 0, NULL);
- sse[CFL_PRED_V][0] =
- cfl_alpha_dist(y_pix, MAX_SB_SIZE, y_avg, src_v, src_stride_v, width,
- height, tx_size, dc_pred_v, 0, NULL);
- for (int m = 1; m < CFL_MAGS_SIZE; m += 2) {
- assert(cfl_alpha_mags[m + 1] == -cfl_alpha_mags[m]);
- sse[CFL_PRED_U][m] = cfl_alpha_dist(
- y_pix, MAX_SB_SIZE, y_avg, src_u, src_stride_u, width, height, tx_size,
- dc_pred_u, cfl_alpha_mags[m], &sse[CFL_PRED_U][m + 1]);
- sse[CFL_PRED_V][m] = cfl_alpha_dist(
- y_pix, MAX_SB_SIZE, y_avg, src_v, src_stride_v, width, height, tx_size,
- dc_pred_v, cfl_alpha_mags[m], &sse[CFL_PRED_V][m + 1]);
- }
-
- int dist;
- int64_t cost;
- int64_t best_cost;
-
- // Compute least squares parameter of the entire block
- // IMPORTANT: We assume that the first code is 0,0
- int ind = 0;
- signs_out[CFL_PRED_U] = CFL_SIGN_POS;
- signs_out[CFL_PRED_V] = CFL_SIGN_POS;
-
- dist = sse[CFL_PRED_U][0] + sse[CFL_PRED_V][0];
- dist *= 16;
- best_cost = RDCOST(x->rdmult, cfl->costs[0], dist);
-
- for (int c = 1; c < CFL_ALPHABET_SIZE; c++) {
- const int idx_u = cfl_alpha_codes[c][CFL_PRED_U];
- const int idx_v = cfl_alpha_codes[c][CFL_PRED_V];
- for (CFL_SIGN_TYPE sign_u = idx_u == 0; sign_u < CFL_SIGNS; sign_u++) {
- for (CFL_SIGN_TYPE sign_v = idx_v == 0; sign_v < CFL_SIGNS; sign_v++) {
- dist = sse[CFL_PRED_U][idx_u + (sign_u == CFL_SIGN_NEG)] +
- sse[CFL_PRED_V][idx_v + (sign_v == CFL_SIGN_NEG)];
- dist *= 16;
- cost = RDCOST(x->rdmult, cfl->costs[c], dist);
- if (cost < best_cost) {
- best_cost = cost;
- ind = c;
- signs_out[CFL_PRED_U] = sign_u;
- signs_out[CFL_PRED_V] = sign_v;
- }
- }
- }
- }
-
- return ind;
-}
-
static inline void cfl_update_costs(CFL_CTX *cfl, FRAME_CONTEXT *ec_ctx) {
assert(ec_ctx->cfl_alpha_cdf[CFL_ALPHABET_SIZE - 1] ==
AOM_ICDF(CDF_PROB_TOP));
@@ -1559,44 +1509,95 @@
}
}
+static void cfl_compute_alpha_ind(MACROBLOCK *const x, FRAME_CONTEXT *ec_ctx,
+ TX_SIZE tx_size) {
+ const struct macroblock_plane *const p_u = &x->plane[AOM_PLANE_U];
+ const struct macroblock_plane *const p_v = &x->plane[AOM_PLANE_V];
+ const uint8_t *const src_u = p_u->src.buf;
+ const uint8_t *const src_v = p_v->src.buf;
+ const int src_stride_u = p_u->src.stride;
+ const int src_stride_v = p_v->src.stride;
+
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+
+ CFL_CTX *const cfl = xd->cfl;
+ cfl_compute_parameters(xd, tx_size);
+ const int width = cfl->uv_width;
+ const int height = cfl->uv_height;
+ const double dc_pred_u = cfl->dc_pred[CFL_PRED_U];
+ const double dc_pred_v = cfl->dc_pred[CFL_PRED_V];
+ const double y_average = cfl->y_average;
+ const uint8_t *y_pix = cfl->y_down_pix;
+
+ CFL_SIGN_TYPE *signs = mbmi->cfl_alpha_signs;
+
+ cfl_update_costs(cfl, ec_ctx);
+
+ int sse[CFL_PRED_PLANES][CFL_MAGS_SIZE];
+ sse[CFL_PRED_U][0] =
+ cfl_alpha_dist(y_pix, MAX_SB_SIZE, y_average, src_u, src_stride_u, width,
+ height, tx_size, dc_pred_u, 0, NULL);
+ sse[CFL_PRED_V][0] =
+ cfl_alpha_dist(y_pix, MAX_SB_SIZE, y_average, src_v, src_stride_v, width,
+ height, tx_size, dc_pred_v, 0, NULL);
+ for (int m = 1; m < CFL_MAGS_SIZE; m += 2) {
+ assert(cfl_alpha_mags[m + 1] == -cfl_alpha_mags[m]);
+ sse[CFL_PRED_U][m] = cfl_alpha_dist(
+ y_pix, MAX_SB_SIZE, y_average, src_u, src_stride_u, width, height,
+ tx_size, dc_pred_u, cfl_alpha_mags[m], &sse[CFL_PRED_U][m + 1]);
+ sse[CFL_PRED_V][m] = cfl_alpha_dist(
+ y_pix, MAX_SB_SIZE, y_average, src_v, src_stride_v, width, height,
+ tx_size, dc_pred_v, cfl_alpha_mags[m], &sse[CFL_PRED_V][m + 1]);
+ }
+
+ int dist;
+ int64_t cost;
+ int64_t best_cost;
+
+ // Compute least squares parameter of the entire block
+ // IMPORTANT: We assume that the first code is 0,0
+ int ind = 0;
+ signs[CFL_PRED_U] = CFL_SIGN_POS;
+ signs[CFL_PRED_V] = CFL_SIGN_POS;
+
+ dist = sse[CFL_PRED_U][0] + sse[CFL_PRED_V][0];
+ dist *= 16;
+ best_cost = RDCOST(x->rdmult, cfl->costs[0], dist);
+
+ for (int c = 1; c < CFL_ALPHABET_SIZE; c++) {
+ const int idx_u = cfl_alpha_codes[c][CFL_PRED_U];
+ const int idx_v = cfl_alpha_codes[c][CFL_PRED_V];
+ for (CFL_SIGN_TYPE sign_u = idx_u == 0; sign_u < CFL_SIGNS; sign_u++) {
+ for (CFL_SIGN_TYPE sign_v = idx_v == 0; sign_v < CFL_SIGNS; sign_v++) {
+ dist = sse[CFL_PRED_U][idx_u + (sign_u == CFL_SIGN_NEG)] +
+ sse[CFL_PRED_V][idx_v + (sign_v == CFL_SIGN_NEG)];
+ dist *= 16;
+ cost = RDCOST(x->rdmult, cfl->costs[c], dist);
+ if (cost < best_cost) {
+ best_cost = cost;
+ ind = c;
+ signs[CFL_PRED_U] = sign_u;
+ signs[CFL_PRED_V] = sign_v;
+ }
+ }
+ }
+ }
+
+ mbmi->cfl_alpha_idx = ind;
+}
+
void av1_predict_intra_block_encoder_facade(MACROBLOCK *x,
FRAME_CONTEXT *ec_ctx, int plane,
int block_idx, int blk_col,
- int blk_row, TX_SIZE tx_size,
- BLOCK_SIZE plane_bsize) {
+ int blk_row, TX_SIZE tx_size) {
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
if (plane != AOM_PLANE_Y && mbmi->uv_mode == DC_PRED) {
if (blk_col == 0 && blk_row == 0 && plane == AOM_PLANE_U) {
- const int width =
- max_intra_block_width(xd, plane_bsize, AOM_PLANE_U, tx_size);
- const int height =
- max_intra_block_height(xd, plane_bsize, AOM_PLANE_U, tx_size);
-
- uint8_t tmp_pix[MAX_SB_SQUARE];
- CFL_CTX *const cfl = xd->cfl;
-
- cfl_update_costs(cfl, ec_ctx);
- cfl_dc_pred(xd, width, height);
- // Load CfL Prediction over the entire block
- cfl_load(cfl, tmp_pix, MAX_SB_SIZE, 0, 0, width, height);
- cfl->y_avg = cfl_compute_average(tmp_pix, MAX_SB_SIZE, width, height);
- mbmi->cfl_alpha_idx = cfl_compute_alpha_ind(
- x, cfl, width, height, tx_size, tmp_pix, mbmi->cfl_alpha_signs);
+ cfl_compute_alpha_ind(x, ec_ctx, tx_size);
}
}
-#if CONFIG_DEBUG
-// av1_predict_intra_block_facade does not pass plane_bsize, we need to validate
-// that we will get the same value of plane_bsize on the other side.
-#if CONFIG_CHROMA_SUB8X8
- const BLOCK_SIZE plane_bsize_val =
- AOMMAX(BLOCK_4X4, get_plane_block_size(mbmi->sb_type, &xd->plane[plane]));
-#else
- const BLOCK_SIZE plane_bsize_val =
- get_plane_block_size(mbmi->sb_type, &xd->plane[plane]);
-#endif // CONFIG_CHROMA_SUB8X8
- assert(plane_bsize == plane_bsize_val);
-#endif // CONFIG_DEBUG
av1_predict_intra_block_facade(xd, plane, block_idx, blk_col, blk_row,
tx_size);
}
diff --git a/av1/encoder/encodemb.h b/av1/encoder/encodemb.h
index 35a2c15..7292ce0 100644
--- a/av1/encoder/encodemb.h
+++ b/av1/encoder/encodemb.h
@@ -90,8 +90,7 @@
void av1_predict_intra_block_encoder_facade(MACROBLOCK *x,
FRAME_CONTEXT *ec_ctx, int plane,
int block_idx, int blk_col,
- int blk_row, TX_SIZE tx_size,
- BLOCK_SIZE plane_bsize);
+ int blk_row, TX_SIZE tx_size);
#endif
#if CONFIG_DPCM_INTRA
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 79fe13e..0729dbe 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -1755,8 +1755,20 @@
FRAME_CONTEXT *const ec_ctx = cm->fc;
#endif // CONFIG_EC_ADAPT
+#if CONFIG_DEBUG
+// av1_predict_intra_block_facade does not pass plane_bsize, we need to validate
+// that we will get the same value of plane_bsize on the other side.
+#if CONFIG_CHROMA_SUB8X8
+ const BLOCK_SIZE plane_bsize_val = AOMMAX(
+ BLOCK_4X4, get_plane_block_size(mbmi->sb_type, &xd->plane[plane]));
+#else
+ const BLOCK_SIZE plane_bsize_val =
+ get_plane_block_size(mbmi->sb_type, &xd->plane[plane]);
+#endif // CONFIG_CHROMA_SUB8X8
+ assert(plane_bsize == plane_bsize_val);
+#endif // CONFIG_DEBUG
av1_predict_intra_block_encoder_facade(x, ec_ctx, plane, block, blk_col,
- blk_row, tx_size, plane_bsize);
+ blk_row, tx_size);
#else
av1_predict_intra_block_facade(xd, plane, block, blk_col, blk_row, tx_size);
#endif
@@ -2631,9 +2643,6 @@
for (row = 0; row < max_blocks_high; row += stepr) {
for (col = 0; col < max_blocks_wide; col += stepc) {
#if CONFIG_CFL
- const struct macroblockd_plane *const pd = &xd->plane[0];
- const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
-
#if CONFIG_EC_ADAPT
FRAME_CONTEXT *const ec_ctx = xd->tile_ctx;
#else
@@ -2641,7 +2650,7 @@
#endif // CONFIG_EC_ADAPT
av1_predict_intra_block_encoder_facade(x, ec_ctx, 0, block, col, row,
- tx_size, plane_bsize);
+ tx_size);
#else
av1_predict_intra_block_facade(xd, 0, block, col, row, tx_size);
#endif