[CFL] Compute Average Over TX Block Instead of Pred Block

When computing alpha, multiple averages are computed, one for each
transform block. The CfL prediction now uses the transform block average
instead of the partition block average. This allows the decoder to build
the CfL prediction using only the collocated reconstructed luma values
for the current transform size, and not those of the entire partition.

Results on Subset 1 (compared to 0e81b97c with CfL):

  PSNR   | PSNR Cb | PSNR Cr | PSNR HVS | SSIM   | MS SSIM | CIEDE 2000
  0.0180 | 0.2627  | 0.2274  | 0.0233   | 0.0301 | 0.0312  | 0.1506

A small regression is expected; this change was made to simplify
hardware implementations.

Change-Id: Ib2ce2a3053b85300c5c62ef0e3270af489568a38

commit: 03678940ccd19ed6f9f3d9658785b7d9f08b39d8 [log] [tgz]
author: Luc Trudeau <luc@trud.ca> Mon Jun 12 17:33:19 2017 -0400
committer: Luc Trudeau <luc@trud.ca> Thu Jul 06 09:37:38 2017 -0400
tree: 2c2c6f32b148bcbedf52e6926d17f835b106a543
parent: 5c453db29e1a6645d3bef4e8de0b4ae23e0db4f8 [diff]
diff --git a/av1/common/cfl.c b/av1/common/cfl.c
index 1ffea03..154df73 100644
--- a/av1/common/cfl.c
+++ b/av1/common/cfl.c

@@ -183,24 +183,41 @@
   cfl->dc_pred[CFL_PRED_V] = sum_v / num_pel;
 }
 
-static void cfl_compute_average(CFL_CTX *cfl) {
+static void cfl_compute_averages(CFL_CTX *cfl, TX_SIZE tx_size) {
   const int width = cfl->uv_width;
   const int height = cfl->uv_height;
-  const double num_pel = width * height;
+  const int tx_height = tx_size_high[tx_size];
+  const int tx_width = tx_size_wide[tx_size];
+  const int stride = width >> tx_size_wide_log2[tx_size];
+  const int block_row_stride = MAX_SB_SIZE << tx_size_high_log2[tx_size];
+  const double num_pel = tx_width * tx_height;
   // TODO(ltrudeau) Convert to uint16 for HBD support
   const uint8_t *y_pix = cfl->y_down_pix;
   // TODO(ltrudeau) Convert to uint16 for HBD support
+  const uint8_t *t_y_pix;
+  double *averages = cfl->y_averages;
 
   cfl_load(cfl, 0, 0, width, height);
 
-  int sum = 0;
-  for (int j = 0; j < height; j++) {
-    for (int i = 0; i < width; i++) {
-      sum += y_pix[i];
+  int a = 0;
+  for (int b_j = 0; b_j < height; b_j += tx_height) {
+    for (int b_i = 0; b_i < width; b_i += tx_width) {
+      int sum = 0;
+      t_y_pix = y_pix;
+      for (int t_j = 0; t_j < tx_height; t_j++) {
+        for (int t_i = b_i; t_i < b_i + tx_width; t_i++) {
+          sum += t_y_pix[t_i];
+        }
+        t_y_pix += MAX_SB_SIZE;
+      }
+      averages[a++] = sum / num_pel;
     }
-    y_pix += MAX_SB_SIZE;
+    assert(a % stride == 0);
+    y_pix += block_row_stride;
   }
-  cfl->y_average = sum / num_pel;
+
+  cfl->y_averages_stride = stride;
+  assert(a <= MAX_NUM_TXB);
 }
 
 static INLINE double cfl_idx_to_alpha(int alpha_idx, CFL_SIGN_TYPE alpha_sign,
@@ -234,7 +251,12 @@
   const double alpha = cfl_idx_to_alpha(
       mbmi->cfl_alpha_idx, mbmi->cfl_alpha_signs[plane - 1], plane - 1);
 
-  const double avg = cfl->y_average;
+  const int avg_row =
+      (row << tx_size_wide_log2[0]) >> tx_size_wide_log2[tx_size];
+  const int avg_col =
+      (col << tx_size_high_log2[0]) >> tx_size_high_log2[tx_size];
+  const double avg =
+      cfl->y_averages[cfl->y_averages_stride * avg_row + avg_col];
 
   cfl_load(cfl, row, col, width, height);
   for (int j = 0; j < height; j++) {
@@ -348,7 +370,7 @@
   // Compute block-level DC_PRED for both chromatic planes.
   // DC_PRED replaces beta in the linear model.
   cfl_dc_pred(xd, plane_bsize);
-  // Compute block-level average on reconstructed luma input.
-  cfl_compute_average(cfl);
+  // Compute transform-level average on reconstructed luma input.
+  cfl_compute_averages(cfl, tx_size);
   cfl->are_parameters_computed = 1;
 }

diff --git a/av1/common/cfl.h b/av1/common/cfl.h
index cbdf969..6fbbc14 100644
--- a/av1/common/cfl.h
+++ b/av1/common/cfl.h

@@ -41,8 +41,10 @@
   // this context
   int uv_height, uv_width;
 
-  // Average of the luma reconstructed values over the entire prediction unit
-  double y_average;
+  // Transform level averages of the luma reconstructed values over the entire
+  // prediction unit
+  double y_averages[MAX_NUM_TXB];
+  int y_averages_stride;
 
   int are_parameters_computed;
 

diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index f988dee..842c78c 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c

@@ -1429,10 +1429,10 @@
 
 #if CONFIG_CFL
 static int cfl_alpha_dist(const uint8_t *y_pix, int y_stride,
-                          const double y_average, const uint8_t *src,
-                          int src_stride, int width, int height,
-                          TX_SIZE tx_size, double dc_pred, double alpha,
-                          int *dist_neg_out) {
+                          const double y_averages[MAX_NUM_TXB],
+                          const uint8_t *src, int src_stride, int width,
+                          int height, TX_SIZE tx_size, double dc_pred,
+                          double alpha, int *dist_neg_out) {
   const double dc_pred_bias = dc_pred + 0.5;
   int dist = 0;
   int diff;
@@ -1459,15 +1459,17 @@
   const int src_block_row_off = src_stride * tx_height;
   const uint8_t *t_y_pix;
   const uint8_t *t_src;
+  int a = 0;
   for (int b_j = 0; b_j < height; b_j += tx_height) {
     const int h = b_j + tx_height;
     for (int b_i = 0; b_i < width; b_i += tx_width) {
       const int w = b_i + tx_width;
+      const double tx_avg = y_averages[a++];
       t_y_pix = y_pix;
       t_src = src;
       for (int t_j = b_j; t_j < h; t_j++) {
         for (int t_i = b_i; t_i < w; t_i++) {
-          const double scaled_luma = alpha * (t_y_pix[t_i] - y_average);
+          const double scaled_luma = alpha * (t_y_pix[t_i] - tx_avg);
           const int uv = t_src[t_i];
 
           // TODO(ltrudeau) add support for HBD.
@@ -1527,7 +1529,7 @@
   const int height = cfl->uv_height;
   const double dc_pred_u = cfl->dc_pred[CFL_PRED_U];
   const double dc_pred_v = cfl->dc_pred[CFL_PRED_V];
-  const double y_average = cfl->y_average;
+  const double *y_averages = cfl->y_averages;
   const uint8_t *y_pix = cfl->y_down_pix;
 
   CFL_SIGN_TYPE *signs = mbmi->cfl_alpha_signs;
@@ -1536,18 +1538,19 @@
 
   int sse[CFL_PRED_PLANES][CFL_MAGS_SIZE];
   sse[CFL_PRED_U][0] =
-      cfl_alpha_dist(y_pix, MAX_SB_SIZE, y_average, src_u, src_stride_u, width,
+      cfl_alpha_dist(y_pix, MAX_SB_SIZE, y_averages, src_u, src_stride_u, width,
                      height, tx_size, dc_pred_u, 0, NULL);
   sse[CFL_PRED_V][0] =
-      cfl_alpha_dist(y_pix, MAX_SB_SIZE, y_average, src_v, src_stride_v, width,
+      cfl_alpha_dist(y_pix, MAX_SB_SIZE, y_averages, src_v, src_stride_v, width,
                      height, tx_size, dc_pred_v, 0, NULL);
+
   for (int m = 1; m < CFL_MAGS_SIZE; m += 2) {
     assert(cfl_alpha_mags[m + 1] == -cfl_alpha_mags[m]);
     sse[CFL_PRED_U][m] = cfl_alpha_dist(
-        y_pix, MAX_SB_SIZE, y_average, src_u, src_stride_u, width, height,
+        y_pix, MAX_SB_SIZE, y_averages, src_u, src_stride_u, width, height,
         tx_size, dc_pred_u, cfl_alpha_mags[m], &sse[CFL_PRED_U][m + 1]);
     sse[CFL_PRED_V][m] = cfl_alpha_dist(
-        y_pix, MAX_SB_SIZE, y_average, src_v, src_stride_v, width, height,
+        y_pix, MAX_SB_SIZE, y_averages, src_v, src_stride_v, width, height,
         tx_size, dc_pred_v, cfl_alpha_mags[m], &sse[CFL_PRED_V][m + 1]);
   }
commit	03678940ccd19ed6f9f3d9658785b7d9f08b39d8	[log] [tgz]
author	Luc Trudeau <luc@trud.ca>	Mon Jun 12 17:33:19 2017 -0400
committer	Luc Trudeau <luc@trud.ca>	Thu Jul 06 09:37:38 2017 -0400
tree	2c2c6f32b148bcbedf52e6926d17f835b106a543
parent	5c453db29e1a6645d3bef4e8de0b4ae23e0db4f8 [diff]