[CFL] Compute Average Over TX Block Instead of Pred Block
When computing alpha, multiple averages are computed, one for each
transform block. The CfL prediction now uses the transform block average
instead of the partition block average.
This allows the decoder to build the CfL prediction using only the
collocated reconstructed luma values of the current transform block,
rather than those of the entire partition.
Results on Subset 1 (Compared to 0e81b97c with CfL)
PSNR | PSNR Cb | PSNR Cr | PSNR HVS | SSIM | MS SSIM | CIEDE 2000
0.0180 | 0.2627 | 0.2274 | 0.0233 | 0.0301 | 0.0312 | 0.1506
A small regression is expected; this change was made to simplify
hardware implementations.
Change-Id: Ib2ce2a3053b85300c5c62ef0e3270af489568a38
diff --git a/av1/common/cfl.c b/av1/common/cfl.c
index 1ffea03..154df73 100644
--- a/av1/common/cfl.c
+++ b/av1/common/cfl.c
@@ -183,24 +183,41 @@
cfl->dc_pred[CFL_PRED_V] = sum_v / num_pel;
}
-static void cfl_compute_average(CFL_CTX *cfl) {
+static void cfl_compute_averages(CFL_CTX *cfl, TX_SIZE tx_size) {
const int width = cfl->uv_width;
const int height = cfl->uv_height;
- const double num_pel = width * height;
+ const int tx_height = tx_size_high[tx_size];
+ const int tx_width = tx_size_wide[tx_size];
+ const int stride = width >> tx_size_wide_log2[tx_size];
+ const int block_row_stride = MAX_SB_SIZE << tx_size_high_log2[tx_size];
+ const double num_pel = tx_width * tx_height;
// TODO(ltrudeau) Convert to uint16 for HBD support
const uint8_t *y_pix = cfl->y_down_pix;
// TODO(ltrudeau) Convert to uint16 for HBD support
+ const uint8_t *t_y_pix;
+ double *averages = cfl->y_averages;
cfl_load(cfl, 0, 0, width, height);
- int sum = 0;
- for (int j = 0; j < height; j++) {
- for (int i = 0; i < width; i++) {
- sum += y_pix[i];
+ int a = 0;
+ for (int b_j = 0; b_j < height; b_j += tx_height) {
+ for (int b_i = 0; b_i < width; b_i += tx_width) {
+ int sum = 0;
+ t_y_pix = y_pix;
+ for (int t_j = 0; t_j < tx_height; t_j++) {
+ for (int t_i = b_i; t_i < b_i + tx_width; t_i++) {
+ sum += t_y_pix[t_i];
+ }
+ t_y_pix += MAX_SB_SIZE;
+ }
+ averages[a++] = sum / num_pel;
}
- y_pix += MAX_SB_SIZE;
+ assert(a % stride == 0);
+ y_pix += block_row_stride;
}
- cfl->y_average = sum / num_pel;
+
+ cfl->y_averages_stride = stride;
+ assert(a <= MAX_NUM_TXB);
}
static INLINE double cfl_idx_to_alpha(int alpha_idx, CFL_SIGN_TYPE alpha_sign,
@@ -234,7 +251,12 @@
const double alpha = cfl_idx_to_alpha(
mbmi->cfl_alpha_idx, mbmi->cfl_alpha_signs[plane - 1], plane - 1);
- const double avg = cfl->y_average;
+ const int avg_row =
+ (row << tx_size_wide_log2[0]) >> tx_size_wide_log2[tx_size];
+ const int avg_col =
+ (col << tx_size_high_log2[0]) >> tx_size_high_log2[tx_size];
+ const double avg =
+ cfl->y_averages[cfl->y_averages_stride * avg_row + avg_col];
cfl_load(cfl, row, col, width, height);
for (int j = 0; j < height; j++) {
@@ -348,7 +370,7 @@
// Compute block-level DC_PRED for both chromatic planes.
// DC_PRED replaces beta in the linear model.
cfl_dc_pred(xd, plane_bsize);
- // Compute block-level average on reconstructed luma input.
- cfl_compute_average(cfl);
+ // Compute transform-level average on reconstructed luma input.
+ cfl_compute_averages(cfl, tx_size);
cfl->are_parameters_computed = 1;
}
diff --git a/av1/common/cfl.h b/av1/common/cfl.h
index cbdf969..6fbbc14 100644
--- a/av1/common/cfl.h
+++ b/av1/common/cfl.h
@@ -41,8 +41,10 @@
// this context
int uv_height, uv_width;
- // Average of the luma reconstructed values over the entire prediction unit
- double y_average;
+ // Transform level averages of the luma reconstructed values over the entire
+ // prediction unit
+ double y_averages[MAX_NUM_TXB];
+ int y_averages_stride;
int are_parameters_computed;
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index f988dee..842c78c 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -1429,10 +1429,10 @@
#if CONFIG_CFL
static int cfl_alpha_dist(const uint8_t *y_pix, int y_stride,
- const double y_average, const uint8_t *src,
- int src_stride, int width, int height,
- TX_SIZE tx_size, double dc_pred, double alpha,
- int *dist_neg_out) {
+ const double y_averages[MAX_NUM_TXB],
+ const uint8_t *src, int src_stride, int width,
+ int height, TX_SIZE tx_size, double dc_pred,
+ double alpha, int *dist_neg_out) {
const double dc_pred_bias = dc_pred + 0.5;
int dist = 0;
int diff;
@@ -1459,15 +1459,17 @@
const int src_block_row_off = src_stride * tx_height;
const uint8_t *t_y_pix;
const uint8_t *t_src;
+ int a = 0;
for (int b_j = 0; b_j < height; b_j += tx_height) {
const int h = b_j + tx_height;
for (int b_i = 0; b_i < width; b_i += tx_width) {
const int w = b_i + tx_width;
+ const double tx_avg = y_averages[a++];
t_y_pix = y_pix;
t_src = src;
for (int t_j = b_j; t_j < h; t_j++) {
for (int t_i = b_i; t_i < w; t_i++) {
- const double scaled_luma = alpha * (t_y_pix[t_i] - y_average);
+ const double scaled_luma = alpha * (t_y_pix[t_i] - tx_avg);
const int uv = t_src[t_i];
// TODO(ltrudeau) add support for HBD.
@@ -1527,7 +1529,7 @@
const int height = cfl->uv_height;
const double dc_pred_u = cfl->dc_pred[CFL_PRED_U];
const double dc_pred_v = cfl->dc_pred[CFL_PRED_V];
- const double y_average = cfl->y_average;
+ const double *y_averages = cfl->y_averages;
const uint8_t *y_pix = cfl->y_down_pix;
CFL_SIGN_TYPE *signs = mbmi->cfl_alpha_signs;
@@ -1536,18 +1538,19 @@
int sse[CFL_PRED_PLANES][CFL_MAGS_SIZE];
sse[CFL_PRED_U][0] =
- cfl_alpha_dist(y_pix, MAX_SB_SIZE, y_average, src_u, src_stride_u, width,
+ cfl_alpha_dist(y_pix, MAX_SB_SIZE, y_averages, src_u, src_stride_u, width,
height, tx_size, dc_pred_u, 0, NULL);
sse[CFL_PRED_V][0] =
- cfl_alpha_dist(y_pix, MAX_SB_SIZE, y_average, src_v, src_stride_v, width,
+ cfl_alpha_dist(y_pix, MAX_SB_SIZE, y_averages, src_v, src_stride_v, width,
height, tx_size, dc_pred_v, 0, NULL);
+
for (int m = 1; m < CFL_MAGS_SIZE; m += 2) {
assert(cfl_alpha_mags[m + 1] == -cfl_alpha_mags[m]);
sse[CFL_PRED_U][m] = cfl_alpha_dist(
- y_pix, MAX_SB_SIZE, y_average, src_u, src_stride_u, width, height,
+ y_pix, MAX_SB_SIZE, y_averages, src_u, src_stride_u, width, height,
tx_size, dc_pred_u, cfl_alpha_mags[m], &sse[CFL_PRED_U][m + 1]);
sse[CFL_PRED_V][m] = cfl_alpha_dist(
- y_pix, MAX_SB_SIZE, y_average, src_v, src_stride_v, width, height,
+ y_pix, MAX_SB_SIZE, y_averages, src_v, src_stride_v, width, height,
tx_size, dc_pred_v, cfl_alpha_mags[m], &sse[CFL_PRED_V][m + 1]);
}