[CFL] Compute Luma Average Over Partition Unit
Extract the computation of the reconstructed luma average out of cfl_load
and into cfl_compute_average. The reconstructed luma average is stored
in the CfL context (CFL_CTX) to avoid recomputing it for each transform
block and for each plane.
Results on subset1 (compared to 803bea2 with CfL)
PSNR | PSNR Cb | PSNR Cr | PSNR HVS | SSIM | MS SSIM | CIEDE 2000
-0.0474 | -0.1486 | -0.2931 | -0.0358 | -0.0397 | -0.0127 | -0.1162
Change-Id: I9e34af0fe5961ce8dbe70cb80aea2a16221d0d92
diff --git a/av1/common/cfl.c b/av1/common/cfl.c
index 004bbeb..bce44ea 100644
--- a/av1/common/cfl.c
+++ b/av1/common/cfl.c
@@ -28,7 +28,7 @@
// CfL computes its own block-level DC_PRED. This is required to compute both
// alpha_cb and alpha_cr before the prediction are computed.
-void cfl_dc_pred(MACROBLOCKD *xd, BLOCK_SIZE plane_bsize) {
+void cfl_dc_pred(MACROBLOCKD *xd, int width, int height) {
const struct macroblockd_plane *const pd_u = &xd->plane[AOM_PLANE_U];
const struct macroblockd_plane *const pd_v = &xd->plane[AOM_PLANE_V];
@@ -38,12 +38,8 @@
const int dst_u_stride = pd_u->dst.stride;
const int dst_v_stride = pd_v->dst.stride;
- assert(plane_bsize != BLOCK_INVALID);
- const int block_width = block_size_wide[plane_bsize];
- const int block_height = block_size_high[plane_bsize];
-
// Number of pixel on the top and left borders.
- const double num_pel = block_width + block_height;
+ const double num_pel = width + height;
int sum_u = 0;
int sum_v = 0;
@@ -64,13 +60,13 @@
if (xd->up_available && xd->mb_to_right_edge >= 0) {
#endif
// TODO(ltrudeau) replace this with DC_PRED assembly
- for (int i = 0; i < block_width; i++) {
+ for (int i = 0; i < width; i++) {
sum_u += dst_u[-dst_u_stride + i];
sum_v += dst_v[-dst_v_stride + i];
}
} else {
- sum_u = block_width * 127;
- sum_v = block_width * 127;
+ sum_u = width * 127;
+ sum_v = width * 127;
}
#if CONFIG_CHROMA_SUB8X8
@@ -78,27 +74,40 @@
#else
if (xd->left_available && xd->mb_to_bottom_edge >= 0) {
#endif
- for (int i = 0; i < block_height; i++) {
+ for (int i = 0; i < height; i++) {
sum_u += dst_u[i * dst_u_stride - 1];
sum_v += dst_v[i * dst_v_stride - 1];
}
} else {
- sum_u += block_height * 129;
- sum_v += block_height * 129;
+ sum_u += height * 129;
+ sum_v += height * 129;
}
xd->cfl->dc_pred[CFL_PRED_U] = sum_u / num_pel;
xd->cfl->dc_pred[CFL_PRED_V] = sum_v / num_pel;
}
+double cfl_compute_average(uint8_t *y_pix, int y_stride, int width,
+ int height) {
+ int sum = 0;
+ for (int j = 0; j < height; j++) {
+ for (int i = 0; i < width; i++) {
+ sum += y_pix[i];
+ }
+ y_pix += y_stride;
+ }
+ return sum / (double)(width * height);
+}
+
// Predict the current transform block using CfL.
void cfl_predict_block(const CFL_CTX *cfl, uint8_t *dst, int dst_stride,
int row, int col, TX_SIZE tx_size, double dc_pred,
double alpha) {
const int width = tx_size_wide[tx_size];
const int height = tx_size_high[tx_size];
+ const double y_avg = cfl->y_avg;
- const double y_avg = cfl_load(cfl, dst, dst_stride, row, col, width, height);
+ cfl_load(cfl, dst, dst_stride, row, col, width, height);
for (int j = 0; j < height; j++) {
for (int i = 0; i < width; i++) {
@@ -142,8 +151,8 @@
}
// Load from the CfL pixel buffer into output
-double cfl_load(const CFL_CTX *cfl, uint8_t *output, int output_stride, int row,
- int col, int width, int height) {
+void cfl_load(const CFL_CTX *cfl, uint8_t *output, int output_stride, int row,
+ int col, int width, int height) {
const int sub_x = cfl->subsampling_x;
const int sub_y = cfl->subsampling_y;
const int tx_off_log2 = tx_size_wide_log2[0];
@@ -226,14 +235,4 @@
output_row_offset += output_stride;
}
}
-
- int avg = 0;
- output_row_offset = 0;
- for (int j = 0; j < height; j++) {
- for (int i = 0; i < width; i++) {
- avg += output[output_row_offset + i];
- }
- output_row_offset += output_stride;
- }
- return avg / (double)(width * height);
}
diff --git a/av1/common/cfl.h b/av1/common/cfl.h
index b5b77cc..239647d 100644
--- a/av1/common/cfl.h
+++ b/av1/common/cfl.h
@@ -31,6 +31,9 @@
// Height and width of the luma prediction block currently in the pixel buffer
int y_height, y_width;
+ // Average of the luma reconstructed values over the entire prediction unit
+ double y_avg;
+
// Chroma subsampling
int subsampling_x, subsampling_y;
@@ -59,7 +62,9 @@
void cfl_init(CFL_CTX *cfl, AV1_COMMON *cm);
-void cfl_dc_pred(MACROBLOCKD *xd, BLOCK_SIZE plane_bsize);
+void cfl_dc_pred(MACROBLOCKD *xd, int width, int height);
+
+double cfl_compute_average(uint8_t *y_pix, int y_stride, int width, int height);
static INLINE double cfl_idx_to_alpha(int alpha_idx, CFL_SIGN_TYPE alpha_sign,
CFL_PRED_TYPE pred_type) {
@@ -81,6 +86,6 @@
void cfl_store(CFL_CTX *cfl, const uint8_t *input, int input_stride, int row,
int col, TX_SIZE tx_size);
-double cfl_load(const CFL_CTX *cfl, uint8_t *output, int output_stride, int row,
- int col, int width, int height);
+void cfl_load(const CFL_CTX *cfl, uint8_t *output, int output_stride, int row,
+ int col, int width, int height);
#endif // AV1_COMMON_CFL_H_
diff --git a/av1/common/onyxc_int.h b/av1/common/onyxc_int.h
index 874508b..dabeb42 100644
--- a/av1/common/onyxc_int.h
+++ b/av1/common/onyxc_int.h
@@ -898,6 +898,24 @@
return max_blocks_high >> tx_size_wide_log2[0];
}
+#if CONFIG_CFL
+static INLINE int max_intra_block_width(const MACROBLOCKD *xd,
+ BLOCK_SIZE plane_bsize, int plane,
+ TX_SIZE tx_size) {
+ const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane)
+ << tx_size_wide_log2[0];
+ return ALIGN_POWER_OF_TWO(max_blocks_wide, tx_size_wide_log2[tx_size]);
+}
+
+static INLINE int max_intra_block_height(const MACROBLOCKD *xd,
+ BLOCK_SIZE plane_bsize, int plane,
+ TX_SIZE tx_size) {
+ const int max_blocks_high = max_block_high(xd, plane_bsize, plane)
+ << tx_size_high_log2[0];
+ return ALIGN_POWER_OF_TWO(max_blocks_high, tx_size_high_log2[tx_size]);
+}
+#endif // CONFIG_CFL
+
static INLINE void av1_zero_above_context(AV1_COMMON *const cm,
int mi_col_start, int mi_col_end) {
const int width = mi_col_end - mi_col_start;
diff --git a/av1/common/reconintra.c b/av1/common/reconintra.c
index f336ccb..0d56f9f 100644
--- a/av1/common/reconintra.c
+++ b/av1/common/reconintra.c
@@ -2526,22 +2526,35 @@
mode, dst, dst_stride, dst, dst_stride, blk_col,
blk_row, plane);
#if CONFIG_CFL
+ CFL_CTX *const cfl = xd->cfl;
if (plane != AOM_PLANE_Y && mbmi->uv_mode == DC_PRED) {
if (plane == AOM_PLANE_U && blk_col == 0 && blk_row == 0) {
-// Compute the block-level DC_PRED for both chromatic planes. DC_PRED replaces
-// beta in the linear model.
#if CONFIG_CB4X4 && !CONFIG_CHROMA_2X2
const BLOCK_SIZE plane_bsize =
AOMMAX(BLOCK_4X4, get_plane_block_size(mbmi->sb_type, pd));
#else
const BLOCK_SIZE plane_bsize = get_plane_block_size(mbmi->sb_type, pd);
#endif
- cfl_dc_pred(xd, plane_bsize);
+ const int width =
+ max_intra_block_width(xd, plane_bsize, AOM_PLANE_U, tx_size);
+ const int height =
+ max_intra_block_height(xd, plane_bsize, AOM_PLANE_U, tx_size);
+
+ // Temporary pixel buffer used to store the CfL prediction when we compute
+ // the average over the reconstructed and downsampled luma pixels
+ // TODO(ltrudeau) Convert to uint16 when adding HBD support
+ uint8_t tmp_pix[MAX_SB_SQUARE];
+
+ // Compute the block-level DC_PRED for both chromatic planes. DC_PRED
+ // replaces beta in the linear model.
+ cfl_dc_pred(xd, width, height);
+ cfl_load(cfl, tmp_pix, MAX_SB_SIZE, 0, 0, width, height);
+ cfl->y_avg = cfl_compute_average(tmp_pix, MAX_SB_SIZE, width, height);
}
cfl_predict_block(
- xd->cfl, dst, pd->dst.stride, blk_row, blk_col, tx_size,
- xd->cfl->dc_pred[plane - 1],
+ cfl, dst, pd->dst.stride, blk_row, blk_col, tx_size,
+ cfl->dc_pred[plane - 1],
cfl_idx_to_alpha(mbmi->cfl_alpha_idx, mbmi->cfl_alpha_signs[plane - 1],
plane - 1));
}
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index 2798959..8c9168c 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -1537,7 +1537,8 @@
}
static int cfl_compute_alpha_ind(MACROBLOCK *const x, const CFL_CTX *const cfl,
- BLOCK_SIZE bsize,
+ int width, int height,
+ uint8_t y_pix[MAX_SB_SQUARE],
CFL_SIGN_TYPE signs_out[CFL_SIGNS]) {
const struct macroblock_plane *const p_u = &x->plane[AOM_PLANE_U];
const struct macroblock_plane *const p_v = &x->plane[AOM_PLANE_V];
@@ -1545,33 +1546,25 @@
const uint8_t *const src_v = p_v->src.buf;
const int src_stride_u = p_u->src.stride;
const int src_stride_v = p_v->src.stride;
- const int block_width = block_size_wide[bsize];
- const int block_height = block_size_high[bsize];
const double dc_pred_u = cfl->dc_pred[CFL_PRED_U];
const double dc_pred_v = cfl->dc_pred[CFL_PRED_V];
-
- // Temporary pixel buffer used to store the CfL prediction when we compute the
- // alpha index.
- uint8_t tmp_pix[MAX_SB_SQUARE];
- // Load CfL Prediction over the entire block
- const double y_avg =
- cfl_load(cfl, tmp_pix, MAX_SB_SIZE, 0, 0, block_width, block_height);
+ const double y_avg = cfl->y_avg;
int sse[CFL_PRED_PLANES][CFL_MAGS_SIZE];
sse[CFL_PRED_U][0] =
- cfl_alpha_dist(tmp_pix, MAX_SB_SIZE, y_avg, src_u, src_stride_u,
- block_width, block_height, dc_pred_u, 0, NULL);
+ cfl_alpha_dist(y_pix, MAX_SB_SIZE, y_avg, src_u, src_stride_u, width,
+ height, dc_pred_u, 0, NULL);
sse[CFL_PRED_V][0] =
- cfl_alpha_dist(tmp_pix, MAX_SB_SIZE, y_avg, src_v, src_stride_v,
- block_width, block_height, dc_pred_v, 0, NULL);
+ cfl_alpha_dist(y_pix, MAX_SB_SIZE, y_avg, src_v, src_stride_v, width,
+ height, dc_pred_v, 0, NULL);
for (int m = 1; m < CFL_MAGS_SIZE; m += 2) {
assert(cfl_alpha_mags[m + 1] == -cfl_alpha_mags[m]);
sse[CFL_PRED_U][m] = cfl_alpha_dist(
- tmp_pix, MAX_SB_SIZE, y_avg, src_u, src_stride_u, block_width,
- block_height, dc_pred_u, cfl_alpha_mags[m], &sse[CFL_PRED_U][m + 1]);
+ y_pix, MAX_SB_SIZE, y_avg, src_u, src_stride_u, width, height,
+ dc_pred_u, cfl_alpha_mags[m], &sse[CFL_PRED_U][m + 1]);
sse[CFL_PRED_V][m] = cfl_alpha_dist(
- tmp_pix, MAX_SB_SIZE, y_avg, src_v, src_stride_v, block_width,
- block_height, dc_pred_v, cfl_alpha_mags[m], &sse[CFL_PRED_V][m + 1]);
+ y_pix, MAX_SB_SIZE, y_avg, src_v, src_stride_v, width, height,
+ dc_pred_v, cfl_alpha_mags[m], &sse[CFL_PRED_V][m + 1]);
}
int dist;
@@ -1637,11 +1630,21 @@
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
if (plane != AOM_PLANE_Y && mbmi->uv_mode == DC_PRED) {
if (blk_col == 0 && blk_row == 0 && plane == AOM_PLANE_U) {
+ const int width =
+ max_intra_block_width(xd, plane_bsize, AOM_PLANE_U, tx_size);
+ const int height =
+ max_intra_block_height(xd, plane_bsize, AOM_PLANE_U, tx_size);
+
+ uint8_t tmp_pix[MAX_SB_SQUARE];
CFL_CTX *const cfl = xd->cfl;
+
cfl_update_costs(cfl, ec_ctx);
- cfl_dc_pred(xd, plane_bsize);
- mbmi->cfl_alpha_idx =
- cfl_compute_alpha_ind(x, cfl, plane_bsize, mbmi->cfl_alpha_signs);
+ cfl_dc_pred(xd, width, height);
+ // Load CfL Prediction over the entire block
+ cfl_load(cfl, tmp_pix, MAX_SB_SIZE, 0, 0, width, height);
+ cfl->y_avg = cfl_compute_average(tmp_pix, MAX_SB_SIZE, width, height);
+ mbmi->cfl_alpha_idx = cfl_compute_alpha_ind(
+ x, cfl, width, height, tmp_pix, mbmi->cfl_alpha_signs);
}
}
#if CONFIG_DEBUG