[CFL] Adjust Pixel Buffer for Chroma Sub8x8
Adjust row and col offset for sub8x8 blocks to allow the CfL prediction
to use all available reconstructed luma pixels.
Results on Subset 1 (Compared to b03c2f44 with CfL)
PSNR | PSNR Cb | PSNR Cr | PSNR HVS | SSIM | MS SSIM | CIEDE 2000
-0.1355 | -0.8517 | -0.4481 | -0.0579 | -0.0237 | -0.0203 | -0.2765
Change-Id: Ia91f0a078f0ff4f28bb2d272b096f579e0d04dac
diff --git a/av1/common/cfl.c b/av1/common/cfl.c
index 093ceb5..135b308 100644
--- a/av1/common/cfl.c
+++ b/av1/common/cfl.c
@@ -177,6 +177,8 @@
sum_v += height * 129;
}
+ // TODO(ltrudeau) Because of max_block_wide and max_block_high, num_pel will
+ // not always be a power of two, so these divisions will need a lookup table.
cfl->dc_pred[CFL_PRED_U] = sum_u / num_pel;
cfl->dc_pred[CFL_PRED_V] = sum_v / num_pel;
}
@@ -245,26 +247,48 @@
}
void cfl_store(CFL_CTX *cfl, const uint8_t *input, int input_stride, int row,
- int col, TX_SIZE tx_size) {
+ int col, TX_SIZE tx_size, BLOCK_SIZE bsize) {
const int tx_width = tx_size_wide[tx_size];
const int tx_height = tx_size_high[tx_size];
const int tx_off_log2 = tx_size_wide_log2[0];
- // Store the input into the CfL pixel buffer
- uint8_t *y_pix = &cfl->y_pix[(row * MAX_SB_SIZE + col) << tx_off_log2];
+#if CONFIG_CHROMA_SUB8X8
+ if (bsize < BLOCK_8X8) {
+#if CONFIG_DEBUG
+ // Transform cannot be smaller than 4X4
+ assert(tx_width >= 4);
+ assert(tx_height >= 4);
+#endif
- // Check that we remain inside the pixel buffer.
- assert(MAX_SB_SIZE * (row + tx_height - 1) + col + tx_width - 1 <
- MAX_SB_SQUARE);
+ const int bw = block_size_wide[bsize];
+ const int bh = block_size_high[bsize];
- // TODO(ltrudeau) Speedup possible by moving the downsampling to cfl_store
- for (int j = 0; j < tx_height; j++) {
- for (int i = 0; i < tx_width; i++) {
- y_pix[i] = input[i];
+ // For chroma_sub8x8, the CfL prediction for prediction blocks smaller than
+ // 8X8 uses reconstructed luma pixels from non-chroma-reference blocks. To do
+ // so, we combine the 4X4 non-chroma-reference blocks into the CfL pixel
+ // buffer based on their row and column index.
+
+ // The following code is adapted from the is_chroma_reference() function.
+ if ((cfl->mi_row &
+ 0x01) // Increment the row index for odd indexed 4X4 blocks
+ && (bh == 4) // But not for 4X8 blocks
+ && cfl->subsampling_y) { // And only when chroma is subsampled
+ assert(row == 0);
+ row++;
}
- y_pix += MAX_SB_SIZE;
- input += input_stride;
+
+ if ((cfl->mi_col &
+ 0x01) // Increment the col index for odd indexed 4X4 blocks
+ && (bw == 4) // But not for 8X4 blocks
+ && cfl->subsampling_x) { // And only when chroma is subsampled
+ assert(col == 0);
+ col++;
+ }
}
+#endif
+
+ // Invalidate current parameters
+ cfl->are_parameters_computed = 0;
// Store the surface of the pixel buffer that was written to, this way we
// can manage chroma overrun (e.g. when the chroma surfaces goes beyond the
@@ -277,8 +301,21 @@
cfl->y_height = OD_MAXI((row << tx_off_log2) + tx_height, cfl->y_height);
}
- // Invalidate current parameters
- cfl->are_parameters_computed = 0;
+ // Check that we will remain inside the pixel buffer.
+ assert((row << tx_off_log2) + tx_height <= MAX_SB_SIZE);
+ assert((col << tx_off_log2) + tx_width <= MAX_SB_SIZE);
+
+ // Store the input into the CfL pixel buffer
+ uint8_t *y_pix = &cfl->y_pix[(row * MAX_SB_SIZE + col) << tx_off_log2];
+
+ // TODO(ltrudeau) Speedup possible by moving the downsampling to cfl_store
+ for (int j = 0; j < tx_height; j++) {
+ for (int i = 0; i < tx_width; i++) {
+ y_pix[i] = input[i];
+ }
+ y_pix += MAX_SB_SIZE;
+ input += input_stride;
+ }
}
void cfl_compute_parameters(MACROBLOCKD *const xd, TX_SIZE tx_size) {
diff --git a/av1/common/cfl.h b/av1/common/cfl.h
index 7c11c4b..cbdf969 100644
--- a/av1/common/cfl.h
+++ b/av1/common/cfl.h
@@ -54,6 +54,8 @@
// The rate associated with each alpha codeword
int costs[CFL_ALPHABET_SIZE];
+
+ int mi_row, mi_col;
} CFL_CTX;
static const double cfl_alpha_mags[CFL_MAGS_SIZE] = {
@@ -73,7 +75,7 @@
int row, int col, TX_SIZE tx_size, int plane);
void cfl_store(CFL_CTX *cfl, const uint8_t *input, int input_stride, int row,
- int col, TX_SIZE tx_size);
+ int col, TX_SIZE tx_size, BLOCK_SIZE bsize);
void cfl_compute_parameters(MACROBLOCKD *const xd, TX_SIZE tx_size);
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index d12eb54..a1c419b 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -741,9 +741,17 @@
#if CONFIG_CFL
if (plane == AOM_PLANE_Y) {
struct macroblockd_plane *const pd = &xd->plane[plane];
+#if CONFIG_CB4X4 && !CONFIG_CHROMA_2X2
+ const BLOCK_SIZE plane_bsize =
+ AOMMAX(BLOCK_4X4, get_plane_block_size(mbmi->sb_type, pd));
+#else
+ const BLOCK_SIZE plane_bsize = get_plane_block_size(mbmi->sb_type, pd);
+#endif
uint8_t *dst =
&pd->dst.buf[(row * pd->dst.stride + col) << tx_size_wide_log2[0]];
- cfl_store(xd->cfl, dst, pd->dst.stride, row, col, tx_size);
+ // TODO (ltrudeau) Store sub-8x8 inter blocks when bottom right block is
+ // intra predicted.
+ cfl_store(xd->cfl, dst, pd->dst.stride, row, col, tx_size, plane_bsize);
}
#endif
}
@@ -876,6 +884,10 @@
xd->mi[0]->mbmi.mi_row = mi_row;
xd->mi[0]->mbmi.mi_col = mi_col;
#endif
+#if CONFIG_CFL
+ xd->cfl->mi_row = mi_row;
+ xd->cfl->mi_col = mi_col;
+#endif
for (y = 0; y < y_mis; ++y)
for (x = !y; x < x_mis; ++x) xd->mi[y * cm->mi_stride + x] = xd->mi[0];
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index e3c6036..49497fd 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -325,6 +325,10 @@
set_offsets_without_segment_id(cpi, tile, x, mi_row, mi_col, bsize);
mbmi = &xd->mi[0]->mbmi;
+#if CONFIG_CFL
+ xd->cfl->mi_row = mi_row;
+ xd->cfl->mi_col = mi_col;
+#endif
// Setup segment ID.
if (seg->enabled) {
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index 6df156c..e0f4516 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -1420,7 +1420,9 @@
#endif
#if CONFIG_CFL
if (plane == AOM_PLANE_Y && x->cfl_store_y) {
- cfl_store(xd->cfl, dst, dst_stride, blk_row, blk_col, tx_size);
+ // TODO (ltrudeau) Store sub-8x8 inter blocks when bottom right block is
+ // intra predicted.
+ cfl_store(xd->cfl, dst, dst_stride, blk_row, blk_col, tx_size, plane_bsize);
}
#endif
}
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 0729dbe..75c640d 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -1822,7 +1822,9 @@
const int dst_stride = pd->dst.stride;
uint8_t *dst =
&pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
- cfl_store(xd->cfl, dst, dst_stride, blk_row, blk_col, tx_size);
+ // TODO (ltrudeau) Store sub-8x8 inter blocks when bottom right block is
+ // intra predicted.
+ cfl_store(xd->cfl, dst, dst_stride, blk_row, blk_col, tx_size, plane_bsize);
}
#endif
#if CONFIG_DPCM_INTRA
@@ -9107,7 +9109,14 @@
// so we can store reconstructed luma values
RD_STATS this_rd_stats;
+#if CONFIG_CB4X4
+ // Don't store the luma value if no chroma is associated.
+ // Don't worry, we will store this reconstructed luma in the following
+ // encode dry-run, so the chroma plane will never know.
+ x->cfl_store_y = !x->skip_chroma_rd;
+#else
x->cfl_store_y = 1;
+#endif
txfm_rd_in_plane(x, cpi, &this_rd_stats, INT64_MAX, AOM_PLANE_Y,
mbmi->sb_type, mbmi->tx_size,