[CFL] Adjust Pixel Buffer for Chroma Sub8x8 Adjust row and col offset for sub8x8 blocks to allow the CfL prediction to use all available reconstructed luma pixels. Results on Subset 1 (Compared to b03c2f44 with CfL) PSNR | PSNR Cb | PSNR Cr | PSNR HVS | SSIM | MS SSIM | CIEDE 2000 -0.1355 | -0.8517 | -0.4481 | -0.0579 | -0.0237 | -0.0203 | -0.2765 Change-Id: Ia91f0a078f0ff4f28bb2d272b096f579e0d04dac
diff --git a/av1/common/cfl.c b/av1/common/cfl.c index 093ceb5..135b308 100644 --- a/av1/common/cfl.c +++ b/av1/common/cfl.c
@@ -177,6 +177,8 @@ sum_v += height * 129; } + // TODO(ltrudeau) Because of max_block_wide and max_block_high, num_pel will + // not be a power of two. So these divisions will have to use a lookup table. cfl->dc_pred[CFL_PRED_U] = sum_u / num_pel; cfl->dc_pred[CFL_PRED_V] = sum_v / num_pel; } @@ -245,26 +247,48 @@ } void cfl_store(CFL_CTX *cfl, const uint8_t *input, int input_stride, int row, - int col, TX_SIZE tx_size) { + int col, TX_SIZE tx_size, BLOCK_SIZE bsize) { const int tx_width = tx_size_wide[tx_size]; const int tx_height = tx_size_high[tx_size]; const int tx_off_log2 = tx_size_wide_log2[0]; - // Store the input into the CfL pixel buffer - uint8_t *y_pix = &cfl->y_pix[(row * MAX_SB_SIZE + col) << tx_off_log2]; +#if CONFIG_CHROMA_SUB8X8 + if (bsize < BLOCK_8X8) { +#if CONFIG_DEBUG + // Transform cannot be smaller than 4X4 + assert(tx_width >= 4); + assert(tx_height >= 4); +#endif - // Check that we remain inside the pixel buffer. - assert(MAX_SB_SIZE * (row + tx_height - 1) + col + tx_width - 1 < - MAX_SB_SQUARE); + const int bw = block_size_wide[bsize]; + const int bh = block_size_high[bsize]; - // TODO(ltrudeau) Speedup possible by moving the downsampling to cfl_store - for (int j = 0; j < tx_height; j++) { - for (int i = 0; i < tx_width; i++) { - y_pix[i] = input[i]; + // For chroma_sub8x8, the CfL prediction for prediction blocks smaller than + // 8X8 uses the non-chroma-reference reconstructed luma pixels. To do so, we + // combine the 4X4 non-chroma-reference blocks into the CfL pixel buffer + // based on their row and column index. + + // The following code is adapted from the is_chroma_reference() function.
+ if ((cfl->mi_row & + 0x01) // Increment the row index for odd indexed 4X4 blocks + && (bh == 4) // But not for 4X8 blocks + && cfl->subsampling_y) { // And only when chroma is subsampled + assert(row == 0); + row++; } - y_pix += MAX_SB_SIZE; - input += input_stride; + + if ((cfl->mi_col & + 0x01) // Increment the col index for odd indexed 4X4 blocks + && (bw == 4) // But not for 8X4 blocks + && cfl->subsampling_x) { // And only when chroma is subsampled + assert(col == 0); + col++; + } } +#endif + + // Invalidate current parameters + cfl->are_parameters_computed = 0; // Store the surface of the pixel buffer that was written to, this way we // can manage chroma overrun (e.g. when the chroma surfaces goes beyond the @@ -277,8 +301,21 @@ cfl->y_height = OD_MAXI((row << tx_off_log2) + tx_height, cfl->y_height); } - // Invalidate current parameters - cfl->are_parameters_computed = 0; + // Check that we will remain inside the pixel buffer. + assert((row << tx_off_log2) + tx_height <= MAX_SB_SIZE); + assert((col << tx_off_log2) + tx_width <= MAX_SB_SIZE); + + // Store the input into the CfL pixel buffer + uint8_t *y_pix = &cfl->y_pix[(row * MAX_SB_SIZE + col) << tx_off_log2]; + + // TODO(ltrudeau) Speedup possible by moving the downsampling to cfl_store + for (int j = 0; j < tx_height; j++) { + for (int i = 0; i < tx_width; i++) { + y_pix[i] = input[i]; + } + y_pix += MAX_SB_SIZE; + input += input_stride; + } } void cfl_compute_parameters(MACROBLOCKD *const xd, TX_SIZE tx_size) {
diff --git a/av1/common/cfl.h b/av1/common/cfl.h index 7c11c4b..cbdf969 100644 --- a/av1/common/cfl.h +++ b/av1/common/cfl.h
@@ -54,6 +54,8 @@ // The rate associated with each alpha codeword int costs[CFL_ALPHABET_SIZE]; + + int mi_row, mi_col; } CFL_CTX; static const double cfl_alpha_mags[CFL_MAGS_SIZE] = { @@ -73,7 +75,7 @@ int row, int col, TX_SIZE tx_size, int plane); void cfl_store(CFL_CTX *cfl, const uint8_t *input, int input_stride, int row, - int col, TX_SIZE tx_size); + int col, TX_SIZE tx_size, BLOCK_SIZE bsize); void cfl_compute_parameters(MACROBLOCKD *const xd, TX_SIZE tx_size);
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c index d12eb54..a1c419b 100644 --- a/av1/decoder/decodeframe.c +++ b/av1/decoder/decodeframe.c
@@ -741,9 +741,17 @@ #if CONFIG_CFL if (plane == AOM_PLANE_Y) { struct macroblockd_plane *const pd = &xd->plane[plane]; +#if CONFIG_CB4X4 && !CONFIG_CHROMA_2X2 + const BLOCK_SIZE plane_bsize = + AOMMAX(BLOCK_4X4, get_plane_block_size(mbmi->sb_type, pd)); +#else + const BLOCK_SIZE plane_bsize = get_plane_block_size(mbmi->sb_type, pd); +#endif uint8_t *dst = &pd->dst.buf[(row * pd->dst.stride + col) << tx_size_wide_log2[0]]; - cfl_store(xd->cfl, dst, pd->dst.stride, row, col, tx_size); + // TODO(ltrudeau) Store sub-8x8 inter blocks when the bottom-right block is + // intra predicted. + cfl_store(xd->cfl, dst, pd->dst.stride, row, col, tx_size, plane_bsize); } #endif } @@ -876,6 +884,10 @@ xd->mi[0]->mbmi.mi_row = mi_row; xd->mi[0]->mbmi.mi_col = mi_col; #endif +#if CONFIG_CFL + xd->cfl->mi_row = mi_row; + xd->cfl->mi_col = mi_col; +#endif for (y = 0; y < y_mis; ++y) for (x = !y; x < x_mis; ++x) xd->mi[y * cm->mi_stride + x] = xd->mi[0];
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c index e3c6036..49497fd 100644 --- a/av1/encoder/encodeframe.c +++ b/av1/encoder/encodeframe.c
@@ -325,6 +325,10 @@ set_offsets_without_segment_id(cpi, tile, x, mi_row, mi_col, bsize); mbmi = &xd->mi[0]->mbmi; +#if CONFIG_CFL + xd->cfl->mi_row = mi_row; + xd->cfl->mi_col = mi_col; +#endif // Setup segment ID. if (seg->enabled) {
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c index 6df156c..e0f4516 100644 --- a/av1/encoder/encodemb.c +++ b/av1/encoder/encodemb.c
@@ -1420,7 +1420,9 @@ #endif #if CONFIG_CFL if (plane == AOM_PLANE_Y && x->cfl_store_y) { - cfl_store(xd->cfl, dst, dst_stride, blk_row, blk_col, tx_size); + // TODO(ltrudeau) Store sub-8x8 inter blocks when the bottom-right block is + // intra predicted. + cfl_store(xd->cfl, dst, dst_stride, blk_row, blk_col, tx_size, plane_bsize); } #endif }
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c index 0729dbe..75c640d 100644 --- a/av1/encoder/rdopt.c +++ b/av1/encoder/rdopt.c
@@ -1822,7 +1822,9 @@ const int dst_stride = pd->dst.stride; uint8_t *dst = &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]]; - cfl_store(xd->cfl, dst, dst_stride, blk_row, blk_col, tx_size); + // TODO(ltrudeau) Store sub-8x8 inter blocks when the bottom-right block is + // intra predicted. + cfl_store(xd->cfl, dst, dst_stride, blk_row, blk_col, tx_size, plane_bsize); } #endif #if CONFIG_DPCM_INTRA @@ -9107,7 +9109,14 @@ // so we can store reconstructed luma values RD_STATS this_rd_stats; +#if CONFIG_CB4X4 + // Don't store the luma value if no chroma is associated. + // Don't worry, we will store this reconstructed luma in the following + // encode dry-run; the chroma plane will never know. + x->cfl_store_y = !x->skip_chroma_rd; +#else x->cfl_store_y = 1; +#endif txfm_rd_in_plane(x, cpi, &this_rd_stats, INT64_MAX, AOM_PLANE_Y, mbmi->sb_type, mbmi->tx_size,