[CFL] allow for 4:1 rects if full tx available
Disable CFL sub8x8 validation in this case, as it appears to give
false-negatives for 4:1 blocks. All other tests pass.
The coding gain on subset1 is quite significant.
PSNR | PSNR Cb | PSNR Cr | PSNR HVS | SSIM | MS SSIM | CIEDE 2000
-0.1270 | -1.1386 | -1.1426 | -0.1167 | -0.1157 | -0.1264 | -0.4142
Change-Id: Ic20c9b1a5ff28e0fbd4e6491ed2cd2d1f6b487c9
diff --git a/av1/common/blockd.h b/av1/common/blockd.h
index 0cd4ec4..d63d8dd 100644
--- a/av1/common/blockd.h
+++ b/av1/common/blockd.h
@@ -541,14 +541,14 @@
#endif // CONFIG_DEBUG
int is_chroma_reference;
-#if CONFIG_DEBUG
+#if CONFIG_DEBUG && !CONFIG_RECT_TX_EXT_INTRA
// Validation buffer is usually 2x2, except for 16x4 and 4x16 in that case it
// is 4x2 and 2x4 respectively. To simplify accessing the buffer we use a
// stride of CFL_SUB8X8_VAL_MI_SIZE resulting in a square of 16.
uint16_t sub8x8_val[CFL_SUB8X8_VAL_MI_SQUARE];
uint16_t store_counter;
uint16_t last_compute_counter;
-#endif // CONFIG_DEBUG
+#endif // CONFIG_DEBUG && !CONFIG_RECT_TX_EXT_INTRA
} CFL_CTX;
#endif // CONFIG_CFL
diff --git a/av1/common/cfl.c b/av1/common/cfl.c
index d0e69d5..6ab5ad9 100644
--- a/av1/common/cfl.c
+++ b/av1/common/cfl.c
@@ -36,11 +36,11 @@
cfl->use_dc_pred_cache = 0;
cfl->dc_pred_is_cached[CFL_PRED_U] = 0;
cfl->dc_pred_is_cached[CFL_PRED_V] = 0;
-#if CONFIG_DEBUG
+#if CONFIG_DEBUG && !CONFIG_RECT_TX_EXT_INTRA
cfl_clear_sub8x8_val(cfl);
cfl->store_counter = 0;
cfl->last_compute_counter = 0;
-#endif // CONFIG_DEBUG
+#endif // CONFIG_DEBUG && !CONFIG_RECT_TX_EXT_INTRA
}
void cfl_store_dc_pred(MACROBLOCKD *const xd, const uint8_t *input,
@@ -205,7 +205,7 @@
// Do not call cfl_compute_parameters multiple time on the same values.
assert(cfl->are_parameters_computed == 0);
-#if CONFIG_DEBUG
+#if CONFIG_DEBUG && !CONFIG_RECT_TX_EXT_INTRA
BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
if (block_size_high[bsize] == 4 || block_size_wide[bsize] == 4) {
const uint16_t compute_counter = cfl->sub8x8_val[0];
@@ -225,7 +225,7 @@
}
cfl->last_compute_counter = compute_counter;
}
-#endif // CONFIG_DEBUG
+#endif // CONFIG_DEBUG && !CONFIG_RECT_TX_EXT_INTRA
cfl_subtract_average(cfl, tx_size);
cfl->are_parameters_computed = 1;
@@ -453,7 +453,7 @@
(*col_out)++;
}
}
-#if CONFIG_DEBUG
+#if CONFIG_DEBUG && !CONFIG_RECT_TX_EXT_INTRA
// Since the chroma surface of sub8x8 block span across multiple luma blocks,
// this function validates that the reconstructed luma area required to predict
// the chroma block using CfL has been stored during the previous luma encode.
@@ -510,7 +510,7 @@
assert(found);
}
}
-#endif // CONFIG_DEBUG
+#endif // CONFIG_DEBUG && !CONFIG_RECT_TX_EXT_INTRA
void cfl_store_tx(MACROBLOCKD *const xd, int row, int col, TX_SIZE tx_size,
BLOCK_SIZE bsize) {
@@ -525,9 +525,9 @@
assert(!((col & 1) && tx_size_wide[tx_size] != 4));
assert(!((row & 1) && tx_size_high[tx_size] != 4));
sub8x8_adjust_offset(cfl, &row, &col);
-#if CONFIG_DEBUG
+#if CONFIG_DEBUG && !CONFIG_RECT_TX_EXT_INTRA
sub8x8_set_val(cfl, row, col, tx_size);
-#endif // CONFIG_DEBUG
+#endif // CONFIG_DEBUG && !CONFIG_RECT_TX_EXT_INTRA
}
cfl_store(cfl, dst, pd->dst.stride, row, col, tx_size_wide[tx_size],
tx_size_high[tx_size], get_bitdepth_data_path_index(xd));
@@ -542,14 +542,14 @@
assert(is_cfl_allowed(&xd->mi[0]->mbmi));
if (block_size_high[bsize] == 4 || block_size_wide[bsize] == 4) {
sub8x8_adjust_offset(cfl, &row, &col);
-#if CONFIG_DEBUG
+#if CONFIG_DEBUG && !CONFIG_RECT_TX_EXT_INTRA
// Point to the last transform block inside the partition.
const int off_row =
row + (mi_size_high[bsize] - tx_size_high_unit[tx_size]);
const int off_col =
col + (mi_size_wide[bsize] - tx_size_wide_unit[tx_size]);
sub8x8_set_val(cfl, off_row, off_col, tx_size);
-#endif // CONFIG_DEBUG
+#endif // CONFIG_DEBUG && !CONFIG_RECT_TX_EXT_INTRA
}
const int width = max_intra_block_width(xd, bsize, AOM_PLANE_Y, tx_size);
const int height = max_intra_block_height(xd, bsize, AOM_PLANE_Y, tx_size);
diff --git a/av1/common/cfl.h b/av1/common/cfl.h
index fb0e884..6d5d9cb 100644
--- a/av1/common/cfl.h
+++ b/av1/common/cfl.h
@@ -28,7 +28,12 @@
static INLINE CFL_ALLOWED_TYPE is_cfl_allowed(const MB_MODE_INFO *mbmi) {
const BLOCK_SIZE bsize = mbmi->sb_type;
assert(bsize < BLOCK_SIZES_ALL);
+#if CONFIG_EXT_PARTITION_TYPES && CONFIG_RECT_TX_EXT_INTRA
+ return (CFL_ALLOWED_TYPE)(block_size_wide[bsize] <= 32 &&
+ block_size_high[bsize] <= 32);
+#else
return (CFL_ALLOWED_TYPE)(bsize <= CFL_MAX_BLOCK_SIZE);
+#endif // CONFIG_EXT_PARTITION_TYPES && CONFIG_RECT_TX_EXT_INTRA
}
static INLINE int get_scaled_luma_q0(int alpha_q3, int16_t pred_buf_q3) {
diff --git a/av1/common/onyxc_int.h b/av1/common/onyxc_int.h
index 88201a5..10e0dcb 100644
--- a/av1/common/onyxc_int.h
+++ b/av1/common/onyxc_int.h
@@ -745,11 +745,11 @@
}
#if CONFIG_CFL
-#if CONFIG_DEBUG
+#if CONFIG_DEBUG && !CONFIG_RECT_TX_EXT_INTRA
static INLINE void cfl_clear_sub8x8_val(CFL_CTX *cfl) {
memset(cfl->sub8x8_val, 0, sizeof(cfl->sub8x8_val));
}
-#endif // CONFIG_DEBUG
+#endif // CONFIG_DEBUG && !CONFIG_RECT_TX_EXT_INTRA
void cfl_init(CFL_CTX *cfl, AV1_COMMON *cm);
#endif // CONFIG_CFL