[CFL] allow for 4:1 rects if full tx available

Disable CFL sub8x8 validation in this case, as it appears to give
false negatives for 4:1 blocks. All other tests pass.

The coding gain on subset1 is quite significant:

   PSNR | PSNR Cb | PSNR Cr | PSNR HVS |    SSIM | MS SSIM | CIEDE 2000
-0.1270 | -1.1386 | -1.1426 |  -0.1167 | -0.1157 | -0.1264 |    -0.4142

Change-Id: Ic20c9b1a5ff28e0fbd4e6491ed2cd2d1f6b487c9

commit: d27f1e61db2c97eb89f101eece337eb3efb0af24 [log] [tgz]
author: David Michael Barr <b@rr-dav.id.au> Thu Jan 11 23:03:30 2018 +0900
committer: David Michael Barr <b@rr-dav.id.au> Sun Jan 28 05:55:08 2018 +0000
tree: bc3c7bbc9eadae96653b0f2814e396c197de44ac
parent: 92245c8738f3a8c40fa2078a563c8bd947c70c36 [diff]
diff --git a/av1/common/blockd.h b/av1/common/blockd.h
index 0cd4ec4..d63d8dd 100644
--- a/av1/common/blockd.h
+++ b/av1/common/blockd.h

@@ -541,14 +541,14 @@
 #endif  // CONFIG_DEBUG
 
   int is_chroma_reference;
-#if CONFIG_DEBUG
+#if CONFIG_DEBUG && !CONFIG_RECT_TX_EXT_INTRA
   // Validation buffer is usually 2x2, except for 16x4 and 4x16 in that case it
   // is 4x2 and 2x4 respectively. To simplify accessing the buffer we use a
   // stride of CFL_SUB8X8_VAL_MI_SIZE resulting in a square of 16.
   uint16_t sub8x8_val[CFL_SUB8X8_VAL_MI_SQUARE];
   uint16_t store_counter;
   uint16_t last_compute_counter;
-#endif  // CONFIG_DEBUG
+#endif  // CONFIG_DEBUG && !CONFIG_RECT_TX_EXT_INTRA
 } CFL_CTX;
 #endif  // CONFIG_CFL
 

diff --git a/av1/common/cfl.c b/av1/common/cfl.c
index d0e69d5..6ab5ad9 100644
--- a/av1/common/cfl.c
+++ b/av1/common/cfl.c

@@ -36,11 +36,11 @@
   cfl->use_dc_pred_cache = 0;
   cfl->dc_pred_is_cached[CFL_PRED_U] = 0;
   cfl->dc_pred_is_cached[CFL_PRED_V] = 0;
-#if CONFIG_DEBUG
+#if CONFIG_DEBUG && !CONFIG_RECT_TX_EXT_INTRA
   cfl_clear_sub8x8_val(cfl);
   cfl->store_counter = 0;
   cfl->last_compute_counter = 0;
-#endif  // CONFIG_DEBUG
+#endif  // CONFIG_DEBUG && !CONFIG_RECT_TX_EXT_INTRA
 }
 
 void cfl_store_dc_pred(MACROBLOCKD *const xd, const uint8_t *input,
@@ -205,7 +205,7 @@
   // Do not call cfl_compute_parameters multiple time on the same values.
   assert(cfl->are_parameters_computed == 0);
 
-#if CONFIG_DEBUG
+#if CONFIG_DEBUG && !CONFIG_RECT_TX_EXT_INTRA
   BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
   if (block_size_high[bsize] == 4 || block_size_wide[bsize] == 4) {
     const uint16_t compute_counter = cfl->sub8x8_val[0];
@@ -225,7 +225,7 @@
     }
     cfl->last_compute_counter = compute_counter;
   }
-#endif  // CONFIG_DEBUG
+#endif  // CONFIG_DEBUG && !CONFIG_RECT_TX_EXT_INTRA
 
   cfl_subtract_average(cfl, tx_size);
   cfl->are_parameters_computed = 1;
@@ -453,7 +453,7 @@
     (*col_out)++;
   }
 }
-#if CONFIG_DEBUG
+#if CONFIG_DEBUG && !CONFIG_RECT_TX_EXT_INTRA
 // Since the chroma surface of sub8x8 block span across multiple luma blocks,
 // this function validates that the reconstructed luma area required to predict
 // the chroma block using CfL has been stored during the previous luma encode.
@@ -510,7 +510,7 @@
     assert(found);
   }
 }
-#endif  // CONFIG_DEBUG
+#endif  // CONFIG_DEBUG && !CONFIG_RECT_TX_EXT_INTRA
 
 void cfl_store_tx(MACROBLOCKD *const xd, int row, int col, TX_SIZE tx_size,
                   BLOCK_SIZE bsize) {
@@ -525,9 +525,9 @@
     assert(!((col & 1) && tx_size_wide[tx_size] != 4));
     assert(!((row & 1) && tx_size_high[tx_size] != 4));
     sub8x8_adjust_offset(cfl, &row, &col);
-#if CONFIG_DEBUG
+#if CONFIG_DEBUG && !CONFIG_RECT_TX_EXT_INTRA
     sub8x8_set_val(cfl, row, col, tx_size);
-#endif  // CONFIG_DEBUG
+#endif  // CONFIG_DEBUG && !CONFIG_RECT_TX_EXT_INTRA
   }
   cfl_store(cfl, dst, pd->dst.stride, row, col, tx_size_wide[tx_size],
             tx_size_high[tx_size], get_bitdepth_data_path_index(xd));
@@ -542,14 +542,14 @@
   assert(is_cfl_allowed(&xd->mi[0]->mbmi));
   if (block_size_high[bsize] == 4 || block_size_wide[bsize] == 4) {
     sub8x8_adjust_offset(cfl, &row, &col);
-#if CONFIG_DEBUG
+#if CONFIG_DEBUG && !CONFIG_RECT_TX_EXT_INTRA
     // Point to the last transform block inside the partition.
     const int off_row =
         row + (mi_size_high[bsize] - tx_size_high_unit[tx_size]);
     const int off_col =
         col + (mi_size_wide[bsize] - tx_size_wide_unit[tx_size]);
     sub8x8_set_val(cfl, off_row, off_col, tx_size);
-#endif  // CONFIG_DEBUG
+#endif  // CONFIG_DEBUG && !CONFIG_RECT_TX_EXT_INTRA
   }
   const int width = max_intra_block_width(xd, bsize, AOM_PLANE_Y, tx_size);
   const int height = max_intra_block_height(xd, bsize, AOM_PLANE_Y, tx_size);

diff --git a/av1/common/cfl.h b/av1/common/cfl.h
index fb0e884..6d5d9cb 100644
--- a/av1/common/cfl.h
+++ b/av1/common/cfl.h

@@ -28,7 +28,12 @@
 static INLINE CFL_ALLOWED_TYPE is_cfl_allowed(const MB_MODE_INFO *mbmi) {
   const BLOCK_SIZE bsize = mbmi->sb_type;
   assert(bsize < BLOCK_SIZES_ALL);
+#if CONFIG_EXT_PARTITION_TYPES && CONFIG_RECT_TX_EXT_INTRA
+  return (CFL_ALLOWED_TYPE)(block_size_wide[bsize] <= 32 &&
+                            block_size_high[bsize] <= 32);
+#else
   return (CFL_ALLOWED_TYPE)(bsize <= CFL_MAX_BLOCK_SIZE);
+#endif  // CONFIG_EXT_PARTITION_TYPES && CONFIG_RECT_TX_EXT_INTRA
 }
 
 static INLINE int get_scaled_luma_q0(int alpha_q3, int16_t pred_buf_q3) {

diff --git a/av1/common/onyxc_int.h b/av1/common/onyxc_int.h
index 88201a5..10e0dcb 100644
--- a/av1/common/onyxc_int.h
+++ b/av1/common/onyxc_int.h

@@ -745,11 +745,11 @@
 }
 
 #if CONFIG_CFL
-#if CONFIG_DEBUG
+#if CONFIG_DEBUG && !CONFIG_RECT_TX_EXT_INTRA
 static INLINE void cfl_clear_sub8x8_val(CFL_CTX *cfl) {
   memset(cfl->sub8x8_val, 0, sizeof(cfl->sub8x8_val));
 }
-#endif  // CONFIG_DEBUG
+#endif  // CONFIG_DEBUG && !CONFIG_RECT_TX_EXT_INTRA
 void cfl_init(CFL_CTX *cfl, AV1_COMMON *cm);
 #endif  // CONFIG_CFL
commit	d27f1e61db2c97eb89f101eece337eb3efb0af24	[log] [tgz]
author	David Michael Barr <b@rr-dav.id.au>	Thu Jan 11 23:03:30 2018 +0900
committer	David Michael Barr <b@rr-dav.id.au>	Sun Jan 28 05:55:08 2018 +0000
tree	bc3c7bbc9eadae96653b0f2814e396c197de44ac
parent	92245c8738f3a8c40fa2078a563c8bd947c70c36 [diff]