[CFL] Limit Luma Partition to 32X32

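Based on the HW Subgroup call of December 4th, 2017, CfL is now allowed only
when the luma partition is at most 32X32. Previously, the 32X32 limit was
applied to the chroma (U plane) block size rather than to the luma partition.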

Regression on Subset 1
  PSNR | PSNR Cb | PSNR Cr | PSNR HVS |    SSIM | MS SSIM | CIEDE 2000
0.0881 |  1.3504 |  1.2936 |   0.0572 |  0.0182 |  0.0227 |     0.5204

https://two.arewecompressedyet.com/?job=CfL-PartU%402017-12-12T15%3A39%3A36.794Z&job=CfL-Max32x32%402017-12-12T16%3A10%3A09.989Z

Change-Id: I7e3cfd68097c0bc24b1426348b5fd574c4f638a0
diff --git a/av1/common/blockd.h b/av1/common/blockd.h
index 24be7d9..d5c5426 100644
--- a/av1/common/blockd.h
+++ b/av1/common/blockd.h
@@ -568,6 +568,7 @@
 #define CFL_SUB8X8_VAL_MI_SQUARE \
   (CFL_SUB8X8_VAL_MI_SIZE * CFL_SUB8X8_VAL_MI_SIZE)
 #endif  // CONFIG_DEBUG
+#define CFL_MAX_BLOCK_SIZE (BLOCK_32X32)
 typedef struct cfl_ctx {
   // The CfL prediction buffer is used in two steps:
   //   1. Stores Q3 reconstructed luma pixels
diff --git a/av1/common/cfl.c b/av1/common/cfl.c
index 29a7d0d..c1a9c63 100644
--- a/av1/common/cfl.c
+++ b/av1/common/cfl.c
@@ -189,7 +189,7 @@
                        TX_SIZE tx_size, int plane) {
   CFL_CTX *const cfl = &xd->cfl;
   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
-  assert(is_cfl_allowed(xd));
+  assert(is_cfl_allowed(mbmi));
 
   if (!cfl->are_parameters_computed) cfl_compute_parameters(xd, tx_size);
 
@@ -466,7 +466,7 @@
   uint8_t *dst =
       &pd->dst.buf[(row * pd->dst.stride + col) << tx_size_wide_log2[0]];
 
-  assert(is_cfl_allowed(xd));
+  assert(is_cfl_allowed(&xd->mi[0]->mbmi));
   if (block_size_high[bsize] == 4 || block_size_wide[bsize] == 4) {
     // Only dimensions of size 4 can have an odd offset.
     assert(!((col & 1) && tx_size_wide[tx_size] != 4));
@@ -487,7 +487,7 @@
   int col = 0;
   bsize = AOMMAX(BLOCK_4X4, bsize);
 
-  assert(is_cfl_allowed(xd));
+  assert(is_cfl_allowed(&xd->mi[0]->mbmi));
   if (block_size_high[bsize] == 4 || block_size_wide[bsize] == 4) {
     sub8x8_adjust_offset(cfl, &row, &col);
 #if CONFIG_DEBUG
diff --git a/av1/common/cfl.h b/av1/common/cfl.h
index 92fbbd0..365e420 100644
--- a/av1/common/cfl.h
+++ b/av1/common/cfl.h
@@ -14,12 +14,11 @@
 
 #include "av1/common/blockd.h"
 
-static INLINE int is_cfl_allowed(const MACROBLOCKD *const xd) {
-  const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
-  const BLOCK_SIZE plane_bsize = AOMMAX(
-      BLOCK_4X4, get_plane_block_size(mbmi->sb_type, &xd->plane[AOM_PLANE_U]));
-  assert(plane_bsize < BLOCK_SIZES_ALL);
-  return plane_bsize <= BLOCK_32X32;
+static INLINE int is_cfl_allowed(const MB_MODE_INFO *mbmi) {
+  const BLOCK_SIZE bsize = mbmi->sb_type;
+  assert(bsize >= BLOCK_4X4);  // Intra luma partitions can't be < 4X4
+  assert(bsize < BLOCK_SIZES_ALL);
+  return (bsize >= BLOCK_4X4) && (bsize <= CFL_MAX_BLOCK_SIZE);
 }
 
 static INLINE int get_scaled_luma_q0(int alpha_q3, int16_t pred_buf_q3) {
diff --git a/av1/common/reconintra.c b/av1/common/reconintra.c
index 6ce57a8..a4ee580 100644
--- a/av1/common/reconintra.c
+++ b/av1/common/reconintra.c
@@ -2782,7 +2782,7 @@
 #if CONFIG_DEBUG
     assert(blk_col == 0);
     assert(blk_row == 0);
-    assert(is_cfl_allowed(xd));
+    assert(is_cfl_allowed(mbmi));
     const BLOCK_SIZE plane_bsize =
         AOMMAX(BLOCK_4X4, get_plane_block_size(mbmi->sb_type, pd));
     assert(plane_bsize < BLOCK_SIZES_ALL);
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index 47b3647..4e6ce01 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -213,7 +213,7 @@
     }
   }
 #if CONFIG_CFL
-  if (plane == AOM_PLANE_Y && xd->cfl.store_y && is_cfl_allowed(xd)) {
+  if (plane == AOM_PLANE_Y && xd->cfl.store_y && is_cfl_allowed(mbmi)) {
     cfl_store_tx(xd, row, col, tx_size, mbmi->sb_type);
   }
 #endif  // CONFIG_CFL
@@ -584,7 +584,7 @@
 #if CONFIG_CFL
   if (mbmi->uv_mode != UV_CFL_PRED) {
     if (!cfl->is_chroma_reference && is_inter_block(mbmi) &&
-        is_cfl_allowed(xd)) {
+        is_cfl_allowed(mbmi)) {
       cfl_store_block(xd, mbmi->sb_type, mbmi->tx_size);
     }
   }
diff --git a/av1/decoder/decodemv.c b/av1/decoder/decodemv.c
index c1f60f0..0928fc0 100644
--- a/av1/decoder/decodemv.c
+++ b/av1/decoder/decodemv.c
@@ -1200,7 +1200,7 @@
 
 #if CONFIG_CFL
     if (mbmi->uv_mode == UV_CFL_PRED) {
-      if (!is_cfl_allowed(xd)) {
+      if (!is_cfl_allowed(mbmi)) {
         aom_internal_error(
             &cm->error, AOM_CODEC_UNSUP_BITSTREAM,
             "Chroma from Luma (CfL) cannot be signaled for a %dx%d block.",
@@ -1560,7 +1560,7 @@
 
 #if CONFIG_CFL
     if (mbmi->uv_mode == UV_CFL_PRED) {
-      if (!is_cfl_allowed(xd)) {
+      if (!is_cfl_allowed(mbmi)) {
         aom_internal_error(
             &cm->error, AOM_CODEC_UNSUP_BITSTREAM,
             "Chroma from Luma (CfL) cannot be signaled for a %dx%d block.",
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index 75abce6..1c04554 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -1424,7 +1424,7 @@
 
 #if CONFIG_CFL
       if (mbmi->uv_mode == UV_CFL_PRED) {
-        if (!is_cfl_allowed(xd)) {
+        if (!is_cfl_allowed(mbmi)) {
           aom_internal_error(
               &cm->error, AOM_CODEC_UNSUP_BITSTREAM,
               "Chroma from Luma (CfL) cannot be signaled for a %dx%d block.",
@@ -1735,7 +1735,7 @@
 
 #if CONFIG_CFL
     if (mbmi->uv_mode == UV_CFL_PRED) {
-      if (!is_cfl_allowed(xd)) {
+      if (!is_cfl_allowed(mbmi)) {
         aom_internal_error(
             &cm->error, AOM_CODEC_UNSUP_BITSTREAM,
             "Chroma from Luma (CfL) cannot be signaled for a %dx%d block.",
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index c0c7f31..f1d516e 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -4827,7 +4827,7 @@
   if (is_inter_block(mbmi) &&
       !is_chroma_reference(mi_row, mi_col, bsize, cfl->subsampling_x,
                            cfl->subsampling_y) &&
-      is_cfl_allowed(xd)) {
+      is_cfl_allowed(mbmi)) {
     cfl_store_block(xd, mbmi->sb_type, mbmi->tx_size);
   }
 #endif  // CONFIG_CFL
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index 354dca8..65bc6be 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -972,7 +972,8 @@
   if (*eob) *(args->skip) = 0;
 
 #if CONFIG_CFL
-  if (plane == AOM_PLANE_Y && xd->cfl.store_y && is_cfl_allowed(xd)) {
+  if (plane == AOM_PLANE_Y && xd->cfl.store_y &&
+      is_cfl_allowed(&xd->mi[0]->mbmi)) {
     cfl_store_tx(xd, blk_row, blk_col, tx_size, plane_bsize);
   }
 #endif  // CONFIG_CFL
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 2917271..afa2301 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -2142,7 +2142,7 @@
     return;
   }
 #if CONFIG_CFL
-  if (plane == AOM_PLANE_Y && xd->cfl.store_y && is_cfl_allowed(xd)) {
+  if (plane == AOM_PLANE_Y && xd->cfl.store_y && is_cfl_allowed(mbmi)) {
     assert(!is_inter_block(mbmi) || plane_bsize < BLOCK_8X8);
     cfl_store_tx(xd, blk_row, blk_col, tx_size, plane_bsize);
   }
@@ -5433,7 +5433,7 @@
 
   const BLOCK_SIZE bsize = mbmi->sb_type;
 #if CONFIG_DEBUG
-  assert(is_cfl_allowed(xd));
+  assert(is_cfl_allowed(mbmi));
   const BLOCK_SIZE plane_bsize = AOMMAX(
       BLOCK_4X4, get_plane_block_size(mbmi->sb_type, &xd->plane[AOM_PLANE_U]));
   assert(plane_bsize < BLOCK_SIZES_ALL);
@@ -5553,7 +5553,7 @@
 #if CONFIG_CFL
     int cfl_alpha_rate = 0;
     if (mode == UV_CFL_PRED) {
-      if (!is_cfl_allowed(xd)) continue;
+      if (!is_cfl_allowed(mbmi)) continue;
       assert(!is_directional_mode);
       const TX_SIZE uv_tx_size =
           av1_get_uv_tx_size(mbmi, &xd->plane[AOM_PLANE_U]);
@@ -5586,7 +5586,7 @@
 
 #if CONFIG_CFL
     if (mode == UV_CFL_PRED) {
-      assert(is_cfl_allowed(xd));
+      assert(is_cfl_allowed(mbmi));
       this_rate += cfl_alpha_rate;
 #if CONFIG_DEBUG
       assert(xd->cfl.rate == this_rate);