[CFL] Limit Luma Partition to 32X32

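Based on the HW Subgroup call of December 4th 2017, we limit the luma partition
size for which CfL is allowed to 32X32. Previously, the 32X32 limit was applied
to the chroma plane block size.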

Regression on Subset 1
  PSNR | PSNR Cb | PSNR Cr | PSNR HVS |    SSIM | MS SSIM | CIEDE 2000
0.0881 |  1.3504 |  1.2936 |   0.0572 |  0.0182 |  0.0227 |     0.5204

https://two.arewecompressedyet.com/?job=CfL-PartU%402017-12-12T15%3A39%3A36.794Z&job=CfL-Max32x32%402017-12-12T16%3A10%3A09.989Z

Change-Id: I7e3cfd68097c0bc24b1426348b5fd574c4f638a0
diff --git a/av1/common/blockd.h b/av1/common/blockd.h
index 24be7d9..d5c5426 100644
--- a/av1/common/blockd.h
+++ b/av1/common/blockd.h
@@ -568,6 +568,7 @@
 #define CFL_SUB8X8_VAL_MI_SQUARE \
   (CFL_SUB8X8_VAL_MI_SIZE * CFL_SUB8X8_VAL_MI_SIZE)
 #endif  // CONFIG_DEBUG
+#define CFL_MAX_BLOCK_SIZE (BLOCK_32X32)
 typedef struct cfl_ctx {
   // The CfL prediction buffer is used in two steps:
   //   1. Stores Q3 reconstructed luma pixels
diff --git a/av1/common/cfl.c b/av1/common/cfl.c
index 29a7d0d..c1a9c63 100644
--- a/av1/common/cfl.c
+++ b/av1/common/cfl.c
@@ -189,7 +189,7 @@
                        TX_SIZE tx_size, int plane) {
   CFL_CTX *const cfl = &xd->cfl;
   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
-  assert(is_cfl_allowed(xd));
+  assert(is_cfl_allowed(mbmi));
 
   if (!cfl->are_parameters_computed) cfl_compute_parameters(xd, tx_size);
 
@@ -466,7 +466,7 @@
   uint8_t *dst =
       &pd->dst.buf[(row * pd->dst.stride + col) << tx_size_wide_log2[0]];
 
-  assert(is_cfl_allowed(xd));
+  assert(is_cfl_allowed(&xd->mi[0]->mbmi));
   if (block_size_high[bsize] == 4 || block_size_wide[bsize] == 4) {
     // Only dimensions of size 4 can have an odd offset.
     assert(!((col & 1) && tx_size_wide[tx_size] != 4));
@@ -487,7 +487,7 @@
   int col = 0;
   bsize = AOMMAX(BLOCK_4X4, bsize);
 
-  assert(is_cfl_allowed(xd));
+  assert(is_cfl_allowed(&xd->mi[0]->mbmi));
   if (block_size_high[bsize] == 4 || block_size_wide[bsize] == 4) {
     sub8x8_adjust_offset(cfl, &row, &col);
 #if CONFIG_DEBUG
diff --git a/av1/common/cfl.h b/av1/common/cfl.h
index 92fbbd0..365e420 100644
--- a/av1/common/cfl.h
+++ b/av1/common/cfl.h
@@ -14,12 +14,11 @@
 
 #include "av1/common/blockd.h"
 
-static INLINE int is_cfl_allowed(const MACROBLOCKD *const xd) {
-  const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
-  const BLOCK_SIZE plane_bsize = AOMMAX(
-      BLOCK_4X4, get_plane_block_size(mbmi->sb_type, &xd->plane[AOM_PLANE_U]));
-  assert(plane_bsize < BLOCK_SIZES_ALL);
-  return plane_bsize <= BLOCK_32X32;
+static INLINE int is_cfl_allowed(const MB_MODE_INFO *mbmi) {
+  const BLOCK_SIZE bsize = mbmi->sb_type;
+  assert(bsize >= BLOCK_4X4);  // Intra luma partitions can't be < 4X4
+  assert(bsize < BLOCK_SIZES_ALL);
+  return (bsize >= BLOCK_4X4) && (bsize <= CFL_MAX_BLOCK_SIZE);
 }
 
 static INLINE int get_scaled_luma_q0(int alpha_q3, int16_t pred_buf_q3) {
diff --git a/av1/common/reconintra.c b/av1/common/reconintra.c
index 6ce57a8..a4ee580 100644
--- a/av1/common/reconintra.c
+++ b/av1/common/reconintra.c
@@ -2782,7 +2782,7 @@
 #if CONFIG_DEBUG
     assert(blk_col == 0);
     assert(blk_row == 0);
-    assert(is_cfl_allowed(xd));
+    assert(is_cfl_allowed(mbmi));
     const BLOCK_SIZE plane_bsize =
         AOMMAX(BLOCK_4X4, get_plane_block_size(mbmi->sb_type, pd));
     assert(plane_bsize < BLOCK_SIZES_ALL);