[CFL] Use MAX_NUM_TXB_SQUARE instead of MAX_NUM_TXB

MAX_NUM_TXB is the maximum number of transform blocks along one
dimension. The CfL average buffer needs room for the maximum number of
transform blocks across both dimensions. As such, we now size it with
MAX_NUM_TXB_SQUARE (MAX_NUM_TXB * MAX_NUM_TXB) instead of the
erroneous MAX_NUM_TXB.

An assert guards against overrunning the average buffer; it now checks
the index before each write instead of at the end of the function.
This fix stops the assertion failures on 4:4:4 sequences. This patch
does not alter AWCY results, as AWCY only covers 4:2:0 sequences (for
which MAX_NUM_TXB turns out to be sufficient).

Change-Id: I628db0131f60abc2d06cbbe3fe3dc40e28894ce3
diff --git a/av1/common/blockd.h b/av1/common/blockd.h
index 23ea3b1..007e039 100644
--- a/av1/common/blockd.h
+++ b/av1/common/blockd.h
@@ -714,7 +714,7 @@
   //   * Max error will be 1/16th.
   // Note: 3 is chosen so that y_averages fits in 15 bits when 12 bit input is
   // used
-  int y_averages_q3[MAX_NUM_TXB];
+  int y_averages_q3[MAX_NUM_TXB_SQUARE];
   int y_averages_stride;
 
   int are_parameters_computed;
diff --git a/av1/common/cfl.c b/av1/common/cfl.c
index 39c36f5..2988200 100644
--- a/av1/common/cfl.c
+++ b/av1/common/cfl.c
@@ -214,6 +214,7 @@
         }
         t_y_pix += MAX_SB_SIZE;
       }
+      assert(a < MAX_NUM_TXB_SQUARE);
       averages_q3[a++] =
           ((sum << 3) + (1 << (num_pel_log2 - 1))) >> num_pel_log2;
 
@@ -226,7 +227,6 @@
   }
 
   cfl->y_averages_stride = stride;
-  assert(a <= MAX_NUM_TXB);
 }
 
 static INLINE int cfl_idx_to_alpha(int alpha_idx, int joint_sign,
diff --git a/av1/common/enums.h b/av1/common/enums.h
index fda6a86..15e7d9c 100644
--- a/av1/common/enums.h
+++ b/av1/common/enums.h
@@ -206,6 +206,7 @@
 #define MAX_TX_BLOCKS_IN_MAX_SB (1 << MAX_TX_BLOCKS_IN_MAX_SB_LOG2)
 
 #define MAX_NUM_TXB (1 << (MAX_SB_SIZE_LOG2 - MIN_TX_SIZE_LOG2))
+#define MAX_NUM_TXB_SQUARE (MAX_NUM_TXB * MAX_NUM_TXB)
 
 #if CONFIG_NCOBMC_ADAPT_WEIGHT
 typedef enum ATTRIBUTE_PACKED {