Support rectangular tx size in cb4x4 mode

This commit adds support for the ext-tx and rect-tx experiments in
cb4x4 mode. It resolves an encoder/decoder mismatch that occurred when
all the transform experiments were enabled.

The coding gains are
        ext-tx + rect-tx   cb4x4    vartx     total
lowres      4.0%           2.3%      0.5%     6.9%

The encoding speed is about the same when cb4x4 and vartx are
additionally enabled.

BUG=aomedia:139

Change-Id: I3fdabc6d5de23ceb78ac0751a9bf7332ebc0a3ac
diff --git a/av1/common/common_data.h b/av1/common/common_data.h
index a80ca5e..4ea247c 100644
--- a/av1/common/common_data.h
+++ b/av1/common/common_data.h
@@ -950,8 +950,12 @@
       { { TX_4X8, TX_4X4 }, { TX_4X4, TX_4X4 } },
 #if CONFIG_TX64X64
       { { TX_4X8, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#endif                                             // CONFIG_TX64X64
+#endif  // CONFIG_TX64X64
+#if CONFIG_CB4X4
+      { { TX_4X8, TX_4X4 }, { TX_2X2, TX_2X2 } },  // used
+#else
       { { TX_4X8, TX_4X4 }, { TX_4X4, TX_4X4 } },  // used
+#endif
       { { TX_4X8, TX_4X4 }, { TX_4X4, TX_4X4 } },
       { { TX_4X8, TX_4X4 }, { TX_4X4, TX_4X4 } },
       { { TX_4X8, TX_4X4 }, { TX_4X4, TX_4X4 } },
@@ -977,7 +981,11 @@
       { { TX_8X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
 #endif  // CONFIG_TX64X64
       { { TX_8X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#if CONFIG_CB4X4
+      { { TX_8X4, TX_2X2 }, { TX_4X4, TX_2X2 } },  // used
+#else
       { { TX_8X4, TX_4X4 }, { TX_4X4, TX_4X4 } },  // used
+#endif
       { { TX_8X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
       { { TX_8X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
       { { TX_8X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
@@ -1293,6 +1301,7 @@
 // Generates 4 bit field in which each bit set to 1 represents
 // a blocksize partition  1111 means we split 64x64, 32x32, 16x16
 // and 8x8.  1000 means we just split the 64x64 to 32x32
+/* clang-format off */
 static const struct {
   PARTITION_CONTEXT above;
   PARTITION_CONTEXT left;
@@ -1340,6 +1349,7 @@
   { 0, 0 },    // 64X64 - {0b0000, 0b0000}
 #endif  // CONFIG_EXT_PARTITION
 };
+/* clang-format on */
 
 #if CONFIG_SUPERTX
 static const TX_SIZE uvsupertx_size_lookup[TX_SIZES][2][2] = {
diff --git a/av1/common/pred_common.h b/av1/common/pred_common.h
index 75948b6..7825f3c 100644
--- a/av1/common/pred_common.h
+++ b/av1/common/pred_common.h
@@ -184,7 +184,7 @@
   int left_ctx = (has_left && !left_mbmi->skip)
                      ? (int)txsize_sqr_map[left_mbmi->tx_size]
                      : max_tx_size;
-  assert(xd->mi[0]->mbmi.sb_type >= BLOCK_8X8);
+
   if (!has_left) left_ctx = above_ctx;
 
   if (!has_above) above_ctx = left_ctx;
diff --git a/av1/decoder/decodemv.c b/av1/decoder/decodemv.c
index 25c7eac..85fd4ca 100644
--- a/av1/decoder/decodemv.c
+++ b/av1/decoder/decodemv.c
@@ -419,8 +419,8 @@
   const TX_MODE tx_mode = cm->tx_mode;
   const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
   if (xd->lossless[xd->mi[0]->mbmi.segment_id]) return TX_4X4;
-#if CONFIG_CB4X4 && CONFIG_VAR_TX
-  if ((bsize > BLOCK_4X4 && is_inter) || bsize >= BLOCK_8X8) {
+#if CONFIG_CB4X4 && (CONFIG_VAR_TX || CONFIG_RECT_TX)
+  if (bsize > BLOCK_4X4) {
 #else
   if (bsize >= BLOCK_8X8) {
-#endif  // CONFIG_CB4X4 && CONFIG_VAR_TX
+#endif  // CONFIG_CB4X4 && (CONFIG_VAR_TX || CONFIG_RECT_TX)
@@ -1991,8 +1991,7 @@
 
     if (cm->tx_mode == TX_MODE_SELECT &&
 #if CONFIG_CB4X4
-        (bsize >= BLOCK_8X8 ||
-         (bsize >= BLOCK_4X4 && inter_block && !mbmi->skip)) &&
+        bsize > BLOCK_4X4 &&
 #else
         bsize >= BLOCK_8X8 &&
 #endif
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index 0d65a15..ca43152 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -427,8 +427,12 @@
                                    aom_writer *w) {
   const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   const BLOCK_SIZE bsize = mbmi->sb_type;
-  // For sub8x8 blocks the tx_size symbol does not need to be sent
+// For sub8x8 blocks the tx_size symbol does not need to be sent
+#if CONFIG_CB4X4 && (CONFIG_VAR_TX || CONFIG_RECT_TX)
+  if (bsize > BLOCK_4X4) {
+#else
   if (bsize >= BLOCK_8X8) {
+#endif
     const TX_SIZE tx_size = mbmi->tx_size;
     const int is_inter = is_inter_block(mbmi);
     const int tx_size_ctx = get_tx_size_context(xd);
@@ -1335,8 +1339,8 @@
       aom_write(w, is_inter, av1_get_intra_inter_prob(cm, xd));
 
   if (cm->tx_mode == TX_MODE_SELECT &&
-#if CONFIG_CB4X4 && CONFIG_VAR_TX
-      (bsize >= BLOCK_8X8 || (bsize >= BLOCK_4X4 && is_inter && !skip)) &&
+#if CONFIG_CB4X4 && (CONFIG_VAR_TX || CONFIG_RECT_TX)
+      bsize > BLOCK_4X4 &&
 #else
       bsize >= BLOCK_8X8 &&
 #endif
@@ -1700,7 +1704,12 @@
   write_skip(cm, xd, mbmi->segment_id, mi, w);
 #endif
 
-  if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT &&
+  if (cm->tx_mode == TX_MODE_SELECT &&
+#if CONFIG_CB4X4 && (CONFIG_VAR_TX || CONFIG_RECT_TX)
+      bsize > BLOCK_4X4 &&
+#else
+      bsize >= BLOCK_8X8 &&
+#endif
       !xd->lossless[mbmi->segment_id])
     write_selected_tx_size(cm, xd, w);
 
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index b7d8e84..c6b9f85 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -5683,10 +5683,8 @@
     TX_SIZE tx_size = mbmi->tx_size;
 #endif
     if (cm->tx_mode == TX_MODE_SELECT &&
-#if CONFIG_CB4X4 && CONFIG_VAR_TX
-        (mbmi->sb_type >= BLOCK_8X8 ||
-         (mbmi->sb_type >= BLOCK_4X4 && is_inter &&
-          !(mbmi->skip || seg_skip))) &&
+#if CONFIG_CB4X4 && (CONFIG_VAR_TX || CONFIG_RECT_TX)
+        mbmi->sb_type > BLOCK_4X4 &&
 #else
         mbmi->sb_type >= BLOCK_8X8 &&
 #endif
@@ -5783,7 +5781,7 @@
 #if CONFIG_VAR_TX
   if (cm->tx_mode == TX_MODE_SELECT &&
 #if CONFIG_CB4X4
-      mbmi->sb_type >= BLOCK_4X4 &&
+      mbmi->sb_type > BLOCK_4X4 &&
 #else
       mbmi->sb_type >= BLOCK_8X8 &&
 #endif
@@ -5795,7 +5793,7 @@
     if (is_inter)
       tx_size = tx_size_from_tx_mode(bsize, cm->tx_mode, is_inter);
     else
-      tx_size = (bsize >= BLOCK_8X8) ? tx_size : TX_4X4;
+      tx_size = (bsize > BLOCK_4X4) ? tx_size : TX_4X4;
     mbmi->tx_size = tx_size;
     set_txfm_ctxs(tx_size, xd->n8_w, xd->n8_h, (mbmi->skip || seg_skip), xd);
   }