Enable 4:1 -> 2:1 -> 1:1 splits for 4:1 transforms

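Also split the sub_tx_size_map array into separate intra and inter
tables (indexed by is_inter) in order to enable the new 4:1
transforms for inter and intra separately.

Refactoring included in this change:
- Remove the intra_tx_size_cat_lookup array (and its
  inter_tx_size_cat_lookup alias) since it is unnecessary; the
  category is now computed by the new bsize_to_tx_size_cat() helper.
- Consolidate max_txsize_rect_intra_lookup and max_txsize_rect_lookup
  into a single max_txsize_rect_lookup[is_inter][bsize] table for
  convenience.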

Change-Id: I112553bab612dafb973611c87f36a43e1ac4be85
diff --git a/av1/common/blockd.h b/av1/common/blockd.h
index 1c41637..8ccce76 100644
--- a/av1/common/blockd.h
+++ b/av1/common/blockd.h
@@ -944,10 +944,7 @@
 }
 
 static INLINE TX_SIZE get_max_rect_tx_size(BLOCK_SIZE bsize, int is_inter) {
-  if (is_inter)
-    return max_txsize_rect_lookup[bsize];
-  else
-    return max_txsize_rect_intra_lookup[bsize];
+  return max_txsize_rect_lookup[is_inter][bsize];
 }
 
 static INLINE TX_SIZE tx_size_from_tx_mode(BLOCK_SIZE bsize, TX_MODE tx_mode,
@@ -1102,18 +1099,31 @@
   int depth = 0;
   while (depth < MAX_TX_DEPTH && tx_size != TX_4X4) {
     depth++;
-    tx_size = sub_tx_size_map[tx_size];
+    tx_size = sub_tx_size_map[is_inter][tx_size];
   }
   return depth;
 }
 
+static INLINE int bsize_to_tx_size_cat(BLOCK_SIZE bsize, int is_inter) {
+  TX_SIZE tx_size = get_max_rect_tx_size(bsize, is_inter);
+  assert(tx_size != TX_4X4);
+  int depth = 0;
+  while (tx_size != TX_4X4) {
+    depth++;
+    tx_size = sub_tx_size_map[is_inter][tx_size];
+    assert(depth < 10);
+  }
+  assert(depth <= MAX_TX_CATS);
+  return depth - 1;
+}
+
 static INLINE int tx_size_to_depth(TX_SIZE tx_size, BLOCK_SIZE bsize,
                                    int is_inter) {
   TX_SIZE ctx_size = get_max_rect_tx_size(bsize, is_inter);
   int depth = 0;
   while (tx_size != ctx_size) {
     depth++;
-    ctx_size = sub_tx_size_map[ctx_size];
+    ctx_size = sub_tx_size_map[is_inter][ctx_size];
     assert(depth <= MAX_TX_DEPTH);
   }
   return depth;
@@ -1123,7 +1133,7 @@
                                        int is_inter) {
   TX_SIZE max_tx_size = get_max_rect_tx_size(bsize, is_inter);
   TX_SIZE tx_size = max_tx_size;
-  for (int d = 0; d < depth; ++d) tx_size = sub_tx_size_map[tx_size];
+  for (int d = 0; d < depth; ++d) tx_size = sub_tx_size_map[is_inter][tx_size];
   return tx_size;
 }
 
diff --git a/av1/common/common_data.h b/av1/common/common_data.h
index d9c6764..181f980 100644
--- a/av1/common/common_data.h
+++ b/av1/common/common_data.h
@@ -647,126 +647,128 @@
 #endif  // CONFIG_EXT_PARTITION
 };
 
-static const TX_SIZE max_txsize_rect_intra_lookup[BLOCK_SIZES_ALL] = {
-  // 2X2,    2X4,      4X2,
-  TX_4X4,    TX_4X4,   TX_4X4,
-  //                   4X4
-                       TX_4X4,
-  // 4X8,    8X4,      8X8
-  TX_4X8,    TX_8X4,   TX_8X8,
-  // 8X16,   16X8,     16X16
-  TX_8X16,   TX_16X8,  TX_16X16,
-  // 16X32,  32X16,    32X32
-  TX_16X32,  TX_32X16, TX_32X32,
+static const TX_SIZE max_txsize_rect_lookup[2][BLOCK_SIZES_ALL] = {
+  {
+      // Intra
+      // 2X2,    2X4,      4X2,
+      TX_4X4,    TX_4X4,   TX_4X4,
+      //                   4X4
+      TX_4X4,
+      // 4X8,    8X4,      8X8
+      TX_4X8,    TX_8X4,   TX_8X8,
+      // 8X16,   16X8,     16X16
+      TX_8X16,   TX_16X8,  TX_16X16,
+      // 16X32,  32X16,    32X32
+      TX_16X32,  TX_32X16, TX_32X32,
 #if CONFIG_TX64X64
-  // 32X64,  64X32,
-  TX_32X64,  TX_64X32,
-  // 64X64
-  TX_64X64,
+      // 32X64,  64X32,
+      TX_32X64,  TX_64X32,
+      // 64X64
+      TX_64X64,
 #if CONFIG_EXT_PARTITION
-  // 64x128, 128x64,   128x128
-  TX_64X64,  TX_64X64, TX_64X64,
+      // 64x128, 128x64,   128x128
+      TX_64X64,  TX_64X64, TX_64X64,
 #endif  // CONFIG_EXT_PARTITION
 #else
-  // 32X64,  64X32,
-  TX_32X32,  TX_32X32,
-  // 64X64
-  TX_32X32,
+      // 32X64,  64X32,
+      TX_32X32,  TX_32X32,
+      // 64X64
+      TX_32X32,
 #if CONFIG_EXT_PARTITION
-  // 64x128, 128x64,   128x128
-  TX_32X32,  TX_32X32, TX_32X32,
+      // 64x128, 128x64,   128x128
+      TX_32X32,  TX_32X32, TX_32X32,
 #endif  // CONFIG_EXT_PARTITION
 #endif  // CONFIG_TX64X64
 #if CONFIG_EXT_PARTITION_TYPES && CONFIG_RECT_TX_EXT_INTRA
-  // 4x16,   16x4,
-  TX_4X16,   TX_16X4,
-  // 8x32,   32x8
-  TX_8X32,   TX_32X8,
+      // 4x16,   16x4,
+      TX_4X16,   TX_16X4,
+      // 8x32,   32x8
+      TX_8X32,   TX_32X8,
 #if CONFIG_TX64X64
-  // 16x64,  64x16
-  TX_16X64,  TX_64X16,
+      // 16x64,  64x16
+      TX_16X64,  TX_64X16,
 #else
-  // 16x64,  64x16
-  TX_16X32,  TX_32X16,
+      // 16x64,  64x16
+      TX_16X32,  TX_32X16,
 #endif  // CONFIG_TX64X64
 #else
-  // 4x16,   16x4,
-  TX_4X8,    TX_8X4,
-  // 8x32,   32x8
-  TX_8X16,   TX_16X8,
-  // 16x64,  64x16
-  TX_16X32,  TX_32X16,
+      // 4x16,   16x4,
+      TX_4X8,    TX_8X4,
+      // 8x32,   32x8
+      TX_8X16,   TX_16X8,
+      // 16x64,  64x16
+      TX_16X32,  TX_32X16,
 #endif  // CONFIG_EXT_PARTITION_TYPES && CONFIG_RECT_TX_EXT_INTRA
 #if CONFIG_EXT_PARTITION
 #if CONFIG_TX64X64
-  // 32x128  128x32
-  TX_32X64,  TX_64X32
+      // 32x128  128x32
+      TX_32X64,  TX_64X32
 #else
-  // 32x128  128x32
-  TX_32X32,  TX_32X32
+      // 32x128  128x32
+      TX_32X32,  TX_32X32
 #endif  // CONFIG_TX64X64
 #endif  // CONFIG_EXT_PARTITION
-};
-
-static const TX_SIZE max_txsize_rect_lookup[BLOCK_SIZES_ALL] = {
-  // 2X2,    2X4,      4X2,
-  TX_4X4,    TX_4X4,   TX_4X4,
-  //                   4X4
-                       TX_4X4,
-  // 4X8,    8X4,      8X8
-  TX_4X8,    TX_8X4,   TX_8X8,
-  // 8X16,   16X8,     16X16
-  TX_8X16,   TX_16X8,  TX_16X16,
-  // 16X32,  32X16,    32X32
-  TX_16X32,  TX_32X16, TX_32X32,
+  }, {
+      // Inter
+      // 2X2,    2X4,      4X2,
+      TX_4X4,    TX_4X4,   TX_4X4,
+      //                   4X4
+      TX_4X4,
+      // 4X8,    8X4,      8X8
+      TX_4X8,    TX_8X4,   TX_8X8,
+      // 8X16,   16X8,     16X16
+      TX_8X16,   TX_16X8,  TX_16X16,
+      // 16X32,  32X16,    32X32
+      TX_16X32,  TX_32X16, TX_32X32,
 #if CONFIG_TX64X64
-  // 32X64,  64X32,
-  TX_32X64,  TX_64X32,
-  // 64X64
-  TX_64X64,
+      // 32X64,  64X32,
+      TX_32X64,  TX_64X32,
+      // 64X64
+      TX_64X64,
 #if CONFIG_EXT_PARTITION
-  // 64x128, 128x64,   128x128
-  TX_64X64,  TX_64X64, TX_64X64,
+      // 64x128, 128x64,   128x128
+      TX_64X64,  TX_64X64, TX_64X64,
 #endif  // CONFIG_EXT_PARTITION
 #else
-  // 32X64,  64X32,
-  TX_32X32,  TX_32X32,
-  // 64X64
-  TX_32X32,
+      // 32X64,  64X32,
+      TX_32X32,  TX_32X32,
+      // 64X64
+      TX_32X32,
 #if CONFIG_EXT_PARTITION
-  // 64x128, 128x64,   128x128
-  TX_32X32,  TX_32X32, TX_32X32,
+      // 64x128, 128x64,   128x128
+      TX_32X32,  TX_32X32, TX_32X32,
 #endif  // CONFIG_EXT_PARTITION
 #endif  // CONFIG_TX64X64
 #if CONFIG_EXT_PARTITION_TYPES && CONFIG_RECT_TX_EXT
-  // 4x16,   16x4,     8x32
-  TX_4X16,   TX_16X4,  TX_8X32,
-  // 32x8
-  TX_32X8,
+      // 4x16,   16x4,     8x32
+      TX_4X16,   TX_16X4,  TX_8X32,
+      // 32x8
+      TX_32X8,
 #if CONFIG_TX64X64
-  // 16x64,  64x16
-  TX_16X64,  TX_64X16,
+      // 16x64,  64x16
+      TX_16X64,  TX_64X16,
 #else
-  // 16x64,  64x16
-  TX_16X32,  TX_32X16,
+      // 16x64,  64x16
+      TX_16X32,  TX_32X16,
 #endif  // CONFIG_TX64X64
 #else
-  // 4x16,   16x4,     8x32
-  TX_4X8,    TX_8X4,   TX_8X16,
-  // 32x8
-  TX_16X8,
-  // 16x64,  64x16
-  TX_16X32,  TX_32X16,
+      // 4x16,   16x4,     8x32
+      TX_4X8,    TX_8X4,   TX_8X16,
+      // 32x8
+      TX_16X8,
+      // 16x64,  64x16
+      TX_16X32,  TX_32X16,
 #endif  // CONFIG_EXT_PARTITION_TYPES && CONFIG_RECT_TX_EXT
 #if CONFIG_EXT_PARTITION
 #if CONFIG_TX64X64
-  // 32x128  128x32
-  TX_32X64,  TX_64X32
+      // 32x128  128x32
+      TX_32X64,  TX_64X32
 #else
-  // 32x128  128x32
-  TX_32X32,  TX_32X32
+      // 32x128  128x32
+      TX_32X32,  TX_32X32
 #endif  // CONFIG_TX64X64
 #endif  // CONFIG_EXT_PARTITION
+  },
 };
 
 static const TX_TYPE_1D vtx_tab[TX_TYPES] = {
@@ -781,88 +783,89 @@
   IDTX_1D, DCT_1D,      IDTX_1D,     ADST_1D,     IDTX_1D, FLIPADST_1D,
 };
 
-// Same as "max_txsize_lookup[bsize] - TX_8X8", except for rectangular
-// block which may use a rectangular transform, in which  case it is
-// "(max_txsize_lookup[bsize] + 1) - TX_8X8", invalid for bsize < 8X8
 #define TXSIZE_CAT_INVALID (-1)
 
-static const int32_t intra_tx_size_cat_lookup[BLOCK_SIZES_ALL] = {
-  // 2X2,             2X4,                4X2,
-  TXSIZE_CAT_INVALID, TXSIZE_CAT_INVALID, TXSIZE_CAT_INVALID,
-  //                                      4X4,
-                                          TXSIZE_CAT_INVALID,
-  // 4X8,             8X4,                8X8,
-  TX_8X8 - TX_8X8,    TX_8X8 - TX_8X8,    TX_8X8 - TX_8X8,
-  // 8X16,            16X8,               16X16
-  TX_16X16 - TX_8X8,  TX_16X16 - TX_8X8,  TX_16X16 - TX_8X8,
-  // 16X32,           32X16,              32X32
-  TX_32X32 - TX_8X8,  TX_32X32 - TX_8X8,  TX_32X32 - TX_8X8,
-#if CONFIG_TX64X64
-  // 32X64,           64X32,
-  TX_64X64 - TX_8X8,  TX_64X64 - TX_8X8,
-  // 64X64
-  TX_64X64 - TX_8X8,
-#if CONFIG_EXT_PARTITION
-  // 64x128,          128x64,             128x128
-  TX_64X64 - TX_8X8,  TX_64X64 - TX_8X8,  TX_64X64 - TX_8X8,
-#endif  // CONFIG_EXT_PARTITION
-#else
-  // 32X64,           64X32,
-  TX_32X32 - TX_8X8,  TX_32X32 - TX_8X8,
-  // 64X64
-  TX_32X32 - TX_8X8,
-#if CONFIG_EXT_PARTITION
-  // 64x128,          128x64,             128x128
-  TX_32X32 - TX_8X8,  TX_32X32 - TX_8X8,  TX_32X32 - TX_8X8,
-#endif  // CONFIG_EXT_PARTITION
-#endif  // CONFIG_TX64X64
-  // TODO(david.barker): Change these if we support rectangular transforms
-  // for 4:1 shaped partitions
-  // 4x16,            16x4,               8x32
-  TX_8X8 - TX_8X8,    TX_8X8 - TX_8X8,    TX_16X16 - TX_8X8,
-  // 32x8,            16x64,              64x16
-  TX_16X16 - TX_8X8,  TX_32X32 - TX_8X8,  TX_32X32 - TX_8X8,
-#if CONFIG_EXT_PARTITION
-#if CONFIG_TX64X64
-  // 32x128,          128x32
-  TX_64X64 - TX_8X8,  TX_64X64 - TX_8X8
-#else
-  // 32x128,          128x32
-  TX_32X32 - TX_8X8,  TX_32X32 - TX_8X8
-#endif  // CONFIG_TX64X64
-#endif  // CONFIG_EXT_PARTITION
-};
-
-#define inter_tx_size_cat_lookup intra_tx_size_cat_lookup
-
 /* clang-format on */
 
-static const TX_SIZE sub_tx_size_map[TX_SIZES_ALL] = {
-  TX_4X4,    // TX_4X4
-  TX_4X4,    // TX_8X8
-  TX_8X8,    // TX_16X16
-  TX_16X16,  // TX_32X32
+static const TX_SIZE sub_tx_size_map[2][TX_SIZES_ALL] = {
+  {
+      // Intra
+      TX_4X4,    // TX_4X4
+      TX_4X4,    // TX_8X8
+      TX_8X8,    // TX_16X16
+      TX_16X16,  // TX_32X32
 #if CONFIG_TX64X64
-  TX_32X32,  // TX_64X64
-#endif       // CONFIG_TX64X64
-  TX_4X4,    // TX_4X8
-  TX_4X4,    // TX_8X4
-  TX_8X8,    // TX_8X16
-  TX_8X8,    // TX_16X8
-  TX_16X16,  // TX_16X32
-  TX_16X16,  // TX_32X16
+      TX_32X32,  // TX_64X64
+#endif           // CONFIG_TX64X64
+      TX_4X4,    // TX_4X8
+      TX_4X4,    // TX_8X4
+      TX_8X8,    // TX_8X16
+      TX_8X8,    // TX_16X8
+      TX_16X16,  // TX_16X32
+      TX_16X16,  // TX_32X16
 #if CONFIG_TX64X64
-  TX_32X32,  // TX_32X64
-  TX_32X32,  // TX_64X32
-#endif       // CONFIG_TX64X64
-  TX_4X4,    // TX_4X16
-  TX_4X4,    // TX_16X4
-  TX_8X8,    // TX_8X32
-  TX_8X8,    // TX_32X8
+      TX_32X32,  // TX_32X64
+      TX_32X32,  // TX_64X32
+#endif           // CONFIG_TX64X64
+#if CONFIG_EXT_PARTITION_TYPES && CONFIG_RECT_TX_EXT_INTRA
+      TX_4X8,   // TX_4X16
+      TX_8X4,   // TX_16X4
+      TX_8X16,  // TX_8X32
+      TX_16X8,  // TX_32X8
 #if CONFIG_TX64X64
-  TX_16X16,  // TX_16X64
-  TX_16X16,  // TX_64X16
-#endif       // CONFIG_TX64X64
+      TX_16X32,  // TX_16X64
+      TX_32X16,  // TX_64X16
+#endif           // CONFIG_TX64X64
+#else
+      TX_4X4,    // TX_4X16
+      TX_4X4,    // TX_16X4
+      TX_8X8,    // TX_8X32
+      TX_8X8,    // TX_32X8
+#if CONFIG_TX64X64
+      TX_16X16,  // TX_16X64
+      TX_16X16,  // TX_64X16
+#endif  // CONFIG_TX64X64
+#endif  // CONFIG_EXT_PARTITION_TYPES && CONFIG_RECT_TX_EXT_INTRA
+  },
+  {
+      // Inter
+      TX_4X4,    // TX_4X4
+      TX_4X4,    // TX_8X8
+      TX_8X8,    // TX_16X16
+      TX_16X16,  // TX_32X32
+#if CONFIG_TX64X64
+      TX_32X32,  // TX_64X64
+#endif           // CONFIG_TX64X64
+      TX_4X4,    // TX_4X8
+      TX_4X4,    // TX_8X4
+      TX_8X8,    // TX_8X16
+      TX_8X8,    // TX_16X8
+      TX_16X16,  // TX_16X32
+      TX_16X16,  // TX_32X16
+#if CONFIG_TX64X64
+      TX_32X32,  // TX_32X64
+      TX_32X32,  // TX_64X32
+#endif           // CONFIG_TX64X64
+#if CONFIG_EXT_PARTITION_TYPES && CONFIG_RECT_TX_EXT
+      TX_4X8,   // TX_4X16
+      TX_8X4,   // TX_16X4
+      TX_8X16,  // TX_8X32
+      TX_16X8,  // TX_32X8
+#if CONFIG_TX64X64
+      TX_16X32,  // TX_16X64
+      TX_32X16,  // TX_64X16
+#endif           // CONFIG_TX64X64
+#else
+      TX_4X4,    // TX_4X16
+      TX_4X4,    // TX_16X4
+      TX_8X8,    // TX_8X32
+      TX_8X8,    // TX_32X8
+#if CONFIG_TX64X64
+      TX_16X16,  // TX_16X64
+      TX_16X16,  // TX_64X16
+#endif  // CONFIG_TX64X64
+#endif  // CONFIG_EXT_PARTITION_TYPES && CONFIG_RECT_TX_EXT
+  },
 };
 
 static const TX_SIZE txsize_horz_map[TX_SIZES_ALL] = {
@@ -1733,7 +1736,7 @@
 #if CONFIG_TX64X64
       { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
 #endif  // CONFIG_TX64X64
-      { { TX_4X8, TX_4X8 }, { TX_4X4, TX_4X4 } },
+      { { TX_4X8, TX_4X4 }, { TX_4X8, TX_4X4 } },
       { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
       { { TX_4X8, TX_4X8 }, { TX_4X4, TX_4X4 } },
       { { TX_4X8, TX_4X8 }, { TX_4X4, TX_4X4 } },
@@ -1743,7 +1746,7 @@
       { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
       { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
 #endif  // CONFIG_TX64X64
-      { { TX_4X16, TX_4X8 }, { TX_4X4, TX_4X4 } },
+      { { TX_4X16, TX_4X8 }, { TX_4X16, TX_4X8 } },
       { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
       { { TX_4X16, TX_4X8 }, { TX_4X4, TX_4X4 } },
       { { TX_4X8, TX_4X8 }, { TX_4X4, TX_4X4 } },
@@ -1762,7 +1765,7 @@
       { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
 #endif  // CONFIG_TX64X64
       { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-      { { TX_8X4, TX_4X4 }, { TX_8X4, TX_4X4 } },
+      { { TX_8X4, TX_8X4 }, { TX_4X4, TX_4X4 } },
       { { TX_8X4, TX_4X4 }, { TX_8X4, TX_4X4 } },
       { { TX_8X4, TX_4X4 }, { TX_8X4, TX_4X4 } },
       { { TX_8X4, TX_4X4 }, { TX_8X4, TX_4X4 } },
@@ -1772,7 +1775,7 @@
       { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
 #endif  // CONFIG_TX64X64
       { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-      { { TX_16X4, TX_4X4 }, { TX_8X4, TX_4X4 } },
+      { { TX_16X4, TX_16X4 }, { TX_8X4, TX_8X4 } },
       { { TX_8X4, TX_4X4 }, { TX_8X4, TX_4X4 } },
       { { TX_16X4, TX_4X4 }, { TX_8X4, TX_4X4 } },
 #if CONFIG_TX64X64
@@ -1791,7 +1794,7 @@
 #endif  // CONFIG_TX64X64
       { { TX_4X8, TX_4X8 }, { TX_4X8, TX_4X8 } },
       { { TX_8X4, TX_8X4 }, { TX_4X4, TX_4X4 } },
-      { { TX_8X16, TX_8X16 }, { TX_4X8, TX_4X8 } },
+      { { TX_8X16, TX_8X8 }, { TX_4X16, TX_4X8 } },
       { { TX_8X8, TX_8X8 }, { TX_4X8, TX_4X8 } },
       { { TX_8X16, TX_8X16 }, { TX_4X8, TX_4X8 } },
       { { TX_8X16, TX_8X16 }, { TX_4X8, TX_4X8 } },
@@ -1820,7 +1823,7 @@
       { { TX_4X8, TX_4X4 }, { TX_4X8, TX_4X4 } },
       { { TX_8X4, TX_8X4 }, { TX_8X4, TX_8X4 } },
       { { TX_8X8, TX_8X4 }, { TX_8X8, TX_8X4 } },
-      { { TX_16X8, TX_8X4 }, { TX_16X8, TX_8X4 } },
+      { { TX_16X8, TX_16X4 }, { TX_8X8, TX_8X4 } },
       { { TX_16X8, TX_8X4 }, { TX_16X8, TX_8X4 } },
       { { TX_16X8, TX_8X4 }, { TX_16X8, TX_8X4 } },
 #if CONFIG_TX64X64
@@ -1849,7 +1852,7 @@
       { { TX_8X4, TX_8X4 }, { TX_8X4, TX_8X4 } },
       { { TX_8X16, TX_8X16 }, { TX_8X16, TX_8X16 } },
       { { TX_16X8, TX_16X8 }, { TX_8X8, TX_8X8 } },
-      { { TX_16X32, TX_16X32 }, { TX_8X16, TX_8X16 } },
+      { { TX_16X32, TX_16X16 }, { TX_8X32, TX_8X16 } },
       { { TX_16X16, TX_16X16 }, { TX_8X16, TX_8X16 } },
 #if CONFIG_TX64X64
       { { TX_16X16, TX_16X16 }, { TX_8X8, TX_8X8 } },
@@ -1878,7 +1881,7 @@
       { { TX_8X16, TX_8X8 }, { TX_8X16, TX_8X8 } },
       { { TX_16X8, TX_16X8 }, { TX_16X8, TX_16X8 } },
       { { TX_16X16, TX_16X8 }, { TX_16X16, TX_16X8 } },
-      { { TX_32X16, TX_16X8 }, { TX_32X16, TX_16X8 } },
+      { { TX_32X16, TX_32X8 }, { TX_16X16, TX_16X8 } },
 #if CONFIG_TX64X64
       { { TX_16X16, TX_8X8 }, { TX_16X16, TX_8X8 } },
       { { TX_16X16, TX_8X8 }, { TX_16X16, TX_8X8 } },
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index 785b99c..6e4e2fe 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -285,7 +285,7 @@
         pd->dst.stride, max_scan_line, eob, cm->reduced_tx_set_used);
     *eob_total += eob;
   } else {
-    const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
+    const TX_SIZE sub_txs = sub_tx_size_map[1][tx_size];
     assert(IMPLIES(tx_size <= TX_4X4, sub_txs == tx_size));
     assert(IMPLIES(tx_size > TX_4X4, sub_txs < tx_size));
     const int bsw = tx_size_wide_unit[sub_txs];
@@ -532,7 +532,7 @@
           const int is_split =
               (l_max_tx_size != mbmi->inter_tx_size[0][0] && bsize == bsizec &&
                txsize_to_bsize[l_max_tx_size] == bsizec);
-          if (is_split) max_tx_size = sub_tx_size_map[max_tx_size];
+          if (is_split) max_tx_size = sub_tx_size_map[1][max_tx_size];
         }
 #endif  // DISABLE_VARTX_FOR_CHROMA == 2
         const int bh_var_tx = tx_size_high_unit[max_tx_size];
diff --git a/av1/decoder/decodemv.c b/av1/decoder/decodemv.c
index 4d7da60..ebaf3bc 100644
--- a/av1/decoder/decodemv.c
+++ b/av1/decoder/decodemv.c
@@ -448,7 +448,7 @@
   is_split = aom_read_symbol(r, ec_ctx->txfm_partition_cdf[ctx], 2, ACCT_STR);
 
   if (is_split) {
-    const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
+    const TX_SIZE sub_txs = sub_tx_size_map[1][tx_size];
     const int bsw = tx_size_wide_unit[sub_txs];
     const int bsh = tx_size_high_unit[sub_txs];
 
@@ -495,8 +495,7 @@
   // TODO(debargha): Clean up the logic here. This function should only
   // be called for intra.
   const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
-  const int32_t tx_size_cat = is_inter ? inter_tx_size_cat_lookup[bsize]
-                                       : intra_tx_size_cat_lookup[bsize];
+  const int32_t tx_size_cat = bsize_to_tx_size_cat(bsize, is_inter);
   const int max_depths = bsize_to_max_depth(bsize, 0);
   const int ctx = get_tx_size_context(xd);
   FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
@@ -957,7 +956,7 @@
   const TX_SIZE mtx_size =
       get_max_rect_tx_size(xd->mi[0]->mbmi.sb_type, inter_block);
   const TX_SIZE tx_size =
-      inter_block ? AOMMAX(sub_tx_size_map[mtx_size], mbmi->min_tx_size)
+      inter_block ? AOMMAX(sub_tx_size_map[1][mtx_size], mbmi->min_tx_size)
                   : mbmi->tx_size;
 #endif  // !CONFIG_TXK_SEL
   FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index 1b75dc9..bfdf4be 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -230,7 +230,7 @@
                           xd->left_txfm_context + blk_row, tx_size, tx_size);
     // TODO(yuec): set correct txfm partition update for qttx
   } else {
-    const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
+    const TX_SIZE sub_txs = sub_tx_size_map[1][tx_size];
     const int bsw = tx_size_wide_unit[sub_txs];
     const int bsh = tx_size_high_unit[sub_txs];
 
@@ -262,9 +262,9 @@
   if (block_signals_txsize(bsize)) {
     const TX_SIZE tx_size = mbmi->tx_size;
     const int tx_size_ctx = get_tx_size_context(xd);
-    const int32_t tx_size_cat = intra_tx_size_cat_lookup[bsize];
     const int depth = tx_size_to_depth(tx_size, bsize, 0);
     const int max_depths = bsize_to_max_depth(bsize, 0);
+    const int32_t tx_size_cat = bsize_to_tx_size_cat(bsize, 0);
 
     assert(depth >= 0 && depth <= max_depths);
     assert(!is_inter_block(mbmi));
@@ -536,7 +536,7 @@
     token_stats->cost += tmp_token_stats.cost;
 #endif
   } else {
-    const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
+    const TX_SIZE sub_txs = sub_tx_size_map[1][tx_size];
     const int bsw = tx_size_wide_unit[sub_txs];
     const int bsh = tx_size_high_unit[sub_txs];
 
@@ -592,7 +592,7 @@
     token_stats->cost += tmp_token_stats.cost;
 #endif
   } else {
-    const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
+    const TX_SIZE sub_txs = sub_tx_size_map[1][tx_size];
     const int bsw = tx_size_wide_unit[sub_txs];
     const int bsh = tx_size_high_unit[sub_txs];
 
@@ -1111,7 +1111,7 @@
   const TX_SIZE mtx_size =
       get_max_rect_tx_size(xd->mi[0]->mbmi.sb_type, is_inter);
   const TX_SIZE tx_size =
-      is_inter ? AOMMAX(sub_tx_size_map[mtx_size], mbmi->min_tx_size)
+      is_inter ? AOMMAX(sub_tx_size_map[1][mtx_size], mbmi->min_tx_size)
                : mbmi->tx_size;
 #endif  // !CONFIG_TXK_SEL
   FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
@@ -1963,7 +1963,7 @@
     const int is_split =
         (l_max_tx_size != mbmi->inter_tx_size[0][0] && bsize == bsizec &&
          txsize_to_bsize[l_max_tx_size] == bsizec);
-    if (is_split) max_tx_size = sub_tx_size_map[max_tx_size];
+    if (is_split) max_tx_size = sub_tx_size_map[1][max_tx_size];
   }
 #endif  // DISABLE_VARTX_FOR_CHROMA == 2
   const int step =
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index e930bf4..df01ed0 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -4424,7 +4424,7 @@
       allow_update_cdf) {
     const TX_SIZE tx_size = mbmi->tx_size;
     const int tx_size_ctx = get_tx_size_context(xd);
-    const int32_t tx_size_cat = intra_tx_size_cat_lookup[bsize];
+    const int32_t tx_size_cat = bsize_to_tx_size_cat(bsize, 0);
     const int depth = tx_size_to_depth(tx_size, bsize, 0);
     const int max_depths = bsize_to_max_depth(bsize, 0);
     update_cdf(fc->tx_size_cdf[tx_size_cat][tx_size_ctx], depth,
@@ -4590,7 +4590,7 @@
     txfm_partition_update(xd->above_txfm_context + blk_col,
                           xd->left_txfm_context + blk_row, tx_size, tx_size);
   } else {
-    const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
+    const TX_SIZE sub_txs = sub_tx_size_map[1][tx_size];
     const int bsw = tx_size_wide_unit[sub_txs];
     const int bsh = tx_size_high_unit[sub_txs];
 
@@ -4659,7 +4659,7 @@
                           xd->left_txfm_context + blk_row, tx_size, tx_size);
 
   } else {
-    const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
+    const TX_SIZE sub_txs = sub_tx_size_map[1][tx_size];
     const int bsl = tx_size_wide_unit[sub_txs];
     int i;
 
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index 8e7888f6..c98bfef 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -685,7 +685,7 @@
     encode_block(plane, block, blk_row, blk_col, plane_bsize, tx_size, arg);
   } else {
     assert(tx_size < TX_SIZES_ALL);
-    const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
+    const TX_SIZE sub_txs = sub_tx_size_map[1][tx_size];
     assert(IMPLIES(tx_size <= TX_4X4, sub_txs == tx_size));
     assert(IMPLIES(tx_size > TX_4X4, sub_txs < tx_size));
     // This is the square transform block partition entry point.
@@ -807,7 +807,7 @@
       const int is_split =
           (l_max_tx_size != mbmi->inter_tx_size[0][0] && bsize == bsizec &&
            txsize_to_bsize[l_max_tx_size] == bsizec);
-      if (is_split) max_tx_size = sub_tx_size_map[max_tx_size];
+      if (is_split) max_tx_size = sub_tx_size_map[1][max_tx_size];
     }
 #endif  // DISABLE_VARTX_FOR_CHROMA == 2
 
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 379b284..dae2186 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -2322,8 +2322,7 @@
 
   if (cm->tx_mode == TX_MODE_SELECT && block_signals_txsize(mbmi->sb_type)) {
     const int is_inter = is_inter_block(mbmi);
-    const int32_t tx_size_cat = is_inter ? inter_tx_size_cat_lookup[bsize]
-                                         : intra_tx_size_cat_lookup[bsize];
+    const int32_t tx_size_cat = bsize_to_tx_size_cat(bsize, is_inter);
     const int depth = tx_size_to_depth(tx_size, bsize, is_inter);
     const int tx_size_ctx = get_tx_size_context(xd);
     int r_tx_size = x->tx_size_cost[tx_size_cat][tx_size_ctx][depth];
@@ -2617,7 +2616,8 @@
   }
 
   last_rd = INT64_MAX;
-  for (n = start_tx; depth <= MAX_TX_DEPTH; depth++, n = sub_tx_size_map[n]) {
+  for (n = start_tx; depth <= MAX_TX_DEPTH;
+       depth++, n = sub_tx_size_map[0][n]) {
     TX_TYPE tx_start = DCT_DCT;
     TX_TYPE tx_end = TX_TYPES;
 #if CONFIG_TXK_SEL
@@ -3938,7 +3938,7 @@
   }
 
   if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH && tx_split_prune_flag == 0) {
-    const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
+    const TX_SIZE sub_txs = sub_tx_size_map[1][tx_size];
     const int bsw = tx_size_wide_unit[sub_txs];
     const int bsh = tx_size_high_unit[sub_txs];
     int sub_step = bsw * bsh;
@@ -4360,7 +4360,7 @@
     txfm_partition_update(tx_above + blk_col, tx_left + blk_row, tx_size,
                           tx_size);
   } else {
-    const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
+    const TX_SIZE sub_txs = sub_tx_size_map[1][tx_size];
     const int bsw = tx_size_wide_unit[sub_txs];
     const int bsh = tx_size_high_unit[sub_txs];
     const int step = bsh * bsw;
@@ -4587,7 +4587,7 @@
   int cur_tx_depth = 0;
   uint8_t parent_idx_buf[MAX_SB_SQUARE] = { 0 };
 
-  int cur_tx_size = max_txsize_rect_lookup[bsize];
+  int cur_tx_size = max_txsize_rect_lookup[1][bsize];
   while (cur_tx_depth <= MAX_VARTX_DEPTH) {
     const BLOCK_SIZE cur_tx_bsize = txsize_to_bsize[cur_tx_size];
     const int cur_tx_bw = block_size_wide[cur_tx_bsize];
@@ -4641,7 +4641,7 @@
         ++cur_rd_info_idx;
       }
     }
-    cur_tx_size = sub_tx_size_map[cur_tx_size];
+    cur_tx_size = sub_tx_size_map[1][cur_tx_size];
     ++cur_tx_depth;
   }
   return 1;
@@ -4823,7 +4823,7 @@
   TX_TYPE tx_type, best_tx_type = DCT_DCT;
   const int is_inter = is_inter_block(mbmi);
   TX_SIZE best_tx_size[MAX_MIB_SIZE][MAX_MIB_SIZE];
-  TX_SIZE best_tx = max_txsize_rect_lookup[bsize];
+  TX_SIZE best_tx = max_txsize_rect_lookup[1][bsize];
   TX_SIZE best_min_tx_size = TX_SIZES_ALL;
   uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE * 8];
   TX_TYPE txk_start = DCT_DCT;
@@ -4836,7 +4836,7 @@
   int idx, idy;
   int prune = 0;
   // Get the tx_size 1 level down
-  TX_SIZE min_tx_size = sub_tx_size_map[max_txsize_rect_lookup[bsize]];
+  TX_SIZE min_tx_size = sub_tx_size_map[1][max_txsize_rect_lookup[1][bsize]];
   const TxSetType tx_set_type = get_ext_tx_set_type(
       min_tx_size, bsize, is_inter, cm->reduced_tx_set_used);
   int within_border = mi_row >= xd->tile.mi_row_start &&
@@ -5001,7 +5001,7 @@
                       plane_bsize, ta, tl, rd_stats, fast, NULL);
     av1_set_txb_context(x, plane, block, tx_size, ta, tl);
   } else {
-    const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
+    const TX_SIZE sub_txs = sub_tx_size_map[1][tx_size];
     assert(IMPLIES(tx_size <= TX_4X4, sub_txs == tx_size));
     assert(IMPLIES(tx_size > TX_4X4, sub_txs < tx_size));
     const int bsw = tx_size_wide_unit[sub_txs];
@@ -5062,7 +5062,7 @@
         const int is_split =
             (l_max_tx_size != mbmi->inter_tx_size[0][0] && bsize == bsizec &&
              txsize_to_bsize[l_max_tx_size] == bsizec);
-        if (is_split) max_tx_size = sub_tx_size_map[max_tx_size];
+        if (is_split) max_tx_size = sub_tx_size_map[1][max_tx_size];
       }
 #endif  // DISABLE_VARTX_FOR_CHROMA == 2
       const int bh = tx_size_high_unit[max_tx_size];
@@ -10619,7 +10619,7 @@
       // Set up tx_size related variables for skip-specific loop filtering.
       best_mbmode.tx_size = block_signals_txsize(bsize)
                                 ? tx_size_from_tx_mode(bsize, cm->tx_mode, 1)
-                                : max_txsize_rect_lookup[bsize];
+                                : max_txsize_rect_lookup[1][bsize];
       {
         const int width = block_size_wide[bsize] >> tx_size_wide_log2[0];
         const int height = block_size_high[bsize] >> tx_size_high_log2[0];
diff --git a/av1/encoder/tokenize.c b/av1/encoder/tokenize.c
index 7d2f6e5..38e78f3 100644
--- a/av1/encoder/tokenize.c
+++ b/av1/encoder/tokenize.c
@@ -559,7 +559,7 @@
 #endif
   } else {
     // Half the block size in transform block unit.
-    const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
+    const TX_SIZE sub_txs = sub_tx_size_map[1][tx_size];
     const int bsw = tx_size_wide_unit[sub_txs];
     const int bsh = tx_size_high_unit[sub_txs];
     const int step = bsw * bsh;
@@ -640,7 +640,7 @@
       const int is_split =
           (l_max_tx_size != mbmi->inter_tx_size[0][0] && bsize == bsizec &&
            txsize_to_bsize[l_max_tx_size] == bsizec);
-      if (is_split) max_tx_size = sub_tx_size_map[max_tx_size];
+      if (is_split) max_tx_size = sub_tx_size_map[1][max_tx_size];
     }
 #endif  // DISABLE_VARTX_FOR_CHROMA == 2
     const BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size];