Enable 4:1 -> 2:1 -> 1:1 splits for 4:1 transforms

Also split the sub_tx_size_map array into separate inter and
intra tables, so that the new 4:1 transforms can be enabled for
inter and intra independently.
This change also includes some refactoring: the
intra_tx_size_cat_lookup array is removed since it is unnecessary
(call sites now use bsize_to_tx_size_cat()), and the
max_txsize_rect_lookup array is consolidated for convenience.

Change-Id: I112553bab612dafb973611c87f36a43e1ac4be85
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index 1b75dc9..bfdf4be 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -230,7 +230,7 @@
                           xd->left_txfm_context + blk_row, tx_size, tx_size);
     // TODO(yuec): set correct txfm partition update for qttx
   } else {
-    const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
+    const TX_SIZE sub_txs = sub_tx_size_map[1][tx_size];
     const int bsw = tx_size_wide_unit[sub_txs];
     const int bsh = tx_size_high_unit[sub_txs];
 
@@ -262,9 +262,9 @@
   if (block_signals_txsize(bsize)) {
     const TX_SIZE tx_size = mbmi->tx_size;
     const int tx_size_ctx = get_tx_size_context(xd);
-    const int32_t tx_size_cat = intra_tx_size_cat_lookup[bsize];
     const int depth = tx_size_to_depth(tx_size, bsize, 0);
     const int max_depths = bsize_to_max_depth(bsize, 0);
+    const int32_t tx_size_cat = bsize_to_tx_size_cat(bsize, 0);
 
     assert(depth >= 0 && depth <= max_depths);
     assert(!is_inter_block(mbmi));
@@ -536,7 +536,7 @@
     token_stats->cost += tmp_token_stats.cost;
 #endif
   } else {
-    const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
+    const TX_SIZE sub_txs = sub_tx_size_map[1][tx_size];
     const int bsw = tx_size_wide_unit[sub_txs];
     const int bsh = tx_size_high_unit[sub_txs];
 
@@ -592,7 +592,7 @@
     token_stats->cost += tmp_token_stats.cost;
 #endif
   } else {
-    const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
+    const TX_SIZE sub_txs = sub_tx_size_map[1][tx_size];
     const int bsw = tx_size_wide_unit[sub_txs];
     const int bsh = tx_size_high_unit[sub_txs];
 
@@ -1111,7 +1111,7 @@
   const TX_SIZE mtx_size =
       get_max_rect_tx_size(xd->mi[0]->mbmi.sb_type, is_inter);
   const TX_SIZE tx_size =
-      is_inter ? AOMMAX(sub_tx_size_map[mtx_size], mbmi->min_tx_size)
+      is_inter ? AOMMAX(sub_tx_size_map[1][mtx_size], mbmi->min_tx_size)
                : mbmi->tx_size;
 #endif  // !CONFIG_TXK_SEL
   FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
@@ -1963,7 +1963,7 @@
     const int is_split =
         (l_max_tx_size != mbmi->inter_tx_size[0][0] && bsize == bsizec &&
          txsize_to_bsize[l_max_tx_size] == bsizec);
-    if (is_split) max_tx_size = sub_tx_size_map[max_tx_size];
+    if (is_split) max_tx_size = sub_tx_size_map[1][max_tx_size];
   }
 #endif  // DISABLE_VARTX_FOR_CHROMA == 2
   const int step =
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index e930bf4..df01ed0 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -4424,7 +4424,7 @@
       allow_update_cdf) {
     const TX_SIZE tx_size = mbmi->tx_size;
     const int tx_size_ctx = get_tx_size_context(xd);
-    const int32_t tx_size_cat = intra_tx_size_cat_lookup[bsize];
+    const int32_t tx_size_cat = bsize_to_tx_size_cat(bsize, 0);
     const int depth = tx_size_to_depth(tx_size, bsize, 0);
     const int max_depths = bsize_to_max_depth(bsize, 0);
     update_cdf(fc->tx_size_cdf[tx_size_cat][tx_size_ctx], depth,
@@ -4590,7 +4590,7 @@
     txfm_partition_update(xd->above_txfm_context + blk_col,
                           xd->left_txfm_context + blk_row, tx_size, tx_size);
   } else {
-    const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
+    const TX_SIZE sub_txs = sub_tx_size_map[1][tx_size];
     const int bsw = tx_size_wide_unit[sub_txs];
     const int bsh = tx_size_high_unit[sub_txs];
 
@@ -4659,7 +4659,7 @@
                           xd->left_txfm_context + blk_row, tx_size, tx_size);
 
   } else {
-    const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
+    const TX_SIZE sub_txs = sub_tx_size_map[1][tx_size];
     const int bsl = tx_size_wide_unit[sub_txs];
     int i;
 
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index 8e7888f6..c98bfef 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -685,7 +685,7 @@
     encode_block(plane, block, blk_row, blk_col, plane_bsize, tx_size, arg);
   } else {
     assert(tx_size < TX_SIZES_ALL);
-    const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
+    const TX_SIZE sub_txs = sub_tx_size_map[1][tx_size];
     assert(IMPLIES(tx_size <= TX_4X4, sub_txs == tx_size));
     assert(IMPLIES(tx_size > TX_4X4, sub_txs < tx_size));
     // This is the square transform block partition entry point.
@@ -807,7 +807,7 @@
       const int is_split =
           (l_max_tx_size != mbmi->inter_tx_size[0][0] && bsize == bsizec &&
            txsize_to_bsize[l_max_tx_size] == bsizec);
-      if (is_split) max_tx_size = sub_tx_size_map[max_tx_size];
+      if (is_split) max_tx_size = sub_tx_size_map[1][max_tx_size];
     }
 #endif  // DISABLE_VARTX_FOR_CHROMA == 2
 
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 379b284..dae2186 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -2322,8 +2322,7 @@
 
   if (cm->tx_mode == TX_MODE_SELECT && block_signals_txsize(mbmi->sb_type)) {
     const int is_inter = is_inter_block(mbmi);
-    const int32_t tx_size_cat = is_inter ? inter_tx_size_cat_lookup[bsize]
-                                         : intra_tx_size_cat_lookup[bsize];
+    const int32_t tx_size_cat = bsize_to_tx_size_cat(bsize, is_inter);
     const int depth = tx_size_to_depth(tx_size, bsize, is_inter);
     const int tx_size_ctx = get_tx_size_context(xd);
     int r_tx_size = x->tx_size_cost[tx_size_cat][tx_size_ctx][depth];
@@ -2617,7 +2616,8 @@
   }
 
   last_rd = INT64_MAX;
-  for (n = start_tx; depth <= MAX_TX_DEPTH; depth++, n = sub_tx_size_map[n]) {
+  for (n = start_tx; depth <= MAX_TX_DEPTH;
+       depth++, n = sub_tx_size_map[0][n]) {
     TX_TYPE tx_start = DCT_DCT;
     TX_TYPE tx_end = TX_TYPES;
 #if CONFIG_TXK_SEL
@@ -3938,7 +3938,7 @@
   }
 
   if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH && tx_split_prune_flag == 0) {
-    const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
+    const TX_SIZE sub_txs = sub_tx_size_map[1][tx_size];
     const int bsw = tx_size_wide_unit[sub_txs];
     const int bsh = tx_size_high_unit[sub_txs];
     int sub_step = bsw * bsh;
@@ -4360,7 +4360,7 @@
     txfm_partition_update(tx_above + blk_col, tx_left + blk_row, tx_size,
                           tx_size);
   } else {
-    const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
+    const TX_SIZE sub_txs = sub_tx_size_map[1][tx_size];
     const int bsw = tx_size_wide_unit[sub_txs];
     const int bsh = tx_size_high_unit[sub_txs];
     const int step = bsh * bsw;
@@ -4587,7 +4587,7 @@
   int cur_tx_depth = 0;
   uint8_t parent_idx_buf[MAX_SB_SQUARE] = { 0 };
 
-  int cur_tx_size = max_txsize_rect_lookup[bsize];
+  int cur_tx_size = max_txsize_rect_lookup[1][bsize];
   while (cur_tx_depth <= MAX_VARTX_DEPTH) {
     const BLOCK_SIZE cur_tx_bsize = txsize_to_bsize[cur_tx_size];
     const int cur_tx_bw = block_size_wide[cur_tx_bsize];
@@ -4641,7 +4641,7 @@
         ++cur_rd_info_idx;
       }
     }
-    cur_tx_size = sub_tx_size_map[cur_tx_size];
+    cur_tx_size = sub_tx_size_map[1][cur_tx_size];
     ++cur_tx_depth;
   }
   return 1;
@@ -4823,7 +4823,7 @@
   TX_TYPE tx_type, best_tx_type = DCT_DCT;
   const int is_inter = is_inter_block(mbmi);
   TX_SIZE best_tx_size[MAX_MIB_SIZE][MAX_MIB_SIZE];
-  TX_SIZE best_tx = max_txsize_rect_lookup[bsize];
+  TX_SIZE best_tx = max_txsize_rect_lookup[1][bsize];
   TX_SIZE best_min_tx_size = TX_SIZES_ALL;
   uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE * 8];
   TX_TYPE txk_start = DCT_DCT;
@@ -4836,7 +4836,7 @@
   int idx, idy;
   int prune = 0;
   // Get the tx_size 1 level down
-  TX_SIZE min_tx_size = sub_tx_size_map[max_txsize_rect_lookup[bsize]];
+  TX_SIZE min_tx_size = sub_tx_size_map[1][max_txsize_rect_lookup[1][bsize]];
   const TxSetType tx_set_type = get_ext_tx_set_type(
       min_tx_size, bsize, is_inter, cm->reduced_tx_set_used);
   int within_border = mi_row >= xd->tile.mi_row_start &&
@@ -5001,7 +5001,7 @@
                       plane_bsize, ta, tl, rd_stats, fast, NULL);
     av1_set_txb_context(x, plane, block, tx_size, ta, tl);
   } else {
-    const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
+    const TX_SIZE sub_txs = sub_tx_size_map[1][tx_size];
     assert(IMPLIES(tx_size <= TX_4X4, sub_txs == tx_size));
     assert(IMPLIES(tx_size > TX_4X4, sub_txs < tx_size));
     const int bsw = tx_size_wide_unit[sub_txs];
@@ -5062,7 +5062,7 @@
         const int is_split =
             (l_max_tx_size != mbmi->inter_tx_size[0][0] && bsize == bsizec &&
              txsize_to_bsize[l_max_tx_size] == bsizec);
-        if (is_split) max_tx_size = sub_tx_size_map[max_tx_size];
+        if (is_split) max_tx_size = sub_tx_size_map[1][max_tx_size];
       }
 #endif  // DISABLE_VARTX_FOR_CHROMA == 2
       const int bh = tx_size_high_unit[max_tx_size];
@@ -10619,7 +10619,7 @@
       // Set up tx_size related variables for skip-specific loop filtering.
       best_mbmode.tx_size = block_signals_txsize(bsize)
                                 ? tx_size_from_tx_mode(bsize, cm->tx_mode, 1)
-                                : max_txsize_rect_lookup[bsize];
+                                : max_txsize_rect_lookup[1][bsize];
       {
         const int width = block_size_wide[bsize] >> tx_size_wide_log2[0];
         const int height = block_size_high[bsize] >> tx_size_high_log2[0];
diff --git a/av1/encoder/tokenize.c b/av1/encoder/tokenize.c
index 7d2f6e5..38e78f3 100644
--- a/av1/encoder/tokenize.c
+++ b/av1/encoder/tokenize.c
@@ -559,7 +559,7 @@
 #endif
   } else {
     // Half the block size in transform block unit.
-    const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
+    const TX_SIZE sub_txs = sub_tx_size_map[1][tx_size];
     const int bsw = tx_size_wide_unit[sub_txs];
     const int bsh = tx_size_high_unit[sub_txs];
     const int step = bsw * bsh;
@@ -640,7 +640,7 @@
       const int is_split =
           (l_max_tx_size != mbmi->inter_tx_size[0][0] && bsize == bsizec &&
            txsize_to_bsize[l_max_tx_size] == bsizec);
-      if (is_split) max_tx_size = sub_tx_size_map[max_tx_size];
+      if (is_split) max_tx_size = sub_tx_size_map[1][max_tx_size];
     }
 #endif  // DISABLE_VARTX_FOR_CHROMA == 2
     const BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size];