Reduce memory usage of txk_type[]

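Reduce the array size from 1024 to 64 entries (TXK_TYPE_BUF_LEN). Entries
are now addressed through av1_get_txk_type_index() instead of
(blk_row << MAX_MIB_SIZE_LOG2) + blk_col.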

On a CIF video, encoder memory usage drops from 368 MB to 311 MB;
decoder memory usage drops from 17.6 MB to 9.1 MB.

Tested 30 frames on lowres; compression stats remain identical.

Change-Id: I3b95bc8d3e57e1074751d81531bb9f79b195506e
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index 7579213..49509b7 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -108,7 +108,7 @@
   TX_SIZE inter_tx_size[INTER_TX_SIZE_BUF_LEN];
   uint8_t blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE * 8];
 #if CONFIG_TXK_SEL
-  TX_TYPE txk_type[MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
+  TX_TYPE txk_type[TXK_TYPE_BUF_LEN];
 #endif  // CONFIG_TXK_SEL
   RD_STATS rd_stats;
   uint32_t hash_value;
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 2c7b87f..3a3b7a2 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -476,8 +476,7 @@
       reset_tx_size(xd, mbmi, cm->tx_mode);
 #if CONFIG_TXK_SEL
       memset(mbmi->txk_type, DCT_DCT,
-             sizeof(mbmi->txk_type[0]) *
-                 (MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)));
+             sizeof(mbmi->txk_type[0]) * TXK_TYPE_BUF_LEN);
 #endif
     }
     // Else for cyclic refresh mode update the segment map, set the segment id
@@ -488,8 +487,7 @@
       reset_tx_size(xd, mbmi, cm->tx_mode);
 #if CONFIG_TXK_SEL
       memset(mbmi->txk_type, DCT_DCT,
-             sizeof(mbmi->txk_type[0]) *
-                 (MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)));
+             sizeof(mbmi->txk_type[0]) * TXK_TYPE_BUF_LEN);
 #endif
     }
   }
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index e0fac35..e66b734 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -254,20 +254,17 @@
   }
 
 #if CONFIG_TXK_SEL
-  if (args->cpi->oxcf.aq_mode != NO_AQ && p->eobs[block] == 0 && plane == 0)
-    xd->mi[0]->mbmi.txk_type[(blk_row << MAX_MIB_SIZE_LOG2) + blk_col] =
-        DCT_DCT;
-
+  const int txk_type_idx =
+      av1_get_txk_type_index(plane_bsize, blk_row, blk_col);
+  if (args->cpi->oxcf.aq_mode != NO_AQ && p->eobs[block] == 0 && plane == 0) {
+    xd->mi[0]->mbmi.txk_type[txk_type_idx] = DCT_DCT;
+  }
   uint8_t disable_txk_check = args->enable_optimize_b;
-
   if (plane == 0 && p->eobs[block] == 0) {
     if (disable_txk_check) {
-      xd->mi[0]->mbmi.txk_type[(blk_row << MAX_MIB_SIZE_LOG2) + blk_col] =
-          DCT_DCT;
+      xd->mi[0]->mbmi.txk_type[txk_type_idx] = DCT_DCT;
     } else {
-      assert(
-          xd->mi[0]->mbmi.txk_type[(blk_row << MAX_MIB_SIZE_LOG2) + blk_col] ==
-          DCT_DCT);
+      assert(xd->mi[0]->mbmi.txk_type[txk_type_idx] == DCT_DCT);
     }
   }
 #endif  // CONFIG_TXK_SEL
@@ -537,9 +534,8 @@
     *eob = 0;
     p->txb_entropy_ctx[block] = 0;
     *(args->skip) = 0;
-    assert(xd->mi[0]->mbmi.txk_type[(blk_row << MAX_MIB_SIZE_LOG2) + blk_col] ==
-           DCT_DCT);
-
+    assert(xd->mi[0]->mbmi.txk_type[av1_get_txk_type_index(
+               plane_bsize, blk_row, blk_col)] == DCT_DCT);
 #if CONFIG_CFL
     if (plane == AOM_PLANE_Y && xd->cfl.store_y &&
         is_cfl_allowed(&xd->mi[0]->mbmi)) {
@@ -562,9 +558,8 @@
 
 #if CONFIG_TXK_SEL
     if (plane == 0 && p->eobs[block] == 0) {
-      assert(
-          xd->mi[0]->mbmi.txk_type[(blk_row << MAX_MIB_SIZE_LOG2) + blk_col] ==
-          DCT_DCT);
+      assert(xd->mi[0]->mbmi.txk_type[av1_get_txk_type_index(
+                 plane_bsize, blk_row, blk_col)] == DCT_DCT);
     }
 #endif  // CONFIG_TXK_SEL
   } else {
@@ -574,9 +569,11 @@
   }
 
 #if CONFIG_TXK_SEL
-  if (args->cpi->oxcf.aq_mode != NO_AQ && !*eob && plane == 0)
-    xd->mi[0]->mbmi.txk_type[(blk_row << MAX_MIB_SIZE_LOG2) + blk_col] =
-        DCT_DCT;
+  if (args->cpi->oxcf.aq_mode != NO_AQ && !*eob && plane == 0) {
+    const int txk_type_idx =
+        av1_get_txk_type_index(plane_bsize, blk_row, blk_col);
+    xd->mi[0]->mbmi.txk_type[txk_type_idx] = DCT_DCT;
+  }
 #endif
 
   av1_inverse_transform_block(xd, dqcoeff, plane, tx_type, tx_size, dst,
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 5b9c392..4ae70ee 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -1849,9 +1849,10 @@
 }
 
 #if CONFIG_TXK_SEL
-static void update_txk_array(TX_TYPE *txk_type, int blk_row, int blk_col,
-                             TX_SIZE tx_size, TX_TYPE tx_type) {
-  txk_type[(blk_row << MAX_MIB_SIZE_LOG2) + blk_col] = tx_type;
+static void update_txk_array(TX_TYPE *txk_type, BLOCK_SIZE bsize, int blk_row,
+                             int blk_col, TX_SIZE tx_size, TX_TYPE tx_type) {
+  const int txk_type_idx = av1_get_txk_type_index(bsize, blk_row, blk_col);
+  txk_type[txk_type_idx] = tx_type;
 
   const int txw = tx_size_wide_unit[tx_size];
   const int txh = tx_size_high_unit[tx_size];
@@ -1862,10 +1863,13 @@
   if (txw == tx_size_wide_unit[TX_64X64] ||
       txh == tx_size_high_unit[TX_64X64]) {
     const int tx_unit = tx_size_wide_unit[TX_16X16];
-    for (int idy = 0; idy < txh; idy += tx_unit)
-      for (int idx = 0; idx < txw; idx += tx_unit)
-        txk_type[((blk_row + idy) << MAX_MIB_SIZE_LOG2) + (blk_col + idx)] =
-            tx_type;
+    for (int idy = 0; idy < txh; idy += tx_unit) {
+      for (int idx = 0; idx < txw; idx += tx_unit) {
+        const int this_index =
+            av1_get_txk_type_index(bsize, blk_row + idy, blk_col + idx);
+        txk_type[this_index] = tx_type;
+      }
+    }
   }
 }
 
@@ -1919,7 +1923,9 @@
       if (is_inter && x->use_default_inter_tx_type &&
           tx_type != get_default_tx_type(0, xd, tx_size))
         continue;
-      mbmi->txk_type[(blk_row << MAX_MIB_SIZE_LOG2) + blk_col] = tx_type;
+      const int txk_type_idx =
+          av1_get_txk_type_index(plane_bsize, blk_row, blk_col);
+      mbmi->txk_type[txk_type_idx] = tx_type;
     }
     const TX_TYPE ref_tx_type =
         av1_get_tx_type(get_plane_type(plane), xd, blk_row, blk_col, tx_size,
@@ -1978,8 +1984,10 @@
 
   if (best_eob == 0) best_tx_type = DCT_DCT;
 
-  if (plane == 0)
-    update_txk_array(mbmi->txk_type, blk_row, blk_col, tx_size, best_tx_type);
+  if (plane == 0) {
+    update_txk_array(mbmi->txk_type, plane_bsize, blk_row, blk_col, tx_size,
+                     best_tx_type);
+  }
 
   x->plane[plane].txb_entropy_ctx[block] = best_txb_ctx;
   x->plane[plane].eobs[block] = best_eob;
@@ -2592,7 +2600,7 @@
   TX_SIZE best_tx_size = max_rect_tx_size;
   TX_TYPE best_tx_type = DCT_DCT;
 #if CONFIG_TXK_SEL
-  TX_TYPE best_txk_type[MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
+  TX_TYPE best_txk_type[TXK_TYPE_BUF_LEN];
 #endif  // CONFIG_TXK_SEL
   uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
   const int n4 = bsize_to_num_blk(bs);
@@ -2644,8 +2652,7 @@
       if (rd < best_rd) {
 #if CONFIG_TXK_SEL
         memcpy(best_txk_type, mbmi->txk_type,
-               sizeof(best_txk_type[0]) * MAX_SB_SQUARE /
-                   (TX_SIZE_W_MIN * TX_SIZE_H_MIN));
+               sizeof(best_txk_type[0]) * TXK_TYPE_BUF_LEN);
 #endif
         memcpy(best_blk_skip, x->blk_skip[0], sizeof(best_blk_skip[0]) * n4);
         best_tx_type = tx_type;
@@ -2671,8 +2678,7 @@
   mbmi->tx_type = best_tx_type;
 #if CONFIG_TXK_SEL
   memcpy(mbmi->txk_type, best_txk_type,
-         sizeof(best_txk_type[0]) * MAX_SB_SQUARE /
-             (TX_SIZE_W_MIN * TX_SIZE_H_MIN));
+         sizeof(best_txk_type[0]) * TXK_TYPE_BUF_LEN);
 #endif
   memcpy(x->blk_skip[0], best_blk_skip, sizeof(best_blk_skip[0]) * n4);
 
@@ -3147,7 +3153,7 @@
   FILTER_INTRA_MODE_INFO filter_intra_mode_info;
   TX_TYPE best_tx_type;
 #if CONFIG_TXK_SEL
-  TX_TYPE best_txk_type[MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
+  TX_TYPE best_txk_type[TXK_TYPE_BUF_LEN];
 #endif
   (void)ctx;
   av1_zero(filter_intra_mode_info);
@@ -3178,8 +3184,7 @@
       best_tx_type = mbmi->tx_type;
 #if CONFIG_TXK_SEL
       memcpy(best_txk_type, mbmi->txk_type,
-             sizeof(*best_txk_type) *
-                 (MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)));
+             sizeof(best_txk_type[0]) * TXK_TYPE_BUF_LEN);
 #endif
       memcpy(ctx->blk_skip[0], x->blk_skip[0],
              sizeof(uint8_t) * ctx->num_4x4_blk);
@@ -3198,8 +3203,7 @@
     mbmi->tx_type = best_tx_type;
 #if CONFIG_TXK_SEL
     memcpy(mbmi->txk_type, best_txk_type,
-           sizeof(*best_txk_type) *
-               (MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)));
+           sizeof(best_txk_type[0]) * TXK_TYPE_BUF_LEN);
 #endif
     return 1;
   } else {
@@ -3250,8 +3254,7 @@
   if (this_rd < *best_rd) {
 #if CONFIG_TXK_SEL
     memcpy(best_txk_type, mbmi->txk_type,
-           sizeof(*best_txk_type) *
-               (MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)));
+           sizeof(*best_txk_type) * TXK_TYPE_BUF_LEN);
 #endif
     memcpy(best_blk_skip, x->blk_skip[0], sizeof(best_blk_skip[0]) * n4);
     *best_rd = this_rd;
@@ -3284,7 +3287,7 @@
   TX_TYPE best_tx_type = mbmi->tx_type;
 #if CONFIG_TXK_SEL
   const int n4 = bsize_to_num_blk(bsize);
-  TX_TYPE best_txk_type[MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
+  TX_TYPE best_txk_type[TXK_TYPE_BUF_LEN];
 #else
   TX_TYPE *best_txk_type = NULL;
 #endif
@@ -3335,8 +3338,7 @@
   mbmi->tx_type = best_tx_type;
 #if CONFIG_TXK_SEL
   memcpy(mbmi->txk_type, best_txk_type,
-         sizeof(*best_txk_type) *
-             (MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)));
+         sizeof(*best_txk_type) * TXK_TYPE_BUF_LEN);
   memcpy(x->blk_skip[0], best_blk_skip, sizeof(best_blk_skip[0]) * n4);
 #endif
   return best_rd;
@@ -3714,6 +3716,8 @@
   const uint16_t cur_joint_ctx =
       (txb_ctx.dc_sign_ctx << 8) + txb_ctx.txb_skip_ctx;
 
+  const int txk_type_idx =
+      av1_get_txk_type_index(plane_bsize, blk_row, blk_col);
   // Look up RD and terminate early in case when we've already processed exactly
   // the same residual with exactly the same entropy context.
   if (rd_info_array != NULL && rd_info_array->valid &&
@@ -3725,8 +3729,7 @@
     p->eobs[block] = rd_info_array->eob;
     p->txb_entropy_ctx[block] = rd_info_array->txb_entropy_ctx;
     if (plane == 0) {
-      x->e_mbd.mi[0]->mbmi.txk_type[(blk_row << MAX_MIB_SIZE_LOG2) + blk_col] =
-          rd_info_array->tx_type;
+      x->e_mbd.mi[0]->mbmi.txk_type[txk_type_idx] = rd_info_array->tx_type;
     }
     return;
   }
@@ -3747,9 +3750,7 @@
     rd_info_array->eob = p->eobs[block];
     rd_info_array->txb_entropy_ctx = p->txb_entropy_ctx[block];
     if (plane == 0) {
-      rd_info_array->tx_type =
-          x->e_mbd.mi[0]
-              ->mbmi.txk_type[(blk_row << MAX_MIB_SIZE_LOG2) + blk_col];
+      rd_info_array->tx_type = x->e_mbd.mi[0]->mbmi.txk_type[txk_type_idx];
     }
   }
 
@@ -4024,7 +4025,8 @@
       x->blk_skip[plane][blk_row * bw + blk_col] = 1;
       p->eobs[block] = 0;
 #if CONFIG_TXK_SEL
-      update_txk_array(mbmi->txk_type, blk_row, blk_col, tx_size, DCT_DCT);
+      update_txk_array(mbmi->txk_type, plane_bsize, blk_row, blk_col, tx_size,
+                       DCT_DCT);
 #endif
     } else {
       x->blk_skip[plane][blk_row * bw + blk_col] = 0;
@@ -4037,8 +4039,9 @@
     tmp_eob = p->txb_entropy_ctx[block];
 
 #if CONFIG_TXK_SEL
-    const int txk_idx = (blk_row << MAX_MIB_SIZE_LOG2) + blk_col;
-    best_tx_type = mbmi->txk_type[txk_idx];
+    const int txk_type_idx =
+        av1_get_txk_type_index(plane_bsize, blk_row, blk_col);
+    best_tx_type = mbmi->txk_type[txk_type_idx];
 #endif
   }
 
@@ -4215,7 +4218,8 @@
 
     mbmi->tx_size = tx_size_selected;
 #if CONFIG_TXK_SEL
-    update_txk_array(mbmi->txk_type, blk_row, blk_col, tx_size, best_tx_type);
+    update_txk_array(mbmi->txk_type, plane_bsize, blk_row, blk_col, tx_size,
+                     best_tx_type);
 #endif
     if (this_rd == INT64_MAX) *is_cost_valid = 0;
     x->blk_skip[plane][blk_row * bw + blk_col] = rd_stats->skip;
@@ -4425,7 +4429,8 @@
       x->plane[plane].eobs[block] = 0;
       x->plane[plane].txb_entropy_ctx[block] = 0;
 #if CONFIG_TXK_SEL
-      update_txk_array(mbmi->txk_type, blk_row, blk_col, tx_size, DCT_DCT);
+      update_txk_array(mbmi->txk_type, plane_bsize, blk_row, blk_col, tx_size,
+                       DCT_DCT);
 #endif
     } else {
       rd_stats->skip = 0;
@@ -4850,9 +4855,7 @@
   const TX_SIZE tx_size = get_max_rect_tx_size(bsize, is_inter_block(mbmi));
   mbmi->tx_type = DCT_DCT;
 #if CONFIG_TXK_SEL
-  memset(mbmi->txk_type, DCT_DCT,
-         sizeof(mbmi->txk_type[0]) *
-             (MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)));
+  memset(mbmi->txk_type, DCT_DCT, sizeof(mbmi->txk_type[0]) * TXK_TYPE_BUF_LEN);
 #endif
   memset(mbmi->inter_tx_size, tx_size, sizeof(mbmi->inter_tx_size));
   mbmi->tx_size = tx_size;
@@ -9798,10 +9801,9 @@
         TX_SIZE best_tx_size = mbmi->tx_size;
         TX_TYPE best_tx_type = mbmi->tx_type;
 #if CONFIG_TXK_SEL
-        TX_TYPE best_txk_type[MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
+        TX_TYPE best_txk_type[TXK_TYPE_BUF_LEN];
         memcpy(best_txk_type, mbmi->txk_type,
-               sizeof(*best_txk_type) *
-                   (MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)));
+               sizeof(*best_txk_type) * TXK_TYPE_BUF_LEN);
 #endif
         FILTER_INTRA_MODE best_fi_mode = FILTER_DC_PRED;
         int64_t best_rd_tmp = INT64_MAX;
@@ -9832,8 +9834,7 @@
             best_tx_type = mbmi->tx_type;
 #if CONFIG_TXK_SEL
             memcpy(best_txk_type, mbmi->txk_type,
-                   sizeof(*best_txk_type) *
-                       (MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)));
+                   sizeof(*best_txk_type) * TXK_TYPE_BUF_LEN);
 #endif
             memcpy(best_blk_skip, x->blk_skip[0],
                    sizeof(best_blk_skip[0]) * ctx->num_4x4_blk);
@@ -9851,8 +9852,7 @@
         mbmi->tx_type = best_tx_type;
 #if CONFIG_TXK_SEL
         memcpy(mbmi->txk_type, best_txk_type,
-               sizeof(*best_txk_type) *
-                   (MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)));
+               sizeof(*best_txk_type) * TXK_TYPE_BUF_LEN);
 #endif
         memcpy(x->blk_skip[0], best_blk_skip,
                sizeof(x->blk_skip[0][0]) * ctx->num_4x4_blk);