Remove CB_COEFF_BUFFER from MB_MODE_INFO_EXT

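The cb_coef_buff pointer moves from MB_MODE_INFO_EXT to MACROBLOCK and is
now assigned per superblock through the new av1_get_cb_coeff_buffer()
helper, which replaces av1_set_coeff_buffer(). This helps reduce encoder
memory consumption.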

Change-Id: Ie1aacd0688b3f1a878feb08cb99bfd508f239229
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index 1db2822..a68d53a 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -367,16 +367,16 @@
                                                          blk_col)];
 
   if (tx_size == plane_tx_size || plane) {
+    const CB_COEFF_BUFFER *cb_coef_buff = x->cb_coef_buff;
     const int txb_offset =
         x->mbmi_ext->cb_offset / (TX_SIZE_W_MIN * TX_SIZE_H_MIN);
-    tran_low_t *tcoeff_txb =
-        x->mbmi_ext->cb_coef_buff->tcoeff[plane] + x->mbmi_ext->cb_offset;
-    uint16_t *eob_txb = x->mbmi_ext->cb_coef_buff->eobs[plane] + txb_offset;
-    uint8_t *txb_skip_ctx_txb =
-        x->mbmi_ext->cb_coef_buff->txb_skip_ctx[plane] + txb_offset;
-    int *dc_sign_ctx_txb =
-        x->mbmi_ext->cb_coef_buff->dc_sign_ctx[plane] + txb_offset;
-    tran_low_t *tcoeff = BLOCK_OFFSET(tcoeff_txb, block);
+    const tran_low_t *tcoeff_txb =
+        cb_coef_buff->tcoeff[plane] + x->mbmi_ext->cb_offset;
+    const uint16_t *eob_txb = cb_coef_buff->eobs[plane] + txb_offset;
+    const uint8_t *txb_skip_ctx_txb =
+        cb_coef_buff->txb_skip_ctx[plane] + txb_offset;
+    const int *dc_sign_ctx_txb = cb_coef_buff->dc_sign_ctx[plane] + txb_offset;
+    const tran_low_t *tcoeff = BLOCK_OFFSET(tcoeff_txb, block);
     const uint16_t eob = eob_txb[block];
     TXB_CTX txb_ctx = { txb_skip_ctx_txb[block], dc_sign_ctx_txb[block] };
     av1_write_coeffs_txb(cm, xd, w, blk_row, blk_col, plane, tx_size, tcoeff,
@@ -1712,6 +1712,7 @@
 
     for (mi_col = mi_col_start; mi_col < mi_col_end;
          mi_col += cm->seq_params.mib_size) {
+      cpi->td.mb.cb_coef_buff = av1_get_cb_coeff_buffer(cpi, mi_row, mi_col);
       write_modes_sb(cpi, tile, w, &tok, tok_end, mi_row, mi_col,
                      cm->seq_params.sb_size);
     }
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index f1148ee..266f702 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -75,7 +75,6 @@
 
 typedef struct {
   // TODO(angiebird): Reduce the buffer size according to sb_type
-  CB_COEFF_BUFFER *cb_coef_buff;
   CANDIDATE_MV ref_mv_stack[MODE_CTX_REF_FRAMES][MAX_REF_MV_STACK_SIZE];
   uint16_t weight[MODE_CTX_REF_FRAMES][MAX_REF_MV_STACK_SIZE];
   int_mv global_mvs[REF_FRAMES];
@@ -434,6 +433,8 @@
   // [Saved stat index]
   COMP_RD_STATS comp_rd_stats[MAX_COMP_RD_STATS];
   int comp_rd_stats_idx;
+
+  CB_COEFF_BUFFER *cb_coef_buff;
 };
 
 static INLINE int is_rect_tx_allowed_bsize(BLOCK_SIZE bsize) {
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index bdf8db6..8de19cc 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -1489,7 +1489,11 @@
     x->rdmult = x->cb_rdmult;
   }
 
-  if (!dry_run) av1_set_coeff_buffer(cpi, x, mi_row, mi_col);
+  if (!dry_run) {
+    x->mbmi_ext->cb_offset = x->cb_offset;
+    assert(x->cb_offset <
+           (1 << num_pels_log2_lookup[cpi->common.seq_params.sb_size]));
+  }
 
   encode_superblock(cpi, tile_data, td, tp, dry_run, mi_row, mi_col, bsize,
                     rate);
@@ -4263,6 +4267,8 @@
     if (cm->delta_q_info.delta_q_present_flag)
       setup_delta_q(cpi, x, tile_info, mi_row, mi_col, num_planes);
 
+    td->mb.cb_coef_buff = av1_get_cb_coeff_buffer(cpi, mi_row, mi_col);
+
     int dummy_rate;
     int64_t dummy_dist;
     RD_STATS dummy_rdc;
diff --git a/av1/encoder/encodetxb.c b/av1/encoder/encodetxb.c
index ee30e63..a64e4b0 100644
--- a/av1/encoder/encodetxb.c
+++ b/av1/encoder/encodetxb.c
@@ -73,17 +73,6 @@
 
 void av1_free_txb_buf(AV1_COMP *cpi) { aom_free(cpi->coeff_buffer_base); }
 
-void av1_set_coeff_buffer(const AV1_COMP *const cpi, MACROBLOCK *const x,
-                          int mi_row, int mi_col) {
-  const AV1_COMMON *const cm = &cpi->common;
-  int mib_size_log2 = cm->seq_params.mib_size_log2;
-  int stride = (cm->mi_cols >> mib_size_log2) + 1;
-  int offset = (mi_row >> mib_size_log2) * stride + (mi_col >> mib_size_log2);
-  x->mbmi_ext->cb_coef_buff = &cpi->coeff_buffer_base[offset];
-  x->mbmi_ext->cb_offset = x->cb_offset;
-  assert(x->cb_offset < (1 << num_pels_log2_lookup[cm->seq_params.sb_size]));
-}
-
 static void write_golomb(aom_writer *w, int level) {
   int x = level + 1;
   int i = x;
@@ -638,17 +627,17 @@
                                   aom_writer *w, int plane, int block,
                                   int blk_row, int blk_col, TX_SIZE tx_size) {
   MACROBLOCKD *xd = &x->e_mbd;
+  const CB_COEFF_BUFFER *cb_coef_buff = x->cb_coef_buff;
   const int txb_offset =
       x->mbmi_ext->cb_offset / (TX_SIZE_W_MIN * TX_SIZE_H_MIN);
-  tran_low_t *tcoeff_txb =
-      x->mbmi_ext->cb_coef_buff->tcoeff[plane] + x->mbmi_ext->cb_offset;
-  uint16_t *eob_txb = x->mbmi_ext->cb_coef_buff->eobs[plane] + txb_offset;
-  uint8_t *txb_skip_ctx_txb =
-      x->mbmi_ext->cb_coef_buff->txb_skip_ctx[plane] + txb_offset;
-  int *dc_sign_ctx_txb =
-      x->mbmi_ext->cb_coef_buff->dc_sign_ctx[plane] + txb_offset;
-  tran_low_t *tcoeff = BLOCK_OFFSET(tcoeff_txb, block);
-  uint16_t eob = eob_txb[block];
+  const tran_low_t *tcoeff_txb =
+      cb_coef_buff->tcoeff[plane] + x->mbmi_ext->cb_offset;
+  const uint16_t *eob_txb = cb_coef_buff->eobs[plane] + txb_offset;
+  const uint8_t *txb_skip_ctx_txb =
+      cb_coef_buff->txb_skip_ctx[plane] + txb_offset;
+  const int *dc_sign_ctx_txb = cb_coef_buff->dc_sign_ctx[plane] + txb_offset;
+  const tran_low_t *tcoeff = BLOCK_OFFSET(tcoeff_txb, block);
+  const uint16_t eob = eob_txb[block];
   TXB_CTX txb_ctx = { txb_skip_ctx_txb[block], dc_sign_ctx_txb[block] };
   av1_write_coeffs_txb(cm, xd, w, blk_row, blk_col, plane, tx_size, tcoeff, eob,
                        &txb_ctx);
@@ -2008,11 +1997,11 @@
                2);
   }
 
+  CB_COEFF_BUFFER *cb_coef_buff = x->cb_coef_buff;
   const int txb_offset =
       x->mbmi_ext->cb_offset / (TX_SIZE_W_MIN * TX_SIZE_H_MIN);
-  uint16_t *eob_txb = x->mbmi_ext->cb_coef_buff->eobs[plane] + txb_offset;
-  uint8_t *txb_skip_ctx_txb =
-      x->mbmi_ext->cb_coef_buff->txb_skip_ctx[plane] + txb_offset;
+  uint16_t *eob_txb = cb_coef_buff->eobs[plane] + txb_offset;
+  uint8_t *txb_skip_ctx_txb = cb_coef_buff->txb_skip_ctx[plane] + txb_offset;
   txb_skip_ctx_txb[block] = txb_ctx.txb_skip_ctx;
   eob_txb[block] = eob;
 
@@ -2021,8 +2010,7 @@
     return;
   }
 
-  tran_low_t *tcoeff_txb =
-      x->mbmi_ext->cb_coef_buff->tcoeff[plane] + x->mbmi_ext->cb_offset;
+  tran_low_t *tcoeff_txb = cb_coef_buff->tcoeff[plane] + x->mbmi_ext->cb_offset;
   tran_low_t *tcoeff = BLOCK_OFFSET(tcoeff_txb, block);
   const int segment_id = mbmi->segment_id;
   const int seg_eob = av1_get_tx_eob(&cpi->common.seg, segment_id, tx_size);
@@ -2116,8 +2104,7 @@
 #endif  // CONFIG_ENTROPY_STATS
     if (allow_update_cdf)
       update_cdf(ec_ctx->dc_sign_cdf[plane_type][dc_sign_ctx], dc_sign, 2);
-    int *dc_sign_ctx_txb =
-        x->mbmi_ext->cb_coef_buff->dc_sign_ctx[plane] + txb_offset;
+    int *dc_sign_ctx_txb = cb_coef_buff->dc_sign_ctx[plane] + txb_offset;
     dc_sign_ctx_txb[block] = dc_sign_ctx;
   }
 
@@ -2155,3 +2142,13 @@
     assert(0);
   }
 }
+
+CB_COEFF_BUFFER *av1_get_cb_coeff_buffer(const struct AV1_COMP *cpi, int mi_row,
+                                         int mi_col) {
+  const AV1_COMMON *const cm = &cpi->common;
+  const int mib_size_log2 = cm->seq_params.mib_size_log2;
+  const int stride = (cm->mi_cols >> mib_size_log2) + 1;
+  const int offset =
+      (mi_row >> mib_size_log2) * stride + (mi_col >> mib_size_log2);
+  return cpi->coeff_buffer_base + offset;
+}
diff --git a/av1/encoder/encodetxb.h b/av1/encoder/encodetxb.h
index 0682590..12c78c8 100644
--- a/av1/encoder/encodetxb.h
+++ b/av1/encoder/encodetxb.h
@@ -71,15 +71,15 @@
                                        int blk_col, BLOCK_SIZE plane_bsize,
                                        TX_SIZE tx_size, void *arg);
 
-void av1_set_coeff_buffer(const AV1_COMP *const cpi, MACROBLOCK *const x,
-                          int mi_row, int mi_col);
-
 void hbt_destroy();
 int av1_optimize_txb_new(const struct AV1_COMP *cpi, MACROBLOCK *x, int plane,
                          int block, TX_SIZE tx_size, TX_TYPE tx_type,
                          const TXB_CTX *const txb_ctx, int *rate_cost,
                          int sharpness, int fast_mode);
 
+CB_COEFF_BUFFER *av1_get_cb_coeff_buffer(const struct AV1_COMP *cpi, int mi_row,
+                                         int mi_col);
+
 // These numbers are empirically obtained.
 static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = {
   { 17, 13 },