Pack transform block skip and DC sign contexts

Pack the transform block skip and DC sign contexts into 8 bits.
Encoder memory consumption drops by about 0.3% (tested on lowres clips).

Change-Id: I07a8f8dda4dca0695608a1bd6842c9fbf5947aef
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index f3ba4b8..afcfdc2 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -376,12 +376,12 @@
     const tran_low_t *tcoeff_txb =
         cb_coef_buff->tcoeff[plane] + x->mbmi_ext_frame->cb_offset;
     const uint16_t *eob_txb = cb_coef_buff->eobs[plane] + txb_offset;
-    const uint8_t *txb_skip_ctx_txb =
-        cb_coef_buff->txb_skip_ctx[plane] + txb_offset;
-    const int *dc_sign_ctx_txb = cb_coef_buff->dc_sign_ctx[plane] + txb_offset;
     const tran_low_t *tcoeff = BLOCK_OFFSET(tcoeff_txb, block);
     const uint16_t eob = eob_txb[block];
-    TXB_CTX txb_ctx = { txb_skip_ctx_txb[block], dc_sign_ctx_txb[block] };
+    const uint8_t *entropy_ctx = cb_coef_buff->entropy_ctx[plane] + txb_offset;
+    const TXB_CTX txb_ctx = { entropy_ctx[block] & TXB_SKIP_CTX_MASK,
+                              (entropy_ctx[block] >> DC_SIGN_CTX_SHIFT) &
+                                  DC_SIGN_CTX_MASK };
     av1_write_coeffs_txb(cm, xd, w, blk_row, blk_col, plane, tx_size, tcoeff,
                          eob, &txb_ctx);
 #if CONFIG_RD_DEBUG
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index f4aebd9..cab3ecf 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -79,10 +79,10 @@
 typedef struct {
   tran_low_t tcoeff[MAX_MB_PLANE][MAX_SB_SQUARE];
   uint16_t eobs[MAX_MB_PLANE][MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
-  uint8_t txb_skip_ctx[MAX_MB_PLANE]
-                      [MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
-  int dc_sign_ctx[MAX_MB_PLANE]
-                 [MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
+  // Transform block entropy contexts.
+  // Bits 0~3: txb_skip_ctx; bits 4~5: dc_sign_ctx.
+  uint8_t entropy_ctx[MAX_MB_PLANE]
+                     [MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
 } CB_COEFF_BUFFER;
 
 typedef struct {
diff --git a/av1/encoder/encodetxb.c b/av1/encoder/encodetxb.c
index fbfc2d1..44c61f2 100644
--- a/av1/encoder/encodetxb.c
+++ b/av1/encoder/encodetxb.c
@@ -505,7 +505,7 @@
 void av1_write_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *xd,
                           aom_writer *w, int blk_row, int blk_col, int plane,
                           TX_SIZE tx_size, const tran_low_t *tcoeff,
-                          uint16_t eob, TXB_CTX *txb_ctx) {
+                          uint16_t eob, const TXB_CTX *txb_ctx) {
   const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
   FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
   aom_write_symbol(w, eob == 0,
@@ -644,12 +644,12 @@
   const tran_low_t *tcoeff_txb =
       cb_coef_buff->tcoeff[plane] + x->mbmi_ext_frame->cb_offset;
   const uint16_t *eob_txb = cb_coef_buff->eobs[plane] + txb_offset;
-  const uint8_t *txb_skip_ctx_txb =
-      cb_coef_buff->txb_skip_ctx[plane] + txb_offset;
-  const int *dc_sign_ctx_txb = cb_coef_buff->dc_sign_ctx[plane] + txb_offset;
   const tran_low_t *tcoeff = BLOCK_OFFSET(tcoeff_txb, block);
   const uint16_t eob = eob_txb[block];
-  TXB_CTX txb_ctx = { txb_skip_ctx_txb[block], dc_sign_ctx_txb[block] };
+  const uint8_t *entropy_ctx = cb_coef_buff->entropy_ctx[plane] + txb_offset;
+  const TXB_CTX txb_ctx = { entropy_ctx[block] & TXB_SKIP_CTX_MASK,
+                            (entropy_ctx[block] >> DC_SIGN_CTX_SHIFT) &
+                                DC_SIGN_CTX_MASK };
   av1_write_coeffs_txb(cm, xd, w, blk_row, blk_col, plane, tx_size, tcoeff, eob,
                        &txb_ctx);
 }
@@ -2044,8 +2044,8 @@
   const int txb_offset =
       x->mbmi_ext_frame->cb_offset / (TX_SIZE_W_MIN * TX_SIZE_H_MIN);
   uint16_t *eob_txb = cb_coef_buff->eobs[plane] + txb_offset;
-  uint8_t *txb_skip_ctx_txb = cb_coef_buff->txb_skip_ctx[plane] + txb_offset;
-  txb_skip_ctx_txb[block] = txb_ctx.txb_skip_ctx;
+  uint8_t *const entropy_ctx = cb_coef_buff->entropy_ctx[plane] + txb_offset;
+  entropy_ctx[block] = txb_ctx.txb_skip_ctx;
   eob_txb[block] = eob;
 
   if (eob == 0) {
@@ -2153,8 +2153,7 @@
 #endif  // CONFIG_ENTROPY_STATS
     if (allow_update_cdf)
       update_cdf(ec_ctx->dc_sign_cdf[plane_type][dc_sign_ctx], dc_sign, 2);
-    int *dc_sign_ctx_txb = cb_coef_buff->dc_sign_ctx[plane] + txb_offset;
-    dc_sign_ctx_txb[block] = dc_sign_ctx;
+    entropy_ctx[block] |= dc_sign_ctx << DC_SIGN_CTX_SHIFT;
   }
 
   const int cul_level = av1_get_txb_entropy_context(tcoeff, scan_order, eob);
diff --git a/av1/encoder/encodetxb.h b/av1/encoder/encodetxb.h
index 401d83c..b96148a 100644
--- a/av1/encoder/encodetxb.h
+++ b/av1/encoder/encodetxb.h
@@ -24,6 +24,10 @@
 extern "C" {
 #endif
 
+#define TXB_SKIP_CTX_MASK 15  // Selects bits 0~3 of a packed entropy_ctx byte.
+#define DC_SIGN_CTX_SHIFT 4   // dc_sign_ctx is stored starting at bit 4.
+#define DC_SIGN_CTX_MASK 3    // Applied after the shift: 2-bit dc_sign_ctx.
+
 typedef struct TxbInfo {
   tran_low_t *qcoeff;
   uint8_t *levels;  // absolute values and clamped to 255.
@@ -54,7 +58,7 @@
 void av1_write_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *xd,
                           aom_writer *w, int blk_row, int blk_col, int plane,
                           TX_SIZE tx_size, const tran_low_t *tcoeff,
-                          uint16_t eob, TXB_CTX *txb_ctx);
+                          uint16_t eob, const TXB_CTX *txb_ctx);
 void av1_write_coeffs_mb(const AV1_COMMON *const cm, MACROBLOCK *x, int mi_row,
                          int mi_col, aom_writer *w, BLOCK_SIZE bsize);
 int av1_get_txb_entropy_context(const tran_low_t *qcoeff,