Refactor lv-map coefficient buffers

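Reduce the encoder memory footprint in lv-map by ~800X.

The tcoeff, eobs, txb_skip_ctx and dc_sign_ctx arrays are moved out of
the mbmi_ext struct into a separately allocated CB_COEFF_BUFFER array;
mbmi_ext now only holds per-plane pointers into that array, which are
set up by av1_set_coeff_buffer() before each block is encoded.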

BUG=aomedia:713

Change-Id: I7aad604cf3186a0d3c6d5b6d1aa9489ac7c36093
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index 8b9633e..54ad2df 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -2847,6 +2847,7 @@
                supertx_enabled,
 #endif
                mi_row, mi_col);
+
 #if CONFIG_MOTION_VAR && (CONFIG_NCOBMC || CONFIG_NCOBMC_ADAPT_WEIGHT)
   (void)tok;
   (void)tok_end;
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index 2533614..59f45ee 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -75,6 +75,15 @@
   int base_cost[NUM_BASE_LEVELS][COEFF_BASE_CONTEXTS][2];
   int lps_cost[LEVEL_CONTEXTS][2];
 } LV_MAP_COEFF_COST;
+
+typedef struct {  // One instance per MAX_MIB_SIZE-aligned frame region.
+  tran_low_t tcoeff[MAX_MB_PLANE][MAX_SB_SQUARE];  // coefficients, per plane
+  uint16_t eobs[MAX_MB_PLANE][MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
+  uint8_t txb_skip_ctx[MAX_MB_PLANE]
+                      [MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
+  int dc_sign_ctx[MAX_MB_PLANE]
+                 [MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
+} CB_COEFF_BUFFER;  // eobs/ctx arrays: one slot per min-size tx area.
 #endif
 
 typedef struct {
@@ -82,12 +91,10 @@
   int16_t mode_context[MODE_CTX_REF_FRAMES];
 #if CONFIG_LV_MAP
   // TODO(angiebird): Reduce the buffer size according to sb_type
-  tran_low_t tcoeff[MAX_MB_PLANE][MAX_SB_SQUARE];
-  uint16_t eobs[MAX_MB_PLANE][MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
-  uint8_t txb_skip_ctx[MAX_MB_PLANE]
-                      [MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
-  int dc_sign_ctx[MAX_MB_PLANE]
-                 [MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
+  tran_low_t *tcoeff[MAX_MB_PLANE];
+  uint16_t *eobs[MAX_MB_PLANE];
+  uint8_t *txb_skip_ctx[MAX_MB_PLANE];
+  int *dc_sign_ctx[MAX_MB_PLANE];
 #endif
   uint8_t ref_mv_count[MODE_CTX_REF_FRAMES];
   CANDIDATE_MV ref_mv_stack[MODE_CTX_REF_FRAMES][MAX_REF_MV_STACK_SIZE];
@@ -179,6 +186,7 @@
 
 #if CONFIG_LV_MAP
   LV_MAP_COEFF_COST coeff_costs[TX_SIZES][PLANE_TYPES];
+  uint16_t cb_offset;
 #endif
 
   av1_coeff_cost token_head_costs[TX_SIZES];
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 632acb5..41b24a0 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -273,6 +273,7 @@
   const int mi_height = mi_size_high[bsize];
 
   set_mode_info_offsets(cpi, x, xd, mi_row, mi_col);
+
   set_skip_context(xd, mi_row, mi_col);
 #if CONFIG_VAR_TX
   xd->above_txfm_context =
@@ -2038,8 +2039,17 @@
                          get_frame_new_buffer(&cpi->common), mi_row, mi_col);
   }
 #endif
+
+#if CONFIG_LV_MAP
+  av1_set_coeff_buffer(cpi, x, mi_row, mi_col);
+#endif
   encode_superblock(cpi, td, tp, dry_run, mi_row, mi_col, bsize, rate);
 
+#if CONFIG_LV_MAP
+  if (dry_run == 0)
+    x->cb_offset += block_size_wide[bsize] * block_size_high[bsize];
+#endif
+
   if (!dry_run) {
 #if CONFIG_EXT_DELTA_Q
     mbmi = &xd->mi[0]->mbmi;
@@ -4390,6 +4400,9 @@
 #if CONFIG_MOTION_VAR && CONFIG_NCOBMC
       set_mode_info_sb(cpi, td, tile_info, tp, mi_row, mi_col, bsize, pc_tree);
 #endif
+#if CONFIG_LV_MAP
+      x->cb_offset = 0;
+#endif
       encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, OUTPUT_ENABLED, bsize,
                 pc_tree, NULL);
     } else {
diff --git a/av1/encoder/encodeframe.h b/av1/encoder/encodeframe.h
index 569ec9f..b54e54d 100644
--- a/av1/encoder/encodeframe.h
+++ b/av1/encoder/encodeframe.h
@@ -41,7 +41,6 @@
 #endif
                               BLOCK_SIZE bsize, TX_SIZE tx_size,
                               FRAME_COUNTS *counts);
-
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index cc7eeb6..81b36ac 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -385,6 +385,9 @@
   QUANTS quants;
   ThreadData td;
   MB_MODE_INFO_EXT *mbmi_ext_base;
+#if CONFIG_LV_MAP
+  CB_COEFF_BUFFER *coeff_buffer_base;
+#endif
   Dequants dequants;
   AV1_COMMON common;
   AV1EncoderConfig oxcf;
diff --git a/av1/encoder/encodetxb.c b/av1/encoder/encodetxb.c
index cc0760b..b00abb5 100644
--- a/av1/encoder/encodetxb.c
+++ b/av1/encoder/encodetxb.c
@@ -38,7 +38,13 @@
         aom_malloc(sizeof(*cpi->tcoeff_buf[i]) * pixel_stride * pixel_height));
   }
 #else
-  (void)cpi;
+  AV1_COMMON *cm = &cpi->common;
+  int size = ((cm->mi_rows >> MAX_MIB_SIZE_LOG2) + 1) *
+             ((cm->mi_cols >> MAX_MIB_SIZE_LOG2) + 1);
+
+  // TODO(jingning): This should be further reduced.
+  CHECK_MEM_ERROR(cm, cpi->coeff_buffer_base,
+                  aom_malloc(sizeof(*cpi->coeff_buffer_base) * size));
 #endif
 }
 
@@ -49,10 +55,27 @@
     aom_free(cpi->tcoeff_buf[i]);
   }
 #else
-  (void)cpi;
+  aom_free(cpi->coeff_buffer_base);
 #endif
 }
 
+void av1_set_coeff_buffer(const AV1_COMP *const cpi, MACROBLOCK *const x,
+                          int mi_row, int mi_col) {  // Sets x->mbmi_ext ptrs.
+  int stride = (cpi->common.mi_cols >> MAX_MIB_SIZE_LOG2) + 1;  // buffers/row
+  int offset =  // index of the buffer covering (mi_row, mi_col)
+      (mi_row >> MAX_MIB_SIZE_LOG2) * stride + (mi_col >> MAX_MIB_SIZE_LOG2);
+  CB_COEFF_BUFFER *coeff_buf = &cpi->coeff_buffer_base[offset];
+  const int txb_offset = x->cb_offset / (TX_SIZE_W_MIN * TX_SIZE_H_MIN);
+  for (int plane = 0; plane < MAX_MB_PLANE; ++plane) {  // base + offset
+    x->mbmi_ext->tcoeff[plane] = coeff_buf->tcoeff[plane] + x->cb_offset;
+    x->mbmi_ext->eobs[plane] = coeff_buf->eobs[plane] + txb_offset;
+    x->mbmi_ext->txb_skip_ctx[plane] =
+        coeff_buf->txb_skip_ctx[plane] + txb_offset;
+    x->mbmi_ext->dc_sign_ctx[plane] =
+        coeff_buf->dc_sign_ctx[plane] + txb_offset;
+  }
+}  // x->cb_offset is only read here; it is not modified.
+
 static void write_golomb(aom_writer *w, int level) {
   int x = level + 1;
   int i = x;
diff --git a/av1/encoder/encodetxb.h b/av1/encoder/encodetxb.h
index 57c2f1f..acca0ea 100644
--- a/av1/encoder/encodetxb.h
+++ b/av1/encoder/encodetxb.h
@@ -92,6 +92,9 @@
                                        int blk_col, BLOCK_SIZE plane_bsize,
                                        TX_SIZE tx_size, void *arg);
 
+void av1_set_coeff_buffer(const AV1_COMP *const cpi, MACROBLOCK *const x,
+                          int mi_row, int mi_col);
+
 #if CONFIG_TXK_SEL
 int64_t av1_search_txk_type(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
                             int block, int blk_row, int blk_col,