Refactor lv-map coefficient buffers
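Reduce the encoder memory footprint in lv-map by ~800X by moving the
coefficient buffers (tcoeff, eobs, txb_skip_ctx, dc_sign_ctx) out of
MB_MODE_INFO_EXT into separately allocated CB_COEFF_BUFFER storage that
MB_MODE_INFO_EXT now points into.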
BUG=aomedia:713
Change-Id: I7aad604cf3186a0d3c6d5b6d1aa9489ac7c36093
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index 8b9633e..54ad2df 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -2847,6 +2847,7 @@
supertx_enabled,
#endif
mi_row, mi_col);
+
#if CONFIG_MOTION_VAR && (CONFIG_NCOBMC || CONFIG_NCOBMC_ADAPT_WEIGHT)
(void)tok;
(void)tok_end;
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index 2533614..59f45ee 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -75,6 +75,15 @@
int base_cost[NUM_BASE_LEVELS][COEFF_BASE_CONTEXTS][2];
int lps_cost[LEVEL_CONTEXTS][2];
} LV_MAP_COEFF_COST;
+
+typedef struct {  // Backing arrays for the per-plane pointers held in MB_MODE_INFO_EXT.
+ tran_low_t tcoeff[MAX_MB_PLANE][MAX_SB_SQUARE];  // MAX_SB_SQUARE entries per plane
+ uint16_t eobs[MAX_MB_PLANE][MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
+ uint8_t txb_skip_ctx[MAX_MB_PLANE]  // like eobs: one entry per minimum-size transform unit
+ [MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
+ int dc_sign_ctx[MAX_MB_PLANE]  // like eobs: one entry per minimum-size transform unit
+ [MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
+} CB_COEFF_BUFFER;  // av1_set_coeff_buffer() aims MB_MODE_INFO_EXT pointers into one of these
#endif
typedef struct {
@@ -82,12 +91,10 @@
int16_t mode_context[MODE_CTX_REF_FRAMES];
#if CONFIG_LV_MAP
// TODO(angiebird): Reduce the buffer size according to sb_type
- tran_low_t tcoeff[MAX_MB_PLANE][MAX_SB_SQUARE];
- uint16_t eobs[MAX_MB_PLANE][MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
- uint8_t txb_skip_ctx[MAX_MB_PLANE]
- [MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
- int dc_sign_ctx[MAX_MB_PLANE]
- [MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
+ tran_low_t *tcoeff[MAX_MB_PLANE];
+ uint16_t *eobs[MAX_MB_PLANE];
+ uint8_t *txb_skip_ctx[MAX_MB_PLANE];
+ int *dc_sign_ctx[MAX_MB_PLANE];
#endif
uint8_t ref_mv_count[MODE_CTX_REF_FRAMES];
CANDIDATE_MV ref_mv_stack[MODE_CTX_REF_FRAMES][MAX_REF_MV_STACK_SIZE];
@@ -179,6 +186,7 @@
#if CONFIG_LV_MAP
LV_MAP_COEFF_COST coeff_costs[TX_SIZES][PLANE_TYPES];
+ uint16_t cb_offset;
#endif
av1_coeff_cost token_head_costs[TX_SIZES];
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 632acb5..41b24a0 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -273,6 +273,7 @@
const int mi_height = mi_size_high[bsize];
set_mode_info_offsets(cpi, x, xd, mi_row, mi_col);
+
set_skip_context(xd, mi_row, mi_col);
#if CONFIG_VAR_TX
xd->above_txfm_context =
@@ -2038,8 +2039,17 @@
get_frame_new_buffer(&cpi->common), mi_row, mi_col);
}
#endif
+
+#if CONFIG_LV_MAP
+ av1_set_coeff_buffer(cpi, x, mi_row, mi_col);
+#endif
encode_superblock(cpi, td, tp, dry_run, mi_row, mi_col, bsize, rate);
+#if CONFIG_LV_MAP
+ if (dry_run == 0)
+ x->cb_offset += block_size_wide[bsize] * block_size_high[bsize];
+#endif
+
if (!dry_run) {
#if CONFIG_EXT_DELTA_Q
mbmi = &xd->mi[0]->mbmi;
@@ -4390,6 +4400,9 @@
#if CONFIG_MOTION_VAR && CONFIG_NCOBMC
set_mode_info_sb(cpi, td, tile_info, tp, mi_row, mi_col, bsize, pc_tree);
#endif
+#if CONFIG_LV_MAP
+ x->cb_offset = 0;
+#endif
encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, OUTPUT_ENABLED, bsize,
pc_tree, NULL);
} else {
diff --git a/av1/encoder/encodeframe.h b/av1/encoder/encodeframe.h
index 569ec9f..b54e54d 100644
--- a/av1/encoder/encodeframe.h
+++ b/av1/encoder/encodeframe.h
@@ -41,7 +41,6 @@
#endif
BLOCK_SIZE bsize, TX_SIZE tx_size,
FRAME_COUNTS *counts);
-
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index cc7eeb6..81b36ac 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -385,6 +385,9 @@
QUANTS quants;
ThreadData td;
MB_MODE_INFO_EXT *mbmi_ext_base;
+#if CONFIG_LV_MAP
+ CB_COEFF_BUFFER *coeff_buffer_base;
+#endif
Dequants dequants;
AV1_COMMON common;
AV1EncoderConfig oxcf;
diff --git a/av1/encoder/encodetxb.c b/av1/encoder/encodetxb.c
index cc0760b..b00abb5 100644
--- a/av1/encoder/encodetxb.c
+++ b/av1/encoder/encodetxb.c
@@ -38,7 +38,13 @@
aom_malloc(sizeof(*cpi->tcoeff_buf[i]) * pixel_stride * pixel_height));
}
#else
- (void)cpi;
+ AV1_COMMON *cm = &cpi->common;
+ int size = ((cm->mi_rows >> MAX_MIB_SIZE_LOG2) + 1) *
+ ((cm->mi_cols >> MAX_MIB_SIZE_LOG2) + 1);
+
+ // TODO(jingning): The size of this allocation should be further reduced.
+ CHECK_MEM_ERROR(cm, cpi->coeff_buffer_base,
+ aom_malloc(sizeof(*cpi->coeff_buffer_base) * size));
#endif
}
@@ -49,10 +55,27 @@
aom_free(cpi->tcoeff_buf[i]);
}
#else
- (void)cpi;
+ aom_free(cpi->coeff_buffer_base);
#endif
}
+void av1_set_coeff_buffer(const AV1_COMP *const cpi, MACROBLOCK *const x,
+ int mi_row, int mi_col) {  // Aims x->mbmi_ext's per-plane pointers into cpi->coeff_buffer_base.
+ int stride = (cpi->common.mi_cols >> MAX_MIB_SIZE_LOG2) + 1;  // buffers per row; same width term as the allocation size
+ int offset =  // row-major index of the buffer covering (mi_row, mi_col)
+ (mi_row >> MAX_MIB_SIZE_LOG2) * stride + (mi_col >> MAX_MIB_SIZE_LOG2);
+ CB_COEFF_BUFFER *coeff_buf = &cpi->coeff_buffer_base[offset];
+ const int txb_offset = x->cb_offset / (TX_SIZE_W_MIN * TX_SIZE_H_MIN);  // cb_offset rescaled to minimum-transform-size units
+ for (int plane = 0; plane < MAX_MB_PLANE; ++plane) {  // every plane uses the same two offsets
+ x->mbmi_ext->tcoeff[plane] = coeff_buf->tcoeff[plane] + x->cb_offset;
+ x->mbmi_ext->eobs[plane] = coeff_buf->eobs[plane] + txb_offset;
+ x->mbmi_ext->txb_skip_ctx[plane] =
+ coeff_buf->txb_skip_ctx[plane] + txb_offset;
+ x->mbmi_ext->dc_sign_ctx[plane] =
+ coeff_buf->dc_sign_ctx[plane] + txb_offset;
+ }
+}
+
static void write_golomb(aom_writer *w, int level) {
int x = level + 1;
int i = x;
diff --git a/av1/encoder/encodetxb.h b/av1/encoder/encodetxb.h
index 57c2f1f..acca0ea 100644
--- a/av1/encoder/encodetxb.h
+++ b/av1/encoder/encodetxb.h
@@ -92,6 +92,9 @@
int blk_col, BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg);
+void av1_set_coeff_buffer(const AV1_COMP *const cpi, MACROBLOCK *const x,
+ int mi_row, int mi_col);
+
#if CONFIG_TXK_SEL
int64_t av1_search_txk_type(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
int block, int blk_row, int blk_col,