Optimize memory in tpl frame buffer
The border (padding) of the tpl frame buffers has been reduced from
160 pixels (AOM_ENC_NO_SCALE_BORDER) to 32 pixels
(AOM_ENC_TPL_FRAME_BORDER).
Resolution   Tile   Memory reduction
                    Single thread   Multi thread
640x360      2x1    ~13%            ~13% (2 threads)
832x480      2x1    ~11%            ~11% (2 threads)
1280x720     2x2    ~10%            ~9%  (4 threads)
1920x1080    4x2    ~7%             ~7%  (8 threads)
Memory measuring command:
$ command time -v ./aomenc ...
Change-Id: Ib51808f7d84f88d440db0f7fd8f3033abae7ce65
diff --git a/aom_scale/yv12config.h b/aom_scale/yv12config.h
index b183e8a..b40edec 100644
--- a/aom_scale/yv12config.h
+++ b/aom_scale/yv12config.h
@@ -27,6 +27,7 @@
#define AOM_INTERP_EXTEND 4
#define AOM_BORDER_IN_PIXELS 288
#define AOM_ENC_NO_SCALE_BORDER 160
+#define AOM_ENC_TPL_FRAME_BORDER 32
#define AOM_DEC_BORDER_IN_PIXELS 64
typedef struct yv12_buffer_config {
diff --git a/av1/encoder/encoder_alloc.h b/av1/encoder/encoder_alloc.h
index 57331e0..18b8c7b 100644
--- a/av1/encoder/encoder_alloc.h
+++ b/av1/encoder/encoder_alloc.h
@@ -113,6 +113,7 @@
set_tpl_stats_block_size(cm->width, cm->height,
&tpl_data->tpl_stats_block_mis_log2);
const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2;
+ tpl_data->border_in_pixels = AOM_ENC_TPL_FRAME_BORDER;
for (int frame = 0; frame < MAX_LENGTH_TPL_FRAME_STATS; ++frame) {
const int mi_cols =
@@ -138,7 +139,7 @@
if (aom_alloc_frame_buffer(
&tpl_data->tpl_rec_pool[frame], cm->width, cm->height,
cm->seq_params.subsampling_x, cm->seq_params.subsampling_y,
- cm->seq_params.use_highbitdepth, AOM_ENC_NO_SCALE_BORDER,
+ cm->seq_params.use_highbitdepth, tpl_data->border_in_pixels,
cm->features.byte_alignment))
aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate frame buffer");
diff --git a/av1/encoder/tpl_model.c b/av1/encoder/tpl_model.c
index 84975b2..8777f67 100644
--- a/av1/encoder/tpl_model.c
+++ b/av1/encoder/tpl_model.c
@@ -846,7 +846,7 @@
// Motion estimation column boundary
av1_set_mv_col_limits(mi_params, &x->mv_limits, mi_col, mi_width,
- cpi->oxcf.border_in_pixels);
+ tpl_data->border_in_pixels);
xd->mb_to_left_edge = -GET_MV_SUBPEL(mi_col * MI_SIZE);
xd->mb_to_right_edge =
GET_MV_SUBPEL(mi_params->mi_cols - mi_width - mi_col);
@@ -873,7 +873,7 @@
for (int mi_row = 0; mi_row < mi_params->mi_rows; mi_row += mi_height) {
// Motion estimation row boundary
av1_set_mv_row_limits(mi_params, &x->mv_limits, mi_row, mi_height,
- cpi->oxcf.border_in_pixels);
+ cpi->tpl_data.border_in_pixels);
xd->mb_to_top_edge = -GET_MV_SUBPEL(mi_row * MI_SIZE);
xd->mb_to_bottom_edge =
GET_MV_SUBPEL((mi_params->mi_rows - mi_height - mi_row) * MI_SIZE);
diff --git a/av1/encoder/tpl_model.h b/av1/encoder/tpl_model.h
index cbbf11e..ff3d6ad 100644
--- a/av1/encoder/tpl_model.h
+++ b/av1/encoder/tpl_model.h
@@ -175,6 +175,11 @@
* multi-threading of tpl
*/
AV1TplRowMultiThreadSync tpl_mt_sync;
+
+ /*!
+ * Frame border for tpl frame.
+ */
+ int border_in_pixels;
} TplParams;
int av1_tpl_setup_stats(struct AV1_COMP *cpi, int gop_eval,