Allocate buffers for compound search at the sequence level
instead of at the block level. This saves many malloc and free operations.
Change-Id: I4d385fbe741c13d2969aab58302d2685221fa6b0
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index c74b394..1d1cbbe 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -176,6 +176,17 @@
int is_global[2];
} COMP_RD_STATS;
+// Struct for buffers used by compound_type_rd() function.
+// For sizes and alignment of these arrays, refer to
+// av1_alloc_compound_type_rd_buffers() function.
+typedef struct {
+ uint8_t *pred0;
+ uint8_t *pred1;
+ int16_t *residual1; // src - pred1
+ int16_t *diff10; // pred1 - pred0
+ uint8_t *tmp_best_mask_buf; // backup of the best segmentation mask
+} CompoundTypeRdBuffers;
+
struct inter_modes_info;
typedef struct macroblock MACROBLOCK;
struct macroblock {
@@ -252,6 +263,7 @@
uint8_t *left_pred_buf;
PALETTE_BUFFER *palette_buffer;
+ CompoundTypeRdBuffers comp_rd_buffer;
CONV_BUF_TYPE *tmp_conv_dst;
uint8_t *tmp_obmc_bufs[2];
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 6d909e4..e60be29 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -581,7 +581,7 @@
av1_free_pc_tree(&cpi->td, num_planes);
aom_free(cpi->td.mb.palette_buffer);
-
+ av1_release_compound_type_rd_buffers(&cpi->td.mb.comp_rd_buffer);
aom_free(cpi->td.mb.tmp_conv_dst);
for (int j = 0; j < 2; ++j) {
aom_free(cpi->td.mb.tmp_obmc_bufs[j]);
@@ -2393,6 +2393,34 @@
aom_calloc(cm->mi_rows * cm->mi_cols, 1));
}
+void av1_alloc_compound_type_rd_buffers(AV1_COMMON *const cm,
+ CompoundTypeRdBuffers *const bufs) {  // Allocates all five buffers in bufs.
+ CHECK_MEM_ERROR(  // Free with av1_release_compound_type_rd_buffers().
+ cm, bufs->pred0,  // NOTE(review): 2x presumably for high bit-depth; confirm
+ (uint8_t *)aom_memalign(16, 2 * MAX_SB_SQUARE * sizeof(*bufs->pred0)));
+ CHECK_MEM_ERROR(
+ cm, bufs->pred1,  // same size and alignment request as pred0
+ (uint8_t *)aom_memalign(16, 2 * MAX_SB_SQUARE * sizeof(*bufs->pred1)));
+ CHECK_MEM_ERROR(
+ cm, bufs->residual1,  // MAX_SB_SQUARE int16_t entries
+ (int16_t *)aom_memalign(32, MAX_SB_SQUARE * sizeof(*bufs->residual1)));
+ CHECK_MEM_ERROR(
+ cm, bufs->diff10,  // same size and alignment request as residual1
+ (int16_t *)aom_memalign(32, MAX_SB_SQUARE * sizeof(*bufs->diff10)));
+ CHECK_MEM_ERROR(cm, bufs->tmp_best_mask_buf,  // aom_malloc, not aom_memalign
+ (uint8_t *)aom_malloc(2 * MAX_SB_SQUARE *
+ sizeof(*bufs->tmp_best_mask_buf)));
+}
+
+void av1_release_compound_type_rd_buffers(CompoundTypeRdBuffers *const bufs) {
+ aom_free(bufs->pred0);  // Frees what av1_alloc_compound_type_rd_buffers() set.
+ aom_free(bufs->pred1);
+ aom_free(bufs->residual1);
+ aom_free(bufs->diff10);
+ aom_free(bufs->tmp_best_mask_buf);
+ av1_zero(*bufs); // Set all pointers to NULL; callers test pred0 == NULL.
+}
+
void av1_change_config(struct AV1_COMP *cpi, const AV1EncoderConfig *oxcf) {
AV1_COMMON *const cm = &cpi->common;
SequenceHeader *const seq_params = &cm->seq_params;
@@ -2484,6 +2512,10 @@
aom_memalign(16, sizeof(*x->palette_buffer)));
}
+ if (x->comp_rd_buffer.pred0 == NULL) {
+ av1_alloc_compound_type_rd_buffers(cm, &x->comp_rd_buffer);
+ }
+
if (x->tmp_conv_dst == NULL) {
CHECK_MEM_ERROR(
cm, x->tmp_conv_dst,
@@ -3093,6 +3125,7 @@
if (t > 0) {
aom_free(thread_data->td->palette_buffer);
aom_free(thread_data->td->tmp_conv_dst);
+ av1_release_compound_type_rd_buffers(&thread_data->td->comp_rd_buffer);
for (int j = 0; j < 2; ++j) {
aom_free(thread_data->td->tmp_obmc_bufs[j]);
}
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 0aa1bac..ac8d055 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -612,6 +612,7 @@
uint8_t *above_pred_buf;
uint8_t *left_pred_buf;
PALETTE_BUFFER *palette_buffer;
+ CompoundTypeRdBuffers comp_rd_buffer;
CONV_BUF_TYPE *tmp_conv_dst;
uint8_t *tmp_obmc_bufs[2];
int intrabc_used;
@@ -1119,6 +1120,10 @@
int av1_convert_sect5obus_to_annexb(uint8_t *buffer, size_t *input_size);
+void av1_alloc_compound_type_rd_buffers(AV1_COMMON *const cm,
+ CompoundTypeRdBuffers *const bufs);
+void av1_release_compound_type_rd_buffers(CompoundTypeRdBuffers *const bufs);
+
// av1 uses 10,000,000 ticks/second as time stamp
#define TICKS_PER_SEC 10000000LL
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c
index 1c170a8..52e69ba 100644
--- a/av1/encoder/ethread.c
+++ b/av1/encoder/ethread.c
@@ -454,6 +454,8 @@
cm, thread_data->td->palette_buffer,
aom_memalign(16, sizeof(*thread_data->td->palette_buffer)));
+ av1_alloc_compound_type_rd_buffers(cm, &thread_data->td->comp_rd_buffer);
+
CHECK_MEM_ERROR(
cm, thread_data->td->tmp_conv_dst,
aom_memalign(32, MAX_SB_SIZE * MAX_SB_SIZE *
@@ -572,6 +574,7 @@
if (i > 0) {
thread_data->td->mb.palette_buffer = thread_data->td->palette_buffer;
+ thread_data->td->mb.comp_rd_buffer = thread_data->td->comp_rd_buffer;
thread_data->td->mb.tmp_conv_dst = thread_data->td->tmp_conv_dst;
for (int j = 0; j < 2; ++j) {
thread_data->td->mb.tmp_obmc_bufs[j] =
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 3e9d3ae..8c98cd5 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -9683,22 +9683,11 @@
return cost;
}
-// Struct for buffers used by compound_type_rd() function.
-// For sizes and alignment of these arrays, refer to
-// alloc_compound_type_rd_buffers() function.
-typedef struct {
- uint8_t *pred0;
- uint8_t *pred1;
- int16_t *residual1; // src - pred1
- int16_t *diff10; // pred1 - pred0
- uint8_t *tmp_best_mask_buf; // backup of the best segmentation mask
-} CompoundTypeRdBuffers;
-
static int compound_type_rd(
const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_col,
int mi_row, int_mv *cur_mv, int mode_search_mask, int masked_compound_used,
const BUFFER_SET *orig_dst, const BUFFER_SET *tmp_dst,
- CompoundTypeRdBuffers *buffers, int *rate_mv, int64_t *rd,
+ const CompoundTypeRdBuffers *buffers, int *rate_mv, int64_t *rd,
RD_STATS *rd_stats, int64_t ref_best_rd, int *is_luma_interp_done,
int64_t rd_thresh) {
const AV1_COMMON *cm = &cpi->common;
@@ -10210,7 +10199,7 @@
BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y,
RD_STATS *rd_stats_uv, int *disable_skip, int mi_row, int mi_col,
HandleInterModeArgs *args, int64_t ref_best_rd, uint8_t *const tmp_buf,
- CompoundTypeRdBuffers *rd_buffers, int64_t *best_est_rd,
+ const CompoundTypeRdBuffers *rd_buffers, int64_t *best_est_rd,
const int do_tx_search, InterModesInfo *inter_modes_info) {
const AV1_COMMON *cm = &cpi->common;
const int num_planes = av1_num_planes(cm);
@@ -12491,35 +12480,6 @@
return skip_ref;
}
-static void alloc_compound_type_rd_buffers(AV1_COMMON *const cm,
- CompoundTypeRdBuffers *const bufs) {
- CHECK_MEM_ERROR(
- cm, bufs->pred0,
- (uint8_t *)aom_memalign(16, 2 * MAX_SB_SQUARE * sizeof(*bufs->pred0)));
- CHECK_MEM_ERROR(
- cm, bufs->pred1,
- (uint8_t *)aom_memalign(16, 2 * MAX_SB_SQUARE * sizeof(*bufs->pred1)));
- CHECK_MEM_ERROR(
- cm, bufs->residual1,
- (int16_t *)aom_memalign(32, MAX_SB_SQUARE * sizeof(*bufs->residual1)));
- CHECK_MEM_ERROR(
- cm, bufs->diff10,
- (int16_t *)aom_memalign(32, MAX_SB_SQUARE * sizeof(*bufs->diff10)));
- CHECK_MEM_ERROR(cm, bufs->tmp_best_mask_buf,
- (uint8_t *)aom_malloc(2 * MAX_SB_SQUARE *
- sizeof(*bufs->tmp_best_mask_buf)));
-}
-
-static void release_compound_type_rd_buffers(
- CompoundTypeRdBuffers *const bufs) {
- aom_free(bufs->pred0);
- aom_free(bufs->pred1);
- aom_free(bufs->residual1);
- aom_free(bufs->diff10);
- aom_free(bufs->tmp_best_mask_buf);
- av1_zero(*bufs); // Set all pointers to NULL for safety.
-}
-
#if !CONFIG_REALTIME_ONLY
// Enables do_tx_search on a per-mode basis.
static int do_tx_search_mode(int do_tx_search_global, int midx, int adaptive) {
@@ -12651,9 +12611,6 @@
// Temporary buffers used by handle_inter_mode().
uint8_t *const tmp_buf = get_buf_by_bd(xd, x->tmp_obmc_bufs[0]);
- CompoundTypeRdBuffers rd_buffers;
- alloc_compound_type_rd_buffers(cm, &rd_buffers);
-
// The best RD found for the reference frame, among single reference modes.
// Note that the 0-th element will contain a cut-off that is later used
// to determine if we should skip a compound mode.
@@ -12797,7 +12754,7 @@
this_rd = handle_inter_mode(
cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv,
&disable_skip, mi_row, mi_col, &args, ref_best_rd, tmp_buf,
- &rd_buffers, &best_est_rd, do_tx_search, inter_modes_info);
+ &x->comp_rd_buffer, &best_est_rd, do_tx_search, inter_modes_info);
rate2 = rd_stats.rate;
skippable = rd_stats.skip;
@@ -12898,8 +12855,6 @@
if (x->skip && !comp_pred) break;
}
- release_compound_type_rd_buffers(&rd_buffers);
-
#if CONFIG_COLLECT_COMPONENT_TIMING
start_timing(cpi, do_tx_search_time);
#endif
@@ -13290,9 +13245,6 @@
// Temporary buffers used by handle_inter_mode().
uint8_t *const tmp_buf = get_buf_by_bd(xd, x->tmp_obmc_bufs[0]);
- CompoundTypeRdBuffers rd_buffers;
- alloc_compound_type_rd_buffers(cm, &rd_buffers);
-
for (int midx = 0; midx < MAX_MODES; ++midx) {
const MODE_DEFINITION *mode_order = &av1_mode_order[midx];
this_mode = mode_order->mode;
@@ -13444,7 +13396,7 @@
this_rd = handle_inter_mode(
cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv,
&disable_skip, mi_row, mi_col, &args, ref_best_rd, tmp_buf,
- &rd_buffers, &best_est_rd, 0, inter_modes_info);
+ &x->comp_rd_buffer, &best_est_rd, 0, inter_modes_info);
rate2 = rd_stats.rate;
skippable = rd_stats.skip;
distortion2 = rd_stats.dist;
@@ -13523,8 +13475,6 @@
if (x->skip && !comp_pred) break;
}
- release_compound_type_rd_buffers(&rd_buffers);
-
inter_modes_info_sort(inter_modes_info, inter_modes_info->rd_idx_pair_arr);
search_state.best_rd = INT64_MAX;