Allocate buffers for compound type search at the sequence level instead of the block level. This saves a large number of malloc and free operations. Change-Id: I4d385fbe741c13d2969aab58302d2685221fa6b0
diff --git a/av1/encoder/block.h b/av1/encoder/block.h index c74b394..1d1cbbe 100644 --- a/av1/encoder/block.h +++ b/av1/encoder/block.h
@@ -176,6 +176,17 @@ int is_global[2]; } COMP_RD_STATS; +// Struct for buffers used by compound_type_rd() function. +// For sizes and alignment of these arrays, refer to +// av1_alloc_compound_type_rd_buffers() function. +typedef struct { + uint8_t *pred0; + uint8_t *pred1; + int16_t *residual1; // src - pred1 + int16_t *diff10; // pred1 - pred0 + uint8_t *tmp_best_mask_buf; // backup of the best segmentation mask +} CompoundTypeRdBuffers; + struct inter_modes_info; typedef struct macroblock MACROBLOCK; struct macroblock { @@ -252,6 +263,7 @@ uint8_t *left_pred_buf; PALETTE_BUFFER *palette_buffer; + CompoundTypeRdBuffers comp_rd_buffer; CONV_BUF_TYPE *tmp_conv_dst; uint8_t *tmp_obmc_bufs[2];
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c index 6d909e4..e60be29 100644 --- a/av1/encoder/encoder.c +++ b/av1/encoder/encoder.c
@@ -581,7 +581,7 @@ av1_free_pc_tree(&cpi->td, num_planes); aom_free(cpi->td.mb.palette_buffer); - + av1_release_compound_type_rd_buffers(&cpi->td.mb.comp_rd_buffer); aom_free(cpi->td.mb.tmp_conv_dst); for (int j = 0; j < 2; ++j) { aom_free(cpi->td.mb.tmp_obmc_bufs[j]); @@ -2393,6 +2393,34 @@ aom_calloc(cm->mi_rows * cm->mi_cols, 1)); } +void av1_alloc_compound_type_rd_buffers(AV1_COMMON *const cm, + CompoundTypeRdBuffers *const bufs) { + CHECK_MEM_ERROR( + cm, bufs->pred0, + (uint8_t *)aom_memalign(16, 2 * MAX_SB_SQUARE * sizeof(*bufs->pred0))); + CHECK_MEM_ERROR( + cm, bufs->pred1, + (uint8_t *)aom_memalign(16, 2 * MAX_SB_SQUARE * sizeof(*bufs->pred1))); + CHECK_MEM_ERROR( + cm, bufs->residual1, + (int16_t *)aom_memalign(32, MAX_SB_SQUARE * sizeof(*bufs->residual1))); + CHECK_MEM_ERROR( + cm, bufs->diff10, + (int16_t *)aom_memalign(32, MAX_SB_SQUARE * sizeof(*bufs->diff10))); + CHECK_MEM_ERROR(cm, bufs->tmp_best_mask_buf, + (uint8_t *)aom_malloc(2 * MAX_SB_SQUARE * + sizeof(*bufs->tmp_best_mask_buf))); +} + +void av1_release_compound_type_rd_buffers(CompoundTypeRdBuffers *const bufs) { + aom_free(bufs->pred0); + aom_free(bufs->pred1); + aom_free(bufs->residual1); + aom_free(bufs->diff10); + aom_free(bufs->tmp_best_mask_buf); + av1_zero(*bufs); // Set all pointers to NULL for safety. +} + void av1_change_config(struct AV1_COMP *cpi, const AV1EncoderConfig *oxcf) { AV1_COMMON *const cm = &cpi->common; SequenceHeader *const seq_params = &cm->seq_params; @@ -2484,6 +2512,10 @@ aom_memalign(16, sizeof(*x->palette_buffer))); } + if (x->comp_rd_buffer.pred0 == NULL) { + av1_alloc_compound_type_rd_buffers(cm, &x->comp_rd_buffer); + } + if (x->tmp_conv_dst == NULL) { CHECK_MEM_ERROR( cm, x->tmp_conv_dst, @@ -3093,6 +3125,7 @@ if (t > 0) { aom_free(thread_data->td->palette_buffer); aom_free(thread_data->td->tmp_conv_dst); + av1_release_compound_type_rd_buffers(&thread_data->td->comp_rd_buffer); for (int j = 0; j < 2; ++j) { aom_free(thread_data->td->tmp_obmc_bufs[j]); }
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h index 0aa1bac..ac8d055 100644 --- a/av1/encoder/encoder.h +++ b/av1/encoder/encoder.h
@@ -612,6 +612,7 @@ uint8_t *above_pred_buf; uint8_t *left_pred_buf; PALETTE_BUFFER *palette_buffer; + CompoundTypeRdBuffers comp_rd_buffer; CONV_BUF_TYPE *tmp_conv_dst; uint8_t *tmp_obmc_bufs[2]; int intrabc_used; @@ -1119,6 +1120,10 @@ int av1_convert_sect5obus_to_annexb(uint8_t *buffer, size_t *input_size); +void av1_alloc_compound_type_rd_buffers(AV1_COMMON *const cm, + CompoundTypeRdBuffers *const bufs); +void av1_release_compound_type_rd_buffers(CompoundTypeRdBuffers *const bufs); + // av1 uses 10,000,000 ticks/second as time stamp #define TICKS_PER_SEC 10000000LL
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c index 1c170a8..52e69ba 100644 --- a/av1/encoder/ethread.c +++ b/av1/encoder/ethread.c
@@ -454,6 +454,8 @@ cm, thread_data->td->palette_buffer, aom_memalign(16, sizeof(*thread_data->td->palette_buffer))); + av1_alloc_compound_type_rd_buffers(cm, &thread_data->td->comp_rd_buffer); + CHECK_MEM_ERROR( cm, thread_data->td->tmp_conv_dst, aom_memalign(32, MAX_SB_SIZE * MAX_SB_SIZE * @@ -572,6 +574,7 @@ if (i > 0) { thread_data->td->mb.palette_buffer = thread_data->td->palette_buffer; + thread_data->td->mb.comp_rd_buffer = thread_data->td->comp_rd_buffer; thread_data->td->mb.tmp_conv_dst = thread_data->td->tmp_conv_dst; for (int j = 0; j < 2; ++j) { thread_data->td->mb.tmp_obmc_bufs[j] =
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c index 3e9d3ae..8c98cd5 100644 --- a/av1/encoder/rdopt.c +++ b/av1/encoder/rdopt.c
@@ -9683,22 +9683,11 @@ return cost; } -// Struct for buffers used by compound_type_rd() function. -// For sizes and alignment of these arrays, refer to -// alloc_compound_type_rd_buffers() function. -typedef struct { - uint8_t *pred0; - uint8_t *pred1; - int16_t *residual1; // src - pred1 - int16_t *diff10; // pred1 - pred0 - uint8_t *tmp_best_mask_buf; // backup of the best segmentation mask -} CompoundTypeRdBuffers; - static int compound_type_rd( const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_col, int mi_row, int_mv *cur_mv, int mode_search_mask, int masked_compound_used, const BUFFER_SET *orig_dst, const BUFFER_SET *tmp_dst, - CompoundTypeRdBuffers *buffers, int *rate_mv, int64_t *rd, + const CompoundTypeRdBuffers *buffers, int *rate_mv, int64_t *rd, RD_STATS *rd_stats, int64_t ref_best_rd, int *is_luma_interp_done, int64_t rd_thresh) { const AV1_COMMON *cm = &cpi->common; @@ -10210,7 +10199,7 @@ BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y, RD_STATS *rd_stats_uv, int *disable_skip, int mi_row, int mi_col, HandleInterModeArgs *args, int64_t ref_best_rd, uint8_t *const tmp_buf, - CompoundTypeRdBuffers *rd_buffers, int64_t *best_est_rd, + const CompoundTypeRdBuffers *rd_buffers, int64_t *best_est_rd, const int do_tx_search, InterModesInfo *inter_modes_info) { const AV1_COMMON *cm = &cpi->common; const int num_planes = av1_num_planes(cm); @@ -12491,35 +12480,6 @@ return skip_ref; } -static void alloc_compound_type_rd_buffers(AV1_COMMON *const cm, - CompoundTypeRdBuffers *const bufs) { - CHECK_MEM_ERROR( - cm, bufs->pred0, - (uint8_t *)aom_memalign(16, 2 * MAX_SB_SQUARE * sizeof(*bufs->pred0))); - CHECK_MEM_ERROR( - cm, bufs->pred1, - (uint8_t *)aom_memalign(16, 2 * MAX_SB_SQUARE * sizeof(*bufs->pred1))); - CHECK_MEM_ERROR( - cm, bufs->residual1, - (int16_t *)aom_memalign(32, MAX_SB_SQUARE * sizeof(*bufs->residual1))); - CHECK_MEM_ERROR( - cm, bufs->diff10, - (int16_t *)aom_memalign(32, MAX_SB_SQUARE * sizeof(*bufs->diff10))); - 
CHECK_MEM_ERROR(cm, bufs->tmp_best_mask_buf, - (uint8_t *)aom_malloc(2 * MAX_SB_SQUARE * - sizeof(*bufs->tmp_best_mask_buf))); -} - -static void release_compound_type_rd_buffers( - CompoundTypeRdBuffers *const bufs) { - aom_free(bufs->pred0); - aom_free(bufs->pred1); - aom_free(bufs->residual1); - aom_free(bufs->diff10); - aom_free(bufs->tmp_best_mask_buf); - av1_zero(*bufs); // Set all pointers to NULL for safety. -} - #if !CONFIG_REALTIME_ONLY // Enables do_tx_search on a per-mode basis. static int do_tx_search_mode(int do_tx_search_global, int midx, int adaptive) { @@ -12651,9 +12611,6 @@ // Temporary buffers used by handle_inter_mode(). uint8_t *const tmp_buf = get_buf_by_bd(xd, x->tmp_obmc_bufs[0]); - CompoundTypeRdBuffers rd_buffers; - alloc_compound_type_rd_buffers(cm, &rd_buffers); - // The best RD found for the reference frame, among single reference modes. // Note that the 0-th element will contain a cut-off that is later used // to determine if we should skip a compound mode. @@ -12797,7 +12754,7 @@ this_rd = handle_inter_mode( cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, &disable_skip, mi_row, mi_col, &args, ref_best_rd, tmp_buf, - &rd_buffers, &best_est_rd, do_tx_search, inter_modes_info); + &x->comp_rd_buffer, &best_est_rd, do_tx_search, inter_modes_info); rate2 = rd_stats.rate; skippable = rd_stats.skip; @@ -12898,8 +12855,6 @@ if (x->skip && !comp_pred) break; } - release_compound_type_rd_buffers(&rd_buffers); - #if CONFIG_COLLECT_COMPONENT_TIMING start_timing(cpi, do_tx_search_time); #endif @@ -13290,9 +13245,6 @@ // Temporary buffers used by handle_inter_mode(). 
uint8_t *const tmp_buf = get_buf_by_bd(xd, x->tmp_obmc_bufs[0]); - CompoundTypeRdBuffers rd_buffers; - alloc_compound_type_rd_buffers(cm, &rd_buffers); - for (int midx = 0; midx < MAX_MODES; ++midx) { const MODE_DEFINITION *mode_order = &av1_mode_order[midx]; this_mode = mode_order->mode; @@ -13444,7 +13396,7 @@ this_rd = handle_inter_mode( cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, &disable_skip, mi_row, mi_col, &args, ref_best_rd, tmp_buf, - &rd_buffers, &best_est_rd, 0, inter_modes_info); + &x->comp_rd_buffer, &best_est_rd, 0, inter_modes_info); rate2 = rd_stats.rate; skippable = rd_stats.skip; distortion2 = rd_stats.dist; @@ -13523,8 +13475,6 @@ if (x->skip && !comp_pred) break; } - release_compound_type_rd_buffers(&rd_buffers); - inter_modes_info_sort(inter_modes_info, inter_modes_info->rd_idx_pair_arr); search_state.best_rd = INT64_MAX;