Optimize memory for first-pass multi-threading
Avoid allocating redundant memory for the workers that are shared
between LAP_STAGE and ENCODE_STAGE in the case of single-pass
encoding.
Change-Id: I1a29e3c4e98df085a2b6b4cb03835eead2823c7d
diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c
index 0ef0fd8..8a22da6 100644
--- a/av1/av1_cx_iface.c
+++ b/av1/av1_cx_iface.c
@@ -2193,8 +2193,10 @@
aom_rational64_t timestamp_ratio_la = *timestamp_ratio;
int64_t dst_time_stamp_la = dst_time_stamp;
int64_t dst_end_time_stamp_la = dst_end_time_stamp;
- if (cpi_lap->mt_info.workers == NULL)
+ if (cpi_lap->mt_info.workers == NULL) {
cpi_lap->mt_info.workers = cpi->mt_info.workers;
+ cpi_lap->mt_info.tile_thr_data = cpi->mt_info.tile_thr_data;
+ }
cpi_lap->mt_info.num_workers = cpi->mt_info.num_workers;
status = av1_get_compressed_data(
cpi_lap, &lib_flags, &frame_size, NULL, &dst_time_stamp_la,
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 24a2dbf..23410a7 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -3525,7 +3525,6 @@
static AOM_INLINE void free_thread_data(AV1_COMP *cpi) {
MultiThreadInfo *const mt_info = &cpi->mt_info;
AV1_COMMON *cm = &cpi->common;
- if (mt_info->tile_thr_data == NULL) return;
for (int t = 0; t < mt_info->num_workers; ++t) {
EncWorkerData *const thread_data = &mt_info->tile_thr_data[t];
aom_free(thread_data->td->tctx);
@@ -3668,8 +3667,10 @@
aom_free_frame_buffer(&tpl_data->tpl_rec_pool[frame]);
}
- if (cpi->compressor_stage != LAP_STAGE) terminate_worker_data(cpi);
- free_thread_data(cpi);
+ if (cpi->compressor_stage != LAP_STAGE) {
+ terminate_worker_data(cpi);
+ free_thread_data(cpi);
+ }
#if CONFIG_MULTITHREAD
if (enc_row_mt_mutex_ != NULL) {
@@ -3682,8 +3683,10 @@
}
#endif
av1_row_mt_mem_dealloc(cpi);
- aom_free(mt_info->tile_thr_data);
- if (cpi->compressor_stage != LAP_STAGE) aom_free(mt_info->workers);
+ if (cpi->compressor_stage != LAP_STAGE) {
+ aom_free(mt_info->tile_thr_data);
+ aom_free(mt_info->workers);
+ }
#if !CONFIG_REALTIME_ONLY
av1_tpl_dealloc(&tpl_data->tpl_mt_sync);
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c
index e65c07c..5a3fdef 100644
--- a/av1/encoder/ethread.c
+++ b/av1/encoder/ethread.c
@@ -512,8 +512,8 @@
MultiThreadInfo *const mt_info = &cpi->mt_info;
int sb_mi_size = av1_get_sb_mi_size(cm);
- CHECK_MEM_ERROR(cm, mt_info->tile_thr_data,
- aom_calloc(num_workers, sizeof(*mt_info->tile_thr_data)));
+ assert(mt_info->workers != NULL);
+ assert(mt_info->tile_thr_data != NULL);
#if CONFIG_MULTITHREAD
if (cpi->oxcf.row_mt == 1) {
@@ -542,14 +542,8 @@
thread_data->thread_id = i;
if (i > 0) {
- // Allocate thread data.
- CHECK_MEM_ERROR(cm, thread_data->td,
- aom_memalign(32, sizeof(*thread_data->td)));
- av1_zero(*thread_data->td);
-
// Set up sms_tree.
av1_setup_sms_tree(cpi, thread_data->td);
- av1_setup_shared_coeff_buffer(cm, &thread_data->td->shared_coeff_buf);
av1_alloc_obmc_buffers(&thread_data->td->obmc_buffer, cm);
@@ -619,14 +613,29 @@
AV1_COMMON *const cm = &cpi->common;
MultiThreadInfo *const mt_info = &cpi->mt_info;
const AVxWorkerInterface *const winterface = aom_get_worker_interface();
- mt_info->tile_thr_data = NULL;
CHECK_MEM_ERROR(cm, mt_info->workers,
aom_malloc(num_workers * sizeof(*mt_info->workers)));
+
+ CHECK_MEM_ERROR(cm, mt_info->tile_thr_data,
+ aom_calloc(num_workers, sizeof(*mt_info->tile_thr_data)));
+
for (int i = num_workers - 1; i >= 0; i--) {
AVxWorker *const worker = &mt_info->workers[i];
+ EncWorkerData *const thread_data = &mt_info->tile_thr_data[i];
+
winterface->init(worker);
worker->thread_name = "aom enc worker";
+
+ if (i > 0) {
+ // Allocate thread data.
+ CHECK_MEM_ERROR(cm, thread_data->td,
+ aom_memalign(32, sizeof(*thread_data->td)));
+ av1_zero(*thread_data->td);
+
+ // Set up shared coeff buffers.
+ av1_setup_shared_coeff_buffer(cm, &thread_data->td->shared_coeff_buf);
+ }
++mt_info->num_workers;
}
}
@@ -637,10 +646,8 @@
const AVxWorkerInterface *const winterface = aom_get_worker_interface();
MultiThreadInfo *const mt_info = &cpi->mt_info;
- CHECK_MEM_ERROR(cm, mt_info->tile_thr_data,
- aom_calloc(num_workers, sizeof(*mt_info->tile_thr_data)));
-
assert(mt_info->workers != NULL);
+ assert(mt_info->tile_thr_data != NULL);
#if CONFIG_MULTITHREAD
AV1EncRowMultiThreadInfo *enc_row_mt = &mt_info->enc_row_mt;
@@ -661,14 +668,6 @@
thread_data->thread_id = i;
if (i > 0) {
- // Allocate thread data.
- CHECK_MEM_ERROR(cm, thread_data->td,
- aom_memalign(32, sizeof(*thread_data->td)));
- av1_zero(*thread_data->td);
-
- // Set up shared coeff buffers.
- av1_setup_shared_coeff_buffer(cm, &thread_data->td->shared_coeff_buf);
-
// Set up firstpass PICK_MODE_CONTEXT.
thread_data->td->firstpass_ctx =
av1_alloc_pmc(cm, BLOCK_16X16, &thread_data->td->shared_coeff_buf);
@@ -753,6 +752,11 @@
worker->data1 = thread_data;
worker->data2 = NULL;
+ thread_data->cpi = cpi;
+ if (i == 0) {
+ thread_data->td = &cpi->td;
+ }
+
thread_data->td->intrabc_used = 0;
thread_data->td->deltaq_used = 0;
@@ -809,6 +813,11 @@
worker->data1 = thread_data;
worker->data2 = NULL;
+ thread_data->cpi = cpi;
+ if (i == 0) {
+ thread_data->td = &cpi->td;
+ }
+
// Before encoding a frame, copy the thread data from cpi.
if (thread_data->td != &cpi->td) {
thread_data->td->mb = cpi->td.mb;
@@ -1244,6 +1253,11 @@
worker->data1 = thread_data;
worker->data2 = NULL;
+ thread_data->cpi = cpi;
+ if (i == 0) {
+ thread_data->td = &cpi->td;
+ }
+
// Before encoding a frame, copy the thread data from cpi.
if (thread_data->td != &cpi->td) {
thread_data->td->mb = cpi->td.mb;
@@ -1416,6 +1430,8 @@
worker->hook = hook;
worker->data1 = thread_data;
worker->data2 = NULL;
+
+ thread_data->cpi = cpi;
}
}