Refactor worker creation and tile_thr_data allocations
Separate worker creation from tile_thr_data allocation.
This facilitates the abstraction of workers and tile_thr_data
into the primary context.
Change-Id: I1ff3203445854bdfbebeb8082d5b9ffdd2a52b7d
diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c
index 11c47bc..9028d18 100644
--- a/av1/av1_cx_iface.c
+++ b/av1/av1_cx_iface.c
@@ -2643,7 +2643,8 @@
if (cpi->oxcf.pass == 1) {
#if !CONFIG_REALTIME_ONLY
- num_workers = av1_fp_compute_num_enc_workers(cpi);
+ num_workers = cpi->mt_info.num_mod_workers[MOD_FP] =
+ av1_fp_compute_num_enc_workers(cpi);
#endif
} else {
av1_compute_num_workers_for_mt(cpi);
@@ -2651,15 +2652,13 @@
}
if ((num_workers > 1) && (cpi->mt_info.num_workers == 0)) {
av1_create_workers(cpi, num_workers);
+ av1_init_tile_thread_data(cpi, cpi->oxcf.pass == 1);
#if CONFIG_MULTITHREAD
av1_init_mt_sync(cpi, cpi->oxcf.pass == 1);
if (cpi_lap != NULL) {
av1_init_mt_sync(cpi_lap, 1);
}
#endif // CONFIG_MULTITHREAD
- if (cpi->oxcf.pass != 1) {
- av1_create_second_pass_workers(cpi, num_workers);
- }
}
// Call for LAP stage
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index fe9d76e..3cdc2a5 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -1425,18 +1425,6 @@
int num_mod_workers[NUM_MT_MODULES];
/*!
- * Flag to indicate whether thread specific buffers need to be allocated for
- * tile/row based multi-threading of first pass stage.
- */
- int fp_mt_buf_init_done;
-
- /*!
- * Flag to indicate whether thread specific buffers need to be allocated for
- * tile/row based multi-threading of encode stage.
- */
- int enc_mt_buf_init_done;
-
- /*!
* Synchronization object used to launch job in the worker thread.
*/
AVxWorker *workers;
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c
index d274b6b..c70ade2 100644
--- a/av1/encoder/ethread.c
+++ b/av1/encoder/ethread.c
@@ -577,105 +577,98 @@
}
#endif // CONFIG_MULTITHREAD
-void av1_create_second_pass_workers(AV1_COMP *cpi, int num_workers) {
- AV1_COMMON *const cm = &cpi->common;
- const AVxWorkerInterface *const winterface = aom_get_worker_interface();
- MultiThreadInfo *const mt_info = &cpi->mt_info;
-
- assert(mt_info->workers != NULL);
- assert(mt_info->tile_thr_data != NULL);
-
- for (int i = num_workers - 1; i >= 0; i--) {
- AVxWorker *const worker = &mt_info->workers[i];
- EncWorkerData *const thread_data = &mt_info->tile_thr_data[i];
-
- thread_data->cpi = cpi;
- thread_data->thread_id = i;
- // Set the starting tile for each thread.
- thread_data->start = i;
-
- if (i > 0) {
- // alloc_obmc_buffers(&thread_data->td->obmc_buffer, cm);
-
- // Create threads
- if (!winterface->reset(worker))
- aom_internal_error(cm->error, AOM_CODEC_ERROR,
- "Tile encoder thread creation failed");
- } else {
- // Main thread acts as a worker and uses the thread data in cpi.
- thread_data->td = &cpi->td;
- }
- winterface->sync(worker);
- }
-}
-
-static AOM_INLINE void create_enc_workers(AV1_COMP *cpi, int num_workers) {
+void av1_init_tile_thread_data(AV1_COMP *cpi, int is_first_pass) {
AV1_COMMON *const cm = &cpi->common;
MultiThreadInfo *const mt_info = &cpi->mt_info;
assert(mt_info->workers != NULL);
assert(mt_info->tile_thr_data != NULL);
+ int num_workers = mt_info->num_workers;
+
for (int i = num_workers - 1; i >= 0; i--) {
EncWorkerData *const thread_data = &mt_info->tile_thr_data[i];
if (i > 0) {
- // Set up sms_tree.
- av1_setup_sms_tree(cpi, thread_data->td);
+ // Allocate thread data.
+ CHECK_MEM_ERROR(cm, thread_data->td,
+ aom_memalign(32, sizeof(*thread_data->td)));
+ av1_zero(*thread_data->td);
- alloc_obmc_buffers(&thread_data->td->obmc_buffer, cm);
-
- CHECK_MEM_ERROR(cm, thread_data->td->inter_modes_info,
- (InterModesInfo *)aom_malloc(
- sizeof(*thread_data->td->inter_modes_info)));
-
- for (int x = 0; x < 2; x++)
- for (int y = 0; y < 2; y++)
- CHECK_MEM_ERROR(
- cm, thread_data->td->hash_value_buffer[x][y],
- (uint32_t *)aom_malloc(
- AOM_BUFFER_SIZE_FOR_BLOCK_HASH *
- sizeof(*thread_data->td->hash_value_buffer[0][0])));
-
- // Allocate frame counters in thread data.
- CHECK_MEM_ERROR(cm, thread_data->td->counts,
- aom_calloc(1, sizeof(*thread_data->td->counts)));
-
- // Allocate buffers used by palette coding mode.
+ // Set up shared coeff buffers.
+ av1_setup_shared_coeff_buffer(cm, &thread_data->td->shared_coeff_buf);
CHECK_MEM_ERROR(
- cm, thread_data->td->palette_buffer,
- aom_memalign(16, sizeof(*thread_data->td->palette_buffer)));
+ cm, thread_data->td->tmp_conv_dst,
+ aom_memalign(32, MAX_SB_SIZE * MAX_SB_SIZE *
+ sizeof(*thread_data->td->tmp_conv_dst)));
- alloc_compound_type_rd_buffers(cm, &thread_data->td->comp_rd_buffer);
-
- for (int j = 0; j < 2; ++j) {
- CHECK_MEM_ERROR(
- cm, thread_data->td->tmp_pred_bufs[j],
- aom_memalign(32, 2 * MAX_MB_PLANE * MAX_SB_SQUARE *
- sizeof(*thread_data->td->tmp_pred_bufs[j])));
+ if (i < mt_info->num_mod_workers[MOD_FP]) {
+ // Set up firstpass PICK_MODE_CONTEXT.
+ thread_data->td->firstpass_ctx =
+ av1_alloc_pmc(cpi, BLOCK_16X16, &thread_data->td->shared_coeff_buf);
}
- const int plane_types = PLANE_TYPES >> cm->seq_params->monochrome;
- CHECK_MEM_ERROR(cm, thread_data->td->pixel_gradient_info,
- aom_malloc(sizeof(*thread_data->td->pixel_gradient_info) *
- plane_types * MAX_SB_SQUARE));
+ if (!is_first_pass && i < mt_info->num_mod_workers[MOD_ENC]) {
+ // Set up sms_tree.
+ av1_setup_sms_tree(cpi, thread_data->td);
- if (cpi->sf.part_sf.partition_search_type == VAR_BASED_PARTITION) {
- const int num_64x64_blocks =
- (cm->seq_params->sb_size == BLOCK_64X64) ? 1 : 4;
+ alloc_obmc_buffers(&thread_data->td->obmc_buffer, cm);
+
+ CHECK_MEM_ERROR(cm, thread_data->td->inter_modes_info,
+ (InterModesInfo *)aom_malloc(
+ sizeof(*thread_data->td->inter_modes_info)));
+
+ for (int x = 0; x < 2; x++)
+ for (int y = 0; y < 2; y++)
+ CHECK_MEM_ERROR(
+ cm, thread_data->td->hash_value_buffer[x][y],
+ (uint32_t *)aom_malloc(
+ AOM_BUFFER_SIZE_FOR_BLOCK_HASH *
+ sizeof(*thread_data->td->hash_value_buffer[0][0])));
+
+ // Allocate frame counters in thread data.
+ CHECK_MEM_ERROR(cm, thread_data->td->counts,
+ aom_calloc(1, sizeof(*thread_data->td->counts)));
+
+ // Allocate buffers used by palette coding mode.
CHECK_MEM_ERROR(
- cm, thread_data->td->vt64x64,
- aom_malloc(sizeof(*thread_data->td->vt64x64) * num_64x64_blocks));
+ cm, thread_data->td->palette_buffer,
+ aom_memalign(16, sizeof(*thread_data->td->palette_buffer)));
+
+ alloc_compound_type_rd_buffers(cm, &thread_data->td->comp_rd_buffer);
+
+ for (int j = 0; j < 2; ++j) {
+ CHECK_MEM_ERROR(
+ cm, thread_data->td->tmp_pred_bufs[j],
+ aom_memalign(32, 2 * MAX_MB_PLANE * MAX_SB_SQUARE *
+ sizeof(*thread_data->td->tmp_pred_bufs[j])));
+ }
+
+ const int plane_types = PLANE_TYPES >> cm->seq_params->monochrome;
+ CHECK_MEM_ERROR(
+ cm, thread_data->td->pixel_gradient_info,
+ aom_malloc(sizeof(*thread_data->td->pixel_gradient_info) *
+ plane_types * MAX_SB_SQUARE));
+
+ if (cpi->sf.part_sf.partition_search_type == VAR_BASED_PARTITION) {
+ const int num_64x64_blocks =
+ (cm->seq_params->sb_size == BLOCK_64X64) ? 1 : 4;
+ CHECK_MEM_ERROR(
+ cm, thread_data->td->vt64x64,
+ aom_malloc(sizeof(*thread_data->td->vt64x64) * num_64x64_blocks));
+ }
}
} else {
thread_data->td = &cpi->td;
}
- if (cpi->oxcf.row_mt == 1)
+
+ if (!is_first_pass && cpi->oxcf.row_mt == 1 &&
+ i < mt_info->num_mod_workers[MOD_ENC]) {
CHECK_MEM_ERROR(
cm, thread_data->td->tctx,
(FRAME_CONTEXT *)aom_memalign(16, sizeof(*thread_data->td->tctx)));
+ }
}
- mt_info->enc_mt_buf_init_done = 1;
}
void av1_create_workers(AV1_COMP *cpi, int num_workers) {
@@ -696,77 +689,21 @@
winterface->init(worker);
worker->thread_name = "aom enc worker";
- if (i > 0) {
- // Allocate thread data.
- CHECK_MEM_ERROR(cm, thread_data->td,
- aom_memalign(32, sizeof(*thread_data->td)));
- av1_zero(*thread_data->td);
-
- // Set up shared coeff buffers.
- av1_setup_shared_coeff_buffer(cm, &thread_data->td->shared_coeff_buf);
- CHECK_MEM_ERROR(
- cm, thread_data->td->tmp_conv_dst,
- aom_memalign(32, MAX_SB_SIZE * MAX_SB_SIZE *
- sizeof(*thread_data->td->tmp_conv_dst)));
- }
- ++mt_info->num_workers;
- }
-}
-
-#if !CONFIG_REALTIME_ONLY
-static AOM_INLINE void fp_create_enc_workers(AV1_COMP *cpi, int num_workers) {
- AV1_COMMON *const cm = &cpi->common;
- const AVxWorkerInterface *const winterface = aom_get_worker_interface();
- MultiThreadInfo *const mt_info = &cpi->mt_info;
- // For single-pass encode, threads are already created during call to
- // av1_create_second_pass_workers(). Create threads only in the case of
- // pass = 1.
- const int create_workers = (mt_info->num_mod_workers[MOD_FP] == 0) ? 1 : 0;
-
- assert(mt_info->workers != NULL);
- assert(mt_info->tile_thr_data != NULL);
-
-#if CONFIG_MULTITHREAD
- AV1EncRowMultiThreadInfo *enc_row_mt = &mt_info->enc_row_mt;
- if (enc_row_mt->mutex_ == NULL) {
- CHECK_MEM_ERROR(cm, enc_row_mt->mutex_,
- aom_malloc(sizeof(*(enc_row_mt->mutex_))));
- if (enc_row_mt->mutex_) pthread_mutex_init(enc_row_mt->mutex_, NULL);
- }
-#endif
-
- for (int i = num_workers - 1; i >= 0; i--) {
- AVxWorker *const worker = &mt_info->workers[i];
- EncWorkerData *const thread_data = &mt_info->tile_thr_data[i];
-
- thread_data->cpi = cpi;
thread_data->thread_id = i;
// Set the starting tile for each thread.
thread_data->start = i;
if (i > 0) {
- // Set up firstpass PICK_MODE_CONTEXT.
- thread_data->td->firstpass_ctx =
- av1_alloc_pmc(cpi, BLOCK_16X16, &thread_data->td->shared_coeff_buf);
+ // Create threads
+ if (!winterface->reset(worker))
+ aom_internal_error(cm->error, AOM_CODEC_ERROR,
+ "Tile encoder thread creation failed");
+ }
+ winterface->sync(worker);
- if (create_workers) {
- // Create threads
- if (!winterface->reset(worker))
- aom_internal_error(cm->error, AOM_CODEC_ERROR,
- "Tile encoder thread creation failed");
- }
- } else {
- // Main thread acts as a worker and uses the thread data in cpi.
- thread_data->td = &cpi->td;
- }
- if (create_workers) {
- winterface->sync(worker);
- ++mt_info->num_mod_workers[MOD_FP];
- }
+ ++mt_info->num_workers;
}
- mt_info->fp_mt_buf_init_done = 1;
}
-#endif
static AOM_INLINE void launch_workers(MultiThreadInfo *const mt_info,
int num_workers) {
@@ -1011,12 +948,8 @@
if (cpi->allocated_tiles < tile_cols * tile_rows) av1_alloc_tile_data(cpi);
av1_init_tile_data(cpi);
- // Only run once to create threads and allocate thread data.
- if (mt_info->enc_mt_buf_init_done == 0) {
- create_enc_workers(cpi, num_workers);
- } else {
- num_workers = AOMMIN(num_workers, mt_info->num_workers);
- }
+ num_workers = AOMMIN(num_workers, mt_info->num_workers);
+
prepare_enc_workers(cpi, enc_worker_hook, num_workers);
launch_workers(&cpi->mt_info, num_workers);
sync_enc_workers(&cpi->mt_info, cm, num_workers);
@@ -1152,12 +1085,8 @@
}
}
- // Only run once to create threads and allocate thread data.
- if (mt_info->enc_mt_buf_init_done == 0) {
- create_enc_workers(cpi, num_workers);
- } else {
- num_workers = AOMMIN(num_workers, mt_info->num_workers);
- }
+ num_workers = AOMMIN(num_workers, mt_info->num_workers);
+
assign_tile_to_thread(thread_id_to_tile_id, tile_cols * tile_rows,
num_workers);
prepare_enc_workers(cpi, enc_row_mt_worker_hook, num_workers);
@@ -1222,9 +1151,6 @@
}
num_workers = AOMMIN(num_workers, mt_info->num_workers);
- // Only run once to create threads and allocate thread data.
- if (mt_info->fp_mt_buf_init_done == 0)
- fp_create_enc_workers(cpi, num_workers);
assign_tile_to_thread(thread_id_to_tile_id, tile_cols * tile_rows,
num_workers);
fp_prepare_enc_workers(cpi, fp_enc_row_mt_worker_hook, num_workers);
diff --git a/av1/encoder/ethread.h b/av1/encoder/ethread.h
index c2ab864..85fcfe5 100644
--- a/av1/encoder/ethread.h
+++ b/av1/encoder/ethread.h
@@ -84,7 +84,7 @@
void av1_init_mt_sync(AV1_COMP *cpi, int is_first_pass);
#endif // CONFIG_MULTITHREAD
-void av1_create_second_pass_workers(AV1_COMP *cpi, int num_workers);
+void av1_init_tile_thread_data(AV1_COMP *cpi, int is_first_pass);
void av1_cdef_mse_calc_frame_mt(AV1_COMMON *cm, MultiThreadInfo *mt_info,
CdefSearchCtx *cdef_search_ctx);