Abstract worker creation for multi-threading
Creation of workers have been abstracted to a common place
so that it can be used by all the multi-threading stages.
Change-Id: I42347a951b23eec7d814518a04b327ccf34629ff
diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c
index db64a24..0ef0fd8 100644
--- a/av1/av1_cx_iface.c
+++ b/av1/av1_cx_iface.c
@@ -24,6 +24,7 @@
#include "av1/av1_iface_common.h"
#include "av1/encoder/bitstream.h"
#include "av1/encoder/encoder.h"
+#include "av1/encoder/ethread.h"
#include "av1/encoder/firstpass.h"
#define MAG_SIZE (4)
@@ -2182,12 +2183,19 @@
int index_size = 0;
int has_fwd_keyframe = 0;
+ const int num_workers = av1_compute_num_enc_workers(cpi);
+ if ((num_workers > 1) && (cpi->mt_info.num_workers == 0))
+ av1_create_workers(cpi, num_workers);
+
// Call for LAP stage
if (cpi_lap != NULL) {
int status;
aom_rational64_t timestamp_ratio_la = *timestamp_ratio;
int64_t dst_time_stamp_la = dst_time_stamp;
int64_t dst_end_time_stamp_la = dst_end_time_stamp;
+ if (cpi_lap->mt_info.workers == NULL)
+ cpi_lap->mt_info.workers = cpi->mt_info.workers;
+ cpi_lap->mt_info.num_workers = cpi->mt_info.num_workers;
status = av1_get_compressed_data(
cpi_lap, &lib_flags, &frame_size, NULL, &dst_time_stamp_la,
&dst_end_time_stamp_la, !img, ×tamp_ratio_la);
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 6ce2769..24a2dbf 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -3525,6 +3525,7 @@
static AOM_INLINE void free_thread_data(AV1_COMP *cpi) {
MultiThreadInfo *const mt_info = &cpi->mt_info;
AV1_COMMON *cm = &cpi->common;
+ if (mt_info->tile_thr_data == NULL) return;
for (int t = 0; t < mt_info->num_workers; ++t) {
EncWorkerData *const thread_data = &mt_info->tile_thr_data[t];
aom_free(thread_data->td->tctx);
@@ -3667,7 +3668,7 @@
aom_free_frame_buffer(&tpl_data->tpl_rec_pool[frame]);
}
- terminate_worker_data(cpi);
+ if (cpi->compressor_stage != LAP_STAGE) terminate_worker_data(cpi);
free_thread_data(cpi);
#if CONFIG_MULTITHREAD
@@ -3682,7 +3683,7 @@
#endif
av1_row_mt_mem_dealloc(cpi);
aom_free(mt_info->tile_thr_data);
- aom_free(mt_info->workers);
+ if (cpi->compressor_stage != LAP_STAGE) aom_free(mt_info->workers);
#if !CONFIG_REALTIME_ONLY
av1_tpl_dealloc(&tpl_data->tpl_mt_sync);
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 02beb5e..1e1b7a9 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -805,9 +805,15 @@
} AV1EncRowMultiThreadInfo;
typedef struct {
- // Number of workers created for encoder multi-threading.
+ // Number of workers created for multi-threading.
int num_workers;
+ // Number of workers created for tpl and tile/row multi-threading of encoder.
+ int num_enc_workers;
+
+ // Number of workers created for first-pass multi-threading.
+ int num_fp_workers;
+
// Synchronization object used to launch job in the worker thread.
AVxWorker *workers;
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c
index 5744a1a..0b93994 100644
--- a/av1/encoder/ethread.c
+++ b/av1/encoder/ethread.c
@@ -512,9 +512,6 @@
MultiThreadInfo *const mt_info = &cpi->mt_info;
int sb_mi_size = av1_get_sb_mi_size(cm);
- CHECK_MEM_ERROR(cm, mt_info->workers,
- aom_malloc(num_workers * sizeof(*mt_info->workers)));
-
CHECK_MEM_ERROR(cm, mt_info->tile_thr_data,
aom_calloc(num_workers, sizeof(*mt_info->tile_thr_data)));
@@ -539,9 +536,7 @@
AVxWorker *const worker = &mt_info->workers[i];
EncWorkerData *const thread_data = &mt_info->tile_thr_data[i];
- ++mt_info->num_workers;
- winterface->init(worker);
- worker->thread_name = "aom enc worker";
+ ++mt_info->num_enc_workers;
thread_data->cpi = cpi;
thread_data->thread_id = i;
@@ -620,15 +615,20 @@
}
}
-static AOM_INLINE void create_workers(AV1_COMP *cpi, int num_workers) {
+void av1_create_workers(AV1_COMP *cpi, int num_workers) {
AV1_COMMON *const cm = &cpi->common;
MultiThreadInfo *const mt_info = &cpi->mt_info;
+ const AVxWorkerInterface *const winterface = aom_get_worker_interface();
+ mt_info->tile_thr_data = NULL;
CHECK_MEM_ERROR(cm, mt_info->workers,
aom_malloc(num_workers * sizeof(*mt_info->workers)));
-
- CHECK_MEM_ERROR(cm, mt_info->tile_thr_data,
- aom_calloc(num_workers, sizeof(*mt_info->tile_thr_data)));
+ for (int i = num_workers - 1; i >= 0; i--) {
+ AVxWorker *const worker = &mt_info->workers[i];
+ winterface->init(worker);
+ worker->thread_name = "aom enc worker";
+ ++mt_info->num_workers;
+ }
}
#if !CONFIG_REALTIME_ONLY
@@ -637,7 +637,10 @@
const AVxWorkerInterface *const winterface = aom_get_worker_interface();
MultiThreadInfo *const mt_info = &cpi->mt_info;
- assert(mt_info->workers != NULL && mt_info->tile_thr_data != NULL);
+ CHECK_MEM_ERROR(cm, mt_info->tile_thr_data,
+ aom_calloc(num_workers, sizeof(*mt_info->tile_thr_data)));
+
+ assert(mt_info->workers != NULL);
#if CONFIG_MULTITHREAD
if (cpi->oxcf.row_mt == 1) {
@@ -654,9 +657,7 @@
AVxWorker *const worker = &mt_info->workers[i];
EncWorkerData *const thread_data = &mt_info->tile_thr_data[i];
- ++mt_info->num_workers;
- winterface->init(worker);
- worker->thread_name = "aom enc worker";
+ ++mt_info->num_fp_workers;
thread_data->cpi = cpi;
thread_data->thread_id = i;
@@ -867,10 +868,10 @@
av1_init_tile_data(cpi);
// Only run once to create threads and allocate thread data.
- if (mt_info->num_workers == 0) {
+ if (mt_info->num_enc_workers == 0) {
create_enc_workers(cpi, num_workers);
} else {
- num_workers = AOMMIN(num_workers, mt_info->num_workers);
+ num_workers = AOMMIN(num_workers, mt_info->num_enc_workers);
}
prepare_enc_workers(cpi, enc_worker_hook, num_workers);
launch_enc_workers(&cpi->mt_info, num_workers);
@@ -1007,10 +1008,10 @@
}
// Only run once to create threads and allocate thread data.
- if (mt_info->num_workers == 0) {
+ if (mt_info->num_enc_workers == 0) {
create_enc_workers(cpi, num_workers);
} else {
- num_workers = AOMMIN(num_workers, mt_info->num_workers);
+ num_workers = AOMMIN(num_workers, mt_info->num_enc_workers);
}
assign_tile_to_thread(thread_id_to_tile_id, tile_cols * tile_rows,
num_workers);
@@ -1074,13 +1075,9 @@
}
}
+ num_workers = AOMMIN(num_workers, mt_info->num_workers);
// Only run once to create threads and allocate thread data.
- if (mt_info->num_workers == 0) {
- create_workers(cpi, num_workers);
- fp_create_enc_workers(cpi, num_workers);
- } else {
- num_workers = AOMMIN(num_workers, mt_info->num_workers);
- }
+ if (mt_info->num_fp_workers == 0) fp_create_enc_workers(cpi, num_workers);
assign_tile_to_thread(thread_id_to_tile_id, tile_cols * tile_rows,
num_workers);
fp_prepare_enc_workers(cpi, fp_enc_row_mt_worker_hook, num_workers);
@@ -1169,7 +1166,7 @@
BLOCK_SIZE bsize = convert_length_to_bsize(MC_FLOW_BSIZE_1D);
TX_SIZE tx_size = max_txsize_lookup[bsize];
int mi_height = mi_size_high[bsize];
- int num_active_workers = mt_info->num_workers;
+ int num_active_workers = mt_info->num_enc_workers;
for (int mi_row = thread_data->start * mi_height; mi_row < mi_params->mi_rows;
mi_row += num_active_workers * mi_height) {
// Motion estimation row boundary
@@ -1284,10 +1281,10 @@
memset(tpl_sync->num_finished_cols, -1,
sizeof(*tpl_sync->num_finished_cols) * mb_rows);
- if (mt_info->num_workers == 0)
+ if (mt_info->num_enc_workers == 0)
create_enc_workers(cpi, num_workers);
else
- num_workers = AOMMIN(num_workers, mt_info->num_workers);
+ num_workers = AOMMIN(num_workers, mt_info->num_enc_workers);
prepare_tpl_workers(cpi, tpl_worker_hook, num_workers);
launch_enc_workers(&cpi->mt_info, num_workers);
diff --git a/av1/encoder/ethread.h b/av1/encoder/ethread.h
index 7a4fb27..0edcaa4 100644
--- a/av1/encoder/ethread.h
+++ b/av1/encoder/ethread.h
@@ -70,6 +70,8 @@
int av1_compute_num_enc_workers(AV1_COMP *cpi);
+void av1_create_workers(AV1_COMP *cpi, int num_workers);
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/av1/encoder/firstpass.c b/av1/encoder/firstpass.c
index cc7ed86..a7a1618 100644
--- a/av1/encoder/firstpass.c
+++ b/av1/encoder/firstpass.c
@@ -1117,7 +1117,7 @@
enc_row_mt->sync_write_ptr = av1_row_mt_sync_write_dummy;
mt_info->row_mt_enabled = 0;
- if (mt_info->row_mt_enabled) {
+ if (mt_info->row_mt_enabled && (mt_info->num_workers > 1)) {
enc_row_mt->sync_read_ptr = av1_row_mt_sync_read;
enc_row_mt->sync_write_ptr = av1_row_mt_sync_write;
av1_fp_encode_tiles_row_mt(cpi);
diff --git a/av1/encoder/tpl_model.c b/av1/encoder/tpl_model.c
index e18892d..7bb458c 100644
--- a/av1/encoder/tpl_model.c
+++ b/av1/encoder/tpl_model.c
@@ -1092,7 +1092,7 @@
if (gf_group->size == frame_idx) continue;
init_mc_flow_dispenser(cpi, frame_idx, pframe_qindex);
- if (av1_compute_num_enc_workers(cpi) > 1) {
+ if (mt_info->num_workers > 1) {
tpl_row_mt->sync_read_ptr = av1_tpl_row_mt_sync_read;
tpl_row_mt->sync_write_ptr = av1_tpl_row_mt_sync_write;
av1_mc_flow_dispenser_mt(cpi);