Abstract worker creation for multi-threading

Creation of workers have been abstracted to a common place
so that it can be used by all the multi-threading stages.

Change-Id: I42347a951b23eec7d814518a04b327ccf34629ff
diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c
index db64a24..0ef0fd8 100644
--- a/av1/av1_cx_iface.c
+++ b/av1/av1_cx_iface.c
@@ -24,6 +24,7 @@
 #include "av1/av1_iface_common.h"
 #include "av1/encoder/bitstream.h"
 #include "av1/encoder/encoder.h"
+#include "av1/encoder/ethread.h"
 #include "av1/encoder/firstpass.h"
 
 #define MAG_SIZE (4)
@@ -2182,12 +2183,19 @@
     int index_size = 0;
     int has_fwd_keyframe = 0;
 
+    const int num_workers = av1_compute_num_enc_workers(cpi);
+    if ((num_workers > 1) && (cpi->mt_info.num_workers == 0))
+      av1_create_workers(cpi, num_workers);
+
     // Call for LAP stage
     if (cpi_lap != NULL) {
       int status;
       aom_rational64_t timestamp_ratio_la = *timestamp_ratio;
       int64_t dst_time_stamp_la = dst_time_stamp;
       int64_t dst_end_time_stamp_la = dst_end_time_stamp;
+      if (cpi_lap->mt_info.workers == NULL)
+        cpi_lap->mt_info.workers = cpi->mt_info.workers;
+      cpi_lap->mt_info.num_workers = cpi->mt_info.num_workers;
       status = av1_get_compressed_data(
           cpi_lap, &lib_flags, &frame_size, NULL, &dst_time_stamp_la,
           &dst_end_time_stamp_la, !img, &timestamp_ratio_la);
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 6ce2769..24a2dbf 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -3525,6 +3525,7 @@
 static AOM_INLINE void free_thread_data(AV1_COMP *cpi) {
   MultiThreadInfo *const mt_info = &cpi->mt_info;
   AV1_COMMON *cm = &cpi->common;
+  if (mt_info->tile_thr_data == NULL) return;
   for (int t = 0; t < mt_info->num_workers; ++t) {
     EncWorkerData *const thread_data = &mt_info->tile_thr_data[t];
     aom_free(thread_data->td->tctx);
@@ -3667,7 +3668,7 @@
     aom_free_frame_buffer(&tpl_data->tpl_rec_pool[frame]);
   }
 
-  terminate_worker_data(cpi);
+  if (cpi->compressor_stage != LAP_STAGE) terminate_worker_data(cpi);
   free_thread_data(cpi);
 
 #if CONFIG_MULTITHREAD
@@ -3682,7 +3683,7 @@
 #endif
   av1_row_mt_mem_dealloc(cpi);
   aom_free(mt_info->tile_thr_data);
-  aom_free(mt_info->workers);
+  if (cpi->compressor_stage != LAP_STAGE) aom_free(mt_info->workers);
 
 #if !CONFIG_REALTIME_ONLY
   av1_tpl_dealloc(&tpl_data->tpl_mt_sync);
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 02beb5e..1e1b7a9 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -805,9 +805,15 @@
 } AV1EncRowMultiThreadInfo;
 
 typedef struct {
-  // Number of workers created for encoder multi-threading.
+  // Number of workers created for multi-threading.
   int num_workers;
 
+  // Number of workers created for tpl and tile/row multi-threading of encoder.
+  int num_enc_workers;
+
+  // Number of workers created for first-pass multi-threading.
+  int num_fp_workers;
+
   // Synchronization object used to launch job in the worker thread.
   AVxWorker *workers;
 
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c
index 5744a1a..0b93994 100644
--- a/av1/encoder/ethread.c
+++ b/av1/encoder/ethread.c
@@ -512,9 +512,6 @@
   MultiThreadInfo *const mt_info = &cpi->mt_info;
   int sb_mi_size = av1_get_sb_mi_size(cm);
 
-  CHECK_MEM_ERROR(cm, mt_info->workers,
-                  aom_malloc(num_workers * sizeof(*mt_info->workers)));
-
   CHECK_MEM_ERROR(cm, mt_info->tile_thr_data,
                   aom_calloc(num_workers, sizeof(*mt_info->tile_thr_data)));
 
@@ -539,9 +536,7 @@
     AVxWorker *const worker = &mt_info->workers[i];
     EncWorkerData *const thread_data = &mt_info->tile_thr_data[i];
 
-    ++mt_info->num_workers;
-    winterface->init(worker);
-    worker->thread_name = "aom enc worker";
+    ++mt_info->num_enc_workers;
 
     thread_data->cpi = cpi;
     thread_data->thread_id = i;
@@ -620,15 +615,20 @@
   }
 }
 
-static AOM_INLINE void create_workers(AV1_COMP *cpi, int num_workers) {
+void av1_create_workers(AV1_COMP *cpi, int num_workers) {
   AV1_COMMON *const cm = &cpi->common;
   MultiThreadInfo *const mt_info = &cpi->mt_info;
+  const AVxWorkerInterface *const winterface = aom_get_worker_interface();
+  mt_info->tile_thr_data = NULL;
 
   CHECK_MEM_ERROR(cm, mt_info->workers,
                   aom_malloc(num_workers * sizeof(*mt_info->workers)));
-
-  CHECK_MEM_ERROR(cm, mt_info->tile_thr_data,
-                  aom_calloc(num_workers, sizeof(*mt_info->tile_thr_data)));
+  for (int i = num_workers - 1; i >= 0; i--) {
+    AVxWorker *const worker = &mt_info->workers[i];
+    winterface->init(worker);
+    worker->thread_name = "aom enc worker";
+    ++mt_info->num_workers;
+  }
 }
 
 #if !CONFIG_REALTIME_ONLY
@@ -637,7 +637,10 @@
   const AVxWorkerInterface *const winterface = aom_get_worker_interface();
   MultiThreadInfo *const mt_info = &cpi->mt_info;
 
-  assert(mt_info->workers != NULL && mt_info->tile_thr_data != NULL);
+  CHECK_MEM_ERROR(cm, mt_info->tile_thr_data,
+                  aom_calloc(num_workers, sizeof(*mt_info->tile_thr_data)));
+
+  assert(mt_info->workers != NULL);
 
 #if CONFIG_MULTITHREAD
   if (cpi->oxcf.row_mt == 1) {
@@ -654,9 +657,7 @@
     AVxWorker *const worker = &mt_info->workers[i];
     EncWorkerData *const thread_data = &mt_info->tile_thr_data[i];
 
-    ++mt_info->num_workers;
-    winterface->init(worker);
-    worker->thread_name = "aom enc worker";
+    ++mt_info->num_fp_workers;
 
     thread_data->cpi = cpi;
     thread_data->thread_id = i;
@@ -867,10 +868,10 @@
 
   av1_init_tile_data(cpi);
   // Only run once to create threads and allocate thread data.
-  if (mt_info->num_workers == 0) {
+  if (mt_info->num_enc_workers == 0) {
     create_enc_workers(cpi, num_workers);
   } else {
-    num_workers = AOMMIN(num_workers, mt_info->num_workers);
+    num_workers = AOMMIN(num_workers, mt_info->num_enc_workers);
   }
   prepare_enc_workers(cpi, enc_worker_hook, num_workers);
   launch_enc_workers(&cpi->mt_info, num_workers);
@@ -1007,10 +1008,10 @@
   }
 
   // Only run once to create threads and allocate thread data.
-  if (mt_info->num_workers == 0) {
+  if (mt_info->num_enc_workers == 0) {
     create_enc_workers(cpi, num_workers);
   } else {
-    num_workers = AOMMIN(num_workers, mt_info->num_workers);
+    num_workers = AOMMIN(num_workers, mt_info->num_enc_workers);
   }
   assign_tile_to_thread(thread_id_to_tile_id, tile_cols * tile_rows,
                         num_workers);
@@ -1074,13 +1075,9 @@
     }
   }
 
+  num_workers = AOMMIN(num_workers, mt_info->num_workers);
   // Only run once to create threads and allocate thread data.
-  if (mt_info->num_workers == 0) {
-    create_workers(cpi, num_workers);
-    fp_create_enc_workers(cpi, num_workers);
-  } else {
-    num_workers = AOMMIN(num_workers, mt_info->num_workers);
-  }
+  if (mt_info->num_fp_workers == 0) fp_create_enc_workers(cpi, num_workers);
   assign_tile_to_thread(thread_id_to_tile_id, tile_cols * tile_rows,
                         num_workers);
   fp_prepare_enc_workers(cpi, fp_enc_row_mt_worker_hook, num_workers);
@@ -1169,7 +1166,7 @@
   BLOCK_SIZE bsize = convert_length_to_bsize(MC_FLOW_BSIZE_1D);
   TX_SIZE tx_size = max_txsize_lookup[bsize];
   int mi_height = mi_size_high[bsize];
-  int num_active_workers = mt_info->num_workers;
+  int num_active_workers = mt_info->num_enc_workers;
   for (int mi_row = thread_data->start * mi_height; mi_row < mi_params->mi_rows;
        mi_row += num_active_workers * mi_height) {
     // Motion estimation row boundary
@@ -1284,10 +1281,10 @@
   memset(tpl_sync->num_finished_cols, -1,
          sizeof(*tpl_sync->num_finished_cols) * mb_rows);
 
-  if (mt_info->num_workers == 0)
+  if (mt_info->num_enc_workers == 0)
     create_enc_workers(cpi, num_workers);
   else
-    num_workers = AOMMIN(num_workers, mt_info->num_workers);
+    num_workers = AOMMIN(num_workers, mt_info->num_enc_workers);
 
   prepare_tpl_workers(cpi, tpl_worker_hook, num_workers);
   launch_enc_workers(&cpi->mt_info, num_workers);
diff --git a/av1/encoder/ethread.h b/av1/encoder/ethread.h
index 7a4fb27..0edcaa4 100644
--- a/av1/encoder/ethread.h
+++ b/av1/encoder/ethread.h
@@ -70,6 +70,8 @@
 
 int av1_compute_num_enc_workers(AV1_COMP *cpi);
 
+void av1_create_workers(AV1_COMP *cpi, int num_workers);
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/av1/encoder/firstpass.c b/av1/encoder/firstpass.c
index cc7ed86..a7a1618 100644
--- a/av1/encoder/firstpass.c
+++ b/av1/encoder/firstpass.c
@@ -1117,7 +1117,7 @@
   enc_row_mt->sync_write_ptr = av1_row_mt_sync_write_dummy;
   mt_info->row_mt_enabled = 0;
 
-  if (mt_info->row_mt_enabled) {
+  if (mt_info->row_mt_enabled && (mt_info->num_workers > 1)) {
     enc_row_mt->sync_read_ptr = av1_row_mt_sync_read;
     enc_row_mt->sync_write_ptr = av1_row_mt_sync_write;
     av1_fp_encode_tiles_row_mt(cpi);
diff --git a/av1/encoder/tpl_model.c b/av1/encoder/tpl_model.c
index e18892d..7bb458c 100644
--- a/av1/encoder/tpl_model.c
+++ b/av1/encoder/tpl_model.c
@@ -1092,7 +1092,7 @@
 
     if (gf_group->size == frame_idx) continue;
     init_mc_flow_dispenser(cpi, frame_idx, pframe_qindex);
-    if (av1_compute_num_enc_workers(cpi) > 1) {
+    if (mt_info->num_workers > 1) {
       tpl_row_mt->sync_read_ptr = av1_tpl_row_mt_sync_read;
       tpl_row_mt->sync_write_ptr = av1_tpl_row_mt_sync_write;
       av1_mc_flow_dispenser_mt(cpi);