Refactor worker creation and tile_thr_data allocations

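Separate worker creation from tile_thr_data allocation: the per-thread
buffer allocations move out of av1_create_workers() and the per-pass
create-worker helpers into the new av1_init_tile_thread_data().
This is done to facilitate the abstraction of workers and
tile_thr_data to the primary context.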

Change-Id: I1ff3203445854bdfbebeb8082d5b9ffdd2a52b7d
diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c
index 11c47bc..9028d18 100644
--- a/av1/av1_cx_iface.c
+++ b/av1/av1_cx_iface.c
@@ -2643,7 +2643,8 @@
 
     if (cpi->oxcf.pass == 1) {
 #if !CONFIG_REALTIME_ONLY
-      num_workers = av1_fp_compute_num_enc_workers(cpi);
+      num_workers = cpi->mt_info.num_mod_workers[MOD_FP] =
+          av1_fp_compute_num_enc_workers(cpi);
 #endif
     } else {
       av1_compute_num_workers_for_mt(cpi);
@@ -2651,15 +2652,13 @@
     }
     if ((num_workers > 1) && (cpi->mt_info.num_workers == 0)) {
       av1_create_workers(cpi, num_workers);
+      av1_init_tile_thread_data(cpi, cpi->oxcf.pass == 1);
 #if CONFIG_MULTITHREAD
       av1_init_mt_sync(cpi, cpi->oxcf.pass == 1);
       if (cpi_lap != NULL) {
         av1_init_mt_sync(cpi_lap, 1);
       }
 #endif  // CONFIG_MULTITHREAD
-      if (cpi->oxcf.pass != 1) {
-        av1_create_second_pass_workers(cpi, num_workers);
-      }
     }
 
     // Call for LAP stage
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index fe9d76e..3cdc2a5 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -1425,18 +1425,6 @@
   int num_mod_workers[NUM_MT_MODULES];
 
   /*!
-   * Flag to indicate whether thread specific buffers need to be allocated for
-   * tile/row based multi-threading of first pass stage.
-   */
-  int fp_mt_buf_init_done;
-
-  /*!
-   * Flag to indicate whether thread specific buffers need to be allocated for
-   * tile/row based multi-threading of encode stage.
-   */
-  int enc_mt_buf_init_done;
-
-  /*!
    * Synchronization object used to launch job in the worker thread.
    */
   AVxWorker *workers;
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c
index d274b6b..c70ade2 100644
--- a/av1/encoder/ethread.c
+++ b/av1/encoder/ethread.c
@@ -577,105 +577,98 @@
 }
 #endif  // CONFIG_MULTITHREAD
 
-void av1_create_second_pass_workers(AV1_COMP *cpi, int num_workers) {
-  AV1_COMMON *const cm = &cpi->common;
-  const AVxWorkerInterface *const winterface = aom_get_worker_interface();
-  MultiThreadInfo *const mt_info = &cpi->mt_info;
-
-  assert(mt_info->workers != NULL);
-  assert(mt_info->tile_thr_data != NULL);
-
-  for (int i = num_workers - 1; i >= 0; i--) {
-    AVxWorker *const worker = &mt_info->workers[i];
-    EncWorkerData *const thread_data = &mt_info->tile_thr_data[i];
-
-    thread_data->cpi = cpi;
-    thread_data->thread_id = i;
-    // Set the starting tile for each thread.
-    thread_data->start = i;
-
-    if (i > 0) {
-      // alloc_obmc_buffers(&thread_data->td->obmc_buffer, cm);
-
-      // Create threads
-      if (!winterface->reset(worker))
-        aom_internal_error(cm->error, AOM_CODEC_ERROR,
-                           "Tile encoder thread creation failed");
-    } else {
-      // Main thread acts as a worker and uses the thread data in cpi.
-      thread_data->td = &cpi->td;
-    }
-    winterface->sync(worker);
-  }
-}
-
-static AOM_INLINE void create_enc_workers(AV1_COMP *cpi, int num_workers) {
+void av1_init_tile_thread_data(AV1_COMP *cpi, int is_first_pass) {
   AV1_COMMON *const cm = &cpi->common;
   MultiThreadInfo *const mt_info = &cpi->mt_info;
 
   assert(mt_info->workers != NULL);
   assert(mt_info->tile_thr_data != NULL);
 
+  int num_workers = mt_info->num_workers;
+
   for (int i = num_workers - 1; i >= 0; i--) {
     EncWorkerData *const thread_data = &mt_info->tile_thr_data[i];
 
     if (i > 0) {
-      // Set up sms_tree.
-      av1_setup_sms_tree(cpi, thread_data->td);
+      // Allocate thread data.
+      CHECK_MEM_ERROR(cm, thread_data->td,
+                      aom_memalign(32, sizeof(*thread_data->td)));
+      av1_zero(*thread_data->td);
 
-      alloc_obmc_buffers(&thread_data->td->obmc_buffer, cm);
-
-      CHECK_MEM_ERROR(cm, thread_data->td->inter_modes_info,
-                      (InterModesInfo *)aom_malloc(
-                          sizeof(*thread_data->td->inter_modes_info)));
-
-      for (int x = 0; x < 2; x++)
-        for (int y = 0; y < 2; y++)
-          CHECK_MEM_ERROR(
-              cm, thread_data->td->hash_value_buffer[x][y],
-              (uint32_t *)aom_malloc(
-                  AOM_BUFFER_SIZE_FOR_BLOCK_HASH *
-                  sizeof(*thread_data->td->hash_value_buffer[0][0])));
-
-      // Allocate frame counters in thread data.
-      CHECK_MEM_ERROR(cm, thread_data->td->counts,
-                      aom_calloc(1, sizeof(*thread_data->td->counts)));
-
-      // Allocate buffers used by palette coding mode.
+      // Set up shared coeff buffers.
+      av1_setup_shared_coeff_buffer(cm, &thread_data->td->shared_coeff_buf);
       CHECK_MEM_ERROR(
-          cm, thread_data->td->palette_buffer,
-          aom_memalign(16, sizeof(*thread_data->td->palette_buffer)));
+          cm, thread_data->td->tmp_conv_dst,
+          aom_memalign(32, MAX_SB_SIZE * MAX_SB_SIZE *
+                               sizeof(*thread_data->td->tmp_conv_dst)));
 
-      alloc_compound_type_rd_buffers(cm, &thread_data->td->comp_rd_buffer);
-
-      for (int j = 0; j < 2; ++j) {
-        CHECK_MEM_ERROR(
-            cm, thread_data->td->tmp_pred_bufs[j],
-            aom_memalign(32, 2 * MAX_MB_PLANE * MAX_SB_SQUARE *
-                                 sizeof(*thread_data->td->tmp_pred_bufs[j])));
+      if (i < mt_info->num_mod_workers[MOD_FP]) {
+        // Set up firstpass PICK_MODE_CONTEXT.
+        thread_data->td->firstpass_ctx =
+            av1_alloc_pmc(cpi, BLOCK_16X16, &thread_data->td->shared_coeff_buf);
       }
 
-      const int plane_types = PLANE_TYPES >> cm->seq_params->monochrome;
-      CHECK_MEM_ERROR(cm, thread_data->td->pixel_gradient_info,
-                      aom_malloc(sizeof(*thread_data->td->pixel_gradient_info) *
-                                 plane_types * MAX_SB_SQUARE));
+      if (!is_first_pass && i < mt_info->num_mod_workers[MOD_ENC]) {
+        // Set up sms_tree.
+        av1_setup_sms_tree(cpi, thread_data->td);
 
-      if (cpi->sf.part_sf.partition_search_type == VAR_BASED_PARTITION) {
-        const int num_64x64_blocks =
-            (cm->seq_params->sb_size == BLOCK_64X64) ? 1 : 4;
+        alloc_obmc_buffers(&thread_data->td->obmc_buffer, cm);
+
+        CHECK_MEM_ERROR(cm, thread_data->td->inter_modes_info,
+                        (InterModesInfo *)aom_malloc(
+                            sizeof(*thread_data->td->inter_modes_info)));
+
+        for (int x = 0; x < 2; x++)
+          for (int y = 0; y < 2; y++)
+            CHECK_MEM_ERROR(
+                cm, thread_data->td->hash_value_buffer[x][y],
+                (uint32_t *)aom_malloc(
+                    AOM_BUFFER_SIZE_FOR_BLOCK_HASH *
+                    sizeof(*thread_data->td->hash_value_buffer[0][0])));
+
+        // Allocate frame counters in thread data.
+        CHECK_MEM_ERROR(cm, thread_data->td->counts,
+                        aom_calloc(1, sizeof(*thread_data->td->counts)));
+
+        // Allocate buffers used by palette coding mode.
         CHECK_MEM_ERROR(
-            cm, thread_data->td->vt64x64,
-            aom_malloc(sizeof(*thread_data->td->vt64x64) * num_64x64_blocks));
+            cm, thread_data->td->palette_buffer,
+            aom_memalign(16, sizeof(*thread_data->td->palette_buffer)));
+
+        alloc_compound_type_rd_buffers(cm, &thread_data->td->comp_rd_buffer);
+
+        for (int j = 0; j < 2; ++j) {
+          CHECK_MEM_ERROR(
+              cm, thread_data->td->tmp_pred_bufs[j],
+              aom_memalign(32, 2 * MAX_MB_PLANE * MAX_SB_SQUARE *
+                                   sizeof(*thread_data->td->tmp_pred_bufs[j])));
+        }
+
+        const int plane_types = PLANE_TYPES >> cm->seq_params->monochrome;
+        CHECK_MEM_ERROR(
+            cm, thread_data->td->pixel_gradient_info,
+            aom_malloc(sizeof(*thread_data->td->pixel_gradient_info) *
+                       plane_types * MAX_SB_SQUARE));
+
+        if (cpi->sf.part_sf.partition_search_type == VAR_BASED_PARTITION) {
+          const int num_64x64_blocks =
+              (cm->seq_params->sb_size == BLOCK_64X64) ? 1 : 4;
+          CHECK_MEM_ERROR(
+              cm, thread_data->td->vt64x64,
+              aom_malloc(sizeof(*thread_data->td->vt64x64) * num_64x64_blocks));
+        }
       }
     } else {
       thread_data->td = &cpi->td;
     }
-    if (cpi->oxcf.row_mt == 1)
+
+    if (!is_first_pass && cpi->oxcf.row_mt == 1 &&
+        i < mt_info->num_mod_workers[MOD_ENC]) {
       CHECK_MEM_ERROR(
           cm, thread_data->td->tctx,
           (FRAME_CONTEXT *)aom_memalign(16, sizeof(*thread_data->td->tctx)));
+    }
   }
-  mt_info->enc_mt_buf_init_done = 1;
 }
 
 void av1_create_workers(AV1_COMP *cpi, int num_workers) {
@@ -696,77 +689,21 @@
     winterface->init(worker);
     worker->thread_name = "aom enc worker";
 
-    if (i > 0) {
-      // Allocate thread data.
-      CHECK_MEM_ERROR(cm, thread_data->td,
-                      aom_memalign(32, sizeof(*thread_data->td)));
-      av1_zero(*thread_data->td);
-
-      // Set up shared coeff buffers.
-      av1_setup_shared_coeff_buffer(cm, &thread_data->td->shared_coeff_buf);
-      CHECK_MEM_ERROR(
-          cm, thread_data->td->tmp_conv_dst,
-          aom_memalign(32, MAX_SB_SIZE * MAX_SB_SIZE *
-                               sizeof(*thread_data->td->tmp_conv_dst)));
-    }
-    ++mt_info->num_workers;
-  }
-}
-
-#if !CONFIG_REALTIME_ONLY
-static AOM_INLINE void fp_create_enc_workers(AV1_COMP *cpi, int num_workers) {
-  AV1_COMMON *const cm = &cpi->common;
-  const AVxWorkerInterface *const winterface = aom_get_worker_interface();
-  MultiThreadInfo *const mt_info = &cpi->mt_info;
-  // For single-pass encode, threads are already created during call to
-  // av1_create_second_pass_workers(). Create threads only in the case of
-  // pass = 1.
-  const int create_workers = (mt_info->num_mod_workers[MOD_FP] == 0) ? 1 : 0;
-
-  assert(mt_info->workers != NULL);
-  assert(mt_info->tile_thr_data != NULL);
-
-#if CONFIG_MULTITHREAD
-  AV1EncRowMultiThreadInfo *enc_row_mt = &mt_info->enc_row_mt;
-  if (enc_row_mt->mutex_ == NULL) {
-    CHECK_MEM_ERROR(cm, enc_row_mt->mutex_,
-                    aom_malloc(sizeof(*(enc_row_mt->mutex_))));
-    if (enc_row_mt->mutex_) pthread_mutex_init(enc_row_mt->mutex_, NULL);
-  }
-#endif
-
-  for (int i = num_workers - 1; i >= 0; i--) {
-    AVxWorker *const worker = &mt_info->workers[i];
-    EncWorkerData *const thread_data = &mt_info->tile_thr_data[i];
-
-    thread_data->cpi = cpi;
     thread_data->thread_id = i;
     // Set the starting tile for each thread.
     thread_data->start = i;
 
     if (i > 0) {
-      // Set up firstpass PICK_MODE_CONTEXT.
-      thread_data->td->firstpass_ctx =
-          av1_alloc_pmc(cpi, BLOCK_16X16, &thread_data->td->shared_coeff_buf);
+      // Create threads
+      if (!winterface->reset(worker))
+        aom_internal_error(cm->error, AOM_CODEC_ERROR,
+                           "Tile encoder thread creation failed");
+    }
+    winterface->sync(worker);
 
-      if (create_workers) {
-        // Create threads
-        if (!winterface->reset(worker))
-          aom_internal_error(cm->error, AOM_CODEC_ERROR,
-                             "Tile encoder thread creation failed");
-      }
-    } else {
-      // Main thread acts as a worker and uses the thread data in cpi.
-      thread_data->td = &cpi->td;
-    }
-    if (create_workers) {
-      winterface->sync(worker);
-      ++mt_info->num_mod_workers[MOD_FP];
-    }
+    ++mt_info->num_workers;
   }
-  mt_info->fp_mt_buf_init_done = 1;
 }
-#endif
 
 static AOM_INLINE void launch_workers(MultiThreadInfo *const mt_info,
                                       int num_workers) {
@@ -1011,12 +948,8 @@
   if (cpi->allocated_tiles < tile_cols * tile_rows) av1_alloc_tile_data(cpi);
 
   av1_init_tile_data(cpi);
-  // Only run once to create threads and allocate thread data.
-  if (mt_info->enc_mt_buf_init_done == 0) {
-    create_enc_workers(cpi, num_workers);
-  } else {
-    num_workers = AOMMIN(num_workers, mt_info->num_workers);
-  }
+  num_workers = AOMMIN(num_workers, mt_info->num_workers);
+
   prepare_enc_workers(cpi, enc_worker_hook, num_workers);
   launch_workers(&cpi->mt_info, num_workers);
   sync_enc_workers(&cpi->mt_info, cm, num_workers);
@@ -1152,12 +1085,8 @@
     }
   }
 
-  // Only run once to create threads and allocate thread data.
-  if (mt_info->enc_mt_buf_init_done == 0) {
-    create_enc_workers(cpi, num_workers);
-  } else {
-    num_workers = AOMMIN(num_workers, mt_info->num_workers);
-  }
+  num_workers = AOMMIN(num_workers, mt_info->num_workers);
+
   assign_tile_to_thread(thread_id_to_tile_id, tile_cols * tile_rows,
                         num_workers);
   prepare_enc_workers(cpi, enc_row_mt_worker_hook, num_workers);
@@ -1222,9 +1151,6 @@
   }
 
   num_workers = AOMMIN(num_workers, mt_info->num_workers);
-  // Only run once to create threads and allocate thread data.
-  if (mt_info->fp_mt_buf_init_done == 0)
-    fp_create_enc_workers(cpi, num_workers);
   assign_tile_to_thread(thread_id_to_tile_id, tile_cols * tile_rows,
                         num_workers);
   fp_prepare_enc_workers(cpi, fp_enc_row_mt_worker_hook, num_workers);
diff --git a/av1/encoder/ethread.h b/av1/encoder/ethread.h
index c2ab864..85fcfe5 100644
--- a/av1/encoder/ethread.h
+++ b/av1/encoder/ethread.h
@@ -84,7 +84,7 @@
 void av1_init_mt_sync(AV1_COMP *cpi, int is_first_pass);
 #endif  // CONFIG_MULTITHREAD
 
-void av1_create_second_pass_workers(AV1_COMP *cpi, int num_workers);
+void av1_init_tile_thread_data(AV1_COMP *cpi, int is_first_pass);
 
 void av1_cdef_mse_calc_frame_mt(AV1_COMMON *cm, MultiThreadInfo *mt_info,
                                 CdefSearchCtx *cdef_search_ctx);