Allintra: fix an issue in multithreading for deltaq-mode=3

A data race issue was seen due to "av1_frame_init_quantizer" is
called inside each worker thread.

The quantizer initialization could be moved before worker threads
start to run.

BUG=aomedia:3376

Change-Id: Ic901a5eed7197c9fd0157a31400ff80ead2e5658
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c
index f94e675..1c8631a 100644
--- a/av1/encoder/ethread.c
+++ b/av1/encoder/ethread.c
@@ -2417,7 +2417,6 @@
 #endif  // !CONFIG_REALTIME_ONLY
 
 // Allocate memory for row synchronization
-// TODO(chengchen): do we need dealloc? where?
 static void wiener_var_sync_mem_alloc(
     AV1EncRowMultiThreadSync *const row_mt_sync, AV1_COMMON *const cm,
     const int rows) {
@@ -2449,6 +2448,34 @@
   row_mt_sync->sync_range = 1;
 }
 
+// Deallocate row based multi-threading synchronization related mutex and data
+static void wiener_var_sync_mem_dealloc(AV1EncRowMultiThreadSync *row_mt_sync) {
+  if (row_mt_sync != NULL) {
+#if CONFIG_MULTITHREAD
+    int i;
+
+    if (row_mt_sync->mutex_ != NULL) {
+      for (i = 0; i < row_mt_sync->rows; ++i) {
+        pthread_mutex_destroy(&row_mt_sync->mutex_[i]);
+      }
+      aom_free(row_mt_sync->mutex_);
+    }
+    if (row_mt_sync->cond_ != NULL) {
+      for (i = 0; i < row_mt_sync->rows; ++i) {
+        pthread_cond_destroy(&row_mt_sync->cond_[i]);
+      }
+      aom_free(row_mt_sync->cond_);
+    }
+#endif  // CONFIG_MULTITHREAD
+    aom_free(row_mt_sync->num_finished_cols);
+
+    // clear the structure as the source of this call may be dynamic change
+    // in tiles in which case this call will be followed by an _alloc()
+    // which may fail.
+    av1_zero(*row_mt_sync);
+  }
+}
+
 static AOM_INLINE void prepare_wiener_var_workers(AV1_COMP *const cpi,
                                                   AVxWorkerHook hook,
                                                   const int num_workers) {
@@ -2462,12 +2489,20 @@
     worker->data2 = NULL;
 
     thread_data->thread_id = i;
-    // Set the starting tile for each thread.
-    thread_data->start = i;
+    // Set the starting tile for each thread, in this case the preprocessing
+    // stage does not need tiles. So we set it to 0.
+    thread_data->start = 0;
 
     thread_data->cpi = cpi;
-    thread_data->td = &cpi->td;
-    thread_data->td->mb = cpi->td.mb;
+    if (i == 0) {
+      thread_data->td = &cpi->td;
+    } else {
+      thread_data->td = thread_data->original_td;
+    }
+
+    if (thread_data->td != &cpi->td) {
+      thread_data->td->mb = cpi->td.mb;
+    }
   }
 }
 
@@ -2475,6 +2510,8 @@
   (void)unused;
   EncWorkerData *const thread_data = (EncWorkerData *)arg1;
   AV1_COMP *const cpi = thread_data->cpi;
+  MACROBLOCK *x = &thread_data->td->mb;
+  MACROBLOCKD *xd = &x->e_mbd;
   const BLOCK_SIZE bsize = cpi->weber_bsize;
   const int mb_step = mi_size_wide[bsize];
   AV1EncRowMultiThreadSync *const row_mt_sync = &cpi->tile_data[0].row_mt_sync;
@@ -2501,8 +2538,9 @@
 #endif
     if (!has_jobs) break;
     // TODO(chengchen): properly accumulate the distortion and rate.
-    av1_calc_mb_wiener_var_row(cpi, current_mi_row, src_diff, coeff, qcoeff,
-                               dqcoeff, &sum_rec_distortion, &sum_est_rate);
+    av1_calc_mb_wiener_var_row(cpi, x, xd, current_mi_row, src_diff, coeff,
+                               qcoeff, dqcoeff, &sum_rec_distortion,
+                               &sum_est_rate);
 #if CONFIG_MULTITHREAD
     pthread_mutex_lock(enc_row_mt_mutex_);
 #endif
@@ -2550,6 +2588,8 @@
   prepare_wiener_var_workers(cpi, cal_mb_wiener_var_hook, num_workers);
   launch_workers(mt_info, num_workers);
   sync_enc_workers(mt_info, cm, num_workers);
+
+  wiener_var_sync_mem_dealloc(row_mt_sync);
 }
 
 // Compare and order tiles based on absolute sum of tx coeffs.