Add sync_read and sync_write for enc row-mt

sync_read, sync_write functions with relevent allocations
and deallocations have been added to facilitate row-based
multi-threading of encoder.

Change-Id: Ic0f67081f1c219206d0cb7c443be799666d069fc
diff --git a/av1/av1.cmake b/av1/av1.cmake
index 13bdee9..b84b83b 100644
--- a/av1/av1.cmake
+++ b/av1/av1.cmake
@@ -116,6 +116,8 @@
             "${AOM_ROOT}/av1/encoder/av1_fwd_txfm1d.h"
             "${AOM_ROOT}/av1/encoder/av1_fwd_txfm1d_cfg.h"
             "${AOM_ROOT}/av1/encoder/av1_fwd_txfm2d.c"
+            "${AOM_ROOT}/av1/encoder/av1_multi_thread.c"
+            "${AOM_ROOT}/av1/encoder/av1_multi_thread.h"
             "${AOM_ROOT}/av1/encoder/av1_quantize.c"
             "${AOM_ROOT}/av1/encoder/av1_quantize.h"
             "${AOM_ROOT}/av1/encoder/bitstream.c"
diff --git a/av1/encoder/av1_multi_thread.c b/av1/encoder/av1_multi_thread.c
new file mode 100644
index 0000000..c552ccb
--- /dev/null
+++ b/av1/encoder/av1_multi_thread.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <assert.h>
+
+#include "av1/encoder/encoder.h"
+#include "av1/encoder/ethread.h"
+#include "av1/encoder/av1_multi_thread.h"
+
+void av1_row_mt_mem_alloc(AV1_COMP *cpi, int max_sb_rows) {
+  struct AV1Common *cm = &cpi->common;
+  MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
+  int tile_row, tile_col;
+  const int tile_cols = cm->tile_cols;
+  const int tile_rows = cm->tile_rows;
+
+  multi_thread_ctxt->allocated_tile_cols = tile_cols;
+  multi_thread_ctxt->allocated_tile_rows = tile_rows;
+  multi_thread_ctxt->allocated_sb_rows = max_sb_rows;
+
+  // Allocate memory for row based multi-threading
+  for (tile_row = 0; tile_row < multi_thread_ctxt->allocated_tile_rows;
+       tile_row++) {
+    for (tile_col = 0; tile_col < multi_thread_ctxt->allocated_tile_cols;
+         tile_col++) {
+      TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
+      av1_row_mt_sync_mem_alloc(&this_tile->row_mt_sync, cm, max_sb_rows);
+    }
+  }
+}
+
+void av1_row_mt_mem_dealloc(AV1_COMP *cpi) {
+  MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
+  AV1_COMMON *const cm = &cpi->common;
+  const int tile_cols = cm->tile_cols;
+  int tile_col;
+  int tile_row;
+
+  // Free row based multi-threading sync memory
+  for (tile_row = 0; tile_row < multi_thread_ctxt->allocated_tile_rows;
+       tile_row++) {
+    for (tile_col = 0; tile_col < multi_thread_ctxt->allocated_tile_cols;
+         tile_col++) {
+      TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
+      av1_row_mt_sync_mem_dealloc(&this_tile->row_mt_sync);
+    }
+  }
+  multi_thread_ctxt->allocated_sb_rows = 0;
+  multi_thread_ctxt->allocated_tile_cols = 0;
+  multi_thread_ctxt->allocated_tile_rows = 0;
+}
diff --git a/av1/encoder/av1_multi_thread.h b/av1/encoder/av1_multi_thread.h
new file mode 100644
index 0000000..2a1cc7d
--- /dev/null
+++ b/av1/encoder/av1_multi_thread.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AV1_ENCODER_AV1_MULTI_THREAD_H
+#define AV1_ENCODER_AV1_MULTI_THREAD_H
+
+#include "av1/encoder/encoder.h"
+
+void av1_row_mt_mem_alloc(AV1_COMP *cpi, int max_sb_rows);
+
+void av1_row_mt_mem_dealloc(AV1_COMP *cpi);
+
+#endif  // AV1_ENCODER_AV1_MULTI_THREAD_H
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 39c6eb3..f1e855e 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -5555,6 +5555,8 @@
     cpi->row_mt = 0;
     if (cpi->oxcf.row_mt && (cpi->oxcf.max_threads > 1)) {
       cpi->row_mt = 1;
+      cpi->row_mt_sync_read_ptr = av1_row_mt_sync_read;
+      cpi->row_mt_sync_write_ptr = av1_row_mt_sync_write;
       av1_encode_tiles_row_mt(cpi);
     } else {
       if (AOMMIN(cpi->oxcf.max_threads, cm->tile_cols * cm->tile_rows) > 1)
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 5d3d75a..26c520d 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -46,6 +46,7 @@
 #include "av1/common/resize.h"
 #include "av1/common/tile_common.h"
 
+#include "av1/encoder/av1_multi_thread.h"
 #include "av1/encoder/aq_complexity.h"
 #include "av1/encoder/aq_cyclicrefresh.h"
 #include "av1/encoder/aq_variance.h"
@@ -3056,6 +3057,7 @@
       aom_free(thread_data->td);
     }
   }
+  av1_row_mt_mem_dealloc(cpi);
   aom_free(cpi->tile_thr_data);
   aom_free(cpi->workers);
 
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 65e2ab6..f58a4a1 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -505,6 +505,12 @@
   unsigned int count;
 } TOKENLIST;
 
+typedef struct MultiThreadHandle {
+  int allocated_tile_rows;
+  int allocated_tile_cols;
+  int allocated_sb_rows;
+} MultiThreadHandle;
+
 typedef struct RD_COUNTS {
   int64_t comp_pred_diff[REFERENCE_MODES];
   // Stores number of 4x4 blocks using global motion per reference frame.
@@ -812,6 +818,7 @@
   // Set as 1 for monochrome and 3 for other color formats
   int default_interp_skip_flags;
   int preserve_arf_as_gld;
+  MultiThreadHandle multi_thread_ctxt;
   void (*row_mt_sync_read_ptr)(AV1RowMTSync *const, int, int);
   void (*row_mt_sync_write_ptr)(AV1RowMTSync *const, int, int, const int);
 } AV1_COMP;
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c
index 5837451..7009990 100644
--- a/av1/encoder/ethread.c
+++ b/av1/encoder/ethread.c
@@ -9,6 +9,7 @@
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
  */
 
+#include "av1/encoder/av1_multi_thread.h"
 #include "av1/encoder/encodeframe.h"
 #include "av1/encoder/encoder.h"
 #include "av1/encoder/ethread.h"
@@ -44,6 +45,120 @@
   return;
 }
 
+void av1_row_mt_sync_read(AV1RowMTSync *const row_mt_sync, int r, int c) {
+#if CONFIG_MULTITHREAD
+  const int nsync = row_mt_sync->sync_range;
+
+  if (r) {
+    pthread_mutex_t *const mutex = &row_mt_sync->mutex_[r - 1];
+    pthread_mutex_lock(mutex);
+
+    while (c > row_mt_sync->cur_col[r - 1] - nsync) {
+      pthread_cond_wait(&row_mt_sync->cond_[r - 1], mutex);
+    }
+    pthread_mutex_unlock(mutex);
+  }
+#else
+  (void)row_mt_sync;
+  (void)r;
+  (void)c;
+#endif  // CONFIG_MULTITHREAD
+}
+
+void av1_row_mt_sync_write(AV1RowMTSync *const row_mt_sync, int r, int c,
+                           const int cols) {
+#if CONFIG_MULTITHREAD
+  const int nsync = row_mt_sync->sync_range;
+  int cur;
+  // Only signal when there are enough encoded blocks for next row to run.
+  int sig = 1;
+
+  if (c < cols - 1) {
+    cur = c;
+    if (c % nsync) sig = 0;
+  } else {
+    cur = cols + nsync;
+  }
+
+  if (sig) {
+    pthread_mutex_lock(&row_mt_sync->mutex_[r]);
+
+    row_mt_sync->cur_col[r] = cur;
+
+    pthread_cond_signal(&row_mt_sync->cond_[r]);
+    pthread_mutex_unlock(&row_mt_sync->mutex_[r]);
+  }
+#else
+  (void)row_mt_sync;
+  (void)r;
+  (void)c;
+  (void)cols;
+#endif  // CONFIG_MULTITHREAD
+}
+
+// Allocate memory for row synchronization
+void av1_row_mt_sync_mem_alloc(AV1RowMTSync *row_mt_sync, AV1_COMMON *cm,
+                               int rows) {
+  row_mt_sync->rows = rows;
+#if CONFIG_MULTITHREAD
+  {
+    int i;
+
+    CHECK_MEM_ERROR(cm, row_mt_sync->mutex_,
+                    aom_malloc(sizeof(*row_mt_sync->mutex_) * rows));
+    if (row_mt_sync->mutex_) {
+      for (i = 0; i < rows; ++i) {
+        pthread_mutex_init(&row_mt_sync->mutex_[i], NULL);
+      }
+    }
+
+    CHECK_MEM_ERROR(cm, row_mt_sync->cond_,
+                    aom_malloc(sizeof(*row_mt_sync->cond_) * rows));
+    if (row_mt_sync->cond_) {
+      for (i = 0; i < rows; ++i) {
+        pthread_cond_init(&row_mt_sync->cond_[i], NULL);
+      }
+    }
+  }
+#endif  // CONFIG_MULTITHREAD
+
+  CHECK_MEM_ERROR(cm, row_mt_sync->cur_col,
+                  aom_malloc(sizeof(*row_mt_sync->cur_col) * rows));
+
+  // Set up nsync.
+  if (cm->seq_params.mib_size_log2 == 4)
+    row_mt_sync->sync_range = 2;
+  else
+    row_mt_sync->sync_range = 1;
+}
+
+// Deallocate row based multi-threading synchronization related mutex and data
+void av1_row_mt_sync_mem_dealloc(AV1RowMTSync *row_mt_sync) {
+  if (row_mt_sync != NULL) {
+#if CONFIG_MULTITHREAD
+    int i;
+
+    if (row_mt_sync->mutex_ != NULL) {
+      for (i = 0; i < row_mt_sync->rows; ++i) {
+        pthread_mutex_destroy(&row_mt_sync->mutex_[i]);
+      }
+      aom_free(row_mt_sync->mutex_);
+    }
+    if (row_mt_sync->cond_ != NULL) {
+      for (i = 0; i < row_mt_sync->rows; ++i) {
+        pthread_cond_destroy(&row_mt_sync->cond_[i]);
+      }
+      aom_free(row_mt_sync->cond_);
+    }
+#endif  // CONFIG_MULTITHREAD
+    aom_free(row_mt_sync->cur_col);
+    // clear the structure as the source of this call may be dynamic change
+    // in tiles in which case this call will be followed by an _alloc()
+    // which may fail.
+    av1_zero(*row_mt_sync);
+  }
+}
+
 static int enc_row_mt_worker_hook(void *arg1, void *unused) {
   EncWorkerData *const thread_data = (EncWorkerData *)arg1;
   AV1_COMP *const cpi = thread_data->cpi;
@@ -322,12 +437,42 @@
   AV1_COMMON *const cm = &cpi->common;
   const int tile_cols = cm->tile_cols;
   const int tile_rows = cm->tile_rows;
+  MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
   int num_workers = AOMMIN(cpi->oxcf.max_threads, tile_cols * tile_rows);
+  int max_sb_rows = 0;
 
-  if (cpi->tile_data == NULL || cpi->allocated_tiles < tile_cols * tile_rows)
+  if (cpi->tile_data == NULL || cpi->allocated_tiles < tile_cols * tile_rows) {
+    av1_row_mt_mem_dealloc(cpi);
     av1_alloc_tile_data(cpi);
+  }
 
   av1_init_tile_data(cpi);
+
+  for (int row = 0; row < tile_rows; row++) {
+    for (int col = 0; col < tile_cols; col++) {
+      TileDataEnc *tile_data = &cpi->tile_data[row * cm->tile_cols + col];
+      max_sb_rows = AOMMAX(max_sb_rows,
+                           av1_get_sb_rows_in_tile(cm, tile_data->tile_info));
+    }
+  }
+
+  if (multi_thread_ctxt->allocated_tile_cols != tile_cols ||
+      multi_thread_ctxt->allocated_tile_rows != tile_rows ||
+      multi_thread_ctxt->allocated_sb_rows != max_sb_rows) {
+    av1_row_mt_mem_dealloc(cpi);
+    av1_row_mt_mem_alloc(cpi, max_sb_rows);
+  }
+
+  for (int tile_row = 0; tile_row < tile_rows; tile_row++) {
+    for (int tile_col = 0; tile_col < tile_cols; tile_col++) {
+      TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
+
+      // Initialize cur_col to -1 for all rows.
+      memset(this_tile->row_mt_sync.cur_col, -1,
+             sizeof(*this_tile->row_mt_sync.cur_col) * max_sb_rows);
+    }
+  }
+
   // Only run once to create threads and allocate thread data.
   if (cpi->num_workers == 0) {
     create_enc_workers(cpi, num_workers);
diff --git a/av1/encoder/ethread.h b/av1/encoder/ethread.h
index 36bf5e1..853acb1 100644
--- a/av1/encoder/ethread.h
+++ b/av1/encoder/ethread.h
@@ -26,11 +26,20 @@
   int start;
 } EncWorkerData;
 
+void av1_row_mt_sync_read(AV1RowMTSync *const row_mt_sync, int r, int c);
+void av1_row_mt_sync_write(AV1RowMTSync *const row_mt_sync, int r, int c,
+                           const int cols);
+
 void av1_row_mt_sync_read_dummy(struct AV1RowMTSyncData *const row_mt_sync,
                                 int r, int c);
 void av1_row_mt_sync_write_dummy(struct AV1RowMTSyncData *const row_mt_sync,
                                  int r, int c, const int cols);
 
+void av1_row_mt_sync_mem_dealloc(AV1RowMTSync *row_mt_sync);
+// Allocate memory for row based multi-threading synchronization.
+void av1_row_mt_sync_mem_alloc(AV1RowMTSync *row_mt_sync, struct AV1Common *cm,
+                               int rows);
+
 void av1_encode_tiles_mt(struct AV1_COMP *cpi);
 void av1_encode_tiles_row_mt(struct AV1_COMP *cpi);