Add sync_read and sync_write for enc row-mt
sync_read, sync_write functions with relevent allocations
and deallocations have been added to facilitate row-based
multi-threading of encoder.
Change-Id: Ic0f67081f1c219206d0cb7c443be799666d069fc
diff --git a/av1/av1.cmake b/av1/av1.cmake
index 13bdee9..b84b83b 100644
--- a/av1/av1.cmake
+++ b/av1/av1.cmake
@@ -116,6 +116,8 @@
"${AOM_ROOT}/av1/encoder/av1_fwd_txfm1d.h"
"${AOM_ROOT}/av1/encoder/av1_fwd_txfm1d_cfg.h"
"${AOM_ROOT}/av1/encoder/av1_fwd_txfm2d.c"
+ "${AOM_ROOT}/av1/encoder/av1_multi_thread.c"
+ "${AOM_ROOT}/av1/encoder/av1_multi_thread.h"
"${AOM_ROOT}/av1/encoder/av1_quantize.c"
"${AOM_ROOT}/av1/encoder/av1_quantize.h"
"${AOM_ROOT}/av1/encoder/bitstream.c"
diff --git a/av1/encoder/av1_multi_thread.c b/av1/encoder/av1_multi_thread.c
new file mode 100644
index 0000000..c552ccb
--- /dev/null
+++ b/av1/encoder/av1_multi_thread.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <assert.h>
+
+#include "av1/encoder/encoder.h"
+#include "av1/encoder/ethread.h"
+#include "av1/encoder/av1_multi_thread.h"
+
+void av1_row_mt_mem_alloc(AV1_COMP *cpi, int max_sb_rows) {
+ struct AV1Common *cm = &cpi->common;
+ MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
+ int tile_row, tile_col;
+ const int tile_cols = cm->tile_cols;
+ const int tile_rows = cm->tile_rows;
+
+ multi_thread_ctxt->allocated_tile_cols = tile_cols;
+ multi_thread_ctxt->allocated_tile_rows = tile_rows;
+ multi_thread_ctxt->allocated_sb_rows = max_sb_rows;
+
+ // Allocate memory for row based multi-threading
+ for (tile_row = 0; tile_row < multi_thread_ctxt->allocated_tile_rows;
+ tile_row++) {
+ for (tile_col = 0; tile_col < multi_thread_ctxt->allocated_tile_cols;
+ tile_col++) {
+ TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
+ av1_row_mt_sync_mem_alloc(&this_tile->row_mt_sync, cm, max_sb_rows);
+ }
+ }
+}
+
+void av1_row_mt_mem_dealloc(AV1_COMP *cpi) {
+ MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
+ AV1_COMMON *const cm = &cpi->common;
+ const int tile_cols = cm->tile_cols;
+ int tile_col;
+ int tile_row;
+
+ // Free row based multi-threading sync memory
+ for (tile_row = 0; tile_row < multi_thread_ctxt->allocated_tile_rows;
+ tile_row++) {
+ for (tile_col = 0; tile_col < multi_thread_ctxt->allocated_tile_cols;
+ tile_col++) {
+ TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
+ av1_row_mt_sync_mem_dealloc(&this_tile->row_mt_sync);
+ }
+ }
+ multi_thread_ctxt->allocated_sb_rows = 0;
+ multi_thread_ctxt->allocated_tile_cols = 0;
+ multi_thread_ctxt->allocated_tile_rows = 0;
+}
diff --git a/av1/encoder/av1_multi_thread.h b/av1/encoder/av1_multi_thread.h
new file mode 100644
index 0000000..2a1cc7d
--- /dev/null
+++ b/av1/encoder/av1_multi_thread.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AV1_ENCODER_AV1_MULTI_THREAD_H
+#define AV1_ENCODER_AV1_MULTI_THREAD_H
+
+#include "av1/encoder/encoder.h"
+
+void av1_row_mt_mem_alloc(AV1_COMP *cpi, int max_sb_rows);
+
+void av1_row_mt_mem_dealloc(AV1_COMP *cpi);
+
+#endif // AV1_ENCODER_AV1_MULTI_THREAD_H
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 39c6eb3..f1e855e 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -5555,6 +5555,8 @@
cpi->row_mt = 0;
if (cpi->oxcf.row_mt && (cpi->oxcf.max_threads > 1)) {
cpi->row_mt = 1;
+ cpi->row_mt_sync_read_ptr = av1_row_mt_sync_read;
+ cpi->row_mt_sync_write_ptr = av1_row_mt_sync_write;
av1_encode_tiles_row_mt(cpi);
} else {
if (AOMMIN(cpi->oxcf.max_threads, cm->tile_cols * cm->tile_rows) > 1)
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 5d3d75a..26c520d 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -46,6 +46,7 @@
#include "av1/common/resize.h"
#include "av1/common/tile_common.h"
+#include "av1/encoder/av1_multi_thread.h"
#include "av1/encoder/aq_complexity.h"
#include "av1/encoder/aq_cyclicrefresh.h"
#include "av1/encoder/aq_variance.h"
@@ -3056,6 +3057,7 @@
aom_free(thread_data->td);
}
}
+ av1_row_mt_mem_dealloc(cpi);
aom_free(cpi->tile_thr_data);
aom_free(cpi->workers);
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 65e2ab6..f58a4a1 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -505,6 +505,12 @@
unsigned int count;
} TOKENLIST;
+typedef struct MultiThreadHandle {
+ int allocated_tile_rows;
+ int allocated_tile_cols;
+ int allocated_sb_rows;
+} MultiThreadHandle;
+
typedef struct RD_COUNTS {
int64_t comp_pred_diff[REFERENCE_MODES];
// Stores number of 4x4 blocks using global motion per reference frame.
@@ -812,6 +818,7 @@
// Set as 1 for monochrome and 3 for other color formats
int default_interp_skip_flags;
int preserve_arf_as_gld;
+ MultiThreadHandle multi_thread_ctxt;
void (*row_mt_sync_read_ptr)(AV1RowMTSync *const, int, int);
void (*row_mt_sync_write_ptr)(AV1RowMTSync *const, int, int, const int);
} AV1_COMP;
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c
index 5837451..7009990 100644
--- a/av1/encoder/ethread.c
+++ b/av1/encoder/ethread.c
@@ -9,6 +9,7 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
+#include "av1/encoder/av1_multi_thread.h"
#include "av1/encoder/encodeframe.h"
#include "av1/encoder/encoder.h"
#include "av1/encoder/ethread.h"
@@ -44,6 +45,120 @@
return;
}
+void av1_row_mt_sync_read(AV1RowMTSync *const row_mt_sync, int r, int c) {
+#if CONFIG_MULTITHREAD
+ const int nsync = row_mt_sync->sync_range;
+
+ if (r) {
+ pthread_mutex_t *const mutex = &row_mt_sync->mutex_[r - 1];
+ pthread_mutex_lock(mutex);
+
+ while (c > row_mt_sync->cur_col[r - 1] - nsync) {
+ pthread_cond_wait(&row_mt_sync->cond_[r - 1], mutex);
+ }
+ pthread_mutex_unlock(mutex);
+ }
+#else
+ (void)row_mt_sync;
+ (void)r;
+ (void)c;
+#endif // CONFIG_MULTITHREAD
+}
+
+void av1_row_mt_sync_write(AV1RowMTSync *const row_mt_sync, int r, int c,
+ const int cols) {
+#if CONFIG_MULTITHREAD
+ const int nsync = row_mt_sync->sync_range;
+ int cur;
+ // Only signal when there are enough encoded blocks for next row to run.
+ int sig = 1;
+
+ if (c < cols - 1) {
+ cur = c;
+ if (c % nsync) sig = 0;
+ } else {
+ cur = cols + nsync;
+ }
+
+ if (sig) {
+ pthread_mutex_lock(&row_mt_sync->mutex_[r]);
+
+ row_mt_sync->cur_col[r] = cur;
+
+ pthread_cond_signal(&row_mt_sync->cond_[r]);
+ pthread_mutex_unlock(&row_mt_sync->mutex_[r]);
+ }
+#else
+ (void)row_mt_sync;
+ (void)r;
+ (void)c;
+ (void)cols;
+#endif // CONFIG_MULTITHREAD
+}
+
+// Allocate memory for row synchronization
+void av1_row_mt_sync_mem_alloc(AV1RowMTSync *row_mt_sync, AV1_COMMON *cm,
+ int rows) {
+ row_mt_sync->rows = rows;
+#if CONFIG_MULTITHREAD
+ {
+ int i;
+
+ CHECK_MEM_ERROR(cm, row_mt_sync->mutex_,
+ aom_malloc(sizeof(*row_mt_sync->mutex_) * rows));
+ if (row_mt_sync->mutex_) {
+ for (i = 0; i < rows; ++i) {
+ pthread_mutex_init(&row_mt_sync->mutex_[i], NULL);
+ }
+ }
+
+ CHECK_MEM_ERROR(cm, row_mt_sync->cond_,
+ aom_malloc(sizeof(*row_mt_sync->cond_) * rows));
+ if (row_mt_sync->cond_) {
+ for (i = 0; i < rows; ++i) {
+ pthread_cond_init(&row_mt_sync->cond_[i], NULL);
+ }
+ }
+ }
+#endif // CONFIG_MULTITHREAD
+
+ CHECK_MEM_ERROR(cm, row_mt_sync->cur_col,
+ aom_malloc(sizeof(*row_mt_sync->cur_col) * rows));
+
+ // Set up nsync.
+ if (cm->seq_params.mib_size_log2 == 4)
+ row_mt_sync->sync_range = 2;
+ else
+ row_mt_sync->sync_range = 1;
+}
+
+// Deallocate row based multi-threading synchronization related mutex and data
+void av1_row_mt_sync_mem_dealloc(AV1RowMTSync *row_mt_sync) {
+ if (row_mt_sync != NULL) {
+#if CONFIG_MULTITHREAD
+ int i;
+
+ if (row_mt_sync->mutex_ != NULL) {
+ for (i = 0; i < row_mt_sync->rows; ++i) {
+ pthread_mutex_destroy(&row_mt_sync->mutex_[i]);
+ }
+ aom_free(row_mt_sync->mutex_);
+ }
+ if (row_mt_sync->cond_ != NULL) {
+ for (i = 0; i < row_mt_sync->rows; ++i) {
+ pthread_cond_destroy(&row_mt_sync->cond_[i]);
+ }
+ aom_free(row_mt_sync->cond_);
+ }
+#endif // CONFIG_MULTITHREAD
+ aom_free(row_mt_sync->cur_col);
+ // clear the structure as the source of this call may be dynamic change
+ // in tiles in which case this call will be followed by an _alloc()
+ // which may fail.
+ av1_zero(*row_mt_sync);
+ }
+}
+
static int enc_row_mt_worker_hook(void *arg1, void *unused) {
EncWorkerData *const thread_data = (EncWorkerData *)arg1;
AV1_COMP *const cpi = thread_data->cpi;
@@ -322,12 +437,42 @@
AV1_COMMON *const cm = &cpi->common;
const int tile_cols = cm->tile_cols;
const int tile_rows = cm->tile_rows;
+ MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
int num_workers = AOMMIN(cpi->oxcf.max_threads, tile_cols * tile_rows);
+ int max_sb_rows = 0;
- if (cpi->tile_data == NULL || cpi->allocated_tiles < tile_cols * tile_rows)
+ if (cpi->tile_data == NULL || cpi->allocated_tiles < tile_cols * tile_rows) {
+ av1_row_mt_mem_dealloc(cpi);
av1_alloc_tile_data(cpi);
+ }
av1_init_tile_data(cpi);
+
+ for (int row = 0; row < tile_rows; row++) {
+ for (int col = 0; col < tile_cols; col++) {
+ TileDataEnc *tile_data = &cpi->tile_data[row * cm->tile_cols + col];
+ max_sb_rows = AOMMAX(max_sb_rows,
+ av1_get_sb_rows_in_tile(cm, tile_data->tile_info));
+ }
+ }
+
+ if (multi_thread_ctxt->allocated_tile_cols != tile_cols ||
+ multi_thread_ctxt->allocated_tile_rows != tile_rows ||
+ multi_thread_ctxt->allocated_sb_rows != max_sb_rows) {
+ av1_row_mt_mem_dealloc(cpi);
+ av1_row_mt_mem_alloc(cpi, max_sb_rows);
+ }
+
+ for (int tile_row = 0; tile_row < tile_rows; tile_row++) {
+ for (int tile_col = 0; tile_col < tile_cols; tile_col++) {
+ TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
+
+ // Initialize cur_col to -1 for all rows.
+ memset(this_tile->row_mt_sync.cur_col, -1,
+ sizeof(*this_tile->row_mt_sync.cur_col) * max_sb_rows);
+ }
+ }
+
// Only run once to create threads and allocate thread data.
if (cpi->num_workers == 0) {
create_enc_workers(cpi, num_workers);
diff --git a/av1/encoder/ethread.h b/av1/encoder/ethread.h
index 36bf5e1..853acb1 100644
--- a/av1/encoder/ethread.h
+++ b/av1/encoder/ethread.h
@@ -26,11 +26,20 @@
int start;
} EncWorkerData;
+void av1_row_mt_sync_read(AV1RowMTSync *const row_mt_sync, int r, int c);
+void av1_row_mt_sync_write(AV1RowMTSync *const row_mt_sync, int r, int c,
+ const int cols);
+
void av1_row_mt_sync_read_dummy(struct AV1RowMTSyncData *const row_mt_sync,
int r, int c);
void av1_row_mt_sync_write_dummy(struct AV1RowMTSyncData *const row_mt_sync,
int r, int c, const int cols);
+void av1_row_mt_sync_mem_dealloc(AV1RowMTSync *row_mt_sync);
+// Allocate memory for row based multi-threading synchronization.
+void av1_row_mt_sync_mem_alloc(AV1RowMTSync *row_mt_sync, struct AV1Common *cm,
+ int rows);
+
void av1_encode_tiles_mt(struct AV1_COMP *cpi);
void av1_encode_tiles_row_mt(struct AV1_COMP *cpi);