Implement multithreading for CDEF search
This CL adds support for block-based multithreading of the CDEF
search module.
cpu-used   Resolution   Tile   Average Encode Time
                               Reduction (%)
   2       832x480      2x1    0.39 (2 threads)
   3       1280x720     2x2    0.52 (4 threads)
   4       1920x1080    4x2    0.51 (8 threads)
   5       3840x2160    4x2    2.57 (8 threads)
   6       3840x2160    4x2    1.55 (8 threads)
Change-Id: I754d3e7f0cb5355726682163bcf99bfde43ba233
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index cc681bc..9dade55 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -1541,6 +1541,7 @@
#endif
if (mt_info->num_workers > 1) {
av1_loop_filter_dealloc(&mt_info->lf_row_sync);
+ av1_cdef_mt_dealloc(&mt_info->cdef_sync);
#if !CONFIG_REALTIME_ONLY
av1_loop_restoration_dealloc(&mt_info->lr_row_sync, mt_info->num_workers);
av1_gm_dealloc(&mt_info->gm_sync);
@@ -2041,7 +2042,7 @@
start_timing(cpi, cdef_time);
#endif
// Find CDEF parameters
- av1_cdef_search(&cm->cur_frame->buf, cpi->source, cm, xd,
+ av1_cdef_search(&cpi->mt_info, &cm->cur_frame->buf, cpi->source, cm, xd,
cpi->sf.lpf_sf.cdef_pick_method, cpi->td.mb.rdmult);
// Apply the filter
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index ed79ba0..b4bc58d 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -40,6 +40,7 @@
#include "av1/encoder/level.h"
#include "av1/encoder/lookahead.h"
#include "av1/encoder/mcomp.h"
+#include "av1/encoder/pickcdef.h"
#include "av1/encoder/ratectrl.h"
#include "av1/encoder/rd.h"
#include "av1/encoder/speed_features.h"
@@ -1337,7 +1338,7 @@
/*!
* \brief Encoder parameters related to multi-threading.
*/
-typedef struct {
+typedef struct MultiThreadInfo {
/*!
* Number of workers created for multi-threading.
*/
@@ -1399,6 +1400,11 @@
* Temporal Filter multi-threading object.
*/
AV1TemporalFilterSync tf_sync;
+
+ /*!
+ * CDEF search multi-threading object.
+ */
+ AV1CdefSync cdef_sync;
} MultiThreadInfo;
/*!\cond */
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c
index bf22743..730eedb 100644
--- a/av1/encoder/ethread.c
+++ b/av1/encoder/ethread.c
@@ -545,6 +545,12 @@
CHECK_MEM_ERROR(cm, tf_sync->mutex_, aom_malloc(sizeof(*tf_sync->mutex_)));
if (tf_sync->mutex_) pthread_mutex_init(tf_sync->mutex_, NULL);
}
+ AV1CdefSync *cdef_sync = &mt_info->cdef_sync;
+ if (cdef_sync->mutex_ == NULL) {
+ CHECK_MEM_ERROR(cm, cdef_sync->mutex_,
+ aom_malloc(sizeof(*(cdef_sync->mutex_))));
+ if (cdef_sync->mutex_) pthread_mutex_init(cdef_sync->mutex_, NULL);
+ }
#endif
for (int i = num_workers - 1; i >= 0; i--) {
@@ -1662,3 +1668,110 @@
sync_enc_workers(&cpi->mt_info, &cpi->common, num_workers);
}
#endif // !CONFIG_REALTIME_ONLY
+
+// Deallocate memory for CDEF search multi-thread synchronization.
+void av1_cdef_mt_dealloc(AV1CdefSync *cdef_sync) {
+ (void)cdef_sync;
+ assert(cdef_sync != NULL);
+#if CONFIG_MULTITHREAD
+ if (cdef_sync->mutex_ != NULL) {
+ pthread_mutex_destroy(cdef_sync->mutex_);
+ aom_free(cdef_sync->mutex_);
+ }
+#endif // CONFIG_MULTITHREAD
+}
+
+// Updates the row and column indices of the next job to be processed.
+// Also updates end_of_frame flag when the processing of all blocks is complete.
+static void update_next_job_info(AV1CdefSync *cdef_sync, int nvfb, int nhfb) {
+ cdef_sync->fbc++;
+ if (cdef_sync->fbc == nhfb) {
+ cdef_sync->fbr++;
+ if (cdef_sync->fbr == nvfb) {
+ cdef_sync->end_of_frame = 1;
+ } else {
+ cdef_sync->fbc = 0;
+ }
+ }
+}
+
+// Initializes cdef_sync parameters.
+static AOM_INLINE void cdef_reset_job_info(AV1CdefSync *cdef_sync) {
+ cdef_sync->end_of_frame = 0;
+ cdef_sync->fbr = 0;
+ cdef_sync->fbc = 0;
+}
+
+// Checks whether a job is available. If one is available, populates the
+// information for that job and returns 1; otherwise returns 0.
+static AOM_INLINE int cdef_get_next_job(AV1CdefSync *cdef_sync,
+ CdefSearchCtx *cdef_search_ctx,
+ int *cur_fbr, int *cur_fbc,
+ int *sb_count) {
+#if CONFIG_MULTITHREAD
+ pthread_mutex_lock(cdef_sync->mutex_);
+#endif // CONFIG_MULTITHREAD
+ int do_next_block = 0;
+ const int nvfb = cdef_search_ctx->nvfb;
+ const int nhfb = cdef_search_ctx->nhfb;
+
+ // If a block is skipped, do not process it; advance to the next block
+ // and check its skip condition.
+ while ((!cdef_sync->end_of_frame) &&
+ (cdef_sb_skip(cdef_search_ctx->mi_params, cdef_sync->fbr,
+ cdef_sync->fbc))) {
+ update_next_job_info(cdef_sync, nvfb, nhfb);
+ }
+
+ // Populate the information needed for the current job and update the row
+ // and column indices of the next block to be processed.
+ if (cdef_sync->end_of_frame == 0) {
+ do_next_block = 1;
+ *cur_fbr = cdef_sync->fbr;
+ *cur_fbc = cdef_sync->fbc;
+ *sb_count = cdef_search_ctx->sb_count;
+ cdef_search_ctx->sb_count++;
+ update_next_job_info(cdef_sync, nvfb, nhfb);
+ }
+#if CONFIG_MULTITHREAD
+ pthread_mutex_unlock(cdef_sync->mutex_);
+#endif // CONFIG_MULTITHREAD
+ return do_next_block;
+}
+
+// Hook function for each thread in CDEF search multi-threading.
+static int cdef_filter_block_worker_hook(void *arg1, void *arg2) {
+ AV1CdefSync *const cdef_sync = (AV1CdefSync *)arg1;
+ CdefSearchCtx *cdef_search_ctx = (CdefSearchCtx *)arg2;
+ int cur_fbr, cur_fbc, sb_count;
+ while (cdef_get_next_job(cdef_sync, cdef_search_ctx, &cur_fbr, &cur_fbc,
+ &sb_count)) {
+ av1_cdef_mse_calc_block(cdef_search_ctx, cur_fbr, cur_fbc, sb_count);
+ }
+ return 1;
+}
+
+// Assigns CDEF search hook function and thread data to each worker.
+static void prepare_cdef_workers(MultiThreadInfo *mt_info,
+ CdefSearchCtx *cdef_search_ctx,
+ AVxWorkerHook hook, int num_workers) {
+ for (int i = num_workers - 1; i >= 0; i--) {
+ AVxWorker *worker = &mt_info->workers[i];
+ worker->hook = hook;
+ worker->data1 = &mt_info->cdef_sync;
+ worker->data2 = cdef_search_ctx;
+ }
+}
+
+// Implements multi-threading for CDEF search.
+void av1_cdef_mse_calc_frame_mt(AV1_COMMON *cm, MultiThreadInfo *mt_info,
+ CdefSearchCtx *cdef_search_ctx) {
+ AV1CdefSync *cdef_sync = &mt_info->cdef_sync;
+ const int num_workers = mt_info->num_workers;
+
+ cdef_reset_job_info(cdef_sync);
+ prepare_cdef_workers(mt_info, cdef_search_ctx, cdef_filter_block_worker_hook,
+ num_workers);
+ launch_workers(mt_info, num_workers);
+ sync_enc_workers(mt_info, cm, num_workers);
+}
diff --git a/av1/encoder/ethread.h b/av1/encoder/ethread.h
index ab8e1bb..e8b8ad3 100644
--- a/av1/encoder/ethread.h
+++ b/av1/encoder/ethread.h
@@ -78,6 +78,11 @@
void av1_create_workers(AV1_COMP *cpi, int num_workers);
+void av1_cdef_mse_calc_frame_mt(AV1_COMMON *cm, MultiThreadInfo *mt_info,
+ CdefSearchCtx *cdef_search_ctx);
+
+void av1_cdef_mt_dealloc(AV1CdefSync *cdef_sync);
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/av1/encoder/pickcdef.c b/av1/encoder/pickcdef.c
index c71ef31..55e466d 100644
--- a/av1/encoder/pickcdef.c
+++ b/av1/encoder/pickcdef.c
@@ -20,6 +20,7 @@
#include "av1/common/av1_common_int.h"
#include "av1/common/reconinter.h"
#include "av1/encoder/encoder.h"
+#include "av1/encoder/ethread.h"
#include "av1/encoder/pickcdef.h"
// Get primary and secondary filter strength for the given strength index and
@@ -289,11 +290,10 @@
// fbc: Column index in units of 64x64 block
// Returns:
// Nothing will be returned. Contents of cdef_search_ctx will be modified.
-static void cdef_mse_calc_block(CdefSearchCtx *cdef_search_ctx, int fbr,
- int fbc) {
- const CommonModeInfoParams *const mi_params = &cdef_search_ctx->cm->mi_params;
+void av1_cdef_mse_calc_block(CdefSearchCtx *cdef_search_ctx, int fbr, int fbc,
+ int sb_count) {
+ const CommonModeInfoParams *const mi_params = cdef_search_ctx->mi_params;
const YV12_BUFFER_CONFIG *ref = cdef_search_ctx->ref;
- const int sb_count = cdef_search_ctx->sb_count;
const int coeff_shift = cdef_search_ctx->coeff_shift;
const int *mi_wide_l2 = cdef_search_ctx->mi_wide_l2;
const int *mi_high_l2 = cdef_search_ctx->mi_high_l2;
@@ -387,14 +387,14 @@
// Returns:
// Nothing will be returned. Contents of cdef_search_ctx will be modified.
static void cdef_mse_calc_frame(CdefSearchCtx *cdef_search_ctx) {
- const CommonModeInfoParams *const mi_params = &cdef_search_ctx->cm->mi_params;
// Loop over each sb.
for (int fbr = 0; fbr < cdef_search_ctx->nvfb; ++fbr) {
for (int fbc = 0; fbc < cdef_search_ctx->nhfb; ++fbc) {
// Checks if cdef processing can be skipped for particular sb.
- if (cdef_sb_skip(mi_params, fbr, fbc)) continue;
+ if (cdef_sb_skip(cdef_search_ctx->mi_params, fbr, fbc)) continue;
// Calculate mse for each sb and store the relevant sb index.
- cdef_mse_calc_block(cdef_search_ctx, fbr, fbc);
+ av1_cdef_mse_calc_block(cdef_search_ctx, fbr, fbc,
+ cdef_search_ctx->sb_count);
cdef_search_ctx->sb_count++;
}
}
@@ -448,7 +448,7 @@
CDEF_PICK_METHOD pick_method) {
const CommonModeInfoParams *const mi_params = &cm->mi_params;
const int num_planes = av1_num_planes(cm);
- cdef_search_ctx->cm = cm;
+ cdef_search_ctx->mi_params = &cm->mi_params;
cdef_search_ctx->ref = ref;
cdef_search_ctx->nvfb =
(mi_params->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
@@ -549,7 +549,7 @@
}
}
-void av1_cdef_search(const YV12_BUFFER_CONFIG *frame,
+void av1_cdef_search(MultiThreadInfo *mt_info, const YV12_BUFFER_CONFIG *frame,
const YV12_BUFFER_CONFIG *ref, AV1_COMMON *cm,
MACROBLOCKD *xd, CDEF_PICK_METHOD pick_method,
int rdmult) {
@@ -568,7 +568,11 @@
// Allocate CDEF search context buffers.
cdef_alloc_data(&cdef_search_ctx);
// Frame level mse calculation.
- cdef_mse_calc_frame(&cdef_search_ctx);
+ if (mt_info->num_workers > 1) {
+ av1_cdef_mse_calc_frame_mt(cm, mt_info, &cdef_search_ctx);
+ } else {
+ cdef_mse_calc_frame(&cdef_search_ctx);
+ }
/* Search for different number of signaling bits. */
int nb_strength_bits = 0;
diff --git a/av1/encoder/pickcdef.h b/av1/encoder/pickcdef.h
index ef342dc..7fe1edb 100644
--- a/av1/encoder/pickcdef.h
+++ b/av1/encoder/pickcdef.h
@@ -19,6 +19,8 @@
#endif
/*!\cond */
+struct MultiThreadInfo;
+
#define REDUCED_PRI_STRENGTHS_LVL1 8
#define REDUCED_PRI_STRENGTHS_LVL2 5
#define REDUCED_SEC_STRENGTHS_LVL3 2
@@ -56,6 +58,20 @@
BLOCK_SIZE bsize, int coeff_shift,
int row, int col);
+// Data related to CDEF search multi-thread synchronization.
+typedef struct AV1CdefSyncData {
+#if CONFIG_MULTITHREAD
+ // Mutex lock used while dispatching jobs.
+ pthread_mutex_t *mutex_;
+#endif // CONFIG_MULTITHREAD
+ // Flag to indicate all blocks are processed and end of frame is reached
+ int end_of_frame;
+ // Row index in units of 64x64 block
+ int fbr;
+ // Column index in units of 64x64 block
+ int fbc;
+} AV1CdefSync;
+
/*! \brief CDEF search context.
*/
typedef struct {
@@ -64,9 +80,9 @@
*/
const YV12_BUFFER_CONFIG *ref;
/*!
- * Pointer to top level common structure
+ * Pointer to params related to MB_MODE_INFO arrays and related info
*/
- AV1_COMMON *cm;
+ CommonModeInfoParams *mi_params;
/*!
* Info specific to each plane
*/
@@ -190,6 +206,9 @@
return 1;
return 0;
}
+
+void av1_cdef_mse_calc_block(CdefSearchCtx *cdef_search_ctx, int fbr, int fbc,
+ int sb_count);
/*!\endcond */
/*!\brief AV1 CDEF parameter search
@@ -198,6 +217,7 @@
*
* Searches for optimal CDEF parameters for frame
*
+ * \param[in] mt_info Pointer to multi-threading parameters
* \param[in] frame Compressed frame buffer
* \param[in] ref Source frame buffer
* \param[in,out] cm Pointer to top level common structure
@@ -216,7 +236,8 @@
* \arg \c damping_factor: CDEF damping factor.
*
*/
-void av1_cdef_search(const YV12_BUFFER_CONFIG *frame,
+void av1_cdef_search(struct MultiThreadInfo *mt_info,
+ const YV12_BUFFER_CONFIG *frame,
const YV12_BUFFER_CONFIG *ref, AV1_COMMON *cm,
MACROBLOCKD *xd, CDEF_PICK_METHOD pick_method, int rdmult);