Allintra: fix an issue in multithreading for deltaq-mode=3 A data race issue was seen due to "av1_frame_init_quantizer" is called inside each worker thread. The quantizer initialization could be moved before worker threads start to run. BUG=aomedia:3376 Change-Id: Ic901a5eed7197c9fd0157a31400ff80ead2e5658 (cherry picked from commit e142124f80a26187250f9345ebe399e015d99278)
diff --git a/av1/encoder/allintra_vis.c b/av1/encoder/allintra_vis.c index fca671c..b86d747 100644 --- a/av1/encoder/allintra_vis.c +++ b/av1/encoder/allintra_vis.c
@@ -197,7 +197,8 @@ return sb_wiener_var; } -void av1_calc_mb_wiener_var_row(AV1_COMP *const cpi, const int mi_row, +void av1_calc_mb_wiener_var_row(AV1_COMP *const cpi, MACROBLOCK *x, + MACROBLOCKD *xd, const int mi_row, int16_t *src_diff, tran_low_t *coeff, tran_low_t *qcoeff, tran_low_t *dqcoeff, double *sum_rec_distortion, @@ -205,9 +206,6 @@ AV1_COMMON *const cm = &cpi->common; uint8_t *buffer = cpi->source->y_buffer; int buf_stride = cpi->source->y_stride; - ThreadData *td = &cpi->td; - MACROBLOCK *x = &td->mb; - MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO mbmi; memset(&mbmi, 0, sizeof(mbmi)); MB_MODE_INFO *mbmi_ptr = &mbmi; @@ -218,8 +216,6 @@ const int coeff_count = block_size * block_size; const int mb_step = mi_size_wide[bsize]; const BitDepthInfo bd_info = get_bit_depth_info(xd); - cm->quant_params.base_qindex = cpi->oxcf.rc_cfg.cq_level; - av1_frame_init_quantizer(cpi); const AV1EncRowMultiThreadInfo *const enc_row_mt = &cpi->mt_info.enc_row_mt; // We allocate cpi->tile_data (of size 1) when we call this function in // multithreaded mode, so cpi->tile_data may be a null pointer when we call @@ -373,10 +369,14 @@ ++mt_unit_col; } } + // Set the pointer to null since mbmi is only allocated inside this function. + xd->mi = NULL; } static void calc_mb_wiener_var(AV1_COMP *const cpi, double *sum_rec_distortion, double *sum_est_rate) { + MACROBLOCK *x = &cpi->td.mb; + MACROBLOCKD *xd = &x->e_mbd; const BLOCK_SIZE bsize = cpi->weber_bsize; const int mb_step = mi_size_wide[bsize]; DECLARE_ALIGNED(32, int16_t, src_diff[32 * 32]); @@ -384,8 +384,8 @@ DECLARE_ALIGNED(32, tran_low_t, qcoeff[32 * 32]); DECLARE_ALIGNED(32, tran_low_t, dqcoeff[32 * 32]); for (int mi_row = 0; mi_row < cpi->frame_info.mi_rows; mi_row += mb_step) { - av1_calc_mb_wiener_var_row(cpi, mi_row, src_diff, coeff, qcoeff, dqcoeff, - sum_rec_distortion, sum_est_rate); + av1_calc_mb_wiener_var_row(cpi, x, xd, mi_row, src_diff, coeff, qcoeff, + dqcoeff, sum_rec_distortion, sum_est_rate); } } @@ -451,6 +451,17 @@ aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR, "Failed to allocate frame buffer"); cpi->norm_wiener_variance = 0; + + MACROBLOCK *x = &cpi->td.mb; + MACROBLOCKD *xd = &x->e_mbd; + // xd->mi needs to be setup since it is used in av1_frame_init_quantizer. + MB_MODE_INFO mbmi; + memset(&mbmi, 0, sizeof(mbmi)); + MB_MODE_INFO *mbmi_ptr = &mbmi; + xd->mi = &mbmi_ptr; + cm->quant_params.base_qindex = cpi->oxcf.rc_cfg.cq_level; + av1_frame_init_quantizer(cpi); + double sum_rec_distortion = 0.0; double sum_est_rate = 0.0; @@ -512,6 +523,8 @@ cpi->norm_wiener_variance = AOMMAX(1, cpi->norm_wiener_variance); } + // Set the pointer to null since mbmi is only allocated inside this function. + xd->mi = NULL; aom_free_frame_buffer(&cm->cur_frame->buf); }
diff --git a/av1/encoder/allintra_vis.h b/av1/encoder/allintra_vis.h index e783d22..9e10566 100644 --- a/av1/encoder/allintra_vis.h +++ b/av1/encoder/allintra_vis.h
@@ -22,7 +22,8 @@ void av1_init_mb_wiener_var_buffer(AV1_COMP *cpi); -void av1_calc_mb_wiener_var_row(AV1_COMP *const cpi, const int mi_row, +void av1_calc_mb_wiener_var_row(AV1_COMP *const cpi, MACROBLOCK *x, + MACROBLOCKD *xd, const int mi_row, int16_t *src_diff, tran_low_t *coeff, tran_low_t *qcoeff, tran_low_t *dqcoeff, double *sum_rec_distortion,
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c index 518d77e..c0b0e30 100644 --- a/av1/encoder/ethread.c +++ b/av1/encoder/ethread.c
@@ -2418,7 +2418,6 @@ #endif // !CONFIG_REALTIME_ONLY // Allocate memory for row synchronization -// TODO(chengchen): do we need dealloc? where? static void wiener_var_sync_mem_alloc( AV1EncRowMultiThreadSync *const row_mt_sync, AV1_COMMON *const cm, const int rows) { @@ -2450,6 +2449,34 @@ row_mt_sync->sync_range = 1; } +// Deallocate row based multi-threading synchronization related mutex and data +static void wiener_var_sync_mem_dealloc(AV1EncRowMultiThreadSync *row_mt_sync) { + if (row_mt_sync != NULL) { +#if CONFIG_MULTITHREAD + int i; + + if (row_mt_sync->mutex_ != NULL) { + for (i = 0; i < row_mt_sync->rows; ++i) { + pthread_mutex_destroy(&row_mt_sync->mutex_[i]); + } + aom_free(row_mt_sync->mutex_); + } + if (row_mt_sync->cond_ != NULL) { + for (i = 0; i < row_mt_sync->rows; ++i) { + pthread_cond_destroy(&row_mt_sync->cond_[i]); + } + aom_free(row_mt_sync->cond_); + } +#endif // CONFIG_MULTITHREAD + aom_free(row_mt_sync->num_finished_cols); + + // clear the structure as the source of this call may be dynamic change + // in tiles in which case this call will be followed by an _alloc() + // which may fail. + av1_zero(*row_mt_sync); + } +} + static AOM_INLINE void prepare_wiener_var_workers(AV1_COMP *const cpi, AVxWorkerHook hook, const int num_workers) { @@ -2463,12 +2490,20 @@ worker->data2 = NULL; thread_data->thread_id = i; - // Set the starting tile for each thread. - thread_data->start = i; + // Set the starting tile for each thread, in this case the preprocessing + // stage does not need tiles. So we set it to 0. + thread_data->start = 0; thread_data->cpi = cpi; - thread_data->td = &cpi->td; - thread_data->td->mb = cpi->td.mb; + if (i == 0) { + thread_data->td = &cpi->td; + } else { + thread_data->td = thread_data->original_td; + } + + if (thread_data->td != &cpi->td) { + thread_data->td->mb = cpi->td.mb; + } } } @@ -2476,6 +2511,8 @@ (void)unused; EncWorkerData *const thread_data = (EncWorkerData *)arg1; AV1_COMP *const cpi = thread_data->cpi; + MACROBLOCK *x = &thread_data->td->mb; + MACROBLOCKD *xd = &x->e_mbd; const BLOCK_SIZE bsize = cpi->weber_bsize; const int mb_step = mi_size_wide[bsize]; AV1EncRowMultiThreadSync *const row_mt_sync = &cpi->tile_data[0].row_mt_sync; @@ -2502,8 +2539,9 @@ #endif if (!has_jobs) break; // TODO(chengchen): properly accumulate the distortion and rate. - av1_calc_mb_wiener_var_row(cpi, current_mi_row, src_diff, coeff, qcoeff, - dqcoeff, &sum_rec_distortion, &sum_est_rate); + av1_calc_mb_wiener_var_row(cpi, x, xd, current_mi_row, src_diff, coeff, + qcoeff, dqcoeff, &sum_rec_distortion, + &sum_est_rate); #if CONFIG_MULTITHREAD pthread_mutex_lock(enc_row_mt_mutex_); #endif @@ -2551,6 +2589,8 @@ prepare_wiener_var_workers(cpi, cal_mb_wiener_var_hook, num_workers); launch_workers(mt_info, num_workers); sync_enc_workers(mt_info, cm, num_workers); + + wiener_var_sync_mem_dealloc(row_mt_sync); } // Compare and order tiles based on absolute sum of tx coeffs.