Allintra: fix an issue in multithreading for deltaq-mode=3
A data race issue was seen due to "av1_frame_init_quantizer" is
called inside each worker thread.
The quantizer initialization could be moved before worker threads
start to run.
BUG=aomedia:3376
Change-Id: Ic901a5eed7197c9fd0157a31400ff80ead2e5658
(cherry picked from commit e142124f80a26187250f9345ebe399e015d99278)
diff --git a/av1/encoder/allintra_vis.c b/av1/encoder/allintra_vis.c
index fca671c..b86d747 100644
--- a/av1/encoder/allintra_vis.c
+++ b/av1/encoder/allintra_vis.c
@@ -197,7 +197,8 @@
return sb_wiener_var;
}
-void av1_calc_mb_wiener_var_row(AV1_COMP *const cpi, const int mi_row,
+void av1_calc_mb_wiener_var_row(AV1_COMP *const cpi, MACROBLOCK *x,
+ MACROBLOCKD *xd, const int mi_row,
int16_t *src_diff, tran_low_t *coeff,
tran_low_t *qcoeff, tran_low_t *dqcoeff,
double *sum_rec_distortion,
@@ -205,9 +206,6 @@
AV1_COMMON *const cm = &cpi->common;
uint8_t *buffer = cpi->source->y_buffer;
int buf_stride = cpi->source->y_stride;
- ThreadData *td = &cpi->td;
- MACROBLOCK *x = &td->mb;
- MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO mbmi;
memset(&mbmi, 0, sizeof(mbmi));
MB_MODE_INFO *mbmi_ptr = &mbmi;
@@ -218,8 +216,6 @@
const int coeff_count = block_size * block_size;
const int mb_step = mi_size_wide[bsize];
const BitDepthInfo bd_info = get_bit_depth_info(xd);
- cm->quant_params.base_qindex = cpi->oxcf.rc_cfg.cq_level;
- av1_frame_init_quantizer(cpi);
const AV1EncRowMultiThreadInfo *const enc_row_mt = &cpi->mt_info.enc_row_mt;
// We allocate cpi->tile_data (of size 1) when we call this function in
// multithreaded mode, so cpi->tile_data may be a null pointer when we call
@@ -373,10 +369,14 @@
++mt_unit_col;
}
}
+ // Set the pointer to null since mbmi is only allocated inside this function.
+ xd->mi = NULL;
}
static void calc_mb_wiener_var(AV1_COMP *const cpi, double *sum_rec_distortion,
double *sum_est_rate) {
+ MACROBLOCK *x = &cpi->td.mb;
+ MACROBLOCKD *xd = &x->e_mbd;
const BLOCK_SIZE bsize = cpi->weber_bsize;
const int mb_step = mi_size_wide[bsize];
DECLARE_ALIGNED(32, int16_t, src_diff[32 * 32]);
@@ -384,8 +384,8 @@
DECLARE_ALIGNED(32, tran_low_t, qcoeff[32 * 32]);
DECLARE_ALIGNED(32, tran_low_t, dqcoeff[32 * 32]);
for (int mi_row = 0; mi_row < cpi->frame_info.mi_rows; mi_row += mb_step) {
- av1_calc_mb_wiener_var_row(cpi, mi_row, src_diff, coeff, qcoeff, dqcoeff,
- sum_rec_distortion, sum_est_rate);
+ av1_calc_mb_wiener_var_row(cpi, x, xd, mi_row, src_diff, coeff, qcoeff,
+ dqcoeff, sum_rec_distortion, sum_est_rate);
}
}
@@ -451,6 +451,17 @@
aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate frame buffer");
cpi->norm_wiener_variance = 0;
+
+ MACROBLOCK *x = &cpi->td.mb;
+ MACROBLOCKD *xd = &x->e_mbd;
+ // xd->mi needs to be setup since it is used in av1_frame_init_quantizer.
+ MB_MODE_INFO mbmi;
+ memset(&mbmi, 0, sizeof(mbmi));
+ MB_MODE_INFO *mbmi_ptr = &mbmi;
+ xd->mi = &mbmi_ptr;
+ cm->quant_params.base_qindex = cpi->oxcf.rc_cfg.cq_level;
+ av1_frame_init_quantizer(cpi);
+
double sum_rec_distortion = 0.0;
double sum_est_rate = 0.0;
@@ -512,6 +523,8 @@
cpi->norm_wiener_variance = AOMMAX(1, cpi->norm_wiener_variance);
}
+ // Set the pointer to null since mbmi is only allocated inside this function.
+ xd->mi = NULL;
aom_free_frame_buffer(&cm->cur_frame->buf);
}
diff --git a/av1/encoder/allintra_vis.h b/av1/encoder/allintra_vis.h
index e783d22..9e10566 100644
--- a/av1/encoder/allintra_vis.h
+++ b/av1/encoder/allintra_vis.h
@@ -22,7 +22,8 @@
void av1_init_mb_wiener_var_buffer(AV1_COMP *cpi);
-void av1_calc_mb_wiener_var_row(AV1_COMP *const cpi, const int mi_row,
+void av1_calc_mb_wiener_var_row(AV1_COMP *const cpi, MACROBLOCK *x,
+ MACROBLOCKD *xd, const int mi_row,
int16_t *src_diff, tran_low_t *coeff,
tran_low_t *qcoeff, tran_low_t *dqcoeff,
double *sum_rec_distortion,
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c
index 518d77e..c0b0e30 100644
--- a/av1/encoder/ethread.c
+++ b/av1/encoder/ethread.c
@@ -2418,7 +2418,6 @@
#endif // !CONFIG_REALTIME_ONLY
// Allocate memory for row synchronization
-// TODO(chengchen): do we need dealloc? where?
static void wiener_var_sync_mem_alloc(
AV1EncRowMultiThreadSync *const row_mt_sync, AV1_COMMON *const cm,
const int rows) {
@@ -2450,6 +2449,34 @@
row_mt_sync->sync_range = 1;
}
+// Deallocate row based multi-threading synchronization related mutex and data
+static void wiener_var_sync_mem_dealloc(AV1EncRowMultiThreadSync *row_mt_sync) {
+ if (row_mt_sync != NULL) {
+#if CONFIG_MULTITHREAD
+ int i;
+
+ if (row_mt_sync->mutex_ != NULL) {
+ for (i = 0; i < row_mt_sync->rows; ++i) {
+ pthread_mutex_destroy(&row_mt_sync->mutex_[i]);
+ }
+ aom_free(row_mt_sync->mutex_);
+ }
+ if (row_mt_sync->cond_ != NULL) {
+ for (i = 0; i < row_mt_sync->rows; ++i) {
+ pthread_cond_destroy(&row_mt_sync->cond_[i]);
+ }
+ aom_free(row_mt_sync->cond_);
+ }
+#endif // CONFIG_MULTITHREAD
+ aom_free(row_mt_sync->num_finished_cols);
+
+ // clear the structure as the source of this call may be dynamic change
+ // in tiles in which case this call will be followed by an _alloc()
+ // which may fail.
+ av1_zero(*row_mt_sync);
+ }
+}
+
static AOM_INLINE void prepare_wiener_var_workers(AV1_COMP *const cpi,
AVxWorkerHook hook,
const int num_workers) {
@@ -2463,12 +2490,20 @@
worker->data2 = NULL;
thread_data->thread_id = i;
- // Set the starting tile for each thread.
- thread_data->start = i;
+ // Set the starting tile for each thread, in this case the preprocessing
+ // stage does not need tiles. So we set it to 0.
+ thread_data->start = 0;
thread_data->cpi = cpi;
- thread_data->td = &cpi->td;
- thread_data->td->mb = cpi->td.mb;
+ if (i == 0) {
+ thread_data->td = &cpi->td;
+ } else {
+ thread_data->td = thread_data->original_td;
+ }
+
+ if (thread_data->td != &cpi->td) {
+ thread_data->td->mb = cpi->td.mb;
+ }
}
}
@@ -2476,6 +2511,8 @@
(void)unused;
EncWorkerData *const thread_data = (EncWorkerData *)arg1;
AV1_COMP *const cpi = thread_data->cpi;
+ MACROBLOCK *x = &thread_data->td->mb;
+ MACROBLOCKD *xd = &x->e_mbd;
const BLOCK_SIZE bsize = cpi->weber_bsize;
const int mb_step = mi_size_wide[bsize];
AV1EncRowMultiThreadSync *const row_mt_sync = &cpi->tile_data[0].row_mt_sync;
@@ -2502,8 +2539,9 @@
#endif
if (!has_jobs) break;
// TODO(chengchen): properly accumulate the distortion and rate.
- av1_calc_mb_wiener_var_row(cpi, current_mi_row, src_diff, coeff, qcoeff,
- dqcoeff, &sum_rec_distortion, &sum_est_rate);
+ av1_calc_mb_wiener_var_row(cpi, x, xd, current_mi_row, src_diff, coeff,
+ qcoeff, dqcoeff, &sum_rec_distortion,
+ &sum_est_rate);
#if CONFIG_MULTITHREAD
pthread_mutex_lock(enc_row_mt_mutex_);
#endif
@@ -2551,6 +2589,8 @@
prepare_wiener_var_workers(cpi, cal_mb_wiener_var_hook, num_workers);
launch_workers(mt_info, num_workers);
sync_enc_workers(mt_info, cm, num_workers);
+
+ wiener_var_sync_mem_dealloc(row_mt_sync);
}
// Compare and order tiles based on absolute sum of tx coeffs.