Fix multi-thread decoding mismatch. When multi-threading is used for decoding, max_scan_line must be set according to the correct scan order rather than to the maximum EOB. This ensures that each decoding thread accesses only its respective buffers.
diff --git a/av1/decoder/decodetxb.c b/av1/decoder/decodetxb.c index 37e5fe8..0a2a46b 100644 --- a/av1/decoder/decodetxb.c +++ b/av1/decoder/decodetxb.c
@@ -299,10 +299,8 @@ int8_t signs_buf[TX_PAD_2D]; int8_t *const signs = set_signs(signs_buf, width); eob_info *eob_data = dcb->eob_data[plane] + dcb->txb_offset[plane]; - uint16_t *const eob = &(eob_data->eob); - uint16_t *const max_scan_line = &(eob_data->max_scan_line); - *max_scan_line = 0; - *eob = 0; + eob_data->max_scan_line = 0; + eob_data->eob = av1_get_max_eob(tx_size); const TX_TYPE tx_type = av1_get_tx_type(xd, plane_type, blk_row, blk_col, tx_size, @@ -312,23 +310,21 @@ const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type); const int16_t *const scan = scan_order->scan; - *eob = av1_get_max_eob(tx_size); - eob_data->eob = *eob; - eob_data->max_scan_line = *eob; - - if (*eob > 1) { + if (eob_data->eob > 1) { memset(levels_buf, 0, sizeof(*levels_buf) * TX_PAD_2D); memset(signs_buf, 0, sizeof(*signs_buf) * TX_PAD_2D); base_cdf_arr base_cdf = ec_ctx->coeff_base_cdf_idtx; br_cdf_arr br_cdf = ec_ctx->coeff_br_cdf_idtx; - read_coeffs_forward_2d(r, 0, *eob - 1, scan, bwl, levels, base_cdf, br_cdf); + read_coeffs_forward_2d(r, 0, eob_data->eob - 1, scan, bwl, levels, base_cdf, + br_cdf); } - for (int c = *eob - 1; c >= 0; --c) { + for (int c = eob_data->eob - 1; c >= 0; --c) { const int pos = scan[c]; uint8_t sign; tran_low_t level = levels[get_padded_idx_left(pos, bwl)]; if (level) { + eob_data->max_scan_line = AOMMAX(eob_data->max_scan_line, pos); int idtx_sign_ctx = get_sign_ctx_skip(signs, levels, pos, bwl); sign = aom_read_symbol(r, ec_ctx->idtx_sign_cdf[idtx_sign_ctx], 2, ACCT_STR);
diff --git a/test/decode_multithreaded_test.cc b/test/decode_multithreaded_test.cc index 2da1c3c..dfa707a 100644 --- a/test/decode_multithreaded_test.cc +++ b/test/decode_multithreaded_test.cc
@@ -37,8 +37,8 @@ row_mt_(GET_PARAM(5)) { init_flags_ = AOM_CODEC_USE_PSNR; aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t(); - cfg.w = 704; - cfg.h = 576; + cfg.w = 352; + cfg.h = 288; cfg.threads = 1; single_thread_dec_ = codec_->CreateDecoder(cfg, 0); @@ -110,7 +110,7 @@ cfg_.g_lag_in_frames = 12; cfg_.rc_end_usage = AOM_VBR; - libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 704, 576, + libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, timebase.den, timebase.num, 0, 2); ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
diff --git a/test/tile_independence_test.cc b/test/tile_independence_test.cc index 58e4486..aca8c8d 100644 --- a/test/tile_independence_test.cc +++ b/test/tile_independence_test.cc
@@ -32,8 +32,8 @@ n_tile_groups_(GET_PARAM(3)) { init_flags_ = AOM_CODEC_USE_PSNR; aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t(); - cfg.w = 704; - cfg.h = 576; + cfg.w = 352; + cfg.h = 288; cfg.threads = 1; fw_dec_ = codec_->CreateDecoder(cfg, 0); inv_dec_ = codec_->CreateDecoder(cfg, 0); @@ -97,7 +97,7 @@ cfg_.g_lag_in_frames = 12; cfg_.rc_end_usage = AOM_VBR; - libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 704, 576, + libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, timebase.den, timebase.num, 0, 5); ASSERT_NO_FATAL_FAILURE(RunLoop(&video));