Add support for dynamic allocation of thread data Added support for reallocation of thread data when the number of workers for encode-stage multi-threading increases with frame resizing. Also modified TestExternalResizeWorks of ResizeRealtimeTest to test this scenario. BUG=aomedia:3429 Change-Id: Ieee94b229274e942203c9fc7dffd59a9a3fb5c26 (cherry picked from commit 61fcda00d9a21a6b84d8cf2d4ff07681b31a32db)
diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c index 178d966..dba3010 100644 --- a/av1/av1_cx_iface.c +++ b/av1/av1_cx_iface.c
@@ -23,6 +23,7 @@ #include "av1/av1_iface_common.h" #include "av1/encoder/bitstream.h" #include "av1/encoder/encoder.h" +#include "av1/encoder/encoder_alloc.h" #include "av1/encoder/encoder_utils.h" #include "av1/encoder/ethread.h" #include "av1/encoder/external_partition.h" @@ -3015,6 +3016,19 @@ } #endif // CONFIG_MULTITHREAD } + + // Re-allocate thread data if workers for encoder multi-threading stage + // exceeds prev_num_enc_workers. + const int num_enc_workers = + av1_get_num_mod_workers_for_alloc(&ppi->p_mt_info, MOD_ENC); + if (ppi->p_mt_info.prev_num_enc_workers < num_enc_workers && + num_enc_workers <= ppi->p_mt_info.num_workers) { + free_thread_data(ppi); + for (int j = 0; j < ppi->num_fp_contexts; j++) + aom_free(ppi->parallel_cpi[j]->td.tctx); + av1_init_tile_thread_data(ppi, cpi->oxcf.pass == AOM_RC_FIRST_PASS); + } + for (int i = 0; i < ppi->num_fp_contexts; i++) { av1_init_frame_mt(ppi, ppi->parallel_cpi[i]); }
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c index 0b12ff0..d82182e 100644 --- a/av1/encoder/encoder.c +++ b/av1/encoder/encoder.c
@@ -1550,40 +1550,6 @@ } } -// Deallocate allocated thread_data. -static AOM_INLINE void free_thread_data(AV1_PRIMARY *ppi) { - PrimaryMultiThreadInfo *const p_mt_info = &ppi->p_mt_info; - for (int t = 1; t < p_mt_info->num_workers; ++t) { - EncWorkerData *const thread_data = &p_mt_info->tile_thr_data[t]; - thread_data->td = thread_data->original_td; - aom_free(thread_data->td->tctx); - aom_free(thread_data->td->palette_buffer); - aom_free(thread_data->td->tmp_conv_dst); - release_compound_type_rd_buffers(&thread_data->td->comp_rd_buffer); - for (int j = 0; j < 2; ++j) { - aom_free(thread_data->td->tmp_pred_bufs[j]); - } - aom_free(thread_data->td->pixel_gradient_info); - aom_free(thread_data->td->src_var_info_of_4x4_sub_blocks); - release_obmc_buffers(&thread_data->td->obmc_buffer); - aom_free(thread_data->td->vt64x64); - - for (int x = 0; x < 2; x++) { - for (int y = 0; y < 2; y++) { - aom_free(thread_data->td->hash_value_buffer[x][y]); - thread_data->td->hash_value_buffer[x][y] = NULL; - } - } - aom_free(thread_data->td->counts); - av1_free_pmc(thread_data->td->firstpass_ctx, - ppi->seq_params.monochrome ? 1 : MAX_MB_PLANE); - thread_data->td->firstpass_ctx = NULL; - av1_free_shared_coeff_buffer(&thread_data->td->shared_coeff_buf); - av1_free_sms_tree(thread_data->td); - aom_free(thread_data->td); - } -} - void av1_remove_primary_compressor(AV1_PRIMARY *ppi) { if (!ppi) return; #if !CONFIG_REALTIME_ONLY
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h index d13f08f..ecbe4a1 100644 --- a/av1/encoder/encoder.h +++ b/av1/encoder/encoder.h
@@ -1619,6 +1619,11 @@ * Number of primary workers created for multi-threading. */ int p_num_workers; + + /*! + * Tracks the number of workers in encode stage multi-threading. + */ + int prev_num_enc_workers; } PrimaryMultiThreadInfo; /*!
diff --git a/av1/encoder/encoder_alloc.h b/av1/encoder/encoder_alloc.h index f4c345f..70da377 100644 --- a/av1/encoder/encoder_alloc.h +++ b/av1/encoder/encoder_alloc.h
@@ -390,6 +390,40 @@ return &cpi->scaled_source; } +// Deallocate allocated thread_data. +static AOM_INLINE void free_thread_data(AV1_PRIMARY *ppi) { + PrimaryMultiThreadInfo *const p_mt_info = &ppi->p_mt_info; + for (int t = 1; t < p_mt_info->num_workers; ++t) { + EncWorkerData *const thread_data = &p_mt_info->tile_thr_data[t]; + thread_data->td = thread_data->original_td; + aom_free(thread_data->td->tctx); + aom_free(thread_data->td->palette_buffer); + aom_free(thread_data->td->tmp_conv_dst); + release_compound_type_rd_buffers(&thread_data->td->comp_rd_buffer); + for (int j = 0; j < 2; ++j) { + aom_free(thread_data->td->tmp_pred_bufs[j]); + } + aom_free(thread_data->td->pixel_gradient_info); + aom_free(thread_data->td->src_var_info_of_4x4_sub_blocks); + release_obmc_buffers(&thread_data->td->obmc_buffer); + aom_free(thread_data->td->vt64x64); + + for (int x = 0; x < 2; x++) { + for (int y = 0; y < 2; y++) { + aom_free(thread_data->td->hash_value_buffer[x][y]); + thread_data->td->hash_value_buffer[x][y] = NULL; + } + } + aom_free(thread_data->td->counts); + av1_free_pmc(thread_data->td->firstpass_ctx, + ppi->seq_params.monochrome ? 1 : MAX_MB_PLANE); + thread_data->td->firstpass_ctx = NULL; + av1_free_shared_coeff_buffer(&thread_data->td->shared_coeff_buf); + av1_free_sms_tree(thread_data->td); + aom_free(thread_data->td); + } +} + #ifdef __cplusplus } // extern "C" #endif
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c index 4127c9a..f98b956 100644 --- a/av1/encoder/ethread.c +++ b/av1/encoder/ethread.c
@@ -777,6 +777,7 @@ int num_workers = p_mt_info->num_workers; int num_enc_workers = av1_get_num_mod_workers_for_alloc(p_mt_info, MOD_ENC); + assert(num_enc_workers <= num_workers); for (int i = num_workers - 1; i >= 0; i--) { EncWorkerData *const thread_data = &p_mt_info->tile_thr_data[i]; @@ -886,6 +887,10 @@ } } } + + // Record the number of workers in encode stage multi-threading for which + // allocation is done. + p_mt_info->prev_num_enc_workers = num_enc_workers; } void av1_create_workers(AV1_PRIMARY *ppi, int num_workers) {
diff --git a/test/resize_test.cc b/test/resize_test.cc index e21f4bf..c87c9d3 100644 --- a/test/resize_test.cc +++ b/test/resize_test.cc
@@ -97,10 +97,16 @@ void ScaleForFrameNumber(unsigned int frame, unsigned int initial_w, unsigned int initial_h, unsigned int *w, - unsigned int *h, int flag_codec) { + unsigned int *h, int flag_codec, + bool change_start_resln_) { if (frame < 10) { - *w = initial_w; - *h = initial_h; + if (change_start_resln_) { + *w = initial_w / 4; + *h = initial_h / 4; + } else { + *w = initial_w; + *h = initial_h; + } return; } if (frame < 20) { @@ -179,15 +185,25 @@ limit_ = 150; } int flag_codec_; + bool change_start_resln_; virtual ~ResizingVideoSource() {} protected: + virtual void Begin() { + frame_ = 0; + unsigned int width; + unsigned int height; + ScaleForFrameNumber(frame_, kInitialWidth, kInitialHeight, &width, &height, + flag_codec_, change_start_resln_); + SetSize(width, height); + FillFrame(); + } virtual void Next() { ++frame_; unsigned int width; unsigned int height; ScaleForFrameNumber(frame_, kInitialWidth, kInitialHeight, &width, &height, - flag_codec_); + flag_codec_, change_start_resln_); SetSize(width, height); FillFrame(); } @@ -225,6 +241,7 @@ TEST_P(ResizeTest, TestExternalResizeWorks) { ResizingVideoSource video; video.flag_codec_ = 0; + video.change_start_resln_ = false; cfg_.g_lag_in_frames = 0; // We use max(kInitialWidth, kInitialHeight) because during the test // the width and height of the frame are swapped @@ -241,7 +258,7 @@ unsigned int expected_w; unsigned int expected_h; ScaleForFrameNumber(frame, kInitialWidth, kInitialHeight, &expected_w, - &expected_h, 0); + &expected_h, 0, video.change_start_resln_); EXPECT_EQ(expected_w, info->w) << "Frame " << frame << " had unexpected width"; EXPECT_EQ(expected_h, info->h) @@ -596,23 +613,30 @@ mismatch_psnr_ = 0.0; mismatch_nframes_ = 0; DefaultConfig(); - ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + // Test external resizing with start resolution equal to + // 1. kInitialWidth and kInitialHeight + // 2. 
down-scaled kInitialWidth and kInitialHeight + for (int i = 0; i < 2; i++) { + video.change_start_resln_ = (bool)i; - // Check we decoded the same number of frames as we attempted to encode - ASSERT_EQ(frame_info_list_.size(), video.limit()); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); - for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin(); - info != frame_info_list_.end(); ++info) { - const unsigned int frame = static_cast<unsigned>(info->pts); - unsigned int expected_w; - unsigned int expected_h; - ScaleForFrameNumber(frame, kInitialWidth, kInitialHeight, &expected_w, - &expected_h, 1); - EXPECT_EQ(expected_w, info->w) - << "Frame " << frame << " had unexpected width"; - EXPECT_EQ(expected_h, info->h) - << "Frame " << frame << " had unexpected height"; - EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames()); + // Check we decoded the same number of frames as we attempted to encode + ASSERT_EQ(frame_info_list_.size(), video.limit()); + for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin(); + info != frame_info_list_.end(); ++info) { + const unsigned int frame = static_cast<unsigned>(info->pts); + unsigned int expected_w; + unsigned int expected_h; + ScaleForFrameNumber(frame, kInitialWidth, kInitialHeight, &expected_w, + &expected_h, 1, video.change_start_resln_); + EXPECT_EQ(expected_w, info->w) + << "Frame " << frame << " had unexpected width"; + EXPECT_EQ(expected_h, info->h) + << "Frame " << frame << " had unexpected height"; + EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames()); + } + frame_info_list_.clear(); } }