Add support for dynamic allocation of thread data
Added support for reallocating thread data when the number of
workers used for multi-threading in the encode stage changes with
frame resizing. Also modified TestExternalResizeWorks
of ResizeRealtimeTest to test this scenario.
BUG=aomedia:3429
Change-Id: Ieee94b229274e942203c9fc7dffd59a9a3fb5c26
diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c
index d655b65..3b62e00 100644
--- a/av1/av1_cx_iface.c
+++ b/av1/av1_cx_iface.c
@@ -25,6 +25,7 @@
#include "av1/av1_iface_common.h"
#include "av1/encoder/bitstream.h"
#include "av1/encoder/encoder.h"
+#include "av1/encoder/encoder_alloc.h"
#include "av1/encoder/encoder_utils.h"
#include "av1/encoder/ethread.h"
#include "av1/encoder/external_partition.h"
@@ -3100,6 +3101,19 @@
}
#endif // CONFIG_MULTITHREAD
}
+
+ // Re-allocate thread data if the number of workers for the encode
+ // multi-threading stage exceeds prev_num_enc_workers.
+ const int num_enc_workers =
+ av1_get_num_mod_workers_for_alloc(&ppi->p_mt_info, MOD_ENC);
+ if (ppi->p_mt_info.prev_num_enc_workers < num_enc_workers &&
+ num_enc_workers <= ppi->p_mt_info.num_workers) {
+ free_thread_data(ppi);
+ for (int j = 0; j < ppi->num_fp_contexts; j++)
+ aom_free(ppi->parallel_cpi[j]->td.tctx);
+ av1_init_tile_thread_data(ppi, cpi->oxcf.pass == AOM_RC_FIRST_PASS);
+ }
+
for (int i = 0; i < ppi->num_fp_contexts; i++) {
av1_init_frame_mt(ppi, ppi->parallel_cpi[i]);
}
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index c8f65c0..b89a881 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -1617,40 +1617,6 @@
}
}
-// Deallocate allocated thread_data.
-static AOM_INLINE void free_thread_data(AV1_PRIMARY *ppi) {
- PrimaryMultiThreadInfo *const p_mt_info = &ppi->p_mt_info;
- for (int t = 1; t < p_mt_info->num_workers; ++t) {
- EncWorkerData *const thread_data = &p_mt_info->tile_thr_data[t];
- thread_data->td = thread_data->original_td;
- aom_free(thread_data->td->tctx);
- aom_free(thread_data->td->palette_buffer);
- aom_free(thread_data->td->tmp_conv_dst);
- release_compound_type_rd_buffers(&thread_data->td->comp_rd_buffer);
- for (int j = 0; j < 2; ++j) {
- aom_free(thread_data->td->tmp_pred_bufs[j]);
- }
- aom_free(thread_data->td->pixel_gradient_info);
- aom_free(thread_data->td->src_var_info_of_4x4_sub_blocks);
- release_obmc_buffers(&thread_data->td->obmc_buffer);
- aom_free(thread_data->td->vt64x64);
-
- for (int x = 0; x < 2; x++) {
- for (int y = 0; y < 2; y++) {
- aom_free(thread_data->td->hash_value_buffer[x][y]);
- thread_data->td->hash_value_buffer[x][y] = NULL;
- }
- }
- aom_free(thread_data->td->counts);
- av1_free_pmc(thread_data->td->firstpass_ctx,
- ppi->seq_params.monochrome ? 1 : MAX_MB_PLANE);
- thread_data->td->firstpass_ctx = NULL;
- av1_free_shared_coeff_buffer(&thread_data->td->shared_coeff_buf);
- av1_free_sms_tree(thread_data->td);
- aom_free(thread_data->td);
- }
-}
-
void av1_remove_primary_compressor(AV1_PRIMARY *ppi) {
if (!ppi) return;
#if !CONFIG_REALTIME_ONLY
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 2c54ebe..f72e18d 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -1661,6 +1661,11 @@
* Number of primary workers created for multi-threading.
*/
int p_num_workers;
+
+ /*!
+ * Number of encode stage workers for which thread data was last allocated.
+ */
+ int prev_num_enc_workers;
} PrimaryMultiThreadInfo;
/*!
diff --git a/av1/encoder/encoder_alloc.h b/av1/encoder/encoder_alloc.h
index f0b3f20..0121242 100644
--- a/av1/encoder/encoder_alloc.h
+++ b/av1/encoder/encoder_alloc.h
@@ -403,6 +403,40 @@
return &cpi->scaled_source;
}
+// Deallocate allocated thread_data.
+static AOM_INLINE void free_thread_data(AV1_PRIMARY *ppi) {
+ PrimaryMultiThreadInfo *const p_mt_info = &ppi->p_mt_info;
+ for (int t = 1; t < p_mt_info->num_workers; ++t) {
+ EncWorkerData *const thread_data = &p_mt_info->tile_thr_data[t];
+ thread_data->td = thread_data->original_td;
+ aom_free(thread_data->td->tctx);
+ aom_free(thread_data->td->palette_buffer);
+ aom_free(thread_data->td->tmp_conv_dst);
+ release_compound_type_rd_buffers(&thread_data->td->comp_rd_buffer);
+ for (int j = 0; j < 2; ++j) {
+ aom_free(thread_data->td->tmp_pred_bufs[j]);
+ }
+ aom_free(thread_data->td->pixel_gradient_info);
+ aom_free(thread_data->td->src_var_info_of_4x4_sub_blocks);
+ release_obmc_buffers(&thread_data->td->obmc_buffer);
+ aom_free(thread_data->td->vt64x64);
+
+ for (int x = 0; x < 2; x++) {
+ for (int y = 0; y < 2; y++) {
+ aom_free(thread_data->td->hash_value_buffer[x][y]);
+ thread_data->td->hash_value_buffer[x][y] = NULL;
+ }
+ }
+ aom_free(thread_data->td->counts);
+ av1_free_pmc(thread_data->td->firstpass_ctx,
+ ppi->seq_params.monochrome ? 1 : MAX_MB_PLANE);
+ thread_data->td->firstpass_ctx = NULL;
+ av1_free_shared_coeff_buffer(&thread_data->td->shared_coeff_buf);
+ av1_free_sms_tree(thread_data->td);
+ aom_free(thread_data->td);
+ }
+}
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c
index 5aea9d3..fb17b80 100644
--- a/av1/encoder/ethread.c
+++ b/av1/encoder/ethread.c
@@ -775,6 +775,7 @@
int num_workers = p_mt_info->num_workers;
int num_enc_workers = av1_get_num_mod_workers_for_alloc(p_mt_info, MOD_ENC);
+ assert(num_enc_workers <= num_workers);
for (int i = num_workers - 1; i >= 0; i--) {
EncWorkerData *const thread_data = &p_mt_info->tile_thr_data[i];
@@ -884,6 +885,10 @@
}
}
}
+
+ // Record the number of workers in encode stage multi-threading for which
+ // allocation is done.
+ p_mt_info->prev_num_enc_workers = num_enc_workers;
}
void av1_create_workers(AV1_PRIMARY *ppi, int num_workers) {
diff --git a/test/resize_test.cc b/test/resize_test.cc
index 135f362..4d74f64 100644
--- a/test/resize_test.cc
+++ b/test/resize_test.cc
@@ -97,10 +97,16 @@
void ScaleForFrameNumber(unsigned int frame, unsigned int initial_w,
unsigned int initial_h, unsigned int *w,
- unsigned int *h, int flag_codec) {
+ unsigned int *h, int flag_codec,
+ bool change_start_resln_) {
if (frame < 10) {
- *w = initial_w;
- *h = initial_h;
+ if (change_start_resln_) {
+ *w = initial_w / 4;
+ *h = initial_h / 4;
+ } else {
+ *w = initial_w;
+ *h = initial_h;
+ }
return;
}
if (frame < 20) {
@@ -179,15 +185,25 @@
limit_ = 150;
}
int flag_codec_;
+ bool change_start_resln_;
virtual ~ResizingVideoSource() {}
protected:
+ virtual void Begin() {
+ frame_ = 0;
+ unsigned int width;
+ unsigned int height;
+ ScaleForFrameNumber(frame_, kInitialWidth, kInitialHeight, &width, &height,
+ flag_codec_, change_start_resln_);
+ SetSize(width, height);
+ FillFrame();
+ }
virtual void Next() {
++frame_;
unsigned int width;
unsigned int height;
ScaleForFrameNumber(frame_, kInitialWidth, kInitialHeight, &width, &height,
- flag_codec_);
+ flag_codec_, change_start_resln_);
SetSize(width, height);
FillFrame();
}
@@ -225,6 +241,7 @@
TEST_P(ResizeTest, TestExternalResizeWorks) {
ResizingVideoSource video;
video.flag_codec_ = 0;
+ video.change_start_resln_ = false;
cfg_.g_lag_in_frames = 0;
// We use max(kInitialWidth, kInitialHeight) because during the test
// the width and height of the frame are swapped
@@ -241,7 +258,7 @@
unsigned int expected_w;
unsigned int expected_h;
ScaleForFrameNumber(frame, kInitialWidth, kInitialHeight, &expected_w,
- &expected_h, 0);
+ &expected_h, 0, video.change_start_resln_);
EXPECT_EQ(expected_w, info->w)
<< "Frame " << frame << " had unexpected width";
EXPECT_EQ(expected_h, info->h)
@@ -596,23 +613,30 @@
mismatch_psnr_ = 0.0;
mismatch_nframes_ = 0;
DefaultConfig();
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+ // Test external resizing with start resolution equal to
+ // 1. kInitialWidth and kInitialHeight
+ // 2. down-scaled kInitialWidth and kInitialHeight
+ for (int i = 0; i < 2; i++) {
+ video.change_start_resln_ = (bool)i;
- // Check we decoded the same number of frames as we attempted to encode
- ASSERT_EQ(frame_info_list_.size(), video.limit());
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
- for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
- info != frame_info_list_.end(); ++info) {
- const unsigned int frame = static_cast<unsigned>(info->pts);
- unsigned int expected_w;
- unsigned int expected_h;
- ScaleForFrameNumber(frame, kInitialWidth, kInitialHeight, &expected_w,
- &expected_h, 1);
- EXPECT_EQ(expected_w, info->w)
- << "Frame " << frame << " had unexpected width";
- EXPECT_EQ(expected_h, info->h)
- << "Frame " << frame << " had unexpected height";
- EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
+ // Check we decoded the same number of frames as we attempted to encode
+ ASSERT_EQ(frame_info_list_.size(), video.limit());
+ for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
+ info != frame_info_list_.end(); ++info) {
+ const unsigned int frame = static_cast<unsigned>(info->pts);
+ unsigned int expected_w;
+ unsigned int expected_h;
+ ScaleForFrameNumber(frame, kInitialWidth, kInitialHeight, &expected_w,
+ &expected_h, 1, video.change_start_resln_);
+ EXPECT_EQ(expected_w, info->w)
+ << "Frame " << frame << " had unexpected width";
+ EXPECT_EQ(expected_h, info->h)
+ << "Frame " << frame << " had unexpected height";
+ EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
+ }
+ frame_info_list_.clear();
}
}