Improve pack bitstream multi-thread performance
This CL improves pack bitstream multi-thread performance
by ordering tile jobs in descending order of the sum of absolute
transform coefficient levels of each tile, instead of by tile area.
Change-Id: I0772eae1d889a76e162daa6cc8b576d89453e818
diff --git a/av1/encoder/bitstream.h b/av1/encoder/bitstream.h
index dfbe7bf..e32cd3b 100644
--- a/av1/encoder/bitstream.h
+++ b/av1/encoder/bitstream.h
@@ -59,8 +59,8 @@
} PackBSParams;
typedef struct {
+ uint64_t abs_sum_level;
uint16_t tile_idx;
- int tile_size_mi;
} PackBSTileOrder;
// Pack bitstream data for pack bitstream multi-threading.
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index c403d70..b7a0c27 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -944,6 +944,7 @@
TileInfo *const tile_info = &tile_data->tile_info;
av1_tile_init(tile_info, cm, tile_row, tile_col);
tile_data->firstpass_top_mv = kZeroMv;
+ tile_data->abs_sum_level = 0;
if (pre_tok != NULL && tplist != NULL) {
token_info->tile_tok[tile_row][tile_col] = pre_tok + tile_tok;
@@ -1033,6 +1034,7 @@
mi_row += cm->seq_params->mib_size) {
av1_encode_sb_row(cpi, td, tile_row, tile_col, mi_row);
}
+ this_tile->abs_sum_level = td->abs_sum_level;
}
/*!\brief Break one frame into tiles and encode the tiles
@@ -1061,6 +1063,7 @@
&cpi->tile_data[tile_row * cm->tiles.cols + tile_col];
cpi->td.intrabc_used = 0;
cpi->td.deltaq_used = 0;
+ cpi->td.abs_sum_level = 0;
cpi->td.mb.e_mbd.tile_ctx = &this_tile->tctx;
cpi->td.mb.tile_pb_ctx = &this_tile->tctx;
// Reset cyclic refresh counters.
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 938f930..98bcb6e 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -1283,6 +1283,7 @@
TileInfo tile_info;
DECLARE_ALIGNED(16, FRAME_CONTEXT, tctx);
FRAME_CONTEXT *row_ctx;
+ uint64_t abs_sum_level;
uint8_t allow_update_cdf;
InterModeRdModel inter_mode_rd_models[BLOCK_SIZES_ALL];
AV1EncRowMultiThreadSync row_mt_sync;
@@ -1311,6 +1312,7 @@
PALETTE_BUFFER *palette_buffer;
CompoundTypeRdBuffers comp_rd_buffer;
CONV_BUF_TYPE *tmp_conv_dst;
+ uint64_t abs_sum_level;
uint8_t *tmp_pred_bufs[2];
int intrabc_used;
int deltaq_used;
diff --git a/av1/encoder/encodetxb.c b/av1/encoder/encodetxb.c
index 2582ba8..0eb1348 100644
--- a/av1/encoder/encodetxb.c
+++ b/av1/encoder/encodetxb.c
@@ -624,6 +624,7 @@
const int coeff_ctx = coeff_contexts[pos];
const tran_low_t v = qcoeff[pos];
const tran_low_t level = abs(v);
+ td->abs_sum_level += level;
if (allow_update_cdf) {
if (c == eob - 1) {
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c
index fb428a1..3bf0960 100644
--- a/av1/encoder/ethread.c
+++ b/av1/encoder/ethread.c
@@ -480,6 +480,7 @@
td->mb.e_mbd.tile_ctx = td->tctx;
td->mb.tile_pb_ctx = &this_tile->tctx;
+ td->abs_sum_level = 0;
if (this_tile->allow_update_cdf) {
td->mb.row_ctx = this_tile->row_ctx;
@@ -502,6 +503,7 @@
#if CONFIG_MULTITHREAD
pthread_mutex_lock(enc_row_mt_mutex_);
#endif
+ this_tile->abs_sum_level += td->abs_sum_level;
row_mt_sync->num_threads_working--;
#if CONFIG_MULTITHREAD
pthread_mutex_unlock(enc_row_mt_mutex_);
@@ -840,6 +842,7 @@
thread_data->td->intrabc_used = 0;
thread_data->td->deltaq_used = 0;
+ thread_data->td->abs_sum_level = 0;
// Before encoding a frame, copy the thread data from cpi.
if (thread_data->td != &cpi->td) {
@@ -1810,14 +1813,14 @@
}
#endif // !CONFIG_REALTIME_ONLY
-// Compare and order tiles based on tile size.
+// Order tiles by descending absolute sum of tx coeffs; ties by tile index.
static int compare_tile_order(const void *a, const void *b) {
const PackBSTileOrder *const tile_a = (const PackBSTileOrder *)a;
const PackBSTileOrder *const tile_b = (const PackBSTileOrder *)b;
- if (tile_a->tile_size_mi > tile_b->tile_size_mi)
+ if (tile_a->abs_sum_level > tile_b->abs_sum_level)
return -1;
- else if (tile_a->tile_size_mi == tile_b->tile_size_mi)
+ else if (tile_a->abs_sum_level == tile_b->abs_sum_level)
return (tile_a->tile_idx > tile_b->tile_idx ? 1 : -1);
else
return 1;
@@ -2040,8 +2043,8 @@
// Populate pack bitstream tile order structure
for (uint16_t tile_idx = 0; tile_idx < num_tiles; tile_idx++) {
- pack_bs_tile_order[tile_idx].tile_size_mi =
- pack_bs_params[tile_idx].tile_size_mi;
+ pack_bs_tile_order[tile_idx].abs_sum_level =
+ cpi->tile_data[tile_idx].abs_sum_level;
pack_bs_tile_order[tile_idx].tile_idx = tile_idx;
}