Use highest possible no. of threads for temporal filtering
During multi-threaded encoding, the maximum number of workers permitted by
the module's processing constraints is allocated for temporal filtering.
Speed gains of up to 2.8% were seen for 720p single-tile encodes using 10
threads.
Change-Id: Ic7b627dfb62c14f1c9611b1eff50771e705b9c58
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c
index b71a40a..bb0cd66 100644
--- a/av1/encoder/ethread.c
+++ b/av1/encoder/ethread.c
@@ -1813,7 +1813,19 @@
// Computes num_workers for temporal filter multi-threading.
static AOM_INLINE int compute_num_tf_workers(AV1_COMP *cpi) {
- return compute_num_enc_workers(cpi, cpi->oxcf.max_threads);
+ // For single-pass encode, using no. of workers as per tf block size was not
+ // found to improve speed. Hence the thread assignment for single-pass encode
+ // is kept based on compute_num_enc_workers().
+ if (cpi->oxcf.pass != 2)
+ return (compute_num_enc_workers(cpi, cpi->oxcf.max_threads));
+
+ if (cpi->oxcf.max_threads <= 1) return 1;
+
+ const int frame_height = cpi->common.height;
+ const BLOCK_SIZE block_size = TF_BLOCK_SIZE;
+ const int mb_height = block_size_high[block_size];
+ const int mb_rows = get_num_blocks(frame_height, mb_height);
+ return AOMMIN(cpi->oxcf.max_threads, mb_rows);
}
// Computes num_workers for tpl multi-threading.