Use highest possible no. of threads for temporal filtering. During multi-threaded encode, the maximum number of workers permitted by the module's processing constraints is allocated for temporal filtering. Speed gains of up to 2.8% were seen for 720p single-tile encode using 10 threads. Change-Id: Ic7b627dfb62c14f1c9611b1eff50771e705b9c58
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c index b71a40a..bb0cd66 100644 --- a/av1/encoder/ethread.c +++ b/av1/encoder/ethread.c
@@ -1813,7 +1813,19 @@ // Computes num_workers for temporal filter multi-threading. static AOM_INLINE int compute_num_tf_workers(AV1_COMP *cpi) { - return compute_num_enc_workers(cpi, cpi->oxcf.max_threads); + // For single-pass encode, using no. of workers as per tf block size was not + // found to improve speed. Hence the thread assignment for single-pass encode + // is kept based on compute_num_enc_workers(). + if (cpi->oxcf.pass != 2) + return (compute_num_enc_workers(cpi, cpi->oxcf.max_threads)); + + if (cpi->oxcf.max_threads <= 1) return 1; + + const int frame_height = cpi->common.height; + const BLOCK_SIZE block_size = TF_BLOCK_SIZE; + const int mb_height = block_size_high[block_size]; + const int mb_rows = get_num_blocks(frame_height, mb_height); + return AOMMIN(cpi->oxcf.max_threads, mb_rows); } // Computes num_workers for tpl multi-threading.