FPMT: Enable fpmt only for higher threads for multi-tile encode
In case of multi-tile encode, delayed the onstart of frame parallel
encoding until MAX_PARALLEL_FRAMES are possible.
Change-Id: I5f53ed2e88d982fe470f7ff2454d20a1473e3929
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c
index 7ccf1d0..3f1318f 100644
--- a/av1/encoder/ethread.c
+++ b/av1/encoder/ethread.c
@@ -908,6 +908,17 @@
// possible for each resolution.
#define MAX_THREADS 100
+// Computes the max number of enc workers possible for each resolution.
+static AOM_INLINE int compute_max_num_enc_workers(
+ CommonModeInfoParams *const mi_params, int mib_size_log2) {
+ int num_sb_rows =
+ ALIGN_POWER_OF_TWO(mi_params->mi_rows, mib_size_log2) >> mib_size_log2;
+ int num_sb_cols =
+ ALIGN_POWER_OF_TWO(mi_params->mi_cols, mib_size_log2) >> mib_size_log2;
+
+ return AOMMIN((num_sb_cols + 1) >> 1, num_sb_rows);
+}
+
// Computes the number of frame parallel(fp) contexts to be created
// based on the number of max_enc_workers.
int av1_compute_num_fp_contexts(AV1_PRIMARY *ppi, AV1EncoderConfig *oxcf) {
@@ -915,8 +926,8 @@
if (!av1_check_fpmt_config(ppi, oxcf)) {
return 1;
}
- int max_num_enc_workers =
- av1_compute_num_enc_workers(ppi->parallel_cpi[0], MAX_THREADS);
+ int max_num_enc_workers = compute_max_num_enc_workers(
+ &ppi->cpi->common.mi_params, ppi->cpi->common.seq_params->mib_size_log2);
// Scaling factors and rounding factors used to tune worker_per_frame
// computation.
int rounding_factor[2] = { 2, 4 };
@@ -936,6 +947,13 @@
scaling_factor[index]);
int max_threads = oxcf->max_threads;
int num_fp_contexts = max_threads / workers_per_frame;
+ // Based on empirical results, FPMT gains with multi-tile are significant when
+ // more parallel frames are available. Use FPMT with multi-tile encode only
+ // when sufficient threads are available for parallel encode of
+ // MAX_PARALLEL_FRAMES frames.
+ if (oxcf->tile_cfg.tile_columns > 0 || oxcf->tile_cfg.tile_rows > 0) {
+ if (num_fp_contexts < MAX_PARALLEL_FRAMES) num_fp_contexts = 1;
+ }
num_fp_contexts = AOMMAX(1, AOMMIN(num_fp_contexts, MAX_PARALLEL_FRAMES));
// Limit recalculated num_fp_contexts to ppi->num_fp_contexts.