FPMT: Add av1_compute_num_fp_contexts()

Added av1_compute_num_fp_contexts() to
calculate the max number of frame parallel
encodes possible for the current config. Also
added is_fp_config() which decides whether
frame parallel encode is valid.

Change-Id: I1de7b56a59894b73a88543ea4d1947e1ce163f5a
diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c
index 6a0bf16..702a16e 100644
--- a/av1/av1_cx_iface.c
+++ b/av1/av1_cx_iface.c
@@ -2388,6 +2388,15 @@
         if (res != AOM_CODEC_OK) {
           return res;
         }
+        if (i == 0) {
+          // Calculate the maximum number of frames that can be encoded in
+          // parallel
+          priv->ppi->num_fp_contexts = av1_compute_num_fp_contexts(
+              priv->ppi, &priv->ppi->parallel_cpi[i]->oxcf,
+              av1_compute_num_enc_workers(
+                  priv->ppi->parallel_cpi[i],
+                  priv->ppi->parallel_cpi[i]->oxcf.max_threads));
+        }
       }
       priv->ppi->cpi = priv->ppi->parallel_cpi[0];
 #else
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c
index 7e4f11d..203810a 100644
--- a/av1/encoder/ethread.c
+++ b/av1/encoder/ethread.c
@@ -730,6 +730,50 @@
 }
 
 #if CONFIG_FRAME_PARALLEL_ENCODE
+static int AOM_INLINE is_fp_config(AV1_PRIMARY *ppi, AV1EncoderConfig *oxcf) {
+  // Decides whether frame parallel encode is valid for this configuration.
+  // Returns 1 when every condition below holds and 0 otherwise.
+  //
+  // TODO(Mufaddal, Aasaipriya): Test and enable multi-tile, resize and vbr
+  // config.
+
+  // Only rate control mode AOM_Q together with encoder mode GOOD qualifies.
+  const int mode_ok = (oxcf->rc_cfg.mode == AOM_Q) && (oxcf->mode == GOOD);
+
+  // Any positive tile column or tile row setting disqualifies the config
+  // (multi-tile is not enabled yet, see the TODO above).
+  const int tiles_ok =
+      (oxcf->tile_cfg.tile_columns <= 0) && (oxcf->tile_cfg.tile_rows <= 0);
+
+  // SVC, decoder model timing info, error resilient mode and resize each
+  // disqualify the config when set.
+  const int has_blocker =
+      ppi->use_svc || oxcf->dec_model_cfg.timing_info_present ||
+      oxcf->tool_cfg.error_resilient_mode || oxcf->resize_cfg.resize_mode;
+
+  if (!mode_ok || !tiles_ok || has_blocker) {
+    return 0;
+  }
+
+  return 1;
+}
+
+// Returns the max number of frame parallel encodes for the config, always in
+// [1, MAX_PARALLEL_FRAMES]; 1 when the config is not valid for frame parallel.
+int av1_compute_num_fp_contexts(AV1_PRIMARY *ppi, AV1EncoderConfig *oxcf,
+                                int max_num_enc_workers) {
+  if (!is_fp_config(ppi, oxcf)) return 1;
+  // A parallel frame encode must have at least half the theoretical limit of
+  // max enc workers. TODO(Mufaddal) : Tune this value based on empirical
+  // analysis. Kept >= 1 so the division below is never by zero.
+  int workers_per_frame = (max_num_enc_workers + 1) / 2;
+  if (workers_per_frame < 1) workers_per_frame = 1;
+  const int num_fp_contexts = oxcf->max_threads / workers_per_frame;
+  // max_threads below workers_per_frame (e.g. 0) gives 0 here; 1 is the value
+  // returned when frame parallel encode is off, so never return less than 1.
+  return num_fp_contexts < 1 ? 1 : AOMMIN(num_fp_contexts, MAX_PARALLEL_FRAMES);
+}
+
 // Prepare level 1 workers. This function is only called for
 // parallel_frame_count > 1. This function populates the mt_info structure of
 // frame level contexts appropriately by dividing the total number of available
@@ -1023,7 +1067,7 @@
 }
 
 // Computes the number of workers for encoding stage (row/tile multi-threading)
-static AOM_INLINE int compute_num_enc_workers(AV1_COMP *cpi, int max_workers) {
+int av1_compute_num_enc_workers(AV1_COMP *cpi, int max_workers) {
   if (max_workers <= 1) return 1;
   if (cpi->oxcf.row_mt)
     return compute_num_enc_row_mt_workers(&cpi->common, max_workers);
@@ -2279,7 +2323,7 @@
   // found to improve speed. Hence the thread assignment for single-pass encode
   // is kept based on compute_num_enc_workers().
   if (cpi->oxcf.pass < AOM_RC_SECOND_PASS)
-    return (compute_num_enc_workers(cpi, cpi->oxcf.max_threads));
+    return (av1_compute_num_enc_workers(cpi, cpi->oxcf.max_threads));
 
   if (cpi->oxcf.max_threads <= 1) return 1;
 
@@ -2292,22 +2336,22 @@
 
 // Computes num_workers for tpl multi-threading.
 static AOM_INLINE int compute_num_tpl_workers(AV1_COMP *cpi) {
-  return compute_num_enc_workers(cpi, cpi->oxcf.max_threads);
+  return av1_compute_num_enc_workers(cpi, cpi->oxcf.max_threads);
 }
 
 // Computes num_workers for loop filter multi-threading.
 static AOM_INLINE int compute_num_lf_workers(AV1_COMP *cpi) {
-  return compute_num_enc_workers(cpi, cpi->oxcf.max_threads);
+  return av1_compute_num_enc_workers(cpi, cpi->oxcf.max_threads);
 }
 
 // Computes num_workers for cdef multi-threading.
 static AOM_INLINE int compute_num_cdef_workers(AV1_COMP *cpi) {
-  return compute_num_enc_workers(cpi, cpi->oxcf.max_threads);
+  return av1_compute_num_enc_workers(cpi, cpi->oxcf.max_threads);
 }
 
 // Computes num_workers for loop-restoration multi-threading.
 static AOM_INLINE int compute_num_lr_workers(AV1_COMP *cpi) {
-  return compute_num_enc_workers(cpi, cpi->oxcf.max_threads);
+  return av1_compute_num_enc_workers(cpi, cpi->oxcf.max_threads);
 }
 
 // Computes num_workers for pack bitstream multi-threading.
@@ -2323,13 +2367,14 @@
       if (cpi->oxcf.pass >= AOM_RC_SECOND_PASS)
         num_mod_workers = 0;
       else
-        num_mod_workers = compute_num_enc_workers(cpi, cpi->oxcf.max_threads);
+        num_mod_workers =
+            av1_compute_num_enc_workers(cpi, cpi->oxcf.max_threads);
       break;
     case MOD_TF: num_mod_workers = compute_num_tf_workers(cpi); break;
     case MOD_TPL: num_mod_workers = compute_num_tpl_workers(cpi); break;
     case MOD_GME: num_mod_workers = 1; break;
     case MOD_ENC:
-      num_mod_workers = compute_num_enc_workers(cpi, cpi->oxcf.max_threads);
+      num_mod_workers = av1_compute_num_enc_workers(cpi, cpi->oxcf.max_threads);
       break;
     case MOD_LPF: num_mod_workers = compute_num_lf_workers(cpi); break;
     case MOD_CDEF_SEARCH:
diff --git a/av1/encoder/ethread.h b/av1/encoder/ethread.h
index 6541110..59fb043 100644
--- a/av1/encoder/ethread.h
+++ b/av1/encoder/ethread.h
@@ -88,6 +88,9 @@
 
 void av1_init_tile_thread_data(AV1_PRIMARY *ppi, int is_first_pass);
 
+int av1_compute_num_fp_contexts(AV1_PRIMARY *ppi, AV1EncoderConfig *oxcf,
+                                int max_num_enc_workers);
+
 void av1_cdef_mse_calc_frame_mt(AV1_COMMON *cm, MultiThreadInfo *mt_info,
                                 CdefSearchCtx *cdef_search_ctx);
 
@@ -100,6 +103,8 @@
     unsigned int *max_tile_size, uint32_t *const obu_header_size,
     uint8_t **tile_data_start, const int num_workers);
 
+int av1_compute_num_enc_workers(AV1_COMP *cpi, int max_workers);
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif