FPMT: Modify worker distribution per frame

Distribute the available workers across the frames
of a parallel encode set as evenly as possible.
Each frame's share is now computed as the remaining
workers divided by the remaining frames (floor
division), instead of using a fixed ceil-division
share of the total workers for every frame.

Change-Id: I2986bdb9f2679975a1e279707a090da430aca427
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c
index 97a4862..d492b11 100644
--- a/av1/encoder/ethread.c
+++ b/av1/encoder/ethread.c
@@ -893,9 +893,8 @@
 // Computes the number of workers to process each of the parallel frames.
 static AOM_INLINE int compute_num_workers_per_frame(
     const int num_workers, const int parallel_frame_count) {
-  // Number of level 2 workers per frame context (ceil division)
-  int workers_per_frame = (num_workers / parallel_frame_count) +
-                          (num_workers % parallel_frame_count != 0);
+  // Number of level 2 workers per frame context (floor division).
+  int workers_per_frame = (num_workers / parallel_frame_count);
   return workers_per_frame;
 }
 
@@ -914,11 +913,9 @@
   PrimaryMultiThreadInfo *const p_mt_info = &ppi->p_mt_info;
   int num_workers = p_mt_info->num_workers;
 
-  // Number of level 2 workers per frame context
-  int workers_per_frame =
-      compute_num_workers_per_frame(num_workers, parallel_frame_count);
   int frame_idx = 0;
-  for (int i = 0; i < num_workers; i += workers_per_frame) {
+  int i = 0;
+  while (i < num_workers) {
     // Assign level 1 worker
     AVxWorker *frame_worker = p_mt_info->p_workers[frame_idx] =
         &p_mt_info->workers[i];
@@ -929,7 +926,9 @@
     // Assign start of level 2 worker pool
     mt_info->workers = &p_mt_info->workers[i];
     mt_info->tile_thr_data = &p_mt_info->tile_thr_data[i];
-    mt_info->num_workers = AOMMIN(workers_per_frame, num_workers - i);
+    // Assign number of workers for each frame in the parallel encode set.
+    mt_info->num_workers = compute_num_workers_per_frame(
+        num_workers - i, parallel_frame_count - frame_idx);
     for (int j = MOD_FP; j < NUM_MT_MODULES; j++) {
       mt_info->num_mod_workers[j] =
           AOMMIN(mt_info->num_workers, ppi->p_mt_info.num_mod_workers[j]);
@@ -945,7 +944,7 @@
     }
 #if !CONFIG_REALTIME_ONLY
     // Back up the original LR buffers before update.
-    int idx = AOMMIN(num_workers - 1, i + workers_per_frame - 1);
+    int idx = i + mt_info->num_workers - 1;
     mt_info->restore_state_buf.rst_tmpbuf =
         mt_info->lr_row_sync.lrworkerdata[idx].rst_tmpbuf;
     mt_info->restore_state_buf.rlbs =
@@ -969,6 +968,7 @@
                               ? first_cpi_data
                               : &ppi->parallel_frames_data[frame_idx - 1];
     frame_idx++;
+    i += mt_info->num_workers;
   }
   p_mt_info->p_num_workers = parallel_frame_count;
 }
@@ -1013,15 +1013,14 @@
                                                   int parallel_frame_count) {
   assert(parallel_frame_count <= ppi->num_fp_contexts &&
          parallel_frame_count > 1);
+  (void)parallel_frame_count;
 
   PrimaryMultiThreadInfo *const p_mt_info = &ppi->p_mt_info;
   int num_workers = p_mt_info->num_workers;
 
-  // Number of level 2 workers per frame context
-  int workers_per_frame =
-      compute_num_workers_per_frame(num_workers, parallel_frame_count);
   int frame_idx = 0;
-  for (int i = 0; i < num_workers; i += workers_per_frame) {
+  int i = 0;
+  while (i < num_workers) {
     AV1_COMP *cur_cpi = ppi->parallel_cpi[frame_idx];
     MultiThreadInfo *mt_info = &cur_cpi->mt_info;
     const int num_planes = av1_num_planes(&cur_cpi->common);
@@ -1035,7 +1034,7 @@
     }
 #if !CONFIG_REALTIME_ONLY
     // Restore the original LR buffers.
-    int idx = AOMMIN(num_workers - 1, i + workers_per_frame - 1);
+    int idx = i + mt_info->num_workers - 1;
     mt_info->lr_row_sync.lrworkerdata[idx].rst_tmpbuf =
         mt_info->restore_state_buf.rst_tmpbuf;
     mt_info->lr_row_sync.lrworkerdata[idx].rlbs =
@@ -1043,6 +1042,7 @@
 #endif
 
     frame_idx++;
+    i += mt_info->num_workers;
   }
 }