Add support for dynamic allocation of thread data

Added support for reallocation of thread data when the
number of workers for encode stage multi-threading
increases with frame resizing. Also modified
TestExternalResizeWorks of ResizeRealtimeTest to test this scenario.

BUG=aomedia:3429

Change-Id: Ieee94b229274e942203c9fc7dffd59a9a3fb5c26
diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c
index d655b65..3b62e00 100644
--- a/av1/av1_cx_iface.c
+++ b/av1/av1_cx_iface.c
@@ -25,6 +25,7 @@
 #include "av1/av1_iface_common.h"
 #include "av1/encoder/bitstream.h"
 #include "av1/encoder/encoder.h"
+#include "av1/encoder/encoder_alloc.h"
 #include "av1/encoder/encoder_utils.h"
 #include "av1/encoder/ethread.h"
 #include "av1/encoder/external_partition.h"
@@ -3100,6 +3101,19 @@
       }
 #endif  // CONFIG_MULTITHREAD
     }
+
+    // Re-allocate thread data if the number of encode-stage multi-threading
+    // workers exceeds prev_num_enc_workers.
+    const int num_enc_workers =
+        av1_get_num_mod_workers_for_alloc(&ppi->p_mt_info, MOD_ENC);
+    if (ppi->p_mt_info.prev_num_enc_workers < num_enc_workers &&
+        num_enc_workers <= ppi->p_mt_info.num_workers) {
+      free_thread_data(ppi);
+      for (int j = 0; j < ppi->num_fp_contexts; j++)
+        aom_free(ppi->parallel_cpi[j]->td.tctx);
+      av1_init_tile_thread_data(ppi, cpi->oxcf.pass == AOM_RC_FIRST_PASS);
+    }
+
     for (int i = 0; i < ppi->num_fp_contexts; i++) {
       av1_init_frame_mt(ppi, ppi->parallel_cpi[i]);
     }
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index c8f65c0..b89a881 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -1617,40 +1617,6 @@
   }
 }
 
-// Deallocate allocated thread_data.
-static AOM_INLINE void free_thread_data(AV1_PRIMARY *ppi) {
-  PrimaryMultiThreadInfo *const p_mt_info = &ppi->p_mt_info;
-  for (int t = 1; t < p_mt_info->num_workers; ++t) {
-    EncWorkerData *const thread_data = &p_mt_info->tile_thr_data[t];
-    thread_data->td = thread_data->original_td;
-    aom_free(thread_data->td->tctx);
-    aom_free(thread_data->td->palette_buffer);
-    aom_free(thread_data->td->tmp_conv_dst);
-    release_compound_type_rd_buffers(&thread_data->td->comp_rd_buffer);
-    for (int j = 0; j < 2; ++j) {
-      aom_free(thread_data->td->tmp_pred_bufs[j]);
-    }
-    aom_free(thread_data->td->pixel_gradient_info);
-    aom_free(thread_data->td->src_var_info_of_4x4_sub_blocks);
-    release_obmc_buffers(&thread_data->td->obmc_buffer);
-    aom_free(thread_data->td->vt64x64);
-
-    for (int x = 0; x < 2; x++) {
-      for (int y = 0; y < 2; y++) {
-        aom_free(thread_data->td->hash_value_buffer[x][y]);
-        thread_data->td->hash_value_buffer[x][y] = NULL;
-      }
-    }
-    aom_free(thread_data->td->counts);
-    av1_free_pmc(thread_data->td->firstpass_ctx,
-                 ppi->seq_params.monochrome ? 1 : MAX_MB_PLANE);
-    thread_data->td->firstpass_ctx = NULL;
-    av1_free_shared_coeff_buffer(&thread_data->td->shared_coeff_buf);
-    av1_free_sms_tree(thread_data->td);
-    aom_free(thread_data->td);
-  }
-}
-
 void av1_remove_primary_compressor(AV1_PRIMARY *ppi) {
   if (!ppi) return;
 #if !CONFIG_REALTIME_ONLY
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 2c54ebe..f72e18d 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -1661,6 +1661,11 @@
    * Number of primary workers created for multi-threading.
    */
   int p_num_workers;
+
+  /*!
+   * Number of encode stage workers for which thread data was last allocated.
+   */
+  int prev_num_enc_workers;
 } PrimaryMultiThreadInfo;
 
 /*!
diff --git a/av1/encoder/encoder_alloc.h b/av1/encoder/encoder_alloc.h
index f0b3f20..0121242 100644
--- a/av1/encoder/encoder_alloc.h
+++ b/av1/encoder/encoder_alloc.h
@@ -403,6 +403,40 @@
   return &cpi->scaled_source;
 }
 
+// Deallocate allocated thread_data.
+static AOM_INLINE void free_thread_data(AV1_PRIMARY *ppi) {
+  PrimaryMultiThreadInfo *const p_mt_info = &ppi->p_mt_info;
+  for (int t = 1; t < p_mt_info->num_workers; ++t) {
+    EncWorkerData *const thread_data = &p_mt_info->tile_thr_data[t];
+    thread_data->td = thread_data->original_td;
+    aom_free(thread_data->td->tctx);
+    aom_free(thread_data->td->palette_buffer);
+    aom_free(thread_data->td->tmp_conv_dst);
+    release_compound_type_rd_buffers(&thread_data->td->comp_rd_buffer);
+    for (int j = 0; j < 2; ++j) {
+      aom_free(thread_data->td->tmp_pred_bufs[j]);
+    }
+    aom_free(thread_data->td->pixel_gradient_info);
+    aom_free(thread_data->td->src_var_info_of_4x4_sub_blocks);
+    release_obmc_buffers(&thread_data->td->obmc_buffer);
+    aom_free(thread_data->td->vt64x64);
+
+    for (int x = 0; x < 2; x++) {
+      for (int y = 0; y < 2; y++) {
+        aom_free(thread_data->td->hash_value_buffer[x][y]);
+        thread_data->td->hash_value_buffer[x][y] = NULL;
+      }
+    }
+    aom_free(thread_data->td->counts);
+    av1_free_pmc(thread_data->td->firstpass_ctx,
+                 ppi->seq_params.monochrome ? 1 : MAX_MB_PLANE);
+    thread_data->td->firstpass_ctx = NULL;
+    av1_free_shared_coeff_buffer(&thread_data->td->shared_coeff_buf);
+    av1_free_sms_tree(thread_data->td);
+    aom_free(thread_data->td);
+  }
+}
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c
index 5aea9d3..fb17b80 100644
--- a/av1/encoder/ethread.c
+++ b/av1/encoder/ethread.c
@@ -775,6 +775,7 @@
 
   int num_workers = p_mt_info->num_workers;
   int num_enc_workers = av1_get_num_mod_workers_for_alloc(p_mt_info, MOD_ENC);
+  assert(num_enc_workers <= num_workers);
   for (int i = num_workers - 1; i >= 0; i--) {
     EncWorkerData *const thread_data = &p_mt_info->tile_thr_data[i];
 
@@ -884,6 +885,10 @@
       }
     }
   }
+
+  // Record the number of workers in encode stage multi-threading for which
+  // allocation is done.
+  p_mt_info->prev_num_enc_workers = num_enc_workers;
 }
 
 void av1_create_workers(AV1_PRIMARY *ppi, int num_workers) {
diff --git a/test/resize_test.cc b/test/resize_test.cc
index 135f362..4d74f64 100644
--- a/test/resize_test.cc
+++ b/test/resize_test.cc
@@ -97,10 +97,16 @@
 
 void ScaleForFrameNumber(unsigned int frame, unsigned int initial_w,
                          unsigned int initial_h, unsigned int *w,
-                         unsigned int *h, int flag_codec) {
+                         unsigned int *h, int flag_codec,
+                         bool change_start_resln_) {
   if (frame < 10) {
-    *w = initial_w;
-    *h = initial_h;
+    if (change_start_resln_) {
+      *w = initial_w / 4;
+      *h = initial_h / 4;
+    } else {
+      *w = initial_w;
+      *h = initial_h;
+    }
     return;
   }
   if (frame < 20) {
@@ -179,15 +185,25 @@
     limit_ = 150;
   }
   int flag_codec_;
+  bool change_start_resln_;
   virtual ~ResizingVideoSource() {}
 
  protected:
+  virtual void Begin() {
+    frame_ = 0;
+    unsigned int width;
+    unsigned int height;
+    ScaleForFrameNumber(frame_, kInitialWidth, kInitialHeight, &width, &height,
+                        flag_codec_, change_start_resln_);
+    SetSize(width, height);
+    FillFrame();
+  }
   virtual void Next() {
     ++frame_;
     unsigned int width;
     unsigned int height;
     ScaleForFrameNumber(frame_, kInitialWidth, kInitialHeight, &width, &height,
-                        flag_codec_);
+                        flag_codec_, change_start_resln_);
     SetSize(width, height);
     FillFrame();
   }
@@ -225,6 +241,7 @@
 TEST_P(ResizeTest, TestExternalResizeWorks) {
   ResizingVideoSource video;
   video.flag_codec_ = 0;
+  video.change_start_resln_ = false;
   cfg_.g_lag_in_frames = 0;
   // We use max(kInitialWidth, kInitialHeight) because during the test
   // the width and height of the frame are swapped
@@ -241,7 +258,7 @@
     unsigned int expected_w;
     unsigned int expected_h;
     ScaleForFrameNumber(frame, kInitialWidth, kInitialHeight, &expected_w,
-                        &expected_h, 0);
+                        &expected_h, 0, video.change_start_resln_);
     EXPECT_EQ(expected_w, info->w)
         << "Frame " << frame << " had unexpected width";
     EXPECT_EQ(expected_h, info->h)
@@ -596,23 +613,30 @@
   mismatch_psnr_ = 0.0;
   mismatch_nframes_ = 0;
   DefaultConfig();
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  // Test external resizing with start resolution equal to
+  // 1. kInitialWidth and kInitialHeight
+  // 2. down-scaled kInitialWidth and kInitialHeight
+  for (int i = 0; i < 2; i++) {
+    video.change_start_resln_ = (bool)i;
 
-  // Check we decoded the same number of frames as we attempted to encode
-  ASSERT_EQ(frame_info_list_.size(), video.limit());
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
 
-  for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
-       info != frame_info_list_.end(); ++info) {
-    const unsigned int frame = static_cast<unsigned>(info->pts);
-    unsigned int expected_w;
-    unsigned int expected_h;
-    ScaleForFrameNumber(frame, kInitialWidth, kInitialHeight, &expected_w,
-                        &expected_h, 1);
-    EXPECT_EQ(expected_w, info->w)
-        << "Frame " << frame << " had unexpected width";
-    EXPECT_EQ(expected_h, info->h)
-        << "Frame " << frame << " had unexpected height";
-    EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
+    // Check we decoded the same number of frames as we attempted to encode
+    ASSERT_EQ(frame_info_list_.size(), video.limit());
+    for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
+         info != frame_info_list_.end(); ++info) {
+      const unsigned int frame = static_cast<unsigned>(info->pts);
+      unsigned int expected_w;
+      unsigned int expected_h;
+      ScaleForFrameNumber(frame, kInitialWidth, kInitialHeight, &expected_w,
+                          &expected_h, 1, video.change_start_resln_);
+      EXPECT_EQ(expected_w, info->w)
+          << "Frame " << frame << " had unexpected width";
+      EXPECT_EQ(expected_h, info->h)
+          << "Frame " << frame << " had unexpected height";
+      EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
+    }
+    frame_info_list_.clear();
   }
 }