Add unit test for decoder row-based multi-threading

Add row-mt as an aomdec command-line option and row_mt as an
init-time API parameter (the AV1D_SET_ROW_MT control) in the decoder.
Currently, decoding with row-mt=1 still uses only tile-level
multi-threading. Support for row-level multi-threading of the
decoding stage will be added later.

Change-Id: I429540c3d9fe882a3666d84fb24c85921a0d0ecf
diff --git a/aom/aomdx.h b/aom/aomdx.h
index 7ff21a5..5a6da21 100644
--- a/aom/aomdx.h
+++ b/aom/aomdx.h
@@ -187,6 +187,12 @@
    */
   AV1D_EXT_TILE_DEBUG,
 
+  /** control function to enable row-based multi-threading of decoding. It
+   * takes an unsigned int; a value equal to 1 indicates that row-based
+   * multi-threading is enabled.
+   */
+  AV1D_SET_ROW_MT,
+
   /** control function to indicate whether bitstream is in Annex-B format. */
   AV1D_SET_IS_ANNEXB,
 
@@ -258,6 +264,8 @@
 #define AOM_CTRL_AV1D_SET_EXT_REF_PTR
 AOM_CTRL_USE_TYPE(AV1D_EXT_TILE_DEBUG, unsigned int)
 #define AOM_CTRL_AV1D_EXT_TILE_DEBUG
+AOM_CTRL_USE_TYPE(AV1D_SET_ROW_MT, unsigned int)
+#define AOM_CTRL_AV1D_SET_ROW_MT
 AOM_CTRL_USE_TYPE(AV1D_SET_IS_ANNEXB, unsigned int)
 #define AOM_CTRL_AV1D_SET_IS_ANNEXB
 AOM_CTRL_USE_TYPE(AV1D_SET_OPERATING_POINT, int)
diff --git a/apps/aomdec.c b/apps/aomdec.c
index 6c4d724..834f161 100644
--- a/apps/aomdec.c
+++ b/apps/aomdec.c
@@ -83,6 +83,8 @@
     ARG_DEF("o", "output", 1, "Output file name pattern (see below)");
 static const arg_def_t threadsarg =
     ARG_DEF("t", "threads", 1, "Max threads to use");
+static const arg_def_t rowmtarg =
+    ARG_DEF(NULL, "row-mt", 1, "Enable row based multi-threading");
 static const arg_def_t verbosearg =
     ARG_DEF("v", "verbose", 0, "Show version string");
 static const arg_def_t scalearg =
@@ -114,12 +116,12 @@
     NULL, "all-layers", 0, "Output all decoded frames of a scalable bitstream");
 
 static const arg_def_t *all_args[] = {
-  &help,           &codecarg,   &use_yv12,    &use_i420,      &flipuvarg,
-  &rawvideo,       &noblitarg,  &progressarg, &limitarg,      &skiparg,
-  &postprocarg,    &summaryarg, &outputfile,  &threadsarg,    &verbosearg,
-  &scalearg,       &fb_arg,     &md5arg,      &framestatsarg, &continuearg,
-  &outbitdeptharg, &tilem,      &tiler,       &tilec,         &isannexb,
-  &oppointarg,     &outallarg,  NULL
+  &help,        &codecarg,       &use_yv12,    &use_i420,   &flipuvarg,
+  &rawvideo,    &noblitarg,      &progressarg, &limitarg,   &skiparg,
+  &postprocarg, &summaryarg,     &outputfile,  &threadsarg, &rowmtarg,
+  &verbosearg,  &scalearg,       &fb_arg,      &md5arg,     &framestatsarg,
+  &continuearg, &outbitdeptharg, &tilem,       &tiler,      &tilec,
+  &isannexb,    &oppointarg,     &outallarg,   NULL
 };
 
 #if CONFIG_LIBYUV
@@ -512,6 +514,7 @@
   int do_scale = 0;
   int operating_point = 0;
   int output_all_layers = 0;
+  unsigned int row_mt = 0;
   aom_image_t *scaled_img = NULL;
   aom_image_t *img_shifted = NULL;
   int frame_avail, got_data, flush_decoder = 0;
@@ -601,6 +604,8 @@
       summary = 1;
     } else if (arg_match(&arg, &threadsarg, argi)) {
       cfg.threads = arg_parse_uint(&arg);
+    } else if (arg_match(&arg, &rowmtarg, argi)) {
+      row_mt = arg_parse_uint(&arg);
     } else if (arg_match(&arg, &verbosearg, argi)) {
       quiet = 0;
     } else if (arg_match(&arg, &scalearg, argi)) {
@@ -763,6 +768,11 @@
             aom_codec_error(&decoder));
     goto fail;
   }
+
+  if (aom_codec_control(&decoder, AV1D_SET_ROW_MT, row_mt)) {
+    fprintf(stderr, "Failed to set row_mt: %s\n", aom_codec_error(&decoder));
+    goto fail;
+  }
 #endif
 
   if (arg_skip) fprintf(stderr, "Skipping first %d frames.\n", arg_skip);
diff --git a/av1/av1_dx_iface.c b/av1/av1_dx_iface.c
index db338f7..f0862da 100644
--- a/av1/av1_dx_iface.c
+++ b/av1/av1_dx_iface.c
@@ -50,6 +50,7 @@
   int decode_tile_col;
   unsigned int tile_mode;
   unsigned int ext_tile_debug;
+  unsigned int row_mt;
   EXTERNAL_REFERENCES ext_refs;
   unsigned int is_annexb;
   int operating_point;
@@ -429,6 +430,7 @@
     frame_worker_data->pbi->operating_point = ctx->operating_point;
     frame_worker_data->pbi->output_all_layers = ctx->output_all_layers;
     frame_worker_data->pbi->ext_tile_debug = ctx->ext_tile_debug;
+    frame_worker_data->pbi->row_mt = ctx->row_mt;
 
     worker->hook = (AVxWorkerHook)frame_worker_hook;
     if (!winterface->reset(worker)) {
@@ -489,6 +491,7 @@
   frame_worker_data->pbi->dec_tile_row = ctx->decode_tile_row;
   frame_worker_data->pbi->dec_tile_col = ctx->decode_tile_col;
   frame_worker_data->pbi->ext_tile_debug = ctx->ext_tile_debug;
+  frame_worker_data->pbi->row_mt = ctx->row_mt;
   frame_worker_data->pbi->ext_refs = ctx->ext_refs;
 
   frame_worker_data->pbi->common.is_annexb = ctx->is_annexb;
@@ -1124,6 +1127,12 @@
   return AOM_CODEC_OK;
 }
 
+static aom_codec_err_t ctrl_set_row_mt(aom_codec_alg_priv_t *ctx,
+                                       va_list args) {
+  ctx->row_mt = va_arg(args, unsigned int);
+  return AOM_CODEC_OK;
+}
+
 static aom_codec_ctrl_fn_map_t decoder_ctrl_maps[] = {
   { AV1_COPY_REFERENCE, ctrl_copy_reference },
 
@@ -1145,6 +1154,7 @@
   { AV1D_SET_OUTPUT_ALL_LAYERS, ctrl_set_output_all_layers },
   { AV1_SET_INSPECTION_CALLBACK, ctrl_set_inspection_callback },
   { AV1D_EXT_TILE_DEBUG, ctrl_ext_tile_debug },
+  { AV1D_SET_ROW_MT, ctrl_set_row_mt },
   { AV1D_SET_EXT_REF_PTR, ctrl_set_ext_ref_ptr },
 
   // Getters
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index 8de2b47..3ae2743 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -4386,8 +4386,11 @@
 
   if (initialize_flag) setup_frame_info(pbi);
 
-  if (pbi->max_threads > 1 && tile_count_tg > 1 &&
-      !(cm->large_scale_tile && !pbi->ext_tile_debug))
+  if (pbi->max_threads > 1 && !(cm->large_scale_tile && !pbi->ext_tile_debug) &&
+      pbi->row_mt)
+    *p_data_end = decode_tiles_mt(pbi, data, data_end, start_tile, end_tile);
+  else if (pbi->max_threads > 1 && tile_count_tg > 1 &&
+           !(cm->large_scale_tile && !pbi->ext_tile_debug))
     *p_data_end = decode_tiles_mt(pbi, data, data_end, start_tile, end_tile);
   else
     *p_data_end = decode_tiles(pbi, data, data_end, start_tile, end_tile);
diff --git a/av1/decoder/decoder.h b/av1/decoder/decoder.h
index 42fcc12..3056cda 100644
--- a/av1/decoder/decoder.h
+++ b/av1/decoder/decoder.h
@@ -162,6 +162,7 @@
   int tile_count_minus_1;
   uint32_t coded_tile_data_size;
   unsigned int ext_tile_debug;  // for ext-tile software debug & testing
+  unsigned int row_mt;
   EXTERNAL_REFERENCES ext_refs;
   size_t tile_list_size;
   uint8_t *tile_list_output;
diff --git a/test/codec_factory.h b/test/codec_factory.h
index 65b7609..e6ae7f8 100644
--- a/test/codec_factory.h
+++ b/test/codec_factory.h
@@ -71,6 +71,11 @@
     : public ::testing::TestWithParam< ::testing::tuple<
           const libaom_test::CodecFactory *, T1, T2, T3, T4> > {};
 
+template <class T1, class T2, class T3, class T4, class T5>
+class CodecTestWith5Params
+    : public ::testing::TestWithParam< ::testing::tuple<
+          const libaom_test::CodecFactory *, T1, T2, T3, T4, T5> > {};
+
 /*
  * AV1 Codec Definitions
  */
diff --git a/test/decode_multithreaded_test.cc b/test/decode_multithreaded_test.cc
index cde612d..cea1d14 100644
--- a/test/decode_multithreaded_test.cc
+++ b/test/decode_multithreaded_test.cc
@@ -26,13 +26,14 @@
 static const int kNumMultiThreadDecoders = 3;
 
 class AV1DecodeMultiThreadedTest
-    : public ::libaom_test::CodecTestWith4Params<int, int, int, int>,
+    : public ::libaom_test::CodecTestWith5Params<int, int, int, int, int>,
       public ::libaom_test::EncoderTest {
  protected:
   AV1DecodeMultiThreadedTest()
       : EncoderTest(GET_PARAM(0)), md5_single_thread_(), md5_multi_thread_(),
         n_tile_cols_(GET_PARAM(1)), n_tile_rows_(GET_PARAM(2)),
-        n_tile_groups_(GET_PARAM(3)), set_cpu_used_(GET_PARAM(4)) {
+        n_tile_groups_(GET_PARAM(3)), set_cpu_used_(GET_PARAM(4)),
+        row_mt_(GET_PARAM(5)) {
     init_flags_ = AOM_CODEC_USE_PSNR;
     aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t();
     cfg.w = 704;
@@ -45,6 +46,7 @@
     for (int i = 0; i < kNumMultiThreadDecoders; ++i) {
       cfg.threads <<= 1;
       multi_thread_dec_[i] = codec_->CreateDecoder(cfg, 0);
+      multi_thread_dec_[i]->Control(AV1D_SET_ROW_MT, row_mt_);
     }
 
     if (single_thread_dec_->IsAV1()) {
@@ -130,6 +132,7 @@
   int n_tile_rows_;
   int n_tile_groups_;
   int set_cpu_used_;
+  int row_mt_;
 };
 
 // run an encode and do the decode both in single thread
@@ -156,11 +159,12 @@
 // TODO(ranjit): More tests have to be added using pre-generated MD5.
 AV1_INSTANTIATE_TEST_CASE(AV1DecodeMultiThreadedTest, ::testing::Values(1, 2),
                           ::testing::Values(1, 2), ::testing::Values(1),
-                          ::testing::Values(3));
+                          ::testing::Values(3), ::testing::Values(0, 1));
 AV1_INSTANTIATE_TEST_CASE(AV1DecodeMultiThreadedTestLarge,
                           ::testing::Values(0, 1, 2, 6),
                           ::testing::Values(0, 1, 2, 6),
-                          ::testing::Values(1, 4), ::testing::Values(0));
+                          ::testing::Values(1, 4), ::testing::Values(0),
+                          ::testing::Values(0, 1));
 
 class AV1DecodeMultiThreadedLSTestLarge
     : public AV1DecodeMultiThreadedTestLarge {};
@@ -175,6 +179,7 @@
 
 AV1_INSTANTIATE_TEST_CASE(AV1DecodeMultiThreadedLSTestLarge,
                           ::testing::Values(6), ::testing::Values(6),
-                          ::testing::Values(1), ::testing::Values(0, 3));
+                          ::testing::Values(1), ::testing::Values(0, 3),
+                          ::testing::Values(0, 1));
 
 }  // namespace
diff --git a/test/test_vector_test.cc b/test/test_vector_test.cc
index 2997e8f..733b925 100644
--- a/test/test_vector_test.cc
+++ b/test/test_vector_test.cc
@@ -30,8 +30,9 @@
 
 const int kThreads = 0;
 const int kFileName = 1;
+const int kRowMT = 2;
 
-typedef ::testing::tuple<int, const char *> DecodeParam;
+typedef ::testing::tuple<int, const char *, int> DecodeParam;
 
 class TestVectorTest : public ::libaom_test::DecoderTest,
                        public ::libaom_test::CodecTestWithParam<DecodeParam> {
@@ -48,6 +49,12 @@
         << "Md5 file open failed. Filename: " << md5_file_name_;
   }
 
+  virtual void PreDecodeFrameHook(
+      const libaom_test::CompressedVideoSource &video,
+      libaom_test::Decoder *decoder) {
+    if (video.frame_number() == 0) decoder->Control(AV1D_SET_ROW_MT, row_mt_);
+  }
+
   virtual void DecompressedFrameHook(const aom_image_t &img,
                                      const unsigned int frame_number) {
     ASSERT_TRUE(md5_file_ != NULL);
@@ -84,6 +91,8 @@
         << "Md5 checksums don't match: frame number = " << frame_number;
   }
 
+  unsigned int row_mt_;
+
  private:
   FILE *md5_file_;
 };
@@ -100,6 +109,7 @@
   char str[256];
 
   cfg.threads = ::testing::get<kThreads>(input);
+  row_mt_ = ::testing::get<kRowMT>(input);
 
   snprintf(str, sizeof(str) / sizeof(str[0]) - 1, "file: %s threads: %d",
            filename.c_str(), cfg.threads);
@@ -140,11 +150,11 @@
 #if CONFIG_AV1_DECODER
 AV1_INSTANTIATE_TEST_CASE(
     TestVectorTest,
-    ::testing::Combine(
-        ::testing::Values(1),  // Single thread.
-        ::testing::ValuesIn(libaom_test::kAV1TestVectors,
-                            libaom_test::kAV1TestVectors +
-                                libaom_test::kNumAV1TestVectors)));
+    ::testing::Combine(::testing::Values(1),  // Single thread.
+                       ::testing::ValuesIn(libaom_test::kAV1TestVectors,
+                                           libaom_test::kAV1TestVectors +
+                                               libaom_test::kNumAV1TestVectors),
+                       ::testing::Values(0)));
 
 // Test AV1 decode in with different numbers of threads.
 INSTANTIATE_TEST_CASE_P(
@@ -156,7 +166,8 @@
             ::testing::Range(2, 9),  // With 2 ~ 8 threads.
             ::testing::ValuesIn(libaom_test::kAV1TestVectors,
                                 libaom_test::kAV1TestVectors +
-                                    libaom_test::kNumAV1TestVectors))));
+                                    libaom_test::kNumAV1TestVectors),
+            ::testing::Range(0, 2))));
 
 #endif  // CONFIG_AV1_DECODER