Add unit test for encoder row-based multi-threading

Add row-mt/row_mt as a command-line/init-time API parameter
in the encoder. Currently, encoding with row-mt=1 uses only
tile-level multi-threading. Support for row-level multi-threading
of the encoding stage will be added in a later change.

Change-Id: I67450dda05d1a1c32131d0998dd6546e147b681d
diff --git a/aom/aomcx.h b/aom/aomcx.h
index 07e9287..0d6ce7d 100644
--- a/aom/aomcx.h
+++ b/aom/aomcx.h
@@ -282,6 +282,12 @@
    */
   AV1E_SET_LOSSLESS = AV1E_SET_GF_CBR_BOOST_PCT + 2,
 
+  /*!\brief Codec control function to enable row-based multi-threading in the
+   * encoder. A value of 1 enables row-based multi-threading; 0 (the default)
+   * disables it.
+   */
+  AV1E_SET_ROW_MT,
+
   /*!\brief Codec control function to set number of tile columns.
    *
    * In encoding and decoding, AV1 allows an input image frame be partitioned
@@ -989,6 +995,9 @@
 AOM_CTRL_USE_TYPE(AOME_SET_CQ_LEVEL, unsigned int)
 #define AOM_CTRL_AOME_SET_CQ_LEVEL
 
+AOM_CTRL_USE_TYPE(AV1E_SET_ROW_MT, int)
+#define AOM_CTRL_AV1E_SET_ROW_MT
+
 AOM_CTRL_USE_TYPE(AV1E_SET_TILE_COLUMNS, int)
 #define AOM_CTRL_AV1E_SET_TILE_COLUMNS
 AOM_CTRL_USE_TYPE(AV1E_SET_TILE_ROWS, int)
diff --git a/apps/aomenc.c b/apps/aomenc.c
index 7abed80..7518e3a 100644
--- a/apps/aomenc.c
+++ b/apps/aomenc.c
@@ -414,6 +414,9 @@
 static const arg_def_t single_tile_decoding =
     ARG_DEF(NULL, "single-tile-decoding", 1,
             "Single tile decoding (0: off (default), 1: on)");
+static const arg_def_t rowmtarg =
+    ARG_DEF(NULL, "row-mt", 1,
+            "Enable row based multi-threading (0: off (default), 1: on)");
 static const arg_def_t tile_cols =
     ARG_DEF(NULL, "tile-columns", 1, "Number of tile columns to use, log2");
 static const arg_def_t tile_rows =
@@ -621,6 +624,7 @@
                                        &sharpness,
                                        &static_thresh,
                                        &single_tile_decoding,
+                                       &rowmtarg,
                                        &tile_cols,
                                        &tile_rows,
                                        &arnr_maxframes,
@@ -676,6 +680,7 @@
                                         AOME_SET_SHARPNESS,
                                         AOME_SET_STATIC_THRESHOLD,
                                         AV1E_SET_SINGLE_TILE_DECODING,
+                                        AV1E_SET_ROW_MT,
                                         AV1E_SET_TILE_COLUMNS,
                                         AV1E_SET_TILE_ROWS,
                                         AOME_SET_ARNR_MAXFRAMES,
diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c
index 0dd7c5e..12831f4 100644
--- a/av1/av1_cx_iface.c
+++ b/av1/av1_cx_iface.c
@@ -36,6 +36,7 @@
   unsigned int noise_sensitivity;
   unsigned int sharpness;
   unsigned int static_thresh;
+  unsigned int row_mt;
   unsigned int tile_columns;  // log2 number of tile columns
   unsigned int tile_rows;     // log2 number of tile rows
   unsigned int arnr_max_frames;
@@ -107,6 +108,7 @@
   0,                 // noise_sensitivity
   0,                 // sharpness
   0,                 // static_thresh
+  0,                 // row_mt
   0,                 // tile_columns
   0,                 // tile_rows
   7,                 // arnr_max_frames
@@ -286,6 +288,8 @@
   RANGE_CHECK_HI(cfg, large_scale_tile, 1);
   RANGE_CHECK_HI(extra_cfg, single_tile_decoding, 1);
 
+  RANGE_CHECK_HI(extra_cfg, row_mt, 1);
+
   RANGE_CHECK_HI(extra_cfg, tile_columns, 6);
   RANGE_CHECK_HI(extra_cfg, tile_rows, 6);
 
@@ -631,6 +635,8 @@
       oxcf->superblock_size = AOM_SUPERBLOCK_SIZE_64X64;
   }
 
+  oxcf->row_mt = extra_cfg->row_mt;
+
   oxcf->tile_columns = extra_cfg->tile_columns;
   oxcf->tile_rows = extra_cfg->tile_rows;
 
@@ -794,6 +800,13 @@
   return update_extra_cfg(ctx, &extra_cfg);
 }
 
+static aom_codec_err_t ctrl_set_row_mt(aom_codec_alg_priv_t *ctx,
+                                       va_list args) {
+  struct av1_extracfg extra_cfg = ctx->extra_cfg;
+  extra_cfg.row_mt = CAST(AV1E_SET_ROW_MT, args);
+  return update_extra_cfg(ctx, &extra_cfg);
+}
+
 static aom_codec_err_t ctrl_set_tile_columns(aom_codec_alg_priv_t *ctx,
                                              va_list args) {
   struct av1_extracfg extra_cfg = ctx->extra_cfg;
@@ -1673,6 +1686,7 @@
   { AOME_SET_ENABLEAUTOBWDREF, ctrl_set_enable_auto_bwd_ref },
   { AOME_SET_SHARPNESS, ctrl_set_sharpness },
   { AOME_SET_STATIC_THRESHOLD, ctrl_set_static_thresh },
+  { AV1E_SET_ROW_MT, ctrl_set_row_mt },
   { AV1E_SET_TILE_COLUMNS, ctrl_set_tile_columns },
   { AV1E_SET_TILE_ROWS, ctrl_set_tile_rows },
   { AOME_SET_ARNR_MAXFRAMES, ctrl_set_arnr_max_frames },
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index fd87ac1..3f28a52 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -5172,7 +5172,9 @@
     }
 #endif
 
-    if (AOMMIN(cpi->oxcf.max_threads, cm->tile_cols * cm->tile_rows) > 1)
+    if (cpi->row_mt && (cpi->oxcf.max_threads > 1))
+      av1_encode_tiles_mt(cpi);
+    else if (AOMMIN(cpi->oxcf.max_threads, cm->tile_cols * cm->tile_rows) > 1)
       av1_encode_tiles_mt(cpi);
     else
       encode_tiles(cpi);
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 13fba0b..0be7699 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -2321,6 +2321,7 @@
 
   cpi->oxcf = *oxcf;
   cpi->common.options = oxcf->cfg;
+  cpi->row_mt = oxcf->row_mt;
   x->e_mbd.bd = (int)seq_params->bit_depth;
   x->e_mbd.global_motion = cm->global_motion;
 
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 4fa6d66..621e0e3 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -248,6 +248,7 @@
   int min_gf_interval;
   int max_gf_interval;
 
+  int row_mt;
   int tile_columns;
   int tile_rows;
   int tile_width_count;
@@ -539,6 +540,7 @@
   int previous_index;
   int cur_poc;  // DebugInfo
 
+  unsigned int row_mt;
   int scaled_ref_idx[REF_FRAMES];
   int ref_fb_idx[REF_FRAMES];
   int refresh_fb_idx;  // ref frame buffer index to refresh
diff --git a/test/ethread_test.cc b/test/ethread_test.cc
index dd9fc2f..341c179 100644
--- a/test/ethread_test.cc
+++ b/test/ethread_test.cc
@@ -72,6 +72,7 @@
     if (!encoder_initialized_) {
       SetTileSize(encoder);
       encoder->Control(AOME_SET_CPUUSED, set_cpu_used_);
+      encoder->Control(AV1E_SET_ROW_MT, row_mt_);
       if (encoding_mode_ != ::libaom_test::kRealTime) {
         encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
         encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
@@ -119,6 +120,7 @@
     cfg_.rc_target_bitrate = 1000;
 
     // Encode using single thread.
+    row_mt_ = 0;
     cfg_.g_threads = 1;
     init_flags_ = AOM_CODEC_USE_PSNR;
     ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
@@ -149,6 +151,54 @@
     ASSERT_EQ(single_thr_size_enc, multi_thr_size_enc);
     ASSERT_EQ(single_thr_md5_enc, multi_thr_md5_enc);
     ASSERT_EQ(single_thr_md5_dec, multi_thr_md5_dec);
+
+    // Encode using multiple threads with row-mt enabled.
+    row_mt_ = 1;
+    cfg_.g_threads = 2;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    std::vector<size_t> multi_thr2_row_mt_size_enc;
+    std::vector<std::string> multi_thr2_row_mt_md5_enc;
+    std::vector<std::string> multi_thr2_row_mt_md5_dec;
+    multi_thr2_row_mt_size_enc = size_enc_;
+    multi_thr2_row_mt_md5_enc = md5_enc_;
+    multi_thr2_row_mt_md5_dec = md5_dec_;
+    size_enc_.clear();
+    md5_enc_.clear();
+    md5_dec_.clear();
+
+    cfg_.g_threads = 3;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    std::vector<size_t> multi_thr3_row_mt_size_enc;
+    std::vector<std::string> multi_thr3_row_mt_md5_enc;
+    std::vector<std::string> multi_thr3_row_mt_md5_dec;
+    multi_thr3_row_mt_size_enc = size_enc_;
+    multi_thr3_row_mt_md5_enc = md5_enc_;
+    multi_thr3_row_mt_md5_dec = md5_dec_;
+    size_enc_.clear();
+    md5_enc_.clear();
+    md5_dec_.clear();
+
+    // Check that the vectors are equal.
+    ASSERT_EQ(multi_thr3_row_mt_size_enc, multi_thr2_row_mt_size_enc);
+    ASSERT_EQ(multi_thr3_row_mt_md5_enc, multi_thr2_row_mt_md5_enc);
+    ASSERT_EQ(multi_thr3_row_mt_md5_dec, multi_thr2_row_mt_md5_dec);
+
+    cfg_.g_threads = 4;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    std::vector<size_t> multi_thr4_row_mt_size_enc;
+    std::vector<std::string> multi_thr4_row_mt_md5_enc;
+    std::vector<std::string> multi_thr4_row_mt_md5_dec;
+    multi_thr4_row_mt_size_enc = size_enc_;
+    multi_thr4_row_mt_md5_enc = md5_enc_;
+    multi_thr4_row_mt_md5_dec = md5_dec_;
+    size_enc_.clear();
+    md5_enc_.clear();
+    md5_dec_.clear();
+
+    // Check that the vectors are equal.
+    ASSERT_EQ(multi_thr4_row_mt_size_enc, multi_thr2_row_mt_size_enc);
+    ASSERT_EQ(multi_thr4_row_mt_md5_enc, multi_thr2_row_mt_md5_enc);
+    ASSERT_EQ(multi_thr4_row_mt_md5_dec, multi_thr2_row_mt_md5_dec);
   }
 
   bool encoder_initialized_;
@@ -156,6 +206,7 @@
   int set_cpu_used_;
   int tile_cols_;
   int tile_rows_;
+  int row_mt_;
   ::libaom_test::Decoder *decoder_;
   std::vector<size_t> size_enc_;
   std::vector<std::string> md5_enc_;
@@ -180,13 +231,14 @@
 AV1_INSTANTIATE_TEST_CASE(AVxEncoderThreadTest,
                           ::testing::Values(::libaom_test::kTwoPassGood,
                                             ::libaom_test::kOnePassGood),
-                          ::testing::Range(2, 4), ::testing::Values(1, 2),
-                          ::testing::Values(0, 1));
+                          ::testing::Values(1, 3), ::testing::Values(0, 1, 2),
+                          ::testing::Values(0, 1, 2));
 
 AV1_INSTANTIATE_TEST_CASE(AVxEncoderThreadTestLarge,
                           ::testing::Values(::libaom_test::kTwoPassGood,
                                             ::libaom_test::kOnePassGood),
-                          ::testing::Range(0, 2), ::testing::Values(0, 1, 2, 6),
+                          ::testing::Values(0, 2),
+                          ::testing::Values(0, 1, 2, 6),
                           ::testing::Values(0, 1, 2, 6));
 
 class AVxEncoderThreadLSTest : public AVxEncoderThreadTest {
@@ -215,11 +267,12 @@
 AV1_INSTANTIATE_TEST_CASE(AVxEncoderThreadLSTest,
                           ::testing::Values(::libaom_test::kTwoPassGood,
                                             ::libaom_test::kOnePassGood),
-                          ::testing::Range(2, 4), ::testing::Values(6),
+                          ::testing::Values(1, 3), ::testing::Values(0, 6),
                           ::testing::Values(0, 6));
+
 AV1_INSTANTIATE_TEST_CASE(AVxEncoderThreadLSTestLarge,
                           ::testing::Values(::libaom_test::kTwoPassGood,
                                             ::libaom_test::kOnePassGood),
-                          ::testing::Range(0, 2), ::testing::Values(6),
+                          ::testing::Range(0, 2), ::testing::Values(0, 6),
                           ::testing::Values(0, 6));
 }  // namespace