Enable tile-row-based multi-threading in the encoder

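Multi-threaded tile encoding is no longer restricted to frames with a
single tile row: the worker count is now
min(max_threads, tile_cols * tile_rows).

Encoder multi-threading scaling is ~2.9x with 4 threads (parkrun_720p50,
with 2 tile columns and 2 tile rows).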

Change-Id: I6834bdb723e0d376409120023acae69d8dcdf69f
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 27ca537..ba44cda 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -4943,11 +4943,7 @@
     av1_inter_mode_data_init();
 #endif
 
-    // If allowed, encoding tiles in parallel with one thread handling one tile.
-    // TODO(geza.lore): The multi-threaded encoder is not safe with more than
-    // 1 tile rows, as it uses the single above_context et al arrays from
-    // cpi->common
-    if (AOMMIN(cpi->oxcf.max_threads, cm->tile_cols) > 1 && cm->tile_rows == 1)
+    if (AOMMIN(cpi->oxcf.max_threads, cm->tile_cols * cm->tile_rows) > 1)
       av1_encode_tiles_mt(cpi);
     else
       encode_tiles(cpi);
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c
index 637d682..6d9c8a8 100644
--- a/av1/encoder/ethread.c
+++ b/av1/encoder/ethread.c
@@ -50,8 +50,9 @@
 void av1_encode_tiles_mt(AV1_COMP *cpi) {
   AV1_COMMON *const cm = &cpi->common;
   const int tile_cols = cm->tile_cols;
+  const int tile_rows = cm->tile_rows;
   const AVxWorkerInterface *const winterface = aom_get_worker_interface();
-  int num_workers = AOMMIN(cpi->oxcf.max_threads, tile_cols);
+  int num_workers = AOMMIN(cpi->oxcf.max_threads, tile_cols * tile_rows);
   int i;
 
   av1_init_tile_data(cpi);