Set sb_size=64 for screen real-time multi-thread

For screen real-time mode: set sb_size=64 for
dynamic (default) mode for multi-thread encode
with a low number of 128x128 superblocks per tile.
Applies to resolutions > 720p and #threads >= 4.

This gives a ~8-13% speedup (fps) on rtc_screen
for 4 and 8 threads.

Change-Id: I1427dedca4263882c56ef914530d03b726be6e7c
diff --git a/av1/encoder/encoder_utils.c b/av1/encoder/encoder_utils.c
index c6bff2e..e2240cd 100644
--- a/av1/encoder/encoder_utils.c
+++ b/av1/encoder/encoder_utils.c
@@ -803,10 +803,21 @@
                ? BLOCK_128X128
                : BLOCK_64X64;
   } else if (oxcf->mode == REALTIME) {
-    if (oxcf->tune_cfg.content == AOM_CONTENT_SCREEN)
-      return AOMMIN(width, height) >= 720 ? BLOCK_128X128 : BLOCK_64X64;
-    else
+    if (oxcf->tune_cfg.content == AOM_CONTENT_SCREEN) {
+      const TileConfig *const tile_cfg = &oxcf->tile_cfg;
+      const int num_tiles =
+          (1 << tile_cfg->tile_columns) * (1 << tile_cfg->tile_rows);
+      // For multi-thread encode: if the number of (128x128) superblocks
+      // per tile is low, use 64X64 superblock.
+      if (oxcf->row_mt == 1 && oxcf->max_threads >= 4 &&
+          oxcf->max_threads >= num_tiles && AOMMIN(width, height) > 720 &&
+          (width * height) / (128 * 128 * num_tiles) <= 32)
+        return BLOCK_64X64;
+      else
+        return AOMMIN(width, height) >= 720 ? BLOCK_128X128 : BLOCK_64X64;
+    } else {
       return AOMMIN(width, height) > 720 ? BLOCK_128X128 : BLOCK_64X64;
+    }
   }
 
   // TODO(any): Possibly could improve this with a heuristic.