row_mt enc: Delay top-right sync when intraBC is enabled

This patch introduces an extra top-right delay when the intraBC
tool is enabled so as to ensure that a sufficient number of
superblocks have finished encoding in the previous rows before
starting to encode the current superblock. This ensures that
row-multithreaded encoding respects the hardware constraints on
parallel processing with the intraBC tool and thus prevents any
invalid access while building the predictor. Also, the unit test
ScreenContentToolsMultiThreadTestLarge.ScreenContentToolsTest is
re-introduced with thread sanitizer as the data race issue is
resolved with this change.

BUG=aomedia:3278

Change-Id: I042434c69d4d8db784a99e6d0ae6241f2b538b49
diff --git a/av1/common/thread_common.h b/av1/common/thread_common.h
index 7cbae33..1c39b20 100644
--- a/av1/common/thread_common.h
+++ b/av1/common/thread_common.h
@@ -22,6 +22,9 @@
 extern "C" {
 #endif
 
+#define INTRABC_ROW_MT_TOP_RIGHT_SB128_DELAY 3
+#define INTRABC_ROW_MT_TOP_RIGHT_SB64_DELAY 5
+
 struct AV1Common;
 
 typedef struct AV1LfMTInfo {
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 00b028e..b12bb5d 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -1368,12 +1368,20 @@
    */
   int *num_finished_cols;
   /*!
-   * Number of extra superblocks of the top row to be complete for encoding
-   * of the current superblock to start. A value of 1 indicates top-right
-   * dependency.
+   * Denotes the superblock interval at which conditional signalling should
+   * happen. Also denotes the minimum number of extra superblocks of the top row
+   * to be complete to start encoding the current superblock. A value of 1
+   * indicates top-right dependency.
    */
   int sync_range;
   /*!
+   * Denotes the additional number of superblocks in the previous row to be
+   * complete to start encoding the current superblock when intraBC tool is
+   * enabled. This additional top-right delay is required to satisfy the
+   * hardware constraints for intraBC tool when row multithreading is enabled.
+   */
+  int intrabc_extra_top_right_sb_delay;
+  /*!
    * Number of superblock rows.
    */
   int rows;
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c
index 7af9a35..1ca2681 100644
--- a/av1/encoder/ethread.c
+++ b/av1/encoder/ethread.c
@@ -118,7 +118,8 @@
     pthread_mutex_t *const mutex = &row_mt_sync->mutex_[r - 1];
     pthread_mutex_lock(mutex);
 
-    while (c > row_mt_sync->num_finished_cols[r - 1] - nsync) {
+    while (c > row_mt_sync->num_finished_cols[r - 1] - nsync -
+                   row_mt_sync->intrabc_extra_top_right_sb_delay) {
       pthread_cond_wait(&row_mt_sync->cond_[r - 1], mutex);
     }
     pthread_mutex_unlock(mutex);
@@ -142,7 +143,7 @@
     cur = c;
     if (c % nsync) sig = 0;
   } else {
-    cur = cols + nsync;
+    cur = cols + nsync + row_mt_sync->intrabc_extra_top_right_sb_delay;
   }
 
   if (sig) {
@@ -1508,6 +1509,18 @@
 }
 #endif
 
+static AOM_INLINE int get_intrabc_extra_top_right_sb_delay(
+    const AV1_COMMON *cm) {
+  // No additional top-right delay when intraBC tool is not enabled.
+  if (!av1_allow_intrabc(cm)) return 0;
+  // A minimum top-right delay of 1 superblock is assured with 'sync_range'.
+  // Hence, return only the additional superblock delay with intraBC tool.
+  return (cm->seq_params->sb_size == BLOCK_128X128
+              ? INTRABC_ROW_MT_TOP_RIGHT_SB128_DELAY
+              : INTRABC_ROW_MT_TOP_RIGHT_SB64_DELAY) -
+         1;
+}
+
 void av1_encode_tiles_row_mt(AV1_COMP *cpi) {
   AV1_COMMON *const cm = &cpi->common;
   MultiThreadInfo *const mt_info = &cpi->mt_info;
@@ -1552,6 +1565,8 @@
              sizeof(*row_mt_sync->num_finished_cols) * max_sb_rows);
       row_mt_sync->next_mi_row = this_tile->tile_info.mi_row_start;
       row_mt_sync->num_threads_working = 0;
+      row_mt_sync->intrabc_extra_top_right_sb_delay =
+          get_intrabc_extra_top_right_sb_delay(cm);
 
       av1_inter_mode_data_init(this_tile);
       av1_zero_above_context(cm, &cpi->td.mb.e_mbd,
@@ -1622,6 +1637,10 @@
              sizeof(*row_mt_sync->num_finished_cols) * max_mb_rows);
       row_mt_sync->next_mi_row = this_tile->tile_info.mi_row_start;
       row_mt_sync->num_threads_working = 0;
+
+      // intraBC mode is not evaluated during first-pass encoding. Hence, no
+      // additional top-right delay is required.
+      row_mt_sync->intrabc_extra_top_right_sb_delay = 0;
     }
   }
 
diff --git a/test/screen_content_test.cc b/test/screen_content_test.cc
index acdee6b..4d3e09a 100644
--- a/test/screen_content_test.cc
+++ b/test/screen_content_test.cc
@@ -115,17 +115,6 @@
     : public ScreenContentToolsTestLarge {};
 
 TEST_P(ScreenContentToolsMultiThreadTestLarge, ScreenContentToolsTest) {
-  // TODO(aomedia:3278): This test is known to have data races. Do not run the
-  // test under ThreadSanitizer.
-#if defined(__has_feature)
-#if __has_feature(thread_sanitizer)
-  GTEST_SKIP()
-      << "Skipping the test under ThreadSanitizer. See bug aomedia:3278.";
-#endif
-#elif defined(__SANITIZE_THREAD__)
-  GTEST_SKIP()
-      << "Skipping the test under ThreadSanitizer. See bug aomedia:3278.";
-#endif
   // Don't force screen content, however as the input is screen content
   // allow_screen_content_tools should still be turned on even with
   // multi-threaded encoding.