Fix enc multi-threading mismatch in screen content

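Fix the AV1 encoder multi-threading mismatch seen with
--tune-content=screen. The CRC calculators and the block hash value
buffer in hash_motion.c were file-scope statics shared by every
encoder thread. They are now stored in MACROBLOCK, and each worker
thread allocates its own hash value buffers.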

BUG=aomedia:2118

Change-Id: I971b382f6b58b98e67fff78dbdcc1e6d6879aa2b
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index 93f6205..cbb23eb 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -255,6 +255,16 @@
 
   PALETTE_BUFFER *palette_buffer;
 
+  // buffer for hash value calculation of a block
+  // used only in av1_get_block_hash_value()
+  // [first hash/second hash]
+  // [two buffers used ping-pong]
+  uint32_t *hash_value_buffer[2][2];
+
+  CRC_CALCULATOR crc_calculator1;
+  CRC_CALCULATOR crc_calculator2;
+  int g_crc_initialized;
+
   // These define limits to motion vector components to prevent them
   // from extending outside the UMV borders
   MvLimits mv_limits;
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 6220ac8..5167d43 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -4917,41 +4917,41 @@
 
     av1_hash_table_create(&cm->cur_frame->hash_table);
     av1_generate_block_2x2_hash_value(cpi->source, block_hash_values[0],
-                                      is_block_same[0]);
+                                      is_block_same[0], &cpi->td.mb);
     av1_generate_block_hash_value(cpi->source, 4, block_hash_values[0],
                                   block_hash_values[1], is_block_same[0],
-                                  is_block_same[1]);
+                                  is_block_same[1], &cpi->td.mb);
     av1_add_to_hash_map_by_row_with_precal_data(
         &cm->cur_frame->hash_table, block_hash_values[1], is_block_same[1][2],
         pic_width, pic_height, 4);
     av1_generate_block_hash_value(cpi->source, 8, block_hash_values[1],
                                   block_hash_values[0], is_block_same[1],
-                                  is_block_same[0]);
+                                  is_block_same[0], &cpi->td.mb);
     av1_add_to_hash_map_by_row_with_precal_data(
         &cm->cur_frame->hash_table, block_hash_values[0], is_block_same[0][2],
         pic_width, pic_height, 8);
     av1_generate_block_hash_value(cpi->source, 16, block_hash_values[0],
                                   block_hash_values[1], is_block_same[0],
-                                  is_block_same[1]);
+                                  is_block_same[1], &cpi->td.mb);
     av1_add_to_hash_map_by_row_with_precal_data(
         &cm->cur_frame->hash_table, block_hash_values[1], is_block_same[1][2],
         pic_width, pic_height, 16);
     av1_generate_block_hash_value(cpi->source, 32, block_hash_values[1],
                                   block_hash_values[0], is_block_same[1],
-                                  is_block_same[0]);
+                                  is_block_same[0], &cpi->td.mb);
     av1_add_to_hash_map_by_row_with_precal_data(
         &cm->cur_frame->hash_table, block_hash_values[0], is_block_same[0][2],
         pic_width, pic_height, 32);
     av1_generate_block_hash_value(cpi->source, 64, block_hash_values[0],
                                   block_hash_values[1], is_block_same[0],
-                                  is_block_same[1]);
+                                  is_block_same[1], &cpi->td.mb);
     av1_add_to_hash_map_by_row_with_precal_data(
         &cm->cur_frame->hash_table, block_hash_values[1], is_block_same[1][2],
         pic_width, pic_height, 64);
 
     av1_generate_block_hash_value(cpi->source, 128, block_hash_values[1],
                                   block_hash_values[0], is_block_same[1],
-                                  is_block_same[0]);
+                                  is_block_same[0], &cpi->td.mb);
     av1_add_to_hash_map_by_row_with_precal_data(
         &cm->cur_frame->hash_table, block_hash_values[0], is_block_same[0][2],
         pic_width, pic_height, 128);
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 0be7699..0c0c7e9 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -501,6 +501,11 @@
   aom_free(cpi->td.mb.wsrc_buf);
   cpi->td.mb.wsrc_buf = NULL;
 
+  for (int i = 0; i < 2; i++)
+    for (int j = 0; j < 2; j++) {
+      aom_free(cpi->td.mb.hash_value_buffer[i][j]);
+      cpi->td.mb.hash_value_buffer[i][j] = NULL;
+    }
   aom_free(cpi->td.mb.mask_buf);
   cpi->td.mb.mask_buf = NULL;
 
@@ -2584,6 +2589,15 @@
                   (int32_t *)aom_memalign(
                       16, MAX_SB_SQUARE * sizeof(*cpi->td.mb.wsrc_buf)));
 
+  for (int x = 0; x < 2; x++)
+    for (int y = 0; y < 2; y++)
+      CHECK_MEM_ERROR(
+          cm, cpi->td.mb.hash_value_buffer[x][y],
+          (uint32_t *)aom_malloc(AOM_BUFFER_SIZE_FOR_BLOCK_HASH *
+                                 sizeof(*cpi->td.mb.hash_value_buffer[0][0])));
+
+  cpi->td.mb.g_crc_initialized = 0;
+
   CHECK_MEM_ERROR(cm, cpi->td.mb.mask_buf,
                   (int32_t *)aom_memalign(
                       16, MAX_SB_SQUARE * sizeof(*cpi->td.mb.mask_buf)));
@@ -2912,6 +2926,11 @@
       aom_free(thread_data->td->above_pred_buf);
       aom_free(thread_data->td->left_pred_buf);
       aom_free(thread_data->td->wsrc_buf);
+      for (int x = 0; x < 2; x++)
+        for (int y = 0; y < 2; y++) {
+          aom_free(thread_data->td->hash_value_buffer[x][y]);
+          thread_data->td->hash_value_buffer[x][y] = NULL;
+        }
       aom_free(thread_data->td->mask_buf);
       aom_free(thread_data->td->counts);
       av1_free_pc_tree(thread_data->td, num_planes);
@@ -3746,7 +3765,8 @@
   rst[2].restoration_unit_size = rst[1].restoration_unit_size;
 }
 
-static void init_ref_frame_bufs(AV1_COMMON *cm) {
+static void init_ref_frame_bufs(AV1_COMP *cpi) {
+  AV1_COMMON *const cm = &cpi->common;
   int i;
   BufferPool *const pool = cm->buffer_pool;
   cm->new_fb_idx = INVALID_IDX;
@@ -3756,7 +3776,7 @@
   }
   if (cm->seq_params.force_screen_content_tools) {
     for (i = 0; i < FRAME_BUFFERS; ++i) {
-      av1_hash_table_init(&pool->frame_bufs[i].hash_table);
+      av1_hash_table_init(&pool->frame_bufs[i].hash_table, &cpi->td.mb);
     }
   }
 }
@@ -3774,7 +3794,7 @@
     seq_params->use_highbitdepth = use_highbitdepth;
 
     alloc_raw_frame_buffers(cpi);
-    init_ref_frame_bufs(cm);
+    init_ref_frame_bufs(cpi);
     alloc_util_frame_buffers(cpi);
 
     init_motion_estimation(cpi);  // TODO(agrange) This can be removed.
@@ -5674,7 +5694,7 @@
       av1_get_block_hash_value(
           cur_picture->y_buffer + y_pos * stride_cur + x_pos, stride_cur,
           block_size, &hash_value_1, &hash_value_2,
-          (cur_picture->flags & YV12_FLAG_HIGHBITDEPTH));
+          (cur_picture->flags & YV12_FLAG_HIGHBITDEPTH), &cpi->td.mb);
       // Hashing does not work for highbitdepth currently.
       // TODO(Roger): Make it work for highbitdepth.
       if (av1_use_hash_me(&cpi->common)) {
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 3d0bcab..3ba1354 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -468,6 +468,7 @@
   FRAME_COUNTS *counts;
   PC_TREE *pc_tree;
   PC_TREE *pc_root[MAX_MIB_SIZE_LOG2 - MIN_MIB_SIZE_LOG2 + 1];
+  uint32_t *hash_value_buffer[2][2];
   int32_t *wsrc_buf;
   int32_t *mask_buf;
   uint8_t *above_pred_buf;
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c
index 6d9c8a8..1b1d3b6 100644
--- a/av1/encoder/ethread.c
+++ b/av1/encoder/ethread.c
@@ -97,6 +97,15 @@
             cm, thread_data->td->wsrc_buf,
             (int32_t *)aom_memalign(
                 16, MAX_SB_SQUARE * sizeof(*thread_data->td->wsrc_buf)));
+
+        for (int x = 0; x < 2; x++)
+          for (int y = 0; y < 2; y++)
+            CHECK_MEM_ERROR(
+                cm, thread_data->td->hash_value_buffer[x][y],
+                (uint32_t *)aom_malloc(
+                    AOM_BUFFER_SIZE_FOR_BLOCK_HASH *
+                    sizeof(*thread_data->td->hash_value_buffer[0][0])));
+
         CHECK_MEM_ERROR(
             cm, thread_data->td->mask_buf,
             (int32_t *)aom_memalign(
@@ -140,6 +149,15 @@
       thread_data->td->mb.above_pred_buf = thread_data->td->above_pred_buf;
       thread_data->td->mb.left_pred_buf = thread_data->td->left_pred_buf;
       thread_data->td->mb.wsrc_buf = thread_data->td->wsrc_buf;
+      for (int x = 0; x < 2; x++)
+        for (int y = 0; y < 2; y++) {
+          memcpy(thread_data->td->hash_value_buffer[x][y],
+                 cpi->td.mb.hash_value_buffer[x][y],
+                 AOM_BUFFER_SIZE_FOR_BLOCK_HASH *
+                     sizeof(*thread_data->td->hash_value_buffer[0][0]));
+          thread_data->td->mb.hash_value_buffer[x][y] =
+              thread_data->td->hash_value_buffer[x][y];
+        }
       thread_data->td->mb.mask_buf = thread_data->td->mask_buf;
     }
     if (thread_data->td->counts != &cpi->counts) {
diff --git a/av1/encoder/hash.h b/av1/encoder/hash.h
index 8b62275..7e32aa2 100644
--- a/av1/encoder/hash.h
+++ b/av1/encoder/hash.h
@@ -43,6 +43,8 @@
 // init table for software version crc32c
 void av1_crc32c_calculator_init(CRC32C *p_crc32c);
 
+#define AOM_BUFFER_SIZE_FOR_BLOCK_HASH (4096)
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/av1/encoder/hash_motion.c b/av1/encoder/hash_motion.c
index f2ff5b4..e85a516 100644
--- a/av1/encoder/hash_motion.c
+++ b/av1/encoder/hash_motion.c
@@ -13,14 +13,12 @@
 
 #include "config/av1_rtcd.h"
 
+#include "av1/encoder/block.h"
 #include "av1/encoder/hash.h"
 #include "av1/encoder/hash_motion.h"
 
 static const int crc_bits = 16;
 static const int block_size_bits = 3;
-static CRC_CALCULATOR crc_calculator1;
-static CRC_CALCULATOR crc_calculator2;
-static int g_crc_initialized = 0;
 
 static void hash_table_clear_all(hash_table *p_hash_table) {
   if (p_hash_table->p_lookup_table == NULL) {
@@ -106,11 +104,11 @@
   }
 }
 
-void av1_hash_table_init(hash_table *p_hash_table) {
-  if (g_crc_initialized == 0) {
-    av1_crc_calculator_init(&crc_calculator1, 24, 0x5D6DCB);
-    av1_crc_calculator_init(&crc_calculator2, 24, 0x864CFB);
-    g_crc_initialized = 1;
+void av1_hash_table_init(hash_table *p_hash_table, MACROBLOCK *x) {
+  if (x->g_crc_initialized == 0) {
+    av1_crc_calculator_init(&x->crc_calculator1, 24, 0x5D6DCB);
+    av1_crc_calculator_init(&x->crc_calculator2, 24, 0x864CFB);
+    x->g_crc_initialized = 1;
   }
   p_hash_table->p_lookup_table = NULL;
 }
@@ -181,7 +179,8 @@
 
 void av1_generate_block_2x2_hash_value(const YV12_BUFFER_CONFIG *picture,
                                        uint32_t *pic_block_hash[2],
-                                       int8_t *pic_block_same_info[3]) {
+                                       int8_t *pic_block_same_info[3],
+                                       MACROBLOCK *x) {
   const int width = 2;
   const int height = 2;
   const int x_end = picture->y_crop_width - width + 1;
@@ -201,9 +200,9 @@
         pic_block_same_info[1][pos] = is_block16_2x2_col_same_value(p);
 
         pic_block_hash[0][pos] = av1_get_crc_value(
-            &crc_calculator1, (uint8_t *)p, length * sizeof(p[0]));
+            &x->crc_calculator1, (uint8_t *)p, length * sizeof(p[0]));
         pic_block_hash[1][pos] = av1_get_crc_value(
-            &crc_calculator2, (uint8_t *)p, length * sizeof(p[0]));
+            &x->crc_calculator2, (uint8_t *)p, length * sizeof(p[0]));
         pos++;
       }
       pos += width - 1;
@@ -220,9 +219,9 @@
         pic_block_same_info[1][pos] = is_block_2x2_col_same_value(p);
 
         pic_block_hash[0][pos] =
-            av1_get_crc_value(&crc_calculator1, p, length * sizeof(p[0]));
+            av1_get_crc_value(&x->crc_calculator1, p, length * sizeof(p[0]));
         pic_block_hash[1][pos] =
-            av1_get_crc_value(&crc_calculator2, p, length * sizeof(p[0]));
+            av1_get_crc_value(&x->crc_calculator2, p, length * sizeof(p[0]));
         pos++;
       }
       pos += width - 1;
@@ -235,7 +234,8 @@
                                    uint32_t *src_pic_block_hash[2],
                                    uint32_t *dst_pic_block_hash[2],
                                    int8_t *src_pic_block_same_info[3],
-                                   int8_t *dst_pic_block_same_info[3]) {
+                                   int8_t *dst_pic_block_same_info[3],
+                                   MACROBLOCK *x) {
   const int pic_width = picture->y_crop_width;
   const int x_end = picture->y_crop_width - block_size + 1;
   const int y_end = picture->y_crop_height - block_size + 1;
@@ -254,14 +254,14 @@
       p[2] = src_pic_block_hash[0][pos + src_size * pic_width];
       p[3] = src_pic_block_hash[0][pos + src_size * pic_width + src_size];
       dst_pic_block_hash[0][pos] =
-          av1_get_crc_value(&crc_calculator1, (uint8_t *)p, length);
+          av1_get_crc_value(&x->crc_calculator1, (uint8_t *)p, length);
 
       p[0] = src_pic_block_hash[1][pos];
       p[1] = src_pic_block_hash[1][pos + src_size];
       p[2] = src_pic_block_hash[1][pos + src_size * pic_width];
       p[3] = src_pic_block_hash[1][pos + src_size * pic_width + src_size];
       dst_pic_block_hash[1][pos] =
-          av1_get_crc_value(&crc_calculator2, (uint8_t *)p, length);
+          av1_get_crc_value(&x->crc_calculator2, (uint8_t *)p, length);
 
       dst_pic_block_same_info[0][pos] =
           src_pic_block_same_info[0][pos] &&
@@ -388,17 +388,9 @@
   return 1;
 }
 
-// global buffer for hash value calculation of a block
-// used only in av1_get_block_hash_value()
-#define AOM_BUFFER_SIZE_FOR_BLOCK_HASH (4096)
-// [first hash/second hash]
-// [two buffers used ping-pong]
-// [num of 2x2 blocks in 128x128]
-static uint32_t hash_value_buffer[2][2][AOM_BUFFER_SIZE_FOR_BLOCK_HASH];
-
 void av1_get_block_hash_value(uint8_t *y_src, int stride, int block_size,
                               uint32_t *hash_value1, uint32_t *hash_value2,
-                              int use_highbitdepth) {
+                              int use_highbitdepth, MACROBLOCK *x) {
   uint32_t to_hash[4];
   const int add_value = hash_block_size_to_index(block_size) << crc_bits;
   assert(add_value >= 0);
@@ -415,10 +407,12 @@
         get_pixels_in_1D_short_array_by_block_2x2(
             y16_src + y_pos * stride + x_pos, stride, pixel_to_hash);
         assert(pos < AOM_BUFFER_SIZE_FOR_BLOCK_HASH);
-        hash_value_buffer[0][0][pos] = av1_get_crc_value(
-            &crc_calculator1, (uint8_t *)pixel_to_hash, sizeof(pixel_to_hash));
-        hash_value_buffer[1][0][pos] = av1_get_crc_value(
-            &crc_calculator2, (uint8_t *)pixel_to_hash, sizeof(pixel_to_hash));
+        x->hash_value_buffer[0][0][pos] =
+            av1_get_crc_value(&x->crc_calculator1, (uint8_t *)pixel_to_hash,
+                              sizeof(pixel_to_hash));
+        x->hash_value_buffer[1][0][pos] =
+            av1_get_crc_value(&x->crc_calculator2, (uint8_t *)pixel_to_hash,
+                              sizeof(pixel_to_hash));
       }
     }
   } else {
@@ -429,10 +423,10 @@
         get_pixels_in_1D_char_array_by_block_2x2(y_src + y_pos * stride + x_pos,
                                                  stride, pixel_to_hash);
         assert(pos < AOM_BUFFER_SIZE_FOR_BLOCK_HASH);
-        hash_value_buffer[0][0][pos] = av1_get_crc_value(
-            &crc_calculator1, pixel_to_hash, sizeof(pixel_to_hash));
-        hash_value_buffer[1][0][pos] = av1_get_crc_value(
-            &crc_calculator2, pixel_to_hash, sizeof(pixel_to_hash));
+        x->hash_value_buffer[0][0][pos] = av1_get_crc_value(
+            &x->crc_calculator1, pixel_to_hash, sizeof(pixel_to_hash));
+        x->hash_value_buffer[1][0][pos] = av1_get_crc_value(
+            &x->crc_calculator2, pixel_to_hash, sizeof(pixel_to_hash));
       }
     }
   }
@@ -457,24 +451,24 @@
         assert(srcPos + src_sub_block_in_width + 1 <
                AOM_BUFFER_SIZE_FOR_BLOCK_HASH);
         assert(dst_pos < AOM_BUFFER_SIZE_FOR_BLOCK_HASH);
-        to_hash[0] = hash_value_buffer[0][src_idx][srcPos];
-        to_hash[1] = hash_value_buffer[0][src_idx][srcPos + 1];
+        to_hash[0] = x->hash_value_buffer[0][src_idx][srcPos];
+        to_hash[1] = x->hash_value_buffer[0][src_idx][srcPos + 1];
         to_hash[2] =
-            hash_value_buffer[0][src_idx][srcPos + src_sub_block_in_width];
-        to_hash[3] =
-            hash_value_buffer[0][src_idx][srcPos + src_sub_block_in_width + 1];
+            x->hash_value_buffer[0][src_idx][srcPos + src_sub_block_in_width];
+        to_hash[3] = x->hash_value_buffer[0][src_idx]
+                                         [srcPos + src_sub_block_in_width + 1];
 
-        hash_value_buffer[0][dst_idx][dst_pos] = av1_get_crc_value(
-            &crc_calculator1, (uint8_t *)to_hash, sizeof(to_hash));
+        x->hash_value_buffer[0][dst_idx][dst_pos] = av1_get_crc_value(
+            &x->crc_calculator1, (uint8_t *)to_hash, sizeof(to_hash));
 
-        to_hash[0] = hash_value_buffer[1][src_idx][srcPos];
-        to_hash[1] = hash_value_buffer[1][src_idx][srcPos + 1];
+        to_hash[0] = x->hash_value_buffer[1][src_idx][srcPos];
+        to_hash[1] = x->hash_value_buffer[1][src_idx][srcPos + 1];
         to_hash[2] =
-            hash_value_buffer[1][src_idx][srcPos + src_sub_block_in_width];
-        to_hash[3] =
-            hash_value_buffer[1][src_idx][srcPos + src_sub_block_in_width + 1];
-        hash_value_buffer[1][dst_idx][dst_pos] = av1_get_crc_value(
-            &crc_calculator2, (uint8_t *)to_hash, sizeof(to_hash));
+            x->hash_value_buffer[1][src_idx][srcPos + src_sub_block_in_width];
+        to_hash[3] = x->hash_value_buffer[1][src_idx]
+                                         [srcPos + src_sub_block_in_width + 1];
+        x->hash_value_buffer[1][dst_idx][dst_pos] = av1_get_crc_value(
+            &x->crc_calculator2, (uint8_t *)to_hash, sizeof(to_hash));
         dst_pos++;
       }
     }
@@ -483,8 +477,6 @@
     sub_block_in_width >>= 1;
   }
 
-  *hash_value1 = (hash_value_buffer[0][dst_idx][0] & crc_mask) + add_value;
-  *hash_value2 = hash_value_buffer[1][dst_idx][0];
+  *hash_value1 = (x->hash_value_buffer[0][dst_idx][0] & crc_mask) + add_value;
+  *hash_value2 = x->hash_value_buffer[1][dst_idx][0];
 }
-
-#undef AOM_BUFFER_SIZE_FOR_BLOCK_HASH
diff --git a/av1/encoder/hash_motion.h b/av1/encoder/hash_motion.h
index 8deb92e..fa510c1 100644
--- a/av1/encoder/hash_motion.h
+++ b/av1/encoder/hash_motion.h
@@ -34,7 +34,7 @@
   Vector **p_lookup_table;
 } hash_table;
 
-void av1_hash_table_init(hash_table *p_hash_table);
+void av1_hash_table_init(hash_table *p_hash_table, struct macroblock *x);
 void av1_hash_table_destroy(hash_table *p_hash_table);
 void av1_hash_table_create(hash_table *p_hash_table);
 int32_t av1_hash_table_count(hash_table *p_hash_table, uint32_t hash_value);
@@ -44,13 +44,15 @@
                             uint32_t hash_value2);
 void av1_generate_block_2x2_hash_value(const YV12_BUFFER_CONFIG *picture,
                                        uint32_t *pic_block_hash[2],
-                                       int8_t *pic_block_same_info[3]);
+                                       int8_t *pic_block_same_info[3],
+                                       struct macroblock *x);
 void av1_generate_block_hash_value(const YV12_BUFFER_CONFIG *picture,
                                    int block_size,
                                    uint32_t *src_pic_block_hash[2],
                                    uint32_t *dst_pic_block_hash[2],
                                    int8_t *src_pic_block_same_info[3],
-                                   int8_t *dst_pic_block_same_info[3]);
+                                   int8_t *dst_pic_block_same_info[3],
+                                   struct macroblock *x);
 void av1_add_to_hash_map_by_row_with_precal_data(hash_table *p_hash_table,
                                                  uint32_t *pic_hash[2],
                                                  int8_t *pic_is_same,
@@ -67,7 +69,7 @@
                                  int block_size, int x_start, int y_start);
 void av1_get_block_hash_value(uint8_t *y_src, int stride, int block_size,
                               uint32_t *hash_value1, uint32_t *hash_value2,
-                              int use_highbitdepth);
+                              int use_highbitdepth, struct macroblock *x);
 
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/av1/encoder/mcomp.c b/av1/encoder/mcomp.c
index a33aa24..b3ab40c 100644
--- a/av1/encoder/mcomp.c
+++ b/av1/encoder/mcomp.c
@@ -2247,7 +2247,7 @@
 
         av1_get_block_hash_value(
             what, what_stride, block_width, &hash_value1, &hash_value2,
-            x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH);
+            x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, x);
 
         const int count = av1_hash_table_count(ref_frame_hash, hash_value1);
         // for intra, at lest one matching can be found, itself.