Fix enc multi-threading mismatch in screen content
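Fix the AV1 encoder multi-threading mismatch seen with
--tune-content=screen.

The CRC calculators and the block-hash scratch buffers in hash_motion.c
were file-scope statics, and therefore shared by every encoder thread.
Move them into MACROBLOCK and allocate the scratch buffers separately
for the main thread data and for each worker's thread data, passing the
MACROBLOCK to the hash functions that need them.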
BUG=aomedia:2118
Change-Id: I971b382f6b58b98e67fff78dbdcc1e6d6879aa2b
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index 93f6205..cbb23eb 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -255,6 +255,16 @@
PALETTE_BUFFER *palette_buffer;
+ // buffer for hash value calculation of a block
+ // used only in av1_get_block_hash_value()
+ // [first hash/second hash]
+ // [two buffers used ping-pong]
+ uint32_t *hash_value_buffer[2][2];
+
+ CRC_CALCULATOR crc_calculator1;
+ CRC_CALCULATOR crc_calculator2;
+ int g_crc_initialized;
+
// These define limits to motion vector components to prevent them
// from extending outside the UMV borders
MvLimits mv_limits;
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 6220ac8..5167d43 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -4917,41 +4917,41 @@
av1_hash_table_create(&cm->cur_frame->hash_table);
av1_generate_block_2x2_hash_value(cpi->source, block_hash_values[0],
- is_block_same[0]);
+ is_block_same[0], &cpi->td.mb);
av1_generate_block_hash_value(cpi->source, 4, block_hash_values[0],
block_hash_values[1], is_block_same[0],
- is_block_same[1]);
+ is_block_same[1], &cpi->td.mb);
av1_add_to_hash_map_by_row_with_precal_data(
&cm->cur_frame->hash_table, block_hash_values[1], is_block_same[1][2],
pic_width, pic_height, 4);
av1_generate_block_hash_value(cpi->source, 8, block_hash_values[1],
block_hash_values[0], is_block_same[1],
- is_block_same[0]);
+ is_block_same[0], &cpi->td.mb);
av1_add_to_hash_map_by_row_with_precal_data(
&cm->cur_frame->hash_table, block_hash_values[0], is_block_same[0][2],
pic_width, pic_height, 8);
av1_generate_block_hash_value(cpi->source, 16, block_hash_values[0],
block_hash_values[1], is_block_same[0],
- is_block_same[1]);
+ is_block_same[1], &cpi->td.mb);
av1_add_to_hash_map_by_row_with_precal_data(
&cm->cur_frame->hash_table, block_hash_values[1], is_block_same[1][2],
pic_width, pic_height, 16);
av1_generate_block_hash_value(cpi->source, 32, block_hash_values[1],
block_hash_values[0], is_block_same[1],
- is_block_same[0]);
+ is_block_same[0], &cpi->td.mb);
av1_add_to_hash_map_by_row_with_precal_data(
&cm->cur_frame->hash_table, block_hash_values[0], is_block_same[0][2],
pic_width, pic_height, 32);
av1_generate_block_hash_value(cpi->source, 64, block_hash_values[0],
block_hash_values[1], is_block_same[0],
- is_block_same[1]);
+ is_block_same[1], &cpi->td.mb);
av1_add_to_hash_map_by_row_with_precal_data(
&cm->cur_frame->hash_table, block_hash_values[1], is_block_same[1][2],
pic_width, pic_height, 64);
av1_generate_block_hash_value(cpi->source, 128, block_hash_values[1],
block_hash_values[0], is_block_same[1],
- is_block_same[0]);
+ is_block_same[0], &cpi->td.mb);
av1_add_to_hash_map_by_row_with_precal_data(
&cm->cur_frame->hash_table, block_hash_values[0], is_block_same[0][2],
pic_width, pic_height, 128);
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 0be7699..0c0c7e9 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -501,6 +501,11 @@
aom_free(cpi->td.mb.wsrc_buf);
cpi->td.mb.wsrc_buf = NULL;
+ for (int i = 0; i < 2; i++)
+ for (int j = 0; j < 2; j++) {
+ aom_free(cpi->td.mb.hash_value_buffer[i][j]);
+ cpi->td.mb.hash_value_buffer[i][j] = NULL;
+ }
aom_free(cpi->td.mb.mask_buf);
cpi->td.mb.mask_buf = NULL;
@@ -2584,6 +2589,15 @@
(int32_t *)aom_memalign(
16, MAX_SB_SQUARE * sizeof(*cpi->td.mb.wsrc_buf)));
+ for (int x = 0; x < 2; x++)
+ for (int y = 0; y < 2; y++)
+ CHECK_MEM_ERROR(
+ cm, cpi->td.mb.hash_value_buffer[x][y],
+ (uint32_t *)aom_malloc(AOM_BUFFER_SIZE_FOR_BLOCK_HASH *
+ sizeof(*cpi->td.mb.hash_value_buffer[0][0])));
+
+ cpi->td.mb.g_crc_initialized = 0;
+
CHECK_MEM_ERROR(cm, cpi->td.mb.mask_buf,
(int32_t *)aom_memalign(
16, MAX_SB_SQUARE * sizeof(*cpi->td.mb.mask_buf)));
@@ -2912,6 +2926,11 @@
aom_free(thread_data->td->above_pred_buf);
aom_free(thread_data->td->left_pred_buf);
aom_free(thread_data->td->wsrc_buf);
+ for (int x = 0; x < 2; x++)
+ for (int y = 0; y < 2; y++) {
+ aom_free(thread_data->td->hash_value_buffer[x][y]);
+ thread_data->td->hash_value_buffer[x][y] = NULL;
+ }
aom_free(thread_data->td->mask_buf);
aom_free(thread_data->td->counts);
av1_free_pc_tree(thread_data->td, num_planes);
@@ -3746,7 +3765,8 @@
rst[2].restoration_unit_size = rst[1].restoration_unit_size;
}
-static void init_ref_frame_bufs(AV1_COMMON *cm) {
+static void init_ref_frame_bufs(AV1_COMP *cpi) {
+ AV1_COMMON *const cm = &cpi->common;
int i;
BufferPool *const pool = cm->buffer_pool;
cm->new_fb_idx = INVALID_IDX;
@@ -3756,7 +3776,7 @@
}
if (cm->seq_params.force_screen_content_tools) {
for (i = 0; i < FRAME_BUFFERS; ++i) {
- av1_hash_table_init(&pool->frame_bufs[i].hash_table);
+ av1_hash_table_init(&pool->frame_bufs[i].hash_table, &cpi->td.mb);
}
}
}
@@ -3774,7 +3794,7 @@
seq_params->use_highbitdepth = use_highbitdepth;
alloc_raw_frame_buffers(cpi);
- init_ref_frame_bufs(cm);
+ init_ref_frame_bufs(cpi);
alloc_util_frame_buffers(cpi);
init_motion_estimation(cpi); // TODO(agrange) This can be removed.
@@ -5674,7 +5694,7 @@
av1_get_block_hash_value(
cur_picture->y_buffer + y_pos * stride_cur + x_pos, stride_cur,
block_size, &hash_value_1, &hash_value_2,
- (cur_picture->flags & YV12_FLAG_HIGHBITDEPTH));
+ (cur_picture->flags & YV12_FLAG_HIGHBITDEPTH), &cpi->td.mb);
// Hashing does not work for highbitdepth currently.
// TODO(Roger): Make it work for highbitdepth.
if (av1_use_hash_me(&cpi->common)) {
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 3d0bcab..3ba1354 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -468,6 +468,7 @@
FRAME_COUNTS *counts;
PC_TREE *pc_tree;
PC_TREE *pc_root[MAX_MIB_SIZE_LOG2 - MIN_MIB_SIZE_LOG2 + 1];
+ uint32_t *hash_value_buffer[2][2];
int32_t *wsrc_buf;
int32_t *mask_buf;
uint8_t *above_pred_buf;
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c
index 6d9c8a8..1b1d3b6 100644
--- a/av1/encoder/ethread.c
+++ b/av1/encoder/ethread.c
@@ -97,6 +97,15 @@
cm, thread_data->td->wsrc_buf,
(int32_t *)aom_memalign(
16, MAX_SB_SQUARE * sizeof(*thread_data->td->wsrc_buf)));
+
+ for (int x = 0; x < 2; x++)
+ for (int y = 0; y < 2; y++)
+ CHECK_MEM_ERROR(
+ cm, thread_data->td->hash_value_buffer[x][y],
+ (uint32_t *)aom_malloc(
+ AOM_BUFFER_SIZE_FOR_BLOCK_HASH *
+ sizeof(*thread_data->td->hash_value_buffer[0][0])));
+
CHECK_MEM_ERROR(
cm, thread_data->td->mask_buf,
(int32_t *)aom_memalign(
@@ -140,6 +149,15 @@
thread_data->td->mb.above_pred_buf = thread_data->td->above_pred_buf;
thread_data->td->mb.left_pred_buf = thread_data->td->left_pred_buf;
thread_data->td->mb.wsrc_buf = thread_data->td->wsrc_buf;
+ for (int x = 0; x < 2; x++)
+ for (int y = 0; y < 2; y++) {
+ memcpy(thread_data->td->hash_value_buffer[x][y],
+ cpi->td.mb.hash_value_buffer[x][y],
+ AOM_BUFFER_SIZE_FOR_BLOCK_HASH *
+ sizeof(*thread_data->td->hash_value_buffer[0][0]));
+ thread_data->td->mb.hash_value_buffer[x][y] =
+ thread_data->td->hash_value_buffer[x][y];
+ }
thread_data->td->mb.mask_buf = thread_data->td->mask_buf;
}
if (thread_data->td->counts != &cpi->counts) {
diff --git a/av1/encoder/hash.h b/av1/encoder/hash.h
index 8b62275..7e32aa2 100644
--- a/av1/encoder/hash.h
+++ b/av1/encoder/hash.h
@@ -43,6 +43,8 @@
// init table for software version crc32c
void av1_crc32c_calculator_init(CRC32C *p_crc32c);
+#define AOM_BUFFER_SIZE_FOR_BLOCK_HASH (4096)
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/av1/encoder/hash_motion.c b/av1/encoder/hash_motion.c
index f2ff5b4..e85a516 100644
--- a/av1/encoder/hash_motion.c
+++ b/av1/encoder/hash_motion.c
@@ -13,14 +13,12 @@
#include "config/av1_rtcd.h"
+#include "av1/encoder/block.h"
#include "av1/encoder/hash.h"
#include "av1/encoder/hash_motion.h"
static const int crc_bits = 16;
static const int block_size_bits = 3;
-static CRC_CALCULATOR crc_calculator1;
-static CRC_CALCULATOR crc_calculator2;
-static int g_crc_initialized = 0;
static void hash_table_clear_all(hash_table *p_hash_table) {
if (p_hash_table->p_lookup_table == NULL) {
@@ -106,11 +104,11 @@
}
}
-void av1_hash_table_init(hash_table *p_hash_table) {
- if (g_crc_initialized == 0) {
- av1_crc_calculator_init(&crc_calculator1, 24, 0x5D6DCB);
- av1_crc_calculator_init(&crc_calculator2, 24, 0x864CFB);
- g_crc_initialized = 1;
+void av1_hash_table_init(hash_table *p_hash_table, MACROBLOCK *x) {
+ if (x->g_crc_initialized == 0) {
+ av1_crc_calculator_init(&x->crc_calculator1, 24, 0x5D6DCB);
+ av1_crc_calculator_init(&x->crc_calculator2, 24, 0x864CFB);
+ x->g_crc_initialized = 1;
}
p_hash_table->p_lookup_table = NULL;
}
@@ -181,7 +179,8 @@
void av1_generate_block_2x2_hash_value(const YV12_BUFFER_CONFIG *picture,
uint32_t *pic_block_hash[2],
- int8_t *pic_block_same_info[3]) {
+ int8_t *pic_block_same_info[3],
+ MACROBLOCK *x) {
const int width = 2;
const int height = 2;
const int x_end = picture->y_crop_width - width + 1;
@@ -201,9 +200,9 @@
pic_block_same_info[1][pos] = is_block16_2x2_col_same_value(p);
pic_block_hash[0][pos] = av1_get_crc_value(
- &crc_calculator1, (uint8_t *)p, length * sizeof(p[0]));
+ &x->crc_calculator1, (uint8_t *)p, length * sizeof(p[0]));
pic_block_hash[1][pos] = av1_get_crc_value(
- &crc_calculator2, (uint8_t *)p, length * sizeof(p[0]));
+ &x->crc_calculator2, (uint8_t *)p, length * sizeof(p[0]));
pos++;
}
pos += width - 1;
@@ -220,9 +219,9 @@
pic_block_same_info[1][pos] = is_block_2x2_col_same_value(p);
pic_block_hash[0][pos] =
- av1_get_crc_value(&crc_calculator1, p, length * sizeof(p[0]));
+ av1_get_crc_value(&x->crc_calculator1, p, length * sizeof(p[0]));
pic_block_hash[1][pos] =
- av1_get_crc_value(&crc_calculator2, p, length * sizeof(p[0]));
+ av1_get_crc_value(&x->crc_calculator2, p, length * sizeof(p[0]));
pos++;
}
pos += width - 1;
@@ -235,7 +234,8 @@
uint32_t *src_pic_block_hash[2],
uint32_t *dst_pic_block_hash[2],
int8_t *src_pic_block_same_info[3],
- int8_t *dst_pic_block_same_info[3]) {
+ int8_t *dst_pic_block_same_info[3],
+ MACROBLOCK *x) {
const int pic_width = picture->y_crop_width;
const int x_end = picture->y_crop_width - block_size + 1;
const int y_end = picture->y_crop_height - block_size + 1;
@@ -254,14 +254,14 @@
p[2] = src_pic_block_hash[0][pos + src_size * pic_width];
p[3] = src_pic_block_hash[0][pos + src_size * pic_width + src_size];
dst_pic_block_hash[0][pos] =
- av1_get_crc_value(&crc_calculator1, (uint8_t *)p, length);
+ av1_get_crc_value(&x->crc_calculator1, (uint8_t *)p, length);
p[0] = src_pic_block_hash[1][pos];
p[1] = src_pic_block_hash[1][pos + src_size];
p[2] = src_pic_block_hash[1][pos + src_size * pic_width];
p[3] = src_pic_block_hash[1][pos + src_size * pic_width + src_size];
dst_pic_block_hash[1][pos] =
- av1_get_crc_value(&crc_calculator2, (uint8_t *)p, length);
+ av1_get_crc_value(&x->crc_calculator2, (uint8_t *)p, length);
dst_pic_block_same_info[0][pos] =
src_pic_block_same_info[0][pos] &&
@@ -388,17 +388,9 @@
return 1;
}
-// global buffer for hash value calculation of a block
-// used only in av1_get_block_hash_value()
-#define AOM_BUFFER_SIZE_FOR_BLOCK_HASH (4096)
-// [first hash/second hash]
-// [two buffers used ping-pong]
-// [num of 2x2 blocks in 128x128]
-static uint32_t hash_value_buffer[2][2][AOM_BUFFER_SIZE_FOR_BLOCK_HASH];
-
void av1_get_block_hash_value(uint8_t *y_src, int stride, int block_size,
uint32_t *hash_value1, uint32_t *hash_value2,
- int use_highbitdepth) {
+ int use_highbitdepth, MACROBLOCK *x) {
uint32_t to_hash[4];
const int add_value = hash_block_size_to_index(block_size) << crc_bits;
assert(add_value >= 0);
@@ -415,10 +407,12 @@
get_pixels_in_1D_short_array_by_block_2x2(
y16_src + y_pos * stride + x_pos, stride, pixel_to_hash);
assert(pos < AOM_BUFFER_SIZE_FOR_BLOCK_HASH);
- hash_value_buffer[0][0][pos] = av1_get_crc_value(
- &crc_calculator1, (uint8_t *)pixel_to_hash, sizeof(pixel_to_hash));
- hash_value_buffer[1][0][pos] = av1_get_crc_value(
- &crc_calculator2, (uint8_t *)pixel_to_hash, sizeof(pixel_to_hash));
+ x->hash_value_buffer[0][0][pos] =
+ av1_get_crc_value(&x->crc_calculator1, (uint8_t *)pixel_to_hash,
+ sizeof(pixel_to_hash));
+ x->hash_value_buffer[1][0][pos] =
+ av1_get_crc_value(&x->crc_calculator2, (uint8_t *)pixel_to_hash,
+ sizeof(pixel_to_hash));
}
}
} else {
@@ -429,10 +423,10 @@
get_pixels_in_1D_char_array_by_block_2x2(y_src + y_pos * stride + x_pos,
stride, pixel_to_hash);
assert(pos < AOM_BUFFER_SIZE_FOR_BLOCK_HASH);
- hash_value_buffer[0][0][pos] = av1_get_crc_value(
- &crc_calculator1, pixel_to_hash, sizeof(pixel_to_hash));
- hash_value_buffer[1][0][pos] = av1_get_crc_value(
- &crc_calculator2, pixel_to_hash, sizeof(pixel_to_hash));
+ x->hash_value_buffer[0][0][pos] = av1_get_crc_value(
+ &x->crc_calculator1, pixel_to_hash, sizeof(pixel_to_hash));
+ x->hash_value_buffer[1][0][pos] = av1_get_crc_value(
+ &x->crc_calculator2, pixel_to_hash, sizeof(pixel_to_hash));
}
}
}
@@ -457,24 +451,24 @@
assert(srcPos + src_sub_block_in_width + 1 <
AOM_BUFFER_SIZE_FOR_BLOCK_HASH);
assert(dst_pos < AOM_BUFFER_SIZE_FOR_BLOCK_HASH);
- to_hash[0] = hash_value_buffer[0][src_idx][srcPos];
- to_hash[1] = hash_value_buffer[0][src_idx][srcPos + 1];
+ to_hash[0] = x->hash_value_buffer[0][src_idx][srcPos];
+ to_hash[1] = x->hash_value_buffer[0][src_idx][srcPos + 1];
to_hash[2] =
- hash_value_buffer[0][src_idx][srcPos + src_sub_block_in_width];
- to_hash[3] =
- hash_value_buffer[0][src_idx][srcPos + src_sub_block_in_width + 1];
+ x->hash_value_buffer[0][src_idx][srcPos + src_sub_block_in_width];
+ to_hash[3] = x->hash_value_buffer[0][src_idx]
+ [srcPos + src_sub_block_in_width + 1];
- hash_value_buffer[0][dst_idx][dst_pos] = av1_get_crc_value(
- &crc_calculator1, (uint8_t *)to_hash, sizeof(to_hash));
+ x->hash_value_buffer[0][dst_idx][dst_pos] = av1_get_crc_value(
+ &x->crc_calculator1, (uint8_t *)to_hash, sizeof(to_hash));
- to_hash[0] = hash_value_buffer[1][src_idx][srcPos];
- to_hash[1] = hash_value_buffer[1][src_idx][srcPos + 1];
+ to_hash[0] = x->hash_value_buffer[1][src_idx][srcPos];
+ to_hash[1] = x->hash_value_buffer[1][src_idx][srcPos + 1];
to_hash[2] =
- hash_value_buffer[1][src_idx][srcPos + src_sub_block_in_width];
- to_hash[3] =
- hash_value_buffer[1][src_idx][srcPos + src_sub_block_in_width + 1];
- hash_value_buffer[1][dst_idx][dst_pos] = av1_get_crc_value(
- &crc_calculator2, (uint8_t *)to_hash, sizeof(to_hash));
+ x->hash_value_buffer[1][src_idx][srcPos + src_sub_block_in_width];
+ to_hash[3] = x->hash_value_buffer[1][src_idx]
+ [srcPos + src_sub_block_in_width + 1];
+ x->hash_value_buffer[1][dst_idx][dst_pos] = av1_get_crc_value(
+ &x->crc_calculator2, (uint8_t *)to_hash, sizeof(to_hash));
dst_pos++;
}
}
@@ -483,8 +477,6 @@
sub_block_in_width >>= 1;
}
- *hash_value1 = (hash_value_buffer[0][dst_idx][0] & crc_mask) + add_value;
- *hash_value2 = hash_value_buffer[1][dst_idx][0];
+ *hash_value1 = (x->hash_value_buffer[0][dst_idx][0] & crc_mask) + add_value;
+ *hash_value2 = x->hash_value_buffer[1][dst_idx][0];
}
-
-#undef AOM_BUFFER_SIZE_FOR_BLOCK_HASH
diff --git a/av1/encoder/hash_motion.h b/av1/encoder/hash_motion.h
index 8deb92e..fa510c1 100644
--- a/av1/encoder/hash_motion.h
+++ b/av1/encoder/hash_motion.h
@@ -34,7 +34,7 @@
Vector **p_lookup_table;
} hash_table;
-void av1_hash_table_init(hash_table *p_hash_table);
+void av1_hash_table_init(hash_table *p_hash_table, struct macroblock *x);
void av1_hash_table_destroy(hash_table *p_hash_table);
void av1_hash_table_create(hash_table *p_hash_table);
int32_t av1_hash_table_count(hash_table *p_hash_table, uint32_t hash_value);
@@ -44,13 +44,15 @@
uint32_t hash_value2);
void av1_generate_block_2x2_hash_value(const YV12_BUFFER_CONFIG *picture,
uint32_t *pic_block_hash[2],
- int8_t *pic_block_same_info[3]);
+ int8_t *pic_block_same_info[3],
+ struct macroblock *x);
void av1_generate_block_hash_value(const YV12_BUFFER_CONFIG *picture,
int block_size,
uint32_t *src_pic_block_hash[2],
uint32_t *dst_pic_block_hash[2],
int8_t *src_pic_block_same_info[3],
- int8_t *dst_pic_block_same_info[3]);
+ int8_t *dst_pic_block_same_info[3],
+ struct macroblock *x);
void av1_add_to_hash_map_by_row_with_precal_data(hash_table *p_hash_table,
uint32_t *pic_hash[2],
int8_t *pic_is_same,
@@ -67,7 +69,7 @@
int block_size, int x_start, int y_start);
void av1_get_block_hash_value(uint8_t *y_src, int stride, int block_size,
uint32_t *hash_value1, uint32_t *hash_value2,
- int use_highbitdepth);
+ int use_highbitdepth, struct macroblock *x);
#ifdef __cplusplus
} // extern "C"
diff --git a/av1/encoder/mcomp.c b/av1/encoder/mcomp.c
index a33aa24..b3ab40c 100644
--- a/av1/encoder/mcomp.c
+++ b/av1/encoder/mcomp.c
@@ -2247,7 +2247,7 @@
av1_get_block_hash_value(
what, what_stride, block_width, &hash_value1, &hash_value2,
- x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH);
+ x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, x);
const int count = av1_hash_table_count(ref_frame_hash, hash_value1);
// for intra, at lest one matching can be found, itself.