Release hash-table memory after screen-content decision
The screen-content decision did not deallocate the hashed motion
estimation table. As a result, the encoder could potentially create as
many as MAX_LAG_BUFFERS hash tables.
This CL deallocates the hash table after the screen-content decision to
limit memory usage. It also fixes an assertion that should have caught
this.
Tests on m2.mkv show that the memory footprint is reduced from ~498MB to
~236MB.
BUG=aomedia:2609
Change-Id: Ia8d53ea32a57cb5653ed9b46512f32657fcd80b2
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 1bad88f..23c6e1f 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -5592,9 +5592,6 @@
}
}
-#if CONFIG_DEBUG
- cm->cur_frame->hash_table.has_content++;
-#endif
av1_hash_table_create(&cm->cur_frame->hash_table);
av1_generate_block_2x2_hash_value(cpi->source, block_hash_values[0],
is_block_same[0], &cpi->td.mb);
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 9962d61..b64b085 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -5255,32 +5255,30 @@
cpi->sf.part_sf.always_this_block_size;
// Setup necessary params for encoding, including frame source, etc.
- {
- aom_clear_system_state();
+ aom_clear_system_state();
- cpi->source =
- av1_scale_if_required(cm, cpi->unscaled_source, &cpi->scaled_source);
- if (cpi->unscaled_last_source != NULL) {
- cpi->last_source = av1_scale_if_required(cm, cpi->unscaled_last_source,
- &cpi->scaled_last_source);
- }
-
- setup_frame(cpi);
-
- if (cm->seg.enabled) {
- if (!cm->seg.update_data && cm->prev_frame) {
- segfeatures_copy(&cm->seg, &cm->prev_frame->seg);
- cm->seg.enabled = cm->prev_frame->seg.enabled;
- } else {
- av1_calculate_segdata(&cm->seg);
- }
- } else {
- memset(&cm->seg, 0, sizeof(cm->seg));
- }
- segfeatures_copy(&cm->cur_frame->seg, &cm->seg);
- cm->cur_frame->seg.enabled = cm->seg.enabled;
+ cpi->source =
+ av1_scale_if_required(cm, cpi->unscaled_source, &cpi->scaled_source);
+ if (cpi->unscaled_last_source != NULL) {
+ cpi->last_source = av1_scale_if_required(cm, cpi->unscaled_last_source,
+ &cpi->scaled_last_source);
}
+ setup_frame(cpi);
+
+ if (cm->seg.enabled) {
+ if (!cm->seg.update_data && cm->prev_frame) {
+ segfeatures_copy(&cm->seg, &cm->prev_frame->seg);
+ cm->seg.enabled = cm->prev_frame->seg.enabled;
+ } else {
+ av1_calculate_segdata(&cm->seg);
+ }
+ } else {
+ memset(&cm->seg, 0, sizeof(cm->seg));
+ }
+ segfeatures_copy(&cm->cur_frame->seg, &cm->seg);
+ cm->cur_frame->seg.enabled = cm->seg.enabled;
+
// The two encoding passes aim to help determine whether to use screen
// content tools, with a high q and fixed partition.
for (int pass = 0; pass < 2; ++pass) {
@@ -5311,6 +5309,8 @@
projected_size_pass, psnr);
}
+ av1_hash_table_destroy(&cm->cur_frame->hash_table);
+
// Set partition speed feature back.
cpi->sf.part_sf.partition_search_type = partition_search_type_orig;
cpi->sf.part_sf.always_this_block_size = fixed_partition_block_size_orig;
diff --git a/av1/encoder/hash_motion.c b/av1/encoder/hash_motion.c
index 25fedf9..dd0c77b 100644
--- a/av1/encoder/hash_motion.c
+++ b/av1/encoder/hash_motion.c
@@ -17,8 +17,9 @@
#include "av1/encoder/hash.h"
#include "av1/encoder/hash_motion.h"
-static const int crc_bits = 16;
-static const int block_size_bits = 3;
+#define kSrcBits 16
+#define kBlockSizeBits 3
+#define kMaxAddr (1 << (kSrcBits + kBlockSizeBits))
// TODO(youzhou@microsoft.com): is higher than 8 bits screen content supported?
// If yes, fix this function
@@ -106,8 +107,7 @@
if (p_hash_table->p_lookup_table == NULL) {
return;
}
- int max_addr = 1 << (crc_bits + block_size_bits);
- for (int i = 0; i < max_addr; i++) {
+ for (int i = 0; i < kMaxAddr; i++) {
if (p_hash_table->p_lookup_table[i] != NULL) {
aom_vector_destroy(p_hash_table->p_lookup_table[i]);
aom_free(p_hash_table->p_lookup_table[i]);
@@ -133,11 +133,10 @@
av1_hash_table_clear_all(p_hash_table);
return;
}
- const int max_addr = 1 << (crc_bits + block_size_bits);
p_hash_table->p_lookup_table =
- (Vector **)aom_malloc(sizeof(p_hash_table->p_lookup_table[0]) * max_addr);
+ (Vector **)aom_malloc(sizeof(p_hash_table->p_lookup_table[0]) * kMaxAddr);
memset(p_hash_table->p_lookup_table, 0,
- sizeof(p_hash_table->p_lookup_table[0]) * max_addr);
+ sizeof(p_hash_table->p_lookup_table[0]) * kMaxAddr);
#if CONFIG_DEBUG
p_hash_table->has_content = 0;
#endif
@@ -327,8 +326,11 @@
int add_value = hash_block_size_to_index(block_size);
assert(add_value >= 0);
- add_value <<= crc_bits;
- const int crc_mask = (1 << crc_bits) - 1;
+ add_value <<= kSrcBits;
+ const int crc_mask = (1 << kSrcBits) - 1;
+#if CONFIG_DEBUG
+ p_hash_table->has_content = 1;
+#endif
for (int x_pos = 0; x_pos < x_end; x_pos++) {
for (int y_pos = 0; y_pos < y_end; y_pos++) {
@@ -409,8 +411,8 @@
uint32_t to_hash[4];
int add_value = hash_block_size_to_index(block_size);
assert(add_value >= 0);
- add_value <<= crc_bits;
- const int crc_mask = (1 << crc_bits) - 1;
+ add_value <<= kSrcBits;
+ const int crc_mask = (1 << kSrcBits) - 1;
// 2x2 subblock hash values in current CU
int sub_block_in_width = (block_size >> 1);