Release hash-table memory after screen-content decision

The screen-content decision did not deallocate the hash table used for
hashed motion estimation. As a result, the encoder could potentially
create up to MAX_LAG_BUFFERS hash tables.

This CL deallocates the hash table after the screen-content decision to
limit memory usage. It also fixes an assertion that should have caught
this issue.

Tests on m2.mkv show that the memory footprint is reduced from ~498MB to
~236MB.

BUG=aomedia:2609

Change-Id: Ia8d53ea32a57cb5653ed9b46512f32657fcd80b2
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 1bad88f..23c6e1f 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -5592,9 +5592,6 @@
       }
     }
 
-#if CONFIG_DEBUG
-    cm->cur_frame->hash_table.has_content++;
-#endif
     av1_hash_table_create(&cm->cur_frame->hash_table);
     av1_generate_block_2x2_hash_value(cpi->source, block_hash_values[0],
                                       is_block_same[0], &cpi->td.mb);
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 9962d61..b64b085 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -5255,32 +5255,30 @@
       cpi->sf.part_sf.always_this_block_size;
 
   // Setup necessary params for encoding, including frame source, etc.
-  {
-    aom_clear_system_state();
+  aom_clear_system_state();
 
-    cpi->source =
-        av1_scale_if_required(cm, cpi->unscaled_source, &cpi->scaled_source);
-    if (cpi->unscaled_last_source != NULL) {
-      cpi->last_source = av1_scale_if_required(cm, cpi->unscaled_last_source,
-                                               &cpi->scaled_last_source);
-    }
-
-    setup_frame(cpi);
-
-    if (cm->seg.enabled) {
-      if (!cm->seg.update_data && cm->prev_frame) {
-        segfeatures_copy(&cm->seg, &cm->prev_frame->seg);
-        cm->seg.enabled = cm->prev_frame->seg.enabled;
-      } else {
-        av1_calculate_segdata(&cm->seg);
-      }
-    } else {
-      memset(&cm->seg, 0, sizeof(cm->seg));
-    }
-    segfeatures_copy(&cm->cur_frame->seg, &cm->seg);
-    cm->cur_frame->seg.enabled = cm->seg.enabled;
+  cpi->source =
+      av1_scale_if_required(cm, cpi->unscaled_source, &cpi->scaled_source);
+  if (cpi->unscaled_last_source != NULL) {
+    cpi->last_source = av1_scale_if_required(cm, cpi->unscaled_last_source,
+                                             &cpi->scaled_last_source);
   }
 
+  setup_frame(cpi);
+
+  if (cm->seg.enabled) {
+    if (!cm->seg.update_data && cm->prev_frame) {
+      segfeatures_copy(&cm->seg, &cm->prev_frame->seg);
+      cm->seg.enabled = cm->prev_frame->seg.enabled;
+    } else {
+      av1_calculate_segdata(&cm->seg);
+    }
+  } else {
+    memset(&cm->seg, 0, sizeof(cm->seg));
+  }
+  segfeatures_copy(&cm->cur_frame->seg, &cm->seg);
+  cm->cur_frame->seg.enabled = cm->seg.enabled;
+
   // The two encoding passes aim to help determine whether to use screen
   // content tools, with a high q and fixed partition.
   for (int pass = 0; pass < 2; ++pass) {
@@ -5311,6 +5309,8 @@
         projected_size_pass, psnr);
   }
 
+  av1_hash_table_destroy(&cm->cur_frame->hash_table);
+
   // Set partition speed feature back.
   cpi->sf.part_sf.partition_search_type = partition_search_type_orig;
   cpi->sf.part_sf.always_this_block_size = fixed_partition_block_size_orig;
diff --git a/av1/encoder/hash_motion.c b/av1/encoder/hash_motion.c
index 25fedf9..dd0c77b 100644
--- a/av1/encoder/hash_motion.c
+++ b/av1/encoder/hash_motion.c
@@ -17,8 +17,9 @@
 #include "av1/encoder/hash.h"
 #include "av1/encoder/hash_motion.h"
 
-static const int crc_bits = 16;
-static const int block_size_bits = 3;
+#define kSrcBits 16
+#define kBlockSizeBits 3
+#define kMaxAddr (1 << (kSrcBits + kBlockSizeBits))
 
 // TODO(youzhou@microsoft.com): is higher than 8 bits screen content supported?
 // If yes, fix this function
@@ -106,8 +107,7 @@
   if (p_hash_table->p_lookup_table == NULL) {
     return;
   }
-  int max_addr = 1 << (crc_bits + block_size_bits);
-  for (int i = 0; i < max_addr; i++) {
+  for (int i = 0; i < kMaxAddr; i++) {
     if (p_hash_table->p_lookup_table[i] != NULL) {
       aom_vector_destroy(p_hash_table->p_lookup_table[i]);
       aom_free(p_hash_table->p_lookup_table[i]);
@@ -133,11 +133,10 @@
     av1_hash_table_clear_all(p_hash_table);
     return;
   }
-  const int max_addr = 1 << (crc_bits + block_size_bits);
   p_hash_table->p_lookup_table =
-      (Vector **)aom_malloc(sizeof(p_hash_table->p_lookup_table[0]) * max_addr);
+      (Vector **)aom_malloc(sizeof(p_hash_table->p_lookup_table[0]) * kMaxAddr);
   memset(p_hash_table->p_lookup_table, 0,
-         sizeof(p_hash_table->p_lookup_table[0]) * max_addr);
+         sizeof(p_hash_table->p_lookup_table[0]) * kMaxAddr);
 #if CONFIG_DEBUG
   p_hash_table->has_content = 0;
 #endif
@@ -327,8 +326,11 @@
 
   int add_value = hash_block_size_to_index(block_size);
   assert(add_value >= 0);
-  add_value <<= crc_bits;
-  const int crc_mask = (1 << crc_bits) - 1;
+  add_value <<= kSrcBits;
+  const int crc_mask = (1 << kSrcBits) - 1;
+#if CONFIG_DEBUG
+  p_hash_table->has_content = 1;
+#endif
 
   for (int x_pos = 0; x_pos < x_end; x_pos++) {
     for (int y_pos = 0; y_pos < y_end; y_pos++) {
@@ -409,8 +411,8 @@
   uint32_t to_hash[4];
   int add_value = hash_block_size_to_index(block_size);
   assert(add_value >= 0);
-  add_value <<= crc_bits;
-  const int crc_mask = (1 << crc_bits) - 1;
+  add_value <<= kSrcBits;
+  const int crc_mask = (1 << kSrcBits) - 1;
 
   // 2x2 subblock hash values in current CU
   int sub_block_in_width = (block_size >> 1);