av1/encoder/hash_motion.c - aom - Git at Google

 /*
  * Copyright (c) 2018, Alliance for Open Media. All rights reserved.
  *
  * This source code is subject to the terms of the BSD 2 Clause License and
  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
  * was not distributed with this source code in the LICENSE file, you can
  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
  */

 #include <assert.h>
 #include <stdbool.h>

 #include "config/av1_rtcd.h"

 #include "av1/encoder/block.h"
 #include "av1/encoder/hash.h"
 #include "av1/encoder/hash_motion.h"

 #define kSrcBits 16
 // kMaxAddr is the number of hash table buckets in p_hash_table->p_lookup_table.
 // p_hash_table->p_lookup_table consists of 6 hash tables of 1 << kSrcBits
 // buckets each. Each of the 6 supported block sizes (4, 8, 16, 32, 64, 128) has
 // its own hash table, indexed by the return value of
 // hash_block_size_to_index().
 #define kMaxAddr (6 << kSrcBits)
 #define kMaxCandidatesPerHashBucket 256

 static void get_pixels_in_1D_char_array_by_block_2x2(const uint8_t *y_src,
                                                      int stride,
                                                      uint8_t *p_pixels_in1D) {
   const uint8_t *p_pel = y_src;
   int index = 0;
   for (int i = 0; i < 2; i++) {
     for (int j = 0; j < 2; j++) {
       p_pixels_in1D[index++] = p_pel[j];
     }
     p_pel += stride;
   }
 }

 static void get_pixels_in_1D_short_array_by_block_2x2(const uint16_t *y_src,
                                                       int stride,
                                                       uint16_t *p_pixels_in1D) {
   const uint16_t *p_pel = y_src;
   int index = 0;
   for (int i = 0; i < 2; i++) {
     for (int j = 0; j < 2; j++) {
       p_pixels_in1D[index++] = p_pel[j];
     }
     p_pel += stride;
   }
 }

 // the hash value (hash_value1) consists of two parts, the first 3 bits relate
 // to the block size and the remaining 16 bits are the crc values. This
 // function is used to get the first 3 bits.
 static int hash_block_size_to_index(int block_size) {
   switch (block_size) {
     case 4: return 0;
     case 8: return 1;
     case 16: return 2;
     case 32: return 3;
     case 64: return 4;
     case 128: return 5;
     default: return -1;
   }
 }

 static uint32_t get_identity_hash_value(const uint8_t a, const uint8_t b,
                                         const uint8_t c, const uint8_t d) {
   // The four input values add up to 32 bits, which is the size of the output.
   // Just pack those values as is.
   return ((uint32_t)a << 24) + ((uint32_t)b << 16) + ((uint32_t)c << 8) +
          ((uint32_t)d);
 }

 static uint32_t get_xor_hash_value_hbd(const uint16_t a, const uint16_t b,
                                        const uint16_t c, const uint16_t d) {
   uint32_t result;
   // Pack the lower 8 bits of each input value to the 32 bit output, then xor
   // with the upper 8 bits of each input value.
   result = ((uint32_t)(a & 0x00ff) << 24) + ((uint32_t)(b & 0x00ff) << 16) +
            ((uint32_t)(c & 0x00ff) << 8) + ((uint32_t)(d & 0x00ff));
   result ^= ((uint32_t)(a & 0xff00) << 16) + ((uint32_t)(b & 0xff00) << 8) +
             ((uint32_t)(c & 0xff00)) + ((uint32_t)(d & 0xff00) >> 8);
   return result;
 }

 void av1_hash_table_init(IntraBCHashInfo *intrabc_hash_info) {
   if (!intrabc_hash_info->crc_initialized) {
     av1_crc32c_calculator_init(&intrabc_hash_info->crc_calculator);
     intrabc_hash_info->crc_initialized = 1;
   }
   intrabc_hash_info->intrabc_hash_table.p_lookup_table = NULL;
 }

 static void clear_all(hash_table *p_hash_table) {
   if (p_hash_table->p_lookup_table == NULL) {
     return;
   }
   for (int i = 0; i < kMaxAddr; i++) {
     if (p_hash_table->p_lookup_table[i] != NULL) {
       aom_vector_destroy(p_hash_table->p_lookup_table[i]);
       aom_free(p_hash_table->p_lookup_table[i]);
       p_hash_table->p_lookup_table[i] = NULL;
     }
   }
 }

 void av1_hash_table_destroy(hash_table *p_hash_table) {
   clear_all(p_hash_table);
   aom_free(p_hash_table->p_lookup_table);
   p_hash_table->p_lookup_table = NULL;
 }

 bool av1_hash_table_create(hash_table *p_hash_table) {
   if (p_hash_table->p_lookup_table != NULL) {
     clear_all(p_hash_table);
     return true;
   }
   p_hash_table->p_lookup_table =
       (Vector **)aom_calloc(kMaxAddr, sizeof(p_hash_table->p_lookup_table[0]));
   if (!p_hash_table->p_lookup_table) return false;
   return true;
 }

 static bool hash_table_add_to_table(hash_table *p_hash_table,
                                     uint32_t hash_value,
                                     const block_hash *curr_block_hash) {
   if (p_hash_table->p_lookup_table[hash_value] == NULL) {
     p_hash_table->p_lookup_table[hash_value] =
         aom_malloc(sizeof(*p_hash_table->p_lookup_table[hash_value]));
     if (p_hash_table->p_lookup_table[hash_value] == NULL) {
       return false;
     }
     if (aom_vector_setup(p_hash_table->p_lookup_table[hash_value], 10,
                          sizeof(*curr_block_hash)) == VECTOR_ERROR)
       return false;
   }
   // Place an upper bound each hash table bucket to up to 256 intrabc
   // block candidates, and ignore subsequent ones. Considering more can
   // unnecessarily slow down encoding for virtually no efficiency gain.
   if (aom_vector_byte_size(p_hash_table->p_lookup_table[hash_value]) <
       kMaxCandidatesPerHashBucket * sizeof(*curr_block_hash)) {
     if (aom_vector_push_back(p_hash_table->p_lookup_table[hash_value],
                              (void *)curr_block_hash) == VECTOR_ERROR)
       return false;
   }
   return true;
 }

 int32_t av1_hash_table_count(const hash_table *p_hash_table,
                              uint32_t hash_value) {
   if (p_hash_table->p_lookup_table[hash_value] == NULL) {
     return 0;
   } else {
     return (int32_t)(p_hash_table->p_lookup_table[hash_value]->size);
   }
 }

 Iterator av1_hash_get_first_iterator(hash_table *p_hash_table,
                                      uint32_t hash_value) {
   assert(av1_hash_table_count(p_hash_table, hash_value) > 0);
   return aom_vector_begin(p_hash_table->p_lookup_table[hash_value]);
 }

 void av1_generate_block_2x2_hash_value(const YV12_BUFFER_CONFIG *picture,
                                        uint32_t *pic_block_hash) {
   const int width = 2;
   const int height = 2;
   const int x_end = picture->y_crop_width - width + 1;
   const int y_end = picture->y_crop_height - height + 1;

   if (picture->flags & YV12_FLAG_HIGHBITDEPTH) {
     uint16_t p[4];
     int pos = 0;
     for (int y_pos = 0; y_pos < y_end; y_pos++) {
       for (int x_pos = 0; x_pos < x_end; x_pos++) {
         get_pixels_in_1D_short_array_by_block_2x2(
             CONVERT_TO_SHORTPTR(picture->y_buffer) + y_pos * picture->y_stride +
                 x_pos,
             picture->y_stride, p);
         // For HBD, we either have 40 or 48 bits of input data that the xor hash
         // reduce to 32 bits. We intentionally don't want to "discard" bits to
         // avoid any kind of biasing.
         pic_block_hash[pos] = get_xor_hash_value_hbd(p[0], p[1], p[2], p[3]);
         pos++;
       }
       pos += width - 1;
     }
   } else {
     uint8_t p[4];
     int pos = 0;
     for (int y_pos = 0; y_pos < y_end; y_pos++) {
       for (int x_pos = 0; x_pos < x_end; x_pos++) {
         get_pixels_in_1D_char_array_by_block_2x2(
             picture->y_buffer + y_pos * picture->y_stride + x_pos,
             picture->y_stride, p);
         // This 2x2 hash isn't used directly as a "key" for the hash table, so
         // we can afford to just copy the 4 8-bit pixel values as a single
         // 32-bit value directly. (i.e. there are no concerns of a lack of
         // uniform distribution)
         pic_block_hash[pos] = get_identity_hash_value(p[0], p[1], p[2], p[3]);
         pos++;
       }
       pos += width - 1;
     }
   }
 }

 void av1_generate_block_hash_value(IntraBCHashInfo *intrabc_hash_info,
                                    const YV12_BUFFER_CONFIG *picture,
                                    int block_size,
                                    const uint32_t *src_pic_block_hash,
                                    uint32_t *dst_pic_block_hash) {
   CRC32C *calc = &intrabc_hash_info->crc_calculator;

   const int pic_width = picture->y_crop_width;
   const int x_end = picture->y_crop_width - block_size + 1;
   const int y_end = picture->y_crop_height - block_size + 1;
   const int src_size = block_size >> 1;

   uint32_t p[4];
   const int length = sizeof(p);

   int pos = 0;
   for (int y_pos = 0; y_pos < y_end; y_pos++) {
     for (int x_pos = 0; x_pos < x_end; x_pos++) {
       // Build up a bigger block from 4 smaller, non-overlapping source block
       // hashes, and compute its hash. Note: source blocks at the right and
       // bottom borders cannot be part of larger blocks, therefore they won't be
       // considered into the block hash value generation process.
       p[0] = src_pic_block_hash[pos];
       p[1] = src_pic_block_hash[pos + src_size];
       p[2] = src_pic_block_hash[pos + src_size * pic_width];
       p[3] = src_pic_block_hash[pos + src_size * pic_width + src_size];
       // TODO: bug aomedia:433531610 - serialize input values in a way that's
       // independent of the computer architecture's endianness
       dst_pic_block_hash[pos] =
           av1_get_crc32c_value(calc, (uint8_t *)p, length);
       pos++;
     }
     pos += block_size - 1;
   }
 }

 bool av1_add_to_hash_map_by_row_with_precal_data(hash_table *p_hash_table,
                                                  const uint32_t *pic_hash,
                                                  int pic_width, int pic_height,
                                                  int block_size) {
   const int x_end = pic_width - block_size + 1;
   const int y_end = pic_height - block_size + 1;

   int add_value = hash_block_size_to_index(block_size);
   assert(add_value >= 0);
   add_value <<= kSrcBits;
   const int crc_mask = (1 << kSrcBits) - 1;
   int step = block_size;
   int x_offset = 0;
   int y_offset = 0;

   // Explore the entire frame hierarchically to add intrabc candidate blocks to
   // the hash table, by starting with coarser steps (the block size), towards
   // finer-grained steps until every candidate block has been considered.
   // The nested for loop goes through the pic_hash array column by column.

   // Doing a hierarchical block exploration helps maximize spatial dispersion
   // of the first and foremost candidate blocks while minimizing overlap between
   // them. This is helpful because we only keep up to 256 entries of the
   // same candidate block (located in different places), so we want those
   // entries to cover the biggest area of the image to encode to maximize coding
   // efficiency.

   // This is the coordinate exploration order example for an 8x8 region, with
   // block_size = 4. The top-left corner (x, y) coordinates of each candidate
   // block are shown below. There are 5 * 5 (25) candidate blocks.
   //    x  0  1  2  3  4  5  6  7
   //  y +------------------------
   //  0 |  1 10  5 13  3
   //  1 | 16 22 18 24 20
   //  2 |  7 11  9 14  8
   //  3 | 17 23 19 25 21
   //  4 |  2 12  6 15  4--------+
   //  5 |              | 4 x 4  |
   //  6 |              | block  |
   //  7 |              +--------+

   // Please note that due to the way block exploration works, the smallest step
   // used is 2 (i.e. no two adjacent blocks will be explored consecutively).
   // Also, the exploration is designed to visit each block candidate only once.
   while (step > 1) {
     for (int x_pos = x_offset; x_pos < x_end; x_pos += step) {
       for (int y_pos = y_offset; y_pos < y_end; y_pos += step) {
         const int pos = y_pos * pic_width + x_pos;
         block_hash curr_block_hash;

         curr_block_hash.x = x_pos;
         curr_block_hash.y = y_pos;

         const uint32_t hash_value1 = (pic_hash[pos] & crc_mask) + add_value;
         curr_block_hash.hash_value2 = pic_hash[pos];

         if (!hash_table_add_to_table(p_hash_table, hash_value1,
                                      &curr_block_hash)) {
           return false;
         }
       }
     }

     // Adjust offsets and step sizes with this state machine.
     // State 0 is needed because no blocks in pic_hash have been explored,
     // so exploration requires a way to account for blocks with both zero
     // x_offset and zero y_offset.
     // State 0 is always meant to be executed first, but the relative order of
     // states 1, 2 and 3 can be arbitrary, as long as no two adjacent blocks
     // are explored consecutively.
     if (x_offset == 0 && y_offset == 0) {
       // State 0 -> State 1: special case
       // This state transition will only execute when step == block_size
       x_offset = step / 2;
     } else if (x_offset == step / 2 && y_offset == 0) {
       // State 1 -> State 2
       x_offset = 0;
       y_offset = step / 2;
     } else if (x_offset == 0 && y_offset == step / 2) {
       // State 2 -> State 3
       x_offset = step / 2;
     } else {
       assert(x_offset == step / 2 && y_offset == step / 2);
       // State 3 -> State 1: We've fully explored all the coordinates for the
       // current step size, continue by halving the step size
       step /= 2;
       x_offset = step / 2;
       y_offset = 0;
     }
   }

   return true;
 }

 int av1_hash_is_horizontal_perfect(const YV12_BUFFER_CONFIG *picture,
                                    int block_size, int x_start, int y_start) {
   const int stride = picture->y_stride;
   const uint8_t *p = picture->y_buffer + y_start * stride + x_start;

   if (picture->flags & YV12_FLAG_HIGHBITDEPTH) {
     const uint16_t *p16 = CONVERT_TO_SHORTPTR(p);
     for (int i = 0; i < block_size; i++) {
       for (int j = 1; j < block_size; j++) {
         if (p16[j] != p16[0]) {
           return 0;
         }
       }
       p16 += stride;
     }
   } else {
     for (int i = 0; i < block_size; i++) {
       for (int j = 1; j < block_size; j++) {
         if (p[j] != p[0]) {
           return 0;
         }
       }
       p += stride;
     }
   }

   return 1;
 }

 int av1_hash_is_vertical_perfect(const YV12_BUFFER_CONFIG *picture,
                                  int block_size, int x_start, int y_start) {
   const int stride = picture->y_stride;
   const uint8_t *p = picture->y_buffer + y_start * stride + x_start;

   if (picture->flags & YV12_FLAG_HIGHBITDEPTH) {
     const uint16_t *p16 = CONVERT_TO_SHORTPTR(p);
     for (int i = 0; i < block_size; i++) {
       for (int j = 1; j < block_size; j++) {
         if (p16[j * stride + i] != p16[i]) {
           return 0;
         }
       }
     }
   } else {
     for (int i = 0; i < block_size; i++) {
       for (int j = 1; j < block_size; j++) {
         if (p[j * stride + i] != p[i]) {
           return 0;
         }
       }
     }
   }
   return 1;
 }

 void av1_get_block_hash_value(IntraBCHashInfo *intra_bc_hash_info,
                               const uint8_t *y_src, int stride, int block_size,
                               uint32_t *hash_value1, uint32_t *hash_value2,
                               int use_highbitdepth) {
   int add_value = hash_block_size_to_index(block_size);
   assert(add_value >= 0);
   add_value <<= kSrcBits;
   const int crc_mask = (1 << kSrcBits) - 1;

   CRC32C *calc = &intra_bc_hash_info->crc_calculator;
   uint32_t **buf = intra_bc_hash_info->hash_value_buffer;

   // 2x2 subblock hash values in current CU
   int sub_block_in_width = (block_size >> 1);
   if (use_highbitdepth) {
     uint16_t pixel_to_hash[4];
     uint16_t *y16_src = CONVERT_TO_SHORTPTR(y_src);
     for (int y_pos = 0; y_pos < block_size; y_pos += 2) {
       for (int x_pos = 0; x_pos < block_size; x_pos += 2) {
         int pos = (y_pos >> 1) * sub_block_in_width + (x_pos >> 1);
         get_pixels_in_1D_short_array_by_block_2x2(
             y16_src + y_pos * stride + x_pos, stride, pixel_to_hash);
         assert(pos < AOM_BUFFER_SIZE_FOR_BLOCK_HASH);
         // For HBD, we either have 40 or 48 bits of input data that the xor hash
         // reduce to 32 bits. We intentionally don't want to "discard" bits to
         // avoid any kind of biasing.
         buf[0][pos] =
             get_xor_hash_value_hbd(pixel_to_hash[0], pixel_to_hash[1],
                                    pixel_to_hash[2], pixel_to_hash[3]);
       }
     }
   } else {
     uint8_t pixel_to_hash[4];
     for (int y_pos = 0; y_pos < block_size; y_pos += 2) {
       for (int x_pos = 0; x_pos < block_size; x_pos += 2) {
         int pos = (y_pos >> 1) * sub_block_in_width + (x_pos >> 1);
         get_pixels_in_1D_char_array_by_block_2x2(y_src + y_pos * stride + x_pos,
                                                  stride, pixel_to_hash);
         assert(pos < AOM_BUFFER_SIZE_FOR_BLOCK_HASH);
         // This 2x2 hash isn't used directly as a "key" for the hash table, so
         // we can afford to just copy the 4 8-bit pixel values as a single
         // 32-bit value directly. (i.e. there are no concerns of a lack of
         // uniform distribution)
         buf[0][pos] =
             get_identity_hash_value(pixel_to_hash[0], pixel_to_hash[1],
                                     pixel_to_hash[2], pixel_to_hash[3]);
       }
     }
   }

   int src_sub_block_in_width = sub_block_in_width;
   sub_block_in_width >>= 1;

   int src_idx = 0;
   int dst_idx = !src_idx;

   // 4x4 subblock hash values to current block hash values
   uint32_t to_hash[4];
   for (int sub_width = 4; sub_width <= block_size;
        sub_width *= 2, src_idx = !src_idx) {
     dst_idx = !src_idx;

     int dst_pos = 0;
     for (int y_pos = 0; y_pos < sub_block_in_width; y_pos++) {
       for (int x_pos = 0; x_pos < sub_block_in_width; x_pos++) {
         int srcPos = (y_pos << 1) * src_sub_block_in_width + (x_pos << 1);

         assert(srcPos + 1 < AOM_BUFFER_SIZE_FOR_BLOCK_HASH);
         assert(srcPos + src_sub_block_in_width + 1 <
                AOM_BUFFER_SIZE_FOR_BLOCK_HASH);
         assert(dst_pos < AOM_BUFFER_SIZE_FOR_BLOCK_HASH);

         to_hash[0] = buf[src_idx][srcPos];
         to_hash[1] = buf[src_idx][srcPos + 1];
         to_hash[2] = buf[src_idx][srcPos + src_sub_block_in_width];
         to_hash[3] = buf[src_idx][srcPos + src_sub_block_in_width + 1];

         // TODO: bug aomedia:433531610 - serialize input values in a way that's
         // independent of the computer architecture's endianness
         buf[dst_idx][dst_pos] =
             av1_get_crc32c_value(calc, (uint8_t *)to_hash, sizeof(to_hash));
         dst_pos++;
       }
     }

     src_sub_block_in_width = sub_block_in_width;
     sub_block_in_width >>= 1;
   }

   *hash_value1 = (buf[dst_idx][0] & crc_mask) + add_value;
   *hash_value2 = buf[dst_idx][0];
 }
	/*
	* Copyright (c) 2018, Alliance for Open Media. All rights reserved.
	*
	* This source code is subject to the terms of the BSD 2 Clause License and
	* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
	* was not distributed with this source code in the LICENSE file, you can
	* obtain it at www.aomedia.org/license/software. If the Alliance for Open
	* Media Patent License 1.0 was not distributed with this source code in the
	* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
	*/

	#include <assert.h>
	#include <stdbool.h>

	#include "config/av1_rtcd.h"

	#include "av1/encoder/block.h"
	#include "av1/encoder/hash.h"
	#include "av1/encoder/hash_motion.h"

	#define kSrcBits 16
	// kMaxAddr is the number of hash table buckets in p_hash_table->p_lookup_table.
	// p_hash_table->p_lookup_table consists of 6 hash tables of 1 << kSrcBits
	// buckets each. Each of the 6 supported block sizes (4, 8, 16, 32, 64, 128) has
	// its own hash table, indexed by the return value of
	// hash_block_size_to_index().
	#define kMaxAddr (6 << kSrcBits)
	#define kMaxCandidatesPerHashBucket 256

	static void get_pixels_in_1D_char_array_by_block_2x2(const uint8_t *y_src,
	int stride,
	uint8_t *p_pixels_in1D) {
	const uint8_t *p_pel = y_src;
	int index = 0;
	for (int i = 0; i < 2; i++) {
	for (int j = 0; j < 2; j++) {
	p_pixels_in1D[index++] = p_pel[j];
	}
	p_pel += stride;
	}
	}

	static void get_pixels_in_1D_short_array_by_block_2x2(const uint16_t *y_src,
	int stride,
	uint16_t *p_pixels_in1D) {
	const uint16_t *p_pel = y_src;
	int index = 0;
	for (int i = 0; i < 2; i++) {
	for (int j = 0; j < 2; j++) {
	p_pixels_in1D[index++] = p_pel[j];
	}
	p_pel += stride;
	}
	}

	// the hash value (hash_value1) consists of two parts, the first 3 bits relate
	// to the block size and the remaining 16 bits are the crc values. This
	// function is used to get the first 3 bits.
	static int hash_block_size_to_index(int block_size) {
	switch (block_size) {
	case 4: return 0;
	case 8: return 1;
	case 16: return 2;
	case 32: return 3;
	case 64: return 4;
	case 128: return 5;
	default: return -1;
	}
	}

	static uint32_t get_identity_hash_value(const uint8_t a, const uint8_t b,
	const uint8_t c, const uint8_t d) {
	// The four input values add up to 32 bits, which is the size of the output.
	// Just pack those values as is.
	return ((uint32_t)a << 24) + ((uint32_t)b << 16) + ((uint32_t)c << 8) +
	((uint32_t)d);
	}

	static uint32_t get_xor_hash_value_hbd(const uint16_t a, const uint16_t b,
	const uint16_t c, const uint16_t d) {
	uint32_t result;
	// Pack the lower 8 bits of each input value to the 32 bit output, then xor
	// with the upper 8 bits of each input value.
	result = ((uint32_t)(a & 0x00ff) << 24) + ((uint32_t)(b & 0x00ff) << 16) +
	((uint32_t)(c & 0x00ff) << 8) + ((uint32_t)(d & 0x00ff));
	result ^= ((uint32_t)(a & 0xff00) << 16) + ((uint32_t)(b & 0xff00) << 8) +
	((uint32_t)(c & 0xff00)) + ((uint32_t)(d & 0xff00) >> 8);
	return result;
	}

	void av1_hash_table_init(IntraBCHashInfo *intrabc_hash_info) {
	if (!intrabc_hash_info->crc_initialized) {
	av1_crc32c_calculator_init(&intrabc_hash_info->crc_calculator);
	intrabc_hash_info->crc_initialized = 1;
	}
	intrabc_hash_info->intrabc_hash_table.p_lookup_table = NULL;
	}

	static void clear_all(hash_table *p_hash_table) {
	if (p_hash_table->p_lookup_table == NULL) {
	return;
	}
	for (int i = 0; i < kMaxAddr; i++) {
	if (p_hash_table->p_lookup_table[i] != NULL) {
	aom_vector_destroy(p_hash_table->p_lookup_table[i]);
	aom_free(p_hash_table->p_lookup_table[i]);
	p_hash_table->p_lookup_table[i] = NULL;
	}
	}
	}

	void av1_hash_table_destroy(hash_table *p_hash_table) {
	clear_all(p_hash_table);
	aom_free(p_hash_table->p_lookup_table);
	p_hash_table->p_lookup_table = NULL;
	}

	bool av1_hash_table_create(hash_table *p_hash_table) {
	if (p_hash_table->p_lookup_table != NULL) {
	clear_all(p_hash_table);
	return true;
	}
	p_hash_table->p_lookup_table =
	(Vector **)aom_calloc(kMaxAddr, sizeof(p_hash_table->p_lookup_table[0]));
	if (!p_hash_table->p_lookup_table) return false;
	return true;
	}

	static bool hash_table_add_to_table(hash_table *p_hash_table,
	uint32_t hash_value,
	const block_hash *curr_block_hash) {
	if (p_hash_table->p_lookup_table[hash_value] == NULL) {
	p_hash_table->p_lookup_table[hash_value] =
	aom_malloc(sizeof(*p_hash_table->p_lookup_table[hash_value]));
	if (p_hash_table->p_lookup_table[hash_value] == NULL) {
	return false;
	}
	if (aom_vector_setup(p_hash_table->p_lookup_table[hash_value], 10,
	sizeof(*curr_block_hash)) == VECTOR_ERROR)
	return false;
	}
	// Place an upper bound each hash table bucket to up to 256 intrabc
	// block candidates, and ignore subsequent ones. Considering more can
	// unnecessarily slow down encoding for virtually no efficiency gain.
	if (aom_vector_byte_size(p_hash_table->p_lookup_table[hash_value]) <
	kMaxCandidatesPerHashBucket * sizeof(*curr_block_hash)) {
	if (aom_vector_push_back(p_hash_table->p_lookup_table[hash_value],
	(void *)curr_block_hash) == VECTOR_ERROR)
	return false;
	}
	return true;
	}

	int32_t av1_hash_table_count(const hash_table *p_hash_table,
	uint32_t hash_value) {
	if (p_hash_table->p_lookup_table[hash_value] == NULL) {
	return 0;
	} else {
	return (int32_t)(p_hash_table->p_lookup_table[hash_value]->size);
	}
	}

	Iterator av1_hash_get_first_iterator(hash_table *p_hash_table,
	uint32_t hash_value) {
	assert(av1_hash_table_count(p_hash_table, hash_value) > 0);
	return aom_vector_begin(p_hash_table->p_lookup_table[hash_value]);
	}

	void av1_generate_block_2x2_hash_value(const YV12_BUFFER_CONFIG *picture,
	uint32_t *pic_block_hash) {
	const int width = 2;
	const int height = 2;
	const int x_end = picture->y_crop_width - width + 1;
	const int y_end = picture->y_crop_height - height + 1;

	if (picture->flags & YV12_FLAG_HIGHBITDEPTH) {
	uint16_t p[4];
	int pos = 0;
	for (int y_pos = 0; y_pos < y_end; y_pos++) {
	for (int x_pos = 0; x_pos < x_end; x_pos++) {
	get_pixels_in_1D_short_array_by_block_2x2(
	CONVERT_TO_SHORTPTR(picture->y_buffer) + y_pos * picture->y_stride +
	x_pos,
	picture->y_stride, p);
	// For HBD, we either have 40 or 48 bits of input data that the xor hash
	// reduce to 32 bits. We intentionally don't want to "discard" bits to
	// avoid any kind of biasing.
	pic_block_hash[pos] = get_xor_hash_value_hbd(p[0], p[1], p[2], p[3]);
	pos++;
	}
	pos += width - 1;
	}
	} else {
	uint8_t p[4];
	int pos = 0;
	for (int y_pos = 0; y_pos < y_end; y_pos++) {
	for (int x_pos = 0; x_pos < x_end; x_pos++) {
	get_pixels_in_1D_char_array_by_block_2x2(
	picture->y_buffer + y_pos * picture->y_stride + x_pos,
	picture->y_stride, p);
	// This 2x2 hash isn't used directly as a "key" for the hash table, so
	// we can afford to just copy the 4 8-bit pixel values as a single
	// 32-bit value directly. (i.e. there are no concerns of a lack of
	// uniform distribution)
	pic_block_hash[pos] = get_identity_hash_value(p[0], p[1], p[2], p[3]);
	pos++;
	}
	pos += width - 1;
	}
	}
	}

	void av1_generate_block_hash_value(IntraBCHashInfo *intrabc_hash_info,
	const YV12_BUFFER_CONFIG *picture,
	int block_size,
	const uint32_t *src_pic_block_hash,
	uint32_t *dst_pic_block_hash) {
	CRC32C *calc = &intrabc_hash_info->crc_calculator;

	const int pic_width = picture->y_crop_width;
	const int x_end = picture->y_crop_width - block_size + 1;
	const int y_end = picture->y_crop_height - block_size + 1;
	const int src_size = block_size >> 1;

	uint32_t p[4];
	const int length = sizeof(p);

	int pos = 0;
	for (int y_pos = 0; y_pos < y_end; y_pos++) {
	for (int x_pos = 0; x_pos < x_end; x_pos++) {
	// Build up a bigger block from 4 smaller, non-overlapping source block
	// hashes, and compute its hash. Note: source blocks at the right and
	// bottom borders cannot be part of larger blocks, therefore they won't be
	// considered into the block hash value generation process.
	p[0] = src_pic_block_hash[pos];
	p[1] = src_pic_block_hash[pos + src_size];
	p[2] = src_pic_block_hash[pos + src_size * pic_width];
	p[3] = src_pic_block_hash[pos + src_size * pic_width + src_size];
	// TODO: bug aomedia:433531610 - serialize input values in a way that's
	// independent of the computer architecture's endianness
	dst_pic_block_hash[pos] =
	av1_get_crc32c_value(calc, (uint8_t *)p, length);
	pos++;
	}
	pos += block_size - 1;
	}
	}

	bool av1_add_to_hash_map_by_row_with_precal_data(hash_table *p_hash_table,
	const uint32_t *pic_hash,
	int pic_width, int pic_height,
	int block_size) {
	const int x_end = pic_width - block_size + 1;
	const int y_end = pic_height - block_size + 1;

	int add_value = hash_block_size_to_index(block_size);
	assert(add_value >= 0);
	add_value <<= kSrcBits;
	const int crc_mask = (1 << kSrcBits) - 1;
	int step = block_size;
	int x_offset = 0;
	int y_offset = 0;

	// Explore the entire frame hierarchically to add intrabc candidate blocks to
	// the hash table, by starting with coarser steps (the block size), towards
	// finer-grained steps until every candidate block has been considered.
	// The nested for loop goes through the pic_hash array column by column.

	// Doing a hierarchical block exploration helps maximize spatial dispersion
	// of the first and foremost candidate blocks while minimizing overlap between
	// them. This is helpful because we only keep up to 256 entries of the
	// same candidate block (located in different places), so we want those
	// entries to cover the biggest area of the image to encode to maximize coding
	// efficiency.

	// This is the coordinate exploration order example for an 8x8 region, with
	// block_size = 4. The top-left corner (x, y) coordinates of each candidate
	// block are shown below. There are 5 * 5 (25) candidate blocks.
	// x 0 1 2 3 4 5 6 7
	// y +------------------------
	// 0 \| 1 10 5 13 3
	// 1 \| 16 22 18 24 20
	// 2 \| 7 11 9 14 8
	// 3 \| 17 23 19 25 21
	// 4 \| 2 12 6 15 4--------+
	// 5 \| \| 4 x 4 \|
	// 6 \| \| block \|
	// 7 \| +--------+

	// Please note that due to the way block exploration works, the smallest step
	// used is 2 (i.e. no two adjacent blocks will be explored consecutively).
	// Also, the exploration is designed to visit each block candidate only once.
	while (step > 1) {
	for (int x_pos = x_offset; x_pos < x_end; x_pos += step) {
	for (int y_pos = y_offset; y_pos < y_end; y_pos += step) {
	const int pos = y_pos * pic_width + x_pos;
	block_hash curr_block_hash;

	curr_block_hash.x = x_pos;
	curr_block_hash.y = y_pos;

	const uint32_t hash_value1 = (pic_hash[pos] & crc_mask) + add_value;
	curr_block_hash.hash_value2 = pic_hash[pos];

	if (!hash_table_add_to_table(p_hash_table, hash_value1,
	&curr_block_hash)) {
	return false;
	}
	}
	}

	// Adjust offsets and step sizes with this state machine.
	// State 0 is needed because no blocks in pic_hash have been explored,
	// so exploration requires a way to account for blocks with both zero
	// x_offset and zero y_offset.
	// State 0 is always meant to be executed first, but the relative order of
	// states 1, 2 and 3 can be arbitrary, as long as no two adjacent blocks
	// are explored consecutively.
	if (x_offset == 0 && y_offset == 0) {
	// State 0 -> State 1: special case
	// This state transition will only execute when step == block_size
	x_offset = step / 2;
	} else if (x_offset == step / 2 && y_offset == 0) {
	// State 1 -> State 2
	x_offset = 0;
	y_offset = step / 2;
	} else if (x_offset == 0 && y_offset == step / 2) {
	// State 2 -> State 3
	x_offset = step / 2;
	} else {
	assert(x_offset == step / 2 && y_offset == step / 2);
	// State 3 -> State 1: We've fully explored all the coordinates for the
	// current step size, continue by halving the step size
	step /= 2;
	x_offset = step / 2;
	y_offset = 0;
	}
	}

	return true;
	}

	int av1_hash_is_horizontal_perfect(const YV12_BUFFER_CONFIG *picture,
	int block_size, int x_start, int y_start) {
	const int stride = picture->y_stride;
	const uint8_t p = picture->y_buffer + y_start stride + x_start;

	if (picture->flags & YV12_FLAG_HIGHBITDEPTH) {
	const uint16_t *p16 = CONVERT_TO_SHORTPTR(p);
	for (int i = 0; i < block_size; i++) {
	for (int j = 1; j < block_size; j++) {
	if (p16[j] != p16[0]) {
	return 0;
	}
	}
	p16 += stride;
	}
	} else {
	for (int i = 0; i < block_size; i++) {
	for (int j = 1; j < block_size; j++) {
	if (p[j] != p[0]) {
	return 0;
	}
	}
	p += stride;
	}
	}

	return 1;
	}

	int av1_hash_is_vertical_perfect(const YV12_BUFFER_CONFIG *picture,
	int block_size, int x_start, int y_start) {
	const int stride = picture->y_stride;
	const uint8_t p = picture->y_buffer + y_start stride + x_start;

	if (picture->flags & YV12_FLAG_HIGHBITDEPTH) {
	const uint16_t *p16 = CONVERT_TO_SHORTPTR(p);
	for (int i = 0; i < block_size; i++) {
	for (int j = 1; j < block_size; j++) {
	if (p16[j * stride + i] != p16[i]) {
	return 0;
	}
	}
	}
	} else {
	for (int i = 0; i < block_size; i++) {
	for (int j = 1; j < block_size; j++) {
	if (p[j * stride + i] != p[i]) {
	return 0;
	}
	}
	}
	}
	return 1;
	}

	void av1_get_block_hash_value(IntraBCHashInfo *intra_bc_hash_info,
	const uint8_t *y_src, int stride, int block_size,
	uint32_t hash_value1, uint32_t hash_value2,
	int use_highbitdepth) {
	int add_value = hash_block_size_to_index(block_size);
	assert(add_value >= 0);
	add_value <<= kSrcBits;
	const int crc_mask = (1 << kSrcBits) - 1;

	CRC32C *calc = &intra_bc_hash_info->crc_calculator;
	uint32_t **buf = intra_bc_hash_info->hash_value_buffer;

	// 2x2 subblock hash values in current CU
	int sub_block_in_width = (block_size >> 1);
	if (use_highbitdepth) {
	uint16_t pixel_to_hash[4];
	uint16_t *y16_src = CONVERT_TO_SHORTPTR(y_src);
	for (int y_pos = 0; y_pos < block_size; y_pos += 2) {
	for (int x_pos = 0; x_pos < block_size; x_pos += 2) {
	int pos = (y_pos >> 1) * sub_block_in_width + (x_pos >> 1);
	get_pixels_in_1D_short_array_by_block_2x2(
	y16_src + y_pos * stride + x_pos, stride, pixel_to_hash);
	assert(pos < AOM_BUFFER_SIZE_FOR_BLOCK_HASH);
	// For HBD, we either have 40 or 48 bits of input data that the xor hash
	// reduce to 32 bits. We intentionally don't want to "discard" bits to
	// avoid any kind of biasing.
	buf[0][pos] =
	get_xor_hash_value_hbd(pixel_to_hash[0], pixel_to_hash[1],
	pixel_to_hash[2], pixel_to_hash[3]);
	}
	}
	} else {
	uint8_t pixel_to_hash[4];
	for (int y_pos = 0; y_pos < block_size; y_pos += 2) {
	for (int x_pos = 0; x_pos < block_size; x_pos += 2) {
	int pos = (y_pos >> 1) * sub_block_in_width + (x_pos >> 1);
	get_pixels_in_1D_char_array_by_block_2x2(y_src + y_pos * stride + x_pos,
	stride, pixel_to_hash);
	assert(pos < AOM_BUFFER_SIZE_FOR_BLOCK_HASH);
	// This 2x2 hash isn't used directly as a "key" for the hash table, so
	// we can afford to just copy the 4 8-bit pixel values as a single
	// 32-bit value directly. (i.e. there are no concerns of a lack of
	// uniform distribution)
	buf[0][pos] =
	get_identity_hash_value(pixel_to_hash[0], pixel_to_hash[1],
	pixel_to_hash[2], pixel_to_hash[3]);
	}
	}
	}

	int src_sub_block_in_width = sub_block_in_width;
	sub_block_in_width >>= 1;

	int src_idx = 0;
	int dst_idx = !src_idx;

	// 4x4 subblock hash values to current block hash values
	uint32_t to_hash[4];
	for (int sub_width = 4; sub_width <= block_size;
	sub_width *= 2, src_idx = !src_idx) {
	dst_idx = !src_idx;

	int dst_pos = 0;
	for (int y_pos = 0; y_pos < sub_block_in_width; y_pos++) {
	for (int x_pos = 0; x_pos < sub_block_in_width; x_pos++) {
	int srcPos = (y_pos << 1) * src_sub_block_in_width + (x_pos << 1);

	assert(srcPos + 1 < AOM_BUFFER_SIZE_FOR_BLOCK_HASH);
	assert(srcPos + src_sub_block_in_width + 1 <
	AOM_BUFFER_SIZE_FOR_BLOCK_HASH);
	assert(dst_pos < AOM_BUFFER_SIZE_FOR_BLOCK_HASH);

	to_hash[0] = buf[src_idx][srcPos];
	to_hash[1] = buf[src_idx][srcPos + 1];
	to_hash[2] = buf[src_idx][srcPos + src_sub_block_in_width];
	to_hash[3] = buf[src_idx][srcPos + src_sub_block_in_width + 1];

	// TODO: bug aomedia:433531610 - serialize input values in a way that's
	// independent of the computer architecture's endianness
	buf[dst_idx][dst_pos] =
	av1_get_crc32c_value(calc, (uint8_t *)to_hash, sizeof(to_hash));
	dst_pos++;
	}
	}

	src_sub_block_in_width = sub_block_in_width;
	sub_block_in_width >>= 1;
	}

	*hash_value1 = (buf[dst_idx][0] & crc_mask) + add_value;
	*hash_value2 = buf[dst_idx][0];
	}