| /* | 
 |  * Copyright (c) 2022, Alliance for Open Media. All rights reserved. | 
 |  * | 
 |  * This source code is subject to the terms of the BSD 2 Clause License and | 
 |  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License | 
 |  * was not distributed with this source code in the LICENSE file, you can | 
 |  * obtain it at www.aomedia.org/license/software. If the Alliance for Open | 
 |  * Media Patent License 1.0 was not distributed with this source code in the | 
 |  * PATENTS file, you can obtain it at www.aomedia.org/license/patent. | 
 |  */ | 
 |  | 
 | #include "aom_dsp/pyramid.h" | 
 | #include "aom_mem/aom_mem.h" | 
 | #include "aom_ports/bitops.h" | 
 | #include "aom_util/aom_pthread.h" | 
 |  | 
 | // TODO(rachelbarker): Move needed code from av1/ to aom_dsp/ | 
 | #include "av1/common/resize.h" | 
 |  | 
 | #include <assert.h> | 
 | #include <string.h> | 
 |  | 
 | // Lifecycle: | 
 | // * Frame buffer alloc code calls aom_get_pyramid_alloc_size() | 
 | //   to work out how much space is needed for a given number of pyramid | 
//   levels. The result is counted toward the total size that is checked
//   against the max allocation limit.
 | // * Then calls aom_alloc_pyramid() to actually create the pyramid | 
 | // * Pyramid is initially marked as containing no valid data | 
 | // * Each pyramid layer is computed on-demand, the first time it is requested | 
 | // * Whenever frame buffer is reused, reset the counter of filled levels. | 
 | //   This invalidates all of the existing pyramid levels. | 
 | // * Whenever frame buffer is resized, reallocate pyramid | 
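//
// A minimal usage sketch (the real call sites live in the frame buffer
// management code; error handling is elided here):
//
//   size_t size = aom_get_pyramid_alloc_size(width, height, image_is_16bit);
//   // ... check `size` against the max allocation limit ...
//   ImagePyramid *pyr = aom_alloc_pyramid(width, height, image_is_16bit);
//   // ... later, when pyramid levels are actually needed ...
//   int filled = aom_compute_pyramid(frame, bit_depth, n_levels, pyr);
//   // ... whenever the frame buffer is reused ...
//   aom_invalidate_pyramid(pyr);
//   // ... when the frame buffer is freed ...
//   aom_free_pyramid(pyr);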
 |  | 
 | size_t aom_get_pyramid_alloc_size(int width, int height, bool image_is_16bit) { | 
 |   // Allocate the maximum possible number of layers for this width and height | 
 |   const int msb = get_msb(AOMMIN(width, height)); | 
 |   const int n_levels = AOMMAX(msb - MIN_PYRAMID_SIZE_LOG2, 1); | 
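  // For example, for a 1920x1080 frame, AOMMIN(width, height) = 1080 and
  // get_msb(1080) = 10; assuming MIN_PYRAMID_SIZE_LOG2 is 3 (a minimum
  // pyramid dimension of 8 pixels), this gives n_levels = 7.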
 |  | 
 |   size_t alloc_size = 0; | 
 |   alloc_size += sizeof(ImagePyramid); | 
 |   alloc_size += n_levels * sizeof(PyramidLayer); | 
 |  | 
 |   // Calculate how much memory is needed for downscaled frame buffers | 
 |   size_t buffer_size = 0; | 
 |  | 
 |   // Work out if we need to allocate a few extra bytes for alignment. | 
 |   // aom_memalign() will ensure that the start of the allocation is aligned | 
 |   // to a multiple of PYRAMID_ALIGNMENT. But we want the first image pixel | 
 |   // to be aligned, not the first byte of the allocation. | 
 |   // | 
 |   // In the loop below, we ensure that the stride of every image is a multiple | 
 |   // of PYRAMID_ALIGNMENT. Thus the allocated size of each pyramid level will | 
 |   // also be a multiple of PYRAMID_ALIGNMENT. Thus, as long as we can get the | 
 |   // first pixel in the first pyramid layer aligned properly, that will | 
 |   // automatically mean that the first pixel of every row of every layer is | 
 |   // properly aligned too. | 
 |   // | 
 |   // Thus all we need to consider is the first pixel in the first layer. | 
 |   // This is located at offset | 
 |   //   extra_bytes + level_stride * PYRAMID_PADDING + PYRAMID_PADDING | 
 |   // bytes into the buffer. Since level_stride is a multiple of | 
 |   // PYRAMID_ALIGNMENT, we can ignore that. So we need | 
 |   //   extra_bytes + PYRAMID_PADDING = multiple of PYRAMID_ALIGNMENT | 
 |   // | 
 |   // To solve this, we can round PYRAMID_PADDING up to the next multiple | 
  // of PYRAMID_ALIGNMENT, then subtract the original value to calculate
 |   // how many extra bytes are needed. | 
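  //
  // For example, if PYRAMID_PADDING were 16 and PYRAMID_ALIGNMENT were 32
  // (illustrative values only), first_px_offset would round up to 32, so
  // extra_bytes = 32 - 16 = 16.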
 |   size_t first_px_offset = | 
 |       (PYRAMID_PADDING + PYRAMID_ALIGNMENT - 1) & ~(PYRAMID_ALIGNMENT - 1); | 
 |   size_t extra_bytes = first_px_offset - PYRAMID_PADDING; | 
 |   buffer_size += extra_bytes; | 
 |  | 
 |   // If the original image is stored in an 8-bit buffer, then we can point the | 
 |   // lowest pyramid level at that buffer rather than allocating a new one. | 
 |   int first_allocated_level = image_is_16bit ? 0 : 1; | 
 |  | 
 |   for (int level = first_allocated_level; level < n_levels; level++) { | 
 |     int level_width = width >> level; | 
 |     int level_height = height >> level; | 
 |  | 
 |     // Allocate padding for each layer | 
 |     int padded_width = level_width + 2 * PYRAMID_PADDING; | 
 |     int padded_height = level_height + 2 * PYRAMID_PADDING; | 
 |  | 
    // Align the layer stride to be a multiple of PYRAMID_ALIGNMENT.
    // This ensures that, as long as the top-left pixel in this pyramid level
    // is properly aligned, the leftmost pixel in every row of the level will
    // be properly aligned too.
 |     int level_stride = | 
 |         (padded_width + PYRAMID_ALIGNMENT - 1) & ~(PYRAMID_ALIGNMENT - 1); | 
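    // For example, with PYRAMID_PADDING of 16 and PYRAMID_ALIGNMENT of 32
    // (illustrative values only), a 960-pixel-wide level has padded_width =
    // 992, which is already a multiple of 32; a 963-pixel-wide level
    // (padded_width = 995) would round up to a stride of 1024.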
 |  | 
 |     buffer_size += level_stride * padded_height; | 
 |   } | 
 |  | 
 |   alloc_size += buffer_size; | 
 |  | 
 |   return alloc_size; | 
 | } | 
 |  | 
 | ImagePyramid *aom_alloc_pyramid(int width, int height, bool image_is_16bit) { | 
 |   // Allocate the maximum possible number of layers for this width and height | 
 |   const int msb = get_msb(AOMMIN(width, height)); | 
 |   const int n_levels = AOMMAX(msb - MIN_PYRAMID_SIZE_LOG2, 1); | 
 |  | 
 |   ImagePyramid *pyr = aom_calloc(1, sizeof(*pyr)); | 
 |   if (!pyr) { | 
 |     return NULL; | 
 |   } | 
 |  | 
 |   pyr->layers = aom_calloc(n_levels, sizeof(*pyr->layers)); | 
 |   if (!pyr->layers) { | 
 |     aom_free(pyr); | 
 |     return NULL; | 
 |   } | 
 |  | 
 |   pyr->max_levels = n_levels; | 
 |   pyr->filled_levels = 0; | 
 |  | 
 |   // Compute sizes and offsets for each pyramid level | 
 |   // These are gathered up first, so that we can allocate all pyramid levels | 
 |   // in a single buffer | 
 |   size_t buffer_size = 0; | 
 |   size_t *layer_offsets = aom_calloc(n_levels, sizeof(*layer_offsets)); | 
 |   if (!layer_offsets) { | 
 |     aom_free(pyr->layers); | 
 |     aom_free(pyr); | 
 |     return NULL; | 
 |   } | 
 |  | 
 |   // Work out if we need to allocate a few extra bytes for alignment. | 
 |   // aom_memalign() will ensure that the start of the allocation is aligned | 
 |   // to a multiple of PYRAMID_ALIGNMENT. But we want the first image pixel | 
 |   // to be aligned, not the first byte of the allocation. | 
 |   // | 
 |   // In the loop below, we ensure that the stride of every image is a multiple | 
 |   // of PYRAMID_ALIGNMENT. Thus the allocated size of each pyramid level will | 
 |   // also be a multiple of PYRAMID_ALIGNMENT. Thus, as long as we can get the | 
 |   // first pixel in the first pyramid layer aligned properly, that will | 
 |   // automatically mean that the first pixel of every row of every layer is | 
 |   // properly aligned too. | 
 |   // | 
 |   // Thus all we need to consider is the first pixel in the first layer. | 
 |   // This is located at offset | 
 |   //   extra_bytes + level_stride * PYRAMID_PADDING + PYRAMID_PADDING | 
 |   // bytes into the buffer. Since level_stride is a multiple of | 
 |   // PYRAMID_ALIGNMENT, we can ignore that. So we need | 
 |   //   extra_bytes + PYRAMID_PADDING = multiple of PYRAMID_ALIGNMENT | 
 |   // | 
 |   // To solve this, we can round PYRAMID_PADDING up to the next multiple | 
  // of PYRAMID_ALIGNMENT, then subtract the original value to calculate
 |   // how many extra bytes are needed. | 
 |   size_t first_px_offset = | 
 |       (PYRAMID_PADDING + PYRAMID_ALIGNMENT - 1) & ~(PYRAMID_ALIGNMENT - 1); | 
 |   size_t extra_bytes = first_px_offset - PYRAMID_PADDING; | 
 |   buffer_size += extra_bytes; | 
 |  | 
 |   // If the original image is stored in an 8-bit buffer, then we can point the | 
 |   // lowest pyramid level at that buffer rather than allocating a new one. | 
 |   int first_allocated_level = image_is_16bit ? 0 : 1; | 
 |  | 
 |   for (int level = first_allocated_level; level < n_levels; level++) { | 
 |     PyramidLayer *layer = &pyr->layers[level]; | 
 |  | 
 |     int level_width = width >> level; | 
 |     int level_height = height >> level; | 
 |  | 
 |     // Allocate padding for each layer | 
 |     int padded_width = level_width + 2 * PYRAMID_PADDING; | 
 |     int padded_height = level_height + 2 * PYRAMID_PADDING; | 
 |  | 
    // Align the layer stride to be a multiple of PYRAMID_ALIGNMENT.
    // This ensures that, as long as the top-left pixel in this pyramid level
    // is properly aligned, the leftmost pixel in every row of the level will
    // be properly aligned too.
 |     int level_stride = | 
 |         (padded_width + PYRAMID_ALIGNMENT - 1) & ~(PYRAMID_ALIGNMENT - 1); | 
 |  | 
 |     size_t level_alloc_start = buffer_size; | 
 |     size_t level_start = | 
 |         level_alloc_start + PYRAMID_PADDING * level_stride + PYRAMID_PADDING; | 
 |  | 
 |     buffer_size += level_stride * padded_height; | 
 |  | 
 |     layer_offsets[level] = level_start; | 
 |     layer->width = level_width; | 
 |     layer->height = level_height; | 
 |     layer->stride = level_stride; | 
 |   } | 
 |  | 
 |   pyr->buffer_alloc = | 
 |       aom_memalign(PYRAMID_ALIGNMENT, buffer_size * sizeof(*pyr->buffer_alloc)); | 
 |   if (!pyr->buffer_alloc) { | 
 |     aom_free(pyr->layers); | 
 |     aom_free(pyr); | 
 |     aom_free(layer_offsets); | 
 |     return NULL; | 
 |   } | 
 |  | 
 |   // Fill in pointers for each level | 
  // If the image is 8-bit, then the lowest level is left unconfigured for now,
  // and will be set up properly when the pyramid is filled in.
 |   for (int level = first_allocated_level; level < n_levels; level++) { | 
 |     PyramidLayer *layer = &pyr->layers[level]; | 
 |     layer->buffer = pyr->buffer_alloc + layer_offsets[level]; | 
 |   } | 
 |  | 
 | #if CONFIG_MULTITHREAD | 
 |   pthread_mutex_init(&pyr->mutex, NULL); | 
 | #endif  // CONFIG_MULTITHREAD | 
 |  | 
 |   aom_free(layer_offsets); | 
 |   return pyr; | 
 | } | 
 |  | 
 | // Fill the border region of a pyramid frame. | 
 | // This must be called after the main image area is filled out. | 
// `img_buf` should point to the first pixel in the image area,
// i.e. it should be pyr->layers[level].buffer.
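//
// The border is filled by edge replication: within each row, the leftmost and
// rightmost image pixels are smeared outward, and then the fully-padded first
// and last image rows are copied upward and downward respectively. For
// example, with a padding of 2 (illustrative value only), the row [a b c]
// becomes [a a a b c c c].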
 | static inline void fill_border(uint8_t *img_buf, const int width, | 
 |                                const int height, const int stride) { | 
 |   // Fill left and right areas | 
 |   for (int row = 0; row < height; row++) { | 
 |     uint8_t *row_start = &img_buf[row * stride]; | 
 |     uint8_t left_pixel = row_start[0]; | 
 |     memset(row_start - PYRAMID_PADDING, left_pixel, PYRAMID_PADDING); | 
 |     uint8_t right_pixel = row_start[width - 1]; | 
 |     memset(row_start + width, right_pixel, PYRAMID_PADDING); | 
 |   } | 
 |  | 
 |   // Fill top area | 
 |   for (int row = -PYRAMID_PADDING; row < 0; row++) { | 
 |     uint8_t *row_start = &img_buf[row * stride]; | 
 |     memcpy(row_start - PYRAMID_PADDING, img_buf - PYRAMID_PADDING, | 
 |            width + 2 * PYRAMID_PADDING); | 
 |   } | 
 |  | 
 |   // Fill bottom area | 
 |   uint8_t *last_row_start = &img_buf[(height - 1) * stride]; | 
 |   for (int row = height; row < height + PYRAMID_PADDING; row++) { | 
 |     uint8_t *row_start = &img_buf[row * stride]; | 
 |     memcpy(row_start - PYRAMID_PADDING, last_row_start - PYRAMID_PADDING, | 
 |            width + 2 * PYRAMID_PADDING); | 
 |   } | 
 | } | 
 |  | 
 | // Compute downsampling pyramid for a frame | 
 | // | 
 | // This function will ensure that the first `n_levels` levels of the pyramid | 
 | // are filled, unless the frame is too small to have this many levels. | 
 | // In that case, we will fill all available levels and then stop. | 
 | // | 
 | // Returns the actual number of levels filled, capped at n_levels, | 
 | // or -1 on error. | 
 | // | 
 | // This must only be called while holding frame_pyr->mutex | 
 | static inline int fill_pyramid(const YV12_BUFFER_CONFIG *frame, int bit_depth, | 
 |                                int n_levels, ImagePyramid *frame_pyr) { | 
 |   int already_filled_levels = frame_pyr->filled_levels; | 
 |  | 
 |   // This condition should already be enforced by aom_compute_pyramid | 
 |   assert(n_levels <= frame_pyr->max_levels); | 
 |  | 
 |   if (already_filled_levels >= n_levels) { | 
 |     return n_levels; | 
 |   } | 
 |  | 
 |   const int frame_width = frame->y_crop_width; | 
 |   const int frame_height = frame->y_crop_height; | 
 |   const int frame_stride = frame->y_stride; | 
 |   assert((frame_width >> n_levels) >= 0); | 
 |   assert((frame_height >> n_levels) >= 0); | 
 |  | 
 |   if (already_filled_levels == 0) { | 
 |     // Fill in largest level from the original image | 
 |     PyramidLayer *first_layer = &frame_pyr->layers[0]; | 
 |     if (frame->flags & YV12_FLAG_HIGHBITDEPTH) { | 
 |       // For frames stored in a 16-bit buffer, we need to downconvert to 8 bits | 
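      // (For example, for a 10-bit input, bit_depth - 8 == 2, so each sample
      // is shifted right by 2, mapping the range 0..1023 onto 0..255.)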
 |       assert(first_layer->width == frame_width); | 
 |       assert(first_layer->height == frame_height); | 
 |  | 
 |       uint16_t *frame_buffer = CONVERT_TO_SHORTPTR(frame->y_buffer); | 
 |       uint8_t *pyr_buffer = first_layer->buffer; | 
 |       int pyr_stride = first_layer->stride; | 
 |       for (int y = 0; y < frame_height; y++) { | 
 |         uint16_t *frame_row = frame_buffer + y * frame_stride; | 
 |         uint8_t *pyr_row = pyr_buffer + y * pyr_stride; | 
 |         for (int x = 0; x < frame_width; x++) { | 
 |           pyr_row[x] = frame_row[x] >> (bit_depth - 8); | 
 |         } | 
 |       } | 
 |  | 
 |       fill_border(pyr_buffer, frame_width, frame_height, pyr_stride); | 
 |     } else { | 
 |       // For frames stored in an 8-bit buffer, we don't need to copy anything - | 
 |       // we can just reference the original image buffer | 
 |       first_layer->buffer = frame->y_buffer; | 
 |       first_layer->width = frame_width; | 
 |       first_layer->height = frame_height; | 
 |       first_layer->stride = frame_stride; | 
 |     } | 
 |  | 
 |     already_filled_levels = 1; | 
 |   } | 
 |  | 
 |   // Fill in the remaining levels through progressive downsampling | 
 |   for (int level = already_filled_levels; level < n_levels; ++level) { | 
 |     bool mem_status = false; | 
 |     PyramidLayer *prev_layer = &frame_pyr->layers[level - 1]; | 
 |     uint8_t *prev_buffer = prev_layer->buffer; | 
 |     int prev_stride = prev_layer->stride; | 
 |  | 
 |     PyramidLayer *this_layer = &frame_pyr->layers[level]; | 
 |     uint8_t *this_buffer = this_layer->buffer; | 
 |     int this_width = this_layer->width; | 
 |     int this_height = this_layer->height; | 
 |     int this_stride = this_layer->stride; | 
 |  | 
    // The width and height of the region of the previous layer that is used to
    // derive the current layer.
 |     const int input_layer_width = this_width << 1; | 
 |     const int input_layer_height = this_height << 1; | 
 |  | 
    // Compute this pyramid level by downsampling the previous level.
    //
    // We downsample by a factor of exactly 2, clipping the rightmost column
    // and bottommost row off of the previous level if needed. We do this for
    // two main reasons:
 |     // | 
 |     // 1) In the disflow code, when stepping from a higher pyramid level to a | 
 |     //    lower pyramid level, we need to not just interpolate the flow field | 
 |     //    but also to scale each flow vector by the upsampling ratio. | 
 |     //    So it is much more convenient if this ratio is simply 2. | 
 |     // | 
 |     // 2) Up/downsampling by a factor of 2 can be implemented much more | 
 |     //    efficiently than up/downsampling by a generic ratio. | 
 |     //    TODO(rachelbarker): Use optimized downsample-by-2 function | 
 |  | 
    // SIMD support has been added specifically for cases where the downsample
    // factor is exactly 2. In such instances, horizontal and vertical resizing
    // is performed using the down2_symeven() function, which requires the
    // input layer dimensions to be even.
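    //
    // For example, if the previous level is 101x57 then this level is 50x28,
    // so input_layer_width = 100 and input_layer_height = 56: the rightmost
    // column and bottommost row of the previous level are ignored.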
 |     if (should_resize_by_half(input_layer_height, input_layer_width, | 
 |                               this_height, this_width)) { | 
 |       assert(input_layer_height % 2 == 0 && input_layer_width % 2 == 0 && | 
 |              "Input width or height cannot be odd."); | 
 |       mem_status = av1_resize_plane_to_half( | 
 |           prev_buffer, input_layer_height, input_layer_width, prev_stride, | 
 |           this_buffer, this_height, this_width, this_stride); | 
 |     } else { | 
 |       mem_status = av1_resize_plane(prev_buffer, input_layer_height, | 
 |                                     input_layer_width, prev_stride, this_buffer, | 
 |                                     this_height, this_width, this_stride); | 
 |     } | 
 |  | 
 |     // Terminate early in cases of memory allocation failure. | 
 |     if (!mem_status) { | 
 |       frame_pyr->filled_levels = n_levels; | 
 |       return -1; | 
 |     } | 
 |  | 
 |     fill_border(this_buffer, this_width, this_height, this_stride); | 
 |   } | 
 |  | 
 |   frame_pyr->filled_levels = n_levels; | 
 |   return n_levels; | 
 | } | 
 |  | 
 | // Fill out a downsampling pyramid for a given frame. | 
 | // | 
 | // The top level (index 0) will always be an 8-bit copy of the input frame, | 
 | // regardless of the input bit depth. Additional levels are then downscaled | 
 | // by powers of 2. | 
 | // | 
 | // This function will ensure that the first `n_levels` levels of the pyramid | 
 | // are filled, unless the frame is too small to have this many levels. | 
 | // In that case, we will fill all available levels and then stop. | 
 | // No matter how small the frame is, at least one level is guaranteed | 
 | // to be filled. | 
 | // | 
 | // Returns the actual number of levels filled, capped at n_levels, | 
 | // or -1 on error. | 
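//
// A minimal calling sketch (how `pyr` is obtained and how failures are handled
// is up to the caller):
//   int filled = aom_compute_pyramid(frame, bit_depth, n_levels, pyr);
//   if (filled < 0) {
//     // Allocation failure while downscaling
//   } else {
//     // Levels 0 .. filled-1 are now valid; filled may be < n_levels if the
//     // frame is too small to support n_levels levels.
//   }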
 | int aom_compute_pyramid(const YV12_BUFFER_CONFIG *frame, int bit_depth, | 
 |                         int n_levels, ImagePyramid *pyr) { | 
 |   assert(pyr); | 
 |  | 
 |   // Per the comments in the ImagePyramid struct, we must take this mutex | 
 |   // before reading or writing the filled_levels field, and hold it while | 
 |   // computing any additional pyramid levels, to ensure proper behaviour | 
 |   // when multithreading is used | 
 | #if CONFIG_MULTITHREAD | 
 |   pthread_mutex_lock(&pyr->mutex); | 
 | #endif  // CONFIG_MULTITHREAD | 
 |  | 
 |   n_levels = AOMMIN(n_levels, pyr->max_levels); | 
 |   int result = n_levels; | 
 |   if (pyr->filled_levels < n_levels) { | 
 |     // Compute any missing levels that we need | 
 |     result = fill_pyramid(frame, bit_depth, n_levels, pyr); | 
 |   } | 
 |  | 
 |   // At this point, as long as result >= 0, the requested number of pyramid | 
 |   // levels are guaranteed to be valid, and can be safely read from without | 
 |   // holding the mutex any further | 
 |   assert(IMPLIES(result >= 0, pyr->filled_levels >= n_levels)); | 
 | #if CONFIG_MULTITHREAD | 
 |   pthread_mutex_unlock(&pyr->mutex); | 
 | #endif  // CONFIG_MULTITHREAD | 
 |   return result; | 
 | } | 
 |  | 
 | #ifndef NDEBUG | 
 | // Check if a pyramid has already been computed to at least n levels | 
 | // This is mostly a debug helper - as it is necessary to hold pyr->mutex | 
 | // while reading the number of already-computed levels, we cannot just write: | 
 | //   assert(pyr->filled_levels >= n_levels); | 
 | // This function allows the check to be correctly written as: | 
 | //   assert(aom_is_pyramid_valid(pyr, n_levels)); | 
 | // | 
 | // Note: This deliberately does not restrict n_levels based on the maximum | 
 | // number of permitted levels for the frame size. This allows the check to | 
// catch cases where the caller forgets to handle the case where max_levels
// is less than the requested number of levels.
 | bool aom_is_pyramid_valid(ImagePyramid *pyr, int n_levels) { | 
 |   assert(pyr); | 
 |  | 
 |   // Per the comments in the ImagePyramid struct, we must take this mutex | 
 |   // before reading or writing the filled_levels field, to ensure proper | 
 |   // behaviour when multithreading is used | 
 | #if CONFIG_MULTITHREAD | 
 |   pthread_mutex_lock(&pyr->mutex); | 
 | #endif  // CONFIG_MULTITHREAD | 
 |  | 
 |   bool result = (pyr->filled_levels >= n_levels); | 
 |  | 
 | #if CONFIG_MULTITHREAD | 
 |   pthread_mutex_unlock(&pyr->mutex); | 
 | #endif  // CONFIG_MULTITHREAD | 
 |  | 
 |   return result; | 
 | } | 
 | #endif | 
 |  | 
 | // Mark a pyramid as no longer containing valid data. | 
 | // This must be done whenever the corresponding frame buffer is reused | 
 | void aom_invalidate_pyramid(ImagePyramid *pyr) { | 
 |   if (pyr) { | 
 | #if CONFIG_MULTITHREAD | 
 |     pthread_mutex_lock(&pyr->mutex); | 
 | #endif  // CONFIG_MULTITHREAD | 
 |     pyr->filled_levels = 0; | 
 | #if CONFIG_MULTITHREAD | 
 |     pthread_mutex_unlock(&pyr->mutex); | 
 | #endif  // CONFIG_MULTITHREAD | 
 |   } | 
 | } | 
 |  | 
 | // Release the memory associated with a pyramid | 
 | void aom_free_pyramid(ImagePyramid *pyr) { | 
 |   if (pyr) { | 
 | #if CONFIG_MULTITHREAD | 
 |     pthread_mutex_destroy(&pyr->mutex); | 
 | #endif  // CONFIG_MULTITHREAD | 
 |     aom_free(pyr->buffer_alloc); | 
 |     aom_free(pyr->layers); | 
 |     aom_free(pyr); | 
 |   } | 
 | } |