| /* |
| * Copyright (c) 2022, Alliance for Open Media. All rights reserved. |
| * |
| * This source code is subject to the terms of the BSD 2 Clause License and |
| * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
| * was not distributed with this source code in the LICENSE file, you can |
| * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
| * Media Patent License 1.0 was not distributed with this source code in the |
| * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
| */ |
| |
| #include "aom_dsp/pyramid.h" |
| #include "aom_mem/aom_mem.h" |
| #include "aom_ports/bitops.h" |
| #include "aom_util/aom_pthread.h" |
| |
| // TODO(rachelbarker): Move needed code from av1/ to aom_dsp/ |
| #include "av1/common/resize.h" |
| |
| #include <assert.h> |
| #include <string.h> |
| |
| // Lifecycle: |
| // * Frame buffer alloc code calls aom_get_pyramid_alloc_size() |
| // to work out how much space is needed for a given number of pyramid |
| // levels. This is counted in the size checked against the max allocation |
| // limit |
| // * Then calls aom_alloc_pyramid() to actually create the pyramid |
| // * Pyramid is initially marked as containing no valid data |
| // * Each pyramid layer is computed on-demand, the first time it is requested |
| // * Whenever frame buffer is reused, reset the counter of filled levels. |
| // This invalidates all of the existing pyramid levels. |
| // * Whenever frame buffer is resized, reallocate pyramid |
| |
| size_t aom_get_pyramid_alloc_size(int width, int height, bool image_is_16bit) { |
| // Allocate the maximum possible number of layers for this width and height |
| const int msb = get_msb(AOMMIN(width, height)); |
| const int n_levels = AOMMAX(msb - MIN_PYRAMID_SIZE_LOG2, 1); |
| |
| size_t alloc_size = 0; |
| alloc_size += sizeof(ImagePyramid); |
| alloc_size += n_levels * sizeof(PyramidLayer); |
| |
| // Calculate how much memory is needed for downscaled frame buffers |
| size_t buffer_size = 0; |
| |
| // Work out if we need to allocate a few extra bytes for alignment. |
| // aom_memalign() will ensure that the start of the allocation is aligned |
| // to a multiple of PYRAMID_ALIGNMENT. But we want the first image pixel |
| // to be aligned, not the first byte of the allocation. |
| // |
| // In the loop below, we ensure that the stride of every image is a multiple |
| // of PYRAMID_ALIGNMENT. Thus the allocated size of each pyramid level will |
| // also be a multiple of PYRAMID_ALIGNMENT. Thus, as long as we can get the |
| // first pixel in the first pyramid layer aligned properly, that will |
| // automatically mean that the first pixel of every row of every layer is |
| // properly aligned too. |
| // |
| // Thus all we need to consider is the first pixel in the first layer. |
| // This is located at offset |
| // extra_bytes + level_stride * PYRAMID_PADDING + PYRAMID_PADDING |
| // bytes into the buffer. Since level_stride is a multiple of |
| // PYRAMID_ALIGNMENT, we can ignore that. So we need |
| // extra_bytes + PYRAMID_PADDING = multiple of PYRAMID_ALIGNMENT |
| // |
| // To solve this, we can round PYRAMID_PADDING up to the next multiple |
| // of PYRAMID_ALIGNMENT, then subtract the orginal value to calculate |
| // how many extra bytes are needed. |
| size_t first_px_offset = |
| (PYRAMID_PADDING + PYRAMID_ALIGNMENT - 1) & ~(PYRAMID_ALIGNMENT - 1); |
| size_t extra_bytes = first_px_offset - PYRAMID_PADDING; |
| buffer_size += extra_bytes; |
| |
| // If the original image is stored in an 8-bit buffer, then we can point the |
| // lowest pyramid level at that buffer rather than allocating a new one. |
| int first_allocated_level = image_is_16bit ? 0 : 1; |
| |
| for (int level = first_allocated_level; level < n_levels; level++) { |
| int level_width = width >> level; |
| int level_height = height >> level; |
| |
| // Allocate padding for each layer |
| int padded_width = level_width + 2 * PYRAMID_PADDING; |
| int padded_height = level_height + 2 * PYRAMID_PADDING; |
| |
| // Align the layer stride to be a multiple of PYRAMID_ALIGNMENT |
| // This ensures that, as long as the top-left pixel in this pyramid level is |
| // properly aligned, then so will the leftmost pixel in every row of the |
| // pyramid level. |
| int level_stride = |
| (padded_width + PYRAMID_ALIGNMENT - 1) & ~(PYRAMID_ALIGNMENT - 1); |
| |
| buffer_size += level_stride * padded_height; |
| } |
| |
| alloc_size += buffer_size; |
| |
| return alloc_size; |
| } |
| |
| ImagePyramid *aom_alloc_pyramid(int width, int height, bool image_is_16bit) { |
| // Allocate the maximum possible number of layers for this width and height |
| const int msb = get_msb(AOMMIN(width, height)); |
| const int n_levels = AOMMAX(msb - MIN_PYRAMID_SIZE_LOG2, 1); |
| |
| ImagePyramid *pyr = aom_calloc(1, sizeof(*pyr)); |
| if (!pyr) { |
| return NULL; |
| } |
| |
| pyr->layers = aom_calloc(n_levels, sizeof(*pyr->layers)); |
| if (!pyr->layers) { |
| aom_free(pyr); |
| return NULL; |
| } |
| |
| pyr->max_levels = n_levels; |
| pyr->filled_levels = 0; |
| |
| // Compute sizes and offsets for each pyramid level |
| // These are gathered up first, so that we can allocate all pyramid levels |
| // in a single buffer |
| size_t buffer_size = 0; |
| size_t *layer_offsets = aom_calloc(n_levels, sizeof(*layer_offsets)); |
| if (!layer_offsets) { |
| aom_free(pyr->layers); |
| aom_free(pyr); |
| return NULL; |
| } |
| |
| // Work out if we need to allocate a few extra bytes for alignment. |
| // aom_memalign() will ensure that the start of the allocation is aligned |
| // to a multiple of PYRAMID_ALIGNMENT. But we want the first image pixel |
| // to be aligned, not the first byte of the allocation. |
| // |
| // In the loop below, we ensure that the stride of every image is a multiple |
| // of PYRAMID_ALIGNMENT. Thus the allocated size of each pyramid level will |
| // also be a multiple of PYRAMID_ALIGNMENT. Thus, as long as we can get the |
| // first pixel in the first pyramid layer aligned properly, that will |
| // automatically mean that the first pixel of every row of every layer is |
| // properly aligned too. |
| // |
| // Thus all we need to consider is the first pixel in the first layer. |
| // This is located at offset |
| // extra_bytes + level_stride * PYRAMID_PADDING + PYRAMID_PADDING |
| // bytes into the buffer. Since level_stride is a multiple of |
| // PYRAMID_ALIGNMENT, we can ignore that. So we need |
| // extra_bytes + PYRAMID_PADDING = multiple of PYRAMID_ALIGNMENT |
| // |
| // To solve this, we can round PYRAMID_PADDING up to the next multiple |
| // of PYRAMID_ALIGNMENT, then subtract the orginal value to calculate |
| // how many extra bytes are needed. |
| size_t first_px_offset = |
| (PYRAMID_PADDING + PYRAMID_ALIGNMENT - 1) & ~(PYRAMID_ALIGNMENT - 1); |
| size_t extra_bytes = first_px_offset - PYRAMID_PADDING; |
| buffer_size += extra_bytes; |
| |
| // If the original image is stored in an 8-bit buffer, then we can point the |
| // lowest pyramid level at that buffer rather than allocating a new one. |
| int first_allocated_level = image_is_16bit ? 0 : 1; |
| |
| for (int level = first_allocated_level; level < n_levels; level++) { |
| PyramidLayer *layer = &pyr->layers[level]; |
| |
| int level_width = width >> level; |
| int level_height = height >> level; |
| |
| // Allocate padding for each layer |
| int padded_width = level_width + 2 * PYRAMID_PADDING; |
| int padded_height = level_height + 2 * PYRAMID_PADDING; |
| |
| // Align the layer stride to be a multiple of PYRAMID_ALIGNMENT |
| // This ensures that, as long as the top-left pixel in this pyramid level is |
| // properly aligned, then so will the leftmost pixel in every row of the |
| // pyramid level. |
| int level_stride = |
| (padded_width + PYRAMID_ALIGNMENT - 1) & ~(PYRAMID_ALIGNMENT - 1); |
| |
| size_t level_alloc_start = buffer_size; |
| size_t level_start = |
| level_alloc_start + PYRAMID_PADDING * level_stride + PYRAMID_PADDING; |
| |
| buffer_size += level_stride * padded_height; |
| |
| layer_offsets[level] = level_start; |
| layer->width = level_width; |
| layer->height = level_height; |
| layer->stride = level_stride; |
| } |
| |
| pyr->buffer_alloc = |
| aom_memalign(PYRAMID_ALIGNMENT, buffer_size * sizeof(*pyr->buffer_alloc)); |
| if (!pyr->buffer_alloc) { |
| aom_free(pyr->layers); |
| aom_free(pyr); |
| aom_free(layer_offsets); |
| return NULL; |
| } |
| |
| // Fill in pointers for each level |
| // If image is 8-bit, then the lowest level is left unconfigured for now, |
| // and will be set up properly when the pyramid is filled in |
| for (int level = first_allocated_level; level < n_levels; level++) { |
| PyramidLayer *layer = &pyr->layers[level]; |
| layer->buffer = pyr->buffer_alloc + layer_offsets[level]; |
| } |
| |
| #if CONFIG_MULTITHREAD |
| pthread_mutex_init(&pyr->mutex, NULL); |
| #endif // CONFIG_MULTITHREAD |
| |
| aom_free(layer_offsets); |
| return pyr; |
| } |
| |
| // Fill the border region of a pyramid frame. |
| // This must be called after the main image area is filled out. |
| // `img_buf` should point to the first pixel in the image area, |
| // ie. it should be pyr->level_buffer + pyr->level_loc[level]. |
| static inline void fill_border(uint8_t *img_buf, const int width, |
| const int height, const int stride) { |
| // Fill left and right areas |
| for (int row = 0; row < height; row++) { |
| uint8_t *row_start = &img_buf[row * stride]; |
| uint8_t left_pixel = row_start[0]; |
| memset(row_start - PYRAMID_PADDING, left_pixel, PYRAMID_PADDING); |
| uint8_t right_pixel = row_start[width - 1]; |
| memset(row_start + width, right_pixel, PYRAMID_PADDING); |
| } |
| |
| // Fill top area |
| for (int row = -PYRAMID_PADDING; row < 0; row++) { |
| uint8_t *row_start = &img_buf[row * stride]; |
| memcpy(row_start - PYRAMID_PADDING, img_buf - PYRAMID_PADDING, |
| width + 2 * PYRAMID_PADDING); |
| } |
| |
| // Fill bottom area |
| uint8_t *last_row_start = &img_buf[(height - 1) * stride]; |
| for (int row = height; row < height + PYRAMID_PADDING; row++) { |
| uint8_t *row_start = &img_buf[row * stride]; |
| memcpy(row_start - PYRAMID_PADDING, last_row_start - PYRAMID_PADDING, |
| width + 2 * PYRAMID_PADDING); |
| } |
| } |
| |
| // Compute downsampling pyramid for a frame |
| // |
| // This function will ensure that the first `n_levels` levels of the pyramid |
| // are filled, unless the frame is too small to have this many levels. |
| // In that case, we will fill all available levels and then stop. |
| // |
| // Returns the actual number of levels filled, capped at n_levels, |
| // or -1 on error. |
| // |
| // This must only be called while holding frame_pyr->mutex |
| static inline int fill_pyramid(const YV12_BUFFER_CONFIG *frame, int bit_depth, |
| int n_levels, ImagePyramid *frame_pyr) { |
| int already_filled_levels = frame_pyr->filled_levels; |
| |
| // This condition should already be enforced by aom_compute_pyramid |
| assert(n_levels <= frame_pyr->max_levels); |
| |
| if (already_filled_levels >= n_levels) { |
| return n_levels; |
| } |
| |
| const int frame_width = frame->y_crop_width; |
| const int frame_height = frame->y_crop_height; |
| const int frame_stride = frame->y_stride; |
| assert((frame_width >> n_levels) >= 0); |
| assert((frame_height >> n_levels) >= 0); |
| |
| if (already_filled_levels == 0) { |
| // Fill in largest level from the original image |
| PyramidLayer *first_layer = &frame_pyr->layers[0]; |
| if (frame->flags & YV12_FLAG_HIGHBITDEPTH) { |
| // For frames stored in a 16-bit buffer, we need to downconvert to 8 bits |
| assert(first_layer->width == frame_width); |
| assert(first_layer->height == frame_height); |
| |
| uint16_t *frame_buffer = CONVERT_TO_SHORTPTR(frame->y_buffer); |
| uint8_t *pyr_buffer = first_layer->buffer; |
| int pyr_stride = first_layer->stride; |
| for (int y = 0; y < frame_height; y++) { |
| uint16_t *frame_row = frame_buffer + y * frame_stride; |
| uint8_t *pyr_row = pyr_buffer + y * pyr_stride; |
| for (int x = 0; x < frame_width; x++) { |
| pyr_row[x] = frame_row[x] >> (bit_depth - 8); |
| } |
| } |
| |
| fill_border(pyr_buffer, frame_width, frame_height, pyr_stride); |
| } else { |
| // For frames stored in an 8-bit buffer, we don't need to copy anything - |
| // we can just reference the original image buffer |
| first_layer->buffer = frame->y_buffer; |
| first_layer->width = frame_width; |
| first_layer->height = frame_height; |
| first_layer->stride = frame_stride; |
| } |
| |
| already_filled_levels = 1; |
| } |
| |
| // Fill in the remaining levels through progressive downsampling |
| for (int level = already_filled_levels; level < n_levels; ++level) { |
| bool mem_status = false; |
| PyramidLayer *prev_layer = &frame_pyr->layers[level - 1]; |
| uint8_t *prev_buffer = prev_layer->buffer; |
| int prev_stride = prev_layer->stride; |
| |
| PyramidLayer *this_layer = &frame_pyr->layers[level]; |
| uint8_t *this_buffer = this_layer->buffer; |
| int this_width = this_layer->width; |
| int this_height = this_layer->height; |
| int this_stride = this_layer->stride; |
| |
| // The width and height of the previous layer that needs to be considered to |
| // derive the current layer frame. |
| const int input_layer_width = this_width << 1; |
| const int input_layer_height = this_height << 1; |
| |
| // Compute the this pyramid level by downsampling the current level. |
| // |
| // We downsample by a factor of exactly 2, clipping the rightmost and |
| // bottommost pixel off of the current level if needed. We do this for |
| // two main reasons: |
| // |
| // 1) In the disflow code, when stepping from a higher pyramid level to a |
| // lower pyramid level, we need to not just interpolate the flow field |
| // but also to scale each flow vector by the upsampling ratio. |
| // So it is much more convenient if this ratio is simply 2. |
| // |
| // 2) Up/downsampling by a factor of 2 can be implemented much more |
| // efficiently than up/downsampling by a generic ratio. |
| // TODO(rachelbarker): Use optimized downsample-by-2 function |
| |
| // SIMD support has been added specifically for cases where the downsample |
| // factor is exactly 2. In such instances, horizontal and vertical resizing |
| // is performed utilizing the down2_symeven() function, which considers the |
| // even dimensions of the input layer. |
| if (should_resize_by_half(input_layer_height, input_layer_width, |
| this_height, this_width)) { |
| assert(input_layer_height % 2 == 0 && input_layer_width % 2 == 0 && |
| "Input width or height cannot be odd."); |
| mem_status = av1_resize_plane_to_half( |
| prev_buffer, input_layer_height, input_layer_width, prev_stride, |
| this_buffer, this_height, this_width, this_stride); |
| } else { |
| mem_status = av1_resize_plane(prev_buffer, input_layer_height, |
| input_layer_width, prev_stride, this_buffer, |
| this_height, this_width, this_stride); |
| } |
| |
| // Terminate early in cases of memory allocation failure. |
| if (!mem_status) { |
| frame_pyr->filled_levels = n_levels; |
| return -1; |
| } |
| |
| fill_border(this_buffer, this_width, this_height, this_stride); |
| } |
| |
| frame_pyr->filled_levels = n_levels; |
| return n_levels; |
| } |
| |
| // Fill out a downsampling pyramid for a given frame. |
| // |
| // The top level (index 0) will always be an 8-bit copy of the input frame, |
| // regardless of the input bit depth. Additional levels are then downscaled |
| // by powers of 2. |
| // |
| // This function will ensure that the first `n_levels` levels of the pyramid |
| // are filled, unless the frame is too small to have this many levels. |
| // In that case, we will fill all available levels and then stop. |
| // No matter how small the frame is, at least one level is guaranteed |
| // to be filled. |
| // |
| // Returns the actual number of levels filled, capped at n_levels, |
| // or -1 on error. |
| int aom_compute_pyramid(const YV12_BUFFER_CONFIG *frame, int bit_depth, |
| int n_levels, ImagePyramid *pyr) { |
| assert(pyr); |
| |
| // Per the comments in the ImagePyramid struct, we must take this mutex |
| // before reading or writing the filled_levels field, and hold it while |
| // computing any additional pyramid levels, to ensure proper behaviour |
| // when multithreading is used |
| #if CONFIG_MULTITHREAD |
| pthread_mutex_lock(&pyr->mutex); |
| #endif // CONFIG_MULTITHREAD |
| |
| n_levels = AOMMIN(n_levels, pyr->max_levels); |
| int result = n_levels; |
| if (pyr->filled_levels < n_levels) { |
| // Compute any missing levels that we need |
| result = fill_pyramid(frame, bit_depth, n_levels, pyr); |
| } |
| |
| // At this point, as long as result >= 0, the requested number of pyramid |
| // levels are guaranteed to be valid, and can be safely read from without |
| // holding the mutex any further |
| assert(IMPLIES(result >= 0, pyr->filled_levels >= n_levels)); |
| #if CONFIG_MULTITHREAD |
| pthread_mutex_unlock(&pyr->mutex); |
| #endif // CONFIG_MULTITHREAD |
| return result; |
| } |
| |
| #ifndef NDEBUG |
| // Check if a pyramid has already been computed to at least n levels |
| // This is mostly a debug helper - as it is necessary to hold pyr->mutex |
| // while reading the number of already-computed levels, we cannot just write: |
| // assert(pyr->filled_levels >= n_levels); |
| // This function allows the check to be correctly written as: |
| // assert(aom_is_pyramid_valid(pyr, n_levels)); |
| // |
| // Note: This deliberately does not restrict n_levels based on the maximum |
| // number of permitted levels for the frame size. This allows the check to |
| // catch cases where the caller forgets to handle the case where |
| // max_levels is less than the requested number of levels |
| bool aom_is_pyramid_valid(ImagePyramid *pyr, int n_levels) { |
| assert(pyr); |
| |
| // Per the comments in the ImagePyramid struct, we must take this mutex |
| // before reading or writing the filled_levels field, to ensure proper |
| // behaviour when multithreading is used |
| #if CONFIG_MULTITHREAD |
| pthread_mutex_lock(&pyr->mutex); |
| #endif // CONFIG_MULTITHREAD |
| |
| bool result = (pyr->filled_levels >= n_levels); |
| |
| #if CONFIG_MULTITHREAD |
| pthread_mutex_unlock(&pyr->mutex); |
| #endif // CONFIG_MULTITHREAD |
| |
| return result; |
| } |
| #endif |
| |
| // Mark a pyramid as no longer containing valid data. |
| // This must be done whenever the corresponding frame buffer is reused |
| void aom_invalidate_pyramid(ImagePyramid *pyr) { |
| if (pyr) { |
| #if CONFIG_MULTITHREAD |
| pthread_mutex_lock(&pyr->mutex); |
| #endif // CONFIG_MULTITHREAD |
| pyr->filled_levels = 0; |
| #if CONFIG_MULTITHREAD |
| pthread_mutex_unlock(&pyr->mutex); |
| #endif // CONFIG_MULTITHREAD |
| } |
| } |
| |
| // Release the memory associated with a pyramid |
| void aom_free_pyramid(ImagePyramid *pyr) { |
| if (pyr) { |
| #if CONFIG_MULTITHREAD |
| pthread_mutex_destroy(&pyr->mutex); |
| #endif // CONFIG_MULTITHREAD |
| aom_free(pyr->buffer_alloc); |
| aom_free(pyr->layers); |
| aom_free(pyr); |
| } |
| } |