Introduce av1_resize_plane_to_half() for Global Motion tool
Currently, the Global Motion (GM) tool invokes av1_resize_plane()
with a downsample factor of exactly 2. To make it easier to add a
SIMD implementation for this case, this CL introduces
av1_resize_plane_to_half(), which incorporates the necessary
conditions from av1_resize_plane(). This is a bit-exact change
with no impact on encode time.
Change-Id: I87ed23892221472477a209357cddd08919ad8edf
diff --git a/aom_dsp/pyramid.c b/aom_dsp/pyramid.c
index 5de001d..05ddbb2 100644
--- a/aom_dsp/pyramid.c
+++ b/aom_dsp/pyramid.c
@@ -305,6 +305,7 @@
// Fill in the remaining levels through progressive downsampling
for (int level = already_filled_levels; level < n_levels; ++level) {
+ bool mem_status = false;
PyramidLayer *prev_layer = &frame_pyr->layers[level - 1];
uint8_t *prev_buffer = prev_layer->buffer;
int prev_stride = prev_layer->stride;
@@ -315,6 +316,11 @@
int this_height = this_layer->height;
int this_stride = this_layer->stride;
+ // The width and height of the previous layer that needs to be considered to
+ // derive the current layer frame.
+ const int input_layer_width = this_width << 1;
+ const int input_layer_height = this_height << 1;
+
// Compute the this pyramid level by downsampling the current level.
//
// We downsample by a factor of exactly 2, clipping the rightmost and
@@ -329,13 +335,30 @@
// 2) Up/downsampling by a factor of 2 can be implemented much more
// efficiently than up/downsampling by a generic ratio.
// TODO(rachelbarker): Use optimized downsample-by-2 function
- if (!av1_resize_plane(prev_buffer, this_height << 1, this_width << 1,
- prev_stride, this_buffer, this_height, this_width,
- this_stride)) {
- // If we can't allocate memory, we'll have to terminate early
+
+ // A dedicated function is used when the downsample factor is exactly 2, so
+ // that this case can be SIMD-optimized separately. In such instances,
+ // horizontal and vertical resizing is performed utilizing the
+ // down2_symeven() function, which considers the even dimensions of the
+ // input layer.
+ if (should_resize_by_half(input_layer_height, input_layer_width,
+ this_height, this_width)) {
+ assert(input_layer_height % 2 == 0 && input_layer_width % 2 == 0 &&
+ "Input width or height cannot be odd.");
+ mem_status = av1_resize_plane_to_half(
+ prev_buffer, input_layer_height, input_layer_width, prev_stride,
+ this_buffer, this_height, this_width, this_stride);
+ } else {
+ mem_status = av1_resize_plane(prev_buffer, input_layer_height,
+ input_layer_width, prev_stride, this_buffer,
+ this_height, this_width, this_stride);
+ }
+
+ // Terminate early in cases of memory allocation failure.
+ if (!mem_status) {
frame_pyr->filled_levels = n_levels;
return -1;
}
+
fill_border(this_buffer, this_width, this_height, this_stride);
}
diff --git a/av1/common/resize.c b/av1/common/resize.c
index 441323a..ef35fa2 100644
--- a/av1/common/resize.c
+++ b/av1/common/resize.c
@@ -524,6 +524,61 @@
}
}
+// Vertical pass of the resize-to-half path. For each of the width2 columns of
+// intbuf, the column starting at intbuf + i is handed to fill_col_to_arr()
+// (presumably gathering it into a contiguous array -- confirm against the
+// helper), run through down2_symeven(), and written back into column i of
+// output via fill_arr_to_col().
+//
+// Returns false if either scratch array could not be allocated, in which case
+// output is not written; returns true otherwise.
+static INLINE bool resize_vert_dir(uint8_t *intbuf, uint8_t *output,
+                                   int out_stride, int height, int height2,
+                                   int width2) {
+  uint8_t *col_in = (uint8_t *)aom_malloc(sizeof(*col_in) * height);
+  uint8_t *col_out = (uint8_t *)aom_malloc(sizeof(*col_out) * height2);
+  const bool alloc_ok = col_in != NULL && col_out != NULL;
+
+  if (alloc_ok) {
+    for (int col = 0; col < width2; ++col) {
+      fill_col_to_arr(intbuf + col, width2, height, col_in);
+      down2_symeven(col_in, height, col_out);
+      fill_arr_to_col(output + col, out_stride, height2, col_out);
+    }
+  }
+
+  // Both scratch arrays are released on the success and failure paths alike.
+  aom_free(col_in);
+  aom_free(col_out);
+  return alloc_ok;
+}
+
+// Horizontal pass of the resize-to-half path. Each of the height input rows
+// (row r starts at input + in_stride * r) is passed to down2_symeven() with a
+// length of filtered_length, and the result for row r is written starting at
+// intbuf + width2 * r.
+static INLINE void resize_horz_dir(const uint8_t *const input, int in_stride,
+                                   uint8_t *intbuf, int height,
+                                   int filtered_length, int width2) {
+  for (int row = 0; row < height; ++row) {
+    const uint8_t *const src_row = input + in_stride * row;
+    uint8_t *const dst_row = intbuf + width2 * row;
+    down2_symeven(src_row, filtered_length, dst_row);
+  }
+}
+
+// Resizes a plane using two down2_symeven() passes: first along every row
+// (resize_horz_dir), then along every column (resize_vert_dir).
+//
+// input/in_stride describe the source plane of size width x height;
+// output/out_stride describe the destination plane of size width2 x height2.
+//
+// Returns false if the intermediate buffer, or a scratch array needed by the
+// vertical pass, cannot be allocated; returns true otherwise.
+bool av1_resize_plane_to_half(const uint8_t *const input, int height, int width,
+                              int in_stride, uint8_t *output, int height2,
+                              int width2, int out_stride) {
+  // Intermediate plane written by the horizontal pass: height rows, each
+  // width2 entries apart.
+  uint8_t *horz = (uint8_t *)aom_malloc(sizeof(*horz) * width2 * height);
+  bool mem_ok = false;
+
+  if (horz != NULL) {
+    // Horizontal pass: input -> horz.
+    resize_horz_dir(input, in_stride, horz, height, width, width2);
+    // Vertical pass: horz -> output.
+    mem_ok = resize_vert_dir(horz, output, out_stride, height, height2, width2);
+    aom_free(horz);
+  }
+
+  return mem_ok;
+}
+
+// Returns true only when the output dimensions are exactly what
+// get_down2_length(dim, 1) yields for the corresponding input dimensions,
+// i.e. when both width2 and height2 are the "half" of width and height.
+bool should_resize_by_half(int height, int width, int height2, int width2) {
+  const int half_width = get_down2_length(width, 1);
+  const int half_height = get_down2_length(height, 1);
+  return half_width == width2 && half_height == height2;
+}
+
bool av1_resize_plane(const uint8_t *input, int height, int width,
int in_stride, uint8_t *output, int height2, int width2,
int out_stride) {
diff --git a/av1/common/resize.h b/av1/common/resize.h
index d573a53..6e7d46e 100644
--- a/av1/common/resize.h
+++ b/av1/common/resize.h
@@ -93,6 +93,12 @@
void av1_superres_upscale(AV1_COMMON *cm, BufferPool *const pool,
bool alloc_pyramid);
+// Resizes the width x height plane at input (row stride in_stride) into the
+// width2 x height2 plane at output (row stride out_stride) using a horizontal
+// pass followed by a vertical pass. Intended for the case where the output
+// dimensions are half of the input dimensions; see should_resize_by_half().
+// Returns false if a temporary buffer could not be allocated, true otherwise.
+bool av1_resize_plane_to_half(const uint8_t *const input, int height, int width,
+ int in_stride, uint8_t *output, int height2,
+ int width2, int out_stride);
+
+// Returns true if both the output width (width2) and output height (height2)
+// are half of the input width and height respectively.
+bool should_resize_by_half(int height, int width, int height2, int width2);
+
// Returns 1 if a superres upscaled frame is scaled and 0 otherwise.
static INLINE int av1_superres_scaled(const AV1_COMMON *cm) {
// Note: for some corner cases (e.g. cm->width of 1), there may be no scaling