Add upscale functions for floating point input

Change-Id: I2bb1a5a5863cd72b0a11349d6b32f5b5655bf48f
diff --git a/av1/common/resize.c b/av1/common/resize.c
index b19d219..5d4448a 100644
--- a/av1/common/resize.c
+++ b/av1/common/resize.c
@@ -313,6 +313,107 @@
   }
 }
 
+// Resamples the 1-D signal input (in_length samples) into output (out_length
+// samples) using the kernel table interp_filters.
+//
+// interp_filters is read as a flat table of kernels with interp_taps int16_t
+// coefficients each. The kernel for an output sample is selected by the
+// sub-pixel phase of its source position, and each filtered sum is divided by
+// (1 << FILTER_BITS). Taps that land outside [0, in_length - 1] reuse the
+// nearest edge sample. out_length is used as a divisor and must be non-zero.
+static void interpolate_core_double_prec(const double *const input,
+                                         int in_length, double *output,
+                                         int out_length,
+                                         const int16_t *interp_filters,
+                                         int interp_taps) {
+  // Source-position increment per output sample; the integer sample index
+  // sits above the low RS_SCALE_SUBPEL_BITS bits.
+  const int32_t delta =
+      (((uint32_t)in_length << RS_SCALE_SUBPEL_BITS) + out_length / 2) /
+      out_length;
+  // Source position of output sample 0 (before RS_SCALE_EXTRA_OFF is added).
+  const int32_t offset =
+      in_length > out_length
+          ? (((int32_t)(in_length - out_length) << (RS_SCALE_SUBPEL_BITS - 1)) +
+             out_length / 2) /
+                out_length
+          : -(((int32_t)(out_length - in_length)
+               << (RS_SCALE_SUBPEL_BITS - 1)) +
+              out_length / 2) /
+                out_length;
+  double *optr = output;
+  int x, x1, x2, k, int_pel, sub_pel;
+  double sum;
+  int32_t y;
+
+  // x1: first output index whose leftmost tap does not fall before input[0].
+  x = 0;
+  y = offset + RS_SCALE_EXTRA_OFF;
+  while ((y >> RS_SCALE_SUBPEL_BITS) < (interp_taps / 2 - 1)) {
+    x++;
+    y += delta;
+  }
+  x1 = x;
+  // x2: last output index for which int_pel + interp_taps / 2 stays below
+  // in_length, i.e. the taps do not run past the end of the input.
+  x = out_length - 1;
+  y = delta * x + offset + RS_SCALE_EXTRA_OFF;
+  while ((y >> RS_SCALE_SUBPEL_BITS) + (int32_t)(interp_taps / 2) >=
+         in_length) {
+    x--;
+    y -= delta;
+  }
+  x2 = x;
+  if (x1 > x2) {
+    // No output sample is free of edge handling: clamp every tap index on
+    // both sides.
+    for (x = 0, y = offset + RS_SCALE_EXTRA_OFF; x < out_length;
+         ++x, y += delta) {
+      int_pel = y >> RS_SCALE_SUBPEL_BITS;
+      sub_pel = (y >> RS_SCALE_EXTRA_BITS) & RS_SUBPEL_MASK;
+      const int16_t *filter = &interp_filters[sub_pel * interp_taps];
+      sum = 0;
+      for (k = 0; k < interp_taps; ++k) {
+        const int pk = int_pel - interp_taps / 2 + 1 + k;
+        sum += filter[k] * input[AOMMAX(AOMMIN(pk, in_length - 1), 0)];
+      }
+      *optr++ = sum / (1 << FILTER_BITS);
+    }
+  } else {
+    // Initial part: tap indices are clamped at the left edge only.
+    for (x = 0, y = offset + RS_SCALE_EXTRA_OFF; x < x1; ++x, y += delta) {
+      int_pel = y >> RS_SCALE_SUBPEL_BITS;
+      sub_pel = (y >> RS_SCALE_EXTRA_BITS) & RS_SUBPEL_MASK;
+      const int16_t *filter = &interp_filters[sub_pel * interp_taps];
+      sum = 0;
+      for (k = 0; k < interp_taps; ++k)
+        sum += filter[k] * input[AOMMAX(int_pel - interp_taps / 2 + 1 + k, 0)];
+      *optr++ = sum / (1 << FILTER_BITS);
+    }
+    // Middle part: tap indices are used without clamping.
+    for (; x <= x2; ++x, y += delta) {
+      int_pel = y >> RS_SCALE_SUBPEL_BITS;
+      sub_pel = (y >> RS_SCALE_EXTRA_BITS) & RS_SUBPEL_MASK;
+      const int16_t *filter = &interp_filters[sub_pel * interp_taps];
+      sum = 0;
+      for (k = 0; k < interp_taps; ++k)
+        sum += filter[k] * input[int_pel - interp_taps / 2 + 1 + k];
+      *optr++ = sum / (1 << FILTER_BITS);
+    }
+    // End part: tap indices are clamped at the right edge only.
+    for (; x < out_length; ++x, y += delta) {
+      int_pel = y >> RS_SCALE_SUBPEL_BITS;
+      sub_pel = (y >> RS_SCALE_EXTRA_BITS) & RS_SUBPEL_MASK;
+      const int16_t *filter = &interp_filters[sub_pel * interp_taps];
+      sum = 0;
+      for (k = 0; k < interp_taps; ++k)
+        sum += filter[k] *
+               input[AOMMIN(int_pel - interp_taps / 2 + 1 + k, in_length - 1)];
+      *optr++ = sum / (1 << FILTER_BITS);
+    }
+  }
+}
+
 static void interpolate(const uint8_t *const input, int in_length,
                         uint8_t *output, int out_length) {
   const InterpKernel *interp_filters =
@@ -322,6 +423,18 @@
                    SUBPEL_TAPS);
 }
 
+// Double-precision counterpart of interpolate(): resamples input to output
+// with the kernel table returned by choose_interp_filter() for this length
+// pair, using SUBPEL_TAPS taps per kernel.
+static void interpolate_double_prec(const double *const input, int in_length,
+                                    double *output, int out_length) {
+  const InterpKernel *interp_filters =
+      choose_interp_filter(in_length, out_length);
+
+  interpolate_core_double_prec(input, in_length, output, out_length,
+                               &interp_filters[0][0], SUBPEL_TAPS);
+}
+
 int32_t av1_get_upscale_convolve_step(int in_length, int out_length) {
   return ((in_length << RS_SCALE_SUBPEL_BITS) + out_length / 2) / out_length;
 }
@@ -505,6 +618,15 @@
   }
 }
 
+// Upscales the 1-D signal input (length samples) to olength samples.
+// Despite the name this is a single interpolation pass. Callers must pass
+// length < olength.
+static void upscale_multistep_double_prec(const double *const input, int length,
+                                          double *output, int olength) {
+  assert(length < olength);
+  interpolate_double_prec(input, length, output, olength);
+}
+
 static void fill_col_to_arr(uint8_t *img, int stride, int len, uint8_t *arr) {
   int i;
   uint8_t *iptr = img;
@@ -523,6 +645,30 @@
   }
 }
 
+// Gathers len samples from img, stepping stride elements between reads, into
+// the contiguous array arr.
+static void fill_col_to_arr_double_prec(const double *img, int stride, int len,
+                                        double *arr) {
+  int i;
+  const double *iptr = img;
+  double *aptr = arr;
+  for (i = 0; i < len; ++i, iptr += stride) {
+    *aptr++ = *iptr;
+  }
+}
+
+// Scatters len samples from the contiguous array arr into img, stepping
+// stride elements between writes.
+static void fill_arr_to_col_double_prec(double *img, int stride, int len,
+                                        const double *arr) {
+  int i;
+  double *iptr = img;
+  const double *aptr = arr;
+  for (i = 0; i < len; ++i, iptr += stride) {
+    *iptr = *aptr++;
+  }
+}
+
 void av1_resize_plane(const uint8_t *const input, int height, int width,
                       int in_stride, uint8_t *output, int height2, int width2,
                       int out_stride) {
@@ -554,6 +700,44 @@
   aom_free(arrbuf2);
 }
 
+// Upscales a width x height plane of doubles (row pitch in_stride) into a
+// width2 x height2 plane (row pitch out_stride): each row is first resampled
+// to width2 samples, then each resulting column to height2 samples.
+//
+// All four dimensions must be positive, with width < width2 and
+// height < height2 (asserted by the 1-D helper). The function returns void: if
+// any scratch allocation fails, output is left unwritten and no error is
+// reported.
+void av1_upscale_plane_double_prec(const double *const input, int height,
+                                   int width, int in_stride, double *output,
+                                   int height2, int width2, int out_stride) {
+  int i;
+  // Validate the dimensions before they are used to size the scratch buffers.
+  assert(width > 0);
+  assert(height > 0);
+  assert(width2 > 0);
+  assert(height2 > 0);
+  // intbuf: horizontally upscaled rows, width2 x height, row pitch width2.
+  double *intbuf = (double *)aom_malloc(sizeof(double) * width2 * height);
+  // arrbuf / arrbuf2: one column before / after vertical upscaling.
+  double *arrbuf = (double *)aom_malloc(sizeof(double) * height);
+  double *arrbuf2 = (double *)aom_malloc(sizeof(double) * height2);
+  if (intbuf == NULL || arrbuf == NULL || arrbuf2 == NULL) goto Error;
+  for (i = 0; i < height; ++i)
+    upscale_multistep_double_prec(input + in_stride * i, width,
+                                  intbuf + width2 * i, width2);
+  for (i = 0; i < width2; ++i) {
+    fill_col_to_arr_double_prec(intbuf + i, width2, height, arrbuf);
+    upscale_multistep_double_prec(arrbuf, height, arrbuf2, height2);
+    fill_arr_to_col_double_prec(output + i, out_stride, height2, arrbuf2);
+  }
+
+Error:
+  aom_free(intbuf);
+  aom_free(arrbuf);
+  aom_free(arrbuf2);
+}
+
 static void upscale_normative_rect(const uint8_t *const input, int height,
                                    int width, int in_stride, uint8_t *output,
                                    int height2, int width2, int out_stride,
diff --git a/av1/common/resize.h b/av1/common/resize.h
index 9a59a8d..43bea58 100644
--- a/av1/common/resize.h
+++ b/av1/common/resize.h
@@ -23,6 +23,14 @@
 void av1_resize_plane(const uint8_t *const input, int height, int width,
                       int in_stride, uint8_t *output, int height2, int width2,
                       int out_stride);
+// Upscales a width x height plane of doubles (row pitch in_stride) into the
+// width2 x height2 plane output (row pitch out_stride). All dimensions must be
+// positive, with width < width2 and height < height2. Returns nothing: if an
+// internal scratch allocation fails, output is left unwritten and no error is
+// reported.
+void av1_upscale_plane_double_prec(const double *const input, int height,
+                                   int width, int in_stride, double *output,
+                                   int height2, int width2, int out_stride);
 void av1_resize_frame420(const uint8_t *const y, int y_stride,
                          const uint8_t *const u, const uint8_t *const v,
                          int uv_stride, int height, int width, uint8_t *oy,