Add upscale functions for floating point input
Change-Id: I2bb1a5a5863cd72b0a11349d6b32f5b5655bf48f
diff --git a/av1/common/resize.c b/av1/common/resize.c
index b19d219..5d4448a 100644
--- a/av1/common/resize.c
+++ b/av1/common/resize.c
@@ -313,6 +313,91 @@
}
}
+static void interpolate_core_double_prec(const double *const input,  // Resamples `input` to `out_length` doubles with sub-pel filters; input: source samples (read-only).
+ int in_length, double *output,  // Number of source samples; destination that receives out_length doubles.
+ int out_length,  // Number of samples to produce.
+ const int16_t *interp_filters,  // Flat coefficient table, interp_taps entries per sub-pel phase.
+ int interp_taps) {  // Number of taps in each filter.
+ const int32_t delta =  // Input step per output sample: in_length / out_length in fixed point
+ (((uint32_t)in_length << RS_SCALE_SUBPEL_BITS) + out_length / 2) /  // with RS_SCALE_SUBPEL_BITS fractional bits,
+ out_length;  // rounded to nearest.
+ const int32_t offset =  // Centering offset: |in_length - out_length| / (2 * out_length) in the same scale,
+ in_length > out_length  // positive when in_length > out_length,
+ ? (((int32_t)(in_length - out_length) << (RS_SCALE_SUBPEL_BITS - 1)) +
+ out_length / 2) /
+ out_length
+ : -(((int32_t)(out_length - in_length)  // negated (or zero) otherwise.
+ << (RS_SCALE_SUBPEL_BITS - 1)) +
+ out_length / 2) /
+ out_length;
+ double *optr = output;  // Write cursor into output.
+ int x, x1, x2, k, int_pel, sub_pel;
+ double sum;
+ int32_t y;  // Fixed-point input position of the current output sample. NOTE(review): may be negative when offset < 0; it is right-shifted below, which is implementation-defined for negative values in C.
+
+ x = 0;
+ y = offset + RS_SCALE_EXTRA_OFF;  // RS_SCALE_EXTRA_OFF is defined outside this patch; presumably a rounding bias -- TODO confirm.
+ while ((y >> RS_SCALE_SUBPEL_BITS) < (interp_taps / 2 - 1)) {  // Advance past outputs whose first tap index (int_pel - interp_taps / 2 + 1) would be below 0.
+ x++;
+ y += delta;
+ }
+ x1 = x;  // First output index whose taps need no clamping at index 0.
+ x = out_length - 1;
+ y = delta * x + offset + RS_SCALE_EXTRA_OFF;  // Position of the last output sample.
+ while ((y >> RS_SCALE_SUBPEL_BITS) + (int32_t)(interp_taps / 2) >=  // Walk back while int_pel + interp_taps / 2 (the last tap index when interp_taps is even)
+ in_length) {  // lies at or beyond in_length.
+ x--;
+ y -= delta;
+ }
+ x2 = x;  // Last output index whose taps need no clamping at index in_length - 1.
+ if (x1 > x2) {  // No output index is clamp-free at both ends, so every tap index is clamped on both sides.
+ for (x = 0, y = offset + RS_SCALE_EXTRA_OFF; x < out_length;
+ ++x, y += delta) {
+ int_pel = y >> RS_SCALE_SUBPEL_BITS;  // Integer part of the input position.
+ sub_pel = (y >> RS_SCALE_EXTRA_BITS) & RS_SUBPEL_MASK;  // Sub-pel phase, used to select the filter.
+ const int16_t *filter = &interp_filters[sub_pel * interp_taps];  // Coefficients for this phase within the flat table.
+ sum = 0;
+ for (k = 0; k < interp_taps; ++k) {
+ const int pk = int_pel - interp_taps / 2 + 1 + k;  // Input index read by tap k.
+ sum += filter[k] * input[AOMMAX(AOMMIN(pk, in_length - 1), 0)];  // Index limited to [0, in_length - 1] (assuming AOMMIN/AOMMAX are plain min/max macros).
+ }
+ *optr++ = sum / (1 << FILTER_BITS);  // Divide by 2^FILTER_BITS (presumably the filters' fixed-point gain -- confirm); stored without rounding or clipping.
+ }
+ } else {
+ // Initial part: outputs [0, x1); only the lower bound (index >= 0) is enforced.
+ for (x = 0, y = offset + RS_SCALE_EXTRA_OFF; x < x1; ++x, y += delta) {
+ int_pel = y >> RS_SCALE_SUBPEL_BITS;
+ sub_pel = (y >> RS_SCALE_EXTRA_BITS) & RS_SUBPEL_MASK;
+ const int16_t *filter = &interp_filters[sub_pel * interp_taps];
+ sum = 0;
+ for (k = 0; k < interp_taps; ++k)
+ sum += filter[k] * input[AOMMAX(int_pel - interp_taps / 2 + 1 + k, 0)];
+ *optr++ = sum / (1 << FILTER_BITS);
+ }
+ // Middle part: outputs [x1, x2]; tap indices are used without any clamping.
+ for (; x <= x2; ++x, y += delta) {  // x and y carry over from the previous loop.
+ int_pel = y >> RS_SCALE_SUBPEL_BITS;
+ sub_pel = (y >> RS_SCALE_EXTRA_BITS) & RS_SUBPEL_MASK;
+ const int16_t *filter = &interp_filters[sub_pel * interp_taps];
+ sum = 0;
+ for (k = 0; k < interp_taps; ++k)
+ sum += filter[k] * input[int_pel - interp_taps / 2 + 1 + k];
+ *optr++ = sum / (1 << FILTER_BITS);
+ }
+ // End part: outputs (x2, out_length); only the upper bound (index <= in_length - 1) is enforced.
+ for (; x < out_length; ++x, y += delta) {  // x and y carry over from the previous loop.
+ int_pel = y >> RS_SCALE_SUBPEL_BITS;
+ sub_pel = (y >> RS_SCALE_EXTRA_BITS) & RS_SUBPEL_MASK;
+ const int16_t *filter = &interp_filters[sub_pel * interp_taps];
+ sum = 0;
+ for (k = 0; k < interp_taps; ++k)
+ sum += filter[k] *
+ input[AOMMIN(int_pel - interp_taps / 2 + 1 + k, in_length - 1)];
+ *optr++ = sum / (1 << FILTER_BITS);
+ }
+ }
+}
+
static void interpolate(const uint8_t *const input, int in_length,
uint8_t *output, int out_length) {
const InterpKernel *interp_filters =
@@ -322,6 +407,15 @@
SUBPEL_TAPS);
}
+static void interpolate_double_prec(const double *const input, int in_length,  // Resamples in_length doubles from `input` into out_length doubles in `output`.
+ double *output, int out_length) {
+ const InterpKernel *interp_filters =
+ choose_interp_filter(in_length, out_length);  // Filter bank selected from the two lengths; helper is not shown in this patch.
+
+ interpolate_core_double_prec(input, in_length, output, out_length,
+ &interp_filters[0][0], SUBPEL_TAPS);  // Kernel table passed as a flat int16_t array; assumes each InterpKernel holds SUBPEL_TAPS coefficients -- confirm.
+}
+
int32_t av1_get_upscale_convolve_step(int in_length, int out_length) {
return ((in_length << RS_SCALE_SUBPEL_BITS) + out_length / 2) / out_length;
}
@@ -505,6 +599,12 @@
}
}
+static void upscale_multistep_double_prec(const double *const input, int length,  // Upscales `length` doubles from `input` into `olength` doubles in `output`.
+ double *output, int olength) {
+ assert(length < olength);  // Strict upscaling only; equal or smaller output lengths trip the assert.
+ interpolate_double_prec(input, length, output, olength);  // Despite the name, a single interpolation pass is performed.
+}
+
static void fill_col_to_arr(uint8_t *img, int stride, int len, uint8_t *arr) {
int i;
uint8_t *iptr = img;
@@ -523,6 +623,26 @@
}
}
+static void fill_col_to_arr_double_prec(double *img, int stride, int len,  // Gathers `len` doubles from `img`, spaced `stride` elements apart,
+ double *arr) {  // into the contiguous array `arr`.
+ int i;
+ double *iptr = img;  // Strided read cursor; img is only read here.
+ double *aptr = arr;  // Contiguous write cursor.
+ for (i = 0; i < len; ++i, iptr += stride) {
+ *aptr++ = *iptr;
+ }
+}
+
+static void fill_arr_to_col_double_prec(double *img, int stride, int len,  // Scatters `len` doubles from the contiguous array `arr`
+ double *arr) {  // into `img`, writing one element every `stride` elements.
+ int i;
+ double *iptr = img;  // Strided write cursor.
+ double *aptr = arr;  // Contiguous read cursor; arr is only read here.
+ for (i = 0; i < len; ++i, iptr += stride) {
+ *iptr = *aptr++;
+ }
+}
+
void av1_resize_plane(const uint8_t *const input, int height, int width,
int in_stride, uint8_t *output, int height2, int width2,
int out_stride) {
@@ -554,6 +674,33 @@
aom_free(arrbuf2);
}
+void av1_upscale_plane_double_prec(const double *const input, int height,  // Upscales a height x width plane of doubles to height2 x width2: rows first, then columns.
+ int width, int in_stride, double *output,  // in_stride is counted in doubles between input rows.
+ int height2, int width2, int out_stride) {  // out_stride is counted in doubles between output rows.
+ int i;
+ double *intbuf = (double *)aom_malloc(sizeof(double) * width2 * height);  // Intermediate plane: height rows of width2 samples.
+ double *arrbuf = (double *)aom_malloc(sizeof(double) * height);  // One source column gathered from intbuf.
+ double *arrbuf2 = (double *)aom_malloc(sizeof(double) * height2);  // The same column after vertical upscaling.
+ if (intbuf == NULL || arrbuf == NULL || arrbuf2 == NULL) goto Error;  // On allocation failure the function returns without writing output or reporting an error.
+ assert(width > 0);  // Note: the dimension checks run after the allocations above.
+ assert(height > 0);
+ assert(width2 > 0);
+ assert(height2 > 0);
+ for (i = 0; i < height; ++i)  // Horizontal pass: each input row, width -> width2 (the callee asserts width < width2).
+ upscale_multistep_double_prec(input + in_stride * i, width,
+ intbuf + width2 * i, width2);
+ for (i = 0; i < width2; ++i) {  // Vertical pass: each intermediate column, height -> height2 (the callee asserts height < height2).
+ fill_col_to_arr_double_prec(intbuf + i, width2, height, arrbuf);  // Gather column i of intbuf.
+ upscale_multistep_double_prec(arrbuf, height, arrbuf2, height2);
+ fill_arr_to_col_double_prec(output + i, out_stride, height2, arrbuf2);  // Scatter the result into column i of output.
+ }
+
+Error:  // Shared cleanup, reached on success as well as on allocation failure.
+ aom_free(intbuf);  // Some pointers may be NULL here; presumably aom_free accepts NULL like free() -- confirm.
+ aom_free(arrbuf);
+ aom_free(arrbuf2);
+}
+
static void upscale_normative_rect(const uint8_t *const input, int height,
int width, int in_stride, uint8_t *output,
int height2, int width2, int out_stride,
diff --git a/av1/common/resize.h b/av1/common/resize.h
index 9a59a8d..43bea58 100644
--- a/av1/common/resize.h
+++ b/av1/common/resize.h
@@ -23,6 +23,9 @@
void av1_resize_plane(const uint8_t *const input, int height, int width,
int in_stride, uint8_t *output, int height2, int width2,
int out_stride);
+void av1_upscale_plane_double_prec(const double *const input, int height,  // Upscales a height x width plane of doubles to height2 x width2.
+ int width, int in_stride, double *output,  // Strides are counted in doubles, not bytes.
+ int height2, int width2, int out_stride);  // The definition asserts width < width2 and height < height2; nothing is returned, including on allocation failure.
void av1_resize_frame420(const uint8_t *const y, int y_stride,
const uint8_t *const u, const uint8_t *const v,
int uv_stride, int height, int width, uint8_t *oy,