Add plane upscale functions for double-precision floating-point input

Change-Id: I2bb1a5a5863cd72b0a11349d6b32f5b5655bf48f
diff --git a/av1/common/resize.c b/av1/common/resize.c
index b19d219..5d4448a 100644
--- a/av1/common/resize.c
+++ b/av1/common/resize.c
@@ -313,6 +313,91 @@
   }
 }
 
+static void interpolate_core_double_prec(const double *const input,
+                                         int in_length, double *output,
+                                         int out_length,
+                                         const int16_t *interp_filters,
+                                         int interp_taps) {  // 1-D resampler
+  const int32_t delta =  // input step per output sample, in fixed point
+      (((uint32_t)in_length << RS_SCALE_SUBPEL_BITS) + out_length / 2) /
+      out_length;
+  const int32_t offset =  // (in - out) / (2 * out) in fixed point, rounded
+      in_length > out_length
+          ? (((int32_t)(in_length - out_length) << (RS_SCALE_SUBPEL_BITS - 1)) +
+             out_length / 2) /
+                out_length
+          : -(((int32_t)(out_length - in_length)
+               << (RS_SCALE_SUBPEL_BITS - 1)) +
+              out_length / 2) /
+                out_length;
+  double *optr = output;  // next output slot to write
+  int x, x1, x2, k, int_pel, sub_pel;
+  double sum;
+  int32_t y;  // fixed-point input position of output x
+
+  x = 0;  // x1: first output handled without a lower clamp
+  y = offset + RS_SCALE_EXTRA_OFF;
+  while ((y >> RS_SCALE_SUBPEL_BITS) < (interp_taps / 2 - 1)) {
+    x++;
+    y += delta;
+  }
+  x1 = x;
+  x = out_length - 1;  // x2: last output handled without an upper clamp
+  y = delta * x + offset + RS_SCALE_EXTRA_OFF;
+  while ((y >> RS_SCALE_SUBPEL_BITS) + (int32_t)(interp_taps / 2) >=
+         in_length) {
+    x--;
+    y -= delta;
+  }
+  x2 = x;
+  if (x1 > x2) {  // no unclamped span exists: clamp both ends throughout
+    for (x = 0, y = offset + RS_SCALE_EXTRA_OFF; x < out_length;
+         ++x, y += delta) {
+      int_pel = y >> RS_SCALE_SUBPEL_BITS;  // whole-sample position
+      sub_pel = (y >> RS_SCALE_EXTRA_BITS) & RS_SUBPEL_MASK;  // kernel phase
+      const int16_t *filter = &interp_filters[sub_pel * interp_taps];
+      sum = 0;
+      for (k = 0; k < interp_taps; ++k) {
+        const int pk = int_pel - interp_taps / 2 + 1 + k;  // tap's input index
+        sum += filter[k] * input[AOMMAX(AOMMIN(pk, in_length - 1), 0)];
+      }
+      *optr++ = sum / (1 << FILTER_BITS);  // scale by 2^-FILTER_BITS
+    }
+  } else {
+    // Initial part: outputs before x1; taps below index 0 reuse input[0].
+    for (x = 0, y = offset + RS_SCALE_EXTRA_OFF; x < x1; ++x, y += delta) {
+      int_pel = y >> RS_SCALE_SUBPEL_BITS;
+      sub_pel = (y >> RS_SCALE_EXTRA_BITS) & RS_SUBPEL_MASK;
+      const int16_t *filter = &interp_filters[sub_pel * interp_taps];
+      sum = 0;
+      for (k = 0; k < interp_taps; ++k)
+        sum += filter[k] * input[AOMMAX(int_pel - interp_taps / 2 + 1 + k, 0)];
+      *optr++ = sum / (1 << FILTER_BITS);
+    }
+    // Middle part: outputs x1..x2; input is indexed directly, no clamping.
+    for (; x <= x2; ++x, y += delta) {
+      int_pel = y >> RS_SCALE_SUBPEL_BITS;
+      sub_pel = (y >> RS_SCALE_EXTRA_BITS) & RS_SUBPEL_MASK;
+      const int16_t *filter = &interp_filters[sub_pel * interp_taps];
+      sum = 0;
+      for (k = 0; k < interp_taps; ++k)
+        sum += filter[k] * input[int_pel - interp_taps / 2 + 1 + k];
+      *optr++ = sum / (1 << FILTER_BITS);
+    }
+    // End part: outputs after x2; taps past the end reuse the last sample.
+    for (; x < out_length; ++x, y += delta) {
+      int_pel = y >> RS_SCALE_SUBPEL_BITS;
+      sub_pel = (y >> RS_SCALE_EXTRA_BITS) & RS_SUBPEL_MASK;
+      const int16_t *filter = &interp_filters[sub_pel * interp_taps];
+      sum = 0;
+      for (k = 0; k < interp_taps; ++k)
+        sum += filter[k] *
+               input[AOMMIN(int_pel - interp_taps / 2 + 1 + k, in_length - 1)];
+      *optr++ = sum / (1 << FILTER_BITS);
+    }
+  }
+}
+
 static void interpolate(const uint8_t *const input, int in_length,
                         uint8_t *output, int out_length) {
   const InterpKernel *interp_filters =
@@ -322,6 +407,15 @@
                    SUBPEL_TAPS);
 }
 
+// Resamples `input` into `output` with the kernel bank that
+// choose_interp_filter() returns for this in_length / out_length pair.
+static void interpolate_double_prec(const double *const input, int in_length,
+                                    double *output, int out_length) {
+  const InterpKernel *const kernels = choose_interp_filter(in_length,
+                                                           out_length);
+  interpolate_core_double_prec(input, in_length, output, out_length,
+                               &kernels[0][0], SUBPEL_TAPS);
+}
+
 int32_t av1_get_upscale_convolve_step(int in_length, int out_length) {
   return ((in_length << RS_SCALE_SUBPEL_BITS) + out_length / 2) / out_length;
 }
@@ -505,6 +599,12 @@
   }
 }
 
+static void upscale_multistep_double_prec(const double *const input, int length,
+                                          double *output, int olength) {
+  assert(length < olength);  // strict upscale only; equal lengths fail here
+  interpolate_double_prec(input, length, output, olength);  // a single pass
+}
+
 static void fill_col_to_arr(uint8_t *img, int stride, int len, uint8_t *arr) {
   int i;
   uint8_t *iptr = img;
@@ -523,6 +623,26 @@
   }
 }
 
+// Copies `len` doubles from the strided column at `img` (consecutive elements
+// `stride` apart) into the contiguous array `arr`. Indexing from the base
+// avoids forming a pointer past the column's end after the last element.
+static void fill_col_to_arr_double_prec(const double *img, int stride, int len,
+                                        double *arr) {
+  for (int i = 0; i < len; ++i) {
+    arr[i] = img[(int64_t)i * stride];
+  }
+}
+
+// Scatters the contiguous array `arr` of `len` doubles into the strided column
+// at `img` (consecutive elements `stride` apart). Indexing from the base
+// avoids forming a pointer past the column's end after the last element.
+static void fill_arr_to_col_double_prec(double *img, int stride, int len,
+                                        const double *arr) {
+  for (int i = 0; i < len; ++i) {
+    img[(int64_t)i * stride] = arr[i];
+  }
+}
+
 void av1_resize_plane(const uint8_t *const input, int height, int width,
                       int in_stride, uint8_t *output, int height2, int width2,
                       int out_stride) {
@@ -554,6 +674,33 @@
   aom_free(arrbuf2);
 }
 
+// Upscales a width x height plane of doubles to width2 x height2: rows are
+// stretched into a temporary plane, then its columns are stretched into
+// `output`. If a temporary allocation fails, `output` is left unwritten.
+void av1_upscale_plane_double_prec(const double *const input, int height,
+                                   int width, int in_stride, double *output,
+                                   int height2, int width2, int out_stride) {
+  // Check the dimensions before they are used to size any allocation.
+  assert(width > 0 && height > 0);
+  assert(width2 > 0 && height2 > 0);
+  double *intbuf = (double *)aom_malloc(sizeof(double) * width2 * height);
+  double *arrbuf = (double *)aom_malloc(sizeof(double) * height);
+  double *arrbuf2 = (double *)aom_malloc(sizeof(double) * height2);
+  if (intbuf == NULL || arrbuf == NULL || arrbuf2 == NULL) goto Error;
+  for (int i = 0; i < height; ++i)
+    upscale_multistep_double_prec(input + in_stride * i, width,
+                                  intbuf + width2 * i, width2);
+  for (int i = 0; i < width2; ++i) {
+    fill_col_to_arr_double_prec(intbuf + i, width2, height, arrbuf);
+    upscale_multistep_double_prec(arrbuf, height, arrbuf2, height2);
+    fill_arr_to_col_double_prec(output + i, out_stride, height2, arrbuf2);
+  }
+Error:
+  aom_free(intbuf);
+  aom_free(arrbuf);
+  aom_free(arrbuf2);
+}
+
 static void upscale_normative_rect(const uint8_t *const input, int height,
                                    int width, int in_stride, uint8_t *output,
                                    int height2, int width2, int out_stride,
diff --git a/av1/common/resize.h b/av1/common/resize.h
index 9a59a8d..43bea58 100644
--- a/av1/common/resize.h
+++ b/av1/common/resize.h
@@ -23,6 +23,9 @@
 void av1_resize_plane(const uint8_t *const input, int height, int width,
                       int in_stride, uint8_t *output, int height2, int width2,
                       int out_stride);
+void av1_upscale_plane_double_prec(const double *const input, int height,
+                                   int width, int in_stride, double *output,
+                                   int height2, int width2, int out_stride);
 void av1_resize_frame420(const uint8_t *const y, int y_stride,
                          const uint8_t *const u, const uint8_t *const v,
                          int uv_stride, int height, int width, uint8_t *oy,