Rework upscaling in disflow algorithm

Previously, when upscaling the flow field from one pyramid level to
another, we used the same upsampling filter as for pixel data.

However, this had a couple of issues: the pixel filtering code is
much more generic than we need for flow fields, which are always
upscaled by a factor of exactly 2; and we were not correctly
accounting for the offset between the flow vector locations at one
pyramid level and those at the next pyramid level down.

Therefore, replace this logic with a much simpler filter, which only
handles 2:1 upscaling and which correctly accounts for the relative
positions of the input and output grids.
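
To illustrate the geometry, here is a minimal sketch of one row of
the 2:1 upscale (illustrative only: the kernel table matches the
patch, but the helper name and row-at-a-time framing are made up
here). Each input entry j produces output entries 2*j and 2*j + 1,
which sit 0.25 units to the left and right of the input sample, so
the two interpolation kernels have phases 0.75 and 0.25 respectively:

  // Illustrative 2:1 upscale of one row; mirrors the horizontal pass
  // of upscale_flow_component() in the patch below. The caller must
  // pad `in` by two entries on each side, as the patch does with
  // FLOW_BORDER_OUTER.
  static const double kKernel[2][4] = {
    { -3 / 128., 29 / 128., 111 / 128., -9 / 128. },  // phase 0.75
    { -9 / 128., 111 / 128., 29 / 128., -3 / 128. }   // phase 0.25
  };

  static void upscale_row_2to1(const double *in, double *out,
                               int in_width) {
    for (int j = 0; j < in_width; j++) {
      double even = 0.0, odd = 0.0;
      for (int k = -2; k < 2; k++) even += in[j + k] * kKernel[0][k + 2];
      for (int k = -1; k < 3; k++) odd += in[j + k] * kKernel[1][k + 1];
      out[2 * j + 0] = even;  // lies at input position j - 0.25
      out[2 * j + 1] = odd;   // lies at input position j + 0.25
    }
  }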

After this replacement was made, four filters were tested: bilinear,
bicubic, and the 6- and 8-tap fullband filters taken from elsewhere
in the code. Bicubic performed slightly better than the rest, so use
this option.
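
For reference, these kernels are just a standard Catmull-Rom cubic
sampled at the two phases (the same formula as get_cubic_kernel_dbl()
in disflow.c). A standalone sanity check, not part of the patch,
which reproduces the hard-coded filter table:

  #include <stdio.h>

  // Catmull-Rom cubic weights at fractional position x.
  static void cubic_kernel(double x, double k[4]) {
    const double x2 = x * x, x3 = x2 * x;
    k[0] = -0.5 * x + x2 - 0.5 * x3;
    k[1] = 1.0 - 2.5 * x2 + 1.5 * x3;
    k[2] = 0.5 * x + 2.0 * x2 - 1.5 * x3;
    k[3] = -0.5 * x2 + 0.5 * x3;
  }

  int main(void) {
    double k[4];
    cubic_kernel(0.25, k);
    // Prints -9 111 29 -3, i.e. { -9/128, 111/128, 29/128, -3/128 }:
    // the phase-0.25 kernel. The phase-0.75 kernel is its mirror.
    for (int i = 0; i < 4; i++) printf("%g ", k[i] * 128);
    printf("\n");
    return 0;
  }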

This change simplifies the code and provides a better starting point
for SIMD implementation, as well as giving minor speed and quality
improvements.

 Speed | BDRATE-PSNR | BDRATE-SSIM |   Enc time
-------+-------------+-------------+-------------
   1   |   -0.004%   |   +0.001%   |   +0.006%
   2   |   -0.014%   |   -0.003%   |   -0.021%
   3   |   -0.028%   |   -0.073%   |   -0.034%
   4   |   -0.009%   |   -0.003%   |   -0.101%

STATS_CHANGED

Change-Id: I040b486be370627bdb68fbfd57ba855c01c2ea72
diff --git a/aom_dsp/flow_estimation/disflow.c b/aom_dsp/flow_estimation/disflow.c
index 06107c6..147a8ab 100644
--- a/aom_dsp/flow_estimation/disflow.c
+++ b/aom_dsp/flow_estimation/disflow.c
@@ -24,24 +24,29 @@
 
 #include "config/aom_dsp_rtcd.h"
 
-// TODO(rachelbarker):
-// Implement specialized functions for upscaling flow fields,
-// replacing av1_upscale_plane_double_prec().
-// Then we can avoid needing to include code from av1/
-#include "av1/common/resize.h"
-
 // Amount to downsample the flow field by.
 // eg. DOWNSAMPLE_SHIFT = 2 (DOWNSAMPLE_FACTOR == 4) means we calculate
 // one flow point for each 4x4 pixel region of the frame
 // Must be a power of 2
 #define DOWNSAMPLE_SHIFT 3
 #define DOWNSAMPLE_FACTOR (1 << DOWNSAMPLE_SHIFT)
+
+// Filters used when upscaling the flow field from one pyramid level
+// to another. See upscale_flow_component for details on kernel selection
+#define FLOW_UPSCALE_TAPS 4
+
 // Number of outermost flow field entries (on each edge) which can't be
 // computed, because the patch they correspond to extends outside of the
 // frame
 // The border is (DISFLOW_PATCH_SIZE >> 1) pixels, which is
 // (DISFLOW_PATCH_SIZE >> 1) >> DOWNSAMPLE_SHIFT many flow field entries
-#define FLOW_BORDER ((DISFLOW_PATCH_SIZE >> 1) >> DOWNSAMPLE_SHIFT)
+#define FLOW_BORDER_INNER ((DISFLOW_PATCH_SIZE >> 1) >> DOWNSAMPLE_SHIFT)
+
+// Number of extra padding entries on each side of the flow field.
+// These samples are added so that we do not need to apply clamping when
+// interpolating or upsampling the flow field
+#define FLOW_BORDER_OUTER (FLOW_UPSCALE_TAPS / 2)
+
 // When downsampling the flow field, each flow field entry covers a square
 // region of pixels in the image pyramid. This value is equal to the position
 // of the center of that region, as an offset from the top/left edge.
@@ -52,6 +57,12 @@
 // this gives the correct offset of 0 instead of -1.
 #define UPSAMPLE_CENTER_OFFSET ((DOWNSAMPLE_FACTOR - 1) / 2)
 
+static const double flow_upscale_filter[2][FLOW_UPSCALE_TAPS] = {
+  // Cubic interpolation kernels for phase=0.75 and phase=0.25, respectively
+  { -3 / 128., 29 / 128., 111 / 128., -9 / 128. },
+  { -9 / 128., 111 / 128., 29 / 128., -3 / 128. }
+};
+
 static INLINE void get_cubic_kernel_dbl(double x, double kernel[4]) {
   // Check that the fractional position is in range.
   //
@@ -135,7 +146,15 @@
     const double flow_sub_y =
         (y & (DOWNSAMPLE_FACTOR - 1)) / (double)DOWNSAMPLE_FACTOR;
 
-    // Make sure that bicubic interpolation won't read outside of the flow field
+    // Exclude points which would sample from the outer border of the flow
+    // field, as this would give lower-quality results.
+    //
+    // Note: As we never read from the border region at pyramid level 0, we
+    // can skip filling it in. If the conditions here are removed, or any
+    // other logic is added which reads from this border region, then
+    // compute_flow_field() will need to be modified to call
+    // fill_flow_field_borders() at pyramid level 0 to set up the correct
+    // border data.
     if (flow_x < 1 || (flow_x + 2) >= width) continue;
     if (flow_y < 1 || (flow_y + 2) >= height) continue;
 
@@ -431,16 +450,16 @@
   // Calculate the bounds of the rectangle which was filled in by
   // compute_flow_field() before calling this function.
   // These indices are inclusive on both ends.
-  const int left_index = FLOW_BORDER;
-  const int right_index = (width - FLOW_BORDER - 1);
-  const int top_index = FLOW_BORDER;
-  const int bottom_index = (height - FLOW_BORDER - 1);
+  const int left_index = FLOW_BORDER_INNER;
+  const int right_index = (width - FLOW_BORDER_INNER - 1);
+  const int top_index = FLOW_BORDER_INNER;
+  const int bottom_index = (height - FLOW_BORDER_INNER - 1);
 
   // Left area
   for (int i = top_index; i <= bottom_index; i += 1) {
     double *row = flow + i * stride;
     const double left = row[left_index];
-    for (int j = 0; j < left_index; j++) {
+    for (int j = -FLOW_BORDER_OUTER; j < left_index; j++) {
       row[j] = left;
     }
   }
@@ -449,23 +468,136 @@
   for (int i = top_index; i <= bottom_index; i += 1) {
     double *row = flow + i * stride;
     const double right = row[right_index];
-    for (int j = right_index + 1; j < width; j++) {
+    for (int j = right_index + 1; j < width + FLOW_BORDER_OUTER; j++) {
       row[j] = right;
     }
   }
 
   // Top area
-  const double *top_row = flow + top_index * stride;
-  for (int i = 0; i < top_index; i++) {
-    double *row = flow + i * stride;
-    memcpy(row, top_row, width * sizeof(*row));
+  const double *top_row = flow + top_index * stride - FLOW_BORDER_OUTER;
+  for (int i = -FLOW_BORDER_OUTER; i < top_index; i++) {
+    double *row = flow + i * stride - FLOW_BORDER_OUTER;
+    size_t length = width + 2 * FLOW_BORDER_OUTER;
+    memcpy(row, top_row, length * sizeof(*row));
   }
 
   // Bottom area
-  const double *bottom_row = flow + bottom_index * stride;
-  for (int i = bottom_index + 1; i < height; i++) {
-    double *row = flow + i * stride;
-    memcpy(row, bottom_row, width * sizeof(*row));
+  const double *bottom_row = flow + bottom_index * stride - FLOW_BORDER_OUTER;
+  for (int i = bottom_index + 1; i < height + FLOW_BORDER_OUTER; i++) {
+    double *row = flow + i * stride - FLOW_BORDER_OUTER;
+    size_t length = width + 2 * FLOW_BORDER_OUTER;
+    memcpy(row, bottom_row, length * sizeof(*row));
+  }
+}
+
+// Upscale one component of the flow field, from a size of
+// cur_width x cur_height to a size of (2*cur_width) x (2*cur_height), storing
+// the result back into the same buffer. This function also scales the flow
+// vector by 2, so that when we move to the next pyramid level down, the implied
+// motion vector is the same.
+//
+// The temporary buffer tmpbuf must be large enough to hold an intermediate
+// array of size stride * cur_height, *plus* FLOW_BORDER_OUTER rows above and
+// below. In other words, indices from -FLOW_BORDER_OUTER * stride to
+// (cur_height + FLOW_BORDER_OUTER) * stride - 1 must be valid.
+//
+// Note that the same stride is used for u before and after upscaling
+// and for the temporary buffer, for simplicity.
+//
+// A note on phasing:
+//
+// The flow fields at two adjacent pyramid levels are offset from each other,
+// and we need to account for this in the construction of the interpolation
+// kernels.
+//
+// Consider an 8x8 pixel patch at pyramid level n. This is split into four
+// patches at pyramid level n-1. Bringing these patches back up to pyramid level
+// n, each sub-patch covers 4x4 pixels, and between them they cover the same
+// 8x8 region.
+//
+// Therefore, at pyramid level n, two adjacent patches look like this:
+//
+//    + - - - - - - - + - - - - - - - +
+//    |               |               |
+//    |   x       x   |   x       x   |
+//    |               |               |
+//    |       #       |       #       |
+//    |               |               |
+//    |   x       x   |   x       x   |
+//    |               |               |
+//    + - - - - - - - + - - - - - - - +
+//
+// where # marks the center of a patch at pyramid level n (the input to this
+// function), and x marks the center of a patch at pyramid level n-1 (the output
+// of this function).
+//
+// By counting pixels (marked by +, -, and |), we can see that the flow vectors
+// at pyramid level n-1 are offset relative to the flow vectors at pyramid
+// level n, by 1/4 of the larger (input) patch size. Therefore, our
+// interpolation kernels need to have phases of 0.25 and 0.75.
+//
+// In addition, in order to handle the frame edges correctly, we need to
+// generate one output vector to the left and one to the right of each input
+// vector, even though these must be interpolated using different source points.
+static void upscale_flow_component(double *flow, int cur_width, int cur_height,
+                                   int stride, double *tmpbuf) {
+  const int half_len = FLOW_UPSCALE_TAPS / 2;
+
+  // Check that the outer border is large enough to avoid needing to clamp
+  // the source locations
+  assert(half_len <= FLOW_BORDER_OUTER);
+
+  // Horizontal upscale and multiply by 2
+  for (int i = 0; i < cur_height; i++) {
+    for (int j = 0; j < cur_width; j++) {
+      double left = 0;
+      for (int k = -half_len; k < half_len; k++) {
+        left +=
+            flow[i * stride + (j + k)] * flow_upscale_filter[0][k + half_len];
+      }
+      tmpbuf[i * stride + (2 * j + 0)] = 2.0 * left;
+
+      // Right output pixel is 0.25 units to the right of the input pixel
+      double right = 0;
+      for (int k = -(half_len - 1); k < (half_len + 1); k++) {
+        right += flow[i * stride + (j + k)] *
+                 flow_upscale_filter[1][k + (half_len - 1)];
+      }
+      tmpbuf[i * stride + (2 * j + 1)] = 2.0 * right;
+    }
+  }
+
+  // Fill in top and bottom borders of tmpbuf
+  const double *top_row = &tmpbuf[0];
+  for (int i = -FLOW_BORDER_OUTER; i < 0; i++) {
+    double *row = &tmpbuf[i * stride];
+    memcpy(row, top_row, 2 * cur_width * sizeof(*row));
+  }
+
+  const double *bottom_row = &tmpbuf[(cur_height - 1) * stride];
+  for (int i = cur_height; i < cur_height + FLOW_BORDER_OUTER; i++) {
+    double *row = &tmpbuf[i * stride];
+    memcpy(row, bottom_row, 2 * cur_width * sizeof(*row));
+  }
+
+  // Vertical upscale
+  int upscaled_width = cur_width * 2;
+  for (int i = 0; i < cur_height; i++) {
+    for (int j = 0; j < upscaled_width; j++) {
+      double top = 0;
+      for (int k = -half_len; k < half_len; k++) {
+        top +=
+            tmpbuf[(i + k) * stride + j] * flow_upscale_filter[0][k + half_len];
+      }
+      flow[(2 * i) * stride + j] = top;
+
+      double bottom = 0;
+      for (int k = -(half_len - 1); k < (half_len + 1); k++) {
+        bottom += tmpbuf[(i + k) * stride + j] *
+                  flow_upscale_filter[1][k + (half_len - 1)];
+      }
+      flow[(2 * i + 1) * stride + j] = bottom;
+    }
   }
 }
 
@@ -478,12 +610,25 @@
   double *flow_u = flow->u;
   double *flow_v = flow->v;
 
-  const size_t flow_size = flow->stride * (size_t)flow->height;
-  double *u_upscale = aom_malloc(flow_size * sizeof(*u_upscale));
-  double *v_upscale = aom_malloc(flow_size * sizeof(*v_upscale));
-  if (!u_upscale || !v_upscale) {
-    mem_status = false;
-    goto free_uvscale;
+  double *tmpbuf0;
+  double *tmpbuf;
+
+  if (src_pyr->n_levels < 2) {
+    // tmpbuf not needed
+    tmpbuf0 = NULL;
+    tmpbuf = NULL;
+  } else {
+    // This line must match the calculation of cur_flow_height below
+    const int layer1_height = src_pyr->layers[1].height >> DOWNSAMPLE_SHIFT;
+
+    const size_t tmpbuf_size =
+        (layer1_height + 2 * FLOW_BORDER_OUTER) * (size_t)flow->stride;
+    tmpbuf0 = aom_malloc(tmpbuf_size * sizeof(*tmpbuf0));
+    if (!tmpbuf0) {
+      mem_status = false;
+      goto free_tmpbuf;
+    }
+    tmpbuf = tmpbuf0 + FLOW_BORDER_OUTER * flow->stride;
   }
 
   // Compute flow field from coarsest to finest level of the pyramid
@@ -507,8 +652,10 @@
     const int cur_flow_height = cur_height >> DOWNSAMPLE_SHIFT;
     const int cur_flow_stride = flow->stride;
 
-    for (int i = FLOW_BORDER; i < cur_flow_height - FLOW_BORDER; i += 1) {
-      for (int j = FLOW_BORDER; j < cur_flow_width - FLOW_BORDER; j += 1) {
+    for (int i = FLOW_BORDER_INNER; i < cur_flow_height - FLOW_BORDER_INNER;
+         i += 1) {
+      for (int j = FLOW_BORDER_INNER; j < cur_flow_width - FLOW_BORDER_INNER;
+           j += 1) {
         const int flow_field_idx = i * cur_flow_stride + j;
 
         // Calculate the position of a patch of size DISFLOW_PATCH_SIZE pixels,
@@ -541,28 +688,10 @@
       const int upscale_flow_height = cur_flow_height << 1;
       const int upscale_stride = flow->stride;
 
-      bool upscale_u_plane = av1_upscale_plane_double_prec(
-          flow_u, cur_flow_height, cur_flow_width, cur_flow_stride, u_upscale,
-          upscale_flow_height, upscale_flow_width, upscale_stride);
-      bool upscale_v_plane = av1_upscale_plane_double_prec(
-          flow_v, cur_flow_height, cur_flow_width, cur_flow_stride, v_upscale,
-          upscale_flow_height, upscale_flow_width, upscale_stride);
-      if (!upscale_u_plane || !upscale_v_plane) {
-        mem_status = false;
-        goto free_uvscale;
-      }
-
-      // Multiply all flow vectors by 2.
-      // When we move down a pyramid level, the image resolution doubles.
-      // Thus we need to double all vectors in order for them to represent
-      // the same translation at the next level down
-      for (int i = 0; i < upscale_flow_height; i++) {
-        for (int j = 0; j < upscale_flow_width; j++) {
-          const int index = i * upscale_stride + j;
-          flow_u[index] = u_upscale[index] * 2.0;
-          flow_v[index] = v_upscale[index] * 2.0;
-        }
-      }
+      upscale_flow_component(flow_u, cur_flow_width, cur_flow_height,
+                             cur_flow_stride, tmpbuf);
+      upscale_flow_component(flow_v, cur_flow_width, cur_flow_height,
+                             cur_flow_stride, tmpbuf);
 
       // If we didn't fill in the rightmost column or bottommost row during
       // upsampling (in order to keep the ratio to exactly 2), fill them
@@ -592,9 +721,9 @@
       }
     }
   }
-free_uvscale:
-  aom_free(u_upscale);
-  aom_free(v_upscale);
+
+free_tmpbuf:
+  aom_free(tmpbuf0);
   return mem_status;
 }
 
@@ -605,25 +734,25 @@
   // Calculate the size of the bottom (largest) layer of the flow pyramid
   flow->width = frame_width >> DOWNSAMPLE_SHIFT;
   flow->height = frame_height >> DOWNSAMPLE_SHIFT;
-  flow->stride = flow->width;
+  flow->stride = flow->width + 2 * FLOW_BORDER_OUTER;
 
-  const size_t flow_size = flow->stride * (size_t)flow->height;
-  flow->u = aom_calloc(flow_size, sizeof(*flow->u));
-  flow->v = aom_calloc(flow_size, sizeof(*flow->v));
+  const size_t flow_size =
+      flow->stride * (size_t)(flow->height + 2 * FLOW_BORDER_OUTER);
 
-  if (flow->u == NULL || flow->v == NULL) {
-    aom_free(flow->u);
-    aom_free(flow->v);
+  flow->buf0 = aom_calloc(2 * flow_size, sizeof(*flow->buf0));
+  if (!flow->buf0) {
     aom_free(flow);
     return NULL;
   }
 
+  flow->u = flow->buf0 + FLOW_BORDER_OUTER * flow->stride + FLOW_BORDER_OUTER;
+  flow->v = flow->u + flow_size;
+
   return flow;
 }
 
 static void free_flow_field(FlowField *flow) {
-  aom_free(flow->u);
-  aom_free(flow->v);
+  aom_free(flow->buf0);
   aom_free(flow);
 }
 
diff --git a/aom_dsp/flow_estimation/disflow.h b/aom_dsp/flow_estimation/disflow.h
index d772c8a..ef877b6 100644
--- a/aom_dsp/flow_estimation/disflow.h
+++ b/aom_dsp/flow_estimation/disflow.h
@@ -79,6 +79,9 @@
 #define DISFLOW_INTERP_BITS 14
 
 typedef struct {
+  // Start of allocation for u and v buffers
+  double *buf0;
+
   // x and y directions of flow, per patch
   double *u;
   double *v;
diff --git a/av1/common/resize.c b/av1/common/resize.c
index 55dfc6f..1b34883 100644
--- a/av1/common/resize.c
+++ b/av1/common/resize.c
@@ -316,91 +316,6 @@
   }
 }
 
-static void interpolate_core_double_prec(const double *const input,
-                                         int in_length, double *output,
-                                         int out_length,
-                                         const int16_t *interp_filters,
-                                         int interp_taps) {
-  const int32_t delta =
-      (((uint32_t)in_length << RS_SCALE_SUBPEL_BITS) + out_length / 2) /
-      out_length;
-  const int32_t offset =
-      in_length > out_length
-          ? (((int32_t)(in_length - out_length) << (RS_SCALE_SUBPEL_BITS - 1)) +
-             out_length / 2) /
-                out_length
-          : -(((int32_t)(out_length - in_length)
-               << (RS_SCALE_SUBPEL_BITS - 1)) +
-              out_length / 2) /
-                out_length;
-  double *optr = output;
-  int x, x1, x2, k, int_pel, sub_pel;
-  double sum;
-  int32_t y;
-
-  x = 0;
-  y = offset + RS_SCALE_EXTRA_OFF;
-  while ((y >> RS_SCALE_SUBPEL_BITS) < (interp_taps / 2 - 1)) {
-    x++;
-    y += delta;
-  }
-  x1 = x;
-  x = out_length - 1;
-  y = delta * x + offset + RS_SCALE_EXTRA_OFF;
-  while ((y >> RS_SCALE_SUBPEL_BITS) + (int32_t)(interp_taps / 2) >=
-         in_length) {
-    x--;
-    y -= delta;
-  }
-  x2 = x;
-  if (x1 > x2) {
-    for (x = 0, y = offset + RS_SCALE_EXTRA_OFF; x < out_length;
-         ++x, y += delta) {
-      int_pel = y >> RS_SCALE_SUBPEL_BITS;
-      sub_pel = (y >> RS_SCALE_EXTRA_BITS) & RS_SUBPEL_MASK;
-      const int16_t *filter = &interp_filters[sub_pel * interp_taps];
-      sum = 0;
-      for (k = 0; k < interp_taps; ++k) {
-        const int pk = int_pel - interp_taps / 2 + 1 + k;
-        sum += filter[k] * input[AOMMAX(AOMMIN(pk, in_length - 1), 0)];
-      }
-      *optr++ = sum / (1 << FILTER_BITS);
-    }
-  } else {
-    // Initial part.
-    for (x = 0, y = offset + RS_SCALE_EXTRA_OFF; x < x1; ++x, y += delta) {
-      int_pel = y >> RS_SCALE_SUBPEL_BITS;
-      sub_pel = (y >> RS_SCALE_EXTRA_BITS) & RS_SUBPEL_MASK;
-      const int16_t *filter = &interp_filters[sub_pel * interp_taps];
-      sum = 0;
-      for (k = 0; k < interp_taps; ++k)
-        sum += filter[k] * input[AOMMAX(int_pel - interp_taps / 2 + 1 + k, 0)];
-      *optr++ = sum / (1 << FILTER_BITS);
-    }
-    // Middle part.
-    for (; x <= x2; ++x, y += delta) {
-      int_pel = y >> RS_SCALE_SUBPEL_BITS;
-      sub_pel = (y >> RS_SCALE_EXTRA_BITS) & RS_SUBPEL_MASK;
-      const int16_t *filter = &interp_filters[sub_pel * interp_taps];
-      sum = 0;
-      for (k = 0; k < interp_taps; ++k)
-        sum += filter[k] * input[int_pel - interp_taps / 2 + 1 + k];
-      *optr++ = sum / (1 << FILTER_BITS);
-    }
-    // End part.
-    for (; x < out_length; ++x, y += delta) {
-      int_pel = y >> RS_SCALE_SUBPEL_BITS;
-      sub_pel = (y >> RS_SCALE_EXTRA_BITS) & RS_SUBPEL_MASK;
-      const int16_t *filter = &interp_filters[sub_pel * interp_taps];
-      sum = 0;
-      for (k = 0; k < interp_taps; ++k)
-        sum += filter[k] *
-               input[AOMMIN(int_pel - interp_taps / 2 + 1 + k, in_length - 1)];
-      *optr++ = sum / (1 << FILTER_BITS);
-    }
-  }
-}
-
 static void interpolate(const uint8_t *const input, int in_length,
                         uint8_t *output, int out_length) {
   const InterpKernel *interp_filters =
@@ -410,15 +325,6 @@
                    SUBPEL_TAPS);
 }
 
-static void interpolate_double_prec(const double *const input, int in_length,
-                                    double *output, int out_length) {
-  const InterpKernel *interp_filters =
-      choose_interp_filter(in_length, out_length);
-
-  interpolate_core_double_prec(input, in_length, output, out_length,
-                               &interp_filters[0][0], SUBPEL_TAPS);
-}
-
 int32_t av1_get_upscale_convolve_step(int in_length, int out_length) {
   return ((in_length << RS_SCALE_SUBPEL_BITS) + out_length / 2) / out_length;
 }
@@ -600,12 +506,6 @@
   }
 }
 
-static void upscale_multistep_double_prec(const double *const input, int length,
-                                          double *output, int olength) {
-  assert(length < olength);
-  interpolate_double_prec(input, length, output, olength);
-}
-
 static void fill_col_to_arr(uint8_t *img, int stride, int len, uint8_t *arr) {
   int i;
   uint8_t *iptr = img;
@@ -624,26 +524,6 @@
   }
 }
 
-static void fill_col_to_arr_double_prec(double *img, int stride, int len,
-                                        double *arr) {
-  int i;
-  double *iptr = img;
-  double *aptr = arr;
-  for (i = 0; i < len; ++i, iptr += stride) {
-    *aptr++ = *iptr;
-  }
-}
-
-static void fill_arr_to_col_double_prec(double *img, int stride, int len,
-                                        double *arr) {
-  int i;
-  double *iptr = img;
-  double *aptr = arr;
-  for (i = 0; i < len; ++i, iptr += stride) {
-    *iptr = *aptr++;
-  }
-}
-
 bool av1_resize_plane(const uint8_t *const input, int height, int width,
                       int in_stride, uint8_t *output, int height2, int width2,
                       int out_stride) {
@@ -679,38 +559,6 @@
   return mem_status;
 }
 
-bool av1_upscale_plane_double_prec(const double *const input, int height,
-                                   int width, int in_stride, double *output,
-                                   int height2, int width2, int out_stride) {
-  int i;
-  bool mem_status = true;
-  double *intbuf = (double *)aom_malloc(sizeof(double) * width2 * height);
-  double *arrbuf = (double *)aom_malloc(sizeof(double) * height);
-  double *arrbuf2 = (double *)aom_malloc(sizeof(double) * height2);
-  if (intbuf == NULL || arrbuf == NULL || arrbuf2 == NULL) {
-    mem_status = false;
-    goto Error;
-  }
-  assert(width > 0);
-  assert(height > 0);
-  assert(width2 > 0);
-  assert(height2 > 0);
-  for (i = 0; i < height; ++i)
-    upscale_multistep_double_prec(input + in_stride * i, width,
-                                  intbuf + width2 * i, width2);
-  for (i = 0; i < width2; ++i) {
-    fill_col_to_arr_double_prec(intbuf + i, width2, height, arrbuf);
-    upscale_multistep_double_prec(arrbuf, height, arrbuf2, height2);
-    fill_arr_to_col_double_prec(output + i, out_stride, height2, arrbuf2);
-  }
-
-Error:
-  aom_free(intbuf);
-  aom_free(arrbuf);
-  aom_free(arrbuf2);
-  return mem_status;
-}
-
 static bool upscale_normative_rect(const uint8_t *const input, int height,
                                    int width, int in_stride, uint8_t *output,
                                    int height2, int width2, int out_stride,
diff --git a/av1/common/resize.h b/av1/common/resize.h
index d1fab82..0ba3108 100644
--- a/av1/common/resize.h
+++ b/av1/common/resize.h
@@ -23,9 +23,6 @@
 bool av1_resize_plane(const uint8_t *const input, int height, int width,
                       int in_stride, uint8_t *output, int height2, int width2,
                       int out_stride);
-bool av1_upscale_plane_double_prec(const double *const input, int height,
-                                   int width, int in_stride, double *output,
-                                   int height2, int width2, int out_stride);
 // TODO(aomedia:3228): In libaom 4.0.0, remove av1_resize_frame420 from
 // av1/exports_com and delete this function.
 void av1_resize_frame420(const uint8_t *const y, int y_stride,