Add supporting functions to optical flow.

Change-Id: Ia484b4a2d87e0f51e1e954a960d236e3af0c877c
diff --git a/av1/encoder/optical_flow.c b/av1/encoder/optical_flow.c
index 6360ac8..e54d276 100644
--- a/av1/encoder/optical_flow.c
+++ b/av1/encoder/optical_flow.c
@@ -21,11 +21,454 @@
 
 #if CONFIG_OPTICAL_FLOW_API
 
+// Helper function to determine whether a frame is encoded with high bit-depth.
+static INLINE int is_frame_high_bitdepth(const YV12_BUFFER_CONFIG *frame) {
+  return (frame->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
+}
+
 typedef struct LOCALMV {
   double row;
   double col;
 } LOCALMV;
 
+void gradients_over_window(const YV12_BUFFER_CONFIG *frame,
+                           const YV12_BUFFER_CONFIG *ref_frame,
+                           const double x_coord, const double y_coord,
+                           const int window_size, const int bit_depth,
+                           double *ix, double *iy, double *it, LOCALMV *mv);
+
+// coefficients for bilinear interpolation on unit square
+int pixel_interp(const double x, const double y, const double b00,
+                 const double b01, const double b10, const double b11) {
+  const int xint = (int)x;
+  const int yint = (int)y;
+  const double xdec = x - xint;
+  const double ydec = y - yint;
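+  // The four weights below are the standard bilinear coefficients; the
+  // interpolated value is
+  //   (1 - xdec) * (1 - ydec) * b00 + xdec * (1 - ydec) * b01 +
+  //   (1 - xdec) * ydec * b10 + xdec * ydec * b11,
+  // where b00..b11 are the four neighboring full-pel values.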
+  const double a = (1 - xdec) * (1 - ydec);
+  const double b = xdec * (1 - ydec);
+  const double c = (1 - xdec) * ydec;
+  const double d = xdec * ydec;
+  // if x and y are already integers, this reduces to b00
+  int interp = (int)round(a * b00 + b * b01 + c * b10 + d * b11);
+  return interp;
+}
+// bilinear interpolation to find subpixel values
+int get_subpixels(const YV12_BUFFER_CONFIG *frame, int *pred, const int w,
+                  const int h, LOCALMV mv, const double x_coord,
+                  const double y_coord) {
+  double left = x_coord + mv.row;
+  double top = y_coord + mv.col;
+  const int fromedge = 2;
+  const int height = frame->y_crop_height;
+  const int width = frame->y_crop_width;
+  if (left < 1) left = 1;
+  if (top < 1) top = 1;
+  // could use elements past boundary where stride > width
+  if (top > height - fromedge) top = height - fromedge;
+  if (left > width - fromedge) left = width - fromedge;
+  const uint8_t *buf = frame->y_buffer;
+  const int s = frame->y_stride;
+  int prev = -1;
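+  // The w x h window starts one pixel above and to the left of (left, top).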
+
+  int xint;
+  int yint;
+  int idx = 0;
+  for (int y = prev; y < prev + h; y++) {
+    for (int x = prev; x < prev + w; x++) {
+      double xx = left + x;
+      double yy = top + y;
+      xint = (int)xx;
+      yint = (int)yy;
+      int interp = pixel_interp(
+          xx, yy, buf[yint * s + xint], buf[yint * s + (xint + 1)],
+          buf[(yint + 1) * s + xint], buf[(yint + 1) * s + (xint + 1)]);
+      pred[idx++] = interp;
+    }
+  }
+  return 0;
+}
+// Scharr filter to compute spatial gradient
+void spatial_gradient(const YV12_BUFFER_CONFIG *frame, const int x_coord,
+                      const int y_coord, const int direction,
+                      double *derivative) {
+  double *filter;
+  // Scharr filters
+  double gx[9] = { -3, 0, 3, -10, 0, 10, -3, 0, 3 };
+  double gy[9] = { -3, -10, -3, 0, 0, 0, 3, 10, 3 };
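+  // Both kernels are 3x3 and stored in row-major order; gy is the transpose
+  // of gx, so the same accumulation loop below works for either direction.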
+  if (direction == 0) {  // x direction
+    filter = gx;
+  } else {  // y direction
+    filter = gy;
+  }
+  int idx = 0;
+  double d = 0;
+  for (int yy = -1; yy <= 1; yy++) {
+    for (int xx = -1; xx <= 1; xx++) {
+      d += filter[idx] *
+           frame->y_buffer[(y_coord + yy) * frame->y_stride + (x_coord + xx)];
+      idx++;
+    }
+  }
+  // Normalization scaling factor for Scharr: a unit intensity ramp yields a
+  // raw response of 2 * (3 + 10 + 3) = 32, so divide by 32 to obtain the
+  // per-pixel gradient.
+  *derivative = d / 32.0;
+}
+// Determine the spatial gradient at subpixel locations.
+// For example, when reducing images for pyramidal LK,
+// corners found in the original image may be at subpixel locations.
+void gradient_interp(double *fullpel_deriv, const double x_coord,
+                     const double y_coord, const int w, const int h,
+                     double *derivative) {
+  const int xint = (int)x_coord;
+  const int yint = (int)y_coord;
+  double interp;
+  if (xint + 1 > w - 1 || yint + 1 > h - 1) {
+    interp = fullpel_deriv[yint * w + xint];
+  } else {
+    interp = pixel_interp(x_coord, y_coord, fullpel_deriv[yint * w + xint],
+                          fullpel_deriv[yint * w + (xint + 1)],
+                          fullpel_deriv[(yint + 1) * w + xint],
+                          fullpel_deriv[(yint + 1) * w + (xint + 1)]);
+  }
+
+  *derivative = interp;
+}
+
+void temporal_gradient(const YV12_BUFFER_CONFIG *frame,
+                       const YV12_BUFFER_CONFIG *frame2, const double x_coord,
+                       const double y_coord, const int bit_depth,
+                       double *derivative, LOCALMV *mv) {
+  // TODO(any): This is a roundabout way of forcing build_one_inter_pred to
+  // use the 8-tap filter (instead of a shorter one). It would be more
+  // efficient to apply the filter at only 1 pixel instead of 25 pixels.
+  const int w = 5;
+  const int h = 5;
+  uint8_t pred1[25];
+  uint8_t pred2[25];
+
+  const int y = (int)y_coord;
+  const int x = (int)x_coord;
+  const double ydec = y_coord - y;
+  const double xdec = x_coord - x;
+  const int is_intrabc = 0;  // Is intra-copied?
+  const int is_high_bitdepth = is_frame_high_bitdepth(frame2);
+  const int subsampling_x = 0, subsampling_y = 0;  // for y-buffer
+  const int_interpfilters interp_filters =
+      av1_broadcast_interp_filter(MULTITAP_SHARP);
+  const int plane = 0;  // y-plane
+  const struct buf_2d ref_buf2 = { NULL, frame2->y_buffer, frame2->y_crop_width,
+                                   frame2->y_crop_height, frame2->y_stride };
+  struct scale_factors scale;
+  av1_setup_scale_factors_for_frame(&scale, frame->y_crop_width,
+                                    frame->y_crop_height, frame->y_crop_width,
+                                    frame->y_crop_height);
+  InterPredParams inter_pred_params;
+  av1_init_inter_params(&inter_pred_params, w, h, y, x, subsampling_x,
+                        subsampling_y, bit_depth, is_high_bitdepth, is_intrabc,
+                        &scale, &ref_buf2, interp_filters);
+  inter_pred_params.conv_params = get_conv_params(0, plane, bit_depth);
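+  // MV components are in units of 1/8 pel, so the fractional offsets are
+  // scaled by 8 before being rounded into the MV.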
+  MV newmv = { .row = (int16_t)round((mv->row + xdec) * 8),
+               .col = (int16_t)round((mv->col + ydec) * 8) };
+  av1_enc_build_one_inter_predictor(pred2, w, &newmv, &inter_pred_params);
+  const struct buf_2d ref_buf1 = { NULL, frame->y_buffer, frame->y_crop_width,
+                                   frame->y_crop_height, frame->y_stride };
+  av1_init_inter_params(&inter_pred_params, w, h, y, x, subsampling_x,
+                        subsampling_y, bit_depth, is_high_bitdepth, is_intrabc,
+                        &scale, &ref_buf1, interp_filters);
+  inter_pred_params.conv_params = get_conv_params(0, plane, bit_depth);
+  MV zeroMV = { .row = (int16_t)round(xdec * 8),
+                .col = (int16_t)round(ydec * 8) };
+  av1_enc_build_one_inter_predictor(pred1, w, &zeroMV, &inter_pred_params);
+
+  *derivative = pred2[0] - pred1[0];
+}
+// Numerically differentiate over a window_size x window_size window
+// surrounding the (x, y) location. Fills ix, iy, and it with the numerical
+// partial derivatives.
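+// Each output array holds window_size * window_size entries and is intended
+// to feed the per-pixel terms of the Lucas-Kanade normal equations.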
+void gradients_over_window(const YV12_BUFFER_CONFIG *frame,
+                           const YV12_BUFFER_CONFIG *ref_frame,
+                           const double x_coord, const double y_coord,
+                           const int window_size, const int bit_depth,
+                           double *ix, double *iy, double *it, LOCALMV *mv) {
+  const double left = x_coord - window_size / 2;
+  const double top = y_coord - window_size / 2;
+  // gradient operators need pixel before and after (start at 1)
+  const double x_start = AOMMAX(1, left);
+  const double y_start = AOMMAX(1, top);
+  const int frame_height = frame->y_crop_height;
+  const int frame_width = frame->y_crop_width;
+  double deriv_x;
+  double deriv_y;
+  double deriv_t;
+
+  const double x_end = AOMMIN(x_coord + window_size / 2, frame_width - 2);
+  const double y_end = AOMMIN(y_coord + window_size / 2, frame_height - 2);
+  const int xs = (int)AOMMAX(1, x_start - 1);
+  const int ys = (int)AOMMAX(1, y_start - 1);
+  const int xe = (int)AOMMIN(x_end + 2, frame_width - 2);
+  const int ye = (int)AOMMIN(y_end + 2, frame_height - 2);
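+  // The full-pel gradient buffers cover [xs, xe) x [ys, ye), slightly larger
+  // than the window itself, so that gradient_interp() below has right/bottom
+  // neighbors available for bilinear interpolation.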
+  // with normalization, gradients may be double values
+  double *fullpel_dx = aom_malloc((ye - ys) * (xe - xs) * sizeof(deriv_x));
+  double *fullpel_dy = aom_malloc((ye - ys) * (xe - xs) * sizeof(deriv_y));
+  // TODO(any): This could be more efficient when x_coord and y_coord are
+  // integers, but it may make the code messier.
+
+  // calculate spatial gradients at full pixel locations
+  for (int j = ys; j < ye; j++) {
+    for (int i = xs; i < xe; i++) {
+      spatial_gradient(frame, i, j, 0, &deriv_x);
+      spatial_gradient(frame, i, j, 1, &deriv_y);
+      int idx = (j - ys) * (xe - xs) + (i - xs);
+      fullpel_dx[idx] = deriv_x;
+      fullpel_dy[idx] = deriv_y;
+    }
+  }
+  // compute numerical differentiation for every pixel in window
+  // (this potentially includes subpixels)
+  for (double j = y_start; j < y_end; j++) {
+    for (double i = x_start; i < x_end; i++) {
+      temporal_gradient(frame, ref_frame, i, j, bit_depth, &deriv_t, mv);
+      gradient_interp(fullpel_dx, i - xs, j - ys, xe - xs, ye - ys, &deriv_x);
+      gradient_interp(fullpel_dy, i - xs, j - ys, xe - xs, ye - ys, &deriv_y);
+      int idx = (int)(j - top) * window_size + (int)(i - left);
+      ix[idx] = deriv_x;
+      iy[idx] = deriv_y;
+      it[idx] = deriv_t;
+    }
+  }
+  // TODO(any): to avoid setting deriv arrays to zero for every iteration,
+  // could instead pass these two values back through function call
+  // int first_idx = (int)(y_start - top) * window_size + (int)(x_start - left);
+  // int width = window_size - ((int)(x_start - left) + (int)(left + window_size
+  // - x_end));
+
+  aom_free(fullpel_dx);
+  aom_free(fullpel_dy);
+}
+
+// To compute eigenvalues of 2x2 matrix: Solve for lambda where
+// Determinant(matrix - lambda*identity) == 0
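+// For matrix = [m00 m01; m10 m11] this is the quadratic
+//   lambda^2 - (m00 + m11) * lambda + (m00 * m11 - m01 * m10) = 0,
+// which is solved below with the quadratic formula.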
+void eigenvalues_2x2(const double *matrix, double *eig) {
+  const double a = 1;
+  const double b = -1 * matrix[0] - matrix[3];
+  const double c = -1 * matrix[1] * matrix[2] + matrix[0] * matrix[3];
+  // quadratic formula
+  const double discriminant = b * b - 4 * a * c;
+  eig[0] = (-b - sqrt(discriminant)) / (2.0 * a);
+  eig[1] = (-b + sqrt(discriminant)) / (2.0 * a);
+  // double check that eigenvalues are ordered by magnitude
+  if (fabs(eig[0]) > fabs(eig[1])) {
+    double tmp = eig[0];
+    eig[0] = eig[1];
+    eig[1] = tmp;
+  }
+}
+// Shi-Tomasi corner detection criteria
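+// Builds the 2x2 structure tensor M = [sum(Ix*Ix), sum(Ix*Iy); sum(Ix*Iy),
+// sum(Iy*Iy)] over an n x n window and returns the magnitude of its smaller
+// eigenvalue; a large value indicates a corner.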
+double corner_score(const YV12_BUFFER_CONFIG *frame_to_filter,
+                    const YV12_BUFFER_CONFIG *ref_frame, const int x,
+                    const int y, double *i_x, double *i_y, double *i_t,
+                    const int n, const int bit_depth) {
+  double eig[2];
+  LOCALMV mv = { .row = 0, .col = 0 };
+  // TODO(any): technically, ref_frame and i_t are not used by the corner
+  // score, so they could be replaced by dummy variables, or this could be
+  // changed to a spatial-gradient-only function over the window.
+  gradients_over_window(frame_to_filter, ref_frame, x, y, n, bit_depth, i_x,
+                        i_y, i_t, &mv);
+  double Mres1[1] = { 0 }, Mres2[1] = { 0 }, Mres3[1] = { 0 };
+  multiply_mat(i_x, i_x, Mres1, 1, n * n, 1);
+  multiply_mat(i_x, i_y, Mres2, 1, n * n, 1);
+  multiply_mat(i_y, i_y, Mres3, 1, n * n, 1);
+  double M[4] = { Mres1[0], Mres2[0], Mres2[0], Mres3[0] };
+  eigenvalues_2x2(M, eig);
+  return fabs(eig[0]);
+}
+// Finds corners in frame_to_filter
+// For less strict requirements (i.e. more corners), decrease threshold
+int detect_corners(const YV12_BUFFER_CONFIG *frame_to_filter,
+                   const YV12_BUFFER_CONFIG *ref_frame, const int maxcorners,
+                   int *ref_corners, const int bit_depth) {
+  const int frame_height = frame_to_filter->y_crop_height;
+  const int frame_width = frame_to_filter->y_crop_width;
+  // TODO(any): currently, if maxcorners is decreased, corners are simply
+  // omitted from the bottom-right of the image. If maxcorners is meant to be
+  // honored, this algorithm would need to re-iterate and choose the
+  // threshold accordingly.
+  assert(maxcorners == frame_height * frame_width);
+  int countcorners = 0;
+  const double threshold = 0.1;
+  double score;
+  const int n = 3;
+  double i_x[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+  double i_y[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+  double i_t[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+  const int fromedge = n;
+  double max_score = corner_score(frame_to_filter, ref_frame, fromedge,
+                                  fromedge, i_x, i_y, i_t, n, bit_depth);
+  // rough estimate of max corner score in image
+  for (int x = fromedge; x < frame_width - fromedge; x += 1) {
+    for (int y = fromedge; y < frame_height - fromedge; y += frame_height / 5) {
+      for (int i = 0; i < n * n; i++) {
+        i_x[i] = 0;
+        i_y[i] = 0;
+        i_t[i] = 0;
+      }
+      score = corner_score(frame_to_filter, ref_frame, x, y, i_x, i_y, i_t, n,
+                           bit_depth);
+      if (score > max_score) {
+        max_score = score;
+      }
+    }
+  }
+  // score all the points and choose corners over threshold
+  for (int x = fromedge; x < frame_width - fromedge; x += 1) {
+    for (int y = fromedge;
+         (y < frame_height - fromedge) && countcorners < maxcorners; y += 1) {
+      for (int i = 0; i < n * n; i++) {
+        i_x[i] = 0;
+        i_y[i] = 0;
+        i_t[i] = 0;
+      }
+      score = corner_score(frame_to_filter, ref_frame, x, y, i_x, i_y, i_t, n,
+                           bit_depth);
+      if (score > threshold * max_score) {
+        ref_corners[countcorners * 2] = x;
+        ref_corners[countcorners * 2 + 1] = y;
+        countcorners++;
+      }
+    }
+  }
+  return countcorners;
+}
+// Fills the n x n matrix 'weights' with a sampled Gaussian kernel, optionally
+// normalized so that the weights sum to 1.
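+// Each weight is exp(-0.5 * (d / sigma)^2), where d is the Euclidean
+// distance from the kernel center.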
+void gaussian(const double sigma, const int n, const int normalize,
+              double *weights) {
+  double total_weight = 0;
+  for (int j = 0; j < n; j++) {
+    for (int i = 0; i < n; i++) {
+      double distance = sqrt(pow(n / 2 - i, 2) + pow(n / 2 - j, 2));
+      double weight = exp(-0.5 * pow(distance / sigma, 2));
+      weights[j * n + i] = weight;
+      total_weight += weight;
+    }
+  }
+  if (normalize == 1) {
+    for (int j = 0; j < n * n; j++) {
+      weights[j] = weights[j] / total_weight;
+    }
+  }
+}
+double convolve(const double *filter, const int *img, const int size) {
+  double result = 0;
+  for (int i = 0; i < size; i++) {
+    result += filter[i] * img[i];
+  }
+  return result;
+}
+// Applies a Gaussian low-pass smoothing filter to produce
+// a corresponding lower resolution image with halved dimensions
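+// The 5x5 kernel below is the outer product of the binomial filter
+// [1 4 6 4 1] / 16 with itself, a standard separable approximation of a
+// Gaussian low-pass filter.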
+void reduce(uint8_t *img, int height, int width, int stride,
+            uint8_t *reduced_img) {
+  const int new_width = width / 2;
+  const int window_size = 5;
+  const double gaussian_filter[25] = {
+    1. / 256, 1. / 64,  3. / 128, 1. / 64,  1. / 256, 1. / 64, 1. / 16,
+    3. / 32,  1. / 16,  1. / 64,  3. / 128, 3. / 32,  9. / 64, 3. / 32,
+    3. / 128, 1. / 64,  1. / 16,  3. / 32,  1. / 16,  1. / 64, 1. / 256,
+    1. / 64,  3. / 128, 1. / 64,  1. / 256
+  };
+  // filter is 5x5 so need prev and forward 2 pixels
+  int img_section[25];
+  for (int y = 0; y < height - 1; y += 2) {
+    for (int x = 0; x < width - 1; x += 2) {
+      int i = 0;
+      for (int yy = y - window_size / 2; yy <= y + window_size / 2; yy++) {
+        for (int xx = x - window_size / 2; xx <= x + window_size / 2; xx++) {
+          int yvalue = yy;
+          int xvalue = xx;
+          // replicate edge pixels outside the boundary
+          if (yvalue < 0) yvalue = 0;
+          if (xvalue < 0) xvalue = 0;
+          if (yvalue >= height) yvalue = height - 1;
+          if (xvalue >= width) xvalue = width - 1;
+          img_section[i++] = img[yvalue * stride + xvalue];
+        }
+      }
+      reduced_img[(y / 2) * new_width + (x / 2)] = (uint8_t)convolve(
+          gaussian_filter, img_section, (int)pow(window_size, 2));
+    }
+  }
+}
+int cmpfunc(const void *a, const void *b) { return (*(int *)a - *(int *)b); }
+void filter_mvs(const MV_FILTER_TYPE mv_filter, const int frame_height,
+                const int frame_width, LOCALMV *localmvs, MV *mvs) {
+  const int n = 5;  // window size
+  // for smoothing filter
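+  // (the same 5x5 Gaussian/binomial kernel used in reduce() above)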
+  const double gaussian_filter[25] = {
+    1. / 256, 1. / 64,  3. / 128, 1. / 64,  1. / 256, 1. / 64, 1. / 16,
+    3. / 32,  1. / 16,  1. / 64,  3. / 128, 3. / 32,  9. / 64, 3. / 32,
+    3. / 128, 1. / 64,  1. / 16,  3. / 32,  1. / 16,  1. / 64, 1. / 256,
+    1. / 64,  3. / 128, 1. / 64,  1. / 256
+  };
+  // for median filter
+  int mvrows[25];
+  int mvcols[25];
+  if (mv_filter != MV_FILTER_NONE) {
+    for (int y = 0; y < frame_height; y++) {
+      for (int x = 0; x < frame_width; x++) {
+        int center_idx = y * frame_width + x;
+        if (fabs(localmvs[center_idx].row) > 0 ||
+            fabs(localmvs[center_idx].col) > 0) {
+          int i = 0;
+          double filtered_row = 0;
+          double filtered_col = 0;
+          for (int yy = y - n / 2; yy <= y + n / 2; yy++) {
+            for (int xx = x - n / 2; xx <= x + n / 2; xx++) {
+              int yvalue = yy;
+              int xvalue = xx;
+              // replicate edge pixels outside the boundary
+              if (yvalue < 0) yvalue = 0;
+              if (xvalue < 0) xvalue = 0;
+              if (yvalue >= frame_height) yvalue = frame_height - 1;
+              if (xvalue >= frame_width) xvalue = frame_width - 1;
+              int index = yvalue * frame_width + xvalue;
+              if (mv_filter == MV_FILTER_SMOOTH) {
+                filtered_row += mvs[index].row * gaussian_filter[i];
+                filtered_col += mvs[index].col * gaussian_filter[i];
+              } else if (mv_filter == MV_FILTER_MEDIAN) {
+                mvrows[i] = mvs[index].row;
+                mvcols[i] = mvs[index].col;
+              }
+              i++;
+            }
+          }
+
+          MV mv = mvs[center_idx];
+          if (mv_filter == MV_FILTER_SMOOTH) {
+            mv.row = (int16_t)filtered_row;
+            mv.col = (int16_t)filtered_col;
+          } else if (mv_filter == MV_FILTER_MEDIAN) {
+            qsort(mvrows, 25, sizeof(mvrows[0]), cmpfunc);
+            qsort(mvcols, 25, sizeof(mvcols[0]), cmpfunc);
+            mv.row = mvrows[25 / 2];
+            mv.col = mvcols[25 / 2];
+          }
+          LOCALMV localmv = { .row = ((double)mv.row) / 8,
+                              .col = ((double)mv.col) / 8 };
+          localmvs[y * frame_width + x] = localmv;
+          // if mvs array is immediately updated here, then the result may
+          // propagate to other pixels.
+        }
+      }
+    }
+    for (int i = 0; i < frame_height * frame_width; i++) {
+      if (fabs(localmvs[i].row) > 0 || fabs(localmvs[i].col) > 0) {
+        MV mv = { .row = (int16_t)round(8 * localmvs[i].row),
+                  .col = (int16_t)round(8 * localmvs[i].col) };
+        mvs[i] = mv;
+      }
+    }
+  }
+}
 // Computes optical flow by applying algorithm at
 // multiple pyramid levels of images (lower-resolution, smoothed images)
 // This accounts for larger motions.
@@ -51,7 +494,6 @@
   (void)to_frame;
   (void)bit_depth;
   (void)opfl_params;
-  (void)mv_filter;
   (void)method;
   const int frame_height = from_frame->y_crop_height;
   const int frame_width = from_frame->y_crop_width;
@@ -68,6 +510,7 @@
     }
     return;
   }
+
   // Initialize double mvs based on input parameter mvs array
   LOCALMV *localmvs = aom_malloc(frame_height * frame_width * sizeof(LOCALMV));
   for (int i = 0; i < frame_width * frame_height; i++) {
@@ -96,6 +539,9 @@
       }
     }
   }
+
+  filter_mvs(mv_filter, frame_height, frame_width, localmvs, mvs);
+
   aom_free(localmvs);
 }
 #endif