Integerize warped motion computation

Integerizes computation of the least squares for warped motion.
The model is restricted to only Affine. Affine seems easiest
to compute and integerize since it can be split into two 3-dim
least squares problems, as opposed to rotation-zoom which needs
a 4-dim least-squares problem to be solved.
The current implementation requires only one division per block.

BDRATE impact is mminimal. The upgrade to the affine model improves
coding efficiency but integerization also degrades efficiency a
little. Overall there is a net gain of about -0.07% BDRATE on
the lowres set.
BDRATE lowres: -1.113% with ----enable-warped-motion vs. without
(up from -1.044%).

Change-Id: I6b9216ac0737d76f59054293eabee48e17739ec4
diff --git a/av1/common/mvref_common.c b/av1/common/mvref_common.c
index b84f9e5..ed86122 100644
--- a/av1/common/mvref_common.c
+++ b/av1/common/mvref_common.c
@@ -1114,7 +1114,7 @@
 #if CONFIG_GLOBAL_MOTION
                              MACROBLOCKD *xd,
 #endif
-                             int x, int y, double *pts_inref) {
+                             int x, int y, int *pts_inref) {
   if (mbmi->motion_mode == WARPED_CAUSAL
 #if CONFIG_GLOBAL_MOTION
       || (mbmi->mode == ZEROMV &&
@@ -1131,16 +1131,19 @@
             &mbmi->wm_params[0];
 
     project_points(wm, ipts, ipts_inref, 1, 2, 2, 0, 0);
-    pts_inref[0] = (double)ipts_inref[0] / (double)WARPEDPIXEL_PREC_SHIFTS;
-    pts_inref[1] = (double)ipts_inref[1] / (double)WARPEDPIXEL_PREC_SHIFTS;
+    pts_inref[0] =
+        ROUND_POWER_OF_TWO_SIGNED(ipts_inref[0], WARPEDPIXEL_PREC_BITS - 3);
+    pts_inref[1] =
+        ROUND_POWER_OF_TWO_SIGNED(ipts_inref[1], WARPEDPIXEL_PREC_BITS - 3);
   } else {
-    pts_inref[0] = (double)x + (double)(mbmi->mv[0].as_mv.col) * 0.125;
-    pts_inref[1] = (double)y + (double)(mbmi->mv[0].as_mv.row) * 0.125;
+    pts_inref[0] = (x * 8) + mbmi->mv[0].as_mv.col;
+    pts_inref[1] = (y * 8) + mbmi->mv[0].as_mv.row;
   }
 }
 
+// Note: Samples returned are at 1/8-pel precision
 int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
-                double *pts, double *pts_inref) {
+                int *pts, int *pts_inref) {
   MB_MODE_INFO *const mbmi0 = &(xd->mi[0]->mbmi);
   int ref_frame = mbmi0->ref_frame[0];
   int up_available = xd->up_available;
@@ -1178,8 +1181,8 @@
           int x = cc_offset + j % 2 + global_offset_c;
           int y = cr_offset + j / 2 + global_offset_r;
 
-          pts[0] = (double)x;
-          pts[1] = (double)y;
+          pts[0] = (x * 8);
+          pts[1] = (y * 8);
           calc_projection_samples(mbmi,
 #if CONFIG_GLOBAL_MOTION
                                   xd,
@@ -1221,8 +1224,8 @@
           int x = cc_offset + j % 2 + global_offset_c;
           int y = cr_offset + j / 2 + global_offset_r;
 
-          pts[0] = (double)x;
-          pts[1] = (double)y;
+          pts[0] = (x * 8);
+          pts[1] = (y * 8);
           calc_projection_samples(mbmi,
 #if CONFIG_GLOBAL_MOTION
                                   xd,
@@ -1260,8 +1263,8 @@
         int x = cc_offset + j % 2 + global_offset_c;
         int y = cr_offset + j / 2 + global_offset_r;
 
-        pts[0] = (double)x;
-        pts[1] = (double)y;
+        pts[0] = (x * 8);
+        pts[1] = (y * 8);
         calc_projection_samples(mbmi,
 #if CONFIG_GLOBAL_MOTION
                                 xd,
@@ -1298,11 +1301,13 @@
       int r_offset = j / 2;
       int c_offset = j % 2;
 
-      pts[0] = (double)(cc_offset + c_offset + global_offset_c);
-      pts[1] = (double)(cr_offset + r_offset + global_offset_r);
+      int x = (cc_offset + c_offset + global_offset_c);
+      int y = (cr_offset + r_offset + global_offset_r);
 
-      pts_inref[0] = pts[0] + (double)(mv_col)*0.125;
-      pts_inref[1] = pts[1] + (double)(mv_row)*0.125;
+      pts[0] = (x * 8);
+      pts[1] = (y * 8);
+      pts_inref[0] = pts[0] + mv_col;
+      pts_inref[1] = pts[1] + mv_row;
 
       pts += 2;
       pts_inref += 2;
diff --git a/av1/common/mvref_common.h b/av1/common/mvref_common.h
index 6b83097..f5a3919 100644
--- a/av1/common/mvref_common.h
+++ b/av1/common/mvref_common.h
@@ -515,7 +515,7 @@
 
 #if CONFIG_WARPED_MOTION
 int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
-                double *pts, double *pts_inref);
+                int *pts, int *pts_inref);
 #endif  // CONFIG_WARPED_MOTION
 
 #ifdef __cplusplus
diff --git a/av1/common/warped_motion.c b/av1/common/warped_motion.c
index 86ba46c..cf93996 100644
--- a/av1/common/warped_motion.c
+++ b/av1/common/warped_motion.c
@@ -1213,826 +1213,147 @@
                y_scale, ref_frm);
 }
 
-void av1_integerize_model(const double *model, TransformationType wmtype,
-                          WarpedMotionParams *wm) {
-  wm->wmtype = wmtype;
-  switch (wmtype) {
-    case HOMOGRAPHY:
-      assert(fabs(model[8] - 1.0) < 1e-12);
-      wm->wmmat[6] =
-          (int32_t)lrint(model[6] * (1 << WARPEDMODEL_ROW3HOMO_PREC_BITS));
-      wm->wmmat[7] =
-          (int32_t)lrint(model[7] * (1 << WARPEDMODEL_ROW3HOMO_PREC_BITS));
-    /* fallthrough intended */
-    case AFFINE:
-      wm->wmmat[4] = (int32_t)lrint(model[4] * (1 << WARPEDMODEL_PREC_BITS));
-      wm->wmmat[5] = (int32_t)lrint(model[5] * (1 << WARPEDMODEL_PREC_BITS));
-    /* fallthrough intended */
-    case ROTZOOM:
-      wm->wmmat[2] = (int32_t)lrint(model[2] * (1 << WARPEDMODEL_PREC_BITS));
-      wm->wmmat[3] = (int32_t)lrint(model[3] * (1 << WARPEDMODEL_PREC_BITS));
-    /* fallthrough intended */
-    case TRANSLATION:
-      wm->wmmat[0] = (int32_t)lrint(model[0] * (1 << WARPEDMODEL_PREC_BITS));
-      wm->wmmat[1] = (int32_t)lrint(model[1] * (1 << WARPEDMODEL_PREC_BITS));
-      break;
-    default: assert(0 && "Invalid TransformationType");
-  }
-}
-
-///////////////////////////////////////////////////////////////////////////////
-// svdcmp
-// Adopted from Numerical Recipes in C
-
-static const double TINY_NEAR_ZERO = 1.0E-12;
-
-static INLINE double sign(double a, double b) {
-  return ((b) >= 0 ? fabs(a) : -fabs(a));
-}
-
-static INLINE double pythag(double a, double b) {
-  double ct;
-  const double absa = fabs(a);
-  const double absb = fabs(b);
-
-  if (absa > absb) {
-    ct = absb / absa;
-    return absa * sqrt(1.0 + ct * ct);
-  } else {
-    ct = absa / absb;
-    return (absb == 0) ? 0 : absb * sqrt(1.0 + ct * ct);
-  }
-}
-
-static void multiply_mat(const double *m1, const double *m2, double *res,
-                         const int m1_rows, const int inner_dim,
-                         const int m2_cols) {
-  double sum;
-
-  int row, col, inner;
-  for (row = 0; row < m1_rows; ++row) {
-    for (col = 0; col < m2_cols; ++col) {
-      sum = 0;
-      for (inner = 0; inner < inner_dim; ++inner)
-        sum += m1[row * inner_dim + inner] * m2[inner * m2_cols + col];
-      *(res++) = sum;
-    }
-  }
-}
-
-static int svdcmp(double **u, int m, int n, double w[], double **v) {
-  const int max_its = 30;
-  int flag, i, its, j, jj, k, l, nm;
-  double anorm, c, f, g, h, s, scale, x, y, z;
-  double *rv1 = (double *)aom_malloc(sizeof(*rv1) * (n + 1));
-  g = scale = anorm = 0.0;
-  for (i = 0; i < n; i++) {
-    l = i + 1;
-    rv1[i] = scale * g;
-    g = s = scale = 0.0;
-    if (i < m) {
-      for (k = i; k < m; k++) scale += fabs(u[k][i]);
-      if (scale != 0.) {
-        for (k = i; k < m; k++) {
-          u[k][i] /= scale;
-          s += u[k][i] * u[k][i];
-        }
-        f = u[i][i];
-        g = -sign(sqrt(s), f);
-        h = f * g - s;
-        u[i][i] = f - g;
-        for (j = l; j < n; j++) {
-          for (s = 0.0, k = i; k < m; k++) s += u[k][i] * u[k][j];
-          f = s / h;
-          for (k = i; k < m; k++) u[k][j] += f * u[k][i];
-        }
-        for (k = i; k < m; k++) u[k][i] *= scale;
-      }
-    }
-    w[i] = scale * g;
-    g = s = scale = 0.0;
-    if (i < m && i != n - 1) {
-      for (k = l; k < n; k++) scale += fabs(u[i][k]);
-      if (scale != 0.) {
-        for (k = l; k < n; k++) {
-          u[i][k] /= scale;
-          s += u[i][k] * u[i][k];
-        }
-        f = u[i][l];
-        g = -sign(sqrt(s), f);
-        h = f * g - s;
-        u[i][l] = f - g;
-        for (k = l; k < n; k++) rv1[k] = u[i][k] / h;
-        for (j = l; j < m; j++) {
-          for (s = 0.0, k = l; k < n; k++) s += u[j][k] * u[i][k];
-          for (k = l; k < n; k++) u[j][k] += s * rv1[k];
-        }
-        for (k = l; k < n; k++) u[i][k] *= scale;
-      }
-    }
-    anorm = fmax(anorm, (fabs(w[i]) + fabs(rv1[i])));
-  }
-
-  for (i = n - 1; i >= 0; i--) {
-    if (i < n - 1) {
-      if (g != 0.) {
-        for (j = l; j < n; j++) v[j][i] = (u[i][j] / u[i][l]) / g;
-        for (j = l; j < n; j++) {
-          for (s = 0.0, k = l; k < n; k++) s += u[i][k] * v[k][j];
-          for (k = l; k < n; k++) v[k][j] += s * v[k][i];
-        }
-      }
-      for (j = l; j < n; j++) v[i][j] = v[j][i] = 0.0;
-    }
-    v[i][i] = 1.0;
-    g = rv1[i];
-    l = i;
-  }
-  for (i = AOMMIN(m, n) - 1; i >= 0; i--) {
-    l = i + 1;
-    g = w[i];
-    for (j = l; j < n; j++) u[i][j] = 0.0;
-    if (g != 0.) {
-      g = 1.0 / g;
-      for (j = l; j < n; j++) {
-        for (s = 0.0, k = l; k < m; k++) s += u[k][i] * u[k][j];
-        f = (s / u[i][i]) * g;
-        for (k = i; k < m; k++) u[k][j] += f * u[k][i];
-      }
-      for (j = i; j < m; j++) u[j][i] *= g;
-    } else {
-      for (j = i; j < m; j++) u[j][i] = 0.0;
-    }
-    ++u[i][i];
-  }
-  for (k = n - 1; k >= 0; k--) {
-    for (its = 0; its < max_its; its++) {
-      flag = 1;
-      for (l = k; l >= 0; l--) {
-        nm = l - 1;
-        if ((double)(fabs(rv1[l]) + anorm) == anorm || nm < 0) {
-          flag = 0;
-          break;
-        }
-        if ((double)(fabs(w[nm]) + anorm) == anorm) break;
-      }
-      if (flag) {
-        c = 0.0;
-        s = 1.0;
-        for (i = l; i <= k; i++) {
-          f = s * rv1[i];
-          rv1[i] = c * rv1[i];
-          if ((double)(fabs(f) + anorm) == anorm) break;
-          g = w[i];
-          h = pythag(f, g);
-          w[i] = h;
-          h = 1.0 / h;
-          c = g * h;
-          s = -f * h;
-          for (j = 0; j < m; j++) {
-            y = u[j][nm];
-            z = u[j][i];
-            u[j][nm] = y * c + z * s;
-            u[j][i] = z * c - y * s;
-          }
-        }
-      }
-      z = w[k];
-      if (l == k) {
-        if (z < 0.0) {
-          w[k] = -z;
-          for (j = 0; j < n; j++) v[j][k] = -v[j][k];
-        }
-        break;
-      }
-      if (its == max_its - 1) {
-        aom_free(rv1);
-        return 1;
-      }
-      assert(k > 0);
-      x = w[l];
-      nm = k - 1;
-      y = w[nm];
-      g = rv1[nm];
-      h = rv1[k];
-      f = ((y - z) * (y + z) + (g - h) * (g + h)) / (2.0 * h * y);
-      g = pythag(f, 1.0);
-      f = ((x - z) * (x + z) + h * ((y / (f + sign(g, f))) - h)) / x;
-      c = s = 1.0;
-      for (j = l; j <= nm; j++) {
-        i = j + 1;
-        g = rv1[i];
-        y = w[i];
-        h = s * g;
-        g = c * g;
-        z = pythag(f, h);
-        rv1[j] = z;
-        c = f / z;
-        s = h / z;
-        f = x * c + g * s;
-        g = g * c - x * s;
-        h = y * s;
-        y *= c;
-        for (jj = 0; jj < n; jj++) {
-          x = v[jj][j];
-          z = v[jj][i];
-          v[jj][j] = x * c + z * s;
-          v[jj][i] = z * c - x * s;
-        }
-        z = pythag(f, h);
-        w[j] = z;
-        if (z != 0.) {
-          z = 1.0 / z;
-          c = f * z;
-          s = h * z;
-        }
-        f = c * g + s * y;
-        x = c * y - s * g;
-        for (jj = 0; jj < m; jj++) {
-          y = u[jj][j];
-          z = u[jj][i];
-          u[jj][j] = y * c + z * s;
-          u[jj][i] = z * c - y * s;
-        }
-      }
-      rv1[l] = 0.0;
-      rv1[k] = f;
-      w[k] = x;
-    }
-  }
-  aom_free(rv1);
-  return 0;
-}
-
-static int SVD(double *U, double *W, double *V, double *matx, int M, int N) {
-  // Assumes allocation for U is MxN
-  double **nrU = (double **)aom_malloc((M) * sizeof(*nrU));
-  double **nrV = (double **)aom_malloc((N) * sizeof(*nrV));
-  int problem, i;
-
-  problem = !(nrU && nrV);
-  if (!problem) {
-    for (i = 0; i < M; i++) {
-      nrU[i] = &U[i * N];
-    }
-    for (i = 0; i < N; i++) {
-      nrV[i] = &V[i * N];
-    }
-  } else {
-    if (nrU) aom_free(nrU);
-    if (nrV) aom_free(nrV);
-    return 1;
-  }
-
-  /* copy from given matx into nrU */
-  for (i = 0; i < M; i++) {
-    memcpy(&(nrU[i][0]), matx + N * i, N * sizeof(*matx));
-  }
-
-  /* HERE IT IS: do SVD */
-  if (svdcmp(nrU, M, N, W, nrV)) {
-    aom_free(nrU);
-    aom_free(nrV);
-    return 1;
-  }
-
-  /* aom_free Numerical Recipes arrays */
-  aom_free(nrU);
-  aom_free(nrV);
-
-  return 0;
-}
-
-int pseudo_inverse(double *inv, double *matx, const int M, const int N) {
-  double ans;
-  int i, j, k;
-  double *const U = (double *)aom_malloc(M * N * sizeof(*matx));
-  double *const W = (double *)aom_malloc(N * sizeof(*matx));
-  double *const V = (double *)aom_malloc(N * N * sizeof(*matx));
-
-  if (!(U && W && V)) {
-    return 1;
-  }
-  if (SVD(U, W, V, matx, M, N)) {
-    aom_free(U);
-    aom_free(W);
-    aom_free(V);
-    return 1;
-  }
-  for (i = 0; i < N; i++) {
-    if (fabs(W[i]) < TINY_NEAR_ZERO) {
-      aom_free(U);
-      aom_free(W);
-      aom_free(V);
-      return 1;
-    }
-  }
-
-  for (i = 0; i < N; i++) {
-    for (j = 0; j < M; j++) {
-      ans = 0;
-      for (k = 0; k < N; k++) {
-        ans += V[k + N * i] * U[k + N * j] / W[k];
-      }
-      inv[j + M * i] = ans;
-    }
-  }
-  aom_free(U);
-  aom_free(W);
-  aom_free(V);
-  return 0;
-}
-
-static void normalize_homography(double *pts, int n, double *T) {
-  double *p = pts;
-  double mean[2] = { 0, 0 };
-  double msqe = 0;
-  double scale;
-  int i;
-  for (i = 0; i < n; ++i, p += 2) {
-    mean[0] += p[0];
-    mean[1] += p[1];
-  }
-  mean[0] /= n;
-  mean[1] /= n;
-  for (p = pts, i = 0; i < n; ++i, p += 2) {
-    p[0] -= mean[0];
-    p[1] -= mean[1];
-    msqe += sqrt(p[0] * p[0] + p[1] * p[1]);
-  }
-  msqe /= n;
-  scale = sqrt(2) / msqe;
-  T[0] = scale;
-  T[1] = 0;
-  T[2] = -scale * mean[0];
-  T[3] = 0;
-  T[4] = scale;
-  T[5] = -scale * mean[1];
-  T[6] = 0;
-  T[7] = 0;
-  T[8] = 1;
-  for (p = pts, i = 0; i < n; ++i, p += 2) {
-    p[0] *= scale;
-    p[1] *= scale;
-  }
-}
-
-static void invnormalize_mat(double *T, double *iT) {
-  double is = 1.0 / T[0];
-  double m0 = -T[2] * is;
-  double m1 = -T[5] * is;
-  iT[0] = is;
-  iT[1] = 0;
-  iT[2] = m0;
-  iT[3] = 0;
-  iT[4] = is;
-  iT[5] = m1;
-  iT[6] = 0;
-  iT[7] = 0;
-  iT[8] = 1;
-}
-
-static void denormalize_homography(double *params, double *T1, double *T2) {
-  double iT2[9];
-  double params2[9];
-  invnormalize_mat(T2, iT2);
-  multiply_mat(params, T1, params2, 3, 3, 3);
-  multiply_mat(iT2, params2, params, 3, 3, 3);
-}
-
-static void denormalize_homography_reorder(double *params, double *T1,
-                                           double *T2) {
-  double params_denorm[MAX_PARAMDIM];
-  memcpy(params_denorm, params, sizeof(*params) * 8);
-  params_denorm[8] = 1.0;
-  denormalize_homography(params_denorm, T1, T2);
-  params[0] = params_denorm[2];
-  params[1] = params_denorm[5];
-  params[2] = params_denorm[0];
-  params[3] = params_denorm[1];
-  params[4] = params_denorm[3];
-  params[5] = params_denorm[4];
-  params[6] = params_denorm[6];
-  params[7] = params_denorm[7];
-}
-
-static void denormalize_affine_reorder(double *params, double *T1, double *T2) {
-  double params_denorm[MAX_PARAMDIM];
-  params_denorm[0] = params[0];
-  params_denorm[1] = params[1];
-  params_denorm[2] = params[4];
-  params_denorm[3] = params[2];
-  params_denorm[4] = params[3];
-  params_denorm[5] = params[5];
-  params_denorm[6] = params_denorm[7] = 0;
-  params_denorm[8] = 1;
-  denormalize_homography(params_denorm, T1, T2);
-  params[0] = params_denorm[2];
-  params[1] = params_denorm[5];
-  params[2] = params_denorm[0];
-  params[3] = params_denorm[1];
-  params[4] = params_denorm[3];
-  params[5] = params_denorm[4];
-  params[6] = params[7] = 0;
-}
-
-static void denormalize_rotzoom_reorder(double *params, double *T1,
-                                        double *T2) {
-  double params_denorm[MAX_PARAMDIM];
-  params_denorm[0] = params[0];
-  params_denorm[1] = params[1];
-  params_denorm[2] = params[2];
-  params_denorm[3] = -params[1];
-  params_denorm[4] = params[0];
-  params_denorm[5] = params[3];
-  params_denorm[6] = params_denorm[7] = 0;
-  params_denorm[8] = 1;
-  denormalize_homography(params_denorm, T1, T2);
-  params[0] = params_denorm[2];
-  params[1] = params_denorm[5];
-  params[2] = params_denorm[0];
-  params[3] = params_denorm[1];
-  params[4] = -params[3];
-  params[5] = params[2];
-  params[6] = params[7] = 0;
-}
-
-static void denormalize_translation_reorder(double *params, double *T1,
-                                            double *T2) {
-  double params_denorm[MAX_PARAMDIM];
-  params_denorm[0] = 1;
-  params_denorm[1] = 0;
-  params_denorm[2] = params[0];
-  params_denorm[3] = 0;
-  params_denorm[4] = 1;
-  params_denorm[5] = params[1];
-  params_denorm[6] = params_denorm[7] = 0;
-  params_denorm[8] = 1;
-  denormalize_homography(params_denorm, T1, T2);
-  params[0] = params_denorm[2];
-  params[1] = params_denorm[5];
-  params[2] = params[5] = 1;
-  params[3] = params[4] = 0;
-  params[6] = params[7] = 0;
-}
-
-int find_translation(const int np, double *pts1, double *pts2, double *mat) {
-  int i;
-  double sx, sy, dx, dy;
-  double sumx, sumy;
-
-  double T1[9], T2[9];
-  normalize_homography(pts1, np, T1);
-  normalize_homography(pts2, np, T2);
-
-  sumx = 0;
-  sumy = 0;
-  for (i = 0; i < np; ++i) {
-    dx = *(pts2++);
-    dy = *(pts2++);
-    sx = *(pts1++);
-    sy = *(pts1++);
-
-    sumx += dx - sx;
-    sumy += dy - sy;
-  }
-  mat[0] = sumx / np;
-  mat[1] = sumy / np;
-  denormalize_translation_reorder(mat, T1, T2);
-  return 0;
-}
-
-int find_rotzoom(const int np, double *pts1, double *pts2, double *mat) {
-  const int np2 = np * 2;
-  double *a = (double *)aom_malloc(sizeof(*a) * np2 * 9);
-  double *b = a + np2 * 4;
-  double *temp = b + np2;
-  int i;
-  double sx, sy, dx, dy;
-
-  double T1[9], T2[9];
-  normalize_homography(pts1, np, T1);
-  normalize_homography(pts2, np, T2);
-
-  for (i = 0; i < np; ++i) {
-    dx = *(pts2++);
-    dy = *(pts2++);
-    sx = *(pts1++);
-    sy = *(pts1++);
-
-    a[i * 2 * 4 + 0] = sx;
-    a[i * 2 * 4 + 1] = sy;
-    a[i * 2 * 4 + 2] = 1;
-    a[i * 2 * 4 + 3] = 0;
-    a[(i * 2 + 1) * 4 + 0] = sy;
-    a[(i * 2 + 1) * 4 + 1] = -sx;
-    a[(i * 2 + 1) * 4 + 2] = 0;
-    a[(i * 2 + 1) * 4 + 3] = 1;
-
-    b[2 * i] = dx;
-    b[2 * i + 1] = dy;
-  }
-  if (pseudo_inverse(temp, a, np2, 4)) {
-    aom_free(a);
-    return 1;
-  }
-  multiply_mat(temp, b, mat, 4, np2, 1);
-  denormalize_rotzoom_reorder(mat, T1, T2);
-  aom_free(a);
-  return 0;
-}
-
-int find_affine(const int np, double *pts1, double *pts2, double *mat) {
-  const int np2 = np * 2;
-  double *a = (double *)aom_malloc(sizeof(*a) * np2 * 13);
-  double *b = a + np2 * 6;
-  double *temp = b + np2;
-  int i;
-  double sx, sy, dx, dy;
-
-  double T1[9], T2[9];
-  normalize_homography(pts1, np, T1);
-  normalize_homography(pts2, np, T2);
-
-  for (i = 0; i < np; ++i) {
-    dx = *(pts2++);
-    dy = *(pts2++);
-    sx = *(pts1++);
-    sy = *(pts1++);
-
-    a[i * 2 * 6 + 0] = sx;
-    a[i * 2 * 6 + 1] = sy;
-    a[i * 2 * 6 + 2] = 0;
-    a[i * 2 * 6 + 3] = 0;
-    a[i * 2 * 6 + 4] = 1;
-    a[i * 2 * 6 + 5] = 0;
-    a[(i * 2 + 1) * 6 + 0] = 0;
-    a[(i * 2 + 1) * 6 + 1] = 0;
-    a[(i * 2 + 1) * 6 + 2] = sx;
-    a[(i * 2 + 1) * 6 + 3] = sy;
-    a[(i * 2 + 1) * 6 + 4] = 0;
-    a[(i * 2 + 1) * 6 + 5] = 1;
-
-    b[2 * i] = dx;
-    b[2 * i + 1] = dy;
-  }
-  if (pseudo_inverse(temp, a, np2, 6)) {
-    aom_free(a);
-    return 1;
-  }
-  multiply_mat(temp, b, mat, 6, np2, 1);
-  denormalize_affine_reorder(mat, T1, T2);
-  aom_free(a);
-  return 0;
-}
-
-int find_vertrapezoid(const int np, double *pts1, double *pts2, double *mat) {
-  const int np3 = np * 3;
-  double *a = (double *)aom_malloc(sizeof(*a) * np3 * 14);
-  double *U = a + np3 * 7;
-  double S[7], V[7 * 7], H[9];
-  int i, mini;
-  double sx, sy, dx, dy;
-  double T1[9], T2[9];
-
-  normalize_homography(pts1, np, T1);
-  normalize_homography(pts2, np, T2);
-
-  for (i = 0; i < np; ++i) {
-    dx = *(pts2++);
-    dy = *(pts2++);
-    sx = *(pts1++);
-    sy = *(pts1++);
-
-    a[i * 3 * 7 + 0] = a[i * 3 * 7 + 1] = 0;
-    a[i * 3 * 7 + 2] = -sx;
-    a[i * 3 * 7 + 3] = -sy;
-    a[i * 3 * 7 + 4] = -1;
-    a[i * 3 * 7 + 5] = dy * sx;
-    a[i * 3 * 7 + 6] = dy;
-
-    a[(i * 3 + 1) * 7 + 0] = sx;
-    a[(i * 3 + 1) * 7 + 1] = 1;
-    a[(i * 3 + 1) * 7 + 2] = a[(i * 3 + 1) * 7 + 3] = a[(i * 3 + 1) * 7 + 4] =
-        0;
-    a[(i * 3 + 1) * 7 + 5] = -dx * sx;
-    a[(i * 3 + 1) * 7 + 6] = -dx;
-
-    a[(i * 3 + 2) * 7 + 0] = -dy * sx;
-    a[(i * 3 + 2) * 7 + 1] = -dy;
-    a[(i * 3 + 2) * 7 + 2] = dx * sx;
-    a[(i * 3 + 2) * 7 + 3] = dx * sy;
-    a[(i * 3 + 2) * 7 + 4] = dx;
-    a[(i * 3 + 2) * 7 + 5] = a[(i * 3 + 2) * 7 + 6] = 0;
-  }
-  if (SVD(U, S, V, a, np3, 7)) {
-    aom_free(a);
-    return 1;
-  } else {
-    double minS = 1e12;
-    mini = -1;
-    for (i = 0; i < 7; ++i) {
-      if (S[i] < minS) {
-        minS = S[i];
-        mini = i;
-      }
-    }
-  }
-  H[1] = H[7] = 0;
-  for (i = 0; i < 1; i++) H[i] = V[i * 7 + mini];
-  for (; i < 6; i++) H[i + 1] = V[i * 7 + mini];
-  for (; i < 7; i++) H[i + 2] = V[i * 7 + mini];
-
-  denormalize_homography_reorder(H, T1, T2);
-  aom_free(a);
-  if (H[8] == 0.0) {
-    return 1;
-  } else {
-    // normalize
-    double f = 1.0 / H[8];
-    for (i = 0; i < 8; i++) mat[i] = f * H[i];
-  }
-  return 0;
-}
-
-int find_hortrapezoid(const int np, double *pts1, double *pts2, double *mat) {
-  const int np3 = np * 3;
-  double *a = (double *)aom_malloc(sizeof(*a) * np3 * 14);
-  double *U = a + np3 * 7;
-  double S[7], V[7 * 7], H[9];
-  int i, mini;
-  double sx, sy, dx, dy;
-  double T1[9], T2[9];
-
-  normalize_homography(pts1, np, T1);
-  normalize_homography(pts2, np, T2);
-
-  for (i = 0; i < np; ++i) {
-    dx = *(pts2++);
-    dy = *(pts2++);
-    sx = *(pts1++);
-    sy = *(pts1++);
-
-    a[i * 3 * 7 + 0] = a[i * 3 * 7 + 1] = a[i * 3 * 7 + 2] = 0;
-    a[i * 3 * 7 + 3] = -sy;
-    a[i * 3 * 7 + 4] = -1;
-    a[i * 3 * 7 + 5] = dy * sy;
-    a[i * 3 * 7 + 6] = dy;
-
-    a[(i * 3 + 1) * 7 + 0] = sx;
-    a[(i * 3 + 1) * 7 + 1] = sy;
-    a[(i * 3 + 1) * 7 + 2] = 1;
-    a[(i * 3 + 1) * 7 + 3] = a[(i * 3 + 1) * 7 + 4] = 0;
-    a[(i * 3 + 1) * 7 + 5] = -dx * sy;
-    a[(i * 3 + 1) * 7 + 6] = -dx;
-
-    a[(i * 3 + 2) * 7 + 0] = -dy * sx;
-    a[(i * 3 + 2) * 7 + 1] = -dy * sy;
-    a[(i * 3 + 2) * 7 + 2] = -dy;
-    a[(i * 3 + 2) * 7 + 3] = dx * sy;
-    a[(i * 3 + 2) * 7 + 4] = dx;
-    a[(i * 3 + 2) * 7 + 5] = a[(i * 3 + 2) * 7 + 6] = 0;
-  }
-
-  if (SVD(U, S, V, a, np3, 7)) {
-    aom_free(a);
-    return 1;
-  } else {
-    double minS = 1e12;
-    mini = -1;
-    for (i = 0; i < 7; ++i) {
-      if (S[i] < minS) {
-        minS = S[i];
-        mini = i;
-      }
-    }
-  }
-  H[3] = H[6] = 0;
-  for (i = 0; i < 3; i++) H[i] = V[i * 7 + mini];
-  for (; i < 5; i++) H[i + 1] = V[i * 7 + mini];
-  for (; i < 7; i++) H[i + 2] = V[i * 7 + mini];
-
-  denormalize_homography_reorder(H, T1, T2);
-  aom_free(a);
-  if (H[8] == 0.0) {
-    return 1;
-  } else {
-    // normalize
-    double f = 1.0 / H[8];
-    for (i = 0; i < 8; i++) mat[i] = f * H[i];
-  }
-  return 0;
-}
-
-int find_homography(const int np, double *pts1, double *pts2, double *mat) {
-  // Implemented from Peter Kovesi's normalized implementation
-  const int np3 = np * 3;
-  double *a = (double *)aom_malloc(sizeof(*a) * np3 * 18);
-  double *U = a + np3 * 9;
-  double S[9], V[9 * 9], H[9];
-  int i, mini;
-  double sx, sy, dx, dy;
-  double T1[9], T2[9];
-
-  normalize_homography(pts1, np, T1);
-  normalize_homography(pts2, np, T2);
-
-  for (i = 0; i < np; ++i) {
-    dx = *(pts2++);
-    dy = *(pts2++);
-    sx = *(pts1++);
-    sy = *(pts1++);
-
-    a[i * 3 * 9 + 0] = a[i * 3 * 9 + 1] = a[i * 3 * 9 + 2] = 0;
-    a[i * 3 * 9 + 3] = -sx;
-    a[i * 3 * 9 + 4] = -sy;
-    a[i * 3 * 9 + 5] = -1;
-    a[i * 3 * 9 + 6] = dy * sx;
-    a[i * 3 * 9 + 7] = dy * sy;
-    a[i * 3 * 9 + 8] = dy;
-
-    a[(i * 3 + 1) * 9 + 0] = sx;
-    a[(i * 3 + 1) * 9 + 1] = sy;
-    a[(i * 3 + 1) * 9 + 2] = 1;
-    a[(i * 3 + 1) * 9 + 3] = a[(i * 3 + 1) * 9 + 4] = a[(i * 3 + 1) * 9 + 5] =
-        0;
-    a[(i * 3 + 1) * 9 + 6] = -dx * sx;
-    a[(i * 3 + 1) * 9 + 7] = -dx * sy;
-    a[(i * 3 + 1) * 9 + 8] = -dx;
-
-    a[(i * 3 + 2) * 9 + 0] = -dy * sx;
-    a[(i * 3 + 2) * 9 + 1] = -dy * sy;
-    a[(i * 3 + 2) * 9 + 2] = -dy;
-    a[(i * 3 + 2) * 9 + 3] = dx * sx;
-    a[(i * 3 + 2) * 9 + 4] = dx * sy;
-    a[(i * 3 + 2) * 9 + 5] = dx;
-    a[(i * 3 + 2) * 9 + 6] = a[(i * 3 + 2) * 9 + 7] = a[(i * 3 + 2) * 9 + 8] =
-        0;
-  }
-
-  if (SVD(U, S, V, a, np3, 9)) {
-    aom_free(a);
-    return 1;
-  } else {
-    double minS = 1e12;
-    mini = -1;
-    for (i = 0; i < 9; ++i) {
-      if (S[i] < minS) {
-        minS = S[i];
-        mini = i;
-      }
-    }
-  }
-
-  for (i = 0; i < 9; i++) H[i] = V[i * 9 + mini];
-  denormalize_homography_reorder(H, T1, T2);
-  aom_free(a);
-  if (H[8] == 0.0) {
-    return 1;
-  } else {
-    // normalize
-    double f = 1.0 / H[8];
-    for (i = 0; i < 8; i++) mat[i] = f * H[i];
-  }
-  return 0;
-}
-
 #if CONFIG_WARPED_MOTION
-int find_projection(const int np, double *pts1, double *pts2,
-                    WarpedMotionParams *wm_params) {
-  double H[9];
-  int result = 1;
 
+#define IDET_PREC_BITS 48
+#define IDET_WARPEDMODEL_DIFF_BITS (IDET_PREC_BITS - WARPEDMODEL_PREC_BITS)
+static int find_affine_int(const int np, int *pts1, int *pts2,
+                           WarpedMotionParams *wm) {
+  int64_t A[3][3] = { { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 } };
+  int64_t C[3][3] = { { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 } };
+  int64_t Bx[3] = { 0, 0, 0 };
+  int64_t By[3] = { 0, 0, 0 };
+  int64_t Px[3], Py[3];
+  int64_t Det, iDet;
+  int i, off;
+  // Offsets to make the values in the arrays smaller
+  const int ux = pts1[0], uy = pts1[1];
+  // Let source points (xi, yi) map to destimation points (xi', yi'),
+  //     for i = 0, 1, 2, .... n-1
+  // Then if  P = [x0, y0, 1,
+  //               x1, y1, 1
+  //               x2, y2, 1,
+  //                ....
+  //              ]
+  //          q = [x0', x1', x2', ... ]'
+  //          r = [y0', y1', y2', ... ]'
+  // the least squares problems that need to be solved are:
+  //          [h1, h2, dx]' = inv(P'P)P'q and
+  //          [h3, h4, dy]' = inv(P'P)P'r
+  // where the affine transformation is given by:
+  //          x' = h1.x + h2.y + dx
+  //          y' = h3.x + h4.y + dy
+  //
+  // The loop below computes: A = P'P, Bx = P'q, By = P'r
+  for (i = 0; i < np; ++i) {
+    const int dx = *(pts2++) - ux;
+    const int dy = *(pts2++) - uy;
+    const int sx = *(pts1++) - ux;
+    const int sy = *(pts1++) - uy;
+    A[0][0] += sx * sx;
+    A[0][1] += sx * sy;
+    A[0][2] += sx;
+    A[1][1] += sy * sy;
+    A[1][2] += sy;
+    A[2][2] += 1;
+    Bx[0] += sx * dx;
+    Bx[1] += sy * dx;
+    Bx[2] += dx;
+    By[0] += sx * dy;
+    By[1] += sy * dy;
+    By[2] += dy;
+  }
+  // Compute Cofactors of A
+  C[0][0] = A[1][1] * A[2][2] - A[1][2] * A[1][2];
+  C[0][1] = A[1][2] * A[0][2] - A[0][1] * A[2][2];
+  C[0][2] = A[0][1] * A[1][2] - A[0][2] * A[1][1];
+  C[1][1] = A[0][0] * A[2][2] - A[0][2] * A[0][2];
+  C[1][2] = A[0][1] * A[0][2] - A[0][0] * A[1][2];
+  C[2][2] = A[0][0] * A[1][1] - A[0][1] * A[0][1];
+
+  // Compute Determinant of A
+  Det = C[0][0] * A[0][0] + C[0][1] * A[0][1] + C[0][2] * A[0][2];
+
+  // These are the least squares solutions but need scaling.
+  Px[0] = C[0][0] * Bx[0] + C[0][1] * Bx[1] + C[0][2] * Bx[2];
+  Px[1] = C[0][1] * Bx[0] + C[1][1] * Bx[1] + C[1][2] * Bx[2];
+  Px[2] = C[0][2] * Bx[0] + C[1][2] * Bx[1] + C[2][2] * Bx[2];
+  Py[0] = C[0][0] * By[0] + C[0][1] * By[1] + C[0][2] * By[2];
+  Py[1] = C[0][1] * By[0] + C[1][1] * By[1] + C[1][2] * By[2];
+  Py[2] = C[0][2] * By[0] + C[1][2] * By[1] + C[2][2] * By[2];
+
+  // Scale by 1/16
+  Px[0] = ROUND_POWER_OF_TWO_SIGNED(Px[0], 4);
+  Px[1] = ROUND_POWER_OF_TWO_SIGNED(Px[1], 4);
+  Px[2] = ROUND_POWER_OF_TWO_SIGNED(Px[2], 4);
+  Py[0] = ROUND_POWER_OF_TWO_SIGNED(Py[0], 4);
+  Py[1] = ROUND_POWER_OF_TWO_SIGNED(Py[1], 4);
+  Py[2] = ROUND_POWER_OF_TWO_SIGNED(Py[2], 4);
+  Det = ROUND_POWER_OF_TWO_SIGNED(Det, 4);
+  if (Det == 0) return 1;
+
+  // Compute inverse of the Determinant
+  // TODO(debargha, yuec): Try to remove this only division
+  iDet = ((int64_t)1 << IDET_PREC_BITS) / Det;
+
+  wm->wmmat[2] = ((int64_t)Px[0] * (int64_t)iDet +
+                  ((int64_t)1 << (IDET_WARPEDMODEL_DIFF_BITS - 1))) >>
+                 IDET_WARPEDMODEL_DIFF_BITS;
+  wm->wmmat[3] = ((int64_t)Px[1] * (int64_t)iDet +
+                  ((int64_t)1 << (IDET_WARPEDMODEL_DIFF_BITS - 1))) >>
+                 IDET_WARPEDMODEL_DIFF_BITS;
+  wm->wmmat[0] = ((int64_t)Px[2] * (int64_t)iDet +
+                  ((int64_t)1 << (IDET_WARPEDMODEL_DIFF_BITS + 2))) >>
+                 (IDET_WARPEDMODEL_DIFF_BITS + 3);
+  // Adjust x displacement for the offset
+  off = (ux << WARPEDMODEL_PREC_BITS) - ux * wm->wmmat[2] - uy * wm->wmmat[3];
+  wm->wmmat[0] += ROUND_POWER_OF_TWO_SIGNED(off, 3);
+
+  wm->wmmat[4] = ((int64_t)Py[0] * (int64_t)iDet +
+                  ((int64_t)1 << (IDET_WARPEDMODEL_DIFF_BITS - 1))) >>
+                 IDET_WARPEDMODEL_DIFF_BITS;
+  wm->wmmat[5] = ((int64_t)Py[1] * (int64_t)iDet +
+                  ((int64_t)1 << (IDET_WARPEDMODEL_DIFF_BITS - 1))) >>
+                 IDET_WARPEDMODEL_DIFF_BITS;
+  wm->wmmat[1] = ((int64_t)Py[2] * (int64_t)iDet +
+                  ((int64_t)1 << (IDET_WARPEDMODEL_DIFF_BITS + 2))) >>
+                 (IDET_WARPEDMODEL_DIFF_BITS + 3);
+  // Adjust y displacement for the offset
+  off = (uy << WARPEDMODEL_PREC_BITS) - ux * wm->wmmat[4] - uy * wm->wmmat[5];
+  wm->wmmat[1] += ROUND_POWER_OF_TWO_SIGNED(off, 3);
+  wm->wmmat[6] = wm->wmmat[7] = 0;
+  return 0;
+}
+
+int find_projection(const int np, int *pts1, int *pts2,
+                    WarpedMotionParams *wm_params) {
+  int result = 1;
   switch (wm_params->wmtype) {
-    case AFFINE: result = find_affine(np, pts1, pts2, H); break;
-    case ROTZOOM: result = find_rotzoom(np, pts1, pts2, H); break;
-    case HOMOGRAPHY: result = find_homography(np, pts1, pts2, H); break;
+    case AFFINE: result = find_affine_int(np, pts1, pts2, wm_params); break;
     default: assert(0 && "Invalid warped motion type!"); return 1;
   }
   if (result == 0) {
-    av1_integerize_model(H, wm_params->wmtype, wm_params);
-
     if (wm_params->wmtype == ROTZOOM) {
       wm_params->wmmat[5] = wm_params->wmmat[2];
       wm_params->wmmat[4] = -wm_params->wmmat[3];
     }
-  }
-  if (wm_params->wmtype == AFFINE || wm_params->wmtype == ROTZOOM) {
-    // check compatibility with the fast warp filter
-    int32_t *mat = wm_params->wmmat;
-    int32_t alpha, beta, gamma, delta;
+    if (wm_params->wmtype == AFFINE || wm_params->wmtype == ROTZOOM) {
+      // check compatibility with the fast warp filter
+      int32_t *mat = wm_params->wmmat;
+      int32_t alpha, beta, gamma, delta;
 
-    if (mat[2] == 0) return 1;
+      if (mat[2] == 0) return 1;
 
-    alpha = mat[2] - (1 << WARPEDMODEL_PREC_BITS);
-    beta = mat[3];
-    gamma = ((int64_t)mat[4] << WARPEDMODEL_PREC_BITS) / mat[2];
-    delta = mat[5] - (((int64_t)mat[3] * mat[4] + (mat[2] / 2)) / mat[2]) -
-            (1 << WARPEDMODEL_PREC_BITS);
+      alpha = mat[2] - (1 << WARPEDMODEL_PREC_BITS);
+      beta = mat[3];
+      gamma = ((int64_t)mat[4] << WARPEDMODEL_PREC_BITS) / mat[2];
+      delta = mat[5] - (((int64_t)mat[3] * mat[4] + (mat[2] / 2)) / mat[2]) -
+              (1 << WARPEDMODEL_PREC_BITS);
 
-    if ((4 * abs(alpha) + 7 * abs(beta) > (1 << WARPEDMODEL_PREC_BITS)) ||
-        (4 * abs(gamma) + 4 * abs(delta) > (1 << WARPEDMODEL_PREC_BITS))) {
-      return 1;
+      if ((4 * abs(alpha) + 7 * abs(beta) > (1 << WARPEDMODEL_PREC_BITS)) ||
+          (4 * abs(gamma) + 4 * abs(delta) > (1 << WARPEDMODEL_PREC_BITS))) {
+        return 1;
+      }
     }
   }
 
diff --git a/av1/common/warped_motion.h b/av1/common/warped_motion.h
index b3fb4f2..f113760 100644
--- a/av1/common/warped_motion.h
+++ b/av1/common/warped_motion.h
@@ -27,7 +27,7 @@
 #if CONFIG_WARPED_MOTION
 #define SAMPLES_PER_NEIGHBOR 4
 #define SAMPLES_ARRAY_SIZE ((2 * MAX_MIB_SIZE + 2) * SAMPLES_PER_NEIGHBOR * 2)
-#define DEFAULT_WMTYPE ROTZOOM
+#define DEFAULT_WMTYPE AFFINE
 #endif  // CONFIG_WARPED_MOTION
 
 const int16_t warped_filter[WARPEDPIXEL_PREC_SHIFTS * 3][8];
@@ -86,16 +86,6 @@
                     int p_height, int p_stride, int subsampling_x,
                     int subsampling_y, int x_scale, int y_scale, int ref_frm);
 
-// Integerize model into the WarpedMotionParams structure
-void av1_integerize_model(const double *model, TransformationType wmtype,
-                          WarpedMotionParams *wm);
-
-int find_translation(const int np, double *pts1, double *pts2, double *mat);
-int find_rotzoom(const int np, double *pts1, double *pts2, double *mat);
-int find_affine(const int np, double *pts1, double *pts2, double *mat);
-int find_hortrapezoid(const int np, double *pts1, double *pts2, double *mat);
-int find_vertrapezoid(const int np, double *pts1, double *pts2, double *mat);
-int find_homography(const int np, double *pts1, double *pts2, double *mat);
-int find_projection(const int np, double *pts1, double *pts2,
+int find_projection(const int np, int *pts1, int *pts2,
                     WarpedMotionParams *wm_params);
 #endif  // AV1_COMMON_WARPED_MOTION_H_
diff --git a/av1/decoder/decodemv.c b/av1/decoder/decodemv.c
index fc21bc4..daef18b 100644
--- a/av1/decoder/decodemv.c
+++ b/av1/decoder/decodemv.c
@@ -1577,7 +1577,7 @@
 #endif  // CONFIG_REF_MV && CONFIG_EXT_INTER
   int16_t mode_ctx = 0;
 #if CONFIG_WARPED_MOTION
-  double pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
+  int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
 #endif  // CONFIG_WARPED_MOTION
 #if CONFIG_EC_ADAPT
   FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
diff --git a/av1/encoder/ransac.c b/av1/encoder/ransac.c
index 473b30a..ab0d530 100644
--- a/av1/encoder/ransac.c
+++ b/av1/encoder/ransac.c
@@ -133,6 +133,767 @@
   }
 }
 
+///////////////////////////////////////////////////////////////////////////////
+// svdcmp
+// Adopted from Numerical Recipes in C
+
+static const double TINY_NEAR_ZERO = 1.0E-12;
+
+static INLINE double sign(double a, double b) {
+  return ((b) >= 0 ? fabs(a) : -fabs(a));
+}
+
+static INLINE double pythag(double a, double b) {
+  double ct;
+  const double absa = fabs(a);
+  const double absb = fabs(b);
+
+  if (absa > absb) {
+    ct = absb / absa;
+    return absa * sqrt(1.0 + ct * ct);
+  } else {
+    ct = absa / absb;
+    return (absb == 0) ? 0 : absb * sqrt(1.0 + ct * ct);
+  }
+}
+
+static void multiply_mat(const double *m1, const double *m2, double *res,
+                         const int m1_rows, const int inner_dim,
+                         const int m2_cols) {
+  double sum;
+
+  int row, col, inner;
+  for (row = 0; row < m1_rows; ++row) {
+    for (col = 0; col < m2_cols; ++col) {
+      sum = 0;
+      for (inner = 0; inner < inner_dim; ++inner)
+        sum += m1[row * inner_dim + inner] * m2[inner * m2_cols + col];
+      *(res++) = sum;
+    }
+  }
+}
+
+static int svdcmp(double **u, int m, int n, double w[], double **v) {
+  const int max_its = 30;
+  int flag, i, its, j, jj, k, l, nm;
+  double anorm, c, f, g, h, s, scale, x, y, z;
+  double *rv1 = (double *)aom_malloc(sizeof(*rv1) * (n + 1));
+  g = scale = anorm = 0.0;
+  for (i = 0; i < n; i++) {
+    l = i + 1;
+    rv1[i] = scale * g;
+    g = s = scale = 0.0;
+    if (i < m) {
+      for (k = i; k < m; k++) scale += fabs(u[k][i]);
+      if (scale != 0.) {
+        for (k = i; k < m; k++) {
+          u[k][i] /= scale;
+          s += u[k][i] * u[k][i];
+        }
+        f = u[i][i];
+        g = -sign(sqrt(s), f);
+        h = f * g - s;
+        u[i][i] = f - g;
+        for (j = l; j < n; j++) {
+          for (s = 0.0, k = i; k < m; k++) s += u[k][i] * u[k][j];
+          f = s / h;
+          for (k = i; k < m; k++) u[k][j] += f * u[k][i];
+        }
+        for (k = i; k < m; k++) u[k][i] *= scale;
+      }
+    }
+    w[i] = scale * g;
+    g = s = scale = 0.0;
+    if (i < m && i != n - 1) {
+      for (k = l; k < n; k++) scale += fabs(u[i][k]);
+      if (scale != 0.) {
+        for (k = l; k < n; k++) {
+          u[i][k] /= scale;
+          s += u[i][k] * u[i][k];
+        }
+        f = u[i][l];
+        g = -sign(sqrt(s), f);
+        h = f * g - s;
+        u[i][l] = f - g;
+        for (k = l; k < n; k++) rv1[k] = u[i][k] / h;
+        for (j = l; j < m; j++) {
+          for (s = 0.0, k = l; k < n; k++) s += u[j][k] * u[i][k];
+          for (k = l; k < n; k++) u[j][k] += s * rv1[k];
+        }
+        for (k = l; k < n; k++) u[i][k] *= scale;
+      }
+    }
+    anorm = fmax(anorm, (fabs(w[i]) + fabs(rv1[i])));
+  }
+
+  for (i = n - 1; i >= 0; i--) {
+    if (i < n - 1) {
+      if (g != 0.) {
+        for (j = l; j < n; j++) v[j][i] = (u[i][j] / u[i][l]) / g;
+        for (j = l; j < n; j++) {
+          for (s = 0.0, k = l; k < n; k++) s += u[i][k] * v[k][j];
+          for (k = l; k < n; k++) v[k][j] += s * v[k][i];
+        }
+      }
+      for (j = l; j < n; j++) v[i][j] = v[j][i] = 0.0;
+    }
+    v[i][i] = 1.0;
+    g = rv1[i];
+    l = i;
+  }
+  for (i = AOMMIN(m, n) - 1; i >= 0; i--) {
+    l = i + 1;
+    g = w[i];
+    for (j = l; j < n; j++) u[i][j] = 0.0;
+    if (g != 0.) {
+      g = 1.0 / g;
+      for (j = l; j < n; j++) {
+        for (s = 0.0, k = l; k < m; k++) s += u[k][i] * u[k][j];
+        f = (s / u[i][i]) * g;
+        for (k = i; k < m; k++) u[k][j] += f * u[k][i];
+      }
+      for (j = i; j < m; j++) u[j][i] *= g;
+    } else {
+      for (j = i; j < m; j++) u[j][i] = 0.0;
+    }
+    ++u[i][i];
+  }
+  for (k = n - 1; k >= 0; k--) {
+    for (its = 0; its < max_its; its++) {
+      flag = 1;
+      for (l = k; l >= 0; l--) {
+        nm = l - 1;
+        if ((double)(fabs(rv1[l]) + anorm) == anorm || nm < 0) {
+          flag = 0;
+          break;
+        }
+        if ((double)(fabs(w[nm]) + anorm) == anorm) break;
+      }
+      if (flag) {
+        c = 0.0;
+        s = 1.0;
+        for (i = l; i <= k; i++) {
+          f = s * rv1[i];
+          rv1[i] = c * rv1[i];
+          if ((double)(fabs(f) + anorm) == anorm) break;
+          g = w[i];
+          h = pythag(f, g);
+          w[i] = h;
+          h = 1.0 / h;
+          c = g * h;
+          s = -f * h;
+          for (j = 0; j < m; j++) {
+            y = u[j][nm];
+            z = u[j][i];
+            u[j][nm] = y * c + z * s;
+            u[j][i] = z * c - y * s;
+          }
+        }
+      }
+      z = w[k];
+      if (l == k) {
+        if (z < 0.0) {
+          w[k] = -z;
+          for (j = 0; j < n; j++) v[j][k] = -v[j][k];
+        }
+        break;
+      }
+      if (its == max_its - 1) {
+        aom_free(rv1);
+        return 1;
+      }
+      assert(k > 0);
+      x = w[l];
+      nm = k - 1;
+      y = w[nm];
+      g = rv1[nm];
+      h = rv1[k];
+      f = ((y - z) * (y + z) + (g - h) * (g + h)) / (2.0 * h * y);
+      g = pythag(f, 1.0);
+      f = ((x - z) * (x + z) + h * ((y / (f + sign(g, f))) - h)) / x;
+      c = s = 1.0;
+      for (j = l; j <= nm; j++) {
+        i = j + 1;
+        g = rv1[i];
+        y = w[i];
+        h = s * g;
+        g = c * g;
+        z = pythag(f, h);
+        rv1[j] = z;
+        c = f / z;
+        s = h / z;
+        f = x * c + g * s;
+        g = g * c - x * s;
+        h = y * s;
+        y *= c;
+        for (jj = 0; jj < n; jj++) {
+          x = v[jj][j];
+          z = v[jj][i];
+          v[jj][j] = x * c + z * s;
+          v[jj][i] = z * c - x * s;
+        }
+        z = pythag(f, h);
+        w[j] = z;
+        if (z != 0.) {
+          z = 1.0 / z;
+          c = f * z;
+          s = h * z;
+        }
+        f = c * g + s * y;
+        x = c * y - s * g;
+        for (jj = 0; jj < m; jj++) {
+          y = u[jj][j];
+          z = u[jj][i];
+          u[jj][j] = y * c + z * s;
+          u[jj][i] = z * c - y * s;
+        }
+      }
+      rv1[l] = 0.0;
+      rv1[k] = f;
+      w[k] = x;
+    }
+  }
+  aom_free(rv1);
+  return 0;
+}
+
+static int SVD(double *U, double *W, double *V, double *matx, int M, int N) {
+  // Assumes allocation for U is MxN
+  double **nrU = (double **)aom_malloc((M) * sizeof(*nrU));
+  double **nrV = (double **)aom_malloc((N) * sizeof(*nrV));
+  int problem, i;
+
+  problem = !(nrU && nrV);
+  if (!problem) {
+    for (i = 0; i < M; i++) {
+      nrU[i] = &U[i * N];
+    }
+    for (i = 0; i < N; i++) {
+      nrV[i] = &V[i * N];
+    }
+  } else {
+    if (nrU) aom_free(nrU);
+    if (nrV) aom_free(nrV);
+    return 1;
+  }
+
+  /* copy from given matx into nrU */
+  for (i = 0; i < M; i++) {
+    memcpy(&(nrU[i][0]), matx + N * i, N * sizeof(*matx));
+  }
+
+  /* HERE IT IS: do SVD */
+  if (svdcmp(nrU, M, N, W, nrV)) {
+    aom_free(nrU);
+    aom_free(nrV);
+    return 1;
+  }
+
+  /* aom_free Numerical Recipes arrays */
+  aom_free(nrU);
+  aom_free(nrV);
+
+  return 0;
+}
+
+int pseudo_inverse(double *inv, double *matx, const int M, const int N) {
+  double ans;
+  int i, j, k;
+  double *const U = (double *)aom_malloc(M * N * sizeof(*matx));
+  double *const W = (double *)aom_malloc(N * sizeof(*matx));
+  double *const V = (double *)aom_malloc(N * N * sizeof(*matx));
+
+  if (!(U && W && V)) {
+    return 1;
+  }
+  if (SVD(U, W, V, matx, M, N)) {
+    aom_free(U);
+    aom_free(W);
+    aom_free(V);
+    return 1;
+  }
+  for (i = 0; i < N; i++) {
+    if (fabs(W[i]) < TINY_NEAR_ZERO) {
+      aom_free(U);
+      aom_free(W);
+      aom_free(V);
+      return 1;
+    }
+  }
+
+  for (i = 0; i < N; i++) {
+    for (j = 0; j < M; j++) {
+      ans = 0;
+      for (k = 0; k < N; k++) {
+        ans += V[k + N * i] * U[k + N * j] / W[k];
+      }
+      inv[j + M * i] = ans;
+    }
+  }
+  aom_free(U);
+  aom_free(W);
+  aom_free(V);
+  return 0;
+}
+
+static void normalize_homography(double *pts, int n, double *T) {
+  double *p = pts;
+  double mean[2] = { 0, 0 };
+  double msqe = 0;
+  double scale;
+  int i;
+  for (i = 0; i < n; ++i, p += 2) {
+    mean[0] += p[0];
+    mean[1] += p[1];
+  }
+  mean[0] /= n;
+  mean[1] /= n;
+  for (p = pts, i = 0; i < n; ++i, p += 2) {
+    p[0] -= mean[0];
+    p[1] -= mean[1];
+    msqe += sqrt(p[0] * p[0] + p[1] * p[1]);
+  }
+  msqe /= n;
+  scale = sqrt(2) / msqe;
+  T[0] = scale;
+  T[1] = 0;
+  T[2] = -scale * mean[0];
+  T[3] = 0;
+  T[4] = scale;
+  T[5] = -scale * mean[1];
+  T[6] = 0;
+  T[7] = 0;
+  T[8] = 1;
+  for (p = pts, i = 0; i < n; ++i, p += 2) {
+    p[0] *= scale;
+    p[1] *= scale;
+  }
+}
+
+static void invnormalize_mat(double *T, double *iT) {
+  double is = 1.0 / T[0];
+  double m0 = -T[2] * is;
+  double m1 = -T[5] * is;
+  iT[0] = is;
+  iT[1] = 0;
+  iT[2] = m0;
+  iT[3] = 0;
+  iT[4] = is;
+  iT[5] = m1;
+  iT[6] = 0;
+  iT[7] = 0;
+  iT[8] = 1;
+}
+
+static void denormalize_homography(double *params, double *T1, double *T2) {
+  double iT2[9];
+  double params2[9];
+  invnormalize_mat(T2, iT2);
+  multiply_mat(params, T1, params2, 3, 3, 3);
+  multiply_mat(iT2, params2, params, 3, 3, 3);
+}
+
+static void denormalize_homography_reorder(double *params, double *T1,
+                                           double *T2) {
+  double params_denorm[MAX_PARAMDIM];
+  memcpy(params_denorm, params, sizeof(*params) * 8);
+  params_denorm[8] = 1.0;
+  denormalize_homography(params_denorm, T1, T2);
+  params[0] = params_denorm[2];
+  params[1] = params_denorm[5];
+  params[2] = params_denorm[0];
+  params[3] = params_denorm[1];
+  params[4] = params_denorm[3];
+  params[5] = params_denorm[4];
+  params[6] = params_denorm[6];
+  params[7] = params_denorm[7];
+}
+
+static void denormalize_affine_reorder(double *params, double *T1, double *T2) {
+  double params_denorm[MAX_PARAMDIM];
+  params_denorm[0] = params[0];
+  params_denorm[1] = params[1];
+  params_denorm[2] = params[4];
+  params_denorm[3] = params[2];
+  params_denorm[4] = params[3];
+  params_denorm[5] = params[5];
+  params_denorm[6] = params_denorm[7] = 0;
+  params_denorm[8] = 1;
+  denormalize_homography(params_denorm, T1, T2);
+  params[0] = params_denorm[2];
+  params[1] = params_denorm[5];
+  params[2] = params_denorm[0];
+  params[3] = params_denorm[1];
+  params[4] = params_denorm[3];
+  params[5] = params_denorm[4];
+  params[6] = params[7] = 0;
+}
+
+static void denormalize_rotzoom_reorder(double *params, double *T1,
+                                        double *T2) {
+  double params_denorm[MAX_PARAMDIM];
+  params_denorm[0] = params[0];
+  params_denorm[1] = params[1];
+  params_denorm[2] = params[2];
+  params_denorm[3] = -params[1];
+  params_denorm[4] = params[0];
+  params_denorm[5] = params[3];
+  params_denorm[6] = params_denorm[7] = 0;
+  params_denorm[8] = 1;
+  denormalize_homography(params_denorm, T1, T2);
+  params[0] = params_denorm[2];
+  params[1] = params_denorm[5];
+  params[2] = params_denorm[0];
+  params[3] = params_denorm[1];
+  params[4] = -params[3];
+  params[5] = params[2];
+  params[6] = params[7] = 0;
+}
+
+static void denormalize_translation_reorder(double *params, double *T1,
+                                            double *T2) {
+  double params_denorm[MAX_PARAMDIM];
+  params_denorm[0] = 1;
+  params_denorm[1] = 0;
+  params_denorm[2] = params[0];
+  params_denorm[3] = 0;
+  params_denorm[4] = 1;
+  params_denorm[5] = params[1];
+  params_denorm[6] = params_denorm[7] = 0;
+  params_denorm[8] = 1;
+  denormalize_homography(params_denorm, T1, T2);
+  params[0] = params_denorm[2];
+  params[1] = params_denorm[5];
+  params[2] = params[5] = 1;
+  params[3] = params[4] = 0;
+  params[6] = params[7] = 0;
+}
+
+static int find_translation(const int np, double *pts1, double *pts2,
+                            double *mat) {
+  int i;
+  double sx, sy, dx, dy;
+  double sumx, sumy;
+
+  double T1[9], T2[9];
+  normalize_homography(pts1, np, T1);
+  normalize_homography(pts2, np, T2);
+
+  sumx = 0;
+  sumy = 0;
+  for (i = 0; i < np; ++i) {
+    dx = *(pts2++);
+    dy = *(pts2++);
+    sx = *(pts1++);
+    sy = *(pts1++);
+
+    sumx += dx - sx;
+    sumy += dy - sy;
+  }
+  mat[0] = sumx / np;
+  mat[1] = sumy / np;
+  denormalize_translation_reorder(mat, T1, T2);
+  return 0;
+}
+
+static int find_rotzoom(const int np, double *pts1, double *pts2, double *mat) {
+  const int np2 = np * 2;
+  double *a = (double *)aom_malloc(sizeof(*a) * np2 * 9);
+  double *b = a + np2 * 4;
+  double *temp = b + np2;
+  int i;
+  double sx, sy, dx, dy;
+
+  double T1[9], T2[9];
+  normalize_homography(pts1, np, T1);
+  normalize_homography(pts2, np, T2);
+
+  for (i = 0; i < np; ++i) {
+    dx = *(pts2++);
+    dy = *(pts2++);
+    sx = *(pts1++);
+    sy = *(pts1++);
+
+    a[i * 2 * 4 + 0] = sx;
+    a[i * 2 * 4 + 1] = sy;
+    a[i * 2 * 4 + 2] = 1;
+    a[i * 2 * 4 + 3] = 0;
+    a[(i * 2 + 1) * 4 + 0] = sy;
+    a[(i * 2 + 1) * 4 + 1] = -sx;
+    a[(i * 2 + 1) * 4 + 2] = 0;
+    a[(i * 2 + 1) * 4 + 3] = 1;
+
+    b[2 * i] = dx;
+    b[2 * i + 1] = dy;
+  }
+  if (pseudo_inverse(temp, a, np2, 4)) {
+    aom_free(a);
+    return 1;
+  }
+  multiply_mat(temp, b, mat, 4, np2, 1);
+  denormalize_rotzoom_reorder(mat, T1, T2);
+  aom_free(a);
+  return 0;
+}
+
+static int find_affine(const int np, double *pts1, double *pts2, double *mat) {
+  const int np2 = np * 2;
+  double *a = (double *)aom_malloc(sizeof(*a) * np2 * 13);
+  double *b = a + np2 * 6;
+  double *temp = b + np2;
+  int i;
+  double sx, sy, dx, dy;
+
+  double T1[9], T2[9];
+  normalize_homography(pts1, np, T1);
+  normalize_homography(pts2, np, T2);
+
+  for (i = 0; i < np; ++i) {
+    dx = *(pts2++);
+    dy = *(pts2++);
+    sx = *(pts1++);
+    sy = *(pts1++);
+
+    a[i * 2 * 6 + 0] = sx;
+    a[i * 2 * 6 + 1] = sy;
+    a[i * 2 * 6 + 2] = 0;
+    a[i * 2 * 6 + 3] = 0;
+    a[i * 2 * 6 + 4] = 1;
+    a[i * 2 * 6 + 5] = 0;
+    a[(i * 2 + 1) * 6 + 0] = 0;
+    a[(i * 2 + 1) * 6 + 1] = 0;
+    a[(i * 2 + 1) * 6 + 2] = sx;
+    a[(i * 2 + 1) * 6 + 3] = sy;
+    a[(i * 2 + 1) * 6 + 4] = 0;
+    a[(i * 2 + 1) * 6 + 5] = 1;
+
+    b[2 * i] = dx;
+    b[2 * i + 1] = dy;
+  }
+  if (pseudo_inverse(temp, a, np2, 6)) {
+    aom_free(a);
+    return 1;
+  }
+  multiply_mat(temp, b, mat, 6, np2, 1);
+  denormalize_affine_reorder(mat, T1, T2);
+  aom_free(a);
+  return 0;
+}
+
+static int find_vertrapezoid(const int np, double *pts1, double *pts2,
+                             double *mat) {
+  const int np3 = np * 3;
+  double *a = (double *)aom_malloc(sizeof(*a) * np3 * 14);
+  double *U = a + np3 * 7;
+  double S[7], V[7 * 7], H[9];
+  int i, mini;
+  double sx, sy, dx, dy;
+  double T1[9], T2[9];
+
+  normalize_homography(pts1, np, T1);
+  normalize_homography(pts2, np, T2);
+
+  for (i = 0; i < np; ++i) {
+    dx = *(pts2++);
+    dy = *(pts2++);
+    sx = *(pts1++);
+    sy = *(pts1++);
+
+    a[i * 3 * 7 + 0] = a[i * 3 * 7 + 1] = 0;
+    a[i * 3 * 7 + 2] = -sx;
+    a[i * 3 * 7 + 3] = -sy;
+    a[i * 3 * 7 + 4] = -1;
+    a[i * 3 * 7 + 5] = dy * sx;
+    a[i * 3 * 7 + 6] = dy;
+
+    a[(i * 3 + 1) * 7 + 0] = sx;
+    a[(i * 3 + 1) * 7 + 1] = 1;
+    a[(i * 3 + 1) * 7 + 2] = a[(i * 3 + 1) * 7 + 3] = a[(i * 3 + 1) * 7 + 4] =
+        0;
+    a[(i * 3 + 1) * 7 + 5] = -dx * sx;
+    a[(i * 3 + 1) * 7 + 6] = -dx;
+
+    a[(i * 3 + 2) * 7 + 0] = -dy * sx;
+    a[(i * 3 + 2) * 7 + 1] = -dy;
+    a[(i * 3 + 2) * 7 + 2] = dx * sx;
+    a[(i * 3 + 2) * 7 + 3] = dx * sy;
+    a[(i * 3 + 2) * 7 + 4] = dx;
+    a[(i * 3 + 2) * 7 + 5] = a[(i * 3 + 2) * 7 + 6] = 0;
+  }
+  if (SVD(U, S, V, a, np3, 7)) {
+    aom_free(a);
+    return 1;
+  } else {
+    double minS = 1e12;
+    mini = -1;
+    for (i = 0; i < 7; ++i) {
+      if (S[i] < minS) {
+        minS = S[i];
+        mini = i;
+      }
+    }
+  }
+  H[1] = H[7] = 0;
+  for (i = 0; i < 1; i++) H[i] = V[i * 7 + mini];
+  for (; i < 6; i++) H[i + 1] = V[i * 7 + mini];
+  for (; i < 7; i++) H[i + 2] = V[i * 7 + mini];
+
+  denormalize_homography_reorder(H, T1, T2);
+  aom_free(a);
+  if (H[8] == 0.0) {
+    return 1;
+  } else {
+    // normalize
+    double f = 1.0 / H[8];
+    for (i = 0; i < 8; i++) mat[i] = f * H[i];
+  }
+  return 0;
+}
+
+static int find_hortrapezoid(const int np, double *pts1, double *pts2,
+                             double *mat) {
+  const int np3 = np * 3;
+  double *a = (double *)aom_malloc(sizeof(*a) * np3 * 14);
+  double *U = a + np3 * 7;
+  double S[7], V[7 * 7], H[9];
+  int i, mini;
+  double sx, sy, dx, dy;
+  double T1[9], T2[9];
+
+  normalize_homography(pts1, np, T1);
+  normalize_homography(pts2, np, T2);
+
+  for (i = 0; i < np; ++i) {
+    dx = *(pts2++);
+    dy = *(pts2++);
+    sx = *(pts1++);
+    sy = *(pts1++);
+
+    a[i * 3 * 7 + 0] = a[i * 3 * 7 + 1] = a[i * 3 * 7 + 2] = 0;
+    a[i * 3 * 7 + 3] = -sy;
+    a[i * 3 * 7 + 4] = -1;
+    a[i * 3 * 7 + 5] = dy * sy;
+    a[i * 3 * 7 + 6] = dy;
+
+    a[(i * 3 + 1) * 7 + 0] = sx;
+    a[(i * 3 + 1) * 7 + 1] = sy;
+    a[(i * 3 + 1) * 7 + 2] = 1;
+    a[(i * 3 + 1) * 7 + 3] = a[(i * 3 + 1) * 7 + 4] = 0;
+    a[(i * 3 + 1) * 7 + 5] = -dx * sy;
+    a[(i * 3 + 1) * 7 + 6] = -dx;
+
+    a[(i * 3 + 2) * 7 + 0] = -dy * sx;
+    a[(i * 3 + 2) * 7 + 1] = -dy * sy;
+    a[(i * 3 + 2) * 7 + 2] = -dy;
+    a[(i * 3 + 2) * 7 + 3] = dx * sy;
+    a[(i * 3 + 2) * 7 + 4] = dx;
+    a[(i * 3 + 2) * 7 + 5] = a[(i * 3 + 2) * 7 + 6] = 0;
+  }
+
+  if (SVD(U, S, V, a, np3, 7)) {
+    aom_free(a);
+    return 1;
+  } else {
+    double minS = 1e12;
+    mini = -1;
+    for (i = 0; i < 7; ++i) {
+      if (S[i] < minS) {
+        minS = S[i];
+        mini = i;
+      }
+    }
+  }
+  H[3] = H[6] = 0;
+  for (i = 0; i < 3; i++) H[i] = V[i * 7 + mini];
+  for (; i < 5; i++) H[i + 1] = V[i * 7 + mini];
+  for (; i < 7; i++) H[i + 2] = V[i * 7 + mini];
+
+  denormalize_homography_reorder(H, T1, T2);
+  aom_free(a);
+  if (H[8] == 0.0) {
+    return 1;
+  } else {
+    // normalize
+    double f = 1.0 / H[8];
+    for (i = 0; i < 8; i++) mat[i] = f * H[i];
+  }
+  return 0;
+}
+
+static int find_homography(const int np, double *pts1, double *pts2,
+                           double *mat) {
+  // Implemented from Peter Kovesi's normalized implementation
+  const int np3 = np * 3;
+  double *a = (double *)aom_malloc(sizeof(*a) * np3 * 18);
+  double *U = a + np3 * 9;
+  double S[9], V[9 * 9], H[9];
+  int i, mini;
+  double sx, sy, dx, dy;
+  double T1[9], T2[9];
+
+  normalize_homography(pts1, np, T1);
+  normalize_homography(pts2, np, T2);
+
+  for (i = 0; i < np; ++i) {
+    dx = *(pts2++);
+    dy = *(pts2++);
+    sx = *(pts1++);
+    sy = *(pts1++);
+
+    a[i * 3 * 9 + 0] = a[i * 3 * 9 + 1] = a[i * 3 * 9 + 2] = 0;
+    a[i * 3 * 9 + 3] = -sx;
+    a[i * 3 * 9 + 4] = -sy;
+    a[i * 3 * 9 + 5] = -1;
+    a[i * 3 * 9 + 6] = dy * sx;
+    a[i * 3 * 9 + 7] = dy * sy;
+    a[i * 3 * 9 + 8] = dy;
+
+    a[(i * 3 + 1) * 9 + 0] = sx;
+    a[(i * 3 + 1) * 9 + 1] = sy;
+    a[(i * 3 + 1) * 9 + 2] = 1;
+    a[(i * 3 + 1) * 9 + 3] = a[(i * 3 + 1) * 9 + 4] = a[(i * 3 + 1) * 9 + 5] =
+        0;
+    a[(i * 3 + 1) * 9 + 6] = -dx * sx;
+    a[(i * 3 + 1) * 9 + 7] = -dx * sy;
+    a[(i * 3 + 1) * 9 + 8] = -dx;
+
+    a[(i * 3 + 2) * 9 + 0] = -dy * sx;
+    a[(i * 3 + 2) * 9 + 1] = -dy * sy;
+    a[(i * 3 + 2) * 9 + 2] = -dy;
+    a[(i * 3 + 2) * 9 + 3] = dx * sx;
+    a[(i * 3 + 2) * 9 + 4] = dx * sy;
+    a[(i * 3 + 2) * 9 + 5] = dx;
+    a[(i * 3 + 2) * 9 + 6] = a[(i * 3 + 2) * 9 + 7] = a[(i * 3 + 2) * 9 + 8] =
+        0;
+  }
+
+  if (SVD(U, S, V, a, np3, 9)) {
+    aom_free(a);
+    return 1;
+  } else {
+    double minS = 1e12;
+    mini = -1;
+    for (i = 0; i < 9; ++i) {
+      if (S[i] < minS) {
+        minS = S[i];
+        mini = i;
+      }
+    }
+  }
+
+  for (i = 0; i < 9; i++) H[i] = V[i * 9 + mini];
+  denormalize_homography_reorder(H, T1, T2);
+  aom_free(a);
+  if (H[8] == 0.0) {
+    return 1;
+  } else {
+    // normalize
+    double f = 1.0 / H[8];
+    for (i = 0; i < 8; i++) mat[i] = f * H[i];
+  }
+  return 0;
+}
+
 static int get_rand_indices(int npoints, int minpts, int *indices,
                             unsigned int *seed) {
   int i, j;
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index f721df0..42d906d 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -7944,7 +7944,7 @@
 #endif  // CONFIG_EXT_INTER
 #endif  // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
 #if CONFIG_WARPED_MOTION
-  double pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
+  int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
 #endif  // CONFIG_WARPED_MOTION
   int64_t rd = INT64_MAX;
   BUFFER_SET orig_dst, tmp_dst;