Changed scaling of MVs to use higher precision.

This is intended to be a no-op when scaling is not
enabled, but is expected to result in more accurate
prediction when references need to be scaled.

However, note that all xs, ys, subpel_x and subpel_y values
are now at higher than 1/16th precision.

Change-Id: I4b22573ea290a31fc58ead980bb0d5e5a9e89243
diff --git a/av1/common/enums.h b/av1/common/enums.h
index ee0f10c..1b59ce0 100644
--- a/av1/common/enums.h
+++ b/av1/common/enums.h
@@ -604,8 +604,6 @@
 } RestorationType;
 #endif  // CONFIG_LOOP_RESTORATION
 
-#define SCALE_DENOMINATOR 16
-
 #if CONFIG_FRAME_SUPERRES
 #define SUPERRES_SCALE_BITS 3
 #define SUPERRES_SCALE_NUMERATOR_MIN 8
diff --git a/av1/common/reconinter.c b/av1/common/reconinter.c
index a469370..363e291 100644
--- a/av1/common/reconinter.c
+++ b/av1/common/reconinter.c
@@ -827,11 +827,12 @@
   const MV mv_q4 = { is_q4 ? src_mv->row : src_mv->row * 2,
                      is_q4 ? src_mv->col : src_mv->col * 2 };
   MV32 mv = av1_scale_mv(&mv_q4, x, y, sf);
-  const int subpel_x = mv.col & SUBPEL_MASK;
-  const int subpel_y = mv.row & SUBPEL_MASK;
+  const int subpel_x = mv.col & SCALE_SUBPEL_MASK;
+  const int subpel_y = mv.row & SCALE_SUBPEL_MASK;
   ConvolveParams conv_params = get_conv_params(ref, ref, plane);
 
-  src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS);
+  src += (mv.row >> SCALE_SUBPEL_BITS) * src_stride +
+         (mv.col >> SCALE_SUBPEL_BITS);
 
   av1_make_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y,
                            sf, w, h, &conv_params, interp_filter,
@@ -864,10 +865,11 @@
   const MV mv_q4 = { is_q4 ? src_mv->row : src_mv->row * 2,
                      is_q4 ? src_mv->col : src_mv->col * 2 };
   MV32 mv = av1_scale_mv(&mv_q4, x, y, sf);
-  const int subpel_x = mv.col & SUBPEL_MASK;
-  const int subpel_y = mv.row & SUBPEL_MASK;
+  const int subpel_x = mv.col & SCALE_SUBPEL_MASK;
+  const int subpel_y = mv.row & SCALE_SUBPEL_MASK;
 
-  src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS);
+  src += (mv.row >> SCALE_SUBPEL_BITS) * src_stride +
+         (mv.col >> SCALE_SUBPEL_BITS);
 
   av1_make_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y,
                            sf, w, h, conv_params, interp_filter,
@@ -1038,27 +1040,28 @@
             int pos_y = sf->scale_value_y(orig_pos_y, sf);
             int pos_x = sf->scale_value_x(orig_pos_x, sf);
 
-            const int top = -((AOM_INTERP_EXTEND + bh) << SUBPEL_BITS);
+            const int top = -((AOM_INTERP_EXTEND + bh) << SCALE_SUBPEL_BITS);
             const int bottom = (pre_buf->height + AOM_INTERP_EXTEND)
-                               << SUBPEL_BITS;
-            const int left = -((AOM_INTERP_EXTEND + bw) << SUBPEL_BITS);
+                               << SCALE_SUBPEL_BITS;
+            const int left = -((AOM_INTERP_EXTEND + bw) << SCALE_SUBPEL_BITS);
             const int right = (pre_buf->width + AOM_INTERP_EXTEND)
-                              << SUBPEL_BITS;
+                              << SCALE_SUBPEL_BITS;
             pos_y = clamp(pos_y, top, bottom);
             pos_x = clamp(pos_x, left, right);
 
-            pre = pre_buf->buf0 + (pos_y >> SUBPEL_BITS) * pre_buf->stride +
-                  (pos_x >> SUBPEL_BITS);
-            subpel_x = pos_x & SUBPEL_MASK;
-            subpel_y = pos_y & SUBPEL_MASK;
+            pre = pre_buf->buf0 +
+                  (pos_y >> SCALE_SUBPEL_BITS) * pre_buf->stride +
+                  (pos_x >> SCALE_SUBPEL_BITS);
+            subpel_x = pos_x & SCALE_SUBPEL_MASK;
+            subpel_y = pos_y & SCALE_SUBPEL_MASK;
             xs = sf->x_step_q4;
             ys = sf->y_step_q4;
           } else {
             const MV mv_q4 = clamp_mv_to_umv_border_sb(
                 xd, &mv, bw, bh, pd->subsampling_x, pd->subsampling_y);
-            xs = ys = SCALE_DENOMINATOR;
-            subpel_x = mv_q4.col & SUBPEL_MASK;
-            subpel_y = mv_q4.row & SUBPEL_MASK;
+            xs = ys = SCALE_SUBPEL_SHIFTS;
+            subpel_x = (mv_q4.col & SUBPEL_MASK) << SCALE_EXTRA_BITS;
+            subpel_y = (mv_q4.row & SUBPEL_MASK) << SCALE_EXTRA_BITS;
             pre = pre_buf->buf +
                   (y + (mv_q4.row >> SUBPEL_BITS)) * pre_buf->stride +
                   (x + (mv_q4.col >> SUBPEL_BITS));
@@ -1160,26 +1163,31 @@
 
         // Clamp against the reference frame borders, with enough extension
         // that we don't force the reference block to be partially onscreen.
-        const int top = -((AOM_INTERP_EXTEND + bh) << SUBPEL_BITS);
-        const int bottom = (pre_buf->height + AOM_INTERP_EXTEND) << SUBPEL_BITS;
-        const int left = -((AOM_INTERP_EXTEND + bw) << SUBPEL_BITS);
-        const int right = (pre_buf->width + AOM_INTERP_EXTEND) << SUBPEL_BITS;
+        const int top = -((AOM_INTERP_EXTEND + bh) << SCALE_SUBPEL_BITS);
+        const int bottom = (pre_buf->height + AOM_INTERP_EXTEND)
+                           << SCALE_SUBPEL_BITS;
+        const int left = -((AOM_INTERP_EXTEND + bw) << SCALE_SUBPEL_BITS);
+        const int right = (pre_buf->width + AOM_INTERP_EXTEND)
+                          << SCALE_SUBPEL_BITS;
         pos_y = clamp(pos_y, top, bottom);
         pos_x = clamp(pos_x, left, right);
 
-        pre[ref] = pre_buf->buf0 + (pos_y >> SUBPEL_BITS) * pre_buf->stride +
-                   (pos_x >> SUBPEL_BITS);
-        subpel_params[ref].subpel_x = pos_x & SUBPEL_MASK;
-        subpel_params[ref].subpel_y = pos_y & SUBPEL_MASK;
+        pre[ref] = pre_buf->buf0 +
+                   (pos_y >> SCALE_SUBPEL_BITS) * pre_buf->stride +
+                   (pos_x >> SCALE_SUBPEL_BITS);
+        subpel_params[ref].subpel_x = pos_x & SCALE_SUBPEL_MASK;
+        subpel_params[ref].subpel_y = pos_y & SCALE_SUBPEL_MASK;
         subpel_params[ref].xs = sf->x_step_q4;
         subpel_params[ref].ys = sf->y_step_q4;
       } else {
         const MV mv_q4 = clamp_mv_to_umv_border_sb(
             xd, &mv, bw, bh, pd->subsampling_x, pd->subsampling_y);
-        subpel_params[ref].subpel_x = mv_q4.col & SUBPEL_MASK;
-        subpel_params[ref].subpel_y = mv_q4.row & SUBPEL_MASK;
-        subpel_params[ref].xs = SCALE_DENOMINATOR;
-        subpel_params[ref].ys = SCALE_DENOMINATOR;
+        subpel_params[ref].subpel_x = (mv_q4.col & SUBPEL_MASK)
+                                      << SCALE_EXTRA_BITS;
+        subpel_params[ref].subpel_y = (mv_q4.row & SUBPEL_MASK)
+                                      << SCALE_EXTRA_BITS;
+        subpel_params[ref].xs = SCALE_SUBPEL_SHIFTS;
+        subpel_params[ref].ys = SCALE_SUBPEL_SHIFTS;
         pre[ref] = pre_buf->buf +
                    (y + (mv_q4.row >> SUBPEL_BITS)) * pre_buf->stride +
                    (x + (mv_q4.col >> SUBPEL_BITS));
@@ -1275,6 +1283,7 @@
   }
 }
 
+#if 0
 void av1_build_inter_predictor_sub8x8(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                       int plane, int i, int ir, int ic,
                                       int mi_row, int mi_col) {
@@ -1336,6 +1345,7 @@
                                 mi_row * MI_SIZE + 4 * ir, xd);
   }
 }
+#endif
 
 static void build_inter_predictors_for_planes(const AV1_COMMON *cm,
                                               MACROBLOCKD *xd, BLOCK_SIZE bsize,
@@ -3005,25 +3015,26 @@
     int pos_y = sf->scale_value_y(orig_pos_y, sf);
     int pos_x = sf->scale_value_x(orig_pos_x, sf);
 
-    const int top = -((AOM_INTERP_EXTEND + bh) << SUBPEL_BITS);
-    const int bottom = (pre_buf->height + AOM_INTERP_EXTEND) << SUBPEL_BITS;
-    const int left = -((AOM_INTERP_EXTEND + bw) << SUBPEL_BITS);
-    const int right = (pre_buf->width + AOM_INTERP_EXTEND) << SUBPEL_BITS;
+    const int top = -((AOM_INTERP_EXTEND + bh) << SCALE_SUBPEL_BITS);
+    const int bottom = (pre_buf->height + AOM_INTERP_EXTEND)
+                       << SCALE_SUBPEL_BITS;
+    const int left = -((AOM_INTERP_EXTEND + bw) << SCALE_SUBPEL_BITS);
+    const int right = (pre_buf->width + AOM_INTERP_EXTEND) << SCALE_SUBPEL_BITS;
     pos_y = clamp(pos_y, top, bottom);
     pos_x = clamp(pos_x, left, right);
 
-    pre = pre_buf->buf0 + (pos_y >> SUBPEL_BITS) * pre_buf->stride +
-          (pos_x >> SUBPEL_BITS);
-    subpel_x = pos_x & SUBPEL_MASK;
-    subpel_y = pos_y & SUBPEL_MASK;
+    pre = pre_buf->buf0 + (pos_y >> SCALE_SUBPEL_BITS) * pre_buf->stride +
+          (pos_x >> SCALE_SUBPEL_BITS);
+    subpel_x = pos_x & SCALE_SUBPEL_MASK;
+    subpel_y = pos_y & SCALE_SUBPEL_MASK;
     xs = sf->x_step_q4;
     ys = sf->y_step_q4;
   } else {
     const MV mv_q4 = clamp_mv_to_umv_border_sb(
         xd, &mv, bw, bh, pd->subsampling_x, pd->subsampling_y);
-    xs = ys = SCALE_DENOMINATOR;
-    subpel_x = mv_q4.col & SUBPEL_MASK;
-    subpel_y = mv_q4.row & SUBPEL_MASK;
+    xs = ys = SCALE_SUBPEL_SHIFTS;
+    subpel_x = (mv_q4.col & SUBPEL_MASK) << SCALE_EXTRA_BITS;
+    subpel_y = (mv_q4.row & SUBPEL_MASK) << SCALE_EXTRA_BITS;
     pre = pre_buf->buf + (y + (mv_q4.row >> SUBPEL_BITS)) * pre_buf->stride +
           (x + (mv_q4.col >> SUBPEL_BITS));
   }
diff --git a/av1/common/reconinter.h b/av1/common/reconinter.h
index 8b370d3..ff053e1 100644
--- a/av1/common/reconinter.h
+++ b/av1/common/reconinter.h
@@ -32,7 +32,9 @@
 extern "C" {
 #endif
 
-static INLINE int has_scale(int xs, int ys) { return xs != 16 || ys != 16; }
+static INLINE int has_scale(int xs, int ys) {
+  return xs != SCALE_SUBPEL_SHIFTS || ys != SCALE_SUBPEL_SHIFTS;
+}
 
 static INLINE void inter_predictor(const uint8_t *src, int src_stride,
                                    uint8_t *dst, int dst_stride, int subpel_x,
@@ -62,37 +64,51 @@
   assert(conv_params->do_average == 0 || conv_params->do_average == 1);
   assert(sf);
   if (has_scale(xs, ys)) {
+    // TODO(afergs, debargha): Use a different scale convolve function
+    // that uses higher precision for subpel_x, subpel_y, xs, ys
     av1_convolve_c(src, src_stride, dst, dst_stride, w, h, interp_filter,
-                   subpel_x, xs, subpel_y, ys, conv_params);
-  } else if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
-#if CONFIG_CONVOLVE_ROUND
-    av1_convolve_2d_facade(src, src_stride, dst, dst_stride, w, h,
-#if CONFIG_DUAL_FILTER
-                           interp_filter,
-#else   // CONFIG_DUAL_FILTER
-                           &interp_filter,
-#endif  // CONFIG_DUAL_FILTER
-                           subpel_x, xs, subpel_y, ys, conv_params);
-    conv_params->do_post_rounding = 1;
-#else
-    assert(0);
-#endif  // CONFIG_CONVOLVE_ROUND
+                   subpel_x >> SCALE_EXTRA_BITS, xs >> SCALE_EXTRA_BITS,
+                   subpel_y >> SCALE_EXTRA_BITS, ys >> SCALE_EXTRA_BITS,
+                   conv_params);
   } else {
-    assert(conv_params->round == CONVOLVE_OPT_ROUND);
-    if (w <= 2 || h <= 2) {
-      av1_convolve_c(src, src_stride, dst, dst_stride, w, h, interp_filter,
-                     subpel_x, xs, subpel_y, ys, conv_params);
-    } else if (interp_filter_params_x.taps == SUBPEL_TAPS &&
-               interp_filter_params_y.taps == SUBPEL_TAPS) {
-      const int16_t *kernel_x =
-          av1_get_interp_filter_subpel_kernel(interp_filter_params_x, subpel_x);
-      const int16_t *kernel_y =
-          av1_get_interp_filter_subpel_kernel(interp_filter_params_y, subpel_y);
-      sf->predict[subpel_x != 0][subpel_y != 0][conv_params->do_average](
-          src, src_stride, dst, dst_stride, kernel_x, xs, kernel_y, ys, w, h);
+    subpel_x >>= SCALE_EXTRA_BITS;
+    subpel_y >>= SCALE_EXTRA_BITS;
+    xs >>= SCALE_EXTRA_BITS;
+    ys >>= SCALE_EXTRA_BITS;
+    assert(subpel_x < SUBPEL_SHIFTS);
+    assert(subpel_y < SUBPEL_SHIFTS);
+    assert(xs <= SUBPEL_SHIFTS);
+    assert(ys <= SUBPEL_SHIFTS);
+    if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
+#if CONFIG_CONVOLVE_ROUND
+      av1_convolve_2d_facade(src, src_stride, dst, dst_stride, w, h,
+#if CONFIG_DUAL_FILTER
+                             interp_filter,
+#else   // CONFIG_DUAL_FILTER
+                             &interp_filter,
+#endif  // CONFIG_DUAL_FILTER
+                             subpel_x, xs, subpel_y, ys, conv_params);
+      conv_params->do_post_rounding = 1;
+#else
+      assert(0);
+#endif  // CONFIG_CONVOLVE_ROUND
     } else {
-      av1_convolve(src, src_stride, dst, dst_stride, w, h, interp_filter,
-                   subpel_x, xs, subpel_y, ys, conv_params);
+      assert(conv_params->round == CONVOLVE_OPT_ROUND);
+      if (w <= 2 || h <= 2) {
+        av1_convolve_c(src, src_stride, dst, dst_stride, w, h, interp_filter,
+                       subpel_x, xs, subpel_y, ys, conv_params);
+      } else if (interp_filter_params_x.taps == SUBPEL_TAPS &&
+                 interp_filter_params_y.taps == SUBPEL_TAPS) {
+        const int16_t *kernel_x = av1_get_interp_filter_subpel_kernel(
+            interp_filter_params_x, subpel_x);
+        const int16_t *kernel_y = av1_get_interp_filter_subpel_kernel(
+            interp_filter_params_y, subpel_y);
+        sf->predict[subpel_x != 0][subpel_y != 0][conv_params->do_average](
+            src, src_stride, dst, dst_stride, kernel_x, xs, kernel_y, ys, w, h);
+      } else {
+        av1_convolve(src, src_stride, dst, dst_stride, w, h, interp_filter,
+                     subpel_x, xs, subpel_y, ys, conv_params);
+      }
     }
   }
 }
@@ -100,8 +116,7 @@
 #if CONFIG_HIGHBITDEPTH
 static INLINE void highbd_inter_predictor(const uint8_t *src, int src_stride,
                                           uint8_t *dst, int dst_stride,
-                                          const int subpel_x,
-                                          const int subpel_y,
+                                          int subpel_x, int subpel_y,
                                           const struct scale_factors *sf, int w,
                                           int h, ConvolveParams *conv_params,
 #if CONFIG_DUAL_FILTER
@@ -125,34 +140,49 @@
 #endif
 
   if (has_scale(xs, ys)) {
+    // TODO(afergs, debargha): Use a different scale convolve function
+    // that uses higher precision for subpel_x, subpel_y, xs, ys
     av1_highbd_convolve(src, src_stride, dst, dst_stride, w, h, interp_filter,
-                        subpel_x, xs, subpel_y, ys, avg, bd);
-  } else if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
-#if CONFIG_CONVOLVE_ROUND
-    av1_highbd_convolve_2d_facade(src, src_stride, dst, dst_stride, w, h,
-#if CONFIG_DUAL_FILTER
-                                  interp_filter,
-#else   // CONFIG_DUAL_FILTER
-                                  &interp_filter,
-#endif  // CONFIG_DUAL_FILTER
-                                  subpel_x, xs, subpel_y, ys, conv_params, bd);
-    conv_params->do_post_rounding = 1;
-#else
-    assert(0);
-#endif  // CONFIG_CONVOLVE_ROUND
+                        subpel_x >> SCALE_EXTRA_BITS, xs >> SCALE_EXTRA_BITS,
+                        subpel_y >> SCALE_EXTRA_BITS, ys >> SCALE_EXTRA_BITS,
+                        avg, bd);
   } else {
-    if (interp_filter_params_x.taps == SUBPEL_TAPS &&
-        interp_filter_params_y.taps == SUBPEL_TAPS && w > 2 && h > 2) {
-      const int16_t *kernel_x =
-          av1_get_interp_filter_subpel_kernel(interp_filter_params_x, subpel_x);
-      const int16_t *kernel_y =
-          av1_get_interp_filter_subpel_kernel(interp_filter_params_y, subpel_y);
-      sf->highbd_predict[subpel_x != 0][subpel_y != 0][avg](
-          src, src_stride, dst, dst_stride, kernel_x, xs, kernel_y, ys, w, h,
-          bd);
+    subpel_x >>= SCALE_EXTRA_BITS;
+    subpel_y >>= SCALE_EXTRA_BITS;
+    xs >>= SCALE_EXTRA_BITS;
+    ys >>= SCALE_EXTRA_BITS;
+    assert(subpel_x < SUBPEL_SHIFTS);
+    assert(subpel_y < SUBPEL_SHIFTS);
+    assert(xs <= SUBPEL_SHIFTS);
+    assert(ys <= SUBPEL_SHIFTS);
+    if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
+#if CONFIG_CONVOLVE_ROUND
+      av1_highbd_convolve_2d_facade(src, src_stride, dst, dst_stride, w, h,
+#if CONFIG_DUAL_FILTER
+                                    interp_filter,
+#else  // CONFIG_DUAL_FILTER
+                                    &interp_filter,
+#endif  // CONFIG_DUAL_FILTER
+                                    subpel_x, xs, subpel_y, ys, conv_params,
+                                    bd);
+      conv_params->do_post_rounding = 1;
+#else
+      assert(0);
+#endif  // CONFIG_CONVOLVE_ROUND
     } else {
-      av1_highbd_convolve(src, src_stride, dst, dst_stride, w, h, interp_filter,
-                          subpel_x, xs, subpel_y, ys, avg, bd);
+      if (interp_filter_params_x.taps == SUBPEL_TAPS &&
+          interp_filter_params_y.taps == SUBPEL_TAPS && w > 2 && h > 2) {
+        const int16_t *kernel_x = av1_get_interp_filter_subpel_kernel(
+            interp_filter_params_x, subpel_x);
+        const int16_t *kernel_y = av1_get_interp_filter_subpel_kernel(
+            interp_filter_params_y, subpel_y);
+        sf->highbd_predict[subpel_x != 0][subpel_y != 0][avg](
+            src, src_stride, dst, dst_stride, kernel_x, xs, kernel_y, ys, w, h,
+            bd);
+      } else {
+        av1_highbd_convolve(src, src_stride, dst, dst_stride, w, h,
+                            interp_filter, subpel_x, xs, subpel_y, ys, avg, bd);
+      }
     }
   }
 }
@@ -619,8 +649,10 @@
 
 static INLINE int scaled_buffer_offset(int x_offset, int y_offset, int stride,
                                        const struct scale_factors *sf) {
-  const int x = sf ? sf->scale_value_x(x_offset, sf) : x_offset;
-  const int y = sf ? sf->scale_value_y(y_offset, sf) : y_offset;
+  const int x =
+      sf ? sf->scale_value_x(x_offset, sf) >> SCALE_EXTRA_BITS : x_offset;
+  const int y =
+      sf ? sf->scale_value_y(y_offset, sf) >> SCALE_EXTRA_BITS : y_offset;
   return y * stride + x;
 }
 
diff --git a/av1/common/scale.c b/av1/common/scale.c
index 76beaa2..6635b87 100644
--- a/av1/common/scale.c
+++ b/av1/common/scale.c
@@ -14,17 +14,22 @@
 #include "av1/common/scale.h"
 #include "aom_dsp/aom_filter.h"
 
+// Note: Expect val to be in q4 precision
 static INLINE int scaled_x(int val, const struct scale_factors *sf) {
-  return (int)((int64_t)val * sf->x_scale_fp >> REF_SCALE_SHIFT);
+  return (int)((int64_t)val * sf->x_scale_fp >>
+               (REF_SCALE_SHIFT - SCALE_EXTRA_BITS));
 }
 
+// Note: Expect val to be in q4 precision
 static INLINE int scaled_y(int val, const struct scale_factors *sf) {
-  return (int)((int64_t)val * sf->y_scale_fp >> REF_SCALE_SHIFT);
+  return (int)((int64_t)val * sf->y_scale_fp >>
+               (REF_SCALE_SHIFT - SCALE_EXTRA_BITS));
 }
 
+// Note: Expect val to be in q4 precision
 static int unscaled_value(int val, const struct scale_factors *sf) {
   (void)sf;
-  return val;
+  return val << SCALE_EXTRA_BITS;
 }
 
 static int get_fixed_point_scale_factor(int other_size, int this_size) {
@@ -35,11 +40,13 @@
   return (other_size << REF_SCALE_SHIFT) / this_size;
 }
 
-MV32 av1_scale_mv(const MV *mv, int x, int y, const struct scale_factors *sf) {
-  const int x_off_q4 = scaled_x(x << SUBPEL_BITS, sf) & SUBPEL_MASK;
-  const int y_off_q4 = scaled_y(y << SUBPEL_BITS, sf) & SUBPEL_MASK;
-  const MV32 res = { scaled_y(mv->row, sf) + y_off_q4,
-                     scaled_x(mv->col, sf) + x_off_q4 };
+// Note: x and y are integer precision, mvq4 is q4 precision.
+MV32 av1_scale_mv(const MV *mvq4, int x, int y,
+                  const struct scale_factors *sf) {
+  const int x_off_q4 = scaled_x(x << SUBPEL_BITS, sf) & SCALE_SUBPEL_MASK;
+  const int y_off_q4 = scaled_y(y << SUBPEL_BITS, sf) & SCALE_SUBPEL_MASK;
+  const MV32 res = { scaled_y(mvq4->row, sf) + y_off_q4,
+                     scaled_x(mvq4->col, sf) + x_off_q4 };
   return res;
 }
 
@@ -59,8 +66,8 @@
 
   sf->x_scale_fp = get_fixed_point_scale_factor(other_w, this_w);
   sf->y_scale_fp = get_fixed_point_scale_factor(other_h, this_h);
-  sf->x_step_q4 = scaled_x(16, sf);
-  sf->y_step_q4 = scaled_y(16, sf);
+  sf->x_step_q4 = scaled_x(SUBPEL_SHIFTS, sf);
+  sf->y_step_q4 = scaled_y(SUBPEL_SHIFTS, sf);
 
   if (av1_is_scaled(sf)) {
     sf->scale_value_x = scaled_x;
@@ -76,8 +83,8 @@
   // applied in one direction only, and not at all for 0,0, seems to give the
   // best quality, but it may be worth trying an additional mode that does
   // do the filtering on full-pel.
-  if (sf->x_step_q4 == 16) {
-    if (sf->y_step_q4 == 16) {
+  if (sf->x_step_q4 == SCALE_SUBPEL_SHIFTS) {
+    if (sf->y_step_q4 == SCALE_SUBPEL_SHIFTS) {
       // No scaling in either direction.
       sf->predict[0][0][0] = aom_convolve_copy;
       sf->predict[0][0][1] = aom_convolve_avg;
@@ -95,7 +102,7 @@
       sf->predict[1][0][1] = aom_convolve8_avg;
     }
   } else {
-    if (sf->y_step_q4 == 16) {
+    if (sf->y_step_q4 == SCALE_SUBPEL_SHIFTS) {
       // No scaling in the y direction. Must always scale in the x direction.
       sf->predict[0][0][0] = aom_convolve8_horiz;
       sf->predict[0][0][1] = aom_convolve8_avg_horiz;
@@ -119,8 +126,8 @@
 
 #if CONFIG_HIGHBITDEPTH
   if (use_highbd) {
-    if (sf->x_step_q4 == 16) {
-      if (sf->y_step_q4 == 16) {
+    if (sf->x_step_q4 == SCALE_SUBPEL_SHIFTS) {
+      if (sf->y_step_q4 == SCALE_SUBPEL_SHIFTS) {
         // No scaling in either direction.
         sf->highbd_predict[0][0][0] = aom_highbd_convolve_copy;
         sf->highbd_predict[0][0][1] = aom_highbd_convolve_avg;
@@ -138,7 +145,7 @@
         sf->highbd_predict[1][0][1] = aom_highbd_convolve8_avg;
       }
     } else {
-      if (sf->y_step_q4 == 16) {
+      if (sf->y_step_q4 == SCALE_SUBPEL_SHIFTS) {
         // No scaling in the y direction. Must always scale in the x direction.
         sf->highbd_predict[0][0][0] = aom_highbd_convolve8_horiz;
         sf->highbd_predict[0][0][1] = aom_highbd_convolve8_avg_horiz;
diff --git a/av1/common/scale.h b/av1/common/scale.h
index ea81efa..e035075 100644
--- a/av1/common/scale.h
+++ b/av1/common/scale.h
@@ -19,6 +19,13 @@
 extern "C" {
 #endif
 
+#define SCALE_DENOMINATOR 16
+
+#define SCALE_SUBPEL_BITS 8
+#define SCALE_SUBPEL_SHIFTS (1 << SCALE_SUBPEL_BITS)
+#define SCALE_SUBPEL_MASK (SCALE_SUBPEL_SHIFTS - 1)
+#define SCALE_EXTRA_BITS (SCALE_SUBPEL_BITS - SUBPEL_BITS)
+
 #define REF_SCALE_SHIFT 14
 #define REF_NO_SCALE (1 << REF_SCALE_SHIFT)
 #define REF_INVALID_SCALE -1
diff --git a/av1/common/warped_motion.c b/av1/common/warped_motion.c
index 637648a..43191e9 100644
--- a/av1/common/warped_motion.c
+++ b/av1/common/warped_motion.c
@@ -17,6 +17,7 @@
 
 #include "./av1_rtcd.h"
 #include "av1/common/warped_motion.h"
+#include "av1/common/scale.h"
 
 #define WARP_ERROR_BLOCK 32
 
@@ -1147,8 +1148,8 @@
     wm->wmmat[5] = wm->wmmat[2];
     wm->wmmat[4] = -wm->wmmat[3];
   }
-  if ((wm->wmtype == ROTZOOM || wm->wmtype == AFFINE) && x_scale == 16 &&
-      y_scale == 16) {
+  if ((wm->wmtype == ROTZOOM || wm->wmtype == AFFINE) &&
+      x_scale == SCALE_SUBPEL_SHIFTS && y_scale == SCALE_SUBPEL_SHIFTS) {
     const int32_t *const mat = wm->wmmat;
     const int16_t alpha = wm->alpha;
     const int16_t beta = wm->beta;
@@ -1579,8 +1580,8 @@
     wm->wmmat[5] = wm->wmmat[2];
     wm->wmmat[4] = -wm->wmmat[3];
   }
-  if ((wm->wmtype == ROTZOOM || wm->wmtype == AFFINE) && x_scale == 16 &&
-      y_scale == 16) {
+  if ((wm->wmtype == ROTZOOM || wm->wmtype == AFFINE) &&
+      x_scale == SCALE_SUBPEL_SHIFTS && y_scale == SCALE_SUBPEL_SHIFTS) {
     const int32_t *const mat = wm->wmmat;
     const int16_t alpha = wm->alpha;
     const int16_t beta = wm->beta;