Reduce precision of shear parameters to 16 bits

Change-Id: I9cd9362edbb7b642f4b632bf574abfe5b2159ff3
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index e90c021..d08a888 100644
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -668,11 +668,11 @@
 
 if ((aom_config("CONFIG_WARPED_MOTION") eq "yes") ||
     (aom_config("CONFIG_GLOBAL_MOTION") eq "yes")) {
-  add_proto qw/void av1_warp_affine/, "int32_t *mat, uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int ref_frm, int32_t alpha, int32_t beta, int32_t gamma, int32_t delta";
+  add_proto qw/void av1_warp_affine/, "int32_t *mat, uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int ref_frm, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta";
   specialize qw/av1_warp_affine sse2/;
 
   if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
-    add_proto qw/void av1_highbd_warp_affine/, "int32_t *mat, uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, int ref_frm, int32_t alpha, int32_t beta, int32_t gamma, int32_t delta";
+    add_proto qw/void av1_highbd_warp_affine/, "int32_t *mat, uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, int ref_frm, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta";
     specialize qw/av1_highbd_warp_affine ssse3/;
   }
 }
diff --git a/av1/common/mv.h b/av1/common/mv.h
index bcf5712..9731689 100644
--- a/av1/common/mv.h
+++ b/av1/common/mv.h
@@ -97,7 +97,7 @@
 typedef struct {
   TransformationType wmtype;
   int32_t wmmat[8];
-  int32_t alpha, beta, gamma, delta;
+  int16_t alpha, beta, gamma, delta;
 } WarpedMotionParams;
 #endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
 
diff --git a/av1/common/warped_motion.c b/av1/common/warped_motion.c
index 04679bf..ad9bfc9 100644
--- a/av1/common/warped_motion.c
+++ b/av1/common/warped_motion.c
@@ -667,8 +667,8 @@
   return (mat[2] > 0);
 }
 
-static int is_affine_shear_allowed(int32_t alpha, int32_t beta, int32_t gamma,
-                                   int32_t delta) {
+static int is_affine_shear_allowed(int16_t alpha, int16_t beta, int16_t gamma,
+                                   int16_t delta) {
   if ((4 * abs(alpha) + 7 * abs(beta) > (1 << WARPEDMODEL_PREC_BITS)) ||
       (4 * abs(gamma) + 4 * abs(delta) > (1 << WARPEDMODEL_PREC_BITS)))
     return 0;
@@ -680,16 +680,19 @@
 int get_shear_params(WarpedMotionParams *wm) {
   const int32_t *mat = wm->wmmat;
   if (!is_affine_valid(wm)) return 0;
-  wm->alpha = mat[2] - (1 << WARPEDMODEL_PREC_BITS);
-  wm->beta = mat[3];
+  wm->alpha =
+      clamp(mat[2] - (1 << WARPEDMODEL_PREC_BITS), INT16_MIN, INT16_MAX);
+  wm->beta = clamp(mat[3], INT16_MIN, INT16_MAX);
   int16_t shift;
   int16_t y = resolve_divisor_32(abs(mat[2]), &shift) * (mat[2] < 0 ? -1 : 1);
   int64_t v;
   v = ((int64_t)mat[4] << WARPEDMODEL_PREC_BITS) * y;
-  wm->gamma = ROUND_POWER_OF_TWO_SIGNED_64(v, shift);
+  wm->gamma =
+      clamp(ROUND_POWER_OF_TWO_SIGNED_64(v, shift), INT16_MIN, INT16_MAX);
   v = ((int64_t)mat[3] * mat[4]) * y;
-  wm->delta = mat[5] - ROUND_POWER_OF_TWO_SIGNED_64(v, shift) -
-              (1 << WARPEDMODEL_PREC_BITS);
+  wm->delta = clamp(mat[5] - ROUND_POWER_OF_TWO_SIGNED_64(v, shift) -
+                        (1 << WARPEDMODEL_PREC_BITS),
+                    INT16_MIN, INT16_MAX);
   if (!is_affine_shear_allowed(wm->alpha, wm->beta, wm->gamma, wm->delta))
     return 0;
   return 1;
@@ -872,8 +875,8 @@
                               int p_row, int p_width, int p_height,
                               int p_stride, int subsampling_x,
                               int subsampling_y, int bd, int ref_frm,
-                              int32_t alpha, int32_t beta, int32_t gamma,
-                              int32_t delta) {
+                              int16_t alpha, int16_t beta, int16_t gamma,
+                              int16_t delta) {
 #if HORSHEAR_REDUCE_PREC_BITS >= 5
   int16_t tmp[15 * 8];
 #else
@@ -1001,10 +1004,10 @@
   if ((wm->wmtype == ROTZOOM || wm->wmtype == AFFINE) && x_scale == 16 &&
       y_scale == 16) {
     int32_t *mat = wm->wmmat;
-    const int32_t alpha = wm->alpha;
-    const int32_t beta = wm->beta;
-    const int32_t gamma = wm->gamma;
-    const int32_t delta = wm->delta;
+    const int16_t alpha = wm->alpha;
+    const int16_t beta = wm->beta;
+    const int16_t gamma = wm->gamma;
+    const int16_t delta = wm->delta;
 
     uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
     uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
@@ -1121,8 +1124,8 @@
                        int stride, uint8_t *pred, int p_col, int p_row,
                        int p_width, int p_height, int p_stride,
                        int subsampling_x, int subsampling_y, int ref_frm,
-                       int32_t alpha, int32_t beta, int32_t gamma,
-                       int32_t delta) {
+                       int16_t alpha, int16_t beta, int16_t gamma,
+                       int16_t delta) {
   int16_t tmp[15 * 8];
   int i, j, k, l, m;
 
@@ -1257,10 +1260,10 @@
   if ((wm->wmtype == ROTZOOM || wm->wmtype == AFFINE) && x_scale == 16 &&
       y_scale == 16) {
     int32_t *mat = wm->wmmat;
-    const int32_t alpha = wm->alpha;
-    const int32_t beta = wm->beta;
-    const int32_t gamma = wm->gamma;
-    const int32_t delta = wm->delta;
+    const int16_t alpha = wm->alpha;
+    const int16_t beta = wm->beta;
+    const int16_t gamma = wm->gamma;
+    const int16_t delta = wm->delta;
 
     av1_warp_affine(mat, ref, width, height, stride, pred, p_col, p_row,
                     p_width, p_height, p_stride, subsampling_x, subsampling_y,
diff --git a/av1/common/x86/warp_plane_sse2.c b/av1/common/x86/warp_plane_sse2.c
index d7253be..9e314b0 100644
--- a/av1/common/x86/warp_plane_sse2.c
+++ b/av1/common/x86/warp_plane_sse2.c
@@ -21,8 +21,8 @@
                           int stride, uint8_t *pred, int p_col, int p_row,
                           int p_width, int p_height, int p_stride,
                           int subsampling_x, int subsampling_y, int ref_frm,
-                          int32_t alpha, int32_t beta, int32_t gamma,
-                          int32_t delta) {
+                          int16_t alpha, int16_t beta, int16_t gamma,
+                          int16_t delta) {
   __m128i tmp[15];
   int i, j, k;
 
diff --git a/test/warp_filter_test_util.cc b/test/warp_filter_test_util.cc
index 25d1e07..f7409ea 100644
--- a/test/warp_filter_test_util.cc
+++ b/test/warp_filter_test_util.cc
@@ -47,9 +47,9 @@
   return v;
 }
 
-void AV1WarpFilterTest::generate_model(int32_t *mat, int32_t *alpha,
-                                       int32_t *beta, int32_t *gamma,
-                                       int32_t *delta) {
+void AV1WarpFilterTest::generate_model(int32_t *mat, int16_t *alpha,
+                                       int16_t *beta, int16_t *gamma,
+                                       int16_t *delta) {
   while (1) {
     mat[0] = random_param(WARPEDMODEL_PREC_BITS + 6);
     mat[1] = random_param(WARPEDMODEL_PREC_BITS + 6);
@@ -71,11 +71,14 @@
     // for the warp filter.
     assert(mat[2] != 0);
 
-    *alpha = mat[2] - (1 << WARPEDMODEL_PREC_BITS);
-    *beta = mat[3];
-    *gamma = ((int64_t)mat[4] << WARPEDMODEL_PREC_BITS) / mat[2];
-    *delta = mat[5] - (((int64_t)mat[3] * mat[4] + (mat[2] / 2)) / mat[2]) -
-             (1 << WARPEDMODEL_PREC_BITS);
+    *alpha = clamp(mat[2] - (1 << WARPEDMODEL_PREC_BITS), INT16_MIN, INT16_MAX);
+    *beta = clamp(mat[3], INT16_MIN, INT16_MAX);
+    *gamma = clamp(((int64_t)mat[4] << WARPEDMODEL_PREC_BITS) / mat[2],
+                   INT16_MIN, INT16_MAX);
+    *delta =
+        clamp(mat[5] - (((int64_t)mat[3] * mat[4] + (mat[2] / 2)) / mat[2]) -
+                  (1 << WARPEDMODEL_PREC_BITS),
+              INT16_MIN, INT16_MAX);
 
     if ((4 * abs(*alpha) + 7 * abs(*beta) > (1 << WARPEDMODEL_PREC_BITS)) ||
         (4 * abs(*gamma) + 4 * abs(*delta) > (1 << WARPEDMODEL_PREC_BITS)))
@@ -98,7 +101,8 @@
   uint8_t *input = input_ + border;
   uint8_t *output = new uint8_t[out_w * out_h];
   uint8_t *output2 = new uint8_t[out_w * out_h];
-  int32_t mat[8], alpha, beta, gamma, delta;
+  int32_t mat[8];
+  int16_t alpha, beta, gamma, delta;
 
   // Generate an input block and extend its borders horizontally
   for (i = 0; i < h; ++i)
@@ -159,9 +163,9 @@
   return v;
 }
 
-void AV1HighbdWarpFilterTest::generate_model(int32_t *mat, int32_t *alpha,
-                                             int32_t *beta, int32_t *gamma,
-                                             int32_t *delta) {
+void AV1HighbdWarpFilterTest::generate_model(int32_t *mat, int16_t *alpha,
+                                             int16_t *beta, int16_t *gamma,
+                                             int16_t *delta) {
   while (1) {
     mat[0] = random_param(WARPEDMODEL_PREC_BITS + 6);
     mat[1] = random_param(WARPEDMODEL_PREC_BITS + 6);
@@ -183,11 +187,14 @@
     // for the warp filter.
     assert(mat[2] != 0);
 
-    *alpha = mat[2] - (1 << WARPEDMODEL_PREC_BITS);
-    *beta = mat[3];
-    *gamma = ((int64_t)mat[4] << WARPEDMODEL_PREC_BITS) / mat[2];
-    *delta = mat[5] - (((int64_t)mat[3] * mat[4] + (mat[2] / 2)) / mat[2]) -
-             (1 << WARPEDMODEL_PREC_BITS);
+    *alpha = clamp(mat[2] - (1 << WARPEDMODEL_PREC_BITS), INT16_MIN, INT16_MAX);
+    *beta = clamp(mat[3], INT16_MIN, INT16_MAX);
+    *gamma = clamp(((int64_t)mat[4] << WARPEDMODEL_PREC_BITS) / mat[2],
+                   INT16_MIN, INT16_MAX);
+    *delta =
+        clamp(mat[5] - (((int64_t)mat[3] * mat[4] + (mat[2] / 2)) / mat[2]) -
+                  (1 << WARPEDMODEL_PREC_BITS),
+              INT16_MIN, INT16_MAX);
 
     if ((4 * abs(*alpha) + 7 * abs(*beta) > (1 << WARPEDMODEL_PREC_BITS)) ||
         (4 * abs(*gamma) + 4 * abs(*delta) > (1 << WARPEDMODEL_PREC_BITS)))
@@ -213,7 +220,8 @@
   uint16_t *input = input_ + border;
   uint16_t *output = new uint16_t[out_w * out_h];
   uint16_t *output2 = new uint16_t[out_w * out_h];
-  int32_t mat[8], alpha, beta, gamma, delta;
+  int32_t mat[8];
+  int16_t alpha, beta, gamma, delta;
 
   // Generate an input block and extend its borders horizontally
   for (i = 0; i < h; ++i)
diff --git a/test/warp_filter_test_util.h b/test/warp_filter_test_util.h
index facd8cb..1314058 100644
--- a/test/warp_filter_test_util.h
+++ b/test/warp_filter_test_util.h
@@ -30,8 +30,8 @@
                                  int height, int stride, uint8_t *pred,
                                  int p_col, int p_row, int p_width,
                                  int p_height, int p_stride, int subsampling_x,
-                                 int subsampling_y, int ref_frm, int32_t alpha,
-                                 int32_t beta, int32_t gamma, int32_t delta);
+                                 int subsampling_y, int ref_frm, int16_t alpha,
+                                 int16_t beta, int16_t gamma, int16_t delta);
 
 typedef std::tr1::tuple<int, int, int> WarpTestParam;
 
@@ -46,8 +46,8 @@
 
  protected:
   int32_t random_param(int bits);
-  void generate_model(int32_t *mat, int32_t *alpha, int32_t *beta,
-                      int32_t *gamma, int32_t *delta);
+  void generate_model(int32_t *mat, int16_t *alpha, int16_t *beta,
+                      int16_t *gamma, int16_t *delta);
 
   void RunCheckOutput(warp_affine_func test_impl);
 
@@ -62,7 +62,7 @@
     int32_t *mat, uint16_t *ref, int width, int height, int stride,
     uint16_t *pred, int p_col, int p_row, int p_width, int p_height,
     int p_stride, int subsampling_x, int subsampling_y, int bd, int ref_frm,
-    int32_t alpha, int32_t beta, int32_t gamma, int32_t delta);
+    int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
 
 typedef std::tr1::tuple<int, int, int, int> HighbdWarpTestParam;
 
@@ -78,8 +78,8 @@
 
  protected:
   int32_t random_param(int bits);
-  void generate_model(int32_t *mat, int32_t *alpha, int32_t *beta,
-                      int32_t *gamma, int32_t *delta);
+  void generate_model(int32_t *mat, int16_t *alpha, int16_t *beta,
+                      int16_t *gamma, int16_t *delta);
 
   void RunCheckOutput(highbd_warp_affine_func test_impl);