Add correctness tests for the SSE2 warp filter

Also rename warp_affine() to av1_warp_affine()

Change-Id: I945baff6be8a1ea942ce88dfcfa5344af6b3a966
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index 891d817..0805fd7 100644
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -770,8 +770,8 @@
 
 if ((aom_config("CONFIG_WARPED_MOTION") eq "yes") ||
     (aom_config("CONFIG_GLOBAL_MOTION") eq "yes")) {
-  add_proto qw/void warp_affine/, "int32_t *mat, uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int ref_frm, int32_t alpha, int32_t beta, int32_t gamma, int32_t delta";
-  specialize qw/warp_affine sse2/;
+  add_proto qw/void av1_warp_affine/, "int32_t *mat, uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int ref_frm, int32_t alpha, int32_t beta, int32_t gamma, int32_t delta";
+  specialize qw/av1_warp_affine sse2/;
 }
 
 1;
diff --git a/av1/common/warped_motion.c b/av1/common/warped_motion.c
index d6c5279..6056914 100644
--- a/av1/common/warped_motion.c
+++ b/av1/common/warped_motion.c
@@ -908,11 +908,12 @@
   return v;
 }
 
-void warp_affine_c(int32_t *mat, uint8_t *ref, int width, int height,
-                   int stride, uint8_t *pred, int p_col, int p_row, int p_width,
-                   int p_height, int p_stride, int subsampling_x,
-                   int subsampling_y, int ref_frm, int32_t alpha, int32_t beta,
-                   int32_t gamma, int32_t delta) {
+void av1_warp_affine_c(int32_t *mat, uint8_t *ref, int width, int height,
+                       int stride, uint8_t *pred, int p_col, int p_row,
+                       int p_width, int p_height, int p_stride,
+                       int subsampling_x, int subsampling_y, int ref_frm,
+                       int32_t alpha, int32_t beta, int32_t gamma,
+                       int32_t delta) {
   int16_t tmp[15 * 8];
   int i, j, k, l, m;
 
@@ -1070,9 +1071,9 @@
       return;
     }
 
-    warp_affine(mat, ref, width, height, stride, pred, p_col, p_row, p_width,
-                p_height, p_stride, subsampling_x, subsampling_y, ref_frm,
-                alpha, beta, gamma, delta);
+    av1_warp_affine(mat, ref, width, height, stride, pred, p_col, p_row,
+                    p_width, p_height, p_stride, subsampling_x, subsampling_y,
+                    ref_frm, alpha, beta, gamma, delta);
   } else {
     warp_plane_old(wm, ref, width, height, stride, pred, p_col, p_row, p_width,
                    p_height, p_stride, subsampling_x, subsampling_y, x_scale,
diff --git a/av1/common/x86/warp_plane_sse2.c b/av1/common/x86/warp_plane_sse2.c
index 395797a..42c25c9 100644
--- a/av1/common/x86/warp_plane_sse2.c
+++ b/av1/common/x86/warp_plane_sse2.c
@@ -17,12 +17,12 @@
 const __m128i *const filter = (const __m128i *const)warped_filter;
 
 /* SSE2 version of the rotzoom/affine warp filter */
-void warp_affine_sse2(int32_t *mat, uint8_t *ref, int width, int height,
-                      int stride, uint8_t *pred, int p_col, int p_row,
-                      int p_width, int p_height, int p_stride,
-                      int subsampling_x, int subsampling_y, int ref_frm,
-                      int32_t alpha, int32_t beta, int32_t gamma,
-                      int32_t delta) {
+void av1_warp_affine_sse2(int32_t *mat, uint8_t *ref, int width, int height,
+                          int stride, uint8_t *pred, int p_col, int p_row,
+                          int p_width, int p_height, int p_stride,
+                          int subsampling_x, int subsampling_y, int ref_frm,
+                          int32_t alpha, int32_t beta, int32_t gamma,
+                          int32_t delta) {
   __m128i tmp[15];
   int i, j, k;
 
diff --git a/test/test.mk b/test/test.mk
index 85a90f1..c071480 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -202,6 +202,9 @@
 LIBAOM_TEST_SRCS-$(CONFIG_AV1) += av1_inv_txfm2d_test.cc
 LIBAOM_TEST_SRCS-$(CONFIG_AV1) += av1_convolve_test.cc
 LIBAOM_TEST_SRCS-$(CONFIG_AV1) += av1_convolve_optimz_test.cc
+ifneq ($(findstring yes,$(CONFIG_GLOBAL_MOTION) $(CONFIG_WARPED_MOTION)),)
+LIBAOM_TEST_SRCS-$(HAVE_SSE2) += warp_filter_test.cc
+endif
 
 TEST_INTRA_PRED_SPEED_SRCS-yes := test_intra_pred_speed.cc
 TEST_INTRA_PRED_SPEED_SRCS-yes += ../md5_utils.h ../md5_utils.c
diff --git a/test/warp_filter_test.cc b/test/warp_filter_test.cc
new file mode 100644
index 0000000..914ee74
--- /dev/null
+++ b/test/warp_filter_test.cc
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./av1_rtcd.h"
+#include "./aom_dsp_rtcd.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+
+#include "av1/common/mv.h"
+
+using std::tr1::tuple;
+using std::tr1::make_tuple;
+using libaom_test::ACMRandom;
+
+typedef tuple<int, int, int> WarpTestParam;
+
+namespace {
+
+class AV1WarpFilterTest : public ::testing::TestWithParam<WarpTestParam> {
+ public:
+  virtual ~AV1WarpFilterTest() {}
+  virtual void SetUp() { rnd_.Reset(ACMRandom::DeterministicSeed()); }
+
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+  int32_t random_param(int bits) {
+    // 1 in 8 chance of generating zero (arbitrarily chosen)
+    if (((rnd_.Rand8()) & 7) == 0) return 0;
+    // Otherwise, generate uniform values in the range
+    // [-(1 << bits), -1] U [1, 1 << bits]
+    int32_t v = 1 + (rnd_.Rand16() & ((1 << bits) - 1));
+    if ((rnd_.Rand8()) & 1) return -v;
+    return v;
+  }
+  void generate_model(int32_t *mat, int32_t *alpha, int32_t *beta,
+                      int32_t *gamma, int32_t *delta) {
+    while (1) {
+      mat[0] = random_param(WARPEDMODEL_PREC_BITS + 6);
+      mat[1] = random_param(WARPEDMODEL_PREC_BITS + 6);
+      mat[2] = (random_param(WARPEDMODEL_PREC_BITS - 3)) +
+               (1 << WARPEDMODEL_PREC_BITS);
+      mat[3] = random_param(WARPEDMODEL_PREC_BITS - 3);
+      // 50/50 chance of generating ROTZOOM vs. AFFINE models
+      if (rnd_.Rand8() & 1) {
+        // AFFINE
+        mat[4] = random_param(WARPEDMODEL_PREC_BITS - 3);
+        mat[5] = (random_param(WARPEDMODEL_PREC_BITS - 3)) +
+                 (1 << WARPEDMODEL_PREC_BITS);
+      } else {
+        mat[4] = -mat[3];
+        mat[5] = mat[2];
+      }
+
+      // Calculate the derived parameters and check that they are suitable
+      // for the warp filter.
+      assert(mat[2] != 0);
+
+      *alpha = mat[2] - (1 << WARPEDMODEL_PREC_BITS);
+      *beta = mat[3];
+      *gamma = ((int64_t)mat[4] << WARPEDMODEL_PREC_BITS) / mat[2];
+      *delta = mat[5] - (((int64_t)mat[3] * mat[4] + (mat[2] / 2)) / mat[2]) -
+               (1 << WARPEDMODEL_PREC_BITS);
+
+      if ((4 * abs(*alpha) + 7 * abs(*beta) > (1 << WARPEDMODEL_PREC_BITS)) ||
+          (4 * abs(*gamma) + 7 * abs(*delta) > (1 << WARPEDMODEL_PREC_BITS)))
+        continue;
+
+      // We have a valid model, so finish
+      return;
+    }
+  }
+
+  void RunCheckOutput() {
+    const int w = 128, h = 128;
+    const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
+    const int num_iters = GET_PARAM(2);
+    int i, j;
+    uint8_t *input = new uint8_t[w * h];
+    uint8_t *output = new uint8_t[out_w * out_h];
+    uint8_t *output2 = new uint8_t[out_w * out_h];
+    int32_t mat[8], alpha, beta, gamma, delta;
+
+    for (i = 0; i < h; ++i)
+      for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand8();
+
+    /* Compare the C and SSE2 outputs over num_iters random warp models */
+    for (i = 0; i < num_iters; ++i) {
+      generate_model(mat, &alpha, &beta, &gamma, &delta);
+      av1_warp_affine_c(mat, input, w, h, w, output, 32, 32, out_w, out_h,
+                        out_w, 0, 0, 0, alpha, beta, gamma, delta);
+      av1_warp_affine_sse2(mat, input, w, h, w, output2, 32, 32, out_w, out_h,
+                           out_w, 0, 0, 0, alpha, beta, gamma, delta);
+
+      for (j = 0; j < out_w * out_h; ++j)
+        ASSERT_EQ(output[j], output2[j])
+            << "Pixel mismatch at index " << j << " = (" << (j % out_w) << ", "
+            << (j / out_w) << ") on iteration " << i;
+    }
+
+    delete[] input;
+    delete[] output;
+    delete[] output2;
+  }
+
+  ACMRandom rnd_;
+};
+
+TEST_P(AV1WarpFilterTest, CheckOutput) { RunCheckOutput(); }
+
+const WarpTestParam params[] = {
+  make_tuple(4, 4, 50000),  make_tuple(8, 8, 50000),  make_tuple(64, 64, 1000),
+  make_tuple(4, 16, 20000), make_tuple(32, 8, 10000),
+};
+
+INSTANTIATE_TEST_CASE_P(SSE2, AV1WarpFilterTest, ::testing::ValuesIn(params));
+
+}  // namespace