Prepare for vectorizing highbd warp filter

This applies the same refactorings to highbd_warp_plane
which were applied to warp_plane a while ago, and lays the
groundwork for the relevant tests.

Change-Id: Ic4c00bce1accc5a3624bba0c3b4b325e69a42c1a
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index b600c66..b900d4a 100644
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -670,6 +670,10 @@
     (aom_config("CONFIG_GLOBAL_MOTION") eq "yes")) {
   add_proto qw/void av1_warp_affine/, "int32_t *mat, uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int ref_frm, int32_t alpha, int32_t beta, int32_t gamma, int32_t delta";
   specialize qw/av1_warp_affine sse2/;
+
+  if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
+    add_proto qw/void av1_highbd_warp_affine/, "int32_t *mat, uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, int ref_frm, int32_t alpha, int32_t beta, int32_t gamma, int32_t delta";
+  }
 }
 
 # LOOP_RESTORATION functions
diff --git a/av1/common/warped_motion.c b/av1/common/warped_motion.c
index 1aecd56..94cf5f9 100644
--- a/av1/common/warped_motion.c
+++ b/av1/common/warped_motion.c
@@ -618,6 +618,14 @@
   8240,  8224,  8208,  8192,
 };
 
+static inline int16_t saturate_int16(int32_t v) {
+  if (v > 32767)
+    return 32767;
+  else if (v < -32768)
+    return -32768;
+  return v;
+}
+
 #if CONFIG_WARPED_MOTION
 // Decomposes a divisor D such that 1/D = y/2^shift, where y is returned
 // at precision of DIV_LUT_PREC_BITS along with the shift.
@@ -848,6 +856,138 @@
 
 // Note: For an explanation of the warp algorithm, see the comment
 // above warp_plane()
+//
+// Note also: The "worst case" in terms of modulus of the data stored into 'tmp'
+// (ie, the result of 'sum' in the horizontal filter) occurs at bd = 12 when:
+// coeffs = { -2,    8, -22,   87,   72, -21,    8, -2}, and
+// ref =    {  0, 4095,   0, 4095, 4095,   0, 4095,  0}
+// Before rounding, this gives sum = 716625. After rounding,
+// HORSHEAR_REDUCE_PREC_BITS = 4 => sum = 44789 > 2^15
+// HORSHEAR_REDUCE_PREC_BITS = 5 => sum = 22395 < 2^15
+//
+// So, as long as HORSHEAR_REDUCE_PREC_BITS >= 5, we can safely use a 16-bit
+// intermediate array.
+void av1_highbd_warp_affine_c(int32_t *mat, uint16_t *ref, int width,
+                              int height, int stride, uint16_t *pred, int p_col,
+                              int p_row, int p_width, int p_height,
+                              int p_stride, int subsampling_x,
+                              int subsampling_y, int bd, int ref_frm,
+                              int32_t alpha, int32_t beta, int32_t gamma,
+                              int32_t delta) {
+#if HORSHEAR_REDUCE_PREC_BITS >= 5
+  int16_t tmp[15 * 8];
+#else
+  int32_t tmp[15 * 8];
+#endif
+  int i, j, k, l, m;
+
+  /* Note: For this code to work, the left/right frame borders need to be
+     extended by at least 13 pixels each. By the time we get here, other
+     code will have set up this border, but we allow an explicit check
+     for debugging purposes.
+  */
+  /*for (i = 0; i < height; ++i) {
+    for (j = 0; j < 13; ++j) {
+      assert(ref[i * stride - 13 + j] == ref[i * stride]);
+      assert(ref[i * stride + width + j] == ref[i * stride + (width - 1)]);
+    }
+  }*/
+
+  for (i = p_row; i < p_row + p_height; i += 8) {
+    for (j = p_col; j < p_col + p_width; j += 8) {
+      int32_t x4, y4, ix4, sx4, iy4, sy4;
+      if (subsampling_x)
+        x4 = ROUND_POWER_OF_TWO_SIGNED(
+            mat[2] * 2 * (j + 4) + mat[3] * 2 * (i + 4) + mat[0] +
+                (mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
+            1);
+      else
+        x4 = mat[2] * (j + 4) + mat[3] * (i + 4) + mat[0];
+
+      if (subsampling_y)
+        y4 = ROUND_POWER_OF_TWO_SIGNED(
+            mat[4] * 2 * (j + 4) + mat[5] * 2 * (i + 4) + mat[1] +
+                (mat[4] + mat[5] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
+            1);
+      else
+        y4 = mat[4] * (j + 4) + mat[5] * (i + 4) + mat[1];
+
+      ix4 = x4 >> WARPEDMODEL_PREC_BITS;
+      sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
+      iy4 = y4 >> WARPEDMODEL_PREC_BITS;
+      sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
+
+      // Horizontal filter
+      for (k = -7; k < 8; ++k) {
+        int iy = iy4 + k;
+        if (iy < 0)
+          iy = 0;
+        else if (iy > height - 1)
+          iy = height - 1;
+
+        if (ix4 <= -7) {
+          for (l = 0; l < 8; ++l) {
+            tmp[(k + 7) * 8 + l] =
+                ref[iy * stride] *
+                (1 << (WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS));
+          }
+        } else if (ix4 >= width + 6) {
+          for (l = 0; l < 8; ++l) {
+            tmp[(k + 7) * 8 + l] =
+                ref[iy * stride + (width - 1)] *
+                (1 << (WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS));
+          }
+        } else {
+          int sx = sx4 + alpha * (-4) + beta * k;
+
+          for (l = -4; l < 4; ++l) {
+            int ix = ix4 + l - 3;
+            const int offs = ROUND_POWER_OF_TWO(sx, WARPEDDIFF_PREC_BITS) +
+                             WARPEDPIXEL_PREC_SHIFTS;
+            const int16_t *coeffs = warped_filter[offs];
+            int32_t sum = 0;
+            // assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3);
+            for (m = 0; m < 8; ++m) {
+              sum += ref[iy * stride + ix + m] * coeffs[m];
+            }
+            sum = ROUND_POWER_OF_TWO(sum, HORSHEAR_REDUCE_PREC_BITS);
+#if HORSHEAR_REDUCE_PREC_BITS >= 5
+            tmp[(k + 7) * 8 + (l + 4)] = saturate_int16(sum);
+#else
+            tmp[(k + 7) * 8 + (l + 4)] = sum;
+#endif
+            sx += alpha;
+          }
+        }
+      }
+
+      // Vertical filter
+      for (k = -4; k < AOMMIN(4, p_row + p_height - i - 4); ++k) {
+        int sy = sy4 + gamma * (-4) + delta * k;
+        for (l = -4; l < 4; ++l) {
+          uint16_t *p =
+              &pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)];
+          const int offs = ROUND_POWER_OF_TWO(sy, WARPEDDIFF_PREC_BITS) +
+                           WARPEDPIXEL_PREC_SHIFTS;
+          const int16_t *coeffs = warped_filter[offs];
+          int32_t sum = 0;
+          // assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3);
+          for (m = 0; m < 8; ++m) {
+            sum += tmp[(k + m + 4) * 8 + (l + 4)] * coeffs[m];
+          }
+          sum = clip_pixel_highbd(
+              ROUND_POWER_OF_TWO(sum, VERSHEAR_REDUCE_PREC_BITS), bd);
+          if (ref_frm)
+            *p = ROUND_POWER_OF_TWO(*p + sum, 1);
+          else
+            *p = sum;
+          sy += gamma;
+        }
+      }
+    }
+  }
+}
+
 static void highbd_warp_plane(WarpedMotionParams *wm, uint8_t *ref8, int width,
                               int height, int stride, uint8_t *pred8, int p_col,
                               int p_row, int p_width, int p_height,
@@ -858,91 +998,20 @@
     wm->wmmat[5] = wm->wmmat[2];
     wm->wmmat[4] = -wm->wmmat[3];
   }
-  if (wm->wmtype == ROTZOOM || wm->wmtype == AFFINE) {
-    int32_t tmp[15 * 8];
-    int i, j, k, l, m;
+  if ((wm->wmtype == ROTZOOM || wm->wmtype == AFFINE) && x_scale == 16 &&
+      y_scale == 16) {
     int32_t *mat = wm->wmmat;
-    uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
-    uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
-
     const int32_t alpha = wm->alpha;
     const int32_t beta = wm->beta;
     const int32_t gamma = wm->gamma;
     const int32_t delta = wm->delta;
 
-    for (i = p_row; i < p_row + p_height; i += 8) {
-      for (j = p_col; j < p_col + p_width; j += 8) {
-        int32_t x4, y4, ix4, sx4, iy4, sy4;
-        if (subsampling_x)
-          x4 = ROUND_POWER_OF_TWO_SIGNED(
-              mat[2] * 2 * (j + 4) + mat[3] * 2 * (i + 4) + mat[0] +
-                  (mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
-              1);
-        else
-          x4 = mat[2] * (j + 4) + mat[3] * (i + 4) + mat[0];
-
-        if (subsampling_y)
-          y4 = ROUND_POWER_OF_TWO_SIGNED(
-              mat[4] * 2 * (j + 4) + mat[5] * 2 * (i + 4) + mat[1] +
-                  (mat[4] + mat[5] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
-              1);
-        else
-          y4 = mat[4] * (j + 4) + mat[5] * (i + 4) + mat[1];
-
-        ix4 = x4 >> WARPEDMODEL_PREC_BITS;
-        sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
-        iy4 = y4 >> WARPEDMODEL_PREC_BITS;
-        sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
-
-        // Horizontal filter
-        for (k = -7; k < 8; ++k) {
-          int iy = iy4 + k;
-          if (iy < 0)
-            iy = 0;
-          else if (iy > height - 1)
-            iy = height - 1;
-
-          for (l = -4; l < 4; ++l) {
-            int ix = ix4 + l;
-            int sx = ROUND_POWER_OF_TWO_SIGNED(sx4 + alpha * l + beta * k,
-                                               WARPEDDIFF_PREC_BITS);
-            const int16_t *coeffs = warped_filter[sx + WARPEDPIXEL_PREC_SHIFTS];
-            int32_t sum = 0;
-            for (m = 0; m < 8; ++m) {
-              if (ix + m - 3 < 0)
-                sum += ref[iy * stride] * coeffs[m];
-              else if (ix + m - 3 > width - 1)
-                sum += ref[iy * stride + width - 1] * coeffs[m];
-              else
-                sum += ref[iy * stride + ix + m - 3] * coeffs[m];
-            }
-            sum = ROUND_POWER_OF_TWO(sum, HORSHEAR_REDUCE_PREC_BITS);
-            tmp[(k + 7) * 8 + (l + 4)] = sum;
-          }
-        }
-
-        // Vertical filter
-        for (k = -4; k < AOMMIN(4, p_row + p_height - i - 4); ++k) {
-          for (l = -4; l < AOMMIN(4, p_col + p_width - j - 4); ++l) {
-            uint16_t *p =
-                &pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)];
-            int sy = ROUND_POWER_OF_TWO_SIGNED(sy4 + gamma * l + delta * k,
-                                               WARPEDDIFF_PREC_BITS);
-            const int16_t *coeffs = warped_filter[sy + WARPEDPIXEL_PREC_SHIFTS];
-            int32_t sum = 0;
-            for (m = 0; m < 8; ++m) {
-              sum += tmp[(k + m + 4) * 8 + (l + 4)] * coeffs[m];
-            }
-            sum = clip_pixel_highbd(
-                ROUND_POWER_OF_TWO_SIGNED(sum, VERSHEAR_REDUCE_PREC_BITS), bd);
-            if (ref_frm)
-              *p = ROUND_POWER_OF_TWO_SIGNED(*p + sum, 1);
-            else
-              *p = sum;
-          }
-        }
-      }
-    }
+    uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
+    uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
+    av1_highbd_warp_affine(mat, ref, width, height, stride, pred, p_col, p_row,
+                           p_width, p_height, p_stride, subsampling_x,
+                           subsampling_y, bd, ref_frm, alpha, beta, gamma,
+                           delta);
   } else {
     highbd_warp_plane_old(wm, ref8, width, height, stride, pred8, p_col, p_row,
                           p_width, p_height, p_stride, subsampling_x,
@@ -1048,14 +1117,6 @@
 
    TODO(david.barker): Maybe support scaled references?
 */
-static inline int16_t saturate_int16(int32_t v) {
-  if (v > 32767)
-    return 32767;
-  else if (v < -32768)
-    return -32768;
-  return v;
-}
-
 void av1_warp_affine_c(int32_t *mat, uint8_t *ref, int width, int height,
                        int stride, uint8_t *pred, int p_col, int p_row,
                        int p_width, int p_height, int p_stride,
diff --git a/test/warp_filter_test.cc b/test/warp_filter_test.cc
index 1d36a3f..4eea6c3 100644
--- a/test/warp_filter_test.cc
+++ b/test/warp_filter_test.cc
@@ -16,6 +16,9 @@
 using std::tr1::make_tuple;
 using libaom_test::ACMRandom;
 using libaom_test::AV1WarpFilter::AV1WarpFilterTest;
+#if CONFIG_AOM_HIGHBITDEPTH
+using libaom_test::AV1HighbdWarpFilter::AV1HighbdWarpFilterTest;
+#endif
 
 namespace {
 
diff --git a/test/warp_filter_test_util.cc b/test/warp_filter_test_util.cc
index d0966b9..25d1e07 100644
--- a/test/warp_filter_test_util.cc
+++ b/test/warp_filter_test_util.cc
@@ -17,6 +17,10 @@
 using libaom_test::ACMRandom;
 using libaom_test::AV1WarpFilter::AV1WarpFilterTest;
 using libaom_test::AV1WarpFilter::WarpTestParam;
+#if CONFIG_AOM_HIGHBITDEPTH
+using libaom_test::AV1HighbdWarpFilter::AV1HighbdWarpFilterTest;
+using libaom_test::AV1HighbdWarpFilter::HighbdWarpTestParam;
+#endif
 
 ::testing::internal::ParamGenerator<WarpTestParam>
 libaom_test::AV1WarpFilter::GetDefaultParams() {
@@ -42,6 +46,7 @@
   if ((rnd_.Rand8()) & 1) return -v;
   return v;
 }
+
 void AV1WarpFilterTest::generate_model(int32_t *mat, int32_t *alpha,
                                        int32_t *beta, int32_t *gamma,
                                        int32_t *delta) {
@@ -73,7 +78,7 @@
              (1 << WARPEDMODEL_PREC_BITS);
 
     if ((4 * abs(*alpha) + 7 * abs(*beta) > (1 << WARPEDMODEL_PREC_BITS)) ||
-        (4 * abs(*gamma) + 7 * abs(*delta) > (1 << WARPEDMODEL_PREC_BITS)))
+        (4 * abs(*gamma) + 4 * abs(*delta) > (1 << WARPEDMODEL_PREC_BITS)))
       continue;
 
     // We have a valid model, so finish
@@ -103,7 +108,6 @@
     memset(input + i * stride + w, input[i * stride + (w - 1)], border);
   }
 
-  /* Try different sizes of prediction block */
   for (i = 0; i < num_iters; ++i) {
     for (sub_x = 0; sub_x < 2; ++sub_x)
       for (sub_y = 0; sub_y < 2; ++sub_y) {
@@ -121,3 +125,122 @@
       }
   }
 }
+
+#if CONFIG_AOM_HIGHBITDEPTH
+::testing::internal::ParamGenerator<HighbdWarpTestParam>
+libaom_test::AV1HighbdWarpFilter::GetDefaultParams() {
+  const HighbdWarpTestParam defaultParams[] = {
+    make_tuple(4, 4, 50000, 8),   make_tuple(8, 8, 50000, 8),
+    make_tuple(64, 64, 1000, 8),  make_tuple(4, 16, 20000, 8),
+    make_tuple(32, 8, 10000, 8),  make_tuple(4, 4, 50000, 10),
+    make_tuple(8, 8, 50000, 10),  make_tuple(64, 64, 1000, 10),
+    make_tuple(4, 16, 20000, 10), make_tuple(32, 8, 10000, 10),
+    make_tuple(4, 4, 50000, 12),  make_tuple(8, 8, 50000, 12),
+    make_tuple(64, 64, 1000, 12), make_tuple(4, 16, 20000, 12),
+    make_tuple(32, 8, 10000, 12),
+  };
+  return ::testing::ValuesIn(defaultParams);
+}
+
+AV1HighbdWarpFilterTest::~AV1HighbdWarpFilterTest() {}
+void AV1HighbdWarpFilterTest::SetUp() {
+  rnd_.Reset(ACMRandom::DeterministicSeed());
+}
+
+void AV1HighbdWarpFilterTest::TearDown() { libaom_test::ClearSystemState(); }
+
+int32_t AV1HighbdWarpFilterTest::random_param(int bits) {
+  // 1 in 8 chance of generating zero (arbitrarily chosen)
+  if (((rnd_.Rand8()) & 7) == 0) return 0;
+  // Otherwise, generate uniform values in the range
+  // [-(1 << bits), -1] U [1, 1 << bits]
+  int32_t v = 1 + (rnd_.Rand16() & ((1 << bits) - 1));
+  if ((rnd_.Rand8()) & 1) return -v;
+  return v;
+}
+
+void AV1HighbdWarpFilterTest::generate_model(int32_t *mat, int32_t *alpha,
+                                             int32_t *beta, int32_t *gamma,
+                                             int32_t *delta) {
+  while (1) {
+    mat[0] = random_param(WARPEDMODEL_PREC_BITS + 6);
+    mat[1] = random_param(WARPEDMODEL_PREC_BITS + 6);
+    mat[2] = (random_param(WARPEDMODEL_PREC_BITS - 3)) +
+             (1 << WARPEDMODEL_PREC_BITS);
+    mat[3] = random_param(WARPEDMODEL_PREC_BITS - 3);
+    // 50/50 chance of generating ROTZOOM vs. AFFINE models
+    if (rnd_.Rand8() & 1) {
+      // AFFINE
+      mat[4] = random_param(WARPEDMODEL_PREC_BITS - 3);
+      mat[5] = (random_param(WARPEDMODEL_PREC_BITS - 3)) +
+               (1 << WARPEDMODEL_PREC_BITS);
+    } else {
+      mat[4] = -mat[3];
+      mat[5] = mat[2];
+    }
+
+    // Calculate the derived parameters and check that they are suitable
+    // for the warp filter.
+    assert(mat[2] != 0);
+
+    *alpha = mat[2] - (1 << WARPEDMODEL_PREC_BITS);
+    *beta = mat[3];
+    *gamma = ((int64_t)mat[4] << WARPEDMODEL_PREC_BITS) / mat[2];
+    *delta = mat[5] - (((int64_t)mat[3] * mat[4] + (mat[2] / 2)) / mat[2]) -
+             (1 << WARPEDMODEL_PREC_BITS);
+
+    if ((4 * abs(*alpha) + 7 * abs(*beta) > (1 << WARPEDMODEL_PREC_BITS)) ||
+        (4 * abs(*gamma) + 4 * abs(*delta) > (1 << WARPEDMODEL_PREC_BITS)))
+      continue;
+
+    // We have a valid model, so finish
+    return;
+  }
+}
+
+void AV1HighbdWarpFilterTest::RunCheckOutput(
+    highbd_warp_affine_func test_impl) {
+  const int w = 128, h = 128;
+  const int border = 16;
+  const int stride = w + 2 * border;
+  const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
+  const int num_iters = GET_PARAM(2);
+  const int bd = GET_PARAM(3);
+  const int mask = (1 << bd) - 1;
+  int i, j, sub_x, sub_y;
+
+  uint16_t *input_ = new uint16_t[h * stride];
+  uint16_t *input = input_ + border;
+  uint16_t *output = new uint16_t[out_w * out_h];
+  uint16_t *output2 = new uint16_t[out_w * out_h];
+  int32_t mat[8], alpha, beta, gamma, delta;
+
+  // Generate an input block and extend its borders horizontally
+  for (i = 0; i < h; ++i)
+    for (j = 0; j < w; ++j) input[i * stride + j] = rnd_.Rand16() & mask;
+  for (i = 0; i < h; ++i) {
+    for (j = 0; j < border; ++j) {
+      input[i * stride - border + j] = input[i * stride];
+      input[i * stride + w + j] = input[i * stride + (w - 1)];
+    }
+  }
+
+  for (i = 0; i < num_iters; ++i) {
+    for (sub_x = 0; sub_x < 2; ++sub_x)
+      for (sub_y = 0; sub_y < 2; ++sub_y) {
+        generate_model(mat, &alpha, &beta, &gamma, &delta);
+
+        av1_highbd_warp_affine_c(mat, input, w, h, stride, output, 32, 32,
+                                 out_w, out_h, out_w, sub_x, sub_y, bd, 0,
+                                 alpha, beta, gamma, delta);
+        test_impl(mat, input, w, h, stride, output2, 32, 32, out_w, out_h,
+                  out_w, sub_x, sub_y, bd, 0, alpha, beta, gamma, delta);
+
+        for (j = 0; j < out_w * out_h; ++j)
+          ASSERT_EQ(output[j], output2[j])
+              << "Pixel mismatch at index " << j << " = (" << (j % out_w)
+              << ", " << (j / out_w) << ") on iteration " << i;
+      }
+  }
+}
+#endif  // CONFIG_AOM_HIGHBITDEPTH
diff --git a/test/warp_filter_test_util.h b/test/warp_filter_test_util.h
index 69dd14b..facd8cb 100644
--- a/test/warp_filter_test_util.h
+++ b/test/warp_filter_test_util.h
@@ -56,6 +56,39 @@
 
 }  // namespace AV1WarpFilter
 
+#if CONFIG_AOM_HIGHBITDEPTH
+namespace AV1HighbdWarpFilter {
+typedef void (*highbd_warp_affine_func)(
+    int32_t *mat, uint16_t *ref, int width, int height, int stride,
+    uint16_t *pred, int p_col, int p_row, int p_width, int p_height,
+    int p_stride, int subsampling_x, int subsampling_y, int bd, int ref_frm,
+    int32_t alpha, int32_t beta, int32_t gamma, int32_t delta);
+
+typedef std::tr1::tuple<int, int, int, int> HighbdWarpTestParam;
+
+::testing::internal::ParamGenerator<HighbdWarpTestParam> GetDefaultParams();
+
+class AV1HighbdWarpFilterTest
+    : public ::testing::TestWithParam<HighbdWarpTestParam> {
+ public:
+  virtual ~AV1HighbdWarpFilterTest();
+  virtual void SetUp();
+
+  virtual void TearDown();
+
+ protected:
+  int32_t random_param(int bits);
+  void generate_model(int32_t *mat, int32_t *alpha, int32_t *beta,
+                      int32_t *gamma, int32_t *delta);
+
+  void RunCheckOutput(highbd_warp_affine_func test_impl);
+
+  libaom_test::ACMRandom rnd_;
+};
+
+}  // namespace AV1HighbdWarpFilter
+#endif  // CONFIG_AOM_HIGHBITDEPTH
+
 }  // namespace libaom_test
 
 #endif  // TEST_WARP_FILTER_TEST_UTIL_H_