test/warp_filter_test_util.cc - aom - Git at Google

 /*
  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
  *
  * This source code is subject to the terms of the BSD 2 Clause License and
  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
  * was not distributed with this source code in the LICENSE file, you can
  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
  */
 #include <memory>
 #include <new>

 #include "aom_ports/aom_timer.h"
 #include "test/warp_filter_test_util.h"

 using std::make_tuple;
 using std::tuple;

 namespace libaom_test {

 // Draws one random warp-model coefficient from |rnd|.
 //
 // When |rnd_gen_zeros| is non-zero, one Rand8() value is consumed first and
 // the function returns 0 if its low three bits are all clear (a 1 in 8
 // chance, arbitrarily chosen). Speed tests pass 0 here so that zeros are only
 // introduced through the is_*_zero parameters.
 //
 // Otherwise the magnitude is 1 + (Rand16() & ((1 << bits) - 1)) and the sign
 // comes from the low bit of a further Rand8() call, so the result is a
 // non-zero value inside [-(1 << bits), -1] U [1, 1 << bits].
 int32_t random_warped_param(libaom_test::ACMRandom *rnd, int bits,
                             int rnd_gen_zeros) {
   if (rnd_gen_zeros && (rnd->Rand8() & 7) == 0) return 0;

   const int32_t magnitude_mask = (1 << bits) - 1;
   const int32_t magnitude = 1 + (rnd->Rand16() & magnitude_mask);
   const bool negate = (rnd->Rand8() & 1) != 0;
   return negate ? -magnitude : magnitude;
 }

 // Generates a random warp model in |mat| together with the derived shear
 // parameters |alpha|, |beta|, |gamma| and |delta|, retrying until the model
 // passes the range check performed below.
 //
 // Only mat[0]..mat[5] are written. Each is_*_zero flag that equals 1
 // overrides the matching matrix entry so that the corresponding derived
 // parameter comes out as zero. |rnd_gen_zeros| is forwarded unchanged to
 // random_warped_param().
 void generate_warped_model(libaom_test::ACMRandom *rnd, int32_t *mat,
                            int16_t *alpha, int16_t *beta, int16_t *gamma,
                            int16_t *delta, const int is_alpha_zero,
                            const int is_beta_zero, const int is_gamma_zero,
                            const int is_delta_zero, const int rnd_gen_zeros) {
   const int unity = 1 << WARPEDMODEL_PREC_BITS;
   const int wide_bits = WARPEDMODEL_PREC_BITS + 6;
   const int narrow_bits = WARPEDMODEL_PREC_BITS - 3;

   for (;;) {
     const int model_kind = rnd->Rand8() & 3;
     mat[0] = random_warped_param(rnd, wide_bits, rnd_gen_zeros);
     mat[1] = random_warped_param(rnd, wide_bits, rnd_gen_zeros);
     mat[2] = random_warped_param(rnd, narrow_bits, rnd_gen_zeros) + unity;
     mat[3] = random_warped_param(rnd, narrow_bits, rnd_gen_zeros);

     if (model_kind == 2) {
       // Derive the second row from the first one instead of drawing it.
       mat[4] = -mat[3];
       mat[5] = mat[2];
     } else {
       // AFFINE: the second row gets its own random entries.
       mat[4] = random_warped_param(rnd, narrow_bits, rnd_gen_zeros);
       mat[5] = random_warped_param(rnd, narrow_bits, rnd_gen_zeros) + unity;
     }

     // Forced zeros requested by the caller.
     if (is_alpha_zero == 1) mat[2] = unity;
     if (is_beta_zero == 1) mat[3] = 0;
     if (is_gamma_zero == 1) mat[4] = 0;
     if (is_delta_zero == 1) {
       // Choose mat[5] so that the delta expression below evaluates to zero.
       mat[5] = static_cast<int32_t>(
           ((static_cast<int64_t>(mat[3]) * mat[4] + (mat[2] / 2)) / mat[2]) +
           unity);
     }

     // Calculate the derived parameters and check that they are suitable
     // for the warp filter.
     assert(mat[2] != 0);

     const int64_t gamma_wide = (static_cast<int64_t>(mat[4]) * unity) / mat[2];
     const int64_t delta_wide =
         mat[5] -
         ((static_cast<int64_t>(mat[3]) * mat[4] + (mat[2] / 2)) / mat[2]) -
         unity;

     *alpha =
         static_cast<int16_t>(clamp(mat[2] - unity, INT16_MIN, INT16_MAX));
     *beta = static_cast<int16_t>(clamp(mat[3], INT16_MIN, INT16_MAX));
     *gamma = static_cast<int16_t>(clamp64(gamma_wide, INT16_MIN, INT16_MAX));
     *delta = static_cast<int16_t>(clamp64(delta_wide, INT16_MIN, INT16_MAX));

     const bool first_pair_ok = 4 * abs(*alpha) + 7 * abs(*beta) < unity;
     const bool second_pair_ok = 4 * abs(*gamma) + 4 * abs(*delta) < unity;
     if (!(first_pair_ok && second_pair_ok)) continue;

     // Round every derived parameter to a multiple of
     // (1 << WARP_PARAM_REDUCE_BITS).
     int16_t *const derived[] = { alpha, beta, gamma, delta };
     for (int16_t *param : derived) {
       *param = ROUND_POWER_OF_TWO_SIGNED(*param, WARP_PARAM_REDUCE_BITS) *
                (1 << WARP_PARAM_REDUCE_BITS);
     }

     // We have a valid model, so finish
     return;
   }
 }

 namespace AV1WarpFilter {
 // Builds the parameter generator for the 8-bit warp filter tests: every
 // (out_w, out_h, num_iters, filter) entry of the table below is combined with
 // all 0/1 settings of the four is_*_zero flags.
 ::testing::internal::ParamGenerator<WarpTestParams> BuildParams(
     warp_affine_func filter) {
   const WarpTestParam block_configs[] = {
     make_tuple(4, 4, 5000, filter),
     make_tuple(8, 8, 5000, filter),
     make_tuple(64, 64, 100, filter),
     make_tuple(4, 16, 2000, filter),
     make_tuple(32, 8, 1000, filter),
   };
   return ::testing::Combine(::testing::ValuesIn(block_configs),
                             ::testing::Values(0, 1),   // is_alpha_zero
                             ::testing::Values(0, 1),   // is_beta_zero
                             ::testing::Values(0, 1),   // is_gamma_zero
                             ::testing::Values(0, 1));  // is_delta_zero
 }

 // Nothing to release beyond what the members clean up themselves.
 AV1WarpFilterTest::~AV1WarpFilterTest() = default;

 // Re-seeds the fixture's random generator with the deterministic seed before
 // every test.
 void AV1WarpFilterTest::SetUp() {
   const auto seed = ACMRandom::DeterministicSeed();
   rnd_.Reset(seed);
 }

 // Benchmarks |test_impl| for the block size selected by the test parameters.
 //
 // One warp model (generated with rnd_gen_zeros == 0) and one random 128x128
 // input with horizontally extended borders are produced, |test_impl| is
 // called 1000000000 / (out_w + out_h) times, and the mean time per call is
 // printed.
 void AV1WarpFilterTest::RunSpeedTest(warp_affine_func test_impl) {
   const int w = 128, h = 128;
   const int border = 16;
   const int stride = w + 2 * border;
   const int bd = 8;

   const WarpTestParam params = GET_PARAM(0);
   const int out_w = std::get<0>(params);
   const int out_h = std::get<1>(params);
   const int is_alpha_zero = GET_PARAM(1);
   const int is_beta_zero = GET_PARAM(2);
   const int is_gamma_zero = GET_PARAM(3);
   const int is_delta_zero = GET_PARAM(4);

   std::unique_ptr<uint8_t[]> input_(new (std::nothrow) uint8_t[h * stride]);
   ASSERT_NE(input_, nullptr);
   // Skip the left border so that input[r * stride] is the first pixel of
   // row r.
   uint8_t *const input = input_.get() + border;

   // The warp functions always write rows with widths that are multiples of 8.
   // So to avoid a buffer overflow, we may need to pad rows to a multiple of 8.
   const int output_n = ((out_w + 7) & ~7) * out_h;
   std::unique_ptr<uint8_t[]> output(new (std::nothrow) uint8_t[output_n]);
   ASSERT_NE(output, nullptr);

   ConvolveParams conv_params = get_conv_params(0, 0, bd);
   std::unique_ptr<CONV_BUF_TYPE[]> dsta(new (std::nothrow)
                                             CONV_BUF_TYPE[output_n]);
   ASSERT_NE(dsta, nullptr);

   int32_t mat[8];
   int16_t alpha, beta, gamma, delta;
   generate_warped_model(&rnd_, mat, &alpha, &beta, &gamma, &delta,
                         is_alpha_zero, is_beta_zero, is_gamma_zero,
                         is_delta_zero, 0);

   // Fill each row with random pixels, then replicate its first and last
   // pixel into the left and right borders.
   for (int r = 0; r < h; ++r) {
     uint8_t *const row = input + r * stride;
     for (int c = 0; c < w; ++c) row[c] = rnd_.Rand8();
     memset(row - border, row[0], border);
     memset(row + w, row[w - 1], border);
   }

   const int sub_x = 0;
   const int sub_y = 0;
   const int do_average = 0;

   conv_params =
       get_conv_params_no_round(do_average, 0, dsta.get(), out_w, 1, bd);
   conv_params.use_dist_wtd_comp_avg = 0;

   const int num_loops = 1000000000 / (out_w + out_h);
   aom_usec_timer timer;
   aom_usec_timer_start(&timer);
   for (int loop = 0; loop < num_loops; ++loop) {
     test_impl(mat, input, w, h, stride, output.get(), 32, 32, out_w, out_h,
               out_w, sub_x, sub_y, &conv_params, alpha, beta, gamma, delta);
   }
   aom_usec_timer_mark(&timer);

   const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
   const double ns_per_call = 1000.0 * elapsed_time / num_loops;
   printf("warp %3dx%-3d alpha=%d beta=%d gamma=%d delta=%d: %7.2f ns \n", out_w,
          out_h, alpha, beta, gamma, delta, ns_per_call);
 }

 // Checks that |test_impl| matches av1_warp_affine_c() exactly.
 //
 // For each of num_iters random inputs, a fresh warp model is generated for
 // every (sub_x, sub_y) pair and both implementations are run for every
 // combination of ii, jj and do_average. jj < 4 selects a pair of
 // quant_dist_lookup_table weights; jj == 4 disables distance weighting.
 // The 8-bit outputs are always compared; the CONV_BUF_TYPE buffers are
 // compared as well when the randomly chosen use_no_round flag is set.
 void AV1WarpFilterTest::RunCheckOutput(warp_affine_func test_impl) {
   const int w = 128, h = 128;
   const int border = 16;
   const int stride = w + 2 * border;
   const int bd = 8;

   const WarpTestParam params = GET_PARAM(0);
   const int is_alpha_zero = GET_PARAM(1);
   const int is_beta_zero = GET_PARAM(2);
   const int is_gamma_zero = GET_PARAM(3);
   const int is_delta_zero = GET_PARAM(4);
   const int out_w = std::get<0>(params);
   const int out_h = std::get<1>(params);
   const int num_iters = std::get<2>(params);
   const int checked_n = out_w * out_h;

   // The warp functions always write rows with widths that are multiples of 8.
   // So to avoid a buffer overflow, we may need to pad rows to a multiple of 8.
   const int output_n = ((out_w + 7) & ~7) * out_h;

   std::unique_ptr<uint8_t[]> input_(new (std::nothrow) uint8_t[h * stride]);
   ASSERT_NE(input_, nullptr);
   uint8_t *const input = input_.get() + border;
   std::unique_ptr<uint8_t[]> output(new (std::nothrow) uint8_t[output_n]);
   ASSERT_NE(output, nullptr);
   std::unique_ptr<uint8_t[]> output2(new (std::nothrow) uint8_t[output_n]);
   ASSERT_NE(output2, nullptr);

   ConvolveParams conv_params = get_conv_params(0, 0, bd);
   std::unique_ptr<CONV_BUF_TYPE[]> dsta(new (std::nothrow)
                                             CONV_BUF_TYPE[output_n]);
   ASSERT_NE(dsta, nullptr);
   std::unique_ptr<CONV_BUF_TYPE[]> dstb(new (std::nothrow)
                                             CONV_BUF_TYPE[output_n]);
   ASSERT_NE(dstb, nullptr);

   // Start both output buffers from the same random contents.
   for (int n = 0; n < output_n; ++n) {
     const uint8_t initial = rnd_.Rand8();
     output2[n] = initial;
     output[n] = initial;
   }

   // Applies the distance-weight configuration selected by (jj, ii).
   const auto set_dist_weights = [](ConvolveParams *cp, int jj, int ii) {
     if (jj >= 4) {
       cp->use_dist_wtd_comp_avg = 0;
       return;
     }
     cp->use_dist_wtd_comp_avg = 1;
     cp->fwd_offset = quant_dist_lookup_table[jj][ii];
     cp->bck_offset = quant_dist_lookup_table[jj][1 - ii];
   };

   int32_t mat[8];
   int16_t alpha, beta, gamma, delta;

   for (int iter = 0; iter < num_iters; ++iter) {
     // Generate an input block and extend its borders horizontally
     for (int r = 0; r < h; ++r) {
       uint8_t *const row = input + r * stride;
       for (int c = 0; c < w; ++c) row[c] = rnd_.Rand8();
       memset(row - border, row[0], border);
       memset(row + w, row[w - 1], border);
     }
     const int use_no_round = rnd_.Rand8() & 1;

     for (int sub_x = 0; sub_x < 2; ++sub_x) {
       for (int sub_y = 0; sub_y < 2; ++sub_y) {
         generate_warped_model(&rnd_, mat, &alpha, &beta, &gamma, &delta,
                               is_alpha_zero, is_beta_zero, is_gamma_zero,
                               is_delta_zero, 1);

         for (int ii = 0; ii < 2; ++ii) {
           for (int jj = 0; jj < 5; ++jj) {
             for (int do_average = 0; do_average <= 1; ++do_average) {
               // Reference run, writing to output / dsta.
               conv_params =
                   use_no_round
                       ? get_conv_params_no_round(do_average, 0, dsta.get(),
                                                  out_w, 1, bd)
                       : get_conv_params(0, 0, bd);
               set_dist_weights(&conv_params, jj, ii);
               av1_warp_affine_c(mat, input, w, h, stride, output.get(), 32, 32,
                                 out_w, out_h, out_w, sub_x, sub_y, &conv_params,
                                 alpha, beta, gamma, delta);

               // Run under test, writing to output2 / dstb.
               if (use_no_round) {
                 conv_params = get_conv_params_no_round(
                     do_average, 0, dstb.get(), out_w, 1, bd);
               }
               set_dist_weights(&conv_params, jj, ii);
               test_impl(mat, input, w, h, stride, output2.get(), 32, 32, out_w,
                         out_h, out_w, sub_x, sub_y, &conv_params, alpha, beta,
                         gamma, delta);

               if (use_no_round) {
                 for (int j = 0; j < checked_n; ++j)
                   ASSERT_EQ(dsta[j], dstb[j])
                       << "Pixel mismatch at index " << j << " = ("
                       << (j % out_w) << ", " << (j / out_w) << ") on iteration "
                       << iter;
               }
               for (int j = 0; j < checked_n; ++j)
                 ASSERT_EQ(output[j], output2[j])
                     << "Pixel mismatch at index " << j << " = ("
                     << (j % out_w) << ", " << (j / out_w) << ") on iteration "
                     << iter;
             }
           }
         }
       }
     }
   }
 }
 }  // namespace AV1WarpFilter

 #if CONFIG_AV1_HIGHBITDEPTH
 namespace AV1HighbdWarpFilter {
 // Builds the parameter generator for the high bit depth warp filter tests:
 // every block size below is paired with bit depths 8, 10 and 12 (100
 // iterations each) and combined with all 0/1 settings of the four is_*_zero
 // flags.
 ::testing::internal::ParamGenerator<HighbdWarpTestParams> BuildParams(
     highbd_warp_affine_func filter) {
   const int block_sizes[5][2] = {
     { 4, 4 }, { 8, 8 }, { 64, 64 }, { 4, 16 }, { 32, 8 },
   };
   const int bit_depths[3] = { 8, 10, 12 };

   // Bit depth is the outer loop so that the entries keep the order
   // "all sizes at 8 bits, all sizes at 10 bits, all sizes at 12 bits".
   HighbdWarpTestParam params[15];
   int count = 0;
   for (const int bd : bit_depths) {
     for (const auto &size : block_sizes) {
       params[count++] = make_tuple(size[0], size[1], 100, bd, filter);
     }
   }
   return ::testing::Combine(::testing::ValuesIn(params),
                             ::testing::Values(0, 1),   // is_alpha_zero
                             ::testing::Values(0, 1),   // is_beta_zero
                             ::testing::Values(0, 1),   // is_gamma_zero
                             ::testing::Values(0, 1));  // is_delta_zero
 }

 // Nothing to release beyond what the members clean up themselves.
 AV1HighbdWarpFilterTest::~AV1HighbdWarpFilterTest() = default;

 // Re-seeds the fixture's random generator with the deterministic seed before
 // every test.
 void AV1HighbdWarpFilterTest::SetUp() {
   const auto seed = ACMRandom::DeterministicSeed();
   rnd_.Reset(seed);
 }

 // Benchmarks |test_impl| for the block size and bit depth selected by the
 // test parameters.
 //
 // One warp model (generated with rnd_gen_zeros == 0) and one random 128x128
 // input, masked to |bd| bits and with horizontally extended borders, are
 // produced. |test_impl| is then called 1000000000 / (out_w + out_h) times and
 // the mean time per call is printed.
 void AV1HighbdWarpFilterTest::RunSpeedTest(highbd_warp_affine_func test_impl) {
   const int w = 128, h = 128;
   const int border = 16;
   const int stride = w + 2 * border;
   HighbdWarpTestParam param = GET_PARAM(0);
   const int is_alpha_zero = GET_PARAM(1);
   const int is_beta_zero = GET_PARAM(2);
   const int is_gamma_zero = GET_PARAM(3);
   const int is_delta_zero = GET_PARAM(4);
   const int out_w = std::get<0>(param), out_h = std::get<1>(param);
   const int bd = std::get<3>(param);
   const int mask = (1 << bd) - 1;

   // The warp functions always write rows with widths that are multiples of 8.
   // So to avoid a buffer overflow, we may need to pad rows to a multiple of 8.
   int output_n = ((out_w + 7) & ~7) * out_h;
   std::unique_ptr<uint16_t[]> input_(new (std::nothrow) uint16_t[h * stride]);
   ASSERT_NE(input_, nullptr);
   // Skip the left border so that input[r * stride] is the first pixel of
   // row r.
   uint16_t *input = input_.get() + border;
   std::unique_ptr<uint16_t[]> output(new (std::nothrow) uint16_t[output_n]);
   ASSERT_NE(output, nullptr);
   int32_t mat[8];
   int16_t alpha, beta, gamma, delta;
   ConvolveParams conv_params = get_conv_params(0, 0, bd);
   std::unique_ptr<CONV_BUF_TYPE[]> dsta(new (std::nothrow)
                                             CONV_BUF_TYPE[output_n]);
   ASSERT_NE(dsta, nullptr);

   generate_warped_model(&rnd_, mat, &alpha, &beta, &gamma, &delta,
                         is_alpha_zero, is_beta_zero, is_gamma_zero,
                         is_delta_zero, 0);
   // Generate an input block and extend its borders horizontally
   for (int r = 0; r < h; ++r)
     for (int c = 0; c < w; ++c) input[r * stride + c] = rnd_.Rand16() & mask;
   for (int r = 0; r < h; ++r) {
     for (int c = 0; c < border; ++c) {
       input[r * stride - border + c] = input[r * stride];
       input[r * stride + w + c] = input[r * stride + (w - 1)];
     }
   }

   const int sub_x = 0;
   const int sub_y = 0;
   const int do_average = 0;
   // The flag must be set after conv_params is replaced as a whole; assigning
   // it first would be overwritten by the struct assignment. This matches the
   // order used by AV1WarpFilterTest::RunSpeedTest.
   conv_params =
       get_conv_params_no_round(do_average, 0, dsta.get(), out_w, 1, bd);
   conv_params.use_dist_wtd_comp_avg = 0;

   const int num_loops = 1000000000 / (out_w + out_h);
   aom_usec_timer timer;
   aom_usec_timer_start(&timer);

   for (int i = 0; i < num_loops; ++i)
     test_impl(mat, input, w, h, stride, output.get(), 32, 32, out_w, out_h,
               out_w, sub_x, sub_y, bd, &conv_params, alpha, beta, gamma, delta);

   aom_usec_timer_mark(&timer);
   const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
   printf("highbd warp %3dx%-3d alpha=%d beta=%d gamma=%d delta=%d: %7.2f ns \n",
          out_w, out_h, alpha, beta, gamma, delta,
          1000.0 * elapsed_time / num_loops);
 }

 // Checks that |test_impl| matches av1_highbd_warp_affine_c() exactly.
 //
 // For each of num_iters random inputs (masked to |bd| bits), a fresh warp
 // model is generated for every (sub_x, sub_y) pair and both implementations
 // are run for every combination of ii, jj and do_average. jj < 4 selects a
 // pair of quant_dist_lookup_table weights; jj == 4 disables distance
 // weighting. The 16-bit outputs are always compared; the CONV_BUF_TYPE
 // buffers are compared as well when the randomly chosen use_no_round flag is
 // set.
 void AV1HighbdWarpFilterTest::RunCheckOutput(
     highbd_warp_affine_func test_impl) {
   const int w = 128, h = 128;
   const int border = 16;
   const int stride = w + 2 * border;

   const HighbdWarpTestParam param = GET_PARAM(0);
   const int is_alpha_zero = GET_PARAM(1);
   const int is_beta_zero = GET_PARAM(2);
   const int is_gamma_zero = GET_PARAM(3);
   const int is_delta_zero = GET_PARAM(4);
   const int out_w = std::get<0>(param);
   const int out_h = std::get<1>(param);
   const int num_iters = std::get<2>(param);
   const int bd = std::get<3>(param);
   const int mask = (1 << bd) - 1;
   const int checked_n = out_w * out_h;

   // The warp functions always write rows with widths that are multiples of 8.
   // So to avoid a buffer overflow, we may need to pad rows to a multiple of 8.
   const int output_n = ((out_w + 7) & ~7) * out_h;

   std::unique_ptr<uint16_t[]> input_(new (std::nothrow) uint16_t[h * stride]);
   ASSERT_NE(input_, nullptr);
   uint16_t *const input = input_.get() + border;
   std::unique_ptr<uint16_t[]> output(new (std::nothrow) uint16_t[output_n]);
   ASSERT_NE(output, nullptr);
   std::unique_ptr<uint16_t[]> output2(new (std::nothrow) uint16_t[output_n]);
   ASSERT_NE(output2, nullptr);

   ConvolveParams conv_params = get_conv_params(0, 0, bd);
   std::unique_ptr<CONV_BUF_TYPE[]> dsta(new (std::nothrow)
                                             CONV_BUF_TYPE[output_n]);
   ASSERT_NE(dsta, nullptr);
   std::unique_ptr<CONV_BUF_TYPE[]> dstb(new (std::nothrow)
                                             CONV_BUF_TYPE[output_n]);
   ASSERT_NE(dstb, nullptr);

   // Start both output buffers from the same random contents.
   for (int n = 0; n < output_n; ++n) {
     const uint16_t initial = rnd_.Rand16();
     output2[n] = initial;
     output[n] = initial;
   }

   // Applies the distance-weight configuration selected by (jj, ii).
   const auto set_dist_weights = [](ConvolveParams *cp, int jj, int ii) {
     if (jj >= 4) {
       cp->use_dist_wtd_comp_avg = 0;
       return;
     }
     cp->use_dist_wtd_comp_avg = 1;
     cp->fwd_offset = quant_dist_lookup_table[jj][ii];
     cp->bck_offset = quant_dist_lookup_table[jj][1 - ii];
   };

   int32_t mat[8];
   int16_t alpha, beta, gamma, delta;

   for (int iter = 0; iter < num_iters; ++iter) {
     // Generate an input block and extend its borders horizontally
     for (int r = 0; r < h; ++r) {
       uint16_t *const row = input + r * stride;
       for (int c = 0; c < w; ++c) row[c] = rnd_.Rand16() & mask;
       for (int c = 0; c < border; ++c) {
         row[c - border] = row[0];
         row[w + c] = row[w - 1];
       }
     }
     const int use_no_round = rnd_.Rand8() & 1;

     for (int sub_x = 0; sub_x < 2; ++sub_x) {
       for (int sub_y = 0; sub_y < 2; ++sub_y) {
         generate_warped_model(&rnd_, mat, &alpha, &beta, &gamma, &delta,
                               is_alpha_zero, is_beta_zero, is_gamma_zero,
                               is_delta_zero, 1);

         for (int ii = 0; ii < 2; ++ii) {
           for (int jj = 0; jj < 5; ++jj) {
             for (int do_average = 0; do_average <= 1; ++do_average) {
               // Reference run, writing to output / dsta.
               conv_params =
                   use_no_round
                       ? get_conv_params_no_round(do_average, 0, dsta.get(),
                                                  out_w, 1, bd)
                       : get_conv_params(0, 0, bd);
               set_dist_weights(&conv_params, jj, ii);
               av1_highbd_warp_affine_c(mat, input, w, h, stride, output.get(),
                                        32, 32, out_w, out_h, out_w, sub_x,
                                        sub_y, bd, &conv_params, alpha, beta,
                                        gamma, delta);

               // Run under test, writing to output2 / dstb.
               if (use_no_round) {
                 // TODO(angiebird): Change this to test_impl once we have SIMD
                 // implementation
                 conv_params = get_conv_params_no_round(
                     do_average, 0, dstb.get(), out_w, 1, bd);
               }
               set_dist_weights(&conv_params, jj, ii);
               test_impl(mat, input, w, h, stride, output2.get(), 32, 32, out_w,
                         out_h, out_w, sub_x, sub_y, bd, &conv_params, alpha,
                         beta, gamma, delta);

               if (use_no_round) {
                 for (int j = 0; j < checked_n; ++j)
                   ASSERT_EQ(dsta[j], dstb[j])
                       << "Pixel mismatch at index " << j << " = ("
                       << (j % out_w) << ", " << (j / out_w) << ") on iteration "
                       << iter;
               }
               for (int j = 0; j < checked_n; ++j)
                 ASSERT_EQ(output[j], output2[j])
                     << "Pixel mismatch at index " << j << " = ("
                     << (j % out_w) << ", " << (j / out_w) << ") on iteration "
                     << iter;
             }
           }
         }
       }
     }
   }
 }
 }  // namespace AV1HighbdWarpFilter
 #endif  // CONFIG_AV1_HIGHBITDEPTH
 }  // namespace libaom_test
	/*
	* Copyright (c) 2016, Alliance for Open Media. All rights reserved
	*
	* This source code is subject to the terms of the BSD 2 Clause License and
	* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
	* was not distributed with this source code in the LICENSE file, you can
	* obtain it at www.aomedia.org/license/software. If the Alliance for Open
	* Media Patent License 1.0 was not distributed with this source code in the
	* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
	*/
	#include <memory>
	#include <new>

	#include "aom_ports/aom_timer.h"
	#include "test/warp_filter_test_util.h"

	using std::make_tuple;
	using std::tuple;

	namespace libaom_test {

	// Draws one random warp-model coefficient from |rnd|.
	//
	// When |rnd_gen_zeros| is non-zero, one Rand8() value is consumed first and
	// the function returns 0 if its low three bits are all clear (a 1 in 8
	// chance, arbitrarily chosen). Speed tests pass 0 here so that zeros are only
	// introduced through the is_*_zero parameters.
	//
	// Otherwise the magnitude is 1 + (Rand16() & ((1 << bits) - 1)) and the sign
	// comes from the low bit of a further Rand8() call, so the result is a
	// non-zero value inside [-(1 << bits), -1] U [1, 1 << bits].
	int32_t random_warped_param(libaom_test::ACMRandom *rnd, int bits,
	                            int rnd_gen_zeros) {
	  if (rnd_gen_zeros && (rnd->Rand8() & 7) == 0) return 0;

	  const int32_t magnitude_mask = (1 << bits) - 1;
	  const int32_t magnitude = 1 + (rnd->Rand16() & magnitude_mask);
	  const bool negate = (rnd->Rand8() & 1) != 0;
	  return negate ? -magnitude : magnitude;
	}

	void generate_warped_model(libaom_test::ACMRandom rnd, int32_t mat,
	int16_t alpha, int16_t beta, int16_t *gamma,
	int16_t *delta, const int is_alpha_zero,
	const int is_beta_zero, const int is_gamma_zero,
	const int is_delta_zero, const int rnd_gen_zeros) {
	while (true) {
	int rnd8 = rnd->Rand8() & 3;
	mat[0] = random_warped_param(rnd, WARPEDMODEL_PREC_BITS + 6, rnd_gen_zeros);
	mat[1] = random_warped_param(rnd, WARPEDMODEL_PREC_BITS + 6, rnd_gen_zeros);
	mat[2] =
	(random_warped_param(rnd, WARPEDMODEL_PREC_BITS - 3, rnd_gen_zeros)) +
	(1 << WARPEDMODEL_PREC_BITS);
	mat[3] = random_warped_param(rnd, WARPEDMODEL_PREC_BITS - 3, rnd_gen_zeros);

	if (rnd8 <= 1) {
	// AFFINE
	mat[4] =
	random_warped_param(rnd, WARPEDMODEL_PREC_BITS - 3, rnd_gen_zeros);
	mat[5] =
	(random_warped_param(rnd, WARPEDMODEL_PREC_BITS - 3, rnd_gen_zeros)) +
	(1 << WARPEDMODEL_PREC_BITS);
	} else if (rnd8 == 2) {
	mat[4] = -mat[3];
	mat[5] = mat[2];
	} else {
	mat[4] =
	random_warped_param(rnd, WARPEDMODEL_PREC_BITS - 3, rnd_gen_zeros);
	mat[5] =
	(random_warped_param(rnd, WARPEDMODEL_PREC_BITS - 3, rnd_gen_zeros)) +
	(1 << WARPEDMODEL_PREC_BITS);
	}

	if (is_alpha_zero == 1) {
	mat[2] = 1 << WARPEDMODEL_PREC_BITS;
	}
	if (is_beta_zero == 1) {
	mat[3] = 0;
	}
	if (is_gamma_zero == 1) {
	mat[4] = 0;
	}
	if (is_delta_zero == 1) {
	mat[5] = static_cast<int32_t>(
	((static_cast<int64_t>(mat[3]) * mat[4] + (mat[2] / 2)) / mat[2]) +
	(1 << WARPEDMODEL_PREC_BITS));
	}

	// Calculate the derived parameters and check that they are suitable
	// for the warp filter.
	assert(mat[2] != 0);

	*alpha = clamp(mat[2] - (1 << WARPEDMODEL_PREC_BITS), INT16_MIN, INT16_MAX);
	*beta = clamp(mat[3], INT16_MIN, INT16_MAX);
	*gamma = static_cast<int16_t>(clamp64(
	(static_cast<int64_t>(mat[4]) * (1 << WARPEDMODEL_PREC_BITS)) / mat[2],
	INT16_MIN, INT16_MAX));
	*delta = static_cast<int16_t>(clamp64(
	mat[5] -
	((static_cast<int64_t>(mat[3]) * mat[4] + (mat[2] / 2)) / mat[2]) -
	(1 << WARPEDMODEL_PREC_BITS),
	INT16_MIN, INT16_MAX));

	if ((4 * abs(alpha) + 7 abs(*beta) >= (1 << WARPEDMODEL_PREC_BITS)) \|\|
	(4 * abs(gamma) + 4 abs(*delta) >= (1 << WARPEDMODEL_PREC_BITS)))
	continue;

	alpha = ROUND_POWER_OF_TWO_SIGNED(alpha, WARP_PARAM_REDUCE_BITS) *
	(1 << WARP_PARAM_REDUCE_BITS);
	beta = ROUND_POWER_OF_TWO_SIGNED(beta, WARP_PARAM_REDUCE_BITS) *
	(1 << WARP_PARAM_REDUCE_BITS);
	gamma = ROUND_POWER_OF_TWO_SIGNED(gamma, WARP_PARAM_REDUCE_BITS) *
	(1 << WARP_PARAM_REDUCE_BITS);
	delta = ROUND_POWER_OF_TWO_SIGNED(delta, WARP_PARAM_REDUCE_BITS) *
	(1 << WARP_PARAM_REDUCE_BITS);

	// We have a valid model, so finish
	return;
	}
	}

	namespace AV1WarpFilter {
	// Builds the parameter generator for the 8-bit warp filter tests: every
	// (out_w, out_h, num_iters, filter) entry of the table below is combined with
	// all 0/1 settings of the four is_*_zero flags.
	::testing::internal::ParamGenerator<WarpTestParams> BuildParams(
	    warp_affine_func filter) {
	  const WarpTestParam block_configs[] = {
	    make_tuple(4, 4, 5000, filter),
	    make_tuple(8, 8, 5000, filter),
	    make_tuple(64, 64, 100, filter),
	    make_tuple(4, 16, 2000, filter),
	    make_tuple(32, 8, 1000, filter),
	  };
	  return ::testing::Combine(::testing::ValuesIn(block_configs),
	                            ::testing::Values(0, 1),   // is_alpha_zero
	                            ::testing::Values(0, 1),   // is_beta_zero
	                            ::testing::Values(0, 1),   // is_gamma_zero
	                            ::testing::Values(0, 1));  // is_delta_zero
	}

	// Nothing to release beyond what the members clean up themselves.
	AV1WarpFilterTest::~AV1WarpFilterTest() = default;

	// Re-seeds the fixture's random generator with the deterministic seed before
	// every test.
	void AV1WarpFilterTest::SetUp() {
	  const auto seed = ACMRandom::DeterministicSeed();
	  rnd_.Reset(seed);
	}

	// Benchmarks |test_impl| for the block size selected by the test parameters.
	//
	// One warp model (generated with rnd_gen_zeros == 0) and one random 128x128
	// input with horizontally extended borders are produced, |test_impl| is
	// called 1000000000 / (out_w + out_h) times, and the mean time per call is
	// printed.
	void AV1WarpFilterTest::RunSpeedTest(warp_affine_func test_impl) {
	  const int w = 128, h = 128;
	  const int border = 16;
	  const int stride = w + 2 * border;
	  const int bd = 8;

	  const WarpTestParam params = GET_PARAM(0);
	  const int out_w = std::get<0>(params);
	  const int out_h = std::get<1>(params);
	  const int is_alpha_zero = GET_PARAM(1);
	  const int is_beta_zero = GET_PARAM(2);
	  const int is_gamma_zero = GET_PARAM(3);
	  const int is_delta_zero = GET_PARAM(4);

	  std::unique_ptr<uint8_t[]> input_(new (std::nothrow) uint8_t[h * stride]);
	  ASSERT_NE(input_, nullptr);
	  // Skip the left border so that input[r * stride] is the first pixel of
	  // row r.
	  uint8_t *const input = input_.get() + border;

	  // The warp functions always write rows with widths that are multiples of 8.
	  // So to avoid a buffer overflow, we may need to pad rows to a multiple of 8.
	  const int output_n = ((out_w + 7) & ~7) * out_h;
	  std::unique_ptr<uint8_t[]> output(new (std::nothrow) uint8_t[output_n]);
	  ASSERT_NE(output, nullptr);

	  ConvolveParams conv_params = get_conv_params(0, 0, bd);
	  std::unique_ptr<CONV_BUF_TYPE[]> dsta(new (std::nothrow)
	                                            CONV_BUF_TYPE[output_n]);
	  ASSERT_NE(dsta, nullptr);

	  int32_t mat[8];
	  int16_t alpha, beta, gamma, delta;
	  generate_warped_model(&rnd_, mat, &alpha, &beta, &gamma, &delta,
	                        is_alpha_zero, is_beta_zero, is_gamma_zero,
	                        is_delta_zero, 0);

	  // Fill each row with random pixels, then replicate its first and last
	  // pixel into the left and right borders.
	  for (int r = 0; r < h; ++r) {
	    uint8_t *const row = input + r * stride;
	    for (int c = 0; c < w; ++c) row[c] = rnd_.Rand8();
	    memset(row - border, row[0], border);
	    memset(row + w, row[w - 1], border);
	  }

	  const int sub_x = 0;
	  const int sub_y = 0;
	  const int do_average = 0;

	  conv_params =
	      get_conv_params_no_round(do_average, 0, dsta.get(), out_w, 1, bd);
	  conv_params.use_dist_wtd_comp_avg = 0;

	  const int num_loops = 1000000000 / (out_w + out_h);
	  aom_usec_timer timer;
	  aom_usec_timer_start(&timer);
	  for (int loop = 0; loop < num_loops; ++loop) {
	    test_impl(mat, input, w, h, stride, output.get(), 32, 32, out_w, out_h,
	              out_w, sub_x, sub_y, &conv_params, alpha, beta, gamma, delta);
	  }
	  aom_usec_timer_mark(&timer);

	  const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
	  const double ns_per_call = 1000.0 * elapsed_time / num_loops;
	  printf("warp %3dx%-3d alpha=%d beta=%d gamma=%d delta=%d: %7.2f ns \n", out_w,
	         out_h, alpha, beta, gamma, delta, ns_per_call);
	}

	// Checks that |test_impl| matches av1_warp_affine_c() exactly.
	//
	// For each of num_iters random inputs, a fresh warp model is generated for
	// every (sub_x, sub_y) pair and both implementations are run for every
	// combination of ii, jj and do_average. jj < 4 selects a pair of
	// quant_dist_lookup_table weights; jj == 4 disables distance weighting.
	// The 8-bit outputs are always compared; the CONV_BUF_TYPE buffers are
	// compared as well when the randomly chosen use_no_round flag is set.
	void AV1WarpFilterTest::RunCheckOutput(warp_affine_func test_impl) {
	  const int w = 128, h = 128;
	  const int border = 16;
	  const int stride = w + 2 * border;
	  const int bd = 8;

	  const WarpTestParam params = GET_PARAM(0);
	  const int is_alpha_zero = GET_PARAM(1);
	  const int is_beta_zero = GET_PARAM(2);
	  const int is_gamma_zero = GET_PARAM(3);
	  const int is_delta_zero = GET_PARAM(4);
	  const int out_w = std::get<0>(params);
	  const int out_h = std::get<1>(params);
	  const int num_iters = std::get<2>(params);
	  const int checked_n = out_w * out_h;

	  // The warp functions always write rows with widths that are multiples of 8.
	  // So to avoid a buffer overflow, we may need to pad rows to a multiple of 8.
	  const int output_n = ((out_w + 7) & ~7) * out_h;

	  std::unique_ptr<uint8_t[]> input_(new (std::nothrow) uint8_t[h * stride]);
	  ASSERT_NE(input_, nullptr);
	  uint8_t *const input = input_.get() + border;
	  std::unique_ptr<uint8_t[]> output(new (std::nothrow) uint8_t[output_n]);
	  ASSERT_NE(output, nullptr);
	  std::unique_ptr<uint8_t[]> output2(new (std::nothrow) uint8_t[output_n]);
	  ASSERT_NE(output2, nullptr);

	  ConvolveParams conv_params = get_conv_params(0, 0, bd);
	  std::unique_ptr<CONV_BUF_TYPE[]> dsta(new (std::nothrow)
	                                            CONV_BUF_TYPE[output_n]);
	  ASSERT_NE(dsta, nullptr);
	  std::unique_ptr<CONV_BUF_TYPE[]> dstb(new (std::nothrow)
	                                            CONV_BUF_TYPE[output_n]);
	  ASSERT_NE(dstb, nullptr);

	  // Start both output buffers from the same random contents.
	  for (int n = 0; n < output_n; ++n) {
	    const uint8_t initial = rnd_.Rand8();
	    output2[n] = initial;
	    output[n] = initial;
	  }

	  // Applies the distance-weight configuration selected by (jj, ii).
	  const auto set_dist_weights = [](ConvolveParams *cp, int jj, int ii) {
	    if (jj >= 4) {
	      cp->use_dist_wtd_comp_avg = 0;
	      return;
	    }
	    cp->use_dist_wtd_comp_avg = 1;
	    cp->fwd_offset = quant_dist_lookup_table[jj][ii];
	    cp->bck_offset = quant_dist_lookup_table[jj][1 - ii];
	  };

	  int32_t mat[8];
	  int16_t alpha, beta, gamma, delta;

	  for (int iter = 0; iter < num_iters; ++iter) {
	    // Generate an input block and extend its borders horizontally
	    for (int r = 0; r < h; ++r) {
	      uint8_t *const row = input + r * stride;
	      for (int c = 0; c < w; ++c) row[c] = rnd_.Rand8();
	      memset(row - border, row[0], border);
	      memset(row + w, row[w - 1], border);
	    }
	    const int use_no_round = rnd_.Rand8() & 1;

	    for (int sub_x = 0; sub_x < 2; ++sub_x) {
	      for (int sub_y = 0; sub_y < 2; ++sub_y) {
	        generate_warped_model(&rnd_, mat, &alpha, &beta, &gamma, &delta,
	                              is_alpha_zero, is_beta_zero, is_gamma_zero,
	                              is_delta_zero, 1);

	        for (int ii = 0; ii < 2; ++ii) {
	          for (int jj = 0; jj < 5; ++jj) {
	            for (int do_average = 0; do_average <= 1; ++do_average) {
	              // Reference run, writing to output / dsta.
	              conv_params =
	                  use_no_round
	                      ? get_conv_params_no_round(do_average, 0, dsta.get(),
	                                                 out_w, 1, bd)
	                      : get_conv_params(0, 0, bd);
	              set_dist_weights(&conv_params, jj, ii);
	              av1_warp_affine_c(mat, input, w, h, stride, output.get(), 32, 32,
	                                out_w, out_h, out_w, sub_x, sub_y, &conv_params,
	                                alpha, beta, gamma, delta);

	              // Run under test, writing to output2 / dstb.
	              if (use_no_round) {
	                conv_params = get_conv_params_no_round(
	                    do_average, 0, dstb.get(), out_w, 1, bd);
	              }
	              set_dist_weights(&conv_params, jj, ii);
	              test_impl(mat, input, w, h, stride, output2.get(), 32, 32, out_w,
	                        out_h, out_w, sub_x, sub_y, &conv_params, alpha, beta,
	                        gamma, delta);

	              if (use_no_round) {
	                for (int j = 0; j < checked_n; ++j)
	                  ASSERT_EQ(dsta[j], dstb[j])
	                      << "Pixel mismatch at index " << j << " = ("
	                      << (j % out_w) << ", " << (j / out_w) << ") on iteration "
	                      << iter;
	              }
	              for (int j = 0; j < checked_n; ++j)
	                ASSERT_EQ(output[j], output2[j])
	                    << "Pixel mismatch at index " << j << " = ("
	                    << (j % out_w) << ", " << (j / out_w) << ") on iteration "
	                    << iter;
	            }
	          }
	        }
	      }
	    }
	  }
	}
	} // namespace AV1WarpFilter

	#if CONFIG_AV1_HIGHBITDEPTH
	namespace AV1HighbdWarpFilter {
	// Builds the parameter generator for the high-bitdepth warp filter tests.
	//
	// filter: the warp implementation stored in every generated tuple.
	//
	// Each base tuple is (out_w, out_h, iteration count, bit depth, filter). All
	// five block sizes are emitted for bit depth 8, then for 10, then for 12, and
	// the result is combined with four independent {0, 1} flags.
	::testing::internal::ParamGenerator<HighbdWarpTestParams> BuildParams(
	    highbd_warp_affine_func filter) {
	  static const int kBitDepths[] = { 8, 10, 12 };
	  static const int kBlockSizes[][2] = {
	    { 4, 4 }, { 8, 8 }, { 64, 64 }, { 4, 16 }, { 32, 8 },
	  };
	  constexpr int kNumIters = 100;
	  constexpr size_t kNumBitDepths = sizeof(kBitDepths) / sizeof(kBitDepths[0]);
	  constexpr size_t kNumBlockSizes =
	      sizeof(kBlockSizes) / sizeof(kBlockSizes[0]);

	  // Bit depth is the outer loop so the tuples come out grouped by depth.
	  HighbdWarpTestParam params[kNumBitDepths * kNumBlockSizes];
	  size_t next = 0;
	  for (const int bit_depth : kBitDepths) {
	    for (const auto &size : kBlockSizes) {
	      params[next++] =
	          make_tuple(size[0], size[1], kNumIters, bit_depth, filter);
	    }
	  }

	  const auto zero_or_one = ::testing::Values(0, 1);
	  return ::testing::Combine(::testing::ValuesIn(params), zero_or_one,
	                            zero_or_one, zero_or_one, zero_or_one);
	}

	// Out-of-line defaulted destructor: performs no cleanup beyond what the
	// members' own destructors do.
	AV1HighbdWarpFilterTest::~AV1HighbdWarpFilterTest() = default;
	// Fixture set-up: resets rnd_ using the value returned by
	// ACMRandom::DeterministicSeed().
	void AV1HighbdWarpFilterTest::SetUp() {
	  const auto seed = ACMRandom::DeterministicSeed();
	  rnd_.Reset(seed);
	}

	// Times |test_impl| on one randomly generated warp model and prints the mean
	// cost per call in nanoseconds.
	//
	// test_impl: high-bitdepth warp function to benchmark.
	//
	// Test parameters: GET_PARAM(0) supplies the output width (element 0), output
	// height (element 1) and bit depth (element 3); element 2 is not read here.
	// GET_PARAM(1..4) are the is_{alpha,beta,gamma,delta}_zero flags forwarded to
	// generate_warped_model().
	void AV1HighbdWarpFilterTest::RunSpeedTest(highbd_warp_affine_func test_impl) {
	  const int w = 128, h = 128;
	  const int border = 16;
	  const int stride = w + 2 * border;
	  HighbdWarpTestParam param = GET_PARAM(0);
	  const int is_alpha_zero = GET_PARAM(1);
	  const int is_beta_zero = GET_PARAM(2);
	  const int is_gamma_zero = GET_PARAM(3);
	  const int is_delta_zero = GET_PARAM(4);
	  const int out_w = std::get<0>(param), out_h = std::get<1>(param);
	  const int bd = std::get<3>(param);
	  const int mask = (1 << bd) - 1;
	  // Only the (0, 0) subsampling combination is timed.
	  const int sub_x = 0;
	  const int sub_y = 0;

	  // The warp functions always write rows with widths that are multiples of 8.
	  // So to avoid a buffer overflow, we may need to pad rows to a multiple of 8.
	  const int output_n = ((out_w + 7) & ~7) * out_h;
	  std::unique_ptr<uint16_t[]> input_(new (std::nothrow) uint16_t[h * stride]);
	  ASSERT_NE(input_, nullptr);
	  // Skip the left border so that input[r * stride] is the first pixel of row r.
	  uint16_t *input = input_.get() + border;
	  std::unique_ptr<uint16_t[]> output(new (std::nothrow) uint16_t[output_n]);
	  ASSERT_NE(output, nullptr);
	  int32_t mat[8];
	  int16_t alpha, beta, gamma, delta;
	  std::unique_ptr<CONV_BUF_TYPE[]> dsta(new (std::nothrow)
	                                            CONV_BUF_TYPE[output_n]);
	  ASSERT_NE(dsta, nullptr);

	  // rnd_gen_zeros is 0: zero coefficients come only from the is_*_zero flags.
	  generate_warped_model(&rnd_, mat, &alpha, &beta, &gamma, &delta,
	                        is_alpha_zero, is_beta_zero, is_gamma_zero,
	                        is_delta_zero, 0);
	  // Generate an input block and extend its borders horizontally
	  for (int r = 0; r < h; ++r)
	    for (int c = 0; c < w; ++c) input[r * stride + c] = rnd_.Rand16() & mask;
	  for (int r = 0; r < h; ++r) {
	    for (int c = 0; c < border; ++c) {
	      input[r * stride - border + c] = input[r * stride];
	      input[r * stride + w + c] = input[r * stride + (w - 1)];
	    }
	  }

	  const int do_average = 0;
	  ConvolveParams conv_params =
	      get_conv_params_no_round(do_average, 0, dsta.get(), out_w, 1, bd);
	  // Must be set after the struct is (re)built above; setting it beforehand
	  // would be overwritten by the whole-struct assignment.
	  conv_params.use_dist_wtd_comp_avg = 0;

	  const int num_loops = 1000000000 / (out_w + out_h);
	  aom_usec_timer timer;
	  aom_usec_timer_start(&timer);

	  for (int i = 0; i < num_loops; ++i)
	    test_impl(mat, input, w, h, stride, output.get(), 32, 32, out_w, out_h,
	              out_w, sub_x, sub_y, bd, &conv_params, alpha, beta, gamma, delta);

	  aom_usec_timer_mark(&timer);
	  // Keep the elapsed microseconds as a double so a long run is not narrowed
	  // to int before the per-call average is computed.
	  const double elapsed_time_us =
	      static_cast<double>(aom_usec_timer_elapsed(&timer));
	  printf("highbd warp %3dx%-3d alpha=%d beta=%d gamma=%d delta=%d: %7.2f ns \n",
	         out_w, out_h, alpha, beta, gamma, delta,
	         1000.0 * elapsed_time_us / num_loops);
	}

	// Checks that |test_impl| produces exactly the same results as
	// av1_highbd_warp_affine_c().
	//
	// test_impl: high-bitdepth warp function under test.
	//
	// Test parameters: GET_PARAM(0) supplies the output width, output height,
	// iteration count and bit depth; GET_PARAM(1..4) are the
	// is_{alpha,beta,gamma,delta}_zero flags forwarded to generate_warped_model().
	//
	// Every iteration draws a new random input block and randomly picks between
	// get_conv_params_no_round() and get_conv_params(). For each (sub_x, sub_y) in
	// {0, 1} x {0, 1} a new random warp model is generated and both functions are
	// run for all combinations of ii in [0, 2), jj in [0, 5) and do_average in
	// {0, 1}.
	void AV1HighbdWarpFilterTest::RunCheckOutput(
	    highbd_warp_affine_func test_impl) {
	  const int w = 128, h = 128;
	  const int border = 16;
	  const int stride = w + 2 * border;
	  HighbdWarpTestParam param = GET_PARAM(0);
	  const int is_alpha_zero = GET_PARAM(1);
	  const int is_beta_zero = GET_PARAM(2);
	  const int is_gamma_zero = GET_PARAM(3);
	  const int is_delta_zero = GET_PARAM(4);
	  const int out_w = std::get<0>(param);
	  const int out_h = std::get<1>(param);
	  const int num_iters = std::get<2>(param);
	  const int bd = std::get<3>(param);
	  const int mask = (1 << bd) - 1;

	  // The warp functions always write rows with widths that are multiples of 8.
	  // So to avoid a buffer overflow, we may need to pad rows to a multiple of 8.
	  const int output_n = ((out_w + 7) & ~7) * out_h;
	  std::unique_ptr<uint16_t[]> input_(new (std::nothrow) uint16_t[h * stride]);
	  ASSERT_NE(input_, nullptr);
	  // Skip the left border so that input[r * stride] is the first pixel of row r.
	  uint16_t *input = input_.get() + border;
	  std::unique_ptr<uint16_t[]> output(new (std::nothrow) uint16_t[output_n]);
	  ASSERT_NE(output, nullptr);
	  std::unique_ptr<uint16_t[]> output2(new (std::nothrow) uint16_t[output_n]);
	  ASSERT_NE(output2, nullptr);
	  int32_t mat[8];
	  int16_t alpha, beta, gamma, delta;
	  ConvolveParams conv_params = get_conv_params(0, 0, bd);
	  std::unique_ptr<CONV_BUF_TYPE[]> dsta(new (std::nothrow)
	                                            CONV_BUF_TYPE[output_n]);
	  ASSERT_NE(dsta, nullptr);
	  std::unique_ptr<CONV_BUF_TYPE[]> dstb(new (std::nothrow)
	                                            CONV_BUF_TYPE[output_n]);
	  ASSERT_NE(dstb, nullptr);

	  // Start both output buffers from the same random contents.
	  for (int n = 0; n < output_n; ++n) {
	    const uint16_t initial = rnd_.Rand16();
	    output2[n] = initial;
	    output[n] = initial;
	  }

	  // Applies the distance-weighted compound settings for table row |row|:
	  // rows 0..3 enable weighting with offsets taken from
	  // quant_dist_lookup_table, any other row disables it and leaves the offsets
	  // untouched.
	  const auto apply_dist_wtd = [](ConvolveParams *params, int row, int col) {
	    if (row < 4) {
	      params->use_dist_wtd_comp_avg = 1;
	      params->fwd_offset = quant_dist_lookup_table[row][col];
	      params->bck_offset = quant_dist_lookup_table[row][1 - col];
	    } else {
	      params->use_dist_wtd_comp_avg = 0;
	    }
	  };

	  for (int i = 0; i < num_iters; ++i) {
	    // Fill each row with random samples, then replicate its first and last
	    // sample into the left and right borders.
	    for (int r = 0; r < h; ++r) {
	      uint16_t *const row = input + r * stride;
	      for (int c = 0; c < w; ++c) row[c] = rnd_.Rand16() & mask;
	    }
	    for (int r = 0; r < h; ++r) {
	      uint16_t *const row = input + r * stride;
	      const uint16_t left_px = row[0];
	      const uint16_t right_px = row[w - 1];
	      for (int c = 1; c <= border; ++c) {
	        row[-c] = left_px;
	        row[w - 1 + c] = right_px;
	      }
	    }

	    const int use_no_round = rnd_.Rand8() & 1;
	    for (int sub_x = 0; sub_x < 2; ++sub_x) {
	      for (int sub_y = 0; sub_y < 2; ++sub_y) {
	        generate_warped_model(&rnd_, mat, &alpha, &beta, &gamma, &delta,
	                              is_alpha_zero, is_beta_zero, is_gamma_zero,
	                              is_delta_zero, 1);
	        for (int ii = 0; ii < 2; ++ii) {
	          for (int jj = 0; jj < 5; ++jj) {
	            for (int do_average = 0; do_average <= 1; ++do_average) {
	              // Reference run; in the no-round flavour it uses dsta as its
	              // CONV_BUF_TYPE buffer.
	              conv_params = use_no_round
	                                ? get_conv_params_no_round(
	                                      do_average, 0, dsta.get(), out_w, 1, bd)
	                                : get_conv_params(0, 0, bd);
	              apply_dist_wtd(&conv_params, jj, ii);
	              av1_highbd_warp_affine_c(mat, input, w, h, stride, output.get(),
	                                       32, 32, out_w, out_h, out_w, sub_x,
	                                       sub_y, bd, &conv_params, alpha, beta,
	                                       gamma, delta);

	              // Run under test; in the no-round flavour the parameters are
	              // rebuilt around dstb, otherwise the same struct is reused.
	              if (use_no_round) {
	                conv_params = get_conv_params_no_round(
	                    do_average, 0, dstb.get(), out_w, 1, bd);
	              }
	              apply_dist_wtd(&conv_params, jj, ii);
	              test_impl(mat, input, w, h, stride, output2.get(), 32, 32, out_w,
	                        out_h, out_w, sub_x, sub_y, bd, &conv_params, alpha,
	                        beta, gamma, delta);

	              // The CONV_BUF_TYPE buffers are only compared in the no-round
	              // flavour; the pixel outputs are always compared.
	              if (use_no_round) {
	                for (int j = 0; j < out_w * out_h; ++j)
	                  ASSERT_EQ(dsta[j], dstb[j])
	                      << "Pixel mismatch at index " << j << " = ("
	                      << (j % out_w) << ", " << (j / out_w) << ") on iteration "
	                      << i;
	              }
	              for (int j = 0; j < out_w * out_h; ++j)
	                ASSERT_EQ(output[j], output2[j])
	                    << "Pixel mismatch at index " << j << " = ("
	                    << (j % out_w) << ", " << (j / out_w) << ") on iteration "
	                    << i;
	            }
	          }
	        }
	      }
	    }
	  }
	}
	} // namespace AV1HighbdWarpFilter
	#endif // CONFIG_AV1_HIGHBITDEPTH
	} // namespace libaom_test