Geza Lore | abd0050 | 2016-02-12 16:04:35 +0000 | [diff] [blame] | 1 | /* |
James Zern | b7c05bd | 2024-06-11 19:15:10 -0700 | [diff] [blame] | 2 | * Copyright (c) 2016, Alliance for Open Media. All rights reserved. |
Geza Lore | abd0050 | 2016-02-12 16:04:35 +0000 | [diff] [blame] | 3 | * |
Yaowu Xu | bde4ac8 | 2016-11-28 15:26:06 -0800 | [diff] [blame] | 4 | * This source code is subject to the terms of the BSD 2 Clause License and |
| 5 | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
| 6 | * was not distributed with this source code in the LICENSE file, you can |
| 7 | * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
| 8 | * Media Patent License 1.0 was not distributed with this source code in the |
| 9 | * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
Geza Lore | abd0050 | 2016-02-12 16:04:35 +0000 | [diff] [blame] | 10 | */ |
| 11 | |
James Zern | 5e831c5 | 2016-06-09 23:38:31 -0700 | [diff] [blame] | 12 | #include <cmath> |
| 13 | #include <cstdlib> |
| 14 | #include <string> |
sarahparker | a543df5 | 2018-11-02 16:02:05 -0700 | [diff] [blame] | 15 | #include <tuple> |
James Zern | 5e831c5 | 2016-06-09 23:38:31 -0700 | [diff] [blame] | 16 | |
Wan-Teh Chang | c8b1fc2 | 2024-08-01 12:09:28 -0700 | [diff] [blame] | 17 | #include "gtest/gtest.h" |
Geza Lore | abd0050 | 2016-02-12 16:04:35 +0000 | [diff] [blame] | 18 | |
Tom Finegan | 60e653d | 2018-05-22 11:34:58 -0700 | [diff] [blame] | 19 | #include "config/aom_config.h" |
Tom Finegan | 44702c8 | 2018-05-22 13:00:39 -0700 | [diff] [blame] | 20 | #include "config/aom_dsp_rtcd.h" |
Tom Finegan | 60e653d | 2018-05-22 11:34:58 -0700 | [diff] [blame] | 21 | |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 22 | #include "aom_ports/mem.h" |
Salome Thirot | 4ef3627 | 2023-07-10 20:23:08 +0100 | [diff] [blame] | 23 | #include "av1/common/common_data.h" |
James Zern | 5e831c5 | 2016-06-09 23:38:31 -0700 | [diff] [blame] | 24 | #include "test/acm_random.h" |
Geza Lore | abd0050 | 2016-02-12 16:04:35 +0000 | [diff] [blame] | 25 | #include "test/register_state_check.h" |
James Zern | 5e831c5 | 2016-06-09 23:38:31 -0700 | [diff] [blame] | 26 | #include "test/util.h" |
Geza Lore | 471362f | 2016-06-21 20:23:13 +0100 | [diff] [blame] | 27 | #include "test/function_equivalence_test.h" |
James Zern | 5e831c5 | 2016-06-09 23:38:31 -0700 | [diff] [blame] | 28 | |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 29 | using libaom_test::ACMRandom; |
| 30 | using libaom_test::FunctionEquivalenceTest; |
Peng Bin | a9edad7 | 2018-10-11 10:01:34 +0800 | [diff] [blame] | 31 | using ::testing::Combine; |
| 32 | using ::testing::Range; |
| 33 | using ::testing::Values; |
| 34 | using ::testing::ValuesIn; |
Geza Lore | abd0050 | 2016-02-12 16:04:35 +0000 | [diff] [blame] | 35 | |
| 36 | namespace { |
James Zern | 5e831c5 | 2016-06-09 23:38:31 -0700 | [diff] [blame] | 37 | const int kNumIterations = 10000; |
Geza Lore | abd0050 | 2016-02-12 16:04:35 +0000 | [diff] [blame] | 38 | |
Geza Lore | 471362f | 2016-06-21 20:23:13 +0100 | [diff] [blame] | 39 | static const int16_t kInt13Max = (1 << 12) - 1; |
| 40 | |
Alex Converse | 4c5b020 | 2017-03-29 15:48:40 -0700 | [diff] [blame] | 41 | typedef uint64_t (*SSI16Func)(const int16_t *src, int stride, int width, |
| 42 | int height); |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 43 | typedef libaom_test::FuncParam<SSI16Func> TestFuncs; |
Geza Lore | abd0050 | 2016-02-12 16:04:35 +0000 | [diff] [blame] | 44 | |
clang-format | 3a826f1 | 2016-08-11 17:46:05 -0700 | [diff] [blame] | 45 | class SumSquaresTest : public ::testing::TestWithParam<TestFuncs> { |
James Zern | 5e831c5 | 2016-06-09 23:38:31 -0700 | [diff] [blame] | 46 | public: |
James Zern | f1fa1eb | 2023-07-25 15:34:13 -0700 | [diff] [blame] | 47 | ~SumSquaresTest() override = default; |
James Zern | faa2dcf | 2023-07-24 18:29:51 -0700 | [diff] [blame] | 48 | void SetUp() override { |
Peng Bin | c961b99 | 2018-04-10 16:17:36 +0800 | [diff] [blame] | 49 | params_ = this->GetParam(); |
| 50 | rnd_.Reset(ACMRandom::DeterministicSeed()); |
| 51 | src_ = reinterpret_cast<int16_t *>(aom_memalign(16, 256 * 256 * 2)); |
James Zern | 9dea04e | 2022-04-28 13:18:36 -0700 | [diff] [blame] | 52 | ASSERT_NE(src_, nullptr); |
Peng Bin | c961b99 | 2018-04-10 16:17:36 +0800 | [diff] [blame] | 53 | } |
Geza Lore | abd0050 | 2016-02-12 16:04:35 +0000 | [diff] [blame] | 54 | |
James Zern | faa2dcf | 2023-07-24 18:29:51 -0700 | [diff] [blame] | 55 | void TearDown() override { aom_free(src_); } |
Wan-Teh Chang | ae75ef9 | 2023-10-03 18:01:07 -0700 | [diff] [blame] | 56 | void RunTest(bool is_random); |
Satish Kumar Suman | 95c38b2 | 2018-09-05 10:21:03 +0530 | [diff] [blame] | 57 | void RunSpeedTest(); |
Peng Bin | c961b99 | 2018-04-10 16:17:36 +0800 | [diff] [blame] | 58 | |
| 59 | void GenRandomData(int width, int height, int stride) { |
| 60 | const int msb = 11; // Up to 12 bit input |
| 61 | const int limit = 1 << (msb + 1); |
| 62 | for (int ii = 0; ii < height; ii++) { |
| 63 | for (int jj = 0; jj < width; jj++) { |
| 64 | src_[ii * stride + jj] = rnd_(2) ? rnd_(limit) : -rnd_(limit); |
| 65 | } |
| 66 | } |
| 67 | } |
| 68 | |
| 69 | void GenExtremeData(int width, int height, int stride) { |
| 70 | const int msb = 11; // Up to 12 bit input |
| 71 | const int limit = 1 << (msb + 1); |
| 72 | const int val = rnd_(2) ? limit - 1 : -(limit - 1); |
| 73 | for (int ii = 0; ii < height; ii++) { |
| 74 | for (int jj = 0; jj < width; jj++) { |
| 75 | src_[ii * stride + jj] = val; |
| 76 | } |
| 77 | } |
| 78 | } |
Geza Lore | abd0050 | 2016-02-12 16:04:35 +0000 | [diff] [blame] | 79 | |
James Zern | 5e831c5 | 2016-06-09 23:38:31 -0700 | [diff] [blame] | 80 | protected: |
Geza Lore | a3f7ddc | 2016-07-12 15:26:36 +0100 | [diff] [blame] | 81 | TestFuncs params_; |
Peng Bin | c961b99 | 2018-04-10 16:17:36 +0800 | [diff] [blame] | 82 | int16_t *src_; |
| 83 | ACMRandom rnd_; |
Geza Lore | abd0050 | 2016-02-12 16:04:35 +0000 | [diff] [blame] | 84 | }; |
chiyotsai | 9dfac72 | 2020-07-07 17:43:02 -0700 | [diff] [blame] | 85 | GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(SumSquaresTest); |
Geza Lore | abd0050 | 2016-02-12 16:04:35 +0000 | [diff] [blame] | 86 | |
Wan-Teh Chang | ae75ef9 | 2023-10-03 18:01:07 -0700 | [diff] [blame] | 87 | void SumSquaresTest::RunTest(bool is_random) { |
James Zern | 5e831c5 | 2016-06-09 23:38:31 -0700 | [diff] [blame] | 88 | int failed = 0; |
James Zern | 5e831c5 | 2016-06-09 23:38:31 -0700 | [diff] [blame] | 89 | for (int k = 0; k < kNumIterations; k++) { |
Peng Bin | c961b99 | 2018-04-10 16:17:36 +0800 | [diff] [blame] | 90 | const int width = 4 * (rnd_(31) + 1); // Up to 128x128 |
| 91 | const int height = 4 * (rnd_(31) + 1); // Up to 128x128 |
| 92 | int stride = 4 << rnd_(7); // Up to 256 stride |
| 93 | while (stride < width) { // Make sure it's valid |
| 94 | stride = 4 << rnd_(7); |
James Zern | 5e831c5 | 2016-06-09 23:38:31 -0700 | [diff] [blame] | 95 | } |
Wan-Teh Chang | ae75ef9 | 2023-10-03 18:01:07 -0700 | [diff] [blame] | 96 | if (is_random) { |
Peng Bin | c961b99 | 2018-04-10 16:17:36 +0800 | [diff] [blame] | 97 | GenRandomData(width, height, stride); |
| 98 | } else { |
| 99 | GenExtremeData(width, height, stride); |
James Zern | 5e831c5 | 2016-06-09 23:38:31 -0700 | [diff] [blame] | 100 | } |
Peng Bin | c961b99 | 2018-04-10 16:17:36 +0800 | [diff] [blame] | 101 | const uint64_t res_ref = params_.ref_func(src_, stride, width, height); |
James Zern | 5e831c5 | 2016-06-09 23:38:31 -0700 | [diff] [blame] | 102 | uint64_t res_tst; |
chiyotsai | ef26106 | 2021-07-01 14:01:45 -0700 | [diff] [blame] | 103 | API_REGISTER_STATE_CHECK(res_tst = |
Peng Bin | c961b99 | 2018-04-10 16:17:36 +0800 | [diff] [blame] | 104 | params_.tst_func(src_, stride, width, height)); |
James Zern | 5e831c5 | 2016-06-09 23:38:31 -0700 | [diff] [blame] | 105 | |
| 106 | if (!failed) { |
| 107 | failed = res_ref != res_tst; |
| 108 | EXPECT_EQ(res_ref, res_tst) |
Peng Bin | c961b99 | 2018-04-10 16:17:36 +0800 | [diff] [blame] | 109 | << "Error: Sum Squares Test [" << width << "x" << height |
| 110 | << "] C output does not match optimized output."; |
James Zern | 5e831c5 | 2016-06-09 23:38:31 -0700 | [diff] [blame] | 111 | } |
Geza Lore | abd0050 | 2016-02-12 16:04:35 +0000 | [diff] [blame] | 112 | } |
| 113 | } |
| 114 | |
Satish Kumar Suman | 95c38b2 | 2018-09-05 10:21:03 +0530 | [diff] [blame] | 115 | void SumSquaresTest::RunSpeedTest() { |
| 116 | for (int block = BLOCK_4X4; block < BLOCK_SIZES_ALL; block++) { |
| 117 | const int width = block_size_wide[block]; // Up to 128x128 |
| 118 | const int height = block_size_high[block]; // Up to 128x128 |
| 119 | int stride = 4 << rnd_(7); // Up to 256 stride |
| 120 | while (stride < width) { // Make sure it's valid |
| 121 | stride = 4 << rnd_(7); |
| 122 | } |
| 123 | GenExtremeData(width, height, stride); |
| 124 | const int num_loops = 1000000000 / (width + height); |
| 125 | aom_usec_timer timer; |
| 126 | aom_usec_timer_start(&timer); |
| 127 | |
| 128 | for (int i = 0; i < num_loops; ++i) |
| 129 | params_.ref_func(src_, stride, width, height); |
| 130 | |
| 131 | aom_usec_timer_mark(&timer); |
| 132 | const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer)); |
| 133 | printf("SumSquaresTest C %3dx%-3d: %7.2f ns\n", width, height, |
| 134 | 1000.0 * elapsed_time / num_loops); |
| 135 | |
| 136 | aom_usec_timer timer1; |
| 137 | aom_usec_timer_start(&timer1); |
| 138 | for (int i = 0; i < num_loops; ++i) |
| 139 | params_.tst_func(src_, stride, width, height); |
| 140 | aom_usec_timer_mark(&timer1); |
| 141 | const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1)); |
| 142 | printf("SumSquaresTest Test %3dx%-3d: %7.2f ns\n", width, height, |
| 143 | 1000.0 * elapsed_time1 / num_loops); |
| 144 | } |
| 145 | } |
| 146 | |
Peng Bin | c961b99 | 2018-04-10 16:17:36 +0800 | [diff] [blame] | 147 | TEST_P(SumSquaresTest, OperationCheck) { |
Wan-Teh Chang | ae75ef9 | 2023-10-03 18:01:07 -0700 | [diff] [blame] | 148 | RunTest(true); // GenRandomData |
Peng Bin | c961b99 | 2018-04-10 16:17:36 +0800 | [diff] [blame] | 149 | } |
| 150 | |
James Zern | 5e831c5 | 2016-06-09 23:38:31 -0700 | [diff] [blame] | 151 | TEST_P(SumSquaresTest, ExtremeValues) { |
Wan-Teh Chang | ae75ef9 | 2023-10-03 18:01:07 -0700 | [diff] [blame] | 152 | RunTest(false); // GenExtremeData |
Geza Lore | abd0050 | 2016-02-12 16:04:35 +0000 | [diff] [blame] | 153 | } |
Geza Lore | abd0050 | 2016-02-12 16:04:35 +0000 | [diff] [blame] | 154 | |
Satish Kumar Suman | 95c38b2 | 2018-09-05 10:21:03 +0530 | [diff] [blame] | 155 | TEST_P(SumSquaresTest, DISABLED_Speed) { RunSpeedTest(); } |
| 156 | |
// Each instantiation pairs the C reference with one optimized variant.
#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(SSE2, SumSquaresTest,
                         Values(TestFuncs(aom_sum_squares_2d_i16_c,
                                          aom_sum_squares_2d_i16_sse2)));
#endif  // HAVE_SSE2

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, SumSquaresTest,
                         Values(TestFuncs(aom_sum_squares_2d_i16_c,
                                          aom_sum_squares_2d_i16_neon)));
#endif  // HAVE_NEON

#if HAVE_SVE
INSTANTIATE_TEST_SUITE_P(SVE, SumSquaresTest,
                         Values(TestFuncs(aom_sum_squares_2d_i16_c,
                                          aom_sum_squares_2d_i16_sve)));
#endif  // HAVE_SVE

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, SumSquaresTest,
                         Values(TestFuncs(aom_sum_squares_2d_i16_c,
                                          aom_sum_squares_2d_i16_avx2)));
#endif  // HAVE_AVX2
| 189 | |
Geza Lore | 471362f | 2016-06-21 20:23:13 +0100 | [diff] [blame] | 190 | ////////////////////////////////////////////////////////////////////////////// |
| 191 | // 1D version |
| 192 | ////////////////////////////////////////////////////////////////////////////// |
| 193 | |
Wan-Teh Chang | ae75ef9 | 2023-10-03 18:01:07 -0700 | [diff] [blame] | 194 | typedef uint64_t (*F1D)(const int16_t *src, uint32_t n); |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 195 | typedef libaom_test::FuncParam<F1D> TestFuncs1D; |
Geza Lore | 471362f | 2016-06-21 20:23:13 +0100 | [diff] [blame] | 196 | |
| 197 | class SumSquares1DTest : public FunctionEquivalenceTest<F1D> { |
| 198 | protected: |
Geza Lore | 471362f | 2016-06-21 20:23:13 +0100 | [diff] [blame] | 199 | static const int kIterations = 1000; |
| 200 | static const int kMaxSize = 256; |
Geza Lore | 471362f | 2016-06-21 20:23:13 +0100 | [diff] [blame] | 201 | }; |
chiyotsai | 9dfac72 | 2020-07-07 17:43:02 -0700 | [diff] [blame] | 202 | GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(SumSquares1DTest); |
Geza Lore | 471362f | 2016-06-21 20:23:13 +0100 | [diff] [blame] | 203 | |
| 204 | TEST_P(SumSquares1DTest, RandomValues) { |
| 205 | DECLARE_ALIGNED(16, int16_t, src[kMaxSize * kMaxSize]); |
| 206 | |
clang-format | 3a826f1 | 2016-08-11 17:46:05 -0700 | [diff] [blame] | 207 | for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { |
| 208 | for (int i = 0; i < kMaxSize * kMaxSize; ++i) |
Geza Lore | 471362f | 2016-06-21 20:23:13 +0100 | [diff] [blame] | 209 | src[i] = rng_(kInt13Max * 2 + 1) - kInt13Max; |
| 210 | |
Salome Thirot | e7f72bd | 2023-12-13 14:03:52 +0000 | [diff] [blame] | 211 | // Block size is between 64 and 128 * 128 and is always a multiple of 64. |
| 212 | const int n = (rng_(255) + 1) * 64; |
Geza Lore | 471362f | 2016-06-21 20:23:13 +0100 | [diff] [blame] | 213 | |
Wan-Teh Chang | ae75ef9 | 2023-10-03 18:01:07 -0700 | [diff] [blame] | 214 | const uint64_t ref_res = params_.ref_func(src, n); |
Geza Lore | a3f7ddc | 2016-07-12 15:26:36 +0100 | [diff] [blame] | 215 | uint64_t tst_res; |
Wan-Teh Chang | ae75ef9 | 2023-10-03 18:01:07 -0700 | [diff] [blame] | 216 | API_REGISTER_STATE_CHECK(tst_res = params_.tst_func(src, n)); |
Geza Lore | 471362f | 2016-06-21 20:23:13 +0100 | [diff] [blame] | 217 | |
| 218 | ASSERT_EQ(ref_res, tst_res); |
| 219 | } |
| 220 | } |
| 221 | |
| 222 | TEST_P(SumSquares1DTest, ExtremeValues) { |
| 223 | DECLARE_ALIGNED(16, int16_t, src[kMaxSize * kMaxSize]); |
| 224 | |
clang-format | 3a826f1 | 2016-08-11 17:46:05 -0700 | [diff] [blame] | 225 | for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { |
Geza Lore | 471362f | 2016-06-21 20:23:13 +0100 | [diff] [blame] | 226 | if (rng_(2)) { |
clang-format | 3a826f1 | 2016-08-11 17:46:05 -0700 | [diff] [blame] | 227 | for (int i = 0; i < kMaxSize * kMaxSize; ++i) src[i] = kInt13Max; |
Geza Lore | 471362f | 2016-06-21 20:23:13 +0100 | [diff] [blame] | 228 | } else { |
clang-format | 3a826f1 | 2016-08-11 17:46:05 -0700 | [diff] [blame] | 229 | for (int i = 0; i < kMaxSize * kMaxSize; ++i) src[i] = -kInt13Max; |
Geza Lore | 471362f | 2016-06-21 20:23:13 +0100 | [diff] [blame] | 230 | } |
| 231 | |
Salome Thirot | e7f72bd | 2023-12-13 14:03:52 +0000 | [diff] [blame] | 232 | // Block size is between 64 and 128 * 128 and is always a multiple of 64. |
| 233 | const int n = (rng_(255) + 1) * 64; |
Geza Lore | 471362f | 2016-06-21 20:23:13 +0100 | [diff] [blame] | 234 | |
Wan-Teh Chang | ae75ef9 | 2023-10-03 18:01:07 -0700 | [diff] [blame] | 235 | const uint64_t ref_res = params_.ref_func(src, n); |
Geza Lore | a3f7ddc | 2016-07-12 15:26:36 +0100 | [diff] [blame] | 236 | uint64_t tst_res; |
Wan-Teh Chang | ae75ef9 | 2023-10-03 18:01:07 -0700 | [diff] [blame] | 237 | API_REGISTER_STATE_CHECK(tst_res = params_.tst_func(src, n)); |
Geza Lore | 471362f | 2016-06-21 20:23:13 +0100 | [diff] [blame] | 238 | |
| 239 | ASSERT_EQ(ref_res, tst_res); |
| 240 | } |
| 241 | } |
| 242 | |
// Each instantiation pairs the C reference with one optimized variant.
#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(
    SSE2, SumSquares1DTest,
    Values(TestFuncs1D(aom_sum_squares_i16_c, aom_sum_squares_i16_sse2)));
#endif  // HAVE_SSE2

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, SumSquares1DTest,
    Values(TestFuncs1D(aom_sum_squares_i16_c, aom_sum_squares_i16_neon)));
#endif  // HAVE_NEON

#if HAVE_SVE
INSTANTIATE_TEST_SUITE_P(
    SVE, SumSquares1DTest,
    Values(TestFuncs1D(aom_sum_squares_i16_c, aom_sum_squares_i16_sve)));
#endif  // HAVE_SVE
| 263 | |
Wan-Teh Chang | ae75ef9 | 2023-10-03 18:01:07 -0700 | [diff] [blame] | 264 | typedef int64_t (*SSEFunc)(const uint8_t *a, int a_stride, const uint8_t *b, |
| 265 | int b_stride, int width, int height); |
| 266 | typedef libaom_test::FuncParam<SSEFunc> TestSSEFuncs; |
Peng Bin | a9edad7 | 2018-10-11 10:01:34 +0800 | [diff] [blame] | 267 | |
sarahparker | a543df5 | 2018-11-02 16:02:05 -0700 | [diff] [blame] | 268 | typedef std::tuple<TestSSEFuncs, int> SSETestParam; |
Peng Bin | a9edad7 | 2018-10-11 10:01:34 +0800 | [diff] [blame] | 269 | |
| 270 | class SSETest : public ::testing::TestWithParam<SSETestParam> { |
| 271 | public: |
James Zern | f1fa1eb | 2023-07-25 15:34:13 -0700 | [diff] [blame] | 272 | ~SSETest() override = default; |
James Zern | faa2dcf | 2023-07-24 18:29:51 -0700 | [diff] [blame] | 273 | void SetUp() override { |
Peng Bin | a9edad7 | 2018-10-11 10:01:34 +0800 | [diff] [blame] | 274 | params_ = GET_PARAM(0); |
| 275 | width_ = GET_PARAM(1); |
Wan-Teh Chang | ae75ef9 | 2023-10-03 18:01:07 -0700 | [diff] [blame] | 276 | is_hbd_ = |
Jerome Jiang | f20376c | 2019-08-21 11:21:29 -0700 | [diff] [blame] | 277 | #if CONFIG_AV1_HIGHBITDEPTH |
| 278 | params_.ref_func == aom_highbd_sse_c; |
| 279 | #else |
Wan-Teh Chang | ae75ef9 | 2023-10-03 18:01:07 -0700 | [diff] [blame] | 280 | false; |
Jerome Jiang | f20376c | 2019-08-21 11:21:29 -0700 | [diff] [blame] | 281 | #endif |
Peng Bin | a9edad7 | 2018-10-11 10:01:34 +0800 | [diff] [blame] | 282 | rnd_.Reset(ACMRandom::DeterministicSeed()); |
| 283 | src_ = reinterpret_cast<uint8_t *>(aom_memalign(32, 256 * 256 * 2)); |
| 284 | ref_ = reinterpret_cast<uint8_t *>(aom_memalign(32, 256 * 256 * 2)); |
James Zern | 9dea04e | 2022-04-28 13:18:36 -0700 | [diff] [blame] | 285 | ASSERT_NE(src_, nullptr); |
| 286 | ASSERT_NE(ref_, nullptr); |
Peng Bin | a9edad7 | 2018-10-11 10:01:34 +0800 | [diff] [blame] | 287 | } |
| 288 | |
James Zern | faa2dcf | 2023-07-24 18:29:51 -0700 | [diff] [blame] | 289 | void TearDown() override { |
Peng Bin | a9edad7 | 2018-10-11 10:01:34 +0800 | [diff] [blame] | 290 | aom_free(src_); |
| 291 | aom_free(ref_); |
| 292 | } |
Wan-Teh Chang | ae75ef9 | 2023-10-03 18:01:07 -0700 | [diff] [blame] | 293 | void RunTest(bool is_random, int width, int height, int run_times); |
Peng Bin | a9edad7 | 2018-10-11 10:01:34 +0800 | [diff] [blame] | 294 | |
| 295 | void GenRandomData(int width, int height, int stride) { |
Wan-Teh Chang | b15d4f7 | 2023-10-13 15:54:13 -0700 | [diff] [blame] | 296 | uint16_t *src16 = reinterpret_cast<uint16_t *>(src_); |
| 297 | uint16_t *ref16 = reinterpret_cast<uint16_t *>(ref_); |
Peng Bin | a9edad7 | 2018-10-11 10:01:34 +0800 | [diff] [blame] | 298 | const int msb = 11; // Up to 12 bit input |
| 299 | const int limit = 1 << (msb + 1); |
| 300 | for (int ii = 0; ii < height; ii++) { |
| 301 | for (int jj = 0; jj < width; jj++) { |
Wan-Teh Chang | ae75ef9 | 2023-10-03 18:01:07 -0700 | [diff] [blame] | 302 | if (!is_hbd_) { |
Peng Bin | a9edad7 | 2018-10-11 10:01:34 +0800 | [diff] [blame] | 303 | src_[ii * stride + jj] = rnd_.Rand8(); |
| 304 | ref_[ii * stride + jj] = rnd_.Rand8(); |
| 305 | } else { |
Wan-Teh Chang | b15d4f7 | 2023-10-13 15:54:13 -0700 | [diff] [blame] | 306 | src16[ii * stride + jj] = rnd_(limit); |
| 307 | ref16[ii * stride + jj] = rnd_(limit); |
Peng Bin | a9edad7 | 2018-10-11 10:01:34 +0800 | [diff] [blame] | 308 | } |
| 309 | } |
| 310 | } |
| 311 | } |
| 312 | |
| 313 | void GenExtremeData(int width, int height, int stride, uint8_t *data, |
| 314 | int16_t val) { |
Wan-Teh Chang | b15d4f7 | 2023-10-13 15:54:13 -0700 | [diff] [blame] | 315 | uint16_t *data16 = reinterpret_cast<uint16_t *>(data); |
Peng Bin | a9edad7 | 2018-10-11 10:01:34 +0800 | [diff] [blame] | 316 | for (int ii = 0; ii < height; ii++) { |
| 317 | for (int jj = 0; jj < width; jj++) { |
Wan-Teh Chang | ae75ef9 | 2023-10-03 18:01:07 -0700 | [diff] [blame] | 318 | if (!is_hbd_) { |
Wan-Teh Chang | b15d4f7 | 2023-10-13 15:54:13 -0700 | [diff] [blame] | 319 | data[ii * stride + jj] = static_cast<uint8_t>(val); |
Peng Bin | a9edad7 | 2018-10-11 10:01:34 +0800 | [diff] [blame] | 320 | } else { |
Wan-Teh Chang | b15d4f7 | 2023-10-13 15:54:13 -0700 | [diff] [blame] | 321 | data16[ii * stride + jj] = val; |
Peng Bin | a9edad7 | 2018-10-11 10:01:34 +0800 | [diff] [blame] | 322 | } |
| 323 | } |
| 324 | } |
| 325 | } |
| 326 | |
| 327 | protected: |
Wan-Teh Chang | ae75ef9 | 2023-10-03 18:01:07 -0700 | [diff] [blame] | 328 | bool is_hbd_; |
Peng Bin | a9edad7 | 2018-10-11 10:01:34 +0800 | [diff] [blame] | 329 | int width_; |
| 330 | TestSSEFuncs params_; |
| 331 | uint8_t *src_; |
| 332 | uint8_t *ref_; |
| 333 | ACMRandom rnd_; |
| 334 | }; |
chiyotsai | 9dfac72 | 2020-07-07 17:43:02 -0700 | [diff] [blame] | 335 | GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(SSETest); |
Peng Bin | a9edad7 | 2018-10-11 10:01:34 +0800 | [diff] [blame] | 336 | |
Wan-Teh Chang | ae75ef9 | 2023-10-03 18:01:07 -0700 | [diff] [blame] | 337 | void SSETest::RunTest(bool is_random, int width, int height, int run_times) { |
Peng Bin | a9edad7 | 2018-10-11 10:01:34 +0800 | [diff] [blame] | 338 | int failed = 0; |
Aniket Dhok | d52c292 | 2018-12-28 19:50:29 +0530 | [diff] [blame] | 339 | aom_usec_timer ref_timer, test_timer; |
Peng Bin | a9edad7 | 2018-10-11 10:01:34 +0800 | [diff] [blame] | 340 | for (int k = 0; k < 3; k++) { |
| 341 | int stride = 4 << rnd_(7); // Up to 256 stride |
| 342 | while (stride < width) { // Make sure it's valid |
| 343 | stride = 4 << rnd_(7); |
| 344 | } |
Wan-Teh Chang | ae75ef9 | 2023-10-03 18:01:07 -0700 | [diff] [blame] | 345 | if (is_random) { |
Peng Bin | a9edad7 | 2018-10-11 10:01:34 +0800 | [diff] [blame] | 346 | GenRandomData(width, height, stride); |
| 347 | } else { |
Wan-Teh Chang | ae75ef9 | 2023-10-03 18:01:07 -0700 | [diff] [blame] | 348 | const int msb = is_hbd_ ? 12 : 8; // Up to 12 bit input |
Peng Bin | a9edad7 | 2018-10-11 10:01:34 +0800 | [diff] [blame] | 349 | const int limit = (1 << msb) - 1; |
| 350 | if (k == 0) { |
| 351 | GenExtremeData(width, height, stride, src_, 0); |
| 352 | GenExtremeData(width, height, stride, ref_, limit); |
| 353 | } else { |
| 354 | GenExtremeData(width, height, stride, src_, limit); |
| 355 | GenExtremeData(width, height, stride, ref_, 0); |
| 356 | } |
| 357 | } |
| 358 | int64_t res_ref, res_tst; |
Wan-Teh Chang | b15d4f7 | 2023-10-13 15:54:13 -0700 | [diff] [blame] | 359 | uint8_t *src = src_; |
| 360 | uint8_t *ref = ref_; |
Wan-Teh Chang | ae75ef9 | 2023-10-03 18:01:07 -0700 | [diff] [blame] | 361 | if (is_hbd_) { |
Wan-Teh Chang | b15d4f7 | 2023-10-13 15:54:13 -0700 | [diff] [blame] | 362 | src = CONVERT_TO_BYTEPTR(src_); |
| 363 | ref = CONVERT_TO_BYTEPTR(ref_); |
Peng Bin | a9edad7 | 2018-10-11 10:01:34 +0800 | [diff] [blame] | 364 | } |
Wan-Teh Chang | b15d4f7 | 2023-10-13 15:54:13 -0700 | [diff] [blame] | 365 | res_ref = params_.ref_func(src, stride, ref, stride, width, height); |
| 366 | res_tst = params_.tst_func(src, stride, ref, stride, width, height); |
Aniket Dhok | d52c292 | 2018-12-28 19:50:29 +0530 | [diff] [blame] | 367 | if (run_times > 1) { |
| 368 | aom_usec_timer_start(&ref_timer); |
| 369 | for (int j = 0; j < run_times; j++) { |
Wan-Teh Chang | b15d4f7 | 2023-10-13 15:54:13 -0700 | [diff] [blame] | 370 | params_.ref_func(src, stride, ref, stride, width, height); |
Aniket Dhok | d52c292 | 2018-12-28 19:50:29 +0530 | [diff] [blame] | 371 | } |
| 372 | aom_usec_timer_mark(&ref_timer); |
| 373 | const int elapsed_time_c = |
| 374 | static_cast<int>(aom_usec_timer_elapsed(&ref_timer)); |
Peng Bin | a9edad7 | 2018-10-11 10:01:34 +0800 | [diff] [blame] | 375 | |
Aniket Dhok | d52c292 | 2018-12-28 19:50:29 +0530 | [diff] [blame] | 376 | aom_usec_timer_start(&test_timer); |
| 377 | for (int j = 0; j < run_times; j++) { |
Wan-Teh Chang | b15d4f7 | 2023-10-13 15:54:13 -0700 | [diff] [blame] | 378 | params_.tst_func(src, stride, ref, stride, width, height); |
Aniket Dhok | d52c292 | 2018-12-28 19:50:29 +0530 | [diff] [blame] | 379 | } |
| 380 | aom_usec_timer_mark(&test_timer); |
| 381 | const int elapsed_time_simd = |
| 382 | static_cast<int>(aom_usec_timer_elapsed(&test_timer)); |
Peng Bin | a9edad7 | 2018-10-11 10:01:34 +0800 | [diff] [blame] | 383 | |
Aniket Dhok | d52c292 | 2018-12-28 19:50:29 +0530 | [diff] [blame] | 384 | printf( |
| 385 | "c_time=%d \t simd_time=%d \t " |
| 386 | "gain=%d\n", |
| 387 | elapsed_time_c, elapsed_time_simd, |
| 388 | (elapsed_time_c / elapsed_time_simd)); |
| 389 | } else { |
| 390 | if (!failed) { |
| 391 | failed = res_ref != res_tst; |
| 392 | EXPECT_EQ(res_ref, res_tst) |
Wan-Teh Chang | ae75ef9 | 2023-10-03 18:01:07 -0700 | [diff] [blame] | 393 | << "Error:" << (is_hbd_ ? "hbd " : " ") << k << " SSE Test [" |
Aniket Dhok | d52c292 | 2018-12-28 19:50:29 +0530 | [diff] [blame] | 394 | << width << "x" << height |
| 395 | << "] C output does not match optimized output."; |
| 396 | } |
Peng Bin | a9edad7 | 2018-10-11 10:01:34 +0800 | [diff] [blame] | 397 | } |
| 398 | } |
| 399 | } |
| 400 | |
| 401 | TEST_P(SSETest, OperationCheck) { |
| 402 | for (int height = 4; height <= 128; height += 4) { |
Wan-Teh Chang | ae75ef9 | 2023-10-03 18:01:07 -0700 | [diff] [blame] | 403 | RunTest(true, width_, height, 1); // GenRandomData |
Peng Bin | a9edad7 | 2018-10-11 10:01:34 +0800 | [diff] [blame] | 404 | } |
| 405 | } |
| 406 | |
| 407 | TEST_P(SSETest, ExtremeValues) { |
| 408 | for (int height = 4; height <= 128; height += 4) { |
Wan-Teh Chang | ae75ef9 | 2023-10-03 18:01:07 -0700 | [diff] [blame] | 409 | RunTest(false, width_, height, 1); |
Peng Bin | a9edad7 | 2018-10-11 10:01:34 +0800 | [diff] [blame] | 410 | } |
| 411 | } |
| 412 | |
Aniket Dhok | d52c292 | 2018-12-28 19:50:29 +0530 | [diff] [blame] | 413 | TEST_P(SSETest, DISABLED_Speed) { |
| 414 | for (int height = 4; height <= 128; height += 4) { |
Wan-Teh Chang | ae75ef9 | 2023-10-03 18:01:07 -0700 | [diff] [blame] | 415 | RunTest(true, width_, height, 100); |
Aniket Dhok | d52c292 | 2018-12-28 19:50:29 +0530 | [diff] [blame] | 416 | } |
| 417 | } |
Krishna Malladi | a8c08dd | 2020-02-21 14:08:06 -0800 | [diff] [blame] | 418 | |
// Every instantiation below crosses its function pairs with the widths
// 4, 8, ..., 128.
#if HAVE_NEON
const TestSSEFuncs sse_neon[] = {
  TestSSEFuncs(aom_sse_c, aom_sse_neon),
#if CONFIG_AV1_HIGHBITDEPTH
  TestSSEFuncs(aom_highbd_sse_c, aom_highbd_sse_neon)
#endif
};
INSTANTIATE_TEST_SUITE_P(NEON, SSETest,
                         Combine(ValuesIn(sse_neon), Range(4, 129, 4)));
#endif  // HAVE_NEON

#if HAVE_NEON_DOTPROD
const TestSSEFuncs sse_neon_dotprod[] = {
  TestSSEFuncs(aom_sse_c, aom_sse_neon_dotprod),
};
INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, SSETest,
                         Combine(ValuesIn(sse_neon_dotprod), Range(4, 129, 4)));
#endif  // HAVE_NEON_DOTPROD

#if HAVE_SSE4_1
const TestSSEFuncs sse_sse4[] = {
  TestSSEFuncs(aom_sse_c, aom_sse_sse4_1),
#if CONFIG_AV1_HIGHBITDEPTH
  TestSSEFuncs(aom_highbd_sse_c, aom_highbd_sse_sse4_1)
#endif
};
INSTANTIATE_TEST_SUITE_P(SSE4_1, SSETest,
                         Combine(ValuesIn(sse_sse4), Range(4, 129, 4)));
#endif  // HAVE_SSE4_1

#if HAVE_AVX2
const TestSSEFuncs sse_avx2[] = {
  TestSSEFuncs(aom_sse_c, aom_sse_avx2),
#if CONFIG_AV1_HIGHBITDEPTH
  TestSSEFuncs(aom_highbd_sse_c, aom_highbd_sse_avx2)
#endif
};
INSTANTIATE_TEST_SUITE_P(AVX2, SSETest,
                         Combine(ValuesIn(sse_avx2), Range(4, 129, 4)));
#endif  // HAVE_AVX2

// Only a high-bitdepth SVE variant is registered here.
#if HAVE_SVE && CONFIG_AV1_HIGHBITDEPTH
const TestSSEFuncs sse_sve[] = { TestSSEFuncs(aom_highbd_sse_c,
                                              aom_highbd_sse_sve) };
INSTANTIATE_TEST_SUITE_P(SVE, SSETest,
                         Combine(ValuesIn(sse_sve), Range(4, 129, 4)));
#endif  // HAVE_SVE && CONFIG_AV1_HIGHBITDEPTH
| 469 | |
Vishesh | 90a4423 | 2019-09-03 16:16:53 +0530 | [diff] [blame] | 470 | ////////////////////////////////////////////////////////////////////////////// |
| 471 | // get_blk sum squares test functions |
| 472 | ////////////////////////////////////////////////////////////////////////////// |
| 473 | |
| 474 | typedef void (*sse_sum_func)(const int16_t *data, int stride, int bw, int bh, |
| 475 | int *x_sum, int64_t *x2_sum); |
| 476 | typedef libaom_test::FuncParam<sse_sum_func> TestSSE_SumFuncs; |
| 477 | |
Salome Thirot | 4ef3627 | 2023-07-10 20:23:08 +0100 | [diff] [blame] | 478 | typedef std::tuple<TestSSE_SumFuncs, TX_SIZE> SSE_SumTestParam; |
Vishesh | 90a4423 | 2019-09-03 16:16:53 +0530 | [diff] [blame] | 479 | |
| 480 | class SSE_Sum_Test : public ::testing::TestWithParam<SSE_SumTestParam> { |
| 481 | public: |
James Zern | f1fa1eb | 2023-07-25 15:34:13 -0700 | [diff] [blame] | 482 | ~SSE_Sum_Test() override = default; |
James Zern | faa2dcf | 2023-07-24 18:29:51 -0700 | [diff] [blame] | 483 | void SetUp() override { |
Vishesh | 90a4423 | 2019-09-03 16:16:53 +0530 | [diff] [blame] | 484 | params_ = GET_PARAM(0); |
Vishesh | 90a4423 | 2019-09-03 16:16:53 +0530 | [diff] [blame] | 485 | rnd_.Reset(ACMRandom::DeterministicSeed()); |
| 486 | src_ = reinterpret_cast<int16_t *>(aom_memalign(32, 256 * 256 * 2)); |
James Zern | 9dea04e | 2022-04-28 13:18:36 -0700 | [diff] [blame] | 487 | ASSERT_NE(src_, nullptr); |
Vishesh | 90a4423 | 2019-09-03 16:16:53 +0530 | [diff] [blame] | 488 | } |
| 489 | |
James Zern | faa2dcf | 2023-07-24 18:29:51 -0700 | [diff] [blame] | 490 | void TearDown() override { aom_free(src_); } |
Wan-Teh Chang | ae75ef9 | 2023-10-03 18:01:07 -0700 | [diff] [blame] | 491 | void RunTest(bool is_random, int tx_size, int run_times); |
Vishesh | 90a4423 | 2019-09-03 16:16:53 +0530 | [diff] [blame] | 492 | |
| 493 | void GenRandomData(int width, int height, int stride) { |
| 494 | const int msb = 11; // Up to 12 bit input |
| 495 | const int limit = 1 << (msb + 1); |
| 496 | for (int ii = 0; ii < height; ii++) { |
| 497 | for (int jj = 0; jj < width; jj++) { |
| 498 | src_[ii * stride + jj] = rnd_(limit); |
| 499 | } |
| 500 | } |
| 501 | } |
| 502 | |
| 503 | void GenExtremeData(int width, int height, int stride, int16_t *data, |
| 504 | int16_t val) { |
| 505 | for (int ii = 0; ii < height; ii++) { |
| 506 | for (int jj = 0; jj < width; jj++) { |
| 507 | data[ii * stride + jj] = val; |
| 508 | } |
| 509 | } |
| 510 | } |
| 511 | |
| 512 | protected: |
Vishesh | 90a4423 | 2019-09-03 16:16:53 +0530 | [diff] [blame] | 513 | TestSSE_SumFuncs params_; |
| 514 | int16_t *src_; |
| 515 | ACMRandom rnd_; |
| 516 | }; |
chiyotsai | 9dfac72 | 2020-07-07 17:43:02 -0700 | [diff] [blame] | 517 | GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(SSE_Sum_Test); |
Vishesh | 90a4423 | 2019-09-03 16:16:53 +0530 | [diff] [blame] | 518 | |
Wan-Teh Chang | ae75ef9 | 2023-10-03 18:01:07 -0700 | [diff] [blame] | 519 | void SSE_Sum_Test::RunTest(bool is_random, int tx_size, int run_times) { |
Vishesh | 90a4423 | 2019-09-03 16:16:53 +0530 | [diff] [blame] | 520 | aom_usec_timer ref_timer, test_timer; |
Salome Thirot | 4ef3627 | 2023-07-10 20:23:08 +0100 | [diff] [blame] | 521 | int width = tx_size_wide[tx_size]; |
| 522 | int height = tx_size_high[tx_size]; |
Vishesh | 90a4423 | 2019-09-03 16:16:53 +0530 | [diff] [blame] | 523 | for (int k = 0; k < 3; k++) { |
| 524 | int stride = 4 << rnd_(7); // Up to 256 stride |
| 525 | while (stride < width) { // Make sure it's valid |
| 526 | stride = 4 << rnd_(7); |
| 527 | } |
Wan-Teh Chang | ae75ef9 | 2023-10-03 18:01:07 -0700 | [diff] [blame] | 528 | if (is_random) { |
Vishesh | 90a4423 | 2019-09-03 16:16:53 +0530 | [diff] [blame] | 529 | GenRandomData(width, height, stride); |
| 530 | } else { |
| 531 | const int msb = 12; // Up to 12 bit input |
| 532 | const int limit = (1 << msb) - 1; |
| 533 | if (k == 0) { |
| 534 | GenExtremeData(width, height, stride, src_, limit); |
| 535 | } else { |
| 536 | GenExtremeData(width, height, stride, src_, -limit); |
| 537 | } |
| 538 | } |
| 539 | int sum_c = 0; |
| 540 | int64_t sse_intr = 0; |
| 541 | int sum_intr = 0; |
| 542 | int64_t sse_c = 0; |
| 543 | |
| 544 | params_.ref_func(src_, stride, width, height, &sum_c, &sse_c); |
| 545 | params_.tst_func(src_, stride, width, height, &sum_intr, &sse_intr); |
| 546 | |
| 547 | if (run_times > 1) { |
| 548 | aom_usec_timer_start(&ref_timer); |
| 549 | for (int j = 0; j < run_times; j++) { |
| 550 | params_.ref_func(src_, stride, width, height, &sum_c, &sse_c); |
| 551 | } |
| 552 | aom_usec_timer_mark(&ref_timer); |
| 553 | const int elapsed_time_c = |
| 554 | static_cast<int>(aom_usec_timer_elapsed(&ref_timer)); |
| 555 | |
| 556 | aom_usec_timer_start(&test_timer); |
| 557 | for (int j = 0; j < run_times; j++) { |
| 558 | params_.tst_func(src_, stride, width, height, &sum_intr, &sse_intr); |
| 559 | } |
| 560 | aom_usec_timer_mark(&test_timer); |
| 561 | const int elapsed_time_simd = |
| 562 | static_cast<int>(aom_usec_timer_elapsed(&test_timer)); |
| 563 | |
| 564 | printf( |
| 565 | "c_time=%d \t simd_time=%d \t " |
| 566 | "gain=%f\t width=%d\t height=%d \n", |
| 567 | elapsed_time_c, elapsed_time_simd, |
| 568 | (float)((float)elapsed_time_c / (float)elapsed_time_simd), width, |
| 569 | height); |
| 570 | |
| 571 | } else { |
| 572 | EXPECT_EQ(sum_c, sum_intr) |
| 573 | << "Error:" << k << " SSE Sum Test [" << width << "x" << height |
| 574 | << "] C output does not match optimized output."; |
| 575 | EXPECT_EQ(sse_c, sse_intr) |
| 576 | << "Error:" << k << " SSE Sum Test [" << width << "x" << height |
| 577 | << "] C output does not match optimized output."; |
| 578 | } |
| 579 | } |
| 580 | } |
| 581 | |
| 582 | TEST_P(SSE_Sum_Test, OperationCheck) { |
Wan-Teh Chang | ae75ef9 | 2023-10-03 18:01:07 -0700 | [diff] [blame] | 583 | RunTest(true, GET_PARAM(1), 1); // GenRandomData |
Vishesh | 90a4423 | 2019-09-03 16:16:53 +0530 | [diff] [blame] | 584 | } |
| 585 | |
Wan-Teh Chang | ae75ef9 | 2023-10-03 18:01:07 -0700 | [diff] [blame] | 586 | TEST_P(SSE_Sum_Test, ExtremeValues) { RunTest(false, GET_PARAM(1), 1); } |
Vishesh | 90a4423 | 2019-09-03 16:16:53 +0530 | [diff] [blame] | 587 | |
Wan-Teh Chang | ae75ef9 | 2023-10-03 18:01:07 -0700 | [diff] [blame] | 588 | TEST_P(SSE_Sum_Test, DISABLED_Speed) { RunTest(true, GET_PARAM(1), 10000); } |
Salome Thirot | 4ef3627 | 2023-07-10 20:23:08 +0100 | [diff] [blame] | 589 | |
// Transform sizes used to parameterize SSE_Sum_Test. The guard names every
// SIMD flavour that instantiates the suite with this table -- including SVE --
// so the table is defined whenever it is referenced and stays undefined (no
// unused-variable warning) otherwise.
#if HAVE_SSE2 || HAVE_AVX2 || HAVE_NEON || HAVE_SVE
const TX_SIZE kValidBlockSize[] = { TX_4X4,   TX_8X8,   TX_16X16, TX_32X32,
                                    TX_64X64, TX_4X8,   TX_8X4,   TX_8X16,
                                    TX_16X8,  TX_16X32, TX_32X16, TX_64X32,
                                    TX_32X64, TX_4X16,  TX_16X4,  TX_8X32,
                                    TX_32X8,  TX_16X64, TX_64X16 };
#endif  // HAVE_SSE2 || HAVE_AVX2 || HAVE_NEON || HAVE_SVE
Vishesh | 90a4423 | 2019-09-03 16:16:53 +0530 | [diff] [blame] | 597 | |
// Each enabled SIMD flavour pairs the C reference with its own kernel and is
// run over every entry of kValidBlockSize.
#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(
    SSE2, SSE_Sum_Test,
    Combine(::testing::Values(TestSSE_SumFuncs(&aom_get_blk_sse_sum_c,
                                               &aom_get_blk_sse_sum_sse2)),
            ValuesIn(kValidBlockSize)));
#endif  // HAVE_SSE2

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(
    AVX2, SSE_Sum_Test,
    Combine(::testing::Values(TestSSE_SumFuncs(&aom_get_blk_sse_sum_c,
                                               &aom_get_blk_sse_sum_avx2)),
            ValuesIn(kValidBlockSize)));
#endif  // HAVE_AVX2

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, SSE_Sum_Test,
    Combine(::testing::Values(TestSSE_SumFuncs(&aom_get_blk_sse_sum_c,
                                               &aom_get_blk_sse_sum_neon)),
            ValuesIn(kValidBlockSize)));
#endif  // HAVE_NEON

#if HAVE_SVE
INSTANTIATE_TEST_SUITE_P(
    SVE, SSE_Sum_Test,
    Combine(::testing::Values(TestSSE_SumFuncs(&aom_get_blk_sse_sum_c,
                                               &aom_get_blk_sse_sum_sve)),
            ValuesIn(kValidBlockSize)));
#endif  // HAVE_SVE
| 629 | |
Jayasanker J | f4368ac | 2020-01-08 19:54:24 +0530 | [diff] [blame] | 630 | ////////////////////////////////////////////////////////////////////////////// |
| 631 | // 2D Variance test functions |
| 632 | ////////////////////////////////////////////////////////////////////////////// |
| 633 | |
| 634 | typedef uint64_t (*Var2DFunc)(uint8_t *src, int stride, int width, int height); |
| 635 | typedef libaom_test::FuncParam<Var2DFunc> TestFuncVar2D; |
| 636 | |
| 637 | const uint16_t test_block_size[2] = { 128, 256 }; |
| 638 | |
| 639 | class Lowbd2dVarTest : public ::testing::TestWithParam<TestFuncVar2D> { |
| 640 | public: |
James Zern | f1fa1eb | 2023-07-25 15:34:13 -0700 | [diff] [blame] | 641 | ~Lowbd2dVarTest() override = default; |
James Zern | faa2dcf | 2023-07-24 18:29:51 -0700 | [diff] [blame] | 642 | void SetUp() override { |
Jayasanker J | f4368ac | 2020-01-08 19:54:24 +0530 | [diff] [blame] | 643 | params_ = this->GetParam(); |
| 644 | rnd_.Reset(ACMRandom::DeterministicSeed()); |
| 645 | src_ = reinterpret_cast<uint8_t *>( |
| 646 | aom_memalign(16, 512 * 512 * sizeof(uint8_t))); |
James Zern | 9dea04e | 2022-04-28 13:18:36 -0700 | [diff] [blame] | 647 | ASSERT_NE(src_, nullptr); |
Jayasanker J | f4368ac | 2020-01-08 19:54:24 +0530 | [diff] [blame] | 648 | } |
| 649 | |
James Zern | faa2dcf | 2023-07-24 18:29:51 -0700 | [diff] [blame] | 650 | void TearDown() override { aom_free(src_); } |
Wan-Teh Chang | ae75ef9 | 2023-10-03 18:01:07 -0700 | [diff] [blame] | 651 | void RunTest(bool is_random); |
Jayasanker J | f4368ac | 2020-01-08 19:54:24 +0530 | [diff] [blame] | 652 | void RunSpeedTest(); |
| 653 | |
| 654 | void GenRandomData(int width, int height, int stride) { |
| 655 | const int msb = 7; // Up to 8 bit input |
| 656 | const int limit = 1 << (msb + 1); |
| 657 | for (int ii = 0; ii < height; ii++) { |
| 658 | for (int jj = 0; jj < width; jj++) { |
| 659 | src_[ii * stride + jj] = rnd_(limit); |
| 660 | } |
| 661 | } |
| 662 | } |
| 663 | |
| 664 | void GenExtremeData(int width, int height, int stride) { |
| 665 | const int msb = 7; // Up to 8 bit input |
| 666 | const int limit = 1 << (msb + 1); |
| 667 | const int val = rnd_(2) ? limit - 1 : 0; |
| 668 | for (int ii = 0; ii < height; ii++) { |
| 669 | for (int jj = 0; jj < width; jj++) { |
| 670 | src_[ii * stride + jj] = val; |
| 671 | } |
| 672 | } |
| 673 | } |
| 674 | |
| 675 | protected: |
| 676 | TestFuncVar2D params_; |
| 677 | uint8_t *src_; |
| 678 | ACMRandom rnd_; |
| 679 | }; |
chiyotsai | 9dfac72 | 2020-07-07 17:43:02 -0700 | [diff] [blame] | 680 | GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(Lowbd2dVarTest); |
Jayasanker J | f4368ac | 2020-01-08 19:54:24 +0530 | [diff] [blame] | 681 | |
Wan-Teh Chang | ae75ef9 | 2023-10-03 18:01:07 -0700 | [diff] [blame] | 682 | void Lowbd2dVarTest::RunTest(bool is_random) { |
Jayasanker J | f4368ac | 2020-01-08 19:54:24 +0530 | [diff] [blame] | 683 | int failed = 0; |
| 684 | for (int k = 0; k < kNumIterations; k++) { |
| 685 | const int width = 4 * (rnd_(63) + 1); // Up to 256x256 |
| 686 | const int height = 4 * (rnd_(63) + 1); // Up to 256x256 |
| 687 | int stride = 4 << rnd_(8); // Up to 512 stride |
| 688 | while (stride < width) { // Make sure it's valid |
| 689 | stride = 4 << rnd_(8); |
| 690 | } |
Wan-Teh Chang | ae75ef9 | 2023-10-03 18:01:07 -0700 | [diff] [blame] | 691 | if (is_random) { |
Jayasanker J | f4368ac | 2020-01-08 19:54:24 +0530 | [diff] [blame] | 692 | GenRandomData(width, height, stride); |
| 693 | } else { |
| 694 | GenExtremeData(width, height, stride); |
| 695 | } |
| 696 | |
| 697 | const uint64_t res_ref = params_.ref_func(src_, stride, width, height); |
| 698 | uint64_t res_tst; |
chiyotsai | ef26106 | 2021-07-01 14:01:45 -0700 | [diff] [blame] | 699 | API_REGISTER_STATE_CHECK(res_tst = |
Jayasanker J | f4368ac | 2020-01-08 19:54:24 +0530 | [diff] [blame] | 700 | params_.tst_func(src_, stride, width, height)); |
| 701 | |
| 702 | if (!failed) { |
| 703 | failed = res_ref != res_tst; |
| 704 | EXPECT_EQ(res_ref, res_tst) |
| 705 | << "Error: Sum Squares Test [" << width << "x" << height |
| 706 | << "] C output does not match optimized output."; |
| 707 | } |
| 708 | } |
| 709 | } |
| 710 | |
| 711 | void Lowbd2dVarTest::RunSpeedTest() { |
| 712 | for (int block = 0; block < 2; block++) { |
| 713 | const int width = test_block_size[block]; |
| 714 | const int height = test_block_size[block]; |
| 715 | int stride = 4 << rnd_(8); // Up to 512 stride |
| 716 | while (stride < width) { // Make sure it's valid |
| 717 | stride = 4 << rnd_(8); |
| 718 | } |
| 719 | GenExtremeData(width, height, stride); |
| 720 | const int num_loops = 1000000000 / (width + height); |
| 721 | aom_usec_timer timer; |
| 722 | aom_usec_timer_start(&timer); |
| 723 | |
| 724 | for (int i = 0; i < num_loops; ++i) |
| 725 | params_.ref_func(src_, stride, width, height); |
| 726 | |
| 727 | aom_usec_timer_mark(&timer); |
| 728 | const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer)); |
| 729 | |
| 730 | aom_usec_timer timer1; |
| 731 | aom_usec_timer_start(&timer1); |
| 732 | for (int i = 0; i < num_loops; ++i) |
| 733 | params_.tst_func(src_, stride, width, height); |
| 734 | aom_usec_timer_mark(&timer1); |
| 735 | const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1)); |
| 736 | printf("%3dx%-3d: Scaling = %.2f\n", width, height, |
| 737 | (double)elapsed_time / elapsed_time1); |
| 738 | } |
| 739 | } |
| 740 | |
| 741 | TEST_P(Lowbd2dVarTest, OperationCheck) { |
Wan-Teh Chang | ae75ef9 | 2023-10-03 18:01:07 -0700 | [diff] [blame] | 742 | RunTest(true); // GenRandomData |
Jayasanker J | f4368ac | 2020-01-08 19:54:24 +0530 | [diff] [blame] | 743 | } |
| 744 | |
| 745 | TEST_P(Lowbd2dVarTest, ExtremeValues) { |
Wan-Teh Chang | ae75ef9 | 2023-10-03 18:01:07 -0700 | [diff] [blame] | 746 | RunTest(false); // GenExtremeData |
Jayasanker J | f4368ac | 2020-01-08 19:54:24 +0530 | [diff] [blame] | 747 | } |
| 748 | |
| 749 | TEST_P(Lowbd2dVarTest, DISABLED_Speed) { RunSpeedTest(); } |
| 750 | |
// Each enabled SIMD flavour is checked against the C reference.
#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(
    SSE2, Lowbd2dVarTest,
    ::testing::Values(TestFuncVar2D(&aom_var_2d_u8_c, &aom_var_2d_u8_sse2)));
#endif  // HAVE_SSE2

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(
    AVX2, Lowbd2dVarTest,
    ::testing::Values(TestFuncVar2D(&aom_var_2d_u8_c, &aom_var_2d_u8_avx2)));
#endif  // HAVE_AVX2

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, Lowbd2dVarTest,
    ::testing::Values(TestFuncVar2D(&aom_var_2d_u8_c, &aom_var_2d_u8_neon)));
#endif  // HAVE_NEON

#if HAVE_NEON_DOTPROD
INSTANTIATE_TEST_SUITE_P(
    NEON_DOTPROD, Lowbd2dVarTest,
    ::testing::Values(TestFuncVar2D(&aom_var_2d_u8_c,
                                    &aom_var_2d_u8_neon_dotprod)));
#endif  // HAVE_NEON_DOTPROD
| 782 | |
Jayasanker J | f4368ac | 2020-01-08 19:54:24 +0530 | [diff] [blame] | 783 | class Highbd2dVarTest : public ::testing::TestWithParam<TestFuncVar2D> { |
| 784 | public: |
James Zern | f1fa1eb | 2023-07-25 15:34:13 -0700 | [diff] [blame] | 785 | ~Highbd2dVarTest() override = default; |
James Zern | faa2dcf | 2023-07-24 18:29:51 -0700 | [diff] [blame] | 786 | void SetUp() override { |
Jayasanker J | f4368ac | 2020-01-08 19:54:24 +0530 | [diff] [blame] | 787 | params_ = this->GetParam(); |
| 788 | rnd_.Reset(ACMRandom::DeterministicSeed()); |
| 789 | src_ = reinterpret_cast<uint16_t *>( |
| 790 | aom_memalign(16, 512 * 512 * sizeof(uint16_t))); |
James Zern | 9dea04e | 2022-04-28 13:18:36 -0700 | [diff] [blame] | 791 | ASSERT_NE(src_, nullptr); |
Jayasanker J | f4368ac | 2020-01-08 19:54:24 +0530 | [diff] [blame] | 792 | } |
| 793 | |
James Zern | faa2dcf | 2023-07-24 18:29:51 -0700 | [diff] [blame] | 794 | void TearDown() override { aom_free(src_); } |
Wan-Teh Chang | ae75ef9 | 2023-10-03 18:01:07 -0700 | [diff] [blame] | 795 | void RunTest(bool is_random); |
Jayasanker J | f4368ac | 2020-01-08 19:54:24 +0530 | [diff] [blame] | 796 | void RunSpeedTest(); |
| 797 | |
| 798 | void GenRandomData(int width, int height, int stride) { |
| 799 | const int msb = 11; // Up to 12 bit input |
| 800 | const int limit = 1 << (msb + 1); |
| 801 | for (int ii = 0; ii < height; ii++) { |
| 802 | for (int jj = 0; jj < width; jj++) { |
| 803 | src_[ii * stride + jj] = rnd_(limit); |
| 804 | } |
| 805 | } |
| 806 | } |
| 807 | |
| 808 | void GenExtremeData(int width, int height, int stride) { |
| 809 | const int msb = 11; // Up to 12 bit input |
| 810 | const int limit = 1 << (msb + 1); |
| 811 | const int val = rnd_(2) ? limit - 1 : 0; |
| 812 | for (int ii = 0; ii < height; ii++) { |
| 813 | for (int jj = 0; jj < width; jj++) { |
| 814 | src_[ii * stride + jj] = val; |
| 815 | } |
| 816 | } |
| 817 | } |
| 818 | |
| 819 | protected: |
| 820 | TestFuncVar2D params_; |
| 821 | uint16_t *src_; |
| 822 | ACMRandom rnd_; |
| 823 | }; |
chiyotsai | 9dfac72 | 2020-07-07 17:43:02 -0700 | [diff] [blame] | 824 | GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(Highbd2dVarTest); |
Jayasanker J | f4368ac | 2020-01-08 19:54:24 +0530 | [diff] [blame] | 825 | |
Wan-Teh Chang | ae75ef9 | 2023-10-03 18:01:07 -0700 | [diff] [blame] | 826 | void Highbd2dVarTest::RunTest(bool is_random) { |
Jayasanker J | f4368ac | 2020-01-08 19:54:24 +0530 | [diff] [blame] | 827 | int failed = 0; |
| 828 | for (int k = 0; k < kNumIterations; k++) { |
| 829 | const int width = 4 * (rnd_(63) + 1); // Up to 256x256 |
| 830 | const int height = 4 * (rnd_(63) + 1); // Up to 256x256 |
| 831 | int stride = 4 << rnd_(8); // Up to 512 stride |
| 832 | while (stride < width) { // Make sure it's valid |
| 833 | stride = 4 << rnd_(8); |
| 834 | } |
Wan-Teh Chang | ae75ef9 | 2023-10-03 18:01:07 -0700 | [diff] [blame] | 835 | if (is_random) { |
Jayasanker J | f4368ac | 2020-01-08 19:54:24 +0530 | [diff] [blame] | 836 | GenRandomData(width, height, stride); |
| 837 | } else { |
| 838 | GenExtremeData(width, height, stride); |
| 839 | } |
| 840 | |
| 841 | const uint64_t res_ref = |
| 842 | params_.ref_func(CONVERT_TO_BYTEPTR(src_), stride, width, height); |
| 843 | uint64_t res_tst; |
chiyotsai | ef26106 | 2021-07-01 14:01:45 -0700 | [diff] [blame] | 844 | API_REGISTER_STATE_CHECK( |
Jayasanker J | f4368ac | 2020-01-08 19:54:24 +0530 | [diff] [blame] | 845 | res_tst = |
| 846 | params_.tst_func(CONVERT_TO_BYTEPTR(src_), stride, width, height)); |
| 847 | |
| 848 | if (!failed) { |
| 849 | failed = res_ref != res_tst; |
| 850 | EXPECT_EQ(res_ref, res_tst) |
| 851 | << "Error: Sum Squares Test [" << width << "x" << height |
| 852 | << "] C output does not match optimized output."; |
| 853 | } |
| 854 | } |
| 855 | } |
| 856 | |
| 857 | void Highbd2dVarTest::RunSpeedTest() { |
| 858 | for (int block = 0; block < 2; block++) { |
| 859 | const int width = test_block_size[block]; |
| 860 | const int height = test_block_size[block]; |
| 861 | int stride = 4 << rnd_(8); // Up to 512 stride |
| 862 | while (stride < width) { // Make sure it's valid |
| 863 | stride = 4 << rnd_(8); |
| 864 | } |
| 865 | GenExtremeData(width, height, stride); |
| 866 | const int num_loops = 1000000000 / (width + height); |
| 867 | aom_usec_timer timer; |
| 868 | aom_usec_timer_start(&timer); |
| 869 | |
| 870 | for (int i = 0; i < num_loops; ++i) |
| 871 | params_.ref_func(CONVERT_TO_BYTEPTR(src_), stride, width, height); |
| 872 | |
| 873 | aom_usec_timer_mark(&timer); |
| 874 | const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer)); |
| 875 | |
| 876 | aom_usec_timer timer1; |
| 877 | aom_usec_timer_start(&timer1); |
| 878 | for (int i = 0; i < num_loops; ++i) |
| 879 | params_.tst_func(CONVERT_TO_BYTEPTR(src_), stride, width, height); |
| 880 | aom_usec_timer_mark(&timer1); |
| 881 | const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1)); |
| 882 | printf("%3dx%-3d: Scaling = %.2f\n", width, height, |
| 883 | (double)elapsed_time / elapsed_time1); |
| 884 | } |
| 885 | } |
| 886 | |
| 887 | TEST_P(Highbd2dVarTest, OperationCheck) { |
Wan-Teh Chang | ae75ef9 | 2023-10-03 18:01:07 -0700 | [diff] [blame] | 888 | RunTest(true); // GenRandomData |
Jayasanker J | f4368ac | 2020-01-08 19:54:24 +0530 | [diff] [blame] | 889 | } |
| 890 | |
| 891 | TEST_P(Highbd2dVarTest, ExtremeValues) { |
Wan-Teh Chang | ae75ef9 | 2023-10-03 18:01:07 -0700 | [diff] [blame] | 892 | RunTest(false); // GenExtremeData |
Jayasanker J | f4368ac | 2020-01-08 19:54:24 +0530 | [diff] [blame] | 893 | } |
| 894 | |
| 895 | TEST_P(Highbd2dVarTest, DISABLED_Speed) { RunSpeedTest(); } |
| 896 | |
// Each enabled SIMD flavour is checked against the C reference.
#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(SSE2, Highbd2dVarTest,
                         ::testing::Values(TestFuncVar2D(
                             &aom_var_2d_u16_c, &aom_var_2d_u16_sse2)));
#endif  // HAVE_SSE2

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, Highbd2dVarTest,
                         ::testing::Values(TestFuncVar2D(
                             &aom_var_2d_u16_c, &aom_var_2d_u16_avx2)));
#endif  // HAVE_AVX2

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, Highbd2dVarTest,
                         ::testing::Values(TestFuncVar2D(
                             &aom_var_2d_u16_c, &aom_var_2d_u16_neon)));
#endif  // HAVE_NEON

#if HAVE_SVE
INSTANTIATE_TEST_SUITE_P(SVE, Highbd2dVarTest,
                         ::testing::Values(TestFuncVar2D(
                             &aom_var_2d_u16_c, &aom_var_2d_u16_sve)));
#endif  // HAVE_SVE
Geza Lore | abd0050 | 2016-02-12 16:04:35 +0000 | [diff] [blame] | 928 | } // namespace |