Geza Lore | ebc2d34 | 2016-07-12 11:41:54 +0100 | [diff] [blame] | 1 | /* |
Yaowu Xu | 2ab7ff0 | 2016-09-02 12:04:54 -0700 | [diff] [blame] | 2 | * Copyright (c) 2016, Alliance for Open Media. All rights reserved |
Geza Lore | ebc2d34 | 2016-07-12 11:41:54 +0100 | [diff] [blame] | 3 | * |
Yaowu Xu | 2ab7ff0 | 2016-09-02 12:04:54 -0700 | [diff] [blame] | 4 | * This source code is subject to the terms of the BSD 2 Clause License and |
| 5 | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
| 6 | * was not distributed with this source code in the LICENSE file, you can |
| 7 | * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
| 8 | * Media Patent License 1.0 was not distributed with this source code in the |
| 9 | * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
Geza Lore | ebc2d34 | 2016-07-12 11:41:54 +0100 | [diff] [blame] | 10 | */ |
| 11 | |
Tom Finegan | 7a07ece | 2017-02-07 17:14:05 -0800 | [diff] [blame] | 12 | #include "third_party/googletest/src/googletest/include/gtest/gtest.h" |
Geza Lore | ebc2d34 | 2016-07-12 11:41:54 +0100 | [diff] [blame] | 13 | #include "test/acm_random.h" |
| 14 | |
| 15 | #include "test/function_equivalence_test.h" |
| 16 | #include "test/register_state_check.h" |
| 17 | |
Tom Finegan | 60e653d | 2018-05-22 11:34:58 -0700 | [diff] [blame] | 18 | #include "config/aom_config.h" |
Tom Finegan | 44702c8 | 2018-05-22 13:00:39 -0700 | [diff] [blame] | 19 | #include "config/aom_dsp_rtcd.h" |
Tom Finegan | 60e653d | 2018-05-22 11:34:58 -0700 | [diff] [blame] | 20 | |
Yaowu Xu | f883b42 | 2016-08-30 14:01:10 -0700 | [diff] [blame] | 21 | #include "aom/aom_integer.h" |
Geza Lore | ebc2d34 | 2016-07-12 11:41:54 +0100 | [diff] [blame] | 22 | |
// Element count of a MAX_SB_SIZE x MAX_SB_SIZE square. Every pre/wsrc/mask
// buffer declared by the tests in this file has this many entries.
#define MAX_SB_SQUARE (MAX_SB_SIZE * MAX_SB_SIZE)
| 24 | |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 25 | using libaom_test::ACMRandom; |
| 26 | using libaom_test::FunctionEquivalenceTest; |
Geza Lore | ebc2d34 | 2016-07-12 11:41:54 +0100 | [diff] [blame] | 27 | |
| 28 | namespace { |
| 29 | |
| 30 | static const int kIterations = 1000; |
| 31 | static const int kMaskMax = 64; |
| 32 | |
| 33 | typedef unsigned int (*ObmcVarF)(const uint8_t *pre, int pre_stride, |
| 34 | const int32_t *wsrc, const int32_t *mask, |
| 35 | unsigned int *sse); |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 36 | typedef libaom_test::FuncParam<ObmcVarF> TestFuncs; |
Geza Lore | ebc2d34 | 2016-07-12 11:41:54 +0100 | [diff] [blame] | 37 | |
| 38 | //////////////////////////////////////////////////////////////////////////////// |
| 39 | // 8 bit |
| 40 | //////////////////////////////////////////////////////////////////////////////// |
| 41 | |
// Fixture for the 8-bit tests; it adds nothing on top of
// FunctionEquivalenceTest<ObmcVarF>.
class ObmcVarianceTest : public FunctionEquivalenceTest<ObmcVarF> {};
// All instantiations of this fixture sit behind HAVE_* guards, so a build may
// end up with none of them.
GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(ObmcVarianceTest);
Geza Lore | ebc2d34 | 2016-07-12 11:41:54 +0100 | [diff] [blame] | 44 | |
| 45 | TEST_P(ObmcVarianceTest, RandomValues) { |
| 46 | DECLARE_ALIGNED(32, uint8_t, pre[MAX_SB_SQUARE]); |
| 47 | DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]); |
| 48 | DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]); |
| 49 | |
clang-format | 3a826f1 | 2016-08-11 17:46:05 -0700 | [diff] [blame] | 50 | for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { |
Geza Lore | ebc2d34 | 2016-07-12 11:41:54 +0100 | [diff] [blame] | 51 | const int pre_stride = this->rng_(MAX_SB_SIZE + 1); |
| 52 | |
clang-format | 3a826f1 | 2016-08-11 17:46:05 -0700 | [diff] [blame] | 53 | for (int i = 0; i < MAX_SB_SQUARE; ++i) { |
Geza Lore | ebc2d34 | 2016-07-12 11:41:54 +0100 | [diff] [blame] | 54 | pre[i] = this->rng_.Rand8(); |
| 55 | wsrc[i] = this->rng_.Rand8() * this->rng_(kMaskMax * kMaskMax + 1); |
| 56 | mask[i] = this->rng_(kMaskMax * kMaskMax + 1); |
| 57 | } |
| 58 | |
| 59 | unsigned int ref_sse, tst_sse; |
clang-format | 3a826f1 | 2016-08-11 17:46:05 -0700 | [diff] [blame] | 60 | const unsigned int ref_res = |
| 61 | params_.ref_func(pre, pre_stride, wsrc, mask, &ref_sse); |
Geza Lore | ebc2d34 | 2016-07-12 11:41:54 +0100 | [diff] [blame] | 62 | unsigned int tst_res; |
chiyotsai | ef26106 | 2021-07-01 14:01:45 -0700 | [diff] [blame] | 63 | API_REGISTER_STATE_CHECK( |
Geza Lore | ebc2d34 | 2016-07-12 11:41:54 +0100 | [diff] [blame] | 64 | tst_res = params_.tst_func(pre, pre_stride, wsrc, mask, &tst_sse)); |
| 65 | |
| 66 | ASSERT_EQ(ref_res, tst_res); |
| 67 | ASSERT_EQ(ref_sse, tst_sse); |
| 68 | } |
| 69 | } |
| 70 | |
| 71 | TEST_P(ObmcVarianceTest, ExtremeValues) { |
| 72 | DECLARE_ALIGNED(32, uint8_t, pre[MAX_SB_SQUARE]); |
| 73 | DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]); |
| 74 | DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]); |
| 75 | |
clang-format | 3a826f1 | 2016-08-11 17:46:05 -0700 | [diff] [blame] | 76 | for (int iter = 0; iter < MAX_SB_SIZE && !HasFatalFailure(); ++iter) { |
Geza Lore | ebc2d34 | 2016-07-12 11:41:54 +0100 | [diff] [blame] | 77 | const int pre_stride = iter; |
| 78 | |
clang-format | 3a826f1 | 2016-08-11 17:46:05 -0700 | [diff] [blame] | 79 | for (int i = 0; i < MAX_SB_SQUARE; ++i) { |
Geza Lore | ebc2d34 | 2016-07-12 11:41:54 +0100 | [diff] [blame] | 80 | pre[i] = UINT8_MAX; |
| 81 | wsrc[i] = UINT8_MAX * kMaskMax * kMaskMax; |
| 82 | mask[i] = kMaskMax * kMaskMax; |
| 83 | } |
| 84 | |
| 85 | unsigned int ref_sse, tst_sse; |
clang-format | 3a826f1 | 2016-08-11 17:46:05 -0700 | [diff] [blame] | 86 | const unsigned int ref_res = |
| 87 | params_.ref_func(pre, pre_stride, wsrc, mask, &ref_sse); |
Geza Lore | ebc2d34 | 2016-07-12 11:41:54 +0100 | [diff] [blame] | 88 | unsigned int tst_res; |
chiyotsai | ef26106 | 2021-07-01 14:01:45 -0700 | [diff] [blame] | 89 | API_REGISTER_STATE_CHECK( |
Geza Lore | ebc2d34 | 2016-07-12 11:41:54 +0100 | [diff] [blame] | 90 | tst_res = params_.tst_func(pre, pre_stride, wsrc, mask, &tst_sse)); |
| 91 | |
| 92 | ASSERT_EQ(ref_res, tst_res); |
| 93 | ASSERT_EQ(ref_sse, tst_sse); |
| 94 | } |
| 95 | } |
| 96 | |
Venkat | f921344 | 2018-08-03 17:39:58 +0530 | [diff] [blame] | 97 | TEST_P(ObmcVarianceTest, DISABLED_Speed) { |
| 98 | DECLARE_ALIGNED(32, uint8_t, pre[MAX_SB_SQUARE]); |
| 99 | DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]); |
| 100 | DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]); |
| 101 | |
| 102 | const int pre_stride = this->rng_(MAX_SB_SIZE + 1); |
| 103 | |
| 104 | for (int i = 0; i < MAX_SB_SQUARE; ++i) { |
| 105 | pre[i] = this->rng_.Rand8(); |
| 106 | wsrc[i] = this->rng_.Rand8() * this->rng_(kMaskMax * kMaskMax + 1); |
| 107 | mask[i] = this->rng_(kMaskMax * kMaskMax + 1); |
| 108 | } |
| 109 | |
| 110 | const int num_loops = 1000000; |
| 111 | unsigned int ref_sse, tst_sse; |
| 112 | aom_usec_timer ref_timer, test_timer; |
| 113 | |
| 114 | aom_usec_timer_start(&ref_timer); |
| 115 | for (int i = 0; i < num_loops; ++i) { |
| 116 | params_.ref_func(pre, pre_stride, wsrc, mask, &ref_sse); |
| 117 | } |
| 118 | aom_usec_timer_mark(&ref_timer); |
| 119 | const int elapsed_time_c = |
| 120 | static_cast<int>(aom_usec_timer_elapsed(&ref_timer)); |
| 121 | |
| 122 | aom_usec_timer_start(&test_timer); |
| 123 | for (int i = 0; i < num_loops; ++i) { |
| 124 | params_.tst_func(pre, pre_stride, wsrc, mask, &tst_sse); |
| 125 | } |
| 126 | aom_usec_timer_mark(&test_timer); |
| 127 | const int elapsed_time_simd = |
| 128 | static_cast<int>(aom_usec_timer_elapsed(&test_timer)); |
| 129 | |
James Zern | b70593a | 2023-01-23 13:41:00 -0800 | [diff] [blame] | 130 | printf("c_time=%d \t simd_time=%d \t gain=%f \n", elapsed_time_c, |
| 131 | elapsed_time_simd, |
| 132 | static_cast<double>(elapsed_time_c) / elapsed_time_simd); |
Venkat | f921344 | 2018-08-03 17:39:58 +0530 | [diff] [blame] | 133 | } |
| 134 | |
#if HAVE_SSE4_1
// Each entry pairs a `_c` function with the `_sse4_1` function of the same
// block size.
const ObmcVarianceTest::ParamType sse4_functions[] = {
  // Block sizes with 1:1, 2:1 and 1:2 aspect ratios.
  TestFuncs(aom_obmc_variance128x128_c, aom_obmc_variance128x128_sse4_1),
  TestFuncs(aom_obmc_variance128x64_c, aom_obmc_variance128x64_sse4_1),
  TestFuncs(aom_obmc_variance64x128_c, aom_obmc_variance64x128_sse4_1),
  TestFuncs(aom_obmc_variance64x64_c, aom_obmc_variance64x64_sse4_1),
  TestFuncs(aom_obmc_variance64x32_c, aom_obmc_variance64x32_sse4_1),
  TestFuncs(aom_obmc_variance32x64_c, aom_obmc_variance32x64_sse4_1),
  TestFuncs(aom_obmc_variance32x32_c, aom_obmc_variance32x32_sse4_1),
  TestFuncs(aom_obmc_variance32x16_c, aom_obmc_variance32x16_sse4_1),
  TestFuncs(aom_obmc_variance16x32_c, aom_obmc_variance16x32_sse4_1),
  TestFuncs(aom_obmc_variance16x16_c, aom_obmc_variance16x16_sse4_1),
  TestFuncs(aom_obmc_variance16x8_c, aom_obmc_variance16x8_sse4_1),
  TestFuncs(aom_obmc_variance8x16_c, aom_obmc_variance8x16_sse4_1),
  TestFuncs(aom_obmc_variance8x8_c, aom_obmc_variance8x8_sse4_1),
  TestFuncs(aom_obmc_variance8x4_c, aom_obmc_variance8x4_sse4_1),
  TestFuncs(aom_obmc_variance4x8_c, aom_obmc_variance4x8_sse4_1),
  TestFuncs(aom_obmc_variance4x4_c, aom_obmc_variance4x4_sse4_1),

  // Block sizes with 4:1 and 1:4 aspect ratios.
  TestFuncs(aom_obmc_variance64x16_c, aom_obmc_variance64x16_sse4_1),
  TestFuncs(aom_obmc_variance16x64_c, aom_obmc_variance16x64_sse4_1),
  TestFuncs(aom_obmc_variance32x8_c, aom_obmc_variance32x8_sse4_1),
  TestFuncs(aom_obmc_variance8x32_c, aom_obmc_variance8x32_sse4_1),
  TestFuncs(aom_obmc_variance16x4_c, aom_obmc_variance16x4_sse4_1),
  TestFuncs(aom_obmc_variance4x16_c, aom_obmc_variance4x16_sse4_1),
};

// Runs every ObmcVarianceTest case once per table entry.
INSTANTIATE_TEST_SUITE_P(SSE4_1, ObmcVarianceTest,
                         ::testing::ValuesIn(sse4_functions));
#endif  // HAVE_SSE4_1
| 165 | |
#if HAVE_AVX2
// Each entry pairs a `_c` function with the `_avx2` function of the same
// block size.
const ObmcVarianceTest::ParamType avx2_functions[] = {
  // Block sizes with 1:1, 2:1 and 1:2 aspect ratios.
  TestFuncs(aom_obmc_variance128x128_c, aom_obmc_variance128x128_avx2),
  TestFuncs(aom_obmc_variance128x64_c, aom_obmc_variance128x64_avx2),
  TestFuncs(aom_obmc_variance64x128_c, aom_obmc_variance64x128_avx2),
  TestFuncs(aom_obmc_variance64x64_c, aom_obmc_variance64x64_avx2),
  TestFuncs(aom_obmc_variance64x32_c, aom_obmc_variance64x32_avx2),
  TestFuncs(aom_obmc_variance32x64_c, aom_obmc_variance32x64_avx2),
  TestFuncs(aom_obmc_variance32x32_c, aom_obmc_variance32x32_avx2),
  TestFuncs(aom_obmc_variance32x16_c, aom_obmc_variance32x16_avx2),
  TestFuncs(aom_obmc_variance16x32_c, aom_obmc_variance16x32_avx2),
  TestFuncs(aom_obmc_variance16x16_c, aom_obmc_variance16x16_avx2),
  TestFuncs(aom_obmc_variance16x8_c, aom_obmc_variance16x8_avx2),
  TestFuncs(aom_obmc_variance8x16_c, aom_obmc_variance8x16_avx2),
  TestFuncs(aom_obmc_variance8x8_c, aom_obmc_variance8x8_avx2),
  TestFuncs(aom_obmc_variance8x4_c, aom_obmc_variance8x4_avx2),
  TestFuncs(aom_obmc_variance4x8_c, aom_obmc_variance4x8_avx2),
  TestFuncs(aom_obmc_variance4x4_c, aom_obmc_variance4x4_avx2),

  // Block sizes with 4:1 and 1:4 aspect ratios.
  TestFuncs(aom_obmc_variance64x16_c, aom_obmc_variance64x16_avx2),
  TestFuncs(aom_obmc_variance16x64_c, aom_obmc_variance16x64_avx2),
  TestFuncs(aom_obmc_variance32x8_c, aom_obmc_variance32x8_avx2),
  TestFuncs(aom_obmc_variance8x32_c, aom_obmc_variance8x32_avx2),
  TestFuncs(aom_obmc_variance16x4_c, aom_obmc_variance16x4_avx2),
  TestFuncs(aom_obmc_variance4x16_c, aom_obmc_variance4x16_avx2),
};

// Runs every ObmcVarianceTest case once per table entry.
INSTANTIATE_TEST_SUITE_P(AVX2, ObmcVarianceTest,
                         ::testing::ValuesIn(avx2_functions));
#endif  // HAVE_AVX2
| 196 | |
#if HAVE_NEON
// Each entry pairs a `_c` function with the `_neon` function of the same
// block size.
const ObmcVarianceTest::ParamType neon_functions[] = {
  // Block sizes with 1:1, 2:1 and 1:2 aspect ratios.
  TestFuncs(aom_obmc_variance128x128_c, aom_obmc_variance128x128_neon),
  TestFuncs(aom_obmc_variance128x64_c, aom_obmc_variance128x64_neon),
  TestFuncs(aom_obmc_variance64x128_c, aom_obmc_variance64x128_neon),
  TestFuncs(aom_obmc_variance64x64_c, aom_obmc_variance64x64_neon),
  TestFuncs(aom_obmc_variance64x32_c, aom_obmc_variance64x32_neon),
  TestFuncs(aom_obmc_variance32x64_c, aom_obmc_variance32x64_neon),
  TestFuncs(aom_obmc_variance32x32_c, aom_obmc_variance32x32_neon),
  TestFuncs(aom_obmc_variance32x16_c, aom_obmc_variance32x16_neon),
  TestFuncs(aom_obmc_variance16x32_c, aom_obmc_variance16x32_neon),
  TestFuncs(aom_obmc_variance16x16_c, aom_obmc_variance16x16_neon),
  TestFuncs(aom_obmc_variance16x8_c, aom_obmc_variance16x8_neon),
  TestFuncs(aom_obmc_variance8x16_c, aom_obmc_variance8x16_neon),
  TestFuncs(aom_obmc_variance8x8_c, aom_obmc_variance8x8_neon),
  TestFuncs(aom_obmc_variance8x4_c, aom_obmc_variance8x4_neon),
  TestFuncs(aom_obmc_variance4x8_c, aom_obmc_variance4x8_neon),
  TestFuncs(aom_obmc_variance4x4_c, aom_obmc_variance4x4_neon),

  // Block sizes with 4:1 and 1:4 aspect ratios.
  TestFuncs(aom_obmc_variance64x16_c, aom_obmc_variance64x16_neon),
  TestFuncs(aom_obmc_variance16x64_c, aom_obmc_variance16x64_neon),
  TestFuncs(aom_obmc_variance32x8_c, aom_obmc_variance32x8_neon),
  TestFuncs(aom_obmc_variance8x32_c, aom_obmc_variance8x32_neon),
  TestFuncs(aom_obmc_variance16x4_c, aom_obmc_variance16x4_neon),
  TestFuncs(aom_obmc_variance4x16_c, aom_obmc_variance4x16_neon),
};

// Runs every ObmcVarianceTest case once per table entry.
INSTANTIATE_TEST_SUITE_P(NEON, ObmcVarianceTest,
                         ::testing::ValuesIn(neon_functions));
#endif  // HAVE_NEON
| 227 | |
Geza Lore | ebc2d34 | 2016-07-12 11:41:54 +0100 | [diff] [blame] | 228 | //////////////////////////////////////////////////////////////////////////////// |
| 229 | // High bit-depth |
| 230 | //////////////////////////////////////////////////////////////////////////////// |
Jerome Jiang | fa1d173 | 2019-08-06 10:31:20 -0700 | [diff] [blame] | 231 | #if CONFIG_AV1_HIGHBITDEPTH |
// Fixture for the high bit-depth tests. It is a distinct type from
// ObmcVarianceTest so the two families are instantiated separately; the tests
// read params_.bit_depth to size their sample values.
class ObmcVarianceHBDTest : public FunctionEquivalenceTest<ObmcVarF> {};
// The only instantiation of this fixture sits behind HAVE_SSE4_1, so a build
// may end up with none.
GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(ObmcVarianceHBDTest);
Geza Lore | ebc2d34 | 2016-07-12 11:41:54 +0100 | [diff] [blame] | 234 | |
| 235 | TEST_P(ObmcVarianceHBDTest, RandomValues) { |
| 236 | DECLARE_ALIGNED(32, uint16_t, pre[MAX_SB_SQUARE]); |
| 237 | DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]); |
| 238 | DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]); |
| 239 | |
clang-format | 3a826f1 | 2016-08-11 17:46:05 -0700 | [diff] [blame] | 240 | for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { |
Geza Lore | ebc2d34 | 2016-07-12 11:41:54 +0100 | [diff] [blame] | 241 | const int pre_stride = this->rng_(MAX_SB_SIZE + 1); |
| 242 | |
clang-format | 3a826f1 | 2016-08-11 17:46:05 -0700 | [diff] [blame] | 243 | for (int i = 0; i < MAX_SB_SQUARE; ++i) { |
Jingning Han | a387b19 | 2016-07-14 10:11:32 -0700 | [diff] [blame] | 244 | pre[i] = this->rng_(1 << params_.bit_depth); |
| 245 | wsrc[i] = this->rng_(1 << params_.bit_depth) * |
Geza Lore | ebc2d34 | 2016-07-12 11:41:54 +0100 | [diff] [blame] | 246 | this->rng_(kMaskMax * kMaskMax + 1); |
| 247 | mask[i] = this->rng_(kMaskMax * kMaskMax + 1); |
| 248 | } |
| 249 | |
| 250 | unsigned int ref_sse, tst_sse; |
clang-format | 3a826f1 | 2016-08-11 17:46:05 -0700 | [diff] [blame] | 251 | const unsigned int ref_res = params_.ref_func( |
| 252 | CONVERT_TO_BYTEPTR(pre), pre_stride, wsrc, mask, &ref_sse); |
Geza Lore | ebc2d34 | 2016-07-12 11:41:54 +0100 | [diff] [blame] | 253 | unsigned int tst_res; |
chiyotsai | ef26106 | 2021-07-01 14:01:45 -0700 | [diff] [blame] | 254 | API_REGISTER_STATE_CHECK(tst_res = params_.tst_func(CONVERT_TO_BYTEPTR(pre), |
clang-format | 3a826f1 | 2016-08-11 17:46:05 -0700 | [diff] [blame] | 255 | pre_stride, wsrc, mask, |
| 256 | &tst_sse)); |
Geza Lore | ebc2d34 | 2016-07-12 11:41:54 +0100 | [diff] [blame] | 257 | |
| 258 | ASSERT_EQ(ref_res, tst_res); |
| 259 | ASSERT_EQ(ref_sse, tst_sse); |
| 260 | } |
| 261 | } |
| 262 | |
| 263 | TEST_P(ObmcVarianceHBDTest, ExtremeValues) { |
| 264 | DECLARE_ALIGNED(32, uint16_t, pre[MAX_SB_SQUARE]); |
| 265 | DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]); |
| 266 | DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]); |
| 267 | |
clang-format | 3a826f1 | 2016-08-11 17:46:05 -0700 | [diff] [blame] | 268 | for (int iter = 0; iter < MAX_SB_SIZE && !HasFatalFailure(); ++iter) { |
Geza Lore | ebc2d34 | 2016-07-12 11:41:54 +0100 | [diff] [blame] | 269 | const int pre_stride = iter; |
| 270 | |
clang-format | 3a826f1 | 2016-08-11 17:46:05 -0700 | [diff] [blame] | 271 | for (int i = 0; i < MAX_SB_SQUARE; ++i) { |
Jingning Han | a387b19 | 2016-07-14 10:11:32 -0700 | [diff] [blame] | 272 | pre[i] = (1 << params_.bit_depth) - 1; |
| 273 | wsrc[i] = ((1 << params_.bit_depth) - 1) * kMaskMax * kMaskMax; |
Geza Lore | ebc2d34 | 2016-07-12 11:41:54 +0100 | [diff] [blame] | 274 | mask[i] = kMaskMax * kMaskMax; |
| 275 | } |
| 276 | |
| 277 | unsigned int ref_sse, tst_sse; |
clang-format | 3a826f1 | 2016-08-11 17:46:05 -0700 | [diff] [blame] | 278 | const unsigned int ref_res = params_.ref_func( |
| 279 | CONVERT_TO_BYTEPTR(pre), pre_stride, wsrc, mask, &ref_sse); |
Geza Lore | ebc2d34 | 2016-07-12 11:41:54 +0100 | [diff] [blame] | 280 | unsigned int tst_res; |
chiyotsai | ef26106 | 2021-07-01 14:01:45 -0700 | [diff] [blame] | 281 | API_REGISTER_STATE_CHECK(tst_res = params_.tst_func(CONVERT_TO_BYTEPTR(pre), |
clang-format | 3a826f1 | 2016-08-11 17:46:05 -0700 | [diff] [blame] | 282 | pre_stride, wsrc, mask, |
| 283 | &tst_sse)); |
Geza Lore | ebc2d34 | 2016-07-12 11:41:54 +0100 | [diff] [blame] | 284 | |
| 285 | ASSERT_EQ(ref_res, tst_res); |
| 286 | ASSERT_EQ(ref_sse, tst_sse); |
| 287 | } |
| 288 | } |
| 289 | |
#if HAVE_SSE4_1
// Each entry pairs a `_c` high bit-depth function with the `_sse4_1` function
// of the same block size; the third argument is the bit depth (8, 10 or 12)
// that the tests read back as params_.bit_depth. Declared const like the
// other parameter tables in this file.
const ObmcVarianceHBDTest::ParamType sse4_functions_hbd[] = {
  // 8-bit depth; block sizes with 1:1, 2:1 and 1:2 aspect ratios.
  TestFuncs(aom_highbd_obmc_variance128x128_c,
            aom_highbd_obmc_variance128x128_sse4_1, 8),
  TestFuncs(aom_highbd_obmc_variance128x64_c,
            aom_highbd_obmc_variance128x64_sse4_1, 8),
  TestFuncs(aom_highbd_obmc_variance64x128_c,
            aom_highbd_obmc_variance64x128_sse4_1, 8),
  TestFuncs(aom_highbd_obmc_variance64x64_c,
            aom_highbd_obmc_variance64x64_sse4_1, 8),
  TestFuncs(aom_highbd_obmc_variance64x32_c,
            aom_highbd_obmc_variance64x32_sse4_1, 8),
  TestFuncs(aom_highbd_obmc_variance32x64_c,
            aom_highbd_obmc_variance32x64_sse4_1, 8),
  TestFuncs(aom_highbd_obmc_variance32x32_c,
            aom_highbd_obmc_variance32x32_sse4_1, 8),
  TestFuncs(aom_highbd_obmc_variance32x16_c,
            aom_highbd_obmc_variance32x16_sse4_1, 8),
  TestFuncs(aom_highbd_obmc_variance16x32_c,
            aom_highbd_obmc_variance16x32_sse4_1, 8),
  TestFuncs(aom_highbd_obmc_variance16x16_c,
            aom_highbd_obmc_variance16x16_sse4_1, 8),
  TestFuncs(aom_highbd_obmc_variance16x8_c, aom_highbd_obmc_variance16x8_sse4_1,
            8),
  TestFuncs(aom_highbd_obmc_variance8x16_c, aom_highbd_obmc_variance8x16_sse4_1,
            8),
  TestFuncs(aom_highbd_obmc_variance8x8_c, aom_highbd_obmc_variance8x8_sse4_1,
            8),
  TestFuncs(aom_highbd_obmc_variance8x4_c, aom_highbd_obmc_variance8x4_sse4_1,
            8),
  TestFuncs(aom_highbd_obmc_variance4x8_c, aom_highbd_obmc_variance4x8_sse4_1,
            8),
  TestFuncs(aom_highbd_obmc_variance4x4_c, aom_highbd_obmc_variance4x4_sse4_1,
            8),
  // 10-bit depth; block sizes with 1:1, 2:1 and 1:2 aspect ratios.
  TestFuncs(aom_highbd_10_obmc_variance128x128_c,
            aom_highbd_10_obmc_variance128x128_sse4_1, 10),
  TestFuncs(aom_highbd_10_obmc_variance128x64_c,
            aom_highbd_10_obmc_variance128x64_sse4_1, 10),
  TestFuncs(aom_highbd_10_obmc_variance64x128_c,
            aom_highbd_10_obmc_variance64x128_sse4_1, 10),
  TestFuncs(aom_highbd_10_obmc_variance64x64_c,
            aom_highbd_10_obmc_variance64x64_sse4_1, 10),
  TestFuncs(aom_highbd_10_obmc_variance64x32_c,
            aom_highbd_10_obmc_variance64x32_sse4_1, 10),
  TestFuncs(aom_highbd_10_obmc_variance32x64_c,
            aom_highbd_10_obmc_variance32x64_sse4_1, 10),
  TestFuncs(aom_highbd_10_obmc_variance32x32_c,
            aom_highbd_10_obmc_variance32x32_sse4_1, 10),
  TestFuncs(aom_highbd_10_obmc_variance32x16_c,
            aom_highbd_10_obmc_variance32x16_sse4_1, 10),
  TestFuncs(aom_highbd_10_obmc_variance16x32_c,
            aom_highbd_10_obmc_variance16x32_sse4_1, 10),
  TestFuncs(aom_highbd_10_obmc_variance16x16_c,
            aom_highbd_10_obmc_variance16x16_sse4_1, 10),
  TestFuncs(aom_highbd_10_obmc_variance16x8_c,
            aom_highbd_10_obmc_variance16x8_sse4_1, 10),
  TestFuncs(aom_highbd_10_obmc_variance8x16_c,
            aom_highbd_10_obmc_variance8x16_sse4_1, 10),
  TestFuncs(aom_highbd_10_obmc_variance8x8_c,
            aom_highbd_10_obmc_variance8x8_sse4_1, 10),
  TestFuncs(aom_highbd_10_obmc_variance8x4_c,
            aom_highbd_10_obmc_variance8x4_sse4_1, 10),
  TestFuncs(aom_highbd_10_obmc_variance4x8_c,
            aom_highbd_10_obmc_variance4x8_sse4_1, 10),
  TestFuncs(aom_highbd_10_obmc_variance4x4_c,
            aom_highbd_10_obmc_variance4x4_sse4_1, 10),
  // 12-bit depth; block sizes with 1:1, 2:1 and 1:2 aspect ratios.
  TestFuncs(aom_highbd_12_obmc_variance128x128_c,
            aom_highbd_12_obmc_variance128x128_sse4_1, 12),
  TestFuncs(aom_highbd_12_obmc_variance128x64_c,
            aom_highbd_12_obmc_variance128x64_sse4_1, 12),
  TestFuncs(aom_highbd_12_obmc_variance64x128_c,
            aom_highbd_12_obmc_variance64x128_sse4_1, 12),
  TestFuncs(aom_highbd_12_obmc_variance64x64_c,
            aom_highbd_12_obmc_variance64x64_sse4_1, 12),
  TestFuncs(aom_highbd_12_obmc_variance64x32_c,
            aom_highbd_12_obmc_variance64x32_sse4_1, 12),
  TestFuncs(aom_highbd_12_obmc_variance32x64_c,
            aom_highbd_12_obmc_variance32x64_sse4_1, 12),
  TestFuncs(aom_highbd_12_obmc_variance32x32_c,
            aom_highbd_12_obmc_variance32x32_sse4_1, 12),
  TestFuncs(aom_highbd_12_obmc_variance32x16_c,
            aom_highbd_12_obmc_variance32x16_sse4_1, 12),
  TestFuncs(aom_highbd_12_obmc_variance16x32_c,
            aom_highbd_12_obmc_variance16x32_sse4_1, 12),
  TestFuncs(aom_highbd_12_obmc_variance16x16_c,
            aom_highbd_12_obmc_variance16x16_sse4_1, 12),
  TestFuncs(aom_highbd_12_obmc_variance16x8_c,
            aom_highbd_12_obmc_variance16x8_sse4_1, 12),
  TestFuncs(aom_highbd_12_obmc_variance8x16_c,
            aom_highbd_12_obmc_variance8x16_sse4_1, 12),
  TestFuncs(aom_highbd_12_obmc_variance8x8_c,
            aom_highbd_12_obmc_variance8x8_sse4_1, 12),
  TestFuncs(aom_highbd_12_obmc_variance8x4_c,
            aom_highbd_12_obmc_variance8x4_sse4_1, 12),
  TestFuncs(aom_highbd_12_obmc_variance4x8_c,
            aom_highbd_12_obmc_variance4x8_sse4_1, 12),
  TestFuncs(aom_highbd_12_obmc_variance4x4_c,
            aom_highbd_12_obmc_variance4x4_sse4_1, 12),

  // Block sizes with 4:1 and 1:4 aspect ratios, again for 8, 10 and 12 bits.
  TestFuncs(aom_highbd_obmc_variance64x16_c,
            aom_highbd_obmc_variance64x16_sse4_1, 8),
  TestFuncs(aom_highbd_obmc_variance16x64_c,
            aom_highbd_obmc_variance16x64_sse4_1, 8),
  TestFuncs(aom_highbd_obmc_variance32x8_c, aom_highbd_obmc_variance32x8_sse4_1,
            8),
  TestFuncs(aom_highbd_obmc_variance8x32_c, aom_highbd_obmc_variance8x32_sse4_1,
            8),
  TestFuncs(aom_highbd_obmc_variance16x4_c, aom_highbd_obmc_variance16x4_sse4_1,
            8),
  TestFuncs(aom_highbd_obmc_variance4x16_c, aom_highbd_obmc_variance4x16_sse4_1,
            8),
  TestFuncs(aom_highbd_10_obmc_variance64x16_c,
            aom_highbd_10_obmc_variance64x16_sse4_1, 10),
  TestFuncs(aom_highbd_10_obmc_variance16x64_c,
            aom_highbd_10_obmc_variance16x64_sse4_1, 10),
  TestFuncs(aom_highbd_10_obmc_variance32x8_c,
            aom_highbd_10_obmc_variance32x8_sse4_1, 10),
  TestFuncs(aom_highbd_10_obmc_variance8x32_c,
            aom_highbd_10_obmc_variance8x32_sse4_1, 10),
  TestFuncs(aom_highbd_10_obmc_variance16x4_c,
            aom_highbd_10_obmc_variance16x4_sse4_1, 10),
  TestFuncs(aom_highbd_10_obmc_variance4x16_c,
            aom_highbd_10_obmc_variance4x16_sse4_1, 10),
  TestFuncs(aom_highbd_12_obmc_variance64x16_c,
            aom_highbd_12_obmc_variance64x16_sse4_1, 12),
  TestFuncs(aom_highbd_12_obmc_variance16x64_c,
            aom_highbd_12_obmc_variance16x64_sse4_1, 12),
  TestFuncs(aom_highbd_12_obmc_variance32x8_c,
            aom_highbd_12_obmc_variance32x8_sse4_1, 12),
  TestFuncs(aom_highbd_12_obmc_variance8x32_c,
            aom_highbd_12_obmc_variance8x32_sse4_1, 12),
  TestFuncs(aom_highbd_12_obmc_variance16x4_c,
            aom_highbd_12_obmc_variance16x4_sse4_1, 12),
  TestFuncs(aom_highbd_12_obmc_variance4x16_c,
            aom_highbd_12_obmc_variance4x16_sse4_1, 12),
};

// Runs every ObmcVarianceHBDTest case once per table entry.
INSTANTIATE_TEST_SUITE_P(SSE4_1, ObmcVarianceHBDTest,
                         ::testing::ValuesIn(sse4_functions_hbd));
#endif  // HAVE_SSE4_1
Jerome Jiang | fa1d173 | 2019-08-06 10:31:20 -0700 | [diff] [blame] | 430 | #endif // CONFIG_AV1_HIGHBITDEPTH |
Geza Lore | ebc2d34 | 2016-07-12 11:41:54 +0100 | [diff] [blame] | 431 | } // namespace |