Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 1 | /* |
James Zern | b7c05bd | 2024-06-11 19:15:10 -0700 | [diff] [blame^] | 2 | * Copyright (c) 2016, Alliance for Open Media. All rights reserved. |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 3 | * |
Yaowu Xu | 2ab7ff0 | 2016-09-02 12:04:54 -0700 | [diff] [blame] | 4 | * This source code is subject to the terms of the BSD 2 Clause License and |
| 5 | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
| 6 | * was not distributed with this source code in the LICENSE file, you can |
| 7 | * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
| 8 | * Media Patent License 1.0 was not distributed with this source code in the |
| 9 | * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 10 | */ |
| 11 | |
| 12 | #include <math.h> |
| 13 | #include <stdlib.h> |
| 14 | #include <string.h> |
| 15 | |
Tom Finegan | 7a07ece | 2017-02-07 17:14:05 -0800 | [diff] [blame] | 16 | #include "third_party/googletest/src/googletest/include/gtest/gtest.h" |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 17 | #include "test/register_state_check.h" |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 18 | #include "test/function_equivalence_test.h" |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 19 | |
Tom Finegan | 60e653d | 2018-05-22 11:34:58 -0700 | [diff] [blame] | 20 | #include "config/aom_config.h" |
Tom Finegan | 44702c8 | 2018-05-22 13:00:39 -0700 | [diff] [blame] | 21 | #include "config/aom_dsp_rtcd.h" |
| 22 | #include "config/av1_rtcd.h" |
Tom Finegan | 60e653d | 2018-05-22 11:34:58 -0700 | [diff] [blame] | 23 | |
Yaowu Xu | f883b42 | 2016-08-30 14:01:10 -0700 | [diff] [blame] | 24 | #include "aom/aom_integer.h" |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 25 | |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 26 | #include "av1/common/enums.h" |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 27 | |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 28 | #include "aom_dsp/blend.h" |
Geza Lore | bfa59b4 | 2016-07-11 12:43:47 +0100 | [diff] [blame] | 29 | |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 30 | using libaom_test::FunctionEquivalenceTest; |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 31 | |
| 32 | namespace { |
| 33 | |
Scott LaVarnway | 3092e71 | 2018-04-24 10:47:15 -0700 | [diff] [blame] | 34 | template <typename BlendA64Func, typename SrcPixel, typename DstPixel> |
| 35 | class BlendA64MaskTest : public FunctionEquivalenceTest<BlendA64Func> { |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 36 | protected: |
Geza Lore | e6f8c17 | 2016-07-06 15:54:29 +0100 | [diff] [blame] | 37 | static const int kIterations = 10000; |
| 38 | static const int kMaxWidth = MAX_SB_SIZE * 5; // * 5 to cover longer strides |
| 39 | static const int kMaxHeight = MAX_SB_SIZE; |
| 40 | static const int kBufSize = kMaxWidth * kMaxHeight; |
| 41 | static const int kMaxMaskWidth = 2 * MAX_SB_SIZE; |
| 42 | static const int kMaxMaskSize = kMaxMaskWidth * kMaxMaskWidth; |
| 43 | |
James Zern | f1fa1eb | 2023-07-25 15:34:13 -0700 | [diff] [blame] | 44 | ~BlendA64MaskTest() override = default; |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 45 | |
Xing Jin | de2b711 | 2018-08-08 19:44:15 +0800 | [diff] [blame] | 46 | virtual void Execute(const SrcPixel *p_src0, const SrcPixel *p_src1, |
| 47 | int run_times) = 0; |
Scott LaVarnway | 3092e71 | 2018-04-24 10:47:15 -0700 | [diff] [blame] | 48 | |
| 49 | template <typename Pixel> |
Xing Jin | de2b711 | 2018-08-08 19:44:15 +0800 | [diff] [blame] | 50 | void GetSources(Pixel **src0, Pixel **src1, Pixel * /*dst*/, int run_times) { |
| 51 | if (run_times > 1) { |
| 52 | *src0 = src0_; |
| 53 | *src1 = src1_; |
| 54 | return; |
| 55 | } |
Scott LaVarnway | 3092e71 | 2018-04-24 10:47:15 -0700 | [diff] [blame] | 56 | switch (this->rng_(3)) { |
| 57 | case 0: // Separate sources |
| 58 | *src0 = src0_; |
| 59 | *src1 = src1_; |
| 60 | break; |
| 61 | case 1: // src0 == dst |
| 62 | *src0 = dst_tst_; |
| 63 | src0_stride_ = dst_stride_; |
| 64 | src0_offset_ = dst_offset_; |
| 65 | *src1 = src1_; |
| 66 | break; |
| 67 | case 2: // src1 == dst |
| 68 | *src0 = src0_; |
| 69 | *src1 = dst_tst_; |
| 70 | src1_stride_ = dst_stride_; |
| 71 | src1_offset_ = dst_offset_; |
| 72 | break; |
| 73 | default: FAIL(); |
| 74 | } |
| 75 | } |
| 76 | |
Xing Jin | de2b711 | 2018-08-08 19:44:15 +0800 | [diff] [blame] | 77 | void GetSources(uint16_t **src0, uint16_t **src1, uint8_t * /*dst*/, |
| 78 | int /*run_times*/) { |
Scott LaVarnway | 3092e71 | 2018-04-24 10:47:15 -0700 | [diff] [blame] | 79 | *src0 = src0_; |
| 80 | *src1 = src1_; |
| 81 | } |
| 82 | |
| 83 | uint8_t Rand1() { return this->rng_.Rand8() & 1; } |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 84 | |
Xing Jin | de2b711 | 2018-08-08 19:44:15 +0800 | [diff] [blame] | 85 | void RunOneTest(int block_size, int subx, int suby, int run_times) { |
| 86 | w_ = block_size_wide[block_size]; |
| 87 | h_ = block_size_high[block_size]; |
| 88 | run_times = run_times > 1 ? run_times / w_ : 1; |
David Turner | b5ed1e6 | 2018-10-11 15:17:53 +0100 | [diff] [blame] | 89 | ASSERT_GT(run_times, 0); |
Xing Jin | de2b711 | 2018-08-08 19:44:15 +0800 | [diff] [blame] | 90 | subx_ = subx; |
| 91 | suby_ = suby; |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 92 | |
Geza Lore | a3f7ddc | 2016-07-12 15:26:36 +0100 | [diff] [blame] | 93 | dst_offset_ = this->rng_(33); |
| 94 | dst_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_; |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 95 | |
Geza Lore | a3f7ddc | 2016-07-12 15:26:36 +0100 | [diff] [blame] | 96 | src0_offset_ = this->rng_(33); |
| 97 | src0_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_; |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 98 | |
Geza Lore | a3f7ddc | 2016-07-12 15:26:36 +0100 | [diff] [blame] | 99 | src1_offset_ = this->rng_(33); |
| 100 | src1_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_; |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 101 | |
clang-format | 3a826f1 | 2016-08-11 17:46:05 -0700 | [diff] [blame] | 102 | mask_stride_ = |
| 103 | this->rng_(kMaxWidth + 1 - w_ * (subx_ ? 2 : 1)) + w_ * (subx_ ? 2 : 1); |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 104 | |
Scott LaVarnway | 3092e71 | 2018-04-24 10:47:15 -0700 | [diff] [blame] | 105 | SrcPixel *p_src0; |
| 106 | SrcPixel *p_src1; |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 107 | |
Scott LaVarnway | 3092e71 | 2018-04-24 10:47:15 -0700 | [diff] [blame] | 108 | p_src0 = src0_; |
| 109 | p_src1 = src1_; |
| 110 | |
Xing Jin | de2b711 | 2018-08-08 19:44:15 +0800 | [diff] [blame] | 111 | GetSources(&p_src0, &p_src1, &dst_ref_[0], run_times); |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 112 | |
Xing Jin | de2b711 | 2018-08-08 19:44:15 +0800 | [diff] [blame] | 113 | Execute(p_src0, p_src1, run_times); |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 114 | |
clang-format | 3a826f1 | 2016-08-11 17:46:05 -0700 | [diff] [blame] | 115 | for (int r = 0; r < h_; ++r) { |
| 116 | for (int c = 0; c < w_; ++c) { |
Geza Lore | e6f8c17 | 2016-07-06 15:54:29 +0100 | [diff] [blame] | 117 | ASSERT_EQ(dst_ref_[dst_offset_ + r * dst_stride_ + c], |
Scott LaVarnway | bfa46bc | 2018-05-30 07:57:48 -0700 | [diff] [blame] | 118 | dst_tst_[dst_offset_ + r * dst_stride_ + c]) |
Xing Jin | de2b711 | 2018-08-08 19:44:15 +0800 | [diff] [blame] | 119 | << w_ << "x" << h_ << " subx " << subx_ << " suby " << suby_ |
| 120 | << " r: " << r << " c: " << c; |
Geza Lore | e6f8c17 | 2016-07-06 15:54:29 +0100 | [diff] [blame] | 121 | } |
| 122 | } |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 123 | } |
| 124 | |
Xing Jin | de2b711 | 2018-08-08 19:44:15 +0800 | [diff] [blame] | 125 | void RunTest(int block_size, int run_times) { |
Gerda Zsejke More | d93263f | 2023-08-09 15:26:16 +0200 | [diff] [blame] | 126 | for (subx_ = 0; subx_ <= 1; subx_++) { |
| 127 | for (suby_ = 0; suby_ <= 1; suby_++) { |
| 128 | RunOneTest(block_size, subx_, suby_, run_times); |
| 129 | } |
| 130 | } |
Xing Jin | de2b711 | 2018-08-08 19:44:15 +0800 | [diff] [blame] | 131 | } |
| 132 | |
Scott LaVarnway | 3092e71 | 2018-04-24 10:47:15 -0700 | [diff] [blame] | 133 | DstPixel dst_ref_[kBufSize]; |
| 134 | DstPixel dst_tst_[kBufSize]; |
Jingning Han | 91ae5d9 | 2016-08-26 11:24:36 -0700 | [diff] [blame] | 135 | uint32_t dst_stride_; |
| 136 | uint32_t dst_offset_; |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 137 | |
Scott LaVarnway | 3092e71 | 2018-04-24 10:47:15 -0700 | [diff] [blame] | 138 | SrcPixel src0_[kBufSize]; |
Jingning Han | 91ae5d9 | 2016-08-26 11:24:36 -0700 | [diff] [blame] | 139 | uint32_t src0_stride_; |
| 140 | uint32_t src0_offset_; |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 141 | |
Scott LaVarnway | 3092e71 | 2018-04-24 10:47:15 -0700 | [diff] [blame] | 142 | SrcPixel src1_[kBufSize]; |
Jingning Han | 91ae5d9 | 2016-08-26 11:24:36 -0700 | [diff] [blame] | 143 | uint32_t src1_stride_; |
| 144 | uint32_t src1_offset_; |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 145 | |
Geza Lore | e6f8c17 | 2016-07-06 15:54:29 +0100 | [diff] [blame] | 146 | uint8_t mask_[kMaxMaskSize]; |
| 147 | size_t mask_stride_; |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 148 | |
Geza Lore | e6f8c17 | 2016-07-06 15:54:29 +0100 | [diff] [blame] | 149 | int w_; |
| 150 | int h_; |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 151 | |
Jingning Han | 91ae5d9 | 2016-08-26 11:24:36 -0700 | [diff] [blame] | 152 | int suby_; |
| 153 | int subx_; |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 154 | }; |
| 155 | |
| 156 | ////////////////////////////////////////////////////////////////////////////// |
| 157 | // 8 bit version |
| 158 | ////////////////////////////////////////////////////////////////////////////// |
| 159 | |
clang-format | 3a826f1 | 2016-08-11 17:46:05 -0700 | [diff] [blame] | 160 | typedef void (*F8B)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, |
| 161 | uint32_t src0_stride, const uint8_t *src1, |
| 162 | uint32_t src1_stride, const uint8_t *mask, |
Scott LaVarnway | 589b7a1 | 2018-06-06 06:29:16 -0700 | [diff] [blame] | 163 | uint32_t mask_stride, int w, int h, int subx, int suby); |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 164 | typedef libaom_test::FuncParam<F8B> TestFuncs; |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 165 | |
Scott LaVarnway | 3092e71 | 2018-04-24 10:47:15 -0700 | [diff] [blame] | 166 | class BlendA64MaskTest8B : public BlendA64MaskTest<F8B, uint8_t, uint8_t> { |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 167 | protected: |
James Zern | faa2dcf | 2023-07-24 18:29:51 -0700 | [diff] [blame] | 168 | void Execute(const uint8_t *p_src0, const uint8_t *p_src1, |
| 169 | int run_times) override { |
Xing Jin | de2b711 | 2018-08-08 19:44:15 +0800 | [diff] [blame] | 170 | aom_usec_timer timer; |
| 171 | aom_usec_timer_start(&timer); |
| 172 | for (int i = 0; i < run_times; ++i) { |
| 173 | params_.ref_func(dst_ref_ + dst_offset_, dst_stride_, |
| 174 | p_src0 + src0_offset_, src0_stride_, |
| 175 | p_src1 + src1_offset_, src1_stride_, mask_, |
| 176 | kMaxMaskWidth, w_, h_, subx_, suby_); |
| 177 | } |
| 178 | aom_usec_timer_mark(&timer); |
| 179 | const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer)); |
| 180 | aom_usec_timer_start(&timer); |
| 181 | for (int i = 0; i < run_times; ++i) { |
| 182 | params_.tst_func(dst_tst_ + dst_offset_, dst_stride_, |
| 183 | p_src0 + src0_offset_, src0_stride_, |
| 184 | p_src1 + src1_offset_, src1_stride_, mask_, |
| 185 | kMaxMaskWidth, w_, h_, subx_, suby_); |
| 186 | } |
| 187 | aom_usec_timer_mark(&timer); |
| 188 | const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer)); |
| 189 | if (run_times > 1) { |
| 190 | printf("%3dx%-3d subx %d suby %d :%7.2f/%7.2fns", w_, h_, subx_, suby_, |
| 191 | time1, time2); |
| 192 | printf("(%3.2f)\n", time1 / time2); |
| 193 | } |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 194 | } |
| 195 | }; |
chiyotsai | 9dfac72 | 2020-07-07 17:43:02 -0700 | [diff] [blame] | 196 | GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(BlendA64MaskTest8B); |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 197 | |
Geza Lore | bfa59b4 | 2016-07-11 12:43:47 +0100 | [diff] [blame] | 198 | TEST_P(BlendA64MaskTest8B, RandomValues) { |
Gerda Zsejke More | d93263f | 2023-08-09 15:26:16 +0200 | [diff] [blame] | 199 | for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure(); ++bsize) { |
clang-format | 3a826f1 | 2016-08-11 17:46:05 -0700 | [diff] [blame] | 200 | for (int i = 0; i < kBufSize; ++i) { |
Geza Lore | e6f8c17 | 2016-07-06 15:54:29 +0100 | [diff] [blame] | 201 | dst_ref_[i] = rng_.Rand8(); |
| 202 | dst_tst_[i] = rng_.Rand8(); |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 203 | |
Geza Lore | e6f8c17 | 2016-07-06 15:54:29 +0100 | [diff] [blame] | 204 | src0_[i] = rng_.Rand8(); |
| 205 | src1_[i] = rng_.Rand8(); |
| 206 | } |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 207 | |
clang-format | 3a826f1 | 2016-08-11 17:46:05 -0700 | [diff] [blame] | 208 | for (int i = 0; i < kMaxMaskSize; ++i) |
Yaowu Xu | f883b42 | 2016-08-30 14:01:10 -0700 | [diff] [blame] | 209 | mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1); |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 210 | |
Xing Jin | de2b711 | 2018-08-08 19:44:15 +0800 | [diff] [blame] | 211 | RunTest(bsize, 1); |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 212 | } |
| 213 | } |
| 214 | |
Geza Lore | bfa59b4 | 2016-07-11 12:43:47 +0100 | [diff] [blame] | 215 | TEST_P(BlendA64MaskTest8B, ExtremeValues) { |
Gerda Zsejke More | d93263f | 2023-08-09 15:26:16 +0200 | [diff] [blame] | 216 | for (int i = 0; i < kBufSize; ++i) { |
| 217 | dst_ref_[i] = rng_(2) + 254; |
| 218 | dst_tst_[i] = rng_(2) + 254; |
| 219 | src0_[i] = rng_(2) + 254; |
| 220 | src1_[i] = rng_(2) + 254; |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 221 | } |
Gerda Zsejke More | d93263f | 2023-08-09 15:26:16 +0200 | [diff] [blame] | 222 | |
| 223 | for (int i = 0; i < kMaxMaskSize; ++i) |
| 224 | mask_[i] = rng_(2) + AOM_BLEND_A64_MAX_ALPHA - 1; |
| 225 | |
| 226 | for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure(); ++bsize) |
| 227 | RunTest(bsize, 1); |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 228 | } |
Gerda Zsejke More | d93263f | 2023-08-09 15:26:16 +0200 | [diff] [blame] | 229 | |
Xing Jin | de2b711 | 2018-08-08 19:44:15 +0800 | [diff] [blame] | 230 | TEST_P(BlendA64MaskTest8B, DISABLED_Speed) { |
| 231 | const int kRunTimes = 10000000; |
| 232 | for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) { |
| 233 | for (int i = 0; i < kBufSize; ++i) { |
| 234 | dst_ref_[i] = rng_.Rand8(); |
| 235 | dst_tst_[i] = rng_.Rand8(); |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 236 | |
Xing Jin | de2b711 | 2018-08-08 19:44:15 +0800 | [diff] [blame] | 237 | src0_[i] = rng_.Rand8(); |
| 238 | src1_[i] = rng_.Rand8(); |
| 239 | } |
| 240 | |
| 241 | for (int i = 0; i < kMaxMaskSize; ++i) |
| 242 | mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1); |
| 243 | |
Gerda Zsejke More | d93263f | 2023-08-09 15:26:16 +0200 | [diff] [blame] | 244 | RunTest(bsize, kRunTimes); |
Xing Jin | de2b711 | 2018-08-08 19:44:15 +0800 | [diff] [blame] | 245 | } |
| 246 | } |
#if HAVE_SSE4_1
// SSE4.1 implementation checked against the C implementation.
INSTANTIATE_TEST_SUITE_P(
    SSE4_1, BlendA64MaskTest8B,
    ::testing::Values(TestFuncs(aom_blend_a64_mask_c,
                                aom_blend_a64_mask_sse4_1)));
#endif  // HAVE_SSE4_1

#if HAVE_AVX2
// AVX2 implementation; note that the reference here is the SSE4.1
// implementation rather than the C one.
INSTANTIATE_TEST_SUITE_P(
    AVX2, BlendA64MaskTest8B,
    ::testing::Values(TestFuncs(aom_blend_a64_mask_sse4_1,
                                aom_blend_a64_mask_avx2)));
#endif  // HAVE_AVX2

#if HAVE_NEON
// NEON implementation checked against the C implementation.
INSTANTIATE_TEST_SUITE_P(
    NEON, BlendA64MaskTest8B,
    ::testing::Values(TestFuncs(aom_blend_a64_mask_c,
                                aom_blend_a64_mask_neon)));
#endif  // HAVE_NEON
| 264 | |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 265 | ////////////////////////////////////////////////////////////////////////////// |
Scott LaVarnway | 3092e71 | 2018-04-24 10:47:15 -0700 | [diff] [blame] | 266 | // 8 bit _d16 version |
| 267 | ////////////////////////////////////////////////////////////////////////////// |
| 268 | |
| 269 | typedef void (*F8B_D16)(uint8_t *dst, uint32_t dst_stride, const uint16_t *src0, |
| 270 | uint32_t src0_stride, const uint16_t *src1, |
| 271 | uint32_t src1_stride, const uint8_t *mask, |
Scott LaVarnway | 589b7a1 | 2018-06-06 06:29:16 -0700 | [diff] [blame] | 272 | uint32_t mask_stride, int w, int h, int subx, int suby, |
Scott LaVarnway | 3092e71 | 2018-04-24 10:47:15 -0700 | [diff] [blame] | 273 | ConvolveParams *conv_params); |
| 274 | typedef libaom_test::FuncParam<F8B_D16> TestFuncs_d16; |
| 275 | |
| 276 | class BlendA64MaskTest8B_d16 |
| 277 | : public BlendA64MaskTest<F8B_D16, uint16_t, uint8_t> { |
| 278 | protected: |
| 279 | // max number of bits used by the source |
| 280 | static const int kSrcMaxBitsMask = 0x3fff; |
| 281 | |
James Zern | faa2dcf | 2023-07-24 18:29:51 -0700 | [diff] [blame] | 282 | void Execute(const uint16_t *p_src0, const uint16_t *p_src1, |
| 283 | int run_times) override { |
Scott LaVarnway | 3092e71 | 2018-04-24 10:47:15 -0700 | [diff] [blame] | 284 | ConvolveParams conv_params; |
| 285 | conv_params.round_0 = ROUND0_BITS; |
| 286 | conv_params.round_1 = COMPOUND_ROUND1_BITS; |
Xing Jin | de2b711 | 2018-08-08 19:44:15 +0800 | [diff] [blame] | 287 | aom_usec_timer timer; |
| 288 | aom_usec_timer_start(&timer); |
| 289 | for (int i = 0; i < run_times; ++i) { |
| 290 | params_.ref_func(dst_ref_ + dst_offset_, dst_stride_, |
| 291 | p_src0 + src0_offset_, src0_stride_, |
| 292 | p_src1 + src1_offset_, src1_stride_, mask_, |
| 293 | kMaxMaskWidth, w_, h_, subx_, suby_, &conv_params); |
| 294 | } |
| 295 | aom_usec_timer_mark(&timer); |
| 296 | const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer)); |
| 297 | aom_usec_timer_start(&timer); |
| 298 | for (int i = 0; i < run_times; ++i) { |
| 299 | params_.tst_func(dst_tst_ + dst_offset_, dst_stride_, |
| 300 | p_src0 + src0_offset_, src0_stride_, |
| 301 | p_src1 + src1_offset_, src1_stride_, mask_, |
| 302 | kMaxMaskWidth, w_, h_, subx_, suby_, &conv_params); |
| 303 | } |
| 304 | aom_usec_timer_mark(&timer); |
| 305 | const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer)); |
| 306 | if (run_times > 1) { |
| 307 | printf("%3dx%-3d subx %d suby %d :%7.2f/%7.2fns", w_, h_, subx_, suby_, |
| 308 | time1, time2); |
| 309 | printf("(%3.2f)\n", time1 / time2); |
| 310 | } |
Scott LaVarnway | 3092e71 | 2018-04-24 10:47:15 -0700 | [diff] [blame] | 311 | } |
| 312 | }; |
chiyotsai | 9dfac72 | 2020-07-07 17:43:02 -0700 | [diff] [blame] | 313 | GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(BlendA64MaskTest8B_d16); |
Scott LaVarnway | 3092e71 | 2018-04-24 10:47:15 -0700 | [diff] [blame] | 314 | |
| 315 | TEST_P(BlendA64MaskTest8B_d16, RandomValues) { |
Gerda Zsejke More | d93263f | 2023-08-09 15:26:16 +0200 | [diff] [blame] | 316 | for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure(); ++bsize) { |
Scott LaVarnway | 3092e71 | 2018-04-24 10:47:15 -0700 | [diff] [blame] | 317 | for (int i = 0; i < kBufSize; ++i) { |
| 318 | dst_ref_[i] = rng_.Rand8(); |
| 319 | dst_tst_[i] = rng_.Rand8(); |
| 320 | |
| 321 | src0_[i] = rng_.Rand16() & kSrcMaxBitsMask; |
| 322 | src1_[i] = rng_.Rand16() & kSrcMaxBitsMask; |
| 323 | } |
| 324 | |
| 325 | for (int i = 0; i < kMaxMaskSize; ++i) |
| 326 | mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1); |
| 327 | |
Xing Jin | de2b711 | 2018-08-08 19:44:15 +0800 | [diff] [blame] | 328 | RunTest(bsize, 1); |
Scott LaVarnway | 3092e71 | 2018-04-24 10:47:15 -0700 | [diff] [blame] | 329 | } |
| 330 | } |
| 331 | |
| 332 | TEST_P(BlendA64MaskTest8B_d16, ExtremeValues) { |
Gerda Zsejke More | d93263f | 2023-08-09 15:26:16 +0200 | [diff] [blame] | 333 | for (int i = 0; i < kBufSize; ++i) { |
| 334 | dst_ref_[i] = 255; |
| 335 | dst_tst_[i] = 255; |
Scott LaVarnway | 3092e71 | 2018-04-24 10:47:15 -0700 | [diff] [blame] | 336 | |
Gerda Zsejke More | d93263f | 2023-08-09 15:26:16 +0200 | [diff] [blame] | 337 | src0_[i] = kSrcMaxBitsMask; |
| 338 | src1_[i] = kSrcMaxBitsMask; |
| 339 | } |
| 340 | |
| 341 | for (int i = 0; i < kMaxMaskSize; ++i) mask_[i] = AOM_BLEND_A64_MAX_ALPHA - 1; |
| 342 | |
| 343 | for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure(); ++bsize) |
| 344 | RunTest(bsize, 1); |
| 345 | } |
| 346 | |
| 347 | TEST_P(BlendA64MaskTest8B_d16, DISABLED_Speed) { |
| 348 | const int kRunTimes = 10000000; |
| 349 | for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) { |
| 350 | for (int i = 0; i < kBufSize; ++i) { |
| 351 | dst_ref_[i] = rng_.Rand8(); |
| 352 | dst_tst_[i] = rng_.Rand8(); |
| 353 | |
| 354 | src0_[i] = rng_.Rand16() & kSrcMaxBitsMask; |
| 355 | src1_[i] = rng_.Rand16() & kSrcMaxBitsMask; |
Scott LaVarnway | 3092e71 | 2018-04-24 10:47:15 -0700 | [diff] [blame] | 356 | } |
| 357 | |
| 358 | for (int i = 0; i < kMaxMaskSize; ++i) |
Gerda Zsejke More | d93263f | 2023-08-09 15:26:16 +0200 | [diff] [blame] | 359 | mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1); |
Scott LaVarnway | 3092e71 | 2018-04-24 10:47:15 -0700 | [diff] [blame] | 360 | |
Gerda Zsejke More | d93263f | 2023-08-09 15:26:16 +0200 | [diff] [blame] | 361 | RunTest(bsize, kRunTimes); |
Scott LaVarnway | 3092e71 | 2018-04-24 10:47:15 -0700 | [diff] [blame] | 362 | } |
| 363 | } |
| 364 | |
#if HAVE_SSE4_1
// SSE4.1 implementation checked against the C implementation.
INSTANTIATE_TEST_SUITE_P(
    SSE4_1, BlendA64MaskTest8B_d16,
    ::testing::Values(TestFuncs_d16(aom_lowbd_blend_a64_d16_mask_c,
                                    aom_lowbd_blend_a64_d16_mask_sse4_1)));
#endif  // HAVE_SSE4_1

#if HAVE_AVX2
// AVX2 implementation checked against the C implementation.
INSTANTIATE_TEST_SUITE_P(
    AVX2, BlendA64MaskTest8B_d16,
    ::testing::Values(TestFuncs_d16(aom_lowbd_blend_a64_d16_mask_c,
                                    aom_lowbd_blend_a64_d16_mask_avx2)));
#endif  // HAVE_AVX2

#if HAVE_NEON
// NEON implementation checked against the C implementation.
INSTANTIATE_TEST_SUITE_P(
    NEON, BlendA64MaskTest8B_d16,
    ::testing::Values(TestFuncs_d16(aom_lowbd_blend_a64_d16_mask_c,
                                    aom_lowbd_blend_a64_d16_mask_neon)));
#endif  // HAVE_NEON
| 385 | |
Scott LaVarnway | 3092e71 | 2018-04-24 10:47:15 -0700 | [diff] [blame] | 386 | ////////////////////////////////////////////////////////////////////////////// |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 387 | // High bit-depth version |
| 388 | ////////////////////////////////////////////////////////////////////////////// |
Jerome Jiang | 1cb298c | 2019-09-17 11:04:04 -0700 | [diff] [blame] | 389 | #if CONFIG_AV1_HIGHBITDEPTH |
clang-format | 3a826f1 | 2016-08-11 17:46:05 -0700 | [diff] [blame] | 390 | typedef void (*FHBD)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, |
| 391 | uint32_t src0_stride, const uint8_t *src1, |
| 392 | uint32_t src1_stride, const uint8_t *mask, |
Scott LaVarnway | 589b7a1 | 2018-06-06 06:29:16 -0700 | [diff] [blame] | 393 | uint32_t mask_stride, int w, int h, int subx, int suby, |
clang-format | 3a826f1 | 2016-08-11 17:46:05 -0700 | [diff] [blame] | 394 | int bd); |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 395 | typedef libaom_test::FuncParam<FHBD> TestFuncsHBD; |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 396 | |
Scott LaVarnway | 3092e71 | 2018-04-24 10:47:15 -0700 | [diff] [blame] | 397 | class BlendA64MaskTestHBD : public BlendA64MaskTest<FHBD, uint16_t, uint16_t> { |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 398 | protected: |
James Zern | faa2dcf | 2023-07-24 18:29:51 -0700 | [diff] [blame] | 399 | void Execute(const uint16_t *p_src0, const uint16_t *p_src1, |
| 400 | int run_times) override { |
Xing Jin | de2b711 | 2018-08-08 19:44:15 +0800 | [diff] [blame] | 401 | aom_usec_timer timer; |
| 402 | aom_usec_timer_start(&timer); |
| 403 | for (int i = 0; i < run_times; ++i) { |
| 404 | params_.ref_func(CONVERT_TO_BYTEPTR(dst_ref_ + dst_offset_), dst_stride_, |
| 405 | CONVERT_TO_BYTEPTR(p_src0 + src0_offset_), src0_stride_, |
| 406 | CONVERT_TO_BYTEPTR(p_src1 + src1_offset_), src1_stride_, |
| 407 | mask_, kMaxMaskWidth, w_, h_, subx_, suby_, bit_depth_); |
| 408 | } |
| 409 | aom_usec_timer_mark(&timer); |
| 410 | const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer)); |
| 411 | aom_usec_timer_start(&timer); |
| 412 | for (int i = 0; i < run_times; ++i) { |
| 413 | params_.tst_func(CONVERT_TO_BYTEPTR(dst_tst_ + dst_offset_), dst_stride_, |
| 414 | CONVERT_TO_BYTEPTR(p_src0 + src0_offset_), src0_stride_, |
| 415 | CONVERT_TO_BYTEPTR(p_src1 + src1_offset_), src1_stride_, |
| 416 | mask_, kMaxMaskWidth, w_, h_, subx_, suby_, bit_depth_); |
| 417 | } |
| 418 | aom_usec_timer_mark(&timer); |
| 419 | const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer)); |
| 420 | if (run_times > 1) { |
| 421 | printf("%3dx%-3d subx %d suby %d :%7.2f/%7.2fns", w_, h_, subx_, suby_, |
| 422 | time1, time2); |
| 423 | printf("(%3.2f)\n", time1 / time2); |
| 424 | } |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 425 | } |
| 426 | |
Geza Lore | e6f8c17 | 2016-07-06 15:54:29 +0100 | [diff] [blame] | 427 | int bit_depth_; |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 428 | }; |
chiyotsai | 9dfac72 | 2020-07-07 17:43:02 -0700 | [diff] [blame] | 429 | GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(BlendA64MaskTestHBD); |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 430 | |
Geza Lore | bfa59b4 | 2016-07-11 12:43:47 +0100 | [diff] [blame] | 431 | TEST_P(BlendA64MaskTestHBD, RandomValues) { |
Gerda Zsejke More | d93263f | 2023-08-09 15:26:16 +0200 | [diff] [blame] | 432 | for (bit_depth_ = 8; bit_depth_ <= 12 && !HasFatalFailure(); |
| 433 | bit_depth_ += 2) { |
Geza Lore | e6f8c17 | 2016-07-06 15:54:29 +0100 | [diff] [blame] | 434 | const int hi = 1 << bit_depth_; |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 435 | |
Gerda Zsejke More | d93263f | 2023-08-09 15:26:16 +0200 | [diff] [blame] | 436 | for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) { |
| 437 | for (int i = 0; i < kBufSize; ++i) { |
| 438 | dst_ref_[i] = rng_(hi); |
| 439 | dst_tst_[i] = rng_(hi); |
| 440 | src0_[i] = rng_(hi); |
| 441 | src1_[i] = rng_(hi); |
| 442 | } |
| 443 | |
| 444 | for (int i = 0; i < kMaxMaskSize; ++i) |
| 445 | mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1); |
| 446 | |
| 447 | RunTest(bsize, 1); |
Geza Lore | e6f8c17 | 2016-07-06 15:54:29 +0100 | [diff] [blame] | 448 | } |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 449 | } |
| 450 | } |
| 451 | |
Geza Lore | bfa59b4 | 2016-07-11 12:43:47 +0100 | [diff] [blame] | 452 | TEST_P(BlendA64MaskTestHBD, ExtremeValues) { |
Gerda Zsejke More | d93263f | 2023-08-09 15:26:16 +0200 | [diff] [blame] | 453 | for (bit_depth_ = 8; bit_depth_ <= 12 && !HasFatalFailure(); |
| 454 | bit_depth_ += 2) { |
Geza Lore | e6f8c17 | 2016-07-06 15:54:29 +0100 | [diff] [blame] | 455 | const int hi = 1 << bit_depth_; |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 456 | const int lo = hi - 2; |
| 457 | |
Gerda Zsejke More | d93263f | 2023-08-09 15:26:16 +0200 | [diff] [blame] | 458 | for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure(); |
| 459 | ++bsize) { |
| 460 | for (int i = 0; i < kBufSize; ++i) { |
| 461 | dst_ref_[i] = rng_(hi - lo) + lo; |
| 462 | dst_tst_[i] = rng_(hi - lo) + lo; |
| 463 | src0_[i] = rng_(hi - lo) + lo; |
| 464 | src1_[i] = rng_(hi - lo) + lo; |
| 465 | } |
| 466 | |
| 467 | for (int i = 0; i < kMaxMaskSize; ++i) |
| 468 | mask_[i] = rng_(2) + AOM_BLEND_A64_MAX_ALPHA - 1; |
| 469 | |
| 470 | RunTest(bsize, 1); |
Geza Lore | e6f8c17 | 2016-07-06 15:54:29 +0100 | [diff] [blame] | 471 | } |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 472 | } |
| 473 | } |
| 474 | |
#if HAVE_SSE4_1
// SSE4.1 implementation checked against the C implementation.
INSTANTIATE_TEST_SUITE_P(
    SSE4_1, BlendA64MaskTestHBD,
    ::testing::Values(TestFuncsHBD(aom_highbd_blend_a64_mask_c,
                                   aom_highbd_blend_a64_mask_sse4_1)));
#endif  // HAVE_SSE4_1

#if HAVE_NEON
// NEON implementation checked against the C implementation.
INSTANTIATE_TEST_SUITE_P(
    NEON, BlendA64MaskTestHBD,
    ::testing::Values(TestFuncsHBD(aom_highbd_blend_a64_mask_c,
                                   aom_highbd_blend_a64_mask_neon)));
#endif  // HAVE_NEON
| 488 | |
Scott LaVarnway | bfa46bc | 2018-05-30 07:57:48 -0700 | [diff] [blame] | 489 | ////////////////////////////////////////////////////////////////////////////// |
| 490 | // HBD _d16 version |
| 491 | ////////////////////////////////////////////////////////////////////////////// |
| 492 | |
| 493 | typedef void (*FHBD_D16)(uint8_t *dst, uint32_t dst_stride, |
| 494 | const CONV_BUF_TYPE *src0, uint32_t src0_stride, |
| 495 | const CONV_BUF_TYPE *src1, uint32_t src1_stride, |
Scott LaVarnway | 589b7a1 | 2018-06-06 06:29:16 -0700 | [diff] [blame] | 496 | const uint8_t *mask, uint32_t mask_stride, int w, |
| 497 | int h, int subx, int suby, ConvolveParams *conv_params, |
Scott LaVarnway | bfa46bc | 2018-05-30 07:57:48 -0700 | [diff] [blame] | 498 | const int bd); |
| 499 | typedef libaom_test::FuncParam<FHBD_D16> TestFuncsHBD_d16; |
| 500 | |
| 501 | class BlendA64MaskTestHBD_d16 |
| 502 | : public BlendA64MaskTest<FHBD_D16, uint16_t, uint16_t> { |
| 503 | protected: |
| 504 | // max number of bits used by the source |
| 505 | static const int kSrcMaxBitsMask = (1 << 14) - 1; |
| 506 | static const int kSrcMaxBitsMaskHBD = (1 << 16) - 1; |
| 507 | |
James Zern | faa2dcf | 2023-07-24 18:29:51 -0700 | [diff] [blame] | 508 | void Execute(const uint16_t *p_src0, const uint16_t *p_src1, |
| 509 | int run_times) override { |
David Turner | b5ed1e6 | 2018-10-11 15:17:53 +0100 | [diff] [blame] | 510 | ASSERT_GT(run_times, 0) << "Cannot run 0 iterations of the test."; |
Scott LaVarnway | bfa46bc | 2018-05-30 07:57:48 -0700 | [diff] [blame] | 511 | ConvolveParams conv_params; |
| 512 | conv_params.round_0 = (bit_depth_ == 12) ? ROUND0_BITS + 2 : ROUND0_BITS; |
| 513 | conv_params.round_1 = COMPOUND_ROUND1_BITS; |
Xing Jin | de2b711 | 2018-08-08 19:44:15 +0800 | [diff] [blame] | 514 | aom_usec_timer timer; |
| 515 | aom_usec_timer_start(&timer); |
| 516 | for (int i = 0; i < run_times; ++i) { |
| 517 | params_.ref_func(CONVERT_TO_BYTEPTR(dst_ref_ + dst_offset_), dst_stride_, |
| 518 | p_src0 + src0_offset_, src0_stride_, |
| 519 | p_src1 + src1_offset_, src1_stride_, mask_, |
| 520 | kMaxMaskWidth, w_, h_, subx_, suby_, &conv_params, |
| 521 | bit_depth_); |
| 522 | } |
Scott LaVarnway | bfa46bc | 2018-05-30 07:57:48 -0700 | [diff] [blame] | 523 | if (params_.tst_func) { |
Xing Jin | de2b711 | 2018-08-08 19:44:15 +0800 | [diff] [blame] | 524 | aom_usec_timer_mark(&timer); |
| 525 | const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer)); |
| 526 | aom_usec_timer_start(&timer); |
| 527 | for (int i = 0; i < run_times; ++i) { |
| 528 | params_.tst_func(CONVERT_TO_BYTEPTR(dst_tst_ + dst_offset_), |
| 529 | dst_stride_, p_src0 + src0_offset_, src0_stride_, |
| 530 | p_src1 + src1_offset_, src1_stride_, mask_, |
| 531 | kMaxMaskWidth, w_, h_, subx_, suby_, &conv_params, |
| 532 | bit_depth_); |
| 533 | } |
| 534 | aom_usec_timer_mark(&timer); |
| 535 | const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer)); |
| 536 | if (run_times > 1) { |
| 537 | printf("%3dx%-3d subx %d suby %d :%7.2f/%7.2fns", w_, h_, subx_, suby_, |
| 538 | time1, time2); |
| 539 | printf("(%3.2f)\n", time1 / time2); |
| 540 | } |
Scott LaVarnway | bfa46bc | 2018-05-30 07:57:48 -0700 | [diff] [blame] | 541 | } |
| 542 | } |
| 543 | |
| 544 | int bit_depth_; |
| 545 | int src_max_bits_mask_; |
| 546 | }; |
| 547 | |
| 548 | TEST_P(BlendA64MaskTestHBD_d16, RandomValues) { |
James Zern | 664f04d | 2022-05-24 17:30:58 -0700 | [diff] [blame] | 549 | if (params_.tst_func == nullptr) return; |
Gerda Zsejke More | d93263f | 2023-08-09 15:26:16 +0200 | [diff] [blame] | 550 | for (bit_depth_ = 8; bit_depth_ <= 12 && !HasFatalFailure(); |
| 551 | bit_depth_ += 2) { |
Scott LaVarnway | bfa46bc | 2018-05-30 07:57:48 -0700 | [diff] [blame] | 552 | src_max_bits_mask_ = |
| 553 | (bit_depth_ == 8) ? kSrcMaxBitsMask : kSrcMaxBitsMaskHBD; |
| 554 | |
Gerda Zsejke More | d93263f | 2023-08-09 15:26:16 +0200 | [diff] [blame] | 555 | for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure(); |
| 556 | ++bsize) { |
Xing Jin | de2b711 | 2018-08-08 19:44:15 +0800 | [diff] [blame] | 557 | for (int i = 0; i < kBufSize; ++i) { |
Gerda Zsejke More | d93263f | 2023-08-09 15:26:16 +0200 | [diff] [blame] | 558 | dst_ref_[i] = rng_.Rand8(); |
| 559 | dst_tst_[i] = rng_.Rand8(); |
Scott LaVarnway | bfa46bc | 2018-05-30 07:57:48 -0700 | [diff] [blame] | 560 | |
Gerda Zsejke More | d93263f | 2023-08-09 15:26:16 +0200 | [diff] [blame] | 561 | src0_[i] = rng_.Rand16() & src_max_bits_mask_; |
| 562 | src1_[i] = rng_.Rand16() & src_max_bits_mask_; |
Xing Jin | de2b711 | 2018-08-08 19:44:15 +0800 | [diff] [blame] | 563 | } |
Scott LaVarnway | bfa46bc | 2018-05-30 07:57:48 -0700 | [diff] [blame] | 564 | |
Gerda Zsejke More | d93263f | 2023-08-09 15:26:16 +0200 | [diff] [blame] | 565 | for (int i = 0; i < kMaxMaskSize; ++i) |
| 566 | mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1); |
Xing Jin | de2b711 | 2018-08-08 19:44:15 +0800 | [diff] [blame] | 567 | |
| 568 | RunTest(bsize, 1); |
Scott LaVarnway | bfa46bc | 2018-05-30 07:57:48 -0700 | [diff] [blame] | 569 | } |
Scott LaVarnway | bfa46bc | 2018-05-30 07:57:48 -0700 | [diff] [blame] | 570 | } |
| 571 | } |
Gerda Zsejke More | d93263f | 2023-08-09 15:26:16 +0200 | [diff] [blame] | 572 | |
| 573 | TEST_P(BlendA64MaskTestHBD_d16, ExtremeValues) { |
| 574 | for (bit_depth_ = 8; bit_depth_ <= 12; bit_depth_ += 2) { |
| 575 | src_max_bits_mask_ = |
| 576 | (bit_depth_ == 8) ? kSrcMaxBitsMask : kSrcMaxBitsMaskHBD; |
| 577 | |
| 578 | for (int i = 0; i < kBufSize; ++i) { |
| 579 | dst_ref_[i] = 0; |
| 580 | dst_tst_[i] = (1 << bit_depth_) - 1; |
| 581 | |
| 582 | src0_[i] = src_max_bits_mask_; |
| 583 | src1_[i] = src_max_bits_mask_; |
| 584 | } |
| 585 | |
| 586 | for (int i = 0; i < kMaxMaskSize; ++i) mask_[i] = AOM_BLEND_A64_MAX_ALPHA; |
| 587 | for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) { |
| 588 | RunTest(bsize, 1); |
| 589 | } |
| 590 | } |
| 591 | } |
| 592 | |
David Turner | b5ed1e6 | 2018-10-11 15:17:53 +0100 | [diff] [blame] | 593 | TEST_P(BlendA64MaskTestHBD_d16, DISABLED_Speed) { |
| 594 | const int kRunTimes = 10000000; |
| 595 | for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) { |
| 596 | for (bit_depth_ = 8; bit_depth_ <= 12; bit_depth_ += 2) { |
| 597 | for (int i = 0; i < kBufSize; ++i) { |
| 598 | dst_ref_[i] = rng_.Rand12() % (1 << bit_depth_); |
| 599 | dst_tst_[i] = rng_.Rand12() % (1 << bit_depth_); |
| 600 | |
| 601 | src0_[i] = rng_.Rand16(); |
| 602 | src1_[i] = rng_.Rand16(); |
| 603 | } |
| 604 | |
| 605 | for (int i = 0; i < kMaxMaskSize; ++i) |
| 606 | mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1); |
| 607 | |
Gerda Zsejke More | d93263f | 2023-08-09 15:26:16 +0200 | [diff] [blame] | 608 | RunTest(bsize, kRunTimes); |
David Turner | b5ed1e6 | 2018-10-11 15:17:53 +0100 | [diff] [blame] | 609 | } |
| 610 | } |
| 611 | } |
Scott LaVarnway | bfa46bc | 2018-05-30 07:57:48 -0700 | [diff] [blame] | 612 | |
Gerda Zsejke More | d93263f | 2023-08-09 15:26:16 +0200 | [diff] [blame] | 613 | INSTANTIATE_TEST_SUITE_P( |
| 614 | C, BlendA64MaskTestHBD_d16, |
| 615 | ::testing::Values(TestFuncsHBD_d16(aom_highbd_blend_a64_d16_mask_c, |
| 616 | aom_highbd_blend_a64_d16_mask_c))); |
Scott LaVarnway | bfa46bc | 2018-05-30 07:57:48 -0700 | [diff] [blame] | 617 | |
#if HAVE_SSE4_1
// Instantiates BlendA64MaskTestHBD_d16 with the C and SSE4.1 implementations
// of aom_highbd_blend_a64_d16_mask paired in one parameter.
INSTANTIATE_TEST_SUITE_P(SSE4_1, BlendA64MaskTestHBD_d16,
                         ::testing::Values(TestFuncsHBD_d16(
                             aom_highbd_blend_a64_d16_mask_c,
                             aom_highbd_blend_a64_d16_mask_sse4_1)));
#endif  // HAVE_SSE4_1
| 624 | |
#if HAVE_AVX2
// Instantiates BlendA64MaskTestHBD_d16 with the C and AVX2 implementations
// of aom_highbd_blend_a64_d16_mask paired in one parameter.
INSTANTIATE_TEST_SUITE_P(AVX2, BlendA64MaskTestHBD_d16,
                         ::testing::Values(TestFuncsHBD_d16(
                             aom_highbd_blend_a64_d16_mask_c,
                             aom_highbd_blend_a64_d16_mask_avx2)));
#endif  // HAVE_AVX2
| 631 | |
#if HAVE_NEON
// Instantiates BlendA64MaskTestHBD_d16 with the C and NEON implementations
// of aom_highbd_blend_a64_d16_mask paired in one parameter.
INSTANTIATE_TEST_SUITE_P(NEON, BlendA64MaskTestHBD_d16,
                         ::testing::Values(TestFuncsHBD_d16(
                             aom_highbd_blend_a64_d16_mask_c,
                             aom_highbd_blend_a64_d16_mask_neon)));
#endif  // HAVE_NEON
| 638 | |
// TODO(slavarnway): Enable the following in the avx2 commit. (56501)
#if 0
#if HAVE_AVX2
// The instantiation prefix names the instruction set under test. Reusing
// SSE4_1 here would duplicate the existing SSE4_1 instantiation of
// BlendA64MaskTestHBD once this block is enabled.
INSTANTIATE_TEST_SUITE_P(
    AVX2, BlendA64MaskTestHBD,
    ::testing::Values(TestFuncsHBD(aom_highbd_blend_a64_mask_c,
                                   aom_highbd_blend_a64_mask_avx2)));
#endif  // HAVE_AVX2
#endif
Jerome Jiang | 1cb298c | 2019-09-17 11:04:04 -0700 | [diff] [blame] | 648 | #endif // CONFIG_AV1_HIGHBITDEPTH |
Geza Lore | a661bc8 | 2016-05-20 16:33:12 +0100 | [diff] [blame] | 649 | } // namespace |