blob: 3538677b43e45ac97b8642fc13e652cfdf5d9b7c [file] [log] [blame]
/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
11
12#include <math.h>
13#include <stdlib.h>
14#include <string.h>
15
Tom Finegan7a07ece2017-02-07 17:14:05 -080016#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
Geza Lorea661bc82016-05-20 16:33:12 +010017#include "test/register_state_check.h"
Geza Lorea661bc82016-05-20 16:33:12 +010018#include "test/function_equivalence_test.h"
Geza Lorea661bc82016-05-20 16:33:12 +010019
Tom Finegan60e653d2018-05-22 11:34:58 -070020#include "config/aom_config.h"
Tom Finegan44702c82018-05-22 13:00:39 -070021#include "config/aom_dsp_rtcd.h"
22#include "config/av1_rtcd.h"
Tom Finegan60e653d2018-05-22 11:34:58 -070023
Yaowu Xuf883b422016-08-30 14:01:10 -070024#include "aom/aom_integer.h"
Geza Lorea661bc82016-05-20 16:33:12 +010025
Yaowu Xuc27fc142016-08-22 16:08:15 -070026#include "av1/common/enums.h"
Geza Lorea661bc82016-05-20 16:33:12 +010027
Yaowu Xuc27fc142016-08-22 16:08:15 -070028#include "aom_dsp/blend.h"
Geza Lorebfa59b42016-07-11 12:43:47 +010029
Yaowu Xuc27fc142016-08-22 16:08:15 -070030using libaom_test::FunctionEquivalenceTest;
Geza Lorea661bc82016-05-20 16:33:12 +010031
32namespace {
33
Scott LaVarnway3092e712018-04-24 10:47:15 -070034template <typename BlendA64Func, typename SrcPixel, typename DstPixel>
35class BlendA64MaskTest : public FunctionEquivalenceTest<BlendA64Func> {
Geza Lorea661bc82016-05-20 16:33:12 +010036 protected:
Geza Loree6f8c172016-07-06 15:54:29 +010037 static const int kIterations = 10000;
38 static const int kMaxWidth = MAX_SB_SIZE * 5; // * 5 to cover longer strides
39 static const int kMaxHeight = MAX_SB_SIZE;
40 static const int kBufSize = kMaxWidth * kMaxHeight;
41 static const int kMaxMaskWidth = 2 * MAX_SB_SIZE;
42 static const int kMaxMaskSize = kMaxMaskWidth * kMaxMaskWidth;
43
James Zernf1fa1eb2023-07-25 15:34:13 -070044 ~BlendA64MaskTest() override = default;
Geza Lorea661bc82016-05-20 16:33:12 +010045
Xing Jinde2b7112018-08-08 19:44:15 +080046 virtual void Execute(const SrcPixel *p_src0, const SrcPixel *p_src1,
47 int run_times) = 0;
Scott LaVarnway3092e712018-04-24 10:47:15 -070048
49 template <typename Pixel>
Xing Jinde2b7112018-08-08 19:44:15 +080050 void GetSources(Pixel **src0, Pixel **src1, Pixel * /*dst*/, int run_times) {
51 if (run_times > 1) {
52 *src0 = src0_;
53 *src1 = src1_;
54 return;
55 }
Scott LaVarnway3092e712018-04-24 10:47:15 -070056 switch (this->rng_(3)) {
57 case 0: // Separate sources
58 *src0 = src0_;
59 *src1 = src1_;
60 break;
61 case 1: // src0 == dst
62 *src0 = dst_tst_;
63 src0_stride_ = dst_stride_;
64 src0_offset_ = dst_offset_;
65 *src1 = src1_;
66 break;
67 case 2: // src1 == dst
68 *src0 = src0_;
69 *src1 = dst_tst_;
70 src1_stride_ = dst_stride_;
71 src1_offset_ = dst_offset_;
72 break;
73 default: FAIL();
74 }
75 }
76
Xing Jinde2b7112018-08-08 19:44:15 +080077 void GetSources(uint16_t **src0, uint16_t **src1, uint8_t * /*dst*/,
78 int /*run_times*/) {
Scott LaVarnway3092e712018-04-24 10:47:15 -070079 *src0 = src0_;
80 *src1 = src1_;
81 }
82
83 uint8_t Rand1() { return this->rng_.Rand8() & 1; }
Geza Lorea661bc82016-05-20 16:33:12 +010084
Xing Jinde2b7112018-08-08 19:44:15 +080085 void RunOneTest(int block_size, int subx, int suby, int run_times) {
86 w_ = block_size_wide[block_size];
87 h_ = block_size_high[block_size];
88 run_times = run_times > 1 ? run_times / w_ : 1;
David Turnerb5ed1e62018-10-11 15:17:53 +010089 ASSERT_GT(run_times, 0);
Xing Jinde2b7112018-08-08 19:44:15 +080090 subx_ = subx;
91 suby_ = suby;
Geza Lorea661bc82016-05-20 16:33:12 +010092
Geza Lorea3f7ddc2016-07-12 15:26:36 +010093 dst_offset_ = this->rng_(33);
94 dst_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_;
Geza Lorea661bc82016-05-20 16:33:12 +010095
Geza Lorea3f7ddc2016-07-12 15:26:36 +010096 src0_offset_ = this->rng_(33);
97 src0_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_;
Geza Lorea661bc82016-05-20 16:33:12 +010098
Geza Lorea3f7ddc2016-07-12 15:26:36 +010099 src1_offset_ = this->rng_(33);
100 src1_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_;
Geza Lorea661bc82016-05-20 16:33:12 +0100101
clang-format3a826f12016-08-11 17:46:05 -0700102 mask_stride_ =
103 this->rng_(kMaxWidth + 1 - w_ * (subx_ ? 2 : 1)) + w_ * (subx_ ? 2 : 1);
Geza Lorea661bc82016-05-20 16:33:12 +0100104
Scott LaVarnway3092e712018-04-24 10:47:15 -0700105 SrcPixel *p_src0;
106 SrcPixel *p_src1;
Geza Lorea661bc82016-05-20 16:33:12 +0100107
Scott LaVarnway3092e712018-04-24 10:47:15 -0700108 p_src0 = src0_;
109 p_src1 = src1_;
110
Xing Jinde2b7112018-08-08 19:44:15 +0800111 GetSources(&p_src0, &p_src1, &dst_ref_[0], run_times);
Geza Lorea661bc82016-05-20 16:33:12 +0100112
Xing Jinde2b7112018-08-08 19:44:15 +0800113 Execute(p_src0, p_src1, run_times);
Geza Lorea661bc82016-05-20 16:33:12 +0100114
clang-format3a826f12016-08-11 17:46:05 -0700115 for (int r = 0; r < h_; ++r) {
116 for (int c = 0; c < w_; ++c) {
Geza Loree6f8c172016-07-06 15:54:29 +0100117 ASSERT_EQ(dst_ref_[dst_offset_ + r * dst_stride_ + c],
Scott LaVarnwaybfa46bc2018-05-30 07:57:48 -0700118 dst_tst_[dst_offset_ + r * dst_stride_ + c])
Xing Jinde2b7112018-08-08 19:44:15 +0800119 << w_ << "x" << h_ << " subx " << subx_ << " suby " << suby_
120 << " r: " << r << " c: " << c;
Geza Loree6f8c172016-07-06 15:54:29 +0100121 }
122 }
Geza Lorea661bc82016-05-20 16:33:12 +0100123 }
124
Xing Jinde2b7112018-08-08 19:44:15 +0800125 void RunTest(int block_size, int run_times) {
Gerda Zsejke Mored93263f2023-08-09 15:26:16 +0200126 for (subx_ = 0; subx_ <= 1; subx_++) {
127 for (suby_ = 0; suby_ <= 1; suby_++) {
128 RunOneTest(block_size, subx_, suby_, run_times);
129 }
130 }
Xing Jinde2b7112018-08-08 19:44:15 +0800131 }
132
Scott LaVarnway3092e712018-04-24 10:47:15 -0700133 DstPixel dst_ref_[kBufSize];
134 DstPixel dst_tst_[kBufSize];
Jingning Han91ae5d92016-08-26 11:24:36 -0700135 uint32_t dst_stride_;
136 uint32_t dst_offset_;
Geza Lorea661bc82016-05-20 16:33:12 +0100137
Scott LaVarnway3092e712018-04-24 10:47:15 -0700138 SrcPixel src0_[kBufSize];
Jingning Han91ae5d92016-08-26 11:24:36 -0700139 uint32_t src0_stride_;
140 uint32_t src0_offset_;
Geza Lorea661bc82016-05-20 16:33:12 +0100141
Scott LaVarnway3092e712018-04-24 10:47:15 -0700142 SrcPixel src1_[kBufSize];
Jingning Han91ae5d92016-08-26 11:24:36 -0700143 uint32_t src1_stride_;
144 uint32_t src1_offset_;
Geza Lorea661bc82016-05-20 16:33:12 +0100145
Geza Loree6f8c172016-07-06 15:54:29 +0100146 uint8_t mask_[kMaxMaskSize];
147 size_t mask_stride_;
Geza Lorea661bc82016-05-20 16:33:12 +0100148
Geza Loree6f8c172016-07-06 15:54:29 +0100149 int w_;
150 int h_;
Geza Lorea661bc82016-05-20 16:33:12 +0100151
Jingning Han91ae5d92016-08-26 11:24:36 -0700152 int suby_;
153 int subx_;
Geza Lorea661bc82016-05-20 16:33:12 +0100154};
155
//////////////////////////////////////////////////////////////////////////////
// 8 bit version
//////////////////////////////////////////////////////////////////////////////
159
clang-format3a826f12016-08-11 17:46:05 -0700160typedef void (*F8B)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
161 uint32_t src0_stride, const uint8_t *src1,
162 uint32_t src1_stride, const uint8_t *mask,
Scott LaVarnway589b7a12018-06-06 06:29:16 -0700163 uint32_t mask_stride, int w, int h, int subx, int suby);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700164typedef libaom_test::FuncParam<F8B> TestFuncs;
Geza Lorea661bc82016-05-20 16:33:12 +0100165
Scott LaVarnway3092e712018-04-24 10:47:15 -0700166class BlendA64MaskTest8B : public BlendA64MaskTest<F8B, uint8_t, uint8_t> {
Geza Lorea661bc82016-05-20 16:33:12 +0100167 protected:
James Zernfaa2dcf2023-07-24 18:29:51 -0700168 void Execute(const uint8_t *p_src0, const uint8_t *p_src1,
169 int run_times) override {
Xing Jinde2b7112018-08-08 19:44:15 +0800170 aom_usec_timer timer;
171 aom_usec_timer_start(&timer);
172 for (int i = 0; i < run_times; ++i) {
173 params_.ref_func(dst_ref_ + dst_offset_, dst_stride_,
174 p_src0 + src0_offset_, src0_stride_,
175 p_src1 + src1_offset_, src1_stride_, mask_,
176 kMaxMaskWidth, w_, h_, subx_, suby_);
177 }
178 aom_usec_timer_mark(&timer);
179 const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
180 aom_usec_timer_start(&timer);
181 for (int i = 0; i < run_times; ++i) {
182 params_.tst_func(dst_tst_ + dst_offset_, dst_stride_,
183 p_src0 + src0_offset_, src0_stride_,
184 p_src1 + src1_offset_, src1_stride_, mask_,
185 kMaxMaskWidth, w_, h_, subx_, suby_);
186 }
187 aom_usec_timer_mark(&timer);
188 const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
189 if (run_times > 1) {
190 printf("%3dx%-3d subx %d suby %d :%7.2f/%7.2fns", w_, h_, subx_, suby_,
191 time1, time2);
192 printf("(%3.2f)\n", time1 / time2);
193 }
Geza Lorea661bc82016-05-20 16:33:12 +0100194 }
195};
chiyotsai9dfac722020-07-07 17:43:02 -0700196GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(BlendA64MaskTest8B);
Geza Lorea661bc82016-05-20 16:33:12 +0100197
Geza Lorebfa59b42016-07-11 12:43:47 +0100198TEST_P(BlendA64MaskTest8B, RandomValues) {
Gerda Zsejke Mored93263f2023-08-09 15:26:16 +0200199 for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure(); ++bsize) {
clang-format3a826f12016-08-11 17:46:05 -0700200 for (int i = 0; i < kBufSize; ++i) {
Geza Loree6f8c172016-07-06 15:54:29 +0100201 dst_ref_[i] = rng_.Rand8();
202 dst_tst_[i] = rng_.Rand8();
Geza Lorea661bc82016-05-20 16:33:12 +0100203
Geza Loree6f8c172016-07-06 15:54:29 +0100204 src0_[i] = rng_.Rand8();
205 src1_[i] = rng_.Rand8();
206 }
Geza Lorea661bc82016-05-20 16:33:12 +0100207
clang-format3a826f12016-08-11 17:46:05 -0700208 for (int i = 0; i < kMaxMaskSize; ++i)
Yaowu Xuf883b422016-08-30 14:01:10 -0700209 mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
Geza Lorea661bc82016-05-20 16:33:12 +0100210
Xing Jinde2b7112018-08-08 19:44:15 +0800211 RunTest(bsize, 1);
Geza Lorea661bc82016-05-20 16:33:12 +0100212 }
213}
214
Geza Lorebfa59b42016-07-11 12:43:47 +0100215TEST_P(BlendA64MaskTest8B, ExtremeValues) {
Gerda Zsejke Mored93263f2023-08-09 15:26:16 +0200216 for (int i = 0; i < kBufSize; ++i) {
217 dst_ref_[i] = rng_(2) + 254;
218 dst_tst_[i] = rng_(2) + 254;
219 src0_[i] = rng_(2) + 254;
220 src1_[i] = rng_(2) + 254;
Geza Lorea661bc82016-05-20 16:33:12 +0100221 }
Gerda Zsejke Mored93263f2023-08-09 15:26:16 +0200222
223 for (int i = 0; i < kMaxMaskSize; ++i)
224 mask_[i] = rng_(2) + AOM_BLEND_A64_MAX_ALPHA - 1;
225
226 for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure(); ++bsize)
227 RunTest(bsize, 1);
Geza Lorea661bc82016-05-20 16:33:12 +0100228}
Gerda Zsejke Mored93263f2023-08-09 15:26:16 +0200229
Xing Jinde2b7112018-08-08 19:44:15 +0800230TEST_P(BlendA64MaskTest8B, DISABLED_Speed) {
231 const int kRunTimes = 10000000;
232 for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
233 for (int i = 0; i < kBufSize; ++i) {
234 dst_ref_[i] = rng_.Rand8();
235 dst_tst_[i] = rng_.Rand8();
Geza Lorea661bc82016-05-20 16:33:12 +0100236
Xing Jinde2b7112018-08-08 19:44:15 +0800237 src0_[i] = rng_.Rand8();
238 src1_[i] = rng_.Rand8();
239 }
240
241 for (int i = 0; i < kMaxMaskSize; ++i)
242 mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
243
Gerda Zsejke Mored93263f2023-08-09 15:26:16 +0200244 RunTest(bsize, kRunTimes);
Xing Jinde2b7112018-08-08 19:44:15 +0800245 }
246}
#if HAVE_SSE4_1
// SSE4.1 implementation checked against the C implementation.
INSTANTIATE_TEST_SUITE_P(
    SSE4_1, BlendA64MaskTest8B,
    ::testing::Values(TestFuncs(aom_blend_a64_mask_c,
                                aom_blend_a64_mask_sse4_1)));
#endif  // HAVE_SSE4_1

#if HAVE_AVX2
// AVX2 implementation checked against the SSE4.1 implementation.
INSTANTIATE_TEST_SUITE_P(
    AVX2, BlendA64MaskTest8B,
    ::testing::Values(TestFuncs(aom_blend_a64_mask_sse4_1,
                                aom_blend_a64_mask_avx2)));
#endif  // HAVE_AVX2

#if HAVE_NEON
// NEON implementation checked against the C implementation.
INSTANTIATE_TEST_SUITE_P(
    NEON, BlendA64MaskTest8B,
    ::testing::Values(TestFuncs(aom_blend_a64_mask_c,
                                aom_blend_a64_mask_neon)));
#endif  // HAVE_NEON
264
//////////////////////////////////////////////////////////////////////////////
// 8 bit _d16 version
//////////////////////////////////////////////////////////////////////////////
268
269typedef void (*F8B_D16)(uint8_t *dst, uint32_t dst_stride, const uint16_t *src0,
270 uint32_t src0_stride, const uint16_t *src1,
271 uint32_t src1_stride, const uint8_t *mask,
Scott LaVarnway589b7a12018-06-06 06:29:16 -0700272 uint32_t mask_stride, int w, int h, int subx, int suby,
Scott LaVarnway3092e712018-04-24 10:47:15 -0700273 ConvolveParams *conv_params);
274typedef libaom_test::FuncParam<F8B_D16> TestFuncs_d16;
275
276class BlendA64MaskTest8B_d16
277 : public BlendA64MaskTest<F8B_D16, uint16_t, uint8_t> {
278 protected:
279 // max number of bits used by the source
280 static const int kSrcMaxBitsMask = 0x3fff;
281
James Zernfaa2dcf2023-07-24 18:29:51 -0700282 void Execute(const uint16_t *p_src0, const uint16_t *p_src1,
283 int run_times) override {
Scott LaVarnway3092e712018-04-24 10:47:15 -0700284 ConvolveParams conv_params;
285 conv_params.round_0 = ROUND0_BITS;
286 conv_params.round_1 = COMPOUND_ROUND1_BITS;
Xing Jinde2b7112018-08-08 19:44:15 +0800287 aom_usec_timer timer;
288 aom_usec_timer_start(&timer);
289 for (int i = 0; i < run_times; ++i) {
290 params_.ref_func(dst_ref_ + dst_offset_, dst_stride_,
291 p_src0 + src0_offset_, src0_stride_,
292 p_src1 + src1_offset_, src1_stride_, mask_,
293 kMaxMaskWidth, w_, h_, subx_, suby_, &conv_params);
294 }
295 aom_usec_timer_mark(&timer);
296 const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
297 aom_usec_timer_start(&timer);
298 for (int i = 0; i < run_times; ++i) {
299 params_.tst_func(dst_tst_ + dst_offset_, dst_stride_,
300 p_src0 + src0_offset_, src0_stride_,
301 p_src1 + src1_offset_, src1_stride_, mask_,
302 kMaxMaskWidth, w_, h_, subx_, suby_, &conv_params);
303 }
304 aom_usec_timer_mark(&timer);
305 const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
306 if (run_times > 1) {
307 printf("%3dx%-3d subx %d suby %d :%7.2f/%7.2fns", w_, h_, subx_, suby_,
308 time1, time2);
309 printf("(%3.2f)\n", time1 / time2);
310 }
Scott LaVarnway3092e712018-04-24 10:47:15 -0700311 }
312};
chiyotsai9dfac722020-07-07 17:43:02 -0700313GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(BlendA64MaskTest8B_d16);
Scott LaVarnway3092e712018-04-24 10:47:15 -0700314
315TEST_P(BlendA64MaskTest8B_d16, RandomValues) {
Gerda Zsejke Mored93263f2023-08-09 15:26:16 +0200316 for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure(); ++bsize) {
Scott LaVarnway3092e712018-04-24 10:47:15 -0700317 for (int i = 0; i < kBufSize; ++i) {
318 dst_ref_[i] = rng_.Rand8();
319 dst_tst_[i] = rng_.Rand8();
320
321 src0_[i] = rng_.Rand16() & kSrcMaxBitsMask;
322 src1_[i] = rng_.Rand16() & kSrcMaxBitsMask;
323 }
324
325 for (int i = 0; i < kMaxMaskSize; ++i)
326 mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
327
Xing Jinde2b7112018-08-08 19:44:15 +0800328 RunTest(bsize, 1);
Scott LaVarnway3092e712018-04-24 10:47:15 -0700329 }
330}
331
332TEST_P(BlendA64MaskTest8B_d16, ExtremeValues) {
Gerda Zsejke Mored93263f2023-08-09 15:26:16 +0200333 for (int i = 0; i < kBufSize; ++i) {
334 dst_ref_[i] = 255;
335 dst_tst_[i] = 255;
Scott LaVarnway3092e712018-04-24 10:47:15 -0700336
Gerda Zsejke Mored93263f2023-08-09 15:26:16 +0200337 src0_[i] = kSrcMaxBitsMask;
338 src1_[i] = kSrcMaxBitsMask;
339 }
340
341 for (int i = 0; i < kMaxMaskSize; ++i) mask_[i] = AOM_BLEND_A64_MAX_ALPHA - 1;
342
343 for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure(); ++bsize)
344 RunTest(bsize, 1);
345}
346
347TEST_P(BlendA64MaskTest8B_d16, DISABLED_Speed) {
348 const int kRunTimes = 10000000;
349 for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
350 for (int i = 0; i < kBufSize; ++i) {
351 dst_ref_[i] = rng_.Rand8();
352 dst_tst_[i] = rng_.Rand8();
353
354 src0_[i] = rng_.Rand16() & kSrcMaxBitsMask;
355 src1_[i] = rng_.Rand16() & kSrcMaxBitsMask;
Scott LaVarnway3092e712018-04-24 10:47:15 -0700356 }
357
358 for (int i = 0; i < kMaxMaskSize; ++i)
Gerda Zsejke Mored93263f2023-08-09 15:26:16 +0200359 mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
Scott LaVarnway3092e712018-04-24 10:47:15 -0700360
Gerda Zsejke Mored93263f2023-08-09 15:26:16 +0200361 RunTest(bsize, kRunTimes);
Scott LaVarnway3092e712018-04-24 10:47:15 -0700362 }
363}
364
// Each SIMD implementation below is checked against the C implementation.
#if HAVE_SSE4_1
INSTANTIATE_TEST_SUITE_P(
    SSE4_1, BlendA64MaskTest8B_d16,
    ::testing::Values(TestFuncs_d16(aom_lowbd_blend_a64_d16_mask_c,
                                    aom_lowbd_blend_a64_d16_mask_sse4_1)));
#endif  // HAVE_SSE4_1

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(
    AVX2, BlendA64MaskTest8B_d16,
    ::testing::Values(TestFuncs_d16(aom_lowbd_blend_a64_d16_mask_c,
                                    aom_lowbd_blend_a64_d16_mask_avx2)));
#endif  // HAVE_AVX2

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, BlendA64MaskTest8B_d16,
    ::testing::Values(TestFuncs_d16(aom_lowbd_blend_a64_d16_mask_c,
                                    aom_lowbd_blend_a64_d16_mask_neon)));
#endif  // HAVE_NEON
385
//////////////////////////////////////////////////////////////////////////////
// High bit-depth version
//////////////////////////////////////////////////////////////////////////////
Jerome Jiang1cb298c2019-09-17 11:04:04 -0700389#if CONFIG_AV1_HIGHBITDEPTH
clang-format3a826f12016-08-11 17:46:05 -0700390typedef void (*FHBD)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
391 uint32_t src0_stride, const uint8_t *src1,
392 uint32_t src1_stride, const uint8_t *mask,
Scott LaVarnway589b7a12018-06-06 06:29:16 -0700393 uint32_t mask_stride, int w, int h, int subx, int suby,
clang-format3a826f12016-08-11 17:46:05 -0700394 int bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700395typedef libaom_test::FuncParam<FHBD> TestFuncsHBD;
Geza Lorea661bc82016-05-20 16:33:12 +0100396
Scott LaVarnway3092e712018-04-24 10:47:15 -0700397class BlendA64MaskTestHBD : public BlendA64MaskTest<FHBD, uint16_t, uint16_t> {
Geza Lorea661bc82016-05-20 16:33:12 +0100398 protected:
James Zernfaa2dcf2023-07-24 18:29:51 -0700399 void Execute(const uint16_t *p_src0, const uint16_t *p_src1,
400 int run_times) override {
Xing Jinde2b7112018-08-08 19:44:15 +0800401 aom_usec_timer timer;
402 aom_usec_timer_start(&timer);
403 for (int i = 0; i < run_times; ++i) {
404 params_.ref_func(CONVERT_TO_BYTEPTR(dst_ref_ + dst_offset_), dst_stride_,
405 CONVERT_TO_BYTEPTR(p_src0 + src0_offset_), src0_stride_,
406 CONVERT_TO_BYTEPTR(p_src1 + src1_offset_), src1_stride_,
407 mask_, kMaxMaskWidth, w_, h_, subx_, suby_, bit_depth_);
408 }
409 aom_usec_timer_mark(&timer);
410 const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
411 aom_usec_timer_start(&timer);
412 for (int i = 0; i < run_times; ++i) {
413 params_.tst_func(CONVERT_TO_BYTEPTR(dst_tst_ + dst_offset_), dst_stride_,
414 CONVERT_TO_BYTEPTR(p_src0 + src0_offset_), src0_stride_,
415 CONVERT_TO_BYTEPTR(p_src1 + src1_offset_), src1_stride_,
416 mask_, kMaxMaskWidth, w_, h_, subx_, suby_, bit_depth_);
417 }
418 aom_usec_timer_mark(&timer);
419 const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
420 if (run_times > 1) {
421 printf("%3dx%-3d subx %d suby %d :%7.2f/%7.2fns", w_, h_, subx_, suby_,
422 time1, time2);
423 printf("(%3.2f)\n", time1 / time2);
424 }
Geza Lorea661bc82016-05-20 16:33:12 +0100425 }
426
Geza Loree6f8c172016-07-06 15:54:29 +0100427 int bit_depth_;
Geza Lorea661bc82016-05-20 16:33:12 +0100428};
chiyotsai9dfac722020-07-07 17:43:02 -0700429GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(BlendA64MaskTestHBD);
Geza Lorea661bc82016-05-20 16:33:12 +0100430
Geza Lorebfa59b42016-07-11 12:43:47 +0100431TEST_P(BlendA64MaskTestHBD, RandomValues) {
Gerda Zsejke Mored93263f2023-08-09 15:26:16 +0200432 for (bit_depth_ = 8; bit_depth_ <= 12 && !HasFatalFailure();
433 bit_depth_ += 2) {
Geza Loree6f8c172016-07-06 15:54:29 +0100434 const int hi = 1 << bit_depth_;
Geza Lorea661bc82016-05-20 16:33:12 +0100435
Gerda Zsejke Mored93263f2023-08-09 15:26:16 +0200436 for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
437 for (int i = 0; i < kBufSize; ++i) {
438 dst_ref_[i] = rng_(hi);
439 dst_tst_[i] = rng_(hi);
440 src0_[i] = rng_(hi);
441 src1_[i] = rng_(hi);
442 }
443
444 for (int i = 0; i < kMaxMaskSize; ++i)
445 mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
446
447 RunTest(bsize, 1);
Geza Loree6f8c172016-07-06 15:54:29 +0100448 }
Geza Lorea661bc82016-05-20 16:33:12 +0100449 }
450}
451
Geza Lorebfa59b42016-07-11 12:43:47 +0100452TEST_P(BlendA64MaskTestHBD, ExtremeValues) {
Gerda Zsejke Mored93263f2023-08-09 15:26:16 +0200453 for (bit_depth_ = 8; bit_depth_ <= 12 && !HasFatalFailure();
454 bit_depth_ += 2) {
Geza Loree6f8c172016-07-06 15:54:29 +0100455 const int hi = 1 << bit_depth_;
Geza Lorea661bc82016-05-20 16:33:12 +0100456 const int lo = hi - 2;
457
Gerda Zsejke Mored93263f2023-08-09 15:26:16 +0200458 for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure();
459 ++bsize) {
460 for (int i = 0; i < kBufSize; ++i) {
461 dst_ref_[i] = rng_(hi - lo) + lo;
462 dst_tst_[i] = rng_(hi - lo) + lo;
463 src0_[i] = rng_(hi - lo) + lo;
464 src1_[i] = rng_(hi - lo) + lo;
465 }
466
467 for (int i = 0; i < kMaxMaskSize; ++i)
468 mask_[i] = rng_(2) + AOM_BLEND_A64_MAX_ALPHA - 1;
469
470 RunTest(bsize, 1);
Geza Loree6f8c172016-07-06 15:54:29 +0100471 }
Geza Lorea661bc82016-05-20 16:33:12 +0100472 }
473}
474
// Each SIMD implementation below is checked against the C implementation.
#if HAVE_SSE4_1
INSTANTIATE_TEST_SUITE_P(
    SSE4_1, BlendA64MaskTestHBD,
    ::testing::Values(TestFuncsHBD(aom_highbd_blend_a64_mask_c,
                                   aom_highbd_blend_a64_mask_sse4_1)));
#endif  // HAVE_SSE4_1

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, BlendA64MaskTestHBD,
    ::testing::Values(TestFuncsHBD(aom_highbd_blend_a64_mask_c,
                                   aom_highbd_blend_a64_mask_neon)));
#endif  // HAVE_NEON
488
//////////////////////////////////////////////////////////////////////////////
// HBD _d16 version
//////////////////////////////////////////////////////////////////////////////
492
493typedef void (*FHBD_D16)(uint8_t *dst, uint32_t dst_stride,
494 const CONV_BUF_TYPE *src0, uint32_t src0_stride,
495 const CONV_BUF_TYPE *src1, uint32_t src1_stride,
Scott LaVarnway589b7a12018-06-06 06:29:16 -0700496 const uint8_t *mask, uint32_t mask_stride, int w,
497 int h, int subx, int suby, ConvolveParams *conv_params,
Scott LaVarnwaybfa46bc2018-05-30 07:57:48 -0700498 const int bd);
499typedef libaom_test::FuncParam<FHBD_D16> TestFuncsHBD_d16;
500
501class BlendA64MaskTestHBD_d16
502 : public BlendA64MaskTest<FHBD_D16, uint16_t, uint16_t> {
503 protected:
504 // max number of bits used by the source
505 static const int kSrcMaxBitsMask = (1 << 14) - 1;
506 static const int kSrcMaxBitsMaskHBD = (1 << 16) - 1;
507
James Zernfaa2dcf2023-07-24 18:29:51 -0700508 void Execute(const uint16_t *p_src0, const uint16_t *p_src1,
509 int run_times) override {
David Turnerb5ed1e62018-10-11 15:17:53 +0100510 ASSERT_GT(run_times, 0) << "Cannot run 0 iterations of the test.";
Scott LaVarnwaybfa46bc2018-05-30 07:57:48 -0700511 ConvolveParams conv_params;
512 conv_params.round_0 = (bit_depth_ == 12) ? ROUND0_BITS + 2 : ROUND0_BITS;
513 conv_params.round_1 = COMPOUND_ROUND1_BITS;
Xing Jinde2b7112018-08-08 19:44:15 +0800514 aom_usec_timer timer;
515 aom_usec_timer_start(&timer);
516 for (int i = 0; i < run_times; ++i) {
517 params_.ref_func(CONVERT_TO_BYTEPTR(dst_ref_ + dst_offset_), dst_stride_,
518 p_src0 + src0_offset_, src0_stride_,
519 p_src1 + src1_offset_, src1_stride_, mask_,
520 kMaxMaskWidth, w_, h_, subx_, suby_, &conv_params,
521 bit_depth_);
522 }
Scott LaVarnwaybfa46bc2018-05-30 07:57:48 -0700523 if (params_.tst_func) {
Xing Jinde2b7112018-08-08 19:44:15 +0800524 aom_usec_timer_mark(&timer);
525 const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
526 aom_usec_timer_start(&timer);
527 for (int i = 0; i < run_times; ++i) {
528 params_.tst_func(CONVERT_TO_BYTEPTR(dst_tst_ + dst_offset_),
529 dst_stride_, p_src0 + src0_offset_, src0_stride_,
530 p_src1 + src1_offset_, src1_stride_, mask_,
531 kMaxMaskWidth, w_, h_, subx_, suby_, &conv_params,
532 bit_depth_);
533 }
534 aom_usec_timer_mark(&timer);
535 const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
536 if (run_times > 1) {
537 printf("%3dx%-3d subx %d suby %d :%7.2f/%7.2fns", w_, h_, subx_, suby_,
538 time1, time2);
539 printf("(%3.2f)\n", time1 / time2);
540 }
Scott LaVarnwaybfa46bc2018-05-30 07:57:48 -0700541 }
542 }
543
544 int bit_depth_;
545 int src_max_bits_mask_;
546};
547
548TEST_P(BlendA64MaskTestHBD_d16, RandomValues) {
James Zern664f04d2022-05-24 17:30:58 -0700549 if (params_.tst_func == nullptr) return;
Gerda Zsejke Mored93263f2023-08-09 15:26:16 +0200550 for (bit_depth_ = 8; bit_depth_ <= 12 && !HasFatalFailure();
551 bit_depth_ += 2) {
Scott LaVarnwaybfa46bc2018-05-30 07:57:48 -0700552 src_max_bits_mask_ =
553 (bit_depth_ == 8) ? kSrcMaxBitsMask : kSrcMaxBitsMaskHBD;
554
Gerda Zsejke Mored93263f2023-08-09 15:26:16 +0200555 for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure();
556 ++bsize) {
Xing Jinde2b7112018-08-08 19:44:15 +0800557 for (int i = 0; i < kBufSize; ++i) {
Gerda Zsejke Mored93263f2023-08-09 15:26:16 +0200558 dst_ref_[i] = rng_.Rand8();
559 dst_tst_[i] = rng_.Rand8();
Scott LaVarnwaybfa46bc2018-05-30 07:57:48 -0700560
Gerda Zsejke Mored93263f2023-08-09 15:26:16 +0200561 src0_[i] = rng_.Rand16() & src_max_bits_mask_;
562 src1_[i] = rng_.Rand16() & src_max_bits_mask_;
Xing Jinde2b7112018-08-08 19:44:15 +0800563 }
Scott LaVarnwaybfa46bc2018-05-30 07:57:48 -0700564
Gerda Zsejke Mored93263f2023-08-09 15:26:16 +0200565 for (int i = 0; i < kMaxMaskSize; ++i)
566 mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
Xing Jinde2b7112018-08-08 19:44:15 +0800567
568 RunTest(bsize, 1);
Scott LaVarnwaybfa46bc2018-05-30 07:57:48 -0700569 }
Scott LaVarnwaybfa46bc2018-05-30 07:57:48 -0700570 }
571}
Gerda Zsejke Mored93263f2023-08-09 15:26:16 +0200572
573TEST_P(BlendA64MaskTestHBD_d16, ExtremeValues) {
574 for (bit_depth_ = 8; bit_depth_ <= 12; bit_depth_ += 2) {
575 src_max_bits_mask_ =
576 (bit_depth_ == 8) ? kSrcMaxBitsMask : kSrcMaxBitsMaskHBD;
577
578 for (int i = 0; i < kBufSize; ++i) {
579 dst_ref_[i] = 0;
580 dst_tst_[i] = (1 << bit_depth_) - 1;
581
582 src0_[i] = src_max_bits_mask_;
583 src1_[i] = src_max_bits_mask_;
584 }
585
586 for (int i = 0; i < kMaxMaskSize; ++i) mask_[i] = AOM_BLEND_A64_MAX_ALPHA;
587 for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
588 RunTest(bsize, 1);
589 }
590 }
591}
592
David Turnerb5ed1e62018-10-11 15:17:53 +0100593TEST_P(BlendA64MaskTestHBD_d16, DISABLED_Speed) {
594 const int kRunTimes = 10000000;
595 for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
596 for (bit_depth_ = 8; bit_depth_ <= 12; bit_depth_ += 2) {
597 for (int i = 0; i < kBufSize; ++i) {
598 dst_ref_[i] = rng_.Rand12() % (1 << bit_depth_);
599 dst_tst_[i] = rng_.Rand12() % (1 << bit_depth_);
600
601 src0_[i] = rng_.Rand16();
602 src1_[i] = rng_.Rand16();
603 }
604
605 for (int i = 0; i < kMaxMaskSize; ++i)
606 mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
607
Gerda Zsejke Mored93263f2023-08-09 15:26:16 +0200608 RunTest(bsize, kRunTimes);
David Turnerb5ed1e62018-10-11 15:17:53 +0100609 }
610 }
611}
Scott LaVarnwaybfa46bc2018-05-30 07:57:48 -0700612
Gerda Zsejke Mored93263f2023-08-09 15:26:16 +0200613INSTANTIATE_TEST_SUITE_P(
614 C, BlendA64MaskTestHBD_d16,
615 ::testing::Values(TestFuncsHBD_d16(aom_highbd_blend_a64_d16_mask_c,
616 aom_highbd_blend_a64_d16_mask_c)));
Scott LaVarnwaybfa46bc2018-05-30 07:57:48 -0700617
David Turnerb5ed1e62018-10-11 15:17:53 +0100618#if HAVE_SSE4_1
Cheng Chen96786fe2020-02-14 17:28:25 -0800619INSTANTIATE_TEST_SUITE_P(
David Turnerb5ed1e62018-10-11 15:17:53 +0100620 SSE4_1, BlendA64MaskTestHBD_d16,
621 ::testing::Values(TestFuncsHBD_d16(aom_highbd_blend_a64_d16_mask_c,
622 aom_highbd_blend_a64_d16_mask_sse4_1)));
623#endif // HAVE_SSE4_1
624
625#if HAVE_AVX2
Cheng Chen96786fe2020-02-14 17:28:25 -0800626INSTANTIATE_TEST_SUITE_P(
David Turnerb5ed1e62018-10-11 15:17:53 +0100627 AVX2, BlendA64MaskTestHBD_d16,
628 ::testing::Values(TestFuncsHBD_d16(aom_highbd_blend_a64_d16_mask_c,
629 aom_highbd_blend_a64_d16_mask_avx2)));
630#endif // HAVE_AVX2
631
Gerda Zsejke More7e1fa9e2023-08-07 10:43:46 +0200632#if HAVE_NEON
633INSTANTIATE_TEST_SUITE_P(
634 NEON, BlendA64MaskTestHBD_d16,
635 ::testing::Values(TestFuncsHBD_d16(aom_highbd_blend_a64_d16_mask_c,
636 aom_highbd_blend_a64_d16_mask_neon)));
637#endif // HAVE_NEON
638
// TODO(slavarnway): Enable the following in the avx2 commit. (56501)
// The instantiation is named AVX2 so that, once enabled, it does not clash
// with the existing SSE4_1 instantiation of BlendA64MaskTestHBD.
#if 0
#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(
    AVX2, BlendA64MaskTestHBD,
    ::testing::Values(TestFuncsHBD(aom_highbd_blend_a64_mask_c,
                                   aom_highbd_blend_a64_mask_avx2)));
#endif  // HAVE_AVX2
#endif
Jerome Jiang1cb298c2019-09-17 11:04:04 -0700648#endif // CONFIG_AV1_HIGHBITDEPTH
Geza Lorea661bc82016-05-20 16:33:12 +0100649} // namespace