blob: 4bdf4975d35d4214a882ecd69aa343fae1e21d4a [file] [log] [blame]
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001/*
James Zernb7c05bd2024-06-11 19:15:10 -07002 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01003 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
Johann123e8a62017-12-28 14:40:49 -080010 */
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +010011
12#include <assert.h>
13#include <string>
Tom Finegan44702c82018-05-22 13:00:39 -070014
15#include "config/aom_dsp_rtcd.h"
16
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +010017#include "test/acm_random.h"
Robert Chin07aa6c52021-10-22 02:01:20 +000018// Inlining not forced for the compiler due to some tests calling
19// SIMD_INLINE functions via function pointers
Steinar Midtskogen04305c62016-09-30 13:14:04 +020020#undef SIMD_INLINE
Robert Chin07aa6c52021-10-22 02:01:20 +000021#define SIMD_INLINE static inline
Robert Chinff103ca2021-10-18 18:46:51 -070022#include "aom_dsp/aom_simd.h"
Steinar Midtskogen1e424362016-09-30 13:14:04 +020023#include "aom_dsp/simd/v256_intrinsics_c.h"
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +010024
25// Machine tuned code goes into this file. This file is included from
26// simd_cmp_sse2.cc, simd_cmp_ssse3.cc etc which define the macros
27// ARCH (=neon, sse2, ssse3, etc), SIMD_NAMESPACE and ARCH_POSTFIX().
28
James Zern04401472017-10-20 12:56:33 -070029#ifdef _MSC_VER
30// Disable "value of intrinsic immediate argument 'value' is out of range
31// 'lowerbound - upperbound'" warning. Visual Studio emits this warning though
32// the parameters are conditionally checked in e.g., v256_shr_n_byte. Adding a
33// mask doesn't always appear to be sufficient.
34#pragma warning(disable : 4556)
35#endif
36
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +010037using libaom_test::ACMRandom;
38
39namespace SIMD_NAMESPACE {
40
41// Wrap templates around intrinsics using immediate values
42template <int shift>
43v64 imm_v64_shl_n_byte(v64 a) {
44 return v64_shl_n_byte(a, shift);
45}
46template <int shift>
47v64 imm_v64_shr_n_byte(v64 a) {
48 return v64_shr_n_byte(a, shift);
49}
50template <int shift>
51v64 imm_v64_shl_n_8(v64 a) {
52 return v64_shl_n_8(a, shift);
53}
54template <int shift>
55v64 imm_v64_shr_n_u8(v64 a) {
56 return v64_shr_n_u8(a, shift);
57}
58template <int shift>
59v64 imm_v64_shr_n_s8(v64 a) {
60 return v64_shr_n_s8(a, shift);
61}
62template <int shift>
63v64 imm_v64_shl_n_16(v64 a) {
64 return v64_shl_n_16(a, shift);
65}
66template <int shift>
67v64 imm_v64_shr_n_u16(v64 a) {
68 return v64_shr_n_u16(a, shift);
69}
70template <int shift>
71v64 imm_v64_shr_n_s16(v64 a) {
72 return v64_shr_n_s16(a, shift);
73}
74template <int shift>
75v64 imm_v64_shl_n_32(v64 a) {
76 return v64_shl_n_32(a, shift);
77}
78template <int shift>
79v64 imm_v64_shr_n_u32(v64 a) {
80 return v64_shr_n_u32(a, shift);
81}
82template <int shift>
83v64 imm_v64_shr_n_s32(v64 a) {
84 return v64_shr_n_s32(a, shift);
85}
86template <int shift>
87v64 imm_v64_align(v64 a, v64 b) {
88 return v64_align(a, b, shift);
89}
90
91// Wrap templates around corresponding C implementations of the above
92template <int shift>
93c_v64 c_imm_v64_shl_n_byte(c_v64 a) {
94 return c_v64_shl_n_byte(a, shift);
95}
96template <int shift>
97c_v64 c_imm_v64_shr_n_byte(c_v64 a) {
98 return c_v64_shr_n_byte(a, shift);
99}
100template <int shift>
101c_v64 c_imm_v64_shl_n_8(c_v64 a) {
102 return c_v64_shl_n_8(a, shift);
103}
104template <int shift>
105c_v64 c_imm_v64_shr_n_u8(c_v64 a) {
106 return c_v64_shr_n_u8(a, shift);
107}
108template <int shift>
109c_v64 c_imm_v64_shr_n_s8(c_v64 a) {
110 return c_v64_shr_n_s8(a, shift);
111}
112template <int shift>
113c_v64 c_imm_v64_shl_n_16(c_v64 a) {
114 return c_v64_shl_n_16(a, shift);
115}
116template <int shift>
117c_v64 c_imm_v64_shr_n_u16(c_v64 a) {
118 return c_v64_shr_n_u16(a, shift);
119}
120template <int shift>
121c_v64 c_imm_v64_shr_n_s16(c_v64 a) {
122 return c_v64_shr_n_s16(a, shift);
123}
124template <int shift>
125c_v64 c_imm_v64_shl_n_32(c_v64 a) {
126 return c_v64_shl_n_32(a, shift);
127}
128template <int shift>
129c_v64 c_imm_v64_shr_n_u32(c_v64 a) {
130 return c_v64_shr_n_u32(a, shift);
131}
132template <int shift>
133c_v64 c_imm_v64_shr_n_s32(c_v64 a) {
134 return c_v64_shr_n_s32(a, shift);
135}
136template <int shift>
137c_v64 c_imm_v64_align(c_v64 a, c_v64 b) {
138 return c_v64_align(a, b, shift);
139}
140
Steinar Midtskogen82d580c2016-09-30 13:14:04 +0200141template <int shift>
142v128 imm_v128_shl_n_byte(v128 a) {
143 return v128_shl_n_byte(a, shift);
144}
145template <int shift>
146v128 imm_v128_shr_n_byte(v128 a) {
147 return v128_shr_n_byte(a, shift);
148}
149template <int shift>
150v128 imm_v128_shl_n_8(v128 a) {
151 return v128_shl_n_8(a, shift);
152}
153template <int shift>
154v128 imm_v128_shr_n_u8(v128 a) {
155 return v128_shr_n_u8(a, shift);
156}
157template <int shift>
158v128 imm_v128_shr_n_s8(v128 a) {
159 return v128_shr_n_s8(a, shift);
160}
161template <int shift>
162v128 imm_v128_shl_n_16(v128 a) {
163 return v128_shl_n_16(a, shift);
164}
165template <int shift>
166v128 imm_v128_shr_n_u16(v128 a) {
167 return v128_shr_n_u16(a, shift);
168}
169template <int shift>
170v128 imm_v128_shr_n_s16(v128 a) {
171 return v128_shr_n_s16(a, shift);
172}
173template <int shift>
174v128 imm_v128_shl_n_32(v128 a) {
175 return v128_shl_n_32(a, shift);
176}
177template <int shift>
178v128 imm_v128_shr_n_u32(v128 a) {
179 return v128_shr_n_u32(a, shift);
180}
181template <int shift>
182v128 imm_v128_shr_n_s32(v128 a) {
183 return v128_shr_n_s32(a, shift);
184}
185template <int shift>
Steinar Midtskogen0578d432018-05-28 14:47:36 +0200186v128 imm_v128_shl_n_64(v128 a) {
187 return v128_shl_n_64(a, shift);
188}
189template <int shift>
190v128 imm_v128_shr_n_u64(v128 a) {
191 return v128_shr_n_u64(a, shift);
192}
193template <int shift>
194v128 imm_v128_shr_n_s64(v128 a) {
195 return v128_shr_n_s64(a, shift);
196}
197template <int shift>
Steinar Midtskogen82d580c2016-09-30 13:14:04 +0200198v128 imm_v128_align(v128 a, v128 b) {
199 return v128_align(a, b, shift);
200}
201
202template <int shift>
203c_v128 c_imm_v128_shl_n_byte(c_v128 a) {
204 return c_v128_shl_n_byte(a, shift);
205}
206template <int shift>
207c_v128 c_imm_v128_shr_n_byte(c_v128 a) {
208 return c_v128_shr_n_byte(a, shift);
209}
210template <int shift>
211c_v128 c_imm_v128_shl_n_8(c_v128 a) {
212 return c_v128_shl_n_8(a, shift);
213}
214template <int shift>
215c_v128 c_imm_v128_shr_n_u8(c_v128 a) {
216 return c_v128_shr_n_u8(a, shift);
217}
218template <int shift>
219c_v128 c_imm_v128_shr_n_s8(c_v128 a) {
220 return c_v128_shr_n_s8(a, shift);
221}
222template <int shift>
223c_v128 c_imm_v128_shl_n_16(c_v128 a) {
224 return c_v128_shl_n_16(a, shift);
225}
226template <int shift>
227c_v128 c_imm_v128_shr_n_u16(c_v128 a) {
228 return c_v128_shr_n_u16(a, shift);
229}
230template <int shift>
231c_v128 c_imm_v128_shr_n_s16(c_v128 a) {
232 return c_v128_shr_n_s16(a, shift);
233}
234template <int shift>
235c_v128 c_imm_v128_shl_n_32(c_v128 a) {
236 return c_v128_shl_n_32(a, shift);
237}
238template <int shift>
239c_v128 c_imm_v128_shr_n_u32(c_v128 a) {
240 return c_v128_shr_n_u32(a, shift);
241}
242template <int shift>
243c_v128 c_imm_v128_shr_n_s32(c_v128 a) {
244 return c_v128_shr_n_s32(a, shift);
245}
246template <int shift>
Steinar Midtskogen0578d432018-05-28 14:47:36 +0200247c_v128 c_imm_v128_shl_n_64(c_v128 a) {
248 return c_v128_shl_n_64(a, shift);
249}
250template <int shift>
251c_v128 c_imm_v128_shr_n_u64(c_v128 a) {
252 return c_v128_shr_n_u64(a, shift);
253}
254template <int shift>
255c_v128 c_imm_v128_shr_n_s64(c_v128 a) {
256 return c_v128_shr_n_s64(a, shift);
257}
258template <int shift>
Steinar Midtskogen82d580c2016-09-30 13:14:04 +0200259c_v128 c_imm_v128_align(c_v128 a, c_v128 b) {
260 return c_v128_align(a, b, shift);
261}
262
Steinar Midtskogen1e424362016-09-30 13:14:04 +0200263template <int shift>
Steinar Midtskogenc5a56b92018-06-14 21:03:56 +0200264v256 imm_v256_shl_n_word(v256 a) {
265 return v256_shl_n_word(a, shift);
266}
267template <int shift>
268v256 imm_v256_shr_n_word(v256 a) {
269 return v256_shr_n_word(a, shift);
270}
271template <int shift>
Steinar Midtskogen1e424362016-09-30 13:14:04 +0200272v256 imm_v256_shl_n_byte(v256 a) {
273 return v256_shl_n_byte(a, shift);
274}
275template <int shift>
276v256 imm_v256_shr_n_byte(v256 a) {
277 return v256_shr_n_byte(a, shift);
278}
279template <int shift>
280v256 imm_v256_shl_n_8(v256 a) {
281 return v256_shl_n_8(a, shift);
282}
283template <int shift>
284v256 imm_v256_shr_n_u8(v256 a) {
285 return v256_shr_n_u8(a, shift);
286}
287template <int shift>
288v256 imm_v256_shr_n_s8(v256 a) {
289 return v256_shr_n_s8(a, shift);
290}
291template <int shift>
292v256 imm_v256_shl_n_16(v256 a) {
293 return v256_shl_n_16(a, shift);
294}
295template <int shift>
296v256 imm_v256_shr_n_u16(v256 a) {
297 return v256_shr_n_u16(a, shift);
298}
299template <int shift>
300v256 imm_v256_shr_n_s16(v256 a) {
301 return v256_shr_n_s16(a, shift);
302}
303template <int shift>
304v256 imm_v256_shl_n_32(v256 a) {
305 return v256_shl_n_32(a, shift);
306}
307template <int shift>
308v256 imm_v256_shr_n_u32(v256 a) {
309 return v256_shr_n_u32(a, shift);
310}
311template <int shift>
312v256 imm_v256_shr_n_s32(v256 a) {
313 return v256_shr_n_s32(a, shift);
314}
315template <int shift>
Steinar Midtskogen0578d432018-05-28 14:47:36 +0200316v256 imm_v256_shl_n_64(v256 a) {
317 return v256_shl_n_64(a, shift);
318}
319template <int shift>
320v256 imm_v256_shr_n_u64(v256 a) {
321 return v256_shr_n_u64(a, shift);
322}
323template <int shift>
324v256 imm_v256_shr_n_s64(v256 a) {
325 return v256_shr_n_s64(a, shift);
326}
327template <int shift>
Steinar Midtskogen1e424362016-09-30 13:14:04 +0200328v256 imm_v256_align(v256 a, v256 b) {
329 return v256_align(a, b, shift);
330}
331
332template <int shift>
Steinar Midtskogenc5a56b92018-06-14 21:03:56 +0200333c_v256 c_imm_v256_shl_n_word(c_v256 a) {
334 return c_v256_shl_n_word(a, shift);
335}
336template <int shift>
337c_v256 c_imm_v256_shr_n_word(c_v256 a) {
338 return c_v256_shr_n_word(a, shift);
339}
340template <int shift>
Steinar Midtskogen1e424362016-09-30 13:14:04 +0200341c_v256 c_imm_v256_shl_n_byte(c_v256 a) {
342 return c_v256_shl_n_byte(a, shift);
343}
344template <int shift>
345c_v256 c_imm_v256_shr_n_byte(c_v256 a) {
346 return c_v256_shr_n_byte(a, shift);
347}
348template <int shift>
349c_v256 c_imm_v256_shl_n_8(c_v256 a) {
350 return c_v256_shl_n_8(a, shift);
351}
352template <int shift>
353c_v256 c_imm_v256_shr_n_u8(c_v256 a) {
354 return c_v256_shr_n_u8(a, shift);
355}
356template <int shift>
357c_v256 c_imm_v256_shr_n_s8(c_v256 a) {
358 return c_v256_shr_n_s8(a, shift);
359}
360template <int shift>
361c_v256 c_imm_v256_shl_n_16(c_v256 a) {
362 return c_v256_shl_n_16(a, shift);
363}
364template <int shift>
365c_v256 c_imm_v256_shr_n_u16(c_v256 a) {
366 return c_v256_shr_n_u16(a, shift);
367}
368template <int shift>
369c_v256 c_imm_v256_shr_n_s16(c_v256 a) {
370 return c_v256_shr_n_s16(a, shift);
371}
372template <int shift>
373c_v256 c_imm_v256_shl_n_32(c_v256 a) {
374 return c_v256_shl_n_32(a, shift);
375}
376template <int shift>
377c_v256 c_imm_v256_shr_n_u32(c_v256 a) {
378 return c_v256_shr_n_u32(a, shift);
379}
380template <int shift>
381c_v256 c_imm_v256_shr_n_s32(c_v256 a) {
382 return c_v256_shr_n_s32(a, shift);
383}
384template <int shift>
Steinar Midtskogen0578d432018-05-28 14:47:36 +0200385c_v256 c_imm_v256_shl_n_64(c_v256 a) {
386 return c_v256_shl_n_64(a, shift);
387}
388template <int shift>
389c_v256 c_imm_v256_shr_n_u64(c_v256 a) {
390 return c_v256_shr_n_u64(a, shift);
391}
392template <int shift>
393c_v256 c_imm_v256_shr_n_s64(c_v256 a) {
394 return c_v256_shr_n_s64(a, shift);
395}
396template <int shift>
Steinar Midtskogen1e424362016-09-30 13:14:04 +0200397c_v256 c_imm_v256_align(c_v256 a, c_v256 b) {
398 return c_v256_align(a, b, shift);
399}
400
// Wrappers around the SAD and SSD functions
402uint32_t v64_sad_u8(v64 a, v64 b) {
403 return v64_sad_u8_sum(::v64_sad_u8(v64_sad_u8_init(), a, b));
404}
405uint32_t v64_ssd_u8(v64 a, v64 b) {
406 return v64_ssd_u8_sum(::v64_ssd_u8(v64_ssd_u8_init(), a, b));
407}
408
409uint32_t c_v64_sad_u8(c_v64 a, c_v64 b) {
410 return c_v64_sad_u8_sum(::c_v64_sad_u8(c_v64_sad_u8_init(), a, b));
411}
412uint32_t c_v64_ssd_u8(c_v64 a, c_v64 b) {
413 return c_v64_ssd_u8_sum(::c_v64_ssd_u8(c_v64_ssd_u8_init(), a, b));
414}
Steinar Midtskogen6c795762017-03-07 20:55:48 +0100415uint32_t v128_sad_u8(v128 a, v128 b) {
Steinar Midtskogen82d580c2016-09-30 13:14:04 +0200416 return v128_sad_u8_sum(::v128_sad_u8(v128_sad_u8_init(), a, b));
417}
418uint32_t v128_ssd_u8(v128 a, v128 b) {
419 return v128_ssd_u8_sum(::v128_ssd_u8(v128_ssd_u8_init(), a, b));
420}
421uint32_t c_v128_sad_u8(c_v128 a, c_v128 b) {
422 return c_v128_sad_u8_sum(::c_v128_sad_u8(c_v128_sad_u8_init(), a, b));
423}
424uint32_t c_v128_ssd_u8(c_v128 a, c_v128 b) {
425 return c_v128_ssd_u8_sum(::c_v128_ssd_u8(c_v128_ssd_u8_init(), a, b));
426}
Steinar Midtskogen0578d432018-05-28 14:47:36 +0200427uint32_t v128_sad_u16(v128 a, v128 b) {
428 return v128_sad_u16_sum(::v128_sad_u16(v128_sad_u16_init(), a, b));
429}
430uint64_t v128_ssd_s16(v128 a, v128 b) {
431 return v128_ssd_s16_sum(::v128_ssd_s16(v128_ssd_s16_init(), a, b));
432}
433uint32_t c_v128_sad_u16(c_v128 a, c_v128 b) {
434 return c_v128_sad_u16_sum(::c_v128_sad_u16(c_v128_sad_u16_init(), a, b));
435}
436uint64_t c_v128_ssd_s16(c_v128 a, c_v128 b) {
437 return c_v128_ssd_s16_sum(::c_v128_ssd_s16(c_v128_ssd_s16_init(), a, b));
438}
Steinar Midtskogen1e424362016-09-30 13:14:04 +0200439uint32_t v256_sad_u8(v256 a, v256 b) {
440 return v256_sad_u8_sum(::v256_sad_u8(v256_sad_u8_init(), a, b));
441}
442uint32_t v256_ssd_u8(v256 a, v256 b) {
443 return v256_ssd_u8_sum(::v256_ssd_u8(v256_ssd_u8_init(), a, b));
444}
445uint32_t c_v256_sad_u8(c_v256 a, c_v256 b) {
446 return c_v256_sad_u8_sum(::c_v256_sad_u8(c_v256_sad_u8_init(), a, b));
447}
448uint32_t c_v256_ssd_u8(c_v256 a, c_v256 b) {
449 return c_v256_ssd_u8_sum(::c_v256_ssd_u8(c_v256_ssd_u8_init(), a, b));
450}
Steinar Midtskogen0578d432018-05-28 14:47:36 +0200451uint32_t v256_sad_u16(v256 a, v256 b) {
452 return v256_sad_u16_sum(::v256_sad_u16(v256_sad_u16_init(), a, b));
453}
454uint64_t v256_ssd_s16(v256 a, v256 b) {
455 return v256_ssd_s16_sum(::v256_ssd_s16(v256_ssd_s16_init(), a, b));
456}
457uint32_t c_v256_sad_u16(c_v256 a, c_v256 b) {
458 return c_v256_sad_u16_sum(::c_v256_sad_u16(c_v256_sad_u16_init(), a, b));
459}
460uint64_t c_v256_ssd_s16(c_v256 a, c_v256 b) {
461 return c_v256_ssd_s16_sum(::c_v256_ssd_s16(c_v256_ssd_s16_init(), a, b));
462}
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +0100463
464namespace {
465
// Generic function-pointer type; the MAP() macro casts every table entry to
// this type so functions with different signatures can share one table.
using fptr = void (*)();

// One table row: a printable name plus two type-erased function pointers,
// `ref` and `simd`. Kept as a plain aggregate so rows can be brace-initialized.
struct mapping {
  const char *name;
  fptr ref;
  fptr simd;
};
473
// Expands to one `mapping` initializer for `name`: the stringized name,
// followed by c_<name> and <name>, each cast to the generic fptr type.
#define MAP(name)                            \
  {                                          \
    #name, reinterpret_cast<fptr>(c_##name), \
        reinterpret_cast<fptr>(name)         \
  }
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +0100476
Steinar Midtskogen8b28d862017-01-09 11:33:20 +0100477const mapping m[] = { MAP(v64_sad_u8),
478 MAP(v64_ssd_u8),
479 MAP(v64_add_8),
480 MAP(v64_add_16),
Steinar Midtskogen0578d432018-05-28 14:47:36 +0200481 MAP(v64_sadd_s8),
482 MAP(v64_sadd_u8),
Steinar Midtskogen8b28d862017-01-09 11:33:20 +0100483 MAP(v64_sadd_s16),
484 MAP(v64_add_32),
485 MAP(v64_sub_8),
486 MAP(v64_ssub_u8),
487 MAP(v64_ssub_s8),
488 MAP(v64_sub_16),
489 MAP(v64_ssub_s16),
Steinar Midtskogen9b8444a2017-03-31 22:11:06 +0200490 MAP(v64_ssub_u16),
Steinar Midtskogen8b28d862017-01-09 11:33:20 +0100491 MAP(v64_sub_32),
492 MAP(v64_ziplo_8),
493 MAP(v64_ziphi_8),
494 MAP(v64_ziplo_16),
495 MAP(v64_ziphi_16),
496 MAP(v64_ziplo_32),
497 MAP(v64_ziphi_32),
Steinar Midtskogen0578d432018-05-28 14:47:36 +0200498 MAP(v64_pack_s32_u16),
Steinar Midtskogen8b28d862017-01-09 11:33:20 +0100499 MAP(v64_pack_s32_s16),
500 MAP(v64_pack_s16_u8),
501 MAP(v64_pack_s16_s8),
502 MAP(v64_unziphi_8),
503 MAP(v64_unziplo_8),
504 MAP(v64_unziphi_16),
505 MAP(v64_unziplo_16),
506 MAP(v64_or),
507 MAP(v64_xor),
508 MAP(v64_and),
509 MAP(v64_andn),
510 MAP(v64_mullo_s16),
511 MAP(v64_mulhi_s16),
512 MAP(v64_mullo_s32),
513 MAP(v64_madd_s16),
514 MAP(v64_madd_us8),
515 MAP(v64_avg_u8),
516 MAP(v64_rdavg_u8),
Steinar Midtskogen0578d432018-05-28 14:47:36 +0200517 MAP(v64_rdavg_u16),
Steinar Midtskogen8b28d862017-01-09 11:33:20 +0100518 MAP(v64_avg_u16),
519 MAP(v64_min_u8),
520 MAP(v64_max_u8),
521 MAP(v64_min_s8),
522 MAP(v64_max_s8),
523 MAP(v64_min_s16),
524 MAP(v64_max_s16),
525 MAP(v64_cmpgt_s8),
526 MAP(v64_cmplt_s8),
527 MAP(v64_cmpeq_8),
528 MAP(v64_cmpgt_s16),
529 MAP(v64_cmplt_s16),
530 MAP(v64_cmpeq_16),
531 MAP(v64_shuffle_8),
532 MAP(imm_v64_align<1>),
533 MAP(imm_v64_align<2>),
534 MAP(imm_v64_align<3>),
535 MAP(imm_v64_align<4>),
536 MAP(imm_v64_align<5>),
537 MAP(imm_v64_align<6>),
538 MAP(imm_v64_align<7>),
Steinar Midtskogen6033fb82017-04-02 21:32:41 +0200539 MAP(v64_abs_s8),
Steinar Midtskogen8b28d862017-01-09 11:33:20 +0100540 MAP(v64_abs_s16),
541 MAP(v64_unpacklo_u8_s16),
542 MAP(v64_unpackhi_u8_s16),
Steinar Midtskogen1b2b7392017-04-11 14:19:20 +0200543 MAP(v64_unpacklo_s8_s16),
544 MAP(v64_unpackhi_s8_s16),
Steinar Midtskogen8b28d862017-01-09 11:33:20 +0100545 MAP(v64_unpacklo_u16_s32),
546 MAP(v64_unpacklo_s16_s32),
547 MAP(v64_unpackhi_u16_s32),
548 MAP(v64_unpackhi_s16_s32),
549 MAP(imm_v64_shr_n_byte<1>),
550 MAP(imm_v64_shr_n_byte<2>),
551 MAP(imm_v64_shr_n_byte<3>),
552 MAP(imm_v64_shr_n_byte<4>),
553 MAP(imm_v64_shr_n_byte<5>),
554 MAP(imm_v64_shr_n_byte<6>),
555 MAP(imm_v64_shr_n_byte<7>),
556 MAP(imm_v64_shl_n_byte<1>),
557 MAP(imm_v64_shl_n_byte<2>),
558 MAP(imm_v64_shl_n_byte<3>),
559 MAP(imm_v64_shl_n_byte<4>),
560 MAP(imm_v64_shl_n_byte<5>),
561 MAP(imm_v64_shl_n_byte<6>),
562 MAP(imm_v64_shl_n_byte<7>),
563 MAP(imm_v64_shl_n_8<1>),
564 MAP(imm_v64_shl_n_8<2>),
565 MAP(imm_v64_shl_n_8<3>),
566 MAP(imm_v64_shl_n_8<4>),
567 MAP(imm_v64_shl_n_8<5>),
568 MAP(imm_v64_shl_n_8<6>),
569 MAP(imm_v64_shl_n_8<7>),
570 MAP(imm_v64_shr_n_u8<1>),
571 MAP(imm_v64_shr_n_u8<2>),
572 MAP(imm_v64_shr_n_u8<3>),
573 MAP(imm_v64_shr_n_u8<4>),
574 MAP(imm_v64_shr_n_u8<5>),
575 MAP(imm_v64_shr_n_u8<6>),
576 MAP(imm_v64_shr_n_u8<7>),
577 MAP(imm_v64_shr_n_s8<1>),
578 MAP(imm_v64_shr_n_s8<2>),
579 MAP(imm_v64_shr_n_s8<3>),
580 MAP(imm_v64_shr_n_s8<4>),
581 MAP(imm_v64_shr_n_s8<5>),
582 MAP(imm_v64_shr_n_s8<6>),
583 MAP(imm_v64_shr_n_s8<7>),
584 MAP(imm_v64_shl_n_16<1>),
585 MAP(imm_v64_shl_n_16<2>),
586 MAP(imm_v64_shl_n_16<4>),
587 MAP(imm_v64_shl_n_16<6>),
588 MAP(imm_v64_shl_n_16<8>),
589 MAP(imm_v64_shl_n_16<10>),
590 MAP(imm_v64_shl_n_16<12>),
591 MAP(imm_v64_shl_n_16<14>),
592 MAP(imm_v64_shr_n_u16<1>),
593 MAP(imm_v64_shr_n_u16<2>),
594 MAP(imm_v64_shr_n_u16<4>),
595 MAP(imm_v64_shr_n_u16<6>),
596 MAP(imm_v64_shr_n_u16<8>),
597 MAP(imm_v64_shr_n_u16<10>),
598 MAP(imm_v64_shr_n_u16<12>),
599 MAP(imm_v64_shr_n_u16<14>),
600 MAP(imm_v64_shr_n_s16<1>),
601 MAP(imm_v64_shr_n_s16<2>),
602 MAP(imm_v64_shr_n_s16<4>),
603 MAP(imm_v64_shr_n_s16<6>),
604 MAP(imm_v64_shr_n_s16<8>),
605 MAP(imm_v64_shr_n_s16<10>),
606 MAP(imm_v64_shr_n_s16<12>),
607 MAP(imm_v64_shr_n_s16<14>),
608 MAP(imm_v64_shl_n_32<1>),
609 MAP(imm_v64_shl_n_32<4>),
610 MAP(imm_v64_shl_n_32<8>),
611 MAP(imm_v64_shl_n_32<12>),
612 MAP(imm_v64_shl_n_32<16>),
613 MAP(imm_v64_shl_n_32<20>),
614 MAP(imm_v64_shl_n_32<24>),
615 MAP(imm_v64_shl_n_32<28>),
616 MAP(imm_v64_shr_n_u32<1>),
617 MAP(imm_v64_shr_n_u32<4>),
618 MAP(imm_v64_shr_n_u32<8>),
619 MAP(imm_v64_shr_n_u32<12>),
620 MAP(imm_v64_shr_n_u32<16>),
621 MAP(imm_v64_shr_n_u32<20>),
622 MAP(imm_v64_shr_n_u32<24>),
623 MAP(imm_v64_shr_n_u32<28>),
624 MAP(imm_v64_shr_n_s32<1>),
625 MAP(imm_v64_shr_n_s32<4>),
626 MAP(imm_v64_shr_n_s32<8>),
627 MAP(imm_v64_shr_n_s32<12>),
628 MAP(imm_v64_shr_n_s32<16>),
629 MAP(imm_v64_shr_n_s32<20>),
630 MAP(imm_v64_shr_n_s32<24>),
631 MAP(imm_v64_shr_n_s32<28>),
632 MAP(v64_shl_8),
633 MAP(v64_shr_u8),
634 MAP(v64_shr_s8),
635 MAP(v64_shl_16),
636 MAP(v64_shr_u16),
637 MAP(v64_shr_s16),
638 MAP(v64_shl_32),
639 MAP(v64_shr_u32),
640 MAP(v64_shr_s32),
641 MAP(v64_hadd_u8),
642 MAP(v64_hadd_s16),
643 MAP(v64_dotp_s16),
Steinar Midtskogen04305c62016-09-30 13:14:04 +0200644 MAP(v64_dotp_su8),
645 MAP(v64_u64),
646 MAP(v64_low_u32),
647 MAP(v64_high_u32),
648 MAP(v64_low_s32),
649 MAP(v64_high_s32),
650 MAP(v64_dup_8),
651 MAP(v64_dup_16),
652 MAP(v64_dup_32),
653 MAP(v64_from_32),
654 MAP(v64_zero),
655 MAP(v64_from_16),
Steinar Midtskogen82d580c2016-09-30 13:14:04 +0200656 MAP(v128_sad_u8),
657 MAP(v128_ssd_u8),
Steinar Midtskogen0578d432018-05-28 14:47:36 +0200658 MAP(v128_sad_u16),
659 MAP(v128_ssd_s16),
Steinar Midtskogen82d580c2016-09-30 13:14:04 +0200660 MAP(v128_add_8),
661 MAP(v128_add_16),
Steinar Midtskogen0578d432018-05-28 14:47:36 +0200662 MAP(v128_sadd_s8),
663 MAP(v128_sadd_u8),
Steinar Midtskogen82d580c2016-09-30 13:14:04 +0200664 MAP(v128_sadd_s16),
665 MAP(v128_add_32),
Steinar Midtskogen0578d432018-05-28 14:47:36 +0200666 MAP(v128_add_64),
Steinar Midtskogen82d580c2016-09-30 13:14:04 +0200667 MAP(v128_sub_8),
668 MAP(v128_ssub_u8),
669 MAP(v128_ssub_s8),
670 MAP(v128_sub_16),
671 MAP(v128_ssub_s16),
Steinar Midtskogen9b8444a2017-03-31 22:11:06 +0200672 MAP(v128_ssub_u16),
Steinar Midtskogen82d580c2016-09-30 13:14:04 +0200673 MAP(v128_sub_32),
Steinar Midtskogen0578d432018-05-28 14:47:36 +0200674 MAP(v128_sub_64),
Steinar Midtskogen82d580c2016-09-30 13:14:04 +0200675 MAP(v128_ziplo_8),
676 MAP(v128_ziphi_8),
677 MAP(v128_ziplo_16),
678 MAP(v128_ziphi_16),
679 MAP(v128_ziplo_32),
680 MAP(v128_ziphi_32),
681 MAP(v128_ziplo_64),
682 MAP(v128_ziphi_64),
683 MAP(v128_unziphi_8),
684 MAP(v128_unziplo_8),
685 MAP(v128_unziphi_16),
686 MAP(v128_unziplo_16),
687 MAP(v128_unziphi_32),
688 MAP(v128_unziplo_32),
Steinar Midtskogen0578d432018-05-28 14:47:36 +0200689 MAP(v128_pack_s32_u16),
Steinar Midtskogen82d580c2016-09-30 13:14:04 +0200690 MAP(v128_pack_s32_s16),
691 MAP(v128_pack_s16_u8),
692 MAP(v128_pack_s16_s8),
693 MAP(v128_or),
694 MAP(v128_xor),
695 MAP(v128_and),
696 MAP(v128_andn),
697 MAP(v128_mullo_s16),
698 MAP(v128_mulhi_s16),
699 MAP(v128_mullo_s32),
700 MAP(v128_madd_s16),
701 MAP(v128_madd_us8),
702 MAP(v128_avg_u8),
703 MAP(v128_rdavg_u8),
Steinar Midtskogen0578d432018-05-28 14:47:36 +0200704 MAP(v128_rdavg_u16),
Steinar Midtskogen82d580c2016-09-30 13:14:04 +0200705 MAP(v128_avg_u16),
706 MAP(v128_min_u8),
707 MAP(v128_max_u8),
708 MAP(v128_min_s8),
709 MAP(v128_max_s8),
710 MAP(v128_min_s16),
711 MAP(v128_max_s16),
Steinar Midtskogen0578d432018-05-28 14:47:36 +0200712 MAP(v128_min_s32),
713 MAP(v128_max_s32),
Steinar Midtskogen82d580c2016-09-30 13:14:04 +0200714 MAP(v128_cmpgt_s8),
715 MAP(v128_cmplt_s8),
716 MAP(v128_cmpeq_8),
717 MAP(v128_cmpgt_s16),
718 MAP(v128_cmpeq_16),
719 MAP(v128_cmplt_s16),
Steinar Midtskogen0578d432018-05-28 14:47:36 +0200720 MAP(v128_cmpgt_s32),
721 MAP(v128_cmpeq_32),
722 MAP(v128_cmplt_s32),
Steinar Midtskogen82d580c2016-09-30 13:14:04 +0200723 MAP(v128_shuffle_8),
724 MAP(imm_v128_align<1>),
725 MAP(imm_v128_align<2>),
726 MAP(imm_v128_align<3>),
727 MAP(imm_v128_align<4>),
728 MAP(imm_v128_align<5>),
729 MAP(imm_v128_align<6>),
730 MAP(imm_v128_align<7>),
731 MAP(imm_v128_align<8>),
732 MAP(imm_v128_align<9>),
733 MAP(imm_v128_align<10>),
734 MAP(imm_v128_align<11>),
735 MAP(imm_v128_align<12>),
736 MAP(imm_v128_align<13>),
737 MAP(imm_v128_align<14>),
738 MAP(imm_v128_align<15>),
Steinar Midtskogen6033fb82017-04-02 21:32:41 +0200739 MAP(v128_abs_s8),
Steinar Midtskogen82d580c2016-09-30 13:14:04 +0200740 MAP(v128_abs_s16),
Steinar Midtskogen0578d432018-05-28 14:47:36 +0200741 MAP(v128_padd_u8),
Steinar Midtskogen82d580c2016-09-30 13:14:04 +0200742 MAP(v128_padd_s16),
743 MAP(v128_unpacklo_u16_s32),
744 MAP(v128_unpacklo_s16_s32),
745 MAP(v128_unpackhi_u16_s32),
746 MAP(v128_unpackhi_s16_s32),
747 MAP(imm_v128_shr_n_byte<1>),
748 MAP(imm_v128_shr_n_byte<2>),
749 MAP(imm_v128_shr_n_byte<3>),
750 MAP(imm_v128_shr_n_byte<4>),
751 MAP(imm_v128_shr_n_byte<5>),
752 MAP(imm_v128_shr_n_byte<6>),
753 MAP(imm_v128_shr_n_byte<7>),
754 MAP(imm_v128_shr_n_byte<8>),
755 MAP(imm_v128_shr_n_byte<9>),
756 MAP(imm_v128_shr_n_byte<10>),
757 MAP(imm_v128_shr_n_byte<11>),
758 MAP(imm_v128_shr_n_byte<12>),
759 MAP(imm_v128_shr_n_byte<13>),
760 MAP(imm_v128_shr_n_byte<14>),
761 MAP(imm_v128_shr_n_byte<15>),
762 MAP(imm_v128_shl_n_byte<1>),
763 MAP(imm_v128_shl_n_byte<2>),
764 MAP(imm_v128_shl_n_byte<3>),
765 MAP(imm_v128_shl_n_byte<4>),
766 MAP(imm_v128_shl_n_byte<5>),
767 MAP(imm_v128_shl_n_byte<6>),
768 MAP(imm_v128_shl_n_byte<7>),
769 MAP(imm_v128_shl_n_byte<8>),
770 MAP(imm_v128_shl_n_byte<9>),
771 MAP(imm_v128_shl_n_byte<10>),
772 MAP(imm_v128_shl_n_byte<11>),
773 MAP(imm_v128_shl_n_byte<12>),
774 MAP(imm_v128_shl_n_byte<13>),
775 MAP(imm_v128_shl_n_byte<14>),
776 MAP(imm_v128_shl_n_byte<15>),
777 MAP(imm_v128_shl_n_8<1>),
778 MAP(imm_v128_shl_n_8<2>),
779 MAP(imm_v128_shl_n_8<3>),
780 MAP(imm_v128_shl_n_8<4>),
781 MAP(imm_v128_shl_n_8<5>),
782 MAP(imm_v128_shl_n_8<6>),
783 MAP(imm_v128_shl_n_8<7>),
784 MAP(imm_v128_shr_n_u8<1>),
785 MAP(imm_v128_shr_n_u8<2>),
786 MAP(imm_v128_shr_n_u8<3>),
787 MAP(imm_v128_shr_n_u8<4>),
788 MAP(imm_v128_shr_n_u8<5>),
789 MAP(imm_v128_shr_n_u8<6>),
790 MAP(imm_v128_shr_n_u8<7>),
791 MAP(imm_v128_shr_n_s8<1>),
792 MAP(imm_v128_shr_n_s8<2>),
793 MAP(imm_v128_shr_n_s8<3>),
794 MAP(imm_v128_shr_n_s8<4>),
795 MAP(imm_v128_shr_n_s8<5>),
796 MAP(imm_v128_shr_n_s8<6>),
797 MAP(imm_v128_shr_n_s8<7>),
798 MAP(imm_v128_shl_n_16<1>),
799 MAP(imm_v128_shl_n_16<2>),
800 MAP(imm_v128_shl_n_16<4>),
801 MAP(imm_v128_shl_n_16<6>),
802 MAP(imm_v128_shl_n_16<8>),
803 MAP(imm_v128_shl_n_16<10>),
804 MAP(imm_v128_shl_n_16<12>),
805 MAP(imm_v128_shl_n_16<14>),
806 MAP(imm_v128_shr_n_u16<1>),
807 MAP(imm_v128_shr_n_u16<2>),
808 MAP(imm_v128_shr_n_u16<4>),
809 MAP(imm_v128_shr_n_u16<6>),
810 MAP(imm_v128_shr_n_u16<8>),
811 MAP(imm_v128_shr_n_u16<10>),
812 MAP(imm_v128_shr_n_u16<12>),
813 MAP(imm_v128_shr_n_u16<14>),
814 MAP(imm_v128_shr_n_s16<1>),
815 MAP(imm_v128_shr_n_s16<2>),
816 MAP(imm_v128_shr_n_s16<4>),
817 MAP(imm_v128_shr_n_s16<6>),
818 MAP(imm_v128_shr_n_s16<8>),
819 MAP(imm_v128_shr_n_s16<10>),
820 MAP(imm_v128_shr_n_s16<12>),
821 MAP(imm_v128_shr_n_s16<14>),
822 MAP(imm_v128_shl_n_32<1>),
823 MAP(imm_v128_shl_n_32<4>),
824 MAP(imm_v128_shl_n_32<8>),
825 MAP(imm_v128_shl_n_32<12>),
826 MAP(imm_v128_shl_n_32<16>),
827 MAP(imm_v128_shl_n_32<20>),
828 MAP(imm_v128_shl_n_32<24>),
829 MAP(imm_v128_shl_n_32<28>),
830 MAP(imm_v128_shr_n_u32<1>),
831 MAP(imm_v128_shr_n_u32<4>),
832 MAP(imm_v128_shr_n_u32<8>),
833 MAP(imm_v128_shr_n_u32<12>),
834 MAP(imm_v128_shr_n_u32<16>),
835 MAP(imm_v128_shr_n_u32<20>),
836 MAP(imm_v128_shr_n_u32<24>),
837 MAP(imm_v128_shr_n_u32<28>),
838 MAP(imm_v128_shr_n_s32<1>),
839 MAP(imm_v128_shr_n_s32<4>),
840 MAP(imm_v128_shr_n_s32<8>),
841 MAP(imm_v128_shr_n_s32<12>),
842 MAP(imm_v128_shr_n_s32<16>),
843 MAP(imm_v128_shr_n_s32<20>),
844 MAP(imm_v128_shr_n_s32<24>),
845 MAP(imm_v128_shr_n_s32<28>),
Steinar Midtskogen0578d432018-05-28 14:47:36 +0200846 MAP(imm_v128_shl_n_64<1>),
847 MAP(imm_v128_shl_n_64<4>),
848 MAP(imm_v128_shl_n_64<8>),
849 MAP(imm_v128_shl_n_64<12>),
850 MAP(imm_v128_shl_n_64<16>),
851 MAP(imm_v128_shl_n_64<20>),
852 MAP(imm_v128_shl_n_64<24>),
853 MAP(imm_v128_shl_n_64<28>),
854 MAP(imm_v128_shl_n_64<32>),
855 MAP(imm_v128_shl_n_64<36>),
856 MAP(imm_v128_shl_n_64<40>),
857 MAP(imm_v128_shl_n_64<44>),
858 MAP(imm_v128_shl_n_64<48>),
859 MAP(imm_v128_shl_n_64<52>),
860 MAP(imm_v128_shl_n_64<56>),
861 MAP(imm_v128_shl_n_64<60>),
862 MAP(imm_v128_shr_n_u64<1>),
863 MAP(imm_v128_shr_n_u64<4>),
864 MAP(imm_v128_shr_n_u64<8>),
865 MAP(imm_v128_shr_n_u64<12>),
866 MAP(imm_v128_shr_n_u64<16>),
867 MAP(imm_v128_shr_n_u64<20>),
868 MAP(imm_v128_shr_n_u64<24>),
869 MAP(imm_v128_shr_n_u64<28>),
870 MAP(imm_v128_shr_n_u64<32>),
871 MAP(imm_v128_shr_n_u64<36>),
872 MAP(imm_v128_shr_n_u64<40>),
873 MAP(imm_v128_shr_n_u64<44>),
874 MAP(imm_v128_shr_n_u64<48>),
875 MAP(imm_v128_shr_n_u64<52>),
876 MAP(imm_v128_shr_n_u64<56>),
877 MAP(imm_v128_shr_n_u64<60>),
878 MAP(imm_v128_shr_n_s64<1>),
879 MAP(imm_v128_shr_n_s64<4>),
880 MAP(imm_v128_shr_n_s64<8>),
881 MAP(imm_v128_shr_n_s64<12>),
882 MAP(imm_v128_shr_n_s64<16>),
883 MAP(imm_v128_shr_n_s64<20>),
884 MAP(imm_v128_shr_n_s64<24>),
885 MAP(imm_v128_shr_n_s64<28>),
886 MAP(imm_v128_shr_n_s64<32>),
887 MAP(imm_v128_shr_n_s64<36>),
888 MAP(imm_v128_shr_n_s64<40>),
889 MAP(imm_v128_shr_n_s64<44>),
890 MAP(imm_v128_shr_n_s64<48>),
891 MAP(imm_v128_shr_n_s64<52>),
892 MAP(imm_v128_shr_n_s64<56>),
893 MAP(imm_v128_shr_n_s64<60>),
Steinar Midtskogen82d580c2016-09-30 13:14:04 +0200894 MAP(v128_from_v64),
895 MAP(v128_zip_8),
896 MAP(v128_zip_16),
897 MAP(v128_zip_32),
898 MAP(v128_mul_s16),
899 MAP(v128_unpack_u8_s16),
Steinar Midtskogen1b2b7392017-04-11 14:19:20 +0200900 MAP(v128_unpack_s8_s16),
Steinar Midtskogen82d580c2016-09-30 13:14:04 +0200901 MAP(v128_unpack_u16_s32),
902 MAP(v128_unpack_s16_s32),
903 MAP(v128_shl_8),
904 MAP(v128_shr_u8),
905 MAP(v128_shr_s8),
906 MAP(v128_shl_16),
907 MAP(v128_shr_u16),
908 MAP(v128_shr_s16),
909 MAP(v128_shl_32),
910 MAP(v128_shr_u32),
911 MAP(v128_shr_s32),
Steinar Midtskogen0578d432018-05-28 14:47:36 +0200912 MAP(v128_shl_64),
913 MAP(v128_shr_u64),
914 MAP(v128_shr_s64),
Steinar Midtskogen82d580c2016-09-30 13:14:04 +0200915 MAP(v128_hadd_u8),
Steinar Midtskogen0578d432018-05-28 14:47:36 +0200916 MAP(v128_dotp_su8),
Steinar Midtskogen82d580c2016-09-30 13:14:04 +0200917 MAP(v128_dotp_s16),
Steinar Midtskogen0578d432018-05-28 14:47:36 +0200918 MAP(v128_dotp_s32),
Steinar Midtskogen82d580c2016-09-30 13:14:04 +0200919 MAP(v128_low_u32),
920 MAP(v128_low_v64),
921 MAP(v128_high_v64),
922 MAP(v128_from_64),
923 MAP(v128_from_32),
Steinar Midtskogen0578d432018-05-28 14:47:36 +0200924 MAP(v128_movemask_8),
Steinar Midtskogen82d580c2016-09-30 13:14:04 +0200925 MAP(v128_zero),
926 MAP(v128_dup_8),
927 MAP(v128_dup_16),
928 MAP(v128_dup_32),
Steinar Midtskogen0578d432018-05-28 14:47:36 +0200929 MAP(v128_dup_64),
Steinar Midtskogen82d580c2016-09-30 13:14:04 +0200930 MAP(v128_unpacklo_u8_s16),
931 MAP(v128_unpackhi_u8_s16),
Steinar Midtskogen1b2b7392017-04-11 14:19:20 +0200932 MAP(v128_unpacklo_s8_s16),
933 MAP(v128_unpackhi_s8_s16),
Steinar Midtskogen0578d432018-05-28 14:47:36 +0200934 MAP(v128_blend_8),
Steinar Midtskogen6d2f3c22017-03-07 11:33:55 +0100935 MAP(u32_load_unaligned),
936 MAP(u32_store_unaligned),
937 MAP(v64_load_unaligned),
938 MAP(v64_store_unaligned),
939 MAP(v128_load_unaligned),
940 MAP(v128_store_unaligned),
Steinar Midtskogen1e424362016-09-30 13:14:04 +0200941 MAP(v256_sad_u8),
942 MAP(v256_ssd_u8),
Steinar Midtskogen0578d432018-05-28 14:47:36 +0200943 MAP(v256_sad_u16),
944 MAP(v256_ssd_s16),
Steinar Midtskogen1e424362016-09-30 13:14:04 +0200945 MAP(v256_hadd_u8),
Steinar Midtskogen0578d432018-05-28 14:47:36 +0200946 MAP(v256_low_u64),
947 MAP(v256_dotp_su8),
Steinar Midtskogen1e424362016-09-30 13:14:04 +0200948 MAP(v256_dotp_s16),
Steinar Midtskogen0578d432018-05-28 14:47:36 +0200949 MAP(v256_dotp_s32),
Steinar Midtskogen1e424362016-09-30 13:14:04 +0200950 MAP(v256_add_8),
951 MAP(v256_add_16),
Steinar Midtskogen0578d432018-05-28 14:47:36 +0200952 MAP(v256_sadd_s8),
953 MAP(v256_sadd_u8),
Steinar Midtskogen1e424362016-09-30 13:14:04 +0200954 MAP(v256_sadd_s16),
955 MAP(v256_add_32),
Steinar Midtskogen0578d432018-05-28 14:47:36 +0200956 MAP(v256_add_64),
Steinar Midtskogen1e424362016-09-30 13:14:04 +0200957 MAP(v256_sub_8),
958 MAP(v256_ssub_u8),
959 MAP(v256_ssub_s8),
960 MAP(v256_sub_16),
961 MAP(v256_ssub_u16),
962 MAP(v256_ssub_s16),
963 MAP(v256_sub_32),
Steinar Midtskogen0578d432018-05-28 14:47:36 +0200964 MAP(v256_sub_64),
Steinar Midtskogen1e424362016-09-30 13:14:04 +0200965 MAP(v256_ziplo_8),
966 MAP(v256_ziphi_8),
967 MAP(v256_ziplo_16),
968 MAP(v256_ziphi_16),
969 MAP(v256_ziplo_32),
970 MAP(v256_ziphi_32),
971 MAP(v256_ziplo_64),
972 MAP(v256_ziphi_64),
973 MAP(v256_unziphi_8),
974 MAP(v256_unziplo_8),
975 MAP(v256_unziphi_16),
976 MAP(v256_unziplo_16),
977 MAP(v256_unziphi_32),
978 MAP(v256_unziplo_32),
Steinar Midtskogen0578d432018-05-28 14:47:36 +0200979 MAP(v256_unziphi_64),
980 MAP(v256_unziplo_64),
981 MAP(v256_pack_s32_u16),
Steinar Midtskogen1e424362016-09-30 13:14:04 +0200982 MAP(v256_pack_s32_s16),
983 MAP(v256_pack_s16_u8),
984 MAP(v256_pack_s16_s8),
985 MAP(v256_or),
986 MAP(v256_xor),
987 MAP(v256_and),
988 MAP(v256_andn),
989 MAP(v256_mullo_s16),
990 MAP(v256_mulhi_s16),
991 MAP(v256_mullo_s32),
992 MAP(v256_madd_s16),
993 MAP(v256_madd_us8),
994 MAP(v256_avg_u8),
995 MAP(v256_rdavg_u8),
Steinar Midtskogen0578d432018-05-28 14:47:36 +0200996 MAP(v256_rdavg_u16),
Steinar Midtskogen1e424362016-09-30 13:14:04 +0200997 MAP(v256_avg_u16),
998 MAP(v256_min_u8),
999 MAP(v256_max_u8),
1000 MAP(v256_min_s8),
1001 MAP(v256_max_s8),
1002 MAP(v256_min_s16),
1003 MAP(v256_max_s16),
Steinar Midtskogen0578d432018-05-28 14:47:36 +02001004 MAP(v256_min_s32),
1005 MAP(v256_max_s32),
Steinar Midtskogen1e424362016-09-30 13:14:04 +02001006 MAP(v256_cmpgt_s8),
1007 MAP(v256_cmplt_s8),
1008 MAP(v256_cmpeq_8),
1009 MAP(v256_cmpgt_s16),
1010 MAP(v256_cmplt_s16),
1011 MAP(v256_cmpeq_16),
Steinar Midtskogen0578d432018-05-28 14:47:36 +02001012 MAP(v256_cmpgt_s32),
1013 MAP(v256_cmplt_s32),
1014 MAP(v256_cmpeq_32),
Steinar Midtskogen1e424362016-09-30 13:14:04 +02001015 MAP(v256_shuffle_8),
1016 MAP(v256_pshuffle_8),
Steinar Midtskogen0578d432018-05-28 14:47:36 +02001017 MAP(v256_wideshuffle_8),
Steinar Midtskogen1e424362016-09-30 13:14:04 +02001018 MAP(imm_v256_align<1>),
1019 MAP(imm_v256_align<2>),
1020 MAP(imm_v256_align<3>),
1021 MAP(imm_v256_align<4>),
1022 MAP(imm_v256_align<5>),
1023 MAP(imm_v256_align<6>),
1024 MAP(imm_v256_align<7>),
1025 MAP(imm_v256_align<8>),
1026 MAP(imm_v256_align<9>),
1027 MAP(imm_v256_align<10>),
1028 MAP(imm_v256_align<11>),
1029 MAP(imm_v256_align<12>),
1030 MAP(imm_v256_align<13>),
1031 MAP(imm_v256_align<14>),
1032 MAP(imm_v256_align<15>),
1033 MAP(imm_v256_align<16>),
1034 MAP(imm_v256_align<17>),
1035 MAP(imm_v256_align<18>),
1036 MAP(imm_v256_align<19>),
1037 MAP(imm_v256_align<20>),
1038 MAP(imm_v256_align<21>),
1039 MAP(imm_v256_align<22>),
1040 MAP(imm_v256_align<23>),
1041 MAP(imm_v256_align<24>),
1042 MAP(imm_v256_align<25>),
1043 MAP(imm_v256_align<26>),
1044 MAP(imm_v256_align<27>),
1045 MAP(imm_v256_align<28>),
1046 MAP(imm_v256_align<29>),
1047 MAP(imm_v256_align<30>),
1048 MAP(imm_v256_align<31>),
1049 MAP(v256_from_v128),
1050 MAP(v256_zip_8),
1051 MAP(v256_zip_16),
1052 MAP(v256_zip_32),
1053 MAP(v256_mul_s16),
1054 MAP(v256_unpack_u8_s16),
1055 MAP(v256_unpack_s8_s16),
1056 MAP(v256_unpack_u16_s32),
1057 MAP(v256_unpack_s16_s32),
1058 MAP(v256_shl_8),
1059 MAP(v256_shr_u8),
1060 MAP(v256_shr_s8),
1061 MAP(v256_shl_16),
1062 MAP(v256_shr_u16),
1063 MAP(v256_shr_s16),
1064 MAP(v256_shl_32),
1065 MAP(v256_shr_u32),
1066 MAP(v256_shr_s32),
Steinar Midtskogen0578d432018-05-28 14:47:36 +02001067 MAP(v256_shl_64),
1068 MAP(v256_shr_u64),
1069 MAP(v256_shr_s64),
Steinar Midtskogen1e424362016-09-30 13:14:04 +02001070 MAP(v256_abs_s8),
1071 MAP(v256_abs_s16),
Steinar Midtskogen0578d432018-05-28 14:47:36 +02001072 MAP(v256_padd_u8),
Steinar Midtskogen1e424362016-09-30 13:14:04 +02001073 MAP(v256_padd_s16),
1074 MAP(v256_unpacklo_u16_s32),
1075 MAP(v256_unpacklo_s16_s32),
1076 MAP(v256_unpackhi_u16_s32),
1077 MAP(v256_unpackhi_s16_s32),
Steinar Midtskogenc5a56b92018-06-14 21:03:56 +02001078 MAP(imm_v256_shr_n_word<1>),
1079 MAP(imm_v256_shr_n_word<2>),
1080 MAP(imm_v256_shr_n_word<3>),
1081 MAP(imm_v256_shr_n_word<4>),
1082 MAP(imm_v256_shr_n_word<5>),
1083 MAP(imm_v256_shr_n_word<6>),
1084 MAP(imm_v256_shr_n_word<7>),
1085 MAP(imm_v256_shr_n_word<8>),
1086 MAP(imm_v256_shr_n_word<9>),
1087 MAP(imm_v256_shr_n_word<10>),
1088 MAP(imm_v256_shr_n_word<11>),
1089 MAP(imm_v256_shr_n_word<12>),
1090 MAP(imm_v256_shr_n_word<13>),
1091 MAP(imm_v256_shr_n_word<14>),
1092 MAP(imm_v256_shr_n_word<15>),
1093 MAP(imm_v256_shl_n_word<1>),
1094 MAP(imm_v256_shl_n_word<2>),
1095 MAP(imm_v256_shl_n_word<3>),
1096 MAP(imm_v256_shl_n_word<4>),
1097 MAP(imm_v256_shl_n_word<5>),
1098 MAP(imm_v256_shl_n_word<6>),
1099 MAP(imm_v256_shl_n_word<7>),
1100 MAP(imm_v256_shl_n_word<8>),
1101 MAP(imm_v256_shl_n_word<9>),
1102 MAP(imm_v256_shl_n_word<10>),
1103 MAP(imm_v256_shl_n_word<11>),
1104 MAP(imm_v256_shl_n_word<12>),
1105 MAP(imm_v256_shl_n_word<13>),
1106 MAP(imm_v256_shl_n_word<14>),
1107 MAP(imm_v256_shl_n_word<15>),
Steinar Midtskogen1e424362016-09-30 13:14:04 +02001108 MAP(imm_v256_shr_n_byte<1>),
1109 MAP(imm_v256_shr_n_byte<2>),
1110 MAP(imm_v256_shr_n_byte<3>),
1111 MAP(imm_v256_shr_n_byte<4>),
1112 MAP(imm_v256_shr_n_byte<5>),
1113 MAP(imm_v256_shr_n_byte<6>),
1114 MAP(imm_v256_shr_n_byte<7>),
1115 MAP(imm_v256_shr_n_byte<8>),
1116 MAP(imm_v256_shr_n_byte<9>),
1117 MAP(imm_v256_shr_n_byte<10>),
1118 MAP(imm_v256_shr_n_byte<11>),
1119 MAP(imm_v256_shr_n_byte<12>),
1120 MAP(imm_v256_shr_n_byte<13>),
1121 MAP(imm_v256_shr_n_byte<14>),
1122 MAP(imm_v256_shr_n_byte<15>),
1123 MAP(imm_v256_shr_n_byte<16>),
1124 MAP(imm_v256_shr_n_byte<17>),
1125 MAP(imm_v256_shr_n_byte<18>),
1126 MAP(imm_v256_shr_n_byte<19>),
1127 MAP(imm_v256_shr_n_byte<20>),
1128 MAP(imm_v256_shr_n_byte<21>),
1129 MAP(imm_v256_shr_n_byte<22>),
1130 MAP(imm_v256_shr_n_byte<23>),
1131 MAP(imm_v256_shr_n_byte<24>),
1132 MAP(imm_v256_shr_n_byte<25>),
1133 MAP(imm_v256_shr_n_byte<26>),
1134 MAP(imm_v256_shr_n_byte<27>),
1135 MAP(imm_v256_shr_n_byte<28>),
1136 MAP(imm_v256_shr_n_byte<29>),
1137 MAP(imm_v256_shr_n_byte<30>),
1138 MAP(imm_v256_shr_n_byte<31>),
1139 MAP(imm_v256_shl_n_byte<1>),
1140 MAP(imm_v256_shl_n_byte<2>),
1141 MAP(imm_v256_shl_n_byte<3>),
1142 MAP(imm_v256_shl_n_byte<4>),
1143 MAP(imm_v256_shl_n_byte<5>),
1144 MAP(imm_v256_shl_n_byte<6>),
1145 MAP(imm_v256_shl_n_byte<7>),
1146 MAP(imm_v256_shl_n_byte<8>),
1147 MAP(imm_v256_shl_n_byte<9>),
1148 MAP(imm_v256_shl_n_byte<10>),
1149 MAP(imm_v256_shl_n_byte<11>),
1150 MAP(imm_v256_shl_n_byte<12>),
1151 MAP(imm_v256_shl_n_byte<13>),
1152 MAP(imm_v256_shl_n_byte<14>),
1153 MAP(imm_v256_shl_n_byte<15>),
1154 MAP(imm_v256_shl_n_byte<16>),
1155 MAP(imm_v256_shl_n_byte<17>),
1156 MAP(imm_v256_shl_n_byte<18>),
1157 MAP(imm_v256_shl_n_byte<19>),
1158 MAP(imm_v256_shl_n_byte<20>),
1159 MAP(imm_v256_shl_n_byte<21>),
1160 MAP(imm_v256_shl_n_byte<22>),
1161 MAP(imm_v256_shl_n_byte<23>),
1162 MAP(imm_v256_shl_n_byte<24>),
1163 MAP(imm_v256_shl_n_byte<25>),
1164 MAP(imm_v256_shl_n_byte<26>),
1165 MAP(imm_v256_shl_n_byte<27>),
1166 MAP(imm_v256_shl_n_byte<28>),
1167 MAP(imm_v256_shl_n_byte<29>),
1168 MAP(imm_v256_shl_n_byte<30>),
1169 MAP(imm_v256_shl_n_byte<31>),
1170 MAP(imm_v256_shl_n_8<1>),
1171 MAP(imm_v256_shl_n_8<2>),
1172 MAP(imm_v256_shl_n_8<3>),
1173 MAP(imm_v256_shl_n_8<4>),
1174 MAP(imm_v256_shl_n_8<5>),
1175 MAP(imm_v256_shl_n_8<6>),
1176 MAP(imm_v256_shl_n_8<7>),
1177 MAP(imm_v256_shr_n_u8<1>),
1178 MAP(imm_v256_shr_n_u8<2>),
1179 MAP(imm_v256_shr_n_u8<3>),
1180 MAP(imm_v256_shr_n_u8<4>),
1181 MAP(imm_v256_shr_n_u8<5>),
1182 MAP(imm_v256_shr_n_u8<6>),
1183 MAP(imm_v256_shr_n_u8<7>),
1184 MAP(imm_v256_shr_n_s8<1>),
1185 MAP(imm_v256_shr_n_s8<2>),
1186 MAP(imm_v256_shr_n_s8<3>),
1187 MAP(imm_v256_shr_n_s8<4>),
1188 MAP(imm_v256_shr_n_s8<5>),
1189 MAP(imm_v256_shr_n_s8<6>),
1190 MAP(imm_v256_shr_n_s8<7>),
1191 MAP(imm_v256_shl_n_16<1>),
1192 MAP(imm_v256_shl_n_16<2>),
1193 MAP(imm_v256_shl_n_16<4>),
1194 MAP(imm_v256_shl_n_16<6>),
1195 MAP(imm_v256_shl_n_16<8>),
1196 MAP(imm_v256_shl_n_16<10>),
1197 MAP(imm_v256_shl_n_16<12>),
1198 MAP(imm_v256_shl_n_16<14>),
1199 MAP(imm_v256_shr_n_u16<1>),
1200 MAP(imm_v256_shr_n_u16<2>),
1201 MAP(imm_v256_shr_n_u16<4>),
1202 MAP(imm_v256_shr_n_u16<6>),
1203 MAP(imm_v256_shr_n_u16<8>),
1204 MAP(imm_v256_shr_n_u16<10>),
1205 MAP(imm_v256_shr_n_u16<12>),
1206 MAP(imm_v256_shr_n_u16<14>),
1207 MAP(imm_v256_shr_n_s16<1>),
1208 MAP(imm_v256_shr_n_s16<2>),
1209 MAP(imm_v256_shr_n_s16<4>),
1210 MAP(imm_v256_shr_n_s16<6>),
1211 MAP(imm_v256_shr_n_s16<8>),
1212 MAP(imm_v256_shr_n_s16<10>),
1213 MAP(imm_v256_shr_n_s16<12>),
1214 MAP(imm_v256_shr_n_s16<14>),
1215 MAP(imm_v256_shl_n_32<1>),
1216 MAP(imm_v256_shl_n_32<4>),
1217 MAP(imm_v256_shl_n_32<8>),
1218 MAP(imm_v256_shl_n_32<12>),
1219 MAP(imm_v256_shl_n_32<16>),
1220 MAP(imm_v256_shl_n_32<20>),
1221 MAP(imm_v256_shl_n_32<24>),
1222 MAP(imm_v256_shl_n_32<28>),
1223 MAP(imm_v256_shr_n_u32<1>),
1224 MAP(imm_v256_shr_n_u32<4>),
1225 MAP(imm_v256_shr_n_u32<8>),
1226 MAP(imm_v256_shr_n_u32<12>),
1227 MAP(imm_v256_shr_n_u32<16>),
1228 MAP(imm_v256_shr_n_u32<20>),
1229 MAP(imm_v256_shr_n_u32<24>),
1230 MAP(imm_v256_shr_n_u32<28>),
1231 MAP(imm_v256_shr_n_s32<1>),
1232 MAP(imm_v256_shr_n_s32<4>),
1233 MAP(imm_v256_shr_n_s32<8>),
1234 MAP(imm_v256_shr_n_s32<12>),
1235 MAP(imm_v256_shr_n_s32<16>),
1236 MAP(imm_v256_shr_n_s32<20>),
1237 MAP(imm_v256_shr_n_s32<24>),
1238 MAP(imm_v256_shr_n_s32<28>),
Steinar Midtskogen0578d432018-05-28 14:47:36 +02001239 MAP(imm_v256_shl_n_64<1>),
1240 MAP(imm_v256_shl_n_64<4>),
1241 MAP(imm_v256_shl_n_64<8>),
1242 MAP(imm_v256_shl_n_64<12>),
1243 MAP(imm_v256_shl_n_64<16>),
1244 MAP(imm_v256_shl_n_64<20>),
1245 MAP(imm_v256_shl_n_64<24>),
1246 MAP(imm_v256_shl_n_64<28>),
1247 MAP(imm_v256_shl_n_64<32>),
1248 MAP(imm_v256_shl_n_64<36>),
1249 MAP(imm_v256_shl_n_64<40>),
1250 MAP(imm_v256_shl_n_64<44>),
1251 MAP(imm_v256_shl_n_64<48>),
1252 MAP(imm_v256_shl_n_64<52>),
1253 MAP(imm_v256_shl_n_64<56>),
1254 MAP(imm_v256_shl_n_64<60>),
1255 MAP(imm_v256_shr_n_u64<1>),
1256 MAP(imm_v256_shr_n_u64<4>),
1257 MAP(imm_v256_shr_n_u64<8>),
1258 MAP(imm_v256_shr_n_u64<12>),
1259 MAP(imm_v256_shr_n_u64<16>),
1260 MAP(imm_v256_shr_n_u64<20>),
1261 MAP(imm_v256_shr_n_u64<24>),
1262 MAP(imm_v256_shr_n_u64<28>),
1263 MAP(imm_v256_shr_n_u64<32>),
1264 MAP(imm_v256_shr_n_u64<36>),
1265 MAP(imm_v256_shr_n_u64<40>),
1266 MAP(imm_v256_shr_n_u64<44>),
1267 MAP(imm_v256_shr_n_u64<48>),
1268 MAP(imm_v256_shr_n_u64<52>),
1269 MAP(imm_v256_shr_n_u64<56>),
1270 MAP(imm_v256_shr_n_u64<60>),
1271 MAP(imm_v256_shr_n_s64<1>),
1272 MAP(imm_v256_shr_n_s64<4>),
1273 MAP(imm_v256_shr_n_s64<8>),
1274 MAP(imm_v256_shr_n_s64<12>),
1275 MAP(imm_v256_shr_n_s64<16>),
1276 MAP(imm_v256_shr_n_s64<20>),
1277 MAP(imm_v256_shr_n_s64<24>),
1278 MAP(imm_v256_shr_n_s64<28>),
1279 MAP(imm_v256_shr_n_s64<32>),
1280 MAP(imm_v256_shr_n_s64<36>),
1281 MAP(imm_v256_shr_n_s64<40>),
1282 MAP(imm_v256_shr_n_s64<44>),
1283 MAP(imm_v256_shr_n_s64<48>),
1284 MAP(imm_v256_shr_n_s64<52>),
1285 MAP(imm_v256_shr_n_s64<56>),
1286 MAP(imm_v256_shr_n_s64<60>),
1287 MAP(v256_movemask_8),
Steinar Midtskogen1e424362016-09-30 13:14:04 +02001288 MAP(v256_zero),
1289 MAP(v256_dup_8),
1290 MAP(v256_dup_16),
1291 MAP(v256_dup_32),
Steinar Midtskogen0578d432018-05-28 14:47:36 +02001292 MAP(v256_dup_64),
Steinar Midtskogen1e424362016-09-30 13:14:04 +02001293 MAP(v256_low_u32),
1294 MAP(v256_low_v64),
1295 MAP(v256_from_64),
1296 MAP(v256_from_v64),
1297 MAP(v256_ziplo_128),
1298 MAP(v256_ziphi_128),
1299 MAP(v256_unpacklo_u8_s16),
1300 MAP(v256_unpackhi_u8_s16),
1301 MAP(v256_unpacklo_s8_s16),
1302 MAP(v256_unpackhi_s8_s16),
Steinar Midtskogen0578d432018-05-28 14:47:36 +02001303 MAP(v256_blend_8),
James Zern664f04d2022-05-24 17:30:58 -07001304 { nullptr, nullptr, nullptr } };
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001305#undef MAP
1306
1307// Map reference functions to machine tuned functions. Since the
1308// functions depend on machine tuned types, the non-machine tuned
1309// instantiations of the test can't refer to these functions directly,
1310// so we refer to them by name and do the mapping here.
Steinar Midtskogen8b28d862017-01-09 11:33:20 +01001311void Map(const char *name, fptr *ref, fptr *simd) {
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001312 unsigned int i;
Steinar Midtskogen8b28d862017-01-09 11:33:20 +01001313 for (i = 0; m[i].name && strcmp(name, m[i].name); i++) {
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001314 }
1315
Steinar Midtskogen8b28d862017-01-09 11:33:20 +01001316 *ref = m[i].ref;
1317 *simd = m[i].simd;
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001318}
1319
Steinar Midtskogen0578d432018-05-28 14:47:36 +02001320// Used for printing errors in TestSimd1Arg, TestSimd2Args and TestSimd3Args
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001321std::string Print(const uint8_t *a, int size) {
1322 std::string text = "0x";
1323 for (int i = 0; i < size; i++) {
Steinar Midtskogen03ab5272017-01-10 07:30:47 +01001324 const uint8_t c = a[!CONFIG_BIG_ENDIAN ? size - 1 - i : i];
1325 // Same as snprintf(..., ..., "%02x", c)
1326 text += (c >> 4) + '0' + ((c >> 4) > 9) * ('a' - '0' - 10);
1327 text += (c & 15) + '0' + ((c & 15) > 9) * ('a' - '0' - 10);
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001328 }
1329
1330 return text;
1331}
1332
Steinar Midtskogen0578d432018-05-28 14:47:36 +02001333// Used in TestSimd1Arg, TestSimd2Args and TestSimd3Args to restrict argument
1334// ranges
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001335void SetMask(uint8_t *s, int size, uint32_t mask, uint32_t maskwidth) {
1336 switch (maskwidth) {
1337 case 0: {
1338 break;
1339 }
1340 case 8: {
1341 for (int i = 0; i < size; i++) s[i] &= mask;
1342 break;
1343 }
1344 case 16: {
1345 uint16_t *t = reinterpret_cast<uint16_t *>(s);
1346 assert(!(reinterpret_cast<uintptr_t>(s) & 1));
1347 for (int i = 0; i < size / 2; i++) t[i] &= mask;
1348 break;
1349 }
1350 case 32: {
1351 uint32_t *t = reinterpret_cast<uint32_t *>(s);
1352 assert(!(reinterpret_cast<uintptr_t>(s) & 3));
1353 for (int i = 0; i < size / 4; i++) t[i] &= mask;
1354 break;
1355 }
1356 case 64: {
1357 uint64_t *t = reinterpret_cast<uint64_t *>(s);
1358 assert(!(reinterpret_cast<uintptr_t>(s) & 7));
1359 for (int i = 0; i < size / 8; i++) t[i] &= mask;
1360 break;
1361 }
1362 default: {
1363 FAIL() << "Unsupported mask width";
1364 break;
1365 }
1366 }
1367}
1368
Steinar Midtskogen6c795762017-03-07 20:55:48 +01001369// We need some extra load/store functions
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001370void u64_store_aligned(void *p, uint64_t a) {
1371 v64_store_aligned(p, v64_from_64(a));
1372}
Steinar Midtskogen6c795762017-03-07 20:55:48 +01001373void s32_store_aligned(void *p, int32_t a) {
1374 u32_store_aligned(p, static_cast<uint32_t>(a));
1375}
1376void s64_store_aligned(void *p, int64_t a) {
1377 v64_store_aligned(p, v64_from_64(static_cast<uint64_t>(a)));
1378}
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001379
1380void c_u64_store_aligned(void *p, uint64_t a) {
1381 c_v64_store_aligned(p, c_v64_from_64(a));
1382}
1383
Steinar Midtskogen6c795762017-03-07 20:55:48 +01001384void c_s32_store_aligned(void *p, int32_t a) {
1385 c_u32_store_aligned(p, static_cast<uint32_t>(a));
1386}
Steinar Midtskogen82d580c2016-09-30 13:14:04 +02001387
Steinar Midtskogen6c795762017-03-07 20:55:48 +01001388void c_s64_store_aligned(void *p, int64_t a) {
1389 c_v64_store_aligned(p, c_v64_from_64(static_cast<uint64_t>(a)));
1390}
1391
1392uint64_t u64_load_aligned(const void *p) {
1393 return v64_u64(v64_load_aligned(p));
1394}
// Returns the 16-bit value stored at p in native byte order.
//
// The bytes are copied out with memcpy() instead of dereferencing a
// uint16_t pointer: the test buffers handed to this function are declared
// and filled as uint8_t arrays, and reading such storage through a uint16_t
// lvalue is not permitted by the aliasing rules.
uint16_t u16_load_aligned(const void *p) {
  uint16_t value;
  memcpy(&value, p, sizeof(value));
  return value;
}
// Returns the single byte stored at p.
uint8_t u8_load_aligned(const void *p) {
  const uint8_t *const byte = static_cast<const uint8_t *>(p);
  return *byte;
}
1401
1402uint64_t c_u64_load_aligned(const void *p) {
Steinar Midtskogen82d580c2016-09-30 13:14:04 +02001403 return c_v64_u64(c_v64_load_aligned(p));
1404}
// Returns the 16-bit value stored at p in native byte order; this is the
// load that is paired with the c_ store functions on the reference side of a
// comparison.
//
// The bytes are copied out with memcpy() instead of dereferencing a
// uint16_t pointer: the test buffers handed to this function are declared
// and filled as uint8_t arrays, and reading such storage through a uint16_t
// lvalue is not permitted by the aliasing rules.
uint16_t c_u16_load_aligned(const void *p) {
  uint16_t value;
  memcpy(&value, p, sizeof(value));
  return value;
}
// Returns the single byte stored at p; this is the load that is paired with
// the c_ store functions on the reference side of a comparison.
uint8_t c_u8_load_aligned(const void *p) {
  const uint8_t *const byte = static_cast<const uint8_t *>(p);
  return *byte;
}
Steinar Midtskogen82d580c2016-09-30 13:14:04 +02001411
Steinar Midtskogen0578d432018-05-28 14:47:36 +02001412// CompareSimd1Arg, CompareSimd2Args and CompareSimd3Args compare
1413// intrinsics taking 1, 2 or 3 arguments respectively with their
1414// corresponding C reference. Ideally, the loads and stores should
1415// have gone into the template parameter list, but v64 and v128 could
1416// be typedef'ed to the same type (which is the case on x86) and then
1417// we can't instantiate both v64 and v128, so the function return and
1418// argument types, including the always differing types in the C
1419// equivalent are used instead. The function arguments must be void
1420// pointers and then go through a cast to avoid matching errors in the
1421// branches eliminated by the typeid tests in the calling function.
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001422template <typename Ret, typename Arg, typename CRet, typename CArg>
Steinar Midtskogen8b28d862017-01-09 11:33:20 +01001423int CompareSimd1Arg(fptr store, fptr load, fptr simd, void *d, fptr c_store,
1424 fptr c_load, fptr c_simd, void *ref_d, const void *a) {
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001425 void (*const my_store)(void *, Ret) = (void (*const)(void *, Ret))store;
1426 Arg (*const my_load)(const void *) = (Arg(*const)(const void *))load;
1427 Ret (*const my_simd)(Arg) = (Ret(*const)(Arg))simd;
1428 void (*const my_c_store)(void *, CRet) = (void (*const)(void *, CRet))c_store;
1429 CArg (*const my_c_load)(const void *) = (CArg(*const)(const void *))c_load;
1430 CRet (*const my_c_simd)(CArg) = (CRet(*const)(CArg))c_simd;
1431
1432 // Call reference and intrinsic
Steinar Midtskogenc20176e2017-03-01 09:16:09 +01001433 my_c_store(ref_d, my_c_simd(my_c_load(a)));
1434 my_store(d, my_simd(my_load(a)));
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001435
1436 // Compare results
1437 return memcmp(ref_d, d, sizeof(CRet));
1438}
1439
1440template <typename Ret, typename Arg1, typename Arg2, typename CRet,
1441 typename CArg1, typename CArg2>
Steinar Midtskogen8b28d862017-01-09 11:33:20 +01001442int CompareSimd2Args(fptr store, fptr load1, fptr load2, fptr simd, void *d,
1443 fptr c_store, fptr c_load1, fptr c_load2, fptr c_simd,
1444 void *ref_d, const void *a, const void *b) {
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001445 void (*const my_store)(void *, Ret) = (void (*const)(void *, Ret))store;
1446 Arg1 (*const my_load1)(const void *) = (Arg1(*const)(const void *))load1;
1447 Arg2 (*const my_load2)(const void *) = (Arg2(*const)(const void *))load2;
1448 Ret (*const my_simd)(Arg1, Arg2) = (Ret(*const)(Arg1, Arg2))simd;
1449 void (*const my_c_store)(void *, CRet) = (void (*const)(void *, CRet))c_store;
1450 CArg1 (*const my_c_load1)(const void *) =
1451 (CArg1(*const)(const void *))c_load1;
1452 CArg2 (*const my_c_load2)(const void *) =
1453 (CArg2(*const)(const void *))c_load2;
1454 CRet (*const my_c_simd)(CArg1, CArg2) = (CRet(*const)(CArg1, CArg2))c_simd;
1455
1456 // Call reference and intrinsic
Steinar Midtskogenc20176e2017-03-01 09:16:09 +01001457 my_c_store(ref_d, my_c_simd(my_c_load1(a), my_c_load2(b)));
1458 my_store(d, my_simd(my_load1(a), my_load2(b)));
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001459
1460 // Compare results
1461 return memcmp(ref_d, d, sizeof(CRet));
1462}
1463
Steinar Midtskogen0578d432018-05-28 14:47:36 +02001464template <typename Ret, typename Arg1, typename Arg2, typename Arg3,
1465 typename CRet, typename CArg1, typename CArg2, typename CArg3>
1466int CompareSimd3Args(fptr store, fptr load1, fptr load2, fptr load3, fptr simd,
1467 void *d, fptr c_store, fptr c_load1, fptr c_load2,
1468 fptr c_load3, fptr c_simd, void *ref_d, const void *a,
1469 const void *b, const void *c) {
1470 void (*const my_store)(void *, Ret) = (void (*const)(void *, Ret))store;
1471 Arg1 (*const my_load1)(const void *) = (Arg1(*const)(const void *))load1;
1472 Arg2 (*const my_load2)(const void *) = (Arg2(*const)(const void *))load2;
1473 Arg3 (*const my_load3)(const void *) = (Arg3(*const)(const void *))load3;
1474 Ret (*const my_simd)(Arg1, Arg2, Arg3) = (Ret(*const)(Arg1, Arg2, Arg3))simd;
1475 void (*const my_c_store)(void *, CRet) = (void (*const)(void *, CRet))c_store;
1476 CArg1 (*const my_c_load1)(const void *) =
1477 (CArg1(*const)(const void *))c_load1;
1478 CArg2 (*const my_c_load2)(const void *) =
1479 (CArg2(*const)(const void *))c_load2;
Steinar Midtskogen12e7c4a2020-03-24 07:53:19 +01001480 CArg3 (*const my_c_load3)(const void *) =
1481 (CArg3(*const)(const void *))c_load3;
Steinar Midtskogen0578d432018-05-28 14:47:36 +02001482 CRet (*const my_c_simd)(CArg1, CArg2, CArg3) =
1483 (CRet(*const)(CArg1, CArg2, CArg3))c_simd;
1484
1485 // Call reference and intrinsic
1486 my_c_store(ref_d, my_c_simd(my_c_load1(a), my_c_load2(b), my_c_load3(c)));
1487 my_store(d, my_simd(my_load1(a), my_load2(b), my_load3(c)));
1488
1489 // Compare results
1490 return memcmp(ref_d, d, sizeof(CRet));
1491}
1492
Steinar Midtskogen04305c62016-09-30 13:14:04 +02001493} // namespace
1494
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001495template <typename CRet, typename CArg>
1496void TestSimd1Arg(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
1497 const char *name) {
1498 ACMRandom rnd(ACMRandom::DeterministicSeed());
Steinar Midtskogen8b28d862017-01-09 11:33:20 +01001499 fptr ref_simd;
1500 fptr simd;
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001501 int error = 0;
Steinar Midtskogen593c6cd2018-06-11 10:00:28 +02001502 DECLARE_ALIGNED(32, uint8_t, s[32]);
1503 DECLARE_ALIGNED(32, uint8_t, d[32]);
1504 DECLARE_ALIGNED(32, uint8_t, ref_d[32]);
1505 assert(sizeof(CArg) <= 32 && sizeof(CRet) <= 32);
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001506 memset(ref_d, 0, sizeof(ref_d));
1507 memset(d, 0, sizeof(d));
1508
1509 Map(name, &ref_simd, &simd);
James Zern664f04d2022-05-24 17:30:58 -07001510 if (simd == nullptr || ref_simd == nullptr) {
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001511 FAIL() << "Internal error: Unknown intrinsic function " << name;
1512 }
James Zern8c636c12017-02-28 20:56:06 -08001513 for (unsigned int count = 0;
1514 count < iterations && !error && !testing::Test::HasFailure(); count++) {
Steinar Midtskogen7d532712017-03-19 21:34:47 +01001515 for (unsigned int c = 0; c < sizeof(CArg); c++) s[c] = rnd.Rand8();
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001516
1517 if (maskwidth) {
Steinar Midtskogen7d532712017-03-19 21:34:47 +01001518 SetMask(s, sizeof(CArg), mask, maskwidth);
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001519 }
1520
1521 if (typeid(CRet) == typeid(c_v64) && typeid(CArg) == typeid(c_v64)) {
1522 // V64_V64
James Zern460bcce2022-06-04 15:21:23 -07001523 error = CompareSimd1Arg<v64, v64, c_v64, c_v64>(
Steinar Midtskogen8b28d862017-01-09 11:33:20 +01001524 reinterpret_cast<fptr>(v64_store_aligned),
1525 reinterpret_cast<fptr>(v64_load_aligned), simd, d,
1526 reinterpret_cast<fptr>(c_v64_store_aligned),
1527 reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
Steinar Midtskogen04305c62016-09-30 13:14:04 +02001528 } else if (typeid(CRet) == typeid(c_v64) &&
Steinar Midtskogen6c795762017-03-07 20:55:48 +01001529 typeid(CArg) == typeid(uint8_t)) {
1530 // V64_U8
James Zern460bcce2022-06-04 15:21:23 -07001531 error = CompareSimd1Arg<v64, uint8_t, c_v64, uint8_t>(
Steinar Midtskogen6c795762017-03-07 20:55:48 +01001532 reinterpret_cast<fptr>(v64_store_aligned),
1533 reinterpret_cast<fptr>(u8_load_aligned), simd, d,
1534 reinterpret_cast<fptr>(c_v64_store_aligned),
1535 reinterpret_cast<fptr>(c_u8_load_aligned), ref_simd, ref_d, s);
1536 } else if (typeid(CRet) == typeid(c_v64) &&
1537 typeid(CArg) == typeid(uint16_t)) {
1538 // V64_U16
James Zern460bcce2022-06-04 15:21:23 -07001539 error = CompareSimd1Arg<v64, uint16_t, c_v64, uint16_t>(
Steinar Midtskogen6c795762017-03-07 20:55:48 +01001540 reinterpret_cast<fptr>(v64_store_aligned),
1541 reinterpret_cast<fptr>(u16_load_aligned), simd, d,
1542 reinterpret_cast<fptr>(c_v64_store_aligned),
1543 reinterpret_cast<fptr>(c_u16_load_aligned), ref_simd, ref_d, s);
1544 } else if (typeid(CRet) == typeid(c_v64) &&
Steinar Midtskogen04305c62016-09-30 13:14:04 +02001545 typeid(CArg) == typeid(uint32_t)) {
1546 // V64_U32
James Zern460bcce2022-06-04 15:21:23 -07001547 error = CompareSimd1Arg<v64, uint32_t, c_v64, uint32_t>(
Steinar Midtskogen04305c62016-09-30 13:14:04 +02001548 reinterpret_cast<fptr>(v64_store_aligned),
1549 reinterpret_cast<fptr>(u32_load_aligned), simd, d,
1550 reinterpret_cast<fptr>(c_v64_store_aligned),
1551 reinterpret_cast<fptr>(c_u32_load_aligned), ref_simd, ref_d, s);
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001552 } else if (typeid(CRet) == typeid(uint64_t) &&
1553 typeid(CArg) == typeid(c_v64)) {
1554 // U64_V64
James Zern460bcce2022-06-04 15:21:23 -07001555 error = CompareSimd1Arg<uint64_t, v64, uint64_t, c_v64>(
Steinar Midtskogen8b28d862017-01-09 11:33:20 +01001556 reinterpret_cast<fptr>(u64_store_aligned),
1557 reinterpret_cast<fptr>(v64_load_aligned), simd, d,
Steinar Midtskogen04305c62016-09-30 13:14:04 +02001558 reinterpret_cast<fptr>(c_u64_store_aligned),
Steinar Midtskogen8b28d862017-01-09 11:33:20 +01001559 reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
Steinar Midtskogen6c795762017-03-07 20:55:48 +01001560 } else if (typeid(CRet) == typeid(int64_t) &&
1561 typeid(CArg) == typeid(c_v64)) {
1562 // S64_V64
James Zern460bcce2022-06-04 15:21:23 -07001563 error = CompareSimd1Arg<int64_t, v64, int64_t, c_v64>(
Steinar Midtskogen6c795762017-03-07 20:55:48 +01001564 reinterpret_cast<fptr>(s64_store_aligned),
1565 reinterpret_cast<fptr>(v64_load_aligned), simd, d,
1566 reinterpret_cast<fptr>(c_s64_store_aligned),
1567 reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
Steinar Midtskogen04305c62016-09-30 13:14:04 +02001568 } else if (typeid(CRet) == typeid(uint32_t) &&
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001569 typeid(CArg) == typeid(c_v64)) {
Steinar Midtskogen04305c62016-09-30 13:14:04 +02001570 // U32_V64
James Zern460bcce2022-06-04 15:21:23 -07001571 error = CompareSimd1Arg<uint32_t, v64, uint32_t, c_v64>(
Steinar Midtskogen04305c62016-09-30 13:14:04 +02001572 reinterpret_cast<fptr>(u32_store_aligned),
Steinar Midtskogen8b28d862017-01-09 11:33:20 +01001573 reinterpret_cast<fptr>(v64_load_aligned), simd, d,
Steinar Midtskogen04305c62016-09-30 13:14:04 +02001574 reinterpret_cast<fptr>(c_u32_store_aligned),
Steinar Midtskogen8b28d862017-01-09 11:33:20 +01001575 reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
Steinar Midtskogen6c795762017-03-07 20:55:48 +01001576 } else if (typeid(CRet) == typeid(int32_t) &&
1577 typeid(CArg) == typeid(c_v64)) {
1578 // S32_V64
James Zern460bcce2022-06-04 15:21:23 -07001579 error = CompareSimd1Arg<int32_t, v64, int32_t, c_v64>(
Steinar Midtskogen6c795762017-03-07 20:55:48 +01001580 reinterpret_cast<fptr>(s32_store_aligned),
1581 reinterpret_cast<fptr>(v64_load_aligned), simd, d,
1582 reinterpret_cast<fptr>(c_s32_store_aligned),
1583 reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
Steinar Midtskogen82d580c2016-09-30 13:14:04 +02001584 } else if (typeid(CRet) == typeid(uint32_t) &&
1585 typeid(CArg) == typeid(c_v128)) {
1586 // U32_V128
James Zern460bcce2022-06-04 15:21:23 -07001587 error = CompareSimd1Arg<uint32_t, v128, uint32_t, c_v128>(
Steinar Midtskogen82d580c2016-09-30 13:14:04 +02001588 reinterpret_cast<fptr>(u32_store_aligned),
1589 reinterpret_cast<fptr>(v128_load_aligned), simd, d,
1590 reinterpret_cast<fptr>(c_u32_store_aligned),
1591 reinterpret_cast<fptr>(c_v128_load_aligned), ref_simd, ref_d, s);
1592 } else if (typeid(CRet) == typeid(uint64_t) &&
1593 typeid(CArg) == typeid(c_v128)) {
1594 // U64_V128
James Zern460bcce2022-06-04 15:21:23 -07001595 error = CompareSimd1Arg<uint64_t, v128, uint64_t, c_v128>(
Steinar Midtskogen82d580c2016-09-30 13:14:04 +02001596 reinterpret_cast<fptr>(u64_store_aligned),
1597 reinterpret_cast<fptr>(v128_load_aligned), simd, d,
1598 reinterpret_cast<fptr>(c_u64_store_aligned),
1599 reinterpret_cast<fptr>(c_v128_load_aligned), ref_simd, ref_d, s);
Steinar Midtskogen1e424362016-09-30 13:14:04 +02001600 } else if (typeid(CRet) == typeid(uint64_t) &&
1601 typeid(CArg) == typeid(c_v256)) {
1602 // U64_V256
James Zern460bcce2022-06-04 15:21:23 -07001603 error = CompareSimd1Arg<uint64_t, v256, uint64_t, c_v256>(
Steinar Midtskogen1e424362016-09-30 13:14:04 +02001604 reinterpret_cast<fptr>(u64_store_aligned),
1605 reinterpret_cast<fptr>(v256_load_aligned), simd, d,
1606 reinterpret_cast<fptr>(c_u64_store_aligned),
1607 reinterpret_cast<fptr>(c_v256_load_aligned), ref_simd, ref_d, s);
Steinar Midtskogen82d580c2016-09-30 13:14:04 +02001608 } else if (typeid(CRet) == typeid(c_v64) &&
1609 typeid(CArg) == typeid(c_v128)) {
1610 // V64_V128
James Zern460bcce2022-06-04 15:21:23 -07001611 error = CompareSimd1Arg<v64, v128, c_v64, c_v128>(
Steinar Midtskogen82d580c2016-09-30 13:14:04 +02001612 reinterpret_cast<fptr>(v64_store_aligned),
1613 reinterpret_cast<fptr>(v128_load_aligned), simd, d,
1614 reinterpret_cast<fptr>(c_v64_store_aligned),
1615 reinterpret_cast<fptr>(c_v128_load_aligned), ref_simd, ref_d, s);
1616 } else if (typeid(CRet) == typeid(c_v128) &&
1617 typeid(CArg) == typeid(c_v128)) {
1618 // V128_V128
James Zern460bcce2022-06-04 15:21:23 -07001619 error = CompareSimd1Arg<v128, v128, c_v128, c_v128>(
Steinar Midtskogen82d580c2016-09-30 13:14:04 +02001620 reinterpret_cast<fptr>(v128_store_aligned),
1621 reinterpret_cast<fptr>(v128_load_aligned), simd, d,
1622 reinterpret_cast<fptr>(c_v128_store_aligned),
1623 reinterpret_cast<fptr>(c_v128_load_aligned), ref_simd, ref_d, s);
1624 } else if (typeid(CRet) == typeid(c_v128) &&
1625 typeid(CArg) == typeid(c_v64)) {
1626 // V128_V64
James Zern460bcce2022-06-04 15:21:23 -07001627 error = CompareSimd1Arg<v128, v64, c_v128, c_v64>(
Steinar Midtskogen82d580c2016-09-30 13:14:04 +02001628 reinterpret_cast<fptr>(v128_store_aligned),
1629 reinterpret_cast<fptr>(v64_load_aligned), simd, d,
1630 reinterpret_cast<fptr>(c_v128_store_aligned),
1631 reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
1632 } else if (typeid(CRet) == typeid(c_v128) &&
Steinar Midtskogen6c795762017-03-07 20:55:48 +01001633 typeid(CArg) == typeid(uint8_t)) {
1634 // V128_U8
James Zern460bcce2022-06-04 15:21:23 -07001635 error = CompareSimd1Arg<v128, uint8_t, c_v128, uint8_t>(
Steinar Midtskogen6c795762017-03-07 20:55:48 +01001636 reinterpret_cast<fptr>(v128_store_aligned),
1637 reinterpret_cast<fptr>(u8_load_aligned), simd, d,
1638 reinterpret_cast<fptr>(c_v128_store_aligned),
1639 reinterpret_cast<fptr>(c_u8_load_aligned), ref_simd, ref_d, s);
1640 } else if (typeid(CRet) == typeid(c_v128) &&
1641 typeid(CArg) == typeid(uint16_t)) {
1642 // V128_U16
James Zern460bcce2022-06-04 15:21:23 -07001643 error = CompareSimd1Arg<v128, uint16_t, c_v128, uint16_t>(
Steinar Midtskogen6c795762017-03-07 20:55:48 +01001644 reinterpret_cast<fptr>(v128_store_aligned),
1645 reinterpret_cast<fptr>(u16_load_aligned), simd, d,
1646 reinterpret_cast<fptr>(c_v128_store_aligned),
1647 reinterpret_cast<fptr>(c_u16_load_aligned), ref_simd, ref_d, s);
1648 } else if (typeid(CRet) == typeid(c_v128) &&
Steinar Midtskogen82d580c2016-09-30 13:14:04 +02001649 typeid(CArg) == typeid(uint32_t)) {
1650 // V128_U32
James Zern460bcce2022-06-04 15:21:23 -07001651 error = CompareSimd1Arg<v128, uint32_t, c_v128, uint32_t>(
Steinar Midtskogen82d580c2016-09-30 13:14:04 +02001652 reinterpret_cast<fptr>(v128_store_aligned),
1653 reinterpret_cast<fptr>(u32_load_aligned), simd, d,
1654 reinterpret_cast<fptr>(c_v128_store_aligned),
1655 reinterpret_cast<fptr>(c_u32_load_aligned), ref_simd, ref_d, s);
Steinar Midtskogen0578d432018-05-28 14:47:36 +02001656 } else if (typeid(CRet) == typeid(c_v128) &&
1657 typeid(CArg) == typeid(uint64_t)) {
1658 // V128_U64
James Zern460bcce2022-06-04 15:21:23 -07001659 error = CompareSimd1Arg<v128, uint64_t, c_v128, uint64_t>(
Steinar Midtskogen0578d432018-05-28 14:47:36 +02001660 reinterpret_cast<fptr>(v128_store_aligned),
1661 reinterpret_cast<fptr>(u64_load_aligned), simd, d,
1662 reinterpret_cast<fptr>(c_v128_store_aligned),
1663 reinterpret_cast<fptr>(c_u64_load_aligned), ref_simd, ref_d, s);
Steinar Midtskogen1e424362016-09-30 13:14:04 +02001664 } else if (typeid(CRet) == typeid(c_v256) &&
1665 typeid(CArg) == typeid(c_v256)) {
1666 // V256_V256
James Zern460bcce2022-06-04 15:21:23 -07001667 error = CompareSimd1Arg<v256, v256, c_v256, c_v256>(
Steinar Midtskogen1e424362016-09-30 13:14:04 +02001668 reinterpret_cast<fptr>(v256_store_aligned),
1669 reinterpret_cast<fptr>(v256_load_aligned), simd, d,
1670 reinterpret_cast<fptr>(c_v256_store_aligned),
1671 reinterpret_cast<fptr>(c_v256_load_aligned), ref_simd, ref_d, s);
1672 } else if (typeid(CRet) == typeid(c_v256) &&
1673 typeid(CArg) == typeid(c_v128)) {
1674 // V256_V128
James Zern460bcce2022-06-04 15:21:23 -07001675 error = CompareSimd1Arg<v256, v128, c_v256, c_v128>(
Steinar Midtskogen1e424362016-09-30 13:14:04 +02001676 reinterpret_cast<fptr>(v256_store_aligned),
1677 reinterpret_cast<fptr>(v128_load_aligned), simd, d,
1678 reinterpret_cast<fptr>(c_v256_store_aligned),
1679 reinterpret_cast<fptr>(c_v128_load_aligned), ref_simd, ref_d, s);
1680 } else if (typeid(CRet) == typeid(c_v256) &&
1681 typeid(CArg) == typeid(uint8_t)) {
1682 // V256_U8
James Zern460bcce2022-06-04 15:21:23 -07001683 error = CompareSimd1Arg<v256, uint8_t, c_v256, uint8_t>(
Steinar Midtskogen1e424362016-09-30 13:14:04 +02001684 reinterpret_cast<fptr>(v256_store_aligned),
1685 reinterpret_cast<fptr>(u8_load_aligned), simd, d,
1686 reinterpret_cast<fptr>(c_v256_store_aligned),
1687 reinterpret_cast<fptr>(c_u8_load_aligned), ref_simd, ref_d, s);
1688 } else if (typeid(CRet) == typeid(c_v256) &&
1689 typeid(CArg) == typeid(uint16_t)) {
1690 // V256_U16
James Zern460bcce2022-06-04 15:21:23 -07001691 error = CompareSimd1Arg<v256, uint16_t, c_v256, uint16_t>(
Steinar Midtskogen1e424362016-09-30 13:14:04 +02001692 reinterpret_cast<fptr>(v256_store_aligned),
1693 reinterpret_cast<fptr>(u16_load_aligned), simd, d,
1694 reinterpret_cast<fptr>(c_v256_store_aligned),
1695 reinterpret_cast<fptr>(c_u16_load_aligned), ref_simd, ref_d, s);
1696 } else if (typeid(CRet) == typeid(c_v256) &&
1697 typeid(CArg) == typeid(uint32_t)) {
1698 // V256_U32
James Zern460bcce2022-06-04 15:21:23 -07001699 error = CompareSimd1Arg<v256, uint32_t, c_v256, uint32_t>(
Steinar Midtskogen1e424362016-09-30 13:14:04 +02001700 reinterpret_cast<fptr>(v256_store_aligned),
1701 reinterpret_cast<fptr>(u32_load_aligned), simd, d,
1702 reinterpret_cast<fptr>(c_v256_store_aligned),
1703 reinterpret_cast<fptr>(c_u32_load_aligned), ref_simd, ref_d, s);
Steinar Midtskogen0578d432018-05-28 14:47:36 +02001704 } else if (typeid(CRet) == typeid(c_v256) &&
1705 typeid(CArg) == typeid(uint64_t)) {
1706 // V256_U64
James Zern460bcce2022-06-04 15:21:23 -07001707 error = CompareSimd1Arg<v256, uint64_t, c_v256, uint64_t>(
Steinar Midtskogen0578d432018-05-28 14:47:36 +02001708 reinterpret_cast<fptr>(v256_store_aligned),
1709 reinterpret_cast<fptr>(u64_load_aligned), simd, d,
1710 reinterpret_cast<fptr>(c_v256_store_aligned),
1711 reinterpret_cast<fptr>(c_u64_load_aligned), ref_simd, ref_d, s);
Steinar Midtskogen1e424362016-09-30 13:14:04 +02001712 } else if (typeid(CRet) == typeid(uint32_t) &&
1713 typeid(CArg) == typeid(c_v256)) {
1714 // U32_V256
James Zern460bcce2022-06-04 15:21:23 -07001715 error = CompareSimd1Arg<uint32_t, v256, uint32_t, c_v256>(
Steinar Midtskogen1e424362016-09-30 13:14:04 +02001716 reinterpret_cast<fptr>(u32_store_aligned),
1717 reinterpret_cast<fptr>(v256_load_aligned), simd, d,
1718 reinterpret_cast<fptr>(c_u32_store_aligned),
1719 reinterpret_cast<fptr>(c_v256_load_aligned), ref_simd, ref_d, s);
1720 } else if (typeid(CRet) == typeid(c_v64) &&
1721 typeid(CArg) == typeid(c_v256)) {
1722 // V64_V256
James Zern460bcce2022-06-04 15:21:23 -07001723 error = CompareSimd1Arg<v64, v256, c_v64, c_v256>(
Steinar Midtskogen1e424362016-09-30 13:14:04 +02001724 reinterpret_cast<fptr>(v64_store_aligned),
1725 reinterpret_cast<fptr>(v256_load_aligned), simd, d,
1726 reinterpret_cast<fptr>(c_v64_store_aligned),
1727 reinterpret_cast<fptr>(c_v256_load_aligned), ref_simd, ref_d, s);
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001728 } else {
1729 FAIL() << "Internal error: Unknown intrinsic function "
1730 << typeid(CRet).name() << " " << name << "(" << typeid(CArg).name()
1731 << ")";
1732 }
1733 }
1734
1735 EXPECT_EQ(0, error) << "Error: mismatch for " << name << "("
Steinar Midtskogenc4669f62020-03-24 08:49:01 +01001736 << Print(s, sizeof(CArg)) << ") -> "
1737 << Print(d, sizeof(CRet)) << " (simd), "
1738 << Print(ref_d, sizeof(CRet)) << " (ref)";
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001739}
1740
1741template <typename CRet, typename CArg1, typename CArg2>
1742void TestSimd2Args(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
1743 const char *name) {
1744 ACMRandom rnd(ACMRandom::DeterministicSeed());
Steinar Midtskogen8b28d862017-01-09 11:33:20 +01001745 fptr ref_simd;
1746 fptr simd;
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001747 int error = 0;
Steinar Midtskogen593c6cd2018-06-11 10:00:28 +02001748 DECLARE_ALIGNED(32, uint8_t, s1[32]);
1749 DECLARE_ALIGNED(32, uint8_t, s2[32]);
1750 DECLARE_ALIGNED(32, uint8_t, d[32]);
1751 DECLARE_ALIGNED(32, uint8_t, ref_d[32]);
1752 assert(sizeof(CArg1) <= 32 && sizeof(CArg2) <= 32 && sizeof(CRet) <= 32);
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001753 memset(ref_d, 0, sizeof(ref_d));
1754 memset(d, 0, sizeof(d));
1755
1756 Map(name, &ref_simd, &simd);
James Zern664f04d2022-05-24 17:30:58 -07001757 if (simd == nullptr || ref_simd == nullptr) {
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001758 FAIL() << "Internal error: Unknown intrinsic function " << name;
1759 }
1760
James Zern8c636c12017-02-28 20:56:06 -08001761 for (unsigned int count = 0;
1762 count < iterations && !error && !testing::Test::HasFailure(); count++) {
Steinar Midtskogen7d532712017-03-19 21:34:47 +01001763 for (unsigned int c = 0; c < sizeof(CArg1); c++) s1[c] = rnd.Rand8();
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001764
Steinar Midtskogen7d532712017-03-19 21:34:47 +01001765 for (unsigned int c = 0; c < sizeof(CArg2); c++) s2[c] = rnd.Rand8();
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001766
Steinar Midtskogen7d532712017-03-19 21:34:47 +01001767 if (maskwidth) SetMask(s2, sizeof(CArg2), mask, maskwidth);
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001768
1769 if (typeid(CRet) == typeid(c_v64) && typeid(CArg1) == typeid(c_v64) &&
1770 typeid(CArg2) == typeid(c_v64)) {
1771 // V64_V64V64
James Zern460bcce2022-06-04 15:21:23 -07001772 error = CompareSimd2Args<v64, v64, v64, c_v64, c_v64, c_v64>(
Steinar Midtskogen8b28d862017-01-09 11:33:20 +01001773 reinterpret_cast<fptr>(v64_store_aligned),
1774 reinterpret_cast<fptr>(v64_load_aligned),
1775 reinterpret_cast<fptr>(v64_load_aligned), simd, d,
1776 reinterpret_cast<fptr>(c_v64_store_aligned),
1777 reinterpret_cast<fptr>(c_v64_load_aligned),
1778 reinterpret_cast<fptr>(c_v64_load_aligned),
1779 reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
Steinar Midtskogen04305c62016-09-30 13:14:04 +02001780 } else if (typeid(CRet) == typeid(c_v64) &&
1781 typeid(CArg1) == typeid(uint32_t) &&
1782 typeid(CArg2) == typeid(uint32_t)) {
1783 // V64_U32U32
James Zern460bcce2022-06-04 15:21:23 -07001784 error =
1785 CompareSimd2Args<v64, uint32_t, uint32_t, c_v64, uint32_t, uint32_t>(
1786 reinterpret_cast<fptr>(v64_store_aligned),
1787 reinterpret_cast<fptr>(u32_load_aligned),
1788 reinterpret_cast<fptr>(u32_load_aligned), simd, d,
1789 reinterpret_cast<fptr>(c_v64_store_aligned),
1790 reinterpret_cast<fptr>(c_u32_load_aligned),
1791 reinterpret_cast<fptr>(c_u32_load_aligned),
1792 reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001793 } else if (typeid(CRet) == typeid(uint32_t) &&
1794 typeid(CArg1) == typeid(c_v64) &&
1795 typeid(CArg2) == typeid(c_v64)) {
1796 // U32_V64V64
James Zern460bcce2022-06-04 15:21:23 -07001797 error = CompareSimd2Args<uint32_t, v64, v64, uint32_t, c_v64, c_v64>(
Steinar Midtskogen8b28d862017-01-09 11:33:20 +01001798 reinterpret_cast<fptr>(u32_store_aligned),
1799 reinterpret_cast<fptr>(v64_load_aligned),
1800 reinterpret_cast<fptr>(v64_load_aligned), simd, d,
1801 reinterpret_cast<fptr>(c_u32_store_aligned),
1802 reinterpret_cast<fptr>(c_v64_load_aligned),
1803 reinterpret_cast<fptr>(c_v64_load_aligned),
1804 reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001805 } else if (typeid(CRet) == typeid(int64_t) &&
1806 typeid(CArg1) == typeid(c_v64) &&
1807 typeid(CArg2) == typeid(c_v64)) {
1808 // S64_V64V64
James Zern460bcce2022-06-04 15:21:23 -07001809 error = CompareSimd2Args<int64_t, v64, v64, int64_t, c_v64, c_v64>(
Steinar Midtskogen6c795762017-03-07 20:55:48 +01001810 reinterpret_cast<fptr>(s64_store_aligned),
Steinar Midtskogen8b28d862017-01-09 11:33:20 +01001811 reinterpret_cast<fptr>(v64_load_aligned),
1812 reinterpret_cast<fptr>(v64_load_aligned), simd, d,
Steinar Midtskogen6c795762017-03-07 20:55:48 +01001813 reinterpret_cast<fptr>(c_s64_store_aligned),
Steinar Midtskogen8b28d862017-01-09 11:33:20 +01001814 reinterpret_cast<fptr>(c_v64_load_aligned),
1815 reinterpret_cast<fptr>(c_v64_load_aligned),
1816 reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001817 } else if (typeid(CRet) == typeid(c_v64) &&
1818 typeid(CArg1) == typeid(c_v64) &&
1819 typeid(CArg2) == typeid(uint32_t)) {
1820 // V64_V64U32
James Zern460bcce2022-06-04 15:21:23 -07001821 error = CompareSimd2Args<v64, v64, uint32_t, c_v64, c_v64, uint32_t>(
Steinar Midtskogen8b28d862017-01-09 11:33:20 +01001822 reinterpret_cast<fptr>(v64_store_aligned),
1823 reinterpret_cast<fptr>(v64_load_aligned),
1824 reinterpret_cast<fptr>(u32_load_aligned), simd, d,
1825 reinterpret_cast<fptr>(c_v64_store_aligned),
1826 reinterpret_cast<fptr>(c_v64_load_aligned),
1827 reinterpret_cast<fptr>(c_u32_load_aligned),
1828 reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
Steinar Midtskogen82d580c2016-09-30 13:14:04 +02001829 } else if (typeid(CRet) == typeid(c_v128) &&
1830 typeid(CArg1) == typeid(c_v128) &&
1831 typeid(CArg2) == typeid(c_v128)) {
1832 // V128_V128V128
James Zern460bcce2022-06-04 15:21:23 -07001833 error = CompareSimd2Args<v128, v128, v128, c_v128, c_v128, c_v128>(
Steinar Midtskogen82d580c2016-09-30 13:14:04 +02001834 reinterpret_cast<fptr>(v128_store_aligned),
1835 reinterpret_cast<fptr>(v128_load_aligned),
1836 reinterpret_cast<fptr>(v128_load_aligned), simd, d,
1837 reinterpret_cast<fptr>(c_v128_store_aligned),
1838 reinterpret_cast<fptr>(c_v128_load_aligned),
1839 reinterpret_cast<fptr>(c_v128_load_aligned),
1840 reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
1841 } else if (typeid(CRet) == typeid(uint32_t) &&
1842 typeid(CArg1) == typeid(c_v128) &&
1843 typeid(CArg2) == typeid(c_v128)) {
1844 // U32_V128V128
James Zern460bcce2022-06-04 15:21:23 -07001845 error = CompareSimd2Args<uint32_t, v128, v128, uint32_t, c_v128, c_v128>(
Steinar Midtskogen82d580c2016-09-30 13:14:04 +02001846 reinterpret_cast<fptr>(u32_store_aligned),
1847 reinterpret_cast<fptr>(v128_load_aligned),
1848 reinterpret_cast<fptr>(v128_load_aligned), simd, d,
1849 reinterpret_cast<fptr>(c_u32_store_aligned),
1850 reinterpret_cast<fptr>(c_v128_load_aligned),
1851 reinterpret_cast<fptr>(c_v128_load_aligned),
1852 reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
Steinar Midtskogen0578d432018-05-28 14:47:36 +02001853 } else if (typeid(CRet) == typeid(uint64_t) &&
1854 typeid(CArg1) == typeid(c_v128) &&
1855 typeid(CArg2) == typeid(c_v128)) {
1856 // U64_V128V128
James Zern460bcce2022-06-04 15:21:23 -07001857 error = CompareSimd2Args<uint64_t, v128, v128, uint64_t, c_v128, c_v128>(
Steinar Midtskogen0578d432018-05-28 14:47:36 +02001858 reinterpret_cast<fptr>(u64_store_aligned),
1859 reinterpret_cast<fptr>(v128_load_aligned),
1860 reinterpret_cast<fptr>(v128_load_aligned), simd, d,
1861 reinterpret_cast<fptr>(c_u64_store_aligned),
1862 reinterpret_cast<fptr>(c_v128_load_aligned),
1863 reinterpret_cast<fptr>(c_v128_load_aligned),
1864 reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
Steinar Midtskogen82d580c2016-09-30 13:14:04 +02001865 } else if (typeid(CRet) == typeid(int64_t) &&
1866 typeid(CArg1) == typeid(c_v128) &&
1867 typeid(CArg2) == typeid(c_v128)) {
1868 // S64_V128V128
James Zern460bcce2022-06-04 15:21:23 -07001869 error = CompareSimd2Args<int64_t, v128, v128, int64_t, c_v128, c_v128>(
Steinar Midtskogen6c795762017-03-07 20:55:48 +01001870 reinterpret_cast<fptr>(s64_store_aligned),
Steinar Midtskogen82d580c2016-09-30 13:14:04 +02001871 reinterpret_cast<fptr>(v128_load_aligned),
1872 reinterpret_cast<fptr>(v128_load_aligned), simd, d,
Steinar Midtskogen6c795762017-03-07 20:55:48 +01001873 reinterpret_cast<fptr>(c_s64_store_aligned),
Steinar Midtskogen82d580c2016-09-30 13:14:04 +02001874 reinterpret_cast<fptr>(c_v128_load_aligned),
1875 reinterpret_cast<fptr>(c_v128_load_aligned),
1876 reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
1877 } else if (typeid(CRet) == typeid(c_v128) &&
1878 typeid(CArg1) == typeid(uint64_t) &&
1879 typeid(CArg2) == typeid(uint64_t)) {
1880 // V128_U64U64
James Zern460bcce2022-06-04 15:21:23 -07001881 error = CompareSimd2Args<v128, uint64_t, uint64_t, c_v128, uint64_t,
1882 uint64_t>(
Steinar Midtskogen82d580c2016-09-30 13:14:04 +02001883 reinterpret_cast<fptr>(v128_store_aligned),
1884 reinterpret_cast<fptr>(u64_load_aligned),
1885 reinterpret_cast<fptr>(u64_load_aligned), simd, d,
1886 reinterpret_cast<fptr>(c_v128_store_aligned),
1887 reinterpret_cast<fptr>(c_u64_load_aligned),
1888 reinterpret_cast<fptr>(c_u64_load_aligned),
1889 reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
1890 } else if (typeid(CRet) == typeid(c_v128) &&
1891 typeid(CArg1) == typeid(c_v64) &&
1892 typeid(CArg2) == typeid(c_v64)) {
1893 // V128_V64V64
James Zern460bcce2022-06-04 15:21:23 -07001894 error = CompareSimd2Args<v128, v64, v64, c_v128, c_v64, c_v64>(
Steinar Midtskogen82d580c2016-09-30 13:14:04 +02001895 reinterpret_cast<fptr>(v128_store_aligned),
1896 reinterpret_cast<fptr>(v64_load_aligned),
1897 reinterpret_cast<fptr>(v64_load_aligned), simd, d,
1898 reinterpret_cast<fptr>(c_v128_store_aligned),
1899 reinterpret_cast<fptr>(c_v64_load_aligned),
1900 reinterpret_cast<fptr>(c_v64_load_aligned),
1901 reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
1902 } else if (typeid(CRet) == typeid(c_v128) &&
1903 typeid(CArg1) == typeid(c_v128) &&
1904 typeid(CArg2) == typeid(uint32_t)) {
1905 // V128_V128U32
James Zern460bcce2022-06-04 15:21:23 -07001906 error = CompareSimd2Args<v128, v128, uint32_t, c_v128, c_v128, uint32_t>(
Steinar Midtskogen82d580c2016-09-30 13:14:04 +02001907 reinterpret_cast<fptr>(v128_store_aligned),
1908 reinterpret_cast<fptr>(v128_load_aligned),
1909 reinterpret_cast<fptr>(u32_load_aligned), simd, d,
1910 reinterpret_cast<fptr>(c_v128_store_aligned),
1911 reinterpret_cast<fptr>(c_v128_load_aligned),
1912 reinterpret_cast<fptr>(c_u32_load_aligned),
1913 reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
Steinar Midtskogen1e424362016-09-30 13:14:04 +02001914 } else if (typeid(CRet) == typeid(c_v256) &&
1915 typeid(CArg1) == typeid(c_v256) &&
1916 typeid(CArg2) == typeid(c_v256)) {
1917 // V256_V256V256
James Zern460bcce2022-06-04 15:21:23 -07001918 error = CompareSimd2Args<v256, v256, v256, c_v256, c_v256, c_v256>(
Steinar Midtskogen1e424362016-09-30 13:14:04 +02001919 reinterpret_cast<fptr>(v256_store_aligned),
1920 reinterpret_cast<fptr>(v256_load_aligned),
1921 reinterpret_cast<fptr>(v256_load_aligned), simd, d,
1922 reinterpret_cast<fptr>(c_v256_store_aligned),
1923 reinterpret_cast<fptr>(c_v256_load_aligned),
1924 reinterpret_cast<fptr>(c_v256_load_aligned),
1925 reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
Steinar Midtskogen0578d432018-05-28 14:47:36 +02001926 } else if (typeid(CRet) == typeid(uint64_t) &&
1927 typeid(CArg1) == typeid(c_v256) &&
1928 typeid(CArg2) == typeid(c_v256)) {
1929 // U64_V256V256
James Zern460bcce2022-06-04 15:21:23 -07001930 error = CompareSimd2Args<uint64_t, v256, v256, uint64_t, c_v256, c_v256>(
Steinar Midtskogen0578d432018-05-28 14:47:36 +02001931 reinterpret_cast<fptr>(u64_store_aligned),
1932 reinterpret_cast<fptr>(v256_load_aligned),
1933 reinterpret_cast<fptr>(v256_load_aligned), simd, d,
1934 reinterpret_cast<fptr>(c_u64_store_aligned),
1935 reinterpret_cast<fptr>(c_v256_load_aligned),
1936 reinterpret_cast<fptr>(c_v256_load_aligned),
1937 reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
Steinar Midtskogen1e424362016-09-30 13:14:04 +02001938 } else if (typeid(CRet) == typeid(int64_t) &&
1939 typeid(CArg1) == typeid(c_v256) &&
1940 typeid(CArg2) == typeid(c_v256)) {
1941 // S64_V256V256
James Zern460bcce2022-06-04 15:21:23 -07001942 error = CompareSimd2Args<int64_t, v256, v256, int64_t, c_v256, c_v256>(
James Zern910f479b2017-06-30 17:13:41 -07001943 reinterpret_cast<fptr>(s64_store_aligned),
Steinar Midtskogen1e424362016-09-30 13:14:04 +02001944 reinterpret_cast<fptr>(v256_load_aligned),
1945 reinterpret_cast<fptr>(v256_load_aligned), simd, d,
James Zern910f479b2017-06-30 17:13:41 -07001946 reinterpret_cast<fptr>(c_s64_store_aligned),
Steinar Midtskogen1e424362016-09-30 13:14:04 +02001947 reinterpret_cast<fptr>(c_v256_load_aligned),
1948 reinterpret_cast<fptr>(c_v256_load_aligned),
1949 reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
1950 } else if (typeid(CRet) == typeid(uint32_t) &&
1951 typeid(CArg1) == typeid(c_v256) &&
1952 typeid(CArg2) == typeid(c_v256)) {
1953 // U32_V256V256
James Zern460bcce2022-06-04 15:21:23 -07001954 error = CompareSimd2Args<uint32_t, v256, v256, uint32_t, c_v256, c_v256>(
Steinar Midtskogen1e424362016-09-30 13:14:04 +02001955 reinterpret_cast<fptr>(u32_store_aligned),
1956 reinterpret_cast<fptr>(v256_load_aligned),
1957 reinterpret_cast<fptr>(v256_load_aligned), simd, d,
1958 reinterpret_cast<fptr>(c_u32_store_aligned),
1959 reinterpret_cast<fptr>(c_v256_load_aligned),
1960 reinterpret_cast<fptr>(c_v256_load_aligned),
1961 reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
1962 } else if (typeid(CRet) == typeid(c_v256) &&
1963 typeid(CArg1) == typeid(c_v128) &&
1964 typeid(CArg2) == typeid(c_v128)) {
1965 // V256_V128V128
James Zern460bcce2022-06-04 15:21:23 -07001966 error = CompareSimd2Args<v256, v128, v128, c_v256, c_v128, c_v128>(
Steinar Midtskogen1e424362016-09-30 13:14:04 +02001967 reinterpret_cast<fptr>(v256_store_aligned),
1968 reinterpret_cast<fptr>(v128_load_aligned),
1969 reinterpret_cast<fptr>(v128_load_aligned), simd, d,
1970 reinterpret_cast<fptr>(c_v256_store_aligned),
1971 reinterpret_cast<fptr>(c_v128_load_aligned),
1972 reinterpret_cast<fptr>(c_v128_load_aligned),
1973 reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
1974 } else if (typeid(CRet) == typeid(c_v256) &&
1975 typeid(CArg1) == typeid(c_v256) &&
1976 typeid(CArg2) == typeid(uint32_t)) {
1977 // V256_V256U32
James Zern460bcce2022-06-04 15:21:23 -07001978 error = CompareSimd2Args<v256, v256, uint32_t, c_v256, c_v256, uint32_t>(
Steinar Midtskogen1e424362016-09-30 13:14:04 +02001979 reinterpret_cast<fptr>(v256_store_aligned),
1980 reinterpret_cast<fptr>(v256_load_aligned),
1981 reinterpret_cast<fptr>(u32_load_aligned), simd, d,
1982 reinterpret_cast<fptr>(c_v256_store_aligned),
1983 reinterpret_cast<fptr>(c_v256_load_aligned),
1984 reinterpret_cast<fptr>(c_u32_load_aligned),
1985 reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
1986
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001987 } else {
1988 FAIL() << "Internal error: Unknown intrinsic function "
1989 << typeid(CRet).name() << " " << name << "("
1990 << typeid(CArg1).name() << ", " << typeid(CArg2).name() << ")";
1991 }
1992 }
1993
1994 EXPECT_EQ(0, error) << "Error: mismatch for " << name << "("
Steinar Midtskogenc4669f62020-03-24 08:49:01 +01001995 << Print(s1, sizeof(CArg1)) << ", "
1996 << Print(s2, sizeof(CArg2)) << ") -> "
1997 << Print(d, sizeof(CRet)) << " (simd), "
1998 << Print(ref_d, sizeof(CRet)) << " (ref)";
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001999}
2000
Steinar Midtskogen0578d432018-05-28 14:47:36 +02002001template <typename CRet, typename CArg1, typename CArg2, typename CArg3>
2002void TestSimd3Args(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
2003 const char *name) {
2004 ACMRandom rnd(ACMRandom::DeterministicSeed());
2005 fptr ref_simd;
2006 fptr simd;
2007 int error = 0;
Steinar Midtskogen593c6cd2018-06-11 10:00:28 +02002008 DECLARE_ALIGNED(32, uint8_t, s1[32]);
2009 DECLARE_ALIGNED(32, uint8_t, s2[32]);
2010 DECLARE_ALIGNED(32, uint8_t, s3[32]);
2011 DECLARE_ALIGNED(32, uint8_t, d[32]);
2012 DECLARE_ALIGNED(32, uint8_t, ref_d[32]);
2013 assert(sizeof(CArg1) <= 32 && sizeof(CArg2) <= 32 && sizeof(CArg3) <= 32 &&
2014 sizeof(CRet) <= 32);
Steinar Midtskogen0578d432018-05-28 14:47:36 +02002015 memset(ref_d, 0, sizeof(ref_d));
2016 memset(d, 0, sizeof(d));
2017
2018 Map(name, &ref_simd, &simd);
James Zern664f04d2022-05-24 17:30:58 -07002019 if (simd == nullptr || ref_simd == nullptr) {
Steinar Midtskogen0578d432018-05-28 14:47:36 +02002020 FAIL() << "Internal error: Unknown intrinsic function " << name;
2021 }
2022
2023 for (unsigned int count = 0;
2024 count < iterations && !error && !testing::Test::HasFailure(); count++) {
2025 for (unsigned int c = 0; c < sizeof(CArg1); c++) s1[c] = rnd.Rand8();
2026
2027 for (unsigned int c = 0; c < sizeof(CArg2); c++) s2[c] = rnd.Rand8();
2028
2029 for (unsigned int c = 0; c < sizeof(CArg3); c++) s3[c] = rnd.Rand8();
2030
2031 if (maskwidth) SetMask(s3, sizeof(CArg3), mask, maskwidth);
2032
2033 if (typeid(CRet) == typeid(c_v128) && typeid(CArg1) == typeid(c_v128) &&
2034 typeid(CArg2) == typeid(c_v128) && typeid(CArg3) == typeid(c_v128)) {
2035 // V128_V128V128V128
James Zern460bcce2022-06-04 15:21:23 -07002036 error = CompareSimd3Args<v128, v128, v128, v128, c_v128, c_v128, c_v128,
2037 c_v128>(
2038 reinterpret_cast<fptr>(v128_store_aligned),
2039 reinterpret_cast<fptr>(v128_load_aligned),
2040 reinterpret_cast<fptr>(v128_load_aligned),
2041 reinterpret_cast<fptr>(v128_load_aligned), simd, d,
2042 reinterpret_cast<fptr>(c_v128_store_aligned),
2043 reinterpret_cast<fptr>(c_v128_load_aligned),
2044 reinterpret_cast<fptr>(c_v128_load_aligned),
2045 reinterpret_cast<fptr>(c_v128_load_aligned),
2046 reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2, s3);
Steinar Midtskogen0578d432018-05-28 14:47:36 +02002047 } else if (typeid(CRet) == typeid(c_v256) &&
2048 typeid(CArg1) == typeid(c_v256) &&
2049 typeid(CArg2) == typeid(c_v256) &&
2050 typeid(CArg3) == typeid(c_v256)) {
2051 // V256_V256V256V256
James Zern460bcce2022-06-04 15:21:23 -07002052 error = CompareSimd3Args<v256, v256, v256, v256, c_v256, c_v256, c_v256,
2053 c_v256>(
2054 reinterpret_cast<fptr>(v256_store_aligned),
2055 reinterpret_cast<fptr>(v256_load_aligned),
2056 reinterpret_cast<fptr>(v256_load_aligned),
2057 reinterpret_cast<fptr>(v256_load_aligned), simd, d,
2058 reinterpret_cast<fptr>(c_v256_store_aligned),
2059 reinterpret_cast<fptr>(c_v256_load_aligned),
2060 reinterpret_cast<fptr>(c_v256_load_aligned),
2061 reinterpret_cast<fptr>(c_v256_load_aligned),
2062 reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2, s3);
Steinar Midtskogen0578d432018-05-28 14:47:36 +02002063 } else {
2064 FAIL() << "Internal error: Unknown intrinsic function "
2065 << typeid(CRet).name() << " " << name << "("
2066 << typeid(CArg1).name() << ", " << typeid(CArg2).name() << ", "
2067 << typeid(CArg3).name() << ")";
2068 }
2069 }
2070
2071 EXPECT_EQ(0, error) << "Error: mismatch for " << name << "("
Steinar Midtskogenc4669f62020-03-24 08:49:01 +01002072 << Print(s1, sizeof(CArg1)) << ", "
2073 << Print(s2, sizeof(CArg2)) << ", "
2074 << Print(s3, sizeof(CArg3)) << ") -> "
2075 << Print(d, sizeof(CRet)) << " (simd), "
2076 << Print(ref_d, sizeof(CRet)) << " (ref)";
Steinar Midtskogen0578d432018-05-28 14:47:36 +02002077}
2078
// Explicit instantiations to make the functions callable from other files
Steinar Midtskogen6c795762017-03-07 20:55:48 +01002080template void TestSimd1Arg<c_v64, uint8_t>(uint32_t, uint32_t, uint32_t,
2081 const char *);
2082template void TestSimd1Arg<c_v64, uint16_t>(uint32_t, uint32_t, uint32_t,
2083 const char *);
Steinar Midtskogen04305c62016-09-30 13:14:04 +02002084template void TestSimd1Arg<c_v64, uint32_t>(uint32_t, uint32_t, uint32_t,
2085 const char *);
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01002086template void TestSimd1Arg<c_v64, c_v64>(uint32_t, uint32_t, uint32_t,
2087 const char *);
Steinar Midtskogen04305c62016-09-30 13:14:04 +02002088template void TestSimd1Arg<uint32_t, c_v64>(uint32_t, uint32_t, uint32_t,
2089 const char *);
Steinar Midtskogen6c795762017-03-07 20:55:48 +01002090template void TestSimd1Arg<int32_t, c_v64>(uint32_t, uint32_t, uint32_t,
2091 const char *);
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01002092template void TestSimd1Arg<uint64_t, c_v64>(uint32_t, uint32_t, uint32_t,
2093 const char *);
Steinar Midtskogen6c795762017-03-07 20:55:48 +01002094template void TestSimd1Arg<int64_t, c_v64>(uint32_t, uint32_t, uint32_t,
2095 const char *);
Steinar Midtskogen04305c62016-09-30 13:14:04 +02002096template void TestSimd2Args<c_v64, uint32_t, uint32_t>(uint32_t, uint32_t,
2097 uint32_t, const char *);
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01002098template void TestSimd2Args<c_v64, c_v64, c_v64>(uint32_t, uint32_t, uint32_t,
2099 const char *);
2100template void TestSimd2Args<c_v64, c_v64, uint32_t>(uint32_t, uint32_t,
2101 uint32_t, const char *);
2102template void TestSimd2Args<int64_t, c_v64, c_v64>(uint32_t, uint32_t, uint32_t,
2103 const char *);
2104template void TestSimd2Args<uint32_t, c_v64, c_v64>(uint32_t, uint32_t,
2105 uint32_t, const char *);
Steinar Midtskogen82d580c2016-09-30 13:14:04 +02002106template void TestSimd1Arg<c_v128, c_v128>(uint32_t, uint32_t, uint32_t,
2107 const char *);
Steinar Midtskogen6c795762017-03-07 20:55:48 +01002108template void TestSimd1Arg<c_v128, uint8_t>(uint32_t, uint32_t, uint32_t,
2109 const char *);
2110template void TestSimd1Arg<c_v128, uint16_t>(uint32_t, uint32_t, uint32_t,
2111 const char *);
Steinar Midtskogen82d580c2016-09-30 13:14:04 +02002112template void TestSimd1Arg<c_v128, uint32_t>(uint32_t, uint32_t, uint32_t,
2113 const char *);
Steinar Midtskogen0578d432018-05-28 14:47:36 +02002114template void TestSimd1Arg<c_v128, uint64_t>(uint32_t, uint32_t, uint32_t,
2115 const char *);
Steinar Midtskogen82d580c2016-09-30 13:14:04 +02002116template void TestSimd1Arg<c_v128, c_v64>(uint32_t, uint32_t, uint32_t,
2117 const char *);
2118template void TestSimd1Arg<uint32_t, c_v128>(uint32_t, uint32_t, uint32_t,
2119 const char *);
2120template void TestSimd1Arg<uint64_t, c_v128>(uint32_t, uint32_t, uint32_t,
2121 const char *);
2122template void TestSimd1Arg<c_v64, c_v128>(uint32_t, uint32_t, uint32_t,
2123 const char *);
2124template void TestSimd2Args<c_v128, c_v128, c_v128>(uint32_t, uint32_t,
2125 uint32_t, const char *);
2126template void TestSimd2Args<c_v128, c_v128, uint32_t>(uint32_t, uint32_t,
2127 uint32_t, const char *);
2128template void TestSimd2Args<c_v128, uint64_t, uint64_t>(uint32_t, uint32_t,
2129 uint32_t, const char *);
2130template void TestSimd2Args<c_v128, c_v64, c_v64>(uint32_t, uint32_t, uint32_t,
2131 const char *);
Steinar Midtskogen0578d432018-05-28 14:47:36 +02002132template void TestSimd2Args<uint64_t, c_v128, c_v128>(uint32_t, uint32_t,
2133 uint32_t, const char *);
Steinar Midtskogen82d580c2016-09-30 13:14:04 +02002134template void TestSimd2Args<int64_t, c_v128, c_v128>(uint32_t, uint32_t,
2135 uint32_t, const char *);
2136template void TestSimd2Args<uint32_t, c_v128, c_v128>(uint32_t, uint32_t,
2137 uint32_t, const char *);
Steinar Midtskogen0578d432018-05-28 14:47:36 +02002138template void TestSimd3Args<c_v128, c_v128, c_v128, c_v128>(uint32_t, uint32_t,
2139 uint32_t,
2140 const char *);
Steinar Midtskogen1e424362016-09-30 13:14:04 +02002141template void TestSimd1Arg<c_v256, c_v128>(uint32_t, uint32_t, uint32_t,
2142 const char *);
2143template void TestSimd1Arg<c_v256, c_v256>(uint32_t, uint32_t, uint32_t,
2144 const char *);
2145template void TestSimd1Arg<uint64_t, c_v256>(uint32_t, uint32_t, uint32_t,
2146 const char *);
2147template void TestSimd1Arg<c_v256, uint8_t>(uint32_t, uint32_t, uint32_t,
2148 const char *);
2149template void TestSimd1Arg<c_v256, uint16_t>(uint32_t, uint32_t, uint32_t,
2150 const char *);
2151template void TestSimd1Arg<c_v256, uint32_t>(uint32_t, uint32_t, uint32_t,
2152 const char *);
Steinar Midtskogen0578d432018-05-28 14:47:36 +02002153template void TestSimd1Arg<c_v256, uint64_t>(uint32_t, uint32_t, uint32_t,
2154 const char *);
Steinar Midtskogen1e424362016-09-30 13:14:04 +02002155template void TestSimd1Arg<uint32_t, c_v256>(uint32_t, uint32_t, uint32_t,
2156 const char *);
2157template void TestSimd1Arg<c_v64, c_v256>(uint32_t, uint32_t, uint32_t,
2158 const char *);
2159template void TestSimd2Args<c_v256, c_v128, c_v128>(uint32_t, uint32_t,
2160 uint32_t, const char *);
2161template void TestSimd2Args<c_v256, c_v256, c_v256>(uint32_t, uint32_t,
2162 uint32_t, const char *);
2163template void TestSimd2Args<c_v256, c_v256, uint32_t>(uint32_t, uint32_t,
2164 uint32_t, const char *);
Steinar Midtskogen0578d432018-05-28 14:47:36 +02002165template void TestSimd2Args<uint64_t, c_v256, c_v256>(uint32_t, uint32_t,
2166 uint32_t, const char *);
Steinar Midtskogen1e424362016-09-30 13:14:04 +02002167template void TestSimd2Args<int64_t, c_v256, c_v256>(uint32_t, uint32_t,
2168 uint32_t, const char *);
2169template void TestSimd2Args<uint32_t, c_v256, c_v256>(uint32_t, uint32_t,
2170 uint32_t, const char *);
Steinar Midtskogen0578d432018-05-28 14:47:36 +02002171template void TestSimd3Args<c_v256, c_v256, c_v256, c_v256>(uint32_t, uint32_t,
2172 uint32_t,
2173 const char *);
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01002174
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01002175} // namespace SIMD_NAMESPACE