blob: 46f46d7516ef028139819788134bd408167caf4f [file] [log] [blame]
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001/*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10*/
11
12#include <assert.h>
13#include <string>
14#include "./aom_dsp_rtcd.h"
15#include "test/acm_random.h"
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +010016#include "aom_dsp/aom_simd.h"
Steinar Midtskogen04305c62016-09-30 13:14:04 +020017#undef SIMD_INLINE
18#define SIMD_INLINE static // Don't enforce inlining
Steinar Midtskogen1e424362016-09-30 13:14:04 +020019#include "aom_dsp/simd/v256_intrinsics_c.h"
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +010020
21// Machine tuned code goes into this file. This file is included from
22// simd_cmp_sse2.cc, simd_cmp_ssse3.cc etc which define the macros
23// ARCH (=neon, sse2, ssse3, etc), SIMD_NAMESPACE and ARCH_POSTFIX().
24
25using libaom_test::ACMRandom;
26
27namespace SIMD_NAMESPACE {
28
// Wrap templates around intrinsics using immediate values.
// The shift/align intrinsics presumably require a compile-time immediate
// count (TODO confirm against aom_dsp/simd headers); baking the count in
// as a template parameter gives every count its own instantiation with a
// distinct address, so it can be stored in the MAP() function table below.
template <int shift>
v64 imm_v64_shl_n_byte(v64 a) {
  return v64_shl_n_byte(a, shift);
}
template <int shift>
v64 imm_v64_shr_n_byte(v64 a) {
  return v64_shr_n_byte(a, shift);
}
template <int shift>
v64 imm_v64_shl_n_8(v64 a) {
  return v64_shl_n_8(a, shift);
}
template <int shift>
v64 imm_v64_shr_n_u8(v64 a) {
  return v64_shr_n_u8(a, shift);
}
template <int shift>
v64 imm_v64_shr_n_s8(v64 a) {
  return v64_shr_n_s8(a, shift);
}
template <int shift>
v64 imm_v64_shl_n_16(v64 a) {
  return v64_shl_n_16(a, shift);
}
template <int shift>
v64 imm_v64_shr_n_u16(v64 a) {
  return v64_shr_n_u16(a, shift);
}
template <int shift>
v64 imm_v64_shr_n_s16(v64 a) {
  return v64_shr_n_s16(a, shift);
}
template <int shift>
v64 imm_v64_shl_n_32(v64 a) {
  return v64_shl_n_32(a, shift);
}
template <int shift>
v64 imm_v64_shr_n_u32(v64 a) {
  return v64_shr_n_u32(a, shift);
}
template <int shift>
v64 imm_v64_shr_n_s32(v64 a) {
  return v64_shr_n_s32(a, shift);
}
template <int shift>
v64 imm_v64_align(v64 a, v64 b) {
  return v64_align(a, b, shift);
}
78
// Wrap templates around corresponding C implementations of the above
// (same shift-as-template-parameter technique, applied to the c_v64
// reference implementation so each pair can be compared via the MAP table).
template <int shift>
c_v64 c_imm_v64_shl_n_byte(c_v64 a) {
  return c_v64_shl_n_byte(a, shift);
}
template <int shift>
c_v64 c_imm_v64_shr_n_byte(c_v64 a) {
  return c_v64_shr_n_byte(a, shift);
}
template <int shift>
c_v64 c_imm_v64_shl_n_8(c_v64 a) {
  return c_v64_shl_n_8(a, shift);
}
template <int shift>
c_v64 c_imm_v64_shr_n_u8(c_v64 a) {
  return c_v64_shr_n_u8(a, shift);
}
template <int shift>
c_v64 c_imm_v64_shr_n_s8(c_v64 a) {
  return c_v64_shr_n_s8(a, shift);
}
template <int shift>
c_v64 c_imm_v64_shl_n_16(c_v64 a) {
  return c_v64_shl_n_16(a, shift);
}
template <int shift>
c_v64 c_imm_v64_shr_n_u16(c_v64 a) {
  return c_v64_shr_n_u16(a, shift);
}
template <int shift>
c_v64 c_imm_v64_shr_n_s16(c_v64 a) {
  return c_v64_shr_n_s16(a, shift);
}
template <int shift>
c_v64 c_imm_v64_shl_n_32(c_v64 a) {
  return c_v64_shl_n_32(a, shift);
}
template <int shift>
c_v64 c_imm_v64_shr_n_u32(c_v64 a) {
  return c_v64_shr_n_u32(a, shift);
}
template <int shift>
c_v64 c_imm_v64_shr_n_s32(c_v64 a) {
  return c_v64_shr_n_s32(a, shift);
}
template <int shift>
c_v64 c_imm_v64_align(c_v64 a, c_v64 b) {
  return c_v64_align(a, b, shift);
}
128
// Immediate-argument wrappers for the 128-bit SIMD type (see the v64
// wrappers above for why the shift count is a template parameter).
template <int shift>
v128 imm_v128_shl_n_byte(v128 a) {
  return v128_shl_n_byte(a, shift);
}
template <int shift>
v128 imm_v128_shr_n_byte(v128 a) {
  return v128_shr_n_byte(a, shift);
}
template <int shift>
v128 imm_v128_shl_n_8(v128 a) {
  return v128_shl_n_8(a, shift);
}
template <int shift>
v128 imm_v128_shr_n_u8(v128 a) {
  return v128_shr_n_u8(a, shift);
}
template <int shift>
v128 imm_v128_shr_n_s8(v128 a) {
  return v128_shr_n_s8(a, shift);
}
template <int shift>
v128 imm_v128_shl_n_16(v128 a) {
  return v128_shl_n_16(a, shift);
}
template <int shift>
v128 imm_v128_shr_n_u16(v128 a) {
  return v128_shr_n_u16(a, shift);
}
template <int shift>
v128 imm_v128_shr_n_s16(v128 a) {
  return v128_shr_n_s16(a, shift);
}
template <int shift>
v128 imm_v128_shl_n_32(v128 a) {
  return v128_shl_n_32(a, shift);
}
template <int shift>
v128 imm_v128_shr_n_u32(v128 a) {
  return v128_shr_n_u32(a, shift);
}
template <int shift>
v128 imm_v128_shr_n_s32(v128 a) {
  return v128_shr_n_s32(a, shift);
}
template <int shift>
v128 imm_v128_align(v128 a, v128 b) {
  return v128_align(a, b, shift);
}
177
// C reference counterparts of the v128 immediate wrappers above.
template <int shift>
c_v128 c_imm_v128_shl_n_byte(c_v128 a) {
  return c_v128_shl_n_byte(a, shift);
}
template <int shift>
c_v128 c_imm_v128_shr_n_byte(c_v128 a) {
  return c_v128_shr_n_byte(a, shift);
}
template <int shift>
c_v128 c_imm_v128_shl_n_8(c_v128 a) {
  return c_v128_shl_n_8(a, shift);
}
template <int shift>
c_v128 c_imm_v128_shr_n_u8(c_v128 a) {
  return c_v128_shr_n_u8(a, shift);
}
template <int shift>
c_v128 c_imm_v128_shr_n_s8(c_v128 a) {
  return c_v128_shr_n_s8(a, shift);
}
template <int shift>
c_v128 c_imm_v128_shl_n_16(c_v128 a) {
  return c_v128_shl_n_16(a, shift);
}
template <int shift>
c_v128 c_imm_v128_shr_n_u16(c_v128 a) {
  return c_v128_shr_n_u16(a, shift);
}
template <int shift>
c_v128 c_imm_v128_shr_n_s16(c_v128 a) {
  return c_v128_shr_n_s16(a, shift);
}
template <int shift>
c_v128 c_imm_v128_shl_n_32(c_v128 a) {
  return c_v128_shl_n_32(a, shift);
}
template <int shift>
c_v128 c_imm_v128_shr_n_u32(c_v128 a) {
  return c_v128_shr_n_u32(a, shift);
}
template <int shift>
c_v128 c_imm_v128_shr_n_s32(c_v128 a) {
  return c_v128_shr_n_s32(a, shift);
}
template <int shift>
c_v128 c_imm_v128_align(c_v128 a, c_v128 b) {
  return c_v128_align(a, b, shift);
}
226
// Immediate-argument wrappers for the 256-bit SIMD type (see the v64
// wrappers above for why the shift count is a template parameter).
template <int shift>
v256 imm_v256_shl_n_byte(v256 a) {
  return v256_shl_n_byte(a, shift);
}
template <int shift>
v256 imm_v256_shr_n_byte(v256 a) {
  return v256_shr_n_byte(a, shift);
}
template <int shift>
v256 imm_v256_shl_n_8(v256 a) {
  return v256_shl_n_8(a, shift);
}
template <int shift>
v256 imm_v256_shr_n_u8(v256 a) {
  return v256_shr_n_u8(a, shift);
}
template <int shift>
v256 imm_v256_shr_n_s8(v256 a) {
  return v256_shr_n_s8(a, shift);
}
template <int shift>
v256 imm_v256_shl_n_16(v256 a) {
  return v256_shl_n_16(a, shift);
}
template <int shift>
v256 imm_v256_shr_n_u16(v256 a) {
  return v256_shr_n_u16(a, shift);
}
template <int shift>
v256 imm_v256_shr_n_s16(v256 a) {
  return v256_shr_n_s16(a, shift);
}
template <int shift>
v256 imm_v256_shl_n_32(v256 a) {
  return v256_shl_n_32(a, shift);
}
template <int shift>
v256 imm_v256_shr_n_u32(v256 a) {
  return v256_shr_n_u32(a, shift);
}
template <int shift>
v256 imm_v256_shr_n_s32(v256 a) {
  return v256_shr_n_s32(a, shift);
}
template <int shift>
v256 imm_v256_align(v256 a, v256 b) {
  return v256_align(a, b, shift);
}
275
// C reference counterparts of the v256 immediate wrappers above.
template <int shift>
c_v256 c_imm_v256_shl_n_byte(c_v256 a) {
  return c_v256_shl_n_byte(a, shift);
}
template <int shift>
c_v256 c_imm_v256_shr_n_byte(c_v256 a) {
  return c_v256_shr_n_byte(a, shift);
}
template <int shift>
c_v256 c_imm_v256_shl_n_8(c_v256 a) {
  return c_v256_shl_n_8(a, shift);
}
template <int shift>
c_v256 c_imm_v256_shr_n_u8(c_v256 a) {
  return c_v256_shr_n_u8(a, shift);
}
template <int shift>
c_v256 c_imm_v256_shr_n_s8(c_v256 a) {
  return c_v256_shr_n_s8(a, shift);
}
template <int shift>
c_v256 c_imm_v256_shl_n_16(c_v256 a) {
  return c_v256_shl_n_16(a, shift);
}
template <int shift>
c_v256 c_imm_v256_shr_n_u16(c_v256 a) {
  return c_v256_shr_n_u16(a, shift);
}
template <int shift>
c_v256 c_imm_v256_shr_n_s16(c_v256 a) {
  return c_v256_shr_n_s16(a, shift);
}
template <int shift>
c_v256 c_imm_v256_shl_n_32(c_v256 a) {
  return c_v256_shl_n_32(a, shift);
}
template <int shift>
c_v256 c_imm_v256_shr_n_u32(c_v256 a) {
  return c_v256_shr_n_u32(a, shift);
}
template <int shift>
c_v256 c_imm_v256_shr_n_s32(c_v256 a) {
  return c_v256_shr_n_s32(a, shift);
}
template <int shift>
c_v256 c_imm_v256_align(c_v256 a, c_v256 b) {
  return c_v256_align(a, b, shift);
}
324
// Wrappers around the SAD and SSD functions.  The underlying intrinsics
// use a three-step init/accumulate/sum protocol; folding that into a
// single two-argument call gives every wrapper the uniform signature the
// MAP() table requires.  The "::" qualification selects the
// global-namespace intrinsic rather than this wrapper of the same name.
uint32_t v64_sad_u8(v64 a, v64 b) {
  return v64_sad_u8_sum(::v64_sad_u8(v64_sad_u8_init(), a, b));
}
uint32_t v64_ssd_u8(v64 a, v64 b) {
  return v64_ssd_u8_sum(::v64_ssd_u8(v64_ssd_u8_init(), a, b));
}

uint32_t c_v64_sad_u8(c_v64 a, c_v64 b) {
  return c_v64_sad_u8_sum(::c_v64_sad_u8(c_v64_sad_u8_init(), a, b));
}
uint32_t c_v64_ssd_u8(c_v64 a, c_v64 b) {
  return c_v64_ssd_u8_sum(::c_v64_ssd_u8(c_v64_ssd_u8_init(), a, b));
}
uint32_t v128_sad_u8(v128 a, v128 b) {
  return v128_sad_u8_sum(::v128_sad_u8(v128_sad_u8_init(), a, b));
}
uint32_t v128_ssd_u8(v128 a, v128 b) {
  return v128_ssd_u8_sum(::v128_ssd_u8(v128_ssd_u8_init(), a, b));
}
uint32_t c_v128_sad_u8(c_v128 a, c_v128 b) {
  return c_v128_sad_u8_sum(::c_v128_sad_u8(c_v128_sad_u8_init(), a, b));
}
uint32_t c_v128_ssd_u8(c_v128 a, c_v128 b) {
  return c_v128_ssd_u8_sum(::c_v128_ssd_u8(c_v128_ssd_u8_init(), a, b));
}
uint32_t v256_sad_u8(v256 a, v256 b) {
  return v256_sad_u8_sum(::v256_sad_u8(v256_sad_u8_init(), a, b));
}
uint32_t v256_ssd_u8(v256 a, v256 b) {
  return v256_ssd_u8_sum(::v256_ssd_u8(v256_ssd_u8_init(), a, b));
}
uint32_t c_v256_sad_u8(c_v256 a, c_v256 b) {
  return c_v256_sad_u8_sum(::c_v256_sad_u8(c_v256_sad_u8_init(), a, b));
}
uint32_t c_v256_ssd_u8(c_v256 a, c_v256 b) {
  return c_v256_ssd_u8_sum(::c_v256_ssd_u8(c_v256_ssd_u8_init(), a, b));
}
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +0100363
364namespace {
365
// Generic function pointer type used to store intrinsics of any signature.
typedef void (*fptr)();

// One table row: a function's name together with its reference (plain C)
// implementation and its machine tuned (SIMD) implementation, both
// type-erased to the generic function pointer above.
typedef struct {
  const char *name;
  fptr ref;
  fptr simd;
} mapping;

// Builds a mapping row for 'name': the stringized name, the c_-prefixed
// reference implementation, and the machine tuned implementation.  The
// reinterpret_casts erase the differing signatures; the caller of Map()
// is responsible for casting the pointers back to the correct type.
#define MAP(name) \
  { \
    #name, reinterpret_cast < fptr > (c_##name), \
    reinterpret_cast < fptr > (name) \
  }
379
// Table pairing every C reference implementation with its machine tuned
// counterpart, in v64 / v128 / v256 order, terminated by a NULL sentinel
// row that Map() below relies on when a lookup fails.
const mapping m[] = { MAP(v64_sad_u8),
                      MAP(v64_ssd_u8),
                      MAP(v64_add_8),
                      MAP(v64_add_16),
                      MAP(v64_sadd_s16),
                      MAP(v64_add_32),
                      MAP(v64_sub_8),
                      MAP(v64_ssub_u8),
                      MAP(v64_ssub_s8),
                      MAP(v64_sub_16),
                      MAP(v64_ssub_s16),
                      MAP(v64_ssub_u16),
                      MAP(v64_sub_32),
                      MAP(v64_ziplo_8),
                      MAP(v64_ziphi_8),
                      MAP(v64_ziplo_16),
                      MAP(v64_ziphi_16),
                      MAP(v64_ziplo_32),
                      MAP(v64_ziphi_32),
                      MAP(v64_pack_s32_s16),
                      MAP(v64_pack_s16_u8),
                      MAP(v64_pack_s16_s8),
                      MAP(v64_unziphi_8),
                      MAP(v64_unziplo_8),
                      MAP(v64_unziphi_16),
                      MAP(v64_unziplo_16),
                      MAP(v64_or),
                      MAP(v64_xor),
                      MAP(v64_and),
                      MAP(v64_andn),
                      MAP(v64_mullo_s16),
                      MAP(v64_mulhi_s16),
                      MAP(v64_mullo_s32),
                      MAP(v64_madd_s16),
                      MAP(v64_madd_us8),
                      MAP(v64_avg_u8),
                      MAP(v64_rdavg_u8),
                      MAP(v64_avg_u16),
                      MAP(v64_min_u8),
                      MAP(v64_max_u8),
                      MAP(v64_min_s8),
                      MAP(v64_max_s8),
                      MAP(v64_min_s16),
                      MAP(v64_max_s16),
                      MAP(v64_cmpgt_s8),
                      MAP(v64_cmplt_s8),
                      MAP(v64_cmpeq_8),
                      MAP(v64_cmpgt_s16),
                      MAP(v64_cmplt_s16),
                      MAP(v64_cmpeq_16),
                      MAP(v64_shuffle_8),
                      MAP(imm_v64_align<1>),
                      MAP(imm_v64_align<2>),
                      MAP(imm_v64_align<3>),
                      MAP(imm_v64_align<4>),
                      MAP(imm_v64_align<5>),
                      MAP(imm_v64_align<6>),
                      MAP(imm_v64_align<7>),
                      MAP(v64_abs_s8),
                      MAP(v64_abs_s16),
                      MAP(v64_unpacklo_u8_s16),
                      MAP(v64_unpackhi_u8_s16),
                      MAP(v64_unpacklo_s8_s16),
                      MAP(v64_unpackhi_s8_s16),
                      MAP(v64_unpacklo_u16_s32),
                      MAP(v64_unpacklo_s16_s32),
                      MAP(v64_unpackhi_u16_s32),
                      MAP(v64_unpackhi_s16_s32),
                      MAP(imm_v64_shr_n_byte<1>),
                      MAP(imm_v64_shr_n_byte<2>),
                      MAP(imm_v64_shr_n_byte<3>),
                      MAP(imm_v64_shr_n_byte<4>),
                      MAP(imm_v64_shr_n_byte<5>),
                      MAP(imm_v64_shr_n_byte<6>),
                      MAP(imm_v64_shr_n_byte<7>),
                      MAP(imm_v64_shl_n_byte<1>),
                      MAP(imm_v64_shl_n_byte<2>),
                      MAP(imm_v64_shl_n_byte<3>),
                      MAP(imm_v64_shl_n_byte<4>),
                      MAP(imm_v64_shl_n_byte<5>),
                      MAP(imm_v64_shl_n_byte<6>),
                      MAP(imm_v64_shl_n_byte<7>),
                      MAP(imm_v64_shl_n_8<1>),
                      MAP(imm_v64_shl_n_8<2>),
                      MAP(imm_v64_shl_n_8<3>),
                      MAP(imm_v64_shl_n_8<4>),
                      MAP(imm_v64_shl_n_8<5>),
                      MAP(imm_v64_shl_n_8<6>),
                      MAP(imm_v64_shl_n_8<7>),
                      MAP(imm_v64_shr_n_u8<1>),
                      MAP(imm_v64_shr_n_u8<2>),
                      MAP(imm_v64_shr_n_u8<3>),
                      MAP(imm_v64_shr_n_u8<4>),
                      MAP(imm_v64_shr_n_u8<5>),
                      MAP(imm_v64_shr_n_u8<6>),
                      MAP(imm_v64_shr_n_u8<7>),
                      MAP(imm_v64_shr_n_s8<1>),
                      MAP(imm_v64_shr_n_s8<2>),
                      MAP(imm_v64_shr_n_s8<3>),
                      MAP(imm_v64_shr_n_s8<4>),
                      MAP(imm_v64_shr_n_s8<5>),
                      MAP(imm_v64_shr_n_s8<6>),
                      MAP(imm_v64_shr_n_s8<7>),
                      MAP(imm_v64_shl_n_16<1>),
                      MAP(imm_v64_shl_n_16<2>),
                      MAP(imm_v64_shl_n_16<4>),
                      MAP(imm_v64_shl_n_16<6>),
                      MAP(imm_v64_shl_n_16<8>),
                      MAP(imm_v64_shl_n_16<10>),
                      MAP(imm_v64_shl_n_16<12>),
                      MAP(imm_v64_shl_n_16<14>),
                      MAP(imm_v64_shr_n_u16<1>),
                      MAP(imm_v64_shr_n_u16<2>),
                      MAP(imm_v64_shr_n_u16<4>),
                      MAP(imm_v64_shr_n_u16<6>),
                      MAP(imm_v64_shr_n_u16<8>),
                      MAP(imm_v64_shr_n_u16<10>),
                      MAP(imm_v64_shr_n_u16<12>),
                      MAP(imm_v64_shr_n_u16<14>),
                      MAP(imm_v64_shr_n_s16<1>),
                      MAP(imm_v64_shr_n_s16<2>),
                      MAP(imm_v64_shr_n_s16<4>),
                      MAP(imm_v64_shr_n_s16<6>),
                      MAP(imm_v64_shr_n_s16<8>),
                      MAP(imm_v64_shr_n_s16<10>),
                      MAP(imm_v64_shr_n_s16<12>),
                      MAP(imm_v64_shr_n_s16<14>),
                      MAP(imm_v64_shl_n_32<1>),
                      MAP(imm_v64_shl_n_32<4>),
                      MAP(imm_v64_shl_n_32<8>),
                      MAP(imm_v64_shl_n_32<12>),
                      MAP(imm_v64_shl_n_32<16>),
                      MAP(imm_v64_shl_n_32<20>),
                      MAP(imm_v64_shl_n_32<24>),
                      MAP(imm_v64_shl_n_32<28>),
                      MAP(imm_v64_shr_n_u32<1>),
                      MAP(imm_v64_shr_n_u32<4>),
                      MAP(imm_v64_shr_n_u32<8>),
                      MAP(imm_v64_shr_n_u32<12>),
                      MAP(imm_v64_shr_n_u32<16>),
                      MAP(imm_v64_shr_n_u32<20>),
                      MAP(imm_v64_shr_n_u32<24>),
                      MAP(imm_v64_shr_n_u32<28>),
                      MAP(imm_v64_shr_n_s32<1>),
                      MAP(imm_v64_shr_n_s32<4>),
                      MAP(imm_v64_shr_n_s32<8>),
                      MAP(imm_v64_shr_n_s32<12>),
                      MAP(imm_v64_shr_n_s32<16>),
                      MAP(imm_v64_shr_n_s32<20>),
                      MAP(imm_v64_shr_n_s32<24>),
                      MAP(imm_v64_shr_n_s32<28>),
                      MAP(v64_shl_8),
                      MAP(v64_shr_u8),
                      MAP(v64_shr_s8),
                      MAP(v64_shl_16),
                      MAP(v64_shr_u16),
                      MAP(v64_shr_s16),
                      MAP(v64_shl_32),
                      MAP(v64_shr_u32),
                      MAP(v64_shr_s32),
                      MAP(v64_hadd_u8),
                      MAP(v64_hadd_s16),
                      MAP(v64_dotp_s16),
                      MAP(v64_dotp_su8),
                      MAP(v64_u64),
                      MAP(v64_low_u32),
                      MAP(v64_high_u32),
                      MAP(v64_low_s32),
                      MAP(v64_high_s32),
                      MAP(v64_dup_8),
                      MAP(v64_dup_16),
                      MAP(v64_dup_32),
                      MAP(v64_from_32),
                      MAP(v64_zero),
                      MAP(v64_from_16),
                      MAP(v128_sad_u8),
                      MAP(v128_ssd_u8),
                      MAP(v128_add_8),
                      MAP(v128_add_16),
                      MAP(v128_sadd_s16),
                      MAP(v128_add_32),
                      MAP(v128_sub_8),
                      MAP(v128_ssub_u8),
                      MAP(v128_ssub_s8),
                      MAP(v128_sub_16),
                      MAP(v128_ssub_s16),
                      MAP(v128_ssub_u16),
                      MAP(v128_sub_32),
                      MAP(v128_ziplo_8),
                      MAP(v128_ziphi_8),
                      MAP(v128_ziplo_16),
                      MAP(v128_ziphi_16),
                      MAP(v128_ziplo_32),
                      MAP(v128_ziphi_32),
                      MAP(v128_ziplo_64),
                      MAP(v128_ziphi_64),
                      MAP(v128_unziphi_8),
                      MAP(v128_unziplo_8),
                      MAP(v128_unziphi_16),
                      MAP(v128_unziplo_16),
                      MAP(v128_unziphi_32),
                      MAP(v128_unziplo_32),
                      MAP(v128_pack_s32_s16),
                      MAP(v128_pack_s16_u8),
                      MAP(v128_pack_s16_s8),
                      MAP(v128_or),
                      MAP(v128_xor),
                      MAP(v128_and),
                      MAP(v128_andn),
                      MAP(v128_mullo_s16),
                      MAP(v128_mulhi_s16),
                      MAP(v128_mullo_s32),
                      MAP(v128_madd_s16),
                      MAP(v128_madd_us8),
                      MAP(v128_avg_u8),
                      MAP(v128_rdavg_u8),
                      MAP(v128_avg_u16),
                      MAP(v128_min_u8),
                      MAP(v128_max_u8),
                      MAP(v128_min_s8),
                      MAP(v128_max_s8),
                      MAP(v128_min_s16),
                      MAP(v128_max_s16),
                      MAP(v128_cmpgt_s8),
                      MAP(v128_cmplt_s8),
                      MAP(v128_cmpeq_8),
                      MAP(v128_cmpgt_s16),
                      MAP(v128_cmpeq_16),
                      MAP(v128_cmplt_s16),
                      MAP(v128_shuffle_8),
                      MAP(imm_v128_align<1>),
                      MAP(imm_v128_align<2>),
                      MAP(imm_v128_align<3>),
                      MAP(imm_v128_align<4>),
                      MAP(imm_v128_align<5>),
                      MAP(imm_v128_align<6>),
                      MAP(imm_v128_align<7>),
                      MAP(imm_v128_align<8>),
                      MAP(imm_v128_align<9>),
                      MAP(imm_v128_align<10>),
                      MAP(imm_v128_align<11>),
                      MAP(imm_v128_align<12>),
                      MAP(imm_v128_align<13>),
                      MAP(imm_v128_align<14>),
                      MAP(imm_v128_align<15>),
                      MAP(v128_abs_s8),
                      MAP(v128_abs_s16),
                      MAP(v128_padd_s16),
                      MAP(v128_unpacklo_u16_s32),
                      MAP(v128_unpacklo_s16_s32),
                      MAP(v128_unpackhi_u16_s32),
                      MAP(v128_unpackhi_s16_s32),
                      MAP(imm_v128_shr_n_byte<1>),
                      MAP(imm_v128_shr_n_byte<2>),
                      MAP(imm_v128_shr_n_byte<3>),
                      MAP(imm_v128_shr_n_byte<4>),
                      MAP(imm_v128_shr_n_byte<5>),
                      MAP(imm_v128_shr_n_byte<6>),
                      MAP(imm_v128_shr_n_byte<7>),
                      MAP(imm_v128_shr_n_byte<8>),
                      MAP(imm_v128_shr_n_byte<9>),
                      MAP(imm_v128_shr_n_byte<10>),
                      MAP(imm_v128_shr_n_byte<11>),
                      MAP(imm_v128_shr_n_byte<12>),
                      MAP(imm_v128_shr_n_byte<13>),
                      MAP(imm_v128_shr_n_byte<14>),
                      MAP(imm_v128_shr_n_byte<15>),
                      MAP(imm_v128_shl_n_byte<1>),
                      MAP(imm_v128_shl_n_byte<2>),
                      MAP(imm_v128_shl_n_byte<3>),
                      MAP(imm_v128_shl_n_byte<4>),
                      MAP(imm_v128_shl_n_byte<5>),
                      MAP(imm_v128_shl_n_byte<6>),
                      MAP(imm_v128_shl_n_byte<7>),
                      MAP(imm_v128_shl_n_byte<8>),
                      MAP(imm_v128_shl_n_byte<9>),
                      MAP(imm_v128_shl_n_byte<10>),
                      MAP(imm_v128_shl_n_byte<11>),
                      MAP(imm_v128_shl_n_byte<12>),
                      MAP(imm_v128_shl_n_byte<13>),
                      MAP(imm_v128_shl_n_byte<14>),
                      MAP(imm_v128_shl_n_byte<15>),
                      MAP(imm_v128_shl_n_8<1>),
                      MAP(imm_v128_shl_n_8<2>),
                      MAP(imm_v128_shl_n_8<3>),
                      MAP(imm_v128_shl_n_8<4>),
                      MAP(imm_v128_shl_n_8<5>),
                      MAP(imm_v128_shl_n_8<6>),
                      MAP(imm_v128_shl_n_8<7>),
                      MAP(imm_v128_shr_n_u8<1>),
                      MAP(imm_v128_shr_n_u8<2>),
                      MAP(imm_v128_shr_n_u8<3>),
                      MAP(imm_v128_shr_n_u8<4>),
                      MAP(imm_v128_shr_n_u8<5>),
                      MAP(imm_v128_shr_n_u8<6>),
                      MAP(imm_v128_shr_n_u8<7>),
                      MAP(imm_v128_shr_n_s8<1>),
                      MAP(imm_v128_shr_n_s8<2>),
                      MAP(imm_v128_shr_n_s8<3>),
                      MAP(imm_v128_shr_n_s8<4>),
                      MAP(imm_v128_shr_n_s8<5>),
                      MAP(imm_v128_shr_n_s8<6>),
                      MAP(imm_v128_shr_n_s8<7>),
                      MAP(imm_v128_shl_n_16<1>),
                      MAP(imm_v128_shl_n_16<2>),
                      MAP(imm_v128_shl_n_16<4>),
                      MAP(imm_v128_shl_n_16<6>),
                      MAP(imm_v128_shl_n_16<8>),
                      MAP(imm_v128_shl_n_16<10>),
                      MAP(imm_v128_shl_n_16<12>),
                      MAP(imm_v128_shl_n_16<14>),
                      MAP(imm_v128_shr_n_u16<1>),
                      MAP(imm_v128_shr_n_u16<2>),
                      MAP(imm_v128_shr_n_u16<4>),
                      MAP(imm_v128_shr_n_u16<6>),
                      MAP(imm_v128_shr_n_u16<8>),
                      MAP(imm_v128_shr_n_u16<10>),
                      MAP(imm_v128_shr_n_u16<12>),
                      MAP(imm_v128_shr_n_u16<14>),
                      MAP(imm_v128_shr_n_s16<1>),
                      MAP(imm_v128_shr_n_s16<2>),
                      MAP(imm_v128_shr_n_s16<4>),
                      MAP(imm_v128_shr_n_s16<6>),
                      MAP(imm_v128_shr_n_s16<8>),
                      MAP(imm_v128_shr_n_s16<10>),
                      MAP(imm_v128_shr_n_s16<12>),
                      MAP(imm_v128_shr_n_s16<14>),
                      MAP(imm_v128_shl_n_32<1>),
                      MAP(imm_v128_shl_n_32<4>),
                      MAP(imm_v128_shl_n_32<8>),
                      MAP(imm_v128_shl_n_32<12>),
                      MAP(imm_v128_shl_n_32<16>),
                      MAP(imm_v128_shl_n_32<20>),
                      MAP(imm_v128_shl_n_32<24>),
                      MAP(imm_v128_shl_n_32<28>),
                      MAP(imm_v128_shr_n_u32<1>),
                      MAP(imm_v128_shr_n_u32<4>),
                      MAP(imm_v128_shr_n_u32<8>),
                      MAP(imm_v128_shr_n_u32<12>),
                      MAP(imm_v128_shr_n_u32<16>),
                      MAP(imm_v128_shr_n_u32<20>),
                      MAP(imm_v128_shr_n_u32<24>),
                      MAP(imm_v128_shr_n_u32<28>),
                      MAP(imm_v128_shr_n_s32<1>),
                      MAP(imm_v128_shr_n_s32<4>),
                      MAP(imm_v128_shr_n_s32<8>),
                      MAP(imm_v128_shr_n_s32<12>),
                      MAP(imm_v128_shr_n_s32<16>),
                      MAP(imm_v128_shr_n_s32<20>),
                      MAP(imm_v128_shr_n_s32<24>),
                      MAP(imm_v128_shr_n_s32<28>),
                      MAP(v128_from_v64),
                      MAP(v128_zip_8),
                      MAP(v128_zip_16),
                      MAP(v128_zip_32),
                      MAP(v128_mul_s16),
                      MAP(v128_unpack_u8_s16),
                      MAP(v128_unpack_s8_s16),
                      MAP(v128_unpack_u16_s32),
                      MAP(v128_unpack_s16_s32),
                      MAP(v128_shl_8),
                      MAP(v128_shr_u8),
                      MAP(v128_shr_s8),
                      MAP(v128_shl_16),
                      MAP(v128_shr_u16),
                      MAP(v128_shr_s16),
                      MAP(v128_shl_32),
                      MAP(v128_shr_u32),
                      MAP(v128_shr_s32),
                      MAP(v128_hadd_u8),
                      MAP(v128_dotp_s16),
                      MAP(v128_low_u32),
                      MAP(v128_low_v64),
                      MAP(v128_high_v64),
                      MAP(v128_from_64),
                      MAP(v128_from_32),
                      MAP(v128_zero),
                      MAP(v128_dup_8),
                      MAP(v128_dup_16),
                      MAP(v128_dup_32),
                      MAP(v128_unpacklo_u8_s16),
                      MAP(v128_unpackhi_u8_s16),
                      MAP(v128_unpacklo_s8_s16),
                      MAP(v128_unpackhi_s8_s16),
                      MAP(u32_load_unaligned),
                      MAP(u32_store_unaligned),
                      MAP(v64_load_unaligned),
                      MAP(v64_store_unaligned),
                      MAP(v128_load_unaligned),
                      MAP(v128_store_unaligned),
                      MAP(v256_sad_u8),
                      MAP(v256_ssd_u8),
                      MAP(v256_hadd_u8),
                      MAP(v256_dotp_s16),
                      MAP(v256_add_8),
                      MAP(v256_add_16),
                      MAP(v256_sadd_s16),
                      MAP(v256_add_32),
                      MAP(v256_sub_8),
                      MAP(v256_ssub_u8),
                      MAP(v256_ssub_s8),
                      MAP(v256_sub_16),
                      MAP(v256_ssub_u16),
                      MAP(v256_ssub_s16),
                      MAP(v256_sub_32),
                      MAP(v256_ziplo_8),
                      MAP(v256_ziphi_8),
                      MAP(v256_ziplo_16),
                      MAP(v256_ziphi_16),
                      MAP(v256_ziplo_32),
                      MAP(v256_ziphi_32),
                      MAP(v256_ziplo_64),
                      MAP(v256_ziphi_64),
                      MAP(v256_unziphi_8),
                      MAP(v256_unziplo_8),
                      MAP(v256_unziphi_16),
                      MAP(v256_unziplo_16),
                      MAP(v256_unziphi_32),
                      MAP(v256_unziplo_32),
                      MAP(v256_pack_s32_s16),
                      MAP(v256_pack_s16_u8),
                      MAP(v256_pack_s16_s8),
                      MAP(v256_or),
                      MAP(v256_xor),
                      MAP(v256_and),
                      MAP(v256_andn),
                      MAP(v256_mullo_s16),
                      MAP(v256_mulhi_s16),
                      MAP(v256_mullo_s32),
                      MAP(v256_madd_s16),
                      MAP(v256_madd_us8),
                      MAP(v256_avg_u8),
                      MAP(v256_rdavg_u8),
                      MAP(v256_avg_u16),
                      MAP(v256_min_u8),
                      MAP(v256_max_u8),
                      MAP(v256_min_s8),
                      MAP(v256_max_s8),
                      MAP(v256_min_s16),
                      MAP(v256_max_s16),
                      MAP(v256_cmpgt_s8),
                      MAP(v256_cmplt_s8),
                      MAP(v256_cmpeq_8),
                      MAP(v256_cmpgt_s16),
                      MAP(v256_cmplt_s16),
                      MAP(v256_cmpeq_16),
                      MAP(v256_shuffle_8),
                      MAP(v256_pshuffle_8),
                      MAP(imm_v256_align<1>),
                      MAP(imm_v256_align<2>),
                      MAP(imm_v256_align<3>),
                      MAP(imm_v256_align<4>),
                      MAP(imm_v256_align<5>),
                      MAP(imm_v256_align<6>),
                      MAP(imm_v256_align<7>),
                      MAP(imm_v256_align<8>),
                      MAP(imm_v256_align<9>),
                      MAP(imm_v256_align<10>),
                      MAP(imm_v256_align<11>),
                      MAP(imm_v256_align<12>),
                      MAP(imm_v256_align<13>),
                      MAP(imm_v256_align<14>),
                      MAP(imm_v256_align<15>),
                      MAP(imm_v256_align<16>),
                      MAP(imm_v256_align<17>),
                      MAP(imm_v256_align<18>),
                      MAP(imm_v256_align<19>),
                      MAP(imm_v256_align<20>),
                      MAP(imm_v256_align<21>),
                      MAP(imm_v256_align<22>),
                      MAP(imm_v256_align<23>),
                      MAP(imm_v256_align<24>),
                      MAP(imm_v256_align<25>),
                      MAP(imm_v256_align<26>),
                      MAP(imm_v256_align<27>),
                      MAP(imm_v256_align<28>),
                      MAP(imm_v256_align<29>),
                      MAP(imm_v256_align<30>),
                      MAP(imm_v256_align<31>),
                      MAP(v256_from_v128),
                      MAP(v256_zip_8),
                      MAP(v256_zip_16),
                      MAP(v256_zip_32),
                      MAP(v256_mul_s16),
                      MAP(v256_unpack_u8_s16),
                      MAP(v256_unpack_s8_s16),
                      MAP(v256_unpack_u16_s32),
                      MAP(v256_unpack_s16_s32),
                      MAP(v256_shl_8),
                      MAP(v256_shr_u8),
                      MAP(v256_shr_s8),
                      MAP(v256_shl_16),
                      MAP(v256_shr_u16),
                      MAP(v256_shr_s16),
                      MAP(v256_shl_32),
                      MAP(v256_shr_u32),
                      MAP(v256_shr_s32),
                      MAP(v256_abs_s8),
                      MAP(v256_abs_s16),
                      MAP(v256_padd_s16),
                      MAP(v256_unpacklo_u16_s32),
                      MAP(v256_unpacklo_s16_s32),
                      MAP(v256_unpackhi_u16_s32),
                      MAP(v256_unpackhi_s16_s32),
                      MAP(imm_v256_shr_n_byte<1>),
                      MAP(imm_v256_shr_n_byte<2>),
                      MAP(imm_v256_shr_n_byte<3>),
                      MAP(imm_v256_shr_n_byte<4>),
                      MAP(imm_v256_shr_n_byte<5>),
                      MAP(imm_v256_shr_n_byte<6>),
                      MAP(imm_v256_shr_n_byte<7>),
                      MAP(imm_v256_shr_n_byte<8>),
                      MAP(imm_v256_shr_n_byte<9>),
                      MAP(imm_v256_shr_n_byte<10>),
                      MAP(imm_v256_shr_n_byte<11>),
                      MAP(imm_v256_shr_n_byte<12>),
                      MAP(imm_v256_shr_n_byte<13>),
                      MAP(imm_v256_shr_n_byte<14>),
                      MAP(imm_v256_shr_n_byte<15>),
                      MAP(imm_v256_shr_n_byte<16>),
                      MAP(imm_v256_shr_n_byte<17>),
                      MAP(imm_v256_shr_n_byte<18>),
                      MAP(imm_v256_shr_n_byte<19>),
                      MAP(imm_v256_shr_n_byte<20>),
                      MAP(imm_v256_shr_n_byte<21>),
                      MAP(imm_v256_shr_n_byte<22>),
                      MAP(imm_v256_shr_n_byte<23>),
                      MAP(imm_v256_shr_n_byte<24>),
                      MAP(imm_v256_shr_n_byte<25>),
                      MAP(imm_v256_shr_n_byte<26>),
                      MAP(imm_v256_shr_n_byte<27>),
                      MAP(imm_v256_shr_n_byte<28>),
                      MAP(imm_v256_shr_n_byte<29>),
                      MAP(imm_v256_shr_n_byte<30>),
                      MAP(imm_v256_shr_n_byte<31>),
                      MAP(imm_v256_shl_n_byte<1>),
                      MAP(imm_v256_shl_n_byte<2>),
                      MAP(imm_v256_shl_n_byte<3>),
                      MAP(imm_v256_shl_n_byte<4>),
                      MAP(imm_v256_shl_n_byte<5>),
                      MAP(imm_v256_shl_n_byte<6>),
                      MAP(imm_v256_shl_n_byte<7>),
                      MAP(imm_v256_shl_n_byte<8>),
                      MAP(imm_v256_shl_n_byte<9>),
                      MAP(imm_v256_shl_n_byte<10>),
                      MAP(imm_v256_shl_n_byte<11>),
                      MAP(imm_v256_shl_n_byte<12>),
                      MAP(imm_v256_shl_n_byte<13>),
                      MAP(imm_v256_shl_n_byte<14>),
                      MAP(imm_v256_shl_n_byte<15>),
                      MAP(imm_v256_shl_n_byte<16>),
                      MAP(imm_v256_shl_n_byte<17>),
                      MAP(imm_v256_shl_n_byte<18>),
                      MAP(imm_v256_shl_n_byte<19>),
                      MAP(imm_v256_shl_n_byte<20>),
                      MAP(imm_v256_shl_n_byte<21>),
                      MAP(imm_v256_shl_n_byte<22>),
                      MAP(imm_v256_shl_n_byte<23>),
                      MAP(imm_v256_shl_n_byte<24>),
                      MAP(imm_v256_shl_n_byte<25>),
                      MAP(imm_v256_shl_n_byte<26>),
                      MAP(imm_v256_shl_n_byte<27>),
                      MAP(imm_v256_shl_n_byte<28>),
                      MAP(imm_v256_shl_n_byte<29>),
                      MAP(imm_v256_shl_n_byte<30>),
                      MAP(imm_v256_shl_n_byte<31>),
                      MAP(imm_v256_shl_n_8<1>),
                      MAP(imm_v256_shl_n_8<2>),
                      MAP(imm_v256_shl_n_8<3>),
                      MAP(imm_v256_shl_n_8<4>),
                      MAP(imm_v256_shl_n_8<5>),
                      MAP(imm_v256_shl_n_8<6>),
                      MAP(imm_v256_shl_n_8<7>),
                      MAP(imm_v256_shr_n_u8<1>),
                      MAP(imm_v256_shr_n_u8<2>),
                      MAP(imm_v256_shr_n_u8<3>),
                      MAP(imm_v256_shr_n_u8<4>),
                      MAP(imm_v256_shr_n_u8<5>),
                      MAP(imm_v256_shr_n_u8<6>),
                      MAP(imm_v256_shr_n_u8<7>),
                      MAP(imm_v256_shr_n_s8<1>),
                      MAP(imm_v256_shr_n_s8<2>),
                      MAP(imm_v256_shr_n_s8<3>),
                      MAP(imm_v256_shr_n_s8<4>),
                      MAP(imm_v256_shr_n_s8<5>),
                      MAP(imm_v256_shr_n_s8<6>),
                      MAP(imm_v256_shr_n_s8<7>),
                      MAP(imm_v256_shl_n_16<1>),
                      MAP(imm_v256_shl_n_16<2>),
                      MAP(imm_v256_shl_n_16<4>),
                      MAP(imm_v256_shl_n_16<6>),
                      MAP(imm_v256_shl_n_16<8>),
                      MAP(imm_v256_shl_n_16<10>),
                      MAP(imm_v256_shl_n_16<12>),
                      MAP(imm_v256_shl_n_16<14>),
                      MAP(imm_v256_shr_n_u16<1>),
                      MAP(imm_v256_shr_n_u16<2>),
                      MAP(imm_v256_shr_n_u16<4>),
                      MAP(imm_v256_shr_n_u16<6>),
                      MAP(imm_v256_shr_n_u16<8>),
                      MAP(imm_v256_shr_n_u16<10>),
                      MAP(imm_v256_shr_n_u16<12>),
                      MAP(imm_v256_shr_n_u16<14>),
                      MAP(imm_v256_shr_n_s16<1>),
                      MAP(imm_v256_shr_n_s16<2>),
                      MAP(imm_v256_shr_n_s16<4>),
                      MAP(imm_v256_shr_n_s16<6>),
                      MAP(imm_v256_shr_n_s16<8>),
                      MAP(imm_v256_shr_n_s16<10>),
                      MAP(imm_v256_shr_n_s16<12>),
                      MAP(imm_v256_shr_n_s16<14>),
                      MAP(imm_v256_shl_n_32<1>),
                      MAP(imm_v256_shl_n_32<4>),
                      MAP(imm_v256_shl_n_32<8>),
                      MAP(imm_v256_shl_n_32<12>),
                      MAP(imm_v256_shl_n_32<16>),
                      MAP(imm_v256_shl_n_32<20>),
                      MAP(imm_v256_shl_n_32<24>),
                      MAP(imm_v256_shl_n_32<28>),
                      MAP(imm_v256_shr_n_u32<1>),
                      MAP(imm_v256_shr_n_u32<4>),
                      MAP(imm_v256_shr_n_u32<8>),
                      MAP(imm_v256_shr_n_u32<12>),
                      MAP(imm_v256_shr_n_u32<16>),
                      MAP(imm_v256_shr_n_u32<20>),
                      MAP(imm_v256_shr_n_u32<24>),
                      MAP(imm_v256_shr_n_u32<28>),
                      MAP(imm_v256_shr_n_s32<1>),
                      MAP(imm_v256_shr_n_s32<4>),
                      MAP(imm_v256_shr_n_s32<8>),
                      MAP(imm_v256_shr_n_s32<12>),
                      MAP(imm_v256_shr_n_s32<16>),
                      MAP(imm_v256_shr_n_s32<20>),
                      MAP(imm_v256_shr_n_s32<24>),
                      MAP(imm_v256_shr_n_s32<28>),
                      MAP(v256_zero),
                      MAP(v256_dup_8),
                      MAP(v256_dup_16),
                      MAP(v256_dup_32),
                      MAP(v256_low_u32),
                      MAP(v256_low_v64),
                      MAP(v256_from_64),
                      MAP(v256_from_v64),
                      MAP(v256_ziplo_128),
                      MAP(v256_ziphi_128),
                      MAP(v256_unpacklo_u8_s16),
                      MAP(v256_unpackhi_u8_s16),
                      MAP(v256_unpacklo_s8_s16),
                      MAP(v256_unpackhi_s8_s16),
                      { NULL, NULL, NULL } };
#undef MAP
1031
1032// Map reference functions to machine tuned functions. Since the
1033// functions depend on machine tuned types, the non-machine tuned
1034// instantiations of the test can't refer to these functions directly,
1035// so we refer to them by name and do the mapping here.
Steinar Midtskogen8b28d862017-01-09 11:33:20 +01001036void Map(const char *name, fptr *ref, fptr *simd) {
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001037 unsigned int i;
Steinar Midtskogen8b28d862017-01-09 11:33:20 +01001038 for (i = 0; m[i].name && strcmp(name, m[i].name); i++) {
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001039 }
1040
Steinar Midtskogen8b28d862017-01-09 11:33:20 +01001041 *ref = m[i].ref;
1042 *simd = m[i].simd;
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001043}
1044
1045// Used for printing errors in TestSimd1Arg and TestSimd2Args
1046std::string Print(const uint8_t *a, int size) {
1047 std::string text = "0x";
1048 for (int i = 0; i < size; i++) {
Steinar Midtskogen03ab5272017-01-10 07:30:47 +01001049 const uint8_t c = a[!CONFIG_BIG_ENDIAN ? size - 1 - i : i];
1050 // Same as snprintf(..., ..., "%02x", c)
1051 text += (c >> 4) + '0' + ((c >> 4) > 9) * ('a' - '0' - 10);
1052 text += (c & 15) + '0' + ((c & 15) > 9) * ('a' - '0' - 10);
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001053 }
1054
1055 return text;
1056}
1057
1058// Used in TestSimd1Arg and TestSimd2Args to restrict argument ranges
1059void SetMask(uint8_t *s, int size, uint32_t mask, uint32_t maskwidth) {
1060 switch (maskwidth) {
1061 case 0: {
1062 break;
1063 }
1064 case 8: {
1065 for (int i = 0; i < size; i++) s[i] &= mask;
1066 break;
1067 }
1068 case 16: {
1069 uint16_t *t = reinterpret_cast<uint16_t *>(s);
1070 assert(!(reinterpret_cast<uintptr_t>(s) & 1));
1071 for (int i = 0; i < size / 2; i++) t[i] &= mask;
1072 break;
1073 }
1074 case 32: {
1075 uint32_t *t = reinterpret_cast<uint32_t *>(s);
1076 assert(!(reinterpret_cast<uintptr_t>(s) & 3));
1077 for (int i = 0; i < size / 4; i++) t[i] &= mask;
1078 break;
1079 }
1080 case 64: {
1081 uint64_t *t = reinterpret_cast<uint64_t *>(s);
1082 assert(!(reinterpret_cast<uintptr_t>(s) & 7));
1083 for (int i = 0; i < size / 8; i++) t[i] &= mask;
1084 break;
1085 }
1086 default: {
1087 FAIL() << "Unsupported mask width";
1088 break;
1089 }
1090 }
1091}
1092
// We need some extra load/store functions
// Stores a 64-bit scalar through the machine-tuned v64 store path.
void u64_store_aligned(void *p, uint64_t a) {
  v64_store_aligned(p, v64_from_64(a));
}
// Stores a signed 32-bit scalar by reusing the unsigned 32-bit store.
void s32_store_aligned(void *p, int32_t a) {
  u32_store_aligned(p, static_cast<uint32_t>(a));
}
// Stores a signed 64-bit scalar through the machine-tuned v64 store path.
void s64_store_aligned(void *p, int64_t a) {
  v64_store_aligned(p, v64_from_64(static_cast<uint64_t>(a)));
}
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001103
// C-reference counterpart of u64_store_aligned (c_v64 store path).
void c_u64_store_aligned(void *p, uint64_t a) {
  c_v64_store_aligned(p, c_v64_from_64(a));
}
1107
// C-reference counterpart of s32_store_aligned.
void c_s32_store_aligned(void *p, int32_t a) {
  c_u32_store_aligned(p, static_cast<uint32_t>(a));
}
Steinar Midtskogen82d580c2016-09-30 13:14:04 +02001111
// C-reference counterpart of s64_store_aligned.
void c_s64_store_aligned(void *p, int64_t a) {
  c_v64_store_aligned(p, c_v64_from_64(static_cast<uint64_t>(a)));
}
1115
// Loads a 64-bit scalar through the machine-tuned v64 load path.
uint64_t u64_load_aligned(const void *p) {
  return v64_u64(v64_load_aligned(p));
}
// Loads an aligned, native-endian 16-bit scalar from p.
uint16_t u16_load_aligned(const void *p) {
  const uint16_t *words = static_cast<const uint16_t *>(p);
  return words[0];
}
// Loads a single byte from p.
uint8_t u8_load_aligned(const void *p) {
  const uint8_t *bytes = static_cast<const uint8_t *>(p);
  return bytes[0];
}
1125
// C-reference counterpart of u64_load_aligned (c_v64 load path).
uint64_t c_u64_load_aligned(const void *p) {
  return c_v64_u64(c_v64_load_aligned(p));
}
// C-reference counterpart of u16_load_aligned: plain aligned 16-bit load.
uint16_t c_u16_load_aligned(const void *p) {
  const uint16_t *words = static_cast<const uint16_t *>(p);
  return words[0];
}
// C-reference counterpart of u8_load_aligned: plain byte load.
uint8_t c_u8_load_aligned(const void *p) {
  const uint8_t *bytes = static_cast<const uint8_t *>(p);
  return bytes[0];
}
Steinar Midtskogen82d580c2016-09-30 13:14:04 +02001135
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001136// CompareSimd1Arg and CompareSimd2Args compare intrinsics taking 1 or
1137// 2 arguments respectively with their corresponding C reference.
1138// Ideally, the loads and stores should have gone into the template
1139// parameter list, but v64 and v128 could be typedef'ed to the same
1140// type (which is the case on x86) and then we can't instantiate both
1141// v64 and v128, so the function return and argument types, including
1142// the always differing types in the C equivalent are used instead.
1143// The function arguments must be void pointers and then go through a
1144// cast to avoid matching errors in the branches eliminated by the
1145// typeid tests in the calling function.
1146template <typename Ret, typename Arg, typename CRet, typename CArg>
Steinar Midtskogen8b28d862017-01-09 11:33:20 +01001147int CompareSimd1Arg(fptr store, fptr load, fptr simd, void *d, fptr c_store,
1148 fptr c_load, fptr c_simd, void *ref_d, const void *a) {
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001149 void (*const my_store)(void *, Ret) = (void (*const)(void *, Ret))store;
1150 Arg (*const my_load)(const void *) = (Arg(*const)(const void *))load;
1151 Ret (*const my_simd)(Arg) = (Ret(*const)(Arg))simd;
1152 void (*const my_c_store)(void *, CRet) = (void (*const)(void *, CRet))c_store;
1153 CArg (*const my_c_load)(const void *) = (CArg(*const)(const void *))c_load;
1154 CRet (*const my_c_simd)(CArg) = (CRet(*const)(CArg))c_simd;
1155
1156 // Call reference and intrinsic
Steinar Midtskogenc20176e2017-03-01 09:16:09 +01001157 my_c_store(ref_d, my_c_simd(my_c_load(a)));
1158 my_store(d, my_simd(my_load(a)));
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001159
1160 // Compare results
1161 return memcmp(ref_d, d, sizeof(CRet));
1162}
1163
1164template <typename Ret, typename Arg1, typename Arg2, typename CRet,
1165 typename CArg1, typename CArg2>
Steinar Midtskogen8b28d862017-01-09 11:33:20 +01001166int CompareSimd2Args(fptr store, fptr load1, fptr load2, fptr simd, void *d,
1167 fptr c_store, fptr c_load1, fptr c_load2, fptr c_simd,
1168 void *ref_d, const void *a, const void *b) {
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001169 void (*const my_store)(void *, Ret) = (void (*const)(void *, Ret))store;
1170 Arg1 (*const my_load1)(const void *) = (Arg1(*const)(const void *))load1;
1171 Arg2 (*const my_load2)(const void *) = (Arg2(*const)(const void *))load2;
1172 Ret (*const my_simd)(Arg1, Arg2) = (Ret(*const)(Arg1, Arg2))simd;
1173 void (*const my_c_store)(void *, CRet) = (void (*const)(void *, CRet))c_store;
1174 CArg1 (*const my_c_load1)(const void *) =
1175 (CArg1(*const)(const void *))c_load1;
1176 CArg2 (*const my_c_load2)(const void *) =
1177 (CArg2(*const)(const void *))c_load2;
1178 CRet (*const my_c_simd)(CArg1, CArg2) = (CRet(*const)(CArg1, CArg2))c_simd;
1179
1180 // Call reference and intrinsic
Steinar Midtskogenc20176e2017-03-01 09:16:09 +01001181 my_c_store(ref_d, my_c_simd(my_c_load1(a), my_c_load2(b)));
1182 my_store(d, my_simd(my_load1(a), my_load2(b)));
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001183
1184 // Compare results
1185 return memcmp(ref_d, d, sizeof(CRet));
1186}
1187
Steinar Midtskogen04305c62016-09-30 13:14:04 +02001188} // namespace
1189
// Runs 'iterations' trials of the one-argument intrinsic registered under
// 'name', each on a fresh random input, comparing the machine-tuned
// implementation against its C reference. If 'maskwidth' is non-zero the
// input is masked with 'mask' (see SetMask) to restrict its range.
// CRet/CArg are the C reference return/argument types; the typeid chain
// below selects the matching machine-tuned signature at runtime.
template <typename CRet, typename CArg>
void TestSimd1Arg(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
                  const char *name) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  fptr ref_simd;
  fptr simd;
  int error = 0;
  // Raw byte buffers for the input (s), SIMD output (d) and reference
  // output (ref_d); 32-byte alignment covers up to v256 accesses.
  DECLARE_ALIGNED(32, uint8_t, s[sizeof(CArg)]);
  DECLARE_ALIGNED(32, uint8_t, d[sizeof(CRet)]);
  DECLARE_ALIGNED(32, uint8_t, ref_d[sizeof(CRet)]);
  memset(ref_d, 0, sizeof(ref_d));
  memset(d, 0, sizeof(d));

  Map(name, &ref_simd, &simd);
  if (simd == NULL || ref_simd == NULL) {
    FAIL() << "Internal error: Unknown intrinsic function " << name;
  }
  for (unsigned int count = 0;
       count < iterations && !error && !testing::Test::HasFailure(); count++) {
    // Generate a random argument.
    for (unsigned int c = 0; c < sizeof(CArg); c++) s[c] = rnd.Rand8();

    if (maskwidth) {
      SetMask(s, sizeof(CArg), mask, maskwidth);
    }

    // Dispatch on the C reference signature to pick the correspondingly
    // typed machine-tuned load/store wrappers.
    if (typeid(CRet) == typeid(c_v64) && typeid(CArg) == typeid(c_v64)) {
      // V64_V64
      error = CompareSimd1Arg<v64, v64, CRet, CArg>(
          reinterpret_cast<fptr>(v64_store_aligned),
          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v64_store_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v64) &&
               typeid(CArg) == typeid(uint8_t)) {
      // V64_U8
      error = CompareSimd1Arg<v64, uint8_t, CRet, CArg>(
          reinterpret_cast<fptr>(v64_store_aligned),
          reinterpret_cast<fptr>(u8_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v64_store_aligned),
          reinterpret_cast<fptr>(c_u8_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v64) &&
               typeid(CArg) == typeid(uint16_t)) {
      // V64_U16
      error = CompareSimd1Arg<v64, uint16_t, CRet, CArg>(
          reinterpret_cast<fptr>(v64_store_aligned),
          reinterpret_cast<fptr>(u16_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v64_store_aligned),
          reinterpret_cast<fptr>(c_u16_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v64) &&
               typeid(CArg) == typeid(uint32_t)) {
      // V64_U32
      error = CompareSimd1Arg<v64, uint32_t, CRet, CArg>(
          reinterpret_cast<fptr>(v64_store_aligned),
          reinterpret_cast<fptr>(u32_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v64_store_aligned),
          reinterpret_cast<fptr>(c_u32_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(uint64_t) &&
               typeid(CArg) == typeid(c_v64)) {
      // U64_V64
      error = CompareSimd1Arg<uint64_t, v64, CRet, CArg>(
          reinterpret_cast<fptr>(u64_store_aligned),
          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_u64_store_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(int64_t) &&
               typeid(CArg) == typeid(c_v64)) {
      // S64_V64
      error = CompareSimd1Arg<int64_t, v64, CRet, CArg>(
          reinterpret_cast<fptr>(s64_store_aligned),
          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_s64_store_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(uint32_t) &&
               typeid(CArg) == typeid(c_v64)) {
      // U32_V64
      error = CompareSimd1Arg<uint32_t, v64, CRet, CArg>(
          reinterpret_cast<fptr>(u32_store_aligned),
          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_u32_store_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(int32_t) &&
               typeid(CArg) == typeid(c_v64)) {
      // S32_V64
      error = CompareSimd1Arg<int32_t, v64, CRet, CArg>(
          reinterpret_cast<fptr>(s32_store_aligned),
          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_s32_store_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(uint32_t) &&
               typeid(CArg) == typeid(c_v128)) {
      // U32_V128
      error = CompareSimd1Arg<uint32_t, v128, CRet, CArg>(
          reinterpret_cast<fptr>(u32_store_aligned),
          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_u32_store_aligned),
          reinterpret_cast<fptr>(c_v128_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(uint64_t) &&
               typeid(CArg) == typeid(c_v128)) {
      // U64_V128
      error = CompareSimd1Arg<uint64_t, v128, CRet, CArg>(
          reinterpret_cast<fptr>(u64_store_aligned),
          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_u64_store_aligned),
          reinterpret_cast<fptr>(c_v128_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(uint64_t) &&
               typeid(CArg) == typeid(c_v256)) {
      // U64_V256
      error = CompareSimd1Arg<uint64_t, v256, CRet, CArg>(
          reinterpret_cast<fptr>(u64_store_aligned),
          reinterpret_cast<fptr>(v256_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_u64_store_aligned),
          reinterpret_cast<fptr>(c_v256_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v64) &&
               typeid(CArg) == typeid(c_v128)) {
      // V64_V128
      error = CompareSimd1Arg<v64, v128, CRet, CArg>(
          reinterpret_cast<fptr>(v64_store_aligned),
          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v64_store_aligned),
          reinterpret_cast<fptr>(c_v128_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v128) &&
               typeid(CArg) == typeid(c_v128)) {
      // V128_V128
      error = CompareSimd1Arg<v128, v128, CRet, CArg>(
          reinterpret_cast<fptr>(v128_store_aligned),
          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v128_store_aligned),
          reinterpret_cast<fptr>(c_v128_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v128) &&
               typeid(CArg) == typeid(c_v64)) {
      // V128_V64
      error = CompareSimd1Arg<v128, v64, CRet, CArg>(
          reinterpret_cast<fptr>(v128_store_aligned),
          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v128_store_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v128) &&
               typeid(CArg) == typeid(uint8_t)) {
      // V128_U8
      error = CompareSimd1Arg<v128, uint8_t, CRet, CArg>(
          reinterpret_cast<fptr>(v128_store_aligned),
          reinterpret_cast<fptr>(u8_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v128_store_aligned),
          reinterpret_cast<fptr>(c_u8_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v128) &&
               typeid(CArg) == typeid(uint16_t)) {
      // V128_U16
      error = CompareSimd1Arg<v128, uint16_t, CRet, CArg>(
          reinterpret_cast<fptr>(v128_store_aligned),
          reinterpret_cast<fptr>(u16_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v128_store_aligned),
          reinterpret_cast<fptr>(c_u16_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v128) &&
               typeid(CArg) == typeid(uint32_t)) {
      // V128_U32
      error = CompareSimd1Arg<v128, uint32_t, CRet, CArg>(
          reinterpret_cast<fptr>(v128_store_aligned),
          reinterpret_cast<fptr>(u32_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v128_store_aligned),
          reinterpret_cast<fptr>(c_u32_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v256) &&
               typeid(CArg) == typeid(c_v256)) {
      // V256_V256
      error = CompareSimd1Arg<v256, v256, CRet, CArg>(
          reinterpret_cast<fptr>(v256_store_aligned),
          reinterpret_cast<fptr>(v256_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v256_store_aligned),
          reinterpret_cast<fptr>(c_v256_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v256) &&
               typeid(CArg) == typeid(c_v128)) {
      // V256_V128
      error = CompareSimd1Arg<v256, v128, CRet, CArg>(
          reinterpret_cast<fptr>(v256_store_aligned),
          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v256_store_aligned),
          reinterpret_cast<fptr>(c_v128_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v256) &&
               typeid(CArg) == typeid(uint8_t)) {
      // V256_U8
      error = CompareSimd1Arg<v256, uint8_t, CRet, CArg>(
          reinterpret_cast<fptr>(v256_store_aligned),
          reinterpret_cast<fptr>(u8_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v256_store_aligned),
          reinterpret_cast<fptr>(c_u8_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v256) &&
               typeid(CArg) == typeid(uint16_t)) {
      // V256_U16
      error = CompareSimd1Arg<v256, uint16_t, CRet, CArg>(
          reinterpret_cast<fptr>(v256_store_aligned),
          reinterpret_cast<fptr>(u16_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v256_store_aligned),
          reinterpret_cast<fptr>(c_u16_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v256) &&
               typeid(CArg) == typeid(uint32_t)) {
      // V256_U32
      error = CompareSimd1Arg<v256, uint32_t, CRet, CArg>(
          reinterpret_cast<fptr>(v256_store_aligned),
          reinterpret_cast<fptr>(u32_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v256_store_aligned),
          reinterpret_cast<fptr>(c_u32_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(uint32_t) &&
               typeid(CArg) == typeid(c_v256)) {
      // U32_V256
      error = CompareSimd1Arg<uint32_t, v256, CRet, CArg>(
          reinterpret_cast<fptr>(u32_store_aligned),
          reinterpret_cast<fptr>(v256_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_u32_store_aligned),
          reinterpret_cast<fptr>(c_v256_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v64) &&
               typeid(CArg) == typeid(c_v256)) {
      // V64_V256
      error = CompareSimd1Arg<v64, v256, CRet, CArg>(
          reinterpret_cast<fptr>(v64_store_aligned),
          reinterpret_cast<fptr>(v256_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v64_store_aligned),
          reinterpret_cast<fptr>(c_v256_load_aligned), ref_simd, ref_d, s);
    } else {
      FAIL() << "Internal error: Unknown intrinsic function "
             << typeid(CRet).name() << " " << name << "(" << typeid(CArg).name()
             << ")";
    }
  }

  // 's', 'd' and 'ref_d' still hold the failing (last) iteration's data.
  EXPECT_EQ(0, error) << "Error: mismatch for " << name << "("
                      << Print(s, sizeof(s)) << ") -> " << Print(d, sizeof(d))
                      << " (simd), " << Print(ref_d, sizeof(ref_d)) << " (ref)";
}
1417
// Two-argument analogue of TestSimd1Arg: runs 'iterations' random trials
// of the intrinsic registered under 'name', comparing the machine-tuned
// implementation against its C reference. If 'maskwidth' is non-zero,
// only the SECOND argument is masked with 'mask' (see SetMask).
// CRet/CArg1/CArg2 are the C reference types; the typeid chain below
// selects the matching machine-tuned signature at runtime.
template <typename CRet, typename CArg1, typename CArg2>
void TestSimd2Args(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
                   const char *name) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  fptr ref_simd;
  fptr simd;
  int error = 0;
  // Raw byte buffers for the inputs (s1, s2), SIMD output (d) and
  // reference output (ref_d); 32-byte alignment covers up to v256.
  DECLARE_ALIGNED(32, uint8_t, s1[sizeof(CArg1)]);
  DECLARE_ALIGNED(32, uint8_t, s2[sizeof(CArg2)]);
  DECLARE_ALIGNED(32, uint8_t, d[sizeof(CRet)]);
  DECLARE_ALIGNED(32, uint8_t, ref_d[sizeof(CRet)]);
  memset(ref_d, 0, sizeof(ref_d));
  memset(d, 0, sizeof(d));

  Map(name, &ref_simd, &simd);
  if (simd == NULL || ref_simd == NULL) {
    FAIL() << "Internal error: Unknown intrinsic function " << name;
  }

  for (unsigned int count = 0;
       count < iterations && !error && !testing::Test::HasFailure(); count++) {
    // Generate random arguments.
    for (unsigned int c = 0; c < sizeof(CArg1); c++) s1[c] = rnd.Rand8();

    for (unsigned int c = 0; c < sizeof(CArg2); c++) s2[c] = rnd.Rand8();

    if (maskwidth) SetMask(s2, sizeof(CArg2), mask, maskwidth);

    // Dispatch on the C reference signature to pick the correspondingly
    // typed machine-tuned load/store wrappers.
    if (typeid(CRet) == typeid(c_v64) && typeid(CArg1) == typeid(c_v64) &&
        typeid(CArg2) == typeid(c_v64)) {
      // V64_V64V64
      error = CompareSimd2Args<v64, v64, v64, CRet, CArg1, CArg2>(
          reinterpret_cast<fptr>(v64_store_aligned),
          reinterpret_cast<fptr>(v64_load_aligned),
          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v64_store_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned),
          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
    } else if (typeid(CRet) == typeid(c_v64) &&
               typeid(CArg1) == typeid(uint32_t) &&
               typeid(CArg2) == typeid(uint32_t)) {
      // V64_U32U32
      error = CompareSimd2Args<v64, uint32_t, uint32_t, CRet, CArg1, CArg2>(
          reinterpret_cast<fptr>(v64_store_aligned),
          reinterpret_cast<fptr>(u32_load_aligned),
          reinterpret_cast<fptr>(u32_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v64_store_aligned),
          reinterpret_cast<fptr>(c_u32_load_aligned),
          reinterpret_cast<fptr>(c_u32_load_aligned),
          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
    } else if (typeid(CRet) == typeid(uint32_t) &&
               typeid(CArg1) == typeid(c_v64) &&
               typeid(CArg2) == typeid(c_v64)) {
      // U32_V64V64
      error = CompareSimd2Args<uint32_t, v64, v64, CRet, CArg1, CArg2>(
          reinterpret_cast<fptr>(u32_store_aligned),
          reinterpret_cast<fptr>(v64_load_aligned),
          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_u32_store_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned),
          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
    } else if (typeid(CRet) == typeid(int64_t) &&
               typeid(CArg1) == typeid(c_v64) &&
               typeid(CArg2) == typeid(c_v64)) {
      // S64_V64V64
      error = CompareSimd2Args<int64_t, v64, v64, CRet, CArg1, CArg2>(
          reinterpret_cast<fptr>(s64_store_aligned),
          reinterpret_cast<fptr>(v64_load_aligned),
          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_s64_store_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned),
          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
    } else if (typeid(CRet) == typeid(c_v64) &&
               typeid(CArg1) == typeid(c_v64) &&
               typeid(CArg2) == typeid(uint32_t)) {
      // V64_V64U32
      error = CompareSimd2Args<v64, v64, uint32_t, CRet, CArg1, CArg2>(
          reinterpret_cast<fptr>(v64_store_aligned),
          reinterpret_cast<fptr>(v64_load_aligned),
          reinterpret_cast<fptr>(u32_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v64_store_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned),
          reinterpret_cast<fptr>(c_u32_load_aligned),
          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
    } else if (typeid(CRet) == typeid(c_v128) &&
               typeid(CArg1) == typeid(c_v128) &&
               typeid(CArg2) == typeid(c_v128)) {
      // V128_V128V128
      error = CompareSimd2Args<v128, v128, v128, CRet, CArg1, CArg2>(
          reinterpret_cast<fptr>(v128_store_aligned),
          reinterpret_cast<fptr>(v128_load_aligned),
          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v128_store_aligned),
          reinterpret_cast<fptr>(c_v128_load_aligned),
          reinterpret_cast<fptr>(c_v128_load_aligned),
          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
    } else if (typeid(CRet) == typeid(uint32_t) &&
               typeid(CArg1) == typeid(c_v128) &&
               typeid(CArg2) == typeid(c_v128)) {
      // U32_V128V128
      error = CompareSimd2Args<uint32_t, v128, v128, CRet, CArg1, CArg2>(
          reinterpret_cast<fptr>(u32_store_aligned),
          reinterpret_cast<fptr>(v128_load_aligned),
          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_u32_store_aligned),
          reinterpret_cast<fptr>(c_v128_load_aligned),
          reinterpret_cast<fptr>(c_v128_load_aligned),
          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
    } else if (typeid(CRet) == typeid(int64_t) &&
               typeid(CArg1) == typeid(c_v128) &&
               typeid(CArg2) == typeid(c_v128)) {
      // S64_V128V128
      error = CompareSimd2Args<int64_t, v128, v128, CRet, CArg1, CArg2>(
          reinterpret_cast<fptr>(s64_store_aligned),
          reinterpret_cast<fptr>(v128_load_aligned),
          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_s64_store_aligned),
          reinterpret_cast<fptr>(c_v128_load_aligned),
          reinterpret_cast<fptr>(c_v128_load_aligned),
          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
    } else if (typeid(CRet) == typeid(c_v128) &&
               typeid(CArg1) == typeid(uint64_t) &&
               typeid(CArg2) == typeid(uint64_t)) {
      // V128_U64U64
      error = CompareSimd2Args<v128, uint64_t, uint64_t, CRet, CArg1, CArg2>(
          reinterpret_cast<fptr>(v128_store_aligned),
          reinterpret_cast<fptr>(u64_load_aligned),
          reinterpret_cast<fptr>(u64_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v128_store_aligned),
          reinterpret_cast<fptr>(c_u64_load_aligned),
          reinterpret_cast<fptr>(c_u64_load_aligned),
          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
    } else if (typeid(CRet) == typeid(c_v128) &&
               typeid(CArg1) == typeid(c_v64) &&
               typeid(CArg2) == typeid(c_v64)) {
      // V128_V64V64
      error = CompareSimd2Args<v128, v64, v64, CRet, CArg1, CArg2>(
          reinterpret_cast<fptr>(v128_store_aligned),
          reinterpret_cast<fptr>(v64_load_aligned),
          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v128_store_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned),
          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
    } else if (typeid(CRet) == typeid(c_v128) &&
               typeid(CArg1) == typeid(c_v128) &&
               typeid(CArg2) == typeid(uint32_t)) {
      // V128_V128U32
      error = CompareSimd2Args<v128, v128, uint32_t, CRet, CArg1, CArg2>(
          reinterpret_cast<fptr>(v128_store_aligned),
          reinterpret_cast<fptr>(v128_load_aligned),
          reinterpret_cast<fptr>(u32_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v128_store_aligned),
          reinterpret_cast<fptr>(c_v128_load_aligned),
          reinterpret_cast<fptr>(c_u32_load_aligned),
          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
    } else if (typeid(CRet) == typeid(c_v256) &&
               typeid(CArg1) == typeid(c_v256) &&
               typeid(CArg2) == typeid(c_v256)) {
      // V256_V256V256
      error = CompareSimd2Args<v256, v256, v256, CRet, CArg1, CArg2>(
          reinterpret_cast<fptr>(v256_store_aligned),
          reinterpret_cast<fptr>(v256_load_aligned),
          reinterpret_cast<fptr>(v256_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v256_store_aligned),
          reinterpret_cast<fptr>(c_v256_load_aligned),
          reinterpret_cast<fptr>(c_v256_load_aligned),
          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
    } else if (typeid(CRet) == typeid(int64_t) &&
               typeid(CArg1) == typeid(c_v256) &&
               typeid(CArg2) == typeid(c_v256)) {
      // S64_V256V256
      error = CompareSimd2Args<int64_t, v256, v256, CRet, CArg1, CArg2>(
          reinterpret_cast<fptr>(s64_store_aligned),
          reinterpret_cast<fptr>(v256_load_aligned),
          reinterpret_cast<fptr>(v256_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_s64_store_aligned),
          reinterpret_cast<fptr>(c_v256_load_aligned),
          reinterpret_cast<fptr>(c_v256_load_aligned),
          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
    } else if (typeid(CRet) == typeid(uint32_t) &&
               typeid(CArg1) == typeid(c_v256) &&
               typeid(CArg2) == typeid(c_v256)) {
      // U32_V256V256
      error = CompareSimd2Args<uint32_t, v256, v256, CRet, CArg1, CArg2>(
          reinterpret_cast<fptr>(u32_store_aligned),
          reinterpret_cast<fptr>(v256_load_aligned),
          reinterpret_cast<fptr>(v256_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_u32_store_aligned),
          reinterpret_cast<fptr>(c_v256_load_aligned),
          reinterpret_cast<fptr>(c_v256_load_aligned),
          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
    } else if (typeid(CRet) == typeid(c_v256) &&
               typeid(CArg1) == typeid(c_v128) &&
               typeid(CArg2) == typeid(c_v128)) {
      // V256_V128V128
      error = CompareSimd2Args<v256, v128, v128, CRet, CArg1, CArg2>(
          reinterpret_cast<fptr>(v256_store_aligned),
          reinterpret_cast<fptr>(v128_load_aligned),
          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v256_store_aligned),
          reinterpret_cast<fptr>(c_v128_load_aligned),
          reinterpret_cast<fptr>(c_v128_load_aligned),
          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
    } else if (typeid(CRet) == typeid(c_v256) &&
               typeid(CArg1) == typeid(c_v256) &&
               typeid(CArg2) == typeid(uint32_t)) {
      // V256_V256U32
      error = CompareSimd2Args<v256, v256, uint32_t, CRet, CArg1, CArg2>(
          reinterpret_cast<fptr>(v256_store_aligned),
          reinterpret_cast<fptr>(v256_load_aligned),
          reinterpret_cast<fptr>(u32_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v256_store_aligned),
          reinterpret_cast<fptr>(c_v256_load_aligned),
          reinterpret_cast<fptr>(c_u32_load_aligned),
          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);

    } else {
      FAIL() << "Internal error: Unknown intrinsic function "
             << typeid(CRet).name() << " " << name << "("
             << typeid(CArg1).name() << ", " << typeid(CArg2).name() << ")";
    }
  }

  // 's1', 's2', 'd' and 'ref_d' still hold the failing (last) iteration.
  EXPECT_EQ(0, error) << "Error: mismatch for " << name << "("
                      << Print(s1, sizeof(s1)) << ", " << Print(s2, sizeof(s2))
                      << ") -> " << Print(d, sizeof(d)) << " (simd), "
                      << Print(ref_d, sizeof(ref_d)) << " (ref)";
}
1649
// Instantiations to make the functions callable from other files
template void TestSimd1Arg<c_v64, uint8_t>(uint32_t, uint32_t, uint32_t,
                                           const char *);
template void TestSimd1Arg<c_v64, uint16_t>(uint32_t, uint32_t, uint32_t,
                                            const char *);
template void TestSimd1Arg<c_v64, uint32_t>(uint32_t, uint32_t, uint32_t,
                                            const char *);
template void TestSimd1Arg<c_v64, c_v64>(uint32_t, uint32_t, uint32_t,
                                         const char *);
template void TestSimd1Arg<uint32_t, c_v64>(uint32_t, uint32_t, uint32_t,
                                            const char *);
template void TestSimd1Arg<int32_t, c_v64>(uint32_t, uint32_t, uint32_t,
                                           const char *);
template void TestSimd1Arg<uint64_t, c_v64>(uint32_t, uint32_t, uint32_t,
                                            const char *);
template void TestSimd1Arg<int64_t, c_v64>(uint32_t, uint32_t, uint32_t,
                                           const char *);
template void TestSimd2Args<c_v64, uint32_t, uint32_t>(uint32_t, uint32_t,
                                                       uint32_t, const char *);
template void TestSimd2Args<c_v64, c_v64, c_v64>(uint32_t, uint32_t, uint32_t,
                                                 const char *);
template void TestSimd2Args<c_v64, c_v64, uint32_t>(uint32_t, uint32_t,
                                                    uint32_t, const char *);
template void TestSimd2Args<int64_t, c_v64, c_v64>(uint32_t, uint32_t, uint32_t,
                                                   const char *);
template void TestSimd2Args<uint32_t, c_v64, c_v64>(uint32_t, uint32_t,
                                                    uint32_t, const char *);
template void TestSimd1Arg<c_v128, c_v128>(uint32_t, uint32_t, uint32_t,
                                           const char *);
template void TestSimd1Arg<c_v128, uint8_t>(uint32_t, uint32_t, uint32_t,
                                            const char *);
template void TestSimd1Arg<c_v128, uint16_t>(uint32_t, uint32_t, uint32_t,
                                             const char *);
template void TestSimd1Arg<c_v128, uint32_t>(uint32_t, uint32_t, uint32_t,
                                             const char *);
template void TestSimd1Arg<c_v128, c_v64>(uint32_t, uint32_t, uint32_t,
                                          const char *);
template void TestSimd1Arg<uint32_t, c_v128>(uint32_t, uint32_t, uint32_t,
                                             const char *);
template void TestSimd1Arg<uint64_t, c_v128>(uint32_t, uint32_t, uint32_t,
                                             const char *);
template void TestSimd1Arg<c_v64, c_v128>(uint32_t, uint32_t, uint32_t,
                                          const char *);
template void TestSimd2Args<c_v128, c_v128, c_v128>(uint32_t, uint32_t,
                                                    uint32_t, const char *);
1694 uint32_t, const char *);
1695template void TestSimd2Args<c_v128, c_v128, uint32_t>(uint32_t, uint32_t,
1696 uint32_t, const char *);
1697template void TestSimd2Args<c_v128, uint64_t, uint64_t>(uint32_t, uint32_t,
1698 uint32_t, const char *);
1699template void TestSimd2Args<c_v128, c_v64, c_v64>(uint32_t, uint32_t, uint32_t,
1700 const char *);
1701template void TestSimd2Args<int64_t, c_v128, c_v128>(uint32_t, uint32_t,
1702 uint32_t, const char *);
1703template void TestSimd2Args<uint32_t, c_v128, c_v128>(uint32_t, uint32_t,
1704 uint32_t, const char *);
Steinar Midtskogen1e424362016-09-30 13:14:04 +02001705template void TestSimd1Arg<c_v256, c_v128>(uint32_t, uint32_t, uint32_t,
1706 const char *);
1707template void TestSimd1Arg<c_v256, c_v256>(uint32_t, uint32_t, uint32_t,
1708 const char *);
1709template void TestSimd1Arg<uint64_t, c_v256>(uint32_t, uint32_t, uint32_t,
1710 const char *);
1711template void TestSimd1Arg<c_v256, uint8_t>(uint32_t, uint32_t, uint32_t,
1712 const char *);
1713template void TestSimd1Arg<c_v256, uint16_t>(uint32_t, uint32_t, uint32_t,
1714 const char *);
1715template void TestSimd1Arg<c_v256, uint32_t>(uint32_t, uint32_t, uint32_t,
1716 const char *);
1717template void TestSimd1Arg<uint32_t, c_v256>(uint32_t, uint32_t, uint32_t,
1718 const char *);
1719template void TestSimd1Arg<c_v64, c_v256>(uint32_t, uint32_t, uint32_t,
1720 const char *);
1721template void TestSimd2Args<c_v256, c_v128, c_v128>(uint32_t, uint32_t,
1722 uint32_t, const char *);
1723template void TestSimd2Args<c_v256, c_v256, c_v256>(uint32_t, uint32_t,
1724 uint32_t, const char *);
1725template void TestSimd2Args<c_v256, c_v256, uint32_t>(uint32_t, uint32_t,
1726 uint32_t, const char *);
1727template void TestSimd2Args<int64_t, c_v256, c_v256>(uint32_t, uint32_t,
1728 uint32_t, const char *);
1729template void TestSimd2Args<uint32_t, c_v256, c_v256>(uint32_t, uint32_t,
1730 uint32_t, const char *);
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001731
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +01001732} // namespace SIMD_NAMESPACE