/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <assert.h>
#include <string.h>  // For strcmp(), memcmp() and memset()
#include <string>
#include "./aom_dsp_rtcd.h"
#include "test/acm_random.h"
#include "aom_dsp/aom_simd.h"
#undef SIMD_INLINE
#define SIMD_INLINE static  // Don't enforce inlining
#include "aom_dsp/simd/v128_intrinsics_c.h"

// Machine tuned code goes into this file. This file is included from
// simd_cmp_sse2.cc, simd_cmp_ssse3.cc etc., which define the macros
// ARCH (=neon, sse2, ssse3, etc), SIMD_NAMESPACE and ARCH_POSTFIX().
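//
// As an illustrative sketch (not the verbatim wrapper source), such a
// wrapper for SSE2 could look like:
//
//   #define ARCH SSE2
//   #define ARCH_POSTFIX(name) name##_sse2
//   #define SIMD_NAMESPACE simd_test_sse2
//   #include "simd_cmp_impl.h"  // i.e. this file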

using libaom_test::ACMRandom;

namespace SIMD_NAMESPACE {

// Wrap templates around intrinsics using immediate values. The
// immediate-form intrinsics require compile-time constant arguments,
// so each shift or alignment amount gets its own instantiation with a
// plain function signature that fits the mapping table below.
template <int shift>
v64 imm_v64_shl_n_byte(v64 a) {
  return v64_shl_n_byte(a, shift);
}
template <int shift>
v64 imm_v64_shr_n_byte(v64 a) {
  return v64_shr_n_byte(a, shift);
}
template <int shift>
v64 imm_v64_shl_n_8(v64 a) {
  return v64_shl_n_8(a, shift);
}
template <int shift>
v64 imm_v64_shr_n_u8(v64 a) {
  return v64_shr_n_u8(a, shift);
}
template <int shift>
v64 imm_v64_shr_n_s8(v64 a) {
  return v64_shr_n_s8(a, shift);
}
template <int shift>
v64 imm_v64_shl_n_16(v64 a) {
  return v64_shl_n_16(a, shift);
}
template <int shift>
v64 imm_v64_shr_n_u16(v64 a) {
  return v64_shr_n_u16(a, shift);
}
template <int shift>
v64 imm_v64_shr_n_s16(v64 a) {
  return v64_shr_n_s16(a, shift);
}
template <int shift>
v64 imm_v64_shl_n_32(v64 a) {
  return v64_shl_n_32(a, shift);
}
template <int shift>
v64 imm_v64_shr_n_u32(v64 a) {
  return v64_shr_n_u32(a, shift);
}
template <int shift>
v64 imm_v64_shr_n_s32(v64 a) {
  return v64_shr_n_s32(a, shift);
}
template <int shift>
v64 imm_v64_align(v64 a, v64 b) {
  return v64_align(a, b, shift);
}

// Wrap templates around corresponding C implementations of the above
template <int shift>
c_v64 c_imm_v64_shl_n_byte(c_v64 a) {
  return c_v64_shl_n_byte(a, shift);
}
template <int shift>
c_v64 c_imm_v64_shr_n_byte(c_v64 a) {
  return c_v64_shr_n_byte(a, shift);
}
template <int shift>
c_v64 c_imm_v64_shl_n_8(c_v64 a) {
  return c_v64_shl_n_8(a, shift);
}
template <int shift>
c_v64 c_imm_v64_shr_n_u8(c_v64 a) {
  return c_v64_shr_n_u8(a, shift);
}
template <int shift>
c_v64 c_imm_v64_shr_n_s8(c_v64 a) {
  return c_v64_shr_n_s8(a, shift);
}
template <int shift>
c_v64 c_imm_v64_shl_n_16(c_v64 a) {
  return c_v64_shl_n_16(a, shift);
}
template <int shift>
c_v64 c_imm_v64_shr_n_u16(c_v64 a) {
  return c_v64_shr_n_u16(a, shift);
}
template <int shift>
c_v64 c_imm_v64_shr_n_s16(c_v64 a) {
  return c_v64_shr_n_s16(a, shift);
}
template <int shift>
c_v64 c_imm_v64_shl_n_32(c_v64 a) {
  return c_v64_shl_n_32(a, shift);
}
template <int shift>
c_v64 c_imm_v64_shr_n_u32(c_v64 a) {
  return c_v64_shr_n_u32(a, shift);
}
template <int shift>
c_v64 c_imm_v64_shr_n_s32(c_v64 a) {
  return c_v64_shr_n_s32(a, shift);
}
template <int shift>
c_v64 c_imm_v64_align(c_v64 a, c_v64 b) {
  return c_v64_align(a, b, shift);
}

template <int shift>
v128 imm_v128_shl_n_byte(v128 a) {
  return v128_shl_n_byte(a, shift);
}
template <int shift>
v128 imm_v128_shr_n_byte(v128 a) {
  return v128_shr_n_byte(a, shift);
}
template <int shift>
v128 imm_v128_shl_n_8(v128 a) {
  return v128_shl_n_8(a, shift);
}
template <int shift>
v128 imm_v128_shr_n_u8(v128 a) {
  return v128_shr_n_u8(a, shift);
}
template <int shift>
v128 imm_v128_shr_n_s8(v128 a) {
  return v128_shr_n_s8(a, shift);
}
template <int shift>
v128 imm_v128_shl_n_16(v128 a) {
  return v128_shl_n_16(a, shift);
}
template <int shift>
v128 imm_v128_shr_n_u16(v128 a) {
  return v128_shr_n_u16(a, shift);
}
template <int shift>
v128 imm_v128_shr_n_s16(v128 a) {
  return v128_shr_n_s16(a, shift);
}
template <int shift>
v128 imm_v128_shl_n_32(v128 a) {
  return v128_shl_n_32(a, shift);
}
template <int shift>
v128 imm_v128_shr_n_u32(v128 a) {
  return v128_shr_n_u32(a, shift);
}
template <int shift>
v128 imm_v128_shr_n_s32(v128 a) {
  return v128_shr_n_s32(a, shift);
}
template <int shift>
v128 imm_v128_align(v128 a, v128 b) {
  return v128_align(a, b, shift);
}

template <int shift>
c_v128 c_imm_v128_shl_n_byte(c_v128 a) {
  return c_v128_shl_n_byte(a, shift);
}
template <int shift>
c_v128 c_imm_v128_shr_n_byte(c_v128 a) {
  return c_v128_shr_n_byte(a, shift);
}
template <int shift>
c_v128 c_imm_v128_shl_n_8(c_v128 a) {
  return c_v128_shl_n_8(a, shift);
}
template <int shift>
c_v128 c_imm_v128_shr_n_u8(c_v128 a) {
  return c_v128_shr_n_u8(a, shift);
}
template <int shift>
c_v128 c_imm_v128_shr_n_s8(c_v128 a) {
  return c_v128_shr_n_s8(a, shift);
}
template <int shift>
c_v128 c_imm_v128_shl_n_16(c_v128 a) {
  return c_v128_shl_n_16(a, shift);
}
template <int shift>
c_v128 c_imm_v128_shr_n_u16(c_v128 a) {
  return c_v128_shr_n_u16(a, shift);
}
template <int shift>
c_v128 c_imm_v128_shr_n_s16(c_v128 a) {
  return c_v128_shr_n_s16(a, shift);
}
template <int shift>
c_v128 c_imm_v128_shl_n_32(c_v128 a) {
  return c_v128_shl_n_32(a, shift);
}
template <int shift>
c_v128 c_imm_v128_shr_n_u32(c_v128 a) {
  return c_v128_shr_n_u32(a, shift);
}
template <int shift>
c_v128 c_imm_v128_shr_n_s32(c_v128 a) {
  return c_v128_shr_n_s32(a, shift);
}
template <int shift>
c_v128 c_imm_v128_align(c_v128 a, c_v128 b) {
  return c_v128_align(a, b, shift);
}

// Wrappers around the SAD and SSD functions
uint32_t v64_sad_u8(v64 a, v64 b) {
  return v64_sad_u8_sum(::v64_sad_u8(v64_sad_u8_init(), a, b));
}
uint32_t v64_ssd_u8(v64 a, v64 b) {
  return v64_ssd_u8_sum(::v64_ssd_u8(v64_ssd_u8_init(), a, b));
}

uint32_t c_v64_sad_u8(c_v64 a, c_v64 b) {
  return c_v64_sad_u8_sum(::c_v64_sad_u8(c_v64_sad_u8_init(), a, b));
}
uint32_t c_v64_ssd_u8(c_v64 a, c_v64 b) {
  return c_v64_ssd_u8_sum(::c_v64_ssd_u8(c_v64_ssd_u8_init(), a, b));
}
uint32_t v128_sad_u8(v128 a, v128 b) {
  return v128_sad_u8_sum(::v128_sad_u8(v128_sad_u8_init(), a, b));
}
uint32_t v128_ssd_u8(v128 a, v128 b) {
  return v128_ssd_u8_sum(::v128_ssd_u8(v128_ssd_u8_init(), a, b));
}
uint32_t c_v128_sad_u8(c_v128 a, c_v128 b) {
  return c_v128_sad_u8_sum(::c_v128_sad_u8(c_v128_sad_u8_init(), a, b));
}
uint32_t c_v128_ssd_u8(c_v128 a, c_v128 b) {
  return c_v128_ssd_u8_sum(::c_v128_ssd_u8(c_v128_ssd_u8_init(), a, b));
}

namespace {

typedef void (*fptr)();

typedef struct {
  const char *name;
  fptr ref;
  fptr simd;
} mapping;

#define MAP(name)                            \
  {                                          \
    #name, reinterpret_cast<fptr>(c_##name), \
        reinterpret_cast<fptr>(name)         \
  }

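// For example, MAP(v64_add_8) expands to the initializer
// { "v64_add_8", reinterpret_cast<fptr>(c_v64_add_8),
//   reinterpret_cast<fptr>(v64_add_8) }, pairing the C reference with
// the machine tuned implementation under a common name.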
const mapping m[] = { MAP(v64_sad_u8),
                      MAP(v64_ssd_u8),
                      MAP(v64_add_8),
                      MAP(v64_add_16),
                      MAP(v64_sadd_s16),
                      MAP(v64_add_32),
                      MAP(v64_sub_8),
                      MAP(v64_ssub_u8),
                      MAP(v64_ssub_s8),
                      MAP(v64_sub_16),
                      MAP(v64_ssub_s16),
                      MAP(v64_ssub_u16),
                      MAP(v64_sub_32),
                      MAP(v64_ziplo_8),
                      MAP(v64_ziphi_8),
                      MAP(v64_ziplo_16),
                      MAP(v64_ziphi_16),
                      MAP(v64_ziplo_32),
                      MAP(v64_ziphi_32),
                      MAP(v64_pack_s32_s16),
                      MAP(v64_pack_s16_u8),
                      MAP(v64_pack_s16_s8),
                      MAP(v64_unziphi_8),
                      MAP(v64_unziplo_8),
                      MAP(v64_unziphi_16),
                      MAP(v64_unziplo_16),
                      MAP(v64_or),
                      MAP(v64_xor),
                      MAP(v64_and),
                      MAP(v64_andn),
                      MAP(v64_mullo_s16),
                      MAP(v64_mulhi_s16),
                      MAP(v64_mullo_s32),
                      MAP(v64_madd_s16),
                      MAP(v64_madd_us8),
                      MAP(v64_avg_u8),
                      MAP(v64_rdavg_u8),
                      MAP(v64_avg_u16),
                      MAP(v64_min_u8),
                      MAP(v64_max_u8),
                      MAP(v64_min_s8),
                      MAP(v64_max_s8),
                      MAP(v64_min_s16),
                      MAP(v64_max_s16),
                      MAP(v64_cmpgt_s8),
                      MAP(v64_cmplt_s8),
                      MAP(v64_cmpeq_8),
                      MAP(v64_cmpgt_s16),
                      MAP(v64_cmplt_s16),
                      MAP(v64_cmpeq_16),
                      MAP(v64_shuffle_8),
                      MAP(imm_v64_align<1>),
                      MAP(imm_v64_align<2>),
                      MAP(imm_v64_align<3>),
                      MAP(imm_v64_align<4>),
                      MAP(imm_v64_align<5>),
                      MAP(imm_v64_align<6>),
                      MAP(imm_v64_align<7>),
                      MAP(v64_abs_s8),
                      MAP(v64_abs_s16),
                      MAP(v64_unpacklo_u8_s16),
                      MAP(v64_unpackhi_u8_s16),
                      MAP(v64_unpacklo_s8_s16),
                      MAP(v64_unpackhi_s8_s16),
                      MAP(v64_unpacklo_u16_s32),
                      MAP(v64_unpacklo_s16_s32),
                      MAP(v64_unpackhi_u16_s32),
                      MAP(v64_unpackhi_s16_s32),
                      MAP(imm_v64_shr_n_byte<1>),
                      MAP(imm_v64_shr_n_byte<2>),
                      MAP(imm_v64_shr_n_byte<3>),
                      MAP(imm_v64_shr_n_byte<4>),
                      MAP(imm_v64_shr_n_byte<5>),
                      MAP(imm_v64_shr_n_byte<6>),
                      MAP(imm_v64_shr_n_byte<7>),
                      MAP(imm_v64_shl_n_byte<1>),
                      MAP(imm_v64_shl_n_byte<2>),
                      MAP(imm_v64_shl_n_byte<3>),
                      MAP(imm_v64_shl_n_byte<4>),
                      MAP(imm_v64_shl_n_byte<5>),
                      MAP(imm_v64_shl_n_byte<6>),
                      MAP(imm_v64_shl_n_byte<7>),
                      MAP(imm_v64_shl_n_8<1>),
                      MAP(imm_v64_shl_n_8<2>),
                      MAP(imm_v64_shl_n_8<3>),
                      MAP(imm_v64_shl_n_8<4>),
                      MAP(imm_v64_shl_n_8<5>),
                      MAP(imm_v64_shl_n_8<6>),
                      MAP(imm_v64_shl_n_8<7>),
                      MAP(imm_v64_shr_n_u8<1>),
                      MAP(imm_v64_shr_n_u8<2>),
                      MAP(imm_v64_shr_n_u8<3>),
                      MAP(imm_v64_shr_n_u8<4>),
                      MAP(imm_v64_shr_n_u8<5>),
                      MAP(imm_v64_shr_n_u8<6>),
                      MAP(imm_v64_shr_n_u8<7>),
                      MAP(imm_v64_shr_n_s8<1>),
                      MAP(imm_v64_shr_n_s8<2>),
                      MAP(imm_v64_shr_n_s8<3>),
                      MAP(imm_v64_shr_n_s8<4>),
                      MAP(imm_v64_shr_n_s8<5>),
                      MAP(imm_v64_shr_n_s8<6>),
                      MAP(imm_v64_shr_n_s8<7>),
                      MAP(imm_v64_shl_n_16<1>),
                      MAP(imm_v64_shl_n_16<2>),
                      MAP(imm_v64_shl_n_16<4>),
                      MAP(imm_v64_shl_n_16<6>),
                      MAP(imm_v64_shl_n_16<8>),
                      MAP(imm_v64_shl_n_16<10>),
                      MAP(imm_v64_shl_n_16<12>),
                      MAP(imm_v64_shl_n_16<14>),
                      MAP(imm_v64_shr_n_u16<1>),
                      MAP(imm_v64_shr_n_u16<2>),
                      MAP(imm_v64_shr_n_u16<4>),
                      MAP(imm_v64_shr_n_u16<6>),
                      MAP(imm_v64_shr_n_u16<8>),
                      MAP(imm_v64_shr_n_u16<10>),
                      MAP(imm_v64_shr_n_u16<12>),
                      MAP(imm_v64_shr_n_u16<14>),
                      MAP(imm_v64_shr_n_s16<1>),
                      MAP(imm_v64_shr_n_s16<2>),
                      MAP(imm_v64_shr_n_s16<4>),
                      MAP(imm_v64_shr_n_s16<6>),
                      MAP(imm_v64_shr_n_s16<8>),
                      MAP(imm_v64_shr_n_s16<10>),
                      MAP(imm_v64_shr_n_s16<12>),
                      MAP(imm_v64_shr_n_s16<14>),
                      MAP(imm_v64_shl_n_32<1>),
                      MAP(imm_v64_shl_n_32<4>),
                      MAP(imm_v64_shl_n_32<8>),
                      MAP(imm_v64_shl_n_32<12>),
                      MAP(imm_v64_shl_n_32<16>),
                      MAP(imm_v64_shl_n_32<20>),
                      MAP(imm_v64_shl_n_32<24>),
                      MAP(imm_v64_shl_n_32<28>),
                      MAP(imm_v64_shr_n_u32<1>),
                      MAP(imm_v64_shr_n_u32<4>),
                      MAP(imm_v64_shr_n_u32<8>),
                      MAP(imm_v64_shr_n_u32<12>),
                      MAP(imm_v64_shr_n_u32<16>),
                      MAP(imm_v64_shr_n_u32<20>),
                      MAP(imm_v64_shr_n_u32<24>),
                      MAP(imm_v64_shr_n_u32<28>),
                      MAP(imm_v64_shr_n_s32<1>),
                      MAP(imm_v64_shr_n_s32<4>),
                      MAP(imm_v64_shr_n_s32<8>),
                      MAP(imm_v64_shr_n_s32<12>),
                      MAP(imm_v64_shr_n_s32<16>),
                      MAP(imm_v64_shr_n_s32<20>),
                      MAP(imm_v64_shr_n_s32<24>),
                      MAP(imm_v64_shr_n_s32<28>),
                      MAP(v64_shl_8),
                      MAP(v64_shr_u8),
                      MAP(v64_shr_s8),
                      MAP(v64_shl_16),
                      MAP(v64_shr_u16),
                      MAP(v64_shr_s16),
                      MAP(v64_shl_32),
                      MAP(v64_shr_u32),
                      MAP(v64_shr_s32),
                      MAP(v64_hadd_u8),
                      MAP(v64_hadd_s16),
                      MAP(v64_dotp_s16),
                      MAP(v64_dotp_su8),
                      MAP(v64_u64),
                      MAP(v64_low_u32),
                      MAP(v64_high_u32),
                      MAP(v64_low_s32),
                      MAP(v64_high_s32),
                      MAP(v64_dup_8),
                      MAP(v64_dup_16),
                      MAP(v64_dup_32),
                      MAP(v64_from_32),
                      MAP(v64_zero),
                      MAP(v64_from_16),
                      MAP(v128_sad_u8),
                      MAP(v128_ssd_u8),
                      MAP(v128_add_8),
                      MAP(v128_add_16),
                      MAP(v128_sadd_s16),
                      MAP(v128_add_32),
                      MAP(v128_sub_8),
                      MAP(v128_ssub_u8),
                      MAP(v128_ssub_s8),
                      MAP(v128_sub_16),
                      MAP(v128_ssub_s16),
                      MAP(v128_ssub_u16),
                      MAP(v128_sub_32),
                      MAP(v128_ziplo_8),
                      MAP(v128_ziphi_8),
                      MAP(v128_ziplo_16),
                      MAP(v128_ziphi_16),
                      MAP(v128_ziplo_32),
                      MAP(v128_ziphi_32),
                      MAP(v128_ziplo_64),
                      MAP(v128_ziphi_64),
                      MAP(v128_unziphi_8),
                      MAP(v128_unziplo_8),
                      MAP(v128_unziphi_16),
                      MAP(v128_unziplo_16),
                      MAP(v128_unziphi_32),
                      MAP(v128_unziplo_32),
                      MAP(v128_pack_s32_s16),
                      MAP(v128_pack_s16_u8),
                      MAP(v128_pack_s16_s8),
                      MAP(v128_or),
                      MAP(v128_xor),
                      MAP(v128_and),
                      MAP(v128_andn),
                      MAP(v128_mullo_s16),
                      MAP(v128_mulhi_s16),
                      MAP(v128_mullo_s32),
                      MAP(v128_madd_s16),
                      MAP(v128_madd_us8),
                      MAP(v128_avg_u8),
                      MAP(v128_rdavg_u8),
                      MAP(v128_avg_u16),
                      MAP(v128_min_u8),
                      MAP(v128_max_u8),
                      MAP(v128_min_s8),
                      MAP(v128_max_s8),
                      MAP(v128_min_s16),
                      MAP(v128_max_s16),
                      MAP(v128_cmpgt_s8),
                      MAP(v128_cmplt_s8),
                      MAP(v128_cmpeq_8),
                      MAP(v128_cmpgt_s16),
                      MAP(v128_cmpeq_16),
                      MAP(v128_cmplt_s16),
                      MAP(v128_shuffle_8),
                      MAP(imm_v128_align<1>),
                      MAP(imm_v128_align<2>),
                      MAP(imm_v128_align<3>),
                      MAP(imm_v128_align<4>),
                      MAP(imm_v128_align<5>),
                      MAP(imm_v128_align<6>),
                      MAP(imm_v128_align<7>),
                      MAP(imm_v128_align<8>),
                      MAP(imm_v128_align<9>),
                      MAP(imm_v128_align<10>),
                      MAP(imm_v128_align<11>),
                      MAP(imm_v128_align<12>),
                      MAP(imm_v128_align<13>),
                      MAP(imm_v128_align<14>),
                      MAP(imm_v128_align<15>),
                      MAP(v128_abs_s8),
                      MAP(v128_abs_s16),
                      MAP(v128_padd_s16),
                      MAP(v128_unpacklo_u16_s32),
                      MAP(v128_unpacklo_s16_s32),
                      MAP(v128_unpackhi_u16_s32),
                      MAP(v128_unpackhi_s16_s32),
                      MAP(imm_v128_shr_n_byte<1>),
                      MAP(imm_v128_shr_n_byte<2>),
                      MAP(imm_v128_shr_n_byte<3>),
                      MAP(imm_v128_shr_n_byte<4>),
                      MAP(imm_v128_shr_n_byte<5>),
                      MAP(imm_v128_shr_n_byte<6>),
                      MAP(imm_v128_shr_n_byte<7>),
                      MAP(imm_v128_shr_n_byte<8>),
                      MAP(imm_v128_shr_n_byte<9>),
                      MAP(imm_v128_shr_n_byte<10>),
                      MAP(imm_v128_shr_n_byte<11>),
                      MAP(imm_v128_shr_n_byte<12>),
                      MAP(imm_v128_shr_n_byte<13>),
                      MAP(imm_v128_shr_n_byte<14>),
                      MAP(imm_v128_shr_n_byte<15>),
                      MAP(imm_v128_shl_n_byte<1>),
                      MAP(imm_v128_shl_n_byte<2>),
                      MAP(imm_v128_shl_n_byte<3>),
                      MAP(imm_v128_shl_n_byte<4>),
                      MAP(imm_v128_shl_n_byte<5>),
                      MAP(imm_v128_shl_n_byte<6>),
                      MAP(imm_v128_shl_n_byte<7>),
                      MAP(imm_v128_shl_n_byte<8>),
                      MAP(imm_v128_shl_n_byte<9>),
                      MAP(imm_v128_shl_n_byte<10>),
                      MAP(imm_v128_shl_n_byte<11>),
                      MAP(imm_v128_shl_n_byte<12>),
                      MAP(imm_v128_shl_n_byte<13>),
                      MAP(imm_v128_shl_n_byte<14>),
                      MAP(imm_v128_shl_n_byte<15>),
                      MAP(imm_v128_shl_n_8<1>),
                      MAP(imm_v128_shl_n_8<2>),
                      MAP(imm_v128_shl_n_8<3>),
                      MAP(imm_v128_shl_n_8<4>),
                      MAP(imm_v128_shl_n_8<5>),
                      MAP(imm_v128_shl_n_8<6>),
                      MAP(imm_v128_shl_n_8<7>),
                      MAP(imm_v128_shr_n_u8<1>),
                      MAP(imm_v128_shr_n_u8<2>),
                      MAP(imm_v128_shr_n_u8<3>),
                      MAP(imm_v128_shr_n_u8<4>),
                      MAP(imm_v128_shr_n_u8<5>),
                      MAP(imm_v128_shr_n_u8<6>),
                      MAP(imm_v128_shr_n_u8<7>),
                      MAP(imm_v128_shr_n_s8<1>),
                      MAP(imm_v128_shr_n_s8<2>),
                      MAP(imm_v128_shr_n_s8<3>),
                      MAP(imm_v128_shr_n_s8<4>),
                      MAP(imm_v128_shr_n_s8<5>),
                      MAP(imm_v128_shr_n_s8<6>),
                      MAP(imm_v128_shr_n_s8<7>),
                      MAP(imm_v128_shl_n_16<1>),
                      MAP(imm_v128_shl_n_16<2>),
                      MAP(imm_v128_shl_n_16<4>),
                      MAP(imm_v128_shl_n_16<6>),
                      MAP(imm_v128_shl_n_16<8>),
                      MAP(imm_v128_shl_n_16<10>),
                      MAP(imm_v128_shl_n_16<12>),
                      MAP(imm_v128_shl_n_16<14>),
                      MAP(imm_v128_shr_n_u16<1>),
                      MAP(imm_v128_shr_n_u16<2>),
                      MAP(imm_v128_shr_n_u16<4>),
                      MAP(imm_v128_shr_n_u16<6>),
                      MAP(imm_v128_shr_n_u16<8>),
                      MAP(imm_v128_shr_n_u16<10>),
                      MAP(imm_v128_shr_n_u16<12>),
                      MAP(imm_v128_shr_n_u16<14>),
                      MAP(imm_v128_shr_n_s16<1>),
                      MAP(imm_v128_shr_n_s16<2>),
                      MAP(imm_v128_shr_n_s16<4>),
                      MAP(imm_v128_shr_n_s16<6>),
                      MAP(imm_v128_shr_n_s16<8>),
                      MAP(imm_v128_shr_n_s16<10>),
                      MAP(imm_v128_shr_n_s16<12>),
                      MAP(imm_v128_shr_n_s16<14>),
                      MAP(imm_v128_shl_n_32<1>),
                      MAP(imm_v128_shl_n_32<4>),
                      MAP(imm_v128_shl_n_32<8>),
                      MAP(imm_v128_shl_n_32<12>),
                      MAP(imm_v128_shl_n_32<16>),
                      MAP(imm_v128_shl_n_32<20>),
                      MAP(imm_v128_shl_n_32<24>),
                      MAP(imm_v128_shl_n_32<28>),
                      MAP(imm_v128_shr_n_u32<1>),
                      MAP(imm_v128_shr_n_u32<4>),
                      MAP(imm_v128_shr_n_u32<8>),
                      MAP(imm_v128_shr_n_u32<12>),
                      MAP(imm_v128_shr_n_u32<16>),
                      MAP(imm_v128_shr_n_u32<20>),
                      MAP(imm_v128_shr_n_u32<24>),
                      MAP(imm_v128_shr_n_u32<28>),
                      MAP(imm_v128_shr_n_s32<1>),
                      MAP(imm_v128_shr_n_s32<4>),
                      MAP(imm_v128_shr_n_s32<8>),
                      MAP(imm_v128_shr_n_s32<12>),
                      MAP(imm_v128_shr_n_s32<16>),
                      MAP(imm_v128_shr_n_s32<20>),
                      MAP(imm_v128_shr_n_s32<24>),
                      MAP(imm_v128_shr_n_s32<28>),
                      MAP(v128_from_v64),
                      MAP(v128_zip_8),
                      MAP(v128_zip_16),
                      MAP(v128_zip_32),
                      MAP(v128_mul_s16),
                      MAP(v128_unpack_u8_s16),
                      MAP(v128_unpack_s8_s16),
                      MAP(v128_unpack_u16_s32),
                      MAP(v128_unpack_s16_s32),
                      MAP(v128_shl_8),
                      MAP(v128_shr_u8),
                      MAP(v128_shr_s8),
                      MAP(v128_shl_16),
                      MAP(v128_shr_u16),
                      MAP(v128_shr_s16),
                      MAP(v128_shl_32),
                      MAP(v128_shr_u32),
                      MAP(v128_shr_s32),
                      MAP(v128_hadd_u8),
                      MAP(v128_dotp_s16),
                      MAP(v128_low_u32),
                      MAP(v128_low_v64),
                      MAP(v128_high_v64),
                      MAP(v128_from_64),
                      MAP(v128_from_32),
                      MAP(v128_zero),
                      MAP(v128_dup_8),
                      MAP(v128_dup_16),
                      MAP(v128_dup_32),
                      MAP(v128_unpacklo_u8_s16),
                      MAP(v128_unpackhi_u8_s16),
                      MAP(v128_unpacklo_s8_s16),
                      MAP(v128_unpackhi_s8_s16),
                      MAP(u32_load_unaligned),
                      MAP(u32_store_unaligned),
                      MAP(v64_load_unaligned),
                      MAP(v64_store_unaligned),
                      MAP(v128_load_unaligned),
                      MAP(v128_store_unaligned),
                      { NULL, NULL, NULL } };
#undef MAP

// Map reference functions to machine tuned functions. Since the
// functions depend on machine tuned types, the non-machine tuned
// instantiations of the test can't refer to these functions directly,
// so we refer to them by name and do the mapping here.
void Map(const char *name, fptr *ref, fptr *simd) {
  unsigned int i;
  for (i = 0; m[i].name && strcmp(name, m[i].name); i++) {
  }

  *ref = m[i].ref;
  *simd = m[i].simd;
}
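// For example, Map("v64_add_8", &ref, &simd) fills in c_v64_add_8 and
// v64_add_8 from the table above; an unknown name stops the scan at
// the { NULL, NULL, NULL } sentinel and returns NULL for both.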

// Used for printing errors in TestSimd1Arg and TestSimd2Args
std::string Print(const uint8_t *a, int size) {
  std::string text = "0x";
  for (int i = 0; i < size; i++) {
    const uint8_t c = a[!CONFIG_BIG_ENDIAN ? size - 1 - i : i];
    // Same as snprintf(..., ..., "%02x", c)
    text += (c >> 4) + '0' + ((c >> 4) > 9) * ('a' - '0' - 10);
    text += (c & 15) + '0' + ((c & 15) > 9) * ('a' - '0' - 10);
  }

  return text;
}

// Used in TestSimd1Arg and TestSimd2Args to restrict argument ranges
void SetMask(uint8_t *s, int size, uint32_t mask, uint32_t maskwidth) {
  switch (maskwidth) {
    case 0: {
      break;
    }
    case 8: {
      for (int i = 0; i < size; i++) s[i] &= mask;
      break;
    }
    case 16: {
      uint16_t *t = reinterpret_cast<uint16_t *>(s);
      assert(!(reinterpret_cast<uintptr_t>(s) & 1));
      for (int i = 0; i < size / 2; i++) t[i] &= mask;
      break;
    }
    case 32: {
      uint32_t *t = reinterpret_cast<uint32_t *>(s);
      assert(!(reinterpret_cast<uintptr_t>(s) & 3));
      for (int i = 0; i < size / 4; i++) t[i] &= mask;
      break;
    }
    case 64: {
      uint64_t *t = reinterpret_cast<uint64_t *>(s);
      assert(!(reinterpret_cast<uintptr_t>(s) & 7));
      for (int i = 0; i < size / 8; i++) t[i] &= mask;
      break;
    }
    default: {
      FAIL() << "Unsupported mask width";
      break;
    }
  }
}
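// For example, SetMask(s, sizeof(s), 15, 8) masks every byte of s down
// to the range 0..15, which keeps randomly generated arguments (such
// as shift amounts) within a legal range for the intrinsic under test.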

// We need some extra load/store functions
void u64_store_aligned(void *p, uint64_t a) {
  v64_store_aligned(p, v64_from_64(a));
}
void s32_store_aligned(void *p, int32_t a) {
  u32_store_aligned(p, static_cast<uint32_t>(a));
}
void s64_store_aligned(void *p, int64_t a) {
  v64_store_aligned(p, v64_from_64(static_cast<uint64_t>(a)));
}

void c_u64_store_aligned(void *p, uint64_t a) {
  c_v64_store_aligned(p, c_v64_from_64(a));
}

void c_s32_store_aligned(void *p, int32_t a) {
  c_u32_store_aligned(p, static_cast<uint32_t>(a));
}

void c_s64_store_aligned(void *p, int64_t a) {
  c_v64_store_aligned(p, c_v64_from_64(static_cast<uint64_t>(a)));
}

uint64_t u64_load_aligned(const void *p) {
  return v64_u64(v64_load_aligned(p));
}
uint16_t u16_load_aligned(const void *p) {
  return *(reinterpret_cast<const uint16_t *>(p));
}
uint8_t u8_load_aligned(const void *p) {
  return *(reinterpret_cast<const uint8_t *>(p));
}

uint64_t c_u64_load_aligned(const void *p) {
  return c_v64_u64(c_v64_load_aligned(p));
}
uint16_t c_u16_load_aligned(const void *p) {
  return *(reinterpret_cast<const uint16_t *>(p));
}
uint8_t c_u8_load_aligned(const void *p) {
  return *(reinterpret_cast<const uint8_t *>(p));
}

// CompareSimd1Arg and CompareSimd2Args compare intrinsics taking 1 or
// 2 arguments respectively with their corresponding C reference.
// Ideally, the loads and stores should have gone into the template
// parameter list, but v64 and v128 could be typedef'ed to the same
// type (which is the case on x86) and then we can't instantiate both
// v64 and v128, so the function return and argument types, including
// the always differing types in the C equivalent, are used instead.
// The function arguments must be void pointers and then go through a
// cast to avoid matching errors in the branches eliminated by the
// typeid tests in the calling function.
template <typename Ret, typename Arg, typename CRet, typename CArg>
int CompareSimd1Arg(fptr store, fptr load, fptr simd, void *d, fptr c_store,
                    fptr c_load, fptr c_simd, void *ref_d, const void *a) {
  void (*const my_store)(void *, Ret) = (void (*const)(void *, Ret))store;
  Arg (*const my_load)(const void *) = (Arg(*const)(const void *))load;
  Ret (*const my_simd)(Arg) = (Ret(*const)(Arg))simd;
  void (*const my_c_store)(void *, CRet) = (void (*const)(void *, CRet))c_store;
  CArg (*const my_c_load)(const void *) = (CArg(*const)(const void *))c_load;
  CRet (*const my_c_simd)(CArg) = (CRet(*const)(CArg))c_simd;

  // Call reference and intrinsic
  my_c_store(ref_d, my_c_simd(my_c_load(a)));
  my_store(d, my_simd(my_load(a)));

  // Compare results
  return memcmp(ref_d, d, sizeof(CRet));
}

template <typename Ret, typename Arg1, typename Arg2, typename CRet,
          typename CArg1, typename CArg2>
int CompareSimd2Args(fptr store, fptr load1, fptr load2, fptr simd, void *d,
                     fptr c_store, fptr c_load1, fptr c_load2, fptr c_simd,
                     void *ref_d, const void *a, const void *b) {
  void (*const my_store)(void *, Ret) = (void (*const)(void *, Ret))store;
  Arg1 (*const my_load1)(const void *) = (Arg1(*const)(const void *))load1;
  Arg2 (*const my_load2)(const void *) = (Arg2(*const)(const void *))load2;
  Ret (*const my_simd)(Arg1, Arg2) = (Ret(*const)(Arg1, Arg2))simd;
  void (*const my_c_store)(void *, CRet) = (void (*const)(void *, CRet))c_store;
  CArg1 (*const my_c_load1)(const void *) =
      (CArg1(*const)(const void *))c_load1;
  CArg2 (*const my_c_load2)(const void *) =
      (CArg2(*const)(const void *))c_load2;
  CRet (*const my_c_simd)(CArg1, CArg2) = (CRet(*const)(CArg1, CArg2))c_simd;

  // Call reference and intrinsic
  my_c_store(ref_d, my_c_simd(my_c_load1(a), my_c_load2(b)));
  my_store(d, my_simd(my_load1(a), my_load2(b)));

  // Compare results
  return memcmp(ref_d, d, sizeof(CRet));
}

}  // namespace

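// TestSimd1Arg runs |iterations| random trials of the intrinsic
// registered under |name|, comparing the machine tuned version with
// its C reference; mask/maskwidth optionally restrict the input range
// through SetMask(). A hypothetical invocation:
//   TestSimd1Arg<c_v64, c_v64>(65536, 0u, 0u, "v64_abs_s16");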
template <typename CRet, typename CArg>
void TestSimd1Arg(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
                  const char *name) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  fptr ref_simd;
  fptr simd;
  int error = 0;
  DECLARE_ALIGNED(32, uint8_t, s[sizeof(CArg)]);
  DECLARE_ALIGNED(32, uint8_t, d[sizeof(CRet)]);
  DECLARE_ALIGNED(32, uint8_t, ref_d[sizeof(CRet)]);
  memset(ref_d, 0, sizeof(ref_d));
  memset(d, 0, sizeof(d));

  Map(name, &ref_simd, &simd);
  if (simd == NULL || ref_simd == NULL) {
    FAIL() << "Internal error: Unknown intrinsic function " << name;
  }
  for (unsigned int count = 0;
       count < iterations && !error && !testing::Test::HasFailure(); count++) {
    for (unsigned int c = 0; c < sizeof(CArg); c++) s[c] = rnd.Rand8();

    if (maskwidth) {
      SetMask(s, sizeof(CArg), mask, maskwidth);
    }

    if (typeid(CRet) == typeid(c_v64) && typeid(CArg) == typeid(c_v64)) {
      // V64_V64
      error = CompareSimd1Arg<v64, v64, CRet, CArg>(
          reinterpret_cast<fptr>(v64_store_aligned),
          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v64_store_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v64) &&
               typeid(CArg) == typeid(uint8_t)) {
      // V64_U8
      error = CompareSimd1Arg<v64, uint8_t, CRet, CArg>(
          reinterpret_cast<fptr>(v64_store_aligned),
          reinterpret_cast<fptr>(u8_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v64_store_aligned),
          reinterpret_cast<fptr>(c_u8_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v64) &&
               typeid(CArg) == typeid(uint16_t)) {
      // V64_U16
      error = CompareSimd1Arg<v64, uint16_t, CRet, CArg>(
          reinterpret_cast<fptr>(v64_store_aligned),
          reinterpret_cast<fptr>(u16_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v64_store_aligned),
          reinterpret_cast<fptr>(c_u16_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v64) &&
               typeid(CArg) == typeid(uint32_t)) {
      // V64_U32
      error = CompareSimd1Arg<v64, uint32_t, CRet, CArg>(
          reinterpret_cast<fptr>(v64_store_aligned),
          reinterpret_cast<fptr>(u32_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v64_store_aligned),
          reinterpret_cast<fptr>(c_u32_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(uint64_t) &&
               typeid(CArg) == typeid(c_v64)) {
      // U64_V64
      error = CompareSimd1Arg<uint64_t, v64, CRet, CArg>(
          reinterpret_cast<fptr>(u64_store_aligned),
          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_u64_store_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(int64_t) &&
               typeid(CArg) == typeid(c_v64)) {
      // S64_V64
      error = CompareSimd1Arg<int64_t, v64, CRet, CArg>(
          reinterpret_cast<fptr>(s64_store_aligned),
          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_s64_store_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(uint32_t) &&
               typeid(CArg) == typeid(c_v64)) {
      // U32_V64
      error = CompareSimd1Arg<uint32_t, v64, CRet, CArg>(
          reinterpret_cast<fptr>(u32_store_aligned),
          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_u32_store_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(int32_t) &&
               typeid(CArg) == typeid(c_v64)) {
      // S32_V64
      error = CompareSimd1Arg<int32_t, v64, CRet, CArg>(
          reinterpret_cast<fptr>(s32_store_aligned),
          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_s32_store_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(uint32_t) &&
               typeid(CArg) == typeid(c_v128)) {
      // U32_V128
      error = CompareSimd1Arg<uint32_t, v128, CRet, CArg>(
          reinterpret_cast<fptr>(u32_store_aligned),
          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_u32_store_aligned),
          reinterpret_cast<fptr>(c_v128_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(uint64_t) &&
               typeid(CArg) == typeid(c_v128)) {
      // U64_V128
      error = CompareSimd1Arg<uint64_t, v128, CRet, CArg>(
          reinterpret_cast<fptr>(u64_store_aligned),
          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_u64_store_aligned),
          reinterpret_cast<fptr>(c_v128_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v64) &&
               typeid(CArg) == typeid(c_v128)) {
      // V64_V128
      error = CompareSimd1Arg<v64, v128, CRet, CArg>(
          reinterpret_cast<fptr>(v64_store_aligned),
          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v64_store_aligned),
          reinterpret_cast<fptr>(c_v128_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v128) &&
               typeid(CArg) == typeid(c_v128)) {
      // V128_V128
      error = CompareSimd1Arg<v128, v128, CRet, CArg>(
          reinterpret_cast<fptr>(v128_store_aligned),
          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v128_store_aligned),
          reinterpret_cast<fptr>(c_v128_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v128) &&
               typeid(CArg) == typeid(c_v64)) {
      // V128_V64
      error = CompareSimd1Arg<v128, v64, CRet, CArg>(
          reinterpret_cast<fptr>(v128_store_aligned),
          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v128_store_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v128) &&
               typeid(CArg) == typeid(uint8_t)) {
      // V128_U8
      error = CompareSimd1Arg<v128, uint8_t, CRet, CArg>(
          reinterpret_cast<fptr>(v128_store_aligned),
          reinterpret_cast<fptr>(u8_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v128_store_aligned),
          reinterpret_cast<fptr>(c_u8_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v128) &&
               typeid(CArg) == typeid(uint16_t)) {
      // V128_U16
      error = CompareSimd1Arg<v128, uint16_t, CRet, CArg>(
          reinterpret_cast<fptr>(v128_store_aligned),
          reinterpret_cast<fptr>(u16_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v128_store_aligned),
          reinterpret_cast<fptr>(c_u16_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v128) &&
               typeid(CArg) == typeid(uint32_t)) {
      // V128_U32
      error = CompareSimd1Arg<v128, uint32_t, CRet, CArg>(
          reinterpret_cast<fptr>(v128_store_aligned),
          reinterpret_cast<fptr>(u32_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v128_store_aligned),
          reinterpret_cast<fptr>(c_u32_load_aligned), ref_simd, ref_d, s);
    } else {
      FAIL() << "Internal error: Unknown intrinsic function "
             << typeid(CRet).name() << " " << name << "("
             << typeid(CArg).name() << ")";
    }
  }

  EXPECT_EQ(0, error) << "Error: mismatch for " << name << "("
                      << Print(s, sizeof(s)) << ") -> " << Print(d, sizeof(d))
                      << " (simd), " << Print(ref_d, sizeof(ref_d)) << " (ref)";
}

template <typename CRet, typename CArg1, typename CArg2>
void TestSimd2Args(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
                   const char *name) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  fptr ref_simd;
  fptr simd;
  int error = 0;
  DECLARE_ALIGNED(32, uint8_t, s1[sizeof(CArg1)]);
  DECLARE_ALIGNED(32, uint8_t, s2[sizeof(CArg2)]);
  DECLARE_ALIGNED(32, uint8_t, d[sizeof(CRet)]);
  DECLARE_ALIGNED(32, uint8_t, ref_d[sizeof(CRet)]);
  memset(ref_d, 0, sizeof(ref_d));
  memset(d, 0, sizeof(d));

  Map(name, &ref_simd, &simd);
  if (simd == NULL || ref_simd == NULL) {
    FAIL() << "Internal error: Unknown intrinsic function " << name;
  }

  for (unsigned int count = 0;
       count < iterations && !error && !testing::Test::HasFailure(); count++) {
    for (unsigned int c = 0; c < sizeof(CArg1); c++) s1[c] = rnd.Rand8();

    for (unsigned int c = 0; c < sizeof(CArg2); c++) s2[c] = rnd.Rand8();

    if (maskwidth) SetMask(s2, sizeof(CArg2), mask, maskwidth);

    if (typeid(CRet) == typeid(c_v64) && typeid(CArg1) == typeid(c_v64) &&
        typeid(CArg2) == typeid(c_v64)) {
      // V64_V64V64
      error = CompareSimd2Args<v64, v64, v64, CRet, CArg1, CArg2>(
          reinterpret_cast<fptr>(v64_store_aligned),
          reinterpret_cast<fptr>(v64_load_aligned),
          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v64_store_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned),
          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
    } else if (typeid(CRet) == typeid(c_v64) &&
               typeid(CArg1) == typeid(uint32_t) &&
               typeid(CArg2) == typeid(uint32_t)) {
      // V64_U32U32
      error = CompareSimd2Args<v64, uint32_t, uint32_t, CRet, CArg1, CArg2>(
          reinterpret_cast<fptr>(v64_store_aligned),
          reinterpret_cast<fptr>(u32_load_aligned),
          reinterpret_cast<fptr>(u32_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v64_store_aligned),
          reinterpret_cast<fptr>(c_u32_load_aligned),
          reinterpret_cast<fptr>(c_u32_load_aligned),
          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
    } else if (typeid(CRet) == typeid(uint32_t) &&
               typeid(CArg1) == typeid(c_v64) &&
               typeid(CArg2) == typeid(c_v64)) {
      // U32_V64V64
      error = CompareSimd2Args<uint32_t, v64, v64, CRet, CArg1, CArg2>(
          reinterpret_cast<fptr>(u32_store_aligned),
          reinterpret_cast<fptr>(v64_load_aligned),
          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_u32_store_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned),
          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
    } else if (typeid(CRet) == typeid(int64_t) &&
               typeid(CArg1) == typeid(c_v64) &&
               typeid(CArg2) == typeid(c_v64)) {
      // S64_V64V64
      error = CompareSimd2Args<int64_t, v64, v64, CRet, CArg1, CArg2>(
          reinterpret_cast<fptr>(s64_store_aligned),
          reinterpret_cast<fptr>(v64_load_aligned),
          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_s64_store_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned),
          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
    } else if (typeid(CRet) == typeid(c_v64) &&
               typeid(CArg1) == typeid(c_v64) &&
               typeid(CArg2) == typeid(uint32_t)) {
      // V64_V64U32
      error = CompareSimd2Args<v64, v64, uint32_t, CRet, CArg1, CArg2>(
          reinterpret_cast<fptr>(v64_store_aligned),
          reinterpret_cast<fptr>(v64_load_aligned),
          reinterpret_cast<fptr>(u32_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v64_store_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned),
          reinterpret_cast<fptr>(c_u32_load_aligned),
          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
    } else if (typeid(CRet) == typeid(c_v128) &&
               typeid(CArg1) == typeid(c_v128) &&
               typeid(CArg2) == typeid(c_v128)) {
      // V128_V128V128
      error = CompareSimd2Args<v128, v128, v128, CRet, CArg1, CArg2>(
          reinterpret_cast<fptr>(v128_store_aligned),
          reinterpret_cast<fptr>(v128_load_aligned),
          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v128_store_aligned),
          reinterpret_cast<fptr>(c_v128_load_aligned),
          reinterpret_cast<fptr>(c_v128_load_aligned),
          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
    } else if (typeid(CRet) == typeid(uint32_t) &&
               typeid(CArg1) == typeid(c_v128) &&
               typeid(CArg2) == typeid(c_v128)) {
      // U32_V128V128
      error = CompareSimd2Args<uint32_t, v128, v128, CRet, CArg1, CArg2>(
          reinterpret_cast<fptr>(u32_store_aligned),
          reinterpret_cast<fptr>(v128_load_aligned),
          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_u32_store_aligned),
          reinterpret_cast<fptr>(c_v128_load_aligned),
          reinterpret_cast<fptr>(c_v128_load_aligned),
          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
    } else if (typeid(CRet) == typeid(int64_t) &&
               typeid(CArg1) == typeid(c_v128) &&
               typeid(CArg2) == typeid(c_v128)) {
      // S64_V128V128
      error = CompareSimd2Args<int64_t, v128, v128, CRet, CArg1, CArg2>(
          reinterpret_cast<fptr>(s64_store_aligned),
          reinterpret_cast<fptr>(v128_load_aligned),
          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_s64_store_aligned),
          reinterpret_cast<fptr>(c_v128_load_aligned),
          reinterpret_cast<fptr>(c_v128_load_aligned),
          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
    } else if (typeid(CRet) == typeid(c_v128) &&
               typeid(CArg1) == typeid(uint64_t) &&
               typeid(CArg2) == typeid(uint64_t)) {
      // V128_U64U64
      error = CompareSimd2Args<v128, uint64_t, uint64_t, CRet, CArg1, CArg2>(
          reinterpret_cast<fptr>(v128_store_aligned),
          reinterpret_cast<fptr>(u64_load_aligned),
          reinterpret_cast<fptr>(u64_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v128_store_aligned),
          reinterpret_cast<fptr>(c_u64_load_aligned),
          reinterpret_cast<fptr>(c_u64_load_aligned),
          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
    } else if (typeid(CRet) == typeid(c_v128) &&
               typeid(CArg1) == typeid(c_v64) &&
               typeid(CArg2) == typeid(c_v64)) {
      // V128_V64V64
      error = CompareSimd2Args<v128, v64, v64, CRet, CArg1, CArg2>(
          reinterpret_cast<fptr>(v128_store_aligned),
          reinterpret_cast<fptr>(v64_load_aligned),
          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v128_store_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned),
          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
    } else if (typeid(CRet) == typeid(c_v128) &&
               typeid(CArg1) == typeid(c_v128) &&
               typeid(CArg2) == typeid(uint32_t)) {
      // V128_V128U32
      error = CompareSimd2Args<v128, v128, uint32_t, CRet, CArg1, CArg2>(
          reinterpret_cast<fptr>(v128_store_aligned),
          reinterpret_cast<fptr>(v128_load_aligned),
          reinterpret_cast<fptr>(u32_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v128_store_aligned),
          reinterpret_cast<fptr>(c_v128_load_aligned),
          reinterpret_cast<fptr>(c_u32_load_aligned),
          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
    } else {
      FAIL() << "Internal error: Unknown intrinsic function "
             << typeid(CRet).name() << " " << name << "("
             << typeid(CArg1).name() << ", " << typeid(CArg2).name() << ")";
    }
  }

  EXPECT_EQ(0, error) << "Error: mismatch for " << name << "("
                      << Print(s1, sizeof(s1)) << ", " << Print(s2, sizeof(s2))
                      << ") -> " << Print(d, sizeof(d)) << " (simd), "
                      << Print(ref_d, sizeof(ref_d)) << " (ref)";
}

// Instantiations to make the functions callable from other files
template void TestSimd1Arg<c_v64, uint8_t>(uint32_t, uint32_t, uint32_t,
                                           const char *);
template void TestSimd1Arg<c_v64, uint16_t>(uint32_t, uint32_t, uint32_t,
                                            const char *);
template void TestSimd1Arg<c_v64, uint32_t>(uint32_t, uint32_t, uint32_t,
                                            const char *);
template void TestSimd1Arg<c_v64, c_v64>(uint32_t, uint32_t, uint32_t,
                                         const char *);
template void TestSimd1Arg<uint32_t, c_v64>(uint32_t, uint32_t, uint32_t,
                                            const char *);
template void TestSimd1Arg<int32_t, c_v64>(uint32_t, uint32_t, uint32_t,
                                           const char *);
template void TestSimd1Arg<uint64_t, c_v64>(uint32_t, uint32_t, uint32_t,
                                            const char *);
template void TestSimd1Arg<int64_t, c_v64>(uint32_t, uint32_t, uint32_t,
                                           const char *);
template void TestSimd2Args<c_v64, uint32_t, uint32_t>(uint32_t, uint32_t,
                                                       uint32_t, const char *);
template void TestSimd2Args<c_v64, c_v64, c_v64>(uint32_t, uint32_t, uint32_t,
                                                 const char *);
template void TestSimd2Args<c_v64, c_v64, uint32_t>(uint32_t, uint32_t,
                                                    uint32_t, const char *);
template void TestSimd2Args<int64_t, c_v64, c_v64>(uint32_t, uint32_t, uint32_t,
                                                   const char *);
template void TestSimd2Args<uint32_t, c_v64, c_v64>(uint32_t, uint32_t,
                                                    uint32_t, const char *);
template void TestSimd1Arg<c_v128, c_v128>(uint32_t, uint32_t, uint32_t,
                                           const char *);
template void TestSimd1Arg<c_v128, uint8_t>(uint32_t, uint32_t, uint32_t,
                                            const char *);
template void TestSimd1Arg<c_v128, uint16_t>(uint32_t, uint32_t, uint32_t,
                                             const char *);
template void TestSimd1Arg<c_v128, uint32_t>(uint32_t, uint32_t, uint32_t,
                                             const char *);
template void TestSimd1Arg<c_v128, c_v64>(uint32_t, uint32_t, uint32_t,
                                          const char *);
template void TestSimd1Arg<uint32_t, c_v128>(uint32_t, uint32_t, uint32_t,
                                             const char *);
template void TestSimd1Arg<uint64_t, c_v128>(uint32_t, uint32_t, uint32_t,
                                             const char *);
template void TestSimd1Arg<c_v64, c_v128>(uint32_t, uint32_t, uint32_t,
                                          const char *);
template void TestSimd2Args<c_v128, c_v128, c_v128>(uint32_t, uint32_t,
                                                    uint32_t, const char *);
template void TestSimd2Args<c_v128, c_v128, uint32_t>(uint32_t, uint32_t,
                                                      uint32_t, const char *);
template void TestSimd2Args<c_v128, uint64_t, uint64_t>(uint32_t, uint32_t,
                                                        uint32_t, const char *);
template void TestSimd2Args<c_v128, c_v64, c_v64>(uint32_t, uint32_t, uint32_t,
                                                  const char *);
template void TestSimd2Args<int64_t, c_v128, c_v128>(uint32_t, uint32_t,
                                                     uint32_t, const char *);
template void TestSimd2Args<uint32_t, c_v128, c_v128>(uint32_t, uint32_t,
                                                      uint32_t, const char *);

}  // namespace SIMD_NAMESPACE