/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <assert.h>
#include <string>
#include "./aom_dsp_rtcd.h"
#include "test/acm_random.h"
#include "test/register_state_check.h"
#include "aom_dsp/aom_simd.h"
#include "aom_dsp/simd/v64_intrinsics_c.h"

// Machine tuned code goes into this file. This file is included from
// simd_cmp_sse2.cc, simd_cmp_ssse3.cc, etc., which define the macros
// ARCH (=neon, sse2, ssse3, etc.), SIMD_NAMESPACE and ARCH_POSTFIX().
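// For illustration only, an including file is expected to provide definitions
// roughly along these lines before including this file (a sketch, not
// verbatim; see the actual simd_cmp_*.cc sources):
//   #define ARCH SSE2
//   #define ARCH_POSTFIX(name) name##_sse2
//   #define SIMD_NAMESPACE simd_test_sse2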

using libaom_test::ACMRandom;

namespace SIMD_NAMESPACE {

// Wrap templates around intrinsics using immediate values
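// E.g. imm_v64_shl_n_byte<2>(a) forwards to v64_shl_n_byte(a, 2), so the
// shift amount is a compile time constant as the immediate intrinsics expect.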
template <int shift>
v64 imm_v64_shl_n_byte(v64 a) {
  return v64_shl_n_byte(a, shift);
}
template <int shift>
v64 imm_v64_shr_n_byte(v64 a) {
  return v64_shr_n_byte(a, shift);
}
template <int shift>
v64 imm_v64_shl_n_8(v64 a) {
  return v64_shl_n_8(a, shift);
}
template <int shift>
v64 imm_v64_shr_n_u8(v64 a) {
  return v64_shr_n_u8(a, shift);
}
template <int shift>
v64 imm_v64_shr_n_s8(v64 a) {
  return v64_shr_n_s8(a, shift);
}
template <int shift>
v64 imm_v64_shl_n_16(v64 a) {
  return v64_shl_n_16(a, shift);
}
template <int shift>
v64 imm_v64_shr_n_u16(v64 a) {
  return v64_shr_n_u16(a, shift);
}
template <int shift>
v64 imm_v64_shr_n_s16(v64 a) {
  return v64_shr_n_s16(a, shift);
}
template <int shift>
v64 imm_v64_shl_n_32(v64 a) {
  return v64_shl_n_32(a, shift);
}
template <int shift>
v64 imm_v64_shr_n_u32(v64 a) {
  return v64_shr_n_u32(a, shift);
}
template <int shift>
v64 imm_v64_shr_n_s32(v64 a) {
  return v64_shr_n_s32(a, shift);
}
template <int shift>
v64 imm_v64_align(v64 a, v64 b) {
  return v64_align(a, b, shift);
}

// Wrap templates around corresponding C implementations of the above
template <int shift>
c_v64 c_imm_v64_shl_n_byte(c_v64 a) {
  return c_v64_shl_n_byte(a, shift);
}
template <int shift>
c_v64 c_imm_v64_shr_n_byte(c_v64 a) {
  return c_v64_shr_n_byte(a, shift);
}
template <int shift>
c_v64 c_imm_v64_shl_n_8(c_v64 a) {
  return c_v64_shl_n_8(a, shift);
}
template <int shift>
c_v64 c_imm_v64_shr_n_u8(c_v64 a) {
  return c_v64_shr_n_u8(a, shift);
}
template <int shift>
c_v64 c_imm_v64_shr_n_s8(c_v64 a) {
  return c_v64_shr_n_s8(a, shift);
}
template <int shift>
c_v64 c_imm_v64_shl_n_16(c_v64 a) {
  return c_v64_shl_n_16(a, shift);
}
template <int shift>
c_v64 c_imm_v64_shr_n_u16(c_v64 a) {
  return c_v64_shr_n_u16(a, shift);
}
template <int shift>
c_v64 c_imm_v64_shr_n_s16(c_v64 a) {
  return c_v64_shr_n_s16(a, shift);
}
template <int shift>
c_v64 c_imm_v64_shl_n_32(c_v64 a) {
  return c_v64_shl_n_32(a, shift);
}
template <int shift>
c_v64 c_imm_v64_shr_n_u32(c_v64 a) {
  return c_v64_shr_n_u32(a, shift);
}
template <int shift>
c_v64 c_imm_v64_shr_n_s32(c_v64 a) {
  return c_v64_shr_n_s32(a, shift);
}
template <int shift>
c_v64 c_imm_v64_align(c_v64 a, c_v64 b) {
  return c_v64_align(a, b, shift);
}

// Wrappers around the SAD and SSD functions
uint32_t v64_sad_u8(v64 a, v64 b) {
  return v64_sad_u8_sum(::v64_sad_u8(v64_sad_u8_init(), a, b));
}
uint32_t v64_ssd_u8(v64 a, v64 b) {
  return v64_ssd_u8_sum(::v64_ssd_u8(v64_ssd_u8_init(), a, b));
}

uint32_t c_v64_sad_u8(c_v64 a, c_v64 b) {
  return c_v64_sad_u8_sum(::c_v64_sad_u8(c_v64_sad_u8_init(), a, b));
}
uint32_t c_v64_ssd_u8(c_v64 a, c_v64 b) {
  return c_v64_ssd_u8_sum(::c_v64_ssd_u8(c_v64_ssd_u8_init(), a, b));
}

namespace {

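// MAP(name) expands to a table entry { "name", c_name, name }, i.e. the
// function's name as a string together with its C reference and machine
// tuned implementations.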
#define MAP(name)                                                \
  {                                                              \
    (const void *const) #name, (const void *const)c_##name,      \
        (const void *const)name                                  \
  }

const void *const m[][3] = { MAP(v64_sad_u8),
                             MAP(v64_ssd_u8),
                             MAP(v64_add_8),
                             MAP(v64_add_16),
                             MAP(v64_sadd_s16),
                             MAP(v64_add_32),
                             MAP(v64_sub_8),
                             MAP(v64_ssub_u8),
                             MAP(v64_ssub_s8),
                             MAP(v64_sub_16),
                             MAP(v64_ssub_s16),
                             MAP(v64_sub_32),
                             MAP(v64_ziplo_8),
                             MAP(v64_ziphi_8),
                             MAP(v64_ziplo_16),
                             MAP(v64_ziphi_16),
                             MAP(v64_ziplo_32),
                             MAP(v64_ziphi_32),
                             MAP(v64_pack_s32_s16),
                             MAP(v64_pack_s16_u8),
                             MAP(v64_pack_s16_s8),
                             MAP(v64_unziphi_8),
                             MAP(v64_unziplo_8),
                             MAP(v64_unziphi_16),
                             MAP(v64_unziplo_16),
                             MAP(v64_or),
                             MAP(v64_xor),
                             MAP(v64_and),
                             MAP(v64_andn),
                             MAP(v64_mullo_s16),
                             MAP(v64_mulhi_s16),
                             MAP(v64_mullo_s32),
                             MAP(v64_madd_s16),
                             MAP(v64_madd_us8),
                             MAP(v64_avg_u8),
                             MAP(v64_rdavg_u8),
                             MAP(v64_avg_u16),
                             MAP(v64_min_u8),
                             MAP(v64_max_u8),
                             MAP(v64_min_s8),
                             MAP(v64_max_s8),
                             MAP(v64_min_s16),
                             MAP(v64_max_s16),
                             MAP(v64_cmpgt_s8),
                             MAP(v64_cmplt_s8),
                             MAP(v64_cmpeq_8),
                             MAP(v64_cmpgt_s16),
                             MAP(v64_cmplt_s16),
                             MAP(v64_cmpeq_16),
                             MAP(v64_shuffle_8),
                             MAP(imm_v64_align<1>),
                             MAP(imm_v64_align<2>),
                             MAP(imm_v64_align<3>),
                             MAP(imm_v64_align<4>),
                             MAP(imm_v64_align<5>),
                             MAP(imm_v64_align<6>),
                             MAP(imm_v64_align<7>),
                             MAP(v64_abs_s16),
                             MAP(v64_unpacklo_u8_s16),
                             MAP(v64_unpackhi_u8_s16),
                             MAP(v64_unpacklo_u16_s32),
                             MAP(v64_unpacklo_s16_s32),
                             MAP(v64_unpackhi_u16_s32),
                             MAP(v64_unpackhi_s16_s32),
                             MAP(imm_v64_shr_n_byte<1>),
                             MAP(imm_v64_shr_n_byte<2>),
                             MAP(imm_v64_shr_n_byte<3>),
                             MAP(imm_v64_shr_n_byte<4>),
                             MAP(imm_v64_shr_n_byte<5>),
                             MAP(imm_v64_shr_n_byte<6>),
                             MAP(imm_v64_shr_n_byte<7>),
                             MAP(imm_v64_shl_n_byte<1>),
                             MAP(imm_v64_shl_n_byte<2>),
                             MAP(imm_v64_shl_n_byte<3>),
                             MAP(imm_v64_shl_n_byte<4>),
                             MAP(imm_v64_shl_n_byte<5>),
                             MAP(imm_v64_shl_n_byte<6>),
                             MAP(imm_v64_shl_n_byte<7>),
                             MAP(imm_v64_shl_n_8<1>),
                             MAP(imm_v64_shl_n_8<2>),
                             MAP(imm_v64_shl_n_8<3>),
                             MAP(imm_v64_shl_n_8<4>),
                             MAP(imm_v64_shl_n_8<5>),
                             MAP(imm_v64_shl_n_8<6>),
                             MAP(imm_v64_shl_n_8<7>),
                             MAP(imm_v64_shr_n_u8<1>),
                             MAP(imm_v64_shr_n_u8<2>),
                             MAP(imm_v64_shr_n_u8<3>),
                             MAP(imm_v64_shr_n_u8<4>),
                             MAP(imm_v64_shr_n_u8<5>),
                             MAP(imm_v64_shr_n_u8<6>),
                             MAP(imm_v64_shr_n_u8<7>),
                             MAP(imm_v64_shr_n_s8<1>),
                             MAP(imm_v64_shr_n_s8<2>),
                             MAP(imm_v64_shr_n_s8<3>),
                             MAP(imm_v64_shr_n_s8<4>),
                             MAP(imm_v64_shr_n_s8<5>),
                             MAP(imm_v64_shr_n_s8<6>),
                             MAP(imm_v64_shr_n_s8<7>),
                             MAP(imm_v64_shl_n_16<1>),
                             MAP(imm_v64_shl_n_16<2>),
                             MAP(imm_v64_shl_n_16<4>),
                             MAP(imm_v64_shl_n_16<6>),
                             MAP(imm_v64_shl_n_16<8>),
                             MAP(imm_v64_shl_n_16<10>),
                             MAP(imm_v64_shl_n_16<12>),
                             MAP(imm_v64_shl_n_16<14>),
                             MAP(imm_v64_shr_n_u16<1>),
                             MAP(imm_v64_shr_n_u16<2>),
                             MAP(imm_v64_shr_n_u16<4>),
                             MAP(imm_v64_shr_n_u16<6>),
                             MAP(imm_v64_shr_n_u16<8>),
                             MAP(imm_v64_shr_n_u16<10>),
                             MAP(imm_v64_shr_n_u16<12>),
                             MAP(imm_v64_shr_n_u16<14>),
                             MAP(imm_v64_shr_n_s16<1>),
                             MAP(imm_v64_shr_n_s16<2>),
                             MAP(imm_v64_shr_n_s16<4>),
                             MAP(imm_v64_shr_n_s16<6>),
                             MAP(imm_v64_shr_n_s16<8>),
                             MAP(imm_v64_shr_n_s16<10>),
                             MAP(imm_v64_shr_n_s16<12>),
                             MAP(imm_v64_shr_n_s16<14>),
                             MAP(imm_v64_shl_n_32<1>),
                             MAP(imm_v64_shl_n_32<4>),
                             MAP(imm_v64_shl_n_32<8>),
                             MAP(imm_v64_shl_n_32<12>),
                             MAP(imm_v64_shl_n_32<16>),
                             MAP(imm_v64_shl_n_32<20>),
                             MAP(imm_v64_shl_n_32<24>),
                             MAP(imm_v64_shl_n_32<28>),
                             MAP(imm_v64_shr_n_u32<1>),
                             MAP(imm_v64_shr_n_u32<4>),
                             MAP(imm_v64_shr_n_u32<8>),
                             MAP(imm_v64_shr_n_u32<12>),
                             MAP(imm_v64_shr_n_u32<16>),
                             MAP(imm_v64_shr_n_u32<20>),
                             MAP(imm_v64_shr_n_u32<24>),
                             MAP(imm_v64_shr_n_u32<28>),
                             MAP(imm_v64_shr_n_s32<1>),
                             MAP(imm_v64_shr_n_s32<4>),
                             MAP(imm_v64_shr_n_s32<8>),
                             MAP(imm_v64_shr_n_s32<12>),
                             MAP(imm_v64_shr_n_s32<16>),
                             MAP(imm_v64_shr_n_s32<20>),
                             MAP(imm_v64_shr_n_s32<24>),
                             MAP(imm_v64_shr_n_s32<28>),
                             MAP(v64_shl_8),
                             MAP(v64_shr_u8),
                             MAP(v64_shr_s8),
                             MAP(v64_shl_16),
                             MAP(v64_shr_u16),
                             MAP(v64_shr_s16),
                             MAP(v64_shl_32),
                             MAP(v64_shr_u32),
                             MAP(v64_shr_s32),
                             MAP(v64_hadd_u8),
                             MAP(v64_hadd_s16),
                             MAP(v64_dotp_s16),
                             { NULL, NULL, NULL } };
#undef MAP

// Map reference functions to machine tuned functions. Since the
// functions depend on machine tuned types, the non-machine tuned
// instantiations of the test can't refer to these functions directly,
// so we refer to them by name and do the mapping here.
void Map(const char *name, const void **ref, const void **simd) {
  unsigned int i;
  for (i = 0; m[i][0] && strcmp(name, reinterpret_cast<const char *>(m[i][0]));
       i++) {
  }

  *ref = m[i][1];
  *simd = m[i][2];
}
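// For example (illustrative), Map("v64_add_8", &ref, &simd) leaves ref
// pointing at c_v64_add_8 and simd at the machine tuned v64_add_8 from the
// table above; an unknown name returns the terminating NULL entries.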

// Used for printing errors in TestSimd1Arg and TestSimd2Args
std::string Print(const uint8_t *a, int size) {
  std::string text = "0x";
  for (int i = 0; i < size; i++) {
    char buf[3];
    snprintf(buf, sizeof(buf), "%02x",
             a[!CONFIG_BIG_ENDIAN ? size - 1 - i : i]);
    text += buf;
  }

  return text;
}

// Used in TestSimd1Arg and TestSimd2Args to restrict argument ranges
void SetMask(uint8_t *s, int size, uint32_t mask, uint32_t maskwidth) {
  switch (maskwidth) {
    case 0: {
      break;
    }
    case 8: {
      for (int i = 0; i < size; i++) s[i] &= mask;
      break;
    }
    case 16: {
      uint16_t *t = reinterpret_cast<uint16_t *>(s);
      assert(!(reinterpret_cast<uintptr_t>(s) & 1));
      for (int i = 0; i < size / 2; i++) t[i] &= mask;
      break;
    }
    case 32: {
      uint32_t *t = reinterpret_cast<uint32_t *>(s);
      assert(!(reinterpret_cast<uintptr_t>(s) & 3));
      for (int i = 0; i < size / 4; i++) t[i] &= mask;
      break;
    }
    case 64: {
      uint64_t *t = reinterpret_cast<uint64_t *>(s);
      assert(!(reinterpret_cast<uintptr_t>(s) & 7));
      for (int i = 0; i < size / 8; i++) t[i] &= mask;
      break;
    }
    default: {
      FAIL() << "Unsupported mask width";
      break;
    }
  }
}
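// E.g. (illustrative) SetMask(s, 8, 0x1f, 8) masks every byte of an 8 byte
// buffer down to the range [0, 31], which a caller can use to keep shift
// amounts or other restricted arguments in range.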

// We need a store function for uint64_t
void u64_store_aligned(void *p, uint64_t a) {
  v64_store_aligned(p, v64_from_64(a));
}

void c_u64_store_aligned(void *p, uint64_t a) {
  c_v64_store_aligned(p, c_v64_from_64(a));
}

// CompareSimd1Arg and CompareSimd2Args compare intrinsics taking 1 or
// 2 arguments respectively with their corresponding C reference.
// Ideally, the loads and stores would have gone into the template
// parameter list, but v64 and v128 could be typedef'ed to the same
// type (which is the case on x86), in which case we can't instantiate
// both v64 and v128 versions, so the function return and argument types,
// including the always differing types in the C equivalent, are used
// instead. The function arguments must be void pointers and then go
// through a cast to avoid matching errors in the branches eliminated by
// the typeid tests in the calling function.
template <typename Ret, typename Arg, typename CRet, typename CArg>
int CompareSimd1Arg(const void *store, const void *load, const void *simd,
                    void *d, const void *c_store, const void *c_load,
                    const void *c_simd, void *ref_d, const void *a) {
  void (*const my_store)(void *, Ret) = (void (*const)(void *, Ret))store;
  Arg (*const my_load)(const void *) = (Arg(*const)(const void *))load;
  Ret (*const my_simd)(Arg) = (Ret(*const)(Arg))simd;
  void (*const my_c_store)(void *, CRet) = (void (*const)(void *, CRet))c_store;
  CArg (*const my_c_load)(const void *) = (CArg(*const)(const void *))c_load;
  CRet (*const my_c_simd)(CArg) = (CRet(*const)(CArg))c_simd;

  // Call reference and intrinsic
  ASM_REGISTER_STATE_CHECK(my_c_store(ref_d, my_c_simd(my_c_load(a))));
  ASM_REGISTER_STATE_CHECK(my_store(d, my_simd(my_load(a))));

  // Compare results
  return memcmp(ref_d, d, sizeof(CRet));
}
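// For instance (illustrative), the V64_V64 branch of TestSimd1Arg below
// instantiates CompareSimd1Arg<v64, v64, CRet, CArg> and passes
// v64_store_aligned/v64_load_aligned and their c_v64 counterparts through
// the void pointer arguments.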

template <typename Ret, typename Arg1, typename Arg2, typename CRet,
          typename CArg1, typename CArg2>
int CompareSimd2Args(const void *store, const void *load1, const void *load2,
                     const void *simd, void *d, const void *c_store,
                     const void *c_load1, const void *c_load2,
                     const void *c_simd, void *ref_d, const void *a,
                     const void *b) {
  void (*const my_store)(void *, Ret) = (void (*const)(void *, Ret))store;
  Arg1 (*const my_load1)(const void *) = (Arg1(*const)(const void *))load1;
  Arg2 (*const my_load2)(const void *) = (Arg2(*const)(const void *))load2;
  Ret (*const my_simd)(Arg1, Arg2) = (Ret(*const)(Arg1, Arg2))simd;
  void (*const my_c_store)(void *, CRet) = (void (*const)(void *, CRet))c_store;
  CArg1 (*const my_c_load1)(const void *) =
      (CArg1(*const)(const void *))c_load1;
  CArg2 (*const my_c_load2)(const void *) =
      (CArg2(*const)(const void *))c_load2;
  CRet (*const my_c_simd)(CArg1, CArg2) = (CRet(*const)(CArg1, CArg2))c_simd;

  // Call reference and intrinsic
  ASM_REGISTER_STATE_CHECK(
      my_c_store(ref_d, my_c_simd(my_c_load1(a), my_c_load2(b))));
  ASM_REGISTER_STATE_CHECK(my_store(d, my_simd(my_load1(a), my_load2(b))));

  // Compare results
  return memcmp(ref_d, d, sizeof(CRet));
}

template <typename CRet, typename CArg>
void TestSimd1Arg(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
                  const char *name) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const void *ref_simd;
  const void *simd;
  int error = 0;
  DECLARE_ALIGNED(sizeof(CArg), uint16_t, s[sizeof(CArg) / sizeof(uint16_t)]);
  DECLARE_ALIGNED(sizeof(CRet), uint8_t, d[sizeof(CRet)]);
  DECLARE_ALIGNED(sizeof(CRet), uint8_t, ref_d[sizeof(CRet)]);
  memset(ref_d, 0, sizeof(ref_d));
  memset(d, 0, sizeof(d));

  Map(name, &ref_simd, &simd);
  if (simd == NULL || ref_simd == NULL) {
    FAIL() << "Internal error: Unknown intrinsic function " << name;
  }

  for (unsigned int count = 0; count < iterations && !error; count++) {
    for (unsigned int c = 0; c < sizeof(CArg) / sizeof(uint16_t); c++)
      s[c] = rnd.Rand16();

    if (maskwidth) {
      SetMask(reinterpret_cast<uint8_t *>(s), sizeof(CArg), mask, maskwidth);
    }

    if (typeid(CRet) == typeid(c_v64) && typeid(CArg) == typeid(c_v64)) {
      // V64_V64
      error = CompareSimd1Arg<v64, v64, CRet, CArg>(
          reinterpret_cast<const void *>(v64_store_aligned),
          reinterpret_cast<const void *>(v64_load_aligned), simd, d,
          reinterpret_cast<const void *>(c_v64_store_aligned),
          reinterpret_cast<const void *>(c_v64_load_aligned), ref_simd, ref_d,
          s);
    } else if (typeid(CRet) == typeid(uint64_t) &&
               typeid(CArg) == typeid(c_v64)) {
      // U64_V64
      error = CompareSimd1Arg<uint64_t, v64, CRet, CArg>(
          reinterpret_cast<const void *>(u64_store_aligned),
          reinterpret_cast<const void *>(v64_load_aligned), simd, d,
          reinterpret_cast<const void *>(c_v64_store_aligned),
          reinterpret_cast<const void *>(c_v64_load_aligned), ref_simd, ref_d,
          s);
    } else if (typeid(CRet) == typeid(int64_t) &&
               typeid(CArg) == typeid(c_v64)) {
      // S64_V64
      error = CompareSimd1Arg<int64_t, v64, CRet, CArg>(
          reinterpret_cast<const void *>(u64_store_aligned),
          reinterpret_cast<const void *>(v64_load_aligned), simd, d,
          reinterpret_cast<const void *>(c_v64_store_aligned),
          reinterpret_cast<const void *>(c_v64_load_aligned), ref_simd, ref_d,
          s);
    } else {
      FAIL() << "Internal error: Unknown intrinsic function "
             << typeid(CRet).name() << " " << name << "(" << typeid(CArg).name()
             << ")";
    }
  }

  EXPECT_EQ(0, error) << "Error: mismatch for " << name << "("
                      << Print(reinterpret_cast<uint8_t *>(s), sizeof(s))
                      << ") -> " << Print(d, sizeof(d)) << " (simd), "
                      << Print(ref_d, sizeof(ref_d)) << " (ref)";
}

template <typename CRet, typename CArg1, typename CArg2>
void TestSimd2Args(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
                   const char *name) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const void *ref_simd;
  const void *simd;
  int error = 0;
  DECLARE_ALIGNED(sizeof(CArg1), uint16_t,
                  s1[sizeof(CArg1) / sizeof(uint16_t)]);
  DECLARE_ALIGNED(sizeof(CArg2), uint16_t,
                  s2[sizeof(CArg2) / sizeof(uint16_t)]);
  DECLARE_ALIGNED(sizeof(CRet), uint8_t, d[sizeof(CRet)]);
  DECLARE_ALIGNED(sizeof(CRet), uint8_t, ref_d[sizeof(CRet)]);
  memset(ref_d, 0, sizeof(ref_d));
  memset(d, 0, sizeof(d));

  Map(name, &ref_simd, &simd);
  if (simd == NULL || ref_simd == NULL) {
    FAIL() << "Internal error: Unknown intrinsic function " << name;
  }

  for (unsigned int count = 0; count < iterations && !error; count++) {
    for (unsigned int c = 0; c < sizeof(CArg1) / sizeof(uint16_t); c++)
      s1[c] = rnd.Rand16();

    for (unsigned int c = 0; c < sizeof(CArg2) / sizeof(uint16_t); c++)
      s2[c] = rnd.Rand16();

    if (maskwidth)
      SetMask(reinterpret_cast<uint8_t *>(s2), sizeof(CArg2), mask, maskwidth);

    if (typeid(CRet) == typeid(c_v64) && typeid(CArg1) == typeid(c_v64) &&
        typeid(CArg2) == typeid(c_v64)) {
      // V64_V64V64
      error = CompareSimd2Args<v64, v64, v64, CRet, CArg1, CArg2>(
          reinterpret_cast<const void *>(v64_store_aligned),
          reinterpret_cast<const void *>(v64_load_aligned),
          reinterpret_cast<const void *>(v64_load_aligned), simd, d,
          reinterpret_cast<const void *>(c_v64_store_aligned),
          reinterpret_cast<const void *>(c_v64_load_aligned),
          reinterpret_cast<const void *>(c_v64_load_aligned),
          reinterpret_cast<const void *>(ref_simd), ref_d, s1, s2);
    } else if (typeid(CRet) == typeid(uint32_t) &&
               typeid(CArg1) == typeid(c_v64) &&
               typeid(CArg2) == typeid(c_v64)) {
      // U32_V64V64
      error = CompareSimd2Args<uint32_t, v64, v64, CRet, CArg1, CArg2>(
          reinterpret_cast<const void *>(u32_store_aligned),
          reinterpret_cast<const void *>(v64_load_aligned),
          reinterpret_cast<const void *>(v64_load_aligned), simd, d,
          reinterpret_cast<const void *>(c_u32_store_aligned),
          reinterpret_cast<const void *>(c_v64_load_aligned),
          reinterpret_cast<const void *>(c_v64_load_aligned),
          reinterpret_cast<const void *>(ref_simd), ref_d, s1, s2);
    } else if (typeid(CRet) == typeid(int64_t) &&
               typeid(CArg1) == typeid(c_v64) &&
               typeid(CArg2) == typeid(c_v64)) {
      // S64_V64V64
      error = CompareSimd2Args<int64_t, v64, v64, CRet, CArg1, CArg2>(
          reinterpret_cast<const void *>(u64_store_aligned),
          reinterpret_cast<const void *>(v64_load_aligned),
          reinterpret_cast<const void *>(v64_load_aligned), simd, d,
          reinterpret_cast<const void *>(c_u64_store_aligned),
          reinterpret_cast<const void *>(c_v64_load_aligned),
          reinterpret_cast<const void *>(c_v64_load_aligned),
          reinterpret_cast<const void *>(ref_simd), ref_d, s1, s2);
    } else if (typeid(CRet) == typeid(c_v64) &&
               typeid(CArg1) == typeid(c_v64) &&
               typeid(CArg2) == typeid(uint32_t)) {
      // V64_V64U32
      error = CompareSimd2Args<v64, v64, uint32_t, CRet, CArg1, CArg2>(
          reinterpret_cast<const void *>(v64_store_aligned),
          reinterpret_cast<const void *>(v64_load_aligned),
          reinterpret_cast<const void *>(u32_load_aligned), simd, d,
          reinterpret_cast<const void *>(c_v64_store_aligned),
          reinterpret_cast<const void *>(c_v64_load_aligned),
          reinterpret_cast<const void *>(c_u32_load_aligned),
          reinterpret_cast<const void *>(ref_simd), ref_d, s1, s2);
    } else {
      FAIL() << "Internal error: Unknown intrinsic function "
             << typeid(CRet).name() << " " << name << "("
             << typeid(CArg1).name() << ", " << typeid(CArg2).name() << ")";
    }
  }

  EXPECT_EQ(0, error) << "Error: mismatch for " << name << "("
                      << Print(reinterpret_cast<uint8_t *>(s1), sizeof(s1))
                      << ", "
                      << Print(reinterpret_cast<uint8_t *>(s2), sizeof(s2))
                      << ") -> " << Print(d, sizeof(d)) << " (simd), "
                      << Print(ref_d, sizeof(ref_d)) << " (ref)";
}

// Instantiations to make the functions callable from other files
template void TestSimd1Arg<c_v64, c_v64>(uint32_t, uint32_t, uint32_t,
                                         const char *);
template void TestSimd1Arg<int64_t, c_v64>(uint32_t, uint32_t, uint32_t,
                                           const char *);
template void TestSimd1Arg<uint64_t, c_v64>(uint32_t, uint32_t, uint32_t,
                                            const char *);
template void TestSimd2Args<c_v64, c_v64, c_v64>(uint32_t, uint32_t, uint32_t,
                                                 const char *);
template void TestSimd2Args<c_v64, c_v64, uint32_t>(uint32_t, uint32_t,
                                                    uint32_t, const char *);
template void TestSimd2Args<int64_t, c_v64, c_v64>(uint32_t, uint32_t, uint32_t,
                                                   const char *);
template void TestSimd2Args<uint32_t, c_v64, c_v64>(uint32_t, uint32_t,
                                                    uint32_t, const char *);

}  // namespace
}  // namespace SIMD_NAMESPACE