blob: 44a32a85c836fa81c54fcd21ec9b9db4e7c8fd2e [file] [log] [blame]
/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
11
#include <assert.h>
#include <string.h>

#include <string>
#include <typeinfo>

#include "./aom_dsp_rtcd.h"
#include "test/acm_random.h"
#include "test/register_state_check.h"
#include "aom_dsp/aom_simd.h"
#include "aom_dsp/simd/v64_intrinsics_c.h"
19
// Machine tuned code goes into this file. This file is included from
// simd_cmp_sse2.cc, simd_cmp_ssse3.cc etc which define the macros
// ARCH (=neon, sse2, ssse3, etc), SIMD_NAMESPACE and ARCH_POSTFIX().
23
24using libaom_test::ACMRandom;
25
26namespace SIMD_NAMESPACE {
27
28// Wrap templates around intrinsics using immediate values
29template <int shift>
30v64 imm_v64_shl_n_byte(v64 a) {
31 return v64_shl_n_byte(a, shift);
32}
33template <int shift>
34v64 imm_v64_shr_n_byte(v64 a) {
35 return v64_shr_n_byte(a, shift);
36}
37template <int shift>
38v64 imm_v64_shl_n_8(v64 a) {
39 return v64_shl_n_8(a, shift);
40}
41template <int shift>
42v64 imm_v64_shr_n_u8(v64 a) {
43 return v64_shr_n_u8(a, shift);
44}
45template <int shift>
46v64 imm_v64_shr_n_s8(v64 a) {
47 return v64_shr_n_s8(a, shift);
48}
49template <int shift>
50v64 imm_v64_shl_n_16(v64 a) {
51 return v64_shl_n_16(a, shift);
52}
53template <int shift>
54v64 imm_v64_shr_n_u16(v64 a) {
55 return v64_shr_n_u16(a, shift);
56}
57template <int shift>
58v64 imm_v64_shr_n_s16(v64 a) {
59 return v64_shr_n_s16(a, shift);
60}
61template <int shift>
62v64 imm_v64_shl_n_32(v64 a) {
63 return v64_shl_n_32(a, shift);
64}
65template <int shift>
66v64 imm_v64_shr_n_u32(v64 a) {
67 return v64_shr_n_u32(a, shift);
68}
69template <int shift>
70v64 imm_v64_shr_n_s32(v64 a) {
71 return v64_shr_n_s32(a, shift);
72}
73template <int shift>
74v64 imm_v64_align(v64 a, v64 b) {
75 return v64_align(a, b, shift);
76}
77
78// Wrap templates around corresponding C implementations of the above
79template <int shift>
80c_v64 c_imm_v64_shl_n_byte(c_v64 a) {
81 return c_v64_shl_n_byte(a, shift);
82}
83template <int shift>
84c_v64 c_imm_v64_shr_n_byte(c_v64 a) {
85 return c_v64_shr_n_byte(a, shift);
86}
87template <int shift>
88c_v64 c_imm_v64_shl_n_8(c_v64 a) {
89 return c_v64_shl_n_8(a, shift);
90}
91template <int shift>
92c_v64 c_imm_v64_shr_n_u8(c_v64 a) {
93 return c_v64_shr_n_u8(a, shift);
94}
95template <int shift>
96c_v64 c_imm_v64_shr_n_s8(c_v64 a) {
97 return c_v64_shr_n_s8(a, shift);
98}
99template <int shift>
100c_v64 c_imm_v64_shl_n_16(c_v64 a) {
101 return c_v64_shl_n_16(a, shift);
102}
103template <int shift>
104c_v64 c_imm_v64_shr_n_u16(c_v64 a) {
105 return c_v64_shr_n_u16(a, shift);
106}
107template <int shift>
108c_v64 c_imm_v64_shr_n_s16(c_v64 a) {
109 return c_v64_shr_n_s16(a, shift);
110}
111template <int shift>
112c_v64 c_imm_v64_shl_n_32(c_v64 a) {
113 return c_v64_shl_n_32(a, shift);
114}
115template <int shift>
116c_v64 c_imm_v64_shr_n_u32(c_v64 a) {
117 return c_v64_shr_n_u32(a, shift);
118}
119template <int shift>
120c_v64 c_imm_v64_shr_n_s32(c_v64 a) {
121 return c_v64_shr_n_s32(a, shift);
122}
123template <int shift>
124c_v64 c_imm_v64_align(c_v64 a, c_v64 b) {
125 return c_v64_align(a, b, shift);
126}
127
128// Wrappers around the the SAD and SSD functions
129uint32_t v64_sad_u8(v64 a, v64 b) {
130 return v64_sad_u8_sum(::v64_sad_u8(v64_sad_u8_init(), a, b));
131}
132uint32_t v64_ssd_u8(v64 a, v64 b) {
133 return v64_ssd_u8_sum(::v64_ssd_u8(v64_ssd_u8_init(), a, b));
134}
135
136uint32_t c_v64_sad_u8(c_v64 a, c_v64 b) {
137 return c_v64_sad_u8_sum(::c_v64_sad_u8(c_v64_sad_u8_init(), a, b));
138}
139uint32_t c_v64_ssd_u8(c_v64 a, c_v64 b) {
140 return c_v64_ssd_u8_sum(::c_v64_ssd_u8(c_v64_ssd_u8_init(), a, b));
141}
142
143namespace {
144
// Generic function pointer type used to store intrinsics of any signature.
typedef void (*fptr)();

// Associates an intrinsic's name with its C reference implementation and
// its machine tuned implementation.
typedef struct {
  const char *name;
  fptr ref;
  fptr simd;
} mapping;

// Builds a mapping entry pairing c_<name> (reference) with <name> (SIMD).
#define MAP(name) \
  { #name, reinterpret_cast<fptr>(c_##name), reinterpret_cast<fptr>(name) }
158
Steinar Midtskogen8b28d862017-01-09 11:33:20 +0100159const mapping m[] = { MAP(v64_sad_u8),
160 MAP(v64_ssd_u8),
161 MAP(v64_add_8),
162 MAP(v64_add_16),
163 MAP(v64_sadd_s16),
164 MAP(v64_add_32),
165 MAP(v64_sub_8),
166 MAP(v64_ssub_u8),
167 MAP(v64_ssub_s8),
168 MAP(v64_sub_16),
169 MAP(v64_ssub_s16),
170 MAP(v64_sub_32),
171 MAP(v64_ziplo_8),
172 MAP(v64_ziphi_8),
173 MAP(v64_ziplo_16),
174 MAP(v64_ziphi_16),
175 MAP(v64_ziplo_32),
176 MAP(v64_ziphi_32),
177 MAP(v64_pack_s32_s16),
178 MAP(v64_pack_s16_u8),
179 MAP(v64_pack_s16_s8),
180 MAP(v64_unziphi_8),
181 MAP(v64_unziplo_8),
182 MAP(v64_unziphi_16),
183 MAP(v64_unziplo_16),
184 MAP(v64_or),
185 MAP(v64_xor),
186 MAP(v64_and),
187 MAP(v64_andn),
188 MAP(v64_mullo_s16),
189 MAP(v64_mulhi_s16),
190 MAP(v64_mullo_s32),
191 MAP(v64_madd_s16),
192 MAP(v64_madd_us8),
193 MAP(v64_avg_u8),
194 MAP(v64_rdavg_u8),
195 MAP(v64_avg_u16),
196 MAP(v64_min_u8),
197 MAP(v64_max_u8),
198 MAP(v64_min_s8),
199 MAP(v64_max_s8),
200 MAP(v64_min_s16),
201 MAP(v64_max_s16),
202 MAP(v64_cmpgt_s8),
203 MAP(v64_cmplt_s8),
204 MAP(v64_cmpeq_8),
205 MAP(v64_cmpgt_s16),
206 MAP(v64_cmplt_s16),
207 MAP(v64_cmpeq_16),
208 MAP(v64_shuffle_8),
209 MAP(imm_v64_align<1>),
210 MAP(imm_v64_align<2>),
211 MAP(imm_v64_align<3>),
212 MAP(imm_v64_align<4>),
213 MAP(imm_v64_align<5>),
214 MAP(imm_v64_align<6>),
215 MAP(imm_v64_align<7>),
216 MAP(v64_abs_s16),
217 MAP(v64_unpacklo_u8_s16),
218 MAP(v64_unpackhi_u8_s16),
219 MAP(v64_unpacklo_u16_s32),
220 MAP(v64_unpacklo_s16_s32),
221 MAP(v64_unpackhi_u16_s32),
222 MAP(v64_unpackhi_s16_s32),
223 MAP(imm_v64_shr_n_byte<1>),
224 MAP(imm_v64_shr_n_byte<2>),
225 MAP(imm_v64_shr_n_byte<3>),
226 MAP(imm_v64_shr_n_byte<4>),
227 MAP(imm_v64_shr_n_byte<5>),
228 MAP(imm_v64_shr_n_byte<6>),
229 MAP(imm_v64_shr_n_byte<7>),
230 MAP(imm_v64_shl_n_byte<1>),
231 MAP(imm_v64_shl_n_byte<2>),
232 MAP(imm_v64_shl_n_byte<3>),
233 MAP(imm_v64_shl_n_byte<4>),
234 MAP(imm_v64_shl_n_byte<5>),
235 MAP(imm_v64_shl_n_byte<6>),
236 MAP(imm_v64_shl_n_byte<7>),
237 MAP(imm_v64_shl_n_8<1>),
238 MAP(imm_v64_shl_n_8<2>),
239 MAP(imm_v64_shl_n_8<3>),
240 MAP(imm_v64_shl_n_8<4>),
241 MAP(imm_v64_shl_n_8<5>),
242 MAP(imm_v64_shl_n_8<6>),
243 MAP(imm_v64_shl_n_8<7>),
244 MAP(imm_v64_shr_n_u8<1>),
245 MAP(imm_v64_shr_n_u8<2>),
246 MAP(imm_v64_shr_n_u8<3>),
247 MAP(imm_v64_shr_n_u8<4>),
248 MAP(imm_v64_shr_n_u8<5>),
249 MAP(imm_v64_shr_n_u8<6>),
250 MAP(imm_v64_shr_n_u8<7>),
251 MAP(imm_v64_shr_n_s8<1>),
252 MAP(imm_v64_shr_n_s8<2>),
253 MAP(imm_v64_shr_n_s8<3>),
254 MAP(imm_v64_shr_n_s8<4>),
255 MAP(imm_v64_shr_n_s8<5>),
256 MAP(imm_v64_shr_n_s8<6>),
257 MAP(imm_v64_shr_n_s8<7>),
258 MAP(imm_v64_shl_n_16<1>),
259 MAP(imm_v64_shl_n_16<2>),
260 MAP(imm_v64_shl_n_16<4>),
261 MAP(imm_v64_shl_n_16<6>),
262 MAP(imm_v64_shl_n_16<8>),
263 MAP(imm_v64_shl_n_16<10>),
264 MAP(imm_v64_shl_n_16<12>),
265 MAP(imm_v64_shl_n_16<14>),
266 MAP(imm_v64_shr_n_u16<1>),
267 MAP(imm_v64_shr_n_u16<2>),
268 MAP(imm_v64_shr_n_u16<4>),
269 MAP(imm_v64_shr_n_u16<6>),
270 MAP(imm_v64_shr_n_u16<8>),
271 MAP(imm_v64_shr_n_u16<10>),
272 MAP(imm_v64_shr_n_u16<12>),
273 MAP(imm_v64_shr_n_u16<14>),
274 MAP(imm_v64_shr_n_s16<1>),
275 MAP(imm_v64_shr_n_s16<2>),
276 MAP(imm_v64_shr_n_s16<4>),
277 MAP(imm_v64_shr_n_s16<6>),
278 MAP(imm_v64_shr_n_s16<8>),
279 MAP(imm_v64_shr_n_s16<10>),
280 MAP(imm_v64_shr_n_s16<12>),
281 MAP(imm_v64_shr_n_s16<14>),
282 MAP(imm_v64_shl_n_32<1>),
283 MAP(imm_v64_shl_n_32<4>),
284 MAP(imm_v64_shl_n_32<8>),
285 MAP(imm_v64_shl_n_32<12>),
286 MAP(imm_v64_shl_n_32<16>),
287 MAP(imm_v64_shl_n_32<20>),
288 MAP(imm_v64_shl_n_32<24>),
289 MAP(imm_v64_shl_n_32<28>),
290 MAP(imm_v64_shr_n_u32<1>),
291 MAP(imm_v64_shr_n_u32<4>),
292 MAP(imm_v64_shr_n_u32<8>),
293 MAP(imm_v64_shr_n_u32<12>),
294 MAP(imm_v64_shr_n_u32<16>),
295 MAP(imm_v64_shr_n_u32<20>),
296 MAP(imm_v64_shr_n_u32<24>),
297 MAP(imm_v64_shr_n_u32<28>),
298 MAP(imm_v64_shr_n_s32<1>),
299 MAP(imm_v64_shr_n_s32<4>),
300 MAP(imm_v64_shr_n_s32<8>),
301 MAP(imm_v64_shr_n_s32<12>),
302 MAP(imm_v64_shr_n_s32<16>),
303 MAP(imm_v64_shr_n_s32<20>),
304 MAP(imm_v64_shr_n_s32<24>),
305 MAP(imm_v64_shr_n_s32<28>),
306 MAP(v64_shl_8),
307 MAP(v64_shr_u8),
308 MAP(v64_shr_s8),
309 MAP(v64_shl_16),
310 MAP(v64_shr_u16),
311 MAP(v64_shr_s16),
312 MAP(v64_shl_32),
313 MAP(v64_shr_u32),
314 MAP(v64_shr_s32),
315 MAP(v64_hadd_u8),
316 MAP(v64_hadd_s16),
317 MAP(v64_dotp_s16),
318 { NULL, NULL, NULL } };
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +0100319#undef MAP
320
321// Map reference functions to machine tuned functions. Since the
322// functions depend on machine tuned types, the non-machine tuned
323// instantiations of the test can't refer to these functions directly,
324// so we refer to them by name and do the mapping here.
Steinar Midtskogen8b28d862017-01-09 11:33:20 +0100325void Map(const char *name, fptr *ref, fptr *simd) {
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +0100326 unsigned int i;
Steinar Midtskogen8b28d862017-01-09 11:33:20 +0100327 for (i = 0; m[i].name && strcmp(name, m[i].name); i++) {
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +0100328 }
329
Steinar Midtskogen8b28d862017-01-09 11:33:20 +0100330 *ref = m[i].ref;
331 *simd = m[i].simd;
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +0100332}
333
334// Used for printing errors in TestSimd1Arg and TestSimd2Args
335std::string Print(const uint8_t *a, int size) {
336 std::string text = "0x";
337 for (int i = 0; i < size; i++) {
Steinar Midtskogen03ab5272017-01-10 07:30:47 +0100338 const uint8_t c = a[!CONFIG_BIG_ENDIAN ? size - 1 - i : i];
339 // Same as snprintf(..., ..., "%02x", c)
340 text += (c >> 4) + '0' + ((c >> 4) > 9) * ('a' - '0' - 10);
341 text += (c & 15) + '0' + ((c & 15) > 9) * ('a' - '0' - 10);
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +0100342 }
343
344 return text;
345}
346
347// Used in TestSimd1Arg and TestSimd2Args to restrict argument ranges
348void SetMask(uint8_t *s, int size, uint32_t mask, uint32_t maskwidth) {
349 switch (maskwidth) {
350 case 0: {
351 break;
352 }
353 case 8: {
354 for (int i = 0; i < size; i++) s[i] &= mask;
355 break;
356 }
357 case 16: {
358 uint16_t *t = reinterpret_cast<uint16_t *>(s);
359 assert(!(reinterpret_cast<uintptr_t>(s) & 1));
360 for (int i = 0; i < size / 2; i++) t[i] &= mask;
361 break;
362 }
363 case 32: {
364 uint32_t *t = reinterpret_cast<uint32_t *>(s);
365 assert(!(reinterpret_cast<uintptr_t>(s) & 3));
366 for (int i = 0; i < size / 4; i++) t[i] &= mask;
367 break;
368 }
369 case 64: {
370 uint64_t *t = reinterpret_cast<uint64_t *>(s);
371 assert(!(reinterpret_cast<uintptr_t>(s) & 7));
372 for (int i = 0; i < size / 8; i++) t[i] &= mask;
373 break;
374 }
375 default: {
376 FAIL() << "Unsupported mask width";
377 break;
378 }
379 }
380}
381
382// We need a store function for uint64_t
383void u64_store_aligned(void *p, uint64_t a) {
384 v64_store_aligned(p, v64_from_64(a));
385}
386
387void c_u64_store_aligned(void *p, uint64_t a) {
388 c_v64_store_aligned(p, c_v64_from_64(a));
389}
390
391// CompareSimd1Arg and CompareSimd2Args compare intrinsics taking 1 or
392// 2 arguments respectively with their corresponding C reference.
393// Ideally, the loads and stores should have gone into the template
394// parameter list, but v64 and v128 could be typedef'ed to the same
395// type (which is the case on x86) and then we can't instantiate both
396// v64 and v128, so the function return and argument types, including
397// the always differing types in the C equivalent are used instead.
398// The function arguments must be void pointers and then go through a
399// cast to avoid matching errors in the branches eliminated by the
400// typeid tests in the calling function.
401template <typename Ret, typename Arg, typename CRet, typename CArg>
Steinar Midtskogen8b28d862017-01-09 11:33:20 +0100402int CompareSimd1Arg(fptr store, fptr load, fptr simd, void *d, fptr c_store,
403 fptr c_load, fptr c_simd, void *ref_d, const void *a) {
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +0100404 void (*const my_store)(void *, Ret) = (void (*const)(void *, Ret))store;
405 Arg (*const my_load)(const void *) = (Arg(*const)(const void *))load;
406 Ret (*const my_simd)(Arg) = (Ret(*const)(Arg))simd;
407 void (*const my_c_store)(void *, CRet) = (void (*const)(void *, CRet))c_store;
408 CArg (*const my_c_load)(const void *) = (CArg(*const)(const void *))c_load;
409 CRet (*const my_c_simd)(CArg) = (CRet(*const)(CArg))c_simd;
410
411 // Call reference and intrinsic
412 ASM_REGISTER_STATE_CHECK(my_c_store(ref_d, my_c_simd(my_c_load(a))));
413 ASM_REGISTER_STATE_CHECK(my_store(d, my_simd(my_load(a))));
414
415 // Compare results
416 return memcmp(ref_d, d, sizeof(CRet));
417}
418
419template <typename Ret, typename Arg1, typename Arg2, typename CRet,
420 typename CArg1, typename CArg2>
Steinar Midtskogen8b28d862017-01-09 11:33:20 +0100421int CompareSimd2Args(fptr store, fptr load1, fptr load2, fptr simd, void *d,
422 fptr c_store, fptr c_load1, fptr c_load2, fptr c_simd,
423 void *ref_d, const void *a, const void *b) {
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +0100424 void (*const my_store)(void *, Ret) = (void (*const)(void *, Ret))store;
425 Arg1 (*const my_load1)(const void *) = (Arg1(*const)(const void *))load1;
426 Arg2 (*const my_load2)(const void *) = (Arg2(*const)(const void *))load2;
427 Ret (*const my_simd)(Arg1, Arg2) = (Ret(*const)(Arg1, Arg2))simd;
428 void (*const my_c_store)(void *, CRet) = (void (*const)(void *, CRet))c_store;
429 CArg1 (*const my_c_load1)(const void *) =
430 (CArg1(*const)(const void *))c_load1;
431 CArg2 (*const my_c_load2)(const void *) =
432 (CArg2(*const)(const void *))c_load2;
433 CRet (*const my_c_simd)(CArg1, CArg2) = (CRet(*const)(CArg1, CArg2))c_simd;
434
435 // Call reference and intrinsic
436 ASM_REGISTER_STATE_CHECK(
437 my_c_store(ref_d, my_c_simd(my_c_load1(a), my_c_load2(b))));
438 ASM_REGISTER_STATE_CHECK(my_store(d, my_simd(my_load1(a), my_load2(b))));
439
440 // Compare results
441 return memcmp(ref_d, d, sizeof(CRet));
442}
443
444template <typename CRet, typename CArg>
445void TestSimd1Arg(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
446 const char *name) {
447 ACMRandom rnd(ACMRandom::DeterministicSeed());
Steinar Midtskogen8b28d862017-01-09 11:33:20 +0100448 fptr ref_simd;
449 fptr simd;
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +0100450 int error = 0;
Steinar Midtskogen03ab5272017-01-10 07:30:47 +0100451 DECLARE_ALIGNED(32, uint16_t, s[sizeof(CArg) / sizeof(uint16_t)]);
452 DECLARE_ALIGNED(32, uint8_t, d[sizeof(CRet)]);
453 DECLARE_ALIGNED(32, uint8_t, ref_d[sizeof(CRet)]);
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +0100454 memset(ref_d, 0, sizeof(ref_d));
455 memset(d, 0, sizeof(d));
456
457 Map(name, &ref_simd, &simd);
458 if (simd == NULL || ref_simd == NULL) {
459 FAIL() << "Internal error: Unknown intrinsic function " << name;
460 }
461
462 for (unsigned int count = 0; count < iterations && !error; count++) {
463 for (unsigned int c = 0; c < sizeof(CArg) / sizeof(uint16_t); c++)
464 s[c] = rnd.Rand16();
465
466 if (maskwidth) {
467 SetMask(reinterpret_cast<uint8_t *>(s), sizeof(CArg), mask, maskwidth);
468 }
469
470 if (typeid(CRet) == typeid(c_v64) && typeid(CArg) == typeid(c_v64)) {
471 // V64_V64
472 error = CompareSimd1Arg<v64, v64, CRet, CArg>(
Steinar Midtskogen8b28d862017-01-09 11:33:20 +0100473 reinterpret_cast<fptr>(v64_store_aligned),
474 reinterpret_cast<fptr>(v64_load_aligned), simd, d,
475 reinterpret_cast<fptr>(c_v64_store_aligned),
476 reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +0100477 } else if (typeid(CRet) == typeid(uint64_t) &&
478 typeid(CArg) == typeid(c_v64)) {
479 // U64_V64
480 error = CompareSimd1Arg<uint64_t, v64, CRet, CArg>(
Steinar Midtskogen8b28d862017-01-09 11:33:20 +0100481 reinterpret_cast<fptr>(u64_store_aligned),
482 reinterpret_cast<fptr>(v64_load_aligned), simd, d,
483 reinterpret_cast<fptr>(c_v64_store_aligned),
484 reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +0100485 } else if (typeid(CRet) == typeid(int64_t) &&
486 typeid(CArg) == typeid(c_v64)) {
487 // S64_V64
488 error = CompareSimd1Arg<int64_t, v64, CRet, CArg>(
Steinar Midtskogen8b28d862017-01-09 11:33:20 +0100489 reinterpret_cast<fptr>(u64_store_aligned),
490 reinterpret_cast<fptr>(v64_load_aligned), simd, d,
491 reinterpret_cast<fptr>(c_v64_store_aligned),
492 reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +0100493 } else {
494 FAIL() << "Internal error: Unknown intrinsic function "
495 << typeid(CRet).name() << " " << name << "(" << typeid(CArg).name()
496 << ")";
497 }
498 }
499
500 EXPECT_EQ(0, error) << "Error: mismatch for " << name << "("
501 << Print((uint8_t *)s, sizeof(s)) << ") -> "
502 << Print(d, sizeof(d)) << " (simd), "
503 << Print(ref_d, sizeof(ref_d)) << " (ref)";
504}
505
506template <typename CRet, typename CArg1, typename CArg2>
507void TestSimd2Args(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
508 const char *name) {
509 ACMRandom rnd(ACMRandom::DeterministicSeed());
Steinar Midtskogen8b28d862017-01-09 11:33:20 +0100510 fptr ref_simd;
511 fptr simd;
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +0100512 int error = 0;
Steinar Midtskogen03ab5272017-01-10 07:30:47 +0100513 DECLARE_ALIGNED(32, uint16_t, s1[sizeof(CArg1) / sizeof(uint16_t)]);
514 DECLARE_ALIGNED(32, uint16_t, s2[sizeof(CArg2) / sizeof(uint16_t)]);
515 DECLARE_ALIGNED(32, uint8_t, d[sizeof(CRet)]);
516 DECLARE_ALIGNED(32, uint8_t, ref_d[sizeof(CRet)]);
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +0100517 memset(ref_d, 0, sizeof(ref_d));
518 memset(d, 0, sizeof(d));
519
520 Map(name, &ref_simd, &simd);
521 if (simd == NULL || ref_simd == NULL) {
522 FAIL() << "Internal error: Unknown intrinsic function " << name;
523 }
524
525 for (unsigned int count = 0; count < iterations && !error; count++) {
526 for (unsigned int c = 0; c < sizeof(CArg1) / sizeof(uint16_t); c++)
527 s1[c] = rnd.Rand16();
528
529 for (unsigned int c = 0; c < sizeof(CArg2) / sizeof(uint16_t); c++)
530 s2[c] = rnd.Rand16();
531
532 if (maskwidth)
533 SetMask(reinterpret_cast<uint8_t *>(s2), sizeof(CArg2), mask, maskwidth);
534
535 if (typeid(CRet) == typeid(c_v64) && typeid(CArg1) == typeid(c_v64) &&
536 typeid(CArg2) == typeid(c_v64)) {
537 // V64_V64V64
538 error = CompareSimd2Args<v64, v64, v64, CRet, CArg1, CArg2>(
Steinar Midtskogen8b28d862017-01-09 11:33:20 +0100539 reinterpret_cast<fptr>(v64_store_aligned),
540 reinterpret_cast<fptr>(v64_load_aligned),
541 reinterpret_cast<fptr>(v64_load_aligned), simd, d,
542 reinterpret_cast<fptr>(c_v64_store_aligned),
543 reinterpret_cast<fptr>(c_v64_load_aligned),
544 reinterpret_cast<fptr>(c_v64_load_aligned),
545 reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +0100546 } else if (typeid(CRet) == typeid(uint32_t) &&
547 typeid(CArg1) == typeid(c_v64) &&
548 typeid(CArg2) == typeid(c_v64)) {
549 // U32_V64V64
550 error = CompareSimd2Args<uint32_t, v64, v64, CRet, CArg1, CArg2>(
Steinar Midtskogen8b28d862017-01-09 11:33:20 +0100551 reinterpret_cast<fptr>(u32_store_aligned),
552 reinterpret_cast<fptr>(v64_load_aligned),
553 reinterpret_cast<fptr>(v64_load_aligned), simd, d,
554 reinterpret_cast<fptr>(c_u32_store_aligned),
555 reinterpret_cast<fptr>(c_v64_load_aligned),
556 reinterpret_cast<fptr>(c_v64_load_aligned),
557 reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +0100558 } else if (typeid(CRet) == typeid(int64_t) &&
559 typeid(CArg1) == typeid(c_v64) &&
560 typeid(CArg2) == typeid(c_v64)) {
561 // S64_V64V64
562 error = CompareSimd2Args<int64_t, v64, v64, CRet, CArg1, CArg2>(
Steinar Midtskogen8b28d862017-01-09 11:33:20 +0100563 reinterpret_cast<fptr>(u64_store_aligned),
564 reinterpret_cast<fptr>(v64_load_aligned),
565 reinterpret_cast<fptr>(v64_load_aligned), simd, d,
566 reinterpret_cast<fptr>(c_u64_store_aligned),
567 reinterpret_cast<fptr>(c_v64_load_aligned),
568 reinterpret_cast<fptr>(c_v64_load_aligned),
569 reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +0100570 } else if (typeid(CRet) == typeid(c_v64) &&
571 typeid(CArg1) == typeid(c_v64) &&
572 typeid(CArg2) == typeid(uint32_t)) {
573 // V64_V64U32
574 error = CompareSimd2Args<v64, v64, uint32_t, CRet, CArg1, CArg2>(
Steinar Midtskogen8b28d862017-01-09 11:33:20 +0100575 reinterpret_cast<fptr>(v64_store_aligned),
576 reinterpret_cast<fptr>(v64_load_aligned),
577 reinterpret_cast<fptr>(u32_load_aligned), simd, d,
578 reinterpret_cast<fptr>(c_v64_store_aligned),
579 reinterpret_cast<fptr>(c_v64_load_aligned),
580 reinterpret_cast<fptr>(c_u32_load_aligned),
581 reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
Steinar Midtskogenfb1425f2016-11-23 09:33:16 +0100582 } else {
583 FAIL() << "Internal error: Unknown intrinsic function "
584 << typeid(CRet).name() << " " << name << "("
585 << typeid(CArg1).name() << ", " << typeid(CArg2).name() << ")";
586 }
587 }
588
589 EXPECT_EQ(0, error) << "Error: mismatch for " << name << "("
590 << Print(reinterpret_cast<uint8_t *>(s1), sizeof(s1))
591 << ", "
592 << Print(reinterpret_cast<uint8_t *>(s2), sizeof(s2))
593 << ") -> " << Print(d, sizeof(d)) << " (simd), "
594 << Print(ref_d, sizeof(ref_d)) << " (ref)";
595}
596
597// Instantiations to make the functions callable from another files
598template void TestSimd1Arg<c_v64, c_v64>(uint32_t, uint32_t, uint32_t,
599 const char *);
600template void TestSimd1Arg<int64_t, c_v64>(uint32_t, uint32_t, uint32_t,
601 const char *);
602template void TestSimd1Arg<uint64_t, c_v64>(uint32_t, uint32_t, uint32_t,
603 const char *);
604template void TestSimd2Args<c_v64, c_v64, c_v64>(uint32_t, uint32_t, uint32_t,
605 const char *);
606template void TestSimd2Args<c_v64, c_v64, uint32_t>(uint32_t, uint32_t,
607 uint32_t, const char *);
608template void TestSimd2Args<int64_t, c_v64, c_v64>(uint32_t, uint32_t, uint32_t,
609 const char *);
610template void TestSimd2Args<uint32_t, c_v64, c_v64>(uint32_t, uint32_t,
611 uint32_t, const char *);
612
613} // namespace
614} // namespace SIMD_NAMESPACE