/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

12#include <cstdlib>
13#include <string>
14
15#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
16
17#include "./aom_config.h"
18#include "./av1_rtcd.h"
19#include "aom_ports/aom_timer.h"
20#include "av1/common/cdef_block.h"
21#include "test/acm_random.h"
22#include "test/clear_system_state.h"
23#include "test/register_state_check.h"
24#include "test/util.h"
25
26using libaom_test::ACMRandom;
27
28namespace {
29
30typedef std::tr1::tuple<cdef_filter_block_func, cdef_filter_block_func, int>
31 cdef_dir_param_t;
32
33class CDEFBlockTest : public ::testing::TestWithParam<cdef_dir_param_t> {
34 public:
35 virtual ~CDEFBlockTest() {}
36 virtual void SetUp() {
37 cdef = GET_PARAM(0);
38 ref_cdef = GET_PARAM(1);
39 bsize = GET_PARAM(2);
40 }
41
42 virtual void TearDown() { libaom_test::ClearSystemState(); }
43
44 protected:
45 int bsize;
46 cdef_filter_block_func cdef;
47 cdef_filter_block_func ref_cdef;
48};
49
50typedef CDEFBlockTest CDEFSpeedTest;
51
52void test_cdef(int bsize, int iterations, cdef_filter_block_func cdef,
53 cdef_filter_block_func ref_cdef) {
54 const int size = 8;
55 const int ysize = size + 2 * CDEF_VBORDER;
56 ACMRandom rnd(ACMRandom::DeterministicSeed());
57 DECLARE_ALIGNED(16, uint16_t, s[ysize * CDEF_BSTRIDE]);
58 DECLARE_ALIGNED(16, static uint16_t, d[size * size]);
59 DECLARE_ALIGNED(16, static uint16_t, ref_d[size * size]);
60 memset(ref_d, 0, sizeof(ref_d));
61 memset(d, 0, sizeof(d));
62
63 int error = 0, pristrength = 0, secstrength, dir;
64 int boundary, pridamping, secdamping, depth, bits, level, count,
65 errdepth = 0, errpristrength = 0, errsecstrength = 0, errboundary = 0,
66 errpridamping = 0, errsecdamping = 0;
67 unsigned int pos = 0;
68
69 for (boundary = 0; boundary < 16; boundary++) {
70 for (depth = 8; depth <= 12; depth += 2) {
James Zern9feda792017-11-03 20:14:46 -070071 const unsigned int max_pos = size * size >> static_cast<int>(depth == 8);
Steinar Midtskogen59782122017-07-20 08:49:43 +020072 for (pridamping = 3 + depth - 8;
73 pridamping < 7 - 3 * !!boundary + depth - 8; pridamping++) {
74 for (secdamping = 3 + depth - 8;
75 secdamping < 7 - 3 * !!boundary + depth - 8; secdamping++) {
76 for (count = 0; count < iterations; count++) {
77 for (level = 0; level < (1 << depth) && !error;
78 level += (2 + 6 * !!boundary) << (depth - 8)) {
79 for (bits = 1; bits <= depth && !error;
80 bits += 1 + 3 * !!boundary) {
81 for (unsigned int i = 0; i < sizeof(s) / sizeof(*s); i++)
82 s[i] = clamp((rnd.Rand16() & ((1 << bits) - 1)) + level, 0,
83 (1 << depth) - 1);
84 if (boundary) {
85 if (boundary & 1) { // Left
86 for (int i = 0; i < ysize; i++)
87 for (int j = 0; j < CDEF_HBORDER; j++)
88 s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
89 }
90 if (boundary & 2) { // Right
91 for (int i = 0; i < ysize; i++)
92 for (int j = CDEF_HBORDER + size; j < CDEF_BSTRIDE; j++)
93 s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
94 }
95 if (boundary & 4) { // Above
96 for (int i = 0; i < CDEF_VBORDER; i++)
97 for (int j = 0; j < CDEF_BSTRIDE; j++)
98 s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
99 }
100 if (boundary & 8) { // Below
101 for (int i = CDEF_VBORDER + size; i < ysize; i++)
102 for (int j = 0; j < CDEF_BSTRIDE; j++)
103 s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
104 }
105 }
106 for (dir = 0; dir < 8; dir++) {
107 for (pristrength = 0;
108 pristrength <= 19 << (depth - 8) && !error;
109 pristrength += (1 + 4 * !!boundary) << (depth - 8)) {
110 if (pristrength == 16) pristrength = 19;
111 for (secstrength = 0;
112 secstrength <= 4 << (depth - 8) && !error;
113 secstrength += 1 << (depth - 8)) {
114 if (secstrength == 3 << (depth - 8)) continue;
115 ref_cdef(depth == 8 ? (uint8_t *)ref_d : 0, ref_d, size,
116 s + CDEF_HBORDER + CDEF_VBORDER * CDEF_BSTRIDE,
117 pristrength, secstrength, dir, pridamping,
Steinar Midtskogenbc753052017-10-23 12:32:10 +0200118 secdamping, bsize, (1 << depth) - 1, depth - 8);
Steinar Midtskogen59782122017-07-20 08:49:43 +0200119 // If cdef and ref_cdef are the same, we're just testing
120 // speed
121 if (cdef != ref_cdef)
Steinar Midtskogenbc753052017-10-23 12:32:10 +0200122 ASM_REGISTER_STATE_CHECK(cdef(
123 depth == 8 ? (uint8_t *)d : 0, d, size,
124 s + CDEF_HBORDER + CDEF_VBORDER * CDEF_BSTRIDE,
125 pristrength, secstrength, dir, pridamping,
126 secdamping, bsize, (1 << depth) - 1, depth - 8));
Steinar Midtskogen59782122017-07-20 08:49:43 +0200127 if (ref_cdef != cdef) {
Rupert Swarbricke5442922017-10-09 17:23:00 +0100128 for (pos = 0; pos < max_pos && !error; pos++) {
Steinar Midtskogen59782122017-07-20 08:49:43 +0200129 error = ref_d[pos] != d[pos];
130 errdepth = depth;
131 errpristrength = pristrength;
132 errsecstrength = secstrength;
133 errboundary = boundary;
134 errpridamping = pridamping;
135 errsecdamping = secdamping;
136 }
137 }
138 }
139 }
140 }
141 }
142 }
143 }
144 }
145 }
146 }
147 }
148 pos--;
149 EXPECT_EQ(0, error) << "Error: CDEFBlockTest, SIMD and C mismatch."
150 << std::endl
151 << "First error at " << pos % size << "," << pos / size
152 << " (" << (int16_t)ref_d[pos] << " : " << (int16_t)d[pos]
153 << ") " << std::endl
154 << "pristrength: " << errpristrength << std::endl
155 << "pridamping: " << errpridamping << std::endl
156 << "secstrength: " << errsecstrength << std::endl
157 << "secdamping: " << errsecdamping << std::endl
158 << "depth: " << errdepth << std::endl
159 << "size: " << bsize << std::endl
160 << "boundary: " << errboundary << std::endl
161 << std::endl;
162}
163
164void test_cdef_speed(int bsize, int iterations, cdef_filter_block_func cdef,
165 cdef_filter_block_func ref_cdef) {
166 aom_usec_timer ref_timer;
167 aom_usec_timer timer;
168
169 aom_usec_timer_start(&ref_timer);
170 test_cdef(bsize, iterations, ref_cdef, ref_cdef);
171 aom_usec_timer_mark(&ref_timer);
172 int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
173
174 aom_usec_timer_start(&timer);
175 test_cdef(bsize, iterations, cdef, cdef);
176 aom_usec_timer_mark(&timer);
177 int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
178
179#if 0
180 std::cout << "[ ] C time = " << ref_elapsed_time / 1000
181 << " ms, SIMD time = " << elapsed_time / 1000 << " ms" << std::endl;
182#endif
183
184 EXPECT_GT(ref_elapsed_time, elapsed_time)
185 << "Error: CDEFSpeedTest, SIMD slower than C." << std::endl
186 << "C time: " << ref_elapsed_time << " us" << std::endl
187 << "SIMD time: " << elapsed_time << " us" << std::endl;
188}
189
190typedef int (*find_dir_t)(const uint16_t *img, int stride, int32_t *var,
191 int coeff_shift);
192
193typedef std::tr1::tuple<find_dir_t, find_dir_t> find_dir_param_t;
194
195class CDEFFindDirTest : public ::testing::TestWithParam<find_dir_param_t> {
196 public:
197 virtual ~CDEFFindDirTest() {}
198 virtual void SetUp() {
199 finddir = GET_PARAM(0);
200 ref_finddir = GET_PARAM(1);
201 }
202
203 virtual void TearDown() { libaom_test::ClearSystemState(); }
204
205 protected:
206 find_dir_t finddir;
207 find_dir_t ref_finddir;
208};
209
210typedef CDEFFindDirTest CDEFFindDirSpeedTest;
211
212void test_finddir(int (*finddir)(const uint16_t *img, int stride, int32_t *var,
213 int coeff_shift),
214 int (*ref_finddir)(const uint16_t *img, int stride,
215 int32_t *var, int coeff_shift)) {
216 const int size = 8;
217 ACMRandom rnd(ACMRandom::DeterministicSeed());
218 DECLARE_ALIGNED(16, uint16_t, s[size * size]);
219
220 int error = 0;
221 int depth, bits, level, count, errdepth = 0;
222 int ref_res = 0, res = 0;
223 int32_t ref_var = 0, var = 0;
224
225 for (depth = 8; depth <= 12 && !error; depth += 2) {
226 for (count = 0; count < 512 && !error; count++) {
227 for (level = 0; level < (1 << depth) && !error;
228 level += 1 << (depth - 8)) {
229 for (bits = 1; bits <= depth && !error; bits++) {
230 for (unsigned int i = 0; i < sizeof(s) / sizeof(*s); i++)
231 s[i] = clamp((rnd.Rand16() & ((1 << bits) - 1)) + level, 0,
232 (1 << depth) - 1);
233 for (int c = 0; c < 1 + 9 * (finddir == ref_finddir); c++)
234 ref_res = ref_finddir(s, size, &ref_var, depth - 8);
235 if (finddir != ref_finddir)
236 ASM_REGISTER_STATE_CHECK(res = finddir(s, size, &var, depth - 8));
237 if (ref_finddir != finddir) {
238 if (res != ref_res || var != ref_var) error = 1;
239 errdepth = depth;
240 }
241 }
242 }
243 }
244 }
245
246 EXPECT_EQ(0, error) << "Error: CDEFFindDirTest, SIMD and C mismatch."
247 << std::endl
248 << "return: " << res << " : " << ref_res << std::endl
249 << "var: " << var << " : " << ref_var << std::endl
250 << "depth: " << errdepth << std::endl
251 << std::endl;
252}
253
254void test_finddir_speed(int (*finddir)(const uint16_t *img, int stride,
255 int32_t *var, int coeff_shift),
256 int (*ref_finddir)(const uint16_t *img, int stride,
257 int32_t *var, int coeff_shift)) {
258 aom_usec_timer ref_timer;
259 aom_usec_timer timer;
260
261 aom_usec_timer_start(&ref_timer);
262 test_finddir(ref_finddir, ref_finddir);
263 aom_usec_timer_mark(&ref_timer);
264 int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
265
266 aom_usec_timer_start(&timer);
267 test_finddir(finddir, finddir);
268 aom_usec_timer_mark(&timer);
269 int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
270
271#if 0
272 std::cout << "[ ] C time = " << ref_elapsed_time / 1000
273 << " ms, SIMD time = " << elapsed_time / 1000 << " ms" << std::endl;
274#endif
275
276 EXPECT_GT(ref_elapsed_time, elapsed_time)
277 << "Error: CDEFFindDirSpeedTest, SIMD slower than C." << std::endl
278 << "C time: " << ref_elapsed_time << " us" << std::endl
279 << "SIMD time: " << elapsed_time << " us" << std::endl;
280}
281
282TEST_P(CDEFBlockTest, TestSIMDNoMismatch) {
283 test_cdef(bsize, 1, cdef, ref_cdef);
284}
285
286TEST_P(CDEFSpeedTest, DISABLED_TestSpeed) {
287 test_cdef_speed(bsize, 4, cdef, ref_cdef);
288}
289
290TEST_P(CDEFFindDirTest, TestSIMDNoMismatch) {
291 test_finddir(finddir, ref_finddir);
292}
293
294TEST_P(CDEFFindDirSpeedTest, DISABLED_TestSpeed) {
295 test_finddir_speed(finddir, ref_finddir);
296}
297
298using std::tr1::make_tuple;
299
// VS compiling for 32 bit targets does not support vector types in
// structs as arguments, which makes the v256 type of the intrinsics
// hard to support, so optimizations for this target are disabled.
303#if defined(_WIN64) || !defined(_MSC_VER) || defined(__clang__)
#if HAVE_SSE2
// SSE2 block filter vs. C for the 4x4, 8x4, 4x8 and 8x8 block sizes.
INSTANTIATE_TEST_CASE_P(
    SSE2, CDEFBlockTest,
    ::testing::Values(
        make_tuple(&cdef_filter_block_sse2, &cdef_filter_block_c, BLOCK_4X4),
        make_tuple(&cdef_filter_block_sse2, &cdef_filter_block_c, BLOCK_8X4),
        make_tuple(&cdef_filter_block_sse2, &cdef_filter_block_c, BLOCK_4X8),
        make_tuple(&cdef_filter_block_sse2, &cdef_filter_block_c, BLOCK_8X8)));
// SSE2 direction search vs. C.
INSTANTIATE_TEST_CASE_P(
    SSE2, CDEFFindDirTest,
    ::testing::Values(make_tuple(&cdef_find_dir_sse2, &cdef_find_dir_c)));
#endif
#if HAVE_SSSE3
// SSSE3 block filter vs. C for the 4x4, 8x4, 4x8 and 8x8 block sizes.
INSTANTIATE_TEST_CASE_P(
    SSSE3, CDEFBlockTest,
    ::testing::Values(
        make_tuple(&cdef_filter_block_ssse3, &cdef_filter_block_c, BLOCK_4X4),
        make_tuple(&cdef_filter_block_ssse3, &cdef_filter_block_c, BLOCK_8X4),
        make_tuple(&cdef_filter_block_ssse3, &cdef_filter_block_c, BLOCK_4X8),
        make_tuple(&cdef_filter_block_ssse3, &cdef_filter_block_c,
                   BLOCK_8X8)));
// SSSE3 direction search vs. C.
INSTANTIATE_TEST_CASE_P(
    SSSE3, CDEFFindDirTest,
    ::testing::Values(make_tuple(&cdef_find_dir_ssse3, &cdef_find_dir_c)));
#endif
328
#if HAVE_SSE4_1
// SSE4.1 block filter vs. C for the 4x4, 8x4, 4x8 and 8x8 block sizes.
// The first entry must be BLOCK_4X4: listing BLOCK_4X8 twice left the 4x4
// path untested, unlike every other instantiation in this file.
INSTANTIATE_TEST_CASE_P(
    SSE4_1, CDEFBlockTest,
    ::testing::Values(
        make_tuple(&cdef_filter_block_sse4_1, &cdef_filter_block_c, BLOCK_4X4),
        make_tuple(&cdef_filter_block_sse4_1, &cdef_filter_block_c, BLOCK_8X4),
        make_tuple(&cdef_filter_block_sse4_1, &cdef_filter_block_c, BLOCK_4X8),
        make_tuple(&cdef_filter_block_sse4_1, &cdef_filter_block_c,
                   BLOCK_8X8)));
// SSE4.1 direction search vs. C.
INSTANTIATE_TEST_CASE_P(SSE4_1, CDEFFindDirTest,
                        ::testing::Values(make_tuple(&cdef_find_dir_sse4_1,
                                                     &cdef_find_dir_c)));
#endif
342
#if HAVE_AVX2
// AVX2 block filter vs. C for the 4x4, 8x4, 4x8 and 8x8 block sizes.
INSTANTIATE_TEST_CASE_P(
    AVX2, CDEFBlockTest,
    ::testing::Values(
        make_tuple(&cdef_filter_block_avx2, &cdef_filter_block_c, BLOCK_4X4),
        make_tuple(&cdef_filter_block_avx2, &cdef_filter_block_c, BLOCK_8X4),
        make_tuple(&cdef_filter_block_avx2, &cdef_filter_block_c, BLOCK_4X8),
        make_tuple(&cdef_filter_block_avx2, &cdef_filter_block_c, BLOCK_8X8)));
// AVX2 direction search vs. C.
INSTANTIATE_TEST_CASE_P(
    AVX2, CDEFFindDirTest,
    ::testing::Values(make_tuple(&cdef_find_dir_avx2, &cdef_find_dir_c)));
#endif
355
#if HAVE_NEON
// NEON block filter vs. C for the 4x4, 8x4, 4x8 and 8x8 block sizes.
INSTANTIATE_TEST_CASE_P(
    NEON, CDEFBlockTest,
    ::testing::Values(
        make_tuple(&cdef_filter_block_neon, &cdef_filter_block_c, BLOCK_4X4),
        make_tuple(&cdef_filter_block_neon, &cdef_filter_block_c, BLOCK_8X4),
        make_tuple(&cdef_filter_block_neon, &cdef_filter_block_c, BLOCK_4X8),
        make_tuple(&cdef_filter_block_neon, &cdef_filter_block_c, BLOCK_8X8)));
// NEON direction search vs. C.
INSTANTIATE_TEST_CASE_P(
    NEON, CDEFFindDirTest,
    ::testing::Values(make_tuple(&cdef_find_dir_neon, &cdef_find_dir_c)));
#endif
368
// Test speed for all supported architectures
#if HAVE_SSE2
// SSE2 block filter timed against C for each block size.
INSTANTIATE_TEST_CASE_P(
    SSE2, CDEFSpeedTest,
    ::testing::Values(
        make_tuple(&cdef_filter_block_sse2, &cdef_filter_block_c, BLOCK_4X4),
        make_tuple(&cdef_filter_block_sse2, &cdef_filter_block_c, BLOCK_8X4),
        make_tuple(&cdef_filter_block_sse2, &cdef_filter_block_c, BLOCK_4X8),
        make_tuple(&cdef_filter_block_sse2, &cdef_filter_block_c, BLOCK_8X8)));
// SSE2 direction search timed against C.
INSTANTIATE_TEST_CASE_P(
    SSE2, CDEFFindDirSpeedTest,
    ::testing::Values(make_tuple(&cdef_find_dir_sse2, &cdef_find_dir_c)));
#endif
382
#if HAVE_SSSE3
// SSSE3 block filter timed against C for each block size.
INSTANTIATE_TEST_CASE_P(
    SSSE3, CDEFSpeedTest,
    ::testing::Values(
        make_tuple(&cdef_filter_block_ssse3, &cdef_filter_block_c, BLOCK_4X4),
        make_tuple(&cdef_filter_block_ssse3, &cdef_filter_block_c, BLOCK_8X4),
        make_tuple(&cdef_filter_block_ssse3, &cdef_filter_block_c, BLOCK_4X8),
        make_tuple(&cdef_filter_block_ssse3, &cdef_filter_block_c,
                   BLOCK_8X8)));
// SSSE3 direction search timed against C.
INSTANTIATE_TEST_CASE_P(
    SSSE3, CDEFFindDirSpeedTest,
    ::testing::Values(make_tuple(&cdef_find_dir_ssse3, &cdef_find_dir_c)));
#endif
395
#if HAVE_SSE4_1
// SSE4.1 block filter timed against C for each block size.
INSTANTIATE_TEST_CASE_P(
    SSE4_1, CDEFSpeedTest,
    ::testing::Values(
        make_tuple(&cdef_filter_block_sse4_1, &cdef_filter_block_c, BLOCK_4X4),
        make_tuple(&cdef_filter_block_sse4_1, &cdef_filter_block_c, BLOCK_8X4),
        make_tuple(&cdef_filter_block_sse4_1, &cdef_filter_block_c, BLOCK_4X8),
        make_tuple(&cdef_filter_block_sse4_1, &cdef_filter_block_c,
                   BLOCK_8X8)));
// SSE4.1 direction search timed against C.
INSTANTIATE_TEST_CASE_P(
    SSE4_1, CDEFFindDirSpeedTest,
    ::testing::Values(make_tuple(&cdef_find_dir_sse4_1, &cdef_find_dir_c)));
#endif
409
#if HAVE_AVX2
// AVX2 block filter timed against C for each block size.
INSTANTIATE_TEST_CASE_P(
    AVX2, CDEFSpeedTest,
    ::testing::Values(
        make_tuple(&cdef_filter_block_avx2, &cdef_filter_block_c, BLOCK_4X4),
        make_tuple(&cdef_filter_block_avx2, &cdef_filter_block_c, BLOCK_8X4),
        make_tuple(&cdef_filter_block_avx2, &cdef_filter_block_c, BLOCK_4X8),
        make_tuple(&cdef_filter_block_avx2, &cdef_filter_block_c, BLOCK_8X8)));
// AVX2 direction search timed against C.
INSTANTIATE_TEST_CASE_P(
    AVX2, CDEFFindDirSpeedTest,
    ::testing::Values(make_tuple(&cdef_find_dir_avx2, &cdef_find_dir_c)));
#endif
422
#if HAVE_NEON
// NEON block filter timed against C for each block size.
INSTANTIATE_TEST_CASE_P(
    NEON, CDEFSpeedTest,
    ::testing::Values(
        make_tuple(&cdef_filter_block_neon, &cdef_filter_block_c, BLOCK_4X4),
        make_tuple(&cdef_filter_block_neon, &cdef_filter_block_c, BLOCK_8X4),
        make_tuple(&cdef_filter_block_neon, &cdef_filter_block_c, BLOCK_4X8),
        make_tuple(&cdef_filter_block_neon, &cdef_filter_block_c, BLOCK_8X8)));
// NEON direction search timed against C.
INSTANTIATE_TEST_CASE_P(
    NEON, CDEFFindDirSpeedTest,
    ::testing::Values(make_tuple(&cdef_find_dir_neon, &cdef_find_dir_c)));
#endif
435
#endif  // defined(_WIN64) || !defined(_MSC_VER) || defined(__clang__)
437} // namespace