blob: 1b795c4fcf9bc987602ea6a06ea5d189e763606e [file] [log] [blame]
/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
11
#include <cstdlib>
#include <cstring>
#include <string>

#include "third_party/googletest/src/googletest/include/gtest/gtest.h"

#include "./aom_config.h"
#include "./aom_dsp_rtcd.h"
#include "aom_ports/aom_timer.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"

using libaom_test::ACMRandom;
26
27namespace {
28
// Signature shared by the reference and the optimized versions of the 8-bit
// CLPF block filter.  Arguments, in order: source buffer, destination buffer,
// source stride, destination stride, block origin (x0, y0), block size
// (sizex, sizey), filter strength and bit depth.
typedef void (*clpf_block_t)(const uint8_t *src, uint8_t *dst, int sstride,
                             int dstride, int x0, int y0, int sizex, int sizey,
                             unsigned int strength, unsigned int bitdepth);

// Test parameter: (function under test, reference function, block width,
// block height).
typedef std::tr1::tuple<clpf_block_t, clpf_block_t, int, int>
    clpf_block_param_t;

36class ClpfBlockTest : public ::testing::TestWithParam<clpf_block_param_t> {
37 public:
38 virtual ~ClpfBlockTest() {}
39 virtual void SetUp() {
40 clpf = GET_PARAM(0);
41 ref_clpf = GET_PARAM(1);
42 sizex = GET_PARAM(2);
43 sizey = GET_PARAM(3);
44 }
45
46 virtual void TearDown() { libaom_test::ClearSystemState(); }
47
48 protected:
49 int sizex;
50 int sizey;
51 clpf_block_t clpf;
52 clpf_block_t ref_clpf;
53};
54
55typedef ClpfBlockTest ClpfSpeedTest;
56
#if CONFIG_AOM_HIGHBITDEPTH
// High bit depth counterpart of clpf_block_t: same argument list, but the
// source and destination buffers hold 16-bit samples.
typedef void (*clpf_block_hbd_t)(const uint16_t *src, uint16_t *dst,
                                 int sstride, int dstride, int x0, int y0,
                                 int sizex, int sizey, unsigned int strength,
                                 unsigned int bitdepth);

// Test parameter: (function under test, reference function, block width,
// block height).
typedef std::tr1::tuple<clpf_block_hbd_t, clpf_block_hbd_t, int, int>
    clpf_block_hbd_param_t;

// Fixture for the high bit depth CLPF block tests.  It unpacks the test
// parameter into the two functions and the block dimensions.
class ClpfBlockHbdTest
    : public ::testing::TestWithParam<clpf_block_hbd_param_t> {
 public:
  virtual ~ClpfBlockHbdTest() {}

  // Copies the four fields of the current test parameter into the members.
  virtual void SetUp() {
    clpf = GET_PARAM(0);
    ref_clpf = GET_PARAM(1);
    sizex = GET_PARAM(2);
    sizey = GET_PARAM(3);
  }

  virtual void TearDown() { libaom_test::ClearSystemState(); }

 protected:
  int sizex;                  // Block width handed to the filter.
  int sizey;                  // Block height handed to the filter.
  clpf_block_hbd_t clpf;      // Implementation under test.
  clpf_block_hbd_t ref_clpf;  // Reference implementation.
};

// The high bit depth speed test uses the same fixture and parameter type.
typedef ClpfBlockHbdTest ClpfHbdSpeedTest;
#endif

89template <typename pixel>
90void test_clpf(int w, int h, int depth, int iterations,
91 void (*clpf)(const pixel *src, pixel *dst, int sstride,
92 int dstride, int x0, int y0, int sizex, int sizey,
Steinar Midtskogend280a842017-03-21 09:59:14 +010093 unsigned int strength, unsigned int bitdepth),
Steinar Midtskogen3dbd55a2016-09-09 15:23:35 +020094 void (*ref_clpf)(const pixel *src, pixel *dst, int sstride,
95 int dstride, int x0, int y0, int sizex,
Steinar Midtskogen73ad5232017-01-30 14:39:07 +010096 int sizey, unsigned int strength,
Steinar Midtskogend280a842017-03-21 09:59:14 +010097 unsigned int bitdepth)) {
Steinar Midtskogenb754ec32017-03-27 19:54:05 +020098 const int size = 40;
Steinar Midtskogen3dbd55a2016-09-09 15:23:35 +020099 ACMRandom rnd(ACMRandom::DeterministicSeed());
100 DECLARE_ALIGNED(16, pixel, s[size * size]);
101 DECLARE_ALIGNED(16, pixel, d[size * size]);
102 DECLARE_ALIGNED(16, pixel, ref_d[size * size]);
103 memset(ref_d, 0, size * size * sizeof(*ref_d));
104 memset(d, 0, size * size * sizeof(*d));
105
106 int error = 0, pos = 0, strength = 0, xpos = 0, ypos = 0;
107 int bits, level, count;
Steinar Midtskogenbe668e92016-08-05 12:12:38 +0200108
109 // Test every combination of:
Steinar Midtskogen3dbd55a2016-09-09 15:23:35 +0200110 // * Input with up to <depth> bits of noise
111 // * Noise level around every value from 0 to (1<<depth)-1
Jean-Marc Valin1f89d0f2017-03-23 20:17:47 -0400112 // * Blocks anywhere in the frame (but not on the edge)
Steinar Midtskogenbe668e92016-08-05 12:12:38 +0200113 // * All strengths
Steinar Midtskogen3dbd55a2016-09-09 15:23:35 +0200114 // If clpf and ref_clpf are the same, we're just testing speed
115 for (count = 0; count < iterations; count++) {
116 for (level = 0; level < (1 << depth) && !error; level++) {
117 for (bits = 1; bits <= depth && !error; bits++) {
118 for (int i = 0; i < size * size; i++)
119 s[i] = clamp((rnd.Rand16() & ((1 << bits) - 1)) + level, 0,
120 (1 << depth) - 1);
Steinar Midtskogenbe668e92016-08-05 12:12:38 +0200121
Steinar Midtskogenb754ec32017-03-27 19:54:05 +0200122 for (ypos = 8; ypos < size - h - 8 && !error; ypos += h * !error) {
123 for (xpos = 8; xpos < size - w - 8 && !error; xpos += w * !error) {
Steinar Midtskogen3dbd55a2016-09-09 15:23:35 +0200124 for (strength = depth - 8; strength < depth - 5 && !error;
125 strength += !error) {
Steinar Midtskogen73ad5232017-01-30 14:39:07 +0100126 ref_clpf(s, ref_d, size, size, xpos, ypos, w, h, 1 << strength,
Steinar Midtskogend280a842017-03-21 09:59:14 +0100127 depth);
Steinar Midtskogen3dbd55a2016-09-09 15:23:35 +0200128 if (clpf != ref_clpf)
129 ASM_REGISTER_STATE_CHECK(clpf(s, d, size, size, xpos, ypos, w,
Steinar Midtskogend280a842017-03-21 09:59:14 +0100130 h, 1 << strength, depth));
Steinar Midtskogen3dbd55a2016-09-09 15:23:35 +0200131 if (ref_clpf != clpf)
132 for (pos = 0; pos < size * size && !error; pos++) {
133 error = ref_d[pos] != d[pos];
134 }
Steinar Midtskogenbe668e92016-08-05 12:12:38 +0200135 }
136 }
137 }
138 }
139 }
140 }
141
Steinar Midtskogenee54e5f2016-09-09 17:30:21 +0200142 pos--;
Steinar Midtskogenbe668e92016-08-05 12:12:38 +0200143 EXPECT_EQ(0, error)
144 << "Error: ClpfBlockTest, SIMD and C mismatch." << std::endl
145 << "First error at " << pos % size << "," << pos / size << " ("
146 << (int16_t)ref_d[pos] << " != " << (int16_t)d[pos] << ") " << std::endl
147 << "strength: " << (1 << strength) << std::endl
148 << "xpos: " << xpos << std::endl
149 << "ypos: " << ypos << std::endl
Steinar Midtskogenecf9a0c2016-09-13 16:37:13 +0200150 << "w: " << w << std::endl
151 << "h: " << h << std::endl
Steinar Midtskogen4f0b3ed2017-02-08 18:48:07 +0100152 << "A=" << (pos > 2 * size ? (int16_t)s[pos - 2 * size] : -1) << std::endl
153 << "B=" << (pos > size ? (int16_t)s[pos - size] : -1) << std::endl
154 << "C=" << (pos % size - 2 >= 0 ? (int16_t)s[pos - 2] : -1) << std::endl
155 << "D=" << (pos % size - 1 >= 0 ? (int16_t)s[pos - 1] : -1) << std::endl
Steinar Midtskogenbe668e92016-08-05 12:12:38 +0200156 << "X=" << (int16_t)s[pos] << std::endl
Steinar Midtskogen4f0b3ed2017-02-08 18:48:07 +0100157 << "E=" << (pos % size + 1 < size ? (int16_t)s[pos + 1] : -1) << std::endl
158 << "F=" << (pos % size + 2 < size ? (int16_t)s[pos + 2] : -1) << std::endl
159 << "G=" << (pos + size < size * size ? (int16_t)s[pos + size] : -1)
160 << std::endl
161 << "H="
162 << (pos + 2 * size < size * size ? (int16_t)s[pos + 2 * size] : -1)
Steinar Midtskogenbe668e92016-08-05 12:12:38 +0200163 << std::endl;
164}
165
Steinar Midtskogen3dbd55a2016-09-09 15:23:35 +0200166template <typename pixel>
167void test_clpf_speed(int w, int h, int depth, int iterations,
168 void (*clpf)(const pixel *src, pixel *dst, int sstride,
169 int dstride, int x0, int y0, int sizex,
Steinar Midtskogen73ad5232017-01-30 14:39:07 +0100170 int sizey, unsigned int strength,
Steinar Midtskogend280a842017-03-21 09:59:14 +0100171 unsigned int bitdepth),
Steinar Midtskogen3dbd55a2016-09-09 15:23:35 +0200172 void (*ref_clpf)(const pixel *src, pixel *dst, int sstride,
173 int dstride, int x0, int y0, int sizex,
Steinar Midtskogen73ad5232017-01-30 14:39:07 +0100174 int sizey, unsigned int strength,
Steinar Midtskogen4f0b3ed2017-02-08 18:48:07 +0100175 unsigned int bitdepth)) {
Steinar Midtskogenbe668e92016-08-05 12:12:38 +0200176 aom_usec_timer ref_timer;
177 aom_usec_timer timer;
178
179 aom_usec_timer_start(&ref_timer);
Steinar Midtskogen3dbd55a2016-09-09 15:23:35 +0200180 test_clpf(w, h, depth, iterations, ref_clpf, ref_clpf);
Steinar Midtskogenbe668e92016-08-05 12:12:38 +0200181 aom_usec_timer_mark(&ref_timer);
Yaowu Xu1e487c32016-11-10 15:54:07 -0800182 int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
Steinar Midtskogenbe668e92016-08-05 12:12:38 +0200183
184 aom_usec_timer_start(&timer);
Steinar Midtskogen3dbd55a2016-09-09 15:23:35 +0200185 test_clpf(w, h, depth, iterations, clpf, clpf);
Steinar Midtskogenbe668e92016-08-05 12:12:38 +0200186 aom_usec_timer_mark(&timer);
Yaowu Xu1e487c32016-11-10 15:54:07 -0800187 int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
Steinar Midtskogenbe668e92016-08-05 12:12:38 +0200188
189#if 0
190 std::cout << "[ ] C time = " << ref_elapsed_time / 1000
191 << " ms, SIMD time = " << elapsed_time / 1000 << " ms" << std::endl;
192#endif
193
194 EXPECT_GT(ref_elapsed_time, elapsed_time)
195 << "Error: ClpfSpeedTest, SIMD slower than C." << std::endl
Steinar Midtskogen61161412016-09-26 21:48:09 +0200196 << "C time: " << ref_elapsed_time << " us" << std::endl
197 << "SIMD time: " << elapsed_time << " us" << std::endl;
Steinar Midtskogenbe668e92016-08-05 12:12:38 +0200198}
199
Steinar Midtskogen3dbd55a2016-09-09 15:23:35 +0200200TEST_P(ClpfBlockTest, TestSIMDNoMismatch) {
201 test_clpf(sizex, sizey, 8, 1, clpf, ref_clpf);
202}
203
204TEST_P(ClpfSpeedTest, TestSpeed) {
205 test_clpf_speed(sizex, sizey, 8, 16, clpf, ref_clpf);
206}
207
#if CONFIG_AOM_HIGHBITDEPTH
// High bit depth correctness: one pass of the sweep at 12 bits.
TEST_P(ClpfBlockHbdTest, TestSIMDNoMismatch) {
  test_clpf(sizex, sizey, 12, 1, clpf, ref_clpf);
}

// High bit depth speed: one pass of the sweep at 12 bits per implementation.
TEST_P(ClpfHbdSpeedTest, TestSpeed) {
  test_clpf_speed(sizex, sizey, 12, 1, clpf, ref_clpf);
}
#endif

Steinar Midtskogenbe668e92016-08-05 12:12:38 +0200218using std::tr1::make_tuple;
219
Steinar Midtskogend954f2d2016-12-13 15:21:53 +0100220// VS compiling for 32 bit targets does not support vector types in
221// structs as arguments, which makes the v256 type of the intrinsics
222// hard to support, so optimizations for this target are disabled.
223#if defined(_WIN64) || !defined(_MSC_VER) || defined(__clang__)
Steinar Midtskogenbe668e92016-08-05 12:12:38 +0200224// Test all supported architectures and block sizes
225#if HAVE_SSE2
226INSTANTIATE_TEST_CASE_P(
227 SSE2, ClpfBlockTest,
228 ::testing::Values(make_tuple(&aom_clpf_block_sse2, &aom_clpf_block_c, 8, 8),
229 make_tuple(&aom_clpf_block_sse2, &aom_clpf_block_c, 8, 4),
230 make_tuple(&aom_clpf_block_sse2, &aom_clpf_block_c, 4, 8),
231 make_tuple(&aom_clpf_block_sse2, &aom_clpf_block_c, 4,
232 4)));
233#endif
234
235#if HAVE_SSSE3
236INSTANTIATE_TEST_CASE_P(
237 SSSE3, ClpfBlockTest,
238 ::testing::Values(
239 make_tuple(&aom_clpf_block_ssse3, &aom_clpf_block_c, 8, 8),
240 make_tuple(&aom_clpf_block_ssse3, &aom_clpf_block_c, 8, 4),
241 make_tuple(&aom_clpf_block_ssse3, &aom_clpf_block_c, 4, 8),
242 make_tuple(&aom_clpf_block_ssse3, &aom_clpf_block_c, 4, 4)));
243#endif
244
245#if HAVE_SSE4_1
246INSTANTIATE_TEST_CASE_P(
247 SSSE4_1, ClpfBlockTest,
248 ::testing::Values(
249 make_tuple(&aom_clpf_block_sse4_1, &aom_clpf_block_c, 8, 8),
250 make_tuple(&aom_clpf_block_sse4_1, &aom_clpf_block_c, 8, 4),
251 make_tuple(&aom_clpf_block_sse4_1, &aom_clpf_block_c, 4, 8),
252 make_tuple(&aom_clpf_block_sse4_1, &aom_clpf_block_c, 4, 4)));
253#endif
254
255#if HAVE_NEON
256INSTANTIATE_TEST_CASE_P(
257 NEON, ClpfBlockTest,
258 ::testing::Values(make_tuple(&aom_clpf_block_neon, &aom_clpf_block_c, 8, 8),
259 make_tuple(&aom_clpf_block_neon, &aom_clpf_block_c, 8, 4),
260 make_tuple(&aom_clpf_block_neon, &aom_clpf_block_c, 4, 8),
261 make_tuple(&aom_clpf_block_neon, &aom_clpf_block_c, 4,
262 4)));
263#endif
264
Steinar Midtskogen3dbd55a2016-09-09 15:23:35 +0200265#if CONFIG_AOM_HIGHBITDEPTH
266#if HAVE_SSE2
267INSTANTIATE_TEST_CASE_P(
268 SSE2, ClpfBlockHbdTest,
269 ::testing::Values(
270 make_tuple(&aom_clpf_block_hbd_sse2, &aom_clpf_block_hbd_c, 8, 8),
271 make_tuple(&aom_clpf_block_hbd_sse2, &aom_clpf_block_hbd_c, 8, 4),
272 make_tuple(&aom_clpf_block_hbd_sse2, &aom_clpf_block_hbd_c, 4, 8),
273 make_tuple(&aom_clpf_block_hbd_sse2, &aom_clpf_block_hbd_c, 4, 4)));
274#endif
275
276#if HAVE_SSSE3
277INSTANTIATE_TEST_CASE_P(
278 SSSE3, ClpfBlockHbdTest,
279 ::testing::Values(
280 make_tuple(&aom_clpf_block_hbd_ssse3, &aom_clpf_block_hbd_c, 8, 8),
281 make_tuple(&aom_clpf_block_hbd_ssse3, &aom_clpf_block_hbd_c, 8, 4),
282 make_tuple(&aom_clpf_block_hbd_ssse3, &aom_clpf_block_hbd_c, 4, 8),
283 make_tuple(&aom_clpf_block_hbd_ssse3, &aom_clpf_block_hbd_c, 4, 4)));
284#endif
285
286#if HAVE_SSE4_1
287INSTANTIATE_TEST_CASE_P(
288 SSSE4_1, ClpfBlockHbdTest,
289 ::testing::Values(
290 make_tuple(&aom_clpf_block_hbd_sse4_1, &aom_clpf_block_hbd_c, 8, 8),
291 make_tuple(&aom_clpf_block_hbd_sse4_1, &aom_clpf_block_hbd_c, 8, 4),
292 make_tuple(&aom_clpf_block_hbd_sse4_1, &aom_clpf_block_hbd_c, 4, 8),
293 make_tuple(&aom_clpf_block_hbd_sse4_1, &aom_clpf_block_hbd_c, 4, 4)));
294#endif
295
296#if HAVE_NEON
297INSTANTIATE_TEST_CASE_P(
298 NEON, ClpfBlockHbdTest,
299 ::testing::Values(
300 make_tuple(&aom_clpf_block_hbd_neon, &aom_clpf_block_hbd_c, 8, 8),
301 make_tuple(&aom_clpf_block_hbd_neon, &aom_clpf_block_hbd_c, 8, 4),
302 make_tuple(&aom_clpf_block_hbd_neon, &aom_clpf_block_hbd_c, 4, 8),
303 make_tuple(&aom_clpf_block_hbd_neon, &aom_clpf_block_hbd_c, 4, 4)));
304#endif
Steinar Midtskogend954f2d2016-12-13 15:21:53 +0100305#endif // CONFIG_AOM_HIGHBITDEPTH
Steinar Midtskogen3dbd55a2016-09-09 15:23:35 +0200306
Steinar Midtskogenbe668e92016-08-05 12:12:38 +0200307// Test speed for all supported architectures
308#if HAVE_SSE2
309INSTANTIATE_TEST_CASE_P(SSE2, ClpfSpeedTest,
310 ::testing::Values(make_tuple(&aom_clpf_block_sse2,
311 &aom_clpf_block_c, 8, 8)));
312#endif
313
314#if HAVE_SSSE3
315INSTANTIATE_TEST_CASE_P(SSSE3, ClpfSpeedTest,
316 ::testing::Values(make_tuple(&aom_clpf_block_ssse3,
317 &aom_clpf_block_c, 8, 8)));
318#endif
319
320#if HAVE_SSE4_1
321INSTANTIATE_TEST_CASE_P(SSSE4_1, ClpfSpeedTest,
Steinar Midtskogen929e8062017-03-26 14:32:35 +0200322 ::testing::Values(make_tuple(&aom_clpf_block_sse4_1,
Steinar Midtskogenbe668e92016-08-05 12:12:38 +0200323 &aom_clpf_block_c, 8, 8)));
324#endif
325
326#if HAVE_NEON
327INSTANTIATE_TEST_CASE_P(NEON, ClpfSpeedTest,
328 ::testing::Values(make_tuple(&aom_clpf_block_neon,
329 &aom_clpf_block_c, 8, 8)));
330#endif
Steinar Midtskogen3dbd55a2016-09-09 15:23:35 +0200331
332#if CONFIG_AOM_HIGHBITDEPTH
333#if HAVE_SSE2
334INSTANTIATE_TEST_CASE_P(SSE2, ClpfHbdSpeedTest,
335 ::testing::Values(make_tuple(&aom_clpf_block_hbd_sse2,
336 &aom_clpf_block_hbd_c, 8,
337 8)));
338#endif
339
340#if HAVE_SSSE3
341INSTANTIATE_TEST_CASE_P(SSSE3, ClpfHbdSpeedTest,
342 ::testing::Values(make_tuple(&aom_clpf_block_hbd_ssse3,
343 &aom_clpf_block_hbd_c, 8,
344 8)));
345#endif
346
347#if HAVE_SSE4_1
348INSTANTIATE_TEST_CASE_P(SSSE4_1, ClpfHbdSpeedTest,
Steinar Midtskogen929e8062017-03-26 14:32:35 +0200349 ::testing::Values(make_tuple(&aom_clpf_block_hbd_sse4_1,
Steinar Midtskogen3dbd55a2016-09-09 15:23:35 +0200350 &aom_clpf_block_hbd_c, 8,
351 8)));
352#endif
353
354#if HAVE_NEON
355INSTANTIATE_TEST_CASE_P(NEON, ClpfHbdSpeedTest,
356 ::testing::Values(make_tuple(&aom_clpf_block_hbd_neon,
357 &aom_clpf_block_hbd_c, 8,
358 8)));
359#endif
Steinar Midtskogend954f2d2016-12-13 15:21:53 +0100360#endif // CONFIG_AOM_HIGHBITDEPTH
361#endif // defined(_WIN64) || !defined(_MSC_VER)
Steinar Midtskogen3dbd55a2016-09-09 15:23:35 +0200362
Steinar Midtskogenbe668e92016-08-05 12:12:38 +0200363} // namespace