blob: ea10626f6f31b93c1d28240c100064de0ec1f8fa [file] [log] [blame]
Xing Jin37ee03b2018-07-25 10:15:42 +08001/*
2 * Copyright (c) 2018, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
Urvang Joshi17a418b2018-08-03 12:17:23 -070012#include <vector>
13
Xing Jin37ee03b2018-07-25 10:15:42 +080014#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
15
16#include "test/function_equivalence_test.h"
17#include "test/register_state_check.h"
18
19#include "config/aom_config.h"
20#include "config/aom_dsp_rtcd.h"
21
22#include "aom/aom_integer.h"
23#include "av1/encoder/pickrst.h"
24
25#define MAX_WIENER_BLOCK 384
26#define MAX_DATA_BLOCK (MAX_WIENER_BLOCK + WIENER_WIN)
27using libaom_test::FunctionEquivalenceTest;
28
David Turnercfb52502018-09-25 15:38:39 +010029// 8-bit-depth tests
30namespace wiener_lowbd {
Xing Jin37ee03b2018-07-25 10:15:42 +080031
Urvang Joshi17a418b2018-08-03 12:17:23 -070032static void compute_stats_win_opt_c(int wiener_win, const uint8_t *dgd,
33 const uint8_t *src, int h_start, int h_end,
34 int v_start, int v_end, int dgd_stride,
David Turner9042a3c2018-09-20 16:47:53 +010035 int src_stride, int64_t *M, int64_t *H) {
Urvang Joshi17a418b2018-08-03 12:17:23 -070036 ASSERT_TRUE(wiener_win == WIENER_WIN || wiener_win == WIENER_WIN_CHROMA);
Xing Jin37ee03b2018-07-25 10:15:42 +080037 int i, j, k, l, m, n;
Xing Jin37ee03b2018-07-25 10:15:42 +080038 const int pixel_count = (h_end - h_start) * (v_end - v_start);
39 const int wiener_win2 = wiener_win * wiener_win;
40 const int wiener_halfwin = (wiener_win >> 1);
David Turner9042a3c2018-09-20 16:47:53 +010041 uint8_t avg = find_average(dgd, h_start, h_end, v_start, v_end, dgd_stride);
Xing Jin37ee03b2018-07-25 10:15:42 +080042
Urvang Joshi17a418b2018-08-03 12:17:23 -070043 std::vector<std::vector<int64_t> > M_int(wiener_win,
44 std::vector<int64_t>(wiener_win, 0));
45 std::vector<std::vector<int64_t> > H_int(
46 wiener_win * wiener_win, std::vector<int64_t>(wiener_win * 8, 0));
47 std::vector<std::vector<int32_t> > sumY(wiener_win,
48 std::vector<int32_t>(wiener_win, 0));
Xing Jin37ee03b2018-07-25 10:15:42 +080049 int32_t sumX = 0;
50 const uint8_t *dgd_win = dgd - wiener_halfwin * dgd_stride - wiener_halfwin;
51
52 for (i = v_start; i < v_end; i++) {
53 for (j = h_start; j < h_end; j += 2) {
54 const uint8_t X1 = src[i * src_stride + j];
55 const uint8_t X2 = src[i * src_stride + j + 1];
56 sumX += X1 + X2;
57
58 const uint8_t *dgd_ij = dgd_win + i * dgd_stride + j;
59 for (k = 0; k < wiener_win; k++) {
60 for (l = 0; l < wiener_win; l++) {
61 const uint8_t *dgd_ijkl = dgd_ij + k * dgd_stride + l;
62 int64_t *H_int_temp = &H_int[(l * wiener_win + k)][0];
63 const uint8_t D1 = dgd_ijkl[0];
64 const uint8_t D2 = dgd_ijkl[1];
65 sumY[k][l] += D1 + D2;
66 M_int[l][k] += D1 * X1 + D2 * X2;
67 for (m = 0; m < wiener_win; m++) {
68 for (n = 0; n < wiener_win; n++) {
69 H_int_temp[m * 8 + n] += D1 * dgd_ij[n + dgd_stride * m] +
70 D2 * dgd_ij[n + dgd_stride * m + 1];
71 }
72 }
73 }
74 }
75 }
76 }
77
David Turner9042a3c2018-09-20 16:47:53 +010078 const int64_t avg_square_sum = (int64_t)avg * (int64_t)avg * pixel_count;
Xing Jin37ee03b2018-07-25 10:15:42 +080079 for (k = 0; k < wiener_win; k++) {
80 for (l = 0; l < wiener_win; l++) {
81 M[l * wiener_win + k] =
David Turner9042a3c2018-09-20 16:47:53 +010082 M_int[l][k] + avg_square_sum - (int64_t)avg * (sumX + sumY[k][l]);
Xing Jin37ee03b2018-07-25 10:15:42 +080083 for (m = 0; m < wiener_win; m++) {
84 for (n = 0; n < wiener_win; n++) {
85 H[(l * wiener_win + k) * wiener_win2 + m * wiener_win + n] =
86 H_int[(l * wiener_win + k)][n * 8 + m] + avg_square_sum -
David Turner9042a3c2018-09-20 16:47:53 +010087 (int64_t)avg * (sumY[k][l] + sumY[n][m]);
Xing Jin37ee03b2018-07-25 10:15:42 +080088 }
89 }
90 }
91 }
92}
93
94void compute_stats_opt_c(int wiener_win, const uint8_t *dgd, const uint8_t *src,
95 int h_start, int h_end, int v_start, int v_end,
David Turner9042a3c2018-09-20 16:47:53 +010096 int dgd_stride, int src_stride, int64_t *M,
97 int64_t *H) {
Urvang Joshi17a418b2018-08-03 12:17:23 -070098 if (wiener_win == WIENER_WIN || wiener_win == WIENER_WIN_CHROMA) {
99 compute_stats_win_opt_c(wiener_win, dgd, src, h_start, h_end, v_start,
100 v_end, dgd_stride, src_stride, M, H);
Xing Jin37ee03b2018-07-25 10:15:42 +0800101 } else {
Xing Jin04c7c842018-07-30 17:08:30 +0800102 av1_compute_stats_c(wiener_win, dgd, src, h_start, h_end, v_start, v_end,
103 dgd_stride, src_stride, M, H);
Xing Jin37ee03b2018-07-25 10:15:42 +0800104 }
105}
106
107static const int kIterations = 100;
Xing Jin37ee03b2018-07-25 10:15:42 +0800108typedef void (*compute_stats_Func)(int wiener_win, const uint8_t *dgd,
109 const uint8_t *src, int h_start, int h_end,
110 int v_start, int v_end, int dgd_stride,
David Turner9042a3c2018-09-20 16:47:53 +0100111 int src_stride, int64_t *M, int64_t *H);
Xing Jin37ee03b2018-07-25 10:15:42 +0800112
113typedef libaom_test::FuncParam<compute_stats_Func> TestFuncs;
114
115////////////////////////////////////////////////////////////////////////////////
116// 8 bit
117////////////////////////////////////////////////////////////////////////////////
118
119typedef ::testing::tuple<const compute_stats_Func> WienerTestParam;
120
121class WienerTest : public ::testing::TestWithParam<WienerTestParam> {
122 public:
David Turner4ca00872018-10-10 15:13:56 +0100123 virtual void SetUp() {
124 src_buf = (uint8_t *)aom_memalign(
125 32, MAX_DATA_BLOCK * MAX_DATA_BLOCK * sizeof(uint8_t));
126 dgd_buf = (uint8_t *)aom_memalign(
127 32, MAX_DATA_BLOCK * MAX_DATA_BLOCK * sizeof(uint8_t));
128 target_func_ = GET_PARAM(0);
129 }
130 virtual void TearDown() {
131 aom_free(src_buf);
132 aom_free(dgd_buf);
133 }
Xing Jin37ee03b2018-07-25 10:15:42 +0800134 void runWienerTest(const int32_t wiener_win, int32_t run_times);
135 void runWienerTest_ExtremeValues(const int32_t wiener_win);
136
137 private:
138 compute_stats_Func target_func_;
139 ACMRandom rng_;
David Turner4ca00872018-10-10 15:13:56 +0100140 uint8_t *src_buf;
141 uint8_t *dgd_buf;
Xing Jin37ee03b2018-07-25 10:15:42 +0800142};
143
144void WienerTest::runWienerTest(const int32_t wiener_win, int32_t run_times) {
145 const int32_t wiener_halfwin = wiener_win >> 1;
146 const int32_t wiener_win2 = wiener_win * wiener_win;
David Turner9042a3c2018-09-20 16:47:53 +0100147 DECLARE_ALIGNED(32, int64_t, M_ref[WIENER_WIN2]);
148 DECLARE_ALIGNED(32, int64_t, H_ref[WIENER_WIN2 * WIENER_WIN2]);
149 DECLARE_ALIGNED(32, int64_t, M_test[WIENER_WIN2]);
150 DECLARE_ALIGNED(32, int64_t, H_test[WIENER_WIN2 * WIENER_WIN2]);
Xing Jin37ee03b2018-07-25 10:15:42 +0800151 const int h_start = ((rng_.Rand16() % (MAX_WIENER_BLOCK / 2)) & (~7));
152 int h_end =
153 run_times != 1 ? 256 : ((rng_.Rand16() % MAX_WIENER_BLOCK) & (~7)) + 8;
154 const int v_start = ((rng_.Rand16() % (MAX_WIENER_BLOCK / 2)) & (~7));
155 int v_end =
156 run_times != 1 ? 256 : ((rng_.Rand16() % MAX_WIENER_BLOCK) & (~7)) + 8;
157 const int dgd_stride = h_end;
158 const int src_stride = MAX_DATA_BLOCK;
159 const int iters = run_times == 1 ? kIterations : 2;
160 for (int iter = 0; iter < iters && !HasFatalFailure(); ++iter) {
161 for (int i = 0; i < MAX_DATA_BLOCK * MAX_DATA_BLOCK; ++i) {
162 dgd_buf[i] = rng_.Rand8();
163 src_buf[i] = rng_.Rand8();
164 }
165 uint8_t *dgd = dgd_buf + wiener_halfwin * MAX_DATA_BLOCK + wiener_halfwin;
166 uint8_t *src = src_buf;
167
168 aom_usec_timer timer;
169 aom_usec_timer_start(&timer);
170 for (int i = 0; i < run_times; ++i) {
Xing Jin04c7c842018-07-30 17:08:30 +0800171 av1_compute_stats_c(wiener_win, dgd, src, h_start, h_end, v_start, v_end,
172 dgd_stride, src_stride, M_ref, H_ref);
Xing Jin37ee03b2018-07-25 10:15:42 +0800173 }
174 aom_usec_timer_mark(&timer);
175 const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
176 aom_usec_timer_start(&timer);
177 for (int i = 0; i < run_times; ++i) {
178 target_func_(wiener_win, dgd, src, h_start, h_end, v_start, v_end,
179 dgd_stride, src_stride, M_test, H_test);
180 }
181 aom_usec_timer_mark(&timer);
182 const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
183 if (run_times > 10) {
184 printf("win %d %3dx%-3d:%7.2f/%7.2fns", wiener_win, h_end, v_end, time1,
185 time2);
186 printf("(%3.2f)\n", time1 / time2);
187 }
188 int failed = 0;
189 for (int i = 0; i < wiener_win2; ++i) {
David Turner9042a3c2018-09-20 16:47:53 +0100190 if (M_ref[i] != M_test[i]) {
Xing Jin37ee03b2018-07-25 10:15:42 +0800191 failed = 1;
David Turner9042a3c2018-09-20 16:47:53 +0100192 printf("win %d M iter %d [%4d] ref %6" PRId64 " test %6" PRId64 " \n",
193 wiener_win, iter, i, M_ref[i], M_test[i]);
Xing Jin37ee03b2018-07-25 10:15:42 +0800194 break;
195 }
196 }
197 // ASSERT_EQ(failed, 0);
198 for (int i = 0; i < wiener_win2 * wiener_win2; ++i) {
David Turner9042a3c2018-09-20 16:47:53 +0100199 if (H_ref[i] != H_test[i]) {
Xing Jin37ee03b2018-07-25 10:15:42 +0800200 failed = 1;
David Turner9042a3c2018-09-20 16:47:53 +0100201 printf("win %d H iter %d [%4d] ref %6" PRId64 " test %6" PRId64 " \n",
202 wiener_win, iter, i, H_ref[i], H_test[i]);
Xing Jin37ee03b2018-07-25 10:15:42 +0800203 break;
204 }
205 }
206 ASSERT_EQ(failed, 0);
207 }
208}
209
210void WienerTest::runWienerTest_ExtremeValues(const int32_t wiener_win) {
211 const int32_t wiener_halfwin = wiener_win >> 1;
212 const int32_t wiener_win2 = wiener_win * wiener_win;
David Turner9042a3c2018-09-20 16:47:53 +0100213 DECLARE_ALIGNED(32, int64_t, M_ref[WIENER_WIN2]);
214 DECLARE_ALIGNED(32, int64_t, H_ref[WIENER_WIN2 * WIENER_WIN2]);
215 DECLARE_ALIGNED(32, int64_t, M_test[WIENER_WIN2]);
216 DECLARE_ALIGNED(32, int64_t, H_test[WIENER_WIN2 * WIENER_WIN2]);
Xing Jin37ee03b2018-07-25 10:15:42 +0800217 const int h_start = 16;
218 const int h_end = MAX_WIENER_BLOCK;
219 const int v_start = 16;
220 const int v_end = MAX_WIENER_BLOCK;
221 const int dgd_stride = h_end;
222 const int src_stride = MAX_DATA_BLOCK;
223 const int iters = 1;
224 for (int iter = 0; iter < iters && !HasFatalFailure(); ++iter) {
225 for (int i = 0; i < MAX_DATA_BLOCK * MAX_DATA_BLOCK; ++i) {
226 dgd_buf[i] = 255;
227 src_buf[i] = 255;
228 }
229 uint8_t *dgd = dgd_buf + wiener_halfwin * MAX_DATA_BLOCK + wiener_halfwin;
230 uint8_t *src = src_buf;
231
Xing Jin04c7c842018-07-30 17:08:30 +0800232 av1_compute_stats_c(wiener_win, dgd, src, h_start, h_end, v_start, v_end,
233 dgd_stride, src_stride, M_ref, H_ref);
Xing Jin37ee03b2018-07-25 10:15:42 +0800234
235 target_func_(wiener_win, dgd, src, h_start, h_end, v_start, v_end,
236 dgd_stride, src_stride, M_test, H_test);
237
238 int failed = 0;
239 for (int i = 0; i < wiener_win2; ++i) {
David Turner9042a3c2018-09-20 16:47:53 +0100240 if (M_ref[i] != M_test[i]) {
Xing Jin37ee03b2018-07-25 10:15:42 +0800241 failed = 1;
David Turner9042a3c2018-09-20 16:47:53 +0100242 printf("win %d M iter %d [%4d] ref %6" PRId64 " test %6" PRId64 " \n",
243 wiener_win, iter, i, M_ref[i], M_test[i]);
Xing Jin37ee03b2018-07-25 10:15:42 +0800244 break;
245 }
246 }
247 // ASSERT_EQ(failed, 0);
248 for (int i = 0; i < wiener_win2 * wiener_win2; ++i) {
David Turner9042a3c2018-09-20 16:47:53 +0100249 if (H_ref[i] != H_test[i]) {
Xing Jin37ee03b2018-07-25 10:15:42 +0800250 failed = 1;
David Turner9042a3c2018-09-20 16:47:53 +0100251 printf("win %d H iter %d [%4d] ref %6" PRId64 " test %6" PRId64 " \n",
252 wiener_win, iter, i, H_ref[i], H_test[i]);
Xing Jin37ee03b2018-07-25 10:15:42 +0800253 break;
254 }
255 }
256 ASSERT_EQ(failed, 0);
257 }
258}
259
260TEST_P(WienerTest, RandomValues) {
261 runWienerTest(WIENER_WIN, 1);
262 runWienerTest(WIENER_WIN_CHROMA, 1);
263}
264
265TEST_P(WienerTest, ExtremeValues) {
266 runWienerTest_ExtremeValues(WIENER_WIN);
267 runWienerTest_ExtremeValues(WIENER_WIN_CHROMA);
268}
269
270TEST_P(WienerTest, DISABLED_Speed) {
271 runWienerTest(WIENER_WIN, 200);
272 runWienerTest(WIENER_WIN_CHROMA, 200);
273}
274
275INSTANTIATE_TEST_CASE_P(C, WienerTest, ::testing::Values(compute_stats_opt_c));
276
277#if HAVE_SSE4_1
278INSTANTIATE_TEST_CASE_P(SSE4_1, WienerTest,
Xing Jin04c7c842018-07-30 17:08:30 +0800279 ::testing::Values(av1_compute_stats_sse4_1));
Xing Jin37ee03b2018-07-25 10:15:42 +0800280#endif // HAVE_SSE4_1
281
282#if HAVE_AVX2
283
284INSTANTIATE_TEST_CASE_P(AVX2, WienerTest,
Xing Jin04c7c842018-07-30 17:08:30 +0800285 ::testing::Values(av1_compute_stats_avx2));
Xing Jin37ee03b2018-07-25 10:15:42 +0800286#endif // HAVE_AVX2
287
David Turnercfb52502018-09-25 15:38:39 +0100288} // namespace wiener_lowbd
289
290// High bit-depth tests:
291namespace wiener_highbd {
292
293static void compute_stats_highbd_win_opt_c(int wiener_win, const uint8_t *dgd8,
294 const uint8_t *src8, int h_start,
295 int h_end, int v_start, int v_end,
296 int dgd_stride, int src_stride,
297 int64_t *M, int64_t *H,
298 aom_bit_depth_t bit_depth) {
299 ASSERT_TRUE(wiener_win == WIENER_WIN || wiener_win == WIENER_WIN_CHROMA);
300 int i, j, k, l, m, n;
301 const int pixel_count = (h_end - h_start) * (v_end - v_start);
302 const int wiener_win2 = wiener_win * wiener_win;
303 const int wiener_halfwin = (wiener_win >> 1);
304 const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
305 const uint16_t *dgd = CONVERT_TO_SHORTPTR(dgd8);
306 const uint16_t avg =
307 find_average_highbd(dgd, h_start, h_end, v_start, v_end, dgd_stride);
308
309 std::vector<std::vector<int64_t> > M_int(wiener_win,
310 std::vector<int64_t>(wiener_win, 0));
311 std::vector<std::vector<int64_t> > H_int(
312 wiener_win * wiener_win, std::vector<int64_t>(wiener_win * 8, 0));
313 std::vector<std::vector<int32_t> > sumY(wiener_win,
314 std::vector<int32_t>(wiener_win, 0));
315
316 memset(M, 0, sizeof(*M) * wiener_win2);
317 memset(H, 0, sizeof(*H) * wiener_win2 * wiener_win2);
318
319 int64_t sumX = 0;
320 const uint16_t *dgd_win = dgd - wiener_halfwin * dgd_stride - wiener_halfwin;
321
322 for (i = v_start; i < v_end; i++) {
323 for (j = h_start; j < h_end; j += 2) {
324 const uint16_t X1 = src[i * src_stride + j];
325 const uint16_t X2 = src[i * src_stride + j + 1];
326 sumX += X1 + X2;
327
328 const uint16_t *dgd_ij = dgd_win + i * dgd_stride + j;
329 for (k = 0; k < wiener_win; k++) {
330 for (l = 0; l < wiener_win; l++) {
331 const uint16_t *dgd_ijkl = dgd_ij + k * dgd_stride + l;
332 int64_t *H_int_temp = &H_int[(l * wiener_win + k)][0];
333 const uint16_t D1 = dgd_ijkl[0];
334 const uint16_t D2 = dgd_ijkl[1];
335 sumY[k][l] += D1 + D2;
336 M_int[l][k] += D1 * X1 + D2 * X2;
337 for (m = 0; m < wiener_win; m++) {
338 for (n = 0; n < wiener_win; n++) {
339 H_int_temp[m * 8 + n] += D1 * dgd_ij[n + dgd_stride * m] +
340 D2 * dgd_ij[n + dgd_stride * m + 1];
341 }
342 }
343 }
344 }
345 }
346 }
347
348 uint8_t bit_depth_divider = 1;
349 if (bit_depth == AOM_BITS_12)
350 bit_depth_divider = 16;
351 else if (bit_depth == AOM_BITS_10)
352 bit_depth_divider = 4;
353
354 const int64_t avg_square_sum = (int64_t)avg * (int64_t)avg * pixel_count;
355 for (k = 0; k < wiener_win; k++) {
356 for (l = 0; l < wiener_win; l++) {
357 M[l * wiener_win + k] =
358 (M_int[l][k] +
359 (avg_square_sum - (int64_t)avg * (sumX + sumY[k][l]))) /
360 bit_depth_divider;
361 for (m = 0; m < wiener_win; m++) {
362 for (n = 0; n < wiener_win; n++) {
363 H[(l * wiener_win + k) * wiener_win2 + m * wiener_win + n] =
364 (H_int[(l * wiener_win + k)][n * 8 + m] +
365 (avg_square_sum - (int64_t)avg * (sumY[k][l] + sumY[n][m]))) /
366 bit_depth_divider;
367 }
368 }
369 }
370 }
371}
372
373void compute_stats_highbd_opt_c(int wiener_win, const uint8_t *dgd,
374 const uint8_t *src, int h_start, int h_end,
375 int v_start, int v_end, int dgd_stride,
376 int src_stride, int64_t *M, int64_t *H,
377 aom_bit_depth_t bit_depth) {
378 if (wiener_win == WIENER_WIN || wiener_win == WIENER_WIN_CHROMA) {
379 compute_stats_highbd_win_opt_c(wiener_win, dgd, src, h_start, h_end,
380 v_start, v_end, dgd_stride, src_stride, M, H,
381 bit_depth);
382 } else {
383 av1_compute_stats_highbd_c(wiener_win, dgd, src, h_start, h_end, v_start,
384 v_end, dgd_stride, src_stride, M, H, bit_depth);
385 }
386}
387
388static const int kIterations = 100;
389typedef void (*compute_stats_Func)(int wiener_win, const uint8_t *dgd,
390 const uint8_t *src, int h_start, int h_end,
391 int v_start, int v_end, int dgd_stride,
392 int src_stride, int64_t *M, int64_t *H,
393 aom_bit_depth_t bit_depth);
394
395typedef libaom_test::FuncParam<compute_stats_Func> TestFuncs;
396
397typedef ::testing::tuple<const compute_stats_Func> WienerTestParam;
398
399class WienerTestHighbd : public ::testing::TestWithParam<WienerTestParam> {
400 public:
David Turner4ca00872018-10-10 15:13:56 +0100401 virtual void SetUp() {
402 src_buf = (uint16_t *)aom_memalign(
403 32, MAX_DATA_BLOCK * MAX_DATA_BLOCK * sizeof(uint16_t));
404 dgd_buf = (uint16_t *)aom_memalign(
405 32, MAX_DATA_BLOCK * MAX_DATA_BLOCK * sizeof(uint16_t));
406 target_func_ = GET_PARAM(0);
407 }
408 virtual void TearDown() {
409 aom_free(src_buf);
410 aom_free(dgd_buf);
411 }
David Turnercfb52502018-09-25 15:38:39 +0100412 void runWienerTest(const int32_t wiener_win, int32_t run_times,
413 aom_bit_depth_t bit_depth);
414 void runWienerTest_ExtremeValues(const int32_t wiener_win,
415 aom_bit_depth_t bit_depth);
416
417 private:
418 compute_stats_Func target_func_;
419 ACMRandom rng_;
David Turner4ca00872018-10-10 15:13:56 +0100420 uint16_t *src_buf;
421 uint16_t *dgd_buf;
David Turnercfb52502018-09-25 15:38:39 +0100422};
423
424void WienerTestHighbd::runWienerTest(const int32_t wiener_win,
425 int32_t run_times,
426 aom_bit_depth_t bit_depth) {
427 const int32_t wiener_halfwin = wiener_win >> 1;
428 const int32_t wiener_win2 = wiener_win * wiener_win;
David Turnercfb52502018-09-25 15:38:39 +0100429 DECLARE_ALIGNED(32, int64_t, M_ref[WIENER_WIN2]);
430 DECLARE_ALIGNED(32, int64_t, H_ref[WIENER_WIN2 * WIENER_WIN2]);
431 DECLARE_ALIGNED(32, int64_t, M_test[WIENER_WIN2]);
432 DECLARE_ALIGNED(32, int64_t, H_test[WIENER_WIN2 * WIENER_WIN2]);
433 const int h_start = ((rng_.Rand16() % (MAX_WIENER_BLOCK / 2)) & (~7));
434 const int h_end =
435 run_times != 1 ? 256 : ((rng_.Rand16() % MAX_WIENER_BLOCK) & (~7)) + 8;
436 const int v_start = ((rng_.Rand16() % (MAX_WIENER_BLOCK / 2)) & (~7));
437 const int v_end =
438 run_times != 1 ? 256 : ((rng_.Rand16() % MAX_WIENER_BLOCK) & (~7)) + 8;
439 const int dgd_stride = h_end;
440 const int src_stride = MAX_DATA_BLOCK;
441 const int iters = run_times == 1 ? kIterations : 2;
442 for (int iter = 0; iter < iters && !HasFatalFailure(); ++iter) {
443 for (int i = 0; i < MAX_DATA_BLOCK * MAX_DATA_BLOCK; ++i) {
444 dgd_buf[i] = rng_.Rand16() % (1 << bit_depth);
445 src_buf[i] = rng_.Rand16() % (1 << bit_depth);
446 }
447 const uint8_t *dgd8 = CONVERT_TO_BYTEPTR(
448 dgd_buf + wiener_halfwin * MAX_DATA_BLOCK + wiener_halfwin);
449 const uint8_t *src8 = CONVERT_TO_BYTEPTR(src_buf);
450
451 aom_usec_timer timer;
452 aom_usec_timer_start(&timer);
453 for (int i = 0; i < run_times; ++i) {
454 av1_compute_stats_highbd_c(wiener_win, dgd8, src8, h_start, h_end,
455 v_start, v_end, dgd_stride, src_stride, M_ref,
456 H_ref, bit_depth);
457 }
458 aom_usec_timer_mark(&timer);
459 const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
460 aom_usec_timer_start(&timer);
461 for (int i = 0; i < run_times; ++i) {
462 target_func_(wiener_win, dgd8, src8, h_start, h_end, v_start, v_end,
463 dgd_stride, src_stride, M_test, H_test, bit_depth);
464 }
465 aom_usec_timer_mark(&timer);
466 const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
467 if (run_times > 10) {
468 printf("win %d bd %d %3dx%-3d:%7.2f/%7.2fns", wiener_win, bit_depth,
469 h_end, v_end, time1, time2);
470 printf("(%3.2f)\n", time1 / time2);
471 }
472 int failed = 0;
473 for (int i = 0; i < wiener_win2; ++i) {
474 if (M_ref[i] != M_test[i]) {
475 failed = 1;
476 printf("win %d bd %d M iter %d [%4d] ref %6" PRId64 " test %6" PRId64
477 " \n",
478 wiener_win, bit_depth, iter, i, M_ref[i], M_test[i]);
479 break;
480 }
481 }
482 for (int i = 0; i < wiener_win2 * wiener_win2; ++i) {
483 if (H_ref[i] != H_test[i]) {
484 failed = 1;
485 printf("win %d bd %d H iter %d [%4d] ref %6" PRId64 " test %6" PRId64
486 " \n",
487 wiener_win, bit_depth, iter, i, H_ref[i], H_test[i]);
488 break;
489 }
490 }
David Turner4ca00872018-10-10 15:13:56 +0100491 ASSERT_EQ(failed, 0);
David Turnercfb52502018-09-25 15:38:39 +0100492 }
David Turnercfb52502018-09-25 15:38:39 +0100493}
494
495void WienerTestHighbd::runWienerTest_ExtremeValues(const int32_t wiener_win,
496 aom_bit_depth_t bit_depth) {
497 const int32_t wiener_halfwin = wiener_win >> 1;
498 const int32_t wiener_win2 = wiener_win * wiener_win;
David Turnercfb52502018-09-25 15:38:39 +0100499 DECLARE_ALIGNED(32, int64_t, M_ref[WIENER_WIN2]);
500 DECLARE_ALIGNED(32, int64_t, H_ref[WIENER_WIN2 * WIENER_WIN2]);
501 DECLARE_ALIGNED(32, int64_t, M_test[WIENER_WIN2]);
502 DECLARE_ALIGNED(32, int64_t, H_test[WIENER_WIN2 * WIENER_WIN2]);
503 const int h_start = 16;
504 const int h_end = MAX_WIENER_BLOCK;
505 const int v_start = 16;
506 const int v_end = MAX_WIENER_BLOCK;
507 const int dgd_stride = h_end;
508 const int src_stride = MAX_DATA_BLOCK;
509 const int iters = 1;
510 for (int iter = 0; iter < iters && !HasFatalFailure(); ++iter) {
511 for (int i = 0; i < MAX_DATA_BLOCK * MAX_DATA_BLOCK; ++i) {
512 dgd_buf[i] = ((uint16_t)1 << bit_depth) - 1;
513 src_buf[i] = ((uint16_t)1 << bit_depth) - 1;
514 }
515 const uint8_t *dgd8 = CONVERT_TO_BYTEPTR(
516 dgd_buf + wiener_halfwin * MAX_DATA_BLOCK + wiener_halfwin);
517 const uint8_t *src8 = CONVERT_TO_BYTEPTR(src_buf);
518
519 av1_compute_stats_highbd_c(wiener_win, dgd8, src8, h_start, h_end, v_start,
520 v_end, dgd_stride, src_stride, M_ref, H_ref,
521 bit_depth);
522
523 target_func_(wiener_win, dgd8, src8, h_start, h_end, v_start, v_end,
524 dgd_stride, src_stride, M_test, H_test, bit_depth);
525
526 int failed = 0;
527 for (int i = 0; i < wiener_win2; ++i) {
528 if (M_ref[i] != M_test[i]) {
529 failed = 1;
530 printf("win %d bd %d M iter %d [%4d] ref %6" PRId64 " test %6" PRId64
531 " \n",
532 wiener_win, bit_depth, iter, i, M_ref[i], M_test[i]);
533 break;
534 }
535 }
536 for (int i = 0; i < wiener_win2 * wiener_win2; ++i) {
537 if (H_ref[i] != H_test[i]) {
538 failed = 1;
539 printf("win %d bd %d H iter %d [%4d] ref %6" PRId64 " test %6" PRId64
540 " \n",
541 wiener_win, bit_depth, iter, i, H_ref[i], H_test[i]);
542 break;
543 }
544 }
David Turner4ca00872018-10-10 15:13:56 +0100545 ASSERT_EQ(failed, 0);
David Turnercfb52502018-09-25 15:38:39 +0100546 }
David Turnercfb52502018-09-25 15:38:39 +0100547}
548
549TEST_P(WienerTestHighbd, RandomValues) {
550 runWienerTest(WIENER_WIN, 1, AOM_BITS_8);
551 runWienerTest(WIENER_WIN_CHROMA, 1, AOM_BITS_8);
552 runWienerTest(WIENER_WIN, 1, AOM_BITS_10);
553 runWienerTest(WIENER_WIN_CHROMA, 1, AOM_BITS_10);
554 runWienerTest(WIENER_WIN, 1, AOM_BITS_12);
555 runWienerTest(WIENER_WIN_CHROMA, 1, AOM_BITS_12);
556}
557
558TEST_P(WienerTestHighbd, ExtremeValues) {
559 runWienerTest_ExtremeValues(WIENER_WIN, AOM_BITS_8);
560 runWienerTest_ExtremeValues(WIENER_WIN_CHROMA, AOM_BITS_8);
561 runWienerTest_ExtremeValues(WIENER_WIN, AOM_BITS_10);
562 runWienerTest_ExtremeValues(WIENER_WIN_CHROMA, AOM_BITS_10);
563 runWienerTest_ExtremeValues(WIENER_WIN, AOM_BITS_12);
564 runWienerTest_ExtremeValues(WIENER_WIN_CHROMA, AOM_BITS_12);
565}
566
567TEST_P(WienerTestHighbd, DISABLED_Speed) {
568 runWienerTest(WIENER_WIN, 200, AOM_BITS_8);
569 runWienerTest(WIENER_WIN_CHROMA, 200, AOM_BITS_8);
570 runWienerTest(WIENER_WIN, 200, AOM_BITS_10);
571 runWienerTest(WIENER_WIN_CHROMA, 200, AOM_BITS_10);
572 runWienerTest(WIENER_WIN, 200, AOM_BITS_12);
573 runWienerTest(WIENER_WIN_CHROMA, 200, AOM_BITS_12);
574}
575
576INSTANTIATE_TEST_CASE_P(C, WienerTestHighbd,
577 ::testing::Values(compute_stats_highbd_opt_c));
578
579#if HAVE_SSE4_1
580INSTANTIATE_TEST_CASE_P(SSE4_1, WienerTestHighbd,
581 ::testing::Values(av1_compute_stats_highbd_sse4_1));
582#endif // HAVE_SSE4_1
583
584#if HAVE_AVX2
585INSTANTIATE_TEST_CASE_P(AVX2, WienerTestHighbd,
586 ::testing::Values(av1_compute_stats_highbd_avx2));
587#endif // HAVE_AVX2
588
589} // namespace wiener_highbd