/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
| 11 | |
| 12 | #include <ctime> |
| 13 | |
| 14 | #include "third_party/googletest/src/googletest/include/gtest/gtest.h" |
| 15 | |
Tom Finegan | 44702c8 | 2018-05-22 13:00:39 -0700 | [diff] [blame] | 16 | #include "config/av1_rtcd.h" |
| 17 | |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 18 | #include "test/acm_random.h" |
| 19 | #include "test/clear_system_state.h" |
| 20 | #include "test/register_state_check.h" |
| 21 | #include "test/util.h" |
| 22 | |
Yaowu Xu | abdf655 | 2017-11-29 08:41:52 -0800 | [diff] [blame] | 23 | #include "aom_ports/aom_timer.h" |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 24 | #include "av1/common/mv.h" |
| 25 | #include "av1/common/restoration.h" |
| 26 | |
| 27 | namespace { |
| 28 | |
Johann | 54fa62e | 2018-09-25 14:09:31 -0700 | [diff] [blame] | 29 | using libaom_test::ACMRandom; |
James Zern | 9561280 | 2018-03-30 11:37:54 -0700 | [diff] [blame] | 30 | using ::testing::make_tuple; |
| 31 | using ::testing::tuple; |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 32 | |
Imdad Sardharwalla | c6acc53 | 2018-01-03 15:18:24 +0000 | [diff] [blame] | 33 | typedef void (*SgrFunc)(const uint8_t *dat8, int width, int height, int stride, |
| 34 | int eps, const int *xqd, uint8_t *dst8, int dst_stride, |
| 35 | int32_t *tmpbuf, int bit_depth, int highbd); |
| 36 | |
| 37 | // Test parameter list: |
| 38 | // <tst_fun_> |
| 39 | typedef tuple<SgrFunc> FilterTestParam; |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 40 | |
| 41 | class AV1SelfguidedFilterTest |
| 42 | : public ::testing::TestWithParam<FilterTestParam> { |
| 43 | public: |
| 44 | virtual ~AV1SelfguidedFilterTest() {} |
| 45 | virtual void SetUp() {} |
| 46 | |
| 47 | virtual void TearDown() { libaom_test::ClearSystemState(); } |
| 48 | |
| 49 | protected: |
| 50 | void RunSpeedTest() { |
Imdad Sardharwalla | c6acc53 | 2018-01-03 15:18:24 +0000 | [diff] [blame] | 51 | tst_fun_ = GET_PARAM(0); |
Debargha Mukherjee | 1330dfd | 2017-09-03 22:22:27 -0700 | [diff] [blame] | 52 | const int pu_width = RESTORATION_PROC_UNIT_SIZE; |
| 53 | const int pu_height = RESTORATION_PROC_UNIT_SIZE; |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 54 | const int width = 256, height = 256, stride = 288, out_stride = 288; |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 55 | const int NUM_ITERS = 2000; |
Debargha Mukherjee | 1330dfd | 2017-09-03 22:22:27 -0700 | [diff] [blame] | 56 | int i, j, k; |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 57 | |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 58 | uint8_t *input_ = |
Imdad Sardharwalla | c6acc53 | 2018-01-03 15:18:24 +0000 | [diff] [blame] | 59 | (uint8_t *)aom_memalign(32, stride * (height + 32) * sizeof(uint8_t)); |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 60 | uint8_t *output_ = (uint8_t *)aom_memalign( |
Imdad Sardharwalla | c6acc53 | 2018-01-03 15:18:24 +0000 | [diff] [blame] | 61 | 32, out_stride * (height + 32) * sizeof(uint8_t)); |
| 62 | int32_t *tmpbuf = (int32_t *)aom_memalign(32, RESTORATION_TMPBUF_SIZE); |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 63 | uint8_t *input = input_ + stride * 16 + 16; |
| 64 | uint8_t *output = output_ + out_stride * 16 + 16; |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 65 | |
| 66 | ACMRandom rnd(ACMRandom::DeterministicSeed()); |
| 67 | |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 68 | for (i = -16; i < height + 16; ++i) |
| 69 | for (j = -16; j < width + 16; ++j) |
| 70 | input[i * stride + j] = rnd.Rand16() & 0xFF; |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 71 | |
Johann | f152ff6 | 2018-02-08 14:33:07 -0800 | [diff] [blame] | 72 | int xqd[2] = { SGRPROJ_PRJ_MIN0 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - |
| 73 | SGRPROJ_PRJ_MIN0), |
| 74 | SGRPROJ_PRJ_MIN1 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - |
| 75 | SGRPROJ_PRJ_MIN1) }; |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 76 | // Fix a parameter set, since the speed depends slightly on r. |
| 77 | // Change this to test different combinations of values of r. |
David Barker | 5765fad | 2017-03-08 11:27:09 +0000 | [diff] [blame] | 78 | int eps = 15; |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 79 | |
| 80 | av1_loop_restoration_precal(); |
| 81 | |
Imdad Sardharwalla | d051e56 | 2018-02-02 09:42:07 +0000 | [diff] [blame] | 82 | aom_usec_timer ref_timer; |
| 83 | aom_usec_timer_start(&ref_timer); |
| 84 | for (i = 0; i < NUM_ITERS; ++i) { |
| 85 | for (k = 0; k < height; k += pu_height) |
| 86 | for (j = 0; j < width; j += pu_width) { |
| 87 | int w = AOMMIN(pu_width, width - j); |
| 88 | int h = AOMMIN(pu_height, height - k); |
| 89 | uint8_t *input_p = input + k * stride + j; |
| 90 | uint8_t *output_p = output + k * out_stride + j; |
| 91 | apply_selfguided_restoration_c(input_p, w, h, stride, eps, xqd, |
| 92 | output_p, out_stride, tmpbuf, 8, 0); |
| 93 | } |
| 94 | } |
| 95 | aom_usec_timer_mark(&ref_timer); |
| 96 | const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer); |
| 97 | |
| 98 | aom_usec_timer tst_timer; |
| 99 | aom_usec_timer_start(&tst_timer); |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 100 | for (i = 0; i < NUM_ITERS; ++i) { |
Debargha Mukherjee | 1330dfd | 2017-09-03 22:22:27 -0700 | [diff] [blame] | 101 | for (k = 0; k < height; k += pu_height) |
| 102 | for (j = 0; j < width; j += pu_width) { |
| 103 | int w = AOMMIN(pu_width, width - j); |
| 104 | int h = AOMMIN(pu_height, height - k); |
| 105 | uint8_t *input_p = input + k * stride + j; |
| 106 | uint8_t *output_p = output + k * out_stride + j; |
Imdad Sardharwalla | c6acc53 | 2018-01-03 15:18:24 +0000 | [diff] [blame] | 107 | tst_fun_(input_p, w, h, stride, eps, xqd, output_p, out_stride, |
| 108 | tmpbuf, 8, 0); |
Debargha Mukherjee | 1330dfd | 2017-09-03 22:22:27 -0700 | [diff] [blame] | 109 | } |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 110 | } |
Imdad Sardharwalla | d051e56 | 2018-02-02 09:42:07 +0000 | [diff] [blame] | 111 | aom_usec_timer_mark(&tst_timer); |
| 112 | const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer); |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 113 | |
Imdad Sardharwalla | d051e56 | 2018-02-02 09:42:07 +0000 | [diff] [blame] | 114 | std::cout << "[ ] C time = " << ref_time / 1000 |
| 115 | << " ms, SIMD time = " << tst_time / 1000 << " ms\n"; |
| 116 | |
| 117 | EXPECT_GT(ref_time, tst_time) |
| 118 | << "Error: AV1SelfguidedFilterTest.SpeedTest, SIMD slower than C.\n" |
| 119 | << "C time: " << ref_time << " us\n" |
| 120 | << "SIMD time: " << tst_time << " us\n"; |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 121 | |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 122 | aom_free(input_); |
| 123 | aom_free(output_); |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 124 | aom_free(tmpbuf); |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 125 | } |
| 126 | |
| 127 | void RunCorrectnessTest() { |
Imdad Sardharwalla | c6acc53 | 2018-01-03 15:18:24 +0000 | [diff] [blame] | 128 | tst_fun_ = GET_PARAM(0); |
Debargha Mukherjee | 1330dfd | 2017-09-03 22:22:27 -0700 | [diff] [blame] | 129 | const int pu_width = RESTORATION_PROC_UNIT_SIZE; |
| 130 | const int pu_height = RESTORATION_PROC_UNIT_SIZE; |
David Barker | bcc5535 | 2017-03-10 15:04:52 +0000 | [diff] [blame] | 131 | // Set the maximum width/height to test here. We actually test a small |
| 132 | // range of sizes *up to* this size, so that we can check, eg., |
| 133 | // the behaviour on tiles which are not a multiple of 4 wide. |
| 134 | const int max_w = 260, max_h = 260, stride = 672, out_stride = 672; |
David Barker | 5765fad | 2017-03-08 11:27:09 +0000 | [diff] [blame] | 135 | const int NUM_ITERS = 81; |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 136 | int i, j, k; |
| 137 | |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 138 | uint8_t *input_ = |
Imdad Sardharwalla | c6acc53 | 2018-01-03 15:18:24 +0000 | [diff] [blame] | 139 | (uint8_t *)aom_memalign(32, stride * (max_h + 32) * sizeof(uint8_t)); |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 140 | uint8_t *output_ = (uint8_t *)aom_memalign( |
Imdad Sardharwalla | c6acc53 | 2018-01-03 15:18:24 +0000 | [diff] [blame] | 141 | 32, out_stride * (max_h + 32) * sizeof(uint8_t)); |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 142 | uint8_t *output2_ = (uint8_t *)aom_memalign( |
Imdad Sardharwalla | c6acc53 | 2018-01-03 15:18:24 +0000 | [diff] [blame] | 143 | 32, out_stride * (max_h + 32) * sizeof(uint8_t)); |
| 144 | int32_t *tmpbuf = (int32_t *)aom_memalign(32, RESTORATION_TMPBUF_SIZE); |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 145 | |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 146 | uint8_t *input = input_ + stride * 16 + 16; |
| 147 | uint8_t *output = output_ + out_stride * 16 + 16; |
| 148 | uint8_t *output2 = output2_ + out_stride * 16 + 16; |
| 149 | |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 150 | ACMRandom rnd(ACMRandom::DeterministicSeed()); |
| 151 | |
| 152 | av1_loop_restoration_precal(); |
| 153 | |
| 154 | for (i = 0; i < NUM_ITERS; ++i) { |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 155 | for (j = -16; j < max_h + 16; ++j) |
| 156 | for (k = -16; k < max_w + 16; ++k) |
| 157 | input[j * stride + k] = rnd.Rand16() & 0xFF; |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 158 | |
Johann | f152ff6 | 2018-02-08 14:33:07 -0800 | [diff] [blame] | 159 | int xqd[2] = { SGRPROJ_PRJ_MIN0 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - |
| 160 | SGRPROJ_PRJ_MIN0), |
| 161 | SGRPROJ_PRJ_MIN1 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - |
| 162 | SGRPROJ_PRJ_MIN1) }; |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 163 | int eps = rnd.PseudoUniform(1 << SGRPROJ_PARAMS_BITS); |
| 164 | |
David Barker | 5765fad | 2017-03-08 11:27:09 +0000 | [diff] [blame] | 165 | // Test various tile sizes around 256x256 |
David Barker | bcc5535 | 2017-03-10 15:04:52 +0000 | [diff] [blame] | 166 | int test_w = max_w - (i / 9); |
| 167 | int test_h = max_h - (i % 9); |
David Barker | 5765fad | 2017-03-08 11:27:09 +0000 | [diff] [blame] | 168 | |
Debargha Mukherjee | 1330dfd | 2017-09-03 22:22:27 -0700 | [diff] [blame] | 169 | for (k = 0; k < test_h; k += pu_height) |
| 170 | for (j = 0; j < test_w; j += pu_width) { |
| 171 | int w = AOMMIN(pu_width, test_w - j); |
| 172 | int h = AOMMIN(pu_height, test_h - k); |
| 173 | uint8_t *input_p = input + k * stride + j; |
| 174 | uint8_t *output_p = output + k * out_stride + j; |
| 175 | uint8_t *output2_p = output2 + k * out_stride + j; |
Imdad Sardharwalla | c6acc53 | 2018-01-03 15:18:24 +0000 | [diff] [blame] | 176 | tst_fun_(input_p, w, h, stride, eps, xqd, output_p, out_stride, |
| 177 | tmpbuf, 8, 0); |
Debargha Mukherjee | 1330dfd | 2017-09-03 22:22:27 -0700 | [diff] [blame] | 178 | apply_selfguided_restoration_c(input_p, w, h, stride, eps, xqd, |
Rupert Swarbrick | 625e50b | 2017-11-22 11:49:55 +0000 | [diff] [blame] | 179 | output2_p, out_stride, tmpbuf, 8, 0); |
Debargha Mukherjee | 1330dfd | 2017-09-03 22:22:27 -0700 | [diff] [blame] | 180 | } |
Imdad Sardharwalla | c6acc53 | 2018-01-03 15:18:24 +0000 | [diff] [blame] | 181 | |
David Barker | 5765fad | 2017-03-08 11:27:09 +0000 | [diff] [blame] | 182 | for (j = 0; j < test_h; ++j) |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 183 | for (k = 0; k < test_w; ++k) { |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 184 | ASSERT_EQ(output[j * out_stride + k], output2[j * out_stride + k]); |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 185 | } |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 186 | } |
| 187 | |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 188 | aom_free(input_); |
| 189 | aom_free(output_); |
| 190 | aom_free(output2_); |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 191 | aom_free(tmpbuf); |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 192 | } |
Imdad Sardharwalla | c6acc53 | 2018-01-03 15:18:24 +0000 | [diff] [blame] | 193 | |
| 194 | private: |
| 195 | SgrFunc tst_fun_; |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 196 | }; |
| 197 | |
Imdad Sardharwalla | c6acc53 | 2018-01-03 15:18:24 +0000 | [diff] [blame] | 198 | TEST_P(AV1SelfguidedFilterTest, DISABLED_SpeedTest) { RunSpeedTest(); } |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 199 | TEST_P(AV1SelfguidedFilterTest, CorrectnessTest) { RunCorrectnessTest(); } |
| 200 | |
// Instantiate the 8-bit tests once per SIMD implementation enabled in this
// build.
#if HAVE_SSE4_1
INSTANTIATE_TEST_CASE_P(
    SSE4_1, AV1SelfguidedFilterTest,
    ::testing::Values(apply_selfguided_restoration_sse4_1));
#endif  // HAVE_SSE4_1

#if HAVE_AVX2
INSTANTIATE_TEST_CASE_P(
    AVX2, AV1SelfguidedFilterTest,
    ::testing::Values(apply_selfguided_restoration_avx2));
#endif  // HAVE_AVX2

#if HAVE_NEON
INSTANTIATE_TEST_CASE_P(
    NEON, AV1SelfguidedFilterTest,
    ::testing::Values(apply_selfguided_restoration_neon));
#endif  // HAVE_NEON
| 215 | |
Imdad Sardharwalla | c6acc53 | 2018-01-03 15:18:24 +0000 | [diff] [blame] | 216 | // Test parameter list: |
| 217 | // <tst_fun_, bit_depth> |
| 218 | typedef tuple<SgrFunc, int> HighbdFilterTestParam; |
David Barker | 4d2af5d | 2017-03-09 11:46:50 +0000 | [diff] [blame] | 219 | |
| 220 | class AV1HighbdSelfguidedFilterTest |
| 221 | : public ::testing::TestWithParam<HighbdFilterTestParam> { |
| 222 | public: |
| 223 | virtual ~AV1HighbdSelfguidedFilterTest() {} |
| 224 | virtual void SetUp() {} |
| 225 | |
| 226 | virtual void TearDown() { libaom_test::ClearSystemState(); } |
| 227 | |
| 228 | protected: |
| 229 | void RunSpeedTest() { |
Imdad Sardharwalla | c6acc53 | 2018-01-03 15:18:24 +0000 | [diff] [blame] | 230 | tst_fun_ = GET_PARAM(0); |
Debargha Mukherjee | 1330dfd | 2017-09-03 22:22:27 -0700 | [diff] [blame] | 231 | const int pu_width = RESTORATION_PROC_UNIT_SIZE; |
| 232 | const int pu_height = RESTORATION_PROC_UNIT_SIZE; |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 233 | const int width = 256, height = 256, stride = 288, out_stride = 288; |
David Barker | 4d2af5d | 2017-03-09 11:46:50 +0000 | [diff] [blame] | 234 | const int NUM_ITERS = 2000; |
Debargha Mukherjee | 1330dfd | 2017-09-03 22:22:27 -0700 | [diff] [blame] | 235 | int i, j, k; |
Imdad Sardharwalla | c6acc53 | 2018-01-03 15:18:24 +0000 | [diff] [blame] | 236 | int bit_depth = GET_PARAM(1); |
David Barker | 4d2af5d | 2017-03-09 11:46:50 +0000 | [diff] [blame] | 237 | int mask = (1 << bit_depth) - 1; |
| 238 | |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 239 | uint16_t *input_ = |
Imdad Sardharwalla | c6acc53 | 2018-01-03 15:18:24 +0000 | [diff] [blame] | 240 | (uint16_t *)aom_memalign(32, stride * (height + 32) * sizeof(uint16_t)); |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 241 | uint16_t *output_ = (uint16_t *)aom_memalign( |
Imdad Sardharwalla | c6acc53 | 2018-01-03 15:18:24 +0000 | [diff] [blame] | 242 | 32, out_stride * (height + 32) * sizeof(uint16_t)); |
| 243 | int32_t *tmpbuf = (int32_t *)aom_memalign(32, RESTORATION_TMPBUF_SIZE); |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 244 | uint16_t *input = input_ + stride * 16 + 16; |
| 245 | uint16_t *output = output_ + out_stride * 16 + 16; |
David Barker | 4d2af5d | 2017-03-09 11:46:50 +0000 | [diff] [blame] | 246 | |
| 247 | ACMRandom rnd(ACMRandom::DeterministicSeed()); |
| 248 | |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 249 | for (i = -16; i < height + 16; ++i) |
| 250 | for (j = -16; j < width + 16; ++j) |
| 251 | input[i * stride + j] = rnd.Rand16() & mask; |
David Barker | 4d2af5d | 2017-03-09 11:46:50 +0000 | [diff] [blame] | 252 | |
Johann | f152ff6 | 2018-02-08 14:33:07 -0800 | [diff] [blame] | 253 | int xqd[2] = { SGRPROJ_PRJ_MIN0 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - |
| 254 | SGRPROJ_PRJ_MIN0), |
| 255 | SGRPROJ_PRJ_MIN1 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - |
| 256 | SGRPROJ_PRJ_MIN1) }; |
David Barker | 4d2af5d | 2017-03-09 11:46:50 +0000 | [diff] [blame] | 257 | // Fix a parameter set, since the speed depends slightly on r. |
| 258 | // Change this to test different combinations of values of r. |
| 259 | int eps = 15; |
| 260 | |
| 261 | av1_loop_restoration_precal(); |
| 262 | |
Imdad Sardharwalla | d051e56 | 2018-02-02 09:42:07 +0000 | [diff] [blame] | 263 | aom_usec_timer ref_timer; |
| 264 | aom_usec_timer_start(&ref_timer); |
| 265 | for (i = 0; i < NUM_ITERS; ++i) { |
| 266 | for (k = 0; k < height; k += pu_height) |
| 267 | for (j = 0; j < width; j += pu_width) { |
| 268 | int w = AOMMIN(pu_width, width - j); |
| 269 | int h = AOMMIN(pu_height, height - k); |
| 270 | uint16_t *input_p = input + k * stride + j; |
| 271 | uint16_t *output_p = output + k * out_stride + j; |
| 272 | apply_selfguided_restoration_c( |
| 273 | CONVERT_TO_BYTEPTR(input_p), w, h, stride, eps, xqd, |
| 274 | CONVERT_TO_BYTEPTR(output_p), out_stride, tmpbuf, bit_depth, 1); |
| 275 | } |
| 276 | } |
| 277 | aom_usec_timer_mark(&ref_timer); |
| 278 | const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer); |
| 279 | |
| 280 | aom_usec_timer tst_timer; |
| 281 | aom_usec_timer_start(&tst_timer); |
David Barker | 4d2af5d | 2017-03-09 11:46:50 +0000 | [diff] [blame] | 282 | for (i = 0; i < NUM_ITERS; ++i) { |
Debargha Mukherjee | 1330dfd | 2017-09-03 22:22:27 -0700 | [diff] [blame] | 283 | for (k = 0; k < height; k += pu_height) |
| 284 | for (j = 0; j < width; j += pu_width) { |
| 285 | int w = AOMMIN(pu_width, width - j); |
| 286 | int h = AOMMIN(pu_height, height - k); |
| 287 | uint16_t *input_p = input + k * stride + j; |
| 288 | uint16_t *output_p = output + k * out_stride + j; |
Imdad Sardharwalla | c6acc53 | 2018-01-03 15:18:24 +0000 | [diff] [blame] | 289 | tst_fun_(CONVERT_TO_BYTEPTR(input_p), w, h, stride, eps, xqd, |
| 290 | CONVERT_TO_BYTEPTR(output_p), out_stride, tmpbuf, bit_depth, |
| 291 | 1); |
Debargha Mukherjee | 1330dfd | 2017-09-03 22:22:27 -0700 | [diff] [blame] | 292 | } |
David Barker | 4d2af5d | 2017-03-09 11:46:50 +0000 | [diff] [blame] | 293 | } |
Imdad Sardharwalla | d051e56 | 2018-02-02 09:42:07 +0000 | [diff] [blame] | 294 | aom_usec_timer_mark(&tst_timer); |
| 295 | const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer); |
David Barker | 4d2af5d | 2017-03-09 11:46:50 +0000 | [diff] [blame] | 296 | |
Imdad Sardharwalla | d051e56 | 2018-02-02 09:42:07 +0000 | [diff] [blame] | 297 | std::cout << "[ ] C time = " << ref_time / 1000 |
| 298 | << " ms, SIMD time = " << tst_time / 1000 << " ms\n"; |
| 299 | |
| 300 | EXPECT_GT(ref_time, tst_time) |
| 301 | << "Error: AV1HighbdSelfguidedFilterTest.SpeedTest, SIMD slower than " |
| 302 | "C.\n" |
| 303 | << "C time: " << ref_time << " us\n" |
| 304 | << "SIMD time: " << tst_time << " us\n"; |
David Barker | 4d2af5d | 2017-03-09 11:46:50 +0000 | [diff] [blame] | 305 | |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 306 | aom_free(input_); |
| 307 | aom_free(output_); |
David Barker | 4d2af5d | 2017-03-09 11:46:50 +0000 | [diff] [blame] | 308 | aom_free(tmpbuf); |
David Barker | 4d2af5d | 2017-03-09 11:46:50 +0000 | [diff] [blame] | 309 | } |
| 310 | |
| 311 | void RunCorrectnessTest() { |
Imdad Sardharwalla | c6acc53 | 2018-01-03 15:18:24 +0000 | [diff] [blame] | 312 | tst_fun_ = GET_PARAM(0); |
Debargha Mukherjee | 1330dfd | 2017-09-03 22:22:27 -0700 | [diff] [blame] | 313 | const int pu_width = RESTORATION_PROC_UNIT_SIZE; |
| 314 | const int pu_height = RESTORATION_PROC_UNIT_SIZE; |
David Barker | bcc5535 | 2017-03-10 15:04:52 +0000 | [diff] [blame] | 315 | // Set the maximum width/height to test here. We actually test a small |
| 316 | // range of sizes *up to* this size, so that we can check, eg., |
| 317 | // the behaviour on tiles which are not a multiple of 4 wide. |
| 318 | const int max_w = 260, max_h = 260, stride = 672, out_stride = 672; |
David Barker | 4d2af5d | 2017-03-09 11:46:50 +0000 | [diff] [blame] | 319 | const int NUM_ITERS = 81; |
| 320 | int i, j, k; |
Imdad Sardharwalla | c6acc53 | 2018-01-03 15:18:24 +0000 | [diff] [blame] | 321 | int bit_depth = GET_PARAM(1); |
David Barker | 4d2af5d | 2017-03-09 11:46:50 +0000 | [diff] [blame] | 322 | int mask = (1 << bit_depth) - 1; |
| 323 | |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 324 | uint16_t *input_ = |
Imdad Sardharwalla | c6acc53 | 2018-01-03 15:18:24 +0000 | [diff] [blame] | 325 | (uint16_t *)aom_memalign(32, stride * (max_h + 32) * sizeof(uint16_t)); |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 326 | uint16_t *output_ = (uint16_t *)aom_memalign( |
Imdad Sardharwalla | c6acc53 | 2018-01-03 15:18:24 +0000 | [diff] [blame] | 327 | 32, out_stride * (max_h + 32) * sizeof(uint16_t)); |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 328 | uint16_t *output2_ = (uint16_t *)aom_memalign( |
Imdad Sardharwalla | c6acc53 | 2018-01-03 15:18:24 +0000 | [diff] [blame] | 329 | 32, out_stride * (max_h + 32) * sizeof(uint16_t)); |
| 330 | int32_t *tmpbuf = (int32_t *)aom_memalign(32, RESTORATION_TMPBUF_SIZE); |
David Barker | 4d2af5d | 2017-03-09 11:46:50 +0000 | [diff] [blame] | 331 | |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 332 | uint16_t *input = input_ + stride * 16 + 16; |
| 333 | uint16_t *output = output_ + out_stride * 16 + 16; |
| 334 | uint16_t *output2 = output2_ + out_stride * 16 + 16; |
| 335 | |
David Barker | 4d2af5d | 2017-03-09 11:46:50 +0000 | [diff] [blame] | 336 | ACMRandom rnd(ACMRandom::DeterministicSeed()); |
| 337 | |
| 338 | av1_loop_restoration_precal(); |
| 339 | |
| 340 | for (i = 0; i < NUM_ITERS; ++i) { |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 341 | for (j = -16; j < max_h + 16; ++j) |
| 342 | for (k = -16; k < max_w + 16; ++k) |
| 343 | input[j * stride + k] = rnd.Rand16() & mask; |
David Barker | 4d2af5d | 2017-03-09 11:46:50 +0000 | [diff] [blame] | 344 | |
Johann | f152ff6 | 2018-02-08 14:33:07 -0800 | [diff] [blame] | 345 | int xqd[2] = { SGRPROJ_PRJ_MIN0 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - |
| 346 | SGRPROJ_PRJ_MIN0), |
| 347 | SGRPROJ_PRJ_MIN1 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - |
| 348 | SGRPROJ_PRJ_MIN1) }; |
David Barker | 4d2af5d | 2017-03-09 11:46:50 +0000 | [diff] [blame] | 349 | int eps = rnd.PseudoUniform(1 << SGRPROJ_PARAMS_BITS); |
| 350 | |
| 351 | // Test various tile sizes around 256x256 |
David Barker | bcc5535 | 2017-03-10 15:04:52 +0000 | [diff] [blame] | 352 | int test_w = max_w - (i / 9); |
| 353 | int test_h = max_h - (i % 9); |
David Barker | 4d2af5d | 2017-03-09 11:46:50 +0000 | [diff] [blame] | 354 | |
Debargha Mukherjee | 1330dfd | 2017-09-03 22:22:27 -0700 | [diff] [blame] | 355 | for (k = 0; k < test_h; k += pu_height) |
| 356 | for (j = 0; j < test_w; j += pu_width) { |
| 357 | int w = AOMMIN(pu_width, test_w - j); |
| 358 | int h = AOMMIN(pu_height, test_h - k); |
| 359 | uint16_t *input_p = input + k * stride + j; |
| 360 | uint16_t *output_p = output + k * out_stride + j; |
| 361 | uint16_t *output2_p = output2 + k * out_stride + j; |
Imdad Sardharwalla | c6acc53 | 2018-01-03 15:18:24 +0000 | [diff] [blame] | 362 | tst_fun_(CONVERT_TO_BYTEPTR(input_p), w, h, stride, eps, xqd, |
| 363 | CONVERT_TO_BYTEPTR(output_p), out_stride, tmpbuf, bit_depth, |
| 364 | 1); |
Rupert Swarbrick | 625e50b | 2017-11-22 11:49:55 +0000 | [diff] [blame] | 365 | apply_selfguided_restoration_c( |
| 366 | CONVERT_TO_BYTEPTR(input_p), w, h, stride, eps, xqd, |
| 367 | CONVERT_TO_BYTEPTR(output2_p), out_stride, tmpbuf, bit_depth, 1); |
Debargha Mukherjee | 1330dfd | 2017-09-03 22:22:27 -0700 | [diff] [blame] | 368 | } |
| 369 | |
David Barker | 4d2af5d | 2017-03-09 11:46:50 +0000 | [diff] [blame] | 370 | for (j = 0; j < test_h; ++j) |
| 371 | for (k = 0; k < test_w; ++k) |
| 372 | ASSERT_EQ(output[j * out_stride + k], output2[j * out_stride + k]); |
| 373 | } |
| 374 | |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 375 | aom_free(input_); |
| 376 | aom_free(output_); |
| 377 | aom_free(output2_); |
David Barker | 4d2af5d | 2017-03-09 11:46:50 +0000 | [diff] [blame] | 378 | aom_free(tmpbuf); |
David Barker | 4d2af5d | 2017-03-09 11:46:50 +0000 | [diff] [blame] | 379 | } |
Imdad Sardharwalla | c6acc53 | 2018-01-03 15:18:24 +0000 | [diff] [blame] | 380 | |
| 381 | private: |
| 382 | SgrFunc tst_fun_; |
David Barker | 4d2af5d | 2017-03-09 11:46:50 +0000 | [diff] [blame] | 383 | }; |
| 384 | |
Imdad Sardharwalla | c6acc53 | 2018-01-03 15:18:24 +0000 | [diff] [blame] | 385 | TEST_P(AV1HighbdSelfguidedFilterTest, DISABLED_SpeedTest) { RunSpeedTest(); } |
David Barker | 4d2af5d | 2017-03-09 11:46:50 +0000 | [diff] [blame] | 386 | TEST_P(AV1HighbdSelfguidedFilterTest, CorrectnessTest) { RunCorrectnessTest(); } |
| 387 | |
// Instantiate the high-bit-depth tests once per SIMD implementation enabled
// in this build, each at bit depths 8, 10 and 12.
#if HAVE_SSE4_1
const int highbd_params_sse4_1[] = { 8, 10, 12 };
INSTANTIATE_TEST_CASE_P(
    SSE4_1, AV1HighbdSelfguidedFilterTest,
    ::testing::Combine(::testing::Values(apply_selfguided_restoration_sse4_1),
                       ::testing::ValuesIn(highbd_params_sse4_1)));
#endif  // HAVE_SSE4_1

#if HAVE_AVX2
const int highbd_params_avx2[] = { 8, 10, 12 };
INSTANTIATE_TEST_CASE_P(
    AVX2, AV1HighbdSelfguidedFilterTest,
    ::testing::Combine(::testing::Values(apply_selfguided_restoration_avx2),
                       ::testing::ValuesIn(highbd_params_avx2)));
#endif  // HAVE_AVX2

#if HAVE_NEON
const int highbd_params_neon[] = { 8, 10, 12 };
INSTANTIATE_TEST_CASE_P(
    NEON, AV1HighbdSelfguidedFilterTest,
    ::testing::Combine(::testing::Values(apply_selfguided_restoration_neon),
                       ::testing::ValuesIn(highbd_params_neon)));
#endif  // HAVE_NEON
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 410 | } // namespace |