David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (c) 2016, Alliance for Open Media. All rights reserved |
| 3 | * |
| 4 | * This source code is subject to the terms of the BSD 2 Clause License and |
| 5 | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
| 6 | * was not distributed with this source code in the LICENSE file, you can |
| 7 | * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
| 8 | * Media Patent License 1.0 was not distributed with this source code in the |
| 9 | * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
| 10 | */ |
| 11 | |
| 12 | #include <ctime> |
| 13 | |
| 14 | #include "third_party/googletest/src/googletest/include/gtest/gtest.h" |
| 15 | |
| 16 | #include "./av1_rtcd.h" |
| 17 | #include "test/acm_random.h" |
| 18 | #include "test/clear_system_state.h" |
| 19 | #include "test/register_state_check.h" |
| 20 | #include "test/util.h" |
| 21 | |
| 22 | #include "av1/common/mv.h" |
| 23 | #include "av1/common/restoration.h" |
| 24 | |
| 25 | namespace { |
| 26 | |
| 27 | using std::tr1::tuple; |
| 28 | using std::tr1::make_tuple; |
| 29 | using libaom_test::ACMRandom; |
| 30 | |
| 31 | typedef tuple<> FilterTestParam; |
| 32 | |
| 33 | class AV1SelfguidedFilterTest |
| 34 | : public ::testing::TestWithParam<FilterTestParam> { |
| 35 | public: |
| 36 | virtual ~AV1SelfguidedFilterTest() {} |
| 37 | virtual void SetUp() {} |
| 38 | |
| 39 | virtual void TearDown() { libaom_test::ClearSystemState(); } |
| 40 | |
| 41 | protected: |
| 42 | void RunSpeedTest() { |
| 43 | const int w = 256, h = 256; |
| 44 | const int NUM_ITERS = 2000; |
| 45 | int i, j; |
| 46 | |
David Barker | 7e08ac3 | 2017-03-20 10:05:21 +0000 | [diff] [blame] | 47 | uint8_t *input = (uint8_t *)aom_memalign(16, w * h * sizeof(uint8_t)); |
| 48 | uint8_t *output = (uint8_t *)aom_memalign(16, w * h * sizeof(uint8_t)); |
Yaowu Xu | c3f5bd1 | 2017-03-12 18:27:38 -0700 | [diff] [blame] | 49 | int32_t *tmpbuf = (int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE); |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 50 | memset(tmpbuf, 0, RESTORATION_TMPBUF_SIZE); |
| 51 | |
| 52 | ACMRandom rnd(ACMRandom::DeterministicSeed()); |
| 53 | |
| 54 | for (i = 0; i < h; ++i) |
| 55 | for (j = 0; j < w; ++j) input[i * w + j] = rnd.Rand16() & 0xFF; |
| 56 | |
| 57 | int xqd[2] = { |
| 58 | SGRPROJ_PRJ_MIN0 + |
| 59 | rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - SGRPROJ_PRJ_MIN0), |
| 60 | SGRPROJ_PRJ_MIN1 + |
| 61 | rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - SGRPROJ_PRJ_MIN1) |
| 62 | }; |
| 63 | // Fix a parameter set, since the speed depends slightly on r. |
| 64 | // Change this to test different combinations of values of r. |
David Barker | 5765fad | 2017-03-08 11:27:09 +0000 | [diff] [blame] | 65 | int eps = 15; |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 66 | |
| 67 | av1_loop_restoration_precal(); |
| 68 | |
| 69 | std::clock_t start = std::clock(); |
| 70 | for (i = 0; i < NUM_ITERS; ++i) { |
David Barker | 4d2af5d | 2017-03-09 11:46:50 +0000 | [diff] [blame] | 71 | apply_selfguided_restoration(input, w, h, w, eps, xqd, output, w, tmpbuf); |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 72 | } |
| 73 | std::clock_t end = std::clock(); |
| 74 | double elapsed = ((end - start) / (double)CLOCKS_PER_SEC); |
| 75 | |
| 76 | printf("%5d %dx%d blocks in %7.3fs = %7.3fus/block\n", NUM_ITERS, w, h, |
| 77 | elapsed, elapsed * 1000000. / NUM_ITERS); |
| 78 | |
David Barker | 7e08ac3 | 2017-03-20 10:05:21 +0000 | [diff] [blame] | 79 | aom_free(input); |
| 80 | aom_free(output); |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 81 | aom_free(tmpbuf); |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 82 | } |
| 83 | |
| 84 | void RunCorrectnessTest() { |
David Barker | bcc5535 | 2017-03-10 15:04:52 +0000 | [diff] [blame] | 85 | // Set the maximum width/height to test here. We actually test a small |
| 86 | // range of sizes *up to* this size, so that we can check, eg., |
| 87 | // the behaviour on tiles which are not a multiple of 4 wide. |
| 88 | const int max_w = 260, max_h = 260, stride = 672, out_stride = 672; |
David Barker | 5765fad | 2017-03-08 11:27:09 +0000 | [diff] [blame] | 89 | const int NUM_ITERS = 81; |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 90 | int i, j, k; |
| 91 | |
David Barker | 7e08ac3 | 2017-03-20 10:05:21 +0000 | [diff] [blame] | 92 | uint8_t *input = |
| 93 | (uint8_t *)aom_memalign(16, stride * max_h * sizeof(uint8_t)); |
| 94 | uint8_t *output = |
| 95 | (uint8_t *)aom_memalign(16, out_stride * max_h * sizeof(uint8_t)); |
| 96 | uint8_t *output2 = |
| 97 | (uint8_t *)aom_memalign(16, out_stride * max_h * sizeof(uint8_t)); |
Yaowu Xu | c3f5bd1 | 2017-03-12 18:27:38 -0700 | [diff] [blame] | 98 | int32_t *tmpbuf = (int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE); |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 99 | memset(tmpbuf, 0, RESTORATION_TMPBUF_SIZE); |
| 100 | |
| 101 | ACMRandom rnd(ACMRandom::DeterministicSeed()); |
| 102 | |
| 103 | av1_loop_restoration_precal(); |
| 104 | |
| 105 | for (i = 0; i < NUM_ITERS; ++i) { |
David Barker | bcc5535 | 2017-03-10 15:04:52 +0000 | [diff] [blame] | 106 | for (j = 0; j < max_h; ++j) |
| 107 | for (k = 0; k < max_w; ++k) input[j * stride + k] = rnd.Rand16() & 0xFF; |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 108 | |
| 109 | int xqd[2] = { |
| 110 | SGRPROJ_PRJ_MIN0 + |
| 111 | rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - SGRPROJ_PRJ_MIN0), |
| 112 | SGRPROJ_PRJ_MIN1 + |
| 113 | rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - SGRPROJ_PRJ_MIN1) |
| 114 | }; |
| 115 | int eps = rnd.PseudoUniform(1 << SGRPROJ_PARAMS_BITS); |
| 116 | |
David Barker | 5765fad | 2017-03-08 11:27:09 +0000 | [diff] [blame] | 117 | // Test various tile sizes around 256x256 |
David Barker | bcc5535 | 2017-03-10 15:04:52 +0000 | [diff] [blame] | 118 | int test_w = max_w - (i / 9); |
| 119 | int test_h = max_h - (i % 9); |
David Barker | 5765fad | 2017-03-08 11:27:09 +0000 | [diff] [blame] | 120 | |
David Barker | 4d2af5d | 2017-03-09 11:46:50 +0000 | [diff] [blame] | 121 | apply_selfguided_restoration(input, test_w, test_h, stride, eps, xqd, |
David Barker | 5765fad | 2017-03-08 11:27:09 +0000 | [diff] [blame] | 122 | output, out_stride, tmpbuf); |
David Barker | 4d2af5d | 2017-03-09 11:46:50 +0000 | [diff] [blame] | 123 | apply_selfguided_restoration_c(input, test_w, test_h, stride, eps, xqd, |
David Barker | 5765fad | 2017-03-08 11:27:09 +0000 | [diff] [blame] | 124 | output2, out_stride, tmpbuf); |
| 125 | for (j = 0; j < test_h; ++j) |
| 126 | for (k = 0; k < test_w; ++k) |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 127 | ASSERT_EQ(output[j * out_stride + k], output2[j * out_stride + k]); |
| 128 | } |
| 129 | |
David Barker | 7e08ac3 | 2017-03-20 10:05:21 +0000 | [diff] [blame] | 130 | aom_free(input); |
| 131 | aom_free(output); |
| 132 | aom_free(output2); |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 133 | aom_free(tmpbuf); |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 134 | } |
| 135 | }; |
| 136 | |
| 137 | TEST_P(AV1SelfguidedFilterTest, SpeedTest) { RunSpeedTest(); } |
| 138 | TEST_P(AV1SelfguidedFilterTest, CorrectnessTest) { RunCorrectnessTest(); } |
| 139 | |
| 140 | const FilterTestParam params[] = { make_tuple() }; |
| 141 | |
| 142 | #if HAVE_SSE4_1 |
| 143 | INSTANTIATE_TEST_CASE_P(SSE4_1, AV1SelfguidedFilterTest, |
| 144 | ::testing::ValuesIn(params)); |
| 145 | #endif |
| 146 | |
Sebastien Alaiwan | 71e8784 | 2017-04-12 16:03:28 +0200 | [diff] [blame] | 147 | #if CONFIG_HIGHBITDEPTH |
David Barker | 4d2af5d | 2017-03-09 11:46:50 +0000 | [diff] [blame] | 148 | |
| 149 | typedef tuple<int> HighbdFilterTestParam; |
| 150 | |
| 151 | class AV1HighbdSelfguidedFilterTest |
| 152 | : public ::testing::TestWithParam<HighbdFilterTestParam> { |
| 153 | public: |
| 154 | virtual ~AV1HighbdSelfguidedFilterTest() {} |
| 155 | virtual void SetUp() {} |
| 156 | |
| 157 | virtual void TearDown() { libaom_test::ClearSystemState(); } |
| 158 | |
| 159 | protected: |
| 160 | void RunSpeedTest() { |
| 161 | const int w = 256, h = 256; |
| 162 | const int NUM_ITERS = 2000; |
| 163 | int i, j; |
| 164 | int bit_depth = GET_PARAM(0); |
| 165 | int mask = (1 << bit_depth) - 1; |
| 166 | |
David Barker | 7e08ac3 | 2017-03-20 10:05:21 +0000 | [diff] [blame] | 167 | uint16_t *input = (uint16_t *)aom_memalign(16, w * h * sizeof(uint16_t)); |
| 168 | uint16_t *output = (uint16_t *)aom_memalign(16, w * h * sizeof(uint16_t)); |
Yaowu Xu | c3f5bd1 | 2017-03-12 18:27:38 -0700 | [diff] [blame] | 169 | int32_t *tmpbuf = (int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE); |
David Barker | 4d2af5d | 2017-03-09 11:46:50 +0000 | [diff] [blame] | 170 | memset(tmpbuf, 0, RESTORATION_TMPBUF_SIZE); |
| 171 | |
| 172 | ACMRandom rnd(ACMRandom::DeterministicSeed()); |
| 173 | |
| 174 | for (i = 0; i < h; ++i) |
| 175 | for (j = 0; j < w; ++j) input[i * w + j] = rnd.Rand16() & mask; |
| 176 | |
| 177 | int xqd[2] = { |
| 178 | SGRPROJ_PRJ_MIN0 + |
| 179 | rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - SGRPROJ_PRJ_MIN0), |
| 180 | SGRPROJ_PRJ_MIN1 + |
| 181 | rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - SGRPROJ_PRJ_MIN1) |
| 182 | }; |
| 183 | // Fix a parameter set, since the speed depends slightly on r. |
| 184 | // Change this to test different combinations of values of r. |
| 185 | int eps = 15; |
| 186 | |
| 187 | av1_loop_restoration_precal(); |
| 188 | |
| 189 | std::clock_t start = std::clock(); |
| 190 | for (i = 0; i < NUM_ITERS; ++i) { |
| 191 | apply_selfguided_restoration_highbd(input, w, h, w, bit_depth, eps, xqd, |
| 192 | output, w, tmpbuf); |
| 193 | } |
| 194 | std::clock_t end = std::clock(); |
| 195 | double elapsed = ((end - start) / (double)CLOCKS_PER_SEC); |
| 196 | |
| 197 | printf("%5d %dx%d blocks in %7.3fs = %7.3fus/block\n", NUM_ITERS, w, h, |
| 198 | elapsed, elapsed * 1000000. / NUM_ITERS); |
| 199 | |
David Barker | 7e08ac3 | 2017-03-20 10:05:21 +0000 | [diff] [blame] | 200 | aom_free(input); |
| 201 | aom_free(output); |
David Barker | 4d2af5d | 2017-03-09 11:46:50 +0000 | [diff] [blame] | 202 | aom_free(tmpbuf); |
David Barker | 4d2af5d | 2017-03-09 11:46:50 +0000 | [diff] [blame] | 203 | } |
| 204 | |
| 205 | void RunCorrectnessTest() { |
David Barker | bcc5535 | 2017-03-10 15:04:52 +0000 | [diff] [blame] | 206 | // Set the maximum width/height to test here. We actually test a small |
| 207 | // range of sizes *up to* this size, so that we can check, eg., |
| 208 | // the behaviour on tiles which are not a multiple of 4 wide. |
| 209 | const int max_w = 260, max_h = 260, stride = 672, out_stride = 672; |
David Barker | 4d2af5d | 2017-03-09 11:46:50 +0000 | [diff] [blame] | 210 | const int NUM_ITERS = 81; |
| 211 | int i, j, k; |
| 212 | int bit_depth = GET_PARAM(0); |
| 213 | int mask = (1 << bit_depth) - 1; |
| 214 | |
David Barker | 7e08ac3 | 2017-03-20 10:05:21 +0000 | [diff] [blame] | 215 | uint16_t *input = |
| 216 | (uint16_t *)aom_memalign(16, stride * max_h * sizeof(uint16_t)); |
| 217 | uint16_t *output = |
| 218 | (uint16_t *)aom_memalign(16, out_stride * max_h * sizeof(uint16_t)); |
| 219 | uint16_t *output2 = |
| 220 | (uint16_t *)aom_memalign(16, out_stride * max_h * sizeof(uint16_t)); |
Yaowu Xu | c3f5bd1 | 2017-03-12 18:27:38 -0700 | [diff] [blame] | 221 | int32_t *tmpbuf = (int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE); |
David Barker | 4d2af5d | 2017-03-09 11:46:50 +0000 | [diff] [blame] | 222 | memset(tmpbuf, 0, RESTORATION_TMPBUF_SIZE); |
| 223 | |
| 224 | ACMRandom rnd(ACMRandom::DeterministicSeed()); |
| 225 | |
| 226 | av1_loop_restoration_precal(); |
| 227 | |
| 228 | for (i = 0; i < NUM_ITERS; ++i) { |
David Barker | bcc5535 | 2017-03-10 15:04:52 +0000 | [diff] [blame] | 229 | for (j = 0; j < max_h; ++j) |
| 230 | for (k = 0; k < max_w; ++k) input[j * stride + k] = rnd.Rand16() & mask; |
David Barker | 4d2af5d | 2017-03-09 11:46:50 +0000 | [diff] [blame] | 231 | |
| 232 | int xqd[2] = { |
| 233 | SGRPROJ_PRJ_MIN0 + |
| 234 | rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - SGRPROJ_PRJ_MIN0), |
| 235 | SGRPROJ_PRJ_MIN1 + |
| 236 | rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - SGRPROJ_PRJ_MIN1) |
| 237 | }; |
| 238 | int eps = rnd.PseudoUniform(1 << SGRPROJ_PARAMS_BITS); |
| 239 | |
| 240 | // Test various tile sizes around 256x256 |
David Barker | bcc5535 | 2017-03-10 15:04:52 +0000 | [diff] [blame] | 241 | int test_w = max_w - (i / 9); |
| 242 | int test_h = max_h - (i % 9); |
David Barker | 4d2af5d | 2017-03-09 11:46:50 +0000 | [diff] [blame] | 243 | |
| 244 | apply_selfguided_restoration_highbd(input, test_w, test_h, stride, |
| 245 | bit_depth, eps, xqd, output, |
| 246 | out_stride, tmpbuf); |
| 247 | apply_selfguided_restoration_highbd_c(input, test_w, test_h, stride, |
| 248 | bit_depth, eps, xqd, output2, |
| 249 | out_stride, tmpbuf); |
| 250 | for (j = 0; j < test_h; ++j) |
| 251 | for (k = 0; k < test_w; ++k) |
| 252 | ASSERT_EQ(output[j * out_stride + k], output2[j * out_stride + k]); |
| 253 | } |
| 254 | |
David Barker | 7e08ac3 | 2017-03-20 10:05:21 +0000 | [diff] [blame] | 255 | aom_free(input); |
| 256 | aom_free(output); |
| 257 | aom_free(output2); |
David Barker | 4d2af5d | 2017-03-09 11:46:50 +0000 | [diff] [blame] | 258 | aom_free(tmpbuf); |
David Barker | 4d2af5d | 2017-03-09 11:46:50 +0000 | [diff] [blame] | 259 | } |
| 260 | }; |
| 261 | |
| 262 | TEST_P(AV1HighbdSelfguidedFilterTest, SpeedTest) { RunSpeedTest(); } |
| 263 | TEST_P(AV1HighbdSelfguidedFilterTest, CorrectnessTest) { RunCorrectnessTest(); } |
| 264 | |
| 265 | const HighbdFilterTestParam highbd_params[] = { make_tuple(8), make_tuple(10), |
| 266 | make_tuple(12) }; |
| 267 | |
| 268 | #if HAVE_SSE4_1 |
| 269 | INSTANTIATE_TEST_CASE_P(SSE4_1, AV1HighbdSelfguidedFilterTest, |
| 270 | ::testing::ValuesIn(highbd_params)); |
| 271 | #endif |
| 272 | #endif |
| 273 | |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 274 | } // namespace |