Imdad Sardharwalla | 454697c | 2018-01-10 14:19:31 +0000 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (c) 2018, Alliance for Open Media. All rights reserved |
| 3 | * |
| 4 | * This source code is subject to the terms of the BSD 2 Clause License and |
| 5 | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
| 6 | * was not distributed with this source code in the LICENSE file, you can |
| 7 | * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
| 8 | * Media Patent License 1.0 was not distributed with this source code in the |
| 9 | * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
| 10 | */ |
| 11 | |
sarahparker | a543df5 | 2018-11-02 16:02:05 -0700 | [diff] [blame] | 12 | #include <tuple> |
Imdad Sardharwalla | 454697c | 2018-01-10 14:19:31 +0000 | [diff] [blame] | 13 | #include <vector> |
| 14 | |
| 15 | #include "third_party/googletest/src/googletest/include/gtest/gtest.h" |
| 16 | |
Tom Finegan | 44702c8 | 2018-05-22 13:00:39 -0700 | [diff] [blame] | 17 | #include "config/av1_rtcd.h" |
| 18 | |
Imdad Sardharwalla | 454697c | 2018-01-10 14:19:31 +0000 | [diff] [blame] | 19 | #include "aom_ports/aom_timer.h" |
| 20 | #include "av1/common/convolve.h" |
| 21 | #include "av1/common/resize.h" |
| 22 | #include "test/acm_random.h" |
Imdad Sardharwalla | 454697c | 2018-01-10 14:19:31 +0000 | [diff] [blame] | 23 | #include "test/register_state_check.h" |
| 24 | #include "test/util.h" |
| 25 | |
| 26 | namespace { |
// Number of outer rounds in the correctness test; each round covers every
// superres denominator once.
constexpr int kTestIters = 10;
// Number of back-to-back calls timed per implementation in the speed test.
constexpr int kPerfIters = 1000;

// Border sizes, in pixels, around the image area of each test buffer:
// kVPad rows above and below, kHPad columns to the left and right.
constexpr int kVPad = 32;
constexpr int kHPad = 32;
| 32 | |
Johann | 54fa62e | 2018-09-25 14:09:31 -0700 | [diff] [blame] | 33 | using libaom_test::ACMRandom; |
sarahparker | a543df5 | 2018-11-02 16:02:05 -0700 | [diff] [blame] | 34 | using std::make_tuple; |
| 35 | using std::tuple; |
Imdad Sardharwalla | 454697c | 2018-01-10 14:19:31 +0000 | [diff] [blame] | 36 | |
| 37 | template <typename Pixel> |
| 38 | class TestImage { |
| 39 | public: |
| 40 | TestImage(int w_src, int h, int superres_denom, int x0, int bd) |
| 41 | : w_src_(w_src), h_(h), superres_denom_(superres_denom), x0_(x0), |
| 42 | bd_(bd) { |
| 43 | assert(bd < 16); |
| 44 | assert(bd <= 8 * static_cast<int>(sizeof(Pixel))); |
| 45 | assert(9 <= superres_denom && superres_denom <= 16); |
| 46 | assert(SCALE_NUMERATOR == 8); |
| 47 | assert(0 <= x0_ && x0_ <= RS_SCALE_SUBPEL_MASK); |
| 48 | |
| 49 | w_dst_ = w_src_; |
James Zern | 664f04d | 2022-05-24 17:30:58 -0700 | [diff] [blame] | 50 | av1_calculate_unscaled_superres_size(&w_dst_, nullptr, superres_denom); |
Imdad Sardharwalla | 454697c | 2018-01-10 14:19:31 +0000 | [diff] [blame] | 51 | |
| 52 | src_stride_ = ALIGN_POWER_OF_TWO(w_src_ + 2 * kHPad, 4); |
| 53 | dst_stride_ = ALIGN_POWER_OF_TWO(w_dst_ + 2 * kHPad, 4); |
| 54 | |
| 55 | // Allocate image data |
| 56 | src_data_.resize(2 * src_block_size()); |
| 57 | dst_data_.resize(2 * dst_block_size()); |
| 58 | } |
| 59 | |
| 60 | void Initialize(ACMRandom *rnd); |
| 61 | void Check() const; |
| 62 | |
| 63 | int src_stride() const { return src_stride_; } |
| 64 | int dst_stride() const { return dst_stride_; } |
| 65 | |
| 66 | int src_block_size() const { return (h_ + 2 * kVPad) * src_stride(); } |
| 67 | int dst_block_size() const { return (h_ + 2 * kVPad) * dst_stride(); } |
| 68 | |
| 69 | int src_width() const { return w_src_; } |
| 70 | int dst_width() const { return w_dst_; } |
| 71 | int height() const { return h_; } |
| 72 | int x0() const { return x0_; } |
| 73 | |
| 74 | const Pixel *GetSrcData(bool ref, bool borders) const { |
| 75 | const Pixel *block = &src_data_[ref ? 0 : src_block_size()]; |
| 76 | return borders ? block : block + kHPad + src_stride_ * kVPad; |
| 77 | } |
| 78 | |
| 79 | Pixel *GetDstData(bool ref, bool borders) { |
| 80 | Pixel *block = &dst_data_[ref ? 0 : dst_block_size()]; |
| 81 | return borders ? block : block + kHPad + dst_stride_ * kVPad; |
| 82 | } |
| 83 | |
| 84 | private: |
| 85 | int w_src_, w_dst_, h_, superres_denom_, x0_, bd_; |
| 86 | int src_stride_, dst_stride_; |
| 87 | |
| 88 | std::vector<Pixel> src_data_; |
| 89 | std::vector<Pixel> dst_data_; |
| 90 | }; |
| 91 | |
| 92 | template <typename Pixel> |
| 93 | void FillEdge(ACMRandom *rnd, int num_pixels, int bd, bool trash, Pixel *data) { |
| 94 | if (!trash) { |
| 95 | memset(data, 0, sizeof(*data) * num_pixels); |
| 96 | return; |
| 97 | } |
| 98 | const Pixel mask = (1 << bd) - 1; |
| 99 | for (int i = 0; i < num_pixels; ++i) data[i] = rnd->Rand16() & mask; |
| 100 | } |
| 101 | |
| 102 | template <typename Pixel> |
| 103 | void PrepBuffers(ACMRandom *rnd, int w, int h, int stride, int bd, |
| 104 | bool trash_edges, Pixel *data) { |
| 105 | assert(rnd); |
| 106 | const Pixel mask = (1 << bd) - 1; |
| 107 | |
| 108 | // Fill in the first buffer with random data |
| 109 | // Top border |
| 110 | FillEdge(rnd, stride * kVPad, bd, trash_edges, data); |
| 111 | for (int r = 0; r < h; ++r) { |
| 112 | Pixel *row_data = data + (kVPad + r) * stride; |
| 113 | // Left border, contents, right border |
| 114 | FillEdge(rnd, kHPad, bd, trash_edges, row_data); |
| 115 | for (int c = 0; c < w; ++c) row_data[kHPad + c] = rnd->Rand16() & mask; |
| 116 | FillEdge(rnd, kHPad, bd, trash_edges, row_data + kHPad + w); |
| 117 | } |
| 118 | // Bottom border |
| 119 | FillEdge(rnd, stride * kVPad, bd, trash_edges, data + stride * (kVPad + h)); |
| 120 | |
| 121 | const int bpp = sizeof(*data); |
| 122 | const int block_elts = stride * (h + 2 * kVPad); |
| 123 | const int block_size = bpp * block_elts; |
| 124 | |
| 125 | // Now copy that to the second buffer |
| 126 | memcpy(data + block_elts, data, block_size); |
| 127 | } |
| 128 | |
| 129 | template <typename Pixel> |
| 130 | void TestImage<Pixel>::Initialize(ACMRandom *rnd) { |
| 131 | PrepBuffers(rnd, w_src_, h_, src_stride_, bd_, false, &src_data_[0]); |
| 132 | PrepBuffers(rnd, w_dst_, h_, dst_stride_, bd_, true, &dst_data_[0]); |
| 133 | } |
| 134 | |
| 135 | template <typename Pixel> |
| 136 | void TestImage<Pixel>::Check() const { |
| 137 | const int num_pixels = dst_block_size(); |
| 138 | const Pixel *ref_dst = &dst_data_[0]; |
| 139 | const Pixel *tst_dst = &dst_data_[num_pixels]; |
| 140 | |
| 141 | // If memcmp returns 0, there's nothing to do. |
| 142 | if (0 == memcmp(ref_dst, tst_dst, sizeof(*ref_dst) * num_pixels)) return; |
| 143 | |
| 144 | // Otherwise, iterate through the buffer looking for differences, *ignoring |
| 145 | // the edges* |
| 146 | const int stride = dst_stride_; |
| 147 | for (int r = kVPad; r < h_ + kVPad; ++r) { |
| 148 | for (int c = kVPad; c < w_dst_ + kHPad; ++c) { |
| 149 | const int32_t ref_value = ref_dst[r * stride + c]; |
| 150 | const int32_t tst_value = tst_dst[r * stride + c]; |
| 151 | |
| 152 | EXPECT_EQ(tst_value, ref_value) |
| 153 | << "Error at row: " << (r - kVPad) << ", col: " << (c - kHPad) |
| 154 | << ", superres_denom: " << superres_denom_ << ", height: " << h_ |
| 155 | << ", src_width: " << w_src_ << ", dst_width: " << w_dst_ |
| 156 | << ", x0: " << x0_; |
| 157 | } |
| 158 | } |
| 159 | } |
| 160 | |
| 161 | template <typename Pixel> |
| 162 | class ConvolveHorizRSTestBase : public ::testing::Test { |
| 163 | public: |
James Zern | 664f04d | 2022-05-24 17:30:58 -0700 | [diff] [blame] | 164 | ConvolveHorizRSTestBase() : image_(nullptr) {} |
Imdad Sardharwalla | 454697c | 2018-01-10 14:19:31 +0000 | [diff] [blame] | 165 | virtual ~ConvolveHorizRSTestBase() {} |
chiyotsai | 6ddbede | 2021-06-30 14:24:15 -0700 | [diff] [blame] | 166 | virtual void TearDown() {} |
Imdad Sardharwalla | 454697c | 2018-01-10 14:19:31 +0000 | [diff] [blame] | 167 | |
| 168 | // Implemented by subclasses (SetUp depends on the parameters passed |
| 169 | // in and RunOne depends on the function to be tested. These can't |
| 170 | // be templated for low/high bit depths because they have different |
| 171 | // numbers of parameters) |
| 172 | virtual void SetUp() = 0; |
| 173 | virtual void RunOne(bool ref) = 0; |
| 174 | |
| 175 | protected: |
| 176 | void SetBitDepth(int bd) { bd_ = bd; } |
| 177 | |
| 178 | void CorrectnessTest() { |
| 179 | ACMRandom rnd(ACMRandom::DeterministicSeed()); |
| 180 | for (int i = 0; i < kTestIters; ++i) { |
| 181 | for (int superres_denom = 9; superres_denom <= 16; superres_denom++) { |
| 182 | // Get a random height between 512 and 767 |
| 183 | int height = rnd.Rand8() + 512; |
| 184 | |
| 185 | // Get a random src width between 128 and 383 |
| 186 | int width_src = rnd.Rand8() + 128; |
| 187 | |
| 188 | // x0 is normally calculated by get_upscale_convolve_x0 in |
| 189 | // av1/common/resize.c. However, this test should work for |
| 190 | // any value of x0 between 0 and RS_SCALE_SUBPEL_MASK |
| 191 | // (inclusive), so we choose one at random. |
| 192 | int x0 = rnd.Rand16() % (RS_SCALE_SUBPEL_MASK + 1); |
| 193 | |
| 194 | image_ = |
| 195 | new TestImage<Pixel>(width_src, height, superres_denom, x0, bd_); |
James Zern | 1633b59 | 2021-11-22 15:48:34 -0800 | [diff] [blame] | 196 | ASSERT_NE(image_, nullptr); |
Imdad Sardharwalla | 454697c | 2018-01-10 14:19:31 +0000 | [diff] [blame] | 197 | |
| 198 | Prep(&rnd); |
| 199 | RunOne(true); |
| 200 | RunOne(false); |
| 201 | image_->Check(); |
| 202 | |
| 203 | delete image_; |
| 204 | } |
| 205 | } |
| 206 | } |
| 207 | |
| 208 | void SpeedTest() { |
| 209 | // Pick some specific parameters to test |
| 210 | int height = 767; |
| 211 | int width_src = 129; |
| 212 | int superres_denom = 13; |
| 213 | int x0 = RS_SCALE_SUBPEL_MASK >> 1; |
| 214 | |
| 215 | image_ = new TestImage<Pixel>(width_src, height, superres_denom, x0, bd_); |
James Zern | 1633b59 | 2021-11-22 15:48:34 -0800 | [diff] [blame] | 216 | ASSERT_NE(image_, nullptr); |
Imdad Sardharwalla | 454697c | 2018-01-10 14:19:31 +0000 | [diff] [blame] | 217 | |
| 218 | ACMRandom rnd(ACMRandom::DeterministicSeed()); |
| 219 | Prep(&rnd); |
| 220 | |
| 221 | aom_usec_timer ref_timer; |
| 222 | aom_usec_timer_start(&ref_timer); |
| 223 | for (int i = 0; i < kPerfIters; ++i) RunOne(true); |
| 224 | aom_usec_timer_mark(&ref_timer); |
| 225 | const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer); |
| 226 | |
| 227 | aom_usec_timer tst_timer; |
| 228 | aom_usec_timer_start(&tst_timer); |
| 229 | for (int i = 0; i < kPerfIters; ++i) RunOne(false); |
| 230 | aom_usec_timer_mark(&tst_timer); |
| 231 | const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer); |
| 232 | |
| 233 | std::cout << "[ ] C time = " << ref_time / 1000 |
| 234 | << " ms, SIMD time = " << tst_time / 1000 << " ms\n"; |
| 235 | |
| 236 | EXPECT_GT(ref_time, tst_time) |
Imdad Sardharwalla | 48d23d5 | 2018-04-27 13:25:08 +0100 | [diff] [blame] | 237 | << "Error: ConvolveHorizRSTest (Speed Test), SIMD slower than C.\n" |
Imdad Sardharwalla | 454697c | 2018-01-10 14:19:31 +0000 | [diff] [blame] | 238 | << "C time: " << ref_time << " us\n" |
| 239 | << "SIMD time: " << tst_time << " us\n"; |
| 240 | } |
| 241 | |
| 242 | void Prep(ACMRandom *rnd) { |
| 243 | assert(rnd); |
| 244 | image_->Initialize(rnd); |
| 245 | } |
| 246 | |
| 247 | int bd_; |
| 248 | TestImage<Pixel> *image_; |
| 249 | }; |
| 250 | |
// Signature shared by the 8-bit horizontal superres convolve
// implementations exercised by this test.
using LowBDConvolveHorizRsFunc = void (*)(const uint8_t *src, int src_stride,
                                          uint8_t *dst, int dst_stride, int w,
                                          int h, const int16_t *x_filters,
                                          const int x0_qn,
                                          const int x_step_qn);
| 255 | |
| 256 | // Test parameter list: |
| 257 | // <tst_fun_> |
| 258 | typedef tuple<LowBDConvolveHorizRsFunc> LowBDParams; |
| 259 | |
| 260 | class LowBDConvolveHorizRSTest |
| 261 | : public ConvolveHorizRSTestBase<uint8_t>, |
| 262 | public ::testing::WithParamInterface<LowBDParams> { |
| 263 | public: |
| 264 | virtual ~LowBDConvolveHorizRSTest() {} |
| 265 | |
| 266 | void SetUp() { |
| 267 | tst_fun_ = GET_PARAM(0); |
| 268 | const int bd = 8; |
| 269 | SetBitDepth(bd); |
| 270 | } |
| 271 | |
| 272 | void RunOne(bool ref) { |
| 273 | const uint8_t *src = image_->GetSrcData(ref, false); |
| 274 | uint8_t *dst = image_->GetDstData(ref, false); |
| 275 | const int src_stride = image_->src_stride(); |
| 276 | const int dst_stride = image_->dst_stride(); |
| 277 | const int width_src = image_->src_width(); |
| 278 | const int width_dst = image_->dst_width(); |
| 279 | const int height = image_->height(); |
| 280 | const int x0_qn = image_->x0(); |
| 281 | |
| 282 | const int32_t x_step_qn = |
| 283 | av1_get_upscale_convolve_step(width_src, width_dst); |
| 284 | |
| 285 | if (ref) { |
| 286 | av1_convolve_horiz_rs_c(src, src_stride, dst, dst_stride, width_dst, |
| 287 | height, &av1_resize_filter_normative[0][0], x0_qn, |
| 288 | x_step_qn); |
| 289 | } else { |
| 290 | tst_fun_(src, src_stride, dst, dst_stride, width_dst, height, |
| 291 | &av1_resize_filter_normative[0][0], x0_qn, x_step_qn); |
| 292 | } |
| 293 | } |
| 294 | |
| 295 | private: |
| 296 | LowBDConvolveHorizRsFunc tst_fun_; |
| 297 | }; |
| 298 | |
| 299 | TEST_P(LowBDConvolveHorizRSTest, Correctness) { CorrectnessTest(); } |
| 300 | TEST_P(LowBDConvolveHorizRSTest, DISABLED_Speed) { SpeedTest(); } |
| 301 | |
Cheng Chen | 96786fe | 2020-02-14 17:28:25 -0800 | [diff] [blame] | 302 | INSTANTIATE_TEST_SUITE_P(SSE4_1, LowBDConvolveHorizRSTest, |
| 303 | ::testing::Values(av1_convolve_horiz_rs_sse4_1)); |
Imdad Sardharwalla | 454697c | 2018-01-10 14:19:31 +0000 | [diff] [blame] | 304 | |
Jerome Jiang | ebba9c7 | 2019-09-20 12:23:01 -0700 | [diff] [blame] | 305 | #if CONFIG_AV1_HIGHBITDEPTH |
// Signature shared by the high bit depth horizontal superres convolve
// implementations exercised by this test; `bd` is the bit depth.
using HighBDConvolveHorizRsFunc = void (*)(const uint16_t *src, int src_stride,
                                           uint16_t *dst, int dst_stride,
                                           int w, int h,
                                           const int16_t *x_filters,
                                           const int x0_qn,
                                           const int x_step_qn, int bd);
| 311 | |
| 312 | // Test parameter list: |
| 313 | // <tst_fun_, bd_> |
| 314 | typedef tuple<HighBDConvolveHorizRsFunc, int> HighBDParams; |
| 315 | |
| 316 | class HighBDConvolveHorizRSTest |
| 317 | : public ConvolveHorizRSTestBase<uint16_t>, |
| 318 | public ::testing::WithParamInterface<HighBDParams> { |
| 319 | public: |
| 320 | virtual ~HighBDConvolveHorizRSTest() {} |
| 321 | |
| 322 | void SetUp() { |
| 323 | tst_fun_ = GET_PARAM(0); |
| 324 | const int bd = GET_PARAM(1); |
| 325 | SetBitDepth(bd); |
| 326 | } |
| 327 | |
| 328 | void RunOne(bool ref) { |
| 329 | const uint16_t *src = image_->GetSrcData(ref, false); |
| 330 | uint16_t *dst = image_->GetDstData(ref, false); |
| 331 | const int src_stride = image_->src_stride(); |
| 332 | const int dst_stride = image_->dst_stride(); |
| 333 | const int width_src = image_->src_width(); |
| 334 | const int width_dst = image_->dst_width(); |
| 335 | const int height = image_->height(); |
| 336 | const int x0_qn = image_->x0(); |
| 337 | |
| 338 | const int32_t x_step_qn = |
| 339 | av1_get_upscale_convolve_step(width_src, width_dst); |
| 340 | |
| 341 | if (ref) { |
| 342 | av1_highbd_convolve_horiz_rs_c( |
| 343 | src, src_stride, dst, dst_stride, width_dst, height, |
| 344 | &av1_resize_filter_normative[0][0], x0_qn, x_step_qn, bd_); |
| 345 | } else { |
| 346 | tst_fun_(src, src_stride, dst, dst_stride, width_dst, height, |
| 347 | &av1_resize_filter_normative[0][0], x0_qn, x_step_qn, bd_); |
| 348 | } |
| 349 | } |
| 350 | |
| 351 | private: |
| 352 | HighBDConvolveHorizRsFunc tst_fun_; |
| 353 | }; |
| 354 | |
| 355 | const int kBDs[] = { 8, 10, 12 }; |
| 356 | |
| 357 | TEST_P(HighBDConvolveHorizRSTest, Correctness) { CorrectnessTest(); } |
| 358 | TEST_P(HighBDConvolveHorizRSTest, DISABLED_Speed) { SpeedTest(); } |
| 359 | |
Cheng Chen | 96786fe | 2020-02-14 17:28:25 -0800 | [diff] [blame] | 360 | INSTANTIATE_TEST_SUITE_P( |
Imdad Sardharwalla | 454697c | 2018-01-10 14:19:31 +0000 | [diff] [blame] | 361 | SSE4_1, HighBDConvolveHorizRSTest, |
| 362 | ::testing::Combine(::testing::Values(av1_highbd_convolve_horiz_rs_sse4_1), |
| 363 | ::testing::ValuesIn(kBDs))); |
Jerome Jiang | ebba9c7 | 2019-09-20 12:23:01 -0700 | [diff] [blame] | 364 | #endif // CONFIG_AV1_HIGHBITDEPTH |
Imdad Sardharwalla | 454697c | 2018-01-10 14:19:31 +0000 | [diff] [blame] | 365 | |
| 366 | } // namespace |