Rupert Swarbrick | 1ea7ab4 | 2017-10-04 09:40:37 +0100 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (c) 2017, Alliance for Open Media. All rights reserved |
| 3 | * |
| 4 | * This source code is subject to the terms of the BSD 2 Clause License and |
| 5 | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
| 6 | * was not distributed with this source code in the LICENSE file, you can |
| 7 | * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
| 8 | * Media Patent License 1.0 was not distributed with this source code in the |
| 9 | * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
| 10 | */ |
| 11 | |
| 12 | #include <vector> |
| 13 | |
| 14 | #include "third_party/googletest/src/googletest/include/gtest/gtest.h" |
| 15 | |
Tom Finegan | 44702c8 | 2018-05-22 13:00:39 -0700 | [diff] [blame] | 16 | #include "config/av1_rtcd.h" |
| 17 | |
Rupert Swarbrick | 1ea7ab4 | 2017-10-04 09:40:37 +0100 | [diff] [blame] | 18 | #include "aom_ports/aom_timer.h" |
| 19 | #include "test/acm_random.h" |
| 20 | #include "test/clear_system_state.h" |
| 21 | #include "test/register_state_check.h" |
| 22 | #include "test/util.h" |
| 23 | |
Cheng Chen | 03c7549 | 2017-11-02 16:38:14 -0700 | [diff] [blame] | 24 | #include "av1/common/common_data.h" |
Cheng Chen | 03c7549 | 2017-11-02 16:38:14 -0700 | [diff] [blame] | 25 | |
Rupert Swarbrick | 1ea7ab4 | 2017-10-04 09:40:37 +0100 | [diff] [blame] | 26 | namespace { |
// Iteration counts: kPerfIters is the number of calls timed per implementation
// in SpeedTest(); kTestIters is the outer loop count in Run().
const int kPerfIters = 1000;
const int kTestIters = 10;

// Border, in pixels, kept on each side of every test image: kHPad columns to
// the left and right, kVPad rows above and below.
const int kHPad = 32;
const int kVPad = 32;

// Values passed as x_step_qn / y_step_qn to the convolve functions.
const int kYStepQn = 20;
const int kXStepQn = 16;
| 34 | |
James Zern | 9561280 | 2018-03-30 11:37:54 -0700 | [diff] [blame] | 35 | using ::testing::make_tuple; |
| 36 | using ::testing::tuple; |
Rupert Swarbrick | 1ea7ab4 | 2017-10-04 09:40:37 +0100 | [diff] [blame] | 37 | using libaom_test::ACMRandom; |
| 38 | |
// Supported filter lengths. The enumerators are consecutive starting at zero,
// which NTapsToInt() relies on.
enum NTaps { EIGHT_TAP, TEN_TAP, TWELVE_TAP };

// Converts an NTaps enumerator to its tap count (8, 10 or 12).
int NTapsToInt(NTaps ntaps) {
  const int index = static_cast<int>(ntaps);
  return 2 * index + 8;
}
| 41 | |
| 42 | // A 16-bit filter with a configurable number of taps. |
| 43 | class TestFilter { |
| 44 | public: |
| 45 | void set(NTaps ntaps, bool backwards); |
| 46 | |
| 47 | InterpFilterParams params_; |
| 48 | |
| 49 | private: |
| 50 | std::vector<int16_t> coeffs_; |
| 51 | }; |
| 52 | |
| 53 | void TestFilter::set(NTaps ntaps, bool backwards) { |
| 54 | const int n = NTapsToInt(ntaps); |
| 55 | assert(n >= 8 && n <= 12); |
| 56 | |
| 57 | // The filter has n * SUBPEL_SHIFTS proper elements and an extra 8 bogus |
| 58 | // elements at the end so that convolutions can read off the end safely. |
| 59 | coeffs_.resize(n * SUBPEL_SHIFTS + 8); |
| 60 | |
| 61 | // The coefficients are pretty much arbitrary, but convolutions shouldn't |
| 62 | // over or underflow. For the first filter (subpels = 0), we use an |
| 63 | // increasing or decreasing ramp (depending on the backwards parameter). We |
| 64 | // don't want any zero coefficients, so we make it have an x-intercept at -1 |
| 65 | // or n. To ensure absence of under/overflow, we normalise the area under the |
| 66 | // ramp to be I = 1 << FILTER_BITS (so that convolving a constant function |
| 67 | // gives the identity). |
| 68 | // |
| 69 | // When increasing, the function has the form: |
| 70 | // |
| 71 | // f(x) = A * (x + 1) |
| 72 | // |
| 73 | // Summing and rearranging for A gives A = 2 * I / (n * (n + 1)). If the |
| 74 | // filter is reversed, we have the same A but with formula |
| 75 | // |
| 76 | // g(x) = A * (n - x) |
| 77 | const int I = 1 << FILTER_BITS; |
| 78 | const float A = 2.f * I / (n * (n + 1.f)); |
| 79 | for (int i = 0; i < n; ++i) { |
| 80 | coeffs_[i] = static_cast<int16_t>(A * (backwards ? (n - i) : (i + 1))); |
| 81 | } |
| 82 | |
| 83 | // For the other filters, make them slightly different by swapping two |
| 84 | // columns. Filter k will have the columns (k % n) and (7 * k) % n swapped. |
| 85 | const size_t filter_size = sizeof(coeffs_[0] * n); |
| 86 | int16_t *const filter0 = &coeffs_[0]; |
| 87 | for (int k = 1; k < SUBPEL_SHIFTS; ++k) { |
| 88 | int16_t *filterk = &coeffs_[k * n]; |
| 89 | memcpy(filterk, filter0, filter_size); |
| 90 | |
| 91 | const int idx0 = k % n; |
| 92 | const int idx1 = (7 * k) % n; |
| 93 | |
| 94 | const int16_t tmp = filterk[idx0]; |
| 95 | filterk[idx0] = filterk[idx1]; |
| 96 | filterk[idx1] = tmp; |
| 97 | } |
| 98 | |
| 99 | // Finally, write some rubbish at the end to make sure we don't use it. |
| 100 | for (int i = 0; i < 8; ++i) coeffs_[n * SUBPEL_SHIFTS + i] = 123 + i; |
| 101 | |
| 102 | // Fill in params |
| 103 | params_.filter_ptr = &coeffs_[0]; |
| 104 | params_.taps = n; |
| 105 | // These are ignored by the functions being tested. Set them to whatever. |
| 106 | params_.subpel_shifts = SUBPEL_SHIFTS; |
| 107 | params_.interp_filter = EIGHTTAP_REGULAR; |
| 108 | } |
| 109 | |
| 110 | template <typename SrcPixel> |
| 111 | class TestImage { |
| 112 | public: |
| 113 | TestImage(int w, int h, int bd) : w_(w), h_(h), bd_(bd) { |
| 114 | assert(bd < 16); |
| 115 | assert(bd <= 8 * static_cast<int>(sizeof(SrcPixel))); |
| 116 | |
| 117 | // Pad width by 2*kHPad and then round up to the next multiple of 16 |
| 118 | // to get src_stride_. Add another 16 for dst_stride_ (to make sure |
| 119 | // something goes wrong if we use the wrong one) |
| 120 | src_stride_ = (w_ + 2 * kHPad + 15) & ~15; |
| 121 | dst_stride_ = src_stride_ + 16; |
| 122 | |
| 123 | // Allocate image data |
Rupert Swarbrick | 98dc22b | 2017-10-04 09:45:51 +0100 | [diff] [blame] | 124 | src_data_.resize(2 * src_block_size()); |
| 125 | dst_data_.resize(2 * dst_block_size()); |
Cherma Rajan A | a7be368 | 2018-03-20 10:00:51 +0530 | [diff] [blame] | 126 | dst_16_data_.resize(2 * dst_block_size()); |
Rupert Swarbrick | 1ea7ab4 | 2017-10-04 09:40:37 +0100 | [diff] [blame] | 127 | } |
| 128 | |
| 129 | void Initialize(ACMRandom *rnd); |
Rupert Swarbrick | 98dc22b | 2017-10-04 09:45:51 +0100 | [diff] [blame] | 130 | void Check() const; |
Rupert Swarbrick | 1ea7ab4 | 2017-10-04 09:40:37 +0100 | [diff] [blame] | 131 | |
| 132 | int src_stride() const { return src_stride_; } |
| 133 | int dst_stride() const { return dst_stride_; } |
| 134 | |
| 135 | int src_block_size() const { return (h_ + 2 * kVPad) * src_stride(); } |
| 136 | int dst_block_size() const { return (h_ + 2 * kVPad) * dst_stride(); } |
| 137 | |
Rupert Swarbrick | 98dc22b | 2017-10-04 09:45:51 +0100 | [diff] [blame] | 138 | const SrcPixel *GetSrcData(bool ref, bool borders) const { |
| 139 | const SrcPixel *block = &src_data_[ref ? 0 : src_block_size()]; |
Rupert Swarbrick | 1ea7ab4 | 2017-10-04 09:40:37 +0100 | [diff] [blame] | 140 | return borders ? block : block + kHPad + src_stride_ * kVPad; |
| 141 | } |
| 142 | |
Cherma Rajan A | a7be368 | 2018-03-20 10:00:51 +0530 | [diff] [blame] | 143 | SrcPixel *GetDstData(bool ref, bool borders) { |
| 144 | SrcPixel *block = &dst_data_[ref ? 0 : dst_block_size()]; |
| 145 | return borders ? block : block + kHPad + dst_stride_ * kVPad; |
| 146 | } |
| 147 | |
| 148 | CONV_BUF_TYPE *GetDst16Data(bool ref, bool borders) { |
| 149 | CONV_BUF_TYPE *block = &dst_16_data_[ref ? 0 : dst_block_size()]; |
Rupert Swarbrick | 1ea7ab4 | 2017-10-04 09:40:37 +0100 | [diff] [blame] | 150 | return borders ? block : block + kHPad + dst_stride_ * kVPad; |
| 151 | } |
| 152 | |
| 153 | private: |
| 154 | int w_, h_, bd_; |
| 155 | int src_stride_, dst_stride_; |
| 156 | |
| 157 | std::vector<SrcPixel> src_data_; |
Cherma Rajan A | a7be368 | 2018-03-20 10:00:51 +0530 | [diff] [blame] | 158 | std::vector<SrcPixel> dst_data_; |
| 159 | std::vector<CONV_BUF_TYPE> dst_16_data_; |
Rupert Swarbrick | 1ea7ab4 | 2017-10-04 09:40:37 +0100 | [diff] [blame] | 160 | }; |
| 161 | |
| 162 | template <typename Pixel> |
| 163 | void FillEdge(ACMRandom *rnd, int num_pixels, int bd, bool trash, Pixel *data) { |
| 164 | if (!trash) { |
| 165 | memset(data, 0, sizeof(*data) * num_pixels); |
| 166 | return; |
| 167 | } |
| 168 | const Pixel mask = (1 << bd) - 1; |
| 169 | for (int i = 0; i < num_pixels; ++i) data[i] = rnd->Rand16() & mask; |
| 170 | } |
| 171 | |
| 172 | template <typename Pixel> |
| 173 | void PrepBuffers(ACMRandom *rnd, int w, int h, int stride, int bd, |
| 174 | bool trash_edges, Pixel *data) { |
| 175 | assert(rnd); |
| 176 | const Pixel mask = (1 << bd) - 1; |
| 177 | |
Rupert Swarbrick | 98dc22b | 2017-10-04 09:45:51 +0100 | [diff] [blame] | 178 | // Fill in the first buffer with random data |
Rupert Swarbrick | 1ea7ab4 | 2017-10-04 09:40:37 +0100 | [diff] [blame] | 179 | // Top border |
| 180 | FillEdge(rnd, stride * kVPad, bd, trash_edges, data); |
| 181 | for (int r = 0; r < h; ++r) { |
| 182 | Pixel *row_data = data + (kVPad + r) * stride; |
| 183 | // Left border, contents, right border |
| 184 | FillEdge(rnd, kHPad, bd, trash_edges, row_data); |
| 185 | for (int c = 0; c < w; ++c) row_data[kHPad + c] = rnd->Rand16() & mask; |
| 186 | FillEdge(rnd, kHPad, bd, trash_edges, row_data + kHPad + w); |
| 187 | } |
| 188 | // Bottom border |
| 189 | FillEdge(rnd, stride * kVPad, bd, trash_edges, data + stride * (kVPad + h)); |
Rupert Swarbrick | 98dc22b | 2017-10-04 09:45:51 +0100 | [diff] [blame] | 190 | |
| 191 | const int bpp = sizeof(*data); |
| 192 | const int block_elts = stride * (h + 2 * kVPad); |
| 193 | const int block_size = bpp * block_elts; |
| 194 | |
| 195 | // Now copy that to the second buffer |
| 196 | memcpy(data + block_elts, data, block_size); |
Rupert Swarbrick | 1ea7ab4 | 2017-10-04 09:40:37 +0100 | [diff] [blame] | 197 | } |
| 198 | |
| 199 | template <typename SrcPixel> |
| 200 | void TestImage<SrcPixel>::Initialize(ACMRandom *rnd) { |
| 201 | PrepBuffers(rnd, w_, h_, src_stride_, bd_, false, &src_data_[0]); |
| 202 | PrepBuffers(rnd, w_, h_, dst_stride_, bd_, true, &dst_data_[0]); |
Cherma Rajan A | a7be368 | 2018-03-20 10:00:51 +0530 | [diff] [blame] | 203 | PrepBuffers(rnd, w_, h_, dst_stride_, bd_, true, &dst_16_data_[0]); |
Rupert Swarbrick | 1ea7ab4 | 2017-10-04 09:40:37 +0100 | [diff] [blame] | 204 | } |
| 205 | |
Rupert Swarbrick | 98dc22b | 2017-10-04 09:45:51 +0100 | [diff] [blame] | 206 | template <typename SrcPixel> |
| 207 | void TestImage<SrcPixel>::Check() const { |
| 208 | // If memcmp returns 0, there's nothing to do. |
| 209 | const int num_pixels = dst_block_size(); |
Cherma Rajan A | a7be368 | 2018-03-20 10:00:51 +0530 | [diff] [blame] | 210 | const SrcPixel *ref_dst = &dst_data_[0]; |
| 211 | const SrcPixel *tst_dst = &dst_data_[num_pixels]; |
Rupert Swarbrick | 98dc22b | 2017-10-04 09:45:51 +0100 | [diff] [blame] | 212 | |
Cherma Rajan A | a7be368 | 2018-03-20 10:00:51 +0530 | [diff] [blame] | 213 | const CONV_BUF_TYPE *ref_16_dst = &dst_16_data_[0]; |
| 214 | const CONV_BUF_TYPE *tst_16_dst = &dst_16_data_[num_pixels]; |
Rupert Swarbrick | 98dc22b | 2017-10-04 09:45:51 +0100 | [diff] [blame] | 215 | |
Cherma Rajan A | a7be368 | 2018-03-20 10:00:51 +0530 | [diff] [blame] | 216 | if (0 == memcmp(ref_dst, tst_dst, sizeof(*ref_dst) * num_pixels)) { |
| 217 | if (0 == memcmp(ref_16_dst, tst_16_dst, sizeof(*ref_16_dst) * num_pixels)) |
| 218 | return; |
| 219 | } |
Rupert Swarbrick | 98dc22b | 2017-10-04 09:45:51 +0100 | [diff] [blame] | 220 | // Otherwise, iterate through the buffer looking for differences (including |
| 221 | // the edges) |
| 222 | const int stride = dst_stride_; |
| 223 | for (int r = 0; r < h_ + 2 * kVPad; ++r) { |
| 224 | for (int c = 0; c < w_ + 2 * kHPad; ++c) { |
| 225 | const int32_t ref_value = ref_dst[r * stride + c]; |
| 226 | const int32_t tst_value = tst_dst[r * stride + c]; |
| 227 | |
| 228 | EXPECT_EQ(tst_value, ref_value) |
| 229 | << "Error at row: " << (r - kVPad) << ", col: " << (c - kHPad); |
| 230 | } |
| 231 | } |
Cherma Rajan A | a7be368 | 2018-03-20 10:00:51 +0530 | [diff] [blame] | 232 | |
| 233 | for (int r = 0; r < h_ + 2 * kVPad; ++r) { |
| 234 | for (int c = 0; c < w_ + 2 * kHPad; ++c) { |
| 235 | const int32_t ref_value = ref_16_dst[r * stride + c]; |
| 236 | const int32_t tst_value = tst_16_dst[r * stride + c]; |
| 237 | |
| 238 | EXPECT_EQ(tst_value, ref_value) |
| 239 | << "Error in 16 bit buffer " |
| 240 | << "Error at row: " << (r - kVPad) << ", col: " << (c - kHPad); |
| 241 | } |
| 242 | } |
Rupert Swarbrick | 98dc22b | 2017-10-04 09:45:51 +0100 | [diff] [blame] | 243 | } |
| 244 | |
Rupert Swarbrick | 1ea7ab4 | 2017-10-04 09:40:37 +0100 | [diff] [blame] | 245 | typedef tuple<int, int> BlockDimension; |
| 246 | |
| 247 | struct BaseParams { |
| 248 | BaseParams(BlockDimension dims, NTaps ntaps_x, NTaps ntaps_y, bool avg) |
| 249 | : dims(dims), ntaps_x(ntaps_x), ntaps_y(ntaps_y), avg(avg) {} |
| 250 | |
| 251 | BlockDimension dims; |
| 252 | NTaps ntaps_x, ntaps_y; |
| 253 | bool avg; |
| 254 | }; |
| 255 | |
| 256 | template <typename SrcPixel> |
| 257 | class ConvolveScaleTestBase : public ::testing::Test { |
| 258 | public: |
| 259 | ConvolveScaleTestBase() : image_(NULL) {} |
| 260 | virtual ~ConvolveScaleTestBase() { delete image_; } |
| 261 | virtual void TearDown() { libaom_test::ClearSystemState(); } |
| 262 | |
| 263 | // Implemented by subclasses (SetUp depends on the parameters passed |
| 264 | // in and RunOne depends on the function to be tested. These can't |
| 265 | // be templated for low/high bit depths because they have different |
| 266 | // numbers of parameters) |
| 267 | virtual void SetUp() = 0; |
Rupert Swarbrick | 98dc22b | 2017-10-04 09:45:51 +0100 | [diff] [blame] | 268 | virtual void RunOne(bool ref) = 0; |
Rupert Swarbrick | 1ea7ab4 | 2017-10-04 09:40:37 +0100 | [diff] [blame] | 269 | |
| 270 | protected: |
| 271 | void SetParams(const BaseParams ¶ms, int bd) { |
James Zern | 9561280 | 2018-03-30 11:37:54 -0700 | [diff] [blame] | 272 | width_ = ::testing::get<0>(params.dims); |
| 273 | height_ = ::testing::get<1>(params.dims); |
Rupert Swarbrick | 1ea7ab4 | 2017-10-04 09:40:37 +0100 | [diff] [blame] | 274 | ntaps_x_ = params.ntaps_x; |
| 275 | ntaps_y_ = params.ntaps_y; |
| 276 | bd_ = bd; |
| 277 | avg_ = params.avg; |
| 278 | |
| 279 | filter_x_.set(ntaps_x_, false); |
| 280 | filter_y_.set(ntaps_y_, true); |
Yunqing Wang | 17be4d8 | 2017-12-19 17:00:27 -0800 | [diff] [blame] | 281 | convolve_params_ = |
Debargha Mukherjee | e820b82 | 2018-02-09 13:18:29 -0800 | [diff] [blame] | 282 | get_conv_params_no_round(0, avg_ != false, 0, NULL, 0, 1, bd); |
Rupert Swarbrick | 1ea7ab4 | 2017-10-04 09:40:37 +0100 | [diff] [blame] | 283 | |
| 284 | delete image_; |
| 285 | image_ = new TestImage<SrcPixel>(width_, height_, bd_); |
| 286 | } |
| 287 | |
Cherma Rajan A | a7be368 | 2018-03-20 10:00:51 +0530 | [diff] [blame] | 288 | void SetConvParamOffset(int i, int j, int is_compound, int do_average, |
| 289 | int use_jnt_comp_avg) { |
Cheng Chen | 03c7549 | 2017-11-02 16:38:14 -0700 | [diff] [blame] | 290 | if (i == -1 && j == -1) { |
Cherma Rajan A | a7be368 | 2018-03-20 10:00:51 +0530 | [diff] [blame] | 291 | convolve_params_.use_jnt_comp_avg = use_jnt_comp_avg; |
| 292 | convolve_params_.is_compound = is_compound; |
| 293 | convolve_params_.do_average = do_average; |
Cheng Chen | 03c7549 | 2017-11-02 16:38:14 -0700 | [diff] [blame] | 294 | } else { |
Cherma Rajan A | a7be368 | 2018-03-20 10:00:51 +0530 | [diff] [blame] | 295 | convolve_params_.use_jnt_comp_avg = use_jnt_comp_avg; |
Cheng Chen | 03c7549 | 2017-11-02 16:38:14 -0700 | [diff] [blame] | 296 | convolve_params_.fwd_offset = quant_dist_lookup_table[i][j][0]; |
| 297 | convolve_params_.bck_offset = quant_dist_lookup_table[i][j][1]; |
Cherma Rajan A | a7be368 | 2018-03-20 10:00:51 +0530 | [diff] [blame] | 298 | convolve_params_.is_compound = is_compound; |
| 299 | convolve_params_.do_average = do_average; |
Cheng Chen | 03c7549 | 2017-11-02 16:38:14 -0700 | [diff] [blame] | 300 | } |
| 301 | } |
Cheng Chen | 03c7549 | 2017-11-02 16:38:14 -0700 | [diff] [blame] | 302 | |
Rupert Swarbrick | 98dc22b | 2017-10-04 09:45:51 +0100 | [diff] [blame] | 303 | void Run() { |
| 304 | ACMRandom rnd(ACMRandom::DeterministicSeed()); |
| 305 | for (int i = 0; i < kTestIters; ++i) { |
Cherma Rajan A | a7be368 | 2018-03-20 10:00:51 +0530 | [diff] [blame] | 306 | int is_compound = 0; |
| 307 | SetConvParamOffset(-1, -1, is_compound, 0, 0); |
Rupert Swarbrick | 98dc22b | 2017-10-04 09:45:51 +0100 | [diff] [blame] | 308 | Prep(&rnd); |
| 309 | RunOne(true); |
| 310 | RunOne(false); |
| 311 | image_->Check(); |
Cheng Chen | 03c7549 | 2017-11-02 16:38:14 -0700 | [diff] [blame] | 312 | |
Cherma Rajan A | a7be368 | 2018-03-20 10:00:51 +0530 | [diff] [blame] | 313 | is_compound = 1; |
| 314 | for (int do_average = 0; do_average < 2; do_average++) { |
| 315 | for (int use_jnt_comp_avg = 0; use_jnt_comp_avg < 2; |
| 316 | use_jnt_comp_avg++) { |
| 317 | for (int j = 0; j < 2; ++j) { |
| 318 | for (int k = 0; k < 4; ++k) { |
| 319 | SetConvParamOffset(j, k, is_compound, do_average, |
| 320 | use_jnt_comp_avg); |
| 321 | Prep(&rnd); |
| 322 | RunOne(true); |
| 323 | RunOne(false); |
| 324 | image_->Check(); |
| 325 | } |
| 326 | } |
Cheng Chen | 03c7549 | 2017-11-02 16:38:14 -0700 | [diff] [blame] | 327 | } |
| 328 | } |
Rupert Swarbrick | 98dc22b | 2017-10-04 09:45:51 +0100 | [diff] [blame] | 329 | } |
| 330 | } |
| 331 | |
Rupert Swarbrick | 1ea7ab4 | 2017-10-04 09:40:37 +0100 | [diff] [blame] | 332 | void SpeedTest() { |
| 333 | ACMRandom rnd(ACMRandom::DeterministicSeed()); |
| 334 | Prep(&rnd); |
| 335 | |
| 336 | aom_usec_timer ref_timer; |
| 337 | aom_usec_timer_start(&ref_timer); |
Rupert Swarbrick | 98dc22b | 2017-10-04 09:45:51 +0100 | [diff] [blame] | 338 | for (int i = 0; i < kPerfIters; ++i) RunOne(true); |
Rupert Swarbrick | 1ea7ab4 | 2017-10-04 09:40:37 +0100 | [diff] [blame] | 339 | aom_usec_timer_mark(&ref_timer); |
| 340 | const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer); |
| 341 | |
Rupert Swarbrick | 98dc22b | 2017-10-04 09:45:51 +0100 | [diff] [blame] | 342 | aom_usec_timer tst_timer; |
| 343 | aom_usec_timer_start(&tst_timer); |
| 344 | for (int i = 0; i < kPerfIters; ++i) RunOne(false); |
| 345 | aom_usec_timer_mark(&tst_timer); |
| 346 | const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer); |
| 347 | |
| 348 | std::cout << "[ ] C time = " << ref_time / 1000 |
| 349 | << " ms, SIMD time = " << tst_time / 1000 << " ms\n"; |
| 350 | |
| 351 | EXPECT_GT(ref_time, tst_time) |
| 352 | << "Error: CDEFSpeedTest, SIMD slower than C.\n" |
| 353 | << "C time: " << ref_time << " us\n" |
| 354 | << "SIMD time: " << tst_time << " us\n"; |
Rupert Swarbrick | 1ea7ab4 | 2017-10-04 09:40:37 +0100 | [diff] [blame] | 355 | } |
| 356 | |
| 357 | static int RandomSubpel(ACMRandom *rnd) { |
| 358 | const uint8_t subpel_mode = rnd->Rand8(); |
| 359 | if ((subpel_mode & 7) == 0) { |
| 360 | return 0; |
| 361 | } else if ((subpel_mode & 7) == 1) { |
| 362 | return SCALE_SUBPEL_SHIFTS - 1; |
| 363 | } else { |
| 364 | return 1 + rnd->PseudoUniform(SCALE_SUBPEL_SHIFTS - 2); |
| 365 | } |
| 366 | } |
| 367 | |
| 368 | void Prep(ACMRandom *rnd) { |
| 369 | assert(rnd); |
| 370 | |
| 371 | // Choose subpel_x_ and subpel_y_. They should be less than |
| 372 | // SCALE_SUBPEL_SHIFTS; we also want to add extra weight to "interesting" |
| 373 | // values: 0 and SCALE_SUBPEL_SHIFTS - 1 |
| 374 | subpel_x_ = RandomSubpel(rnd); |
| 375 | subpel_y_ = RandomSubpel(rnd); |
| 376 | |
| 377 | image_->Initialize(rnd); |
| 378 | } |
| 379 | |
| 380 | int width_, height_, bd_; |
| 381 | NTaps ntaps_x_, ntaps_y_; |
| 382 | bool avg_; |
| 383 | int subpel_x_, subpel_y_; |
| 384 | TestFilter filter_x_, filter_y_; |
| 385 | TestImage<SrcPixel> *image_; |
| 386 | ConvolveParams convolve_params_; |
| 387 | }; |
| 388 | |
| 389 | typedef tuple<int, int> BlockDimension; |
| 390 | |
Rupert Swarbrick | 98dc22b | 2017-10-04 09:45:51 +0100 | [diff] [blame] | 391 | typedef void (*LowbdConvolveFunc)(const uint8_t *src, int src_stride, |
Cherma Rajan A | a7be368 | 2018-03-20 10:00:51 +0530 | [diff] [blame] | 392 | uint8_t *dst, int dst_stride, int w, int h, |
Rupert Swarbrick | 98dc22b | 2017-10-04 09:45:51 +0100 | [diff] [blame] | 393 | InterpFilterParams *filter_params_x, |
| 394 | InterpFilterParams *filter_params_y, |
| 395 | const int subpel_x_qn, const int x_step_qn, |
| 396 | const int subpel_y_qn, const int y_step_qn, |
| 397 | ConvolveParams *conv_params); |
| 398 | |
Rupert Swarbrick | 1ea7ab4 | 2017-10-04 09:40:37 +0100 | [diff] [blame] | 399 | // Test parameter list: |
| 400 | // <tst_fun, dims, ntaps_x, ntaps_y, avg> |
Rupert Swarbrick | 98dc22b | 2017-10-04 09:45:51 +0100 | [diff] [blame] | 401 | typedef tuple<LowbdConvolveFunc, BlockDimension, NTaps, NTaps, bool> |
| 402 | LowBDParams; |
Rupert Swarbrick | 1ea7ab4 | 2017-10-04 09:40:37 +0100 | [diff] [blame] | 403 | |
| 404 | class LowBDConvolveScaleTest |
| 405 | : public ConvolveScaleTestBase<uint8_t>, |
| 406 | public ::testing::WithParamInterface<LowBDParams> { |
| 407 | public: |
| 408 | virtual ~LowBDConvolveScaleTest() {} |
| 409 | |
| 410 | void SetUp() { |
Rupert Swarbrick | 98dc22b | 2017-10-04 09:45:51 +0100 | [diff] [blame] | 411 | tst_fun_ = GET_PARAM(0); |
| 412 | |
| 413 | const BlockDimension &block = GET_PARAM(1); |
| 414 | const NTaps ntaps_x = GET_PARAM(2); |
| 415 | const NTaps ntaps_y = GET_PARAM(3); |
Rupert Swarbrick | 1ea7ab4 | 2017-10-04 09:40:37 +0100 | [diff] [blame] | 416 | const int bd = 8; |
Rupert Swarbrick | 98dc22b | 2017-10-04 09:45:51 +0100 | [diff] [blame] | 417 | const bool avg = GET_PARAM(4); |
Rupert Swarbrick | 1ea7ab4 | 2017-10-04 09:40:37 +0100 | [diff] [blame] | 418 | |
| 419 | SetParams(BaseParams(block, ntaps_x, ntaps_y, avg), bd); |
| 420 | } |
| 421 | |
Rupert Swarbrick | 98dc22b | 2017-10-04 09:45:51 +0100 | [diff] [blame] | 422 | void RunOne(bool ref) { |
| 423 | const uint8_t *src = image_->GetSrcData(ref, false); |
Cherma Rajan A | a7be368 | 2018-03-20 10:00:51 +0530 | [diff] [blame] | 424 | uint8_t *dst = image_->GetDstData(ref, false); |
| 425 | convolve_params_.dst = image_->GetDst16Data(ref, false); |
Rupert Swarbrick | 1ea7ab4 | 2017-10-04 09:40:37 +0100 | [diff] [blame] | 426 | const int src_stride = image_->src_stride(); |
| 427 | const int dst_stride = image_->dst_stride(); |
Rupert Swarbrick | 98dc22b | 2017-10-04 09:45:51 +0100 | [diff] [blame] | 428 | if (ref) { |
| 429 | av1_convolve_2d_scale_c(src, src_stride, dst, dst_stride, width_, height_, |
| 430 | &filter_x_.params_, &filter_y_.params_, subpel_x_, |
| 431 | kXStepQn, subpel_y_, kYStepQn, &convolve_params_); |
| 432 | } else { |
| 433 | tst_fun_(src, src_stride, dst, dst_stride, width_, height_, |
| 434 | &filter_x_.params_, &filter_y_.params_, subpel_x_, kXStepQn, |
| 435 | subpel_y_, kYStepQn, &convolve_params_); |
| 436 | } |
Rupert Swarbrick | 1ea7ab4 | 2017-10-04 09:40:37 +0100 | [diff] [blame] | 437 | } |
Rupert Swarbrick | 98dc22b | 2017-10-04 09:45:51 +0100 | [diff] [blame] | 438 | |
| 439 | private: |
| 440 | LowbdConvolveFunc tst_fun_; |
Rupert Swarbrick | 1ea7ab4 | 2017-10-04 09:40:37 +0100 | [diff] [blame] | 441 | }; |
| 442 | |
| 443 | const BlockDimension kBlockDim[] = { |
| 444 | make_tuple(2, 2), make_tuple(2, 4), make_tuple(4, 4), |
| 445 | make_tuple(4, 8), make_tuple(8, 4), make_tuple(8, 8), |
| 446 | make_tuple(8, 16), make_tuple(16, 8), make_tuple(16, 16), |
| 447 | make_tuple(16, 32), make_tuple(32, 16), make_tuple(32, 32), |
| 448 | make_tuple(32, 64), make_tuple(64, 32), make_tuple(64, 64), |
| 449 | make_tuple(64, 128), make_tuple(128, 64), make_tuple(128, 128), |
| 450 | }; |
| 451 | |
Cherma Rajan A | a7be368 | 2018-03-20 10:00:51 +0530 | [diff] [blame] | 452 | const NTaps kNTaps[] = { EIGHT_TAP }; |
Rupert Swarbrick | 1ea7ab4 | 2017-10-04 09:40:37 +0100 | [diff] [blame] | 453 | |
Rupert Swarbrick | 98dc22b | 2017-10-04 09:45:51 +0100 | [diff] [blame] | 454 | TEST_P(LowBDConvolveScaleTest, Check) { Run(); } |
Rupert Swarbrick | 1ea7ab4 | 2017-10-04 09:40:37 +0100 | [diff] [blame] | 455 | TEST_P(LowBDConvolveScaleTest, DISABLED_Speed) { SpeedTest(); } |
| 456 | |
Rupert Swarbrick | 98dc22b | 2017-10-04 09:45:51 +0100 | [diff] [blame] | 457 | INSTANTIATE_TEST_CASE_P( |
| 458 | SSE4_1, LowBDConvolveScaleTest, |
| 459 | ::testing::Combine(::testing::Values(av1_convolve_2d_scale_sse4_1), |
| 460 | ::testing::ValuesIn(kBlockDim), |
| 461 | ::testing::ValuesIn(kNTaps), ::testing::ValuesIn(kNTaps), |
| 462 | ::testing::Bool())); |
Rupert Swarbrick | 724d31e | 2017-10-04 09:42:46 +0100 | [diff] [blame] | 463 | |
Rupert Swarbrick | 724d31e | 2017-10-04 09:42:46 +0100 | [diff] [blame] | 464 | typedef void (*HighbdConvolveFunc)(const uint16_t *src, int src_stride, |
Cherma Rajan A | a7be368 | 2018-03-20 10:00:51 +0530 | [diff] [blame] | 465 | uint16_t *dst, int dst_stride, int w, int h, |
Rupert Swarbrick | 724d31e | 2017-10-04 09:42:46 +0100 | [diff] [blame] | 466 | InterpFilterParams *filter_params_x, |
| 467 | InterpFilterParams *filter_params_y, |
| 468 | const int subpel_x_qn, const int x_step_qn, |
| 469 | const int subpel_y_qn, const int y_step_qn, |
| 470 | ConvolveParams *conv_params, int bd); |
| 471 | |
| 472 | // Test parameter list: |
| 473 | // <tst_fun, dims, ntaps_x, ntaps_y, avg, bd> |
| 474 | typedef tuple<HighbdConvolveFunc, BlockDimension, NTaps, NTaps, bool, int> |
| 475 | HighBDParams; |
| 476 | |
| 477 | class HighBDConvolveScaleTest |
| 478 | : public ConvolveScaleTestBase<uint16_t>, |
| 479 | public ::testing::WithParamInterface<HighBDParams> { |
| 480 | public: |
| 481 | virtual ~HighBDConvolveScaleTest() {} |
| 482 | |
| 483 | void SetUp() { |
| 484 | tst_fun_ = GET_PARAM(0); |
| 485 | |
| 486 | const BlockDimension &block = GET_PARAM(1); |
| 487 | const NTaps ntaps_x = GET_PARAM(2); |
| 488 | const NTaps ntaps_y = GET_PARAM(3); |
| 489 | const bool avg = GET_PARAM(4); |
| 490 | const int bd = GET_PARAM(5); |
| 491 | |
| 492 | SetParams(BaseParams(block, ntaps_x, ntaps_y, avg), bd); |
| 493 | } |
| 494 | |
| 495 | void RunOne(bool ref) { |
| 496 | const uint16_t *src = image_->GetSrcData(ref, false); |
Cherma Rajan A | a7be368 | 2018-03-20 10:00:51 +0530 | [diff] [blame] | 497 | uint16_t *dst = image_->GetDstData(ref, false); |
| 498 | convolve_params_.dst = image_->GetDst16Data(ref, false); |
Rupert Swarbrick | 724d31e | 2017-10-04 09:42:46 +0100 | [diff] [blame] | 499 | const int src_stride = image_->src_stride(); |
| 500 | const int dst_stride = image_->dst_stride(); |
| 501 | |
| 502 | if (ref) { |
| 503 | av1_highbd_convolve_2d_scale_c( |
| 504 | src, src_stride, dst, dst_stride, width_, height_, &filter_x_.params_, |
| 505 | &filter_y_.params_, subpel_x_, kXStepQn, subpel_y_, kYStepQn, |
| 506 | &convolve_params_, bd_); |
| 507 | } else { |
| 508 | tst_fun_(src, src_stride, dst, dst_stride, width_, height_, |
| 509 | &filter_x_.params_, &filter_y_.params_, subpel_x_, kXStepQn, |
| 510 | subpel_y_, kYStepQn, &convolve_params_, bd_); |
| 511 | } |
| 512 | } |
| 513 | |
| 514 | private: |
| 515 | HighbdConvolveFunc tst_fun_; |
| 516 | }; |
| 517 | |
| 518 | const int kBDs[] = { 8, 10, 12 }; |
| 519 | |
| 520 | TEST_P(HighBDConvolveScaleTest, Check) { Run(); } |
| 521 | TEST_P(HighBDConvolveScaleTest, DISABLED_Speed) { SpeedTest(); } |
| 522 | |
| 523 | INSTANTIATE_TEST_CASE_P( |
| 524 | SSE4_1, HighBDConvolveScaleTest, |
| 525 | ::testing::Combine(::testing::Values(av1_highbd_convolve_2d_scale_sse4_1), |
| 526 | ::testing::ValuesIn(kBlockDim), |
| 527 | ::testing::ValuesIn(kNTaps), ::testing::ValuesIn(kNTaps), |
| 528 | ::testing::Bool(), ::testing::ValuesIn(kBDs))); |
Rupert Swarbrick | 1ea7ab4 | 2017-10-04 09:40:37 +0100 | [diff] [blame] | 529 | } // namespace |