|  | /* | 
|  | * Copyright (c) 2018, Alliance for Open Media. All rights reserved. | 
|  | * | 
|  | * This source code is subject to the terms of the BSD 2 Clause License and | 
|  | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License | 
|  | * was not distributed with this source code in the LICENSE file, you can | 
|  | * obtain it at www.aomedia.org/license/software. If the Alliance for Open | 
|  | * Media Patent License 1.0 was not distributed with this source code in the | 
|  | * PATENTS file, you can obtain it at www.aomedia.org/license/patent. | 
|  | */ | 
|  |  | 
#include <memory>
#include <tuple>
#include <vector>

#include "gtest/gtest.h"

#include "config/av1_rtcd.h"

#include "aom_ports/aom_timer.h"
#include "av1/common/convolve.h"
#include "av1/common/resize.h"
#include "test/acm_random.h"
#include "test/register_state_check.h"
#include "test/util.h"
|  |  | 
|  | namespace { | 
// Number of randomized rounds run by the correctness test, and number of
// back-to-back calls timed per implementation by the speed test.
constexpr int kTestIters = 10;
constexpr int kPerfIters = 1000;

// Size of the border placed around every image plane in the test buffers:
// kVPad rows above and below, kHPad columns to the left and right.
constexpr int kVPad = 32;
constexpr int kHPad = 32;
|  |  | 
|  | using libaom_test::ACMRandom; | 
|  | using std::make_tuple; | 
|  | using std::tuple; | 
|  |  | 
|  | // Inverse of av1_calculate_scaled_superres_size(): calculates the original | 
|  | // dimensions from the given scaled dimensions and the scale denominator. | 
|  | void calculate_unscaled_superres_size(int *width, int denom) { | 
|  | if (denom != SCALE_NUMERATOR) { | 
|  | // Note: av1_calculate_scaled_superres_size() rounds *up* after division | 
|  | // when the resulting dimensions are odd. So here, we round *down*. | 
|  | *width = *width * denom / SCALE_NUMERATOR; | 
|  | } | 
|  | } | 
|  |  | 
|  | template <typename Pixel> | 
|  | class TestImage { | 
|  | public: | 
|  | TestImage(int w_src, int h, int superres_denom, int x0, int bd) | 
|  | : w_src_(w_src), h_(h), superres_denom_(superres_denom), x0_(x0), | 
|  | bd_(bd) { | 
|  | assert(bd < 16); | 
|  | assert(bd <= 8 * static_cast<int>(sizeof(Pixel))); | 
|  | assert(9 <= superres_denom && superres_denom <= 16); | 
|  | assert(SCALE_NUMERATOR == 8); | 
|  | assert(0 <= x0_ && x0_ <= RS_SCALE_SUBPEL_MASK); | 
|  |  | 
|  | w_dst_ = w_src_; | 
|  | calculate_unscaled_superres_size(&w_dst_, superres_denom); | 
|  |  | 
|  | src_stride_ = ALIGN_POWER_OF_TWO(w_src_ + 2 * kHPad, 4); | 
|  | dst_stride_ = ALIGN_POWER_OF_TWO(w_dst_ + 2 * kHPad, 4); | 
|  |  | 
|  | // Allocate image data | 
|  | src_data_.resize(2 * src_block_size()); | 
|  | dst_data_.resize(2 * dst_block_size()); | 
|  | } | 
|  |  | 
|  | void Initialize(ACMRandom *rnd); | 
|  | void Check() const; | 
|  |  | 
|  | int src_stride() const { return src_stride_; } | 
|  | int dst_stride() const { return dst_stride_; } | 
|  |  | 
|  | int src_block_size() const { return (h_ + 2 * kVPad) * src_stride(); } | 
|  | int dst_block_size() const { return (h_ + 2 * kVPad) * dst_stride(); } | 
|  |  | 
|  | int src_width() const { return w_src_; } | 
|  | int dst_width() const { return w_dst_; } | 
|  | int height() const { return h_; } | 
|  | int x0() const { return x0_; } | 
|  |  | 
|  | const Pixel *GetSrcData(bool ref, bool borders) const { | 
|  | const Pixel *block = &src_data_[ref ? 0 : src_block_size()]; | 
|  | return borders ? block : block + kHPad + src_stride_ * kVPad; | 
|  | } | 
|  |  | 
|  | Pixel *GetDstData(bool ref, bool borders) { | 
|  | Pixel *block = &dst_data_[ref ? 0 : dst_block_size()]; | 
|  | return borders ? block : block + kHPad + dst_stride_ * kVPad; | 
|  | } | 
|  |  | 
|  | private: | 
|  | int w_src_, w_dst_, h_, superres_denom_, x0_, bd_; | 
|  | int src_stride_, dst_stride_; | 
|  |  | 
|  | std::vector<Pixel> src_data_; | 
|  | std::vector<Pixel> dst_data_; | 
|  | }; | 
|  |  | 
|  | template <typename Pixel> | 
|  | void FillEdge(ACMRandom *rnd, int num_pixels, int bd, bool trash, Pixel *data) { | 
|  | if (!trash) { | 
|  | memset(data, 0, sizeof(*data) * num_pixels); | 
|  | return; | 
|  | } | 
|  | const Pixel mask = (1 << bd) - 1; | 
|  | for (int i = 0; i < num_pixels; ++i) data[i] = rnd->Rand16() & mask; | 
|  | } | 
|  |  | 
|  | template <typename Pixel> | 
|  | void PrepBuffers(ACMRandom *rnd, int w, int h, int stride, int bd, | 
|  | bool trash_edges, Pixel *data) { | 
|  | assert(rnd); | 
|  | const Pixel mask = (1 << bd) - 1; | 
|  |  | 
|  | // Fill in the first buffer with random data | 
|  | // Top border | 
|  | FillEdge(rnd, stride * kVPad, bd, trash_edges, data); | 
|  | for (int r = 0; r < h; ++r) { | 
|  | Pixel *row_data = data + (kVPad + r) * stride; | 
|  | // Left border, contents, right border | 
|  | FillEdge(rnd, kHPad, bd, trash_edges, row_data); | 
|  | for (int c = 0; c < w; ++c) row_data[kHPad + c] = rnd->Rand16() & mask; | 
|  | FillEdge(rnd, kHPad, bd, trash_edges, row_data + kHPad + w); | 
|  | } | 
|  | // Bottom border | 
|  | FillEdge(rnd, stride * kVPad, bd, trash_edges, data + stride * (kVPad + h)); | 
|  |  | 
|  | const int bpp = sizeof(*data); | 
|  | const int block_elts = stride * (h + 2 * kVPad); | 
|  | const int block_size = bpp * block_elts; | 
|  |  | 
|  | // Now copy that to the second buffer | 
|  | memcpy(data + block_elts, data, block_size); | 
|  | } | 
|  |  | 
|  | template <typename Pixel> | 
|  | void TestImage<Pixel>::Initialize(ACMRandom *rnd) { | 
|  | PrepBuffers(rnd, w_src_, h_, src_stride_, bd_, false, &src_data_[0]); | 
|  | PrepBuffers(rnd, w_dst_, h_, dst_stride_, bd_, true, &dst_data_[0]); | 
|  | } | 
|  |  | 
|  | template <typename Pixel> | 
|  | void TestImage<Pixel>::Check() const { | 
|  | const int num_pixels = dst_block_size(); | 
|  | const Pixel *ref_dst = &dst_data_[0]; | 
|  | const Pixel *tst_dst = &dst_data_[num_pixels]; | 
|  |  | 
|  | // If memcmp returns 0, there's nothing to do. | 
|  | if (0 == memcmp(ref_dst, tst_dst, sizeof(*ref_dst) * num_pixels)) return; | 
|  |  | 
|  | // Otherwise, iterate through the buffer looking for differences, *ignoring | 
|  | // the edges* | 
|  | const int stride = dst_stride_; | 
|  | for (int r = kVPad; r < h_ + kVPad; ++r) { | 
|  | for (int c = kVPad; c < w_dst_ + kHPad; ++c) { | 
|  | const int32_t ref_value = ref_dst[r * stride + c]; | 
|  | const int32_t tst_value = tst_dst[r * stride + c]; | 
|  |  | 
|  | EXPECT_EQ(tst_value, ref_value) | 
|  | << "Error at row: " << (r - kVPad) << ", col: " << (c - kHPad) | 
|  | << ", superres_denom: " << superres_denom_ << ", height: " << h_ | 
|  | << ", src_width: " << w_src_ << ", dst_width: " << w_dst_ | 
|  | << ", x0: " << x0_; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | template <typename Pixel> | 
|  | class ConvolveHorizRSTestBase : public ::testing::Test { | 
|  | public: | 
|  | ConvolveHorizRSTestBase() : image_(nullptr) {} | 
|  | ~ConvolveHorizRSTestBase() override = default; | 
|  |  | 
|  | // Implemented by subclasses (SetUp depends on the parameters passed | 
|  | // in and RunOne depends on the function to be tested. These can't | 
|  | // be templated for low/high bit depths because they have different | 
|  | // numbers of parameters) | 
|  | void SetUp() override = 0; | 
|  | virtual void RunOne(bool ref) = 0; | 
|  |  | 
|  | protected: | 
|  | void SetBitDepth(int bd) { bd_ = bd; } | 
|  |  | 
|  | void CorrectnessTest() { | 
|  | ACMRandom rnd(ACMRandom::DeterministicSeed()); | 
|  | for (int i = 0; i < kTestIters; ++i) { | 
|  | for (int superres_denom = 9; superres_denom <= 16; superres_denom++) { | 
|  | // Get a random height between 512 and 767 | 
|  | int height = rnd.Rand8() + 512; | 
|  |  | 
|  | // Get a random src width between 128 and 383 | 
|  | int width_src = rnd.Rand8() + 128; | 
|  |  | 
|  | // x0 is normally calculated by get_upscale_convolve_x0 in | 
|  | // av1/common/resize.c. However, this test should work for | 
|  | // any value of x0 between 0 and RS_SCALE_SUBPEL_MASK | 
|  | // (inclusive), so we choose one at random. | 
|  | int x0 = rnd.Rand16() % (RS_SCALE_SUBPEL_MASK + 1); | 
|  |  | 
|  | image_ = | 
|  | new TestImage<Pixel>(width_src, height, superres_denom, x0, bd_); | 
|  | ASSERT_NE(image_, nullptr); | 
|  |  | 
|  | Prep(&rnd); | 
|  | RunOne(true); | 
|  | RunOne(false); | 
|  | image_->Check(); | 
|  |  | 
|  | delete image_; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | void SpeedTest() { | 
|  | // Pick some specific parameters to test | 
|  | int height = 767; | 
|  | int width_src = 129; | 
|  | int superres_denom = 13; | 
|  | int x0 = RS_SCALE_SUBPEL_MASK >> 1; | 
|  |  | 
|  | image_ = new TestImage<Pixel>(width_src, height, superres_denom, x0, bd_); | 
|  | ASSERT_NE(image_, nullptr); | 
|  |  | 
|  | ACMRandom rnd(ACMRandom::DeterministicSeed()); | 
|  | Prep(&rnd); | 
|  |  | 
|  | aom_usec_timer ref_timer; | 
|  | aom_usec_timer_start(&ref_timer); | 
|  | for (int i = 0; i < kPerfIters; ++i) RunOne(true); | 
|  | aom_usec_timer_mark(&ref_timer); | 
|  | const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer); | 
|  |  | 
|  | aom_usec_timer tst_timer; | 
|  | aom_usec_timer_start(&tst_timer); | 
|  | for (int i = 0; i < kPerfIters; ++i) RunOne(false); | 
|  | aom_usec_timer_mark(&tst_timer); | 
|  | const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer); | 
|  |  | 
|  | std::cout << "[          ] C time = " << ref_time / 1000 | 
|  | << " ms, SIMD time = " << tst_time / 1000 << " ms\n"; | 
|  |  | 
|  | EXPECT_GT(ref_time, tst_time) | 
|  | << "Error: ConvolveHorizRSTest (Speed Test), SIMD slower than C.\n" | 
|  | << "C time: " << ref_time << " us\n" | 
|  | << "SIMD time: " << tst_time << " us\n"; | 
|  | } | 
|  |  | 
|  | void Prep(ACMRandom *rnd) { | 
|  | assert(rnd); | 
|  | image_->Initialize(rnd); | 
|  | } | 
|  |  | 
|  | int bd_; | 
|  | TestImage<Pixel> *image_; | 
|  | }; | 
|  |  | 
|  | using LowBDConvolveHorizRsFunc = void (*)(const uint8_t *src, int src_stride, | 
|  | uint8_t *dst, int dst_stride, int w, | 
|  | int h, const int16_t *x_filters, | 
|  | const int x0_qn, const int x_step_qn); | 
|  |  | 
|  | // Test parameter list: | 
|  | //  <tst_fun_> | 
|  | using LowBDParams = tuple<LowBDConvolveHorizRsFunc>; | 
|  |  | 
|  | class LowBDConvolveHorizRSTest | 
|  | : public ConvolveHorizRSTestBase<uint8_t>, | 
|  | public ::testing::WithParamInterface<LowBDParams> { | 
|  | public: | 
|  | ~LowBDConvolveHorizRSTest() override = default; | 
|  |  | 
|  | void SetUp() override { | 
|  | tst_fun_ = GET_PARAM(0); | 
|  | const int bd = 8; | 
|  | SetBitDepth(bd); | 
|  | } | 
|  |  | 
|  | void RunOne(bool ref) override { | 
|  | const uint8_t *src = image_->GetSrcData(ref, false); | 
|  | uint8_t *dst = image_->GetDstData(ref, false); | 
|  | const int src_stride = image_->src_stride(); | 
|  | const int dst_stride = image_->dst_stride(); | 
|  | const int width_src = image_->src_width(); | 
|  | const int width_dst = image_->dst_width(); | 
|  | const int height = image_->height(); | 
|  | const int x0_qn = image_->x0(); | 
|  |  | 
|  | const int32_t x_step_qn = | 
|  | av1_get_upscale_convolve_step(width_src, width_dst); | 
|  |  | 
|  | if (ref) { | 
|  | av1_convolve_horiz_rs_c(src, src_stride, dst, dst_stride, width_dst, | 
|  | height, &av1_resize_filter_normative[0][0], x0_qn, | 
|  | x_step_qn); | 
|  | } else { | 
|  | tst_fun_(src, src_stride, dst, dst_stride, width_dst, height, | 
|  | &av1_resize_filter_normative[0][0], x0_qn, x_step_qn); | 
|  | } | 
|  | } | 
|  |  | 
|  | private: | 
|  | LowBDConvolveHorizRsFunc tst_fun_; | 
|  | }; | 
|  |  | 
|  | TEST_P(LowBDConvolveHorizRSTest, Correctness) { CorrectnessTest(); } | 
|  | TEST_P(LowBDConvolveHorizRSTest, DISABLED_Speed) { SpeedTest(); } | 
|  |  | 
|  | INSTANTIATE_TEST_SUITE_P(C, LowBDConvolveHorizRSTest, | 
|  | ::testing::Values(av1_convolve_horiz_rs_c)); | 
|  |  | 
|  | #if HAVE_NEON | 
|  | INSTANTIATE_TEST_SUITE_P(NEON, LowBDConvolveHorizRSTest, | 
|  | ::testing::Values(av1_convolve_horiz_rs_neon)); | 
|  | #endif | 
|  |  | 
|  | #if HAVE_SSE4_1 | 
|  | INSTANTIATE_TEST_SUITE_P(SSE4_1, LowBDConvolveHorizRSTest, | 
|  | ::testing::Values(av1_convolve_horiz_rs_sse4_1)); | 
|  | #endif | 
|  |  | 
|  | #if CONFIG_AV1_HIGHBITDEPTH | 
|  | using HighBDConvolveHorizRsFunc = void (*)(const uint16_t *src, int src_stride, | 
|  | uint16_t *dst, int dst_stride, int w, | 
|  | int h, const int16_t *x_filters, | 
|  | const int x0_qn, const int x_step_qn, | 
|  | int bd); | 
|  |  | 
|  | // Test parameter list: | 
|  | //  <tst_fun_, bd_> | 
|  | using HighBDParams = tuple<HighBDConvolveHorizRsFunc, int>; | 
|  |  | 
|  | class HighBDConvolveHorizRSTest | 
|  | : public ConvolveHorizRSTestBase<uint16_t>, | 
|  | public ::testing::WithParamInterface<HighBDParams> { | 
|  | public: | 
|  | ~HighBDConvolveHorizRSTest() override = default; | 
|  |  | 
|  | void SetUp() override { | 
|  | tst_fun_ = GET_PARAM(0); | 
|  | const int bd = GET_PARAM(1); | 
|  | SetBitDepth(bd); | 
|  | } | 
|  |  | 
|  | void RunOne(bool ref) override { | 
|  | const uint16_t *src = image_->GetSrcData(ref, false); | 
|  | uint16_t *dst = image_->GetDstData(ref, false); | 
|  | const int src_stride = image_->src_stride(); | 
|  | const int dst_stride = image_->dst_stride(); | 
|  | const int width_src = image_->src_width(); | 
|  | const int width_dst = image_->dst_width(); | 
|  | const int height = image_->height(); | 
|  | const int x0_qn = image_->x0(); | 
|  |  | 
|  | const int32_t x_step_qn = | 
|  | av1_get_upscale_convolve_step(width_src, width_dst); | 
|  |  | 
|  | if (ref) { | 
|  | av1_highbd_convolve_horiz_rs_c( | 
|  | src, src_stride, dst, dst_stride, width_dst, height, | 
|  | &av1_resize_filter_normative[0][0], x0_qn, x_step_qn, bd_); | 
|  | } else { | 
|  | tst_fun_(src, src_stride, dst, dst_stride, width_dst, height, | 
|  | &av1_resize_filter_normative[0][0], x0_qn, x_step_qn, bd_); | 
|  | } | 
|  | } | 
|  |  | 
|  | private: | 
|  | HighBDConvolveHorizRsFunc tst_fun_; | 
|  | }; | 
|  |  | 
|  | const int kBDs[] = { 8, 10, 12 }; | 
|  |  | 
|  | TEST_P(HighBDConvolveHorizRSTest, Correctness) { CorrectnessTest(); } | 
|  | TEST_P(HighBDConvolveHorizRSTest, DISABLED_Speed) { SpeedTest(); } | 
|  |  | 
|  | INSTANTIATE_TEST_SUITE_P( | 
|  | C, HighBDConvolveHorizRSTest, | 
|  | ::testing::Combine(::testing::Values(av1_highbd_convolve_horiz_rs_c), | 
|  | ::testing::ValuesIn(kBDs))); | 
|  |  | 
|  | #if HAVE_SSE4_1 | 
|  | INSTANTIATE_TEST_SUITE_P( | 
|  | SSE4_1, HighBDConvolveHorizRSTest, | 
|  | ::testing::Combine(::testing::Values(av1_highbd_convolve_horiz_rs_sse4_1), | 
|  | ::testing::ValuesIn(kBDs))); | 
|  | #endif  // HAVE_SSE4_1 | 
|  |  | 
|  | #if HAVE_NEON | 
|  | INSTANTIATE_TEST_SUITE_P( | 
|  | NEON, HighBDConvolveHorizRSTest, | 
|  | ::testing::Combine(::testing::Values(av1_highbd_convolve_horiz_rs_neon), | 
|  | ::testing::ValuesIn(kBDs))); | 
|  | #endif  // HAVE_NEON | 
|  |  | 
|  | #endif  // CONFIG_AV1_HIGHBITDEPTH | 
|  |  | 
|  | }  // namespace |