| /* | 
 |  * Copyright (c) 2016, Alliance for Open Media. All rights reserved | 
 |  * | 
 |  * This source code is subject to the terms of the BSD 2 Clause License and | 
 |  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License | 
 |  * was not distributed with this source code in the LICENSE file, you can | 
 |  * obtain it at www.aomedia.org/license/software. If the Alliance for Open | 
 |  * Media Patent License 1.0 was not distributed with this source code in the | 
 |  * PATENTS file, you can obtain it at www.aomedia.org/license/patent. | 
 |  */ | 
 |  | 
 | #include <cstddef> | 
 | #include <cstdint> | 
 | #include <ctime> | 
 | #include <iostream> | 
 | #include <memory> | 
 | #include <tuple> | 
 |  | 
 | #include "third_party/googletest/src/googletest/include/gtest/gtest.h" | 
 |  | 
 | #include "config/av1_rtcd.h" | 
 |  | 
 | #include "test/acm_random.h" | 
 | #include "test/register_state_check.h" | 
 | #include "test/util.h" | 
 |  | 
 | #include "aom_ports/aom_timer.h" | 
 | #include "av1/common/mv.h" | 
 | #include "av1/common/restoration.h" | 
 |  | 
 | namespace { | 
 |  | 
 | using libaom_test::ACMRandom; | 
 | using std::make_tuple; | 
 | using std::tuple; | 
 |  | 
 | // Pointer to a self-guided restoration routine. The C reference and every | 
 | // SIMD variant exercised in this file are called through this signature; | 
 | // the tests pass highbd = 0 together with plain 8-bit buffers, or highbd = 1 | 
 | // together with CONVERT_TO_BYTEPTR()-wrapped 16-bit buffers. | 
 | using SgrFunc = void (*)(const uint8_t *dat8, int width, int height, | 
 |                          int stride, int eps, const int *xqd, uint8_t *dst8, | 
 |                          int dst_stride, int32_t *tmpbuf, int bit_depth, | 
 |                          int highbd); | 
 |  | 
 | // Parameter of the 8-bit tests, a one-element tuple: | 
 | //  <tst_fun_>  - the filter implementation under test. | 
 | using FilterTestParam = tuple<SgrFunc>; | 
 |  | 
 | // Fixture that compares a SIMD build of the 8-bit self-guided restoration | 
 | // filter (the SgrFunc test parameter) with | 
 | // av1_apply_selfguided_restoration_c(), both for speed and for output. | 
 | class AV1SelfguidedFilterTest | 
 |     : public ::testing::TestWithParam<FilterTestParam> { | 
 |  public: | 
 |   ~AV1SelfguidedFilterTest() override = default; | 
 |  | 
 |   // Picks up the function under test from the test parameter. | 
 |   void SetUp() override { tst_fun_ = GET_PARAM(0); } | 
 |  | 
 |   void TearDown() override {} | 
 |  | 
 |  protected: | 
 |   // Filters one 256x256 random frame NUM_ITERS times with the C reference and | 
 |   // NUM_ITERS times with tst_fun_, prints both timings and expects tst_fun_ | 
 |   // to be the faster of the two. | 
 |   void RunSpeedTest() { | 
 |     const int pu_width = RESTORATION_PROC_UNIT_SIZE; | 
 |     const int pu_height = RESTORATION_PROC_UNIT_SIZE; | 
 |     const int width = 256, height = 256, stride = 288, out_stride = 288; | 
 |     const int NUM_ITERS = 2000; | 
 |  | 
 |     // The frame buffers hold 16 rows above and below the image; with 16 | 
 |     // columns of padding on either side a padded row exactly fills `stride`. | 
 |     const AlignedBuffer<uint8_t> input_buf = | 
 |         AllocAligned<uint8_t>(stride * (height + 32) * sizeof(uint8_t)); | 
 |     const AlignedBuffer<uint8_t> output_buf = | 
 |         AllocAligned<uint8_t>(out_stride * (height + 32) * sizeof(uint8_t)); | 
 |     const AlignedBuffer<int32_t> tmpbuf = | 
 |         AllocAligned<int32_t>(RESTORATION_TMPBUF_SIZE); | 
 |     // Fail cleanly instead of dereferencing a null pointer if an allocation | 
 |     // did not succeed; the buffers release themselves on every exit path. | 
 |     ASSERT_NE(input_buf.get(), nullptr); | 
 |     ASSERT_NE(output_buf.get(), nullptr); | 
 |     ASSERT_NE(tmpbuf.get(), nullptr); | 
 |  | 
 |     // Skip the border so that index 0 is the top-left image pixel. | 
 |     uint8_t *const input = input_buf.get() + stride * 16 + 16; | 
 |     uint8_t *const output = output_buf.get() + out_stride * 16 + 16; | 
 |  | 
 |     ACMRandom rnd(ACMRandom::DeterministicSeed()); | 
 |  | 
 |     // Randomise the image together with its 16-pixel border. | 
 |     for (int row = -16; row < height + 16; ++row) | 
 |       for (int col = -16; col < width + 16; ++col) | 
 |         input[row * stride + col] = rnd.Rand16() & 0xFF; | 
 |  | 
 |     const int xqd[2] = { | 
 |       SGRPROJ_PRJ_MIN0 + | 
 |           rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - SGRPROJ_PRJ_MIN0), | 
 |       SGRPROJ_PRJ_MIN1 + | 
 |           rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - SGRPROJ_PRJ_MIN1) | 
 |     }; | 
 |     // Fix a parameter set, since the speed depends slightly on r. | 
 |     // Change this to test different combinations of values of r. | 
 |     const int eps = 15; | 
 |  | 
 |     av1_loop_restoration_precal(); | 
 |  | 
 |     // Filters the frame NUM_ITERS times with `filter`, one processing unit at | 
 |     // a time, and returns the elapsed time in microseconds. Both candidates | 
 |     // go through this same loop, so they are measured identically. | 
 |     const auto time_filter = [&](SgrFunc filter) -> int64_t { | 
 |       aom_usec_timer timer; | 
 |       aom_usec_timer_start(&timer); | 
 |       for (int iter = 0; iter < NUM_ITERS; ++iter) { | 
 |         for (int y = 0; y < height; y += pu_height) { | 
 |           for (int x = 0; x < width; x += pu_width) { | 
 |             const int w = AOMMIN(pu_width, width - x); | 
 |             const int h = AOMMIN(pu_height, height - y); | 
 |             uint8_t *const input_p = input + y * stride + x; | 
 |             uint8_t *const output_p = output + y * out_stride + x; | 
 |             filter(input_p, w, h, stride, eps, xqd, output_p, out_stride, | 
 |                    tmpbuf.get(), 8, 0); | 
 |           } | 
 |         } | 
 |       } | 
 |       aom_usec_timer_mark(&timer); | 
 |       return aom_usec_timer_elapsed(&timer); | 
 |     }; | 
 |  | 
 |     const int64_t ref_time = time_filter(av1_apply_selfguided_restoration_c); | 
 |     const int64_t tst_time = time_filter(tst_fun_); | 
 |  | 
 |     std::cout << "[          ] C time = " << ref_time / 1000 | 
 |               << " ms, SIMD time = " << tst_time / 1000 << " ms\n"; | 
 |  | 
 |     EXPECT_GT(ref_time, tst_time) | 
 |         << "Error: AV1SelfguidedFilterTest.SpeedTest, SIMD slower than C.\n" | 
 |         << "C time: " << ref_time << " us\n" | 
 |         << "SIMD time: " << tst_time << " us\n"; | 
 |   } | 
 |  | 
 |   // Filters random frames of several sizes with tst_fun_ and with the C | 
 |   // reference and requires every output pixel to be identical. | 
 |   void RunCorrectnessTest() { | 
 |     const int pu_width = RESTORATION_PROC_UNIT_SIZE; | 
 |     const int pu_height = RESTORATION_PROC_UNIT_SIZE; | 
 |     // Set the maximum width/height to test here. We actually test a small | 
 |     // range of sizes *up to* this size, so that we can check, eg., | 
 |     // the behaviour on tiles which are not a multiple of 4 wide. | 
 |     const int max_w = 260, max_h = 260, stride = 672, out_stride = 672; | 
 |     const int NUM_ITERS = 81; | 
 |  | 
 |     const AlignedBuffer<uint8_t> input_buf = | 
 |         AllocAligned<uint8_t>(stride * (max_h + 32) * sizeof(uint8_t)); | 
 |     const AlignedBuffer<uint8_t> tst_output_buf = | 
 |         AllocAligned<uint8_t>(out_stride * (max_h + 32) * sizeof(uint8_t)); | 
 |     const AlignedBuffer<uint8_t> ref_output_buf = | 
 |         AllocAligned<uint8_t>(out_stride * (max_h + 32) * sizeof(uint8_t)); | 
 |     const AlignedBuffer<int32_t> tmpbuf = | 
 |         AllocAligned<int32_t>(RESTORATION_TMPBUF_SIZE); | 
 |     // Fail cleanly instead of dereferencing a null pointer if an allocation | 
 |     // did not succeed. | 
 |     ASSERT_NE(input_buf.get(), nullptr); | 
 |     ASSERT_NE(tst_output_buf.get(), nullptr); | 
 |     ASSERT_NE(ref_output_buf.get(), nullptr); | 
 |     ASSERT_NE(tmpbuf.get(), nullptr); | 
 |  | 
 |     // Skip the 16-pixel border so that index 0 is the top-left image pixel. | 
 |     uint8_t *const input = input_buf.get() + stride * 16 + 16; | 
 |     uint8_t *const tst_output = tst_output_buf.get() + out_stride * 16 + 16; | 
 |     uint8_t *const ref_output = ref_output_buf.get() + out_stride * 16 + 16; | 
 |  | 
 |     ACMRandom rnd(ACMRandom::DeterministicSeed()); | 
 |  | 
 |     av1_loop_restoration_precal(); | 
 |  | 
 |     for (int iter = 0; iter < NUM_ITERS; ++iter) { | 
 |       // Fresh random image (border included) and filter parameters. | 
 |       for (int row = -16; row < max_h + 16; ++row) | 
 |         for (int col = -16; col < max_w + 16; ++col) | 
 |           input[row * stride + col] = rnd.Rand16() & 0xFF; | 
 |  | 
 |       const int xqd[2] = { | 
 |         SGRPROJ_PRJ_MIN0 + | 
 |             rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - SGRPROJ_PRJ_MIN0), | 
 |         SGRPROJ_PRJ_MIN1 + | 
 |             rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - SGRPROJ_PRJ_MIN1) | 
 |       }; | 
 |       const int eps = rnd.PseudoUniform(1 << SGRPROJ_PARAMS_BITS); | 
 |  | 
 |       // Test various tile sizes around 256x256: the 81 iterations cover | 
 |       // every width/height pair from 252 up to 260. | 
 |       const int test_w = max_w - (iter / 9); | 
 |       const int test_h = max_h - (iter % 9); | 
 |  | 
 |       for (int y = 0; y < test_h; y += pu_height) { | 
 |         for (int x = 0; x < test_w; x += pu_width) { | 
 |           const int w = AOMMIN(pu_width, test_w - x); | 
 |           const int h = AOMMIN(pu_height, test_h - y); | 
 |           uint8_t *const input_p = input + y * stride + x; | 
 |           uint8_t *const tst_p = tst_output + y * out_stride + x; | 
 |           uint8_t *const ref_p = ref_output + y * out_stride + x; | 
 |           tst_fun_(input_p, w, h, stride, eps, xqd, tst_p, out_stride, | 
 |                    tmpbuf.get(), 8, 0); | 
 |           av1_apply_selfguided_restoration_c(input_p, w, h, stride, eps, xqd, | 
 |                                              ref_p, out_stride, tmpbuf.get(), | 
 |                                              8, 0); | 
 |         } | 
 |       } | 
 |  | 
 |       for (int row = 0; row < test_h; ++row) { | 
 |         for (int col = 0; col < test_w; ++col) { | 
 |           ASSERT_EQ(tst_output[row * out_stride + col], | 
 |                     ref_output[row * out_stride + col]) | 
 |               << "Mismatch at row " << row << ", column " << col << " of a " | 
 |               << test_w << "x" << test_h << " frame (iteration " << iter | 
 |               << ", eps " << eps << ")"; | 
 |         } | 
 |       } | 
 |     } | 
 |   } | 
 |  | 
 |  private: | 
 |   // Deleter handing memory back through aom_free(). | 
 |   struct AomFree { | 
 |     void operator()(void *mem) const { aom_free(mem); } | 
 |   }; | 
 |  | 
 |   // Owning handle for a buffer obtained from aom_memalign(). | 
 |   template <typename T> | 
 |   using AlignedBuffer = std::unique_ptr<T, AomFree>; | 
 |  | 
 |   // Requests `num_bytes` bytes at 32-byte alignment from aom_memalign(). | 
 |   // Callers check the handle for null before using it. | 
 |   template <typename T> | 
 |   static AlignedBuffer<T> AllocAligned(size_t num_bytes) { | 
 |     return AlignedBuffer<T>(static_cast<T *>(aom_memalign(32, num_bytes))); | 
 |   } | 
 |  | 
 |   // Filter implementation under test; assigned in SetUp(). | 
 |   SgrFunc tst_fun_ = nullptr; | 
 | }; | 
 | // The fixture is instantiated only under the HAVE_* guards in this file, so | 
 | // it is allowed to end up without any instantiation. | 
 | GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1SelfguidedFilterTest); | 
 |  | 
 | // Timing comparison against the C reference. | 
 | TEST_P(AV1SelfguidedFilterTest, DISABLED_SpeedTest) { | 
 |   RunSpeedTest(); | 
 | } | 
 |  | 
 | // Output comparison against the C reference. | 
 | TEST_P(AV1SelfguidedFilterTest, CorrectnessTest) { | 
 |   RunCorrectnessTest(); | 
 | } | 
 |  | 
 | // One instantiation of the 8-bit tests per SIMD variant that is compiled in. | 
 | #if HAVE_SSE4_1 | 
 | INSTANTIATE_TEST_SUITE_P( | 
 |     SSE4_1, AV1SelfguidedFilterTest, | 
 |     ::testing::Values(&av1_apply_selfguided_restoration_sse4_1)); | 
 | #endif  // HAVE_SSE4_1 | 
 |  | 
 | #if HAVE_AVX2 | 
 | INSTANTIATE_TEST_SUITE_P( | 
 |     AVX2, AV1SelfguidedFilterTest, | 
 |     ::testing::Values(&av1_apply_selfguided_restoration_avx2)); | 
 | #endif  // HAVE_AVX2 | 
 |  | 
 | #if HAVE_NEON | 
 | INSTANTIATE_TEST_SUITE_P( | 
 |     NEON, AV1SelfguidedFilterTest, | 
 |     ::testing::Values(&av1_apply_selfguided_restoration_neon)); | 
 | #endif  // HAVE_NEON | 
 |  | 
 | #if CONFIG_AV1_HIGHBITDEPTH | 
 | // Parameter of the high bit depth tests, a two-element tuple: | 
 | //  <tst_fun_, bit_depth>  - the filter under test and the sample bit depth. | 
 | using HighbdFilterTestParam = tuple<SgrFunc, int>; | 
 |  | 
 | // Fixture that compares a SIMD build of the self-guided restoration filter | 
 | // with av1_apply_selfguided_restoration_c() on 16-bit sample buffers. The | 
 | // test parameter supplies the function under test and the bit depth. | 
 | class AV1HighbdSelfguidedFilterTest | 
 |     : public ::testing::TestWithParam<HighbdFilterTestParam> { | 
 |  public: | 
 |   ~AV1HighbdSelfguidedFilterTest() override = default; | 
 |  | 
 |   // Picks up the function under test from the test parameter. | 
 |   void SetUp() override { tst_fun_ = GET_PARAM(0); } | 
 |  | 
 |   void TearDown() override {} | 
 |  | 
 |  protected: | 
 |   // Filters one 256x256 random frame NUM_ITERS times with the C reference and | 
 |   // NUM_ITERS times with tst_fun_, prints both timings and expects tst_fun_ | 
 |   // to be the faster of the two. | 
 |   void RunSpeedTest() { | 
 |     const int pu_width = RESTORATION_PROC_UNIT_SIZE; | 
 |     const int pu_height = RESTORATION_PROC_UNIT_SIZE; | 
 |     const int width = 256, height = 256, stride = 288, out_stride = 288; | 
 |     const int NUM_ITERS = 2000; | 
 |     const int bit_depth = GET_PARAM(1); | 
 |     // Keeps the random samples within `bit_depth` bits. | 
 |     const int mask = (1 << bit_depth) - 1; | 
 |  | 
 |     // The frame buffers hold 16 rows above and below the image; with 16 | 
 |     // columns of padding on either side a padded row exactly fills `stride`. | 
 |     const AlignedBuffer<uint16_t> input_buf = | 
 |         AllocAligned<uint16_t>(stride * (height + 32) * sizeof(uint16_t)); | 
 |     const AlignedBuffer<uint16_t> output_buf = | 
 |         AllocAligned<uint16_t>(out_stride * (height + 32) * sizeof(uint16_t)); | 
 |     const AlignedBuffer<int32_t> tmpbuf = | 
 |         AllocAligned<int32_t>(RESTORATION_TMPBUF_SIZE); | 
 |     // Fail cleanly instead of dereferencing a null pointer if an allocation | 
 |     // did not succeed; the buffers release themselves on every exit path. | 
 |     ASSERT_NE(input_buf.get(), nullptr); | 
 |     ASSERT_NE(output_buf.get(), nullptr); | 
 |     ASSERT_NE(tmpbuf.get(), nullptr); | 
 |  | 
 |     // Skip the border so that index 0 is the top-left image pixel. | 
 |     uint16_t *const input = input_buf.get() + stride * 16 + 16; | 
 |     uint16_t *const output = output_buf.get() + out_stride * 16 + 16; | 
 |  | 
 |     ACMRandom rnd(ACMRandom::DeterministicSeed()); | 
 |  | 
 |     // Randomise the image together with its 16-pixel border. | 
 |     for (int row = -16; row < height + 16; ++row) | 
 |       for (int col = -16; col < width + 16; ++col) | 
 |         input[row * stride + col] = rnd.Rand16() & mask; | 
 |  | 
 |     const int xqd[2] = { | 
 |       SGRPROJ_PRJ_MIN0 + | 
 |           rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - SGRPROJ_PRJ_MIN0), | 
 |       SGRPROJ_PRJ_MIN1 + | 
 |           rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - SGRPROJ_PRJ_MIN1) | 
 |     }; | 
 |     // Fix a parameter set, since the speed depends slightly on r. | 
 |     // Change this to test different combinations of values of r. | 
 |     const int eps = 15; | 
 |  | 
 |     av1_loop_restoration_precal(); | 
 |  | 
 |     // Filters the frame NUM_ITERS times with `filter`, one processing unit at | 
 |     // a time, and returns the elapsed time in microseconds. Both candidates | 
 |     // go through this same loop, so they are measured identically. | 
 |     const auto time_filter = [&](SgrFunc filter) -> int64_t { | 
 |       aom_usec_timer timer; | 
 |       aom_usec_timer_start(&timer); | 
 |       for (int iter = 0; iter < NUM_ITERS; ++iter) { | 
 |         for (int y = 0; y < height; y += pu_height) { | 
 |           for (int x = 0; x < width; x += pu_width) { | 
 |             const int w = AOMMIN(pu_width, width - x); | 
 |             const int h = AOMMIN(pu_height, height - y); | 
 |             uint16_t *const input_p = input + y * stride + x; | 
 |             uint16_t *const output_p = output + y * out_stride + x; | 
 |             filter(CONVERT_TO_BYTEPTR(input_p), w, h, stride, eps, xqd, | 
 |                    CONVERT_TO_BYTEPTR(output_p), out_stride, tmpbuf.get(), | 
 |                    bit_depth, 1); | 
 |           } | 
 |         } | 
 |       } | 
 |       aom_usec_timer_mark(&timer); | 
 |       return aom_usec_timer_elapsed(&timer); | 
 |     }; | 
 |  | 
 |     const int64_t ref_time = time_filter(av1_apply_selfguided_restoration_c); | 
 |     const int64_t tst_time = time_filter(tst_fun_); | 
 |  | 
 |     std::cout << "[          ] C time = " << ref_time / 1000 | 
 |               << " ms, SIMD time = " << tst_time / 1000 << " ms\n"; | 
 |  | 
 |     EXPECT_GT(ref_time, tst_time) | 
 |         << "Error: AV1HighbdSelfguidedFilterTest.SpeedTest, SIMD slower than " | 
 |            "C.\n" | 
 |         << "C time: " << ref_time << " us\n" | 
 |         << "SIMD time: " << tst_time << " us\n"; | 
 |   } | 
 |  | 
 |   // Filters random frames of several sizes with tst_fun_ and with the C | 
 |   // reference and requires every output sample to be identical. | 
 |   void RunCorrectnessTest() { | 
 |     const int pu_width = RESTORATION_PROC_UNIT_SIZE; | 
 |     const int pu_height = RESTORATION_PROC_UNIT_SIZE; | 
 |     // Set the maximum width/height to test here. We actually test a small | 
 |     // range of sizes *up to* this size, so that we can check, eg., | 
 |     // the behaviour on tiles which are not a multiple of 4 wide. | 
 |     const int max_w = 260, max_h = 260, stride = 672, out_stride = 672; | 
 |     const int NUM_ITERS = 81; | 
 |     const int bit_depth = GET_PARAM(1); | 
 |     // Keeps the random samples within `bit_depth` bits. | 
 |     const int mask = (1 << bit_depth) - 1; | 
 |  | 
 |     const AlignedBuffer<uint16_t> input_buf = | 
 |         AllocAligned<uint16_t>(stride * (max_h + 32) * sizeof(uint16_t)); | 
 |     const AlignedBuffer<uint16_t> tst_output_buf = | 
 |         AllocAligned<uint16_t>(out_stride * (max_h + 32) * sizeof(uint16_t)); | 
 |     const AlignedBuffer<uint16_t> ref_output_buf = | 
 |         AllocAligned<uint16_t>(out_stride * (max_h + 32) * sizeof(uint16_t)); | 
 |     const AlignedBuffer<int32_t> tmpbuf = | 
 |         AllocAligned<int32_t>(RESTORATION_TMPBUF_SIZE); | 
 |     // Fail cleanly instead of dereferencing a null pointer if an allocation | 
 |     // did not succeed. | 
 |     ASSERT_NE(input_buf.get(), nullptr); | 
 |     ASSERT_NE(tst_output_buf.get(), nullptr); | 
 |     ASSERT_NE(ref_output_buf.get(), nullptr); | 
 |     ASSERT_NE(tmpbuf.get(), nullptr); | 
 |  | 
 |     // Skip the 16-pixel border so that index 0 is the top-left image pixel. | 
 |     uint16_t *const input = input_buf.get() + stride * 16 + 16; | 
 |     uint16_t *const tst_output = tst_output_buf.get() + out_stride * 16 + 16; | 
 |     uint16_t *const ref_output = ref_output_buf.get() + out_stride * 16 + 16; | 
 |  | 
 |     ACMRandom rnd(ACMRandom::DeterministicSeed()); | 
 |  | 
 |     av1_loop_restoration_precal(); | 
 |  | 
 |     for (int iter = 0; iter < NUM_ITERS; ++iter) { | 
 |       // Fresh random image (border included) and filter parameters. | 
 |       for (int row = -16; row < max_h + 16; ++row) | 
 |         for (int col = -16; col < max_w + 16; ++col) | 
 |           input[row * stride + col] = rnd.Rand16() & mask; | 
 |  | 
 |       const int xqd[2] = { | 
 |         SGRPROJ_PRJ_MIN0 + | 
 |             rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - SGRPROJ_PRJ_MIN0), | 
 |         SGRPROJ_PRJ_MIN1 + | 
 |             rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - SGRPROJ_PRJ_MIN1) | 
 |       }; | 
 |       const int eps = rnd.PseudoUniform(1 << SGRPROJ_PARAMS_BITS); | 
 |  | 
 |       // Test various tile sizes around 256x256: the 81 iterations cover | 
 |       // every width/height pair from 252 up to 260. | 
 |       const int test_w = max_w - (iter / 9); | 
 |       const int test_h = max_h - (iter % 9); | 
 |  | 
 |       for (int y = 0; y < test_h; y += pu_height) { | 
 |         for (int x = 0; x < test_w; x += pu_width) { | 
 |           const int w = AOMMIN(pu_width, test_w - x); | 
 |           const int h = AOMMIN(pu_height, test_h - y); | 
 |           uint16_t *const input_p = input + y * stride + x; | 
 |           uint16_t *const tst_p = tst_output + y * out_stride + x; | 
 |           uint16_t *const ref_p = ref_output + y * out_stride + x; | 
 |           tst_fun_(CONVERT_TO_BYTEPTR(input_p), w, h, stride, eps, xqd, | 
 |                    CONVERT_TO_BYTEPTR(tst_p), out_stride, tmpbuf.get(), | 
 |                    bit_depth, 1); | 
 |           av1_apply_selfguided_restoration_c( | 
 |               CONVERT_TO_BYTEPTR(input_p), w, h, stride, eps, xqd, | 
 |               CONVERT_TO_BYTEPTR(ref_p), out_stride, tmpbuf.get(), bit_depth, | 
 |               1); | 
 |         } | 
 |       } | 
 |  | 
 |       for (int row = 0; row < test_h; ++row) { | 
 |         for (int col = 0; col < test_w; ++col) { | 
 |           ASSERT_EQ(tst_output[row * out_stride + col], | 
 |                     ref_output[row * out_stride + col]) | 
 |               << "Mismatch at row " << row << ", column " << col << " of a " | 
 |               << test_w << "x" << test_h << " frame (iteration " << iter | 
 |               << ", eps " << eps << ", bit depth " << bit_depth << ")"; | 
 |         } | 
 |       } | 
 |     } | 
 |   } | 
 |  | 
 |  private: | 
 |   // Deleter handing memory back through aom_free(). | 
 |   struct AomFree { | 
 |     void operator()(void *mem) const { aom_free(mem); } | 
 |   }; | 
 |  | 
 |   // Owning handle for a buffer obtained from aom_memalign(). | 
 |   template <typename T> | 
 |   using AlignedBuffer = std::unique_ptr<T, AomFree>; | 
 |  | 
 |   // Requests `num_bytes` bytes at 32-byte alignment from aom_memalign(). | 
 |   // Callers check the handle for null before using it. | 
 |   template <typename T> | 
 |   static AlignedBuffer<T> AllocAligned(size_t num_bytes) { | 
 |     return AlignedBuffer<T>(static_cast<T *>(aom_memalign(32, num_bytes))); | 
 |   } | 
 |  | 
 |   // Filter implementation under test; assigned in SetUp(). | 
 |   SgrFunc tst_fun_ = nullptr; | 
 | }; | 
 | // The fixture is instantiated only under the HAVE_* guards in this file, so | 
 | // it is allowed to end up without any instantiation. | 
 | GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1HighbdSelfguidedFilterTest); | 
 |  | 
 | // Timing comparison against the C reference. | 
 | TEST_P(AV1HighbdSelfguidedFilterTest, DISABLED_SpeedTest) { | 
 |   RunSpeedTest(); | 
 | } | 
 |  | 
 | // Output comparison against the C reference. | 
 | TEST_P(AV1HighbdSelfguidedFilterTest, CorrectnessTest) { | 
 |   RunCorrectnessTest(); | 
 | } | 
 |  | 
 | // One instantiation of the high bit depth tests per SIMD variant that is | 
 | // compiled in; each variant runs at bit depths 8, 10 and 12. | 
 | #if HAVE_SSE4_1 | 
 | INSTANTIATE_TEST_SUITE_P( | 
 |     SSE4_1, AV1HighbdSelfguidedFilterTest, | 
 |     ::testing::Combine( | 
 |         ::testing::Values(av1_apply_selfguided_restoration_sse4_1), | 
 |         ::testing::Values(8, 10, 12))); | 
 | #endif  // HAVE_SSE4_1 | 
 |  | 
 | #if HAVE_AVX2 | 
 | INSTANTIATE_TEST_SUITE_P( | 
 |     AVX2, AV1HighbdSelfguidedFilterTest, | 
 |     ::testing::Combine(::testing::Values(av1_apply_selfguided_restoration_avx2), | 
 |                        ::testing::Values(8, 10, 12))); | 
 | #endif  // HAVE_AVX2 | 
 |  | 
 | #if HAVE_NEON | 
 | INSTANTIATE_TEST_SUITE_P( | 
 |     NEON, AV1HighbdSelfguidedFilterTest, | 
 |     ::testing::Combine(::testing::Values(av1_apply_selfguided_restoration_neon), | 
 |                        ::testing::Values(8, 10, 12))); | 
 | #endif  // HAVE_NEON | 
 | #endif  // CONFIG_AV1_HIGHBITDEPTH | 
 | }  // namespace |