|  | /* | 
|  | * Copyright (c) 2016, Alliance for Open Media. All rights reserved. | 
|  | * | 
|  | * This source code is subject to the terms of the BSD 2 Clause License and | 
|  | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License | 
|  | * was not distributed with this source code in the LICENSE file, you can | 
|  | * obtain it at www.aomedia.org/license/software. If the Alliance for Open | 
|  | * Media Patent License 1.0 was not distributed with this source code in the | 
|  | * PATENTS file, you can obtain it at www.aomedia.org/license/patent. | 
|  | */ | 
|  |  | 
|  | #include <cmath> | 
|  | #include <cstdlib> | 
|  | #include <string> | 
|  | #include <tuple> | 
|  |  | 
|  | #include "gtest/gtest.h" | 
|  |  | 
|  | #include "config/aom_config.h" | 
|  | #include "config/av1_rtcd.h" | 
|  |  | 
|  | #include "test/acm_random.h" | 
|  | #include "test/register_state_check.h" | 
|  | #include "test/util.h" | 
|  | #include "av1/common/entropy.h" | 
|  | #include "aom/aom_codec.h" | 
|  | #include "aom/aom_integer.h" | 
|  |  | 
|  | using libaom_test::ACMRandom; | 
|  |  | 
|  | namespace { | 
// Number of trials run by each correctness test (OperationCheck and
// ExtremeValues).
constexpr int kNumIterations = 1000;
|  |  | 
|  | using ErrorBlockFunc = int64_t (*)(const tran_low_t *coeff, | 
|  | const tran_low_t *dqcoeff, | 
|  | intptr_t block_size, int64_t *ssz, int bps); | 
|  |  | 
|  | using ErrorBlockFunc8Bits = int64_t (*)(const tran_low_t *coeff, | 
|  | const tran_low_t *dqcoeff, | 
|  | intptr_t block_size, int64_t *ssz); | 
|  |  | 
|  | using ErrorBlockLpFunc = int64_t (*)(const int16_t *coeff, | 
|  | const int16_t *dqcoeff, | 
|  | intptr_t block_size); | 
|  |  | 
|  | using ErrorBlockParam = | 
|  | std::tuple<ErrorBlockFunc, ErrorBlockFunc, aom_bit_depth_t>; | 
|  |  | 
|  | template <ErrorBlockFunc8Bits fn> | 
|  | int64_t BlockError8BitWrapper(const tran_low_t *coeff, | 
|  | const tran_low_t *dqcoeff, intptr_t block_size, | 
|  | int64_t *ssz, int bps) { | 
|  | EXPECT_EQ(bps, 8); | 
|  | return fn(coeff, dqcoeff, block_size, ssz); | 
|  | } | 
|  |  | 
|  | template <ErrorBlockLpFunc fn> | 
|  | int64_t BlockErrorLpWrapper(const tran_low_t *coeff, const tran_low_t *dqcoeff, | 
|  | intptr_t block_size, int64_t *ssz, int bps) { | 
|  | EXPECT_EQ(bps, 8); | 
|  | *ssz = -1; | 
|  | return fn(reinterpret_cast<const int16_t *>(coeff), | 
|  | reinterpret_cast<const int16_t *>(dqcoeff), block_size); | 
|  | } | 
|  |  | 
|  | class ErrorBlockTest : public ::testing::TestWithParam<ErrorBlockParam> { | 
|  | public: | 
|  | ~ErrorBlockTest() override = default; | 
|  | void SetUp() override { | 
|  | error_block_op_ = GET_PARAM(0); | 
|  | ref_error_block_op_ = GET_PARAM(1); | 
|  | bit_depth_ = GET_PARAM(2); | 
|  | } | 
|  |  | 
|  | protected: | 
|  | aom_bit_depth_t bit_depth_; | 
|  | ErrorBlockFunc error_block_op_; | 
|  | ErrorBlockFunc ref_error_block_op_; | 
|  | }; | 
|  | GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(ErrorBlockTest); | 
|  |  | 
|  | TEST_P(ErrorBlockTest, OperationCheck) { | 
|  | ACMRandom rnd(ACMRandom::DeterministicSeed()); | 
|  | DECLARE_ALIGNED(16, tran_low_t, coeff[4096]); | 
|  | DECLARE_ALIGNED(16, tran_low_t, dqcoeff[4096]); | 
|  | int err_count_total = 0; | 
|  | int first_failure = -1; | 
|  | intptr_t block_size; | 
|  | int64_t ssz; | 
|  | int64_t ret; | 
|  | int64_t ref_ssz; | 
|  | int64_t ref_ret; | 
|  | const int msb = bit_depth_ + 8 - 1; | 
|  | for (int i = 0; i < kNumIterations; ++i) { | 
|  | int err_count = 0; | 
|  | block_size = 16 << (i % 9);  // All block sizes from 4x4, 8x4 ..64x64 | 
|  | for (int j = 0; j < block_size; j++) { | 
|  | // coeff and dqcoeff will always have at least the same sign, and this | 
|  | // can be used for optimization, so generate test input precisely. | 
|  | if (rnd(2)) { | 
|  | // Positive number | 
|  | coeff[j] = rnd(1 << msb); | 
|  | dqcoeff[j] = rnd(1 << msb); | 
|  | } else { | 
|  | // Negative number | 
|  | coeff[j] = -rnd(1 << msb); | 
|  | dqcoeff[j] = -rnd(1 << msb); | 
|  | } | 
|  | } | 
|  | ref_ret = | 
|  | ref_error_block_op_(coeff, dqcoeff, block_size, &ref_ssz, bit_depth_); | 
|  | API_REGISTER_STATE_CHECK( | 
|  | ret = error_block_op_(coeff, dqcoeff, block_size, &ssz, bit_depth_)); | 
|  | err_count += (ref_ret != ret) | (ref_ssz != ssz); | 
|  | if (err_count && !err_count_total) { | 
|  | first_failure = i; | 
|  | } | 
|  | err_count_total += err_count; | 
|  | } | 
|  | EXPECT_EQ(0, err_count_total) | 
|  | << "Error: Error Block Test, C output doesn't match optimized output. " | 
|  | << "First failed at test case " << first_failure; | 
|  | } | 
|  |  | 
|  | TEST_P(ErrorBlockTest, ExtremeValues) { | 
|  | ACMRandom rnd(ACMRandom::DeterministicSeed()); | 
|  | DECLARE_ALIGNED(16, tran_low_t, coeff[4096]); | 
|  | DECLARE_ALIGNED(16, tran_low_t, dqcoeff[4096]); | 
|  | int err_count_total = 0; | 
|  | int first_failure = -1; | 
|  | intptr_t block_size; | 
|  | int64_t ssz; | 
|  | int64_t ret; | 
|  | int64_t ref_ssz; | 
|  | int64_t ref_ret; | 
|  | const int msb = bit_depth_ + 8 - 1; | 
|  | int max_val = ((1 << msb) - 1); | 
|  | for (int i = 0; i < kNumIterations; ++i) { | 
|  | int err_count = 0; | 
|  | int k = (i / 9) % 9; | 
|  |  | 
|  | // Change the maximum coeff value, to test different bit boundaries | 
|  | if (k == 8 && (i % 9) == 0) { | 
|  | max_val >>= 1; | 
|  | } | 
|  | block_size = 16 << (i % 9);  // All block sizes from 4x4, 8x4 ..64x64 | 
|  | for (int j = 0; j < block_size; j++) { | 
|  | if (k < 4) { | 
|  | // Test at positive maximum values | 
|  | coeff[j] = k % 2 ? max_val : 0; | 
|  | dqcoeff[j] = (k >> 1) % 2 ? max_val : 0; | 
|  | } else if (k < 8) { | 
|  | // Test at negative maximum values | 
|  | coeff[j] = k % 2 ? -max_val : 0; | 
|  | dqcoeff[j] = (k >> 1) % 2 ? -max_val : 0; | 
|  | } else { | 
|  | if (rnd(2)) { | 
|  | // Positive number | 
|  | coeff[j] = rnd(1 << 14); | 
|  | dqcoeff[j] = rnd(1 << 14); | 
|  | } else { | 
|  | // Negative number | 
|  | coeff[j] = -rnd(1 << 14); | 
|  | dqcoeff[j] = -rnd(1 << 14); | 
|  | } | 
|  | } | 
|  | } | 
|  | ref_ret = | 
|  | ref_error_block_op_(coeff, dqcoeff, block_size, &ref_ssz, bit_depth_); | 
|  | API_REGISTER_STATE_CHECK( | 
|  | ret = error_block_op_(coeff, dqcoeff, block_size, &ssz, bit_depth_)); | 
|  | err_count += (ref_ret != ret) | (ref_ssz != ssz); | 
|  | if (err_count && !err_count_total) { | 
|  | first_failure = i; | 
|  | } | 
|  | err_count_total += err_count; | 
|  | } | 
|  | EXPECT_EQ(0, err_count_total) | 
|  | << "Error: Error Block Test, C output doesn't match optimized output. " | 
|  | << "First failed at test case " << first_failure; | 
|  | } | 
|  |  | 
|  | TEST_P(ErrorBlockTest, DISABLED_Speed) { | 
|  | ACMRandom rnd(ACMRandom::DeterministicSeed()); | 
|  | DECLARE_ALIGNED(16, tran_low_t, coeff[4096]); | 
|  | DECLARE_ALIGNED(16, tran_low_t, dqcoeff[4096]); | 
|  | intptr_t block_size; | 
|  | int64_t ssz; | 
|  | int num_iters = 100000; | 
|  | int64_t ref_ssz; | 
|  | const int msb = bit_depth_ + 8 - 1; | 
|  | for (int i = 0; i < 9; ++i) { | 
|  | block_size = 16 << (i % 9);  // All block sizes from 4x4, 8x4 ..64x64 | 
|  | for (int k = 0; k < 9; k++) { | 
|  | for (int j = 0; j < block_size; j++) { | 
|  | if (k < 5) { | 
|  | if (rnd(2)) { | 
|  | // Positive number | 
|  | coeff[j] = rnd(1 << msb); | 
|  | dqcoeff[j] = rnd(1 << msb); | 
|  | } else { | 
|  | // Negative number | 
|  | coeff[j] = -rnd(1 << msb); | 
|  | dqcoeff[j] = -rnd(1 << msb); | 
|  | } | 
|  | } else { | 
|  | if (rnd(2)) { | 
|  | // Positive number | 
|  | coeff[j] = rnd(1 << 14); | 
|  | dqcoeff[j] = rnd(1 << 14); | 
|  | } else { | 
|  | // Negative number | 
|  | coeff[j] = -rnd(1 << 14); | 
|  | dqcoeff[j] = -rnd(1 << 14); | 
|  | } | 
|  | } | 
|  | } | 
|  | aom_usec_timer ref_timer, test_timer; | 
|  |  | 
|  | aom_usec_timer_start(&ref_timer); | 
|  | for (int iter = 0; iter < num_iters; ++iter) { | 
|  | ref_error_block_op_(coeff, dqcoeff, block_size, &ref_ssz, bit_depth_); | 
|  | } | 
|  | aom_usec_timer_mark(&ref_timer); | 
|  | const int elapsed_time_c = | 
|  | static_cast<int>(aom_usec_timer_elapsed(&ref_timer)); | 
|  |  | 
|  | aom_usec_timer_start(&test_timer); | 
|  | for (int iter = 0; iter < num_iters; ++iter) { | 
|  | error_block_op_(coeff, dqcoeff, block_size, &ssz, bit_depth_); | 
|  | } | 
|  | aom_usec_timer_mark(&test_timer); | 
|  |  | 
|  | const int elapsed_time_simd = | 
|  | static_cast<int>(aom_usec_timer_elapsed(&test_timer)); | 
|  |  | 
|  | printf( | 
|  | " c_time=%d \t simd_time=%d \t " | 
|  | "gain=%d \n", | 
|  | elapsed_time_c, elapsed_time_simd, | 
|  | (elapsed_time_c / elapsed_time_simd)); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
using std::make_tuple;

#if HAVE_SSE2
// SSE2 instantiation. Each entry is (function under test, reference function,
// bit depth). The high-bitdepth entries are compiled only when
// CONFIG_AV1_HIGHBITDEPTH is set.
const ErrorBlockParam kErrorBlockTestParamsSse2[] = {
#if CONFIG_AV1_HIGHBITDEPTH
  std::make_tuple(&av1_highbd_block_error_sse2, &av1_highbd_block_error_c,
                  AOM_BITS_10),
  std::make_tuple(&av1_highbd_block_error_sse2, &av1_highbd_block_error_c,
                  AOM_BITS_12),
  std::make_tuple(&av1_highbd_block_error_sse2, &av1_highbd_block_error_c,
                  AOM_BITS_8),
#endif
  std::make_tuple(&BlockError8BitWrapper<av1_block_error_sse2>,
                  &BlockError8BitWrapper<av1_block_error_c>, AOM_BITS_8),
  std::make_tuple(&BlockErrorLpWrapper<av1_block_error_lp_sse2>,
                  &BlockErrorLpWrapper<av1_block_error_lp_c>, AOM_BITS_8)
};

INSTANTIATE_TEST_SUITE_P(SSE2, ErrorBlockTest,
                         ::testing::ValuesIn(kErrorBlockTestParamsSse2));
#endif  // HAVE_SSE2
|  |  | 
#if HAVE_AVX2
// AVX2 instantiation. Each entry is (function under test, reference function,
// bit depth). The high-bitdepth entries are compiled only when
// CONFIG_AV1_HIGHBITDEPTH is set.
const ErrorBlockParam kErrorBlockTestParamsAvx2[] = {
#if CONFIG_AV1_HIGHBITDEPTH
  std::make_tuple(&av1_highbd_block_error_avx2, &av1_highbd_block_error_c,
                  AOM_BITS_10),
  std::make_tuple(&av1_highbd_block_error_avx2, &av1_highbd_block_error_c,
                  AOM_BITS_12),
  std::make_tuple(&av1_highbd_block_error_avx2, &av1_highbd_block_error_c,
                  AOM_BITS_8),
#endif
  std::make_tuple(&BlockError8BitWrapper<av1_block_error_avx2>,
                  &BlockError8BitWrapper<av1_block_error_c>, AOM_BITS_8),
  std::make_tuple(&BlockErrorLpWrapper<av1_block_error_lp_avx2>,
                  &BlockErrorLpWrapper<av1_block_error_lp_c>, AOM_BITS_8)
};

INSTANTIATE_TEST_SUITE_P(AVX2, ErrorBlockTest,
                         ::testing::ValuesIn(kErrorBlockTestParamsAvx2));
#endif  // HAVE_AVX2
|  |  | 
#if HAVE_NEON
// NEON instantiation. Each entry is (function under test, reference function,
// bit depth). The high-bitdepth entries are compiled only when
// CONFIG_AV1_HIGHBITDEPTH is set.
const ErrorBlockParam kErrorBlockTestParamsNeon[] = {
#if CONFIG_AV1_HIGHBITDEPTH
  std::make_tuple(&av1_highbd_block_error_neon, &av1_highbd_block_error_c,
                  AOM_BITS_10),
  std::make_tuple(&av1_highbd_block_error_neon, &av1_highbd_block_error_c,
                  AOM_BITS_12),
  std::make_tuple(&av1_highbd_block_error_neon, &av1_highbd_block_error_c,
                  AOM_BITS_8),
#endif
  std::make_tuple(&BlockError8BitWrapper<av1_block_error_neon>,
                  &BlockError8BitWrapper<av1_block_error_c>, AOM_BITS_8),
  std::make_tuple(&BlockErrorLpWrapper<av1_block_error_lp_neon>,
                  &BlockErrorLpWrapper<av1_block_error_lp_c>, AOM_BITS_8)
};

INSTANTIATE_TEST_SUITE_P(NEON, ErrorBlockTest,
                         ::testing::ValuesIn(kErrorBlockTestParamsNeon));
#endif  // HAVE_NEON
|  |  | 
#if HAVE_SVE
// SVE instantiation. Each entry is (function under test, reference function,
// bit depth). Only the wrapped 8-bit and int16_t kernels are listed here.
const ErrorBlockParam kErrorBlockTestParamsSVE[] = {
  std::make_tuple(&BlockError8BitWrapper<av1_block_error_sve>,
                  &BlockError8BitWrapper<av1_block_error_c>, AOM_BITS_8),
  std::make_tuple(&BlockErrorLpWrapper<av1_block_error_lp_sve>,
                  &BlockErrorLpWrapper<av1_block_error_lp_c>, AOM_BITS_8)
};

INSTANTIATE_TEST_SUITE_P(SVE, ErrorBlockTest,
                         ::testing::ValuesIn(kErrorBlockTestParamsSVE));
#endif  // HAVE_SVE
|  | }  // namespace |