blob: aea4edb8722317963e898740fe11b3190bf2b54f [file] [log] [blame] [edit]
/*
* Copyright (c) 2025, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 3-Clause Clear License
* and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
* License was not distributed with this source code in the LICENSE file, you
* can obtain it at aomedia.org/license/software-license/bsd-3-c-c/. If the
* Alliance for Open Media Patent License 1.0 was not distributed with this
* source code in the PATENTS file, you can obtain it at
* aomedia.org/license/patent-license/.
*/
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
#include "test/register_state_check.h"
#include "test/function_equivalence_test.h"
#include "config/aom_config.h"
#include "config/aom_dsp_rtcd.h"
#include "config/av1_rtcd.h"
#include "aom/aom_integer.h"
#include "aom_ports/aom_timer.h"
#include "av1/common/enums.h"
#include "av1/common/intra_dip.h"
#include "av1/common/intra_matrix.h"
using libaom_test::FunctionEquivalenceTest;
namespace {
// Shared fixture for equivalence tests of the DIP matrix multiplication.
//
// F is the function-pointer type of the implementations being compared and T
// is the element type of the buffers handed to them. A derived fixture
// implements Execute() so that the reference function writes into dip_ref_
// and the function under test writes into the buffer passed as dip_tst;
// Common() then checks that the first kBufSize outputs are identical.
template <typename F, typename T>
class IntraMatrixTest : public FunctionEquivalenceTest<F> {
 protected:
  // Number of random trials run by the RandomValues test.
  static const int kIterations = 1000000;
  // Number of output elements compared after each run.
  static const int kBufSize = 8 * 8;

  virtual ~IntraMatrixTest() {}

  // Runs the reference function and the function under test on the current
  // dip_arr_ / dip_feat_ contents. dip_tst receives the tested output.
  virtual void Execute(T *dip_tst) = 0;

  // Points the output pointers at their backing storage, runs both
  // implementations via Execute() and asserts element-wise equality.
  void Common() {
    dip_ref_ = &dip_ref_data_[0];
    dip_tst_ = &dip_tst_data_[0];
    Execute(dip_tst_);
    for (int idx = 0; idx < kBufSize; ++idx) {
      ASSERT_EQ(dip_ref_[idx], dip_tst_[idx])
          << "Mismatch at output index " << idx;
    }
  }

  // All buffers are value-initialized. The tests only fill part of each
  // DIP_COLS-wide row of dip_arr_, so without this the remaining columns
  // would be passed to both implementations with indeterminate contents.
  T dip_arr_[DIP_ROWS * DIP_COLS] = {};
  T dip_feat_[DIP_COLS] = {};
  T dip_ref_data_[kBufSize] = {};
  T dip_tst_data_[kBufSize] = {};
  // Output pointers; valid from construction so a test may use them without
  // going through Common() first.
  T *dip_ref_ = dip_ref_data_;
  T *dip_tst_ = dip_tst_data_;
};
//////////////////////////////////////////////////////////////////////////////
// High bit-depth version
//////////////////////////////////////////////////////////////////////////////
// Pointer to a high bit-depth matrix multiplication implementation. The tests
// in this file call it as (dip_arr_, dip_feat_, output, bit_depth).
using IMHB = void (*)(const uint16_t *A, const uint16_t *B, uint16_t *C,
                      int bd);
// Function pair handed to the IntraMatrixTestHB suite as its test parameter.
using IntraMatrixTestFuncsHBD = libaom_test::FuncParam<IMHB>;
// High bit-depth (uint16_t) instantiation of the matrix multiplication
// equivalence fixture.
class IntraMatrixTestHB : public IntraMatrixTest<IMHB, uint16_t> {
 protected:
  // Runs the reference function into dip_ref_ and the function under test
  // into dip_tst, both on the same dip_arr_ / dip_feat_ inputs and bit depth.
  // The tested call is wrapped in ASM_REGISTER_STATE_CHECK.
  void Execute(uint16_t *dip_tst) override {
    params_.ref_func(dip_arr_, dip_feat_, dip_ref_, bit_depth_);
    ASM_REGISTER_STATE_CHECK(
        params_.tst_func(dip_arr_, dip_feat_, dip_tst, bit_depth_));
  }

  // Bit depth passed to both implementations. Each test overwrites it before
  // calling Execute(); the default only avoids an indeterminate value.
  int bit_depth_ = 8;
};
// Compares the reference and tested implementations on random feature
// vectors, cycling through every DIP mode's weight matrix and picking a
// random bit depth (8, 10 or 12) on each iteration. Stops at the first fatal
// failure.
TEST_P(IntraMatrixTestHB, RandomValues) {
  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
    switch (rng_(3)) {
      case 0: bit_depth_ = 8; break;
      case 1: bit_depth_ = 10; break;
      default: bit_depth_ = 12; break;
    }
    // Exclusive upper bound for the random feature values at this bit depth.
    const int hi = 1 << bit_depth_;
    // dip_feat_ is declared with DIP_COLS entries; fill exactly that many
    // rather than relying on a hard-coded count.
    for (int i = 0; i < DIP_COLS; ++i) {
      dip_feat_[i] = rng_(hi);
    }
    const int mode = iter % INTRA_DIP_MODE_CNT;
    // Copy the DIP_FEATURES weights of each row into a row of stride
    // DIP_COLS.
    for (int r = 0; r < DIP_ROWS; ++r) {
      for (int c = 0; c < DIP_FEATURES; ++c) {
        dip_arr_[r * DIP_COLS + c] = av1_intra_matrix_weights[mode][r][c];
      }
    }
    Common();
  }
}
#if HAVE_AVX2
// Instantiates the IntraMatrixTestHB suite under the "AVX2" prefix with the C
// and AVX2 matrix multiplication functions as its single parameter.
// NOTE(review): FuncParam is presumably constructed as (reference, tested) --
// confirm against test/function_equivalence_test.h.
INSTANTIATE_TEST_SUITE_P(AVX2, IntraMatrixTestHB,
::testing::Values(IntraMatrixTestFuncsHBD(
av1_dip_matrix_multiplication_c,
av1_dip_matrix_multiplication_avx2)));
#endif // HAVE_AVX2
// Speed tests
// Repeatedly calls the function under test on one fixed 12-bit input (weights
// of mode 0, random features). The test itself records no timing; it only
// provides the workload. Disabled by default through the DISABLED_ prefix.
TEST_P(IntraMatrixTestHB, DISABLED_Speed) {
  const int test_count = 10000000;
  bit_depth_ = 12;
  // Exclusive upper bound for the random feature values.
  const int hi = 1 << bit_depth_;
  // Use the same named dimensions as the RandomValues test instead of the
  // literals 16 / 64 / 11, so the loops stay in step with the buffer sizes.
  for (int i = 0; i < DIP_COLS; ++i) {
    dip_feat_[i] = rng_(hi);
  }
  for (int r = 0; r < DIP_ROWS; ++r) {
    for (int c = 0; c < DIP_FEATURES; ++c) {
      dip_arr_[r * DIP_COLS + c] = av1_intra_matrix_weights[0][r][c];
    }
  }
  dip_tst_ = &dip_tst_data_[0];
  for (int iter = 0; iter < test_count; ++iter) {
    ASM_REGISTER_STATE_CHECK(
        params_.tst_func(dip_arr_, dip_feat_, dip_tst_, bit_depth_));
  }
}
} // namespace
//////////////////////////////////////////////////////////////////////////////
// ResampleOutputTest
//////////////////////////////////////////////////////////////////////////////
// Pointer to a resample_output implementation. The tests in this file pass
// the destination block and its stride, the above-row and left-column context
// pointers, the 8x8 ml_output buffer, log2 of the block width and height, and
// a 0/1 transpose flag.
using ResampleOutputFunc = void (*)(uint16_t *dst, int dst_stride,
                                    const uint16_t *above_row,
                                    const uint16_t *left_col,
                                    uint16_t *ml_output, int bw_log2,
                                    int bh_log2, int transpose);
// Function pair handed to the ResampleOutputTest suite as its test parameter.
using ResampleOutputTestFuncs = libaom_test::FuncParam<ResampleOutputFunc>;
// Equivalence and speed fixture for resample_output implementations.
//
// Every test feeds the same random context pixels and the same 8x8 ml_output
// block to the reference function and to the function under test, for each
// supported block size and both transpose settings, and then either compares
// the bw x bh destination blocks or times the two functions.
class ResampleOutputTest : public FunctionEquivalenceTest<ResampleOutputFunc> {
 protected:
  static const int kMaxWidth = 64;
  static const int kMaxHeight = 64;
  static const int kBufSize = kMaxWidth * kMaxHeight;
  static const int kMlOutputSize = 8 * 8;
  // Layout of context_data_, as set up by the constructor:
  //   [0]                               above_row_[-1]
  //   [1 .. kMaxWidth]                  above_row_[0 .. kMaxWidth - 1]
  //   [kMaxWidth + 1]                   left_col_[-1]
  //   [kMaxWidth + 2 .. kMaxWidth + kMaxHeight + 1]
  //                                     left_col_[0 .. kMaxHeight - 1]
  // Two corner slots are therefore needed on top of the row and the column.
  // With a single extra slot, left_col_[kMaxHeight - 1] would fall one element
  // past the end of the array.
  static const int kContextSize = kMaxWidth + kMaxHeight + 2;

  ResampleOutputTest() {
    dst_ref_ = &dst_ref_data_[0];
    dst_tst_ = &dst_tst_data_[0];
    above_row_ = &context_data_[1];
    left_col_ = &context_data_[kMaxWidth + 2];
    ml_output_ = &ml_output_data_[0];
  }

  virtual ~ResampleOutputTest() {}

  // Returns log2(val) for the power-of-two block dimensions 4..64. Any other
  // value records a non-fatal test failure and yields 0.
  int get_log2(int val) {
    switch (val) {
      case 4: return 2;
      case 8: return 3;
      case 16: return 4;
      case 32: return 5;
      case 64: return 6;
      default: EXPECT_TRUE(false) << "Invalid block size"; return 0;
    }
  }

  // Fills the context pixels and the ml_output block with random 12-bit
  // samples and makes above_row_[-1] and left_col_[-1] hold the same value.
  void FillRandomInputs() {
    const int bit_depth = 12;
    // Exclusive upper bound, the same form IntraMatrixTestHB uses, so that the
    // maximum sample value (1 << bit_depth) - 1 can be generated as well.
    const int hi = 1 << bit_depth;
    for (int i = 0; i < kContextSize; ++i) {
      context_data_[i] = rng_(hi);
    }
    for (int i = 0; i < kMlOutputSize; ++i) {
      // The range of ml_output is clipped to the corresponding bitdepth.
      // i.e. v = clip_pixel_highbd(v, bit_depth);
      // See av1_dip_matrix_multiplication.
      ml_output_data_[i] = rng_(hi);
    }
    // The top-left corner is shared between above_row[-1] and left_col[-1].
    above_row_[-1] = context_data_[0];
    left_col_[-1] = context_data_[0];
  }

  // Runs both implementations on fresh random inputs for every block size
  // and transpose flag, and asserts that the bw x bh destination blocks match.
  void RunCorrectnessTest() {
    const int block_sizes[] = { 8, 16, 32, 64 };
    const int dst_stride = kMaxWidth;
    for (int bw : block_sizes) {
      for (int bh : block_sizes) {
        // Data-driven intra prediction only applies to blocks with w*h >= 128.
        if (bw * bh < 128) continue;
        const int bw_log2 = get_log2(bw);
        const int bh_log2 = get_log2(bh);
        for (int transpose = 0; transpose < 2; ++transpose) {
          FillRandomInputs();
          params_.ref_func(dst_ref_, dst_stride, above_row_, left_col_,
                           ml_output_, bw_log2, bh_log2, transpose);
          ASM_REGISTER_STATE_CHECK(
              params_.tst_func(dst_tst_, dst_stride, above_row_, left_col_,
                               ml_output_, bw_log2, bh_log2, transpose));
          for (int r = 0; r < bh; ++r) {
            for (int c = 0; c < bw; ++c) {
              ASSERT_EQ(dst_ref_[r * dst_stride + c],
                        dst_tst_[r * dst_stride + c])
                  << "Mismatch at (" << c << ", " << r << ") for block size "
                  << bw << "x" << bh << " (transpose=" << transpose << ")";
            }
          }
        }
      }
    }
  }

  // Times kIterations calls of the reference and of the tested function for
  // every block size and transpose flag, and prints both elapsed times (in
  // microseconds) together with their ratio.
  void RunSpeedTest() {
    const int block_sizes[] = { 8, 16, 32, 64 };
    const int dst_stride = kMaxWidth;
    const int kIterations = 100000;
    for (int bw : block_sizes) {
      for (int bh : block_sizes) {
        // Data-driven intra prediction only applies to blocks with w*h >= 128.
        if (bw * bh < 128) continue;
        const int bw_log2 = get_log2(bw);
        const int bh_log2 = get_log2(bh);
        for (int transpose = 0; transpose < 2; ++transpose) {
          FillRandomInputs();

          aom_usec_timer ref_timer, tst_timer;
          aom_usec_timer_start(&ref_timer);
          for (int i = 0; i < kIterations; ++i) {
            params_.ref_func(dst_ref_, dst_stride, above_row_, left_col_,
                             ml_output_, bw_log2, bh_log2, transpose);
          }
          aom_usec_timer_mark(&ref_timer);
          const double ref_time =
              static_cast<double>(aom_usec_timer_elapsed(&ref_timer));

          aom_usec_timer_start(&tst_timer);
          for (int i = 0; i < kIterations; ++i) {
            params_.tst_func(dst_tst_, dst_stride, above_row_, left_col_,
                             ml_output_, bw_log2, bh_log2, transpose);
          }
          aom_usec_timer_mark(&tst_timer);
          const double tst_time =
              static_cast<double>(aom_usec_timer_elapsed(&tst_timer));

          printf(
              "Block %2dx%2d (T=%d): C time = %7.2f us, SIMD time = %7.2f us, "
              "Speedup = %4.2fx\n",
              bw, bh, transpose, ref_time, tst_time, ref_time / tst_time);
        }
      }
    }
  }

  // Backing storage. The dst buffers hold a kMaxWidth x kMaxHeight block with
  // stride kMaxWidth.
  uint16_t dst_ref_data_[kBufSize];
  uint16_t dst_tst_data_[kBufSize];
  uint16_t context_data_[kContextSize];
  uint16_t ml_output_data_[kMlOutputSize];
  // Pointers passed to the functions; above_row_ and left_col_ are offset into
  // context_data_ so that index -1 is addressable.
  uint16_t *dst_ref_;
  uint16_t *dst_tst_;
  uint16_t *above_row_;
  uint16_t *left_col_;
  uint16_t *ml_output_;
};
// Output of the function under test must match the reference exactly.
TEST_P(ResampleOutputTest, Correctness) {
  this->RunCorrectnessTest();
}

// Timing comparison; the DISABLED_ prefix keeps it out of default runs.
TEST_P(ResampleOutputTest, DISABLED_Speed) {
  this->RunSpeedTest();
}
#if HAVE_AVX2
// Instantiates the ResampleOutputTest suite under the "AVX2" prefix with the C
// and AVX2 resample_output functions as its single parameter.
// NOTE(review): FuncParam is presumably constructed as (reference, tested) --
// confirm against test/function_equivalence_test.h.
INSTANTIATE_TEST_SUITE_P(AVX2, ResampleOutputTest,
::testing::Values(ResampleOutputTestFuncs(
resample_output_c, resample_output_avx2)));
#endif // HAVE_AVX2