// blob: 2c17cea3ec42eb008576c42171915bb7f41c04fe
/*
* Copyright (c) 2021, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 3-Clause Clear License
* and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
* License was not distributed with this source code in the LICENSE file, you
* can obtain it at aomedia.org/license/software-license/bsd-3-c-c/. If the
* Alliance for Open Media Patent License 1.0 was not distributed with this
* source code in the PATENTS file, you can obtain it at
* aomedia.org/license/patent-license/.
*/
#include <tuple>
#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
#include "config/av1_rtcd.h"
#include "av1/common/cfl.h"
#include "aom_ports/aom_timer.h"
#include "test/util.h"
#include "test/acm_random.h"
using std::make_tuple;
using libaom_test::ACMRandom;
// Number of randomized rounds executed by each correctness test in this file.
#define NUM_ITERATIONS (100)
// Number of back-to-back kernel calls timed by each DISABLED_ speed test.
#define NUM_ITERATIONS_SPEED (INT16_MAX)
// Expands to a comma-separated list of (TX_SIZE, &function) tuples, one for
// each of the 16 transform sizes whose dimensions are in {4, 8, 16, 32}.
// No 64-sample sizes are listed here. `function` is taken by address; the
// fixtures call it with the TX_SIZE to obtain the kernel under test.
#define ALL_CFL_TX_SIZES(function) \
make_tuple(static_cast<TX_SIZE>(TX_4X4), &function), \
make_tuple(static_cast<TX_SIZE>(TX_4X8), &function), \
make_tuple(static_cast<TX_SIZE>(TX_4X16), &function), \
make_tuple(static_cast<TX_SIZE>(TX_4X32), &function), \
make_tuple(static_cast<TX_SIZE>(TX_8X4), &function), \
make_tuple(static_cast<TX_SIZE>(TX_8X8), &function), \
make_tuple(static_cast<TX_SIZE>(TX_8X16), &function), \
make_tuple(static_cast<TX_SIZE>(TX_8X32), &function), \
make_tuple(static_cast<TX_SIZE>(TX_16X4), &function), \
make_tuple(static_cast<TX_SIZE>(TX_16X8), &function), \
make_tuple(static_cast<TX_SIZE>(TX_16X16), &function), \
make_tuple(static_cast<TX_SIZE>(TX_16X32), &function), \
make_tuple(static_cast<TX_SIZE>(TX_32X4), &function), \
make_tuple(static_cast<TX_SIZE>(TX_32X8), &function), \
make_tuple(static_cast<TX_SIZE>(TX_32X16), &function), \
make_tuple(static_cast<TX_SIZE>(TX_32X32), &function)
// Expands to a comma-separated list of (TX_SIZE, &fun420, &fun422, &fun444)
// tuples. It covers the same 16 sizes as ALL_CFL_TX_SIZES plus the nine sizes
// that have a 64-sample dimension (25 entries in total). The three function
// arguments are taken by address and are the per-TX-size getters for the
// 4:2:0, 4:2:2 and 4:4:4 subsampling kernels respectively.
#define ALL_CFL_TX_SIZES_SUBSAMPLE(fun420, fun422, fun444) \
make_tuple(static_cast<TX_SIZE>(TX_4X4), &fun420, &fun422, &fun444), \
make_tuple(static_cast<TX_SIZE>(TX_4X8), &fun420, &fun422, &fun444), \
make_tuple(static_cast<TX_SIZE>(TX_4X16), &fun420, &fun422, &fun444), \
make_tuple(static_cast<TX_SIZE>(TX_4X32), &fun420, &fun422, &fun444), \
make_tuple(static_cast<TX_SIZE>(TX_8X4), &fun420, &fun422, &fun444), \
make_tuple(static_cast<TX_SIZE>(TX_8X8), &fun420, &fun422, &fun444), \
make_tuple(static_cast<TX_SIZE>(TX_8X16), &fun420, &fun422, &fun444), \
make_tuple(static_cast<TX_SIZE>(TX_8X32), &fun420, &fun422, &fun444), \
make_tuple(static_cast<TX_SIZE>(TX_16X4), &fun420, &fun422, &fun444), \
make_tuple(static_cast<TX_SIZE>(TX_16X8), &fun420, &fun422, &fun444), \
make_tuple(static_cast<TX_SIZE>(TX_16X16), &fun420, &fun422, &fun444), \
make_tuple(static_cast<TX_SIZE>(TX_16X32), &fun420, &fun422, &fun444), \
make_tuple(static_cast<TX_SIZE>(TX_32X4), &fun420, &fun422, &fun444), \
make_tuple(static_cast<TX_SIZE>(TX_32X8), &fun420, &fun422, &fun444), \
make_tuple(static_cast<TX_SIZE>(TX_32X16), &fun420, &fun422, &fun444), \
make_tuple(static_cast<TX_SIZE>(TX_32X32), &fun420, &fun422, &fun444), \
make_tuple(static_cast<TX_SIZE>(TX_64X64), &fun420, &fun422, &fun444), \
make_tuple(static_cast<TX_SIZE>(TX_32X64), &fun420, &fun422, &fun444), \
make_tuple(static_cast<TX_SIZE>(TX_64X32), &fun420, &fun422, &fun444), \
make_tuple(static_cast<TX_SIZE>(TX_16X64), &fun420, &fun422, &fun444), \
make_tuple(static_cast<TX_SIZE>(TX_64X16), &fun420, &fun422, &fun444), \
make_tuple(static_cast<TX_SIZE>(TX_8X64), &fun420, &fun422, &fun444), \
make_tuple(static_cast<TX_SIZE>(TX_64X8), &fun420, &fun422, &fun444), \
make_tuple(static_cast<TX_SIZE>(TX_4X64), &fun420, &fun422, &fun444), \
make_tuple(static_cast<TX_SIZE>(TX_64X4), &fun420, &fun422, &fun444)
namespace {
// Compares the top-left `width` x `height` region of two buffers element by
// element. Both buffers are indexed with a row pitch of CFL_BUF_LINE elements.
// ASSERT_EQ is fatal, so this helper returns at the first mismatch; note that
// the return only leaves this function, not the calling test body.
template <typename A>
static void assert_eq(const A *a, const A *b, int width, int height) {
for (int j = 0; j < height; j++) {
for (int i = 0; i < width; i++) {
ASSERT_EQ(a[j * CFL_BUF_LINE + i], b[j * CFL_BUF_LINE + i]);
}
}
}
// Records a non-fatal gtest failure unless the implementation under test was
// strictly faster than the C reference.
//   ref_elapsed_time: elapsed time of the C reference, in microseconds.
//   elapsed_time:     elapsed time of the implementation under test, in
//                     microseconds.
// Every speed test in this file calls this helper, so the failure message is
// intentionally not tied to one particular test name.
static void assertFaster(int ref_elapsed_time, int elapsed_time) {
  EXPECT_GT(ref_elapsed_time, elapsed_time)
      << "Error: CFL speed test, SIMD slower than C." << std::endl
      << "C time: " << ref_elapsed_time << " us" << std::endl
      << "SIMD time: " << elapsed_time << " us" << std::endl;
}
// Prints one timing line for a speed test to std::cout.
//   ref_elapsed_time: elapsed time of the C reference, in microseconds.
//   elapsed_time:     elapsed time of the implementation under test, in
//                     microseconds.
//   width, height:    block dimensions shown at the start of the line.
// The speed-up ratio is printed with a precision of 2. The stream's previous
// precision is restored before returning so that this helper does not alter
// the formatting of unrelated output written to std::cout later on.
static void printSpeed(int ref_elapsed_time, int elapsed_time, int width,
                       int height) {
  // precision(n) installs the new value and hands back the old one.
  const std::streamsize saved_precision = std::cout.precision(2);
  std::cout << "[ ] " << width << "x" << height
            << ": C time = " << ref_elapsed_time
            << " us, SIMD time = " << elapsed_time << " us" << " (~"
            << ref_elapsed_time / static_cast<double>(elapsed_time) << "x) "
            << std::endl;
  std::cout.precision(saved_precision);
}
// State shared by every CfL fixture in this file: the transform size under
// test, its dimensions, and a random number generator.
class CFLTest {
 public:
  virtual ~CFLTest() = default;

  // Stores `tx`, looks up the matching width and height in tx_size_wide /
  // tx_size_high, and reseeds the generator with the deterministic seed.
  void init(TX_SIZE tx) {
    tx_size = tx;
    width = tx_size_wide[tx];
    height = tx_size_high[tx];
    rnd.Reset(ACMRandom::DeterministicSeed());
  }

 protected:
  TX_SIZE tx_size;  // Transform size selected by init().
  int width;        // tx_size_wide[tx_size].
  int height;       // tx_size_high[tx_size].
  ACMRandom rnd;    // Reseeded by init().
};
// CFLTest plus a pair of equally sized buffers: `data` for the implementation
// under test and `data_ref` for the reference implementation.
template <typename I>
class CFLTestWithData : public CFLTest {
 public:
  virtual ~CFLTestWithData() = default;

 protected:
  I data[CFL_BUF_SQUARE];
  I data_ref[CFL_BUF_SQUARE];

  // Fills the top-left width x height region (row pitch CFL_BUF_LINE) of both
  // buffers with the same values, drawing one value per element by calling the
  // given ACMRandom member function on `rnd`.
  void randData(I (ACMRandom::*random)()) {
    for (int row = 0; row < height; ++row) {
      I *const dst = data + row * CFL_BUF_LINE;
      I *const dst_ref = data_ref + row * CFL_BUF_LINE;
      for (int col = 0; col < width; ++col) {
        const I sample = (rnd.*random)();
        dst[col] = sample;
        dst_ref[col] = sample;
      }
    }
  }
};
// CFLTest plus four heap buffers obtained from aom_memalign with a 32-byte
// alignment argument: chroma output and subsampled-luma input, each in a
// "under test" and a "reference" flavour. Every buffer holds CFL_BUF_SQUARE
// elements and is zero-filled on construction.
//
// The class owns raw pointers that are released in the destructor, so copying
// is disabled: a copy would make two objects free the same buffers.
template <typename I>
class CFLTestWithAlignedData : public CFLTest {
 public:
  CFLTestWithAlignedData()
      : chroma_pels_ref(allocZeroed<I>()), chroma_pels(allocZeroed<I>()),
        sub_luma_pels_ref(allocZeroed<int16_t>()),
        sub_luma_pels(allocZeroed<int16_t>()), alpha_q3(0), dc(0) {}

  CFLTestWithAlignedData(const CFLTestWithAlignedData &) = delete;
  CFLTestWithAlignedData &operator=(const CFLTestWithAlignedData &) = delete;

  ~CFLTestWithAlignedData() {
    aom_free(chroma_pels_ref);
    aom_free(sub_luma_pels_ref);
    aom_free(chroma_pels);
    aom_free(sub_luma_pels);
  }

 protected:
  I *chroma_pels_ref;
  I *chroma_pels;
  int16_t *sub_luma_pels_ref;
  int16_t *sub_luma_pels;
  int alpha_q3;  // Drawn by randData(): rnd(33) - 16.
  I dc;          // Drawn by randData(): rnd(1 << bd).

  // Draws a fresh alpha_q3 and dc, then fills the top-left width x height
  // region (row pitch CFL_BUF_LINE) of the buffers: both chroma buffers get
  // `dc`, and both luma buffers get the same per-element value drawn from
  // rnd(1 << (bd + 3)).
  void randData(int bd) {
    alpha_q3 = this->rnd(33) - 16;
    dc = this->rnd(1 << bd);
    for (int j = 0; j < this->height; j++) {
      for (int i = 0; i < this->width; i++) {
        const int idx = j * CFL_BUF_LINE + i;
        chroma_pels[idx] = dc;
        chroma_pels_ref[idx] = dc;
        sub_luma_pels_ref[idx] = sub_luma_pels[idx] =
            this->rnd(1 << (bd + 3));
      }
    }
  }

 private:
  // Allocates CFL_BUF_SQUARE elements of T through aom_memalign and zeroes
  // them. The zero-fill is skipped when the allocation returns a null pointer
  // so that memset is never handed a null destination.
  // NOTE(review): assumes aom_memalign reports failure by returning NULL and
  // that aom_free accepts NULL - confirm against aom_mem.
  template <typename T>
  static T *allocZeroed() {
    const size_t bytes = sizeof(T) * CFL_BUF_SQUARE;
    T *const buf = static_cast<T *>(aom_memalign(32, bytes));
    if (buf != nullptr) memset(buf, 0, bytes);
    return buf;
  }
};
// Getter that maps a TX_SIZE to a subtract-average kernel.
typedef cfl_subtract_average_fn (*sub_avg_fn)(TX_SIZE tx_size);
// Test parameter: the transform size and the getter for the kernel under test.
typedef std::tuple<TX_SIZE, sub_avg_fn> sub_avg_param;

// Fixture for the subtract-average kernels. SetUp() resolves both the kernel
// under test (through the getter in the parameter) and the C reference.
class CFLSubAvgTest : public ::testing::TestWithParam<sub_avg_param>,
                      public CFLTestWithData<int16_t> {
 public:
  virtual ~CFLSubAvgTest() {}

  void SetUp() override {
    const sub_avg_param &param = GetParam();
    CFLTest::init(std::get<0>(param));
    const sub_avg_fn get_sub_avg = std::get<1>(param);
    sub_avg = get_sub_avg(tx_size);
    sub_avg_ref = cfl_get_subtract_average_fn_c(tx_size);
  }

 protected:
  cfl_subtract_average_fn sub_avg;      // Kernel under test.
  cfl_subtract_average_fn sub_avg_ref;  // C reference kernel.
};
GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CFLSubAvgTest);

// Runs the kernel under test and the C reference in place on identical random
// input (Rand15Signed) and checks that the results match, NUM_ITERATIONS
// times.
TEST_P(CFLSubAvgTest, SubAvgTest) {
  // Each kernel reads and writes the same buffer; the source view is the
  // buffer reinterpreted as unsigned 16-bit.
  uint16_t *const src = reinterpret_cast<uint16_t *>(data);
  uint16_t *const src_ref = reinterpret_cast<uint16_t *>(data_ref);
  for (int iter = 0; iter < NUM_ITERATIONS; ++iter) {
    randData(&ACMRandom::Rand15Signed);
    sub_avg(src, data);
    sub_avg_ref(src_ref, data_ref);
    assert_eq<int16_t>(data, data_ref, width, height);
  }
}
// Times NUM_ITERATIONS_SPEED in-place calls of the C reference, then of the
// kernel under test, prints both times and expects the latter to be faster.
// Disabled by default through the DISABLED_ prefix.
TEST_P(CFLSubAvgTest, DISABLED_SubAvgSpeedTest) {
  randData(&ACMRandom::Rand15Signed);
  uint16_t *const src = reinterpret_cast<uint16_t *>(data);
  uint16_t *const src_ref = reinterpret_cast<uint16_t *>(data_ref);

  aom_usec_timer ref_timer;
  aom_usec_timer_start(&ref_timer);
  for (int run = 0; run < NUM_ITERATIONS_SPEED; ++run) {
    sub_avg_ref(src_ref, data_ref);
  }
  aom_usec_timer_mark(&ref_timer);
  const int ref_elapsed_time =
      static_cast<int>(aom_usec_timer_elapsed(&ref_timer));

  aom_usec_timer timer;
  aom_usec_timer_start(&timer);
  for (int run = 0; run < NUM_ITERATIONS_SPEED; ++run) {
    sub_avg(src, data);
  }
  aom_usec_timer_mark(&timer);
  const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));

  printSpeed(ref_elapsed_time, elapsed_time, width, height);
  assertFaster(ref_elapsed_time, elapsed_time);
}
// Generic fixture for luma subsampling kernels.
//   S: gtest parameter type; a tuple of (TX_SIZE, getter420, getter422,
//      getter444).
//   T: kernel function-pointer type.
//   I: input sample type.
// SetUp() resolves the three kernels under test. The *_ref members are not
// assigned here; a derived fixture is expected to set them in its own SetUp().
template <typename S, typename T, typename I>
class CFLSubsampleTest : public ::testing::TestWithParam<S>,
                         public CFLTestWithData<I> {
 public:
  virtual void SetUp() {
    CFLTest::init(std::get<0>(this->GetParam()));
    fun_420 = std::get<1>(this->GetParam())(this->tx_size);
    fun_422 = std::get<2>(this->GetParam())(this->tx_size);
    fun_444 = std::get<3>(this->GetParam())(this->tx_size);
  }

 protected:
  T fun_420;
  T fun_422;
  T fun_444;
  T fun_420_ref;
  T fun_422_ref;
  T fun_444_ref;

  // Runs `fun` and `fun_ref` on identical random input NUM_ITERATIONS times
  // and checks that the top-left sub_width x sub_height region of their
  // outputs matches. `random` is the ACMRandom member used to draw inputs.
  void subsampleTest(T fun, T fun_ref, int sub_width, int sub_height,
                     I (ACMRandom::*random)()) {
    uint16_t sub_luma_pels[CFL_BUF_SQUARE];
    uint16_t sub_luma_pels_ref[CFL_BUF_SQUARE];
    for (int it = 0; it < NUM_ITERATIONS; it++) {
      CFLTestWithData<I>::randData(random);
      fun(this->data, CFL_BUF_LINE, sub_luma_pels);
      fun_ref(this->data_ref, CFL_BUF_LINE, sub_luma_pels_ref);
      assert_eq<uint16_t>(sub_luma_pels, sub_luma_pels_ref, sub_width,
                          sub_height);
    }
  }

  // Times NUM_ITERATIONS_SPEED calls of `fun_ref`, then of `fun`, prints both
  // times and expects `fun` to be faster. Outputs are not compared here.
  void subsampleSpeedTest(T fun, T fun_ref, I (ACMRandom::*random)()) {
    uint16_t sub_luma_pels[CFL_BUF_SQUARE];
    uint16_t sub_luma_pels_ref[CFL_BUF_SQUARE];
    aom_usec_timer ref_timer;
    aom_usec_timer timer;
    CFLTestWithData<I>::randData(random);
    aom_usec_timer_start(&ref_timer);
    for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
      // The reference writes to the *_ref buffer, matching subsampleTest().
      fun_ref(this->data_ref, CFL_BUF_LINE, sub_luma_pels_ref);
    }
    aom_usec_timer_mark(&ref_timer);
    int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
    aom_usec_timer_start(&timer);
    for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
      fun(this->data, CFL_BUF_LINE, sub_luma_pels);
    }
    aom_usec_timer_mark(&timer);
    int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
    printSpeed(ref_elapsed_time, elapsed_time, this->width, this->height);
    assertFaster(ref_elapsed_time, elapsed_time);
  }
};
// Getter that maps a TX_SIZE to a high-bit-depth luma subsampling kernel.
typedef cfl_subsample_hbd_fn (*get_subsample_hbd_fn)(TX_SIZE tx_size);
// Test parameter: transform size plus the 4:2:0, 4:2:2 and 4:4:4 getters of
// the implementation under test.
typedef std::tuple<TX_SIZE, get_subsample_hbd_fn, get_subsample_hbd_fn,
                   get_subsample_hbd_fn>
    subsample_hbd_param;

// High-bit-depth instantiation of CFLSubsampleTest. On top of the base
// SetUp(), it resolves the three C reference kernels for the transform size.
class CFLSubsampleHBDTest
    : public CFLSubsampleTest<subsample_hbd_param, cfl_subsample_hbd_fn,
                              uint16_t> {
 public:
  virtual ~CFLSubsampleHBDTest() {}

  void SetUp() override {
    // The base SetUp() must run first: it sets tx_size, which is used below.
    CFLSubsampleTest::SetUp();
    const TX_SIZE tx = tx_size;
    fun_420_ref = cfl_get_luma_subsampling_420_hbd_c(tx);
    fun_422_ref = cfl_get_luma_subsampling_422_hbd_c(tx);
    fun_444_ref = cfl_get_luma_subsampling_444_hbd_c(tx);
  }
};
GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CFLSubsampleHBDTest);

// 4:2:0: the compared output region is halved in both dimensions.
TEST_P(CFLSubsampleHBDTest, SubsampleHBD420Test) {
  const int sub_width = width >> 1;
  const int sub_height = height >> 1;
  subsampleTest(fun_420, fun_420_ref, sub_width, sub_height,
                &ACMRandom::Rand12);
}

TEST_P(CFLSubsampleHBDTest, DISABLED_SubsampleHBD420SpeedTest) {
  subsampleSpeedTest(fun_420, fun_420_ref, &ACMRandom::Rand12);
}

// 4:2:2: the compared output region is halved horizontally only.
TEST_P(CFLSubsampleHBDTest, SubsampleHBD422Test) {
  const int sub_width = width >> 1;
  subsampleTest(fun_422, fun_422_ref, sub_width, height, &ACMRandom::Rand12);
}

TEST_P(CFLSubsampleHBDTest, DISABLED_SubsampleHBD422SpeedTest) {
  subsampleSpeedTest(fun_422, fun_422_ref, &ACMRandom::Rand12);
}

// 4:4:4: the compared output region has the full block dimensions.
TEST_P(CFLSubsampleHBDTest, SubsampleHBD444Test) {
  subsampleTest(fun_444, fun_444_ref, width, height, &ACMRandom::Rand12);
}

TEST_P(CFLSubsampleHBDTest, DISABLED_SubsampleHBD444SpeedTest) {
  subsampleSpeedTest(fun_444, fun_444_ref, &ACMRandom::Rand12);
}
// Getter that maps a TX_SIZE to a high-bit-depth CfL prediction kernel.
typedef cfl_predict_hbd_fn (*get_predict_fn_hbd)(TX_SIZE tx_size);
// Test parameter: the transform size and the getter for the kernel under test.
typedef std::tuple<TX_SIZE, get_predict_fn_hbd> predict_param_hbd;

// Fixture for the high-bit-depth prediction kernels. SetUp() resolves both the
// kernel under test (through the getter in the parameter) and the C reference.
class CFLPredictHBDTest : public ::testing::TestWithParam<predict_param_hbd>,
                          public CFLTestWithAlignedData<uint16_t> {
 public:
  virtual ~CFLPredictHBDTest() {}

  void SetUp() override {
    const predict_param_hbd &param = GetParam();
    CFLTest::init(std::get<0>(param));
    const get_predict_fn_hbd get_predict = std::get<1>(param);
    predict = get_predict(tx_size);
    predict_ref = cfl_get_predict_hbd_fn_c(tx_size);
  }

 protected:
  cfl_predict_hbd_fn predict;      // Kernel under test.
  cfl_predict_hbd_fn predict_ref;  // C reference kernel.
};
GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CFLPredictHBDTest);

// Runs the kernel under test and the C reference on identical random data at
// a bit depth of 12 and checks that the chroma outputs match, NUM_ITERATIONS
// times.
TEST_P(CFLPredictHBDTest, PredictHBDTest) {
  const int bd = 12;
  for (int iter = 0; iter < NUM_ITERATIONS; ++iter) {
    randData(bd);
    predict(sub_luma_pels, chroma_pels, CFL_BUF_LINE, alpha_q3, bd);
    predict_ref(sub_luma_pels_ref, chroma_pels_ref, CFL_BUF_LINE, alpha_q3,
                bd);
    assert_eq<uint16_t>(chroma_pels, chroma_pels_ref, width, height);
  }
}
// Times NUM_ITERATIONS_SPEED calls of the C reference, then of the kernel
// under test (bit depth 12), prints both times and expects the latter to be
// faster. Disabled by default through the DISABLED_ prefix.
TEST_P(CFLPredictHBDTest, DISABLED_PredictHBDSpeedTest) {
  const int bd = 12;
  randData(bd);

  aom_usec_timer ref_timer;
  aom_usec_timer_start(&ref_timer);
  for (int run = 0; run < NUM_ITERATIONS_SPEED; ++run) {
    predict_ref(sub_luma_pels_ref, chroma_pels_ref, CFL_BUF_LINE, alpha_q3,
                bd);
  }
  aom_usec_timer_mark(&ref_timer);
  const int ref_elapsed_time =
      static_cast<int>(aom_usec_timer_elapsed(&ref_timer));

  aom_usec_timer timer;
  aom_usec_timer_start(&timer);
  for (int run = 0; run < NUM_ITERATIONS_SPEED; ++run) {
    predict(sub_luma_pels, chroma_pels, CFL_BUF_LINE, alpha_q3, bd);
  }
  aom_usec_timer_mark(&timer);
  const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));

  printSpeed(ref_elapsed_time, elapsed_time, width, height);
  assertFaster(ref_elapsed_time, elapsed_time);
}
// Expands to a comma-separated list of (TX_SIZE, &function) tuples for the
// "121" subsampling tests: the 16 sizes with dimensions in {4, 8, 16, 32}
// followed by the nine sizes that have a 64-sample dimension (25 entries).
// `function` is taken by address and is the per-TX-size kernel getter.
#define ALL_CFL_TX_SIZES_121(function) \
make_tuple(static_cast<TX_SIZE>(TX_4X4), &function), \
make_tuple(static_cast<TX_SIZE>(TX_4X8), &function), \
make_tuple(static_cast<TX_SIZE>(TX_4X16), &function), \
make_tuple(static_cast<TX_SIZE>(TX_4X32), &function), \
make_tuple(static_cast<TX_SIZE>(TX_8X4), &function), \
make_tuple(static_cast<TX_SIZE>(TX_8X8), &function), \
make_tuple(static_cast<TX_SIZE>(TX_8X16), &function), \
make_tuple(static_cast<TX_SIZE>(TX_8X32), &function), \
make_tuple(static_cast<TX_SIZE>(TX_16X4), &function), \
make_tuple(static_cast<TX_SIZE>(TX_16X8), &function), \
make_tuple(static_cast<TX_SIZE>(TX_16X16), &function), \
make_tuple(static_cast<TX_SIZE>(TX_16X32), &function), \
make_tuple(static_cast<TX_SIZE>(TX_32X4), &function), \
make_tuple(static_cast<TX_SIZE>(TX_32X8), &function), \
make_tuple(static_cast<TX_SIZE>(TX_32X16), &function), \
make_tuple(static_cast<TX_SIZE>(TX_32X32), &function), \
make_tuple(static_cast<TX_SIZE>(TX_64X64), &function), \
make_tuple(static_cast<TX_SIZE>(TX_32X64), &function), \
make_tuple(static_cast<TX_SIZE>(TX_64X32), &function), \
make_tuple(static_cast<TX_SIZE>(TX_16X64), &function), \
make_tuple(static_cast<TX_SIZE>(TX_64X16), &function), \
make_tuple(static_cast<TX_SIZE>(TX_8X64), &function), \
make_tuple(static_cast<TX_SIZE>(TX_64X8), &function), \
make_tuple(static_cast<TX_SIZE>(TX_4X64), &function), \
make_tuple(static_cast<TX_SIZE>(TX_64X4), &function)
typedef cfl_subsample_hbd_fn (*get_subsample_hbd_fn)(TX_SIZE tx_size);
// Test parameter: the transform size and the getter for the kernel under test.
typedef std::tuple<TX_SIZE, get_subsample_hbd_fn> CflSubsample121HbdParam;

// Fixture comparing an optimized 4:2:0 "121" high-bit-depth subsampling kernel
// against cfl_get_luma_subsampling_420_hbd_121_c for one transform size.
class CflSubsample121HBDTest
    : public ::testing::TestWithParam<CflSubsample121HbdParam>,
      public CFLTestWithData<uint16_t> {
 public:
  virtual ~CflSubsample121HBDTest() {}

  void SetUp() override {
    const CflSubsample121HbdParam &param = GetParam();
    CFLTest::init(std::get<0>(param));
    tgt_fn_ = std::get<1>(param)(tx_size);
    ref_fn_ = cfl_get_luma_subsampling_420_hbd_121_c(tx_size);
  }

 protected:
  cfl_subsample_hbd_fn ref_fn_;  // C reference kernel.
  cfl_subsample_hbd_fn tgt_fn_;  // Kernel under test.
};
// Runs the reference and the kernel under test on identical random 12-bit
// input and checks that the half-width, half-height output regions match,
// NUM_ITERATIONS times.
TEST_P(CflSubsample121HBDTest, Match) {
  uint16_t ref_output[CFL_BUF_SQUARE];
  uint16_t tgt_output[CFL_BUF_SQUARE];
  const int out_width = width >> 1;
  const int out_height = height >> 1;
  for (int iter = 0; iter < NUM_ITERATIONS; ++iter) {
    randData(&ACMRandom::Rand12);
    ref_fn_(data_ref, CFL_BUF_LINE, ref_output);
    tgt_fn_(data, CFL_BUF_LINE, tgt_output);
    assert_eq<uint16_t>(ref_output, tgt_output, out_width, out_height);
  }
}
// Times NUM_ITERATIONS_SPEED calls of the reference, then of the kernel under
// test, prints both times and expects the latter to be faster. Disabled by
// default through the DISABLED_ prefix.
TEST_P(CflSubsample121HBDTest, DISABLED_Speed) {
  uint16_t ref_output[CFL_BUF_SQUARE];
  uint16_t tgt_output[CFL_BUF_SQUARE];
  randData(&ACMRandom::Rand12);

  aom_usec_timer ref_timer;
  aom_usec_timer_start(&ref_timer);
  for (int run = 0; run < NUM_ITERATIONS_SPEED; ++run) {
    ref_fn_(data_ref, CFL_BUF_LINE, ref_output);
  }
  aom_usec_timer_mark(&ref_timer);
  const int ref_elapsed_time =
      static_cast<int>(aom_usec_timer_elapsed(&ref_timer));

  aom_usec_timer timer;
  aom_usec_timer_start(&timer);
  for (int run = 0; run < NUM_ITERATIONS_SPEED; ++run) {
    tgt_fn_(data, CFL_BUF_LINE, tgt_output);
  }
  aom_usec_timer_mark(&timer);
  const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));

  printSpeed(ref_elapsed_time, elapsed_time, width, height);
  assertFaster(ref_elapsed_time, elapsed_time);
}
// AVX2 instantiation of CflSubsample121HBDTest: one test case per entry of
// ALL_CFL_TX_SIZES_121, each using the AVX2 kernel getter. Compiled only when
// HAVE_AVX2 is set.
#if HAVE_AVX2
const CflSubsample121HbdParam cfl_subsample_121_hbd_avx2_params[] = {
ALL_CFL_TX_SIZES_121(cfl_get_luma_subsampling_420_hbd_121_avx2)
};
INSTANTIATE_TEST_SUITE_P(
AVX2, CflSubsample121HBDTest,
::testing::ValuesIn(cfl_subsample_121_hbd_avx2_params));
#endif
// Expands to a comma-separated list of (TX_SIZE, &function) tuples for the
// "colocated" subsampling tests: the 16 sizes with dimensions in
// {4, 8, 16, 32} followed by the nine sizes that have a 64-sample dimension
// (25 entries). `function` is taken by address and is the per-TX-size kernel
// getter.
#define ALL_CFL_TX_SIZES_COLOCATED(function) \
make_tuple(static_cast<TX_SIZE>(TX_4X4), &function), \
make_tuple(static_cast<TX_SIZE>(TX_4X8), &function), \
make_tuple(static_cast<TX_SIZE>(TX_4X16), &function), \
make_tuple(static_cast<TX_SIZE>(TX_4X32), &function), \
make_tuple(static_cast<TX_SIZE>(TX_8X4), &function), \
make_tuple(static_cast<TX_SIZE>(TX_8X8), &function), \
make_tuple(static_cast<TX_SIZE>(TX_8X16), &function), \
make_tuple(static_cast<TX_SIZE>(TX_8X32), &function), \
make_tuple(static_cast<TX_SIZE>(TX_16X4), &function), \
make_tuple(static_cast<TX_SIZE>(TX_16X8), &function), \
make_tuple(static_cast<TX_SIZE>(TX_16X16), &function), \
make_tuple(static_cast<TX_SIZE>(TX_16X32), &function), \
make_tuple(static_cast<TX_SIZE>(TX_32X4), &function), \
make_tuple(static_cast<TX_SIZE>(TX_32X8), &function), \
make_tuple(static_cast<TX_SIZE>(TX_32X16), &function), \
make_tuple(static_cast<TX_SIZE>(TX_32X32), &function), \
make_tuple(static_cast<TX_SIZE>(TX_64X64), &function), \
make_tuple(static_cast<TX_SIZE>(TX_32X64), &function), \
make_tuple(static_cast<TX_SIZE>(TX_64X32), &function), \
make_tuple(static_cast<TX_SIZE>(TX_16X64), &function), \
make_tuple(static_cast<TX_SIZE>(TX_64X16), &function), \
make_tuple(static_cast<TX_SIZE>(TX_8X64), &function), \
make_tuple(static_cast<TX_SIZE>(TX_64X8), &function), \
make_tuple(static_cast<TX_SIZE>(TX_4X64), &function), \
make_tuple(static_cast<TX_SIZE>(TX_64X4), &function)
typedef cfl_subsample_hbd_fn (*get_subsample_hbd_fn)(TX_SIZE tx_size);
// Test parameter: the transform size and the getter for the kernel under test.
typedef std::tuple<TX_SIZE, get_subsample_hbd_fn> CflSubsampleColocatedHbdParam;

// Fixture comparing an optimized 4:2:0 "colocated" high-bit-depth subsampling
// kernel against cfl_get_luma_subsampling_420_hbd_colocated_c for one
// transform size.
class CflSubsampleColocatedHBDTest
    : public ::testing::TestWithParam<CflSubsampleColocatedHbdParam>,
      public CFLTestWithData<uint16_t> {
 public:
  virtual ~CflSubsampleColocatedHBDTest() {}

  void SetUp() override {
    const CflSubsampleColocatedHbdParam &param = GetParam();
    CFLTest::init(std::get<0>(param));
    tgt_fn_ = std::get<1>(param)(tx_size);
    ref_fn_ = cfl_get_luma_subsampling_420_hbd_colocated_c(tx_size);
  }

 protected:
  cfl_subsample_hbd_fn ref_fn_;  // C reference kernel.
  cfl_subsample_hbd_fn tgt_fn_;  // Kernel under test.
};
// Runs the reference and the kernel under test on identical random 12-bit
// input and checks that the half-width, half-height output regions match,
// NUM_ITERATIONS times.
TEST_P(CflSubsampleColocatedHBDTest, Match) {
  uint16_t ref_output[CFL_BUF_SQUARE];
  uint16_t tgt_output[CFL_BUF_SQUARE];
  const int out_width = width >> 1;
  const int out_height = height >> 1;
  for (int iter = 0; iter < NUM_ITERATIONS; ++iter) {
    randData(&ACMRandom::Rand12);
    ref_fn_(data_ref, CFL_BUF_LINE, ref_output);
    tgt_fn_(data, CFL_BUF_LINE, tgt_output);
    assert_eq<uint16_t>(ref_output, tgt_output, out_width, out_height);
  }
}
// Times NUM_ITERATIONS_SPEED calls of the reference, then of the kernel under
// test, prints both times and expects the latter to be faster. Disabled by
// default through the DISABLED_ prefix.
TEST_P(CflSubsampleColocatedHBDTest, DISABLED_Speed) {
  uint16_t ref_output[CFL_BUF_SQUARE];
  uint16_t tgt_output[CFL_BUF_SQUARE];
  randData(&ACMRandom::Rand12);

  aom_usec_timer ref_timer;
  aom_usec_timer_start(&ref_timer);
  for (int run = 0; run < NUM_ITERATIONS_SPEED; ++run) {
    ref_fn_(data_ref, CFL_BUF_LINE, ref_output);
  }
  aom_usec_timer_mark(&ref_timer);
  const int ref_elapsed_time =
      static_cast<int>(aom_usec_timer_elapsed(&ref_timer));

  aom_usec_timer timer;
  aom_usec_timer_start(&timer);
  for (int run = 0; run < NUM_ITERATIONS_SPEED; ++run) {
    tgt_fn_(data, CFL_BUF_LINE, tgt_output);
  }
  aom_usec_timer_mark(&timer);
  const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));

  printSpeed(ref_elapsed_time, elapsed_time, width, height);
  assertFaster(ref_elapsed_time, elapsed_time);
}
// AVX2 instantiation of CflSubsampleColocatedHBDTest: one test case per entry
// of ALL_CFL_TX_SIZES_COLOCATED, each using the AVX2 kernel getter. Compiled
// only when HAVE_AVX2 is set.
#if HAVE_AVX2
const CflSubsampleColocatedHbdParam
cfl_subsample_colocated_hbd_avx2_params[] = { ALL_CFL_TX_SIZES_COLOCATED(
cfl_get_luma_subsampling_420_hbd_colocated_avx2) };
INSTANTIATE_TEST_SUITE_P(
AVX2, CflSubsampleColocatedHBDTest,
::testing::ValuesIn(cfl_subsample_colocated_hbd_avx2_params));
#endif
// Temporarily disable the sse4 function since it might overflow.
// Re-enable the test when the parameter range is restricted to 32 bits,
// or an updated sse4 function could handle intermediate 64 bits.
// The trailing "&& 0" keeps this whole section out of the build regardless of
// HAVE_SSE4_1.
#if HAVE_SSE4_1 && 0
// Signature shared by mhccp_predict_hv_hbd_c and mhccp_predict_hv_hbd_sse4_1.
typedef void (*mhccp_predict_hv_hbd_fn)(const uint16_t *input, uint16_t *dst,
bool have_top, bool have_left,
int dst_stride, int64_t *alpha_q3,
int bit_depth, int width, int height,
int dir);
// Test parameter, in order: have_top, have_left, bit_depth, width, height, dir.
typedef std::tuple<bool, bool, int, int, int, int> mhccp_param;
// Fixture comparing mhccp_predict_hv_hbd_sse4_1 against mhccp_predict_hv_hbd_c
// for one parameter combination. Both output buffers are zeroed in SetUp().
class MhccpPredictHVHBDTest : public ::testing::TestWithParam<mhccp_param> {
public:
virtual void SetUp() {
have_top_ = std::get<0>(this->GetParam());
have_left_ = std::get<1>(this->GetParam());
bit_depth_ = std::get<2>(this->GetParam());
width_ = std::get<3>(this->GetParam());
height_ = std::get<4>(this->GetParam());
dir_ = std::get<5>(this->GetParam());
tgt_fn_ = mhccp_predict_hv_hbd_sse4_1;
ref_fn_ = mhccp_predict_hv_hbd_c;
memset(tgt_buffer_, 0, sizeof(uint16_t) * CFL_BUF_SQUARE);
memset(ref_buffer_, 0, sizeof(uint16_t) * CFL_BUF_SQUARE);
}
virtual ~MhccpPredictHVHBDTest() {}
protected:
mhccp_predict_hv_hbd_fn tgt_fn_;
mhccp_predict_hv_hbd_fn ref_fn_;
bool have_top_;
bool have_left_;
int64_t alpha_q3_[MHCCP_NUM_PARAMS];
int bit_depth_;
int width_;
int height_;
int dir_;
uint16_t input_buffer_[(LINE_NUM + 1 + CFL_BUF_LINE * 2) *
(LINE_NUM + 1 + CFL_BUF_LINE * 2)];
uint16_t tgt_buffer_[CFL_BUF_SQUARE];
uint16_t ref_buffer_[CFL_BUF_SQUARE];
ACMRandom rnd_;
// Draws MHCCP_NUM_PARAMS coefficients from rnd_(33) and fills a
// width_ x height_ region of input_buffer_, starting at index 0 with a row
// pitch of CFL_BUF_LINE, with Rand16() values.
// NOTE(review): the tests pass `input_buffer_ + offset` to the kernels, with
// the offset computed from a pitch of 2 * CFL_BUF_LINE, so the region written
// here does not obviously coincide with what the kernels read; the rest of
// input_buffer_ is never initialized - confirm.
// NOTE(review): Rand16() is not limited to bit_depth_ bits - confirm that
// full 16-bit inputs are intended.
void randData() {
for (int i = 0; i < MHCCP_NUM_PARAMS; ++i) {
alpha_q3_[i] = this->rnd_(33);
}
for (int j = 0; j < height_; j++) {
for (int i = 0; i < width_; i++) {
input_buffer_[j * CFL_BUF_LINE + i] = this->rnd_.Rand16();
}
}
}
};
// Runs both implementations on the same input and coefficients and checks that
// the width_ x height_ output regions match, NUM_ITERATIONS times.
TEST_P(MhccpPredictHVHBDTest, PredictTest) {
// input_stride is only used to compute the start offset; it is not passed to
// the kernels.
const int input_stride = 2 * CFL_BUF_LINE;
const int offset = (LINE_NUM + 1) + (LINE_NUM + 1) * input_stride;
for (int it = 0; it < NUM_ITERATIONS; it++) {
randData();
tgt_fn_(input_buffer_ + offset, tgt_buffer_, have_top_, have_left_,
CFL_BUF_LINE, alpha_q3_, bit_depth_, width_, height_, dir_);
ref_fn_(input_buffer_ + offset, ref_buffer_, have_top_, have_left_,
CFL_BUF_LINE, alpha_q3_, bit_depth_, width_, height_, dir_);
assert_eq<uint16_t>(ref_buffer_, tgt_buffer_, width_, height_);
}
}
// Times NUM_ITERATIONS_SPEED calls of the reference, then of the SSE4.1
// function, prints both times and expects the latter to be faster.
// Note that both timing loops write into tgt_buffer_; ref_buffer_ is unused
// here.
TEST_P(MhccpPredictHVHBDTest, DISABLED_PredictSpeedTest) {
aom_usec_timer ref_timer;
aom_usec_timer timer;
randData();
aom_usec_timer_start(&ref_timer);
const int input_stride = 2 * CFL_BUF_LINE;
const int offset = (LINE_NUM + 1) + (LINE_NUM + 1) * input_stride;
for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
ref_fn_(input_buffer_ + offset, tgt_buffer_, have_top_, have_left_,
CFL_BUF_LINE, alpha_q3_, bit_depth_, width_, height_, dir_);
}
aom_usec_timer_mark(&ref_timer);
const int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
aom_usec_timer_start(&timer);
for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
tgt_fn_(input_buffer_ + offset, tgt_buffer_, have_top_, have_left_,
CFL_BUF_LINE, alpha_q3_, bit_depth_, width_, height_, dir_);
}
aom_usec_timer_mark(&timer);
const int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
printSpeed(ref_elapsed_time, elapsed_time, width_, height_);
assertFaster(ref_elapsed_time, elapsed_time);
}
// Cartesian product of: have_top, have_left, bit depth {10, 12},
// width and height in {4, 8, 16, 32, 64}, and dir {0, 1}.
INSTANTIATE_TEST_SUITE_P(SSE4_1, MhccpPredictHVHBDTest,
::testing::Combine(::testing::Bool(),
::testing::Bool(),
::testing::Values(10, 12),
::testing::Values(4, 8, 16, 32, 64),
::testing::Values(4, 8, 16, 32, 64),
::testing::Values(0, 1)));
#endif // HAVE_SSE4_1
// SSE2: subtract-average tests for every size in ALL_CFL_TX_SIZES.
#if HAVE_SSE2
const sub_avg_param sub_avg_sizes_sse2[] = { ALL_CFL_TX_SIZES(
cfl_get_subtract_average_fn_sse2) };
INSTANTIATE_TEST_SUITE_P(SSE2, CFLSubAvgTest,
::testing::ValuesIn(sub_avg_sizes_sse2));
#endif
// SSSE3: high-bit-depth 4:2:0 / 4:2:2 / 4:4:4 subsampling tests for every size
// in ALL_CFL_TX_SIZES_SUBSAMPLE.
#if HAVE_SSSE3
const subsample_hbd_param subsample_hbd_sizes_ssse3[] = {
ALL_CFL_TX_SIZES_SUBSAMPLE(cfl_get_luma_subsampling_420_hbd_ssse3,
cfl_get_luma_subsampling_422_hbd_ssse3,
cfl_get_luma_subsampling_444_hbd_ssse3)
};
INSTANTIATE_TEST_SUITE_P(SSSE3, CFLSubsampleHBDTest,
::testing::ValuesIn(subsample_hbd_sizes_ssse3));
#endif // HAVE_SSSE3
// AVX2: subtract-average, high-bit-depth subsampling and high-bit-depth
// prediction tests. The subtract-average and prediction lists use
// ALL_CFL_TX_SIZES; the subsampling list uses ALL_CFL_TX_SIZES_SUBSAMPLE.
#if HAVE_AVX2
const sub_avg_param sub_avg_sizes_avx2[] = { ALL_CFL_TX_SIZES(
cfl_get_subtract_average_fn_avx2) };
INSTANTIATE_TEST_SUITE_P(AVX2, CFLSubAvgTest,
::testing::ValuesIn(sub_avg_sizes_avx2));
const subsample_hbd_param subsample_hbd_sizes_avx2[] = {
ALL_CFL_TX_SIZES_SUBSAMPLE(cfl_get_luma_subsampling_420_hbd_avx2,
cfl_get_luma_subsampling_422_hbd_avx2,
cfl_get_luma_subsampling_444_hbd_avx2)
};
const predict_param_hbd predict_sizes_hbd_avx2[] = { ALL_CFL_TX_SIZES(
cfl_get_predict_hbd_fn_avx2) };
INSTANTIATE_TEST_SUITE_P(AVX2, CFLSubsampleHBDTest,
::testing::ValuesIn(subsample_hbd_sizes_avx2));
INSTANTIATE_TEST_SUITE_P(AVX2, CFLPredictHBDTest,
::testing::ValuesIn(predict_sizes_hbd_avx2));
#endif // HAVE_AVX2
// NEON: subtract-average, high-bit-depth subsampling and high-bit-depth
// prediction tests. The subtract-average and prediction lists use
// ALL_CFL_TX_SIZES; the subsampling list uses ALL_CFL_TX_SIZES_SUBSAMPLE.
#if HAVE_NEON
const sub_avg_param sub_avg_sizes_neon[] = { ALL_CFL_TX_SIZES(
cfl_get_subtract_average_fn_neon) };
INSTANTIATE_TEST_SUITE_P(NEON, CFLSubAvgTest,
::testing::ValuesIn(sub_avg_sizes_neon));
const subsample_hbd_param subsample_hbd_sizes_neon[] = {
ALL_CFL_TX_SIZES_SUBSAMPLE(cfl_get_luma_subsampling_420_hbd_neon,
cfl_get_luma_subsampling_422_hbd_neon,
cfl_get_luma_subsampling_444_hbd_neon)
};
const predict_param_hbd predict_sizes_hbd_neon[] = { ALL_CFL_TX_SIZES(
cfl_get_predict_hbd_fn_neon) };
INSTANTIATE_TEST_SUITE_P(NEON, CFLSubsampleHBDTest,
::testing::ValuesIn(subsample_hbd_sizes_neon));
INSTANTIATE_TEST_SUITE_P(NEON, CFLPredictHBDTest,
::testing::ValuesIn(predict_sizes_hbd_neon));
#endif // HAVE_NEON
// VSX: subtract-average tests for every size in ALL_CFL_TX_SIZES.
#if HAVE_VSX
const sub_avg_param sub_avg_sizes_vsx[] = { ALL_CFL_TX_SIZES(
cfl_get_subtract_average_fn_vsx) };
INSTANTIATE_TEST_SUITE_P(VSX, CFLSubAvgTest,
::testing::ValuesIn(sub_avg_sizes_vsx));
#endif
} // namespace