Downsample sad computation in hd motion search
This CL adds a speed feature to downsample the SAD computation during
motion search by skipping every other row in the SAD computation.
The downsampling is only performed when there are sufficiently many rows
to get a good estimate. Empirically, this happens when there are at
least 16 rows.
Currently this speed feature is only enabled on the hdres set.
Performance:
SPD_SET | OVR_PSNR | AVG_PSNR | SSIM | SPD
0 | +0.021% | +0.025% | +0.084% | +2.3%
1 | +0.053% | +0.071% | +0.136% | +3.8%
2 | +0.037% | +0.055% | +0.091% | +4.3%
3 | +0.025% | +0.038% | +0.074% | +2.6%
4 | +0.029% | +0.043% | +0.093% | +3.0%
5 | +0.117% | +0.128% | +0.194% | +3.3%
6 | +0.065% | +0.082% | +0.179% | +3.2%
BUG=aomedia:2781
STATS_CHANGED
Change-Id: Ibf2afd9a7ffed939897249527b41bbaa4152a62c
(cherry picked from commit 0a32d3c251a0ae49b6e0a76249a699d33244e0be)
diff --git a/test/sad_test.cc b/test/sad_test.cc
index 101204f..fdcf49f 100644
--- a/test/sad_test.cc
+++ b/test/sad_test.cc
@@ -31,6 +31,10 @@
const uint8_t *ref_ptr, int ref_stride);
typedef std::tuple<int, int, SadMxNFunc, int> SadMxNParam;
+typedef unsigned int (*SadSkipMxNFunc)(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride);
+typedef std::tuple<int, int, SadSkipMxNFunc, int> SadSkipMxNParam;
+
typedef uint32_t (*SadMxNAvgFunc)(const uint8_t *src_ptr, int src_stride,
const uint8_t *ref_ptr, int ref_stride,
const uint8_t *second_pred);
@@ -60,6 +64,11 @@
uint32_t *sad_array);
typedef std::tuple<int, int, SadMxNx4Func, int> SadMxNx4Param;
+typedef void (*SadSkipMxNx4Func)(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *const ref_ptr[], int ref_stride,
+ uint32_t *sad_array);
+typedef std::tuple<int, int, SadSkipMxNx4Func, int> SadSkipMxNx4Param;
+
typedef void (*SadMxNx4AvgFunc)(const uint8_t *src_ptr, int src_stride,
const uint8_t *const ref_ptr[], int ref_stride,
const uint8_t *second_pred,
@@ -182,6 +191,31 @@
return sad;
}
+ // Reference for the row-skipping SAD: sums |source - reference| over the
+ // even-numbered rows (0, 2, 4, ...) of block `block_idx`, using the 8-bit or
+ // 16-bit view of the buffers according to use_high_bit_depth_, and returns
+ // twice that sum as the estimate of the SAD over all rows.
+ unsigned int ReferenceSADSkip(int block_idx) {
+ unsigned int sad = 0;
+ const uint8_t *const reference8 = GetReference(block_idx);
+ const uint8_t *const source8 = source_data_;
+ const uint16_t *const reference16 =
+ CONVERT_TO_SHORTPTR(GetReference(block_idx));
+ const uint16_t *const source16 = CONVERT_TO_SHORTPTR(source_data_);
+ for (int h = 0; h < height_; h += 2) {  // visit every other row only
+ for (int w = 0; w < width_; ++w) {
+ if (!use_high_bit_depth_) {
+ sad += abs(source8[h * source_stride_ + w] -
+ reference8[h * reference_stride_ + w]);
+ } else {
+ sad += abs(source16[h * source_stride_ + w] -
+ reference16[h * reference_stride_ + w]);
+ }
+ }
+ }
+ return sad * 2;  // double to account for the rows that were skipped
+ }
+
// Sum of Absolute Differences Average. Given two blocks, and a prediction
// calculate the absolute difference between one pixel and average of the
// corresponding and predicted pixels; accumulate.
@@ -343,6 +377,50 @@
EXPECT_EQ(reference_sad, exp_sad[block]) << "block " << block;
}
}
+
+ void SpeedSAD() {
+ int test_count = 2000000;
+ unsigned int exp_sad[4];
+ while (test_count > 0) {
+ SADs(exp_sad);
+ test_count -= 1;
+ }
+ }
+};
+
+class SADSkipx4Test : public ::testing::WithParamInterface<SadSkipMxNx4Param>,
+ public SADTestBase {  // fixture for the 4-reference sad_skip functions
+ public:
+ SADSkipx4Test() : SADTestBase(GET_PARAM(0), GET_PARAM(1), GET_PARAM(3)) {}
+
+ protected:
+ void SADs(unsigned int *results) {  // runs the function under test once
+ const uint8_t *references[] = { GetReference(0), GetReference(1),
+ GetReference(2), GetReference(3) };
+
+ ASM_REGISTER_STATE_CHECK(GET_PARAM(2)(
+ source_data_, source_stride_, references, reference_stride_, results));
+ }
+
+ void CheckSADs() {  // each of the 4 results must equal ReferenceSADSkip()
+ unsigned int reference_sad, exp_sad[4];
+
+ SADs(exp_sad);
+ for (int block = 0; block < 4; ++block) {
+ reference_sad = ReferenceSADSkip(block);
+
+ EXPECT_EQ(reference_sad, exp_sad[block]) << "block " << block;
+ }
+ }
+
+ void SpeedSAD() {  // timing loop only; results are not checked
+ int test_count = 2000000;
+ unsigned int exp_sad[4];
+ while (test_count > 0) {
+ SADs(exp_sad);
+ test_count -= 1;
+ }
+ }
};
class SADx4AvgTest : public ::testing::WithParamInterface<SadMxNx4AvgParam>,
@@ -412,6 +490,37 @@
}
};
+class SADSkipTest : public ::testing::WithParamInterface<SadSkipMxNParam>,
+ public SADTestBase {  // fixture for the single-reference sad_skip functions
+ public:
+ SADSkipTest() : SADTestBase(GET_PARAM(0), GET_PARAM(1), GET_PARAM(3)) {}
+
+ protected:
+ unsigned int SAD(int block_idx) {  // runs the function under test once
+ unsigned int ret;
+ const uint8_t *const reference = GetReference(block_idx);
+
+ ASM_REGISTER_STATE_CHECK(ret = GET_PARAM(2)(source_data_, source_stride_,
+ reference, reference_stride_));
+ return ret;
+ }
+
+ void CheckSAD() {  // result for block 0 must equal ReferenceSADSkip(0)
+ const unsigned int reference_sad = ReferenceSADSkip(0);
+ const unsigned int exp_sad = SAD(0);
+
+ ASSERT_EQ(reference_sad, exp_sad);
+ }
+
+ void SpeedSAD() {  // timing loop only; the result is discarded
+ int test_count = 20000000;
+ while (test_count > 0) {
+ SAD(0);
+ test_count -= 1;
+ }
+ }
+};
+
class SADavgTest : public ::testing::WithParamInterface<SadMxNAvgParam>,
public SADTestBase {
public:
@@ -608,6 +717,62 @@
}
#endif
+TEST_P(SADSkipTest, MaxRef) {
+ FillConstant(source_data_, source_stride_, 0);
+ FillConstant(reference_data_, reference_stride_, mask_);
+ CheckSAD();
+}
+
+TEST_P(SADSkipTest, MaxSrc) {
+ FillConstant(source_data_, source_stride_, mask_);
+ FillConstant(reference_data_, reference_stride_, 0);
+ CheckSAD();
+}
+
+TEST_P(SADSkipTest, ShortRef) {
+ const int tmp_stride = reference_stride_;
+ reference_stride_ >>= 1;
+ FillRandom(source_data_, source_stride_);
+ FillRandom(reference_data_, reference_stride_);
+ CheckSAD();
+ reference_stride_ = tmp_stride;
+}
+
+TEST_P(SADSkipTest, UnalignedRef) {
+ // The reference frame, but not the source frame, may be unaligned for
+ // certain types of searches.
+ const int tmp_stride = reference_stride_;
+ reference_stride_ -= 1;
+ FillRandom(source_data_, source_stride_);
+ FillRandom(reference_data_, reference_stride_);
+ CheckSAD();
+ reference_stride_ = tmp_stride;
+}
+
+TEST_P(SADSkipTest, ShortSrc) {
+ const int tmp_stride = source_stride_;
+ source_stride_ >>= 1;
+ int test_count = 2000;
+ while (test_count > 0) {
+ FillRandom(source_data_, source_stride_);
+ FillRandom(reference_data_, reference_stride_);
+ CheckSAD();
+ test_count -= 1;
+ }
+ source_stride_ = tmp_stride;
+}
+
+#if SPEED_TEST
+TEST_P(SADSkipTest, Speed) {
+ const int tmp_stride = source_stride_;
+ source_stride_ >>= 1;
+ FillRandom(source_data_, source_stride_);
+ FillRandom(reference_data_, reference_stride_);
+ SpeedSAD();
+ source_stride_ = tmp_stride;
+}
+#endif
+
TEST_P(SADavgTest, MaxRef) {
FillConstant(source_data_, source_stride_, 0);
FillConstant(reference_data_, reference_stride_, mask_);
@@ -855,6 +1020,101 @@
source_data_ = tmp_source_data;
}
+#if SPEED_TEST
+TEST_P(SADx4Test, Speed) {
+ FillRandom(source_data_, source_stride_);
+ FillRandom(GetReference(0), reference_stride_);
+ FillRandom(GetReference(1), reference_stride_);
+ FillRandom(GetReference(2), reference_stride_);
+ FillRandom(GetReference(3), reference_stride_);
+ SpeedSAD();
+}
+#endif
+
+// SADSkipx4
+TEST_P(SADSkipx4Test, MaxRef) {
+ FillConstant(source_data_, source_stride_, 0);
+ FillConstant(GetReference(0), reference_stride_, mask_);
+ FillConstant(GetReference(1), reference_stride_, mask_);
+ FillConstant(GetReference(2), reference_stride_, mask_);
+ FillConstant(GetReference(3), reference_stride_, mask_);
+ CheckSADs();
+}
+
+TEST_P(SADSkipx4Test, MaxSrc) {
+ FillConstant(source_data_, source_stride_, mask_);
+ FillConstant(GetReference(0), reference_stride_, 0);
+ FillConstant(GetReference(1), reference_stride_, 0);
+ FillConstant(GetReference(2), reference_stride_, 0);
+ FillConstant(GetReference(3), reference_stride_, 0);
+ CheckSADs();
+}
+
+TEST_P(SADSkipx4Test, ShortRef) {
+ int tmp_stride = reference_stride_;
+ reference_stride_ >>= 1;
+ FillRandom(source_data_, source_stride_);
+ FillRandom(GetReference(0), reference_stride_);
+ FillRandom(GetReference(1), reference_stride_);
+ FillRandom(GetReference(2), reference_stride_);
+ FillRandom(GetReference(3), reference_stride_);
+ CheckSADs();
+ reference_stride_ = tmp_stride;
+}
+
+TEST_P(SADSkipx4Test, UnalignedRef) {
+ // The reference frame, but not the source frame, may be unaligned for
+ // certain types of searches.
+ int tmp_stride = reference_stride_;
+ reference_stride_ -= 1;
+ FillRandom(source_data_, source_stride_);
+ FillRandom(GetReference(0), reference_stride_);
+ FillRandom(GetReference(1), reference_stride_);
+ FillRandom(GetReference(2), reference_stride_);
+ FillRandom(GetReference(3), reference_stride_);
+ CheckSADs();
+ reference_stride_ = tmp_stride;
+}
+
+TEST_P(SADSkipx4Test, ShortSrc) {
+ int tmp_stride = source_stride_;
+ source_stride_ >>= 1;
+ int test_count = 1000;
+ while (test_count > 0) {
+ FillRandom(source_data_, source_stride_);
+ FillRandom(GetReference(0), reference_stride_);
+ FillRandom(GetReference(1), reference_stride_);
+ FillRandom(GetReference(2), reference_stride_);
+ FillRandom(GetReference(3), reference_stride_);
+ CheckSADs();
+ test_count -= 1;
+ }
+ source_stride_ = tmp_stride;
+}
+
+TEST_P(SADSkipx4Test, SrcAlignedByWidth) {
+ uint8_t *tmp_source_data = source_data_;
+ source_data_ += width_;
+ FillRandom(source_data_, source_stride_);
+ FillRandom(GetReference(0), reference_stride_);
+ FillRandom(GetReference(1), reference_stride_);
+ FillRandom(GetReference(2), reference_stride_);
+ FillRandom(GetReference(3), reference_stride_);
+ CheckSADs();
+ source_data_ = tmp_source_data;
+}
+
+#if SPEED_TEST
+TEST_P(SADSkipx4Test, Speed) {
+ FillRandom(source_data_, source_stride_);
+ FillRandom(GetReference(0), reference_stride_);
+ FillRandom(GetReference(1), reference_stride_);
+ FillRandom(GetReference(2), reference_stride_);
+ FillRandom(GetReference(3), reference_stride_);
+ SpeedSAD();
+}
+#endif
+
using std::make_tuple;
#if SPEED_TEST
@@ -1022,6 +1282,103 @@
};
INSTANTIATE_TEST_SUITE_P(C, SADTest, ::testing::ValuesIn(c_tests));
+const SadSkipMxNParam skip_c_tests[] = {
+ make_tuple(128, 128, &aom_sad_skip_128x128_c, -1),
+ make_tuple(128, 64, &aom_sad_skip_128x64_c, -1),
+ make_tuple(64, 128, &aom_sad_skip_64x128_c, -1),
+ make_tuple(64, 64, &aom_sad_skip_64x64_c, -1),
+ make_tuple(64, 32, &aom_sad_skip_64x32_c, -1),
+ make_tuple(32, 64, &aom_sad_skip_32x64_c, -1),
+ make_tuple(32, 32, &aom_sad_skip_32x32_c, -1),
+ make_tuple(32, 16, &aom_sad_skip_32x16_c, -1),
+ make_tuple(16, 32, &aom_sad_skip_16x32_c, -1),
+ make_tuple(16, 16, &aom_sad_skip_16x16_c, -1),
+ make_tuple(16, 8, &aom_sad_skip_16x8_c, -1),
+ make_tuple(8, 16, &aom_sad_skip_8x16_c, -1),
+ make_tuple(8, 8, &aom_sad_skip_8x8_c, -1),
+ make_tuple(8, 4, &aom_sad_skip_8x4_c, -1),
+ make_tuple(4, 8, &aom_sad_skip_4x8_c, -1),
+ make_tuple(4, 4, &aom_sad_skip_4x4_c, -1),
+ make_tuple(64, 16, &aom_sad_skip_64x16_c, -1),
+ make_tuple(16, 64, &aom_sad_skip_16x64_c, -1),
+ make_tuple(32, 8, &aom_sad_skip_32x8_c, -1),
+ make_tuple(8, 32, &aom_sad_skip_8x32_c, -1),
+ make_tuple(16, 4, &aom_sad_skip_16x4_c, -1),
+ make_tuple(4, 16, &aom_sad_skip_4x16_c, -1),
+
+#if CONFIG_AV1_HIGHBITDEPTH
+ make_tuple(128, 128, &aom_highbd_sad_skip_128x128_c, 8),
+ make_tuple(128, 64, &aom_highbd_sad_skip_128x64_c, 8),
+ make_tuple(64, 128, &aom_highbd_sad_skip_64x128_c, 8),
+ make_tuple(64, 64, &aom_highbd_sad_skip_64x64_c, 8),
+ make_tuple(64, 32, &aom_highbd_sad_skip_64x32_c, 8),
+ make_tuple(32, 64, &aom_highbd_sad_skip_32x64_c, 8),
+ make_tuple(32, 32, &aom_highbd_sad_skip_32x32_c, 8),
+ make_tuple(32, 16, &aom_highbd_sad_skip_32x16_c, 8),
+ make_tuple(16, 32, &aom_highbd_sad_skip_16x32_c, 8),
+ make_tuple(16, 16, &aom_highbd_sad_skip_16x16_c, 8),
+ make_tuple(16, 8, &aom_highbd_sad_skip_16x8_c, 8),
+ make_tuple(8, 16, &aom_highbd_sad_skip_8x16_c, 8),
+ make_tuple(8, 8, &aom_highbd_sad_skip_8x8_c, 8),
+ make_tuple(8, 4, &aom_highbd_sad_skip_8x4_c, 8),
+ make_tuple(4, 8, &aom_highbd_sad_skip_4x8_c, 8),
+ make_tuple(4, 4, &aom_highbd_sad_skip_4x4_c, 8),
+ make_tuple(64, 16, &aom_highbd_sad_skip_64x16_c, 8),
+ make_tuple(16, 64, &aom_highbd_sad_skip_16x64_c, 8),
+ make_tuple(32, 8, &aom_highbd_sad_skip_32x8_c, 8),
+ make_tuple(8, 32, &aom_highbd_sad_skip_8x32_c, 8),
+ make_tuple(16, 4, &aom_highbd_sad_skip_16x4_c, 8),
+ make_tuple(4, 16, &aom_highbd_sad_skip_4x16_c, 8),
+
+ make_tuple(128, 128, &aom_highbd_sad_skip_128x128_c, 10),
+ make_tuple(128, 64, &aom_highbd_sad_skip_128x64_c, 10),
+ make_tuple(64, 128, &aom_highbd_sad_skip_64x128_c, 10),
+ make_tuple(64, 64, &aom_highbd_sad_skip_64x64_c, 10),
+ make_tuple(64, 32, &aom_highbd_sad_skip_64x32_c, 10),
+ make_tuple(32, 64, &aom_highbd_sad_skip_32x64_c, 10),
+ make_tuple(32, 32, &aom_highbd_sad_skip_32x32_c, 10),
+ make_tuple(32, 16, &aom_highbd_sad_skip_32x16_c, 10),
+ make_tuple(16, 32, &aom_highbd_sad_skip_16x32_c, 10),
+ make_tuple(16, 16, &aom_highbd_sad_skip_16x16_c, 10),
+ make_tuple(16, 8, &aom_highbd_sad_skip_16x8_c, 10),
+ make_tuple(8, 16, &aom_highbd_sad_skip_8x16_c, 10),
+ make_tuple(8, 8, &aom_highbd_sad_skip_8x8_c, 10),
+ make_tuple(8, 4, &aom_highbd_sad_skip_8x4_c, 10),
+ make_tuple(4, 8, &aom_highbd_sad_skip_4x8_c, 10),
+ make_tuple(4, 4, &aom_highbd_sad_skip_4x4_c, 10),
+ make_tuple(64, 16, &aom_highbd_sad_skip_64x16_c, 10),
+ make_tuple(16, 64, &aom_highbd_sad_skip_16x64_c, 10),
+ make_tuple(32, 8, &aom_highbd_sad_skip_32x8_c, 10),
+ make_tuple(8, 32, &aom_highbd_sad_skip_8x32_c, 10),
+ make_tuple(16, 4, &aom_highbd_sad_skip_16x4_c, 10),
+ make_tuple(4, 16, &aom_highbd_sad_skip_4x16_c, 10),
+
+ make_tuple(128, 128, &aom_highbd_sad_skip_128x128_c, 12),
+ make_tuple(128, 64, &aom_highbd_sad_skip_128x64_c, 12),
+ make_tuple(64, 128, &aom_highbd_sad_skip_64x128_c, 12),
+ make_tuple(64, 64, &aom_highbd_sad_skip_64x64_c, 12),
+ make_tuple(64, 32, &aom_highbd_sad_skip_64x32_c, 12),
+ make_tuple(32, 64, &aom_highbd_sad_skip_32x64_c, 12),
+ make_tuple(32, 32, &aom_highbd_sad_skip_32x32_c, 12),
+ make_tuple(32, 16, &aom_highbd_sad_skip_32x16_c, 12),
+ make_tuple(16, 32, &aom_highbd_sad_skip_16x32_c, 12),
+ make_tuple(16, 16, &aom_highbd_sad_skip_16x16_c, 12),
+ make_tuple(16, 8, &aom_highbd_sad_skip_16x8_c, 12),
+ make_tuple(8, 16, &aom_highbd_sad_skip_8x16_c, 12),
+ make_tuple(8, 8, &aom_highbd_sad_skip_8x8_c, 12),
+ make_tuple(8, 4, &aom_highbd_sad_skip_8x4_c, 12),
+ make_tuple(4, 8, &aom_highbd_sad_skip_4x8_c, 12),
+ make_tuple(4, 4, &aom_highbd_sad_skip_4x4_c, 12),
+ make_tuple(64, 16, &aom_highbd_sad_skip_64x16_c, 12),
+ make_tuple(16, 64, &aom_highbd_sad_skip_16x64_c, 12),
+ make_tuple(32, 8, &aom_highbd_sad_skip_32x8_c, 12),
+ make_tuple(8, 32, &aom_highbd_sad_skip_8x32_c, 12),
+ make_tuple(16, 4, &aom_highbd_sad_skip_16x4_c, 12),
+ make_tuple(4, 16, &aom_highbd_sad_skip_4x16_c, 12),
+#endif
+};
+INSTANTIATE_TEST_SUITE_P(C, SADSkipTest, ::testing::ValuesIn(skip_c_tests));
+
const SadMxNAvgParam avg_c_tests[] = {
make_tuple(128, 128, &aom_sad128x128_avg_c, -1),
make_tuple(128, 64, &aom_sad128x64_avg_c, -1),
@@ -1281,6 +1638,101 @@
};
INSTANTIATE_TEST_SUITE_P(C, SADx4Test, ::testing::ValuesIn(x4d_c_tests));
+const SadSkipMxNx4Param skip_x4d_c_tests[] = {  // width, height, fn, bit depth
+ make_tuple(128, 128, &aom_sad_skip_128x128x4d_c, -1),
+ make_tuple(128, 64, &aom_sad_skip_128x64x4d_c, -1),
+ make_tuple(64, 128, &aom_sad_skip_64x128x4d_c, -1),
+ make_tuple(64, 64, &aom_sad_skip_64x64x4d_c, -1),
+ make_tuple(64, 32, &aom_sad_skip_64x32x4d_c, -1),
+ make_tuple(32, 64, &aom_sad_skip_32x64x4d_c, -1),
+ make_tuple(32, 32, &aom_sad_skip_32x32x4d_c, -1),
+ make_tuple(32, 16, &aom_sad_skip_32x16x4d_c, -1),
+ make_tuple(16, 32, &aom_sad_skip_16x32x4d_c, -1),
+ make_tuple(16, 16, &aom_sad_skip_16x16x4d_c, -1),
+ make_tuple(16, 8, &aom_sad_skip_16x8x4d_c, -1),
+ make_tuple(8, 16, &aom_sad_skip_8x16x4d_c, -1),
+ make_tuple(8, 8, &aom_sad_skip_8x8x4d_c, -1),
+ make_tuple(4, 8, &aom_sad_skip_4x8x4d_c, -1),
+ make_tuple(64, 16, &aom_sad_skip_64x16x4d_c, -1),
+ make_tuple(16, 64, &aom_sad_skip_16x64x4d_c, -1),
+ make_tuple(32, 8, &aom_sad_skip_32x8x4d_c, -1),
+ make_tuple(8, 32, &aom_sad_skip_8x32x4d_c, -1),
+ make_tuple(4, 16, &aom_sad_skip_4x16x4d_c, -1),
+
+#if CONFIG_AV1_HIGHBITDEPTH
+ make_tuple(128, 128, &aom_highbd_sad_skip_128x128x4d_c, 8),
+ make_tuple(128, 64, &aom_highbd_sad_skip_128x64x4d_c, 8),
+ make_tuple(64, 128, &aom_highbd_sad_skip_64x128x4d_c, 8),
+ make_tuple(64, 64, &aom_highbd_sad_skip_64x64x4d_c, 8),
+ make_tuple(64, 32, &aom_highbd_sad_skip_64x32x4d_c, 8),
+ make_tuple(32, 64, &aom_highbd_sad_skip_32x64x4d_c, 8),
+ make_tuple(32, 32, &aom_highbd_sad_skip_32x32x4d_c, 8),
+ make_tuple(32, 16, &aom_highbd_sad_skip_32x16x4d_c, 8),
+ make_tuple(16, 32, &aom_highbd_sad_skip_16x32x4d_c, 8),
+ make_tuple(16, 16, &aom_highbd_sad_skip_16x16x4d_c, 8),
+ make_tuple(16, 8, &aom_highbd_sad_skip_16x8x4d_c, 8),
+ make_tuple(8, 16, &aom_highbd_sad_skip_8x16x4d_c, 8),
+ make_tuple(8, 8, &aom_highbd_sad_skip_8x8x4d_c, 8),
+ make_tuple(8, 4, &aom_highbd_sad_skip_8x4x4d_c, 8),
+ make_tuple(4, 8, &aom_highbd_sad_skip_4x8x4d_c, 8),
+ make_tuple(4, 4, &aom_highbd_sad_skip_4x4x4d_c, 8),
+ make_tuple(64, 16, &aom_highbd_sad_skip_64x16x4d_c, 8),
+ make_tuple(16, 64, &aom_highbd_sad_skip_16x64x4d_c, 8),
+ make_tuple(32, 8, &aom_highbd_sad_skip_32x8x4d_c, 8),
+ make_tuple(8, 32, &aom_highbd_sad_skip_8x32x4d_c, 8),
+ make_tuple(16, 4, &aom_highbd_sad_skip_16x4x4d_c, 8),
+ make_tuple(4, 16, &aom_highbd_sad_skip_4x16x4d_c, 8),
+
+ make_tuple(128, 128, &aom_highbd_sad_skip_128x128x4d_c, 10),
+ make_tuple(128, 64, &aom_highbd_sad_skip_128x64x4d_c, 10),
+ make_tuple(64, 128, &aom_highbd_sad_skip_64x128x4d_c, 10),
+ make_tuple(64, 64, &aom_highbd_sad_skip_64x64x4d_c, 10),
+ make_tuple(64, 32, &aom_highbd_sad_skip_64x32x4d_c, 10),
+ make_tuple(32, 64, &aom_highbd_sad_skip_32x64x4d_c, 10),
+ make_tuple(32, 32, &aom_highbd_sad_skip_32x32x4d_c, 10),
+ make_tuple(32, 16, &aom_highbd_sad_skip_32x16x4d_c, 10),
+ make_tuple(16, 32, &aom_highbd_sad_skip_16x32x4d_c, 10),
+ make_tuple(16, 16, &aom_highbd_sad_skip_16x16x4d_c, 10),
+ make_tuple(16, 8, &aom_highbd_sad_skip_16x8x4d_c, 10),
+ make_tuple(8, 16, &aom_highbd_sad_skip_8x16x4d_c, 10),
+ make_tuple(8, 8, &aom_highbd_sad_skip_8x8x4d_c, 10),
+ make_tuple(8, 4, &aom_highbd_sad_skip_8x4x4d_c, 10),
+ make_tuple(4, 8, &aom_highbd_sad_skip_4x8x4d_c, 10),
+ make_tuple(4, 4, &aom_highbd_sad_skip_4x4x4d_c, 10),
+ make_tuple(64, 16, &aom_highbd_sad_skip_64x16x4d_c, 10),
+ make_tuple(16, 64, &aom_highbd_sad_skip_16x64x4d_c, 10),
+ make_tuple(32, 8, &aom_highbd_sad_skip_32x8x4d_c, 10),
+ make_tuple(8, 32, &aom_highbd_sad_skip_8x32x4d_c, 10),
+ make_tuple(16, 4, &aom_highbd_sad_skip_16x4x4d_c, 10),
+ make_tuple(4, 16, &aom_highbd_sad_skip_4x16x4d_c, 10),
+
+ make_tuple(128, 128, &aom_highbd_sad_skip_128x128x4d_c, 12),
+ make_tuple(128, 64, &aom_highbd_sad_skip_128x64x4d_c, 12),
+ make_tuple(64, 128, &aom_highbd_sad_skip_64x128x4d_c, 12),
+ make_tuple(64, 64, &aom_highbd_sad_skip_64x64x4d_c, 12),
+ make_tuple(64, 32, &aom_highbd_sad_skip_64x32x4d_c, 12),
+ make_tuple(32, 64, &aom_highbd_sad_skip_32x64x4d_c, 12),
+ make_tuple(32, 32, &aom_highbd_sad_skip_32x32x4d_c, 12),
+ make_tuple(32, 16, &aom_highbd_sad_skip_32x16x4d_c, 12),
+ make_tuple(16, 32, &aom_highbd_sad_skip_16x32x4d_c, 12),
+ make_tuple(16, 16, &aom_highbd_sad_skip_16x16x4d_c, 12),
+ make_tuple(16, 8, &aom_highbd_sad_skip_16x8x4d_c, 12),
+ make_tuple(8, 16, &aom_highbd_sad_skip_8x16x4d_c, 12),
+ make_tuple(8, 8, &aom_highbd_sad_skip_8x8x4d_c, 12),
+ make_tuple(8, 4, &aom_highbd_sad_skip_8x4x4d_c, 12),
+ make_tuple(4, 8, &aom_highbd_sad_skip_4x8x4d_c, 12),
+ make_tuple(4, 4, &aom_highbd_sad_skip_4x4x4d_c, 12),
+ make_tuple(64, 16, &aom_highbd_sad_skip_64x16x4d_c, 12),
+ make_tuple(16, 64, &aom_highbd_sad_skip_16x64x4d_c, 12),
+ make_tuple(32, 8, &aom_highbd_sad_skip_32x8x4d_c, 12),
+ make_tuple(8, 32, &aom_highbd_sad_skip_8x32x4d_c, 12),
+ make_tuple(16, 4, &aom_highbd_sad_skip_16x4x4d_c, 12),
+ make_tuple(4, 16, &aom_highbd_sad_skip_4x16x4d_c, 12),
+#endif
+};
+INSTANTIATE_TEST_SUITE_P(C, SADSkipx4Test,
+ ::testing::ValuesIn(skip_x4d_c_tests));
+
const SadMxNx4AvgParam x4d_avg_c_tests[] = {
make_tuple(128, 128, &aom_sad128x128x4d_avg_c, -1),
make_tuple(128, 64, &aom_sad128x64x4d_avg_c, -1),
@@ -1424,6 +1876,83 @@
};
INSTANTIATE_TEST_SUITE_P(SSE2, SADTest, ::testing::ValuesIn(sse2_tests));
+const SadSkipMxNParam skip_sse2_tests[] = {
+ make_tuple(128, 128, &aom_sad_skip_128x128_sse2, -1),
+ make_tuple(128, 64, &aom_sad_skip_128x64_sse2, -1),
+ make_tuple(64, 128, &aom_sad_skip_64x128_sse2, -1),
+ make_tuple(64, 64, &aom_sad_skip_64x64_sse2, -1),
+ make_tuple(64, 32, &aom_sad_skip_64x32_sse2, -1),
+ make_tuple(32, 64, &aom_sad_skip_32x64_sse2, -1),
+ make_tuple(32, 32, &aom_sad_skip_32x32_sse2, -1),
+ make_tuple(32, 16, &aom_sad_skip_32x16_sse2, -1),
+ make_tuple(16, 32, &aom_sad_skip_16x32_sse2, -1),
+ make_tuple(16, 16, &aom_sad_skip_16x16_sse2, -1),
+ make_tuple(16, 8, &aom_sad_skip_16x8_sse2, -1),
+ make_tuple(8, 16, &aom_sad_skip_8x16_sse2, -1),
+ make_tuple(8, 8, &aom_sad_skip_8x8_sse2, -1),
+ make_tuple(4, 8, &aom_sad_skip_4x8_sse2, -1),
+ make_tuple(64, 16, &aom_sad_skip_64x16_sse2, -1),
+ make_tuple(16, 64, &aom_sad_skip_16x64_sse2, -1),
+ make_tuple(32, 8, &aom_sad_skip_32x8_sse2, -1),
+ make_tuple(8, 32, &aom_sad_skip_8x32_sse2, -1),
+ make_tuple(4, 16, &aom_sad_skip_4x16_sse2, -1),
+
+#if CONFIG_AV1_HIGHBITDEPTH
+ make_tuple(64, 64, &aom_highbd_sad_skip_64x64_sse2, 8),
+ make_tuple(64, 32, &aom_highbd_sad_skip_64x32_sse2, 8),
+ make_tuple(32, 64, &aom_highbd_sad_skip_32x64_sse2, 8),
+ make_tuple(32, 32, &aom_highbd_sad_skip_32x32_sse2, 8),
+ make_tuple(32, 16, &aom_highbd_sad_skip_32x16_sse2, 8),
+ make_tuple(16, 32, &aom_highbd_sad_skip_16x32_sse2, 8),
+ make_tuple(16, 16, &aom_highbd_sad_skip_16x16_sse2, 8),
+ make_tuple(16, 8, &aom_highbd_sad_skip_16x8_sse2, 8),
+ make_tuple(8, 16, &aom_highbd_sad_skip_8x16_sse2, 8),
+ make_tuple(8, 8, &aom_highbd_sad_skip_8x8_sse2, 8),
+ make_tuple(4, 8, &aom_highbd_sad_skip_4x8_sse2, 8),
+ make_tuple(64, 16, &aom_highbd_sad_skip_64x16_sse2, 8),
+ make_tuple(16, 64, &aom_highbd_sad_skip_16x64_sse2, 8),
+ make_tuple(32, 8, &aom_highbd_sad_skip_32x8_sse2, 8),
+ make_tuple(8, 32, &aom_highbd_sad_skip_8x32_sse2, 8),
+ make_tuple(4, 16, &aom_highbd_sad_skip_4x16_sse2, 8),
+
+ make_tuple(64, 64, &aom_highbd_sad_skip_64x64_sse2, 10),
+ make_tuple(64, 32, &aom_highbd_sad_skip_64x32_sse2, 10),
+ make_tuple(32, 64, &aom_highbd_sad_skip_32x64_sse2, 10),
+ make_tuple(32, 32, &aom_highbd_sad_skip_32x32_sse2, 10),
+ make_tuple(32, 16, &aom_highbd_sad_skip_32x16_sse2, 10),
+ make_tuple(16, 32, &aom_highbd_sad_skip_16x32_sse2, 10),
+ make_tuple(16, 16, &aom_highbd_sad_skip_16x16_sse2, 10),
+ make_tuple(16, 8, &aom_highbd_sad_skip_16x8_sse2, 10),
+ make_tuple(8, 16, &aom_highbd_sad_skip_8x16_sse2, 10),
+ make_tuple(8, 8, &aom_highbd_sad_skip_8x8_sse2, 10),
+ make_tuple(4, 8, &aom_highbd_sad_skip_4x8_sse2, 10),
+ make_tuple(64, 16, &aom_highbd_sad_skip_64x16_sse2, 10),
+ make_tuple(16, 64, &aom_highbd_sad_skip_16x64_sse2, 10),
+ make_tuple(32, 8, &aom_highbd_sad_skip_32x8_sse2, 10),
+ make_tuple(8, 32, &aom_highbd_sad_skip_8x32_sse2, 10),
+ make_tuple(4, 16, &aom_highbd_sad_skip_4x16_sse2, 10),
+
+ make_tuple(64, 64, &aom_highbd_sad_skip_64x64_sse2, 12),
+ make_tuple(64, 32, &aom_highbd_sad_skip_64x32_sse2, 12),
+ make_tuple(32, 64, &aom_highbd_sad_skip_32x64_sse2, 12),
+ make_tuple(32, 32, &aom_highbd_sad_skip_32x32_sse2, 12),
+ make_tuple(32, 16, &aom_highbd_sad_skip_32x16_sse2, 12),
+ make_tuple(16, 32, &aom_highbd_sad_skip_16x32_sse2, 12),
+ make_tuple(16, 16, &aom_highbd_sad_skip_16x16_sse2, 12),
+ make_tuple(16, 8, &aom_highbd_sad_skip_16x8_sse2, 12),
+ make_tuple(8, 16, &aom_highbd_sad_skip_8x16_sse2, 12),
+ make_tuple(8, 8, &aom_highbd_sad_skip_8x8_sse2, 12),
+ make_tuple(4, 8, &aom_highbd_sad_skip_4x8_sse2, 12),
+ make_tuple(64, 16, &aom_highbd_sad_skip_64x16_sse2, 12),
+ make_tuple(16, 64, &aom_highbd_sad_skip_16x64_sse2, 12),
+ make_tuple(32, 8, &aom_highbd_sad_skip_32x8_sse2, 12),
+ make_tuple(8, 32, &aom_highbd_sad_skip_8x32_sse2, 12),
+ make_tuple(4, 16, &aom_highbd_sad_skip_4x16_sse2, 12),
+#endif // CONFIG_AV1_HIGHBITDEPTH
+};
+INSTANTIATE_TEST_SUITE_P(SSE2, SADSkipTest,
+ ::testing::ValuesIn(skip_sse2_tests));
+
const SadMxNAvgParam avg_sse2_tests[] = {
make_tuple(128, 128, &aom_sad128x128_avg_sse2, -1),
make_tuple(128, 64, &aom_sad128x64_avg_sse2, -1),
@@ -1606,6 +2135,84 @@
};
INSTANTIATE_TEST_SUITE_P(SSE2, SADx4Test, ::testing::ValuesIn(x4d_sse2_tests));
+const SadSkipMxNx4Param skip_x4d_sse2_tests[] = {
+ make_tuple(128, 128, &aom_sad_skip_128x128x4d_sse2, -1),
+ make_tuple(128, 64, &aom_sad_skip_128x64x4d_sse2, -1),
+ make_tuple(64, 128, &aom_sad_skip_64x128x4d_sse2, -1),
+ make_tuple(64, 64, &aom_sad_skip_64x64x4d_sse2, -1),
+ make_tuple(64, 32, &aom_sad_skip_64x32x4d_sse2, -1),
+ make_tuple(32, 64, &aom_sad_skip_32x64x4d_sse2, -1),
+ make_tuple(32, 32, &aom_sad_skip_32x32x4d_sse2, -1),
+ make_tuple(32, 16, &aom_sad_skip_32x16x4d_sse2, -1),
+ make_tuple(16, 32, &aom_sad_skip_16x32x4d_sse2, -1),
+ make_tuple(16, 16, &aom_sad_skip_16x16x4d_sse2, -1),
+ make_tuple(16, 8, &aom_sad_skip_16x8x4d_sse2, -1),
+ make_tuple(8, 16, &aom_sad_skip_8x16x4d_sse2, -1),
+ make_tuple(8, 8, &aom_sad_skip_8x8x4d_sse2, -1),
+ make_tuple(4, 8, &aom_sad_skip_4x8x4d_sse2, -1),
+ make_tuple(64, 16, &aom_sad_skip_64x16x4d_sse2, -1),
+ make_tuple(16, 64, &aom_sad_skip_16x64x4d_sse2, -1),
+ make_tuple(32, 8, &aom_sad_skip_32x8x4d_sse2, -1),
+ make_tuple(8, 32, &aom_sad_skip_8x32x4d_sse2, -1),
+ make_tuple(4, 16, &aom_sad_skip_4x16x4d_sse2, -1),
+
+#if CONFIG_AV1_HIGHBITDEPTH
+ make_tuple(64, 64, &aom_highbd_sad_skip_64x64x4d_sse2, 8),
+ make_tuple(64, 32, &aom_highbd_sad_skip_64x32x4d_sse2, 8),
+ make_tuple(32, 64, &aom_highbd_sad_skip_32x64x4d_sse2, 8),
+ make_tuple(32, 32, &aom_highbd_sad_skip_32x32x4d_sse2, 8),
+ make_tuple(32, 16, &aom_highbd_sad_skip_32x16x4d_sse2, 8),
+ make_tuple(16, 32, &aom_highbd_sad_skip_16x32x4d_sse2, 8),
+ make_tuple(16, 16, &aom_highbd_sad_skip_16x16x4d_sse2, 8),
+ make_tuple(16, 8, &aom_highbd_sad_skip_16x8x4d_sse2, 8),
+ make_tuple(8, 16, &aom_highbd_sad_skip_8x16x4d_sse2, 8),
+ make_tuple(8, 8, &aom_highbd_sad_skip_8x8x4d_sse2, 8),
+ make_tuple(4, 8, &aom_highbd_sad_skip_4x8x4d_sse2, 8),
+ make_tuple(64, 16, &aom_highbd_sad_skip_64x16x4d_sse2, 8),
+ make_tuple(16, 64, &aom_highbd_sad_skip_16x64x4d_sse2, 8),
+ make_tuple(32, 8, &aom_highbd_sad_skip_32x8x4d_sse2, 8),
+ make_tuple(8, 32, &aom_highbd_sad_skip_8x32x4d_sse2, 8),
+ make_tuple(4, 16, &aom_highbd_sad_skip_4x16x4d_sse2, 8),
+
+ make_tuple(64, 64, &aom_highbd_sad_skip_64x64x4d_sse2, 10),
+ make_tuple(64, 32, &aom_highbd_sad_skip_64x32x4d_sse2, 10),
+ make_tuple(32, 64, &aom_highbd_sad_skip_32x64x4d_sse2, 10),
+ make_tuple(32, 32, &aom_highbd_sad_skip_32x32x4d_sse2, 10),
+ make_tuple(32, 16, &aom_highbd_sad_skip_32x16x4d_sse2, 10),
+ make_tuple(16, 32, &aom_highbd_sad_skip_16x32x4d_sse2, 10),
+ make_tuple(16, 16, &aom_highbd_sad_skip_16x16x4d_sse2, 10),
+ make_tuple(16, 8, &aom_highbd_sad_skip_16x8x4d_sse2, 10),
+ make_tuple(8, 16, &aom_highbd_sad_skip_8x16x4d_sse2, 10),
+ make_tuple(8, 8, &aom_highbd_sad_skip_8x8x4d_sse2, 10),
+ make_tuple(4, 8, &aom_highbd_sad_skip_4x8x4d_sse2, 10),
+ make_tuple(64, 16, &aom_highbd_sad_skip_64x16x4d_sse2, 10),
+ make_tuple(16, 64, &aom_highbd_sad_skip_16x64x4d_sse2, 10),
+ make_tuple(32, 8, &aom_highbd_sad_skip_32x8x4d_sse2, 10),
+ make_tuple(8, 32, &aom_highbd_sad_skip_8x32x4d_sse2, 10),
+ make_tuple(4, 16, &aom_highbd_sad_skip_4x16x4d_sse2, 10),
+
+ make_tuple(64, 64, &aom_highbd_sad_skip_64x64x4d_sse2, 12),
+ make_tuple(64, 32, &aom_highbd_sad_skip_64x32x4d_sse2, 12),
+ make_tuple(32, 64, &aom_highbd_sad_skip_32x64x4d_sse2, 12),
+ make_tuple(32, 32, &aom_highbd_sad_skip_32x32x4d_sse2, 12),
+ make_tuple(32, 16, &aom_highbd_sad_skip_32x16x4d_sse2, 12),
+ make_tuple(16, 32, &aom_highbd_sad_skip_16x32x4d_sse2, 12),
+ make_tuple(16, 16, &aom_highbd_sad_skip_16x16x4d_sse2, 12),
+ make_tuple(16, 8, &aom_highbd_sad_skip_16x8x4d_sse2, 12),
+ make_tuple(8, 16, &aom_highbd_sad_skip_8x16x4d_sse2, 12),
+ make_tuple(8, 8, &aom_highbd_sad_skip_8x8x4d_sse2, 12),
+ make_tuple(4, 8, &aom_highbd_sad_skip_4x8x4d_sse2, 12),
+ make_tuple(64, 16, &aom_highbd_sad_skip_64x16x4d_sse2, 12),
+ make_tuple(16, 64, &aom_highbd_sad_skip_16x64x4d_sse2, 12),
+ make_tuple(32, 8, &aom_highbd_sad_skip_32x8x4d_sse2, 12),
+ make_tuple(8, 32, &aom_highbd_sad_skip_8x32x4d_sse2, 12),
+ make_tuple(4, 16, &aom_highbd_sad_skip_4x16x4d_sse2, 12),
+
+#endif // CONFIG_AV1_HIGHBITDEPTH
+};
+INSTANTIATE_TEST_SUITE_P(SSE2, SADSkipx4Test,
+ ::testing::ValuesIn(skip_x4d_sse2_tests));
+
const SadMxNx4AvgParam x4d_avg_sse2_tests[] = {
make_tuple(128, 128, &aom_sad128x128x4d_avg_sse2, -1),
make_tuple(128, 64, &aom_sad128x64x4d_avg_sse2, -1),
@@ -1802,6 +2409,59 @@
};
INSTANTIATE_TEST_SUITE_P(AVX2, SADTest, ::testing::ValuesIn(avx2_tests));
+const SadSkipMxNParam skip_avx2_tests[] = {
+ make_tuple(128, 128, &aom_sad_skip_128x128_avx2, -1),
+ make_tuple(128, 64, &aom_sad_skip_128x64_avx2, -1),
+ make_tuple(64, 128, &aom_sad_skip_64x128_avx2, -1),
+ make_tuple(64, 64, &aom_sad_skip_64x64_avx2, -1),
+ make_tuple(64, 32, &aom_sad_skip_64x32_avx2, -1),
+ make_tuple(32, 64, &aom_sad_skip_32x64_avx2, -1),
+ make_tuple(32, 32, &aom_sad_skip_32x32_avx2, -1),
+ make_tuple(32, 16, &aom_sad_skip_32x16_avx2, -1),
+#if CONFIG_AV1_HIGHBITDEPTH
+ make_tuple(128, 128, &aom_highbd_sad_skip_128x128_avx2, 8),
+ make_tuple(128, 64, &aom_highbd_sad_skip_128x64_avx2, 8),
+ make_tuple(64, 128, &aom_highbd_sad_skip_64x128_avx2, 8),
+ make_tuple(64, 64, &aom_highbd_sad_skip_64x64_avx2, 8),
+ make_tuple(64, 32, &aom_highbd_sad_skip_64x32_avx2, 8),
+ make_tuple(32, 64, &aom_highbd_sad_skip_32x64_avx2, 8),
+ make_tuple(32, 32, &aom_highbd_sad_skip_32x32_avx2, 8),
+ make_tuple(32, 16, &aom_highbd_sad_skip_32x16_avx2, 8),
+ make_tuple(16, 64, &aom_highbd_sad_skip_16x64_avx2, 8),
+ make_tuple(16, 32, &aom_highbd_sad_skip_16x32_avx2, 8),
+ make_tuple(16, 16, &aom_highbd_sad_skip_16x16_avx2, 8),
+ make_tuple(16, 8, &aom_highbd_sad_skip_16x8_avx2, 8),
+
+ make_tuple(128, 128, &aom_highbd_sad_skip_128x128_avx2, 10),
+ make_tuple(128, 64, &aom_highbd_sad_skip_128x64_avx2, 10),
+ make_tuple(64, 128, &aom_highbd_sad_skip_64x128_avx2, 10),
+ make_tuple(64, 64, &aom_highbd_sad_skip_64x64_avx2, 10),
+ make_tuple(64, 32, &aom_highbd_sad_skip_64x32_avx2, 10),
+ make_tuple(32, 64, &aom_highbd_sad_skip_32x64_avx2, 10),
+ make_tuple(32, 32, &aom_highbd_sad_skip_32x32_avx2, 10),
+ make_tuple(32, 16, &aom_highbd_sad_skip_32x16_avx2, 10),
+ make_tuple(16, 64, &aom_highbd_sad_skip_16x64_avx2, 10),
+ make_tuple(16, 32, &aom_highbd_sad_skip_16x32_avx2, 10),
+ make_tuple(16, 16, &aom_highbd_sad_skip_16x16_avx2, 10),
+ make_tuple(16, 8, &aom_highbd_sad_skip_16x8_avx2, 10),
+
+ make_tuple(128, 128, &aom_highbd_sad_skip_128x128_avx2, 12),
+ make_tuple(128, 64, &aom_highbd_sad_skip_128x64_avx2, 12),
+ make_tuple(64, 128, &aom_highbd_sad_skip_64x128_avx2, 12),
+ make_tuple(64, 64, &aom_highbd_sad_skip_64x64_avx2, 12),
+ make_tuple(64, 32, &aom_highbd_sad_skip_64x32_avx2, 12),
+ make_tuple(32, 64, &aom_highbd_sad_skip_32x64_avx2, 12),
+ make_tuple(32, 32, &aom_highbd_sad_skip_32x32_avx2, 12),
+ make_tuple(32, 16, &aom_highbd_sad_skip_32x16_avx2, 12),
+ make_tuple(16, 64, &aom_highbd_sad_skip_16x64_avx2, 12),
+ make_tuple(16, 32, &aom_highbd_sad_skip_16x32_avx2, 12),
+ make_tuple(16, 16, &aom_highbd_sad_skip_16x16_avx2, 12),
+ make_tuple(16, 8, &aom_highbd_sad_skip_16x8_avx2, 12),
+#endif
+};
+INSTANTIATE_TEST_SUITE_P(AVX2, SADSkipTest,
+ ::testing::ValuesIn(skip_avx2_tests));
+
const SadMxNAvgParam avg_avx2_tests[] = {
make_tuple(64, 128, &aom_sad64x128_avg_avx2, -1),
make_tuple(128, 64, &aom_sad128x64_avg_avx2, -1),
@@ -1862,6 +2522,67 @@
};
INSTANTIATE_TEST_SUITE_P(AVX2, SADavgTest, ::testing::ValuesIn(avg_avx2_tests));
+const SadSkipMxNx4Param skip_x4d_avx2_tests[] = {
+ make_tuple(128, 128, &aom_sad_skip_128x128x4d_avx2, -1),
+ make_tuple(128, 64, &aom_sad_skip_128x64x4d_avx2, -1),
+ make_tuple(64, 128, &aom_sad_skip_64x128x4d_avx2, -1),
+ make_tuple(64, 64, &aom_sad_skip_64x64x4d_avx2, -1),
+ make_tuple(64, 32, &aom_sad_skip_64x32x4d_avx2, -1),
+ make_tuple(64, 16, &aom_sad_skip_64x16x4d_avx2, -1),
+ make_tuple(32, 64, &aom_sad_skip_32x64x4d_avx2, -1),
+ make_tuple(32, 32, &aom_sad_skip_32x32x4d_avx2, -1),
+ make_tuple(32, 16, &aom_sad_skip_32x16x4d_avx2, -1),
+ make_tuple(32, 8, &aom_sad_skip_32x8x4d_avx2, -1),
+#if CONFIG_AV1_HIGHBITDEPTH
+ make_tuple(128, 128, &aom_highbd_sad_skip_128x128x4d_avx2, 8),
+ make_tuple(128, 64, &aom_highbd_sad_skip_128x64x4d_avx2, 8),
+ make_tuple(64, 128, &aom_highbd_sad_skip_64x128x4d_avx2, 8),
+ make_tuple(64, 64, &aom_highbd_sad_skip_64x64x4d_avx2, 8),
+ make_tuple(64, 32, &aom_highbd_sad_skip_64x32x4d_avx2, 8),
+ make_tuple(64, 16, &aom_highbd_sad_skip_64x16x4d_avx2, 8),
+ make_tuple(32, 64, &aom_highbd_sad_skip_32x64x4d_avx2, 8),
+ make_tuple(32, 32, &aom_highbd_sad_skip_32x32x4d_avx2, 8),
+ make_tuple(32, 16, &aom_highbd_sad_skip_32x16x4d_avx2, 8),
+ make_tuple(32, 8, &aom_highbd_sad_skip_32x8x4d_avx2, 8),
+ make_tuple(16, 64, &aom_highbd_sad_skip_16x64x4d_avx2, 8),
+ make_tuple(16, 32, &aom_highbd_sad_skip_16x32x4d_avx2, 8),
+ make_tuple(16, 16, &aom_highbd_sad_skip_16x16x4d_avx2, 8),
+ make_tuple(16, 8, &aom_highbd_sad_skip_16x8x4d_avx2, 8),
+
+ make_tuple(128, 128, &aom_highbd_sad_skip_128x128x4d_avx2, 10),
+ make_tuple(128, 64, &aom_highbd_sad_skip_128x64x4d_avx2, 10),
+ make_tuple(64, 128, &aom_highbd_sad_skip_64x128x4d_avx2, 10),
+ make_tuple(64, 64, &aom_highbd_sad_skip_64x64x4d_avx2, 10),
+ make_tuple(64, 32, &aom_highbd_sad_skip_64x32x4d_avx2, 10),
+ make_tuple(64, 16, &aom_highbd_sad_skip_64x16x4d_avx2, 10),
+ make_tuple(32, 64, &aom_highbd_sad_skip_32x64x4d_avx2, 10),
+ make_tuple(32, 32, &aom_highbd_sad_skip_32x32x4d_avx2, 10),
+ make_tuple(32, 16, &aom_highbd_sad_skip_32x16x4d_avx2, 10),
+ make_tuple(32, 8, &aom_highbd_sad_skip_32x8x4d_avx2, 10),
+ make_tuple(16, 64, &aom_highbd_sad_skip_16x64x4d_avx2, 10),
+ make_tuple(16, 32, &aom_highbd_sad_skip_16x32x4d_avx2, 10),
+ make_tuple(16, 16, &aom_highbd_sad_skip_16x16x4d_avx2, 10),
+ make_tuple(16, 8, &aom_highbd_sad_skip_16x8x4d_avx2, 10),
+
+ make_tuple(128, 128, &aom_highbd_sad_skip_128x128x4d_avx2, 12),
+ make_tuple(128, 64, &aom_highbd_sad_skip_128x64x4d_avx2, 12),
+ make_tuple(64, 128, &aom_highbd_sad_skip_64x128x4d_avx2, 12),
+ make_tuple(64, 64, &aom_highbd_sad_skip_64x64x4d_avx2, 12),
+ make_tuple(64, 32, &aom_highbd_sad_skip_64x32x4d_avx2, 12),
+ make_tuple(64, 16, &aom_highbd_sad_skip_64x16x4d_avx2, 12),
+ make_tuple(32, 64, &aom_highbd_sad_skip_32x64x4d_avx2, 12),
+ make_tuple(32, 32, &aom_highbd_sad_skip_32x32x4d_avx2, 12),
+ make_tuple(32, 16, &aom_highbd_sad_skip_32x16x4d_avx2, 12),
+ make_tuple(32, 8, &aom_highbd_sad_skip_32x8x4d_avx2, 12),
+ make_tuple(16, 64, &aom_highbd_sad_skip_16x64x4d_avx2, 12),
+ make_tuple(16, 32, &aom_highbd_sad_skip_16x32x4d_avx2, 12),
+ make_tuple(16, 16, &aom_highbd_sad_skip_16x16x4d_avx2, 12),
+ make_tuple(16, 8, &aom_highbd_sad_skip_16x8x4d_avx2, 12),
+#endif
+};
+INSTANTIATE_TEST_SUITE_P(AVX2, SADSkipx4Test,
+ ::testing::ValuesIn(skip_x4d_avx2_tests));
+
const SadMxNx4Param x4d_avx2_tests[] = {
make_tuple(32, 64, &aom_sad32x64x4d_avx2, -1),
make_tuple(32, 32, &aom_sad32x32x4d_avx2, -1),