Merge "ANS: Switch from PDFs to CDFs." into nextgenv2
diff --git a/test/convolve_test.cc b/test/convolve_test.cc
index 12022be..0e54c40 100644
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@@ -28,7 +28,7 @@
namespace {
-static const unsigned int kMaxDimension = 64;
+static const unsigned int kMaxDimension = MAX_CU_SIZE;
typedef void (*ConvolveFunc)(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
@@ -102,7 +102,7 @@
// = 23
// and filter_max_width = 16
//
- uint8_t intermediate_buffer[71 * kMaxDimension];
+ uint8_t intermediate_buffer[(kMaxDimension+8) * kMaxDimension];
const int intermediate_next_stride = 1 - intermediate_height * output_width;
// Horizontal pass (src -> transposed intermediate).
@@ -183,9 +183,9 @@
assert(output_width <= kMaxDimension);
assert(output_height <= kMaxDimension);
- filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, 64,
+ filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, kMaxDimension,
output_width, output_height);
- block2d_average_c(tmp, 64, dst_ptr, dst_stride,
+ block2d_average_c(tmp, kMaxDimension, dst_ptr, dst_stride,
output_width, output_height);
}
@@ -214,7 +214,7 @@
* = 23
* and filter_max_width = 16
*/
- uint16_t intermediate_buffer[71 * kMaxDimension];
+ uint16_t intermediate_buffer[(kMaxDimension+8) * kMaxDimension];
const int intermediate_next_stride = 1 - intermediate_height * output_width;
// Horizontal pass (src -> transposed intermediate).
@@ -302,9 +302,10 @@
assert(output_width <= kMaxDimension);
assert(output_height <= kMaxDimension);
- highbd_filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, 64,
+ highbd_filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter,
+ tmp, kMaxDimension,
output_width, output_height, bd);
- highbd_block2d_average_c(tmp, 64, dst_ptr, dst_stride,
+ highbd_block2d_average_c(tmp, kMaxDimension, dst_ptr, dst_stride,
output_width, output_height);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
@@ -351,7 +352,7 @@
protected:
static const int kDataAlignment = 16;
- static const int kOuterBlockSize = 256;
+ static const int kOuterBlockSize = 4*kMaxDimension;
static const int kInputStride = kOuterBlockSize;
static const int kOutputStride = kOuterBlockSize;
static const int kInputBufferSize = kOuterBlockSize * kOuterBlockSize;
@@ -414,7 +415,8 @@
void CopyOutputToRef() {
memcpy(output_ref_, output_, kOutputBufferSize);
#if CONFIG_VP9_HIGHBITDEPTH
- memcpy(output16_ref_, output16_, kOutputBufferSize);
+ memcpy(output16_ref_, output16_,
+ kOutputBufferSize * sizeof(*output16_ref_));
#endif
}
@@ -426,41 +428,41 @@
}
uint8_t *input() const {
+ const int index = BorderTop() * kOuterBlockSize + BorderLeft();
#if CONFIG_VP9_HIGHBITDEPTH
if (UUT_->use_highbd_ == 0) {
- return input_ + BorderTop() * kOuterBlockSize + BorderLeft();
+ return input_ + index;
} else {
- return CONVERT_TO_BYTEPTR(input16_ + BorderTop() * kOuterBlockSize +
- BorderLeft());
+ return CONVERT_TO_BYTEPTR(input16_) + index;
}
#else
- return input_ + BorderTop() * kOuterBlockSize + BorderLeft();
+ return input_ + index;
#endif
}
uint8_t *output() const {
+ const int index = BorderTop() * kOuterBlockSize + BorderLeft();
#if CONFIG_VP9_HIGHBITDEPTH
if (UUT_->use_highbd_ == 0) {
- return output_ + BorderTop() * kOuterBlockSize + BorderLeft();
+ return output_ + index;
} else {
- return CONVERT_TO_BYTEPTR(output16_ + BorderTop() * kOuterBlockSize +
- BorderLeft());
+ return CONVERT_TO_BYTEPTR(output16_ + index);
}
#else
- return output_ + BorderTop() * kOuterBlockSize + BorderLeft();
+ return output_ + index;
#endif
}
uint8_t *output_ref() const {
+ const int index = BorderTop() * kOuterBlockSize + BorderLeft();
#if CONFIG_VP9_HIGHBITDEPTH
if (UUT_->use_highbd_ == 0) {
- return output_ref_ + BorderTop() * kOuterBlockSize + BorderLeft();
+ return output_ref_ + index;
} else {
- return CONVERT_TO_BYTEPTR(output16_ref_ + BorderTop() * kOuterBlockSize +
- BorderLeft());
+ return CONVERT_TO_BYTEPTR(output16_ref_ + index);
}
#else
- return output_ref_ + BorderTop() * kOuterBlockSize + BorderLeft();
+ return output_ref_ + index;
#endif
}
@@ -1035,6 +1037,11 @@
wrap_convolve8_vert_c_8, wrap_convolve8_avg_vert_c_8,
wrap_convolve8_c_8, wrap_convolve8_avg_c_8, 8);
INSTANTIATE_TEST_CASE_P(C_8, ConvolveTest, ::testing::Values(
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(128, 64, &convolve8_c),
+ make_tuple(64, 128, &convolve8_c),
+ make_tuple(128, 128, &convolve8_c),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(4, 4, &convolve8_c),
make_tuple(8, 4, &convolve8_c),
make_tuple(4, 8, &convolve8_c),
@@ -1057,6 +1064,11 @@
wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10,
wrap_convolve8_c_10, wrap_convolve8_avg_c_10, 10);
INSTANTIATE_TEST_CASE_P(C_10, ConvolveTest, ::testing::Values(
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(128, 64, &convolve10_c),
+ make_tuple(64, 128, &convolve10_c),
+ make_tuple(128, 128, &convolve10_c),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(4, 4, &convolve10_c),
make_tuple(8, 4, &convolve10_c),
make_tuple(4, 8, &convolve10_c),
@@ -1079,6 +1091,11 @@
wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12,
wrap_convolve8_c_12, wrap_convolve8_avg_c_12, 12);
INSTANTIATE_TEST_CASE_P(C_12, ConvolveTest, ::testing::Values(
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(128, 64, &convolve12_c),
+ make_tuple(64, 128, &convolve12_c),
+ make_tuple(128, 128, &convolve12_c),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(4, 4, &convolve12_c),
make_tuple(8, 4, &convolve12_c),
make_tuple(4, 8, &convolve12_c),
@@ -1105,6 +1122,11 @@
vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::Values(
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(128, 64, &convolve8_c),
+ make_tuple(64, 128, &convolve8_c),
+ make_tuple(128, 128, &convolve8_c),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(4, 4, &convolve8_c),
make_tuple(8, 4, &convolve8_c),
make_tuple(4, 8, &convolve8_c),
@@ -1158,7 +1180,12 @@
wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12,
wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12,
wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12, 12);
-INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values(
+INSTANTIATE_TEST_CASE_P(SSE2_8, ConvolveTest, ::testing::Values(
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(128, 64, &convolve8_sse2),
+ make_tuple(64, 128, &convolve8_sse2),
+ make_tuple(128, 128, &convolve8_sse2),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(4, 4, &convolve8_sse2),
make_tuple(8, 4, &convolve8_sse2),
make_tuple(4, 8, &convolve8_sse2),
@@ -1171,7 +1198,13 @@
make_tuple(32, 32, &convolve8_sse2),
make_tuple(64, 32, &convolve8_sse2),
make_tuple(32, 64, &convolve8_sse2),
- make_tuple(64, 64, &convolve8_sse2),
+ make_tuple(64, 64, &convolve8_sse2)));
+INSTANTIATE_TEST_CASE_P(SSE2_10, ConvolveTest, ::testing::Values(
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(128, 64, &convolve10_sse2),
+ make_tuple(64, 128, &convolve10_sse2),
+ make_tuple(128, 128, &convolve10_sse2),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(4, 4, &convolve10_sse2),
make_tuple(8, 4, &convolve10_sse2),
make_tuple(4, 8, &convolve10_sse2),
@@ -1184,7 +1217,13 @@
make_tuple(32, 32, &convolve10_sse2),
make_tuple(64, 32, &convolve10_sse2),
make_tuple(32, 64, &convolve10_sse2),
- make_tuple(64, 64, &convolve10_sse2),
+ make_tuple(64, 64, &convolve10_sse2)));
+INSTANTIATE_TEST_CASE_P(SSE2_12, ConvolveTest, ::testing::Values(
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(128, 64, &convolve12_sse2),
+ make_tuple(64, 128, &convolve12_sse2),
+ make_tuple(128, 128, &convolve12_sse2),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(4, 4, &convolve12_sse2),
make_tuple(8, 4, &convolve12_sse2),
make_tuple(4, 8, &convolve12_sse2),
@@ -1213,6 +1252,11 @@
vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values(
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(128, 64, &convolve8_sse2),
+ make_tuple(64, 128, &convolve8_sse2),
+ make_tuple(128, 128, &convolve8_sse2),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(4, 4, &convolve8_sse2),
make_tuple(8, 4, &convolve8_sse2),
make_tuple(4, 8, &convolve8_sse2),
@@ -1237,9 +1281,14 @@
vpx_convolve8_ssse3, vpx_convolve8_avg_ssse3,
vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
- vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
+ vpx_scaled_2d_ssse3, vpx_scaled_avg_2d_c, 0);
INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values(
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(128, 64, &convolve8_ssse3),
+ make_tuple(64, 128, &convolve8_ssse3),
+ make_tuple(128, 128, &convolve8_ssse3),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(4, 4, &convolve8_ssse3),
make_tuple(8, 4, &convolve8_ssse3),
make_tuple(4, 8, &convolve8_ssse3),
@@ -1266,6 +1315,11 @@
vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest, ::testing::Values(
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(128, 64, &convolve8_avx2),
+ make_tuple(64, 128, &convolve8_avx2),
+ make_tuple(128, 128, &convolve8_avx2),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(4, 4, &convolve8_avx2),
make_tuple(8, 4, &convolve8_avx2),
make_tuple(4, 8, &convolve8_avx2),
@@ -1281,7 +1335,8 @@
make_tuple(64, 64, &convolve8_avx2)));
#endif // HAVE_AVX2 && HAVE_SSSE3
-#if HAVE_NEON
+// TODO(any): Make NEON versions support 128x128, 128x64, 64x128 block sizes
+#if HAVE_NEON && !(CONFIG_VP10 && CONFIG_EXT_PARTITION)
#if HAVE_NEON_ASM
const ConvolveFunctions convolve8_neon(
vpx_convolve_copy_neon, vpx_convolve_avg_neon,
@@ -1303,6 +1358,11 @@
#endif // HAVE_NEON_ASM
INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest, ::testing::Values(
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(128, 64, &convolve8_neon),
+ make_tuple(64, 128, &convolve8_neon),
+ make_tuple(128, 128, &convolve8_neon),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(4, 4, &convolve8_neon),
make_tuple(8, 4, &convolve8_neon),
make_tuple(4, 8, &convolve8_neon),
@@ -1318,7 +1378,8 @@
make_tuple(64, 64, &convolve8_neon)));
#endif // HAVE_NEON
-#if HAVE_DSPR2
+// TODO(any): Make DSPR2 versions support 128x128, 128x64, 64x128 block sizes
+#if HAVE_DSPR2 && !(CONFIG_VP10 && CONFIG_EXT_PARTITION)
const ConvolveFunctions convolve8_dspr2(
vpx_convolve_copy_dspr2, vpx_convolve_avg_dspr2,
vpx_convolve8_horiz_dspr2, vpx_convolve8_avg_horiz_dspr2,
@@ -1329,6 +1390,11 @@
vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
INSTANTIATE_TEST_CASE_P(DSPR2, ConvolveTest, ::testing::Values(
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(128, 64, &convolve8_dspr2),
+ make_tuple(64, 128, &convolve8_dspr2),
+ make_tuple(128, 128, &convolve8_dspr2),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(4, 4, &convolve8_dspr2),
make_tuple(8, 4, &convolve8_dspr2),
make_tuple(4, 8, &convolve8_dspr2),
@@ -1344,7 +1410,8 @@
make_tuple(64, 64, &convolve8_dspr2)));
#endif
-#if HAVE_MSA
+// TODO(any): Make MSA versions support 128x128, 128x64, 64x128 block sizes
+#if HAVE_MSA && !(CONFIG_VP10 && CONFIG_EXT_PARTITION)
const ConvolveFunctions convolve8_msa(
vpx_convolve_copy_msa, vpx_convolve_avg_msa,
vpx_convolve8_horiz_msa, vpx_convolve8_avg_horiz_msa,
@@ -1355,6 +1422,11 @@
vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
INSTANTIATE_TEST_CASE_P(MSA, ConvolveTest, ::testing::Values(
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(128, 64, &convolve8_msa),
+ make_tuple(64, 128, &convolve8_msa),
+ make_tuple(128, 128, &convolve8_msa),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(4, 4, &convolve8_msa),
make_tuple(8, 4, &convolve8_msa),
make_tuple(4, 8, &convolve8_msa),
diff --git a/test/fdct4x4_test.cc b/test/fdct4x4_test.cc
index 0c91aee..59ce895 100644
--- a/test/fdct4x4_test.cc
+++ b/test/fdct4x4_test.cc
@@ -19,8 +19,8 @@
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
+#include "test/transform_test_base.h"
#include "test/util.h"
-#include "vp9/common/vp9_entropy.h"
#include "vpx/vpx_codec.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"
@@ -28,16 +28,16 @@
using libvpx_test::ACMRandom;
namespace {
-const int kNumCoeffs = 16;
typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
-typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
- int tx_type);
typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
int tx_type);
+using libvpx_test::FhtFunc;
-typedef std::tr1::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct4x4Param;
-typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht4x4Param;
+typedef std::tr1::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t, int>
+Dct4x4Param;
+typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t, int>
+Ht4x4Param;
void fdct4x4_ref(const int16_t *in, tran_low_t *out, int stride,
int /*tx_type*/) {
@@ -89,197 +89,9 @@
#endif // HAVE_SSE2
#endif // CONFIG_VP9_HIGHBITDEPTH
-class Trans4x4TestBase {
- public:
- virtual ~Trans4x4TestBase() {}
-
- protected:
- virtual void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) = 0;
-
- virtual void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) = 0;
-
- void RunAccuracyCheck(int limit) {
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- uint32_t max_error = 0;
- int64_t total_error = 0;
- const int count_test_block = 10000;
- for (int i = 0; i < count_test_block; ++i) {
- DECLARE_ALIGNED(16, int16_t, test_input_block[kNumCoeffs]);
- DECLARE_ALIGNED(16, tran_low_t, test_temp_block[kNumCoeffs]);
- DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
- DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
-#if CONFIG_VP9_HIGHBITDEPTH
- DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
- DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
-#endif
-
- // Initialize a test block with input range [-255, 255].
- for (int j = 0; j < kNumCoeffs; ++j) {
- if (bit_depth_ == VPX_BITS_8) {
- src[j] = rnd.Rand8();
- dst[j] = rnd.Rand8();
- test_input_block[j] = src[j] - dst[j];
-#if CONFIG_VP9_HIGHBITDEPTH
- } else {
- src16[j] = rnd.Rand16() & mask_;
- dst16[j] = rnd.Rand16() & mask_;
- test_input_block[j] = src16[j] - dst16[j];
-#endif
- }
- }
-
- ASM_REGISTER_STATE_CHECK(RunFwdTxfm(test_input_block,
- test_temp_block, pitch_));
- if (bit_depth_ == VPX_BITS_8) {
- ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
-#if CONFIG_VP9_HIGHBITDEPTH
- } else {
- ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block,
- CONVERT_TO_BYTEPTR(dst16), pitch_));
-#endif
- }
-
- for (int j = 0; j < kNumCoeffs; ++j) {
-#if CONFIG_VP9_HIGHBITDEPTH
- const uint32_t diff =
- bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
-#else
- ASSERT_EQ(VPX_BITS_8, bit_depth_);
- const uint32_t diff = dst[j] - src[j];
-#endif
- const uint32_t error = diff * diff;
- if (max_error < error)
- max_error = error;
- total_error += error;
- }
- }
-
- EXPECT_GE(static_cast<uint32_t>(limit), max_error)
- << "Error: 4x4 FHT/IHT has an individual round trip error > "
- << limit;
-
- EXPECT_GE(count_test_block * limit, total_error)
- << "Error: 4x4 FHT/IHT has average round trip error > " << limit
- << " per block";
- }
-
- void RunCoeffCheck() {
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- const int count_test_block = 5000;
- DECLARE_ALIGNED(16, int16_t, input_block[kNumCoeffs]);
- DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
- DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
-
- for (int i = 0; i < count_test_block; ++i) {
- // Initialize a test block with input range [-mask_, mask_].
- for (int j = 0; j < kNumCoeffs; ++j)
- input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
-
- fwd_txfm_ref(input_block, output_ref_block, pitch_, tx_type_);
- ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_block, output_block, pitch_));
-
- // The minimum quant value is 4.
- for (int j = 0; j < kNumCoeffs; ++j)
- EXPECT_EQ(output_block[j], output_ref_block[j]);
- }
- }
-
- void RunMemCheck() {
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- const int count_test_block = 5000;
- DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
- DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
- DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
-
- for (int i = 0; i < count_test_block; ++i) {
- // Initialize a test block with input range [-mask_, mask_].
- for (int j = 0; j < kNumCoeffs; ++j) {
- input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
- }
- if (i == 0) {
- for (int j = 0; j < kNumCoeffs; ++j)
- input_extreme_block[j] = mask_;
- } else if (i == 1) {
- for (int j = 0; j < kNumCoeffs; ++j)
- input_extreme_block[j] = -mask_;
- }
-
- fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_);
- ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_extreme_block,
- output_block, pitch_));
-
- // The minimum quant value is 4.
- for (int j = 0; j < kNumCoeffs; ++j) {
- EXPECT_EQ(output_block[j], output_ref_block[j]);
- EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_block[j]))
- << "Error: 4x4 FDCT has coefficient larger than 4*DCT_MAX_VALUE";
- }
- }
- }
-
- void RunInvAccuracyCheck(int limit) {
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- const int count_test_block = 1000;
- DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
- DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
- DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
- DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
-#if CONFIG_VP9_HIGHBITDEPTH
- DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
- DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
-#endif
-
- for (int i = 0; i < count_test_block; ++i) {
- // Initialize a test block with input range [-mask_, mask_].
- for (int j = 0; j < kNumCoeffs; ++j) {
- if (bit_depth_ == VPX_BITS_8) {
- src[j] = rnd.Rand8();
- dst[j] = rnd.Rand8();
- in[j] = src[j] - dst[j];
-#if CONFIG_VP9_HIGHBITDEPTH
- } else {
- src16[j] = rnd.Rand16() & mask_;
- dst16[j] = rnd.Rand16() & mask_;
- in[j] = src16[j] - dst16[j];
-#endif
- }
- }
-
- fwd_txfm_ref(in, coeff, pitch_, tx_type_);
-
- if (bit_depth_ == VPX_BITS_8) {
- ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
-#if CONFIG_VP9_HIGHBITDEPTH
- } else {
- ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16),
- pitch_));
-#endif
- }
-
- for (int j = 0; j < kNumCoeffs; ++j) {
-#if CONFIG_VP9_HIGHBITDEPTH
- const uint32_t diff =
- bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
-#else
- const uint32_t diff = dst[j] - src[j];
-#endif
- const uint32_t error = diff * diff;
- EXPECT_GE(static_cast<uint32_t>(limit), error)
- << "Error: 4x4 IDCT has error " << error
- << " at index " << j;
- }
- }
- }
-
- int pitch_;
- int tx_type_;
- FhtFunc fwd_txfm_ref;
- vpx_bit_depth_t bit_depth_;
- int mask_;
-};
class Trans4x4DCT
- : public Trans4x4TestBase,
+ : public libvpx_test::TransformTestBase,
public ::testing::TestWithParam<Dct4x4Param> {
public:
virtual ~Trans4x4DCT() {}
@@ -292,6 +104,7 @@
fwd_txfm_ref = fdct4x4_ref;
bit_depth_ = GET_PARAM(3);
mask_ = (1 << bit_depth_) - 1;
+ num_coeffs_ = GET_PARAM(4);
}
virtual void TearDown() { libvpx_test::ClearSystemState(); }
@@ -324,7 +137,7 @@
}
class Trans4x4HT
- : public Trans4x4TestBase,
+ : public libvpx_test::TransformTestBase,
public ::testing::TestWithParam<Ht4x4Param> {
public:
virtual ~Trans4x4HT() {}
@@ -337,6 +150,7 @@
fwd_txfm_ref = fht4x4_ref;
bit_depth_ = GET_PARAM(3);
mask_ = (1 << bit_depth_) - 1;
+ num_coeffs_ = GET_PARAM(4);
}
virtual void TearDown() { libvpx_test::ClearSystemState(); }
@@ -370,7 +184,7 @@
}
class Trans4x4WHT
- : public Trans4x4TestBase,
+ : public libvpx_test::TransformTestBase,
public ::testing::TestWithParam<Dct4x4Param> {
public:
virtual ~Trans4x4WHT() {}
@@ -383,6 +197,7 @@
fwd_txfm_ref = fwht4x4_ref;
bit_depth_ = GET_PARAM(3);
mask_ = (1 << bit_depth_) - 1;
+ num_coeffs_ = GET_PARAM(4);
}
virtual void TearDown() { libvpx_test::ClearSystemState(); }
@@ -419,54 +234,54 @@
INSTANTIATE_TEST_CASE_P(
C, Trans4x4DCT,
::testing::Values(
- make_tuple(&vpx_highbd_fdct4x4_c, &idct4x4_10, 0, VPX_BITS_10),
- make_tuple(&vpx_highbd_fdct4x4_c, &idct4x4_12, 0, VPX_BITS_12),
- make_tuple(&vpx_fdct4x4_c, &vpx_idct4x4_16_add_c, 0, VPX_BITS_8)));
+ make_tuple(&vpx_highbd_fdct4x4_c, &idct4x4_10, 0, VPX_BITS_10, 16),
+ make_tuple(&vpx_highbd_fdct4x4_c, &idct4x4_12, 0, VPX_BITS_12, 16),
+ make_tuple(&vpx_fdct4x4_c, &vpx_idct4x4_16_add_c, 0, VPX_BITS_8, 16)));
#else
INSTANTIATE_TEST_CASE_P(
C, Trans4x4DCT,
::testing::Values(
- make_tuple(&vpx_fdct4x4_c, &vpx_idct4x4_16_add_c, 0, VPX_BITS_8)));
+ make_tuple(&vpx_fdct4x4_c, &vpx_idct4x4_16_add_c, 0, VPX_BITS_8, 16)));
#endif // CONFIG_VP9_HIGHBITDEPTH
#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
C, Trans4x4HT,
::testing::Values(
- make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_10, 0, VPX_BITS_10),
- make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_10, 1, VPX_BITS_10),
- make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_10, 2, VPX_BITS_10),
- make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_10, 3, VPX_BITS_10),
- make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_12, 0, VPX_BITS_12),
- make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_12, 1, VPX_BITS_12),
- make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_12, 2, VPX_BITS_12),
- make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_12, 3, VPX_BITS_12),
- make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 0, VPX_BITS_8),
- make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 1, VPX_BITS_8),
- make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 2, VPX_BITS_8),
- make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 3, VPX_BITS_8)));
+ make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_10, 0, VPX_BITS_10, 16),
+ make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_10, 1, VPX_BITS_10, 16),
+ make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_10, 2, VPX_BITS_10, 16),
+ make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_10, 3, VPX_BITS_10, 16),
+ make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_12, 0, VPX_BITS_12, 16),
+ make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_12, 1, VPX_BITS_12, 16),
+ make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_12, 2, VPX_BITS_12, 16),
+ make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_12, 3, VPX_BITS_12, 16),
+ make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 0, VPX_BITS_8, 16),
+ make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 1, VPX_BITS_8, 16),
+ make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 2, VPX_BITS_8, 16),
+ make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 3, VPX_BITS_8, 16)));
#else
INSTANTIATE_TEST_CASE_P(
C, Trans4x4HT,
::testing::Values(
- make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 0, VPX_BITS_8),
- make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 1, VPX_BITS_8),
- make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 2, VPX_BITS_8),
- make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 3, VPX_BITS_8)));
+ make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 0, VPX_BITS_8, 16),
+ make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 1, VPX_BITS_8, 16),
+ make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 2, VPX_BITS_8, 16),
+ make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 3, VPX_BITS_8, 16)));
#endif // CONFIG_VP9_HIGHBITDEPTH
#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
C, Trans4x4WHT,
::testing::Values(
- make_tuple(&vp9_highbd_fwht4x4_c, &iwht4x4_10, 0, VPX_BITS_10),
- make_tuple(&vp9_highbd_fwht4x4_c, &iwht4x4_12, 0, VPX_BITS_12),
- make_tuple(&vp9_fwht4x4_c, &vpx_iwht4x4_16_add_c, 0, VPX_BITS_8)));
+ make_tuple(&vp9_highbd_fwht4x4_c, &iwht4x4_10, 0, VPX_BITS_10, 16),
+ make_tuple(&vp9_highbd_fwht4x4_c, &iwht4x4_12, 0, VPX_BITS_12, 16),
+ make_tuple(&vp9_fwht4x4_c, &vpx_iwht4x4_16_add_c, 0, VPX_BITS_8, 16)));
#else
INSTANTIATE_TEST_CASE_P(
C, Trans4x4WHT,
::testing::Values(
- make_tuple(&vp9_fwht4x4_c, &vpx_iwht4x4_16_add_c, 0, VPX_BITS_8)));
+ make_tuple(&vp9_fwht4x4_c, &vpx_iwht4x4_16_add_c, 0, VPX_BITS_8, 16)));
#endif // CONFIG_VP9_HIGHBITDEPTH
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
@@ -474,17 +289,17 @@
NEON, Trans4x4DCT,
::testing::Values(
make_tuple(&vpx_fdct4x4_c,
- &vpx_idct4x4_16_add_neon, 0, VPX_BITS_8)));
+ &vpx_idct4x4_16_add_neon, 0, VPX_BITS_8, 16)));
#endif // HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
NEON, Trans4x4HT,
::testing::Values(
- make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 0, VPX_BITS_8),
- make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 1, VPX_BITS_8),
- make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 2, VPX_BITS_8),
- make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 3, VPX_BITS_8)));
+ make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 0, VPX_BITS_8, 16),
+ make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 1, VPX_BITS_8, 16),
+ make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 2, VPX_BITS_8, 16),
+ make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 3, VPX_BITS_8, 16)));
#endif // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
#if CONFIG_USE_X86INC && HAVE_MMX && !CONFIG_VP9_HIGHBITDEPTH && \
@@ -492,7 +307,8 @@
INSTANTIATE_TEST_CASE_P(
MMX, Trans4x4WHT,
::testing::Values(
- make_tuple(&vp9_fwht4x4_mmx, &vpx_iwht4x4_16_add_c, 0, VPX_BITS_8)));
+ make_tuple(&vp9_fwht4x4_mmx, &vpx_iwht4x4_16_add_c, 0,
+ VPX_BITS_8, 16)));
#endif
#if CONFIG_USE_X86INC && HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && \
@@ -500,7 +316,8 @@
INSTANTIATE_TEST_CASE_P(
SSE2, Trans4x4WHT,
::testing::Values(
- make_tuple(&vp9_fwht4x4_c, &vpx_iwht4x4_16_add_sse2, 0, VPX_BITS_8)));
+ make_tuple(&vp9_fwht4x4_c, &vpx_iwht4x4_16_add_sse2, 0,
+ VPX_BITS_8, 16)));
#endif
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
@@ -508,47 +325,60 @@
SSE2, Trans4x4DCT,
::testing::Values(
make_tuple(&vpx_fdct4x4_sse2,
- &vpx_idct4x4_16_add_sse2, 0, VPX_BITS_8)));
+ &vpx_idct4x4_16_add_sse2, 0, VPX_BITS_8, 16)));
INSTANTIATE_TEST_CASE_P(
SSE2, Trans4x4HT,
::testing::Values(
- make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 0, VPX_BITS_8),
- make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 1, VPX_BITS_8),
- make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 2, VPX_BITS_8),
- make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 3, VPX_BITS_8)));
+ make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 0,
+ VPX_BITS_8, 16),
+ make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 1,
+ VPX_BITS_8, 16),
+ make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 2,
+ VPX_BITS_8, 16),
+ make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 3,
+ VPX_BITS_8, 16)));
#endif // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
#if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
SSE2, Trans4x4DCT,
::testing::Values(
- make_tuple(&vpx_highbd_fdct4x4_c, &idct4x4_10_sse2, 0, VPX_BITS_10),
- make_tuple(&vpx_highbd_fdct4x4_sse2, &idct4x4_10_sse2, 0, VPX_BITS_10),
- make_tuple(&vpx_highbd_fdct4x4_c, &idct4x4_12_sse2, 0, VPX_BITS_12),
- make_tuple(&vpx_highbd_fdct4x4_sse2, &idct4x4_12_sse2, 0, VPX_BITS_12),
+ make_tuple(&vpx_highbd_fdct4x4_c, &idct4x4_10_sse2, 0,
+ VPX_BITS_10, 16),
+ make_tuple(&vpx_highbd_fdct4x4_sse2, &idct4x4_10_sse2, 0,
+ VPX_BITS_10, 16),
+ make_tuple(&vpx_highbd_fdct4x4_c, &idct4x4_12_sse2, 0,
+ VPX_BITS_12, 16),
+ make_tuple(&vpx_highbd_fdct4x4_sse2, &idct4x4_12_sse2, 0,
+ VPX_BITS_12, 16),
make_tuple(&vpx_fdct4x4_sse2, &vpx_idct4x4_16_add_c, 0,
- VPX_BITS_8)));
+ VPX_BITS_8, 16)));
INSTANTIATE_TEST_CASE_P(
SSE2, Trans4x4HT,
::testing::Values(
- make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 0, VPX_BITS_8),
- make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 1, VPX_BITS_8),
- make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 2, VPX_BITS_8),
- make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 3, VPX_BITS_8)));
+ make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 0, VPX_BITS_8, 16),
+ make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 1, VPX_BITS_8, 16),
+ make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 2, VPX_BITS_8, 16),
+ make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 3, VPX_BITS_8, 16)));
#endif // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
MSA, Trans4x4DCT,
::testing::Values(
- make_tuple(&vpx_fdct4x4_msa, &vpx_idct4x4_16_add_msa, 0, VPX_BITS_8)));
+ make_tuple(&vpx_fdct4x4_msa, &vpx_idct4x4_16_add_msa, 0,
+ VPX_BITS_8, 16)));
INSTANTIATE_TEST_CASE_P(
MSA, Trans4x4HT,
::testing::Values(
- make_tuple(&vp9_fht4x4_msa, &vp9_iht4x4_16_add_msa, 0, VPX_BITS_8),
- make_tuple(&vp9_fht4x4_msa, &vp9_iht4x4_16_add_msa, 1, VPX_BITS_8),
- make_tuple(&vp9_fht4x4_msa, &vp9_iht4x4_16_add_msa, 2, VPX_BITS_8),
- make_tuple(&vp9_fht4x4_msa, &vp9_iht4x4_16_add_msa, 3, VPX_BITS_8)));
+ make_tuple(&vp9_fht4x4_msa, &vp9_iht4x4_16_add_msa, 0,
+ VPX_BITS_8, 16),
+ make_tuple(&vp9_fht4x4_msa, &vp9_iht4x4_16_add_msa, 1,
+ VPX_BITS_8, 16),
+ make_tuple(&vp9_fht4x4_msa, &vp9_iht4x4_16_add_msa, 2,
+ VPX_BITS_8, 16),
+ make_tuple(&vp9_fht4x4_msa, &vp9_iht4x4_16_add_msa, 3,
+ VPX_BITS_8, 16)));
#endif // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
} // namespace
diff --git a/test/masked_sad_test.cc b/test/masked_sad_test.cc
index c09104c..34223ea 100644
--- a/test/masked_sad_test.cc
+++ b/test/masked_sad_test.cc
@@ -50,16 +50,16 @@
TEST_P(MaskedSADTest, OperationCheck) {
unsigned int ref_ret, ret;
ACMRandom rnd(ACMRandom::DeterministicSeed());
- DECLARE_ALIGNED(16, uint8_t, src_ptr[4096]);
- DECLARE_ALIGNED(16, uint8_t, ref_ptr[4096]);
- DECLARE_ALIGNED(16, uint8_t, msk_ptr[4096]);
+ DECLARE_ALIGNED(16, uint8_t, src_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
+ DECLARE_ALIGNED(16, uint8_t, ref_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
+ DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
int err_count = 0;
int first_failure = -1;
- int src_stride = 64;
- int ref_stride = 64;
- int msk_stride = 64;
+ int src_stride = MAX_CU_SIZE;
+ int ref_stride = MAX_CU_SIZE;
+ int msk_stride = MAX_CU_SIZE;
for (int i = 0; i < number_of_iterations; ++i) {
- for (int j = 0; j < 4096; j++) {
+ for (int j = 0; j < MAX_CU_SIZE*MAX_CU_SIZE; j++) {
src_ptr[j] = rnd.Rand8();
ref_ptr[j] = rnd.Rand8();
msk_ptr[j] = ((rnd.Rand8()&0x7f) > 64) ? rnd.Rand8()&0x3f : 64;
@@ -108,18 +108,18 @@
TEST_P(HighbdMaskedSADTest, OperationCheck) {
unsigned int ref_ret, ret;
ACMRandom rnd(ACMRandom::DeterministicSeed());
- DECLARE_ALIGNED(16, uint16_t, src_ptr[4096]);
- DECLARE_ALIGNED(16, uint16_t, ref_ptr[4096]);
- DECLARE_ALIGNED(16, uint8_t, msk_ptr[4096]);
+ DECLARE_ALIGNED(16, uint16_t, src_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
+ DECLARE_ALIGNED(16, uint16_t, ref_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
+ DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
uint8_t* src8_ptr = CONVERT_TO_BYTEPTR(src_ptr);
uint8_t* ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr);
int err_count = 0;
int first_failure = -1;
- int src_stride = 64;
- int ref_stride = 64;
- int msk_stride = 64;
+ int src_stride = MAX_CU_SIZE;
+ int ref_stride = MAX_CU_SIZE;
+ int msk_stride = MAX_CU_SIZE;
for (int i = 0; i < number_of_iterations; ++i) {
- for (int j = 0; j < 4096; j++) {
+ for (int j = 0; j < MAX_CU_SIZE*MAX_CU_SIZE; j++) {
src_ptr[j] = rnd.Rand16()&0xfff;
ref_ptr[j] = rnd.Rand16()&0xfff;
msk_ptr[j] = ((rnd.Rand8()&0x7f) > 64) ? rnd.Rand8()&0x3f : 64;
@@ -148,6 +148,14 @@
INSTANTIATE_TEST_CASE_P(
SSSE3_C_COMPARE, MaskedSADTest,
::testing::Values(
+#if CONFIG_EXT_PARTITION
+ make_tuple(&vpx_masked_sad128x128_ssse3,
+ &vpx_masked_sad128x128_c),
+ make_tuple(&vpx_masked_sad128x64_ssse3,
+ &vpx_masked_sad128x64_c),
+ make_tuple(&vpx_masked_sad64x128_ssse3,
+ &vpx_masked_sad64x128_c),
+#endif // CONFIG_EXT_PARTITION
make_tuple(&vpx_masked_sad64x64_ssse3,
&vpx_masked_sad64x64_c),
make_tuple(&vpx_masked_sad64x32_ssse3,
@@ -178,32 +186,40 @@
INSTANTIATE_TEST_CASE_P(
SSSE3_C_COMPARE, HighbdMaskedSADTest,
::testing::Values(
- make_tuple(&vp9_highbd_masked_sad64x64_ssse3,
- &vp9_highbd_masked_sad64x64_c),
- make_tuple(&vp9_highbd_masked_sad64x32_ssse3,
- &vp9_highbd_masked_sad64x32_c),
- make_tuple(&vp9_highbd_masked_sad32x64_ssse3,
- &vp9_highbd_masked_sad32x64_c),
- make_tuple(&vp9_highbd_masked_sad32x32_ssse3,
- &vp9_highbd_masked_sad32x32_c),
- make_tuple(&vp9_highbd_masked_sad32x16_ssse3,
- &vp9_highbd_masked_sad32x16_c),
- make_tuple(&vp9_highbd_masked_sad16x32_ssse3,
- &vp9_highbd_masked_sad16x32_c),
- make_tuple(&vp9_highbd_masked_sad16x16_ssse3,
- &vp9_highbd_masked_sad16x16_c),
- make_tuple(&vp9_highbd_masked_sad16x8_ssse3,
- &vp9_highbd_masked_sad16x8_c),
- make_tuple(&vp9_highbd_masked_sad8x16_ssse3,
- &vp9_highbd_masked_sad8x16_c),
- make_tuple(&vp9_highbd_masked_sad8x8_ssse3,
- &vp9_highbd_masked_sad8x8_c),
- make_tuple(&vp9_highbd_masked_sad8x4_ssse3,
- &vp9_highbd_masked_sad8x4_c),
- make_tuple(&vp9_highbd_masked_sad4x8_ssse3,
- &vp9_highbd_masked_sad4x8_c),
- make_tuple(&vp9_highbd_masked_sad4x4_ssse3,
- &vp9_highbd_masked_sad4x4_c)));
+#if CONFIG_EXT_PARTITION
+ make_tuple(&vpx_highbd_masked_sad128x128_ssse3,
+ &vpx_highbd_masked_sad128x128_c),
+ make_tuple(&vpx_highbd_masked_sad128x64_ssse3,
+ &vpx_highbd_masked_sad128x64_c),
+ make_tuple(&vpx_highbd_masked_sad64x128_ssse3,
+ &vpx_highbd_masked_sad64x128_c),
+#endif // CONFIG_EXT_PARTITION
+ make_tuple(&vpx_highbd_masked_sad64x64_ssse3,
+ &vpx_highbd_masked_sad64x64_c),
+ make_tuple(&vpx_highbd_masked_sad64x32_ssse3,
+ &vpx_highbd_masked_sad64x32_c),
+ make_tuple(&vpx_highbd_masked_sad32x64_ssse3,
+ &vpx_highbd_masked_sad32x64_c),
+ make_tuple(&vpx_highbd_masked_sad32x32_ssse3,
+ &vpx_highbd_masked_sad32x32_c),
+ make_tuple(&vpx_highbd_masked_sad32x16_ssse3,
+ &vpx_highbd_masked_sad32x16_c),
+ make_tuple(&vpx_highbd_masked_sad16x32_ssse3,
+ &vpx_highbd_masked_sad16x32_c),
+ make_tuple(&vpx_highbd_masked_sad16x16_ssse3,
+ &vpx_highbd_masked_sad16x16_c),
+ make_tuple(&vpx_highbd_masked_sad16x8_ssse3,
+ &vpx_highbd_masked_sad16x8_c),
+ make_tuple(&vpx_highbd_masked_sad8x16_ssse3,
+ &vpx_highbd_masked_sad8x16_c),
+ make_tuple(&vpx_highbd_masked_sad8x8_ssse3,
+ &vpx_highbd_masked_sad8x8_c),
+ make_tuple(&vpx_highbd_masked_sad8x4_ssse3,
+ &vpx_highbd_masked_sad8x4_c),
+ make_tuple(&vpx_highbd_masked_sad4x8_ssse3,
+ &vpx_highbd_masked_sad4x8_c),
+ make_tuple(&vpx_highbd_masked_sad4x4_ssse3,
+ &vpx_highbd_masked_sad4x4_c)));
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // HAVE_SSSE3
} // namespace
diff --git a/test/masked_variance_test.cc b/test/masked_variance_test.cc
index fc37759..1f8bf1e2 100644
--- a/test/masked_variance_test.cc
+++ b/test/masked_variance_test.cc
@@ -20,10 +20,10 @@
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
+#include "vpx/vpx_codec.h"
#include "vpx/vpx_integer.h"
#include "vpx_dsp/vpx_filter.h"
-
-#define MAX_SIZE 64
+#include "vpx_mem/vpx_mem.h"
using libvpx_test::ACMRandom;
@@ -58,17 +58,17 @@
unsigned int ref_ret, opt_ret;
unsigned int ref_sse, opt_sse;
ACMRandom rnd(ACMRandom::DeterministicSeed());
- DECLARE_ALIGNED(16, uint8_t, src_ptr[MAX_SIZE*MAX_SIZE]);
- DECLARE_ALIGNED(16, uint8_t, ref_ptr[MAX_SIZE*MAX_SIZE]);
- DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SIZE*MAX_SIZE]);
+ DECLARE_ALIGNED(16, uint8_t, src_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
+ DECLARE_ALIGNED(16, uint8_t, ref_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
+ DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
int err_count = 0;
int first_failure = -1;
- int src_stride = MAX_SIZE;
- int ref_stride = MAX_SIZE;
- int msk_stride = MAX_SIZE;
+ int src_stride = MAX_CU_SIZE;
+ int ref_stride = MAX_CU_SIZE;
+ int msk_stride = MAX_CU_SIZE;
for (int i = 0; i < number_of_iterations; ++i) {
- for (int j = 0; j < MAX_SIZE*MAX_SIZE; j++) {
+ for (int j = 0; j < MAX_CU_SIZE*MAX_CU_SIZE; j++) {
src_ptr[j] = rnd.Rand8();
ref_ptr[j] = rnd.Rand8();
msk_ptr[j] = rnd(65);
@@ -100,19 +100,19 @@
unsigned int ref_ret, opt_ret;
unsigned int ref_sse, opt_sse;
ACMRandom rnd(ACMRandom::DeterministicSeed());
- DECLARE_ALIGNED(16, uint8_t, src_ptr[MAX_SIZE*MAX_SIZE]);
- DECLARE_ALIGNED(16, uint8_t, ref_ptr[MAX_SIZE*MAX_SIZE]);
- DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SIZE*MAX_SIZE]);
+ DECLARE_ALIGNED(16, uint8_t, src_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
+ DECLARE_ALIGNED(16, uint8_t, ref_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
+ DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
int err_count = 0;
int first_failure = -1;
- int src_stride = MAX_SIZE;
- int ref_stride = MAX_SIZE;
- int msk_stride = MAX_SIZE;
+ int src_stride = MAX_CU_SIZE;
+ int ref_stride = MAX_CU_SIZE;
+ int msk_stride = MAX_CU_SIZE;
for (int i = 0; i < 8; ++i) {
- memset(src_ptr, (i & 0x1) ? 255 : 0, MAX_SIZE*MAX_SIZE);
- memset(ref_ptr, (i & 0x2) ? 255 : 0, MAX_SIZE*MAX_SIZE);
- memset(msk_ptr, (i & 0x4) ? 64 : 0, MAX_SIZE*MAX_SIZE);
+ memset(src_ptr, (i & 0x1) ? 255 : 0, MAX_CU_SIZE*MAX_CU_SIZE);
+ memset(ref_ptr, (i & 0x2) ? 255 : 0, MAX_CU_SIZE*MAX_CU_SIZE);
+ memset(msk_ptr, (i & 0x4) ? 64 : 0, MAX_CU_SIZE*MAX_CU_SIZE);
ref_ret = ref_func_(src_ptr, src_stride,
ref_ptr, ref_stride,
@@ -166,21 +166,21 @@
unsigned int ref_ret, opt_ret;
unsigned int ref_sse, opt_sse;
ACMRandom rnd(ACMRandom::DeterministicSeed());
- DECLARE_ALIGNED(16, uint8_t, src_ptr[(MAX_SIZE+1)*(MAX_SIZE+1)]);
- DECLARE_ALIGNED(16, uint8_t, ref_ptr[(MAX_SIZE+1)*(MAX_SIZE+1)]);
- DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_SIZE+1)*(MAX_SIZE+1)]);
+ DECLARE_ALIGNED(16, uint8_t, src_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]);
+ DECLARE_ALIGNED(16, uint8_t, ref_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]);
+ DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]);
int err_count = 0;
int first_failure = -1;
- int src_stride = (MAX_SIZE+1);
- int ref_stride = (MAX_SIZE+1);
- int msk_stride = (MAX_SIZE+1);
+ int src_stride = (MAX_CU_SIZE+1);
+ int ref_stride = (MAX_CU_SIZE+1);
+ int msk_stride = (MAX_CU_SIZE+1);
int xoffset;
int yoffset;
for (int i = 0; i < number_of_iterations; ++i) {
int xoffsets[] = {0, 4, rnd(BIL_SUBPEL_SHIFTS)};
int yoffsets[] = {0, 4, rnd(BIL_SUBPEL_SHIFTS)};
- for (int j = 0; j < (MAX_SIZE+1)*(MAX_SIZE+1); j++) {
+ for (int j = 0; j < (MAX_CU_SIZE+1)*(MAX_CU_SIZE+1); j++) {
src_ptr[j] = rnd.Rand8();
ref_ptr[j] = rnd.Rand8();
msk_ptr[j] = rnd(65);
@@ -221,23 +221,23 @@
unsigned int ref_ret, opt_ret;
unsigned int ref_sse, opt_sse;
ACMRandom rnd(ACMRandom::DeterministicSeed());
- DECLARE_ALIGNED(16, uint8_t, src_ptr[(MAX_SIZE+1)*(MAX_SIZE+1)]);
- DECLARE_ALIGNED(16, uint8_t, ref_ptr[(MAX_SIZE+1)*(MAX_SIZE+1)]);
- DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_SIZE+1)*(MAX_SIZE+1)]);
+ DECLARE_ALIGNED(16, uint8_t, src_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]);
+ DECLARE_ALIGNED(16, uint8_t, ref_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]);
+ DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]);
int first_failure_x = -1;
int first_failure_y = -1;
int err_count = 0;
int first_failure = -1;
- int src_stride = (MAX_SIZE+1);
- int ref_stride = (MAX_SIZE+1);
- int msk_stride = (MAX_SIZE+1);
+ int src_stride = (MAX_CU_SIZE+1);
+ int ref_stride = (MAX_CU_SIZE+1);
+ int msk_stride = (MAX_CU_SIZE+1);
for (int xoffset = 0 ; xoffset < BIL_SUBPEL_SHIFTS ; xoffset++) {
for (int yoffset = 0 ; yoffset < BIL_SUBPEL_SHIFTS ; yoffset++) {
for (int i = 0; i < 8; ++i) {
- memset(src_ptr, (i & 0x1) ? 255 : 0, (MAX_SIZE+1)*(MAX_SIZE+1));
- memset(ref_ptr, (i & 0x2) ? 255 : 0, (MAX_SIZE+1)*(MAX_SIZE+1));
- memset(msk_ptr, (i & 0x4) ? 64 : 0, (MAX_SIZE+1)*(MAX_SIZE+1));
+ memset(src_ptr, (i & 0x1) ? 255 : 0, (MAX_CU_SIZE+1)*(MAX_CU_SIZE+1));
+ memset(ref_ptr, (i & 0x2) ? 255 : 0, (MAX_CU_SIZE+1)*(MAX_CU_SIZE+1));
+ memset(msk_ptr, (i & 0x4) ? 64 : 0, (MAX_CU_SIZE+1)*(MAX_CU_SIZE+1));
ref_ret = ref_func_(src_ptr, src_stride,
xoffset, yoffset,
@@ -297,19 +297,19 @@
unsigned int ref_ret, opt_ret;
unsigned int ref_sse, opt_sse;
ACMRandom rnd(ACMRandom::DeterministicSeed());
- DECLARE_ALIGNED(16, uint16_t, src_ptr[MAX_SIZE*MAX_SIZE]);
- DECLARE_ALIGNED(16, uint16_t, ref_ptr[MAX_SIZE*MAX_SIZE]);
- DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SIZE*MAX_SIZE]);
+ DECLARE_ALIGNED(16, uint16_t, src_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
+ DECLARE_ALIGNED(16, uint16_t, ref_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
+ DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
uint8_t* src8_ptr = CONVERT_TO_BYTEPTR(src_ptr);
uint8_t* ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr);
int err_count = 0;
int first_failure = -1;
- int src_stride = MAX_SIZE;
- int ref_stride = MAX_SIZE;
- int msk_stride = MAX_SIZE;
+ int src_stride = MAX_CU_SIZE;
+ int ref_stride = MAX_CU_SIZE;
+ int msk_stride = MAX_CU_SIZE;
for (int i = 0; i < number_of_iterations; ++i) {
- for (int j = 0; j < MAX_SIZE*MAX_SIZE; j++) {
+ for (int j = 0; j < MAX_CU_SIZE*MAX_CU_SIZE; j++) {
src_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1);
ref_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1);
msk_ptr[j] = rnd(65);
@@ -341,23 +341,23 @@
unsigned int ref_ret, opt_ret;
unsigned int ref_sse, opt_sse;
ACMRandom rnd(ACMRandom::DeterministicSeed());
- DECLARE_ALIGNED(16, uint16_t, src_ptr[MAX_SIZE*MAX_SIZE]);
- DECLARE_ALIGNED(16, uint16_t, ref_ptr[MAX_SIZE*MAX_SIZE]);
- DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SIZE*MAX_SIZE]);
+ DECLARE_ALIGNED(16, uint16_t, src_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
+ DECLARE_ALIGNED(16, uint16_t, ref_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
+ DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
uint8_t* src8_ptr = CONVERT_TO_BYTEPTR(src_ptr);
uint8_t* ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr);
int err_count = 0;
int first_failure = -1;
- int src_stride = MAX_SIZE;
- int ref_stride = MAX_SIZE;
- int msk_stride = MAX_SIZE;
+ int src_stride = MAX_CU_SIZE;
+ int ref_stride = MAX_CU_SIZE;
+ int msk_stride = MAX_CU_SIZE;
for (int i = 0; i < 8; ++i) {
vpx_memset16(src_ptr, (i & 0x1) ? ((1 << bit_depth_) - 1) : 0,
- MAX_SIZE*MAX_SIZE);
+ MAX_CU_SIZE*MAX_CU_SIZE);
vpx_memset16(ref_ptr, (i & 0x2) ? ((1 << bit_depth_) - 1) : 0,
- MAX_SIZE*MAX_SIZE);
- memset(msk_ptr, (i & 0x4) ? 64 : 0, MAX_SIZE*MAX_SIZE);
+ MAX_CU_SIZE*MAX_CU_SIZE);
+ memset(msk_ptr, (i & 0x4) ? 64 : 0, MAX_CU_SIZE*MAX_CU_SIZE);
ref_ret = ref_func_(src8_ptr, src_stride,
ref8_ptr, ref_stride,
@@ -407,24 +407,24 @@
unsigned int ref_ret, opt_ret;
unsigned int ref_sse, opt_sse;
ACMRandom rnd(ACMRandom::DeterministicSeed());
- DECLARE_ALIGNED(16, uint16_t, src_ptr[(MAX_SIZE+1)*(MAX_SIZE+1)]);
- DECLARE_ALIGNED(16, uint16_t, ref_ptr[(MAX_SIZE+1)*(MAX_SIZE+1)]);
- DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_SIZE+1)*(MAX_SIZE+1)]);
+ DECLARE_ALIGNED(16, uint16_t, src_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]);
+ DECLARE_ALIGNED(16, uint16_t, ref_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]);
+ DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]);
uint8_t* src8_ptr = CONVERT_TO_BYTEPTR(src_ptr);
uint8_t* ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr);
int err_count = 0;
int first_failure = -1;
int first_failure_x = -1;
int first_failure_y = -1;
- int src_stride = (MAX_SIZE+1);
- int ref_stride = (MAX_SIZE+1);
- int msk_stride = (MAX_SIZE+1);
+ int src_stride = (MAX_CU_SIZE+1);
+ int ref_stride = (MAX_CU_SIZE+1);
+ int msk_stride = (MAX_CU_SIZE+1);
int xoffset, yoffset;
for (int i = 0; i < number_of_iterations; ++i) {
for (xoffset = 0; xoffset < BIL_SUBPEL_SHIFTS; xoffset++) {
for (yoffset = 0; yoffset < BIL_SUBPEL_SHIFTS; yoffset++) {
- for (int j = 0; j < (MAX_SIZE+1)*(MAX_SIZE+1); j++) {
+ for (int j = 0; j < (MAX_CU_SIZE+1)*(MAX_CU_SIZE+1); j++) {
src_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1);
ref_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1);
msk_ptr[j] = rnd(65);
@@ -465,27 +465,27 @@
unsigned int ref_ret, opt_ret;
unsigned int ref_sse, opt_sse;
ACMRandom rnd(ACMRandom::DeterministicSeed());
- DECLARE_ALIGNED(16, uint16_t, src_ptr[(MAX_SIZE+1)*(MAX_SIZE+1)]);
- DECLARE_ALIGNED(16, uint16_t, ref_ptr[(MAX_SIZE+1)*(MAX_SIZE+1)]);
- DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_SIZE+1)*(MAX_SIZE+1)]);
+ DECLARE_ALIGNED(16, uint16_t, src_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]);
+ DECLARE_ALIGNED(16, uint16_t, ref_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]);
+ DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]);
uint8_t* src8_ptr = CONVERT_TO_BYTEPTR(src_ptr);
uint8_t* ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr);
int first_failure_x = -1;
int first_failure_y = -1;
int err_count = 0;
int first_failure = -1;
- int src_stride = (MAX_SIZE+1);
- int ref_stride = (MAX_SIZE+1);
- int msk_stride = (MAX_SIZE+1);
+ int src_stride = (MAX_CU_SIZE+1);
+ int ref_stride = (MAX_CU_SIZE+1);
+ int msk_stride = (MAX_CU_SIZE+1);
for (int xoffset = 0 ; xoffset < BIL_SUBPEL_SHIFTS ; xoffset++) {
for (int yoffset = 0 ; yoffset < BIL_SUBPEL_SHIFTS ; yoffset++) {
for (int i = 0; i < 8; ++i) {
vpx_memset16(src_ptr, (i & 0x1) ? ((1 << bit_depth_) - 1) : 0,
- (MAX_SIZE+1)*(MAX_SIZE+1));
+ (MAX_CU_SIZE+1)*(MAX_CU_SIZE+1));
vpx_memset16(ref_ptr, (i & 0x2) ? ((1 << bit_depth_) - 1) : 0,
- (MAX_SIZE+1)*(MAX_SIZE+1));
- memset(msk_ptr, (i & 0x4) ? 64 : 0, (MAX_SIZE+1)*(MAX_SIZE+1));
+ (MAX_CU_SIZE+1)*(MAX_CU_SIZE+1));
+ memset(msk_ptr, (i & 0x4) ? 64 : 0, (MAX_CU_SIZE+1)*(MAX_CU_SIZE+1));
ref_ret = ref_func_(src8_ptr, src_stride,
xoffset, yoffset,
@@ -525,6 +525,14 @@
INSTANTIATE_TEST_CASE_P(
SSSE3_C_COMPARE, MaskedVarianceTest,
::testing::Values(
+#if CONFIG_EXT_PARTITION
+ make_tuple(&vpx_masked_variance128x128_ssse3,
+ &vpx_masked_variance128x128_c),
+ make_tuple(&vpx_masked_variance128x64_ssse3,
+ &vpx_masked_variance128x64_c),
+ make_tuple(&vpx_masked_variance64x128_ssse3,
+ &vpx_masked_variance64x128_c),
+#endif // CONFIG_EXT_PARTITION
make_tuple(&vpx_masked_variance64x64_ssse3,
&vpx_masked_variance64x64_c),
make_tuple(&vpx_masked_variance64x32_ssse3,
@@ -555,197 +563,253 @@
INSTANTIATE_TEST_CASE_P(
SSSE3_C_COMPARE, MaskedSubPixelVarianceTest,
::testing::Values(
+#if CONFIG_EXT_PARTITION
+ make_tuple(&vpx_masked_sub_pixel_variance128x128_ssse3,
+ &vpx_masked_sub_pixel_variance128x128_c),
+ make_tuple(&vpx_masked_sub_pixel_variance128x64_ssse3,
+ &vpx_masked_sub_pixel_variance128x64_c),
+ make_tuple(&vpx_masked_sub_pixel_variance64x128_ssse3,
+ &vpx_masked_sub_pixel_variance64x128_c),
+#endif // CONFIG_EXT_PARTITION
make_tuple(&vpx_masked_sub_pixel_variance64x64_ssse3,
- &vpx_masked_sub_pixel_variance64x64_c),
+ &vpx_masked_sub_pixel_variance64x64_c),
make_tuple(&vpx_masked_sub_pixel_variance64x32_ssse3,
- &vpx_masked_sub_pixel_variance64x32_c),
+ &vpx_masked_sub_pixel_variance64x32_c),
make_tuple(&vpx_masked_sub_pixel_variance32x64_ssse3,
- &vpx_masked_sub_pixel_variance32x64_c),
+ &vpx_masked_sub_pixel_variance32x64_c),
make_tuple(&vpx_masked_sub_pixel_variance32x32_ssse3,
- &vpx_masked_sub_pixel_variance32x32_c),
+ &vpx_masked_sub_pixel_variance32x32_c),
make_tuple(&vpx_masked_sub_pixel_variance32x16_ssse3,
- &vpx_masked_sub_pixel_variance32x16_c),
+ &vpx_masked_sub_pixel_variance32x16_c),
make_tuple(&vpx_masked_sub_pixel_variance16x32_ssse3,
- &vpx_masked_sub_pixel_variance16x32_c),
+ &vpx_masked_sub_pixel_variance16x32_c),
make_tuple(&vpx_masked_sub_pixel_variance16x16_ssse3,
- &vpx_masked_sub_pixel_variance16x16_c),
+ &vpx_masked_sub_pixel_variance16x16_c),
make_tuple(&vpx_masked_sub_pixel_variance16x8_ssse3,
- &vpx_masked_sub_pixel_variance16x8_c),
+ &vpx_masked_sub_pixel_variance16x8_c),
make_tuple(&vpx_masked_sub_pixel_variance8x16_ssse3,
- &vpx_masked_sub_pixel_variance8x16_c),
+ &vpx_masked_sub_pixel_variance8x16_c),
make_tuple(&vpx_masked_sub_pixel_variance8x8_ssse3,
- &vpx_masked_sub_pixel_variance8x8_c),
+ &vpx_masked_sub_pixel_variance8x8_c),
make_tuple(&vpx_masked_sub_pixel_variance8x4_ssse3,
- &vpx_masked_sub_pixel_variance8x4_c),
+ &vpx_masked_sub_pixel_variance8x4_c),
make_tuple(&vpx_masked_sub_pixel_variance4x8_ssse3,
- &vpx_masked_sub_pixel_variance4x8_c),
+ &vpx_masked_sub_pixel_variance4x8_c),
make_tuple(&vpx_masked_sub_pixel_variance4x4_ssse3,
- &vpx_masked_sub_pixel_variance4x4_c)));
+ &vpx_masked_sub_pixel_variance4x4_c)));
#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
SSSE3_C_COMPARE, HighbdMaskedVarianceTest,
::testing::Values(
- make_tuple(&vp9_highbd_masked_variance64x64_ssse3,
- &vp9_highbd_masked_variance64x64_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_variance64x32_ssse3,
- &vp9_highbd_masked_variance64x32_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_variance32x64_ssse3,
- &vp9_highbd_masked_variance32x64_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_variance32x32_ssse3,
- &vp9_highbd_masked_variance32x32_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_variance32x16_ssse3,
- &vp9_highbd_masked_variance32x16_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_variance16x32_ssse3,
- &vp9_highbd_masked_variance16x32_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_variance16x16_ssse3,
- &vp9_highbd_masked_variance16x16_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_variance16x8_ssse3,
- &vp9_highbd_masked_variance16x8_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_variance8x16_ssse3,
- &vp9_highbd_masked_variance8x16_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_variance8x8_ssse3,
- &vp9_highbd_masked_variance8x8_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_variance8x4_ssse3,
- &vp9_highbd_masked_variance8x4_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_variance4x8_ssse3,
- &vp9_highbd_masked_variance4x8_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_variance4x4_ssse3,
- &vp9_highbd_masked_variance4x4_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_10_masked_variance64x64_ssse3,
- &vp9_highbd_10_masked_variance64x64_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_variance64x32_ssse3,
- &vp9_highbd_10_masked_variance64x32_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_variance32x64_ssse3,
- &vp9_highbd_10_masked_variance32x64_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_variance32x32_ssse3,
- &vp9_highbd_10_masked_variance32x32_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_variance32x16_ssse3,
- &vp9_highbd_10_masked_variance32x16_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_variance16x32_ssse3,
- &vp9_highbd_10_masked_variance16x32_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_variance16x16_ssse3,
- &vp9_highbd_10_masked_variance16x16_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_variance16x8_ssse3,
- &vp9_highbd_10_masked_variance16x8_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_variance8x16_ssse3,
- &vp9_highbd_10_masked_variance8x16_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_variance8x8_ssse3,
- &vp9_highbd_10_masked_variance8x8_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_variance8x4_ssse3,
- &vp9_highbd_10_masked_variance8x4_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_variance4x8_ssse3,
- &vp9_highbd_10_masked_variance4x8_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_variance4x4_ssse3,
- &vp9_highbd_10_masked_variance4x4_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_12_masked_variance64x64_ssse3,
- &vp9_highbd_12_masked_variance64x64_c, VPX_BITS_12),
- make_tuple(&vp9_highbd_12_masked_variance64x32_ssse3,
- &vp9_highbd_12_masked_variance64x32_c, VPX_BITS_12),
- make_tuple(&vp9_highbd_12_masked_variance32x64_ssse3,
- &vp9_highbd_12_masked_variance32x64_c, VPX_BITS_12),
- make_tuple(&vp9_highbd_12_masked_variance32x32_ssse3,
- &vp9_highbd_12_masked_variance32x32_c, VPX_BITS_12),
- make_tuple(&vp9_highbd_12_masked_variance32x16_ssse3,
- &vp9_highbd_12_masked_variance32x16_c, VPX_BITS_12),
- make_tuple(&vp9_highbd_12_masked_variance16x32_ssse3,
- &vp9_highbd_12_masked_variance16x32_c, VPX_BITS_12),
- make_tuple(&vp9_highbd_12_masked_variance16x16_ssse3,
- &vp9_highbd_12_masked_variance16x16_c, VPX_BITS_12),
- make_tuple(&vp9_highbd_12_masked_variance16x8_ssse3,
- &vp9_highbd_12_masked_variance16x8_c, VPX_BITS_12),
- make_tuple(&vp9_highbd_12_masked_variance8x16_ssse3,
- &vp9_highbd_12_masked_variance8x16_c, VPX_BITS_12),
- make_tuple(&vp9_highbd_12_masked_variance8x8_ssse3,
- &vp9_highbd_12_masked_variance8x8_c, VPX_BITS_12),
- make_tuple(&vp9_highbd_12_masked_variance8x4_ssse3,
- &vp9_highbd_12_masked_variance8x4_c, VPX_BITS_12),
- make_tuple(&vp9_highbd_12_masked_variance4x8_ssse3,
- &vp9_highbd_12_masked_variance4x8_c, VPX_BITS_12),
- make_tuple(&vp9_highbd_12_masked_variance4x4_ssse3,
- &vp9_highbd_12_masked_variance4x4_c, VPX_BITS_12)));
+#if CONFIG_EXT_PARTITION
+ make_tuple(&vpx_highbd_masked_variance128x128_ssse3,
+ &vpx_highbd_masked_variance128x128_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_variance128x64_ssse3,
+ &vpx_highbd_masked_variance128x64_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_variance64x128_ssse3,
+ &vpx_highbd_masked_variance64x128_c, VPX_BITS_8),
+#endif // CONFIG_EXT_PARTITION
+ make_tuple(&vpx_highbd_masked_variance64x64_ssse3,
+ &vpx_highbd_masked_variance64x64_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_variance64x32_ssse3,
+ &vpx_highbd_masked_variance64x32_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_variance32x64_ssse3,
+ &vpx_highbd_masked_variance32x64_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_variance32x32_ssse3,
+ &vpx_highbd_masked_variance32x32_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_variance32x16_ssse3,
+ &vpx_highbd_masked_variance32x16_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_variance16x32_ssse3,
+ &vpx_highbd_masked_variance16x32_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_variance16x16_ssse3,
+ &vpx_highbd_masked_variance16x16_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_variance16x8_ssse3,
+ &vpx_highbd_masked_variance16x8_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_variance8x16_ssse3,
+ &vpx_highbd_masked_variance8x16_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_variance8x8_ssse3,
+ &vpx_highbd_masked_variance8x8_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_variance8x4_ssse3,
+ &vpx_highbd_masked_variance8x4_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_variance4x8_ssse3,
+ &vpx_highbd_masked_variance4x8_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_variance4x4_ssse3,
+ &vpx_highbd_masked_variance4x4_c, VPX_BITS_8),
+#if CONFIG_EXT_PARTITION
+ make_tuple(&vpx_highbd_10_masked_variance128x128_ssse3,
+ &vpx_highbd_10_masked_variance128x128_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_variance128x64_ssse3,
+ &vpx_highbd_10_masked_variance128x64_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_variance64x128_ssse3,
+ &vpx_highbd_10_masked_variance64x128_c, VPX_BITS_10),
+#endif // CONFIG_EXT_PARTITION
+ make_tuple(&vpx_highbd_10_masked_variance64x64_ssse3,
+ &vpx_highbd_10_masked_variance64x64_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_variance64x32_ssse3,
+ &vpx_highbd_10_masked_variance64x32_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_variance32x64_ssse3,
+ &vpx_highbd_10_masked_variance32x64_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_variance32x32_ssse3,
+ &vpx_highbd_10_masked_variance32x32_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_variance32x16_ssse3,
+ &vpx_highbd_10_masked_variance32x16_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_variance16x32_ssse3,
+ &vpx_highbd_10_masked_variance16x32_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_variance16x16_ssse3,
+ &vpx_highbd_10_masked_variance16x16_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_variance16x8_ssse3,
+ &vpx_highbd_10_masked_variance16x8_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_variance8x16_ssse3,
+ &vpx_highbd_10_masked_variance8x16_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_variance8x8_ssse3,
+ &vpx_highbd_10_masked_variance8x8_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_variance8x4_ssse3,
+ &vpx_highbd_10_masked_variance8x4_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_variance4x8_ssse3,
+ &vpx_highbd_10_masked_variance4x8_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_variance4x4_ssse3,
+ &vpx_highbd_10_masked_variance4x4_c, VPX_BITS_10),
+#if CONFIG_EXT_PARTITION
+ make_tuple(&vpx_highbd_12_masked_variance128x128_ssse3,
+ &vpx_highbd_12_masked_variance128x128_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_variance128x64_ssse3,
+ &vpx_highbd_12_masked_variance128x64_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_variance64x128_ssse3,
+ &vpx_highbd_12_masked_variance64x128_c, VPX_BITS_12),
+#endif // CONFIG_EXT_PARTITION
+ make_tuple(&vpx_highbd_12_masked_variance64x64_ssse3,
+ &vpx_highbd_12_masked_variance64x64_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_variance64x32_ssse3,
+ &vpx_highbd_12_masked_variance64x32_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_variance32x64_ssse3,
+ &vpx_highbd_12_masked_variance32x64_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_variance32x32_ssse3,
+ &vpx_highbd_12_masked_variance32x32_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_variance32x16_ssse3,
+ &vpx_highbd_12_masked_variance32x16_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_variance16x32_ssse3,
+ &vpx_highbd_12_masked_variance16x32_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_variance16x16_ssse3,
+ &vpx_highbd_12_masked_variance16x16_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_variance16x8_ssse3,
+ &vpx_highbd_12_masked_variance16x8_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_variance8x16_ssse3,
+ &vpx_highbd_12_masked_variance8x16_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_variance8x8_ssse3,
+ &vpx_highbd_12_masked_variance8x8_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_variance8x4_ssse3,
+ &vpx_highbd_12_masked_variance8x4_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_variance4x8_ssse3,
+ &vpx_highbd_12_masked_variance4x8_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_variance4x4_ssse3,
+ &vpx_highbd_12_masked_variance4x4_c, VPX_BITS_12)));
INSTANTIATE_TEST_CASE_P(
SSSE3_C_COMPARE, HighbdMaskedSubPixelVarianceTest,
::testing::Values(
- make_tuple(&vp9_highbd_masked_sub_pixel_variance64x64_ssse3,
- &vp9_highbd_masked_sub_pixel_variance64x64_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_sub_pixel_variance64x32_ssse3,
- &vp9_highbd_masked_sub_pixel_variance64x32_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_sub_pixel_variance32x64_ssse3,
- &vp9_highbd_masked_sub_pixel_variance32x64_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_sub_pixel_variance32x32_ssse3,
- &vp9_highbd_masked_sub_pixel_variance32x32_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_sub_pixel_variance32x16_ssse3,
- &vp9_highbd_masked_sub_pixel_variance32x16_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_sub_pixel_variance16x32_ssse3,
- &vp9_highbd_masked_sub_pixel_variance16x32_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_sub_pixel_variance16x16_ssse3,
- &vp9_highbd_masked_sub_pixel_variance16x16_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_sub_pixel_variance16x8_ssse3,
- &vp9_highbd_masked_sub_pixel_variance16x8_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_sub_pixel_variance8x16_ssse3,
- &vp9_highbd_masked_sub_pixel_variance8x16_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_sub_pixel_variance8x8_ssse3,
- &vp9_highbd_masked_sub_pixel_variance8x8_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_sub_pixel_variance8x4_ssse3,
- &vp9_highbd_masked_sub_pixel_variance8x4_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_sub_pixel_variance4x8_ssse3,
- &vp9_highbd_masked_sub_pixel_variance4x8_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_sub_pixel_variance4x4_ssse3,
- &vp9_highbd_masked_sub_pixel_variance4x4_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_10_masked_sub_pixel_variance64x64_ssse3,
- &vp9_highbd_10_masked_sub_pixel_variance64x64_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_sub_pixel_variance64x32_ssse3,
- &vp9_highbd_10_masked_sub_pixel_variance64x32_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_sub_pixel_variance32x64_ssse3,
- &vp9_highbd_10_masked_sub_pixel_variance32x64_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_sub_pixel_variance32x32_ssse3,
- &vp9_highbd_10_masked_sub_pixel_variance32x32_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_sub_pixel_variance32x16_ssse3,
- &vp9_highbd_10_masked_sub_pixel_variance32x16_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_sub_pixel_variance16x32_ssse3,
- &vp9_highbd_10_masked_sub_pixel_variance16x32_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_sub_pixel_variance16x16_ssse3,
- &vp9_highbd_10_masked_sub_pixel_variance16x16_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_sub_pixel_variance16x8_ssse3,
- &vp9_highbd_10_masked_sub_pixel_variance16x8_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_sub_pixel_variance8x16_ssse3,
- &vp9_highbd_10_masked_sub_pixel_variance8x16_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_sub_pixel_variance8x8_ssse3,
- &vp9_highbd_10_masked_sub_pixel_variance8x8_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_sub_pixel_variance8x4_ssse3,
- &vp9_highbd_10_masked_sub_pixel_variance8x4_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_sub_pixel_variance4x8_ssse3,
- &vp9_highbd_10_masked_sub_pixel_variance4x8_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_sub_pixel_variance4x4_ssse3,
- &vp9_highbd_10_masked_sub_pixel_variance4x4_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_12_masked_sub_pixel_variance64x64_ssse3,
- &vp9_highbd_12_masked_sub_pixel_variance64x64_c, VPX_BITS_12),
- make_tuple(&vp9_highbd_12_masked_sub_pixel_variance64x32_ssse3,
- &vp9_highbd_12_masked_sub_pixel_variance64x32_c, VPX_BITS_12),
- make_tuple(&vp9_highbd_12_masked_sub_pixel_variance32x64_ssse3,
- &vp9_highbd_12_masked_sub_pixel_variance32x64_c, VPX_BITS_12),
- make_tuple(&vp9_highbd_12_masked_sub_pixel_variance32x32_ssse3,
- &vp9_highbd_12_masked_sub_pixel_variance32x32_c, VPX_BITS_12),
- make_tuple(&vp9_highbd_12_masked_sub_pixel_variance32x16_ssse3,
- &vp9_highbd_12_masked_sub_pixel_variance32x16_c, VPX_BITS_12),
- make_tuple(&vp9_highbd_12_masked_sub_pixel_variance16x32_ssse3,
- &vp9_highbd_12_masked_sub_pixel_variance16x32_c, VPX_BITS_12),
- make_tuple(&vp9_highbd_12_masked_sub_pixel_variance16x16_ssse3,
- &vp9_highbd_12_masked_sub_pixel_variance16x16_c, VPX_BITS_12),
- make_tuple(&vp9_highbd_12_masked_sub_pixel_variance16x8_ssse3,
- &vp9_highbd_12_masked_sub_pixel_variance16x8_c, VPX_BITS_12),
- make_tuple(&vp9_highbd_12_masked_sub_pixel_variance8x16_ssse3,
- &vp9_highbd_12_masked_sub_pixel_variance8x16_c, VPX_BITS_12),
- make_tuple(&vp9_highbd_12_masked_sub_pixel_variance8x8_ssse3,
- &vp9_highbd_12_masked_sub_pixel_variance8x8_c, VPX_BITS_12),
- make_tuple(&vp9_highbd_12_masked_sub_pixel_variance8x4_ssse3,
- &vp9_highbd_12_masked_sub_pixel_variance8x4_c, VPX_BITS_12) ,
- make_tuple(&vp9_highbd_12_masked_sub_pixel_variance4x8_ssse3,
- &vp9_highbd_12_masked_sub_pixel_variance4x8_c, VPX_BITS_12),
- make_tuple(&vp9_highbd_12_masked_sub_pixel_variance4x4_ssse3,
- &vp9_highbd_12_masked_sub_pixel_variance4x4_c, VPX_BITS_12)));
+#if CONFIG_EXT_PARTITION
+ make_tuple(&vpx_highbd_masked_sub_pixel_variance128x128_ssse3,
+ &vpx_highbd_masked_sub_pixel_variance128x128_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_sub_pixel_variance128x64_ssse3,
+ &vpx_highbd_masked_sub_pixel_variance128x64_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_sub_pixel_variance64x128_ssse3,
+ &vpx_highbd_masked_sub_pixel_variance64x128_c, VPX_BITS_8),
+#endif // CONFIG_EXT_PARTITION
+ make_tuple(&vpx_highbd_masked_sub_pixel_variance64x64_ssse3,
+ &vpx_highbd_masked_sub_pixel_variance64x64_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_sub_pixel_variance64x32_ssse3,
+ &vpx_highbd_masked_sub_pixel_variance64x32_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_sub_pixel_variance32x64_ssse3,
+ &vpx_highbd_masked_sub_pixel_variance32x64_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_sub_pixel_variance32x32_ssse3,
+ &vpx_highbd_masked_sub_pixel_variance32x32_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_sub_pixel_variance32x16_ssse3,
+ &vpx_highbd_masked_sub_pixel_variance32x16_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_sub_pixel_variance16x32_ssse3,
+ &vpx_highbd_masked_sub_pixel_variance16x32_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_sub_pixel_variance16x16_ssse3,
+ &vpx_highbd_masked_sub_pixel_variance16x16_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_sub_pixel_variance16x8_ssse3,
+ &vpx_highbd_masked_sub_pixel_variance16x8_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_sub_pixel_variance8x16_ssse3,
+ &vpx_highbd_masked_sub_pixel_variance8x16_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_sub_pixel_variance8x8_ssse3,
+ &vpx_highbd_masked_sub_pixel_variance8x8_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_sub_pixel_variance8x4_ssse3,
+ &vpx_highbd_masked_sub_pixel_variance8x4_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_sub_pixel_variance4x8_ssse3,
+ &vpx_highbd_masked_sub_pixel_variance4x8_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_sub_pixel_variance4x4_ssse3,
+ &vpx_highbd_masked_sub_pixel_variance4x4_c, VPX_BITS_8),
+#if CONFIG_EXT_PARTITION
+ make_tuple(&vpx_highbd_10_masked_sub_pixel_variance128x128_ssse3,
+ &vpx_highbd_10_masked_sub_pixel_variance128x128_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_sub_pixel_variance128x64_ssse3,
+ &vpx_highbd_10_masked_sub_pixel_variance128x64_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_sub_pixel_variance64x128_ssse3,
+ &vpx_highbd_10_masked_sub_pixel_variance64x128_c, VPX_BITS_10),
+#endif // CONFIG_EXT_PARTITION
+ make_tuple(&vpx_highbd_10_masked_sub_pixel_variance64x64_ssse3,
+ &vpx_highbd_10_masked_sub_pixel_variance64x64_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_sub_pixel_variance64x32_ssse3,
+ &vpx_highbd_10_masked_sub_pixel_variance64x32_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_sub_pixel_variance32x64_ssse3,
+ &vpx_highbd_10_masked_sub_pixel_variance32x64_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_sub_pixel_variance32x32_ssse3,
+ &vpx_highbd_10_masked_sub_pixel_variance32x32_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_sub_pixel_variance32x16_ssse3,
+ &vpx_highbd_10_masked_sub_pixel_variance32x16_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_sub_pixel_variance16x32_ssse3,
+ &vpx_highbd_10_masked_sub_pixel_variance16x32_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_sub_pixel_variance16x16_ssse3,
+ &vpx_highbd_10_masked_sub_pixel_variance16x16_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_sub_pixel_variance16x8_ssse3,
+ &vpx_highbd_10_masked_sub_pixel_variance16x8_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_sub_pixel_variance8x16_ssse3,
+ &vpx_highbd_10_masked_sub_pixel_variance8x16_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_sub_pixel_variance8x8_ssse3,
+ &vpx_highbd_10_masked_sub_pixel_variance8x8_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_sub_pixel_variance8x4_ssse3,
+ &vpx_highbd_10_masked_sub_pixel_variance8x4_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_sub_pixel_variance4x8_ssse3,
+ &vpx_highbd_10_masked_sub_pixel_variance4x8_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_sub_pixel_variance4x4_ssse3,
+ &vpx_highbd_10_masked_sub_pixel_variance4x4_c, VPX_BITS_10),
+#if CONFIG_EXT_PARTITION
+ make_tuple(&vpx_highbd_12_masked_sub_pixel_variance128x128_ssse3,
+ &vpx_highbd_12_masked_sub_pixel_variance128x128_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_sub_pixel_variance128x64_ssse3,
+ &vpx_highbd_12_masked_sub_pixel_variance128x64_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_sub_pixel_variance64x128_ssse3,
+ &vpx_highbd_12_masked_sub_pixel_variance64x128_c, VPX_BITS_12),
+#endif // CONFIG_EXT_PARTITION
+ make_tuple(&vpx_highbd_12_masked_sub_pixel_variance64x64_ssse3,
+ &vpx_highbd_12_masked_sub_pixel_variance64x64_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_sub_pixel_variance64x32_ssse3,
+ &vpx_highbd_12_masked_sub_pixel_variance64x32_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_sub_pixel_variance32x64_ssse3,
+ &vpx_highbd_12_masked_sub_pixel_variance32x64_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_sub_pixel_variance32x32_ssse3,
+ &vpx_highbd_12_masked_sub_pixel_variance32x32_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_sub_pixel_variance32x16_ssse3,
+ &vpx_highbd_12_masked_sub_pixel_variance32x16_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_sub_pixel_variance16x32_ssse3,
+ &vpx_highbd_12_masked_sub_pixel_variance16x32_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_sub_pixel_variance16x16_ssse3,
+ &vpx_highbd_12_masked_sub_pixel_variance16x16_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_sub_pixel_variance16x8_ssse3,
+ &vpx_highbd_12_masked_sub_pixel_variance16x8_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_sub_pixel_variance8x16_ssse3,
+ &vpx_highbd_12_masked_sub_pixel_variance8x16_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_sub_pixel_variance8x8_ssse3,
+ &vpx_highbd_12_masked_sub_pixel_variance8x8_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_sub_pixel_variance8x4_ssse3,
+ &vpx_highbd_12_masked_sub_pixel_variance8x4_c, VPX_BITS_12) ,
+ make_tuple(&vpx_highbd_12_masked_sub_pixel_variance4x8_ssse3,
+ &vpx_highbd_12_masked_sub_pixel_variance4x8_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_sub_pixel_variance4x4_ssse3,
+ &vpx_highbd_12_masked_sub_pixel_variance4x4_c, VPX_BITS_12)));
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // HAVE_SSSE3
diff --git a/test/sad_test.cc b/test/sad_test.cc
index 3f0f74c..1985e18 100644
--- a/test/sad_test.cc
+++ b/test/sad_test.cc
@@ -59,13 +59,13 @@
reference_data8_ = reinterpret_cast<uint8_t*>(
vpx_memalign(kDataAlignment, kDataBufferSize));
second_pred8_ = reinterpret_cast<uint8_t*>(
- vpx_memalign(kDataAlignment, 64*64));
+ vpx_memalign(kDataAlignment, 128*128));
source_data16_ = reinterpret_cast<uint16_t*>(
vpx_memalign(kDataAlignment, kDataBlockSize*sizeof(uint16_t)));
reference_data16_ = reinterpret_cast<uint16_t*>(
vpx_memalign(kDataAlignment, kDataBufferSize*sizeof(uint16_t)));
second_pred16_ = reinterpret_cast<uint16_t*>(
- vpx_memalign(kDataAlignment, 64*64*sizeof(uint16_t)));
+ vpx_memalign(kDataAlignment, 128*128*sizeof(uint16_t)));
}
static void TearDownTestCase() {
@@ -88,9 +88,9 @@
}
protected:
- // Handle blocks up to 4 blocks 64x64 with stride up to 128
+ // Handle up to 4 128x128 blocks, with stride up to 256
static const int kDataAlignment = 16;
- static const int kDataBlockSize = 64 * 128;
+ static const int kDataBlockSize = 128 * 256;
static const int kDataBufferSize = 4 * kDataBlockSize;
virtual void SetUp() {
@@ -485,6 +485,11 @@
//------------------------------------------------------------------------------
// C functions
const SadMxNParam c_tests[] = {
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(128, 128, &vpx_sad128x128_c, -1),
+ make_tuple(128, 64, &vpx_sad128x64_c, -1),
+ make_tuple(64, 128, &vpx_sad64x128_c, -1),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(64, 64, &vpx_sad64x64_c, -1),
make_tuple(64, 32, &vpx_sad64x32_c, -1),
make_tuple(32, 64, &vpx_sad32x64_c, -1),
@@ -499,6 +504,11 @@
make_tuple(4, 8, &vpx_sad4x8_c, -1),
make_tuple(4, 4, &vpx_sad4x4_c, -1),
#if CONFIG_VP9_HIGHBITDEPTH
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(128, 128, &vpx_highbd_sad128x128_c, 8),
+ make_tuple(128, 64, &vpx_highbd_sad128x64_c, 8),
+ make_tuple(64, 128, &vpx_highbd_sad64x128_c, 8),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(64, 64, &vpx_highbd_sad64x64_c, 8),
make_tuple(64, 32, &vpx_highbd_sad64x32_c, 8),
make_tuple(32, 64, &vpx_highbd_sad32x64_c, 8),
@@ -512,6 +522,11 @@
make_tuple(8, 4, &vpx_highbd_sad8x4_c, 8),
make_tuple(4, 8, &vpx_highbd_sad4x8_c, 8),
make_tuple(4, 4, &vpx_highbd_sad4x4_c, 8),
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(128, 128, &vpx_highbd_sad128x128_c, 10),
+ make_tuple(128, 64, &vpx_highbd_sad128x64_c, 10),
+ make_tuple(64, 128, &vpx_highbd_sad64x128_c, 10),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(64, 64, &vpx_highbd_sad64x64_c, 10),
make_tuple(64, 32, &vpx_highbd_sad64x32_c, 10),
make_tuple(32, 64, &vpx_highbd_sad32x64_c, 10),
@@ -525,6 +540,11 @@
make_tuple(8, 4, &vpx_highbd_sad8x4_c, 10),
make_tuple(4, 8, &vpx_highbd_sad4x8_c, 10),
make_tuple(4, 4, &vpx_highbd_sad4x4_c, 10),
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(128, 128, &vpx_highbd_sad128x128_c, 12),
+ make_tuple(128, 64, &vpx_highbd_sad128x64_c, 12),
+ make_tuple(64, 128, &vpx_highbd_sad64x128_c, 12),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(64, 64, &vpx_highbd_sad64x64_c, 12),
make_tuple(64, 32, &vpx_highbd_sad64x32_c, 12),
make_tuple(32, 64, &vpx_highbd_sad32x64_c, 12),
@@ -543,6 +563,11 @@
INSTANTIATE_TEST_CASE_P(C, SADTest, ::testing::ValuesIn(c_tests));
const SadMxNAvgParam avg_c_tests[] = {
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(128, 128, &vpx_sad128x128_avg_c, -1),
+ make_tuple(128, 64, &vpx_sad128x64_avg_c, -1),
+ make_tuple(64, 128, &vpx_sad64x128_avg_c, -1),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(64, 64, &vpx_sad64x64_avg_c, -1),
make_tuple(64, 32, &vpx_sad64x32_avg_c, -1),
make_tuple(32, 64, &vpx_sad32x64_avg_c, -1),
@@ -557,6 +582,11 @@
make_tuple(4, 8, &vpx_sad4x8_avg_c, -1),
make_tuple(4, 4, &vpx_sad4x4_avg_c, -1),
#if CONFIG_VP9_HIGHBITDEPTH
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(128, 128, &vpx_highbd_sad128x128_avg_c, 8),
+ make_tuple(128, 64, &vpx_highbd_sad128x64_avg_c, 8),
+ make_tuple(64, 128, &vpx_highbd_sad64x128_avg_c, 8),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(64, 64, &vpx_highbd_sad64x64_avg_c, 8),
make_tuple(64, 32, &vpx_highbd_sad64x32_avg_c, 8),
make_tuple(32, 64, &vpx_highbd_sad32x64_avg_c, 8),
@@ -570,6 +600,11 @@
make_tuple(8, 4, &vpx_highbd_sad8x4_avg_c, 8),
make_tuple(4, 8, &vpx_highbd_sad4x8_avg_c, 8),
make_tuple(4, 4, &vpx_highbd_sad4x4_avg_c, 8),
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(128, 128, &vpx_highbd_sad128x128_avg_c, 10),
+ make_tuple(128, 64, &vpx_highbd_sad128x64_avg_c, 10),
+ make_tuple(64, 128, &vpx_highbd_sad64x128_avg_c, 10),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(64, 64, &vpx_highbd_sad64x64_avg_c, 10),
make_tuple(64, 32, &vpx_highbd_sad64x32_avg_c, 10),
make_tuple(32, 64, &vpx_highbd_sad32x64_avg_c, 10),
@@ -583,6 +618,11 @@
make_tuple(8, 4, &vpx_highbd_sad8x4_avg_c, 10),
make_tuple(4, 8, &vpx_highbd_sad4x8_avg_c, 10),
make_tuple(4, 4, &vpx_highbd_sad4x4_avg_c, 10),
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(128, 128, &vpx_highbd_sad128x128_avg_c, 12),
+ make_tuple(128, 64, &vpx_highbd_sad128x64_avg_c, 12),
+ make_tuple(64, 128, &vpx_highbd_sad64x128_avg_c, 12),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(64, 64, &vpx_highbd_sad64x64_avg_c, 12),
make_tuple(64, 32, &vpx_highbd_sad64x32_avg_c, 12),
make_tuple(32, 64, &vpx_highbd_sad32x64_avg_c, 12),
@@ -601,6 +641,11 @@
INSTANTIATE_TEST_CASE_P(C, SADavgTest, ::testing::ValuesIn(avg_c_tests));
const SadMxNx4Param x4d_c_tests[] = {
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(128, 128, &vpx_sad128x128x4d_c, -1),
+ make_tuple(128, 64, &vpx_sad128x64x4d_c, -1),
+ make_tuple(64, 128, &vpx_sad64x128x4d_c, -1),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(64, 64, &vpx_sad64x64x4d_c, -1),
make_tuple(64, 32, &vpx_sad64x32x4d_c, -1),
make_tuple(32, 64, &vpx_sad32x64x4d_c, -1),
@@ -615,6 +660,11 @@
make_tuple(4, 8, &vpx_sad4x8x4d_c, -1),
make_tuple(4, 4, &vpx_sad4x4x4d_c, -1),
#if CONFIG_VP9_HIGHBITDEPTH
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(128, 128, &vpx_highbd_sad128x128x4d_c, 8),
+ make_tuple(128, 64, &vpx_highbd_sad128x64x4d_c, 8),
+ make_tuple(64, 128, &vpx_highbd_sad64x128x4d_c, 8),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(64, 64, &vpx_highbd_sad64x64x4d_c, 8),
make_tuple(64, 32, &vpx_highbd_sad64x32x4d_c, 8),
make_tuple(32, 64, &vpx_highbd_sad32x64x4d_c, 8),
@@ -628,6 +678,11 @@
make_tuple(8, 4, &vpx_highbd_sad8x4x4d_c, 8),
make_tuple(4, 8, &vpx_highbd_sad4x8x4d_c, 8),
make_tuple(4, 4, &vpx_highbd_sad4x4x4d_c, 8),
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(128, 128, &vpx_highbd_sad128x128x4d_c, 10),
+ make_tuple(128, 64, &vpx_highbd_sad128x64x4d_c, 10),
+ make_tuple(64, 128, &vpx_highbd_sad64x128x4d_c, 10),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(64, 64, &vpx_highbd_sad64x64x4d_c, 10),
make_tuple(64, 32, &vpx_highbd_sad64x32x4d_c, 10),
make_tuple(32, 64, &vpx_highbd_sad32x64x4d_c, 10),
@@ -641,6 +696,11 @@
make_tuple(8, 4, &vpx_highbd_sad8x4x4d_c, 10),
make_tuple(4, 8, &vpx_highbd_sad4x8x4d_c, 10),
make_tuple(4, 4, &vpx_highbd_sad4x4x4d_c, 10),
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(128, 128, &vpx_highbd_sad128x128x4d_c, 12),
+ make_tuple(128, 64, &vpx_highbd_sad128x64x4d_c, 12),
+ make_tuple(64, 128, &vpx_highbd_sad64x128x4d_c, 12),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(64, 64, &vpx_highbd_sad64x64x4d_c, 12),
make_tuple(64, 32, &vpx_highbd_sad64x32x4d_c, 12),
make_tuple(32, 64, &vpx_highbd_sad32x64x4d_c, 12),
diff --git a/test/test.mk b/test/test.mk
index 95dfa16..21b8919 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -9,6 +9,7 @@
LIBVPX_TEST_SRCS-yes += test_vectors.h
LIBVPX_TEST_SRCS-yes += util.h
LIBVPX_TEST_SRCS-yes += video_source.h
+LIBVPX_TEST_SRCS-yes += transform_test_base.h
##
## BLACK BOX TESTS
@@ -165,6 +166,7 @@
ifeq ($(CONFIG_VP10),yes)
LIBVPX_TEST_SRCS-yes += vp10_inv_txfm_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER) += vp10_dct_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER) += vp10_fht4x4_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_ANS) += vp10_ans_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER) += sum_squares_test.cc
diff --git a/test/transform_test_base.h b/test/transform_test_base.h
new file mode 100644
index 0000000..cf2facd
--- /dev/null
+++ b/test/transform_test_base.h
@@ -0,0 +1,291 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef TEST_TRANSFORM_TEST_BASE_H_
+#define TEST_TRANSFORM_TEST_BASE_H_
+
+#include "./vpx_config.h"
+#include "vpx_mem/vpx_mem.h"
+#include "vpx/vpx_codec.h"
+
+
+namespace libvpx_test {
+
+// Note:
+// The same constant is defined in vp9/common/vp9_entropy.h and
+// vp10/common/entropy.h. The goal is for this base class to be
+// reusable for transform tests of future codecs, but including
+// either of those headers would cause a compile error when unit
+// testing another codec. Consider moving the definition to a
+// shared vpx header file.
+const int kDctMaxValue = 16384;
+
+typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
+ int tx_type);
+
+class TransformTestBase {
+ public:
+ virtual ~TransformTestBase() {}
+
+ protected:
+ virtual void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) = 0;
+
+ virtual void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) = 0;
+
+ void RunAccuracyCheck(int limit) {
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ uint32_t max_error = 0;
+ int64_t total_error = 0;
+ const int count_test_block = 10000;
+
+ int16_t *test_input_block = reinterpret_cast<int16_t *>
+ (vpx_memalign(16, sizeof(int16_t) * num_coeffs_));
+ tran_low_t *test_temp_block = reinterpret_cast<tran_low_t *>
+ (vpx_memalign(16, sizeof(tran_low_t) * num_coeffs_));
+ uint8_t *dst = reinterpret_cast<uint8_t *>
+ (vpx_memalign(16, sizeof(uint8_t) * num_coeffs_));
+ uint8_t *src = reinterpret_cast<uint8_t *>
+ (vpx_memalign(16, sizeof(uint8_t) * num_coeffs_));
+#if CONFIG_VP9_HIGHBITDEPTH
+ uint16_t *dst16 = reinterpret_cast<uint16_t *>
+ (vpx_memalign(16, sizeof(uint16_t) * num_coeffs_));
+ uint16_t *src16 = reinterpret_cast<uint16_t *>
+ (vpx_memalign(16, sizeof(uint16_t) * num_coeffs_));
+#endif
+
+ for (int i = 0; i < count_test_block; ++i) {
+ // Initialize a test block: [-255, 255] at 8-bit, else [-mask_, mask_].
+ for (int j = 0; j < num_coeffs_; ++j) {
+ if (bit_depth_ == VPX_BITS_8) {
+ src[j] = rnd.Rand8();
+ dst[j] = rnd.Rand8();
+ test_input_block[j] = src[j] - dst[j];
+#if CONFIG_VP9_HIGHBITDEPTH
+ } else {
+ src16[j] = rnd.Rand16() & mask_;
+ dst16[j] = rnd.Rand16() & mask_;
+ test_input_block[j] = src16[j] - dst16[j];
+#endif
+ }
+ }
+
+ ASM_REGISTER_STATE_CHECK(RunFwdTxfm(test_input_block,
+ test_temp_block, pitch_));
+ if (bit_depth_ == VPX_BITS_8) {
+ ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
+#if CONFIG_VP9_HIGHBITDEPTH
+ } else {
+ ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block,
+ CONVERT_TO_BYTEPTR(dst16), pitch_));
+#endif
+ }
+
+ for (int j = 0; j < num_coeffs_; ++j) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ const uint32_t diff =
+ bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
+#else
+ ASSERT_EQ(VPX_BITS_8, bit_depth_);
+ const uint32_t diff = dst[j] - src[j];
+#endif
+ const uint32_t error = diff * diff;
+ if (max_error < error)
+ max_error = error;
+ total_error += error;
+ }
+ }
+
+ EXPECT_GE(static_cast<uint32_t>(limit), max_error)
+ << "Error: 4x4 FHT/IHT has an individual round trip error > "
+ << limit;
+
+ EXPECT_GE(count_test_block * limit, total_error)
+ << "Error: 4x4 FHT/IHT has average round trip error > " << limit
+ << " per block";
+
+ vpx_free(test_input_block);
+ vpx_free(test_temp_block);
+ vpx_free(dst);
+ vpx_free(src);
+#if CONFIG_VP9_HIGHBITDEPTH
+ vpx_free(dst16);
+ vpx_free(src16);
+#endif
+ }
+
+ void RunCoeffCheck() {
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ const int count_test_block = 5000;
+
+ int16_t *input_block = reinterpret_cast<int16_t *>
+ (vpx_memalign(16, sizeof(int16_t) * num_coeffs_));
+ tran_low_t *output_ref_block = reinterpret_cast<tran_low_t *>
+ (vpx_memalign(16, sizeof(tran_low_t) * num_coeffs_));
+ tran_low_t *output_block = reinterpret_cast<tran_low_t *>
+ (vpx_memalign(16, sizeof(tran_low_t) * num_coeffs_));
+
+ for (int i = 0; i < count_test_block; ++i) {
+ // Initialize a test block with input range [-mask_, mask_].
+ for (int j = 0; j < num_coeffs_; ++j)
+ input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
+
+ fwd_txfm_ref(input_block, output_ref_block, pitch_, tx_type_);
+ ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_block, output_block, pitch_));
+
+ // The minimum quant value is 4.
+ for (int j = 0; j < num_coeffs_; ++j) {
+ EXPECT_EQ(output_block[j], output_ref_block[j])
+ << "Error: not bit-exact result at index: " << j
+ << " at test block: " << i;
+ }
+ }
+ vpx_free(input_block);
+ vpx_free(output_ref_block);
+ vpx_free(output_block);
+ }
+
+ void RunMemCheck() {
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ const int count_test_block = 5000;
+
+ int16_t *input_extreme_block = reinterpret_cast<int16_t *>
+ (vpx_memalign(16, sizeof(int16_t) * num_coeffs_));
+ tran_low_t *output_ref_block = reinterpret_cast<tran_low_t *>
+ (vpx_memalign(16, sizeof(tran_low_t) * num_coeffs_));
+ tran_low_t *output_block = reinterpret_cast<tran_low_t *>
+ (vpx_memalign(16, sizeof(tran_low_t) * num_coeffs_));
+
+ for (int i = 0; i < count_test_block; ++i) {
+ // Initialize a test block with extreme values: each is mask_ or -mask_.
+ for (int j = 0; j < num_coeffs_; ++j) {
+ input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
+ }
+ if (i == 0) {
+ for (int j = 0; j < num_coeffs_; ++j)
+ input_extreme_block[j] = mask_;
+ } else if (i == 1) {
+ for (int j = 0; j < num_coeffs_; ++j)
+ input_extreme_block[j] = -mask_;
+ }
+
+ fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_);
+ ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_extreme_block,
+ output_block, pitch_));
+
+ int row_length = FindRowLength();
+ // The minimum quant value is 4.
+ for (int j = 0; j < num_coeffs_; ++j) {
+ EXPECT_EQ(output_block[j], output_ref_block[j]);
+ EXPECT_GE(row_length * kDctMaxValue << (bit_depth_ - 8),
+ abs(output_block[j]))
+ << "Error: NxN FDCT has coefficient larger than N*DCT_MAX_VALUE";
+ }
+ }
+ vpx_free(input_extreme_block);
+ vpx_free(output_ref_block);
+ vpx_free(output_block);
+ }
+
+ void RunInvAccuracyCheck(int limit) {
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ const int count_test_block = 1000;
+
+ int16_t *in = reinterpret_cast<int16_t *>
+ (vpx_memalign(16, sizeof(int16_t) * num_coeffs_));
+ tran_low_t *coeff = reinterpret_cast<tran_low_t *>
+ (vpx_memalign(16, sizeof(tran_low_t) * num_coeffs_));
+ uint8_t *dst = reinterpret_cast<uint8_t *>
+ (vpx_memalign(16, sizeof(uint8_t) * num_coeffs_));
+ uint8_t *src = reinterpret_cast<uint8_t *>
+ (vpx_memalign(16, sizeof(uint8_t) * num_coeffs_));
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ uint16_t *dst16 = reinterpret_cast<uint16_t *>
+ (vpx_memalign(16, sizeof(uint16_t) * num_coeffs_));
+ uint16_t *src16 = reinterpret_cast<uint16_t *>
+ (vpx_memalign(16, sizeof(uint16_t) * num_coeffs_));
+#endif
+
+ for (int i = 0; i < count_test_block; ++i) {
+ // Initialize a test block: [-255, 255] at 8-bit, else [-mask_, mask_].
+ for (int j = 0; j < num_coeffs_; ++j) {
+ if (bit_depth_ == VPX_BITS_8) {
+ src[j] = rnd.Rand8();
+ dst[j] = rnd.Rand8();
+ in[j] = src[j] - dst[j];
+#if CONFIG_VP9_HIGHBITDEPTH
+ } else {
+ src16[j] = rnd.Rand16() & mask_;
+ dst16[j] = rnd.Rand16() & mask_;
+ in[j] = src16[j] - dst16[j];
+#endif
+ }
+ }
+
+ fwd_txfm_ref(in, coeff, pitch_, tx_type_);
+
+ if (bit_depth_ == VPX_BITS_8) {
+ ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
+#if CONFIG_VP9_HIGHBITDEPTH
+ } else {
+ ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16),
+ pitch_));
+#endif
+ }
+
+ for (int j = 0; j < num_coeffs_; ++j) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ const uint32_t diff =
+ bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
+#else
+ const uint32_t diff = dst[j] - src[j];
+#endif
+ const uint32_t error = diff * diff;
+ EXPECT_GE(static_cast<uint32_t>(limit), error)
+ << "Error: 4x4 IDCT has error " << error
+ << " at index " << j;
+ }
+ }
+ vpx_free(in);
+ vpx_free(coeff);
+ vpx_free(dst);
+ vpx_free(src);
+#if CONFIG_VP9_HIGHBITDEPTH
+ vpx_free(src16);
+ vpx_free(dst16);
+#endif
+ }
+
+ int pitch_;
+ int tx_type_;
+ FhtFunc fwd_txfm_ref;
+ vpx_bit_depth_t bit_depth_;
+ int mask_;
+ int num_coeffs_;
+
+ private:
+ // Maps num_coeffs_ 16/64/256/1024 to row length 4/8/16/32; otherwise 4.
+ int FindRowLength() const {
+ int row = 4;
+ if (16 == num_coeffs_) {
+ row = 4;
+ } else if (64 == num_coeffs_) {
+ row = 8;
+ } else if (256 == num_coeffs_) {
+ row = 16;
+ } else if (1024 == num_coeffs_) {
+ row = 32;
+ }
+ return row;
+ }
+};
+
+} // namespace libvpx_test
+
+#endif // TEST_TRANSFORM_TEST_BASE_H_
diff --git a/test/variance_test.cc b/test/variance_test.cc
index 6f50f78..97c5516 100644
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
@@ -759,7 +759,13 @@
INSTANTIATE_TEST_CASE_P(
C, VpxVarianceTest,
- ::testing::Values(make_tuple(6, 6, &vpx_variance64x64_c, 0),
+ ::testing::Values(
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(7, 7, &vpx_variance128x128_c, 0),
+ make_tuple(7, 6, &vpx_variance128x64_c, 0),
+ make_tuple(6, 7, &vpx_variance64x128_c, 0),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(6, 6, &vpx_variance64x64_c, 0),
make_tuple(6, 5, &vpx_variance64x32_c, 0),
make_tuple(5, 6, &vpx_variance32x64_c, 0),
make_tuple(5, 5, &vpx_variance32x32_c, 0),
@@ -775,7 +781,13 @@
INSTANTIATE_TEST_CASE_P(
C, VpxSubpelVarianceTest,
- ::testing::Values(make_tuple(6, 6, &vpx_sub_pixel_variance64x64_c, 0),
+ ::testing::Values(
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(7, 7, &vpx_sub_pixel_variance128x128_c, 0),
+ make_tuple(7, 6, &vpx_sub_pixel_variance128x64_c, 0),
+ make_tuple(6, 7, &vpx_sub_pixel_variance64x128_c, 0),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(6, 6, &vpx_sub_pixel_variance64x64_c, 0),
make_tuple(6, 5, &vpx_sub_pixel_variance64x32_c, 0),
make_tuple(5, 6, &vpx_sub_pixel_variance32x64_c, 0),
make_tuple(5, 5, &vpx_sub_pixel_variance32x32_c, 0),
@@ -791,7 +803,13 @@
INSTANTIATE_TEST_CASE_P(
C, VpxSubpelAvgVarianceTest,
- ::testing::Values(make_tuple(6, 6, &vpx_sub_pixel_avg_variance64x64_c, 0),
+ ::testing::Values(
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(7, 7, &vpx_sub_pixel_avg_variance128x128_c, 0),
+ make_tuple(7, 6, &vpx_sub_pixel_avg_variance128x64_c, 0),
+ make_tuple(6, 7, &vpx_sub_pixel_avg_variance64x128_c, 0),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(6, 6, &vpx_sub_pixel_avg_variance64x64_c, 0),
make_tuple(6, 5, &vpx_sub_pixel_avg_variance64x32_c, 0),
make_tuple(5, 6, &vpx_sub_pixel_avg_variance32x64_c, 0),
make_tuple(5, 5, &vpx_sub_pixel_avg_variance32x32_c, 0),
@@ -841,7 +859,13 @@
INSTANTIATE_TEST_CASE_P(
C, VpxHBDVarianceTest,
- ::testing::Values(make_tuple(6, 6, &vpx_highbd_12_variance64x64_c, 12),
+ ::testing::Values(
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(7, 7, &vpx_highbd_12_variance128x128_c, 12),
+ make_tuple(7, 6, &vpx_highbd_12_variance128x64_c, 12),
+ make_tuple(6, 7, &vpx_highbd_12_variance64x128_c, 12),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(6, 6, &vpx_highbd_12_variance64x64_c, 12),
make_tuple(6, 5, &vpx_highbd_12_variance64x32_c, 12),
make_tuple(5, 6, &vpx_highbd_12_variance32x64_c, 12),
make_tuple(5, 5, &vpx_highbd_12_variance32x32_c, 12),
@@ -854,6 +878,11 @@
make_tuple(3, 2, &vpx_highbd_12_variance8x4_c, 12),
make_tuple(2, 3, &vpx_highbd_12_variance4x8_c, 12),
make_tuple(2, 2, &vpx_highbd_12_variance4x4_c, 12),
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(7, 7, &vpx_highbd_10_variance128x128_c, 10),
+ make_tuple(7, 6, &vpx_highbd_10_variance128x64_c, 10),
+ make_tuple(6, 7, &vpx_highbd_10_variance64x128_c, 10),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(6, 6, &vpx_highbd_10_variance64x64_c, 10),
make_tuple(6, 5, &vpx_highbd_10_variance64x32_c, 10),
make_tuple(5, 6, &vpx_highbd_10_variance32x64_c, 10),
@@ -867,6 +896,11 @@
make_tuple(3, 2, &vpx_highbd_10_variance8x4_c, 10),
make_tuple(2, 3, &vpx_highbd_10_variance4x8_c, 10),
make_tuple(2, 2, &vpx_highbd_10_variance4x4_c, 10),
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(7, 7, &vpx_highbd_8_variance128x128_c, 8),
+ make_tuple(7, 6, &vpx_highbd_8_variance128x64_c, 8),
+ make_tuple(6, 7, &vpx_highbd_8_variance64x128_c, 8),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(6, 6, &vpx_highbd_8_variance64x64_c, 8),
make_tuple(6, 5, &vpx_highbd_8_variance64x32_c, 8),
make_tuple(5, 6, &vpx_highbd_8_variance32x64_c, 8),
@@ -884,6 +918,11 @@
INSTANTIATE_TEST_CASE_P(
C, VpxHBDSubpelVarianceTest,
::testing::Values(
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(7, 7, &vpx_highbd_8_sub_pixel_variance128x128_c, 8),
+ make_tuple(7, 6, &vpx_highbd_8_sub_pixel_variance128x64_c, 8),
+ make_tuple(6, 7, &vpx_highbd_8_sub_pixel_variance64x128_c, 8),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(6, 6, &vpx_highbd_8_sub_pixel_variance64x64_c, 8),
make_tuple(6, 5, &vpx_highbd_8_sub_pixel_variance64x32_c, 8),
make_tuple(5, 6, &vpx_highbd_8_sub_pixel_variance32x64_c, 8),
@@ -897,6 +936,11 @@
make_tuple(3, 2, &vpx_highbd_8_sub_pixel_variance8x4_c, 8),
make_tuple(2, 3, &vpx_highbd_8_sub_pixel_variance4x8_c, 8),
make_tuple(2, 2, &vpx_highbd_8_sub_pixel_variance4x4_c, 8),
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(7, 7, &vpx_highbd_10_sub_pixel_variance128x128_c, 10),
+ make_tuple(7, 6, &vpx_highbd_10_sub_pixel_variance128x64_c, 10),
+ make_tuple(6, 7, &vpx_highbd_10_sub_pixel_variance64x128_c, 10),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(6, 6, &vpx_highbd_10_sub_pixel_variance64x64_c, 10),
make_tuple(6, 5, &vpx_highbd_10_sub_pixel_variance64x32_c, 10),
make_tuple(5, 6, &vpx_highbd_10_sub_pixel_variance32x64_c, 10),
@@ -910,6 +954,11 @@
make_tuple(3, 2, &vpx_highbd_10_sub_pixel_variance8x4_c, 10),
make_tuple(2, 3, &vpx_highbd_10_sub_pixel_variance4x8_c, 10),
make_tuple(2, 2, &vpx_highbd_10_sub_pixel_variance4x4_c, 10),
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(7, 7, &vpx_highbd_12_sub_pixel_variance128x128_c, 12),
+ make_tuple(7, 6, &vpx_highbd_12_sub_pixel_variance128x64_c, 12),
+ make_tuple(6, 7, &vpx_highbd_12_sub_pixel_variance64x128_c, 12),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(6, 6, &vpx_highbd_12_sub_pixel_variance64x64_c, 12),
make_tuple(6, 5, &vpx_highbd_12_sub_pixel_variance64x32_c, 12),
make_tuple(5, 6, &vpx_highbd_12_sub_pixel_variance32x64_c, 12),
@@ -927,6 +976,11 @@
INSTANTIATE_TEST_CASE_P(
C, VpxHBDSubpelAvgVarianceTest,
::testing::Values(
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(7, 7, &vpx_highbd_8_sub_pixel_avg_variance128x128_c, 8),
+ make_tuple(7, 6, &vpx_highbd_8_sub_pixel_avg_variance128x64_c, 8),
+ make_tuple(6, 7, &vpx_highbd_8_sub_pixel_avg_variance64x128_c, 8),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(6, 6, &vpx_highbd_8_sub_pixel_avg_variance64x64_c, 8),
make_tuple(6, 5, &vpx_highbd_8_sub_pixel_avg_variance64x32_c, 8),
make_tuple(5, 6, &vpx_highbd_8_sub_pixel_avg_variance32x64_c, 8),
@@ -940,6 +994,11 @@
make_tuple(3, 2, &vpx_highbd_8_sub_pixel_avg_variance8x4_c, 8),
make_tuple(2, 3, &vpx_highbd_8_sub_pixel_avg_variance4x8_c, 8),
make_tuple(2, 2, &vpx_highbd_8_sub_pixel_avg_variance4x4_c, 8),
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(7, 7, &vpx_highbd_10_sub_pixel_avg_variance128x128_c, 10),
+ make_tuple(7, 6, &vpx_highbd_10_sub_pixel_avg_variance128x64_c, 10),
+ make_tuple(6, 7, &vpx_highbd_10_sub_pixel_avg_variance64x128_c, 10),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(6, 6, &vpx_highbd_10_sub_pixel_avg_variance64x64_c, 10),
make_tuple(6, 5, &vpx_highbd_10_sub_pixel_avg_variance64x32_c, 10),
make_tuple(5, 6, &vpx_highbd_10_sub_pixel_avg_variance32x64_c, 10),
@@ -953,6 +1012,11 @@
make_tuple(3, 2, &vpx_highbd_10_sub_pixel_avg_variance8x4_c, 10),
make_tuple(2, 3, &vpx_highbd_10_sub_pixel_avg_variance4x8_c, 10),
make_tuple(2, 2, &vpx_highbd_10_sub_pixel_avg_variance4x4_c, 10),
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(7, 7, &vpx_highbd_12_sub_pixel_avg_variance128x128_c, 12),
+ make_tuple(7, 6, &vpx_highbd_12_sub_pixel_avg_variance128x64_c, 12),
+ make_tuple(6, 7, &vpx_highbd_12_sub_pixel_avg_variance64x128_c, 12),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(6, 6, &vpx_highbd_12_sub_pixel_avg_variance64x64_c, 12),
make_tuple(6, 5, &vpx_highbd_12_sub_pixel_avg_variance64x32_c, 12),
make_tuple(5, 6, &vpx_highbd_12_sub_pixel_avg_variance32x64_c, 12),
diff --git a/test/vp10_fht4x4_test.cc b/test/vp10_fht4x4_test.cc
new file mode 100644
index 0000000..d2598f9
--- /dev/null
+++ b/test/vp10_fht4x4_test.cc
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vp10_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
+
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/transform_test_base.h"
+#include "test/util.h"
+#include "vpx_ports/mem.h"
+
+using libvpx_test::ACMRandom;
+
+namespace {
+typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
+ int tx_type);
+
+using libvpx_test::FhtFunc;
+typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t, int> Ht4x4Param;
+
+void fht4x4_ref(const int16_t *in, tran_low_t *out, int stride,
+ int tx_type) {
+ vp10_fht4x4_c(in, out, stride, tx_type);  // forwards to the plain-C transform
+}
+
+// Parameterized fixture for a 4x4 forward/inverse transform pair. Each
+// Ht4x4Param is (forward fn, inverse fn, tx_type, bit depth, coeff count).
+class VP10Trans4x4HT : public libvpx_test::TransformTestBase,
+                       public ::testing::TestWithParam<Ht4x4Param> {
+ public:
+  virtual ~VP10Trans4x4HT() {}
+
+  virtual void SetUp() {
+    fwd_txfm_ = GET_PARAM(0);
+    inv_txfm_ = GET_PARAM(1);
+    fwd_txfm_ref = fht4x4_ref;
+    tx_type_ = GET_PARAM(2);
+    bit_depth_ = GET_PARAM(3);
+    num_coeffs_ = GET_PARAM(4);
+    mask_ = (1 << bit_depth_) - 1;
+    pitch_ = 4;
+  }
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }
+
+ protected:
+  void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
+    fwd_txfm_(in, out, stride, tx_type_);
+  }
+  void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
+    inv_txfm_(out, dst, stride, tx_type_);
+  }
+
+  FhtFunc fwd_txfm_;
+  IhtFunc inv_txfm_;
+};
+
+// Runs the coefficient check inherited from the fixture's base classes for
+// every instantiated parameter tuple.
+TEST_P(VP10Trans4x4HT, CoeffCheck) { RunCoeffCheck(); }
+
+using std::tr1::make_tuple;
+
+#if HAVE_SSE2
+// Each entry is (forward fn, inverse fn, tx_type, bit depth, coefficient
+// count), in the GET_PARAM order read by VP10Trans4x4HT::SetUp(). The list is
+// kept in an array instead of ::testing::Values(...) because preprocessor
+// conditionals inside the arguments of a function-like macro such as
+// INSTANTIATE_TEST_CASE_P are undefined behavior.
+const Ht4x4Param kArrayHt4x4Param_sse2[] = {
+  // tx_type 0..3: instantiated in every configuration.
+  make_tuple(&vp10_fht4x4_sse2, &vp10_iht4x4_16_add_sse2, 0,
+             VPX_BITS_8, 16),
+  make_tuple(&vp10_fht4x4_sse2, &vp10_iht4x4_16_add_sse2, 1,
+             VPX_BITS_8, 16),
+  make_tuple(&vp10_fht4x4_sse2, &vp10_iht4x4_16_add_sse2, 2,
+             VPX_BITS_8, 16),
+  make_tuple(&vp10_fht4x4_sse2, &vp10_iht4x4_16_add_sse2, 3,
+             VPX_BITS_8, 16),
+#if CONFIG_EXT_TX
+  // tx_type 4..15: instantiated only when CONFIG_EXT_TX is enabled.
+  make_tuple(&vp10_fht4x4_sse2, &vp10_iht4x4_16_add_sse2, 4,
+             VPX_BITS_8, 16),
+  make_tuple(&vp10_fht4x4_sse2, &vp10_iht4x4_16_add_sse2, 5,
+             VPX_BITS_8, 16),
+  make_tuple(&vp10_fht4x4_sse2, &vp10_iht4x4_16_add_sse2, 6,
+             VPX_BITS_8, 16),
+  make_tuple(&vp10_fht4x4_sse2, &vp10_iht4x4_16_add_sse2, 7,
+             VPX_BITS_8, 16),
+  make_tuple(&vp10_fht4x4_sse2, &vp10_iht4x4_16_add_sse2, 8,
+             VPX_BITS_8, 16),
+  make_tuple(&vp10_fht4x4_sse2, &vp10_iht4x4_16_add_sse2, 9,
+             VPX_BITS_8, 16),
+  make_tuple(&vp10_fht4x4_sse2, &vp10_iht4x4_16_add_sse2, 10,
+             VPX_BITS_8, 16),
+  make_tuple(&vp10_fht4x4_sse2, &vp10_iht4x4_16_add_sse2, 11,
+             VPX_BITS_8, 16),
+  make_tuple(&vp10_fht4x4_sse2, &vp10_iht4x4_16_add_sse2, 12,
+             VPX_BITS_8, 16),
+  make_tuple(&vp10_fht4x4_sse2, &vp10_iht4x4_16_add_sse2, 13,
+             VPX_BITS_8, 16),
+  make_tuple(&vp10_fht4x4_sse2, &vp10_iht4x4_16_add_sse2, 14,
+             VPX_BITS_8, 16),
+  make_tuple(&vp10_fht4x4_sse2, &vp10_iht4x4_16_add_sse2, 15,
+             VPX_BITS_8, 16),
+#endif  // CONFIG_EXT_TX
+};
+
+INSTANTIATE_TEST_CASE_P(
+    SSE2, VP10Trans4x4HT, ::testing::ValuesIn(kArrayHt4x4Param_sse2));
+#endif  // HAVE_SSE2
+
+} // namespace
diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h
index f96aa2e..a1b5683 100644
--- a/vp10/common/blockd.h
+++ b/vp10/common/blockd.h
@@ -360,7 +360,7 @@
#define USE_MSKTX_FOR_32X32 1
static const int num_ext_tx_set_inter[EXT_TX_SETS_INTER] = {
- 1, 17, 10, 2
+ 1, 19, 12, 2
};
static const int num_ext_tx_set_intra[EXT_TX_SETS_INTRA] = {
1, 17, 10
@@ -421,10 +421,10 @@
// Transform types used in each inter set
static const int ext_tx_used_inter[EXT_TX_SETS_INTER][TX_TYPES] = {
- { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
- { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, },
- { 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, },
- { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, },
+ { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1},
+ { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1},
};
static INLINE int get_ext_tx_types(TX_SIZE tx_size, BLOCK_SIZE bs,
@@ -580,17 +580,12 @@
static INLINE TX_SIZE get_uv_tx_size(const MB_MODE_INFO *mbmi,
const struct macroblockd_plane *pd) {
#if CONFIG_SUPERTX
- if (!supertx_enabled(mbmi)) {
- return get_uv_tx_size_impl(mbmi->tx_size, mbmi->sb_type, pd->subsampling_x,
- pd->subsampling_y);
- } else {
+ if (supertx_enabled(mbmi))
return uvsupertx_size_lookup[mbmi->tx_size][pd->subsampling_x]
[pd->subsampling_y];
- }
-#else
+#endif // CONFIG_SUPERTX
return get_uv_tx_size_impl(mbmi->tx_size, mbmi->sb_type, pd->subsampling_x,
pd->subsampling_y);
-#endif // CONFIG_SUPERTX
}
static INLINE BLOCK_SIZE get_plane_block_size(BLOCK_SIZE bsize,
diff --git a/vp10/common/entropymode.c b/vp10/common/entropymode.c
index 8bb653c..e4c27a7 100644
--- a/vp10/common/entropymode.c
+++ b/vp10/common/entropymode.c
@@ -838,46 +838,52 @@
[TREE_SIZE(TX_TYPES)] = {
{ // ToDo(yaowu): remove used entry 0.
-IDTX, 2,
- -DCT_DCT, 4,
- -DST_DST, 6,
- 8, 18,
- 10, 12,
- -DST_DCT, -DCT_DST,
+ -V_DCT, 4,
+ -H_DCT, 6,
+ -DCT_DCT, 8,
+ -DST_DST, 10,
+ 12, 22,
14, 16,
+ -DST_DCT, -DCT_DST,
+ 18, 20,
-ADST_DCT, -DCT_ADST,
-FLIPADST_DCT, -DCT_FLIPADST,
- 20, 26,
- 22, 24,
+ 24, 30,
+ 26, 28,
-DST_ADST, -ADST_DST,
-DST_FLIPADST, -FLIPADST_DST,
- 28, 30,
+ 32, 34,
-ADST_ADST, -FLIPADST_FLIPADST,
-ADST_FLIPADST, -FLIPADST_ADST,
}, {
-IDTX, 2,
- -DCT_DCT, 4,
- -DST_DST, 6,
- 8, 18,
- 10, 12,
- -DST_DCT, -DCT_DST,
+ -V_DCT, 4,
+ -H_DCT, 6,
+ -DCT_DCT, 8,
+ -DST_DST, 10,
+ 12, 22,
14, 16,
+ -DST_DCT, -DCT_DST,
+ 18, 20,
-ADST_DCT, -DCT_ADST,
-FLIPADST_DCT, -DCT_FLIPADST,
- 20, 26,
- 22, 24,
+ 24, 30,
+ 26, 28,
-DST_ADST, -ADST_DST,
-DST_FLIPADST, -FLIPADST_DST,
- 28, 30,
+ 32, 34,
-ADST_ADST, -FLIPADST_FLIPADST,
-ADST_FLIPADST, -FLIPADST_ADST,
}, {
-IDTX, 2,
- -DCT_DCT, 4,
- 6, 12,
- 8, 10,
+ -V_DCT, 4,
+ -H_DCT, 6,
+ -DCT_DCT, 8,
+ 10, 16,
+ 12, 14,
-ADST_DCT, -DCT_ADST,
-FLIPADST_DCT, -DCT_FLIPADST,
- 14, 16,
+ 18, 20,
-ADST_ADST, -FLIPADST_FLIPADST,
-ADST_FLIPADST, -FLIPADST_ADST
}, {
@@ -937,33 +943,33 @@
static const vpx_prob
default_inter_ext_tx_prob[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES - 1] = {
{ // ToDo(yaowu): remove unused entry 0.
- { 12, 112, 16, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128 },
- { 12, 112, 16, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128 },
- { 12, 112, 16, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128 },
+ { 12, 15, 15, 112, 16, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 12, 15, 15, 112, 16, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 12, 15, 15, 112, 16, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128 },
#if EXT_TX_SIZES == 4
- { 12, 112, 16, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128 },
+ { 12, 15, 15, 112, 16, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128 },
#endif
}, {
- { 12, 112, 16, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128 },
- { 12, 112, 16, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128 },
- { 12, 112, 16, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128 },
+ { 12, 15, 15, 112, 16, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 12, 15, 15, 112, 16, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 12, 15, 15, 112, 16, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128 },
#if EXT_TX_SIZES == 4
- { 12, 160, 16, 144, 160, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128 },
+ { 12, 15, 15, 160, 16, 144, 160, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128 },
#endif
}, {
- { 12, 112, 128, 128, 128, 128, 128, 128, 128 },
- { 12, 112, 128, 128, 128, 128, 128, 128, 128 },
- { 12, 112, 128, 128, 128, 128, 128, 128, 128 },
+ { 12, 15, 15, 112, 128, 128, 128, 128, 128, 128, 128 },
+ { 12, 15, 15, 112, 128, 128, 128, 128, 128, 128, 128 },
+ { 12, 15, 15, 112, 128, 128, 128, 128, 128, 128, 128 },
#if EXT_TX_SIZES == 4
- { 12, 160, 128, 128, 128, 128, 128, 128, 128 },
+ { 12, 15, 15, 160, 128, 128, 128, 128, 128, 128, 128 },
#endif
}, {
{ 12, },
diff --git a/vp10/common/enums.h b/vp10/common/enums.h
index 2233649..4e3a5b1 100644
--- a/vp10/common/enums.h
+++ b/vp10/common/enums.h
@@ -108,6 +108,8 @@
FLIPADST_DST = 14,
DST_DST = 15,
IDTX = 16,
+ V_DCT = 17,
+ H_DCT = 18,
#endif // CONFIG_EXT_TX
TX_TYPES,
} TX_TYPE;
diff --git a/vp10/common/filter.c b/vp10/common/filter.c
index 2023cd6..36a17a8 100644
--- a/vp10/common/filter.c
+++ b/vp10/common/filter.c
@@ -77,6 +77,7 @@
{0, 1, -3, 8, 126, -5, 1, 0},
};
+#if CONFIG_EXT_INTRA
DECLARE_ALIGNED(256, static const InterpKernel,
sub_pel_filters_8sharp[SUBPEL_SHIFTS]) = {
// intfilt 0.8
@@ -97,6 +98,28 @@
{-2, 4, -7, 18, 124, -12, 5, -2},
{-1, 2, -4, 9, 127, -6, 2, -1},
};
+#endif // CONFIG_EXT_INTRA
+
+DECLARE_ALIGNED(256, static const int16_t,
+ sub_pel_filters_10sharp[SUBPEL_SHIFTS][10]) = {
+ // intfilt 0.77 -- one 10-tap row per SUBPEL_SHIFTS index; every row sums to 128
+ {0, 0, 0, 0, 128, 0, 0, 0, 0, 0},
+ {0, -1, 3, -6, 127, 8, -4, 2, -1, 0},
+ {1, -2, 5, -12, 124, 18, -7, 3, -2, 0},
+ {1, -3, 7, -17, 119, 28, -11, 5, -2, 1},
+ {1, -4, 8, -20, 114, 38, -14, 7, -3, 1},
+ {1, -4, 9, -22, 107, 49, -17, 8, -4, 1},
+ {2, -5, 10, -24, 99, 59, -20, 9, -4, 2},
+ {2, -5, 10, -24, 90, 70, -22, 10, -5, 2},
+ {2, -5, 10, -23, 80, 80, -23, 10, -5, 2},
+ {2, -5, 10, -22, 70, 90, -24, 10, -5, 2},
+ {2, -4, 9, -20, 59, 99, -24, 10, -5, 2},
+ {1, -4, 8, -17, 49, 107, -22, 9, -4, 1},
+ {1, -3, 7, -14, 38, 114, -20, 8, -4, 1},
+ {1, -2, 5, -11, 28, 119, -17, 7, -3, 1},
+ {0, -2, 3, -7, 18, 124, -12, 5, -2, 1},
+ {0, -1, 2, -4, 8, 127, -6, 3, -1, 0},
+};
#if SWITCHABLE_FILTERS >= 4
DECLARE_ALIGNED(256, static const InterpKernel,
@@ -145,23 +168,23 @@
#if SWITCHABLE_FILTERS == 5
DECLARE_ALIGNED(16, static const int16_t,
sub_pel_filters_12sharp[SUBPEL_SHIFTS][12]) = {
- // intfilt 0.8
+ // intfilt 0.85
{0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0},
- {0, 1, -1, 3, -7, 127, 8, -4, 2, -1, 0, 0},
- {0, 1, -3, 5, -12, 124, 18, -8, 4, -2, 1, 0},
- {-1, 2, -4, 8, -17, 120, 28, -11, 6, -3, 1, -1},
- {-1, 2, -4, 10, -21, 114, 38, -15, 8, -4, 2, -1},
- {-1, 3, -5, 11, -23, 107, 49, -18, 9, -5, 2, -1},
- {-1, 3, -6, 12, -25, 99, 60, -21, 11, -6, 3, -1},
- {-1, 3, -6, 12, -25, 90, 70, -23, 12, -6, 3, -1},
- {-1, 3, -6, 12, -24, 80, 80, -24, 12, -6, 3, -1},
- {-1, 3, -6, 12, -23, 70, 90, -25, 12, -6, 3, -1},
- {-1, 3, -6, 11, -21, 60, 99, -25, 12, -6, 3, -1},
- {-1, 2, -5, 9, -18, 49, 107, -23, 11, -5, 3, -1},
- {-1, 2, -4, 8, -15, 38, 114, -21, 10, -4, 2, -1},
- {-1, 1, -3, 6, -11, 28, 120, -17, 8, -4, 2, -1},
- {0, 1, -2, 4, -8, 18, 124, -12, 5, -3, 1, 0},
- {0, 0, -1, 2, -4, 8, 127, -7, 3, -1, 1, 0},
+ {0, 1, -2, 3, -7, 127, 8, -4, 2, -1, 1, 0},
+ {-1, 2, -3, 6, -13, 124, 18, -8, 4, -2, 2, -1},
+ {-1, 3, -4, 8, -18, 120, 28, -12, 7, -4, 2, -1},
+ {-1, 3, -6, 10, -21, 115, 38, -15, 8, -5, 3, -1},
+ {-2, 4, -6, 12, -24, 108, 49, -18, 10, -6, 3, -2},
+ {-2, 4, -7, 13, -25, 100, 60, -21, 11, -7, 4, -2},
+ {-2, 4, -7, 13, -26, 91, 71, -24, 13, -7, 4, -2},
+ {-2, 4, -7, 13, -25, 81, 81, -25, 13, -7, 4, -2},
+ {-2, 4, -7, 13, -24, 71, 91, -26, 13, -7, 4, -2},
+ {-2, 4, -7, 11, -21, 60, 100, -25, 13, -7, 4, -2},
+ {-2, 3, -6, 10, -18, 49, 108, -24, 12, -6, 4, -2},
+ {-1, 3, -5, 8, -15, 38, 115, -21, 10, -6, 3, -1},
+ {-1, 2, -4, 7, -12, 28, 120, -18, 8, -4, 3, -1},
+ {-1, 2, -2, 4, -8, 18, 124, -13, 6, -3, 2, -1},
+ {0, 1, -1, 2, -4, 8, 127, -7, 3, -2, 1, 0},
};
#endif
@@ -245,7 +268,7 @@
vp10_interp_filter_params_list[SWITCHABLE_FILTERS + 1] = {
{(const int16_t*)sub_pel_filters_8, SUBPEL_TAPS, SUBPEL_SHIFTS},
{(const int16_t*)sub_pel_filters_8smooth, SUBPEL_TAPS, SUBPEL_SHIFTS},
- {(const int16_t*)sub_pel_filters_8sharp, SUBPEL_TAPS, SUBPEL_SHIFTS},
+ {(const int16_t*)sub_pel_filters_10sharp, 10, SUBPEL_SHIFTS},
#if SWITCHABLE_FILTERS >= 4
{(const int16_t*)sub_pel_filters_8smooth2, SUBPEL_TAPS, SUBPEL_SHIFTS},
#endif
@@ -266,7 +289,7 @@
#if USE_TEMPORALFILTER_12TAP
static const InterpFilterParams vp10_interp_temporalfilter_12tap = {
- (const int16_t*)sub_pel_filters_temporalfilter_12, 12, SUBPEL_SHIFTS
+ (const int16_t*)sub_pel_filters_temporalfilter_12, 12, SUBPEL_SHIFTS
};
#endif // USE_TEMPORALFILTER_12TAP
diff --git a/vp10/common/idct.c b/vp10/common/idct.c
index dbb50fb..a941f64 100644
--- a/vp10/common/idct.c
+++ b/vp10/common/idct.c
@@ -326,11 +326,79 @@
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // CONFIG_EXT_TX
-// Inverse identiy transform and add.
+// Inverse identity transform and add.
static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride,
- int bs) {
+ int bs, int tx_type) {
int r, c;
const int shift = bs < 32 ? 3 : 2;
+
+ tran_low_t temp_in[32], temp_out[32];
+ transform_2d ht = {idct4_c, idct4_c};
+ int out_scale = 1;
+ int coeff_stride = 0;
+
+ switch (bs) {
+ case 4:
+ ht.cols = idct4_c;
+ ht.rows = idct4_c;
+ out_scale = cospi_16_64 >> 3;
+ coeff_stride = 4;
+ break;
+ case 8:
+ ht.cols = idct8_c;
+ ht.rows = idct8_c;
+ out_scale = (1 << (DCT_CONST_BITS - 4));
+ coeff_stride = 8;
+ break;
+ case 16:
+ ht.cols = idct16_c;
+ ht.rows = idct16_c;
+ out_scale = cospi_16_64 >> 4;
+ coeff_stride = 16;
+ break;
+ case 32:
+ ht.cols = idct32_c;
+ ht.rows = idct32_c;
+ out_scale = (1 << (DCT_CONST_BITS - 4));
+ coeff_stride = 32;
+ break;
+ default:
+ assert(0);
+ }
+
+ // Columns
+ if (tx_type == V_DCT) {
+ for (c = 0; c < bs; ++c) {
+ for (r = 0; r < bs; ++r)
+ temp_in[r] = input[r * coeff_stride + c];
+ ht.cols(temp_in, temp_out);
+
+ for (r = 0; r < bs; ++r) {
+ tran_high_t temp = (tran_high_t)temp_out[r] * out_scale;
+ temp >>= DCT_CONST_BITS;
+ dest[r * stride + c] = clip_pixel_add(dest[r * stride + c],
+ (tran_low_t)temp);
+ }
+ }
+ return;
+ }
+
+ if (tx_type == H_DCT) {
+ for (r = 0; r < bs; ++r) {
+ for (c = 0; c < bs; ++c)
+ temp_in[c] = input[r * coeff_stride + c];
+ ht.rows(temp_in, temp_out);
+
+ for (c = 0; c < bs; ++c) {
+ tran_high_t temp = (tran_high_t)temp_out[c] * out_scale;
+ temp >>= DCT_CONST_BITS;
+ dest[r * stride + c] = clip_pixel_add(dest[r * stride + c],
+ (tran_low_t)temp);
+ }
+ }
+ return;
+ }
+
for (r = 0; r < bs; ++r) {
for (c = 0; c < bs; ++c)
dest[c] = clip_pixel_add(dest[c], input[c] >> shift);
@@ -360,6 +428,8 @@
case DST_DCT:
case DST_ADST:
case ADST_DST:
+ case V_DCT:
+ case H_DCT:
break;
case FLIPADST_DCT:
case FLIPADST_ADST:
@@ -1031,8 +1101,10 @@
// Use C version since DST only exists in C code
vp10_iht4x4_16_add_c(input, dest, stride, tx_type);
break;
+ case H_DCT:
+ case V_DCT:
case IDTX:
- inv_idtx_add_c(input, dest, stride, 4);
+ inv_idtx_add_c(input, dest, stride, 4, tx_type);
break;
#endif // CONFIG_EXT_TX
default:
@@ -1070,8 +1142,10 @@
// Use C version since DST only exists in C code
vp10_iht8x8_64_add_c(input, dest, stride, tx_type);
break;
+ case H_DCT:
+ case V_DCT:
case IDTX:
- inv_idtx_add_c(input, dest, stride, 8);
+ inv_idtx_add_c(input, dest, stride, 8, tx_type);
break;
#endif // CONFIG_EXT_TX
default:
@@ -1109,8 +1183,10 @@
// Use C version since DST only exists in C code
vp10_iht16x16_256_add_c(input, dest, stride, tx_type);
break;
+ case H_DCT:
+ case V_DCT:
case IDTX:
- inv_idtx_add_c(input, dest, stride, 16);
+ inv_idtx_add_c(input, dest, stride, 16, tx_type);
break;
#endif // CONFIG_EXT_TX
default:
@@ -1143,8 +1219,10 @@
case DST_FLIPADST:
vp10_iht32x32_1024_add_c(input, dest, stride, tx_type);
break;
+ case H_DCT:
+ case V_DCT:
case IDTX:
- inv_idtx_add_c(input, dest, stride, 32);
+ inv_idtx_add_c(input, dest, stride, 32, tx_type);
break;
#endif // CONFIG_EXT_TX
default:
diff --git a/vp10/common/mvref_common.c b/vp10/common/mvref_common.c
index 5a2def0..c67beed 100644
--- a/vp10/common/mvref_common.c
+++ b/vp10/common/mvref_common.c
@@ -17,6 +17,7 @@
const MV_REFERENCE_FRAME rf[2],
uint8_t *refmv_count,
CANDIDATE_MV *ref_mv_stack,
+ const int use_hp,
int len, int block, int col) {
const int weight = len;
int index = 0, ref;
@@ -28,6 +29,8 @@
if (candidate->ref_frame[ref] == rf[0]) {
int_mv this_refmv =
get_sub_block_mv(candidate_mi, ref, col, block);
+ lower_mv_precision(&this_refmv.as_mv, use_hp);
+
for (index = 0; index < *refmv_count; ++index)
if (ref_mv_stack[index].this_mv.as_int == this_refmv.as_int)
break;
@@ -168,8 +171,9 @@
num_8x8_blocks_wide_lookup[candidate->sb_type]);
newmv_count += add_ref_mv_candidate(candidate_mi, candidate, rf,
- refmv_count, ref_mv_stack, len,
- block, mi_pos.col);
+ refmv_count, ref_mv_stack,
+ cm->allow_high_precision_mv,
+ len, block, mi_pos.col);
i += len;
} else {
++i;
@@ -202,8 +206,9 @@
num_8x8_blocks_high_lookup[candidate->sb_type]);
newmv_count += add_ref_mv_candidate(candidate_mi, candidate, rf,
- refmv_count, ref_mv_stack, len,
- block, mi_pos.col);
+ refmv_count, ref_mv_stack,
+ cm->allow_high_precision_mv,
+ len, block, mi_pos.col);
i += len;
} else {
++i;
@@ -234,8 +239,9 @@
const int len = 1;
newmv_count += add_ref_mv_candidate(candidate_mi, candidate, rf,
- refmv_count, ref_mv_stack, len,
- block, mi_pos.col);
+ refmv_count, ref_mv_stack,
+ cm->allow_high_precision_mv,
+ len, block, mi_pos.col);
} // Analyze a single 8x8 block motion information.
return newmv_count;
}
@@ -355,9 +361,12 @@
for (ref = 0; ref < 2; ++ref) {
if (prev_frame_mvs->ref_frame[ref] == ref_frame) {
+ int_mv this_refmv = prev_frame_mvs->mv[ref];
+ lower_mv_precision(&this_refmv.as_mv,
+ cm->allow_high_precision_mv);
+
for (idx = 0; idx < *refmv_count; ++idx)
- if (prev_frame_mvs->mv[ref].as_int ==
- ref_mv_stack[idx].this_mv.as_int)
+ if (this_refmv.as_int == ref_mv_stack[idx].this_mv.as_int)
break;
if (idx < *refmv_count)
@@ -365,7 +374,7 @@
if (idx == *refmv_count &&
*refmv_count < MAX_REF_MV_STACK_SIZE) {
- ref_mv_stack[idx].this_mv.as_int = prev_frame_mvs->mv[ref].as_int;
+ ref_mv_stack[idx].this_mv.as_int = this_refmv.as_int;
ref_mv_stack[idx].weight = 2;
++(*refmv_count);
diff --git a/vp10/common/reconinter.c b/vp10/common/reconinter.c
index 97046bb..72e6ae0 100644
--- a/vp10/common/reconinter.c
+++ b/vp10/common/reconinter.c
@@ -792,7 +792,7 @@
for (row = 0; row < bh; ++row) {
for (col = 0; col < bw; ++col)
- dst16[col] = (mask[0][row] * dst16[col] + mask[1][row] * tmp16[col]
+ dst16[col] = (mask[0][col] * dst16[col] + mask[1][col] * tmp16[col]
+ 32) >> 6;
dst16 += dst_stride;
tmp16 += tmp_stride;
diff --git a/vp10/common/reconintra.c b/vp10/common/reconintra.c
index 76d50c6..e28f01c 100644
--- a/vp10/common/reconintra.c
+++ b/vp10/common/reconintra.c
@@ -1345,7 +1345,7 @@
const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
const struct macroblockd_plane *const pd = &xd->plane[plane];
const int right_available =
- mi_col + (bw >> !pd->subsampling_x) < xd->tile.mi_col_end;
+ mi_col + (1 << mi_width_log2_lookup[bsize]) < xd->tile.mi_col_end;
const int have_right = vp10_has_right(bsize, mi_row, mi_col,
right_available,
tx_size, row_off, col_off,
diff --git a/vp10/common/scan.c b/vp10/common/scan.c
index 672ac1d..21d291f 100644
--- a/vp10/common/scan.c
+++ b/vp10/common/scan.c
@@ -1790,6 +1790,8 @@
{default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
{default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
{default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {row_scan_4x4, vp10_row_iscan_4x4, row_scan_4x4_neighbors},
+ {col_scan_4x4, vp10_col_iscan_4x4, col_scan_4x4_neighbors},
}, { // TX_8X8
{default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
{row_scan_8x8, vp10_row_iscan_8x8, row_scan_8x8_neighbors},
@@ -1808,6 +1810,8 @@
{default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
{default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
{default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {row_scan_8x8, vp10_row_iscan_8x8, row_scan_8x8_neighbors},
+ {col_scan_8x8, vp10_col_iscan_8x8, col_scan_8x8_neighbors},
}, { // TX_16X16
{default_scan_16x16, vp10_default_iscan_16x16,
default_scan_16x16_neighbors},
@@ -1841,6 +1845,8 @@
default_scan_16x16_neighbors},
{default_scan_16x16, vp10_default_iscan_16x16,
default_scan_16x16_neighbors},
+ {row_scan_16x16, vp10_row_iscan_16x16, row_scan_16x16_neighbors},
+ {col_scan_16x16, vp10_col_iscan_16x16, col_scan_16x16_neighbors},
}, { // TX_32X32
{default_scan_32x32, vp10_default_iscan_32x32,
default_scan_32x32_neighbors},
@@ -1876,6 +1882,10 @@
qtr_scan_32x32_neighbors},
{default_scan_32x32, vp10_default_iscan_32x32,
default_scan_32x32_neighbors},
+ {h2_scan_32x32, vp10_h2_iscan_32x32,
+ h2_scan_32x32_neighbors},
+ {v2_scan_32x32, vp10_v2_iscan_32x32,
+ v2_scan_32x32_neighbors},
}
};
@@ -1898,6 +1908,8 @@
{default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
{default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
{default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {row_scan_4x4, vp10_row_iscan_4x4, row_scan_4x4_neighbors},
+ {col_scan_4x4, vp10_col_iscan_4x4, col_scan_4x4_neighbors},
}, { // TX_8X8
{default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
{default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
@@ -1916,6 +1928,8 @@
{default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
{default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
{default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {row_scan_8x8, vp10_row_iscan_8x8, row_scan_8x8_neighbors},
+ {col_scan_8x8, vp10_col_iscan_8x8, col_scan_8x8_neighbors},
}, { // TX_16X16
{default_scan_16x16, vp10_default_iscan_16x16,
default_scan_16x16_neighbors},
@@ -1951,6 +1965,8 @@
default_scan_16x16_neighbors},
{default_scan_16x16, vp10_default_iscan_16x16,
default_scan_16x16_neighbors},
+ {row_scan_16x16, vp10_row_iscan_16x16, row_scan_16x16_neighbors},
+ {col_scan_16x16, vp10_col_iscan_16x16, col_scan_16x16_neighbors},
}, { // TX_32X32
{default_scan_32x32, vp10_default_iscan_32x32,
default_scan_32x32_neighbors},
@@ -1986,6 +2002,10 @@
qtr_scan_32x32_neighbors},
{default_scan_32x32, vp10_default_iscan_32x32,
default_scan_32x32_neighbors},
+ {h2_scan_32x32, vp10_h2_iscan_32x32,
+ h2_scan_32x32_neighbors},
+ {v2_scan_32x32, vp10_v2_iscan_32x32,
+ v2_scan_32x32_neighbors},
}
};
diff --git a/vp10/common/vp10_rtcd_defs.pl b/vp10/common/vp10_rtcd_defs.pl
index c9f0295..2344ce2 100644
--- a/vp10/common/vp10_rtcd_defs.pl
+++ b/vp10/common/vp10_rtcd_defs.pl
@@ -426,6 +426,9 @@
specialize qw/vp10_fwht4x4 msa/, "$mmx_x86inc";
}
+add_proto qw/void vp10_fwd_idtx/, "const int16_t *src_diff, tran_low_t *coeff, int stride, int bs, int tx_type";
+ specialize qw/vp10_fwd_idtx/;
+
# Inverse transform
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
# Note as optimized versions of these functions are added we need to add a check to ensure
diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c
index af6016a..64ac3cc 100644
--- a/vp10/decoder/decodeframe.c
+++ b/vp10/decoder/decodeframe.c
@@ -811,7 +811,9 @@
subpel_y, sf, w, h, ref, interp_filter, xs, ys);
#endif // CONFIG_VP9_HIGHBITDEPTH
}
+#endif // (CONFIG_SUPERTX || CONFIG_OBMC)
+#if CONFIG_SUPERTX
static void dec_build_inter_predictors_sb(VP10Decoder *const pbi,
MACROBLOCKD *xd,
int mi_row, int mi_col) {
@@ -881,9 +883,7 @@
sb_type);
#endif // CONFIG_EXT_INTER
}
-#endif // (CONFIG_SUPERTX || CONFIG_OBMC)
-#if CONFIG_SUPERTX
static void dec_build_inter_predictors_sb_sub8x8(VP10Decoder *const pbi,
MACROBLOCKD *xd,
int mi_row, int mi_col,
diff --git a/vp10/encoder/block.h b/vp10/encoder/block.h
index 7921300..ce650b1 100644
--- a/vp10/encoder/block.h
+++ b/vp10/encoder/block.h
@@ -143,6 +143,9 @@
uint8_t zcoeff_blk[TX_SIZES][256];
#if CONFIG_VAR_TX
uint8_t blk_skip[MAX_MB_PLANE][256];
+#if CONFIG_REF_MV
+ uint8_t blk_skip_drl[MAX_MB_PLANE][256];
+#endif
#endif
int skip;
diff --git a/vp10/encoder/dct.c b/vp10/encoder/dct.c
index 333adbb..31a4c87 100644
--- a/vp10/encoder/dct.c
+++ b/vp10/encoder/dct.c
@@ -1315,6 +1315,8 @@
case DST_DCT:
case DST_ADST:
case ADST_DST:
+ case H_DCT:
+ case V_DCT:
break;
case FLIPADST_DCT:
case FLIPADST_ADST:
@@ -1758,6 +1760,95 @@
}
}
+// Forward identity (IDTX) transform, extended with the 1D-DCT types V_DCT and
+// H_DCT.
+// Reads a bs x bs residual block from |src_diff| (row pitch |stride|) and
+// writes bs x bs coefficients to |coeff| (row pitch bs). V_DCT transforms each
+// column, H_DCT each row; every other |tx_type| takes the identity path, which
+// scales the residual by 2^shift. |bs| must be 4, 8, 16 or 32.
+void vp10_fwd_idtx_c(const int16_t *src_diff,
+                     tran_low_t *coeff, int stride,
+                     int bs, int tx_type) {
+  int r, c;
+  const int shift = bs < 32 ? 3 : 2;
+  tran_low_t temp_in[32], temp_out[32];
+  transform_2d ht = {fdct4, fdct4};
+  int in_scale = 1;
+  int out_scale = 1;
+  int coeff_stride = 0;
+
+  // Pick the 1D transform and its input/output scaling for this block size.
+  switch (bs) {
+    case 4:
+      in_scale = 16;
+      out_scale = cospi_16_64 >> 1;
+      coeff_stride = 4;
+      break;
+    case 8:
+      ht.cols = fdct8;
+      ht.rows = fdct8;
+      in_scale = 4;
+      out_scale = (1 << DCT_CONST_BITS);
+      coeff_stride = 8;
+      break;
+    case 16:
+      ht.cols = fdct16;
+      ht.rows = fdct16;
+      in_scale = 4;
+      out_scale = cospi_16_64;
+      coeff_stride = 16;
+      break;
+    case 32:
+      ht.cols = fdct32;
+      ht.rows = fdct32;
+      in_scale = 4;
+      out_scale = (1 << (DCT_CONST_BITS - 2));
+      coeff_stride = 32;
+      break;
+    default:
+      assert(0);
+  }
+
+  if (tx_type == V_DCT) {
+    // 1D transform down each column; rows are left untransformed.
+    for (c = 0; c < bs; ++c) {
+      for (r = 0; r < bs; ++r)
+        temp_in[r] = src_diff[r * stride + c] * in_scale;
+      ht.cols(temp_in, temp_out);
+
+      for (r = 0; r < bs; ++r) {
+        const tran_high_t scaled = (tran_high_t)temp_out[r] * out_scale;
+        coeff[r * coeff_stride + c] = (tran_low_t)(scaled >> DCT_CONST_BITS);
+      }
+    }
+    return;
+  }
+
+  if (tx_type == H_DCT) {
+    // 1D transform along each row; columns are left untransformed.
+    for (r = 0; r < bs; ++r) {
+      for (c = 0; c < bs; ++c)
+        temp_in[c] = src_diff[r * stride + c] * in_scale;
+      ht.rows(temp_in, temp_out);
+
+      for (c = 0; c < bs; ++c) {
+        const tran_high_t scaled = (tran_high_t)temp_out[c] * out_scale;
+        coeff[r * coeff_stride + c] = (tran_low_t)(scaled >> DCT_CONST_BITS);
+      }
+    }
+    return;
+  }
+
+  // Identity path. Multiply rather than '<<': left-shifting a negative
+  // residual is undefined behavior in C, while the product is well defined
+  // and yields the same value.
+  for (r = 0; r < bs; ++r) {
+    for (c = 0; c < bs; ++c) coeff[c] = src_diff[c] * (1 << shift);
+    src_diff += stride;
+    coeff += bs;
+  }
+}
+
#if CONFIG_VP9_HIGHBITDEPTH
void vp10_highbd_fht32x32_c(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
diff --git a/vp10/encoder/encodeframe.c b/vp10/encoder/encodeframe.c
index d3ea94b..8c7af63 100644
--- a/vp10/encoder/encodeframe.c
+++ b/vp10/encoder/encodeframe.c
@@ -3987,6 +3987,9 @@
vp10_zero(x->skip_txfm);
#if CONFIG_VAR_TX
vp10_zero(x->blk_skip);
+#if CONFIG_REF_MV
+ vp10_zero(x->blk_skip_drl);
+#endif
#endif
{
diff --git a/vp10/encoder/hybrid_fwd_txfm.c b/vp10/encoder/hybrid_fwd_txfm.c
index 03d9c6d..029240f 100644
--- a/vp10/encoder/hybrid_fwd_txfm.c
+++ b/vp10/encoder/hybrid_fwd_txfm.c
@@ -33,21 +33,6 @@
}
#endif // CONFIG_VP9_HIGHBITDEPTH
-#if CONFIG_EXT_TX
-// Forward identity transform.
-static void fwd_idtx_c(const int16_t *src_diff, tran_low_t *coeff, int stride,
- int bs) {
- int r, c;
- const int shift = bs < 32 ? 3 : 2;
-
- for (r = 0; r < bs; ++r) {
- for (c = 0; c < bs; ++c) coeff[c] = src_diff[c] << shift;
- src_diff += stride;
- coeff += bs;
- }
-}
-#endif // CONFIG_EXT_TX
-
void vp10_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type, int lossless) {
if (lossless) {
@@ -78,8 +63,10 @@
case FLIPADST_DST:
vp10_fht4x4(src_diff, coeff, diff_stride, tx_type);
break;
+ case H_DCT:
+ case V_DCT:
case IDTX:
- fwd_idtx_c(src_diff, coeff, diff_stride, 4);
+ vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 4, tx_type);
break;
#endif // CONFIG_EXT_TX
default:
@@ -116,8 +103,10 @@
case FLIPADST_DST:
vp10_fht8x8(src_diff, coeff, diff_stride, tx_type);
break;
+ case H_DCT:
+ case V_DCT:
case IDTX:
- fwd_idtx_c(src_diff, coeff, diff_stride, 8);
+ vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 8, tx_type);
break;
#endif // CONFIG_EXT_TX
default:
@@ -157,8 +146,10 @@
// Use C version since DST exists only in C
vp10_fht16x16_c(src_diff, coeff, diff_stride, tx_type);
break;
+ case H_DCT:
+ case V_DCT:
case IDTX:
- fwd_idtx_c(src_diff, coeff, diff_stride, 16);
+ vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 16, tx_type);
break;
#endif // CONFIG_EXT_TX
default:
@@ -195,8 +186,10 @@
case FLIPADST_DST:
vp10_fht32x32_c(src_diff, coeff, diff_stride, tx_type);
break;
+ case H_DCT:
+ case V_DCT:
case IDTX:
- fwd_idtx_c(src_diff, coeff, diff_stride, 32);
+ vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 32, tx_type);
break;
#endif // CONFIG_EXT_TX
default:
@@ -240,7 +233,7 @@
vp10_highbd_fht4x4_c(src_diff, coeff, diff_stride, tx_type);
break;
case IDTX:
- fwd_idtx_c(src_diff, coeff, diff_stride, 4);
+ vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 4, tx_type);
break;
#endif // CONFIG_EXT_TX
default:
@@ -282,7 +275,7 @@
vp10_highbd_fht8x8_c(src_diff, coeff, diff_stride, tx_type);
break;
case IDTX:
- fwd_idtx_c(src_diff, coeff, diff_stride, 8);
+ vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 8, tx_type);
break;
#endif // CONFIG_EXT_TX
default:
@@ -324,7 +317,7 @@
vp10_highbd_fht16x16_c(src_diff, coeff, diff_stride, tx_type);
break;
case IDTX:
- fwd_idtx_c(src_diff, coeff, diff_stride, 16);
+ vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 16, tx_type);
break;
#endif // CONFIG_EXT_TX
default:
@@ -362,7 +355,7 @@
vp10_highbd_fht32x32_c(src_diff, coeff, diff_stride, tx_type);
break;
case IDTX:
- fwd_idtx_c(src_diff, coeff, diff_stride, 32);
+ vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 32, tx_type);
break;
#endif // CONFIG_EXT_TX
default:
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index 96edc0f..b86f6c0 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -354,6 +354,126 @@
}
}
+#if CONFIG_EXT_TX
+typedef enum {  // 1D transform kinds; the values index bits of a prune mask
+ DCT_1D = 0,
+ ADST_1D = 1,
+ FLIPADST_1D = 2,
+ DST_1D = 3,
+ TX_TYPES_1D = 4,  // count; do_tx_type_search() shifts htx_tab bits by this
+} TX_TYPE_1D;
+
+// Placeholder pruning decision for PRUNE_TWO: every argument is ignored and
+// the fixed mask 3 is returned.
+static int prune_two_for_sby(const VP10_COMP *cpi, BLOCK_SIZE bsize,
+                             MACROBLOCK *x, MACROBLOCKD *xd) {
+  (void) xd;
+  (void) x;
+  (void) bsize;
+  (void) cpi;
+  return 3;
+}
+
+// Placeholder pruning decision for PRUNE_THREE: every argument is ignored and
+// the fixed mask 7 is returned.
+static int prune_three_for_sby(const VP10_COMP *cpi, BLOCK_SIZE bsize,
+                               MACROBLOCK *x, MACROBLOCKD *xd) {
+  (void) xd;
+  (void) x;
+  (void) bsize;
+  (void) cpi;
+  return 7;
+}
+
+#endif // CONFIG_EXT_TX
+
+// Placeholder pruning decision for PRUNE_ONE: every argument is ignored and
+// the fixed mask 1 is returned.
+static int prune_one_for_sby(const VP10_COMP *cpi, BLOCK_SIZE bsize,
+                             MACROBLOCK *x, MACROBLOCKD *xd) {
+  (void) xd;
+  (void) x;
+  (void) bsize;
+  (void) cpi;
+  return 1;
+}
+
+// Returns the prune mask chosen by the speed feature cpi->sf.tx_type_search;
+// NO_PRUNE yields 0, the other levels defer to the prune_*_for_sby helpers.
+static int prune_tx_types(const VP10_COMP *cpi,
+                          BLOCK_SIZE bsize,
+                          MACROBLOCK *x,
+                          MACROBLOCKD *xd) {
+  switch (cpi->sf.tx_type_search) {
+    case NO_PRUNE:
+      return 0;
+    case PRUNE_ONE:
+      return prune_one_for_sby(cpi, bsize, x, xd);
+#if CONFIG_EXT_TX
+    case PRUNE_TWO:
+      return prune_two_for_sby(cpi, bsize, x, xd);
+    case PRUNE_THREE:
+      return prune_three_for_sby(cpi, bsize, x, xd);
+#endif
+  }
+  // Every handled level returns from inside the switch; getting here means
+  // tx_type_search holds a value this build does not handle.
+  assert(0);
+  return 0;
+}
+
+// Returns 1 when |tx_type| should be evaluated by the transform type search
+// and 0 when |prune| rules it out. |prune| is a bit mask: bit k (with
+// k < TX_TYPES_1D) drops every type whose vtx_tab entry is k, and bit
+// TX_TYPES_1D + k drops every type whose htx_tab entry is k. Types from IDTX
+// onwards are never dropped.
+static int do_tx_type_search(TX_TYPE tx_type,
+                             int prune) {
+// TODO(sarahparker) implement for non ext tx
+#if CONFIG_EXT_TX
+  // 1D kind that vtx_tab assigns to each 2D type 0..DST_DST. Entries past
+  // DST_DST have no initializer and are therefore zero (DCT_1D).
+  static const TX_TYPE_1D vtx_tab[TX_TYPES] = {
+    DCT_1D, ADST_1D,
+    DCT_1D, ADST_1D,
+    FLIPADST_1D, DCT_1D,
+    FLIPADST_1D, ADST_1D,
+    FLIPADST_1D, DST_1D,
+    DCT_1D, DST_1D,
+    ADST_1D, DST_1D,
+    FLIPADST_1D, DST_1D,
+  };
+
+  // Same layout for htx_tab; its bits sit TX_TYPES_1D positions higher in
+  // |prune|.
+  static const TX_TYPE_1D htx_tab[TX_TYPES] = {
+    DCT_1D, DCT_1D,
+    ADST_1D, ADST_1D,
+    DCT_1D, FLIPADST_1D,
+    FLIPADST_1D, FLIPADST_1D,
+    ADST_1D, DCT_1D,
+    DST_1D, ADST_1D,
+    DST_1D, FLIPADST_1D,
+    DST_1D, DST_1D,
+  };
+
+  // IDTX and the types enumerated after it (V_DCT, H_DCT) have no row in the
+  // tables above: their slots are zero-initialized, so a lookup would prune
+  // them as if they were built from DCT_1D in both directions. Always search
+  // them instead, as was already done for IDTX alone.
+  if (tx_type >= IDTX)
+    return 1;
+
+  return !(((prune >> vtx_tab[tx_type]) & 1) |
+           ((prune >> (htx_tab[tx_type] + TX_TYPES_1D)) & 1));
+#else
+  // temporary to avoid compiler warnings
+  (void) tx_type;
+  (void) prune;
+  return 1;
+#endif
+}
+
static void model_rd_for_sb(VP10_COMP *cpi, BLOCK_SIZE bsize,
MACROBLOCK *x, MACROBLOCKD *xd,
int *out_rate_sum, int64_t *out_dist_sum,
@@ -394,8 +514,10 @@
// low enough so that we can skip the mode search.
const int64_t low_dc_thr = VPXMIN(50, dc_thr >> 2);
const int64_t low_ac_thr = VPXMIN(80, ac_thr >> 2);
- int bw = 1 << (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
- int bh = 1 << (b_height_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
+ int bw_shift = (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
+ int bh_shift = (b_height_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
+ int bw = 1 << bw_shift;
+ int bh = 1 << bh_shift;
int idx, idy;
int lw = b_width_log2_lookup[unit_size] + 2;
int lh = b_height_log2_lookup[unit_size] + 2;
@@ -406,7 +528,7 @@
for (idx = 0; idx < bw; ++idx) {
uint8_t *src = p->src.buf + (idy * p->src.stride << lh) + (idx << lw);
uint8_t *dst = pd->dst.buf + (idy * pd->dst.stride << lh) + (idx << lh);
- int block_idx = (idy << 1) + idx;
+ int block_idx = (idy << bw_shift) + idx;
int low_err_skip = 0;
var = cpi->fn_ptr[unit_size].vf(src, p->src.stride,
@@ -988,11 +1110,14 @@
vpx_prob skip_prob = vp10_get_skip_prob(cm, xd);
int s0 = vp10_cost_bit(skip_prob, 0);
int s1 = vp10_cost_bit(skip_prob, 1);
+ const int is_inter = is_inter_block(mbmi);
+ int prune = 0;
#if CONFIG_EXT_TX
int ext_tx_set;
#endif // CONFIG_EXT_TX
- const int is_inter = is_inter_block(mbmi);
+ if (is_inter && cpi->sf.tx_type_search > 0)
+ prune = prune_tx_types(cpi, bs, x, xd);
mbmi->tx_size = VPXMIN(max_tx_size, largest_tx_size);
#if CONFIG_EXT_TX
@@ -1004,6 +1129,15 @@
if (is_inter) {
if (!ext_tx_used_inter[ext_tx_set][tx_type])
continue;
+ if (cpi->sf.tx_type_search > 0) {
+ if (!do_tx_type_search(tx_type, prune))
+ continue;
+ } else if (ext_tx_set == 1 &&
+ tx_type >= DST_ADST && tx_type < IDTX &&
+ best_tx_type == DCT_DCT) {
+ tx_type = IDTX - 1;
+ continue;
+ }
} else {
if (!ALLOW_INTRA_EXT_TX && bs >= BLOCK_8X8) {
if (tx_type != intra_mode_to_tx_type_context[mbmi->mode])
@@ -1011,15 +1145,15 @@
}
if (!ext_tx_used_intra[ext_tx_set][tx_type])
continue;
+ if (ext_tx_set == 1 &&
+ tx_type >= DST_ADST && tx_type < IDTX &&
+ best_tx_type == DCT_DCT) {
+ tx_type = IDTX - 1;
+ continue;
+ }
}
mbmi->tx_type = tx_type;
- if (ext_tx_set == 1 &&
- mbmi->tx_type >= DST_ADST && mbmi->tx_type < IDTX &&
- best_tx_type == DCT_DCT) {
- tx_type = IDTX - 1;
- continue;
- }
txfm_rd_in_plane(x,
cpi,
@@ -1067,12 +1201,15 @@
cpi->sf.use_fast_coef_costing);
if (r == INT_MAX)
continue;
- if (is_inter)
+ if (is_inter) {
r += cpi->inter_tx_type_costs[mbmi->tx_size][mbmi->tx_type];
- else
+ if (cpi->sf.tx_type_search > 0 && !do_tx_type_search(tx_type, prune))
+ continue;
+ } else {
r += cpi->intra_tx_type_costs[mbmi->tx_size]
[intra_mode_to_tx_type_context[mbmi->mode]]
[mbmi->tx_type];
+ }
if (s)
this_rd = RDCOST(x->rdmult, x->rddiv, s1, psse);
else
@@ -1150,13 +1287,17 @@
TX_SIZE best_tx = max_tx_size;
int start_tx, end_tx;
const int tx_select = cm->tx_mode == TX_MODE_SELECT;
- TX_TYPE tx_type, best_tx_type = DCT_DCT;
const int is_inter = is_inter_block(mbmi);
+ const vpx_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs);
+ TX_TYPE tx_type, best_tx_type = DCT_DCT;
+ int prune = 0;
#if CONFIG_EXT_TX
int ext_tx_set;
#endif // CONFIG_EXT_TX
- const vpx_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs);
+ if (is_inter && cpi->sf.tx_type_search > 0)
+ prune = prune_tx_types(cpi, bs, x, xd);
+
assert(skip_prob > 0);
s0 = vp10_cost_bit(skip_prob, 0);
s1 = vp10_cost_bit(skip_prob, 1);
@@ -1187,6 +1328,15 @@
if (is_inter) {
if (!ext_tx_used_inter[ext_tx_set][tx_type])
continue;
+ if (cpi->sf.tx_type_search > 0) {
+ if (!do_tx_type_search(tx_type, prune))
+ continue;
+ } else if (ext_tx_set == 1 &&
+ tx_type >= DST_ADST && tx_type < IDTX &&
+ best_tx_type == DCT_DCT) {
+ tx_type = IDTX - 1;
+ continue;
+ }
} else {
if (!ALLOW_INTRA_EXT_TX && bs >= BLOCK_8X8) {
if (tx_type != intra_mode_to_tx_type_context[mbmi->mode])
@@ -1194,14 +1344,14 @@
}
if (!ext_tx_used_intra[ext_tx_set][tx_type])
continue;
+ if (ext_tx_set == 1 &&
+ tx_type >= DST_ADST && tx_type < IDTX &&
+ best_tx_type == DCT_DCT) {
+ tx_type = IDTX - 1;
+ break;
+ }
}
mbmi->tx_type = tx_type;
- if (ext_tx_set == 1 &&
- mbmi->tx_type >= DST_ADST && mbmi->tx_type < IDTX &&
- best_tx_type == DCT_DCT) {
- tx_type = IDTX - 1;
- break;
- }
txfm_rd_in_plane(x,
cpi,
&r, &d, &s,
@@ -1233,12 +1383,15 @@
if (n < TX_32X32 &&
!xd->lossless[xd->mi[0]->mbmi.segment_id] &&
r != INT_MAX && !FIXED_TX_TYPE) {
- if (is_inter)
+ if (is_inter) {
r += cpi->inter_tx_type_costs[mbmi->tx_size][mbmi->tx_type];
- else
+ if (cpi->sf.tx_type_search > 0 && !do_tx_type_search(tx_type, prune))
+ continue;
+ } else {
r += cpi->intra_tx_type_costs[mbmi->tx_size]
[intra_mode_to_tx_type_context[mbmi->mode]]
[mbmi->tx_type];
+ }
}
#endif // CONFIG_EXT_TX
@@ -2803,9 +2956,13 @@
uint8_t best_blk_skip[256];
const int n4 = 1 << (num_pels_log2_lookup[bsize] - 4);
int idx, idy;
+ int prune = 0;
#if CONFIG_EXT_TX
int ext_tx_set = get_ext_tx_set(max_tx_size, bsize, is_inter);
-#endif
+#endif // CONFIG_EXT_TX
+
+ if (is_inter && cpi->sf.tx_type_search > 0)
+ prune = prune_tx_types(cpi, bsize, x, xd);
*distortion = INT64_MAX;
*rate = INT_MAX;
@@ -2821,6 +2978,15 @@
if (is_inter) {
if (!ext_tx_used_inter[ext_tx_set][tx_type])
continue;
+ if (cpi->sf.tx_type_search > 0) {
+ if (!do_tx_type_search(tx_type, prune))
+ continue;
+ } else if (ext_tx_set == 1 &&
+ tx_type >= DST_ADST && tx_type < IDTX &&
+ best_tx_type == DCT_DCT) {
+ tx_type = IDTX - 1;
+ continue;
+ }
} else {
if (!ALLOW_INTRA_EXT_TX && bsize >= BLOCK_8X8) {
if (tx_type != intra_mode_to_tx_type_context[mbmi->mode])
@@ -2828,17 +2994,16 @@
}
if (!ext_tx_used_intra[ext_tx_set][tx_type])
continue;
+ if (ext_tx_set == 1 &&
+ tx_type >= DST_ADST && tx_type < IDTX &&
+ best_tx_type == DCT_DCT) {
+ tx_type = IDTX - 1;
+ break;
+ }
}
mbmi->tx_type = tx_type;
- if (ext_tx_set == 1 &&
- mbmi->tx_type >= DST_ADST && mbmi->tx_type < IDTX &&
- best_tx_type == DCT_DCT) {
- tx_type = IDTX - 1;
- break;
- }
-
inter_block_yrd(cpi, x, &this_rate, &this_dist, &this_skip, &this_sse,
bsize, ref_best_rd);
@@ -2867,12 +3032,15 @@
if (max_tx_size < TX_32X32 &&
!xd->lossless[xd->mi[0]->mbmi.segment_id] &&
this_rate != INT_MAX) {
- if (is_inter)
+ if (is_inter) {
this_rate += cpi->inter_tx_type_costs[max_tx_size][mbmi->tx_type];
- else
+ if (cpi->sf.tx_type_search > 0 && !do_tx_type_search(tx_type, prune))
+ continue;
+ } else {
this_rate += cpi->intra_tx_type_costs[max_tx_size]
[intra_mode_to_tx_type_context[mbmi->mode]]
[mbmi->tx_type];
+ }
}
#endif // CONFIG_EXT_TX
@@ -7208,6 +7376,11 @@
rate_y - rate_uv,
total_sse);
}
+#if CONFIG_VAR_TX
+ for (i = 0; i < MAX_MB_PLANE; ++i)
+ memcpy(x->blk_skip_drl[i], x->blk_skip[i],
+ sizeof(uint8_t) * ctx->num_4x4_blk);
+#endif
for (ref_idx = 0; ref_idx < ref_set; ++ref_idx) {
int64_t tmp_alt_rd = INT64_MAX;
@@ -7235,8 +7408,6 @@
#else
int_mv dummy_single_newmv[MAX_REF_FRAMES] = { { 0 } };
#endif
-
-
mbmi->ref_mv_idx = 1 + ref_idx;
frame_mv[NEARMV][ref_frame] = cur_mv;
@@ -7299,12 +7470,22 @@
this_rd = tmp_alt_rd;
tmp_ref_rd = tmp_alt_rd;
backup_mbmi = *mbmi;
+#if CONFIG_VAR_TX
+ for (i = 0; i < MAX_MB_PLANE; ++i)
+ memcpy(x->blk_skip_drl[i], x->blk_skip[i],
+ sizeof(uint8_t) * ctx->num_4x4_blk);
+#endif
} else {
*mbmi = backup_mbmi;
}
}
frame_mv[NEARMV][ref_frame] = backup_mv;
+#if CONFIG_VAR_TX
+ for (i = 0; i < MAX_MB_PLANE; ++i)
+ memcpy(x->blk_skip[i], x->blk_skip_drl[i],
+ sizeof(uint8_t) * ctx->num_4x4_blk);
+#endif
}
#endif // CONFIG_REF_MV
diff --git a/vp10/encoder/speed_features.c b/vp10/encoder/speed_features.c
index 1f70dcb..ec8acda 100644
--- a/vp10/encoder/speed_features.c
+++ b/vp10/encoder/speed_features.c
@@ -159,7 +159,7 @@
sf->tx_size_search_breakout = 1;
sf->partition_search_breakout_rate_thr = 80;
-
+ sf->tx_type_search = PRUNE_ONE;
// Use transform domain distortion.
// Note var-tx expt always uses pixel domain distortion.
sf->use_transform_domain_distortion = 1;
@@ -177,6 +177,9 @@
sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
sf->allow_partition_search_skip = 1;
+#if CONFIG_EXT_TX
+ sf->tx_type_search = PRUNE_TWO;
+#endif
}
if (speed >= 3) {
@@ -195,6 +198,9 @@
sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;
sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC;
sf->adaptive_interp_filter_search = 1;
+#if CONFIG_EXT_TX
+ sf->tx_type_search = PRUNE_THREE;
+#endif
}
if (speed >= 4) {
@@ -473,6 +479,7 @@
sf->alt_ref_search_fp = 0;
sf->use_quant_fp = 0;
sf->partition_search_type = SEARCH_PARTITION;
+ sf->tx_type_search = NO_PRUNE;
sf->less_rectangular_check = 0;
sf->use_square_partition_only = 0;
sf->auto_min_max_partition_size = NOT_IN_USE;
diff --git a/vp10/encoder/speed_features.h b/vp10/encoder/speed_features.h
index 4f931d8..fbb6988 100644
--- a/vp10/encoder/speed_features.h
+++ b/vp10/encoder/speed_features.h
@@ -182,6 +182,18 @@
} INTERP_FILTER_MASK;
typedef enum {
+ NO_PRUNE = 0,
+ // eliminates one tx type in vertical and horizontal direction
+ PRUNE_ONE = 1,
+#if CONFIG_EXT_TX
+ // eliminates two tx types in each direction
+ PRUNE_TWO = 2,
+ // eliminates three tx types in each direction
+ PRUNE_THREE = 3,
+#endif
+} TX_TYPE_SEARCH;
+
+typedef enum {
// Search partitions using RD criterion
SEARCH_PARTITION,
@@ -298,6 +310,7 @@
PARTITION_SEARCH_TYPE partition_search_type;
+ TX_TYPE_SEARCH tx_type_search;
// Used if partition_search_type = FIXED_SIZE_PARTITION
BLOCK_SIZE always_this_block_size;
diff --git a/vp10/encoder/x86/dct_sse2.c b/vp10/encoder/x86/dct_sse2.c
index 79d1e88..aaf1e6a 100644
--- a/vp10/encoder/x86/dct_sse2.c
+++ b/vp10/encoder/x86/dct_sse2.c
@@ -1635,7 +1635,7 @@
const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64);
const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64);
- const __m128i k__cospi_p08_m24 = pair_set_epi16(cospi_8_64, -cospi_24_64);
+ const __m128i k__cospi_m24_m08 = pair_set_epi16(-cospi_24_64, -cospi_8_64);
const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64);
const __m128i k__cospi_p28_p04 = pair_set_epi16(cospi_28_64, cospi_4_64);
const __m128i k__cospi_m04_p28 = pair_set_epi16(-cospi_4_64, cospi_28_64);
@@ -1839,10 +1839,10 @@
v[0] = _mm_madd_epi16(u[0], k__cospi_m08_p24);
v[1] = _mm_madd_epi16(u[1], k__cospi_m08_p24);
- v[2] = _mm_madd_epi16(u[2], k__cospi_p24_p08);
- v[3] = _mm_madd_epi16(u[3], k__cospi_p24_p08);
- v[4] = _mm_madd_epi16(u[2], k__cospi_p08_m24);
- v[5] = _mm_madd_epi16(u[3], k__cospi_p08_m24);
+ v[2] = _mm_madd_epi16(u[2], k__cospi_m24_m08);
+ v[3] = _mm_madd_epi16(u[3], k__cospi_m24_m08);
+ v[4] = _mm_madd_epi16(u[2], k__cospi_m08_p24);
+ v[5] = _mm_madd_epi16(u[3], k__cospi_m08_p24);
v[6] = _mm_madd_epi16(u[0], k__cospi_p24_p08);
v[7] = _mm_madd_epi16(u[1], k__cospi_p24_p08);
@@ -1872,10 +1872,10 @@
// stage 5
s[0] = _mm_add_epi16(p[0], t[1]);
s[1] = _mm_sub_epi16(p[0], t[1]);
- s[2] = _mm_add_epi16(p[3], t[2]);
- s[3] = _mm_sub_epi16(p[3], t[2]);
- s[4] = _mm_sub_epi16(p[4], t[5]);
- s[5] = _mm_add_epi16(p[4], t[5]);
+ s[2] = _mm_sub_epi16(p[3], t[2]);
+ s[3] = _mm_add_epi16(p[3], t[2]);
+ s[4] = _mm_add_epi16(p[4], t[5]);
+ s[5] = _mm_sub_epi16(p[4], t[5]);
s[6] = _mm_sub_epi16(p[7], t[6]);
s[7] = _mm_add_epi16(p[7], t[6]);
diff --git a/vpx_dsp/sad.c b/vpx_dsp/sad.c
index 204cede..c500206 100644
--- a/vpx_dsp/sad.c
+++ b/vpx_dsp/sad.c
@@ -108,6 +108,22 @@
sad_array[i] = vpx_sad##m##x##n##_c(src, src_stride, ref_array[i], ref_stride); \
}
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+// 128x128
+sadMxN(128, 128)
+sadMxNxK(128, 128, 3)
+sadMxNxK(128, 128, 8)
+sadMxNx4D(128, 128)
+
+// 128x64
+sadMxN(128, 64)
+sadMxNx4D(128, 64)
+
+// 64x128
+sadMxN(64, 128)
+sadMxNx4D(64, 128)
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
+
// 64x64
sadMxN(64, 64)
sadMxNxK(64, 64, 3)
@@ -247,6 +263,22 @@
} \
}
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+// 128x128
+highbd_sadMxN(128, 128)
+highbd_sadMxNxK(128, 128, 3)
+highbd_sadMxNxK(128, 128, 8)
+highbd_sadMxNx4D(128, 128)
+
+// 128x64
+highbd_sadMxN(128, 64)
+highbd_sadMxNx4D(128, 64)
+
+// 64x128
+highbd_sadMxN(64, 128)
+highbd_sadMxNx4D(64, 128)
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
+
// 64x64
highbd_sadMxN(64, 64)
highbd_sadMxNxK(64, 64, 3)
diff --git a/vpx_dsp/variance.c b/vpx_dsp/variance.c
index 14d7f99..169769a 100644
--- a/vpx_dsp/variance.c
+++ b/vpx_dsp/variance.c
@@ -17,17 +17,6 @@
#include "vpx_dsp/variance.h"
#include "vpx_dsp/vpx_filter.h"
-const uint8_t vpx_bilinear_filters[BIL_SUBPEL_SHIFTS][2] = {
- { 128, 0 },
- { 112, 16 },
- { 96, 32 },
- { 80, 48 },
- { 64, 64 },
- { 48, 80 },
- { 32, 96 },
- { 16, 112 },
-};
-
uint32_t vpx_get4x4sse_cs_c(const uint8_t *a, int a_stride,
const uint8_t *b, int b_stride) {
int distortion = 0;
@@ -176,9 +165,9 @@
uint8_t temp2[H * W]; \
\
var_filter_block2d_bil_first_pass(a, fdata3, a_stride, 1, H + 1, W, \
- vpx_bilinear_filters[xoffset]); \
+ bilinear_filters_2t[xoffset]); \
var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- vpx_bilinear_filters[yoffset]); \
+ bilinear_filters_2t[yoffset]); \
\
return vpx_variance##W##x##H##_c(temp2, W, b, b_stride, sse); \
}
@@ -196,9 +185,9 @@
DECLARE_ALIGNED(16, uint8_t, temp3[H * W]); \
\
var_filter_block2d_bil_first_pass(a, fdata3, a_stride, 1, H + 1, W, \
- vpx_bilinear_filters[xoffset]); \
+ bilinear_filters_2t[xoffset]); \
var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- vpx_bilinear_filters[yoffset]); \
+ bilinear_filters_2t[yoffset]); \
\
vpx_comp_avg_pred(temp3, second_pred, W, H, temp2, W); \
\
@@ -235,6 +224,11 @@
SUBPIX_VAR(W, H) \
SUBPIX_AVG_VAR(W, H)
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+VARIANCES(128, 128)
+VARIANCES(128, 64)
+VARIANCES(64, 128)
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
VARIANCES(64, 64)
VARIANCES(64, 32)
VARIANCES(32, 64)
@@ -501,9 +495,9 @@
uint16_t temp2[H * W]; \
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
- W, vpx_bilinear_filters[xoffset]); \
+ W, bilinear_filters_2t[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- vpx_bilinear_filters[yoffset]); \
+ bilinear_filters_2t[yoffset]); \
\
return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, dst, \
dst_stride, sse); \
@@ -518,9 +512,9 @@
uint16_t temp2[H * W]; \
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
- W, vpx_bilinear_filters[xoffset]); \
+ W, bilinear_filters_2t[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- vpx_bilinear_filters[yoffset]); \
+ bilinear_filters_2t[yoffset]); \
\
return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
W, dst, dst_stride, sse); \
@@ -535,9 +529,9 @@
uint16_t temp2[H * W]; \
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
- W, vpx_bilinear_filters[xoffset]); \
+ W, bilinear_filters_2t[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- vpx_bilinear_filters[yoffset]); \
+ bilinear_filters_2t[yoffset]); \
\
return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
W, dst, dst_stride, sse); \
@@ -555,9 +549,9 @@
DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
- W, vpx_bilinear_filters[xoffset]); \
+ W, bilinear_filters_2t[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- vpx_bilinear_filters[yoffset]); \
+ bilinear_filters_2t[yoffset]); \
\
vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \
CONVERT_TO_BYTEPTR(temp2), W); \
@@ -577,9 +571,9 @@
DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
- W, vpx_bilinear_filters[xoffset]); \
+ W, bilinear_filters_2t[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- vpx_bilinear_filters[yoffset]); \
+ bilinear_filters_2t[yoffset]); \
\
vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \
CONVERT_TO_BYTEPTR(temp2), W); \
@@ -599,9 +593,9 @@
DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
- W, vpx_bilinear_filters[xoffset]); \
+ W, bilinear_filters_2t[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- vpx_bilinear_filters[yoffset]); \
+ bilinear_filters_2t[yoffset]); \
\
vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \
CONVERT_TO_BYTEPTR(temp2), W); \
@@ -616,6 +610,11 @@
HIGHBD_SUBPIX_VAR(W, H) \
HIGHBD_SUBPIX_AVG_VAR(W, H)
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+HIGHBD_VARIANCES(128, 128)
+HIGHBD_VARIANCES(128, 64)
+HIGHBD_VARIANCES(64, 128)
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
HIGHBD_VARIANCES(64, 64)
HIGHBD_VARIANCES(64, 32)
HIGHBD_VARIANCES(32, 64)
@@ -677,8 +676,9 @@
b += b_stride;
m += m_stride;
}
- *sum = (sum64 >= 0) ? ((sum64 + 31) >> 6) : -((-sum64 + 31) >> 6);
- *sse = (sse64 + 2047) >> 12;
+ sum64 = (sum64 >= 0) ? sum64 : -sum64;
+ *sum = ROUND_POWER_OF_TWO(sum64, 6);
+ *sse = ROUND_POWER_OF_TWO(sse64, 12);
}
#define MASK_VAR(W, H) \
@@ -702,9 +702,9 @@
uint8_t temp2[H * W]; \
\
var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \
- vpx_bilinear_filters[xoffset]); \
+ bilinear_filters_2t[xoffset]); \
var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- vpx_bilinear_filters[yoffset]); \
+ bilinear_filters_2t[yoffset]); \
\
return vpx_masked_variance##W##x##H##_c(temp2, W, dst, dst_stride, \
msk, msk_stride, sse); \
@@ -765,27 +765,28 @@
const uint8_t *b8, int b_stride,
const uint8_t *m, int m_stride,
int w, int h,
- uint64_t *sse64, int *sum) {
+ uint64_t *sse, int64_t *sum) {
int i, j;
uint16_t *a = CONVERT_TO_SHORTPTR(a8);
uint16_t *b = CONVERT_TO_SHORTPTR(b8);
- int64_t sum64 = 0;
- *sse64 = 0;
+ *sum = 0;
+ *sse = 0;
for (i = 0; i < h; i++) {
for (j = 0; j < w; j++) {
const int diff = (a[j] - b[j]) * (m[j]);
- sum64 += diff;
- *sse64 += (int64_t)diff * diff;
+ *sum += (int64_t)diff;
+ *sse += (int64_t)diff * diff;
}
a += a_stride;
b += b_stride;
m += m_stride;
}
- *sum = (sum64 >= 0) ? ((sum64 + 31) >> 6) : -((-sum64 + 31) >> 6);
- *sse64 = (*sse64 + 2047) >> 12;
+ *sum = (*sum >= 0) ? *sum : -*sum;
+ *sum = ROUND_POWER_OF_TWO(*sum, 6);
+ *sse = ROUND_POWER_OF_TWO(*sse, 12);
}
void highbd_masked_variance(const uint8_t *a8, int a_stride,
@@ -793,9 +794,11 @@
const uint8_t *m, int m_stride,
int w, int h,
unsigned int *sse, int *sum) {
+ int64_t sum64;
uint64_t sse64;
highbd_masked_variance64(a8, a_stride, b8, b_stride, m, m_stride,
- w, h, &sse64, sum);
+ w, h, &sse64, &sum64);
+ *sum = (int)sum64;
*sse = (unsigned int)sse64;
}
@@ -804,10 +807,11 @@
const uint8_t *m, int m_stride,
int w, int h,
unsigned int *sse, int *sum) {
+ int64_t sum64;
uint64_t sse64;
highbd_masked_variance64(a8, a_stride, b8, b_stride, m, m_stride,
- w, h, &sse64, sum);
- *sum = ROUND_POWER_OF_TWO(*sum, 2);
+ w, h, &sse64, &sum64);
+ *sum = (int)ROUND_POWER_OF_TWO(sum64, 2);
*sse = (unsigned int)ROUND_POWER_OF_TWO(sse64, 4);
}
@@ -816,10 +820,11 @@
const uint8_t *m, int m_stride,
int w, int h,
unsigned int *sse, int *sum) {
+ int64_t sum64;
uint64_t sse64;
highbd_masked_variance64(a8, a_stride, b8, b_stride, m, m_stride,
- w, h, &sse64, sum);
- *sum = ROUND_POWER_OF_TWO(*sum, 4);
+ w, h, &sse64, &sum64);
+ *sum = (int)ROUND_POWER_OF_TWO(sum64, 4);
*sse = (unsigned int)ROUND_POWER_OF_TWO(sse64, 8);
}
@@ -875,9 +880,9 @@
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, \
H + 1, W, \
- vpx_bilinear_filters[xoffset]); \
+ bilinear_filters_2t[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- vpx_bilinear_filters[yoffset]); \
+ bilinear_filters_2t[yoffset]); \
\
return vpx_highbd_masked_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
W, dst, dst_stride, \
@@ -895,9 +900,9 @@
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, \
H + 1, W, \
- vpx_bilinear_filters[xoffset]); \
+ bilinear_filters_2t[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- vpx_bilinear_filters[yoffset]); \
+ bilinear_filters_2t[yoffset]); \
\
return vpx_highbd_10_masked_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
W, dst, dst_stride, \
@@ -915,9 +920,9 @@
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, \
H + 1, W, \
- vpx_bilinear_filters[xoffset]); \
+ bilinear_filters_2t[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- vpx_bilinear_filters[yoffset]); \
+ bilinear_filters_2t[yoffset]); \
\
return vpx_highbd_12_masked_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
W, dst, dst_stride, \
diff --git a/vpx_dsp/vpx_convolve.c b/vpx_dsp/vpx_convolve.c
index 2d1c927..2e85ed4 100644
--- a/vpx_dsp/vpx_convolve.c
+++ b/vpx_dsp/vpx_convolve.c
@@ -130,18 +130,21 @@
// --Must round-up because block may be located at sub-pixel position.
// --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
// --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
- uint8_t temp[135 * 64];
+ uint8_t temp[MAX_EXT_SIZE * MAX_CU_SIZE];
int intermediate_height =
(((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;
- assert(w <= 64);
- assert(h <= 64);
+ assert(w <= MAX_CU_SIZE);
+ assert(h <= MAX_CU_SIZE);
+
assert(y_step_q4 <= 32);
assert(x_step_q4 <= 32);
- convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64,
+ convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride,
+ temp, MAX_CU_SIZE,
x_filters, x0_q4, x_step_q4, w, intermediate_height);
- convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride,
+ convolve_vert(temp + MAX_CU_SIZE * (SUBPEL_TAPS / 2 - 1), MAX_CU_SIZE,
+ dst, dst_stride,
y_filters, y0_q4, y_step_q4, w, h);
}
@@ -237,13 +240,14 @@
const int16_t *filter_y, int y_step_q4,
int w, int h) {
/* Fixed size intermediate buffer places limits on parameters. */
- DECLARE_ALIGNED(16, uint8_t, temp[64 * 64]);
- assert(w <= 64);
- assert(h <= 64);
+ DECLARE_ALIGNED(16, uint8_t, temp[MAX_CU_SIZE * MAX_CU_SIZE]);
+ assert(w <= MAX_CU_SIZE);
+ assert(h <= MAX_CU_SIZE);
- vpx_convolve8_c(src, src_stride, temp, 64,
+ vpx_convolve8_c(src, src_stride, temp, MAX_CU_SIZE,
filter_x, x_step_q4, filter_y, y_step_q4, w, h);
- vpx_convolve_avg_c(temp, 64, dst, dst_stride, NULL, 0, NULL, 0, w, h);
+ vpx_convolve_avg_c(temp, MAX_CU_SIZE, dst, dst_stride,
+ NULL, 0, NULL, 0, w, h);
}
void vpx_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride,
@@ -459,22 +463,23 @@
// --Must round-up because block may be located at sub-pixel position.
// --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
// --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
- uint16_t temp[64 * 135];
+ uint16_t temp[MAX_EXT_SIZE * MAX_CU_SIZE];
int intermediate_height =
(((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;
- assert(w <= 64);
- assert(h <= 64);
+ assert(w <= MAX_CU_SIZE);
+ assert(h <= MAX_CU_SIZE);
assert(y_step_q4 <= 32);
assert(x_step_q4 <= 32);
- highbd_convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1),
- src_stride, CONVERT_TO_BYTEPTR(temp), 64,
+ highbd_convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride,
+ CONVERT_TO_BYTEPTR(temp), MAX_CU_SIZE,
x_filters, x0_q4, x_step_q4, w,
intermediate_height, bd);
- highbd_convolve_vert(CONVERT_TO_BYTEPTR(temp) + 64 * (SUBPEL_TAPS / 2 - 1),
- 64, dst, dst_stride, y_filters, y0_q4, y_step_q4,
- w, h, bd);
+ highbd_convolve_vert(
+ CONVERT_TO_BYTEPTR(temp) + MAX_CU_SIZE * (SUBPEL_TAPS / 2 - 1), MAX_CU_SIZE,
+ dst, dst_stride,
+ y_filters, y0_q4, y_step_q4, w, h, bd);
}
@@ -556,13 +561,15 @@
const int16_t *filter_y, int y_step_q4,
int w, int h, int bd) {
// Fixed size intermediate buffer places limits on parameters.
- DECLARE_ALIGNED(16, uint16_t, temp[64 * 64]);
- assert(w <= 64);
- assert(h <= 64);
+ DECLARE_ALIGNED(16, uint16_t, temp[MAX_CU_SIZE * MAX_CU_SIZE]);
+ assert(w <= MAX_CU_SIZE);
+ assert(h <= MAX_CU_SIZE);
- vpx_highbd_convolve8_c(src, src_stride, CONVERT_TO_BYTEPTR(temp), 64,
+ vpx_highbd_convolve8_c(src, src_stride,
+ CONVERT_TO_BYTEPTR(temp), MAX_CU_SIZE,
filter_x, x_step_q4, filter_y, y_step_q4, w, h, bd);
- vpx_highbd_convolve_avg_c(CONVERT_TO_BYTEPTR(temp), 64, dst, dst_stride,
+ vpx_highbd_convolve_avg_c(CONVERT_TO_BYTEPTR(temp), MAX_CU_SIZE,
+ dst, dst_stride,
NULL, 0, NULL, 0, w, h, bd);
}
diff --git a/vpx_dsp/vpx_convolve.h b/vpx_dsp/vpx_convolve.h
index 9ed3f17..bd8679d 100644
--- a/vpx_dsp/vpx_convolve.h
+++ b/vpx_dsp/vpx_convolve.h
@@ -17,6 +17,24 @@
extern "C" {
#endif
+// Note: Fixed size intermediate buffers place limits on the parameters
+// of some functions. 2d filtering proceeds in 2 steps:
+// (1) Interpolate horizontally into an intermediate buffer, temp.
+// (2) Interpolate temp vertically to derive the sub-pixel result.
+// Deriving the maximum number of rows in the temp buffer (135):
+// --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).
+// --Largest block size is 64x64 pixels (128x128 with CONFIG_EXT_PARTITION).
+// --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the
+// original frame (in 1/16th pixel units).
+// --Must round-up because block may be located at sub-pixel position.
+// --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
+// --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
+// With CONFIG_VP10 && CONFIG_EXT_PARTITION the same derivation for 128 rows
+// gives 263.
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+# define MAX_EXT_SIZE 263
+#else
+# define MAX_EXT_SIZE 135
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
+
typedef void (*convolve_fn_t)(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
const int16_t *filter_x, int x_step_q4,
diff --git a/vpx_dsp/vpx_dsp_common.h b/vpx_dsp/vpx_dsp_common.h
index b4e6f4c..8d9bf55 100644
--- a/vpx_dsp/vpx_dsp_common.h
+++ b/vpx_dsp/vpx_dsp_common.h
@@ -20,6 +20,12 @@
extern "C" {
#endif
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+# define MAX_CU_SIZE 128
+#else
+# define MAX_CU_SIZE 64
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
+
#define VPXMIN(x, y) (((x) < (y)) ? (x) : (y))
#define VPXMAX(x, y) (((x) > (y)) ? (x) : (y))
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index fdfd20c..583d9fa 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -50,6 +50,19 @@
$avx2_x86_64 = 'avx2';
}
+if (vpx_config("CONFIG_EXT_PARTITION") eq "yes") {
+ @block_widths = (4, 8, 16, 32, 64, 128)
+} else {
+ @block_widths = (4, 8, 16, 32, 64)
+}
+
+@block_sizes = ();
+foreach $w (@block_widths) {
+ foreach $h (@block_widths) {
+ push @block_sizes, [$w, $h] if ($w <= 2*$h && $h <= 2*$w) ;
+ }
+}
+
#
# Intra prediction
#
@@ -453,52 +466,44 @@
#
# Sub Pixel Filters
#
-add_proto qw/void vpx_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vpx_convolve_copy neon dspr2 msa/, "$sse2_x86inc";
-
-add_proto qw/void vpx_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vpx_convolve_avg neon dspr2 msa/, "$sse2_x86inc";
-
-add_proto qw/void vpx_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vpx_convolve8 sse2 ssse3 neon dspr2 msa/, "$avx2_ssse3";
-
-add_proto qw/void vpx_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vpx_convolve8_horiz sse2 ssse3 neon dspr2 msa/, "$avx2_ssse3";
-
-add_proto qw/void vpx_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vpx_convolve8_vert sse2 ssse3 neon dspr2 msa/, "$avx2_ssse3";
-
-add_proto qw/void vpx_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vpx_convolve8_avg sse2 ssse3 neon dspr2 msa/;
-
+add_proto qw/void vpx_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+add_proto qw/void vpx_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+add_proto qw/void vpx_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+add_proto qw/void vpx_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+add_proto qw/void vpx_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+add_proto qw/void vpx_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
add_proto qw/void vpx_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vpx_convolve8_avg_horiz sse2 ssse3 neon dspr2 msa/;
+add_proto qw/void vpx_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+add_proto qw/void vpx_scaled_2d/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+add_proto qw/void vpx_scaled_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+add_proto qw/void vpx_scaled_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+add_proto qw/void vpx_scaled_avg_2d/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+add_proto qw/void vpx_scaled_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+add_proto qw/void vpx_scaled_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-add_proto qw/void vpx_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vpx_convolve8_avg_vert sse2 ssse3 neon dspr2 msa/;
+specialize qw/vpx_convolve_copy /, "$sse2_x86inc";
+specialize qw/vpx_convolve_avg /, "$sse2_x86inc";
+specialize qw/vpx_convolve8 sse2 ssse3/, "$avx2_ssse3";
+specialize qw/vpx_convolve8_horiz sse2 ssse3/, "$avx2_ssse3";
+specialize qw/vpx_convolve8_vert sse2 ssse3/, "$avx2_ssse3";
+specialize qw/vpx_convolve8_avg sse2 ssse3/;
+specialize qw/vpx_convolve8_avg_horiz sse2 ssse3/;
+specialize qw/vpx_convolve8_avg_vert sse2 ssse3/;
+specialize qw/vpx_scaled_2d ssse3/;
-add_proto qw/void vpx_scaled_2d/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vpx_scaled_2d ssse3/;
-
-add_proto qw/void vpx_scaled_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vpx_scaled_horiz/;
-
-add_proto qw/void vpx_scaled_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vpx_scaled_vert/;
-
-add_proto qw/void vpx_scaled_avg_2d/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vpx_scaled_avg_2d/;
-
-add_proto qw/void vpx_scaled_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vpx_scaled_avg_horiz/;
-
-add_proto qw/void vpx_scaled_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vpx_scaled_avg_vert/;
+# TODO(any): These need to be extended to up to 128x128 block sizes
+if (!(vpx_config("CONFIG_VP10") eq "yes" && vpx_config("CONFIG_EXT_PARTITION") eq "yes")) {
+ specialize qw/vpx_convolve_copy neon dspr2 msa/;
+ specialize qw/vpx_convolve_avg neon dspr2 msa/;
+ specialize qw/vpx_convolve8 neon dspr2 msa/;
+ specialize qw/vpx_convolve8_horiz neon dspr2 msa/;
+ specialize qw/vpx_convolve8_vert neon dspr2 msa/;
+ specialize qw/vpx_convolve8_avg neon dspr2 msa/;
+ specialize qw/vpx_convolve8_avg_horiz neon dspr2 msa/;
+ specialize qw/vpx_convolve8_avg_vert neon dspr2 msa/;
+}
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
- #
- # Sub Pixel Filters
- #
add_proto qw/void vpx_highbd_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
specialize qw/vpx_highbd_convolve_copy/, "$sse2_x86inc";
@@ -960,69 +965,43 @@
#
add_proto qw/void vpx_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
specialize qw/vpx_subtract_block neon msa/, "$sse2_x86inc";
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+ add_proto qw/void vpx_highbd_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride, int bd";
+ specialize qw/vpx_highbd_subtract_block/;
+}
if (vpx_config("CONFIG_VP10_ENCODER") eq "yes") {
-#
-# Sum of Squares
-#
+ #
+ # Sum of Squares
+ #
add_proto qw/uint64_t vpx_sum_squares_2d_i16/, "const int16_t *src, int stride, int size";
specialize qw/vpx_sum_squares_2d_i16 sse2/;
}
-#
-# Single block SAD
-#
-add_proto qw/unsigned int vpx_sad64x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad64x64 avx2 neon msa/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vpx_sad64x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad64x32 avx2 msa/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vpx_sad32x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad32x64 avx2 msa/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vpx_sad32x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad32x32 avx2 neon msa/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vpx_sad32x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad32x16 avx2 msa/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vpx_sad16x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad16x32 msa/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vpx_sad16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad16x16 mmx media neon msa/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vpx_sad16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad16x8 mmx neon msa/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vpx_sad8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad8x16 mmx neon msa/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vpx_sad8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad8x8 mmx neon msa/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vpx_sad8x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad8x4 msa/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vpx_sad4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad4x8 msa/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vpx_sad4x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad4x4 mmx neon msa/, "$sse2_x86inc";
-
-#
-# Avg
-#
if ((vpx_config("CONFIG_VP9_ENCODER") eq "yes") || (vpx_config("CONFIG_VP10_ENCODER") eq "yes")) {
+ #
+ # Avg
+ #
add_proto qw/unsigned int vpx_avg_8x8/, "const uint8_t *, int p";
specialize qw/vpx_avg_8x8 sse2 neon msa/;
-
add_proto qw/unsigned int vpx_avg_4x4/, "const uint8_t *, int p";
specialize qw/vpx_avg_4x4 sse2 neon msa/;
+ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+ add_proto qw/unsigned int vpx_highbd_avg_8x8/, "const uint8_t *, int p";
+ specialize qw/vpx_highbd_avg_8x8/;
+ add_proto qw/unsigned int vpx_highbd_avg_4x4/, "const uint8_t *, int p";
+ specialize qw/vpx_highbd_avg_4x4/;
+ }
+ #
+ # Minmax
+ #
add_proto qw/void vpx_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
specialize qw/vpx_minmax_8x8 sse2/;
+ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+ add_proto qw/void vpx_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
+ specialize qw/vpx_highbd_minmax_8x8/;
+ }
add_proto qw/void vpx_hadamard_8x8/, "int16_t const *src_diff, int src_stride, int16_t *coeff";
specialize qw/vpx_hadamard_8x8 sse2/, "$ssse3_x86_64_x86inc";
@@ -1043,575 +1022,217 @@
specialize qw/vpx_vector_var neon sse2/;
} # CONFIG_VP9_ENCODER || CONFIG_VP10_ENCODER
-add_proto qw/unsigned int vpx_sad64x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
+#
+# Single block SAD / Single block Avg SAD
+#
+foreach (@block_sizes) {
+ ($w, $h) = @$_;
+ add_proto qw/unsigned int/, "vpx_sad${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+ add_proto qw/unsigned int/, "vpx_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
+}
+
+specialize qw/vpx_sad64x64 avx2 neon msa/, "$sse2_x86inc";
+specialize qw/vpx_sad64x32 avx2 msa/, "$sse2_x86inc";
+specialize qw/vpx_sad32x64 avx2 msa/, "$sse2_x86inc";
+specialize qw/vpx_sad32x32 avx2 neon msa/, "$sse2_x86inc";
+specialize qw/vpx_sad32x16 avx2 msa/, "$sse2_x86inc";
+specialize qw/vpx_sad16x32 msa/, "$sse2_x86inc";
+specialize qw/vpx_sad16x16 mmx media neon msa/, "$sse2_x86inc";
+specialize qw/vpx_sad16x8 mmx neon msa/, "$sse2_x86inc";
+specialize qw/vpx_sad8x16 mmx neon msa/, "$sse2_x86inc";
+specialize qw/vpx_sad8x8 mmx neon msa/, "$sse2_x86inc";
+specialize qw/vpx_sad8x4 msa/, "$sse2_x86inc";
+specialize qw/vpx_sad4x8 msa/, "$sse2_x86inc";
+specialize qw/vpx_sad4x4 mmx neon msa/, "$sse2_x86inc";
+
specialize qw/vpx_sad64x64_avg avx2 msa/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vpx_sad64x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
specialize qw/vpx_sad64x32_avg avx2 msa/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vpx_sad32x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
specialize qw/vpx_sad32x64_avg avx2 msa/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vpx_sad32x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
specialize qw/vpx_sad32x32_avg avx2 msa/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vpx_sad32x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
specialize qw/vpx_sad32x16_avg avx2 msa/, "$sse2_x86inc";
+specialize qw/vpx_sad16x32_avg msa/, "$sse2_x86inc";
+specialize qw/vpx_sad16x16_avg msa/, "$sse2_x86inc";
+specialize qw/vpx_sad16x8_avg msa/, "$sse2_x86inc";
+specialize qw/vpx_sad8x16_avg msa/, "$sse2_x86inc";
+specialize qw/vpx_sad8x8_avg msa/, "$sse2_x86inc";
+specialize qw/vpx_sad8x4_avg msa/, "$sse2_x86inc";
+specialize qw/vpx_sad4x8_avg msa/, "$sse2_x86inc";
+specialize qw/vpx_sad4x4_avg msa/, "$sse2_x86inc";
-add_proto qw/unsigned int vpx_sad16x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad16x32_avg msa/, "$sse2_x86inc";
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+ foreach (@block_sizes) {
+ ($w, $h) = @$_;
+ add_proto qw/unsigned int/, "vpx_highbd_sad${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+ add_proto qw/unsigned int/, "vpx_highbd_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
+ if ($w != 128 && $h != 128 && $w != 4) {
+ specialize "vpx_highbd_sad${w}x${h}", "$sse2_x86inc";
+ specialize "vpx_highbd_sad${w}x${h}_avg", "$sse2_x86inc";
+ }
+ }
+}
-add_proto qw/unsigned int vpx_sad16x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad16x16_avg msa/, "$sse2_x86inc";
+#
+# Masked SAD
+#
+if (vpx_config("CONFIG_EXT_INTER") eq "yes") {
+ foreach (@block_sizes) {
+ ($w, $h) = @$_;
+ add_proto qw/unsigned int/, "vpx_masked_sad${w}x${h}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
+ specialize "vpx_masked_sad${w}x${h}", qw/ssse3/;
+ }
-add_proto qw/unsigned int vpx_sad16x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad16x8_avg msa/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vpx_sad8x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad8x16_avg msa/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vpx_sad8x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad8x8_avg msa/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vpx_sad8x4_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad8x4_avg msa/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vpx_sad4x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad4x8_avg msa/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vpx_sad4x4_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad4x4_avg msa/, "$sse2_x86inc";
+ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+ foreach (@block_sizes) {
+ ($w, $h) = @$_;
+ add_proto qw/unsigned int/, "vpx_highbd_masked_sad${w}x${h}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
+ specialize "vpx_highbd_masked_sad${w}x${h}", qw/ssse3/;
+ }
+ }
+}
#
# Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally
#
# Blocks of 3
-add_proto qw/void vpx_sad64x64x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad64x64x3 msa/;
-
-add_proto qw/void vpx_sad32x32x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad32x32x3 msa/;
-
-add_proto qw/void vpx_sad16x16x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+foreach $s (@block_widths) {
+ add_proto qw/void/, "vpx_sad${s}x${s}x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+}
+specialize qw/vpx_sad64x64x3 msa/;
+specialize qw/vpx_sad32x32x3 msa/;
specialize qw/vpx_sad16x16x3 sse3 ssse3 msa/;
+specialize qw/vpx_sad8x8x3 sse3 msa/;
+specialize qw/vpx_sad4x4x3 sse3 msa/;
-add_proto qw/void vpx_sad16x8x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+add_proto qw/void/, "vpx_sad16x8x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
specialize qw/vpx_sad16x8x3 sse3 ssse3 msa/;
-
-add_proto qw/void vpx_sad8x16x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+add_proto qw/void/, "vpx_sad8x16x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
specialize qw/vpx_sad8x16x3 sse3 msa/;
-add_proto qw/void vpx_sad8x8x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad8x8x3 sse3 msa/;
-
-add_proto qw/void vpx_sad4x4x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad4x4x3 sse3 msa/;
-
# Blocks of 8
-add_proto qw/void vpx_sad64x64x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad64x64x8 msa/;
-
-add_proto qw/void vpx_sad32x32x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad32x32x8 msa/;
-
-add_proto qw/void vpx_sad16x16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+foreach $s (@block_widths) {
+ add_proto qw/void/, "vpx_sad${s}x${s}x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+}
+specialize qw/vpx_sad64x64x8 msa/;
+specialize qw/vpx_sad32x32x8 msa/;
specialize qw/vpx_sad16x16x8 sse4_1 msa/;
+specialize qw/vpx_sad8x8x8 sse4_1 msa/;
+specialize qw/vpx_sad4x4x8 sse4_1 msa/;
-add_proto qw/void vpx_sad16x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+add_proto qw/void/, "vpx_sad16x8x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
specialize qw/vpx_sad16x8x8 sse4_1 msa/;
-
-add_proto qw/void vpx_sad8x16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+add_proto qw/void/, "vpx_sad8x16x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
specialize qw/vpx_sad8x16x8 sse4_1 msa/;
-
-add_proto qw/void vpx_sad8x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad8x8x8 sse4_1 msa/;
-
-add_proto qw/void vpx_sad8x4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+add_proto qw/void/, "vpx_sad8x4x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
specialize qw/vpx_sad8x4x8 msa/;
-
-add_proto qw/void vpx_sad4x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+add_proto qw/void/, "vpx_sad4x8x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
specialize qw/vpx_sad4x8x8 msa/;
-add_proto qw/void vpx_sad4x4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad4x4x8 sse4_1 msa/;
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+ foreach $s (@block_widths) {
+ # Blocks of 3
+ add_proto qw/void/, "vpx_highbd_sad${s}x${s}x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+ # Blocks of 8
+ add_proto qw/void/, "vpx_highbd_sad${s}x${s}x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+ }
+ # Blocks of 3
+ add_proto qw/void/, "vpx_highbd_sad16x8x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+ add_proto qw/void/, "vpx_highbd_sad8x16x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+ # Blocks of 8
+ add_proto qw/void/, "vpx_highbd_sad16x8x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+ add_proto qw/void/, "vpx_highbd_sad8x16x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+ add_proto qw/void/, "vpx_highbd_sad8x4x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+ add_proto qw/void/, "vpx_highbd_sad4x8x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+}
#
# Multi-block SAD, comparing a reference to N independent blocks
#
-add_proto qw/void vpx_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
+foreach (@block_sizes) {
+ ($w, $h) = @$_;
+ add_proto qw/void/, "vpx_sad${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
+}
specialize qw/vpx_sad64x64x4d avx2 neon msa/, "$sse2_x86inc";
-
-add_proto qw/void vpx_sad64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad64x32x4d msa/, "$sse2_x86inc";
-
-add_proto qw/void vpx_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad32x64x4d msa/, "$sse2_x86inc";
-
-add_proto qw/void vpx_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
+specialize qw/vpx_sad64x32x4d msa/, "$sse2_x86inc";
+specialize qw/vpx_sad32x64x4d msa/, "$sse2_x86inc";
specialize qw/vpx_sad32x32x4d avx2 neon msa/, "$sse2_x86inc";
+specialize qw/vpx_sad32x16x4d msa/, "$sse2_x86inc";
+specialize qw/vpx_sad16x32x4d msa/, "$sse2_x86inc";
+specialize qw/vpx_sad16x16x4d neon msa/, "$sse2_x86inc";
+specialize qw/vpx_sad16x8x4d msa/, "$sse2_x86inc";
+specialize qw/vpx_sad8x16x4d msa/, "$sse2_x86inc";
+specialize qw/vpx_sad8x8x4d msa/, "$sse2_x86inc";
+specialize qw/vpx_sad8x4x4d msa/, "$sse2_x86inc";
+specialize qw/vpx_sad4x8x4d msa/, "$sse2_x86inc";
+specialize qw/vpx_sad4x4x4d msa/, "$sse2_x86inc";
-add_proto qw/void vpx_sad32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad32x16x4d msa/, "$sse2_x86inc";
-
-add_proto qw/void vpx_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad16x32x4d msa/, "$sse2_x86inc";
-
-add_proto qw/void vpx_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad16x16x4d neon msa/, "$sse2_x86inc";
-
-add_proto qw/void vpx_sad16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad16x8x4d msa/, "$sse2_x86inc";
-
-add_proto qw/void vpx_sad8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad8x16x4d msa/, "$sse2_x86inc";
-
-add_proto qw/void vpx_sad8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad8x8x4d msa/, "$sse2_x86inc";
-
-add_proto qw/void vpx_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad8x4x4d msa/, "$sse2_x86inc";
-
-add_proto qw/void vpx_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad4x8x4d msa/, "$sse2_x86inc";
-
-add_proto qw/void vpx_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad4x4x4d msa/, "$sse2_x86inc";
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+ #
+ # Multi-block SAD, comparing a reference to N independent blocks
+ #
+ foreach (@block_sizes) {
+ ($w, $h) = @$_;
+ add_proto qw/void/, "vpx_highbd_sad${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
+ if ($w != 128 && $h != 128) {
+ specialize "vpx_highbd_sad${w}x${h}x4d", "$sse2_x86inc";
+ }
+ }
+}
#
# Structured Similarity (SSIM)
#
if (vpx_config("CONFIG_INTERNAL_STATS") eq "yes") {
- add_proto qw/void vpx_ssim_parms_8x8/, "const uint8_t *s, int sp, const uint8_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
- specialize qw/vpx_ssim_parms_8x8/, "$sse2_x86_64";
+ add_proto qw/void vpx_ssim_parms_8x8/, "const uint8_t *s, int sp, const uint8_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
+ specialize qw/vpx_ssim_parms_8x8/, "$sse2_x86_64";
- add_proto qw/void vpx_ssim_parms_16x16/, "const uint8_t *s, int sp, const uint8_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
- specialize qw/vpx_ssim_parms_16x16/, "$sse2_x86_64";
-}
+ add_proto qw/void vpx_ssim_parms_16x16/, "const uint8_t *s, int sp, const uint8_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
+ specialize qw/vpx_ssim_parms_16x16/, "$sse2_x86_64";
-if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
- #
- # Block subtraction
- #
- add_proto qw/void vpx_highbd_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride, int bd";
- specialize qw/vpx_highbd_subtract_block/;
-
- #
- # Single block SAD
- #
- add_proto qw/unsigned int vpx_highbd_sad64x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vpx_highbd_sad64x64/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vpx_highbd_sad64x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vpx_highbd_sad64x32/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vpx_highbd_sad32x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vpx_highbd_sad32x64/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vpx_highbd_sad32x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vpx_highbd_sad32x32/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vpx_highbd_sad32x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vpx_highbd_sad32x16/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vpx_highbd_sad16x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vpx_highbd_sad16x32/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vpx_highbd_sad16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vpx_highbd_sad16x16/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vpx_highbd_sad16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vpx_highbd_sad16x8/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vpx_highbd_sad8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vpx_highbd_sad8x16/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vpx_highbd_sad8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vpx_highbd_sad8x8/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vpx_highbd_sad8x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vpx_highbd_sad8x4/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vpx_highbd_sad4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vpx_highbd_sad4x8/;
-
- add_proto qw/unsigned int vpx_highbd_sad4x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vpx_highbd_sad4x4/;
-
- #
- # Avg
- #
- add_proto qw/unsigned int vpx_highbd_avg_8x8/, "const uint8_t *, int p";
- specialize qw/vpx_highbd_avg_8x8/;
- add_proto qw/unsigned int vpx_highbd_avg_4x4/, "const uint8_t *, int p";
- specialize qw/vpx_highbd_avg_4x4/;
- add_proto qw/void vpx_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
- specialize qw/vpx_highbd_minmax_8x8/;
-
- add_proto qw/unsigned int vpx_highbd_sad64x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vpx_highbd_sad64x64_avg/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vpx_highbd_sad64x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vpx_highbd_sad64x32_avg/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vpx_highbd_sad32x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vpx_highbd_sad32x64_avg/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vpx_highbd_sad32x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vpx_highbd_sad32x32_avg/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vpx_highbd_sad32x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vpx_highbd_sad32x16_avg/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vpx_highbd_sad16x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vpx_highbd_sad16x32_avg/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vpx_highbd_sad16x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vpx_highbd_sad16x16_avg/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vpx_highbd_sad16x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vpx_highbd_sad16x8_avg/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vpx_highbd_sad8x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vpx_highbd_sad8x16_avg/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vpx_highbd_sad8x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vpx_highbd_sad8x8_avg/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vpx_highbd_sad8x4_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vpx_highbd_sad8x4_avg/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vpx_highbd_sad4x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vpx_highbd_sad4x8_avg/;
-
- add_proto qw/unsigned int vpx_highbd_sad4x4_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vpx_highbd_sad4x4_avg/;
-
- #
- # Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally
- #
- # Blocks of 3
- add_proto qw/void vpx_highbd_sad64x64x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad64x64x3/;
-
- add_proto qw/void vpx_highbd_sad32x32x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad32x32x3/;
-
- add_proto qw/void vpx_highbd_sad16x16x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad16x16x3/;
-
- add_proto qw/void vpx_highbd_sad16x8x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad16x8x3/;
-
- add_proto qw/void vpx_highbd_sad8x16x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad8x16x3/;
-
- add_proto qw/void vpx_highbd_sad8x8x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad8x8x3/;
-
- add_proto qw/void vpx_highbd_sad4x4x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad4x4x3/;
-
- # Blocks of 8
- add_proto qw/void vpx_highbd_sad64x64x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad64x64x8/;
-
- add_proto qw/void vpx_highbd_sad32x32x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad32x32x8/;
-
- add_proto qw/void vpx_highbd_sad16x16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad16x16x8/;
-
- add_proto qw/void vpx_highbd_sad16x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad16x8x8/;
-
- add_proto qw/void vpx_highbd_sad8x16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad8x16x8/;
-
- add_proto qw/void vpx_highbd_sad8x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad8x8x8/;
-
- add_proto qw/void vpx_highbd_sad8x4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad8x4x8/;
-
- add_proto qw/void vpx_highbd_sad4x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad4x8x8/;
-
- add_proto qw/void vpx_highbd_sad4x4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad4x4x8/;
-
- #
- # Multi-block SAD, comparing a reference to N independent blocks
- #
- add_proto qw/void vpx_highbd_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad64x64x4d/, "$sse2_x86inc";
-
- add_proto qw/void vpx_highbd_sad64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad64x32x4d/, "$sse2_x86inc";
-
- add_proto qw/void vpx_highbd_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad32x64x4d/, "$sse2_x86inc";
-
- add_proto qw/void vpx_highbd_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad32x32x4d/, "$sse2_x86inc";
-
- add_proto qw/void vpx_highbd_sad32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad32x16x4d/, "$sse2_x86inc";
-
- add_proto qw/void vpx_highbd_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad16x32x4d/, "$sse2_x86inc";
-
- add_proto qw/void vpx_highbd_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad16x16x4d/, "$sse2_x86inc";
-
- add_proto qw/void vpx_highbd_sad16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad16x8x4d/, "$sse2_x86inc";
-
- add_proto qw/void vpx_highbd_sad8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad8x16x4d/, "$sse2_x86inc";
-
- add_proto qw/void vpx_highbd_sad8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad8x8x4d/, "$sse2_x86inc";
-
- add_proto qw/void vpx_highbd_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad8x4x4d/, "$sse2_x86inc";
-
- add_proto qw/void vpx_highbd_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad4x8x4d/, "$sse2_x86inc";
-
- add_proto qw/void vpx_highbd_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad4x4x4d/, "$sse2_x86inc";
-
- #
- # Structured Similarity (SSIM)
- #
- if (vpx_config("CONFIG_INTERNAL_STATS") eq "yes") {
+ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vpx_highbd_ssim_parms_8x8/, "const uint16_t *s, int sp, const uint16_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
- specialize qw/vpx_highbd_ssim_parms_8x8/;
}
-} # CONFIG_VP9_HIGHBITDEPTH
+}
} # CONFIG_ENCODERS
if (vpx_config("CONFIG_ENCODERS") eq "yes" || vpx_config("CONFIG_POSTPROC") eq "yes" || vpx_config("CONFIG_VP9_POSTPROC") eq "yes") {
#
-# Variance
-#
-add_proto qw/unsigned int vpx_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance64x64 sse2 avx2 neon msa/;
-
-add_proto qw/unsigned int vpx_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance64x32 sse2 avx2 neon msa/;
-
-add_proto qw/unsigned int vpx_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance32x64 sse2 neon msa/;
-
-add_proto qw/unsigned int vpx_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance32x32 sse2 avx2 neon msa/;
-
-add_proto qw/unsigned int vpx_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance32x16 sse2 avx2 msa/;
-
-add_proto qw/unsigned int vpx_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance16x32 sse2 msa/;
-
-add_proto qw/unsigned int vpx_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance16x16 mmx sse2 avx2 media neon msa/;
-
-add_proto qw/unsigned int vpx_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance16x8 mmx sse2 neon msa/;
-
-add_proto qw/unsigned int vpx_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance8x16 mmx sse2 neon msa/;
-
-add_proto qw/unsigned int vpx_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance8x8 mmx sse2 media neon msa/;
-
-add_proto qw/unsigned int vpx_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance8x4 sse2 msa/;
-
-add_proto qw/unsigned int vpx_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance4x8 sse2 msa/;
-
-add_proto qw/unsigned int vpx_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance4x4 mmx sse2 msa/;
-
-#
# Specialty Variance
#
add_proto qw/void vpx_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- specialize qw/vpx_get16x16var sse2 avx2 neon msa/;
-
add_proto qw/void vpx_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- specialize qw/vpx_get8x8var mmx sse2 neon msa/;
+
+specialize qw/vpx_get16x16var avx2 sse2 neon msa/;
+specialize qw/vpx_get8x8var mmx sse2 neon msa/;
add_proto qw/unsigned int vpx_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vpx_mse16x16 mmx sse2 avx2 media neon msa/;
-
add_proto qw/unsigned int vpx_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vpx_mse16x8 sse2 msa/;
-
add_proto qw/unsigned int vpx_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vpx_mse8x16 sse2 msa/;
-
add_proto qw/unsigned int vpx_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vpx_mse8x8 sse2 msa/;
-add_proto qw/unsigned int vpx_get_mb_ss/, "const int16_t *";
- specialize qw/vpx_get_mb_ss mmx sse2 msa/;
+specialize qw/vpx_mse16x16 mmx avx2 sse2 media neon msa/;
+specialize qw/vpx_mse16x8 sse2 msa/;
+specialize qw/vpx_mse8x16 sse2 msa/;
+specialize qw/vpx_mse8x8 sse2 msa/;
-add_proto qw/unsigned int vpx_get4x4sse_cs/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride";
- specialize qw/vpx_get4x4sse_cs neon msa/;
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+ foreach $bd (8, 10, 12) {
+ add_proto qw/void/, "vpx_highbd_${bd}_get16x16var", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
+ add_proto qw/void/, "vpx_highbd_${bd}_get8x8var", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
-add_proto qw/void vpx_comp_avg_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride";
+ add_proto qw/unsigned int/, "vpx_highbd_${bd}_mse16x16", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ add_proto qw/unsigned int/, "vpx_highbd_${bd}_mse16x8", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ add_proto qw/unsigned int/, "vpx_highbd_${bd}_mse8x16", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ add_proto qw/unsigned int/, "vpx_highbd_${bd}_mse8x8", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
-if (vpx_config("CONFIG_EXT_INTER") eq "yes") {
- add_proto qw/unsigned int vpx_masked_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_variance32x16 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_variance16x32 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_variance64x32 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_variance32x64 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_variance32x32 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_variance64x64 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_variance16x16 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_variance16x8 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_variance8x16 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_variance8x8 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_variance8x4 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_variance4x8 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_variance4x4 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_sub_pixel_variance64x64 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_sub_pixel_variance32x64 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_sub_pixel_variance64x32 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_sub_pixel_variance32x16 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_sub_pixel_variance16x32 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_sub_pixel_variance32x32 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_sub_pixel_variance16x16 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_sub_pixel_variance8x16 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_sub_pixel_variance16x8 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_sub_pixel_variance8x8 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_sub_pixel_variance8x4 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_sub_pixel_variance4x8 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_sub_pixel_variance4x4 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sad64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_masked_sad64x64 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sad32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_masked_sad32x64 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sad64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_masked_sad64x32 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sad32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_masked_sad32x16 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sad16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_masked_sad16x32 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sad32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_masked_sad32x32 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sad16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_masked_sad16x16 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sad16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_masked_sad16x8 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sad8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_masked_sad8x16 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sad8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_masked_sad8x8 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sad8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_masked_sad8x4 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sad4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_masked_sad4x8 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sad4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_masked_sad4x4 ssse3/;
-
- if (vpx_config("CONFIG_EXT_PARTITION") eq "yes") {
- add_proto qw/unsigned int vpx_masked_variance128x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_variance128x128/;
-
- add_proto qw/unsigned int vpx_masked_variance128x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masdctked_variance128x64/;
-
- add_proto qw/unsigned int vpx_masked_variance64x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_variance64x128/;
-
- add_proto qw/unsigned int vpx_masked_sub_pixel_variance128x128/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_sub_pixel_variance128x128/;
-
- add_proto qw/unsigned int vpx_masked_sub_pixel_variance128x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_sub_pixel_variance128x64/;
-
- add_proto qw/unsigned int vpx_masked_sub_pixel_variance64x128/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_sub_pixel_variance64x128/;
-
- add_proto qw/unsigned int vpx_masked_sad128x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_masked_sad128x128/;
-
- add_proto qw/unsigned int vpx_masked_sad128x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_masked_sad128x64/;
-
- add_proto qw/unsigned int vpx_masked_sad64x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_masked_sad64x128/;
+ specialize "vpx_highbd_${bd}_mse16x16", qw/sse2/;
+ specialize "vpx_highbd_${bd}_mse8x8", qw/sse2/;
}
}
+#
+# Upsampled prediction
+#
if (vpx_config("CONFIG_AFFINE_MOTION") eq "yes") {
add_proto qw/void vpx_upsampled_pred/, "uint8_t *comp_pred, int width, int height, const uint8_t *ref, int ref_stride";
specialize qw/vpx_upsampled_pred sse2/;
@@ -1620,796 +1241,129 @@
}
#
-# Subpixel Variance
+# Specialty helpers: vpx_get_mb_ss, vpx_get4x4sse_cs
#
-add_proto qw/uint32_t vpx_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_sub_pixel_variance64x64 avx2 neon msa/, "$sse2_x86inc", "$ssse3_x86inc";
+add_proto qw/unsigned int vpx_get_mb_ss/, "const int16_t *";
+add_proto qw/unsigned int vpx_get4x4sse_cs/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride";
-add_proto qw/uint32_t vpx_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_sub_pixel_variance64x32 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_get_mb_ss mmx sse2 msa/;
+specialize qw/vpx_get4x4sse_cs neon msa/;
-add_proto qw/uint32_t vpx_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_sub_pixel_variance32x64 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+#
+# Variance / Subpixel Variance / Subpixel Avg Variance
+#
+foreach (@block_sizes) {
+ ($w, $h) = @$_;
+ add_proto qw/unsigned int/, "vpx_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/uint32_t/, "vpx_sub_pixel_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t/, "vpx_sub_pixel_avg_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+}
-add_proto qw/uint32_t vpx_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_sub_pixel_variance32x32 avx2 neon msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_variance64x64 sse2 avx2 neon msa/;
+specialize qw/vpx_variance64x32 sse2 avx2 neon msa/;
+specialize qw/vpx_variance32x64 sse2 neon msa/;
+specialize qw/vpx_variance32x32 sse2 avx2 neon msa/;
+specialize qw/vpx_variance32x16 sse2 avx2 msa/;
+specialize qw/vpx_variance16x32 sse2 msa/;
+specialize qw/vpx_variance16x16 mmx sse2 avx2 media neon msa/;
+specialize qw/vpx_variance16x8 mmx sse2 neon msa/;
+specialize qw/vpx_variance8x16 mmx sse2 neon msa/;
+specialize qw/vpx_variance8x8 mmx sse2 media neon msa/;
+specialize qw/vpx_variance8x4 sse2 msa/;
+specialize qw/vpx_variance4x8 sse2 msa/;
+specialize qw/vpx_variance4x4 mmx sse2 msa/;
-add_proto qw/uint32_t vpx_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_sub_pixel_variance32x16 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_variance64x64 avx2 neon msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_variance64x32 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_variance32x64 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_variance32x32 avx2 neon msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_variance32x16 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_variance16x32 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_variance16x16 mmx media neon msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_variance16x8 mmx msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_variance8x16 mmx msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_variance8x8 mmx media neon msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_variance8x4 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_variance4x8 msa/, "$sse_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_variance4x4 mmx msa/, "$sse_x86inc", "$ssse3_x86inc";
-add_proto qw/uint32_t vpx_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_sub_pixel_variance16x32 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_avg_variance64x64 avx2 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_avg_variance64x32 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_avg_variance32x64 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_avg_variance32x32 avx2 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_avg_variance32x16 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_avg_variance16x32 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_avg_variance16x16 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_avg_variance16x8 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_avg_variance8x16 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_avg_variance8x8 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_avg_variance8x4 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_avg_variance4x8 msa/, "$sse_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_avg_variance4x4 msa/, "$sse_x86inc", "$ssse3_x86inc";
-add_proto qw/uint32_t vpx_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_sub_pixel_variance16x16 mmx media neon msa/, "$sse2_x86inc", "$ssse3_x86inc";
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+ foreach $bd (8, 10, 12) {
+ foreach (@block_sizes) {
+ ($w, $h) = @$_;
+ add_proto qw/unsigned int/, "vpx_highbd_${bd}_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t/, "vpx_highbd_${bd}_sub_pixel_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t/, "vpx_highbd_${bd}_sub_pixel_avg_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ if ($w != 128 && $h != 128 && $w != 4 && $h != 4) {
+ specialize "vpx_highbd_${bd}_variance${w}x${h}", "sse2";
+ }
+ if ($w != 128 && $h != 128 && $w != 4) {
+ specialize "vpx_highbd_${bd}_sub_pixel_variance${w}x${h}", $sse2_x86inc;
+ specialize "vpx_highbd_${bd}_sub_pixel_avg_variance${w}x${h}", $sse2_x86inc;
+ }
+ }
+ }
+} # CONFIG_VP9_HIGHBITDEPTH
-add_proto qw/uint32_t vpx_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_sub_pixel_variance16x8 mmx msa/, "$sse2_x86inc", "$ssse3_x86inc";
+#
+# Masked Variance / Masked Subpixel Variance
+#
+if (vpx_config("CONFIG_EXT_INTER") eq "yes") {
+ foreach (@block_sizes) {
+ ($w, $h) = @$_;
+ add_proto qw/unsigned int/, "vpx_masked_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
+ add_proto qw/unsigned int/, "vpx_masked_sub_pixel_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
+ specialize "vpx_masked_variance${w}x${h}", qw/ssse3/;
+ specialize "vpx_masked_sub_pixel_variance${w}x${h}", qw/ssse3/;
+ }
-add_proto qw/uint32_t vpx_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_sub_pixel_variance8x16 mmx msa/, "$sse2_x86inc", "$ssse3_x86inc";
-
-add_proto qw/uint32_t vpx_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_sub_pixel_variance8x8 mmx media neon msa/, "$sse2_x86inc", "$ssse3_x86inc";
-
-add_proto qw/uint32_t vpx_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_sub_pixel_variance8x4 msa/, "$sse2_x86inc", "$ssse3_x86inc";
-
-add_proto qw/uint32_t vpx_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_sub_pixel_variance4x8 msa/, "$sse_x86inc", "$ssse3_x86inc";
-
-add_proto qw/uint32_t vpx_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_sub_pixel_variance4x4 mmx msa/, "$sse_x86inc", "$ssse3_x86inc";
-
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance64x64 avx2 msa/, "$sse2_x86inc", "$ssse3_x86inc";
-
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance64x32 msa/, "$sse2_x86inc", "$ssse3_x86inc";
-
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance32x64 msa/, "$sse2_x86inc", "$ssse3_x86inc";
-
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance32x32 avx2 msa/, "$sse2_x86inc", "$ssse3_x86inc";
-
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance32x16 msa/, "$sse2_x86inc", "$ssse3_x86inc";
-
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance16x32 msa/, "$sse2_x86inc", "$ssse3_x86inc";
-
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance16x16 msa/, "$sse2_x86inc", "$ssse3_x86inc";
-
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance16x8 msa/, "$sse2_x86inc", "$ssse3_x86inc";
-
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance8x16 msa/, "$sse2_x86inc", "$ssse3_x86inc";
-
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance8x8 msa/, "$sse2_x86inc", "$ssse3_x86inc";
-
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance8x4 msa/, "$sse2_x86inc", "$ssse3_x86inc";
-
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance4x8 msa/, "$sse_x86inc", "$ssse3_x86inc";
-
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance4x4 msa/, "$sse_x86inc", "$ssse3_x86inc";
+ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+ foreach $bd ("_", "_10_", "_12_") {
+ foreach (@block_sizes) {
+ ($w, $h) = @$_;
+ add_proto qw/unsigned int/, "vpx_highbd${bd}masked_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
+ add_proto qw/unsigned int/, "vpx_highbd${bd}masked_sub_pixel_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
+ specialize "vpx_highbd${bd}masked_variance${w}x${h}", qw/ssse3/;
+ specialize "vpx_highbd${bd}masked_sub_pixel_variance${w}x${h}", qw/ssse3/;
+ }
+ }
+ }
+}
#
# Specialty Subpixel
#
-add_proto qw/uint32_t vpx_variance_halfpixvar16x16_h/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_variance_halfpixvar16x16_h mmx sse2 media/;
+add_proto qw/uint32_t vpx_variance_halfpixvar16x16_h/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse";
+specialize qw/vpx_variance_halfpixvar16x16_h mmx sse2 media/;
-add_proto qw/uint32_t vpx_variance_halfpixvar16x16_v/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_variance_halfpixvar16x16_v mmx sse2 media/;
+add_proto qw/uint32_t vpx_variance_halfpixvar16x16_v/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse";
+specialize qw/vpx_variance_halfpixvar16x16_v mmx sse2 media/;
-add_proto qw/uint32_t vpx_variance_halfpixvar16x16_hv/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_variance_halfpixvar16x16_hv mmx sse2 media/;
+add_proto qw/uint32_t vpx_variance_halfpixvar16x16_hv/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse";
+specialize qw/vpx_variance_halfpixvar16x16_hv mmx sse2 media/;
+#
+# Comp Avg
+#
+add_proto qw/void vpx_comp_avg_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride";
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
- add_proto qw/unsigned int vpx_highbd_12_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_variance64x64 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_12_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_variance64x32 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_12_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_variance32x64 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_12_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_variance32x32 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_12_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_variance32x16 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_12_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_variance16x32 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_12_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_variance16x16 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_12_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_variance16x8 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_12_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_variance8x16 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_12_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_variance8x8 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_12_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- add_proto qw/unsigned int vpx_highbd_12_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- add_proto qw/unsigned int vpx_highbd_12_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-
- add_proto qw/unsigned int vpx_highbd_10_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_variance64x64 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_10_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_variance64x32 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_10_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_variance32x64 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_10_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_variance32x32 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_10_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_variance32x16 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_10_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_variance16x32 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_10_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_variance16x16 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_10_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_variance16x8 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_10_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_variance8x16 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_10_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_variance8x8 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_10_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- add_proto qw/unsigned int vpx_highbd_10_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- add_proto qw/unsigned int vpx_highbd_10_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-
- add_proto qw/unsigned int vpx_highbd_8_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_8_variance64x64 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_8_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_8_variance64x32 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_8_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_8_variance32x64 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_8_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_8_variance32x32 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_8_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_8_variance32x16 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_8_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_8_variance16x32 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_8_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_8_variance16x16 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_8_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_8_variance16x8 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_8_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_8_variance8x16 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_8_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_8_variance8x8 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_8_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- add_proto qw/unsigned int vpx_highbd_8_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- add_proto qw/unsigned int vpx_highbd_8_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-
- add_proto qw/void vpx_highbd_8_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- add_proto qw/void vpx_highbd_8_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
-
- add_proto qw/void vpx_highbd_10_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- add_proto qw/void vpx_highbd_10_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
-
- add_proto qw/void vpx_highbd_12_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- add_proto qw/void vpx_highbd_12_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
-
- add_proto qw/unsigned int vpx_highbd_8_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vpx_highbd_8_mse16x16 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_8_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- add_proto qw/unsigned int vpx_highbd_8_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- add_proto qw/unsigned int vpx_highbd_8_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vpx_highbd_8_mse8x8 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_10_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_mse16x16 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_10_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- add_proto qw/unsigned int vpx_highbd_10_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- add_proto qw/unsigned int vpx_highbd_10_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_mse8x8 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_12_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_mse16x16 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_12_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- add_proto qw/unsigned int vpx_highbd_12_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- add_proto qw/unsigned int vpx_highbd_12_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_mse8x8 sse2/;
-
- if (vpx_config("CONFIG_EXT_INTER") eq "yes") {
- add_proto qw/unsigned int vpx_highbd_masked_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_variance32x16 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_variance16x32 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_variance64x32 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_variance32x64 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_variance32x32 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_variance64x64 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_variance16x16 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_variance16x8 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_variance8x16 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_variance8x8 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_variance8x4 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_variance4x8 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_variance4x4 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_variance32x16 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_variance16x32 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_variance64x32 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_variance32x64 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_variance32x32 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_variance64x64 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_variance16x16 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_variance16x8 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_variance8x16 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_variance8x8 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_variance8x4 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_variance4x8 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_variance4x4 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_variance32x16 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_variance16x32 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_variance64x32 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_variance32x64 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_variance32x32 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_variance64x64 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_variance16x16 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_variance16x8 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_variance8x16 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_variance8x8 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_variance8x4 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_variance4x8 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_variance4x4 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_sub_pixel_variance64x64 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_sub_pixel_variance64x32 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_sub_pixel_variance32x64 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_sub_pixel_variance32x32 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_sub_pixel_variance32x16 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_sub_pixel_variance16x32 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_sub_pixel_variance16x16 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_sub_pixel_variance16x8 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_sub_pixel_variance8x16 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_sub_pixel_variance8x8 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_sub_pixel_variance8x4 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_sub_pixel_variance4x8 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_sub_pixel_variance4x4 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_sub_pixel_variance64x64 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_sub_pixel_variance64x32 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_sub_pixel_variance32x64 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_sub_pixel_variance32x32 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_sub_pixel_variance32x16 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_sub_pixel_variance16x32 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_sub_pixel_variance16x16 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_sub_pixel_variance16x8 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_sub_pixel_variance8x16 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_sub_pixel_variance8x8 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_sub_pixel_variance8x4 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_sub_pixel_variance4x8 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_sub_pixel_variance4x4 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_sub_pixel_variance64x64 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_sub_pixel_variance64x32 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_sub_pixel_variance32x64 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_sub_pixel_variance32x32 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_sub_pixel_variance32x16 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_sub_pixel_variance16x32 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_sub_pixel_variance16x16 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_sub_pixel_variance16x8 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_sub_pixel_variance8x16 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_sub_pixel_variance8x8 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_sub_pixel_variance8x4 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_sub_pixel_variance4x8 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_sub_pixel_variance4x4 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sad64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_highbd_masked_sad64x64 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sad32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_highbd_masked_sad32x64 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sad64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_highbd_masked_sad64x32 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sad32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_highbd_masked_sad32x16 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sad16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_highbd_masked_sad16x32 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sad32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_highbd_masked_sad32x32 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sad16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_highbd_masked_sad16x16 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sad16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_highbd_masked_sad16x8 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sad8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_highbd_masked_sad8x16 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sad8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_highbd_masked_sad8x8 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sad8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_highbd_masked_sad8x4 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sad4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_highbd_masked_sad4x8 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sad4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_highbd_masked_sad4x4 ssse3/;
-
- if (vpx_config("CONFIG_EXT_PARTITION") eq "yes") {
- add_proto qw/unsigned int vpx_highbd_masked_variance128x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_variance128x128/;
-
- add_proto qw/unsigned int vpx_highbd_masked_variance128x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_variance128x64/;
-
- add_proto qw/unsigned int vpx_highbd_masked_variance64x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_variance64x128/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_variance128x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_variance128x128/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_variance128x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_variance128x64/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_variance64x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_variance64x128/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_variance128x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_variance128x128/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_variance128x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_variance128x64/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_variance64x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_variance64x128/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sub_pixel_variance128x128/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_sub_pixel_variance128x128/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sub_pixel_variance128x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_sub_pixel_variance128x64/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sub_pixel_variance64x128/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_sub_pixel_variance64x128/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_sub_pixel_variance128x128/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_sub_pixel_variance128x128/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_sub_pixel_variance128x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_sub_pixel_variance128x64/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_sub_pixel_variance64x128/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_sub_pixel_variance64x128/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_sub_pixel_variance128x128/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_sub_pixel_variance128x128/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_sub_pixel_variance128x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_sub_pixel_variance128x64/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_sub_pixel_variance64x128/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_sub_pixel_variance64x128/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sad128x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_highbd_masked_sad128x128/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sad128x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_highbd_masked_sad128x64/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sad64x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_highbd_masked_sad64x128/;
- }
- }
-
add_proto qw/void vpx_highbd_comp_avg_pred/, "uint16_t *comp_pred, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride";
-
- #
- # Subpixel Variance
- #
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_12_sub_pixel_variance64x64/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_12_sub_pixel_variance64x32/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_12_sub_pixel_variance32x64/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_12_sub_pixel_variance32x32/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_12_sub_pixel_variance32x16/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_12_sub_pixel_variance16x32/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_12_sub_pixel_variance16x16/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_12_sub_pixel_variance16x8/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_12_sub_pixel_variance8x16/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_12_sub_pixel_variance8x8/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_12_sub_pixel_variance8x4/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_10_sub_pixel_variance64x64/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_10_sub_pixel_variance64x32/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_10_sub_pixel_variance32x64/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_10_sub_pixel_variance32x32/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_10_sub_pixel_variance32x16/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_10_sub_pixel_variance16x32/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_10_sub_pixel_variance16x16/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_10_sub_pixel_variance16x8/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_10_sub_pixel_variance8x16/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_10_sub_pixel_variance8x8/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_10_sub_pixel_variance8x4/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_8_sub_pixel_variance64x64/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_8_sub_pixel_variance64x32/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_8_sub_pixel_variance32x64/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_8_sub_pixel_variance32x32/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_8_sub_pixel_variance32x16/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_8_sub_pixel_variance16x32/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_8_sub_pixel_variance16x16/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_8_sub_pixel_variance16x8/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_8_sub_pixel_variance8x16/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_8_sub_pixel_variance8x8/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_8_sub_pixel_variance8x4/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
-
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_12_sub_pixel_avg_variance64x64/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_12_sub_pixel_avg_variance64x32/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_12_sub_pixel_avg_variance32x64/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_12_sub_pixel_avg_variance32x32/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_12_sub_pixel_avg_variance32x16/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_12_sub_pixel_avg_variance16x32/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_12_sub_pixel_avg_variance16x16/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_12_sub_pixel_avg_variance16x8/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_12_sub_pixel_avg_variance8x16/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_12_sub_pixel_avg_variance8x8/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_12_sub_pixel_avg_variance8x4/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_10_sub_pixel_avg_variance64x64/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_10_sub_pixel_avg_variance64x32/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_10_sub_pixel_avg_variance32x64/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_10_sub_pixel_avg_variance32x32/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_10_sub_pixel_avg_variance32x16/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_10_sub_pixel_avg_variance16x32/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_10_sub_pixel_avg_variance16x16/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_10_sub_pixel_avg_variance16x8/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_10_sub_pixel_avg_variance8x16/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_10_sub_pixel_avg_variance8x8/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_10_sub_pixel_avg_variance8x4/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_8_sub_pixel_avg_variance64x64/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_8_sub_pixel_avg_variance64x32/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_8_sub_pixel_avg_variance32x64/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_8_sub_pixel_avg_variance32x32/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_8_sub_pixel_avg_variance32x16/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_8_sub_pixel_avg_variance16x32/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_8_sub_pixel_avg_variance16x16/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_8_sub_pixel_avg_variance16x8/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_8_sub_pixel_avg_variance8x16/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_8_sub_pixel_avg_variance8x8/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_8_sub_pixel_avg_variance8x4/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+}
-} # CONFIG_VP9_HIGHBITDEPTH
} # CONFIG_ENCODERS || CONFIG_POSTPROC || CONFIG_VP9_POSTPROC
1;
diff --git a/vpx_dsp/vpx_filter.h b/vpx_dsp/vpx_filter.h
index e049f74..cfe8161 100644
--- a/vpx_dsp/vpx_filter.h
+++ b/vpx_dsp/vpx_filter.h
@@ -29,7 +29,18 @@
#define BIL_SUBPEL_BITS 3
#define BIL_SUBPEL_SHIFTS (1 << BIL_SUBPEL_BITS)
-extern const uint8_t vpx_bilinear_filters[BIL_SUBPEL_SHIFTS][2];
+
+// 2 tap bilinear filters
+static const uint8_t bilinear_filters_2t[BIL_SUBPEL_SHIFTS][2] = {
+ { 128, 0 },
+ { 112, 16 },
+ { 96, 32 },
+ { 80, 48 },
+ { 64, 64 },
+ { 48, 80 },
+ { 32, 96 },
+ { 16, 112 },
+};
#ifdef __cplusplus
} // extern "C"
diff --git a/vpx_dsp/x86/convolve.h b/vpx_dsp/x86/convolve.h
index b6fbfcf..95aa790 100644
--- a/vpx_dsp/x86/convolve.h
+++ b/vpx_dsp/x86/convolve.h
@@ -15,6 +15,7 @@
#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"
+#include "vpx_dsp/vpx_convolve.h"
typedef void filter8_1dfunction (
const uint8_t *src_ptr,
@@ -112,25 +113,27 @@
int w, int h) { \
assert(filter_x[3] != 128); \
assert(filter_y[3] != 128); \
- assert(w <= 64); \
- assert(h <= 64); \
+ assert(w <= MAX_CU_SIZE); \
+ assert(h <= MAX_CU_SIZE); \
assert(x_step_q4 == 16); \
assert(y_step_q4 == 16); \
if (filter_x[0] || filter_x[1] || filter_x[2]|| \
filter_y[0] || filter_y[1] || filter_y[2]) { \
- DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71]); \
- vpx_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \
+ DECLARE_ALIGNED(16, uint8_t, fdata2[MAX_CU_SIZE * (MAX_CU_SIZE+7)]); \
+ vpx_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \
+ fdata2, MAX_CU_SIZE, \
filter_x, x_step_q4, filter_y, y_step_q4, \
w, h + 7); \
- vpx_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst, dst_stride, \
+ vpx_convolve8_##avg##vert_##opt(fdata2 + 3 * MAX_CU_SIZE, MAX_CU_SIZE, \
+ dst, dst_stride, \
filter_x, x_step_q4, filter_y, \
y_step_q4, w, h); \
} else { \
- DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 65]); \
- vpx_convolve8_horiz_##opt(src, src_stride, fdata2, 64, \
+ DECLARE_ALIGNED(16, uint8_t, fdata2[MAX_CU_SIZE * (MAX_CU_SIZE+1)]); \
+ vpx_convolve8_horiz_##opt(src, src_stride, fdata2, MAX_CU_SIZE, \
filter_x, x_step_q4, filter_y, y_step_q4, \
w, h + 1); \
- vpx_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, \
+ vpx_convolve8_##avg##vert_##opt(fdata2, MAX_CU_SIZE, dst, dst_stride, \
filter_x, x_step_q4, filter_y, \
y_step_q4, w, h); \
} \
@@ -250,31 +253,40 @@
const int16_t *filter_x, int x_step_q4, \
const int16_t *filter_y, int y_step_q4, \
int w, int h, int bd) { \
- assert(w <= 64); \
- assert(h <= 64); \
+ assert(w <= MAX_CU_SIZE); \
+ assert(h <= MAX_CU_SIZE); \
if (x_step_q4 == 16 && y_step_q4 == 16) { \
if (filter_x[0] || filter_x[1] || filter_x[2] || filter_x[3] == 128 || \
filter_y[0] || filter_y[1] || filter_y[2] || filter_y[3] == 128) { \
- DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71]); \
- vpx_highbd_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \
- CONVERT_TO_BYTEPTR(fdata2), 64, \
+ DECLARE_ALIGNED(16, uint16_t, fdata2[MAX_CU_SIZE * (MAX_CU_SIZE+7)]); \
+ vpx_highbd_convolve8_horiz_##opt(src - 3 * src_stride, \
+ src_stride, \
+ CONVERT_TO_BYTEPTR(fdata2), \
+ MAX_CU_SIZE, \
filter_x, x_step_q4, \
filter_y, y_step_q4, \
w, h + 7, bd); \
- vpx_highbd_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2) + 192, \
- 64, dst, dst_stride, \
- filter_x, x_step_q4, \
- filter_y, y_step_q4, \
- w, h, bd); \
+ vpx_highbd_convolve8_##avg##vert_##opt( \
+ CONVERT_TO_BYTEPTR(fdata2) + 3 * MAX_CU_SIZE, \
+ MAX_CU_SIZE, \
+ dst, \
+ dst_stride, \
+ filter_x, x_step_q4, \
+ filter_y, y_step_q4, \
+ w, h, bd); \
} else { \
- DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 65]); \
- vpx_highbd_convolve8_horiz_##opt(src, src_stride, \
- CONVERT_TO_BYTEPTR(fdata2), 64, \
+ DECLARE_ALIGNED(16, uint16_t, fdata2[MAX_CU_SIZE * (MAX_CU_SIZE+1)]); \
+ vpx_highbd_convolve8_horiz_##opt(src, \
+ src_stride, \
+ CONVERT_TO_BYTEPTR(fdata2), \
+ MAX_CU_SIZE, \
filter_x, x_step_q4, \
filter_y, y_step_q4, \
w, h + 1, bd); \
- vpx_highbd_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2), 64, \
- dst, dst_stride, \
+ vpx_highbd_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2), \
+ MAX_CU_SIZE, \
+ dst, \
+ dst_stride, \
filter_x, x_step_q4, \
filter_y, y_step_q4, \
w, h, bd); \
diff --git a/vpx_dsp/x86/masked_sad_intrin_ssse3.c b/vpx_dsp/x86/masked_sad_intrin_ssse3.c
index 384f89b..8b9ff10 100644
--- a/vpx_dsp/x86/masked_sad_intrin_ssse3.c
+++ b/vpx_dsp/x86/masked_sad_intrin_ssse3.c
@@ -64,6 +64,11 @@
m, n); \
}
+#if CONFIG_EXT_PARTITION
+MASKSADMXN_SSSE3(128, 128)
+MASKSADMXN_SSSE3(128, 64)
+MASKSADMXN_SSSE3(64, 128)
+#endif // CONFIG_EXT_PARTITION
MASKSADMXN_SSSE3(64, 64)
MASKSADMXN_SSSE3(64, 32)
MASKSADMXN_SSSE3(32, 64)
@@ -100,7 +105,7 @@
MASKSAD4XN_SSSE3(4)
// For width a multiple of 16
-// Assumes values in m are <=64 and w = 16, 32, or 64
+// Assumes values in m are <=64
static INLINE unsigned int masked_sad_ssse3(const uint8_t *a_ptr, int a_stride,
const uint8_t *b_ptr, int b_stride,
const uint8_t *m_ptr, int m_stride,
@@ -255,6 +260,11 @@
msk_stride, m, n); \
}
+#if CONFIG_EXT_PARTITION
+HIGHBD_MASKSADMXN_SSSE3(128, 128)
+HIGHBD_MASKSADMXN_SSSE3(128, 64)
+HIGHBD_MASKSADMXN_SSSE3(64, 128)
+#endif // CONFIG_EXT_PARTITION
HIGHBD_MASKSADMXN_SSSE3(64, 64)
HIGHBD_MASKSADMXN_SSSE3(64, 32)
HIGHBD_MASKSADMXN_SSSE3(32, 64)
diff --git a/vpx_dsp/x86/masked_variance_intrin_ssse3.c b/vpx_dsp/x86/masked_variance_intrin_ssse3.c
index 96af421..ca4f6fc 100644
--- a/vpx_dsp/x86/masked_variance_intrin_ssse3.c
+++ b/vpx_dsp/x86/masked_variance_intrin_ssse3.c
@@ -18,17 +18,63 @@
#include "vpx_ports/mem.h"
#include "vpx_dsp/vpx_filter.h"
-// Assumes mask values are <= 64
-// Log 2 of powers of 2 as an expression
-#define LOG2_P2(n) ((n) == 1 ? 0 : \
- (n) == 2 ? 1 : \
- (n) == 4 ? 2 : \
- (n) == 8 ? 3 : \
- (n) == 16 ? 4 : \
- (n) == 32 ? 5 : \
- (n) == 64 ? 6 : \
- (n) == 128 ? 7 : -1)
+// Half pixel shift
+#define HALF_PIXEL_OFFSET (BIL_SUBPEL_SHIFTS/2)
+
+/*****************************************************************************
+ * Horizontal additions
+ *****************************************************************************/
+
+static INLINE int32_t hsum_epi32_si32(__m128i v_d) {
+ v_d = _mm_hadd_epi32(v_d, v_d);
+ v_d = _mm_hadd_epi32(v_d, v_d);
+ return _mm_cvtsi128_si32(v_d);
+}
+
+static INLINE int64_t hsum_epi64_si64(__m128i v_q) {
+ v_q = _mm_add_epi64(v_q, _mm_srli_si128(v_q, 8));
+#if ARCH_X86_64
+ return _mm_cvtsi128_si64(v_q);
+#else
+ {
+ int64_t tmp;
+ _mm_storel_epi64((__m128i*)&tmp, v_q);
+ return tmp;
+ }
+#endif
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static INLINE int64_t hsum_epi32_si64(__m128i v_d) {
+ const __m128i v_sign_d = _mm_cmplt_epi32(v_d, _mm_setzero_si128());
+ const __m128i v_0_q = _mm_unpacklo_epi32(v_d, v_sign_d);
+ const __m128i v_1_q = _mm_unpackhi_epi32(v_d, v_sign_d);
+ return hsum_epi64_si64(_mm_add_epi64(v_0_q, v_1_q));
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+static INLINE int calc_masked_variance(__m128i v_sum_d, __m128i v_sse_q,
+ unsigned int* sse,
+ const int w, const int h) {
+ int64_t sum64;
+ uint64_t sse64;
+
+ // Horizontal sum
+ sum64 = hsum_epi32_si32(v_sum_d);
+ sse64 = hsum_epi64_si64(v_sse_q);
+
+ sum64 = (sum64 >= 0) ? sum64 : -sum64;
+
+ // Round
+ sum64 = ROUND_POWER_OF_TWO(sum64, 6);
+ sse64 = ROUND_POWER_OF_TWO(sse64, 12);
+
+ // Store the SSE
+ *sse = (unsigned int)sse64;
+ // Compute the variance
+ return *sse - ((sum64 * sum64) / (w * h));
+}
/*****************************************************************************
* n*16 Wide versions
@@ -98,30 +144,7 @@
m += m_stride;
}
- // Horizontal sum
- v_sum_d = _mm_hadd_epi32(v_sum_d, v_sum_d);
- v_sum_d = _mm_hadd_epi32(v_sum_d, v_sum_d);
- v_sse_q = _mm_add_epi64(v_sse_q, _mm_srli_si128(v_sse_q, 8));
-
- // Round
- v_sum_d = _mm_sub_epi32(v_sum_d, _mm_cmplt_epi32(v_sum_d, v_zero));
- v_sum_d = _mm_add_epi32(v_sum_d, _mm_set_epi32(0, 0, 0, 31));
- v_sum_d = _mm_srai_epi32(v_sum_d, 6);
-
- v_sse_q = _mm_add_epi64(v_sse_q, _mm_set_epi32(0, 0, 0, 2047));
- v_sse_q = _mm_srli_epi64(v_sse_q, 12);
-
- // Store the SSE
- *sse = _mm_cvtsi128_si32(v_sse_q);
-
- // Compute the variance
- v_sum_d = _mm_abs_epi32(v_sum_d);
- v_sum_d = _mm_mul_epu32(v_sum_d, v_sum_d);
- v_sum_d = _mm_srl_epi64(v_sum_d,
- _mm_set_epi32(0, 0, 0, LOG2_P2(w) + LOG2_P2(h)));
- v_sse_q = _mm_sub_epi64(v_sse_q, v_sum_d);
-
- return _mm_cvtsi128_si32(v_sse_q);
+ return calc_masked_variance(v_sum_d, v_sse_q, sse, w, h);
}
#define MASKED_VARWXH(W, H) \
@@ -144,6 +167,11 @@
MASKED_VARWXH(32, 64)
MASKED_VARWXH(64, 32)
MASKED_VARWXH(64, 64)
+#if CONFIG_EXT_PARTITION
+MASKED_VARWXH(64, 128)
+MASKED_VARWXH(128, 64)
+MASKED_VARWXH(128, 128)
+#endif // CONFIG_EXT_PARTITION
/*****************************************************************************
* 8 Wide versions
@@ -198,29 +226,7 @@
m += m_stride;
}
- // Horizontal sum
- v_sum_d = _mm_hadd_epi32(v_sum_d, v_sum_d);
- v_sum_d = _mm_hadd_epi32(v_sum_d, v_sum_d);
- v_sse_q = _mm_add_epi64(v_sse_q, _mm_srli_si128(v_sse_q, 8));
-
- // Round
- v_sum_d = _mm_sub_epi32(v_sum_d, _mm_cmplt_epi32(v_sum_d, v_zero));
- v_sum_d = _mm_add_epi32(v_sum_d, _mm_set_epi32(0, 0, 0, 31));
- v_sum_d = _mm_srai_epi32(v_sum_d, 6);
-
- v_sse_q = _mm_add_epi64(v_sse_q, _mm_set_epi32(0, 0, 0, 2047));
- v_sse_q = _mm_srli_epi64(v_sse_q, 12);
-
- // Store the SSE
- *sse = _mm_cvtsi128_si32(v_sse_q);
-
- // Compute the variance
- v_sum_d = _mm_abs_epi32(v_sum_d);
- v_sum_d = _mm_mul_epu32(v_sum_d, v_sum_d);
- v_sum_d = _mm_srl_epi64(v_sum_d, _mm_set_epi32(0, 0, 0, LOG2_P2(h) + 3));
- v_sse_q = _mm_sub_epi64(v_sse_q, v_sum_d);
-
- return _mm_cvtsi128_si32(v_sse_q);
+ return calc_masked_variance(v_sum_d, v_sse_q, sse, 8, h);
}
#define MASKED_VAR8XH(H) \
@@ -302,29 +308,7 @@
m += m_stride * 2;
}
- // Horizontal sum
- v_sum_d = _mm_hadd_epi32(v_sum_d, v_sum_d);
- v_sum_d = _mm_hadd_epi32(v_sum_d, v_sum_d);
- v_sse_q = _mm_add_epi64(v_sse_q, _mm_srli_si128(v_sse_q, 8));
-
- // Round
- v_sum_d = _mm_sub_epi32(v_sum_d, _mm_cmplt_epi32(v_sum_d, v_zero));
- v_sum_d = _mm_add_epi32(v_sum_d, _mm_set_epi32(0, 0, 0, 31));
- v_sum_d = _mm_srai_epi32(v_sum_d, 6);
-
- v_sse_q = _mm_add_epi64(v_sse_q, _mm_set_epi32(0, 0, 0, 2047));
- v_sse_q = _mm_srli_epi64(v_sse_q, 12);
-
- // Store the SSE
- *sse = _mm_cvtsi128_si32(v_sse_q);
-
- // Compute the variance
- v_sum_d = _mm_abs_epi32(v_sum_d);
- v_sum_d = _mm_mul_epu32(v_sum_d, v_sum_d);
- v_sum_d = _mm_srl_epi64(v_sum_d, _mm_set_epi32(0, 0, 0, LOG2_P2(h) + 2));
- v_sse_q = _mm_sub_epi64(v_sse_q, v_sum_d);
-
- return _mm_cvtsi128_si32(v_sse_q);
+ return calc_masked_variance(v_sum_d, v_sse_q, sse, 4, h);
}
#define MASKED_VAR4XH(H) \
@@ -350,13 +334,13 @@
const uint16_t *b, int b_stride,
const uint8_t *m, int m_stride,
int w, int h,
- __m128i* v_sum_d, __m128i* v_sse_q) {
+ int64_t *sum, uint64_t *sse) {
int ii, jj;
const __m128i v_zero = _mm_setzero_si128();
- *v_sum_d = _mm_setzero_si128();
- *v_sse_q = _mm_setzero_si128();
+ __m128i v_sum_d = _mm_setzero_si128();
+ __m128i v_sse_q = _mm_setzero_si128();
assert((w % 8) == 0);
@@ -373,7 +357,7 @@
// Difference: [-4095, 4095]
const __m128i v_d_w = _mm_sub_epi16(v_a_w, v_b_w);
- // Error - [-4095, 4095] * [0, 64] => fits in 19 bits (incld sign bit)
+ // Error - [-4095, 4095] * [0, 64] => sum of 2 of these fits in 19 bits
const __m128i v_e_d = _mm_madd_epi16(v_d_w, v_m_w);
// Squared error - max (18 bits * 18 bits) = 36 bits (no sign bit)
@@ -397,8 +381,8 @@
v_se_q = _mm_add_epi64(v_se0_q, v_se1_q);
// Accumulate
- *v_sum_d = _mm_add_epi32(*v_sum_d, v_e_d);
- *v_sse_q = _mm_add_epi64(*v_sse_q, v_se_q);
+ v_sum_d = _mm_add_epi32(v_sum_d, v_e_d);
+ v_sse_q = _mm_add_epi64(v_sse_q, v_se_q);
}
// Move on to next row
@@ -408,17 +392,13 @@
}
// Horizontal sum
- *v_sum_d = _mm_hadd_epi32(*v_sum_d, *v_sum_d);
- *v_sum_d = _mm_hadd_epi32(*v_sum_d, *v_sum_d);
- *v_sse_q = _mm_add_epi64(*v_sse_q, _mm_srli_si128(*v_sse_q, 8));
+ *sum = hsum_epi32_si64(v_sum_d);
+ *sse = hsum_epi64_si64(v_sse_q);
// Round
- *v_sum_d = _mm_sub_epi32(*v_sum_d, _mm_cmplt_epi32(*v_sum_d, v_zero));
- *v_sum_d = _mm_add_epi32(*v_sum_d, _mm_set_epi32(0, 0, 0, 31));
- *v_sum_d = _mm_srai_epi32(*v_sum_d, 6);
-
- *v_sse_q = _mm_add_epi64(*v_sse_q, _mm_set_epi32(0, 0, 0, 2047));
- *v_sse_q = _mm_srli_epi64(*v_sse_q, 12);
+ *sum = (*sum >= 0) ? *sum : -*sum;
+ *sum = ROUND_POWER_OF_TWO(*sum, 6);
+ *sse = ROUND_POWER_OF_TWO(*sse, 12);
}
// Main calculation for 4 wide blocks
@@ -427,13 +407,13 @@
const uint16_t *b, int b_stride,
const uint8_t *m, int m_stride,
int h,
- __m128i* v_sum_d, __m128i* v_sse_q) {
+ int64_t *sum, uint64_t *sse) {
int ii;
const __m128i v_zero = _mm_setzero_si128();
- *v_sum_d = _mm_setzero_si128();
- *v_sse_q = _mm_setzero_si128();
+ __m128i v_sum_d = _mm_setzero_si128();
+ __m128i v_sse_q = _mm_setzero_si128();
assert((h % 2) == 0);
@@ -481,8 +461,8 @@
v_se_q = _mm_add_epi64(v_se0_q, v_se1_q);
// Accumulate
- *v_sum_d = _mm_add_epi32(*v_sum_d, v_e_d);
- *v_sse_q = _mm_add_epi64(*v_sse_q, v_se_q);
+ v_sum_d = _mm_add_epi32(v_sum_d, v_e_d);
+ v_sse_q = _mm_add_epi64(v_sse_q, v_se_q);
// Move on to next row
a += a_stride * 2;
@@ -491,17 +471,13 @@
}
// Horizontal sum
- *v_sum_d = _mm_hadd_epi32(*v_sum_d, *v_sum_d);
- *v_sum_d = _mm_hadd_epi32(*v_sum_d, *v_sum_d);
- *v_sse_q = _mm_add_epi64(*v_sse_q, _mm_srli_si128(*v_sse_q, 8));
+ *sum = hsum_epi32_si32(v_sum_d);
+ *sse = hsum_epi64_si64(v_sse_q);
// Round
- *v_sum_d = _mm_sub_epi32(*v_sum_d, _mm_cmplt_epi32(*v_sum_d, v_zero));
- *v_sum_d = _mm_add_epi32(*v_sum_d, _mm_set_epi32(0, 0, 0, 31));
- *v_sum_d = _mm_srai_epi32(*v_sum_d, 6);
-
- *v_sse_q = _mm_add_epi64(*v_sse_q, _mm_set_epi32(0, 0, 0, 2047));
- *v_sse_q = _mm_srli_epi64(*v_sse_q, 12);
+ *sum = (*sum >= 0) ? *sum : -*sum;
+ *sum = ROUND_POWER_OF_TWO(*sum, 6);
+ *sse = ROUND_POWER_OF_TWO(*sse, 12);
}
static INLINE unsigned int highbd_masked_variancewxh_ssse3(
@@ -510,26 +486,20 @@
const uint8_t *m, int m_stride,
int w, int h,
unsigned int *sse) {
- __m128i v_sum_d, v_sse_q;
+ uint64_t sse64;
+ int64_t sum64;
if (w == 4)
highbd_masked_variance64_4wide_ssse3(a, a_stride, b, b_stride, m, m_stride,
- h, &v_sum_d, &v_sse_q);
+ h, &sum64, &sse64);
else
highbd_masked_variance64_ssse3(a, a_stride, b, b_stride, m, m_stride, w, h,
- &v_sum_d, &v_sse_q);
+ &sum64, &sse64);
// Store the SSE
- *sse = _mm_cvtsi128_si32(v_sse_q);
-
- // Compute the variance
- v_sum_d = _mm_abs_epi32(v_sum_d);
- v_sum_d = _mm_mul_epu32(v_sum_d, v_sum_d);
- v_sum_d = _mm_srl_epi64(v_sum_d,
- _mm_set_epi32(0, 0, 0, LOG2_P2(w) + LOG2_P2(h)));
- v_sse_q = _mm_sub_epi64(v_sse_q, v_sum_d);
-
- return _mm_cvtsi128_si32(v_sse_q);
+ *sse = (unsigned int)sse64;
+ // Compute and return variance
+ return *sse - ((sum64 * sum64) / (w * h));
}
static INLINE unsigned int highbd_10_masked_variancewxh_ssse3(
@@ -538,32 +508,24 @@
const uint8_t *m, int m_stride,
int w, int h,
unsigned int *sse) {
- __m128i v_sum_d, v_sse_q;
+ uint64_t sse64;
+ int64_t sum64;
if (w == 4)
highbd_masked_variance64_4wide_ssse3(a, a_stride, b, b_stride, m, m_stride,
- h, &v_sum_d, &v_sse_q);
+ h, &sum64, &sse64);
else
highbd_masked_variance64_ssse3(a, a_stride, b, b_stride, m, m_stride, w, h,
- &v_sum_d, &v_sse_q);
+ &sum64, &sse64);
- // Round sum and sse
- v_sum_d = _mm_srai_epi32(_mm_add_epi32(v_sum_d,
- _mm_set_epi32(0, 0, 0, 1 << 1)), 2);
- v_sse_q = _mm_srli_epi64(_mm_add_epi64(v_sse_q,
- _mm_set_epi32(0, 0, 0, 1 << 3)), 4);
+ // Normalise
+ sum64 = ROUND_POWER_OF_TWO(sum64, 2);
+ sse64 = ROUND_POWER_OF_TWO(sse64, 4);
// Store the SSE
- *sse = _mm_cvtsi128_si32(v_sse_q);
-
- // Compute the variance
- v_sum_d = _mm_abs_epi32(v_sum_d);
- v_sum_d = _mm_mul_epu32(v_sum_d, v_sum_d);
- v_sum_d = _mm_srl_epi64(v_sum_d,
- _mm_set_epi32(0, 0, 0, LOG2_P2(w) + LOG2_P2(h)));
- v_sse_q = _mm_sub_epi64(v_sse_q, v_sum_d);
-
- return _mm_cvtsi128_si32(v_sse_q);
+ *sse = (unsigned int)sse64;
+ // Compute and return variance
+ return *sse - ((sum64 * sum64) / (w * h));
}
static INLINE unsigned int highbd_12_masked_variancewxh_ssse3(
@@ -572,32 +534,23 @@
const uint8_t *m, int m_stride,
int w, int h,
unsigned int *sse) {
- __m128i v_sum_d, v_sse_q;
+ uint64_t sse64;
+ int64_t sum64;
if (w == 4)
highbd_masked_variance64_4wide_ssse3(a, a_stride, b, b_stride, m, m_stride,
- h, &v_sum_d, &v_sse_q);
+ h, &sum64, &sse64);
else
highbd_masked_variance64_ssse3(a, a_stride, b, b_stride, m, m_stride, w, h,
- &v_sum_d, &v_sse_q);
+ &sum64, &sse64);
- // Round sum and sse
- v_sum_d = _mm_srai_epi32(_mm_add_epi32(v_sum_d,
- _mm_set_epi32(0, 0, 0, 1 << 3)), 4);
- v_sse_q = _mm_srli_epi64(_mm_add_epi64(v_sse_q,
- _mm_set_epi32(0, 0, 0, 1 << 7)), 8);
+ sum64 = ROUND_POWER_OF_TWO(sum64, 4);
+ sse64 = ROUND_POWER_OF_TWO(sse64, 8);
// Store the SSE
- *sse = _mm_cvtsi128_si32(v_sse_q);
-
- // Compute the variance
- v_sum_d = _mm_abs_epi32(v_sum_d);
- v_sum_d = _mm_mul_epu32(v_sum_d, v_sum_d);
- v_sum_d = _mm_srl_epi64(v_sum_d,
- _mm_set_epi32(0, 0, 0, LOG2_P2(w) + LOG2_P2(h)));
- v_sse_q = _mm_sub_epi64(v_sse_q, v_sum_d);
-
- return _mm_cvtsi128_si32(v_sse_q);
+ *sse = (unsigned int)sse64;
+ // Compute and return variance
+ return *sse - ((sum64 * sum64) / (w * h));
}
#define HIGHBD_MASKED_VARWXH(W, H) \
@@ -653,6 +606,11 @@
HIGHBD_MASKED_VARWXH(32, 64)
HIGHBD_MASKED_VARWXH(64, 32)
HIGHBD_MASKED_VARWXH(64, 64)
+#if CONFIG_EXT_PARTITION
+HIGHBD_MASKED_VARWXH(64, 128)
+HIGHBD_MASKED_VARWXH(128, 64)
+HIGHBD_MASKED_VARWXH(128, 128)
+#endif // CONFIG_EXT_PARTITION
#endif
@@ -663,8 +621,8 @@
typedef __m128i (*filter_fn_t)(__m128i v_a_b, __m128i v_b_b,
__m128i v_filter_b);
-static INLINE __m128i apply_filter8(const __m128i v_a_b, const __m128i v_b_b,
- const __m128i v_filter_b) {
+static INLINE __m128i apply_filter_avg(const __m128i v_a_b, const __m128i v_b_b,
+ const __m128i v_filter_b) {
(void) v_filter_b;
return _mm_avg_epu8(v_a_b, v_b_b);
}
@@ -735,31 +693,6 @@
*v_sse_q = _mm_add_epi64(*v_sse_q, v_se_hi_q);
}
-static INLINE int calc_masked_variance(__m128i v_sum_d, __m128i v_sse_q,
- unsigned int* sse,
- const int w, const int h) {
- int sum;
-
- // Horizontal sum
- v_sum_d = _mm_hadd_epi32(v_sum_d, v_sum_d);
- v_sum_d = _mm_hadd_epi32(v_sum_d, v_sum_d);
- v_sse_q = _mm_add_epi64(v_sse_q, _mm_srli_si128(v_sse_q, 8));
-
- // Round
- sum = _mm_cvtsi128_si32(v_sum_d);
- sum = (sum >= 0) ? ((sum + 31) >> 6) : -((-sum + 31) >> 6);
-
- v_sse_q = _mm_add_epi64(v_sse_q, _mm_set_epi32(0, 0, 0, 2047));
- v_sse_q = _mm_srli_epi64(v_sse_q, 12);
-
- // Store the SSE
- *sse = _mm_cvtsi128_si32(v_sse_q);
-
- // Compute the variance
- return *sse - (((int64_t)sum * sum) >> (LOG2_P2(h) + LOG2_P2(w)));
-}
-
-
// Functions for width (W) >= 16
unsigned int vpx_masked_subpel_varWxH_xzero(
const uint8_t *src, int src_stride, int yoffset,
@@ -770,9 +703,9 @@
__m128i v_sum_d = _mm_setzero_si128();
__m128i v_sse_q = _mm_setzero_si128();
const __m128i v_filter_b = _mm_set1_epi16((
- vpx_bilinear_filters[yoffset][1] << 8) +
- vpx_bilinear_filters[yoffset][0]);
- assert(yoffset < 8);
+ bilinear_filters_2t[yoffset][1] << 8) +
+ bilinear_filters_2t[yoffset][0]);
+ assert(yoffset < BIL_SUBPEL_SHIFTS);
for (j = 0; j < w; j += 16) {
// Load the first row ready
v_src0_b = _mm_loadu_si128((const __m128i*)(src + j));
@@ -814,9 +747,9 @@
__m128i v_sum_d = _mm_setzero_si128();
__m128i v_sse_q = _mm_setzero_si128();
const __m128i v_filter_b = _mm_set1_epi16((
- vpx_bilinear_filters[xoffset][1] << 8) +
- vpx_bilinear_filters[xoffset][0]);
- assert(xoffset < 8);
+ bilinear_filters_2t[xoffset][1] << 8) +
+ bilinear_filters_2t[xoffset][0]);
+ assert(xoffset < BIL_SUBPEL_SHIFTS);
for (i = 0; i < h; i++) {
for (j = 0; j < w; j += 16) {
// Load this row and one below & apply the filter to them
@@ -846,13 +779,13 @@
__m128i v_sum_d = _mm_setzero_si128();
__m128i v_sse_q = _mm_setzero_si128();
const __m128i v_filterx_b = _mm_set1_epi16((
- vpx_bilinear_filters[xoffset][1] << 8) +
- vpx_bilinear_filters[xoffset][0]);
+ bilinear_filters_2t[xoffset][1] << 8) +
+ bilinear_filters_2t[xoffset][0]);
const __m128i v_filtery_b = _mm_set1_epi16((
- vpx_bilinear_filters[yoffset][1] << 8) +
- vpx_bilinear_filters[yoffset][0]);
- assert(yoffset < 8);
- assert(xoffset < 8);
+ bilinear_filters_2t[yoffset][1] << 8) +
+ bilinear_filters_2t[yoffset][0]);
+ assert(yoffset < BIL_SUBPEL_SHIFTS);
+ assert(xoffset < BIL_SUBPEL_SHIFTS);
for (j = 0; j < w; j += 16) {
// Load the first row ready
v_src0_b = _mm_loadu_si128((const __m128i*)(src + j));
@@ -908,9 +841,9 @@
__m128i v_sum_d = _mm_setzero_si128();
__m128i v_sse_q = _mm_setzero_si128();
__m128i v_filter_b = _mm_set1_epi16((
- vpx_bilinear_filters[yoffset][1] << 8) +
- vpx_bilinear_filters[yoffset][0]);
- assert(yoffset < 8);
+ bilinear_filters_2t[yoffset][1] << 8) +
+ bilinear_filters_2t[yoffset][0]);
+ assert(yoffset < BIL_SUBPEL_SHIFTS);
// Load the first row of src data ready
v_src0_b = _mm_loadl_epi64((const __m128i*)src);
for (i = 0; i < h; i += 4) {
@@ -938,7 +871,7 @@
v_msk2_b = _mm_unpacklo_epi32(v_msk3_b, v_msk2_b);
v_msk0_b = _mm_unpacklo_epi64(v_msk2_b, v_msk0_b);
// Apply the y filter
- if (yoffset == 8) {
+ if (yoffset == HALF_PIXEL_OFFSET) {
v_src1_b = _mm_unpacklo_epi64(v_src3_b, v_src1_b);
v_src2_b = _mm_or_si128(_mm_slli_si128(v_src1_b, 4),
_mm_and_si128(v_src0_b, _mm_setr_epi32(-1, 0, 0, 0)));
@@ -974,13 +907,13 @@
__m128i v_sum_d = _mm_setzero_si128();
__m128i v_sse_q = _mm_setzero_si128();
__m128i v_filter_b = _mm_set1_epi16((
- vpx_bilinear_filters[yoffset][1] << 8) +
- vpx_bilinear_filters[yoffset][0]);
- assert(yoffset < 8);
+ bilinear_filters_2t[yoffset][1] << 8) +
+ bilinear_filters_2t[yoffset][0]);
+ assert(yoffset < BIL_SUBPEL_SHIFTS);
// Load the first row of src data ready
v_src0_b = _mm_loadl_epi64((const __m128i*)src);
for (i = 0; i < h; i += 2) {
- if (yoffset == 8) {
+ if (yoffset == HALF_PIXEL_OFFSET) {
// Load the rest of the source data for these rows
v_src1_b = _mm_or_si128(
_mm_slli_si128(v_src0_b, 8),
@@ -1030,9 +963,9 @@
__m128i v_sum_d = _mm_setzero_si128();
__m128i v_sse_q = _mm_setzero_si128();
__m128i v_filter_b = _mm_set1_epi16((
- vpx_bilinear_filters[xoffset][1] << 8) +
- vpx_bilinear_filters[xoffset][0]);
- assert(xoffset < 8);
+ bilinear_filters_2t[xoffset][1] << 8) +
+ bilinear_filters_2t[xoffset][0]);
+ assert(xoffset < BIL_SUBPEL_SHIFTS);
for (i = 0; i < h; i += 4) {
// Load the src data
v_src0_b = _mm_loadl_epi64((const __m128i*)src);
@@ -1064,7 +997,7 @@
v_msk2_b = _mm_unpacklo_epi32(v_msk3_b, v_msk2_b);
v_msk0_b = _mm_unpacklo_epi64(v_msk2_b, v_msk0_b);
// Apply the x filter
- if (xoffset == 8) {
+ if (xoffset == HALF_PIXEL_OFFSET) {
v_src0_b = _mm_unpacklo_epi64(v_src2_b, v_src0_b);
v_src0_shift_b = _mm_unpacklo_epi64(v_src2_shift_b, v_src0_shift_b);
v_res_b = _mm_avg_epu8(v_src0_b, v_src0_shift_b);
@@ -1093,9 +1026,9 @@
__m128i v_sum_d = _mm_setzero_si128();
__m128i v_sse_q = _mm_setzero_si128();
__m128i v_filter_b = _mm_set1_epi16((
- vpx_bilinear_filters[xoffset][1] << 8) +
- vpx_bilinear_filters[xoffset][0]);
- assert(xoffset < 8);
+ bilinear_filters_2t[xoffset][1] << 8) +
+ bilinear_filters_2t[xoffset][0]);
+ assert(xoffset < BIL_SUBPEL_SHIFTS);
for (i = 0; i < h; i += 2) {
// Load the src data
v_src0_b = _mm_loadu_si128((const __m128i*)(src));
@@ -1103,7 +1036,7 @@
v_src1_b = _mm_loadu_si128((const __m128i*)(src + src_stride));
v_src1_shift_b = _mm_srli_si128(v_src1_b, 1);
// Apply the x filter
- if (xoffset == 8) {
+ if (xoffset == HALF_PIXEL_OFFSET) {
v_src1_b = _mm_unpacklo_epi64(v_src0_b, v_src1_b);
v_src1_shift_b = _mm_unpacklo_epi64(v_src0_shift_b, v_src1_shift_b);
v_res_b = _mm_avg_epu8(v_src1_b, v_src1_shift_b);
@@ -1145,13 +1078,13 @@
__m128i v_sum_d = _mm_setzero_si128();
__m128i v_sse_q = _mm_setzero_si128();
__m128i v_filterx_b = _mm_set1_epi16((
- vpx_bilinear_filters[xoffset][1] << 8) +
- vpx_bilinear_filters[xoffset][0]);
+ bilinear_filters_2t[xoffset][1] << 8) +
+ bilinear_filters_2t[xoffset][0]);
__m128i v_filtery_b = _mm_set1_epi16((
- vpx_bilinear_filters[yoffset][1] << 8) +
- vpx_bilinear_filters[yoffset][0]);
- assert(xoffset < 8);
- assert(yoffset < 8);
+ bilinear_filters_2t[yoffset][1] << 8) +
+ bilinear_filters_2t[yoffset][0]);
+ assert(xoffset < BIL_SUBPEL_SHIFTS);
+ assert(yoffset < BIL_SUBPEL_SHIFTS);
for (i = 0; i < h; i += 4) {
// Load the src data
v_src0_b = _mm_loadl_epi64((const __m128i*)src);
@@ -1167,7 +1100,7 @@
v_src3_shift_b = _mm_srli_si128(v_src3_b, 1);
v_src2_shift_b = _mm_unpacklo_epi32(v_src3_shift_b, v_src2_shift_b);
// Apply the x filter
- if (xoffset == 8) {
+ if (xoffset == HALF_PIXEL_OFFSET) {
v_src0_b = _mm_unpacklo_epi64(v_src2_b, v_src0_b);
v_src0_shift_b = _mm_unpacklo_epi64(v_src2_shift_b, v_src0_shift_b);
v_xres_b[i == 0 ? 0 : 1] = _mm_avg_epu8(v_src0_b, v_src0_shift_b);
@@ -1183,7 +1116,7 @@
v_src0_b = _mm_loadl_epi64((const __m128i*)src);
v_src0_shift_b = _mm_srli_si128(v_src0_b, 1);
// Apply the x filter
- if (xoffset == 8) {
+ if (xoffset == HALF_PIXEL_OFFSET) {
v_extra_row_b = _mm_and_si128(
_mm_avg_epu8(v_src0_b, v_src0_shift_b),
_mm_setr_epi32(-1, 0, 0, 0));
@@ -1203,7 +1136,7 @@
v_extra_row_b);
}
// Apply the y filter
- if (yoffset == 8) {
+ if (yoffset == HALF_PIXEL_OFFSET) {
v_res_b = _mm_avg_epu8(v_xres_b[i == 0 ? 0 : 1], v_temp_b);
} else {
v_res_b = apply_filter(v_xres_b[i == 0 ? 0 : 1], v_temp_b, v_filtery_b);
@@ -1245,21 +1178,20 @@
__m128i v_sum_d = _mm_setzero_si128();
__m128i v_sse_q = _mm_setzero_si128();
__m128i v_filterx_b = _mm_set1_epi16((
- vpx_bilinear_filters[xoffset][1] << 8) +
- vpx_bilinear_filters[xoffset][0]);
+ bilinear_filters_2t[xoffset][1] << 8) +
+ bilinear_filters_2t[xoffset][0]);
__m128i v_filtery_b = _mm_set1_epi16((
- vpx_bilinear_filters[yoffset][1] << 8) +
- vpx_bilinear_filters[yoffset][0]);
- assert(xoffset < 8);
- assert(yoffset < 8);
-
+ bilinear_filters_2t[yoffset][1] << 8) +
+ bilinear_filters_2t[yoffset][0]);
+ assert(xoffset < BIL_SUBPEL_SHIFTS);
+ assert(yoffset < BIL_SUBPEL_SHIFTS);
// Load the first block of src data
v_src0_b = _mm_loadu_si128((const __m128i*)(src));
v_src0_shift_b = _mm_srli_si128(v_src0_b, 1);
v_src1_b = _mm_loadu_si128((const __m128i*)(src + src_stride));
v_src1_shift_b = _mm_srli_si128(v_src1_b, 1);
// Apply the x filter
- if (xoffset == 8) {
+ if (xoffset == HALF_PIXEL_OFFSET) {
v_src1_b = _mm_unpacklo_epi64(v_src0_b, v_src1_b);
v_src1_shift_b = _mm_unpacklo_epi64(v_src0_shift_b, v_src1_shift_b);
v_xres0_b = _mm_avg_epu8(v_src1_b, v_src1_shift_b);
@@ -1275,7 +1207,7 @@
v_src1_b = _mm_loadu_si128((const __m128i*)(src + src_stride * 3));
v_src1_shift_b = _mm_srli_si128(v_src1_b, 1);
// Apply the x filter
- if (xoffset == 8) {
+ if (xoffset == HALF_PIXEL_OFFSET) {
v_src1_b = _mm_unpacklo_epi64(v_src0_b, v_src1_b);
v_src1_shift_b = _mm_unpacklo_epi64(v_src0_shift_b, v_src1_shift_b);
v_xres1_b = _mm_avg_epu8(v_src1_b, v_src1_shift_b);
@@ -1287,7 +1219,7 @@
// Apply the y filter to the previous block
v_temp_b = _mm_or_si128(_mm_srli_si128(v_xres0_b, 8),
_mm_slli_si128(v_xres1_b, 8));
- if (yoffset == 8) {
+ if (yoffset == HALF_PIXEL_OFFSET) {
v_res_b = _mm_avg_epu8(v_xres0_b, v_temp_b);
} else {
v_res_b = apply_filter(v_xres0_b, v_temp_b, v_filtery_b);
@@ -1309,7 +1241,7 @@
v_src1_b = _mm_loadu_si128((const __m128i*)(src + src_stride * 5));
v_src1_shift_b = _mm_srli_si128(v_src1_b, 1);
// Apply the x filter
- if (xoffset == 8) {
+ if (xoffset == HALF_PIXEL_OFFSET) {
v_src1_b = _mm_unpacklo_epi64(v_src0_b, v_src1_b);
v_src1_shift_b = _mm_unpacklo_epi64(v_src0_shift_b, v_src1_shift_b);
v_xres0_b = _mm_avg_epu8(v_src1_b, v_src1_shift_b);
@@ -1321,7 +1253,7 @@
// Apply the y filter to the previous block
v_temp_b = _mm_or_si128(_mm_srli_si128(v_xres1_b, 8),
_mm_slli_si128(v_xres0_b, 8));
- if (yoffset == 8) {
+ if (yoffset == HALF_PIXEL_OFFSET) {
v_res_b = _mm_avg_epu8(v_xres1_b, v_temp_b);
} else {
v_res_b = apply_filter(v_xres1_b, v_temp_b, v_filtery_b);
@@ -1359,41 +1291,45 @@
return vpx_masked_variance##W##x##H##_ssse3(src, src_stride, \
dst, dst_stride, \
msk, msk_stride, sse); \
- else if (yoffset == 8) \
- return vpx_masked_subpel_varWxH_xzero(src, src_stride, 8, \
+ else if (yoffset == HALF_PIXEL_OFFSET) \
+ return vpx_masked_subpel_varWxH_xzero(src, src_stride, \
+ HALF_PIXEL_OFFSET, \
dst, dst_stride, msk, msk_stride, \
- sse, W, H, apply_filter8); \
+ sse, W, H, apply_filter_avg); \
else \
- return vpx_masked_subpel_varWxH_xzero(src, src_stride, yoffset, \
+ return vpx_masked_subpel_varWxH_xzero(src, src_stride, \
+ yoffset, \
dst, dst_stride, msk, msk_stride, \
sse, W, H, apply_filter); \
} else if (yoffset == 0) { \
- if (xoffset == 8) \
- return vpx_masked_subpel_varWxH_yzero(src, src_stride, 8, \
+ if (xoffset == HALF_PIXEL_OFFSET) \
+ return vpx_masked_subpel_varWxH_yzero(src, src_stride, \
+ HALF_PIXEL_OFFSET, \
dst, dst_stride, msk, msk_stride, \
- sse, W, H, apply_filter8); \
+ sse, W, H, apply_filter_avg); \
else \
- return vpx_masked_subpel_varWxH_yzero(src, src_stride, xoffset, \
+ return vpx_masked_subpel_varWxH_yzero(src, src_stride, \
+ xoffset, \
dst, dst_stride, msk, msk_stride, \
sse, W, H, apply_filter); \
- } else if (xoffset == 8) { \
- if (yoffset == 8) \
+ } else if (xoffset == HALF_PIXEL_OFFSET) { \
+ if (yoffset == HALF_PIXEL_OFFSET) \
return vpx_masked_subpel_varWxH_xnonzero_ynonzero(src, src_stride, \
- 8, 8, dst, dst_stride, msk, msk_stride, sse, W, H, \
- apply_filter8, apply_filter8); \
+ HALF_PIXEL_OFFSET, HALF_PIXEL_OFFSET, dst, dst_stride, msk, \
+ msk_stride, sse, W, H, apply_filter_avg, apply_filter_avg); \
else \
return vpx_masked_subpel_varWxH_xnonzero_ynonzero(src, src_stride, \
- 8, yoffset, dst, dst_stride, msk, msk_stride, sse, W, H, \
- apply_filter8, apply_filter); \
+ HALF_PIXEL_OFFSET, yoffset, dst, dst_stride, msk, \
+ msk_stride, sse, W, H, apply_filter_avg, apply_filter); \
} else { \
- if (yoffset == 8) \
+ if (yoffset == HALF_PIXEL_OFFSET) \
return vpx_masked_subpel_varWxH_xnonzero_ynonzero(src, src_stride, \
- xoffset, 8, dst, dst_stride, msk, msk_stride, sse, W, H, \
- apply_filter, apply_filter8); \
+ xoffset, HALF_PIXEL_OFFSET, dst, dst_stride, msk, \
+ msk_stride, sse, W, H, apply_filter, apply_filter_avg); \
else \
return vpx_masked_subpel_varWxH_xnonzero_ynonzero(src, src_stride, \
- xoffset, yoffset, dst, dst_stride, msk, msk_stride, sse, W, H, \
- apply_filter, apply_filter); \
+ xoffset, yoffset, dst, dst_stride, msk, \
+ msk_stride, sse, W, H, apply_filter, apply_filter); \
} \
}
@@ -1437,6 +1373,11 @@
MASK_SUBPIX_VAR_LARGE(32, 64)
MASK_SUBPIX_VAR_LARGE(64, 32)
MASK_SUBPIX_VAR_LARGE(64, 64)
+#if CONFIG_EXT_PARTITION
+MASK_SUBPIX_VAR_LARGE(64, 128)
+MASK_SUBPIX_VAR_LARGE(128, 64)
+MASK_SUBPIX_VAR_LARGE(128, 128)
+#endif // CONFIG_EXT_PARTITION
#if CONFIG_VP9_HIGHBITDEPTH
typedef int (*highbd_calc_masked_var_t)(__m128i v_sum_d, __m128i v_sse_q,
@@ -1449,9 +1390,9 @@
typedef __m128i (*highbd_filter_fn_t)(__m128i v_a_w, __m128i v_b_w,
__m128i v_filter_w);
-static INLINE __m128i highbd_apply_filter8(const __m128i v_a_w,
- const __m128i v_b_w,
- const __m128i v_filter_w) {
+static INLINE __m128i highbd_apply_filter_avg(const __m128i v_a_w,
+ const __m128i v_b_w,
+ const __m128i v_filter_w) {
(void) v_filter_w;
return _mm_avg_epu16(v_a_w, v_b_w);
}
@@ -1523,55 +1464,53 @@
__m128i v_sse_q,
unsigned int* sse,
const int w, const int h) {
- int sum;
+ int64_t sum64;
+ uint64_t sse64;
// Horizontal sum
- v_sum_d = _mm_hadd_epi32(v_sum_d, v_sum_d);
- v_sum_d = _mm_hadd_epi32(v_sum_d, v_sum_d);
- v_sse_q = _mm_add_epi64(v_sse_q, _mm_srli_si128(v_sse_q, 8));
+ sum64 = hsum_epi32_si32(v_sum_d);
+ sse64 = hsum_epi64_si64(v_sse_q);
+
+ sum64 = (sum64 >= 0) ? sum64 : -sum64;
// Round
- sum = _mm_cvtsi128_si32(v_sum_d);
- sum = (sum >= 0) ? ((sum + 31) >> 6) : -((-sum + 31) >> 6);
- sum = ROUND_POWER_OF_TWO(sum, 2);
+ sum64 = ROUND_POWER_OF_TWO(sum64, 6);
+ sse64 = ROUND_POWER_OF_TWO(sse64, 12);
- v_sse_q = _mm_add_epi64(v_sse_q, _mm_set_epi32(0, 0, 0, 2047));
- v_sse_q = _mm_srli_epi64(v_sse_q, 12);
+ // Normalise
+ sum64 = ROUND_POWER_OF_TWO(sum64, 2);
+ sse64 = ROUND_POWER_OF_TWO(sse64, 4);
// Store the SSE
- v_sse_q = _mm_add_epi64(v_sse_q, _mm_set_epi32(0, 0, 0, 0x8));
- v_sse_q = _mm_srli_epi64(v_sse_q, 4);
- *sse = _mm_cvtsi128_si32(v_sse_q);
-
+ *sse = (unsigned int)sse64;
// Compute the variance
- return *sse - (((int64_t)sum * sum) >> (LOG2_P2(h) + LOG2_P2(w)));
+ return *sse - ((sum64 * sum64) / (w * h));
}
static INLINE int highbd_12_calc_masked_variance(__m128i v_sum_d,
__m128i v_sse_q,
unsigned int* sse,
const int w, const int h) {
- int sum;
+ int64_t sum64;
+ uint64_t sse64;
// Horizontal sum
- v_sum_d = _mm_hadd_epi32(v_sum_d, v_sum_d);
- v_sum_d = _mm_hadd_epi32(v_sum_d, v_sum_d);
- v_sse_q = _mm_add_epi64(v_sse_q, _mm_srli_si128(v_sse_q, 8));
+ sum64 = hsum_epi32_si64(v_sum_d);
+ sse64 = hsum_epi64_si64(v_sse_q);
+
+ sum64 = (sum64 >= 0) ? sum64 : -sum64;  // only sum64^2 is used below
// Round
- sum = _mm_cvtsi128_si32(v_sum_d);
- sum = (sum >= 0) ? ((sum + 31) >> 6) : -((-sum + 31) >> 6);
- sum = ROUND_POWER_OF_TWO(sum, 4);
+ sum64 = ROUND_POWER_OF_TWO(sum64, 6);
+ sse64 = ROUND_POWER_OF_TWO(sse64, 12);  // squared quantity: 2 * 6 bits
- v_sse_q = _mm_add_epi64(v_sse_q, _mm_set_epi32(0, 0, 0, 2047));
- v_sse_q = _mm_srli_epi64(v_sse_q, 12);
+ // Normalise: sse is again shifted by twice as many bits as sum
+ sum64 = ROUND_POWER_OF_TWO(sum64, 4);  // presumably 12-bit -> 8-bit scale; TODO confirm
+ sse64 = ROUND_POWER_OF_TWO(sse64, 8);  // squared quantity: 2 * 4 bits
// Store the SSE
- v_sse_q = _mm_add_epi64(v_sse_q, _mm_set_epi32(0, 0, 0, 0x80));
- v_sse_q = _mm_srli_epi64(v_sse_q, 8);
- *sse = _mm_cvtsi128_si32(v_sse_q);
-
+ *sse = (unsigned int)sse64;  // narrowing cast from uint64_t
// Compute the variance
- return *sse - (((int64_t)sum * sum) >> (LOG2_P2(h) + LOG2_P2(w)));
+ return *sse - ((sum64 * sum64) / (w * h));
}
@@ -1586,9 +1525,9 @@
__m128i v_sum_d = _mm_setzero_si128();
__m128i v_sse_q = _mm_setzero_si128();
const __m128i v_filter_w = _mm_set1_epi32((
- vpx_bilinear_filters[yoffset][1] << 16) +
- vpx_bilinear_filters[yoffset][0]);
- assert(yoffset < 8);
+ bilinear_filters_2t[yoffset][1] << 16) +
+ bilinear_filters_2t[yoffset][0]);
+ assert(yoffset < BIL_SUBPEL_SHIFTS);
for (j = 0; j < w; j += 8) {
// Load the first row ready
v_src0_w = _mm_loadu_si128((const __m128i*)(src + j));
@@ -1631,9 +1570,9 @@
__m128i v_sum_d = _mm_setzero_si128();
__m128i v_sse_q = _mm_setzero_si128();
const __m128i v_filter_w = _mm_set1_epi32((
- vpx_bilinear_filters[xoffset][1] << 16) +
- vpx_bilinear_filters[xoffset][0]);
- assert(xoffset < 8);
+ bilinear_filters_2t[xoffset][1] << 16) +
+ bilinear_filters_2t[xoffset][0]);
+ assert(xoffset < BIL_SUBPEL_SHIFTS);
for (i = 0; i < h; i++) {
for (j = 0; j < w; j += 8) {
// Load this row & apply the filter to them
@@ -1664,13 +1603,13 @@
__m128i v_sum_d = _mm_setzero_si128();
__m128i v_sse_q = _mm_setzero_si128();
const __m128i v_filterx_w = _mm_set1_epi32((
- vpx_bilinear_filters[xoffset][1] << 16) +
- vpx_bilinear_filters[xoffset][0]);
+ bilinear_filters_2t[xoffset][1] << 16) +
+ bilinear_filters_2t[xoffset][0]);
const __m128i v_filtery_w = _mm_set1_epi32((
- vpx_bilinear_filters[yoffset][1] << 16) +
- vpx_bilinear_filters[yoffset][0]);
- assert(xoffset < 8);
- assert(yoffset < 8);
+ bilinear_filters_2t[yoffset][1] << 16) +
+ bilinear_filters_2t[yoffset][0]);
+ assert(xoffset < BIL_SUBPEL_SHIFTS);
+ assert(yoffset < BIL_SUBPEL_SHIFTS);
for (j = 0; j < w; j += 8) {
// Load the first row ready
v_src0_w = _mm_loadu_si128((const __m128i*)(src + j));
@@ -1724,13 +1663,13 @@
__m128i v_sum_d = _mm_setzero_si128();
__m128i v_sse_q = _mm_setzero_si128();
__m128i v_filter_w = _mm_set1_epi32((
- vpx_bilinear_filters[yoffset][1] << 16) +
- vpx_bilinear_filters[yoffset][0]);
- assert(yoffset < 8);
+ bilinear_filters_2t[yoffset][1] << 16) +
+ bilinear_filters_2t[yoffset][0]);
+ assert(yoffset < BIL_SUBPEL_SHIFTS);
// Load the first row of src data ready
v_src0_w = _mm_loadl_epi64((const __m128i*)src);
for (i = 0; i < h; i += 2) {
- if (yoffset == 8) {
+ if (yoffset == HALF_PIXEL_OFFSET) {
// Load the rest of the source data for these rows
v_src1_w = _mm_or_si128(
_mm_slli_si128(v_src0_w, 8),
@@ -1776,9 +1715,9 @@
__m128i v_sum_d = _mm_setzero_si128();
__m128i v_sse_q = _mm_setzero_si128();
__m128i v_filter_w = _mm_set1_epi32((
- vpx_bilinear_filters[xoffset][1] << 16) +
- vpx_bilinear_filters[xoffset][0]);
- assert(xoffset < 8);
+ bilinear_filters_2t[xoffset][1] << 16) +
+ bilinear_filters_2t[xoffset][0]);
+ assert(xoffset < BIL_SUBPEL_SHIFTS);
for (i = 0; i < h; i += 2) {
// Load the src data
v_src0_w = _mm_loadu_si128((const __m128i*)(src));
@@ -1786,7 +1725,7 @@
v_src1_w = _mm_loadu_si128((const __m128i*)(src + src_stride));
v_src1_shift_w = _mm_srli_si128(v_src1_w, 2);
// Apply the x filter
- if (xoffset == 8) {
+ if (xoffset == HALF_PIXEL_OFFSET) {
v_src1_w = _mm_unpacklo_epi64(v_src0_w, v_src1_w);
v_src1_shift_w = _mm_unpacklo_epi64(v_src0_shift_w, v_src1_shift_w);
v_res_w = _mm_avg_epu16(v_src1_w, v_src1_shift_w);
@@ -1826,21 +1765,20 @@
__m128i v_sum_d = _mm_setzero_si128();
__m128i v_sse_q = _mm_setzero_si128();
__m128i v_filterx_w = _mm_set1_epi32((
- vpx_bilinear_filters[xoffset][1] << 16) +
- vpx_bilinear_filters[xoffset][0]);
+ bilinear_filters_2t[xoffset][1] << 16) +
+ bilinear_filters_2t[xoffset][0]);
__m128i v_filtery_w = _mm_set1_epi32((
- vpx_bilinear_filters[yoffset][1] << 16) +
- vpx_bilinear_filters[yoffset][0]);
- assert(xoffset < 8);
- assert(yoffset < 8);
-
+ bilinear_filters_2t[yoffset][1] << 16) +
+ bilinear_filters_2t[yoffset][0]);
+ assert(xoffset < BIL_SUBPEL_SHIFTS);
+ assert(yoffset < BIL_SUBPEL_SHIFTS);
// Load the first block of src data
v_src0_w = _mm_loadu_si128((const __m128i*)(src));
v_src0_shift_w = _mm_srli_si128(v_src0_w, 2);
v_src1_w = _mm_loadu_si128((const __m128i*)(src + src_stride));
v_src1_shift_w = _mm_srli_si128(v_src1_w, 2);
// Apply the x filter
- if (xoffset == 8) {
+ if (xoffset == HALF_PIXEL_OFFSET) {
v_src1_w = _mm_unpacklo_epi64(v_src0_w, v_src1_w);
v_src1_shift_w = _mm_unpacklo_epi64(v_src0_shift_w, v_src1_shift_w);
v_xres0_w = _mm_avg_epu16(v_src1_w, v_src1_shift_w);
@@ -1858,7 +1796,7 @@
v_src1_w = _mm_loadu_si128((const __m128i*)(src + src_stride * 3));
v_src1_shift_w = _mm_srli_si128(v_src1_w, 2);
// Apply the x filter
- if (xoffset == 8) {
+ if (xoffset == HALF_PIXEL_OFFSET) {
v_src1_w = _mm_unpacklo_epi64(v_src0_w, v_src1_w);
v_src1_shift_w = _mm_unpacklo_epi64(v_src0_shift_w, v_src1_shift_w);
v_xres1_w = _mm_avg_epu16(v_src1_w, v_src1_shift_w);
@@ -1872,7 +1810,7 @@
// Apply the y filter to the previous block
v_temp_w = _mm_or_si128(_mm_srli_si128(v_xres0_w, 8),
_mm_slli_si128(v_xres1_w, 8));
- if (yoffset == 8) {
+ if (yoffset == HALF_PIXEL_OFFSET) {
v_res_w = _mm_avg_epu16(v_xres0_w, v_temp_w);
} else {
v_res_w = highbd_apply_filter(v_xres0_w, v_temp_w, v_filtery_w);
@@ -1894,7 +1832,7 @@
v_src1_w = _mm_loadu_si128((const __m128i*)(src + src_stride * 5));
v_src1_shift_w = _mm_srli_si128(v_src1_w, 2);
// Apply the x filter
- if (xoffset == 8) {
+ if (xoffset == HALF_PIXEL_OFFSET) {
v_src1_w = _mm_unpacklo_epi64(v_src0_w, v_src1_w);
v_src1_shift_w = _mm_unpacklo_epi64(v_src0_shift_w, v_src1_shift_w);
v_xres0_w = _mm_avg_epu16(v_src1_w, v_src1_shift_w);
@@ -1908,7 +1846,7 @@
// Apply the y filter to the previous block
v_temp_w = _mm_or_si128(_mm_srli_si128(v_xres1_w, 8),
_mm_slli_si128(v_xres0_w, 8));
- if (yoffset == 8) {
+ if (yoffset == HALF_PIXEL_OFFSET) {
v_res_w = _mm_avg_epu16(v_xres1_w, v_temp_w);
} else {
v_res_w = highbd_apply_filter(v_xres1_w, v_temp_w, v_filtery_w);
@@ -1948,55 +1886,61 @@
if (yoffset == 0) \
return full_variance_function(src8, src_stride, dst8, dst_stride, \
msk, msk_stride, sse); \
- else if (yoffset == 8) \
- return vpx_highbd_masked_subpel_varWxH_xzero(src, src_stride, 8, \
+ else if (yoffset == HALF_PIXEL_OFFSET) \
+ return vpx_highbd_masked_subpel_varWxH_xzero(src, src_stride, \
+ HALF_PIXEL_OFFSET, \
dst, dst_stride, \
msk, msk_stride, \
sse, W, H, \
- highbd_apply_filter8, \
+ highbd_apply_filter_avg, \
calc_var); \
else \
- return vpx_highbd_masked_subpel_varWxH_xzero(src, src_stride, yoffset, \
+ return vpx_highbd_masked_subpel_varWxH_xzero(src, src_stride, \
+ yoffset, \
dst, dst_stride, \
msk, msk_stride, \
sse, W, H, \
highbd_apply_filter, \
calc_var); \
} else if (yoffset == 0) { \
- if (xoffset == 8) \
- return vpx_highbd_masked_subpel_varWxH_yzero(src, src_stride, 8, \
+ if (xoffset == HALF_PIXEL_OFFSET) \
+ return vpx_highbd_masked_subpel_varWxH_yzero(src, src_stride, \
+ HALF_PIXEL_OFFSET, \
dst, dst_stride, \
msk, msk_stride, \
sse, W, H, \
- highbd_apply_filter8, \
+ highbd_apply_filter_avg, \
calc_var); \
else \
- return vpx_highbd_masked_subpel_varWxH_yzero(src, src_stride, xoffset, \
+ return vpx_highbd_masked_subpel_varWxH_yzero(src, src_stride, \
+ xoffset, \
dst, dst_stride, \
msk, msk_stride, \
sse, W, H, \
highbd_apply_filter, \
calc_var); \
- } else if (xoffset == 8) { \
- if (yoffset == 8) \
+ } else if (xoffset == HALF_PIXEL_OFFSET) { \
+ if (yoffset == HALF_PIXEL_OFFSET) \
return vpx_highbd_masked_subpel_varWxH_xnonzero_ynonzero( \
- src, src_stride, 8, 8, dst, dst_stride, msk, msk_stride, \
- sse, W, H, highbd_apply_filter8, highbd_apply_filter8, calc_var);\
+ src, src_stride, HALF_PIXEL_OFFSET, HALF_PIXEL_OFFSET, \
+ dst, dst_stride, msk, msk_stride, sse, W, H, \
+ highbd_apply_filter_avg, highbd_apply_filter_avg, calc_var); \
else \
return vpx_highbd_masked_subpel_varWxH_xnonzero_ynonzero( \
- src, src_stride, 8, yoffset, dst, dst_stride, \
- msk, msk_stride, sse, W, H, highbd_apply_filter8, \
+ src, src_stride, HALF_PIXEL_OFFSET, yoffset, dst, dst_stride, \
+ msk, msk_stride, sse, W, H, highbd_apply_filter_avg, \
highbd_apply_filter, calc_var); \
} else { \
- if (yoffset == 8) \
+ if (yoffset == HALF_PIXEL_OFFSET) \
return vpx_highbd_masked_subpel_varWxH_xnonzero_ynonzero( \
- src, src_stride, xoffset, 8, dst, dst_stride, msk, msk_stride, \
- sse, W, H, highbd_apply_filter, highbd_apply_filter8, calc_var); \
+ src, src_stride, xoffset, HALF_PIXEL_OFFSET, \
+ dst, dst_stride, msk, msk_stride, sse, W, H, \
+ highbd_apply_filter, highbd_apply_filter_avg, calc_var); \
else \
return vpx_highbd_masked_subpel_varWxH_xnonzero_ynonzero( \
- src, src_stride, xoffset, yoffset, dst, dst_stride, \
- msk, msk_stride, sse, W, H, highbd_apply_filter, \
- highbd_apply_filter, calc_var); \
+ src, src_stride, xoffset, yoffset, \
+ dst, dst_stride, msk, msk_stride, sse, W, H, \
+ highbd_apply_filter, highbd_apply_filter, calc_var); \
} \
}
@@ -2093,4 +2037,12 @@
HIGHBD_MASK_SUBPIX_VAR_WRAPPERS(64, 32)
HIGHBD_MASK_SUBPIX_VAR_LARGE(64, 64)
HIGHBD_MASK_SUBPIX_VAR_WRAPPERS(64, 64)
+#if CONFIG_EXT_PARTITION
+HIGHBD_MASK_SUBPIX_VAR_LARGE(64, 128)
+HIGHBD_MASK_SUBPIX_VAR_WRAPPERS(64, 128)
+HIGHBD_MASK_SUBPIX_VAR_LARGE(128, 64)
+HIGHBD_MASK_SUBPIX_VAR_WRAPPERS(128, 64)
+HIGHBD_MASK_SUBPIX_VAR_LARGE(128, 128)
+HIGHBD_MASK_SUBPIX_VAR_WRAPPERS(128, 128)
+#endif // CONFIG_EXT_PARTITION
#endif
diff --git a/vpx_dsp/x86/vpx_convolve_copy_sse2.asm b/vpx_dsp/x86/vpx_convolve_copy_sse2.asm
index abc0270..6d43fc1 100644
--- a/vpx_dsp/x86/vpx_convolve_copy_sse2.asm
+++ b/vpx_dsp/x86/vpx_convolve_copy_sse2.asm
@@ -46,6 +46,119 @@
je .w16
cmp r4d, 32
je .w32
+
+%if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ cmp r4d, 64
+ je .w64
+%ifidn %2, highbd
+ cmp r4d, 128
+ je .w128
+
+.w256:  ; fall-through: r4d matched neither 64 nor 128 (highbd build only)
+ mov r4d, dword hm  ; r4d becomes the row counter (hm: presumably the height arg - confirm)
+.loop256:  ; one row per iteration: offsets 0..240 in 16-byte steps, four registers at a time
+ movu m0, [srcq]  ; group 1 of 4: source offsets 0..48
+ movu m1, [srcq+16]
+ movu m2, [srcq+32]
+ movu m3, [srcq+48]
+%ifidn %1, avg
+ pavg m0, [dstq]  ; avg variant only: pavg against what dst already holds
+ pavg m1, [dstq+16]
+ pavg m2, [dstq+32]
+ pavg m3, [dstq+48]
+%endif
+ mova [dstq ], m0  ; mova store: dst presumably 16-byte aligned - confirm
+ mova [dstq+16], m1
+ mova [dstq+32], m2
+ mova [dstq+48], m3
+ movu m0, [srcq+64]  ; group 2 of 4: source offsets 64..112
+ movu m1, [srcq+80]
+ movu m2, [srcq+96]
+ movu m3, [srcq+112]
+%ifidn %1, avg
+ pavg m0, [dstq+64]
+ pavg m1, [dstq+80]
+ pavg m2, [dstq+96]
+ pavg m3, [dstq+112]
+%endif
+ mova [dstq+64], m0
+ mova [dstq+80], m1
+ mova [dstq+96], m2
+ mova [dstq+112], m3
+ movu m0, [srcq+128]  ; group 3 of 4: source offsets 128..176
+ movu m1, [srcq+128+16]
+ movu m2, [srcq+128+32]
+ movu m3, [srcq+128+48]
+%ifidn %1, avg
+ pavg m0, [dstq+128]
+ pavg m1, [dstq+128+16]
+ pavg m2, [dstq+128+32]
+ pavg m3, [dstq+128+48]
+%endif
+ mova [dstq+128 ], m0
+ mova [dstq+128+16], m1
+ mova [dstq+128+32], m2
+ mova [dstq+128+48], m3
+ movu m0, [srcq+128+64]  ; group 4 of 4: source offsets 192..240
+ movu m1, [srcq+128+80]
+ movu m2, [srcq+128+96]
+ movu m3, [srcq+128+112]
+ add srcq, src_strideq  ; every load for this row is done; step src to the next row
+%ifidn %1, avg
+ pavg m0, [dstq+128+64]
+ pavg m1, [dstq+128+80]
+ pavg m2, [dstq+128+96]
+ pavg m3, [dstq+128+112]
+%endif
+ mova [dstq+128+64], m0
+ mova [dstq+128+80], m1
+ mova [dstq+128+96], m2
+ mova [dstq+128+112], m3
+ add dstq, dst_strideq  ; step dst to the next row
+ sub r4d, 1  ; one fewer row left; sets ZF for the jnz below
+ jnz .loop256  ; repeat until the row counter reaches zero
+ RET
+%endif
+
+.w128:  ; highbd build: reached via "je .w128"; otherwise by fall-through after the 64 compare
+ mov r4d, dword hm  ; r4d becomes the row counter (hm: presumably the height arg - confirm)
+.loop128:  ; one row per iteration: offsets 0..112 in 16-byte steps, four registers at a time
+ movu m0, [srcq]  ; group 1 of 2: source offsets 0..48
+ movu m1, [srcq+16]
+ movu m2, [srcq+32]
+ movu m3, [srcq+48]
+%ifidn %1, avg
+ pavg m0, [dstq]  ; avg variant only: pavg against what dst already holds
+ pavg m1, [dstq+16]
+ pavg m2, [dstq+32]
+ pavg m3, [dstq+48]
+%endif
+ mova [dstq ], m0  ; mova store: dst presumably 16-byte aligned - confirm
+ mova [dstq+16], m1
+ mova [dstq+32], m2
+ mova [dstq+48], m3
+ movu m0, [srcq+64]  ; group 2 of 2: source offsets 64..112
+ movu m1, [srcq+80]
+ movu m2, [srcq+96]
+ movu m3, [srcq+112]
+ add srcq, src_strideq  ; every load for this row is done; step src to the next row
+%ifidn %1, avg
+ pavg m0, [dstq+64]
+ pavg m1, [dstq+80]
+ pavg m2, [dstq+96]
+ pavg m3, [dstq+112]
+%endif
+ mova [dstq+64], m0
+ mova [dstq+80], m1
+ mova [dstq+96], m2
+ mova [dstq+112], m3
+ add dstq, dst_strideq  ; step dst to the next row
+ sub r4d, 1  ; one fewer row left; sets ZF for the jnz below
+ jnz .loop128  ; repeat until the row counter reaches zero
+ RET
+
+%else ; CONFIG_VP10 && CONFIG_EXT_PARTITION
+
%ifidn %2, highbd
cmp r4d, 64
je .w64
@@ -82,10 +195,11 @@
mova [dstq+96], m2
mova [dstq+112], m3
add dstq, dst_strideq
- dec r4d
+ sub r4d, 1
jnz .loop128
RET
%endif
+%endif ; CONFIG_VP10 && CONFIG_EXT_PARTITION
.w64
mov r4d, dword hm
@@ -106,7 +220,7 @@
mova [dstq+32], m2
mova [dstq+48], m3
add dstq, dst_strideq
- dec r4d
+ sub r4d, 1
jnz .loop64
RET
diff --git a/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c b/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c
index 6fd5208..6c59918 100644
--- a/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c
+++ b/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c
@@ -844,34 +844,49 @@
// --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
// --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
// --Require an additional 8 rows for the horiz_w8 transpose tail.
- DECLARE_ALIGNED(16, uint8_t, temp[(135 + 8) * 64]);
+ DECLARE_ALIGNED(16, uint8_t, temp[(MAX_EXT_SIZE + 8) * MAX_CU_SIZE]);
const int intermediate_height =
(((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;
- assert(w <= 64);
- assert(h <= 64);
+ assert(w <= MAX_CU_SIZE);
+ assert(h <= MAX_CU_SIZE);
assert(y_step_q4 <= 32);
assert(x_step_q4 <= 32);
if (w >= 8) {
scaledconvolve_horiz_w8(src - src_stride * (SUBPEL_TAPS / 2 - 1),
- src_stride, temp, 64, x_filters, x0_q4, x_step_q4,
+ src_stride,
+ temp,
+ MAX_CU_SIZE,
+ x_filters, x0_q4, x_step_q4,
w, intermediate_height);
} else {
scaledconvolve_horiz_w4(src - src_stride * (SUBPEL_TAPS / 2 - 1),
- src_stride, temp, 64, x_filters, x0_q4, x_step_q4,
+ src_stride,
+ temp,
+ MAX_CU_SIZE,
+ x_filters, x0_q4, x_step_q4,
w, intermediate_height);
}
if (w >= 16) {
- scaledconvolve_vert_w16(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst,
- dst_stride, y_filters, y0_q4, y_step_q4, w, h);
+ scaledconvolve_vert_w16(temp + MAX_CU_SIZE * (SUBPEL_TAPS / 2 - 1),
+ MAX_CU_SIZE,
+ dst,
+ dst_stride,
+ y_filters, y0_q4, y_step_q4, w, h);
} else if (w == 8) {
- scaledconvolve_vert_w8(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst,
- dst_stride, y_filters, y0_q4, y_step_q4, w, h);
+ scaledconvolve_vert_w8(temp + MAX_CU_SIZE * (SUBPEL_TAPS / 2 - 1),
+ MAX_CU_SIZE,
+ dst,
+ dst_stride,
+ y_filters, y0_q4, y_step_q4, w, h);
} else {
- scaledconvolve_vert_w4(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst,
- dst_stride, y_filters, y0_q4, y_step_q4, w, h);
+ scaledconvolve_vert_w4(temp + MAX_CU_SIZE * (SUBPEL_TAPS / 2 - 1),
+ MAX_CU_SIZE,
+ dst,
+ dst_stride,
+ y_filters, y0_q4, y_step_q4, w, h);
}
}