Add 128-pixel variance and SAD functions
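
Extends the SAD (plain, averaging, and x4d), variance, and sub-pixel
variance function sets -- and their high-bitdepth counterparts -- to
the 128x128, 128x64, and 64x128 block sizes used when
CONFIG_EXT_PARTITION is enabled. The unit tests gain coverage for the
new sizes, with their scratch buffers grown from 64x64 (MAX_SIZE) to
128x128 (MAX_CU_SIZE), and the bilinear filter table used by the
sub-pixel variance code is switched from vpx_bilinear_filters to
bilinear_filters_2t.

The new sizes reuse the existing size-parameterised macros; as a rough
sketch (the shape of the existing scalar helper in vpx_dsp/sad.c, not
code added by this change), each sadMxN(m, n) instantiation wraps a
kernel like:

    static INLINE unsigned int sad(const uint8_t *a, int a_stride,
                                   const uint8_t *b, int b_stride,
                                   int width, int height) {
      int y, x;
      unsigned int sad = 0;
      for (y = 0; y < height; y++) {
        // Accumulate per-pixel absolute differences over one row,
        // then advance both pointers by their respective strides.
        for (x = 0; x < width; x++) sad += abs(a[x] - b[x]);
        a += a_stride;
        b += b_stride;
      }
      return sad;
    }
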
Change-Id: I8fde245b32c9e586683a28aa6925da0b83850b39
diff --git a/test/masked_sad_test.cc b/test/masked_sad_test.cc
index c09104c..d7c6fce 100644
--- a/test/masked_sad_test.cc
+++ b/test/masked_sad_test.cc
@@ -22,6 +22,8 @@
#include "./vpx_dsp_rtcd.h"
#include "vpx/vpx_integer.h"
+#define MAX_CU_SIZE 128
+
using libvpx_test::ACMRandom;
namespace {
@@ -50,16 +52,16 @@
TEST_P(MaskedSADTest, OperationCheck) {
unsigned int ref_ret, ret;
ACMRandom rnd(ACMRandom::DeterministicSeed());
- DECLARE_ALIGNED(16, uint8_t, src_ptr[4096]);
- DECLARE_ALIGNED(16, uint8_t, ref_ptr[4096]);
- DECLARE_ALIGNED(16, uint8_t, msk_ptr[4096]);
+ DECLARE_ALIGNED(16, uint8_t, src_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
+ DECLARE_ALIGNED(16, uint8_t, ref_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
+ DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
int err_count = 0;
int first_failure = -1;
- int src_stride = 64;
- int ref_stride = 64;
- int msk_stride = 64;
+ int src_stride = MAX_CU_SIZE;
+ int ref_stride = MAX_CU_SIZE;
+ int msk_stride = MAX_CU_SIZE;
for (int i = 0; i < number_of_iterations; ++i) {
- for (int j = 0; j < 4096; j++) {
+ for (int j = 0; j < MAX_CU_SIZE*MAX_CU_SIZE; j++) {
src_ptr[j] = rnd.Rand8();
ref_ptr[j] = rnd.Rand8();
msk_ptr[j] = ((rnd.Rand8()&0x7f) > 64) ? rnd.Rand8()&0x3f : 64;
@@ -108,18 +110,18 @@
TEST_P(HighbdMaskedSADTest, OperationCheck) {
unsigned int ref_ret, ret;
ACMRandom rnd(ACMRandom::DeterministicSeed());
- DECLARE_ALIGNED(16, uint16_t, src_ptr[4096]);
- DECLARE_ALIGNED(16, uint16_t, ref_ptr[4096]);
- DECLARE_ALIGNED(16, uint8_t, msk_ptr[4096]);
+ DECLARE_ALIGNED(16, uint16_t, src_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
+ DECLARE_ALIGNED(16, uint16_t, ref_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
+ DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
uint8_t* src8_ptr = CONVERT_TO_BYTEPTR(src_ptr);
uint8_t* ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr);
int err_count = 0;
int first_failure = -1;
- int src_stride = 64;
- int ref_stride = 64;
- int msk_stride = 64;
+ int src_stride = MAX_CU_SIZE;
+ int ref_stride = MAX_CU_SIZE;
+ int msk_stride = MAX_CU_SIZE;
for (int i = 0; i < number_of_iterations; ++i) {
- for (int j = 0; j < 4096; j++) {
+ for (int j = 0; j < MAX_CU_SIZE*MAX_CU_SIZE; j++) {
src_ptr[j] = rnd.Rand16()&0xfff;
ref_ptr[j] = rnd.Rand16()&0xfff;
msk_ptr[j] = ((rnd.Rand8()&0x7f) > 64) ? rnd.Rand8()&0x3f : 64;
@@ -148,6 +150,14 @@
INSTANTIATE_TEST_CASE_P(
SSSE3_C_COMPARE, MaskedSADTest,
::testing::Values(
+#if CONFIG_EXT_PARTITION
+ make_tuple(&vpx_masked_sad128x128_ssse3,
+ &vpx_masked_sad128x128_c),
+ make_tuple(&vpx_masked_sad128x64_ssse3,
+ &vpx_masked_sad128x64_c),
+ make_tuple(&vpx_masked_sad64x128_ssse3,
+ &vpx_masked_sad64x128_c),
+#endif // CONFIG_EXT_PARTITION
make_tuple(&vpx_masked_sad64x64_ssse3,
&vpx_masked_sad64x64_c),
make_tuple(&vpx_masked_sad64x32_ssse3,
@@ -178,32 +188,40 @@
INSTANTIATE_TEST_CASE_P(
SSSE3_C_COMPARE, HighbdMaskedSADTest,
::testing::Values(
- make_tuple(&vp9_highbd_masked_sad64x64_ssse3,
- &vp9_highbd_masked_sad64x64_c),
- make_tuple(&vp9_highbd_masked_sad64x32_ssse3,
- &vp9_highbd_masked_sad64x32_c),
- make_tuple(&vp9_highbd_masked_sad32x64_ssse3,
- &vp9_highbd_masked_sad32x64_c),
- make_tuple(&vp9_highbd_masked_sad32x32_ssse3,
- &vp9_highbd_masked_sad32x32_c),
- make_tuple(&vp9_highbd_masked_sad32x16_ssse3,
- &vp9_highbd_masked_sad32x16_c),
- make_tuple(&vp9_highbd_masked_sad16x32_ssse3,
- &vp9_highbd_masked_sad16x32_c),
- make_tuple(&vp9_highbd_masked_sad16x16_ssse3,
- &vp9_highbd_masked_sad16x16_c),
- make_tuple(&vp9_highbd_masked_sad16x8_ssse3,
- &vp9_highbd_masked_sad16x8_c),
- make_tuple(&vp9_highbd_masked_sad8x16_ssse3,
- &vp9_highbd_masked_sad8x16_c),
- make_tuple(&vp9_highbd_masked_sad8x8_ssse3,
- &vp9_highbd_masked_sad8x8_c),
- make_tuple(&vp9_highbd_masked_sad8x4_ssse3,
- &vp9_highbd_masked_sad8x4_c),
- make_tuple(&vp9_highbd_masked_sad4x8_ssse3,
- &vp9_highbd_masked_sad4x8_c),
- make_tuple(&vp9_highbd_masked_sad4x4_ssse3,
- &vp9_highbd_masked_sad4x4_c)));
+#if CONFIG_EXT_PARTITION
+ make_tuple(&vpx_highbd_masked_sad128x128_ssse3,
+ &vpx_highbd_masked_sad128x128_c),
+ make_tuple(&vpx_highbd_masked_sad128x64_ssse3,
+ &vpx_highbd_masked_sad128x64_c),
+ make_tuple(&vpx_highbd_masked_sad64x128_ssse3,
+ &vpx_highbd_masked_sad64x128_c),
+#endif // CONFIG_EXT_PARTITION
+ make_tuple(&vpx_highbd_masked_sad64x64_ssse3,
+ &vpx_highbd_masked_sad64x64_c),
+ make_tuple(&vpx_highbd_masked_sad64x32_ssse3,
+ &vpx_highbd_masked_sad64x32_c),
+ make_tuple(&vpx_highbd_masked_sad32x64_ssse3,
+ &vpx_highbd_masked_sad32x64_c),
+ make_tuple(&vpx_highbd_masked_sad32x32_ssse3,
+ &vpx_highbd_masked_sad32x32_c),
+ make_tuple(&vpx_highbd_masked_sad32x16_ssse3,
+ &vpx_highbd_masked_sad32x16_c),
+ make_tuple(&vpx_highbd_masked_sad16x32_ssse3,
+ &vpx_highbd_masked_sad16x32_c),
+ make_tuple(&vpx_highbd_masked_sad16x16_ssse3,
+ &vpx_highbd_masked_sad16x16_c),
+ make_tuple(&vpx_highbd_masked_sad16x8_ssse3,
+ &vpx_highbd_masked_sad16x8_c),
+ make_tuple(&vpx_highbd_masked_sad8x16_ssse3,
+ &vpx_highbd_masked_sad8x16_c),
+ make_tuple(&vpx_highbd_masked_sad8x8_ssse3,
+ &vpx_highbd_masked_sad8x8_c),
+ make_tuple(&vpx_highbd_masked_sad8x4_ssse3,
+ &vpx_highbd_masked_sad8x4_c),
+ make_tuple(&vpx_highbd_masked_sad4x8_ssse3,
+ &vpx_highbd_masked_sad4x8_c),
+ make_tuple(&vpx_highbd_masked_sad4x4_ssse3,
+ &vpx_highbd_masked_sad4x4_c)));
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // HAVE_SSSE3
} // namespace
diff --git a/test/masked_variance_test.cc b/test/masked_variance_test.cc
index fc37759..c312899 100644
--- a/test/masked_variance_test.cc
+++ b/test/masked_variance_test.cc
@@ -20,10 +20,12 @@
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
+#include "vpx/vpx_codec.h"
#include "vpx/vpx_integer.h"
#include "vpx_dsp/vpx_filter.h"
+#include "vpx_mem/vpx_mem.h"
-#define MAX_SIZE 64
+#define MAX_CU_SIZE 128
using libvpx_test::ACMRandom;
@@ -58,17 +60,17 @@
unsigned int ref_ret, opt_ret;
unsigned int ref_sse, opt_sse;
ACMRandom rnd(ACMRandom::DeterministicSeed());
- DECLARE_ALIGNED(16, uint8_t, src_ptr[MAX_SIZE*MAX_SIZE]);
- DECLARE_ALIGNED(16, uint8_t, ref_ptr[MAX_SIZE*MAX_SIZE]);
- DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SIZE*MAX_SIZE]);
+ DECLARE_ALIGNED(16, uint8_t, src_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
+ DECLARE_ALIGNED(16, uint8_t, ref_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
+ DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
int err_count = 0;
int first_failure = -1;
- int src_stride = MAX_SIZE;
- int ref_stride = MAX_SIZE;
- int msk_stride = MAX_SIZE;
+ int src_stride = MAX_CU_SIZE;
+ int ref_stride = MAX_CU_SIZE;
+ int msk_stride = MAX_CU_SIZE;
for (int i = 0; i < number_of_iterations; ++i) {
- for (int j = 0; j < MAX_SIZE*MAX_SIZE; j++) {
+ for (int j = 0; j < MAX_CU_SIZE*MAX_CU_SIZE; j++) {
src_ptr[j] = rnd.Rand8();
ref_ptr[j] = rnd.Rand8();
msk_ptr[j] = rnd(65);
@@ -100,19 +102,19 @@
unsigned int ref_ret, opt_ret;
unsigned int ref_sse, opt_sse;
ACMRandom rnd(ACMRandom::DeterministicSeed());
- DECLARE_ALIGNED(16, uint8_t, src_ptr[MAX_SIZE*MAX_SIZE]);
- DECLARE_ALIGNED(16, uint8_t, ref_ptr[MAX_SIZE*MAX_SIZE]);
- DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SIZE*MAX_SIZE]);
+ DECLARE_ALIGNED(16, uint8_t, src_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
+ DECLARE_ALIGNED(16, uint8_t, ref_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
+ DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
int err_count = 0;
int first_failure = -1;
- int src_stride = MAX_SIZE;
- int ref_stride = MAX_SIZE;
- int msk_stride = MAX_SIZE;
+ int src_stride = MAX_CU_SIZE;
+ int ref_stride = MAX_CU_SIZE;
+ int msk_stride = MAX_CU_SIZE;
for (int i = 0; i < 8; ++i) {
- memset(src_ptr, (i & 0x1) ? 255 : 0, MAX_SIZE*MAX_SIZE);
- memset(ref_ptr, (i & 0x2) ? 255 : 0, MAX_SIZE*MAX_SIZE);
- memset(msk_ptr, (i & 0x4) ? 64 : 0, MAX_SIZE*MAX_SIZE);
+ memset(src_ptr, (i & 0x1) ? 255 : 0, MAX_CU_SIZE*MAX_CU_SIZE);
+ memset(ref_ptr, (i & 0x2) ? 255 : 0, MAX_CU_SIZE*MAX_CU_SIZE);
+ memset(msk_ptr, (i & 0x4) ? 64 : 0, MAX_CU_SIZE*MAX_CU_SIZE);
ref_ret = ref_func_(src_ptr, src_stride,
ref_ptr, ref_stride,
@@ -166,21 +168,21 @@
unsigned int ref_ret, opt_ret;
unsigned int ref_sse, opt_sse;
ACMRandom rnd(ACMRandom::DeterministicSeed());
- DECLARE_ALIGNED(16, uint8_t, src_ptr[(MAX_SIZE+1)*(MAX_SIZE+1)]);
- DECLARE_ALIGNED(16, uint8_t, ref_ptr[(MAX_SIZE+1)*(MAX_SIZE+1)]);
- DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_SIZE+1)*(MAX_SIZE+1)]);
+ DECLARE_ALIGNED(16, uint8_t, src_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]);
+ DECLARE_ALIGNED(16, uint8_t, ref_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]);
+ DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]);
int err_count = 0;
int first_failure = -1;
- int src_stride = (MAX_SIZE+1);
- int ref_stride = (MAX_SIZE+1);
- int msk_stride = (MAX_SIZE+1);
+ int src_stride = (MAX_CU_SIZE+1);
+ int ref_stride = (MAX_CU_SIZE+1);
+ int msk_stride = (MAX_CU_SIZE+1);
int xoffset;
int yoffset;
for (int i = 0; i < number_of_iterations; ++i) {
int xoffsets[] = {0, 4, rnd(BIL_SUBPEL_SHIFTS)};
int yoffsets[] = {0, 4, rnd(BIL_SUBPEL_SHIFTS)};
- for (int j = 0; j < (MAX_SIZE+1)*(MAX_SIZE+1); j++) {
+ for (int j = 0; j < (MAX_CU_SIZE+1)*(MAX_CU_SIZE+1); j++) {
src_ptr[j] = rnd.Rand8();
ref_ptr[j] = rnd.Rand8();
msk_ptr[j] = rnd(65);
@@ -221,23 +223,23 @@
unsigned int ref_ret, opt_ret;
unsigned int ref_sse, opt_sse;
ACMRandom rnd(ACMRandom::DeterministicSeed());
- DECLARE_ALIGNED(16, uint8_t, src_ptr[(MAX_SIZE+1)*(MAX_SIZE+1)]);
- DECLARE_ALIGNED(16, uint8_t, ref_ptr[(MAX_SIZE+1)*(MAX_SIZE+1)]);
- DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_SIZE+1)*(MAX_SIZE+1)]);
+ DECLARE_ALIGNED(16, uint8_t, src_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]);
+ DECLARE_ALIGNED(16, uint8_t, ref_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]);
+ DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]);
int first_failure_x = -1;
int first_failure_y = -1;
int err_count = 0;
int first_failure = -1;
- int src_stride = (MAX_SIZE+1);
- int ref_stride = (MAX_SIZE+1);
- int msk_stride = (MAX_SIZE+1);
+ int src_stride = (MAX_CU_SIZE+1);
+ int ref_stride = (MAX_CU_SIZE+1);
+ int msk_stride = (MAX_CU_SIZE+1);
for (int xoffset = 0 ; xoffset < BIL_SUBPEL_SHIFTS ; xoffset++) {
for (int yoffset = 0 ; yoffset < BIL_SUBPEL_SHIFTS ; yoffset++) {
for (int i = 0; i < 8; ++i) {
- memset(src_ptr, (i & 0x1) ? 255 : 0, (MAX_SIZE+1)*(MAX_SIZE+1));
- memset(ref_ptr, (i & 0x2) ? 255 : 0, (MAX_SIZE+1)*(MAX_SIZE+1));
- memset(msk_ptr, (i & 0x4) ? 64 : 0, (MAX_SIZE+1)*(MAX_SIZE+1));
+ memset(src_ptr, (i & 0x1) ? 255 : 0, (MAX_CU_SIZE+1)*(MAX_CU_SIZE+1));
+ memset(ref_ptr, (i & 0x2) ? 255 : 0, (MAX_CU_SIZE+1)*(MAX_CU_SIZE+1));
+ memset(msk_ptr, (i & 0x4) ? 64 : 0, (MAX_CU_SIZE+1)*(MAX_CU_SIZE+1));
ref_ret = ref_func_(src_ptr, src_stride,
xoffset, yoffset,
@@ -297,19 +299,19 @@
unsigned int ref_ret, opt_ret;
unsigned int ref_sse, opt_sse;
ACMRandom rnd(ACMRandom::DeterministicSeed());
- DECLARE_ALIGNED(16, uint16_t, src_ptr[MAX_SIZE*MAX_SIZE]);
- DECLARE_ALIGNED(16, uint16_t, ref_ptr[MAX_SIZE*MAX_SIZE]);
- DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SIZE*MAX_SIZE]);
+ DECLARE_ALIGNED(16, uint16_t, src_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
+ DECLARE_ALIGNED(16, uint16_t, ref_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
+ DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
uint8_t* src8_ptr = CONVERT_TO_BYTEPTR(src_ptr);
uint8_t* ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr);
int err_count = 0;
int first_failure = -1;
- int src_stride = MAX_SIZE;
- int ref_stride = MAX_SIZE;
- int msk_stride = MAX_SIZE;
+ int src_stride = MAX_CU_SIZE;
+ int ref_stride = MAX_CU_SIZE;
+ int msk_stride = MAX_CU_SIZE;
for (int i = 0; i < number_of_iterations; ++i) {
- for (int j = 0; j < MAX_SIZE*MAX_SIZE; j++) {
+ for (int j = 0; j < MAX_CU_SIZE*MAX_CU_SIZE; j++) {
src_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1);
ref_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1);
msk_ptr[j] = rnd(65);
@@ -341,23 +343,23 @@
unsigned int ref_ret, opt_ret;
unsigned int ref_sse, opt_sse;
ACMRandom rnd(ACMRandom::DeterministicSeed());
- DECLARE_ALIGNED(16, uint16_t, src_ptr[MAX_SIZE*MAX_SIZE]);
- DECLARE_ALIGNED(16, uint16_t, ref_ptr[MAX_SIZE*MAX_SIZE]);
- DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SIZE*MAX_SIZE]);
+ DECLARE_ALIGNED(16, uint16_t, src_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
+ DECLARE_ALIGNED(16, uint16_t, ref_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
+ DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
uint8_t* src8_ptr = CONVERT_TO_BYTEPTR(src_ptr);
uint8_t* ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr);
int err_count = 0;
int first_failure = -1;
- int src_stride = MAX_SIZE;
- int ref_stride = MAX_SIZE;
- int msk_stride = MAX_SIZE;
+ int src_stride = MAX_CU_SIZE;
+ int ref_stride = MAX_CU_SIZE;
+ int msk_stride = MAX_CU_SIZE;
for (int i = 0; i < 8; ++i) {
vpx_memset16(src_ptr, (i & 0x1) ? ((1 << bit_depth_) - 1) : 0,
- MAX_SIZE*MAX_SIZE);
+ MAX_CU_SIZE*MAX_CU_SIZE);
vpx_memset16(ref_ptr, (i & 0x2) ? ((1 << bit_depth_) - 1) : 0,
- MAX_SIZE*MAX_SIZE);
- memset(msk_ptr, (i & 0x4) ? 64 : 0, MAX_SIZE*MAX_SIZE);
+ MAX_CU_SIZE*MAX_CU_SIZE);
+ memset(msk_ptr, (i & 0x4) ? 64 : 0, MAX_CU_SIZE*MAX_CU_SIZE);
ref_ret = ref_func_(src8_ptr, src_stride,
ref8_ptr, ref_stride,
@@ -407,24 +409,24 @@
unsigned int ref_ret, opt_ret;
unsigned int ref_sse, opt_sse;
ACMRandom rnd(ACMRandom::DeterministicSeed());
- DECLARE_ALIGNED(16, uint16_t, src_ptr[(MAX_SIZE+1)*(MAX_SIZE+1)]);
- DECLARE_ALIGNED(16, uint16_t, ref_ptr[(MAX_SIZE+1)*(MAX_SIZE+1)]);
- DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_SIZE+1)*(MAX_SIZE+1)]);
+ DECLARE_ALIGNED(16, uint16_t, src_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]);
+ DECLARE_ALIGNED(16, uint16_t, ref_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]);
+ DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]);
uint8_t* src8_ptr = CONVERT_TO_BYTEPTR(src_ptr);
uint8_t* ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr);
int err_count = 0;
int first_failure = -1;
int first_failure_x = -1;
int first_failure_y = -1;
- int src_stride = (MAX_SIZE+1);
- int ref_stride = (MAX_SIZE+1);
- int msk_stride = (MAX_SIZE+1);
+ int src_stride = (MAX_CU_SIZE+1);
+ int ref_stride = (MAX_CU_SIZE+1);
+ int msk_stride = (MAX_CU_SIZE+1);
int xoffset, yoffset;
for (int i = 0; i < number_of_iterations; ++i) {
for (xoffset = 0; xoffset < BIL_SUBPEL_SHIFTS; xoffset++) {
for (yoffset = 0; yoffset < BIL_SUBPEL_SHIFTS; yoffset++) {
- for (int j = 0; j < (MAX_SIZE+1)*(MAX_SIZE+1); j++) {
+ for (int j = 0; j < (MAX_CU_SIZE+1)*(MAX_CU_SIZE+1); j++) {
src_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1);
ref_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1);
msk_ptr[j] = rnd(65);
@@ -465,27 +467,27 @@
unsigned int ref_ret, opt_ret;
unsigned int ref_sse, opt_sse;
ACMRandom rnd(ACMRandom::DeterministicSeed());
- DECLARE_ALIGNED(16, uint16_t, src_ptr[(MAX_SIZE+1)*(MAX_SIZE+1)]);
- DECLARE_ALIGNED(16, uint16_t, ref_ptr[(MAX_SIZE+1)*(MAX_SIZE+1)]);
- DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_SIZE+1)*(MAX_SIZE+1)]);
+ DECLARE_ALIGNED(16, uint16_t, src_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]);
+ DECLARE_ALIGNED(16, uint16_t, ref_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]);
+ DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]);
uint8_t* src8_ptr = CONVERT_TO_BYTEPTR(src_ptr);
uint8_t* ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr);
int first_failure_x = -1;
int first_failure_y = -1;
int err_count = 0;
int first_failure = -1;
- int src_stride = (MAX_SIZE+1);
- int ref_stride = (MAX_SIZE+1);
- int msk_stride = (MAX_SIZE+1);
+ int src_stride = (MAX_CU_SIZE+1);
+ int ref_stride = (MAX_CU_SIZE+1);
+ int msk_stride = (MAX_CU_SIZE+1);
for (int xoffset = 0 ; xoffset < BIL_SUBPEL_SHIFTS ; xoffset++) {
for (int yoffset = 0 ; yoffset < BIL_SUBPEL_SHIFTS ; yoffset++) {
for (int i = 0; i < 8; ++i) {
vpx_memset16(src_ptr, (i & 0x1) ? ((1 << bit_depth_) - 1) : 0,
- (MAX_SIZE+1)*(MAX_SIZE+1));
+ (MAX_CU_SIZE+1)*(MAX_CU_SIZE+1));
vpx_memset16(ref_ptr, (i & 0x2) ? ((1 << bit_depth_) - 1) : 0,
- (MAX_SIZE+1)*(MAX_SIZE+1));
- memset(msk_ptr, (i & 0x4) ? 64 : 0, (MAX_SIZE+1)*(MAX_SIZE+1));
+ (MAX_CU_SIZE+1)*(MAX_CU_SIZE+1));
+ memset(msk_ptr, (i & 0x4) ? 64 : 0, (MAX_CU_SIZE+1)*(MAX_CU_SIZE+1));
ref_ret = ref_func_(src8_ptr, src_stride,
xoffset, yoffset,
@@ -525,6 +527,14 @@
INSTANTIATE_TEST_CASE_P(
SSSE3_C_COMPARE, MaskedVarianceTest,
::testing::Values(
+#if CONFIG_EXT_PARTITION
+ make_tuple(&vpx_masked_variance128x128_ssse3,
+ &vpx_masked_variance128x128_c),
+ make_tuple(&vpx_masked_variance128x64_ssse3,
+ &vpx_masked_variance128x64_c),
+ make_tuple(&vpx_masked_variance64x128_ssse3,
+ &vpx_masked_variance64x128_c),
+#endif // CONFIG_EXT_PARTITION
make_tuple(&vpx_masked_variance64x64_ssse3,
&vpx_masked_variance64x64_c),
make_tuple(&vpx_masked_variance64x32_ssse3,
@@ -555,197 +565,253 @@
INSTANTIATE_TEST_CASE_P(
SSSE3_C_COMPARE, MaskedSubPixelVarianceTest,
::testing::Values(
+#if CONFIG_EXT_PARTITION
+ make_tuple(&vpx_masked_sub_pixel_variance128x128_ssse3,
+ &vpx_masked_sub_pixel_variance128x128_c),
+ make_tuple(&vpx_masked_sub_pixel_variance128x64_ssse3,
+ &vpx_masked_sub_pixel_variance128x64_c),
+ make_tuple(&vpx_masked_sub_pixel_variance64x128_ssse3,
+ &vpx_masked_sub_pixel_variance64x128_c),
+#endif // CONFIG_EXT_PARTITION
make_tuple(&vpx_masked_sub_pixel_variance64x64_ssse3,
- &vpx_masked_sub_pixel_variance64x64_c),
+ &vpx_masked_sub_pixel_variance64x64_c),
make_tuple(&vpx_masked_sub_pixel_variance64x32_ssse3,
- &vpx_masked_sub_pixel_variance64x32_c),
+ &vpx_masked_sub_pixel_variance64x32_c),
make_tuple(&vpx_masked_sub_pixel_variance32x64_ssse3,
- &vpx_masked_sub_pixel_variance32x64_c),
+ &vpx_masked_sub_pixel_variance32x64_c),
make_tuple(&vpx_masked_sub_pixel_variance32x32_ssse3,
- &vpx_masked_sub_pixel_variance32x32_c),
+ &vpx_masked_sub_pixel_variance32x32_c),
make_tuple(&vpx_masked_sub_pixel_variance32x16_ssse3,
- &vpx_masked_sub_pixel_variance32x16_c),
+ &vpx_masked_sub_pixel_variance32x16_c),
make_tuple(&vpx_masked_sub_pixel_variance16x32_ssse3,
- &vpx_masked_sub_pixel_variance16x32_c),
+ &vpx_masked_sub_pixel_variance16x32_c),
make_tuple(&vpx_masked_sub_pixel_variance16x16_ssse3,
- &vpx_masked_sub_pixel_variance16x16_c),
+ &vpx_masked_sub_pixel_variance16x16_c),
make_tuple(&vpx_masked_sub_pixel_variance16x8_ssse3,
- &vpx_masked_sub_pixel_variance16x8_c),
+ &vpx_masked_sub_pixel_variance16x8_c),
make_tuple(&vpx_masked_sub_pixel_variance8x16_ssse3,
- &vpx_masked_sub_pixel_variance8x16_c),
+ &vpx_masked_sub_pixel_variance8x16_c),
make_tuple(&vpx_masked_sub_pixel_variance8x8_ssse3,
- &vpx_masked_sub_pixel_variance8x8_c),
+ &vpx_masked_sub_pixel_variance8x8_c),
make_tuple(&vpx_masked_sub_pixel_variance8x4_ssse3,
- &vpx_masked_sub_pixel_variance8x4_c),
+ &vpx_masked_sub_pixel_variance8x4_c),
make_tuple(&vpx_masked_sub_pixel_variance4x8_ssse3,
- &vpx_masked_sub_pixel_variance4x8_c),
+ &vpx_masked_sub_pixel_variance4x8_c),
make_tuple(&vpx_masked_sub_pixel_variance4x4_ssse3,
- &vpx_masked_sub_pixel_variance4x4_c)));
+ &vpx_masked_sub_pixel_variance4x4_c)));
#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
SSSE3_C_COMPARE, HighbdMaskedVarianceTest,
::testing::Values(
- make_tuple(&vp9_highbd_masked_variance64x64_ssse3,
- &vp9_highbd_masked_variance64x64_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_variance64x32_ssse3,
- &vp9_highbd_masked_variance64x32_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_variance32x64_ssse3,
- &vp9_highbd_masked_variance32x64_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_variance32x32_ssse3,
- &vp9_highbd_masked_variance32x32_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_variance32x16_ssse3,
- &vp9_highbd_masked_variance32x16_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_variance16x32_ssse3,
- &vp9_highbd_masked_variance16x32_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_variance16x16_ssse3,
- &vp9_highbd_masked_variance16x16_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_variance16x8_ssse3,
- &vp9_highbd_masked_variance16x8_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_variance8x16_ssse3,
- &vp9_highbd_masked_variance8x16_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_variance8x8_ssse3,
- &vp9_highbd_masked_variance8x8_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_variance8x4_ssse3,
- &vp9_highbd_masked_variance8x4_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_variance4x8_ssse3,
- &vp9_highbd_masked_variance4x8_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_variance4x4_ssse3,
- &vp9_highbd_masked_variance4x4_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_10_masked_variance64x64_ssse3,
- &vp9_highbd_10_masked_variance64x64_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_variance64x32_ssse3,
- &vp9_highbd_10_masked_variance64x32_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_variance32x64_ssse3,
- &vp9_highbd_10_masked_variance32x64_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_variance32x32_ssse3,
- &vp9_highbd_10_masked_variance32x32_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_variance32x16_ssse3,
- &vp9_highbd_10_masked_variance32x16_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_variance16x32_ssse3,
- &vp9_highbd_10_masked_variance16x32_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_variance16x16_ssse3,
- &vp9_highbd_10_masked_variance16x16_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_variance16x8_ssse3,
- &vp9_highbd_10_masked_variance16x8_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_variance8x16_ssse3,
- &vp9_highbd_10_masked_variance8x16_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_variance8x8_ssse3,
- &vp9_highbd_10_masked_variance8x8_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_variance8x4_ssse3,
- &vp9_highbd_10_masked_variance8x4_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_variance4x8_ssse3,
- &vp9_highbd_10_masked_variance4x8_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_variance4x4_ssse3,
- &vp9_highbd_10_masked_variance4x4_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_12_masked_variance64x64_ssse3,
- &vp9_highbd_12_masked_variance64x64_c, VPX_BITS_12),
- make_tuple(&vp9_highbd_12_masked_variance64x32_ssse3,
- &vp9_highbd_12_masked_variance64x32_c, VPX_BITS_12),
- make_tuple(&vp9_highbd_12_masked_variance32x64_ssse3,
- &vp9_highbd_12_masked_variance32x64_c, VPX_BITS_12),
- make_tuple(&vp9_highbd_12_masked_variance32x32_ssse3,
- &vp9_highbd_12_masked_variance32x32_c, VPX_BITS_12),
- make_tuple(&vp9_highbd_12_masked_variance32x16_ssse3,
- &vp9_highbd_12_masked_variance32x16_c, VPX_BITS_12),
- make_tuple(&vp9_highbd_12_masked_variance16x32_ssse3,
- &vp9_highbd_12_masked_variance16x32_c, VPX_BITS_12),
- make_tuple(&vp9_highbd_12_masked_variance16x16_ssse3,
- &vp9_highbd_12_masked_variance16x16_c, VPX_BITS_12),
- make_tuple(&vp9_highbd_12_masked_variance16x8_ssse3,
- &vp9_highbd_12_masked_variance16x8_c, VPX_BITS_12),
- make_tuple(&vp9_highbd_12_masked_variance8x16_ssse3,
- &vp9_highbd_12_masked_variance8x16_c, VPX_BITS_12),
- make_tuple(&vp9_highbd_12_masked_variance8x8_ssse3,
- &vp9_highbd_12_masked_variance8x8_c, VPX_BITS_12),
- make_tuple(&vp9_highbd_12_masked_variance8x4_ssse3,
- &vp9_highbd_12_masked_variance8x4_c, VPX_BITS_12),
- make_tuple(&vp9_highbd_12_masked_variance4x8_ssse3,
- &vp9_highbd_12_masked_variance4x8_c, VPX_BITS_12),
- make_tuple(&vp9_highbd_12_masked_variance4x4_ssse3,
- &vp9_highbd_12_masked_variance4x4_c, VPX_BITS_12)));
+#if CONFIG_EXT_PARTITION
+ make_tuple(&vpx_highbd_masked_variance128x128_ssse3,
+ &vpx_highbd_masked_variance128x128_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_variance128x64_ssse3,
+ &vpx_highbd_masked_variance128x64_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_variance64x128_ssse3,
+ &vpx_highbd_masked_variance64x128_c, VPX_BITS_8),
+#endif // CONFIG_EXT_PARTITION
+ make_tuple(&vpx_highbd_masked_variance64x64_ssse3,
+ &vpx_highbd_masked_variance64x64_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_variance64x32_ssse3,
+ &vpx_highbd_masked_variance64x32_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_variance32x64_ssse3,
+ &vpx_highbd_masked_variance32x64_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_variance32x32_ssse3,
+ &vpx_highbd_masked_variance32x32_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_variance32x16_ssse3,
+ &vpx_highbd_masked_variance32x16_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_variance16x32_ssse3,
+ &vpx_highbd_masked_variance16x32_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_variance16x16_ssse3,
+ &vpx_highbd_masked_variance16x16_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_variance16x8_ssse3,
+ &vpx_highbd_masked_variance16x8_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_variance8x16_ssse3,
+ &vpx_highbd_masked_variance8x16_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_variance8x8_ssse3,
+ &vpx_highbd_masked_variance8x8_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_variance8x4_ssse3,
+ &vpx_highbd_masked_variance8x4_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_variance4x8_ssse3,
+ &vpx_highbd_masked_variance4x8_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_variance4x4_ssse3,
+ &vpx_highbd_masked_variance4x4_c, VPX_BITS_8),
+#if CONFIG_EXT_PARTITION
+ make_tuple(&vpx_highbd_10_masked_variance128x128_ssse3,
+ &vpx_highbd_10_masked_variance128x128_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_variance128x64_ssse3,
+ &vpx_highbd_10_masked_variance128x64_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_variance64x128_ssse3,
+ &vpx_highbd_10_masked_variance64x128_c, VPX_BITS_10),
+#endif // CONFIG_EXT_PARTITION
+ make_tuple(&vpx_highbd_10_masked_variance64x64_ssse3,
+ &vpx_highbd_10_masked_variance64x64_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_variance64x32_ssse3,
+ &vpx_highbd_10_masked_variance64x32_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_variance32x64_ssse3,
+ &vpx_highbd_10_masked_variance32x64_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_variance32x32_ssse3,
+ &vpx_highbd_10_masked_variance32x32_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_variance32x16_ssse3,
+ &vpx_highbd_10_masked_variance32x16_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_variance16x32_ssse3,
+ &vpx_highbd_10_masked_variance16x32_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_variance16x16_ssse3,
+ &vpx_highbd_10_masked_variance16x16_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_variance16x8_ssse3,
+ &vpx_highbd_10_masked_variance16x8_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_variance8x16_ssse3,
+ &vpx_highbd_10_masked_variance8x16_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_variance8x8_ssse3,
+ &vpx_highbd_10_masked_variance8x8_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_variance8x4_ssse3,
+ &vpx_highbd_10_masked_variance8x4_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_variance4x8_ssse3,
+ &vpx_highbd_10_masked_variance4x8_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_variance4x4_ssse3,
+ &vpx_highbd_10_masked_variance4x4_c, VPX_BITS_10),
+#if CONFIG_EXT_PARTITION
+ make_tuple(&vpx_highbd_12_masked_variance128x128_ssse3,
+ &vpx_highbd_12_masked_variance128x128_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_variance128x64_ssse3,
+ &vpx_highbd_12_masked_variance128x64_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_variance64x128_ssse3,
+ &vpx_highbd_12_masked_variance64x128_c, VPX_BITS_12),
+#endif // CONFIG_EXT_PARTITION
+ make_tuple(&vpx_highbd_12_masked_variance64x64_ssse3,
+ &vpx_highbd_12_masked_variance64x64_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_variance64x32_ssse3,
+ &vpx_highbd_12_masked_variance64x32_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_variance32x64_ssse3,
+ &vpx_highbd_12_masked_variance32x64_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_variance32x32_ssse3,
+ &vpx_highbd_12_masked_variance32x32_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_variance32x16_ssse3,
+ &vpx_highbd_12_masked_variance32x16_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_variance16x32_ssse3,
+ &vpx_highbd_12_masked_variance16x32_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_variance16x16_ssse3,
+ &vpx_highbd_12_masked_variance16x16_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_variance16x8_ssse3,
+ &vpx_highbd_12_masked_variance16x8_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_variance8x16_ssse3,
+ &vpx_highbd_12_masked_variance8x16_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_variance8x8_ssse3,
+ &vpx_highbd_12_masked_variance8x8_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_variance8x4_ssse3,
+ &vpx_highbd_12_masked_variance8x4_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_variance4x8_ssse3,
+ &vpx_highbd_12_masked_variance4x8_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_variance4x4_ssse3,
+ &vpx_highbd_12_masked_variance4x4_c, VPX_BITS_12)));
INSTANTIATE_TEST_CASE_P(
SSSE3_C_COMPARE, HighbdMaskedSubPixelVarianceTest,
::testing::Values(
- make_tuple(&vp9_highbd_masked_sub_pixel_variance64x64_ssse3,
- &vp9_highbd_masked_sub_pixel_variance64x64_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_sub_pixel_variance64x32_ssse3,
- &vp9_highbd_masked_sub_pixel_variance64x32_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_sub_pixel_variance32x64_ssse3,
- &vp9_highbd_masked_sub_pixel_variance32x64_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_sub_pixel_variance32x32_ssse3,
- &vp9_highbd_masked_sub_pixel_variance32x32_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_sub_pixel_variance32x16_ssse3,
- &vp9_highbd_masked_sub_pixel_variance32x16_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_sub_pixel_variance16x32_ssse3,
- &vp9_highbd_masked_sub_pixel_variance16x32_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_sub_pixel_variance16x16_ssse3,
- &vp9_highbd_masked_sub_pixel_variance16x16_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_sub_pixel_variance16x8_ssse3,
- &vp9_highbd_masked_sub_pixel_variance16x8_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_sub_pixel_variance8x16_ssse3,
- &vp9_highbd_masked_sub_pixel_variance8x16_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_sub_pixel_variance8x8_ssse3,
- &vp9_highbd_masked_sub_pixel_variance8x8_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_sub_pixel_variance8x4_ssse3,
- &vp9_highbd_masked_sub_pixel_variance8x4_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_sub_pixel_variance4x8_ssse3,
- &vp9_highbd_masked_sub_pixel_variance4x8_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_masked_sub_pixel_variance4x4_ssse3,
- &vp9_highbd_masked_sub_pixel_variance4x4_c, VPX_BITS_8),
- make_tuple(&vp9_highbd_10_masked_sub_pixel_variance64x64_ssse3,
- &vp9_highbd_10_masked_sub_pixel_variance64x64_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_sub_pixel_variance64x32_ssse3,
- &vp9_highbd_10_masked_sub_pixel_variance64x32_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_sub_pixel_variance32x64_ssse3,
- &vp9_highbd_10_masked_sub_pixel_variance32x64_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_sub_pixel_variance32x32_ssse3,
- &vp9_highbd_10_masked_sub_pixel_variance32x32_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_sub_pixel_variance32x16_ssse3,
- &vp9_highbd_10_masked_sub_pixel_variance32x16_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_sub_pixel_variance16x32_ssse3,
- &vp9_highbd_10_masked_sub_pixel_variance16x32_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_sub_pixel_variance16x16_ssse3,
- &vp9_highbd_10_masked_sub_pixel_variance16x16_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_sub_pixel_variance16x8_ssse3,
- &vp9_highbd_10_masked_sub_pixel_variance16x8_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_sub_pixel_variance8x16_ssse3,
- &vp9_highbd_10_masked_sub_pixel_variance8x16_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_sub_pixel_variance8x8_ssse3,
- &vp9_highbd_10_masked_sub_pixel_variance8x8_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_sub_pixel_variance8x4_ssse3,
- &vp9_highbd_10_masked_sub_pixel_variance8x4_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_sub_pixel_variance4x8_ssse3,
- &vp9_highbd_10_masked_sub_pixel_variance4x8_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_10_masked_sub_pixel_variance4x4_ssse3,
- &vp9_highbd_10_masked_sub_pixel_variance4x4_c, VPX_BITS_10),
- make_tuple(&vp9_highbd_12_masked_sub_pixel_variance64x64_ssse3,
- &vp9_highbd_12_masked_sub_pixel_variance64x64_c, VPX_BITS_12),
- make_tuple(&vp9_highbd_12_masked_sub_pixel_variance64x32_ssse3,
- &vp9_highbd_12_masked_sub_pixel_variance64x32_c, VPX_BITS_12),
- make_tuple(&vp9_highbd_12_masked_sub_pixel_variance32x64_ssse3,
- &vp9_highbd_12_masked_sub_pixel_variance32x64_c, VPX_BITS_12),
- make_tuple(&vp9_highbd_12_masked_sub_pixel_variance32x32_ssse3,
- &vp9_highbd_12_masked_sub_pixel_variance32x32_c, VPX_BITS_12),
- make_tuple(&vp9_highbd_12_masked_sub_pixel_variance32x16_ssse3,
- &vp9_highbd_12_masked_sub_pixel_variance32x16_c, VPX_BITS_12),
- make_tuple(&vp9_highbd_12_masked_sub_pixel_variance16x32_ssse3,
- &vp9_highbd_12_masked_sub_pixel_variance16x32_c, VPX_BITS_12),
- make_tuple(&vp9_highbd_12_masked_sub_pixel_variance16x16_ssse3,
- &vp9_highbd_12_masked_sub_pixel_variance16x16_c, VPX_BITS_12),
- make_tuple(&vp9_highbd_12_masked_sub_pixel_variance16x8_ssse3,
- &vp9_highbd_12_masked_sub_pixel_variance16x8_c, VPX_BITS_12),
- make_tuple(&vp9_highbd_12_masked_sub_pixel_variance8x16_ssse3,
- &vp9_highbd_12_masked_sub_pixel_variance8x16_c, VPX_BITS_12),
- make_tuple(&vp9_highbd_12_masked_sub_pixel_variance8x8_ssse3,
- &vp9_highbd_12_masked_sub_pixel_variance8x8_c, VPX_BITS_12),
- make_tuple(&vp9_highbd_12_masked_sub_pixel_variance8x4_ssse3,
- &vp9_highbd_12_masked_sub_pixel_variance8x4_c, VPX_BITS_12) ,
- make_tuple(&vp9_highbd_12_masked_sub_pixel_variance4x8_ssse3,
- &vp9_highbd_12_masked_sub_pixel_variance4x8_c, VPX_BITS_12),
- make_tuple(&vp9_highbd_12_masked_sub_pixel_variance4x4_ssse3,
- &vp9_highbd_12_masked_sub_pixel_variance4x4_c, VPX_BITS_12)));
+#if CONFIG_EXT_PARTITION
+ make_tuple(&vpx_highbd_masked_sub_pixel_variance128x128_ssse3,
+ &vpx_highbd_masked_sub_pixel_variance128x128_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_sub_pixel_variance128x64_ssse3,
+ &vpx_highbd_masked_sub_pixel_variance128x64_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_sub_pixel_variance64x128_ssse3,
+ &vpx_highbd_masked_sub_pixel_variance64x128_c, VPX_BITS_8),
+#endif // CONFIG_EXT_PARTITION
+ make_tuple(&vpx_highbd_masked_sub_pixel_variance64x64_ssse3,
+ &vpx_highbd_masked_sub_pixel_variance64x64_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_sub_pixel_variance64x32_ssse3,
+ &vpx_highbd_masked_sub_pixel_variance64x32_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_sub_pixel_variance32x64_ssse3,
+ &vpx_highbd_masked_sub_pixel_variance32x64_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_sub_pixel_variance32x32_ssse3,
+ &vpx_highbd_masked_sub_pixel_variance32x32_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_sub_pixel_variance32x16_ssse3,
+ &vpx_highbd_masked_sub_pixel_variance32x16_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_sub_pixel_variance16x32_ssse3,
+ &vpx_highbd_masked_sub_pixel_variance16x32_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_sub_pixel_variance16x16_ssse3,
+ &vpx_highbd_masked_sub_pixel_variance16x16_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_sub_pixel_variance16x8_ssse3,
+ &vpx_highbd_masked_sub_pixel_variance16x8_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_sub_pixel_variance8x16_ssse3,
+ &vpx_highbd_masked_sub_pixel_variance8x16_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_sub_pixel_variance8x8_ssse3,
+ &vpx_highbd_masked_sub_pixel_variance8x8_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_sub_pixel_variance8x4_ssse3,
+ &vpx_highbd_masked_sub_pixel_variance8x4_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_sub_pixel_variance4x8_ssse3,
+ &vpx_highbd_masked_sub_pixel_variance4x8_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_masked_sub_pixel_variance4x4_ssse3,
+ &vpx_highbd_masked_sub_pixel_variance4x4_c, VPX_BITS_8),
+#if CONFIG_EXT_PARTITION
+ make_tuple(&vpx_highbd_10_masked_sub_pixel_variance128x128_ssse3,
+ &vpx_highbd_10_masked_sub_pixel_variance128x128_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_sub_pixel_variance128x64_ssse3,
+ &vpx_highbd_10_masked_sub_pixel_variance128x64_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_sub_pixel_variance64x128_ssse3,
+ &vpx_highbd_10_masked_sub_pixel_variance64x128_c, VPX_BITS_10),
+#endif // CONFIG_EXT_PARTITION
+ make_tuple(&vpx_highbd_10_masked_sub_pixel_variance64x64_ssse3,
+ &vpx_highbd_10_masked_sub_pixel_variance64x64_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_sub_pixel_variance64x32_ssse3,
+ &vpx_highbd_10_masked_sub_pixel_variance64x32_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_sub_pixel_variance32x64_ssse3,
+ &vpx_highbd_10_masked_sub_pixel_variance32x64_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_sub_pixel_variance32x32_ssse3,
+ &vpx_highbd_10_masked_sub_pixel_variance32x32_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_sub_pixel_variance32x16_ssse3,
+ &vpx_highbd_10_masked_sub_pixel_variance32x16_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_sub_pixel_variance16x32_ssse3,
+ &vpx_highbd_10_masked_sub_pixel_variance16x32_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_sub_pixel_variance16x16_ssse3,
+ &vpx_highbd_10_masked_sub_pixel_variance16x16_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_sub_pixel_variance16x8_ssse3,
+ &vpx_highbd_10_masked_sub_pixel_variance16x8_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_sub_pixel_variance8x16_ssse3,
+ &vpx_highbd_10_masked_sub_pixel_variance8x16_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_sub_pixel_variance8x8_ssse3,
+ &vpx_highbd_10_masked_sub_pixel_variance8x8_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_sub_pixel_variance8x4_ssse3,
+ &vpx_highbd_10_masked_sub_pixel_variance8x4_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_sub_pixel_variance4x8_ssse3,
+ &vpx_highbd_10_masked_sub_pixel_variance4x8_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_10_masked_sub_pixel_variance4x4_ssse3,
+ &vpx_highbd_10_masked_sub_pixel_variance4x4_c, VPX_BITS_10),
+#if CONFIG_EXT_PARTITION
+ make_tuple(&vpx_highbd_12_masked_sub_pixel_variance128x128_ssse3,
+ &vpx_highbd_12_masked_sub_pixel_variance128x128_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_sub_pixel_variance128x64_ssse3,
+ &vpx_highbd_12_masked_sub_pixel_variance128x64_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_sub_pixel_variance64x128_ssse3,
+ &vpx_highbd_12_masked_sub_pixel_variance64x128_c, VPX_BITS_12),
+#endif // CONFIG_EXT_PARTITION
+ make_tuple(&vpx_highbd_12_masked_sub_pixel_variance64x64_ssse3,
+ &vpx_highbd_12_masked_sub_pixel_variance64x64_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_sub_pixel_variance64x32_ssse3,
+ &vpx_highbd_12_masked_sub_pixel_variance64x32_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_sub_pixel_variance32x64_ssse3,
+ &vpx_highbd_12_masked_sub_pixel_variance32x64_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_sub_pixel_variance32x32_ssse3,
+ &vpx_highbd_12_masked_sub_pixel_variance32x32_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_sub_pixel_variance32x16_ssse3,
+ &vpx_highbd_12_masked_sub_pixel_variance32x16_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_sub_pixel_variance16x32_ssse3,
+ &vpx_highbd_12_masked_sub_pixel_variance16x32_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_sub_pixel_variance16x16_ssse3,
+ &vpx_highbd_12_masked_sub_pixel_variance16x16_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_sub_pixel_variance16x8_ssse3,
+ &vpx_highbd_12_masked_sub_pixel_variance16x8_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_sub_pixel_variance8x16_ssse3,
+ &vpx_highbd_12_masked_sub_pixel_variance8x16_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_sub_pixel_variance8x8_ssse3,
+ &vpx_highbd_12_masked_sub_pixel_variance8x8_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_sub_pixel_variance8x4_ssse3,
+ &vpx_highbd_12_masked_sub_pixel_variance8x4_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_sub_pixel_variance4x8_ssse3,
+ &vpx_highbd_12_masked_sub_pixel_variance4x8_c, VPX_BITS_12),
+ make_tuple(&vpx_highbd_12_masked_sub_pixel_variance4x4_ssse3,
+ &vpx_highbd_12_masked_sub_pixel_variance4x4_c, VPX_BITS_12)));
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // HAVE_SSSE3
diff --git a/test/sad_test.cc b/test/sad_test.cc
index 3f0f74c..1985e18 100644
--- a/test/sad_test.cc
+++ b/test/sad_test.cc
@@ -59,13 +59,13 @@
reference_data8_ = reinterpret_cast<uint8_t*>(
vpx_memalign(kDataAlignment, kDataBufferSize));
second_pred8_ = reinterpret_cast<uint8_t*>(
- vpx_memalign(kDataAlignment, 64*64));
+ vpx_memalign(kDataAlignment, 128*128));
source_data16_ = reinterpret_cast<uint16_t*>(
vpx_memalign(kDataAlignment, kDataBlockSize*sizeof(uint16_t)));
reference_data16_ = reinterpret_cast<uint16_t*>(
vpx_memalign(kDataAlignment, kDataBufferSize*sizeof(uint16_t)));
second_pred16_ = reinterpret_cast<uint16_t*>(
- vpx_memalign(kDataAlignment, 64*64*sizeof(uint16_t)));
+ vpx_memalign(kDataAlignment, 128*128*sizeof(uint16_t)));
}
static void TearDownTestCase() {
@@ -88,9 +88,9 @@
}
protected:
- // Handle blocks up to 4 blocks 64x64 with stride up to 128
+ // Handle up to 4 128x128 blocks, with stride up to 256
static const int kDataAlignment = 16;
- static const int kDataBlockSize = 64 * 128;
+ static const int kDataBlockSize = 128 * 256;
static const int kDataBufferSize = 4 * kDataBlockSize;
virtual void SetUp() {
@@ -485,6 +485,11 @@
//------------------------------------------------------------------------------
// C functions
const SadMxNParam c_tests[] = {
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(128, 128, &vpx_sad128x128_c, -1),
+ make_tuple(128, 64, &vpx_sad128x64_c, -1),
+ make_tuple(64, 128, &vpx_sad64x128_c, -1),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(64, 64, &vpx_sad64x64_c, -1),
make_tuple(64, 32, &vpx_sad64x32_c, -1),
make_tuple(32, 64, &vpx_sad32x64_c, -1),
@@ -499,6 +504,11 @@
make_tuple(4, 8, &vpx_sad4x8_c, -1),
make_tuple(4, 4, &vpx_sad4x4_c, -1),
#if CONFIG_VP9_HIGHBITDEPTH
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(128, 128, &vpx_highbd_sad128x128_c, 8),
+ make_tuple(128, 64, &vpx_highbd_sad128x64_c, 8),
+ make_tuple(64, 128, &vpx_highbd_sad64x128_c, 8),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(64, 64, &vpx_highbd_sad64x64_c, 8),
make_tuple(64, 32, &vpx_highbd_sad64x32_c, 8),
make_tuple(32, 64, &vpx_highbd_sad32x64_c, 8),
@@ -512,6 +522,11 @@
make_tuple(8, 4, &vpx_highbd_sad8x4_c, 8),
make_tuple(4, 8, &vpx_highbd_sad4x8_c, 8),
make_tuple(4, 4, &vpx_highbd_sad4x4_c, 8),
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(128, 128, &vpx_highbd_sad128x128_c, 10),
+ make_tuple(128, 64, &vpx_highbd_sad128x64_c, 10),
+ make_tuple(64, 128, &vpx_highbd_sad64x128_c, 10),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(64, 64, &vpx_highbd_sad64x64_c, 10),
make_tuple(64, 32, &vpx_highbd_sad64x32_c, 10),
make_tuple(32, 64, &vpx_highbd_sad32x64_c, 10),
@@ -525,6 +540,11 @@
make_tuple(8, 4, &vpx_highbd_sad8x4_c, 10),
make_tuple(4, 8, &vpx_highbd_sad4x8_c, 10),
make_tuple(4, 4, &vpx_highbd_sad4x4_c, 10),
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(128, 128, &vpx_highbd_sad128x128_c, 12),
+ make_tuple(128, 64, &vpx_highbd_sad128x64_c, 12),
+ make_tuple(64, 128, &vpx_highbd_sad64x128_c, 12),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(64, 64, &vpx_highbd_sad64x64_c, 12),
make_tuple(64, 32, &vpx_highbd_sad64x32_c, 12),
make_tuple(32, 64, &vpx_highbd_sad32x64_c, 12),
@@ -543,6 +563,11 @@
INSTANTIATE_TEST_CASE_P(C, SADTest, ::testing::ValuesIn(c_tests));
const SadMxNAvgParam avg_c_tests[] = {
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(128, 128, &vpx_sad128x128_avg_c, -1),
+ make_tuple(128, 64, &vpx_sad128x64_avg_c, -1),
+ make_tuple(64, 128, &vpx_sad64x128_avg_c, -1),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(64, 64, &vpx_sad64x64_avg_c, -1),
make_tuple(64, 32, &vpx_sad64x32_avg_c, -1),
make_tuple(32, 64, &vpx_sad32x64_avg_c, -1),
@@ -557,6 +582,11 @@
make_tuple(4, 8, &vpx_sad4x8_avg_c, -1),
make_tuple(4, 4, &vpx_sad4x4_avg_c, -1),
#if CONFIG_VP9_HIGHBITDEPTH
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(128, 128, &vpx_highbd_sad128x128_avg_c, 8),
+ make_tuple(128, 64, &vpx_highbd_sad128x64_avg_c, 8),
+ make_tuple(64, 128, &vpx_highbd_sad64x128_avg_c, 8),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(64, 64, &vpx_highbd_sad64x64_avg_c, 8),
make_tuple(64, 32, &vpx_highbd_sad64x32_avg_c, 8),
make_tuple(32, 64, &vpx_highbd_sad32x64_avg_c, 8),
@@ -570,6 +600,11 @@
make_tuple(8, 4, &vpx_highbd_sad8x4_avg_c, 8),
make_tuple(4, 8, &vpx_highbd_sad4x8_avg_c, 8),
make_tuple(4, 4, &vpx_highbd_sad4x4_avg_c, 8),
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(128, 128, &vpx_highbd_sad128x128_avg_c, 10),
+ make_tuple(128, 64, &vpx_highbd_sad128x64_avg_c, 10),
+ make_tuple(64, 128, &vpx_highbd_sad64x128_avg_c, 10),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(64, 64, &vpx_highbd_sad64x64_avg_c, 10),
make_tuple(64, 32, &vpx_highbd_sad64x32_avg_c, 10),
make_tuple(32, 64, &vpx_highbd_sad32x64_avg_c, 10),
@@ -583,6 +618,11 @@
make_tuple(8, 4, &vpx_highbd_sad8x4_avg_c, 10),
make_tuple(4, 8, &vpx_highbd_sad4x8_avg_c, 10),
make_tuple(4, 4, &vpx_highbd_sad4x4_avg_c, 10),
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(128, 128, &vpx_highbd_sad128x128_avg_c, 12),
+ make_tuple(128, 64, &vpx_highbd_sad128x64_avg_c, 12),
+ make_tuple(64, 128, &vpx_highbd_sad64x128_avg_c, 12),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(64, 64, &vpx_highbd_sad64x64_avg_c, 12),
make_tuple(64, 32, &vpx_highbd_sad64x32_avg_c, 12),
make_tuple(32, 64, &vpx_highbd_sad32x64_avg_c, 12),
@@ -601,6 +641,11 @@
INSTANTIATE_TEST_CASE_P(C, SADavgTest, ::testing::ValuesIn(avg_c_tests));
const SadMxNx4Param x4d_c_tests[] = {
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(128, 128, &vpx_sad128x128x4d_c, -1),
+ make_tuple(128, 64, &vpx_sad128x64x4d_c, -1),
+ make_tuple(64, 128, &vpx_sad64x128x4d_c, -1),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(64, 64, &vpx_sad64x64x4d_c, -1),
make_tuple(64, 32, &vpx_sad64x32x4d_c, -1),
make_tuple(32, 64, &vpx_sad32x64x4d_c, -1),
@@ -615,6 +660,11 @@
make_tuple(4, 8, &vpx_sad4x8x4d_c, -1),
make_tuple(4, 4, &vpx_sad4x4x4d_c, -1),
#if CONFIG_VP9_HIGHBITDEPTH
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(128, 128, &vpx_highbd_sad128x128x4d_c, 8),
+ make_tuple(128, 64, &vpx_highbd_sad128x64x4d_c, 8),
+ make_tuple(64, 128, &vpx_highbd_sad64x128x4d_c, 8),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(64, 64, &vpx_highbd_sad64x64x4d_c, 8),
make_tuple(64, 32, &vpx_highbd_sad64x32x4d_c, 8),
make_tuple(32, 64, &vpx_highbd_sad32x64x4d_c, 8),
@@ -628,6 +678,11 @@
make_tuple(8, 4, &vpx_highbd_sad8x4x4d_c, 8),
make_tuple(4, 8, &vpx_highbd_sad4x8x4d_c, 8),
make_tuple(4, 4, &vpx_highbd_sad4x4x4d_c, 8),
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(128, 128, &vpx_highbd_sad128x128x4d_c, 10),
+ make_tuple(128, 64, &vpx_highbd_sad128x64x4d_c, 10),
+ make_tuple(64, 128, &vpx_highbd_sad64x128x4d_c, 10),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(64, 64, &vpx_highbd_sad64x64x4d_c, 10),
make_tuple(64, 32, &vpx_highbd_sad64x32x4d_c, 10),
make_tuple(32, 64, &vpx_highbd_sad32x64x4d_c, 10),
@@ -641,6 +696,11 @@
make_tuple(8, 4, &vpx_highbd_sad8x4x4d_c, 10),
make_tuple(4, 8, &vpx_highbd_sad4x8x4d_c, 10),
make_tuple(4, 4, &vpx_highbd_sad4x4x4d_c, 10),
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(128, 128, &vpx_highbd_sad128x128x4d_c, 12),
+ make_tuple(128, 64, &vpx_highbd_sad128x64x4d_c, 12),
+ make_tuple(64, 128, &vpx_highbd_sad64x128x4d_c, 12),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(64, 64, &vpx_highbd_sad64x64x4d_c, 12),
make_tuple(64, 32, &vpx_highbd_sad64x32x4d_c, 12),
make_tuple(32, 64, &vpx_highbd_sad32x64x4d_c, 12),
diff --git a/test/variance_test.cc b/test/variance_test.cc
index 6f50f78..97c5516 100644
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
@@ -759,7 +759,13 @@
INSTANTIATE_TEST_CASE_P(
C, VpxVarianceTest,
- ::testing::Values(make_tuple(6, 6, &vpx_variance64x64_c, 0),
+ ::testing::Values(
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(7, 7, &vpx_variance128x128_c, 0),
+ make_tuple(7, 6, &vpx_variance128x64_c, 0),
+ make_tuple(6, 7, &vpx_variance64x128_c, 0),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(6, 6, &vpx_variance64x64_c, 0),
make_tuple(6, 5, &vpx_variance64x32_c, 0),
make_tuple(5, 6, &vpx_variance32x64_c, 0),
make_tuple(5, 5, &vpx_variance32x32_c, 0),
@@ -775,7 +781,13 @@
INSTANTIATE_TEST_CASE_P(
C, VpxSubpelVarianceTest,
- ::testing::Values(make_tuple(6, 6, &vpx_sub_pixel_variance64x64_c, 0),
+ ::testing::Values(
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(7, 7, &vpx_sub_pixel_variance128x128_c, 0),
+ make_tuple(7, 6, &vpx_sub_pixel_variance128x64_c, 0),
+ make_tuple(6, 7, &vpx_sub_pixel_variance64x128_c, 0),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(6, 6, &vpx_sub_pixel_variance64x64_c, 0),
make_tuple(6, 5, &vpx_sub_pixel_variance64x32_c, 0),
make_tuple(5, 6, &vpx_sub_pixel_variance32x64_c, 0),
make_tuple(5, 5, &vpx_sub_pixel_variance32x32_c, 0),
@@ -791,7 +803,13 @@
INSTANTIATE_TEST_CASE_P(
C, VpxSubpelAvgVarianceTest,
- ::testing::Values(make_tuple(6, 6, &vpx_sub_pixel_avg_variance64x64_c, 0),
+ ::testing::Values(
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(7, 7, &vpx_sub_pixel_avg_variance128x128_c, 0),
+ make_tuple(7, 6, &vpx_sub_pixel_avg_variance128x64_c, 0),
+ make_tuple(6, 7, &vpx_sub_pixel_avg_variance64x128_c, 0),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(6, 6, &vpx_sub_pixel_avg_variance64x64_c, 0),
make_tuple(6, 5, &vpx_sub_pixel_avg_variance64x32_c, 0),
make_tuple(5, 6, &vpx_sub_pixel_avg_variance32x64_c, 0),
make_tuple(5, 5, &vpx_sub_pixel_avg_variance32x32_c, 0),
@@ -841,7 +859,13 @@
INSTANTIATE_TEST_CASE_P(
C, VpxHBDVarianceTest,
- ::testing::Values(make_tuple(6, 6, &vpx_highbd_12_variance64x64_c, 12),
+ ::testing::Values(
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(7, 7, &vpx_highbd_12_variance128x128_c, 12),
+ make_tuple(7, 6, &vpx_highbd_12_variance128x64_c, 12),
+ make_tuple(6, 7, &vpx_highbd_12_variance64x128_c, 12),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(6, 6, &vpx_highbd_12_variance64x64_c, 12),
make_tuple(6, 5, &vpx_highbd_12_variance64x32_c, 12),
make_tuple(5, 6, &vpx_highbd_12_variance32x64_c, 12),
make_tuple(5, 5, &vpx_highbd_12_variance32x32_c, 12),
@@ -854,6 +878,11 @@
make_tuple(3, 2, &vpx_highbd_12_variance8x4_c, 12),
make_tuple(2, 3, &vpx_highbd_12_variance4x8_c, 12),
make_tuple(2, 2, &vpx_highbd_12_variance4x4_c, 12),
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(7, 7, &vpx_highbd_10_variance128x128_c, 10),
+ make_tuple(7, 6, &vpx_highbd_10_variance128x64_c, 10),
+ make_tuple(6, 7, &vpx_highbd_10_variance64x128_c, 10),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(6, 6, &vpx_highbd_10_variance64x64_c, 10),
make_tuple(6, 5, &vpx_highbd_10_variance64x32_c, 10),
make_tuple(5, 6, &vpx_highbd_10_variance32x64_c, 10),
@@ -867,6 +896,11 @@
make_tuple(3, 2, &vpx_highbd_10_variance8x4_c, 10),
make_tuple(2, 3, &vpx_highbd_10_variance4x8_c, 10),
make_tuple(2, 2, &vpx_highbd_10_variance4x4_c, 10),
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(7, 7, &vpx_highbd_8_variance128x128_c, 8),
+ make_tuple(7, 6, &vpx_highbd_8_variance128x64_c, 8),
+ make_tuple(6, 7, &vpx_highbd_8_variance64x128_c, 8),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(6, 6, &vpx_highbd_8_variance64x64_c, 8),
make_tuple(6, 5, &vpx_highbd_8_variance64x32_c, 8),
make_tuple(5, 6, &vpx_highbd_8_variance32x64_c, 8),
@@ -884,6 +918,11 @@
INSTANTIATE_TEST_CASE_P(
C, VpxHBDSubpelVarianceTest,
::testing::Values(
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(7, 7, &vpx_highbd_8_sub_pixel_variance128x128_c, 8),
+ make_tuple(7, 6, &vpx_highbd_8_sub_pixel_variance128x64_c, 8),
+ make_tuple(6, 7, &vpx_highbd_8_sub_pixel_variance64x128_c, 8),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(6, 6, &vpx_highbd_8_sub_pixel_variance64x64_c, 8),
make_tuple(6, 5, &vpx_highbd_8_sub_pixel_variance64x32_c, 8),
make_tuple(5, 6, &vpx_highbd_8_sub_pixel_variance32x64_c, 8),
@@ -897,6 +936,11 @@
make_tuple(3, 2, &vpx_highbd_8_sub_pixel_variance8x4_c, 8),
make_tuple(2, 3, &vpx_highbd_8_sub_pixel_variance4x8_c, 8),
make_tuple(2, 2, &vpx_highbd_8_sub_pixel_variance4x4_c, 8),
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(7, 7, &vpx_highbd_10_sub_pixel_variance128x128_c, 10),
+ make_tuple(7, 6, &vpx_highbd_10_sub_pixel_variance128x64_c, 10),
+ make_tuple(6, 7, &vpx_highbd_10_sub_pixel_variance64x128_c, 10),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(6, 6, &vpx_highbd_10_sub_pixel_variance64x64_c, 10),
make_tuple(6, 5, &vpx_highbd_10_sub_pixel_variance64x32_c, 10),
make_tuple(5, 6, &vpx_highbd_10_sub_pixel_variance32x64_c, 10),
@@ -910,6 +954,11 @@
make_tuple(3, 2, &vpx_highbd_10_sub_pixel_variance8x4_c, 10),
make_tuple(2, 3, &vpx_highbd_10_sub_pixel_variance4x8_c, 10),
make_tuple(2, 2, &vpx_highbd_10_sub_pixel_variance4x4_c, 10),
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(7, 7, &vpx_highbd_12_sub_pixel_variance128x128_c, 12),
+ make_tuple(7, 6, &vpx_highbd_12_sub_pixel_variance128x64_c, 12),
+ make_tuple(6, 7, &vpx_highbd_12_sub_pixel_variance64x128_c, 12),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(6, 6, &vpx_highbd_12_sub_pixel_variance64x64_c, 12),
make_tuple(6, 5, &vpx_highbd_12_sub_pixel_variance64x32_c, 12),
make_tuple(5, 6, &vpx_highbd_12_sub_pixel_variance32x64_c, 12),
@@ -927,6 +976,11 @@
INSTANTIATE_TEST_CASE_P(
C, VpxHBDSubpelAvgVarianceTest,
::testing::Values(
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(7, 7, &vpx_highbd_8_sub_pixel_avg_variance128x128_c, 8),
+ make_tuple(7, 6, &vpx_highbd_8_sub_pixel_avg_variance128x64_c, 8),
+ make_tuple(6, 7, &vpx_highbd_8_sub_pixel_avg_variance64x128_c, 8),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(6, 6, &vpx_highbd_8_sub_pixel_avg_variance64x64_c, 8),
make_tuple(6, 5, &vpx_highbd_8_sub_pixel_avg_variance64x32_c, 8),
make_tuple(5, 6, &vpx_highbd_8_sub_pixel_avg_variance32x64_c, 8),
@@ -940,6 +994,11 @@
make_tuple(3, 2, &vpx_highbd_8_sub_pixel_avg_variance8x4_c, 8),
make_tuple(2, 3, &vpx_highbd_8_sub_pixel_avg_variance4x8_c, 8),
make_tuple(2, 2, &vpx_highbd_8_sub_pixel_avg_variance4x4_c, 8),
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(7, 7, &vpx_highbd_10_sub_pixel_avg_variance128x128_c, 10),
+ make_tuple(7, 6, &vpx_highbd_10_sub_pixel_avg_variance128x64_c, 10),
+ make_tuple(6, 7, &vpx_highbd_10_sub_pixel_avg_variance64x128_c, 10),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(6, 6, &vpx_highbd_10_sub_pixel_avg_variance64x64_c, 10),
make_tuple(6, 5, &vpx_highbd_10_sub_pixel_avg_variance64x32_c, 10),
make_tuple(5, 6, &vpx_highbd_10_sub_pixel_avg_variance32x64_c, 10),
@@ -953,6 +1012,11 @@
make_tuple(3, 2, &vpx_highbd_10_sub_pixel_avg_variance8x4_c, 10),
make_tuple(2, 3, &vpx_highbd_10_sub_pixel_avg_variance4x8_c, 10),
make_tuple(2, 2, &vpx_highbd_10_sub_pixel_avg_variance4x4_c, 10),
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+ make_tuple(7, 7, &vpx_highbd_12_sub_pixel_avg_variance128x128_c, 12),
+ make_tuple(7, 6, &vpx_highbd_12_sub_pixel_avg_variance128x64_c, 12),
+ make_tuple(6, 7, &vpx_highbd_12_sub_pixel_avg_variance64x128_c, 12),
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple(6, 6, &vpx_highbd_12_sub_pixel_avg_variance64x64_c, 12),
make_tuple(6, 5, &vpx_highbd_12_sub_pixel_avg_variance64x32_c, 12),
make_tuple(5, 6, &vpx_highbd_12_sub_pixel_avg_variance32x64_c, 12),
diff --git a/vpx_dsp/sad.c b/vpx_dsp/sad.c
index 204cede..c500206 100644
--- a/vpx_dsp/sad.c
+++ b/vpx_dsp/sad.c
@@ -108,6 +108,22 @@
sad_array[i] = vpx_sad##m##x##n##_c(src, src_stride, ref_array[i], ref_stride); \
}
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+// 128x128
+sadMxN(128, 128)
+sadMxNxK(128, 128, 3)
+sadMxNxK(128, 128, 8)
+sadMxNx4D(128, 128)
+
+// 128x64
+sadMxN(128, 64)
+sadMxNx4D(128, 64)
+
+// 64x128
+sadMxN(64, 128)
+sadMxNx4D(64, 128)
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
+
// 64x64
sadMxN(64, 64)
sadMxNxK(64, 64, 3)
@@ -247,6 +263,22 @@
} \
}
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+// 128x128
+highbd_sadMxN(128, 128)
+highbd_sadMxNxK(128, 128, 3)
+highbd_sadMxNxK(128, 128, 8)
+highbd_sadMxNx4D(128, 128)
+
+// 128x64
+highbd_sadMxN(128, 64)
+highbd_sadMxNx4D(128, 64)
+
+// 64x128
+highbd_sadMxN(64, 128)
+highbd_sadMxNx4D(64, 128)
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
+
// 64x64
highbd_sadMxN(64, 64)
highbd_sadMxNxK(64, 64, 3)
diff --git a/vpx_dsp/variance.c b/vpx_dsp/variance.c
index 14d7f99..169769a 100644
--- a/vpx_dsp/variance.c
+++ b/vpx_dsp/variance.c
@@ -17,17 +17,6 @@
#include "vpx_dsp/variance.h"
#include "vpx_dsp/vpx_filter.h"
-const uint8_t vpx_bilinear_filters[BIL_SUBPEL_SHIFTS][2] = {
- { 128, 0 },
- { 112, 16 },
- { 96, 32 },
- { 80, 48 },
- { 64, 64 },
- { 48, 80 },
- { 32, 96 },
- { 16, 112 },
-};
-
uint32_t vpx_get4x4sse_cs_c(const uint8_t *a, int a_stride,
const uint8_t *b, int b_stride) {
int distortion = 0;
@@ -176,9 +165,9 @@
uint8_t temp2[H * W]; \
\
var_filter_block2d_bil_first_pass(a, fdata3, a_stride, 1, H + 1, W, \
- vpx_bilinear_filters[xoffset]); \
+ bilinear_filters_2t[xoffset]); \
var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- vpx_bilinear_filters[yoffset]); \
+ bilinear_filters_2t[yoffset]); \
\
return vpx_variance##W##x##H##_c(temp2, W, b, b_stride, sse); \
}
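The removed vpx_bilinear_filters table holds two-tap kernels whose taps always sum to 128, so each filtering pass is a fixed-point weighted average; the rename to bilinear_filters_2t does not change that. A sketch of the per-pixel operation both pass helpers perform (bilinear_2t is illustrative, not a libvpx symbol):

    /* Two-tap bilinear blend; filter[0] + filter[1] == 128, so this is
       a weighted average with 7 fractional bits, rounded to nearest. */
    static uint8_t bilinear_2t(uint8_t a, uint8_t b, const uint8_t *filter) {
      return (uint8_t)((a * filter[0] + b * filter[1] + 64) >> 7);
    }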
@@ -196,9 +185,9 @@
DECLARE_ALIGNED(16, uint8_t, temp3[H * W]); \
\
var_filter_block2d_bil_first_pass(a, fdata3, a_stride, 1, H + 1, W, \
- vpx_bilinear_filters[xoffset]); \
+ bilinear_filters_2t[xoffset]); \
var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- vpx_bilinear_filters[yoffset]); \
+ bilinear_filters_2t[yoffset]); \
\
vpx_comp_avg_pred(temp3, second_pred, W, H, temp2, W); \
\
@@ -235,6 +224,11 @@
SUBPIX_VAR(W, H) \
SUBPIX_AVG_VAR(W, H)
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+VARIANCES(128, 128)
+VARIANCES(128, 64)
+VARIANCES(64, 128)
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
VARIANCES(64, 64)
VARIANCES(64, 32)
VARIANCES(32, 64)
@@ -501,9 +495,9 @@
uint16_t temp2[H * W]; \
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
- W, vpx_bilinear_filters[xoffset]); \
+ W, bilinear_filters_2t[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- vpx_bilinear_filters[yoffset]); \
+ bilinear_filters_2t[yoffset]); \
\
return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, dst, \
dst_stride, sse); \
@@ -518,9 +512,9 @@
uint16_t temp2[H * W]; \
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
- W, vpx_bilinear_filters[xoffset]); \
+ W, bilinear_filters_2t[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- vpx_bilinear_filters[yoffset]); \
+ bilinear_filters_2t[yoffset]); \
\
return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
W, dst, dst_stride, sse); \
@@ -535,9 +529,9 @@
uint16_t temp2[H * W]; \
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
- W, vpx_bilinear_filters[xoffset]); \
+ W, bilinear_filters_2t[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- vpx_bilinear_filters[yoffset]); \
+ bilinear_filters_2t[yoffset]); \
\
return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
W, dst, dst_stride, sse); \
@@ -555,9 +549,9 @@
DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
- W, vpx_bilinear_filters[xoffset]); \
+ W, bilinear_filters_2t[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- vpx_bilinear_filters[yoffset]); \
+ bilinear_filters_2t[yoffset]); \
\
vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \
CONVERT_TO_BYTEPTR(temp2), W); \
@@ -577,9 +571,9 @@
DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
- W, vpx_bilinear_filters[xoffset]); \
+ W, bilinear_filters_2t[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- vpx_bilinear_filters[yoffset]); \
+ bilinear_filters_2t[yoffset]); \
\
vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \
CONVERT_TO_BYTEPTR(temp2), W); \
@@ -599,9 +593,9 @@
DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
- W, vpx_bilinear_filters[xoffset]); \
+ W, bilinear_filters_2t[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- vpx_bilinear_filters[yoffset]); \
+ bilinear_filters_2t[yoffset]); \
\
vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \
CONVERT_TO_BYTEPTR(temp2), W); \
@@ -616,6 +610,11 @@
HIGHBD_SUBPIX_VAR(W, H) \
HIGHBD_SUBPIX_AVG_VAR(W, H)
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+HIGHBD_VARIANCES(128, 128)
+HIGHBD_VARIANCES(128, 64)
+HIGHBD_VARIANCES(64, 128)
+#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
HIGHBD_VARIANCES(64, 64)
HIGHBD_VARIANCES(64, 32)
HIGHBD_VARIANCES(32, 64)
@@ -677,8 +676,9 @@
b += b_stride;
m += m_stride;
}
- *sum = (sum64 >= 0) ? ((sum64 + 31) >> 6) : -((-sum64 + 31) >> 6);
- *sse = (sse64 + 2047) >> 12;
+ sum64 = (sum64 >= 0) ? sum64 : -sum64;
+ *sum = ROUND_POWER_OF_TWO(sum64, 6);
+ *sse = ROUND_POWER_OF_TWO(sse64, 12);
}
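ROUND_POWER_OF_TWO is the usual vpx_dsp round-to-nearest shift (from vpx_dsp_common.h):

    #define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))

so ROUND_POWER_OF_TWO(sum64, 6) is (sum64 + 32) >> 6, true round-to-nearest for mask weights summing to 64 (2^6), where the old code's +31 rounded just below the midpoint. Dropping the sign of sum64 first is harmless because callers only use the sum squared, in the variance formula sse - sum * sum / (w * h).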
#define MASK_VAR(W, H) \
@@ -702,9 +702,9 @@
uint8_t temp2[H * W]; \
\
var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \
- vpx_bilinear_filters[xoffset]); \
+ bilinear_filters_2t[xoffset]); \
var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- vpx_bilinear_filters[yoffset]); \
+ bilinear_filters_2t[yoffset]); \
\
return vpx_masked_variance##W##x##H##_c(temp2, W, dst, dst_stride, \
msk, msk_stride, sse); \
@@ -765,27 +765,28 @@
const uint8_t *b8, int b_stride,
const uint8_t *m, int m_stride,
int w, int h,
- uint64_t *sse64, int *sum) {
+ uint64_t *sse, int64_t *sum) {
int i, j;
uint16_t *a = CONVERT_TO_SHORTPTR(a8);
uint16_t *b = CONVERT_TO_SHORTPTR(b8);
- int64_t sum64 = 0;
- *sse64 = 0;
+ *sum = 0;
+ *sse = 0;
for (i = 0; i < h; i++) {
for (j = 0; j < w; j++) {
const int diff = (a[j] - b[j]) * (m[j]);
- sum64 += diff;
- *sse64 += (int64_t)diff * diff;
+ *sum += (int64_t)diff;
+ *sse += (int64_t)diff * diff;
}
a += a_stride;
b += b_stride;
m += m_stride;
}
- *sum = (sum64 >= 0) ? ((sum64 + 31) >> 6) : -((-sum64 + 31) >> 6);
- *sse64 = (*sse64 + 2047) >> 12;
+ *sum = (*sum >= 0) ? *sum : -*sum;
+ *sum = ROUND_POWER_OF_TWO(*sum, 6);
+ *sse = ROUND_POWER_OF_TWO(*sse, 12);
}
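Widening the sum accumulator to int64_t matters for the new 128x128 blocks: with 12-bit samples and a 6-bit mask, each term |(a - b) * m| can reach 4095 * 64 = 262080, and over 128 * 128 = 16384 pixels the total can reach 262080 * 16384 = 4293918720, which exceeds INT32_MAX (2147483647). A 32-bit accumulator was sufficient for 64x64 but not for the extended partitions.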
void highbd_masked_variance(const uint8_t *a8, int a_stride,
@@ -793,9 +794,11 @@
const uint8_t *m, int m_stride,
int w, int h,
unsigned int *sse, int *sum) {
+ int64_t sum64;
uint64_t sse64;
highbd_masked_variance64(a8, a_stride, b8, b_stride, m, m_stride,
- w, h, &sse64, sum);
+ w, h, &sse64, &sum64);
+ *sum = (int)sum64;
*sse = (unsigned int)sse64;
}
@@ -804,10 +807,11 @@
const uint8_t *m, int m_stride,
int w, int h,
unsigned int *sse, int *sum) {
+ int64_t sum64;
uint64_t sse64;
highbd_masked_variance64(a8, a_stride, b8, b_stride, m, m_stride,
- w, h, &sse64, sum);
- *sum = ROUND_POWER_OF_TWO(*sum, 2);
+ w, h, &sse64, &sum64);
+ *sum = (int)ROUND_POWER_OF_TWO(sum64, 2);
*sse = (unsigned int)ROUND_POWER_OF_TWO(sse64, 4);
}
@@ -816,10 +820,11 @@
const uint8_t *m, int m_stride,
int w, int h,
unsigned int *sse, int *sum) {
+ int64_t sum64;
uint64_t sse64;
highbd_masked_variance64(a8, a_stride, b8, b_stride, m, m_stride,
- w, h, &sse64, sum);
- *sum = ROUND_POWER_OF_TWO(*sum, 4);
+ w, h, &sse64, &sum64);
+ *sum = (int)ROUND_POWER_OF_TWO(sum64, 4);
*sse = (unsigned int)ROUND_POWER_OF_TWO(sse64, 8);
}
@@ -875,9 +880,9 @@
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, \
H + 1, W, \
- vpx_bilinear_filters[xoffset]); \
+ bilinear_filters_2t[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- vpx_bilinear_filters[yoffset]); \
+ bilinear_filters_2t[yoffset]); \
\
return vpx_highbd_masked_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
W, dst, dst_stride, \
@@ -895,9 +900,9 @@
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, \
H + 1, W, \
- vpx_bilinear_filters[xoffset]); \
+ bilinear_filters_2t[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- vpx_bilinear_filters[yoffset]); \
+ bilinear_filters_2t[yoffset]); \
\
return vpx_highbd_10_masked_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
W, dst, dst_stride, \
@@ -915,9 +920,9 @@
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, \
H + 1, W, \
- vpx_bilinear_filters[xoffset]); \
+ bilinear_filters_2t[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- vpx_bilinear_filters[yoffset]); \
+ bilinear_filters_2t[yoffset]); \
\
return vpx_highbd_12_masked_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
W, dst, dst_stride, \
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index fdfd20c..2ce0b99 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -50,6 +50,19 @@
$avx2_x86_64 = 'avx2';
}
+if (vpx_config("CONFIG_EXT_PARTITION") eq "yes") {
+ @block_widths = (4, 8, 16, 32, 64, 128)
+} else {
+ @block_widths = (4, 8, 16, 32, 64)
+}
+
+@block_sizes = ();
+foreach $w (@block_widths) {
+ foreach $h (@block_widths) {
+ push @block_sizes, [$w, $h] if ($w <= 2*$h && $h <= 2*$w);
+ }
+}
+
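The ($w <= 2*$h && $h <= 2*$w) guard keeps only blocks with at most a 2:1 aspect ratio, which reproduces the 13 standard block sizes (4x4 through 64x64) and, when CONFIG_EXT_PARTITION is enabled, adds 64x128, 128x64 and 128x128 for 16 sizes in total.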
#
# Intra prediction
#
@@ -960,69 +973,43 @@
#
add_proto qw/void vpx_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
specialize qw/vpx_subtract_block neon msa/, "$sse2_x86inc";
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+ add_proto qw/void vpx_highbd_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride, int bd";
+ specialize qw/vpx_highbd_subtract_block/;
+}
if (vpx_config("CONFIG_VP10_ENCODER") eq "yes") {
-#
-# Sum of Squares
-#
+ #
+ # Sum of Squares
+ #
add_proto qw/uint64_t vpx_sum_squares_2d_i16/, "const int16_t *src, int stride, int size";
specialize qw/vpx_sum_squares_2d_i16 sse2/;
}
-#
-# Single block SAD
-#
-add_proto qw/unsigned int vpx_sad64x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad64x64 avx2 neon msa/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vpx_sad64x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad64x32 avx2 msa/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vpx_sad32x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad32x64 avx2 msa/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vpx_sad32x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad32x32 avx2 neon msa/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vpx_sad32x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad32x16 avx2 msa/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vpx_sad16x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad16x32 msa/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vpx_sad16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad16x16 mmx media neon msa/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vpx_sad16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad16x8 mmx neon msa/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vpx_sad8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad8x16 mmx neon msa/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vpx_sad8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad8x8 mmx neon msa/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vpx_sad8x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad8x4 msa/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vpx_sad4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad4x8 msa/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vpx_sad4x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad4x4 mmx neon msa/, "$sse2_x86inc";
-
-#
-# Avg
-#
if ((vpx_config("CONFIG_VP9_ENCODER") eq "yes") || (vpx_config("CONFIG_VP10_ENCODER") eq "yes")) {
+ #
+ # Avg
+ #
add_proto qw/unsigned int vpx_avg_8x8/, "const uint8_t *, int p";
specialize qw/vpx_avg_8x8 sse2 neon msa/;
-
add_proto qw/unsigned int vpx_avg_4x4/, "const uint8_t *, int p";
specialize qw/vpx_avg_4x4 sse2 neon msa/;
+ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+ add_proto qw/unsigned int vpx_highbd_avg_8x8/, "const uint8_t *, int p";
+ specialize qw/vpx_highbd_avg_8x8/;
+ add_proto qw/unsigned int vpx_highbd_avg_4x4/, "const uint8_t *, int p";
+ specialize qw/vpx_highbd_avg_4x4/;
+ }
+ #
+ # Minmax
+ #
add_proto qw/void vpx_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
specialize qw/vpx_minmax_8x8 sse2/;
+ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+ add_proto qw/void vpx_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
+ specialize qw/vpx_highbd_minmax_8x8/;
+ }
add_proto qw/void vpx_hadamard_8x8/, "int16_t const *src_diff, int src_stride, int16_t *coeff";
specialize qw/vpx_hadamard_8x8 sse2/, "$ssse3_x86_64_x86inc";
@@ -1043,575 +1030,217 @@
specialize qw/vpx_vector_var neon sse2/;
} # CONFIG_VP9_ENCODER || CONFIG_VP10_ENCODER
-add_proto qw/unsigned int vpx_sad64x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
+#
+# Single block SAD / Single block Avg SAD
+#
+foreach (@block_sizes) {
+ ($w, $h) = @$_;
+ add_proto qw/unsigned int/, "vpx_sad${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+ add_proto qw/unsigned int/, "vpx_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
+}
+
+specialize qw/vpx_sad64x64 avx2 neon msa/, "$sse2_x86inc";
+specialize qw/vpx_sad64x32 avx2 msa/, "$sse2_x86inc";
+specialize qw/vpx_sad32x64 avx2 msa/, "$sse2_x86inc";
+specialize qw/vpx_sad32x32 avx2 neon msa/, "$sse2_x86inc";
+specialize qw/vpx_sad32x16 avx2 msa/, "$sse2_x86inc";
+specialize qw/vpx_sad16x32 msa/, "$sse2_x86inc";
+specialize qw/vpx_sad16x16 mmx media neon msa/, "$sse2_x86inc";
+specialize qw/vpx_sad16x8 mmx neon msa/, "$sse2_x86inc";
+specialize qw/vpx_sad8x16 mmx neon msa/, "$sse2_x86inc";
+specialize qw/vpx_sad8x8 mmx neon msa/, "$sse2_x86inc";
+specialize qw/vpx_sad8x4 msa/, "$sse2_x86inc";
+specialize qw/vpx_sad4x8 msa/, "$sse2_x86inc";
+specialize qw/vpx_sad4x4 mmx neon msa/, "$sse2_x86inc";
+
specialize qw/vpx_sad64x64_avg avx2 msa/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vpx_sad64x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
specialize qw/vpx_sad64x32_avg avx2 msa/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vpx_sad32x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
specialize qw/vpx_sad32x64_avg avx2 msa/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vpx_sad32x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
specialize qw/vpx_sad32x32_avg avx2 msa/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vpx_sad32x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
specialize qw/vpx_sad32x16_avg avx2 msa/, "$sse2_x86inc";
+specialize qw/vpx_sad16x32_avg msa/, "$sse2_x86inc";
+specialize qw/vpx_sad16x16_avg msa/, "$sse2_x86inc";
+specialize qw/vpx_sad16x8_avg msa/, "$sse2_x86inc";
+specialize qw/vpx_sad8x16_avg msa/, "$sse2_x86inc";
+specialize qw/vpx_sad8x8_avg msa/, "$sse2_x86inc";
+specialize qw/vpx_sad8x4_avg msa/, "$sse2_x86inc";
+specialize qw/vpx_sad4x8_avg msa/, "$sse2_x86inc";
+specialize qw/vpx_sad4x4_avg msa/, "$sse2_x86inc";
-add_proto qw/unsigned int vpx_sad16x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad16x32_avg msa/, "$sse2_x86inc";
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+ foreach (@block_sizes) {
+ ($w, $h) = @$_;
+ add_proto qw/unsigned int/, "vpx_highbd_sad${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+ add_proto qw/unsigned int/, "vpx_highbd_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
+ if ($w != 128 && $h != 128 && $w != 4) {
+ specialize "vpx_highbd_sad${w}x${h}", "$sse2_x86inc";
+ specialize "vpx_highbd_sad${w}x${h}_avg", "$sse2_x86inc";
+ }
+ }
+}
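(The $w != 128 && $h != 128 && $w != 4 test skips SSE2 specialization for the sizes that have no high-bitdepth SSE2 kernel: the new 128-pixel blocks and the 4-pixel-wide blocks, whose C versions remain the only implementation, matching the removed hand-written list below.)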
-add_proto qw/unsigned int vpx_sad16x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad16x16_avg msa/, "$sse2_x86inc";
+#
+# Masked SAD
+#
+if (vpx_config("CONFIG_EXT_INTER") eq "yes") {
+ foreach (@block_sizes) {
+ ($w, $h) = @$_;
+ add_proto qw/unsigned int/, "vpx_masked_sad${w}x${h}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
+ specialize "vpx_masked_sad${w}x${h}", qw/ssse3/;
+ }
-add_proto qw/unsigned int vpx_sad16x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad16x8_avg msa/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vpx_sad8x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad8x16_avg msa/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vpx_sad8x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad8x8_avg msa/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vpx_sad8x4_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad8x4_avg msa/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vpx_sad4x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad4x8_avg msa/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vpx_sad4x4_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad4x4_avg msa/, "$sse2_x86inc";
+ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+ foreach (@block_sizes) {
+ ($w, $h) = @$_;
+ add_proto qw/unsigned int/, "vpx_highbd_masked_sad${w}x${h}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
+ specialize "vpx_highbd_masked_sad${w}x${h}", qw/ssse3/;
+ }
+ }
+}
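For context, the masked SAD being wired up here weights each absolute difference by a per-pixel mask. A sketch of the reference behavior, assuming the 0..64 mask range exercised by masked_sad_test.cc (masked_sad and the +31 rounding are paraphrased, not quoted; abs() is from <stdlib.h> as in the earlier sketch):

    /* Mask-weighted SAD; mask values lie in [0, 64], so the total is
       renormalized by 64 with rounding. */
    static unsigned int masked_sad(const uint8_t *src, int src_stride,
                                   const uint8_t *ref, int ref_stride,
                                   const uint8_t *msk, int msk_stride,
                                   int width, int height) {
      int y, x;
      unsigned int sad = 0;
      for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) sad += msk[x] * abs(src[x] - ref[x]);
        src += src_stride;
        ref += ref_stride;
        msk += msk_stride;
      }
      return (sad + 31) >> 6;  /* divide by 64, rounding */
    }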
#
# Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally
#
# Blocks of 3
-add_proto qw/void vpx_sad64x64x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad64x64x3 msa/;
-
-add_proto qw/void vpx_sad32x32x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad32x32x3 msa/;
-
-add_proto qw/void vpx_sad16x16x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+foreach $s (@block_widths) {
+ add_proto qw/void/, "vpx_sad${s}x${s}x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+}
+specialize qw/vpx_sad64x64x3 msa/;
+specialize qw/vpx_sad32x32x3 msa/;
specialize qw/vpx_sad16x16x3 sse3 ssse3 msa/;
+specialize qw/vpx_sad8x8x3 sse3 msa/;
+specialize qw/vpx_sad4x4x3 sse3 msa/;
-add_proto qw/void vpx_sad16x8x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+add_proto qw/void/, "vpx_sad16x8x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
specialize qw/vpx_sad16x8x3 sse3 ssse3 msa/;
-
-add_proto qw/void vpx_sad8x16x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+add_proto qw/void/, "vpx_sad8x16x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
specialize qw/vpx_sad8x16x3 sse3 msa/;
-add_proto qw/void vpx_sad8x8x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad8x8x3 sse3 msa/;
-
-add_proto qw/void vpx_sad4x4x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad4x4x3 sse3 msa/;
-
# Blocks of 8
-add_proto qw/void vpx_sad64x64x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad64x64x8 msa/;
-
-add_proto qw/void vpx_sad32x32x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad32x32x8 msa/;
-
-add_proto qw/void vpx_sad16x16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+foreach $s (@block_widths) {
+ add_proto qw/void/, "vpx_sad${s}x${s}x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+}
+specialize qw/vpx_sad64x64x8 msa/;
+specialize qw/vpx_sad32x32x8 msa/;
specialize qw/vpx_sad16x16x8 sse4_1 msa/;
+specialize qw/vpx_sad8x8x8 sse4_1 msa/;
+specialize qw/vpx_sad4x4x8 sse4_1 msa/;
-add_proto qw/void vpx_sad16x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+add_proto qw/void/, "vpx_sad16x8x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
specialize qw/vpx_sad16x8x8 sse4_1 msa/;
-
-add_proto qw/void vpx_sad8x16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+add_proto qw/void/, "vpx_sad8x16x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
specialize qw/vpx_sad8x16x8 sse4_1 msa/;
-
-add_proto qw/void vpx_sad8x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad8x8x8 sse4_1 msa/;
-
-add_proto qw/void vpx_sad8x4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+add_proto qw/void/, "vpx_sad8x4x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
specialize qw/vpx_sad8x4x8 msa/;
-
-add_proto qw/void vpx_sad4x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+add_proto qw/void/, "vpx_sad4x8x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
specialize qw/vpx_sad4x8x8 msa/;
-add_proto qw/void vpx_sad4x4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad4x4x8 sse4_1 msa/;
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+ foreach $s (@block_widths) {
+ # Blocks of 3
+ add_proto qw/void/, "vpx_highbd_sad${s}x${s}x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+ # Blocks of 8
+ add_proto qw/void/, "vpx_highbd_sad${s}x${s}x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+ }
+ # Blocks of 3
+ add_proto qw/void/, "vpx_highbd_sad16x8x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+ add_proto qw/void/, "vpx_highbd_sad8x16x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+ # Blocks of 8
+ add_proto qw/void/, "vpx_highbd_sad16x8x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+ add_proto qw/void/, "vpx_highbd_sad8x16x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+ add_proto qw/void/, "vpx_highbd_sad8x4x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+ add_proto qw/void/, "vpx_highbd_sad4x8x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+}
#
# Multi-block SAD, comparing a reference to N independent blocks
#
-add_proto qw/void vpx_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
+foreach (@block_sizes) {
+ ($w, $h) = @$_;
+ add_proto qw/void/, "vpx_sad${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
+}
specialize qw/vpx_sad64x64x4d avx2 neon msa/, "$sse2_x86inc";
-
-add_proto qw/void vpx_sad64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad64x32x4d msa/, "$sse2_x86inc";
-
-add_proto qw/void vpx_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad32x64x4d msa/, "$sse2_x86inc";
-
-add_proto qw/void vpx_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
+specialize qw/vpx_sad64x32x4d msa/, "$sse2_x86inc";
+specialize qw/vpx_sad32x64x4d msa/, "$sse2_x86inc";
specialize qw/vpx_sad32x32x4d avx2 neon msa/, "$sse2_x86inc";
+specialize qw/vpx_sad32x16x4d msa/, "$sse2_x86inc";
+specialize qw/vpx_sad16x32x4d msa/, "$sse2_x86inc";
+specialize qw/vpx_sad16x16x4d neon msa/, "$sse2_x86inc";
+specialize qw/vpx_sad16x8x4d msa/, "$sse2_x86inc";
+specialize qw/vpx_sad8x16x4d msa/, "$sse2_x86inc";
+specialize qw/vpx_sad8x8x4d msa/, "$sse2_x86inc";
+specialize qw/vpx_sad8x4x4d msa/, "$sse2_x86inc";
+specialize qw/vpx_sad4x8x4d msa/, "$sse2_x86inc";
+specialize qw/vpx_sad4x4x4d msa/, "$sse2_x86inc";
-add_proto qw/void vpx_sad32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad32x16x4d msa/, "$sse2_x86inc";
-
-add_proto qw/void vpx_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad16x32x4d msa/, "$sse2_x86inc";
-
-add_proto qw/void vpx_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad16x16x4d neon msa/, "$sse2_x86inc";
-
-add_proto qw/void vpx_sad16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad16x8x4d msa/, "$sse2_x86inc";
-
-add_proto qw/void vpx_sad8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad8x16x4d msa/, "$sse2_x86inc";
-
-add_proto qw/void vpx_sad8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad8x8x4d msa/, "$sse2_x86inc";
-
-add_proto qw/void vpx_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad8x4x4d msa/, "$sse2_x86inc";
-
-add_proto qw/void vpx_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad4x8x4d msa/, "$sse2_x86inc";
-
-add_proto qw/void vpx_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad4x4x4d msa/, "$sse2_x86inc";
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+ #
+ # Multi-block SAD, comparing a reference to N independent blocks
+ #
+ foreach (@block_sizes) {
+ ($w, $h) = @$_;
+ add_proto qw/void/, "vpx_highbd_sad${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
+ if ($w != 128 && $h != 128) {
+ specialize "vpx_highbd_sad${w}x${h}x4d", "$sse2_x86inc";
+ }
+ }
+}
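The x4d variants compute four SADs at once against independent reference blocks; per the sadMxNx4D macro in sad.c they are equivalent to (function name illustrative, building on vpx_sad128x128_c from the earlier sketch):

    /* One source block versus four independent references. */
    static void sad128x128x4d(const uint8_t *src, int src_stride,
                              const uint8_t *const ref_array[],
                              int ref_stride, uint32_t *sad_array) {
      int i;
      for (i = 0; i < 4; ++i)
        sad_array[i] =
            vpx_sad128x128_c(src, src_stride, ref_array[i], ref_stride);
    }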
#
# Structured Similarity (SSIM)
#
if (vpx_config("CONFIG_INTERNAL_STATS") eq "yes") {
- add_proto qw/void vpx_ssim_parms_8x8/, "const uint8_t *s, int sp, const uint8_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
- specialize qw/vpx_ssim_parms_8x8/, "$sse2_x86_64";
+ add_proto qw/void vpx_ssim_parms_8x8/, "const uint8_t *s, int sp, const uint8_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
+ specialize qw/vpx_ssim_parms_8x8/, "$sse2_x86_64";
- add_proto qw/void vpx_ssim_parms_16x16/, "const uint8_t *s, int sp, const uint8_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
- specialize qw/vpx_ssim_parms_16x16/, "$sse2_x86_64";
-}
+ add_proto qw/void vpx_ssim_parms_16x16/, "const uint8_t *s, int sp, const uint8_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
+ specialize qw/vpx_ssim_parms_16x16/, "$sse2_x86_64";
-if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
- #
- # Block subtraction
- #
- add_proto qw/void vpx_highbd_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride, int bd";
- specialize qw/vpx_highbd_subtract_block/;
-
- #
- # Single block SAD
- #
- add_proto qw/unsigned int vpx_highbd_sad64x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vpx_highbd_sad64x64/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vpx_highbd_sad64x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vpx_highbd_sad64x32/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vpx_highbd_sad32x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vpx_highbd_sad32x64/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vpx_highbd_sad32x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vpx_highbd_sad32x32/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vpx_highbd_sad32x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vpx_highbd_sad32x16/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vpx_highbd_sad16x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vpx_highbd_sad16x32/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vpx_highbd_sad16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vpx_highbd_sad16x16/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vpx_highbd_sad16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vpx_highbd_sad16x8/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vpx_highbd_sad8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vpx_highbd_sad8x16/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vpx_highbd_sad8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vpx_highbd_sad8x8/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vpx_highbd_sad8x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vpx_highbd_sad8x4/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vpx_highbd_sad4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vpx_highbd_sad4x8/;
-
- add_proto qw/unsigned int vpx_highbd_sad4x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vpx_highbd_sad4x4/;
-
- #
- # Avg
- #
- add_proto qw/unsigned int vpx_highbd_avg_8x8/, "const uint8_t *, int p";
- specialize qw/vpx_highbd_avg_8x8/;
- add_proto qw/unsigned int vpx_highbd_avg_4x4/, "const uint8_t *, int p";
- specialize qw/vpx_highbd_avg_4x4/;
- add_proto qw/void vpx_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
- specialize qw/vpx_highbd_minmax_8x8/;
-
- add_proto qw/unsigned int vpx_highbd_sad64x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vpx_highbd_sad64x64_avg/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vpx_highbd_sad64x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vpx_highbd_sad64x32_avg/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vpx_highbd_sad32x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vpx_highbd_sad32x64_avg/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vpx_highbd_sad32x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vpx_highbd_sad32x32_avg/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vpx_highbd_sad32x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vpx_highbd_sad32x16_avg/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vpx_highbd_sad16x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vpx_highbd_sad16x32_avg/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vpx_highbd_sad16x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vpx_highbd_sad16x16_avg/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vpx_highbd_sad16x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vpx_highbd_sad16x8_avg/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vpx_highbd_sad8x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vpx_highbd_sad8x16_avg/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vpx_highbd_sad8x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vpx_highbd_sad8x8_avg/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vpx_highbd_sad8x4_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vpx_highbd_sad8x4_avg/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vpx_highbd_sad4x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vpx_highbd_sad4x8_avg/;
-
- add_proto qw/unsigned int vpx_highbd_sad4x4_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vpx_highbd_sad4x4_avg/;
-
- #
- # Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally
- #
- # Blocks of 3
- add_proto qw/void vpx_highbd_sad64x64x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad64x64x3/;
-
- add_proto qw/void vpx_highbd_sad32x32x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad32x32x3/;
-
- add_proto qw/void vpx_highbd_sad16x16x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad16x16x3/;
-
- add_proto qw/void vpx_highbd_sad16x8x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad16x8x3/;
-
- add_proto qw/void vpx_highbd_sad8x16x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad8x16x3/;
-
- add_proto qw/void vpx_highbd_sad8x8x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad8x8x3/;
-
- add_proto qw/void vpx_highbd_sad4x4x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad4x4x3/;
-
- # Blocks of 8
- add_proto qw/void vpx_highbd_sad64x64x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad64x64x8/;
-
- add_proto qw/void vpx_highbd_sad32x32x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad32x32x8/;
-
- add_proto qw/void vpx_highbd_sad16x16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad16x16x8/;
-
- add_proto qw/void vpx_highbd_sad16x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad16x8x8/;
-
- add_proto qw/void vpx_highbd_sad8x16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad8x16x8/;
-
- add_proto qw/void vpx_highbd_sad8x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad8x8x8/;
-
- add_proto qw/void vpx_highbd_sad8x4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad8x4x8/;
-
- add_proto qw/void vpx_highbd_sad4x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad4x8x8/;
-
- add_proto qw/void vpx_highbd_sad4x4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad4x4x8/;
-
- #
- # Multi-block SAD, comparing a reference to N independent blocks
- #
- add_proto qw/void vpx_highbd_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad64x64x4d/, "$sse2_x86inc";
-
- add_proto qw/void vpx_highbd_sad64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad64x32x4d/, "$sse2_x86inc";
-
- add_proto qw/void vpx_highbd_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad32x64x4d/, "$sse2_x86inc";
-
- add_proto qw/void vpx_highbd_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad32x32x4d/, "$sse2_x86inc";
-
- add_proto qw/void vpx_highbd_sad32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad32x16x4d/, "$sse2_x86inc";
-
- add_proto qw/void vpx_highbd_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad16x32x4d/, "$sse2_x86inc";
-
- add_proto qw/void vpx_highbd_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad16x16x4d/, "$sse2_x86inc";
-
- add_proto qw/void vpx_highbd_sad16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad16x8x4d/, "$sse2_x86inc";
-
- add_proto qw/void vpx_highbd_sad8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad8x16x4d/, "$sse2_x86inc";
-
- add_proto qw/void vpx_highbd_sad8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad8x8x4d/, "$sse2_x86inc";
-
- add_proto qw/void vpx_highbd_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad8x4x4d/, "$sse2_x86inc";
-
- add_proto qw/void vpx_highbd_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad4x8x4d/, "$sse2_x86inc";
-
- add_proto qw/void vpx_highbd_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad4x4x4d/, "$sse2_x86inc";
-
- #
- # Structured Similarity (SSIM)
- #
- if (vpx_config("CONFIG_INTERNAL_STATS") eq "yes") {
+ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vpx_highbd_ssim_parms_8x8/, "const uint16_t *s, int sp, const uint16_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
- specialize qw/vpx_highbd_ssim_parms_8x8/;
}
-} # CONFIG_VP9_HIGHBITDEPTH
+}
} # CONFIG_ENCODERS
if (vpx_config("CONFIG_ENCODERS") eq "yes" || vpx_config("CONFIG_POSTPROC") eq "yes" || vpx_config("CONFIG_VP9_POSTPROC") eq "yes") {
#
-# Variance
-#
-add_proto qw/unsigned int vpx_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance64x64 sse2 avx2 neon msa/;
-
-add_proto qw/unsigned int vpx_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance64x32 sse2 avx2 neon msa/;
-
-add_proto qw/unsigned int vpx_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance32x64 sse2 neon msa/;
-
-add_proto qw/unsigned int vpx_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance32x32 sse2 avx2 neon msa/;
-
-add_proto qw/unsigned int vpx_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance32x16 sse2 avx2 msa/;
-
-add_proto qw/unsigned int vpx_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance16x32 sse2 msa/;
-
-add_proto qw/unsigned int vpx_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance16x16 mmx sse2 avx2 media neon msa/;
-
-add_proto qw/unsigned int vpx_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance16x8 mmx sse2 neon msa/;
-
-add_proto qw/unsigned int vpx_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance8x16 mmx sse2 neon msa/;
-
-add_proto qw/unsigned int vpx_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance8x8 mmx sse2 media neon msa/;
-
-add_proto qw/unsigned int vpx_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance8x4 sse2 msa/;
-
-add_proto qw/unsigned int vpx_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance4x8 sse2 msa/;
-
-add_proto qw/unsigned int vpx_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance4x4 mmx sse2 msa/;
-
-#
# Specialty Variance
#
add_proto qw/void vpx_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- specialize qw/vpx_get16x16var sse2 avx2 neon msa/;
-
add_proto qw/void vpx_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- specialize qw/vpx_get8x8var mmx sse2 neon msa/;
+
+specialize qw/vpx_get16x16var avx2 sse2 neon msa/;
+specialize qw/vpx_get8x8var mmx sse2 neon msa/;
add_proto qw/unsigned int vpx_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vpx_mse16x16 mmx sse2 avx2 media neon msa/;
-
add_proto qw/unsigned int vpx_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vpx_mse16x8 sse2 msa/;
-
add_proto qw/unsigned int vpx_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vpx_mse8x16 sse2 msa/;
-
add_proto qw/unsigned int vpx_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vpx_mse8x8 sse2 msa/;
-add_proto qw/unsigned int vpx_get_mb_ss/, "const int16_t *";
- specialize qw/vpx_get_mb_ss mmx sse2 msa/;
+specialize qw/vpx_mse16x16 mmx avx2 sse2 media neon msa/;
+specialize qw/vpx_mse16x8 sse2 msa/;
+specialize qw/vpx_mse8x16 sse2 msa/;
+specialize qw/vpx_mse8x8 sse2 msa/;
-add_proto qw/unsigned int vpx_get4x4sse_cs/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride";
- specialize qw/vpx_get4x4sse_cs neon msa/;
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+ foreach $bd (8, 10, 12) {
+ add_proto qw/void/, "vpx_highbd_${bd}_get16x16var", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
+ add_proto qw/void/, "vpx_highbd_${bd}_get8x8var", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
-add_proto qw/void vpx_comp_avg_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride";
+ add_proto qw/unsigned int/, "vpx_highbd_${bd}_mse16x16", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ add_proto qw/unsigned int/, "vpx_highbd_${bd}_mse16x8", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ add_proto qw/unsigned int/, "vpx_highbd_${bd}_mse8x16", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ add_proto qw/unsigned int/, "vpx_highbd_${bd}_mse8x8", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
-if (vpx_config("CONFIG_EXT_INTER") eq "yes") {
- add_proto qw/unsigned int vpx_masked_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_variance32x16 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_variance16x32 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_variance64x32 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_variance32x64 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_variance32x32 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_variance64x64 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_variance16x16 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_variance16x8 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_variance8x16 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_variance8x8 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_variance8x4 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_variance4x8 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_variance4x4 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_sub_pixel_variance64x64 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_sub_pixel_variance32x64 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_sub_pixel_variance64x32 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_sub_pixel_variance32x16 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_sub_pixel_variance16x32 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_sub_pixel_variance32x32 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_sub_pixel_variance16x16 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_sub_pixel_variance8x16 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_sub_pixel_variance16x8 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_sub_pixel_variance8x8 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_sub_pixel_variance8x4 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_sub_pixel_variance4x8 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_sub_pixel_variance4x4 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sad64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_masked_sad64x64 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sad32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_masked_sad32x64 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sad64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_masked_sad64x32 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sad32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_masked_sad32x16 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sad16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_masked_sad16x32 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sad32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_masked_sad32x32 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sad16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_masked_sad16x16 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sad16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_masked_sad16x8 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sad8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_masked_sad8x16 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sad8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_masked_sad8x8 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sad8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_masked_sad8x4 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sad4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_masked_sad4x8 ssse3/;
-
- add_proto qw/unsigned int vpx_masked_sad4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_masked_sad4x4 ssse3/;
-
- if (vpx_config("CONFIG_EXT_PARTITION") eq "yes") {
- add_proto qw/unsigned int vpx_masked_variance128x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_variance128x128/;
-
- add_proto qw/unsigned int vpx_masked_variance128x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_variance128x64/;
-
- add_proto qw/unsigned int vpx_masked_variance64x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_variance64x128/;
-
- add_proto qw/unsigned int vpx_masked_sub_pixel_variance128x128/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_sub_pixel_variance128x128/;
-
- add_proto qw/unsigned int vpx_masked_sub_pixel_variance128x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_sub_pixel_variance128x64/;
-
- add_proto qw/unsigned int vpx_masked_sub_pixel_variance64x128/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_masked_sub_pixel_variance64x128/;
-
- add_proto qw/unsigned int vpx_masked_sad128x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_masked_sad128x128/;
-
- add_proto qw/unsigned int vpx_masked_sad128x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_masked_sad128x64/;
-
- add_proto qw/unsigned int vpx_masked_sad64x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_masked_sad64x128/;
+ specialize "vpx_highbd_${bd}_mse16x16", qw/sse2/;
+ specialize "vpx_highbd_${bd}_mse8x8", qw/sse2/;
}
}
+#
+# ...
+#
if (vpx_config("CONFIG_AFFINE_MOTION") eq "yes") {
add_proto qw/void vpx_upsampled_pred/, "uint8_t *comp_pred, int width, int height, const uint8_t *ref, int ref_stride";
specialize qw/vpx_upsampled_pred sse2/;
@@ -1620,796 +1249,129 @@
}
#
-# Subpixel Variance
+# ...
#
-add_proto qw/uint32_t vpx_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_sub_pixel_variance64x64 avx2 neon msa/, "$sse2_x86inc", "$ssse3_x86inc";
+add_proto qw/unsigned int vpx_get_mb_ss/, "const int16_t *";
+add_proto qw/unsigned int vpx_get4x4sse_cs/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride";
-add_proto qw/uint32_t vpx_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_sub_pixel_variance64x32 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_get_mb_ss mmx sse2 msa/;
+specialize qw/vpx_get4x4sse_cs neon msa/;
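+# (For reference: vpx_get_mb_ss returns the sum of squares of the 256
+# int16_t entries of a 16x16 macroblock, and vpx_get4x4sse_cs the sum of
+# squared differences between a 4x4 src and ref block. A minimal sketch of
+# the former, in C-like pseudocode:
+#   unsigned int ss = 0;
+#   for (i = 0; i < 256; ++i) ss += src[i] * src[i];
+# )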
-add_proto qw/uint32_t vpx_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_sub_pixel_variance32x64 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+#
+# Variance / Subpixel Variance / Subpixel Avg Variance
+#
+foreach (@block_sizes) {
+ ($w, $h) = @$_;
+ add_proto qw/unsigned int/, "vpx_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/uint32_t/, "vpx_sub_pixel_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t/, "vpx_sub_pixel_avg_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+}
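+# For illustration: @block_sizes (defined earlier in this file) holds
+# [$w, $h] pairs, so with ($w, $h) = (64, 64) one pass through the loop
+# above is equivalent to the hand-written stanza it replaces:
+#   add_proto qw/unsigned int/, "vpx_variance64x64", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+# and likewise for the sub_pixel and sub_pixel_avg prototypes.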
-add_proto qw/uint32_t vpx_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_sub_pixel_variance32x32 avx2 neon msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_variance64x64 sse2 avx2 neon msa/;
+specialize qw/vpx_variance64x32 sse2 avx2 neon msa/;
+specialize qw/vpx_variance32x64 sse2 neon msa/;
+specialize qw/vpx_variance32x32 sse2 avx2 neon msa/;
+specialize qw/vpx_variance32x16 sse2 avx2 msa/;
+specialize qw/vpx_variance16x32 sse2 msa/;
+specialize qw/vpx_variance16x16 mmx sse2 avx2 media neon msa/;
+specialize qw/vpx_variance16x8 mmx sse2 neon msa/;
+specialize qw/vpx_variance8x16 mmx sse2 neon msa/;
+specialize qw/vpx_variance8x8 mmx sse2 media neon msa/;
+specialize qw/vpx_variance8x4 sse2 msa/;
+specialize qw/vpx_variance4x8 sse2 msa/;
+specialize qw/vpx_variance4x4 mmx sse2 msa/;
-add_proto qw/uint32_t vpx_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_sub_pixel_variance32x16 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_variance64x64 avx2 neon msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_variance64x32 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_variance32x64 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_variance32x32 avx2 neon msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_variance32x16 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_variance16x32 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_variance16x16 mmx media neon msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_variance16x8 mmx msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_variance8x16 mmx msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_variance8x8 mmx media neon msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_variance8x4 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_variance4x8 msa/, "$sse_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_variance4x4 mmx msa/, "$sse_x86inc", "$ssse3_x86inc";
-add_proto qw/uint32_t vpx_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_sub_pixel_variance16x32 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_avg_variance64x64 avx2 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_avg_variance64x32 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_avg_variance32x64 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_avg_variance32x32 avx2 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_avg_variance32x16 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_avg_variance16x32 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_avg_variance16x16 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_avg_variance16x8 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_avg_variance8x16 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_avg_variance8x8 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_avg_variance8x4 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_avg_variance4x8 msa/, "$sse_x86inc", "$ssse3_x86inc";
+specialize qw/vpx_sub_pixel_avg_variance4x4 msa/, "$sse_x86inc", "$ssse3_x86inc";
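+# (Note: the 4-pixel-wide sizes are specialized with "$sse_x86inc" rather
+# than "$sse2_x86inc"; the x86 assembly for those widths apparently only
+# requires SSE instructions.)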
-add_proto qw/uint32_t vpx_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_sub_pixel_variance16x16 mmx media neon msa/, "$sse2_x86inc", "$ssse3_x86inc";
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+ foreach $bd (8, 10, 12) {
+ foreach (@block_sizes) {
+ ($w, $h) = @$_;
+ add_proto qw/unsigned int/, "vpx_highbd_${bd}_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t/, "vpx_highbd_${bd}_sub_pixel_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t/, "vpx_highbd_${bd}_sub_pixel_avg_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ if ($w != 128 && $h != 128 && $w != 4 && $h != 4) {
+ specialize "vpx_highbd_${bd}_variance${w}x${h}", "sse2";
+ }
+ if ($w != 128 && $h != 128 && $w != 4) {
+ specialize "vpx_highbd_${bd}_sub_pixel_variance${w}x${h}", $sse2_x86inc;
+ specialize "vpx_highbd_${bd}_sub_pixel_avg_variance${w}x${h}", $sse2_x86inc;
+ }
+ }
+ }
+} # CONFIG_VP9_HIGHBITDEPTH
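+# Note on the guards above: sizes with a 128 dimension (CONFIG_EXT_PARTITION
+# blocks) get no SSE2 specialization, presumably because no SIMD versions
+# exist yet; variance additionally skips any size with a 4-pixel dimension,
+# and the sub-pixel variants skip 4-pixel-wide sizes. For example, bd=10,
+# ($w, $h) = (16, 16) emits:
+#   specialize "vpx_highbd_10_variance16x16", "sse2";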
-add_proto qw/uint32_t vpx_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_sub_pixel_variance16x8 mmx msa/, "$sse2_x86inc", "$ssse3_x86inc";
+#
+# Masked Variance / Masked Subpixel Variance
+#
+if (vpx_config("CONFIG_EXT_INTER") eq "yes") {
+ foreach (@block_sizes) {
+ ($w, $h) = @$_;
+ add_proto qw/unsigned int/, "vpx_masked_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
+ add_proto qw/unsigned int/, "vpx_masked_sub_pixel_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
+ specialize "vpx_masked_variance${w}x${h}", qw/ssse3/;
+ specialize "vpx_masked_sub_pixel_variance${w}x${h}", qw/ssse3/;
+ }
-add_proto qw/uint32_t vpx_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_sub_pixel_variance8x16 mmx msa/, "$sse2_x86inc", "$ssse3_x86inc";
-
-add_proto qw/uint32_t vpx_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_sub_pixel_variance8x8 mmx media neon msa/, "$sse2_x86inc", "$ssse3_x86inc";
-
-add_proto qw/uint32_t vpx_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_sub_pixel_variance8x4 msa/, "$sse2_x86inc", "$ssse3_x86inc";
-
-add_proto qw/uint32_t vpx_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_sub_pixel_variance4x8 msa/, "$sse_x86inc", "$ssse3_x86inc";
-
-add_proto qw/uint32_t vpx_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_sub_pixel_variance4x4 mmx msa/, "$sse_x86inc", "$ssse3_x86inc";
-
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance64x64 avx2 msa/, "$sse2_x86inc", "$ssse3_x86inc";
-
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance64x32 msa/, "$sse2_x86inc", "$ssse3_x86inc";
-
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance32x64 msa/, "$sse2_x86inc", "$ssse3_x86inc";
-
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance32x32 avx2 msa/, "$sse2_x86inc", "$ssse3_x86inc";
-
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance32x16 msa/, "$sse2_x86inc", "$ssse3_x86inc";
-
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance16x32 msa/, "$sse2_x86inc", "$ssse3_x86inc";
-
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance16x16 msa/, "$sse2_x86inc", "$ssse3_x86inc";
-
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance16x8 msa/, "$sse2_x86inc", "$ssse3_x86inc";
-
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance8x16 msa/, "$sse2_x86inc", "$ssse3_x86inc";
-
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance8x8 msa/, "$sse2_x86inc", "$ssse3_x86inc";
-
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance8x4 msa/, "$sse2_x86inc", "$ssse3_x86inc";
-
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance4x8 msa/, "$sse_x86inc", "$ssse3_x86inc";
-
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance4x4 msa/, "$sse_x86inc", "$ssse3_x86inc";
+ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+ foreach $bd ("_", "_10_", "_12_") {
+ foreach (@block_sizes) {
+ ($w, $h) = @$_;
+ add_proto qw/unsigned int/, "vpx_highbd${bd}masked_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
+ add_proto qw/unsigned int/, "vpx_highbd${bd}masked_sub_pixel_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
+ specialize "vpx_highbd${bd}masked_variance${w}x${h}", qw/ssse3/;
+ specialize "vpx_highbd${bd}masked_sub_pixel_variance${w}x${h}", qw/ssse3/;
+ }
+ }
+ }
+}
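+# The "_", "_10_", "_12_" infixes keep the generated names identical to the
+# hand-written ones removed below; e.g. ${bd} = "_10_", ($w, $h) = (16, 16)
+# produces the equivalent of:
+#   specialize "vpx_highbd_10_masked_variance16x16", qw/ssse3/;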
#
# Specialty Subpixel
#
-add_proto qw/uint32_t vpx_variance_halfpixvar16x16_h/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_variance_halfpixvar16x16_h mmx sse2 media/;
+add_proto qw/uint32_t vpx_variance_halfpixvar16x16_h/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse";
+specialize qw/vpx_variance_halfpixvar16x16_h mmx sse2 media/;
-add_proto qw/uint32_t vpx_variance_halfpixvar16x16_v/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_variance_halfpixvar16x16_v mmx sse2 media/;
+add_proto qw/uint32_t vpx_variance_halfpixvar16x16_v/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse";
+specialize qw/vpx_variance_halfpixvar16x16_v mmx sse2 media/;
-add_proto qw/uint32_t vpx_variance_halfpixvar16x16_hv/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_variance_halfpixvar16x16_hv mmx sse2 media/;
+add_proto qw/uint32_t vpx_variance_halfpixvar16x16_hv/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse";
+specialize qw/vpx_variance_halfpixvar16x16_hv mmx sse2 media/;
+#
+# Comp Avg
+#
+add_proto qw/void vpx_comp_avg_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride";
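+# (vpx_comp_avg_pred is left unspecialized here; its C implementation
+# computes a rounded per-pixel average of the two predictors, roughly
+#   comp_pred[j] = (pred[j] + ref[j] + 1) >> 1
+# advancing ref by ref_stride each row.)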
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
- add_proto qw/unsigned int vpx_highbd_12_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_variance64x64 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_12_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_variance64x32 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_12_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_variance32x64 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_12_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_variance32x32 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_12_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_variance32x16 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_12_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_variance16x32 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_12_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_variance16x16 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_12_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_variance16x8 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_12_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_variance8x16 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_12_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_variance8x8 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_12_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- add_proto qw/unsigned int vpx_highbd_12_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- add_proto qw/unsigned int vpx_highbd_12_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-
- add_proto qw/unsigned int vpx_highbd_10_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_variance64x64 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_10_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_variance64x32 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_10_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_variance32x64 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_10_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_variance32x32 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_10_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_variance32x16 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_10_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_variance16x32 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_10_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_variance16x16 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_10_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_variance16x8 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_10_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_variance8x16 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_10_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_variance8x8 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_10_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- add_proto qw/unsigned int vpx_highbd_10_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- add_proto qw/unsigned int vpx_highbd_10_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-
- add_proto qw/unsigned int vpx_highbd_8_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_8_variance64x64 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_8_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_8_variance64x32 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_8_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_8_variance32x64 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_8_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_8_variance32x32 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_8_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_8_variance32x16 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_8_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_8_variance16x32 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_8_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_8_variance16x16 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_8_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_8_variance16x8 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_8_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_8_variance8x16 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_8_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_highbd_8_variance8x8 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_8_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- add_proto qw/unsigned int vpx_highbd_8_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- add_proto qw/unsigned int vpx_highbd_8_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-
- add_proto qw/void vpx_highbd_8_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- add_proto qw/void vpx_highbd_8_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
-
- add_proto qw/void vpx_highbd_10_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- add_proto qw/void vpx_highbd_10_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
-
- add_proto qw/void vpx_highbd_12_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- add_proto qw/void vpx_highbd_12_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
-
- add_proto qw/unsigned int vpx_highbd_8_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vpx_highbd_8_mse16x16 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_8_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- add_proto qw/unsigned int vpx_highbd_8_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- add_proto qw/unsigned int vpx_highbd_8_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vpx_highbd_8_mse8x8 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_10_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_mse16x16 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_10_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- add_proto qw/unsigned int vpx_highbd_10_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- add_proto qw/unsigned int vpx_highbd_10_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_mse8x8 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_12_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_mse16x16 sse2/;
-
- add_proto qw/unsigned int vpx_highbd_12_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- add_proto qw/unsigned int vpx_highbd_12_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- add_proto qw/unsigned int vpx_highbd_12_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_mse8x8 sse2/;
-
- if (vpx_config("CONFIG_EXT_INTER") eq "yes") {
- add_proto qw/unsigned int vpx_highbd_masked_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_variance32x16 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_variance16x32 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_variance64x32 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_variance32x64 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_variance32x32 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_variance64x64 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_variance16x16 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_variance16x8 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_variance8x16 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_variance8x8 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_variance8x4 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_variance4x8 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_variance4x4 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_variance32x16 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_variance16x32 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_variance64x32 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_variance32x64 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_variance32x32 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_variance64x64 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_variance16x16 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_variance16x8 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_variance8x16 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_variance8x8 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_variance8x4 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_variance4x8 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_variance4x4 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_variance32x16 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_variance16x32 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_variance64x32 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_variance32x64 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_variance32x32 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_variance64x64 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_variance16x16 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_variance16x8 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_variance8x16 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_variance8x8 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_variance8x4 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_variance4x8 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_variance4x4 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_sub_pixel_variance64x64 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_sub_pixel_variance64x32 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_sub_pixel_variance32x64 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_sub_pixel_variance32x32 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_sub_pixel_variance32x16 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_sub_pixel_variance16x32 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_sub_pixel_variance16x16 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_sub_pixel_variance16x8 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_sub_pixel_variance8x16 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_sub_pixel_variance8x8 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_sub_pixel_variance8x4 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_sub_pixel_variance4x8 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_sub_pixel_variance4x4 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_sub_pixel_variance64x64 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_sub_pixel_variance64x32 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_sub_pixel_variance32x64 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_sub_pixel_variance32x32 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_sub_pixel_variance32x16 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_sub_pixel_variance16x32 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_sub_pixel_variance16x16 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_sub_pixel_variance16x8 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_sub_pixel_variance8x16 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_sub_pixel_variance8x8 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_sub_pixel_variance8x4 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_sub_pixel_variance4x8 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_sub_pixel_variance4x4 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_sub_pixel_variance64x64 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_sub_pixel_variance64x32 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_sub_pixel_variance32x64 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_sub_pixel_variance32x32 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_sub_pixel_variance32x16 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_sub_pixel_variance16x32 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_sub_pixel_variance16x16 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_sub_pixel_variance16x8 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_sub_pixel_variance8x16 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_sub_pixel_variance8x8 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_sub_pixel_variance8x4 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_sub_pixel_variance4x8 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_sub_pixel_variance4x4 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sad64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_highbd_masked_sad64x64 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sad32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_highbd_masked_sad32x64 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sad64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_highbd_masked_sad64x32 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sad32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_highbd_masked_sad32x16 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sad16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_highbd_masked_sad16x32 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sad32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_highbd_masked_sad32x32 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sad16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_highbd_masked_sad16x16 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sad16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_highbd_masked_sad16x8 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sad8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_highbd_masked_sad8x16 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sad8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_highbd_masked_sad8x8 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sad8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_highbd_masked_sad8x4 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sad4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_highbd_masked_sad4x8 ssse3/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sad4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_highbd_masked_sad4x4 ssse3/;
-
- if (vpx_config("CONFIG_EXT_PARTITION") eq "yes") {
- add_proto qw/unsigned int vpx_highbd_masked_variance128x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_variance128x128/;
-
- add_proto qw/unsigned int vpx_highbd_masked_variance128x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_variance128x64/;
-
- add_proto qw/unsigned int vpx_highbd_masked_variance64x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_variance64x128/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_variance128x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_variance128x128/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_variance128x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_variance128x64/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_variance64x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_variance64x128/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_variance128x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_variance128x128/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_variance128x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_variance128x64/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_variance64x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_variance64x128/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sub_pixel_variance128x128/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_sub_pixel_variance128x128/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sub_pixel_variance128x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_sub_pixel_variance128x64/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sub_pixel_variance64x128/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_masked_sub_pixel_variance64x128/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_sub_pixel_variance128x128/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_sub_pixel_variance128x128/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_sub_pixel_variance128x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_sub_pixel_variance128x64/;
-
- add_proto qw/unsigned int vpx_highbd_10_masked_sub_pixel_variance64x128/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_10_masked_sub_pixel_variance64x128/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_sub_pixel_variance128x128/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_sub_pixel_variance128x128/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_sub_pixel_variance128x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_sub_pixel_variance128x64/;
-
- add_proto qw/unsigned int vpx_highbd_12_masked_sub_pixel_variance64x128/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
- specialize qw/vpx_highbd_12_masked_sub_pixel_variance64x128/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sad128x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_highbd_masked_sad128x128/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sad128x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_highbd_masked_sad128x64/;
-
- add_proto qw/unsigned int vpx_highbd_masked_sad64x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
- specialize qw/vpx_highbd_masked_sad64x128/;
- }
- }
-
add_proto qw/void vpx_highbd_comp_avg_pred/, "uint16_t *comp_pred, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride";
-
- #
- # Subpixel Variance
- #
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_12_sub_pixel_variance64x64/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_12_sub_pixel_variance64x32/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_12_sub_pixel_variance32x64/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_12_sub_pixel_variance32x32/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_12_sub_pixel_variance32x16/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_12_sub_pixel_variance16x32/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_12_sub_pixel_variance16x16/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_12_sub_pixel_variance16x8/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_12_sub_pixel_variance8x16/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_12_sub_pixel_variance8x8/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_12_sub_pixel_variance8x4/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_10_sub_pixel_variance64x64/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_10_sub_pixel_variance64x32/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_10_sub_pixel_variance32x64/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_10_sub_pixel_variance32x32/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_10_sub_pixel_variance32x16/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_10_sub_pixel_variance16x32/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_10_sub_pixel_variance16x16/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_10_sub_pixel_variance16x8/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_10_sub_pixel_variance8x16/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_10_sub_pixel_variance8x8/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_10_sub_pixel_variance8x4/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_8_sub_pixel_variance64x64/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_8_sub_pixel_variance64x32/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_8_sub_pixel_variance32x64/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_8_sub_pixel_variance32x32/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_8_sub_pixel_variance32x16/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_8_sub_pixel_variance16x32/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_8_sub_pixel_variance16x16/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_8_sub_pixel_variance16x8/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_8_sub_pixel_variance8x16/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_8_sub_pixel_variance8x8/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_8_sub_pixel_variance8x4/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
-
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_12_sub_pixel_avg_variance64x64/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_12_sub_pixel_avg_variance64x32/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_12_sub_pixel_avg_variance32x64/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_12_sub_pixel_avg_variance32x32/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_12_sub_pixel_avg_variance32x16/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_12_sub_pixel_avg_variance16x32/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_12_sub_pixel_avg_variance16x16/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_12_sub_pixel_avg_variance16x8/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_12_sub_pixel_avg_variance8x16/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_12_sub_pixel_avg_variance8x8/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_12_sub_pixel_avg_variance8x4/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_10_sub_pixel_avg_variance64x64/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_10_sub_pixel_avg_variance64x32/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_10_sub_pixel_avg_variance32x64/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_10_sub_pixel_avg_variance32x32/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_10_sub_pixel_avg_variance32x16/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_10_sub_pixel_avg_variance16x32/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_10_sub_pixel_avg_variance16x16/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_10_sub_pixel_avg_variance16x8/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_10_sub_pixel_avg_variance8x16/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_10_sub_pixel_avg_variance8x8/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_10_sub_pixel_avg_variance8x4/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_8_sub_pixel_avg_variance64x64/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_8_sub_pixel_avg_variance64x32/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_8_sub_pixel_avg_variance32x64/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_8_sub_pixel_avg_variance32x32/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_8_sub_pixel_avg_variance32x16/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_8_sub_pixel_avg_variance16x32/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_8_sub_pixel_avg_variance16x16/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_8_sub_pixel_avg_variance16x8/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_8_sub_pixel_avg_variance8x16/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_8_sub_pixel_avg_variance8x8/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_8_sub_pixel_avg_variance8x4/, "$sse2_x86inc";
-
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+}
-} # CONFIG_VP9_HIGHBITDEPTH
} # CONFIG_ENCODERS || CONFIG_POSTPROC || CONFIG_VP9_POSTPROC
1;
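
For context, each add_proto/specialize pair above feeds libvpx's run-time CPU
detection (RTCD) generator, which emits one prototype per variant plus a
dispatch pointer bound at init time. A minimal sketch of that shape for one of
the new 128x128 functions; only the _c/_ssse3 naming follows the convention
above, the setup helper is hypothetical:

#include <stdint.h>

/* Prototypes the generator would emit for the C reference and the
   SSSE3 specialization (one per "specialize" architecture). */
unsigned int vpx_masked_sad128x128_c(const uint8_t *src_ptr, int source_stride,
                                     const uint8_t *ref_ptr, int ref_stride,
                                     const uint8_t *mask, int mask_stride);
unsigned int vpx_masked_sad128x128_ssse3(const uint8_t *src_ptr,
                                         int source_stride,
                                         const uint8_t *ref_ptr, int ref_stride,
                                         const uint8_t *mask, int mask_stride);

/* Dispatch pointer, bound once according to detected CPU flags. */
unsigned int (*vpx_masked_sad128x128)(const uint8_t *src_ptr,
                                      int source_stride,
                                      const uint8_t *ref_ptr, int ref_stride,
                                      const uint8_t *mask, int mask_stride);

static void setup_masked_sad_sketch(int have_ssse3) {
  vpx_masked_sad128x128 = vpx_masked_sad128x128_c;
  if (have_ssse3) vpx_masked_sad128x128 = vpx_masked_sad128x128_ssse3;
}
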
diff --git a/vpx_dsp/vpx_filter.h b/vpx_dsp/vpx_filter.h
index e049f74..cfe8161 100644
--- a/vpx_dsp/vpx_filter.h
+++ b/vpx_dsp/vpx_filter.h
@@ -29,7 +29,18 @@
#define BIL_SUBPEL_BITS 3
#define BIL_SUBPEL_SHIFTS (1 << BIL_SUBPEL_BITS)
-extern const uint8_t vpx_bilinear_filters[BIL_SUBPEL_SHIFTS][2];
+
+// 2-tap bilinear filters
+static const uint8_t bilinear_filters_2t[BIL_SUBPEL_SHIFTS][2] = {
+ { 128, 0 },
+ { 112, 16 },
+ { 96, 32 },
+ { 80, 48 },
+ { 64, 64 },
+ { 48, 80 },
+ { 32, 96 },
+ { 16, 112 },
+};
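
Each row of bilinear_filters_2t is a pair of taps summing to 128
(1 << FILTER_BITS, with FILTER_BITS = 7 in this header), one row per 1/8-pel
position. A scalar sketch of how a row would be applied; the helper name is
illustrative, not part of the file:

#include <stdint.h>

// Interpolate between two adjacent pixels a and b at a given 1/8-pel
// offset; the taps sum to 128, so round with 64 and shift by 7.
static uint8_t bilinear_2t_sketch(uint8_t a, uint8_t b, int offset) {
  const uint8_t *f = bilinear_filters_2t[offset];  // offset in [0, 7]
  return (uint8_t)((f[0] * a + f[1] * b + 64) >> 7);
}
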
#ifdef __cplusplus
} // extern "C"
diff --git a/vpx_dsp/x86/masked_sad_intrin_ssse3.c b/vpx_dsp/x86/masked_sad_intrin_ssse3.c
index 384f89b..8b9ff10 100644
--- a/vpx_dsp/x86/masked_sad_intrin_ssse3.c
+++ b/vpx_dsp/x86/masked_sad_intrin_ssse3.c
@@ -64,6 +64,11 @@
m, n); \
}
+#if CONFIG_EXT_PARTITION
+MASKSADMXN_SSSE3(128, 128)
+MASKSADMXN_SSSE3(128, 64)
+MASKSADMXN_SSSE3(64, 128)
+#endif // CONFIG_EXT_PARTITION
MASKSADMXN_SSSE3(64, 64)
MASKSADMXN_SSSE3(64, 32)
MASKSADMXN_SSSE3(32, 64)
@@ -100,7 +105,7 @@
MASKSAD4XN_SSSE3(4)
// For width a multiple of 16
-// Assumes values in m are <=64 and w = 16, 32, or 64
+// Assumes values in m are <=64
static INLINE unsigned int masked_sad_ssse3(const uint8_t *a_ptr, int a_stride,
const uint8_t *b_ptr, int b_stride,
const uint8_t *m_ptr, int m_stride,
@@ -255,6 +260,11 @@
msk_stride, m, n); \
}
+#if CONFIG_EXT_PARTITION
+HIGHBD_MASKSADMXN_SSSE3(128, 128)
+HIGHBD_MASKSADMXN_SSSE3(128, 64)
+HIGHBD_MASKSADMXN_SSSE3(64, 128)
+#endif // CONFIG_EXT_PARTITION
HIGHBD_MASKSADMXN_SSSE3(64, 64)
HIGHBD_MASKSADMXN_SSSE3(64, 32)
HIGHBD_MASKSADMXN_SSSE3(32, 64)
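
For reference, the C model these SSSE3 kernels are tested against accumulates
mask-weighted absolute differences and then rounds away the 6 fractional mask
bits; a self-contained sketch along those lines (not the verbatim vpx_dsp C
source):

#include <stdint.h>
#include <stdlib.h> /* abs */

static unsigned int masked_sad_sketch(const uint8_t *a, int a_stride,
                                      const uint8_t *b, int b_stride,
                                      const uint8_t *m, int m_stride,
                                      int width, int height) {
  unsigned int sad = 0;
  int x, y;
  for (y = 0; y < height; y++) {
    for (x = 0; x < width; x++)
      sad += m[x] * abs(a[x] - b[x]); /* mask values are <= 64 */
    a += a_stride;
    b += b_stride;
    m += m_stride;
  }
  /* Drop the 6 fractional bits carried by the mask weighting. */
  return (sad + 31) >> 6;
}
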
diff --git a/vpx_dsp/x86/masked_variance_intrin_ssse3.c b/vpx_dsp/x86/masked_variance_intrin_ssse3.c
index 96af421..ca4f6fc 100644
--- a/vpx_dsp/x86/masked_variance_intrin_ssse3.c
+++ b/vpx_dsp/x86/masked_variance_intrin_ssse3.c
@@ -18,17 +18,63 @@
#include "vpx_ports/mem.h"
#include "vpx_dsp/vpx_filter.h"
-// Assumes mask values are <= 64
-// Log 2 of powers of 2 as an expression
-#define LOG2_P2(n) ((n) == 1 ? 0 : \
- (n) == 2 ? 1 : \
- (n) == 4 ? 2 : \
- (n) == 8 ? 3 : \
- (n) == 16 ? 4 : \
- (n) == 32 ? 5 : \
- (n) == 64 ? 6 : \
- (n) == 128 ? 7 : -1)
+// Half-pixel offset, in 1/8-pel filter units
+#define HALF_PIXEL_OFFSET (BIL_SUBPEL_SHIFTS/2)
+
+/*****************************************************************************
+ * Horizontal additions
+ *****************************************************************************/
+
+static INLINE int32_t hsum_epi32_si32(__m128i v_d) {
+ v_d = _mm_hadd_epi32(v_d, v_d);
+ v_d = _mm_hadd_epi32(v_d, v_d);
+ return _mm_cvtsi128_si32(v_d);
+}
+
+static INLINE int64_t hsum_epi64_si64(__m128i v_q) {
+ v_q = _mm_add_epi64(v_q, _mm_srli_si128(v_q, 8));
+#if ARCH_X86_64
+ return _mm_cvtsi128_si64(v_q);
+#else
+ {
+ int64_t tmp;
+ _mm_storel_epi64((__m128i*)&tmp, v_q);
+ return tmp;
+ }
+#endif
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static INLINE int64_t hsum_epi32_si64(__m128i v_d) {
+ const __m128i v_sign_d = _mm_cmplt_epi32(v_d, _mm_setzero_si128());
+ const __m128i v_0_q = _mm_unpacklo_epi32(v_d, v_sign_d);
+ const __m128i v_1_q = _mm_unpackhi_epi32(v_d, v_sign_d);
+ return hsum_epi64_si64(_mm_add_epi64(v_0_q, v_1_q));
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
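
A scalar restatement of the reductions above, with arrays standing in for
__m128i lanes (names assumed); the sign-extending unpack in hsum_epi32_si64
gives the high-bit-depth path 64-bit headroom when summing error terms over
the largest blocks:

#include <stdint.h>

static int64_t hsum4_epi32_sketch(const int32_t v[4]) {
  /* hsum_epi32_si64: sign-correct 64-bit total of four 32-bit lanes. */
  return (int64_t)v[0] + v[1] + v[2] + v[3];
}

static int64_t hsum2_epi64_sketch(const int64_t v[2]) {
  /* hsum_epi64_si64: total of the two 64-bit lanes. */
  return v[0] + v[1];
}
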
+static INLINE int calc_masked_variance(__m128i v_sum_d, __m128i v_sse_q,
+ unsigned int* sse,
+ const int w, const int h) {
+ int64_t sum64;
+ uint64_t sse64;
+
+ // Horizontal sum
+ sum64 = hsum_epi32_si32(v_sum_d);
+ sse64 = hsum_epi64_si64(v_sse_q);
+
+ sum64 = (sum64 >= 0) ? sum64 : -sum64;
+
+ // Round
+ sum64 = ROUND_POWER_OF_TWO(sum64, 6);
+ sse64 = ROUND_POWER_OF_TWO(sse64, 12);
+
+ // Store the SSE
+ *sse = (unsigned int)sse64;
+ // Compute the variance
+ return *sse - ((sum64 * sum64) / (w * h));
+}
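
Scalar reading of calc_masked_variance: the accumulated sum carries 6
fractional mask bits and the SSE carries 12, ROUND_POWER_OF_TWO(v, n) is
(v + (1 << (n - 1))) >> n, and the result is the usual sse - sum^2 / (w * h).
A standalone sketch with the rounding written out:

#include <stdint.h>

static int calc_masked_variance_sketch(int64_t sum, uint64_t sse,
                                       unsigned int *sse_out, int w, int h) {
  sum = (sum >= 0) ? sum : -sum; /* magnitude only; it gets squared    */
  sum = (sum + 32) >> 6;         /* ROUND_POWER_OF_TWO(sum, 6)         */
  sse = (sse + 2048) >> 12;      /* ROUND_POWER_OF_TWO(sse, 12)        */
  *sse_out = (unsigned int)sse;
  return (int)(*sse_out - (unsigned int)((sum * sum) / (w * h)));
}
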
/*****************************************************************************
* n*16 Wide versions
@@ -98,30 +144,7 @@
m += m_stride;
}
- // Horizontal sum
- v_sum_d = _mm_hadd_epi32(v_sum_d, v_sum_d);
- v_sum_d = _mm_hadd_epi32(v_sum_d, v_sum_d);
- v_sse_q = _mm_add_epi64(v_sse_q, _mm_srli_si128(v_sse_q, 8));
-
- // Round
- v_sum_d = _mm_sub_epi32(v_sum_d, _mm_cmplt_epi32(v_sum_d, v_zero));
- v_sum_d = _mm_add_epi32(v_sum_d, _mm_set_epi32(0, 0, 0, 31));
- v_sum_d = _mm_srai_epi32(v_sum_d, 6);
-
- v_sse_q = _mm_add_epi64(v_sse_q, _mm_set_epi32(0, 0, 0, 2047));
- v_sse_q = _mm_srli_epi64(v_sse_q, 12);
-
- // Store the SSE
- *sse = _mm_cvtsi128_si32(v_sse_q);
-
- // Compute the variance
- v_sum_d = _mm_abs_epi32(v_sum_d);
- v_sum_d = _mm_mul_epu32(v_sum_d, v_sum_d);
- v_sum_d = _mm_srl_epi64(v_sum_d,
- _mm_set_epi32(0, 0, 0, LOG2_P2(w) + LOG2_P2(h)));
- v_sse_q = _mm_sub_epi64(v_sse_q, v_sum_d);
-
- return _mm_cvtsi128_si32(v_sse_q);
+ return calc_masked_variance(v_sum_d, v_sse_q, sse, w, h);
}
#define MASKED_VARWXH(W, H) \
@@ -144,6 +167,11 @@
MASKED_VARWXH(32, 64)
MASKED_VARWXH(64, 32)
MASKED_VARWXH(64, 64)
+#if CONFIG_EXT_PARTITION
+MASKED_VARWXH(64, 128)
+MASKED_VARWXH(128, 64)
+MASKED_VARWXH(128, 128)
+#endif // CONFIG_EXT_PARTITION
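
Each MASKED_VARWXH(W, H) line presumably expands to a thin exported wrapper
over the shared n*16-wide kernel; the macro body sits outside these hunks, so
the following is only an assumed shape for the new 128x128 entry (kernel name
inferred from the highbd counterpart later in this file):

#include <stdint.h>

/* Assumed width-generic kernel, as declared elsewhere in this file. */
static unsigned int masked_variancewxh_ssse3(
    const uint8_t *a, int a_stride, const uint8_t *b, int b_stride,
    const uint8_t *m, int m_stride, int w, int h, unsigned int *sse);

unsigned int vpx_masked_variance128x128_ssse3(
    const uint8_t *a, int a_stride, const uint8_t *b, int b_stride,
    const uint8_t *m, int m_stride, unsigned int *sse) {
  /* Forward to the width-generic kernel with W = H = 128. */
  return masked_variancewxh_ssse3(a, a_stride, b, b_stride,
                                  m, m_stride, 128, 128, sse);
}
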
/*****************************************************************************
* 8 Wide versions
@@ -198,29 +226,7 @@
m += m_stride;
}
- // Horizontal sum
- v_sum_d = _mm_hadd_epi32(v_sum_d, v_sum_d);
- v_sum_d = _mm_hadd_epi32(v_sum_d, v_sum_d);
- v_sse_q = _mm_add_epi64(v_sse_q, _mm_srli_si128(v_sse_q, 8));
-
- // Round
- v_sum_d = _mm_sub_epi32(v_sum_d, _mm_cmplt_epi32(v_sum_d, v_zero));
- v_sum_d = _mm_add_epi32(v_sum_d, _mm_set_epi32(0, 0, 0, 31));
- v_sum_d = _mm_srai_epi32(v_sum_d, 6);
-
- v_sse_q = _mm_add_epi64(v_sse_q, _mm_set_epi32(0, 0, 0, 2047));
- v_sse_q = _mm_srli_epi64(v_sse_q, 12);
-
- // Store the SSE
- *sse = _mm_cvtsi128_si32(v_sse_q);
-
- // Compute the variance
- v_sum_d = _mm_abs_epi32(v_sum_d);
- v_sum_d = _mm_mul_epu32(v_sum_d, v_sum_d);
- v_sum_d = _mm_srl_epi64(v_sum_d, _mm_set_epi32(0, 0, 0, LOG2_P2(h) + 3));
- v_sse_q = _mm_sub_epi64(v_sse_q, v_sum_d);
-
- return _mm_cvtsi128_si32(v_sse_q);
+ return calc_masked_variance(v_sum_d, v_sse_q, sse, 8, h);
}
#define MASKED_VAR8XH(H) \
@@ -302,29 +308,7 @@
m += m_stride * 2;
}
- // Horizontal sum
- v_sum_d = _mm_hadd_epi32(v_sum_d, v_sum_d);
- v_sum_d = _mm_hadd_epi32(v_sum_d, v_sum_d);
- v_sse_q = _mm_add_epi64(v_sse_q, _mm_srli_si128(v_sse_q, 8));
-
- // Round
- v_sum_d = _mm_sub_epi32(v_sum_d, _mm_cmplt_epi32(v_sum_d, v_zero));
- v_sum_d = _mm_add_epi32(v_sum_d, _mm_set_epi32(0, 0, 0, 31));
- v_sum_d = _mm_srai_epi32(v_sum_d, 6);
-
- v_sse_q = _mm_add_epi64(v_sse_q, _mm_set_epi32(0, 0, 0, 2047));
- v_sse_q = _mm_srli_epi64(v_sse_q, 12);
-
- // Store the SSE
- *sse = _mm_cvtsi128_si32(v_sse_q);
-
- // Compute the variance
- v_sum_d = _mm_abs_epi32(v_sum_d);
- v_sum_d = _mm_mul_epu32(v_sum_d, v_sum_d);
- v_sum_d = _mm_srl_epi64(v_sum_d, _mm_set_epi32(0, 0, 0, LOG2_P2(h) + 2));
- v_sse_q = _mm_sub_epi64(v_sse_q, v_sum_d);
-
- return _mm_cvtsi128_si32(v_sse_q);
+ return calc_masked_variance(v_sum_d, v_sse_q, sse, 4, h);
}
#define MASKED_VAR4XH(H) \
@@ -350,13 +334,13 @@
const uint16_t *b, int b_stride,
const uint8_t *m, int m_stride,
int w, int h,
- __m128i* v_sum_d, __m128i* v_sse_q) {
+ int64_t *sum, uint64_t *sse) {
int ii, jj;
const __m128i v_zero = _mm_setzero_si128();
- *v_sum_d = _mm_setzero_si128();
- *v_sse_q = _mm_setzero_si128();
+ __m128i v_sum_d = _mm_setzero_si128();
+ __m128i v_sse_q = _mm_setzero_si128();
assert((w % 8) == 0);
@@ -373,7 +357,7 @@
// Difference: [-4095, 4095]
const __m128i v_d_w = _mm_sub_epi16(v_a_w, v_b_w);
- // Error - [-4095, 4095] * [0, 64] => fits in 19 bits (incld sign bit)
+ // Error - [-4095, 4095] * [0, 64] => sum of 2 of these fits in 20 bits (incl sign)
const __m128i v_e_d = _mm_madd_epi16(v_d_w, v_m_w);
// Squared error - max (18 bits * 18 bits) = 36 bits (no sign bit)
@@ -397,8 +381,8 @@
v_se_q = _mm_add_epi64(v_se0_q, v_se1_q);
// Accumulate
- *v_sum_d = _mm_add_epi32(*v_sum_d, v_e_d);
- *v_sse_q = _mm_add_epi64(*v_sse_q, v_se_q);
+ v_sum_d = _mm_add_epi32(v_sum_d, v_e_d);
+ v_sse_q = _mm_add_epi64(v_sse_q, v_se_q);
}
// Move on to next row
@@ -408,17 +392,13 @@
}
// Horizontal sum
- *v_sum_d = _mm_hadd_epi32(*v_sum_d, *v_sum_d);
- *v_sum_d = _mm_hadd_epi32(*v_sum_d, *v_sum_d);
- *v_sse_q = _mm_add_epi64(*v_sse_q, _mm_srli_si128(*v_sse_q, 8));
+ *sum = hsum_epi32_si64(v_sum_d);
+ *sse = hsum_epi64_si64(v_sse_q);
// Round
- *v_sum_d = _mm_sub_epi32(*v_sum_d, _mm_cmplt_epi32(*v_sum_d, v_zero));
- *v_sum_d = _mm_add_epi32(*v_sum_d, _mm_set_epi32(0, 0, 0, 31));
- *v_sum_d = _mm_srai_epi32(*v_sum_d, 6);
-
- *v_sse_q = _mm_add_epi64(*v_sse_q, _mm_set_epi32(0, 0, 0, 2047));
- *v_sse_q = _mm_srli_epi64(*v_sse_q, 12);
+ *sum = (*sum >= 0) ? *sum : -*sum;
+ *sum = ROUND_POWER_OF_TWO(*sum, 6);
+ *sse = ROUND_POWER_OF_TWO(*sse, 12);
}
// Main calculation for 4 wide blocks
@@ -427,13 +407,13 @@
const uint16_t *b, int b_stride,
const uint8_t *m, int m_stride,
int h,
- __m128i* v_sum_d, __m128i* v_sse_q) {
+ int64_t *sum, uint64_t *sse) {
int ii;
const __m128i v_zero = _mm_setzero_si128();
- *v_sum_d = _mm_setzero_si128();
- *v_sse_q = _mm_setzero_si128();
+ __m128i v_sum_d = _mm_setzero_si128();
+ __m128i v_sse_q = _mm_setzero_si128();
assert((h % 2) == 0);
@@ -481,8 +461,8 @@
v_se_q = _mm_add_epi64(v_se0_q, v_se1_q);
// Accumulate
- *v_sum_d = _mm_add_epi32(*v_sum_d, v_e_d);
- *v_sse_q = _mm_add_epi64(*v_sse_q, v_se_q);
+ v_sum_d = _mm_add_epi32(v_sum_d, v_e_d);
+ v_sse_q = _mm_add_epi64(v_sse_q, v_se_q);
// Move on to next row
a += a_stride * 2;
@@ -491,17 +471,13 @@
}
// Horizontal sum
- *v_sum_d = _mm_hadd_epi32(*v_sum_d, *v_sum_d);
- *v_sum_d = _mm_hadd_epi32(*v_sum_d, *v_sum_d);
- *v_sse_q = _mm_add_epi64(*v_sse_q, _mm_srli_si128(*v_sse_q, 8));
+ *sum = hsum_epi32_si32(v_sum_d);
+ *sse = hsum_epi64_si64(v_sse_q);
// Round
- *v_sum_d = _mm_sub_epi32(*v_sum_d, _mm_cmplt_epi32(*v_sum_d, v_zero));
- *v_sum_d = _mm_add_epi32(*v_sum_d, _mm_set_epi32(0, 0, 0, 31));
- *v_sum_d = _mm_srai_epi32(*v_sum_d, 6);
-
- *v_sse_q = _mm_add_epi64(*v_sse_q, _mm_set_epi32(0, 0, 0, 2047));
- *v_sse_q = _mm_srli_epi64(*v_sse_q, 12);
+ *sum = (*sum >= 0) ? *sum : -*sum;
+ *sum = ROUND_POWER_OF_TWO(*sum, 6);
+ *sse = ROUND_POWER_OF_TWO(*sse, 12);
}
static INLINE unsigned int highbd_masked_variancewxh_ssse3(
@@ -510,26 +486,20 @@
const uint8_t *m, int m_stride,
int w, int h,
unsigned int *sse) {
- __m128i v_sum_d, v_sse_q;
+ uint64_t sse64;
+ int64_t sum64;
if (w == 4)
highbd_masked_variance64_4wide_ssse3(a, a_stride, b, b_stride, m, m_stride,
- h, &v_sum_d, &v_sse_q);
+ h, &sum64, &sse64);
else
highbd_masked_variance64_ssse3(a, a_stride, b, b_stride, m, m_stride, w, h,
- &v_sum_d, &v_sse_q);
+ &sum64, &sse64);
// Store the SSE
- *sse = _mm_cvtsi128_si32(v_sse_q);
-
- // Compute the variance
- v_sum_d = _mm_abs_epi32(v_sum_d);
- v_sum_d = _mm_mul_epu32(v_sum_d, v_sum_d);
- v_sum_d = _mm_srl_epi64(v_sum_d,
- _mm_set_epi32(0, 0, 0, LOG2_P2(w) + LOG2_P2(h)));
- v_sse_q = _mm_sub_epi64(v_sse_q, v_sum_d);
-
- return _mm_cvtsi128_si32(v_sse_q);
+ *sse = (unsigned int)sse64;
+ // Compute and return variance
+ return *sse - ((sum64 * sum64) / (w * h));
}
static INLINE unsigned int highbd_10_masked_variancewxh_ssse3(
@@ -538,32 +508,24 @@
const uint8_t *m, int m_stride,
int w, int h,
unsigned int *sse) {
- __m128i v_sum_d, v_sse_q;
+ uint64_t sse64;
+ int64_t sum64;
if (w == 4)
highbd_masked_variance64_4wide_ssse3(a, a_stride, b, b_stride, m, m_stride,
- h, &v_sum_d, &v_sse_q);
+ h, &sum64, &sse64);
else
highbd_masked_variance64_ssse3(a, a_stride, b, b_stride, m, m_stride, w, h,
- &v_sum_d, &v_sse_q);
+ &sum64, &sse64);
- // Round sum and sse
- v_sum_d = _mm_srai_epi32(_mm_add_epi32(v_sum_d,
- _mm_set_epi32(0, 0, 0, 1 << 1)), 2);
- v_sse_q = _mm_srli_epi64(_mm_add_epi64(v_sse_q,
- _mm_set_epi32(0, 0, 0, 1 << 3)), 4);
+ // Normalise
+ sum64 = ROUND_POWER_OF_TWO(sum64, 2);
+ sse64 = ROUND_POWER_OF_TWO(sse64, 4);
// Store the SSE
- *sse = _mm_cvtsi128_si32(v_sse_q);
-
- // Compute the variance
- v_sum_d = _mm_abs_epi32(v_sum_d);
- v_sum_d = _mm_mul_epu32(v_sum_d, v_sum_d);
- v_sum_d = _mm_srl_epi64(v_sum_d,
- _mm_set_epi32(0, 0, 0, LOG2_P2(w) + LOG2_P2(h)));
- v_sse_q = _mm_sub_epi64(v_sse_q, v_sum_d);
-
- return _mm_cvtsi128_si32(v_sse_q);
+ *sse = (unsigned int)sse64;
+ // Compute and return variance
+ return *sse - ((sum64 * sum64) / (w * h));
}
static INLINE unsigned int highbd_12_masked_variancewxh_ssse3(
@@ -572,32 +534,23 @@
const uint8_t *m, int m_stride,
int w, int h,
unsigned int *sse) {
- __m128i v_sum_d, v_sse_q;
+ uint64_t sse64;
+ int64_t sum64;
if (w == 4)
highbd_masked_variance64_4wide_ssse3(a, a_stride, b, b_stride, m, m_stride,
- h, &v_sum_d, &v_sse_q);
+ h, &sum64, &sse64);
else
highbd_masked_variance64_ssse3(a, a_stride, b, b_stride, m, m_stride, w, h,
- &v_sum_d, &v_sse_q);
+ &sum64, &sse64);
- // Round sum and sse
- v_sum_d = _mm_srai_epi32(_mm_add_epi32(v_sum_d,
- _mm_set_epi32(0, 0, 0, 1 << 3)), 4);
- v_sse_q = _mm_srli_epi64(_mm_add_epi64(v_sse_q,
- _mm_set_epi32(0, 0, 0, 1 << 7)), 8);
+ sum64 = ROUND_POWER_OF_TWO(sum64, 4);
+ sse64 = ROUND_POWER_OF_TWO(sse64, 8);
// Store the SSE
- *sse = _mm_cvtsi128_si32(v_sse_q);
-
- // Compute the variance
- v_sum_d = _mm_abs_epi32(v_sum_d);
- v_sum_d = _mm_mul_epu32(v_sum_d, v_sum_d);
- v_sum_d = _mm_srl_epi64(v_sum_d,
- _mm_set_epi32(0, 0, 0, LOG2_P2(w) + LOG2_P2(h)));
- v_sse_q = _mm_sub_epi64(v_sse_q, v_sum_d);
-
- return _mm_cvtsi128_si32(v_sse_q);
+ *sse = (unsigned int)sse64;
+ // Compute and return variance
+ return *sse - ((sum64 * sum64) / (w * h));
}
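
The 8-, 10- and 12-bit wrappers differ only in how far sum and SSE are scaled
back: 10-bit errors are 4x the 8-bit scale, so the (linear) sum drops 2 extra
bits and the (quadratic) SSE drops 4; at 12-bit it is 4 and 8 bits. A worked
check of the 12-bit path under that reading, for one pixel with a full mask:

#include <stdint.h>

static void highbd12_scaling_sketch(void) {
  const int64_t d = 16;     /* 12-bit error worth 1 in 8-bit units */
  const int64_t e = d * 64; /* madd term with full mask: 1024      */
  int64_t sum = (e + 32) >> 6;                     /* drop mask bits -> 16 */
  uint64_t sse = ((uint64_t)(e * e) + 2048) >> 12; /* mask bits^2 -> 256   */
  sum = (sum + 8) >> 4;   /* 12-bit -> 8-bit scale -> 1 */
  sse = (sse + 128) >> 8; /* squared scale         -> 1 */
  (void)sum;
  (void)sse;
}
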
#define HIGHBD_MASKED_VARWXH(W, H) \
@@ -653,6 +606,11 @@
HIGHBD_MASKED_VARWXH(32, 64)
HIGHBD_MASKED_VARWXH(64, 32)
HIGHBD_MASKED_VARWXH(64, 64)
+#if CONFIG_EXT_PARTITION
+HIGHBD_MASKED_VARWXH(64, 128)
+HIGHBD_MASKED_VARWXH(128, 64)
+HIGHBD_MASKED_VARWXH(128, 128)
+#endif // CONFIG_EXT_PARTITION
#endif
@@ -663,8 +621,8 @@
typedef __m128i (*filter_fn_t)(__m128i v_a_b, __m128i v_b_b,
__m128i v_filter_b);
-static INLINE __m128i apply_filter8(const __m128i v_a_b, const __m128i v_b_b,
- const __m128i v_filter_b) {
+static INLINE __m128i apply_filter_avg(const __m128i v_a_b, const __m128i v_b_b,
+ const __m128i v_filter_b) {
(void) v_filter_b;
return _mm_avg_epu8(v_a_b, v_b_b);
}
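
apply_filter_avg covers the exact half-pel taps {64, 64} with a single
_mm_avg_epu8; the general apply_filter it pairs with lives outside these
hunks, but plausibly interleaves the two inputs and multiplies by the packed
taps with _mm_maddubs_epi16, roughly as below. This is a sketch, not the
file's actual body; note the taps reaching this path are at most 112, so they
are safe as the signed operand of maddubs:

#include <tmmintrin.h> /* SSSE3 */

static __m128i apply_filter_sketch(const __m128i v_a_b, const __m128i v_b_b,
                                   const __m128i v_filter_b) {
  const __m128i v_round_w = _mm_set1_epi16(1 << 6); /* FILTER_BITS == 7 */
  const __m128i v_in_lo_b = _mm_unpacklo_epi8(v_a_b, v_b_b);
  const __m128i v_in_hi_b = _mm_unpackhi_epi8(v_a_b, v_b_b);
  /* (a, b) byte pairs times packed (f0, f1) taps -> 16-bit sums. */
  const __m128i v_t0_w = _mm_maddubs_epi16(v_in_lo_b, v_filter_b);
  const __m128i v_t1_w = _mm_maddubs_epi16(v_in_hi_b, v_filter_b);
  const __m128i v_res_lo_w =
      _mm_srai_epi16(_mm_add_epi16(v_t0_w, v_round_w), 7);
  const __m128i v_res_hi_w =
      _mm_srai_epi16(_mm_add_epi16(v_t1_w, v_round_w), 7);
  return _mm_packus_epi16(v_res_lo_w, v_res_hi_w);
}
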
@@ -735,31 +693,6 @@
*v_sse_q = _mm_add_epi64(*v_sse_q, v_se_hi_q);
}
-static INLINE int calc_masked_variance(__m128i v_sum_d, __m128i v_sse_q,
- unsigned int* sse,
- const int w, const int h) {
- int sum;
-
- // Horizontal sum
- v_sum_d = _mm_hadd_epi32(v_sum_d, v_sum_d);
- v_sum_d = _mm_hadd_epi32(v_sum_d, v_sum_d);
- v_sse_q = _mm_add_epi64(v_sse_q, _mm_srli_si128(v_sse_q, 8));
-
- // Round
- sum = _mm_cvtsi128_si32(v_sum_d);
- sum = (sum >= 0) ? ((sum + 31) >> 6) : -((-sum + 31) >> 6);
-
- v_sse_q = _mm_add_epi64(v_sse_q, _mm_set_epi32(0, 0, 0, 2047));
- v_sse_q = _mm_srli_epi64(v_sse_q, 12);
-
- // Store the SSE
- *sse = _mm_cvtsi128_si32(v_sse_q);
-
- // Compute the variance
- return *sse - (((int64_t)sum * sum) >> (LOG2_P2(h) + LOG2_P2(w)));
-}
-
-
// Functions for width (W) >= 16
unsigned int vpx_masked_subpel_varWxH_xzero(
const uint8_t *src, int src_stride, int yoffset,
@@ -770,9 +703,9 @@
__m128i v_sum_d = _mm_setzero_si128();
__m128i v_sse_q = _mm_setzero_si128();
const __m128i v_filter_b = _mm_set1_epi16((
- vpx_bilinear_filters[yoffset][1] << 8) +
- vpx_bilinear_filters[yoffset][0]);
- assert(yoffset < 8);
+ bilinear_filters_2t[yoffset][1] << 8) +
+ bilinear_filters_2t[yoffset][0]);
+ assert(yoffset < BIL_SUBPEL_SHIFTS);
for (j = 0; j < w; j += 16) {
// Load the first row ready
v_src0_b = _mm_loadu_si128((const __m128i*)(src + j));
@@ -814,9 +747,9 @@
__m128i v_sum_d = _mm_setzero_si128();
__m128i v_sse_q = _mm_setzero_si128();
const __m128i v_filter_b = _mm_set1_epi16((
- vpx_bilinear_filters[xoffset][1] << 8) +
- vpx_bilinear_filters[xoffset][0]);
- assert(xoffset < 8);
+ bilinear_filters_2t[xoffset][1] << 8) +
+ bilinear_filters_2t[xoffset][0]);
+ assert(xoffset < BIL_SUBPEL_SHIFTS);
for (i = 0; i < h; i++) {
for (j = 0; j < w; j += 16) {
// Load this row and one below & apply the filter to them
@@ -846,13 +779,13 @@
__m128i v_sum_d = _mm_setzero_si128();
__m128i v_sse_q = _mm_setzero_si128();
const __m128i v_filterx_b = _mm_set1_epi16((
- vpx_bilinear_filters[xoffset][1] << 8) +
- vpx_bilinear_filters[xoffset][0]);
+ bilinear_filters_2t[xoffset][1] << 8) +
+ bilinear_filters_2t[xoffset][0]);
const __m128i v_filtery_b = _mm_set1_epi16((
- vpx_bilinear_filters[yoffset][1] << 8) +
- vpx_bilinear_filters[yoffset][0]);
- assert(yoffset < 8);
- assert(xoffset < 8);
+ bilinear_filters_2t[yoffset][1] << 8) +
+ bilinear_filters_2t[yoffset][0]);
+ assert(yoffset < BIL_SUBPEL_SHIFTS);
+ assert(xoffset < BIL_SUBPEL_SHIFTS);
for (j = 0; j < w; j += 16) {
// Load the first row ready
v_src0_b = _mm_loadu_si128((const __m128i*)(src + j));
@@ -908,9 +841,9 @@
__m128i v_sum_d = _mm_setzero_si128();
__m128i v_sse_q = _mm_setzero_si128();
__m128i v_filter_b = _mm_set1_epi16((
- vpx_bilinear_filters[yoffset][1] << 8) +
- vpx_bilinear_filters[yoffset][0]);
- assert(yoffset < 8);
+ bilinear_filters_2t[yoffset][1] << 8) +
+ bilinear_filters_2t[yoffset][0]);
+ assert(yoffset < BIL_SUBPEL_SHIFTS);
// Load the first row of src data ready
v_src0_b = _mm_loadl_epi64((const __m128i*)src);
for (i = 0; i < h; i += 4) {
@@ -938,7 +871,7 @@
v_msk2_b = _mm_unpacklo_epi32(v_msk3_b, v_msk2_b);
v_msk0_b = _mm_unpacklo_epi64(v_msk2_b, v_msk0_b);
// Apply the y filter
- if (yoffset == 8) {
+ if (yoffset == HALF_PIXEL_OFFSET) {
v_src1_b = _mm_unpacklo_epi64(v_src3_b, v_src1_b);
v_src2_b = _mm_or_si128(_mm_slli_si128(v_src1_b, 4),
_mm_and_si128(v_src0_b, _mm_setr_epi32(-1, 0, 0, 0)));
@@ -974,13 +907,13 @@
__m128i v_sum_d = _mm_setzero_si128();
__m128i v_sse_q = _mm_setzero_si128();
__m128i v_filter_b = _mm_set1_epi16((
- vpx_bilinear_filters[yoffset][1] << 8) +
- vpx_bilinear_filters[yoffset][0]);
- assert(yoffset < 8);
+ bilinear_filters_2t[yoffset][1] << 8) +
+ bilinear_filters_2t[yoffset][0]);
+ assert(yoffset < BIL_SUBPEL_SHIFTS);
// Load the first row of src data ready
v_src0_b = _mm_loadl_epi64((const __m128i*)src);
for (i = 0; i < h; i += 2) {
- if (yoffset == 8) {
+ if (yoffset == HALF_PIXEL_OFFSET) {
// Load the rest of the source data for these rows
v_src1_b = _mm_or_si128(
_mm_slli_si128(v_src0_b, 8),
@@ -1030,9 +963,9 @@
__m128i v_sum_d = _mm_setzero_si128();
__m128i v_sse_q = _mm_setzero_si128();
__m128i v_filter_b = _mm_set1_epi16((
- vpx_bilinear_filters[xoffset][1] << 8) +
- vpx_bilinear_filters[xoffset][0]);
- assert(xoffset < 8);
+ bilinear_filters_2t[xoffset][1] << 8) +
+ bilinear_filters_2t[xoffset][0]);
+ assert(xoffset < BIL_SUBPEL_SHIFTS);
for (i = 0; i < h; i += 4) {
// Load the src data
v_src0_b = _mm_loadl_epi64((const __m128i*)src);
@@ -1064,7 +997,7 @@
v_msk2_b = _mm_unpacklo_epi32(v_msk3_b, v_msk2_b);
v_msk0_b = _mm_unpacklo_epi64(v_msk2_b, v_msk0_b);
// Apply the x filter
- if (xoffset == 8) {
+ if (xoffset == HALF_PIXEL_OFFSET) {
v_src0_b = _mm_unpacklo_epi64(v_src2_b, v_src0_b);
v_src0_shift_b = _mm_unpacklo_epi64(v_src2_shift_b, v_src0_shift_b);
v_res_b = _mm_avg_epu8(v_src0_b, v_src0_shift_b);
@@ -1093,9 +1026,9 @@
__m128i v_sum_d = _mm_setzero_si128();
__m128i v_sse_q = _mm_setzero_si128();
__m128i v_filter_b = _mm_set1_epi16((
- vpx_bilinear_filters[xoffset][1] << 8) +
- vpx_bilinear_filters[xoffset][0]);
- assert(xoffset < 8);
+ bilinear_filters_2t[xoffset][1] << 8) +
+ bilinear_filters_2t[xoffset][0]);
+ assert(xoffset < BIL_SUBPEL_SHIFTS);
for (i = 0; i < h; i += 2) {
// Load the src data
v_src0_b = _mm_loadu_si128((const __m128i*)(src));
@@ -1103,7 +1036,7 @@
v_src1_b = _mm_loadu_si128((const __m128i*)(src + src_stride));
v_src1_shift_b = _mm_srli_si128(v_src1_b, 1);
// Apply the x filter
- if (xoffset == 8) {
+ if (xoffset == HALF_PIXEL_OFFSET) {
v_src1_b = _mm_unpacklo_epi64(v_src0_b, v_src1_b);
v_src1_shift_b = _mm_unpacklo_epi64(v_src0_shift_b, v_src1_shift_b);
v_res_b = _mm_avg_epu8(v_src1_b, v_src1_shift_b);
@@ -1145,13 +1078,13 @@
__m128i v_sum_d = _mm_setzero_si128();
__m128i v_sse_q = _mm_setzero_si128();
__m128i v_filterx_b = _mm_set1_epi16((
- vpx_bilinear_filters[xoffset][1] << 8) +
- vpx_bilinear_filters[xoffset][0]);
+ bilinear_filters_2t[xoffset][1] << 8) +
+ bilinear_filters_2t[xoffset][0]);
__m128i v_filtery_b = _mm_set1_epi16((
- vpx_bilinear_filters[yoffset][1] << 8) +
- vpx_bilinear_filters[yoffset][0]);
- assert(xoffset < 8);
- assert(yoffset < 8);
+ bilinear_filters_2t[yoffset][1] << 8) +
+ bilinear_filters_2t[yoffset][0]);
+ assert(xoffset < BIL_SUBPEL_SHIFTS);
+ assert(yoffset < BIL_SUBPEL_SHIFTS);
for (i = 0; i < h; i += 4) {
// Load the src data
v_src0_b = _mm_loadl_epi64((const __m128i*)src);
@@ -1167,7 +1100,7 @@
v_src3_shift_b = _mm_srli_si128(v_src3_b, 1);
v_src2_shift_b = _mm_unpacklo_epi32(v_src3_shift_b, v_src2_shift_b);
// Apply the x filter
- if (xoffset == 8) {
+ if (xoffset == HALF_PIXEL_OFFSET) {
v_src0_b = _mm_unpacklo_epi64(v_src2_b, v_src0_b);
v_src0_shift_b = _mm_unpacklo_epi64(v_src2_shift_b, v_src0_shift_b);
v_xres_b[i == 0 ? 0 : 1] = _mm_avg_epu8(v_src0_b, v_src0_shift_b);
@@ -1183,7 +1116,7 @@
v_src0_b = _mm_loadl_epi64((const __m128i*)src);
v_src0_shift_b = _mm_srli_si128(v_src0_b, 1);
// Apply the x filter
- if (xoffset == 8) {
+ if (xoffset == HALF_PIXEL_OFFSET) {
v_extra_row_b = _mm_and_si128(
_mm_avg_epu8(v_src0_b, v_src0_shift_b),
_mm_setr_epi32(-1, 0, 0, 0));
@@ -1203,7 +1136,7 @@
v_extra_row_b);
}
// Apply the y filter
- if (yoffset == 8) {
+ if (yoffset == HALF_PIXEL_OFFSET) {
v_res_b = _mm_avg_epu8(v_xres_b[i == 0 ? 0 : 1], v_temp_b);
} else {
v_res_b = apply_filter(v_xres_b[i == 0 ? 0 : 1], v_temp_b, v_filtery_b);
@@ -1245,21 +1178,20 @@
__m128i v_sum_d = _mm_setzero_si128();
__m128i v_sse_q = _mm_setzero_si128();
__m128i v_filterx_b = _mm_set1_epi16((
- vpx_bilinear_filters[xoffset][1] << 8) +
- vpx_bilinear_filters[xoffset][0]);
+ bilinear_filters_2t[xoffset][1] << 8) +
+ bilinear_filters_2t[xoffset][0]);
__m128i v_filtery_b = _mm_set1_epi16((
- vpx_bilinear_filters[yoffset][1] << 8) +
- vpx_bilinear_filters[yoffset][0]);
- assert(xoffset < 8);
- assert(yoffset < 8);
-
+ bilinear_filters_2t[yoffset][1] << 8) +
+ bilinear_filters_2t[yoffset][0]);
+ assert(xoffset < BIL_SUBPEL_SHIFTS);
+ assert(yoffset < BIL_SUBPEL_SHIFTS);
// Load the first block of src data
v_src0_b = _mm_loadu_si128((const __m128i*)(src));
v_src0_shift_b = _mm_srli_si128(v_src0_b, 1);
v_src1_b = _mm_loadu_si128((const __m128i*)(src + src_stride));
v_src1_shift_b = _mm_srli_si128(v_src1_b, 1);
// Apply the x filter
- if (xoffset == 8) {
+ if (xoffset == HALF_PIXEL_OFFSET) {
v_src1_b = _mm_unpacklo_epi64(v_src0_b, v_src1_b);
v_src1_shift_b = _mm_unpacklo_epi64(v_src0_shift_b, v_src1_shift_b);
v_xres0_b = _mm_avg_epu8(v_src1_b, v_src1_shift_b);
@@ -1275,7 +1207,7 @@
v_src1_b = _mm_loadu_si128((const __m128i*)(src + src_stride * 3));
v_src1_shift_b = _mm_srli_si128(v_src1_b, 1);
// Apply the x filter
- if (xoffset == 8) {
+ if (xoffset == HALF_PIXEL_OFFSET) {
v_src1_b = _mm_unpacklo_epi64(v_src0_b, v_src1_b);
v_src1_shift_b = _mm_unpacklo_epi64(v_src0_shift_b, v_src1_shift_b);
v_xres1_b = _mm_avg_epu8(v_src1_b, v_src1_shift_b);
@@ -1287,7 +1219,7 @@
// Apply the y filter to the previous block
v_temp_b = _mm_or_si128(_mm_srli_si128(v_xres0_b, 8),
_mm_slli_si128(v_xres1_b, 8));
- if (yoffset == 8) {
+ if (yoffset == HALF_PIXEL_OFFSET) {
v_res_b = _mm_avg_epu8(v_xres0_b, v_temp_b);
} else {
v_res_b = apply_filter(v_xres0_b, v_temp_b, v_filtery_b);
@@ -1309,7 +1241,7 @@
v_src1_b = _mm_loadu_si128((const __m128i*)(src + src_stride * 5));
v_src1_shift_b = _mm_srli_si128(v_src1_b, 1);
// Apply the x filter
- if (xoffset == 8) {
+ if (xoffset == HALF_PIXEL_OFFSET) {
v_src1_b = _mm_unpacklo_epi64(v_src0_b, v_src1_b);
v_src1_shift_b = _mm_unpacklo_epi64(v_src0_shift_b, v_src1_shift_b);
v_xres0_b = _mm_avg_epu8(v_src1_b, v_src1_shift_b);
@@ -1321,7 +1253,7 @@
// Apply the y filter to the previous block
v_temp_b = _mm_or_si128(_mm_srli_si128(v_xres1_b, 8),
_mm_slli_si128(v_xres0_b, 8));
- if (yoffset == 8) {
+ if (yoffset == HALF_PIXEL_OFFSET) {
v_res_b = _mm_avg_epu8(v_xres1_b, v_temp_b);
} else {
v_res_b = apply_filter(v_xres1_b, v_temp_b, v_filtery_b);
@@ -1359,41 +1291,45 @@
return vpx_masked_variance##W##x##H##_ssse3(src, src_stride, \
dst, dst_stride, \
msk, msk_stride, sse); \
- else if (yoffset == 8) \
- return vpx_masked_subpel_varWxH_xzero(src, src_stride, 8, \
+ else if (yoffset == HALF_PIXEL_OFFSET) \
+ return vpx_masked_subpel_varWxH_xzero(src, src_stride, \
+ HALF_PIXEL_OFFSET, \
dst, dst_stride, msk, msk_stride, \
- sse, W, H, apply_filter8); \
+ sse, W, H, apply_filter_avg); \
else \
- return vpx_masked_subpel_varWxH_xzero(src, src_stride, yoffset, \
+ return vpx_masked_subpel_varWxH_xzero(src, src_stride, \
+ yoffset, \
dst, dst_stride, msk, msk_stride, \
sse, W, H, apply_filter); \
} else if (yoffset == 0) { \
- if (xoffset == 8) \
- return vpx_masked_subpel_varWxH_yzero(src, src_stride, 8, \
+ if (xoffset == HALF_PIXEL_OFFSET) \
+ return vpx_masked_subpel_varWxH_yzero(src, src_stride, \
+ HALF_PIXEL_OFFSET, \
dst, dst_stride, msk, msk_stride, \
- sse, W, H, apply_filter8); \
+ sse, W, H, apply_filter_avg); \
else \
- return vpx_masked_subpel_varWxH_yzero(src, src_stride, xoffset, \
+ return vpx_masked_subpel_varWxH_yzero(src, src_stride, \
+ xoffset, \
dst, dst_stride, msk, msk_stride, \
sse, W, H, apply_filter); \
- } else if (xoffset == 8) { \
- if (yoffset == 8) \
+ } else if (xoffset == HALF_PIXEL_OFFSET) { \
+ if (yoffset == HALF_PIXEL_OFFSET) \
return vpx_masked_subpel_varWxH_xnonzero_ynonzero(src, src_stride, \
- 8, 8, dst, dst_stride, msk, msk_stride, sse, W, H, \
- apply_filter8, apply_filter8); \
+ HALF_PIXEL_OFFSET, HALF_PIXEL_OFFSET, dst, dst_stride, msk, \
+ msk_stride, sse, W, H, apply_filter_avg, apply_filter_avg); \
else \
return vpx_masked_subpel_varWxH_xnonzero_ynonzero(src, src_stride, \
- 8, yoffset, dst, dst_stride, msk, msk_stride, sse, W, H, \
- apply_filter8, apply_filter); \
+ HALF_PIXEL_OFFSET, yoffset, dst, dst_stride, msk, \
+ msk_stride, sse, W, H, apply_filter_avg, apply_filter); \
} else { \
- if (yoffset == 8) \
+ if (yoffset == HALF_PIXEL_OFFSET) \
return vpx_masked_subpel_varWxH_xnonzero_ynonzero(src, src_stride, \
- xoffset, 8, dst, dst_stride, msk, msk_stride, sse, W, H, \
- apply_filter, apply_filter8); \
+ xoffset, HALF_PIXEL_OFFSET, dst, dst_stride, msk, \
+ msk_stride, sse, W, H, apply_filter, apply_filter_avg); \
else \
return vpx_masked_subpel_varWxH_xnonzero_ynonzero(src, src_stride, \
- xoffset, yoffset, dst, dst_stride, msk, msk_stride, sse, W, H, \
- apply_filter, apply_filter); \
+ xoffset, yoffset, dst, dst_stride, msk, \
+ msk_stride, sse, W, H, apply_filter, apply_filter); \
} \
}
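
Stripped of the macro plumbing, the dispatch above picks a kernel by offset:
full-pel needs no filtering, half-pel offsets swap in the cheap averaging
filter, and everything else takes the general one. An illustrative non-macro
rendering for the 64x64 case, assuming the function and type names used in
this file are in scope:

static unsigned int masked_subpel_var64x64_sketch(
    const uint8_t *src, int src_stride, int xoffset, int yoffset,
    const uint8_t *dst, int dst_stride, const uint8_t *msk, int msk_stride,
    unsigned int *sse) {
  const filter_fn_t fx =
      (xoffset == HALF_PIXEL_OFFSET) ? apply_filter_avg : apply_filter;
  const filter_fn_t fy =
      (yoffset == HALF_PIXEL_OFFSET) ? apply_filter_avg : apply_filter;
  if (xoffset == 0 && yoffset == 0) /* full-pel: no interpolation */
    return vpx_masked_variance64x64_ssse3(src, src_stride, dst, dst_stride,
                                          msk, msk_stride, sse);
  if (xoffset == 0)
    return vpx_masked_subpel_varWxH_xzero(src, src_stride, yoffset, dst,
                                          dst_stride, msk, msk_stride, sse,
                                          64, 64, fy);
  if (yoffset == 0)
    return vpx_masked_subpel_varWxH_yzero(src, src_stride, xoffset, dst,
                                          dst_stride, msk, msk_stride, sse,
                                          64, 64, fx);
  return vpx_masked_subpel_varWxH_xnonzero_ynonzero(
      src, src_stride, xoffset, yoffset, dst, dst_stride, msk, msk_stride,
      sse, 64, 64, fx, fy);
}
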
@@ -1437,6 +1373,11 @@
MASK_SUBPIX_VAR_LARGE(32, 64)
MASK_SUBPIX_VAR_LARGE(64, 32)
MASK_SUBPIX_VAR_LARGE(64, 64)
+#if CONFIG_EXT_PARTITION
+MASK_SUBPIX_VAR_LARGE(64, 128)
+MASK_SUBPIX_VAR_LARGE(128, 64)
+MASK_SUBPIX_VAR_LARGE(128, 128)
+#endif // CONFIG_EXT_PARTITION
#if CONFIG_VP9_HIGHBITDEPTH
typedef int (*highbd_calc_masked_var_t)(__m128i v_sum_d, __m128i v_sse_q,
@@ -1449,9 +1390,9 @@
typedef __m128i (*highbd_filter_fn_t)(__m128i v_a_w, __m128i v_b_w,
__m128i v_filter_w);
-static INLINE __m128i highbd_apply_filter8(const __m128i v_a_w,
- const __m128i v_b_w,
- const __m128i v_filter_w) {
+static INLINE __m128i highbd_apply_filter_avg(const __m128i v_a_w,
+ const __m128i v_b_w,
+ const __m128i v_filter_w) {
(void) v_filter_w;
return _mm_avg_epu16(v_a_w, v_b_w);
}
@@ -1523,55 +1464,53 @@
__m128i v_sse_q,
unsigned int* sse,
const int w, const int h) {
- int sum;
+ int64_t sum64;
+ uint64_t sse64;
// Horizontal sum
- v_sum_d = _mm_hadd_epi32(v_sum_d, v_sum_d);
- v_sum_d = _mm_hadd_epi32(v_sum_d, v_sum_d);
- v_sse_q = _mm_add_epi64(v_sse_q, _mm_srli_si128(v_sse_q, 8));
+ sum64 = hsum_epi32_si32(v_sum_d);
+ sse64 = hsum_epi64_si64(v_sse_q);
+
+ sum64 = (sum64 >= 0) ? sum64 : -sum64;
// Round
- sum = _mm_cvtsi128_si32(v_sum_d);
- sum = (sum >= 0) ? ((sum + 31) >> 6) : -((-sum + 31) >> 6);
- sum = ROUND_POWER_OF_TWO(sum, 2);
+ sum64 = ROUND_POWER_OF_TWO(sum64, 6);
+ sse64 = ROUND_POWER_OF_TWO(sse64, 12);
- v_sse_q = _mm_add_epi64(v_sse_q, _mm_set_epi32(0, 0, 0, 2047));
- v_sse_q = _mm_srli_epi64(v_sse_q, 12);
+ // Normalise
+ sum64 = ROUND_POWER_OF_TWO(sum64, 2);
+ sse64 = ROUND_POWER_OF_TWO(sse64, 4);
// Store the SSE
- v_sse_q = _mm_add_epi64(v_sse_q, _mm_set_epi32(0, 0, 0, 0x8));
- v_sse_q = _mm_srli_epi64(v_sse_q, 4);
- *sse = _mm_cvtsi128_si32(v_sse_q);
-
+ *sse = (unsigned int)sse64;
// Compute the variance
- return *sse - (((int64_t)sum * sum) >> (LOG2_P2(h) + LOG2_P2(w)));
+ return *sse - ((sum64 * sum64) / (w * h));
}
static INLINE int highbd_12_calc_masked_variance(__m128i v_sum_d,
__m128i v_sse_q,
unsigned int* sse,
const int w, const int h) {
- int sum;
+ int64_t sum64;
+ uint64_t sse64;
// Horizontal sum
- v_sum_d = _mm_hadd_epi32(v_sum_d, v_sum_d);
- v_sum_d = _mm_hadd_epi32(v_sum_d, v_sum_d);
- v_sse_q = _mm_add_epi64(v_sse_q, _mm_srli_si128(v_sse_q, 8));
+ sum64 = hsum_epi32_si64(v_sum_d);
+ sse64 = hsum_epi64_si64(v_sse_q);
+
+ sum64 = (sum64 >= 0) ? sum64 : -sum64;
// Round
- sum = _mm_cvtsi128_si32(v_sum_d);
- sum = (sum >= 0) ? ((sum + 31) >> 6) : -((-sum + 31) >> 6);
- sum = ROUND_POWER_OF_TWO(sum, 4);
+ sum64 = ROUND_POWER_OF_TWO(sum64, 6);
+ sse64 = ROUND_POWER_OF_TWO(sse64, 12);
- v_sse_q = _mm_add_epi64(v_sse_q, _mm_set_epi32(0, 0, 0, 2047));
- v_sse_q = _mm_srli_epi64(v_sse_q, 12);
+ // Normalise
+ sum64 = ROUND_POWER_OF_TWO(sum64, 4);
+ sse64 = ROUND_POWER_OF_TWO(sse64, 8);
// Store the SSE
- v_sse_q = _mm_add_epi64(v_sse_q, _mm_set_epi32(0, 0, 0, 0x80));
- v_sse_q = _mm_srli_epi64(v_sse_q, 8);
- *sse = _mm_cvtsi128_si32(v_sse_q);
-
+ *sse = (unsigned int)sse64;
// Compute the variance
- return *sse - (((int64_t)sum * sum) >> (LOG2_P2(h) + LOG2_P2(w)));
+ return *sse - ((sum64 * sum64) / (w * h));
}
@@ -1586,9 +1525,9 @@
__m128i v_sum_d = _mm_setzero_si128();
__m128i v_sse_q = _mm_setzero_si128();
const __m128i v_filter_w = _mm_set1_epi32((
- vpx_bilinear_filters[yoffset][1] << 16) +
- vpx_bilinear_filters[yoffset][0]);
- assert(yoffset < 8);
+ bilinear_filters_2t[yoffset][1] << 16) +
+ bilinear_filters_2t[yoffset][0]);
+ assert(yoffset < BIL_SUBPEL_SHIFTS);
for (j = 0; j < w; j += 8) {
// Load the first row ready
v_src0_w = _mm_loadu_si128((const __m128i*)(src + j));
@@ -1631,9 +1570,9 @@
__m128i v_sum_d = _mm_setzero_si128();
__m128i v_sse_q = _mm_setzero_si128();
const __m128i v_filter_w = _mm_set1_epi32((
- vpx_bilinear_filters[xoffset][1] << 16) +
- vpx_bilinear_filters[xoffset][0]);
- assert(xoffset < 8);
+ bilinear_filters_2t[xoffset][1] << 16) +
+ bilinear_filters_2t[xoffset][0]);
+ assert(xoffset < BIL_SUBPEL_SHIFTS);
for (i = 0; i < h; i++) {
for (j = 0; j < w; j += 8) {
// Load this row & apply the filter to them
@@ -1664,13 +1603,13 @@
__m128i v_sum_d = _mm_setzero_si128();
__m128i v_sse_q = _mm_setzero_si128();
const __m128i v_filterx_w = _mm_set1_epi32((
- vpx_bilinear_filters[xoffset][1] << 16) +
- vpx_bilinear_filters[xoffset][0]);
+ bilinear_filters_2t[xoffset][1] << 16) +
+ bilinear_filters_2t[xoffset][0]);
const __m128i v_filtery_w = _mm_set1_epi32((
- vpx_bilinear_filters[yoffset][1] << 16) +
- vpx_bilinear_filters[yoffset][0]);
- assert(xoffset < 8);
- assert(yoffset < 8);
+ bilinear_filters_2t[yoffset][1] << 16) +
+ bilinear_filters_2t[yoffset][0]);
+ assert(xoffset < BIL_SUBPEL_SHIFTS);
+ assert(yoffset < BIL_SUBPEL_SHIFTS);
for (j = 0; j < w; j += 8) {
// Load the first row ready
v_src0_w = _mm_loadu_si128((const __m128i*)(src + j));
@@ -1724,13 +1663,13 @@
__m128i v_sum_d = _mm_setzero_si128();
__m128i v_sse_q = _mm_setzero_si128();
__m128i v_filter_w = _mm_set1_epi32((
- vpx_bilinear_filters[yoffset][1] << 16) +
- vpx_bilinear_filters[yoffset][0]);
- assert(yoffset < 8);
+ bilinear_filters_2t[yoffset][1] << 16) +
+ bilinear_filters_2t[yoffset][0]);
+ assert(yoffset < BIL_SUBPEL_SHIFTS);
// Load the first row of src data ready
v_src0_w = _mm_loadl_epi64((const __m128i*)src);
for (i = 0; i < h; i += 2) {
- if (yoffset == 8) {
+ if (yoffset == HALF_PIXEL_OFFSET) {
// Load the rest of the source data for these rows
v_src1_w = _mm_or_si128(
_mm_slli_si128(v_src0_w, 8),
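
For the 4-wide kernels each row is only four 16-bit pixels (8 bytes), so two rows are packed into one XMM register instead of leaving half of it idle, which is what the _mm_slli_si128/_mm_or_si128 pair above does. A sketch with illustrative names:

    // Two 8-byte rows share one 128-bit register: the current row is
    // shifted into the high half and the next row lands in the low half.
    const __m128i v_row0_w = _mm_loadl_epi64((const __m128i *)src);
    const __m128i v_row1_w =
        _mm_loadl_epi64((const __m128i *)(src + src_stride));
    const __m128i v_rows_w =
        _mm_or_si128(_mm_slli_si128(v_row0_w, 8), v_row1_w);
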
@@ -1776,9 +1715,9 @@
__m128i v_sum_d = _mm_setzero_si128();
__m128i v_sse_q = _mm_setzero_si128();
__m128i v_filter_w = _mm_set1_epi32((
- vpx_bilinear_filters[xoffset][1] << 16) +
- vpx_bilinear_filters[xoffset][0]);
- assert(xoffset < 8);
+ bilinear_filters_2t[xoffset][1] << 16) +
+ bilinear_filters_2t[xoffset][0]);
+ assert(xoffset < BIL_SUBPEL_SHIFTS);
for (i = 0; i < h; i += 2) {
// Load the src data
v_src0_w = _mm_loadu_si128((const __m128i*)(src));
@@ -1786,7 +1725,7 @@
v_src1_w = _mm_loadu_si128((const __m128i*)(src + src_stride));
v_src1_shift_w = _mm_srli_si128(v_src1_w, 2);
// Apply the x filter
- if (xoffset == 8) {
+ if (xoffset == HALF_PIXEL_OFFSET) {
v_src1_w = _mm_unpacklo_epi64(v_src0_w, v_src1_w);
v_src1_shift_w = _mm_unpacklo_epi64(v_src0_shift_w, v_src1_shift_w);
v_res_w = _mm_avg_epu16(v_src1_w, v_src1_shift_w);
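
The HALF_PIXEL_OFFSET branches work because the midpoint taps are equal, so the 2-tap filter collapses to a rounded average and PAVGW (_mm_avg_epu16) is bit-exact with the multiply path. A scalar check of that identity, assuming the half-pel entry of bilinear_filters_2t is {64, 64} with 7-bit precision:

    #include <assert.h>
    static uint16_t half_pel(uint16_t a, uint16_t b) {
      const uint16_t general = (uint16_t)((a * 64 + b * 64 + 64) >> 7);
      const uint16_t avg = (uint16_t)((a + b + 1) >> 1);  // PAVGW semantics
      assert(general == avg);  // (64*(a+b) + 64) >> 7 == (a + b + 1) >> 1
      return avg;
    }
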
@@ -1826,21 +1765,20 @@
__m128i v_sum_d = _mm_setzero_si128();
__m128i v_sse_q = _mm_setzero_si128();
__m128i v_filterx_w = _mm_set1_epi32((
- vpx_bilinear_filters[xoffset][1] << 16) +
- vpx_bilinear_filters[xoffset][0]);
+ bilinear_filters_2t[xoffset][1] << 16) +
+ bilinear_filters_2t[xoffset][0]);
__m128i v_filtery_w = _mm_set1_epi32((
- vpx_bilinear_filters[yoffset][1] << 16) +
- vpx_bilinear_filters[yoffset][0]);
- assert(xoffset < 8);
- assert(yoffset < 8);
-
+ bilinear_filters_2t[yoffset][1] << 16) +
+ bilinear_filters_2t[yoffset][0]);
+ assert(xoffset < BIL_SUBPEL_SHIFTS);
+ assert(yoffset < BIL_SUBPEL_SHIFTS);
// Load the first block of src data
v_src0_w = _mm_loadu_si128((const __m128i*)(src));
v_src0_shift_w = _mm_srli_si128(v_src0_w, 2);
v_src1_w = _mm_loadu_si128((const __m128i*)(src + src_stride));
v_src1_shift_w = _mm_srli_si128(v_src1_w, 2);
// Apply the x filter
- if (xoffset == 8) {
+ if (xoffset == HALF_PIXEL_OFFSET) {
v_src1_w = _mm_unpacklo_epi64(v_src0_w, v_src1_w);
v_src1_shift_w = _mm_unpacklo_epi64(v_src0_shift_w, v_src1_shift_w);
v_xres0_w = _mm_avg_epu16(v_src1_w, v_src1_shift_w);
@@ -1858,7 +1796,7 @@
v_src1_w = _mm_loadu_si128((const __m128i*)(src + src_stride * 3));
v_src1_shift_w = _mm_srli_si128(v_src1_w, 2);
// Apply the x filter
- if (xoffset == 8) {
+ if (xoffset == HALF_PIXEL_OFFSET) {
v_src1_w = _mm_unpacklo_epi64(v_src0_w, v_src1_w);
v_src1_shift_w = _mm_unpacklo_epi64(v_src0_shift_w, v_src1_shift_w);
v_xres1_w = _mm_avg_epu16(v_src1_w, v_src1_shift_w);
@@ -1872,7 +1810,7 @@
// Apply the y filter to the previous block
v_temp_w = _mm_or_si128(_mm_srli_si128(v_xres0_w, 8),
_mm_slli_si128(v_xres1_w, 8));
- if (yoffset == 8) {
+ if (yoffset == HALF_PIXEL_OFFSET) {
v_res_w = _mm_avg_epu16(v_xres0_w, v_temp_w);
} else {
v_res_w = highbd_apply_filter(v_xres0_w, v_temp_w, v_filtery_w);
@@ -1894,7 +1832,7 @@
v_src1_w = _mm_loadu_si128((const __m128i*)(src + src_stride * 5));
v_src1_shift_w = _mm_srli_si128(v_src1_w, 2);
// Apply the x filter
- if (xoffset == 8) {
+ if (xoffset == HALF_PIXEL_OFFSET) {
v_src1_w = _mm_unpacklo_epi64(v_src0_w, v_src1_w);
v_src1_shift_w = _mm_unpacklo_epi64(v_src0_shift_w, v_src1_shift_w);
v_xres0_w = _mm_avg_epu16(v_src1_w, v_src1_shift_w);
@@ -1908,7 +1846,7 @@
// Apply the y filter to the previous block
v_temp_w = _mm_or_si128(_mm_srli_si128(v_xres1_w, 8),
_mm_slli_si128(v_xres0_w, 8));
- if (yoffset == 8) {
+ if (yoffset == HALF_PIXEL_OFFSET) {
v_res_w = _mm_avg_epu16(v_xres1_w, v_temp_w);
} else {
v_res_w = highbd_apply_filter(v_xres1_w, v_temp_w, v_filtery_w);
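
Every one of these y-pass hunks follows the same pipeline: the x filter stays one row pair ahead, and each y output blends the current x-filtered rows with the next ones, so no source row is loaded or filtered twice. A scalar sketch of that data flow (names are illustrative, and the SIMD code keeps extra intermediate precision; this shows only the structure, assuming 7-bit taps):

    static void bilinear_y_pass(const uint16_t *xfiltered, int stride,
                                uint16_t *out, int w, int h,
                                const uint8_t filter_y[2]) {
      int i, j;
      for (i = 0; i < h; ++i) {
        for (j = 0; j < w; ++j) {
          // Blend each x-filtered row with the row below it.
          out[i * w + j] = (uint16_t)ROUND_POWER_OF_TWO(
              xfiltered[i * stride + j] * filter_y[0] +
                  xfiltered[(i + 1) * stride + j] * filter_y[1],
              7);
        }
      }
    }
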
@@ -1948,55 +1886,61 @@
if (yoffset == 0) \
return full_variance_function(src8, src_stride, dst8, dst_stride, \
msk, msk_stride, sse); \
- else if (yoffset == 8) \
- return vpx_highbd_masked_subpel_varWxH_xzero(src, src_stride, 8, \
+ else if (yoffset == HALF_PIXEL_OFFSET) \
+ return vpx_highbd_masked_subpel_varWxH_xzero(src, src_stride, \
+ HALF_PIXEL_OFFSET, \
dst, dst_stride, \
msk, msk_stride, \
sse, W, H, \
- highbd_apply_filter8, \
+ highbd_apply_filter_avg, \
calc_var); \
else \
- return vpx_highbd_masked_subpel_varWxH_xzero(src, src_stride, yoffset, \
+ return vpx_highbd_masked_subpel_varWxH_xzero(src, src_stride, \
+ yoffset, \
dst, dst_stride, \
msk, msk_stride, \
sse, W, H, \
highbd_apply_filter, \
calc_var); \
} else if (yoffset == 0) { \
- if (xoffset == 8) \
- return vpx_highbd_masked_subpel_varWxH_yzero(src, src_stride, 8, \
+ if (xoffset == HALF_PIXEL_OFFSET) \
+ return vpx_highbd_masked_subpel_varWxH_yzero(src, src_stride, \
+ HALF_PIXEL_OFFSET, \
dst, dst_stride, \
msk, msk_stride, \
sse, W, H, \
- highbd_apply_filter8, \
+ highbd_apply_filter_avg, \
calc_var); \
else \
- return vpx_highbd_masked_subpel_varWxH_yzero(src, src_stride, xoffset, \
+ return vpx_highbd_masked_subpel_varWxH_yzero(src, src_stride, \
+ xoffset, \
dst, dst_stride, \
msk, msk_stride, \
sse, W, H, \
highbd_apply_filter, \
calc_var); \
- } else if (xoffset == 8) { \
- if (yoffset == 8) \
+ } else if (xoffset == HALF_PIXEL_OFFSET) { \
+ if (yoffset == HALF_PIXEL_OFFSET) \
return vpx_highbd_masked_subpel_varWxH_xnonzero_ynonzero( \
- src, src_stride, 8, 8, dst, dst_stride, msk, msk_stride, \
- sse, W, H, highbd_apply_filter8, highbd_apply_filter8, calc_var);\
+ src, src_stride, HALF_PIXEL_OFFSET, HALF_PIXEL_OFFSET, \
+ dst, dst_stride, msk, msk_stride, sse, W, H, \
+ highbd_apply_filter_avg, highbd_apply_filter_avg, calc_var); \
else \
return vpx_highbd_masked_subpel_varWxH_xnonzero_ynonzero( \
- src, src_stride, 8, yoffset, dst, dst_stride, \
- msk, msk_stride, sse, W, H, highbd_apply_filter8, \
+ src, src_stride, HALF_PIXEL_OFFSET, yoffset, dst, dst_stride, \
+ msk, msk_stride, sse, W, H, highbd_apply_filter_avg, \
highbd_apply_filter, calc_var); \
} else { \
- if (yoffset == 8) \
+ if (yoffset == HALF_PIXEL_OFFSET) \
return vpx_highbd_masked_subpel_varWxH_xnonzero_ynonzero( \
- src, src_stride, xoffset, 8, dst, dst_stride, msk, msk_stride, \
- sse, W, H, highbd_apply_filter, highbd_apply_filter8, calc_var); \
+ src, src_stride, xoffset, HALF_PIXEL_OFFSET, \
+ dst, dst_stride, msk, msk_stride, sse, W, H, \
+ highbd_apply_filter, highbd_apply_filter_avg, calc_var); \
else \
return vpx_highbd_masked_subpel_varWxH_xnonzero_ynonzero( \
- src, src_stride, xoffset, yoffset, dst, dst_stride, \
- msk, msk_stride, sse, W, H, highbd_apply_filter, \
- highbd_apply_filter, calc_var); \
+ src, src_stride, xoffset, yoffset, \
+ dst, dst_stride, msk, msk_stride, sse, W, H, \
+ highbd_apply_filter, highbd_apply_filter, calc_var); \
} \
}
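
Unwinding the macro: it is a nine-way dispatch on (xoffset, yoffset) in {0, HALF_PIXEL_OFFSET, other}^2. Full-pel axes skip filtering, half-pel axes swap in highbd_apply_filter_avg, and everything else gets the generic tap filter. A condensed sketch of that decision tree (enum and function names are illustrative):

    typedef enum { FULL_VAR, X_ONLY, Y_ONLY, X_AND_Y } kernel_t;
    typedef enum { FILT_NONE, FILT_AVG, FILT_GENERIC } filter_t;

    static kernel_t choose_path(int xoffset, int yoffset,
                                filter_t *fx, filter_t *fy) {
      *fx = (xoffset == 0) ? FILT_NONE
          : (xoffset == HALF_PIXEL_OFFSET) ? FILT_AVG : FILT_GENERIC;
      *fy = (yoffset == 0) ? FILT_NONE
          : (yoffset == HALF_PIXEL_OFFSET) ? FILT_AVG : FILT_GENERIC;
      if (xoffset == 0 && yoffset == 0) return FULL_VAR;  // full_variance_function
      if (xoffset == 0) return Y_ONLY;  // ..._varWxH_xzero: filter in y only
      if (yoffset == 0) return X_ONLY;  // ..._varWxH_yzero: filter in x only
      return X_AND_Y;                   // ..._varWxH_xnonzero_ynonzero
    }
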
@@ -2093,4 +2037,12 @@
HIGHBD_MASK_SUBPIX_VAR_WRAPPERS(64, 32)
HIGHBD_MASK_SUBPIX_VAR_LARGE(64, 64)
HIGHBD_MASK_SUBPIX_VAR_WRAPPERS(64, 64)
+#if CONFIG_EXT_PARTITION
+HIGHBD_MASK_SUBPIX_VAR_LARGE(64, 128)
+HIGHBD_MASK_SUBPIX_VAR_WRAPPERS(64, 128)
+HIGHBD_MASK_SUBPIX_VAR_LARGE(128, 64)
+HIGHBD_MASK_SUBPIX_VAR_WRAPPERS(128, 64)
+HIGHBD_MASK_SUBPIX_VAR_LARGE(128, 128)
+HIGHBD_MASK_SUBPIX_VAR_WRAPPERS(128, 128)
+#endif // CONFIG_EXT_PARTITION
#endif
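
With CONFIG_EXT_PARTITION enabled these instantiations add SSSE3 paths for the 64x128, 128x64 and 128x128 blocks. A hypothetical smoke test; the symbol name and argument order are assumed from the existing WxH wrapper pattern, not spelled out in this hunk:

    DECLARE_ALIGNED(16, uint16_t, src[128 * 128]);
    DECLARE_ALIGNED(16, uint16_t, ref[128 * 128]);
    DECLARE_ALIGNED(16, uint8_t, msk[128 * 128]);
    unsigned int sse;
    // Assumed wrapper name; check the HIGHBD_MASK_SUBPIX_VAR_WRAPPERS
    // expansion for the real symbols.
    const unsigned int var =
        vpx_highbd_masked_sub_pixel_variance128x128_ssse3(
            CONVERT_TO_BYTEPTR(src), 128, 4, 4,
            CONVERT_TO_BYTEPTR(ref), 128, msk, 128, &sse);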