Move shared SAD code to vpx_dsp
Create a new component, vpx_dsp, for code that can be shared
between codecs. Move the SAD code into the component.
This reduces the size of vpxenc/vpxdec by 36k on x86_64 builds.
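
The shared functions drop the VP8-era max_sad early-out parameter (see the
removed MaxSAD test below) and use one signature across codecs. As a rough
sketch of the reference kernel shape, illustrative only -- the function name
and layout here are assumptions, not the literal vpx_dsp implementation:

    #include <stdint.h>
    #include <stdlib.h>  /* abs() */

    /* Plain SAD: accumulate |src - ref| over a width x height block. */
    static unsigned int sad_mxn(const uint8_t *src_ptr, int src_stride,
                                const uint8_t *ref_ptr, int ref_stride,
                                int width, int height) {
      unsigned int sad = 0;
      int h, w;
      for (h = 0; h < height; ++h) {
        for (w = 0; w < width; ++w) {
          sad += abs(src_ptr[h * src_stride + w] -
                     ref_ptr[h * ref_stride + w]);
        }
      }
      return sad;
    }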
Change-Id: I73f837ddaecac6b350bf757af0cfe19c4ab9327a
diff --git a/libs.mk b/libs.mk
index 3046e1b..6eee003 100644
--- a/libs.mk
+++ b/libs.mk
@@ -54,6 +54,9 @@
include $(SRC_PATH_BARE)/vpx_ports/vpx_ports.mk
CODEC_SRCS-yes += $(addprefix vpx_ports/,$(call enabled,PORTS_SRCS))
+include $(SRC_PATH_BARE)/vpx_dsp/vpx_dsp.mk
+CODEC_SRCS-yes += $(addprefix vpx_dsp/,$(call enabled,DSP_SRCS))
+
ifneq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),)
VP8_PREFIX=vp8/
include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8_common.mk
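
Aside: libs.mk resolves the new sources via $(call enabled,DSP_SRCS), the
same pattern vpx_ports.mk uses, so vpx_dsp/vpx_dsp.mk is expected to append
entries to DSP_SRCS-yes. A hypothetical minimal version -- file names are
assumptions, not quoted from this change:

    # vpx_dsp/vpx_dsp.mk -- illustrative sketch only
    DSP_SRCS-yes += vpx_dsp.mk
    DSP_SRCS-yes += sad.c
    DSP_SRCS-yes += vpx_dsp_rtcd.c
    DSP_SRCS-yes += vpx_dsp_rtcd_defs.pl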
diff --git a/test/sad_test.cc b/test/sad_test.cc
index 65e9561..6c28edb 100644
--- a/test/sad_test.cc
+++ b/test/sad_test.cc
@@ -14,14 +14,25 @@
#include <stdio.h>
#include "./vpx_config.h"
-#if CONFIG_VP8_ENCODER
-#include "./vp8_rtcd.h"
-#endif
-#if CONFIG_VP9_ENCODER
-#include "./vp9_rtcd.h"
-#endif
+#include "./vpx_dsp_rtcd.h"
#include "vpx_mem/vpx_mem.h"
+/* Needed for the ROUND_POWER_OF_TWO and CONVERT_TO* macros, both of which
+ * should be moved to a more generic location. Alternatively, the *avg
+ * functions could be restricted to VP9 builds, but it would be better to
+ * avoid that sort of specificity.
+ * TODO(johannkoenig): move these macros to a common location.
+ */
+#if CONFIG_VP9_HIGHBITDEPTH
+#include "vp9/common/vp9_common.h"
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
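+/* ROUND_POWER_OF_TWO(v, n) divides v by 2^n, rounding half up; the *avg
+ * reference code below uses it with n == 1 to compute the rounded average
+ * of a reference pixel and a second prediction: (a + b + 1) >> 1. */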
+#ifndef ROUND_POWER_OF_TWO
+#define ROUND_POWER_OF_TWO(value, n) \
+ (((value) + (1 << ((n) - 1))) >> (n))
+#endif // ROUND_POWER_OF_TWO
+
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
@@ -30,27 +41,18 @@
#include "vpx/vpx_codec.h"
-#if CONFIG_VP8_ENCODER
-typedef unsigned int (*SadMxNFunc)(const unsigned char *source_ptr,
- int source_stride,
- const unsigned char *reference_ptr,
- int reference_stride,
- unsigned int max_sad);
+typedef unsigned int (*SadMxNFunc)(const uint8_t *src_ptr,
+ int src_stride,
+ const uint8_t *ref_ptr,
+ int ref_stride);
typedef std::tr1::tuple<int, int, SadMxNFunc, int> SadMxNParam;
-#endif
-#if CONFIG_VP9_ENCODER
-typedef unsigned int (*SadMxNVp9Func)(const unsigned char *source_ptr,
- int source_stride,
- const unsigned char *reference_ptr,
- int reference_stride);
-typedef std::tr1::tuple<int, int, SadMxNVp9Func, int> SadMxNVp9Param;
-typedef uint32_t (*SadMxNAvgVp9Func)(const uint8_t *source_ptr,
- int source_stride,
- const uint8_t *reference_ptr,
- int reference_stride,
- const uint8_t *second_pred);
-typedef std::tr1::tuple<int, int, SadMxNAvgVp9Func, int> SadMxNAvgVp9Param;
-#endif
+
+typedef uint32_t (*SadMxNAvgFunc)(const uint8_t *src_ptr,
+ int src_stride,
+ const uint8_t *ref_ptr,
+ int ref_stride,
+ const uint8_t *second_pred);
+typedef std::tr1::tuple<int, int, SadMxNAvgFunc, int> SadMxNAvgParam;
typedef void (*SadMxNx4Func)(const uint8_t *src_ptr,
int src_stride,
@@ -68,7 +70,6 @@
width_(width), height_(height), bd_(bit_depth) {}
static void SetUpTestCase() {
-#if CONFIG_VP9_HIGHBITDEPTH
source_data8_ = reinterpret_cast<uint8_t*>(
vpx_memalign(kDataAlignment, kDataBlockSize));
reference_data8_ = reinterpret_cast<uint8_t*>(
@@ -81,18 +82,9 @@
vpx_memalign(kDataAlignment, kDataBufferSize*sizeof(uint16_t)));
second_pred16_ = reinterpret_cast<uint16_t*>(
vpx_memalign(kDataAlignment, 64*64*sizeof(uint16_t)));
-#else
- source_data_ = reinterpret_cast<uint8_t*>(
- vpx_memalign(kDataAlignment, kDataBlockSize));
- reference_data_ = reinterpret_cast<uint8_t*>(
- vpx_memalign(kDataAlignment, kDataBufferSize));
- second_pred_ = reinterpret_cast<uint8_t*>(
- vpx_memalign(kDataAlignment, 64*64));
-#endif
}
static void TearDownTestCase() {
-#if CONFIG_VP9_HIGHBITDEPTH
vpx_free(source_data8_);
source_data8_ = NULL;
vpx_free(reference_data8_);
@@ -105,14 +97,6 @@
reference_data16_ = NULL;
vpx_free(second_pred16_);
second_pred16_ = NULL;
-#else
- vpx_free(source_data_);
- source_data_ = NULL;
- vpx_free(reference_data_);
- reference_data_ = NULL;
- vpx_free(second_pred_);
- second_pred_ = NULL;
-#endif
}
virtual void TearDown() {
@@ -126,23 +110,21 @@
static const int kDataBufferSize = 4 * kDataBlockSize;
virtual void SetUp() {
-#if CONFIG_VP9_HIGHBITDEPTH
if (bd_ == -1) {
use_high_bit_depth_ = false;
bit_depth_ = VPX_BITS_8;
source_data_ = source_data8_;
reference_data_ = reference_data8_;
second_pred_ = second_pred8_;
+#if CONFIG_VP9_HIGHBITDEPTH
} else {
use_high_bit_depth_ = true;
bit_depth_ = static_cast<vpx_bit_depth_t>(bd_);
source_data_ = CONVERT_TO_BYTEPTR(source_data16_);
reference_data_ = CONVERT_TO_BYTEPTR(reference_data16_);
second_pred_ = CONVERT_TO_BYTEPTR(second_pred16_);
+#endif // CONFIG_VP9_HIGHBITDEPTH
}
-#else
- bit_depth_ = VPX_BITS_8;
-#endif
mask_ = (1 << bit_depth_) - 1;
source_stride_ = (width_ + 31) & ~31;
reference_stride_ = width_ * 2;
@@ -151,51 +133,35 @@
virtual uint8_t *GetReference(int block_idx) {
#if CONFIG_VP9_HIGHBITDEPTH
- if (!use_high_bit_depth_) {
- return reference_data_ + block_idx * kDataBlockSize;
- } else {
+ if (use_high_bit_depth_)
return CONVERT_TO_BYTEPTR(CONVERT_TO_SHORTPTR(reference_data_) +
block_idx * kDataBlockSize);
- }
-#else
+#endif // CONFIG_VP9_HIGHBITDEPTH
return reference_data_ + block_idx * kDataBlockSize;
-#endif
}
// Sum of Absolute Differences. Given two blocks, calculate the absolute
// difference between pixels at the same relative location; accumulate.
- unsigned int ReferenceSAD(unsigned int max_sad, int block_idx) {
+ unsigned int ReferenceSAD(int block_idx) {
unsigned int sad = 0;
-#if CONFIG_VP9_HIGHBITDEPTH
const uint8_t *const reference8 = GetReference(block_idx);
const uint8_t *const source8 = source_data_;
+#if CONFIG_VP9_HIGHBITDEPTH
const uint16_t *const reference16 =
CONVERT_TO_SHORTPTR(GetReference(block_idx));
const uint16_t *const source16 = CONVERT_TO_SHORTPTR(source_data_);
-#else
- const uint8_t *const reference = GetReference(block_idx);
- const uint8_t *const source = source_data_;
-#endif
+#endif // CONFIG_VP9_HIGHBITDEPTH
for (int h = 0; h < height_; ++h) {
for (int w = 0; w < width_; ++w) {
-#if CONFIG_VP9_HIGHBITDEPTH
if (!use_high_bit_depth_) {
- sad +=
- abs(source8[h * source_stride_ + w] -
- reference8[h * reference_stride_ + w]);
+ sad += abs(source8[h * source_stride_ + w] -
+ reference8[h * reference_stride_ + w]);
+#if CONFIG_VP9_HIGHBITDEPTH
} else {
- sad +=
- abs(source16[h * source_stride_ + w] -
- reference16[h * reference_stride_ + w]);
+ sad += abs(source16[h * source_stride_ + w] -
+ reference16[h * reference_stride_ + w]);
+#endif // CONFIG_VP9_HIGHBITDEPTH
}
-#else
- sad +=
- abs(source[h * source_stride_ + w] -
- reference[h * reference_stride_ + w]);
-#endif
- }
- if (sad > max_sad) {
- break;
}
}
return sad;
@@ -204,85 +170,69 @@
// Sum of Absolute Differences Average. Given two blocks and a prediction,
// calculate the absolute difference between each source pixel and the
// average of the corresponding reference and predicted pixels; accumulate.
- unsigned int ReferenceSADavg(unsigned int max_sad, int block_idx) {
+ unsigned int ReferenceSADavg(int block_idx) {
unsigned int sad = 0;
+ const uint8_t *const reference8 = GetReference(block_idx);
+ const uint8_t *const source8 = source_data_;
+ const uint8_t *const second_pred8 = second_pred_;
#if CONFIG_VP9_HIGHBITDEPTH
- const uint8_t *const reference8 = GetReference(block_idx);
- const uint8_t *const source8 = source_data_;
- const uint8_t *const second_pred8 = second_pred_;
- const uint16_t *const reference16 =
- CONVERT_TO_SHORTPTR(GetReference(block_idx));
- const uint16_t *const source16 = CONVERT_TO_SHORTPTR(source_data_);
- const uint16_t *const second_pred16 = CONVERT_TO_SHORTPTR(second_pred_);
-#else
- const uint8_t *const reference = GetReference(block_idx);
- const uint8_t *const source = source_data_;
- const uint8_t *const second_pred = second_pred_;
-#endif
+ const uint16_t *const reference16 =
+ CONVERT_TO_SHORTPTR(GetReference(block_idx));
+ const uint16_t *const source16 = CONVERT_TO_SHORTPTR(source_data_);
+ const uint16_t *const second_pred16 = CONVERT_TO_SHORTPTR(second_pred_);
+#endif // CONFIG_VP9_HIGHBITDEPTH
for (int h = 0; h < height_; ++h) {
for (int w = 0; w < width_; ++w) {
-#if CONFIG_VP9_HIGHBITDEPTH
if (!use_high_bit_depth_) {
const int tmp = second_pred8[h * width_ + w] +
reference8[h * reference_stride_ + w];
const uint8_t comp_pred = ROUND_POWER_OF_TWO(tmp, 1);
sad += abs(source8[h * source_stride_ + w] - comp_pred);
+#if CONFIG_VP9_HIGHBITDEPTH
} else {
const int tmp = second_pred16[h * width_ + w] +
reference16[h * reference_stride_ + w];
const uint16_t comp_pred = ROUND_POWER_OF_TWO(tmp, 1);
sad += abs(source16[h * source_stride_ + w] - comp_pred);
+#endif // CONFIG_VP9_HIGHBITDEPTH
}
-#else
- const int tmp = second_pred[h * width_ + w] +
- reference[h * reference_stride_ + w];
- const uint8_t comp_pred = (tmp + 1) >> 1;
- sad += abs(source[h * source_stride_ + w] - comp_pred);
-#endif
- }
- if (sad > max_sad) {
- break;
}
}
return sad;
}
void FillConstant(uint8_t *data, int stride, uint16_t fill_constant) {
-#if CONFIG_VP9_HIGHBITDEPTH
uint8_t *data8 = data;
+#if CONFIG_VP9_HIGHBITDEPTH
uint16_t *data16 = CONVERT_TO_SHORTPTR(data);
-#endif
+#endif // CONFIG_VP9_HIGHBITDEPTH
for (int h = 0; h < height_; ++h) {
for (int w = 0; w < width_; ++w) {
-#if CONFIG_VP9_HIGHBITDEPTH
if (!use_high_bit_depth_) {
data8[h * stride + w] = static_cast<uint8_t>(fill_constant);
+#if CONFIG_VP9_HIGHBITDEPTH
} else {
data16[h * stride + w] = fill_constant;
+#endif // CONFIG_VP9_HIGHBITDEPTH
}
-#else
- data[h * stride + w] = static_cast<uint8_t>(fill_constant);
-#endif
}
}
}
void FillRandom(uint8_t *data, int stride) {
-#if CONFIG_VP9_HIGHBITDEPTH
uint8_t *data8 = data;
+#if CONFIG_VP9_HIGHBITDEPTH
uint16_t *data16 = CONVERT_TO_SHORTPTR(data);
-#endif
+#endif // CONFIG_VP9_HIGHBITDEPTH
for (int h = 0; h < height_; ++h) {
for (int w = 0; w < width_; ++w) {
-#if CONFIG_VP9_HIGHBITDEPTH
if (!use_high_bit_depth_) {
data8[h * stride + w] = rnd_.Rand8();
+#if CONFIG_VP9_HIGHBITDEPTH
} else {
data16[h * stride + w] = rnd_.Rand16() & mask_;
+#endif // CONFIG_VP9_HIGHBITDEPTH
}
-#else
- data[h * stride + w] = rnd_.Rand8();
-#endif
}
}
}
@@ -293,7 +243,6 @@
static uint8_t *reference_data_;
static uint8_t *second_pred_;
int source_stride_;
-#if CONFIG_VP9_HIGHBITDEPTH
bool use_high_bit_depth_;
static uint8_t *source_data8_;
static uint8_t *reference_data8_;
@@ -301,7 +250,6 @@
static uint16_t *source_data16_;
static uint16_t *reference_data16_;
static uint16_t *second_pred16_;
-#endif
int reference_stride_;
ACMRandom rnd_;
@@ -315,11 +263,11 @@
protected:
void SADs(unsigned int *results) {
- const uint8_t *refs[] = {GetReference(0), GetReference(1),
- GetReference(2), GetReference(3)};
+ const uint8_t *references[] = {GetReference(0), GetReference(1),
+ GetReference(2), GetReference(3)};
ASM_REGISTER_STATE_CHECK(GET_PARAM(2)(source_data_, source_stride_,
- refs, reference_stride_,
+ references, reference_stride_,
results));
}
@@ -328,14 +276,13 @@
SADs(exp_sad);
for (int block = 0; block < 4; ++block) {
- reference_sad = ReferenceSAD(UINT_MAX, block);
+ reference_sad = ReferenceSAD(block);
EXPECT_EQ(reference_sad, exp_sad[block]) << "block " << block;
}
}
};
-#if CONFIG_VP8_ENCODER
class SADTest
: public SADTestBase,
public ::testing::WithParamInterface<SadMxNParam> {
@@ -343,38 +290,6 @@
SADTest() : SADTestBase(GET_PARAM(0), GET_PARAM(1), GET_PARAM(3)) {}
protected:
- unsigned int SAD(unsigned int max_sad, int block_idx) {
- unsigned int ret;
- const uint8_t *const reference = GetReference(block_idx);
-
- ASM_REGISTER_STATE_CHECK(ret = GET_PARAM(2)(source_data_, source_stride_,
- reference, reference_stride_,
- max_sad));
- return ret;
- }
-
- void CheckSAD(unsigned int max_sad) {
- const unsigned int reference_sad = ReferenceSAD(max_sad, 0);
- const unsigned int exp_sad = SAD(max_sad, 0);
-
- if (reference_sad <= max_sad) {
- ASSERT_EQ(exp_sad, reference_sad);
- } else {
- // Alternative implementations are not required to check max_sad
- ASSERT_GE(exp_sad, reference_sad);
- }
- }
-};
-#endif // CONFIG_VP8_ENCODER
-
-#if CONFIG_VP9_ENCODER
-class SADVP9Test
- : public SADTestBase,
- public ::testing::WithParamInterface<SadMxNVp9Param> {
- public:
- SADVP9Test() : SADTestBase(GET_PARAM(0), GET_PARAM(1), GET_PARAM(3)) {}
-
- protected:
unsigned int SAD(int block_idx) {
unsigned int ret;
const uint8_t *const reference = GetReference(block_idx);
@@ -385,18 +300,18 @@
}
void CheckSAD() {
- const unsigned int reference_sad = ReferenceSAD(UINT_MAX, 0);
+ const unsigned int reference_sad = ReferenceSAD(0);
const unsigned int exp_sad = SAD(0);
ASSERT_EQ(reference_sad, exp_sad);
}
};
-class SADavgVP9Test
+class SADavgTest
: public SADTestBase,
- public ::testing::WithParamInterface<SadMxNAvgVp9Param> {
+ public ::testing::WithParamInterface<SadMxNAvgParam> {
public:
- SADavgVP9Test() : SADTestBase(GET_PARAM(0), GET_PARAM(1), GET_PARAM(3)) {}
+ SADavgTest() : SADTestBase(GET_PARAM(0), GET_PARAM(1), GET_PARAM(3)) {}
protected:
unsigned int SAD_avg(int block_idx) {
@@ -410,45 +325,41 @@
}
void CheckSAD() {
- const unsigned int reference_sad = ReferenceSADavg(UINT_MAX, 0);
+ const unsigned int reference_sad = ReferenceSADavg(0);
const unsigned int exp_sad = SAD_avg(0);
ASSERT_EQ(reference_sad, exp_sad);
}
};
-#endif // CONFIG_VP9_ENCODER
uint8_t *SADTestBase::source_data_ = NULL;
uint8_t *SADTestBase::reference_data_ = NULL;
uint8_t *SADTestBase::second_pred_ = NULL;
-#if CONFIG_VP9_ENCODER && CONFIG_VP9_HIGHBITDEPTH
uint8_t *SADTestBase::source_data8_ = NULL;
uint8_t *SADTestBase::reference_data8_ = NULL;
uint8_t *SADTestBase::second_pred8_ = NULL;
uint16_t *SADTestBase::source_data16_ = NULL;
uint16_t *SADTestBase::reference_data16_ = NULL;
uint16_t *SADTestBase::second_pred16_ = NULL;
-#endif
-#if CONFIG_VP8_ENCODER
TEST_P(SADTest, MaxRef) {
FillConstant(source_data_, source_stride_, 0);
FillConstant(reference_data_, reference_stride_, mask_);
- CheckSAD(UINT_MAX);
+ CheckSAD();
}
TEST_P(SADTest, MaxSrc) {
FillConstant(source_data_, source_stride_, mask_);
FillConstant(reference_data_, reference_stride_, 0);
- CheckSAD(UINT_MAX);
+ CheckSAD();
}
TEST_P(SADTest, ShortRef) {
- int tmp_stride = reference_stride_;
+ const int tmp_stride = reference_stride_;
reference_stride_ >>= 1;
FillRandom(source_data_, source_stride_);
FillRandom(reference_data_, reference_stride_);
- CheckSAD(UINT_MAX);
+ CheckSAD();
reference_stride_ = tmp_stride;
}
@@ -459,7 +370,7 @@
reference_stride_ -= 1;
FillRandom(source_data_, source_stride_);
FillRandom(reference_data_, reference_stride_);
- CheckSAD(UINT_MAX);
+ CheckSAD();
reference_stride_ = tmp_stride;
}
@@ -468,75 +379,24 @@
source_stride_ >>= 1;
FillRandom(source_data_, source_stride_);
FillRandom(reference_data_, reference_stride_);
- CheckSAD(UINT_MAX);
- source_stride_ = tmp_stride;
-}
-
-TEST_P(SADTest, MaxSAD) {
- // Verify that, when max_sad is set, the implementation does not return a
- // value lower than the reference.
- FillConstant(source_data_, source_stride_, mask_);
- FillConstant(reference_data_, reference_stride_, 0);
- CheckSAD(128);
-}
-#endif // CONFIG_VP8_ENCODER
-
-#if CONFIG_VP9_ENCODER
-TEST_P(SADVP9Test, MaxRef) {
- FillConstant(source_data_, source_stride_, 0);
- FillConstant(reference_data_, reference_stride_, mask_);
- CheckSAD();
-}
-
-TEST_P(SADVP9Test, MaxSrc) {
- FillConstant(source_data_, source_stride_, mask_);
- FillConstant(reference_data_, reference_stride_, 0);
- CheckSAD();
-}
-
-TEST_P(SADVP9Test, ShortRef) {
- const int tmp_stride = reference_stride_;
- reference_stride_ >>= 1;
- FillRandom(source_data_, source_stride_);
- FillRandom(reference_data_, reference_stride_);
- CheckSAD();
- reference_stride_ = tmp_stride;
-}
-
-TEST_P(SADVP9Test, UnalignedRef) {
- // The reference frame, but not the source frame, may be unaligned for
- // certain types of searches.
- const int tmp_stride = reference_stride_;
- reference_stride_ -= 1;
- FillRandom(source_data_, source_stride_);
- FillRandom(reference_data_, reference_stride_);
- CheckSAD();
- reference_stride_ = tmp_stride;
-}
-
-TEST_P(SADVP9Test, ShortSrc) {
- const int tmp_stride = source_stride_;
- source_stride_ >>= 1;
- FillRandom(source_data_, source_stride_);
- FillRandom(reference_data_, reference_stride_);
CheckSAD();
source_stride_ = tmp_stride;
}
-TEST_P(SADavgVP9Test, MaxRef) {
+TEST_P(SADavgTest, MaxRef) {
FillConstant(source_data_, source_stride_, 0);
FillConstant(reference_data_, reference_stride_, mask_);
FillConstant(second_pred_, width_, 0);
CheckSAD();
}
-TEST_P(SADavgVP9Test, MaxSrc) {
+TEST_P(SADavgTest, MaxSrc) {
FillConstant(source_data_, source_stride_, mask_);
FillConstant(reference_data_, reference_stride_, 0);
FillConstant(second_pred_, width_, 0);
CheckSAD();
}
-TEST_P(SADavgVP9Test, ShortRef) {
+TEST_P(SADavgTest, ShortRef) {
const int tmp_stride = reference_stride_;
reference_stride_ >>= 1;
FillRandom(source_data_, source_stride_);
@@ -546,7 +406,7 @@
reference_stride_ = tmp_stride;
}
-TEST_P(SADavgVP9Test, UnalignedRef) {
+TEST_P(SADavgTest, UnalignedRef) {
// The reference frame, but not the source frame, may be unaligned for
// certain types of searches.
const int tmp_stride = reference_stride_;
@@ -558,7 +418,7 @@
reference_stride_ = tmp_stride;
}
-TEST_P(SADavgVP9Test, ShortSrc) {
+TEST_P(SADavgTest, ShortSrc) {
const int tmp_stride = source_stride_;
source_stride_ >>= 1;
FillRandom(source_data_, source_stride_);
@@ -567,7 +427,6 @@
CheckSAD();
source_stride_ = tmp_stride;
}
-#endif // CONFIG_VP9_ENCODER
TEST_P(SADx4Test, MaxRef) {
FillConstant(source_data_, source_stride_, 0);
@@ -641,617 +500,633 @@
//------------------------------------------------------------------------------
// C functions
-#if CONFIG_VP8_ENCODER
-const SadMxNFunc sad_16x16_c = vp8_sad16x16_c;
-const SadMxNFunc sad_8x16_c = vp8_sad8x16_c;
-const SadMxNFunc sad_16x8_c = vp8_sad16x8_c;
-const SadMxNFunc sad_8x8_c = vp8_sad8x8_c;
-const SadMxNFunc sad_4x4_c = vp8_sad4x4_c;
+const SadMxNFunc sad64x64_c = vpx_sad64x64_c;
+const SadMxNFunc sad64x32_c = vpx_sad64x32_c;
+const SadMxNFunc sad32x64_c = vpx_sad32x64_c;
+const SadMxNFunc sad32x32_c = vpx_sad32x32_c;
+const SadMxNFunc sad32x16_c = vpx_sad32x16_c;
+const SadMxNFunc sad16x32_c = vpx_sad16x32_c;
+const SadMxNFunc sad16x16_c = vpx_sad16x16_c;
+const SadMxNFunc sad16x8_c = vpx_sad16x8_c;
+const SadMxNFunc sad8x16_c = vpx_sad8x16_c;
+const SadMxNFunc sad8x8_c = vpx_sad8x8_c;
+const SadMxNFunc sad8x4_c = vpx_sad8x4_c;
+const SadMxNFunc sad4x8_c = vpx_sad4x8_c;
+const SadMxNFunc sad4x4_c = vpx_sad4x4_c;
+#if CONFIG_VP9_HIGHBITDEPTH
+const SadMxNFunc highbd_sad64x64_c = vpx_highbd_sad64x64_c;
+const SadMxNFunc highbd_sad64x32_c = vpx_highbd_sad64x32_c;
+const SadMxNFunc highbd_sad32x64_c = vpx_highbd_sad32x64_c;
+const SadMxNFunc highbd_sad32x32_c = vpx_highbd_sad32x32_c;
+const SadMxNFunc highbd_sad32x16_c = vpx_highbd_sad32x16_c;
+const SadMxNFunc highbd_sad16x32_c = vpx_highbd_sad16x32_c;
+const SadMxNFunc highbd_sad16x16_c = vpx_highbd_sad16x16_c;
+const SadMxNFunc highbd_sad16x8_c = vpx_highbd_sad16x8_c;
+const SadMxNFunc highbd_sad8x16_c = vpx_highbd_sad8x16_c;
+const SadMxNFunc highbd_sad8x8_c = vpx_highbd_sad8x8_c;
+const SadMxNFunc highbd_sad8x4_c = vpx_highbd_sad8x4_c;
+const SadMxNFunc highbd_sad4x8_c = vpx_highbd_sad4x8_c;
+const SadMxNFunc highbd_sad4x4_c = vpx_highbd_sad4x4_c;
+#endif // CONFIG_VP9_HIGHBITDEPTH
const SadMxNParam c_tests[] = {
- make_tuple(16, 16, sad_16x16_c, -1),
- make_tuple(8, 16, sad_8x16_c, -1),
- make_tuple(16, 8, sad_16x8_c, -1),
- make_tuple(8, 8, sad_8x8_c, -1),
- make_tuple(4, 4, sad_4x4_c, -1),
+ make_tuple(64, 64, sad64x64_c, -1),
+ make_tuple(64, 32, sad64x32_c, -1),
+ make_tuple(32, 64, sad32x64_c, -1),
+ make_tuple(32, 32, sad32x32_c, -1),
+ make_tuple(32, 16, sad32x16_c, -1),
+ make_tuple(16, 32, sad16x32_c, -1),
+ make_tuple(16, 16, sad16x16_c, -1),
+ make_tuple(16, 8, sad16x8_c, -1),
+ make_tuple(8, 16, sad8x16_c, -1),
+ make_tuple(8, 8, sad8x8_c, -1),
+ make_tuple(8, 4, sad8x4_c, -1),
+ make_tuple(4, 8, sad4x8_c, -1),
+ make_tuple(4, 4, sad4x4_c, -1),
+#if CONFIG_VP9_HIGHBITDEPTH
+ make_tuple(64, 64, highbd_sad64x64_c, 8),
+ make_tuple(64, 32, highbd_sad64x32_c, 8),
+ make_tuple(32, 64, highbd_sad32x64_c, 8),
+ make_tuple(32, 32, highbd_sad32x32_c, 8),
+ make_tuple(32, 16, highbd_sad32x16_c, 8),
+ make_tuple(16, 32, highbd_sad16x32_c, 8),
+ make_tuple(16, 16, highbd_sad16x16_c, 8),
+ make_tuple(16, 8, highbd_sad16x8_c, 8),
+ make_tuple(8, 16, highbd_sad8x16_c, 8),
+ make_tuple(8, 8, highbd_sad8x8_c, 8),
+ make_tuple(8, 4, highbd_sad8x4_c, 8),
+ make_tuple(4, 8, highbd_sad4x8_c, 8),
+ make_tuple(4, 4, highbd_sad4x4_c, 8),
+ make_tuple(64, 64, highbd_sad64x64_c, 10),
+ make_tuple(64, 32, highbd_sad64x32_c, 10),
+ make_tuple(32, 64, highbd_sad32x64_c, 10),
+ make_tuple(32, 32, highbd_sad32x32_c, 10),
+ make_tuple(32, 16, highbd_sad32x16_c, 10),
+ make_tuple(16, 32, highbd_sad16x32_c, 10),
+ make_tuple(16, 16, highbd_sad16x16_c, 10),
+ make_tuple(16, 8, highbd_sad16x8_c, 10),
+ make_tuple(8, 16, highbd_sad8x16_c, 10),
+ make_tuple(8, 8, highbd_sad8x8_c, 10),
+ make_tuple(8, 4, highbd_sad8x4_c, 10),
+ make_tuple(4, 8, highbd_sad4x8_c, 10),
+ make_tuple(4, 4, highbd_sad4x4_c, 10),
+ make_tuple(64, 64, highbd_sad64x64_c, 12),
+ make_tuple(64, 32, highbd_sad64x32_c, 12),
+ make_tuple(32, 64, highbd_sad32x64_c, 12),
+ make_tuple(32, 32, highbd_sad32x32_c, 12),
+ make_tuple(32, 16, highbd_sad32x16_c, 12),
+ make_tuple(16, 32, highbd_sad16x32_c, 12),
+ make_tuple(16, 16, highbd_sad16x16_c, 12),
+ make_tuple(16, 8, highbd_sad16x8_c, 12),
+ make_tuple(8, 16, highbd_sad8x16_c, 12),
+ make_tuple(8, 8, highbd_sad8x8_c, 12),
+ make_tuple(8, 4, highbd_sad8x4_c, 12),
+ make_tuple(4, 8, highbd_sad4x8_c, 12),
+ make_tuple(4, 4, highbd_sad4x4_c, 12),
+#endif // CONFIG_VP9_HIGHBITDEPTH
};
INSTANTIATE_TEST_CASE_P(C, SADTest, ::testing::ValuesIn(c_tests));
-#endif // CONFIG_VP8_ENCODER
-#if CONFIG_VP9_ENCODER
-const SadMxNVp9Func sad_64x64_c_vp9 = vp9_sad64x64_c;
-const SadMxNVp9Func sad_32x32_c_vp9 = vp9_sad32x32_c;
-const SadMxNVp9Func sad_16x16_c_vp9 = vp9_sad16x16_c;
-const SadMxNVp9Func sad_8x16_c_vp9 = vp9_sad8x16_c;
-const SadMxNVp9Func sad_16x8_c_vp9 = vp9_sad16x8_c;
-const SadMxNVp9Func sad_8x8_c_vp9 = vp9_sad8x8_c;
-const SadMxNVp9Func sad_8x4_c_vp9 = vp9_sad8x4_c;
-const SadMxNVp9Func sad_4x8_c_vp9 = vp9_sad4x8_c;
-const SadMxNVp9Func sad_4x4_c_vp9 = vp9_sad4x4_c;
-const SadMxNVp9Param c_vp9_tests[] = {
- make_tuple(64, 64, sad_64x64_c_vp9, -1),
- make_tuple(32, 32, sad_32x32_c_vp9, -1),
- make_tuple(16, 16, sad_16x16_c_vp9, -1),
- make_tuple(8, 16, sad_8x16_c_vp9, -1),
- make_tuple(16, 8, sad_16x8_c_vp9, -1),
- make_tuple(8, 8, sad_8x8_c_vp9, -1),
- make_tuple(8, 4, sad_8x4_c_vp9, -1),
- make_tuple(4, 8, sad_4x8_c_vp9, -1),
- make_tuple(4, 4, sad_4x4_c_vp9, -1),
-};
-INSTANTIATE_TEST_CASE_P(C, SADVP9Test, ::testing::ValuesIn(c_vp9_tests));
-
-const SadMxNx4Func sad_64x64x4d_c = vp9_sad64x64x4d_c;
-const SadMxNx4Func sad_64x32x4d_c = vp9_sad64x32x4d_c;
-const SadMxNx4Func sad_32x64x4d_c = vp9_sad32x64x4d_c;
-const SadMxNx4Func sad_32x32x4d_c = vp9_sad32x32x4d_c;
-const SadMxNx4Func sad_32x16x4d_c = vp9_sad32x16x4d_c;
-const SadMxNx4Func sad_16x32x4d_c = vp9_sad16x32x4d_c;
-const SadMxNx4Func sad_16x16x4d_c = vp9_sad16x16x4d_c;
-const SadMxNx4Func sad_16x8x4d_c = vp9_sad16x8x4d_c;
-const SadMxNx4Func sad_8x16x4d_c = vp9_sad8x16x4d_c;
-const SadMxNx4Func sad_8x8x4d_c = vp9_sad8x8x4d_c;
-const SadMxNx4Func sad_8x4x4d_c = vp9_sad8x4x4d_c;
-const SadMxNx4Func sad_4x8x4d_c = vp9_sad4x8x4d_c;
-const SadMxNx4Func sad_4x4x4d_c = vp9_sad4x4x4d_c;
-INSTANTIATE_TEST_CASE_P(C, SADx4Test, ::testing::Values(
- make_tuple(64, 64, sad_64x64x4d_c, -1),
- make_tuple(64, 32, sad_64x32x4d_c, -1),
- make_tuple(32, 64, sad_32x64x4d_c, -1),
- make_tuple(32, 32, sad_32x32x4d_c, -1),
- make_tuple(32, 16, sad_32x16x4d_c, -1),
- make_tuple(16, 32, sad_16x32x4d_c, -1),
- make_tuple(16, 16, sad_16x16x4d_c, -1),
- make_tuple(16, 8, sad_16x8x4d_c, -1),
- make_tuple(8, 16, sad_8x16x4d_c, -1),
- make_tuple(8, 8, sad_8x8x4d_c, -1),
- make_tuple(8, 4, sad_8x4x4d_c, -1),
- make_tuple(4, 8, sad_4x8x4d_c, -1),
- make_tuple(4, 4, sad_4x4x4d_c, -1)));
-
+const SadMxNAvgFunc sad64x64_avg_c = vpx_sad64x64_avg_c;
+const SadMxNAvgFunc sad64x32_avg_c = vpx_sad64x32_avg_c;
+const SadMxNAvgFunc sad32x64_avg_c = vpx_sad32x64_avg_c;
+const SadMxNAvgFunc sad32x32_avg_c = vpx_sad32x32_avg_c;
+const SadMxNAvgFunc sad32x16_avg_c = vpx_sad32x16_avg_c;
+const SadMxNAvgFunc sad16x32_avg_c = vpx_sad16x32_avg_c;
+const SadMxNAvgFunc sad16x16_avg_c = vpx_sad16x16_avg_c;
+const SadMxNAvgFunc sad16x8_avg_c = vpx_sad16x8_avg_c;
+const SadMxNAvgFunc sad8x16_avg_c = vpx_sad8x16_avg_c;
+const SadMxNAvgFunc sad8x8_avg_c = vpx_sad8x8_avg_c;
+const SadMxNAvgFunc sad8x4_avg_c = vpx_sad8x4_avg_c;
+const SadMxNAvgFunc sad4x8_avg_c = vpx_sad4x8_avg_c;
+const SadMxNAvgFunc sad4x4_avg_c = vpx_sad4x4_avg_c;
#if CONFIG_VP9_HIGHBITDEPTH
-const SadMxNVp9Func highbd_sad_64x64_c_vp9 = vp9_highbd_sad64x64_c;
-const SadMxNVp9Func highbd_sad_32x32_c_vp9 = vp9_highbd_sad32x32_c;
-const SadMxNVp9Func highbd_sad_16x16_c_vp9 = vp9_highbd_sad16x16_c;
-const SadMxNVp9Func highbd_sad_8x16_c_vp9 = vp9_highbd_sad8x16_c;
-const SadMxNVp9Func highbd_sad_16x8_c_vp9 = vp9_highbd_sad16x8_c;
-const SadMxNVp9Func highbd_sad_8x8_c_vp9 = vp9_highbd_sad8x8_c;
-const SadMxNVp9Func highbd_sad_8x4_c_vp9 = vp9_highbd_sad8x4_c;
-const SadMxNVp9Func highbd_sad_4x8_c_vp9 = vp9_highbd_sad4x8_c;
-const SadMxNVp9Func highbd_sad_4x4_c_vp9 = vp9_highbd_sad4x4_c;
-const SadMxNVp9Param c_vp9_highbd_8_tests[] = {
- make_tuple(64, 64, highbd_sad_64x64_c_vp9, 8),
- make_tuple(32, 32, highbd_sad_32x32_c_vp9, 8),
- make_tuple(16, 16, highbd_sad_16x16_c_vp9, 8),
- make_tuple(8, 16, highbd_sad_8x16_c_vp9, 8),
- make_tuple(16, 8, highbd_sad_16x8_c_vp9, 8),
- make_tuple(8, 8, highbd_sad_8x8_c_vp9, 8),
- make_tuple(8, 4, highbd_sad_8x4_c_vp9, 8),
- make_tuple(4, 8, highbd_sad_4x8_c_vp9, 8),
- make_tuple(4, 4, highbd_sad_4x4_c_vp9, 8),
-};
-INSTANTIATE_TEST_CASE_P(C_8, SADVP9Test,
- ::testing::ValuesIn(c_vp9_highbd_8_tests));
-
-const SadMxNVp9Param c_vp9_highbd_10_tests[] = {
- make_tuple(64, 64, highbd_sad_64x64_c_vp9, 10),
- make_tuple(32, 32, highbd_sad_32x32_c_vp9, 10),
- make_tuple(16, 16, highbd_sad_16x16_c_vp9, 10),
- make_tuple(8, 16, highbd_sad_8x16_c_vp9, 10),
- make_tuple(16, 8, highbd_sad_16x8_c_vp9, 10),
- make_tuple(8, 8, highbd_sad_8x8_c_vp9, 10),
- make_tuple(8, 4, highbd_sad_8x4_c_vp9, 10),
- make_tuple(4, 8, highbd_sad_4x8_c_vp9, 10),
- make_tuple(4, 4, highbd_sad_4x4_c_vp9, 10),
-};
-INSTANTIATE_TEST_CASE_P(C_10, SADVP9Test,
- ::testing::ValuesIn(c_vp9_highbd_10_tests));
-
-const SadMxNVp9Param c_vp9_highbd_12_tests[] = {
- make_tuple(64, 64, highbd_sad_64x64_c_vp9, 12),
- make_tuple(32, 32, highbd_sad_32x32_c_vp9, 12),
- make_tuple(16, 16, highbd_sad_16x16_c_vp9, 12),
- make_tuple(8, 16, highbd_sad_8x16_c_vp9, 12),
- make_tuple(16, 8, highbd_sad_16x8_c_vp9, 12),
- make_tuple(8, 8, highbd_sad_8x8_c_vp9, 12),
- make_tuple(8, 4, highbd_sad_8x4_c_vp9, 12),
- make_tuple(4, 8, highbd_sad_4x8_c_vp9, 12),
- make_tuple(4, 4, highbd_sad_4x4_c_vp9, 12),
-};
-INSTANTIATE_TEST_CASE_P(C_12, SADVP9Test,
- ::testing::ValuesIn(c_vp9_highbd_12_tests));
-
-const SadMxNAvgVp9Func highbd_sad8x4_avg_c_vp9 = vp9_highbd_sad8x4_avg_c;
-const SadMxNAvgVp9Func highbd_sad8x8_avg_c_vp9 = vp9_highbd_sad8x8_avg_c;
-const SadMxNAvgVp9Func highbd_sad8x16_avg_c_vp9 = vp9_highbd_sad8x16_avg_c;
-const SadMxNAvgVp9Func highbd_sad16x8_avg_c_vp9 = vp9_highbd_sad16x8_avg_c;
-const SadMxNAvgVp9Func highbd_sad16x16_avg_c_vp9 = vp9_highbd_sad16x16_avg_c;
-const SadMxNAvgVp9Func highbd_sad16x32_avg_c_vp9 = vp9_highbd_sad16x32_avg_c;
-const SadMxNAvgVp9Func highbd_sad32x16_avg_c_vp9 = vp9_highbd_sad32x16_avg_c;
-const SadMxNAvgVp9Func highbd_sad32x32_avg_c_vp9 = vp9_highbd_sad32x32_avg_c;
-const SadMxNAvgVp9Func highbd_sad32x64_avg_c_vp9 = vp9_highbd_sad32x64_avg_c;
-const SadMxNAvgVp9Func highbd_sad64x32_avg_c_vp9 = vp9_highbd_sad64x32_avg_c;
-const SadMxNAvgVp9Func highbd_sad64x64_avg_c_vp9 = vp9_highbd_sad64x64_avg_c;
-SadMxNAvgVp9Param avg_c_vp9_highbd_8_tests[] = {
- make_tuple(8, 4, highbd_sad8x4_avg_c_vp9, 8),
- make_tuple(8, 8, highbd_sad8x8_avg_c_vp9, 8),
- make_tuple(8, 16, highbd_sad8x16_avg_c_vp9, 8),
- make_tuple(16, 8, highbd_sad16x8_avg_c_vp9, 8),
- make_tuple(16, 16, highbd_sad16x16_avg_c_vp9, 8),
- make_tuple(16, 32, highbd_sad16x32_avg_c_vp9, 8),
- make_tuple(32, 16, highbd_sad32x16_avg_c_vp9, 8),
- make_tuple(32, 32, highbd_sad32x32_avg_c_vp9, 8),
- make_tuple(32, 64, highbd_sad32x64_avg_c_vp9, 8),
- make_tuple(64, 32, highbd_sad64x32_avg_c_vp9, 8),
- make_tuple(64, 64, highbd_sad64x64_avg_c_vp9, 8)};
-INSTANTIATE_TEST_CASE_P(C_8, SADavgVP9Test,
- ::testing::ValuesIn(avg_c_vp9_highbd_8_tests));
-
-SadMxNAvgVp9Param avg_c_vp9_highbd_10_tests[] = {
- make_tuple(8, 4, highbd_sad8x4_avg_c_vp9, 10),
- make_tuple(8, 8, highbd_sad8x8_avg_c_vp9, 10),
- make_tuple(8, 16, highbd_sad8x16_avg_c_vp9, 10),
- make_tuple(16, 8, highbd_sad16x8_avg_c_vp9, 10),
- make_tuple(16, 16, highbd_sad16x16_avg_c_vp9, 10),
- make_tuple(16, 32, highbd_sad16x32_avg_c_vp9, 10),
- make_tuple(32, 16, highbd_sad32x16_avg_c_vp9, 10),
- make_tuple(32, 32, highbd_sad32x32_avg_c_vp9, 10),
- make_tuple(32, 64, highbd_sad32x64_avg_c_vp9, 10),
- make_tuple(64, 32, highbd_sad64x32_avg_c_vp9, 10),
- make_tuple(64, 64, highbd_sad64x64_avg_c_vp9, 10)};
-INSTANTIATE_TEST_CASE_P(C_10, SADavgVP9Test,
- ::testing::ValuesIn(avg_c_vp9_highbd_10_tests));
-
-SadMxNAvgVp9Param avg_c_vp9_highbd_12_tests[] = {
- make_tuple(8, 4, highbd_sad8x4_avg_c_vp9, 12),
- make_tuple(8, 8, highbd_sad8x8_avg_c_vp9, 12),
- make_tuple(8, 16, highbd_sad8x16_avg_c_vp9, 12),
- make_tuple(16, 8, highbd_sad16x8_avg_c_vp9, 12),
- make_tuple(16, 16, highbd_sad16x16_avg_c_vp9, 12),
- make_tuple(16, 32, highbd_sad16x32_avg_c_vp9, 12),
- make_tuple(32, 16, highbd_sad32x16_avg_c_vp9, 12),
- make_tuple(32, 32, highbd_sad32x32_avg_c_vp9, 12),
- make_tuple(32, 64, highbd_sad32x64_avg_c_vp9, 12),
- make_tuple(64, 32, highbd_sad64x32_avg_c_vp9, 12),
- make_tuple(64, 64, highbd_sad64x64_avg_c_vp9, 12)};
-INSTANTIATE_TEST_CASE_P(C_12, SADavgVP9Test,
- ::testing::ValuesIn(avg_c_vp9_highbd_12_tests));
-
-const SadMxNx4Func highbd_sad_64x64x4d_c = vp9_highbd_sad64x64x4d_c;
-const SadMxNx4Func highbd_sad_64x32x4d_c = vp9_highbd_sad64x32x4d_c;
-const SadMxNx4Func highbd_sad_32x64x4d_c = vp9_highbd_sad32x64x4d_c;
-const SadMxNx4Func highbd_sad_32x32x4d_c = vp9_highbd_sad32x32x4d_c;
-const SadMxNx4Func highbd_sad_32x16x4d_c = vp9_highbd_sad32x16x4d_c;
-const SadMxNx4Func highbd_sad_16x32x4d_c = vp9_highbd_sad16x32x4d_c;
-const SadMxNx4Func highbd_sad_16x16x4d_c = vp9_highbd_sad16x16x4d_c;
-const SadMxNx4Func highbd_sad_16x8x4d_c = vp9_highbd_sad16x8x4d_c;
-const SadMxNx4Func highbd_sad_8x16x4d_c = vp9_highbd_sad8x16x4d_c;
-const SadMxNx4Func highbd_sad_8x8x4d_c = vp9_highbd_sad8x8x4d_c;
-const SadMxNx4Func highbd_sad_8x4x4d_c = vp9_highbd_sad8x4x4d_c;
-const SadMxNx4Func highbd_sad_4x8x4d_c = vp9_highbd_sad4x8x4d_c;
-const SadMxNx4Func highbd_sad_4x4x4d_c = vp9_highbd_sad4x4x4d_c;
-INSTANTIATE_TEST_CASE_P(C_8, SADx4Test, ::testing::Values(
- make_tuple(64, 64, highbd_sad_64x64x4d_c, 8),
- make_tuple(64, 32, highbd_sad_64x32x4d_c, 8),
- make_tuple(32, 64, highbd_sad_32x64x4d_c, 8),
- make_tuple(32, 32, highbd_sad_32x32x4d_c, 8),
- make_tuple(32, 16, highbd_sad_32x16x4d_c, 8),
- make_tuple(16, 32, highbd_sad_16x32x4d_c, 8),
- make_tuple(16, 16, highbd_sad_16x16x4d_c, 8),
- make_tuple(16, 8, highbd_sad_16x8x4d_c, 8),
- make_tuple(8, 16, highbd_sad_8x16x4d_c, 8),
- make_tuple(8, 8, highbd_sad_8x8x4d_c, 8),
- make_tuple(8, 4, highbd_sad_8x4x4d_c, 8),
- make_tuple(4, 8, highbd_sad_4x8x4d_c, 8),
- make_tuple(4, 4, highbd_sad_4x4x4d_c, 8)));
-
-INSTANTIATE_TEST_CASE_P(C_10, SADx4Test, ::testing::Values(
- make_tuple(64, 64, highbd_sad_64x64x4d_c, 10),
- make_tuple(64, 32, highbd_sad_64x32x4d_c, 10),
- make_tuple(32, 64, highbd_sad_32x64x4d_c, 10),
- make_tuple(32, 32, highbd_sad_32x32x4d_c, 10),
- make_tuple(32, 16, highbd_sad_32x16x4d_c, 10),
- make_tuple(16, 32, highbd_sad_16x32x4d_c, 10),
- make_tuple(16, 16, highbd_sad_16x16x4d_c, 10),
- make_tuple(16, 8, highbd_sad_16x8x4d_c, 10),
- make_tuple(8, 16, highbd_sad_8x16x4d_c, 10),
- make_tuple(8, 8, highbd_sad_8x8x4d_c, 10),
- make_tuple(8, 4, highbd_sad_8x4x4d_c, 10),
- make_tuple(4, 8, highbd_sad_4x8x4d_c, 10),
- make_tuple(4, 4, highbd_sad_4x4x4d_c, 10)));
-
-INSTANTIATE_TEST_CASE_P(C_12, SADx4Test, ::testing::Values(
- make_tuple(64, 64, highbd_sad_64x64x4d_c, 12),
- make_tuple(64, 32, highbd_sad_64x32x4d_c, 12),
- make_tuple(32, 64, highbd_sad_32x64x4d_c, 12),
- make_tuple(32, 32, highbd_sad_32x32x4d_c, 12),
- make_tuple(32, 16, highbd_sad_32x16x4d_c, 12),
- make_tuple(16, 32, highbd_sad_16x32x4d_c, 12),
- make_tuple(16, 16, highbd_sad_16x16x4d_c, 12),
- make_tuple(16, 8, highbd_sad_16x8x4d_c, 12),
- make_tuple(8, 16, highbd_sad_8x16x4d_c, 12),
- make_tuple(8, 8, highbd_sad_8x8x4d_c, 12),
- make_tuple(8, 4, highbd_sad_8x4x4d_c, 12),
- make_tuple(4, 8, highbd_sad_4x8x4d_c, 12),
- make_tuple(4, 4, highbd_sad_4x4x4d_c, 12)));
+const SadMxNAvgFunc highbd_sad64x64_avg_c = vpx_highbd_sad64x64_avg_c;
+const SadMxNAvgFunc highbd_sad64x32_avg_c = vpx_highbd_sad64x32_avg_c;
+const SadMxNAvgFunc highbd_sad32x64_avg_c = vpx_highbd_sad32x64_avg_c;
+const SadMxNAvgFunc highbd_sad32x32_avg_c = vpx_highbd_sad32x32_avg_c;
+const SadMxNAvgFunc highbd_sad32x16_avg_c = vpx_highbd_sad32x16_avg_c;
+const SadMxNAvgFunc highbd_sad16x32_avg_c = vpx_highbd_sad16x32_avg_c;
+const SadMxNAvgFunc highbd_sad16x16_avg_c = vpx_highbd_sad16x16_avg_c;
+const SadMxNAvgFunc highbd_sad16x8_avg_c = vpx_highbd_sad16x8_avg_c;
+const SadMxNAvgFunc highbd_sad8x16_avg_c = vpx_highbd_sad8x16_avg_c;
+const SadMxNAvgFunc highbd_sad8x8_avg_c = vpx_highbd_sad8x8_avg_c;
+const SadMxNAvgFunc highbd_sad8x4_avg_c = vpx_highbd_sad8x4_avg_c;
+const SadMxNAvgFunc highbd_sad4x8_avg_c = vpx_highbd_sad4x8_avg_c;
+const SadMxNAvgFunc highbd_sad4x4_avg_c = vpx_highbd_sad4x4_avg_c;
#endif // CONFIG_VP9_HIGHBITDEPTH
-#endif // CONFIG_VP9_ENCODER
+const SadMxNAvgParam avg_c_tests[] = {
+ make_tuple(64, 64, sad64x64_avg_c, -1),
+ make_tuple(64, 32, sad64x32_avg_c, -1),
+ make_tuple(32, 64, sad32x64_avg_c, -1),
+ make_tuple(32, 32, sad32x32_avg_c, -1),
+ make_tuple(32, 16, sad32x16_avg_c, -1),
+ make_tuple(16, 32, sad16x32_avg_c, -1),
+ make_tuple(16, 16, sad16x16_avg_c, -1),
+ make_tuple(16, 8, sad16x8_avg_c, -1),
+ make_tuple(8, 16, sad8x16_avg_c, -1),
+ make_tuple(8, 8, sad8x8_avg_c, -1),
+ make_tuple(8, 4, sad8x4_avg_c, -1),
+ make_tuple(4, 8, sad4x8_avg_c, -1),
+ make_tuple(4, 4, sad4x4_avg_c, -1),
+#if CONFIG_VP9_HIGHBITDEPTH
+ make_tuple(64, 64, highbd_sad64x64_avg_c, 8),
+ make_tuple(64, 32, highbd_sad64x32_avg_c, 8),
+ make_tuple(32, 64, highbd_sad32x64_avg_c, 8),
+ make_tuple(32, 32, highbd_sad32x32_avg_c, 8),
+ make_tuple(32, 16, highbd_sad32x16_avg_c, 8),
+ make_tuple(16, 32, highbd_sad16x32_avg_c, 8),
+ make_tuple(16, 16, highbd_sad16x16_avg_c, 8),
+ make_tuple(16, 8, highbd_sad16x8_avg_c, 8),
+ make_tuple(8, 16, highbd_sad8x16_avg_c, 8),
+ make_tuple(8, 8, highbd_sad8x8_avg_c, 8),
+ make_tuple(8, 4, highbd_sad8x4_avg_c, 8),
+ make_tuple(4, 8, highbd_sad4x8_avg_c, 8),
+ make_tuple(4, 4, highbd_sad4x4_avg_c, 8),
+ make_tuple(64, 64, highbd_sad64x64_avg_c, 10),
+ make_tuple(64, 32, highbd_sad64x32_avg_c, 10),
+ make_tuple(32, 64, highbd_sad32x64_avg_c, 10),
+ make_tuple(32, 32, highbd_sad32x32_avg_c, 10),
+ make_tuple(32, 16, highbd_sad32x16_avg_c, 10),
+ make_tuple(16, 32, highbd_sad16x32_avg_c, 10),
+ make_tuple(16, 16, highbd_sad16x16_avg_c, 10),
+ make_tuple(16, 8, highbd_sad16x8_avg_c, 10),
+ make_tuple(8, 16, highbd_sad8x16_avg_c, 10),
+ make_tuple(8, 8, highbd_sad8x8_avg_c, 10),
+ make_tuple(8, 4, highbd_sad8x4_avg_c, 10),
+ make_tuple(4, 8, highbd_sad4x8_avg_c, 10),
+ make_tuple(4, 4, highbd_sad4x4_avg_c, 10),
+ make_tuple(64, 64, highbd_sad64x64_avg_c, 12),
+ make_tuple(64, 32, highbd_sad64x32_avg_c, 12),
+ make_tuple(32, 64, highbd_sad32x64_avg_c, 12),
+ make_tuple(32, 32, highbd_sad32x32_avg_c, 12),
+ make_tuple(32, 16, highbd_sad32x16_avg_c, 12),
+ make_tuple(16, 32, highbd_sad16x32_avg_c, 12),
+ make_tuple(16, 16, highbd_sad16x16_avg_c, 12),
+ make_tuple(16, 8, highbd_sad16x8_avg_c, 12),
+ make_tuple(8, 16, highbd_sad8x16_avg_c, 12),
+ make_tuple(8, 8, highbd_sad8x8_avg_c, 12),
+ make_tuple(8, 4, highbd_sad8x4_avg_c, 12),
+ make_tuple(4, 8, highbd_sad4x8_avg_c, 12),
+ make_tuple(4, 4, highbd_sad4x4_avg_c, 12),
+#endif // CONFIG_VP9_HIGHBITDEPTH
+};
+INSTANTIATE_TEST_CASE_P(C, SADavgTest, ::testing::ValuesIn(avg_c_tests));
+
+const SadMxNx4Func sad64x64x4d_c = vpx_sad64x64x4d_c;
+const SadMxNx4Func sad64x32x4d_c = vpx_sad64x32x4d_c;
+const SadMxNx4Func sad32x64x4d_c = vpx_sad32x64x4d_c;
+const SadMxNx4Func sad32x32x4d_c = vpx_sad32x32x4d_c;
+const SadMxNx4Func sad32x16x4d_c = vpx_sad32x16x4d_c;
+const SadMxNx4Func sad16x32x4d_c = vpx_sad16x32x4d_c;
+const SadMxNx4Func sad16x16x4d_c = vpx_sad16x16x4d_c;
+const SadMxNx4Func sad16x8x4d_c = vpx_sad16x8x4d_c;
+const SadMxNx4Func sad8x16x4d_c = vpx_sad8x16x4d_c;
+const SadMxNx4Func sad8x8x4d_c = vpx_sad8x8x4d_c;
+const SadMxNx4Func sad8x4x4d_c = vpx_sad8x4x4d_c;
+const SadMxNx4Func sad4x8x4d_c = vpx_sad4x8x4d_c;
+const SadMxNx4Func sad4x4x4d_c = vpx_sad4x4x4d_c;
+#if CONFIG_VP9_HIGHBITDEPTH
+const SadMxNx4Func highbd_sad64x64x4d_c = vpx_highbd_sad64x64x4d_c;
+const SadMxNx4Func highbd_sad64x32x4d_c = vpx_highbd_sad64x32x4d_c;
+const SadMxNx4Func highbd_sad32x64x4d_c = vpx_highbd_sad32x64x4d_c;
+const SadMxNx4Func highbd_sad32x32x4d_c = vpx_highbd_sad32x32x4d_c;
+const SadMxNx4Func highbd_sad32x16x4d_c = vpx_highbd_sad32x16x4d_c;
+const SadMxNx4Func highbd_sad16x32x4d_c = vpx_highbd_sad16x32x4d_c;
+const SadMxNx4Func highbd_sad16x16x4d_c = vpx_highbd_sad16x16x4d_c;
+const SadMxNx4Func highbd_sad16x8x4d_c = vpx_highbd_sad16x8x4d_c;
+const SadMxNx4Func highbd_sad8x16x4d_c = vpx_highbd_sad8x16x4d_c;
+const SadMxNx4Func highbd_sad8x8x4d_c = vpx_highbd_sad8x8x4d_c;
+const SadMxNx4Func highbd_sad8x4x4d_c = vpx_highbd_sad8x4x4d_c;
+const SadMxNx4Func highbd_sad4x8x4d_c = vpx_highbd_sad4x8x4d_c;
+const SadMxNx4Func highbd_sad4x4x4d_c = vpx_highbd_sad4x4x4d_c;
+#endif // CONFIG_VP9_HIGHBITDEPTH
+const SadMxNx4Param x4d_c_tests[] = {
+ make_tuple(64, 64, sad64x64x4d_c, -1),
+ make_tuple(64, 32, sad64x32x4d_c, -1),
+ make_tuple(32, 64, sad32x64x4d_c, -1),
+ make_tuple(32, 32, sad32x32x4d_c, -1),
+ make_tuple(32, 16, sad32x16x4d_c, -1),
+ make_tuple(16, 32, sad16x32x4d_c, -1),
+ make_tuple(16, 16, sad16x16x4d_c, -1),
+ make_tuple(16, 8, sad16x8x4d_c, -1),
+ make_tuple(8, 16, sad8x16x4d_c, -1),
+ make_tuple(8, 8, sad8x8x4d_c, -1),
+ make_tuple(8, 4, sad8x4x4d_c, -1),
+ make_tuple(4, 8, sad4x8x4d_c, -1),
+ make_tuple(4, 4, sad4x4x4d_c, -1),
+#if CONFIG_VP9_HIGHBITDEPTH
+ make_tuple(64, 64, highbd_sad64x64x4d_c, 8),
+ make_tuple(64, 32, highbd_sad64x32x4d_c, 8),
+ make_tuple(32, 64, highbd_sad32x64x4d_c, 8),
+ make_tuple(32, 32, highbd_sad32x32x4d_c, 8),
+ make_tuple(32, 16, highbd_sad32x16x4d_c, 8),
+ make_tuple(16, 32, highbd_sad16x32x4d_c, 8),
+ make_tuple(16, 16, highbd_sad16x16x4d_c, 8),
+ make_tuple(16, 8, highbd_sad16x8x4d_c, 8),
+ make_tuple(8, 16, highbd_sad8x16x4d_c, 8),
+ make_tuple(8, 8, highbd_sad8x8x4d_c, 8),
+ make_tuple(8, 4, highbd_sad8x4x4d_c, 8),
+ make_tuple(4, 8, highbd_sad4x8x4d_c, 8),
+ make_tuple(4, 4, highbd_sad4x4x4d_c, 8),
+ make_tuple(64, 64, highbd_sad64x64x4d_c, 10),
+ make_tuple(64, 32, highbd_sad64x32x4d_c, 10),
+ make_tuple(32, 64, highbd_sad32x64x4d_c, 10),
+ make_tuple(32, 32, highbd_sad32x32x4d_c, 10),
+ make_tuple(32, 16, highbd_sad32x16x4d_c, 10),
+ make_tuple(16, 32, highbd_sad16x32x4d_c, 10),
+ make_tuple(16, 16, highbd_sad16x16x4d_c, 10),
+ make_tuple(16, 8, highbd_sad16x8x4d_c, 10),
+ make_tuple(8, 16, highbd_sad8x16x4d_c, 10),
+ make_tuple(8, 8, highbd_sad8x8x4d_c, 10),
+ make_tuple(8, 4, highbd_sad8x4x4d_c, 10),
+ make_tuple(4, 8, highbd_sad4x8x4d_c, 10),
+ make_tuple(4, 4, highbd_sad4x4x4d_c, 10),
+ make_tuple(64, 64, highbd_sad64x64x4d_c, 12),
+ make_tuple(64, 32, highbd_sad64x32x4d_c, 12),
+ make_tuple(32, 64, highbd_sad32x64x4d_c, 12),
+ make_tuple(32, 32, highbd_sad32x32x4d_c, 12),
+ make_tuple(32, 16, highbd_sad32x16x4d_c, 12),
+ make_tuple(16, 32, highbd_sad16x32x4d_c, 12),
+ make_tuple(16, 16, highbd_sad16x16x4d_c, 12),
+ make_tuple(16, 8, highbd_sad16x8x4d_c, 12),
+ make_tuple(8, 16, highbd_sad8x16x4d_c, 12),
+ make_tuple(8, 8, highbd_sad8x8x4d_c, 12),
+ make_tuple(8, 4, highbd_sad8x4x4d_c, 12),
+ make_tuple(4, 8, highbd_sad4x8x4d_c, 12),
+ make_tuple(4, 4, highbd_sad4x4x4d_c, 12),
+#endif // CONFIG_VP9_HIGHBITDEPTH
+};
+INSTANTIATE_TEST_CASE_P(C, SADx4Test, ::testing::ValuesIn(x4d_c_tests));
//------------------------------------------------------------------------------
// ARM functions
#if HAVE_MEDIA
-#if CONFIG_VP8_ENCODER
-const SadMxNFunc sad_16x16_armv6 = vp8_sad16x16_armv6;
-INSTANTIATE_TEST_CASE_P(MEDIA, SADTest, ::testing::Values(
- make_tuple(16, 16, sad_16x16_armv6, -1)));
-#endif // CONFIG_VP8_ENCODER
+const SadMxNFunc sad16x16_media = vpx_sad16x16_media;
+const SadMxNParam media_tests[] = {
+ make_tuple(16, 16, sad16x16_media, -1),
+};
+INSTANTIATE_TEST_CASE_P(MEDIA, SADTest, ::testing::ValuesIn(media_tests));
#endif // HAVE_MEDIA
#if HAVE_NEON
-#if CONFIG_VP8_ENCODER
-const SadMxNFunc sad_16x16_neon = vp8_sad16x16_neon;
-const SadMxNFunc sad_8x16_neon = vp8_sad8x16_neon;
-const SadMxNFunc sad_16x8_neon = vp8_sad16x8_neon;
-const SadMxNFunc sad_8x8_neon = vp8_sad8x8_neon;
-const SadMxNFunc sad_4x4_neon = vp8_sad4x4_neon;
-INSTANTIATE_TEST_CASE_P(NEON, SADTest, ::testing::Values(
- make_tuple(16, 16, sad_16x16_neon, -1),
- make_tuple(8, 16, sad_8x16_neon, -1),
- make_tuple(16, 8, sad_16x8_neon, -1),
- make_tuple(8, 8, sad_8x8_neon, -1),
- make_tuple(4, 4, sad_4x4_neon, -1)));
-#endif // CONFIG_VP8_ENCODER
-#if CONFIG_VP9_ENCODER
-const SadMxNVp9Func sad_64x64_neon_vp9 = vp9_sad64x64_neon;
-const SadMxNVp9Func sad_32x32_neon_vp9 = vp9_sad32x32_neon;
-const SadMxNVp9Func sad_16x16_neon_vp9 = vp9_sad16x16_neon;
-const SadMxNVp9Func sad_8x8_neon_vp9 = vp9_sad8x8_neon;
-const SadMxNVp9Param neon_vp9_tests[] = {
- make_tuple(64, 64, sad_64x64_neon_vp9, -1),
- make_tuple(32, 32, sad_32x32_neon_vp9, -1),
- make_tuple(16, 16, sad_16x16_neon_vp9, -1),
- make_tuple(8, 8, sad_8x8_neon_vp9, -1),
+const SadMxNFunc sad64x64_neon = vpx_sad64x64_neon;
+const SadMxNFunc sad32x32_neon = vpx_sad32x32_neon;
+const SadMxNFunc sad16x16_neon = vpx_sad16x16_neon;
+const SadMxNFunc sad16x8_neon = vpx_sad16x8_neon;
+const SadMxNFunc sad8x16_neon = vpx_sad8x16_neon;
+const SadMxNFunc sad8x8_neon = vpx_sad8x8_neon;
+const SadMxNFunc sad4x4_neon = vpx_sad4x4_neon;
+
+const SadMxNParam neon_tests[] = {
+ make_tuple(64, 64, sad64x64_neon, -1),
+ make_tuple(32, 32, sad32x32_neon, -1),
+ make_tuple(16, 16, sad16x16_neon, -1),
+ make_tuple(16, 8, sad16x8_neon, -1),
+ make_tuple(8, 16, sad8x16_neon, -1),
+ make_tuple(8, 8, sad8x8_neon, -1),
+ make_tuple(4, 4, sad4x4_neon, -1),
};
-INSTANTIATE_TEST_CASE_P(NEON, SADVP9Test, ::testing::ValuesIn(neon_vp9_tests));
-#endif // CONFIG_VP9_ENCODER
+INSTANTIATE_TEST_CASE_P(NEON, SADTest, ::testing::ValuesIn(neon_tests));
+
+const SadMxNx4Func sad64x64x4d_neon = vpx_sad64x64x4d_neon;
+const SadMxNx4Func sad32x32x4d_neon = vpx_sad32x32x4d_neon;
+const SadMxNx4Func sad16x16x4d_neon = vpx_sad16x16x4d_neon;
+const SadMxNx4Param x4d_neon_tests[] = {
+ make_tuple(64, 64, sad64x64x4d_neon, -1),
+ make_tuple(32, 32, sad32x32x4d_neon, -1),
+ make_tuple(16, 16, sad16x16x4d_neon, -1),
+};
+INSTANTIATE_TEST_CASE_P(NEON, SADx4Test, ::testing::ValuesIn(x4d_neon_tests));
#endif // HAVE_NEON
//------------------------------------------------------------------------------
// x86 functions
#if HAVE_MMX
-#if CONFIG_VP8_ENCODER
-const SadMxNFunc sad_16x16_mmx = vp8_sad16x16_mmx;
-const SadMxNFunc sad_8x16_mmx = vp8_sad8x16_mmx;
-const SadMxNFunc sad_16x8_mmx = vp8_sad16x8_mmx;
-const SadMxNFunc sad_8x8_mmx = vp8_sad8x8_mmx;
-const SadMxNFunc sad_4x4_mmx = vp8_sad4x4_mmx;
+const SadMxNFunc sad16x16_mmx = vpx_sad16x16_mmx;
+const SadMxNFunc sad16x8_mmx = vpx_sad16x8_mmx;
+const SadMxNFunc sad8x16_mmx = vpx_sad8x16_mmx;
+const SadMxNFunc sad8x8_mmx = vpx_sad8x8_mmx;
+const SadMxNFunc sad4x4_mmx = vpx_sad4x4_mmx;
const SadMxNParam mmx_tests[] = {
- make_tuple(16, 16, sad_16x16_mmx, -1),
- make_tuple(8, 16, sad_8x16_mmx, -1),
- make_tuple(16, 8, sad_16x8_mmx, -1),
- make_tuple(8, 8, sad_8x8_mmx, -1),
- make_tuple(4, 4, sad_4x4_mmx, -1),
+ make_tuple(16, 16, sad16x16_mmx, -1),
+ make_tuple(16, 8, sad16x8_mmx, -1),
+ make_tuple(8, 16, sad8x16_mmx, -1),
+ make_tuple(8, 8, sad8x8_mmx, -1),
+ make_tuple(4, 4, sad4x4_mmx, -1),
};
INSTANTIATE_TEST_CASE_P(MMX, SADTest, ::testing::ValuesIn(mmx_tests));
-#endif // CONFIG_VP8_ENCODER
-
#endif // HAVE_MMX
#if HAVE_SSE
-#if CONFIG_VP9_ENCODER
#if CONFIG_USE_X86INC
-const SadMxNVp9Func sad_4x4_sse_vp9 = vp9_sad4x4_sse;
-const SadMxNVp9Func sad_4x8_sse_vp9 = vp9_sad4x8_sse;
-INSTANTIATE_TEST_CASE_P(SSE, SADVP9Test, ::testing::Values(
- make_tuple(4, 4, sad_4x4_sse_vp9, -1),
- make_tuple(4, 8, sad_4x8_sse_vp9, -1)));
+const SadMxNFunc sad4x8_sse = vpx_sad4x8_sse;
+const SadMxNFunc sad4x4_sse = vpx_sad4x4_sse;
+const SadMxNParam sse_tests[] = {
+ make_tuple(4, 8, sad4x8_sse, -1),
+ make_tuple(4, 4, sad4x4_sse, -1),
+};
+INSTANTIATE_TEST_CASE_P(SSE, SADTest, ::testing::ValuesIn(sse_tests));
-const SadMxNx4Func sad_4x8x4d_sse = vp9_sad4x8x4d_sse;
-const SadMxNx4Func sad_4x4x4d_sse = vp9_sad4x4x4d_sse;
-INSTANTIATE_TEST_CASE_P(SSE, SADx4Test, ::testing::Values(
- make_tuple(4, 8, sad_4x8x4d_sse, -1),
- make_tuple(4, 4, sad_4x4x4d_sse, -1)));
+const SadMxNAvgFunc sad4x8_avg_sse = vpx_sad4x8_avg_sse;
+const SadMxNAvgFunc sad4x4_avg_sse = vpx_sad4x4_avg_sse;
+const SadMxNAvgParam avg_sse_tests[] = {
+ make_tuple(4, 8, sad4x8_avg_sse, -1),
+ make_tuple(4, 4, sad4x4_avg_sse, -1),
+};
+INSTANTIATE_TEST_CASE_P(SSE, SADavgTest, ::testing::ValuesIn(avg_sse_tests));
+
+const SadMxNx4Func sad4x8x4d_sse = vpx_sad4x8x4d_sse;
+const SadMxNx4Func sad4x4x4d_sse = vpx_sad4x4x4d_sse;
+const SadMxNx4Param x4d_sse_tests[] = {
+ make_tuple(4, 8, sad4x8x4d_sse, -1),
+ make_tuple(4, 4, sad4x4x4d_sse, -1),
+};
+INSTANTIATE_TEST_CASE_P(SSE, SADx4Test, ::testing::ValuesIn(x4d_sse_tests));
#endif // CONFIG_USE_X86INC
-#endif // CONFIG_VP9_ENCODER
#endif // HAVE_SSE
#if HAVE_SSE2
-#if CONFIG_VP8_ENCODER
-const SadMxNFunc sad_16x16_wmt = vp8_sad16x16_wmt;
-const SadMxNFunc sad_8x16_wmt = vp8_sad8x16_wmt;
-const SadMxNFunc sad_16x8_wmt = vp8_sad16x8_wmt;
-const SadMxNFunc sad_8x8_wmt = vp8_sad8x8_wmt;
-const SadMxNFunc sad_4x4_wmt = vp8_sad4x4_wmt;
+#if CONFIG_USE_X86INC
+const SadMxNFunc sad64x64_sse2 = vpx_sad64x64_sse2;
+const SadMxNFunc sad64x32_sse2 = vpx_sad64x32_sse2;
+const SadMxNFunc sad32x64_sse2 = vpx_sad32x64_sse2;
+const SadMxNFunc sad32x32_sse2 = vpx_sad32x32_sse2;
+const SadMxNFunc sad32x16_sse2 = vpx_sad32x16_sse2;
+const SadMxNFunc sad16x32_sse2 = vpx_sad16x32_sse2;
+const SadMxNFunc sad16x16_sse2 = vpx_sad16x16_sse2;
+const SadMxNFunc sad16x8_sse2 = vpx_sad16x8_sse2;
+const SadMxNFunc sad8x16_sse2 = vpx_sad8x16_sse2;
+const SadMxNFunc sad8x8_sse2 = vpx_sad8x8_sse2;
+const SadMxNFunc sad8x4_sse2 = vpx_sad8x4_sse2;
+#if CONFIG_VP9_HIGHBITDEPTH
+const SadMxNFunc highbd_sad64x64_sse2 = vpx_highbd_sad64x64_sse2;
+const SadMxNFunc highbd_sad64x32_sse2 = vpx_highbd_sad64x32_sse2;
+const SadMxNFunc highbd_sad32x64_sse2 = vpx_highbd_sad32x64_sse2;
+const SadMxNFunc highbd_sad32x32_sse2 = vpx_highbd_sad32x32_sse2;
+const SadMxNFunc highbd_sad32x16_sse2 = vpx_highbd_sad32x16_sse2;
+const SadMxNFunc highbd_sad16x32_sse2 = vpx_highbd_sad16x32_sse2;
+const SadMxNFunc highbd_sad16x16_sse2 = vpx_highbd_sad16x16_sse2;
+const SadMxNFunc highbd_sad16x8_sse2 = vpx_highbd_sad16x8_sse2;
+const SadMxNFunc highbd_sad8x16_sse2 = vpx_highbd_sad8x16_sse2;
+const SadMxNFunc highbd_sad8x8_sse2 = vpx_highbd_sad8x8_sse2;
+const SadMxNFunc highbd_sad8x4_sse2 = vpx_highbd_sad8x4_sse2;
+#endif // CONFIG_VP9_HIGHBITDEPTH
const SadMxNParam sse2_tests[] = {
- make_tuple(16, 16, sad_16x16_wmt, -1),
- make_tuple(8, 16, sad_8x16_wmt, -1),
- make_tuple(16, 8, sad_16x8_wmt, -1),
- make_tuple(8, 8, sad_8x8_wmt, -1),
- make_tuple(4, 4, sad_4x4_wmt, -1),
+ make_tuple(64, 64, sad64x64_sse2, -1),
+ make_tuple(64, 32, sad64x32_sse2, -1),
+ make_tuple(32, 64, sad32x64_sse2, -1),
+ make_tuple(32, 32, sad32x32_sse2, -1),
+ make_tuple(32, 16, sad32x16_sse2, -1),
+ make_tuple(16, 32, sad16x32_sse2, -1),
+ make_tuple(16, 16, sad16x16_sse2, -1),
+ make_tuple(16, 8, sad16x8_sse2, -1),
+ make_tuple(8, 16, sad8x16_sse2, -1),
+ make_tuple(8, 8, sad8x8_sse2, -1),
+ make_tuple(8, 4, sad8x4_sse2, -1),
+#if CONFIG_VP9_HIGHBITDEPTH
+ make_tuple(64, 64, highbd_sad64x64_sse2, 8),
+ make_tuple(64, 32, highbd_sad64x32_sse2, 8),
+ make_tuple(32, 64, highbd_sad32x64_sse2, 8),
+ make_tuple(32, 32, highbd_sad32x32_sse2, 8),
+ make_tuple(32, 16, highbd_sad32x16_sse2, 8),
+ make_tuple(16, 32, highbd_sad16x32_sse2, 8),
+ make_tuple(16, 16, highbd_sad16x16_sse2, 8),
+ make_tuple(16, 8, highbd_sad16x8_sse2, 8),
+ make_tuple(8, 16, highbd_sad8x16_sse2, 8),
+ make_tuple(8, 8, highbd_sad8x8_sse2, 8),
+ make_tuple(8, 4, highbd_sad8x4_sse2, 8),
+ make_tuple(64, 64, highbd_sad64x64_sse2, 10),
+ make_tuple(64, 32, highbd_sad64x32_sse2, 10),
+ make_tuple(32, 64, highbd_sad32x64_sse2, 10),
+ make_tuple(32, 32, highbd_sad32x32_sse2, 10),
+ make_tuple(32, 16, highbd_sad32x16_sse2, 10),
+ make_tuple(16, 32, highbd_sad16x32_sse2, 10),
+ make_tuple(16, 16, highbd_sad16x16_sse2, 10),
+ make_tuple(16, 8, highbd_sad16x8_sse2, 10),
+ make_tuple(8, 16, highbd_sad8x16_sse2, 10),
+ make_tuple(8, 8, highbd_sad8x8_sse2, 10),
+ make_tuple(8, 4, highbd_sad8x4_sse2, 10),
+ make_tuple(64, 64, highbd_sad64x64_sse2, 12),
+ make_tuple(64, 32, highbd_sad64x32_sse2, 12),
+ make_tuple(32, 64, highbd_sad32x64_sse2, 12),
+ make_tuple(32, 32, highbd_sad32x32_sse2, 12),
+ make_tuple(32, 16, highbd_sad32x16_sse2, 12),
+ make_tuple(16, 32, highbd_sad16x32_sse2, 12),
+ make_tuple(16, 16, highbd_sad16x16_sse2, 12),
+ make_tuple(16, 8, highbd_sad16x8_sse2, 12),
+ make_tuple(8, 16, highbd_sad8x16_sse2, 12),
+ make_tuple(8, 8, highbd_sad8x8_sse2, 12),
+ make_tuple(8, 4, highbd_sad8x4_sse2, 12),
+#endif // CONFIG_VP9_HIGHBITDEPTH
};
INSTANTIATE_TEST_CASE_P(SSE2, SADTest, ::testing::ValuesIn(sse2_tests));
-#endif // CONFIG_VP8_ENCODER
-#if CONFIG_VP9_ENCODER
-#if CONFIG_USE_X86INC
-const SadMxNVp9Func sad_64x64_sse2_vp9 = vp9_sad64x64_sse2;
-const SadMxNVp9Func sad_64x32_sse2_vp9 = vp9_sad64x32_sse2;
-const SadMxNVp9Func sad_32x64_sse2_vp9 = vp9_sad32x64_sse2;
-const SadMxNVp9Func sad_32x32_sse2_vp9 = vp9_sad32x32_sse2;
-const SadMxNVp9Func sad_32x16_sse2_vp9 = vp9_sad32x16_sse2;
-const SadMxNVp9Func sad_16x32_sse2_vp9 = vp9_sad16x32_sse2;
-const SadMxNVp9Func sad_16x16_sse2_vp9 = vp9_sad16x16_sse2;
-const SadMxNVp9Func sad_16x8_sse2_vp9 = vp9_sad16x8_sse2;
-const SadMxNVp9Func sad_8x16_sse2_vp9 = vp9_sad8x16_sse2;
-const SadMxNVp9Func sad_8x8_sse2_vp9 = vp9_sad8x8_sse2;
-const SadMxNVp9Func sad_8x4_sse2_vp9 = vp9_sad8x4_sse2;
-
-const SadMxNx4Func sad_64x64x4d_sse2 = vp9_sad64x64x4d_sse2;
-const SadMxNx4Func sad_64x32x4d_sse2 = vp9_sad64x32x4d_sse2;
-const SadMxNx4Func sad_32x64x4d_sse2 = vp9_sad32x64x4d_sse2;
-const SadMxNx4Func sad_32x32x4d_sse2 = vp9_sad32x32x4d_sse2;
-const SadMxNx4Func sad_32x16x4d_sse2 = vp9_sad32x16x4d_sse2;
-const SadMxNx4Func sad_16x32x4d_sse2 = vp9_sad16x32x4d_sse2;
-const SadMxNx4Func sad_16x16x4d_sse2 = vp9_sad16x16x4d_sse2;
-const SadMxNx4Func sad_16x8x4d_sse2 = vp9_sad16x8x4d_sse2;
-const SadMxNx4Func sad_8x16x4d_sse2 = vp9_sad8x16x4d_sse2;
-const SadMxNx4Func sad_8x8x4d_sse2 = vp9_sad8x8x4d_sse2;
-const SadMxNx4Func sad_8x4x4d_sse2 = vp9_sad8x4x4d_sse2;
-
+const SadMxNAvgFunc sad64x64_avg_sse2 = vpx_sad64x64_avg_sse2;
+const SadMxNAvgFunc sad64x32_avg_sse2 = vpx_sad64x32_avg_sse2;
+const SadMxNAvgFunc sad32x64_avg_sse2 = vpx_sad32x64_avg_sse2;
+const SadMxNAvgFunc sad32x32_avg_sse2 = vpx_sad32x32_avg_sse2;
+const SadMxNAvgFunc sad32x16_avg_sse2 = vpx_sad32x16_avg_sse2;
+const SadMxNAvgFunc sad16x32_avg_sse2 = vpx_sad16x32_avg_sse2;
+const SadMxNAvgFunc sad16x16_avg_sse2 = vpx_sad16x16_avg_sse2;
+const SadMxNAvgFunc sad16x8_avg_sse2 = vpx_sad16x8_avg_sse2;
+const SadMxNAvgFunc sad8x16_avg_sse2 = vpx_sad8x16_avg_sse2;
+const SadMxNAvgFunc sad8x8_avg_sse2 = vpx_sad8x8_avg_sse2;
+const SadMxNAvgFunc sad8x4_avg_sse2 = vpx_sad8x4_avg_sse2;
#if CONFIG_VP9_HIGHBITDEPTH
-const SadMxNVp9Func highbd_sad8x4_sse2_vp9 = vp9_highbd_sad8x4_sse2;
-const SadMxNVp9Func highbd_sad8x8_sse2_vp9 = vp9_highbd_sad8x8_sse2;
-const SadMxNVp9Func highbd_sad8x16_sse2_vp9 = vp9_highbd_sad8x16_sse2;
-const SadMxNVp9Func highbd_sad16x8_sse2_vp9 = vp9_highbd_sad16x8_sse2;
-const SadMxNVp9Func highbd_sad16x16_sse2_vp9 = vp9_highbd_sad16x16_sse2;
-const SadMxNVp9Func highbd_sad16x32_sse2_vp9 = vp9_highbd_sad16x32_sse2;
-const SadMxNVp9Func highbd_sad32x16_sse2_vp9 = vp9_highbd_sad32x16_sse2;
-const SadMxNVp9Func highbd_sad32x32_sse2_vp9 = vp9_highbd_sad32x32_sse2;
-const SadMxNVp9Func highbd_sad32x64_sse2_vp9 = vp9_highbd_sad32x64_sse2;
-const SadMxNVp9Func highbd_sad64x32_sse2_vp9 = vp9_highbd_sad64x32_sse2;
-const SadMxNVp9Func highbd_sad64x64_sse2_vp9 = vp9_highbd_sad64x64_sse2;
-
-INSTANTIATE_TEST_CASE_P(SSE2, SADVP9Test, ::testing::Values(
- make_tuple(64, 64, sad_64x64_sse2_vp9, -1),
- make_tuple(64, 32, sad_64x32_sse2_vp9, -1),
- make_tuple(32, 64, sad_32x64_sse2_vp9, -1),
- make_tuple(32, 32, sad_32x32_sse2_vp9, -1),
- make_tuple(32, 16, sad_32x16_sse2_vp9, -1),
- make_tuple(16, 32, sad_16x32_sse2_vp9, -1),
- make_tuple(16, 16, sad_16x16_sse2_vp9, -1),
- make_tuple(16, 8, sad_16x8_sse2_vp9, -1),
- make_tuple(8, 16, sad_8x16_sse2_vp9, -1),
- make_tuple(8, 8, sad_8x8_sse2_vp9, -1),
- make_tuple(8, 4, sad_8x4_sse2_vp9, -1),
- make_tuple(8, 4, highbd_sad8x4_sse2_vp9, 8),
- make_tuple(8, 8, highbd_sad8x8_sse2_vp9, 8),
- make_tuple(8, 16, highbd_sad8x16_sse2_vp9, 8),
- make_tuple(16, 8, highbd_sad16x8_sse2_vp9, 8),
- make_tuple(16, 16, highbd_sad16x16_sse2_vp9, 8),
- make_tuple(16, 32, highbd_sad16x32_sse2_vp9, 8),
- make_tuple(32, 16, highbd_sad32x16_sse2_vp9, 8),
- make_tuple(32, 32, highbd_sad32x32_sse2_vp9, 8),
- make_tuple(32, 64, highbd_sad32x64_sse2_vp9, 8),
- make_tuple(64, 32, highbd_sad64x32_sse2_vp9, 8),
- make_tuple(64, 64, highbd_sad64x64_sse2_vp9, 8),
- make_tuple(8, 4, highbd_sad8x4_sse2_vp9, 10),
- make_tuple(8, 8, highbd_sad8x8_sse2_vp9, 10),
- make_tuple(8, 16, highbd_sad8x16_sse2_vp9, 10),
- make_tuple(16, 8, highbd_sad16x8_sse2_vp9, 10),
- make_tuple(16, 16, highbd_sad16x16_sse2_vp9, 10),
- make_tuple(16, 32, highbd_sad16x32_sse2_vp9, 10),
- make_tuple(32, 16, highbd_sad32x16_sse2_vp9, 10),
- make_tuple(32, 32, highbd_sad32x32_sse2_vp9, 10),
- make_tuple(32, 64, highbd_sad32x64_sse2_vp9, 10),
- make_tuple(64, 32, highbd_sad64x32_sse2_vp9, 10),
- make_tuple(64, 64, highbd_sad64x64_sse2_vp9, 10),
- make_tuple(8, 4, highbd_sad8x4_sse2_vp9, 12),
- make_tuple(8, 8, highbd_sad8x8_sse2_vp9, 12),
- make_tuple(8, 16, highbd_sad8x16_sse2_vp9, 12),
- make_tuple(16, 8, highbd_sad16x8_sse2_vp9, 12),
- make_tuple(16, 16, highbd_sad16x16_sse2_vp9, 12),
- make_tuple(16, 32, highbd_sad16x32_sse2_vp9, 12),
- make_tuple(32, 16, highbd_sad32x16_sse2_vp9, 12),
- make_tuple(32, 32, highbd_sad32x32_sse2_vp9, 12),
- make_tuple(32, 64, highbd_sad32x64_sse2_vp9, 12),
- make_tuple(64, 32, highbd_sad64x32_sse2_vp9, 12),
- make_tuple(64, 64, highbd_sad64x64_sse2_vp9, 12)));
-
-const SadMxNAvgVp9Func highbd_sad8x4_avg_sse2_vp9 = vp9_highbd_sad8x4_avg_sse2;
-const SadMxNAvgVp9Func highbd_sad8x8_avg_sse2_vp9 = vp9_highbd_sad8x8_avg_sse2;
-const SadMxNAvgVp9Func highbd_sad8x16_avg_sse2_vp9 =
- vp9_highbd_sad8x16_avg_sse2;
-const SadMxNAvgVp9Func highbd_sad16x8_avg_sse2_vp9 =
- vp9_highbd_sad16x8_avg_sse2;
-const SadMxNAvgVp9Func highbd_sad16x16_avg_sse2_vp9 =
- vp9_highbd_sad16x16_avg_sse2;
-const SadMxNAvgVp9Func highbd_sad16x32_avg_sse2_vp9 =
- vp9_highbd_sad16x32_avg_sse2;
-const SadMxNAvgVp9Func highbd_sad32x16_avg_sse2_vp9 =
- vp9_highbd_sad32x16_avg_sse2;
-const SadMxNAvgVp9Func highbd_sad32x32_avg_sse2_vp9 =
- vp9_highbd_sad32x32_avg_sse2;
-const SadMxNAvgVp9Func highbd_sad32x64_avg_sse2_vp9 =
- vp9_highbd_sad32x64_avg_sse2;
-const SadMxNAvgVp9Func highbd_sad64x32_avg_sse2_vp9 =
- vp9_highbd_sad64x32_avg_sse2;
-const SadMxNAvgVp9Func highbd_sad64x64_avg_sse2_vp9 =
- vp9_highbd_sad64x64_avg_sse2;
-
-INSTANTIATE_TEST_CASE_P(SSE2, SADavgVP9Test, ::testing::Values(
- make_tuple(8, 4, highbd_sad8x4_avg_sse2_vp9, 8),
- make_tuple(8, 8, highbd_sad8x8_avg_sse2_vp9, 8),
- make_tuple(8, 16, highbd_sad8x16_avg_sse2_vp9, 8),
- make_tuple(16, 8, highbd_sad16x8_avg_sse2_vp9, 8),
- make_tuple(16, 16, highbd_sad16x16_avg_sse2_vp9, 8),
- make_tuple(16, 32, highbd_sad16x32_avg_sse2_vp9, 8),
- make_tuple(32, 16, highbd_sad32x16_avg_sse2_vp9, 8),
- make_tuple(32, 32, highbd_sad32x32_avg_sse2_vp9, 8),
- make_tuple(32, 64, highbd_sad32x64_avg_sse2_vp9, 8),
- make_tuple(64, 32, highbd_sad64x32_avg_sse2_vp9, 8),
- make_tuple(64, 64, highbd_sad64x64_avg_sse2_vp9, 8),
- make_tuple(8, 4, highbd_sad8x4_avg_sse2_vp9, 10),
- make_tuple(8, 8, highbd_sad8x8_avg_sse2_vp9, 10),
- make_tuple(8, 16, highbd_sad8x16_avg_sse2_vp9, 10),
- make_tuple(16, 8, highbd_sad16x8_avg_sse2_vp9, 10),
- make_tuple(16, 16, highbd_sad16x16_avg_sse2_vp9, 10),
- make_tuple(16, 32, highbd_sad16x32_avg_sse2_vp9, 10),
- make_tuple(32, 16, highbd_sad32x16_avg_sse2_vp9, 10),
- make_tuple(32, 32, highbd_sad32x32_avg_sse2_vp9, 10),
- make_tuple(32, 64, highbd_sad32x64_avg_sse2_vp9, 10),
- make_tuple(64, 32, highbd_sad64x32_avg_sse2_vp9, 10),
- make_tuple(64, 64, highbd_sad64x64_avg_sse2_vp9, 10),
- make_tuple(8, 4, highbd_sad8x4_avg_sse2_vp9, 12),
- make_tuple(8, 8, highbd_sad8x8_avg_sse2_vp9, 12),
- make_tuple(8, 16, highbd_sad8x16_avg_sse2_vp9, 12),
- make_tuple(16, 8, highbd_sad16x8_avg_sse2_vp9, 12),
- make_tuple(16, 16, highbd_sad16x16_avg_sse2_vp9, 12),
- make_tuple(16, 32, highbd_sad16x32_avg_sse2_vp9, 12),
- make_tuple(32, 16, highbd_sad32x16_avg_sse2_vp9, 12),
- make_tuple(32, 32, highbd_sad32x32_avg_sse2_vp9, 12),
- make_tuple(32, 64, highbd_sad32x64_avg_sse2_vp9, 12),
- make_tuple(64, 32, highbd_sad64x32_avg_sse2_vp9, 12),
- make_tuple(64, 64, highbd_sad64x64_avg_sse2_vp9, 12)));
-
-const SadMxNx4Func highbd_sad_64x64x4d_sse2 = vp9_highbd_sad64x64x4d_sse2;
-const SadMxNx4Func highbd_sad_64x32x4d_sse2 = vp9_highbd_sad64x32x4d_sse2;
-const SadMxNx4Func highbd_sad_32x64x4d_sse2 = vp9_highbd_sad32x64x4d_sse2;
-const SadMxNx4Func highbd_sad_32x32x4d_sse2 = vp9_highbd_sad32x32x4d_sse2;
-const SadMxNx4Func highbd_sad_32x16x4d_sse2 = vp9_highbd_sad32x16x4d_sse2;
-const SadMxNx4Func highbd_sad_16x32x4d_sse2 = vp9_highbd_sad16x32x4d_sse2;
-const SadMxNx4Func highbd_sad_16x16x4d_sse2 = vp9_highbd_sad16x16x4d_sse2;
-const SadMxNx4Func highbd_sad_16x8x4d_sse2 = vp9_highbd_sad16x8x4d_sse2;
-const SadMxNx4Func highbd_sad_8x16x4d_sse2 = vp9_highbd_sad8x16x4d_sse2;
-const SadMxNx4Func highbd_sad_8x8x4d_sse2 = vp9_highbd_sad8x8x4d_sse2;
-const SadMxNx4Func highbd_sad_8x4x4d_sse2 = vp9_highbd_sad8x4x4d_sse2;
-const SadMxNx4Func highbd_sad_4x8x4d_sse2 = vp9_highbd_sad4x8x4d_sse2;
-const SadMxNx4Func highbd_sad_4x4x4d_sse2 = vp9_highbd_sad4x4x4d_sse2;
-
-INSTANTIATE_TEST_CASE_P(SSE2, SADx4Test, ::testing::Values(
- make_tuple(64, 64, sad_64x64x4d_sse2, -1),
- make_tuple(64, 32, sad_64x32x4d_sse2, -1),
- make_tuple(32, 64, sad_32x64x4d_sse2, -1),
- make_tuple(32, 32, sad_32x32x4d_sse2, -1),
- make_tuple(32, 16, sad_32x16x4d_sse2, -1),
- make_tuple(16, 32, sad_16x32x4d_sse2, -1),
- make_tuple(16, 16, sad_16x16x4d_sse2, -1),
- make_tuple(16, 8, sad_16x8x4d_sse2, -1),
- make_tuple(8, 16, sad_8x16x4d_sse2, -1),
- make_tuple(8, 8, sad_8x8x4d_sse2, -1),
- make_tuple(8, 4, sad_8x4x4d_sse2, -1),
- make_tuple(64, 64, highbd_sad_64x64x4d_sse2, 8),
- make_tuple(64, 32, highbd_sad_64x32x4d_sse2, 8),
- make_tuple(32, 64, highbd_sad_32x64x4d_sse2, 8),
- make_tuple(32, 32, highbd_sad_32x32x4d_sse2, 8),
- make_tuple(32, 16, highbd_sad_32x16x4d_sse2, 8),
- make_tuple(16, 32, highbd_sad_16x32x4d_sse2, 8),
- make_tuple(16, 16, highbd_sad_16x16x4d_sse2, 8),
- make_tuple(16, 8, highbd_sad_16x8x4d_sse2, 8),
- make_tuple(8, 16, highbd_sad_8x16x4d_sse2, 8),
- make_tuple(8, 8, highbd_sad_8x8x4d_sse2, 8),
- make_tuple(8, 4, highbd_sad_8x4x4d_sse2, 8),
- make_tuple(4, 8, highbd_sad_4x8x4d_sse2, 8),
- make_tuple(4, 4, highbd_sad_4x4x4d_sse2, 8),
- make_tuple(64, 64, highbd_sad_64x64x4d_sse2, 10),
- make_tuple(64, 32, highbd_sad_64x32x4d_sse2, 10),
- make_tuple(32, 64, highbd_sad_32x64x4d_sse2, 10),
- make_tuple(32, 32, highbd_sad_32x32x4d_sse2, 10),
- make_tuple(32, 16, highbd_sad_32x16x4d_sse2, 10),
- make_tuple(16, 32, highbd_sad_16x32x4d_sse2, 10),
- make_tuple(16, 16, highbd_sad_16x16x4d_sse2, 10),
- make_tuple(16, 8, highbd_sad_16x8x4d_sse2, 10),
- make_tuple(8, 16, highbd_sad_8x16x4d_sse2, 10),
- make_tuple(8, 8, highbd_sad_8x8x4d_sse2, 10),
- make_tuple(8, 4, highbd_sad_8x4x4d_sse2, 10),
- make_tuple(4, 8, highbd_sad_4x8x4d_sse2, 10),
- make_tuple(4, 4, highbd_sad_4x4x4d_sse2, 10),
- make_tuple(64, 64, highbd_sad_64x64x4d_sse2, 12),
- make_tuple(64, 32, highbd_sad_64x32x4d_sse2, 12),
- make_tuple(32, 64, highbd_sad_32x64x4d_sse2, 12),
- make_tuple(32, 32, highbd_sad_32x32x4d_sse2, 12),
- make_tuple(32, 16, highbd_sad_32x16x4d_sse2, 12),
- make_tuple(16, 32, highbd_sad_16x32x4d_sse2, 12),
- make_tuple(16, 16, highbd_sad_16x16x4d_sse2, 12),
- make_tuple(16, 8, highbd_sad_16x8x4d_sse2, 12),
- make_tuple(8, 16, highbd_sad_8x16x4d_sse2, 12),
- make_tuple(8, 8, highbd_sad_8x8x4d_sse2, 12),
- make_tuple(8, 4, highbd_sad_8x4x4d_sse2, 12),
- make_tuple(4, 8, highbd_sad_4x8x4d_sse2, 12),
- make_tuple(4, 4, highbd_sad_4x4x4d_sse2, 12)));
-#else
-INSTANTIATE_TEST_CASE_P(SSE2, SADVP9Test, ::testing::Values(
- make_tuple(64, 64, sad_64x64_sse2_vp9, -1),
- make_tuple(64, 32, sad_64x32_sse2_vp9, -1),
- make_tuple(32, 64, sad_32x64_sse2_vp9, -1),
- make_tuple(32, 32, sad_32x32_sse2_vp9, -1),
- make_tuple(32, 16, sad_32x16_sse2_vp9, -1),
- make_tuple(16, 32, sad_16x32_sse2_vp9, -1),
- make_tuple(16, 16, sad_16x16_sse2_vp9, -1),
- make_tuple(16, 8, sad_16x8_sse2_vp9, -1),
- make_tuple(8, 16, sad_8x16_sse2_vp9, -1),
- make_tuple(8, 8, sad_8x8_sse2_vp9, -1),
- make_tuple(8, 4, sad_8x4_sse2_vp9, -1)));
-
-INSTANTIATE_TEST_CASE_P(SSE2, SADx4Test, ::testing::Values(
- make_tuple(64, 64, sad_64x64x4d_sse2, -1),
- make_tuple(64, 32, sad_64x32x4d_sse2, -1),
- make_tuple(32, 64, sad_32x64x4d_sse2, -1),
- make_tuple(32, 32, sad_32x32x4d_sse2, -1),
- make_tuple(32, 16, sad_32x16x4d_sse2, -1),
- make_tuple(16, 32, sad_16x32x4d_sse2, -1),
- make_tuple(16, 16, sad_16x16x4d_sse2, -1),
- make_tuple(16, 8, sad_16x8x4d_sse2, -1),
- make_tuple(8, 16, sad_8x16x4d_sse2, -1),
- make_tuple(8, 8, sad_8x8x4d_sse2, -1),
- make_tuple(8, 4, sad_8x4x4d_sse2, -1)));
+const SadMxNAvgFunc highbd_sad64x64_avg_sse2 = vpx_highbd_sad64x64_avg_sse2;
+const SadMxNAvgFunc highbd_sad64x32_avg_sse2 = vpx_highbd_sad64x32_avg_sse2;
+const SadMxNAvgFunc highbd_sad32x64_avg_sse2 = vpx_highbd_sad32x64_avg_sse2;
+const SadMxNAvgFunc highbd_sad32x32_avg_sse2 = vpx_highbd_sad32x32_avg_sse2;
+const SadMxNAvgFunc highbd_sad32x16_avg_sse2 = vpx_highbd_sad32x16_avg_sse2;
+const SadMxNAvgFunc highbd_sad16x32_avg_sse2 = vpx_highbd_sad16x32_avg_sse2;
+const SadMxNAvgFunc highbd_sad16x16_avg_sse2 = vpx_highbd_sad16x16_avg_sse2;
+const SadMxNAvgFunc highbd_sad16x8_avg_sse2 = vpx_highbd_sad16x8_avg_sse2;
+const SadMxNAvgFunc highbd_sad8x16_avg_sse2 = vpx_highbd_sad8x16_avg_sse2;
+const SadMxNAvgFunc highbd_sad8x8_avg_sse2 = vpx_highbd_sad8x8_avg_sse2;
+const SadMxNAvgFunc highbd_sad8x4_avg_sse2 = vpx_highbd_sad8x4_avg_sse2;
#endif // CONFIG_VP9_HIGHBITDEPTH
+const SadMxNAvgParam avg_sse2_tests[] = {
+ make_tuple(64, 64, sad64x64_avg_sse2, -1),
+ make_tuple(64, 32, sad64x32_avg_sse2, -1),
+ make_tuple(32, 64, sad32x64_avg_sse2, -1),
+ make_tuple(32, 32, sad32x32_avg_sse2, -1),
+ make_tuple(32, 16, sad32x16_avg_sse2, -1),
+ make_tuple(16, 32, sad16x32_avg_sse2, -1),
+ make_tuple(16, 16, sad16x16_avg_sse2, -1),
+ make_tuple(16, 8, sad16x8_avg_sse2, -1),
+ make_tuple(8, 16, sad8x16_avg_sse2, -1),
+ make_tuple(8, 8, sad8x8_avg_sse2, -1),
+ make_tuple(8, 4, sad8x4_avg_sse2, -1),
+#if CONFIG_VP9_HIGHBITDEPTH
+ make_tuple(64, 64, highbd_sad64x64_avg_sse2, 8),
+ make_tuple(64, 32, highbd_sad64x32_avg_sse2, 8),
+ make_tuple(32, 64, highbd_sad32x64_avg_sse2, 8),
+ make_tuple(32, 32, highbd_sad32x32_avg_sse2, 8),
+ make_tuple(32, 16, highbd_sad32x16_avg_sse2, 8),
+ make_tuple(16, 32, highbd_sad16x32_avg_sse2, 8),
+ make_tuple(16, 16, highbd_sad16x16_avg_sse2, 8),
+ make_tuple(16, 8, highbd_sad16x8_avg_sse2, 8),
+ make_tuple(8, 16, highbd_sad8x16_avg_sse2, 8),
+ make_tuple(8, 8, highbd_sad8x8_avg_sse2, 8),
+ make_tuple(8, 4, highbd_sad8x4_avg_sse2, 8),
+ make_tuple(64, 64, highbd_sad64x64_avg_sse2, 10),
+ make_tuple(64, 32, highbd_sad64x32_avg_sse2, 10),
+ make_tuple(32, 64, highbd_sad32x64_avg_sse2, 10),
+ make_tuple(32, 32, highbd_sad32x32_avg_sse2, 10),
+ make_tuple(32, 16, highbd_sad32x16_avg_sse2, 10),
+ make_tuple(16, 32, highbd_sad16x32_avg_sse2, 10),
+ make_tuple(16, 16, highbd_sad16x16_avg_sse2, 10),
+ make_tuple(16, 8, highbd_sad16x8_avg_sse2, 10),
+ make_tuple(8, 16, highbd_sad8x16_avg_sse2, 10),
+ make_tuple(8, 8, highbd_sad8x8_avg_sse2, 10),
+ make_tuple(8, 4, highbd_sad8x4_avg_sse2, 10),
+ make_tuple(64, 64, highbd_sad64x64_avg_sse2, 12),
+ make_tuple(64, 32, highbd_sad64x32_avg_sse2, 12),
+ make_tuple(32, 64, highbd_sad32x64_avg_sse2, 12),
+ make_tuple(32, 32, highbd_sad32x32_avg_sse2, 12),
+ make_tuple(32, 16, highbd_sad32x16_avg_sse2, 12),
+ make_tuple(16, 32, highbd_sad16x32_avg_sse2, 12),
+ make_tuple(16, 16, highbd_sad16x16_avg_sse2, 12),
+ make_tuple(16, 8, highbd_sad16x8_avg_sse2, 12),
+ make_tuple(8, 16, highbd_sad8x16_avg_sse2, 12),
+ make_tuple(8, 8, highbd_sad8x8_avg_sse2, 12),
+ make_tuple(8, 4, highbd_sad8x4_avg_sse2, 12),
+#endif // CONFIG_VP9_HIGHBITDEPTH
+};
+INSTANTIATE_TEST_CASE_P(SSE2, SADavgTest, ::testing::ValuesIn(avg_sse2_tests));
+
+const SadMxNx4Func sad64x64x4d_sse2 = vpx_sad64x64x4d_sse2;
+const SadMxNx4Func sad64x32x4d_sse2 = vpx_sad64x32x4d_sse2;
+const SadMxNx4Func sad32x64x4d_sse2 = vpx_sad32x64x4d_sse2;
+const SadMxNx4Func sad32x32x4d_sse2 = vpx_sad32x32x4d_sse2;
+const SadMxNx4Func sad32x16x4d_sse2 = vpx_sad32x16x4d_sse2;
+const SadMxNx4Func sad16x32x4d_sse2 = vpx_sad16x32x4d_sse2;
+const SadMxNx4Func sad16x16x4d_sse2 = vpx_sad16x16x4d_sse2;
+const SadMxNx4Func sad16x8x4d_sse2 = vpx_sad16x8x4d_sse2;
+const SadMxNx4Func sad8x16x4d_sse2 = vpx_sad8x16x4d_sse2;
+const SadMxNx4Func sad8x8x4d_sse2 = vpx_sad8x8x4d_sse2;
+const SadMxNx4Func sad8x4x4d_sse2 = vpx_sad8x4x4d_sse2;
+#if CONFIG_VP9_HIGHBITDEPTH
+const SadMxNx4Func highbd_sad64x64x4d_sse2 = vpx_highbd_sad64x64x4d_sse2;
+const SadMxNx4Func highbd_sad64x32x4d_sse2 = vpx_highbd_sad64x32x4d_sse2;
+const SadMxNx4Func highbd_sad32x64x4d_sse2 = vpx_highbd_sad32x64x4d_sse2;
+const SadMxNx4Func highbd_sad32x32x4d_sse2 = vpx_highbd_sad32x32x4d_sse2;
+const SadMxNx4Func highbd_sad32x16x4d_sse2 = vpx_highbd_sad32x16x4d_sse2;
+const SadMxNx4Func highbd_sad16x32x4d_sse2 = vpx_highbd_sad16x32x4d_sse2;
+const SadMxNx4Func highbd_sad16x16x4d_sse2 = vpx_highbd_sad16x16x4d_sse2;
+const SadMxNx4Func highbd_sad16x8x4d_sse2 = vpx_highbd_sad16x8x4d_sse2;
+const SadMxNx4Func highbd_sad8x16x4d_sse2 = vpx_highbd_sad8x16x4d_sse2;
+const SadMxNx4Func highbd_sad8x8x4d_sse2 = vpx_highbd_sad8x8x4d_sse2;
+const SadMxNx4Func highbd_sad8x4x4d_sse2 = vpx_highbd_sad8x4x4d_sse2;
+const SadMxNx4Func highbd_sad4x8x4d_sse2 = vpx_highbd_sad4x8x4d_sse2;
+const SadMxNx4Func highbd_sad4x4x4d_sse2 = vpx_highbd_sad4x4x4d_sse2;
+#endif // CONFIG_VP9_HIGHBITDEPTH
+const SadMxNx4Param x4d_sse2_tests[] = {
+ make_tuple(64, 64, sad64x64x4d_sse2, -1),
+ make_tuple(64, 32, sad64x32x4d_sse2, -1),
+ make_tuple(32, 64, sad32x64x4d_sse2, -1),
+ make_tuple(32, 32, sad32x32x4d_sse2, -1),
+ make_tuple(32, 16, sad32x16x4d_sse2, -1),
+ make_tuple(16, 32, sad16x32x4d_sse2, -1),
+ make_tuple(16, 16, sad16x16x4d_sse2, -1),
+ make_tuple(16, 8, sad16x8x4d_sse2, -1),
+ make_tuple(8, 16, sad8x16x4d_sse2, -1),
+ make_tuple(8, 8, sad8x8x4d_sse2, -1),
+ make_tuple(8, 4, sad8x4x4d_sse2, -1),
+#if CONFIG_VP9_HIGHBITDEPTH
+ make_tuple(64, 64, highbd_sad64x64x4d_sse2, 8),
+ make_tuple(64, 32, highbd_sad64x32x4d_sse2, 8),
+ make_tuple(32, 64, highbd_sad32x64x4d_sse2, 8),
+ make_tuple(32, 32, highbd_sad32x32x4d_sse2, 8),
+ make_tuple(32, 16, highbd_sad32x16x4d_sse2, 8),
+ make_tuple(16, 32, highbd_sad16x32x4d_sse2, 8),
+ make_tuple(16, 16, highbd_sad16x16x4d_sse2, 8),
+ make_tuple(16, 8, highbd_sad16x8x4d_sse2, 8),
+ make_tuple(8, 16, highbd_sad8x16x4d_sse2, 8),
+ make_tuple(8, 8, highbd_sad8x8x4d_sse2, 8),
+ make_tuple(8, 4, highbd_sad8x4x4d_sse2, 8),
+ make_tuple(4, 8, highbd_sad4x8x4d_sse2, 8),
+ make_tuple(4, 4, highbd_sad4x4x4d_sse2, 8),
+ make_tuple(64, 64, highbd_sad64x64x4d_sse2, 10),
+ make_tuple(64, 32, highbd_sad64x32x4d_sse2, 10),
+ make_tuple(32, 64, highbd_sad32x64x4d_sse2, 10),
+ make_tuple(32, 32, highbd_sad32x32x4d_sse2, 10),
+ make_tuple(32, 16, highbd_sad32x16x4d_sse2, 10),
+ make_tuple(16, 32, highbd_sad16x32x4d_sse2, 10),
+ make_tuple(16, 16, highbd_sad16x16x4d_sse2, 10),
+ make_tuple(16, 8, highbd_sad16x8x4d_sse2, 10),
+ make_tuple(8, 16, highbd_sad8x16x4d_sse2, 10),
+ make_tuple(8, 8, highbd_sad8x8x4d_sse2, 10),
+ make_tuple(8, 4, highbd_sad8x4x4d_sse2, 10),
+ make_tuple(4, 8, highbd_sad4x8x4d_sse2, 10),
+ make_tuple(4, 4, highbd_sad4x4x4d_sse2, 10),
+ make_tuple(64, 64, highbd_sad64x64x4d_sse2, 12),
+ make_tuple(64, 32, highbd_sad64x32x4d_sse2, 12),
+ make_tuple(32, 64, highbd_sad32x64x4d_sse2, 12),
+ make_tuple(32, 32, highbd_sad32x32x4d_sse2, 12),
+ make_tuple(32, 16, highbd_sad32x16x4d_sse2, 12),
+ make_tuple(16, 32, highbd_sad16x32x4d_sse2, 12),
+ make_tuple(16, 16, highbd_sad16x16x4d_sse2, 12),
+ make_tuple(16, 8, highbd_sad16x8x4d_sse2, 12),
+ make_tuple(8, 16, highbd_sad8x16x4d_sse2, 12),
+ make_tuple(8, 8, highbd_sad8x8x4d_sse2, 12),
+ make_tuple(8, 4, highbd_sad8x4x4d_sse2, 12),
+ make_tuple(4, 8, highbd_sad4x8x4d_sse2, 12),
+ make_tuple(4, 4, highbd_sad4x4x4d_sse2, 12),
+#endif // CONFIG_VP9_HIGHBITDEPTH
+};
+INSTANTIATE_TEST_CASE_P(SSE2, SADx4Test, ::testing::ValuesIn(x4d_sse2_tests));
#endif // CONFIG_USE_X86INC
-#endif // CONFIG_VP9_ENCODER
#endif // HAVE_SSE2
#if HAVE_SSE3
-#if CONFIG_VP8_ENCODER
-const SadMxNx4Func sad_16x16x4d_sse3 = vp8_sad16x16x4d_sse3;
-const SadMxNx4Func sad_16x8x4d_sse3 = vp8_sad16x8x4d_sse3;
-const SadMxNx4Func sad_8x16x4d_sse3 = vp8_sad8x16x4d_sse3;
-const SadMxNx4Func sad_8x8x4d_sse3 = vp8_sad8x8x4d_sse3;
-const SadMxNx4Func sad_4x4x4d_sse3 = vp8_sad4x4x4d_sse3;
-INSTANTIATE_TEST_CASE_P(SSE3, SADx4Test, ::testing::Values(
- make_tuple(16, 16, sad_16x16x4d_sse3, -1),
- make_tuple(16, 8, sad_16x8x4d_sse3, -1),
- make_tuple(8, 16, sad_8x16x4d_sse3, -1),
- make_tuple(8, 8, sad_8x8x4d_sse3, -1),
- make_tuple(4, 4, sad_4x4x4d_sse3, -1)));
-#endif // CONFIG_VP8_ENCODER
+// The only functions remaining here are x3 variants, which do not have tests.
#endif // HAVE_SSE3
#if HAVE_SSSE3
-#if CONFIG_USE_X86INC
-#if CONFIG_VP8_ENCODER
-const SadMxNFunc sad_16x16_sse3 = vp8_sad16x16_sse3;
-INSTANTIATE_TEST_CASE_P(SSE3, SADTest, ::testing::Values(
- make_tuple(16, 16, sad_16x16_sse3, -1)));
-#endif // CONFIG_VP8_ENCODER
-#endif // CONFIG_USE_X86INC
+// The only functions remaining here are x3 variants, which do not have tests.
#endif // HAVE_SSSE3
-#if CONFIG_VP9_ENCODER
-#if HAVE_AVX2
-const SadMxNx4Func sad_64x64x4d_avx2 = vp9_sad64x64x4d_avx2;
-const SadMxNx4Func sad_32x32x4d_avx2 = vp9_sad32x32x4d_avx2;
-INSTANTIATE_TEST_CASE_P(AVX2, SADx4Test, ::testing::Values(
- make_tuple(32, 32, sad_32x32x4d_avx2, -1),
- make_tuple(64, 64, sad_64x64x4d_avx2, -1)));
-#endif // HAVE_AVX2
+#if HAVE_SSE4_1
+// The only functions remaining here are x8 variants, which do not have tests.
+#endif // HAVE_SSE4_1
-#if HAVE_NEON
-const SadMxNx4Func sad_16x16x4d_neon = vp9_sad16x16x4d_neon;
-const SadMxNx4Func sad_32x32x4d_neon = vp9_sad32x32x4d_neon;
-const SadMxNx4Func sad_64x64x4d_neon = vp9_sad64x64x4d_neon;
-INSTANTIATE_TEST_CASE_P(NEON, SADx4Test, ::testing::Values(
- make_tuple(16, 16, sad_16x16x4d_neon, -1),
- make_tuple(32, 32, sad_32x32x4d_neon, -1),
- make_tuple(64, 64, sad_64x64x4d_neon, -1)));
-#endif // HAVE_NEON
-#endif // CONFIG_VP9_ENCODER
+#if HAVE_AVX2
+const SadMxNFunc sad64x64_avx2 = vpx_sad64x64_avx2;
+const SadMxNFunc sad64x32_avx2 = vpx_sad64x32_avx2;
+const SadMxNFunc sad32x64_avx2 = vpx_sad32x64_avx2;
+const SadMxNFunc sad32x32_avx2 = vpx_sad32x32_avx2;
+const SadMxNFunc sad32x16_avx2 = vpx_sad32x16_avx2;
+const SadMxNParam avx2_tests[] = {
+ make_tuple(64, 64, sad64x64_avx2, -1),
+ make_tuple(64, 32, sad64x32_avx2, -1),
+ make_tuple(32, 64, sad32x64_avx2, -1),
+ make_tuple(32, 32, sad32x32_avx2, -1),
+ make_tuple(32, 16, sad32x16_avx2, -1),
+};
+INSTANTIATE_TEST_CASE_P(AVX2, SADTest, ::testing::ValuesIn(avx2_tests));
+
+const SadMxNAvgFunc sad64x64_avg_avx2 = vpx_sad64x64_avg_avx2;
+const SadMxNAvgFunc sad64x32_avg_avx2 = vpx_sad64x32_avg_avx2;
+const SadMxNAvgFunc sad32x64_avg_avx2 = vpx_sad32x64_avg_avx2;
+const SadMxNAvgFunc sad32x32_avg_avx2 = vpx_sad32x32_avg_avx2;
+const SadMxNAvgFunc sad32x16_avg_avx2 = vpx_sad32x16_avg_avx2;
+const SadMxNAvgParam avg_avx2_tests[] = {
+ make_tuple(64, 64, sad64x64_avg_avx2, -1),
+ make_tuple(64, 32, sad64x32_avg_avx2, -1),
+ make_tuple(32, 64, sad32x64_avg_avx2, -1),
+ make_tuple(32, 32, sad32x32_avg_avx2, -1),
+ make_tuple(32, 16, sad32x16_avg_avx2, -1),
+};
+INSTANTIATE_TEST_CASE_P(AVX2, SADavgTest, ::testing::ValuesIn(avg_avx2_tests));
+
+const SadMxNx4Func sad64x64x4d_avx2 = vpx_sad64x64x4d_avx2;
+const SadMxNx4Func sad32x32x4d_avx2 = vpx_sad32x32x4d_avx2;
+const SadMxNx4Param x4d_avx2_tests[] = {
+ make_tuple(64, 64, sad64x64x4d_avx2, -1),
+ make_tuple(32, 32, sad32x32x4d_avx2, -1),
+};
+INSTANTIATE_TEST_CASE_P(AVX2, SADx4Test, ::testing::ValuesIn(x4d_avx2_tests));
+#endif // HAVE_AVX2
} // namespace
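
Note for reviewers: every tuple in the tables above is validated against the same scalar reference loop. A minimal stand-alone sketch of that reference (the test's real helper lives in SADTestBase, so this version is illustrative only):

#include <stdint.h>
#include <stdlib.h>

/* Illustrative reference SAD: sum of absolute differences over a
 * width x height block, matching the four-argument vpx_sad signature. */
static unsigned int reference_sad(const uint8_t *src, int src_stride,
                                  const uint8_t *ref, int ref_stride,
                                  int width, int height) {
  unsigned int sad = 0;
  int r, c;
  for (r = 0; r < height; ++r) {
    for (c = 0; c < width; ++c) sad += abs(src[c] - ref[c]);
    src += src_stride;
    ref += ref_stride;
  }
  return sad;
}
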
diff --git a/test/test_libvpx.cc b/test/test_libvpx.cc
index 30a5255..edbeec2 100644
--- a/test/test_libvpx.cc
+++ b/test/test_libvpx.cc
@@ -19,6 +19,7 @@
#if CONFIG_VP9
extern void vp9_rtcd();
#endif // CONFIG_VP9
+extern void vpx_dsp_rtcd();
extern void vpx_scale_rtcd();
}
#include "third_party/googletest/src/include/gtest/gtest.h"
@@ -64,6 +65,7 @@
#if CONFIG_VP9
vp9_rtcd();
#endif // CONFIG_VP9
+ vpx_dsp_rtcd();
vpx_scale_rtcd();
#endif // !CONFIG_SHARED
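
In static (!CONFIG_SHARED) builds the RTCD tables must be populated before any dsp function resolves; a stripped-down sketch of the init order the hunk above establishes:

extern void vpx_dsp_rtcd();
extern void vpx_scale_rtcd();

int main(void) {
  vpx_dsp_rtcd();   /* fill the vpx_sad* pointers for the detected CPU */
  vpx_scale_rtcd();
  /* ... RUN_ALL_TESTS() ... */
  return 0;
}
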
diff --git a/vp8/common/arm/neon/sad_neon.c b/vp8/common/arm/neon/sad_neon.c
deleted file mode 100644
index 6595ac0..0000000
--- a/vp8/common/arm/neon/sad_neon.c
+++ /dev/null
@@ -1,184 +0,0 @@
-/*
- * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <arm_neon.h>
-
-unsigned int vp8_sad8x8_neon(
- unsigned char *src_ptr,
- int src_stride,
- unsigned char *ref_ptr,
- int ref_stride) {
- uint8x8_t d0, d8;
- uint16x8_t q12;
- uint32x4_t q1;
- uint64x2_t q3;
- uint32x2_t d5;
- int i;
-
- d0 = vld1_u8(src_ptr);
- src_ptr += src_stride;
- d8 = vld1_u8(ref_ptr);
- ref_ptr += ref_stride;
- q12 = vabdl_u8(d0, d8);
-
- for (i = 0; i < 7; i++) {
- d0 = vld1_u8(src_ptr);
- src_ptr += src_stride;
- d8 = vld1_u8(ref_ptr);
- ref_ptr += ref_stride;
- q12 = vabal_u8(q12, d0, d8);
- }
-
- q1 = vpaddlq_u16(q12);
- q3 = vpaddlq_u32(q1);
- d5 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)),
- vreinterpret_u32_u64(vget_high_u64(q3)));
-
- return vget_lane_u32(d5, 0);
-}
-
-unsigned int vp8_sad8x16_neon(
- unsigned char *src_ptr,
- int src_stride,
- unsigned char *ref_ptr,
- int ref_stride) {
- uint8x8_t d0, d8;
- uint16x8_t q12;
- uint32x4_t q1;
- uint64x2_t q3;
- uint32x2_t d5;
- int i;
-
- d0 = vld1_u8(src_ptr);
- src_ptr += src_stride;
- d8 = vld1_u8(ref_ptr);
- ref_ptr += ref_stride;
- q12 = vabdl_u8(d0, d8);
-
- for (i = 0; i < 15; i++) {
- d0 = vld1_u8(src_ptr);
- src_ptr += src_stride;
- d8 = vld1_u8(ref_ptr);
- ref_ptr += ref_stride;
- q12 = vabal_u8(q12, d0, d8);
- }
-
- q1 = vpaddlq_u16(q12);
- q3 = vpaddlq_u32(q1);
- d5 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)),
- vreinterpret_u32_u64(vget_high_u64(q3)));
-
- return vget_lane_u32(d5, 0);
-}
-
-unsigned int vp8_sad4x4_neon(
- unsigned char *src_ptr,
- int src_stride,
- unsigned char *ref_ptr,
- int ref_stride) {
- uint8x8_t d0, d8;
- uint16x8_t q12;
- uint32x2_t d1;
- uint64x1_t d3;
- int i;
-
- d0 = vld1_u8(src_ptr);
- src_ptr += src_stride;
- d8 = vld1_u8(ref_ptr);
- ref_ptr += ref_stride;
- q12 = vabdl_u8(d0, d8);
-
- for (i = 0; i < 3; i++) {
- d0 = vld1_u8(src_ptr);
- src_ptr += src_stride;
- d8 = vld1_u8(ref_ptr);
- ref_ptr += ref_stride;
- q12 = vabal_u8(q12, d0, d8);
- }
-
- d1 = vpaddl_u16(vget_low_u16(q12));
- d3 = vpaddl_u32(d1);
-
- return vget_lane_u32(vreinterpret_u32_u64(d3), 0);
-}
-
-unsigned int vp8_sad16x16_neon(
- unsigned char *src_ptr,
- int src_stride,
- unsigned char *ref_ptr,
- int ref_stride) {
- uint8x16_t q0, q4;
- uint16x8_t q12, q13;
- uint32x4_t q1;
- uint64x2_t q3;
- uint32x2_t d5;
- int i;
-
- q0 = vld1q_u8(src_ptr);
- src_ptr += src_stride;
- q4 = vld1q_u8(ref_ptr);
- ref_ptr += ref_stride;
- q12 = vabdl_u8(vget_low_u8(q0), vget_low_u8(q4));
- q13 = vabdl_u8(vget_high_u8(q0), vget_high_u8(q4));
-
- for (i = 0; i < 15; i++) {
- q0 = vld1q_u8(src_ptr);
- src_ptr += src_stride;
- q4 = vld1q_u8(ref_ptr);
- ref_ptr += ref_stride;
- q12 = vabal_u8(q12, vget_low_u8(q0), vget_low_u8(q4));
- q13 = vabal_u8(q13, vget_high_u8(q0), vget_high_u8(q4));
- }
-
- q12 = vaddq_u16(q12, q13);
- q1 = vpaddlq_u16(q12);
- q3 = vpaddlq_u32(q1);
- d5 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)),
- vreinterpret_u32_u64(vget_high_u64(q3)));
-
- return vget_lane_u32(d5, 0);
-}
-
-unsigned int vp8_sad16x8_neon(
- unsigned char *src_ptr,
- int src_stride,
- unsigned char *ref_ptr,
- int ref_stride) {
- uint8x16_t q0, q4;
- uint16x8_t q12, q13;
- uint32x4_t q1;
- uint64x2_t q3;
- uint32x2_t d5;
- int i;
-
- q0 = vld1q_u8(src_ptr);
- src_ptr += src_stride;
- q4 = vld1q_u8(ref_ptr);
- ref_ptr += ref_stride;
- q12 = vabdl_u8(vget_low_u8(q0), vget_low_u8(q4));
- q13 = vabdl_u8(vget_high_u8(q0), vget_high_u8(q4));
-
- for (i = 0; i < 7; i++) {
- q0 = vld1q_u8(src_ptr);
- src_ptr += src_stride;
- q4 = vld1q_u8(ref_ptr);
- ref_ptr += ref_stride;
- q12 = vabal_u8(q12, vget_low_u8(q0), vget_low_u8(q4));
- q13 = vabal_u8(q13, vget_high_u8(q0), vget_high_u8(q4));
- }
-
- q12 = vaddq_u16(q12, q13);
- q1 = vpaddlq_u16(q12);
- q3 = vpaddlq_u32(q1);
- d5 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)),
- vreinterpret_u32_u64(vget_high_u64(q3)));
-
- return vget_lane_u32(d5, 0);
-}
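
The removed vp8_sad*_neon kernels are superseded by vpx_sad*_neon in vpx_dsp, which keeps the same widen-and-accumulate shape: vabdl/vabal spread |src - ref| into 16-bit lanes, then pairwise adds reduce to a scalar. A compact 8x8 sketch of that pattern (illustrative, not the shipped replacement):

#include <arm_neon.h>
#include <stdint.h>

static unsigned int sad8x8_neon_sketch(const uint8_t *src, int src_stride,
                                       const uint8_t *ref, int ref_stride) {
  uint16x8_t acc = vdupq_n_u16(0);
  int i;
  for (i = 0; i < 8; ++i) {
    acc = vabal_u8(acc, vld1_u8(src), vld1_u8(ref));  /* acc += |src - ref| */
    src += src_stride;
    ref += ref_stride;
  }
  /* Pairwise-widen twice, then add the two 64-bit halves. */
  const uint64x2_t sum = vpaddlq_u32(vpaddlq_u16(acc));
  return (unsigned int)(vgetq_lane_u64(sum, 0) + vgetq_lane_u64(sum, 1));
}
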
diff --git a/vp8/common/copy_c.c b/vp8/common/copy_c.c
new file mode 100644
index 0000000..febfcb2
--- /dev/null
+++ b/vp8/common/copy_c.c
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_config.h"
+#include "vpx/vpx_integer.h"
+
+/* Copy two side-by-side macroblocks (a 32-pixel-wide strip) to a buffer */
+void vp8_copy32xn_c(unsigned char *src_ptr, int src_stride,
+ unsigned char *dst_ptr, int dst_stride,
+ int height)
+{
+ int r;
+
+ for (r = 0; r < height; r++)
+ {
+#if !(CONFIG_FAST_UNALIGNED)
+ dst_ptr[0] = src_ptr[0];
+ dst_ptr[1] = src_ptr[1];
+ dst_ptr[2] = src_ptr[2];
+ dst_ptr[3] = src_ptr[3];
+ dst_ptr[4] = src_ptr[4];
+ dst_ptr[5] = src_ptr[5];
+ dst_ptr[6] = src_ptr[6];
+ dst_ptr[7] = src_ptr[7];
+ dst_ptr[8] = src_ptr[8];
+ dst_ptr[9] = src_ptr[9];
+ dst_ptr[10] = src_ptr[10];
+ dst_ptr[11] = src_ptr[11];
+ dst_ptr[12] = src_ptr[12];
+ dst_ptr[13] = src_ptr[13];
+ dst_ptr[14] = src_ptr[14];
+ dst_ptr[15] = src_ptr[15];
+ dst_ptr[16] = src_ptr[16];
+ dst_ptr[17] = src_ptr[17];
+ dst_ptr[18] = src_ptr[18];
+ dst_ptr[19] = src_ptr[19];
+ dst_ptr[20] = src_ptr[20];
+ dst_ptr[21] = src_ptr[21];
+ dst_ptr[22] = src_ptr[22];
+ dst_ptr[23] = src_ptr[23];
+ dst_ptr[24] = src_ptr[24];
+ dst_ptr[25] = src_ptr[25];
+ dst_ptr[26] = src_ptr[26];
+ dst_ptr[27] = src_ptr[27];
+ dst_ptr[28] = src_ptr[28];
+ dst_ptr[29] = src_ptr[29];
+ dst_ptr[30] = src_ptr[30];
+ dst_ptr[31] = src_ptr[31];
+#else
+ ((uint32_t *)dst_ptr)[0] = ((uint32_t *)src_ptr)[0];
+ ((uint32_t *)dst_ptr)[1] = ((uint32_t *)src_ptr)[1];
+ ((uint32_t *)dst_ptr)[2] = ((uint32_t *)src_ptr)[2];
+ ((uint32_t *)dst_ptr)[3] = ((uint32_t *)src_ptr)[3];
+ ((uint32_t *)dst_ptr)[4] = ((uint32_t *)src_ptr)[4];
+ ((uint32_t *)dst_ptr)[5] = ((uint32_t *)src_ptr)[5];
+ ((uint32_t *)dst_ptr)[6] = ((uint32_t *)src_ptr)[6];
+ ((uint32_t *)dst_ptr)[7] = ((uint32_t *)src_ptr)[7];
+#endif
+ src_ptr += src_stride;
+ dst_ptr += dst_stride;
+
+ }
+}
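
A usage sketch of the helper (ref_frame and ref_stride are hypothetical): it stages a 32-pixel-wide strip, i.e. two side-by-side 16-wide macroblocks, of `height` rows.

void vp8_copy32xn_c(unsigned char *src_ptr, int src_stride,
                    unsigned char *dst_ptr, int dst_stride, int height);

static void stage_two_macroblocks(unsigned char *ref_frame, int ref_stride) {
  unsigned char pred[32 * 16];
  vp8_copy32xn_c(ref_frame, ref_stride, pred, 32, 16);  /* 16 rows of 32 px */
}
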
diff --git a/vp8/common/mfqe.c b/vp8/common/mfqe.c
index eea6347..d12dea1 100644
--- a/vp8/common/mfqe.c
+++ b/vp8/common/mfqe.c
@@ -17,10 +17,11 @@
* higher quality.
*/
-#include "postproc.h"
-#include "variance.h"
+#include "./vp8_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vp8/common/postproc.h"
+#include "vp8/common/variance.h"
#include "vpx_mem/vpx_mem.h"
-#include "vp8_rtcd.h"
#include "vpx_scale/yv12config.h"
#include <limits.h>
@@ -160,9 +161,9 @@
vp8_variance8x8(v, uv_stride, vd, uvd_stride, &sse);
vsad = (sse + 32)>>6;
#else
- sad = (vp8_sad16x16(y, y_stride, yd, yd_stride, UINT_MAX) + 128) >> 8;
- usad = (vp8_sad8x8(u, uv_stride, ud, uvd_stride, UINT_MAX) + 32) >> 6;
- vsad = (vp8_sad8x8(v, uv_stride, vd, uvd_stride, UINT_MAX)+ 32) >> 6;
+ sad = (vpx_sad16x16(y, y_stride, yd, yd_stride) + 128) >> 8;
+ usad = (vpx_sad8x8(u, uv_stride, ud, uvd_stride) + 32) >> 6;
+ vsad = (vpx_sad8x8(v, uv_stride, vd, uvd_stride) + 32) >> 6;
#endif
}
else /* if (blksize == 8) */
@@ -177,9 +178,9 @@
vp8_variance4x4(v, uv_stride, vd, uvd_stride, &sse);
vsad = (sse + 8)>>4;
#else
- sad = (vp8_sad8x8(y, y_stride, yd, yd_stride, UINT_MAX) + 32) >> 6;
- usad = (vp8_sad4x4(u, uv_stride, ud, uvd_stride, UINT_MAX) + 8) >> 4;
- vsad = (vp8_sad4x4(v, uv_stride, vd, uvd_stride, UINT_MAX) + 8) >> 4;
+ sad = (vpx_sad8x8(y, y_stride, yd, yd_stride) + 32) >> 6;
+ usad = (vpx_sad4x4(u, uv_stride, ud, uvd_stride) + 8) >> 4;
+ vsad = (vpx_sad4x4(v, uv_stride, vd, uvd_stride) + 8) >> 4;
#endif
}
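
The replacement calls drop vp8's unused max_sad argument; the shift pairs then turn a block SAD into a rounded per-pixel mean, i.e. ROUND_POWER_OF_TWO(sad, log2(pixels)). Spelled out (illustrative):

/* 16x16 = 256 px: (sad + 128) >> 8;  8x8 = 64 px: (sad + 32) >> 6;
 * 4x4 = 16 px: (sad + 8) >> 4. */
static unsigned int mean_sad(unsigned int sad, int log2_pixels) {
  return (sad + (1u << (log2_pixels - 1))) >> log2_pixels;
}
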
diff --git a/vp8/common/rtcd_defs.pl b/vp8/common/rtcd_defs.pl
index 63fde4c..56b7db7 100644
--- a/vp8/common/rtcd_defs.pl
+++ b/vp8/common/rtcd_defs.pl
@@ -304,88 +304,6 @@
$vp8_variance_halfpixvar16x16_hv_media=vp8_variance_halfpixvar16x16_hv_armv6;
#
-# Single block SAD
-#
-add_proto qw/unsigned int vp8_sad4x4/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad";
-specialize qw/vp8_sad4x4 mmx sse2 neon/;
-$vp8_sad4x4_sse2=vp8_sad4x4_wmt;
-
-add_proto qw/unsigned int vp8_sad8x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad";
-specialize qw/vp8_sad8x8 mmx sse2 neon/;
-$vp8_sad8x8_sse2=vp8_sad8x8_wmt;
-
-add_proto qw/unsigned int vp8_sad8x16/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad";
-specialize qw/vp8_sad8x16 mmx sse2 neon/;
-$vp8_sad8x16_sse2=vp8_sad8x16_wmt;
-
-add_proto qw/unsigned int vp8_sad16x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad";
-specialize qw/vp8_sad16x8 mmx sse2 neon/;
-$vp8_sad16x8_sse2=vp8_sad16x8_wmt;
-
-add_proto qw/unsigned int vp8_sad16x16/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad";
-specialize qw/vp8_sad16x16 mmx sse2 sse3 media neon/;
-$vp8_sad16x16_sse2=vp8_sad16x16_wmt;
-$vp8_sad16x16_media=vp8_sad16x16_armv6;
-
-#
-# Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally
-#
-add_proto qw/void vp8_sad4x4x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array";
-specialize qw/vp8_sad4x4x3 sse3/;
-
-add_proto qw/void vp8_sad8x8x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array";
-specialize qw/vp8_sad8x8x3 sse3/;
-
-add_proto qw/void vp8_sad8x16x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array";
-specialize qw/vp8_sad8x16x3 sse3/;
-
-add_proto qw/void vp8_sad16x8x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array";
-specialize qw/vp8_sad16x8x3 sse3 ssse3/;
-
-add_proto qw/void vp8_sad16x16x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array";
-specialize qw/vp8_sad16x16x3 sse3 ssse3/;
-
-# Note the only difference in the following prototypes is that they return into
-# an array of short
-add_proto qw/void vp8_sad4x4x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array";
-specialize qw/vp8_sad4x4x8 sse4_1/;
-$vp8_sad4x4x8_sse4_1=vp8_sad4x4x8_sse4;
-
-add_proto qw/void vp8_sad8x8x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array";
-specialize qw/vp8_sad8x8x8 sse4_1/;
-$vp8_sad8x8x8_sse4_1=vp8_sad8x8x8_sse4;
-
-add_proto qw/void vp8_sad8x16x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array";
-specialize qw/vp8_sad8x16x8 sse4_1/;
-$vp8_sad8x16x8_sse4_1=vp8_sad8x16x8_sse4;
-
-add_proto qw/void vp8_sad16x8x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array";
-specialize qw/vp8_sad16x8x8 sse4_1/;
-$vp8_sad16x8x8_sse4_1=vp8_sad16x8x8_sse4;
-
-add_proto qw/void vp8_sad16x16x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array";
-specialize qw/vp8_sad16x16x8 sse4_1/;
-$vp8_sad16x16x8_sse4_1=vp8_sad16x16x8_sse4;
-
-#
-# Multi-block SAD, comparing a reference to N independent blocks
-#
-add_proto qw/void vp8_sad4x4x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array";
-specialize qw/vp8_sad4x4x4d sse3/;
-
-add_proto qw/void vp8_sad8x8x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array";
-specialize qw/vp8_sad8x8x4d sse3/;
-
-add_proto qw/void vp8_sad8x16x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array";
-specialize qw/vp8_sad8x16x4d sse3/;
-
-add_proto qw/void vp8_sad16x8x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array";
-specialize qw/vp8_sad16x8x4d sse3/;
-
-add_proto qw/void vp8_sad16x16x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array";
-specialize qw/vp8_sad16x16x4d sse3/;
-
-#
# Encoder functions below this point.
#
if (vpx_config("CONFIG_VP8_ENCODER") eq "yes") {
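
These prototypes reappear under vpx_dsp with the vpx_ prefix and without max_sad; at build time rtcd generates dispatch along these lines (a conceptual C sketch, not the literal vpx_dsp_rtcd.h):

#include <stdint.h>

unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride,
                            const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad16x16_sse2(const uint8_t *src_ptr, int src_stride,
                               const uint8_t *ref_ptr, int ref_stride);

/* With runtime CPU detection each entry point is a pointer that
 * vpx_dsp_rtcd() retargets once at startup. */
unsigned int (*vpx_sad16x16)(const uint8_t *src_ptr, int src_stride,
                             const uint8_t *ref_ptr, int ref_stride) =
    vpx_sad16x16_c;  /* swapped for vpx_sad16x16_sse2 when SSE2 is present */
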
diff --git a/vp8/common/sad_c.c b/vp8/common/sad_c.c
deleted file mode 100644
index 5f36fc9..0000000
--- a/vp8/common/sad_c.c
+++ /dev/null
@@ -1,302 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include <limits.h>
-#include <stdlib.h>
-#include "vpx_config.h"
-#include "vpx/vpx_integer.h"
-
-static unsigned int sad_mx_n_c(const unsigned char *src_ptr, int src_stride,
- const unsigned char *ref_ptr, int ref_stride,
- unsigned int max_sad, int m, int n)
-{
- int r, c;
- unsigned int sad = 0;
-
- for (r = 0; r < n; r++)
- {
- for (c = 0; c < m; c++)
- {
- sad += abs(src_ptr[c] - ref_ptr[c]);
- }
-
- if (sad > max_sad)
- break;
-
- src_ptr += src_stride;
- ref_ptr += ref_stride;
- }
-
- return sad;
-}
-
-/* max_sad is provided as an optional optimization point. Alternative
- * implementations of these functions are not required to check it.
- */
-
-unsigned int vp8_sad16x16_c(const unsigned char *src_ptr, int src_stride,
- const unsigned char *ref_ptr, int ref_stride,
- unsigned int max_sad)
-{
- return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 16, 16);
-}
-
-unsigned int vp8_sad8x8_c(const unsigned char *src_ptr, int src_stride,
- const unsigned char *ref_ptr, int ref_stride,
- unsigned int max_sad)
-{
- return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 8, 8);
-}
-
-unsigned int vp8_sad16x8_c(const unsigned char *src_ptr, int src_stride,
- const unsigned char *ref_ptr, int ref_stride,
- unsigned int max_sad)
-{
- return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 16, 8);
-
-}
-
-unsigned int vp8_sad8x16_c(const unsigned char *src_ptr, int src_stride,
- const unsigned char *ref_ptr, int ref_stride,
- unsigned int max_sad)
-{
- return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 8, 16);
-}
-
-unsigned int vp8_sad4x4_c(const unsigned char *src_ptr, int src_stride,
- const unsigned char *ref_ptr, int ref_stride,
- unsigned int max_sad)
-{
- return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 4, 4);
-}
-
-void vp8_sad16x16x3_c(const unsigned char *src_ptr, int src_stride,
- const unsigned char *ref_ptr, int ref_stride,
- unsigned int *sad_array)
-{
- sad_array[0] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
- sad_array[1] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
- sad_array[2] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
-}
-
-void vp8_sad16x16x8_c(const unsigned char *src_ptr, int src_stride,
- const unsigned char *ref_ptr, int ref_stride,
- unsigned short *sad_array)
-{
- sad_array[0] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
- sad_array[1] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
- sad_array[2] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
- sad_array[3] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX);
- sad_array[4] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX);
- sad_array[5] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, UINT_MAX);
- sad_array[6] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX);
- sad_array[7] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX);
-}
-
-void vp8_sad16x8x3_c(const unsigned char *src_ptr, int src_stride,
- const unsigned char *ref_ptr, int ref_stride,
- unsigned int *sad_array)
-{
- sad_array[0] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
- sad_array[1] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
- sad_array[2] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
-}
-
-void vp8_sad16x8x8_c(const unsigned char *src_ptr, int src_stride,
- const unsigned char *ref_ptr, int ref_stride,
- unsigned short *sad_array)
-{
- sad_array[0] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
- sad_array[1] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
- sad_array[2] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
- sad_array[3] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX);
- sad_array[4] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX);
- sad_array[5] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, UINT_MAX);
- sad_array[6] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX);
- sad_array[7] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX);
-}
-
-void vp8_sad8x8x3_c(const unsigned char *src_ptr, int src_stride,
- const unsigned char *ref_ptr, int ref_stride,
- unsigned int *sad_array)
-{
- sad_array[0] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
- sad_array[1] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
- sad_array[2] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
-}
-
-void vp8_sad8x8x8_c(const unsigned char *src_ptr, int src_stride,
- const unsigned char *ref_ptr, int ref_stride,
- unsigned short *sad_array)
-{
- sad_array[0] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
- sad_array[1] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
- sad_array[2] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
- sad_array[3] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX);
- sad_array[4] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX);
- sad_array[5] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, UINT_MAX);
- sad_array[6] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX);
- sad_array[7] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX);
-}
-
-void vp8_sad8x16x3_c(const unsigned char *src_ptr, int src_stride,
- const unsigned char *ref_ptr, int ref_stride,
- unsigned int *sad_array)
-{
- sad_array[0] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
- sad_array[1] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
- sad_array[2] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
-}
-
-void vp8_sad8x16x8_c(const unsigned char *src_ptr, int src_stride,
- const unsigned char *ref_ptr, int ref_stride,
- unsigned short *sad_array)
-{
- sad_array[0] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
- sad_array[1] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
- sad_array[2] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
- sad_array[3] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX);
- sad_array[4] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX);
- sad_array[5] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, UINT_MAX);
- sad_array[6] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX);
- sad_array[7] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX);
-}
-
-void vp8_sad4x4x3_c(const unsigned char *src_ptr, int src_stride,
- const unsigned char *ref_ptr, int ref_stride,
- unsigned int *sad_array)
-{
- sad_array[0] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
- sad_array[1] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
- sad_array[2] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
-}
-
-void vp8_sad4x4x8_c(const unsigned char *src_ptr, int src_stride,
- const unsigned char *ref_ptr, int ref_stride,
- unsigned short *sad_array)
-{
- sad_array[0] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
- sad_array[1] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
- sad_array[2] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
- sad_array[3] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX);
- sad_array[4] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX);
- sad_array[5] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, UINT_MAX);
- sad_array[6] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX);
- sad_array[7] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX);
-}
-
-void vp8_sad16x16x4d_c(const unsigned char *src_ptr, int src_stride,
- const unsigned char * const ref_ptr[], int ref_stride,
- unsigned int *sad_array)
-{
- sad_array[0] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX);
- sad_array[1] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX);
- sad_array[2] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX);
- sad_array[3] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX);
-}
-
-void vp8_sad16x8x4d_c(const unsigned char *src_ptr, int src_stride,
- const unsigned char * const ref_ptr[], int ref_stride,
- unsigned int *sad_array)
-{
- sad_array[0] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX);
- sad_array[1] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX);
- sad_array[2] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX);
- sad_array[3] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX);
-}
-
-void vp8_sad8x8x4d_c(const unsigned char *src_ptr, int src_stride,
- const unsigned char * const ref_ptr[], int ref_stride,
- unsigned int *sad_array)
-{
- sad_array[0] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX);
- sad_array[1] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX);
- sad_array[2] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX);
- sad_array[3] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX);
-}
-
-void vp8_sad8x16x4d_c(const unsigned char *src_ptr, int src_stride,
- const unsigned char * const ref_ptr[], int ref_stride,
- unsigned int *sad_array)
-{
- sad_array[0] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX);
- sad_array[1] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX);
- sad_array[2] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX);
- sad_array[3] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX);
-}
-
-void vp8_sad4x4x4d_c(const unsigned char *src_ptr, int src_stride,
- const unsigned char * const ref_ptr[], int ref_stride,
- unsigned int *sad_array)
-{
- sad_array[0] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX);
- sad_array[1] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX);
- sad_array[2] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX);
- sad_array[3] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX);
-}
-
-/* Copy 2 macroblocks to a buffer */
-void vp8_copy32xn_c(unsigned char *src_ptr, int src_stride,
- unsigned char *dst_ptr, int dst_stride,
- int height)
-{
- int r;
-
- for (r = 0; r < height; r++)
- {
-#if !(CONFIG_FAST_UNALIGNED)
- dst_ptr[0] = src_ptr[0];
- dst_ptr[1] = src_ptr[1];
- dst_ptr[2] = src_ptr[2];
- dst_ptr[3] = src_ptr[3];
- dst_ptr[4] = src_ptr[4];
- dst_ptr[5] = src_ptr[5];
- dst_ptr[6] = src_ptr[6];
- dst_ptr[7] = src_ptr[7];
- dst_ptr[8] = src_ptr[8];
- dst_ptr[9] = src_ptr[9];
- dst_ptr[10] = src_ptr[10];
- dst_ptr[11] = src_ptr[11];
- dst_ptr[12] = src_ptr[12];
- dst_ptr[13] = src_ptr[13];
- dst_ptr[14] = src_ptr[14];
- dst_ptr[15] = src_ptr[15];
- dst_ptr[16] = src_ptr[16];
- dst_ptr[17] = src_ptr[17];
- dst_ptr[18] = src_ptr[18];
- dst_ptr[19] = src_ptr[19];
- dst_ptr[20] = src_ptr[20];
- dst_ptr[21] = src_ptr[21];
- dst_ptr[22] = src_ptr[22];
- dst_ptr[23] = src_ptr[23];
- dst_ptr[24] = src_ptr[24];
- dst_ptr[25] = src_ptr[25];
- dst_ptr[26] = src_ptr[26];
- dst_ptr[27] = src_ptr[27];
- dst_ptr[28] = src_ptr[28];
- dst_ptr[29] = src_ptr[29];
- dst_ptr[30] = src_ptr[30];
- dst_ptr[31] = src_ptr[31];
-#else
- ((uint32_t *)dst_ptr)[0] = ((uint32_t *)src_ptr)[0] ;
- ((uint32_t *)dst_ptr)[1] = ((uint32_t *)src_ptr)[1] ;
- ((uint32_t *)dst_ptr)[2] = ((uint32_t *)src_ptr)[2] ;
- ((uint32_t *)dst_ptr)[3] = ((uint32_t *)src_ptr)[3] ;
- ((uint32_t *)dst_ptr)[4] = ((uint32_t *)src_ptr)[4] ;
- ((uint32_t *)dst_ptr)[5] = ((uint32_t *)src_ptr)[5] ;
- ((uint32_t *)dst_ptr)[6] = ((uint32_t *)src_ptr)[6] ;
- ((uint32_t *)dst_ptr)[7] = ((uint32_t *)src_ptr)[7] ;
-#endif
- src_ptr += src_stride;
- dst_ptr += dst_stride;
-
- }
-}
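
The vpx_dsp replacement keeps this shape but drops the max_sad early-out and stamps the block sizes out with a macro; a sketch under assumed names (the real file uses its own macro and helper spellings):

#include <stdint.h>
#include <stdlib.h>

static unsigned int sad(const uint8_t *src, int src_stride,
                        const uint8_t *ref, int ref_stride, int m, int n) {
  unsigned int s = 0;
  int r, c;
  for (r = 0; r < n; ++r) {
    for (c = 0; c < m; ++c) s += abs(src[c] - ref[c]);
    src += src_stride;
    ref += ref_stride;
  }
  return s;
}

#define SAD_MXN(m, n)                                                     \
  unsigned int vpx_sad##m##x##n##_c(const uint8_t *src, int src_stride,   \
                                    const uint8_t *ref, int ref_stride) { \
    return sad(src, src_stride, ref, ref_stride, m, n);                   \
  }

SAD_MXN(16, 16)
SAD_MXN(8, 8)
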
diff --git a/vp8/common/variance.h b/vp8/common/variance.h
index 89a32a7..552a280 100644
--- a/vp8/common/variance.h
+++ b/vp8/common/variance.h
@@ -14,16 +14,17 @@
#include "vpx_config.h"
+#include "vpx/vpx_integer.h"
+
#ifdef __cplusplus
extern "C" {
#endif
-typedef unsigned int(*vp8_sad_fn_t)(
- const unsigned char *src_ptr,
+typedef unsigned int(*vpx_sad_fn_t)(
+ const uint8_t *src_ptr,
int source_stride,
- const unsigned char *ref_ptr,
- int ref_stride,
- unsigned int max_sad);
+ const uint8_t *ref_ptr,
+ int ref_stride);
typedef void (*vp8_copy32xn_fn_t)(
const unsigned char *src_ptr,
@@ -32,27 +33,17 @@
int ref_stride,
int n);
-typedef void (*vp8_sad_multi_fn_t)(
+typedef void (*vpx_sad_multi_fn_t)(
const unsigned char *src_ptr,
int source_stride,
- const unsigned char *ref_ptr,
+ const unsigned char *ref_array,
int ref_stride,
unsigned int *sad_array);
-
-typedef void (*vp8_sad_multi1_fn_t)
+typedef void (*vpx_sad_multi_d_fn_t)
(
const unsigned char *src_ptr,
int source_stride,
- const unsigned char *ref_ptr,
- int ref_stride,
- unsigned short *sad_array
- );
-
-typedef void (*vp8_sad_multi_d_fn_t)
- (
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char * const ref_ptr[],
+ const unsigned char * const ref_array[],
int ref_stride,
unsigned int *sad_array
);
@@ -102,15 +93,15 @@
typedef struct variance_vtable
{
- vp8_sad_fn_t sdf;
+ vpx_sad_fn_t sdf;
vp8_variance_fn_t vf;
vp8_subpixvariance_fn_t svf;
vp8_variance_fn_t svf_halfpix_h;
vp8_variance_fn_t svf_halfpix_v;
vp8_variance_fn_t svf_halfpix_hv;
- vp8_sad_multi_fn_t sdx3f;
- vp8_sad_multi1_fn_t sdx8f;
- vp8_sad_multi_d_fn_t sdx4df;
+ vpx_sad_multi_fn_t sdx3f;
+ vpx_sad_multi_fn_t sdx8f;
+ vpx_sad_multi_d_fn_t sdx4df;
#if ARCH_X86 || ARCH_X86_64
vp8_copy32xn_fn_t copymem;
#endif
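
How the renamed pointers slot into the vtable, as a hedged sketch (the encoder init code that actually does this is outside this hunk; the typedef name is from variance.h). Note that sdx8f now shares vpx_sad_multi_fn_t, since the x8 variants write unsigned int as well:

#include "vp8/common/variance.h"

static void setup_16x16(vp8_variance_fn_ptr_t *fn) {
  fn->sdf    = vpx_sad16x16;
  fn->sdx3f  = vpx_sad16x16x3;
  fn->sdx8f  = vpx_sad16x16x8;   /* same type as sdx3f after this change */
  fn->sdx4df = vpx_sad16x16x4d;
}
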
diff --git a/vp8/common/x86/copy_sse2.asm b/vp8/common/x86/copy_sse2.asm
new file mode 100644
index 0000000..86fae26
--- /dev/null
+++ b/vp8/common/x86/copy_sse2.asm
@@ -0,0 +1,93 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+
+;void vp8_copy32xn_sse2(
+; unsigned char *src_ptr,
+; int src_stride,
+; unsigned char *dst_ptr,
+; int dst_stride,
+; int height);
+global sym(vp8_copy32xn_sse2) PRIVATE
+sym(vp8_copy32xn_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 5
+ SAVE_XMM 7
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;dst_ptr
+
+ movsxd rax, dword ptr arg(1) ;src_stride
+ movsxd rdx, dword ptr arg(3) ;dst_stride
+ movsxd rcx, dword ptr arg(4) ;height
+
+.block_copy_sse2_loopx4:
+ movdqu xmm0, XMMWORD PTR [rsi]
+ movdqu xmm1, XMMWORD PTR [rsi + 16]
+ movdqu xmm2, XMMWORD PTR [rsi + rax]
+ movdqu xmm3, XMMWORD PTR [rsi + rax + 16]
+
+ lea rsi, [rsi+rax*2]
+
+ movdqu xmm4, XMMWORD PTR [rsi]
+ movdqu xmm5, XMMWORD PTR [rsi + 16]
+ movdqu xmm6, XMMWORD PTR [rsi + rax]
+ movdqu xmm7, XMMWORD PTR [rsi + rax + 16]
+
+ lea rsi, [rsi+rax*2]
+
+ movdqa XMMWORD PTR [rdi], xmm0
+ movdqa XMMWORD PTR [rdi + 16], xmm1
+ movdqa XMMWORD PTR [rdi + rdx], xmm2
+ movdqa XMMWORD PTR [rdi + rdx + 16], xmm3
+
+ lea rdi, [rdi+rdx*2]
+
+ movdqa XMMWORD PTR [rdi], xmm4
+ movdqa XMMWORD PTR [rdi + 16], xmm5
+ movdqa XMMWORD PTR [rdi + rdx], xmm6
+ movdqa XMMWORD PTR [rdi + rdx + 16], xmm7
+
+ lea rdi, [rdi+rdx*2]
+
+ sub rcx, 4
+ cmp rcx, 4
+ jge .block_copy_sse2_loopx4
+
+ cmp rcx, 0
+ je .copy_is_done
+
+.block_copy_sse2_loop:
+ movdqu xmm0, XMMWORD PTR [rsi]
+ movdqu xmm1, XMMWORD PTR [rsi + 16]
+ lea rsi, [rsi+rax]
+
+ movdqa XMMWORD PTR [rdi], xmm0
+ movdqa XMMWORD PTR [rdi + 16], xmm1
+ lea rdi, [rdi+rdx]
+
+ sub rcx, 1
+ jne .block_copy_sse2_loop
+
+.copy_is_done:
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
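
One caveat the asm implies: loads use movdqu (any alignment) but stores use movdqa, so destination rows must stay 16-byte aligned. A caller sketch (DECLARE_ALIGNED is vpx's alignment macro from vpx_ports/mem.h; src and src_stride are hypothetical):

#include "vpx_ports/mem.h"

void vp8_copy32xn_sse2(unsigned char *src_ptr, int src_stride,
                       unsigned char *dst_ptr, int dst_stride, int height);

static void copy_strip(unsigned char *src, int src_stride) {
  /* A dst_stride of 32 keeps every row aligned once the buffer is. */
  DECLARE_ALIGNED(16, unsigned char, dst[32 * 16]);
  vp8_copy32xn_sse2(src, src_stride, dst, 32, 16);
}
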
diff --git a/vp8/common/x86/copy_sse3.asm b/vp8/common/x86/copy_sse3.asm
new file mode 100644
index 0000000..d789a40
--- /dev/null
+++ b/vp8/common/x86/copy_sse3.asm
@@ -0,0 +1,146 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+%include "vpx_ports/x86_abi_support.asm"
+
+%macro STACK_FRAME_CREATE_X3 0
+%if ABI_IS_32BIT
+ %define src_ptr rsi
+ %define src_stride rax
+ %define ref_ptr rdi
+ %define ref_stride rdx
+ %define end_ptr rcx
+ %define ret_var rbx
+ %define result_ptr arg(4)
+ %define max_sad arg(4)
+ %define height dword ptr arg(4)
+ push rbp
+ mov rbp, rsp
+ push rsi
+ push rdi
+ push rbx
+
+ mov rsi, arg(0) ; src_ptr
+ mov rdi, arg(2) ; ref_ptr
+
+ movsxd rax, dword ptr arg(1) ; src_stride
+ movsxd rdx, dword ptr arg(3) ; ref_stride
+%else
+ %if LIBVPX_YASM_WIN64
+ SAVE_XMM 7, u
+ %define src_ptr rcx
+ %define src_stride rdx
+ %define ref_ptr r8
+ %define ref_stride r9
+ %define end_ptr r10
+ %define ret_var r11
+ %define result_ptr [rsp+xmm_stack_space+8+4*8]
+ %define max_sad [rsp+xmm_stack_space+8+4*8]
+ %define height dword ptr [rsp+xmm_stack_space+8+4*8]
+ %else
+ %define src_ptr rdi
+ %define src_stride rsi
+ %define ref_ptr rdx
+ %define ref_stride rcx
+ %define end_ptr r9
+ %define ret_var r10
+ %define result_ptr r8
+ %define max_sad r8
+ %define height r8
+ %endif
+%endif
+
+%endmacro
+
+%macro STACK_FRAME_DESTROY_X3 0
+ %define src_ptr
+ %define src_stride
+ %define ref_ptr
+ %define ref_stride
+ %define end_ptr
+ %define ret_var
+ %define result_ptr
+ %define max_sad
+ %define height
+
+%if ABI_IS_32BIT
+ pop rbx
+ pop rdi
+ pop rsi
+ pop rbp
+%else
+ %if LIBVPX_YASM_WIN64
+ RESTORE_XMM
+ %endif
+%endif
+ ret
+%endmacro
+
+
+;void vp8_copy32xn_sse3(
+; unsigned char *src_ptr,
+; int src_stride,
+; unsigned char *dst_ptr,
+; int dst_stride,
+; int height);
+global sym(vp8_copy32xn_sse3) PRIVATE
+sym(vp8_copy32xn_sse3):
+
+ STACK_FRAME_CREATE_X3
+
+.block_copy_sse3_loopx4:
+ lea end_ptr, [src_ptr+src_stride*2]
+
+ movdqu xmm0, XMMWORD PTR [src_ptr]
+ movdqu xmm1, XMMWORD PTR [src_ptr + 16]
+ movdqu xmm2, XMMWORD PTR [src_ptr + src_stride]
+ movdqu xmm3, XMMWORD PTR [src_ptr + src_stride + 16]
+ movdqu xmm4, XMMWORD PTR [end_ptr]
+ movdqu xmm5, XMMWORD PTR [end_ptr + 16]
+ movdqu xmm6, XMMWORD PTR [end_ptr + src_stride]
+ movdqu xmm7, XMMWORD PTR [end_ptr + src_stride + 16]
+
+ lea src_ptr, [src_ptr+src_stride*4]
+
+ lea end_ptr, [ref_ptr+ref_stride*2]
+
+ movdqa XMMWORD PTR [ref_ptr], xmm0
+ movdqa XMMWORD PTR [ref_ptr + 16], xmm1
+ movdqa XMMWORD PTR [ref_ptr + ref_stride], xmm2
+ movdqa XMMWORD PTR [ref_ptr + ref_stride + 16], xmm3
+ movdqa XMMWORD PTR [end_ptr], xmm4
+ movdqa XMMWORD PTR [end_ptr + 16], xmm5
+ movdqa XMMWORD PTR [end_ptr + ref_stride], xmm6
+ movdqa XMMWORD PTR [end_ptr + ref_stride + 16], xmm7
+
+ lea ref_ptr, [ref_ptr+ref_stride*4]
+
+ sub height, 4
+ cmp height, 4
+ jge .block_copy_sse3_loopx4
+
+ ; Check whether more rows still need to be copied.
+ cmp height, 0
+ je .copy_is_done
+
+.block_copy_sse3_loop:
+ movdqu xmm0, XMMWORD PTR [src_ptr]
+ movdqu xmm1, XMMWORD PTR [src_ptr + 16]
+ lea src_ptr, [src_ptr+src_stride]
+
+ movdqa XMMWORD PTR [ref_ptr], xmm0
+ movdqa XMMWORD PTR [ref_ptr + 16], xmm1
+ lea ref_ptr, [ref_ptr+ref_stride]
+
+ sub height, 1
+ jne .block_copy_sse3_loop
+
+.copy_is_done:
+ STACK_FRAME_DESTROY_X3
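
The STACK_FRAME macros above alias the argument registers per ABI (32-bit stack args, Win64 rcx/rdx/r8/r9, SysV rdi/rsi/rdx/rcx) so one body serves all three conventions; from C the entry point keeps the plain prototype:

void vp8_copy32xn_sse3(unsigned char *src_ptr, int src_stride,
                       unsigned char *dst_ptr, int dst_stride,
                       int height);
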
diff --git a/vp8/common/x86/sad_sse2.asm b/vp8/common/x86/sad_sse2.asm
deleted file mode 100644
index 8d86abc..0000000
--- a/vp8/common/x86/sad_sse2.asm
+++ /dev/null
@@ -1,410 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
-%include "vpx_ports/x86_abi_support.asm"
-
-;unsigned int vp8_sad16x16_wmt(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride)
-global sym(vp8_sad16x16_wmt) PRIVATE
-sym(vp8_sad16x16_wmt):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
- SAVE_XMM 6
- push rsi
- push rdi
- ; end prolog
-
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;ref_ptr
-
- movsxd rax, dword ptr arg(1) ;src_stride
- movsxd rdx, dword ptr arg(3) ;ref_stride
-
- lea rcx, [rsi+rax*8]
-
- lea rcx, [rcx+rax*8]
- pxor xmm6, xmm6
-
-.x16x16sad_wmt_loop:
-
- movq xmm0, QWORD PTR [rsi]
- movq xmm2, QWORD PTR [rsi+8]
-
- movq xmm1, QWORD PTR [rdi]
- movq xmm3, QWORD PTR [rdi+8]
-
- movq xmm4, QWORD PTR [rsi+rax]
- movq xmm5, QWORD PTR [rdi+rdx]
-
-
- punpcklbw xmm0, xmm2
- punpcklbw xmm1, xmm3
-
- psadbw xmm0, xmm1
- movq xmm2, QWORD PTR [rsi+rax+8]
-
- movq xmm3, QWORD PTR [rdi+rdx+8]
- lea rsi, [rsi+rax*2]
-
- lea rdi, [rdi+rdx*2]
- punpcklbw xmm4, xmm2
-
- punpcklbw xmm5, xmm3
- psadbw xmm4, xmm5
-
- paddw xmm6, xmm0
- paddw xmm6, xmm4
-
- cmp rsi, rcx
- jne .x16x16sad_wmt_loop
-
- movq xmm0, xmm6
- psrldq xmm6, 8
-
- paddw xmm0, xmm6
- movq rax, xmm0
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;unsigned int vp8_sad8x16_wmt(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride,
-; int max_sad)
-global sym(vp8_sad8x16_wmt) PRIVATE
-sym(vp8_sad8x16_wmt):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
- push rbx
- push rsi
- push rdi
- ; end prolog
-
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;ref_ptr
-
- movsxd rbx, dword ptr arg(1) ;src_stride
- movsxd rdx, dword ptr arg(3) ;ref_stride
-
- lea rcx, [rsi+rbx*8]
-
- lea rcx, [rcx+rbx*8]
- pxor mm7, mm7
-
-.x8x16sad_wmt_loop:
-
- movq rax, mm7
- cmp eax, arg(4)
- ja .x8x16sad_wmt_early_exit
-
- movq mm0, QWORD PTR [rsi]
- movq mm1, QWORD PTR [rdi]
-
- movq mm2, QWORD PTR [rsi+rbx]
- movq mm3, QWORD PTR [rdi+rdx]
-
- psadbw mm0, mm1
- psadbw mm2, mm3
-
- lea rsi, [rsi+rbx*2]
- lea rdi, [rdi+rdx*2]
-
- paddw mm7, mm0
- paddw mm7, mm2
-
- cmp rsi, rcx
- jne .x8x16sad_wmt_loop
-
- movq rax, mm7
-
-.x8x16sad_wmt_early_exit:
-
- ; begin epilog
- pop rdi
- pop rsi
- pop rbx
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-;unsigned int vp8_sad8x8_wmt(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride)
-global sym(vp8_sad8x8_wmt) PRIVATE
-sym(vp8_sad8x8_wmt):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
- push rbx
- push rsi
- push rdi
- ; end prolog
-
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;ref_ptr
-
- movsxd rbx, dword ptr arg(1) ;src_stride
- movsxd rdx, dword ptr arg(3) ;ref_stride
-
- lea rcx, [rsi+rbx*8]
- pxor mm7, mm7
-
-.x8x8sad_wmt_loop:
-
- movq rax, mm7
- cmp eax, arg(4)
- ja .x8x8sad_wmt_early_exit
-
- movq mm0, QWORD PTR [rsi]
- movq mm1, QWORD PTR [rdi]
-
- psadbw mm0, mm1
- lea rsi, [rsi+rbx]
-
- add rdi, rdx
- paddw mm7, mm0
-
- cmp rsi, rcx
- jne .x8x8sad_wmt_loop
-
- movq rax, mm7
-.x8x8sad_wmt_early_exit:
-
- ; begin epilog
- pop rdi
- pop rsi
- pop rbx
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;unsigned int vp8_sad4x4_wmt(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride)
-global sym(vp8_sad4x4_wmt) PRIVATE
-sym(vp8_sad4x4_wmt):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
- push rsi
- push rdi
- ; end prolog
-
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;ref_ptr
-
- movsxd rax, dword ptr arg(1) ;src_stride
- movsxd rdx, dword ptr arg(3) ;ref_stride
-
- movd mm0, DWORD PTR [rsi]
- movd mm1, DWORD PTR [rdi]
-
- movd mm2, DWORD PTR [rsi+rax]
- movd mm3, DWORD PTR [rdi+rdx]
-
- punpcklbw mm0, mm2
- punpcklbw mm1, mm3
-
- psadbw mm0, mm1
- lea rsi, [rsi+rax*2]
-
- lea rdi, [rdi+rdx*2]
- movd mm4, DWORD PTR [rsi]
-
- movd mm5, DWORD PTR [rdi]
- movd mm6, DWORD PTR [rsi+rax]
-
- movd mm7, DWORD PTR [rdi+rdx]
- punpcklbw mm4, mm6
-
- punpcklbw mm5, mm7
- psadbw mm4, mm5
-
- paddw mm0, mm4
- movq rax, mm0
-
- ; begin epilog
- pop rdi
- pop rsi
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-;unsigned int vp8_sad16x8_wmt(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride)
-global sym(vp8_sad16x8_wmt) PRIVATE
-sym(vp8_sad16x8_wmt):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
- push rbx
- push rsi
- push rdi
- ; end prolog
-
-
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;ref_ptr
-
- movsxd rbx, dword ptr arg(1) ;src_stride
- movsxd rdx, dword ptr arg(3) ;ref_stride
-
- lea rcx, [rsi+rbx*8]
- pxor mm7, mm7
-
-.x16x8sad_wmt_loop:
-
- movq rax, mm7
- cmp eax, arg(4)
- ja .x16x8sad_wmt_early_exit
-
- movq mm0, QWORD PTR [rsi]
- movq mm2, QWORD PTR [rsi+8]
-
- movq mm1, QWORD PTR [rdi]
- movq mm3, QWORD PTR [rdi+8]
-
- movq mm4, QWORD PTR [rsi+rbx]
- movq mm5, QWORD PTR [rdi+rdx]
-
- psadbw mm0, mm1
- psadbw mm2, mm3
-
- movq mm1, QWORD PTR [rsi+rbx+8]
- movq mm3, QWORD PTR [rdi+rdx+8]
-
- psadbw mm4, mm5
- psadbw mm1, mm3
-
- lea rsi, [rsi+rbx*2]
- lea rdi, [rdi+rdx*2]
-
- paddw mm0, mm2
- paddw mm4, mm1
-
- paddw mm7, mm0
- paddw mm7, mm4
-
- cmp rsi, rcx
- jne .x16x8sad_wmt_loop
-
- movq rax, mm7
-
-.x16x8sad_wmt_early_exit:
-
- ; begin epilog
- pop rdi
- pop rsi
- pop rbx
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void vp8_copy32xn_sse2(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *dst_ptr,
-; int dst_stride,
-; int height);
-global sym(vp8_copy32xn_sse2) PRIVATE
-sym(vp8_copy32xn_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
- SAVE_XMM 7
- push rsi
- push rdi
- ; end prolog
-
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;dst_ptr
-
- movsxd rax, dword ptr arg(1) ;src_stride
- movsxd rdx, dword ptr arg(3) ;dst_stride
- movsxd rcx, dword ptr arg(4) ;height
-
-.block_copy_sse2_loopx4:
- movdqu xmm0, XMMWORD PTR [rsi]
- movdqu xmm1, XMMWORD PTR [rsi + 16]
- movdqu xmm2, XMMWORD PTR [rsi + rax]
- movdqu xmm3, XMMWORD PTR [rsi + rax + 16]
-
- lea rsi, [rsi+rax*2]
-
- movdqu xmm4, XMMWORD PTR [rsi]
- movdqu xmm5, XMMWORD PTR [rsi + 16]
- movdqu xmm6, XMMWORD PTR [rsi + rax]
- movdqu xmm7, XMMWORD PTR [rsi + rax + 16]
-
- lea rsi, [rsi+rax*2]
-
- movdqa XMMWORD PTR [rdi], xmm0
- movdqa XMMWORD PTR [rdi + 16], xmm1
- movdqa XMMWORD PTR [rdi + rdx], xmm2
- movdqa XMMWORD PTR [rdi + rdx + 16], xmm3
-
- lea rdi, [rdi+rdx*2]
-
- movdqa XMMWORD PTR [rdi], xmm4
- movdqa XMMWORD PTR [rdi + 16], xmm5
- movdqa XMMWORD PTR [rdi + rdx], xmm6
- movdqa XMMWORD PTR [rdi + rdx + 16], xmm7
-
- lea rdi, [rdi+rdx*2]
-
- sub rcx, 4
- cmp rcx, 4
- jge .block_copy_sse2_loopx4
-
- cmp rcx, 0
- je .copy_is_done
-
-.block_copy_sse2_loop:
- movdqu xmm0, XMMWORD PTR [rsi]
- movdqu xmm1, XMMWORD PTR [rsi + 16]
- lea rsi, [rsi+rax]
-
- movdqa XMMWORD PTR [rdi], xmm0
- movdqa XMMWORD PTR [rdi + 16], xmm1
- lea rdi, [rdi+rdx]
-
- sub rcx, 1
- jne .block_copy_sse2_loop
-
-.copy_is_done:
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
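
A note on what was removed: every vp8_sadMxN_wmt kernel above reduces to the same reference computation, a sum of absolute byte differences over a width x height block, accumulated with psadbw. A minimal C sketch of that reference (the function name is illustrative, not anything in the tree):

#include <stdint.h>
#include <stdlib.h>

/* Reference SAD over a width x height block; a sketch of what the
 * deleted vp8_sadMxN_wmt kernels compute with psadbw. */
static unsigned int sad_mxn_c(const uint8_t *src, int src_stride,
                              const uint8_t *ref, int ref_stride,
                              int width, int height) {
  unsigned int sad = 0;
  int r, c;
  for (r = 0; r < height; ++r) {
    for (c = 0; c < width; ++c)
      sad += abs(src[c] - ref[c]);
    src += src_stride;
    ref += ref_stride;
  }
  return sad;
}

The 8x16, 8x8 and 16x8 kernels additionally checked a fifth max_sad argument once per row pair (cmp eax, arg(4) / ja ..._early_exit) and bailed out when the running total already exceeded the caller's best; the replacement vpx_dsp interface drops that argument, as the mcomp.c hunks below show.
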
diff --git a/vp8/common/x86/sad_sse3.asm b/vp8/common/x86/sad_sse3.asm
deleted file mode 100644
index 69c8d37..0000000
--- a/vp8/common/x86/sad_sse3.asm
+++ /dev/null
@@ -1,960 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-%include "vpx_ports/x86_abi_support.asm"
-
-%macro STACK_FRAME_CREATE_X3 0
-%if ABI_IS_32BIT
- %define src_ptr rsi
- %define src_stride rax
- %define ref_ptr rdi
- %define ref_stride rdx
- %define end_ptr rcx
- %define ret_var rbx
- %define result_ptr arg(4)
- %define max_sad arg(4)
- %define height dword ptr arg(4)
- push rbp
- mov rbp, rsp
- push rsi
- push rdi
- push rbx
-
- mov rsi, arg(0) ; src_ptr
- mov rdi, arg(2) ; ref_ptr
-
- movsxd rax, dword ptr arg(1) ; src_stride
- movsxd rdx, dword ptr arg(3) ; ref_stride
-%else
- %if LIBVPX_YASM_WIN64
- SAVE_XMM 7, u
- %define src_ptr rcx
- %define src_stride rdx
- %define ref_ptr r8
- %define ref_stride r9
- %define end_ptr r10
- %define ret_var r11
- %define result_ptr [rsp+xmm_stack_space+8+4*8]
- %define max_sad [rsp+xmm_stack_space+8+4*8]
- %define height dword ptr [rsp+xmm_stack_space+8+4*8]
- %else
- %define src_ptr rdi
- %define src_stride rsi
- %define ref_ptr rdx
- %define ref_stride rcx
- %define end_ptr r9
- %define ret_var r10
- %define result_ptr r8
- %define max_sad r8
- %define height r8
- %endif
-%endif
-
-%endmacro
-
-%macro STACK_FRAME_DESTROY_X3 0
- %define src_ptr
- %define src_stride
- %define ref_ptr
- %define ref_stride
- %define end_ptr
- %define ret_var
- %define result_ptr
- %define max_sad
- %define height
-
-%if ABI_IS_32BIT
- pop rbx
- pop rdi
- pop rsi
- pop rbp
-%else
- %if LIBVPX_YASM_WIN64
- RESTORE_XMM
- %endif
-%endif
- ret
-%endmacro
-
-%macro STACK_FRAME_CREATE_X4 0
-%if ABI_IS_32BIT
- %define src_ptr rsi
- %define src_stride rax
- %define r0_ptr rcx
- %define r1_ptr rdx
- %define r2_ptr rbx
- %define r3_ptr rdi
- %define ref_stride rbp
- %define result_ptr arg(4)
- push rbp
- mov rbp, rsp
- push rsi
- push rdi
- push rbx
-
- push rbp
- mov rdi, arg(2) ; ref_ptr_base
-
- LOAD_X4_ADDRESSES rdi, rcx, rdx, rax, rdi
-
- mov rsi, arg(0) ; src_ptr
-
- movsxd rbx, dword ptr arg(1) ; src_stride
- movsxd rbp, dword ptr arg(3) ; ref_stride
-
- xchg rbx, rax
-%else
- %if LIBVPX_YASM_WIN64
- SAVE_XMM 7, u
- %define src_ptr rcx
- %define src_stride rdx
- %define r0_ptr rsi
- %define r1_ptr r10
- %define r2_ptr r11
- %define r3_ptr r8
- %define ref_stride r9
- %define result_ptr [rsp+xmm_stack_space+16+4*8]
- push rsi
-
- LOAD_X4_ADDRESSES r8, r0_ptr, r1_ptr, r2_ptr, r3_ptr
- %else
- %define src_ptr rdi
- %define src_stride rsi
- %define r0_ptr r9
- %define r1_ptr r10
- %define r2_ptr r11
- %define r3_ptr rdx
- %define ref_stride rcx
- %define result_ptr r8
-
- LOAD_X4_ADDRESSES rdx, r0_ptr, r1_ptr, r2_ptr, r3_ptr
-
- %endif
-%endif
-%endmacro
-
-%macro STACK_FRAME_DESTROY_X4 0
- %define src_ptr
- %define src_stride
- %define r0_ptr
- %define r1_ptr
- %define r2_ptr
- %define r3_ptr
- %define ref_stride
- %define result_ptr
-
-%if ABI_IS_32BIT
- pop rbx
- pop rdi
- pop rsi
- pop rbp
-%else
- %if LIBVPX_YASM_WIN64
- pop rsi
- RESTORE_XMM
- %endif
-%endif
- ret
-%endmacro
-
-%macro PROCESS_16X2X3 5
-%if %1==0
- movdqa xmm0, XMMWORD PTR [%2]
- lddqu xmm5, XMMWORD PTR [%3]
- lddqu xmm6, XMMWORD PTR [%3+1]
- lddqu xmm7, XMMWORD PTR [%3+2]
-
- psadbw xmm5, xmm0
- psadbw xmm6, xmm0
- psadbw xmm7, xmm0
-%else
- movdqa xmm0, XMMWORD PTR [%2]
- lddqu xmm1, XMMWORD PTR [%3]
- lddqu xmm2, XMMWORD PTR [%3+1]
- lddqu xmm3, XMMWORD PTR [%3+2]
-
- psadbw xmm1, xmm0
- psadbw xmm2, xmm0
- psadbw xmm3, xmm0
-
- paddw xmm5, xmm1
- paddw xmm6, xmm2
- paddw xmm7, xmm3
-%endif
- movdqa xmm0, XMMWORD PTR [%2+%4]
- lddqu xmm1, XMMWORD PTR [%3+%5]
- lddqu xmm2, XMMWORD PTR [%3+%5+1]
- lddqu xmm3, XMMWORD PTR [%3+%5+2]
-
-%if %1==0 || %1==1
- lea %2, [%2+%4*2]
- lea %3, [%3+%5*2]
-%endif
-
- psadbw xmm1, xmm0
- psadbw xmm2, xmm0
- psadbw xmm3, xmm0
-
- paddw xmm5, xmm1
- paddw xmm6, xmm2
- paddw xmm7, xmm3
-%endmacro
-
-%macro PROCESS_8X2X3 5
-%if %1==0
- movq mm0, QWORD PTR [%2]
- movq mm5, QWORD PTR [%3]
- movq mm6, QWORD PTR [%3+1]
- movq mm7, QWORD PTR [%3+2]
-
- psadbw mm5, mm0
- psadbw mm6, mm0
- psadbw mm7, mm0
-%else
- movq mm0, QWORD PTR [%2]
- movq mm1, QWORD PTR [%3]
- movq mm2, QWORD PTR [%3+1]
- movq mm3, QWORD PTR [%3+2]
-
- psadbw mm1, mm0
- psadbw mm2, mm0
- psadbw mm3, mm0
-
- paddw mm5, mm1
- paddw mm6, mm2
- paddw mm7, mm3
-%endif
- movq mm0, QWORD PTR [%2+%4]
- movq mm1, QWORD PTR [%3+%5]
- movq mm2, QWORD PTR [%3+%5+1]
- movq mm3, QWORD PTR [%3+%5+2]
-
-%if %1==0 || %1==1
- lea %2, [%2+%4*2]
- lea %3, [%3+%5*2]
-%endif
-
- psadbw mm1, mm0
- psadbw mm2, mm0
- psadbw mm3, mm0
-
- paddw mm5, mm1
- paddw mm6, mm2
- paddw mm7, mm3
-%endmacro
-
-%macro LOAD_X4_ADDRESSES 5
- mov %2, [%1+REG_SZ_BYTES*0]
- mov %3, [%1+REG_SZ_BYTES*1]
-
- mov %4, [%1+REG_SZ_BYTES*2]
- mov %5, [%1+REG_SZ_BYTES*3]
-%endmacro
-
-%macro PROCESS_16X2X4 8
-%if %1==0
- movdqa xmm0, XMMWORD PTR [%2]
- lddqu xmm4, XMMWORD PTR [%3]
- lddqu xmm5, XMMWORD PTR [%4]
- lddqu xmm6, XMMWORD PTR [%5]
- lddqu xmm7, XMMWORD PTR [%6]
-
- psadbw xmm4, xmm0
- psadbw xmm5, xmm0
- psadbw xmm6, xmm0
- psadbw xmm7, xmm0
-%else
- movdqa xmm0, XMMWORD PTR [%2]
- lddqu xmm1, XMMWORD PTR [%3]
- lddqu xmm2, XMMWORD PTR [%4]
- lddqu xmm3, XMMWORD PTR [%5]
-
- psadbw xmm1, xmm0
- psadbw xmm2, xmm0
- psadbw xmm3, xmm0
-
- paddw xmm4, xmm1
- lddqu xmm1, XMMWORD PTR [%6]
- paddw xmm5, xmm2
- paddw xmm6, xmm3
-
- psadbw xmm1, xmm0
- paddw xmm7, xmm1
-%endif
- movdqa xmm0, XMMWORD PTR [%2+%7]
- lddqu xmm1, XMMWORD PTR [%3+%8]
- lddqu xmm2, XMMWORD PTR [%4+%8]
- lddqu xmm3, XMMWORD PTR [%5+%8]
-
- psadbw xmm1, xmm0
- psadbw xmm2, xmm0
- psadbw xmm3, xmm0
-
- paddw xmm4, xmm1
- lddqu xmm1, XMMWORD PTR [%6+%8]
- paddw xmm5, xmm2
- paddw xmm6, xmm3
-
-%if %1==0 || %1==1
- lea %2, [%2+%7*2]
- lea %3, [%3+%8*2]
-
- lea %4, [%4+%8*2]
- lea %5, [%5+%8*2]
-
- lea %6, [%6+%8*2]
-%endif
- psadbw xmm1, xmm0
- paddw xmm7, xmm1
-
-%endmacro
-
-%macro PROCESS_8X2X4 8
-%if %1==0
- movq mm0, QWORD PTR [%2]
- movq mm4, QWORD PTR [%3]
- movq mm5, QWORD PTR [%4]
- movq mm6, QWORD PTR [%5]
- movq mm7, QWORD PTR [%6]
-
- psadbw mm4, mm0
- psadbw mm5, mm0
- psadbw mm6, mm0
- psadbw mm7, mm0
-%else
- movq mm0, QWORD PTR [%2]
- movq mm1, QWORD PTR [%3]
- movq mm2, QWORD PTR [%4]
- movq mm3, QWORD PTR [%5]
-
- psadbw mm1, mm0
- psadbw mm2, mm0
- psadbw mm3, mm0
-
- paddw mm4, mm1
- movq mm1, QWORD PTR [%6]
- paddw mm5, mm2
- paddw mm6, mm3
-
- psadbw mm1, mm0
- paddw mm7, mm1
-%endif
- movq mm0, QWORD PTR [%2+%7]
- movq mm1, QWORD PTR [%3+%8]
- movq mm2, QWORD PTR [%4+%8]
- movq mm3, QWORD PTR [%5+%8]
-
- psadbw mm1, mm0
- psadbw mm2, mm0
- psadbw mm3, mm0
-
- paddw mm4, mm1
- movq mm1, QWORD PTR [%6+%8]
- paddw mm5, mm2
- paddw mm6, mm3
-
-%if %1==0 || %1==1
- lea %2, [%2+%7*2]
- lea %3, [%3+%8*2]
-
- lea %4, [%4+%8*2]
- lea %5, [%5+%8*2]
-
- lea %6, [%6+%8*2]
-%endif
- psadbw mm1, mm0
- paddw mm7, mm1
-
-%endmacro
-
-;void vp8_sad16x16x3_sse3(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride,
-; int *results)
-global sym(vp8_sad16x16x3_sse3) PRIVATE
-sym(vp8_sad16x16x3_sse3):
-
- STACK_FRAME_CREATE_X3
-
- PROCESS_16X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
- PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
- PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
- PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
- PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
- PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
- PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
- PROCESS_16X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride
-
- mov rcx, result_ptr
-
- movq xmm0, xmm5
- psrldq xmm5, 8
-
- paddw xmm0, xmm5
- movd [rcx], xmm0
-;-
- movq xmm0, xmm6
- psrldq xmm6, 8
-
- paddw xmm0, xmm6
- movd [rcx+4], xmm0
-;-
- movq xmm0, xmm7
- psrldq xmm7, 8
-
- paddw xmm0, xmm7
- movd [rcx+8], xmm0
-
- STACK_FRAME_DESTROY_X3
-
-;void vp8_sad16x8x3_sse3(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride,
-; int *results)
-global sym(vp8_sad16x8x3_sse3) PRIVATE
-sym(vp8_sad16x8x3_sse3):
-
- STACK_FRAME_CREATE_X3
-
- PROCESS_16X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
- PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
- PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
- PROCESS_16X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride
-
- mov rcx, result_ptr
-
- movq xmm0, xmm5
- psrldq xmm5, 8
-
- paddw xmm0, xmm5
- movd [rcx], xmm0
-;-
- movq xmm0, xmm6
- psrldq xmm6, 8
-
- paddw xmm0, xmm6
- movd [rcx+4], xmm0
-;-
- movq xmm0, xmm7
- psrldq xmm7, 8
-
- paddw xmm0, xmm7
- movd [rcx+8], xmm0
-
- STACK_FRAME_DESTROY_X3
-
-;void vp8_sad8x16x3_sse3(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride,
-; int *results)
-global sym(vp8_sad8x16x3_sse3) PRIVATE
-sym(vp8_sad8x16x3_sse3):
-
- STACK_FRAME_CREATE_X3
-
- PROCESS_8X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
- PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
- PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
- PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
- PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
- PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
- PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
- PROCESS_8X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride
-
- mov rcx, result_ptr
-
- punpckldq mm5, mm6
-
- movq [rcx], mm5
- movd [rcx+8], mm7
-
- STACK_FRAME_DESTROY_X3
-
-;void vp8_sad8x8x3_sse3(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride,
-; int *results)
-global sym(vp8_sad8x8x3_sse3) PRIVATE
-sym(vp8_sad8x8x3_sse3):
-
- STACK_FRAME_CREATE_X3
-
- PROCESS_8X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
- PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
- PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
- PROCESS_8X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride
-
- mov rcx, result_ptr
-
- punpckldq mm5, mm6
-
- movq [rcx], mm5
- movd [rcx+8], mm7
-
- STACK_FRAME_DESTROY_X3
-
-;void vp8_sad4x4x3_sse3(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride,
-; int *results)
-global sym(vp8_sad4x4x3_sse3) PRIVATE
-sym(vp8_sad4x4x3_sse3):
-
- STACK_FRAME_CREATE_X3
-
- movd mm0, DWORD PTR [src_ptr]
- movd mm1, DWORD PTR [ref_ptr]
-
- movd mm2, DWORD PTR [src_ptr+src_stride]
- movd mm3, DWORD PTR [ref_ptr+ref_stride]
-
- punpcklbw mm0, mm2
- punpcklbw mm1, mm3
-
- movd mm4, DWORD PTR [ref_ptr+1]
- movd mm5, DWORD PTR [ref_ptr+2]
-
- movd mm2, DWORD PTR [ref_ptr+ref_stride+1]
- movd mm3, DWORD PTR [ref_ptr+ref_stride+2]
-
- psadbw mm1, mm0
-
- punpcklbw mm4, mm2
- punpcklbw mm5, mm3
-
- psadbw mm4, mm0
- psadbw mm5, mm0
-
- lea src_ptr, [src_ptr+src_stride*2]
- lea ref_ptr, [ref_ptr+ref_stride*2]
-
- movd mm0, DWORD PTR [src_ptr]
- movd mm2, DWORD PTR [ref_ptr]
-
- movd mm3, DWORD PTR [src_ptr+src_stride]
- movd mm6, DWORD PTR [ref_ptr+ref_stride]
-
- punpcklbw mm0, mm3
- punpcklbw mm2, mm6
-
- movd mm3, DWORD PTR [ref_ptr+1]
- movd mm7, DWORD PTR [ref_ptr+2]
-
- psadbw mm2, mm0
-
- paddw mm1, mm2
-
- movd mm2, DWORD PTR [ref_ptr+ref_stride+1]
- movd mm6, DWORD PTR [ref_ptr+ref_stride+2]
-
- punpcklbw mm3, mm2
- punpcklbw mm7, mm6
-
- psadbw mm3, mm0
- psadbw mm7, mm0
-
- paddw mm3, mm4
- paddw mm7, mm5
-
- mov rcx, result_ptr
-
- punpckldq mm1, mm3
-
- movq [rcx], mm1
- movd [rcx+8], mm7
-
- STACK_FRAME_DESTROY_X3
-
-;unsigned int vp8_sad16x16_sse3(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride,
-; int max_sad)
-;%define lddqu movdqu
-global sym(vp8_sad16x16_sse3) PRIVATE
-sym(vp8_sad16x16_sse3):
-
- STACK_FRAME_CREATE_X3
-
- mov end_ptr, 4
- pxor xmm7, xmm7
-
-.vp8_sad16x16_sse3_loop:
- movdqa xmm0, XMMWORD PTR [src_ptr]
- movdqu xmm1, XMMWORD PTR [ref_ptr]
- movdqa xmm2, XMMWORD PTR [src_ptr+src_stride]
- movdqu xmm3, XMMWORD PTR [ref_ptr+ref_stride]
-
- lea src_ptr, [src_ptr+src_stride*2]
- lea ref_ptr, [ref_ptr+ref_stride*2]
-
- movdqa xmm4, XMMWORD PTR [src_ptr]
- movdqu xmm5, XMMWORD PTR [ref_ptr]
- movdqa xmm6, XMMWORD PTR [src_ptr+src_stride]
-
- psadbw xmm0, xmm1
-
- movdqu xmm1, XMMWORD PTR [ref_ptr+ref_stride]
-
- psadbw xmm2, xmm3
- psadbw xmm4, xmm5
- psadbw xmm6, xmm1
-
- lea src_ptr, [src_ptr+src_stride*2]
- lea ref_ptr, [ref_ptr+ref_stride*2]
-
- paddw xmm7, xmm0
- paddw xmm7, xmm2
- paddw xmm7, xmm4
- paddw xmm7, xmm6
-
- sub end_ptr, 1
- jne .vp8_sad16x16_sse3_loop
-
- movq xmm0, xmm7
- psrldq xmm7, 8
- paddw xmm0, xmm7
- movq rax, xmm0
-
- STACK_FRAME_DESTROY_X3
-
-;void vp8_copy32xn_sse3(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *dst_ptr,
-; int dst_stride,
-; int height);
-global sym(vp8_copy32xn_sse3) PRIVATE
-sym(vp8_copy32xn_sse3):
-
- STACK_FRAME_CREATE_X3
-
-.block_copy_sse3_loopx4:
- lea end_ptr, [src_ptr+src_stride*2]
-
- movdqu xmm0, XMMWORD PTR [src_ptr]
- movdqu xmm1, XMMWORD PTR [src_ptr + 16]
- movdqu xmm2, XMMWORD PTR [src_ptr + src_stride]
- movdqu xmm3, XMMWORD PTR [src_ptr + src_stride + 16]
- movdqu xmm4, XMMWORD PTR [end_ptr]
- movdqu xmm5, XMMWORD PTR [end_ptr + 16]
- movdqu xmm6, XMMWORD PTR [end_ptr + src_stride]
- movdqu xmm7, XMMWORD PTR [end_ptr + src_stride + 16]
-
- lea src_ptr, [src_ptr+src_stride*4]
-
- lea end_ptr, [ref_ptr+ref_stride*2]
-
- movdqa XMMWORD PTR [ref_ptr], xmm0
- movdqa XMMWORD PTR [ref_ptr + 16], xmm1
- movdqa XMMWORD PTR [ref_ptr + ref_stride], xmm2
- movdqa XMMWORD PTR [ref_ptr + ref_stride + 16], xmm3
- movdqa XMMWORD PTR [end_ptr], xmm4
- movdqa XMMWORD PTR [end_ptr + 16], xmm5
- movdqa XMMWORD PTR [end_ptr + ref_stride], xmm6
- movdqa XMMWORD PTR [end_ptr + ref_stride + 16], xmm7
-
- lea ref_ptr, [ref_ptr+ref_stride*4]
-
- sub height, 4
- cmp height, 4
- jge .block_copy_sse3_loopx4
-
-    ;Check to see if there are more rows that need to be copied.
- cmp height, 0
- je .copy_is_done
-
-.block_copy_sse3_loop:
- movdqu xmm0, XMMWORD PTR [src_ptr]
- movdqu xmm1, XMMWORD PTR [src_ptr + 16]
- lea src_ptr, [src_ptr+src_stride]
-
- movdqa XMMWORD PTR [ref_ptr], xmm0
- movdqa XMMWORD PTR [ref_ptr + 16], xmm1
- lea ref_ptr, [ref_ptr+ref_stride]
-
- sub height, 1
- jne .block_copy_sse3_loop
-
-.copy_is_done:
- STACK_FRAME_DESTROY_X3
-
-;void vp8_sad16x16x4d_sse3(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr_base,
-; int ref_stride,
-; int *results)
-global sym(vp8_sad16x16x4d_sse3) PRIVATE
-sym(vp8_sad16x16x4d_sse3):
-
- STACK_FRAME_CREATE_X4
-
- PROCESS_16X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_16X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
-
-%if ABI_IS_32BIT
- pop rbp
-%endif
- mov rcx, result_ptr
-
- movq xmm0, xmm4
- psrldq xmm4, 8
-
- paddw xmm0, xmm4
- movd [rcx], xmm0
-;-
- movq xmm0, xmm5
- psrldq xmm5, 8
-
- paddw xmm0, xmm5
- movd [rcx+4], xmm0
-;-
- movq xmm0, xmm6
- psrldq xmm6, 8
-
- paddw xmm0, xmm6
- movd [rcx+8], xmm0
-;-
- movq xmm0, xmm7
- psrldq xmm7, 8
-
- paddw xmm0, xmm7
- movd [rcx+12], xmm0
-
- STACK_FRAME_DESTROY_X4
-
-;void vp8_sad16x8x4d_sse3(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr_base,
-; int ref_stride,
-; int *results)
-global sym(vp8_sad16x8x4d_sse3) PRIVATE
-sym(vp8_sad16x8x4d_sse3):
-
- STACK_FRAME_CREATE_X4
-
- PROCESS_16X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_16X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
-
-%if ABI_IS_32BIT
- pop rbp
-%endif
- mov rcx, result_ptr
-
- movq xmm0, xmm4
- psrldq xmm4, 8
-
- paddw xmm0, xmm4
- movd [rcx], xmm0
-;-
- movq xmm0, xmm5
- psrldq xmm5, 8
-
- paddw xmm0, xmm5
- movd [rcx+4], xmm0
-;-
- movq xmm0, xmm6
- psrldq xmm6, 8
-
- paddw xmm0, xmm6
- movd [rcx+8], xmm0
-;-
- movq xmm0, xmm7
- psrldq xmm7, 8
-
- paddw xmm0, xmm7
- movd [rcx+12], xmm0
-
- STACK_FRAME_DESTROY_X4
-
-;void vp8_sad8x16x4d_sse3(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride,
-; int *results)
-global sym(vp8_sad8x16x4d_sse3) PRIVATE
-sym(vp8_sad8x16x4d_sse3):
-
- STACK_FRAME_CREATE_X4
-
- PROCESS_8X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_8X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
-
-%if ABI_IS_32BIT
- pop rbp
-%endif
- mov rcx, result_ptr
-
- punpckldq mm4, mm5
- punpckldq mm6, mm7
-
- movq [rcx], mm4
- movq [rcx+8], mm6
-
- STACK_FRAME_DESTROY_X4
-
-;void vp8_sad8x8x4d_sse3(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride,
-; int *results)
-global sym(vp8_sad8x8x4d_sse3) PRIVATE
-sym(vp8_sad8x8x4d_sse3):
-
- STACK_FRAME_CREATE_X4
-
- PROCESS_8X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_8X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
-
-%if ABI_IS_32BIT
- pop rbp
-%endif
- mov rcx, result_ptr
-
- punpckldq mm4, mm5
- punpckldq mm6, mm7
-
- movq [rcx], mm4
- movq [rcx+8], mm6
-
- STACK_FRAME_DESTROY_X4
-
-;void vp8_sad4x4x4d_sse3(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride,
-; int *results)
-global sym(vp8_sad4x4x4d_sse3) PRIVATE
-sym(vp8_sad4x4x4d_sse3):
-
- STACK_FRAME_CREATE_X4
-
- movd mm0, DWORD PTR [src_ptr]
- movd mm1, DWORD PTR [r0_ptr]
-
- movd mm2, DWORD PTR [src_ptr+src_stride]
- movd mm3, DWORD PTR [r0_ptr+ref_stride]
-
- punpcklbw mm0, mm2
- punpcklbw mm1, mm3
-
- movd mm4, DWORD PTR [r1_ptr]
- movd mm5, DWORD PTR [r2_ptr]
-
- movd mm6, DWORD PTR [r3_ptr]
- movd mm2, DWORD PTR [r1_ptr+ref_stride]
-
- movd mm3, DWORD PTR [r2_ptr+ref_stride]
- movd mm7, DWORD PTR [r3_ptr+ref_stride]
-
- psadbw mm1, mm0
-
- punpcklbw mm4, mm2
- punpcklbw mm5, mm3
-
- punpcklbw mm6, mm7
- psadbw mm4, mm0
-
- psadbw mm5, mm0
- psadbw mm6, mm0
-
-
-
- lea src_ptr, [src_ptr+src_stride*2]
- lea r0_ptr, [r0_ptr+ref_stride*2]
-
- lea r1_ptr, [r1_ptr+ref_stride*2]
- lea r2_ptr, [r2_ptr+ref_stride*2]
-
- lea r3_ptr, [r3_ptr+ref_stride*2]
-
- movd mm0, DWORD PTR [src_ptr]
- movd mm2, DWORD PTR [r0_ptr]
-
- movd mm3, DWORD PTR [src_ptr+src_stride]
- movd mm7, DWORD PTR [r0_ptr+ref_stride]
-
- punpcklbw mm0, mm3
- punpcklbw mm2, mm7
-
- movd mm3, DWORD PTR [r1_ptr]
- movd mm7, DWORD PTR [r2_ptr]
-
- psadbw mm2, mm0
-%if ABI_IS_32BIT
- mov rax, rbp
-
- pop rbp
-%define ref_stride rax
-%endif
- mov rsi, result_ptr
-
- paddw mm1, mm2
- movd [rsi], mm1
-
- movd mm2, DWORD PTR [r1_ptr+ref_stride]
- movd mm1, DWORD PTR [r2_ptr+ref_stride]
-
- punpcklbw mm3, mm2
- punpcklbw mm7, mm1
-
- psadbw mm3, mm0
- psadbw mm7, mm0
-
- movd mm2, DWORD PTR [r3_ptr]
- movd mm1, DWORD PTR [r3_ptr+ref_stride]
-
- paddw mm3, mm4
- paddw mm7, mm5
-
- movd [rsi+4], mm3
- punpcklbw mm2, mm1
-
- movd [rsi+8], mm7
- psadbw mm2, mm0
-
- paddw mm2, mm6
- movd [rsi+12], mm2
-
-
- STACK_FRAME_DESTROY_X4
-
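
The SSE3 file adds two multi-candidate variants on top of the plain SAD. The x3 kernels score the reference at byte offsets 0, 1 and 2 (the lddqu loads from %3, %3+1 and %3+2 in PROCESS_16X2X3), while the x4d kernels score four independent reference pointers gathered by LOAD_X4_ADDRESSES. A hedged C sketch of both, reusing sad_mxn_c from the sketch above:

/* Three SADs at consecutive reference columns. */
static void sad_mxn_x3_c(const uint8_t *src, int src_stride,
                         const uint8_t *ref, int ref_stride,
                         int width, int height, unsigned int *sad_array) {
  int i;
  for (i = 0; i < 3; ++i)  /* ref, ref + 1, ref + 2 */
    sad_array[i] = sad_mxn_c(src, src_stride, ref + i, ref_stride,
                             width, height);
}

/* Four SADs against four independent candidate blocks. */
static void sad_mxn_x4d_c(const uint8_t *src, int src_stride,
                          const uint8_t *const ref_array[4], int ref_stride,
                          int width, int height, unsigned int *sad_array) {
  int i;
  for (i = 0; i < 4; ++i)
    sad_array[i] = sad_mxn_c(src, src_stride, ref_array[i], ref_stride,
                             width, height);
}
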
diff --git a/vp8/common/x86/sad_sse4.asm b/vp8/common/x86/sad_sse4.asm
deleted file mode 100644
index f7fccd7..0000000
--- a/vp8/common/x86/sad_sse4.asm
+++ /dev/null
@@ -1,353 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
-%include "vpx_ports/x86_abi_support.asm"
-
-%macro PROCESS_16X2X8 1
-%if %1
- movdqa xmm0, XMMWORD PTR [rsi]
- movq xmm1, MMWORD PTR [rdi]
- movq xmm3, MMWORD PTR [rdi+8]
- movq xmm2, MMWORD PTR [rdi+16]
- punpcklqdq xmm1, xmm3
- punpcklqdq xmm3, xmm2
-
- movdqa xmm2, xmm1
- mpsadbw xmm1, xmm0, 0x0
- mpsadbw xmm2, xmm0, 0x5
-
- psrldq xmm0, 8
-
- movdqa xmm4, xmm3
- mpsadbw xmm3, xmm0, 0x0
- mpsadbw xmm4, xmm0, 0x5
-
- paddw xmm1, xmm2
- paddw xmm1, xmm3
- paddw xmm1, xmm4
-%else
- movdqa xmm0, XMMWORD PTR [rsi]
- movq xmm5, MMWORD PTR [rdi]
- movq xmm3, MMWORD PTR [rdi+8]
- movq xmm2, MMWORD PTR [rdi+16]
- punpcklqdq xmm5, xmm3
- punpcklqdq xmm3, xmm2
-
- movdqa xmm2, xmm5
- mpsadbw xmm5, xmm0, 0x0
- mpsadbw xmm2, xmm0, 0x5
-
- psrldq xmm0, 8
-
- movdqa xmm4, xmm3
- mpsadbw xmm3, xmm0, 0x0
- mpsadbw xmm4, xmm0, 0x5
-
- paddw xmm5, xmm2
- paddw xmm5, xmm3
- paddw xmm5, xmm4
-
- paddw xmm1, xmm5
-%endif
- movdqa xmm0, XMMWORD PTR [rsi + rax]
- movq xmm5, MMWORD PTR [rdi+ rdx]
- movq xmm3, MMWORD PTR [rdi+ rdx+8]
- movq xmm2, MMWORD PTR [rdi+ rdx+16]
- punpcklqdq xmm5, xmm3
- punpcklqdq xmm3, xmm2
-
- lea rsi, [rsi+rax*2]
- lea rdi, [rdi+rdx*2]
-
- movdqa xmm2, xmm5
- mpsadbw xmm5, xmm0, 0x0
- mpsadbw xmm2, xmm0, 0x5
-
- psrldq xmm0, 8
- movdqa xmm4, xmm3
- mpsadbw xmm3, xmm0, 0x0
- mpsadbw xmm4, xmm0, 0x5
-
- paddw xmm5, xmm2
- paddw xmm5, xmm3
- paddw xmm5, xmm4
-
- paddw xmm1, xmm5
-%endmacro
-
-%macro PROCESS_8X2X8 1
-%if %1
- movq xmm0, MMWORD PTR [rsi]
- movq xmm1, MMWORD PTR [rdi]
- movq xmm3, MMWORD PTR [rdi+8]
- punpcklqdq xmm1, xmm3
-
- movdqa xmm2, xmm1
- mpsadbw xmm1, xmm0, 0x0
- mpsadbw xmm2, xmm0, 0x5
- paddw xmm1, xmm2
-%else
- movq xmm0, MMWORD PTR [rsi]
- movq xmm5, MMWORD PTR [rdi]
- movq xmm3, MMWORD PTR [rdi+8]
- punpcklqdq xmm5, xmm3
-
- movdqa xmm2, xmm5
- mpsadbw xmm5, xmm0, 0x0
- mpsadbw xmm2, xmm0, 0x5
- paddw xmm5, xmm2
-
- paddw xmm1, xmm5
-%endif
- movq xmm0, MMWORD PTR [rsi + rax]
- movq xmm5, MMWORD PTR [rdi+ rdx]
- movq xmm3, MMWORD PTR [rdi+ rdx+8]
- punpcklqdq xmm5, xmm3
-
- lea rsi, [rsi+rax*2]
- lea rdi, [rdi+rdx*2]
-
- movdqa xmm2, xmm5
- mpsadbw xmm5, xmm0, 0x0
- mpsadbw xmm2, xmm0, 0x5
- paddw xmm5, xmm2
-
- paddw xmm1, xmm5
-%endmacro
-
-%macro PROCESS_4X2X8 1
-%if %1
- movd xmm0, [rsi]
- movq xmm1, MMWORD PTR [rdi]
- movq xmm3, MMWORD PTR [rdi+8]
- punpcklqdq xmm1, xmm3
-
- mpsadbw xmm1, xmm0, 0x0
-%else
- movd xmm0, [rsi]
- movq xmm5, MMWORD PTR [rdi]
- movq xmm3, MMWORD PTR [rdi+8]
- punpcklqdq xmm5, xmm3
-
- mpsadbw xmm5, xmm0, 0x0
-
- paddw xmm1, xmm5
-%endif
- movd xmm0, [rsi + rax]
- movq xmm5, MMWORD PTR [rdi+ rdx]
- movq xmm3, MMWORD PTR [rdi+ rdx+8]
- punpcklqdq xmm5, xmm3
-
- lea rsi, [rsi+rax*2]
- lea rdi, [rdi+rdx*2]
-
- mpsadbw xmm5, xmm0, 0x0
-
- paddw xmm1, xmm5
-%endmacro
-
-
-;void vp8_sad16x16x8_sse4(
-; const unsigned char *src_ptr,
-; int src_stride,
-; const unsigned char *ref_ptr,
-; int ref_stride,
-; unsigned short *sad_array);
-global sym(vp8_sad16x16x8_sse4) PRIVATE
-sym(vp8_sad16x16x8_sse4):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
- push rsi
- push rdi
- ; end prolog
-
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;ref_ptr
-
- movsxd rax, dword ptr arg(1) ;src_stride
- movsxd rdx, dword ptr arg(3) ;ref_stride
-
- PROCESS_16X2X8 1
- PROCESS_16X2X8 0
- PROCESS_16X2X8 0
- PROCESS_16X2X8 0
- PROCESS_16X2X8 0
- PROCESS_16X2X8 0
- PROCESS_16X2X8 0
- PROCESS_16X2X8 0
-
- mov rdi, arg(4) ;Results
- movdqa XMMWORD PTR [rdi], xmm1
-
- ; begin epilog
- pop rdi
- pop rsi
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-;void vp8_sad16x8x8_sse4(
-; const unsigned char *src_ptr,
-; int src_stride,
-; const unsigned char *ref_ptr,
-; int ref_stride,
-; unsigned short *sad_array
-;);
-global sym(vp8_sad16x8x8_sse4) PRIVATE
-sym(vp8_sad16x8x8_sse4):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
- push rsi
- push rdi
- ; end prolog
-
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;ref_ptr
-
- movsxd rax, dword ptr arg(1) ;src_stride
- movsxd rdx, dword ptr arg(3) ;ref_stride
-
- PROCESS_16X2X8 1
- PROCESS_16X2X8 0
- PROCESS_16X2X8 0
- PROCESS_16X2X8 0
-
- mov rdi, arg(4) ;Results
- movdqa XMMWORD PTR [rdi], xmm1
-
- ; begin epilog
- pop rdi
- pop rsi
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-;void vp8_sad8x8x8_sse4(
-; const unsigned char *src_ptr,
-; int src_stride,
-; const unsigned char *ref_ptr,
-; int ref_stride,
-; unsigned short *sad_array
-;);
-global sym(vp8_sad8x8x8_sse4) PRIVATE
-sym(vp8_sad8x8x8_sse4):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
- push rsi
- push rdi
- ; end prolog
-
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;ref_ptr
-
- movsxd rax, dword ptr arg(1) ;src_stride
- movsxd rdx, dword ptr arg(3) ;ref_stride
-
- PROCESS_8X2X8 1
- PROCESS_8X2X8 0
- PROCESS_8X2X8 0
- PROCESS_8X2X8 0
-
- mov rdi, arg(4) ;Results
- movdqa XMMWORD PTR [rdi], xmm1
-
- ; begin epilog
- pop rdi
- pop rsi
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-;void vp8_sad8x16x8_sse4(
-; const unsigned char *src_ptr,
-; int src_stride,
-; const unsigned char *ref_ptr,
-; int ref_stride,
-; unsigned short *sad_array
-;);
-global sym(vp8_sad8x16x8_sse4) PRIVATE
-sym(vp8_sad8x16x8_sse4):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
- push rsi
- push rdi
- ; end prolog
-
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;ref_ptr
-
- movsxd rax, dword ptr arg(1) ;src_stride
- movsxd rdx, dword ptr arg(3) ;ref_stride
-
- PROCESS_8X2X8 1
- PROCESS_8X2X8 0
- PROCESS_8X2X8 0
- PROCESS_8X2X8 0
- PROCESS_8X2X8 0
- PROCESS_8X2X8 0
- PROCESS_8X2X8 0
- PROCESS_8X2X8 0
- mov rdi, arg(4) ;Results
- movdqa XMMWORD PTR [rdi], xmm1
-
- ; begin epilog
- pop rdi
- pop rsi
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-;void vp8_sad4x4x8_sse4(
-; const unsigned char *src_ptr,
-; int src_stride,
-; const unsigned char *ref_ptr,
-; int ref_stride,
-; unsigned short *sad_array
-;);
-global sym(vp8_sad4x4x8_sse4) PRIVATE
-sym(vp8_sad4x4x8_sse4):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
- push rsi
- push rdi
- ; end prolog
-
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;ref_ptr
-
- movsxd rax, dword ptr arg(1) ;src_stride
- movsxd rdx, dword ptr arg(3) ;ref_stride
-
- PROCESS_4X2X8 1
- PROCESS_4X2X8 0
-
- mov rdi, arg(4) ;Results
- movdqa XMMWORD PTR [rdi], xmm1
-
- ; begin epilog
- pop rdi
- pop rsi
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-
-
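
The SSE4.1 kernels extend the same idea to eight consecutive reference offsets by pairing mpsadbw results (the 0x0 and 0x5 immediates select the two 4-byte halves), and they store the eight totals as packed 16-bit words with a single movdqa into sad_array. A sketch, again reusing sad_mxn_c; note the unsigned short result width, which is why the vpx_dsp replacements, reporting 32-bit totals, force the sad_array8 widening in the mcomp.c hunk further down:

/* Eight SADs at consecutive reference columns, truncated to 16 bits
 * exactly as the packed-word store above does. */
static void sad_mxn_x8_c(const uint8_t *src, int src_stride,
                         const uint8_t *ref, int ref_stride,
                         int width, int height, unsigned short *sad_array) {
  int i;
  for (i = 0; i < 8; ++i)  /* ref, ref + 1, ..., ref + 7 */
    sad_array[i] = (unsigned short)sad_mxn_c(src, src_stride, ref + i,
                                             ref_stride, width, height);
}
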
diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c
index 7149f5f..06f7f46 100644
--- a/vp8/encoder/mcomp.c
+++ b/vp8/encoder/mcomp.c
@@ -9,6 +9,8 @@
*/
+#include "./vp8_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
#include "onyx_int.h"
#include "mcomp.h"
#include "vpx_mem/vpx_mem.h"
@@ -900,7 +902,7 @@
this_offset = base_offset + (br * (pre_stride)) + bc;
this_mv.as_mv.row = br;
this_mv.as_mv.col = bc;
- bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride, UINT_MAX)
+ bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride)
+ mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
#if CONFIG_MULTI_RES_ENCODING
@@ -927,7 +929,7 @@
this_mv.as_mv.row = br + hex[i].row;
this_mv.as_mv.col = bc + hex[i].col;
this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
- thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
+ thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
CHECK_BETTER
}
}else
@@ -938,7 +940,7 @@
this_mv.as_mv.col = bc + hex[i].col;
CHECK_POINT
this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
- thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
+ thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
CHECK_BETTER
}
}
@@ -964,7 +966,7 @@
this_mv.as_mv.row = br + next_chkpts[k][i].row;
this_mv.as_mv.col = bc + next_chkpts[k][i].col;
this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
- thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
+ thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
CHECK_BETTER
}
}else
@@ -975,7 +977,7 @@
this_mv.as_mv.col = bc + next_chkpts[k][i].col;
CHECK_POINT
this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
- thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
+ thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
CHECK_BETTER
}
}
@@ -1006,7 +1008,7 @@
this_mv.as_mv.row = br + neighbors[i].row;
this_mv.as_mv.col = bc + neighbors[i].col;
this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
- thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
+ thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
CHECK_BETTER
}
}else
@@ -1017,7 +1019,7 @@
this_mv.as_mv.col = bc + neighbors[i].col;
CHECK_POINT
this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
- thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
+ thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
CHECK_BETTER
}
}
@@ -1101,7 +1103,7 @@
best_address = in_what;
/* Check the starting position */
- bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, UINT_MAX)
+ bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride)
+ mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
/* search_param determines the length of the initial step and hence
@@ -1126,7 +1128,7 @@
{
check_here = ss[i].offset + best_address;
- thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
+ thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
if (thissad < bestsad)
{
@@ -1225,7 +1227,7 @@
best_address = in_what;
/* Check the starting position */
- bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, UINT_MAX)
+ bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride)
+ mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
/* search_param determines the length of the initial step and hence the
@@ -1293,7 +1295,7 @@
(this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
{
check_here = ss[i].offset + best_address;
- thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
+ thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
if (thissad < bestsad)
{
@@ -1376,8 +1378,7 @@
best_mv->as_mv.col = ref_col;
/* Baseline value at the centre */
- bestsad = fn_ptr->sdf(what, what_stride, bestaddress,
- in_what_stride, UINT_MAX)
+ bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride)
+ mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
/* Apply further limits to prevent us from searching using vectors that
@@ -1402,7 +1403,7 @@
for (c = col_min; c < col_max; c++)
{
- thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
+ thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
this_mv.as_mv.col = c;
thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
@@ -1474,8 +1475,7 @@
best_mv->as_mv.col = ref_col;
/* Baseline value at the centre */
- bestsad = fn_ptr->sdf(what, what_stride, bestaddress,
- in_what_stride, UINT_MAX)
+ bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride)
+ mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
/* Apply further limits to prevent us from searching using vectors that stretch
@@ -1531,7 +1531,7 @@
while (c < col_max)
{
- thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
+ thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
if (thissad < bestsad)
{
@@ -1590,7 +1590,8 @@
int col_min = ref_col - distance;
int col_max = ref_col + distance;
- DECLARE_ALIGNED_ARRAY(16, unsigned short, sad_array8, 8);
+ // TODO(johannkoenig): check if this alignment is necessary.
+ DECLARE_ALIGNED_ARRAY(16, unsigned int, sad_array8, 8);
unsigned int sad_array[3];
int *mvsadcost[2];
@@ -1609,8 +1610,7 @@
best_mv->as_mv.col = ref_col;
/* Baseline value at the centre */
- bestsad = fn_ptr->sdf(what, what_stride,
- bestaddress, in_what_stride, UINT_MAX)
+ bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride)
+ mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
/* Apply further limits to prevent us from searching using vectors that stretch
@@ -1696,7 +1696,7 @@
while (c < col_max)
{
- thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
+ thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride);
if (thissad < bestsad)
{
@@ -1754,8 +1754,7 @@
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
- bestsad = fn_ptr->sdf(what, what_stride, best_address,
- in_what_stride, UINT_MAX)
+ bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride)
+ mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
for (i=0; i<search_range; i++)
@@ -1771,7 +1770,7 @@
(this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
{
check_here = (neighbors[j].row)*in_what_stride + neighbors[j].col + best_address;
- thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
+ thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride);
if (thissad < bestsad)
{
@@ -1834,8 +1833,7 @@
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
- bestsad = fn_ptr->sdf(what, what_stride, best_address,
- in_what_stride, UINT_MAX)
+ bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride)
+ mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
for (i=0; i<search_range; i++)
@@ -1886,7 +1884,7 @@
(this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
{
check_here = (neighbors[j].row)*in_what_stride + neighbors[j].col + best_address;
- thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
+ thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride);
if (thissad < bestsad)
{
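
Every mcomp.c hunk above makes the same mechanical edit: the fifth max_sad/bestsad argument disappears from each sdf call, and sad_array8 widens from unsigned short to unsigned int to hold the 32-bit totals the vpx_ x8 kernels report. Dropping the bound is safe because each call site already rejects worse candidates with an explicit comparison; a minimal sketch of that pattern, with illustrative names:

#include <stdint.h>

typedef unsigned int (*sad_fn)(const uint8_t *src, int src_stride,
                               const uint8_t *ref, int ref_stride);

/* Keep the best SAD over n candidate positions; the kernel no longer
 * early-outs, so all rejection happens on the comparison below. */
static unsigned int best_sad(sad_fn sdf,
                             const uint8_t *src, int src_stride,
                             const uint8_t *const cand[], int n,
                             int ref_stride, unsigned int bestsad) {
  int i;
  for (i = 0; i < n; ++i) {
    const unsigned int thissad = sdf(src, src_stride, cand[i], ref_stride);
    if (thissad < bestsad) bestsad = thissad;
  }
  return bestsad;
}
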
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index e7cbbc3..5b45231 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -11,6 +11,7 @@
#include "vpx_config.h"
#include "./vpx_scale_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
#include "./vp8_rtcd.h"
#include "vp8/common/onyxc_int.h"
#include "vp8/common/blockd.h"
@@ -2126,55 +2127,55 @@
}
#endif
- cpi->fn_ptr[BLOCK_16X16].sdf = vp8_sad16x16;
+ cpi->fn_ptr[BLOCK_16X16].sdf = vpx_sad16x16;
cpi->fn_ptr[BLOCK_16X16].vf = vp8_variance16x16;
cpi->fn_ptr[BLOCK_16X16].svf = vp8_sub_pixel_variance16x16;
cpi->fn_ptr[BLOCK_16X16].svf_halfpix_h = vp8_variance_halfpixvar16x16_h;
cpi->fn_ptr[BLOCK_16X16].svf_halfpix_v = vp8_variance_halfpixvar16x16_v;
cpi->fn_ptr[BLOCK_16X16].svf_halfpix_hv = vp8_variance_halfpixvar16x16_hv;
- cpi->fn_ptr[BLOCK_16X16].sdx3f = vp8_sad16x16x3;
- cpi->fn_ptr[BLOCK_16X16].sdx8f = vp8_sad16x16x8;
- cpi->fn_ptr[BLOCK_16X16].sdx4df = vp8_sad16x16x4d;
+ cpi->fn_ptr[BLOCK_16X16].sdx3f = vpx_sad16x16x3;
+ cpi->fn_ptr[BLOCK_16X16].sdx8f = vpx_sad16x16x8;
+ cpi->fn_ptr[BLOCK_16X16].sdx4df = vpx_sad16x16x4d;
- cpi->fn_ptr[BLOCK_16X8].sdf = vp8_sad16x8;
+ cpi->fn_ptr[BLOCK_16X8].sdf = vpx_sad16x8;
cpi->fn_ptr[BLOCK_16X8].vf = vp8_variance16x8;
cpi->fn_ptr[BLOCK_16X8].svf = vp8_sub_pixel_variance16x8;
cpi->fn_ptr[BLOCK_16X8].svf_halfpix_h = NULL;
cpi->fn_ptr[BLOCK_16X8].svf_halfpix_v = NULL;
cpi->fn_ptr[BLOCK_16X8].svf_halfpix_hv = NULL;
- cpi->fn_ptr[BLOCK_16X8].sdx3f = vp8_sad16x8x3;
- cpi->fn_ptr[BLOCK_16X8].sdx8f = vp8_sad16x8x8;
- cpi->fn_ptr[BLOCK_16X8].sdx4df = vp8_sad16x8x4d;
+ cpi->fn_ptr[BLOCK_16X8].sdx3f = vpx_sad16x8x3;
+ cpi->fn_ptr[BLOCK_16X8].sdx8f = vpx_sad16x8x8;
+ cpi->fn_ptr[BLOCK_16X8].sdx4df = vpx_sad16x8x4d;
- cpi->fn_ptr[BLOCK_8X16].sdf = vp8_sad8x16;
+ cpi->fn_ptr[BLOCK_8X16].sdf = vpx_sad8x16;
cpi->fn_ptr[BLOCK_8X16].vf = vp8_variance8x16;
cpi->fn_ptr[BLOCK_8X16].svf = vp8_sub_pixel_variance8x16;
cpi->fn_ptr[BLOCK_8X16].svf_halfpix_h = NULL;
cpi->fn_ptr[BLOCK_8X16].svf_halfpix_v = NULL;
cpi->fn_ptr[BLOCK_8X16].svf_halfpix_hv = NULL;
- cpi->fn_ptr[BLOCK_8X16].sdx3f = vp8_sad8x16x3;
- cpi->fn_ptr[BLOCK_8X16].sdx8f = vp8_sad8x16x8;
- cpi->fn_ptr[BLOCK_8X16].sdx4df = vp8_sad8x16x4d;
+ cpi->fn_ptr[BLOCK_8X16].sdx3f = vpx_sad8x16x3;
+ cpi->fn_ptr[BLOCK_8X16].sdx8f = vpx_sad8x16x8;
+ cpi->fn_ptr[BLOCK_8X16].sdx4df = vpx_sad8x16x4d;
- cpi->fn_ptr[BLOCK_8X8].sdf = vp8_sad8x8;
+ cpi->fn_ptr[BLOCK_8X8].sdf = vpx_sad8x8;
cpi->fn_ptr[BLOCK_8X8].vf = vp8_variance8x8;
cpi->fn_ptr[BLOCK_8X8].svf = vp8_sub_pixel_variance8x8;
cpi->fn_ptr[BLOCK_8X8].svf_halfpix_h = NULL;
cpi->fn_ptr[BLOCK_8X8].svf_halfpix_v = NULL;
cpi->fn_ptr[BLOCK_8X8].svf_halfpix_hv = NULL;
- cpi->fn_ptr[BLOCK_8X8].sdx3f = vp8_sad8x8x3;
- cpi->fn_ptr[BLOCK_8X8].sdx8f = vp8_sad8x8x8;
- cpi->fn_ptr[BLOCK_8X8].sdx4df = vp8_sad8x8x4d;
+ cpi->fn_ptr[BLOCK_8X8].sdx3f = vpx_sad8x8x3;
+ cpi->fn_ptr[BLOCK_8X8].sdx8f = vpx_sad8x8x8;
+ cpi->fn_ptr[BLOCK_8X8].sdx4df = vpx_sad8x8x4d;
- cpi->fn_ptr[BLOCK_4X4].sdf = vp8_sad4x4;
+ cpi->fn_ptr[BLOCK_4X4].sdf = vpx_sad4x4;
cpi->fn_ptr[BLOCK_4X4].vf = vp8_variance4x4;
cpi->fn_ptr[BLOCK_4X4].svf = vp8_sub_pixel_variance4x4;
cpi->fn_ptr[BLOCK_4X4].svf_halfpix_h = NULL;
cpi->fn_ptr[BLOCK_4X4].svf_halfpix_v = NULL;
cpi->fn_ptr[BLOCK_4X4].svf_halfpix_hv = NULL;
- cpi->fn_ptr[BLOCK_4X4].sdx3f = vp8_sad4x4x3;
- cpi->fn_ptr[BLOCK_4X4].sdx8f = vp8_sad4x4x8;
- cpi->fn_ptr[BLOCK_4X4].sdx4df = vp8_sad4x4x4d;
+ cpi->fn_ptr[BLOCK_4X4].sdx3f = vpx_sad4x4x3;
+ cpi->fn_ptr[BLOCK_4X4].sdx8f = vpx_sad4x4x8;
+ cpi->fn_ptr[BLOCK_4X4].sdx4df = vpx_sad4x4x4d;
#if ARCH_X86 || ARCH_X86_64
cpi->fn_ptr[BLOCK_16X16].copymem = vp8_copy32xn;
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index a945890..4f90402 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -1690,16 +1690,16 @@
}else if(xd->mb_to_top_edge==0)
{ /* only has left MB for sad calculation. */
near_sad[0] = near_sad[2] = INT_MAX;
- near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, UINT_MAX);
+ near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride);
}else if(xd->mb_to_left_edge ==0)
{ /* only has above MB for sad calculation. */
near_sad[1] = near_sad[2] = INT_MAX;
- near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, UINT_MAX);
+ near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride);
}else
{
- near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, UINT_MAX);
- near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, UINT_MAX);
- near_sad[2] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16 -16,xd->dst.y_stride, UINT_MAX);
+ near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride);
+ near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride);
+ near_sad[2] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16 -16,xd->dst.y_stride);
}
if(cpi->common.last_frame_type != KEY_FRAME)
@@ -1714,14 +1714,14 @@
if(xd->mb_to_bottom_edge==0) near_sad[7] = INT_MAX;
if(near_sad[4] != INT_MAX)
- near_sad[4] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - pre_y_stride *16, pre_y_stride, UINT_MAX);
+ near_sad[4] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - pre_y_stride *16, pre_y_stride);
if(near_sad[5] != INT_MAX)
- near_sad[5] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - 16, pre_y_stride, UINT_MAX);
- near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer, pre_y_stride, UINT_MAX);
+ near_sad[5] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - 16, pre_y_stride);
+ near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer, pre_y_stride);
if(near_sad[6] != INT_MAX)
- near_sad[6] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + 16, pre_y_stride, UINT_MAX);
+ near_sad[6] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + 16, pre_y_stride);
if(near_sad[7] != INT_MAX)
- near_sad[7] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + pre_y_stride *16, pre_y_stride, UINT_MAX);
+ near_sad[7] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + pre_y_stride *16, pre_y_stride);
}
if(cpi->common.last_frame_type != KEY_FRAME)
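
For reference, the near_sad[] hunks address a 16x16 macroblock's spatial neighbours as fixed pointer offsets into the reconstructed frame: left is dst - 16, above is dst - 16 * stride, above-left is both combined. A sketch using any four-argument SAD such as vpx_sad16x16 (the wrapper name is illustrative):

#include <stdint.h>

static void neighbor_sads_16x16(unsigned int (*sdf)(const uint8_t *, int,
                                                    const uint8_t *, int),
                                const uint8_t *src, int src_stride,
                                const uint8_t *dst, int dst_stride,
                                unsigned int near_sad[3]) {
  near_sad[0] = sdf(src, src_stride, dst - dst_stride * 16, dst_stride);      /* above */
  near_sad[1] = sdf(src, src_stride, dst - 16, dst_stride);                   /* left */
  near_sad[2] = sdf(src, src_stride, dst - dst_stride * 16 - 16, dst_stride); /* above-left */
}
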
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk
index 9b11c0d..b4c8140 100644
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -15,6 +15,7 @@
VP8_COMMON_SRCS-yes += common/alloccommon.c
VP8_COMMON_SRCS-yes += common/blockd.c
VP8_COMMON_SRCS-yes += common/coefupdateprobs.h
+VP8_COMMON_SRCS-yes += common/copy_c.c
VP8_COMMON_SRCS-yes += common/debugmodes.c
VP8_COMMON_SRCS-yes += common/default_coef_probs.h
VP8_COMMON_SRCS-yes += common/dequantize.c
@@ -60,7 +61,6 @@
VP8_COMMON_SRCS-yes += common/reconinter.c
VP8_COMMON_SRCS-yes += common/reconintra.c
VP8_COMMON_SRCS-yes += common/reconintra4x4.c
-VP8_COMMON_SRCS-yes += common/sad_c.c
VP8_COMMON_SRCS-yes += common/setupintrarecon.c
VP8_COMMON_SRCS-yes += common/swapyv12buffer.c
VP8_COMMON_SRCS-yes += common/variance_c.c
@@ -85,26 +85,23 @@
VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/iwalsh_mmx.asm
VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/loopfilter_mmx.asm
VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/recon_mmx.asm
-VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/sad_mmx.asm
VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/subpixel_mmx.asm
VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/variance_mmx.c
VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/variance_impl_mmx.asm
+VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/copy_sse2.asm
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/idct_blk_sse2.c
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/idctllm_sse2.asm
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/recon_sse2.asm
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/recon_wrapper_sse2.c
-VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/sad_sse2.asm
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/subpixel_sse2.asm
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/loopfilter_sse2.asm
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/iwalsh_sse2.asm
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/variance_sse2.c
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/variance_impl_sse2.asm
-VP8_COMMON_SRCS-$(HAVE_SSE3) += common/x86/sad_sse3.asm
-VP8_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/sad_ssse3.asm
+VP8_COMMON_SRCS-$(HAVE_SSE3) += common/x86/copy_sse3.asm
VP8_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/subpixel_ssse3.asm
VP8_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/variance_ssse3.c
VP8_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/variance_impl_ssse3.asm
-VP8_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/sad_sse4.asm
ifeq ($(CONFIG_POSTPROC),yes)
VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/postproc_mmx.asm
@@ -148,7 +145,6 @@
VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/dequant_idct_v6$(ASM)
VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/dequantize_v6$(ASM)
VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/idct_blk_v6.c
-VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/vp8_sad16x16_armv6$(ASM)
VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/vp8_variance8x8_armv6$(ASM)
VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/vp8_variance16x16_armv6$(ASM)
VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6$(ASM)
@@ -170,7 +166,6 @@
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/loopfiltersimpleverticaledge_neon.c
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/mbloopfilter_neon.c
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/reconintra_neon.c
-VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/sad_neon.c
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/shortidct4x4llm_neon.c
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/sixtappredict_neon.c
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/variance_neon.c
diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index b4c4c09..af9cc73 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -11,6 +11,7 @@
#include "./vpx_config.h"
#include "./vp8_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
#include "./vpx_scale_rtcd.h"
#include "vpx/vpx_codec.h"
#include "vpx/internal/vpx_codec_internal.h"
@@ -650,6 +651,7 @@
vp8_rtcd();
+ vpx_dsp_rtcd();
vpx_scale_rtcd();
if (!ctx->priv)
diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c
index d677455..72e4770 100644
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c
@@ -12,6 +12,7 @@
#include <stdlib.h>
#include <string.h>
#include "./vp8_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
#include "./vpx_scale_rtcd.h"
#include "vpx/vpx_decoder.h"
#include "vpx/vp8dx.h"
@@ -107,6 +108,7 @@
(void) data;
vp8_rtcd();
+ vpx_dsp_rtcd();
vpx_scale_rtcd();
/* This function only allocates space for the vpx_codec_alg_priv_t
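
Both codec interfaces gain the same one-line initialization: vpx_dsp_rtcd() now runs alongside vp8_rtcd() and vpx_scale_rtcd(), so the shared component's CPU-specific dispatch table is filled in before any vpx_sad* pointer is used. The resulting init order, as a sketch (the wrapper name is illustrative):

#include "./vp8_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "./vpx_scale_rtcd.h"

/* Resolve all runtime-CPU-detected tables before first use. */
static void init_rtcd_tables(void) {
  vp8_rtcd();       /* codec-specific kernels */
  vpx_dsp_rtcd();   /* shared DSP kernels, including vpx_sad* */
  vpx_scale_rtcd(); /* scaler kernels */
}
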
diff --git a/vp9/common/vp9_mfqe.c b/vp9/common/vp9_mfqe.c
index 1494c3f..57189df 100644
--- a/vp9/common/vp9_mfqe.c
+++ b/vp9/common/vp9_mfqe.c
@@ -9,8 +9,9 @@
*/
#include "./vpx_config.h"
-#include "./vpx_scale_rtcd.h"
#include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
+#include "./vpx_scale_rtcd.h"
#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/common/vp9_postproc.h"
@@ -171,13 +172,13 @@
if (bs == BLOCK_16X16) {
vdiff = (vp9_variance16x16(y, y_stride, yd, yd_stride, &sse) + 128) >> 8;
- sad = (vp9_sad16x16(y, y_stride, yd, yd_stride) + 128) >> 8;
+ sad = (vpx_sad16x16(y, y_stride, yd, yd_stride) + 128) >> 8;
} else if (bs == BLOCK_32X32) {
vdiff = (vp9_variance32x32(y, y_stride, yd, yd_stride, &sse) + 512) >> 10;
- sad = (vp9_sad32x32(y, y_stride, yd, yd_stride) + 512) >> 10;
+ sad = (vpx_sad32x32(y, y_stride, yd, yd_stride) + 512) >> 10;
} else /* if (bs == BLOCK_64X64) */ {
vdiff = (vp9_variance64x64(y, y_stride, yd, yd_stride, &sse) + 2048) >> 12;
- sad = (vp9_sad64x64(y, y_stride, yd, yd_stride) + 2048) >> 12;
+ sad = (vpx_sad64x64(y, y_stride, yd, yd_stride) + 2048) >> 12;
}
// vdiff > sad * 3 means vdiff should not be too small, otherwise,
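
The rounding constants in the vp9_mfqe.c hunk are rounded divisions by the block's pixel count, i.e. per-pixel averages: 16x16 has 256 pixels (+128, >> 8), 32x32 has 1024 (+512, >> 10), 64x64 has 4096 (+2048, >> 12). As a small helper (illustrative, not in the tree):

/* Rounded per-pixel average of a block total; log2_pixels is 8, 10 or
 * 12 for 16x16, 32x32 and 64x64 blocks respectively. */
static unsigned int per_pixel_avg(unsigned int total, int log2_pixels) {
  return (total + (1u << (log2_pixels - 1))) >> log2_pixels;
}
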
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index 42cb8fe..8765ac7 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -922,177 +922,6 @@
add_proto qw/unsigned int vp9_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_sub_pixel_avg_variance4x4/, "$sse_x86inc", "$ssse3_x86inc";
-add_proto qw/unsigned int vp9_sad64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vp9_sad64x64 neon avx2/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vp9_sad32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vp9_sad32x64 avx2/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vp9_sad64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vp9_sad64x32 avx2/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vp9_sad32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vp9_sad32x16 avx2/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vp9_sad16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vp9_sad16x32/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vp9_sad32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vp9_sad32x32 neon avx2/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vp9_sad16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vp9_sad16x16 neon/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vp9_sad16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vp9_sad16x8/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vp9_sad8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vp9_sad8x16/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vp9_sad8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vp9_sad8x8 neon/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vp9_sad8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vp9_sad8x4/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vp9_sad4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vp9_sad4x8/, "$sse_x86inc";
-
-add_proto qw/unsigned int vp9_sad4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vp9_sad4x4/, "$sse_x86inc";
-
-add_proto qw/unsigned int vp9_sad64x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vp9_sad64x64_avg avx2/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vp9_sad32x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vp9_sad32x64_avg avx2/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vp9_sad64x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vp9_sad64x32_avg avx2/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vp9_sad32x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vp9_sad32x16_avg avx2/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vp9_sad16x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vp9_sad16x32_avg/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vp9_sad32x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vp9_sad32x32_avg avx2/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vp9_sad16x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vp9_sad16x16_avg/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vp9_sad16x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vp9_sad16x8_avg/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vp9_sad8x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vp9_sad8x16_avg/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vp9_sad8x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vp9_sad8x8_avg/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vp9_sad8x4_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vp9_sad8x4_avg/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vp9_sad4x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vp9_sad4x8_avg/, "$sse_x86inc";
-
-add_proto qw/unsigned int vp9_sad4x4_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vp9_sad4x4_avg/, "$sse_x86inc";
-
-add_proto qw/void vp9_sad64x64x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array";
-specialize qw/vp9_sad64x64x3/;
-
-add_proto qw/void vp9_sad32x32x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array";
-specialize qw/vp9_sad32x32x3/;
-
-add_proto qw/void vp9_sad16x16x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array";
-specialize qw/vp9_sad16x16x3 sse3 ssse3/;
-
-add_proto qw/void vp9_sad16x8x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array";
-specialize qw/vp9_sad16x8x3 sse3 ssse3/;
-
-add_proto qw/void vp9_sad8x16x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array";
-specialize qw/vp9_sad8x16x3 sse3/;
-
-add_proto qw/void vp9_sad8x8x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array";
-specialize qw/vp9_sad8x8x3 sse3/;
-
-add_proto qw/void vp9_sad4x4x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array";
-specialize qw/vp9_sad4x4x3 sse3/;
-
-add_proto qw/void vp9_sad64x64x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
-specialize qw/vp9_sad64x64x8/;
-
-add_proto qw/void vp9_sad32x32x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
-specialize qw/vp9_sad32x32x8/;
-
-add_proto qw/void vp9_sad16x16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
-specialize qw/vp9_sad16x16x8 sse4_1/;
-$vp9_sad16x16x8_sse4_1=vp9_sad16x16x8_sse4;
-
-add_proto qw/void vp9_sad16x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
-specialize qw/vp9_sad16x8x8 sse4_1/;
-$vp9_sad16x8x8_sse4_1=vp9_sad16x8x8_sse4;
-
-add_proto qw/void vp9_sad8x16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
-specialize qw/vp9_sad8x16x8 sse4_1/;
-$vp9_sad8x16x8_sse4_1=vp9_sad8x16x8_sse4;
-
-add_proto qw/void vp9_sad8x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
-specialize qw/vp9_sad8x8x8 sse4_1/;
-$vp9_sad8x8x8_sse4_1=vp9_sad8x8x8_sse4;
-
-add_proto qw/void vp9_sad8x4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
-specialize qw/vp9_sad8x4x8/;
-
-add_proto qw/void vp9_sad4x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
-specialize qw/vp9_sad4x8x8/;
-
-add_proto qw/void vp9_sad4x4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
-specialize qw/vp9_sad4x4x8 sse4_1/;
-$vp9_sad4x4x8_sse4_1=vp9_sad4x4x8_sse4;
-
-add_proto qw/void vp9_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
-specialize qw/vp9_sad64x64x4d sse2 avx2 neon/;
-
-add_proto qw/void vp9_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
-specialize qw/vp9_sad32x64x4d sse2/;
-
-add_proto qw/void vp9_sad64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
-specialize qw/vp9_sad64x32x4d sse2/;
-
-add_proto qw/void vp9_sad32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
-specialize qw/vp9_sad32x16x4d sse2/;
-
-add_proto qw/void vp9_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
-specialize qw/vp9_sad16x32x4d sse2/;
-
-add_proto qw/void vp9_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
-specialize qw/vp9_sad32x32x4d sse2 avx2 neon/;
-
-add_proto qw/void vp9_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
-specialize qw/vp9_sad16x16x4d sse2 neon/;
-
-add_proto qw/void vp9_sad16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
-specialize qw/vp9_sad16x8x4d sse2/;
-
-add_proto qw/void vp9_sad8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
-specialize qw/vp9_sad8x16x4d sse2/;
-
-add_proto qw/void vp9_sad8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
-specialize qw/vp9_sad8x8x4d sse2/;
-
-# TODO(jingning): need to convert these 4x8/8x4 functions into sse2 form
-add_proto qw/void vp9_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
-specialize qw/vp9_sad8x4x4d sse2/;
-
-add_proto qw/void vp9_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
-specialize qw/vp9_sad4x8x4d sse/;
-
-add_proto qw/void vp9_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
-specialize qw/vp9_sad4x4x4d sse/;
-
add_proto qw/unsigned int vp9_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
specialize qw/vp9_mse16x16 avx2/, "$sse2_x86inc";
@@ -1682,171 +1511,6 @@
add_proto qw/unsigned int vp9_highbd_12_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_highbd_12_sub_pixel_avg_variance4x4/;
- add_proto qw/unsigned int vp9_highbd_sad64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vp9_highbd_sad64x64/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_sad32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vp9_highbd_sad32x64/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_sad64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vp9_highbd_sad64x32/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_sad32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vp9_highbd_sad32x16/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_sad16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vp9_highbd_sad16x32/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_sad32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vp9_highbd_sad32x32/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_sad16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vp9_highbd_sad16x16/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_sad16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vp9_highbd_sad16x8/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_sad8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vp9_highbd_sad8x16/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_sad8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vp9_highbd_sad8x8/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_sad8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vp9_highbd_sad8x4/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_sad4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vp9_highbd_sad4x8/;
-
- add_proto qw/unsigned int vp9_highbd_sad4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vp9_highbd_sad4x4/;
-
- add_proto qw/unsigned int vp9_highbd_sad64x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vp9_highbd_sad64x64_avg/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_sad32x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vp9_highbd_sad32x64_avg/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_sad64x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vp9_highbd_sad64x32_avg/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_sad32x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vp9_highbd_sad32x16_avg/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_sad16x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vp9_highbd_sad16x32_avg/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_sad32x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vp9_highbd_sad32x32_avg/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_sad16x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vp9_highbd_sad16x16_avg/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_sad16x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vp9_highbd_sad16x8_avg/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_sad8x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vp9_highbd_sad8x16_avg/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_sad8x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vp9_highbd_sad8x8_avg/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_sad8x4_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vp9_highbd_sad8x4_avg/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_sad4x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vp9_highbd_sad4x8_avg/;
-
- add_proto qw/unsigned int vp9_highbd_sad4x4_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vp9_highbd_sad4x4_avg/;
-
- add_proto qw/void vp9_highbd_sad64x64x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array";
- specialize qw/vp9_highbd_sad64x64x3/;
-
- add_proto qw/void vp9_highbd_sad32x32x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array";
- specialize qw/vp9_highbd_sad32x32x3/;
-
- add_proto qw/void vp9_highbd_sad16x16x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array";
- specialize qw/vp9_highbd_sad16x16x3/;
-
- add_proto qw/void vp9_highbd_sad16x8x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array";
- specialize qw/vp9_highbd_sad16x8x3/;
-
- add_proto qw/void vp9_highbd_sad8x16x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array";
- specialize qw/vp9_highbd_sad8x16x3/;
-
- add_proto qw/void vp9_highbd_sad8x8x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array";
- specialize qw/vp9_highbd_sad8x8x3/;
-
- add_proto qw/void vp9_highbd_sad4x4x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array";
- specialize qw/vp9_highbd_sad4x4x3/;
-
- add_proto qw/void vp9_highbd_sad64x64x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
- specialize qw/vp9_highbd_sad64x64x8/;
-
- add_proto qw/void vp9_highbd_sad32x32x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
- specialize qw/vp9_highbd_sad32x32x8/;
-
- add_proto qw/void vp9_highbd_sad16x16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
- specialize qw/vp9_highbd_sad16x16x8/;
-
- add_proto qw/void vp9_highbd_sad16x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
- specialize qw/vp9_highbd_sad16x8x8/;
-
- add_proto qw/void vp9_highbd_sad8x16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
- specialize qw/vp9_highbd_sad8x16x8/;
-
- add_proto qw/void vp9_highbd_sad8x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
- specialize qw/vp9_highbd_sad8x8x8/;
-
- add_proto qw/void vp9_highbd_sad8x4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
- specialize qw/vp9_highbd_sad8x4x8/;
-
- add_proto qw/void vp9_highbd_sad4x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
- specialize qw/vp9_highbd_sad4x8x8/;
-
- add_proto qw/void vp9_highbd_sad4x4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
- specialize qw/vp9_highbd_sad4x4x8/;
-
- add_proto qw/void vp9_highbd_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
- specialize qw/vp9_highbd_sad64x64x4d sse2/;
-
- add_proto qw/void vp9_highbd_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
- specialize qw/vp9_highbd_sad32x64x4d sse2/;
-
- add_proto qw/void vp9_highbd_sad64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
- specialize qw/vp9_highbd_sad64x32x4d sse2/;
-
- add_proto qw/void vp9_highbd_sad32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
- specialize qw/vp9_highbd_sad32x16x4d sse2/;
-
- add_proto qw/void vp9_highbd_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
- specialize qw/vp9_highbd_sad16x32x4d sse2/;
-
- add_proto qw/void vp9_highbd_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
- specialize qw/vp9_highbd_sad32x32x4d sse2/;
-
- add_proto qw/void vp9_highbd_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
- specialize qw/vp9_highbd_sad16x16x4d sse2/;
-
- add_proto qw/void vp9_highbd_sad16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
- specialize qw/vp9_highbd_sad16x8x4d sse2/;
-
- add_proto qw/void vp9_highbd_sad8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
- specialize qw/vp9_highbd_sad8x16x4d sse2/;
-
- add_proto qw/void vp9_highbd_sad8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
- specialize qw/vp9_highbd_sad8x8x4d sse2/;
-
- add_proto qw/void vp9_highbd_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
- specialize qw/vp9_highbd_sad8x4x4d sse2/;
-
- add_proto qw/void vp9_highbd_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
- specialize qw/vp9_highbd_sad4x8x4d sse2/;
-
- add_proto qw/void vp9_highbd_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
- specialize qw/vp9_highbd_sad4x4x4d sse2/;
-
add_proto qw/unsigned int vp9_highbd_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
specialize qw/vp9_highbd_mse16x16/, "$sse2_x86inc";
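
For reference: each add_proto/specialize pair removed above is one RTCD entry.
The prototypes themselves are unchanged; they are re-emitted under the vpx_
prefix, presumably from a new rtcd defs file in vpx_dsp. A hedged sketch of
roughly what the generated vpx_dsp_rtcd.h provides for one such entry on an
x86 runtime-dispatch build (the _c/_sse2 names follow the real variant-naming
convention; RTCD_EXTERN and the setup assignment are illustrative, and exact
output varies by platform and configure flags):

    /* Sketch: per-variant declarations plus the dispatch pointer. */
    unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride,
                                const uint8_t *ref_ptr, int ref_stride);
    unsigned int vpx_sad16x16_sse2(const uint8_t *src_ptr, int src_stride,
                                   const uint8_t *ref_ptr, int ref_stride);
    RTCD_EXTERN unsigned int (*vpx_sad16x16)(const uint8_t *src_ptr,
                                             int src_stride,
                                             const uint8_t *ref_ptr,
                                             int ref_stride);
    /* In setup_rtcd_internal() (illustrative):
     *   vpx_sad16x16 = vpx_sad16x16_c;
     *   if (flags & HAS_SSE2) vpx_sad16x16 = vpx_sad16x16_sse2;
     */
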
diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c
index bf40377..288d869 100644
--- a/vp9/decoder/vp9_decoder.c
+++ b/vp9/decoder/vp9_decoder.c
@@ -13,6 +13,7 @@
#include <stdio.h>
#include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
#include "./vpx_scale_rtcd.h"
#include "vpx_mem/vpx_mem.h"
@@ -40,6 +41,7 @@
if (!init_done) {
vp9_rtcd();
+ vpx_dsp_rtcd();
vpx_scale_rtcd();
vp9_init_intra_predictors();
init_done = 1;
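
The decoder's one-time init now wires up the shared dispatch table as well.
A condensed sketch of the pattern in the hunk above (simplified; the real
guard and surrounding function live in vp9_decoder.c):

    /* Once-per-process RTCD init; vpx_dsp_rtcd() is the new addition. */
    static void initialize_dec_sketch(void) {
      static int init_done = 0;     /* real code may guard differently */
      if (!init_done) {
        vp9_rtcd();                 /* codec-specific kernels */
        vpx_dsp_rtcd();             /* newly shared SAD kernels */
        vpx_scale_rtcd();
        vp9_init_intra_predictors();
        init_done = 1;
      }
    }
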
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index a6e4c9c..a1018ad 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -14,6 +14,7 @@
#include "./vpx_config.h"
#include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
#include "./vpx_scale_rtcd.h"
#include "vpx/internal/vpx_psnr.h"
#include "vpx_ports/vpx_timer.h"
@@ -318,6 +319,7 @@
if (!init_done) {
vp9_rtcd();
+ vpx_dsp_rtcd();
vpx_scale_rtcd();
vp9_init_intra_predictors();
vp9_init_me_luts();
@@ -929,61 +931,61 @@
sad_array[i] >>= 4; \
}
-MAKE_BFP_SAD_WRAPPER(vp9_highbd_sad32x16)
-MAKE_BFP_SADAVG_WRAPPER(vp9_highbd_sad32x16_avg)
-MAKE_BFP_SAD4D_WRAPPER(vp9_highbd_sad32x16x4d)
-MAKE_BFP_SAD_WRAPPER(vp9_highbd_sad16x32)
-MAKE_BFP_SADAVG_WRAPPER(vp9_highbd_sad16x32_avg)
-MAKE_BFP_SAD4D_WRAPPER(vp9_highbd_sad16x32x4d)
-MAKE_BFP_SAD_WRAPPER(vp9_highbd_sad64x32)
-MAKE_BFP_SADAVG_WRAPPER(vp9_highbd_sad64x32_avg)
-MAKE_BFP_SAD4D_WRAPPER(vp9_highbd_sad64x32x4d)
-MAKE_BFP_SAD_WRAPPER(vp9_highbd_sad32x64)
-MAKE_BFP_SADAVG_WRAPPER(vp9_highbd_sad32x64_avg)
-MAKE_BFP_SAD4D_WRAPPER(vp9_highbd_sad32x64x4d)
-MAKE_BFP_SAD_WRAPPER(vp9_highbd_sad32x32)
-MAKE_BFP_SADAVG_WRAPPER(vp9_highbd_sad32x32_avg)
-MAKE_BFP_SAD3_WRAPPER(vp9_highbd_sad32x32x3)
-MAKE_BFP_SAD8_WRAPPER(vp9_highbd_sad32x32x8)
-MAKE_BFP_SAD4D_WRAPPER(vp9_highbd_sad32x32x4d)
-MAKE_BFP_SAD_WRAPPER(vp9_highbd_sad64x64)
-MAKE_BFP_SADAVG_WRAPPER(vp9_highbd_sad64x64_avg)
-MAKE_BFP_SAD3_WRAPPER(vp9_highbd_sad64x64x3)
-MAKE_BFP_SAD8_WRAPPER(vp9_highbd_sad64x64x8)
-MAKE_BFP_SAD4D_WRAPPER(vp9_highbd_sad64x64x4d)
-MAKE_BFP_SAD_WRAPPER(vp9_highbd_sad16x16)
-MAKE_BFP_SADAVG_WRAPPER(vp9_highbd_sad16x16_avg)
-MAKE_BFP_SAD3_WRAPPER(vp9_highbd_sad16x16x3)
-MAKE_BFP_SAD8_WRAPPER(vp9_highbd_sad16x16x8)
-MAKE_BFP_SAD4D_WRAPPER(vp9_highbd_sad16x16x4d)
-MAKE_BFP_SAD_WRAPPER(vp9_highbd_sad16x8)
-MAKE_BFP_SADAVG_WRAPPER(vp9_highbd_sad16x8_avg)
-MAKE_BFP_SAD3_WRAPPER(vp9_highbd_sad16x8x3)
-MAKE_BFP_SAD8_WRAPPER(vp9_highbd_sad16x8x8)
-MAKE_BFP_SAD4D_WRAPPER(vp9_highbd_sad16x8x4d)
-MAKE_BFP_SAD_WRAPPER(vp9_highbd_sad8x16)
-MAKE_BFP_SADAVG_WRAPPER(vp9_highbd_sad8x16_avg)
-MAKE_BFP_SAD3_WRAPPER(vp9_highbd_sad8x16x3)
-MAKE_BFP_SAD8_WRAPPER(vp9_highbd_sad8x16x8)
-MAKE_BFP_SAD4D_WRAPPER(vp9_highbd_sad8x16x4d)
-MAKE_BFP_SAD_WRAPPER(vp9_highbd_sad8x8)
-MAKE_BFP_SADAVG_WRAPPER(vp9_highbd_sad8x8_avg)
-MAKE_BFP_SAD3_WRAPPER(vp9_highbd_sad8x8x3)
-MAKE_BFP_SAD8_WRAPPER(vp9_highbd_sad8x8x8)
-MAKE_BFP_SAD4D_WRAPPER(vp9_highbd_sad8x8x4d)
-MAKE_BFP_SAD_WRAPPER(vp9_highbd_sad8x4)
-MAKE_BFP_SADAVG_WRAPPER(vp9_highbd_sad8x4_avg)
-MAKE_BFP_SAD8_WRAPPER(vp9_highbd_sad8x4x8)
-MAKE_BFP_SAD4D_WRAPPER(vp9_highbd_sad8x4x4d)
-MAKE_BFP_SAD_WRAPPER(vp9_highbd_sad4x8)
-MAKE_BFP_SADAVG_WRAPPER(vp9_highbd_sad4x8_avg)
-MAKE_BFP_SAD8_WRAPPER(vp9_highbd_sad4x8x8)
-MAKE_BFP_SAD4D_WRAPPER(vp9_highbd_sad4x8x4d)
-MAKE_BFP_SAD_WRAPPER(vp9_highbd_sad4x4)
-MAKE_BFP_SADAVG_WRAPPER(vp9_highbd_sad4x4_avg)
-MAKE_BFP_SAD3_WRAPPER(vp9_highbd_sad4x4x3)
-MAKE_BFP_SAD8_WRAPPER(vp9_highbd_sad4x4x8)
-MAKE_BFP_SAD4D_WRAPPER(vp9_highbd_sad4x4x4d)
+MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad32x16)
+MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad32x16_avg)
+MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad32x16x4d)
+MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad16x32)
+MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad16x32_avg)
+MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad16x32x4d)
+MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad64x32)
+MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad64x32_avg)
+MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad64x32x4d)
+MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad32x64)
+MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad32x64_avg)
+MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad32x64x4d)
+MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad32x32)
+MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad32x32_avg)
+MAKE_BFP_SAD3_WRAPPER(vpx_highbd_sad32x32x3)
+MAKE_BFP_SAD8_WRAPPER(vpx_highbd_sad32x32x8)
+MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad32x32x4d)
+MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad64x64)
+MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad64x64_avg)
+MAKE_BFP_SAD3_WRAPPER(vpx_highbd_sad64x64x3)
+MAKE_BFP_SAD8_WRAPPER(vpx_highbd_sad64x64x8)
+MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad64x64x4d)
+MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad16x16)
+MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad16x16_avg)
+MAKE_BFP_SAD3_WRAPPER(vpx_highbd_sad16x16x3)
+MAKE_BFP_SAD8_WRAPPER(vpx_highbd_sad16x16x8)
+MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad16x16x4d)
+MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad16x8)
+MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad16x8_avg)
+MAKE_BFP_SAD3_WRAPPER(vpx_highbd_sad16x8x3)
+MAKE_BFP_SAD8_WRAPPER(vpx_highbd_sad16x8x8)
+MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad16x8x4d)
+MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad8x16)
+MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad8x16_avg)
+MAKE_BFP_SAD3_WRAPPER(vpx_highbd_sad8x16x3)
+MAKE_BFP_SAD8_WRAPPER(vpx_highbd_sad8x16x8)
+MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad8x16x4d)
+MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad8x8)
+MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad8x8_avg)
+MAKE_BFP_SAD3_WRAPPER(vpx_highbd_sad8x8x3)
+MAKE_BFP_SAD8_WRAPPER(vpx_highbd_sad8x8x8)
+MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad8x8x4d)
+MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad8x4)
+MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad8x4_avg)
+MAKE_BFP_SAD8_WRAPPER(vpx_highbd_sad8x4x8)
+MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad8x4x4d)
+MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad4x8)
+MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad4x8_avg)
+MAKE_BFP_SAD8_WRAPPER(vpx_highbd_sad4x8x8)
+MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad4x8x4d)
+MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad4x4)
+MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad4x4_avg)
+MAKE_BFP_SAD3_WRAPPER(vpx_highbd_sad4x4x3)
+MAKE_BFP_SAD8_WRAPPER(vpx_highbd_sad4x4x8)
+MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad4x4x4d)
static void highbd_set_var_fns(VP9_COMP *const cpi) {
VP9_COMMON *const cm = &cpi->common;
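
Only the wrapped names change here; the wrappers still normalize
high-bitdepth SAD values back to an 8-bit scale, as the `sad_array[i] >>= 4`
tail visible at the top of this hunk suggests. A sketch of the single-SAD
wrapper under that assumption (the real macro bodies are defined earlier in
this file; this is an assumed expansion, not the committed text):

    /* Assumed expansion of MAKE_BFP_SAD_WRAPPER(fnname): three thunks that
     * rescale 10- and 12-bit SADs so costs stay comparable across depths. */
    #define MAKE_BFP_SAD_WRAPPER_SKETCH(fnname)                              \
      static unsigned int fnname##_bits8(const uint8_t *src, int src_stride, \
                                         const uint8_t *ref, int ref_stride) {\
        return fnname(src, src_stride, ref, ref_stride);                     \
      }                                                                      \
      static unsigned int fnname##_bits10(const uint8_t *src, int src_stride,\
                                          const uint8_t *ref, int ref_stride) {\
        return fnname(src, src_stride, ref, ref_stride) >> 2;                \
      }                                                                      \
      static unsigned int fnname##_bits12(const uint8_t *src, int src_stride,\
                                          const uint8_t *ref, int ref_stride) {\
        return fnname(src, src_stride, ref, ref_stride) >> 4;                \
      }
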
@@ -991,398 +993,398 @@
switch (cm->bit_depth) {
case VPX_BITS_8:
HIGHBD_BFP(BLOCK_32X16,
- vp9_highbd_sad32x16_bits8,
- vp9_highbd_sad32x16_avg_bits8,
+ vpx_highbd_sad32x16_bits8,
+ vpx_highbd_sad32x16_avg_bits8,
vp9_highbd_variance32x16,
vp9_highbd_sub_pixel_variance32x16,
vp9_highbd_sub_pixel_avg_variance32x16,
NULL,
NULL,
- vp9_highbd_sad32x16x4d_bits8)
+ vpx_highbd_sad32x16x4d_bits8)
HIGHBD_BFP(BLOCK_16X32,
- vp9_highbd_sad16x32_bits8,
- vp9_highbd_sad16x32_avg_bits8,
+ vpx_highbd_sad16x32_bits8,
+ vpx_highbd_sad16x32_avg_bits8,
vp9_highbd_variance16x32,
vp9_highbd_sub_pixel_variance16x32,
vp9_highbd_sub_pixel_avg_variance16x32,
NULL,
NULL,
- vp9_highbd_sad16x32x4d_bits8)
+ vpx_highbd_sad16x32x4d_bits8)
HIGHBD_BFP(BLOCK_64X32,
- vp9_highbd_sad64x32_bits8,
- vp9_highbd_sad64x32_avg_bits8,
+ vpx_highbd_sad64x32_bits8,
+ vpx_highbd_sad64x32_avg_bits8,
vp9_highbd_variance64x32,
vp9_highbd_sub_pixel_variance64x32,
vp9_highbd_sub_pixel_avg_variance64x32,
NULL,
NULL,
- vp9_highbd_sad64x32x4d_bits8)
+ vpx_highbd_sad64x32x4d_bits8)
HIGHBD_BFP(BLOCK_32X64,
- vp9_highbd_sad32x64_bits8,
- vp9_highbd_sad32x64_avg_bits8,
+ vpx_highbd_sad32x64_bits8,
+ vpx_highbd_sad32x64_avg_bits8,
vp9_highbd_variance32x64,
vp9_highbd_sub_pixel_variance32x64,
vp9_highbd_sub_pixel_avg_variance32x64,
NULL,
NULL,
- vp9_highbd_sad32x64x4d_bits8)
+ vpx_highbd_sad32x64x4d_bits8)
HIGHBD_BFP(BLOCK_32X32,
- vp9_highbd_sad32x32_bits8,
- vp9_highbd_sad32x32_avg_bits8,
+ vpx_highbd_sad32x32_bits8,
+ vpx_highbd_sad32x32_avg_bits8,
vp9_highbd_variance32x32,
vp9_highbd_sub_pixel_variance32x32,
vp9_highbd_sub_pixel_avg_variance32x32,
- vp9_highbd_sad32x32x3_bits8,
- vp9_highbd_sad32x32x8_bits8,
- vp9_highbd_sad32x32x4d_bits8)
+ vpx_highbd_sad32x32x3_bits8,
+ vpx_highbd_sad32x32x8_bits8,
+ vpx_highbd_sad32x32x4d_bits8)
HIGHBD_BFP(BLOCK_64X64,
- vp9_highbd_sad64x64_bits8,
- vp9_highbd_sad64x64_avg_bits8,
+ vpx_highbd_sad64x64_bits8,
+ vpx_highbd_sad64x64_avg_bits8,
vp9_highbd_variance64x64,
vp9_highbd_sub_pixel_variance64x64,
vp9_highbd_sub_pixel_avg_variance64x64,
- vp9_highbd_sad64x64x3_bits8,
- vp9_highbd_sad64x64x8_bits8,
- vp9_highbd_sad64x64x4d_bits8)
+ vpx_highbd_sad64x64x3_bits8,
+ vpx_highbd_sad64x64x8_bits8,
+ vpx_highbd_sad64x64x4d_bits8)
HIGHBD_BFP(BLOCK_16X16,
- vp9_highbd_sad16x16_bits8,
- vp9_highbd_sad16x16_avg_bits8,
+ vpx_highbd_sad16x16_bits8,
+ vpx_highbd_sad16x16_avg_bits8,
vp9_highbd_variance16x16,
vp9_highbd_sub_pixel_variance16x16,
vp9_highbd_sub_pixel_avg_variance16x16,
- vp9_highbd_sad16x16x3_bits8,
- vp9_highbd_sad16x16x8_bits8,
- vp9_highbd_sad16x16x4d_bits8)
+ vpx_highbd_sad16x16x3_bits8,
+ vpx_highbd_sad16x16x8_bits8,
+ vpx_highbd_sad16x16x4d_bits8)
HIGHBD_BFP(BLOCK_16X8,
- vp9_highbd_sad16x8_bits8,
- vp9_highbd_sad16x8_avg_bits8,
+ vpx_highbd_sad16x8_bits8,
+ vpx_highbd_sad16x8_avg_bits8,
vp9_highbd_variance16x8,
vp9_highbd_sub_pixel_variance16x8,
vp9_highbd_sub_pixel_avg_variance16x8,
- vp9_highbd_sad16x8x3_bits8,
- vp9_highbd_sad16x8x8_bits8,
- vp9_highbd_sad16x8x4d_bits8)
+ vpx_highbd_sad16x8x3_bits8,
+ vpx_highbd_sad16x8x8_bits8,
+ vpx_highbd_sad16x8x4d_bits8)
HIGHBD_BFP(BLOCK_8X16,
- vp9_highbd_sad8x16_bits8,
- vp9_highbd_sad8x16_avg_bits8,
+ vpx_highbd_sad8x16_bits8,
+ vpx_highbd_sad8x16_avg_bits8,
vp9_highbd_variance8x16,
vp9_highbd_sub_pixel_variance8x16,
vp9_highbd_sub_pixel_avg_variance8x16,
- vp9_highbd_sad8x16x3_bits8,
- vp9_highbd_sad8x16x8_bits8,
- vp9_highbd_sad8x16x4d_bits8)
+ vpx_highbd_sad8x16x3_bits8,
+ vpx_highbd_sad8x16x8_bits8,
+ vpx_highbd_sad8x16x4d_bits8)
HIGHBD_BFP(BLOCK_8X8,
- vp9_highbd_sad8x8_bits8,
- vp9_highbd_sad8x8_avg_bits8,
+ vpx_highbd_sad8x8_bits8,
+ vpx_highbd_sad8x8_avg_bits8,
vp9_highbd_variance8x8,
vp9_highbd_sub_pixel_variance8x8,
vp9_highbd_sub_pixel_avg_variance8x8,
- vp9_highbd_sad8x8x3_bits8,
- vp9_highbd_sad8x8x8_bits8,
- vp9_highbd_sad8x8x4d_bits8)
+ vpx_highbd_sad8x8x3_bits8,
+ vpx_highbd_sad8x8x8_bits8,
+ vpx_highbd_sad8x8x4d_bits8)
HIGHBD_BFP(BLOCK_8X4,
- vp9_highbd_sad8x4_bits8,
- vp9_highbd_sad8x4_avg_bits8,
+ vpx_highbd_sad8x4_bits8,
+ vpx_highbd_sad8x4_avg_bits8,
vp9_highbd_variance8x4,
vp9_highbd_sub_pixel_variance8x4,
vp9_highbd_sub_pixel_avg_variance8x4,
NULL,
- vp9_highbd_sad8x4x8_bits8,
- vp9_highbd_sad8x4x4d_bits8)
+ vpx_highbd_sad8x4x8_bits8,
+ vpx_highbd_sad8x4x4d_bits8)
HIGHBD_BFP(BLOCK_4X8,
- vp9_highbd_sad4x8_bits8,
- vp9_highbd_sad4x8_avg_bits8,
+ vpx_highbd_sad4x8_bits8,
+ vpx_highbd_sad4x8_avg_bits8,
vp9_highbd_variance4x8,
vp9_highbd_sub_pixel_variance4x8,
vp9_highbd_sub_pixel_avg_variance4x8,
NULL,
- vp9_highbd_sad4x8x8_bits8,
- vp9_highbd_sad4x8x4d_bits8)
+ vpx_highbd_sad4x8x8_bits8,
+ vpx_highbd_sad4x8x4d_bits8)
HIGHBD_BFP(BLOCK_4X4,
- vp9_highbd_sad4x4_bits8,
- vp9_highbd_sad4x4_avg_bits8,
+ vpx_highbd_sad4x4_bits8,
+ vpx_highbd_sad4x4_avg_bits8,
vp9_highbd_variance4x4,
vp9_highbd_sub_pixel_variance4x4,
vp9_highbd_sub_pixel_avg_variance4x4,
- vp9_highbd_sad4x4x3_bits8,
- vp9_highbd_sad4x4x8_bits8,
- vp9_highbd_sad4x4x4d_bits8)
+ vpx_highbd_sad4x4x3_bits8,
+ vpx_highbd_sad4x4x8_bits8,
+ vpx_highbd_sad4x4x4d_bits8)
break;
case VPX_BITS_10:
HIGHBD_BFP(BLOCK_32X16,
- vp9_highbd_sad32x16_bits10,
- vp9_highbd_sad32x16_avg_bits10,
+ vpx_highbd_sad32x16_bits10,
+ vpx_highbd_sad32x16_avg_bits10,
vp9_highbd_10_variance32x16,
vp9_highbd_10_sub_pixel_variance32x16,
vp9_highbd_10_sub_pixel_avg_variance32x16,
NULL,
NULL,
- vp9_highbd_sad32x16x4d_bits10)
+ vpx_highbd_sad32x16x4d_bits10)
HIGHBD_BFP(BLOCK_16X32,
- vp9_highbd_sad16x32_bits10,
- vp9_highbd_sad16x32_avg_bits10,
+ vpx_highbd_sad16x32_bits10,
+ vpx_highbd_sad16x32_avg_bits10,
vp9_highbd_10_variance16x32,
vp9_highbd_10_sub_pixel_variance16x32,
vp9_highbd_10_sub_pixel_avg_variance16x32,
NULL,
NULL,
- vp9_highbd_sad16x32x4d_bits10)
+ vpx_highbd_sad16x32x4d_bits10)
HIGHBD_BFP(BLOCK_64X32,
- vp9_highbd_sad64x32_bits10,
- vp9_highbd_sad64x32_avg_bits10,
+ vpx_highbd_sad64x32_bits10,
+ vpx_highbd_sad64x32_avg_bits10,
vp9_highbd_10_variance64x32,
vp9_highbd_10_sub_pixel_variance64x32,
vp9_highbd_10_sub_pixel_avg_variance64x32,
NULL,
NULL,
- vp9_highbd_sad64x32x4d_bits10)
+ vpx_highbd_sad64x32x4d_bits10)
HIGHBD_BFP(BLOCK_32X64,
- vp9_highbd_sad32x64_bits10,
- vp9_highbd_sad32x64_avg_bits10,
+ vpx_highbd_sad32x64_bits10,
+ vpx_highbd_sad32x64_avg_bits10,
vp9_highbd_10_variance32x64,
vp9_highbd_10_sub_pixel_variance32x64,
vp9_highbd_10_sub_pixel_avg_variance32x64,
NULL,
NULL,
- vp9_highbd_sad32x64x4d_bits10)
+ vpx_highbd_sad32x64x4d_bits10)
HIGHBD_BFP(BLOCK_32X32,
- vp9_highbd_sad32x32_bits10,
- vp9_highbd_sad32x32_avg_bits10,
+ vpx_highbd_sad32x32_bits10,
+ vpx_highbd_sad32x32_avg_bits10,
vp9_highbd_10_variance32x32,
vp9_highbd_10_sub_pixel_variance32x32,
vp9_highbd_10_sub_pixel_avg_variance32x32,
- vp9_highbd_sad32x32x3_bits10,
- vp9_highbd_sad32x32x8_bits10,
- vp9_highbd_sad32x32x4d_bits10)
+ vpx_highbd_sad32x32x3_bits10,
+ vpx_highbd_sad32x32x8_bits10,
+ vpx_highbd_sad32x32x4d_bits10)
HIGHBD_BFP(BLOCK_64X64,
- vp9_highbd_sad64x64_bits10,
- vp9_highbd_sad64x64_avg_bits10,
+ vpx_highbd_sad64x64_bits10,
+ vpx_highbd_sad64x64_avg_bits10,
vp9_highbd_10_variance64x64,
vp9_highbd_10_sub_pixel_variance64x64,
vp9_highbd_10_sub_pixel_avg_variance64x64,
- vp9_highbd_sad64x64x3_bits10,
- vp9_highbd_sad64x64x8_bits10,
- vp9_highbd_sad64x64x4d_bits10)
+ vpx_highbd_sad64x64x3_bits10,
+ vpx_highbd_sad64x64x8_bits10,
+ vpx_highbd_sad64x64x4d_bits10)
HIGHBD_BFP(BLOCK_16X16,
- vp9_highbd_sad16x16_bits10,
- vp9_highbd_sad16x16_avg_bits10,
+ vpx_highbd_sad16x16_bits10,
+ vpx_highbd_sad16x16_avg_bits10,
vp9_highbd_10_variance16x16,
vp9_highbd_10_sub_pixel_variance16x16,
vp9_highbd_10_sub_pixel_avg_variance16x16,
- vp9_highbd_sad16x16x3_bits10,
- vp9_highbd_sad16x16x8_bits10,
- vp9_highbd_sad16x16x4d_bits10)
+ vpx_highbd_sad16x16x3_bits10,
+ vpx_highbd_sad16x16x8_bits10,
+ vpx_highbd_sad16x16x4d_bits10)
HIGHBD_BFP(BLOCK_16X8,
- vp9_highbd_sad16x8_bits10,
- vp9_highbd_sad16x8_avg_bits10,
+ vpx_highbd_sad16x8_bits10,
+ vpx_highbd_sad16x8_avg_bits10,
vp9_highbd_10_variance16x8,
vp9_highbd_10_sub_pixel_variance16x8,
vp9_highbd_10_sub_pixel_avg_variance16x8,
- vp9_highbd_sad16x8x3_bits10,
- vp9_highbd_sad16x8x8_bits10,
- vp9_highbd_sad16x8x4d_bits10)
+ vpx_highbd_sad16x8x3_bits10,
+ vpx_highbd_sad16x8x8_bits10,
+ vpx_highbd_sad16x8x4d_bits10)
HIGHBD_BFP(BLOCK_8X16,
- vp9_highbd_sad8x16_bits10,
- vp9_highbd_sad8x16_avg_bits10,
+ vpx_highbd_sad8x16_bits10,
+ vpx_highbd_sad8x16_avg_bits10,
vp9_highbd_10_variance8x16,
vp9_highbd_10_sub_pixel_variance8x16,
vp9_highbd_10_sub_pixel_avg_variance8x16,
- vp9_highbd_sad8x16x3_bits10,
- vp9_highbd_sad8x16x8_bits10,
- vp9_highbd_sad8x16x4d_bits10)
+ vpx_highbd_sad8x16x3_bits10,
+ vpx_highbd_sad8x16x8_bits10,
+ vpx_highbd_sad8x16x4d_bits10)
HIGHBD_BFP(BLOCK_8X8,
- vp9_highbd_sad8x8_bits10,
- vp9_highbd_sad8x8_avg_bits10,
+ vpx_highbd_sad8x8_bits10,
+ vpx_highbd_sad8x8_avg_bits10,
vp9_highbd_10_variance8x8,
vp9_highbd_10_sub_pixel_variance8x8,
vp9_highbd_10_sub_pixel_avg_variance8x8,
- vp9_highbd_sad8x8x3_bits10,
- vp9_highbd_sad8x8x8_bits10,
- vp9_highbd_sad8x8x4d_bits10)
+ vpx_highbd_sad8x8x3_bits10,
+ vpx_highbd_sad8x8x8_bits10,
+ vpx_highbd_sad8x8x4d_bits10)
HIGHBD_BFP(BLOCK_8X4,
- vp9_highbd_sad8x4_bits10,
- vp9_highbd_sad8x4_avg_bits10,
+ vpx_highbd_sad8x4_bits10,
+ vpx_highbd_sad8x4_avg_bits10,
vp9_highbd_10_variance8x4,
vp9_highbd_10_sub_pixel_variance8x4,
vp9_highbd_10_sub_pixel_avg_variance8x4,
NULL,
- vp9_highbd_sad8x4x8_bits10,
- vp9_highbd_sad8x4x4d_bits10)
+ vpx_highbd_sad8x4x8_bits10,
+ vpx_highbd_sad8x4x4d_bits10)
HIGHBD_BFP(BLOCK_4X8,
- vp9_highbd_sad4x8_bits10,
- vp9_highbd_sad4x8_avg_bits10,
+ vpx_highbd_sad4x8_bits10,
+ vpx_highbd_sad4x8_avg_bits10,
vp9_highbd_10_variance4x8,
vp9_highbd_10_sub_pixel_variance4x8,
vp9_highbd_10_sub_pixel_avg_variance4x8,
NULL,
- vp9_highbd_sad4x8x8_bits10,
- vp9_highbd_sad4x8x4d_bits10)
+ vpx_highbd_sad4x8x8_bits10,
+ vpx_highbd_sad4x8x4d_bits10)
HIGHBD_BFP(BLOCK_4X4,
- vp9_highbd_sad4x4_bits10,
- vp9_highbd_sad4x4_avg_bits10,
+ vpx_highbd_sad4x4_bits10,
+ vpx_highbd_sad4x4_avg_bits10,
vp9_highbd_10_variance4x4,
vp9_highbd_10_sub_pixel_variance4x4,
vp9_highbd_10_sub_pixel_avg_variance4x4,
- vp9_highbd_sad4x4x3_bits10,
- vp9_highbd_sad4x4x8_bits10,
- vp9_highbd_sad4x4x4d_bits10)
+ vpx_highbd_sad4x4x3_bits10,
+ vpx_highbd_sad4x4x8_bits10,
+ vpx_highbd_sad4x4x4d_bits10)
break;
case VPX_BITS_12:
HIGHBD_BFP(BLOCK_32X16,
- vp9_highbd_sad32x16_bits12,
- vp9_highbd_sad32x16_avg_bits12,
+ vpx_highbd_sad32x16_bits12,
+ vpx_highbd_sad32x16_avg_bits12,
vp9_highbd_12_variance32x16,
vp9_highbd_12_sub_pixel_variance32x16,
vp9_highbd_12_sub_pixel_avg_variance32x16,
NULL,
NULL,
- vp9_highbd_sad32x16x4d_bits12)
+ vpx_highbd_sad32x16x4d_bits12)
HIGHBD_BFP(BLOCK_16X32,
- vp9_highbd_sad16x32_bits12,
- vp9_highbd_sad16x32_avg_bits12,
+ vpx_highbd_sad16x32_bits12,
+ vpx_highbd_sad16x32_avg_bits12,
vp9_highbd_12_variance16x32,
vp9_highbd_12_sub_pixel_variance16x32,
vp9_highbd_12_sub_pixel_avg_variance16x32,
NULL,
NULL,
- vp9_highbd_sad16x32x4d_bits12)
+ vpx_highbd_sad16x32x4d_bits12)
HIGHBD_BFP(BLOCK_64X32,
- vp9_highbd_sad64x32_bits12,
- vp9_highbd_sad64x32_avg_bits12,
+ vpx_highbd_sad64x32_bits12,
+ vpx_highbd_sad64x32_avg_bits12,
vp9_highbd_12_variance64x32,
vp9_highbd_12_sub_pixel_variance64x32,
vp9_highbd_12_sub_pixel_avg_variance64x32,
NULL,
NULL,
- vp9_highbd_sad64x32x4d_bits12)
+ vpx_highbd_sad64x32x4d_bits12)
HIGHBD_BFP(BLOCK_32X64,
- vp9_highbd_sad32x64_bits12,
- vp9_highbd_sad32x64_avg_bits12,
+ vpx_highbd_sad32x64_bits12,
+ vpx_highbd_sad32x64_avg_bits12,
vp9_highbd_12_variance32x64,
vp9_highbd_12_sub_pixel_variance32x64,
vp9_highbd_12_sub_pixel_avg_variance32x64,
NULL,
NULL,
- vp9_highbd_sad32x64x4d_bits12)
+ vpx_highbd_sad32x64x4d_bits12)
HIGHBD_BFP(BLOCK_32X32,
- vp9_highbd_sad32x32_bits12,
- vp9_highbd_sad32x32_avg_bits12,
+ vpx_highbd_sad32x32_bits12,
+ vpx_highbd_sad32x32_avg_bits12,
vp9_highbd_12_variance32x32,
vp9_highbd_12_sub_pixel_variance32x32,
vp9_highbd_12_sub_pixel_avg_variance32x32,
- vp9_highbd_sad32x32x3_bits12,
- vp9_highbd_sad32x32x8_bits12,
- vp9_highbd_sad32x32x4d_bits12)
+ vpx_highbd_sad32x32x3_bits12,
+ vpx_highbd_sad32x32x8_bits12,
+ vpx_highbd_sad32x32x4d_bits12)
HIGHBD_BFP(BLOCK_64X64,
- vp9_highbd_sad64x64_bits12,
- vp9_highbd_sad64x64_avg_bits12,
+ vpx_highbd_sad64x64_bits12,
+ vpx_highbd_sad64x64_avg_bits12,
vp9_highbd_12_variance64x64,
vp9_highbd_12_sub_pixel_variance64x64,
vp9_highbd_12_sub_pixel_avg_variance64x64,
- vp9_highbd_sad64x64x3_bits12,
- vp9_highbd_sad64x64x8_bits12,
- vp9_highbd_sad64x64x4d_bits12)
+ vpx_highbd_sad64x64x3_bits12,
+ vpx_highbd_sad64x64x8_bits12,
+ vpx_highbd_sad64x64x4d_bits12)
HIGHBD_BFP(BLOCK_16X16,
- vp9_highbd_sad16x16_bits12,
- vp9_highbd_sad16x16_avg_bits12,
+ vpx_highbd_sad16x16_bits12,
+ vpx_highbd_sad16x16_avg_bits12,
vp9_highbd_12_variance16x16,
vp9_highbd_12_sub_pixel_variance16x16,
vp9_highbd_12_sub_pixel_avg_variance16x16,
- vp9_highbd_sad16x16x3_bits12,
- vp9_highbd_sad16x16x8_bits12,
- vp9_highbd_sad16x16x4d_bits12)
+ vpx_highbd_sad16x16x3_bits12,
+ vpx_highbd_sad16x16x8_bits12,
+ vpx_highbd_sad16x16x4d_bits12)
HIGHBD_BFP(BLOCK_16X8,
- vp9_highbd_sad16x8_bits12,
- vp9_highbd_sad16x8_avg_bits12,
+ vpx_highbd_sad16x8_bits12,
+ vpx_highbd_sad16x8_avg_bits12,
vp9_highbd_12_variance16x8,
vp9_highbd_12_sub_pixel_variance16x8,
vp9_highbd_12_sub_pixel_avg_variance16x8,
- vp9_highbd_sad16x8x3_bits12,
- vp9_highbd_sad16x8x8_bits12,
- vp9_highbd_sad16x8x4d_bits12)
+ vpx_highbd_sad16x8x3_bits12,
+ vpx_highbd_sad16x8x8_bits12,
+ vpx_highbd_sad16x8x4d_bits12)
HIGHBD_BFP(BLOCK_8X16,
- vp9_highbd_sad8x16_bits12,
- vp9_highbd_sad8x16_avg_bits12,
+ vpx_highbd_sad8x16_bits12,
+ vpx_highbd_sad8x16_avg_bits12,
vp9_highbd_12_variance8x16,
vp9_highbd_12_sub_pixel_variance8x16,
vp9_highbd_12_sub_pixel_avg_variance8x16,
- vp9_highbd_sad8x16x3_bits12,
- vp9_highbd_sad8x16x8_bits12,
- vp9_highbd_sad8x16x4d_bits12)
+ vpx_highbd_sad8x16x3_bits12,
+ vpx_highbd_sad8x16x8_bits12,
+ vpx_highbd_sad8x16x4d_bits12)
HIGHBD_BFP(BLOCK_8X8,
- vp9_highbd_sad8x8_bits12,
- vp9_highbd_sad8x8_avg_bits12,
+ vpx_highbd_sad8x8_bits12,
+ vpx_highbd_sad8x8_avg_bits12,
vp9_highbd_12_variance8x8,
vp9_highbd_12_sub_pixel_variance8x8,
vp9_highbd_12_sub_pixel_avg_variance8x8,
- vp9_highbd_sad8x8x3_bits12,
- vp9_highbd_sad8x8x8_bits12,
- vp9_highbd_sad8x8x4d_bits12)
+ vpx_highbd_sad8x8x3_bits12,
+ vpx_highbd_sad8x8x8_bits12,
+ vpx_highbd_sad8x8x4d_bits12)
HIGHBD_BFP(BLOCK_8X4,
- vp9_highbd_sad8x4_bits12,
- vp9_highbd_sad8x4_avg_bits12,
+ vpx_highbd_sad8x4_bits12,
+ vpx_highbd_sad8x4_avg_bits12,
vp9_highbd_12_variance8x4,
vp9_highbd_12_sub_pixel_variance8x4,
vp9_highbd_12_sub_pixel_avg_variance8x4,
NULL,
- vp9_highbd_sad8x4x8_bits12,
- vp9_highbd_sad8x4x4d_bits12)
+ vpx_highbd_sad8x4x8_bits12,
+ vpx_highbd_sad8x4x4d_bits12)
HIGHBD_BFP(BLOCK_4X8,
- vp9_highbd_sad4x8_bits12,
- vp9_highbd_sad4x8_avg_bits12,
+ vpx_highbd_sad4x8_bits12,
+ vpx_highbd_sad4x8_avg_bits12,
vp9_highbd_12_variance4x8,
vp9_highbd_12_sub_pixel_variance4x8,
vp9_highbd_12_sub_pixel_avg_variance4x8,
NULL,
- vp9_highbd_sad4x8x8_bits12,
- vp9_highbd_sad4x8x4d_bits12)
+ vpx_highbd_sad4x8x8_bits12,
+ vpx_highbd_sad4x8x4d_bits12)
HIGHBD_BFP(BLOCK_4X4,
- vp9_highbd_sad4x4_bits12,
- vp9_highbd_sad4x4_avg_bits12,
+ vpx_highbd_sad4x4_bits12,
+ vpx_highbd_sad4x4_avg_bits12,
vp9_highbd_12_variance4x4,
vp9_highbd_12_sub_pixel_variance4x4,
vp9_highbd_12_sub_pixel_avg_variance4x4,
- vp9_highbd_sad4x4x3_bits12,
- vp9_highbd_sad4x4x8_bits12,
- vp9_highbd_sad4x4x4d_bits12)
+ vpx_highbd_sad4x4x3_bits12,
+ vpx_highbd_sad4x4x8_bits12,
+ vpx_highbd_sad4x4x4d_bits12)
break;
default:
@@ -1799,64 +1801,64 @@
cpi->fn_ptr[BT].sdx8f = SDX8F; \
cpi->fn_ptr[BT].sdx4df = SDX4DF;
- BFP(BLOCK_32X16, vp9_sad32x16, vp9_sad32x16_avg,
+ BFP(BLOCK_32X16, vpx_sad32x16, vpx_sad32x16_avg,
vp9_variance32x16, vp9_sub_pixel_variance32x16,
- vp9_sub_pixel_avg_variance32x16, NULL, NULL, vp9_sad32x16x4d)
+ vp9_sub_pixel_avg_variance32x16, NULL, NULL, vpx_sad32x16x4d)
- BFP(BLOCK_16X32, vp9_sad16x32, vp9_sad16x32_avg,
+ BFP(BLOCK_16X32, vpx_sad16x32, vpx_sad16x32_avg,
vp9_variance16x32, vp9_sub_pixel_variance16x32,
- vp9_sub_pixel_avg_variance16x32, NULL, NULL, vp9_sad16x32x4d)
+ vp9_sub_pixel_avg_variance16x32, NULL, NULL, vpx_sad16x32x4d)
- BFP(BLOCK_64X32, vp9_sad64x32, vp9_sad64x32_avg,
+ BFP(BLOCK_64X32, vpx_sad64x32, vpx_sad64x32_avg,
vp9_variance64x32, vp9_sub_pixel_variance64x32,
- vp9_sub_pixel_avg_variance64x32, NULL, NULL, vp9_sad64x32x4d)
+ vp9_sub_pixel_avg_variance64x32, NULL, NULL, vpx_sad64x32x4d)
- BFP(BLOCK_32X64, vp9_sad32x64, vp9_sad32x64_avg,
+ BFP(BLOCK_32X64, vpx_sad32x64, vpx_sad32x64_avg,
vp9_variance32x64, vp9_sub_pixel_variance32x64,
- vp9_sub_pixel_avg_variance32x64, NULL, NULL, vp9_sad32x64x4d)
+ vp9_sub_pixel_avg_variance32x64, NULL, NULL, vpx_sad32x64x4d)
- BFP(BLOCK_32X32, vp9_sad32x32, vp9_sad32x32_avg,
+ BFP(BLOCK_32X32, vpx_sad32x32, vpx_sad32x32_avg,
vp9_variance32x32, vp9_sub_pixel_variance32x32,
- vp9_sub_pixel_avg_variance32x32, vp9_sad32x32x3, vp9_sad32x32x8,
- vp9_sad32x32x4d)
+ vp9_sub_pixel_avg_variance32x32, vpx_sad32x32x3, vpx_sad32x32x8,
+ vpx_sad32x32x4d)
- BFP(BLOCK_64X64, vp9_sad64x64, vp9_sad64x64_avg,
+ BFP(BLOCK_64X64, vpx_sad64x64, vpx_sad64x64_avg,
vp9_variance64x64, vp9_sub_pixel_variance64x64,
- vp9_sub_pixel_avg_variance64x64, vp9_sad64x64x3, vp9_sad64x64x8,
- vp9_sad64x64x4d)
+ vp9_sub_pixel_avg_variance64x64, vpx_sad64x64x3, vpx_sad64x64x8,
+ vpx_sad64x64x4d)
- BFP(BLOCK_16X16, vp9_sad16x16, vp9_sad16x16_avg,
+ BFP(BLOCK_16X16, vpx_sad16x16, vpx_sad16x16_avg,
vp9_variance16x16, vp9_sub_pixel_variance16x16,
- vp9_sub_pixel_avg_variance16x16, vp9_sad16x16x3, vp9_sad16x16x8,
- vp9_sad16x16x4d)
+ vp9_sub_pixel_avg_variance16x16, vpx_sad16x16x3, vpx_sad16x16x8,
+ vpx_sad16x16x4d)
- BFP(BLOCK_16X8, vp9_sad16x8, vp9_sad16x8_avg,
+ BFP(BLOCK_16X8, vpx_sad16x8, vpx_sad16x8_avg,
vp9_variance16x8, vp9_sub_pixel_variance16x8,
vp9_sub_pixel_avg_variance16x8,
- vp9_sad16x8x3, vp9_sad16x8x8, vp9_sad16x8x4d)
+ vpx_sad16x8x3, vpx_sad16x8x8, vpx_sad16x8x4d)
- BFP(BLOCK_8X16, vp9_sad8x16, vp9_sad8x16_avg,
+ BFP(BLOCK_8X16, vpx_sad8x16, vpx_sad8x16_avg,
vp9_variance8x16, vp9_sub_pixel_variance8x16,
vp9_sub_pixel_avg_variance8x16,
- vp9_sad8x16x3, vp9_sad8x16x8, vp9_sad8x16x4d)
+ vpx_sad8x16x3, vpx_sad8x16x8, vpx_sad8x16x4d)
- BFP(BLOCK_8X8, vp9_sad8x8, vp9_sad8x8_avg,
+ BFP(BLOCK_8X8, vpx_sad8x8, vpx_sad8x8_avg,
vp9_variance8x8, vp9_sub_pixel_variance8x8,
vp9_sub_pixel_avg_variance8x8,
- vp9_sad8x8x3, vp9_sad8x8x8, vp9_sad8x8x4d)
+ vpx_sad8x8x3, vpx_sad8x8x8, vpx_sad8x8x4d)
- BFP(BLOCK_8X4, vp9_sad8x4, vp9_sad8x4_avg,
+ BFP(BLOCK_8X4, vpx_sad8x4, vpx_sad8x4_avg,
vp9_variance8x4, vp9_sub_pixel_variance8x4,
- vp9_sub_pixel_avg_variance8x4, NULL, vp9_sad8x4x8, vp9_sad8x4x4d)
+ vp9_sub_pixel_avg_variance8x4, NULL, vpx_sad8x4x8, vpx_sad8x4x4d)
- BFP(BLOCK_4X8, vp9_sad4x8, vp9_sad4x8_avg,
+ BFP(BLOCK_4X8, vpx_sad4x8, vpx_sad4x8_avg,
vp9_variance4x8, vp9_sub_pixel_variance4x8,
- vp9_sub_pixel_avg_variance4x8, NULL, vp9_sad4x8x8, vp9_sad4x8x4d)
+ vp9_sub_pixel_avg_variance4x8, NULL, vpx_sad4x8x8, vpx_sad4x8x4d)
- BFP(BLOCK_4X4, vp9_sad4x4, vp9_sad4x4_avg,
+ BFP(BLOCK_4X4, vpx_sad4x4, vpx_sad4x4_avg,
vp9_variance4x4, vp9_sub_pixel_variance4x4,
vp9_sub_pixel_avg_variance4x4,
- vp9_sad4x4x3, vp9_sad4x4x8, vp9_sad4x4x4d)
+ vpx_sad4x4x3, vpx_sad4x4x8, vpx_sad4x4x4d)
#if CONFIG_VP9_HIGHBITDEPTH
highbd_set_var_fns(cpi);
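
The BFP/HIGHBD_BFP tables are the only places that needed retargeting:
callers go through cpi->fn_ptr and never name a kernel directly, which is
what makes the vp9_ to vpx_ swap a table-only change. A hedged usage sketch
(the sdf field matches the assignments implied above; the helper itself is
illustrative, not part of this commit):

    /* Illustrative: how motion search reads the table populated by BFP(). */
    static unsigned int block_sad_sketch(const VP9_COMP *cpi,
                                         BLOCK_SIZE bsize,
                                         const uint8_t *src, int src_stride,
                                         const uint8_t *ref, int ref_stride) {
      const vp9_variance_fn_ptr_t *fn = &cpi->fn_ptr[bsize];
      return fn->sdf(src, src_stride, ref, ref_stride);  /* vpx_sad* now */
    }
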
diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c
index 06c3885..d5eeb9c 100644
--- a/vp9/encoder/vp9_mbgraph.c
+++ b/vp9/encoder/vp9_mbgraph.c
@@ -10,6 +10,9 @@
#include <limits.h>
+#include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
+
#include "vpx_mem/vpx_mem.h"
#include "vp9/encoder/vp9_segmentation.h"
#include "vp9/encoder/vp9_mcomp.h"
@@ -74,8 +77,8 @@
x->mv_row_min = tmp_row_min;
x->mv_row_max = tmp_row_max;
- return vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
- xd->plane[0].dst.buf, xd->plane[0].dst.stride);
+ return vpx_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
+ xd->plane[0].dst.buf, xd->plane[0].dst.stride);
}
static int do_16x16_motion_search(VP9_COMP *cpi, const MV *ref_mv,
@@ -87,7 +90,7 @@
// Try zero MV first
// FIXME should really use something like near/nearest MV and/or MV prediction
- err = vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
+ err = vpx_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride);
dst_mv->as_int = 0;
@@ -123,7 +126,7 @@
// Try zero MV first
// FIXME should really use something like near/nearest MV and/or MV prediction
- err = vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
+ err = vpx_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride);
dst_mv->as_int = 0;
@@ -146,7 +149,7 @@
x->plane[0].src.buf, x->plane[0].src.stride,
xd->plane[0].dst.buf, xd->plane[0].dst.stride,
0, 0, 0);
- err = vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
+ err = vpx_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
xd->plane[0].dst.buf, xd->plane[0].dst.stride);
// find best
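
The mbgraph call sites change only in prefix: the zero-MV error is still a
plain 16x16 SAD between source and prediction. For reference, the prototype
these calls now resolve against (assumed to match the moved vpx_dsp entry):

    /* Assumed shared prototype backing the vpx_sad16x16 calls above. */
    unsigned int vpx_sad16x16(const uint8_t *src_ptr, int src_stride,
                              const uint8_t *ref_ptr, int ref_stride);
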
diff --git a/vp9/encoder/vp9_sad.c b/vp9/encoder/vp9_sad.c
deleted file mode 100644
index 73134f2..0000000
--- a/vp9/encoder/vp9_sad.c
+++ /dev/null
@@ -1,276 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <stdlib.h>
-
-#include "./vp9_rtcd.h"
-#include "./vpx_config.h"
-
-#include "vpx/vpx_integer.h"
-#if CONFIG_VP9_HIGHBITDEPTH
-#include "vp9/common/vp9_common.h"
-#endif
-#include "vp9/encoder/vp9_variance.h"
-
-static INLINE unsigned int sad(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
- int width, int height) {
- int y, x;
- unsigned int sad = 0;
-
- for (y = 0; y < height; y++) {
- for (x = 0; x < width; x++)
- sad += abs(a[x] - b[x]);
-
- a += a_stride;
- b += b_stride;
- }
- return sad;
-}
-
-#define sadMxN(m, n) \
-unsigned int vp9_sad##m##x##n##_c(const uint8_t *src, int src_stride, \
- const uint8_t *ref, int ref_stride) { \
- return sad(src, src_stride, ref, ref_stride, m, n); \
-} \
-unsigned int vp9_sad##m##x##n##_avg_c(const uint8_t *src, int src_stride, \
- const uint8_t *ref, int ref_stride, \
- const uint8_t *second_pred) { \
- uint8_t comp_pred[m * n]; \
- vp9_comp_avg_pred(comp_pred, second_pred, m, n, ref, ref_stride); \
- return sad(src, src_stride, comp_pred, m, m, n); \
-}
-
-#define sadMxNxK(m, n, k) \
-void vp9_sad##m##x##n##x##k##_c(const uint8_t *src, int src_stride, \
- const uint8_t *ref, int ref_stride, \
- unsigned int *sads) { \
- int i; \
- for (i = 0; i < k; ++i) \
- sads[i] = vp9_sad##m##x##n##_c(src, src_stride, &ref[i], ref_stride); \
-}
-
-#define sadMxNx4D(m, n) \
-void vp9_sad##m##x##n##x4d_c(const uint8_t *src, int src_stride, \
- const uint8_t *const refs[], int ref_stride, \
- unsigned int *sads) { \
- int i; \
- for (i = 0; i < 4; ++i) \
- sads[i] = vp9_sad##m##x##n##_c(src, src_stride, refs[i], ref_stride); \
-}
-
-// 64x64
-sadMxN(64, 64)
-sadMxNxK(64, 64, 3)
-sadMxNxK(64, 64, 8)
-sadMxNx4D(64, 64)
-
-// 64x32
-sadMxN(64, 32)
-sadMxNx4D(64, 32)
-
-// 32x64
-sadMxN(32, 64)
-sadMxNx4D(32, 64)
-
-// 32x32
-sadMxN(32, 32)
-sadMxNxK(32, 32, 3)
-sadMxNxK(32, 32, 8)
-sadMxNx4D(32, 32)
-
-// 32x16
-sadMxN(32, 16)
-sadMxNx4D(32, 16)
-
-// 16x32
-sadMxN(16, 32)
-sadMxNx4D(16, 32)
-
-// 16x16
-sadMxN(16, 16)
-sadMxNxK(16, 16, 3)
-sadMxNxK(16, 16, 8)
-sadMxNx4D(16, 16)
-
-// 16x8
-sadMxN(16, 8)
-sadMxNxK(16, 8, 3)
-sadMxNxK(16, 8, 8)
-sadMxNx4D(16, 8)
-
-// 8x16
-sadMxN(8, 16)
-sadMxNxK(8, 16, 3)
-sadMxNxK(8, 16, 8)
-sadMxNx4D(8, 16)
-
-// 8x8
-sadMxN(8, 8)
-sadMxNxK(8, 8, 3)
-sadMxNxK(8, 8, 8)
-sadMxNx4D(8, 8)
-
-// 8x4
-sadMxN(8, 4)
-sadMxNxK(8, 4, 8)
-sadMxNx4D(8, 4)
-
-// 4x8
-sadMxN(4, 8)
-sadMxNxK(4, 8, 8)
-sadMxNx4D(4, 8)
-
-// 4x4
-sadMxN(4, 4)
-sadMxNxK(4, 4, 3)
-sadMxNxK(4, 4, 8)
-sadMxNx4D(4, 4)
-
-#if CONFIG_VP9_HIGHBITDEPTH
-static INLINE unsigned int highbd_sad(const uint8_t *a8, int a_stride,
- const uint8_t *b8, int b_stride,
- int width, int height) {
- int y, x;
- unsigned int sad = 0;
- const uint16_t *a = CONVERT_TO_SHORTPTR(a8);
- const uint16_t *b = CONVERT_TO_SHORTPTR(b8);
- for (y = 0; y < height; y++) {
- for (x = 0; x < width; x++)
- sad += abs(a[x] - b[x]);
-
- a += a_stride;
- b += b_stride;
- }
- return sad;
-}
-
-static INLINE unsigned int highbd_sadb(const uint8_t *a8, int a_stride,
- const uint16_t *b, int b_stride,
- int width, int height) {
- int y, x;
- unsigned int sad = 0;
- const uint16_t *a = CONVERT_TO_SHORTPTR(a8);
- for (y = 0; y < height; y++) {
- for (x = 0; x < width; x++)
- sad += abs(a[x] - b[x]);
-
- a += a_stride;
- b += b_stride;
- }
- return sad;
-}
-
-#define highbd_sadMxN(m, n) \
-unsigned int vp9_highbd_sad##m##x##n##_c(const uint8_t *src, int src_stride, \
- const uint8_t *ref, int ref_stride) { \
- return highbd_sad(src, src_stride, ref, ref_stride, m, n); \
-} \
-unsigned int vp9_highbd_sad##m##x##n##_avg_c(const uint8_t *src, \
- int src_stride, \
- const uint8_t *ref, \
- int ref_stride, \
- const uint8_t *second_pred) { \
- uint16_t comp_pred[m * n]; \
- vp9_highbd_comp_avg_pred(comp_pred, second_pred, m, n, ref, ref_stride); \
- return highbd_sadb(src, src_stride, comp_pred, m, m, n); \
-}
-
-#define highbd_sadMxNxK(m, n, k) \
-void vp9_highbd_sad##m##x##n##x##k##_c(const uint8_t *src, int src_stride, \
- const uint8_t *ref, int ref_stride, \
- unsigned int *sads) { \
- int i; \
- for (i = 0; i < k; ++i) { \
- sads[i] = vp9_highbd_sad##m##x##n##_c(src, src_stride, &ref[i], \
- ref_stride); \
- } \
-}
-
-#define highbd_sadMxNx4D(m, n) \
-void vp9_highbd_sad##m##x##n##x4d_c(const uint8_t *src, int src_stride, \
- const uint8_t *const refs[], \
- int ref_stride, unsigned int *sads) { \
- int i; \
- for (i = 0; i < 4; ++i) { \
- sads[i] = vp9_highbd_sad##m##x##n##_c(src, src_stride, refs[i], \
- ref_stride); \
- } \
-}
-
-// 64x64
-highbd_sadMxN(64, 64)
-highbd_sadMxNxK(64, 64, 3)
-highbd_sadMxNxK(64, 64, 8)
-highbd_sadMxNx4D(64, 64)
-
-// 64x32
-highbd_sadMxN(64, 32)
-highbd_sadMxNx4D(64, 32)
-
-// 32x64
-highbd_sadMxN(32, 64)
-highbd_sadMxNx4D(32, 64)
-
-// 32x32
-highbd_sadMxN(32, 32)
-highbd_sadMxNxK(32, 32, 3)
-highbd_sadMxNxK(32, 32, 8)
-highbd_sadMxNx4D(32, 32)
-
-// 32x16
-highbd_sadMxN(32, 16)
-highbd_sadMxNx4D(32, 16)
-
-// 16x32
-highbd_sadMxN(16, 32)
-highbd_sadMxNx4D(16, 32)
-
-// 16x16
-highbd_sadMxN(16, 16)
-highbd_sadMxNxK(16, 16, 3)
-highbd_sadMxNxK(16, 16, 8)
-highbd_sadMxNx4D(16, 16)
-
-// 16x8
-highbd_sadMxN(16, 8)
-highbd_sadMxNxK(16, 8, 3)
-highbd_sadMxNxK(16, 8, 8)
-highbd_sadMxNx4D(16, 8)
-
-// 8x16
-highbd_sadMxN(8, 16)
-highbd_sadMxNxK(8, 16, 3)
-highbd_sadMxNxK(8, 16, 8)
-highbd_sadMxNx4D(8, 16)
-
-// 8x8
-highbd_sadMxN(8, 8)
-highbd_sadMxNxK(8, 8, 3)
-highbd_sadMxNxK(8, 8, 8)
-highbd_sadMxNx4D(8, 8)
-
-// 8x4
-highbd_sadMxN(8, 4)
-highbd_sadMxNxK(8, 4, 8)
-highbd_sadMxNx4D(8, 4)
-
-// 4x8
-highbd_sadMxN(4, 8)
-highbd_sadMxNxK(4, 8, 8)
-highbd_sadMxNx4D(4, 8)
-
-// 4x4
-highbd_sadMxN(4, 4)
-highbd_sadMxNxK(4, 4, 3)
-highbd_sadMxNxK(4, 4, 8)
-highbd_sadMxNx4D(4, 4)
-
-#endif // CONFIG_VP9_HIGHBITDEPTH
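
This file is deleted rather than rewritten in place; the same scalar
generators reappear in vpx_dsp with vpx_ prefixes, as the renamed wrapper
and call sites earlier in this patch show. A sketch of the relocated
single-SAD generator under that assumption (file name and exact layout in
vpx_dsp are inferred, not quoted):

    /* Assumed shape of the relocated generator, e.g. in vpx_dsp/sad.c. */
    #define sadMxN(m, n)                                                     \
      unsigned int vpx_sad##m##x##n##_c(const uint8_t *src, int src_stride,  \
                                        const uint8_t *ref, int ref_stride) {\
        return sad(src, src_stride, ref, ref_stride, m, n);                  \
      }
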
diff --git a/vp9/encoder/x86/vp9_sad_ssse3.asm b/vp9/encoder/x86/vp9_sad_ssse3.asm
deleted file mode 100644
index 0cb3542..0000000
--- a/vp9/encoder/x86/vp9_sad_ssse3.asm
+++ /dev/null
@@ -1,370 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
-%include "vpx_ports/x86_abi_support.asm"
-
-%macro PROCESS_16X2X3 1
-%if %1
- movdqa xmm0, XMMWORD PTR [rsi]
- lddqu xmm5, XMMWORD PTR [rdi]
- lddqu xmm6, XMMWORD PTR [rdi+1]
- lddqu xmm7, XMMWORD PTR [rdi+2]
-
- psadbw xmm5, xmm0
- psadbw xmm6, xmm0
- psadbw xmm7, xmm0
-%else
- movdqa xmm0, XMMWORD PTR [rsi]
- lddqu xmm1, XMMWORD PTR [rdi]
- lddqu xmm2, XMMWORD PTR [rdi+1]
- lddqu xmm3, XMMWORD PTR [rdi+2]
-
- psadbw xmm1, xmm0
- psadbw xmm2, xmm0
- psadbw xmm3, xmm0
-
- paddw xmm5, xmm1
- paddw xmm6, xmm2
- paddw xmm7, xmm3
-%endif
- movdqa xmm0, XMMWORD PTR [rsi+rax]
- lddqu xmm1, XMMWORD PTR [rdi+rdx]
- lddqu xmm2, XMMWORD PTR [rdi+rdx+1]
- lddqu xmm3, XMMWORD PTR [rdi+rdx+2]
-
- lea rsi, [rsi+rax*2]
- lea rdi, [rdi+rdx*2]
-
- psadbw xmm1, xmm0
- psadbw xmm2, xmm0
- psadbw xmm3, xmm0
-
- paddw xmm5, xmm1
- paddw xmm6, xmm2
- paddw xmm7, xmm3
-%endmacro
-
-%macro PROCESS_16X2X3_OFFSET 2
-%if %1
- movdqa xmm0, XMMWORD PTR [rsi]
- movdqa xmm4, XMMWORD PTR [rdi]
- movdqa xmm7, XMMWORD PTR [rdi+16]
-
- movdqa xmm5, xmm7
- palignr xmm5, xmm4, %2
-
- movdqa xmm6, xmm7
- palignr xmm6, xmm4, (%2+1)
-
- palignr xmm7, xmm4, (%2+2)
-
- psadbw xmm5, xmm0
- psadbw xmm6, xmm0
- psadbw xmm7, xmm0
-%else
- movdqa xmm0, XMMWORD PTR [rsi]
- movdqa xmm4, XMMWORD PTR [rdi]
- movdqa xmm3, XMMWORD PTR [rdi+16]
-
- movdqa xmm1, xmm3
- palignr xmm1, xmm4, %2
-
- movdqa xmm2, xmm3
- palignr xmm2, xmm4, (%2+1)
-
- palignr xmm3, xmm4, (%2+2)
-
- psadbw xmm1, xmm0
- psadbw xmm2, xmm0
- psadbw xmm3, xmm0
-
- paddw xmm5, xmm1
- paddw xmm6, xmm2
- paddw xmm7, xmm3
-%endif
- movdqa xmm0, XMMWORD PTR [rsi+rax]
- movdqa xmm4, XMMWORD PTR [rdi+rdx]
- movdqa xmm3, XMMWORD PTR [rdi+rdx+16]
-
- movdqa xmm1, xmm3
- palignr xmm1, xmm4, %2
-
- movdqa xmm2, xmm3
- palignr xmm2, xmm4, (%2+1)
-
- palignr xmm3, xmm4, (%2+2)
-
- lea rsi, [rsi+rax*2]
- lea rdi, [rdi+rdx*2]
-
- psadbw xmm1, xmm0
- psadbw xmm2, xmm0
- psadbw xmm3, xmm0
-
- paddw xmm5, xmm1
- paddw xmm6, xmm2
- paddw xmm7, xmm3
-%endmacro
-
-%macro PROCESS_16X16X3_OFFSET 2
-%2_aligned_by_%1:
-
- sub rdi, %1
-
- PROCESS_16X2X3_OFFSET 1, %1
- PROCESS_16X2X3_OFFSET 0, %1
- PROCESS_16X2X3_OFFSET 0, %1
- PROCESS_16X2X3_OFFSET 0, %1
- PROCESS_16X2X3_OFFSET 0, %1
- PROCESS_16X2X3_OFFSET 0, %1
- PROCESS_16X2X3_OFFSET 0, %1
- PROCESS_16X2X3_OFFSET 0, %1
-
- jmp %2_store_off
-
-%endmacro
-
-%macro PROCESS_16X8X3_OFFSET 2
-%2_aligned_by_%1:
-
- sub rdi, %1
-
- PROCESS_16X2X3_OFFSET 1, %1
- PROCESS_16X2X3_OFFSET 0, %1
- PROCESS_16X2X3_OFFSET 0, %1
- PROCESS_16X2X3_OFFSET 0, %1
-
- jmp %2_store_off
-
-%endmacro
-
-;void int vp9_sad16x16x3_ssse3(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride,
-; int *results)
-global sym(vp9_sad16x16x3_ssse3) PRIVATE
-sym(vp9_sad16x16x3_ssse3):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
- SAVE_XMM 7
- push rsi
- push rdi
- push rcx
- ; end prolog
-
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;ref_ptr
-
- mov rdx, 0xf
- and rdx, rdi
-
- jmp .vp9_sad16x16x3_ssse3_skiptable
-.vp9_sad16x16x3_ssse3_jumptable:
- dd .vp9_sad16x16x3_ssse3_aligned_by_0 - .vp9_sad16x16x3_ssse3_do_jump
- dd .vp9_sad16x16x3_ssse3_aligned_by_1 - .vp9_sad16x16x3_ssse3_do_jump
- dd .vp9_sad16x16x3_ssse3_aligned_by_2 - .vp9_sad16x16x3_ssse3_do_jump
- dd .vp9_sad16x16x3_ssse3_aligned_by_3 - .vp9_sad16x16x3_ssse3_do_jump
- dd .vp9_sad16x16x3_ssse3_aligned_by_4 - .vp9_sad16x16x3_ssse3_do_jump
- dd .vp9_sad16x16x3_ssse3_aligned_by_5 - .vp9_sad16x16x3_ssse3_do_jump
- dd .vp9_sad16x16x3_ssse3_aligned_by_6 - .vp9_sad16x16x3_ssse3_do_jump
- dd .vp9_sad16x16x3_ssse3_aligned_by_7 - .vp9_sad16x16x3_ssse3_do_jump
- dd .vp9_sad16x16x3_ssse3_aligned_by_8 - .vp9_sad16x16x3_ssse3_do_jump
- dd .vp9_sad16x16x3_ssse3_aligned_by_9 - .vp9_sad16x16x3_ssse3_do_jump
- dd .vp9_sad16x16x3_ssse3_aligned_by_10 - .vp9_sad16x16x3_ssse3_do_jump
- dd .vp9_sad16x16x3_ssse3_aligned_by_11 - .vp9_sad16x16x3_ssse3_do_jump
- dd .vp9_sad16x16x3_ssse3_aligned_by_12 - .vp9_sad16x16x3_ssse3_do_jump
- dd .vp9_sad16x16x3_ssse3_aligned_by_13 - .vp9_sad16x16x3_ssse3_do_jump
- dd .vp9_sad16x16x3_ssse3_aligned_by_14 - .vp9_sad16x16x3_ssse3_do_jump
- dd .vp9_sad16x16x3_ssse3_aligned_by_15 - .vp9_sad16x16x3_ssse3_do_jump
-.vp9_sad16x16x3_ssse3_skiptable:
-
- call .vp9_sad16x16x3_ssse3_do_jump
-.vp9_sad16x16x3_ssse3_do_jump:
- pop rcx ; get the address of do_jump
- mov rax, .vp9_sad16x16x3_ssse3_jumptable - .vp9_sad16x16x3_ssse3_do_jump
- add rax, rcx ; get the absolute address of vp9_sad16x16x3_ssse3_jumptable
-
- movsxd rax, dword [rax + 4*rdx] ; get the 32 bit offset from the jumptable
- add rcx, rax
-
- movsxd rax, dword ptr arg(1) ;src_stride
- movsxd rdx, dword ptr arg(3) ;ref_stride
-
- jmp rcx
-
- PROCESS_16X16X3_OFFSET 0, .vp9_sad16x16x3_ssse3
- PROCESS_16X16X3_OFFSET 1, .vp9_sad16x16x3_ssse3
- PROCESS_16X16X3_OFFSET 2, .vp9_sad16x16x3_ssse3
- PROCESS_16X16X3_OFFSET 3, .vp9_sad16x16x3_ssse3
- PROCESS_16X16X3_OFFSET 4, .vp9_sad16x16x3_ssse3
- PROCESS_16X16X3_OFFSET 5, .vp9_sad16x16x3_ssse3
- PROCESS_16X16X3_OFFSET 6, .vp9_sad16x16x3_ssse3
- PROCESS_16X16X3_OFFSET 7, .vp9_sad16x16x3_ssse3
- PROCESS_16X16X3_OFFSET 8, .vp9_sad16x16x3_ssse3
- PROCESS_16X16X3_OFFSET 9, .vp9_sad16x16x3_ssse3
- PROCESS_16X16X3_OFFSET 10, .vp9_sad16x16x3_ssse3
- PROCESS_16X16X3_OFFSET 11, .vp9_sad16x16x3_ssse3
- PROCESS_16X16X3_OFFSET 12, .vp9_sad16x16x3_ssse3
- PROCESS_16X16X3_OFFSET 13, .vp9_sad16x16x3_ssse3
- PROCESS_16X16X3_OFFSET 14, .vp9_sad16x16x3_ssse3
-
-.vp9_sad16x16x3_ssse3_aligned_by_15:
- PROCESS_16X2X3 1
- PROCESS_16X2X3 0
- PROCESS_16X2X3 0
- PROCESS_16X2X3 0
- PROCESS_16X2X3 0
- PROCESS_16X2X3 0
- PROCESS_16X2X3 0
- PROCESS_16X2X3 0
-
-.vp9_sad16x16x3_ssse3_store_off:
- mov rdi, arg(4) ;Results
-
- movq xmm0, xmm5
- psrldq xmm5, 8
-
- paddw xmm0, xmm5
- movd [rdi], xmm0
-;-
- movq xmm0, xmm6
- psrldq xmm6, 8
-
- paddw xmm0, xmm6
- movd [rdi+4], xmm0
-;-
- movq xmm0, xmm7
- psrldq xmm7, 8
-
- paddw xmm0, xmm7
- movd [rdi+8], xmm0
-
- ; begin epilog
- pop rcx
- pop rdi
- pop rsi
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void int vp9_sad16x8x3_ssse3(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride,
-; int *results)
-global sym(vp9_sad16x8x3_ssse3) PRIVATE
-sym(vp9_sad16x8x3_ssse3):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
- SAVE_XMM 7
- push rsi
- push rdi
- push rcx
- ; end prolog
-
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;ref_ptr
-
- mov rdx, 0xf
- and rdx, rdi
-
- jmp .vp9_sad16x8x3_ssse3_skiptable
-.vp9_sad16x8x3_ssse3_jumptable:
- dd .vp9_sad16x8x3_ssse3_aligned_by_0 - .vp9_sad16x8x3_ssse3_do_jump
- dd .vp9_sad16x8x3_ssse3_aligned_by_1 - .vp9_sad16x8x3_ssse3_do_jump
- dd .vp9_sad16x8x3_ssse3_aligned_by_2 - .vp9_sad16x8x3_ssse3_do_jump
- dd .vp9_sad16x8x3_ssse3_aligned_by_3 - .vp9_sad16x8x3_ssse3_do_jump
- dd .vp9_sad16x8x3_ssse3_aligned_by_4 - .vp9_sad16x8x3_ssse3_do_jump
- dd .vp9_sad16x8x3_ssse3_aligned_by_5 - .vp9_sad16x8x3_ssse3_do_jump
- dd .vp9_sad16x8x3_ssse3_aligned_by_6 - .vp9_sad16x8x3_ssse3_do_jump
- dd .vp9_sad16x8x3_ssse3_aligned_by_7 - .vp9_sad16x8x3_ssse3_do_jump
- dd .vp9_sad16x8x3_ssse3_aligned_by_8 - .vp9_sad16x8x3_ssse3_do_jump
- dd .vp9_sad16x8x3_ssse3_aligned_by_9 - .vp9_sad16x8x3_ssse3_do_jump
- dd .vp9_sad16x8x3_ssse3_aligned_by_10 - .vp9_sad16x8x3_ssse3_do_jump
- dd .vp9_sad16x8x3_ssse3_aligned_by_11 - .vp9_sad16x8x3_ssse3_do_jump
- dd .vp9_sad16x8x3_ssse3_aligned_by_12 - .vp9_sad16x8x3_ssse3_do_jump
- dd .vp9_sad16x8x3_ssse3_aligned_by_13 - .vp9_sad16x8x3_ssse3_do_jump
- dd .vp9_sad16x8x3_ssse3_aligned_by_14 - .vp9_sad16x8x3_ssse3_do_jump
- dd .vp9_sad16x8x3_ssse3_aligned_by_15 - .vp9_sad16x8x3_ssse3_do_jump
-.vp9_sad16x8x3_ssse3_skiptable:
-
- call .vp9_sad16x8x3_ssse3_do_jump
-.vp9_sad16x8x3_ssse3_do_jump:
- pop rcx ; get the address of do_jump
- mov rax, .vp9_sad16x8x3_ssse3_jumptable - .vp9_sad16x8x3_ssse3_do_jump
- add rax, rcx ; get the absolute address of vp9_sad16x8x3_ssse3_jumptable
-
- movsxd rax, dword [rax + 4*rdx] ; get the 32 bit offset from the jumptable
- add rcx, rax
-
- movsxd rax, dword ptr arg(1) ;src_stride
- movsxd rdx, dword ptr arg(3) ;ref_stride
-
- jmp rcx
-
- PROCESS_16X8X3_OFFSET 0, .vp9_sad16x8x3_ssse3
- PROCESS_16X8X3_OFFSET 1, .vp9_sad16x8x3_ssse3
- PROCESS_16X8X3_OFFSET 2, .vp9_sad16x8x3_ssse3
- PROCESS_16X8X3_OFFSET 3, .vp9_sad16x8x3_ssse3
- PROCESS_16X8X3_OFFSET 4, .vp9_sad16x8x3_ssse3
- PROCESS_16X8X3_OFFSET 5, .vp9_sad16x8x3_ssse3
- PROCESS_16X8X3_OFFSET 6, .vp9_sad16x8x3_ssse3
- PROCESS_16X8X3_OFFSET 7, .vp9_sad16x8x3_ssse3
- PROCESS_16X8X3_OFFSET 8, .vp9_sad16x8x3_ssse3
- PROCESS_16X8X3_OFFSET 9, .vp9_sad16x8x3_ssse3
- PROCESS_16X8X3_OFFSET 10, .vp9_sad16x8x3_ssse3
- PROCESS_16X8X3_OFFSET 11, .vp9_sad16x8x3_ssse3
- PROCESS_16X8X3_OFFSET 12, .vp9_sad16x8x3_ssse3
- PROCESS_16X8X3_OFFSET 13, .vp9_sad16x8x3_ssse3
- PROCESS_16X8X3_OFFSET 14, .vp9_sad16x8x3_ssse3
-
-.vp9_sad16x8x3_ssse3_aligned_by_15:
-
- PROCESS_16X2X3 1
- PROCESS_16X2X3 0
- PROCESS_16X2X3 0
- PROCESS_16X2X3 0
-
-.vp9_sad16x8x3_ssse3_store_off:
- mov rdi, arg(4) ;Results
-
- movq xmm0, xmm5
- psrldq xmm5, 8
-
- paddw xmm0, xmm5
- movd [rdi], xmm0
-;-
- movq xmm0, xmm6
- psrldq xmm6, 8
-
- paddw xmm0, xmm6
- movd [rdi+4], xmm0
-;-
- movq xmm0, xmm7
- psrldq xmm7, 8
-
- paddw xmm0, xmm7
- movd [rdi+8], xmm0
-
- ; begin epilog
- pop rcx
- pop rdi
- pop rsi
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index f5f9be8..7359b2d 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -69,7 +69,6 @@
VP9_CX_SRCS-yes += encoder/vp9_rd.c
VP9_CX_SRCS-yes += encoder/vp9_rdopt.c
VP9_CX_SRCS-yes += encoder/vp9_pickmode.c
-VP9_CX_SRCS-yes += encoder/vp9_sad.c
VP9_CX_SRCS-yes += encoder/vp9_segmentation.c
VP9_CX_SRCS-yes += encoder/vp9_segmentation.h
VP9_CX_SRCS-yes += encoder/vp9_speed_features.c
@@ -104,15 +103,11 @@
VP9_CX_SRCS-yes += encoder/vp9_mbgraph.h
VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_variance_impl_intrin_avx2.c
-VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad4d_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_avg_intrin_sse2.c
-VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_sad4d_intrin_avx2.c
VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_temporal_filter_apply_sse2.asm
-VP9_CX_SRCS-$(HAVE_SSE3) += encoder/x86/vp9_sad_sse3.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_quantize_sse2.c
ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
-VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_highbd_sad4d_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_highbd_variance_impl_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_highbd_quantize_intrin_sse2.c
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_highbd_block_error_intrin_sse2.c
@@ -121,12 +116,10 @@
ifeq ($(CONFIG_USE_X86INC),yes)
VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_dct_mmx.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_error_sse2.asm
-VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subtract_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_variance_sse2.c
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subpel_variance.asm
ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
-VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_highbd_sad_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_highbd_variance_sse2.c
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_highbd_subpel_variance.asm
endif
@@ -136,9 +129,6 @@
VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_quantize_ssse3_x86_64.asm
VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_dct_ssse3_x86_64.asm
endif
-VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_sad_ssse3.asm
-VP9_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/vp9_sad_sse4.asm
-VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_sad_intrin_avx2.c
VP9_CX_SRCS-$(ARCH_X86_64) += encoder/x86/vp9_ssim_opt_x86_64.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2.c
@@ -161,8 +151,6 @@
endif
VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_avg_neon.c
VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_quantize_neon.c
-VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_sad4d_neon.c
-VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_sad_neon.c
VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_subtract_neon.c
VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_variance_neon.c
diff --git a/vp9/encoder/arm/neon/vp9_sad4d_neon.c b/vpx_dsp/arm/sad4d_neon.c
similarity index 95%
rename from vp9/encoder/arm/neon/vp9_sad4d_neon.c
rename to vpx_dsp/arm/sad4d_neon.c
index cec1689..c7704dc 100644
--- a/vp9/encoder/arm/neon/vp9_sad4d_neon.c
+++ b/vpx_dsp/arm/sad4d_neon.c
@@ -9,9 +9,9 @@
*/
#include <arm_neon.h>
-#include "./vp9_rtcd.h"
-#include "./vpx_config.h"
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
#include "vpx/vpx_integer.h"
static INLINE unsigned int horizontal_long_add_16x8(const uint16x8_t vec_lo,
@@ -80,9 +80,9 @@
vget_high_u8(vec_ref_16));
}
-void vp9_sad64x64x4d_neon(const uint8_t *src, int src_stride,
+void vpx_sad64x64x4d_neon(const uint8_t *src, int src_stride,
const uint8_t* const ref[4], int ref_stride,
- unsigned int *res) {
+ uint32_t *res) {
int i;
uint16x8_t vec_sum_ref0_lo = vdupq_n_u16(0);
uint16x8_t vec_sum_ref0_hi = vdupq_n_u16(0);
@@ -126,9 +126,9 @@
res[3] = horizontal_long_add_16x8(vec_sum_ref3_lo, vec_sum_ref3_hi);
}
-void vp9_sad32x32x4d_neon(const uint8_t *src, int src_stride,
+void vpx_sad32x32x4d_neon(const uint8_t *src, int src_stride,
const uint8_t* const ref[4], int ref_stride,
- unsigned int *res) {
+ uint32_t *res) {
int i;
uint16x8_t vec_sum_ref0_lo = vdupq_n_u16(0);
uint16x8_t vec_sum_ref0_hi = vdupq_n_u16(0);
@@ -170,9 +170,9 @@
res[3] = horizontal_long_add_16x8(vec_sum_ref3_lo, vec_sum_ref3_hi);
}
-void vp9_sad16x16x4d_neon(const uint8_t *src, int src_stride,
+void vpx_sad16x16x4d_neon(const uint8_t *src, int src_stride,
const uint8_t* const ref[4], int ref_stride,
- unsigned int *res) {
+ uint32_t *res) {
int i;
uint16x8_t vec_sum_ref0_lo = vdupq_n_u16(0);
uint16x8_t vec_sum_ref0_hi = vdupq_n_u16(0);
diff --git a/vp8/common/arm/armv6/vp8_sad16x16_armv6.asm b/vpx_dsp/arm/sad_media.asm
similarity index 96%
rename from vp8/common/arm/armv6/vp8_sad16x16_armv6.asm
rename to vpx_dsp/arm/sad_media.asm
index 1b4f5cf..aed1d3a 100644
--- a/vp8/common/arm/armv6/vp8_sad16x16_armv6.asm
+++ b/vpx_dsp/arm/sad_media.asm
@@ -9,7 +9,7 @@
;
- EXPORT |vp8_sad16x16_armv6|
+ EXPORT |vpx_sad16x16_media|
ARM
REQUIRE8
@@ -21,8 +21,7 @@
; r1 int src_stride
; r2 const unsigned char *ref_ptr
; r3 int ref_stride
-; stack max_sad (not used)
-|vp8_sad16x16_armv6| PROC
+|vpx_sad16x16_media| PROC
stmfd sp!, {r4-r12, lr}
pld [r0, r1, lsl #0]
diff --git a/vp9/encoder/arm/neon/vp9_sad_neon.c b/vpx_dsp/arm/sad_neon.c
similarity index 65%
rename from vp9/encoder/arm/neon/vp9_sad_neon.c
rename to vpx_dsp/arm/sad_neon.c
index c4cd856..173f08a 100644
--- a/vp9/encoder/arm/neon/vp9_sad_neon.c
+++ b/vpx_dsp/arm/sad_neon.c
@@ -9,11 +9,113 @@
*/
#include <arm_neon.h>
-#include "./vp9_rtcd.h"
+
#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
+unsigned int vpx_sad8x16_neon(
+    const uint8_t *src_ptr,
+    int src_stride,
+    const uint8_t *ref_ptr,
+ int ref_stride) {
+ uint8x8_t d0, d8;
+ uint16x8_t q12;
+ uint32x4_t q1;
+ uint64x2_t q3;
+ uint32x2_t d5;
+ int i;
+
+ d0 = vld1_u8(src_ptr);
+ src_ptr += src_stride;
+ d8 = vld1_u8(ref_ptr);
+ ref_ptr += ref_stride;
+ q12 = vabdl_u8(d0, d8);
+
+ for (i = 0; i < 15; i++) {
+ d0 = vld1_u8(src_ptr);
+ src_ptr += src_stride;
+ d8 = vld1_u8(ref_ptr);
+ ref_ptr += ref_stride;
+ q12 = vabal_u8(q12, d0, d8);
+ }
+
+ q1 = vpaddlq_u16(q12);
+ q3 = vpaddlq_u32(q1);
+ d5 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)),
+ vreinterpret_u32_u64(vget_high_u64(q3)));
+
+ return vget_lane_u32(d5, 0);
+}
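+
+/* A scalar sketch of what the intrinsics above compute: vabdl_u8 widens the
+ * absolute differences of eight byte lanes into 16-bit lanes and vabal_u8
+ * accumulates further absolute differences on top, so the function is
+ * equivalent to
+ *   uint32_t total = 0;
+ *   for (i = 0; i < 16; i++)
+ *     for (j = 0; j < 8; j++)
+ *       total += abs(src_ptr[i * src_stride + j] -
+ *                    ref_ptr[i * ref_stride + j]);
+ * with the pairwise adds folding the eight lanes into one sum. At 16 rows of
+ * 8 pixels, each 16-bit lane holds at most 16 * 255 = 4080, so the
+ * accumulator cannot overflow before the final reduction.
+ */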
+
+unsigned int vpx_sad4x4_neon(
+    const uint8_t *src_ptr,
+    int src_stride,
+    const uint8_t *ref_ptr,
+ int ref_stride) {
+ uint8x8_t d0, d8;
+ uint16x8_t q12;
+ uint32x2_t d1;
+ uint64x1_t d3;
+ int i;
+
+ d0 = vld1_u8(src_ptr);
+ src_ptr += src_stride;
+ d8 = vld1_u8(ref_ptr);
+ ref_ptr += ref_stride;
+ q12 = vabdl_u8(d0, d8);
+
+ for (i = 0; i < 3; i++) {
+ d0 = vld1_u8(src_ptr);
+ src_ptr += src_stride;
+ d8 = vld1_u8(ref_ptr);
+ ref_ptr += ref_stride;
+ q12 = vabal_u8(q12, d0, d8);
+ }
+
+ d1 = vpaddl_u16(vget_low_u16(q12));
+ d3 = vpaddl_u32(d1);
+
+ return vget_lane_u32(vreinterpret_u32_u64(d3), 0);
+}
+
+unsigned int vpx_sad16x8_neon(
+    const uint8_t *src_ptr,
+    int src_stride,
+    const uint8_t *ref_ptr,
+ int ref_stride) {
+ uint8x16_t q0, q4;
+ uint16x8_t q12, q13;
+ uint32x4_t q1;
+ uint64x2_t q3;
+ uint32x2_t d5;
+ int i;
+
+ q0 = vld1q_u8(src_ptr);
+ src_ptr += src_stride;
+ q4 = vld1q_u8(ref_ptr);
+ ref_ptr += ref_stride;
+ q12 = vabdl_u8(vget_low_u8(q0), vget_low_u8(q4));
+ q13 = vabdl_u8(vget_high_u8(q0), vget_high_u8(q4));
+
+ for (i = 0; i < 7; i++) {
+ q0 = vld1q_u8(src_ptr);
+ src_ptr += src_stride;
+ q4 = vld1q_u8(ref_ptr);
+ ref_ptr += ref_stride;
+ q12 = vabal_u8(q12, vget_low_u8(q0), vget_low_u8(q4));
+ q13 = vabal_u8(q13, vget_high_u8(q0), vget_high_u8(q4));
+ }
+
+ q12 = vaddq_u16(q12, q13);
+ q1 = vpaddlq_u16(q12);
+ q3 = vpaddlq_u32(q1);
+ d5 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)),
+ vreinterpret_u32_u64(vget_high_u64(q3)));
+
+ return vget_lane_u32(d5, 0);
+}
+
static INLINE unsigned int horizontal_long_add_16x8(const uint16x8_t vec_lo,
const uint16x8_t vec_hi) {
const uint32x4_t vec_l_lo = vaddl_u16(vget_low_u16(vec_lo),
@@ -34,7 +136,7 @@
return vget_lane_u32(c, 0);
}
-unsigned int vp9_sad64x64_neon(const uint8_t *src, int src_stride,
+unsigned int vpx_sad64x64_neon(const uint8_t *src, int src_stride,
const uint8_t *ref, int ref_stride) {
int i;
uint16x8_t vec_accum_lo = vdupq_n_u16(0);
@@ -70,7 +172,7 @@
return horizontal_long_add_16x8(vec_accum_lo, vec_accum_hi);
}
-unsigned int vp9_sad32x32_neon(const uint8_t *src, int src_stride,
+unsigned int vpx_sad32x32_neon(const uint8_t *src, int src_stride,
const uint8_t *ref, int ref_stride) {
int i;
uint16x8_t vec_accum_lo = vdupq_n_u16(0);
@@ -95,7 +197,7 @@
return horizontal_add_16x8(vaddq_u16(vec_accum_lo, vec_accum_hi));
}
-unsigned int vp9_sad16x16_neon(const uint8_t *src, int src_stride,
+unsigned int vpx_sad16x16_neon(const uint8_t *src, int src_stride,
const uint8_t *ref, int ref_stride) {
int i;
uint16x8_t vec_accum_lo = vdupq_n_u16(0);
@@ -114,7 +216,7 @@
return horizontal_add_16x8(vaddq_u16(vec_accum_lo, vec_accum_hi));
}
-unsigned int vp9_sad8x8_neon(const uint8_t *src, int src_stride,
+unsigned int vpx_sad8x8_neon(const uint8_t *src, int src_stride,
const uint8_t *ref, int ref_stride) {
int i;
uint16x8_t vec_accum = vdupq_n_u16(0);
diff --git a/vpx_dsp/sad.c b/vpx_dsp/sad.c
new file mode 100644
index 0000000..9db312f
--- /dev/null
+++ b/vpx_dsp/sad.c
@@ -0,0 +1,323 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdlib.h>
+
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
+
+#include "vpx/vpx_integer.h"
+
+#if CONFIG_VP9_HIGHBITDEPTH
+#include "vp9/common/vp9_common.h"
+#endif // CONFIG_VP9_HIGHBITDEPTH
+// Temporary definition until ROUND_POWER_OF_TWO moves to a shared header;
+// guarded because vp9_common.h defines an identical macro.
+#ifndef ROUND_POWER_OF_TWO
+#define ROUND_POWER_OF_TWO(value, n) \
+    (((value) + (1 << ((n) - 1))) >> (n))
+#endif  // ROUND_POWER_OF_TWO
+
+/* Sum the difference between every corresponding element of the buffers. */
+static INLINE unsigned int sad(const uint8_t *a, int a_stride,
+ const uint8_t *b, int b_stride,
+ int width, int height) {
+ int y, x;
+ unsigned int sad = 0;
+
+ for (y = 0; y < height; y++) {
+ for (x = 0; x < width; x++)
+ sad += abs(a[x] - b[x]);
+
+ a += a_stride;
+ b += b_stride;
+ }
+ return sad;
+}
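+
+/* For example, with width = height = 2 and both strides 2:
+ *   a = { 1, 2,        b = { 2, 2,
+ *         3, 4 }             0, 7 }
+ *   sad = |1-2| + |2-2| + |3-0| + |4-7| = 1 + 0 + 3 + 3 = 7
+ */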
+
+/* Remove dependency on vp9 variance function by duplicating vp9_comp_avg_pred.
+ * The function averages every corresponding element of the buffers and stores
+ * the value in a third buffer, comp_pred.
+ * pred and comp_pred are assumed to have stride = width.
+ * In the usage below, comp_pred is a local array.
+ */
+static INLINE void avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width,
+ int height, const uint8_t *ref, int ref_stride) {
+ int i, j;
+
+ for (i = 0; i < height; i++) {
+ for (j = 0; j < width; j++) {
+ const int tmp = pred[j] + ref[j];
+ comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
+ }
+ comp_pred += width;
+ pred += width;
+ ref += ref_stride;
+ }
+}
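+
+/* ROUND_POWER_OF_TWO(tmp, 1) rounds the average to nearest with ties going
+ * up; e.g. for pred[j] = 1 and ref[j] = 2 the stored value is
+ * (1 + 2 + 1) >> 1 = 2 rather than the truncated 1. */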
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static INLINE void highbd_avg_pred(uint16_t *comp_pred, const uint8_t *pred8,
+ int width, int height, const uint8_t *ref8,
+ int ref_stride) {
+ int i, j;
+ uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
+ uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
+ for (i = 0; i < height; i++) {
+ for (j = 0; j < width; j++) {
+ const int tmp = pred[j] + ref[j];
+ comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
+ }
+ comp_pred += width;
+ pred += width;
+ ref += ref_stride;
+ }
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
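+
+/* Note: CONVERT_TO_SHORTPTR() comes from vp9_common.h. In high bitdepth
+ * builds the uint8_t* buffer handles actually reference uint16_t samples,
+ * and the macro recovers the real pointer, roughly:
+ *   (uint16_t *)(((uintptr_t)(x)) << 1)
+ */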
+
+#define sadMxN(m, n) \
+unsigned int vpx_sad##m##x##n##_c(const uint8_t *src, int src_stride, \
+ const uint8_t *ref, int ref_stride) { \
+ return sad(src, src_stride, ref, ref_stride, m, n); \
+} \
+unsigned int vpx_sad##m##x##n##_avg_c(const uint8_t *src, int src_stride, \
+ const uint8_t *ref, int ref_stride, \
+ const uint8_t *second_pred) { \
+ uint8_t comp_pred[m * n]; \
+ avg_pred(comp_pred, second_pred, m, n, ref, ref_stride); \
+ return sad(src, src_stride, comp_pred, m, m, n); \
+}
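+
+/* For illustration, sadMxN(4, 4) below expands to:
+ *   unsigned int vpx_sad4x4_c(const uint8_t *src, int src_stride,
+ *                             const uint8_t *ref, int ref_stride) {
+ *     return sad(src, src_stride, ref, ref_stride, 4, 4);
+ *   }
+ *   unsigned int vpx_sad4x4_avg_c(const uint8_t *src, int src_stride,
+ *                                 const uint8_t *ref, int ref_stride,
+ *                                 const uint8_t *second_pred) {
+ *     uint8_t comp_pred[4 * 4];
+ *     avg_pred(comp_pred, second_pred, 4, 4, ref, ref_stride);
+ *     return sad(src, src_stride, comp_pred, 4, 4, 4);
+ *   }
+ */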
+
+// TODO: depending on the call sites, pass ref_array as **ref_array so the
+// calls below do not need to take its address, and de-duplicate this with
+// the 4D version below.
+#define sadMxNxK(m, n, k) \
+void vpx_sad##m##x##n##x##k##_c(const uint8_t *src, int src_stride, \
+ const uint8_t *ref_array, int ref_stride, \
+ uint32_t *sad_array) { \
+ int i; \
+ for (i = 0; i < k; ++i) \
+    sad_array[i] = vpx_sad##m##x##n##_c(src, src_stride, &ref_array[i], \
+                                        ref_stride); \
+}
+
+// This appears to be equivalent to the above when k == 4 and the refs are
+// const.
+#define sadMxNx4D(m, n) \
+void vpx_sad##m##x##n##x4d_c(const uint8_t *src, int src_stride, \
+ const uint8_t *const ref_array[], int ref_stride, \
+ uint32_t *sad_array) { \
+ int i; \
+ for (i = 0; i < 4; ++i) \
+    sad_array[i] = vpx_sad##m##x##n##_c(src, src_stride, ref_array[i], \
+                                        ref_stride); \
+}
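+
+/* Typical usage of the 4D form (a sketch; ref_a..ref_d stand for four
+ * hypothetical candidate positions from a motion search):
+ *   const uint8_t *const refs[4] = { ref_a, ref_b, ref_c, ref_d };
+ *   uint32_t sads[4];
+ *   vpx_sad16x16x4d_c(src, src_stride, refs, ref_stride, sads);
+ */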
+
+// 64x64
+sadMxN(64, 64)
+sadMxNxK(64, 64, 3)
+sadMxNxK(64, 64, 8)
+sadMxNx4D(64, 64)
+
+// 64x32
+sadMxN(64, 32)
+sadMxNx4D(64, 32)
+
+// 32x64
+sadMxN(32, 64)
+sadMxNx4D(32, 64)
+
+// 32x32
+sadMxN(32, 32)
+sadMxNxK(32, 32, 3)
+sadMxNxK(32, 32, 8)
+sadMxNx4D(32, 32)
+
+// 32x16
+sadMxN(32, 16)
+sadMxNx4D(32, 16)
+
+// 16x32
+sadMxN(16, 32)
+sadMxNx4D(16, 32)
+
+// 16x16
+sadMxN(16, 16)
+sadMxNxK(16, 16, 3)
+sadMxNxK(16, 16, 8)
+sadMxNx4D(16, 16)
+
+// 16x8
+sadMxN(16, 8)
+sadMxNxK(16, 8, 3)
+sadMxNxK(16, 8, 8)
+sadMxNx4D(16, 8)
+
+// 8x16
+sadMxN(8, 16)
+sadMxNxK(8, 16, 3)
+sadMxNxK(8, 16, 8)
+sadMxNx4D(8, 16)
+
+// 8x8
+sadMxN(8, 8)
+sadMxNxK(8, 8, 3)
+sadMxNxK(8, 8, 8)
+sadMxNx4D(8, 8)
+
+// 8x4
+sadMxN(8, 4)
+sadMxNxK(8, 4, 8)
+sadMxNx4D(8, 4)
+
+// 4x8
+sadMxN(4, 8)
+sadMxNxK(4, 8, 8)
+sadMxNx4D(4, 8)
+
+// 4x4
+sadMxN(4, 4)
+sadMxNxK(4, 4, 3)
+sadMxNxK(4, 4, 8)
+sadMxNx4D(4, 4)
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static INLINE unsigned int highbd_sad(const uint8_t *a8, int a_stride,
+ const uint8_t *b8, int b_stride,
+ int width, int height) {
+ int y, x;
+ unsigned int sad = 0;
+ const uint16_t *a = CONVERT_TO_SHORTPTR(a8);
+ const uint16_t *b = CONVERT_TO_SHORTPTR(b8);
+ for (y = 0; y < height; y++) {
+ for (x = 0; x < width; x++)
+ sad += abs(a[x] - b[x]);
+
+ a += a_stride;
+ b += b_stride;
+ }
+ return sad;
+}
+
+static INLINE unsigned int highbd_sadb(const uint8_t *a8, int a_stride,
+ const uint16_t *b, int b_stride,
+ int width, int height) {
+ int y, x;
+ unsigned int sad = 0;
+ const uint16_t *a = CONVERT_TO_SHORTPTR(a8);
+ for (y = 0; y < height; y++) {
+ for (x = 0; x < width; x++)
+ sad += abs(a[x] - b[x]);
+
+ a += a_stride;
+ b += b_stride;
+ }
+ return sad;
+}
+
+#define highbd_sadMxN(m, n) \
+unsigned int vpx_highbd_sad##m##x##n##_c(const uint8_t *src, int src_stride, \
+ const uint8_t *ref, int ref_stride) { \
+ return highbd_sad(src, src_stride, ref, ref_stride, m, n); \
+} \
+unsigned int vpx_highbd_sad##m##x##n##_avg_c(const uint8_t *src, \
+ int src_stride, \
+ const uint8_t *ref, \
+ int ref_stride, \
+ const uint8_t *second_pred) { \
+ uint16_t comp_pred[m * n]; \
+ highbd_avg_pred(comp_pred, second_pred, m, n, ref, ref_stride); \
+ return highbd_sadb(src, src_stride, comp_pred, m, m, n); \
+}
+
+#define highbd_sadMxNxK(m, n, k) \
+void vpx_highbd_sad##m##x##n##x##k##_c(const uint8_t *src, int src_stride, \
+ const uint8_t *ref_array, int ref_stride, \
+ uint32_t *sad_array) { \
+ int i; \
+ for (i = 0; i < k; ++i) { \
+ sad_array[i] = vpx_highbd_sad##m##x##n##_c(src, src_stride, &ref_array[i], \
+ ref_stride); \
+ } \
+}
+
+#define highbd_sadMxNx4D(m, n) \
+void vpx_highbd_sad##m##x##n##x4d_c(const uint8_t *src, int src_stride, \
+ const uint8_t *const ref_array[], \
+ int ref_stride, uint32_t *sad_array) { \
+ int i; \
+ for (i = 0; i < 4; ++i) { \
+ sad_array[i] = vpx_highbd_sad##m##x##n##_c(src, src_stride, ref_array[i], \
+ ref_stride); \
+ } \
+}
+
+// 64x64
+highbd_sadMxN(64, 64)
+highbd_sadMxNxK(64, 64, 3)
+highbd_sadMxNxK(64, 64, 8)
+highbd_sadMxNx4D(64, 64)
+
+// 64x32
+highbd_sadMxN(64, 32)
+highbd_sadMxNx4D(64, 32)
+
+// 32x64
+highbd_sadMxN(32, 64)
+highbd_sadMxNx4D(32, 64)
+
+// 32x32
+highbd_sadMxN(32, 32)
+highbd_sadMxNxK(32, 32, 3)
+highbd_sadMxNxK(32, 32, 8)
+highbd_sadMxNx4D(32, 32)
+
+// 32x16
+highbd_sadMxN(32, 16)
+highbd_sadMxNx4D(32, 16)
+
+// 16x32
+highbd_sadMxN(16, 32)
+highbd_sadMxNx4D(16, 32)
+
+// 16x16
+highbd_sadMxN(16, 16)
+highbd_sadMxNxK(16, 16, 3)
+highbd_sadMxNxK(16, 16, 8)
+highbd_sadMxNx4D(16, 16)
+
+// 16x8
+highbd_sadMxN(16, 8)
+highbd_sadMxNxK(16, 8, 3)
+highbd_sadMxNxK(16, 8, 8)
+highbd_sadMxNx4D(16, 8)
+
+// 8x16
+highbd_sadMxN(8, 16)
+highbd_sadMxNxK(8, 16, 3)
+highbd_sadMxNxK(8, 16, 8)
+highbd_sadMxNx4D(8, 16)
+
+// 8x8
+highbd_sadMxN(8, 8)
+highbd_sadMxNxK(8, 8, 3)
+highbd_sadMxNxK(8, 8, 8)
+highbd_sadMxNx4D(8, 8)
+
+// 8x4
+highbd_sadMxN(8, 4)
+highbd_sadMxNxK(8, 4, 8)
+highbd_sadMxNx4D(8, 4)
+
+// 4x8
+highbd_sadMxN(4, 8)
+highbd_sadMxNxK(4, 8, 8)
+highbd_sadMxNx4D(4, 8)
+
+// 4x4
+highbd_sadMxN(4, 4)
+highbd_sadMxNxK(4, 4, 3)
+highbd_sadMxNxK(4, 4, 8)
+highbd_sadMxNx4D(4, 4)
+
+#endif // CONFIG_VP9_HIGHBITDEPTH
diff --git a/vpx_dsp/vpx_dsp.mk b/vpx_dsp/vpx_dsp.mk
new file mode 100644
index 0000000..606515d
--- /dev/null
+++ b/vpx_dsp/vpx_dsp.mk
@@ -0,0 +1,40 @@
+##
+## Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+##
+## Use of this source code is governed by a BSD-style license
+## that can be found in the LICENSE file in the root of the source
+## tree. An additional intellectual property rights grant can be found
+## in the file PATENTS. All contributing project authors may
+## be found in the AUTHORS file in the root of the source tree.
+##
+
+DSP_SRCS-yes += vpx_dsp.mk
+
+ifeq ($(CONFIG_ENCODERS),yes)
+DSP_SRCS-yes += sad.c
+
+DSP_SRCS-$(HAVE_MEDIA) += arm/sad_media$(ASM)
+DSP_SRCS-$(HAVE_NEON) += arm/sad4d_neon.c
+DSP_SRCS-$(HAVE_NEON) += arm/sad_neon.c
+
+DSP_SRCS-$(HAVE_MMX) += x86/sad_mmx.asm
+DSP_SRCS-$(HAVE_SSE2) += x86/sad4d_sse2.asm
+DSP_SRCS-$(HAVE_SSE2) += x86/sad_sse2.asm
+DSP_SRCS-$(HAVE_SSE3) += x86/sad_sse3.asm
+DSP_SRCS-$(HAVE_SSSE3) += x86/sad_ssse3.asm
+DSP_SRCS-$(HAVE_SSE4_1) += x86/sad_sse4.asm
+DSP_SRCS-$(HAVE_AVX2) += x86/sad4d_avx2.c
+DSP_SRCS-$(HAVE_AVX2) += x86/sad_avx2.c
+
+ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
+DSP_SRCS-$(HAVE_SSE2) += x86/highbd_sad4d_sse2.asm
+DSP_SRCS-$(HAVE_SSE2) += x86/highbd_sad_sse2.asm
+endif # CONFIG_VP9_HIGHBITDEPTH
+endif # CONFIG_ENCODERS
+
+DSP_SRCS-no += $(DSP_SRCS_REMOVE-yes)
+
+DSP_SRCS-yes += vpx_dsp_rtcd.c
+DSP_SRCS-yes += vpx_dsp_rtcd_defs.pl
+
+$(eval $(call rtcd_h_template,vpx_dsp_rtcd,vpx_dsp/vpx_dsp_rtcd_defs.pl))
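+
+# The rtcd_h_template call above generates vpx_dsp_rtcd.h at build time from
+# the prototypes and specializations listed in vpx_dsp_rtcd_defs.pl;
+# vpx_dsp_rtcd.c then resolves the generated function pointers once at run
+# time.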
diff --git a/vpx_dsp/vpx_dsp_rtcd.c b/vpx_dsp/vpx_dsp_rtcd.c
new file mode 100644
index 0000000..5fe27b6
--- /dev/null
+++ b/vpx_dsp/vpx_dsp_rtcd.c
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "./vpx_config.h"
+#define RTCD_C
+#include "./vpx_dsp_rtcd.h"
+#include "vpx_ports/vpx_once.h"
+
+void vpx_dsp_rtcd() {
+ once(setup_rtcd_internal);
+}
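+
+/* For illustration: setup_rtcd_internal() in the generated vpx_dsp_rtcd.h
+ * points each RTCD function at the best specialization the CPU supports,
+ * along the lines of:
+ *   vpx_sad16x16 = vpx_sad16x16_c;
+ *   if (flags & HAS_MMX) vpx_sad16x16 = vpx_sad16x16_mmx;
+ *   if (flags & HAS_SSE2) vpx_sad16x16 = vpx_sad16x16_sse2;
+ */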
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
new file mode 100644
index 0000000..ebec9ec
--- /dev/null
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -0,0 +1,395 @@
+sub vpx_dsp_forward_decls() {
+print <<EOF
+/*
+ * DSP
+ */
+
+#include "vpx/vpx_integer.h"
+
+EOF
+}
+forward_decls qw/vpx_dsp_forward_decls/;
+
+# Functions which use x86inc.asm instead of x86_abi_support.asm
+if (vpx_config("CONFIG_USE_X86INC") eq "yes") {
+ $mmx_x86inc = 'mmx';
+ $sse_x86inc = 'sse';
+ $sse2_x86inc = 'sse2';
+ $ssse3_x86inc = 'ssse3';
+ $avx_x86inc = 'avx';
+ $avx2_x86inc = 'avx2';
+} else {
+ $mmx_x86inc = $sse_x86inc = $sse2_x86inc = $ssse3_x86inc =
+ $avx_x86inc = $avx2_x86inc = '';
+}
+
+# Functions which are 64 bit only.
+if ($opts{arch} eq "x86_64") {
+ $mmx_x86_64 = 'mmx';
+ $sse2_x86_64 = 'sse2';
+ $ssse3_x86_64 = 'ssse3';
+ $avx_x86_64 = 'avx';
+ $avx2_x86_64 = 'avx2';
+} else {
+ $mmx_x86_64 = $sse2_x86_64 = $ssse3_x86_64 =
+ $avx_x86_64 = $avx2_x86_64 = '';
+}
+
+if (vpx_config("CONFIG_ENCODERS") eq "yes") {
+#
+# Single block SAD
+#
+add_proto qw/unsigned int vpx_sad64x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+specialize qw/vpx_sad64x64 avx2 neon/, "$sse2_x86inc";
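+
+# For illustration, the add_proto/specialize pair above makes the generated
+# header declare each implementation plus a run-time-selected pointer,
+# roughly:
+#   unsigned int vpx_sad64x64_c(const uint8_t *src_ptr, int src_stride,
+#                               const uint8_t *ref_ptr, int ref_stride);
+#   unsigned int vpx_sad64x64_avx2(const uint8_t *src_ptr, int src_stride,
+#                                  const uint8_t *ref_ptr, int ref_stride);
+#   RTCD_EXTERN unsigned int (*vpx_sad64x64)(const uint8_t *src_ptr,
+#                                            int src_stride,
+#                                            const uint8_t *ref_ptr,
+#                                            int ref_stride);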
+
+add_proto qw/unsigned int vpx_sad64x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+specialize qw/vpx_sad64x32 avx2/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vpx_sad32x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+specialize qw/vpx_sad32x64 avx2/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vpx_sad32x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+specialize qw/vpx_sad32x32 avx2 neon/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vpx_sad32x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+specialize qw/vpx_sad32x16 avx2/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vpx_sad16x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+specialize qw/vpx_sad16x32/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vpx_sad16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+specialize qw/vpx_sad16x16 mmx media neon/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vpx_sad16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+specialize qw/vpx_sad16x8 mmx neon/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vpx_sad8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+specialize qw/vpx_sad8x16 mmx neon/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vpx_sad8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+specialize qw/vpx_sad8x8 mmx neon/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vpx_sad8x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+specialize qw/vpx_sad8x4/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vpx_sad4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+specialize qw/vpx_sad4x8/, "$sse_x86inc";
+
+add_proto qw/unsigned int vpx_sad4x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+specialize qw/vpx_sad4x4 mmx neon/, "$sse_x86inc";
+
+#
+# Avg
+#
+add_proto qw/unsigned int vpx_sad64x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
+specialize qw/vpx_sad64x64_avg avx2/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vpx_sad64x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
+specialize qw/vpx_sad64x32_avg avx2/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vpx_sad32x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
+specialize qw/vpx_sad32x64_avg avx2/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vpx_sad32x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
+specialize qw/vpx_sad32x32_avg avx2/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vpx_sad32x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
+specialize qw/vpx_sad32x16_avg avx2/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vpx_sad16x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
+specialize qw/vpx_sad16x32_avg/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vpx_sad16x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
+specialize qw/vpx_sad16x16_avg/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vpx_sad16x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
+specialize qw/vpx_sad16x8_avg/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vpx_sad8x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
+specialize qw/vpx_sad8x16_avg/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vpx_sad8x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
+specialize qw/vpx_sad8x8_avg/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vpx_sad8x4_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
+specialize qw/vpx_sad8x4_avg/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vpx_sad4x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
+specialize qw/vpx_sad4x8_avg/, "$sse_x86inc";
+
+add_proto qw/unsigned int vpx_sad4x4_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
+specialize qw/vpx_sad4x4_avg/, "$sse_x86inc";
+
+#
+# Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally
+#
+# Blocks of 3
+add_proto qw/void vpx_sad64x64x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+
+add_proto qw/void vpx_sad32x32x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+
+add_proto qw/void vpx_sad16x16x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+specialize qw/vpx_sad16x16x3 sse3 ssse3/;
+
+add_proto qw/void vpx_sad16x8x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+specialize qw/vpx_sad16x8x3 sse3 ssse3/;
+
+add_proto qw/void vpx_sad8x16x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+specialize qw/vpx_sad8x16x3 sse3/;
+
+add_proto qw/void vpx_sad8x8x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+specialize qw/vpx_sad8x8x3 sse3/;
+
+add_proto qw/void vpx_sad4x4x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+specialize qw/vpx_sad4x4x3 sse3/;
+
+# Blocks of 8
+add_proto qw/void vpx_sad64x64x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+
+add_proto qw/void vpx_sad32x32x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+
+add_proto qw/void vpx_sad16x16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+specialize qw/vpx_sad16x16x8 sse4_1/;
+
+add_proto qw/void vpx_sad16x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+specialize qw/vpx_sad16x8x8 sse4_1/;
+
+add_proto qw/void vpx_sad8x16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+specialize qw/vpx_sad8x16x8 sse4_1/;
+
+add_proto qw/void vpx_sad8x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+specialize qw/vpx_sad8x8x8 sse4_1/;
+
+add_proto qw/void vpx_sad8x4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+
+add_proto qw/void vpx_sad4x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+
+add_proto qw/void vpx_sad4x4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+specialize qw/vpx_sad4x4x8 sse4_1/;
+
+#
+# Multi-block SAD, comparing a reference to N independent blocks
+#
+add_proto qw/void vpx_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
+specialize qw/vpx_sad64x64x4d avx2 neon/, "$sse2_x86inc";
+
+add_proto qw/void vpx_sad64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
+specialize qw/vpx_sad64x32x4d/, "$sse2_x86inc";
+
+add_proto qw/void vpx_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
+specialize qw/vpx_sad32x64x4d/, "$sse2_x86inc";
+
+add_proto qw/void vpx_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
+specialize qw/vpx_sad32x32x4d avx2 neon/, "$sse2_x86inc";
+
+add_proto qw/void vpx_sad32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
+specialize qw/vpx_sad32x16x4d/, "$sse2_x86inc";
+
+add_proto qw/void vpx_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
+specialize qw/vpx_sad16x32x4d/, "$sse2_x86inc";
+
+add_proto qw/void vpx_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
+specialize qw/vpx_sad16x16x4d neon/, "$sse2_x86inc";
+
+add_proto qw/void vpx_sad16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
+specialize qw/vpx_sad16x8x4d/, "$sse2_x86inc";
+
+add_proto qw/void vpx_sad8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
+specialize qw/vpx_sad8x16x4d/, "$sse2_x86inc";
+
+add_proto qw/void vpx_sad8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
+specialize qw/vpx_sad8x8x4d/, "$sse2_x86inc";
+
+add_proto qw/void vpx_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
+specialize qw/vpx_sad8x4x4d/, "$sse2_x86inc";
+
+add_proto qw/void vpx_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
+specialize qw/vpx_sad4x8x4d/, "$sse_x86inc";
+
+add_proto qw/void vpx_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
+specialize qw/vpx_sad4x4x4d/, "$sse_x86inc";
+
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+ #
+ # Single block SAD
+ #
+ add_proto qw/unsigned int vpx_highbd_sad64x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+ specialize qw/vpx_highbd_sad64x64/, "$sse2_x86inc";
+
+ add_proto qw/unsigned int vpx_highbd_sad64x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+ specialize qw/vpx_highbd_sad64x32/, "$sse2_x86inc";
+
+ add_proto qw/unsigned int vpx_highbd_sad32x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+ specialize qw/vpx_highbd_sad32x64/, "$sse2_x86inc";
+
+ add_proto qw/unsigned int vpx_highbd_sad32x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+ specialize qw/vpx_highbd_sad32x32/, "$sse2_x86inc";
+
+ add_proto qw/unsigned int vpx_highbd_sad32x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+ specialize qw/vpx_highbd_sad32x16/, "$sse2_x86inc";
+
+ add_proto qw/unsigned int vpx_highbd_sad16x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+ specialize qw/vpx_highbd_sad16x32/, "$sse2_x86inc";
+
+ add_proto qw/unsigned int vpx_highbd_sad16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+ specialize qw/vpx_highbd_sad16x16/, "$sse2_x86inc";
+
+ add_proto qw/unsigned int vpx_highbd_sad16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+ specialize qw/vpx_highbd_sad16x8/, "$sse2_x86inc";
+
+ add_proto qw/unsigned int vpx_highbd_sad8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+ specialize qw/vpx_highbd_sad8x16/, "$sse2_x86inc";
+
+ add_proto qw/unsigned int vpx_highbd_sad8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+ specialize qw/vpx_highbd_sad8x8/, "$sse2_x86inc";
+
+ add_proto qw/unsigned int vpx_highbd_sad8x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+ specialize qw/vpx_highbd_sad8x4/, "$sse2_x86inc";
+
+ add_proto qw/unsigned int vpx_highbd_sad4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+ specialize qw/vpx_highbd_sad4x8/;
+
+ add_proto qw/unsigned int vpx_highbd_sad4x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+ specialize qw/vpx_highbd_sad4x4/;
+
+ #
+ # Avg
+ #
+ add_proto qw/unsigned int vpx_highbd_sad64x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
+ specialize qw/vpx_highbd_sad64x64_avg/, "$sse2_x86inc";
+
+ add_proto qw/unsigned int vpx_highbd_sad64x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
+ specialize qw/vpx_highbd_sad64x32_avg/, "$sse2_x86inc";
+
+ add_proto qw/unsigned int vpx_highbd_sad32x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
+ specialize qw/vpx_highbd_sad32x64_avg/, "$sse2_x86inc";
+
+ add_proto qw/unsigned int vpx_highbd_sad32x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
+ specialize qw/vpx_highbd_sad32x32_avg/, "$sse2_x86inc";
+
+ add_proto qw/unsigned int vpx_highbd_sad32x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
+ specialize qw/vpx_highbd_sad32x16_avg/, "$sse2_x86inc";
+
+ add_proto qw/unsigned int vpx_highbd_sad16x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
+ specialize qw/vpx_highbd_sad16x32_avg/, "$sse2_x86inc";
+
+ add_proto qw/unsigned int vpx_highbd_sad16x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
+ specialize qw/vpx_highbd_sad16x16_avg/, "$sse2_x86inc";
+
+ add_proto qw/unsigned int vpx_highbd_sad16x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
+ specialize qw/vpx_highbd_sad16x8_avg/, "$sse2_x86inc";
+
+ add_proto qw/unsigned int vpx_highbd_sad8x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
+ specialize qw/vpx_highbd_sad8x16_avg/, "$sse2_x86inc";
+
+ add_proto qw/unsigned int vpx_highbd_sad8x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
+ specialize qw/vpx_highbd_sad8x8_avg/, "$sse2_x86inc";
+
+ add_proto qw/unsigned int vpx_highbd_sad8x4_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
+ specialize qw/vpx_highbd_sad8x4_avg/, "$sse2_x86inc";
+
+ add_proto qw/unsigned int vpx_highbd_sad4x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
+ specialize qw/vpx_highbd_sad4x8_avg/;
+
+ add_proto qw/unsigned int vpx_highbd_sad4x4_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
+ specialize qw/vpx_highbd_sad4x4_avg/;
+
+ #
+ # Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally
+ #
+ # Blocks of 3
+ add_proto qw/void vpx_highbd_sad64x64x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+ specialize qw/vpx_highbd_sad64x64x3/;
+
+ add_proto qw/void vpx_highbd_sad32x32x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+ specialize qw/vpx_highbd_sad32x32x3/;
+
+ add_proto qw/void vpx_highbd_sad16x16x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+ specialize qw/vpx_highbd_sad16x16x3/;
+
+ add_proto qw/void vpx_highbd_sad16x8x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+ specialize qw/vpx_highbd_sad16x8x3/;
+
+ add_proto qw/void vpx_highbd_sad8x16x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+ specialize qw/vpx_highbd_sad8x16x3/;
+
+ add_proto qw/void vpx_highbd_sad8x8x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+ specialize qw/vpx_highbd_sad8x8x3/;
+
+ add_proto qw/void vpx_highbd_sad4x4x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+ specialize qw/vpx_highbd_sad4x4x3/;
+
+ # Blocks of 8
+ add_proto qw/void vpx_highbd_sad64x64x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+ specialize qw/vpx_highbd_sad64x64x8/;
+
+ add_proto qw/void vpx_highbd_sad32x32x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+ specialize qw/vpx_highbd_sad32x32x8/;
+
+ add_proto qw/void vpx_highbd_sad16x16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+ specialize qw/vpx_highbd_sad16x16x8/;
+
+ add_proto qw/void vpx_highbd_sad16x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+ specialize qw/vpx_highbd_sad16x8x8/;
+
+ add_proto qw/void vpx_highbd_sad8x16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+ specialize qw/vpx_highbd_sad8x16x8/;
+
+ add_proto qw/void vpx_highbd_sad8x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+ specialize qw/vpx_highbd_sad8x8x8/;
+
+ add_proto qw/void vpx_highbd_sad8x4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+ specialize qw/vpx_highbd_sad8x4x8/;
+
+ add_proto qw/void vpx_highbd_sad4x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+ specialize qw/vpx_highbd_sad4x8x8/;
+
+ add_proto qw/void vpx_highbd_sad4x4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+ specialize qw/vpx_highbd_sad4x4x8/;
+
+ #
+ # Multi-block SAD, comparing a reference to N independent blocks
+ #
+ add_proto qw/void vpx_highbd_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
+ specialize qw/vpx_highbd_sad64x64x4d/, "$sse2_x86inc";
+
+ add_proto qw/void vpx_highbd_sad64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
+ specialize qw/vpx_highbd_sad64x32x4d/, "$sse2_x86inc";
+
+ add_proto qw/void vpx_highbd_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
+ specialize qw/vpx_highbd_sad32x64x4d/, "$sse2_x86inc";
+
+ add_proto qw/void vpx_highbd_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
+ specialize qw/vpx_highbd_sad32x32x4d/, "$sse2_x86inc";
+
+ add_proto qw/void vpx_highbd_sad32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
+ specialize qw/vpx_highbd_sad32x16x4d/, "$sse2_x86inc";
+
+ add_proto qw/void vpx_highbd_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
+ specialize qw/vpx_highbd_sad16x32x4d/, "$sse2_x86inc";
+
+ add_proto qw/void vpx_highbd_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
+ specialize qw/vpx_highbd_sad16x16x4d/, "$sse2_x86inc";
+
+ add_proto qw/void vpx_highbd_sad16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
+ specialize qw/vpx_highbd_sad16x8x4d/, "$sse2_x86inc";
+
+ add_proto qw/void vpx_highbd_sad8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
+ specialize qw/vpx_highbd_sad8x16x4d/, "$sse2_x86inc";
+
+ add_proto qw/void vpx_highbd_sad8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
+ specialize qw/vpx_highbd_sad8x8x4d/, "$sse2_x86inc";
+
+ add_proto qw/void vpx_highbd_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
+ specialize qw/vpx_highbd_sad8x4x4d/, "$sse2_x86inc";
+
+ add_proto qw/void vpx_highbd_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
+ specialize qw/vpx_highbd_sad4x8x4d/, "$sse2_x86inc";
+
+ add_proto qw/void vpx_highbd_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
+ specialize qw/vpx_highbd_sad4x4x4d/, "$sse2_x86inc";
+
+} # CONFIG_VP9_HIGHBITDEPTH
+} # CONFIG_ENCODERS
+
+1;
diff --git a/vp9/encoder/x86/vp9_highbd_sad4d_sse2.asm b/vpx_dsp/x86/highbd_sad4d_sse2.asm
similarity index 98%
rename from vp9/encoder/x86/vp9_highbd_sad4d_sse2.asm
rename to vpx_dsp/x86/highbd_sad4d_sse2.asm
index f79a59f..95cc437 100644
--- a/vp9/encoder/x86/vp9_highbd_sad4d_sse2.asm
+++ b/vpx_dsp/x86/highbd_sad4d_sse2.asm
@@ -8,6 +8,8 @@
; be found in the AUTHORS file in the root of the source tree.
;
+%define program_name vpx
+
%include "third_party/x86inc/x86inc.asm"
SECTION .text
@@ -209,9 +211,9 @@
HIGH_PROCESS_32x2x4 0, %4, %5, (%4 + 32), (%5 + 32), %6
%endmacro
-; void vp9_highbd_sadNxNx4d_sse2(uint8_t *src, int src_stride,
+; void vpx_highbd_sadNxNx4d_sse2(uint8_t *src, int src_stride,
; uint8_t *ref[4], int ref_stride,
-; unsigned int res[4]);
+; uint32_t res[4]);
; where NxN = 64x64, 32x32, 16x16, 16x8, 8x16 or 8x8
%macro HIGH_SADNXN4D 2
%if UNIX64
diff --git a/vp9/encoder/x86/vp9_highbd_sad_sse2.asm b/vpx_dsp/x86/highbd_sad_sse2.asm
similarity index 97%
rename from vp9/encoder/x86/vp9_highbd_sad_sse2.asm
rename to vpx_dsp/x86/highbd_sad_sse2.asm
index c895ac0..4d422dd 100644
--- a/vp9/encoder/x86/vp9_highbd_sad_sse2.asm
+++ b/vpx_dsp/x86/highbd_sad_sse2.asm
@@ -8,6 +8,8 @@
; be found in the AUTHORS file in the root of the source tree.
;
+%define program_name vpx
+
%include "third_party/x86inc/x86inc.asm"
SECTION .text
@@ -50,7 +52,7 @@
%endif
%endmacro
-; unsigned int vp9_highbd_sad64x{16,32,64}_sse2(uint8_t *src, int src_stride,
+; unsigned int vpx_highbd_sad64x{32,64}_sse2(uint8_t *src, int src_stride,
; uint8_t *ref, int ref_stride);
%macro HIGH_SAD64XN 1-2 0
HIGH_SAD_FN 64, %1, 5, %2
@@ -157,7 +159,7 @@
HIGH_SAD64XN 32, 1 ; highbd_sad64x32_avg_sse2
-; unsigned int vp9_highbd_sad32x{16,32,64}_sse2(uint8_t *src, int src_stride,
+; unsigned int vpx_highbd_sad32x{16,32,64}_sse2(uint8_t *src, int src_stride,
; uint8_t *ref, int ref_stride);
%macro HIGH_SAD32XN 1-2 0
HIGH_SAD_FN 32, %1, 5, %2
@@ -225,7 +227,7 @@
HIGH_SAD32XN 32, 1 ; highbd_sad32x32_avg_sse2
HIGH_SAD32XN 16, 1 ; highbd_sad32x16_avg_sse2
-; unsigned int vp9_highbd_sad16x{8,16,32}_sse2(uint8_t *src, int src_stride,
+; unsigned int vpx_highbd_sad16x{8,16,32}_sse2(uint8_t *src, int src_stride,
; uint8_t *ref, int ref_stride);
%macro HIGH_SAD16XN 1-2 0
HIGH_SAD_FN 16, %1, 5, %2
@@ -294,7 +296,7 @@
HIGH_SAD16XN 8, 1 ; highbd_sad16x8_avg_sse2
-; unsigned int vp9_highbd_sad8x{4,8,16}_sse2(uint8_t *src, int src_stride,
+; unsigned int vpx_highbd_sad8x{4,8,16}_sse2(uint8_t *src, int src_stride,
; uint8_t *ref, int ref_stride);
%macro HIGH_SAD8XN 1-2 0
HIGH_SAD_FN 8, %1, 7, %2
diff --git a/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c b/vpx_dsp/x86/sad4d_avx2.c
similarity index 96%
rename from vp9/encoder/x86/vp9_sad4d_intrin_avx2.c
rename to vpx_dsp/x86/sad4d_avx2.c
index 1feed62..4128f2a 100644
--- a/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c
+++ b/vpx_dsp/x86/sad4d_avx2.c
@@ -10,11 +10,11 @@
#include <immintrin.h> // AVX2
#include "vpx/vpx_integer.h"
-void vp9_sad32x32x4d_avx2(uint8_t *src,
+void vpx_sad32x32x4d_avx2(uint8_t *src,
int src_stride,
uint8_t *ref[4],
int ref_stride,
- unsigned int res[4]) {
+ uint32_t res[4]) {
__m256i src_reg, ref0_reg, ref1_reg, ref2_reg, ref3_reg;
__m256i sum_ref0, sum_ref1, sum_ref2, sum_ref3;
__m256i sum_mlow, sum_mhigh;
@@ -80,11 +80,11 @@
}
}
-void vp9_sad64x64x4d_avx2(uint8_t *src,
+void vpx_sad64x64x4d_avx2(uint8_t *src,
int src_stride,
uint8_t *ref[4],
int ref_stride,
- unsigned int res[4]) {
+ uint32_t res[4]) {
__m256i src_reg, srcnext_reg, ref0_reg, ref0next_reg;
__m256i ref1_reg, ref1next_reg, ref2_reg, ref2next_reg;
__m256i ref3_reg, ref3next_reg;
diff --git a/vp9/encoder/x86/vp9_sad4d_sse2.asm b/vpx_dsp/x86/sad4d_sse2.asm
similarity index 97%
rename from vp9/encoder/x86/vp9_sad4d_sse2.asm
rename to vpx_dsp/x86/sad4d_sse2.asm
index b493628..0f7fb93 100644
--- a/vp9/encoder/x86/vp9_sad4d_sse2.asm
+++ b/vpx_dsp/x86/sad4d_sse2.asm
@@ -8,6 +8,8 @@
; be found in the AUTHORS file in the root of the source tree.
;
+%define program_name vpx
+
%include "third_party/x86inc/x86inc.asm"
SECTION .text
@@ -167,9 +169,9 @@
PROCESS_32x2x4 0, %4, %5, %4 + 32, %5 + 32, %6
%endmacro
-; void vp9_sadNxNx4d_sse2(uint8_t *src, int src_stride,
+; void vpx_sadNxNx4d_sse2(uint8_t *src, int src_stride,
; uint8_t *ref[4], int ref_stride,
-; unsigned int res[4]);
+; uint32_t res[4]);
; where NxN = 64x64, 32x32, 16x16, 16x8, 8x16 or 8x8
%macro SADNXN4D 2
%if UNIX64
diff --git a/vp9/encoder/x86/vp9_sad_intrin_avx2.c b/vpx_dsp/x86/sad_avx2.c
similarity index 95%
rename from vp9/encoder/x86/vp9_sad_intrin_avx2.c
rename to vpx_dsp/x86/sad_avx2.c
index 1131930..78536a4 100644
--- a/vp9/encoder/x86/vp9_sad_intrin_avx2.c
+++ b/vpx_dsp/x86/sad_avx2.c
@@ -11,7 +11,7 @@
#include "vpx_ports/mem.h"
#define FSAD64_H(h) \
-unsigned int vp9_sad64x##h##_avx2(const uint8_t *src_ptr, \
+unsigned int vpx_sad64x##h##_avx2(const uint8_t *src_ptr, \
int src_stride, \
const uint8_t *ref_ptr, \
int ref_stride) { \
@@ -40,7 +40,7 @@
}
#define FSAD32_H(h) \
-unsigned int vp9_sad32x##h##_avx2(const uint8_t *src_ptr, \
+unsigned int vpx_sad32x##h##_avx2(const uint8_t *src_ptr, \
int src_stride, \
const uint8_t *ref_ptr, \
int ref_stride) { \
@@ -89,7 +89,7 @@
#undef FSAD32_H
#define FSADAVG64_H(h) \
-unsigned int vp9_sad64x##h##_avg_avx2(const uint8_t *src_ptr, \
+unsigned int vpx_sad64x##h##_avg_avx2(const uint8_t *src_ptr, \
int src_stride, \
const uint8_t *ref_ptr, \
int ref_stride, \
@@ -124,7 +124,7 @@
}
#define FSADAVG32_H(h) \
-unsigned int vp9_sad32x##h##_avg_avx2(const uint8_t *src_ptr, \
+unsigned int vpx_sad32x##h##_avg_avx2(const uint8_t *src_ptr, \
int src_stride, \
const uint8_t *ref_ptr, \
int ref_stride, \
diff --git a/vp8/common/x86/sad_mmx.asm b/vpx_dsp/x86/sad_mmx.asm
similarity index 95%
rename from vp8/common/x86/sad_mmx.asm
rename to vpx_dsp/x86/sad_mmx.asm
index 592112f..9968992 100644
--- a/vp8/common/x86/sad_mmx.asm
+++ b/vpx_dsp/x86/sad_mmx.asm
@@ -11,18 +11,18 @@
%include "vpx_ports/x86_abi_support.asm"
-global sym(vp8_sad16x16_mmx) PRIVATE
-global sym(vp8_sad8x16_mmx) PRIVATE
-global sym(vp8_sad8x8_mmx) PRIVATE
-global sym(vp8_sad4x4_mmx) PRIVATE
-global sym(vp8_sad16x8_mmx) PRIVATE
+global sym(vpx_sad16x16_mmx) PRIVATE
+global sym(vpx_sad8x16_mmx) PRIVATE
+global sym(vpx_sad8x8_mmx) PRIVATE
+global sym(vpx_sad4x4_mmx) PRIVATE
+global sym(vpx_sad16x8_mmx) PRIVATE
-;unsigned int vp8_sad16x16_mmx(
+;unsigned int vpx_sad16x16_mmx(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride)
-sym(vp8_sad16x16_mmx):
+sym(vpx_sad16x16_mmx):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 4
@@ -109,12 +109,12 @@
ret
-;unsigned int vp8_sad8x16_mmx(
+;unsigned int vpx_sad8x16_mmx(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride)
-sym(vp8_sad8x16_mmx):
+sym(vpx_sad8x16_mmx):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 4
@@ -181,12 +181,12 @@
ret
-;unsigned int vp8_sad8x8_mmx(
+;unsigned int vpx_sad8x8_mmx(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride)
-sym(vp8_sad8x8_mmx):
+sym(vpx_sad8x8_mmx):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 4
@@ -251,12 +251,12 @@
ret
-;unsigned int vp8_sad4x4_mmx(
+;unsigned int vpx_sad4x4_mmx(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride)
-sym(vp8_sad4x4_mmx):
+sym(vpx_sad4x4_mmx):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 4
@@ -340,12 +340,12 @@
ret
-;unsigned int vp8_sad16x8_mmx(
+;unsigned int vpx_sad16x8_mmx(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride)
-sym(vp8_sad16x8_mmx):
+sym(vpx_sad16x8_mmx):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 4
diff --git a/vp9/encoder/x86/vp9_sad_sse2.asm b/vpx_dsp/x86/sad_sse2.asm
similarity index 95%
rename from vp9/encoder/x86/vp9_sad_sse2.asm
rename to vpx_dsp/x86/sad_sse2.asm
index c4c5c54..c6a829d 100644
--- a/vp9/encoder/x86/vp9_sad_sse2.asm
+++ b/vpx_dsp/x86/sad_sse2.asm
@@ -8,6 +8,8 @@
; be found in the AUTHORS file in the root of the source tree.
;
+%define program_name vpx
+
%include "third_party/x86inc/x86inc.asm"
SECTION .text
@@ -44,7 +46,7 @@
%endif ; %3 == 7
%endmacro
-; unsigned int vp9_sad64x64_sse2(uint8_t *src, int src_stride,
+; unsigned int vpx_sad64x{32,64}_sse2(uint8_t *src, int src_stride,
; uint8_t *ref, int ref_stride);
%macro SAD64XN 1-2 0
SAD_FN 64, %1, 5, %2
@@ -87,7 +89,7 @@
SAD64XN 64, 1 ; sad64x64_avg_sse2
SAD64XN 32, 1 ; sad64x32_avg_sse2
-; unsigned int vp9_sad32x32_sse2(uint8_t *src, int src_stride,
+; unsigned int vpx_sad32x{16,32,64}_sse2(uint8_t *src, int src_stride,
; uint8_t *ref, int ref_stride);
%macro SAD32XN 1-2 0
SAD_FN 32, %1, 5, %2
@@ -132,7 +134,7 @@
SAD32XN 32, 1 ; sad32x32_avg_sse2
SAD32XN 16, 1 ; sad32x16_avg_sse2
-; unsigned int vp9_sad16x{8,16}_sse2(uint8_t *src, int src_stride,
+; unsigned int vpx_sad16x{8,16,32}_sse2(uint8_t *src, int src_stride,
; uint8_t *ref, int ref_stride);
%macro SAD16XN 1-2 0
SAD_FN 16, %1, 7, %2
@@ -178,7 +180,7 @@
SAD16XN 16, 1 ; sad16x16_avg_sse2
SAD16XN 8, 1 ; sad16x8_avg_sse2
-; unsigned int vp9_sad8x{8,16}_sse2(uint8_t *src, int src_stride,
+; unsigned int vpx_sad8x{4,8,16}_sse2(uint8_t *src, int src_stride,
; uint8_t *ref, int ref_stride);
%macro SAD8XN 1-2 0
SAD_FN 8, %1, 7, %2
@@ -222,7 +224,7 @@
SAD8XN 8, 1 ; sad8x8_avg_sse2
SAD8XN 4, 1 ; sad8x4_avg_sse2
-; unsigned int vp9_sad4x{4, 8}_sse(uint8_t *src, int src_stride,
+; unsigned int vpx_sad4x{4, 8}_sse(uint8_t *src, int src_stride,
; uint8_t *ref, int ref_stride);
%macro SAD4XN 1-2 0
SAD_FN 4, %1, 7, %2
diff --git a/vp9/encoder/x86/vp9_sad_sse3.asm b/vpx_dsp/x86/sad_sse3.asm
similarity index 94%
rename from vp9/encoder/x86/vp9_sad_sse3.asm
rename to vpx_dsp/x86/sad_sse3.asm
index 2b90a5d..18279bd 100644
--- a/vp9/encoder/x86/vp9_sad_sse3.asm
+++ b/vpx_dsp/x86/sad_sse3.asm
@@ -19,7 +19,6 @@
%define end_ptr rcx
%define ret_var rbx
%define result_ptr arg(4)
- %define max_err arg(4)
%define height dword ptr arg(4)
push rbp
mov rbp, rsp
@@ -42,7 +41,6 @@
%define end_ptr r10
%define ret_var r11
%define result_ptr [rsp+xmm_stack_space+8+4*8]
- %define max_err [rsp+xmm_stack_space+8+4*8]
%define height dword ptr [rsp+xmm_stack_space+8+4*8]
%else
%define src_ptr rdi
@@ -52,7 +50,6 @@
%define end_ptr r9
%define ret_var r10
%define result_ptr r8
- %define max_err r8
%define height r8
%endif
%endif
@@ -67,7 +64,6 @@
%define end_ptr
%define ret_var
%define result_ptr
- %define max_err
%define height
%if ABI_IS_32BIT
@@ -169,14 +165,14 @@
paddw mm7, mm3
%endmacro
-;void int vp9_sad16x16x3_sse3(
+;void vpx_sad16x16x3_sse3(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride,
; int *results)
-global sym(vp9_sad16x16x3_sse3) PRIVATE
-sym(vp9_sad16x16x3_sse3):
+global sym(vpx_sad16x16x3_sse3) PRIVATE
+sym(vpx_sad16x16x3_sse3):
STACK_FRAME_CREATE_X3
@@ -211,14 +207,14 @@
STACK_FRAME_DESTROY_X3
-;void int vp9_sad16x8x3_sse3(
+;void vpx_sad16x8x3_sse3(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride,
; int *results)
-global sym(vp9_sad16x8x3_sse3) PRIVATE
-sym(vp9_sad16x8x3_sse3):
+global sym(vpx_sad16x8x3_sse3) PRIVATE
+sym(vpx_sad16x8x3_sse3):
STACK_FRAME_CREATE_X3
@@ -249,14 +245,14 @@
STACK_FRAME_DESTROY_X3
-;void int vp9_sad8x16x3_sse3(
+;void vpx_sad8x16x3_sse3(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride,
; int *results)
-global sym(vp9_sad8x16x3_sse3) PRIVATE
-sym(vp9_sad8x16x3_sse3):
+global sym(vpx_sad8x16x3_sse3) PRIVATE
+sym(vpx_sad8x16x3_sse3):
STACK_FRAME_CREATE_X3
@@ -278,14 +274,14 @@
STACK_FRAME_DESTROY_X3
-;void int vp9_sad8x8x3_sse3(
+;void vpx_sad8x8x3_sse3(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride,
; int *results)
-global sym(vp9_sad8x8x3_sse3) PRIVATE
-sym(vp9_sad8x8x3_sse3):
+global sym(vpx_sad8x8x3_sse3) PRIVATE
+sym(vpx_sad8x8x3_sse3):
STACK_FRAME_CREATE_X3
@@ -303,14 +299,14 @@
STACK_FRAME_DESTROY_X3
-;void int vp9_sad4x4x3_sse3(
+;void vpx_sad4x4x3_sse3(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride,
; int *results)
-global sym(vp9_sad4x4x3_sse3) PRIVATE
-sym(vp9_sad4x4x3_sse3):
+global sym(vpx_sad4x4x3_sse3) PRIVATE
+sym(vpx_sad4x4x3_sse3):
STACK_FRAME_CREATE_X3
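
The x3 kernels in this file return three SADs at consecutive reference offsets, letting the motion search probe a position and its two horizontal neighbours in a single call; dropping the unused max_err alias is safe here because these kernels only ever read arg(4) as result_ptr. A hedged scalar sketch of that contract, reusing the illustrative sad_mxn_c above:

    /* Illustrative contract of vpx_sad16x16x3_sse3: one SAD for each
     * of the reference offsets 0, 1 and 2, written to results[]. */
    static void sad_16x16x3_c(const uint8_t *src, int src_stride,
                              const uint8_t *ref, int ref_stride,
                              int *results) {
      for (int i = 0; i < 3; ++i)
        results[i] = (int)sad_mxn_c(src, src_stride, ref + i, ref_stride,
                                    16, 16);
    }
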
diff --git a/vp9/encoder/x86/vp9_sad_sse4.asm b/vpx_dsp/x86/sad_sse4.asm
similarity index 94%
rename from vp9/encoder/x86/vp9_sad_sse4.asm
rename to vpx_dsp/x86/sad_sse4.asm
index faf1768..bc67447 100644
--- a/vp9/encoder/x86/vp9_sad_sse4.asm
+++ b/vpx_dsp/x86/sad_sse4.asm
@@ -165,14 +165,14 @@
movdqa [rdi + 16], xmm2
%endmacro
-;void vp9_sad16x16x8_sse4(
+;void vpx_sad16x16x8_sse4_1(
; const unsigned char *src_ptr,
; int src_stride,
; const unsigned char *ref_ptr,
; int ref_stride,
; unsigned short *sad_array);
-global sym(vp9_sad16x16x8_sse4) PRIVATE
-sym(vp9_sad16x16x8_sse4):
+global sym(vpx_sad16x16x8_sse4_1) PRIVATE
+sym(vpx_sad16x16x8_sse4_1):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
@@ -205,15 +205,15 @@
ret
-;void vp9_sad16x8x8_sse4(
+;void vpx_sad16x8x8_sse4_1(
; const unsigned char *src_ptr,
; int src_stride,
; const unsigned char *ref_ptr,
; int ref_stride,
; unsigned short *sad_array
;);
-global sym(vp9_sad16x8x8_sse4) PRIVATE
-sym(vp9_sad16x8x8_sse4):
+global sym(vpx_sad16x8x8_sse4_1) PRIVATE
+sym(vpx_sad16x8x8_sse4_1):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
@@ -242,15 +242,15 @@
ret
-;void vp9_sad8x8x8_sse4(
+;void vpx_sad8x8x8_sse4_1(
; const unsigned char *src_ptr,
; int src_stride,
; const unsigned char *ref_ptr,
; int ref_stride,
; unsigned short *sad_array
;);
-global sym(vp9_sad8x8x8_sse4) PRIVATE
-sym(vp9_sad8x8x8_sse4):
+global sym(vpx_sad8x8x8_sse4_1) PRIVATE
+sym(vpx_sad8x8x8_sse4_1):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
@@ -279,15 +279,15 @@
ret
-;void vp9_sad8x16x8_sse4(
+;void vpx_sad8x16x8_sse4_1(
; const unsigned char *src_ptr,
; int src_stride,
; const unsigned char *ref_ptr,
; int ref_stride,
; unsigned short *sad_array
;);
-global sym(vp9_sad8x16x8_sse4) PRIVATE
-sym(vp9_sad8x16x8_sse4):
+global sym(vpx_sad8x16x8_sse4_1) PRIVATE
+sym(vpx_sad8x16x8_sse4_1):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
@@ -320,15 +320,15 @@
ret
-;void vp9_sad4x4x8_c(
+;void vpx_sad4x4x8_sse4_1(
; const unsigned char *src_ptr,
; int src_stride,
; const unsigned char *ref_ptr,
; int ref_stride,
; unsigned short *sad_array
;);
-global sym(vp9_sad4x4x8_sse4) PRIVATE
-sym(vp9_sad4x4x8_sse4):
+global sym(vpx_sad4x4x8_sse4_1) PRIVATE
+sym(vpx_sad4x4x8_sse4_1):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
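
Besides the prefix change, these symbols gain the _sse4_1 suffix to match rtcd's name for the SSE4.1 extension, and the stale vp9_sad4x4x8_c line in the last comment is corrected. The x8 kernels widen the x3 idea to eight consecutive offsets, storing unsigned short results per the prototypes above; an illustrative scalar sketch of the contract, again building on sad_mxn_c:

    /* Illustrative contract of the vpx_sadMxNx8_sse4_1 kernels: eight
     * SADs for reference offsets 0..7, stored as unsigned short. */
    static void sad_16x16x8_c(const uint8_t *src, int src_stride,
                              const uint8_t *ref, int ref_stride,
                              unsigned short *sad_array) {
      for (int i = 0; i < 8; ++i)
        sad_array[i] = (unsigned short)sad_mxn_c(src, src_stride, ref + i,
                                                 ref_stride, 16, 16);
    }
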
diff --git a/vp8/common/x86/sad_ssse3.asm b/vpx_dsp/x86/sad_ssse3.asm
similarity index 64%
rename from vp8/common/x86/sad_ssse3.asm
rename to vpx_dsp/x86/sad_ssse3.asm
index 278fc06..49f204f 100644
--- a/vp8/common/x86/sad_ssse3.asm
+++ b/vpx_dsp/x86/sad_ssse3.asm
@@ -146,14 +146,14 @@
%endmacro
-;void int vp8_sad16x16x3_ssse3(
+;void vpx_sad16x16x3_ssse3(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride,
; int *results)
-global sym(vp8_sad16x16x3_ssse3) PRIVATE
-sym(vp8_sad16x16x3_ssse3):
+global sym(vpx_sad16x16x3_ssse3) PRIVATE
+sym(vpx_sad16x16x3_ssse3):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
@@ -169,31 +169,31 @@
mov rdx, 0xf
and rdx, rdi
- jmp .vp8_sad16x16x3_ssse3_skiptable
-.vp8_sad16x16x3_ssse3_jumptable:
- dd .vp8_sad16x16x3_ssse3_aligned_by_0 - .vp8_sad16x16x3_ssse3_do_jump
- dd .vp8_sad16x16x3_ssse3_aligned_by_1 - .vp8_sad16x16x3_ssse3_do_jump
- dd .vp8_sad16x16x3_ssse3_aligned_by_2 - .vp8_sad16x16x3_ssse3_do_jump
- dd .vp8_sad16x16x3_ssse3_aligned_by_3 - .vp8_sad16x16x3_ssse3_do_jump
- dd .vp8_sad16x16x3_ssse3_aligned_by_4 - .vp8_sad16x16x3_ssse3_do_jump
- dd .vp8_sad16x16x3_ssse3_aligned_by_5 - .vp8_sad16x16x3_ssse3_do_jump
- dd .vp8_sad16x16x3_ssse3_aligned_by_6 - .vp8_sad16x16x3_ssse3_do_jump
- dd .vp8_sad16x16x3_ssse3_aligned_by_7 - .vp8_sad16x16x3_ssse3_do_jump
- dd .vp8_sad16x16x3_ssse3_aligned_by_8 - .vp8_sad16x16x3_ssse3_do_jump
- dd .vp8_sad16x16x3_ssse3_aligned_by_9 - .vp8_sad16x16x3_ssse3_do_jump
- dd .vp8_sad16x16x3_ssse3_aligned_by_10 - .vp8_sad16x16x3_ssse3_do_jump
- dd .vp8_sad16x16x3_ssse3_aligned_by_11 - .vp8_sad16x16x3_ssse3_do_jump
- dd .vp8_sad16x16x3_ssse3_aligned_by_12 - .vp8_sad16x16x3_ssse3_do_jump
- dd .vp8_sad16x16x3_ssse3_aligned_by_13 - .vp8_sad16x16x3_ssse3_do_jump
- dd .vp8_sad16x16x3_ssse3_aligned_by_14 - .vp8_sad16x16x3_ssse3_do_jump
- dd .vp8_sad16x16x3_ssse3_aligned_by_15 - .vp8_sad16x16x3_ssse3_do_jump
-.vp8_sad16x16x3_ssse3_skiptable:
+ jmp .vpx_sad16x16x3_ssse3_skiptable
+.vpx_sad16x16x3_ssse3_jumptable:
+ dd .vpx_sad16x16x3_ssse3_aligned_by_0 - .vpx_sad16x16x3_ssse3_do_jump
+ dd .vpx_sad16x16x3_ssse3_aligned_by_1 - .vpx_sad16x16x3_ssse3_do_jump
+ dd .vpx_sad16x16x3_ssse3_aligned_by_2 - .vpx_sad16x16x3_ssse3_do_jump
+ dd .vpx_sad16x16x3_ssse3_aligned_by_3 - .vpx_sad16x16x3_ssse3_do_jump
+ dd .vpx_sad16x16x3_ssse3_aligned_by_4 - .vpx_sad16x16x3_ssse3_do_jump
+ dd .vpx_sad16x16x3_ssse3_aligned_by_5 - .vpx_sad16x16x3_ssse3_do_jump
+ dd .vpx_sad16x16x3_ssse3_aligned_by_6 - .vpx_sad16x16x3_ssse3_do_jump
+ dd .vpx_sad16x16x3_ssse3_aligned_by_7 - .vpx_sad16x16x3_ssse3_do_jump
+ dd .vpx_sad16x16x3_ssse3_aligned_by_8 - .vpx_sad16x16x3_ssse3_do_jump
+ dd .vpx_sad16x16x3_ssse3_aligned_by_9 - .vpx_sad16x16x3_ssse3_do_jump
+ dd .vpx_sad16x16x3_ssse3_aligned_by_10 - .vpx_sad16x16x3_ssse3_do_jump
+ dd .vpx_sad16x16x3_ssse3_aligned_by_11 - .vpx_sad16x16x3_ssse3_do_jump
+ dd .vpx_sad16x16x3_ssse3_aligned_by_12 - .vpx_sad16x16x3_ssse3_do_jump
+ dd .vpx_sad16x16x3_ssse3_aligned_by_13 - .vpx_sad16x16x3_ssse3_do_jump
+ dd .vpx_sad16x16x3_ssse3_aligned_by_14 - .vpx_sad16x16x3_ssse3_do_jump
+ dd .vpx_sad16x16x3_ssse3_aligned_by_15 - .vpx_sad16x16x3_ssse3_do_jump
+.vpx_sad16x16x3_ssse3_skiptable:
- call .vp8_sad16x16x3_ssse3_do_jump
-.vp8_sad16x16x3_ssse3_do_jump:
+ call .vpx_sad16x16x3_ssse3_do_jump
+.vpx_sad16x16x3_ssse3_do_jump:
pop rcx ; get the address of do_jump
- mov rax, .vp8_sad16x16x3_ssse3_jumptable - .vp8_sad16x16x3_ssse3_do_jump
- add rax, rcx ; get the absolute address of vp8_sad16x16x3_ssse3_jumptable
+ mov rax, .vpx_sad16x16x3_ssse3_jumptable - .vpx_sad16x16x3_ssse3_do_jump
+ add rax, rcx ; get the absolute address of vpx_sad16x16x3_ssse3_jumptable
movsxd rax, dword [rax + 4*rdx] ; get the 32 bit offset from the jumptable
add rcx, rax
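
The call/pop sequence in this hunk is the usual 32-bit-safe way to obtain a code address without RIP-relative addressing: the table stores each aligned_by_N label as an offset from do_jump, and adding the popped return address converts the (ref & 0xf) index into an absolute jump target. A hypothetical C analogue of the dispatch, for orientation only (aligned_fn and dispatch_by_alignment are illustrative names, not part of the source):

    #include <stdint.h>

    typedef void (*aligned_fn)(void);  /* stands in for the aligned_by_N labels */

    /* Hypothetical analogue: 'and rdx, rdi' extracts the low four bits
     * of the reference pointer, and the jump table maps each of the 16
     * alignments to a loop specialized for that offset. */
    static void dispatch_by_alignment(const uint8_t *ref,
                                      const aligned_fn table[16]) {
      table[(uintptr_t)ref & 0xf]();
    }
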
@@ -203,23 +203,23 @@
jmp rcx
- PROCESS_16X16X3_OFFSET 0, .vp8_sad16x16x3_ssse3
- PROCESS_16X16X3_OFFSET 1, .vp8_sad16x16x3_ssse3
- PROCESS_16X16X3_OFFSET 2, .vp8_sad16x16x3_ssse3
- PROCESS_16X16X3_OFFSET 3, .vp8_sad16x16x3_ssse3
- PROCESS_16X16X3_OFFSET 4, .vp8_sad16x16x3_ssse3
- PROCESS_16X16X3_OFFSET 5, .vp8_sad16x16x3_ssse3
- PROCESS_16X16X3_OFFSET 6, .vp8_sad16x16x3_ssse3
- PROCESS_16X16X3_OFFSET 7, .vp8_sad16x16x3_ssse3
- PROCESS_16X16X3_OFFSET 8, .vp8_sad16x16x3_ssse3
- PROCESS_16X16X3_OFFSET 9, .vp8_sad16x16x3_ssse3
- PROCESS_16X16X3_OFFSET 10, .vp8_sad16x16x3_ssse3
- PROCESS_16X16X3_OFFSET 11, .vp8_sad16x16x3_ssse3
- PROCESS_16X16X3_OFFSET 12, .vp8_sad16x16x3_ssse3
- PROCESS_16X16X3_OFFSET 13, .vp8_sad16x16x3_ssse3
- PROCESS_16X16X3_OFFSET 14, .vp8_sad16x16x3_ssse3
+ PROCESS_16X16X3_OFFSET 0, .vpx_sad16x16x3_ssse3
+ PROCESS_16X16X3_OFFSET 1, .vpx_sad16x16x3_ssse3
+ PROCESS_16X16X3_OFFSET 2, .vpx_sad16x16x3_ssse3
+ PROCESS_16X16X3_OFFSET 3, .vpx_sad16x16x3_ssse3
+ PROCESS_16X16X3_OFFSET 4, .vpx_sad16x16x3_ssse3
+ PROCESS_16X16X3_OFFSET 5, .vpx_sad16x16x3_ssse3
+ PROCESS_16X16X3_OFFSET 6, .vpx_sad16x16x3_ssse3
+ PROCESS_16X16X3_OFFSET 7, .vpx_sad16x16x3_ssse3
+ PROCESS_16X16X3_OFFSET 8, .vpx_sad16x16x3_ssse3
+ PROCESS_16X16X3_OFFSET 9, .vpx_sad16x16x3_ssse3
+ PROCESS_16X16X3_OFFSET 10, .vpx_sad16x16x3_ssse3
+ PROCESS_16X16X3_OFFSET 11, .vpx_sad16x16x3_ssse3
+ PROCESS_16X16X3_OFFSET 12, .vpx_sad16x16x3_ssse3
+ PROCESS_16X16X3_OFFSET 13, .vpx_sad16x16x3_ssse3
+ PROCESS_16X16X3_OFFSET 14, .vpx_sad16x16x3_ssse3
-.vp8_sad16x16x3_ssse3_aligned_by_15:
+.vpx_sad16x16x3_ssse3_aligned_by_15:
PROCESS_16X2X3 1
PROCESS_16X2X3 0
PROCESS_16X2X3 0
@@ -229,7 +229,7 @@
PROCESS_16X2X3 0
PROCESS_16X2X3 0
-.vp8_sad16x16x3_ssse3_store_off:
+.vpx_sad16x16x3_ssse3_store_off:
mov rdi, arg(4) ;Results
movq xmm0, xmm5
@@ -259,14 +259,14 @@
pop rbp
ret
-;void int vp8_sad16x8x3_ssse3(
+;void vpx_sad16x8x3_ssse3(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride,
; int *results)
-global sym(vp8_sad16x8x3_ssse3) PRIVATE
-sym(vp8_sad16x8x3_ssse3):
+global sym(vpx_sad16x8x3_ssse3) PRIVATE
+sym(vpx_sad16x8x3_ssse3):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
@@ -282,31 +282,31 @@
mov rdx, 0xf
and rdx, rdi
- jmp .vp8_sad16x8x3_ssse3_skiptable
-.vp8_sad16x8x3_ssse3_jumptable:
- dd .vp8_sad16x8x3_ssse3_aligned_by_0 - .vp8_sad16x8x3_ssse3_do_jump
- dd .vp8_sad16x8x3_ssse3_aligned_by_1 - .vp8_sad16x8x3_ssse3_do_jump
- dd .vp8_sad16x8x3_ssse3_aligned_by_2 - .vp8_sad16x8x3_ssse3_do_jump
- dd .vp8_sad16x8x3_ssse3_aligned_by_3 - .vp8_sad16x8x3_ssse3_do_jump
- dd .vp8_sad16x8x3_ssse3_aligned_by_4 - .vp8_sad16x8x3_ssse3_do_jump
- dd .vp8_sad16x8x3_ssse3_aligned_by_5 - .vp8_sad16x8x3_ssse3_do_jump
- dd .vp8_sad16x8x3_ssse3_aligned_by_6 - .vp8_sad16x8x3_ssse3_do_jump
- dd .vp8_sad16x8x3_ssse3_aligned_by_7 - .vp8_sad16x8x3_ssse3_do_jump
- dd .vp8_sad16x8x3_ssse3_aligned_by_8 - .vp8_sad16x8x3_ssse3_do_jump
- dd .vp8_sad16x8x3_ssse3_aligned_by_9 - .vp8_sad16x8x3_ssse3_do_jump
- dd .vp8_sad16x8x3_ssse3_aligned_by_10 - .vp8_sad16x8x3_ssse3_do_jump
- dd .vp8_sad16x8x3_ssse3_aligned_by_11 - .vp8_sad16x8x3_ssse3_do_jump
- dd .vp8_sad16x8x3_ssse3_aligned_by_12 - .vp8_sad16x8x3_ssse3_do_jump
- dd .vp8_sad16x8x3_ssse3_aligned_by_13 - .vp8_sad16x8x3_ssse3_do_jump
- dd .vp8_sad16x8x3_ssse3_aligned_by_14 - .vp8_sad16x8x3_ssse3_do_jump
- dd .vp8_sad16x8x3_ssse3_aligned_by_15 - .vp8_sad16x8x3_ssse3_do_jump
-.vp8_sad16x8x3_ssse3_skiptable:
+ jmp .vpx_sad16x8x3_ssse3_skiptable
+.vpx_sad16x8x3_ssse3_jumptable:
+ dd .vpx_sad16x8x3_ssse3_aligned_by_0 - .vpx_sad16x8x3_ssse3_do_jump
+ dd .vpx_sad16x8x3_ssse3_aligned_by_1 - .vpx_sad16x8x3_ssse3_do_jump
+ dd .vpx_sad16x8x3_ssse3_aligned_by_2 - .vpx_sad16x8x3_ssse3_do_jump
+ dd .vpx_sad16x8x3_ssse3_aligned_by_3 - .vpx_sad16x8x3_ssse3_do_jump
+ dd .vpx_sad16x8x3_ssse3_aligned_by_4 - .vpx_sad16x8x3_ssse3_do_jump
+ dd .vpx_sad16x8x3_ssse3_aligned_by_5 - .vpx_sad16x8x3_ssse3_do_jump
+ dd .vpx_sad16x8x3_ssse3_aligned_by_6 - .vpx_sad16x8x3_ssse3_do_jump
+ dd .vpx_sad16x8x3_ssse3_aligned_by_7 - .vpx_sad16x8x3_ssse3_do_jump
+ dd .vpx_sad16x8x3_ssse3_aligned_by_8 - .vpx_sad16x8x3_ssse3_do_jump
+ dd .vpx_sad16x8x3_ssse3_aligned_by_9 - .vpx_sad16x8x3_ssse3_do_jump
+ dd .vpx_sad16x8x3_ssse3_aligned_by_10 - .vpx_sad16x8x3_ssse3_do_jump
+ dd .vpx_sad16x8x3_ssse3_aligned_by_11 - .vpx_sad16x8x3_ssse3_do_jump
+ dd .vpx_sad16x8x3_ssse3_aligned_by_12 - .vpx_sad16x8x3_ssse3_do_jump
+ dd .vpx_sad16x8x3_ssse3_aligned_by_13 - .vpx_sad16x8x3_ssse3_do_jump
+ dd .vpx_sad16x8x3_ssse3_aligned_by_14 - .vpx_sad16x8x3_ssse3_do_jump
+ dd .vpx_sad16x8x3_ssse3_aligned_by_15 - .vpx_sad16x8x3_ssse3_do_jump
+.vpx_sad16x8x3_ssse3_skiptable:
- call .vp8_sad16x8x3_ssse3_do_jump
-.vp8_sad16x8x3_ssse3_do_jump:
+ call .vpx_sad16x8x3_ssse3_do_jump
+.vpx_sad16x8x3_ssse3_do_jump:
pop rcx ; get the address of do_jump
- mov rax, .vp8_sad16x8x3_ssse3_jumptable - .vp8_sad16x8x3_ssse3_do_jump
- add rax, rcx ; get the absolute address of vp8_sad16x8x3_ssse3_jumptable
+ mov rax, .vpx_sad16x8x3_ssse3_jumptable - .vpx_sad16x8x3_ssse3_do_jump
+ add rax, rcx ; get the absolute address of vpx_sad16x8x3_ssse3_jumptable
movsxd rax, dword [rax + 4*rdx] ; get the 32 bit offset from the jumptable
add rcx, rax
@@ -316,30 +316,30 @@
jmp rcx
- PROCESS_16X8X3_OFFSET 0, .vp8_sad16x8x3_ssse3
- PROCESS_16X8X3_OFFSET 1, .vp8_sad16x8x3_ssse3
- PROCESS_16X8X3_OFFSET 2, .vp8_sad16x8x3_ssse3
- PROCESS_16X8X3_OFFSET 3, .vp8_sad16x8x3_ssse3
- PROCESS_16X8X3_OFFSET 4, .vp8_sad16x8x3_ssse3
- PROCESS_16X8X3_OFFSET 5, .vp8_sad16x8x3_ssse3
- PROCESS_16X8X3_OFFSET 6, .vp8_sad16x8x3_ssse3
- PROCESS_16X8X3_OFFSET 7, .vp8_sad16x8x3_ssse3
- PROCESS_16X8X3_OFFSET 8, .vp8_sad16x8x3_ssse3
- PROCESS_16X8X3_OFFSET 9, .vp8_sad16x8x3_ssse3
- PROCESS_16X8X3_OFFSET 10, .vp8_sad16x8x3_ssse3
- PROCESS_16X8X3_OFFSET 11, .vp8_sad16x8x3_ssse3
- PROCESS_16X8X3_OFFSET 12, .vp8_sad16x8x3_ssse3
- PROCESS_16X8X3_OFFSET 13, .vp8_sad16x8x3_ssse3
- PROCESS_16X8X3_OFFSET 14, .vp8_sad16x8x3_ssse3
+ PROCESS_16X8X3_OFFSET 0, .vpx_sad16x8x3_ssse3
+ PROCESS_16X8X3_OFFSET 1, .vpx_sad16x8x3_ssse3
+ PROCESS_16X8X3_OFFSET 2, .vpx_sad16x8x3_ssse3
+ PROCESS_16X8X3_OFFSET 3, .vpx_sad16x8x3_ssse3
+ PROCESS_16X8X3_OFFSET 4, .vpx_sad16x8x3_ssse3
+ PROCESS_16X8X3_OFFSET 5, .vpx_sad16x8x3_ssse3
+ PROCESS_16X8X3_OFFSET 6, .vpx_sad16x8x3_ssse3
+ PROCESS_16X8X3_OFFSET 7, .vpx_sad16x8x3_ssse3
+ PROCESS_16X8X3_OFFSET 8, .vpx_sad16x8x3_ssse3
+ PROCESS_16X8X3_OFFSET 9, .vpx_sad16x8x3_ssse3
+ PROCESS_16X8X3_OFFSET 10, .vpx_sad16x8x3_ssse3
+ PROCESS_16X8X3_OFFSET 11, .vpx_sad16x8x3_ssse3
+ PROCESS_16X8X3_OFFSET 12, .vpx_sad16x8x3_ssse3
+ PROCESS_16X8X3_OFFSET 13, .vpx_sad16x8x3_ssse3
+ PROCESS_16X8X3_OFFSET 14, .vpx_sad16x8x3_ssse3
-.vp8_sad16x8x3_ssse3_aligned_by_15:
+.vpx_sad16x8x3_ssse3_aligned_by_15:
PROCESS_16X2X3 1
PROCESS_16X2X3 0
PROCESS_16X2X3 0
PROCESS_16X2X3 0
-.vp8_sad16x8x3_ssse3_store_off:
+.vpx_sad16x8x3_ssse3_store_off:
mov rdi, arg(4) ;Results
movq xmm0, xmm5