HBD convolution filtering (10/12 taps) SSE4.1 optimization
- For experiment EXT_INTERP under high bit depth.
- Add unit test to verify bit-exactness against the C implementation.
- Speed improvement:
On Xeon E5-2680, park_joy_1080p_12.y4m, 50 frames, encoding time
drops from 6682503 ms to 5390270 ms (about a 19% reduction).
Change-Id: Iea4debf5414f3accf1eb5672abeab56a0539ac77
diff --git a/test/vp10_convolve_optimz_test.cc b/test/vp10_convolve_optimz_test.cc
index 66f267f..ec77035 100644
--- a/test/vp10_convolve_optimz_test.cc
+++ b/test/vp10_convolve_optimz_test.cc
@@ -24,12 +24,25 @@
typedef void (*conv_filter_t)(const uint8_t*, int, uint8_t*, int,
int, int, const InterpFilterParams,
const int, int, int);
+#if CONFIG_VP9_HIGHBITDEPTH
+typedef void (*hbd_conv_filter_t)(const uint16_t*, int, uint16_t*, int,
+ int, int, const InterpFilterParams,
+ const int, int, int, int);
+#endif
+
// Test parameter list:
// <convolve_horiz_func, convolve_vert_func,
// <width, height>, filter_params, subpel_x_q4, avg>
typedef tuple<int, int> BlockDimension;
typedef tuple<conv_filter_t, conv_filter_t, BlockDimension, INTERP_FILTER,
int, int> ConvParams;
+#if CONFIG_VP9_HIGHBITDEPTH
+// Test parameter list:
+// <convolve_horiz_func, convolve_vert_func,
+// <width, height>, filter_params, subpel_x_q4, avg, bit_depth>
+typedef tuple<hbd_conv_filter_t, hbd_conv_filter_t, BlockDimension,
+ INTERP_FILTER, int, int, int> HbdConvParams;
+#endif
// Note:
// src_ and src_ref_ have special boundary requirement
@@ -75,11 +88,8 @@
void RunVertFilterBitExactCheck();
private:
- void PrepFilterBuffer(uint8_t *src, uint8_t *src_ref,
- uint8_t *dst, uint8_t *dst_ref,
- int w, int h);
- void DiffFilterBuffer(const uint8_t *buf, const uint8_t *buf_ref,
- int w, int h, int fgroup, int findex);
+ void PrepFilterBuffer(int w, int h);
+ void DiffFilterBuffer();
conv_filter_t conv_horiz_;
conv_filter_t conv_vert_;
uint8_t *alloc_;
@@ -94,18 +104,16 @@
int avg_;
};
-void VP10ConvolveOptimzTest::PrepFilterBuffer(uint8_t *src, uint8_t *src_ref,
- uint8_t *dst, uint8_t *dst_ref,
- int w, int h) {
+void VP10ConvolveOptimzTest::PrepFilterBuffer(int w, int h) {
int r, c;
ACMRandom rnd(ACMRandom::DeterministicSeed());
memset(alloc_, 0, 4 * maxBlockSize * sizeof(alloc_[0]));
- uint8_t *src_ptr = src;
- uint8_t *dst_ptr = dst;
- uint8_t *src_ref_ptr = src_ref;
- uint8_t *dst_ref_ptr = dst_ref;
+ uint8_t *src_ptr = src_;
+ uint8_t *dst_ptr = dst_;
+ uint8_t *src_ref_ptr = src_ref_;
+ uint8_t *dst_ref_ptr = dst_ref_;
for (r = 0; r < height_; ++r) {
for (c = 0; c < width_; ++c) {
@@ -121,21 +129,17 @@
}
}
-void VP10ConvolveOptimzTest::DiffFilterBuffer(const uint8_t *buf,
- const uint8_t *buf_ref,
- int w, int h,
- int filter_group,
- int filter_index) {
+void VP10ConvolveOptimzTest::DiffFilterBuffer() {
int r, c;
- const uint8_t *dst_ptr = buf;
- const uint8_t *dst_ref_ptr = buf_ref;
- for (r = 0; r < h; ++r) {
- for (c = 0; c < w; ++c) {
+ const uint8_t *dst_ptr = dst_;
+ const uint8_t *dst_ref_ptr = dst_ref_;
+ for (r = 0; r < height_; ++r) {
+ for (c = 0; c < width_; ++c) {
EXPECT_EQ((uint8_t)dst_ref_ptr[c], (uint8_t)dst_ptr[c])
<< "Error at row: " << r << " col: " << c << " "
- << "w = " << w << " " << "h = " << h << " "
- << "filter group index = " << filter_group << " "
- << "filter index = " << filter_index;
+ << "w = " << width_ << " " << "h = " << height_ << " "
+ << "filter group index = " << filter_ << " "
+ << "filter index = " << subpel_;
}
dst_ptr += stride;
dst_ref_ptr += stride;
@@ -143,7 +147,7 @@
}
void VP10ConvolveOptimzTest::RunHorizFilterBitExactCheck() {
- PrepFilterBuffer(src_, src_ref_, dst_, dst_ref_, testMaxBlk, testMaxBlk);
+ PrepFilterBuffer(testMaxBlk, testMaxBlk);
InterpFilterParams filter_params = vp10_get_interp_filter_params(filter_);
@@ -153,14 +157,14 @@
conv_horiz_(src_, stride, dst_, stride, width_, height_,
filter_params, subpel_, x_step_q4, avg_);
- DiffFilterBuffer(dst_, dst_ref_, width_, height_, filter_, subpel_);
+ DiffFilterBuffer();
// Note:
// Here we need calculate a height which is different from the specified one
// and test again.
int intermediate_height =
(((height_ - 1) * 16 + subpel_) >> SUBPEL_BITS) + filter_params.taps;
- PrepFilterBuffer(src_, src_ref_, dst_, dst_ref_, testMaxBlk, testMaxBlk);
+ PrepFilterBuffer(testMaxBlk, testMaxBlk);
vp10_convolve_horiz_c(src_ref_, stride, dst_ref_, stride, width_,
intermediate_height, filter_params, subpel_, x_step_q4,
@@ -170,12 +174,11 @@
intermediate_height, filter_params, subpel_, x_step_q4,
avg_);
- DiffFilterBuffer(dst_, dst_ref_, width_, intermediate_height, filter_,
- subpel_);
+ DiffFilterBuffer();
}
void VP10ConvolveOptimzTest::RunVertFilterBitExactCheck() {
- PrepFilterBuffer(src_, src_ref_, dst_, dst_ref_, testMaxBlk, testMaxBlk);
+ PrepFilterBuffer(testMaxBlk, testMaxBlk);
InterpFilterParams filter_params = vp10_get_interp_filter_params(filter_);
@@ -185,7 +188,7 @@
conv_vert_(src_, stride, dst_, stride, width_, height_,
filter_params, subpel_, x_step_q4, avg_);
- DiffFilterBuffer(dst_, dst_ref_, width_, height_, filter_, subpel_);
+ DiffFilterBuffer();
}
TEST_P(VP10ConvolveOptimzTest, HorizBitExactCheck) {
@@ -197,7 +200,7 @@
using std::tr1::make_tuple;
-#if HAVE_SSSE3 && CONFIG_EXT_INTERP
+#if (HAVE_SSSE3 || HAVE_SSE4_1) && CONFIG_EXT_INTERP
const BlockDimension kBlockDim[] = {
make_tuple(2, 2),
make_tuple(2, 4),
@@ -225,7 +228,9 @@
const int kSubpelQ4[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
const int kAvg[] = {0, 1};
+#endif
+#if HAVE_SSSE3 && CONFIG_EXT_INTERP
INSTANTIATE_TEST_CASE_P(
SSSE3, VP10ConvolveOptimzTest,
::testing::Combine(
@@ -236,4 +241,167 @@
::testing::ValuesIn(kSubpelQ4),
::testing::ValuesIn(kAvg)));
#endif // HAVE_SSSE3 && CONFIG_EXT_INTERP
+
+#if CONFIG_VP9_HIGHBITDEPTH
+typedef ::testing::TestWithParam<HbdConvParams> TestWithHbdConvParams;
+class VP10HbdConvolveOptimzTest : public TestWithHbdConvParams {
+ public:
+ virtual ~VP10HbdConvolveOptimzTest() {}
+ virtual void SetUp() {
+ conv_horiz_ = GET_PARAM(0);
+ conv_vert_ = GET_PARAM(1);
+ BlockDimension block = GET_PARAM(2);
+ width_ = std::tr1::get<0>(block);
+ height_ = std::tr1::get<1>(block);
+ filter_ = GET_PARAM(3);
+ subpel_ = GET_PARAM(4);
+ avg_ = GET_PARAM(5);
+ bit_depth_ = GET_PARAM(6);
+
+ alloc_ = new uint16_t[maxBlockSize * 4];
+ src_ = alloc_ + (vertiOffset * maxWidth);
+ src_ += horizOffset;
+ src_ref_ = src_ + maxBlockSize;
+
+ dst_ = alloc_ + 2 * maxBlockSize;
+ dst_ref_ = alloc_ + 3 * maxBlockSize;
+ }
+
+ virtual void TearDown() {
+ delete[] alloc_;
+ libvpx_test::ClearSystemState();
+ }
+
+ protected:
+ void RunHorizFilterBitExactCheck();
+ void RunVertFilterBitExactCheck();
+
+ private:
+ void PrepFilterBuffer(int w, int h);
+ void DiffFilterBuffer();
+ hbd_conv_filter_t conv_horiz_;
+ hbd_conv_filter_t conv_vert_;
+ uint16_t *alloc_;
+ uint16_t *src_;
+ uint16_t *dst_;
+ uint16_t *src_ref_;
+ uint16_t *dst_ref_;
+ int width_;
+ int height_;
+ int filter_;
+ int subpel_;
+ int avg_;
+ int bit_depth_;
+};
+
+void VP10HbdConvolveOptimzTest::PrepFilterBuffer(int w, int h) {
+ int r, c;
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+
+ memset(alloc_, 0, 4 * maxBlockSize * sizeof(alloc_[0]));
+
+ uint16_t *src_ptr = src_;
+ uint16_t *dst_ptr = dst_;
+ uint16_t *dst_ref_ptr = dst_ref_;
+ uint16_t hbd_mask = (1 << bit_depth_) - 1;
+
+ for (r = 0; r < height_; ++r) {
+ for (c = 0; c < width_; ++c) {
+ src_ptr[c] = rnd.Rand16() & hbd_mask;
+ dst_ptr[c] = rnd.Rand16() & hbd_mask;
+ dst_ref_ptr[c] = dst_ptr[c];
+ }
+ src_ptr += stride;
+ dst_ptr += stride;
+ dst_ref_ptr += stride;
+ }
+}
+
+void VP10HbdConvolveOptimzTest::DiffFilterBuffer() {
+ int r, c;
+ const uint16_t *dst_ptr = dst_;
+ const uint16_t *dst_ref_ptr = dst_ref_;
+ for (r = 0; r < height_; ++r) {
+ for (c = 0; c < width_; ++c) {
+ EXPECT_EQ((uint16_t)dst_ref_ptr[c], (uint16_t)dst_ptr[c])
+ << "Error at row: " << r << " col: " << c << " "
+ << "w = " << width_ << " " << "h = " << height_ << " "
+ << "filter group index = " << filter_ << " "
+ << "filter index = " << subpel_ << " "
+ << "bit depth = " << bit_depth_;
+ }
+ dst_ptr += stride;
+ dst_ref_ptr += stride;
+ }
+}
+
+void VP10HbdConvolveOptimzTest::RunHorizFilterBitExactCheck() {
+ PrepFilterBuffer(testMaxBlk, testMaxBlk);
+
+ InterpFilterParams filter_params = vp10_get_interp_filter_params(filter_);
+
+ vp10_highbd_convolve_horiz_c(src_, stride, dst_ref_, stride, width_,
+ height_, filter_params, subpel_, x_step_q4,
+ avg_, bit_depth_);
+
+ conv_horiz_(src_, stride, dst_, stride, width_, height_,
+ filter_params, subpel_, x_step_q4, avg_, bit_depth_);
+
+ DiffFilterBuffer();
+
+ // Note:
+ // Here we need to calculate a height which is different from the specified
+ // one and test again.
+ int intermediate_height =
+ (((height_ - 1) * 16 + subpel_) >> SUBPEL_BITS) + filter_params.taps;
+ PrepFilterBuffer(testMaxBlk, testMaxBlk);
+
+ vp10_highbd_convolve_horiz_c(src_, stride, dst_ref_, stride, width_,
+ intermediate_height, filter_params, subpel_,
+ x_step_q4, avg_, bit_depth_);
+
+ conv_horiz_(src_, stride, dst_, stride, width_, intermediate_height,
+ filter_params, subpel_, x_step_q4, avg_, bit_depth_);
+
+ DiffFilterBuffer();
+}
+
+void VP10HbdConvolveOptimzTest::RunVertFilterBitExactCheck() {
+ PrepFilterBuffer(testMaxBlk, testMaxBlk);
+
+ InterpFilterParams filter_params = vp10_get_interp_filter_params(filter_);
+
+ vp10_highbd_convolve_vert_c(src_, stride, dst_ref_, stride, width_, height_,
+ filter_params, subpel_, x_step_q4, avg_,
+ bit_depth_);
+
+ conv_vert_(src_, stride, dst_, stride, width_, height_,
+ filter_params, subpel_, x_step_q4, avg_, bit_depth_);
+
+ DiffFilterBuffer();
+}
+
+TEST_P(VP10HbdConvolveOptimzTest, HorizBitExactCheck) {
+ RunHorizFilterBitExactCheck();
+}
+TEST_P(VP10HbdConvolveOptimzTest, VertBitExactCheck) {
+ RunVertFilterBitExactCheck();
+}
+
+#if HAVE_SSE4_1 && CONFIG_EXT_INTERP
+
+const int kBitdepth[] = {10, 12};
+
+INSTANTIATE_TEST_CASE_P(
+ SSE4_1, VP10HbdConvolveOptimzTest,
+ ::testing::Combine(
+ ::testing::Values(vp10_highbd_convolve_horiz_sse4_1),
+ ::testing::Values(vp10_highbd_convolve_vert_sse4_1),
+ ::testing::ValuesIn(kBlockDim),
+ ::testing::ValuesIn(kFilter),
+ ::testing::ValuesIn(kSubpelQ4),
+ ::testing::ValuesIn(kAvg),
+ ::testing::ValuesIn(kBitdepth)));
+#endif // HAVE_SSE4_1 && CONFIG_EXT_INTERP
+#endif // CONFIG_VP9_HIGHBITDEPTH
} // namespace
diff --git a/vp10/common/filter.c b/vp10/common/filter.c
index 5dde3ab..8427237 100644
--- a/vp10/common/filter.c
+++ b/vp10/common/filter.c
@@ -342,3 +342,25 @@
(void)index;
return NULL;
}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+HbdSubpelFilterCoeffs vp10_hbd_get_subpel_filter_ver_signal_dir(
+ const InterpFilterParams p, int index) {
+#if CONFIG_EXT_INTERP && HAVE_SSE4_1
+ if (p.filter_ptr == (const int16_t *)sub_pel_filters_12sharp) {
+ return &sub_pel_filters_12sharp_highbd_ver_signal_dir[index][0];
+ }
+ if (p.filter_ptr == (const int16_t *)sub_pel_filters_10sharp) {
+ return &sub_pel_filters_10sharp_highbd_ver_signal_dir[index][0];
+ }
+#endif
+#if USE_TEMPORALFILTER_12TAP && HAVE_SSE4_1
+ if (p.filter_ptr == (const int16_t *)sub_pel_filters_temporalfilter_12) {
+ return &sub_pel_filters_temporalfilter_12_highbd_ver_signal_dir[index][0];
+ }
+#endif
+ (void)p;
+ (void)index;
+ return NULL;
+}
+#endif
diff --git a/vp10/common/filter.h b/vp10/common/filter.h
index 591ac4d..5ebf2a5 100644
--- a/vp10/common/filter.h
+++ b/vp10/common/filter.h
@@ -95,6 +95,10 @@
#if USE_TEMPORALFILTER_12TAP
extern const int8_t sub_pel_filters_temporalfilter_12_signal_dir[15][2][16];
extern const int8_t sub_pel_filters_temporalfilter_12_ver_signal_dir[15][6][16];
+#if CONFIG_VP9_HIGHBITDEPTH
+extern const
+int16_t sub_pel_filters_temporalfilter_12_highbd_ver_signal_dir[15][6][8];
+#endif
#endif
#if CONFIG_EXT_INTERP
@@ -102,15 +106,26 @@
extern const int8_t sub_pel_filters_10sharp_signal_dir[15][2][16];
extern const int8_t sub_pel_filters_12sharp_ver_signal_dir[15][6][16];
extern const int8_t sub_pel_filters_10sharp_ver_signal_dir[15][6][16];
+#if CONFIG_VP9_HIGHBITDEPTH
+extern const int16_t sub_pel_filters_12sharp_highbd_ver_signal_dir[15][6][8];
+extern const int16_t sub_pel_filters_10sharp_highbd_ver_signal_dir[15][6][8];
+#endif
#endif
typedef const int8_t (*SubpelFilterCoeffs)[16];
+#if CONFIG_VP9_HIGHBITDEPTH
+typedef const int16_t (*HbdSubpelFilterCoeffs)[8];
+#endif
SubpelFilterCoeffs vp10_get_subpel_filter_signal_dir(
const InterpFilterParams p, int index);
SubpelFilterCoeffs vp10_get_subpel_filter_ver_signal_dir(
const InterpFilterParams p, int index);
+#if CONFIG_VP9_HIGHBITDEPTH
+HbdSubpelFilterCoeffs vp10_hbd_get_subpel_filter_ver_signal_dir(
+ const InterpFilterParams p, int index);
+#endif
#ifdef __cplusplus
} // extern "C"
diff --git a/vp10/common/vp10_convolve.c b/vp10/common/vp10_convolve.c
index 2026df1..3332508 100644
--- a/vp10/common/vp10_convolve.c
+++ b/vp10/common/vp10_convolve.c
@@ -182,7 +182,7 @@
}
#if CONFIG_VP9_HIGHBITDEPTH
-static void highbd_convolve_horiz(const uint16_t *src, int src_stride,
+void vp10_highbd_convolve_horiz_c(const uint16_t *src, int src_stride,
uint16_t *dst, int dst_stride, int w, int h,
const InterpFilterParams filter_params,
const int subpel_x_q4, int x_step_q4, int avg,
@@ -213,7 +213,7 @@
}
}
-static void highbd_convolve_vert(const uint16_t *src, int src_stride,
+void vp10_highbd_convolve_vert_c(const uint16_t *src, int src_stride,
uint16_t *dst, int dst_stride, int w, int h,
const InterpFilterParams filter_params,
const int subpel_y_q4, int y_step_q4, int avg,
@@ -300,8 +300,9 @@
InterpFilterParams filter_params =
vp10_get_interp_filter_params(interp_filter);
#endif
- highbd_convolve_horiz(src, src_stride, dst, dst_stride, w, h, filter_params,
- subpel_x_q4, x_step_q4, ref_idx, bd);
+ vp10_highbd_convolve_horiz(src, src_stride, dst, dst_stride, w, h,
+ filter_params, subpel_x_q4, x_step_q4, ref_idx,
+ bd);
} else if (ignore_horiz) {
#if CONFIG_DUAL_FILTER
InterpFilterParams filter_params =
@@ -310,8 +311,9 @@
InterpFilterParams filter_params =
vp10_get_interp_filter_params(interp_filter);
#endif
- highbd_convolve_vert(src, src_stride, dst, dst_stride, w, h, filter_params,
- subpel_y_q4, y_step_q4, ref_idx, bd);
+ vp10_highbd_convolve_vert(src, src_stride, dst, dst_stride, w, h,
+ filter_params, subpel_y_q4, y_step_q4, ref_idx,
+ bd);
} else {
// temp's size is set to (maximum possible intermediate_height) *
// MAX_BLOCK_WIDTH
@@ -336,9 +338,10 @@
int intermediate_height =
(((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;
- highbd_convolve_horiz(src - src_stride * (filter_size / 2 - 1), src_stride,
- temp, temp_stride, w, intermediate_height,
- filter_params, subpel_x_q4, x_step_q4, 0, bd);
+ vp10_highbd_convolve_horiz(src - src_stride * (filter_size / 2 - 1),
+ src_stride, temp, temp_stride, w,
+ intermediate_height, filter_params, subpel_x_q4,
+ x_step_q4, 0, bd);
#if CONFIG_DUAL_FILTER
filter_params = filter_params_y;
@@ -346,9 +349,9 @@
filter_size = filter_params.taps;
assert(filter_params.taps <= MAX_FILTER_TAP);
- highbd_convolve_vert(temp + temp_stride * (filter_size / 2 - 1),
- temp_stride, dst, dst_stride, w, h, filter_params,
- subpel_y_q4, y_step_q4, ref_idx, bd);
+ vp10_highbd_convolve_vert(temp + temp_stride * (filter_size / 2 - 1),
+ temp_stride, dst, dst_stride, w, h, filter_params,
+ subpel_y_q4, y_step_q4, ref_idx, bd);
}
}
#endif // CONFIG_VP9_HIGHBITDEPTH
diff --git a/vp10/common/vp10_rtcd_defs.pl b/vp10/common/vp10_rtcd_defs.pl
index 369933d..9865d63 100644
--- a/vp10/common/vp10_rtcd_defs.pl
+++ b/vp10/common/vp10_rtcd_defs.pl
@@ -93,6 +93,13 @@
add_proto qw/void vp10_convolve_vert/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg";
specialize qw/vp10_convolve_vert ssse3/;
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+ add_proto qw/void vp10_highbd_convolve_horiz/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg, int bd";
+ specialize qw/vp10_highbd_convolve_horiz sse4_1/;
+ add_proto qw/void vp10_highbd_convolve_vert/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg, int bd";
+ specialize qw/vp10_highbd_convolve_vert sse4_1/;
+}
+
#
# dct
#
diff --git a/vp10/common/x86/vp10_convolve_filters_ssse3.c b/vp10/common/x86/vp10_convolve_filters_ssse3.c
index 410da89..2f7b3c7 100644
--- a/vp10/common/x86/vp10_convolve_filters_ssse3.c
+++ b/vp10/common/x86/vp10_convolve_filters_ssse3.c
@@ -7,626 +7,936 @@
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
-
#include "./vpx_config.h"
#include "vp10/common/filter.h"
-// Note:
-// Filter coefficients are from "filter.c". We use,
-// sub_pel_filters_temporalfilter_12[],
-// sub_pel_filters_12sharp[],
-// sub_pel_filters_10sharp[].
-
-// (2-1) Parallel filtering along the intended signal direction
-
-// 12-tap filter padding:
-// {filter_coefficients, 0, 0, 0, 0},
-// {0, 0, filter_coefficients, 0, 0),
-#if USE_TEMPORALFILTER_12TAP
+#if CONFIG_EXT_INTERP
DECLARE_ALIGNED(16, const int8_t,
- sub_pel_filters_temporalfilter_12_signal_dir[15][2][16]) = {
+ sub_pel_filters_10sharp_signal_dir[15][2][16]) = {
{
- {0, 1, -1, 3, -7, 127, 8, -4, 2, -1, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 1, -1, 3, -7, 127, 8, -4, 2, -1, 0, 0, 0, 0},
+ { 0, 0, -1, 3, -6, 127, 8, -4,
+ 2, -1, 0, 0, 0, 0, 0, 0, },
+ { 0, 0, 0, 0, -1, 3, -6, 127,
+ 8, -4, 2, -1, 0, 0, 0, 0, },
},
{
- {0, 1, -3, 5, -12, 124, 18, -8, 4, -2, 1, 0, 0, 0, 0, 0},
- {0, 0, 0, 1, -3, 5, -12, 124, 18, -8, 4, -2, 1, 0, 0, 0},
+ { 0, 1, -2, 5, -12, 124, 18, -7,
+ 3, -2, 0, 0, 0, 0, 0, 0, },
+ { 0, 0, 0, 1, -2, 5, -12, 124,
+ 18, -7, 3, -2, 0, 0, 0, 0, },
},
{
- {-1, 2, -4, 8, -17, 120, 28, -11, 6, -3, 1, -1, 0, 0, 0, 0},
- {0, 0, -1, 2, -4, 8, -17, 120, 28, -11, 6, -3, 1, -1, 0, 0},
+ { 0, 1, -3, 7, -17, 119, 28, -11,
+ 5, -2, 1, 0, 0, 0, 0, 0, },
+ { 0, 0, 0, 1, -3, 7, -17, 119,
+ 28, -11, 5, -2, 1, 0, 0, 0, },
},
{
- {-1, 2, -4, 10, -21, 114, 38, -15, 8, -4, 2, -1, 0, 0, 0, 0},
- {0, 0, -1, 2, -4, 10, -21, 114, 38, -15, 8, -4, 2, -1, 0, 0},
+ { 0, 1, -4, 8, -20, 114, 38, -14,
+ 7, -3, 1, 0, 0, 0, 0, 0, },
+ { 0, 0, 0, 1, -4, 8, -20, 114,
+ 38, -14, 7, -3, 1, 0, 0, 0, },
},
{
- {-1, 3, -5, 11, -23, 107, 49, -18, 9, -5, 2, -1, 0, 0, 0, 0},
- {0, 0, -1, 3, -5, 11, -23, 107, 49, -18, 9, -5, 2, -1, 0, 0},
+ { 0, 1, -4, 9, -22, 107, 49, -17,
+ 8, -4, 1, 0, 0, 0, 0, 0, },
+ { 0, 0, 0, 1, -4, 9, -22, 107,
+ 49, -17, 8, -4, 1, 0, 0, 0, },
},
{
- {-1, 3, -6, 12, -25, 99, 60, -21, 11, -6, 3, -1, 0, 0, 0, 0},
- {0, 0, -1, 3, -6, 12, -25, 99, 60, -21, 11, -6, 3, -1, 0, 0},
+ { 0, 2, -5, 10, -24, 99, 59, -20,
+ 9, -4, 2, 0, 0, 0, 0, 0, },
+ { 0, 0, 0, 2, -5, 10, -24, 99,
+ 59, -20, 9, -4, 2, 0, 0, 0, },
},
{
- {-1, 3, -6, 12, -25, 90, 70, -23, 12, -6, 3, -1, 0, 0, 0, 0},
- {0, 0, -1, 3, -6, 12, -25, 90, 70, -23, 12, -6, 3, -1, 0, 0},
+ { 0, 2, -5, 10, -24, 90, 70, -22,
+ 10, -5, 2, 0, 0, 0, 0, 0, },
+ { 0, 0, 0, 2, -5, 10, -24, 90,
+ 70, -22, 10, -5, 2, 0, 0, 0, },
},
{
- {-1, 3, -6, 12, -24, 80, 80, -24, 12, -6, 3, -1, 0, 0, 0, 0},
- {0, 0, -1, 3, -6, 12, -24, 80, 80, -24, 12, -6, 3, -1, 0, 0},
+ { 0, 2, -5, 10, -23, 80, 80, -23,
+ 10, -5, 2, 0, 0, 0, 0, 0, },
+ { 0, 0, 0, 2, -5, 10, -23, 80,
+ 80, -23, 10, -5, 2, 0, 0, 0, },
},
{
- {-1, 3, -6, 12, -23, 70, 90, -25, 12, -6, 3, -1, 0, 0, 0, 0},
- {0, 0, -1, 3, -6, 12, -23, 70, 90, -25, 12, -6, 3, -1, 0, 0},
+ { 0, 2, -5, 10, -22, 70, 90, -24,
+ 10, -5, 2, 0, 0, 0, 0, 0, },
+ { 0, 0, 0, 2, -5, 10, -22, 70,
+ 90, -24, 10, -5, 2, 0, 0, 0, },
},
{
- {-1, 3, -6, 11, -21, 60, 99, -25, 12, -6, 3, -1, 0, 0, 0, 0},
- {0, 0, -1, 3, -6, 11, -21, 60, 99, -25, 12, -6, 3, -1, 0, 0},
+ { 0, 2, -4, 9, -20, 59, 99, -24,
+ 10, -5, 2, 0, 0, 0, 0, 0, },
+ { 0, 0, 0, 2, -4, 9, -20, 59,
+ 99, -24, 10, -5, 2, 0, 0, 0, },
},
{
- {-1, 2, -5, 9, -18, 49, 107, -23, 11, -5, 3, -1, 0, 0, 0, 0},
- {0, 0, -1, 2, -5, 9, -18, 49, 107, -23, 11, -5, 3, -1, 0, 0},
+ { 0, 1, -4, 8, -17, 49, 107, -22,
+ 9, -4, 1, 0, 0, 0, 0, 0, },
+ { 0, 0, 0, 1, -4, 8, -17, 49,
+ 107, -22, 9, -4, 1, 0, 0, 0, },
},
{
- {-1, 2, -4, 8, -15, 38, 114, -21, 10, -4, 2, -1, 0, 0, 0, 0},
- {0, 0, -1, 2, -4, 8, -15, 38, 114, -21, 10, -4, 2, -1, 0, 0},
+ { 0, 1, -3, 7, -14, 38, 114, -20,
+ 8, -4, 1, 0, 0, 0, 0, 0, },
+ { 0, 0, 0, 1, -3, 7, -14, 38,
+ 114, -20, 8, -4, 1, 0, 0, 0, },
},
{
- {-1, 1, -3, 6, -11, 28, 120, -17, 8, -4, 2, -1, 0, 0, 0, 0},
- {0, 0, -1, 1, -3, 6, -11, 28, 120, -17, 8, -4, 2, -1, 0, 0},
+ { 0, 1, -2, 5, -11, 28, 119, -17,
+ 7, -3, 1, 0, 0, 0, 0, 0, },
+ { 0, 0, 0, 1, -2, 5, -11, 28,
+ 119, -17, 7, -3, 1, 0, 0, 0, },
},
{
- {0, 1, -2, 4, -8, 18, 124, -12, 5, -3, 1, 0, 0, 0, 0, 0},
- {0, 0, 0, 1, -2, 4, -8, 18, 124, -12, 5, -3, 1, 0, 0, 0},
+ { 0, 0, -2, 3, -7, 18, 124, -12,
+ 5, -2, 1, 0, 0, 0, 0, 0, },
+ { 0, 0, 0, 0, -2, 3, -7, 18,
+ 124, -12, 5, -2, 1, 0, 0, 0, },
},
{
- {0, 0, -1, 2, -4, 8, 127, -7, 3, -1, 1, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, -1, 2, -4, 8, 127, -7, 3, -1, 1, 0, 0, 0},
+ { 0, 0, -1, 2, -4, 8, 127, -6,
+ 3, -1, 0, 0, 0, 0, 0, 0, },
+ { 0, 0, 0, 0, -1, 2, -4, 8,
+ 127, -6, 3, -1, 0, 0, 0, 0, },
},
};
-#endif // USE_TEMPORALFILTER_12TAP
-
+#endif
+#if CONFIG_EXT_INTERP
+DECLARE_ALIGNED(16, const int8_t,
+ sub_pel_filters_10sharp_ver_signal_dir[15][6][16]) = {
+ {
+ { 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, },
+ { -1, 3, -1, 3, -1, 3, -1, 3,
+ -1, 3, -1, 3, -1, 3, -1, 3, },
+ { -6, 127, -6, 127, -6, 127, -6, 127,
+ -6, 127, -6, 127, -6, 127, -6, 127, },
+ { 8, -4, 8, -4, 8, -4, 8, -4,
+ 8, -4, 8, -4, 8, -4, 8, -4, },
+ { 2, -1, 2, -1, 2, -1, 2, -1,
+ 2, -1, 2, -1, 2, -1, 2, -1, },
+ { 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, },
+ },
+ {
+ { 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1, },
+ { -2, 5, -2, 5, -2, 5, -2, 5,
+ -2, 5, -2, 5, -2, 5, -2, 5, },
+ {-12, 124, -12, 124, -12, 124, -12, 124,
+ -12, 124, -12, 124, -12, 124, -12, 124, },
+ { 18, -7, 18, -7, 18, -7, 18, -7,
+ 18, -7, 18, -7, 18, -7, 18, -7, },
+ { 3, -2, 3, -2, 3, -2, 3, -2,
+ 3, -2, 3, -2, 3, -2, 3, -2, },
+ { 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, },
+ },
+ {
+ { 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1, },
+ { -3, 7, -3, 7, -3, 7, -3, 7,
+ -3, 7, -3, 7, -3, 7, -3, 7, },
+ {-17, 119, -17, 119, -17, 119, -17, 119,
+ -17, 119, -17, 119, -17, 119, -17, 119, },
+ { 28, -11, 28, -11, 28, -11, 28, -11,
+ 28, -11, 28, -11, 28, -11, 28, -11, },
+ { 5, -2, 5, -2, 5, -2, 5, -2,
+ 5, -2, 5, -2, 5, -2, 5, -2, },
+ { 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0, },
+ },
+ {
+ { 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1, },
+ { -4, 8, -4, 8, -4, 8, -4, 8,
+ -4, 8, -4, 8, -4, 8, -4, 8, },
+ {-20, 114, -20, 114, -20, 114, -20, 114,
+ -20, 114, -20, 114, -20, 114, -20, 114, },
+ { 38, -14, 38, -14, 38, -14, 38, -14,
+ 38, -14, 38, -14, 38, -14, 38, -14, },
+ { 7, -3, 7, -3, 7, -3, 7, -3,
+ 7, -3, 7, -3, 7, -3, 7, -3, },
+ { 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0, },
+ },
+ {
+ { 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1, },
+ { -4, 9, -4, 9, -4, 9, -4, 9,
+ -4, 9, -4, 9, -4, 9, -4, 9, },
+ {-22, 107, -22, 107, -22, 107, -22, 107,
+ -22, 107, -22, 107, -22, 107, -22, 107, },
+ { 49, -17, 49, -17, 49, -17, 49, -17,
+ 49, -17, 49, -17, 49, -17, 49, -17, },
+ { 8, -4, 8, -4, 8, -4, 8, -4,
+ 8, -4, 8, -4, 8, -4, 8, -4, },
+ { 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0, },
+ },
+ {
+ { 0, 2, 0, 2, 0, 2, 0, 2,
+ 0, 2, 0, 2, 0, 2, 0, 2, },
+ { -5, 10, -5, 10, -5, 10, -5, 10,
+ -5, 10, -5, 10, -5, 10, -5, 10, },
+ {-24, 99, -24, 99, -24, 99, -24, 99,
+ -24, 99, -24, 99, -24, 99, -24, 99, },
+ { 59, -20, 59, -20, 59, -20, 59, -20,
+ 59, -20, 59, -20, 59, -20, 59, -20, },
+ { 9, -4, 9, -4, 9, -4, 9, -4,
+ 9, -4, 9, -4, 9, -4, 9, -4, },
+ { 2, 0, 2, 0, 2, 0, 2, 0,
+ 2, 0, 2, 0, 2, 0, 2, 0, },
+ },
+ {
+ { 0, 2, 0, 2, 0, 2, 0, 2,
+ 0, 2, 0, 2, 0, 2, 0, 2, },
+ { -5, 10, -5, 10, -5, 10, -5, 10,
+ -5, 10, -5, 10, -5, 10, -5, 10, },
+ {-24, 90, -24, 90, -24, 90, -24, 90,
+ -24, 90, -24, 90, -24, 90, -24, 90, },
+ { 70, -22, 70, -22, 70, -22, 70, -22,
+ 70, -22, 70, -22, 70, -22, 70, -22, },
+ { 10, -5, 10, -5, 10, -5, 10, -5,
+ 10, -5, 10, -5, 10, -5, 10, -5, },
+ { 2, 0, 2, 0, 2, 0, 2, 0,
+ 2, 0, 2, 0, 2, 0, 2, 0, },
+ },
+ {
+ { 0, 2, 0, 2, 0, 2, 0, 2,
+ 0, 2, 0, 2, 0, 2, 0, 2, },
+ { -5, 10, -5, 10, -5, 10, -5, 10,
+ -5, 10, -5, 10, -5, 10, -5, 10, },
+ {-23, 80, -23, 80, -23, 80, -23, 80,
+ -23, 80, -23, 80, -23, 80, -23, 80, },
+ { 80, -23, 80, -23, 80, -23, 80, -23,
+ 80, -23, 80, -23, 80, -23, 80, -23, },
+ { 10, -5, 10, -5, 10, -5, 10, -5,
+ 10, -5, 10, -5, 10, -5, 10, -5, },
+ { 2, 0, 2, 0, 2, 0, 2, 0,
+ 2, 0, 2, 0, 2, 0, 2, 0, },
+ },
+ {
+ { 0, 2, 0, 2, 0, 2, 0, 2,
+ 0, 2, 0, 2, 0, 2, 0, 2, },
+ { -5, 10, -5, 10, -5, 10, -5, 10,
+ -5, 10, -5, 10, -5, 10, -5, 10, },
+ {-22, 70, -22, 70, -22, 70, -22, 70,
+ -22, 70, -22, 70, -22, 70, -22, 70, },
+ { 90, -24, 90, -24, 90, -24, 90, -24,
+ 90, -24, 90, -24, 90, -24, 90, -24, },
+ { 10, -5, 10, -5, 10, -5, 10, -5,
+ 10, -5, 10, -5, 10, -5, 10, -5, },
+ { 2, 0, 2, 0, 2, 0, 2, 0,
+ 2, 0, 2, 0, 2, 0, 2, 0, },
+ },
+ {
+ { 0, 2, 0, 2, 0, 2, 0, 2,
+ 0, 2, 0, 2, 0, 2, 0, 2, },
+ { -4, 9, -4, 9, -4, 9, -4, 9,
+ -4, 9, -4, 9, -4, 9, -4, 9, },
+ {-20, 59, -20, 59, -20, 59, -20, 59,
+ -20, 59, -20, 59, -20, 59, -20, 59, },
+ { 99, -24, 99, -24, 99, -24, 99, -24,
+ 99, -24, 99, -24, 99, -24, 99, -24, },
+ { 10, -5, 10, -5, 10, -5, 10, -5,
+ 10, -5, 10, -5, 10, -5, 10, -5, },
+ { 2, 0, 2, 0, 2, 0, 2, 0,
+ 2, 0, 2, 0, 2, 0, 2, 0, },
+ },
+ {
+ { 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1, },
+ { -4, 8, -4, 8, -4, 8, -4, 8,
+ -4, 8, -4, 8, -4, 8, -4, 8, },
+ {-17, 49, -17, 49, -17, 49, -17, 49,
+ -17, 49, -17, 49, -17, 49, -17, 49, },
+ {107, -22, 107, -22, 107, -22, 107, -22,
+ 107, -22, 107, -22, 107, -22, 107, -22, },
+ { 9, -4, 9, -4, 9, -4, 9, -4,
+ 9, -4, 9, -4, 9, -4, 9, -4, },
+ { 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0, },
+ },
+ {
+ { 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1, },
+ { -3, 7, -3, 7, -3, 7, -3, 7,
+ -3, 7, -3, 7, -3, 7, -3, 7, },
+ {-14, 38, -14, 38, -14, 38, -14, 38,
+ -14, 38, -14, 38, -14, 38, -14, 38, },
+ {114, -20, 114, -20, 114, -20, 114, -20,
+ 114, -20, 114, -20, 114, -20, 114, -20, },
+ { 8, -4, 8, -4, 8, -4, 8, -4,
+ 8, -4, 8, -4, 8, -4, 8, -4, },
+ { 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0, },
+ },
+ {
+ { 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1, },
+ { -2, 5, -2, 5, -2, 5, -2, 5,
+ -2, 5, -2, 5, -2, 5, -2, 5, },
+ {-11, 28, -11, 28, -11, 28, -11, 28,
+ -11, 28, -11, 28, -11, 28, -11, 28, },
+ {119, -17, 119, -17, 119, -17, 119, -17,
+ 119, -17, 119, -17, 119, -17, 119, -17, },
+ { 7, -3, 7, -3, 7, -3, 7, -3,
+ 7, -3, 7, -3, 7, -3, 7, -3, },
+ { 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0, },
+ },
+ {
+ { 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, },
+ { -2, 3, -2, 3, -2, 3, -2, 3,
+ -2, 3, -2, 3, -2, 3, -2, 3, },
+ { -7, 18, -7, 18, -7, 18, -7, 18,
+ -7, 18, -7, 18, -7, 18, -7, 18, },
+ {124, -12, 124, -12, 124, -12, 124, -12,
+ 124, -12, 124, -12, 124, -12, 124, -12, },
+ { 5, -2, 5, -2, 5, -2, 5, -2,
+ 5, -2, 5, -2, 5, -2, 5, -2, },
+ { 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0, },
+ },
+ {
+ { 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, },
+ { -1, 2, -1, 2, -1, 2, -1, 2,
+ -1, 2, -1, 2, -1, 2, -1, 2, },
+ { -4, 8, -4, 8, -4, 8, -4, 8,
+ -4, 8, -4, 8, -4, 8, -4, 8, },
+ {127, -6, 127, -6, 127, -6, 127, -6,
+ 127, -6, 127, -6, 127, -6, 127, -6, },
+ { 3, -1, 3, -1, 3, -1, 3, -1,
+ 3, -1, 3, -1, 3, -1, 3, -1, },
+ { 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, },
+ },
+};
+#endif
#if CONFIG_EXT_INTERP
DECLARE_ALIGNED(16, const int8_t,
sub_pel_filters_12sharp_signal_dir[15][2][16]) = {
{
- {0, 1, -2, 3, -7, 127, 8, -4, 2, -1, 1, 0, 0, 0, 0, 0},
- {0, 0, 0, 1, -2, 3, -7, 127, 8, -4, 2, -1, 1, 0, 0, 0},
+ { 0, 1, -2, 3, -7, 127, 8, -4,
+ 2, -1, 1, 0, 0, 0, 0, 0, },
+ { 0, 0, 0, 1, -2, 3, -7, 127,
+ 8, -4, 2, -1, 1, 0, 0, 0, },
},
{
- {-1, 2, -3, 6, -13, 124, 18, -8, 4, -2, 2, -1, 0, 0, 0, 0},
- {0, 0, -1, 2, -3, 6, -13, 124, 18, -8, 4, -2, 2, -1, 0, 0},
+ { -1, 2, -3, 6, -13, 124, 18, -8,
+ 4, -2, 2, -1, 0, 0, 0, 0, },
+ { 0, 0, -1, 2, -3, 6, -13, 124,
+ 18, -8, 4, -2, 2, -1, 0, 0, },
},
{
- {-1, 3, -4, 8, -18, 120, 28, -12, 7, -4, 2, -1, 0, 0, 0, 0},
- {0, 0, -1, 3, -4, 8, -18, 120, 28, -12, 7, -4, 2, -1, 0, 0},
+ { -1, 3, -4, 8, -18, 120, 28, -12,
+ 7, -4, 2, -1, 0, 0, 0, 0, },
+ { 0, 0, -1, 3, -4, 8, -18, 120,
+ 28, -12, 7, -4, 2, -1, 0, 0, },
},
{
- {-1, 3, -6, 10, -21, 115, 38, -15, 8, -5, 3, -1, 0, 0, 0, 0},
- {0, 0, -1, 3, -6, 10, -21, 115, 38, -15, 8, -5, 3, -1, 0, 0},
+ { -1, 3, -6, 10, -21, 115, 38, -15,
+ 8, -5, 3, -1, 0, 0, 0, 0, },
+ { 0, 0, -1, 3, -6, 10, -21, 115,
+ 38, -15, 8, -5, 3, -1, 0, 0, },
},
{
- {-2, 4, -6, 12, -24, 108, 49, -18, 10, -6, 3, -2, 0, 0, 0, 0},
- {0, 0, -2, 4, -6, 12, -24, 108, 49, -18, 10, -6, 3, -2, 0, 0},
+ { -2, 4, -6, 12, -24, 108, 49, -18,
+ 10, -6, 3, -2, 0, 0, 0, 0, },
+ { 0, 0, -2, 4, -6, 12, -24, 108,
+ 49, -18, 10, -6, 3, -2, 0, 0, },
},
{
- {-2, 4, -7, 13, -25, 100, 60, -21, 11, -7, 4, -2, 0, 0, 0, 0},
- {0, 0, -2, 4, -7, 13, -25, 100, 60, -21, 11, -7, 4, -2, 0, 0},
+ { -2, 4, -7, 13, -25, 100, 60, -21,
+ 11, -7, 4, -2, 0, 0, 0, 0, },
+ { 0, 0, -2, 4, -7, 13, -25, 100,
+ 60, -21, 11, -7, 4, -2, 0, 0, },
},
{
- {-2, 4, -7, 13, -26, 91, 71, -24, 13, -7, 4, -2, 0, 0, 0, 0},
- {0, 0, -2, 4, -7, 13, -26, 91, 71, -24, 13, -7, 4, -2, 0, 0},
+ { -2, 4, -7, 13, -26, 91, 71, -24,
+ 13, -7, 4, -2, 0, 0, 0, 0, },
+ { 0, 0, -2, 4, -7, 13, -26, 91,
+ 71, -24, 13, -7, 4, -2, 0, 0, },
},
{
- {-2, 4, -7, 13, -25, 81, 81, -25, 13, -7, 4, -2, 0, 0, 0, 0},
- {0, 0, -2, 4, -7, 13, -25, 81, 81, -25, 13, -7, 4, -2, 0, 0},
+ { -2, 4, -7, 13, -25, 81, 81, -25,
+ 13, -7, 4, -2, 0, 0, 0, 0, },
+ { 0, 0, -2, 4, -7, 13, -25, 81,
+ 81, -25, 13, -7, 4, -2, 0, 0, },
},
{
- {-2, 4, -7, 13, -24, 71, 91, -26, 13, -7, 4, -2, 0, 0, 0, 0},
- {0, 0, -2, 4, -7, 13, -24, 71, 91, -26, 13, -7, 4, -2, 0, 0},
+ { -2, 4, -7, 13, -24, 71, 91, -26,
+ 13, -7, 4, -2, 0, 0, 0, 0, },
+ { 0, 0, -2, 4, -7, 13, -24, 71,
+ 91, -26, 13, -7, 4, -2, 0, 0, },
},
{
- {-2, 4, -7, 11, -21, 60, 100, -25, 13, -7, 4, -2, 0, 0, 0, 0},
- {0, 0, -2, 4, -7, 11, -21, 60, 100, -25, 13, -7, 4, -2, 0, 0},
+ { -2, 4, -7, 11, -21, 60, 100, -25,
+ 13, -7, 4, -2, 0, 0, 0, 0, },
+ { 0, 0, -2, 4, -7, 11, -21, 60,
+ 100, -25, 13, -7, 4, -2, 0, 0, },
},
{
- {-2, 3, -6, 10, -18, 49, 108, -24, 12, -6, 4, -2, 0, 0, 0, 0},
- {0, 0, -2, 3, -6, 10, -18, 49, 108, -24, 12, -6, 4, -2, 0, 0},
+ { -2, 3, -6, 10, -18, 49, 108, -24,
+ 12, -6, 4, -2, 0, 0, 0, 0, },
+ { 0, 0, -2, 3, -6, 10, -18, 49,
+ 108, -24, 12, -6, 4, -2, 0, 0, },
},
{
- {-1, 3, -5, 8, -15, 38, 115, -21, 10, -6, 3, -1, 0, 0, 0, 0},
- {0, 0, -1, 3, -5, 8, -15, 38, 115, -21, 10, -6, 3, -1, 0, 0},
+ { -1, 3, -5, 8, -15, 38, 115, -21,
+ 10, -6, 3, -1, 0, 0, 0, 0, },
+ { 0, 0, -1, 3, -5, 8, -15, 38,
+ 115, -21, 10, -6, 3, -1, 0, 0, },
},
{
- {-1, 2, -4, 7, -12, 28, 120, -18, 8, -4, 3, -1, 0, 0, 0, 0},
- {0, 0, -1, 2, -4, 7, -12, 28, 120, -18, 8, -4, 3, -1, 0, 0},
+ { -1, 2, -4, 7, -12, 28, 120, -18,
+ 8, -4, 3, -1, 0, 0, 0, 0, },
+ { 0, 0, -1, 2, -4, 7, -12, 28,
+ 120, -18, 8, -4, 3, -1, 0, 0, },
},
{
- {-1, 2, -2, 4, -8, 18, 124, -13, 6, -3, 2, -1, 0, 0, 0, 0},
- {0, 0, -1, 2, -2, 4, -8, 18, 124, -13, 6, -3, 2, -1, 0, 0},
+ { -1, 2, -2, 4, -8, 18, 124, -13,
+ 6, -3, 2, -1, 0, 0, 0, 0, },
+ { 0, 0, -1, 2, -2, 4, -8, 18,
+ 124, -13, 6, -3, 2, -1, 0, 0, },
},
{
- {0, 1, -1, 2, -4, 8, 127, -7, 3, -2, 1, 0, 0, 0, 0, 0},
- {0, 0, 0, 1, -1, 2, -4, 8, 127, -7, 3, -2, 1, 0, 0, 0},
+ { 0, 1, -1, 2, -4, 8, 127, -7,
+ 3, -2, 1, 0, 0, 0, 0, 0, },
+ { 0, 0, 0, 1, -1, 2, -4, 8,
+ 127, -7, 3, -2, 1, 0, 0, 0, },
},
};
-
-// 10-tap filter padding:
-// {0, filter_coefficients, 0, 0, 0, 0, 0},
-// {0, 0, 0, filter_coefficients, 0, 0, 0),
-DECLARE_ALIGNED(16, const int8_t,
- sub_pel_filters_10sharp_signal_dir[15][2][16]) = {
- {
- {0, 0, -1, 3, -6, 127, 8, -4, 2, -1, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, -1, 3, -6, 127, 8, -4, 2, -1, 0, 0, 0, 0},
- },
- {
- {0, 1, -2, 5, -12, 124, 18, -7, 3, -2, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 1, -2, 5, -12, 124, 18, -7, 3, -2, 0, 0, 0, 0},
- },
- {
- {0, 1, -3, 7, -17, 119, 28, -11, 5, -2, 1, 0, 0, 0, 0, 0},
- {0, 0, 0, 1, -3, 7, -17, 119, 28, -11, 5, -2, 1, 0, 0, 0},
- },
- {
- {0, 1, -4, 8, -20, 114, 38, -14, 7, -3, 1, 0, 0, 0, 0, 0},
- {0, 0, 0, 1, -4, 8, -20, 114, 38, -14, 7, -3, 1, 0, 0, 0},
- },
- {
- {0, 1, -4, 9, -22, 107, 49, -17, 8, -4, 1, 0, 0, 0, 0, 0},
- {0, 0, 0, 1, -4, 9, -22, 107, 49, -17, 8, -4, 1, 0, 0, 0},
- },
- {
- {0, 2, -5, 10, -24, 99, 59, -20, 9, -4, 2, 0, 0, 0, 0, 0},
- {0, 0, 0, 2, -5, 10, -24, 99, 59, -20, 9, -4, 2, 0, 0, 0},
- },
- {
- {0, 2, -5, 10, -24, 90, 70, -22, 10, -5, 2, 0, 0, 0, 0, 0},
- {0, 0, 0, 2, -5, 10, -24, 90, 70, -22, 10, -5, 2, 0, 0, 0},
- },
- {
- {0, 2, -5, 10, -23, 80, 80, -23, 10, -5, 2, 0, 0, 0, 0, 0},
- {0, 0, 0, 2, -5, 10, -23, 80, 80, -23, 10, -5, 2, 0, 0, 0},
- },
- {
- {0, 2, -5, 10, -22, 70, 90, -24, 10, -5, 2, 0, 0, 0, 0, 0},
- {0, 0, 0, 2, -5, 10, -22, 70, 90, -24, 10, -5, 2, 0, 0, 0},
- },
- {
- {0, 2, -4, 9, -20, 59, 99, -24, 10, -5, 2, 0, 0, 0, 0, 0},
- {0, 0, 0, 2, -4, 9, -20, 59, 99, -24, 10, -5, 2, 0, 0, 0},
- },
- {
- {0, 1, -4, 8, -17, 49, 107, -22, 9, -4, 1, 0, 0, 0, 0, 0},
- {0, 0, 0, 1, -4, 8, -17, 49, 107, -22, 9, -4, 1, 0, 0, 0},
- },
- {
- {0, 1, -3, 7, -14, 38, 114, -20, 8, -4, 1, 0, 0, 0, 0, 0},
- {0, 0, 0, 1, -3, 7, -14, 38, 114, -20, 8, -4, 1, 0, 0, 0},
- },
- {
- {0, 1, -2, 5, -11, 28, 119, -17, 7, -3, 1, 0, 0, 0, 0, 0},
- {0, 0, 0, 1, -2, 5, -11, 28, 119, -17, 7, -3, 1, 0, 0, 0},
- },
- {
- {0, 0, -2, 3, -7, 18, 124, -12, 5, -2, 1, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, -2, 3, -7, 18, 124, -12, 5, -2, 1, 0, 0, 0},
- },
- {
- {0, 0, -1, 2, -4, 8, 127, -6, 3, -1, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, -1, 2, -4, 8, 127, -6, 3, -1, 0, 0, 0, 0},
- },
-};
-#endif // CONFIG_EXT_INTERP
-
-// (2-2) Parallel filtering vertically to signal direction
-#if USE_TEMPORALFILTER_12TAP
-DECLARE_ALIGNED(16, const int8_t,
- sub_pel_filters_temporalfilter_12_ver_signal_dir[15][6][16]) = {
- {
- {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
- {-1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3},
- {-7, 127, -7, 127, -7, 127, -7, 127, -7, 127, -7, 127,
- -7, 127, -7, 127},
- {8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4},
- {2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- },
- {
- {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
- {-3, 5, -3, 5, -3, 5, -3, 5, -3, 5, -3, 5, -3, 5, -3, 5},
- {-12, 124, -12, 124, -12, 124, -12, 124, -12, 124, -12, 124,
- -12, 124, -12, 124},
- {18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8},
- {4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2},
- {1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
- },
- {
- {-1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2},
- {-4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8},
- {-17, 120, -17, 120, -17, 120, -17, 120, -17, 120, -17, 120,
- -17, 120, -17, 120},
- {28, -11, 28, -11, 28, -11, 28, -11, 28, -11, 28, -11, 28, -11, 28, -11},
- {6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3},
- {1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1},
- },
- {
- {-1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2},
- {-4, 10, -4, 10, -4, 10, -4, 10, -4, 10, -4, 10, -4, 10, -4, 10},
- {-21, 114, -21, 114, -21, 114, -21, 114, -21, 114, -21, 114,
- -21, 114, -21, 114},
- {38, -15, 38, -15, 38, -15, 38, -15, 38, -15, 38, -15,
- 38, -15, 38, -15},
- {8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4},
- {2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1},
- },
- {
- {-1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3},
- {-5, 11, -5, 11, -5, 11, -5, 11, -5, 11, -5, 11, -5, 11, -5, 11},
- {-23, 107, -23, 107, -23, 107, -23, 107, -23, 107, -23, 107,
- -23, 107, -23, 107},
- {49, -18, 49, -18, 49, -18, 49, -18, 49, -18, 49, -18, 49, -18, 49, -18},
- {9, -5, 9, -5, 9, -5, 9, -5, 9, -5, 9, -5, 9, -5, 9, -5},
- {2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1},
- },
- {
- {-1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3},
- {-6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12},
- {-25, 99, -25, 99, -25, 99, -25, 99, -25, 99, -25, 99, -25, 99, -25, 99},
- {60, -21, 60, -21, 60, -21, 60, -21, 60, -21, 60, -21, 60, -21, 60, -21},
- {11, -6, 11, -6, 11, -6, 11, -6, 11, -6, 11, -6, 11, -6, 11, -6},
- {3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1},
- },
- {
- {-1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3},
- {-6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12},
- {-25, 90, -25, 90, -25, 90, -25, 90, -25, 90, -25, 90, -25, 90, -25, 90},
- {70, -23, 70, -23, 70, -23, 70, -23, 70, -23, 70, -23, 70, -23, 70, -23},
- {12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6},
- {3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1},
- },
- {
- {-1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3},
- {-6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12},
- {-24, 80, -24, 80, -24, 80, -24, 80, -24, 80, -24, 80, -24, 80, -24, 80},
- {80, -24, 80, -24, 80, -24, 80, -24, 80, -24, 80, -24, 80, -24, 80, -24},
- {12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6},
- {3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1},
- },
- {
- {-1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3},
- {-6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12},
- {-23, 70, -23, 70, -23, 70, -23, 70, -23, 70, -23, 70, -23, 70, -23, 70},
- {90, -25, 90, -25, 90, -25, 90, -25, 90, -25, 90, -25, 90, -25, 90, -25},
- {12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6},
- {3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1},
- },
- {
- {-1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3},
- {-6, 11, -6, 11, -6, 11, -6, 11, -6, 11, -6, 11, -6, 11, -6, 11},
- {-21, 60, -21, 60, -21, 60, -21, 60, -21, 60, -21, 60, -21, 60, -21, 60},
- {99, -25, 99, -25, 99, -25, 99, -25, 99, -25, 99, -25, 99, -25, 99, -25},
- {12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6},
- {3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1},
- },
- {
- {-1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2},
- {-5, 9, -5, 9, -5, 9, -5, 9, -5, 9, -5, 9, -5, 9, -5, 9},
- {-18, 49, -18, 49, -18, 49, -18, 49, -18, 49, -18, 49, -18, 49, -18, 49},
- {107, -23, 107, -23, 107, -23, 107, -23, 107, -23, 107, -23, 107,
- -23, 107, -23},
- {11, -5, 11, -5, 11, -5, 11, -5, 11, -5, 11, -5, 11, -5, 11, -5},
- {3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1},
- },
- {
- {-1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2},
- {-4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8},
- {-15, 38, -15, 38, -15, 38, -15, 38, -15, 38, -15, 38, -15, 38, -15, 38},
- {114, -21, 114, -21, 114, -21, 114, -21, 114, -21, 114, -21,
- 114, -21, 114, -21},
- {10, -4, 10, -4, 10, -4, 10, -4, 10, -4, 10, -4, 10, -4, 10, -4},
- {2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1},
- },
- {
- {-1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1},
- {-3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6},
- {-11, 28, -11, 28, -11, 28, -11, 28, -11, 28, -11, 28, -11, 28, -11, 28},
- {120, -17, 120, -17, 120, -17, 120, -17, 120, -17, 120, -17,
- 120, -17, 120, -17},
- {8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4},
- {2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1},
- },
- {
- {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
- {-2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4},
- {-8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18},
- {124, -12, 124, -12, 124, -12, 124, -12, 124, -12, 124, -12,
- 124, -12, 124, -12},
- {5, -3, 5, -3, 5, -3, 5, -3, 5, -3, 5, -3, 5, -3, 5, -3},
- {1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
- },
- {
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {-1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2},
- {-4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8},
- {127, -7, 127, -7, 127, -7, 127, -7, 127, -7, 127, -7,
- 127, -7, 127, -7},
- {3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1},
- {1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
- },
-};
-#endif // USE_TEMPORALFILTER_12TAP
-
+#endif  // CONFIG_EXT_INTERP
#if CONFIG_EXT_INTERP
DECLARE_ALIGNED(16, const int8_t,
sub_pel_filters_12sharp_ver_signal_dir[15][6][16]) = {
{
- {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
- {-2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3},
- {-7, 127, -7, 127, -7, 127, -7, 127, -7, 127, -7, 127,
- -7, 127, -7, 127},
- {8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4},
- {2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1},
- {1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
+ { 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1, },
+ { -2, 3, -2, 3, -2, 3, -2, 3,
+ -2, 3, -2, 3, -2, 3, -2, 3, },
+ { -7, 127, -7, 127, -7, 127, -7, 127,
+ -7, 127, -7, 127, -7, 127, -7, 127, },
+ { 8, -4, 8, -4, 8, -4, 8, -4,
+ 8, -4, 8, -4, 8, -4, 8, -4, },
+ { 2, -1, 2, -1, 2, -1, 2, -1,
+ 2, -1, 2, -1, 2, -1, 2, -1, },
+ { 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0, },
},
{
- {-1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2},
- {-3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6},
- {-13, 124, -13, 124, -13, 124, -13, 124, -13, 124, -13, 124,
- -13, 124, -13, 124},
- {18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8},
- {4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2},
- {2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1},
+ { -1, 2, -1, 2, -1, 2, -1, 2,
+ -1, 2, -1, 2, -1, 2, -1, 2, },
+ { -3, 6, -3, 6, -3, 6, -3, 6,
+ -3, 6, -3, 6, -3, 6, -3, 6, },
+ {-13, 124, -13, 124, -13, 124, -13, 124,
+ -13, 124, -13, 124, -13, 124, -13, 124, },
+ { 18, -8, 18, -8, 18, -8, 18, -8,
+ 18, -8, 18, -8, 18, -8, 18, -8, },
+ { 4, -2, 4, -2, 4, -2, 4, -2,
+ 4, -2, 4, -2, 4, -2, 4, -2, },
+ { 2, -1, 2, -1, 2, -1, 2, -1,
+ 2, -1, 2, -1, 2, -1, 2, -1, },
},
{
- {-1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3},
- {-4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8},
- {-18, 120, -18, 120, -18, 120, -18, 120, -18, 120, -18, 120,
- -18, 120, -18, 120},
- {28, -12, 28, -12, 28, -12, 28, -12, 28, -12, 28, -12, 28, -12, 28, -12},
- {7, -4, 7, -4, 7, -4, 7, -4, 7, -4, 7, -4, 7, -4, 7, -4},
- {2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1},
+ { -1, 3, -1, 3, -1, 3, -1, 3,
+ -1, 3, -1, 3, -1, 3, -1, 3, },
+ { -4, 8, -4, 8, -4, 8, -4, 8,
+ -4, 8, -4, 8, -4, 8, -4, 8, },
+ {-18, 120, -18, 120, -18, 120, -18, 120,
+ -18, 120, -18, 120, -18, 120, -18, 120, },
+ { 28, -12, 28, -12, 28, -12, 28, -12,
+ 28, -12, 28, -12, 28, -12, 28, -12, },
+ { 7, -4, 7, -4, 7, -4, 7, -4,
+ 7, -4, 7, -4, 7, -4, 7, -4, },
+ { 2, -1, 2, -1, 2, -1, 2, -1,
+ 2, -1, 2, -1, 2, -1, 2, -1, },
},
{
- {-1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3},
- {-6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10},
- {-21, 115, -21, 115, -21, 115, -21, 115, -21, 115, -21, 115,
- -21, 115, -21, 115},
- {38, -15, 38, -15, 38, -15, 38, -15, 38, -15, 38, -15,
- 38, -15, 38, -15},
- {8, -5, 8, -5, 8, -5, 8, -5, 8, -5, 8, -5, 8, -5, 8, -5},
- {3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1},
+ { -1, 3, -1, 3, -1, 3, -1, 3,
+ -1, 3, -1, 3, -1, 3, -1, 3, },
+ { -6, 10, -6, 10, -6, 10, -6, 10,
+ -6, 10, -6, 10, -6, 10, -6, 10, },
+ {-21, 115, -21, 115, -21, 115, -21, 115,
+ -21, 115, -21, 115, -21, 115, -21, 115, },
+ { 38, -15, 38, -15, 38, -15, 38, -15,
+ 38, -15, 38, -15, 38, -15, 38, -15, },
+ { 8, -5, 8, -5, 8, -5, 8, -5,
+ 8, -5, 8, -5, 8, -5, 8, -5, },
+ { 3, -1, 3, -1, 3, -1, 3, -1,
+ 3, -1, 3, -1, 3, -1, 3, -1, },
},
{
- {-2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4},
- {-6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12},
- {-24, 108, -24, 108, -24, 108, -24, 108, -24, 108, -24, 108,
- -24, 108, -24, 108},
- {49, -18, 49, -18, 49, -18, 49, -18, 49, -18, 49, -18, 49, -18, 49, -18},
- {10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6},
- {3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2},
+ { -2, 4, -2, 4, -2, 4, -2, 4,
+ -2, 4, -2, 4, -2, 4, -2, 4, },
+ { -6, 12, -6, 12, -6, 12, -6, 12,
+ -6, 12, -6, 12, -6, 12, -6, 12, },
+ {-24, 108, -24, 108, -24, 108, -24, 108,
+ -24, 108, -24, 108, -24, 108, -24, 108, },
+ { 49, -18, 49, -18, 49, -18, 49, -18,
+ 49, -18, 49, -18, 49, -18, 49, -18, },
+ { 10, -6, 10, -6, 10, -6, 10, -6,
+ 10, -6, 10, -6, 10, -6, 10, -6, },
+ { 3, -2, 3, -2, 3, -2, 3, -2,
+ 3, -2, 3, -2, 3, -2, 3, -2, },
},
{
- {-2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4},
- {-7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13},
- {-25, 100, -25, 100, -25, 100, -25, 100, -25, 100, -25, 100,
- -25, 100, -25, 100},
- {60, -21, 60, -21, 60, -21, 60, -21, 60, -21, 60, -21, 60, -21, 60, -21},
- {11, -7, 11, -7, 11, -7, 11, -7, 11, -7, 11, -7, 11, -7, 11, -7},
- {4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2},
+ { -2, 4, -2, 4, -2, 4, -2, 4,
+ -2, 4, -2, 4, -2, 4, -2, 4, },
+ { -7, 13, -7, 13, -7, 13, -7, 13,
+ -7, 13, -7, 13, -7, 13, -7, 13, },
+ {-25, 100, -25, 100, -25, 100, -25, 100,
+ -25, 100, -25, 100, -25, 100, -25, 100, },
+ { 60, -21, 60, -21, 60, -21, 60, -21,
+ 60, -21, 60, -21, 60, -21, 60, -21, },
+ { 11, -7, 11, -7, 11, -7, 11, -7,
+ 11, -7, 11, -7, 11, -7, 11, -7, },
+ { 4, -2, 4, -2, 4, -2, 4, -2,
+ 4, -2, 4, -2, 4, -2, 4, -2, },
},
{
- {-2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4},
- {-7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13},
- {-26, 91, -26, 91, -26, 91, -26, 91, -26, 91, -26, 91, -26, 91, -26, 91},
- {71, -24, 71, -24, 71, -24, 71, -24, 71, -24, 71, -24, 71, -24, 71, -24},
- {13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7},
- {4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2},
+ { -2, 4, -2, 4, -2, 4, -2, 4,
+ -2, 4, -2, 4, -2, 4, -2, 4, },
+ { -7, 13, -7, 13, -7, 13, -7, 13,
+ -7, 13, -7, 13, -7, 13, -7, 13, },
+ {-26, 91, -26, 91, -26, 91, -26, 91,
+ -26, 91, -26, 91, -26, 91, -26, 91, },
+ { 71, -24, 71, -24, 71, -24, 71, -24,
+ 71, -24, 71, -24, 71, -24, 71, -24, },
+ { 13, -7, 13, -7, 13, -7, 13, -7,
+ 13, -7, 13, -7, 13, -7, 13, -7, },
+ { 4, -2, 4, -2, 4, -2, 4, -2,
+ 4, -2, 4, -2, 4, -2, 4, -2, },
},
{
- {-2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4},
- {-7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13},
- {-25, 81, -25, 81, -25, 81, -25, 81, -25, 81, -25, 81, -25, 81, -25, 81},
- {81, -25, 81, -25, 81, -25, 81, -25, 81, -25, 81, -25, 81, -25, 81, -25},
- {13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7},
- {4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2},
+ { -2, 4, -2, 4, -2, 4, -2, 4,
+ -2, 4, -2, 4, -2, 4, -2, 4, },
+ { -7, 13, -7, 13, -7, 13, -7, 13,
+ -7, 13, -7, 13, -7, 13, -7, 13, },
+ {-25, 81, -25, 81, -25, 81, -25, 81,
+ -25, 81, -25, 81, -25, 81, -25, 81, },
+ { 81, -25, 81, -25, 81, -25, 81, -25,
+ 81, -25, 81, -25, 81, -25, 81, -25, },
+ { 13, -7, 13, -7, 13, -7, 13, -7,
+ 13, -7, 13, -7, 13, -7, 13, -7, },
+ { 4, -2, 4, -2, 4, -2, 4, -2,
+ 4, -2, 4, -2, 4, -2, 4, -2, },
},
{
- {-2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4},
- {-7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13},
- {-24, 71, -24, 71, -24, 71, -24, 71, -24, 71, -24, 71, -24, 71, -24, 71},
- {91, -26, 91, -26, 91, -26, 91, -26, 91, -26, 91, -26, 91, -26, 91, -26},
- {13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7},
- {4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2},
+ { -2, 4, -2, 4, -2, 4, -2, 4,
+ -2, 4, -2, 4, -2, 4, -2, 4, },
+ { -7, 13, -7, 13, -7, 13, -7, 13,
+ -7, 13, -7, 13, -7, 13, -7, 13, },
+ {-24, 71, -24, 71, -24, 71, -24, 71,
+ -24, 71, -24, 71, -24, 71, -24, 71, },
+ { 91, -26, 91, -26, 91, -26, 91, -26,
+ 91, -26, 91, -26, 91, -26, 91, -26, },
+ { 13, -7, 13, -7, 13, -7, 13, -7,
+ 13, -7, 13, -7, 13, -7, 13, -7, },
+ { 4, -2, 4, -2, 4, -2, 4, -2,
+ 4, -2, 4, -2, 4, -2, 4, -2, },
},
{
- {-2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4},
- {-7, 11, -7, 11, -7, 11, -7, 11, -7, 11, -7, 11, -7, 11, -7, 11},
- {-21, 60, -21, 60, -21, 60, -21, 60, -21, 60, -21, 60, -21, 60, -21, 60},
- {100, -25, 100, -25, 100, -25, 100, -25, 100, -25, 100, -25,
- 100, -25, 100, -25},
- {13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7},
- {4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2},
+ { -2, 4, -2, 4, -2, 4, -2, 4,
+ -2, 4, -2, 4, -2, 4, -2, 4, },
+ { -7, 11, -7, 11, -7, 11, -7, 11,
+ -7, 11, -7, 11, -7, 11, -7, 11, },
+ {-21, 60, -21, 60, -21, 60, -21, 60,
+ -21, 60, -21, 60, -21, 60, -21, 60, },
+ {100, -25, 100, -25, 100, -25, 100, -25,
+ 100, -25, 100, -25, 100, -25, 100, -25, },
+ { 13, -7, 13, -7, 13, -7, 13, -7,
+ 13, -7, 13, -7, 13, -7, 13, -7, },
+ { 4, -2, 4, -2, 4, -2, 4, -2,
+ 4, -2, 4, -2, 4, -2, 4, -2, },
},
{
- {-2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3},
- {-6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10},
- {-18, 49, -18, 49, -18, 49, -18, 49, -18, 49, -18, 49, -18, 49, -18, 49},
- {108, -24, 108, -24, 108, -24, 108, -24, 108, -24, 108, -24,
- 108, -24, 108, -24},
- {12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6},
- {4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2},
+ { -2, 3, -2, 3, -2, 3, -2, 3,
+ -2, 3, -2, 3, -2, 3, -2, 3, },
+ { -6, 10, -6, 10, -6, 10, -6, 10,
+ -6, 10, -6, 10, -6, 10, -6, 10, },
+ {-18, 49, -18, 49, -18, 49, -18, 49,
+ -18, 49, -18, 49, -18, 49, -18, 49, },
+ {108, -24, 108, -24, 108, -24, 108, -24,
+ 108, -24, 108, -24, 108, -24, 108, -24, },
+ { 12, -6, 12, -6, 12, -6, 12, -6,
+ 12, -6, 12, -6, 12, -6, 12, -6, },
+ { 4, -2, 4, -2, 4, -2, 4, -2,
+ 4, -2, 4, -2, 4, -2, 4, -2, },
},
{
- {-1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3},
- {-5, 8, -5, 8, -5, 8, -5, 8, -5, 8, -5, 8, -5, 8, -5, 8},
- {-15, 38, -15, 38, -15, 38, -15, 38, -15, 38, -15, 38, -15, 38, -15, 38},
- {115, -21, 115, -21, 115, -21, 115, -21, 115, -21, 115, -21,
- 115, -21, 115, -21},
- {10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6},
- {3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1},
+ { -1, 3, -1, 3, -1, 3, -1, 3,
+ -1, 3, -1, 3, -1, 3, -1, 3, },
+ { -5, 8, -5, 8, -5, 8, -5, 8,
+ -5, 8, -5, 8, -5, 8, -5, 8, },
+ {-15, 38, -15, 38, -15, 38, -15, 38,
+ -15, 38, -15, 38, -15, 38, -15, 38, },
+ {115, -21, 115, -21, 115, -21, 115, -21,
+ 115, -21, 115, -21, 115, -21, 115, -21, },
+ { 10, -6, 10, -6, 10, -6, 10, -6,
+ 10, -6, 10, -6, 10, -6, 10, -6, },
+ { 3, -1, 3, -1, 3, -1, 3, -1,
+ 3, -1, 3, -1, 3, -1, 3, -1, },
},
{
- {-1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2},
- {-4, 7, -4, 7, -4, 7, -4, 7, -4, 7, -4, 7, -4, 7, -4, 7},
- {-12, 28, -12, 28, -12, 28, -12, 28, -12, 28, -12, 28, -12, 28, -12, 28},
- {120, -18, 120, -18, 120, -18, 120, -18, 120, -18, 120, -18,
- 120, -18, 120, -18},
- {8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4},
- {3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1},
+ { -1, 2, -1, 2, -1, 2, -1, 2,
+ -1, 2, -1, 2, -1, 2, -1, 2, },
+ { -4, 7, -4, 7, -4, 7, -4, 7,
+ -4, 7, -4, 7, -4, 7, -4, 7, },
+ {-12, 28, -12, 28, -12, 28, -12, 28,
+ -12, 28, -12, 28, -12, 28, -12, 28, },
+ {120, -18, 120, -18, 120, -18, 120, -18,
+ 120, -18, 120, -18, 120, -18, 120, -18, },
+ { 8, -4, 8, -4, 8, -4, 8, -4,
+ 8, -4, 8, -4, 8, -4, 8, -4, },
+ { 3, -1, 3, -1, 3, -1, 3, -1,
+ 3, -1, 3, -1, 3, -1, 3, -1, },
},
{
- {-1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2},
- {-2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4},
- {-8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18},
- {124, -13, 124, -13, 124, -13, 124, -13, 124, -13, 124, -13,
- 124, -13, 124, -13},
- {6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3},
- {2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1},
+ { -1, 2, -1, 2, -1, 2, -1, 2,
+ -1, 2, -1, 2, -1, 2, -1, 2, },
+ { -2, 4, -2, 4, -2, 4, -2, 4,
+ -2, 4, -2, 4, -2, 4, -2, 4, },
+ { -8, 18, -8, 18, -8, 18, -8, 18,
+ -8, 18, -8, 18, -8, 18, -8, 18, },
+ {124, -13, 124, -13, 124, -13, 124, -13,
+ 124, -13, 124, -13, 124, -13, 124, -13, },
+ { 6, -3, 6, -3, 6, -3, 6, -3,
+ 6, -3, 6, -3, 6, -3, 6, -3, },
+ { 2, -1, 2, -1, 2, -1, 2, -1,
+ 2, -1, 2, -1, 2, -1, 2, -1, },
},
{
- {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
- {-1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2},
- {-4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8},
- {127, -7, 127, -7, 127, -7, 127, -7, 127, -7, 127, -7,
- 127, -7, 127, -7},
- {3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2},
- {1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
+ { 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1, },
+ { -1, 2, -1, 2, -1, 2, -1, 2,
+ -1, 2, -1, 2, -1, 2, -1, 2, },
+ { -4, 8, -4, 8, -4, 8, -4, 8,
+ -4, 8, -4, 8, -4, 8, -4, 8, },
+ {127, -7, 127, -7, 127, -7, 127, -7,
+ 127, -7, 127, -7, 127, -7, 127, -7, },
+ { 3, -2, 3, -2, 3, -2, 3, -2,
+ 3, -2, 3, -2, 3, -2, 3, -2, },
+ { 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0, },
},
};
-
+#endif  // CONFIG_EXT_INTERP
+#if USE_TEMPORALFILTER_12TAP
DECLARE_ALIGNED(16, const int8_t,
- sub_pel_filters_10sharp_ver_signal_dir[15][6][16]) = {
+ sub_pel_filters_temporalfilter_12_signal_dir[15][2][16]) = {
{
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {-1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3},
- {-6, 127, -6, 127, -6, 127, -6, 127, -6, 127, -6, 127, -6, 127, -6, 127},
- {8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4},
- {2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 0, 1, -1, 3, -7, 127, 8, -4,
+ 2, -1, 0, 0, 0, 0, 0, 0, },
+ { 0, 0, 0, 1, -1, 3, -7, 127,
+ 8, -4, 2, -1, 0, 0, 0, 0, },
},
{
- {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
- {-2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5},
- {-12, 124, -12, 124, -12, 124, -12, 124, -12, 124, -12, 124,
- -12, 124, -12, 124},
- {18, -7, 18, -7, 18, -7, 18, -7, 18, -7, 18, -7, 18, -7, 18, -7},
- {3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 0, 1, -3, 5, -12, 124, 18, -8,
+ 4, -2, 1, 0, 0, 0, 0, 0, },
+ { 0, 0, 0, 1, -3, 5, -12, 124,
+ 18, -8, 4, -2, 1, 0, 0, 0, },
},
{
- {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
- {-3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7},
- {-17, 119, -17, 119, -17, 119, -17, 119, -17, 119, -17, 119,
- -17, 119, -17, 119},
- {28, -11, 28, -11, 28, -11, 28, -11, 28, -11, 28, -11, 28, -11, 28, -11},
- {5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2},
- {1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
+ { -1, 2, -4, 8, -17, 120, 28, -11,
+ 6, -3, 1, -1, 0, 0, 0, 0, },
+ { 0, 0, -1, 2, -4, 8, -17, 120,
+ 28, -11, 6, -3, 1, -1, 0, 0, },
},
{
- {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
- {-4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8},
- {-20, 114, -20, 114, -20, 114, -20, 114, -20, 114, -20, 114,
- -20, 114, -20, 114},
- {38, -14, 38, -14, 38, -14, 38, -14, 38, -14, 38, -14, 38, -14, 38, -14},
- {7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3},
- {1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
+ { -1, 2, -4, 10, -21, 114, 38, -15,
+ 8, -4, 2, -1, 0, 0, 0, 0, },
+ { 0, 0, -1, 2, -4, 10, -21, 114,
+ 38, -15, 8, -4, 2, -1, 0, 0, },
},
{
- {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
- {-4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9},
- {-22, 107, -22, 107, -22, 107, -22, 107, -22, 107, -22, 107,
- -22, 107, -22, 107},
- {49, -17, 49, -17, 49, -17, 49, -17, 49, -17, 49, -17, 49, -17, 49, -17},
- {8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4},
- {1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
+ { -1, 3, -5, 11, -23, 107, 49, -18,
+ 9, -5, 2, -1, 0, 0, 0, 0, },
+ { 0, 0, -1, 3, -5, 11, -23, 107,
+ 49, -18, 9, -5, 2, -1, 0, 0, },
},
{
- {0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2},
- {-5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10},
- {-24, 99, -24, 99, -24, 99, -24, 99, -24, 99, -24, 99, -24, 99, -24, 99},
- {59, -20, 59, -20, 59, -20, 59, -20, 59, -20, 59, -20, 59, -20, 59, -20},
- {9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4},
- {2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0},
+ { -1, 3, -6, 12, -25, 99, 60, -21,
+ 11, -6, 3, -1, 0, 0, 0, 0, },
+ { 0, 0, -1, 3, -6, 12, -25, 99,
+ 60, -21, 11, -6, 3, -1, 0, 0, },
},
{
- {0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2},
- {-5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10},
- {-24, 90, -24, 90, -24, 90, -24, 90, -24, 90, -24, 90, -24, 90, -24, 90},
- {70, -22, 70, -22, 70, -22, 70, -22, 70, -22, 70, -22, 70, -22, 70, -22},
- {10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5},
- {2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0},
+ { -1, 3, -6, 12, -25, 90, 70, -23,
+ 12, -6, 3, -1, 0, 0, 0, 0, },
+ { 0, 0, -1, 3, -6, 12, -25, 90,
+ 70, -23, 12, -6, 3, -1, 0, 0, },
},
{
- {0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2},
- {-5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10},
- {-23, 80, -23, 80, -23, 80, -23, 80, -23, 80, -23, 80, -23, 80, -23, 80},
- {80, -23, 80, -23, 80, -23, 80, -23, 80, -23, 80, -23, 80, -23, 80, -23},
- {10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5},
- {2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0},
+ { -1, 3, -6, 12, -24, 80, 80, -24,
+ 12, -6, 3, -1, 0, 0, 0, 0, },
+ { 0, 0, -1, 3, -6, 12, -24, 80,
+ 80, -24, 12, -6, 3, -1, 0, 0, },
},
{
- {0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2},
- {-5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10},
- {-22, 70, -22, 70, -22, 70, -22, 70, -22, 70, -22, 70, -22, 70, -22, 70},
- {90, -24, 90, -24, 90, -24, 90, -24, 90, -24, 90, -24, 90, -24, 90, -24},
- {10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5},
- {2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0},
+ { -1, 3, -6, 12, -23, 70, 90, -25,
+ 12, -6, 3, -1, 0, 0, 0, 0, },
+ { 0, 0, -1, 3, -6, 12, -23, 70,
+ 90, -25, 12, -6, 3, -1, 0, 0, },
},
{
- {0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2},
- {-4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9},
- {-20, 59, -20, 59, -20, 59, -20, 59, -20, 59, -20, 59, -20, 59, -20, 59},
- {99, -24, 99, -24, 99, -24, 99, -24, 99, -24, 99, -24, 99, -24, 99, -24},
- {10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5},
- {2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0},
+ { -1, 3, -6, 11, -21, 60, 99, -25,
+ 12, -6, 3, -1, 0, 0, 0, 0, },
+ { 0, 0, -1, 3, -6, 11, -21, 60,
+ 99, -25, 12, -6, 3, -1, 0, 0, },
},
{
- {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
- {-4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8},
- {-17, 49, -17, 49, -17, 49, -17, 49, -17, 49, -17, 49, -17, 49, -17, 49},
- {107, -22, 107, -22, 107, -22, 107, -22, 107, -22, 107, -22,
- 107, -22, 107, -22},
- {9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4},
- {1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
+ { -1, 2, -5, 9, -18, 49, 107, -23,
+ 11, -5, 3, -1, 0, 0, 0, 0, },
+ { 0, 0, -1, 2, -5, 9, -18, 49,
+ 107, -23, 11, -5, 3, -1, 0, 0, },
},
{
- {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
- {-3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7},
- {-14, 38, -14, 38, -14, 38, -14, 38, -14, 38, -14, 38, -14, 38, -14, 38},
- {114, -20, 114, -20, 114, -20, 114, -20, 114, -20, 114, -20,
- 114, -20, 114, -20},
- {8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4},
- {1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
+ { -1, 2, -4, 8, -15, 38, 114, -21,
+ 10, -4, 2, -1, 0, 0, 0, 0, },
+ { 0, 0, -1, 2, -4, 8, -15, 38,
+ 114, -21, 10, -4, 2, -1, 0, 0, },
},
{
- {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
- {-2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5},
- {-11, 28, -11, 28, -11, 28, -11, 28, -11, 28, -11, 28, -11, 28, -11, 28},
- {119, -17, 119, -17, 119, -17, 119, -17, 119, -17, 119, -17,
- 119, -17, 119, -17},
- {7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3},
- {1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
+ { -1, 1, -3, 6, -11, 28, 120, -17,
+ 8, -4, 2, -1, 0, 0, 0, 0, },
+ { 0, 0, -1, 1, -3, 6, -11, 28,
+ 120, -17, 8, -4, 2, -1, 0, 0, },
},
{
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {-2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3},
- {-7, 18, -7, 18, -7, 18, -7, 18, -7, 18, -7, 18, -7, 18, -7, 18},
- {124, -12, 124, -12, 124, -12, 124, -12, 124, -12, 124, -12,
- 124, -12, 124, -12},
- {5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2},
- {1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
+ { 0, 1, -2, 4, -8, 18, 124, -12,
+ 5, -3, 1, 0, 0, 0, 0, 0, },
+ { 0, 0, 0, 1, -2, 4, -8, 18,
+ 124, -12, 5, -3, 1, 0, 0, 0, },
},
{
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {-1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2},
- {-4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8},
- {127, -6, 127, -6, 127, -6, 127, -6, 127, -6, 127, -6,
- 127, -6, 127, -6},
- {3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 0, 0, -1, 2, -4, 8, 127, -7,
+ 3, -1, 1, 0, 0, 0, 0, 0, },
+ { 0, 0, 0, 0, -1, 2, -4, 8,
+ 127, -7, 3, -1, 1, 0, 0, 0, },
},
};
-#endif // CONFIG_EXT_INTERP
+#endif
+#if USE_TEMPORALFILTER_12TAP
+DECLARE_ALIGNED(16, const int8_t,
+ sub_pel_filters_temporalfilter_12_ver_signal_dir[15][6][16]) = {
+ {
+ { 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1, },
+ { -1, 3, -1, 3, -1, 3, -1, 3,
+ -1, 3, -1, 3, -1, 3, -1, 3, },
+ { -7, 127, -7, 127, -7, 127, -7, 127,
+ -7, 127, -7, 127, -7, 127, -7, 127, },
+ { 8, -4, 8, -4, 8, -4, 8, -4,
+ 8, -4, 8, -4, 8, -4, 8, -4, },
+ { 2, -1, 2, -1, 2, -1, 2, -1,
+ 2, -1, 2, -1, 2, -1, 2, -1, },
+ { 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, },
+ },
+ {
+ { 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1, },
+ { -3, 5, -3, 5, -3, 5, -3, 5,
+ -3, 5, -3, 5, -3, 5, -3, 5, },
+ {-12, 124, -12, 124, -12, 124, -12, 124,
+ -12, 124, -12, 124, -12, 124, -12, 124, },
+ { 18, -8, 18, -8, 18, -8, 18, -8,
+ 18, -8, 18, -8, 18, -8, 18, -8, },
+ { 4, -2, 4, -2, 4, -2, 4, -2,
+ 4, -2, 4, -2, 4, -2, 4, -2, },
+ { 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0, },
+ },
+ {
+ { -1, 2, -1, 2, -1, 2, -1, 2,
+ -1, 2, -1, 2, -1, 2, -1, 2, },
+ { -4, 8, -4, 8, -4, 8, -4, 8,
+ -4, 8, -4, 8, -4, 8, -4, 8, },
+ {-17, 120, -17, 120, -17, 120, -17, 120,
+ -17, 120, -17, 120, -17, 120, -17, 120, },
+ { 28, -11, 28, -11, 28, -11, 28, -11,
+ 28, -11, 28, -11, 28, -11, 28, -11, },
+ { 6, -3, 6, -3, 6, -3, 6, -3,
+ 6, -3, 6, -3, 6, -3, 6, -3, },
+ { 1, -1, 1, -1, 1, -1, 1, -1,
+ 1, -1, 1, -1, 1, -1, 1, -1, },
+ },
+ {
+ { -1, 2, -1, 2, -1, 2, -1, 2,
+ -1, 2, -1, 2, -1, 2, -1, 2, },
+ { -4, 10, -4, 10, -4, 10, -4, 10,
+ -4, 10, -4, 10, -4, 10, -4, 10, },
+ {-21, 114, -21, 114, -21, 114, -21, 114,
+ -21, 114, -21, 114, -21, 114, -21, 114, },
+ { 38, -15, 38, -15, 38, -15, 38, -15,
+ 38, -15, 38, -15, 38, -15, 38, -15, },
+ { 8, -4, 8, -4, 8, -4, 8, -4,
+ 8, -4, 8, -4, 8, -4, 8, -4, },
+ { 2, -1, 2, -1, 2, -1, 2, -1,
+ 2, -1, 2, -1, 2, -1, 2, -1, },
+ },
+ {
+ { -1, 3, -1, 3, -1, 3, -1, 3,
+ -1, 3, -1, 3, -1, 3, -1, 3, },
+ { -5, 11, -5, 11, -5, 11, -5, 11,
+ -5, 11, -5, 11, -5, 11, -5, 11, },
+ {-23, 107, -23, 107, -23, 107, -23, 107,
+ -23, 107, -23, 107, -23, 107, -23, 107, },
+ { 49, -18, 49, -18, 49, -18, 49, -18,
+ 49, -18, 49, -18, 49, -18, 49, -18, },
+ { 9, -5, 9, -5, 9, -5, 9, -5,
+ 9, -5, 9, -5, 9, -5, 9, -5, },
+ { 2, -1, 2, -1, 2, -1, 2, -1,
+ 2, -1, 2, -1, 2, -1, 2, -1, },
+ },
+ {
+ { -1, 3, -1, 3, -1, 3, -1, 3,
+ -1, 3, -1, 3, -1, 3, -1, 3, },
+ { -6, 12, -6, 12, -6, 12, -6, 12,
+ -6, 12, -6, 12, -6, 12, -6, 12, },
+ {-25, 99, -25, 99, -25, 99, -25, 99,
+ -25, 99, -25, 99, -25, 99, -25, 99, },
+ { 60, -21, 60, -21, 60, -21, 60, -21,
+ 60, -21, 60, -21, 60, -21, 60, -21, },
+ { 11, -6, 11, -6, 11, -6, 11, -6,
+ 11, -6, 11, -6, 11, -6, 11, -6, },
+ { 3, -1, 3, -1, 3, -1, 3, -1,
+ 3, -1, 3, -1, 3, -1, 3, -1, },
+ },
+ {
+ { -1, 3, -1, 3, -1, 3, -1, 3,
+ -1, 3, -1, 3, -1, 3, -1, 3, },
+ { -6, 12, -6, 12, -6, 12, -6, 12,
+ -6, 12, -6, 12, -6, 12, -6, 12, },
+ {-25, 90, -25, 90, -25, 90, -25, 90,
+ -25, 90, -25, 90, -25, 90, -25, 90, },
+ { 70, -23, 70, -23, 70, -23, 70, -23,
+ 70, -23, 70, -23, 70, -23, 70, -23, },
+ { 12, -6, 12, -6, 12, -6, 12, -6,
+ 12, -6, 12, -6, 12, -6, 12, -6, },
+ { 3, -1, 3, -1, 3, -1, 3, -1,
+ 3, -1, 3, -1, 3, -1, 3, -1, },
+ },
+ {
+ { -1, 3, -1, 3, -1, 3, -1, 3,
+ -1, 3, -1, 3, -1, 3, -1, 3, },
+ { -6, 12, -6, 12, -6, 12, -6, 12,
+ -6, 12, -6, 12, -6, 12, -6, 12, },
+ {-24, 80, -24, 80, -24, 80, -24, 80,
+ -24, 80, -24, 80, -24, 80, -24, 80, },
+ { 80, -24, 80, -24, 80, -24, 80, -24,
+ 80, -24, 80, -24, 80, -24, 80, -24, },
+ { 12, -6, 12, -6, 12, -6, 12, -6,
+ 12, -6, 12, -6, 12, -6, 12, -6, },
+ { 3, -1, 3, -1, 3, -1, 3, -1,
+ 3, -1, 3, -1, 3, -1, 3, -1, },
+ },
+ {
+ { -1, 3, -1, 3, -1, 3, -1, 3,
+ -1, 3, -1, 3, -1, 3, -1, 3, },
+ { -6, 12, -6, 12, -6, 12, -6, 12,
+ -6, 12, -6, 12, -6, 12, -6, 12, },
+ {-23, 70, -23, 70, -23, 70, -23, 70,
+ -23, 70, -23, 70, -23, 70, -23, 70, },
+ { 90, -25, 90, -25, 90, -25, 90, -25,
+ 90, -25, 90, -25, 90, -25, 90, -25, },
+ { 12, -6, 12, -6, 12, -6, 12, -6,
+ 12, -6, 12, -6, 12, -6, 12, -6, },
+ { 3, -1, 3, -1, 3, -1, 3, -1,
+ 3, -1, 3, -1, 3, -1, 3, -1, },
+ },
+ {
+ { -1, 3, -1, 3, -1, 3, -1, 3,
+ -1, 3, -1, 3, -1, 3, -1, 3, },
+ { -6, 11, -6, 11, -6, 11, -6, 11,
+ -6, 11, -6, 11, -6, 11, -6, 11, },
+ {-21, 60, -21, 60, -21, 60, -21, 60,
+ -21, 60, -21, 60, -21, 60, -21, 60, },
+ { 99, -25, 99, -25, 99, -25, 99, -25,
+ 99, -25, 99, -25, 99, -25, 99, -25, },
+ { 12, -6, 12, -6, 12, -6, 12, -6,
+ 12, -6, 12, -6, 12, -6, 12, -6, },
+ { 3, -1, 3, -1, 3, -1, 3, -1,
+ 3, -1, 3, -1, 3, -1, 3, -1, },
+ },
+ {
+ { -1, 2, -1, 2, -1, 2, -1, 2,
+ -1, 2, -1, 2, -1, 2, -1, 2, },
+ { -5, 9, -5, 9, -5, 9, -5, 9,
+ -5, 9, -5, 9, -5, 9, -5, 9, },
+ {-18, 49, -18, 49, -18, 49, -18, 49,
+ -18, 49, -18, 49, -18, 49, -18, 49, },
+ {107, -23, 107, -23, 107, -23, 107, -23,
+ 107, -23, 107, -23, 107, -23, 107, -23, },
+ { 11, -5, 11, -5, 11, -5, 11, -5,
+ 11, -5, 11, -5, 11, -5, 11, -5, },
+ { 3, -1, 3, -1, 3, -1, 3, -1,
+ 3, -1, 3, -1, 3, -1, 3, -1, },
+ },
+ {
+ { -1, 2, -1, 2, -1, 2, -1, 2,
+ -1, 2, -1, 2, -1, 2, -1, 2, },
+ { -4, 8, -4, 8, -4, 8, -4, 8,
+ -4, 8, -4, 8, -4, 8, -4, 8, },
+ {-15, 38, -15, 38, -15, 38, -15, 38,
+ -15, 38, -15, 38, -15, 38, -15, 38, },
+ {114, -21, 114, -21, 114, -21, 114, -21,
+ 114, -21, 114, -21, 114, -21, 114, -21, },
+ { 10, -4, 10, -4, 10, -4, 10, -4,
+ 10, -4, 10, -4, 10, -4, 10, -4, },
+ { 2, -1, 2, -1, 2, -1, 2, -1,
+ 2, -1, 2, -1, 2, -1, 2, -1, },
+ },
+ {
+ { -1, 1, -1, 1, -1, 1, -1, 1,
+ -1, 1, -1, 1, -1, 1, -1, 1, },
+ { -3, 6, -3, 6, -3, 6, -3, 6,
+ -3, 6, -3, 6, -3, 6, -3, 6, },
+ {-11, 28, -11, 28, -11, 28, -11, 28,
+ -11, 28, -11, 28, -11, 28, -11, 28, },
+ {120, -17, 120, -17, 120, -17, 120, -17,
+ 120, -17, 120, -17, 120, -17, 120, -17, },
+ { 8, -4, 8, -4, 8, -4, 8, -4,
+ 8, -4, 8, -4, 8, -4, 8, -4, },
+ { 2, -1, 2, -1, 2, -1, 2, -1,
+ 2, -1, 2, -1, 2, -1, 2, -1, },
+ },
+ {
+ { 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1, },
+ { -2, 4, -2, 4, -2, 4, -2, 4,
+ -2, 4, -2, 4, -2, 4, -2, 4, },
+ { -8, 18, -8, 18, -8, 18, -8, 18,
+ -8, 18, -8, 18, -8, 18, -8, 18, },
+ {124, -12, 124, -12, 124, -12, 124, -12,
+ 124, -12, 124, -12, 124, -12, 124, -12, },
+ { 5, -3, 5, -3, 5, -3, 5, -3,
+ 5, -3, 5, -3, 5, -3, 5, -3, },
+ { 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0, },
+ },
+ {
+ { 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, },
+ { -1, 2, -1, 2, -1, 2, -1, 2,
+ -1, 2, -1, 2, -1, 2, -1, 2, },
+ { -4, 8, -4, 8, -4, 8, -4, 8,
+ -4, 8, -4, 8, -4, 8, -4, 8, },
+ {127, -7, 127, -7, 127, -7, 127, -7,
+ 127, -7, 127, -7, 127, -7, 127, -7, },
+ { 3, -1, 3, -1, 3, -1, 3, -1,
+ 3, -1, 3, -1, 3, -1, 3, -1, },
+ { 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0, },
+ },
+};
+#endif
diff --git a/vp10/common/x86/vp10_highbd_convolve_filters_sse4.c b/vp10/common/x86/vp10_highbd_convolve_filters_sse4.c
new file mode 100644
index 0000000..0251022
--- /dev/null
+++ b/vp10/common/x86/vp10_highbd_convolve_filters_sse4.c
@@ -0,0 +1,393 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "./vpx_config.h"
+#include "vp10/common/filter.h"
+
+#if CONFIG_VP9_HIGHBITDEPTH
+#if CONFIG_EXT_INTERP
+DECLARE_ALIGNED(16, const int16_t,
+ sub_pel_filters_10sharp_highbd_ver_signal_dir[15][6][8]) = {
+ {
+ { 0, 0, 0, 0, 0, 0, 0, 0, },
+ { -1, 3, -1, 3, -1, 3, -1, 3, },
+ { -6, 127, -6, 127, -6, 127, -6, 127, },
+ { 8, -4, 8, -4, 8, -4, 8, -4, },
+ { 2, -1, 2, -1, 2, -1, 2, -1, },
+ { 0, 0, 0, 0, 0, 0, 0, 0, },
+ },
+ {
+ { 0, 1, 0, 1, 0, 1, 0, 1, },
+ { -2, 5, -2, 5, -2, 5, -2, 5, },
+ {-12, 124, -12, 124, -12, 124, -12, 124, },
+ { 18, -7, 18, -7, 18, -7, 18, -7, },
+ { 3, -2, 3, -2, 3, -2, 3, -2, },
+ { 0, 0, 0, 0, 0, 0, 0, 0, },
+ },
+ {
+ { 0, 1, 0, 1, 0, 1, 0, 1, },
+ { -3, 7, -3, 7, -3, 7, -3, 7, },
+ {-17, 119, -17, 119, -17, 119, -17, 119, },
+ { 28, -11, 28, -11, 28, -11, 28, -11, },
+ { 5, -2, 5, -2, 5, -2, 5, -2, },
+ { 1, 0, 1, 0, 1, 0, 1, 0, },
+ },
+ {
+ { 0, 1, 0, 1, 0, 1, 0, 1, },
+ { -4, 8, -4, 8, -4, 8, -4, 8, },
+ {-20, 114, -20, 114, -20, 114, -20, 114, },
+ { 38, -14, 38, -14, 38, -14, 38, -14, },
+ { 7, -3, 7, -3, 7, -3, 7, -3, },
+ { 1, 0, 1, 0, 1, 0, 1, 0, },
+ },
+ {
+ { 0, 1, 0, 1, 0, 1, 0, 1, },
+ { -4, 9, -4, 9, -4, 9, -4, 9, },
+ {-22, 107, -22, 107, -22, 107, -22, 107, },
+ { 49, -17, 49, -17, 49, -17, 49, -17, },
+ { 8, -4, 8, -4, 8, -4, 8, -4, },
+ { 1, 0, 1, 0, 1, 0, 1, 0, },
+ },
+ {
+ { 0, 2, 0, 2, 0, 2, 0, 2, },
+ { -5, 10, -5, 10, -5, 10, -5, 10, },
+ {-24, 99, -24, 99, -24, 99, -24, 99, },
+ { 59, -20, 59, -20, 59, -20, 59, -20, },
+ { 9, -4, 9, -4, 9, -4, 9, -4, },
+ { 2, 0, 2, 0, 2, 0, 2, 0, },
+ },
+ {
+ { 0, 2, 0, 2, 0, 2, 0, 2, },
+ { -5, 10, -5, 10, -5, 10, -5, 10, },
+ {-24, 90, -24, 90, -24, 90, -24, 90, },
+ { 70, -22, 70, -22, 70, -22, 70, -22, },
+ { 10, -5, 10, -5, 10, -5, 10, -5, },
+ { 2, 0, 2, 0, 2, 0, 2, 0, },
+ },
+ {
+ { 0, 2, 0, 2, 0, 2, 0, 2, },
+ { -5, 10, -5, 10, -5, 10, -5, 10, },
+ {-23, 80, -23, 80, -23, 80, -23, 80, },
+ { 80, -23, 80, -23, 80, -23, 80, -23, },
+ { 10, -5, 10, -5, 10, -5, 10, -5, },
+ { 2, 0, 2, 0, 2, 0, 2, 0, },
+ },
+ {
+ { 0, 2, 0, 2, 0, 2, 0, 2, },
+ { -5, 10, -5, 10, -5, 10, -5, 10, },
+ {-22, 70, -22, 70, -22, 70, -22, 70, },
+ { 90, -24, 90, -24, 90, -24, 90, -24, },
+ { 10, -5, 10, -5, 10, -5, 10, -5, },
+ { 2, 0, 2, 0, 2, 0, 2, 0, },
+ },
+ {
+ { 0, 2, 0, 2, 0, 2, 0, 2, },
+ { -4, 9, -4, 9, -4, 9, -4, 9, },
+ {-20, 59, -20, 59, -20, 59, -20, 59, },
+ { 99, -24, 99, -24, 99, -24, 99, -24, },
+ { 10, -5, 10, -5, 10, -5, 10, -5, },
+ { 2, 0, 2, 0, 2, 0, 2, 0, },
+ },
+ {
+ { 0, 1, 0, 1, 0, 1, 0, 1, },
+ { -4, 8, -4, 8, -4, 8, -4, 8, },
+ {-17, 49, -17, 49, -17, 49, -17, 49, },
+ {107, -22, 107, -22, 107, -22, 107, -22, },
+ { 9, -4, 9, -4, 9, -4, 9, -4, },
+ { 1, 0, 1, 0, 1, 0, 1, 0, },
+ },
+ {
+ { 0, 1, 0, 1, 0, 1, 0, 1, },
+ { -3, 7, -3, 7, -3, 7, -3, 7, },
+ {-14, 38, -14, 38, -14, 38, -14, 38, },
+ {114, -20, 114, -20, 114, -20, 114, -20, },
+ { 8, -4, 8, -4, 8, -4, 8, -4, },
+ { 1, 0, 1, 0, 1, 0, 1, 0, },
+ },
+ {
+ { 0, 1, 0, 1, 0, 1, 0, 1, },
+ { -2, 5, -2, 5, -2, 5, -2, 5, },
+ {-11, 28, -11, 28, -11, 28, -11, 28, },
+ {119, -17, 119, -17, 119, -17, 119, -17, },
+ { 7, -3, 7, -3, 7, -3, 7, -3, },
+ { 1, 0, 1, 0, 1, 0, 1, 0, },
+ },
+ {
+ { 0, 0, 0, 0, 0, 0, 0, 0, },
+ { -2, 3, -2, 3, -2, 3, -2, 3, },
+ { -7, 18, -7, 18, -7, 18, -7, 18, },
+ {124, -12, 124, -12, 124, -12, 124, -12, },
+ { 5, -2, 5, -2, 5, -2, 5, -2, },
+ { 1, 0, 1, 0, 1, 0, 1, 0, },
+ },
+ {
+ { 0, 0, 0, 0, 0, 0, 0, 0, },
+ { -1, 2, -1, 2, -1, 2, -1, 2, },
+ { -4, 8, -4, 8, -4, 8, -4, 8, },
+ {127, -6, 127, -6, 127, -6, 127, -6, },
+ { 3, -1, 3, -1, 3, -1, 3, -1, },
+ { 0, 0, 0, 0, 0, 0, 0, 0, },
+ },
+};
+#endif
+#endif
+#if CONFIG_VP9_HIGHBITDEPTH
+#if CONFIG_EXT_INTERP
+DECLARE_ALIGNED(16, const int16_t,
+ sub_pel_filters_12sharp_highbd_ver_signal_dir[15][6][8]) = {
+ {
+ { 0, 1, 0, 1, 0, 1, 0, 1, },
+ { -2, 3, -2, 3, -2, 3, -2, 3, },
+ { -7, 127, -7, 127, -7, 127, -7, 127, },
+ { 8, -4, 8, -4, 8, -4, 8, -4, },
+ { 2, -1, 2, -1, 2, -1, 2, -1, },
+ { 1, 0, 1, 0, 1, 0, 1, 0, },
+ },
+ {
+ { -1, 2, -1, 2, -1, 2, -1, 2, },
+ { -3, 6, -3, 6, -3, 6, -3, 6, },
+ {-13, 124, -13, 124, -13, 124, -13, 124, },
+ { 18, -8, 18, -8, 18, -8, 18, -8, },
+ { 4, -2, 4, -2, 4, -2, 4, -2, },
+ { 2, -1, 2, -1, 2, -1, 2, -1, },
+ },
+ {
+ { -1, 3, -1, 3, -1, 3, -1, 3, },
+ { -4, 8, -4, 8, -4, 8, -4, 8, },
+ {-18, 120, -18, 120, -18, 120, -18, 120, },
+ { 28, -12, 28, -12, 28, -12, 28, -12, },
+ { 7, -4, 7, -4, 7, -4, 7, -4, },
+ { 2, -1, 2, -1, 2, -1, 2, -1, },
+ },
+ {
+ { -1, 3, -1, 3, -1, 3, -1, 3, },
+ { -6, 10, -6, 10, -6, 10, -6, 10, },
+ {-21, 115, -21, 115, -21, 115, -21, 115, },
+ { 38, -15, 38, -15, 38, -15, 38, -15, },
+ { 8, -5, 8, -5, 8, -5, 8, -5, },
+ { 3, -1, 3, -1, 3, -1, 3, -1, },
+ },
+ {
+ { -2, 4, -2, 4, -2, 4, -2, 4, },
+ { -6, 12, -6, 12, -6, 12, -6, 12, },
+ {-24, 108, -24, 108, -24, 108, -24, 108, },
+ { 49, -18, 49, -18, 49, -18, 49, -18, },
+ { 10, -6, 10, -6, 10, -6, 10, -6, },
+ { 3, -2, 3, -2, 3, -2, 3, -2, },
+ },
+ {
+ { -2, 4, -2, 4, -2, 4, -2, 4, },
+ { -7, 13, -7, 13, -7, 13, -7, 13, },
+ {-25, 100, -25, 100, -25, 100, -25, 100, },
+ { 60, -21, 60, -21, 60, -21, 60, -21, },
+ { 11, -7, 11, -7, 11, -7, 11, -7, },
+ { 4, -2, 4, -2, 4, -2, 4, -2, },
+ },
+ {
+ { -2, 4, -2, 4, -2, 4, -2, 4, },
+ { -7, 13, -7, 13, -7, 13, -7, 13, },
+ {-26, 91, -26, 91, -26, 91, -26, 91, },
+ { 71, -24, 71, -24, 71, -24, 71, -24, },
+ { 13, -7, 13, -7, 13, -7, 13, -7, },
+ { 4, -2, 4, -2, 4, -2, 4, -2, },
+ },
+ {
+ { -2, 4, -2, 4, -2, 4, -2, 4, },
+ { -7, 13, -7, 13, -7, 13, -7, 13, },
+ {-25, 81, -25, 81, -25, 81, -25, 81, },
+ { 81, -25, 81, -25, 81, -25, 81, -25, },
+ { 13, -7, 13, -7, 13, -7, 13, -7, },
+ { 4, -2, 4, -2, 4, -2, 4, -2, },
+ },
+ {
+ { -2, 4, -2, 4, -2, 4, -2, 4, },
+ { -7, 13, -7, 13, -7, 13, -7, 13, },
+ {-24, 71, -24, 71, -24, 71, -24, 71, },
+ { 91, -26, 91, -26, 91, -26, 91, -26, },
+ { 13, -7, 13, -7, 13, -7, 13, -7, },
+ { 4, -2, 4, -2, 4, -2, 4, -2, },
+ },
+ {
+ { -2, 4, -2, 4, -2, 4, -2, 4, },
+ { -7, 11, -7, 11, -7, 11, -7, 11, },
+ {-21, 60, -21, 60, -21, 60, -21, 60, },
+ {100, -25, 100, -25, 100, -25, 100, -25, },
+ { 13, -7, 13, -7, 13, -7, 13, -7, },
+ { 4, -2, 4, -2, 4, -2, 4, -2, },
+ },
+ {
+ { -2, 3, -2, 3, -2, 3, -2, 3, },
+ { -6, 10, -6, 10, -6, 10, -6, 10, },
+ {-18, 49, -18, 49, -18, 49, -18, 49, },
+ {108, -24, 108, -24, 108, -24, 108, -24, },
+ { 12, -6, 12, -6, 12, -6, 12, -6, },
+ { 4, -2, 4, -2, 4, -2, 4, -2, },
+ },
+ {
+ { -1, 3, -1, 3, -1, 3, -1, 3, },
+ { -5, 8, -5, 8, -5, 8, -5, 8, },
+ {-15, 38, -15, 38, -15, 38, -15, 38, },
+ {115, -21, 115, -21, 115, -21, 115, -21, },
+ { 10, -6, 10, -6, 10, -6, 10, -6, },
+ { 3, -1, 3, -1, 3, -1, 3, -1, },
+ },
+ {
+ { -1, 2, -1, 2, -1, 2, -1, 2, },
+ { -4, 7, -4, 7, -4, 7, -4, 7, },
+ {-12, 28, -12, 28, -12, 28, -12, 28, },
+ {120, -18, 120, -18, 120, -18, 120, -18, },
+ { 8, -4, 8, -4, 8, -4, 8, -4, },
+ { 3, -1, 3, -1, 3, -1, 3, -1, },
+ },
+ {
+ { -1, 2, -1, 2, -1, 2, -1, 2, },
+ { -2, 4, -2, 4, -2, 4, -2, 4, },
+ { -8, 18, -8, 18, -8, 18, -8, 18, },
+ {124, -13, 124, -13, 124, -13, 124, -13, },
+ { 6, -3, 6, -3, 6, -3, 6, -3, },
+ { 2, -1, 2, -1, 2, -1, 2, -1, },
+ },
+ {
+ { 0, 1, 0, 1, 0, 1, 0, 1, },
+ { -1, 2, -1, 2, -1, 2, -1, 2, },
+ { -4, 8, -4, 8, -4, 8, -4, 8, },
+ {127, -7, 127, -7, 127, -7, 127, -7, },
+ { 3, -2, 3, -2, 3, -2, 3, -2, },
+ { 1, 0, 1, 0, 1, 0, 1, 0, },
+ },
+};
+#endif
+#endif
+#if CONFIG_VP9_HIGHBITDEPTH
+#if USE_TEMPORALFILTER_12TAP
+DECLARE_ALIGNED(16, const int16_t,
+ sub_pel_filters_temporalfilter_12_highbd_ver_signal_dir[15][6][8]) = {
+ {
+ { 0, 1, 0, 1, 0, 1, 0, 1, },
+ { -1, 3, -1, 3, -1, 3, -1, 3, },
+ { -7, 127, -7, 127, -7, 127, -7, 127, },
+ { 8, -4, 8, -4, 8, -4, 8, -4, },
+ { 2, -1, 2, -1, 2, -1, 2, -1, },
+ { 0, 0, 0, 0, 0, 0, 0, 0, },
+ },
+ {
+ { 0, 1, 0, 1, 0, 1, 0, 1, },
+ { -3, 5, -3, 5, -3, 5, -3, 5, },
+ {-12, 124, -12, 124, -12, 124, -12, 124, },
+ { 18, -8, 18, -8, 18, -8, 18, -8, },
+ { 4, -2, 4, -2, 4, -2, 4, -2, },
+ { 1, 0, 1, 0, 1, 0, 1, 0, },
+ },
+ {
+ { -1, 2, -1, 2, -1, 2, -1, 2, },
+ { -4, 8, -4, 8, -4, 8, -4, 8, },
+ {-17, 120, -17, 120, -17, 120, -17, 120, },
+ { 28, -11, 28, -11, 28, -11, 28, -11, },
+ { 6, -3, 6, -3, 6, -3, 6, -3, },
+ { 1, -1, 1, -1, 1, -1, 1, -1, },
+ },
+ {
+ { -1, 2, -1, 2, -1, 2, -1, 2, },
+ { -4, 10, -4, 10, -4, 10, -4, 10, },
+ {-21, 114, -21, 114, -21, 114, -21, 114, },
+ { 38, -15, 38, -15, 38, -15, 38, -15, },
+ { 8, -4, 8, -4, 8, -4, 8, -4, },
+ { 2, -1, 2, -1, 2, -1, 2, -1, },
+ },
+ {
+ { -1, 3, -1, 3, -1, 3, -1, 3, },
+ { -5, 11, -5, 11, -5, 11, -5, 11, },
+ {-23, 107, -23, 107, -23, 107, -23, 107, },
+ { 49, -18, 49, -18, 49, -18, 49, -18, },
+ { 9, -5, 9, -5, 9, -5, 9, -5, },
+ { 2, -1, 2, -1, 2, -1, 2, -1, },
+ },
+ {
+ { -1, 3, -1, 3, -1, 3, -1, 3, },
+ { -6, 12, -6, 12, -6, 12, -6, 12, },
+ {-25, 99, -25, 99, -25, 99, -25, 99, },
+ { 60, -21, 60, -21, 60, -21, 60, -21, },
+ { 11, -6, 11, -6, 11, -6, 11, -6, },
+ { 3, -1, 3, -1, 3, -1, 3, -1, },
+ },
+ {
+ { -1, 3, -1, 3, -1, 3, -1, 3, },
+ { -6, 12, -6, 12, -6, 12, -6, 12, },
+ {-25, 90, -25, 90, -25, 90, -25, 90, },
+ { 70, -23, 70, -23, 70, -23, 70, -23, },
+ { 12, -6, 12, -6, 12, -6, 12, -6, },
+ { 3, -1, 3, -1, 3, -1, 3, -1, },
+ },
+ {
+ { -1, 3, -1, 3, -1, 3, -1, 3, },
+ { -6, 12, -6, 12, -6, 12, -6, 12, },
+ {-24, 80, -24, 80, -24, 80, -24, 80, },
+ { 80, -24, 80, -24, 80, -24, 80, -24, },
+ { 12, -6, 12, -6, 12, -6, 12, -6, },
+ { 3, -1, 3, -1, 3, -1, 3, -1, },
+ },
+ {
+ { -1, 3, -1, 3, -1, 3, -1, 3, },
+ { -6, 12, -6, 12, -6, 12, -6, 12, },
+ {-23, 70, -23, 70, -23, 70, -23, 70, },
+ { 90, -25, 90, -25, 90, -25, 90, -25, },
+ { 12, -6, 12, -6, 12, -6, 12, -6, },
+ { 3, -1, 3, -1, 3, -1, 3, -1, },
+ },
+ {
+ { -1, 3, -1, 3, -1, 3, -1, 3, },
+ { -6, 11, -6, 11, -6, 11, -6, 11, },
+ {-21, 60, -21, 60, -21, 60, -21, 60, },
+ { 99, -25, 99, -25, 99, -25, 99, -25, },
+ { 12, -6, 12, -6, 12, -6, 12, -6, },
+ { 3, -1, 3, -1, 3, -1, 3, -1, },
+ },
+ {
+ { -1, 2, -1, 2, -1, 2, -1, 2, },
+ { -5, 9, -5, 9, -5, 9, -5, 9, },
+ {-18, 49, -18, 49, -18, 49, -18, 49, },
+ {107, -23, 107, -23, 107, -23, 107, -23, },
+ { 11, -5, 11, -5, 11, -5, 11, -5, },
+ { 3, -1, 3, -1, 3, -1, 3, -1, },
+ },
+ {
+ { -1, 2, -1, 2, -1, 2, -1, 2, },
+ { -4, 8, -4, 8, -4, 8, -4, 8, },
+ {-15, 38, -15, 38, -15, 38, -15, 38, },
+ {114, -21, 114, -21, 114, -21, 114, -21, },
+ { 10, -4, 10, -4, 10, -4, 10, -4, },
+ { 2, -1, 2, -1, 2, -1, 2, -1, },
+ },
+ {
+ { -1, 1, -1, 1, -1, 1, -1, 1, },
+ { -3, 6, -3, 6, -3, 6, -3, 6, },
+ {-11, 28, -11, 28, -11, 28, -11, 28, },
+ {120, -17, 120, -17, 120, -17, 120, -17, },
+ { 8, -4, 8, -4, 8, -4, 8, -4, },
+ { 2, -1, 2, -1, 2, -1, 2, -1, },
+ },
+ {
+ { 0, 1, 0, 1, 0, 1, 0, 1, },
+ { -2, 4, -2, 4, -2, 4, -2, 4, },
+ { -8, 18, -8, 18, -8, 18, -8, 18, },
+ {124, -12, 124, -12, 124, -12, 124, -12, },
+ { 5, -3, 5, -3, 5, -3, 5, -3, },
+ { 1, 0, 1, 0, 1, 0, 1, 0, },
+ },
+ {
+ { 0, 0, 0, 0, 0, 0, 0, 0, },
+ { -1, 2, -1, 2, -1, 2, -1, 2, },
+ { -4, 8, -4, 8, -4, 8, -4, 8, },
+ {127, -7, 127, -7, 127, -7, 127, -7, },
+ { 3, -1, 3, -1, 3, -1, 3, -1, },
+ { 1, 0, 1, 0, 1, 0, 1, 0, },
+ },
+};
+#endif
+#endif
diff --git a/vp10/common/x86/vp10_highbd_convolve_sse4.c b/vp10/common/x86/vp10_highbd_convolve_sse4.c
new file mode 100644
index 0000000..e828178
--- /dev/null
+++ b/vp10/common/x86/vp10_highbd_convolve_sse4.c
@@ -0,0 +1,474 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <smmintrin.h>
+
+#include "./vp10_rtcd.h"
+#include "vp10/common/filter.h"
+
+typedef void (*TransposeSave)(const int width, int pixelsNum,
+ uint32_t *src, int src_stride,
+ uint16_t *dst, int dst_stride,
+ int bd);
+
+// pixelsNum 0: write all 4 rows held in u[0..3]
+//           1/2/3: write only the first 1/2/3 (residual) rows
+static void writePixel(__m128i *u, int width, int pixelsNum,
+ uint16_t *dst, int dst_stride) {
+ if (2 == width) {
+ if (0 == pixelsNum) {
+ *(int *)dst = _mm_cvtsi128_si32(u[0]);
+ *(int *)(dst + dst_stride) = _mm_cvtsi128_si32(u[1]);
+ *(int *)(dst + 2 * dst_stride) = _mm_cvtsi128_si32(u[2]);
+ *(int *)(dst + 3 * dst_stride) = _mm_cvtsi128_si32(u[3]);
+ } else if (1 == pixelsNum) {
+ *(int *)dst = _mm_cvtsi128_si32(u[0]);
+ } else if (2 == pixelsNum) {
+ *(int *)dst = _mm_cvtsi128_si32(u[0]);
+ *(int *)(dst + dst_stride) = _mm_cvtsi128_si32(u[1]);
+ } else if (3 == pixelsNum) {
+ *(int *)dst = _mm_cvtsi128_si32(u[0]);
+ *(int *)(dst + dst_stride) = _mm_cvtsi128_si32(u[1]);
+ *(int *)(dst + 2 * dst_stride) = _mm_cvtsi128_si32(u[2]);
+ }
+ } else {
+ if (0 == pixelsNum) {
+ _mm_storel_epi64((__m128i *)dst, u[0]);
+ _mm_storel_epi64((__m128i *)(dst + dst_stride), u[1]);
+ _mm_storel_epi64((__m128i *)(dst + 2 * dst_stride), u[2]);
+ _mm_storel_epi64((__m128i *)(dst + 3 * dst_stride), u[3]);
+ } else if (1 == pixelsNum) {
+ _mm_storel_epi64((__m128i *)dst, u[0]);
+ } else if (2 == pixelsNum) {
+ _mm_storel_epi64((__m128i *)dst, u[0]);
+ _mm_storel_epi64((__m128i *)(dst + dst_stride), u[1]);
+ } else if (3 == pixelsNum) {
+ _mm_storel_epi64((__m128i *)dst, u[0]);
+ _mm_storel_epi64((__m128i *)(dst + dst_stride), u[1]);
+ _mm_storel_epi64((__m128i *)(dst + 2 * dst_stride), u[2]);
+ }
+ }
+}
+
+// Clamp signed 16-bit pixels to [0, (1 << bd) - 1]; bd is 10 or 12.
+static void highbd_clip(__m128i *p, int numVecs, int bd) {
+ const __m128i zero = _mm_setzero_si128();
+ const __m128i one = _mm_set1_epi16(1);
+ const __m128i max = _mm_sub_epi16(_mm_slli_epi16(one, bd), one);
+ __m128i clamped, mask;
+ int i;
+
+ for (i = 0; i < numVecs; i++) {
+ mask = _mm_cmpgt_epi16(p[i], max);
+ clamped = _mm_andnot_si128(mask, p[i]);
+ mask = _mm_and_si128(mask, max);
+ clamped = _mm_or_si128(mask, clamped);
+ mask = _mm_cmpgt_epi16(clamped, zero);
+ p[i] = _mm_and_si128(clamped, mask);
+ }
+}
+
+static void transClipPixel(uint32_t *src, int src_stride, __m128i *u, int bd) {
+ __m128i v0, v1;
+ __m128i rnd = _mm_set1_epi32(1 << (FILTER_BITS - 1));
+
+ u[0] = _mm_loadu_si128((__m128i const *)src);
+ u[1] = _mm_loadu_si128((__m128i const *)(src + src_stride));
+ u[2] = _mm_loadu_si128((__m128i const *)(src + 2 * src_stride));
+ u[3] = _mm_loadu_si128((__m128i const *)(src + 3 * src_stride));
+
+ u[0] = _mm_add_epi32(u[0], rnd);
+ u[1] = _mm_add_epi32(u[1], rnd);
+ u[2] = _mm_add_epi32(u[2], rnd);
+ u[3] = _mm_add_epi32(u[3], rnd);
+
+ u[0] = _mm_srai_epi32(u[0], FILTER_BITS);
+ u[1] = _mm_srai_epi32(u[1], FILTER_BITS);
+ u[2] = _mm_srai_epi32(u[2], FILTER_BITS);
+ u[3] = _mm_srai_epi32(u[3], FILTER_BITS);
+
+ u[0] = _mm_packus_epi32(u[0], u[1]);
+ u[1] = _mm_packus_epi32(u[2], u[3]);
+
+ highbd_clip(u, 2, bd);
+
+ v0 = _mm_unpacklo_epi16(u[0], u[1]);
+ v1 = _mm_unpackhi_epi16(u[0], u[1]);
+
+ u[0] = _mm_unpacklo_epi16(v0, v1);
+ u[2] = _mm_unpackhi_epi16(v0, v1);
+
+ u[1] = _mm_srli_si128(u[0], 8);
+ u[3] = _mm_srli_si128(u[2], 8);
+}
+
+// pixelsNum = 0     : all 4 rows of pixels will be saved.
+// pixelsNum = 1/2/3 : only the residual 1/2/3 rows of pixels will be saved.
+void trans_save_4x4(const int width, int pixelsNum,
+ uint32_t *src, int src_stride,
+ uint16_t *dst, int dst_stride,
+ int bd) {
+ __m128i u[4];
+ transClipPixel(src, src_stride, u, bd);
+ writePixel(u, width, pixelsNum, dst, dst_stride);
+}
+
+void trans_accum_save_4x4(const int width, int pixelsNum,
+ uint32_t *src, int src_stride,
+ uint16_t *dst, int dst_stride,
+ int bd) {
+ __m128i u[4], v[4];
+ const __m128i ones = _mm_set1_epi16(1);
+
+ transClipPixel(src, src_stride, u, bd);
+
+ v[0] = _mm_loadl_epi64((__m128i const *)dst);
+ v[1] = _mm_loadl_epi64((__m128i const *)(dst + dst_stride));
+ v[2] = _mm_loadl_epi64((__m128i const *)(dst + 2 * dst_stride));
+ v[3] = _mm_loadl_epi64((__m128i const *)(dst + 3 * dst_stride));
+
+ u[0] = _mm_add_epi16(u[0], v[0]);
+ u[1] = _mm_add_epi16(u[1], v[1]);
+ u[2] = _mm_add_epi16(u[2], v[2]);
+ u[3] = _mm_add_epi16(u[3], v[3]);
+
+ u[0] = _mm_add_epi16(u[0], ones);
+ u[1] = _mm_add_epi16(u[1], ones);
+ u[2] = _mm_add_epi16(u[2], ones);
+ u[3] = _mm_add_epi16(u[3], ones);
+
+ u[0] = _mm_srai_epi16(u[0], 1);
+ u[1] = _mm_srai_epi16(u[1], 1);
+ u[2] = _mm_srai_epi16(u[2], 1);
+ u[3] = _mm_srai_epi16(u[3], 1);
+
+ writePixel(u, width, pixelsNum, dst, dst_stride);
+}
+
+static TransposeSave transSaveTab[2] = {
+ trans_save_4x4, trans_accum_save_4x4};
+
+static INLINE void transpose_pair(__m128i *in, __m128i *out) {
+ __m128i x0, x1;
+
+ x0 = _mm_unpacklo_epi32(in[0], in[1]);
+ x1 = _mm_unpacklo_epi32(in[2], in[3]);
+
+ out[0] = _mm_unpacklo_epi64(x0, x1);
+ out[1] = _mm_unpackhi_epi64(x0, x1);
+
+ x0 = _mm_unpackhi_epi32(in[0], in[1]);
+ x1 = _mm_unpackhi_epi32(in[2], in[3]);
+
+ out[2] = _mm_unpacklo_epi64(x0, x1);
+ out[3] = _mm_unpackhi_epi64(x0, x1);
+
+ x0 = _mm_unpacklo_epi32(in[4], in[5]);
+ x1 = _mm_unpacklo_epi32(in[6], in[7]);
+
+ out[4] = _mm_unpacklo_epi64(x0, x1);
+ out[5] = _mm_unpackhi_epi64(x0, x1);
+}
+
+static void highbd_filter_horiz(const uint16_t *src, int src_stride,
+ __m128i *f, int tapsNum, uint32_t *buf) {
+ __m128i u[8], v[6];
+
+ if (tapsNum == 10) {
+ src -= 1;
+ }
+
+ u[0] = _mm_loadu_si128((__m128i const *)src);
+ u[1] = _mm_loadu_si128((__m128i const *)(src + src_stride));
+ u[2] = _mm_loadu_si128((__m128i const *)(src + 2 * src_stride));
+ u[3] = _mm_loadu_si128((__m128i const *)(src + 3 * src_stride));
+
+ u[4] = _mm_loadu_si128((__m128i const *)(src + 8));
+ u[5] = _mm_loadu_si128((__m128i const *)(src + src_stride + 8));
+ u[6] = _mm_loadu_si128((__m128i const *)(src + 2 * src_stride + 8));
+ u[7] = _mm_loadu_si128((__m128i const *)(src + 3 * src_stride + 8));
+
+ transpose_pair(u, v);
+
+ u[0] = _mm_madd_epi16(v[0], f[0]);
+ u[1] = _mm_madd_epi16(v[1], f[1]);
+ u[2] = _mm_madd_epi16(v[2], f[2]);
+ u[3] = _mm_madd_epi16(v[3], f[3]);
+ u[4] = _mm_madd_epi16(v[4], f[4]);
+ u[5] = _mm_madd_epi16(v[5], f[5]);
+
+ u[6] = _mm_min_epi32(u[2], u[3]);
+ u[7] = _mm_max_epi32(u[2], u[3]);
+
+ u[0] = _mm_add_epi32(u[0], u[1]);
+ u[0] = _mm_add_epi32(u[0], u[5]);
+ u[0] = _mm_add_epi32(u[0], u[4]);
+ u[0] = _mm_add_epi32(u[0], u[6]);
+ u[0] = _mm_add_epi32(u[0], u[7]);
+
+ _mm_storeu_si128((__m128i *)buf, u[0]);
+}
+
+// Horizontal high-bitdepth sub-pixel convolution, 4 rows x 4 columns per step.
+// Falls back to the C version when subpel_x_q4 is 0, x_step_q4 is not 16, or
+// no coefficient table is returned. avg picks the save routine (store/average).
+void vp10_highbd_convolve_horiz_sse4_1(const uint16_t *src, int src_stride,
+                                       uint16_t *dst, int dst_stride,
+                                       int w, int h,
+                                       const InterpFilterParams filter_params,
+                                       const int subpel_x_q4, int x_step_q4,
+                                       int avg, int bd) {
+  DECLARE_ALIGNED(16, uint32_t, temp[4 * 4]);
+  __m128i verf[6];
+  HbdSubpelFilterCoeffs vCoeffs;
+  const uint16_t *srcPtr;
+  const int tapsNum = filter_params.taps;
+  int i, col, count, blkResidu, blkHeight;
+  TransposeSave transSave = transSaveTab[avg];
+
+  if (0 == subpel_x_q4 || 16 != x_step_q4) {
+    vp10_highbd_convolve_horiz_c(src, src_stride, dst, dst_stride, w, h,
+                                 filter_params, subpel_x_q4, x_step_q4, avg,
+                                 bd);
+    return;
+  }
+
+  vCoeffs = vp10_hbd_get_subpel_filter_ver_signal_dir(
+      filter_params, subpel_x_q4 - 1);
+  if (!vCoeffs) {
+    vp10_highbd_convolve_horiz_c(src, src_stride, dst, dst_stride, w, h,
+                                 filter_params, subpel_x_q4, x_step_q4, avg,
+                                 bd);
+    return;
+  }
+
+  for (i = 0; i < 6; ++i) verf[i] = *((const __m128i *)(vCoeffs + i));
+
+  src -= (tapsNum >> 1) - 1;
+  srcPtr = src;
+
+  count = 0;
+  blkHeight = h >> 2;
+  blkResidu = h & 3;
+
+  while (blkHeight != 0) {
+    for (col = 0; col < w; col += 4) {
+      for (i = 0; i < 4; ++i) {
+        highbd_filter_horiz(srcPtr, src_stride, verf, tapsNum, temp + (i * 4));
+        srcPtr += 1;
+      }
+      transSave(w, 0, temp, 4, dst + col, dst_stride, bd);
+    }
+    count++;
+    srcPtr = src + count * src_stride * 4;
+    dst += dst_stride * 4;
+    blkHeight--;
+  }
+
+  // A pixelsNum of 0 tells transSave to store all 4 rows, so when h is a
+  // multiple of 4 we must stop here rather than write 4 rows below the block.
+  if (0 == blkResidu) return;
+  for (col = 0; col < w; col += 4) {
+    for (i = 0; i < 4; ++i) {
+      highbd_filter_horiz(srcPtr, src_stride, verf, tapsNum, temp + (i * 4));
+      srcPtr += 1;
+    }
+    transSave(w, blkResidu, temp, 4, dst + col, dst_stride, bd);
+  }
+}
+
+// Vertical convolutional filter
+
+typedef void (*WritePixels)(__m128i *u, int bd, uint16_t *dst);
+
+static void highbdRndingPacks(__m128i *u) {
+ __m128i rnd = _mm_set1_epi32(1 << (FILTER_BITS - 1));
+ u[0] = _mm_add_epi32(u[0], rnd);
+ u[0] = _mm_srai_epi32(u[0], FILTER_BITS);
+ u[0] = _mm_packus_epi32(u[0], u[0]);
+}
+
+static void write2pixelsOnly(__m128i *u, int bd, uint16_t *dst) {
+ highbdRndingPacks(u);
+ highbd_clip(u, 1, bd);
+ *(uint32_t *)dst = _mm_cvtsi128_si32(u[0]);
+}
+
+static void write2pixelsAccum(__m128i *u, int bd, uint16_t *dst) {
+  // Load only the 2 pixels (4 bytes) that are averaged and stored below; an
+  // 8-byte load may read past the end of a width-2 destination row.
+  __m128i v = _mm_cvtsi32_si128(*(const int *)dst);
+  const __m128i ones = _mm_set1_epi16(1);
+
+  highbdRndingPacks(u);
+  highbd_clip(u, 1, bd);
+  // Rounded average of the filtered pixels with the existing dst pixels.
+  v = _mm_add_epi16(_mm_add_epi16(v, u[0]), ones);
+  *(uint32_t *)dst = _mm_cvtsi128_si32(_mm_srai_epi16(v, 1));
+}
+
+WritePixels write2pixelsTab[2] = {write2pixelsOnly, write2pixelsAccum};
+
+static void write4pixelsOnly(__m128i *u, int bd, uint16_t *dst) {
+ highbdRndingPacks(u);
+ highbd_clip(u, 1, bd);
+ _mm_storel_epi64((__m128i *)dst, u[0]);
+}
+
+static void write4pixelsAccum(__m128i *u, int bd, uint16_t *dst) {
+ __m128i v = _mm_loadl_epi64((__m128i const *)dst);
+ const __m128i ones = _mm_set1_epi16(1);
+
+ highbdRndingPacks(u);
+ highbd_clip(u, 1, bd);
+
+ v = _mm_add_epi16(v, u[0]);
+ v = _mm_add_epi16(v, ones);
+ v = _mm_srai_epi16(v, 1);
+ _mm_storel_epi64((__m128i *)dst, v);
+}
+
+WritePixels write4pixelsTab[2] = {write4pixelsOnly, write4pixelsAccum};
+
+static void filter_vert_horiz_parallel(const uint16_t *src, int src_stride,
+ const __m128i *f, int taps,
+ uint16_t *dst, WritePixels saveFunc,
+ int bd) {
+ __m128i s[12];
+ __m128i zero = _mm_setzero_si128();
+ int i = 0;
+ int r = 0;
+
+ // TODO(luoyi) treat s[12] as a circular buffer in width = 2 case
+ if (10 == taps) {
+ i += 1;
+ s[0] = zero;
+ }
+ while (i < 12) {
+ s[i] = _mm_loadu_si128((__m128i const *)(src + r * src_stride));
+ i += 1;
+ r += 1;
+ }
+
+ s[0] = _mm_unpacklo_epi16(s[0], s[1]);
+ s[2] = _mm_unpacklo_epi16(s[2], s[3]);
+ s[4] = _mm_unpacklo_epi16(s[4], s[5]);
+ s[6] = _mm_unpacklo_epi16(s[6], s[7]);
+ s[8] = _mm_unpacklo_epi16(s[8], s[9]);
+ s[10] = _mm_unpacklo_epi16(s[10], s[11]);
+
+ s[0] = _mm_madd_epi16(s[0], f[0]);
+ s[2] = _mm_madd_epi16(s[2], f[1]);
+ s[4] = _mm_madd_epi16(s[4], f[2]);
+ s[6] = _mm_madd_epi16(s[6], f[3]);
+ s[8] = _mm_madd_epi16(s[8], f[4]);
+ s[10] = _mm_madd_epi16(s[10], f[5]);
+
+ s[1] = _mm_min_epi32(s[4], s[6]);
+ s[3] = _mm_max_epi32(s[4], s[6]);
+
+ s[0] = _mm_add_epi32(s[0], s[2]);
+ s[0] = _mm_add_epi32(s[0], s[10]);
+ s[0] = _mm_add_epi32(s[0], s[8]);
+ s[0] = _mm_add_epi32(s[0], s[1]);
+ s[0] = _mm_add_epi32(s[0], s[3]);
+
+ saveFunc(s, bd, dst);
+}
+
+static void highbd_filter_vert_compute_large(const uint16_t *src,
+ int src_stride,
+ const __m128i *f, int taps,
+ int w, int h,
+ uint16_t *dst, int dst_stride,
+ int avg, int bd) {
+ int col;
+ int rowIndex = 0;
+ const uint16_t *src_ptr = src;
+ uint16_t *dst_ptr = dst;
+ const int step = 4;
+ WritePixels write4pixels = write4pixelsTab[avg];
+
+ do {
+ for (col = 0; col < w; col += step) {
+ filter_vert_horiz_parallel(src_ptr, src_stride, f, taps,
+ dst_ptr, write4pixels, bd);
+ src_ptr += step;
+ dst_ptr += step;
+ }
+ rowIndex++;
+ src_ptr = src + rowIndex * src_stride;
+ dst_ptr = dst + rowIndex * dst_stride;
+ } while (rowIndex < h);
+}
+
+static void highbd_filter_vert_compute_small(const uint16_t *src,
+ int src_stride,
+ const __m128i *f, int taps,
+ int w, int h,
+ uint16_t *dst, int dst_stride,
+ int avg, int bd) {
+ int rowIndex = 0;
+ WritePixels write2pixels = write2pixelsTab[avg];
+ (void)w;
+
+ do {
+ filter_vert_horiz_parallel(src, src_stride, f, taps, dst, write2pixels,
+ bd);
+ rowIndex++;
+ src += src_stride;
+ dst += dst_stride;
+ } while (rowIndex < h);
+}
+
+void vp10_highbd_convolve_vert_sse4_1(const uint16_t *src, int src_stride,
+ uint16_t *dst, int dst_stride,
+ int w, int h,
+ const InterpFilterParams filter_params,
+ const int subpel_y_q4, int y_step_q4,
+ int avg, int bd) {
+ __m128i verf[6];
+ HbdSubpelFilterCoeffs vCoeffs;
+ const int tapsNum = filter_params.taps;
+
+ if (0 == subpel_y_q4 || 16 != y_step_q4) {
+ vp10_highbd_convolve_vert_c(src, src_stride, dst, dst_stride, w, h,
+ filter_params, subpel_y_q4, y_step_q4, avg,
+ bd);
+ return;
+ }
+
+ vCoeffs = vp10_hbd_get_subpel_filter_ver_signal_dir(
+ filter_params, subpel_y_q4 - 1);
+ if (!vCoeffs) {
+ vp10_highbd_convolve_vert_c(src, src_stride, dst, dst_stride, w, h,
+ filter_params, subpel_y_q4, y_step_q4, avg,
+ bd);
+ return;
+ }
+
+ verf[0] = *((const __m128i *)(vCoeffs));
+ verf[1] = *((const __m128i *)(vCoeffs + 1));
+ verf[2] = *((const __m128i *)(vCoeffs + 2));
+ verf[3] = *((const __m128i *)(vCoeffs + 3));
+ verf[4] = *((const __m128i *)(vCoeffs + 4));
+ verf[5] = *((const __m128i *)(vCoeffs + 5));
+
+ src -= src_stride * ((tapsNum >> 1) - 1);
+
+ if (w > 2) {
+ highbd_filter_vert_compute_large(src, src_stride, verf, tapsNum, w, h,
+ dst, dst_stride, avg, bd);
+ } else {
+ highbd_filter_vert_compute_small(src, src_stride, verf, tapsNum, w, h,
+ dst, dst_stride, avg, bd);
+ }
+}
diff --git a/vp10/vp10_common.mk b/vp10/vp10_common.mk
index 0253b4c..e68e083 100644
--- a/vp10/vp10_common.mk
+++ b/vp10/vp10_common.mk
@@ -74,6 +74,10 @@
VP10_COMMON_SRCS-yes += common/vp10_inv_txfm2d_cfg.h
VP10_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp10_convolve_ssse3.c
VP10_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp10_convolve_filters_ssse3.c
+ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
+VP10_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/vp10_highbd_convolve_sse4.c
+VP10_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/vp10_highbd_convolve_filters_sse4.c
+endif
VP10_COMMON_SRCS-yes += common/vp10_convolve.c
VP10_COMMON_SRCS-yes += common/vp10_convolve.h
VP10_COMMON_SRCS-$(CONFIG_ANS) += common/ans.h