Merge "[spatial svc]Add layer bitrates options and clean up parsing options from string"
diff --git a/test/invalid_file_test.cc b/test/invalid_file_test.cc
index cf86cf5..50e7c23 100644
--- a/test/invalid_file_test.cc
+++ b/test/invalid_file_test.cc
@@ -113,9 +113,12 @@
const DecodeParam kVP9InvalidFileTests[] = {
{1, "invalid-vp90-02-v2.webm"},
{1, "invalid-vp90-2-00-quantizer-00.webm.ivf.s5861_r01-05_b6-.v2.ivf"},
- {1, "invalid-vp90-03-v2.webm"},
+ {1, "invalid-vp90-03-v3.webm"},
{1, "invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-.ivf"},
{1, "invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf"},
+ {1, "invalid-vp90-2-12-droppable_1.ivf.s3676_r01-05_b6-.ivf"},
+ {1, "invalid-vp90-2-05-resize.ivf.s59293_r01-05_b6-.ivf"},
+ {1, "invalid-vp90-2-09-subpixel-00.ivf.s20492_r01-05_b6-.ivf"},
};
VP9_INSTANTIATE_TEST_CASE(InvalidFileTest,
diff --git a/test/test-data.sha1 b/test/test-data.sha1
index 6166749..84b13f9 100644
--- a/test/test-data.sha1
+++ b/test/test-data.sha1
@@ -10,8 +10,8 @@
25751f5d3b05ff03f0719ad42cd625348eb8961e invalid-vp90-01-v2.webm.res
d78e2fceba5ac942246503ec8366f879c4775ca5 invalid-vp90-02-v2.webm
8e2eff4af87d2b561cce2365713269e301457ef3 invalid-vp90-02-v2.webm.res
-df1a1453feb3c00d7d89746c7003b4163523bff3 invalid-vp90-03-v2.webm
-25dd58c22d23f75304d7ce7f69f4e5b02ef9119a invalid-vp90-03-v2.webm.res
+df1a1453feb3c00d7d89746c7003b4163523bff3 invalid-vp90-03-v3.webm
+4935c62becc68c13642a03db1e6d3e2331c1c612 invalid-vp90-03-v3.webm.res
d637297561dd904eb2c97a9015deeb31c4a1e8d2 invalid-vp90-2-08-tile_1x4_frame_parallel_all_key.webm
3a204bdbeaa3c6458b77bcebb8366d107267f55d invalid-vp90-2-08-tile_1x4_frame_parallel_all_key.webm.res
a432f96ff0a787268e2f94a8092ab161a18d1b06 park_joy_90p_10_420.y4m
@@ -691,3 +691,9 @@
368dccdde5288c13c25695d2eacdc7402cadf613 vp90-2-19-skip.webm.md5
ffe460282df2b0e7d4603c2158653ad96f574b02 vp90-2-19-skip-01.webm
bd21bc9eda4a4a36b221d71ede3a139fc3c7bd85 vp90-2-19-skip-01.webm.md5
+b03c408cf23158638da18dbc3323b99a1635c68a invalid-vp90-2-12-droppable_1.ivf.s3676_r01-05_b6-.ivf
+0a3884edb3fd8f9d9b500223e650f7de257b67d8 invalid-vp90-2-12-droppable_1.ivf.s3676_r01-05_b6-.ivf.res
+5e67e24e7f53fd189e565513cef8519b1bd6c712 invalid-vp90-2-05-resize.ivf.s59293_r01-05_b6-.ivf
+741158f67c0d9d23726624d06bdc482ad368afc9 invalid-vp90-2-05-resize.ivf.s59293_r01-05_b6-.ivf.res
+8b1f7bf7e86c0976d277f60e8fcd9539e75a079a invalid-vp90-2-09-subpixel-00.ivf.s20492_r01-05_b6-.ivf
+fb79dcbbbb8c82d5a750e339acce66e39a32f15f invalid-vp90-2-09-subpixel-00.ivf.s20492_r01-05_b6-.ivf.res
diff --git a/test/test.mk b/test/test.mk
index 7c34169..c839c92 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -797,14 +797,16 @@
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01-v2.webm.res
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-02-v2.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-02-v2.webm.res
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-03-v2.webm
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-03-v2.webm.res
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-03-v3.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-03-v3.webm.res
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-00-quantizer-00.webm.ivf.s5861_r01-05_b6-.v2.ivf
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-00-quantizer-00.webm.ivf.s5861_r01-05_b6-.v2.ivf.res
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-.ivf
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-.ivf.res
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf.res
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-05-resize.ivf.s59293_r01-05_b6-.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-05-resize.ivf.s59293_r01-05_b6-.ivf.res
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-08-tile_1x2_frame_parallel.webm.ivf.s47039_r01-05_b6-.ivf
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-08-tile_1x2_frame_parallel.webm.ivf.s47039_r01-05_b6-.ivf.res
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-08-tile_1x4_frame_parallel_all_key.webm
@@ -813,6 +815,10 @@
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-09-aq2.webm.ivf.s3984_r01-05_b6-.ivf.res
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-09-subpixel-00.ivf.s19552_r01-05_b6-.ivf
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-09-subpixel-00.ivf.s19552_r01-05_b6-.ivf.res
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-09-subpixel-00.ivf.s20492_r01-05_b6-.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-09-subpixel-00.ivf.s20492_r01-05_b6-.ivf.res
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-12-droppable_1.ivf.s3676_r01-05_b6-.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-12-droppable_1.ivf.s3676_r01-05_b6-.ivf.res
ifeq ($(CONFIG_DECODE_PERF_TESTS),yes)
# BBB VP9 streams
diff --git a/test/variance_test.cc b/test/variance_test.cc
index 925ff03..f76402e 100644
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
@@ -35,6 +35,14 @@
using ::std::tr1::tuple;
using libvpx_test::ACMRandom;
+static unsigned int mb_ss_ref(const int16_t *src) {
+ unsigned int res = 0;
+ for (int i = 0; i < 256; ++i) {
+ res += src[i] * src[i];
+ }
+ return res;
+}
+
static unsigned int variance_ref(const uint8_t *ref, const uint8_t *src,
int l2w, int l2h, unsigned int *sse_ptr) {
int se = 0;
@@ -76,6 +84,50 @@
return sse - (((int64_t) se * se) >> (l2w + l2h));
}
+typedef unsigned int (*SumOfSquaresFunction)(const int16_t *src);
+
+class SumOfSquaresTest : public ::testing::TestWithParam<SumOfSquaresFunction> {
+ public:
+ SumOfSquaresTest() : func_(GetParam()) {}
+
+ virtual ~SumOfSquaresTest() {
+ libvpx_test::ClearSystemState();
+ }
+
+ protected:
+ void ConstTest();
+ void RefTest();
+
+ SumOfSquaresFunction func_;
+ ACMRandom rnd_;
+};
+
+void SumOfSquaresTest::ConstTest() {
+ int16_t mem[256];
+ unsigned int res;
+ for (int v = 0; v < 256; ++v) {
+ for (int i = 0; i < 256; ++i) {
+ mem[i] = v;
+ }
+ ASM_REGISTER_STATE_CHECK(res = func_(mem));
+ EXPECT_EQ(256u * (v * v), res);
+ }
+}
+
+void SumOfSquaresTest::RefTest() {
+ int16_t mem[256];
+ for (int i = 0; i < 100; ++i) {
+ for (int j = 0; j < 256; ++j) {
+ mem[j] = rnd_.Rand8() - rnd_.Rand8();
+ }
+
+ const unsigned int expected = mb_ss_ref(mem);
+ unsigned int res;
+ ASM_REGISTER_STATE_CHECK(res = func_(mem));
+ EXPECT_EQ(expected, res);
+ }
+}
+
template<typename VarianceFunctionType>
class VarianceTest
: public ::testing::TestWithParam<tuple<int, int, VarianceFunctionType> > {
@@ -88,7 +140,7 @@
height_ = 1 << log2height_;
variance_ = get<2>(params);
- rnd(ACMRandom::DeterministicSeed());
+ rnd_.Reset(ACMRandom::DeterministicSeed());
block_size_ = width_ * height_;
src_ = reinterpret_cast<uint8_t *>(vpx_memalign(16, block_size_));
ref_ = new uint8_t[block_size_];
@@ -107,7 +159,7 @@
void RefTest();
void OneQuarterTest();
- ACMRandom rnd;
+ ACMRandom rnd_;
uint8_t* src_;
uint8_t* ref_;
int width_, log2width_;
@@ -135,8 +187,8 @@
void VarianceTest<VarianceFunctionType>::RefTest() {
for (int i = 0; i < 10; ++i) {
for (int j = 0; j < block_size_; j++) {
- src_[j] = rnd.Rand8();
- ref_[j] = rnd.Rand8();
+ src_[j] = rnd_.Rand8();
+ ref_[j] = rnd_.Rand8();
}
unsigned int sse1, sse2;
unsigned int var1;
@@ -206,7 +258,7 @@
height_ = 1 << log2height_;
subpel_variance_ = get<2>(params);
- rnd(ACMRandom::DeterministicSeed());
+ rnd_.Reset(ACMRandom::DeterministicSeed());
block_size_ = width_ * height_;
src_ = reinterpret_cast<uint8_t *>(vpx_memalign(16, block_size_));
sec_ = reinterpret_cast<uint8_t *>(vpx_memalign(16, block_size_));
@@ -226,7 +278,7 @@
protected:
void RefTest();
- ACMRandom rnd;
+ ACMRandom rnd_;
uint8_t *src_;
uint8_t *ref_;
uint8_t *sec_;
@@ -241,10 +293,10 @@
for (int x = 0; x < 16; ++x) {
for (int y = 0; y < 16; ++y) {
for (int j = 0; j < block_size_; j++) {
- src_[j] = rnd.Rand8();
+ src_[j] = rnd_.Rand8();
}
for (int j = 0; j < block_size_ + width_ + height_ + 1; j++) {
- ref_[j] = rnd.Rand8();
+ ref_[j] = rnd_.Rand8();
}
unsigned int sse1, sse2;
unsigned int var1;
@@ -263,11 +315,11 @@
for (int x = 0; x < 16; ++x) {
for (int y = 0; y < 16; ++y) {
for (int j = 0; j < block_size_; j++) {
- src_[j] = rnd.Rand8();
- sec_[j] = rnd.Rand8();
+ src_[j] = rnd_.Rand8();
+ sec_[j] = rnd_.Rand8();
}
for (int j = 0; j < block_size_ + width_ + height_ + 1; j++) {
- ref_[j] = rnd.Rand8();
+ ref_[j] = rnd_.Rand8();
}
unsigned int sse1, sse2;
unsigned int var1;
@@ -362,6 +414,13 @@
namespace vp9 {
#if CONFIG_VP9_ENCODER
+
+TEST_P(SumOfSquaresTest, Const) { ConstTest(); }
+TEST_P(SumOfSquaresTest, Ref) { RefTest(); }
+
+INSTANTIATE_TEST_CASE_P(C, SumOfSquaresTest,
+ ::testing::Values(vp9_get_mb_ss_c));
+
typedef VarianceTest<vp9_variance_fn_t> VP9VarianceTest;
typedef SubpelVarianceTest<vp9_subpixvariance_fn_t> VP9SubpelVarianceTest;
typedef SubpelVarianceTest<vp9_subp_avg_variance_fn_t> VP9SubpelAvgVarianceTest;
@@ -487,6 +546,10 @@
#if HAVE_SSE2
#if CONFIG_USE_X86INC
+
+INSTANTIATE_TEST_CASE_P(SSE2, SumOfSquaresTest,
+ ::testing::Values(vp9_get_mb_ss_sse2));
+
const vp9_variance_fn_t variance4x4_sse2 = vp9_variance4x4_sse2;
const vp9_variance_fn_t variance4x8_sse2 = vp9_variance4x8_sse2;
const vp9_variance_fn_t variance8x4_sse2 = vp9_variance8x4_sse2;
diff --git a/third_party/libyuv/README.libvpx b/third_party/libyuv/README.libvpx
index fb2b9d1..3869d25 100644
--- a/third_party/libyuv/README.libvpx
+++ b/third_party/libyuv/README.libvpx
@@ -13,4 +13,4 @@
in order to encode multiple resolution bit streams.
Local Modifications:
-None.
+cherry-pick 'Issue 24479004: Fix building with MSVC for arm'
diff --git a/third_party/libyuv/source/cpu_id.cc b/third_party/libyuv/source/cpu_id.cc
index deb4c44..8f8a403 100644
--- a/third_party/libyuv/source/cpu_id.cc
+++ b/third_party/libyuv/source/cpu_id.cc
@@ -15,7 +15,8 @@
#endif
#if !defined(__pnacl__) && !defined(__CLR_VER) && \
!defined(__native_client__) && \
- defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219)
+ defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219) && \
+ (defined(_M_IX86) || defined(_M_X64))
#include <immintrin.h> // For _xgetbv()
#endif
diff --git a/third_party/libyuv/source/row_win.cc b/third_party/libyuv/source/row_win.cc
index f58fc51..d79c353 100644
--- a/third_party/libyuv/source/row_win.cc
+++ b/third_party/libyuv/source/row_win.cc
@@ -21,7 +21,8 @@
#endif
// This module is for Visual C.
-#if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER)
+#if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && \
+ (defined(_M_IX86) || defined(_M_X64))
#define YG 74 /* (int8)(1.164 * 64 + 0.5) */
diff --git a/vp8/common/arm/neon/bilinearpredict_neon.c b/vp8/common/arm/neon/bilinearpredict_neon.c
index d77f2ba..9824a31 100644
--- a/vp8/common/arm/neon/bilinearpredict_neon.c
+++ b/vp8/common/arm/neon/bilinearpredict_neon.c
@@ -10,7 +10,7 @@
#include <arm_neon.h>
-static const uint16_t bifilter4_coeff[8][2] = {
+static const uint8_t bifilter4_coeff[8][2] = {
{128, 0},
{112, 16},
{ 96, 32},
@@ -64,8 +64,8 @@
q1u8 = vcombine_u8(d2u8, d3u8);
q2u8 = vcombine_u8(d4u8, d5u8);
- d0u8 = vdup_n_u8((uint8_t)bifilter4_coeff[xoffset][0]);
- d1u8 = vdup_n_u8((uint8_t)bifilter4_coeff[xoffset][1]);
+ d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]);
+ d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]);
q4u64 = vshrq_n_u64(vreinterpretq_u64_u8(q1u8), 8);
q5u64 = vshrq_n_u64(vreinterpretq_u64_u8(q2u8), 8);
@@ -155,8 +155,8 @@
q4u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
q5u8 = vld1q_u8(src_ptr);
- d0u8 = vdup_n_u8((uint8_t)bifilter4_coeff[xoffset][0]);
- d1u8 = vdup_n_u8((uint8_t)bifilter4_coeff[xoffset][1]);
+ d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]);
+ d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]);
q6u16 = vmull_u8(vget_low_u8(q1u8), d0u8);
q7u16 = vmull_u8(vget_low_u8(q2u8), d0u8);
@@ -245,8 +245,8 @@
q3u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
q4u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
- d0u8 = vdup_n_u8((uint8_t)bifilter4_coeff[xoffset][0]);
- d1u8 = vdup_n_u8((uint8_t)bifilter4_coeff[xoffset][1]);
+ d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]);
+ d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]);
q6u16 = vmull_u8(vget_low_u8(q1u8), d0u8);
q7u16 = vmull_u8(vget_low_u8(q2u8), d0u8);
diff --git a/vp8/common/arm/variance_arm.c b/vp8/common/arm/variance_arm.c
index e3f7083..467a509 100644
--- a/vp8/common/arm/variance_arm.c
+++ b/vp8/common/arm/variance_arm.c
@@ -95,7 +95,7 @@
#endif /* HAVE_MEDIA */
-#if HAVE_NEON_ASM
+#if HAVE_NEON
extern unsigned int vp8_sub_pixel_variance16x16_neon_func
(
diff --git a/vp8/common/rtcd_defs.pl b/vp8/common/rtcd_defs.pl
index 651809d..a90c876 100644
--- a/vp8/common/rtcd_defs.pl
+++ b/vp8/common/rtcd_defs.pl
@@ -530,13 +530,6 @@
}
#
-# Pick Loopfilter
-#
-add_proto qw/void vp8_yv12_copy_partial_frame/, "struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc";
-specialize qw/vp8_yv12_copy_partial_frame neon_asm/;
-$vp8_yv12_copy_partial_frame_neon_asm=vp8_yv12_copy_partial_frame_neon;
-
-#
# Denoiser filter
#
if (vpx_config("CONFIG_TEMPORAL_DENOISING") eq "yes") {
@@ -544,7 +537,6 @@
specialize qw/vp8_denoiser_filter sse2 neon/;
add_proto qw/int vp8_denoiser_filter_uv/, "unsigned char *mc_running_avg, int mc_avg_stride, unsigned char *running_avg, int avg_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising";
specialize qw/vp8_denoiser_filter_uv sse2 neon/;
-
}
# End of encoder only functions
diff --git a/vp8/encoder/arm/neon/picklpf_arm.c b/vp8/encoder/arm/neon/picklpf_arm.c
deleted file mode 100644
index ec8071e..0000000
--- a/vp8/encoder/arm/neon/picklpf_arm.c
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "vp8/common/loopfilter.h"
-#include "vpx_scale/yv12config.h"
-
-extern void vp8_memcpy_partial_neon(unsigned char *dst_ptr,
- unsigned char *src_ptr,
- int sz);
-
-
-void vp8_yv12_copy_partial_frame_neon(YV12_BUFFER_CONFIG *src_ybc,
- YV12_BUFFER_CONFIG *dst_ybc)
-{
- unsigned char *src_y, *dst_y;
- int yheight;
- int ystride;
- int yoffset;
- int linestocopy;
-
- yheight = src_ybc->y_height;
- ystride = src_ybc->y_stride;
-
- /* number of MB rows to use in partial filtering */
- linestocopy = (yheight >> 4) / PARTIAL_FRAME_FRACTION;
- linestocopy = linestocopy ? linestocopy << 4 : 16; /* 16 lines per MB */
-
- /* Copy extra 4 so that full filter context is available if filtering done
- * on the copied partial frame and not original. Partial filter does mb
- * filtering for top row also, which can modify3 pixels above.
- */
- linestocopy += 4;
- /* partial image starts at ~middle of frame (macroblock border) */
- yoffset = ystride * (((yheight >> 5) * 16) - 4);
- src_y = src_ybc->y_buffer + yoffset;
- dst_y = dst_ybc->y_buffer + yoffset;
-
- vp8_memcpy_partial_neon(dst_y, src_y, ystride * linestocopy);
-}
diff --git a/vp8/encoder/arm/neon/vp8_memcpy_neon.asm b/vp8/encoder/arm/neon/vp8_memcpy_neon.asm
deleted file mode 100644
index d219e2d..0000000
--- a/vp8/encoder/arm/neon/vp8_memcpy_neon.asm
+++ /dev/null
@@ -1,72 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8_memcpy_partial_neon|
-
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-;=========================================
-;this is not a full memcpy function!!!
-;void vp8_memcpy_partial_neon(unsigned char *dst_ptr, unsigned char *src_ptr,
-; int sz);
-|vp8_memcpy_partial_neon| PROC
- vpush {d8-d15}
- ;pld [r1] ;preload pred data
- ;pld [r1, #128]
- ;pld [r1, #256]
- ;pld [r1, #384]
-
- mov r12, r2, lsr #8 ;copy 256 bytes data at one time
-
-memcpy_neon_loop
- vld1.8 {q0, q1}, [r1]! ;load src data
- subs r12, r12, #1
- vld1.8 {q2, q3}, [r1]!
- vst1.8 {q0, q1}, [r0]! ;copy to dst_ptr
- vld1.8 {q4, q5}, [r1]!
- vst1.8 {q2, q3}, [r0]!
- vld1.8 {q6, q7}, [r1]!
- vst1.8 {q4, q5}, [r0]!
- vld1.8 {q8, q9}, [r1]!
- vst1.8 {q6, q7}, [r0]!
- vld1.8 {q10, q11}, [r1]!
- vst1.8 {q8, q9}, [r0]!
- vld1.8 {q12, q13}, [r1]!
- vst1.8 {q10, q11}, [r0]!
- vld1.8 {q14, q15}, [r1]!
- vst1.8 {q12, q13}, [r0]!
- vst1.8 {q14, q15}, [r0]!
-
- ;pld [r1] ;preload pred data -- need to adjust for real device
- ;pld [r1, #128]
- ;pld [r1, #256]
- ;pld [r1, #384]
-
- bne memcpy_neon_loop
-
- ands r3, r2, #0xff ;extra copy
- beq done_copy_neon_loop
-
-extra_copy_neon_loop
- vld1.8 {q0}, [r1]! ;load src data
- subs r3, r3, #16
- vst1.8 {q0}, [r0]!
- bne extra_copy_neon_loop
-
-done_copy_neon_loop
- vpop {d8-d15}
- bx lr
- ENDP
-
- END
diff --git a/vp8/encoder/picklpf.c b/vp8/encoder/picklpf.c
index 250d04c..f0c8f28 100644
--- a/vp8/encoder/picklpf.c
+++ b/vp8/encoder/picklpf.c
@@ -23,8 +23,8 @@
extern int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest);
-void vp8_yv12_copy_partial_frame_c(YV12_BUFFER_CONFIG *src_ybc,
- YV12_BUFFER_CONFIG *dst_ybc)
+static void yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc,
+ YV12_BUFFER_CONFIG *dst_ybc)
{
unsigned char *src_y, *dst_y;
int yheight;
@@ -173,7 +173,7 @@
/* Get the err using the previous frame's filter value. */
/* Copy the unfiltered / processed recon buffer to the new buffer */
- vp8_yv12_copy_partial_frame(saved_frame, cm->frame_to_show);
+ yv12_copy_partial_frame(saved_frame, cm->frame_to_show);
vp8_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val);
best_err = calc_partial_ssl_err(sd, cm->frame_to_show);
@@ -184,7 +184,7 @@
while (filt_val >= min_filter_level)
{
/* Apply the loop filter */
- vp8_yv12_copy_partial_frame(saved_frame, cm->frame_to_show);
+ yv12_copy_partial_frame(saved_frame, cm->frame_to_show);
vp8_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val);
/* Get the err for filtered frame */
@@ -214,7 +214,7 @@
while (filt_val < max_filter_level)
{
/* Apply the loop filter */
- vp8_yv12_copy_partial_frame(saved_frame, cm->frame_to_show);
+ yv12_copy_partial_frame(saved_frame, cm->frame_to_show);
vp8_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val);
diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index fc8c407..b1b079c 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -635,7 +635,6 @@
}
ctx->priv = (vpx_codec_priv_t *)priv;
- ctx->priv->sz = sizeof(*priv);
ctx->priv->init_flags = ctx->init_flags;
if (ctx->config.enc)
diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c
index 6d9ecc0..3ab8ed0 100644
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c
@@ -84,7 +84,6 @@
(vpx_codec_alg_priv_t *)vpx_calloc(1, sizeof(*priv));
ctx->priv = (vpx_codec_priv_t *)priv;
- ctx->priv->sz = sizeof(*priv);
ctx->priv->init_flags = ctx->init_flags;
priv->si.sz = sizeof(priv->si);
diff --git a/vp8/vp8cx_arm.mk b/vp8/vp8cx_arm.mk
index eb94202..551271e 100644
--- a/vp8/vp8cx_arm.mk
+++ b/vp8/vp8cx_arm.mk
@@ -36,9 +36,7 @@
#File list for neon
# encoder
VP8_CX_SRCS-$(HAVE_NEON_ASM) += encoder/arm/neon/fastquantizeb_neon$(ASM)
-VP8_CX_SRCS-$(HAVE_NEON_ASM) += encoder/arm/neon/picklpf_arm.c
VP8_CX_SRCS-$(HAVE_NEON_ASM) += encoder/arm/neon/vp8_mse16x16_neon$(ASM)
-VP8_CX_SRCS-$(HAVE_NEON_ASM) += encoder/arm/neon/vp8_memcpy_neon$(ASM)
VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/denoising_neon.c
VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp8_shortwalsh4x4_neon.c
diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c
index 403e105..471929a 100644
--- a/vp9/common/vp9_reconintra.c
+++ b/vp9/common/vp9_reconintra.c
@@ -9,11 +9,9 @@
*/
#include "./vpx_config.h"
+#include "./vp9_rtcd.h"
#include "vpx_mem/vpx_mem.h"
-#include "vpx_ports/vpx_once.h"
-
-#include "./vp9_rtcd.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_onyxc_int.h"
@@ -292,32 +290,32 @@
typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left);
-static intra_pred_fn pred[INTRA_MODES][4];
-static intra_pred_fn dc_pred[2][2][4];
+static intra_pred_fn pred[INTRA_MODES][TX_SIZES];
+static intra_pred_fn dc_pred[2][2][TX_SIZES];
-static void init_intra_pred_fn_ptrs(void) {
-#define intra_pred_allsizes(l, type) \
- l[0] = vp9_##type##_predictor_4x4; \
- l[1] = vp9_##type##_predictor_8x8; \
- l[2] = vp9_##type##_predictor_16x16; \
- l[3] = vp9_##type##_predictor_32x32
+void vp9_init_intra_predictors() {
+#define INIT_ALL_SIZES(p, type) \
+ p[TX_4X4] = vp9_##type##_predictor_4x4; \
+ p[TX_8X8] = vp9_##type##_predictor_8x8; \
+ p[TX_16X16] = vp9_##type##_predictor_16x16; \
+ p[TX_32X32] = vp9_##type##_predictor_32x32
- intra_pred_allsizes(pred[V_PRED], v);
- intra_pred_allsizes(pred[H_PRED], h);
- intra_pred_allsizes(pred[D207_PRED], d207);
- intra_pred_allsizes(pred[D45_PRED], d45);
- intra_pred_allsizes(pred[D63_PRED], d63);
- intra_pred_allsizes(pred[D117_PRED], d117);
- intra_pred_allsizes(pred[D135_PRED], d135);
- intra_pred_allsizes(pred[D153_PRED], d153);
- intra_pred_allsizes(pred[TM_PRED], tm);
+ INIT_ALL_SIZES(pred[V_PRED], v);
+ INIT_ALL_SIZES(pred[H_PRED], h);
+ INIT_ALL_SIZES(pred[D207_PRED], d207);
+ INIT_ALL_SIZES(pred[D45_PRED], d45);
+ INIT_ALL_SIZES(pred[D63_PRED], d63);
+ INIT_ALL_SIZES(pred[D117_PRED], d117);
+ INIT_ALL_SIZES(pred[D135_PRED], d135);
+ INIT_ALL_SIZES(pred[D153_PRED], d153);
+ INIT_ALL_SIZES(pred[TM_PRED], tm);
- intra_pred_allsizes(dc_pred[0][0], dc_128);
- intra_pred_allsizes(dc_pred[0][1], dc_top);
- intra_pred_allsizes(dc_pred[1][0], dc_left);
- intra_pred_allsizes(dc_pred[1][1], dc);
+ INIT_ALL_SIZES(dc_pred[0][0], dc_128);
+ INIT_ALL_SIZES(dc_pred[0][1], dc_top);
+ INIT_ALL_SIZES(dc_pred[1][0], dc_left);
+ INIT_ALL_SIZES(dc_pred[1][1], dc);
-#undef intra_pred_allsizes
+#undef INIT_ALL_SIZES
}
static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
@@ -343,8 +341,6 @@
// 129 G H .. S T T T T T
// ..
- once(init_intra_pred_fn_ptrs);
-
// Get current frame pointer, width and height.
if (plane == 0) {
frame_width = xd->cur_buf->y_width;
diff --git a/vp9/common/vp9_reconintra.h b/vp9/common/vp9_reconintra.h
index d09d2a1..845f3bc 100644
--- a/vp9/common/vp9_reconintra.h
+++ b/vp9/common/vp9_reconintra.h
@@ -18,6 +18,8 @@
extern "C" {
#endif
+void vp9_init_intra_predictors();
+
void vp9_predict_intra_block(const MACROBLOCKD *xd, int block_idx, int bwl_in,
TX_SIZE tx_size, PREDICTION_MODE mode,
const uint8_t *ref, int ref_stride,
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index f2a3eef..667e057 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -268,7 +268,7 @@
#
if (vpx_config("CONFIG_VP9_POSTPROC") eq "yes") {
add_proto qw/void vp9_mbpost_proc_down/, "uint8_t *dst, int pitch, int rows, int cols, int flimit";
-specialize qw/vp9_mbpost_proc_down mmx sse2/;
+specialize qw/vp9_mbpost_proc_down sse2/;
$vp9_mbpost_proc_down_sse2=vp9_mbpost_proc_down_xmm;
add_proto qw/void vp9_mbpost_proc_across_ip/, "uint8_t *src, int pitch, int rows, int cols, int flimit";
@@ -276,23 +276,14 @@
$vp9_mbpost_proc_across_ip_sse2=vp9_mbpost_proc_across_ip_xmm;
add_proto qw/void vp9_post_proc_down_and_across/, "const uint8_t *src_ptr, uint8_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit";
-specialize qw/vp9_post_proc_down_and_across mmx sse2/;
+specialize qw/vp9_post_proc_down_and_across sse2/;
$vp9_post_proc_down_and_across_sse2=vp9_post_proc_down_and_across_xmm;
add_proto qw/void vp9_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
-specialize qw/vp9_plane_add_noise mmx sse2/;
+specialize qw/vp9_plane_add_noise sse2/;
$vp9_plane_add_noise_sse2=vp9_plane_add_noise_wmt;
}
-add_proto qw/void vp9_blend_mb_inner/, "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride";
-specialize qw/vp9_blend_mb_inner/;
-
-add_proto qw/void vp9_blend_mb_outer/, "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride";
-specialize qw/vp9_blend_mb_outer/;
-
-add_proto qw/void vp9_blend_b/, "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride";
-specialize qw/vp9_blend_b/;
-
#
# Sub Pixel Filters
#
@@ -693,19 +684,19 @@
specialize qw/vp9_sad4x4x4d sse/;
add_proto qw/unsigned int vp9_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
-specialize qw/vp9_mse16x16 sse2 avx2/;
+specialize qw/vp9_mse16x16 avx2/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
-specialize qw/vp9_mse8x16 sse2/;
+specialize qw/vp9_mse8x16/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
-specialize qw/vp9_mse16x8 sse2/;
+specialize qw/vp9_mse16x8/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
-specialize qw/vp9_mse8x8 sse2/;
+specialize qw/vp9_mse8x8/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_get_mb_ss/, "const int16_t *";
-specialize qw/vp9_get_mb_ss sse2/;
+specialize qw/vp9_get_mb_ss/, "$sse2_x86inc";
# ENCODEMB INVOKE
add_proto qw/int64_t vp9_block_error/, "const int16_t *coeff, const int16_t *dqcoeff, intptr_t block_size, int64_t *ssz";
diff --git a/vp9/common/x86/vp9_postproc_mmx.asm b/vp9/common/x86/vp9_postproc_mmx.asm
deleted file mode 100644
index 5b8deef..0000000
--- a/vp9/common/x86/vp9_postproc_mmx.asm
+++ /dev/null
@@ -1,533 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
-%include "vpx_ports/x86_abi_support.asm"
-
-%define VP9_FILTER_WEIGHT 128
-%define VP9_FILTER_SHIFT 7
-
-;void vp9_post_proc_down_and_across_mmx
-;(
-; unsigned char *src_ptr,
-; unsigned char *dst_ptr,
-; int src_pixels_per_line,
-; int dst_pixels_per_line,
-; int rows,
-; int cols,
-; int flimit
-;)
-global sym(vp9_post_proc_down_and_across_mmx) PRIVATE
-sym(vp9_post_proc_down_and_across_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
-%if ABI_IS_32BIT=1 && CONFIG_PIC=1
- ; move the global rd onto the stack, since we don't have enough registers
- ; to do PIC addressing
- movq mm0, [GLOBAL(rd)]
- sub rsp, 8
- movq [rsp], mm0
-%define RD [rsp]
-%else
-%define RD [GLOBAL(rd)]
-%endif
-
- push rbx
- lea rbx, [GLOBAL(Blur)]
- movd mm2, dword ptr arg(6) ;flimit
- punpcklwd mm2, mm2
- punpckldq mm2, mm2
-
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(1) ;dst_ptr
-
- movsxd rcx, DWORD PTR arg(4) ;rows
- movsxd rax, DWORD PTR arg(2) ;src_pixels_per_line ; destination pitch?
- pxor mm0, mm0 ; mm0 = 00000000
-
-.nextrow:
-
- xor rdx, rdx ; clear out rdx for use as loop counter
-.nextcol:
-
- pxor mm7, mm7 ; mm7 = 00000000
- movq mm6, [rbx + 32 ] ; mm6 = kernel 2 taps
- movq mm3, [rsi] ; mm4 = r0 p0..p7
- punpcklbw mm3, mm0 ; mm3 = p0..p3
- movq mm1, mm3 ; mm1 = p0..p3
- pmullw mm3, mm6 ; mm3 *= kernel 2 modifiers
-
- movq mm6, [rbx + 48] ; mm6 = kernel 3 taps
- movq mm5, [rsi + rax] ; mm4 = r1 p0..p7
- punpcklbw mm5, mm0 ; mm5 = r1 p0..p3
- pmullw mm6, mm5 ; mm6 *= p0..p3 * kernel 3 modifiers
- paddusw mm3, mm6 ; mm3 += mm6
-
- ; thresholding
- movq mm7, mm1 ; mm7 = r0 p0..p3
- psubusw mm7, mm5 ; mm7 = r0 p0..p3 - r1 p0..p3
- psubusw mm5, mm1 ; mm5 = r1 p0..p3 - r0 p0..p3
- paddusw mm7, mm5 ; mm7 = abs(r0 p0..p3 - r1 p0..p3)
- pcmpgtw mm7, mm2
-
- movq mm6, [rbx + 64 ] ; mm6 = kernel 4 modifiers
- movq mm5, [rsi + 2*rax] ; mm4 = r2 p0..p7
- punpcklbw mm5, mm0 ; mm5 = r2 p0..p3
- pmullw mm6, mm5 ; mm5 *= kernel 4 modifiers
- paddusw mm3, mm6 ; mm3 += mm5
-
- ; thresholding
- movq mm6, mm1 ; mm6 = r0 p0..p3
- psubusw mm6, mm5 ; mm6 = r0 p0..p3 - r2 p0..p3
- psubusw mm5, mm1 ; mm5 = r2 p0..p3 - r2 p0..p3
- paddusw mm6, mm5 ; mm6 = abs(r0 p0..p3 - r2 p0..p3)
- pcmpgtw mm6, mm2
- por mm7, mm6 ; accumulate thresholds
-
-
- neg rax
- movq mm6, [rbx ] ; kernel 0 taps
- movq mm5, [rsi+2*rax] ; mm4 = r-2 p0..p7
- punpcklbw mm5, mm0 ; mm5 = r-2 p0..p3
- pmullw mm6, mm5 ; mm5 *= kernel 0 modifiers
- paddusw mm3, mm6 ; mm3 += mm5
-
- ; thresholding
- movq mm6, mm1 ; mm6 = r0 p0..p3
- psubusw mm6, mm5 ; mm6 = p0..p3 - r-2 p0..p3
- psubusw mm5, mm1 ; mm5 = r-2 p0..p3 - p0..p3
- paddusw mm6, mm5 ; mm6 = abs(r0 p0..p3 - r-2 p0..p3)
- pcmpgtw mm6, mm2
- por mm7, mm6 ; accumulate thresholds
-
- movq mm6, [rbx + 16] ; kernel 1 taps
- movq mm4, [rsi+rax] ; mm4 = r-1 p0..p7
- punpcklbw mm4, mm0 ; mm4 = r-1 p0..p3
- pmullw mm6, mm4 ; mm4 *= kernel 1 modifiers.
- paddusw mm3, mm6 ; mm3 += mm5
-
- ; thresholding
- movq mm6, mm1 ; mm6 = r0 p0..p3
- psubusw mm6, mm4 ; mm6 = p0..p3 - r-2 p0..p3
- psubusw mm4, mm1 ; mm5 = r-1 p0..p3 - p0..p3
- paddusw mm6, mm4 ; mm6 = abs(r0 p0..p3 - r-1 p0..p3)
- pcmpgtw mm6, mm2
- por mm7, mm6 ; accumulate thresholds
-
-
- paddusw mm3, RD ; mm3 += round value
- psraw mm3, VP9_FILTER_SHIFT ; mm3 /= 128
-
- pand mm1, mm7 ; mm1 select vals > thresh from source
- pandn mm7, mm3 ; mm7 select vals < thresh from blurred result
- paddusw mm1, mm7 ; combination
-
- packuswb mm1, mm0 ; pack to bytes
-
- movd [rdi], mm1 ;
- neg rax ; pitch is positive
-
-
- add rsi, 4
- add rdi, 4
- add rdx, 4
-
- cmp edx, dword ptr arg(5) ;cols
- jl .nextcol
- ; done with the all cols, start the across filtering in place
- sub rsi, rdx
- sub rdi, rdx
-
-
- push rax
- xor rdx, rdx
- mov rax, [rdi-4];
-
-.acrossnextcol:
- pxor mm7, mm7 ; mm7 = 00000000
- movq mm6, [rbx + 32 ] ;
- movq mm4, [rdi+rdx] ; mm4 = p0..p7
- movq mm3, mm4 ; mm3 = p0..p7
- punpcklbw mm3, mm0 ; mm3 = p0..p3
- movq mm1, mm3 ; mm1 = p0..p3
- pmullw mm3, mm6 ; mm3 *= kernel 2 modifiers
-
- movq mm6, [rbx + 48]
- psrlq mm4, 8 ; mm4 = p1..p7
- movq mm5, mm4 ; mm5 = p1..p7
- punpcklbw mm5, mm0 ; mm5 = p1..p4
- pmullw mm6, mm5 ; mm6 *= p1..p4 * kernel 3 modifiers
- paddusw mm3, mm6 ; mm3 += mm6
-
- ; thresholding
- movq mm7, mm1 ; mm7 = p0..p3
- psubusw mm7, mm5 ; mm7 = p0..p3 - p1..p4
- psubusw mm5, mm1 ; mm5 = p1..p4 - p0..p3
- paddusw mm7, mm5 ; mm7 = abs(p0..p3 - p1..p4)
- pcmpgtw mm7, mm2
-
- movq mm6, [rbx + 64 ]
- psrlq mm4, 8 ; mm4 = p2..p7
- movq mm5, mm4 ; mm5 = p2..p7
- punpcklbw mm5, mm0 ; mm5 = p2..p5
- pmullw mm6, mm5 ; mm5 *= kernel 4 modifiers
- paddusw mm3, mm6 ; mm3 += mm5
-
- ; thresholding
- movq mm6, mm1 ; mm6 = p0..p3
- psubusw mm6, mm5 ; mm6 = p0..p3 - p1..p4
- psubusw mm5, mm1 ; mm5 = p1..p4 - p0..p3
- paddusw mm6, mm5 ; mm6 = abs(p0..p3 - p1..p4)
- pcmpgtw mm6, mm2
- por mm7, mm6 ; accumulate thresholds
-
-
- movq mm6, [rbx ]
- movq mm4, [rdi+rdx-2] ; mm4 = p-2..p5
- movq mm5, mm4 ; mm5 = p-2..p5
- punpcklbw mm5, mm0 ; mm5 = p-2..p1
- pmullw mm6, mm5 ; mm5 *= kernel 0 modifiers
- paddusw mm3, mm6 ; mm3 += mm5
-
- ; thresholding
- movq mm6, mm1 ; mm6 = p0..p3
- psubusw mm6, mm5 ; mm6 = p0..p3 - p1..p4
- psubusw mm5, mm1 ; mm5 = p1..p4 - p0..p3
- paddusw mm6, mm5 ; mm6 = abs(p0..p3 - p1..p4)
- pcmpgtw mm6, mm2
- por mm7, mm6 ; accumulate thresholds
-
- movq mm6, [rbx + 16]
- psrlq mm4, 8 ; mm4 = p-1..p5
- punpcklbw mm4, mm0 ; mm4 = p-1..p2
- pmullw mm6, mm4 ; mm4 *= kernel 1 modifiers.
- paddusw mm3, mm6 ; mm3 += mm5
-
- ; thresholding
- movq mm6, mm1 ; mm6 = p0..p3
- psubusw mm6, mm4 ; mm6 = p0..p3 - p1..p4
- psubusw mm4, mm1 ; mm5 = p1..p4 - p0..p3
- paddusw mm6, mm4 ; mm6 = abs(p0..p3 - p1..p4)
- pcmpgtw mm6, mm2
- por mm7, mm6 ; accumulate thresholds
-
- paddusw mm3, RD ; mm3 += round value
- psraw mm3, VP9_FILTER_SHIFT ; mm3 /= 128
-
- pand mm1, mm7 ; mm1 select vals > thresh from source
- pandn mm7, mm3 ; mm7 select vals < thresh from blurred result
- paddusw mm1, mm7 ; combination
-
- packuswb mm1, mm0 ; pack to bytes
- mov DWORD PTR [rdi+rdx-4], eax ; store previous four bytes
- movd eax, mm1
-
- add rdx, 4
- cmp edx, dword ptr arg(5) ;cols
- jl .acrossnextcol;
-
- mov DWORD PTR [rdi+rdx-4], eax
- pop rax
-
- ; done with this rwo
- add rsi,rax ; next line
- movsxd rax, dword ptr arg(3) ;dst_pixels_per_line ; destination pitch?
- add rdi,rax ; next destination
- movsxd rax, dword ptr arg(2) ;src_pixels_per_line ; destination pitch?
-
- dec rcx ; decrement count
- jnz .nextrow ; next row
- pop rbx
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-%undef RD
-
-
-;void vp9_mbpost_proc_down_mmx(unsigned char *dst,
-; int pitch, int rows, int cols,int flimit)
-extern sym(vp9_rv)
-global sym(vp9_mbpost_proc_down_mmx) PRIVATE
-sym(vp9_mbpost_proc_down_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- ALIGN_STACK 16, rax
- sub rsp, 136
-
- ; unsigned char d[16][8] at [rsp]
- ; create flimit2 at [rsp+128]
- mov eax, dword ptr arg(4) ;flimit
- mov [rsp+128], eax
- mov [rsp+128+4], eax
-%define flimit2 [rsp+128]
-
-%if ABI_IS_32BIT=0
- lea r8, [GLOBAL(sym(vp9_rv))]
-%endif
-
- ;rows +=8;
- add dword ptr arg(2), 8
-
- ;for(c=0; c<cols; c+=4)
-.loop_col:
- mov rsi, arg(0) ;s
- pxor mm0, mm0 ;
-
- movsxd rax, dword ptr arg(1) ;pitch ;
- neg rax ; rax = -pitch
-
- lea rsi, [rsi + rax*8]; ; rdi = s[-pitch*8]
- neg rax
-
-
- pxor mm5, mm5
- pxor mm6, mm6 ;
-
- pxor mm7, mm7 ;
- mov rdi, rsi
-
- mov rcx, 15 ;
-
-.loop_initvar:
- movd mm1, DWORD PTR [rdi];
- punpcklbw mm1, mm0 ;
-
- paddw mm5, mm1 ;
- pmullw mm1, mm1 ;
-
- movq mm2, mm1 ;
- punpcklwd mm1, mm0 ;
-
- punpckhwd mm2, mm0 ;
- paddd mm6, mm1 ;
-
- paddd mm7, mm2 ;
- lea rdi, [rdi+rax] ;
-
- dec rcx
- jne .loop_initvar
- ;save the var and sum
- xor rdx, rdx
-.loop_row:
- movd mm1, DWORD PTR [rsi] ; [s-pitch*8]
- movd mm2, DWORD PTR [rdi] ; [s+pitch*7]
-
- punpcklbw mm1, mm0
- punpcklbw mm2, mm0
-
- paddw mm5, mm2
- psubw mm5, mm1
-
- pmullw mm2, mm2
- movq mm4, mm2
-
- punpcklwd mm2, mm0
- punpckhwd mm4, mm0
-
- paddd mm6, mm2
- paddd mm7, mm4
-
- pmullw mm1, mm1
- movq mm2, mm1
-
- punpcklwd mm1, mm0
- psubd mm6, mm1
-
- punpckhwd mm2, mm0
- psubd mm7, mm2
-
-
- movq mm3, mm6
- pslld mm3, 4
-
- psubd mm3, mm6
- movq mm1, mm5
-
- movq mm4, mm5
- pmullw mm1, mm1
-
- pmulhw mm4, mm4
- movq mm2, mm1
-
- punpcklwd mm1, mm4
- punpckhwd mm2, mm4
-
- movq mm4, mm7
- pslld mm4, 4
-
- psubd mm4, mm7
-
- psubd mm3, mm1
- psubd mm4, mm2
-
- psubd mm3, flimit2
- psubd mm4, flimit2
-
- psrad mm3, 31
- psrad mm4, 31
-
- packssdw mm3, mm4
- packsswb mm3, mm0
-
- movd mm1, DWORD PTR [rsi+rax*8]
-
- movq mm2, mm1
- punpcklbw mm1, mm0
-
- paddw mm1, mm5
- mov rcx, rdx
-
- and rcx, 127
-%if ABI_IS_32BIT=1 && CONFIG_PIC=1
- push rax
- lea rax, [GLOBAL(sym(vp9_rv))]
- movq mm4, [rax + rcx*2] ;vp9_rv[rcx*2]
- pop rax
-%elif ABI_IS_32BIT=0
- movq mm4, [r8 + rcx*2] ;vp9_rv[rcx*2]
-%else
- movq mm4, [sym(vp9_rv) + rcx*2]
-%endif
- paddw mm1, mm4
- ;paddw xmm1, eight8s
- psraw mm1, 4
-
- packuswb mm1, mm0
- pand mm1, mm3
-
- pandn mm3, mm2
- por mm1, mm3
-
- and rcx, 15
- movd DWORD PTR [rsp+rcx*4], mm1 ;d[rcx*4]
-
- mov rcx, rdx
- sub rcx, 8
-
- and rcx, 15
- movd mm1, DWORD PTR [rsp+rcx*4] ;d[rcx*4]
-
- movd [rsi], mm1
- lea rsi, [rsi+rax]
-
- lea rdi, [rdi+rax]
- add rdx, 1
-
- cmp edx, dword arg(2) ;rows
- jl .loop_row
-
-
- add dword arg(0), 4 ; s += 4
- sub dword arg(3), 4 ; cols -= 4
- cmp dword arg(3), 0
- jg .loop_col
-
- add rsp, 136
- pop rsp
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-%undef flimit2
-
-
-;void vp9_plane_add_noise_mmx (unsigned char *start, unsigned char *noise,
-; unsigned char blackclamp[16],
-; unsigned char whiteclamp[16],
-; unsigned char bothclamp[16],
-; unsigned int width, unsigned int height, int pitch)
-global sym(vp9_plane_add_noise_mmx) PRIVATE
-sym(vp9_plane_add_noise_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 8
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
-.addnoise_loop:
- call sym(LIBVPX_RAND) WRT_PLT
- mov rcx, arg(1) ;noise
- and rax, 0xff
- add rcx, rax
-
- ; we rely on the fact that the clamping vectors are stored contiguously
- ; in black/white/both order. Note that we have to reload this here because
- ; rdx could be trashed by rand()
- mov rdx, arg(2) ; blackclamp
-
-
- mov rdi, rcx
- movsxd rcx, dword arg(5) ;[Width]
- mov rsi, arg(0) ;Pos
- xor rax,rax
-
-.addnoise_nextset:
- movq mm1,[rsi+rax] ; get the source
-
- psubusb mm1, [rdx] ;blackclamp ; clamp both sides so we don't outrange adding noise
- paddusb mm1, [rdx+32] ;bothclamp
- psubusb mm1, [rdx+16] ;whiteclamp
-
- movq mm2,[rdi+rax] ; get the noise for this line
- paddb mm1,mm2 ; add it in
- movq [rsi+rax],mm1 ; store the result
-
- add rax,8 ; move to the next line
-
- cmp rax, rcx
- jl .addnoise_nextset
-
- movsxd rax, dword arg(7) ; Pitch
- add arg(0), rax ; Start += Pitch
- sub dword arg(6), 1 ; Height -= 1
- jg .addnoise_loop
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-SECTION_RODATA
-align 16
-Blur:
- times 16 dw 16
- times 8 dw 64
- times 16 dw 16
- times 8 dw 0
-
-rd:
- times 4 dw 0x40
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index 372dc83..a9c03f0 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -330,6 +330,9 @@
if (!vp9_is_valid_scale(&ref_buffer->sf))
vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
"Invalid scale factors");
+ if (ref_buffer->buf->corrupted)
+ vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
+ "Block reference is corrupt");
vp9_setup_pre_planes(xd, idx, ref_buffer->buf, mi_row, mi_col,
&ref_buffer->sf);
xd->corrupted |= ref_buffer->buf->corrupted;
@@ -628,11 +631,13 @@
#endif
if (cm->width != width || cm->height != height) {
const int new_mi_rows =
- calc_mi_size(ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2) >> MI_SIZE_LOG2);
+ ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2) >> MI_SIZE_LOG2;
const int new_mi_cols =
- calc_mi_size(ALIGN_POWER_OF_TWO(width, MI_SIZE_LOG2) >> MI_SIZE_LOG2);
- if (new_mi_cols > cm->mi_stride ||
- (new_mi_rows * new_mi_cols > cm->mi_alloc_size)) {
+ ALIGN_POWER_OF_TWO(width, MI_SIZE_LOG2) >> MI_SIZE_LOG2;
+
+ // Allocations in vp9_alloc_context_buffers() depend on individual
+ // dimensions as well as the overall size.
+ if (new_mi_cols > cm->mi_cols || new_mi_rows > cm->mi_rows) {
if (vp9_alloc_context_buffers(cm, width, height))
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
"Failed to allocate context buffers");
@@ -675,6 +680,10 @@
YV12_BUFFER_CONFIG *const buf = cm->frame_refs[i].buf;
width = buf->y_crop_width;
height = buf->y_crop_height;
+ if (buf->corrupted) {
+ vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
+ "Frame reference is corrupt");
+ }
found = 1;
break;
}
@@ -892,7 +901,7 @@
pbi->mb.corrupted |= tile_data->xd.corrupted;
}
// Loopfilter one row.
- if (cm->lf.filter_level) {
+ if (cm->lf.filter_level && !pbi->mb.corrupted) {
const int lf_start = mi_row - MI_BLOCK_SIZE;
LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
@@ -915,7 +924,7 @@
}
// Loopfilter remaining rows in the frame.
- if (cm->lf.filter_level) {
+ if (cm->lf.filter_level && !pbi->mb.corrupted) {
LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
winterface->sync(&pbi->lf_worker);
lf_data->start = lf_data->stop;
@@ -1442,9 +1451,11 @@
if (pbi->max_threads > 1 && tile_rows == 1 && tile_cols > 1 &&
cm->frame_parallel_decoding_mode) {
*p_data_end = decode_tiles_mt(pbi, data + first_partition_size, data_end);
- // If multiple threads are used to decode tiles, then we use those threads
- // to do parallel loopfiltering.
- vp9_loop_filter_frame_mt(new_fb, pbi, cm, cm->lf.filter_level, 0);
+ if (!xd->corrupted) {
+ // If multiple threads are used to decode tiles, then we use those threads
+ // to do parallel loopfiltering.
+ vp9_loop_filter_frame_mt(new_fb, pbi, cm, cm->lf.filter_level, 0);
+ }
} else {
*p_data_end = decode_tiles(pbi, data + first_partition_size, data_end);
}
diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c
index f461af5..9106b0d 100644
--- a/vp9/decoder/vp9_decoder.c
+++ b/vp9/decoder/vp9_decoder.c
@@ -25,6 +25,7 @@
#include "vp9/common/vp9_postproc.h"
#endif
#include "vp9/common/vp9_quant_common.h"
+#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/decoder/vp9_decodeframe.h"
@@ -36,7 +37,9 @@
static int init_done = 0;
if (!init_done) {
+ vp9_rtcd();
vp9_init_neighbors();
+ vp9_init_intra_predictors();
init_done = 1;
}
}
@@ -59,8 +62,6 @@
cm->error.setjmp = 1;
initialize_dec();
- vp9_rtcd();
-
// Initialize the references to not point to any frame buffers.
vpx_memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map));
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index aef20f2..b726383 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -112,7 +112,7 @@
int quant_fp;
// skip forward transform and quantization
- int skip_txfm[MAX_MB_PLANE << 2];
+ uint8_t skip_txfm[MAX_MB_PLANE << 2];
int64_t bsse[MAX_MB_PLANE << 2];
diff --git a/vp9/encoder/vp9_context_tree.h b/vp9/encoder/vp9_context_tree.h
index fccdaf5..236389b 100644
--- a/vp9/encoder/vp9_context_tree.h
+++ b/vp9/encoder/vp9_context_tree.h
@@ -36,7 +36,7 @@
// For current partition, only if all Y, U, and V transform blocks'
// coefficients are quantized to 0, skippable is set to 0.
int skippable;
- int skip_txfm[MAX_MB_PLANE << 2];
+ uint8_t skip_txfm[MAX_MB_PLANE << 2];
int best_mode_index;
int hybrid_pred_diff;
int comp_pred_diff;
diff --git a/vp9/encoder/vp9_denoiser.c b/vp9/encoder/vp9_denoiser.c
index e047f7e..c4cf5ee 100644
--- a/vp9/encoder/vp9_denoiser.c
+++ b/vp9/encoder/vp9_denoiser.c
@@ -78,7 +78,8 @@
int mc_avg_stride,
uint8_t *avg, int avg_stride,
int increase_denoising,
- BLOCK_SIZE bs) {
+ BLOCK_SIZE bs,
+ int motion_magnitude) {
int r, c;
const uint8_t *sig_start = sig;
const uint8_t *mc_avg_start = mc_avg;
@@ -86,6 +87,19 @@
int diff, adj, absdiff, delta;
int adj_val[] = {3, 4, 6};
int total_adj = 0;
+ int shift_inc = 1;
+
+ /* If motion_magnitude is small, making the denoiser more aggressive by
+ * increasing the adjustment for each level. Add another increment for
+ * blocks that are labeled for increase denoising. */
+ if (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) {
+ if (increase_denoising) {
+ shift_inc = 2;
+ }
+ adj_val[0] += shift_inc;
+ adj_val[1] += shift_inc;
+ adj_val[2] += shift_inc;
+ }
// First attempt to apply a strong temporal denoising filter.
for (r = 0; r < heights[bs]; ++r) {
@@ -130,7 +144,8 @@
// Otherwise, we try to dampen the filter if the delta is not too high.
delta = ((abs(total_adj) - total_adj_strong_thresh(bs, increase_denoising))
>> 8) + 1;
- if (delta > delta_thresh(bs, increase_denoising)) {
+
+ if (delta >= delta_thresh(bs, increase_denoising)) {
return COPY_BLOCK;
}
@@ -191,7 +206,8 @@
int increase_denoising,
int mi_row,
int mi_col,
- PICK_MODE_CONTEXT *ctx
+ PICK_MODE_CONTEXT *ctx,
+ int *motion_magnitude
) {
int mv_col, mv_row;
int sse_diff = ctx->zeromv_sse - ctx->newmv_sse;
@@ -216,6 +232,8 @@
mv_col = ctx->best_sse_mv.as_mv.col;
mv_row = ctx->best_sse_mv.as_mv.row;
+ *motion_magnitude = mv_row * mv_row + mv_col * mv_col;
+
frame = ctx->best_reference_frame;
// If the best reference frame uses inter-prediction and there is enough of a
@@ -303,6 +321,7 @@
void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb,
int mi_row, int mi_col, BLOCK_SIZE bs,
PICK_MODE_CONTEXT *ctx) {
+ int motion_magnitude = 0;
VP9_DENOISER_DECISION decision = FILTER_BLOCK;
YV12_BUFFER_CONFIG avg = denoiser->running_avg_y[INTRA_FRAME];
YV12_BUFFER_CONFIG mc_avg = denoiser->mc_running_avg_y;
@@ -313,13 +332,14 @@
decision = perform_motion_compensation(denoiser, mb, bs,
denoiser->increase_denoising,
- mi_row, mi_col, ctx);
+ mi_row, mi_col, ctx,
+ &motion_magnitude);
if (decision == FILTER_BLOCK) {
decision = denoiser_filter(src.buf, src.stride,
mc_avg_start, mc_avg.y_stride,
avg_start, avg.y_stride,
- 0, bs);
+ 0, bs, motion_magnitude);
}
if (decision == FILTER_BLOCK) {
diff --git a/vp9/encoder/vp9_denoiser.h b/vp9/encoder/vp9_denoiser.h
index 1c827b6..a913add 100644
--- a/vp9/encoder/vp9_denoiser.h
+++ b/vp9/encoder/vp9_denoiser.h
@@ -18,6 +18,8 @@
extern "C" {
#endif
+#define MOTION_MAGNITUDE_THRESHOLD (8*3)
+
typedef enum vp9_denoiser_decision {
COPY_BLOCK,
FILTER_BLOCK
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 0026ce8..2ca91b9 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -24,6 +24,7 @@
#include "vp9/common/vp9_postproc.h"
#endif
#include "vp9/common/vp9_reconinter.h"
+#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/common/vp9_tile_common.h"
@@ -142,7 +143,9 @@
static int init_done = 0;
if (!init_done) {
+ vp9_rtcd();
vp9_init_neighbors();
+ vp9_init_intra_predictors();
vp9_coef_tree_initialize();
vp9_tokenize_initialize();
vp9_init_me_luts();
@@ -422,21 +425,6 @@
}
}
-
-static void set_speed_features(VP9_COMP *cpi) {
-#if CONFIG_INTERNAL_STATS
- int i;
- for (i = 0; i < MAX_MODES; ++i)
- cpi->mode_chosen_counts[i] = 0;
-#endif
-
- vp9_set_speed_features(cpi);
-
- // Set rd thresholds based on mode and speed setting
- vp9_set_rd_speed_thresholds(cpi);
- vp9_set_rd_speed_thresholds_sub8x8(cpi);
-}
-
static void alloc_raw_frame_buffers(VP9_COMP *cpi) {
VP9_COMMON *cm = &cpi->common;
const VP9EncoderConfig *oxcf = &cpi->oxcf;
@@ -768,8 +756,6 @@
cm->error.setjmp = 1;
- vp9_rtcd();
-
cpi->use_svc = 0;
init_config(cpi, oxcf);
@@ -985,7 +971,7 @@
}
}
- set_speed_features(cpi);
+ vp9_set_speed_features(cpi);
// Allocate memory to store variances for a frame.
CHECK_MEM_ERROR(cm, cpi->source_diff_var,
@@ -2246,7 +2232,16 @@
}
#endif
- set_speed_features(cpi);
+#if CONFIG_INTERNAL_STATS
+ int i;
+ for (i = 0; i < MAX_MODES; ++i)
+ cpi->mode_chosen_counts[i] = 0;
+#endif
+
+ vp9_set_speed_features(cpi);
+
+ vp9_set_rd_speed_thresholds(cpi);
+ vp9_set_rd_speed_thresholds_sub8x8(cpi);
// Decide q and q bounds.
q = vp9_rc_pick_q_and_bounds(cpi, &bottom_index, &top_index);
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 9651cee..eee6ffe 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -423,7 +423,7 @@
VP9_ALT_FLAG };
int64_t best_rd = INT64_MAX;
int64_t this_rd = INT64_MAX;
- int skip_txfm = 0;
+ uint8_t skip_txfm = 0;
int rate = INT_MAX;
int64_t dist = INT64_MAX;
// var_y and sse_y are saved to be used in skipping checking
diff --git a/vp9/encoder/vp9_rd.c b/vp9/encoder/vp9_rd.c
index b9e4408..b826ff4 100644
--- a/vp9/encoder/vp9_rd.c
+++ b/vp9/encoder/vp9_rd.c
@@ -374,7 +374,6 @@
uint8_t *ref_y_ptr;
const int num_mv_refs = MAX_MV_REF_CANDIDATES +
(cpi->sf.adaptive_motion_search &&
- cpi->common.show_frame &&
block_size < cpi->sf.max_partition_size);
MV pred_mv[3];
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 9af6a0c..506c9bc 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -1398,7 +1398,7 @@
mvp_full.row = bsi->mvp.as_mv.row >> 3;
mvp_full.col = bsi->mvp.as_mv.col >> 3;
- if (cpi->sf.adaptive_motion_search && cm->show_frame) {
+ if (cpi->sf.adaptive_motion_search) {
mvp_full.row = x->pred_mv[mbmi->ref_frame[0]].row >> 3;
mvp_full.col = x->pred_mv[mbmi->ref_frame[0]].col >> 3;
step_param = MAX(step_param, 8);
@@ -1815,8 +1815,7 @@
step_param = cpi->mv_step_param;
}
- if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64 &&
- cm->show_frame) {
+ if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64) {
int boffset = 2 * (b_width_log2(BLOCK_64X64) - MIN(b_height_log2(bsize),
b_width_log2(bsize)));
step_param = MAX(step_param, boffset);
@@ -1876,7 +1875,7 @@
*rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv,
x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
- if (cpi->sf.adaptive_motion_search && cm->show_frame)
+ if (cpi->sf.adaptive_motion_search)
x->pred_mv[ref] = tmp_mv->as_mv;
if (scaled_ref_frame) {
@@ -2130,6 +2129,8 @@
int_mv (*mode_mv)[MAX_REF_FRAMES],
int mi_row, int mi_col,
int_mv single_newmv[MAX_REF_FRAMES],
+ INTERP_FILTER (*single_filter)[MAX_REF_FRAMES],
+ int (*single_skippable)[MAX_REF_FRAMES],
int64_t *psse,
const int64_t ref_best_rd) {
VP9_COMMON *cm = &cpi->common;
@@ -2153,7 +2154,7 @@
int orig_dst_stride[MAX_MB_PLANE];
int rs = 0;
INTERP_FILTER best_filter = SWITCHABLE;
- int skip_txfm[MAX_MB_PLANE << 2] = {0};
+ uint8_t skip_txfm[MAX_MB_PLANE << 2] = {0};
int64_t bsse[MAX_MB_PLANE << 2] = {0};
int bsl = mi_width_log2_lookup[bsize];
@@ -2176,6 +2177,12 @@
if (frame_mv[refs[0]].as_int == INVALID_MV ||
frame_mv[refs[1]].as_int == INVALID_MV)
return INT64_MAX;
+
+ if (cpi->sf.adaptive_mode_search) {
+ if (single_filter[this_mode][refs[0]] ==
+ single_filter[this_mode][refs[1]])
+ best_filter = single_filter[this_mode][refs[0]];
+ }
}
if (this_mode == NEWMV) {
@@ -2365,8 +2372,19 @@
vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist);
rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
+ vpx_memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
+ vpx_memcpy(bsse, x->bsse, sizeof(bsse));
}
+ if (!is_comp_pred)
+ single_filter[this_mode][refs[0]] = mbmi->interp_filter;
+
+ if (cpi->sf.adaptive_mode_search)
+ if (is_comp_pred)
+ if (single_skippable[this_mode][refs[0]] &&
+ single_skippable[this_mode][refs[1]])
+ vpx_memset(skip_txfm, 1, sizeof(skip_txfm));
+
if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
// if current pred_error modeled rd is substantially more than the best
// so far, do not bother doing full rd
@@ -2426,6 +2444,9 @@
*skippable = skippable_y && skippable_uv;
}
+ if (!is_comp_pred)
+ single_skippable[this_mode][refs[0]] = *skippable;
+
restore_dst_buf(xd, orig_dst, orig_dst_stride);
return this_rd; // if 0, this will be re-calculated by caller
}
@@ -2532,10 +2553,12 @@
PREDICTION_MODE this_mode;
MV_REFERENCE_FRAME ref_frame, second_ref_frame;
unsigned char segment_id = mbmi->segment_id;
- int comp_pred, i;
+ int comp_pred, i, k;
int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
struct buf_2d yv12_mb[4][MAX_MB_PLANE];
int_mv single_newmv[MAX_REF_FRAMES] = { { 0 } };
+ INTERP_FILTER single_inter_filter[MB_MODE_COUNT][MAX_REF_FRAMES];
+ int single_skippable[MB_MODE_COUNT][MAX_REF_FRAMES];
static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
VP9_ALT_FLAG };
int64_t best_rd = best_rd_so_far;
@@ -2584,6 +2607,12 @@
rate_uv_intra[i] = INT_MAX;
for (i = 0; i < MAX_REF_FRAMES; ++i)
x->pred_sse[i] = INT_MAX;
+ for (i = 0; i < MB_MODE_COUNT; ++i) {
+ for (k = 0; k < MAX_REF_FRAMES; ++k) {
+ single_inter_filter[i][k] = SWITCHABLE;
+ single_skippable[i][k] = 0;
+ }
+ }
*returnrate = INT_MAX;
@@ -2820,6 +2849,8 @@
// them for this frame.
mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
: cm->interp_filter;
+ mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
+
x->skip = 0;
set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
@@ -2866,7 +2897,8 @@
&rate_uv, &distortion_uv,
&disable_skip, frame_mv,
mi_row, mi_col,
- single_newmv, &total_sse, best_rd);
+ single_newmv, single_inter_filter,
+ single_skippable, &total_sse, best_rd);
if (this_rd == INT64_MAX)
continue;
@@ -3064,6 +3096,28 @@
break;
}
+ // The inter modes' rate costs are not calculated precisely in some cases.
+ // Therefore, sometimes, NEWMV is chosen instead of NEARESTMV, NEARMV, and
+ // ZEROMV. Here, checks are added for those cases, and the mode decisions
+ // are corrected.
+ if (best_mbmode.mode == NEWMV) {
+ const MV_REFERENCE_FRAME refs[2] = {best_mbmode.ref_frame[0],
+ best_mbmode.ref_frame[1]};
+ int comp_pred_mode = refs[1] > INTRA_FRAME;
+
+ if (frame_mv[NEARESTMV][refs[0]].as_int == best_mbmode.mv[0].as_int &&
+ ((comp_pred_mode && frame_mv[NEARESTMV][refs[1]].as_int ==
+ best_mbmode.mv[1].as_int) || !comp_pred_mode))
+ best_mbmode.mode = NEARESTMV;
+ else if (frame_mv[NEARMV][refs[0]].as_int == best_mbmode.mv[0].as_int &&
+ ((comp_pred_mode && frame_mv[NEARMV][refs[1]].as_int ==
+ best_mbmode.mv[1].as_int) || !comp_pred_mode))
+ best_mbmode.mode = NEARMV;
+ else if (best_mbmode.mv[0].as_int == 0 &&
+ ((comp_pred_mode && best_mbmode.mv[1].as_int == 0) || !comp_pred_mode))
+ best_mbmode.mode = ZEROMV;
+ }
+
if (best_mode_index < 0 || best_rd >= best_rd_so_far)
return INT64_MAX;
diff --git a/vp9/encoder/vp9_variance.c b/vp9/encoder/vp9_variance.c
index eb5ae2e..afbb191 100644
--- a/vp9/encoder/vp9_variance.c
+++ b/vp9/encoder/vp9_variance.c
@@ -103,8 +103,9 @@
unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) {
unsigned int i, sum = 0;
- for (i = 0; i < 256; i++)
+ for (i = 0; i < 256; ++i) {
sum += src_ptr[i] * src_ptr[i];
+ }
return sum;
}
diff --git a/vp9/encoder/x86/vp9_variance_impl_sse2.asm b/vp9/encoder/x86/vp9_variance_impl_sse2.asm
deleted file mode 100644
index 6278f2a..0000000
--- a/vp9/encoder/x86/vp9_variance_impl_sse2.asm
+++ /dev/null
@@ -1,69 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
-%include "vpx_ports/x86_abi_support.asm"
-
-;unsigned int vp9_get_mb_ss_sse2
-;(
-; short *src_ptr
-;)
-global sym(vp9_get_mb_ss_sse2) PRIVATE
-sym(vp9_get_mb_ss_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 1
- GET_GOT rbx
- push rsi
- push rdi
- sub rsp, 16
- ; end prolog
-
-
- mov rax, arg(0) ;[src_ptr]
- mov rcx, 8
- pxor xmm4, xmm4
-
-.NEXTROW:
- movdqa xmm0, [rax]
- movdqa xmm1, [rax+16]
- movdqa xmm2, [rax+32]
- movdqa xmm3, [rax+48]
- pmaddwd xmm0, xmm0
- pmaddwd xmm1, xmm1
- pmaddwd xmm2, xmm2
- pmaddwd xmm3, xmm3
-
- paddd xmm0, xmm1
- paddd xmm2, xmm3
- paddd xmm4, xmm0
- paddd xmm4, xmm2
-
- add rax, 0x40
- dec rcx
- ja .NEXTROW
-
- movdqa xmm3,xmm4
- psrldq xmm4,8
- paddd xmm4,xmm3
- movdqa xmm3,xmm4
- psrldq xmm4,4
- paddd xmm4,xmm3
- movq rax,xmm4
-
-
- ; begin epilog
- add rsp, 16
- pop rdi
- pop rsi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
diff --git a/vp9/encoder/x86/vp9_variance_sse2.c b/vp9/encoder/x86/vp9_variance_sse2.c
index 1cc4bbc..b4d2b0a 100644
--- a/vp9/encoder/x86/vp9_variance_sse2.c
+++ b/vp9/encoder/x86/vp9_variance_sse2.c
@@ -19,6 +19,21 @@
const unsigned char *ref, int ref_stride,
unsigned int *sse, int *sum);
+unsigned int vp9_get_mb_ss_sse2(const int16_t *src) {
+ __m128i vsum = _mm_setzero_si128();
+ int i;
+
+ for (i = 0; i < 32; ++i) {
+ const __m128i v = _mm_loadu_si128((const __m128i *)src);
+ vsum = _mm_add_epi32(vsum, _mm_madd_epi16(v, v));
+ src += 8;
+ }
+
+ vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 8));
+ vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 4));
+ return _mm_cvtsi128_si32(vsum);
+}
+
#define READ64(p, stride, i) \
_mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const uint32_t *)(p + i * stride)), \
_mm_cvtsi32_si128(*(const uint32_t *)(p + (i + 1) * stride)))
diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk
index 8e3e885..90f0342 100644
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -80,7 +80,6 @@
VP9_COMMON_SRCS-$(HAVE_AVX2) += common/x86/vp9_subpixel_8t_intrin_avx2.c
VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_subpixel_8t_intrin_ssse3.c
ifeq ($(CONFIG_VP9_POSTPROC),yes)
-VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_postproc_mmx.asm
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_postproc_sse2.asm
endif
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index 26db30c..0f0b7a5 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -670,7 +670,6 @@
return VPX_CODEC_MEM_ERROR;
ctx->priv = (vpx_codec_priv_t *)priv;
- ctx->priv->sz = sizeof(*priv);
ctx->priv->init_flags = ctx->init_flags;
ctx->priv->enc.total_encoders = 1;
@@ -837,18 +836,19 @@
vpx_enc_frame_flags_t flags,
unsigned long deadline) {
vpx_codec_err_t res = VPX_CODEC_OK;
+ VP9_COMP *const cpi = ctx->cpi;
const vpx_rational_t *const timebase = &ctx->cfg.g_timebase;
if (img != NULL) {
res = validate_img(ctx, img);
// TODO(jzern) the checks related to cpi's validity should be treated as a
// failure condition, encoder setup is done fully in init() currently.
- if (res == VPX_CODEC_OK && ctx->cpi != NULL && ctx->cx_data == NULL) {
+ if (res == VPX_CODEC_OK && cpi != NULL && ctx->cx_data == NULL) {
// There's no codec control for multiple alt-refs so check the encoder
// instance for its status to determine the compressed data size.
ctx->cx_data_sz = ctx->cfg.g_w * ctx->cfg.g_h *
get_image_bps(img) / 8 *
- (ctx->cpi->multi_arf_allowed ? 8 : 2);
+ (cpi->multi_arf_allowed ? 8 : 2);
if (ctx->cx_data_sz < 4096) ctx->cx_data_sz = 4096;
ctx->cx_data = (unsigned char *)malloc(ctx->cx_data_sz);
@@ -868,7 +868,7 @@
return VPX_CODEC_INVALID_PARAM;
}
- vp9_apply_encoding_flags(ctx->cpi, flags);
+ vp9_apply_encoding_flags(cpi, flags);
// Handle fixed keyframe intervals
if (ctx->cfg.kf_mode == VPX_KF_AUTO &&
@@ -880,7 +880,7 @@
}
// Initialize the encoder instance on the first frame.
- if (res == VPX_CODEC_OK && ctx->cpi != NULL) {
+ if (res == VPX_CODEC_OK && cpi != NULL) {
unsigned int lib_flags = 0;
YV12_BUFFER_CONFIG sd;
int64_t dst_time_stamp = timebase_units_to_ticks(timebase, pts);
@@ -891,16 +891,15 @@
// Set up internal flags
if (ctx->base.init_flags & VPX_CODEC_USE_PSNR)
- ((VP9_COMP *)ctx->cpi)->b_calculate_psnr = 1;
+ cpi->b_calculate_psnr = 1;
if (img != NULL) {
res = image2yuvconfig(img, &sd);
// Store the original flags in to the frame buffer. Will extract the
// key frame flag when we actually encode this frame.
- if (vp9_receive_raw_frame(ctx->cpi, flags,
+ if (vp9_receive_raw_frame(cpi, flags,
&sd, dst_time_stamp, dst_end_time_stamp)) {
- VP9_COMP *cpi = (VP9_COMP *)ctx->cpi;
res = update_error_state(ctx, &cpi->common.error);
}
}
@@ -925,11 +924,10 @@
}
while (cx_data_sz >= ctx->cx_data_sz / 2 &&
- -1 != vp9_get_compressed_data(ctx->cpi, &lib_flags, &size,
+ -1 != vp9_get_compressed_data(cpi, &lib_flags, &size,
cx_data, &dst_time_stamp,
&dst_end_time_stamp, !img)) {
if (size) {
- VP9_COMP *const cpi = (VP9_COMP *)ctx->cpi;
vpx_codec_cx_pkt_t pkt;
#if CONFIG_SPATIAL_SVC
diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c
index b0fb282..393c66e 100644
--- a/vp9/vp9_dx_iface.c
+++ b/vp9/vp9_dx_iface.c
@@ -63,7 +63,6 @@
return VPX_CODEC_MEM_ERROR;
ctx->priv = (vpx_codec_priv_t *)priv;
- ctx->priv->sz = sizeof(*priv);
ctx->priv->init_flags = ctx->init_flags;
priv->si.sz = sizeof(priv->si);
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index 312f717..e450f7b 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -93,7 +93,6 @@
VP9_CX_SRCS-yes += encoder/vp9_mbgraph.c
VP9_CX_SRCS-yes += encoder/vp9_mbgraph.h
-VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_variance_impl_sse2.asm
VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_variance_impl_intrin_avx2.c
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad4d_sse2.asm
VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_sad4d_intrin_avx2.c
diff --git a/vpx/internal/vpx_codec_internal.h b/vpx/internal/vpx_codec_internal.h
index 02f2079..cbfffd0 100644
--- a/vpx/internal/vpx_codec_internal.h
+++ b/vpx/internal/vpx_codec_internal.h
@@ -335,7 +335,6 @@
* and the pointer cast to the proper type.
*/
struct vpx_codec_priv {
- unsigned int sz;
const char *err_detail;
vpx_codec_flags_t init_flags;
struct {
diff --git a/vpxenc.c b/vpxenc.c
index b1156e1..5afca24 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -1202,7 +1202,7 @@
// Check that the codec bit depth is greater than the input bit depth.
if (stream->config.cfg.g_input_bit_depth >
- (int)stream->config.cfg.g_bit_depth) {
+ (unsigned int)stream->config.cfg.g_bit_depth) {
fatal("Stream %d: codec bit depth (%d) less than input bit depth (%d)",
stream->index, (int)stream->config.cfg.g_bit_depth,
stream->config.cfg.g_input_bit_depth);