Merge "win: Include <intrin.h> instead of manually declaring intrinsics."
diff --git a/build/make/configure.sh b/build/make/configure.sh
index 33f658e..ee887ab 100644
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -185,6 +185,25 @@
#
# Boolean Manipulation Functions
#
+
+enable_codec(){
+ enabled $1 || echo " enabling $1"
+ set_all yes $1
+
+ is_in $1 vp8 vp9 vp10 && \
+ set_all yes $1_encoder && \
+ set_all yes $1_decoder
+}
+
+disable_codec(){
+ disabled $1 || echo " disabling $1"
+ set_all no $1
+
+ is_in $1 vp8 vp9 vp10 && \
+ set_all no $1_encoder && \
+ set_all no $1_decoder
+}
+
enable_feature(){
set_all yes $*
}
@@ -521,22 +540,20 @@
;;
--enable-?*|--disable-?*)
eval `echo "$opt" | sed 's/--/action=/;s/-/ option=/;s/-/_/g'`
- if echo "${ARCH_EXT_LIST}" | grep "^ *$option\$" >/dev/null; then
+ if is_in ${option} ${ARCH_EXT_LIST}; then
[ $action = "disable" ] && RTCD_OPTIONS="${RTCD_OPTIONS}--disable-${option} "
elif [ $action = "disable" ] && ! disabled $option ; then
- echo "${CMDLINE_SELECT}" | grep "^ *$option\$" >/dev/null ||
- die_unknown $opt
+ is_in ${option} ${CMDLINE_SELECT} || die_unknown $opt
log_echo " disabling $option"
elif [ $action = "enable" ] && ! enabled $option ; then
- echo "${CMDLINE_SELECT}" | grep "^ *$option\$" >/dev/null ||
- die_unknown $opt
+ is_in ${option} ${CMDLINE_SELECT} || die_unknown $opt
log_echo " enabling $option"
fi
${action}_feature $option
;;
--require-?*)
eval `echo "$opt" | sed 's/--/action=/;s/-/ option=/;s/-/_/g'`
- if echo "${ARCH_EXT_LIST}" none | grep "^ *$option\$" >/dev/null; then
+ if is_in ${option} ${ARCH_EXT_LIST}; then
RTCD_OPTIONS="${RTCD_OPTIONS}${opt} "
else
die_unknown $opt
@@ -638,6 +655,26 @@
xcrun --sdk $1 --show-sdk-version 2>/dev/null | cut -d. -f1
}
+# Print the Xcode version.
+show_xcode_version() {
+ xcodebuild -version | head -n1 | cut -d' ' -f2
+}
+
+# Fails when Xcode version is less than 6.3.
+check_xcode_minimum_version() {
+ xcode_major=$(show_xcode_version | cut -f1 -d.)
+ xcode_minor=$(show_xcode_version | cut -f2 -d.)
+ xcode_min_major=6
+ xcode_min_minor=3
+ if [ ${xcode_major} -lt ${xcode_min_major} ]; then
+ return 1
+ fi
+ if [ ${xcode_major} -eq ${xcode_min_major} ] \
+ && [ ${xcode_minor} -lt ${xcode_min_minor} ]; then
+ return 1
+ fi
+}
+
process_common_toolchain() {
if [ -z "$toolchain" ]; then
gcctarget="${CHOST:-$(gcc -dumpmachine 2> /dev/null)}"
@@ -751,13 +788,14 @@
enabled shared && soft_enable pic
# Minimum iOS version for all target platforms (darwin and iphonesimulator).
+ # Shared library framework builds are only possible on iOS 8 and later.
if enabled shared; then
IOS_VERSION_OPTIONS="--enable-shared"
+ IOS_VERSION_MIN="8.0"
else
IOS_VERSION_OPTIONS=""
+ IOS_VERSION_MIN="6.0"
fi
- IOS_VERSION_MIN=$("${source_path}/build/make/ios-version.sh" \
- ${IOS_VERSION_OPTIONS})
# Handle darwin variants. Newer SDKs allow targeting older
# platforms, so use the newest one available.
@@ -1050,6 +1088,19 @@
[ -d "${try_dir}" ] && add_ldflags -L"${try_dir}"
done
+ case ${tgt_isa} in
+ armv7|armv7s|armv8|arm64)
+ if enabled neon && ! check_xcode_minimum_version; then
+ soft_disable neon
+ log_echo " neon disabled: upgrade Xcode (need v6.3+)."
+ if enabled neon_asm; then
+ soft_disable neon_asm
+ log_echo " neon_asm disabled: upgrade Xcode (need v6.3+)."
+ fi
+ fi
+ ;;
+ esac
+
asm_conversion_cmd="${source_path}/build/make/ads2gas_apple.pl"
if [ "$(show_darwin_sdk_major_version iphoneos)" -gt 8 ]; then
@@ -1105,7 +1156,7 @@
check_add_ldflags -mfp64
;;
i6400)
- check_add_cflags -mips64r6 -mabi=64 -funroll-loops -msched-weight
+ check_add_cflags -mips64r6 -mabi=64 -funroll-loops -msched-weight
check_add_cflags -mload-store-pairs -mhard-float -mfp64
check_add_asflags -mips64r6 -mabi=64 -mhard-float -mfp64
check_add_ldflags -mips64r6 -mabi=64 -mfp64
diff --git a/build/make/ios-version.sh b/build/make/ios-version.sh
deleted file mode 100755
index 7252eb4..0000000
--- a/build/make/ios-version.sh
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/bin/sh
-##
-## Copyright (c) 2016 The WebM project authors. All Rights Reserved.
-##
-## Use of this source code is governed by a BSD-style license
-## that can be found in the LICENSE file in the root of the source
-## tree. An additional intellectual property rights grant can be found
-## in the file PATENTS. All contributing project authors may
-## be found in the AUTHORS file in the root of the source tree.
-##
-
-if [ "$1" = "--enable-shared" ]; then
- # Shared library framework builds are only possible on iOS 8 and later.
- echo "8.0"
-else
- echo "6.0"
-fi
diff --git a/build/make/iosbuild.sh b/build/make/iosbuild.sh
index 96dc6cc..c703f22 100755
--- a/build/make/iosbuild.sh
+++ b/build/make/iosbuild.sh
@@ -347,10 +347,11 @@
if [ "$ENABLE_SHARED" = "yes" ]; then
IOS_VERSION_OPTIONS="--enable-shared"
+ IOS_VERSION_MIN="8.0"
else
IOS_VERSION_OPTIONS=""
+ IOS_VERSION_MIN="6.0"
fi
-IOS_VERSION_MIN=$("${SCRIPT_DIR}/ios-version.sh" ${IOS_VERSION_OPTIONS})
if [ "${VERBOSE}" = "yes" ]; then
cat << EOF
diff --git a/configure b/configure
index 04ea0f4..73b0e0a 100755
--- a/configure
+++ b/configure
@@ -195,12 +195,12 @@
fi
# disable codecs when their source directory does not exist
-[ -d "${source_path}/vp8" ] || disable_feature vp8
-[ -d "${source_path}/vp9" ] || disable_feature vp9
-[ -d "${source_path}/vp10" ] || disable_feature vp10
+[ -d "${source_path}/vp8" ] || disable_codec vp8
+[ -d "${source_path}/vp9" ] || disable_codec vp9
+[ -d "${source_path}/vp10" ] || disable_codec vp10
# disable vp10 codec by default
-disable_feature vp10
+disable_codec vp10
# install everything except the sources, by default. sources will have
# to be enabled when doing dist builds, since that's no longer a common
@@ -391,15 +391,19 @@
for opt do
optval="${opt#*=}"
case "$opt" in
- --disable-codecs) for c in ${CODECS}; do disable_feature $c; done ;;
+ --disable-codecs)
+ for c in ${CODEC_FAMILIES}; do disable_codec $c; done
+ ;;
--enable-?*|--disable-?*)
eval `echo "$opt" | sed 's/--/action=/;s/-/ option=/;s/-/_/g'`
- if echo "${EXPERIMENT_LIST}" | grep "^ *$option\$" >/dev/null; then
+ if is_in ${option} ${EXPERIMENT_LIST}; then
if enabled experimental; then
${action}_feature $option
else
log_echo "Ignoring $opt -- not in experimental mode."
fi
+ elif is_in ${option} "${CODECS} ${CODEC_FAMILIES}"; then
+ ${action}_codec ${option}
else
process_common_cmdline $opt
fi
@@ -413,14 +417,6 @@
post_process_cmdline() {
c=""
- # If the codec family is disabled, disable all components of that family.
- # If the codec family is enabled, enable all components of that family.
- log_echo "Configuring selected codecs"
- for c in ${CODECS}; do
- disabled ${c%%_*} && disable_feature ${c}
- enabled ${c%%_*} && enable_feature ${c}
- done
-
# Enable all detected codecs, if they haven't been disabled
for c in ${CODECS}; do soft_enable $c; done
diff --git a/test/datarate_test.cc b/test/datarate_test.cc
index 3941e16..2f1db9c 100644
--- a/test/datarate_test.cc
+++ b/test/datarate_test.cc
@@ -90,7 +90,7 @@
<< pkt->data.frame.pts;
}
- const size_t frame_size_in_bits = pkt->data.frame.sz * 8;
+ const int64_t frame_size_in_bits = pkt->data.frame.sz * 8;
// Subtract from the buffer the bits associated with a played back frame.
bits_in_buffer_model_ -= frame_size_in_bits;
diff --git a/test/hadamard_test.cc b/test/hadamard_test.cc
index 400939a..7a5bd5b 100644
--- a/test/hadamard_test.cc
+++ b/test/hadamard_test.cc
@@ -21,20 +21,7 @@
using ::libvpx_test::ACMRandom;
-typedef void (*Hadamard8x8Func)(const int16_t *a, int a_stride,
- int16_t *b);
-
-class HadamardTest : public ::testing::TestWithParam<Hadamard8x8Func> {
- public:
- virtual void SetUp() {
- h_func_ = GetParam();
- rnd_.Reset(ACMRandom::DeterministicSeed());
- }
-
- protected:
- Hadamard8x8Func h_func_;
- ACMRandom rnd_;
-};
+typedef void (*HadamardFunc)(const int16_t *a, int a_stride, int16_t *b);
void hadamard_loop(const int16_t *a, int a_stride, int16_t *out) {
int16_t b[8];
@@ -59,28 +46,74 @@
out[5] = c[3] - c[7];
}
-void reference_hadamard(const int16_t *a, int a_stride, int16_t *b) {
+void reference_hadamard8x8(const int16_t *a, int a_stride, int16_t *b) {
int16_t buf[64];
- for (int i = 0; i < 8; i++) {
+ for (int i = 0; i < 8; ++i) {
hadamard_loop(a + i, a_stride, buf + i * 8);
}
- for (int i = 0; i < 8; i++) {
+ for (int i = 0; i < 8; ++i) {
hadamard_loop(buf + i, 8, b + i * 8);
}
}
-TEST_P(HadamardTest, CompareReferenceRandom) {
+void reference_hadamard16x16(const int16_t *a, int a_stride, int16_t *b) {
+ /* The source is a 16x16 block. The destination is rearranged to 8x32.
+ * Input is 9 bit. */
+ reference_hadamard8x8(a + 0 + 0 * a_stride, a_stride, b + 0);
+ reference_hadamard8x8(a + 8 + 0 * a_stride, a_stride, b + 64);
+ reference_hadamard8x8(a + 0 + 8 * a_stride, a_stride, b + 128);
+ reference_hadamard8x8(a + 8 + 8 * a_stride, a_stride, b + 192);
+
+ /* Overlay the 8x8 blocks and combine. */
+ for (int i = 0; i < 64; ++i) {
+ /* 8x8 steps the range up to 15 bits. */
+ const int16_t a0 = b[0];
+ const int16_t a1 = b[64];
+ const int16_t a2 = b[128];
+ const int16_t a3 = b[192];
+
+ /* Prevent the result from escaping int16_t. */
+ const int16_t b0 = (a0 + a1) >> 1;
+ const int16_t b1 = (a0 - a1) >> 1;
+ const int16_t b2 = (a2 + a3) >> 1;
+ const int16_t b3 = (a2 - a3) >> 1;
+
+ /* Store a 16 bit value. */
+ b[ 0] = b0 + b2;
+ b[ 64] = b1 + b3;
+ b[128] = b0 - b2;
+ b[192] = b1 - b3;
+
+ ++b;
+ }
+}
+
+class HadamardTestBase : public ::testing::TestWithParam<HadamardFunc> {
+ public:
+ virtual void SetUp() {
+ h_func_ = GetParam();
+ rnd_.Reset(ACMRandom::DeterministicSeed());
+ }
+
+ protected:
+ HadamardFunc h_func_;
+ ACMRandom rnd_;
+};
+
+class Hadamard8x8Test : public HadamardTestBase {};
+
+TEST_P(Hadamard8x8Test, CompareReferenceRandom) {
DECLARE_ALIGNED(16, int16_t, a[64]);
DECLARE_ALIGNED(16, int16_t, b[64]);
int16_t b_ref[64];
- for (int i = 0; i < 64; i++) {
+ for (int i = 0; i < 64; ++i) {
a[i] = rnd_.Rand9Signed();
}
memset(b, 0, sizeof(b));
memset(b_ref, 0, sizeof(b_ref));
- reference_hadamard(a, 8, b_ref);
+ reference_hadamard8x8(a, 8, b_ref);
ASM_REGISTER_STATE_CHECK(h_func_(a, 8, b));
// The order of the output is not important. Sort before checking.
@@ -89,11 +122,11 @@
EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
}
-TEST_P(HadamardTest, VaryStride) {
+TEST_P(Hadamard8x8Test, VaryStride) {
DECLARE_ALIGNED(16, int16_t, a[64 * 8]);
DECLARE_ALIGNED(16, int16_t, b[64]);
int16_t b_ref[64];
- for (int i = 0; i < 64 * 8; i++) {
+ for (int i = 0; i < 64 * 8; ++i) {
a[i] = rnd_.Rand9Signed();
}
@@ -101,7 +134,7 @@
memset(b, 0, sizeof(b));
memset(b_ref, 0, sizeof(b_ref));
- reference_hadamard(a, i, b_ref);
+ reference_hadamard8x8(a, i, b_ref);
ASM_REGISTER_STATE_CHECK(h_func_(a, i, b));
// The order of the output is not important. Sort before checking.
@@ -111,21 +144,77 @@
}
}
-INSTANTIATE_TEST_CASE_P(C, HadamardTest,
+INSTANTIATE_TEST_CASE_P(C, Hadamard8x8Test,
::testing::Values(&vpx_hadamard_8x8_c));
#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(SSE2, HadamardTest,
+INSTANTIATE_TEST_CASE_P(SSE2, Hadamard8x8Test,
::testing::Values(&vpx_hadamard_8x8_sse2));
#endif // HAVE_SSE2
#if HAVE_SSSE3 && CONFIG_USE_X86INC && ARCH_X86_64
-INSTANTIATE_TEST_CASE_P(SSSE3, HadamardTest,
+INSTANTIATE_TEST_CASE_P(SSSE3, Hadamard8x8Test,
::testing::Values(&vpx_hadamard_8x8_ssse3));
#endif // HAVE_SSSE3 && CONFIG_USE_X86INC && ARCH_X86_64
#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(NEON, HadamardTest,
+INSTANTIATE_TEST_CASE_P(NEON, Hadamard8x8Test,
::testing::Values(&vpx_hadamard_8x8_neon));
#endif // HAVE_NEON
+
+class Hadamard16x16Test : public HadamardTestBase {};
+
+TEST_P(Hadamard16x16Test, CompareReferenceRandom) {
+ DECLARE_ALIGNED(16, int16_t, a[16 * 16]);
+ DECLARE_ALIGNED(16, int16_t, b[16 * 16]);
+ int16_t b_ref[16 * 16];
+ for (int i = 0; i < 16 * 16; ++i) {
+ a[i] = rnd_.Rand9Signed();
+ }
+ memset(b, 0, sizeof(b));
+ memset(b_ref, 0, sizeof(b_ref));
+
+ reference_hadamard16x16(a, 16, b_ref);
+ ASM_REGISTER_STATE_CHECK(h_func_(a, 16, b));
+
+ // The order of the output is not important. Sort before checking.
+ std::sort(b, b + 16 * 16);
+ std::sort(b_ref, b_ref + 16 * 16);
+ EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
+}
+
+TEST_P(Hadamard16x16Test, VaryStride) {
+ DECLARE_ALIGNED(16, int16_t, a[16 * 16 * 8]);
+ DECLARE_ALIGNED(16, int16_t, b[16 * 16]);
+ int16_t b_ref[16 * 16];
+ for (int i = 0; i < 16 * 16 * 8; ++i) {
+ a[i] = rnd_.Rand9Signed();
+ }
+
+ for (int i = 8; i < 64; i += 8) {
+ memset(b, 0, sizeof(b));
+ memset(b_ref, 0, sizeof(b_ref));
+
+ reference_hadamard16x16(a, i, b_ref);
+ ASM_REGISTER_STATE_CHECK(h_func_(a, i, b));
+
+ // The order of the output is not important. Sort before checking.
+ std::sort(b, b + 16 * 16);
+ std::sort(b_ref, b_ref + 16 * 16);
+ EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
+ }
+}
+
+INSTANTIATE_TEST_CASE_P(C, Hadamard16x16Test,
+ ::testing::Values(&vpx_hadamard_16x16_c));
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(SSE2, Hadamard16x16Test,
+ ::testing::Values(&vpx_hadamard_16x16_sse2));
+#endif // HAVE_SSE2
+
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(NEON, Hadamard16x16Test,
+ ::testing::Values(&vpx_hadamard_16x16_neon));
+#endif // HAVE_NEON
} // namespace
diff --git a/test/level_test.cc b/test/level_test.cc
index 5b9ce41..62d0247 100644
--- a/test/level_test.cc
+++ b/test/level_test.cc
@@ -22,7 +22,9 @@
: EncoderTest(GET_PARAM(0)),
encoding_mode_(GET_PARAM(1)),
cpu_used_(GET_PARAM(2)),
- target_level_(0) {}
+ min_gf_internal_(24),
+ target_level_(0),
+ level_(0) {}
virtual ~LevelTest() {}
virtual void SetUp() {
@@ -47,6 +49,7 @@
if (video->frame() == 0) {
encoder->Control(VP8E_SET_CPUUSED, cpu_used_);
encoder->Control(VP9E_SET_TARGET_LEVEL, target_level_);
+ encoder->Control(VP9E_SET_MIN_GF_INTERVAL, min_gf_internal_);
if (encoding_mode_ != ::libvpx_test::kRealTime) {
encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
@@ -54,20 +57,33 @@
encoder->Control(VP8E_SET_ARNR_TYPE, 3);
}
}
+ encoder->Control(VP9E_GET_LEVEL, &level_);
+ ASSERT_LE(level_, 51);
+ ASSERT_GE(level_, 0);
}
::libvpx_test::TestMode encoding_mode_;
int cpu_used_;
+ int min_gf_internal_;
int target_level_;
+ int level_;
};
+// Test for keeping level stats only
TEST_P(LevelTest, TestTargetLevel0) {
::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
- 30);
+ 40);
target_level_ = 0;
+ min_gf_internal_ = 4;
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+ ASSERT_EQ(11, level_);
+
+ cfg_.rc_target_bitrate = 1600;
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+ ASSERT_EQ(20, level_);
}
+// Test for level control being turned off
TEST_P(LevelTest, TestTargetLevel255) {
::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
30);
@@ -98,7 +114,6 @@
VP9_INSTANTIATE_TEST_CASE(LevelTest,
::testing::Values(::libvpx_test::kTwoPassGood,
- ::libvpx_test::kOnePassGood,
- ::libvpx_test::kRealTime),
+ ::libvpx_test::kOnePassGood),
::testing::Range(0, 9));
} // namespace
diff --git a/test/realtime_test.cc b/test/realtime_test.cc
new file mode 100644
index 0000000..24749e4
--- /dev/null
+++ b/test/realtime_test.cc
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/util.h"
+#include "test/video_source.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+namespace {
+
+const int kVideoSourceWidth = 320;
+const int kVideoSourceHeight = 240;
+const int kFramesToEncode = 2;
+
+class RealtimeTest
+ : public ::libvpx_test::EncoderTest,
+ public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
+ protected:
+ RealtimeTest()
+ : EncoderTest(GET_PARAM(0)), frame_packets_(0) {}
+ virtual ~RealtimeTest() {}
+
+ virtual void SetUp() {
+ InitializeConfig();
+ cfg_.g_lag_in_frames = 0;
+ SetMode(::libvpx_test::kRealTime);
+ }
+
+ virtual void BeginPassHook(unsigned int /*pass*/) {
+ // TODO(tomfinegan): We're changing the pass value here to make sure
+ // we get frames when real time mode is combined with |g_pass| set to
+ // VPX_RC_FIRST_PASS. This is necessary because EncoderTest::RunLoop() sets
+ // the pass value based on the mode passed into EncoderTest::SetMode(),
+ // which overrides the one specified in SetUp() above.
+ cfg_.g_pass = VPX_RC_FIRST_PASS;
+ }
+ virtual void FramePktHook(const vpx_codec_cx_pkt_t * /*pkt*/) {
+ frame_packets_++;
+ }
+
+ int frame_packets_;
+};
+
+TEST_P(RealtimeTest, RealtimeFirstPassProducesFrames) {
+ ::libvpx_test::RandomVideoSource video;
+ video.SetSize(kVideoSourceWidth, kVideoSourceHeight);
+ video.set_limit(kFramesToEncode);
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+ EXPECT_EQ(kFramesToEncode, frame_packets_);
+}
+
+VP8_INSTANTIATE_TEST_CASE(RealtimeTest,
+ ::testing::Values(::libvpx_test::kRealTime));
+VP9_INSTANTIATE_TEST_CASE(RealtimeTest,
+ ::testing::Values(::libvpx_test::kRealTime));
+
+} // namespace
diff --git a/test/test.mk b/test/test.mk
index 1eb702f..04acd96 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -24,6 +24,7 @@
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += encode_api_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += error_resilience_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += i420_video_source.h
+LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += realtime_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += resize_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += y4m_video_source.h
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += yuv_video_source.h
diff --git a/third_party/googletest/README.libvpx b/third_party/googletest/README.libvpx
index 7201a67..1eca78d 100644
--- a/third_party/googletest/README.libvpx
+++ b/third_party/googletest/README.libvpx
@@ -12,4 +12,8 @@
generation.
Local Modifications:
-Removed unused declarations of kPathSeparatorString to have warning free build.
\ No newline at end of file
+- Removed unused declarations of kPathSeparatorString to have warning
+ free build.
+- Added GTEST_ATTRIBUTE_UNUSED_ to test registering dummies in TEST_P
+ and INSTANTIATE_TEST_CASE_P to remove warnings about unused variables
+ under GCC 5.
\ No newline at end of file
diff --git a/third_party/googletest/src/include/gtest/gtest.h b/third_party/googletest/src/include/gtest/gtest.h
index 4f3804f..581a44e 100644
--- a/third_party/googletest/src/include/gtest/gtest.h
+++ b/third_party/googletest/src/include/gtest/gtest.h
@@ -16960,7 +16960,7 @@
GTEST_TEST_CLASS_NAME_(test_case_name, test_name)>()); \
return 0; \
} \
- static int gtest_registering_dummy_; \
+ static int gtest_registering_dummy_ GTEST_ATTRIBUTE_UNUSED_; \
GTEST_DISALLOW_COPY_AND_ASSIGN_(\
GTEST_TEST_CLASS_NAME_(test_case_name, test_name)); \
}; \
@@ -16972,7 +16972,7 @@
# define INSTANTIATE_TEST_CASE_P(prefix, test_case_name, generator) \
::testing::internal::ParamGenerator<test_case_name::ParamType> \
gtest_##prefix##test_case_name##_EvalGenerator_() { return generator; } \
- int gtest_##prefix##test_case_name##_dummy_ = \
+ int gtest_##prefix##test_case_name##_dummy_ GTEST_ATTRIBUTE_UNUSED_ = \
::testing::UnitTest::GetInstance()->parameterized_test_registry(). \
GetTestCasePatternHolder<test_case_name>(\
#test_case_name, __FILE__, __LINE__)->AddTestCaseInstantiation(\
diff --git a/vp8/common/findnearmv.h b/vp8/common/findnearmv.h
index 155847c..472a7b5 100644
--- a/vp8/common/findnearmv.h
+++ b/vp8/common/findnearmv.h
@@ -104,7 +104,7 @@
extern const unsigned char vp8_mbsplit_offset[4][16];
-static INLINE int left_block_mv(const MODE_INFO *cur_mb, int b)
+static INLINE uint32_t left_block_mv(const MODE_INFO *cur_mb, int b)
{
if (!(b & 3))
{
@@ -119,7 +119,8 @@
return (cur_mb->bmi + b - 1)->mv.as_int;
}
-static INLINE int above_block_mv(const MODE_INFO *cur_mb, int b, int mi_stride)
+static INLINE uint32_t above_block_mv(const MODE_INFO *cur_mb, int b,
+ int mi_stride)
{
if (!(b >> 2))
{
diff --git a/vp8/decoder/decodeframe.c b/vp8/decoder/decodeframe.c
index 4bc87eb..566972e 100644
--- a/vp8/decoder/decodeframe.c
+++ b/vp8/decoder/decodeframe.c
@@ -986,7 +986,8 @@
VP8_COMMON *const pc = &pbi->common;
MACROBLOCKD *const xd = &pbi->mb;
const unsigned char *data = pbi->fragments.ptrs[0];
- const unsigned char *data_end = data + pbi->fragments.sizes[0];
+ const unsigned int data_sz = pbi->fragments.sizes[0];
+ const unsigned char *data_end = data + data_sz;
ptrdiff_t first_partition_length_in_bytes;
int i, j, k, l;
@@ -1022,7 +1023,7 @@
const unsigned char *clear = data;
if (pbi->decrypt_cb)
{
- int n = (int)VPXMIN(sizeof(clear_buffer), data_end - data);
+ int n = (int)VPXMIN(sizeof(clear_buffer), data_sz);
pbi->decrypt_cb(pbi->decrypt_state, data, clear_buffer, n);
clear = clear_buffer;
}
diff --git a/vp8/encoder/denoising.c b/vp8/encoder/denoising.c
index cff99c0..26ce120 100644
--- a/vp8/encoder/denoising.c
+++ b/vp8/encoder/denoising.c
@@ -529,7 +529,7 @@
// Bias on zero motion vector sse.
const int zero_bias = denoiser->denoise_pars.denoise_mv_bias;
zero_mv_sse = (unsigned int)((int64_t)zero_mv_sse * zero_bias / 100);
- sse_diff = zero_mv_sse - best_sse;
+ sse_diff = (int)zero_mv_sse - (int)best_sse;
saved_mbmi = *mbmi;
diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index 95bb394..c526a3e 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -18,6 +18,7 @@
#include "onyx_int.h"
#include "vpx_dsp/variance.h"
#include "encodeintra.h"
+#include "vp8/common/common.h"
#include "vp8/common/setupintrarecon.h"
#include "vp8/common/systemdependent.h"
#include "mcomp.h"
@@ -2417,7 +2418,7 @@
int tmp_q;
int frames_left = (int)(cpi->twopass.total_stats.count - cpi->common.current_video_frame);
- FIRSTPASS_STATS this_frame = {0};
+ FIRSTPASS_STATS this_frame;
FIRSTPASS_STATS this_frame_copy;
double this_frame_intra_error;
@@ -2425,6 +2426,8 @@
int overhead_bits;
+ vp8_zero(this_frame);
+
if (!cpi->twopass.stats_in)
{
return ;
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 6617422..d5a0fff 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -1523,7 +1523,8 @@
void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
{
VP8_COMMON *cm = &cpi->common;
- int last_w, last_h, prev_number_of_layers;
+ int last_w, last_h;
+ unsigned int prev_number_of_layers;
if (!cpi)
return;
@@ -1786,10 +1787,8 @@
if (last_w != cpi->oxcf.Width || last_h != cpi->oxcf.Height)
cpi->force_next_frame_intra = 1;
- if (((cm->Width + 15) & 0xfffffff0) !=
- cm->yv12_fb[cm->lst_fb_idx].y_width ||
- ((cm->Height + 15) & 0xfffffff0) !=
- cm->yv12_fb[cm->lst_fb_idx].y_height ||
+ if (((cm->Width + 15) & ~15) != cm->yv12_fb[cm->lst_fb_idx].y_width ||
+ ((cm->Height + 15) & ~15) != cm->yv12_fb[cm->lst_fb_idx].y_height ||
cm->yv12_fb[cm->lst_fb_idx].y_width == 0)
{
dealloc_raw_frame_buffers(cpi);
@@ -5221,7 +5220,7 @@
vp8_second_pass(cpi);
encode_frame_to_data_rate(cpi, size, dest, dest_end, frame_flags);
- cpi->twopass.bits_left -= 8 * *size;
+ cpi->twopass.bits_left -= 8 * (int)(*size);
if (!cpi->common.refresh_alt_ref_frame)
{
@@ -5857,7 +5856,7 @@
return -1;
// Check number of rows and columns match
- if (cpi->common.mb_rows != rows || cpi->common.mb_cols != cols)
+ if (cpi->common.mb_rows != (int)rows || cpi->common.mb_cols != (int)cols)
return -1;
// Range check the delta Q values and convert the external Q range values
@@ -5913,7 +5912,7 @@
int vp8_set_active_map(VP8_COMP *cpi, unsigned char *map, unsigned int rows, unsigned int cols)
{
- if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols)
+ if ((int)rows == cpi->common.mb_rows && (int)cols == cpi->common.mb_cols)
{
if (map)
{
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index 6ede9b9..44fbbd4 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -371,7 +371,7 @@
double key_frame_rate_correction_factor;
double gf_rate_correction_factor;
- unsigned int frames_since_golden;
+ int frames_since_golden;
/* Count down till next GF */
int frames_till_gf_update_due;
diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index 257d2a0..b19ab7a 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -22,6 +22,7 @@
#include "vpx/vp8cx.h"
#include "vp8/encoder/firstpass.h"
#include "vp8/common/onyx.h"
+#include "vp8/common/common.h"
#include <stdlib.h>
#include <string.h>
@@ -760,7 +761,7 @@
unsigned long duration,
unsigned long deadline)
{
- unsigned int new_qc;
+ int new_qc;
#if !(CONFIG_REALTIME_ONLY)
/* Use best quality mode if no deadline is given. */
@@ -785,7 +786,9 @@
new_qc = MODE_REALTIME;
#endif
- if (ctx->cfg.g_pass == VPX_RC_FIRST_PASS)
+ if (deadline == VPX_DL_REALTIME)
+ new_qc = MODE_REALTIME;
+ else if (ctx->cfg.g_pass == VPX_RC_FIRST_PASS)
new_qc = MODE_FIRSTPASS;
else if (ctx->cfg.g_pass == VPX_RC_LAST_PASS)
new_qc = (new_qc == MODE_BESTQUALITY)
@@ -1116,7 +1119,8 @@
{
YV12_BUFFER_CONFIG sd;
- vp8_ppflags_t flags = {0};
+ vp8_ppflags_t flags;
+ vp8_zero(flags);
if (ctx->preview_ppcfg.post_proc_flag)
{
@@ -1305,8 +1309,8 @@
30, /* rc_resize_up_thresold */
VPX_VBR, /* rc_end_usage */
- {0}, /* rc_twopass_stats_in */
- {0}, /* rc_firstpass_mb_stats_in */
+ {NULL, 0}, /* rc_twopass_stats_in */
+ {NULL, 0}, /* rc_firstpass_mb_stats_in */
256, /* rc_target_bandwidth */
4, /* rc_min_quantizer */
63, /* rc_max_quantizer */
@@ -1334,6 +1338,8 @@
{0}, /* ts_rate_decimator */
0, /* ts_periodicity */
{0}, /* ts_layer_id */
+ {0}, /* layer_target_bitrate */
+ 0 /* temporal_layering_mode */
}},
};
diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c
index 9c78de1..fc9288d 100644
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c
@@ -522,7 +522,8 @@
{
YV12_BUFFER_CONFIG sd;
int64_t time_stamp = 0, time_end_stamp = 0;
- vp8_ppflags_t flags = {0};
+ vp8_ppflags_t flags;
+ vp8_zero(flags);
if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC)
{
@@ -816,11 +817,12 @@
},
{ /* encoder functions */
0,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL
+ NULL, /* vpx_codec_enc_cfg_map_t */
+ NULL, /* vpx_codec_encode_fn_t */
+ NULL, /* vpx_codec_get_cx_data_fn_t */
+ NULL, /* vpx_codec_enc_config_set_fn_t */
+ NULL, /* vpx_codec_get_global_headers_fn_t */
+ NULL, /* vpx_codec_get_preview_frame_fn_t */
+ NULL /* vpx_codec_enc_mr_get_mem_loc_fn_t */
}
};
diff --git a/vp9/common/vp9_common.h b/vp9/common/vp9_common.h
index 2aff132..908fa80 100644
--- a/vp9/common/vp9_common.h
+++ b/vp9/common/vp9_common.h
@@ -67,25 +67,6 @@
#define VP9_FRAME_MARKER 0x2
-typedef enum {
- LEVEL_UNKNOWN = 0,
- LEVEL_1 = 10,
- LEVEL_1_1 = 11,
- LEVEL_2 = 20,
- LEVEL_2_1 = 21,
- LEVEL_3 = 30,
- LEVEL_3_1 = 31,
- LEVEL_4 = 40,
- LEVEL_4_1 = 41,
- LEVEL_5 = 50,
- LEVEL_5_1 = 51,
- LEVEL_5_2 = 52,
- LEVEL_6 = 60,
- LEVEL_6_1 = 61,
- LEVEL_6_2 = 62,
- LEVEL_NOT_CARE = 255,
-} VP9_LEVEL;
-
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp9/common/vp9_common_data.c b/vp9/common/vp9_common_data.c
index a6dae6a..3409d04 100644
--- a/vp9/common/vp9_common_data.c
+++ b/vp9/common/vp9_common_data.c
@@ -159,3 +159,18 @@
{0, 8 }, // 64X32 - {0b0000, 0b1000}
{0, 0 }, // 64X64 - {0b0000, 0b0000}
};
+
+#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
+const uint8_t need_top_left[INTRA_MODES] = {
+ 0, // DC_PRED
+ 0, // V_PRED
+ 0, // H_PRED
+ 0, // D45_PRED
+ 1, // D135_PRED
+ 1, // D117_PRED
+ 1, // D153_PRED
+ 0, // D207_PRED
+ 0, // D63_PRED
+ 1, // TM_PRED
+};
+#endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
diff --git a/vp9/common/vp9_common_data.h b/vp9/common/vp9_common_data.h
index 95a1179..0ae24da 100644
--- a/vp9/common/vp9_common_data.h
+++ b/vp9/common/vp9_common_data.h
@@ -33,6 +33,9 @@
extern const BLOCK_SIZE txsize_to_bsize[TX_SIZES];
extern const TX_SIZE tx_mode_to_biggest_tx_size[TX_MODES];
extern const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES][2][2];
+#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
+extern const uint8_t need_top_left[INTRA_MODES];
+#endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
#ifdef __cplusplus
} // extern "C"
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index 1df6f08..3fd935e 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -168,8 +168,6 @@
int allow_high_precision_mv;
- int keep_level_stats;
-
// Flag signaling that the frame context should be reset to default values.
// 0 or 1 implies don't reset, 2 reset just the context specified in the
// frame header, 3 reset all contexts.
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index fe89829..ffc6839 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -902,4 +902,10 @@
frame_mvs += cm->mi_cols;
}
}
+#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
+ if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) &&
+ (xd->above_mi == NULL || xd->left_mi == NULL) &&
+ !is_inter_block(mi) && need_top_left[mi->uv_mode])
+ assert(0);
+#endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
}
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 61cca39..73a2db0 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -891,7 +891,7 @@
vpx_wb_write_bit(wb, cm->log2_tile_rows != 1);
}
-static int get_refresh_mask(VP9_COMP *cpi) {
+int vp9_get_refresh_mask(VP9_COMP *cpi) {
if (vp9_preserve_existing_gf(cpi)) {
// We have decided to preserve the previously existing golden frame as our
// new ARF frame. However, in the short term we leave it in the GF slot and,
@@ -1107,11 +1107,11 @@
write_bitdepth_colorspace_sampling(cm, wb);
}
- vpx_wb_write_literal(wb, get_refresh_mask(cpi), REF_FRAMES);
+ vpx_wb_write_literal(wb, vp9_get_refresh_mask(cpi), REF_FRAMES);
write_frame_size(cm, wb);
} else {
MV_REFERENCE_FRAME ref_frame;
- vpx_wb_write_literal(wb, get_refresh_mask(cpi), REF_FRAMES);
+ vpx_wb_write_literal(wb, vp9_get_refresh_mask(cpi), REF_FRAMES);
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
assert(get_ref_frame_map_idx(cpi, ref_frame) != INVALID_IDX);
vpx_wb_write_literal(wb, get_ref_frame_map_idx(cpi, ref_frame),
diff --git a/vp9/encoder/vp9_bitstream.h b/vp9/encoder/vp9_bitstream.h
index da6b414..f24d20f 100644
--- a/vp9/encoder/vp9_bitstream.h
+++ b/vp9/encoder/vp9_bitstream.h
@@ -18,6 +18,8 @@
#include "vp9/encoder/vp9_encoder.h"
+int vp9_get_refresh_mask(VP9_COMP *cpi);
+
void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, size_t *size);
static INLINE int vp9_preserve_existing_gf(VP9_COMP *cpi) {
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index bbdfbb8..069c335 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -146,9 +146,9 @@
uint8_t sb_is_skin;
// Used to save the status of whether a block has a low variance in
- // choose_partitioning. 0 for 64x64, 1 2 for 64x32, 3 4 for 32x64, 5~8 for
- // 32x32.
- uint8_t variance_low[9];
+ // choose_partitioning. 0 for 64x64, 1~2 for 64x32, 3~4 for 32x64, 5~8 for
+ // 32x32, 9~24 for 16x16.
+ uint8_t variance_low[25];
void (*fwd_txm4x4)(const int16_t *input, tran_low_t *output, int stride);
void (*itxm_add)(const tran_low_t *input, uint8_t *dest, int stride, int eob);
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 21a66bb..984f98a 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -134,7 +134,7 @@
0, &sse);
break;
}
- return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
+ return ROUND_POWER_OF_TWO((int64_t)var, num_pels_log2_lookup[bs]);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
@@ -773,9 +773,7 @@
}
}
- for (i = 0; i < 9; i++) {
- x->variance_low[i] = 0;
- }
+ memset(x->variance_low, 0, sizeof(x->variance_low));
if (xd->mb_to_right_edge < 0)
pixels_wide += (xd->mb_to_right_edge >> 3);
@@ -798,7 +796,7 @@
const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
const YV12_BUFFER_CONFIG *yv12_g = NULL;
- unsigned int y_sad, y_sad_g;
+ unsigned int y_sad, y_sad_g, y_sad_thr;
const BLOCK_SIZE bsize = BLOCK_32X32
+ (mi_col + 4 < cm->mi_cols) * 2 + (mi_row + 4 < cm->mi_rows);
@@ -831,7 +829,10 @@
mi->interp_filter = BILINEAR;
y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col);
- if (y_sad_g < y_sad) {
+ // Pick ref frame for partitioning, bias last frame when y_sad_g and y_sad
+ // are close if short_circuit_low_temp_var is on.
+ y_sad_thr = cpi->sf.short_circuit_low_temp_var ? (y_sad * 7) >> 3 : y_sad;
+ if (y_sad_g < y_sad_thr) {
vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
&cm->frame_refs[GOLDEN_FRAME - 1].sf);
mi->ref_frame[0] = GOLDEN_FRAME;
@@ -1083,28 +1084,53 @@
}
if (cpi->sf.short_circuit_low_temp_var) {
- // Set low variance flag, only for blocks >= 32x32 and if LAST_FRAME was
- // selected.
- if (ref_frame_partition == LAST_FRAME) {
+ const int mv_thr = cm->width > 640 ? 8 : 4;
+ // Check temporal variance for bsize >= 16x16, if LAST_FRAME was selected
+ // and int_pro mv is small. If the temporal variance is small set the
+ // variance_low flag for the block. The variance threshold can be adjusted,
+ // the higher the more aggressive.
+ if (ref_frame_partition == LAST_FRAME &&
+ (cpi->sf.short_circuit_low_temp_var == 1 ||
+ (xd->mi[0]->mv[0].as_mv.col < mv_thr &&
+ xd->mi[0]->mv[0].as_mv.col > -mv_thr &&
+ xd->mi[0]->mv[0].as_mv.row < mv_thr &&
+ xd->mi[0]->mv[0].as_mv.row > -mv_thr))) {
if (xd->mi[0]->sb_type == BLOCK_64X64 &&
vt.part_variances.none.variance < (thresholds[0] >> 1)) {
x->variance_low[0] = 1;
} else if (xd->mi[0]->sb_type == BLOCK_64X32) {
- if (vt.part_variances.horz[0].variance < (thresholds[0] >> 2))
- x->variance_low[1] = 1;
- if (vt.part_variances.horz[1].variance < (thresholds[0] >> 2))
- x->variance_low[2] = 1;
+ for (j = 0; j < 2; j++) {
+ if (vt.part_variances.horz[j].variance < (thresholds[0] >> 2))
+ x->variance_low[j + 1] = 1;
+ }
} else if (xd->mi[0]->sb_type == BLOCK_32X64) {
- if (vt.part_variances.vert[0].variance < (thresholds[0] >> 2))
- x->variance_low[3] = 1;
- if (vt.part_variances.vert[1].variance < (thresholds[0] >> 2))
- x->variance_low[4] = 1;
+ for (j = 0; j < 2; j++) {
+ if (vt.part_variances.vert[j].variance < (thresholds[0] >> 2))
+ x->variance_low[j + 3] = 1;
+ }
} else {
- // 32x32
for (i = 0; i < 4; i++) {
- if (!force_split[i + 1] &&
- vt.split[i].part_variances.none.variance < (thresholds[1] >> 1))
- x->variance_low[i + 5] = 1;
+ if (!force_split[i + 1]) {
+ // 32x32
+ if (vt.split[i].part_variances.none.variance <
+ (thresholds[1] >> 1))
+ x->variance_low[i + 5] = 1;
+ } else if (cpi->sf.short_circuit_low_temp_var == 2) {
+ int idx[4] = {0, 4, xd->mi_stride << 2, (xd->mi_stride << 2) + 4};
+ const int idx_str = cm->mi_stride * mi_row + mi_col + idx[i];
+ MODE_INFO **this_mi = cm->mi_grid_visible + idx_str;
+ // For 32x16 and 16x32 blocks, the flag is set on each 16x16 block
+ // inside.
+ if ((*this_mi)->sb_type == BLOCK_16X16 ||
+ (*this_mi)->sb_type == BLOCK_32X16 ||
+ (*this_mi)->sb_type == BLOCK_16X32) {
+ for (j = 0; j < 4; j++) {
+ if (vt.split[i].split[j].part_variances.none.variance <
+ (thresholds[2] >> 8))
+ x->variance_low[(i << 2) + j + 9] = 1;
+ }
+ }
+ }
}
}
}
@@ -4417,6 +4443,12 @@
if (!is_inter_block(mi)) {
int plane;
+#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
+ if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) &&
+ (xd->above_mi == NULL || xd->left_mi == NULL) &&
+ need_top_left[mi->uv_mode])
+ assert(0);
+#endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
mi->skip = 1;
for (plane = 0; plane < MAX_MB_PLANE; ++plane)
vp9_encode_intra_block_plane(x, VPXMAX(bsize, BLOCK_8X8), plane, 1);
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 936e625..fdf403e 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -50,16 +50,14 @@
pd->dst.buf, pd->dst.stride);
}
-#define RDTRUNC(RM, DM, R, D) \
- (((1 << (VP9_PROB_COST_SHIFT - 1)) + (R) * (RM)) & \
- ((1 << VP9_PROB_COST_SHIFT) - 1))
-
+// TODO(aconverse): Re-pack this structure.
typedef struct vp9_token_state {
int rate;
- int error;
+ int64_t error;
int next;
int16_t token;
tran_low_t qc;
+ tran_low_t dqc;
} vp9_token_state;
static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] ={ {10, 6}, {8, 7}, };
@@ -68,10 +66,6 @@
{\
rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);\
rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);\
- if (rd_cost0 == rd_cost1) {\
- rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);\
- rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);\
- }\
}
// This function is a place holder for now but may ultimately need
@@ -102,17 +96,19 @@
const int eob = p->eobs[block];
const PLANE_TYPE type = get_plane_type(plane);
const int default_eob = 16 << (tx_size << 1);
- const int mul = 1 + (tx_size == TX_32X32);
- const int16_t *dequant_ptr = pd->dequant;
- const uint8_t *const band_translate = get_band_translate(tx_size);
+ const int shift = (tx_size == TX_32X32);
+ const int16_t* const dequant_ptr = pd->dequant;
+ const uint8_t* const band_translate = get_band_translate(tx_size);
const scan_order *const so = get_scan(xd, tx_size, type, block);
const int16_t *const scan = so->scan;
const int16_t *const nb = so->neighbors;
+ const int dq_step[2] = { dequant_ptr[0] >> shift, dequant_ptr[1] >> shift };
int next = eob, sz = 0;
const int64_t rdmult = (mb->rdmult * plane_rd_mult[ref][type]) >> 1;
const int64_t rddiv = mb->rddiv;
int64_t rd_cost0, rd_cost1;
- int rate0, rate1, error0, error1;
+ int rate0, rate1;
+ int64_t error0, error1;
int16_t t0, t1;
EXTRABIT e0;
int best, band, pt, i, final_eob;
@@ -164,7 +160,7 @@
/* And pick the best. */
best = rd_cost1 < rd_cost0;
base_bits = vp9_get_cost(t0, e0, cat6_high_cost);
- dx = mul * (dqcoeff[rc] - coeff[rc]);
+ dx = (dqcoeff[rc] - coeff[rc]) * (1 << shift);
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
dx >>= xd->bd - 8;
@@ -176,14 +172,15 @@
tokens[i][0].next = next;
tokens[i][0].token = t0;
tokens[i][0].qc = x;
+ tokens[i][0].dqc = dqcoeff[rc];
best_index[i][0] = best;
/* Evaluate the second possibility for this state. */
rate0 = tokens[next][0].rate;
rate1 = tokens[next][1].rate;
- if ((abs(x) * dequant_ptr[rc != 0] > abs(coeff[rc]) * mul) &&
- (abs(x) * dequant_ptr[rc != 0] < abs(coeff[rc]) * mul +
+ if ((abs(x) * dequant_ptr[rc != 0] > (abs(coeff[rc]) << shift)) &&
+ (abs(x) * dequant_ptr[rc != 0] < (abs(coeff[rc]) << shift) +
dequant_ptr[rc != 0]))
shortcut = 1;
else
@@ -192,6 +189,11 @@
if (shortcut) {
sz = -(x < 0);
x -= 2 * sz + 1;
+ } else {
+ tokens[i][1] = tokens[i][0];
+ best_index[i][1] = best_index[i][0];
+ next = i;
+ continue;
}
/* Consider both possible successor states. */
@@ -242,6 +244,24 @@
tokens[i][1].next = next;
tokens[i][1].token = best ? t1 : t0;
tokens[i][1].qc = x;
+
+ if (x) {
+ tran_low_t offset = dq_step[rc != 0];
+ // The 32x32 transform coefficient uses half quantization step size.
+ // Account for the rounding difference in the dequantized coefficient
+ // value when the quantization index is dropped from an even number
+ // to an odd number.
+ if (shift & x)
+ offset += (dequant_ptr[rc != 0] & 0x01);
+
+ if (sz == 0)
+ tokens[i][1].dqc = dqcoeff[rc] - offset;
+ else
+ tokens[i][1].dqc = dqcoeff[rc] + offset;
+ } else {
+ tokens[i][1].dqc = 0;
+ }
+
best_index[i][1] = best;
/* Finally, make this the new head of the trellis. */
next = i;
@@ -281,18 +301,13 @@
UPDATE_RD_COST();
best = rd_cost1 < rd_cost0;
final_eob = -1;
- memset(qcoeff, 0, sizeof(*qcoeff) * (16 << (tx_size * 2)));
- memset(dqcoeff, 0, sizeof(*dqcoeff) * (16 << (tx_size * 2)));
+
for (i = next; i < eob; i = next) {
const int x = tokens[i][best].qc;
const int rc = scan[i];
- if (x) {
- final_eob = i;
- }
-
+ if (x) final_eob = i;
qcoeff[rc] = x;
- dqcoeff[rc] = (x * dequant_ptr[rc != 0]) / mul;
-
+ dqcoeff[rc] = tokens[i][best].dqc;
next = tokens[i][best].next;
best = best_index[i][best];
}
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 8201794..3f88d9c 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -86,6 +86,25 @@
FILE *keyfile;
#endif
+static const Vp9LevelSpec vp9_level_defs[VP9_LEVELS] = {
+ {LEVEL_1, 829440, 36864, 200, 400, 2, 1, 4, 8},
+ {LEVEL_1_1, 2764800, 73728, 800, 1000, 2, 1, 4, 8},
+ {LEVEL_2, 4608000, 122880, 1800, 1500, 2, 1, 4, 8},
+ {LEVEL_2_1, 9216000, 245760, 3600, 2800, 2, 2, 4, 8},
+ {LEVEL_3, 20736000, 552960, 7200, 6000, 2, 4, 4, 8},
+ {LEVEL_3_1, 36864000, 983040, 12000, 10000, 2, 4, 4, 8},
+ {LEVEL_4, 83558400, 2228224, 18000, 16000, 4, 4, 4, 8},
+ {LEVEL_4_1, 160432128, 2228224, 30000, 18000, 4, 4, 5, 6},
+ {LEVEL_5, 311951360, 8912896, 60000, 36000, 6, 8, 6, 4},
+ {LEVEL_5_1, 588251136, 8912896, 120000, 46000, 8, 8, 10, 4},
+ // TODO(huisu): update max_cpb_size for level 5_2 ~ 6_2 when
+ // they are finalized (currently TBD).
+ {LEVEL_5_2, 1176502272, 8912896, 180000, 0, 8, 8, 10, 4},
+ {LEVEL_6, 1176502272, 35651584, 180000, 0, 8, 16, 10, 4},
+ {LEVEL_6_1, 2353004544u, 35651584, 240000, 0, 8, 16, 10, 4},
+ {LEVEL_6_2, 4706009088u, 35651584, 480000, 0, 8, 16, 10, 4},
+};
+
static INLINE void Scale2Ratio(VPX_SCALING mode, int *hr, int *hs) {
switch (mode) {
case NORMAL:
@@ -159,6 +178,39 @@
}
}
+static void init_level_info(Vp9LevelInfo *level_info) {
+ Vp9LevelStats *const level_stats = &level_info->level_stats;
+ Vp9LevelSpec *const level_spec = &level_info->level_spec;
+
+ memset(level_stats, 0, sizeof(*level_stats));
+ memset(level_spec, 0, sizeof(*level_spec));
+ level_spec->level = LEVEL_UNKNOWN;
+ level_spec->min_altref_distance = INT_MAX;
+}
+
+VP9_LEVEL vp9_get_level(const Vp9LevelSpec * const level_spec) {
+ int i;
+ const Vp9LevelSpec *this_level;
+
+ vpx_clear_system_state();
+
+ for (i = 0; i < VP9_LEVELS; ++i) {
+ this_level = &vp9_level_defs[i];
+ if ((double)level_spec->max_luma_sample_rate * (1 + SAMPLE_RATE_GRACE_P) >
+ (double)this_level->max_luma_sample_rate ||
+ level_spec->max_luma_picture_size > this_level->max_luma_picture_size ||
+ level_spec->average_bitrate > this_level->average_bitrate ||
+ level_spec->max_cpb_size > this_level->max_cpb_size ||
+ level_spec->compression_ratio < this_level->compression_ratio ||
+ level_spec->max_col_tiles > this_level->max_col_tiles ||
+ level_spec->min_altref_distance < this_level->min_altref_distance ||
+ level_spec->max_ref_frame_buffers > this_level->max_ref_frame_buffers)
+ continue;
+ break;
+ }
+ return (i == VP9_LEVELS) ? LEVEL_UNKNOWN : vp9_level_defs[i].level;
+}
+
int vp9_set_active_map(VP9_COMP* cpi,
unsigned char* new_map_16x16,
int rows,
@@ -783,7 +835,7 @@
cm->color_range = oxcf->color_range;
cpi->target_level = oxcf->target_level;
- cm->keep_level_stats = oxcf->target_level != LEVEL_NOT_CARE;
+ cpi->keep_level_stats = oxcf->target_level != LEVEL_MAX;
cm->width = oxcf->width;
cm->height = oxcf->height;
@@ -1476,7 +1528,7 @@
cm->color_range = oxcf->color_range;
cpi->target_level = oxcf->target_level;
- cm->keep_level_stats = oxcf->target_level != LEVEL_NOT_CARE;
+ cpi->keep_level_stats = oxcf->target_level != LEVEL_MAX;
if (cm->profile <= PROFILE_1)
assert(cm->bit_depth == VPX_BITS_8);
@@ -1660,7 +1712,6 @@
} while (++i <= MV_MAX);
}
-
VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
BufferPool *const pool) {
unsigned int i;
@@ -1749,6 +1800,9 @@
cpi->multi_arf_last_grp_enabled = 0;
cpi->b_calculate_psnr = CONFIG_INTERNAL_STATS;
+
+ init_level_info(&cpi->level_info);
+
#if CONFIG_INTERNAL_STATS
cpi->b_calculate_ssimg = 0;
cpi->b_calculate_blockiness = 1;
@@ -2199,7 +2253,7 @@
static void encoder_highbd_variance64(const uint8_t *a8, int a_stride,
const uint8_t *b8, int b_stride,
int w, int h, uint64_t *sse,
- uint64_t *sum) {
+ int64_t *sum) {
int i, j;
uint16_t *a = CONVERT_TO_SHORTPTR(a8);
@@ -2223,7 +2277,7 @@
int w, int h,
unsigned int *sse, int *sum) {
uint64_t sse_long = 0;
- uint64_t sum_long = 0;
+ int64_t sum_long = 0;
encoder_highbd_variance64(a8, a_stride, b8, b_stride, w, h,
&sse_long, &sum_long);
*sse = (unsigned int)sse_long;
@@ -2798,7 +2852,7 @@
} else if (vp9_preserve_existing_gf(cpi)) {
// We have decided to preserve the previously existing golden frame as our
// new ARF frame. However, in the short term in function
- // vp9_bitstream.c::get_refresh_mask() we left it in the GF slot and, if
+ // vp9_get_refresh_mask() we left it in the GF slot and, if
// we're updating the GF with the current decoded frame, we save it to the
// ARF slot instead.
// We now have to update the ARF with the current frame and swap gld_fb_idx
@@ -4420,6 +4474,124 @@
}
#endif // CONFIG_INTERNAL_STATS
+static void update_level_info(VP9_COMP *cpi, size_t *size, int arf_src_index) {
+ VP9_COMMON *const cm = &cpi->common;
+ Vp9LevelInfo *const level_info = &cpi->level_info;
+ Vp9LevelSpec *const level_spec = &level_info->level_spec;
+ Vp9LevelStats *const level_stats = &level_info->level_stats;
+ int i, idx;
+ uint64_t luma_samples, dur_end;
+ const uint32_t luma_pic_size = cm->width * cm->height;
+ double cpb_data_size;
+
+ vpx_clear_system_state();
+
+ // update level_stats
+ level_stats->total_compressed_size += *size;
+ if (cm->show_frame) {
+ level_stats->total_uncompressed_size +=
+ luma_pic_size +
+ 2 * (luma_pic_size >> (cm->subsampling_x + cm->subsampling_y));
+ level_stats->time_encoded =
+ (cpi->last_end_time_stamp_seen - cpi->first_time_stamp_ever) /
+ (double)TICKS_PER_SEC;
+ }
+
+ if (arf_src_index > 0) {
+ if (!level_stats->seen_first_altref) {
+ level_stats->seen_first_altref = 1;
+ } else if (level_stats->frames_since_last_altref <
+ level_spec->min_altref_distance) {
+ level_spec->min_altref_distance = level_stats->frames_since_last_altref;
+ }
+ level_stats->frames_since_last_altref = 0;
+ } else {
+ ++level_stats->frames_since_last_altref;
+ }
+
+ if (level_stats->frame_window_buffer.len < FRAME_WINDOW_SIZE - 1) {
+ idx = (level_stats->frame_window_buffer.start +
+ level_stats->frame_window_buffer.len++) % FRAME_WINDOW_SIZE;
+ } else {
+ idx = level_stats->frame_window_buffer.start;
+ level_stats->frame_window_buffer.start = (idx + 1) % FRAME_WINDOW_SIZE;
+ }
+ level_stats->frame_window_buffer.buf[idx].ts = cpi->last_time_stamp_seen;
+ level_stats->frame_window_buffer.buf[idx].size = (uint32_t)(*size);
+ level_stats->frame_window_buffer.buf[idx].luma_samples = luma_pic_size;
+
+ if (cm->frame_type == KEY_FRAME) {
+ level_stats->ref_refresh_map = 0;
+ } else {
+ int count = 0;
+ level_stats->ref_refresh_map |= vp9_get_refresh_mask(cpi);
+ // Also need to consider the case where the encoder refers to a buffer
+ // that has been implicitly refreshed after encoding a keyframe.
+ if (!cm->intra_only) {
+ level_stats->ref_refresh_map |= (1 << cpi->lst_fb_idx);
+ level_stats->ref_refresh_map |= (1 << cpi->gld_fb_idx);
+ level_stats->ref_refresh_map |= (1 << cpi->alt_fb_idx);
+ }
+ for (i = 0; i < REF_FRAMES; ++i) {
+ count += (level_stats->ref_refresh_map >> i) & 1;
+ }
+ if (count > level_spec->max_ref_frame_buffers) {
+ level_spec->max_ref_frame_buffers = count;
+ }
+ }
+
+ // update average_bitrate
+ level_spec->average_bitrate =
+ (double)level_stats->total_compressed_size / 125.0 /
+ level_stats->time_encoded;
+
+ // update max_luma_sample_rate
+ luma_samples = 0;
+ for (i = 0; i < level_stats->frame_window_buffer.len; ++i) {
+ idx = (level_stats->frame_window_buffer.start +
+ level_stats->frame_window_buffer.len - 1 - i) % FRAME_WINDOW_SIZE;
+ if (i == 0) {
+ dur_end = level_stats->frame_window_buffer.buf[idx].ts;
+ }
+ if (dur_end - level_stats->frame_window_buffer.buf[idx].ts >=
+ TICKS_PER_SEC) {
+ break;
+ }
+ luma_samples += level_stats->frame_window_buffer.buf[idx].luma_samples;
+ }
+ if (luma_samples > level_spec->max_luma_sample_rate) {
+ level_spec->max_luma_sample_rate = luma_samples;
+ }
+
+ // update max_cpb_size
+ cpb_data_size = 0;
+ for (i = 0; i < CPB_WINDOW_SIZE; ++i) {
+ if (i >= level_stats->frame_window_buffer.len) break;
+ idx = (level_stats->frame_window_buffer.start +
+ level_stats->frame_window_buffer.len - 1 - i) % FRAME_WINDOW_SIZE;
+ cpb_data_size += level_stats->frame_window_buffer.buf[idx].size;
+ }
+ cpb_data_size = cpb_data_size / 125.0;
+ if (cpb_data_size > level_spec->max_cpb_size) {
+ level_spec->max_cpb_size = cpb_data_size;
+ }
+
+ // update max_luma_picture_size
+ if (luma_pic_size > level_spec->max_luma_picture_size) {
+ level_spec->max_luma_picture_size = luma_pic_size;
+ }
+
+ // update compression_ratio
+ level_spec->compression_ratio =
+ (double)level_stats->total_uncompressed_size * cm->bit_depth /
+ level_stats->total_compressed_size / 8.0;
+
+ // update max_col_tiles
+ if (level_spec->max_col_tiles < (1 << cm->log2_tile_cols)) {
+ level_spec->max_col_tiles = (1 << cm->log2_tile_cols);
+ }
+}
+
int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
size_t *size, uint8_t *dest,
int64_t *time_stamp, int64_t *time_end, int flush) {
@@ -4690,6 +4862,9 @@
if (cpi->b_calculate_psnr && oxcf->pass != 1 && cm->show_frame)
generate_psnr_packet(cpi);
+ if (cpi->keep_level_stats && oxcf->pass != 1)
+ update_level_info(cpi, size, arf_src_index);
+
#if CONFIG_INTERNAL_STATS
if (oxcf->pass != 1) {
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index 6be61ac..b65dfa8 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -20,6 +20,7 @@
#include "vpx_dsp/ssim.h"
#endif
#include "vpx_dsp/variance.h"
+#include "vpx_ports/system_state.h"
#include "vpx_util/vpx_thread.h"
#include "vp9/common/vp9_alloccommon.h"
@@ -51,6 +52,9 @@
extern "C" {
#endif
+// vp9 uses 10,000,000 ticks/second as time stamp
+#define TICKS_PER_SEC 10000000
+
typedef struct {
int nmvjointcost[MV_JOINTS];
int nmvcosts[2][MV_VALS];
@@ -297,6 +301,69 @@
double worst;
} ImageStat;
+#define CPB_WINDOW_SIZE 4
+#define FRAME_WINDOW_SIZE 128
+#define SAMPLE_RATE_GRACE_P 0.015
+#define VP9_LEVELS 14
+
+typedef enum {
+ LEVEL_UNKNOWN = 0,
+ LEVEL_1 = 10,
+ LEVEL_1_1 = 11,
+ LEVEL_2 = 20,
+ LEVEL_2_1 = 21,
+ LEVEL_3 = 30,
+ LEVEL_3_1 = 31,
+ LEVEL_4 = 40,
+ LEVEL_4_1 = 41,
+ LEVEL_5 = 50,
+ LEVEL_5_1 = 51,
+ LEVEL_5_2 = 52,
+ LEVEL_6 = 60,
+ LEVEL_6_1 = 61,
+ LEVEL_6_2 = 62,
+ LEVEL_MAX = 255
+} VP9_LEVEL;
+
+typedef struct {
+ VP9_LEVEL level;
+ uint64_t max_luma_sample_rate;
+ uint32_t max_luma_picture_size;
+ double average_bitrate; // in kilobits per second
+ double max_cpb_size; // in kilobits
+ double compression_ratio;
+ uint8_t max_col_tiles;
+ uint32_t min_altref_distance;
+ uint8_t max_ref_frame_buffers;
+} Vp9LevelSpec;
+
+typedef struct {
+ int64_t ts; // timestamp
+ uint32_t luma_samples;
+ uint32_t size; // in bytes
+} FrameRecord;
+
+typedef struct {
+ FrameRecord buf[FRAME_WINDOW_SIZE];
+ uint8_t start;
+ uint8_t len;
+} FrameWindowBuffer;
+
+typedef struct {
+ uint8_t seen_first_altref;
+ uint32_t frames_since_last_altref;
+ uint64_t total_compressed_size;
+ uint64_t total_uncompressed_size;
+ double time_encoded; // in seconds
+ FrameWindowBuffer frame_window_buffer;
+ int ref_refresh_map;
+} Vp9LevelStats;
+
+typedef struct {
+ Vp9LevelStats level_stats;
+ Vp9LevelSpec level_spec;
+} Vp9LevelInfo;
+
typedef struct VP9_COMP {
QUANTS quants;
ThreadData td;
@@ -519,6 +586,9 @@
VPxWorker *workers;
struct EncWorkerData *tile_thr_data;
VP9LfSync lf_row_sync;
+
+ int keep_level_stats;
+ Vp9LevelInfo level_info;
} VP9_COMP;
void vp9_initialize_enc(void);
@@ -674,6 +744,8 @@
return cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? cost_list : NULL;
}
+VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec);
+
void vp9_new_framerate(VP9_COMP *cpi, double framerate);
#define LAYER_IDS_TO_IDX(sl, tl, num_tl) ((sl) * (num_tl) + (tl))
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index a70eaea..f6e61b6 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -41,8 +41,6 @@
#define OUTPUT_FPF 0
#define ARF_STATS_OUTPUT 0
-#define GROUP_ADAPTIVE_MAXQ 1
-
#define BOOST_BREAKOUT 12.5
#define BOOST_FACTOR 12.5
#define FACTOR_PT_LOW 0.70
@@ -1343,6 +1341,7 @@
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
const int is_two_pass_svc = (svc->number_spatial_layers > 1) ||
(svc->number_temporal_layers > 1);
+ RATE_CONTROL *const rc = &cpi->rc;
TWO_PASS *const twopass = is_two_pass_svc ?
&svc->layer_context[svc->spatial_layer_id].twopass : &cpi->twopass;
double frame_rate;
@@ -1399,15 +1398,21 @@
}
// Reset the vbr bits off target counters
- cpi->rc.vbr_bits_off_target = 0;
- cpi->rc.vbr_bits_off_target_fast = 0;
-
- cpi->rc.rate_error_estimate = 0;
+ rc->vbr_bits_off_target = 0;
+ rc->vbr_bits_off_target_fast = 0;
+ rc->rate_error_estimate = 0;
// Static sequence monitor variables.
twopass->kf_zeromotion_pct = 100;
twopass->last_kfgroup_zeromotion_pct = 100;
+ // Initialize bits per macro_block estimate correction factor.
+ twopass->bpm_factor = 1.0;
+ // Initialize actual and target bits counters for ARF groups so that
+ // at the start we have a neutral bpm adjustment.
+ twopass->rolling_arf_group_target_bits = 1;
+ twopass->rolling_arf_group_actual_bits = 1;
+
if (oxcf->resize_mode != RESIZE_NONE) {
init_subsampling(cpi);
}
@@ -1932,9 +1937,7 @@
double boost_score = 0.0;
double old_boost_score = 0.0;
double gf_group_err = 0.0;
-#if GROUP_ADAPTIVE_MAXQ
double gf_group_raw_error = 0.0;
-#endif
double gf_group_skip_pct = 0.0;
double gf_group_inactive_zone_rows = 0.0;
double gf_first_frame_err = 0.0;
@@ -1984,9 +1987,7 @@
// the error score / cost of this frame has already been accounted for.
if (arf_active_or_kf) {
gf_group_err -= gf_first_frame_err;
-#if GROUP_ADAPTIVE_MAXQ
gf_group_raw_error -= this_frame->coded_error;
-#endif
gf_group_skip_pct -= this_frame->intra_skip_pct;
gf_group_inactive_zone_rows -= this_frame->inactive_zone_rows;
}
@@ -2040,9 +2041,7 @@
// Accumulate error score of frames in this gf group.
mod_frame_err = calculate_modified_err(cpi, twopass, oxcf, this_frame);
gf_group_err += mod_frame_err;
-#if GROUP_ADAPTIVE_MAXQ
gf_group_raw_error += this_frame->coded_error;
-#endif
gf_group_skip_pct += this_frame->intra_skip_pct;
gf_group_inactive_zone_rows += this_frame->inactive_zone_rows;
@@ -2142,9 +2141,7 @@
if (EOF == input_stats(twopass, this_frame))
break;
gf_group_err += calculate_modified_err(cpi, twopass, oxcf, this_frame);
-#if GROUP_ADAPTIVE_MAXQ
gf_group_raw_error += this_frame->coded_error;
-#endif
gf_group_skip_pct += this_frame->intra_skip_pct;
gf_group_inactive_zone_rows += this_frame->inactive_zone_rows;
}
@@ -2159,7 +2156,6 @@
// Calculate the bits to be allocated to the gf/arf group as a whole
gf_group_bits = calculate_total_gf_group_bits(cpi, gf_group_err);
-#if GROUP_ADAPTIVE_MAXQ
// Calculate an estimate of the maxq needed for the group.
// We are more agressive about correcting for sections
// where there could be significant overshoot than for easier
@@ -2181,7 +2177,6 @@
twopass->active_worst_quality =
(tmp_q + (twopass->active_worst_quality * 3)) >> 2;
}
-#endif
// Calculate the extra bits to be used for boosted frame(s)
gf_arf_bits = calculate_boost_bits(rc->baseline_gf_interval,
@@ -2718,13 +2713,6 @@
((double)cm->mb_rows * section_length);
int tmp_q;
- // Initialize bits per macro_block estimate correction factor.
- twopass->bpm_factor = 1.0;
- // Initiallize actual and target bits counters for ARF groups so that
- // at the start we have a neutral bpm adjustment.
- twopass->rolling_arf_group_target_bits = 1;
- twopass->rolling_arf_group_actual_bits = 1;
-
tmp_q = get_twopass_worst_quality(cpi, section_error,
section_intra_skip + section_inactive_zone, section_target_bandwidth);
diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c
index 3ae5b21..14a0b16 100644
--- a/vp9/encoder/vp9_mbgraph.c
+++ b/vp9/encoder/vp9_mbgraph.c
@@ -59,8 +59,8 @@
// Try sub-pixel MC
// if (bestsme > error_thresh && bestsme < INT_MAX)
{
- int distortion;
- unsigned int sse;
+ uint32_t distortion;
+ uint32_t sse;
cpi->find_fractional_mv_step(
x, dst_mv, ref_mv, cpi->common.allow_high_precision_mv, x->errorperbit,
&v_fn_ptr, 0, mv_sf->subpel_iters_per_step,
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index f3ffe35..e747277 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -162,6 +162,33 @@
return &buf[(r >> 3) * stride + (c >> 3)];
}
+#if CONFIG_VP9_HIGHBITDEPTH
+/* checks if (r, c) has better score than previous best */
+#define CHECK_BETTER(v, r, c) \
+ if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
+ int64_t tmpmse; \
+ if (second_pred == NULL) { \
+ thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), \
+ sp(r), z, src_stride, &sse); \
+ } else { \
+ thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), \
+ sp(r), z, src_stride, &sse, second_pred); \
+ } \
+ tmpmse = thismse; \
+ tmpmse += MVC(r, c); \
+ if (tmpmse >= INT_MAX) { \
+ v = INT_MAX; \
+ } else if ((v = (uint32_t)tmpmse) < besterr) { \
+ besterr = v; \
+ br = r; \
+ bc = c; \
+ *distortion = thismse; \
+ *sse1 = sse; \
+ } \
+ } else { \
+ v = INT_MAX; \
+ }
+#else
/* checks if (r, c) has better score than previous best */
#define CHECK_BETTER(v, r, c) \
if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
@@ -182,6 +209,7 @@
v = INT_MAX; \
}
+#endif
#define FIRST_LEVEL_CHECKS \
{ \
unsigned int left, right, up, down, diag; \
@@ -310,10 +338,10 @@
const uint8_t *second_pred,
int w, int h, int offset,
int *mvjcost, int *mvcost[2],
- unsigned int *sse1,
- int *distortion) {
- unsigned int besterr;
+ uint32_t *sse1,
+ uint32_t *distortion) {
#if CONFIG_VP9_HIGHBITDEPTH
+ uint64_t besterr;
if (second_pred != NULL) {
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
DECLARE_ALIGNED(16, uint16_t, comp_pred16[64 * 64]);
@@ -329,9 +357,13 @@
} else {
besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1);
}
- *distortion = besterr;
+ *distortion = (uint32_t)besterr;
besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
+ if (besterr >= UINT32_MAX)
+ return UINT32_MAX;
+ return (uint32_t)besterr;
#else
+ uint32_t besterr;
(void) xd;
if (second_pred != NULL) {
DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]);
@@ -342,8 +374,8 @@
}
*distortion = besterr;
besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
-#endif // CONFIG_VP9_HIGHBITDEPTH
return besterr;
+#endif // CONFIG_VP9_HIGHBITDEPTH
}
static INLINE int divide_and_round(const int n, const int d) {
@@ -373,7 +405,7 @@
(cost_list[4] - 2 * cost_list[0] + cost_list[2]));
}
-int vp9_skip_sub_pixel_tree(
+uint32_t vp9_skip_sub_pixel_tree(
const MACROBLOCK *x,
MV *bestmv, const MV *ref_mv,
int allow_hp,
@@ -383,8 +415,8 @@
int iters_per_step,
int *cost_list,
int *mvjcost, int *mvcost[2],
- int *distortion,
- unsigned int *sse1,
+ uint32_t *distortion,
+ uint32_t *sse1,
const uint8_t *second_pred,
int w, int h) {
SETUP_SUBPEL_SEARCH;
@@ -418,7 +450,7 @@
return besterr;
}
-int vp9_find_best_sub_pixel_tree_pruned_evenmore(
+uint32_t vp9_find_best_sub_pixel_tree_pruned_evenmore(
const MACROBLOCK *x,
MV *bestmv, const MV *ref_mv,
int allow_hp,
@@ -428,8 +460,8 @@
int iters_per_step,
int *cost_list,
int *mvjcost, int *mvcost[2],
- int *distortion,
- unsigned int *sse1,
+ uint32_t *distortion,
+ uint32_t *sse1,
const uint8_t *second_pred,
int w, int h) {
SETUP_SUBPEL_SEARCH;
@@ -498,7 +530,7 @@
return besterr;
}
-int vp9_find_best_sub_pixel_tree_pruned_more(const MACROBLOCK *x,
+uint32_t vp9_find_best_sub_pixel_tree_pruned_more(const MACROBLOCK *x,
MV *bestmv, const MV *ref_mv,
int allow_hp,
int error_per_bit,
@@ -507,8 +539,8 @@
int iters_per_step,
int *cost_list,
int *mvjcost, int *mvcost[2],
- int *distortion,
- unsigned int *sse1,
+ uint32_t *distortion,
+ uint32_t *sse1,
const uint8_t *second_pred,
int w, int h) {
SETUP_SUBPEL_SEARCH;
@@ -572,7 +604,7 @@
return besterr;
}
-int vp9_find_best_sub_pixel_tree_pruned(const MACROBLOCK *x,
+uint32_t vp9_find_best_sub_pixel_tree_pruned(const MACROBLOCK *x,
MV *bestmv, const MV *ref_mv,
int allow_hp,
int error_per_bit,
@@ -581,8 +613,8 @@
int iters_per_step,
int *cost_list,
int *mvjcost, int *mvcost[2],
- int *distortion,
- unsigned int *sse1,
+ uint32_t *distortion,
+ uint32_t *sse1,
const uint8_t *second_pred,
int w, int h) {
SETUP_SUBPEL_SEARCH;
@@ -674,19 +706,19 @@
{0, -1}, {0, 1}, {-1, 0}, {1, 0}
};
-int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x,
- MV *bestmv, const MV *ref_mv,
- int allow_hp,
- int error_per_bit,
- const vp9_variance_fn_ptr_t *vfp,
- int forced_stop,
- int iters_per_step,
- int *cost_list,
- int *mvjcost, int *mvcost[2],
- int *distortion,
- unsigned int *sse1,
- const uint8_t *second_pred,
- int w, int h) {
+uint32_t vp9_find_best_sub_pixel_tree(const MACROBLOCK *x,
+ MV *bestmv, const MV *ref_mv,
+ int allow_hp,
+ int error_per_bit,
+ const vp9_variance_fn_ptr_t *vfp,
+ int forced_stop,
+ int iters_per_step,
+ int *cost_list,
+ int *mvjcost, int *mvcost[2],
+ uint32_t *distortion,
+ uint32_t *sse1,
+ const uint8_t *second_pred,
+ int w, int h) {
const uint8_t *const z = x->plane[0].src.buf;
const uint8_t *const src_address = z;
const int src_stride = x->plane[0].src.stride;
@@ -1381,12 +1413,22 @@
const struct buf_2d *const what = &x->plane[0].src;
const struct buf_2d *const in_what = &xd->plane[0].pre[0];
const MV mv = {best_mv->row * 8, best_mv->col * 8};
- unsigned int unused;
-
+ uint32_t unused;
+#if CONFIG_VP9_HIGHBITDEPTH
+ uint64_t err= vfp->vf(what->buf, what->stride,
+ get_buf_from_mv(in_what, best_mv),
+ in_what->stride, &unused);
+ err += (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost,
+ x->mvcost, x->errorperbit) : 0);
+ if (err >= INT_MAX)
+ return INT_MAX;
+ return (int)err;
+#else
return vfp->vf(what->buf, what->stride,
get_buf_from_mv(in_what, best_mv), in_what->stride, &unused) +
(use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost,
x->mvcost, x->errorperbit) : 0);
+#endif
}
int vp9_get_mvpred_av_var(const MACROBLOCK *x,
diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h
index 1b0c860..86cd267 100644
--- a/vp9/encoder/vp9_mcomp.h
+++ b/vp9/encoder/vp9_mcomp.h
@@ -74,7 +74,7 @@
BLOCK_SIZE bsize,
int mi_row, int mi_col);
-typedef int (fractional_mv_step_fp) (
+typedef uint32_t (fractional_mv_step_fp) (
const MACROBLOCK *x,
MV *bestmv, const MV *ref_mv,
int allow_hp,
@@ -84,7 +84,7 @@
int iters_per_step,
int *cost_list,
int *mvjcost, int *mvcost[2],
- int *distortion, unsigned int *sse1,
+ uint32_t *distortion, uint32_t *sse1,
const uint8_t *second_pred,
int w, int h);
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 554409b..c13f24f 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -40,6 +40,14 @@
int in_use;
} PRED_BUFFER;
+
+static const int pos_shift_16x16[4][4] = {
+ {9, 10, 13, 14},
+ {11, 12, 15, 16},
+ {17, 18, 21, 22},
+ {19, 20, 23, 24}
+};
+
static int mv_refs_rt(VP9_COMP *cpi, const VP9_COMMON *cm,
const MACROBLOCK *x,
const MACROBLOCKD *xd,
@@ -149,7 +157,7 @@
const int ref = mi->ref_frame[0];
const MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;
MV center_mv;
- int dis;
+ uint32_t dis;
int rate_mode;
const int tmp_col_min = x->mv_col_min;
const int tmp_col_max = x->mv_col_max;
@@ -582,39 +590,46 @@
}
#if CONFIG_VP9_HIGHBITDEPTH
-static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *dist,
- int *skippable, int64_t *sse, int plane,
- BLOCK_SIZE bsize, TX_SIZE tx_size) {
+static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *this_rdc,
+ int *skippable, int64_t *sse, BLOCK_SIZE bsize,
+ TX_SIZE tx_size) {
MACROBLOCKD *xd = &x->e_mbd;
unsigned int var_y, sse_y;
- (void)plane;
+
(void)tx_size;
- model_rd_for_sb_y(cpi, bsize, x, xd, rate, dist, &var_y, &sse_y);
+ model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc->rate, &this_rdc->dist, &var_y,
+ &sse_y);
*sse = INT_MAX;
*skippable = 0;
return;
}
#else
-static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *dist,
- int *skippable, int64_t *sse, int plane,
- BLOCK_SIZE bsize, TX_SIZE tx_size) {
+static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *this_rdc,
+ int *skippable, int64_t *sse, BLOCK_SIZE bsize,
+ TX_SIZE tx_size) {
MACROBLOCKD *xd = &x->e_mbd;
- const struct macroblockd_plane *pd = &xd->plane[plane];
- const struct macroblock_plane *const p = &x->plane[plane];
+ const struct macroblockd_plane *pd = &xd->plane[0];
+ struct macroblock_plane *const p = &x->plane[0];
const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
const int step = 1 << (tx_size << 1);
const int block_step = (1 << tx_size);
int block = 0, r, c;
- int shift = tx_size == TX_32X32 ? 0 : 2;
const int max_blocks_wide = num_4x4_w + (xd->mb_to_right_edge >= 0 ? 0 :
- xd->mb_to_right_edge >> (5 + pd->subsampling_x));
+ xd->mb_to_right_edge >> 5);
const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? 0 :
- xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
+ xd->mb_to_bottom_edge >> 5);
int eob_cost = 0;
+ const int bw = 4 * num_4x4_w;
+ const int bh = 4 * num_4x4_h;
(void)cpi;
- vp9_subtract_plane(x, bsize, plane);
+
+ // The max tx_size passed in is TX_16X16.
+ assert(tx_size != TX_32X32);
+
+ vpx_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride,
+ pd->dst.buf, pd->dst.stride);
*skippable = 1;
// Keep track of the row and column of the blocks we use so that we know
// if we are in the unrestricted motion border.
@@ -626,18 +641,11 @@
tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
uint16_t *const eob = &p->eobs[block];
- const int diff_stride = 4 * num_4x4_blocks_wide_lookup[bsize];
+ const int diff_stride = bw;
const int16_t *src_diff;
src_diff = &p->src_diff[(r * diff_stride + c) << 2];
switch (tx_size) {
- case TX_32X32:
- vpx_fdct32x32_rd(src_diff, coeff, diff_stride);
- vp9_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin,
- p->round_fp, p->quant_fp, p->quant_shift,
- qcoeff, dqcoeff, pd->dequant, eob,
- scan_order->scan, scan_order->iscan);
- break;
case TX_16X16:
vpx_hadamard_16x16(src_diff, diff_stride, (int16_t *)coeff);
vp9_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp,
@@ -670,18 +678,17 @@
}
}
- if (*skippable && *sse < INT64_MAX) {
- *rate = 0;
- *dist = (*sse << 6) >> shift;
- *sse = *dist;
- return;
+ this_rdc->rate = 0;
+ if (*sse < INT64_MAX) {
+ *sse = (*sse << 6) >> 2;
+ if (*skippable) {
+ this_rdc->dist = *sse;
+ return;
+ }
}
block = 0;
- *rate = 0;
- *dist = 0;
- if (*sse < INT64_MAX)
- *sse = (*sse << 6) >> shift;
+ this_rdc->dist = 0;
for (r = 0; r < max_blocks_high; r += block_step) {
for (c = 0; c < num_4x4_w; c += block_step) {
if (c < max_blocks_wide) {
@@ -691,25 +698,26 @@
uint16_t *const eob = &p->eobs[block];
if (*eob == 1)
- *rate += (int)abs(qcoeff[0]);
+ this_rdc->rate += (int)abs(qcoeff[0]);
else if (*eob > 1)
- *rate += vpx_satd((const int16_t *)qcoeff, step << 4);
+ this_rdc->rate += vpx_satd((const int16_t *)qcoeff, step << 4);
- *dist += vp9_block_error_fp(coeff, dqcoeff, step << 4) >> shift;
+ this_rdc->dist +=
+ vp9_block_error_fp(coeff, dqcoeff, step << 4) >> 2;
}
block += step;
}
}
// If skippable is set, rate gets clobbered later.
- *rate <<= (2 + VP9_PROB_COST_SHIFT);
- *rate += (eob_cost << VP9_PROB_COST_SHIFT);
+ this_rdc->rate <<= (2 + VP9_PROB_COST_SHIFT);
+ this_rdc->rate += (eob_cost << VP9_PROB_COST_SHIFT);
}
#endif
static void model_rd_for_sb_uv(VP9_COMP *cpi, BLOCK_SIZE plane_bsize,
MACROBLOCK *x, MACROBLOCKD *xd,
- int *out_rate_sum, int64_t *out_dist_sum,
+ RD_COST *this_rdc,
unsigned int *var_y, unsigned int *sse_y,
int start_plane, int stop_plane) {
// Note our transform coeffs are 8 times an orthogonal transform.
@@ -720,8 +728,8 @@
int64_t dist;
int i;
- *out_rate_sum = 0;
- *out_dist_sum = 0;
+ this_rdc->rate = 0;
+ this_rdc->dist = 0;
for (i = start_plane; i <= stop_plane; ++i) {
struct macroblock_plane *const p = &x->plane[i];
@@ -752,8 +760,8 @@
dc_quant >> 3, &rate, &dist);
#endif // CONFIG_VP9_HIGHBITDEPTH
- *out_rate_sum += rate >> 1;
- *out_dist_sum += dist << 3;
+ this_rdc->rate += rate >> 1;
+ this_rdc->dist += dist << 3;
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
@@ -768,8 +776,8 @@
ac_quant >> 3, &rate, &dist);
#endif // CONFIG_VP9_HIGHBITDEPTH
- *out_rate_sum += rate;
- *out_dist_sum += dist << 4;
+ this_rdc->rate += rate;
+ this_rdc->dist += dist << 4;
}
}
@@ -906,8 +914,7 @@
MACROBLOCK *x;
PREDICTION_MODE mode;
int skippable;
- int rate;
- int64_t dist;
+ RD_COST *rdc;
};
static void estimate_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
@@ -924,8 +931,7 @@
const int src_stride = p->src.stride;
const int dst_stride = pd->dst.stride;
int i, j;
- int rate;
- int64_t dist;
+ RD_COST this_rdc;
txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
@@ -942,19 +948,19 @@
if (plane == 0) {
int64_t this_sse = INT64_MAX;
// TODO(jingning): This needs further refactoring.
- block_yrd(cpi, x, &rate, &dist, &args->skippable, &this_sse, 0,
- bsize_tx, VPXMIN(tx_size, TX_16X16));
+ block_yrd(cpi, x, &this_rdc, &args->skippable, &this_sse, bsize_tx,
+ VPXMIN(tx_size, TX_16X16));
} else {
unsigned int var = 0;
unsigned int sse = 0;
- model_rd_for_sb_uv(cpi, plane_bsize, x, xd, &rate, &dist, &var, &sse,
- plane, plane);
+ model_rd_for_sb_uv(cpi, plane_bsize, x, xd, &this_rdc, &var, &sse, plane,
+ plane);
}
p->src.buf = src_buf_base;
pd->dst.buf = dst_buf_base;
- args->rate += rate;
- args->dist += dist;
+ args->rdc->rate += this_rdc.rate;
+ args->rdc->dist += this_rdc.dist;
}
static const THR_MODES mode_idx[MAX_REF_FRAMES - 1][4] = {
@@ -1007,7 +1013,7 @@
MODE_INFO *const mi = xd->mi[0];
RD_COST this_rdc, best_rdc;
PREDICTION_MODE this_mode;
- struct estimate_block_intra_args args = { cpi, x, DC_PRED, 1, 0, 0 };
+ struct estimate_block_intra_args args = { cpi, x, DC_PRED, 1, 0 };
const TX_SIZE intra_tx_size =
VPXMIN(max_txsize_lookup[bsize],
tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
@@ -1031,22 +1037,20 @@
// Change the limit of this loop to add other intra prediction
// mode tests.
for (this_mode = DC_PRED; this_mode <= H_PRED; ++this_mode) {
+ this_rdc.dist = this_rdc.rate = 0;
args.mode = this_mode;
args.skippable = 1;
- args.rate = 0;
- args.dist = 0;
+ args.rdc = &this_rdc;
mi->tx_size = intra_tx_size;
vp9_foreach_transformed_block_in_plane(xd, bsize, 0,
estimate_block_intra, &args);
if (args.skippable) {
x->skip_txfm[0] = SKIP_TXFM_AC_DC;
- args.rate = vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), 1);
+ this_rdc.rate = vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), 1);
} else {
x->skip_txfm[0] = SKIP_TXFM_NONE;
- args.rate += vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), 0);
+ this_rdc.rate += vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), 0);
}
- this_rdc.rate = args.rate;
- this_rdc.dist = args.dist;
this_rdc.rate += bmode_costs[this_mode];
this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
this_rdc.rate, this_rdc.dist);
@@ -1270,9 +1274,11 @@
}
#endif // CONFIG_VP9_TEMPORAL_DENOISING
-static INLINE int set_force_skip_low_temp_var(uint8_t *variance_low,
+static INLINE int get_force_skip_low_temp_var(uint8_t *variance_low,
int mi_row, int mi_col,
BLOCK_SIZE bsize) {
+ const int i = (mi_row & 0x7) >> 1;
+ const int j = (mi_col & 0x7) >> 1;
int force_skip_low_temp_var = 0;
// Set force_skip_low_temp_var based on the block size and block offset.
if (bsize == BLOCK_64X64) {
@@ -1299,6 +1305,19 @@
} else if ((mi_col & 0x7) && (mi_row & 0x7)) {
force_skip_low_temp_var = variance_low[8];
}
+ } else if (bsize == BLOCK_16X16) {
+ force_skip_low_temp_var = variance_low[pos_shift_16x16[i][j]];
+ } else if (bsize == BLOCK_32X16) {
+ // The col shift index for the second 16x16 block.
+ const int j2 = ((mi_col + 2) & 0x7) >> 1;
+ // Only if each 16x16 block inside has low temporal variance.
+ force_skip_low_temp_var = variance_low[pos_shift_16x16[i][j]] &&
+ variance_low[pos_shift_16x16[i][j2]];
+ } else if (bsize == BLOCK_16X32) {
+ // The row shift index for the second 16x16 block.
+ const int i2 = ((mi_row + 2) & 0x7) >> 1;
+ force_skip_low_temp_var = variance_low[pos_shift_16x16[i][j]] &&
+ variance_low[pos_shift_16x16[i2][j]];
}
return force_skip_low_temp_var;
}
@@ -1450,7 +1469,7 @@
if (cpi->sf.short_circuit_low_temp_var) {
force_skip_low_temp_var =
- set_force_skip_low_temp_var(&x->variance_low[0], mi_row, mi_col, bsize);
+ get_force_skip_low_temp_var(&x->variance_low[0], mi_row, mi_col, bsize);
}
if (!((cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) &&
@@ -1503,6 +1522,12 @@
continue;
}
+ if (cpi->sf.short_circuit_low_temp_var == 2 &&
+ force_skip_low_temp_var && ref_frame == LAST_FRAME &&
+ this_mode == NEWMV) {
+ continue;
+ }
+
if (cpi->use_svc) {
if (svc_force_zero_mode[ref_frame - 1] &&
frame_mv[this_mode][ref_frame].as_int != 0)
@@ -1539,7 +1564,8 @@
!cpi->use_svc &&
cpi->oxcf.rc_mode == VPX_CBR) {
int tmp_sad;
- int dis, cost_list[5];
+ uint32_t dis;
+ int cost_list[5];
if (bsize < BLOCK_16X16)
continue;
@@ -1640,8 +1666,9 @@
if ((this_mode == NEWMV || filter_ref == SWITCHABLE) && pred_filter_search
&& (ref_frame == LAST_FRAME ||
- (ref_frame == GOLDEN_FRAME && cpi->use_svc))
- && (((mi->mv[0].as_mv.row | mi->mv[0].as_mv.col) & 0x07) != 0)) {
+ (ref_frame == GOLDEN_FRAME &&
+ (cpi->use_svc || cpi->oxcf.rc_mode == VPX_VBR))) &&
+ (((mi->mv[0].as_mv.row | mi->mv[0].as_mv.col) & 0x07) != 0)) {
int pf_rate[3];
int64_t pf_dist[3];
unsigned int pf_var[3];
@@ -1696,7 +1723,7 @@
vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
// For large partition blocks, extra testing is done.
- if (bsize > BLOCK_32X32 &&
+ if (cpi->oxcf.rc_mode == VPX_CBR && bsize > BLOCK_32X32 &&
!cyclic_refresh_segment_id_boosted(xd->mi[0]->segment_id) &&
cm->base_qindex) {
model_rd_for_sb_y_large(cpi, bsize, x, xd, &this_rdc.rate,
@@ -1710,8 +1737,8 @@
if (!this_early_term) {
this_sse = (int64_t)sse_y;
- block_yrd(cpi, x, &this_rdc.rate, &this_rdc.dist, &is_skippable,
- &this_sse, 0, bsize, VPXMIN(mi->tx_size, TX_16X16));
+ block_yrd(cpi, x, &this_rdc, &is_skippable, &this_sse, bsize,
+ VPXMIN(mi->tx_size, TX_16X16));
x->skip_txfm[0] = is_skippable;
if (is_skippable) {
this_rdc.rate = vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
@@ -1737,17 +1764,15 @@
}
if (x->color_sensitivity[0] || x->color_sensitivity[1]) {
- int uv_rate = 0;
- int64_t uv_dist = 0;
+ RD_COST rdc_uv;
const BLOCK_SIZE uv_bsize = get_plane_block_size(bsize, &xd->plane[1]);
if (x->color_sensitivity[0])
vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, 1);
if (x->color_sensitivity[1])
vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, 2);
- model_rd_for_sb_uv(cpi, uv_bsize, x, xd, &uv_rate, &uv_dist,
- &var_y, &sse_y, 1, 2);
- this_rdc.rate += uv_rate;
- this_rdc.dist += uv_dist;
+ model_rd_for_sb_uv(cpi, uv_bsize, x, xd, &rdc_uv, &var_y, &sse_y, 1, 2);
+ this_rdc.rate += rdc_uv.rate;
+ this_rdc.dist += rdc_uv.dist;
}
this_rdc.rate += rate_mv;
@@ -1842,12 +1867,13 @@
inter_mode_thresh = (inter_mode_thresh << 1) + inter_mode_thresh;
}
// Perform intra prediction search, if the best SAD is above a certain
- // threshold. Skip intra prediction if force_skip_low_temp_var is set.
- if (!force_skip_low_temp_var && perform_intra_pred &&
+ // threshold.
+ if ((!force_skip_low_temp_var || bsize < BLOCK_32X32) &&
+ perform_intra_pred &&
(best_rdc.rdcost == INT64_MAX ||
(!x->skip && best_rdc.rdcost > inter_mode_thresh &&
bsize <= cpi->sf.max_intra_bsize))) {
- struct estimate_block_intra_args args = { cpi, x, DC_PRED, 1, 0, 0 };
+ struct estimate_block_intra_args args = { cpi, x, DC_PRED, 1, 0 };
int i;
TX_SIZE best_intra_tx_size = TX_SIZES;
TX_SIZE intra_tx_size =
@@ -1896,10 +1922,10 @@
mi->mode = this_mode;
mi->ref_frame[0] = INTRA_FRAME;
+ this_rdc.dist = this_rdc.rate = 0;
args.mode = this_mode;
args.skippable = 1;
- args.rate = 0;
- args.dist = 0;
+ args.rdc = &this_rdc;
mi->tx_size = intra_tx_size;
vp9_foreach_transformed_block_in_plane(xd, bsize, 0,
estimate_block_intra, &args);
@@ -1907,10 +1933,10 @@
// mirrors the behavior used by inter
if (args.skippable) {
x->skip_txfm[0] = SKIP_TXFM_AC_DC;
- args.rate = vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), 1);
+ this_rdc.rate = vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), 1);
} else {
x->skip_txfm[0] = SKIP_TXFM_NONE;
- args.rate += vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), 0);
+ this_rdc.rate += vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), 0);
}
// Inter and intra RD will mismatch in scale for non-screen content.
if (cpi->oxcf.content == VP9E_CONTENT_SCREEN) {
@@ -1921,8 +1947,6 @@
vp9_foreach_transformed_block_in_plane(xd, bsize, 2,
estimate_block_intra, &args);
}
- this_rdc.rate = args.rate;
- this_rdc.dist = args.dist;
this_rdc.rate += cpi->mbmode_cost[this_mode];
this_rdc.rate += ref_frame_cost[INTRA_FRAME];
this_rdc.rate += intra_cost_penalty;
@@ -2152,7 +2176,7 @@
const int tmp_col_max = x->mv_col_max;
const int tmp_row_min = x->mv_row_min;
const int tmp_row_max = x->mv_row_max;
- int dummy_dist;
+ uint32_t dummy_dist;
if (i == 0) {
mvp_full.row = b_mv[NEARESTMV].as_mv.row >> 3;
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index 9766c05..d68b684 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -94,7 +94,7 @@
const int coeff_sign = (coeff >> 31);
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
const int64_t tmp = abs_coeff + round_ptr[rc != 0];
- const uint32_t abs_qcoeff = (uint32_t)((tmp * quant_ptr[rc != 0]) >> 16);
+ const int abs_qcoeff = (int)((tmp * quant_ptr[rc != 0]) >> 16);
qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
if (abs_qcoeff)
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index a304182..0ed0850 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -1214,6 +1214,11 @@
for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode)))
continue;
+#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
+ if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) &&
+ (xd->above_mi == NULL || xd->left_mi == NULL) && need_top_left[mode])
+ continue;
+#endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
xd->mi[0]->uv_mode = mode;
@@ -1710,8 +1715,8 @@
x->mv_row_max = tmp_row_max;
if (bestsme < INT_MAX) {
- int dis; /* TODO: use dis in distortion calculation later. */
- unsigned int sse;
+ uint32_t dis; /* TODO: use dis in distortion calculation later. */
+ uint32_t sse;
bestsme = cpi->find_fractional_mv_step(
x, &tmp_mv,
&ref_mv[id].as_mv,
@@ -1911,7 +1916,7 @@
INT_MAX, 1);
if (bestsme < INT_MAX) {
- int distortion;
+ uint32_t distortion;
cpi->find_fractional_mv_step(
x,
new_mv,
@@ -2341,7 +2346,7 @@
x->mv_row_max = tmp_row_max;
if (bestsme < INT_MAX) {
- int dis; /* TODO: use dis in distortion calculation later. */
+ uint32_t dis; /* TODO: use dis in distortion calculation later. */
cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv,
cm->allow_high_precision_mv,
x->errorperbit,
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index 0090b4f..e7f04a2 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -429,7 +429,7 @@
sf->mv.search_method = NSTEP;
sf->mv.reduce_first_step_size = 1;
sf->skip_encode_sb = 0;
- if (!cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR && cpi->oxcf.pass == 0 &&
+ if (!cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR &&
content != VP9E_CONTENT_SCREEN) {
// Enable short circuit for low temporal variance.
sf->short_circuit_low_temp_var = 1;
@@ -450,6 +450,17 @@
sf->adaptive_rd_thresh = 4;
sf->mv.subpel_force_stop = (content == VP9E_CONTENT_SCREEN) ? 3 : 2;
sf->lpf_pick = LPF_PICK_MINIMAL_LPF;
+ // Only keep INTRA_DC mode for speed 8.
+ if (!is_keyframe) {
+ int i = 0;
+ for (i = 0; i < BLOCK_SIZES; ++i)
+ sf->intra_y_mode_bsize_mask[i] = INTRA_DC;
+ }
+ if (!cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR &&
+ content != VP9E_CONTENT_SCREEN) {
+ // More aggressive short circuit for speed 8.
+ sf->short_circuit_low_temp_var = 2;
+ }
}
}
diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h
index 71ff0ac..e88a7df 100644
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -449,6 +449,10 @@
// Skip a number of expensive mode evaluations for blocks with very low
// temporal variance.
+ // 1: Skip golden non-zeromv and ALL INTRA for bsize >= 32x32.
+ // 2: Skip golden non-zeromv and newmv-last for bsize >= 16x16, skip ALL
+ // INTRA for bsize >= 32x32 and vert/horz INTRA for bsize 16x16, 16x32 and
+ // 32x16.
int short_circuit_low_temp_var;
} SPEED_FEATURES;
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index 2ba2750..02bcf5a 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -264,8 +264,8 @@
int step_param;
int sadpb = x->sadperbit16;
int bestsme = INT_MAX;
- int distortion;
- unsigned int sse;
+ uint32_t distortion;
+ uint32_t sse;
int cost_list[5];
MV best_ref_mv1 = {0, 0};
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index 1a11a6d..9ad86cb 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -205,7 +205,7 @@
level != LEVEL_4 && level != LEVEL_4_1 && level != LEVEL_5 &&
level != LEVEL_5_1 && level != LEVEL_5_2 && level != LEVEL_6 &&
level != LEVEL_6_1 && level != LEVEL_6_2 &&
- level != LEVEL_UNKNOWN && level != LEVEL_NOT_CARE)
+ level != LEVEL_UNKNOWN && level != LEVEL_MAX)
ERROR("target_level is invalid");
}
@@ -807,6 +807,13 @@
return update_extra_cfg(ctx, &extra_cfg);
}
+static vpx_codec_err_t ctrl_get_level(vpx_codec_alg_priv_t *ctx, va_list args) {
+ int *const arg = va_arg(args, int *);
+ if (arg == NULL) return VPX_CODEC_INVALID_PARAM;
+ *arg = (int)vp9_get_level(&ctx->cpi->level_info.level_spec);
+ return VPX_CODEC_OK;
+}
+
static vpx_codec_err_t encoder_init(vpx_codec_ctx_t *ctx,
vpx_codec_priv_enc_mr_cfg_t *data) {
vpx_codec_err_t res = VPX_CODEC_OK;
@@ -898,6 +905,11 @@
break;
}
+ if (deadline == VPX_DL_REALTIME) {
+ ctx->oxcf.pass = 0;
+ new_mode = REALTIME;
+ }
+
if (ctx->oxcf.mode != new_mode) {
ctx->oxcf.mode = new_mode;
vp9_change_config(ctx->cpi, &ctx->oxcf);
@@ -964,9 +976,6 @@
return index_sz;
}
-// vp9 uses 10,000,000 ticks/second as time stamp
-#define TICKS_PER_SEC 10000000LL
-
static int64_t timebase_units_to_ticks(const vpx_rational_t *timebase,
int64_t n) {
return n * TICKS_PER_SEC * timebase->num / timebase->den;
@@ -974,7 +983,7 @@
static int64_t ticks_to_timebase_units(const vpx_rational_t *timebase,
int64_t n) {
- const int64_t round = TICKS_PER_SEC * timebase->num / 2 - 1;
+ const int64_t round = (int64_t)TICKS_PER_SEC * timebase->num / 2 - 1;
return (n * timebase->den + round) / timebase->num / TICKS_PER_SEC;
}
@@ -1547,6 +1556,7 @@
{VP9_GET_REFERENCE, ctrl_get_reference},
{VP9E_GET_SVC_LAYER_ID, ctrl_get_svc_layer_id},
{VP9E_GET_ACTIVEMAP, ctrl_get_active_map},
+ {VP9E_GET_LEVEL, ctrl_get_level},
{ -1, NULL},
};
diff --git a/vpx/vp8cx.h b/vpx/vp8cx.h
index b059d47..2752a86 100644
--- a/vpx/vp8cx.h
+++ b/vpx/vp8cx.h
@@ -562,7 +562,13 @@
*
* Supported in codecs: VP9
*/
- VP9E_SET_TARGET_LEVEL
+ VP9E_SET_TARGET_LEVEL,
+
+ /*!\brief Codec control function to get bitstream level.
+ *
+ * Supported in codecs: VP9
+ */
+ VP9E_GET_LEVEL
};
/*!\brief vpx 1-D scaling mode
@@ -821,6 +827,9 @@
VPX_CTRL_USE_TYPE(VP9E_SET_TARGET_LEVEL, unsigned int)
#define VPX_CTRL_VP9E_SET_TARGET_LEVEL
+VPX_CTRL_USE_TYPE(VP9E_GET_LEVEL, int *)
+#define VPX_CTRL_VP9E_GET_LEVEL
+
/*!\endcond */
/*! @} - end defgroup vp8_encoder */
#ifdef __cplusplus
diff --git a/vpx_dsp/arm/hadamard_neon.c b/vpx_dsp/arm/hadamard_neon.c
index cc9e804..21e3e3d 100644
--- a/vpx_dsp/arm/hadamard_neon.c
+++ b/vpx_dsp/arm/hadamard_neon.c
@@ -160,3 +160,42 @@
vst1q_s16(coeff + 48, a6);
vst1q_s16(coeff + 56, a7);
}
+
+void vpx_hadamard_16x16_neon(const int16_t *src_diff, int src_stride,
+ int16_t *coeff) {
+ int i;
+
+ /* Rearrange 16x16 to 8x32 and remove stride.
+ * Top left first. */
+ vpx_hadamard_8x8_neon(src_diff + 0 + 0 * src_stride, src_stride, coeff + 0);
+ /* Top right. */
+ vpx_hadamard_8x8_neon(src_diff + 8 + 0 * src_stride, src_stride, coeff + 64);
+ /* Bottom left. */
+ vpx_hadamard_8x8_neon(src_diff + 0 + 8 * src_stride, src_stride, coeff + 128);
+ /* Bottom right. */
+ vpx_hadamard_8x8_neon(src_diff + 8 + 8 * src_stride, src_stride, coeff + 192);
+
+ for (i = 0; i < 64; i += 8) {
+ const int16x8_t a0 = vld1q_s16(coeff + 0);
+ const int16x8_t a1 = vld1q_s16(coeff + 64);
+ const int16x8_t a2 = vld1q_s16(coeff + 128);
+ const int16x8_t a3 = vld1q_s16(coeff + 192);
+
+ const int16x8_t b0 = vhaddq_s16(a0, a1);
+ const int16x8_t b1 = vhsubq_s16(a0, a1);
+ const int16x8_t b2 = vhaddq_s16(a2, a3);
+ const int16x8_t b3 = vhsubq_s16(a2, a3);
+
+ const int16x8_t c0 = vaddq_s16(b0, b2);
+ const int16x8_t c1 = vaddq_s16(b1, b3);
+ const int16x8_t c2 = vsubq_s16(b0, b2);
+ const int16x8_t c3 = vsubq_s16(b1, b3);
+
+ vst1q_s16(coeff + 0, c0);
+ vst1q_s16(coeff + 64, c1);
+ vst1q_s16(coeff + 128, c2);
+ vst1q_s16(coeff + 192, c3);
+
+ coeff += 8;
+ }
+}
diff --git a/vpx_dsp/inv_txfm.h b/vpx_dsp/inv_txfm.h
index c407dd8..9cfe1be 100644
--- a/vpx_dsp/inv_txfm.h
+++ b/vpx_dsp/inv_txfm.h
@@ -118,13 +118,13 @@
static INLINE uint16_t highbd_clip_pixel_add(uint16_t dest, tran_high_t trans,
int bd) {
trans = HIGHBD_WRAPLOW(trans, bd);
- return clip_pixel_highbd(dest + trans, bd);
+ return clip_pixel_highbd(dest + (int)trans, bd);
}
#endif
static INLINE uint8_t clip_pixel_add(uint8_t dest, tran_high_t trans) {
trans = WRAPLOW(trans);
- return clip_pixel(dest + trans);
+ return clip_pixel(dest + (int)trans);
}
#ifdef __cplusplus
} // extern "C"
diff --git a/vpx_dsp/quantize.c b/vpx_dsp/quantize.c
index 6426ccc..80fcd66 100644
--- a/vpx_dsp/quantize.c
+++ b/vpx_dsp/quantize.c
@@ -53,7 +53,7 @@
const int coeff_sign = (coeff >> 31);
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
const int64_t tmp = abs_coeff + round_ptr[0];
- const uint32_t abs_qcoeff = (uint32_t)((tmp * quant) >> 16);
+ const int abs_qcoeff = (int)((tmp * quant) >> 16);
qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr;
if (abs_qcoeff)
@@ -109,7 +109,7 @@
const int coeff_sign = (coeff >> 31);
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
const int64_t tmp = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[0], 1);
- const uint32_t abs_qcoeff = (uint32_t)((tmp * quant) >> 15);
+ const int abs_qcoeff = (int)((tmp * quant) >> 15);
qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr / 2;
if (abs_qcoeff)
diff --git a/vpx_dsp/variance.c b/vpx_dsp/variance.c
index e8bddb0..d960c54 100644
--- a/vpx_dsp/variance.c
+++ b/vpx_dsp/variance.c
@@ -275,7 +275,7 @@
#if CONFIG_VP9_HIGHBITDEPTH
static void highbd_variance64(const uint8_t *a8, int a_stride,
const uint8_t *b8, int b_stride,
- int w, int h, uint64_t *sse, uint64_t *sum) {
+ int w, int h, uint64_t *sse, int64_t *sum) {
int i, j;
uint16_t *a = CONVERT_TO_SHORTPTR(a8);
@@ -298,7 +298,7 @@
const uint8_t *b8, int b_stride,
int w, int h, uint32_t *sse, int *sum) {
uint64_t sse_long = 0;
- uint64_t sum_long = 0;
+ int64_t sum_long = 0;
highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
*sse = (uint32_t)sse_long;
*sum = (int)sum_long;
@@ -308,7 +308,7 @@
const uint8_t *b8, int b_stride,
int w, int h, uint32_t *sse, int *sum) {
uint64_t sse_long = 0;
- uint64_t sum_long = 0;
+ int64_t sum_long = 0;
highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
*sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 4);
*sum = (int)ROUND_POWER_OF_TWO(sum_long, 2);
@@ -318,7 +318,7 @@
const uint8_t *b8, int b_stride,
int w, int h, uint32_t *sse, int *sum) {
uint64_t sse_long = 0;
- uint64_t sum_long = 0;
+ int64_t sum_long = 0;
highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
*sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 8);
*sum = (int)ROUND_POWER_OF_TWO(sum_long, 4);
@@ -341,8 +341,10 @@
int b_stride, \
uint32_t *sse) { \
int sum; \
+ int64_t var; \
highbd_10_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
- return *sse - (((int64_t)sum * sum) / (W * H)); \
+ var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H)); \
+ return (var >= 0) ? (uint32_t)var : 0; \
} \
\
uint32_t vpx_highbd_12_variance##W##x##H##_c(const uint8_t *a, \
@@ -351,8 +353,10 @@
int b_stride, \
uint32_t *sse) { \
int sum; \
+ int64_t var; \
highbd_12_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
- return *sse - (((int64_t)sum * sum) / (W * H)); \
+ var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H)); \
+ return (var >= 0) ? (uint32_t)var : 0; \
}
#define HIGHBD_GET_VAR(S) \
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index 4144281..a62acb7 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -1020,7 +1020,7 @@
specialize qw/vpx_hadamard_8x8 sse2 neon/, "$ssse3_x86_64_x86inc";
add_proto qw/void vpx_hadamard_16x16/, "const int16_t *src_diff, int src_stride, int16_t *coeff";
- specialize qw/vpx_hadamard_16x16 sse2/;
+ specialize qw/vpx_hadamard_16x16 sse2 neon/;
add_proto qw/int vpx_satd/, "const int16_t *coeff, int length";
specialize qw/vpx_satd sse2 neon/;
diff --git a/vpx_dsp/x86/inv_txfm_sse2.c b/vpx_dsp/x86/inv_txfm_sse2.c
index 9c0d3eb..df5068c 100644
--- a/vpx_dsp/x86/inv_txfm_sse2.c
+++ b/vpx_dsp/x86/inv_txfm_sse2.c
@@ -158,8 +158,8 @@
const __m128i zero = _mm_setzero_si128();
int a;
- a = dct_const_round_shift(input[0] * cospi_16_64);
- a = dct_const_round_shift(a * cospi_16_64);
+ a = (int)dct_const_round_shift(input[0] * cospi_16_64);
+ a = (int)dct_const_round_shift(a * cospi_16_64);
a = ROUND_POWER_OF_TWO(a, 4);
dc_value = _mm_set1_epi16(a);
@@ -527,8 +527,8 @@
const __m128i zero = _mm_setzero_si128();
int a;
- a = dct_const_round_shift(input[0] * cospi_16_64);
- a = dct_const_round_shift(a * cospi_16_64);
+ a = (int)dct_const_round_shift(input[0] * cospi_16_64);
+ a = (int)dct_const_round_shift(a * cospi_16_64);
a = ROUND_POWER_OF_TWO(a, 5);
dc_value = _mm_set1_epi16(a);
@@ -1305,8 +1305,8 @@
const __m128i zero = _mm_setzero_si128();
int a, i;
- a = dct_const_round_shift(input[0] * cospi_16_64);
- a = dct_const_round_shift(a * cospi_16_64);
+ a = (int)dct_const_round_shift(input[0] * cospi_16_64);
+ a = (int)dct_const_round_shift(a * cospi_16_64);
a = ROUND_POWER_OF_TWO(a, 6);
dc_value = _mm_set1_epi16(a);
@@ -3462,8 +3462,8 @@
const __m128i zero = _mm_setzero_si128();
int a, j;
- a = dct_const_round_shift(input[0] * cospi_16_64);
- a = dct_const_round_shift(a * cospi_16_64);
+ a = (int)dct_const_round_shift(input[0] * cospi_16_64);
+ a = (int)dct_const_round_shift(a * cospi_16_64);
a = ROUND_POWER_OF_TWO(a, 6);
dc_value = _mm_set1_epi16(a);