Merge "x86.h,x86_simd_caps: add an explicit cast w/strtol"
diff --git a/README b/README
index 979440e..29072b9 100644
--- a/README
+++ b/README
@@ -47,7 +47,6 @@
--help output of the configure script. As of this writing, the list of
available targets is:
- armv6-darwin-gcc
armv6-linux-rvct
armv6-linux-gcc
armv6-none-rvct
diff --git a/build/make/configure.sh b/build/make/configure.sh
index b305d33..7b471ca 100644
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -1018,18 +1018,7 @@
NM="$(${XCRUN_FIND} nm)"
RANLIB="$(${XCRUN_FIND} ranlib)"
AS_SFX=.s
-
- # Special handling of ld for armv6 because libclang_rt.ios.a does
- # not contain armv6 support in Apple's clang package:
- # Apple LLVM version 5.1 (clang-503.0.40) (based on LLVM 3.4svn).
- # TODO(tomfinegan): Remove this. Our minimum iOS version (6.0)
- # renders support for armv6 unnecessary because the 3GS and up
- # support neon.
- if [ "${tgt_isa}" = "armv6" ]; then
- LD="$(${XCRUN_FIND} ld)"
- else
- LD="${CXX:-$(${XCRUN_FIND} ld)}"
- fi
+ LD="${CXX:-$(${XCRUN_FIND} ld)}"
# ASFLAGS is written here instead of using check_add_asflags
# because we need to overwrite all of ASFLAGS and purge the
@@ -1069,7 +1058,7 @@
if enabled rvct; then
# Check if we have CodeSourcery GCC in PATH. Needed for
# libraries
- hash arm-none-linux-gnueabi-gcc 2>&- || \
+ which arm-none-linux-gnueabi-gcc 2>&- || \
die "Couldn't find CodeSourcery GCC from PATH"
# Use armcc as a linker to enable translation of
diff --git a/build/make/gen_msvs_proj.sh b/build/make/gen_msvs_proj.sh
index 0cf335b..2b91fbf 100755
--- a/build/make/gen_msvs_proj.sh
+++ b/build/make/gen_msvs_proj.sh
@@ -193,7 +193,7 @@
done
# Make one call to fix_path for file_list to improve performance.
-fix_file_list
+fix_file_list file_list
outfile=${outfile:-/dev/stdout}
guid=${guid:-`generate_uuid`}
diff --git a/build/make/gen_msvs_vcxproj.sh b/build/make/gen_msvs_vcxproj.sh
index 182ea28..e98611d 100755
--- a/build/make/gen_msvs_vcxproj.sh
+++ b/build/make/gen_msvs_vcxproj.sh
@@ -211,7 +211,7 @@
done
# Make one call to fix_path for file_list to improve performance.
-fix_file_list
+fix_file_list file_list
outfile=${outfile:-/dev/stdout}
guid=${guid:-`generate_uuid`}
diff --git a/build/make/msvs_common.sh b/build/make/msvs_common.sh
index 90c1488..88f1cf9 100644
--- a/build/make/msvs_common.sh
+++ b/build/make/msvs_common.sh
@@ -39,11 +39,12 @@
}
# Corrects the paths in file_list in one pass for efficiency.
+# $1 is the name of the array to be modified.
fix_file_list() {
- # TODO(jzern): this could be more generic and take the array as a param.
- files=$(fix_path "${file_list[@]}")
+ declare -n array_ref=$1
+ files=$(fix_path "${array_ref[@]}")
local IFS=$'\n'
- file_list=($files)
+ array_ref=($files)
}
generate_uuid() {
diff --git a/configure b/configure
index ff97dee..91407d3 100755
--- a/configure
+++ b/configure
@@ -98,7 +98,6 @@
# all_platforms is a list of all supported target platforms. Maintain
# alphabetically by architecture, generic-gnu last.
-all_platforms="${all_platforms} armv6-darwin-gcc"
all_platforms="${all_platforms} armv6-linux-rvct"
all_platforms="${all_platforms} armv6-linux-gcc"
all_platforms="${all_platforms} armv6-none-rvct"
diff --git a/test/convolve_test.cc b/test/convolve_test.cc
index 12022be..22a2e77 100644
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@@ -69,6 +69,21 @@
typedef std::tr1::tuple<int, int, const ConvolveFunctions *> ConvolveParam;
+#define ALL_SIZES(convolve_fn) \
+ make_tuple(4, 4, &convolve_fn), \
+ make_tuple(8, 4, &convolve_fn), \
+ make_tuple(4, 8, &convolve_fn), \
+ make_tuple(8, 8, &convolve_fn), \
+ make_tuple(16, 8, &convolve_fn), \
+ make_tuple(8, 16, &convolve_fn), \
+ make_tuple(16, 16, &convolve_fn), \
+ make_tuple(32, 16, &convolve_fn), \
+ make_tuple(16, 32, &convolve_fn), \
+ make_tuple(32, 32, &convolve_fn), \
+ make_tuple(64, 32, &convolve_fn), \
+ make_tuple(32, 64, &convolve_fn), \
+ make_tuple(64, 64, &convolve_fn)
+
// Reference 8-tap subpixel filter, slightly modified to fit into this test.
#define VP9_FILTER_WEIGHT 128
#define VP9_FILTER_SHIFT 7
@@ -1034,20 +1049,6 @@
wrap_convolve8_horiz_c_8, wrap_convolve8_avg_horiz_c_8,
wrap_convolve8_vert_c_8, wrap_convolve8_avg_vert_c_8,
wrap_convolve8_c_8, wrap_convolve8_avg_c_8, 8);
-INSTANTIATE_TEST_CASE_P(C_8, ConvolveTest, ::testing::Values(
- make_tuple(4, 4, &convolve8_c),
- make_tuple(8, 4, &convolve8_c),
- make_tuple(4, 8, &convolve8_c),
- make_tuple(8, 8, &convolve8_c),
- make_tuple(16, 8, &convolve8_c),
- make_tuple(8, 16, &convolve8_c),
- make_tuple(16, 16, &convolve8_c),
- make_tuple(32, 16, &convolve8_c),
- make_tuple(16, 32, &convolve8_c),
- make_tuple(32, 32, &convolve8_c),
- make_tuple(64, 32, &convolve8_c),
- make_tuple(32, 64, &convolve8_c),
- make_tuple(64, 64, &convolve8_c)));
const ConvolveFunctions convolve10_c(
wrap_convolve_copy_c_10, wrap_convolve_avg_c_10,
wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10,
@@ -1056,20 +1057,6 @@
wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10,
wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10,
wrap_convolve8_c_10, wrap_convolve8_avg_c_10, 10);
-INSTANTIATE_TEST_CASE_P(C_10, ConvolveTest, ::testing::Values(
- make_tuple(4, 4, &convolve10_c),
- make_tuple(8, 4, &convolve10_c),
- make_tuple(4, 8, &convolve10_c),
- make_tuple(8, 8, &convolve10_c),
- make_tuple(16, 8, &convolve10_c),
- make_tuple(8, 16, &convolve10_c),
- make_tuple(16, 16, &convolve10_c),
- make_tuple(32, 16, &convolve10_c),
- make_tuple(16, 32, &convolve10_c),
- make_tuple(32, 32, &convolve10_c),
- make_tuple(64, 32, &convolve10_c),
- make_tuple(32, 64, &convolve10_c),
- make_tuple(64, 64, &convolve10_c)));
const ConvolveFunctions convolve12_c(
wrap_convolve_copy_c_12, wrap_convolve_avg_c_12,
wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12,
@@ -1078,23 +1065,13 @@
wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12,
wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12,
wrap_convolve8_c_12, wrap_convolve8_avg_c_12, 12);
-INSTANTIATE_TEST_CASE_P(C_12, ConvolveTest, ::testing::Values(
- make_tuple(4, 4, &convolve12_c),
- make_tuple(8, 4, &convolve12_c),
- make_tuple(4, 8, &convolve12_c),
- make_tuple(8, 8, &convolve12_c),
- make_tuple(16, 8, &convolve12_c),
- make_tuple(8, 16, &convolve12_c),
- make_tuple(16, 16, &convolve12_c),
- make_tuple(32, 16, &convolve12_c),
- make_tuple(16, 32, &convolve12_c),
- make_tuple(32, 32, &convolve12_c),
- make_tuple(64, 32, &convolve12_c),
- make_tuple(32, 64, &convolve12_c),
- make_tuple(64, 64, &convolve12_c)));
+const ConvolveParam kArrayConvolve_c[] = {
+ ALL_SIZES(convolve8_c),
+ ALL_SIZES(convolve10_c),
+ ALL_SIZES(convolve12_c)
+};
#else
-
const ConvolveFunctions convolve8_c(
vpx_convolve_copy_c, vpx_convolve_avg_c,
vpx_convolve8_horiz_c, vpx_convolve8_avg_horiz_c,
@@ -1103,22 +1080,10 @@
vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
-
-INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::Values(
- make_tuple(4, 4, &convolve8_c),
- make_tuple(8, 4, &convolve8_c),
- make_tuple(4, 8, &convolve8_c),
- make_tuple(8, 8, &convolve8_c),
- make_tuple(16, 8, &convolve8_c),
- make_tuple(8, 16, &convolve8_c),
- make_tuple(16, 16, &convolve8_c),
- make_tuple(32, 16, &convolve8_c),
- make_tuple(16, 32, &convolve8_c),
- make_tuple(32, 32, &convolve8_c),
- make_tuple(64, 32, &convolve8_c),
- make_tuple(32, 64, &convolve8_c),
- make_tuple(64, 64, &convolve8_c)));
+const ConvolveParam kArrayConvolve_c[] = { ALL_SIZES(convolve8_c) };
#endif
+INSTANTIATE_TEST_CASE_P(C, ConvolveTest,
+ ::testing::ValuesIn(kArrayConvolve_c));
#if HAVE_SSE2 && ARCH_X86_64
#if CONFIG_VP9_HIGHBITDEPTH
@@ -1158,46 +1123,11 @@
wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12,
wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12,
wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12, 12);
-INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values(
- make_tuple(4, 4, &convolve8_sse2),
- make_tuple(8, 4, &convolve8_sse2),
- make_tuple(4, 8, &convolve8_sse2),
- make_tuple(8, 8, &convolve8_sse2),
- make_tuple(16, 8, &convolve8_sse2),
- make_tuple(8, 16, &convolve8_sse2),
- make_tuple(16, 16, &convolve8_sse2),
- make_tuple(32, 16, &convolve8_sse2),
- make_tuple(16, 32, &convolve8_sse2),
- make_tuple(32, 32, &convolve8_sse2),
- make_tuple(64, 32, &convolve8_sse2),
- make_tuple(32, 64, &convolve8_sse2),
- make_tuple(64, 64, &convolve8_sse2),
- make_tuple(4, 4, &convolve10_sse2),
- make_tuple(8, 4, &convolve10_sse2),
- make_tuple(4, 8, &convolve10_sse2),
- make_tuple(8, 8, &convolve10_sse2),
- make_tuple(16, 8, &convolve10_sse2),
- make_tuple(8, 16, &convolve10_sse2),
- make_tuple(16, 16, &convolve10_sse2),
- make_tuple(32, 16, &convolve10_sse2),
- make_tuple(16, 32, &convolve10_sse2),
- make_tuple(32, 32, &convolve10_sse2),
- make_tuple(64, 32, &convolve10_sse2),
- make_tuple(32, 64, &convolve10_sse2),
- make_tuple(64, 64, &convolve10_sse2),
- make_tuple(4, 4, &convolve12_sse2),
- make_tuple(8, 4, &convolve12_sse2),
- make_tuple(4, 8, &convolve12_sse2),
- make_tuple(8, 8, &convolve12_sse2),
- make_tuple(16, 8, &convolve12_sse2),
- make_tuple(8, 16, &convolve12_sse2),
- make_tuple(16, 16, &convolve12_sse2),
- make_tuple(32, 16, &convolve12_sse2),
- make_tuple(16, 32, &convolve12_sse2),
- make_tuple(32, 32, &convolve12_sse2),
- make_tuple(64, 32, &convolve12_sse2),
- make_tuple(32, 64, &convolve12_sse2),
- make_tuple(64, 64, &convolve12_sse2)));
+const ConvolveParam kArrayConvolve_sse2[] = {
+ ALL_SIZES(convolve8_sse2),
+ ALL_SIZES(convolve10_sse2),
+ ALL_SIZES(convolve12_sse2)
+};
#else
const ConvolveFunctions convolve8_sse2(
#if CONFIG_USE_X86INC
@@ -1212,21 +1142,10 @@
vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
-INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values(
- make_tuple(4, 4, &convolve8_sse2),
- make_tuple(8, 4, &convolve8_sse2),
- make_tuple(4, 8, &convolve8_sse2),
- make_tuple(8, 8, &convolve8_sse2),
- make_tuple(16, 8, &convolve8_sse2),
- make_tuple(8, 16, &convolve8_sse2),
- make_tuple(16, 16, &convolve8_sse2),
- make_tuple(32, 16, &convolve8_sse2),
- make_tuple(16, 32, &convolve8_sse2),
- make_tuple(32, 32, &convolve8_sse2),
- make_tuple(64, 32, &convolve8_sse2),
- make_tuple(32, 64, &convolve8_sse2),
- make_tuple(64, 64, &convolve8_sse2)));
+const ConvolveParam kArrayConvolve_sse2[] = { ALL_SIZES(convolve8_sse2) };
#endif // CONFIG_VP9_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest,
+ ::testing::ValuesIn(kArrayConvolve_sse2));
#endif
#if HAVE_SSSE3
@@ -1237,22 +1156,11 @@
vpx_convolve8_ssse3, vpx_convolve8_avg_ssse3,
vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
- vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
+ vpx_scaled_2d_ssse3, vpx_scaled_avg_2d_c, 0);
-INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values(
- make_tuple(4, 4, &convolve8_ssse3),
- make_tuple(8, 4, &convolve8_ssse3),
- make_tuple(4, 8, &convolve8_ssse3),
- make_tuple(8, 8, &convolve8_ssse3),
- make_tuple(16, 8, &convolve8_ssse3),
- make_tuple(8, 16, &convolve8_ssse3),
- make_tuple(16, 16, &convolve8_ssse3),
- make_tuple(32, 16, &convolve8_ssse3),
- make_tuple(16, 32, &convolve8_ssse3),
- make_tuple(32, 32, &convolve8_ssse3),
- make_tuple(64, 32, &convolve8_ssse3),
- make_tuple(32, 64, &convolve8_ssse3),
- make_tuple(64, 64, &convolve8_ssse3)));
+const ConvolveParam kArrayConvolve8_ssse3[] = { ALL_SIZES(convolve8_ssse3) };
+INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest,
+ ::testing::ValuesIn(kArrayConvolve8_ssse3));
#endif
#if HAVE_AVX2 && HAVE_SSSE3
@@ -1265,20 +1173,9 @@
vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
-INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest, ::testing::Values(
- make_tuple(4, 4, &convolve8_avx2),
- make_tuple(8, 4, &convolve8_avx2),
- make_tuple(4, 8, &convolve8_avx2),
- make_tuple(8, 8, &convolve8_avx2),
- make_tuple(8, 16, &convolve8_avx2),
- make_tuple(16, 8, &convolve8_avx2),
- make_tuple(16, 16, &convolve8_avx2),
- make_tuple(32, 16, &convolve8_avx2),
- make_tuple(16, 32, &convolve8_avx2),
- make_tuple(32, 32, &convolve8_avx2),
- make_tuple(64, 32, &convolve8_avx2),
- make_tuple(32, 64, &convolve8_avx2),
- make_tuple(64, 64, &convolve8_avx2)));
+const ConvolveParam kArrayConvolve8_avx2[] = { ALL_SIZES(convolve8_avx2) };
+INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest,
+ ::testing::ValuesIn(kArrayConvolve8_avx2));
#endif // HAVE_AVX2 && HAVE_SSSE3
#if HAVE_NEON
@@ -1302,20 +1199,9 @@
vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
#endif // HAVE_NEON_ASM
-INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest, ::testing::Values(
- make_tuple(4, 4, &convolve8_neon),
- make_tuple(8, 4, &convolve8_neon),
- make_tuple(4, 8, &convolve8_neon),
- make_tuple(8, 8, &convolve8_neon),
- make_tuple(16, 8, &convolve8_neon),
- make_tuple(8, 16, &convolve8_neon),
- make_tuple(16, 16, &convolve8_neon),
- make_tuple(32, 16, &convolve8_neon),
- make_tuple(16, 32, &convolve8_neon),
- make_tuple(32, 32, &convolve8_neon),
- make_tuple(64, 32, &convolve8_neon),
- make_tuple(32, 64, &convolve8_neon),
- make_tuple(64, 64, &convolve8_neon)));
+const ConvolveParam kArrayConvolve8_neon[] = { ALL_SIZES(convolve8_neon) };
+INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest,
+ ::testing::ValuesIn(kArrayConvolve8_neon));
#endif // HAVE_NEON
#if HAVE_DSPR2
@@ -1328,21 +1214,10 @@
vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
-INSTANTIATE_TEST_CASE_P(DSPR2, ConvolveTest, ::testing::Values(
- make_tuple(4, 4, &convolve8_dspr2),
- make_tuple(8, 4, &convolve8_dspr2),
- make_tuple(4, 8, &convolve8_dspr2),
- make_tuple(8, 8, &convolve8_dspr2),
- make_tuple(16, 8, &convolve8_dspr2),
- make_tuple(8, 16, &convolve8_dspr2),
- make_tuple(16, 16, &convolve8_dspr2),
- make_tuple(32, 16, &convolve8_dspr2),
- make_tuple(16, 32, &convolve8_dspr2),
- make_tuple(32, 32, &convolve8_dspr2),
- make_tuple(64, 32, &convolve8_dspr2),
- make_tuple(32, 64, &convolve8_dspr2),
- make_tuple(64, 64, &convolve8_dspr2)));
-#endif
+const ConvolveParam kArrayConvolve8_dspr2[] = { ALL_SIZES(convolve8_dspr2) };
+INSTANTIATE_TEST_CASE_P(DSPR2, ConvolveTest,
+ ::testing::ValuesIn(kArrayConvolve8_dspr2));
+#endif // HAVE_DSPR2
#if HAVE_MSA
const ConvolveFunctions convolve8_msa(
@@ -1354,19 +1229,8 @@
vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
-INSTANTIATE_TEST_CASE_P(MSA, ConvolveTest, ::testing::Values(
- make_tuple(4, 4, &convolve8_msa),
- make_tuple(8, 4, &convolve8_msa),
- make_tuple(4, 8, &convolve8_msa),
- make_tuple(8, 8, &convolve8_msa),
- make_tuple(16, 8, &convolve8_msa),
- make_tuple(8, 16, &convolve8_msa),
- make_tuple(16, 16, &convolve8_msa),
- make_tuple(32, 16, &convolve8_msa),
- make_tuple(16, 32, &convolve8_msa),
- make_tuple(32, 32, &convolve8_msa),
- make_tuple(64, 32, &convolve8_msa),
- make_tuple(32, 64, &convolve8_msa),
- make_tuple(64, 64, &convolve8_msa)));
+const ConvolveParam kArrayConvolve8_msa[] = { ALL_SIZES(convolve8_msa) };
+INSTANTIATE_TEST_CASE_P(MSA, ConvolveTest,
+ ::testing::ValuesIn(kArrayConvolve8_msa));
#endif // HAVE_MSA
} // namespace
diff --git a/test/vp9_ethread_test.cc b/test/vp9_ethread_test.cc
index 4983d7f..f0b8cef 100644
--- a/test/vp9_ethread_test.cc
+++ b/test/vp9_ethread_test.cc
@@ -29,16 +29,9 @@
encoding_mode_(GET_PARAM(1)),
set_cpu_used_(GET_PARAM(2)) {
init_flags_ = VPX_CODEC_USE_PSNR;
- vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t();
- cfg.w = 1280;
- cfg.h = 720;
- decoder_ = codec_->CreateDecoder(cfg, 0);
-
md5_.clear();
}
- virtual ~VPxEncoderThreadTest() {
- delete decoder_;
- }
+ virtual ~VPxEncoderThreadTest() {}
virtual void SetUp() {
InitializeConfig();
@@ -81,31 +74,28 @@
}
}
- virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
-#if CONFIG_VP9_DECODER
- const vpx_codec_err_t res = decoder_->DecodeFrame(
- reinterpret_cast<uint8_t*>(pkt->data.frame.buf), pkt->data.frame.sz);
- if (res != VPX_CODEC_OK) {
- abort_ = true;
- ASSERT_EQ(VPX_CODEC_OK, res);
- }
- const vpx_image_t *img = decoder_->GetDxData().Next();
+ virtual void DecompressedFrameHook(const vpx_image_t &img,
+ vpx_codec_pts_t /*pts*/) {
+ ::libvpx_test::MD5 md5_res;
+ md5_res.Add(&img);
+ md5_.push_back(md5_res.Get());
+ }
- if (img) {
- ::libvpx_test::MD5 md5_res;
- md5_res.Add(img);
- md5_.push_back(md5_res.Get());
+ virtual bool HandleDecodeResult(const vpx_codec_err_t res,
+ const libvpx_test::VideoSource& /*video*/,
+ libvpx_test::Decoder * /*decoder*/) {
+ if (res != VPX_CODEC_OK) {
+ EXPECT_EQ(VPX_CODEC_OK, res);
+ return false;
}
-#else
- ASSERT_EQ(NULL, decoder_);
-#endif
+
+ return true;
}
bool encoder_initialized_;
int tiles_;
::libvpx_test::TestMode encoding_mode_;
int set_cpu_used_;
- ::libvpx_test::Decoder *decoder_;
std::vector<std::string> md5_;
};
diff --git a/vp10/common/postproc.c b/vp10/common/postproc.c
index a6ea9c0..e8a9f81 100644
--- a/vp10/common/postproc.c
+++ b/vp10/common/postproc.c
@@ -13,6 +13,7 @@
#include <stdio.h>
#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
#include "./vpx_scale_rtcd.h"
#include "./vp10_rtcd.h"
@@ -587,32 +588,6 @@
state->last_noise = a;
}
-void vp10_plane_add_noise_c(uint8_t *start, char *noise,
- char blackclamp[16],
- char whiteclamp[16],
- char bothclamp[16],
- unsigned int width, unsigned int height, int pitch) {
- unsigned int i, j;
-
- // TODO(jbb): why does simd code use both but c doesn't, normalize and
- // fix..
- (void) bothclamp;
- for (i = 0; i < height; i++) {
- uint8_t *pos = start + i * pitch;
- char *ref = (char *)(noise + (rand() & 0xff)); // NOLINT
-
- for (j = 0; j < width; j++) {
- if (pos[j] < blackclamp[0])
- pos[j] = blackclamp[0];
-
- if (pos[j] > 255 + whiteclamp[0])
- pos[j] = 255 + whiteclamp[0];
-
- pos[j] += ref[j];
- }
- }
-}
-
static void swap_mi_and_prev_mi(VP10_COMMON *cm) {
// Current mip will be the prev_mip for the next frame.
MODE_INFO *temp = cm->postproc_state.prev_mip;
@@ -727,7 +702,7 @@
fillrd(ppstate, 63 - q, noise_level);
}
- vp10_plane_add_noise(ppbuf->y_buffer, ppstate->noise, ppstate->blackclamp,
+ vpx_plane_add_noise(ppbuf->y_buffer, ppstate->noise, ppstate->blackclamp,
ppstate->whiteclamp, ppstate->bothclamp,
ppbuf->y_width, ppbuf->y_height, ppbuf->y_stride);
}
diff --git a/vp10/common/vp10_rtcd_defs.pl b/vp10/common/vp10_rtcd_defs.pl
index 9860bae..f2414f8 100644
--- a/vp10/common/vp10_rtcd_defs.pl
+++ b/vp10/common/vp10_rtcd_defs.pl
@@ -70,10 +70,6 @@
specialize qw/vp10_post_proc_down_and_across sse2/;
$vp10_post_proc_down_and_across_sse2=vp10_post_proc_down_and_across_xmm;
-add_proto qw/void vp10_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
-specialize qw/vp10_plane_add_noise sse2/;
-$vp10_plane_add_noise_sse2=vp10_plane_add_noise_wmt;
-
add_proto qw/void vp10_filter_by_weight16x16/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight";
specialize qw/vp10_filter_by_weight16x16 sse2 msa/;
@@ -326,9 +322,6 @@
add_proto qw/void vp10_highbd_post_proc_down_and_across/, "const uint16_t *src_ptr, uint16_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit";
specialize qw/vp10_highbd_post_proc_down_and_across/;
-
- add_proto qw/void vp10_highbd_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
- specialize qw/vp10_highbd_plane_add_noise/;
}
#
diff --git a/vp10/common/x86/postproc_sse2.asm b/vp10/common/x86/postproc_sse2.asm
index d5f8e92..d477a65 100644
--- a/vp10/common/x86/postproc_sse2.asm
+++ b/vp10/common/x86/postproc_sse2.asm
@@ -624,68 +624,6 @@
%undef flimit4
-;void vp10_plane_add_noise_wmt (unsigned char *start, unsigned char *noise,
-; unsigned char blackclamp[16],
-; unsigned char whiteclamp[16],
-; unsigned char bothclamp[16],
-; unsigned int width, unsigned int height, int pitch)
-global sym(vp10_plane_add_noise_wmt) PRIVATE
-sym(vp10_plane_add_noise_wmt):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 8
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
-.addnoise_loop:
- call sym(LIBVPX_RAND) WRT_PLT
- mov rcx, arg(1) ;noise
- and rax, 0xff
- add rcx, rax
-
- ; we rely on the fact that the clamping vectors are stored contiguously
- ; in black/white/both order. Note that we have to reload this here because
- ; rdx could be trashed by rand()
- mov rdx, arg(2) ; blackclamp
-
-
- mov rdi, rcx
- movsxd rcx, dword arg(5) ;[Width]
- mov rsi, arg(0) ;Pos
- xor rax,rax
-
-.addnoise_nextset:
- movdqu xmm1,[rsi+rax] ; get the source
-
- psubusb xmm1, [rdx] ;blackclamp ; clamp both sides so we don't outrange adding noise
- paddusb xmm1, [rdx+32] ;bothclamp
- psubusb xmm1, [rdx+16] ;whiteclamp
-
- movdqu xmm2,[rdi+rax] ; get the noise for this line
- paddb xmm1,xmm2 ; add it in
- movdqu [rsi+rax],xmm1 ; store the result
-
- add rax,16 ; move to the next line
-
- cmp rax, rcx
- jl .addnoise_nextset
-
- movsxd rax, dword arg(7) ; Pitch
- add arg(0), rax ; Start += Pitch
- sub dword arg(6), 1 ; Height -= 1
- jg .addnoise_loop
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
SECTION_RODATA
align 16
rd42:
diff --git a/vp10/encoder/lookahead.c b/vp10/encoder/lookahead.c
index dce0139..3185cb6 100644
--- a/vp10/encoder/lookahead.c
+++ b/vp10/encoder/lookahead.c
@@ -20,8 +20,8 @@
/* Return the buffer at the given absolute index and increment the index */
static struct lookahead_entry *pop(struct lookahead_ctx *ctx,
- unsigned int *idx) {
- unsigned int index = *idx;
+ int *idx) {
+ int index = *idx;
struct lookahead_entry *buf = ctx->buf + index;
assert(index < ctx->max_sz);
@@ -35,7 +35,7 @@
void vp10_lookahead_destroy(struct lookahead_ctx *ctx) {
if (ctx) {
if (ctx->buf) {
- unsigned int i;
+ int i;
for (i = 0; i < ctx->max_sz; i++)
vpx_free_frame_buffer(&ctx->buf[i].img);
@@ -221,9 +221,9 @@
if (index >= 0) {
// Forward peek
- if (index < (int)ctx->sz) {
+ if (index < ctx->sz) {
index += ctx->read_idx;
- if (index >= (int)ctx->max_sz)
+ if (index >= ctx->max_sz)
index -= ctx->max_sz;
buf = ctx->buf + index;
}
diff --git a/vp10/encoder/lookahead.h b/vp10/encoder/lookahead.h
index 22429ae..f650f80 100644
--- a/vp10/encoder/lookahead.h
+++ b/vp10/encoder/lookahead.h
@@ -31,10 +31,10 @@
#define MAX_PRE_FRAMES 1
struct lookahead_ctx {
- unsigned int max_sz; /* Absolute size of the queue */
- unsigned int sz; /* Number of buffers currently in the queue */
- unsigned int read_idx; /* Read index */
- unsigned int write_idx; /* Write index */
+ int max_sz; /* Absolute size of the queue */
+ int sz; /* Number of buffers currently in the queue */
+ int read_idx; /* Read index */
+ int write_idx; /* Write index */
struct lookahead_entry *buf; /* Buffer list */
};
diff --git a/vp10/encoder/quantize.c b/vp10/encoder/quantize.c
index 86b324f..136efe3 100644
--- a/vp10/encoder/quantize.c
+++ b/vp10/encoder/quantize.c
@@ -219,12 +219,12 @@
static void invert_quant(int16_t *quant, int16_t *shift, int d) {
unsigned t;
- int l;
+ int l, m;
t = d;
for (l = 0; t > 1; l++)
t >>= 1;
- t = 1 + (1 << (16 + l)) / d;
- *quant = (int16_t)(t - (1 << 16));
+ m = 1 + (1 << (16 + l)) / d;
+ *quant = (int16_t)(m - (1 << 16));
*shift = 1 << (16 - l);
}
diff --git a/vp8/common/mips/msa/postproc_msa.c b/vp8/common/mips/msa/postproc_msa.c
index c88f302..23dcde2 100644
--- a/vp8/common/mips/msa/postproc_msa.c
+++ b/vp8/common/mips/msa/postproc_msa.c
@@ -10,6 +10,7 @@
#include <stdlib.h>
#include "./vp8_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
#include "vp8/common/mips/msa/vp8_macros_msa.h"
static const int16_t vp8_rv_msa[] =
@@ -798,54 +799,3 @@
}
}
}
-
-void vp8_plane_add_noise_msa(uint8_t *start_ptr, char *noise,
- char blackclamp[16], char whiteclamp[16],
- char bothclamp[16],
- uint32_t width, uint32_t height,
- int32_t pitch)
-{
- uint32_t i, j;
-
- for (i = 0; i < height / 2; ++i)
- {
- uint8_t *pos0_ptr = start_ptr + (2 * i) * pitch;
- int8_t *ref0_ptr = (int8_t *) (noise + (rand() & 0xff));
- uint8_t *pos1_ptr = start_ptr + (2 * i + 1) * pitch;
- int8_t *ref1_ptr = (int8_t *) (noise + (rand() & 0xff));
- for (j = width / 16; j--;)
- {
- v16i8 temp00_s, temp01_s;
- v16u8 temp00, temp01, black_clamp, white_clamp;
- v16u8 pos0, ref0, pos1, ref1;
- v16i8 const127 = __msa_ldi_b(127);
-
- pos0 = LD_UB(pos0_ptr);
- ref0 = LD_UB(ref0_ptr);
- pos1 = LD_UB(pos1_ptr);
- ref1 = LD_UB(ref1_ptr);
- black_clamp = (v16u8)__msa_fill_b(blackclamp[0]);
- white_clamp = (v16u8)__msa_fill_b(whiteclamp[0]);
- temp00 = (pos0 < black_clamp);
- pos0 = __msa_bmnz_v(pos0, black_clamp, temp00);
- temp01 = (pos1 < black_clamp);
- pos1 = __msa_bmnz_v(pos1, black_clamp, temp01);
- XORI_B2_128_UB(pos0, pos1);
- temp00_s = __msa_adds_s_b((v16i8)white_clamp, const127);
- temp00 = (v16u8)(temp00_s < pos0);
- pos0 = (v16u8)__msa_bmnz_v((v16u8)pos0, (v16u8)temp00_s, temp00);
- temp01_s = __msa_adds_s_b((v16i8)white_clamp, const127);
- temp01 = (temp01_s < pos1);
- pos1 = (v16u8)__msa_bmnz_v((v16u8)pos1, (v16u8)temp01_s, temp01);
- XORI_B2_128_UB(pos0, pos1);
- pos0 += ref0;
- ST_UB(pos0, pos0_ptr);
- pos1 += ref1;
- ST_UB(pos1, pos1_ptr);
- pos0_ptr += 16;
- pos1_ptr += 16;
- ref0_ptr += 16;
- ref1_ptr += 16;
- }
- }
-}
diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c
index 322b613..6baf00f 100644
--- a/vp8/common/postproc.c
+++ b/vp8/common/postproc.c
@@ -10,6 +10,7 @@
#include "vpx_config.h"
+#include "vpx_dsp_rtcd.h"
#include "vp8_rtcd.h"
#include "vpx_scale_rtcd.h"
#include "vpx_scale/yv12config.h"
@@ -490,54 +491,6 @@
state->last_noise = a;
}
-/****************************************************************************
- *
- * ROUTINE : plane_add_noise_c
- *
- * INPUTS : unsigned char *Start starting address of buffer to add gaussian
- * noise to
- * unsigned int Width width of plane
- * unsigned int Height height of plane
- * int Pitch distance between subsequent lines of frame
- * int q quantizer used to determine amount of noise
- * to add
- *
- * OUTPUTS : None.
- *
- * RETURNS : void.
- *
- * FUNCTION : adds gaussian noise to a plane of pixels
- *
- * SPECIAL NOTES : None.
- *
- ****************************************************************************/
-void vp8_plane_add_noise_c(unsigned char *Start, char *noise,
- char blackclamp[16],
- char whiteclamp[16],
- char bothclamp[16],
- unsigned int Width, unsigned int Height, int Pitch)
-{
- unsigned int i, j;
- (void)bothclamp;
-
- for (i = 0; i < Height; i++)
- {
- unsigned char *Pos = Start + i * Pitch;
- char *Ref = (char *)(noise + (rand() & 0xff));
-
- for (j = 0; j < Width; j++)
- {
- if (Pos[j] < blackclamp[0])
- Pos[j] = blackclamp[0];
-
- if (Pos[j] > 255 + whiteclamp[0])
- Pos[j] = 255 + whiteclamp[0];
-
- Pos[j] += Ref[j];
- }
- }
-}
-
/* Blend the macro block with a solid colored square. Leave the
* edges unblended to give distinction to macro blocks in areas
* filled with the same color block.
@@ -828,7 +781,7 @@
fillrd(&oci->postproc_state, 63 - q, noise_level);
}
- vp8_plane_add_noise
+ vpx_plane_add_noise
(oci->post_proc_buffer.y_buffer,
oci->postproc_state.noise,
oci->postproc_state.blackclamp,
diff --git a/vp8/common/rtcd_defs.pl b/vp8/common/rtcd_defs.pl
index 6799c27..b942d5b 100644
--- a/vp8/common/rtcd_defs.pl
+++ b/vp8/common/rtcd_defs.pl
@@ -167,10 +167,6 @@
add_proto qw/void vp8_post_proc_down_and_across_mb_row/, "unsigned char *src, unsigned char *dst, int src_pitch, int dst_pitch, int cols, unsigned char *flimits, int size";
specialize qw/vp8_post_proc_down_and_across_mb_row sse2 msa/;
- add_proto qw/void vp8_plane_add_noise/, "unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch";
- specialize qw/vp8_plane_add_noise mmx sse2 msa/;
- $vp8_plane_add_noise_sse2=vp8_plane_add_noise_wmt;
-
add_proto qw/void vp8_blend_mb_inner/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride";
# no asm yet
diff --git a/vp8/common/x86/postproc_mmx.asm b/vp8/common/x86/postproc_mmx.asm
index a2b1632..1a89e7e 100644
--- a/vp8/common/x86/postproc_mmx.asm
+++ b/vp8/common/x86/postproc_mmx.asm
@@ -241,68 +241,6 @@
%undef flimit2
-;void vp8_plane_add_noise_mmx (unsigned char *Start, unsigned char *noise,
-; unsigned char blackclamp[16],
-; unsigned char whiteclamp[16],
-; unsigned char bothclamp[16],
-; unsigned int Width, unsigned int Height, int Pitch)
-global sym(vp8_plane_add_noise_mmx) PRIVATE
-sym(vp8_plane_add_noise_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 8
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
-.addnoise_loop:
- call sym(LIBVPX_RAND) WRT_PLT
- mov rcx, arg(1) ;noise
- and rax, 0xff
- add rcx, rax
-
- ; we rely on the fact that the clamping vectors are stored contiguously
- ; in black/white/both order. Note that we have to reload this here because
- ; rdx could be trashed by rand()
- mov rdx, arg(2) ; blackclamp
-
-
- mov rdi, rcx
- movsxd rcx, dword arg(5) ;[Width]
- mov rsi, arg(0) ;Pos
- xor rax,rax
-
-.addnoise_nextset:
- movq mm1,[rsi+rax] ; get the source
-
- psubusb mm1, [rdx] ;blackclamp ; clamp both sides so we don't outrange adding noise
- paddusb mm1, [rdx+32] ;bothclamp
- psubusb mm1, [rdx+16] ;whiteclamp
-
- movq mm2,[rdi+rax] ; get the noise for this line
- paddb mm1,mm2 ; add it in
- movq [rsi+rax],mm1 ; store the result
-
- add rax,8 ; move to the next line
-
- cmp rax, rcx
- jl .addnoise_nextset
-
- movsxd rax, dword arg(7) ; Pitch
- add arg(0), rax ; Start += Pitch
- sub dword arg(6), 1 ; Height -= 1
- jg .addnoise_loop
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
SECTION_RODATA
align 16
Blur:
diff --git a/vp8/common/x86/postproc_sse2.asm b/vp8/common/x86/postproc_sse2.asm
index fed4ee5..de17afa 100644
--- a/vp8/common/x86/postproc_sse2.asm
+++ b/vp8/common/x86/postproc_sse2.asm
@@ -655,68 +655,6 @@
%undef flimit4
-;void vp8_plane_add_noise_wmt (unsigned char *Start, unsigned char *noise,
-; unsigned char blackclamp[16],
-; unsigned char whiteclamp[16],
-; unsigned char bothclamp[16],
-; unsigned int Width, unsigned int Height, int Pitch)
-global sym(vp8_plane_add_noise_wmt) PRIVATE
-sym(vp8_plane_add_noise_wmt):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 8
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
-.addnoise_loop:
- call sym(LIBVPX_RAND) WRT_PLT
- mov rcx, arg(1) ;noise
- and rax, 0xff
- add rcx, rax
-
- ; we rely on the fact that the clamping vectors are stored contiguously
- ; in black/white/both order. Note that we have to reload this here because
- ; rdx could be trashed by rand()
- mov rdx, arg(2) ; blackclamp
-
-
- mov rdi, rcx
- movsxd rcx, dword arg(5) ;[Width]
- mov rsi, arg(0) ;Pos
- xor rax,rax
-
-.addnoise_nextset:
- movdqu xmm1,[rsi+rax] ; get the source
-
- psubusb xmm1, [rdx] ;blackclamp ; clamp both sides so we don't outrange adding noise
- paddusb xmm1, [rdx+32] ;bothclamp
- psubusb xmm1, [rdx+16] ;whiteclamp
-
- movdqu xmm2,[rdi+rax] ; get the noise for this line
- paddb xmm1,xmm2 ; add it in
- movdqu [rsi+rax],xmm1 ; store the result
-
- add rax,16 ; move to the next line
-
- cmp rax, rcx
- jl .addnoise_nextset
-
- movsxd rax, dword arg(7) ; Pitch
- add arg(0), rax ; Start += Pitch
- sub dword arg(6), 1 ; Height -= 1
- jg .addnoise_loop
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
SECTION_RODATA
align 16
four8s:
diff --git a/vp8/encoder/vp8_quantize.c b/vp8/encoder/vp8_quantize.c
index ee922c9..0d101ba 100644
--- a/vp8/encoder/vp8_quantize.c
+++ b/vp8/encoder/vp8_quantize.c
@@ -227,12 +227,12 @@
if(improved_quant)
{
unsigned t;
- int l;
+ int l, m;
t = d;
for(l = 0; t > 1; l++)
t>>=1;
- t = 1 + (1<<(16+l))/d;
- *quant = (short)(t - (1<<16));
+ m = 1 + (1<<(16+l))/d;
+ *quant = (short)(m - (1<<16));
*shift = l;
/* use multiplication and constant shift by 16 */
*shift = 1 << (16 - *shift);
diff --git a/vp9/common/vp9_postproc.c b/vp9/common/vp9_postproc.c
index b685d81..c04cc8f 100644
--- a/vp9/common/vp9_postproc.c
+++ b/vp9/common/vp9_postproc.c
@@ -12,6 +12,7 @@
#include <stdlib.h>
#include <stdio.h>
+#include "./vpx_dsp_rtcd.h"
#include "./vpx_config.h"
#include "./vpx_scale_rtcd.h"
#include "./vp9_rtcd.h"
@@ -587,32 +588,6 @@
state->last_noise = a;
}
-void vp9_plane_add_noise_c(uint8_t *start, char *noise,
- char blackclamp[16],
- char whiteclamp[16],
- char bothclamp[16],
- unsigned int width, unsigned int height, int pitch) {
- unsigned int i, j;
-
- // TODO(jbb): why does simd code use both but c doesn't, normalize and
- // fix..
- (void) bothclamp;
- for (i = 0; i < height; i++) {
- uint8_t *pos = start + i * pitch;
- char *ref = (char *)(noise + (rand() & 0xff)); // NOLINT
-
- for (j = 0; j < width; j++) {
- if (pos[j] < blackclamp[0])
- pos[j] = blackclamp[0];
-
- if (pos[j] > 255 + whiteclamp[0])
- pos[j] = 255 + whiteclamp[0];
-
- pos[j] += ref[j];
- }
- }
-}
-
static void swap_mi_and_prev_mi(VP9_COMMON *cm) {
// Current mip will be the prev_mip for the next frame.
MODE_INFO *temp = cm->postproc_state.prev_mip;
@@ -726,8 +701,7 @@
ppstate->last_noise != noise_level) {
fillrd(ppstate, 63 - q, noise_level);
}
-
- vp9_plane_add_noise(ppbuf->y_buffer, ppstate->noise, ppstate->blackclamp,
+ vpx_plane_add_noise(ppbuf->y_buffer, ppstate->noise, ppstate->blackclamp,
ppstate->whiteclamp, ppstate->bothclamp,
ppbuf->y_width, ppbuf->y_height, ppbuf->y_stride);
}
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index 1cf636c..d7f5a21 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -70,10 +70,6 @@
specialize qw/vp9_post_proc_down_and_across sse2/;
$vp9_post_proc_down_and_across_sse2=vp9_post_proc_down_and_across_xmm;
-add_proto qw/void vp9_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
-specialize qw/vp9_plane_add_noise sse2/;
-$vp9_plane_add_noise_sse2=vp9_plane_add_noise_wmt;
-
add_proto qw/void vp9_filter_by_weight16x16/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight";
specialize qw/vp9_filter_by_weight16x16 sse2 msa/;
@@ -169,9 +165,6 @@
add_proto qw/void vp9_highbd_post_proc_down_and_across/, "const uint16_t *src_ptr, uint16_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit";
specialize qw/vp9_highbd_post_proc_down_and_across/;
-
- add_proto qw/void vp9_highbd_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
- specialize qw/vp9_highbd_plane_add_noise/;
}
#
diff --git a/vp9/common/x86/vp9_postproc_sse2.asm b/vp9/common/x86/vp9_postproc_sse2.asm
index ec8bfdb..4307628 100644
--- a/vp9/common/x86/vp9_postproc_sse2.asm
+++ b/vp9/common/x86/vp9_postproc_sse2.asm
@@ -624,68 +624,6 @@
%undef flimit4
-;void vp9_plane_add_noise_wmt (unsigned char *start, unsigned char *noise,
-; unsigned char blackclamp[16],
-; unsigned char whiteclamp[16],
-; unsigned char bothclamp[16],
-; unsigned int width, unsigned int height, int pitch)
-global sym(vp9_plane_add_noise_wmt) PRIVATE
-sym(vp9_plane_add_noise_wmt):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 8
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
-.addnoise_loop:
- call sym(LIBVPX_RAND) WRT_PLT
- mov rcx, arg(1) ;noise
- and rax, 0xff
- add rcx, rax
-
- ; we rely on the fact that the clamping vectors are stored contiguously
- ; in black/white/both order. Note that we have to reload this here because
- ; rdx could be trashed by rand()
- mov rdx, arg(2) ; blackclamp
-
-
- mov rdi, rcx
- movsxd rcx, dword arg(5) ;[Width]
- mov rsi, arg(0) ;Pos
- xor rax,rax
-
-.addnoise_nextset:
- movdqu xmm1,[rsi+rax] ; get the source
-
- psubusb xmm1, [rdx] ;blackclamp ; clamp both sides so we don't outrange adding noise
- paddusb xmm1, [rdx+32] ;bothclamp
- psubusb xmm1, [rdx+16] ;whiteclamp
-
- movdqu xmm2,[rdi+rax] ; get the noise for this line
- paddb xmm1,xmm2 ; add it in
- movdqu [rsi+rax],xmm1 ; store the result
-
- add rax,16 ; move to the next line
-
- cmp rax, rcx
- jl .addnoise_nextset
-
- movsxd rax, dword arg(7) ; Pitch
- add arg(0), rax ; Start += Pitch
- sub dword arg(6), 1 ; Height -= 1
- jg .addnoise_loop
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
SECTION_RODATA
align 16
rd42:
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index ae5ca7d..e96c96c 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -662,12 +662,79 @@
}
}
+#if !CONFIG_VP9_HIGHBITDEPTH
+// Check if most of the superblock is skin content, and if so, force split to
+// 32x32 and return 1 so the caller can set x->sb_is_skin for mode selection.
+static int skin_sb_split(VP9_COMP *cpi, MACROBLOCK *x, const int low_res,
+ int mi_row, int mi_col, int *force_split) {
+ VP9_COMMON * const cm = &cpi->common;
+ // Avoid checking superblocks on/near boundary and avoid low resolutions.
+ // Note superblock may still pick 64X64 if y_sad is very small
+ // (i.e., y_sad < cpi->vbp_threshold_sad) below. For now leave this as is.
+ if (!low_res && (mi_col >= 8 && mi_col + 8 < cm->mi_cols && mi_row >= 8 &&
+ mi_row + 8 < cm->mi_rows)) {
+ int num_16x16_skin = 0;
+ int num_16x16_nonskin = 0;
+ uint8_t *ysignal = x->plane[0].src.buf;
+ uint8_t *usignal = x->plane[1].src.buf;
+ uint8_t *vsignal = x->plane[2].src.buf;
+ int sp = x->plane[0].src.stride;
+ int spuv = x->plane[1].src.stride;
+ const int block_index = mi_row * cm->mi_cols + mi_col;
+ const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64];
+ const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64];
+ const int xmis = VPXMIN(cm->mi_cols - mi_col, bw);
+ const int ymis = VPXMIN(cm->mi_rows - mi_row, bh);
+ // Loop through the 16x16 sub-blocks.
+ int i, j;
+ for (i = 0; i < ymis; i+=2) {
+ for (j = 0; j < xmis; j+=2) {
+ int bl_index = block_index + i * cm->mi_cols + j;
+ int bl_index1 = bl_index + 1;
+ int bl_index2 = bl_index + cm->mi_cols;
+ int bl_index3 = bl_index2 + 1;
+ int consec_zeromv = VPXMIN(cpi->consec_zero_mv[bl_index],
+ VPXMIN(cpi->consec_zero_mv[bl_index1],
+ VPXMIN(cpi->consec_zero_mv[bl_index2],
+ cpi->consec_zero_mv[bl_index3])));
+ int is_skin = vp9_compute_skin_block(ysignal,
+ usignal,
+ vsignal,
+ sp,
+ spuv,
+ BLOCK_16X16,
+ consec_zeromv,
+ 0);
+ num_16x16_skin += is_skin;
+ num_16x16_nonskin += (1 - is_skin);
+ if (num_16x16_nonskin > 3) {
+ // Exit loop if at least 4 of the 16x16 blocks are not skin.
+ i = ymis;
+ break;
+ }
+ ysignal += 16;
+ usignal += 8;
+ vsignal += 8;
+ }
+ ysignal += (sp << 4) - 64;
+ usignal += (spuv << 3) - 32;
+ vsignal += (spuv << 3) - 32;
+ }
+ if (num_16x16_skin > 12) {
+ *force_split = 1;
+ return 1;
+ }
+ }
+ return 0;
+}
+#endif
+
// This function chooses partitioning based on the variance between source and
// reconstructed last, where variance is computed for down-sampled inputs.
static int choose_partitioning(VP9_COMP *cpi,
- const TileInfo *const tile,
- MACROBLOCK *x,
- int mi_row, int mi_col) {
+ const TileInfo *const tile,
+ MACROBLOCK *x,
+ int mi_row, int mi_col) {
VP9_COMMON * const cm = &cpi->common;
MACROBLOCKD *xd = &x->e_mbd;
int i, j, k, m;
@@ -771,70 +838,13 @@
set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_64X64);
- // Check if most of the superblock is skin content, and if so, force split
- // to 32x32, and set x->sb_is_skin for use in mode selection.
- // Avoid checking superblocks on/near boundary and avoid low resolutions.
- // Note superblock may still pick 64X64 if y_sad is very small
- // (i.e., y_sad < cpi->vbp_threshold_sad) below. For now leave this as is.
x->sb_is_skin = 0;
#if !CONFIG_VP9_HIGHBITDEPTH
- if (cpi->use_skin_detection && !low_res && (mi_col >= 8 &&
- mi_col + 8 < cm->mi_cols && mi_row >= 8 && mi_row + 8 < cm->mi_rows)) {
- int bl_index1, bl_index2, bl_index3;
- int num_16x16_skin = 0;
- int num_16x16_nonskin = 0;
- int is_skin = 0;
- int consec_zeromv = 0;
- uint8_t *ysignal = x->plane[0].src.buf;
- uint8_t *usignal = x->plane[1].src.buf;
- uint8_t *vsignal = x->plane[2].src.buf;
- int spuv = x->plane[1].src.stride;
- const int block_index = mi_row * cm->mi_cols + mi_col;
- const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64];
- const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64];
- const int xmis = VPXMIN(cm->mi_cols - mi_col, bw);
- const int ymis = VPXMIN(cm->mi_rows - mi_row, bh);
- // Loop through the 16x16 sub-blocks.
- int j, i;
- for (i = 0; i < ymis; i+=2) {
- for (j = 0; j < xmis; j+=2) {
- int bl_index = block_index + i * cm->mi_cols + j;
- bl_index1 = bl_index + 1;
- bl_index2 = bl_index + cm->mi_cols;
- bl_index3 = bl_index2 + 1;
- consec_zeromv = VPXMIN(cpi->consec_zero_mv[bl_index],
- VPXMIN(cpi->consec_zero_mv[bl_index1],
- VPXMIN(cpi->consec_zero_mv[bl_index2],
- cpi->consec_zero_mv[bl_index3])));
- is_skin = vp9_compute_skin_block(ysignal,
- usignal,
- vsignal,
- sp,
- spuv,
- BLOCK_16X16,
- consec_zeromv,
- 0);
- num_16x16_skin += is_skin;
- num_16x16_nonskin += (1 - is_skin);
- if (num_16x16_nonskin > 3) {
- // Exit loop if at least 4 of the 16x16 blocks are not skin.
- i = ymis;
- j = xmis;
- }
- ysignal += 16;
- usignal += 8;
- vsignal += 8;
- }
- ysignal += (sp << 4) - 64;
- usignal += (spuv << 3) - 32;
- vsignal += (spuv << 3) - 32;
- }
- if (num_16x16_skin > 12) {
- x->sb_is_skin = 1;
- force_split[0] = 1;
- }
- }
+ if (cpi->use_skin_detection)
+ x->sb_is_skin = skin_sb_split(cpi, x, low_res, mi_row, mi_col,
+ &force_split[0]);
#endif
+
for (i = 1; i <= 2; ++i) {
struct macroblock_plane *p = &x->plane[i];
struct macroblockd_plane *pd = &xd->plane[i];
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 68537e9..4be043d 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -1692,9 +1692,6 @@
cpi->use_skin_detection = 0;
cpi->common.buffer_pool = pool;
- cpi->rc.high_source_sad = 0;
- cpi->rc.count_last_scene_change = 0;
-
init_config(cpi, oxcf);
vp9_rc_init(&cpi->oxcf, oxcf->pass, &cpi->rc);
diff --git a/vp9/encoder/vp9_lookahead.c b/vp9/encoder/vp9_lookahead.c
index def9b8c..441280c 100644
--- a/vp9/encoder/vp9_lookahead.c
+++ b/vp9/encoder/vp9_lookahead.c
@@ -20,8 +20,8 @@
/* Return the buffer at the given absolute index and increment the index */
static struct lookahead_entry *pop(struct lookahead_ctx *ctx,
- unsigned int *idx) {
- unsigned int index = *idx;
+ int *idx) {
+ int index = *idx;
struct lookahead_entry *buf = ctx->buf + index;
assert(index < ctx->max_sz);
@@ -35,7 +35,7 @@
void vp9_lookahead_destroy(struct lookahead_ctx *ctx) {
if (ctx) {
if (ctx->buf) {
- unsigned int i;
+ int i;
for (i = 0; i < ctx->max_sz; i++)
vpx_free_frame_buffer(&ctx->buf[i].img);
@@ -221,9 +221,9 @@
if (index >= 0) {
// Forward peek
- if (index < (int)ctx->sz) {
+ if (index < ctx->sz) {
index += ctx->read_idx;
- if (index >= (int)ctx->max_sz)
+ if (index >= ctx->max_sz)
index -= ctx->max_sz;
buf = ctx->buf + index;
}
diff --git a/vp9/encoder/vp9_lookahead.h b/vp9/encoder/vp9_lookahead.h
index 1382038..db0fd1c 100644
--- a/vp9/encoder/vp9_lookahead.h
+++ b/vp9/encoder/vp9_lookahead.h
@@ -36,10 +36,10 @@
#define MAX_PRE_FRAMES 1
struct lookahead_ctx {
- unsigned int max_sz; /* Absolute size of the queue */
- unsigned int sz; /* Number of buffers currently in the queue */
- unsigned int read_idx; /* Read index */
- unsigned int write_idx; /* Write index */
+ int max_sz; /* Absolute size of the queue */
+ int sz; /* Number of buffers currently in the queue */
+ int read_idx; /* Read index */
+ int write_idx; /* Write index */
struct lookahead_entry *buf; /* Buffer list */
};
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 2e27f94..fd51598 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -1845,8 +1845,7 @@
cpi->denoiser.denoising_level > kDenLowLow &&
cpi->denoiser.reset == 0) {
VP9_DENOISER_DECISION decision = COPY_BLOCK;
- vp9_denoiser_denoise(cpi, x, mi_row, mi_col, VPXMAX(BLOCK_8X8, bsize),
- ctx, &decision);
+ vp9_denoiser_denoise(cpi, x, mi_row, mi_col, bsize, ctx, &decision);
// If INTRA or GOLDEN reference was selected, re-evaluate ZEROMV on denoised
// result. Only do this under noise conditions, and if rdcost of ZEROMV on
// original source is not significantly higher than rdcost of best mode.
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index 91f877e..9766c05 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -219,12 +219,12 @@
static void invert_quant(int16_t *quant, int16_t *shift, int d) {
unsigned t;
- int l;
+ int l, m;
t = d;
for (l = 0; t > 1; l++)
t >>= 1;
- t = 1 + (1 << (16 + l)) / d;
- *quant = (int16_t)(t - (1 << 16));
+ m = 1 + (1 << (16 + l)) / d;
+ *quant = (int16_t)(m - (1 << 16));
*shift = 1 << (16 - l);
}
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index d53e60a..0675d4a 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -339,6 +339,9 @@
rc->total_target_vs_actual = 0;
rc->avg_intersize_gfint = 0;
rc->avg_frame_low_motion = 0;
+ rc->high_source_sad = 0;
+ rc->count_last_scene_change = 0;
+ rc->avg_source_sad = 0;
rc->frames_since_key = 8; // Sensible default for first frame.
rc->this_key_frame_forced = 0;
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index ebe28b8..2ba2750 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -143,8 +143,8 @@
for (idy = -1; idy <= 1; ++idy) {
for (idx = -1; idx <= 1; ++idx) {
- int row = i + idy;
- int col = j + idx;
+ int row = (int)i + idy;
+ int col = (int)j + idx;
if (row >= 0 && row < (int)block_height &&
col >= 0 && col < (int)block_width) {
@@ -211,8 +211,8 @@
for (idy = -1; idy <= 1; ++idy) {
for (idx = -1; idx <= 1; ++idx) {
- int row = i + idy;
- int col = j + idx;
+ int row = (int)i + idy;
+ int col = (int)j + idx;
if (row >= 0 && row < (int)block_height &&
col >= 0 && col < (int)block_width) {
diff --git a/vp9/encoder/x86/vp9_denoiser_sse2.c b/vp9/encoder/x86/vp9_denoiser_sse2.c
index f4a149d..883507a 100644
--- a/vp9/encoder/x86/vp9_denoiser_sse2.c
+++ b/vp9/encoder/x86/vp9_denoiser_sse2.c
@@ -147,8 +147,9 @@
const __m128i l32 = _mm_set1_epi8(2);
// Difference between level 2 and level 1 is 1.
const __m128i l21 = _mm_set1_epi8(1);
+ const int b_height = (4 << b_height_log2_lookup[bs]) >> 1;
- for (r = 0; r < ((4 << b_height_log2_lookup[bs]) >> 1); ++r) {
+ for (r = 0; r < b_height; ++r) {
memcpy(sig_buffer[r], sig, width);
memcpy(sig_buffer[r] + width, sig + sig_stride, width);
memcpy(mc_running_buffer[r], mc_running_avg_y, width);
@@ -188,8 +189,8 @@
// Only apply the adjustment for max delta up to 3.
if (delta < 4) {
const __m128i k_delta = _mm_set1_epi8(delta);
- running_avg_y -= avg_y_stride * (4 << b_height_log2_lookup[bs]);
- for (r = 0; r < ((4 << b_height_log2_lookup[bs]) >> 1); ++r) {
+ running_avg_y -= avg_y_stride * (b_height << 1);
+ for (r = 0; r < b_height; ++r) {
acc_diff = vp9_denoiser_adj_16x1_sse2(
sig_buffer[r], mc_running_buffer[r], running_buffer[r],
k_0, k_delta, acc_diff);
@@ -235,38 +236,37 @@
const __m128i l32 = _mm_set1_epi8(2);
// Difference between level 2 and level 1 is 1.
const __m128i l21 = _mm_set1_epi8(1);
+ const int b_width = (4 << b_width_log2_lookup[bs]);
+ const int b_height = (4 << b_height_log2_lookup[bs]);
+ const int b_width_shift4 = b_width >> 4;
- for (c = 0; c < 4; ++c) {
- for (r = 0; r < 4; ++r) {
+ for (r = 0; r < 4; ++r) {
+ for (c = 0; c < b_width_shift4; ++c) {
acc_diff[c][r] = _mm_setzero_si128();
}
}
- for (r = 0; r < (4 << b_height_log2_lookup[bs]); ++r) {
- for (c = 0; c < (4 << b_width_log2_lookup[bs]); c += 16) {
- acc_diff[c>>4][r>>4] = vp9_denoiser_16x1_sse2(
+ for (r = 0; r < b_height; ++r) {
+ for (c = 0; c < b_width_shift4; ++c) {
+ acc_diff[c][r>>4] = vp9_denoiser_16x1_sse2(
sig, mc_running_avg_y, running_avg_y, &k_0, &k_4,
- &k_8, &k_16, &l3, &l32, &l21, acc_diff[c>>4][r>>4]);
+ &k_8, &k_16, &l3, &l32, &l21, acc_diff[c][r>>4]);
// Update pointers for next iteration.
sig += 16;
mc_running_avg_y += 16;
running_avg_y += 16;
}
- if ((r + 1) % 16 == 0 || (bs == BLOCK_16X8 && r == 7)) {
- for (c = 0; c < (4 << b_width_log2_lookup[bs]); c += 16) {
- sum_diff += sum_diff_16x1(acc_diff[c>>4][r>>4]);
+ if ((r & 0xf) == 0xf || (bs == BLOCK_16X8 && r == 7)) {
+ for (c = 0; c < b_width_shift4; ++c) {
+ sum_diff += sum_diff_16x1(acc_diff[c][r>>4]);
}
}
// Update pointers for next iteration.
- sig = sig - 16 * ((4 << b_width_log2_lookup[bs]) >> 4) + sig_stride;
- mc_running_avg_y = mc_running_avg_y -
- 16 * ((4 << b_width_log2_lookup[bs]) >> 4) +
- mc_avg_y_stride;
- running_avg_y = running_avg_y -
- 16 * ((4 << b_width_log2_lookup[bs]) >> 4) +
- avg_y_stride;
+ sig = sig - b_width + sig_stride;
+ mc_running_avg_y = mc_running_avg_y - b_width + mc_avg_y_stride;
+ running_avg_y = running_avg_y - b_width + avg_y_stride;
}
{
@@ -278,33 +278,29 @@
// Only apply the adjustment for max delta up to 3.
if (delta < 4) {
const __m128i k_delta = _mm_set1_epi8(delta);
- sig -= sig_stride * (4 << b_height_log2_lookup[bs]);
- mc_running_avg_y -= mc_avg_y_stride * (4 << b_height_log2_lookup[bs]);
- running_avg_y -= avg_y_stride * (4 << b_height_log2_lookup[bs]);
+ sig -= sig_stride * b_height;
+ mc_running_avg_y -= mc_avg_y_stride * b_height;
+ running_avg_y -= avg_y_stride * b_height;
sum_diff = 0;
- for (r = 0; r < (4 << b_height_log2_lookup[bs]); ++r) {
- for (c = 0; c < (4 << b_width_log2_lookup[bs]); c += 16) {
- acc_diff[c>>4][r>>4] = vp9_denoiser_adj_16x1_sse2(
+ for (r = 0; r < b_height; ++r) {
+ for (c = 0; c < b_width_shift4; ++c) {
+ acc_diff[c][r>>4] = vp9_denoiser_adj_16x1_sse2(
sig, mc_running_avg_y, running_avg_y, k_0,
- k_delta, acc_diff[c>>4][r>>4]);
+ k_delta, acc_diff[c][r>>4]);
// Update pointers for next iteration.
sig += 16;
mc_running_avg_y += 16;
running_avg_y += 16;
}
- if ((r + 1) % 16 == 0 || (bs == BLOCK_16X8 && r == 7)) {
- for (c = 0; c < (4 << b_width_log2_lookup[bs]); c += 16) {
- sum_diff += sum_diff_16x1(acc_diff[c>>4][r>>4]);
+ if ((r & 0xf) == 0xf || (bs == BLOCK_16X8 && r == 7)) {
+ for (c = 0; c < b_width_shift4; ++c) {
+ sum_diff += sum_diff_16x1(acc_diff[c][r>>4]);
}
}
- sig = sig - 16 * ((4 << b_width_log2_lookup[bs]) >> 4) + sig_stride;
- mc_running_avg_y = mc_running_avg_y -
- 16 * ((4 << b_width_log2_lookup[bs]) >> 4) +
- mc_avg_y_stride;
- running_avg_y = running_avg_y -
- 16 * ((4 << b_width_log2_lookup[bs]) >> 4) +
- avg_y_stride;
+ sig = sig - b_width + sig_stride;
+ mc_running_avg_y = mc_running_avg_y - b_width + mc_avg_y_stride;
+ running_avg_y = running_avg_y - b_width + avg_y_stride;
}
if (abs(sum_diff) > sum_diff_thresh) {
return COPY_BLOCK;
diff --git a/vpx_dsp/bitreader.c b/vpx_dsp/bitreader.c
index 6ad806a..8140e78 100644
--- a/vpx_dsp/bitreader.c
+++ b/vpx_dsp/bitreader.c
@@ -69,7 +69,7 @@
buffer += (bits >> 3);
value = r->value | (nv << (shift & 0x7));
} else {
- const int bits_over = (int)(shift + CHAR_BIT - bits_left);
+ const int bits_over = (int)(shift + CHAR_BIT - (int)bits_left);
int loop_end = 0;
if (bits_over >= 0) {
count += LOTS_OF_BITS;
diff --git a/vpx_dsp/mips/postproc_msa.c b/vpx_dsp/mips/postproc_msa.c
new file mode 100644
index 0000000..366770c
--- /dev/null
+++ b/vpx_dsp/mips/postproc_msa.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdlib.h>
+#include "./macros_msa.h"
+
+void vpx_plane_add_noise_msa(uint8_t *start_ptr, char *noise,
+ char blackclamp[16], char whiteclamp[16],
+ char bothclamp[16], uint32_t width,
+ uint32_t height, int32_t pitch) {
+ uint32_t i, j;
+
+ for (i = 0; i < height / 2; ++i) {
+ uint8_t *pos0_ptr = start_ptr + (2 * i) * pitch;
+ int8_t *ref0_ptr = (int8_t *)(noise + (rand() & 0xff));
+ uint8_t *pos1_ptr = start_ptr + (2 * i + 1) * pitch;
+ int8_t *ref1_ptr = (int8_t *)(noise + (rand() & 0xff));
+ for (j = width / 16; j--;) {
+ v16i8 temp00_s, temp01_s;
+ v16u8 temp00, temp01, black_clamp, white_clamp;
+ v16u8 pos0, ref0, pos1, ref1;
+ v16i8 const127 = __msa_ldi_b(127);
+
+ pos0 = LD_UB(pos0_ptr);
+ ref0 = LD_UB(ref0_ptr);
+ pos1 = LD_UB(pos1_ptr);
+ ref1 = LD_UB(ref1_ptr);
+ black_clamp = (v16u8)__msa_fill_b(blackclamp[0]);
+ white_clamp = (v16u8)__msa_fill_b(whiteclamp[0]);
+ temp00 = (pos0 < black_clamp);
+ pos0 = __msa_bmnz_v(pos0, black_clamp, temp00);
+ temp01 = (pos1 < black_clamp);
+ pos1 = __msa_bmnz_v(pos1, black_clamp, temp01);
+ XORI_B2_128_UB(pos0, pos1);
+ temp00_s = __msa_adds_s_b((v16i8)white_clamp, const127);
+ temp00 = (v16u8)(temp00_s < pos0);
+ pos0 = (v16u8)__msa_bmnz_v((v16u8)pos0, (v16u8)temp00_s, temp00);
+ temp01_s = __msa_adds_s_b((v16i8)white_clamp, const127);
+ temp01 = (temp01_s < pos1);
+ pos1 = (v16u8)__msa_bmnz_v((v16u8)pos1, (v16u8)temp01_s, temp01);
+ XORI_B2_128_UB(pos0, pos1);
+ pos0 += ref0;
+ ST_UB(pos0, pos0_ptr);
+ pos1 += ref1;
+ ST_UB(pos1, pos1_ptr);
+ pos0_ptr += 16;
+ pos1_ptr += 16;
+ ref0_ptr += 16;
+ ref1_ptr += 16;
+ }
+ }
+}
diff --git a/vpx_dsp/postproc.c b/vpx_dsp/postproc.c
new file mode 100644
index 0000000..1fa0204
--- /dev/null
+++ b/vpx_dsp/postproc.c
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdlib.h>
+
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
+
+#include "vpx/vpx_integer.h"
+#include "vpx_ports/mem.h"
+
+void vpx_plane_add_noise_c(uint8_t *start, char *noise,
+ char blackclamp[16],
+ char whiteclamp[16],
+ char bothclamp[16],
+ unsigned int width, unsigned int height, int pitch) {
+ unsigned int i, j;
+
+ // TODO(jbb): the SIMD code uses bothclamp but the C code ignores it;
+ // normalize the two implementations and fix.
+ (void) bothclamp;
+ for (i = 0; i < height; i++) {
+ uint8_t *pos = start + i * pitch;
+ char *ref = (char *)(noise + (rand() & 0xff)); // NOLINT
+
+ for (j = 0; j < width; j++) {
+ if (pos[j] < blackclamp[0])
+ pos[j] = blackclamp[0];
+
+ if (pos[j] > 255 + whiteclamp[0])
+ pos[j] = 255 + whiteclamp[0];
+
+ pos[j] += ref[j];
+ }
+ }
+}
diff --git a/vpx_dsp/vpx_dsp.mk b/vpx_dsp/vpx_dsp.mk
index 581ec3a..ef319a8 100644
--- a/vpx_dsp/vpx_dsp.mk
+++ b/vpx_dsp/vpx_dsp.mk
@@ -53,6 +53,13 @@
endif # CONFIG_USE_X86INC
endif # CONFIG_VP9_HIGHBITDEPTH
+ifneq ($(filter yes,$(CONFIG_POSTPROC) $(CONFIG_VP9_POSTPROC)),)
+DSP_SRCS-yes += postproc.c
+DSP_SRCS-$(HAVE_MSA) += mips/postproc_msa.c
+DSP_SRCS-$(HAVE_MMX) += x86/postproc_mmx.asm
+DSP_SRCS-$(HAVE_SSE2) += x86/postproc_sse2.asm
+endif # CONFIG_POSTPROC || CONFIG_VP9_POSTPROC
+
DSP_SRCS-$(HAVE_NEON_ASM) += arm/intrapred_neon_asm$(ASM)
DSP_SRCS-$(HAVE_NEON) += arm/intrapred_neon.c
DSP_SRCS-$(HAVE_MSA) += mips/intrapred_msa.c
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index 9ea80a0..f883ce5 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -1907,6 +1907,15 @@
add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
} # CONFIG_VP9_HIGHBITDEPTH
+
+#
+# Post Processing
+#
+if (vpx_config("CONFIG_POSTPROC") eq "yes" || vpx_config("CONFIG_VP9_POSTPROC") eq "yes") {
+ add_proto qw/void vpx_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
+ specialize qw/vpx_plane_add_noise mmx sse2 msa/;
+}
+
} # CONFIG_ENCODERS || CONFIG_POSTPROC || CONFIG_VP9_POSTPROC
1;
diff --git a/vpx_dsp/x86/postproc_mmx.asm b/vpx_dsp/x86/postproc_mmx.asm
new file mode 100644
index 0000000..9703975
--- /dev/null
+++ b/vpx_dsp/x86/postproc_mmx.asm
@@ -0,0 +1,84 @@
+;
+; Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+%include "vpx_ports/x86_abi_support.asm"
+
+;void vpx_plane_add_noise_mmx (unsigned char *Start, unsigned char *noise,
+; unsigned char blackclamp[16],
+; unsigned char whiteclamp[16],
+; unsigned char bothclamp[16],
+; unsigned int Width, unsigned int Height, int Pitch)
+global sym(vpx_plane_add_noise_mmx) PRIVATE
+sym(vpx_plane_add_noise_mmx):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 8
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+.addnoise_loop:
+ call sym(LIBVPX_RAND) WRT_PLT
+ mov rcx, arg(1) ;noise
+ and rax, 0xff
+ add rcx, rax
+
+ ; we rely on the fact that the clamping vectors are stored contiguously
+ ; in black/white/both order. Note that we have to reload this here because
+ ; rdx could be trashed by rand()
+ mov rdx, arg(2) ; blackclamp
+
+
+ mov rdi, rcx
+ movsxd rcx, dword arg(5) ;[Width]
+ mov rsi, arg(0) ;Pos
+ xor rax,rax
+
+.addnoise_nextset:
+ movq mm1,[rsi+rax] ; get the source
+
+ psubusb mm1, [rdx] ;blackclamp ; clamp both sides so we don't outrange adding noise
+ paddusb mm1, [rdx+32] ;bothclamp
+ psubusb mm1, [rdx+16] ;whiteclamp
+
+ movq mm2,[rdi+rax] ; get the noise for this line
+ paddb mm1,mm2 ; add it in
+ movq [rsi+rax],mm1 ; store the result
+
+ add rax,8 ; advance to the next 8 bytes of this row
+
+ cmp rax, rcx
+ jl .addnoise_nextset
+
+ movsxd rax, dword arg(7) ; Pitch
+ add arg(0), rax ; Start += Pitch
+ sub dword arg(6), 1 ; Height -= 1
+ jg .addnoise_loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+SECTION_RODATA
+align 16
+Blur:
+ times 16 dw 16
+ times 8 dw 64
+ times 16 dw 16
+ times 8 dw 0
+
+rd:
+ times 4 dw 0x40
diff --git a/vpx_dsp/x86/postproc_sse2.asm b/vpx_dsp/x86/postproc_sse2.asm
new file mode 100644
index 0000000..f4bc893
--- /dev/null
+++ b/vpx_dsp/x86/postproc_sse2.asm
@@ -0,0 +1,82 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+;void vpx_plane_add_noise_sse2(unsigned char *start, unsigned char *noise,
+; unsigned char blackclamp[16],
+; unsigned char whiteclamp[16],
+; unsigned char bothclamp[16],
+; unsigned int width, unsigned int height,
+; int pitch)
+global sym(vpx_plane_add_noise_sse2) PRIVATE
+sym(vpx_plane_add_noise_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 8
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+.addnoise_loop:
+ call sym(LIBVPX_RAND) WRT_PLT
+ mov rcx, arg(1) ;noise
+ and rax, 0xff
+ add rcx, rax
+
+ ; we rely on the fact that the clamping vectors are stored contiguously
+ ; in black/white/both order. Note that we have to reload this here because
+ ; rdx could be trashed by rand()
+ mov rdx, arg(2) ; blackclamp
+
+
+ mov rdi, rcx
+ movsxd rcx, dword arg(5) ;[Width]
+ mov rsi, arg(0) ;Pos
+ xor rax,rax
+
+.addnoise_nextset:
+ movdqu xmm1,[rsi+rax] ; get the source
+
+ psubusb xmm1, [rdx] ;blackclamp ; clamp both sides so we don't outrange adding noise
+ paddusb xmm1, [rdx+32] ;bothclamp
+ psubusb xmm1, [rdx+16] ;whiteclamp
+
+ movdqu xmm2,[rdi+rax] ; get the noise for this line
+ paddb xmm1,xmm2 ; add it in
+ movdqu [rsi+rax],xmm1 ; store the result
+
+ add rax,16 ; advance to the next 16 bytes of this row
+
+ cmp rax, rcx
+ jl .addnoise_nextset
+
+ movsxd rax, dword arg(7) ; Pitch
+ add arg(0), rax ; Start += Pitch
+ sub dword arg(6), 1 ; Height -= 1
+ jg .addnoise_loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+SECTION_RODATA
+align 16
+rd42:
+ times 8 dw 0x04
+four8s:
+ times 4 dd 8
diff --git a/vpx_dsp/x86/variance_sse2.c b/vpx_dsp/x86/variance_sse2.c
index e6c9365..43f4603 100644
--- a/vpx_dsp/x86/variance_sse2.c
+++ b/vpx_dsp/x86/variance_sse2.c
@@ -171,7 +171,7 @@
unsigned int *sse) {
int sum;
get4x4var_sse2(src, src_stride, ref, ref_stride, sse, &sum);
- return *sse - (((unsigned int)sum * sum) >> 4);
+ return *sse - ((sum * sum) >> 4);
}
unsigned int vpx_variance8x4_sse2(const uint8_t *src, int src_stride,
@@ -180,7 +180,7 @@
int sum;
variance_sse2(src, src_stride, ref, ref_stride, 8, 4,
sse, &sum, get4x4var_sse2, 4);
- return *sse - (((unsigned int)sum * sum) >> 5);
+ return *sse - ((sum * sum) >> 5);
}
unsigned int vpx_variance4x8_sse2(const uint8_t *src, int src_stride,
@@ -189,7 +189,7 @@
int sum;
variance_sse2(src, src_stride, ref, ref_stride, 4, 8,
sse, &sum, get4x4var_sse2, 4);
- return *sse - (((unsigned int)sum * sum) >> 5);
+ return *sse - ((sum * sum) >> 5);
}
unsigned int vpx_variance8x8_sse2(const unsigned char *src, int src_stride,
@@ -197,7 +197,7 @@
unsigned int *sse) {
int sum;
vpx_get8x8var_sse2(src, src_stride, ref, ref_stride, sse, &sum);
- return *sse - (((unsigned int)sum * sum) >> 6);
+ return *sse - ((sum * sum) >> 6);
}
unsigned int vpx_variance16x8_sse2(const unsigned char *src, int src_stride,
@@ -206,7 +206,7 @@
int sum;
variance_sse2(src, src_stride, ref, ref_stride, 16, 8,
sse, &sum, vpx_get8x8var_sse2, 8);
- return *sse - (((unsigned int)sum * sum) >> 7);
+ return *sse - ((sum * sum) >> 7);
}
unsigned int vpx_variance8x16_sse2(const unsigned char *src, int src_stride,
@@ -215,7 +215,7 @@
int sum;
variance_sse2(src, src_stride, ref, ref_stride, 8, 16,
sse, &sum, vpx_get8x8var_sse2, 8);
- return *sse - (((unsigned int)sum * sum) >> 7);
+ return *sse - ((sum * sum) >> 7);
}
unsigned int vpx_variance16x16_sse2(const unsigned char *src, int src_stride,
@@ -223,7 +223,7 @@
unsigned int *sse) {
int sum;
vpx_get16x16var_sse2(src, src_stride, ref, ref_stride, sse, &sum);
- return *sse - (((unsigned int)sum * sum) >> 8);
+ return *sse - (((uint32_t)((int64_t)sum * sum)) >> 8);
}
unsigned int vpx_variance32x32_sse2(const uint8_t *src, int src_stride,
@@ -329,7 +329,7 @@
#undef DECLS
#undef DECL
-#define FN(w, h, wf, wlog2, hlog2, opt, cast) \
+#define FN(w, h, wf, wlog2, hlog2, opt, cast_prod, cast) \
unsigned int vpx_sub_pixel_variance##w##x##h##_##opt(const uint8_t *src, \
int src_stride, \
int x_offset, \
@@ -365,23 +365,23 @@
} \
} \
*sse_ptr = sse; \
- return sse - ((cast se * se) >> (wlog2 + hlog2)); \
+ return sse - (cast_prod (cast se * se) >> (wlog2 + hlog2)); \
}
#define FNS(opt1, opt2) \
-FN(64, 64, 16, 6, 6, opt1, (int64_t)); \
-FN(64, 32, 16, 6, 5, opt1, (int64_t)); \
-FN(32, 64, 16, 5, 6, opt1, (int64_t)); \
-FN(32, 32, 16, 5, 5, opt1, (int64_t)); \
-FN(32, 16, 16, 5, 4, opt1, (int64_t)); \
-FN(16, 32, 16, 4, 5, opt1, (int64_t)); \
-FN(16, 16, 16, 4, 4, opt1, (uint32_t)); \
-FN(16, 8, 16, 4, 3, opt1, (uint32_t)); \
-FN(8, 16, 8, 3, 4, opt1, (uint32_t)); \
-FN(8, 8, 8, 3, 3, opt1, (uint32_t)); \
-FN(8, 4, 8, 3, 2, opt1, (uint32_t)); \
-FN(4, 8, 4, 2, 3, opt2, (uint32_t)); \
-FN(4, 4, 4, 2, 2, opt2, (uint32_t))
+FN(64, 64, 16, 6, 6, opt1, (int64_t), (int64_t)); \
+FN(64, 32, 16, 6, 5, opt1, (int64_t), (int64_t)); \
+FN(32, 64, 16, 5, 6, opt1, (int64_t), (int64_t)); \
+FN(32, 32, 16, 5, 5, opt1, (int64_t), (int64_t)); \
+FN(32, 16, 16, 5, 4, opt1, (int64_t), (int64_t)); \
+FN(16, 32, 16, 4, 5, opt1, (int64_t), (int64_t)); \
+FN(16, 16, 16, 4, 4, opt1, (uint32_t), (int64_t)); \
+FN(16, 8, 16, 4, 3, opt1, (int32_t), (int32_t)); \
+FN(8, 16, 8, 3, 4, opt1, (int32_t), (int32_t)); \
+FN(8, 8, 8, 3, 3, opt1, (int32_t), (int32_t)); \
+FN(8, 4, 8, 3, 2, opt1, (int32_t), (int32_t)); \
+FN(4, 8, 4, 2, 3, opt2, (int32_t), (int32_t)); \
+FN(4, 4, 4, 2, 2, opt2, (int32_t), (int32_t))
FNS(sse2, sse);
FNS(ssse3, ssse3);
@@ -410,7 +410,7 @@
#undef DECL
#undef DECLS
-#define FN(w, h, wf, wlog2, hlog2, opt, cast) \
+#define FN(w, h, wf, wlog2, hlog2, opt, cast_prod, cast) \
unsigned int vpx_sub_pixel_avg_variance##w##x##h##_##opt(const uint8_t *src, \
int src_stride, \
int x_offset, \
@@ -451,23 +451,23 @@
} \
} \
*sseptr = sse; \
- return sse - ((cast se * se) >> (wlog2 + hlog2)); \
+ return sse - (cast_prod (cast se * se) >> (wlog2 + hlog2)); \
}
#define FNS(opt1, opt2) \
-FN(64, 64, 16, 6, 6, opt1, (int64_t)); \
-FN(64, 32, 16, 6, 5, opt1, (int64_t)); \
-FN(32, 64, 16, 5, 6, opt1, (int64_t)); \
-FN(32, 32, 16, 5, 5, opt1, (int64_t)); \
-FN(32, 16, 16, 5, 4, opt1, (int64_t)); \
-FN(16, 32, 16, 4, 5, opt1, (int64_t)); \
-FN(16, 16, 16, 4, 4, opt1, (uint32_t)); \
-FN(16, 8, 16, 4, 3, opt1, (uint32_t)); \
-FN(8, 16, 8, 3, 4, opt1, (uint32_t)); \
-FN(8, 8, 8, 3, 3, opt1, (uint32_t)); \
-FN(8, 4, 8, 3, 2, opt1, (uint32_t)); \
-FN(4, 8, 4, 2, 3, opt2, (uint32_t)); \
-FN(4, 4, 4, 2, 2, opt2, (uint32_t))
+FN(64, 64, 16, 6, 6, opt1, (int64_t), (int64_t)); \
+FN(64, 32, 16, 6, 5, opt1, (int64_t), (int64_t)); \
+FN(32, 64, 16, 5, 6, opt1, (int64_t), (int64_t)); \
+FN(32, 32, 16, 5, 5, opt1, (int64_t), (int64_t)); \
+FN(32, 16, 16, 5, 4, opt1, (int64_t), (int64_t)); \
+FN(16, 32, 16, 4, 5, opt1, (int64_t), (int64_t)); \
+FN(16, 16, 16, 4, 4, opt1, (uint32_t), (int64_t)); \
+FN(16, 8, 16, 4, 3, opt1, (uint32_t), (int32_t)); \
+FN(8, 16, 8, 3, 4, opt1, (uint32_t), (int32_t)); \
+FN(8, 8, 8, 3, 3, opt1, (uint32_t), (int32_t)); \
+FN(8, 4, 8, 3, 2, opt1, (uint32_t), (int32_t)); \
+FN(4, 8, 4, 2, 3, opt2, (uint32_t), (int32_t)); \
+FN(4, 4, 4, 2, 2, opt2, (uint32_t), (int32_t))
FNS(sse2, sse);
FNS(ssse3, ssse3);