Merge "vpxenc: add --aq-mode flag to control adaptive quantization"
diff --git a/.mailmap b/.mailmap
index ba1279b..fb82a24 100644
--- a/.mailmap
+++ b/.mailmap
@@ -1,8 +1,18 @@
Adrian Grange <agrange@google.com>
+Alexis Ballier <aballier@gentoo.org> <alexis.ballier@gmail.com>
+Hangyu Kuang <hkuang@google.com>
+Jim Bankoski <jimbankoski@google.com>
+John Koleszar <jkoleszar@google.com>
Johann Koenig <johannkoenig@google.com>
+Johann Koenig <johannkoenig@google.com> <johann.koenig@duck.com>
+Johann Koenig <johannkoenig@google.com> <johannkoenig@dhcp-172-19-7-52.mtv.corp.google.com>
+Pascal Massimino <pascal.massimino@gmail.com>
+Sami Pietilä <samipietila@google.com>
Tero Rintaluoma <teror@google.com> <tero.rintaluoma@on2.com>
+Timothy B. Terriberry <tterribe@xiph.org> Tim Terriberry <tterriberry@mozilla.com>
Tom Finegan <tomfinegan@google.com>
Ralph Giles <giles@xiph.org> <giles@entropywave.com>
Ralph Giles <giles@xiph.org> <giles@mozilla.com>
Alpha Lam <hclam@google.com> <hclam@chromium.org>
Deb Mukherjee <debargha@google.com>
+Yaowu Xu <yaowu@google.com> <yaowu@xuyaowu.com>
diff --git a/AUTHORS b/AUTHORS
index 0937d5d..a9aa481 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -2,62 +2,97 @@
# by tools/gen_authors.sh.
Aaron Watry <awatry@gmail.com>
+Abo Talib Mahfoodh <ab.mahfoodh@gmail.com>
Adrian Grange <agrange@google.com>
+Ahmad Sharif <asharif@google.com>
+Alexander Voronov <avoronov@graphics.cs.msu.ru>
Alex Converse <alex.converse@gmail.com>
Alexis Ballier <aballier@gentoo.org>
Alok Ahuja <waveletcoeff@gmail.com>
Alpha Lam <hclam@google.com>
+A.Mahfoodh <ab.mahfoodh@gmail.com>
+Ami Fischman <fischman@chromium.org>
Andoni Morales Alastruey <ylatuya@gmail.com>
Andres Mejia <mcitadel@gmail.com>
Aron Rosenberg <arosenberg@logitech.com>
Attila Nagy <attilanagy@google.com>
+changjun.yang <changjun.yang@intel.com>
+chm <chm@rock-chips.com>
+Christian Duvivier <cduvivier@google.com>
+Daniel Kang <ddkang@google.com>
Deb Mukherjee <debargha@google.com>
+Dmitry Kovalev <dkovalev@google.com>
+Dragan Mrdjan <dmrdjan@mips.com>
+Erik Niemeyer <erik.a.niemeyer@gmail.com>
Fabio Pedretti <fabio.ped@libero.it>
Frank Galligan <fgalligan@google.com>
Fredrik Söderquist <fs@opera.com>
Fritz Koenig <frkoenig@google.com>
Gaute Strokkenes <gaute.strokkenes@broadcom.com>
Giuseppe Scrivano <gscrivano@gnu.org>
+Guillaume Martres <gmartres@google.com>
Guillermo Ballester Valor <gbvalor@gmail.com>
+Hangyu Kuang <hkuang@google.com>
Henrik Lundin <hlundin@google.com>
+Hui Su <huisu@google.com>
+Ivan Maltz <ivanmaltz@google.com>
James Berry <jamesberry@google.com>
James Zern <jzern@google.com>
Jan Kratochvil <jan.kratochvil@redhat.com>
+Janne Salonen <jsalonen@google.com>
Jeff Faust <jfaust@google.com>
Jeff Muizelaar <jmuizelaar@mozilla.com>
+Jeff Petkau <jpet@chromium.org>
Jim Bankoski <jimbankoski@google.com>
+Jingning Han <jingning@google.com>
Johann Koenig <johannkoenig@google.com>
John Koleszar <jkoleszar@google.com>
Joshua Bleecher Snyder <josh@treelinelabs.com>
+Joshua Litt <joshualitt@google.com>
Justin Clift <justin@salasaga.org>
Justin Lebar <justin.lebar@gmail.com>
KO Myung-Hun <komh@chollian.net>
Lou Quillio <louquillio@google.com>
Luca Barbato <lu_zero@gentoo.org>
Makoto Kato <makoto.kt@gmail.com>
+Mans Rullgard <mans@mansr.com>
Marco Paniconi <marpan@google.com>
+Mark Mentovai <mark@chromium.org>
Martin Ettl <ettl.martin78@googlemail.com>
+Martin Storsjo <martin@martin.st>
+Matthew Heaney <matthewjheaney@chromium.org>
Michael Kohler <michaelkohler@live.com>
+Mike Frysinger <vapier@chromium.org>
Mike Hommey <mhommey@mozilla.com>
Mikhal Shemer <mikhal@google.com>
+Morton Jonuschat <yabawock@gmail.com>
+Parag Salasakar <img.mips1@gmail.com>
Pascal Massimino <pascal.massimino@gmail.com>
Patrik Westin <patrik.westin@gmail.com>
Paul Wilkins <paulwilkins@google.com>
Pavol Rusnak <stick@gk2.sk>
+Paweł Hajdan <phajdan@google.com>
Philip Jägenstedt <philipj@opera.com>
Priit Laes <plaes@plaes.org>
Rafael Ávila de Espíndola <rafael.espindola@gmail.com>
Rafaël Carré <funman@videolan.org>
Ralph Giles <giles@xiph.org>
+Rob Bradford <rob@linux.intel.com>
Ronald S. Bultje <rbultje@google.com>
+Sami Pietilä <samipietila@google.com>
+Scott Graham <scottmg@chromium.org>
Scott LaVarnway <slavarnway@google.com>
+Shimon Doodkin <helpmepro1@gmail.com>
Stefan Holmer <holmer@google.com>
+Suman Sunkara <sunkaras@google.com>
Taekhyun Kim <takim@nvidia.com>
Takanori MATSUURA <t.matsuu@gmail.com>
+Tamar Levy <tamar.levy@intel.com>
Tero Rintaluoma <teror@google.com>
Thijs Vermeir <thijsvermeir@gmail.com>
Timothy B. Terriberry <tterribe@xiph.org>
Tom Finegan <tomfinegan@google.com>
+Vignesh Venkatasubramanian <vigneshv@google.com>
Yaowu Xu <yaowu@google.com>
Yunqing Wang <yunqingwang@google.com>
Google Inc.
diff --git a/CHANGELOG b/CHANGELOG
index ef64a96..97c9a7b 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,53 @@
+2013-11-15 v1.3.0 "Forest"
+ This release introduces the VP9 codec in a backward-compatible way.
+ All existing users of VP8 can continue to use the library without
+ modification. However, some VP8 options do not map to VP9 in the same manner.
+
+ The VP9 encoder in this release is not feature complete. Users interested in
+ the encoder are advised to use the git master branch and discuss issues on
+ libvpx mailing lists.
+
+ - Upgrading:
+ This release is ABI and API compatible with Duclair (v1.0.0). Users
+ of older releases should refer to the Upgrading notes in this document
+ for that release.
+
+ - Enhancements:
+ Get rid of bashisms in the main build scripts
+ Added usage info on command line options
+ Add lossless compression mode
+ Dll build of libvpx
+ Add additional Mac OS X targets: 10.7, 10.8 and 10.9 (darwin11-13)
+ Add option to disable documentation
+ configure: add --enable-external-build support
+ make: support V=1 as short form of verbose=yes
+ configure: support mingw-w64
+ configure: support hardfloat armv7 CHOSTS
+ configure: add support for android x86
+ Add estimated completion time to vpxenc
+ Don't exit on decode errors in vpxenc
+ vpxenc: support scaling prior to encoding
+ vpxdec: support scaling output
+ vpxenc: improve progress indicators with --skip
+ msvs: Don't link to winmm.lib
+ Add a new script for producing vcxproj files
+ Produce Visual Studio 10 and 11 project files
+ Produce Windows Phone project files
+ msvs-build: use msbuild for vs >= 2005
+ configure: default configure log to config.log
+ Add encoding option --static-thresh
+
+ - Speed:
+ Miscellaneous speed optimizations for VP8 and VP9.
+
+ - Quality:
+ In general, quality is consistent with the Eider release.
+
+ - Bug Fixes:
+ This release represents approximately a year of engineering effort,
+ and contains multiple bug fixes. Please refer to git history for details.
+
+
2012-12-21 v1.2.0
This release acts as a checkpoint for a large amount of internal refactoring
and testing. It also contains a number of small bugfixes, so all users are
diff --git a/README b/README
index d7cb11a..ce9c1c6 100644
--- a/README
+++ b/README
@@ -64,6 +64,7 @@
armv7-linux-gcc
armv7-none-rvct
armv7-win32-vs11
+ armv7-win32-vs12
mips32-linux-gcc
ppc32-darwin8-gcc
ppc32-darwin9-gcc
@@ -91,6 +92,7 @@
x86-win32-vs9
x86-win32-vs10
x86-win32-vs11
+ x86-win32-vs12
x86_64-darwin9-gcc
x86_64-darwin10-gcc
x86_64-darwin11-gcc
@@ -104,6 +106,7 @@
x86_64-win64-vs9
x86_64-win64-vs10
x86_64-win64-vs11
+ x86_64-win64-vs12
universal-darwin8-gcc
universal-darwin9-gcc
universal-darwin10-gcc
diff --git a/build/make/ads2gas_apple.pl b/build/make/ads2gas_apple.pl
index 51e6fbc..befb3db 100755
--- a/build/make/ads2gas_apple.pl
+++ b/build/make/ads2gas_apple.pl
@@ -17,6 +17,13 @@
#
# Usage: cat inputfile | perl ads2gas_apple.pl > outputfile
#
+
+my $chromium = 0;
+
+foreach my $arg (@ARGV) {
+ $chromium = 1 if ($arg eq "-chromium");
+}
+
print "@ This file was created from a .asm file\n";
print "@ using the ads2gas_apple.pl script.\n\n";
print "\t.set WIDE_REFERENCE, 0\n";
@@ -47,7 +54,7 @@
s/@/,:/g;
# Comment character
- s/;/@/g;
+ s/;/ @/g;
# Hexadecimal constants prefaced by 0x
s/#&/#0x/g;
@@ -188,7 +195,7 @@
$trimmed =~ s/,//g;
# string to array
- @incoming_array = split(/ /, $trimmed);
+ @incoming_array = split(/\s+/, $trimmed);
print ".macro @incoming_array[0]\n";
@@ -210,5 +217,19 @@
# s/\$/\\/g; # End macro definition
s/MEND/.endm/; # No need to tell it where to stop assembling
next if /^\s*END\s*$/;
+
+ # Clang used by Chromium differs slightly from clang in XCode in what it
+ # will accept in the assembly.
+ if ($chromium) {
+ s/qsubaddx/qsax/i;
+ s/qaddsubx/qasx/i;
+ s/ldrneb/ldrbne/i;
+ s/ldrneh/ldrhne/i;
+ s/(vqshrun\.s16 .*, \#)0$/${1}8/i;
+
+ # http://llvm.org/bugs/show_bug.cgi?id=16022
+ s/\.include/#include/;
+ }
+
print;
}
diff --git a/build/make/configure.sh b/build/make/configure.sh
index b43a4ec..8dcb9bb 100755
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -925,41 +925,26 @@
;;
darwin*)
- if [ -z "${sdk_path}" ]; then
- SDK_PATH=`xcode-select -print-path 2> /dev/null`
- SDK_PATH=${SDK_PATH}/Platforms/iPhoneOS.platform/Developer
- else
- SDK_PATH=${sdk_path}
- fi
- TOOLCHAIN_PATH=${SDK_PATH}/usr/bin
- CXX=${TOOLCHAIN_PATH}/g++
- CC=${TOOLCHAIN_PATH}/gcc
- AR=${TOOLCHAIN_PATH}/ar
- LD=${TOOLCHAIN_PATH}/arm-apple-darwin10-llvm-gcc-4.2
- AS=${TOOLCHAIN_PATH}/as
- STRIP=${TOOLCHAIN_PATH}/strip
- NM=${TOOLCHAIN_PATH}/nm
+
+ XCRUN_FIND="xcrun --sdk iphoneos -find"
+ CXX="$(${XCRUN_FIND} clang++)"
+ CC="$(${XCRUN_FIND} clang)"
+ AR="$(${XCRUN_FIND} ar)"
+ LD="$(${XCRUN_FIND} ld)"
+ AS="$(${XCRUN_FIND} as)"
+ STRIP="$(${XCRUN_FIND} strip)"
+ NM="$(${XCRUN_FIND} nm)"
+ RANLIB="$(${XCRUN_FIND} ranlib)"
AS_SFX=.s
# ASFLAGS is written here instead of using check_add_asflags
# because we need to overwrite all of ASFLAGS and purge the
# options that were put in above
- ASFLAGS="-version -arch ${tgt_isa} -g"
+ ASFLAGS="-arch ${tgt_isa} -g"
- add_cflags -arch ${tgt_isa}
- add_ldflags -arch_only ${tgt_isa}
-
- if [ -z "${alt_libc}" ]; then
- alt_libc=${SDK_PATH}/SDKs/iPhoneOS6.0.sdk
- fi
-
- add_cflags "-isysroot ${alt_libc}"
-
- # Add the paths for the alternate libc
- for d in usr/include; do
- try_dir="${alt_libc}/${d}"
- [ -d "${try_dir}" ] && add_cflags -I"${try_dir}"
- done
+ alt_libc="$(xcrun --sdk iphoneos --show-sdk-path)"
+ add_cflags -arch ${tgt_isa} -isysroot ${alt_libc}
+ add_ldflags -arch ${tgt_isa} -ios_version_min 7.0
for d in lib usr/lib usr/lib/system; do
try_dir="${alt_libc}/${d}"
@@ -1093,7 +1078,7 @@
msvs_arch_dir=x86-msvs
vc_version=${tgt_cc##vs}
case $vc_version in
- 7|8|9)
+ 7|8|9|10)
echo "${tgt_cc} does not support avx/avx2, disabling....."
RTCD_OPTIONS="${RTCD_OPTIONS}--disable-avx --disable-avx2 "
soft_disable avx
diff --git a/build/make/gen_msvs_sln.sh b/build/make/gen_msvs_sln.sh
index 0c269b1..ffa3706 100755
--- a/build/make/gen_msvs_sln.sh
+++ b/build/make/gen_msvs_sln.sh
@@ -255,7 +255,7 @@
;;
--ver=*) vs_ver="$optval"
case $optval in
- [789]|10|11)
+ [789]|10|11|12)
;;
*) die Unrecognized Visual Studio Version in $opt
;;
@@ -297,12 +297,15 @@
11) sln_vers="12.00"
sln_vers_str="Visual Studio 2012"
;;
+ 12) sln_vers="12.00"
+ sln_vers_str="Visual Studio 2013"
+ ;;
esac
case "${vs_ver:-8}" in
[789])
sfx=vcproj
;;
- 10|11)
+ 10|11|12)
sfx=vcxproj
;;
esac
diff --git a/build/make/gen_msvs_vcxproj.sh b/build/make/gen_msvs_vcxproj.sh
index 4875915..359157c 100755
--- a/build/make/gen_msvs_vcxproj.sh
+++ b/build/make/gen_msvs_vcxproj.sh
@@ -33,7 +33,7 @@
--name=project_name Name of the project (required)
--proj-guid=GUID GUID to use for the project
--module-def=filename File containing export definitions (for DLLs)
- --ver=version Version (10,11) of visual studio to generate for
+ --ver=version Version (10,11,12) of visual studio to generate for
--src-path-bare=dir Path to root of source tree
-Ipath/to/include Additional include directories
-DFLAG[=value] Preprocessor macros to define
@@ -228,7 +228,7 @@
--ver=*)
vs_ver="$optval"
case "$optval" in
- 10|11)
+ 10|11|12)
;;
*) die Unrecognized Visual Studio Version in $opt
;;
@@ -269,7 +269,7 @@
asm_use_custom_step=false
uses_asm=${uses_asm:-false}
case "${vs_ver:-11}" in
- 10|11)
+ 10|11|12)
asm_use_custom_step=$uses_asm
;;
esac
@@ -383,6 +383,20 @@
tag_content PlatformToolset v110
fi
fi
+ if [ "$vs_ver" = "12" ]; then
+ if [ "$plat" = "ARM" ]; then
+ # Setting the wp80 toolchain automatically sets the
+ # WINAPI_FAMILY define, which is required for building
+ # code for arm with the windows headers. Alternatively,
+ # one could add AppContainerApplication=true in the Globals
+ # section and add PrecompiledHeader=NotUsing and
+ # CompileAsWinRT=false in ClCompile and SubSystem=Console
+ # in Link.
+ tag_content PlatformToolset v120_wp80
+ else
+ tag_content PlatformToolset v120
+ fi
+ fi
tag_content CharacterSet Unicode
if [ "$config" = "Release" ]; then
tag_content WholeProgramOptimization true
diff --git a/configure b/configure
index 729c986..f9454ba 100755
--- a/configure
+++ b/configure
@@ -24,9 +24,10 @@
${toggle_examples} examples
${toggle_docs} documentation
${toggle_unit_tests} unit tests
+ ${toggle_decode_perf_tests} build decoder perf tests with unit tests
--libc=PATH path to alternate libc
--as={yasm|nasm|auto} use specified assembler [auto, yasm preferred]
- --sdk-path=PATH path to root of sdk (iOS, android builds only)
+ --sdk-path=PATH path to root of sdk (android builds only)
${toggle_fast_unaligned} don't use unaligned accesses, even when
supported by hardware [auto]
${toggle_codec_srcs} in/exclude codec library source code
@@ -100,6 +101,7 @@
all_platforms="${all_platforms} armv7-linux-gcc" #neon Cortex-A8
all_platforms="${all_platforms} armv7-none-rvct" #neon Cortex-A8
all_platforms="${all_platforms} armv7-win32-vs11"
+all_platforms="${all_platforms} armv7-win32-vs12"
all_platforms="${all_platforms} mips32-linux-gcc"
all_platforms="${all_platforms} ppc32-darwin8-gcc"
all_platforms="${all_platforms} ppc32-darwin9-gcc"
@@ -127,6 +129,7 @@
all_platforms="${all_platforms} x86-win32-vs9"
all_platforms="${all_platforms} x86-win32-vs10"
all_platforms="${all_platforms} x86-win32-vs11"
+all_platforms="${all_platforms} x86-win32-vs12"
all_platforms="${all_platforms} x86_64-darwin9-gcc"
all_platforms="${all_platforms} x86_64-darwin10-gcc"
all_platforms="${all_platforms} x86_64-darwin11-gcc"
@@ -140,6 +143,7 @@
all_platforms="${all_platforms} x86_64-win64-vs9"
all_platforms="${all_platforms} x86_64-win64-vs10"
all_platforms="${all_platforms} x86_64-win64-vs11"
+all_platforms="${all_platforms} x86_64-win64-vs12"
all_platforms="${all_platforms} universal-darwin8-gcc"
all_platforms="${all_platforms} universal-darwin9-gcc"
all_platforms="${all_platforms} universal-darwin10-gcc"
@@ -299,6 +303,7 @@
postproc_visualizer
os_support
unit_tests
+ decode_perf_tests
multi_res_encoding
temporal_denoising
experimental
@@ -352,6 +357,7 @@
small
postproc_visualizer
unit_tests
+ decode_perf_tests
multi_res_encoding
temporal_denoising
experimental
@@ -670,7 +676,7 @@
VCPROJ_SFX=vcproj
gen_vcproj_cmd=${source_path}/build/make/gen_msvs_proj.sh
;;
- 10|11)
+ 10|11|12)
VCPROJ_SFX=vcxproj
gen_vcproj_cmd=${source_path}/build/make/gen_msvs_vcxproj.sh
;;
diff --git a/examples.mk b/examples.mk
index 16f3c8f..36d20df 100644
--- a/examples.mk
+++ b/examples.mk
@@ -23,7 +23,9 @@
vpxdec.SRCS += vpx_ports/vpx_timer.h
vpxdec.SRCS += vpx/vpx_integer.h
vpxdec.SRCS += args.c args.h
+vpxdec.SRCS += ivfdec.c ivfdec.h
vpxdec.SRCS += tools_common.c tools_common.h
+vpxdec.SRCS += webmdec.c webmdec.h
vpxdec.SRCS += nestegg/halloc/halloc.h
vpxdec.SRCS += nestegg/halloc/src/align.h
vpxdec.SRCS += nestegg/halloc/src/halloc.c
@@ -36,6 +38,8 @@
vpxdec.DESCRIPTION = Full featured decoder
UTILS-$(CONFIG_ENCODERS) += vpxenc.c
vpxenc.SRCS += args.c args.h y4minput.c y4minput.h
+vpxenc.SRCS += ivfdec.c ivfdec.h
+vpxenc.SRCS += ivfenc.c ivfenc.h
vpxenc.SRCS += tools_common.c tools_common.h
vpxenc.SRCS += webmenc.c webmenc.h
vpxenc.SRCS += vpx_ports/mem_ops.h
@@ -53,18 +57,11 @@
vp8_scalable_patterns.DESCRIPTION = Temporal Scalability Encoder
UTILS-$(CONFIG_VP9_ENCODER) += vp9_spatial_scalable_encoder.c
vp9_spatial_scalable_encoder.SRCS += args.c args.h
+vp9_spatial_scalable_encoder.SRCS += ivfenc.c ivfenc.h
+vp9_spatial_scalable_encoder.SRCS += tools_common.c tools_common.h
vp9_spatial_scalable_encoder.GUID = 4A38598D-627D-4505-9C7B-D4020C84100D
vp9_spatial_scalable_encoder.DESCRIPTION = Spatial Scalable Encoder
-# Clean up old ivfenc, ivfdec binaries.
-ifeq ($(CONFIG_MSVS),yes)
-CLEAN-OBJS += $(foreach p,$(VS_PLATFORMS),$(p)/Release/ivfenc.exe)
-CLEAN-OBJS += $(foreach p,$(VS_PLATFORMS),$(p)/Release/ivfdec.exe)
-else
-CLEAN-OBJS += ivfenc{.c.o,.c.d,.dox,.exe,}
-CLEAN-OBJS += ivfdec{.c.o,.c.d,.dox,.exe,}
-endif
-
# XMA example disabled for now, not used in VP8
#UTILS-$(CONFIG_DECODERS) += example_xma.c
#example_xma.GUID = A955FC4A-73F1-44F7-135E-30D84D32F022
diff --git a/ivfdec.c b/ivfdec.c
new file mode 100644
index 0000000..4a0816f
--- /dev/null
+++ b/ivfdec.c
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./ivfdec.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+int file_is_ivf(struct VpxInputContext *input_ctx) {
+ char raw_hdr[32];
+ int is_ivf = 0;
+
+ // TODO(tomfinegan): This can eventually go away, but for now it's required
+ // because the means by which file types are detected differ in vpxdec and
+ // vpxenc.
+ rewind(input_ctx->file);
+
+ if (fread(raw_hdr, 1, 32, input_ctx->file) == 32) {
+ if (raw_hdr[0] == 'D' && raw_hdr[1] == 'K' &&
+ raw_hdr[2] == 'I' && raw_hdr[3] == 'F') {
+ is_ivf = 1;
+
+ if (mem_get_le16(raw_hdr + 4) != 0) {
+ fprintf(stderr, "Error: Unrecognized IVF version! This file may not"
+ " decode properly.");
+ }
+
+ input_ctx->fourcc = mem_get_le32(raw_hdr + 8);
+ input_ctx->width = mem_get_le16(raw_hdr + 12);
+ input_ctx->height = mem_get_le16(raw_hdr + 14);
+ input_ctx->framerate.numerator = mem_get_le32(raw_hdr + 16);
+ input_ctx->framerate.denominator = mem_get_le32(raw_hdr + 20);
+
+ /* Some versions of vpxenc used 1/(2*fps) for the timebase, so
+ * we can guess the framerate using only the timebase in this
+ * case. Other files would require reading ahead to guess the
+ * timebase, like we do for webm.
+ */
+ if (input_ctx->framerate.numerator < 1000) {
+ /* Correct for the factor of 2 applied to the timebase in the
+ * encoder.
+ */
+ if (input_ctx->framerate.numerator & 1)
+ input_ctx->framerate.denominator <<= 1;
+ else
+ input_ctx->framerate.numerator >>= 1;
+ } else {
+ /* Don't know FPS for sure, and don't have readahead code
+ * (yet?), so just default to 30fps.
+ */
+ input_ctx->framerate.numerator = 30;
+ input_ctx->framerate.denominator = 1;
+ }
+ }
+ }
+
+ if (!is_ivf) {
+ rewind(input_ctx->file);
+ input_ctx->detect.buf_read = 0;
+ } else {
+ input_ctx->detect.position = 4;
+ }
+ return is_ivf;
+}
+
+int ivf_read_frame(struct VpxInputContext *input_ctx,
+ uint8_t **buffer,
+ size_t *bytes_read,
+ size_t *buffer_size) {
+ char raw_header[IVF_FRAME_HDR_SZ] = {0};
+ size_t frame_size = 0;
+ FILE *infile = input_ctx->file;
+
+ if (input_ctx->file_type != FILE_TYPE_IVF)
+ return 0;
+
+ if (fread(raw_header, IVF_FRAME_HDR_SZ, 1, infile) != 1) {
+ if (!feof(infile))
+ warn("Failed to read frame size\n");
+ } else {
+ frame_size = mem_get_le32(raw_header);
+
+ if (frame_size > 256 * 1024 * 1024) {
+ warn("Read invalid frame size (%u)\n", (unsigned int)frame_size);
+ frame_size = 0;
+ }
+
+ if (frame_size > *buffer_size) {
+ uint8_t *new_buffer = realloc(*buffer, 2 * frame_size);
+
+ if (new_buffer) {
+ *buffer = new_buffer;
+ *buffer_size = 2 * frame_size;
+ } else {
+ warn("Failed to allocate compressed data buffer\n");
+ frame_size = 0;
+ }
+ }
+ }
+
+ if (!feof(infile)) {
+ if (fread(*buffer, 1, frame_size, infile) != frame_size) {
+ warn("Failed to read full frame\n");
+ return 1;
+ }
+
+ *bytes_read = frame_size;
+ return 0;
+ }
+
+ return 1;
+}
diff --git a/ivfdec.h b/ivfdec.h
new file mode 100644
index 0000000..b1468a9
--- /dev/null
+++ b/ivfdec.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef IVFDEC_H_
+#define IVFDEC_H_
+
+#include "./tools_common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int file_is_ivf(struct VpxInputContext *input);
+
+int ivf_read_frame(struct VpxInputContext *input,
+ uint8_t **buffer,
+ size_t *bytes_read,
+ size_t *buffer_size);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* IVFDEC_H_ */
diff --git a/ivfenc.c b/ivfenc.c
new file mode 100644
index 0000000..fa92566
--- /dev/null
+++ b/ivfenc.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./ivfenc.h"
+
+#include "./tools_common.h"
+#include "vpx/vpx_encoder.h"
+#include "vpx_ports/mem_ops.h"
+
+void ivf_write_file_header(FILE *outfile,
+ const struct vpx_codec_enc_cfg *cfg,
+ unsigned int fourcc,
+ int frame_cnt) {
+ char header[32];
+
+ if (cfg->g_pass != VPX_RC_ONE_PASS && cfg->g_pass != VPX_RC_LAST_PASS)
+ return;
+
+ header[0] = 'D';
+ header[1] = 'K';
+ header[2] = 'I';
+ header[3] = 'F';
+ mem_put_le16(header + 4, 0); /* version */
+ mem_put_le16(header + 6, 32); /* headersize */
+ mem_put_le32(header + 8, fourcc); /* four CC */
+ mem_put_le16(header + 12, cfg->g_w); /* width */
+ mem_put_le16(header + 14, cfg->g_h); /* height */
+ mem_put_le32(header + 16, cfg->g_timebase.den); /* rate */
+ mem_put_le32(header + 20, cfg->g_timebase.num); /* scale */
+ mem_put_le32(header + 24, frame_cnt); /* length */
+ mem_put_le32(header + 28, 0); /* unused */
+
+ (void) fwrite(header, 1, 32, outfile);
+}
+
+void ivf_write_frame_header(FILE *outfile, const struct vpx_codec_cx_pkt *pkt) {
+ char header[12];
+ vpx_codec_pts_t pts;
+
+ if (pkt->kind != VPX_CODEC_CX_FRAME_PKT)
+ return;
+
+ pts = pkt->data.frame.pts;
+ mem_put_le32(header, (int)pkt->data.frame.sz);
+ mem_put_le32(header + 4, pts & 0xFFFFFFFF);
+ mem_put_le32(header + 8, pts >> 32);
+
+ (void) fwrite(header, 1, 12, outfile);
+}
+
+void ivf_write_frame_size(FILE *outfile, size_t size) {
+ char header[4];
+ mem_put_le32(header, (int)size);
+ (void) fwrite(header, 1, 4, outfile);
+}
diff --git a/ivfenc.h b/ivfenc.h
new file mode 100644
index 0000000..a332c7d
--- /dev/null
+++ b/ivfenc.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef IVFENC_H_
+#define IVFENC_H_
+
+#include "./tools_common.h"
+
+struct vpx_codec_enc_cfg;
+struct vpx_codec_cx_pkt;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void ivf_write_file_header(FILE *outfile,
+ const struct vpx_codec_enc_cfg *cfg,
+ uint32_t fourcc,
+ int frame_cnt);
+void ivf_write_frame_header(FILE *outfile, const struct vpx_codec_cx_pkt *pkt);
+void ivf_write_frame_size(FILE *outfile, size_t size);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* IVFENC_H_ */
diff --git a/test/android/Android.mk b/test/android/Android.mk
index 8d8ce16..13af601 100644
--- a/test/android/Android.mk
+++ b/test/android/Android.mk
@@ -10,9 +10,6 @@
# The test app itself runs on the command line through adb shell
# The paths are really messed up as the libvpx make file
# expects to be made from a parent directory.
-# TODO(joshualitt)
-# Fix android make files so they can be built from anywhere, will require
-# changing the libvpx make file and this one.
CUR_WD := $(call my-dir)
BINDINGS_DIR := $(CUR_WD)/../../..
LOCAL_PATH := $(CUR_WD)/../../..
@@ -20,12 +17,11 @@
#libvpx
include $(CLEAR_VARS)
include $(BINDINGS_DIR)/libvpx/build/make/Android.mk
-# Restore path
-# TODO joshualitt Fix makefiles so this is no longer needed
LOCAL_PATH := $(CUR_WD)/../..
#libgtest
include $(CLEAR_VARS)
+LOCAL_ARM_MODE := arm
LOCAL_CPP_EXTENSION := .cc
LOCAL_MODULE := gtest
LOCAL_C_INCLUDES := $(LOCAL_PATH)/third_party/googletest/src/
@@ -33,31 +29,14 @@
LOCAL_SRC_FILES := ./third_party/googletest/src/src/gtest-all.cc
include $(BUILD_STATIC_LIBRARY)
-#libnestegg
-include $(CLEAR_VARS)
-LOCAL_CPP_EXTENSION := .cc
-LOCAL_MODULE := nestegg
-NESTEGG_PATH := $(LOCAL_PATH)/nestegg
-LOCAL_C_INCLUDES := $(NESTEGG_PATH)/include
-LOCAL_C_INCLUDES += $(LOCAL_PATH)/
-LOCAL_C_INCLUDES += $(NESTEGG_PATH)/halloc/
-LOCAL_SRC_FILES := ./nestegg/halloc/src/halloc.c
-LOCAL_SRC_FILES += ./nestegg/src/nestegg.c
-include $(BUILD_STATIC_LIBRARY)
-
#libvpx_test
include $(CLEAR_VARS)
+LOCAL_ARM_MODE := arm
LOCAL_MODULE := libvpx_test
LOCAL_STATIC_LIBRARIES := gtest
-LOCAL_STATIC_LIBRARIES += nestegg
-LOCAL_STATIC_LIBRARIES += cpufeatures
LOCAL_SHARED_LIBRARIES := vpx
-LOCAL_C_INCLUDES := $(LOCAL_PATH)/
-LOCAL_C_INCLUDES += $(BINDINGS_DIR)/
-LOCAL_C_INCLUDES += $(LOCAL_PATH)/third_party/googletest/src/include
-LOCAL_SRC_FILES := ./args.c
-LOCAL_SRC_FILES += ./md5_utils.c
-LOCAL_SRC_FILES += ./test/decode_test_driver.cc
-LOCAL_SRC_FILES += ./test/test_libvpx.cc
-LOCAL_SRC_FILES += ./test/test_vector_test.cc
+include $(LOCAL_PATH)/test/test.mk
+LOCAL_C_INCLUDES := $(BINDINGS_DIR)
+FILTERED_SRC := $(sort $(filter %.cc %.c, $(LIBVPX_TEST_SRCS-yes)))
+LOCAL_SRC_FILES := $(addprefix ./test/, $(FILTERED_SRC))
include $(BUILD_EXECUTABLE)
diff --git a/test/android/README b/test/android/README
index 8bc1569..6840d91 100644
--- a/test/android/README
+++ b/test/android/README
@@ -1,23 +1,31 @@
Android.mk will build vpx unittests on android.
-1) configure libvpx from the parent directory:
-./libvpx/configure --target=armv7-android-gcc --enable-external-build --enable-postproc --disable-install-srcs --enable-multi-res-encoding --enable-temporal-denoising --disable-unit-tests --disable-install-docs --disable-examples --disable-runtime-cpu-detect --sdk=$NDK
+1) Configure libvpx from the parent directory:
+./libvpx/configure --target=armv7-android-gcc --enable-external-build \
+ --enable-postproc --disable-install-srcs --enable-multi-res-encoding \
+ --enable-temporal-denoising --disable-unit-tests --disable-install-docs \
+ --disable-examples --disable-runtime-cpu-detect --sdk=$NDK
-2) from the parent directory, invoke ndk-build:
-NDK_PROJECT_PATH=. ndk-build APP_BUILD_SCRIPT=./libvpx/test/android/Android.mk APP_ABI=armeabi-v7a APP_PLATFORM=android-18 APP_OPTIM=release APP_STL=gnustl_static APP_CPPFLAGS=-frtti
+2) From the parent directory, invoke ndk-build:
+NDK_PROJECT_PATH=. ndk-build APP_BUILD_SCRIPT=./libvpx/test/android/Android.mk \
+ APP_ABI=armeabi-v7a APP_PLATFORM=android-18 APP_OPTIM=release \
+ APP_STL=gnustl_static
+
+Note: Both adb and ndk-build are available prebuilt at:
+ https://chromium.googlesource.com/android_tools
3) Run get_files.py to download the test files:
-python get_files.py -i /path/to/test-data.sha1 -o /path/to/put/files -u http://libvpx-test-file-url
+python get_files.py -i /path/to/test-data.sha1 -o /path/to/put/files \
+ -u http://downloads.webmproject.org/test_data/libvpx
-NOTE: currently the url of the test files is http://downloads.webmproject.org/test_data/libvpx
-
-4) transfer files to device using adb. Currently, I put these files in /data/local/tmp
+4) Transfer files to device using adb. Ensure you have proper permissions for
+the target
adb push /path/to/test_files /data/local/tmp
adb push /path/to/built_libs /data/local/tmp
NOTE: Built_libs defaults to parent_dir/libs/armeabi-v7a
-5) run tests:
+5) Run tests:
adb shell
(on device)
cd /data/local/tmp
diff --git a/test/datarate_test.cc b/test/datarate_test.cc
index 85f4bb6..5785a0a 100644
--- a/test/datarate_test.cc
+++ b/test/datarate_test.cc
@@ -248,9 +248,9 @@
cfg_.rc_target_bitrate = i;
ResetModel();
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
- ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.8)
+ ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.9)
<< " The datarate for the file exceeds the target by too much!";
- ASSERT_LE(cfg_.rc_target_bitrate, effective_datarate_ * 1.3)
+ ASSERT_LE(cfg_.rc_target_bitrate, effective_datarate_ * 1.1)
<< " The datarate for the file missed the target!";
}
}
diff --git a/test/decode_perf_test.cc b/test/decode_perf_test.cc
new file mode 100644
index 0000000..95600db
--- /dev/null
+++ b/test/decode_perf_test.cc
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "test/codec_factory.h"
+#include "test/decode_test_driver.h"
+#include "test/ivf_video_source.h"
+#include "test/md5_helper.h"
+#include "test/util.h"
+#include "test/webm_video_source.h"
+#include "vpx_ports/vpx_timer.h"
+#include "./vpx_version.h"
+
+using std::tr1::make_tuple;
+
+namespace {
+
+#define VIDEO_NAME 0
+#define THREADS 1
+
+const double kUsecsInSec = 1000000.0;
+
+/*
+ DecodePerfTest takes a tuple of filename + number of threads to decode with
+ */
+typedef std::tr1::tuple<const char *const, unsigned> decode_perf_param_t;
+
+const decode_perf_param_t kVP9DecodePerfVectors[] = {
+ make_tuple("vp90-2-bbb_426x240_tile_1x1_180kbps.webm", 1),
+ make_tuple("vp90-2-bbb_640x360_tile_1x2_337kbps.webm", 2),
+ make_tuple("vp90-2-bbb_854x480_tile_1x2_651kbps.webm", 2),
+ make_tuple("vp90-2-bbb_1280x720_tile_1x4_1310kbps.webm", 4),
+ make_tuple("vp90-2-bbb_1920x1080_tile_1x1_2581kbps.webm", 1),
+ make_tuple("vp90-2-bbb_1920x1080_tile_1x4_2586kbps.webm", 4),
+ make_tuple("vp90-2-bbb_1920x1080_tile_1x4_fpm_2304kbps.webm", 4),
+ make_tuple("vp90-2-sintel_426x182_tile_1x1_171kbps.webm", 1),
+ make_tuple("vp90-2-sintel_640x272_tile_1x2_318kbps.webm", 2),
+ make_tuple("vp90-2-sintel_854x364_tile_1x2_621kbps.webm", 2),
+ make_tuple("vp90-2-sintel_1280x546_tile_1x4_1257kbps.webm", 4),
+ make_tuple("vp90-2-sintel_1920x818_tile_1x4_fpm_2279kbps.webm", 4),
+ make_tuple("vp90-2-tos_426x178_tile_1x1_181kbps.webm", 1),
+ make_tuple("vp90-2-tos_640x266_tile_1x2_336kbps.webm", 2),
+ make_tuple("vp90-2-tos_854x356_tile_1x2_656kbps.webm", 2),
+ make_tuple("vp90-2-tos_1280x534_tile_1x4_1306kbps.webm", 4),
+ make_tuple("vp90-2-tos_1920x800_tile_1x4_fpm_2335kbps.webm", 4),
+};
+
+/*
+ In order to reflect real world performance as much as possible, Perf tests
+ *DO NOT* do any correctness checks. Please run them alongside correctness
+ tests to ensure proper codec integrity. Furthermore, in this test we
+ deliberately limit the amount of system calls we make to avoid OS
+ preemption.
+
+ TODO(joshualitt) create a more detailed perf measurement test to collect
+ power/temp/min max frame decode times/etc
+ */
+
+class DecodePerfTest : public ::testing::TestWithParam<decode_perf_param_t> {
+};
+
+TEST_P(DecodePerfTest, PerfTest) {
+ const char *const video_name = GET_PARAM(VIDEO_NAME);
+ const unsigned threads = GET_PARAM(THREADS);
+
+ libvpx_test::WebMVideoSource video(video_name);
+ video.Init();
+
+ vpx_codec_dec_cfg_t cfg = {0};
+ cfg.threads = threads;
+ libvpx_test::VP9Decoder decoder(cfg, 0);
+
+ vpx_usec_timer t;
+ vpx_usec_timer_start(&t);
+
+ for (video.Begin(); video.cxdata() != NULL; video.Next()) {
+ decoder.DecodeFrame(video.cxdata(), video.frame_size());
+ }
+
+ vpx_usec_timer_mark(&t);
+ const double elapsed_secs = double(vpx_usec_timer_elapsed(&t))
+ / kUsecsInSec;
+ const unsigned frames = video.frame_number();
+ const double fps = double(frames) / elapsed_secs;
+
+ printf("{\n");
+ printf("\t\"version\" : \"%s\",\n", VERSION_STRING_NOSP);
+ printf("\t\"videoName\" : \"%s\",\n", video_name);
+ printf("\t\"threadCount\" : %u,\n", threads);
+ printf("\t\"decodeTimeSecs\" : %f,\n", elapsed_secs);
+ printf("\t\"totalFrames\" : %u,\n", frames);
+ printf("\t\"framesPerSecond\" : %f\n", fps);
+ printf("}\n");
+}
+
+INSTANTIATE_TEST_CASE_P(VP9, DecodePerfTest,
+ ::testing::ValuesIn(kVP9DecodePerfVectors));
+
+} // namespace
diff --git a/test/fdct4x4_test.cc b/test/fdct4x4_test.cc
index 796a2e9..9d8b0bd 100644
--- a/test/fdct4x4_test.cc
+++ b/test/fdct4x4_test.cc
@@ -13,178 +13,288 @@
#include <string.h>
#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
extern "C" {
+#include "vp9/common/vp9_entropy.h"
#include "./vp9_rtcd.h"
+void vp9_idct4x4_16_add_c(const int16_t *input, uint8_t *output, int pitch);
}
-
-#include "test/acm_random.h"
#include "vpx/vpx_integer.h"
-#include "vpx_ports/mem.h"
using libvpx_test::ACMRandom;
namespace {
-void fdct4x4(int16_t *in, int16_t *out, uint8_t* /*dst*/,
- int stride, int /*tx_type*/) {
+const int kNumCoeffs = 16;
+typedef void (*fdct_t)(const int16_t *in, int16_t *out, int stride);
+typedef void (*idct_t)(const int16_t *in, uint8_t *out, int stride);
+typedef void (*fht_t) (const int16_t *in, int16_t *out, int stride,
+ int tx_type);
+typedef void (*iht_t) (const int16_t *in, uint8_t *out, int stride,
+ int tx_type);
+
+void fdct4x4_ref(const int16_t *in, int16_t *out, int stride, int tx_type) {
vp9_fdct4x4_c(in, out, stride);
}
-void idct4x4_add(int16_t* /*in*/, int16_t *out, uint8_t *dst,
- int stride, int /*tx_type*/) {
- vp9_idct4x4_16_add_c(out, dst, stride);
-}
-void fht4x4(int16_t *in, int16_t *out, uint8_t* /*dst*/,
- int stride, int tx_type) {
+
+void fht4x4_ref(const int16_t *in, int16_t *out, int stride, int tx_type) {
vp9_short_fht4x4_c(in, out, stride, tx_type);
}
-void iht4x4_add(int16_t* /*in*/, int16_t *out, uint8_t *dst,
- int stride, int tx_type) {
- vp9_iht4x4_16_add_c(out, dst, stride, tx_type);
-}
-class FwdTrans4x4Test : public ::testing::TestWithParam<int> {
+class Trans4x4TestBase {
public:
- virtual ~FwdTrans4x4Test() {}
- virtual void SetUp() {
- tx_type_ = GetParam();
- if (tx_type_ == 0) {
- fwd_txfm_ = fdct4x4;
- inv_txfm_ = idct4x4_add;
- } else {
- fwd_txfm_ = fht4x4;
- inv_txfm_ = iht4x4_add;
- }
- }
+ virtual ~Trans4x4TestBase() {}
protected:
- void RunFwdTxfm(int16_t *in, int16_t *out, uint8_t *dst,
- int stride, int tx_type) {
- (*fwd_txfm_)(in, out, dst, stride, tx_type);
+ virtual void RunFwdTxfm(const int16_t *in, int16_t *out, int stride) = 0;
+
+ virtual void RunInvTxfm(const int16_t *out, uint8_t *dst, int stride) = 0;
+
+ void RunAccuracyCheck() {
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ uint32_t max_error = 0;
+ int64_t total_error = 0;
+ const int count_test_block = 10000;
+ for (int i = 0; i < count_test_block; ++i) {
+ DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, kNumCoeffs);
+ DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, kNumCoeffs);
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
+
+ // Initialize a test block with input range [-255, 255].
+ for (int j = 0; j < kNumCoeffs; ++j) {
+ src[j] = rnd.Rand8();
+ dst[j] = rnd.Rand8();
+ test_input_block[j] = src[j] - dst[j];
+ }
+
+ REGISTER_STATE_CHECK(RunFwdTxfm(test_input_block,
+ test_temp_block, pitch_));
+ REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
+
+ for (int j = 0; j < kNumCoeffs; ++j) {
+ const uint32_t diff = dst[j] - src[j];
+ const uint32_t error = diff * diff;
+ if (max_error < error)
+ max_error = error;
+ total_error += error;
+ }
+ }
+
+ EXPECT_GE(1u, max_error)
+ << "Error: 4x4 FHT/IHT has an individual round trip error > 1";
+
+ EXPECT_GE(count_test_block , total_error)
+ << "Error: 4x4 FHT/IHT has average round trip error > 1 per block";
}
- void RunInvTxfm(int16_t *in, int16_t *out, uint8_t *dst,
- int stride, int tx_type) {
- (*inv_txfm_)(in, out, dst, stride, tx_type);
+ void RunCoeffCheck() {
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ const int count_test_block = 5000;
+ DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
+ DECLARE_ALIGNED_ARRAY(16, int16_t, output_ref_block, kNumCoeffs);
+ DECLARE_ALIGNED_ARRAY(16, int16_t, output_block, kNumCoeffs);
+
+ for (int i = 0; i < count_test_block; ++i) {
+ // Initialize a test block with input range [-255, 255].
+ for (int j = 0; j < kNumCoeffs; ++j)
+ input_block[j] = rnd.Rand8() - rnd.Rand8();
+
+ fwd_txfm_ref(input_block, output_ref_block, pitch_, tx_type_);
+ REGISTER_STATE_CHECK(RunFwdTxfm(input_block, output_block, pitch_));
+
+ // The minimum quant value is 4.
+ for (int j = 0; j < kNumCoeffs; ++j)
+ EXPECT_EQ(output_block[j], output_ref_block[j]);
+ }
}
+ void RunMemCheck() {
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ const int count_test_block = 5000;
+ DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
+ DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kNumCoeffs);
+ DECLARE_ALIGNED_ARRAY(16, int16_t, output_ref_block, kNumCoeffs);
+ DECLARE_ALIGNED_ARRAY(16, int16_t, output_block, kNumCoeffs);
+
+ for (int i = 0; i < count_test_block; ++i) {
+ // Initialize a test block with input range [-255, 255].
+ for (int j = 0; j < kNumCoeffs; ++j) {
+ input_block[j] = rnd.Rand8() - rnd.Rand8();
+ input_extreme_block[j] = rnd.Rand8() % 2 ? 255 : -255;
+ }
+ if (i == 0)
+ for (int j = 0; j < kNumCoeffs; ++j)
+ input_extreme_block[j] = 255;
+ if (i == 1)
+ for (int j = 0; j < kNumCoeffs; ++j)
+ input_extreme_block[j] = -255;
+
+ fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_);
+ REGISTER_STATE_CHECK(RunFwdTxfm(input_extreme_block,
+ output_block, pitch_));
+
+ // The minimum quant value is 4.
+ for (int j = 0; j < kNumCoeffs; ++j) {
+ EXPECT_EQ(output_block[j], output_ref_block[j]);
+ EXPECT_GE(4 * DCT_MAX_VALUE, abs(output_block[j]))
+ << "Error: 16x16 FDCT has coefficient larger than 4*DCT_MAX_VALUE";
+ }
+ }
+ }
+
+ void RunInvAccuracyCheck() {
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ const int count_test_block = 1000;
+ DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
+ DECLARE_ALIGNED_ARRAY(16, int16_t, coeff, kNumCoeffs);
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
+
+ for (int i = 0; i < count_test_block; ++i) {
+ // Initialize a test block with input range [-255, 255].
+ for (int j = 0; j < kNumCoeffs; ++j) {
+ src[j] = rnd.Rand8();
+ dst[j] = rnd.Rand8();
+ in[j] = src[j] - dst[j];
+ }
+
+ fwd_txfm_ref(in, coeff, pitch_, tx_type_);
+
+ REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
+
+ for (int j = 0; j < kNumCoeffs; ++j) {
+ const uint32_t diff = dst[j] - src[j];
+ const uint32_t error = diff * diff;
+ EXPECT_GE(1u, error)
+ << "Error: 16x16 IDCT has error " << error
+ << " at index " << j;
+ }
+ }
+ }
+
+ int pitch_;
int tx_type_;
- void (*fwd_txfm_)(int16_t *in, int16_t *out, uint8_t *dst,
- int stride, int tx_type);
- void (*inv_txfm_)(int16_t *in, int16_t *out, uint8_t *dst,
- int stride, int tx_type);
+ fht_t fwd_txfm_ref;
};
-TEST_P(FwdTrans4x4Test, SignBiasCheck) {
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 16);
- DECLARE_ALIGNED_ARRAY(16, int16_t, test_output_block, 16);
- const int pitch = 4;
- int count_sign_block[16][2];
- const int count_test_block = 1000000;
+class Trans4x4DCT
+ : public Trans4x4TestBase,
+ public PARAMS(fdct_t, idct_t, int) {
+ public:
+ virtual ~Trans4x4DCT() {}
- memset(count_sign_block, 0, sizeof(count_sign_block));
- for (int i = 0; i < count_test_block; ++i) {
- // Initialize a test block with input range [-255, 255].
- for (int j = 0; j < 16; ++j)
- test_input_block[j] = rnd.Rand8() - rnd.Rand8();
+ virtual void SetUp() {
+ fwd_txfm_ = GET_PARAM(0);
+ inv_txfm_ = GET_PARAM(1);
+ tx_type_ = GET_PARAM(2);
+ pitch_ = 4;
+ fwd_txfm_ref = fdct4x4_ref;
+ }
+ virtual void TearDown() { libvpx_test::ClearSystemState(); }
- RunFwdTxfm(test_input_block, test_output_block, NULL, pitch, tx_type_);
-
- for (int j = 0; j < 16; ++j) {
- if (test_output_block[j] < 0)
- ++count_sign_block[j][0];
- else if (test_output_block[j] > 0)
- ++count_sign_block[j][1];
- }
+ protected:
+ void RunFwdTxfm(const int16_t *in, int16_t *out, int stride) {
+ fwd_txfm_(in, out, stride);
+ }
+ void RunInvTxfm(const int16_t *out, uint8_t *dst, int stride) {
+ inv_txfm_(out, dst, stride);
}
- for (int j = 0; j < 16; ++j) {
- const bool bias_acceptable = (abs(count_sign_block[j][0] -
- count_sign_block[j][1]) < 10000);
- EXPECT_TRUE(bias_acceptable)
- << "Error: 4x4 FDCT/FHT has a sign bias > 1%"
- << " for input range [-255, 255] at index " << j
- << " tx_type " << tx_type_;
- }
+ fdct_t fwd_txfm_;
+ idct_t inv_txfm_;
+};
- memset(count_sign_block, 0, sizeof(count_sign_block));
- for (int i = 0; i < count_test_block; ++i) {
- // Initialize a test block with input range [-15, 15].
- for (int j = 0; j < 16; ++j)
- test_input_block[j] = (rnd.Rand8() >> 4) - (rnd.Rand8() >> 4);
-
- RunFwdTxfm(test_input_block, test_output_block, NULL, pitch, tx_type_);
-
- for (int j = 0; j < 16; ++j) {
- if (test_output_block[j] < 0)
- ++count_sign_block[j][0];
- else if (test_output_block[j] > 0)
- ++count_sign_block[j][1];
- }
- }
-
- for (int j = 0; j < 16; ++j) {
- const bool bias_acceptable = (abs(count_sign_block[j][0] -
- count_sign_block[j][1]) < 100000);
- EXPECT_TRUE(bias_acceptable)
- << "Error: 4x4 FDCT/FHT has a sign bias > 10%"
- << " for input range [-15, 15] at index " << j;
- }
+TEST_P(Trans4x4DCT, AccuracyCheck) {
+ RunAccuracyCheck();
}
-TEST_P(FwdTrans4x4Test, RoundTripErrorCheck) {
- ACMRandom rnd(ACMRandom::DeterministicSeed());
-
- int max_error = 0;
- int total_error = 0;
- const int count_test_block = 1000000;
- for (int i = 0; i < count_test_block; ++i) {
- DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 16);
- DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, 16);
- DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 16);
- DECLARE_ALIGNED_ARRAY(16, uint8_t, src, 16);
-
- for (int j = 0; j < 16; ++j) {
- src[j] = rnd.Rand8();
- dst[j] = rnd.Rand8();
- }
- // Initialize a test block with input range [-255, 255].
- for (int j = 0; j < 16; ++j)
- test_input_block[j] = src[j] - dst[j];
-
- const int pitch = 4;
- RunFwdTxfm(test_input_block, test_temp_block, dst, pitch, tx_type_);
-
- for (int j = 0; j < 16; ++j) {
- if (test_temp_block[j] > 0) {
- test_temp_block[j] += 2;
- test_temp_block[j] /= 4;
- test_temp_block[j] *= 4;
- } else {
- test_temp_block[j] -= 2;
- test_temp_block[j] /= 4;
- test_temp_block[j] *= 4;
- }
- }
-
- // inverse transform and reconstruct the pixel block
- RunInvTxfm(test_input_block, test_temp_block, dst, pitch, tx_type_);
-
- for (int j = 0; j < 16; ++j) {
- const int diff = dst[j] - src[j];
- const int error = diff * diff;
- if (max_error < error)
- max_error = error;
- total_error += error;
- }
- }
- EXPECT_GE(1, max_error)
- << "Error: FDCT/IDCT or FHT/IHT has an individual roundtrip error > 1";
-
- EXPECT_GE(count_test_block, total_error)
- << "Error: FDCT/IDCT or FHT/IHT has average "
- << "roundtrip error > 1 per block";
+TEST_P(Trans4x4DCT, CoeffCheck) {
+ RunCoeffCheck();
}
-INSTANTIATE_TEST_CASE_P(VP9, FwdTrans4x4Test, ::testing::Range(0, 4));
+TEST_P(Trans4x4DCT, MemCheck) {
+ RunMemCheck();
+}
+
+TEST_P(Trans4x4DCT, InvAccuracyCheck) {
+ RunInvAccuracyCheck();
+}
+
+class Trans4x4HT
+ : public Trans4x4TestBase,
+ public PARAMS(fht_t, iht_t, int) {
+ public:
+ virtual ~Trans4x4HT() {}
+
+ virtual void SetUp() {
+ fwd_txfm_ = GET_PARAM(0);
+ inv_txfm_ = GET_PARAM(1);
+ tx_type_ = GET_PARAM(2);
+ pitch_ = 4;
+ fwd_txfm_ref = fht4x4_ref;
+ }
+ virtual void TearDown() { libvpx_test::ClearSystemState(); }
+
+ protected:
+ void RunFwdTxfm(const int16_t *in, int16_t *out, int stride) {
+ fwd_txfm_(in, out, stride, tx_type_);
+ }
+
+ void RunInvTxfm(const int16_t *out, uint8_t *dst, int stride) {
+ inv_txfm_(out, dst, stride, tx_type_);
+ }
+
+ fht_t fwd_txfm_;
+ iht_t inv_txfm_;
+};
+
+TEST_P(Trans4x4HT, AccuracyCheck) {
+ RunAccuracyCheck();
+}
+
+TEST_P(Trans4x4HT, CoeffCheck) {
+ RunCoeffCheck();
+}
+
+TEST_P(Trans4x4HT, MemCheck) {
+ RunMemCheck();
+}
+
+TEST_P(Trans4x4HT, InvAccuracyCheck) {
+ RunInvAccuracyCheck();
+}
+
+using std::tr1::make_tuple;
+
+INSTANTIATE_TEST_CASE_P(
+ C, Trans4x4DCT,
+ ::testing::Values(
+ make_tuple(&vp9_fdct4x4_c, &vp9_idct4x4_16_add_c, 0)));
+INSTANTIATE_TEST_CASE_P(
+ C, Trans4x4HT,
+ ::testing::Values(
+ make_tuple(&vp9_short_fht4x4_c, &vp9_iht4x4_16_add_c, 0),
+ make_tuple(&vp9_short_fht4x4_c, &vp9_iht4x4_16_add_c, 1),
+ make_tuple(&vp9_short_fht4x4_c, &vp9_iht4x4_16_add_c, 2),
+ make_tuple(&vp9_short_fht4x4_c, &vp9_iht4x4_16_add_c, 3)));
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(
+ SSE2, Trans4x4DCT,
+ ::testing::Values(
+ make_tuple(&vp9_fdct4x4_sse2,
+ &vp9_idct4x4_16_add_sse2, 0)));
+INSTANTIATE_TEST_CASE_P(
+ SSE2, Trans4x4HT,
+ ::testing::Values(
+ make_tuple(&vp9_short_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 0),
+ make_tuple(&vp9_short_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 1),
+ make_tuple(&vp9_short_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 2),
+ make_tuple(&vp9_short_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 3)));
+#endif
+
} // namespace
diff --git a/test/test-data.sha1 b/test/test-data.sha1
index 5229d09..827ae31 100644
--- a/test/test-data.sha1
+++ b/test/test-data.sha1
@@ -542,3 +542,20 @@
ae7451810247fd13975cc257aa0301ff17102255 vp90-2-08-tile-4x4.webm.md5
2ec6e15422ac7a61af072dc5f27fcaf1942ce116 vp90-2-08-tile-4x1.webm
0094f5ee5e46345017c30e0aa4835b550212d853 vp90-2-08-tile-4x1.webm.md5
+8cdd435d89029987ee196896e21520e5f879f04d vp90-2-bbb_1280x720_tile_1x4_1310kbps.webm
+091b373aa2ecb59aa5c647affd5bcafcc7547364 vp90-2-bbb_1920x1080_tile_1x1_2581kbps.webm
+87ee28032b0963a44b73a850fcc816a6dc83efbb vp90-2-bbb_1920x1080_tile_1x4_2586kbps.webm
+c6ce25c4bfd4bdfc2932b70428e3dfe11210ec4f vp90-2-bbb_1920x1080_tile_1x4_fpm_2304kbps.webm
+2064bdb22aa71c2691e0469fb62e8087a43f08f8 vp90-2-bbb_426x240_tile_1x1_180kbps.webm
+8080eda22694910162f0996e8a962612f381a57f vp90-2-bbb_640x360_tile_1x2_337kbps.webm
+a484b335c27ea189c0f0d77babea4a510ce12d50 vp90-2-bbb_854x480_tile_1x2_651kbps.webm
+3eacf1f006250be4cc5c92a7ef146e385ee62653 vp90-2-sintel_1280x546_tile_1x4_1257kbps.webm
+217f089a16447490823127b36ce0d945522accfd vp90-2-sintel_1920x818_tile_1x4_fpm_2279kbps.webm
+eedb3c641e60dacbe082491a16df529a5c9187df vp90-2-sintel_426x182_tile_1x1_171kbps.webm
+cb7e4955af183dff33bcba0c837f0922ab066400 vp90-2-sintel_640x272_tile_1x2_318kbps.webm
+48613f9380e2580002f8a09d6e412ea4e89a52b9 vp90-2-sintel_854x364_tile_1x2_621kbps.webm
+990a91f24dd284562d21d714ae773dff5452cad8 vp90-2-tos_1280x534_tile_1x4_1306kbps.webm
+b6dd558c90bca466b4bcbd03b3371648186465a7 vp90-2-tos_1920x800_tile_1x4_fpm_2335kbps.webm
+1a9c2914ba932a38f0a143efc1ad0e318e78888b vp90-2-tos_426x178_tile_1x1_181kbps.webm
+a3d2b09f24debad4747a1b3066f572be4273bced vp90-2-tos_640x266_tile_1x2_336kbps.webm
+c64b03b5c090e6888cb39685c31f00a6b79fa45c vp90-2-tos_854x356_tile_1x2_656kbps.webm
diff --git a/test/test.mk b/test/test.mk
index ac072d0..32601c5 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -44,6 +44,10 @@
LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += webm_video_source.h
LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += test_vector_test.cc
+# Currently we only support decoder perf tests for vp9
+ifeq ($(CONFIG_DECODE_PERF_TESTS)$(CONFIG_VP9_DECODER), yesyes)
+LIBVPX_TEST_SRCS-yes += decode_perf_test.cc
+endif
##
## WHITE BOX TESTS
@@ -652,3 +656,43 @@
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile-4x1.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yv444.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yv444.webm.md5
+
+ifeq ($(CONFIG_DECODE_PERF_TESTS),yes)
+# BBB VP9 streams
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += \
+ vp90-2-bbb_426x240_tile_1x1_180kbps.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += \
+ vp90-2-bbb_640x360_tile_1x2_337kbps.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += \
+ vp90-2-bbb_854x480_tile_1x2_651kbps.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += \
+ vp90-2-bbb_1280x720_tile_1x4_1310kbps.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += \
+ vp90-2-bbb_1920x1080_tile_1x1_2581kbps.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += \
+ vp90-2-bbb_1920x1080_tile_1x4_2586kbps.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += \
+ vp90-2-bbb_1920x1080_tile_1x4_fpm_2304kbps.webm
+#Sintel VP9 streams
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += \
+ vp90-2-sintel_426x182_tile_1x1_171kbps.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += \
+ vp90-2-sintel_640x272_tile_1x2_318kbps.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += \
+ vp90-2-sintel_854x364_tile_1x2_621kbps.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += \
+ vp90-2-sintel_1280x546_tile_1x4_1257kbps.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += \
+ vp90-2-sintel_1920x818_tile_1x4_fpm_2279kbps.webm
+# TOS VP9 streams
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += \
+ vp90-2-tos_426x178_tile_1x1_181kbps.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += \
+ vp90-2-tos_640x266_tile_1x2_336kbps.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += \
+ vp90-2-tos_854x356_tile_1x2_656kbps.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += \
+ vp90-2-tos_1280x534_tile_1x4_1306kbps.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += \
+ vp90-2-tos_1920x800_tile_1x4_fpm_2335kbps.webm
+endif # CONFIG_DECODE_PERF_TESTS
diff --git a/tools_common.c b/tools_common.c
index 44b2a3f..9c24983 100644
--- a/tools_common.c
+++ b/tools_common.c
@@ -7,10 +7,13 @@
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
+
#include "tools_common.h"
#include <stdarg.h>
+#include <stdio.h>
#include <stdlib.h>
+#include <string.h>
#if defined(_WIN32) || defined(__OS2__)
#include <io.h>
@@ -56,3 +59,74 @@
void warn(const char *fmt, ...) {
LOG_ERROR("Warning");
}
+
+uint16_t mem_get_le16(const void *data) {
+ uint16_t val;
+ const uint8_t *mem = (const uint8_t*)data;
+
+ val = mem[1] << 8;
+ val |= mem[0];
+ return val;
+}
+
+uint32_t mem_get_le32(const void *data) {
+ uint32_t val;
+ const uint8_t *mem = (const uint8_t*)data;
+
+ val = mem[3] << 24;
+ val |= mem[2] << 16;
+ val |= mem[1] << 8;
+ val |= mem[0];
+ return val;
+}
+
+int read_yuv_frame(struct VpxInputContext *input_ctx, vpx_image_t *yuv_frame) {
+ FILE *f = input_ctx->file;
+ struct FileTypeDetectionBuffer *detect = &input_ctx->detect;
+ int plane = 0;
+ int shortread = 0;
+
+ for (plane = 0; plane < 3; ++plane) {
+ uint8_t *ptr;
+ const int w = (plane ? (1 + yuv_frame->d_w) / 2 : yuv_frame->d_w);
+ const int h = (plane ? (1 + yuv_frame->d_h) / 2 : yuv_frame->d_h);
+ int r;
+
+ /* Determine the correct plane based on the image format. The for-loop
+ * always counts in Y,U,V order, but this may not match the order of
+ * the data on disk.
+ */
+ switch (plane) {
+ case 1:
+ ptr = yuv_frame->planes[
+ yuv_frame->fmt == VPX_IMG_FMT_YV12 ? VPX_PLANE_V : VPX_PLANE_U];
+ break;
+ case 2:
+ ptr = yuv_frame->planes[
+ yuv_frame->fmt == VPX_IMG_FMT_YV12 ? VPX_PLANE_U : VPX_PLANE_V];
+ break;
+ default:
+ ptr = yuv_frame->planes[plane];
+ }
+
+ for (r = 0; r < h; ++r) {
+ size_t needed = w;
+ size_t buf_position = 0;
+ const size_t left = detect->buf_read - detect->position;
+ if (left > 0) {
+ const size_t more = (left < needed) ? left : needed;
+ memcpy(ptr, detect->buf + detect->position, more);
+ buf_position = more;
+ needed -= more;
+ detect->position += more;
+ }
+ if (needed > 0) {
+ shortread |= (fread(ptr + buf_position, 1, needed, f) < needed);
+ }
+
+ ptr += yuv_frame->stride[plane];
+ }
+ }
+
+ return shortread;
+}
diff --git a/tools_common.h b/tools_common.h
index 068e7b5..7500523 100644
--- a/tools_common.h
+++ b/tools_common.h
@@ -13,6 +13,12 @@
#include <stdio.h>
#include "./vpx_config.h"
+#include "vpx/vpx_image.h"
+#include "vpx/vpx_integer.h"
+
+#if CONFIG_ENCODERS
+#include "./y4minput.h"
+#endif
#if defined(_MSC_VER)
/* MSVS doesn't define off_t, and uses _f{seek,tell}i64. */
@@ -52,11 +58,55 @@
#define PATH_MAX 512
#endif
+#define IVF_FRAME_HDR_SZ (4 + 8) /* 4 byte size + 8 byte timestamp */
+#define IVF_FILE_HDR_SZ 32
+
+#define RAW_FRAME_HDR_SZ sizeof(uint32_t)
+
#define VP8_FOURCC (0x30385056)
#define VP9_FOURCC (0x30395056)
#define VP8_FOURCC_MASK (0x00385056)
#define VP9_FOURCC_MASK (0x00395056)
+enum VideoFileType {
+ FILE_TYPE_RAW,
+ FILE_TYPE_IVF,
+ FILE_TYPE_Y4M,
+ FILE_TYPE_WEBM
+};
+
+struct FileTypeDetectionBuffer {
+ char buf[4];
+ size_t buf_read;
+ size_t position;
+};
+
+struct VpxRational {
+ int numerator;
+ int denominator;
+};
+
+struct VpxInputContext {
+ const char *filename;
+ FILE *file;
+ off_t length;
+ struct FileTypeDetectionBuffer detect;
+ enum VideoFileType file_type;
+ uint32_t width;
+ uint32_t height;
+ int use_i420;
+ int only_i420;
+ uint32_t fourcc;
+ struct VpxRational framerate;
+#if CONFIG_ENCODERS
+ y4m_input y4m;
+#endif
+};
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
/* Sets a stdio stream into binary mode */
FILE *set_binary_mode(FILE *stream);
@@ -67,4 +117,13 @@
/* The tool including this file must define usage_exit() */
void usage_exit();
+uint16_t mem_get_le16(const void *data);
+uint32_t mem_get_le32(const void *data);
+
+int read_yuv_frame(struct VpxInputContext *input_ctx, vpx_image_t *yuv_frame);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
#endif // TOOLS_COMMON_H_
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 4b60cfd..881ada1 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -3808,7 +3808,7 @@
/* Setup background Q adjustment for error resilient mode.
* For multi-layer encodes only enable this for the base layer.
- */
+ */
if (cpi->cyclic_refresh_mode_enabled)
{
if (cpi->current_layer==0)
diff --git a/vp9/common/arm/neon/vp9_loopfilter_16_neon.c b/vp9/common/arm/neon/vp9_loopfilter_16_neon.c
new file mode 100644
index 0000000..2f022dc
--- /dev/null
+++ b/vp9/common/arm/neon/vp9_loopfilter_16_neon.c
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vp9_rtcd.h"
+
+void vp9_loop_filter_horizontal_edge_16_neon(uint8_t *s, int p /* pitch */,
+ const uint8_t *blimit0,
+ const uint8_t *limit0,
+ const uint8_t *thresh0,
+ const uint8_t *blimit1,
+ const uint8_t *limit1,
+ const uint8_t *thresh1) {
+ vp9_loop_filter_horizontal_edge(s, p, blimit0, limit0, thresh0, 1);
+ vp9_loop_filter_horizontal_edge(s + 8, p, blimit1, limit1, thresh1, 1);
+}
+
+void vp9_mbloop_filter_horizontal_edge_16_neon(uint8_t *s, int p /* pitch */,
+ const uint8_t *blimit0,
+ const uint8_t *limit0,
+ const uint8_t *thresh0,
+ const uint8_t *blimit1,
+ const uint8_t *limit1,
+ const uint8_t *thresh1) {
+ vp9_mbloop_filter_horizontal_edge(s, p, blimit0, limit0, thresh0, 1);
+ vp9_mbloop_filter_horizontal_edge(s + 8, p, blimit1, limit1, thresh1, 1);
+}
diff --git a/vp9/common/vp9_alloccommon.c b/vp9/common/vp9_alloccommon.c
index 0d65651..d298160 100644
--- a/vp9/common/vp9_alloccommon.c
+++ b/vp9/common/vp9_alloccommon.c
@@ -79,6 +79,57 @@
vp9_update_mode_info_border(cm, cm->prev_mip);
}
+int vp9_resize_frame_buffers(VP9_COMMON *cm, int width, int height) {
+ const int aligned_width = ALIGN_POWER_OF_TWO(width, MI_SIZE_LOG2);
+ const int aligned_height = ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2);
+ const int ss_x = cm->subsampling_x;
+ const int ss_y = cm->subsampling_y;
+ int mi_size;
+
+ if (vp9_realloc_frame_buffer(&cm->post_proc_buffer, width, height, ss_x, ss_y,
+ VP9BORDERINPIXELS) < 0)
+ goto fail;
+
+ set_mb_mi(cm, aligned_width, aligned_height);
+
+ // Allocation
+ mi_size = cm->mode_info_stride * (cm->mi_rows + MI_BLOCK_SIZE);
+
+ vpx_free(cm->mip);
+ cm->mip = vpx_calloc(mi_size, sizeof(MODE_INFO));
+ if (!cm->mip)
+ goto fail;
+
+ vpx_free(cm->prev_mip);
+ cm->prev_mip = vpx_calloc(mi_size, sizeof(MODE_INFO));
+ if (!cm->prev_mip)
+ goto fail;
+
+ vpx_free(cm->mi_grid_base);
+ cm->mi_grid_base = vpx_calloc(mi_size, sizeof(*cm->mi_grid_base));
+ if (!cm->mi_grid_base)
+ goto fail;
+
+ vpx_free(cm->prev_mi_grid_base);
+ cm->prev_mi_grid_base = vpx_calloc(mi_size, sizeof(*cm->prev_mi_grid_base));
+ if (!cm->prev_mi_grid_base)
+ goto fail;
+
+ setup_mi(cm);
+
+ // Create the segmentation map structure and set to 0.
+ vpx_free(cm->last_frame_seg_map);
+ cm->last_frame_seg_map = vpx_calloc(cm->mi_rows * cm->mi_cols, 1);
+ if (!cm->last_frame_seg_map)
+ goto fail;
+
+ return 0;
+
+ fail:
+ vp9_free_frame_buffers(cm);
+ return 1;
+}
+
int vp9_alloc_frame_buffers(VP9_COMMON *cm, int width, int height) {
int i;
diff --git a/vp9/common/vp9_alloccommon.h b/vp9/common/vp9_alloccommon.h
index 5d5fae9..cf8dca5 100644
--- a/vp9/common/vp9_alloccommon.h
+++ b/vp9/common/vp9_alloccommon.h
@@ -21,6 +21,7 @@
void vp9_create_common(VP9_COMMON *cm);
void vp9_remove_common(VP9_COMMON *cm);
+int vp9_resize_frame_buffers(VP9_COMMON *cm, int width, int height);
int vp9_alloc_frame_buffers(VP9_COMMON *cm, int width, int height);
void vp9_free_frame_buffers(VP9_COMMON *cm);
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index c5da375..121947b 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -381,22 +381,6 @@
const int stride = 4 << b_width_log2(plane_bsize);
return base + raster_block_offset(plane_bsize, raster_block, stride);
}
-static uint8_t* raster_block_offset_uint8(BLOCK_SIZE plane_bsize,
- int raster_block, uint8_t *base,
- int stride) {
- return base + raster_block_offset(plane_bsize, raster_block, stride);
-}
-
-static int txfrm_block_to_raster_block(BLOCK_SIZE plane_bsize,
- TX_SIZE tx_size, int block) {
- const int bwl = b_width_log2(plane_bsize);
- const int tx_cols_log2 = bwl - tx_size;
- const int tx_cols = 1 << tx_cols_log2;
- const int raster_mb = block >> (tx_size << 1);
- const int x = (raster_mb & (tx_cols - 1)) << tx_size;
- const int y = (raster_mb >> tx_cols_log2) << tx_size;
- return x + (y << bwl);
-}
static void txfrm_block_to_raster_xy(BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, int block,
diff --git a/vp9/common/vp9_common_data.c b/vp9/common/vp9_common_data.c
index f858900..388f38d 100644
--- a/vp9/common/vp9_common_data.c
+++ b/vp9/common/vp9_common_data.c
@@ -123,8 +123,6 @@
TX_32X32, // TX_MODE_SELECT
};
-
-
const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES][2][2] = {
// ss_x == 0 ss_x == 0 ss_x == 1 ss_x == 1
// ss_y == 0 ss_y == 1 ss_y == 0 ss_y == 1
@@ -143,4 +141,24 @@
{{BLOCK_64X64, BLOCK_64X32}, {BLOCK_32X64, BLOCK_32X32}},
};
-
+// Generates 4 bit field in which each bit set to 1 represents
+// a blocksize partition 1111 means we split 64x64, 32x32, 16x16
+// and 8x8. 1000 means we just split the 64x64 to 32x32
+const struct {
+ PARTITION_CONTEXT above;
+ PARTITION_CONTEXT left;
+} partition_context_lookup[BLOCK_SIZES]= {
+ {15, 15}, // 4X4 - {0b1111, 0b1111}
+ {15, 14}, // 4X8 - {0b1111, 0b1110}
+ {14, 15}, // 8X4 - {0b1110, 0b1111}
+ {14, 14}, // 8X8 - {0b1110, 0b1110}
+ {14, 12}, // 8X16 - {0b1110, 0b1100}
+ {12, 14}, // 16X8 - {0b1100, 0b1110}
+ {12, 12}, // 16X16 - {0b1100, 0b1100}
+ {12, 8 }, // 16X32 - {0b1100, 0b1000}
+ {8, 12}, // 32X16 - {0b1000, 0b1100}
+ {8, 8 }, // 32X32 - {0b1000, 0b1000}
+ {8, 0 }, // 32X64 - {0b1000, 0b0000}
+ {0, 8 }, // 64X32 - {0b0000, 0b1000}
+ {0, 0 }, // 64X64 - {0b0000, 0b0000}
+};
diff --git a/vp9/common/vp9_entropy.c b/vp9/common/vp9_entropy.c
index 2676762..82aa77e 100644
--- a/vp9/common/vp9_entropy.c
+++ b/vp9/common/vp9_entropy.c
@@ -15,7 +15,6 @@
#include "vpx_mem/vpx_mem.h"
#include "vpx/vpx_integer.h"
-#define MODEL_NODES (ENTROPY_NODES - UNCONSTRAINED_NODES)
DECLARE_ALIGNED(16, const uint8_t, vp9_norm[256]) = {
0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
@@ -37,25 +36,84 @@
};
DECLARE_ALIGNED(16, const uint8_t,
- vp9_coefband_trans_8x8plus[MAXBAND_INDEX + 1]) = {
+ vp9_coefband_trans_8x8plus[1024]) = {
0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 5
+ 4, 4, 4, 4, 4, 5,
+ // beyond MAXBAND_INDEX+1 all values are filled as 5
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
};
-DECLARE_ALIGNED(16, const uint8_t,
- vp9_coefband_trans_4x4[MAXBAND_INDEX + 1]) = {
+DECLARE_ALIGNED(16, const uint8_t, vp9_coefband_trans_4x4[16]) = {
0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5,
- 5, 5, 5, 5, 5, 5
};
DECLARE_ALIGNED(16, const uint8_t, vp9_pt_energy_class[MAX_ENTROPY_TOKENS]) = {
0, 1, 2, 3, 3, 4, 4, 5, 5, 5, 5, 5
};
-
-
-/* Array indices are identical to previously-existing CONTEXT_NODE indices */
-
+// Array indices are identical to previously-existing CONTEXT_NODE indices
const vp9_tree_index vp9_coef_tree[TREE_SIZE(MAX_ENTROPY_TOKENS)] = {
-DCT_EOB_TOKEN, 2, /* 0 = EOB */
-ZERO_TOKEN, 4, /* 1 = ZERO */
@@ -99,7 +157,9 @@
// the probabilities for the rest of the nodes.
// beta = 8
-static const vp9_prob modelcoefprobs_pareto8[COEFPROB_MODELS][MODEL_NODES] = {
+
+
+static const vp9_prob pareto8_probs[COEFPROB_MODELS][MODEL_NODES] = {
{ 3, 86, 128, 6, 86, 23, 88, 29},
{ 9, 86, 129, 17, 88, 61, 94, 76},
{ 15, 87, 129, 28, 89, 93, 100, 110},
@@ -230,26 +290,285 @@
{255, 246, 247, 255, 239, 255, 253, 255}
};
-static void extend_model_to_full_distribution(vp9_prob p,
- vp9_prob *tree_probs) {
+// This table is an expansion of the table : modelcoefprobs_pareto8
+// to all 255 probabilities using the code as follows to do the expansion:
+// tree_probs[i] = (model[l][i - UNCONSTRAINED_NODES] +
+// model[l + 1][i - UNCONSTRAINED_NODES]) >> 1;
+const vp9_prob vp9_pareto8_full[255][MODEL_NODES] = {
+ { 3, 86, 128, 6, 86, 23, 88, 29},
+ { 6, 86, 128, 11, 87, 42, 91, 52},
+ { 9, 86, 129, 17, 88, 61, 94, 76},
+ { 12, 86, 129, 22, 88, 77, 97, 93},
+ { 15, 87, 129, 28, 89, 93, 100, 110},
+ { 17, 87, 129, 33, 90, 105, 103, 123},
+ { 20, 88, 130, 38, 91, 118, 106, 136},
+ { 23, 88, 130, 43, 91, 128, 108, 146},
+ { 26, 89, 131, 48, 92, 139, 111, 156},
+ { 28, 89, 131, 53, 93, 147, 114, 163},
+ { 31, 90, 131, 58, 94, 156, 117, 171},
+ { 34, 90, 131, 62, 94, 163, 119, 177},
+ { 37, 90, 132, 66, 95, 171, 122, 184},
+ { 39, 90, 132, 70, 96, 177, 124, 189},
+ { 42, 91, 132, 75, 97, 183, 127, 194},
+ { 44, 91, 132, 79, 97, 188, 129, 198},
+ { 47, 92, 133, 83, 98, 193, 132, 202},
+ { 49, 92, 133, 86, 99, 197, 134, 205},
+ { 52, 93, 133, 90, 100, 201, 137, 208},
+ { 54, 93, 133, 94, 100, 204, 139, 211},
+ { 57, 94, 134, 98, 101, 208, 142, 214},
+ { 59, 94, 134, 101, 102, 211, 144, 216},
+ { 62, 94, 135, 105, 103, 214, 146, 218},
+ { 64, 94, 135, 108, 103, 216, 148, 220},
+ { 66, 95, 135, 111, 104, 219, 151, 222},
+ { 68, 95, 135, 114, 105, 221, 153, 223},
+ { 71, 96, 136, 117, 106, 224, 155, 225},
+ { 73, 96, 136, 120, 106, 225, 157, 226},
+ { 76, 97, 136, 123, 107, 227, 159, 228},
+ { 78, 97, 136, 126, 108, 229, 160, 229},
+ { 80, 98, 137, 129, 109, 231, 162, 231},
+ { 82, 98, 137, 131, 109, 232, 164, 232},
+ { 84, 98, 138, 134, 110, 234, 166, 233},
+ { 86, 98, 138, 137, 111, 235, 168, 234},
+ { 89, 99, 138, 140, 112, 236, 170, 235},
+ { 91, 99, 138, 142, 112, 237, 171, 235},
+ { 93, 100, 139, 145, 113, 238, 173, 236},
+ { 95, 100, 139, 147, 114, 239, 174, 237},
+ { 97, 101, 140, 149, 115, 240, 176, 238},
+ { 99, 101, 140, 151, 115, 241, 177, 238},
+ {101, 102, 140, 154, 116, 242, 179, 239},
+ {103, 102, 140, 156, 117, 242, 180, 239},
+ {105, 103, 141, 158, 118, 243, 182, 240},
+ {107, 103, 141, 160, 118, 243, 183, 240},
+ {109, 104, 141, 162, 119, 244, 185, 241},
+ {111, 104, 141, 164, 119, 244, 186, 241},
+ {113, 104, 142, 166, 120, 245, 187, 242},
+ {114, 104, 142, 168, 121, 245, 188, 242},
+ {116, 105, 143, 170, 122, 246, 190, 243},
+ {118, 105, 143, 171, 122, 246, 191, 243},
+ {120, 106, 143, 173, 123, 247, 192, 244},
+ {121, 106, 143, 175, 124, 247, 193, 244},
+ {123, 107, 144, 177, 125, 248, 195, 244},
+ {125, 107, 144, 178, 125, 248, 196, 244},
+ {127, 108, 145, 180, 126, 249, 197, 245},
+ {128, 108, 145, 181, 127, 249, 198, 245},
+ {130, 109, 145, 183, 128, 249, 199, 245},
+ {132, 109, 145, 184, 128, 249, 200, 245},
+ {134, 110, 146, 186, 129, 250, 201, 246},
+ {135, 110, 146, 187, 130, 250, 202, 246},
+ {137, 111, 147, 189, 131, 251, 203, 246},
+ {138, 111, 147, 190, 131, 251, 204, 246},
+ {140, 112, 147, 192, 132, 251, 205, 247},
+ {141, 112, 147, 193, 132, 251, 206, 247},
+ {143, 113, 148, 194, 133, 251, 207, 247},
+ {144, 113, 148, 195, 134, 251, 207, 247},
+ {146, 114, 149, 197, 135, 252, 208, 248},
+ {147, 114, 149, 198, 135, 252, 209, 248},
+ {149, 115, 149, 199, 136, 252, 210, 248},
+ {150, 115, 149, 200, 137, 252, 210, 248},
+ {152, 115, 150, 201, 138, 252, 211, 248},
+ {153, 115, 150, 202, 138, 252, 212, 248},
+ {155, 116, 151, 204, 139, 253, 213, 249},
+ {156, 116, 151, 205, 139, 253, 213, 249},
+ {158, 117, 151, 206, 140, 253, 214, 249},
+ {159, 117, 151, 207, 141, 253, 215, 249},
+ {161, 118, 152, 208, 142, 253, 216, 249},
+ {162, 118, 152, 209, 142, 253, 216, 249},
+ {163, 119, 153, 210, 143, 253, 217, 249},
+ {164, 119, 153, 211, 143, 253, 217, 249},
+ {166, 120, 153, 212, 144, 254, 218, 250},
+ {167, 120, 153, 212, 145, 254, 219, 250},
+ {168, 121, 154, 213, 146, 254, 220, 250},
+ {169, 121, 154, 214, 146, 254, 220, 250},
+ {171, 122, 155, 215, 147, 254, 221, 250},
+ {172, 122, 155, 216, 147, 254, 221, 250},
+ {173, 123, 155, 217, 148, 254, 222, 250},
+ {174, 123, 155, 217, 149, 254, 222, 250},
+ {176, 124, 156, 218, 150, 254, 223, 250},
+ {177, 124, 156, 219, 150, 254, 223, 250},
+ {178, 125, 157, 220, 151, 254, 224, 251},
+ {179, 125, 157, 220, 151, 254, 224, 251},
+ {180, 126, 157, 221, 152, 254, 225, 251},
+ {181, 126, 157, 221, 152, 254, 225, 251},
+ {183, 127, 158, 222, 153, 254, 226, 251},
+ {184, 127, 158, 223, 154, 254, 226, 251},
+ {185, 128, 159, 224, 155, 255, 227, 251},
+ {186, 128, 159, 224, 155, 255, 227, 251},
+ {187, 129, 160, 225, 156, 255, 228, 251},
+ {188, 130, 160, 225, 156, 255, 228, 251},
+ {189, 131, 160, 226, 157, 255, 228, 251},
+ {190, 131, 160, 226, 158, 255, 228, 251},
+ {191, 132, 161, 227, 159, 255, 229, 251},
+ {192, 132, 161, 227, 159, 255, 229, 251},
+ {193, 133, 162, 228, 160, 255, 230, 252},
+ {194, 133, 162, 229, 160, 255, 230, 252},
+ {195, 134, 163, 230, 161, 255, 231, 252},
+ {196, 134, 163, 230, 161, 255, 231, 252},
+ {197, 135, 163, 231, 162, 255, 231, 252},
+ {198, 135, 163, 231, 162, 255, 231, 252},
+ {199, 136, 164, 232, 163, 255, 232, 252},
+ {200, 136, 164, 232, 164, 255, 232, 252},
+ {201, 137, 165, 233, 165, 255, 233, 252},
+ {201, 137, 165, 233, 165, 255, 233, 252},
+ {202, 138, 166, 233, 166, 255, 233, 252},
+ {203, 138, 166, 233, 166, 255, 233, 252},
+ {204, 139, 166, 234, 167, 255, 234, 252},
+ {205, 139, 166, 234, 167, 255, 234, 252},
+ {206, 140, 167, 235, 168, 255, 235, 252},
+ {206, 140, 167, 235, 168, 255, 235, 252},
+ {207, 141, 168, 236, 169, 255, 235, 252},
+ {208, 141, 168, 236, 170, 255, 235, 252},
+ {209, 142, 169, 237, 171, 255, 236, 252},
+ {209, 143, 169, 237, 171, 255, 236, 252},
+ {210, 144, 169, 237, 172, 255, 236, 252},
+ {211, 144, 169, 237, 172, 255, 236, 252},
+ {212, 145, 170, 238, 173, 255, 237, 252},
+ {213, 145, 170, 238, 173, 255, 237, 252},
+ {214, 146, 171, 239, 174, 255, 237, 253},
+ {214, 146, 171, 239, 174, 255, 237, 253},
+ {215, 147, 172, 240, 175, 255, 238, 253},
+ {215, 147, 172, 240, 175, 255, 238, 253},
+ {216, 148, 173, 240, 176, 255, 238, 253},
+ {217, 148, 173, 240, 176, 255, 238, 253},
+ {218, 149, 173, 241, 177, 255, 239, 253},
+ {218, 149, 173, 241, 178, 255, 239, 253},
+ {219, 150, 174, 241, 179, 255, 239, 253},
+ {219, 151, 174, 241, 179, 255, 239, 253},
+ {220, 152, 175, 242, 180, 255, 240, 253},
+ {221, 152, 175, 242, 180, 255, 240, 253},
+ {222, 153, 176, 242, 181, 255, 240, 253},
+ {222, 153, 176, 242, 181, 255, 240, 253},
+ {223, 154, 177, 243, 182, 255, 240, 253},
+ {223, 154, 177, 243, 182, 255, 240, 253},
+ {224, 155, 178, 244, 183, 255, 241, 253},
+ {224, 155, 178, 244, 183, 255, 241, 253},
+ {225, 156, 178, 244, 184, 255, 241, 253},
+ {225, 157, 178, 244, 184, 255, 241, 253},
+ {226, 158, 179, 244, 185, 255, 242, 253},
+ {227, 158, 179, 244, 185, 255, 242, 253},
+ {228, 159, 180, 245, 186, 255, 242, 253},
+ {228, 159, 180, 245, 186, 255, 242, 253},
+ {229, 160, 181, 245, 187, 255, 242, 253},
+ {229, 160, 181, 245, 187, 255, 242, 253},
+ {230, 161, 182, 246, 188, 255, 243, 253},
+ {230, 162, 182, 246, 188, 255, 243, 253},
+ {231, 163, 183, 246, 189, 255, 243, 253},
+ {231, 163, 183, 246, 189, 255, 243, 253},
+ {232, 164, 184, 247, 190, 255, 243, 253},
+ {232, 164, 184, 247, 190, 255, 243, 253},
+ {233, 165, 185, 247, 191, 255, 244, 253},
+ {233, 165, 185, 247, 191, 255, 244, 253},
+ {234, 166, 185, 247, 192, 255, 244, 253},
+ {234, 167, 185, 247, 192, 255, 244, 253},
+ {235, 168, 186, 248, 193, 255, 244, 253},
+ {235, 168, 186, 248, 193, 255, 244, 253},
+ {236, 169, 187, 248, 194, 255, 244, 253},
+ {236, 169, 187, 248, 194, 255, 244, 253},
+ {236, 170, 188, 248, 195, 255, 245, 253},
+ {236, 170, 188, 248, 195, 255, 245, 253},
+ {237, 171, 189, 249, 196, 255, 245, 254},
+ {237, 172, 189, 249, 196, 255, 245, 254},
+ {238, 173, 190, 249, 197, 255, 245, 254},
+ {238, 173, 190, 249, 197, 255, 245, 254},
+ {239, 174, 191, 249, 198, 255, 245, 254},
+ {239, 174, 191, 249, 198, 255, 245, 254},
+ {240, 175, 192, 249, 199, 255, 246, 254},
+ {240, 176, 192, 249, 199, 255, 246, 254},
+ {240, 177, 193, 250, 200, 255, 246, 254},
+ {240, 177, 193, 250, 200, 255, 246, 254},
+ {241, 178, 194, 250, 201, 255, 246, 254},
+ {241, 178, 194, 250, 201, 255, 246, 254},
+ {242, 179, 195, 250, 202, 255, 246, 254},
+ {242, 180, 195, 250, 202, 255, 246, 254},
+ {242, 181, 196, 250, 203, 255, 247, 254},
+ {242, 181, 196, 250, 203, 255, 247, 254},
+ {243, 182, 197, 251, 204, 255, 247, 254},
+ {243, 183, 197, 251, 204, 255, 247, 254},
+ {244, 184, 198, 251, 205, 255, 247, 254},
+ {244, 184, 198, 251, 205, 255, 247, 254},
+ {244, 185, 199, 251, 206, 255, 247, 254},
+ {244, 185, 199, 251, 206, 255, 247, 254},
+ {245, 186, 200, 251, 207, 255, 247, 254},
+ {245, 187, 200, 251, 207, 255, 247, 254},
+ {246, 188, 201, 252, 207, 255, 248, 254},
+ {246, 188, 201, 252, 207, 255, 248, 254},
+ {246, 189, 202, 252, 208, 255, 248, 254},
+ {246, 190, 202, 252, 208, 255, 248, 254},
+ {247, 191, 203, 252, 209, 255, 248, 254},
+ {247, 191, 203, 252, 209, 255, 248, 254},
+ {247, 192, 204, 252, 210, 255, 248, 254},
+ {247, 193, 204, 252, 210, 255, 248, 254},
+ {248, 194, 205, 252, 211, 255, 248, 254},
+ {248, 194, 205, 252, 211, 255, 248, 254},
+ {248, 195, 206, 252, 212, 255, 249, 254},
+ {248, 196, 206, 252, 212, 255, 249, 254},
+ {249, 197, 207, 253, 213, 255, 249, 254},
+ {249, 197, 207, 253, 213, 255, 249, 254},
+ {249, 198, 208, 253, 214, 255, 249, 254},
+ {249, 199, 209, 253, 214, 255, 249, 254},
+ {250, 200, 210, 253, 215, 255, 249, 254},
+ {250, 200, 210, 253, 215, 255, 249, 254},
+ {250, 201, 211, 253, 215, 255, 249, 254},
+ {250, 202, 211, 253, 215, 255, 249, 254},
+ {250, 203, 212, 253, 216, 255, 249, 254},
+ {250, 203, 212, 253, 216, 255, 249, 254},
+ {251, 204, 213, 253, 217, 255, 250, 254},
+ {251, 205, 213, 253, 217, 255, 250, 254},
+ {251, 206, 214, 254, 218, 255, 250, 254},
+ {251, 206, 215, 254, 218, 255, 250, 254},
+ {252, 207, 216, 254, 219, 255, 250, 254},
+ {252, 208, 216, 254, 219, 255, 250, 254},
+ {252, 209, 217, 254, 220, 255, 250, 254},
+ {252, 210, 217, 254, 220, 255, 250, 254},
+ {252, 211, 218, 254, 221, 255, 250, 254},
+ {252, 212, 218, 254, 221, 255, 250, 254},
+ {253, 213, 219, 254, 222, 255, 250, 254},
+ {253, 213, 220, 254, 222, 255, 250, 254},
+ {253, 214, 221, 254, 223, 255, 250, 254},
+ {253, 215, 221, 254, 223, 255, 250, 254},
+ {253, 216, 222, 254, 224, 255, 251, 254},
+ {253, 217, 223, 254, 224, 255, 251, 254},
+ {253, 218, 224, 254, 225, 255, 251, 254},
+ {253, 219, 224, 254, 225, 255, 251, 254},
+ {254, 220, 225, 254, 225, 255, 251, 254},
+ {254, 221, 226, 254, 225, 255, 251, 254},
+ {254, 222, 227, 255, 226, 255, 251, 254},
+ {254, 223, 227, 255, 226, 255, 251, 254},
+ {254, 224, 228, 255, 227, 255, 251, 254},
+ {254, 225, 229, 255, 227, 255, 251, 254},
+ {254, 226, 230, 255, 228, 255, 251, 254},
+ {254, 227, 230, 255, 229, 255, 251, 254},
+ {255, 228, 231, 255, 230, 255, 251, 254},
+ {255, 229, 232, 255, 230, 255, 251, 254},
+ {255, 230, 233, 255, 231, 255, 252, 254},
+ {255, 231, 234, 255, 231, 255, 252, 254},
+ {255, 232, 235, 255, 232, 255, 252, 254},
+ {255, 233, 236, 255, 232, 255, 252, 254},
+ {255, 235, 237, 255, 233, 255, 252, 254},
+ {255, 236, 238, 255, 234, 255, 252, 254},
+ {255, 238, 240, 255, 235, 255, 252, 255},
+ {255, 239, 241, 255, 235, 255, 252, 254},
+ {255, 241, 243, 255, 236, 255, 252, 254},
+ {255, 243, 245, 255, 237, 255, 252, 254},
+ {255, 246, 247, 255, 239, 255, 253, 255},
+};
+
+static void extend_to_full_distribution(vp9_prob *probs, vp9_prob p) {
const int l = (p - 1) / 2;
- const vp9_prob (*model)[MODEL_NODES] = modelcoefprobs_pareto8;
if (p & 1) {
- vpx_memcpy(tree_probs + UNCONSTRAINED_NODES,
- model[l], MODEL_NODES * sizeof(vp9_prob));
+ // Just copy
+ vpx_memcpy(probs, pareto8_probs[l], MODEL_NODES * sizeof(vp9_prob));
} else {
- // interpolate
+ // Interpolate
int i;
- for (i = UNCONSTRAINED_NODES; i < ENTROPY_NODES; ++i)
- tree_probs[i] = (model[l][i - UNCONSTRAINED_NODES] +
- model[l + 1][i - UNCONSTRAINED_NODES]) >> 1;
+ for (i = 0; i < MODEL_NODES; ++i)
+ probs[i] = (pareto8_probs[l][i] + pareto8_probs[l + 1][i]) >> 1;
}
}
void vp9_model_to_full_probs(const vp9_prob *model, vp9_prob *full) {
if (full != model)
vpx_memcpy(full, model, sizeof(vp9_prob) * UNCONSTRAINED_NODES);
- extend_model_to_full_distribution(model[PIVOT_NODE], full);
+ extend_to_full_distribution(&full[UNCONSTRAINED_NODES], model[PIVOT_NODE]);
}
static vp9_tree_index cat1[2], cat2[4], cat3[6], cat4[8], cat5[10], cat6[28];
@@ -275,18 +594,18 @@
}
const vp9_extra_bit vp9_extra_bits[MAX_ENTROPY_TOKENS] = {
- { 0, 0, 0, 0},
- { 0, 0, 0, 1},
- { 0, 0, 0, 2},
- { 0, 0, 0, 3},
- { 0, 0, 0, 4},
- { cat1, Pcat1, 1, 5},
- { cat2, Pcat2, 2, 7},
- { cat3, Pcat3, 3, 11},
- { cat4, Pcat4, 4, 19},
- { cat5, Pcat5, 5, 35},
- { cat6, Pcat6, 14, 67},
- { 0, 0, 0, 0}
+ {0, 0, 0, 0}, // ZERO_TOKEN
+ {0, 0, 0, 1}, // ONE_TOKEN
+ {0, 0, 0, 2}, // TWO_TOKEN
+ {0, 0, 0, 3}, // THREE_TOKEN
+ {0, 0, 0, 4}, // FOUR_TOKEN
+ {cat1, Pcat1, 1, 5}, // DCT_VAL_CATEGORY1
+ {cat2, Pcat2, 2, 7}, // DCT_VAL_CATEGORY2
+ {cat3, Pcat3, 3, 11}, // DCT_VAL_CATEGORY3
+ {cat4, Pcat4, 4, 19}, // DCT_VAL_CATEGORY4
+ {cat5, Pcat5, 5, 35}, // DCT_VAL_CATEGORY5
+ {cat6, Pcat6, 14, 67}, // DCT_VAL_CATEGORY6
+ {0, 0, 0, 0} // DCT_EOB_TOKEN
};
#include "vp9/common/vp9_default_coef_probs.h"
@@ -303,8 +622,6 @@
vp9_tokens_from_tree(vp9_coef_encodings, vp9_coef_tree);
}
-// #define COEF_COUNT_TESTING
-
#define COEF_COUNT_SAT 24
#define COEF_MAX_UPDATE_FACTOR 112
#define COEF_COUNT_SAT_KEY 24
diff --git a/vp9/common/vp9_entropy.h b/vp9/common/vp9_entropy.h
index 67b1669..0370b32 100644
--- a/vp9/common/vp9_entropy.h
+++ b/vp9/common/vp9_entropy.h
@@ -17,6 +17,7 @@
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_scan.h"
#include "vp9/common/vp9_treecoder.h"
+#include "vp9/common/vp9_entropymode.h"
#define DIFF_UPDATE_PROB 252
@@ -120,12 +121,16 @@
// This is the index in the scan order beyond which all coefficients for
// 8x8 transform and above are in the top band.
-// For 4x4 blocks the index is less but to keep things common the lookup
-// table for 4x4 is padded out to this index.
+// This macro is currently unused but may be used by certain implementations
#define MAXBAND_INDEX 21
-extern const uint8_t vp9_coefband_trans_8x8plus[MAXBAND_INDEX + 1];
-extern const uint8_t vp9_coefband_trans_4x4[MAXBAND_INDEX + 1];
+extern const uint8_t vp9_coefband_trans_8x8plus[1024];
+extern const uint8_t vp9_coefband_trans_4x4[16];
+
+static const uint8_t *get_band_translate(TX_SIZE tx_size) {
+ return tx_size == TX_4X4 ? vp9_coefband_trans_4x4
+ : vp9_coefband_trans_8x8plus;
+}
// 128 lists of probabilities are stored for the following ONE node probs:
// 1, 3, 5, 7, ..., 253, 255
@@ -137,6 +142,9 @@
#define PIVOT_NODE 2 // which node is pivot
+#define MODEL_NODES (ENTROPY_NODES - UNCONSTRAINED_NODES)
+extern const vp9_prob vp9_pareto8_full[255][MODEL_NODES];
+
typedef vp9_prob vp9_coeff_probs_model[REF_TYPES][COEF_BANDS]
[PREV_COEF_CONTEXTS]
[UNCONSTRAINED_NODES];
@@ -178,23 +186,19 @@
static void get_scan(const MACROBLOCKD *xd, TX_SIZE tx_size,
PLANE_TYPE type, int block_idx,
const int16_t **scan, const int16_t **scan_nb) {
- switch (tx_size) {
- case TX_4X4:
- get_scan_nb_4x4(get_tx_type_4x4(type, xd, block_idx), scan, scan_nb);
- break;
- case TX_8X8:
- get_scan_nb_8x8(get_tx_type_8x8(type, xd), scan, scan_nb);
- break;
- case TX_16X16:
- get_scan_nb_16x16(get_tx_type_16x16(type, xd), scan, scan_nb);
- break;
- case TX_32X32:
- *scan = vp9_default_scan_32x32;
- *scan_nb = vp9_default_scan_32x32_neighbors;
- break;
- default:
- assert(!"Invalid transform size.");
+ const MODE_INFO *const mi = xd->mi_8x8[0];
+ const MB_MODE_INFO *const mbmi = &mi->mbmi;
+ const scan_order *so;
+
+ if (is_inter_block(mbmi) || type != PLANE_TYPE_Y_WITH_DC || xd->lossless) {
+ so = &inter_scan_orders[tx_size];
+ } else {
+ const MB_PREDICTION_MODE mode =
+ mbmi->sb_type < BLOCK_8X8 ? mi->bmi[block_idx].as_mode : mbmi->mode;
+ so = &intra_scan_orders[tx_size][mode];
}
+ *scan = so->scan;
+ *scan_nb = so->neighbors;
}
#endif // VP9_COMMON_VP9_ENTROPY_H_
diff --git a/vp9/common/vp9_idct.c b/vp9/common/vp9_idct.c
index ea8683e..149362a 100644
--- a/vp9/common/vp9_idct.c
+++ b/vp9/common/vp9_idct.c
@@ -835,7 +835,8 @@
ht.cols(temp_in, temp_out);
for (j = 0; j < 16; ++j)
dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
- + dest[j * stride + i]); }
+ + dest[j * stride + i]);
+ }
}
void vp9_idct16x16_10_add_c(const int16_t *input, uint8_t *dest, int stride) {
@@ -1276,7 +1277,7 @@
idct32_1d(temp_in, temp_out);
for (j = 0; j < 32; ++j)
dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
- + dest[j * stride + i]);
+ + dest[j * stride + i]);
}
}
diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c
index ff504a1..8e13afb 100644
--- a/vp9/common/vp9_loopfilter.c
+++ b/vp9/common/vp9_loopfilter.c
@@ -283,10 +283,10 @@
// n_shift is the a multiplier for lf_deltas
// the multiplier is 1 for when filter_lvl is between 0 and 31;
// 2 when filter_lvl is between 32 and 63
- const int n_shift = default_filt_lvl >> 5;
+ const int scale = 1 << (default_filt_lvl >> 5);
loop_filter_info_n *const lfi = &cm->lf_info;
struct loopfilter *const lf = &cm->lf;
- struct segmentation *const seg = &cm->seg;
+ const struct segmentation *const seg = &cm->seg;
// update limits if sharpness has changed
if (lf->last_sharpness_level != lf->sharpness_level) {
@@ -295,9 +295,7 @@
}
for (seg_id = 0; seg_id < MAX_SEGMENTS; seg_id++) {
- int lvl_seg = default_filt_lvl, ref, mode, intra_lvl;
-
- // Set the baseline filter values for each segment
+ int lvl_seg = default_filt_lvl;
if (vp9_segfeature_active(seg, seg_id, SEG_LVL_ALT_LF)) {
const int data = vp9_get_segdata(seg, seg_id, SEG_LVL_ALT_LF);
lvl_seg = seg->abs_delta == SEGMENT_ABSDATA
@@ -309,31 +307,22 @@
// we could get rid of this if we assume that deltas are set to
// zero when not in use; encoder always uses deltas
vpx_memset(lfi->lvl[seg_id], lvl_seg, sizeof(lfi->lvl[seg_id]));
- continue;
- }
+ } else {
+ int ref, mode;
+ const int intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale;
+ lfi->lvl[seg_id][INTRA_FRAME][0] = clamp(intra_lvl, 0, MAX_LOOP_FILTER);
- intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * (1 << n_shift);
- lfi->lvl[seg_id][INTRA_FRAME][0] = clamp(intra_lvl, 0, MAX_LOOP_FILTER);
-
- for (ref = LAST_FRAME; ref < MAX_REF_FRAMES; ++ref)
- for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) {
- const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * (1 << n_shift)
- + lf->mode_deltas[mode] * (1 << n_shift);
- lfi->lvl[seg_id][ref][mode] = clamp(inter_lvl, 0, MAX_LOOP_FILTER);
+ for (ref = LAST_FRAME; ref < MAX_REF_FRAMES; ++ref) {
+ for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) {
+ const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * scale
+ + lf->mode_deltas[mode] * scale;
+ lfi->lvl[seg_id][ref][mode] = clamp(inter_lvl, 0, MAX_LOOP_FILTER);
+ }
}
+ }
}
}
-static uint8_t build_lfi(const loop_filter_info_n *lfi_n,
- const MB_MODE_INFO *mbmi) {
- const int seg = mbmi->segment_id;
- const int ref = mbmi->ref_frame[0];
- const int mode = lfi_n->mode_lf_lut[mbmi->mode];
- const int filter_level = lfi_n->lvl[seg][ref][mode];
-
- return filter_level;
-}
-
static void filter_selectively_vert(uint8_t *s, int pitch,
unsigned int mask_16x16,
unsigned int mask_8x8,
@@ -351,19 +340,12 @@
if (mask_16x16 & 1) {
vp9_mb_lpf_vertical_edge_w(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr);
- assert(!(mask_8x8 & 1));
- assert(!(mask_4x4 & 1));
- assert(!(mask_4x4_int & 1));
} else if (mask_8x8 & 1) {
vp9_mbloop_filter_vertical_edge(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1);
- assert(!(mask_16x16 & 1));
- assert(!(mask_4x4 & 1));
} else if (mask_4x4 & 1) {
vp9_loop_filter_vertical_edge(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1);
- assert(!(mask_16x16 & 1));
- assert(!(mask_8x8 & 1));
}
}
if (mask_4x4_int & 1)
@@ -403,27 +385,21 @@
vp9_mb_lpf_horizontal_edge_w(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1);
}
- assert(!(mask_8x8 & 1));
- assert(!(mask_4x4 & 1));
- assert(!(mask_4x4_int & 1));
} else if (mask_8x8 & 1) {
if ((mask_8x8 & 3) == 3) {
// Next block's thresholds
const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
- // TODO(yunqingwang): Combine next 2 calls as 1 wide filtering.
- vp9_mbloop_filter_horizontal_edge(s, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1);
- vp9_mbloop_filter_horizontal_edge(s + 8, pitch, lfin->mblim,
- lfin->lim, lfin->hev_thr, 1);
+ vp9_mbloop_filter_horizontal_edge_16(s, pitch, lfi->mblim,
+ lfi->lim, lfi->hev_thr,
+ lfin->mblim, lfin->lim,
+ lfin->hev_thr);
if ((mask_4x4_int & 3) == 3) {
- // TODO(yunqingwang): Combine next 2 calls as 1 wide filtering.
- vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim,
- lfi->lim, lfi->hev_thr, 1);
- vp9_loop_filter_horizontal_edge(s + 8 + 4 * pitch, pitch,
- lfin->mblim, lfin->lim,
- lfin->hev_thr, 1);
+ vp9_loop_filter_horizontal_edge_16(s + 4 * pitch, pitch, lfi->mblim,
+ lfi->lim, lfi->hev_thr,
+ lfin->mblim, lfin->lim,
+ lfin->hev_thr);
} else {
if (mask_4x4_int & 1)
vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim,
@@ -442,26 +418,20 @@
vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim,
lfi->lim, lfi->hev_thr, 1);
}
- assert(!(mask_16x16 & 1));
- assert(!(mask_4x4 & 1));
} else if (mask_4x4 & 1) {
if ((mask_4x4 & 3) == 3) {
// Next block's thresholds
const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
- // TODO(yunqingwang): Combine next 2 calls as 1 wide filtering.
- vp9_loop_filter_horizontal_edge(s, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1);
- vp9_loop_filter_horizontal_edge(s + 8, pitch, lfin->mblim, lfin->lim,
- lfin->hev_thr, 1);
-
+ vp9_loop_filter_horizontal_edge_16(s, pitch, lfi->mblim,
+ lfi->lim, lfi->hev_thr,
+ lfin->mblim, lfin->lim,
+ lfin->hev_thr);
if ((mask_4x4_int & 3) == 3) {
- // TODO(yunqingwang): Combine next 2 calls as 1 wide filtering.
- vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim,
- lfi->lim, lfi->hev_thr, 1);
- vp9_loop_filter_horizontal_edge(s + 8 + 4 * pitch, pitch,
- lfin->mblim, lfin->lim,
- lfin->hev_thr, 1);
+ vp9_loop_filter_horizontal_edge_16(s + 4 * pitch, pitch, lfi->mblim,
+ lfi->lim, lfi->hev_thr,
+ lfin->mblim, lfin->lim,
+ lfin->hev_thr);
} else {
if (mask_4x4_int & 1)
vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim,
@@ -480,8 +450,6 @@
vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim,
lfi->lim, lfi->hev_thr, 1);
}
- assert(!(mask_16x16 & 1));
- assert(!(mask_8x8 & 1));
} else if (mask_4x4_int & 1) {
vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim,
lfi->lim, lfi->hev_thr, 1);
@@ -864,9 +832,37 @@
lfm->left_uv[i] &= 0xeeee;
}
}
+
+ // Assert if we try to apply 2 different loop filters at the same position.
+ assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_8X8]));
+ assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_4X4]));
+ assert(!(lfm->left_y[TX_8X8] & lfm->left_y[TX_4X4]));
+ assert(!(lfm->int_4x4_y & lfm->left_y[TX_16X16]));
+ assert(!(lfm->left_uv[TX_16X16]&lfm->left_uv[TX_8X8]));
+ assert(!(lfm->left_uv[TX_16X16] & lfm->left_uv[TX_4X4]));
+ assert(!(lfm->left_uv[TX_8X8] & lfm->left_uv[TX_4X4]));
+ assert(!(lfm->int_4x4_uv & lfm->left_uv[TX_16X16]));
+ assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_8X8]));
+ assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_4X4]));
+ assert(!(lfm->above_y[TX_8X8] & lfm->above_y[TX_4X4]));
+ assert(!(lfm->int_4x4_y & lfm->above_y[TX_16X16]));
+ assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_8X8]));
+ assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_4X4]));
+ assert(!(lfm->above_uv[TX_8X8] & lfm->above_uv[TX_4X4]));
+ assert(!(lfm->int_4x4_uv & lfm->above_uv[TX_16X16]));
}
#if CONFIG_NON420
+static uint8_t build_lfi(const loop_filter_info_n *lfi_n,
+ const MB_MODE_INFO *mbmi) {
+ const int seg = mbmi->segment_id;
+ const int ref = mbmi->ref_frame[0];
+ const int mode = lfi_n->mode_lf_lut[mbmi->mode];
+ const int filter_level = lfi_n->lvl[seg][ref][mode];
+
+ return filter_level;
+}
+
static void filter_block_plane_non420(VP9_COMMON *cm,
struct macroblockd_plane *plane,
MODE_INFO **mi_8x8,
diff --git a/vp9/common/vp9_loopfilter_filters.c b/vp9/common/vp9_loopfilter_filters.c
index 2c4bf6c..9edf870 100644
--- a/vp9/common/vp9_loopfilter_filters.c
+++ b/vp9/common/vp9_loopfilter_filters.c
@@ -121,6 +121,34 @@
}
}
+void vp9_loop_filter_horizontal_edge_16_c(uint8_t *s, int p /* pitch */,
+ const uint8_t *blimit0,
+ const uint8_t *limit0,
+ const uint8_t *thresh0,
+ const uint8_t *blimit1,
+ const uint8_t *limit1,
+ const uint8_t *thresh1) {
+ int i, j;
+ const uint8_t *blimit = blimit0;
+ const uint8_t *limit = limit0;
+ const uint8_t *thresh = thresh0;
+
+ for (i = 0; i < 2; ++i) {
+ for (j = 0; j < 8; ++j) {
+ const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
+ const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p];
+ const int8_t mask = filter_mask(*limit, *blimit,
+ p3, p2, p1, p0, q0, q1, q2, q3);
+ const int8_t hev = hev_mask(*thresh, p1, p0, q0, q1);
+ filter4(mask, hev, s - 2 * p, s - 1 * p, s, s + 1 * p);
+ ++s;
+ }
+ blimit = blimit1;
+ limit = limit1;
+ thresh = thresh1;
+ }
+}
+
void vp9_loop_filter_vertical_edge_c(uint8_t *s, int pitch,
const uint8_t *blimit,
const uint8_t *limit,
@@ -185,6 +213,37 @@
}
}
+void vp9_mbloop_filter_horizontal_edge_16_c(uint8_t *s, int p /* pitch */,
+ const uint8_t *blimit0,
+ const uint8_t *limit0,
+ const uint8_t *thresh0,
+ const uint8_t *blimit1,
+ const uint8_t *limit1,
+ const uint8_t *thresh1) {
+ int i, j;
+ const uint8_t *blimit = blimit0;
+ const uint8_t *limit = limit0;
+ const uint8_t *thresh = thresh0;
+
+ for (i = 0; i < 2; ++i) {
+ for (j = 0; j < 8; ++j) {
+ const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
+ const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p];
+
+ const int8_t mask = filter_mask(*limit, *blimit,
+ p3, p2, p1, p0, q0, q1, q2, q3);
+ const int8_t hev = hev_mask(*thresh, p1, p0, q0, q1);
+ const int8_t flat = flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3);
+ filter8(mask, hev, flat, s - 4 * p, s - 3 * p, s - 2 * p, s - 1 * p,
+ s, s + 1 * p, s + 2 * p, s + 3 * p);
+ ++s;
+ }
+ blimit = blimit1;
+ limit = limit1;
+ thresh = thresh1;
+ }
+}
+
void vp9_mbloop_filter_vertical_edge_c(uint8_t *s, int pitch,
const uint8_t *blimit,
const uint8_t *limit,
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index a2af57a..fb959cb 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -38,6 +38,11 @@
#define NUM_FRAME_CONTEXTS_LOG2 2
#define NUM_FRAME_CONTEXTS (1 << NUM_FRAME_CONTEXTS_LOG2)
+extern const struct {
+ PARTITION_CONTEXT above;
+ PARTITION_CONTEXT left;
+} partition_context_lookup[BLOCK_SIZES];
+
typedef struct frame_contexts {
vp9_prob y_mode_prob[BLOCK_SIZE_GROUPS][INTRA_MODES - 1];
vp9_prob uv_mode_prob[INTRA_MODES][INTRA_MODES - 1];
@@ -298,52 +303,40 @@
static INLINE void update_partition_context(
PARTITION_CONTEXT *above_seg_context,
PARTITION_CONTEXT left_seg_context[8],
- int mi_row, int mi_col,
- BLOCK_SIZE sb_type,
- BLOCK_SIZE sb_size) {
- PARTITION_CONTEXT *above_ctx = above_seg_context + mi_col;
- PARTITION_CONTEXT *left_ctx = left_seg_context + (mi_row & MI_MASK);
+ int mi_row, int mi_col, BLOCK_SIZE subsize, BLOCK_SIZE bsize) {
+ PARTITION_CONTEXT *const above_ctx = above_seg_context + mi_col;
+ PARTITION_CONTEXT *const left_ctx = left_seg_context + (mi_row & MI_MASK);
- const int bsl = b_width_log2(sb_size), bs = (1 << bsl) / 2;
- const int bwl = b_width_log2(sb_type);
- const int bhl = b_height_log2(sb_type);
- const int boffset = b_width_log2(BLOCK_64X64) - bsl;
- const char pcval0 = ~(0xe << boffset);
- const char pcval1 = ~(0xf << boffset);
- const char pcvalue[2] = {pcval0, pcval1};
-
- assert(MAX(bwl, bhl) <= bsl);
+ // num_4x4_blocks_wide_lookup[bsize] / 2
+ const int bs = num_8x8_blocks_wide_lookup[bsize];
// update the partition context at the end notes. set partition bits
// of block sizes larger than the current one to be one, and partition
// bits of smaller block sizes to be zero.
- vpx_memset(above_ctx, pcvalue[bwl == bsl], bs);
- vpx_memset(left_ctx, pcvalue[bhl == bsl], bs);
+ vpx_memset(above_ctx, partition_context_lookup[subsize].above, bs);
+ vpx_memset(left_ctx, partition_context_lookup[subsize].left, bs);
}
static INLINE int partition_plane_context(
const PARTITION_CONTEXT *above_seg_context,
const PARTITION_CONTEXT left_seg_context[8],
- int mi_row, int mi_col,
- BLOCK_SIZE sb_type) {
+ int mi_row, int mi_col, BLOCK_SIZE bsize) {
const PARTITION_CONTEXT *above_ctx = above_seg_context + mi_col;
const PARTITION_CONTEXT *left_ctx = left_seg_context + (mi_row & MI_MASK);
- int bsl = mi_width_log2(sb_type), bs = 1 << bsl;
+ const int bsl = mi_width_log2(bsize);
+ const int bs = 1 << bsl;
int above = 0, left = 0, i;
- int boffset = mi_width_log2(BLOCK_64X64) - bsl;
- assert(mi_width_log2(sb_type) == mi_height_log2(sb_type));
+ assert(mi_width_log2(bsize) == mi_height_log2(bsize));
assert(bsl >= 0);
- assert(boffset >= 0);
- for (i = 0; i < bs; i++)
- above |= (above_ctx[i] & (1 << boffset));
- for (i = 0; i < bs; i++)
- left |= (left_ctx[i] & (1 << boffset));
-
- above = (above > 0);
- left = (left > 0);
+ for (i = 0; i < bs; i++) {
+ above |= above_ctx[i];
+ left |= left_ctx[i];
+ }
+ above = (above & bs) > 0;
+ left = (left & bs) > 0;
return (left * 2 + above) + bsl * PARTITION_PLOFFSET;
}
diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c
index 1c96788..3add81b 100644
--- a/vp9/common/vp9_reconinter.c
+++ b/vp9/common/vp9_reconinter.c
@@ -20,25 +20,6 @@
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
-void vp9_setup_interp_filters(MACROBLOCKD *xd,
- INTERPOLATION_TYPE mcomp_filter_type,
- VP9_COMMON *cm) {
- if (xd->mi_8x8 && xd->mi_8x8[0]) {
- MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
-
- set_scale_factors(xd, mbmi->ref_frame[0] - LAST_FRAME,
- mbmi->ref_frame[1] - LAST_FRAME,
- cm->active_ref_scale);
- } else {
- set_scale_factors(xd, -1, -1, cm->active_ref_scale);
- }
-
- xd->subpix.filter_x = xd->subpix.filter_y =
- vp9_get_filter_kernel(mcomp_filter_type == SWITCHABLE ?
- EIGHTTAP : mcomp_filter_type);
-
- assert(((intptr_t)xd->subpix.filter_x & 0xff) == 0);
-}
static void inter_predictor(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
@@ -117,16 +98,13 @@
return clamped_mv;
}
-struct build_inter_predictors_args {
- MACROBLOCKD *xd;
- int x, y;
-};
-static void build_inter_predictors(int plane, int block, BLOCK_SIZE bsize,
- int pred_w, int pred_h,
- void *argv) {
- const struct build_inter_predictors_args* const arg = argv;
- MACROBLOCKD *const xd = arg->xd;
+// TODO(jkoleszar): In principle, pred_w, pred_h are unnecessary, as we could
+// calculate the subsampled BLOCK_SIZE, but that type isn't defined for
+// sizes smaller than 16x16 yet.
+static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
+ BLOCK_SIZE bsize, int pred_w, int pred_h,
+ int mi_x, int mi_y) {
struct macroblockd_plane *const pd = &xd->plane[plane];
const int bwl = b_width_log2(bsize) - pd->subsampling_x;
const int bw = 4 << bwl;
@@ -172,7 +150,7 @@
if (vp9_is_scaled(scale->sfc)) {
pre = pre_buf->buf + scaled_buffer_offset(x, y, pre_buf->stride, scale);
- scale->sfc->set_scaled_offsets(scale, arg->y + y, arg->x + x);
+ scale->sfc->set_scaled_offsets(scale, mi_y + y, mi_x + x);
scaled_mv = scale->sfc->scale_mv(&mv_q4, scale);
xs = scale->sfc->x_step_q4;
ys = scale->sfc->y_step_q4;
@@ -190,40 +168,25 @@
}
}
-// TODO(jkoleszar): In principle, pred_w, pred_h are unnecessary, as we could
-// calculate the subsampled BLOCK_SIZE, but that type isn't defined for
-// sizes smaller than 16x16 yet.
-typedef void (*foreach_predicted_block_visitor)(int plane, int block,
- BLOCK_SIZE bsize,
- int pred_w, int pred_h,
- void *arg);
-static INLINE void foreach_predicted_block_in_plane(
- const MACROBLOCKD* const xd, BLOCK_SIZE bsize, int plane,
- foreach_predicted_block_visitor visit, void *arg) {
- const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x;
- const int bhl = b_height_log2(bsize) - xd->plane[plane].subsampling_y;
-
- if (xd->mi_8x8[0]->mbmi.sb_type < BLOCK_8X8) {
- int i = 0, x, y;
- assert(bsize == BLOCK_8X8);
- for (y = 0; y < 1 << bhl; ++y)
- for (x = 0; x < 1 << bwl; ++x)
- visit(plane, i++, bsize, 0, 0, arg);
- } else {
- visit(plane, 0, bsize, bwl, bhl, arg);
- }
-}
-
static void build_inter_predictors_for_planes(MACROBLOCKD *xd, BLOCK_SIZE bsize,
int mi_row, int mi_col,
int plane_from, int plane_to) {
int plane;
for (plane = plane_from; plane <= plane_to; ++plane) {
- struct build_inter_predictors_args args = {
- xd, mi_col * MI_SIZE, mi_row * MI_SIZE,
- };
- foreach_predicted_block_in_plane(xd, bsize, plane, build_inter_predictors,
- &args);
+ const int mi_x = mi_col * MI_SIZE;
+ const int mi_y = mi_row * MI_SIZE;
+ const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x;
+ const int bhl = b_height_log2(bsize) - xd->plane[plane].subsampling_y;
+
+ if (xd->mi_8x8[0]->mbmi.sb_type < BLOCK_8X8) {
+ int i = 0, x, y;
+ assert(bsize == BLOCK_8X8);
+ for (y = 0; y < 1 << bhl; ++y)
+ for (x = 0; x < 1 << bwl; ++x)
+ build_inter_predictors(xd, plane, i++, bsize, 0, 0, mi_x, mi_y);
+ } else {
+ build_inter_predictors(xd, plane, 0, bsize, bwl, bhl, mi_x, mi_y);
+ }
}
}
diff --git a/vp9/common/vp9_reconinter.h b/vp9/common/vp9_reconinter.h
index 2c8a6e4..b328754 100644
--- a/vp9/common/vp9_reconinter.h
+++ b/vp9/common/vp9_reconinter.h
@@ -24,10 +24,6 @@
void vp9_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col,
BLOCK_SIZE bsize);
-void vp9_setup_interp_filters(MACROBLOCKD *xd,
- INTERPOLATION_TYPE filter,
- VP9_COMMON *cm);
-
void vp9_build_inter_predictor(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
const MV *mv_q3,
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index 2c0864e..e18e757 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -205,9 +205,15 @@
prototype void vp9_mbloop_filter_horizontal_edge "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
specialize vp9_mbloop_filter_horizontal_edge sse2 neon dspr2
+prototype void vp9_mbloop_filter_horizontal_edge_16 "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"
+specialize vp9_mbloop_filter_horizontal_edge_16 sse2 neon
+
prototype void vp9_loop_filter_horizontal_edge "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
specialize vp9_loop_filter_horizontal_edge mmx neon dspr2
+prototype void vp9_loop_filter_horizontal_edge_16 "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"
+specialize vp9_loop_filter_horizontal_edge_16 sse2 neon
+
#
# post proc
#
@@ -296,7 +302,8 @@
specialize vp9_idct32x32_1024_add sse2 neon dspr2
prototype void vp9_idct32x32_34_add "const int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_idct32x32_34_add sse2 dspr2
+specialize vp9_idct32x32_34_add sse2 neon dspr2
+vp9_idct32x32_34_add_neon=vp9_idct32x32_1024_add_neon
prototype void vp9_idct32x32_1_add "const int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_idct32x32_1_add sse2 neon dspr2
diff --git a/vp9/common/vp9_scan.c b/vp9/common/vp9_scan.c
index f17da91..f62150f 100644
--- a/vp9/common/vp9_scan.c
+++ b/vp9/common/vp9_scan.c
@@ -266,6 +266,62 @@
DECLARE_ALIGNED(16, int16_t, vp9_default_iscan_16x16[256]);
DECLARE_ALIGNED(16, int16_t, vp9_default_iscan_32x32[1024]);
+const scan_order inter_scan_orders[TX_SIZES] = {
+ {vp9_default_scan_4x4, vp9_default_scan_4x4_neighbors}, // NEWMV
+ {vp9_default_scan_8x8, vp9_default_scan_8x8_neighbors}, // NEWMV
+ {vp9_default_scan_16x16, vp9_default_scan_16x16_neighbors}, // NEWMV
+ {vp9_default_scan_32x32, vp9_default_scan_32x32_neighbors}, // NEWMV
+};
+
+const scan_order intra_scan_orders[TX_SIZES][INTRA_MODES] = {
+ { // 4X4
+ {vp9_default_scan_4x4, vp9_default_scan_4x4_neighbors}, // DC
+ {vp9_row_scan_4x4, vp9_row_scan_4x4_neighbors}, // V
+ {vp9_col_scan_4x4, vp9_col_scan_4x4_neighbors}, // H
+ {vp9_default_scan_4x4, vp9_default_scan_4x4_neighbors}, // D45
+ {vp9_default_scan_4x4, vp9_default_scan_4x4_neighbors}, // D135
+ {vp9_row_scan_4x4, vp9_row_scan_4x4_neighbors}, // D117
+ {vp9_col_scan_4x4, vp9_col_scan_4x4_neighbors}, // D153
+ {vp9_col_scan_4x4, vp9_col_scan_4x4_neighbors}, // D207
+ {vp9_row_scan_4x4, vp9_row_scan_4x4_neighbors}, // D63
+ {vp9_default_scan_4x4, vp9_default_scan_4x4_neighbors}, // TM
+ }, { // 8x8
+ {vp9_default_scan_8x8, vp9_default_scan_8x8_neighbors}, // DC
+ {vp9_row_scan_8x8, vp9_row_scan_8x8_neighbors}, // V
+ {vp9_col_scan_8x8, vp9_col_scan_8x8_neighbors}, // H
+ {vp9_default_scan_8x8, vp9_default_scan_8x8_neighbors}, // D45
+ {vp9_default_scan_8x8, vp9_default_scan_8x8_neighbors}, // D135
+ {vp9_row_scan_8x8, vp9_row_scan_8x8_neighbors}, // D117
+ {vp9_col_scan_8x8, vp9_col_scan_8x8_neighbors}, // D153
+ {vp9_col_scan_8x8, vp9_col_scan_8x8_neighbors}, // D207
+ {vp9_row_scan_8x8, vp9_row_scan_8x8_neighbors}, // D63
+ {vp9_default_scan_8x8, vp9_default_scan_8x8_neighbors}, // TM
+ }, { // 16x16
+ {vp9_default_scan_16x16, vp9_default_scan_16x16_neighbors}, // DC
+ {vp9_row_scan_16x16, vp9_row_scan_16x16_neighbors}, // V
+ {vp9_col_scan_16x16, vp9_col_scan_16x16_neighbors}, // H
+ {vp9_default_scan_16x16, vp9_default_scan_16x16_neighbors}, // D45
+ {vp9_default_scan_16x16, vp9_default_scan_16x16_neighbors}, // D135
+ {vp9_row_scan_16x16, vp9_row_scan_16x16_neighbors}, // D117
+ {vp9_col_scan_16x16, vp9_col_scan_16x16_neighbors}, // D153
+ {vp9_col_scan_16x16, vp9_col_scan_16x16_neighbors}, // D207
+ {vp9_row_scan_16x16, vp9_row_scan_16x16_neighbors}, // D63
+ {vp9_default_scan_16x16, vp9_default_scan_16x16_neighbors}, // TM
+ }, { // 32x32
+ {vp9_default_scan_32x32, vp9_default_scan_32x32_neighbors}, // DC
+ {vp9_default_scan_32x32, vp9_default_scan_32x32_neighbors}, // V
+ {vp9_default_scan_32x32, vp9_default_scan_32x32_neighbors}, // H
+ {vp9_default_scan_32x32, vp9_default_scan_32x32_neighbors}, // D45
+ {vp9_default_scan_32x32, vp9_default_scan_32x32_neighbors}, // D135
+ {vp9_default_scan_32x32, vp9_default_scan_32x32_neighbors}, // D117
+ {vp9_default_scan_32x32, vp9_default_scan_32x32_neighbors}, // D153
+ {vp9_default_scan_32x32, vp9_default_scan_32x32_neighbors}, // D207
+ {vp9_default_scan_32x32, vp9_default_scan_32x32_neighbors}, // D63
+ {vp9_default_scan_32x32, vp9_default_scan_32x32_neighbors}, // TM
+ }
+};
+
+
static int find_in_scan(const int16_t *scan, int l, int idx) {
int n, l2 = l * l;
for (n = 0; n < l2; n++) {
diff --git a/vp9/common/vp9_scan.h b/vp9/common/vp9_scan.h
index 14a1a7e..98fc607 100644
--- a/vp9/common/vp9_scan.h
+++ b/vp9/common/vp9_scan.h
@@ -15,6 +15,7 @@
#include "vpx_ports/mem.h"
#include "vp9/common/vp9_enums.h"
+#include "vp9/common/vp9_blockd.h"
#define MAX_NEIGHBORS 2
@@ -67,9 +68,16 @@
extern DECLARE_ALIGNED(16, int16_t,
vp9_default_scan_32x32_neighbors[1025 * MAX_NEIGHBORS]);
-
void vp9_init_neighbors();
+typedef struct {
+ const int16_t *scan;
+ const int16_t *neighbors;
+} scan_order;
+
+extern const scan_order intra_scan_orders[TX_SIZES][INTRA_MODES];
+extern const scan_order inter_scan_orders[TX_SIZES];
+
static INLINE const int16_t* get_scan_4x4(TX_TYPE tx_type) {
switch (tx_type) {
case ADST_DCT:
diff --git a/vp9/common/vp9_tile_common.c b/vp9/common/vp9_tile_common.c
index e3035d0..78909dd 100644
--- a/vp9/common/vp9_tile_common.c
+++ b/vp9/common/vp9_tile_common.c
@@ -15,46 +15,37 @@
#define MIN_TILE_WIDTH_B64 4
#define MAX_TILE_WIDTH_B64 64
-static int to_sbs(n_mis) {
- return mi_cols_aligned_to_sb(n_mis) >> MI_BLOCK_SIZE_LOG2;
+static int get_tile_offset(int idx, int mis, int log2) {
+ const int sb_cols = mi_cols_aligned_to_sb(mis) >> MI_BLOCK_SIZE_LOG2;
+ const int offset = ((idx * sb_cols) >> log2) << MI_BLOCK_SIZE_LOG2;
+ return MIN(offset, mis);
}
-static void get_tile_offsets(int *min_tile_off, int *max_tile_off,
- int tile_idx, int log2_n_tiles, int n_mis) {
- const int n_sbs = to_sbs(n_mis);
- const int sb_off1 = (tile_idx * n_sbs) >> log2_n_tiles;
- const int sb_off2 = ((tile_idx + 1) * n_sbs) >> log2_n_tiles;
-
- *min_tile_off = MIN(sb_off1 << 3, n_mis);
- *max_tile_off = MIN(sb_off2 << 3, n_mis);
-}
-
-void vp9_tile_init(TileInfo *tile, const VP9_COMMON *cm,
- int row_idx, int col_idx) {
- get_tile_offsets(&tile->mi_row_start, &tile->mi_row_end,
- row_idx, cm->log2_tile_rows, cm->mi_rows);
- get_tile_offsets(&tile->mi_col_start, &tile->mi_col_end,
- col_idx, cm->log2_tile_cols, cm->mi_cols);
+void vp9_tile_init(TileInfo *tile, const VP9_COMMON *cm, int row, int col) {
+ tile->mi_row_start = get_tile_offset(row, cm->mi_rows, cm->log2_tile_rows);
+ tile->mi_row_end = get_tile_offset(row + 1, cm->mi_rows, cm->log2_tile_rows);
+ tile->mi_col_start = get_tile_offset(col, cm->mi_cols, cm->log2_tile_cols);
+ tile->mi_col_end = get_tile_offset(col + 1, cm->mi_cols, cm->log2_tile_cols);
}
void vp9_get_tile_n_bits(int mi_cols,
int *min_log2_tile_cols, int *max_log2_tile_cols) {
- const int sb_cols = to_sbs(mi_cols);
- int min_log2_n_tiles, max_log2_n_tiles;
+ const int sb_cols = mi_cols_aligned_to_sb(mi_cols) >> MI_BLOCK_SIZE_LOG2;
+ int min_log2 = 0, max_log2 = 0;
- for (max_log2_n_tiles = 0;
- (sb_cols >> max_log2_n_tiles) >= MIN_TILE_WIDTH_B64;
- max_log2_n_tiles++) {}
- max_log2_n_tiles--;
- if (max_log2_n_tiles < 0)
- max_log2_n_tiles = 0;
+ // max
+ while ((sb_cols >> max_log2) >= MIN_TILE_WIDTH_B64)
+ ++max_log2;
+ --max_log2;
+ if (max_log2 < 0)
+ max_log2 = 0;
- for (min_log2_n_tiles = 0;
- (MAX_TILE_WIDTH_B64 << min_log2_n_tiles) < sb_cols;
- min_log2_n_tiles++) {}
+ // min
+ while ((MAX_TILE_WIDTH_B64 << min_log2) < sb_cols)
+ ++min_log2;
- assert(min_log2_n_tiles <= max_log2_n_tiles);
+ assert(min_log2 <= max_log2);
- *min_log2_tile_cols = min_log2_n_tiles;
- *max_log2_tile_cols = max_log2_n_tiles;
+ *min_log2_tile_cols = min_log2;
+ *max_log2_tile_cols = max_log2;
}
diff --git a/vp9/common/vp9_tile_common.h b/vp9/common/vp9_tile_common.h
index a110abb..a09876e 100644
--- a/vp9/common/vp9_tile_common.h
+++ b/vp9/common/vp9_tile_common.h
@@ -18,10 +18,10 @@
int mi_col_start, mi_col_end;
} TileInfo;
-// initializes 'tile->mi_(row|col)_(start|end)' for (row_idx, col_idx) based on
+// initializes 'tile->mi_(row|col)_(start|end)' for (row, col) based on
// 'cm->log2_tile_(rows|cols)' & 'cm->mi_(rows|cols)'
void vp9_tile_init(TileInfo *tile, const struct VP9Common *cm,
- int row_idx, int col_idx);
+ int row, int col);
void vp9_get_tile_n_bits(int mi_cols,
int *min_log2_tile_cols, int *max_log2_tile_cols);
diff --git a/vp9/common/x86/vp9_idct_intrin_sse2.c b/vp9/common/x86/vp9_idct_intrin_sse2.c
index 2a33844..c65184f 100644
--- a/vp9/common/x86/vp9_idct_intrin_sse2.c
+++ b/vp9/common/x86/vp9_idct_intrin_sse2.c
@@ -174,12 +174,10 @@
static INLINE void transpose_4x4(__m128i *res) {
const __m128i tr0_0 = _mm_unpacklo_epi16(res[0], res[1]);
- const __m128i tr0_1 = _mm_unpacklo_epi16(res[2], res[3]);
- res[0] = _mm_unpacklo_epi32(tr0_0, tr0_1);
- res[2] = _mm_unpackhi_epi32(tr0_0, tr0_1);
+ const __m128i tr0_1 = _mm_unpackhi_epi16(res[0], res[1]);
- res[1] = _mm_unpackhi_epi64(res[0], res[0]);
- res[3] = _mm_unpackhi_epi64(res[2], res[2]);
+ res[0] = _mm_unpacklo_epi16(tr0_0, tr0_1);
+ res[1] = _mm_unpackhi_epi16(tr0_0, tr0_1);
}
static void idct4_1d_sse2(__m128i *in) {
@@ -192,8 +190,8 @@
transpose_4x4(in);
// stage 1
- u[0] = _mm_unpacklo_epi16(in[0], in[2]);
- u[1] = _mm_unpacklo_epi16(in[1], in[3]);
+ u[0] = _mm_unpacklo_epi16(in[0], in[1]);
+ u[1] = _mm_unpackhi_epi16(in[0], in[1]);
v[0] = _mm_madd_epi16(u[0], k__cospi_p16_p16);
v[1] = _mm_madd_epi16(u[0], k__cospi_p16_m16);
v[2] = _mm_madd_epi16(u[1], k__cospi_p24_m08);
@@ -209,16 +207,13 @@
v[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS);
v[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS);
- u[0] = _mm_packs_epi32(v[0], v[2]);
- u[1] = _mm_packs_epi32(v[1], v[3]);
- u[2] = _mm_unpackhi_epi64(u[0], u[0]);
- u[3] = _mm_unpackhi_epi64(u[1], u[1]);
+ u[0] = _mm_packs_epi32(v[0], v[1]);
+ u[1] = _mm_packs_epi32(v[3], v[2]);
// stage 2
- in[0] = _mm_add_epi16(u[0], u[3]);
- in[1] = _mm_add_epi16(u[1], u[2]);
- in[2] = _mm_sub_epi16(u[1], u[2]);
- in[3] = _mm_sub_epi16(u[0], u[3]);
+ in[0] = _mm_add_epi16(u[0], u[1]);
+ in[1] = _mm_sub_epi16(u[0], u[1]);
+ in[1] = _mm_shuffle_epi32(in[1], 0x4E);
}
static void iadst4_1d_sse2(__m128i *in) {
@@ -232,13 +227,14 @@
__m128i u[8], v[8], in7;
transpose_4x4(in);
- in7 = _mm_add_epi16(in[0], in[3]);
- in7 = _mm_sub_epi16(in7, in[2]);
+ in7 = _mm_srli_si128(in[1], 8);
+ in7 = _mm_add_epi16(in7, in[0]);
+ in7 = _mm_sub_epi16(in7, in[1]);
- u[0] = _mm_unpacklo_epi16(in[0], in[2]);
- u[1] = _mm_unpacklo_epi16(in[1], in[3]);
+ u[0] = _mm_unpacklo_epi16(in[0], in[1]);
+ u[1] = _mm_unpackhi_epi16(in[0], in[1]);
u[2] = _mm_unpacklo_epi16(in7, kZero);
- u[3] = _mm_unpacklo_epi16(in[1], kZero);
+ u[3] = _mm_unpackhi_epi16(in[0], kZero);
v[0] = _mm_madd_epi16(u[0], k__sinpi_p01_p04); // s0 + s3
v[1] = _mm_madd_epi16(u[1], k__sinpi_p03_p02); // s2 + s5
@@ -265,22 +261,18 @@
u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS);
u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS);
- in[0] = _mm_packs_epi32(u[0], u[2]);
- in[1] = _mm_packs_epi32(u[1], u[3]);
- in[2] = _mm_unpackhi_epi64(in[0], in[0]);
- in[3] = _mm_unpackhi_epi64(in[1], in[1]);
+ in[0] = _mm_packs_epi32(u[0], u[1]);
+ in[1] = _mm_packs_epi32(u[2], u[3]);
}
void vp9_iht4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride,
int tx_type) {
- __m128i in[4];
+ __m128i in[2];
const __m128i zero = _mm_setzero_si128();
const __m128i eight = _mm_set1_epi16(8);
- in[0] = _mm_loadl_epi64((const __m128i *)input);
- in[1] = _mm_loadl_epi64((const __m128i *)(input + 4));
- in[2] = _mm_loadl_epi64((const __m128i *)(input + 8));
- in[3] = _mm_loadl_epi64((const __m128i *)(input + 12));
+ in[0]= _mm_loadu_si128((const __m128i *)(input));
+ in[1]= _mm_loadu_si128((const __m128i *)(input + 8));
switch (tx_type) {
case 0: // DCT_DCT
@@ -307,18 +299,35 @@
// Final round and shift
in[0] = _mm_add_epi16(in[0], eight);
in[1] = _mm_add_epi16(in[1], eight);
- in[2] = _mm_add_epi16(in[2], eight);
- in[3] = _mm_add_epi16(in[3], eight);
in[0] = _mm_srai_epi16(in[0], 4);
in[1] = _mm_srai_epi16(in[1], 4);
- in[2] = _mm_srai_epi16(in[2], 4);
- in[3] = _mm_srai_epi16(in[3], 4);
- RECON_AND_STORE4X4(dest, in[0]);
- RECON_AND_STORE4X4(dest, in[1]);
- RECON_AND_STORE4X4(dest, in[2]);
- RECON_AND_STORE4X4(dest, in[3]);
+ // Reconstruction and Store
+ {
+ __m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest));
+ __m128i d2 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 2));
+ d0 = _mm_unpacklo_epi32(d0,
+ _mm_cvtsi32_si128(*(const int *) (dest + stride)));
+ d2 = _mm_unpacklo_epi32(d2, _mm_cvtsi32_si128(
+ *(const int *) (dest + stride * 3)));
+ d0 = _mm_unpacklo_epi8(d0, zero);
+ d2 = _mm_unpacklo_epi8(d2, zero);
+ d0 = _mm_add_epi16(d0, in[0]);
+ d2 = _mm_add_epi16(d2, in[1]);
+ d0 = _mm_packus_epi16(d0, d2);
+ // store result[0]
+ *(int *)dest = _mm_cvtsi128_si32(d0);
+ // store result[1]
+ d0 = _mm_srli_si128(d0, 4);
+ *(int *)(dest + stride) = _mm_cvtsi128_si32(d0);
+ // store result[2]
+ d0 = _mm_srli_si128(d0, 4);
+ *(int *)(dest + stride * 2) = _mm_cvtsi128_si32(d0);
+ // store result[3]
+ d0 = _mm_srli_si128(d0, 4);
+ *(int *)(dest + stride * 3) = _mm_cvtsi128_si32(d0);
+ }
}
#define TRANSPOSE_8X8(in0, in1, in2, in3, in4, in5, in6, in7, \
diff --git a/vp9/common/x86/vp9_loopfilter_intrin_sse2.c b/vp9/common/x86/vp9_loopfilter_intrin_sse2.c
index fa4dd9b..925f74d 100644
--- a/vp9/common/x86/vp9_loopfilter_intrin_sse2.c
+++ b/vp9/common/x86/vp9_loopfilter_intrin_sse2.c
@@ -17,20 +17,14 @@
const unsigned char *_blimit,
const unsigned char *_limit,
const unsigned char *_thresh) {
- __m128i mask, hev, flat, flat2;
const __m128i zero = _mm_set1_epi16(0);
const __m128i one = _mm_set1_epi8(1);
+ const __m128i blimit = _mm_load_si128((const __m128i *)_blimit);
+ const __m128i limit = _mm_load_si128((const __m128i *)_limit);
+ const __m128i thresh = _mm_load_si128((const __m128i *)_thresh);
+ __m128i mask, hev, flat, flat2;
__m128i q7p7, q6p6, q5p5, q4p4, q3p3, q2p2, q1p1, q0p0, p0q0, p1q1;
__m128i abs_p1p0;
- const unsigned int extended_thresh = _thresh[0] * 0x01010101u;
- const unsigned int extended_limit = _limit[0] * 0x01010101u;
- const unsigned int extended_blimit = _blimit[0] * 0x01010101u;
- const __m128i thresh =
- _mm_shuffle_epi32(_mm_cvtsi32_si128((int)extended_thresh), 0);
- const __m128i limit =
- _mm_shuffle_epi32(_mm_cvtsi32_si128((int)extended_limit), 0);
- const __m128i blimit =
- _mm_shuffle_epi32(_mm_cvtsi32_si128((int)extended_blimit), 0);
q4p4 = _mm_loadl_epi64((__m128i *)(s - 5 * p));
q4p4 = _mm_castps_si128(_mm_loadh_pi(_mm_castsi128_ps(q4p4),
@@ -375,32 +369,25 @@
const unsigned char *_blimit,
const unsigned char *_limit,
const unsigned char *_thresh) {
- DECLARE_ALIGNED(16, unsigned char, flat2_op[7][16]);
- DECLARE_ALIGNED(16, unsigned char, flat2_oq[7][16]);
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, flat2_op, 7 * 16);
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, flat2_oq, 7 * 16);
- DECLARE_ALIGNED(16, unsigned char, flat_op[3][16]);
- DECLARE_ALIGNED(16, unsigned char, flat_oq[3][16]);
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_op, 3 * 16);
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_oq, 3 * 16);
- DECLARE_ALIGNED(16, unsigned char, ap[8][16]);
- DECLARE_ALIGNED(16, unsigned char, aq[8][16]);
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, ap, 8 * 16);
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, aq, 8 * 16);
-
- __m128i mask, hev, flat, flat2;
const __m128i zero = _mm_set1_epi16(0);
const __m128i one = _mm_set1_epi8(1);
+ const __m128i blimit = _mm_load_si128((const __m128i *)_blimit);
+ const __m128i limit = _mm_load_si128((const __m128i *)_limit);
+ const __m128i thresh = _mm_load_si128((const __m128i *)_thresh);
+ __m128i mask, hev, flat, flat2;
__m128i p7, p6, p5;
__m128i p4, p3, p2, p1, p0, q0, q1, q2, q3, q4;
__m128i q5, q6, q7;
int i = 0;
- const unsigned int extended_thresh = _thresh[0] * 0x01010101u;
- const unsigned int extended_limit = _limit[0] * 0x01010101u;
- const unsigned int extended_blimit = _blimit[0] * 0x01010101u;
- const __m128i thresh =
- _mm_shuffle_epi32(_mm_cvtsi32_si128((int)extended_thresh), 0);
- const __m128i limit =
- _mm_shuffle_epi32(_mm_cvtsi32_si128((int)extended_limit), 0);
- const __m128i blimit =
- _mm_shuffle_epi32(_mm_cvtsi32_si128((int)extended_blimit), 0);
p4 = _mm_loadu_si128((__m128i *)(s - 5 * p));
p3 = _mm_loadu_si128((__m128i *)(s - 4 * p));
@@ -413,16 +400,16 @@
q3 = _mm_loadu_si128((__m128i *)(s + 3 * p));
q4 = _mm_loadu_si128((__m128i *)(s + 4 * p));
- _mm_store_si128((__m128i *)ap[4], p4);
- _mm_store_si128((__m128i *)ap[3], p3);
- _mm_store_si128((__m128i *)ap[2], p2);
- _mm_store_si128((__m128i *)ap[1], p1);
- _mm_store_si128((__m128i *)ap[0], p0);
- _mm_store_si128((__m128i *)aq[4], q4);
- _mm_store_si128((__m128i *)aq[3], q3);
- _mm_store_si128((__m128i *)aq[2], q2);
- _mm_store_si128((__m128i *)aq[1], q1);
- _mm_store_si128((__m128i *)aq[0], q0);
+ _mm_store_si128((__m128i *)&ap[4 * 16], p4);
+ _mm_store_si128((__m128i *)&ap[3 * 16], p3);
+ _mm_store_si128((__m128i *)&ap[2 * 16], p2);
+ _mm_store_si128((__m128i *)&ap[1 * 16], p1);
+ _mm_store_si128((__m128i *)&ap[0 * 16], p0);
+ _mm_store_si128((__m128i *)&aq[4 * 16], q4);
+ _mm_store_si128((__m128i *)&aq[3 * 16], q3);
+ _mm_store_si128((__m128i *)&aq[2 * 16], q2);
+ _mm_store_si128((__m128i *)&aq[1 * 16], q1);
+ _mm_store_si128((__m128i *)&aq[0 * 16], q0);
{
@@ -546,8 +533,8 @@
_mm_subs_epu8(p0, p5)),
_mm_or_si128(_mm_subs_epu8(q5, q0),
_mm_subs_epu8(q0, q5)));
- _mm_store_si128((__m128i *)ap[5], p5);
- _mm_store_si128((__m128i *)aq[5], q5);
+ _mm_store_si128((__m128i *)&ap[5 * 16], p5);
+ _mm_store_si128((__m128i *)&aq[5 * 16], q5);
flat2 = _mm_max_epu8(work, flat2);
p6 = _mm_loadu_si128((__m128i *)(s - 7 * p));
q6 = _mm_loadu_si128((__m128i *)(s + 6 * p));
@@ -555,8 +542,8 @@
_mm_subs_epu8(p0, p6)),
_mm_or_si128(_mm_subs_epu8(q6, q0),
_mm_subs_epu8(q0, q6)));
- _mm_store_si128((__m128i *)ap[6], p6);
- _mm_store_si128((__m128i *)aq[6], q6);
+ _mm_store_si128((__m128i *)&ap[6 * 16], p6);
+ _mm_store_si128((__m128i *)&aq[6 * 16], q6);
flat2 = _mm_max_epu8(work, flat2);
p7 = _mm_loadu_si128((__m128i *)(s - 8 * p));
@@ -565,8 +552,8 @@
_mm_subs_epu8(p0, p7)),
_mm_or_si128(_mm_subs_epu8(q7, q0),
_mm_subs_epu8(q0, q7)));
- _mm_store_si128((__m128i *)ap[7], p7);
- _mm_store_si128((__m128i *)aq[7], q7);
+ _mm_store_si128((__m128i *)&ap[7 * 16], p7);
+ _mm_store_si128((__m128i *)&aq[7 * 16], q7);
flat2 = _mm_max_epu8(work, flat2);
flat2 = _mm_subs_epu8(flat2, one);
flat2 = _mm_cmpeq_epi8(flat2, zero);
@@ -586,22 +573,38 @@
__m128i a, b, c;
unsigned int off = i * 8;
- p7 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(ap[7] + off)), zero);
- p6 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(ap[6] + off)), zero);
- p5 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(ap[5] + off)), zero);
- p4 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(ap[4] + off)), zero);
- p3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(ap[3] + off)), zero);
- p2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(ap[2] + off)), zero);
- p1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(ap[1] + off)), zero);
- p0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(ap[0] + off)), zero);
- q0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(aq[0] + off)), zero);
- q1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(aq[1] + off)), zero);
- q2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(aq[2] + off)), zero);
- q3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(aq[3] + off)), zero);
- q4 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(aq[4] + off)), zero);
- q5 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(aq[5] + off)), zero);
- q6 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(aq[6] + off)), zero);
- q7 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(aq[7] + off)), zero);
+ p7 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(&ap[7 * 16] + off)),
+ zero);
+ p6 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(&ap[6 * 16] + off)),
+ zero);
+ p5 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(&ap[5 * 16] + off)),
+ zero);
+ p4 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(&ap[4 * 16] + off)),
+ zero);
+ p3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(&ap[3 * 16] + off)),
+ zero);
+ p2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(&ap[2 * 16] + off)),
+ zero);
+ p1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(&ap[1 * 16] + off)),
+ zero);
+ p0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(&ap[0 * 16] + off)),
+ zero);
+ q0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(&aq[0 * 16] + off)),
+ zero);
+ q1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(&aq[1 * 16] + off)),
+ zero);
+ q2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(&aq[2 * 16] + off)),
+ zero);
+ q3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(&aq[3 * 16] + off)),
+ zero);
+ q4 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(&aq[4 * 16] + off)),
+ zero);
+ q5 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(&aq[5 * 16] + off)),
+ zero);
+ q6 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(&aq[6 * 16] + off)),
+ zero);
+ q7 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(&aq[7 * 16] + off)),
+ zero);
c = _mm_sub_epi16(_mm_slli_epi16(p7, 3), p7); // p7 * 7
c = _mm_add_epi16(_mm_slli_epi16(p6, 1), _mm_add_epi16(p4, c));
@@ -610,117 +613,117 @@
a = _mm_add_epi16(p3, _mm_add_epi16(p2, p1));
a = _mm_add_epi16(_mm_add_epi16(p0, q0), a);
- _mm_storel_epi64((__m128i *)&flat_op[2][i*8],
+ _mm_storel_epi64((__m128i *)&flat_op[2 * 16 + i * 8],
_mm_packus_epi16(_mm_srli_epi16(_mm_add_epi16(a, b), 3)
, b));
c = _mm_add_epi16(_mm_add_epi16(p5, eight), c);
workp_shft = _mm_srli_epi16(_mm_add_epi16(a, c), 4);
- _mm_storel_epi64((__m128i *)&flat2_op[6][i*8],
+ _mm_storel_epi64((__m128i *)&flat2_op[6 * 16 + i * 8],
_mm_packus_epi16(workp_shft, workp_shft));
a = _mm_add_epi16(q1, a);
b = _mm_add_epi16(_mm_sub_epi16(b, _mm_add_epi16(p3, p2)), p1);
- _mm_storel_epi64((__m128i *)&flat_op[1][i*8],
+ _mm_storel_epi64((__m128i *)&flat_op[1 * 16 + i * 8],
_mm_packus_epi16(_mm_srli_epi16(_mm_add_epi16(a, b), 3)
, b));
c = _mm_add_epi16(_mm_sub_epi16(c, _mm_add_epi16(p7, p6)), p5);
workp_shft = _mm_srli_epi16(_mm_add_epi16(a, c), 4);
- _mm_storel_epi64((__m128i *)&flat2_op[5][i*8],
+ _mm_storel_epi64((__m128i *)&flat2_op[5 * 16 + i * 8],
_mm_packus_epi16(workp_shft, workp_shft));
a = _mm_add_epi16(q2, a);
b = _mm_add_epi16(_mm_sub_epi16(b, _mm_add_epi16(p3, p1)), p0);
- _mm_storel_epi64((__m128i *)&flat_op[0][i*8],
+ _mm_storel_epi64((__m128i *)&flat_op[i * 8],
_mm_packus_epi16(_mm_srli_epi16(_mm_add_epi16(a, b), 3)
, b));
c = _mm_add_epi16(_mm_sub_epi16(c, _mm_add_epi16(p7, p5)), p4);
workp_shft = _mm_srli_epi16(_mm_add_epi16(a, c), 4);
- _mm_storel_epi64((__m128i *)&flat2_op[4][i*8],
+ _mm_storel_epi64((__m128i *)&flat2_op[4 * 16 + i * 8],
_mm_packus_epi16(workp_shft, workp_shft));
a = _mm_add_epi16(q3, a);
b = _mm_add_epi16(_mm_sub_epi16(b, _mm_add_epi16(p3, p0)), q0);
- _mm_storel_epi64((__m128i *)&flat_oq[0][i*8],
+ _mm_storel_epi64((__m128i *)&flat_oq[i * 8],
_mm_packus_epi16(_mm_srli_epi16(_mm_add_epi16(a, b), 3)
, b));
c = _mm_add_epi16(_mm_sub_epi16(c, _mm_add_epi16(p7, p4)), p3);
workp_shft = _mm_srli_epi16(_mm_add_epi16(a, c), 4);
- _mm_storel_epi64((__m128i *)&flat2_op[3][i*8],
+ _mm_storel_epi64((__m128i *)&flat2_op[3 * 16 + i * 8],
_mm_packus_epi16(workp_shft, workp_shft));
b = _mm_add_epi16(q3, b);
b = _mm_add_epi16(_mm_sub_epi16(b, _mm_add_epi16(p2, q0)), q1);
- _mm_storel_epi64((__m128i *)&flat_oq[1][i*8],
+ _mm_storel_epi64((__m128i *)&flat_oq[16 + i * 8],
_mm_packus_epi16(_mm_srli_epi16(_mm_add_epi16(a, b), 3)
, b));
c = _mm_add_epi16(q4, c);
c = _mm_add_epi16(_mm_sub_epi16(c, _mm_add_epi16(p7, p3)), p2);
workp_shft = _mm_srli_epi16(_mm_add_epi16(a, c), 4);
- _mm_storel_epi64((__m128i *)&flat2_op[2][i*8],
+ _mm_storel_epi64((__m128i *)&flat2_op[2 * 16 + i * 8],
_mm_packus_epi16(workp_shft, workp_shft));
b = _mm_add_epi16(q3, b);
b = _mm_add_epi16(_mm_sub_epi16(b, _mm_add_epi16(p1, q1)), q2);
- _mm_storel_epi64((__m128i *)&flat_oq[2][i*8],
+ _mm_storel_epi64((__m128i *)&flat_oq[2 * 16 + i * 8],
_mm_packus_epi16(_mm_srli_epi16(_mm_add_epi16(a, b), 3)
, b));
a = _mm_add_epi16(q5, a);
c = _mm_add_epi16(_mm_sub_epi16(c, _mm_add_epi16(p7, p2)), p1);
workp_shft = _mm_srli_epi16(_mm_add_epi16(a, c), 4);
- _mm_storel_epi64((__m128i *)&flat2_op[1][i*8],
+ _mm_storel_epi64((__m128i *)&flat2_op[16 + i * 8],
_mm_packus_epi16(workp_shft, workp_shft));
a = _mm_add_epi16(q6, a);
c = _mm_add_epi16(_mm_sub_epi16(c, _mm_add_epi16(p7, p1)), p0);
workp_shft = _mm_srli_epi16(_mm_add_epi16(a, c), 4);
- _mm_storel_epi64((__m128i *)&flat2_op[0][i*8],
+ _mm_storel_epi64((__m128i *)&flat2_op[i * 8],
_mm_packus_epi16(workp_shft, workp_shft));
a = _mm_add_epi16(q7, a);
c = _mm_add_epi16(_mm_sub_epi16(c, _mm_add_epi16(p7, p0)), q0);
workp_shft = _mm_srli_epi16(_mm_add_epi16(a, c), 4);
- _mm_storel_epi64((__m128i *)&flat2_oq[0][i*8],
+ _mm_storel_epi64((__m128i *)&flat2_oq[i * 8],
_mm_packus_epi16(workp_shft, workp_shft));
a = _mm_add_epi16(q7, a);
c = _mm_add_epi16(_mm_sub_epi16(c, _mm_add_epi16(p6, q0)), q1);
workp_shft = _mm_srli_epi16(_mm_add_epi16(a, c), 4);
- _mm_storel_epi64((__m128i *)&flat2_oq[1][i*8],
+ _mm_storel_epi64((__m128i *)&flat2_oq[16 + i * 8],
_mm_packus_epi16(workp_shft, workp_shft));
a = _mm_add_epi16(q7, a);
c = _mm_add_epi16(_mm_sub_epi16(c, _mm_add_epi16(p5, q1)), q2);
workp_shft = _mm_srli_epi16(_mm_add_epi16(a, c), 4);
- _mm_storel_epi64((__m128i *)&flat2_oq[2][i*8],
+ _mm_storel_epi64((__m128i *)&flat2_oq[2 * 16 + i * 8],
_mm_packus_epi16(workp_shft, workp_shft));
a = _mm_add_epi16(q7, a);
c = _mm_add_epi16(_mm_sub_epi16(c, _mm_add_epi16(p4, q2)), q3);
workp_shft = _mm_srli_epi16(_mm_add_epi16(a, c), 4);
- _mm_storel_epi64((__m128i *)&flat2_oq[3][i*8],
+ _mm_storel_epi64((__m128i *)&flat2_oq[3 * 16 + i * 8],
_mm_packus_epi16(workp_shft, workp_shft));
a = _mm_add_epi16(q7, a);
c = _mm_add_epi16(_mm_sub_epi16(c, _mm_add_epi16(p3, q3)), q4);
workp_shft = _mm_srli_epi16(_mm_add_epi16(a, c), 4);
- _mm_storel_epi64((__m128i *)&flat2_oq[4][i*8],
+ _mm_storel_epi64((__m128i *)&flat2_oq[4 * 16 + i * 8],
_mm_packus_epi16(workp_shft, workp_shft));
a = _mm_add_epi16(q7, a);
c = _mm_add_epi16(_mm_sub_epi16(c, _mm_add_epi16(p2, q4)), q5);
workp_shft = _mm_srli_epi16(_mm_add_epi16(a, c), 4);
- _mm_storel_epi64((__m128i *)&flat2_oq[5][i*8],
+ _mm_storel_epi64((__m128i *)&flat2_oq[5 * 16 + i * 8],
_mm_packus_epi16(workp_shft, workp_shft));
a = _mm_add_epi16(q7, a);
c = _mm_add_epi16(_mm_sub_epi16(c, _mm_add_epi16(p1, q5)), q6);
workp_shft = _mm_srli_epi16(_mm_add_epi16(a, c), 4);
- _mm_storel_epi64((__m128i *)&flat2_oq[6][i*8],
+ _mm_storel_epi64((__m128i *)&flat2_oq[6 * 16 + i * 8],
_mm_packus_epi16(workp_shft, workp_shft));
temp_flat2 = _mm_srli_si128(temp_flat2, 8);
@@ -730,51 +733,51 @@
// wide flat
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- work_a = _mm_load_si128((__m128i *)ap[2]);
- p2 = _mm_load_si128((__m128i *)flat_op[2]);
+ work_a = _mm_load_si128((__m128i *)&ap[2 * 16]);
+ p2 = _mm_load_si128((__m128i *)&flat_op[2 * 16]);
work_a = _mm_andnot_si128(flat, work_a);
p2 = _mm_and_si128(flat, p2);
p2 = _mm_or_si128(work_a, p2);
- _mm_store_si128((__m128i *)flat_op[2], p2);
+ _mm_store_si128((__m128i *)&flat_op[2 * 16], p2);
- p1 = _mm_load_si128((__m128i *)flat_op[1]);
+ p1 = _mm_load_si128((__m128i *)&flat_op[1 * 16]);
work_a = _mm_andnot_si128(flat, ps1);
p1 = _mm_and_si128(flat, p1);
p1 = _mm_or_si128(work_a, p1);
- _mm_store_si128((__m128i *)flat_op[1], p1);
+ _mm_store_si128((__m128i *)&flat_op[1 * 16], p1);
- p0 = _mm_load_si128((__m128i *)flat_op[0]);
+ p0 = _mm_load_si128((__m128i *)&flat_op[0]);
work_a = _mm_andnot_si128(flat, ps0);
p0 = _mm_and_si128(flat, p0);
p0 = _mm_or_si128(work_a, p0);
- _mm_store_si128((__m128i *)flat_op[0], p0);
+ _mm_store_si128((__m128i *)&flat_op[0], p0);
- q0 = _mm_load_si128((__m128i *)flat_oq[0]);
+ q0 = _mm_load_si128((__m128i *)&flat_oq[0]);
work_a = _mm_andnot_si128(flat, qs0);
q0 = _mm_and_si128(flat, q0);
q0 = _mm_or_si128(work_a, q0);
- _mm_store_si128((__m128i *)flat_oq[0], q0);
+ _mm_store_si128((__m128i *)&flat_oq[0], q0);
- q1 = _mm_load_si128((__m128i *)flat_oq[1]);
+ q1 = _mm_load_si128((__m128i *)&flat_oq[1 * 16]);
work_a = _mm_andnot_si128(flat, qs1);
q1 = _mm_and_si128(flat, q1);
q1 = _mm_or_si128(work_a, q1);
- _mm_store_si128((__m128i *)flat_oq[1], q1);
+ _mm_store_si128((__m128i *)&flat_oq[1 * 16], q1);
- work_a = _mm_load_si128((__m128i *)aq[2]);
- q2 = _mm_load_si128((__m128i *)flat_oq[2]);
+ work_a = _mm_load_si128((__m128i *)&aq[2 * 16]);
+ q2 = _mm_load_si128((__m128i *)&flat_oq[2 * 16]);
work_a = _mm_andnot_si128(flat, work_a);
q2 = _mm_and_si128(flat, q2);
q2 = _mm_or_si128(work_a, q2);
- _mm_store_si128((__m128i *)flat_oq[2], q2);
+ _mm_store_si128((__m128i *)&flat_oq[2 * 16], q2);
// write out op6 - op3
{
unsigned char *dst = (s - 7 * p);
for (i = 6; i > 2; i--) {
__m128i flat2_output;
- work_a = _mm_load_si128((__m128i *)ap[i]);
- flat2_output = _mm_load_si128((__m128i *)flat2_op[i]);
+ work_a = _mm_load_si128((__m128i *)&ap[i * 16]);
+ flat2_output = _mm_load_si128((__m128i *)&flat2_op[i * 16]);
work_a = _mm_andnot_si128(flat2, work_a);
flat2_output = _mm_and_si128(flat2, flat2_output);
work_a = _mm_or_si128(work_a, flat2_output);
@@ -783,43 +786,43 @@
}
}
- work_a = _mm_load_si128((__m128i *)flat_op[2]);
- p2 = _mm_load_si128((__m128i *)flat2_op[2]);
+ work_a = _mm_load_si128((__m128i *)&flat_op[2 * 16]);
+ p2 = _mm_load_si128((__m128i *)&flat2_op[2 * 16]);
work_a = _mm_andnot_si128(flat2, work_a);
p2 = _mm_and_si128(flat2, p2);
p2 = _mm_or_si128(work_a, p2);
_mm_storeu_si128((__m128i *)(s - 3 * p), p2);
- work_a = _mm_load_si128((__m128i *)flat_op[1]);
- p1 = _mm_load_si128((__m128i *)flat2_op[1]);
+ work_a = _mm_load_si128((__m128i *)&flat_op[1 * 16]);
+ p1 = _mm_load_si128((__m128i *)&flat2_op[1 * 16]);
work_a = _mm_andnot_si128(flat2, work_a);
p1 = _mm_and_si128(flat2, p1);
p1 = _mm_or_si128(work_a, p1);
_mm_storeu_si128((__m128i *)(s - 2 * p), p1);
- work_a = _mm_load_si128((__m128i *)flat_op[0]);
- p0 = _mm_load_si128((__m128i *)flat2_op[0]);
+ work_a = _mm_load_si128((__m128i *)&flat_op[0]);
+ p0 = _mm_load_si128((__m128i *)&flat2_op[0]);
work_a = _mm_andnot_si128(flat2, work_a);
p0 = _mm_and_si128(flat2, p0);
p0 = _mm_or_si128(work_a, p0);
_mm_storeu_si128((__m128i *)(s - 1 * p), p0);
- work_a = _mm_load_si128((__m128i *)flat_oq[0]);
- q0 = _mm_load_si128((__m128i *)flat2_oq[0]);
+ work_a = _mm_load_si128((__m128i *)&flat_oq[0]);
+ q0 = _mm_load_si128((__m128i *)&flat2_oq[0]);
work_a = _mm_andnot_si128(flat2, work_a);
q0 = _mm_and_si128(flat2, q0);
q0 = _mm_or_si128(work_a, q0);
_mm_storeu_si128((__m128i *)(s - 0 * p), q0);
- work_a = _mm_load_si128((__m128i *)flat_oq[1]);
- q1 = _mm_load_si128((__m128i *)flat2_oq[1]);
+ work_a = _mm_load_si128((__m128i *)&flat_oq[1 * 16]);
+ q1 = _mm_load_si128((__m128i *)&flat2_oq[16]);
work_a = _mm_andnot_si128(flat2, work_a);
q1 = _mm_and_si128(flat2, q1);
q1 = _mm_or_si128(work_a, q1);
_mm_storeu_si128((__m128i *)(s + 1 * p), q1);
- work_a = _mm_load_si128((__m128i *)flat_oq[2]);
- q2 = _mm_load_si128((__m128i *)flat2_oq[2]);
+ work_a = _mm_load_si128((__m128i *)&flat_oq[2 * 16]);
+ q2 = _mm_load_si128((__m128i *)&flat2_oq[2 * 16]);
work_a = _mm_andnot_si128(flat2, work_a);
q2 = _mm_and_si128(flat2, q2);
q2 = _mm_or_si128(work_a, q2);
@@ -830,8 +833,8 @@
unsigned char *dst = (s + 3 * p);
for (i = 3; i < 7; i++) {
__m128i flat2_output;
- work_a = _mm_load_si128((__m128i *)aq[i]);
- flat2_output = _mm_load_si128((__m128i *)flat2_oq[i]);
+ work_a = _mm_load_si128((__m128i *)&aq[i * 16]);
+ flat2_output = _mm_load_si128((__m128i *)&flat2_oq[i * 16]);
work_a = _mm_andnot_si128(flat2, work_a);
flat2_output = _mm_and_si128(flat2, flat2_output);
work_a = _mm_or_si128(work_a, flat2_output);
@@ -842,6 +845,7 @@
}
}
+// TODO(yunqingwang): remove count and call these 2 functions(8 or 16) directly.
void vp9_mb_lpf_horizontal_edge_w_sse2(unsigned char *s,
int p,
const unsigned char *_blimit,
@@ -860,47 +864,47 @@
const unsigned char *_limit,
const unsigned char *_thresh,
int count) {
- DECLARE_ALIGNED(16, unsigned char, flat_op2[16]);
- DECLARE_ALIGNED(16, unsigned char, flat_op1[16]);
- DECLARE_ALIGNED(16, unsigned char, flat_op0[16]);
- DECLARE_ALIGNED(16, unsigned char, flat_oq2[16]);
- DECLARE_ALIGNED(16, unsigned char, flat_oq1[16]);
- DECLARE_ALIGNED(16, unsigned char, flat_oq0[16]);
- __m128i mask, hev, flat;
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_op2, 16);
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_op1, 16);
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_op0, 16);
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_oq2, 16);
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_oq1, 16);
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_oq0, 16);
const __m128i zero = _mm_set1_epi16(0);
+ const __m128i blimit = _mm_load_si128((const __m128i *)_blimit);
+ const __m128i limit = _mm_load_si128((const __m128i *)_limit);
+ const __m128i thresh = _mm_load_si128((const __m128i *)_thresh);
+ __m128i mask, hev, flat;
__m128i p3, p2, p1, p0, q0, q1, q2, q3;
- const unsigned int extended_thresh = _thresh[0] * 0x01010101u;
- const unsigned int extended_limit = _limit[0] * 0x01010101u;
- const unsigned int extended_blimit = _blimit[0] * 0x01010101u;
- const __m128i thresh =
- _mm_shuffle_epi32(_mm_cvtsi32_si128((int)extended_thresh), 0);
- const __m128i limit =
- _mm_shuffle_epi32(_mm_cvtsi32_si128((int)extended_limit), 0);
- const __m128i blimit =
- _mm_shuffle_epi32(_mm_cvtsi32_si128((int)extended_blimit), 0);
+ __m128i q3p3, q2p2, q1p1, q0p0, p1q1, p0q0;
(void)count;
- p3 = _mm_loadl_epi64((__m128i *)(s - 4 * p));
- p2 = _mm_loadl_epi64((__m128i *)(s - 3 * p));
- p1 = _mm_loadl_epi64((__m128i *)(s - 2 * p));
- p0 = _mm_loadl_epi64((__m128i *)(s - 1 * p));
- q0 = _mm_loadl_epi64((__m128i *)(s - 0 * p));
- q1 = _mm_loadl_epi64((__m128i *)(s + 1 * p));
- q2 = _mm_loadl_epi64((__m128i *)(s + 2 * p));
- q3 = _mm_loadl_epi64((__m128i *)(s + 3 * p));
+
+ q3p3 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 4 * p)),
+ _mm_loadl_epi64((__m128i *)(s + 3 * p)));
+ q2p2 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 3 * p)),
+ _mm_loadl_epi64((__m128i *)(s + 2 * p)));
+ q1p1 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 2 * p)),
+ _mm_loadl_epi64((__m128i *)(s + 1 * p)));
+ q0p0 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 1 * p)),
+ _mm_loadl_epi64((__m128i *)(s - 0 * p)));
+ p1q1 = _mm_shuffle_epi32(q1p1, 78);
+ p0q0 = _mm_shuffle_epi32(q0p0, 78);
+
{
- const __m128i abs_p1p0 = _mm_or_si128(_mm_subs_epu8(p1, p0),
- _mm_subs_epu8(p0, p1));
- const __m128i abs_q1q0 = _mm_or_si128(_mm_subs_epu8(q1, q0),
- _mm_subs_epu8(q0, q1));
+ // filter_mask and hev_mask
const __m128i one = _mm_set1_epi8(1);
const __m128i fe = _mm_set1_epi8(0xfe);
- const __m128i ff = _mm_cmpeq_epi8(abs_p1p0, abs_p1p0);
- __m128i abs_p0q0 = _mm_or_si128(_mm_subs_epu8(p0, q0),
- _mm_subs_epu8(q0, p0));
- __m128i abs_p1q1 = _mm_or_si128(_mm_subs_epu8(p1, q1),
- _mm_subs_epu8(q1, p1));
- __m128i work;
+ const __m128i ff = _mm_cmpeq_epi8(fe, fe);
+ __m128i abs_p1q1, abs_p0q0, abs_q1q0, abs_p1p0, work;
+ abs_p1p0 = _mm_or_si128(_mm_subs_epu8(q1p1, q0p0),
+ _mm_subs_epu8(q0p0, q1p1));
+ abs_q1q0 = _mm_srli_si128(abs_p1p0, 8);
+
+ abs_p0q0 = _mm_or_si128(_mm_subs_epu8(q0p0, p0q0),
+ _mm_subs_epu8(p0q0, q0p0));
+ abs_p1q1 = _mm_or_si128(_mm_subs_epu8(q1p1, p1q1),
+ _mm_subs_epu8(p1q1, q1p1));
flat = _mm_max_epu8(abs_p1p0, abs_q1q0);
hev = _mm_subs_epu8(flat, thresh);
hev = _mm_xor_si128(_mm_cmpeq_epi8(hev, zero), ff);
@@ -910,36 +914,32 @@
mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), blimit);
mask = _mm_xor_si128(_mm_cmpeq_epi8(mask, zero), ff);
// mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1;
- mask = _mm_max_epu8(flat, mask);
+ mask = _mm_max_epu8(abs_p1p0, mask);
// mask |= (abs(p1 - p0) > limit) * -1;
// mask |= (abs(q1 - q0) > limit) * -1;
- work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p2, p1),
- _mm_subs_epu8(p1, p2)),
- _mm_or_si128(_mm_subs_epu8(p3, p2),
- _mm_subs_epu8(p2, p3)));
+
+ work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(q2p2, q1p1),
+ _mm_subs_epu8(q1p1, q2p2)),
+ _mm_or_si128(_mm_subs_epu8(q3p3, q2p2),
+ _mm_subs_epu8(q2p2, q3p3)));
mask = _mm_max_epu8(work, mask);
- work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(q2, q1),
- _mm_subs_epu8(q1, q2)),
- _mm_or_si128(_mm_subs_epu8(q3, q2),
- _mm_subs_epu8(q2, q3)));
- mask = _mm_max_epu8(work, mask);
+ mask = _mm_max_epu8(mask, _mm_srli_si128(mask, 8));
mask = _mm_subs_epu8(mask, limit);
mask = _mm_cmpeq_epi8(mask, zero);
- work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p2, p0),
- _mm_subs_epu8(p0, p2)),
- _mm_or_si128(_mm_subs_epu8(q2, q0),
- _mm_subs_epu8(q0, q2)));
- flat = _mm_max_epu8(work, flat);
- work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p3, p0),
- _mm_subs_epu8(p0, p3)),
- _mm_or_si128(_mm_subs_epu8(q3, q0),
- _mm_subs_epu8(q0, q3)));
- flat = _mm_max_epu8(work, flat);
+ // flat_mask4
+
+ flat = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(q2p2, q0p0),
+ _mm_subs_epu8(q0p0, q2p2)),
+ _mm_or_si128(_mm_subs_epu8(q3p3, q0p0),
+ _mm_subs_epu8(q0p0, q3p3)));
+ flat = _mm_max_epu8(abs_p1p0, flat);
+ flat = _mm_max_epu8(flat, _mm_srli_si128(flat, 8));
flat = _mm_subs_epu8(flat, one);
flat = _mm_cmpeq_epi8(flat, zero);
flat = _mm_and_si128(flat, mask);
}
+
{
const __m128i four = _mm_set1_epi16(4);
unsigned char *src = s;
@@ -996,11 +996,7 @@
const __m128i t4 = _mm_set1_epi8(4);
const __m128i t3 = _mm_set1_epi8(3);
const __m128i t80 = _mm_set1_epi8(0x80);
- const __m128i te0 = _mm_set1_epi8(0xe0);
- const __m128i t1f = _mm_set1_epi8(0x1f);
const __m128i t1 = _mm_set1_epi8(0x1);
- const __m128i t7f = _mm_set1_epi8(0x7f);
-
const __m128i ps1 = _mm_xor_si128(_mm_loadl_epi64((__m128i *)(s - 2 * p)),
t80);
const __m128i ps0 = _mm_xor_si128(_mm_loadl_epi64((__m128i *)(s - 1 * p)),
@@ -1025,26 +1021,20 @@
filter2 = _mm_adds_epi8(filt, t3);
/* Filter1 >> 3 */
- work_a = _mm_cmpgt_epi8(zero, filter1);
- filter1 = _mm_srli_epi16(filter1, 3);
- work_a = _mm_and_si128(work_a, te0);
- filter1 = _mm_and_si128(filter1, t1f);
- filter1 = _mm_or_si128(filter1, work_a);
+ filter1 = _mm_unpacklo_epi8(zero, filter1);
+ filter1 = _mm_srai_epi16(filter1, 11);
+ filter1 = _mm_packs_epi16(filter1, filter1);
/* Filter2 >> 3 */
- work_a = _mm_cmpgt_epi8(zero, filter2);
- filter2 = _mm_srli_epi16(filter2, 3);
- work_a = _mm_and_si128(work_a, te0);
- filter2 = _mm_and_si128(filter2, t1f);
- filter2 = _mm_or_si128(filter2, work_a);
+ filter2 = _mm_unpacklo_epi8(zero, filter2);
+ filter2 = _mm_srai_epi16(filter2, 11);
+ filter2 = _mm_packs_epi16(filter2, zero);
/* filt >> 1 */
filt = _mm_adds_epi8(filter1, t1);
- work_a = _mm_cmpgt_epi8(zero, filt);
- filt = _mm_srli_epi16(filt, 1);
- work_a = _mm_and_si128(work_a, t80);
- filt = _mm_and_si128(filt, t7f);
- filt = _mm_or_si128(filt, work_a);
+ filt = _mm_unpacklo_epi8(zero, filt);
+ filt = _mm_srai_epi16(filt, 9);
+ filt = _mm_packs_epi16(filt, zero);
filt = _mm_andnot_si128(hev, filt);
@@ -1093,6 +1083,392 @@
}
}
+void vp9_mbloop_filter_horizontal_edge_16_sse2(uint8_t *s, int p /* pitch */,
+ const uint8_t *_blimit0,
+ const uint8_t *_limit0,
+ const uint8_t *_thresh0,
+ const uint8_t *_blimit1,
+ const uint8_t *_limit1,
+ const uint8_t *_thresh1) {
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_op2, 16);
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_op1, 16);
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_op0, 16);
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_oq2, 16);
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_oq1, 16);
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_oq0, 16);
+ const __m128i zero = _mm_set1_epi16(0);
+ const __m128i blimit =
+ _mm_unpacklo_epi64(_mm_load_si128((const __m128i *)_blimit0),
+ _mm_load_si128((const __m128i *)_blimit1));
+ const __m128i limit =
+ _mm_unpacklo_epi64(_mm_load_si128((const __m128i *)_limit0),
+ _mm_load_si128((const __m128i *)_limit1));
+ const __m128i thresh =
+ _mm_unpacklo_epi64(_mm_load_si128((const __m128i *)_thresh0),
+ _mm_load_si128((const __m128i *)_thresh1));
+
+ __m128i mask, hev, flat;
+ __m128i p3, p2, p1, p0, q0, q1, q2, q3;
+
+ p3 = _mm_loadu_si128((__m128i *)(s - 4 * p));
+ p2 = _mm_loadu_si128((__m128i *)(s - 3 * p));
+ p1 = _mm_loadu_si128((__m128i *)(s - 2 * p));
+ p0 = _mm_loadu_si128((__m128i *)(s - 1 * p));
+ q0 = _mm_loadu_si128((__m128i *)(s - 0 * p));
+ q1 = _mm_loadu_si128((__m128i *)(s + 1 * p));
+ q2 = _mm_loadu_si128((__m128i *)(s + 2 * p));
+ q3 = _mm_loadu_si128((__m128i *)(s + 3 * p));
+ {
+ const __m128i abs_p1p0 = _mm_or_si128(_mm_subs_epu8(p1, p0),
+ _mm_subs_epu8(p0, p1));
+ const __m128i abs_q1q0 = _mm_or_si128(_mm_subs_epu8(q1, q0),
+ _mm_subs_epu8(q0, q1));
+ const __m128i one = _mm_set1_epi8(1);
+ const __m128i fe = _mm_set1_epi8(0xfe);
+ const __m128i ff = _mm_cmpeq_epi8(abs_p1p0, abs_p1p0);
+ __m128i abs_p0q0 = _mm_or_si128(_mm_subs_epu8(p0, q0),
+ _mm_subs_epu8(q0, p0));
+ __m128i abs_p1q1 = _mm_or_si128(_mm_subs_epu8(p1, q1),
+ _mm_subs_epu8(q1, p1));
+ __m128i work;
+
+ // filter_mask and hev_mask
+ flat = _mm_max_epu8(abs_p1p0, abs_q1q0);
+ hev = _mm_subs_epu8(flat, thresh);
+ hev = _mm_xor_si128(_mm_cmpeq_epi8(hev, zero), ff);
+
+ abs_p0q0 =_mm_adds_epu8(abs_p0q0, abs_p0q0);
+ abs_p1q1 = _mm_srli_epi16(_mm_and_si128(abs_p1q1, fe), 1);
+ mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), blimit);
+ mask = _mm_xor_si128(_mm_cmpeq_epi8(mask, zero), ff);
+ // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1;
+ mask = _mm_max_epu8(flat, mask);
+ // mask |= (abs(p1 - p0) > limit) * -1;
+ // mask |= (abs(q1 - q0) > limit) * -1;
+ work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p2, p1),
+ _mm_subs_epu8(p1, p2)),
+ _mm_or_si128(_mm_subs_epu8(p3, p2),
+ _mm_subs_epu8(p2, p3)));
+ mask = _mm_max_epu8(work, mask);
+ work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(q2, q1),
+ _mm_subs_epu8(q1, q2)),
+ _mm_or_si128(_mm_subs_epu8(q3, q2),
+ _mm_subs_epu8(q2, q3)));
+ mask = _mm_max_epu8(work, mask);
+ mask = _mm_subs_epu8(mask, limit);
+ mask = _mm_cmpeq_epi8(mask, zero);
+
+ // flat_mask4
+ work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p2, p0),
+ _mm_subs_epu8(p0, p2)),
+ _mm_or_si128(_mm_subs_epu8(q2, q0),
+ _mm_subs_epu8(q0, q2)));
+ flat = _mm_max_epu8(work, flat);
+ work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p3, p0),
+ _mm_subs_epu8(p0, p3)),
+ _mm_or_si128(_mm_subs_epu8(q3, q0),
+ _mm_subs_epu8(q0, q3)));
+ flat = _mm_max_epu8(work, flat);
+ flat = _mm_subs_epu8(flat, one);
+ flat = _mm_cmpeq_epi8(flat, zero);
+ flat = _mm_and_si128(flat, mask);
+ }
+ {
+ const __m128i four = _mm_set1_epi16(4);
+ unsigned char *src = s;
+ int i = 0;
+
+ do {
+ __m128i workp_a, workp_b, workp_shft;
+ p3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 4 * p)), zero);
+ p2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 3 * p)), zero);
+ p1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 2 * p)), zero);
+ p0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 1 * p)), zero);
+ q0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 0 * p)), zero);
+ q1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 1 * p)), zero);
+ q2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 2 * p)), zero);
+ q3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 3 * p)), zero);
+
+ workp_a = _mm_add_epi16(_mm_add_epi16(p3, p3), _mm_add_epi16(p2, p1));
+ workp_a = _mm_add_epi16(_mm_add_epi16(workp_a, four), p0);
+ workp_b = _mm_add_epi16(_mm_add_epi16(q0, p2), p3);
+ workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3);
+ _mm_storel_epi64((__m128i *)&flat_op2[i * 8],
+ _mm_packus_epi16(workp_shft, workp_shft));
+
+ workp_b = _mm_add_epi16(_mm_add_epi16(q0, q1), p1);
+ workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3);
+ _mm_storel_epi64((__m128i *)&flat_op1[i * 8],
+ _mm_packus_epi16(workp_shft, workp_shft));
+
+ workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p3), q2);
+ workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p1), p0);
+ workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3);
+ _mm_storel_epi64((__m128i *)&flat_op0[i * 8],
+ _mm_packus_epi16(workp_shft, workp_shft));
+
+ workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p3), q3);
+ workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p0), q0);
+ workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3);
+ _mm_storel_epi64((__m128i *)&flat_oq0[i * 8],
+ _mm_packus_epi16(workp_shft, workp_shft));
+
+ workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p2), q3);
+ workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q0), q1);
+ workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3);
+ _mm_storel_epi64((__m128i *)&flat_oq1[i * 8],
+ _mm_packus_epi16(workp_shft, workp_shft));
+
+ workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p1), q3);
+ workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q1), q2);
+ workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3);
+ _mm_storel_epi64((__m128i *)&flat_oq2[i * 8],
+ _mm_packus_epi16(workp_shft, workp_shft));
+
+ src += 8;
+ } while (++i < 2);
+ }
+ // lp filter
+ {
+ const __m128i t4 = _mm_set1_epi8(4);
+ const __m128i t3 = _mm_set1_epi8(3);
+ const __m128i t80 = _mm_set1_epi8(0x80);
+ const __m128i te0 = _mm_set1_epi8(0xe0);
+ const __m128i t1f = _mm_set1_epi8(0x1f);
+ const __m128i t1 = _mm_set1_epi8(0x1);
+ const __m128i t7f = _mm_set1_epi8(0x7f);
+
+ const __m128i ps1 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s - 2 * p)),
+ t80);
+ const __m128i ps0 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s - 1 * p)),
+ t80);
+ const __m128i qs0 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s + 0 * p)),
+ t80);
+ const __m128i qs1 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s + 1 * p)),
+ t80);
+ __m128i filt;
+ __m128i work_a;
+ __m128i filter1, filter2;
+
+ filt = _mm_and_si128(_mm_subs_epi8(ps1, qs1), hev);
+ work_a = _mm_subs_epi8(qs0, ps0);
+ filt = _mm_adds_epi8(filt, work_a);
+ filt = _mm_adds_epi8(filt, work_a);
+ filt = _mm_adds_epi8(filt, work_a);
+ /* (vp9_filter + 3 * (qs0 - ps0)) & mask */
+ filt = _mm_and_si128(filt, mask);
+
+ filter1 = _mm_adds_epi8(filt, t4);
+ filter2 = _mm_adds_epi8(filt, t3);
+
+ /* Filter1 >> 3 */
+ work_a = _mm_cmpgt_epi8(zero, filter1);
+ filter1 = _mm_srli_epi16(filter1, 3);
+ work_a = _mm_and_si128(work_a, te0);
+ filter1 = _mm_and_si128(filter1, t1f);
+ filter1 = _mm_or_si128(filter1, work_a);
+
+ /* Filter2 >> 3 */
+ work_a = _mm_cmpgt_epi8(zero, filter2);
+ filter2 = _mm_srli_epi16(filter2, 3);
+ work_a = _mm_and_si128(work_a, te0);
+ filter2 = _mm_and_si128(filter2, t1f);
+ filter2 = _mm_or_si128(filter2, work_a);
+
+ /* filt >> 1 */
+ filt = _mm_adds_epi8(filter1, t1);
+ work_a = _mm_cmpgt_epi8(zero, filt);
+ filt = _mm_srli_epi16(filt, 1);
+ work_a = _mm_and_si128(work_a, t80);
+ filt = _mm_and_si128(filt, t7f);
+ filt = _mm_or_si128(filt, work_a);
+
+ filt = _mm_andnot_si128(hev, filt);
+
+ work_a = _mm_xor_si128(_mm_subs_epi8(qs0, filter1), t80);
+ q0 = _mm_load_si128((__m128i *)flat_oq0);
+ work_a = _mm_andnot_si128(flat, work_a);
+ q0 = _mm_and_si128(flat, q0);
+ q0 = _mm_or_si128(work_a, q0);
+
+ work_a = _mm_xor_si128(_mm_subs_epi8(qs1, filt), t80);
+ q1 = _mm_load_si128((__m128i *)flat_oq1);
+ work_a = _mm_andnot_si128(flat, work_a);
+ q1 = _mm_and_si128(flat, q1);
+ q1 = _mm_or_si128(work_a, q1);
+
+ work_a = _mm_loadu_si128((__m128i *)(s + 2 * p));
+ q2 = _mm_load_si128((__m128i *)flat_oq2);
+ work_a = _mm_andnot_si128(flat, work_a);
+ q2 = _mm_and_si128(flat, q2);
+ q2 = _mm_or_si128(work_a, q2);
+
+ work_a = _mm_xor_si128(_mm_adds_epi8(ps0, filter2), t80);
+ p0 = _mm_load_si128((__m128i *)flat_op0);
+ work_a = _mm_andnot_si128(flat, work_a);
+ p0 = _mm_and_si128(flat, p0);
+ p0 = _mm_or_si128(work_a, p0);
+
+ work_a = _mm_xor_si128(_mm_adds_epi8(ps1, filt), t80);
+ p1 = _mm_load_si128((__m128i *)flat_op1);
+ work_a = _mm_andnot_si128(flat, work_a);
+ p1 = _mm_and_si128(flat, p1);
+ p1 = _mm_or_si128(work_a, p1);
+
+ work_a = _mm_loadu_si128((__m128i *)(s - 3 * p));
+ p2 = _mm_load_si128((__m128i *)flat_op2);
+ work_a = _mm_andnot_si128(flat, work_a);
+ p2 = _mm_and_si128(flat, p2);
+ p2 = _mm_or_si128(work_a, p2);
+
+ _mm_storeu_si128((__m128i *)(s - 3 * p), p2);
+ _mm_storeu_si128((__m128i *)(s - 2 * p), p1);
+ _mm_storeu_si128((__m128i *)(s - 1 * p), p0);
+ _mm_storeu_si128((__m128i *)(s + 0 * p), q0);
+ _mm_storeu_si128((__m128i *)(s + 1 * p), q1);
+ _mm_storeu_si128((__m128i *)(s + 2 * p), q2);
+ }
+}
+
+void vp9_loop_filter_horizontal_edge_16_sse2(unsigned char *s,
+ int p,
+ const unsigned char *_blimit0,
+ const unsigned char *_limit0,
+ const unsigned char *_thresh0,
+ const unsigned char *_blimit1,
+ const unsigned char *_limit1,
+ const unsigned char *_thresh1) {
+ const __m128i blimit =
+ _mm_unpacklo_epi64(_mm_load_si128((const __m128i *)_blimit0),
+ _mm_load_si128((const __m128i *)_blimit1));
+ const __m128i limit =
+ _mm_unpacklo_epi64(_mm_load_si128((const __m128i *)_limit0),
+ _mm_load_si128((const __m128i *)_limit1));
+ const __m128i thresh =
+ _mm_unpacklo_epi64(_mm_load_si128((const __m128i *)_thresh0),
+ _mm_load_si128((const __m128i *)_thresh1));
+ const __m128i zero = _mm_set1_epi16(0);
+ __m128i p3, p2, p1, p0, q0, q1, q2, q3;
+ __m128i mask, hev, flat;
+
+ p3 = _mm_loadu_si128((__m128i *)(s - 4 * p));
+ p2 = _mm_loadu_si128((__m128i *)(s - 3 * p));
+ p1 = _mm_loadu_si128((__m128i *)(s - 2 * p));
+ p0 = _mm_loadu_si128((__m128i *)(s - 1 * p));
+ q0 = _mm_loadu_si128((__m128i *)(s - 0 * p));
+ q1 = _mm_loadu_si128((__m128i *)(s + 1 * p));
+ q2 = _mm_loadu_si128((__m128i *)(s + 2 * p));
+ q3 = _mm_loadu_si128((__m128i *)(s + 3 * p));
+
+ // filter_mask and hev_mask
+ {
+ const __m128i abs_p1p0 = _mm_or_si128(_mm_subs_epu8(p1, p0),
+ _mm_subs_epu8(p0, p1));
+ const __m128i abs_q1q0 = _mm_or_si128(_mm_subs_epu8(q1, q0),
+ _mm_subs_epu8(q0, q1));
+ const __m128i fe = _mm_set1_epi8(0xfe);
+ const __m128i ff = _mm_cmpeq_epi8(abs_p1p0, abs_p1p0);
+ __m128i abs_p0q0 = _mm_or_si128(_mm_subs_epu8(p0, q0),
+ _mm_subs_epu8(q0, p0));
+ __m128i abs_p1q1 = _mm_or_si128(_mm_subs_epu8(p1, q1),
+ _mm_subs_epu8(q1, p1));
+ __m128i work;
+
+ flat = _mm_max_epu8(abs_p1p0, abs_q1q0);
+ hev = _mm_subs_epu8(flat, thresh);
+ hev = _mm_xor_si128(_mm_cmpeq_epi8(hev, zero), ff);
+
+ abs_p0q0 =_mm_adds_epu8(abs_p0q0, abs_p0q0);
+ abs_p1q1 = _mm_srli_epi16(_mm_and_si128(abs_p1q1, fe), 1);
+ mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), blimit);
+ mask = _mm_xor_si128(_mm_cmpeq_epi8(mask, zero), ff);
+ // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1;
+ mask = _mm_max_epu8(flat, mask);
+ // mask |= (abs(p1 - p0) > limit) * -1;
+ // mask |= (abs(q1 - q0) > limit) * -1;
+ work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p2, p1),
+ _mm_subs_epu8(p1, p2)),
+ _mm_or_si128(_mm_subs_epu8(p3, p2),
+ _mm_subs_epu8(p2, p3)));
+ mask = _mm_max_epu8(work, mask);
+ work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(q2, q1),
+ _mm_subs_epu8(q1, q2)),
+ _mm_or_si128(_mm_subs_epu8(q3, q2),
+ _mm_subs_epu8(q2, q3)));
+ mask = _mm_max_epu8(work, mask);
+ mask = _mm_subs_epu8(mask, limit);
+ mask = _mm_cmpeq_epi8(mask, zero);
+ }
+
+ // filter4
+ {
+ const __m128i t4 = _mm_set1_epi8(4);
+ const __m128i t3 = _mm_set1_epi8(3);
+ const __m128i t80 = _mm_set1_epi8(0x80);
+ const __m128i te0 = _mm_set1_epi8(0xe0);
+ const __m128i t1f = _mm_set1_epi8(0x1f);
+ const __m128i t1 = _mm_set1_epi8(0x1);
+ const __m128i t7f = _mm_set1_epi8(0x7f);
+
+ const __m128i ps1 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s - 2 * p)),
+ t80);
+ const __m128i ps0 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s - 1 * p)),
+ t80);
+ const __m128i qs0 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s + 0 * p)),
+ t80);
+ const __m128i qs1 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s + 1 * p)),
+ t80);
+ __m128i filt;
+ __m128i work_a;
+ __m128i filter1, filter2;
+
+ filt = _mm_and_si128(_mm_subs_epi8(ps1, qs1), hev);
+ work_a = _mm_subs_epi8(qs0, ps0);
+ filt = _mm_adds_epi8(filt, work_a);
+ filt = _mm_adds_epi8(filt, work_a);
+ filt = _mm_adds_epi8(filt, work_a);
+ /* (vp9_filter + 3 * (qs0 - ps0)) & mask */
+ filt = _mm_and_si128(filt, mask);
+
+ filter1 = _mm_adds_epi8(filt, t4);
+ filter2 = _mm_adds_epi8(filt, t3);
+
+ /* Filter1 >> 3 */
+ work_a = _mm_cmpgt_epi8(zero, filter1);
+ filter1 = _mm_srli_epi16(filter1, 3);
+ work_a = _mm_and_si128(work_a, te0);
+ filter1 = _mm_and_si128(filter1, t1f);
+ filter1 = _mm_or_si128(filter1, work_a);
+
+ /* Filter2 >> 3 */
+ work_a = _mm_cmpgt_epi8(zero, filter2);
+ filter2 = _mm_srli_epi16(filter2, 3);
+ work_a = _mm_and_si128(work_a, te0);
+ filter2 = _mm_and_si128(filter2, t1f);
+ filter2 = _mm_or_si128(filter2, work_a);
+
+ /* filt >> 1 */
+ filt = _mm_adds_epi8(filter1, t1);
+ work_a = _mm_cmpgt_epi8(zero, filt);
+ filt = _mm_srli_epi16(filt, 1);
+ work_a = _mm_and_si128(work_a, t80);
+ filt = _mm_and_si128(filt, t7f);
+ filt = _mm_or_si128(filt, work_a);
+
+ filt = _mm_andnot_si128(hev, filt);
+
+ q0 = _mm_xor_si128(_mm_subs_epi8(qs0, filter1), t80);
+ q1 = _mm_xor_si128(_mm_subs_epi8(qs1, filt), t80);
+ p0 = _mm_xor_si128(_mm_adds_epi8(ps0, filter2), t80);
+ p1 = _mm_xor_si128(_mm_adds_epi8(ps1, filt), t80);
+
+ _mm_storeu_si128((__m128i *)(s - 2 * p), p1);
+ _mm_storeu_si128((__m128i *)(s - 1 * p), p0);
+ _mm_storeu_si128((__m128i *)(s + 0 * p), q0);
+ _mm_storeu_si128((__m128i *)(s + 1 * p), q1);
+ }
+}
+
static INLINE void transpose8x16(unsigned char *in0, unsigned char *in1,
int in_p, unsigned char *out, int out_p) {
__m128i x0, x1, x2, x3, x4, x5, x6, x7;
diff --git a/vp9/common/x86/vp9_subpixel_8t_ssse3.asm b/vp9/common/x86/vp9_subpixel_8t_ssse3.asm
index 7a5cca0..dbc17ec 100644
--- a/vp9/common/x86/vp9_subpixel_8t_ssse3.asm
+++ b/vp9/common/x86/vp9_subpixel_8t_ssse3.asm
@@ -11,17 +11,6 @@
%include "vpx_ports/x86_abi_support.asm"
-;/************************************************************************************
-; Notes: filter_block1d_h6 applies a 6 tap filter horizontally to the input pixels. The
-; input pixel array has output_height rows. This routine assumes that output_height is an
-; even number. This function handles 8 pixels in horizontal direction, calculating ONE
-; rows each iteration to take advantage of the 128 bits operations.
-;
-; This is an implementation of some of the SSE optimizations first seen in ffvp8
-;
-;*************************************************************************************/
-
-
%macro VERTx4 1
mov rdx, arg(5) ;filter ptr
mov rsi, arg(0) ;src_ptr
@@ -81,11 +70,14 @@
pmaddubsw xmm4, k4k5
pmaddubsw xmm6, k6k7
+ movdqa xmm1, xmm2
paddsw xmm0, xmm6
- paddsw xmm0, xmm2
+ pmaxsw xmm2, xmm4
+ pminsw xmm4, xmm1
paddsw xmm0, xmm4
- paddsw xmm0, krd
+ paddsw xmm0, xmm2
+ paddsw xmm0, krd
psraw xmm0, 7
packuswb xmm0, xmm0
@@ -538,14 +530,22 @@
movdqa %2, %1
pshufb %1, [GLOBAL(shuf_t0t1)]
pshufb %2, [GLOBAL(shuf_t2t3)]
- pmaddubsw %1, xmm6
- pmaddubsw %2, xmm7
+ pmaddubsw %1, k0k1k4k5
+ pmaddubsw %2, k2k3k6k7
- paddsw %1, %2
- movdqa %2, %1
+ movdqa xmm4, %1
+ movdqa xmm5, %2
+ psrldq %1, 8
psrldq %2, 8
- paddsw %1, %2
- paddsw %1, xmm5
+ movdqa xmm6, xmm5
+
+ paddsw xmm4, %2
+ pmaxsw xmm5, %1
+ pminsw %1, xmm6
+ paddsw %1, xmm4
+ paddsw %1, xmm5
+
+ paddsw %1, krd
psraw %1, 7
packuswb %1, %1
%endm
@@ -565,6 +565,10 @@
pshufhw xmm7, xmm7, 11111111b ;k2_k3_k6_k7
pshufd xmm5, xmm5, 0 ;rounding
+ movdqa k0k1k4k5, xmm6
+ movdqa k2k3k6k7, xmm7
+ movdqa krd, xmm5
+
movsxd rax, dword ptr arg(1) ;src_pixels_per_line
movsxd rdx, dword ptr arg(3) ;output_pitch
movsxd rcx, dword ptr arg(4) ;output_height
@@ -826,8 +830,15 @@
push rdi
; end prolog
+ ALIGN_STACK 16, rax
+ sub rsp, 16 * 3
+ %define k0k1k4k5 [rsp + 16 * 0]
+ %define k2k3k6k7 [rsp + 16 * 1]
+ %define krd [rsp + 16 * 2]
+
HORIZx4 0
+ add rsp, 16 * 3
; begin epilog
pop rdi
pop rsi
@@ -932,8 +943,15 @@
push rdi
; end prolog
+ ALIGN_STACK 16, rax
+ sub rsp, 16 * 3
+ %define k0k1k4k5 [rsp + 16 * 0]
+ %define k2k3k6k7 [rsp + 16 * 1]
+ %define krd [rsp + 16 * 2]
+
HORIZx4 1
+ add rsp, 16 * 3
; begin epilog
pop rdi
pop rsi
diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodeframe.c
similarity index 93%
rename from vp9/decoder/vp9_decodframe.c
rename to vp9/decoder/vp9_decodeframe.c
index 75c6384..dbcae76 100644
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -18,7 +18,6 @@
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_entropymode.h"
-#include "vp9/common/vp9_extend.h"
#include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_quant_common.h"
@@ -28,7 +27,7 @@
#include "vp9/common/vp9_tile_common.h"
#include "vp9/decoder/vp9_dboolhuff.h"
-#include "vp9/decoder/vp9_decodframe.h"
+#include "vp9/decoder/vp9_decodeframe.h"
#include "vp9/decoder/vp9_detokenize.h"
#include "vp9/decoder/vp9_decodemv.h"
#include "vp9/decoder/vp9_dsubexp.h"
@@ -45,7 +44,6 @@
DECLARE_ALIGNED(16, int16_t, qcoeff[MAX_MB_PLANE][64 * 64]);
DECLARE_ALIGNED(16, int16_t, dqcoeff[MAX_MB_PLANE][64 * 64]);
DECLARE_ALIGNED(16, uint16_t, eobs[MAX_MB_PLANE][256]);
- const uint8_t *band_translate[2];
} TileWorkerData;
static int read_be32(const uint8_t *p) {
@@ -243,16 +241,13 @@
}
static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block,
- TX_SIZE tx_size, int x, int y) {
+ TX_SIZE tx_size, uint8_t *dst, int stride) {
struct macroblockd_plane *const pd = &xd->plane[plane];
const int eob = pd->eobs[block];
if (eob > 0) {
TX_TYPE tx_type;
const int plane_type = pd->plane_type;
- const int stride = pd->dst.stride;
int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
- uint8_t *const dst = &pd->dst.buf[4 * y * stride + 4 * x];
-
switch (tx_size) {
case TX_4X4:
tx_type = get_tx_type_4x4(plane_type, xd, block);
@@ -295,7 +290,6 @@
MACROBLOCKD *xd;
vp9_reader *r;
uint8_t *token_cache;
- const uint8_t *band_translate[2];
};
static void predict_and_reconstruct_intra_block(int plane, int block,
@@ -304,9 +298,6 @@
struct intra_args *const args = arg;
VP9_COMMON *const cm = args->cm;
MACROBLOCKD *const xd = args->xd;
- const uint8_t *band_translate[2] = {
- args->band_translate[0], args->band_translate[1]
- };
struct macroblockd_plane *const pd = &xd->plane[plane];
MODE_INFO *const mi = xd->mi_8x8[0];
const MB_PREDICTION_MODE mode = (plane == 0)
@@ -327,8 +318,8 @@
if (!mi->mbmi.skip_coeff) {
vp9_decode_block_tokens(cm, xd, plane, block, plane_bsize, x, y, tx_size,
- args->r, args->token_cache, band_translate);
- inverse_transform_block(xd, plane, block, tx_size, x, y);
+ args->r, args->token_cache);
+ inverse_transform_block(xd, plane, block, tx_size, dst, pd->dst.stride);
}
}
@@ -338,7 +329,6 @@
vp9_reader *r;
int *eobtotal;
uint8_t *token_cache;
- const uint8_t *band_translate[2];
};
static void reconstruct_inter_block(int plane, int block,
@@ -347,17 +337,15 @@
struct inter_args *args = arg;
VP9_COMMON *const cm = args->cm;
MACROBLOCKD *const xd = args->xd;
- const uint8_t *band_translate[2] = {
- args->band_translate[0], args->band_translate[1]
- };
+ struct macroblockd_plane *const pd = &xd->plane[plane];
int x, y;
txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x, &y);
-
*args->eobtotal += vp9_decode_block_tokens(cm, xd, plane, block,
plane_bsize, x, y, tx_size,
- args->r, args->token_cache,
- band_translate);
- inverse_transform_block(xd, plane, block, tx_size, x, y);
+ args->r, args->token_cache);
+ inverse_transform_block(xd, plane, block, tx_size,
+ &pd->dst.buf[4 * y * pd->dst.stride + 4 * x],
+ pd->dst.stride);
}
static void set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd,
@@ -408,8 +396,7 @@
const TileInfo *const tile,
int mi_row, int mi_col,
vp9_reader *r, BLOCK_SIZE bsize,
- uint8_t *token_cache,
- const uint8_t *band_translate[2]) {
+ uint8_t *token_cache) {
const int less8x8 = bsize < BLOCK_8X8;
MB_MODE_INFO *mbmi;
@@ -432,7 +419,7 @@
if (!is_inter_block(mbmi)) {
struct intra_args arg = {
- cm, xd, r, token_cache, {band_translate[0], band_translate[1]}
+ cm, xd, r, token_cache
};
foreach_transformed_block(xd, bsize, predict_and_reconstruct_intra_block,
&arg);
@@ -452,8 +439,7 @@
if (!mbmi->skip_coeff) {
int eobtotal = 0;
struct inter_args arg = {
- cm, xd, r, &eobtotal, token_cache,
- {band_translate[0], band_translate[1]}
+ cm, xd, r, &eobtotal, token_cache
};
foreach_transformed_block(xd, bsize, reconstruct_inter_block, &arg);
if (!less8x8 && eobtotal == 0)
@@ -494,8 +480,7 @@
const TileInfo *const tile,
int mi_row, int mi_col,
vp9_reader* r, BLOCK_SIZE bsize,
- uint8_t *token_cache,
- const uint8_t *band_translate[2]) {
+ uint8_t *token_cache) {
const int hbs = num_8x8_blocks_wide_lookup[bsize] / 2;
PARTITION_TYPE partition;
BLOCK_SIZE subsize;
@@ -506,37 +491,33 @@
partition = read_partition(cm, xd, hbs, mi_row, mi_col, bsize, r);
subsize = get_subsize(bsize, partition);
if (subsize < BLOCK_8X8) {
- decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize, token_cache,
- band_translate);
+ decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize, token_cache);
} else {
switch (partition) {
case PARTITION_NONE:
- decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize, token_cache,
- band_translate);
+ decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize, token_cache);
break;
case PARTITION_HORZ:
- decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize, token_cache,
- band_translate);
+ decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize, token_cache);
if (mi_row + hbs < cm->mi_rows)
decode_modes_b(cm, xd, tile, mi_row + hbs, mi_col, r, subsize,
- token_cache, band_translate);
+ token_cache);
break;
case PARTITION_VERT:
- decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize, token_cache,
- band_translate);
+ decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize, token_cache);
if (mi_col + hbs < cm->mi_cols)
decode_modes_b(cm, xd, tile, mi_row, mi_col + hbs, r, subsize,
- token_cache, band_translate);
+ token_cache);
break;
case PARTITION_SPLIT:
decode_modes_sb(cm, xd, tile, mi_row, mi_col, r, subsize,
- token_cache, band_translate);
+ token_cache);
decode_modes_sb(cm, xd, tile, mi_row, mi_col + hbs, r, subsize,
- token_cache, band_translate);
+ token_cache);
decode_modes_sb(cm, xd, tile, mi_row + hbs, mi_col, r, subsize,
- token_cache, band_translate);
+ token_cache);
decode_modes_sb(cm, xd, tile, mi_row + hbs, mi_col + hbs, r, subsize,
- token_cache, band_translate);
+ token_cache);
break;
default:
assert(!"Invalid partition type");
@@ -721,20 +702,19 @@
VP9_COMMON *cm = &pbi->common;
if (cm->width != width || cm->height != height) {
- if (!pbi->initial_width || !pbi->initial_height) {
- if (vp9_alloc_frame_buffers(cm, width, height))
+ // Change in frame size.
+ if (cm->width == 0 || cm->height == 0) {
+ // Assign new frame buffer on first call.
+ cm->new_fb_idx = NUM_YV12_BUFFERS - 1;
+ cm->fb_idx_ref_cnt[cm->new_fb_idx] = 1;
+ }
+
+ // TODO(agrange) Don't test width/height, check overall size.
+ if (width > cm->width || height > cm->height) {
+ // Rescale frame buffers only if they're not big enough already.
+ if (vp9_resize_frame_buffers(cm, width, height))
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
"Failed to allocate frame buffers");
- pbi->initial_width = width;
- pbi->initial_height = height;
- } else {
- if (width > pbi->initial_width)
- vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
- "Frame width too large");
-
- if (height > pbi->initial_height)
- vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
- "Frame height too large");
}
cm->width = width;
@@ -820,11 +800,8 @@
vp9_zero(xd->left_seg_context);
for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
mi_col += MI_BLOCK_SIZE) {
- const uint8_t *band_translate[2] = {
- vp9_coefband_trans_4x4, pbi->coefband_trans_8x8plus
- };
decode_modes_sb(cm, xd, tile, mi_row, mi_col, r, BLOCK_64X64,
- pbi->token_cache, band_translate);
+ pbi->token_cache);
}
if (pbi->do_loopfilter_inline) {
@@ -985,8 +962,7 @@
mi_col += MI_BLOCK_SIZE) {
decode_modes_sb(tile_data->cm, &tile_data->xd, tile,
mi_row, mi_col, &tile_data->bit_reader, BLOCK_64X64,
- tile_data->token_cache,
- tile_data->band_translate);
+ tile_data->token_cache);
}
}
return !tile_data->xd.corrupted;
@@ -1045,8 +1021,6 @@
tile_data->cm = cm;
tile_data->xd = pbi->mb;
tile_data->xd.corrupted = 0;
- tile_data->band_translate[0] = vp9_coefband_trans_4x4;
- tile_data->band_translate[1] = pbi->coefband_trans_8x8plus;
vp9_tile_init(tile, tile_data->cm, 0, tile_col);
setup_token_decoder(data, data_end, size, &cm->error,
@@ -1327,13 +1301,6 @@
const int tile_cols = 1 << cm->log2_tile_cols;
YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm);
- vpx_memset(pbi->coefband_trans_8x8plus,
- (COEF_BANDS - 1),
- sizeof(pbi->coefband_trans_8x8plus));
- vpx_memcpy(pbi->coefband_trans_8x8plus,
- vp9_coefband_trans_8x8plus,
- sizeof(vp9_coefband_trans_8x8plus));
-
if (!first_partition_size) {
// showing a frame directly
*p_data_end = data + 1;
diff --git a/vp9/decoder/vp9_decodframe.h b/vp9/decoder/vp9_decodeframe.h
similarity index 82%
rename from vp9/decoder/vp9_decodframe.h
rename to vp9/decoder/vp9_decodeframe.h
index c665f6f..7245a98 100644
--- a/vp9/decoder/vp9_decodframe.h
+++ b/vp9/decoder/vp9_decodeframe.h
@@ -9,8 +9,8 @@
*/
-#ifndef VP9_DECODER_VP9_DECODFRAME_H_
-#define VP9_DECODER_VP9_DECODFRAME_H_
+#ifndef VP9_DECODER_VP9_DECODEFRAME_H_
+#define VP9_DECODER_VP9_DECODEFRAME_H_
struct VP9Common;
struct VP9Decompressor;
@@ -18,4 +18,4 @@
void vp9_init_dequantizer(struct VP9Common *cm);
int vp9_decode_frame(struct VP9Decompressor *cpi, const uint8_t **p_data_end);
-#endif // VP9_DECODER_VP9_DECODFRAME_H_
+#endif // VP9_DECODER_VP9_DECODEFRAME_H_
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index b948429..9c1f610 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -21,7 +21,7 @@
#include "vp9/common/vp9_seg_common.h"
#include "vp9/decoder/vp9_decodemv.h"
-#include "vp9/decoder/vp9_decodframe.h"
+#include "vp9/decoder/vp9_decodeframe.h"
#include "vp9/decoder/vp9_onyxd_int.h"
#include "vp9/decoder/vp9_treereader.h"
diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c
index d5ad303..75e7e40 100644
--- a/vp9/decoder/vp9_detokenize.c
+++ b/vp9/decoder/vp9_detokenize.c
@@ -23,14 +23,14 @@
#define EOB_CONTEXT_NODE 0
#define ZERO_CONTEXT_NODE 1
#define ONE_CONTEXT_NODE 2
-#define LOW_VAL_CONTEXT_NODE 3
-#define TWO_CONTEXT_NODE 4
-#define THREE_CONTEXT_NODE 5
-#define HIGH_LOW_CONTEXT_NODE 6
-#define CAT_ONE_CONTEXT_NODE 7
-#define CAT_THREEFOUR_CONTEXT_NODE 8
-#define CAT_THREE_CONTEXT_NODE 9
-#define CAT_FIVE_CONTEXT_NODE 10
+#define LOW_VAL_CONTEXT_NODE 0
+#define TWO_CONTEXT_NODE 1
+#define THREE_CONTEXT_NODE 2
+#define HIGH_LOW_CONTEXT_NODE 3
+#define CAT_ONE_CONTEXT_NODE 4
+#define CAT_THREEFOUR_CONTEXT_NODE 5
+#define CAT_THREE_CONTEXT_NODE 6
+#define CAT_FIVE_CONTEXT_NODE 7
#define CAT1_MIN_VAL 5
#define CAT2_MIN_VAL 7
@@ -67,42 +67,41 @@
TWO_TOKEN, TWO_TOKEN, TWO_TOKEN, DCT_EOB_MODEL_TOKEN
};
-#define INCREMENT_COUNT(token) \
- do { \
- if (!cm->frame_parallel_decoding_mode) { \
+#define INCREMENT_COUNT(token) \
+ do { \
+ if (!cm->frame_parallel_decoding_mode) \
++coef_counts[band][pt][token_to_counttoken[token]]; \
- } \
- } while (0);
+ } while (0)
+
#define WRITE_COEF_CONTINUE(val, token) \
{ \
- dqcoeff_ptr[scan[c]] = vp9_read_and_apply_sign(r, val) * \
- dq[c > 0] / (1 + (tx_size == TX_32X32)); \
+ v = (val * dqv) >> dq_shift; \
+ dqcoeff_ptr[scan[c]] = (vp9_read_bit(r) ? -v : v); \
INCREMENT_COUNT(token); \
token_cache[scan[c]] = vp9_pt_energy_class[token]; \
- c++; \
+ ++c; \
+ dqv = dq[1]; \
continue; \
}
+
#define ADJUST_COEF(prob, bits_count) \
do { \
val += (vp9_read(r, prob) << bits_count); \
- } while (0);
+ } while (0)
static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd,
vp9_reader *r, int block_idx,
PLANE_TYPE type, int seg_eob, int16_t *dqcoeff_ptr,
TX_SIZE tx_size, const int16_t *dq, int pt,
- uint8_t *token_cache,
- const uint8_t *band_translate) {
+ uint8_t *token_cache) {
const FRAME_CONTEXT *const fc = &cm->fc;
FRAME_COUNTS *const counts = &cm->counts;
const int ref = is_inter_block(&xd->mi_8x8[0]->mbmi);
int band, c = 0;
const vp9_prob (*coef_probs)[PREV_COEF_CONTEXTS][UNCONSTRAINED_NODES] =
fc->coef_probs[tx_size][type][ref];
- vp9_prob coef_probs_full[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES];
- uint8_t load_map[COEF_BANDS][PREV_COEF_CONTEXTS] = { { 0 } };
const vp9_prob *prob;
unsigned int (*coef_counts)[PREV_COEF_CONTEXTS][UNCONSTRAINED_NODES + 1] =
counts->coef[tx_size][type][ref];
@@ -110,6 +109,11 @@
counts->eob_branch[tx_size][type][ref];
const int16_t *scan, *nb;
const uint8_t *cat6;
+ const uint8_t *band_translate = get_band_translate(tx_size);
+ const int dq_shift = (tx_size == TX_32X32);
+ int v;
+ int16_t dqv = dq[0];
+
get_scan(xd, tx_size, type, block_idx, &scan, &nb);
while (c < seg_eob) {
@@ -136,6 +140,7 @@
if (!vp9_read(r, prob[ZERO_CONTEXT_NODE])) {
INCREMENT_COUNT(ZERO_TOKEN);
token_cache[scan[c]] = vp9_pt_energy_class[ZERO_TOKEN];
+ dqv = dq[1]; \
++c;
goto SKIP_START;
}
@@ -144,13 +149,9 @@
if (!vp9_read(r, prob[ONE_CONTEXT_NODE])) {
WRITE_COEF_CONTINUE(1, ONE_TOKEN);
}
- // Load full probabilities if not already loaded
- if (!load_map[band][pt]) {
- vp9_model_to_full_probs(coef_probs[band][pt],
- coef_probs_full[band][pt]);
- load_map[band][pt] = 1;
- }
- prob = coef_probs_full[band][pt];
+
+ prob = vp9_pareto8_full[coef_probs[band][pt][PIVOT_NODE]-1];
+
// LOW_VAL_CONTEXT_NODE_0_
if (!vp9_read(r, prob[LOW_VAL_CONTEXT_NODE])) {
if (!vp9_read(r, prob[TWO_CONTEXT_NODE])) {
@@ -201,10 +202,10 @@
}
val = 0;
cat6 = cat6_prob;
- while (*cat6) {
+ while (*cat6)
val = (val << 1) | vp9_read(r, *cat6++);
- }
val += CAT6_MIN_VAL;
+
WRITE_COEF_CONTINUE(val, DCT_VAL_CATEGORY6);
}
@@ -219,8 +220,7 @@
int vp9_decode_block_tokens(VP9_COMMON *cm, MACROBLOCKD *xd,
int plane, int block, BLOCK_SIZE plane_bsize,
int x, int y, TX_SIZE tx_size, vp9_reader *r,
- uint8_t *token_cache,
- const uint8_t *band_translate[2]) {
+ uint8_t *token_cache) {
struct macroblockd_plane *const pd = &xd->plane[plane];
const int seg_eob = get_tx_eob(&cm->seg, xd->mi_8x8[0]->mbmi.segment_id,
tx_size);
@@ -228,8 +228,7 @@
pd->left_context + y);
const int eob = decode_coefs(cm, xd, r, block, pd->plane_type, seg_eob,
BLOCK_OFFSET(pd->dqcoeff, block), tx_size,
- pd->dequant, pt, token_cache,
- band_translate[tx_size != TX_4X4]);
+ pd->dequant, pt, token_cache);
set_contexts(xd, pd, plane_bsize, tx_size, eob > 0, x, y);
pd->eobs[block] = eob;
return eob;
diff --git a/vp9/decoder/vp9_detokenize.h b/vp9/decoder/vp9_detokenize.h
index 7522c97..e858a19 100644
--- a/vp9/decoder/vp9_detokenize.h
+++ b/vp9/decoder/vp9_detokenize.h
@@ -18,7 +18,6 @@
int vp9_decode_block_tokens(VP9_COMMON *cm, MACROBLOCKD *xd,
int plane, int block, BLOCK_SIZE plane_bsize,
int x, int y, TX_SIZE tx_size, vp9_reader *r,
- uint8_t *token_cache,
- const uint8_t *band_translate[2]);
+ uint8_t *token_cache);
#endif // VP9_DECODER_VP9_DETOKENIZE_H_
diff --git a/vp9/decoder/vp9_onyxd_if.c b/vp9/decoder/vp9_onyxd_if.c
index cb45d37..7c0f91d 100644
--- a/vp9/decoder/vp9_onyxd_if.c
+++ b/vp9/decoder/vp9_onyxd_if.c
@@ -25,7 +25,7 @@
#include "vpx_scale/vpx_scale.h"
#include "vp9/common/vp9_systemdependent.h"
#include "vpx_ports/vpx_timer.h"
-#include "vp9/decoder/vp9_decodframe.h"
+#include "vp9/decoder/vp9_decodeframe.h"
#include "vp9/decoder/vp9_detokenize.h"
#include "./vpx_scale_rtcd.h"
diff --git a/vp9/decoder/vp9_onyxd_int.h b/vp9/decoder/vp9_onyxd_int.h
index e29b453..d3d29e9 100644
--- a/vp9/decoder/vp9_onyxd_int.h
+++ b/vp9/decoder/vp9_onyxd_int.h
@@ -55,7 +55,6 @@
PARTITION_CONTEXT *above_seg_context;
DECLARE_ALIGNED(16, uint8_t, token_cache[1024]);
- DECLARE_ALIGNED(16, uint8_t, coefband_trans_8x8plus[1024]);
} VP9D_COMP;
#endif // VP9_DECODER_VP9_ONYXD_INT_H_
diff --git a/vp9/decoder/vp9_treereader.h b/vp9/decoder/vp9_treereader.h
index f612497..41680d2 100644
--- a/vp9/decoder/vp9_treereader.h
+++ b/vp9/decoder/vp9_treereader.h
@@ -15,8 +15,6 @@
#include "vp9/common/vp9_treecoder.h"
#include "vp9/decoder/vp9_dboolhuff.h"
-#define vp9_read_and_apply_sign(r, value) (vp9_read_bit(r) ? -(value) : (value))
-
// Intent of tree data structure is to make decoding trivial.
static int treed_read(vp9_reader *const r, /* !!! must return a 0 or 1 !!! */
vp9_tree t,
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index 6427f7f..4445970 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -27,16 +27,16 @@
typedef struct {
MODE_INFO mic;
uint8_t *zcoeff_blk;
- int16_t *coeff[MAX_MB_PLANE][2];
- int16_t *qcoeff[MAX_MB_PLANE][2];
- int16_t *dqcoeff[MAX_MB_PLANE][2];
- uint16_t *eobs[MAX_MB_PLANE][2];
+ int16_t *coeff[MAX_MB_PLANE][3];
+ int16_t *qcoeff[MAX_MB_PLANE][3];
+ int16_t *dqcoeff[MAX_MB_PLANE][3];
+ uint16_t *eobs[MAX_MB_PLANE][3];
// dual buffer pointers, 0: in use, 1: best in store
- int16_t *coeff_pbuf[MAX_MB_PLANE][2];
- int16_t *qcoeff_pbuf[MAX_MB_PLANE][2];
- int16_t *dqcoeff_pbuf[MAX_MB_PLANE][2];
- uint16_t *eobs_pbuf[MAX_MB_PLANE][2];
+ int16_t *coeff_pbuf[MAX_MB_PLANE][3];
+ int16_t *qcoeff_pbuf[MAX_MB_PLANE][3];
+ int16_t *dqcoeff_pbuf[MAX_MB_PLANE][3];
+ uint16_t *eobs_pbuf[MAX_MB_PLANE][3];
int is_coded;
int num_4x4_blk;
@@ -94,6 +94,7 @@
MACROBLOCKD e_mbd;
int skip_block;
int select_txfm_size;
+ int skip_recode;
int skip_optimize;
int q_index;
@@ -193,9 +194,6 @@
BLOCK_SIZE sb64_partitioning;
void (*fwd_txm4x4)(const int16_t *input, int16_t *output, int stride);
-
- // band cache
- DECLARE_ALIGNED(16, uint8_t, coefband_trans_8x8plus[1024]);
};
// TODO(jingning): the variables used here are little complicated. need further
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index c733652..2a85dee 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -20,7 +20,6 @@
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_entropymode.h"
-#include "vp9/common/vp9_extend.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_mvref_common.h"
@@ -31,9 +30,9 @@
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_tile_common.h"
#include "vp9/encoder/vp9_encodeframe.h"
-#include "vp9/encoder/vp9_encodeintra.h"
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_encodemv.h"
+#include "vp9/encoder/vp9_extend.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_segmentation.h"
@@ -377,6 +376,7 @@
const int mis = cm->mode_info_stride;
const int mi_width = num_8x8_blocks_wide_lookup[bsize];
const int mi_height = num_8x8_blocks_high_lookup[bsize];
+ int max_plane;
assert(mi->mbmi.mode < MB_MODE_COUNT);
assert(mi->mbmi.ref_frame[0] < MAX_REF_FRAMES);
@@ -385,13 +385,21 @@
*mi_addr = *mi;
- for (i = 0; i < MAX_MB_PLANE; ++i) {
+ max_plane = is_inter_block(mbmi) ? MAX_MB_PLANE : 1;
+ for (i = 0; i < max_plane; ++i) {
p[i].coeff = ctx->coeff_pbuf[i][1];
pd[i].qcoeff = ctx->qcoeff_pbuf[i][1];
pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1];
pd[i].eobs = ctx->eobs_pbuf[i][1];
}
+ for (i = max_plane; i < MAX_MB_PLANE; ++i) {
+ p[i].coeff = ctx->coeff_pbuf[i][2];
+ pd[i].qcoeff = ctx->qcoeff_pbuf[i][2];
+ pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2];
+ pd[i].eobs = ctx->eobs_pbuf[i][2];
+ }
+
// Restore the coding context of the MB to that that was in place
// when the mode was picked for it
for (y = 0; y < mi_height; y++)
@@ -619,6 +627,7 @@
pd[i].eobs = ctx->eobs_pbuf[i][0];
}
ctx->is_coded = 0;
+ x->skip_recode = 0;
// Set to zero to make sure we do not use the previous encoded frame stats
xd->mi_8x8[0]->mbmi.skip_coeff = 0;
@@ -2406,6 +2415,7 @@
const int mis = cm->mode_info_stride;
const int mi_width = num_8x8_blocks_wide_lookup[bsize];
const int mi_height = num_8x8_blocks_high_lookup[bsize];
+ x->skip_recode = !x->select_txfm_size && mbmi->sb_type >= BLOCK_8X8;
x->skip_optimize = ctx->is_coded;
ctx->is_coded = 1;
x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct;
diff --git a/vp9/encoder/vp9_encodeintra.c b/vp9/encoder/vp9_encodeintra.c
deleted file mode 100644
index 32b4593..0000000
--- a/vp9/encoder/vp9_encodeintra.c
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "./vpx_config.h"
-#include "./vp9_rtcd.h"
-#include "vp9/encoder/vp9_quantize.h"
-#include "vp9/common/vp9_reconintra.h"
-#include "vp9/encoder/vp9_encodemb.h"
-#include "vp9/encoder/vp9_encodeintra.h"
-
-int vp9_encode_intra(MACROBLOCK *x, int use_16x16_pred) {
- MB_MODE_INFO * mbmi = &x->e_mbd.mi_8x8[0]->mbmi;
- x->skip_encode = 0;
- mbmi->mode = DC_PRED;
- mbmi->ref_frame[0] = INTRA_FRAME;
- mbmi->tx_size = use_16x16_pred ? (mbmi->sb_type >= BLOCK_16X16 ? TX_16X16
- : TX_8X8)
- : TX_4X4;
- vp9_encode_intra_block_y(x, mbmi->sb_type);
- return vp9_get_mb_ss(x->plane[0].src_diff);
-}
diff --git a/vp9/encoder/vp9_encodeintra.h b/vp9/encoder/vp9_encodeintra.h
deleted file mode 100644
index e217924..0000000
--- a/vp9/encoder/vp9_encodeintra.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef VP9_ENCODER_VP9_ENCODEINTRA_H_
-#define VP9_ENCODER_VP9_ENCODEINTRA_H_
-
-#include "vp9/encoder/vp9_onyx_int.h"
-
-int vp9_encode_intra(MACROBLOCK *x, int use_16x16_pred);
-void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
- TX_SIZE tx_size, void *arg);
-
-#endif // VP9_ENCODER_VP9_ENCODEINTRA_H_
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 7afed26..0e1523b 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -25,6 +25,26 @@
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_tokenize.h"
+void vp9_setup_interp_filters(MACROBLOCKD *xd,
+ INTERPOLATION_TYPE mcomp_filter_type,
+ VP9_COMMON *cm) {
+ if (xd->mi_8x8 && xd->mi_8x8[0]) {
+ MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
+
+ set_scale_factors(xd, mbmi->ref_frame[0] - LAST_FRAME,
+ mbmi->ref_frame[1] - LAST_FRAME,
+ cm->active_ref_scale);
+ } else {
+ set_scale_factors(xd, -1, -1, cm->active_ref_scale);
+ }
+
+ xd->subpix.filter_x = xd->subpix.filter_y =
+ vp9_get_filter_kernel(mcomp_filter_type == SWITCHABLE ?
+ EIGHTTAP : mcomp_filter_type);
+
+ assert(((intptr_t)xd->subpix.filter_x & 0xff) == 0);
+}
+
void vp9_subtract_block_c(int rows, int cols,
int16_t *diff_ptr, ptrdiff_t diff_stride,
const uint8_t *src_ptr, ptrdiff_t src_stride,
@@ -136,16 +156,13 @@
const int16_t *scan, *nb;
const int mul = 1 + (tx_size == TX_32X32);
uint8_t token_cache[1024];
- const int ib = txfrm_block_to_raster_block(plane_bsize, tx_size, block);
const int16_t *dequant_ptr = pd->dequant;
- const uint8_t *const band_translate = (tx_size == TX_4X4 ?
- vp9_coefband_trans_4x4 :
- mb->coefband_trans_8x8plus);
+ const uint8_t *const band_translate = get_band_translate(tx_size);
assert((!type && !plane) || (type && plane));
dqcoeff_ptr = BLOCK_OFFSET(pd->dqcoeff, block);
qcoeff_ptr = BLOCK_OFFSET(pd->qcoeff, block);
- get_scan(xd, tx_size, type, ib, &scan, &nb);
+ get_scan(xd, tx_size, type, block, &scan, &nb);
assert(eob <= default_eob);
/* Now set up a Viterbi trellis to evaluate alternative roundings. */
@@ -422,35 +439,29 @@
MACROBLOCKD *const xd = &x->e_mbd;
struct optimize_ctx *const ctx = args->ctx;
struct macroblockd_plane *const pd = &xd->plane[plane];
- const int raster_block = txfrm_block_to_raster_block(plane_bsize, tx_size,
- block);
-
int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
- uint8_t *const dst = raster_block_offset_uint8(plane_bsize, raster_block,
- pd->dst.buf, pd->dst.stride);
+ int i, j;
+ uint8_t *dst;
+ txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
+ dst = &pd->dst.buf[4 * j * pd->dst.stride + 4 * i];
// TODO(jingning): per transformed block zero forcing only enabled for
// luma component. will integrate chroma components as well.
if (x->zcoeff_blk[tx_size][block] && plane == 0) {
- int i, k;
pd->eobs[block] = 0;
- txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &k);
ctx->ta[plane][i] = 0;
- ctx->tl[plane][k] = 0;
+ ctx->tl[plane][j] = 0;
return;
}
- if (x->select_txfm_size || xd->mi_8x8[0]->mbmi.sb_type < BLOCK_8X8)
+ if (!x->skip_recode)
vp9_xform_quant(plane, block, plane_bsize, tx_size, arg);
- if (x->optimize && (x->select_txfm_size ||
- xd->mi_8x8[0]->mbmi.sb_type < BLOCK_8X8|| !x->skip_optimize)) {
+ if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
vp9_optimize_b(plane, block, plane_bsize, tx_size, x, ctx);
} else {
- int i, k;
- txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &k);
ctx->ta[plane][i] = pd->eobs[block] > 0;
- ctx->tl[plane][k] = pd->eobs[block] > 0;
+ ctx->tl[plane][j] = pd->eobs[block] > 0;
}
if (x->skip_encode || pd->eobs[block] == 0)
@@ -483,12 +494,11 @@
MACROBLOCK *const x = args->x;
MACROBLOCKD *const xd = &x->e_mbd;
struct macroblockd_plane *const pd = &xd->plane[plane];
- const int raster_block = txfrm_block_to_raster_block(plane_bsize, tx_size,
- block);
-
int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
- uint8_t *const dst = raster_block_offset_uint8(plane_bsize, raster_block,
- pd->dst.buf, pd->dst.stride);
+ int i, j;
+ uint8_t *dst;
+ txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
+ dst = &pd->dst.buf[4 * j * pd->dst.stride + 4 * i];
vp9_xform_quant(plane, block, plane_bsize, tx_size, arg);
@@ -515,10 +525,10 @@
struct optimize_ctx ctx;
struct encode_b_args arg = {x, &ctx};
- if (x->select_txfm_size || xd->mi_8x8[0]->mbmi.sb_type < BLOCK_8X8)
+ if (!x->skip_recode)
vp9_subtract_sb(x, bsize);
- if (x->optimize) {
+ if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
int i;
for (i = 0; i < MAX_MB_PLANE; ++i)
optimize_init_b(i, bsize, &arg);
@@ -563,19 +573,22 @@
xoff = 32 * (block & twmask);
yoff = 32 * (block >> twl);
dst = pd->dst.buf + yoff * pd->dst.stride + xoff;
- src = p->src.buf + yoff * p->src.stride + xoff;
- src_diff = p->src_diff + 4 * bw * yoff + xoff;
vp9_predict_intra_block(xd, block, bwl, TX_32X32, mode,
dst, pd->dst.stride, dst, pd->dst.stride);
- vp9_subtract_block(32, 32, src_diff, bw * 4,
- src, p->src.stride, dst, pd->dst.stride);
- if (x->use_lp32x32fdct)
- vp9_fdct32x32_rd(src_diff, coeff, bw * 4);
- else
- vp9_fdct32x32(src_diff, coeff, bw * 4);
- vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
- p->quant, p->quant_shift, qcoeff, dqcoeff,
- pd->dequant, p->zbin_extra, eob, scan, iscan);
+
+ if (!x->skip_recode) {
+ src = p->src.buf + yoff * p->src.stride + xoff;
+ src_diff = p->src_diff + 4 * bw * yoff + xoff;
+ vp9_subtract_block(32, 32, src_diff, bw * 4,
+ src, p->src.stride, dst, pd->dst.stride);
+ if (x->use_lp32x32fdct)
+ vp9_fdct32x32_rd(src_diff, coeff, bw * 4);
+ else
+ vp9_fdct32x32(src_diff, coeff, bw * 4);
+ vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
+ p->quant, p->quant_shift, qcoeff, dqcoeff,
+ pd->dequant, p->zbin_extra, eob, scan, iscan);
+ }
if (!x->skip_encode && *eob)
vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, *eob);
break;
@@ -588,16 +601,18 @@
xoff = 16 * (block & twmask);
yoff = 16 * (block >> twl);
dst = pd->dst.buf + yoff * pd->dst.stride + xoff;
- src = p->src.buf + yoff * p->src.stride + xoff;
- src_diff = p->src_diff + 4 * bw * yoff + xoff;
vp9_predict_intra_block(xd, block, bwl, TX_16X16, mode,
dst, pd->dst.stride, dst, pd->dst.stride);
- vp9_subtract_block(16, 16, src_diff, bw * 4,
- src, p->src.stride, dst, pd->dst.stride);
- vp9_fht16x16(tx_type, src_diff, coeff, bw * 4);
- vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
- p->quant, p->quant_shift, qcoeff, dqcoeff,
- pd->dequant, p->zbin_extra, eob, scan, iscan);
+ if (!x->skip_recode) {
+ src = p->src.buf + yoff * p->src.stride + xoff;
+ src_diff = p->src_diff + 4 * bw * yoff + xoff;
+ vp9_subtract_block(16, 16, src_diff, bw * 4,
+ src, p->src.stride, dst, pd->dst.stride);
+ vp9_fht16x16(tx_type, src_diff, coeff, bw * 4);
+ vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
+ p->quant, p->quant_shift, qcoeff, dqcoeff,
+ pd->dequant, p->zbin_extra, eob, scan, iscan);
+ }
if (!x->skip_encode && *eob)
vp9_iht16x16_add(tx_type, dqcoeff, dst, pd->dst.stride, *eob);
break;
@@ -610,16 +625,18 @@
xoff = 8 * (block & twmask);
yoff = 8 * (block >> twl);
dst = pd->dst.buf + yoff * pd->dst.stride + xoff;
- src = p->src.buf + yoff * p->src.stride + xoff;
- src_diff = p->src_diff + 4 * bw * yoff + xoff;
vp9_predict_intra_block(xd, block, bwl, TX_8X8, mode,
dst, pd->dst.stride, dst, pd->dst.stride);
- vp9_subtract_block(8, 8, src_diff, bw * 4,
- src, p->src.stride, dst, pd->dst.stride);
- vp9_fht8x8(tx_type, src_diff, coeff, bw * 4);
- vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
- p->quant_shift, qcoeff, dqcoeff,
- pd->dequant, p->zbin_extra, eob, scan, iscan);
+ if (!x->skip_recode) {
+ src = p->src.buf + yoff * p->src.stride + xoff;
+ src_diff = p->src_diff + 4 * bw * yoff + xoff;
+ vp9_subtract_block(8, 8, src_diff, bw * 4,
+ src, p->src.stride, dst, pd->dst.stride);
+ vp9_fht8x8(tx_type, src_diff, coeff, bw * 4);
+ vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
+ p->quant_shift, qcoeff, dqcoeff,
+ pd->dequant, p->zbin_extra, eob, scan, iscan);
+ }
if (!x->skip_encode && *eob)
vp9_iht8x8_add(tx_type, dqcoeff, dst, pd->dst.stride, *eob);
break;
@@ -635,19 +652,23 @@
xoff = 4 * (block & twmask);
yoff = 4 * (block >> twl);
dst = pd->dst.buf + yoff * pd->dst.stride + xoff;
- src = p->src.buf + yoff * p->src.stride + xoff;
- src_diff = p->src_diff + 4 * bw * yoff + xoff;
vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode,
dst, pd->dst.stride, dst, pd->dst.stride);
- vp9_subtract_block(4, 4, src_diff, bw * 4,
- src, p->src.stride, dst, pd->dst.stride);
- if (tx_type != DCT_DCT)
- vp9_short_fht4x4(src_diff, coeff, bw * 4, tx_type);
- else
- x->fwd_txm4x4(src_diff, coeff, bw * 4);
- vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
- p->quant_shift, qcoeff, dqcoeff,
- pd->dequant, p->zbin_extra, eob, scan, iscan);
+
+ if (!x->skip_recode) {
+ src = p->src.buf + yoff * p->src.stride + xoff;
+ src_diff = p->src_diff + 4 * bw * yoff + xoff;
+ vp9_subtract_block(4, 4, src_diff, bw * 4,
+ src, p->src.stride, dst, pd->dst.stride);
+ if (tx_type != DCT_DCT)
+ vp9_short_fht4x4(src_diff, coeff, bw * 4, tx_type);
+ else
+ x->fwd_txm4x4(src_diff, coeff, bw * 4);
+ vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
+ p->quant_shift, qcoeff, dqcoeff,
+ pd->dequant, p->zbin_extra, eob, scan, iscan);
+ }
+
if (!x->skip_encode && *eob) {
if (tx_type == DCT_DCT)
// this is like vp9_short_idct4x4 but has a special case around eob<=1
@@ -678,3 +699,14 @@
foreach_transformed_block_uv(xd, bsize, vp9_encode_block_intra, &arg);
}
+int vp9_encode_intra(MACROBLOCK *x, int use_16x16_pred) {
+ MB_MODE_INFO * mbmi = &x->e_mbd.mi_8x8[0]->mbmi;
+ x->skip_encode = 0;
+ mbmi->mode = DC_PRED;
+ mbmi->ref_frame[0] = INTRA_FRAME;
+ mbmi->tx_size = use_16x16_pred ? (mbmi->sb_type >= BLOCK_16X16 ? TX_16X16
+ : TX_8X8)
+ : TX_4X4;
+ vp9_encode_intra_block_y(x, mbmi->sb_type);
+ return vp9_get_mb_ss(x->plane[0].src_diff);
+}
diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h
index 61dd735..7be6621 100644
--- a/vp9/encoder/vp9_encodemb.h
+++ b/vp9/encoder/vp9_encodemb.h
@@ -47,8 +47,14 @@
void vp9_subtract_sbuv(MACROBLOCK *x, BLOCK_SIZE bsize);
void vp9_subtract_sb(MACROBLOCK *x, BLOCK_SIZE bsize);
+void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
+ TX_SIZE tx_size, void *arg);
+
void vp9_encode_intra_block_y(MACROBLOCK *x, BLOCK_SIZE bsize);
void vp9_encode_intra_block_uv(MACROBLOCK *x, BLOCK_SIZE bsize);
-
+int vp9_encode_intra(MACROBLOCK *x, int use_16x16_pred);
+void vp9_setup_interp_filters(MACROBLOCKD *xd,
+ INTERPOLATION_TYPE mcomp_filter_type,
+ VP9_COMMON *cm);
#endif // VP9_ENCODER_VP9_ENCODEMB_H_
diff --git a/vp9/encoder/vp9_encodemv.c b/vp9/encoder/vp9_encodemv.c
index 030ca64..7e838c9 100644
--- a/vp9/encoder/vp9_encodemv.c
+++ b/vp9/encoder/vp9_encodemv.c
@@ -126,20 +126,15 @@
static int update_mv(vp9_writer *w, const unsigned int ct[2], vp9_prob *cur_p,
vp9_prob upd_p) {
- const vp9_prob new_p = get_binary_prob(ct[0], ct[1]);
- vp9_prob mod_p = new_p | 1;
- const int cur_b = cost_branch256(ct, *cur_p);
- const int mod_b = cost_branch256(ct, mod_p);
- const int cost = 7 * 256 + (vp9_cost_one(upd_p) - vp9_cost_zero(upd_p));
- if (cur_b - mod_b > cost) {
- *cur_p = mod_p;
- vp9_write(w, 1, upd_p);
- vp9_write_literal(w, mod_p >> 1, 7);
- return 1;
- } else {
- vp9_write(w, 0, upd_p);
- return 0;
+ const vp9_prob new_p = get_binary_prob(ct[0], ct[1]) | 1;
+ const int update = cost_branch256(ct, *cur_p) + vp9_cost_zero(upd_p) >
+ cost_branch256(ct, new_p) + vp9_cost_one(upd_p) + 7 * 256;
+ vp9_write(w, update, upd_p);
+ if (update) {
+ *cur_p = new_p;
+ vp9_write_literal(w, new_p >> 1, 7);
}
+ return update;
}
static void counts_to_nmv_context(
diff --git a/vp9/common/vp9_extend.c b/vp9/encoder/vp9_extend.c
similarity index 98%
rename from vp9/common/vp9_extend.c
rename to vp9/encoder/vp9_extend.c
index 07c68c8..dcbb5ac 100644
--- a/vp9/common/vp9_extend.c
+++ b/vp9/encoder/vp9_extend.c
@@ -11,7 +11,7 @@
#include "vpx_mem/vpx_mem.h"
#include "vp9/common/vp9_common.h"
-#include "vp9/common/vp9_extend.h"
+#include "vp9/encoder/vp9_extend.h"
static void copy_and_extend_plane(const uint8_t *src, int src_pitch,
uint8_t *dst, int dst_pitch,
@@ -62,7 +62,7 @@
const int et_y = 16;
const int el_y = 16;
// Motion estimation may use src block variance with the block size up
- // to 64x64, so the right and bottom need to be extended to 64 mulitple
+ // to 64x64, so the right and bottom need to be extended to 64 multiple
// or up to 16, whichever is greater.
const int eb_y = MAX(ALIGN_POWER_OF_TWO(src->y_width, 6) - src->y_width,
16);
diff --git a/vp9/common/vp9_extend.h b/vp9/encoder/vp9_extend.h
similarity index 100%
rename from vp9/common/vp9_extend.h
rename to vp9/encoder/vp9_extend.h
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 4d6da51..377cffb 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -11,17 +11,16 @@
#include <math.h>
#include <limits.h>
#include <stdio.h>
+#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_block.h"
-#include "vp9/encoder/vp9_onyx_int.h"
-#include "vp9/encoder/vp9_variance.h"
-#include "vp9/encoder/vp9_encodeintra.h"
-#include "vp9/encoder/vp9_mcomp.h"
-#include "vp9/encoder/vp9_firstpass.h"
-#include "vpx_scale/vpx_scale.h"
#include "vp9/encoder/vp9_encodeframe.h"
#include "vp9/encoder/vp9_encodemb.h"
-#include "vp9/common/vp9_extend.h"
-#include "vp9/common/vp9_systemdependent.h"
+#include "vp9/encoder/vp9_extend.h"
+#include "vp9/encoder/vp9_firstpass.h"
+#include "vp9/encoder/vp9_mcomp.h"
+#include "vp9/encoder/vp9_onyx_int.h"
+#include "vp9/encoder/vp9_variance.h"
+#include "vpx_scale/vpx_scale.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_scale/yv12config.h"
#include "vp9/encoder/vp9_quantize.h"
@@ -407,8 +406,6 @@
// for first pass test
while ((quart_frm << sr) < MAX_FULL_PEL_VAL)
sr++;
- if (sr)
- sr--;
step_param += sr;
further_steps -= sr;
@@ -535,6 +532,7 @@
pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1];
pd[i].eobs = ctx->eobs_pbuf[i][1];
}
+ x->skip_recode = 0;
// Initialise the MV cost table to the defaults
diff --git a/vp9/encoder/vp9_lookahead.c b/vp9/encoder/vp9_lookahead.c
index c28c868..277bd7d 100644
--- a/vp9/encoder/vp9_lookahead.c
+++ b/vp9/encoder/vp9_lookahead.c
@@ -12,8 +12,8 @@
#include "./vpx_config.h"
#include "vp9/common/vp9_common.h"
+#include "vp9/encoder/vp9_extend.h"
#include "vp9/encoder/vp9_lookahead.h"
-#include "vp9/common/vp9_extend.h"
struct lookahead_ctx {
unsigned int max_sz; /* Absolute size of the queue */
diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c
index 7b605b2..9870738 100644
--- a/vp9/encoder/vp9_mbgraph.c
+++ b/vp9/encoder/vp9_mbgraph.c
@@ -11,7 +11,6 @@
#include <limits.h>
#include "vpx_mem/vpx_mem.h"
-#include "vp9/encoder/vp9_encodeintra.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_segmentation.h"
#include "vp9/encoder/vp9_mcomp.h"
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index a52f5b1..a383164 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -51,9 +51,6 @@
while ((size << sr) < MAX_FULL_PEL_VAL)
sr++;
- if (sr)
- sr--;
-
sr += cpi->sf.reduce_first_step_size;
sr = MIN(sr, (cpi->sf.max_step_search_steps - 2));
return sr;
diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h
index bcab679..10c2e4f 100644
--- a/vp9/encoder/vp9_mcomp.h
+++ b/vp9/encoder/vp9_mcomp.h
@@ -18,8 +18,9 @@
// The maximum number of steps in a step search given the largest
// allowed initial step
#define MAX_MVSEARCH_STEPS 11
-// Max full pel mv specified in 1 pel units
-#define MAX_FULL_PEL_VAL ((1 << (MAX_MVSEARCH_STEPS)) - 1)
+// Max full pel mv specified in the unit of full pixel
+// Enable the use of motion vector in range [-1023, 1023].
+#define MAX_FULL_PEL_VAL ((1 << (MAX_MVSEARCH_STEPS - 1)) - 1)
// Maximum size of the first step in full pel units
#define MAX_FIRST_STEP (1 << (MAX_MVSEARCH_STEPS-1))
// Allowed motion vector pixel distance outside image border
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index a383378..b7874d5 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -1222,13 +1222,6 @@
cpi->fixed_divide[0] = 0;
for (i = 1; i < 512; i++)
cpi->fixed_divide[i] = 0x80000 / i;
-
- vpx_memset(cpi->mb.coefband_trans_8x8plus,
- (COEF_BANDS-1),
- sizeof(cpi->mb.coefband_trans_8x8plus));
- vpx_memcpy(cpi->mb.coefband_trans_8x8plus,
- vp9_coefband_trans_8x8plus,
- sizeof(vp9_coefband_trans_8x8plus));
}
@@ -1450,7 +1443,7 @@
CHECK_MEM_ERROR(cm, ctx->zcoeff_blk,
vpx_calloc(num_4x4_blk, sizeof(uint8_t)));
for (i = 0; i < MAX_MB_PLANE; ++i) {
- for (k = 0; k < 2; ++k) {
+ for (k = 0; k < 3; ++k) {
CHECK_MEM_ERROR(cm, ctx->coeff[i][k],
vpx_memalign(16, num_pix * sizeof(int16_t)));
CHECK_MEM_ERROR(cm, ctx->qcoeff[i][k],
@@ -1472,7 +1465,7 @@
vpx_free(ctx->zcoeff_blk);
ctx->zcoeff_blk = 0;
for (i = 0; i < MAX_MB_PLANE; ++i) {
- for (k = 0; k < 2; ++k) {
+ for (k = 0; k < 3; ++k) {
vpx_free(ctx->coeff[i][k]);
ctx->coeff[i][k] = 0;
vpx_free(ctx->qcoeff[i][k]);
@@ -3441,7 +3434,9 @@
// Post encode loop adjustment of Q prediction.
if (!active_worst_qchanged)
- vp9_update_rate_correction_factors(cpi, (cpi->sf.recode_loop) ? 2 : 0);
+ vp9_update_rate_correction_factors(cpi, (cpi->sf.recode_loop ||
+ cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) ? 2 : 0);
+
cpi->last_q[cm->frame_type] = cm->base_qindex;
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 695a2e2..5d37f83 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -18,7 +18,6 @@
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_modecosts.h"
-#include "vp9/encoder/vp9_encodeintra.h"
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
@@ -246,7 +245,8 @@
vp9_set_speed_features(cpi);
- cpi->mb.select_txfm_size = cpi->sf.tx_size_search_method == USE_LARGESTALL ?
+ cpi->mb.select_txfm_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
+ cm->frame_type != KEY_FRAME) ?
0 : 1;
set_block_thresholds(cpi);
@@ -612,8 +612,9 @@
// TODO(jingning): temporarily enabled only for luma component
rd = MIN(rd1, rd2);
- if (!xd->lossless && plane == 0)
- x->zcoeff_blk[tx_size][block] = rd1 > rd2 || !xd->plane[plane].eobs[block];
+ if (plane == 0)
+ x->zcoeff_blk[tx_size][block] = !xd->plane[plane].eobs[block] ||
+ (rd1 > rd2 && !xd->lossless);
args->this_rate += args->rate;
args->this_dist += args->dist;
@@ -1026,10 +1027,10 @@
struct macroblockd_plane *pd = &xd->plane[0];
const int src_stride = p->src.stride;
const int dst_stride = pd->dst.stride;
- uint8_t *src_init = raster_block_offset_uint8(BLOCK_8X8, ib,
- p->src.buf, src_stride);
- uint8_t *dst_init = raster_block_offset_uint8(BLOCK_8X8, ib,
- pd->dst.buf, dst_stride);
+ const uint8_t *src_init = &p->src.buf[raster_block_offset(BLOCK_8X8, ib,
+ src_stride)];
+ uint8_t *dst_init = &pd->dst.buf[raster_block_offset(BLOCK_8X8, ib,
+ dst_stride)];
int16_t *src_diff, *coeff;
ENTROPY_CONTEXT ta[2], tempa[2];
@@ -1071,7 +1072,7 @@
int64_t ssz;
const int16_t *scan;
const int16_t *nb;
- uint8_t *src = src_init + idx * 4 + idy * 4 * src_stride;
+ const uint8_t *src = src_init + idx * 4 + idy * 4 * src_stride;
uint8_t *dst = dst_init + idx * 4 + idy * 4 * dst_stride;
const int block = ib + idy * 2 + idx;
TX_TYPE tx_type;
@@ -1329,6 +1330,7 @@
}
static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
+ PICK_MODE_CONTEXT *ctx,
int *rate, int *rate_tokenonly,
int64_t *distortion, int *skippable,
BLOCK_SIZE bsize) {
@@ -1364,6 +1366,27 @@
*rate_tokenonly = this_rate_tokenonly;
*distortion = this_distortion;
*skippable = s;
+ if (!x->select_txfm_size) {
+ int i;
+ struct macroblock_plane *const p = x->plane;
+ struct macroblockd_plane *const pd = x->e_mbd.plane;
+ for (i = 1; i < MAX_MB_PLANE; ++i) {
+ p[i].coeff = ctx->coeff_pbuf[i][2];
+ pd[i].qcoeff = ctx->qcoeff_pbuf[i][2];
+ pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2];
+ pd[i].eobs = ctx->eobs_pbuf[i][2];
+
+ ctx->coeff_pbuf[i][2] = ctx->coeff_pbuf[i][0];
+ ctx->qcoeff_pbuf[i][2] = ctx->qcoeff_pbuf[i][0];
+ ctx->dqcoeff_pbuf[i][2] = ctx->dqcoeff_pbuf[i][0];
+ ctx->eobs_pbuf[i][2] = ctx->eobs_pbuf[i][0];
+
+ ctx->coeff_pbuf[i][0] = p[i].coeff;
+ ctx->qcoeff_pbuf[i][0] = pd[i].qcoeff;
+ ctx->dqcoeff_pbuf[i][0] = pd[i].dqcoeff;
+ ctx->eobs_pbuf[i][0] = pd[i].eobs;
+ }
+ }
}
}
@@ -1389,8 +1412,9 @@
return this_rd;
}
-static void choose_intra_uv_mode(VP9_COMP *cpi, BLOCK_SIZE bsize,
- int *rate_uv, int *rate_uv_tokenonly,
+static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
+ BLOCK_SIZE bsize, int *rate_uv,
+ int *rate_uv_tokenonly,
int64_t *dist_uv, int *skip_uv,
MB_PREDICTION_MODE *mode_uv) {
MACROBLOCK *const x = &cpi->mb;
@@ -1403,7 +1427,7 @@
// Else do a proper rd search for each possible transform size that may
// be considered in the main rd loop.
} else {
- rd_pick_intra_sbuv_mode(cpi, x,
+ rd_pick_intra_sbuv_mode(cpi, x, ctx,
rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
}
@@ -1530,16 +1554,16 @@
const int height = plane_block_height(bsize, pd);
int idx, idy;
- uint8_t *const src = raster_block_offset_uint8(BLOCK_8X8, i,
- p->src.buf, p->src.stride);
- uint8_t *const dst = raster_block_offset_uint8(BLOCK_8X8, i,
- pd->dst.buf, pd->dst.stride);
+ const uint8_t *const src = &p->src.buf[raster_block_offset(BLOCK_8X8, i,
+ p->src.stride)];
+ uint8_t *const dst = &pd->dst.buf[raster_block_offset(BLOCK_8X8, i,
+ pd->dst.stride)];
int64_t thisdistortion = 0, thissse = 0;
int thisrate = 0, ref;
const int is_compound = has_second_ref(&mi->mbmi);
for (ref = 0; ref < 1 + is_compound; ++ref) {
- const uint8_t *pre = raster_block_offset_uint8(BLOCK_8X8, i,
- pd->pre[ref].buf, pd->pre[ref].stride);
+ const uint8_t *pre = &pd->pre[ref].buf[raster_block_offset(BLOCK_8X8, i,
+ pd->pre[ref].stride)];
vp9_build_inter_predictor(pre, pd->pre[ref].stride,
dst, pd->dst.stride,
&mi->bmi[i].as_mv[ref].as_mv,
@@ -1627,14 +1651,13 @@
struct macroblock_plane *const p = &x->plane[0];
struct macroblockd_plane *const pd = &x->e_mbd.plane[0];
- p->src.buf = raster_block_offset_uint8(BLOCK_8X8, i, p->src.buf,
- p->src.stride);
+ p->src.buf = &p->src.buf[raster_block_offset(BLOCK_8X8, i, p->src.stride)];
assert(((intptr_t)pd->pre[0].buf & 0x7) == 0);
- pd->pre[0].buf = raster_block_offset_uint8(BLOCK_8X8, i, pd->pre[0].buf,
- pd->pre[0].stride);
+ pd->pre[0].buf = &pd->pre[0].buf[raster_block_offset(BLOCK_8X8, i,
+ pd->pre[0].stride)];
if (has_second_ref(mbmi))
- pd->pre[1].buf = raster_block_offset_uint8(BLOCK_8X8, i, pd->pre[1].buf,
- pd->pre[1].stride);
+ pd->pre[1].buf = &pd->pre[1].buf[raster_block_offset(BLOCK_8X8, i,
+ pd->pre[1].stride)];
}
static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,
@@ -2613,6 +2636,16 @@
vpx_free(second_pred);
}
+static INLINE void restore_dst_buf(MACROBLOCKD *xd,
+ uint8_t *orig_dst[MAX_MB_PLANE],
+ int orig_dst_stride[MAX_MB_PLANE]) {
+ int i;
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ xd->plane[i].dst.buf = orig_dst[i];
+ xd->plane[i].dst.stride = orig_dst_stride[i];
+ }
+}
+
static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
const TileInfo *const tile,
BLOCK_SIZE bsize,
@@ -2764,6 +2797,7 @@
if (is_comp_pred)
intpel_mv &= (mbmi->mv[1].as_mv.row & 15) == 0 &&
(mbmi->mv[1].as_mv.col & 15) == 0;
+
// Search for best switchable filter by checking the variance of
// pred error irrespective of whether the filter will be used
if (cm->mcomp_filter_type != BILINEAR) {
@@ -2803,10 +2837,7 @@
(cm->mcomp_filter_type != SWITCHABLE &&
(cm->mcomp_filter_type == mbmi->interp_filter ||
(i == 0 && intpel_mv)))) {
- for (j = 0; j < MAX_MB_PLANE; j++) {
- xd->plane[j].dst.buf = orig_dst[j];
- xd->plane[j].dst.stride = orig_dst_stride[j];
- }
+ restore_dst_buf(xd, orig_dst, orig_dst_stride);
} else {
for (j = 0; j < MAX_MB_PLANE; j++) {
xd->plane[j].dst.buf = tmp_buf + j * 64 * 64;
@@ -2830,10 +2861,7 @@
}
if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
if (rd / 2 > ref_best_rd) {
- for (i = 0; i < MAX_MB_PLANE; i++) {
- xd->plane[i].dst.buf = orig_dst[i];
- xd->plane[i].dst.stride = orig_dst_stride[i];
- }
+ restore_dst_buf(xd, orig_dst, orig_dst_stride);
return INT64_MAX;
}
}
@@ -2852,11 +2880,7 @@
pred_exists = 1;
}
}
-
- for (i = 0; i < MAX_MB_PLANE; i++) {
- xd->plane[i].dst.buf = orig_dst[i];
- xd->plane[i].dst.stride = orig_dst_stride[i];
- }
+ restore_dst_buf(xd, orig_dst, orig_dst_stride);
}
}
// Set the appropriate filter
@@ -2888,10 +2912,7 @@
// if current pred_error modeled rd is substantially more than the best
// so far, do not bother doing full rd
if (rd / 2 > ref_best_rd) {
- for (i = 0; i < MAX_MB_PLANE; i++) {
- xd->plane[i].dst.buf = orig_dst[i];
- xd->plane[i].dst.stride = orig_dst_stride[i];
- }
+ restore_dst_buf(xd, orig_dst, orig_dst_stride);
return INT64_MAX;
}
}
@@ -2994,10 +3015,7 @@
if (*rate_y == INT_MAX) {
*rate2 = INT_MAX;
*distortion = INT64_MAX;
- for (i = 0; i < MAX_MB_PLANE; i++) {
- xd->plane[i].dst.buf = orig_dst[i];
- xd->plane[i].dst.stride = orig_dst_stride[i];
- }
+ restore_dst_buf(xd, orig_dst, orig_dst_stride);
return INT64_MAX;
}
@@ -3012,10 +3030,7 @@
if (*rate_uv == INT_MAX) {
*rate2 = INT_MAX;
*distortion = INT64_MAX;
- for (i = 0; i < MAX_MB_PLANE; i++) {
- xd->plane[i].dst.buf = orig_dst[i];
- xd->plane[i].dst.stride = orig_dst_stride[i];
- }
+ restore_dst_buf(xd, orig_dst, orig_dst_stride);
return INT64_MAX;
}
@@ -3025,20 +3040,17 @@
*skippable = skippable_y && skippable_uv;
}
- for (i = 0; i < MAX_MB_PLANE; i++) {
- xd->plane[i].dst.buf = orig_dst[i];
- xd->plane[i].dst.stride = orig_dst_stride[i];
- }
-
+ restore_dst_buf(xd, orig_dst, orig_dst_stride);
return this_rd; // if 0, this will be re-calculated by caller
}
-static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
- int i;
+static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
+ int max_plane) {
struct macroblock_plane *const p = x->plane;
struct macroblockd_plane *const pd = x->e_mbd.plane;
+ int i;
- for (i = 0; i < MAX_MB_PLANE; ++i) {
+ for (i = 0; i < max_plane; ++i) {
p[i].coeff = ctx->coeff_pbuf[i][1];
pd[i].qcoeff = ctx->qcoeff_pbuf[i][1];
pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1];
@@ -3075,7 +3087,7 @@
*returnrate = INT_MAX;
return;
}
- rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
+ rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
&dist_uv, &uv_skip, bsize);
} else {
y_skip = 0;
@@ -3084,7 +3096,7 @@
*returnrate = INT_MAX;
return;
}
- rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
+ rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
&dist_uv, &uv_skip, BLOCK_8X8);
}
@@ -3450,7 +3462,7 @@
uv_tx = MIN(mbmi->tx_size, max_uv_txsize_lookup[bsize]);
if (rate_uv_intra[uv_tx] == INT_MAX) {
- choose_intra_uv_mode(cpi, bsize, &rate_uv_intra[uv_tx],
+ choose_intra_uv_mode(cpi, ctx, bsize, &rate_uv_intra[uv_tx],
&rate_uv_tokenonly[uv_tx],
&dist_uv[uv_tx], &skip_uv[uv_tx],
&mode_uv[uv_tx]);
@@ -3584,6 +3596,7 @@
// Did this mode help.. i.e. is it the new best mode
if (this_rd < best_rd || x->skip) {
+ int max_plane = MAX_MB_PLANE;
if (!mode_excluded) {
// Note index of best mode so far
best_mode_index = mode_index;
@@ -3591,6 +3604,7 @@
if (ref_frame == INTRA_FRAME) {
/* required for left and above block mv */
mbmi->mv[0].as_int = 0;
+ max_plane = 1;
}
*returnrate = rate2;
@@ -3599,7 +3613,7 @@
best_mbmode = *mbmi;
best_skip2 = this_skip2;
if (!x->select_txfm_size)
- swap_block_ptr(x, ctx);
+ swap_block_ptr(x, ctx, max_plane);
vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
sizeof(uint8_t) * ctx->num_4x4_blk);
@@ -3706,7 +3720,7 @@
// Do Intra UV best rd mode selection if best mode choice above was intra.
if (vp9_mode_order[best_mode_index].ref_frame == INTRA_FRAME) {
TX_SIZE uv_tx_size = get_uv_tx_size(mbmi);
- rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_intra[uv_tx_size],
+ rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size],
&rate_uv_tokenonly[uv_tx_size],
&dist_uv[uv_tx_size],
&skip_uv[uv_tx_size],
@@ -4075,7 +4089,7 @@
distortion2 += distortion_y;
if (rate_uv_intra[TX_4X4] == INT_MAX) {
- choose_intra_uv_mode(cpi, bsize, &rate_uv_intra[TX_4X4],
+ choose_intra_uv_mode(cpi, ctx, bsize, &rate_uv_intra[TX_4X4],
&rate_uv_tokenonly[TX_4X4],
&dist_uv[TX_4X4], &skip_uv[TX_4X4],
&mode_uv[TX_4X4]);
@@ -4329,12 +4343,14 @@
// Did this mode help.. i.e. is it the new best mode
if (this_rd < best_rd || x->skip) {
if (!mode_excluded) {
+ int max_plane = MAX_MB_PLANE;
// Note index of best mode so far
best_mode_index = mode_index;
if (ref_frame == INTRA_FRAME) {
/* required for left and above block mv */
mbmi->mv[0].as_int = 0;
+ max_plane = 1;
}
*returnrate = rate2;
@@ -4345,7 +4361,7 @@
best_mbmode = *mbmi;
best_skip2 = this_skip2;
if (!x->select_txfm_size)
- swap_block_ptr(x, ctx);
+ swap_block_ptr(x, ctx, max_plane);
vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
sizeof(uint8_t) * ctx->num_4x4_blk);
@@ -4452,7 +4468,7 @@
// Do Intra UV best rd mode selection if best mode choice above was intra.
if (vp9_ref_order[best_mode_index].ref_frame == INTRA_FRAME) {
TX_SIZE uv_tx_size = get_uv_tx_size(mbmi);
- rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_intra[uv_tx_size],
+ rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size],
&rate_uv_tokenonly[uv_tx_size],
&dist_uv[uv_tx_size],
&skip_uv[uv_tx_size],
diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h
index 92fb235..f0e8849 100644
--- a/vp9/encoder/vp9_rdopt.h
+++ b/vp9/encoder/vp9_rdopt.h
@@ -8,10 +8,11 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-
#ifndef VP9_ENCODER_VP9_RDOPT_H_
#define VP9_ENCODER_VP9_RDOPT_H_
+#include "vp9/encoder/vp9_onyx_int.h"
+
#define RDDIV_BITS 7
#define RDCOST(RM, DM, R, D) \
diff --git a/vp9/encoder/vp9_sad_c.c b/vp9/encoder/vp9_sad_c.c
index 42ddb21..55d595b 100644
--- a/vp9/encoder/vp9_sad_c.c
+++ b/vp9/encoder/vp9_sad_c.c
@@ -10,11 +10,11 @@
#include <stdlib.h>
-#include "vp9/common/vp9_sadmxn.h"
-#include "vp9/encoder/vp9_variance.h"
-#include "./vpx_config.h"
-#include "vpx/vpx_integer.h"
#include "./vp9_rtcd.h"
+#include "./vpx_config.h"
+#include "vp9/encoder/vp9_sadmxn.h"
+#include "vp9/encoder/vp9_variance.h"
+#include "vpx/vpx_integer.h"
#define sad_mxn_func(m, n) \
unsigned int vp9_sad##m##x##n##_c(const uint8_t *src_ptr, \
diff --git a/vp9/common/vp9_sadmxn.h b/vp9/encoder/vp9_sadmxn.h
similarity index 100%
rename from vp9/common/vp9_sadmxn.h
rename to vp9/encoder/vp9_sadmxn.h
diff --git a/vp9/encoder/vp9_subexp.c b/vp9/encoder/vp9_subexp.c
index 387fc90..f31e568 100644
--- a/vp9/encoder/vp9_subexp.c
+++ b/vp9/encoder/vp9_subexp.c
@@ -14,7 +14,6 @@
#include "vp9/encoder/vp9_boolhuff.h"
#include "vp9/encoder/vp9_treewriter.h"
-#define vp9_cost_upd ((int)(vp9_cost_one(upd) - vp9_cost_zero(upd)) >> 8)
#define vp9_cost_upd256 ((int)(vp9_cost_one(upd) - vp9_cost_zero(upd)))
static int update_bits[255];
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index 2cace03..3bffb12 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -11,22 +11,22 @@
#include <math.h>
#include <limits.h>
-#include "vp9/common/vp9_onyxc_int.h"
-#include "vp9/common/vp9_reconinter.h"
-#include "vp9/encoder/vp9_onyx_int.h"
-#include "vp9/common/vp9_systemdependent.h"
-#include "vp9/encoder/vp9_quantize.h"
#include "vp9/common/vp9_alloccommon.h"
-#include "vp9/encoder/vp9_mcomp.h"
-#include "vp9/encoder/vp9_firstpass.h"
-#include "vp9/encoder/vp9_psnr.h"
-#include "vpx_scale/vpx_scale.h"
-#include "vp9/common/vp9_extend.h"
-#include "vp9/encoder/vp9_ratectrl.h"
+#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/common/vp9_quant_common.h"
+#include "vp9/common/vp9_reconinter.h"
+#include "vp9/common/vp9_systemdependent.h"
+#include "vp9/encoder/vp9_extend.h"
+#include "vp9/encoder/vp9_firstpass.h"
+#include "vp9/encoder/vp9_mcomp.h"
+#include "vp9/encoder/vp9_onyx_int.h"
+#include "vp9/encoder/vp9_psnr.h"
+#include "vp9/encoder/vp9_quantize.h"
+#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_segmentation.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/vpx_timer.h"
+#include "vpx_scale/vpx_scale.h"
#define ALT_REF_MC_ENABLED 1 // dis/enable MC in AltRef filtering
#define ALT_REF_SUBPEL_ENABLED 1 // dis/enable subpel in MC AltRef filtering
diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c
index 3d21ea8..c7336d0 100644
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -115,9 +115,7 @@
vp9_coeff_count *const counts = cpi->coef_counts[tx_size];
vp9_coeff_probs_model *const coef_probs = cpi->common.fc.coef_probs[tx_size];
const int ref = is_inter_block(mbmi);
- const uint8_t *const band_translate = (tx_size == TX_4X4 ?
- vp9_coefband_trans_4x4 :
- cpi->mb.coefband_trans_8x8plus);
+ const uint8_t *const band_translate = get_band_translate(tx_size);
const int seg_eob = get_tx_eob(&cpi->common.seg, segment_id, tx_size);
int aoff, loff;
txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &aoff, &loff);
diff --git a/vp9/encoder/vp9_treewriter.h b/vp9/encoder/vp9_treewriter.h
index eeda5cd..41d1bfb 100644
--- a/vp9/encoder/vp9_treewriter.h
+++ b/vp9/encoder/vp9_treewriter.h
@@ -19,31 +19,20 @@
#include "vp9/encoder/vp9_boolhuff.h" /* for now */
-
#define vp9_write_prob(w, v) vp9_write_literal((w), (v), 8)
-/* Approximate length of an encoded bool in 256ths of a bit at given prob */
+#define vp9_cost_zero(prob) (vp9_prob_cost[prob])
-#define vp9_cost_zero(x) (vp9_prob_cost[x])
-#define vp9_cost_one(x) vp9_cost_zero(vp9_complement(x))
+#define vp9_cost_one(prob) vp9_cost_zero(vp9_complement(prob))
-#define vp9_cost_bit(x, b) vp9_cost_zero((b) ? vp9_complement(x) : (x))
+#define vp9_cost_bit(prob, bit) vp9_cost_zero((bit) ? vp9_complement(prob) \
+ : (prob))
-/* VP8BC version is scaled by 2^20 rather than 2^8; see bool_coder.h */
-
-
-/* Both of these return bits, not scaled bits. */
static INLINE unsigned int cost_branch256(const unsigned int ct[2],
vp9_prob p) {
return ct[0] * vp9_cost_zero(p) + ct[1] * vp9_cost_one(p);
}
-static INLINE unsigned int cost_branch(const unsigned int ct[2],
- vp9_prob p) {
- return cost_branch256(ct, p) >> 8;
-}
-
-
static INLINE void treed_write(vp9_writer *w,
vp9_tree tree, const vp9_prob *probs,
int bits, int len) {
diff --git a/vp9/encoder/x86/vp9_dct_sse2.c b/vp9/encoder/x86/vp9_dct_sse2.c
index dc11501..fefca66 100644
--- a/vp9/encoder/x86/vp9_dct_sse2.c
+++ b/vp9/encoder/x86/vp9_dct_sse2.c
@@ -206,12 +206,12 @@
const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
__m128i u[8], v[8];
__m128i in7 = _mm_add_epi16(in[0], in[1]);
- in7 = _mm_sub_epi16(in7, in[3]);
u[0] = _mm_unpacklo_epi16(in[0], in[1]);
u[1] = _mm_unpacklo_epi16(in[2], in[3]);
u[2] = _mm_unpacklo_epi16(in7, kZero);
u[3] = _mm_unpacklo_epi16(in[2], kZero);
+ u[4] = _mm_unpacklo_epi16(in[3], kZero);
v[0] = _mm_madd_epi16(u[0], k__sinpi_p01_p02); // s0 + s2
v[1] = _mm_madd_epi16(u[1], k__sinpi_p03_p04); // s4 + s5
@@ -219,9 +219,10 @@
v[3] = _mm_madd_epi16(u[0], k__sinpi_p04_m01); // s1 - s3
v[4] = _mm_madd_epi16(u[1], k__sinpi_m03_p02); // -s4 + s6
v[5] = _mm_madd_epi16(u[3], k__sinpi_p03_p03); // s4
+ v[6] = _mm_madd_epi16(u[4], k__sinpi_p03_p03);
u[0] = _mm_add_epi32(v[0], v[1]);
- u[1] = v[2];
+ u[1] = _mm_sub_epi32(v[2], v[6]);
u[2] = _mm_add_epi32(v[3], v[4]);
u[3] = _mm_sub_epi32(u[2], u[0]);
u[4] = _mm_slli_epi32(v[5], 2);
diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk
index 2dd2bf0..c566765 100644
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -21,7 +21,6 @@
VP9_COMMON_SRCS-yes += common/vp9_entropy.c
VP9_COMMON_SRCS-yes += common/vp9_entropymode.c
VP9_COMMON_SRCS-yes += common/vp9_entropymv.c
-VP9_COMMON_SRCS-yes += common/vp9_extend.c
VP9_COMMON_SRCS-yes += common/vp9_filter.c
VP9_COMMON_SRCS-yes += common/vp9_filter.h
VP9_COMMON_SRCS-yes += common/vp9_findnearmv.c
@@ -34,7 +33,6 @@
VP9_COMMON_SRCS-yes += common/vp9_entropymode.h
VP9_COMMON_SRCS-yes += common/vp9_entropymv.h
VP9_COMMON_SRCS-yes += common/vp9_enums.h
-VP9_COMMON_SRCS-yes += common/vp9_extend.h
VP9_COMMON_SRCS-yes += common/vp9_findnearmv.h
VP9_COMMON_SRCS-yes += common/vp9_idct.h
VP9_COMMON_SRCS-yes += common/vp9_loopfilter.h
@@ -47,7 +45,6 @@
VP9_COMMON_SRCS-yes += common/vp9_reconintra.h
VP9_COMMON_SRCS-yes += common/vp9_rtcd.c
VP9_COMMON_SRCS-yes += common/vp9_rtcd_defs.sh
-VP9_COMMON_SRCS-yes += common/vp9_sadmxn.h
VP9_COMMON_SRCS-yes += common/vp9_scale.h
VP9_COMMON_SRCS-yes += common/vp9_scale.c
VP9_COMMON_SRCS-yes += common/vp9_seg_common.h
@@ -123,6 +120,7 @@
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_convolve_neon.c
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_idct16x16_neon.c
+VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_loopfilter_16_neon.c
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_convolve8_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_convolve8_avg_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_loopfilter_neon$(ASM)
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index 0993c6c..bd13518 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -23,17 +23,17 @@
VP9_CX_SRCS-yes += encoder/vp9_dct.h
VP9_CX_SRCS-yes += encoder/vp9_encodeframe.c
VP9_CX_SRCS-yes += encoder/vp9_encodeframe.h
-VP9_CX_SRCS-yes += encoder/vp9_encodeintra.c
VP9_CX_SRCS-yes += encoder/vp9_encodemb.c
VP9_CX_SRCS-yes += encoder/vp9_encodemv.c
+VP9_CX_SRCS-yes += encoder/vp9_extend.c
VP9_CX_SRCS-yes += encoder/vp9_firstpass.c
VP9_CX_SRCS-yes += encoder/vp9_block.h
VP9_CX_SRCS-yes += encoder/vp9_boolhuff.h
VP9_CX_SRCS-yes += encoder/vp9_write_bit_buffer.h
VP9_CX_SRCS-yes += encoder/vp9_bitstream.h
-VP9_CX_SRCS-yes += encoder/vp9_encodeintra.h
VP9_CX_SRCS-yes += encoder/vp9_encodemb.h
VP9_CX_SRCS-yes += encoder/vp9_encodemv.h
+VP9_CX_SRCS-yes += encoder/vp9_extend.h
VP9_CX_SRCS-yes += encoder/vp9_firstpass.h
VP9_CX_SRCS-yes += encoder/vp9_lookahead.c
VP9_CX_SRCS-yes += encoder/vp9_lookahead.h
@@ -44,6 +44,7 @@
VP9_CX_SRCS-yes += encoder/vp9_quantize.h
VP9_CX_SRCS-yes += encoder/vp9_ratectrl.h
VP9_CX_SRCS-yes += encoder/vp9_rdopt.h
+VP9_CX_SRCS-yes += encoder/vp9_sadmxn.h
VP9_CX_SRCS-yes += encoder/vp9_tokenize.h
VP9_CX_SRCS-yes += encoder/vp9_treewriter.h
VP9_CX_SRCS-yes += encoder/vp9_variance.h
diff --git a/vp9/vp9dx.mk b/vp9/vp9dx.mk
index 3a27cdd..7e76682 100644
--- a/vp9/vp9dx.mk
+++ b/vp9/vp9dx.mk
@@ -19,8 +19,8 @@
VP9_DX_SRCS-yes += decoder/vp9_dboolhuff.c
VP9_DX_SRCS-yes += decoder/vp9_decodemv.c
-VP9_DX_SRCS-yes += decoder/vp9_decodframe.c
-VP9_DX_SRCS-yes += decoder/vp9_decodframe.h
+VP9_DX_SRCS-yes += decoder/vp9_decodeframe.c
+VP9_DX_SRCS-yes += decoder/vp9_decodeframe.h
VP9_DX_SRCS-yes += decoder/vp9_detokenize.c
VP9_DX_SRCS-yes += decoder/vp9_dboolhuff.h
VP9_DX_SRCS-yes += decoder/vp9_read_bit_buffer.h
diff --git a/vp9_spatial_scalable_encoder.c b/vp9_spatial_scalable_encoder.c
index 9acfa29..9aaec82 100644
--- a/vp9_spatial_scalable_encoder.c
+++ b/vp9_spatial_scalable_encoder.c
@@ -19,12 +19,12 @@
#include <string.h>
#include <time.h>
#include "./args.h"
+#include "./ivfenc.h"
+#include "./tools_common.h"
#include "vpx/svc_context.h"
#include "vpx/vp8cx.h"
#include "vpx/vpx_encoder.h"
-#define VP90_FOURCC 0x30395056
-
static const struct arg_enum_list encoding_mode_enum[] = {
{"i", INTER_LAYER_PREDICTION_I},
{"alt-ip", ALT_INTER_LAYER_PREDICTION_IP},
@@ -77,25 +77,13 @@
static const int default_use_dummy_frame = 1;
typedef struct {
- char *input_filename;
char *output_filename;
uint32_t frames_to_code;
uint32_t frames_to_skip;
+ struct VpxInputContext input_ctx;
} AppInput;
-static void mem_put_le16(char *mem, uint32_t val) {
- mem[0] = val;
- mem[1] = val >> 8;
-}
-
-static void mem_put_le32(char *mem, uint32_t val) {
- mem[0] = val;
- mem[1] = val >> 8;
- mem[2] = val >> 16;
- mem[3] = val >> 24;
-}
-
-static void usage(const char *exec_name) {
+void usage_exit(const char *exec_name) {
fprintf(stderr, "Usage: %s <options> input_filename output_filename\n",
exec_name);
fprintf(stderr, "Options:\n");
@@ -103,15 +91,6 @@
exit(EXIT_FAILURE);
}
-void die(const char *fmt, ...) {
- va_list ap;
-
- va_start(ap, fmt);
- vfprintf(stderr, fmt, ap);
- if (fmt[strlen(fmt) - 1] != '\n') printf("\n");
- exit(EXIT_FAILURE);
-}
-
static void die_codec(vpx_codec_ctx_t *ctx, const char *s) {
const char *detail = vpx_codec_error_detail(ctx);
@@ -120,83 +99,12 @@
exit(EXIT_FAILURE);
}
-static int read_frame(FILE *f, vpx_image_t *img) {
- size_t nbytes;
- int res = 1;
- int plane;
-
- for (plane = 0; plane < 3; ++plane) {
- uint8_t *ptr;
- const int w = (plane ? (1 + img->d_w) / 2 : img->d_w);
- const int h = (plane ? (1 + img->d_h) / 2 : img->d_h);
- int r;
-
- switch (plane) {
- case 1:
- ptr = img->planes[VPX_PLANE_U];
- break;
- case 2:
- ptr = img->planes[VPX_PLANE_V];
- break;
- default:
- ptr = img->planes[plane];
- }
- for (r = 0; r < h; ++r) {
- const int to_read = w;
-
- nbytes = fread(ptr, 1, to_read, f);
- if (nbytes != to_read) {
- res = 0;
- if (nbytes > 0)
- printf("Warning: Read partial frame. Check your width & height!\n");
- break;
- }
- ptr += img->stride[plane];
- }
- if (!res) break;
- }
- return res;
-}
-
static int create_dummy_frame(vpx_image_t *img) {
const size_t buf_size = img->w * img->h * 3 / 2;
memset(img->planes[0], 129, buf_size);
return 1;
}
-static void write_ivf_file_header(FILE *outfile,
- uint32_t width, uint32_t height,
- int timebase_num, int timebase_den,
- int frame_cnt) {
- char header[32];
-
- header[0] = 'D';
- header[1] = 'K';
- header[2] = 'I';
- header[3] = 'F';
- mem_put_le16(header + 4, 0); /* version */
- mem_put_le16(header + 6, 32); /* headersize */
- mem_put_le32(header + 8, VP90_FOURCC); /* fourcc */
- mem_put_le16(header + 12, width); /* width */
- mem_put_le16(header + 14, height); /* height */
- mem_put_le32(header + 16, timebase_den); /* rate */
- mem_put_le32(header + 20, timebase_num); /* scale */
- mem_put_le32(header + 24, frame_cnt); /* length */
- mem_put_le32(header + 28, 0); /* unused */
-
- (void)fwrite(header, 1, 32, outfile);
-}
-
-static void write_ivf_frame_header(FILE *outfile, vpx_codec_pts_t pts,
- size_t sz) {
- char header[12];
- mem_put_le32(header, (uint32_t)sz);
- mem_put_le32(header + 4, pts & 0xFFFFFFFF);
- mem_put_le32(header + 8, pts >> 32);
-
- (void)fwrite(header, 1, 12, outfile);
-}
-
static void parse_command_line(int argc, const char **argv_,
AppInput *app_input, SvcContext *svc_ctx,
vpx_codec_enc_cfg_t *enc_cfg) {
@@ -272,9 +180,9 @@
die("Error: Unrecognized option %s\n", *argi);
if (argv[0] == NULL || argv[1] == 0) {
- usage(argv_[0]);
+ usage_exit(argv_[0]);
}
- app_input->input_filename = argv[0];
+ app_input->input_ctx.filename = argv[0];
app_input->output_filename = argv[1];
free(argv);
@@ -298,7 +206,7 @@
int main(int argc, const char **argv) {
AppInput app_input = {0};
- FILE *infile, *outfile;
+ FILE *outfile;
vpx_codec_ctx_t codec;
vpx_codec_enc_cfg_t enc_cfg;
SvcContext svc_ctx;
@@ -308,6 +216,8 @@
vpx_codec_err_t res;
int pts = 0; /* PTS starts at 0 */
int frame_duration = 1; /* 1 timebase tick per frame */
+ vpx_codec_cx_pkt_t packet = {0};
+ packet.kind = VPX_CODEC_CX_FRAME_PKT;
memset(&svc_ctx, 0, sizeof(svc_ctx));
svc_ctx.log_print = 1;
@@ -317,8 +227,8 @@
if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, enc_cfg.g_w, enc_cfg.g_h, 32))
die("Failed to allocate image %dx%d\n", enc_cfg.g_w, enc_cfg.g_h);
- if (!(infile = fopen(app_input.input_filename, "rb")))
- die("Failed to open %s for reading\n", app_input.input_filename);
+ if (!(app_input.input_ctx.file = fopen(app_input.input_ctx.filename, "rb")))
+ die("Failed to open %s for reading\n", app_input.input_ctx.filename);
if (!(outfile = fopen(app_input.output_filename, "wb")))
die("Failed to open %s for writing\n", app_input.output_filename);
@@ -328,12 +238,11 @@
VPX_CODEC_OK)
die("Failed to initialize encoder\n");
- write_ivf_file_header(outfile, enc_cfg.g_w, enc_cfg.g_h,
- enc_cfg.g_timebase.num, enc_cfg.g_timebase.den, 0);
+ ivf_write_file_header(outfile, &enc_cfg, VP9_FOURCC, 0);
// skip initial frames
for (i = 0; i < app_input.frames_to_skip; ++i) {
- read_frame(infile, &raw);
+ read_yuv_frame(&app_input.input_ctx, &raw);
}
// Encode frames
@@ -341,7 +250,7 @@
if (frame_cnt == 0 && svc_ctx.first_frame_full_size) {
create_dummy_frame(&raw);
} else {
- if (!read_frame(infile, &raw)) break;
+ if (!read_yuv_frame(&app_input.input_ctx, &raw)) break;
}
res = vpx_svc_encode(&svc_ctx, &codec, &raw, pts, frame_duration,
VPX_DL_REALTIME);
@@ -350,7 +259,9 @@
die_codec(&codec, "Failed to encode frame");
}
if (vpx_svc_get_frame_size(&svc_ctx) > 0) {
- write_ivf_frame_header(outfile, pts, vpx_svc_get_frame_size(&svc_ctx));
+ packet.data.frame.pts = pts;
+ packet.data.frame.sz = vpx_svc_get_frame_size(&svc_ctx);
+ ivf_write_frame_header(outfile, &packet);
(void)fwrite(vpx_svc_get_buffer(&svc_ctx), 1,
vpx_svc_get_frame_size(&svc_ctx), outfile);
}
@@ -360,14 +271,12 @@
printf("Processed %d frames\n", frame_cnt - svc_ctx.first_frame_full_size);
- fclose(infile);
+ fclose(app_input.input_ctx.file);
if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec");
// rewrite the output file headers with the actual frame count
if (!fseek(outfile, 0, SEEK_SET)) {
- write_ivf_file_header(outfile, enc_cfg.g_w, enc_cfg.g_h,
- enc_cfg.g_timebase.num, enc_cfg.g_timebase.den,
- frame_cnt);
+ ivf_write_file_header(outfile, &enc_cfg, VP9_FOURCC, frame_cnt);
}
fclose(outfile);
vpx_img_free(&raw);
diff --git a/vpx_scale/generic/yv12config.c b/vpx_scale/generic/yv12config.c
index a89e29d..7c3f7ec 100644
--- a/vpx_scale/generic/yv12config.c
+++ b/vpx_scale/generic/yv12config.c
@@ -148,7 +148,10 @@
#else
const int frame_size = yplane_size + 2 * uvplane_size;
#endif
- if (!ybf->buffer_alloc) {
+ if (frame_size > ybf->buffer_alloc_sz) {
+ // Allocation to hold larger frame, or first allocation.
+ if (ybf->buffer_alloc)
+ vpx_free(ybf->buffer_alloc);
ybf->buffer_alloc = vpx_memalign(32, frame_size);
ybf->buffer_alloc_sz = frame_size;
}
diff --git a/vpxdec.c b/vpxdec.c
index 110e4ac..dc2eec8 100644
--- a/vpxdec.c
+++ b/vpxdec.c
@@ -8,10 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-
-/* This is a simple program that reads ivf files and decodes them
- * using the new interface. Decoded frames are output as YV12 raw.
- */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
@@ -19,27 +15,34 @@
#include <string.h>
#include <limits.h>
+#include "third_party/libyuv/include/libyuv/scale.h"
+
+#include "./args.h"
+#include "./ivfdec.h"
+
#define VPX_CODEC_DISABLE_COMPAT 1
-#include "vpx_config.h"
+#include "./vpx_config.h"
#include "vpx/vpx_decoder.h"
#include "vpx_ports/vpx_timer.h"
+
#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER
#include "vpx/vp8dx.h"
#endif
+
#if CONFIG_MD5
-#include "md5_utils.h"
+#include "./md5_utils.h"
#endif
-#include "tools_common.h"
-#include "nestegg/include/nestegg/nestegg.h"
-#include "third_party/libyuv/include/libyuv/scale.h"
+
+#include "./tools_common.h"
+#include "./webmdec.h"
static const char *exec_name;
static const struct {
char const *name;
const vpx_codec_iface_t *(*iface)(void);
- unsigned int fourcc;
- unsigned int fourcc_mask;
+ uint32_t fourcc;
+ uint32_t fourcc_mask;
} ifaces[] = {
#if CONFIG_VP8_DECODER
{"vp8", vpx_codec_vp8_dx, VP8_FOURCC_MASK, 0x00FFFFFF},
@@ -49,7 +52,11 @@
#endif
};
-#include "args.h"
+struct VpxDecInputContext {
+ struct VpxInputContext *vpx_input_ctx;
+ struct WebmInputContext *webm_ctx;
+};
+
static const arg_def_t looparg = ARG_DEF(NULL, "loops", 1,
"Number of times to decode the file");
static const arg_def_t codecarg = ARG_DEF(NULL, "codec", 1,
@@ -161,123 +168,61 @@
exit(EXIT_FAILURE);
}
-static unsigned int mem_get_le16(const void *vmem) {
- unsigned int val;
- const unsigned char *mem = (const unsigned char *)vmem;
+static int read_frame(struct VpxDecInputContext *input,
+ uint8_t **buf,
+ size_t *bytes_in_buffer,
+ size_t *buffer_size) {
+ char raw_hdr[RAW_FRAME_HDR_SZ];
+ size_t bytes_to_read = 0;
+ FILE *infile = input->vpx_input_ctx->file;
+ enum VideoFileType kind = input->vpx_input_ctx->file_type;
+ if (kind == FILE_TYPE_WEBM) {
+ return webm_read_frame(input->webm_ctx,
+ buf, bytes_in_buffer, buffer_size);
+ } else if (kind == FILE_TYPE_RAW) {
+ if (fread(raw_hdr, RAW_FRAME_HDR_SZ, 1, infile) != 1) {
+ if (!feof(infile))
+ warn("Failed to read RAW frame size\n");
+ } else {
+ const int kCorruptFrameThreshold = 256 * 1024 * 1024;
+ const int kFrameTooSmallThreshold = 256 * 1024;
+ bytes_to_read = mem_get_le32(raw_hdr);
- val = mem[1] << 8;
- val |= mem[0];
- return val;
-}
+ if (bytes_to_read > kCorruptFrameThreshold) {
+ warn("Read invalid frame size (%u)\n", (unsigned int)bytes_to_read);
+ bytes_to_read = 0;
+ }
-static unsigned int mem_get_le32(const void *vmem) {
- unsigned int val;
- const unsigned char *mem = (const unsigned char *)vmem;
+ if (kind == FILE_TYPE_RAW && bytes_to_read < kFrameTooSmallThreshold) {
+ warn("Warning: Read invalid frame size (%u) - not a raw file?\n",
+ (unsigned int)bytes_to_read);
+ }
- val = mem[3] << 24;
- val |= mem[2] << 16;
- val |= mem[1] << 8;
- val |= mem[0];
- return val;
-}
+ if (bytes_to_read > *buffer_size) {
+ uint8_t *new_buf = realloc(*buf, 2 * bytes_to_read);
-enum file_kind {
- RAW_FILE,
- IVF_FILE,
- WEBM_FILE
-};
-
-struct input_ctx {
- enum file_kind kind;
- FILE *infile;
- nestegg *nestegg_ctx;
- nestegg_packet *pkt;
- unsigned int chunk;
- unsigned int chunks;
- unsigned int video_track;
-};
-
-#define IVF_FRAME_HDR_SZ (sizeof(uint32_t) + sizeof(uint64_t))
-#define RAW_FRAME_HDR_SZ (sizeof(uint32_t))
-static int read_frame(struct input_ctx *input,
- uint8_t **buf,
- size_t *buf_sz,
- size_t *buf_alloc_sz) {
- char raw_hdr[IVF_FRAME_HDR_SZ];
- size_t new_buf_sz;
- FILE *infile = input->infile;
- enum file_kind kind = input->kind;
- if (kind == WEBM_FILE) {
- if (input->chunk >= input->chunks) {
- unsigned int track;
-
- do {
- /* End of this packet, get another. */
- if (input->pkt)
- nestegg_free_packet(input->pkt);
-
- if (nestegg_read_packet(input->nestegg_ctx, &input->pkt) <= 0
- || nestegg_packet_track(input->pkt, &track))
- return 1;
-
- } while (track != input->video_track);
-
- if (nestegg_packet_count(input->pkt, &input->chunks))
- return 1;
- input->chunk = 0;
- }
-
- if (nestegg_packet_data(input->pkt, input->chunk, buf, buf_sz))
- return 1;
- input->chunk++;
-
- return 0;
- }
- /* For both the raw and ivf formats, the frame size is the first 4 bytes
- * of the frame header. We just need to special case on the header
- * size.
- */
- else if (fread(raw_hdr, kind == IVF_FILE
- ? IVF_FRAME_HDR_SZ : RAW_FRAME_HDR_SZ, 1, infile) != 1) {
- if (!feof(infile))
- fprintf(stderr, "Failed to read frame size\n");
-
- new_buf_sz = 0;
- } else {
- new_buf_sz = mem_get_le32(raw_hdr);
-
- if (new_buf_sz > 256 * 1024 * 1024) {
- fprintf(stderr, "Error: Read invalid frame size (%u)\n",
- (unsigned int)new_buf_sz);
- new_buf_sz = 0;
- }
-
- if (kind == RAW_FILE && new_buf_sz > 256 * 1024)
- fprintf(stderr, "Warning: Read invalid frame size (%u)"
- " - not a raw file?\n", (unsigned int)new_buf_sz);
-
- if (new_buf_sz > *buf_alloc_sz) {
- uint8_t *new_buf = realloc(*buf, 2 * new_buf_sz);
-
- if (new_buf) {
- *buf = new_buf;
- *buf_alloc_sz = 2 * new_buf_sz;
- } else {
- fprintf(stderr, "Failed to allocate compressed data buffer\n");
- new_buf_sz = 0;
+ if (new_buf) {
+ *buf = new_buf;
+ *buffer_size = 2 * bytes_to_read;
+ } else {
+ warn("Failed to allocate compressed data buffer\n");
+ bytes_to_read = 0;
+ }
}
}
- }
- *buf_sz = new_buf_sz;
-
- if (!feof(infile)) {
- if (fread(*buf, 1, *buf_sz, infile) != *buf_sz) {
- fprintf(stderr, "Failed to read full frame\n");
- return 1;
+ if (!feof(infile)) {
+ if (fread(*buf, 1, bytes_to_read, infile) != bytes_to_read) {
+ warn("Failed to read full frame\n");
+ return 1;
+ }
+ *bytes_in_buffer = bytes_to_read;
}
return 0;
+ } else if (kind == FILE_TYPE_IVF) {
+ return ivf_read_frame(input->vpx_input_ctx,
+ buf, bytes_in_buffer, buffer_size);
}
return 1;
@@ -297,8 +242,7 @@
: set_binary_mode(stdout);
if (!outfile) {
- fprintf(stderr, "Failed to output file");
- exit(EXIT_FAILURE);
+ fatal("Failed to output file");
}
}
@@ -334,254 +278,42 @@
}
}
-unsigned int file_is_ivf(FILE *infile,
- unsigned int *fourcc,
- unsigned int *width,
- unsigned int *height,
- unsigned int *fps_den,
- unsigned int *fps_num) {
- char raw_hdr[32];
- int is_ivf = 0;
-
- if (fread(raw_hdr, 1, 32, infile) == 32) {
- if (raw_hdr[0] == 'D' && raw_hdr[1] == 'K'
- && raw_hdr[2] == 'I' && raw_hdr[3] == 'F') {
- is_ivf = 1;
-
- if (mem_get_le16(raw_hdr + 4) != 0)
- fprintf(stderr, "Error: Unrecognized IVF version! This file may not"
- " decode properly.");
-
- *fourcc = mem_get_le32(raw_hdr + 8);
- *width = mem_get_le16(raw_hdr + 12);
- *height = mem_get_le16(raw_hdr + 14);
- *fps_num = mem_get_le32(raw_hdr + 16);
- *fps_den = mem_get_le32(raw_hdr + 20);
-
- /* Some versions of vpxenc used 1/(2*fps) for the timebase, so
- * we can guess the framerate using only the timebase in this
- * case. Other files would require reading ahead to guess the
- * timebase, like we do for webm.
- */
- if (*fps_num < 1000) {
- /* Correct for the factor of 2 applied to the timebase in the
- * encoder.
- */
- if (*fps_num & 1)*fps_den <<= 1;
- else *fps_num >>= 1;
- } else {
- /* Don't know FPS for sure, and don't have readahead code
- * (yet?), so just default to 30fps.
- */
- *fps_num = 30;
- *fps_den = 1;
- }
- }
- }
-
- if (!is_ivf)
- rewind(infile);
-
- return is_ivf;
-}
-
-
-unsigned int file_is_raw(FILE *infile,
- unsigned int *fourcc,
- unsigned int *width,
- unsigned int *height,
- unsigned int *fps_den,
- unsigned int *fps_num) {
- unsigned char buf[32];
+int file_is_raw(struct VpxInputContext *input) {
+ uint8_t buf[32];
int is_raw = 0;
vpx_codec_stream_info_t si;
si.sz = sizeof(si);
- if (fread(buf, 1, 32, infile) == 32) {
+ if (fread(buf, 1, 32, input->file) == 32) {
int i;
- if (mem_get_le32(buf) < 256 * 1024 * 1024)
- for (i = 0; i < sizeof(ifaces) / sizeof(ifaces[0]); i++)
+ if (mem_get_le32(buf) < 256 * 1024 * 1024) {
+ for (i = 0; i < sizeof(ifaces) / sizeof(ifaces[0]); i++) {
if (!vpx_codec_peek_stream_info(ifaces[i].iface(),
buf + 4, 32 - 4, &si)) {
is_raw = 1;
- *fourcc = ifaces[i].fourcc;
- *width = si.w;
- *height = si.h;
- *fps_num = 30;
- *fps_den = 1;
+ input->fourcc = ifaces[i].fourcc;
+ input->width = si.w;
+ input->height = si.h;
+ input->framerate.numerator = 30;
+ input->framerate.denominator = 1;
break;
}
+ }
+ }
}
- rewind(infile);
+ rewind(input->file);
return is_raw;
}
-
-static int
-nestegg_read_cb(void *buffer, size_t length, void *userdata) {
- FILE *f = userdata;
-
- if (fread(buffer, 1, length, f) < length) {
- if (ferror(f))
- return -1;
- if (feof(f))
- return 0;
- }
- return 1;
-}
-
-
-static int
-nestegg_seek_cb(int64_t offset, int whence, void *userdata) {
- switch (whence) {
- case NESTEGG_SEEK_SET:
- whence = SEEK_SET;
- break;
- case NESTEGG_SEEK_CUR:
- whence = SEEK_CUR;
- break;
- case NESTEGG_SEEK_END:
- whence = SEEK_END;
- break;
- };
- return fseek(userdata, (long)offset, whence) ? -1 : 0;
-}
-
-
-static int64_t
-nestegg_tell_cb(void *userdata) {
- return ftell(userdata);
-}
-
-
-static void
-nestegg_log_cb(nestegg *context, unsigned int severity, char const *format,
- ...) {
- va_list ap;
-
- va_start(ap, format);
- vfprintf(stderr, format, ap);
- fprintf(stderr, "\n");
- va_end(ap);
-}
-
-
-static int
-webm_guess_framerate(struct input_ctx *input,
- unsigned int *fps_den,
- unsigned int *fps_num) {
- unsigned int i;
- uint64_t tstamp = 0;
-
- /* Check to see if we can seek before we parse any data. */
- if (nestegg_track_seek(input->nestegg_ctx, input->video_track, 0)) {
- fprintf(stderr,
- "WARNING: Failed to guess framerate (no Cues), set to 30fps.\n");
- *fps_num = 30;
- *fps_den = 1;
- return 0;
- }
-
- /* Guess the framerate. Read up to 1 second, or 50 video packets,
- * whichever comes first.
- */
- for (i = 0; tstamp < 1000000000 && i < 50;) {
- nestegg_packet *pkt;
- unsigned int track;
-
- if (nestegg_read_packet(input->nestegg_ctx, &pkt) <= 0)
- break;
-
- nestegg_packet_track(pkt, &track);
- if (track == input->video_track) {
- nestegg_packet_tstamp(pkt, &tstamp);
- i++;
- }
-
- nestegg_free_packet(pkt);
- }
-
- if (nestegg_track_seek(input->nestegg_ctx, input->video_track, 0))
- goto fail;
-
- *fps_num = (i - 1) * 1000000;
- *fps_den = (unsigned int)(tstamp / 1000);
- return 0;
-fail:
- nestegg_destroy(input->nestegg_ctx);
- input->nestegg_ctx = NULL;
- rewind(input->infile);
- return 1;
-}
-
-
-static int
-file_is_webm(struct input_ctx *input,
- unsigned int *fourcc,
- unsigned int *width,
- unsigned int *height,
- unsigned int *fps_den,
- unsigned int *fps_num) {
- unsigned int i, n;
- int track_type = -1;
- int codec_id;
-
- nestegg_io io = {nestegg_read_cb, nestegg_seek_cb, nestegg_tell_cb, 0};
- nestegg_video_params params;
-
- io.userdata = input->infile;
- if (nestegg_init(&input->nestegg_ctx, io, NULL))
- goto fail;
-
- if (nestegg_track_count(input->nestegg_ctx, &n))
- goto fail;
-
- for (i = 0; i < n; i++) {
- track_type = nestegg_track_type(input->nestegg_ctx, i);
-
- if (track_type == NESTEGG_TRACK_VIDEO)
- break;
- else if (track_type < 0)
- goto fail;
- }
-
- codec_id = nestegg_track_codec_id(input->nestegg_ctx, i);
- if (codec_id == NESTEGG_CODEC_VP8) {
- *fourcc = VP8_FOURCC_MASK;
- } else if (codec_id == NESTEGG_CODEC_VP9) {
- *fourcc = VP9_FOURCC_MASK;
- } else {
- fprintf(stderr, "Not VPx video, quitting.\n");
- exit(1);
- }
-
- input->video_track = i;
-
- if (nestegg_track_video_params(input->nestegg_ctx, i, ¶ms))
- goto fail;
-
- *fps_den = 0;
- *fps_num = 0;
- *width = params.width;
- *height = params.height;
- return 1;
-fail:
- input->nestegg_ctx = NULL;
- rewind(input->infile);
- return 0;
-}
-
-
void show_progress(int frame_in, int frame_out, unsigned long dx_time) {
fprintf(stderr, "%d decoded frames/%d showed frames in %lu us (%.2f fps)\r",
frame_in, frame_out, dx_time,
(float)frame_out * 1000000.0 / (float)dx_time);
}
-
void generate_filename(const char *pattern, char *out, size_t q_len,
unsigned int d_w, unsigned int d_h,
unsigned int frame_in) {
@@ -663,18 +395,18 @@
int main_loop(int argc, const char **argv_) {
- vpx_codec_ctx_t decoder;
+ vpx_codec_ctx_t decoder;
char *fn = NULL;
int i;
uint8_t *buf = NULL;
- size_t buf_sz = 0, buf_alloc_sz = 0;
+ size_t bytes_in_buffer = 0, buffer_size = 0;
FILE *infile;
- int frame_in = 0, frame_out = 0, flipuv = 0, noblit = 0, do_md5 = 0, progress = 0;
+ int frame_in = 0, frame_out = 0, flipuv = 0, noblit = 0;
+ int do_md5 = 0, progress = 0;
int stop_after = 0, postproc = 0, summary = 0, quiet = 1;
int arg_skip = 0;
int ec_enabled = 0;
vpx_codec_iface_t *iface = NULL;
- unsigned int fourcc;
unsigned long dx_time = 0;
struct arg arg;
char **argv, **argi, **argj;
@@ -682,10 +414,6 @@
char outfile[PATH_MAX];
int single_file;
int use_y4m = 1;
- unsigned int width;
- unsigned int height;
- unsigned int fps_den;
- unsigned int fps_num;
void *out = NULL;
vpx_codec_dec_cfg_t cfg = {0};
#if CONFIG_VP8_DECODER
@@ -695,7 +423,6 @@
int vp8_dbg_color_b_modes = 0;
int vp8_dbg_display_mv = 0;
#endif
- struct input_ctx input = {0};
int frames_corrupted = 0;
int dec_flags = 0;
int do_scale = 0;
@@ -703,6 +430,12 @@
vpx_image_t *scaled_img = NULL;
int frame_avail, got_data;
+ struct VpxDecInputContext input = {0};
+ struct VpxInputContext vpx_input_ctx = {0};
+ struct WebmInputContext webm_ctx = {0};
+ input.vpx_input_ctx = &vpx_input_ctx;
+ input.webm_ctx = &webm_ctx;
+
/* Parse command line */
exec_name = argv_[0];
argv = argv_dup(argc - 1, argv_ + 1);
@@ -840,14 +573,13 @@
return EXIT_FAILURE;
}
#endif
- input.infile = infile;
- if (file_is_ivf(infile, &fourcc, &width, &height, &fps_den,
- &fps_num))
- input.kind = IVF_FILE;
- else if (file_is_webm(&input, &fourcc, &width, &height, &fps_den, &fps_num))
- input.kind = WEBM_FILE;
- else if (file_is_raw(infile, &fourcc, &width, &height, &fps_den, &fps_num))
- input.kind = RAW_FILE;
+ input.vpx_input_ctx->file = infile;
+ if (file_is_ivf(input.vpx_input_ctx))
+ input.vpx_input_ctx->file_type = FILE_TYPE_IVF;
+ else if (file_is_webm(input.webm_ctx, input.vpx_input_ctx))
+ input.vpx_input_ctx->file_type = FILE_TYPE_WEBM;
+ else if (file_is_raw(input.vpx_input_ctx))
+ input.vpx_input_ctx->file_type = FILE_TYPE_RAW;
else {
fprintf(stderr, "Unrecognized input file type.\n");
return EXIT_FAILURE;
@@ -874,7 +606,7 @@
if (single_file && !noblit) {
generate_filename(outfile_pattern, outfile, sizeof(outfile) - 1,
- width, height, 0);
+ vpx_input_ctx.width, vpx_input_ctx.height, 0);
out = out_open(outfile, do_md5);
}
@@ -887,8 +619,8 @@
return EXIT_FAILURE;
}
- if (input.kind == WEBM_FILE)
- if (webm_guess_framerate(&input, &fps_den, &fps_num)) {
+ if (vpx_input_ctx.file_type == FILE_TYPE_WEBM)
+ if (webm_guess_framerate(input.webm_ctx, input.vpx_input_ctx)) {
fprintf(stderr, "Failed to guess framerate -- error parsing "
"webm file?\n");
return EXIT_FAILURE;
@@ -899,21 +631,23 @@
store one, and neither does VP8.
That will have to wait until these tools support WebM natively.*/
snprintf(buffer, sizeof(buffer), "YUV4MPEG2 W%u H%u F%u:%u I%c ",
- width, height, fps_num, fps_den, 'p');
+ vpx_input_ctx.width, vpx_input_ctx.height,
+ vpx_input_ctx.framerate.numerator,
+ vpx_input_ctx.framerate.denominator,
+ 'p');
out_put(out, (unsigned char *)buffer,
(unsigned int)strlen(buffer), do_md5);
}
/* Try to determine the codec from the fourcc. */
for (i = 0; i < sizeof(ifaces) / sizeof(ifaces[0]); i++)
- if ((fourcc & ifaces[i].fourcc_mask) == ifaces[i].fourcc) {
- vpx_codec_iface_t *ivf_iface = ifaces[i].iface();
+ if ((vpx_input_ctx.fourcc & ifaces[i].fourcc_mask) == ifaces[i].fourcc) {
+ vpx_codec_iface_t *vpx_iface = ifaces[i].iface();
- if (iface && iface != ivf_iface)
- fprintf(stderr, "Notice -- IVF header indicates codec: %s\n",
- ifaces[i].name);
+ if (iface && iface != vpx_iface)
+ warn("Header indicates codec: %s\n", ifaces[i].name);
else
- iface = ivf_iface;
+ iface = vpx_iface;
break;
}
@@ -963,10 +697,10 @@
#endif
- if(arg_skip)
+ if (arg_skip)
fprintf(stderr, "Skiping first %d frames.\n", arg_skip);
while (arg_skip) {
- if (read_frame(&input, &buf, &buf_sz, &buf_alloc_sz))
+ if (read_frame(&input, &buf, &bytes_in_buffer, &buffer_size))
break;
arg_skip--;
}
@@ -983,19 +717,19 @@
frame_avail = 0;
if (!stop_after || frame_in < stop_after) {
- if(!read_frame(&input, &buf, &buf_sz, &buf_alloc_sz)) {
+ if (!read_frame(&input, &buf, &bytes_in_buffer, &buffer_size)) {
frame_avail = 1;
frame_in++;
vpx_usec_timer_start(&timer);
- if (vpx_codec_decode(&decoder, buf, (unsigned int)buf_sz, NULL, 0)) {
+ if (vpx_codec_decode(&decoder, buf, bytes_in_buffer, NULL, 0)) {
const char *detail = vpx_codec_error_detail(&decoder);
- fprintf(stderr, "Failed to decode frame: %s\n",
- vpx_codec_error(&decoder));
+ warn("Failed to decode frame %d: %s",
+ frame_in, vpx_codec_error(&decoder));
if (detail)
- fprintf(stderr, " Additional information: %s\n", detail);
+ warn("Additional information: %s", detail);
goto fail;
}
@@ -1016,8 +750,7 @@
dx_time += (unsigned int)vpx_usec_timer_elapsed(&timer);
if (vpx_codec_control(&decoder, VP8D_GET_FRAME_CORRUPTED, &corrupted)) {
- fprintf(stderr, "Failed VP8_GET_FRAME_CORRUPTED: %s\n",
- vpx_codec_error(&decoder));
+ warn("Failed VP8_GET_FRAME_CORRUPTED: %s", vpx_codec_error(&decoder));
goto fail;
}
frames_corrupted += corrupted;
@@ -1131,10 +864,11 @@
if (single_file && !noblit)
out_close(out, outfile, do_md5);
- if (input.nestegg_ctx)
- nestegg_destroy(input.nestegg_ctx);
- if (input.kind != WEBM_FILE)
+ if (input.vpx_input_ctx->file_type == FILE_TYPE_WEBM)
+ webm_free(input.webm_ctx);
+ else
free(buf);
+
fclose(infile);
free(argv);
diff --git a/vpxenc.c b/vpxenc.c
index 674da14..2d92ae8 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -10,32 +10,23 @@
#include "./vpx_config.h"
-#if defined(_WIN32) || defined(__OS2__) || !CONFIG_OS_SUPPORT
-#define USE_POSIX_MMAP 0
-#else
-#define USE_POSIX_MMAP 1
-#endif
-
+#include <assert.h>
+#include <limits.h>
#include <math.h>
+#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
-#include <stdarg.h>
#include <string.h>
-#include <limits.h>
-#include <assert.h>
+
#include "vpx/vpx_encoder.h"
#if CONFIG_DECODERS
#include "vpx/vpx_decoder.h"
#endif
-#if USE_POSIX_MMAP
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
-#include <fcntl.h>
-#include <unistd.h>
-#endif
#include "third_party/libyuv/include/libyuv/scale.h"
+#include "./args.h"
+#include "./ivfdec.h"
+#include "./ivfenc.h"
#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER
#include "vpx/vp8cx.h"
@@ -118,199 +109,28 @@
va_end(ap);
}
-enum video_file_type {
- FILE_TYPE_RAW,
- FILE_TYPE_IVF,
- FILE_TYPE_Y4M
-};
-
-struct detect_buffer {
- char buf[4];
- size_t buf_read;
- size_t position;
-};
-
-
-struct input_state {
- char *fn;
- FILE *file;
- off_t length;
- y4m_input y4m;
- struct detect_buffer detect;
- enum video_file_type file_type;
- unsigned int w;
- unsigned int h;
- struct vpx_rational framerate;
- int use_i420;
- int only_i420;
-};
-
-#define IVF_FRAME_HDR_SZ (4+8) /* 4 byte size + 8 byte timestamp */
-static int read_frame(struct input_state *input, vpx_image_t *img) {
- FILE *f = input->file;
- enum video_file_type file_type = input->file_type;
- y4m_input *y4m = &input->y4m;
- struct detect_buffer *detect = &input->detect;
- int plane = 0;
+int read_frame(struct VpxInputContext *input_ctx, vpx_image_t *img) {
+ FILE *f = input_ctx->file;
+ y4m_input *y4m = &input_ctx->y4m;
int shortread = 0;
- if (file_type == FILE_TYPE_Y4M) {
+ if (input_ctx->file_type == FILE_TYPE_Y4M) {
if (y4m_input_fetch_frame(y4m, f, img) < 1)
return 0;
} else {
- if (file_type == FILE_TYPE_IVF) {
- char junk[IVF_FRAME_HDR_SZ];
-
- /* Skip the frame header. We know how big the frame should be. See
- * write_ivf_frame_header() for documentation on the frame header
- * layout.
- */
- (void) fread(junk, 1, IVF_FRAME_HDR_SZ, f);
- }
-
- for (plane = 0; plane < 3; plane++) {
- unsigned char *ptr;
- int w = (plane ? (1 + img->d_w) / 2 : img->d_w);
- int h = (plane ? (1 + img->d_h) / 2 : img->d_h);
- int r;
-
- /* Determine the correct plane based on the image format. The for-loop
- * always counts in Y,U,V order, but this may not match the order of
- * the data on disk.
- */
- switch (plane) {
- case 1:
- ptr = img->planes[img->fmt == VPX_IMG_FMT_YV12 ? VPX_PLANE_V : VPX_PLANE_U];
- break;
- case 2:
- ptr = img->planes[img->fmt == VPX_IMG_FMT_YV12 ? VPX_PLANE_U : VPX_PLANE_V];
- break;
- default:
- ptr = img->planes[plane];
- }
-
- for (r = 0; r < h; r++) {
- size_t needed = w;
- size_t buf_position = 0;
- const size_t left = detect->buf_read - detect->position;
- if (left > 0) {
- const size_t more = (left < needed) ? left : needed;
- memcpy(ptr, detect->buf + detect->position, more);
- buf_position = more;
- needed -= more;
- detect->position += more;
- }
- if (needed > 0) {
- shortread |= (fread(ptr + buf_position, 1, needed, f) < needed);
- }
-
- ptr += img->stride[plane];
- }
- }
+ shortread = read_yuv_frame(input_ctx, img);
}
return !shortread;
}
-
-unsigned int file_is_y4m(FILE *infile,
- y4m_input *y4m,
- char detect[4]) {
+int file_is_y4m(FILE *infile, y4m_input *y4m, const char detect[4]) {
if (memcmp(detect, "YUV4", 4) == 0) {
return 1;
}
return 0;
}
-#define IVF_FILE_HDR_SZ (32)
-unsigned int file_is_ivf(struct input_state *input,
- unsigned int *fourcc) {
- char raw_hdr[IVF_FILE_HDR_SZ];
- int is_ivf = 0;
- FILE *infile = input->file;
- unsigned int *width = &input->w;
- unsigned int *height = &input->h;
- struct detect_buffer *detect = &input->detect;
-
- if (memcmp(detect->buf, "DKIF", 4) != 0)
- return 0;
-
- /* See write_ivf_file_header() for more documentation on the file header
- * layout.
- */
- if (fread(raw_hdr + 4, 1, IVF_FILE_HDR_SZ - 4, infile)
- == IVF_FILE_HDR_SZ - 4) {
- {
- is_ivf = 1;
-
- if (mem_get_le16(raw_hdr + 4) != 0)
- warn("Unrecognized IVF version! This file may not decode "
- "properly.");
-
- *fourcc = mem_get_le32(raw_hdr + 8);
- }
- }
-
- if (is_ivf) {
- *width = mem_get_le16(raw_hdr + 12);
- *height = mem_get_le16(raw_hdr + 14);
- detect->position = 4;
- }
-
- return is_ivf;
-}
-
-
-static void write_ivf_file_header(FILE *outfile,
- const vpx_codec_enc_cfg_t *cfg,
- unsigned int fourcc,
- int frame_cnt) {
- char header[32];
-
- if (cfg->g_pass != VPX_RC_ONE_PASS && cfg->g_pass != VPX_RC_LAST_PASS)
- return;
-
- header[0] = 'D';
- header[1] = 'K';
- header[2] = 'I';
- header[3] = 'F';
- mem_put_le16(header + 4, 0); /* version */
- mem_put_le16(header + 6, 32); /* headersize */
- mem_put_le32(header + 8, fourcc); /* headersize */
- mem_put_le16(header + 12, cfg->g_w); /* width */
- mem_put_le16(header + 14, cfg->g_h); /* height */
- mem_put_le32(header + 16, cfg->g_timebase.den); /* rate */
- mem_put_le32(header + 20, cfg->g_timebase.num); /* scale */
- mem_put_le32(header + 24, frame_cnt); /* length */
- mem_put_le32(header + 28, 0); /* unused */
-
- (void) fwrite(header, 1, 32, outfile);
-}
-
-
-static void write_ivf_frame_header(FILE *outfile,
- const vpx_codec_cx_pkt_t *pkt) {
- char header[12];
- vpx_codec_pts_t pts;
-
- if (pkt->kind != VPX_CODEC_CX_FRAME_PKT)
- return;
-
- pts = pkt->data.frame.pts;
- mem_put_le32(header, (int)pkt->data.frame.sz);
- mem_put_le32(header + 4, pts & 0xFFFFFFFF);
- mem_put_le32(header + 8, pts >> 32);
-
- (void) fwrite(header, 1, 12, outfile);
-}
-
-static void write_ivf_frame_size(FILE *outfile, size_t size) {
- char header[4];
- mem_put_le32(header, (int)size);
- (void) fwrite(header, 1, 4, outfile);
-}
-
-
/* Murmur hash derived from public domain reference implementation at
* http:// sites.google.com/site/murmurhash/
@@ -360,7 +180,6 @@
}
-#include "args.h"
static const arg_def_t debugmode = ARG_DEF("D", "debug", 0,
"Debug mode (makes output deterministic)");
static const arg_def_t outputfile = ARG_DEF("o", "output", 1,
@@ -817,9 +636,9 @@
};
-static void init_rate_histogram(struct rate_hist *hist,
+static void init_rate_histogram(struct rate_hist *hist,
const vpx_codec_enc_cfg_t *cfg,
- const vpx_rational_t *fps) {
+ const vpx_rational_t *fps) {
int i;
/* Determine the number of samples in the buffer. Use the file's framerate
@@ -1215,12 +1034,10 @@
}
-void open_input_file(struct input_state *input) {
- unsigned int fourcc;
-
+void open_input_file(struct VpxInputContext *input) {
/* Parse certain options from the input file, if possible */
- input->file = strcmp(input->fn, "-") ? fopen(input->fn, "rb")
- : set_binary_mode(stdin);
+ input->file = strcmp(input->filename, "-")
+ ? fopen(input->filename, "rb") : set_binary_mode(stdin);
if (!input->file)
fatal("Failed to open input file");
@@ -1244,14 +1061,14 @@
if (y4m_input_open(&input->y4m, input->file, input->detect.buf, 4,
input->only_i420) >= 0) {
input->file_type = FILE_TYPE_Y4M;
- input->w = input->y4m.pic_w;
- input->h = input->y4m.pic_h;
- input->framerate.num = input->y4m.fps_n;
- input->framerate.den = input->y4m.fps_d;
+ input->width = input->y4m.pic_w;
+ input->height = input->y4m.pic_h;
+ input->framerate.numerator = input->y4m.fps_n;
+ input->framerate.denominator = input->y4m.fps_d;
input->use_i420 = 0;
} else
fatal("Unsupported Y4M stream.");
- } else if (input->detect.buf_read == 4 && file_is_ivf(input, &fourcc)) {
+ } else if (input->detect.buf_read == 4 && file_is_ivf(input)) {
fatal("IVF is not supported as input.");
} else {
input->file_type = FILE_TYPE_RAW;
@@ -1259,7 +1076,7 @@
}
-static void close_input_file(struct input_state *input) {
+static void close_input_file(struct VpxInputContext *input) {
fclose(input->file);
if (input->file_type == FILE_TYPE_Y4M)
y4m_input_close(&input->y4m);
@@ -1534,7 +1351,7 @@
static void show_stream_config(struct stream_state *stream,
struct global_config *global,
- struct input_state *input) {
+ struct VpxInputContext *input) {
#define SHOW(field) \
fprintf(stderr, " %-28s = %d\n", #field, stream->config.cfg.field)
@@ -1542,7 +1359,7 @@
if (stream->index == 0) {
fprintf(stderr, "Codec: %s\n",
vpx_codec_iface_name(global->codec->iface()));
- fprintf(stderr, "Source file: %s Format: %s\n", input->fn,
+ fprintf(stderr, "Source file: %s Format: %s\n", input->filename,
input->use_i420 ? "I420" : "YV12");
}
if (stream->next || stream->index)
@@ -1601,7 +1418,7 @@
stream->config.stereo_fmt,
global->codec->fourcc);
} else
- write_ivf_file_header(stream->file, &stream->config.cfg,
+ ivf_write_file_header(stream->file, &stream->config.cfg,
global->codec->fourcc, 0);
}
@@ -1614,7 +1431,7 @@
stream->ebml.cue_list = NULL;
} else {
if (!fseek(stream->file, 0, SEEK_SET))
- write_ivf_file_header(stream->file, &stream->config.cfg,
+ ivf_write_file_header(stream->file, &stream->config.cfg,
fourcc,
stream->frames_out);
}
@@ -1774,14 +1591,14 @@
ivf_header_pos = ftello(stream->file);
fsize = pkt->data.frame.sz;
- write_ivf_frame_header(stream->file, pkt);
+ ivf_write_frame_header(stream->file, pkt);
} else {
fsize += pkt->data.frame.sz;
if (!(pkt->data.frame.flags & VPX_FRAME_IS_FRAGMENT)) {
off_t currpos = ftello(stream->file);
fseeko(stream->file, ivf_header_pos, SEEK_SET);
- write_ivf_frame_size(stream->file, fsize);
+ ivf_write_frame_size(stream->file, fsize);
fseeko(stream->file, currpos, SEEK_SET);
}
}
@@ -1939,8 +1756,8 @@
vpx_image_t raw;
int frame_avail, got_data;
- struct input_state input = {0};
- struct global_config global;
+ struct VpxInputContext input = {0};
+ struct global_config global;
struct stream_state *streams = NULL;
char **argv, **argi;
uint64_t cx_time = 0;
@@ -1953,8 +1770,8 @@
usage_exit();
/* Setup default input stream settings */
- input.framerate.num = 30;
- input.framerate.den = 1;
+ input.framerate.numerator = 30;
+ input.framerate.denominator = 1;
input.use_i420 = 1;
input.only_i420 = 1;
@@ -1986,9 +1803,9 @@
die("Error: Unrecognized option %s\n", *argi);
/* Handle non-option arguments */
- input.fn = argv[0];
+ input.filename = argv[0];
- if (!input.fn)
+ if (!input.filename)
usage_exit();
#if CONFIG_NON420
@@ -2008,20 +1825,20 @@
/* If the input file doesn't specify its w/h (raw files), try to get
* the data from the first stream's configuration.
*/
- if (!input.w || !input.h)
+ if (!input.width || !input.height)
FOREACH_STREAM( {
if (stream->config.cfg.g_w && stream->config.cfg.g_h) {
- input.w = stream->config.cfg.g_w;
- input.h = stream->config.cfg.g_h;
+ input.width = stream->config.cfg.g_w;
+ input.height = stream->config.cfg.g_h;
break;
}
});
/* Update stream configurations from the input file's parameters */
- if (!input.w || !input.h)
+ if (!input.width || !input.height)
fatal("Specify stream dimensions with --width (-w) "
" and --height (-h)");
- FOREACH_STREAM(set_stream_dimensions(stream, input.w, input.h));
+ FOREACH_STREAM(set_stream_dimensions(stream, input.width, input.height));
FOREACH_STREAM(validate_stream_config(stream));
/* Ensure that --passes and --pass are consistent. If --pass is set and
@@ -2037,8 +1854,10 @@
/* Use the frame rate from the file only if none was specified
* on the command-line.
*/
- if (!global.have_framerate)
- global.framerate = input.framerate;
+ if (!global.have_framerate) {
+ global.framerate.num = input.framerate.numerator;
+ global.framerate.den = input.framerate.denominator;
+ }
FOREACH_STREAM(set_default_kf_interval(stream, &global));
@@ -2056,7 +1875,7 @@
vpx_img_alloc(&raw,
input.use_i420 ? VPX_IMG_FMT_I420
: VPX_IMG_FMT_YV12,
- input.w, input.h, 32);
+ input.width, input.height, 32);
FOREACH_STREAM(init_rate_histogram(&stream->rate_hist,
&stream->config.cfg,
diff --git a/webmdec.c b/webmdec.c
new file mode 100644
index 0000000..4bf7c7e
--- /dev/null
+++ b/webmdec.c
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./webmdec.h"
+
+#include <stdarg.h>
+
+#include "nestegg/include/nestegg/nestegg.h"
+
+static int nestegg_read_cb(void *buffer, size_t length, void *userdata) {
+ FILE *f = userdata;
+
+ if (fread(buffer, 1, length, f) < length) {
+ if (ferror(f))
+ return -1;
+ if (feof(f))
+ return 0;
+ }
+ return 1;
+}
+
+static int nestegg_seek_cb(int64_t offset, int whence, void *userdata) {
+ switch (whence) {
+ case NESTEGG_SEEK_SET:
+ whence = SEEK_SET;
+ break;
+ case NESTEGG_SEEK_CUR:
+ whence = SEEK_CUR;
+ break;
+ case NESTEGG_SEEK_END:
+ whence = SEEK_END;
+ break;
+ };
+ return fseek(userdata, (int32_t)offset, whence) ? -1 : 0;
+}
+
+static int64_t nestegg_tell_cb(void *userdata) {
+ return ftell(userdata);
+}
+
+static void nestegg_log_cb(nestegg *context,
+ unsigned int severity,
+ char const *format, ...) {
+ va_list ap;
+ va_start(ap, format);
+ vfprintf(stderr, format, ap);
+ fprintf(stderr, "\n");
+ va_end(ap);
+}
+
+int file_is_webm(struct WebmInputContext *webm_ctx,
+ struct VpxInputContext *vpx_ctx) {
+ uint32_t i, n;
+ int track_type = -1;
+ int codec_id;
+
+ nestegg_io io = {nestegg_read_cb, nestegg_seek_cb, nestegg_tell_cb, 0};
+ nestegg_video_params params;
+
+ io.userdata = vpx_ctx->file;
+ if (nestegg_init(&webm_ctx->nestegg_ctx, io, NULL))
+ goto fail;
+
+ if (nestegg_track_count(webm_ctx->nestegg_ctx, &n))
+ goto fail;
+
+ for (i = 0; i < n; i++) {
+ track_type = nestegg_track_type(webm_ctx->nestegg_ctx, i);
+
+ if (track_type == NESTEGG_TRACK_VIDEO)
+ break;
+ else if (track_type < 0)
+ goto fail;
+ }
+
+ codec_id = nestegg_track_codec_id(webm_ctx->nestegg_ctx, i);
+ if (codec_id == NESTEGG_CODEC_VP8) {
+ vpx_ctx->fourcc = VP8_FOURCC_MASK;
+ } else if (codec_id == NESTEGG_CODEC_VP9) {
+ vpx_ctx->fourcc = VP9_FOURCC_MASK;
+ } else {
+ fatal("Not VPx video, quitting.\n");
+ }
+
+ webm_ctx->video_track = i;
+
+ if (nestegg_track_video_params(webm_ctx->nestegg_ctx, i, ¶ms))
+ goto fail;
+
+ vpx_ctx->framerate.denominator = 0;
+ vpx_ctx->framerate.numerator = 0;
+ vpx_ctx->width = params.width;
+ vpx_ctx->height = params.height;
+
+ return 1;
+
+ fail:
+ webm_ctx->nestegg_ctx = NULL;
+ rewind(vpx_ctx->file);
+
+ return 0;
+}
+
+int webm_read_frame(struct WebmInputContext *webm_ctx,
+ uint8_t **buffer,
+ size_t *bytes_in_buffer,
+ size_t *buffer_size) {
+ if (webm_ctx->chunk >= webm_ctx->chunks) {
+ uint32_t track;
+
+ do {
+ /* End of this packet, get another. */
+ if (webm_ctx->pkt)
+ nestegg_free_packet(webm_ctx->pkt);
+
+ if (nestegg_read_packet(webm_ctx->nestegg_ctx, &webm_ctx->pkt) <= 0 ||
+ nestegg_packet_track(webm_ctx->pkt, &track)) {
+ return 1;
+ }
+ } while (track != webm_ctx->video_track);
+
+ if (nestegg_packet_count(webm_ctx->pkt, &webm_ctx->chunks))
+ return 1;
+
+ webm_ctx->chunk = 0;
+ }
+
+ if (nestegg_packet_data(webm_ctx->pkt, webm_ctx->chunk,
+ buffer, bytes_in_buffer)) {
+ return 1;
+ }
+
+ webm_ctx->chunk++;
+ return 0;
+}
+
+int webm_guess_framerate(struct WebmInputContext *webm_ctx,
+ struct VpxInputContext *vpx_ctx) {
+ uint32_t i;
+ uint64_t tstamp = 0;
+
+ /* Check to see if we can seek before we parse any data. */
+ if (nestegg_track_seek(webm_ctx->nestegg_ctx, webm_ctx->video_track, 0)) {
+ warn("Failed to guess framerate (no Cues), set to 30fps.\n");
+ vpx_ctx->framerate.numerator = 30;
+ vpx_ctx->framerate.denominator = 1;
+ return 0;
+ }
+
+ /* Guess the framerate. Read up to 1 second, or 50 video packets,
+ * whichever comes first.
+ */
+ for (i = 0; tstamp < 1000000000 && i < 50;) {
+ nestegg_packet *pkt;
+ uint32_t track;
+
+ if (nestegg_read_packet(webm_ctx->nestegg_ctx, &pkt) <= 0)
+ break;
+
+ nestegg_packet_track(pkt, &track);
+ if (track == webm_ctx->video_track) {
+ nestegg_packet_tstamp(pkt, &tstamp);
+ ++i;
+ }
+
+ nestegg_free_packet(pkt);
+ }
+
+ if (nestegg_track_seek(webm_ctx->nestegg_ctx, webm_ctx->video_track, 0))
+ goto fail;
+
+ vpx_ctx->framerate.numerator = (i - 1) * 1000000;
+ vpx_ctx->framerate.denominator = (int)(tstamp / 1000);
+ return 0;
+
+ fail:
+ nestegg_destroy(webm_ctx->nestegg_ctx);
+ webm_ctx->nestegg_ctx = NULL;
+ rewind(vpx_ctx->file);
+ return 1;
+}
+
+void webm_free(struct WebmInputContext *webm_ctx) {
+ if (webm_ctx && webm_ctx->nestegg_ctx)
+ nestegg_destroy(webm_ctx->nestegg_ctx);
+}
diff --git a/webmdec.h b/webmdec.h
new file mode 100644
index 0000000..002fbe6
--- /dev/null
+++ b/webmdec.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef WEBMDEC_H_
+#define WEBMDEC_H_
+
+#include "./tools_common.h"
+
+struct nestegg;
+struct nestegg_packet;
+struct VpxInputContext;
+
+struct WebmInputContext {
+ uint32_t chunk;
+ uint32_t chunks;
+ uint32_t video_track;
+ struct nestegg *nestegg_ctx;
+ struct nestegg_packet *pkt;
+};
+
+int file_is_webm(struct WebmInputContext *webm_ctx,
+ struct VpxInputContext *vpx_ctx);
+
+int webm_read_frame(struct WebmInputContext *webm_ctx,
+ uint8_t **buffer,
+ size_t *bytes_in_buffer,
+ size_t *buffer_size);
+
+int webm_guess_framerate(struct WebmInputContext *webm_ctx,
+ struct VpxInputContext *vpx_ctx);
+
+void webm_free(struct WebmInputContext *webm_ctx);
+
+#endif // WEBMDEC_H_