Merge "Fixing constant value used to calculate frame pts and duration."
diff --git a/README b/README
index ce9c1c6..6d7d5ec 100644
--- a/README
+++ b/README
@@ -12,22 +12,20 @@
* All x86 targets require the Yasm[1] assembler be installed.
* All Windows builds require that Cygwin[2] be installed.
- * Building the documentation requires PHP[3] and Doxygen[4]. If you do not
- have these packages, you must pass --disable-install-docs to the
- configure script.
- * Downloading the data for the unit tests requires curl[5] and sha1sum.
+ * Building the documentation requires Doxygen[3]. If you do not
+ have this package, the install-docs option will be disabled.
+ * Downloading the data for the unit tests requires curl[4] and sha1sum.
sha1sum is provided via the GNU coreutils, installed by default on
many *nix platforms, as well as MinGW and Cygwin. If coreutils is not
available, a compatible version of sha1sum can be built from
- source[6]. These requirements are optional if not running the unit
+ source[5]. These requirements are optional if not running the unit
tests.
[1]: http://www.tortall.net/projects/yasm
[2]: http://www.cygwin.com
- [3]: http://php.net
- [4]: http://www.doxygen.org
- [5]: http://curl.haxx.se
- [6]: http://www.microbrew.org/tools/md5sha1sum/
+ [3]: http://www.doxygen.org
+ [4]: http://curl.haxx.se
+ [5]: http://www.microbrew.org/tools/md5sha1sum/
2. Out-of-tree builds
Out of tree builds are a supported method of building the application. For
diff --git a/build/make/Android.mk b/build/make/Android.mk
index 48a0dd7..826ff2f 100644
--- a/build/make/Android.mk
+++ b/build/make/Android.mk
@@ -53,12 +53,23 @@
ASM_CNV_PATH_LOCAL := $(TARGET_ARCH_ABI)/ads2gas
ASM_CNV_PATH := $(LOCAL_PATH)/$(ASM_CNV_PATH_LOCAL)
-# Makefiles created by the libvpx configure process
-# This will need to be fixed to handle x86.
+# Use the makefiles generated by upstream configure to determine which files to
+# build. Also set any architecture-specific flags.
ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
include $(CONFIG_DIR)libs-armv7-android-gcc.mk
-else
+ LOCAL_ARM_MODE := arm
+else ifeq ($(TARGET_ARCH_ABI),armeabi)
include $(CONFIG_DIR)libs-armv5te-android-gcc.mk
+ LOCAL_ARM_MODE := arm
+else ifeq ($(TARGET_ARCH_ABI),arm64-v8a)
+ include $(CONFIG_DIR)libs-armv8-android-gcc.mk
+ LOCAL_ARM_MODE := arm
+else ifeq ($(TARGET_ARCH_ABI),x86)
+ include $(CONFIG_DIR)libs-x86-android-gcc.mk
+else ifeq ($(TARGET_ARCH_ABI),mips)
+ include $(CONFIG_DIR)libs-mips-android-gcc.mk
+else
+ $(error Not a supported TARGET_ARCH_ABI: $(TARGET_ARCH_ABI))
endif
# Rule that is normally in Makefile created by libvpx
@@ -72,10 +83,13 @@
# Include the list of files to be built
include $(LIBVPX_PATH)/libs.mk
-# Want arm, not thumb, optimized
-LOCAL_ARM_MODE := arm
+# Optimise the code. May want to revisit this setting in the future.
LOCAL_CFLAGS := -O3
+# For x86, include the source code in the search path so it will find files
+# like x86inc.asm and x86_abi_support.asm
+LOCAL_ASMFLAGS := -I$(LIBVPX_PATH)
+
# -----------------------------------------------------------------------------
# Template : asm_offsets_template
# Arguments : 1: assembly offsets file to be created
@@ -109,13 +123,13 @@
@grep $(OFFSET_PATTERN) $$< | tr -d '\#' | $(CONFIG_DIR)$(ASM_CONVERSION) > $$@
endef
-# Use ads2gas script to convert from RVCT format to GAS format. This passes
+# Use ads2gas script to convert from RVCT format to GAS format. This
# puts the processed file under $(ASM_CNV_PATH). Local clean rule
# to handle removing these
ifeq ($(CONFIG_VP8_ENCODER), yes)
ASM_CNV_OFFSETS_DEPEND += $(ASM_CNV_PATH)/vp8_asm_enc_offsets.asm
endif
-ifeq ($(HAVE_NEON), yes)
+ifeq ($(HAVE_NEON_ASM), yes)
ASM_CNV_OFFSETS_DEPEND += $(ASM_CNV_PATH)/vpx_scale_asm_offsets.asm
endif
@@ -142,22 +156,34 @@
LOCAL_CODEC_SRCS_C = $(filter-out vpx_config.c %_neon.c, $(CODEC_SRCS_C))
LOCAL_SRC_FILES += $(foreach file, $(LOCAL_CODEC_SRCS_C), libvpx/$(file))
-LOCAL_SRC_FILES += $(foreach file, $(LOCAL_NEON_SRCS_C), libvpx/$(file).neon)
+ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
+ LOCAL_SRC_FILES += $(foreach file, $(LOCAL_NEON_SRCS_C), libvpx/$(file).neon)
+else # If there are neon sources then we are building for arm64 and do not need to specify .neon
+ LOCAL_SRC_FILES += $(foreach file, $(LOCAL_NEON_SRCS_C), libvpx/$(file))
+endif
# Pull out assembly files, splitting NEON from the rest. This is
# done to specify that the NEON assembly files use NEON assembler flags.
-CODEC_SRCS_ASM_ALL = $(filter %.asm.s, $(CODEC_SRCS_UNIQUE))
-CODEC_SRCS_ASM = $(foreach v, \
- $(CODEC_SRCS_ASM_ALL), \
- $(if $(findstring neon,$(v)),,$(v)))
+# x86 assembly matches %.asm, arm matches %.asm.s
+
+# x86:
+
+CODEC_SRCS_ASM_X86 = $(filter %.asm, $(CODEC_SRCS_UNIQUE))
+LOCAL_SRC_FILES += $(foreach file, $(CODEC_SRCS_ASM_X86), libvpx/$(file))
+
+# arm:
+CODEC_SRCS_ASM_ARM_ALL = $(filter %.asm.s, $(CODEC_SRCS_UNIQUE))
+CODEC_SRCS_ASM_ARM = $(foreach v, \
+ $(CODEC_SRCS_ASM_ARM_ALL), \
+ $(if $(findstring neon,$(v)),,$(v)))
CODEC_SRCS_ASM_ADS2GAS = $(patsubst %.s, \
$(ASM_CNV_PATH_LOCAL)/libvpx/%.s, \
- $(CODEC_SRCS_ASM))
+ $(CODEC_SRCS_ASM_ARM))
LOCAL_SRC_FILES += $(CODEC_SRCS_ASM_ADS2GAS)
ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
CODEC_SRCS_ASM_NEON = $(foreach v, \
- $(CODEC_SRCS_ASM_ALL),\
+ $(CODEC_SRCS_ASM_ARM_ALL),\
$(if $(findstring neon,$(v)),$(v),))
CODEC_SRCS_ASM_NEON_ADS2GAS = $(patsubst %.s, \
$(ASM_CNV_PATH_LOCAL)/libvpx/%.s, \
@@ -189,6 +215,10 @@
endif
$(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vpx_scale_rtcd.h
+ifeq ($(TARGET_ARCH_ABI),x86)
+$(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vpx_config.asm
+endif
+
.PHONY: clean
clean:
@echo "Clean: ads2gas files [$(TARGET_ARCH_ABI)]"
diff --git a/build/make/Makefile b/build/make/Makefile
index 03dacce..c4d53f1 100644
--- a/build/make/Makefile
+++ b/build/make/Makefile
@@ -411,6 +411,7 @@
DIST-SRCS-$(CONFIG_MSVS) += build/make/gen_msvs_proj.sh
DIST-SRCS-$(CONFIG_MSVS) += build/make/gen_msvs_sln.sh
DIST-SRCS-$(CONFIG_MSVS) += build/make/gen_msvs_vcxproj.sh
+ DIST-SRCS-$(CONFIG_MSVS) += build/make/msvs_common.sh
DIST-SRCS-$(CONFIG_MSVS) += build/x86-msvs/obj_int_extract.bat
DIST-SRCS-$(CONFIG_MSVS) += build/arm-msvs/obj_int_extract.bat
DIST-SRCS-$(CONFIG_RVCT) += build/make/armlink_adapter.sh
diff --git a/build/make/configure.sh b/build/make/configure.sh
index a65d395..a2a64c2 100755
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -792,8 +792,12 @@
arm*)
# on arm, isa versions are supersets
case ${tgt_isa} in
+ armv8)
+ soft_enable neon
+ ;;
armv7)
soft_enable neon
+ soft_enable neon_asm
soft_enable media
soft_enable edsp
soft_enable fast_unaligned
@@ -859,6 +863,13 @@
msvs_arch_dir=arm-msvs
disable_feature multithread
disable_feature unit_tests
+ vs_version=${tgt_cc##vs}
+ if [ $vs_version -ge 12 ]; then
+ # MSVC 2013 doesn't allow doing plain .exe projects for ARM,
+ # only "AppContainerApplication" which requires an AppxManifest.
+ # Therefore disable the examples, just build the library.
+ disable_feature examples
+ fi
;;
rvct)
CC=armcc
diff --git a/build/make/gen_msvs_proj.sh b/build/make/gen_msvs_proj.sh
index d0cbf3e..4e803b8 100755
--- a/build/make/gen_msvs_proj.sh
+++ b/build/make/gen_msvs_proj.sh
@@ -9,17 +9,11 @@
## be found in the AUTHORS file in the root of the source tree.
##
-
self=$0
self_basename=${self##*/}
self_dirname=$(dirname "$0")
-EOL=$'\n'
-if [ "$(uname -o 2>/dev/null)" = "Cygwin" ] \
- && cygpath --help >/dev/null 2>&1; then
- FIXPATH='cygpath -m'
-else
- FIXPATH='echo'
-fi
+
+. "$self_dirname/msvs_common.sh"|| exit 127
show_help() {
cat <<EOF
@@ -49,86 +43,6 @@
exit 1
}
-die() {
- echo "${self_basename}: $@" >&2
- exit 1
-}
-
-die_unknown(){
- echo "Unknown option \"$1\"." >&2
- echo "See ${self_basename} --help for available options." >&2
- exit 1
-}
-
-fix_path() {
- $FIXPATH "$1"
-}
-
-generate_uuid() {
- local hex="0123456789ABCDEF"
- local i
- local uuid=""
- local j
- #93995380-89BD-4b04-88EB-625FBE52EBFB
- for ((i=0; i<32; i++)); do
- (( j = $RANDOM % 16 ))
- uuid="${uuid}${hex:$j:1}"
- done
- echo "${uuid:0:8}-${uuid:8:4}-${uuid:12:4}-${uuid:16:4}-${uuid:20:12}"
-}
-
-indent1=" "
-indent=""
-indent_push() {
- indent="${indent}${indent1}"
-}
-indent_pop() {
- indent="${indent%${indent1}}"
-}
-
-tag_attributes() {
- for opt in "$@"; do
- optval="${opt#*=}"
- [ -n "${optval}" ] ||
- die "Missing attribute value in '$opt' while generating $tag tag"
- echo "${indent}${opt%%=*}=\"${optval}\""
- done
-}
-
-open_tag() {
- local tag=$1
- shift
- if [ $# -ne 0 ]; then
- echo "${indent}<${tag}"
- indent_push
- tag_attributes "$@"
- echo "${indent}>"
- else
- echo "${indent}<${tag}>"
- indent_push
- fi
-}
-
-close_tag() {
- local tag=$1
- indent_pop
- echo "${indent}</${tag}>"
-}
-
-tag() {
- local tag=$1
- shift
- if [ $# -ne 0 ]; then
- echo "${indent}<${tag}"
- indent_push
- tag_attributes "$@"
- indent_pop
- echo "${indent}/>"
- else
- echo "${indent}<${tag}/>"
- fi
-}
-
generate_filter() {
local var=$1
local name=$2
diff --git a/build/make/gen_msvs_vcxproj.sh b/build/make/gen_msvs_vcxproj.sh
index a64e129..8529eed 100755
--- a/build/make/gen_msvs_vcxproj.sh
+++ b/build/make/gen_msvs_vcxproj.sh
@@ -9,17 +9,11 @@
## be found in the AUTHORS file in the root of the source tree.
##
-
self=$0
self_basename=${self##*/}
self_dirname=$(dirname "$0")
-EOL=$'\n'
-if [ "$(uname -o 2>/dev/null)" = "Cygwin" ] \
- && cygpath --help >/dev/null 2>&1; then
- FIXPATH='cygpath -m'
-else
- FIXPATH='echo'
-fi
+
+. "$self_dirname/msvs_common.sh"|| exit 127
show_help() {
cat <<EOF
@@ -50,86 +44,6 @@
exit 1
}
-die() {
- echo "${self_basename}: $@" >&2
- exit 1
-}
-
-die_unknown(){
- echo "Unknown option \"$1\"." >&2
- echo "See ${self_basename} --help for available options." >&2
- exit 1
-}
-
-fix_path() {
- $FIXPATH "$1"
-}
-
-generate_uuid() {
- local hex="0123456789ABCDEF"
- local i
- local uuid=""
- local j
- #93995380-89BD-4b04-88EB-625FBE52EBFB
- for ((i=0; i<32; i++)); do
- (( j = $RANDOM % 16 ))
- uuid="${uuid}${hex:$j:1}"
- done
- echo "${uuid:0:8}-${uuid:8:4}-${uuid:12:4}-${uuid:16:4}-${uuid:20:12}"
-}
-
-indent1=" "
-indent=""
-indent_push() {
- indent="${indent}${indent1}"
-}
-indent_pop() {
- indent="${indent%${indent1}}"
-}
-
-tag_attributes() {
- for opt in "$@"; do
- optval="${opt#*=}"
- [ -n "${optval}" ] ||
- die "Missing attribute value in '$opt' while generating $tag tag"
- echo "${indent}${opt%%=*}=\"${optval}\""
- done
-}
-
-open_tag() {
- local tag=$1
- shift
- if [ $# -ne 0 ]; then
- echo "${indent}<${tag}"
- indent_push
- tag_attributes "$@"
- echo "${indent}>"
- else
- echo "${indent}<${tag}>"
- indent_push
- fi
-}
-
-close_tag() {
- local tag=$1
- indent_pop
- echo "${indent}</${tag}>"
-}
-
-tag() {
- local tag=$1
- shift
- if [ $# -ne 0 ]; then
- echo "${indent}<${tag}"
- indent_push
- tag_attributes "$@"
- indent_pop
- echo "${indent}/>"
- else
- echo "${indent}<${tag}/>"
- fi
-}
-
tag_content() {
local tag=$1
local content=$2
@@ -378,6 +292,18 @@
tag_content ProjectGuid "{${guid}}"
tag_content RootNamespace ${name}
tag_content Keyword ManagedCProj
+ if [ $vs_ver -ge 12 ] && [ "${platforms[0]}" = "ARM" ]; then
+ tag_content AppContainerApplication true
+ # The application type can be one of "Windows Store",
+ # "Windows Phone" or "Windows Phone Silverlight". The
+ # actual value doesn't matter from the libvpx point of view,
+ # since a static library built for one works on the others.
+ # The PlatformToolset field needs to be set in sync with this;
+ # for Windows Store and Windows Phone Silverlight it should be
+ # v120 while it should be v120_wp81 if the type is Windows Phone.
+ tag_content ApplicationType "Windows Store"
+ tag_content ApplicationTypeRevision 8.1
+ fi
close_tag PropertyGroup
tag Import \
@@ -410,18 +336,10 @@
fi
fi
if [ "$vs_ver" = "12" ]; then
- if [ "$plat" = "ARM" ]; then
- # Setting the wp80 toolchain automatically sets the
- # WINAPI_FAMILY define, which is required for building
- # code for arm with the windows headers. Alternatively,
- # one could add AppContainerApplication=true in the Globals
- # section and add PrecompiledHeader=NotUsing and
- # CompileAsWinRT=false in ClCompile and SubSystem=Console
- # in Link.
- tag_content PlatformToolset v120_wp80
- else
- tag_content PlatformToolset v120
- fi
+ # Setting a PlatformToolset indicating windows phone isn't
+ # enough to build code for arm with MSVC 2013, one strictly
+ # has to enable AppContainerApplication as well.
+ tag_content PlatformToolset v120
fi
tag_content CharacterSet Unicode
if [ "$config" = "Release" ]; then
@@ -513,15 +431,25 @@
if ${werror:-false}; then
tag_content TreatWarningAsError true
fi
+ if [ $vs_ver -ge 11 ]; then
+ # We need to override the defaults for these settings
+ # if AppContainerApplication is set.
+ tag_content CompileAsWinRT false
+ tag_content PrecompiledHeader NotUsing
+ tag_content SDLCheck false
+ fi
close_tag ClCompile
case "$proj_kind" in
exe)
open_tag Link
if [ "$name" != "obj_int_extract" ]; then
- tag_content AdditionalDependencies "$curlibs"
+ tag_content AdditionalDependencies "$curlibs;%(AdditionalDependencies)"
tag_content AdditionalLibraryDirectories "$libdirs;%(AdditionalLibraryDirectories)"
fi
tag_content GenerateDebugInformation true
+ # Console is the default normally, but if
+ # AppContainerApplication is set, we need to override it.
+ tag_content SubSystem Console
close_tag Link
;;
dll)
diff --git a/build/make/msvs_common.sh b/build/make/msvs_common.sh
new file mode 100644
index 0000000..eb2eb7b
--- /dev/null
+++ b/build/make/msvs_common.sh
@@ -0,0 +1,98 @@
+#!/bin/bash
+##
+## Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+##
+## Use of this source code is governed by a BSD-style license
+## that can be found in the LICENSE file in the root of the source
+## tree. An additional intellectual property rights grant can be found
+## in the file PATENTS. All contributing project authors may
+## be found in the AUTHORS file in the root of the source tree.
+##
+
+if [ "$(uname -o 2>/dev/null)" = "Cygwin" ] \
+ && cygpath --help >/dev/null 2>&1; then
+ FIXPATH='cygpath -m'
+else
+ FIXPATH='echo'
+fi
+
+die() {
+ echo "${self_basename}: $@" >&2
+ exit 1
+}
+
+die_unknown(){
+ echo "Unknown option \"$1\"." >&2
+ echo "See ${self_basename} --help for available options." >&2
+ exit 1
+}
+
+fix_path() {
+ $FIXPATH "$1"
+}
+
+generate_uuid() {
+ local hex="0123456789ABCDEF"
+ local i
+ local uuid=""
+ local j
+ #93995380-89BD-4b04-88EB-625FBE52EBFB
+ for ((i=0; i<32; i++)); do
+ (( j = $RANDOM % 16 ))
+ uuid="${uuid}${hex:$j:1}"
+ done
+ echo "${uuid:0:8}-${uuid:8:4}-${uuid:12:4}-${uuid:16:4}-${uuid:20:12}"
+}
+
+indent1=" "
+indent=""
+indent_push() {
+ indent="${indent}${indent1}"
+}
+indent_pop() {
+ indent="${indent%${indent1}}"
+}
+
+tag_attributes() {
+ for opt in "$@"; do
+ optval="${opt#*=}"
+ [ -n "${optval}" ] ||
+ die "Missing attribute value in '$opt' while generating $tag tag"
+ echo "${indent}${opt%%=*}=\"${optval}\""
+ done
+}
+
+open_tag() {
+ local tag=$1
+ shift
+ if [ $# -ne 0 ]; then
+ echo "${indent}<${tag}"
+ indent_push
+ tag_attributes "$@"
+ echo "${indent}>"
+ else
+ echo "${indent}<${tag}>"
+ indent_push
+ fi
+}
+
+close_tag() {
+ local tag=$1
+ indent_pop
+ echo "${indent}</${tag}>"
+}
+
+tag() {
+ local tag=$1
+ shift
+ if [ $# -ne 0 ]; then
+ echo "${indent}<${tag}"
+ indent_push
+ tag_attributes "$@"
+ indent_pop
+ echo "${indent}/>"
+ else
+ echo "${indent}<${tag}/>"
+ fi
+}
+
diff --git a/build/make/rtcd.pl b/build/make/rtcd.pl
index 18ee80d..f5f59b1 100755
--- a/build/make/rtcd.pl
+++ b/build/make/rtcd.pl
@@ -272,6 +272,9 @@
# Assign the helper variable for each enabled extension
foreach my $opt (@ALL_ARCHS) {
my $opt_uc = uc $opt;
+ # Enable neon assembly based on HAVE_NEON logic instead of adding new
+ # HAVE_NEON_ASM logic
+ if ($opt eq 'neon_asm') { $opt_uc = 'NEON' }
eval "\$have_${opt}=\"flags & HAS_${opt_uc}\"";
}
@@ -381,7 +384,10 @@
@ALL_ARCHS = filter(qw/edsp media/);
arm;
} elsif ($opts{arch} eq 'armv7') {
- @ALL_ARCHS = filter(qw/edsp media neon/);
+ @ALL_ARCHS = filter(qw/edsp media neon_asm neon/);
+ arm;
+} elsif ($opts{arch} eq 'armv8') {
+ @ALL_ARCHS = filter(qw/neon/);
arm;
} else {
unoptimized;
diff --git a/configure b/configure
index 29683cd..bd95056 100755
--- a/configure
+++ b/configure
@@ -189,7 +189,7 @@
# install everything except the sources, by default. sources will have
# to be enabled when doing dist builds, since that's no longer a common
# case.
-enabled doxygen && php -v >/dev/null 2>&1 && enable_feature install_docs
+enabled doxygen && enable_feature install_docs
enable_feature install_bins
enable_feature install_libs
@@ -239,6 +239,7 @@
edsp
media
neon
+ neon_asm
mips32
dspr2
@@ -709,6 +710,7 @@
soft_enable webm_io
;;
*-android-*)
+ soft_enable webm_io
# GTestLog must be modified to use Android logging utilities.
;;
*-darwin-*)
diff --git a/docs.mk b/docs.mk
index 797b466..889d182 100644
--- a/docs.mk
+++ b/docs.mk
@@ -23,12 +23,6 @@
# Other doxy files sourced in Markdown
TXT_DOX = $(call enabled,TXT_DOX)
-%.dox: %.txt
- @echo " [DOXY] $@"
- @$(SRC_PATH_BARE)/examples/gen_example_doxy.php \
- $(@:.dox=) "$($@.DESC)" > $@ < $<
-
-
EXAMPLE_PATH += $(SRC_PATH_BARE) #for CHANGELOG, README, etc
EXAMPLE_PATH += $(SRC_PATH_BARE)/examples
diff --git a/examples.mk b/examples.mk
index 91b9801..c36159f 100644
--- a/examples.mk
+++ b/examples.mk
@@ -25,6 +25,11 @@
third_party/libwebm/mkvwriter.hpp \
third_party/libwebm/webmids.hpp
+LIBWEBM_PARSER_SRCS = third_party/libwebm/mkvparser.cpp \
+ third_party/libwebm/mkvreader.cpp \
+ third_party/libwebm/mkvparser.hpp \
+ third_party/libwebm/mkvreader.hpp
+
# List of examples to build. UTILS are tools meant for distribution
# while EXAMPLES demonstrate specific portions of the API.
UTILS-$(CONFIG_DECODERS) += vpxdec.c
@@ -39,14 +44,8 @@
vpxdec.SRCS += y4menc.c y4menc.h
vpxdec.SRCS += $(LIBYUV_SRCS)
ifeq ($(CONFIG_WEBM_IO),yes)
- vpxdec.SRCS += third_party/nestegg/halloc/halloc.h
- vpxdec.SRCS += third_party/nestegg/halloc/src/align.h
- vpxdec.SRCS += third_party/nestegg/halloc/src/halloc.c
- vpxdec.SRCS += third_party/nestegg/halloc/src/hlist.h
- vpxdec.SRCS += third_party/nestegg/halloc/src/macros.h
- vpxdec.SRCS += third_party/nestegg/include/nestegg/nestegg.h
- vpxdec.SRCS += third_party/nestegg/src/nestegg.c
- vpxdec.SRCS += webmdec.c webmdec.h
+ vpxdec.SRCS += $(LIBWEBM_PARSER_SRCS)
+ vpxdec.SRCS += webmdec.cc webmdec.h
endif
vpxdec.GUID = BA5FE66F-38DD-E034-F542-B1578C5FB950
vpxdec.DESCRIPTION = Full featured decoder
@@ -82,11 +81,6 @@
EXAMPLES-$(CONFIG_VP9_ENCODER) += resize_util.c
endif
-# XMA example disabled for now, not used in VP8
-#UTILS-$(CONFIG_DECODERS) += example_xma.c
-#example_xma.GUID = A955FC4A-73F1-44F7-135E-30D84D32F022
-#example_xma.DESCRIPTION = External Memory Allocation mode usage
-
EXAMPLES-$(CONFIG_ENCODERS) += vpx_temporal_scalable_patterns.c
vpx_temporal_scalable_patterns.SRCS += ivfenc.c ivfenc.h
vpx_temporal_scalable_patterns.SRCS += tools_common.c tools_common.h
@@ -147,11 +141,6 @@
endif
decode_with_drops.GUID = CE5C53C4-8DDA-438A-86ED-0DDD3CDB8D26
decode_with_drops.DESCRIPTION = Drops frames while decoding
-ifeq ($(CONFIG_VP8_DECODER),yes)
-EXAMPLES-$(CONFIG_ERROR_CONCEALMENT) += decode_with_partial_drops.c
-endif
-decode_with_partial_drops.GUID = 61C2D026-5754-46AC-916F-1343ECC5537E
-decode_with_partial_drops.DESCRIPTION = Drops parts of frames while decoding
EXAMPLES-$(CONFIG_ENCODERS) += set_maps.c
set_maps.SRCS += ivfenc.h ivfenc.c
set_maps.SRCS += tools_common.h tools_common.c
diff --git a/examples/decode_with_partial_drops.c b/examples/decode_with_partial_drops.c
deleted file mode 100644
index d7132de..0000000
--- a/examples/decode_with_partial_drops.c
+++ /dev/null
@@ -1,328 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-// Decode With Partial Drops Example
-// =========================
-//
-// This is an example utility which drops a series of frames (or parts of
-// frames), as specified on the command line. This is useful for observing the
-// error recovery features of the codec.
-//
-// Usage
-// -----
-// This example adds a single argument to the `simple_decoder` example,
-// which specifies the range or pattern of frames to drop. The parameter is
-// parsed as follows.
-//
-// Dropping A Range Of Frames
-// --------------------------
-// To drop a range of frames, specify the starting frame and the ending
-// frame to drop, separated by a dash. The following command will drop
-// frames 5 through 10 (base 1).
-//
-// $ ./decode_with_partial_drops in.ivf out.i420 5-10
-//
-//
-// Dropping A Pattern Of Frames
-// ----------------------------
-// To drop a pattern of frames, specify the number of frames to drop and
-// the number of frames after which to repeat the pattern, separated by
-// a forward-slash. The following command will drop 3 of 7 frames.
-// Specifically, it will decode 4 frames, then drop 3 frames, and then
-// repeat.
-//
-// $ ./decode_with_partial_drops in.ivf out.i420 3/7
-//
-// Dropping Random Parts Of Frames
-// -------------------------------
-// A third argument tuple is available to split the frame into 1500 bytes pieces
-// and randomly drop pieces rather than frames. The frame will be split at
-// partition boundaries where possible. The following example will seed the RNG
-// with the seed 123 and drop approximately 5% of the pieces. Pieces which
-// are depending on an already dropped piece will also be dropped.
-//
-// $ ./decode_with_partial_drops in.ivf out.i420 5,123
-//
-// Extra Variables
-// ---------------
-// This example maintains the pattern passed on the command line in the
-// `n`, `m`, and `is_range` variables:
-//
-// Making The Drop Decision
-// ------------------------
-// The example decides whether to drop the frame based on the current
-// frame number, immediately before decoding the frame.
-
-#include <stdarg.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#define VPX_CODEC_DISABLE_COMPAT 1
-#include "./vpx_config.h"
-#include "vpx/vp8dx.h"
-#include "vpx/vpx_decoder.h"
-#define interface (vpx_codec_vp8_dx())
-#include <time.h>
-
-
-#define IVF_FILE_HDR_SZ (32)
-#define IVF_FRAME_HDR_SZ (12)
-
-static unsigned int mem_get_le32(const unsigned char *mem) {
- return (mem[3] << 24)|(mem[2] << 16)|(mem[1] << 8)|(mem[0]);
-}
-
-static void die(const char *fmt, ...) {
- va_list ap;
-
- va_start(ap, fmt);
- vprintf(fmt, ap);
- if(fmt[strlen(fmt)-1] != '\n')
- printf("\n");
- exit(EXIT_FAILURE);
-}
-
-static void die_codec(vpx_codec_ctx_t *ctx, const char *s) {
- const char *detail = vpx_codec_error_detail(ctx);
-
- printf("%s: %s\n", s, vpx_codec_error(ctx));
- if(detail)
- printf(" %s\n",detail);
- exit(EXIT_FAILURE);
-}
-
-struct parsed_header
-{
- char key_frame;
- int version;
- char show_frame;
- int first_part_size;
-};
-
-int next_packet(struct parsed_header* hdr, int pos, int length, int mtu)
-{
- int size = 0;
- int remaining = length - pos;
- /* Uncompressed part is 3 bytes for P frames and 10 bytes for I frames */
- int uncomp_part_size = (hdr->key_frame ? 10 : 3);
- /* number of bytes yet to send from header and the first partition */
- int remainFirst = uncomp_part_size + hdr->first_part_size - pos;
- if (remainFirst > 0)
- {
- if (remainFirst <= mtu)
- {
- size = remainFirst;
- }
- else
- {
- size = mtu;
- }
-
- return size;
- }
-
- /* second partition; just slot it up according to MTU */
- if (remaining <= mtu)
- {
- size = remaining;
- return size;
- }
- return mtu;
-}
-
-void throw_packets(unsigned char* frame, int* size, int loss_rate,
- int* thrown, int* kept)
-{
- unsigned char loss_frame[256*1024];
- int pkg_size = 1;
- int pos = 0;
- int loss_pos = 0;
- struct parsed_header hdr;
- unsigned int tmp;
- int mtu = 1500;
-
- if (*size < 3)
- {
- return;
- }
- putc('|', stdout);
- /* parse uncompressed 3 bytes */
- tmp = (frame[2] << 16) | (frame[1] << 8) | frame[0];
- hdr.key_frame = !(tmp & 0x1); /* inverse logic */
- hdr.version = (tmp >> 1) & 0x7;
- hdr.show_frame = (tmp >> 4) & 0x1;
- hdr.first_part_size = (tmp >> 5) & 0x7FFFF;
-
- /* don't drop key frames */
- if (hdr.key_frame)
- {
- int i;
- *kept = *size/mtu + ((*size % mtu > 0) ? 1 : 0); /* approximate */
- for (i=0; i < *kept; i++)
- putc('.', stdout);
- return;
- }
-
- while ((pkg_size = next_packet(&hdr, pos, *size, mtu)) > 0)
- {
- int loss_event = ((rand() + 1.0)/(RAND_MAX + 1.0) < loss_rate/100.0);
- if (*thrown == 0 && !loss_event)
- {
- memcpy(loss_frame + loss_pos, frame + pos, pkg_size);
- loss_pos += pkg_size;
- (*kept)++;
- putc('.', stdout);
- }
- else
- {
- (*thrown)++;
- putc('X', stdout);
- }
- pos += pkg_size;
- }
- memcpy(frame, loss_frame, loss_pos);
- memset(frame + loss_pos, 0, *size - loss_pos);
- *size = loss_pos;
-}
-
-int main(int argc, char **argv) {
- FILE *infile, *outfile;
- vpx_codec_ctx_t codec;
- int flags = 0, frame_cnt = 0;
- unsigned char file_hdr[IVF_FILE_HDR_SZ];
- unsigned char frame_hdr[IVF_FRAME_HDR_SZ];
- unsigned char frame[256*1024];
- vpx_codec_err_t res;
- int n, m, mode;
- unsigned int seed;
- int thrown=0, kept=0;
- int thrown_frame=0, kept_frame=0;
- vpx_codec_dec_cfg_t dec_cfg = {0};
-
- (void)res;
- /* Open files */
- if(argc < 4 || argc > 6)
- die("Usage: %s <infile> <outfile> [-t <num threads>] <N-M|N/M|L,S>\n",
- argv[0]);
- {
- char *nptr;
- int arg_num = 3;
- if (argc == 6 && strncmp(argv[arg_num++], "-t", 2) == 0)
- dec_cfg.threads = strtol(argv[arg_num++], NULL, 0);
- n = strtol(argv[arg_num], &nptr, 0);
- mode = (*nptr == '\0' || *nptr == ',') ? 2 : (*nptr == '-') ? 1 : 0;
-
- m = strtol(nptr+1, NULL, 0);
- if((!n && !m) || (*nptr != '-' && *nptr != '/' &&
- *nptr != '\0' && *nptr != ','))
- die("Couldn't parse pattern %s\n", argv[3]);
- }
- seed = (m > 0) ? m : (unsigned int)time(NULL);
- srand(seed);thrown_frame = 0;
- printf("Seed: %u\n", seed);
- printf("Threads: %d\n", dec_cfg.threads);
- if(!(infile = fopen(argv[1], "rb")))
- die("Failed to open %s for reading", argv[1]);
- if(!(outfile = fopen(argv[2], "wb")))
- die("Failed to open %s for writing", argv[2]);
-
- /* Read file header */
- if(!(fread(file_hdr, 1, IVF_FILE_HDR_SZ, infile) == IVF_FILE_HDR_SZ
- && file_hdr[0]=='D' && file_hdr[1]=='K' && file_hdr[2]=='I'
- && file_hdr[3]=='F'))
- die("%s is not an IVF file.", argv[1]);
-
- printf("Using %s\n",vpx_codec_iface_name(interface));
- /* Initialize codec */
- flags = VPX_CODEC_USE_ERROR_CONCEALMENT;
- res = vpx_codec_dec_init(&codec, interface, &dec_cfg, flags);
- if(res)
- die_codec(&codec, "Failed to initialize decoder");
-
-
- /* Read each frame */
- while(fread(frame_hdr, 1, IVF_FRAME_HDR_SZ, infile) == IVF_FRAME_HDR_SZ) {
- int frame_sz = mem_get_le32(frame_hdr);
- vpx_codec_iter_t iter = NULL;
- vpx_image_t *img;
-
-
- frame_cnt++;
- if(frame_sz > sizeof(frame))
- die("Frame %d data too big for example code buffer", frame_sz);
- if(fread(frame, 1, frame_sz, infile) != frame_sz)
- die("Frame %d failed to read complete frame", frame_cnt);
-
- /* Decide whether to throw parts of the frame or the whole frame
- depending on the drop mode */
- thrown_frame = 0;
- kept_frame = 0;
- switch (mode)
- {
- case 0:
- if (m - (frame_cnt-1)%m <= n)
- {
- frame_sz = 0;
- }
- break;
- case 1:
- if (frame_cnt >= n && frame_cnt <= m)
- {
- frame_sz = 0;
- }
- break;
- case 2:
- throw_packets(frame, &frame_sz, n, &thrown_frame, &kept_frame);
- break;
- default: break;
- }
- if (mode < 2)
- {
- if (frame_sz == 0)
- {
- putc('X', stdout);
- thrown_frame++;
- }
- else
- {
- putc('.', stdout);
- kept_frame++;
- }
- }
- thrown += thrown_frame;
- kept += kept_frame;
- fflush(stdout);
- /* Decode the frame */
- if(vpx_codec_decode(&codec, frame, frame_sz, NULL, 0))
- die_codec(&codec, "Failed to decode frame");
-
- /* Write decoded data to disk */
- while((img = vpx_codec_get_frame(&codec, &iter))) {
- unsigned int plane, y;
-
- for(plane=0; plane < 3; plane++) {
- unsigned char *buf =img->planes[plane];
-
- for(y=0; y < (plane ? (img->d_h + 1) >> 1 : img->d_h); y++) {
- (void) fwrite(buf, 1, (plane ? (img->d_w + 1) >> 1 : img->d_w),
- outfile);
- buf += img->stride[plane];
- }
- }
- }
- }
- printf("Processed %d frames.\n",frame_cnt);
- if(vpx_codec_destroy(&codec))
- die_codec(&codec, "Failed to destroy codec");
-
- fclose(outfile);
- fclose(infile);
- return EXIT_SUCCESS;
-}
diff --git a/examples/example_xma.c b/examples/example_xma.c
deleted file mode 100644
index c960c28..0000000
--- a/examples/example_xma.c
+++ /dev/null
@@ -1,193 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-/* This is a simple program showing how to initialize the decoder in XMA mode */
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdarg.h>
-#include <string.h>
-#define VPX_CODEC_DISABLE_COMPAT 1
-#include "vpx_config.h"
-#include "vpx/vpx_decoder.h"
-#include "vpx/vpx_integer.h"
-#if CONFIG_VP9_DECODER
-#include "vpx/vp8dx.h"
-#endif
-
-static char *exec_name;
-static int verbose = 0;
-
-static const struct {
- const char *name;
- vpx_codec_iface_t *iface;
-} ifaces[] = {
-#if CONFIG_VP9_DECODER
- {"vp9", &vpx_codec_vp8_dx_algo},
-#endif
-};
-
-static void usage_exit(void) {
- int i;
-
- printf("Usage: %s <options>\n\n"
- "Options:\n"
- "\t--codec <name>\tCodec to use (default=%s)\n"
- "\t-h <height>\tHeight of the simulated video frame, in pixels\n"
- "\t-w <width> \tWidth of the simulated video frame, in pixels\n"
- "\t-v \tVerbose mode (show individual segment sizes)\n"
- "\t--help \tShow this message\n"
- "\n"
- "Included decoders:\n"
- "\n",
- exec_name,
- ifaces[0].name);
-
- for (i = 0; i < sizeof(ifaces) / sizeof(ifaces[0]); i++)
- printf(" %-6s - %s\n",
- ifaces[i].name,
- vpx_codec_iface_name(ifaces[i].iface));
-
- exit(EXIT_FAILURE);
-}
-
-static void usage_error(const char *fmt, ...) {
- va_list ap;
- va_start(ap, fmt);
- vprintf(fmt, ap);
- printf("\n");
- usage_exit();
-}
-
-void my_mem_dtor(vpx_codec_mmap_t *mmap) {
- if (verbose)
- printf("freeing segment %d\n", mmap->id);
-
- free(mmap->priv);
-}
-
-int main(int argc, char **argv) {
- vpx_codec_ctx_t decoder;
- vpx_codec_iface_t *iface = ifaces[0].iface;
- vpx_codec_iter_t iter;
- vpx_codec_dec_cfg_t cfg;
- vpx_codec_err_t res = VPX_CODEC_OK;
- unsigned int alloc_sz = 0;
- unsigned int w = 352;
- unsigned int h = 288;
- int i;
-
- exec_name = argv[0];
-
- for (i = 1; i < argc; i++) {
- if (!strcmp(argv[i], "--codec")) {
- if (i + 1 < argc) {
- int j, k = -1;
-
- i++;
-
- for (j = 0; j < sizeof(ifaces) / sizeof(ifaces[0]); j++)
- if (!strcmp(ifaces[j].name, argv[i]))
- k = j;
-
- if (k >= 0)
- iface = ifaces[k].iface;
- else
- usage_error("Error: Unrecognized argument (%s) to --codec\n",
- argv[i]);
- } else
- usage_error("Error: Option --codec requires argument.\n");
- } else if (!strcmp(argv[i], "-v"))
- verbose = 1;
- else if (!strcmp(argv[i], "-h"))
- if (i + 1 < argc) {
- h = atoi(argv[++i]);
- } else
- usage_error("Error: Option -h requires argument.\n");
- else if (!strcmp(argv[i], "-w"))
- if (i + 1 < argc) {
- w = atoi(argv[++i]);
- } else
- usage_error("Error: Option -w requires argument.\n");
- else if (!strcmp(argv[i], "--help"))
- usage_exit();
- else
- usage_error("Error: Unrecognized option %s\n\n", argv[i]);
- }
-
- if (argc == 1)
- printf("Using built-in defaults. For options, rerun with --help\n\n");
-
- /* XMA mode is not supported on all decoders! */
- if (!(vpx_codec_get_caps(iface) & VPX_CODEC_CAP_XMA)) {
- printf("%s does not support XMA mode!\n", vpx_codec_iface_name(iface));
- return EXIT_FAILURE;
- }
-
- /* The codec knows how much memory to allocate based on the size of the
- * encoded frames. This data can be parsed from the bitstream with
- * vpx_codec_peek_stream_info() if a bitstream is available. Otherwise,
- * a fixed size can be used that will be the upper limit on the frame
- * size the decoder can decode.
- */
- cfg.w = w;
- cfg.h = h;
-
- /* Initialize the decoder in XMA mode. */
- if (vpx_codec_dec_init(&decoder, iface, &cfg, VPX_CODEC_USE_XMA)) {
- printf("Failed to initialize decoder in XMA mode: %s\n",
- vpx_codec_error(&decoder));
- return EXIT_FAILURE;
- }
-
- /* Iterate through the list of memory maps, allocating them with the
- * requested alignment.
- */
- iter = NULL;
-
- do {
- vpx_codec_mmap_t mmap;
- unsigned int align;
-
- res = vpx_codec_get_mem_map(&decoder, &mmap, &iter);
- align = mmap.align ? mmap.align - 1 : 0;
-
- if (!res) {
- if (verbose)
- printf("Allocating segment %u, size %lu, align %u %s\n",
- mmap.id, mmap.sz, mmap.align,
- mmap.flags & VPX_CODEC_MEM_ZERO ? "(ZEROED)" : "");
-
- if (mmap.flags & VPX_CODEC_MEM_ZERO)
- mmap.priv = calloc(1, mmap.sz + align);
- else
- mmap.priv = malloc(mmap.sz + align);
-
- mmap.base = (void *)((((uintptr_t)mmap.priv) + align) &
- ~(uintptr_t)align);
- mmap.dtor = my_mem_dtor;
- alloc_sz += mmap.sz + align;
-
- if (vpx_codec_set_mem_map(&decoder, &mmap, 1)) {
- printf("Failed to set mmap: %s\n", vpx_codec_error(&decoder));
- return EXIT_FAILURE;
- }
- } else if (res != VPX_CODEC_LIST_END) {
- printf("Failed to get mmap: %s\n", vpx_codec_error(&decoder));
- return EXIT_FAILURE;
- }
- } while (res != VPX_CODEC_LIST_END);
-
- printf("%s\n %d bytes external memory required for %dx%d.\n",
- decoder.name, alloc_sz, cfg.w, cfg.h);
- vpx_codec_destroy(&decoder);
- return EXIT_SUCCESS;
-
-}
diff --git a/examples/vp9_spatial_scalable_encoder.c b/examples/vp9_spatial_scalable_encoder.c
index 64e62ef..983f52d 100644
--- a/examples/vp9_spatial_scalable_encoder.c
+++ b/examples/vp9_spatial_scalable_encoder.c
@@ -344,7 +344,7 @@
}
res = vpx_svc_encode(&svc_ctx, &codec, (end_of_stream ? NULL : &raw),
- pts, frame_duration, VPX_DL_REALTIME);
+ pts, frame_duration, VPX_DL_GOOD_QUALITY);
printf("%s", vpx_svc_get_message(&svc_ctx));
if (res != VPX_CODEC_OK) {
die_codec(&codec, "Failed to encode frame");
diff --git a/test/android/Android.mk b/test/android/Android.mk
index 13af601..4e750b2 100644
--- a/test/android/Android.mk
+++ b/test/android/Android.mk
@@ -14,8 +14,14 @@
BINDINGS_DIR := $(CUR_WD)/../../..
LOCAL_PATH := $(CUR_WD)/../../..
+#libwebm
+include $(CLEAR_VARS)
+include $(BINDINGS_DIR)/libvpx/third_party/libwebm/Android.mk
+LOCAL_PATH := $(CUR_WD)/../../..
+
#libvpx
include $(CLEAR_VARS)
+LOCAL_STATIC_LIBRARIES := libwebm
include $(BINDINGS_DIR)/libvpx/build/make/Android.mk
LOCAL_PATH := $(CUR_WD)/../..
@@ -33,7 +39,7 @@
include $(CLEAR_VARS)
LOCAL_ARM_MODE := arm
LOCAL_MODULE := libvpx_test
-LOCAL_STATIC_LIBRARIES := gtest
+LOCAL_STATIC_LIBRARIES := gtest libwebm
LOCAL_SHARED_LIBRARIES := vpx
include $(LOCAL_PATH)/test/test.mk
LOCAL_C_INCLUDES := $(BINDINGS_DIR)
diff --git a/test/android/README b/test/android/README
index 6840d91..4a1adcf 100644
--- a/test/android/README
+++ b/test/android/README
@@ -3,7 +3,7 @@
./libvpx/configure --target=armv7-android-gcc --enable-external-build \
--enable-postproc --disable-install-srcs --enable-multi-res-encoding \
--enable-temporal-denoising --disable-unit-tests --disable-install-docs \
- --disable-examples --disable-runtime-cpu-detect --sdk=$NDK
+ --disable-examples --disable-runtime-cpu-detect --sdk-path=$NDK
2) From the parent directory, invoke ndk-build:
NDK_PROJECT_PATH=. ndk-build APP_BUILD_SCRIPT=./libvpx/test/android/Android.mk \
diff --git a/test/borders_test.cc b/test/borders_test.cc
index a2f5a1b..b30be45 100644
--- a/test/borders_test.cc
+++ b/test/borders_test.cc
@@ -35,6 +35,7 @@
encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
encoder->Control(VP8E_SET_ARNR_STRENGTH, 5);
+ encoder->Control(VP8E_SET_ARNR_TYPE, 3);
}
}
diff --git a/test/convolve_test.cc b/test/convolve_test.cc
index 37ee0ef..cbb4036 100644
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@@ -634,7 +634,7 @@
make_tuple(64, 64, &convolve8_ssse3)));
#endif
-#if HAVE_NEON
+#if HAVE_NEON_ASM
const ConvolveFunctions convolve8_neon(
vp9_convolve8_horiz_neon, vp9_convolve8_avg_horiz_neon,
vp9_convolve8_vert_neon, vp9_convolve8_avg_vert_neon,
diff --git a/test/cpu_speed_test.cc b/test/cpu_speed_test.cc
index ca201bb..be651b4 100644
--- a/test/cpu_speed_test.cc
+++ b/test/cpu_speed_test.cc
@@ -37,6 +37,7 @@
encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
encoder->Control(VP8E_SET_ARNR_STRENGTH, 5);
+ encoder->Control(VP8E_SET_ARNR_TYPE, 3);
}
}
diff --git a/test/datarate_test.cc b/test/datarate_test.cc
index 2b4aa3a..80be05e 100644
--- a/test/datarate_test.cc
+++ b/test/datarate_test.cc
@@ -522,10 +522,14 @@
cfg_.ts_target_bitrate[2] = cfg_.rc_target_bitrate;
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
for (int j = 0; j < static_cast<int>(cfg_.ts_number_layers); ++j) {
- ASSERT_GE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 0.85)
+ // TODO(yaowu): Work out more stable rc control strategy and
+ // Adjust the thresholds to be tighter than .75.
+ ASSERT_GE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 0.75)
<< " The datarate for the file is lower than target by too much, "
"for layer: " << j;
- ASSERT_LE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 1.15)
+ // TODO(yaowu): Work out more stable rc control strategy and
+ // Adjust the thresholds to be tighter than 1.25.
+ ASSERT_LE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 1.25)
<< " The datarate for the file is greater than target by too much, "
"for layer: " << j;
}
diff --git a/test/dct16x16_test.cc b/test/dct16x16_test.cc
index cb5562e..143a267 100644
--- a/test/dct16x16_test.cc
+++ b/test/dct16x16_test.cc
@@ -512,7 +512,7 @@
make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2),
make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3)));
-#if HAVE_NEON
+#if HAVE_NEON_ASM
INSTANTIATE_TEST_CASE_P(
NEON, Trans16x16DCT,
::testing::Values(
diff --git a/test/dct32x32_test.cc b/test/dct32x32_test.cc
index 013f451..72c0bd6 100644
--- a/test/dct32x32_test.cc
+++ b/test/dct32x32_test.cc
@@ -248,7 +248,7 @@
make_tuple(&vp9_fdct32x32_c, &vp9_idct32x32_1024_add_c, 0),
make_tuple(&vp9_fdct32x32_rd_c, &vp9_idct32x32_1024_add_c, 1)));
-#if HAVE_NEON
+#if HAVE_NEON_ASM
INSTANTIATE_TEST_CASE_P(
NEON, Trans32x32Test,
::testing::Values(
diff --git a/test/decode_to_md5.sh b/test/decode_to_md5.sh
index da1a870..f64acc8 100755
--- a/test/decode_to_md5.sh
+++ b/test/decode_to_md5.sh
@@ -24,22 +24,25 @@
fi
}
-# Runs decode_to_md5 on $1 and echoes the MD5 sum for the final frame. $2 is
-# interpreted as codec name and used solely to name the output file.
+# Runs decode_to_md5 on $1 and captures the md5 sum for the final frame. $2 is
+# interpreted as codec name and used solely to name the output file. $3 is the
+# expected md5 sum: It must match that of the final frame.
decode_to_md5() {
local decoder="${LIBVPX_BIN_PATH}/decode_to_md5${VPX_TEST_EXE_SUFFIX}"
local input_file="$1"
local codec="$2"
+ local expected_md5="$3"
local output_file="${VPX_TEST_OUTPUT_DIR}/decode_to_md5_${codec}"
[ -x "${decoder}" ] || return 1
- "${decoder}" "${input_file}" "${output_file}" > /dev/null 2>&1
+ eval "${decoder}" "${input_file}" "${output_file}" ${devnull}
[ -e "${output_file}" ] || return 1
local md5_last_frame=$(tail -n1 "${output_file}")
- echo "${md5_last_frame% *}" | tr -d [:space:]
+ local actual_md5=$(echo "${md5_last_frame% *}" | tr -d [:space:])
+ [ "${actual_md5}" = "${expected_md5}" ] || return 1
}
decode_to_md5_vp8() {
@@ -47,8 +50,7 @@
local expected_md5="56794d911b02190212bca92f88ad60c6"
if [ "$(vp8_decode_available)" = "yes" ]; then
- local actual_md5="$(decode_to_md5 "${VP8_IVF_FILE}" vp8)" || return 1
- [ "${actual_md5}" = "${expected_md5}" ] || return 1
+ decode_to_md5 "${VP8_IVF_FILE}" "vp8" "${expected_md5}"
fi
}
@@ -57,8 +59,7 @@
local expected_md5="2952c0eae93f3dadd1aa84c50d3fd6d2"
if [ "$(vp9_decode_available)" = "yes" ]; then
- local actual_md5="$(decode_to_md5 "${VP9_IVF_FILE}" vp9)" || return 1
- [ "${actual_md5}" = "${expected_md5}" ] || return 1
+ decode_to_md5 "${VP9_IVF_FILE}" "vp9" "${expected_md5}"
fi
}
diff --git a/test/decode_with_drops.sh b/test/decode_with_drops.sh
new file mode 100755
index 0000000..82e934d
--- /dev/null
+++ b/test/decode_with_drops.sh
@@ -0,0 +1,75 @@
+#!/bin/sh
+##
+## Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+##
+## Use of this source code is governed by a BSD-style license
+## that can be found in the LICENSE file in the root of the source
+## tree. An additional intellectual property rights grant can be found
+## in the file PATENTS. All contributing project authors may
+## be found in the AUTHORS file in the root of the source tree.
+##
+## This file tests the libvpx decode_with_drops example. To add new tests to
+## this file, do the following:
+## 1. Write a shell function (this is your test).
+## 2. Add the function to decode_with_drops_tests (on a new line).
+##
+. $(dirname $0)/tools_common.sh
+
+# Environment check: Make sure input is available:
+# $VP8_IVF_FILE and $VP9_IVF_FILE are required.
+decode_with_drops_verify_environment() {
+ if [ ! -e "${VP8_IVF_FILE}" ] || [ ! -e "${VP9_IVF_FILE}" ]; then
+ echo "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH."
+ return 1
+ fi
+}
+
+# Runs decode_with_drops on $1, $2 is interpreted as codec name and used solely
+# to name the output file. $3 is the drop mode, and is passed directly to
+# decode_with_drops.
+decode_with_drops() {
+ local decoder="${LIBVPX_BIN_PATH}/decode_with_drops${VPX_TEST_EXE_SUFFIX}"
+ local input_file="$1"
+ local codec="$2"
+ local output_file="${VPX_TEST_OUTPUT_DIR}/decode_with_drops_${codec}"
+ local drop_mode="$3"
+
+ [ -x "${decoder}" ] || return 1
+
+ eval "${decoder}" "${input_file}" "${output_file}" "${drop_mode}" ${devnull}
+
+ [ -e "${output_file}" ] || return 1
+}
+
+# Decodes $VP8_IVF_FILE while dropping frames, twice: once in sequence mode,
+# and once in pattern mode.
+# Note: This test assumes that $VP8_IVF_FILE has exactly 29 frames, and could
+# break if the file is modified.
+decode_with_drops_vp8() {
+ if [ "$(vp8_decode_available)" = "yes" ]; then
+ # Test sequence mode: Drop frames 2-28.
+ decode_with_drops "${VP8_IVF_FILE}" "vp8" "2-28"
+
+ # Test pattern mode: Drop 3 of every 4 frames.
+ decode_with_drops "${VP8_IVF_FILE}" "vp8" "3/4"
+ fi
+}
+
+# Decodes $VP9_IVF_FILE while dropping frames, twice: once in sequence mode,
+# and once in pattern mode.
+# Note: This test assumes that $VP9_IVF_FILE has exactly 20 frames, and could
+# break if the file is modified.
+decode_with_drops_vp9() {
+ if [ "$(vp9_decode_available)" = "yes" ]; then
+ # Test sequence mode: Drop frames 2-28.
+ decode_with_drops "${VP9_IVF_FILE}" "vp9" "2-19"
+
+ # Test pattern mode: Drop 3 of every 4 frames.
+ decode_with_drops "${VP9_IVF_FILE}" "vp9" "3/4"
+ fi
+}
+
+decode_with_drops_tests="decode_with_drops_vp8
+ decode_with_drops_vp9"
+
+run_tests decode_with_drops_verify_environment "${decode_with_drops_tests}"
diff --git a/test/examples.sh b/test/examples.sh
new file mode 100755
index 0000000..ac2a18c
--- /dev/null
+++ b/test/examples.sh
@@ -0,0 +1,28 @@
+#!/bin/sh
+##
+## Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+##
+## Use of this source code is governed by a BSD-style license
+## that can be found in the LICENSE file in the root of the source
+## tree. An additional intellectual property rights grant can be found
+## in the file PATENTS. All contributing project authors may
+## be found in the AUTHORS file in the root of the source tree.
+##
+## This file runs all of the tests for the libvpx examples.
+##
+. $(dirname $0)/tools_common.sh
+
+example_tests=$(ls $(dirname $0)/*.sh)
+
+# List of script names to exclude.
+exclude_list="examples vpxdec vpxenc tools_common"
+
+# Filter out the scripts in $exclude_list.
+for word in ${exclude_list}; do
+ example_tests=$(filter_strings "${example_tests}" "${word}" exclude)
+done
+
+for test in ${example_tests}; do
+ # Source each test script so that exporting variables can be avoided.
+ . "${test}"
+done
diff --git a/test/fdct4x4_test.cc b/test/fdct4x4_test.cc
index 127775c..030665e 100644
--- a/test/fdct4x4_test.cc
+++ b/test/fdct4x4_test.cc
@@ -48,6 +48,10 @@
vp9_fht4x4_c(in, out, stride, tx_type);
}
+void fwht4x4_ref(const int16_t *in, int16_t *out, int stride, int tx_type) {
+ vp9_fwht4x4_c(in, out, stride);
+}
+
class Trans4x4TestBase {
public:
virtual ~Trans4x4TestBase() {}
@@ -57,7 +61,7 @@
virtual void RunInvTxfm(const int16_t *out, uint8_t *dst, int stride) = 0;
- void RunAccuracyCheck() {
+ void RunAccuracyCheck(int limit) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
uint32_t max_error = 0;
int64_t total_error = 0;
@@ -88,11 +92,13 @@
}
}
- EXPECT_GE(1u, max_error)
- << "Error: 4x4 FHT/IHT has an individual round trip error > 1";
+ EXPECT_GE(static_cast<uint32_t>(limit), max_error)
+ << "Error: 4x4 FHT/IHT has an individual round trip error > "
+ << limit;
- EXPECT_GE(count_test_block , total_error)
- << "Error: 4x4 FHT/IHT has average round trip error > 1 per block";
+ EXPECT_GE(count_test_block * limit, total_error)
+ << "Error: 4x4 FHT/IHT has average round trip error > " << limit
+ << " per block";
}
void RunCoeffCheck() {
@@ -150,7 +156,7 @@
}
}
- void RunInvAccuracyCheck() {
+ void RunInvAccuracyCheck(int limit) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
const int count_test_block = 1000;
DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
@@ -173,8 +179,8 @@
for (int j = 0; j < kNumCoeffs; ++j) {
const uint32_t diff = dst[j] - src[j];
const uint32_t error = diff * diff;
- EXPECT_GE(1u, error)
- << "Error: 16x16 IDCT has error " << error
+ EXPECT_GE(static_cast<uint32_t>(limit), error)
+ << "Error: 4x4 IDCT has error " << error
<< " at index " << j;
}
}
@@ -213,7 +219,7 @@
};
TEST_P(Trans4x4DCT, AccuracyCheck) {
- RunAccuracyCheck();
+ RunAccuracyCheck(1);
}
TEST_P(Trans4x4DCT, CoeffCheck) {
@@ -225,7 +231,7 @@
}
TEST_P(Trans4x4DCT, InvAccuracyCheck) {
- RunInvAccuracyCheck();
+ RunInvAccuracyCheck(1);
}
class Trans4x4HT
@@ -257,7 +263,7 @@
};
TEST_P(Trans4x4HT, AccuracyCheck) {
- RunAccuracyCheck();
+ RunAccuracyCheck(1);
}
TEST_P(Trans4x4HT, CoeffCheck) {
@@ -269,9 +275,51 @@
}
TEST_P(Trans4x4HT, InvAccuracyCheck) {
- RunInvAccuracyCheck();
+ RunInvAccuracyCheck(1);
}
+class Trans4x4WHT
+ : public Trans4x4TestBase,
+ public ::testing::TestWithParam<dct_4x4_param_t> {
+ public:
+ virtual ~Trans4x4WHT() {}
+
+ virtual void SetUp() {
+ fwd_txfm_ = GET_PARAM(0);
+ inv_txfm_ = GET_PARAM(1);
+ tx_type_ = GET_PARAM(2);
+ pitch_ = 4;
+ fwd_txfm_ref = fwht4x4_ref;
+ }
+ virtual void TearDown() { libvpx_test::ClearSystemState(); }
+
+ protected:
+ void RunFwdTxfm(const int16_t *in, int16_t *out, int stride) {
+ fwd_txfm_(in, out, stride);
+ }
+ void RunInvTxfm(const int16_t *out, uint8_t *dst, int stride) {
+ inv_txfm_(out, dst, stride);
+ }
+
+ fdct_t fwd_txfm_;
+ idct_t inv_txfm_;
+};
+
+TEST_P(Trans4x4WHT, AccuracyCheck) {
+ RunAccuracyCheck(0);
+}
+
+TEST_P(Trans4x4WHT, CoeffCheck) {
+ RunCoeffCheck();
+}
+
+TEST_P(Trans4x4WHT, MemCheck) {
+ RunMemCheck();
+}
+
+TEST_P(Trans4x4WHT, InvAccuracyCheck) {
+ RunInvAccuracyCheck(0);
+}
using std::tr1::make_tuple;
INSTANTIATE_TEST_CASE_P(
@@ -285,8 +333,12 @@
make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 1),
make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 2),
make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 3)));
+INSTANTIATE_TEST_CASE_P(
+ C, Trans4x4WHT,
+ ::testing::Values(
+ make_tuple(&vp9_fwht4x4_c, &vp9_iwht4x4_16_add_c, 0)));
-#if HAVE_NEON
+#if HAVE_NEON_ASM
INSTANTIATE_TEST_CASE_P(
NEON, Trans4x4DCT,
::testing::Values(
@@ -301,6 +353,13 @@
make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 3)));
#endif
+#if CONFIG_USE_X86INC && HAVE_MMX
+INSTANTIATE_TEST_CASE_P(
+ MMX, Trans4x4WHT,
+ ::testing::Values(
+ make_tuple(&vp9_fwht4x4_mmx, &vp9_iwht4x4_16_add_c, 0)));
+#endif
+
#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(
SSE2, Trans4x4DCT,
diff --git a/test/fdct8x8_test.cc b/test/fdct8x8_test.cc
index 6f2d7d1..c7cf164 100644
--- a/test/fdct8x8_test.cc
+++ b/test/fdct8x8_test.cc
@@ -313,7 +313,7 @@
make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 2),
make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3)));
-#if HAVE_NEON
+#if HAVE_NEON_ASM
INSTANTIATE_TEST_CASE_P(
NEON, FwdTrans8x8DCT,
::testing::Values(
@@ -340,4 +340,11 @@
make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 2),
make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 3)));
#endif
+
+#if HAVE_SSSE3 && ARCH_X86_64
+INSTANTIATE_TEST_CASE_P(
+ SSSE3, FwdTrans8x8DCT,
+ ::testing::Values(
+ make_tuple(&vp9_fdct8x8_ssse3, &vp9_idct8x8_64_add_ssse3, 0)));
+#endif
} // namespace
diff --git a/test/partial_idct_test.cc b/test/partial_idct_test.cc
index 8849ce6..79ef521 100644
--- a/test/partial_idct_test.cc
+++ b/test/partial_idct_test.cc
@@ -132,15 +132,15 @@
&vp9_idct16x16_1_add_c,
TX_16X16, 1),
make_tuple(&vp9_idct8x8_64_add_c,
- &vp9_idct8x8_10_add_c,
- TX_8X8, 10),
+ &vp9_idct8x8_12_add_c,
+ TX_8X8, 12),
make_tuple(&vp9_idct8x8_64_add_c,
&vp9_idct8x8_1_add_c,
TX_8X8, 1),
make_tuple(&vp9_idct4x4_16_add_c,
&vp9_idct4x4_1_add_c,
TX_4X4, 1)));
-#if HAVE_NEON
+#if HAVE_NEON_ASM
INSTANTIATE_TEST_CASE_P(
NEON, PartialIDctTest,
::testing::Values(
@@ -154,8 +154,8 @@
&vp9_idct16x16_1_add_neon,
TX_16X16, 1),
make_tuple(&vp9_idct8x8_64_add_c,
- &vp9_idct8x8_10_add_neon,
- TX_8X8, 10),
+ &vp9_idct8x8_12_add_neon,
+ TX_8X8, 12),
make_tuple(&vp9_idct8x8_64_add_c,
&vp9_idct8x8_1_add_neon,
TX_8X8, 1),
@@ -181,8 +181,8 @@
&vp9_idct16x16_1_add_sse2,
TX_16X16, 1),
make_tuple(&vp9_idct8x8_64_add_c,
- &vp9_idct8x8_10_add_sse2,
- TX_8X8, 10),
+ &vp9_idct8x8_12_add_sse2,
+ TX_8X8, 12),
make_tuple(&vp9_idct8x8_64_add_c,
&vp9_idct8x8_1_add_sse2,
TX_8X8, 1),
@@ -190,4 +190,13 @@
&vp9_idct4x4_1_add_sse2,
TX_4X4, 1)));
#endif
+
+#if HAVE_SSSE3 && ARCH_X86_64
+INSTANTIATE_TEST_CASE_P(
+ SSSE3, PartialIDctTest,
+ ::testing::Values(
+ make_tuple(&vp9_idct8x8_64_add_c,
+ &vp9_idct8x8_12_add_ssse3,
+ TX_8X8, 12)));
+#endif
} // namespace
diff --git a/test/postproc.sh b/test/postproc.sh
new file mode 100755
index 0000000..050a368
--- /dev/null
+++ b/test/postproc.sh
@@ -0,0 +1,59 @@
+#!/bin/sh
+##
+## Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+##
+## Use of this source code is governed by a BSD-style license
+## that can be found in the LICENSE file in the root of the source
+## tree. An additional intellectual property rights grant can be found
+## in the file PATENTS. All contributing project authors may
+## be found in the AUTHORS file in the root of the source tree.
+##
+## This file tests the libvpx postproc example code. To add new tests to this
+## file, do the following:
+## 1. Write a shell function (this is your test).
+## 2. Add the function to postproc_tests (on a new line).
+##
+. $(dirname $0)/tools_common.sh
+
+# Environment check: Make sure input is available:
+# $VP8_IVF_FILE and $VP9_IVF_FILE are required.
+postproc_verify_environment() {
+ if [ ! -e "${VP8_IVF_FILE}" ] || [ ! -e "${VP9_IVF_FILE}" ]; then
+ echo "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH."
+ return 1
+ fi
+}
+
+# Runs postproc using $1 as input file. $2 is the codec name, and is used
+# solely to name the output file.
+postproc() {
+ local decoder="${LIBVPX_BIN_PATH}/postproc${VPX_TEST_EXE_SUFFIX}"
+ local input_file="$1"
+ local codec="$2"
+ local output_file="${VPX_TEST_OUTPUT_DIR}/postproc_${codec}.raw"
+
+ [ -x "${decoder}" ] || return 1
+
+ eval "${decoder}" "${input_file}" "${output_file}" ${devnull}
+
+ [ -e "${output_file}" ] || return 1
+}
+
+postproc_vp8() {
+ if [ "$(vp8_decode_available)" = "yes" ]; then
+ postproc "${VP8_IVF_FILE}" vp8 || return 1
+ fi
+}
+
+postproc_vp9() {
+ if [ "$(vpx_config_option_enabled CONFIG_VP9_POSTPROC)" = "yes" ]; then
+ if [ "$(vp9_decode_available)" = "yes" ]; then
+ postproc "${VP9_IVF_FILE}" vp9 || return 1
+ fi
+ fi
+}
+
+postproc_tests="postproc_vp8
+ postproc_vp9"
+
+run_tests postproc_verify_environment "${postproc_tests}"
diff --git a/test/register_state_check.h b/test/register_state_check.h
index 7e3d053..1ee149b 100644
--- a/test/register_state_check.h
+++ b/test/register_state_check.h
@@ -82,8 +82,8 @@
} // namespace libvpx_test
-#elif defined(CONFIG_SHARED) && defined(HAVE_NEON) \
- && !CONFIG_SHARED && HAVE_NEON
+#elif defined(CONFIG_SHARED) && defined(HAVE_NEON_ASM) && defined(CONFIG_VP9) \
+ && !CONFIG_SHARED && HAVE_NEON_ASM && CONFIG_VP9
#include "vpx/vpx_integer.h"
diff --git a/test/simple_decoder.sh b/test/simple_decoder.sh
index a0db58f..24b17c5 100755
--- a/test/simple_decoder.sh
+++ b/test/simple_decoder.sh
@@ -34,7 +34,7 @@
[ -x "${decoder}" ] || return 1
- "${decoder}" "${input_file}" "${output_file}" > /dev/null 2>&1
+ eval "${decoder}" "${input_file}" "${output_file}" ${devnull}
[ -e "${output_file}" ] || return 1
}
diff --git a/test/simple_encoder.sh b/test/simple_encoder.sh
new file mode 100755
index 0000000..6232093
--- /dev/null
+++ b/test/simple_encoder.sh
@@ -0,0 +1,59 @@
+#!/bin/sh
+##
+## Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+##
+## Use of this source code is governed by a BSD-style license
+## that can be found in the LICENSE file in the root of the source
+## tree. An additional intellectual property rights grant can be found
+## in the file PATENTS. All contributing project authors may
+## be found in the AUTHORS file in the root of the source tree.
+##
+## This file tests the libvpx simple_encoder example. To add new tests to this
+## file, do the following:
+## 1. Write a shell function (this is your test).
+## 2. Add the function to simple_encoder_tests (on a new line).
+##
+. $(dirname $0)/tools_common.sh
+
+# Environment check: $YUV_RAW_INPUT is required.
+simple_encoder_verify_environment() {
+ if [ ! -e "${YUV_RAW_INPUT}" ]; then
+ echo "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH."
+ return 1
+ fi
+}
+
+# Runs simple_encoder using the codec specified by $1.
+simple_encoder() {
+ local encoder="${LIBVPX_BIN_PATH}/simple_encoder${VPX_TEST_EXE_SUFFIX}"
+ local codec="$1"
+ local output_file="${VPX_TEST_OUTPUT_DIR}/simple_encoder_${codec}.ivf"
+
+ [ -x "${encoder}" ] || return 1
+
+ eval "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \
+ "${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" 9999 \
+ ${devnull}
+
+ [ -e "${output_file}" ] || return 1
+}
+
+simple_encoder_vp8() {
+ if [ "$(vp8_encode_available)" = "yes" ]; then
+ simple_encoder vp8 || return 1
+ fi
+}
+
+# TODO(tomfinegan): Add a frame limit param to simple_encoder and enable this
+# test. VP9 is just too slow right now: This test takes 4m30s+ on a fast
+# machine.
+DISABLED_simple_encoder_vp9() {
+ if [ "$(vp9_encode_available)" = "yes" ]; then
+ simple_encoder vp9 || return 1
+ fi
+}
+
+simple_encoder_tests="simple_encoder_vp8
+ DISABLED_simple_encoder_vp9"
+
+run_tests simple_encoder_verify_environment "${simple_encoder_tests}"
diff --git a/test/subtract_test.cc b/test/subtract_test.cc
index 3efb955..63e999d 100644
--- a/test/subtract_test.cc
+++ b/test/subtract_test.cc
@@ -105,7 +105,7 @@
INSTANTIATE_TEST_CASE_P(C, SubtractBlockTest,
::testing::Values(vp8_subtract_b_c));
-#if HAVE_NEON
+#if HAVE_NEON_ASM
INSTANTIATE_TEST_CASE_P(NEON, SubtractBlockTest,
::testing::Values(vp8_subtract_b_neon));
#endif
diff --git a/test/test-data.sha1 b/test/test-data.sha1
index cf2ad1e..9c23929 100644
--- a/test/test-data.sha1
+++ b/test/test-data.sha1
@@ -635,3 +635,8 @@
1765315acccfe6cd12230e731369fcb15325ebfa vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm.md5
4a2b7a683576fe8e330c7d1c4f098ff4e70a43a8 vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm
1ef480392112b3509cb190afbb96f9a38dd9fbac vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm.md5
+e615575ded499ea1d992f3b38e3baa434509cdcd vp90-2-15-segkey.webm
+e3ab35d4316c5e81325c50f5236ceca4bc0d35df vp90-2-15-segkey.webm.md5
+9b7ca2cac09d34c4a5d296c1900f93b1e2f69d0d vp90-2-15-segkey_adpq.webm
+8f46ba5f785d0c2170591a153e0d0d146a7c8090 vp90-2-15-segkey_adpq.webm.md5
+
diff --git a/test/test.mk b/test/test.mk
index da56b00..44d2f9c 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -43,15 +43,13 @@
## WebM Parsing
ifeq ($(CONFIG_WEBM_IO), yes)
-NESTEGG_SRCS += ../third_party/nestegg/halloc/halloc.h
-NESTEGG_SRCS += ../third_party/nestegg/halloc/src/align.h
-NESTEGG_SRCS += ../third_party/nestegg/halloc/src/halloc.c
-NESTEGG_SRCS += ../third_party/nestegg/halloc/src/hlist.h
-NESTEGG_SRCS += ../third_party/nestegg/include/nestegg/nestegg.h
-NESTEGG_SRCS += ../third_party/nestegg/src/nestegg.c
-LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += $(NESTEGG_SRCS)
+LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvparser.cpp
+LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvreader.cpp
+LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvparser.hpp
+LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvreader.hpp
+LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += $(LIBWEBM_PARSER_SRCS)
LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ../tools_common.h
-LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ../webmdec.c
+LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ../webmdec.cc
LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ../webmdec.h
LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += webm_video_source.h
endif
@@ -750,6 +748,10 @@
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-segkey.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-segkey.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-segkey_adpq.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-segkey_adpq.webm.md5
ifeq ($(CONFIG_DECODE_PERF_TESTS),yes)
# BBB VP9 streams
diff --git a/test/test_vectors.cc b/test/test_vectors.cc
index ff3c389..fd8c4c3 100644
--- a/test/test_vectors.cc
+++ b/test/test_vectors.cc
@@ -177,7 +177,8 @@
"vp90-2-14-resize-fp-tiles-4-16.webm", "vp90-2-14-resize-fp-tiles-4-1.webm",
"vp90-2-14-resize-fp-tiles-4-2.webm", "vp90-2-14-resize-fp-tiles-4-8.webm",
"vp90-2-14-resize-fp-tiles-8-16.webm", "vp90-2-14-resize-fp-tiles-8-1.webm",
- "vp90-2-14-resize-fp-tiles-8-2.webm", "vp90-2-14-resize-fp-tiles-8-4.webm"
+ "vp90-2-14-resize-fp-tiles-8-2.webm", "vp90-2-14-resize-fp-tiles-8-4.webm",
+ "vp90-2-15-segkey.webm", "vp90-2-15-segkey_adpq.webm"
};
const int kNumVP9TestVectors = NELEMENTS(kVP9TestVectors);
#endif // CONFIG_VP9_DECODER
diff --git a/test/tools_common.sh b/test/tools_common.sh
index 45b7771..9c10d48 100755
--- a/test/tools_common.sh
+++ b/test/tools_common.sh
@@ -9,7 +9,19 @@
## be found in the AUTHORS file in the root of the source tree.
##
## This file contains shell code shared by test scripts for libvpx tools.
+
+# Use $VPX_TEST_TOOLS_COMMON_SH as a pseudo include guard.
+if [ -z "${VPX_TEST_TOOLS_COMMON_SH}" ]; then
+VPX_TEST_TOOLS_COMMON_SH=included
+
set -e
+devnull='> /dev/null 2>&1'
+
+vlog() {
+ if [ "${VPX_TEST_VERBOSE_OUTPUT}" = "yes" ]; then
+ echo "$@"
+ fi
+}
# Sets $VPX_TOOL_TEST to the name specified by positional parameter one.
test_begin() {
@@ -188,9 +200,9 @@
local decoder="${LIBVPX_BIN_PATH}/vpxdec${VPX_TEST_EXE_SUFFIX}"
if [ -z "${pipe_input}" ]; then
- "${decoder}" "$input" --summary --noblit "$@" > /dev/null 2>&1
+ eval "${decoder}" "$input" --summary --noblit "$@" ${devnull}
else
- cat "${input}" | "${decoder}" - --summary --noblit "$@" > /dev/null 2>&1
+ cat "${input}" | eval "${decoder}" - --summary --noblit "$@" ${devnull}
fi
}
@@ -236,14 +248,16 @@
fi
if [ -z "${pipe_input}" ]; then
- "${encoder}" --codec=${codec} --width=${width} --height=${height} \
+ eval "${encoder}" --codec=${codec} --width=${width} --height=${height} \
--limit=${frames} ${use_ivf} ${extra_flags} --output="${output}" \
- "${input}" > /dev/null 2>&1
+ "${input}" \
+ ${devnull}
else
cat "${input}" \
- | "${encoder}" --codec=${codec} --width=${width} --height=${height} \
- --limit=${frames} ${use_ivf} ${extra_flags} --output="${output}" - \
- > /dev/null 2>&1
+ | eval "${encoder}" --codec=${codec} --width=${width} \
+ --height=${height} --limit=${frames} ${use_ivf} ${extra_flags} \
+ --output="${output}" - \
+ ${devnull}
fi
if [ ! -e "${output}" ]; then
@@ -308,9 +322,9 @@
# Run tests.
for test in ${tests_to_run}; do
test_begin "${test}"
- [ "${VPX_TEST_VERBOSE_OUTPUT}" = "yes" ] && echo " RUN ${test}"
+ vlog " RUN ${test}"
"${test}"
- [ "${VPX_TEST_VERBOSE_OUTPUT}" = "yes" ] && echo " PASS ${test}"
+ vlog " PASS ${test}"
test_end "${test}"
done
@@ -327,6 +341,7 @@
--run-disabled-tests: Run disabled tests.
--help: Display this message and exit.
--test-data-path <path to libvpx test data directory>
+ --show-program-output: Shows output from all programs being tested.
--verbose: Verbose output.
When the --bin-path option is not specified the script attempts to use
@@ -379,6 +394,9 @@
--verbose)
VPX_TEST_VERBOSE_OUTPUT=yes
;;
+ --show-program-output)
+ devnull=
+ ;;
*)
vpx_test_usage
exit 1
@@ -429,9 +447,7 @@
# Setup a trap function to clean up after tests complete.
trap cleanup EXIT
-if [ "${VPX_TEST_VERBOSE_OUTPUT}" = "yes" ]; then
-cat << EOF
-$(basename "${0%.*}") test configuration:
+vlog "$(basename "${0%.*}") test configuration:
LIBVPX_BIN_PATH=${LIBVPX_BIN_PATH}
LIBVPX_CONFIG_PATH=${LIBVPX_CONFIG_PATH}
LIBVPX_TEST_DATA_PATH=${LIBVPX_TEST_DATA_PATH}
@@ -439,5 +455,6 @@
VPX_TEST_VERBOSE_OUTPUT=${VPX_TEST_VERBOSE_OUTPUT}
VPX_TEST_FILTER=${VPX_TEST_FILTER}
VPX_TEST_RUN_DISABLED_TESTS=${VPX_TEST_RUN_DISABLED_TESTS}
-EOF
-fi
+ VPX_TEST_SHOW_PROGRAM_OUTPUT=${VPX_TEST_SHOW_PROGRAM_OUTPUT}"
+
+fi # End $VPX_TEST_TOOLS_COMMON_SH pseudo include guard.
diff --git a/test/vp8cx_set_ref.sh b/test/vp8cx_set_ref.sh
new file mode 100755
index 0000000..ef9d0c0
--- /dev/null
+++ b/test/vp8cx_set_ref.sh
@@ -0,0 +1,54 @@
+#!/bin/sh
+##
+## Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+##
+## Use of this source code is governed by a BSD-style license
+## that can be found in the LICENSE file in the root of the source
+## tree. An additional intellectual property rights grant can be found
+## in the file PATENTS. All contributing project authors may
+## be found in the AUTHORS file in the root of the source tree.
+##
+## This file tests the libvpx vp8cx_set_ref example. To add new tests to this
+## file, do the following:
+## 1. Write a shell function (this is your test).
+## 2. Add the function to vp8cx_set_ref_tests (on a new line).
+##
+. $(dirname $0)/tools_common.sh
+
+# Environment check: $YUV_RAW_INPUT is required.
+vp8cx_set_ref_verify_environment() {
+ if [ ! -e "${YUV_RAW_INPUT}" ]; then
+ echo "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH."
+ return 1
+ fi
+}
+
+# Runs vp8cx_set_ref and updates the reference frame before encoding frame 90.
+# $1 is the codec name, which vp8cx_set_ref does not support at present: It's
+# currently used only to name the output file.
+# TODO(tomfinegan): Pass the codec param once the example is updated to support
+# VP9.
+vpx_set_ref() {
+ local encoder="${LIBVPX_BIN_PATH}/vp8cx_set_ref${VPX_TEST_EXE_SUFFIX}"
+ local codec="$1"
+ local output_file="${VPX_TEST_OUTPUT_DIR}/vp8cx_set_ref_${codec}.ivf"
+ local ref_frame_num=90
+
+ [ -x "${encoder}" ] || return 1
+
+ eval "${encoder}" "${YUV_RAW_INPUT_WIDTH}" "${YUV_RAW_INPUT_HEIGHT}" \
+ "${YUV_RAW_INPUT}" "${output_file}" "${ref_frame_num}" \
+ ${devnull}
+
+ [ -e "${output_file}" ] || return 1
+}
+
+vp8cx_set_ref_vp8() {
+ if [ "$(vp8_encode_available)" = "yes" ]; then
+ vpx_set_ref vp8 || return 1
+ fi
+}
+
+vp8cx_set_ref_tests="vp8cx_set_ref_vp8"
+
+run_tests vp8cx_set_ref_verify_environment "${vp8cx_set_ref_tests}"
diff --git a/third_party/libwebm/Android.mk b/third_party/libwebm/Android.mk
new file mode 100644
index 0000000..13868b6
--- /dev/null
+++ b/third_party/libwebm/Android.mk
@@ -0,0 +1,11 @@
+LOCAL_PATH := $(call my-dir)
+include $(CLEAR_VARS)
+
+LOCAL_CPP_EXTENSION := .cpp
+LOCAL_SRC_FILES := mkvmuxer.cpp \
+ mkvmuxerutil.cpp \
+ mkvparser.cpp \
+ mkvreader.cpp \
+ mkvwriter.cpp
+LOCAL_MODULE := libwebm
+include $(BUILD_STATIC_LIBRARY)
diff --git a/third_party/libwebm/README.libvpx b/third_party/libwebm/README.libvpx
index 2c7570d..93814b7 100644
--- a/third_party/libwebm/README.libvpx
+++ b/third_party/libwebm/README.libvpx
@@ -1,5 +1,5 @@
URL: https://chromium.googlesource.com/webm/libwebm
-Version: a7118d8ec564e9db841da1eb01f547f3229f240a
+Version: 249629d46c6e9391f25a90cff6d19075f47474cb
License: BSD
License File: LICENSE.txt
diff --git a/third_party/libwebm/mkvmuxer.cpp b/third_party/libwebm/mkvmuxer.cpp
index 8ae0dda..45167ea 100644
--- a/third_party/libwebm/mkvmuxer.cpp
+++ b/third_party/libwebm/mkvmuxer.cpp
@@ -22,7 +22,7 @@
#ifdef _MSC_VER
// Disable MSVC warnings that suggest making code non-portable.
-#pragma warning(disable:4996)
+#pragma warning(disable : 4996)
#endif
namespace mkvmuxer {
@@ -40,7 +40,7 @@
char*& dst = *dst_ptr;
- delete [] dst;
+ delete[] dst;
dst = NULL;
if (src == NULL)
@@ -61,11 +61,9 @@
//
// IMkvWriter Class
-IMkvWriter::IMkvWriter() {
-}
+IMkvWriter::IMkvWriter() {}
-IMkvWriter::~IMkvWriter() {
-}
+IMkvWriter::~IMkvWriter() {}
bool WriteEbmlHeader(IMkvWriter* writer) {
// Level 0
@@ -97,8 +95,7 @@
return true;
}
-bool ChunkedCopy(mkvparser::IMkvReader* source,
- mkvmuxer::IMkvWriter* dst,
+bool ChunkedCopy(mkvparser::IMkvReader* source, mkvmuxer::IMkvWriter* dst,
mkvmuxer::int64 start, int64 size) {
// TODO(vigneshv): Check if this is a reasonable value.
const uint32 kBufSize = 2048;
@@ -130,12 +127,11 @@
length_(0),
track_number_(0),
timestamp_(0),
- discard_padding_(0) {
-}
+ discard_padding_(0) {}
Frame::~Frame() {
- delete [] frame_;
- delete [] additional_;
+ delete[] frame_;
+ delete[] additional_;
}
bool Frame::Init(const uint8* frame, uint64 length) {
@@ -144,7 +140,7 @@
if (!data)
return false;
- delete [] frame_;
+ delete[] frame_;
frame_ = data;
length_ = length;
@@ -159,7 +155,7 @@
if (!data)
return false;
- delete [] additional_;
+ delete[] additional_;
additional_ = data;
additional_length_ = length;
add_id_ = add_id;
@@ -177,11 +173,9 @@
track_(0),
cluster_pos_(0),
block_number_(1),
- output_block_number_(true) {
-}
+ output_block_number_(true) {}
-CuePoint::~CuePoint() {
-}
+CuePoint::~CuePoint() {}
bool CuePoint::Write(IMkvWriter* writer) const {
if (!writer || track_ < 1 || cluster_pos_ < 1)
@@ -191,10 +185,10 @@
size += EbmlElementSize(kMkvCueTrack, track_);
if (output_block_number_ && block_number_ > 1)
size += EbmlElementSize(kMkvCueBlockNumber, block_number_);
- const uint64 track_pos_size = EbmlMasterElementSize(kMkvCueTrackPositions,
- size) + size;
- const uint64 payload_size = EbmlElementSize(kMkvCueTime, time_) +
- track_pos_size;
+ const uint64 track_pos_size =
+ EbmlMasterElementSize(kMkvCueTrackPositions, size) + size;
+ const uint64 payload_size =
+ EbmlElementSize(kMkvCueTime, time_) + track_pos_size;
if (!WriteEbmlMasterElement(writer, kMkvCuePoint, payload_size))
return false;
@@ -231,10 +225,10 @@
size += EbmlElementSize(kMkvCueTrack, track_);
if (output_block_number_ && block_number_ > 1)
size += EbmlElementSize(kMkvCueBlockNumber, block_number_);
- const uint64 track_pos_size = EbmlMasterElementSize(kMkvCueTrackPositions,
- size) + size;
- const uint64 payload_size = EbmlElementSize(kMkvCueTime, time_) +
- track_pos_size;
+ const uint64 track_pos_size =
+ EbmlMasterElementSize(kMkvCueTrackPositions, size) + size;
+ const uint64 payload_size =
+ EbmlElementSize(kMkvCueTime, time_) + track_pos_size;
return payload_size;
}
@@ -252,8 +246,7 @@
: cue_entries_capacity_(0),
cue_entries_size_(0),
cue_entries_(NULL),
- output_block_number_(true) {
-}
+ output_block_number_(true) {}
Cues::~Cues() {
if (cue_entries_) {
@@ -261,7 +254,7 @@
CuePoint* const cue = cue_entries_[i];
delete cue;
}
- delete [] cue_entries_;
+ delete[] cue_entries_;
}
}
@@ -278,7 +271,7 @@
return false;
CuePoint** const cues =
- new (std::nothrow) CuePoint*[new_capacity]; // NOLINT
+ new (std::nothrow) CuePoint* [new_capacity]; // NOLINT
if (!cues)
return false;
@@ -286,7 +279,7 @@
cues[i] = cue_entries_[i];
}
- delete [] cue_entries_;
+ delete[] cue_entries_;
cue_entries_ = cues;
cue_entries_capacity_ = new_capacity;
@@ -402,18 +395,15 @@
encoding_order_(0),
encoding_scope_(1),
encoding_type_(1),
- enc_key_id_length_(0) {
-}
+ enc_key_id_length_(0) {}
-ContentEncoding::~ContentEncoding() {
- delete [] enc_key_id_;
-}
+ContentEncoding::~ContentEncoding() { delete[] enc_key_id_; }
bool ContentEncoding::SetEncryptionID(const uint8* id, uint64 length) {
if (!id || length < 1)
return false;
- delete [] enc_key_id_;
+ delete[] enc_key_id_;
enc_key_id_ =
new (std::nothrow) uint8[static_cast<size_t>(length)]; // NOLINT
@@ -429,9 +419,8 @@
uint64 ContentEncoding::Size() const {
const uint64 encryption_size = EncryptionSize();
const uint64 encoding_size = EncodingSize(0, encryption_size);
- const uint64 encodings_size = EbmlMasterElementSize(kMkvContentEncoding,
- encoding_size) +
- encoding_size;
+ const uint64 encodings_size =
+ EbmlMasterElementSize(kMkvContentEncoding, encoding_size) + encoding_size;
return encodings_size;
}
@@ -439,9 +428,8 @@
bool ContentEncoding::Write(IMkvWriter* writer) const {
const uint64 encryption_size = EncryptionSize();
const uint64 encoding_size = EncodingSize(0, encryption_size);
- const uint64 size = EbmlMasterElementSize(kMkvContentEncoding,
- encoding_size) +
- encoding_size;
+ const uint64 size =
+ EbmlMasterElementSize(kMkvContentEncoding, encoding_size) + encoding_size;
const int64 payload_position = writer->Position();
if (payload_position < 0)
@@ -460,9 +448,7 @@
return false;
if (!WriteEbmlElement(writer, kMkvContentEncAlgo, enc_algo_))
return false;
- if (!WriteEbmlElement(writer,
- kMkvContentEncKeyID,
- enc_key_id_,
+ if (!WriteEbmlElement(writer, kMkvContentEncKeyID, enc_key_id_,
enc_key_id_length_))
return false;
@@ -486,9 +472,9 @@
uint64 encoding_size = 0;
if (encryption_size > 0) {
- encoding_size += EbmlMasterElementSize(kMkvContentEncryption,
- encryption_size) +
- encryption_size;
+ encoding_size +=
+ EbmlMasterElementSize(kMkvContentEncryption, encryption_size) +
+ encryption_size;
}
encoding_size += EbmlElementSize(kMkvContentEncodingType, encoding_type_);
encoding_size += EbmlElementSize(kMkvContentEncodingScope, encoding_scope_);
@@ -500,9 +486,8 @@
uint64 ContentEncoding::EncryptionSize() const {
const uint64 aes_size = enc_aes_settings_.Size();
- uint64 encryption_size = EbmlElementSize(kMkvContentEncKeyID,
- enc_key_id_,
- enc_key_id_length_);
+ uint64 encryption_size =
+ EbmlElementSize(kMkvContentEncKeyID, enc_key_id_, enc_key_id_length_);
encryption_size += EbmlElementSize(kMkvContentEncAlgo, enc_algo_);
return encryption_size + aes_size;
@@ -523,23 +508,23 @@
uid_(MakeUID(seed)),
codec_delay_(0),
seek_pre_roll_(0),
+ default_duration_(0),
codec_private_length_(0),
content_encoding_entries_(NULL),
- content_encoding_entries_size_(0) {
-}
+ content_encoding_entries_size_(0) {}
Track::~Track() {
- delete [] codec_id_;
- delete [] codec_private_;
- delete [] language_;
- delete [] name_;
+ delete[] codec_id_;
+ delete[] codec_private_;
+ delete[] language_;
+ delete[] name_;
if (content_encoding_entries_) {
for (uint32 i = 0; i < content_encoding_entries_size_; ++i) {
ContentEncoding* const encoding = content_encoding_entries_[i];
delete encoding;
}
- delete [] content_encoding_entries_;
+ delete[] content_encoding_entries_;
}
}
@@ -547,14 +532,14 @@
const uint32 count = content_encoding_entries_size_ + 1;
ContentEncoding** const content_encoding_entries =
- new (std::nothrow) ContentEncoding*[count]; // NOLINT
+ new (std::nothrow) ContentEncoding* [count]; // NOLINT
if (!content_encoding_entries)
return false;
ContentEncoding* const content_encoding =
new (std::nothrow) ContentEncoding(); // NOLINT
if (!content_encoding) {
- delete [] content_encoding_entries;
+ delete[] content_encoding_entries;
return false;
}
@@ -562,7 +547,7 @@
content_encoding_entries[i] = content_encoding_entries_[i];
}
- delete [] content_encoding_entries_;
+ delete[] content_encoding_entries_;
content_encoding_entries_ = content_encoding_entries;
content_encoding_entries_[content_encoding_entries_size_] = content_encoding;
@@ -587,8 +572,7 @@
if (codec_id_)
size += EbmlElementSize(kMkvCodecID, codec_id_);
if (codec_private_)
- size += EbmlElementSize(kMkvCodecPrivate,
- codec_private_,
+ size += EbmlElementSize(kMkvCodecPrivate, codec_private_,
codec_private_length_);
if (language_)
size += EbmlElementSize(kMkvLanguage, language_);
@@ -600,6 +584,8 @@
size += EbmlElementSize(kMkvCodecDelay, codec_delay_);
if (seek_pre_roll_)
size += EbmlElementSize(kMkvSeekPreRoll, seek_pre_roll_);
+ if (default_duration_)
+ size += EbmlElementSize(kMkvDefaultDuration, default_duration_);
if (content_encoding_entries_size_ > 0) {
uint64 content_encodings_size = 0;
@@ -608,9 +594,9 @@
content_encodings_size += encoding->Size();
}
- size += EbmlMasterElementSize(kMkvContentEncodings,
- content_encodings_size) +
- content_encodings_size;
+ size +=
+ EbmlMasterElementSize(kMkvContentEncodings, content_encodings_size) +
+ content_encodings_size;
}
return size;
@@ -633,14 +619,17 @@
if (!WriteEbmlMasterElement(writer, kMkvTrackEntry, payload_size))
return false;
+ // |type_| has to be specified before the Track can be written.
+ if (!type_)
+ return false;
+
uint64 size = EbmlElementSize(kMkvTrackNumber, number_);
size += EbmlElementSize(kMkvTrackUID, uid_);
size += EbmlElementSize(kMkvTrackType, type_);
if (codec_id_)
size += EbmlElementSize(kMkvCodecID, codec_id_);
if (codec_private_)
- size += EbmlElementSize(kMkvCodecPrivate,
- codec_private_,
+ size += EbmlElementSize(kMkvCodecPrivate, codec_private_,
codec_private_length_);
if (language_)
size += EbmlElementSize(kMkvLanguage, language_);
@@ -652,7 +641,8 @@
size += EbmlElementSize(kMkvCodecDelay, codec_delay_);
if (seek_pre_roll_)
size += EbmlElementSize(kMkvSeekPreRoll, seek_pre_roll_);
-
+ if (default_duration_)
+ size += EbmlElementSize(kMkvDefaultDuration, default_duration_);
const int64 payload_position = writer->Position();
if (payload_position < 0)
@@ -665,8 +655,7 @@
if (!WriteEbmlElement(writer, kMkvTrackType, type_))
return false;
if (max_block_additional_id_) {
- if (!WriteEbmlElement(writer,
- kMkvMaxBlockAdditionID,
+ if (!WriteEbmlElement(writer, kMkvMaxBlockAdditionID,
max_block_additional_id_)) {
return false;
}
@@ -679,14 +668,16 @@
if (!WriteEbmlElement(writer, kMkvSeekPreRoll, seek_pre_roll_))
return false;
}
+ if (default_duration_) {
+ if (!WriteEbmlElement(writer, kMkvDefaultDuration, default_duration_))
+ return false;
+ }
if (codec_id_) {
if (!WriteEbmlElement(writer, kMkvCodecID, codec_id_))
return false;
}
if (codec_private_) {
- if (!WriteEbmlElement(writer,
- kMkvCodecPrivate,
- codec_private_,
+ if (!WriteEbmlElement(writer, kMkvCodecPrivate, codec_private_,
codec_private_length_))
return false;
}
@@ -711,8 +702,7 @@
content_encodings_size += encoding->Size();
}
- if (!WriteEbmlMasterElement(writer,
- kMkvContentEncodings,
+ if (!WriteEbmlMasterElement(writer, kMkvContentEncodings,
content_encodings_size))
return false;
@@ -733,7 +723,7 @@
if (!codec_private || length < 1)
return false;
- delete [] codec_private_;
+ delete[] codec_private_;
codec_private_ =
new (std::nothrow) uint8[static_cast<size_t>(length)]; // NOLINT
@@ -748,7 +738,7 @@
void Track::set_codec_id(const char* codec_id) {
if (codec_id) {
- delete [] codec_id_;
+ delete[] codec_id_;
const size_t length = strlen(codec_id) + 1;
codec_id_ = new (std::nothrow) char[length]; // NOLINT
@@ -765,7 +755,7 @@
// TODO(fgalligan): Vet the language parameter.
void Track::set_language(const char* language) {
if (language) {
- delete [] language_;
+ delete[] language_;
const size_t length = strlen(language) + 1;
language_ = new (std::nothrow) char[length]; // NOLINT
@@ -781,7 +771,7 @@
void Track::set_name(const char* name) {
if (name) {
- delete [] name_;
+ delete[] name_;
const size_t length = strlen(name) + 1;
name_ = new (std::nothrow) char[length]; // NOLINT
@@ -807,15 +797,12 @@
height_(0),
stereo_mode_(0),
alpha_mode_(0),
- width_(0) {
-}
+ width_(0) {}
-VideoTrack::~VideoTrack() {
-}
+VideoTrack::~VideoTrack() {}
bool VideoTrack::SetStereoMode(uint64 stereo_mode) {
- if (stereo_mode != kMono &&
- stereo_mode != kSideBySideLeftIsFirst &&
+ if (stereo_mode != kMono && stereo_mode != kSideBySideLeftIsFirst &&
stereo_mode != kTopBottomRightIsFirst &&
stereo_mode != kTopBottomLeftIsFirst &&
stereo_mode != kSideBySideRightIsFirst)
@@ -826,8 +813,7 @@
}
bool VideoTrack::SetAlphaMode(uint64 alpha_mode) {
- if (alpha_mode != kNoAlpha &&
- alpha_mode != kAlpha)
+ if (alpha_mode != kNoAlpha && alpha_mode != kAlpha)
return false;
alpha_mode_ = alpha_mode;
@@ -873,8 +859,7 @@
if (!WriteEbmlElement(writer, kMkvAlphaMode, alpha_mode_))
return false;
if (frame_rate_ > 0.0)
- if (!WriteEbmlElement(writer,
- kMkvFrameRate,
+ if (!WriteEbmlElement(writer, kMkvFrameRate,
static_cast<float>(frame_rate_)))
return false;
@@ -908,20 +893,15 @@
// AudioTrack Class
AudioTrack::AudioTrack(unsigned int* seed)
- : Track(seed),
- bit_depth_(0),
- channels_(1),
- sample_rate_(0.0) {
-}
+ : Track(seed), bit_depth_(0), channels_(1), sample_rate_(0.0) {}
-AudioTrack::~AudioTrack() {
-}
+AudioTrack::~AudioTrack() {}
uint64 AudioTrack::PayloadSize() const {
const uint64 parent_size = Track::PayloadSize();
- uint64 size = EbmlElementSize(kMkvSamplingFrequency,
- static_cast<float>(sample_rate_));
+ uint64 size =
+ EbmlElementSize(kMkvSamplingFrequency, static_cast<float>(sample_rate_));
size += EbmlElementSize(kMkvChannels, channels_);
if (bit_depth_ > 0)
size += EbmlElementSize(kMkvBitDepth, bit_depth_);
@@ -935,8 +915,8 @@
return false;
// Calculate AudioSettings size.
- uint64 size = EbmlElementSize(kMkvSamplingFrequency,
- static_cast<float>(sample_rate_));
+ uint64 size =
+ EbmlElementSize(kMkvSamplingFrequency, static_cast<float>(sample_rate_));
size += EbmlElementSize(kMkvChannels, channels_);
if (bit_depth_ > 0)
size += EbmlElementSize(kMkvBitDepth, bit_depth_);
@@ -948,8 +928,7 @@
if (payload_position < 0)
return false;
- if (!WriteEbmlElement(writer,
- kMkvSamplingFrequency,
+ if (!WriteEbmlElement(writer, kMkvSamplingFrequency,
static_cast<float>(sample_rate_)))
return false;
if (!WriteEbmlElement(writer, kMkvChannels, channels_))
@@ -975,11 +954,7 @@
const char Tracks::kVp8CodecId[] = "V_VP8";
const char Tracks::kVp9CodecId[] = "V_VP9";
-
-Tracks::Tracks()
- : track_entries_(NULL),
- track_entries_size_(0) {
-}
+Tracks::Tracks() : track_entries_(NULL), track_entries_size_(0) {}
Tracks::~Tracks() {
if (track_entries_) {
@@ -987,7 +962,7 @@
Track* const track = track_entries_[i];
delete track;
}
- delete [] track_entries_;
+ delete[] track_entries_;
}
}
@@ -1015,7 +990,7 @@
const uint32 count = track_entries_size_ + 1;
- Track** const track_entries = new (std::nothrow) Track*[count]; // NOLINT
+ Track** const track_entries = new (std::nothrow) Track* [count]; // NOLINT
if (!track_entries)
return false;
@@ -1023,7 +998,7 @@
track_entries[i] = track_entries_[i];
}
- delete [] track_entries_;
+ delete[] track_entries_;
// Find the lowest availible track number > 0.
if (track_num == 0) {
@@ -1125,21 +1100,16 @@
//
// Chapter Class
-bool Chapter::set_id(const char* id) {
- return StrCpy(id, &id_);
-}
+bool Chapter::set_id(const char* id) { return StrCpy(id, &id_); }
-void Chapter::set_time(const Segment& segment,
- uint64 start_ns,
- uint64 end_ns) {
+void Chapter::set_time(const Segment& segment, uint64 start_ns, uint64 end_ns) {
const SegmentInfo* const info = segment.GetSegmentInfo();
const uint64 timecode_scale = info->timecode_scale();
start_timecode_ = start_ns / timecode_scale;
end_timecode_ = end_ns / timecode_scale;
}
-bool Chapter::add_string(const char* title,
- const char* language,
+bool Chapter::add_string(const char* title, const char* language,
const char* country) {
if (!ExpandDisplaysArray())
return false;
@@ -1171,8 +1141,7 @@
// active on the array.
}
-Chapter::~Chapter() {
-}
+Chapter::~Chapter() {}
void Chapter::Init(unsigned int* seed) {
id_ = NULL;
@@ -1200,7 +1169,7 @@
d.Clear();
}
- delete [] displays_;
+ delete[] displays_;
displays_ = NULL;
displays_size_ = 0;
@@ -1220,7 +1189,7 @@
displays[idx] = displays_[idx]; // shallow copy
}
- delete [] displays_;
+ delete[] displays_;
displays_ = displays;
displays_size_ = size;
@@ -1229,11 +1198,10 @@
}
uint64 Chapter::WriteAtom(IMkvWriter* writer) const {
- uint64 payload_size =
- EbmlElementSize(kMkvChapterStringUID, id_) +
- EbmlElementSize(kMkvChapterUID, uid_) +
- EbmlElementSize(kMkvChapterTimeStart, start_timecode_) +
- EbmlElementSize(kMkvChapterTimeEnd, end_timecode_);
+ uint64 payload_size = EbmlElementSize(kMkvChapterStringUID, id_) +
+ EbmlElementSize(kMkvChapterUID, uid_) +
+ EbmlElementSize(kMkvChapterTimeStart, start_timecode_) +
+ EbmlElementSize(kMkvChapterTimeEnd, end_timecode_);
for (int idx = 0; idx < displays_count_; ++idx) {
const Display& d = displays_[idx];
@@ -1241,8 +1209,7 @@
}
const uint64 atom_size =
- EbmlMasterElementSize(kMkvChapterAtom, payload_size) +
- payload_size;
+ EbmlMasterElementSize(kMkvChapterAtom, payload_size) + payload_size;
if (writer == NULL)
return atom_size;
@@ -1313,8 +1280,7 @@
payload_size += EbmlElementSize(kMkvChapCountry, country_);
const uint64 display_size =
- EbmlMasterElementSize(kMkvChapterDisplay, payload_size) +
- payload_size;
+ EbmlMasterElementSize(kMkvChapterDisplay, payload_size) + payload_size;
if (writer == NULL)
return display_size;
@@ -1349,11 +1315,7 @@
//
// Chapters Class
-Chapters::Chapters()
- : chapters_size_(0),
- chapters_count_(0),
- chapters_(NULL) {
-}
+Chapters::Chapters() : chapters_size_(0), chapters_count_(0), chapters_(NULL) {}
Chapters::~Chapters() {
while (chapters_count_ > 0) {
@@ -1361,13 +1323,11 @@
chapter.Clear();
}
- delete [] chapters_;
+ delete[] chapters_;
chapters_ = NULL;
}
-int Chapters::Count() const {
- return chapters_count_;
-}
+int Chapters::Count() const { return chapters_count_; }
Chapter* Chapters::AddChapter(unsigned int* seed) {
if (!ExpandChaptersArray())
@@ -1417,7 +1377,7 @@
src.ShallowCopy(dst);
}
- delete [] chapters_;
+ delete[] chapters_;
chapters_ = chapters;
chapters_size_ = size;
@@ -1434,8 +1394,7 @@
}
const uint64 edition_size =
- EbmlMasterElementSize(kMkvEditionEntry, payload_size) +
- payload_size;
+ EbmlMasterElementSize(kMkvEditionEntry, payload_size) + payload_size;
if (writer == NULL) // return size only
return edition_size;
@@ -1473,11 +1432,9 @@
position_for_cues_(cues_pos),
size_position_(-1),
timecode_(timecode),
- writer_(NULL) {
-}
+ writer_(NULL) {}
-Cluster::~Cluster() {
-}
+Cluster::~Cluster() {}
bool Cluster::Init(IMkvWriter* ptr_writer) {
if (!ptr_writer) {
@@ -1487,69 +1444,39 @@
return true;
}
-bool Cluster::AddFrame(const uint8* frame,
- uint64 length,
- uint64 track_number,
- uint64 abs_timecode,
- bool is_key) {
- return DoWriteBlock(frame,
- length,
- track_number,
- abs_timecode,
- is_key ? 1 : 0,
+bool Cluster::AddFrame(const uint8* frame, uint64 length, uint64 track_number,
+ uint64 abs_timecode, bool is_key) {
+ return DoWriteBlock(frame, length, track_number, abs_timecode, is_key ? 1 : 0,
&WriteSimpleBlock);
}
-bool Cluster::AddFrameWithAdditional(const uint8* frame,
- uint64 length,
+bool Cluster::AddFrameWithAdditional(const uint8* frame, uint64 length,
const uint8* additional,
- uint64 additional_length,
- uint64 add_id,
- uint64 track_number,
- uint64 abs_timecode,
+ uint64 additional_length, uint64 add_id,
+ uint64 track_number, uint64 abs_timecode,
bool is_key) {
- return DoWriteBlockWithAdditional(frame,
- length,
- additional,
- additional_length,
- add_id,
- track_number,
- abs_timecode,
- is_key ? 1 : 0,
- &WriteBlockWithAdditional);
+ return DoWriteBlockWithAdditional(
+ frame, length, additional, additional_length, add_id, track_number,
+ abs_timecode, is_key ? 1 : 0, &WriteBlockWithAdditional);
}
-bool Cluster::AddFrameWithDiscardPadding(const uint8* frame,
- uint64 length,
+bool Cluster::AddFrameWithDiscardPadding(const uint8* frame, uint64 length,
int64 discard_padding,
uint64 track_number,
- uint64 abs_timecode,
- bool is_key) {
- return DoWriteBlockWithDiscardPadding(frame,
- length,
- discard_padding,
- track_number,
- abs_timecode,
- is_key ? 1 : 0,
- &WriteBlockWithDiscardPadding);
+ uint64 abs_timecode, bool is_key) {
+ return DoWriteBlockWithDiscardPadding(
+ frame, length, discard_padding, track_number, abs_timecode,
+ is_key ? 1 : 0, &WriteBlockWithDiscardPadding);
}
-bool Cluster::AddMetadata(const uint8* frame,
- uint64 length,
- uint64 track_number,
- uint64 abs_timecode,
+bool Cluster::AddMetadata(const uint8* frame, uint64 length,
+ uint64 track_number, uint64 abs_timecode,
uint64 duration_timecode) {
- return DoWriteBlock(frame,
- length,
- track_number,
- abs_timecode,
- duration_timecode,
- &WriteMetadataBlock);
+ return DoWriteBlock(frame, length, track_number, abs_timecode,
+ duration_timecode, &WriteMetadataBlock);
}
-void Cluster::AddPayloadSize(uint64 size) {
- payload_size_ += size;
-}
+void Cluster::AddPayloadSize(uint64 size) { payload_size_ += size; }
bool Cluster::Finalize() {
if (!writer_ || finalized_ || size_position_ == -1)
@@ -1575,8 +1502,7 @@
uint64 Cluster::Size() const {
const uint64 element_size =
- EbmlMasterElementSize(kMkvCluster,
- 0xFFFFFFFFFFFFFFFFULL) + payload_size_;
+ EbmlMasterElementSize(kMkvCluster, 0xFFFFFFFFFFFFFFFFULL) + payload_size_;
return element_size;
}
@@ -1616,13 +1542,9 @@
return rel_timecode;
}
-bool Cluster::DoWriteBlock(
- const uint8* frame,
- uint64 length,
- uint64 track_number,
- uint64 abs_timecode,
- uint64 generic_arg,
- WriteBlock write_block) {
+bool Cluster::DoWriteBlock(const uint8* frame, uint64 length,
+ uint64 track_number, uint64 abs_timecode,
+ uint64 generic_arg, WriteBlock write_block) {
if (frame == NULL || length == 0)
return false;
@@ -1636,12 +1558,8 @@
if (!PreWriteBlock(write_block))
return false;
- const uint64 element_size = (*write_block)(writer_,
- frame,
- length,
- track_number,
- rel_timecode,
- generic_arg);
+ const uint64 element_size = (*write_block)(
+ writer_, frame, length, track_number, rel_timecode, generic_arg);
if (element_size == 0)
return false;
@@ -1650,17 +1568,11 @@
}
bool Cluster::DoWriteBlockWithAdditional(
- const uint8* frame,
- uint64 length,
- const uint8* additional,
- uint64 additional_length,
- uint64 add_id,
- uint64 track_number,
- uint64 abs_timecode,
- uint64 generic_arg,
- WriteBlockAdditional write_block) {
- if (frame == NULL || length == 0 ||
- additional == NULL || additional_length == 0)
+ const uint8* frame, uint64 length, const uint8* additional,
+ uint64 additional_length, uint64 add_id, uint64 track_number,
+ uint64 abs_timecode, uint64 generic_arg, WriteBlockAdditional write_block) {
+ if (frame == NULL || length == 0 || additional == NULL ||
+ additional_length == 0)
return false;
if (!IsValidTrackNumber(track_number))
@@ -1673,15 +1585,9 @@
if (!PreWriteBlock(write_block))
return false;
- const uint64 element_size = (*write_block)(writer_,
- frame,
- length,
- additional,
- additional_length,
- add_id,
- track_number,
- rel_timecode,
- generic_arg);
+ const uint64 element_size =
+ (*write_block)(writer_, frame, length, additional, additional_length,
+ add_id, track_number, rel_timecode, generic_arg);
if (element_size == 0)
return false;
@@ -1690,12 +1596,8 @@
}
bool Cluster::DoWriteBlockWithDiscardPadding(
- const uint8* frame,
- uint64 length,
- int64 discard_padding,
- uint64 track_number,
- uint64 abs_timecode,
- uint64 generic_arg,
+ const uint8* frame, uint64 length, int64 discard_padding,
+ uint64 track_number, uint64 abs_timecode, uint64 generic_arg,
WriteBlockDiscardPadding write_block) {
if (frame == NULL || length == 0 || discard_padding <= 0)
return false;
@@ -1710,13 +1612,9 @@
if (!PreWriteBlock(write_block))
return false;
- const uint64 element_size = (*write_block)(writer_,
- frame,
- length,
- discard_padding,
- track_number,
- rel_timecode,
- generic_arg);
+ const uint64 element_size =
+ (*write_block)(writer_, frame, length, discard_padding, track_number,
+ rel_timecode, generic_arg);
if (element_size == 0)
return false;
@@ -1758,8 +1656,7 @@
}
}
-SeekHead::~SeekHead() {
-}
+SeekHead::~SeekHead() {}
bool SeekHead::Finalize(IMkvWriter* writer) const {
if (writer->Seekable()) {
@@ -1771,13 +1668,12 @@
for (int32 i = 0; i < kSeekEntryCount; ++i) {
if (seek_entry_id_[i] != 0) {
- entry_size[i] = EbmlElementSize(
- kMkvSeekID,
- static_cast<uint64>(seek_entry_id_[i]));
+ entry_size[i] =
+ EbmlElementSize(kMkvSeekID, static_cast<uint64>(seek_entry_id_[i]));
entry_size[i] += EbmlElementSize(kMkvSeekPosition, seek_entry_pos_[i]);
- payload_size += EbmlMasterElementSize(kMkvSeek, entry_size[i]) +
- entry_size[i];
+ payload_size +=
+ EbmlMasterElementSize(kMkvSeek, entry_size[i]) + entry_size[i];
}
}
@@ -1797,8 +1693,7 @@
if (!WriteEbmlMasterElement(writer, kMkvSeek, entry_size[i]))
return false;
- if (!WriteEbmlElement(writer,
- kMkvSeekID,
+ if (!WriteEbmlElement(writer, kMkvSeekID,
static_cast<uint64>(seek_entry_id_[i])))
return false;
@@ -1809,8 +1704,8 @@
const uint64 total_entry_size = kSeekEntryCount * MaxEntrySize();
const uint64 total_size =
- EbmlMasterElementSize(kMkvSeekHead,
- total_entry_size) + total_entry_size;
+ EbmlMasterElementSize(kMkvSeekHead, total_entry_size) +
+ total_entry_size;
const int64 size_left = total_size - (writer->Position() - start_pos_);
const uint64 bytes_written = WriteVoidElement(writer, size_left);
@@ -1888,12 +1783,12 @@
muxing_app_(NULL),
timecode_scale_(1000000ULL),
writing_app_(NULL),
- duration_pos_(-1) {
-}
+ date_utc_(LLONG_MIN),
+ duration_pos_(-1) {}
SegmentInfo::~SegmentInfo() {
- delete [] muxing_app_;
- delete [] writing_app_;
+ delete[] muxing_app_;
+ delete[] writing_app_;
}
bool SegmentInfo::Init() {
@@ -1904,26 +1799,16 @@
GetVersion(&major, &minor, &build, &revision);
char temp[256];
#ifdef _MSC_VER
- sprintf_s(temp,
- sizeof(temp)/sizeof(temp[0]),
- "libwebm-%d.%d.%d.%d",
- major,
- minor,
- build,
- revision);
+ sprintf_s(temp, sizeof(temp) / sizeof(temp[0]), "libwebm-%d.%d.%d.%d", major,
+ minor, build, revision);
#else
- snprintf(temp,
- sizeof(temp)/sizeof(temp[0]),
- "libwebm-%d.%d.%d.%d",
- major,
- minor,
- build,
- revision);
+ snprintf(temp, sizeof(temp) / sizeof(temp[0]), "libwebm-%d.%d.%d.%d", major,
+ minor, build, revision);
#endif
const size_t app_len = strlen(temp) + 1;
- delete [] muxing_app_;
+ delete[] muxing_app_;
muxing_app_ = new (std::nothrow) char[app_len]; // NOLINT
if (!muxing_app_)
@@ -1955,8 +1840,7 @@
if (writer->Position(duration_pos_))
return false;
- if (!WriteEbmlElement(writer,
- kMkvDuration,
+ if (!WriteEbmlElement(writer, kMkvDuration,
static_cast<float>(duration_)))
return false;
@@ -1975,6 +1859,8 @@
uint64 size = EbmlElementSize(kMkvTimecodeScale, timecode_scale_);
if (duration_ > 0.0)
size += EbmlElementSize(kMkvDuration, static_cast<float>(duration_));
+ if (date_utc_ != LLONG_MIN)
+ size += EbmlDateElementSize(kMkvDateUTC, date_utc_);
size += EbmlElementSize(kMkvMuxingApp, muxing_app_);
size += EbmlElementSize(kMkvWritingApp, writing_app_);
@@ -1996,6 +1882,9 @@
return false;
}
+ if (date_utc_ != LLONG_MIN)
+ WriteEbmlDateElement(writer, kMkvDateUTC, date_utc_);
+
if (!WriteEbmlElement(writer, kMkvMuxingApp, muxing_app_))
return false;
if (!WriteEbmlElement(writer, kMkvWritingApp, writing_app_))
@@ -2022,7 +1911,7 @@
strcpy(temp_str, app);
#endif
- delete [] muxing_app_;
+ delete[] muxing_app_;
muxing_app_ = temp_str;
}
}
@@ -2040,7 +1929,7 @@
strcpy(temp_str, app);
#endif
- delete [] writing_app_;
+ delete[] writing_app_;
writing_app_ = temp_str;
}
}
@@ -2093,7 +1982,7 @@
Cluster* const cluster = cluster_list_[i];
delete cluster;
}
- delete [] cluster_list_;
+ delete[] cluster_list_;
}
if (frames_) {
@@ -2101,11 +1990,11 @@
Frame* const frame = frames_[i];
delete frame;
}
- delete [] frames_;
+ delete[] frames_;
}
- delete [] chunk_name_;
- delete [] chunking_base_name_;
+ delete[] chunk_name_;
+ delete[] chunking_base_name_;
if (chunk_writer_cluster_) {
chunk_writer_cluster_->Close();
@@ -2121,8 +2010,7 @@
}
}
-void Segment::MoveCuesBeforeClustersHelper(uint64 diff,
- int32 index,
+void Segment::MoveCuesBeforeClustersHelper(uint64 diff, int32 index,
uint64* cues_size) {
const uint64 old_cues_size = *cues_size;
CuePoint* const cue_point = cues_.GetCueByIndex(index);
@@ -2139,9 +2027,9 @@
// Let d = a + b + c. Now d is the new size of the Cues element which is
// passed on to the next recursive call.
const uint64 cue_point_size_diff = cue_point->Size() - old_cue_point_size;
- const uint64 cue_size_diff = GetCodedUIntSize(*cues_size +
- cue_point_size_diff) -
- GetCodedUIntSize(*cues_size);
+ const uint64 cue_size_diff =
+ GetCodedUIntSize(*cues_size + cue_point_size_diff) -
+ GetCodedUIntSize(*cues_size);
*cues_size += cue_point_size_diff + cue_size_diff;
diff = *cues_size - old_cues_size;
if (diff > 0) {
@@ -2187,8 +2075,8 @@
IMkvWriter* writer) {
if (!writer->Seekable() || chunking_)
return false;
- const int64 cluster_offset = cluster_list_[0]->size_position() -
- GetUIntSize(kMkvCluster);
+ const int64 cluster_offset =
+ cluster_list_[0]->size_position() - GetUIntSize(kMkvCluster);
// Copy the headers.
if (!ChunkedCopy(reader, writer, 0, cluster_offset))
@@ -2214,8 +2102,7 @@
const int64 pos = writer->Position();
const int64 segment_size = writer->Position() - payload_pos_;
if (writer->Position(size_position_) ||
- WriteUIntSize(writer, segment_size, 8) ||
- writer->Position(pos))
+ WriteUIntSize(writer, segment_size, 8) || writer->Position(pos))
return false;
return true;
}
@@ -2227,7 +2114,7 @@
if (mode_ == kFile) {
if (cluster_list_size_ > 0) {
// Update last cluster's size
- Cluster* const old_cluster = cluster_list_[cluster_list_size_-1];
+ Cluster* const old_cluster = cluster_list_[cluster_list_size_ - 1];
if (!old_cluster || !old_cluster->Finalize())
return false;
@@ -2258,7 +2145,7 @@
return false;
const bool cues_open = chunk_writer_cues_->Open(name);
- delete [] name;
+ delete[] name;
if (!cues_open)
return false;
}
@@ -2321,9 +2208,7 @@
return track;
}
-Chapter* Segment::AddChapter() {
- return chapters_.AddChapter(&seed_);
-}
+Chapter* Segment::AddChapter() { return chapters_.AddChapter(&seed_); }
uint64 Segment::AddVideoTrack(int32 width, int32 height, int32 number) {
VideoTrack* const track = new (std::nothrow) VideoTrack(&seed_); // NOLINT
@@ -2342,10 +2227,10 @@
}
bool Segment::AddCuePoint(uint64 timestamp, uint64 track) {
- if (cluster_list_size_ < 1)
+ if (cluster_list_size_ < 1)
return false;
- const Cluster* const cluster = cluster_list_[cluster_list_size_-1];
+ const Cluster* const cluster = cluster_list_[cluster_list_size_ - 1];
if (!cluster)
return false;
@@ -2364,9 +2249,7 @@
return true;
}
-uint64 Segment::AddAudioTrack(int32 sample_rate,
- int32 channels,
- int32 number) {
+uint64 Segment::AddAudioTrack(int32 sample_rate, int32 channels, int32 number) {
AudioTrack* const track = new (std::nothrow) AudioTrack(&seed_); // NOLINT
if (!track)
return 0;
@@ -2381,11 +2264,8 @@
return track->number();
}
-bool Segment::AddFrame(const uint8* frame,
- uint64 length,
- uint64 track_number,
- uint64 timestamp,
- bool is_key) {
+bool Segment::AddFrame(const uint8* frame, uint64 length, uint64 track_number,
+ uint64 timestamp, bool is_key) {
if (!frame)
return false;
@@ -2426,11 +2306,7 @@
const uint64 timecode_scale = segment_info_.timecode_scale();
const uint64 abs_timecode = timestamp / timecode_scale;
- if (!cluster->AddFrame(frame,
- length,
- track_number,
- abs_timecode,
- is_key))
+ if (!cluster->AddFrame(frame, length, track_number, abs_timecode, is_key))
return false;
if (new_cuepoint_ && cues_track_ == track_number) {
@@ -2444,13 +2320,10 @@
return true;
}
-bool Segment::AddFrameWithAdditional(const uint8* frame,
- uint64 length,
+bool Segment::AddFrameWithAdditional(const uint8* frame, uint64 length,
const uint8* additional,
- uint64 additional_length,
- uint64 add_id,
- uint64 track_number,
- uint64 timestamp,
+ uint64 additional_length, uint64 add_id,
+ uint64 track_number, uint64 timestamp,
bool is_key) {
if (frame == NULL || additional == NULL)
return false;
@@ -2492,14 +2365,9 @@
const uint64 timecode_scale = segment_info_.timecode_scale();
const uint64 abs_timecode = timestamp / timecode_scale;
- if (!cluster->AddFrameWithAdditional(frame,
- length,
- additional,
- additional_length,
- add_id,
- track_number,
- abs_timecode,
- is_key))
+ if (!cluster->AddFrameWithAdditional(frame, length, additional,
+ additional_length, add_id, track_number,
+ abs_timecode, is_key))
return false;
if (new_cuepoint_ && cues_track_ == track_number) {
@@ -2513,11 +2381,9 @@
return true;
}
-bool Segment::AddFrameWithDiscardPadding(const uint8* frame,
- uint64 length,
+bool Segment::AddFrameWithDiscardPadding(const uint8* frame, uint64 length,
int64 discard_padding,
- uint64 track_number,
- uint64 timestamp,
+ uint64 track_number, uint64 timestamp,
bool is_key) {
if (frame == NULL || discard_padding <= 0)
return false;
@@ -2560,11 +2426,8 @@
const uint64 timecode_scale = segment_info_.timecode_scale();
const uint64 abs_timecode = timestamp / timecode_scale;
- if (!cluster->AddFrameWithDiscardPadding(frame, length,
- discard_padding,
- track_number,
- abs_timecode,
- is_key)) {
+ if (!cluster->AddFrameWithDiscardPadding(
+ frame, length, discard_padding, track_number, abs_timecode, is_key)) {
return false;
}
@@ -2579,10 +2442,8 @@
return true;
}
-bool Segment::AddMetadata(const uint8* frame,
- uint64 length,
- uint64 track_number,
- uint64 timestamp_ns,
+bool Segment::AddMetadata(const uint8* frame, uint64 length,
+ uint64 track_number, uint64 timestamp_ns,
uint64 duration_ns) {
if (!frame)
return false;
@@ -2600,7 +2461,7 @@
if (cluster_list_size_ < 1)
return false;
- Cluster* const cluster = cluster_list_[cluster_list_size_-1];
+ Cluster* const cluster = cluster_list_[cluster_list_size_ - 1];
if (!cluster)
return false;
@@ -2609,10 +2470,7 @@
const uint64 abs_timecode = timestamp_ns / timecode_scale;
const uint64 duration_timecode = duration_ns / timecode_scale;
- if (!cluster->AddMetadata(frame,
- length,
- track_number,
- abs_timecode,
+ if (!cluster->AddMetadata(frame, length, track_number, abs_timecode,
duration_timecode))
return false;
@@ -2625,40 +2483,25 @@
bool Segment::AddGenericFrame(const Frame* frame) {
last_block_duration_ = frame->duration();
if (!tracks_.TrackIsAudio(frame->track_number()) &&
- !tracks_.TrackIsVideo(frame->track_number()) &&
- frame->duration() > 0) {
- return AddMetadata(frame->frame(),
- frame->length(),
- frame->track_number(),
- frame->timestamp(),
- frame->duration());
+ !tracks_.TrackIsVideo(frame->track_number()) && frame->duration() > 0) {
+ return AddMetadata(frame->frame(), frame->length(), frame->track_number(),
+ frame->timestamp(), frame->duration());
} else if (frame->additional() && frame->additional_length() > 0) {
- return AddFrameWithAdditional(frame->frame(),
- frame->length(),
- frame->additional(),
- frame->additional_length(),
- frame->add_id(),
- frame->track_number(),
- frame->timestamp(),
- frame->is_key());
+ return AddFrameWithAdditional(
+ frame->frame(), frame->length(), frame->additional(),
+ frame->additional_length(), frame->add_id(), frame->track_number(),
+ frame->timestamp(), frame->is_key());
} else if (frame->discard_padding() > 0) {
- return AddFrameWithDiscardPadding(frame->frame(), frame->length(),
- frame->discard_padding(),
- frame->track_number(),
- frame->timestamp(),
- frame->is_key());
+ return AddFrameWithDiscardPadding(
+ frame->frame(), frame->length(), frame->discard_padding(),
+ frame->track_number(), frame->timestamp(), frame->is_key());
} else {
- return AddFrame(frame->frame(),
- frame->length(),
- frame->track_number(),
- frame->timestamp(),
- frame->is_key());
+ return AddFrame(frame->frame(), frame->length(), frame->track_number(),
+ frame->timestamp(), frame->is_key());
}
}
-void Segment::OutputCues(bool output_cues) {
- output_cues_ = output_cues;
-}
+void Segment::OutputCues(bool output_cues) { output_cues_ = output_cues; }
bool Segment::SetChunking(bool chunking, const char* filename) {
if (chunk_count_ > 0)
@@ -2683,7 +2526,7 @@
strcpy(temp, filename);
#endif
- delete [] chunking_base_name_;
+ delete[] chunking_base_name_;
chunking_base_name_ = temp;
if (!UpdateChunkName("chk", &chunk_name_))
@@ -2723,7 +2566,7 @@
strcat(header, ".hdr");
#endif
if (!chunk_writer_header_->Open(header)) {
- delete [] header;
+ delete[] header;
return false;
}
@@ -2731,7 +2574,7 @@
writer_cues_ = chunk_writer_cues_;
writer_header_ = chunk_writer_header_;
- delete [] header;
+ delete[] header;
}
chunking_ = chunking;
@@ -2748,9 +2591,7 @@
return true;
}
-void Segment::ForceNewClusterOnNextFrame() {
- force_new_cluster_ = true;
-}
+void Segment::ForceNewClusterOnNextFrame() { force_new_cluster_ = true; }
Track* Segment::GetTrackByNumber(uint64 track_number) const {
return tracks_.GetTrackByNumber(track_number);
@@ -2775,7 +2616,7 @@
if (SerializeInt(writer_header_, kEbmlUnknownValue, 8))
return false;
- payload_pos_ = writer_header_->Position();
+ payload_pos_ = writer_header_->Position();
if (mode_ == kFile && writer_header_->Seekable()) {
// Set the duration > 0.0 so SegmentInfo will write out the duration. When
@@ -2819,8 +2660,7 @@
// Here we are testing whether to create a new cluster, given a frame
// having time frame_timestamp_ns.
//
-int Segment::TestFrame(uint64 track_number,
- uint64 frame_timestamp_ns,
+int Segment::TestFrame(uint64 track_number, uint64 frame_timestamp_ns,
bool is_key) const {
if (force_new_cluster_)
return 1;
@@ -2850,7 +2690,7 @@
// so this indicates a bug somewhere in our algorithm.
if (frame_timecode < last_cluster_timecode) // should never happen
- return -1; // error
+ return -1;
// If the frame has a timestamp significantly larger than the last
// cluster (in Matroska, cluster-relative timestamps are serialized
@@ -2900,7 +2740,7 @@
const int32 new_capacity =
(cluster_list_capacity_ <= 0) ? 1 : cluster_list_capacity_ * 2;
Cluster** const clusters =
- new (std::nothrow) Cluster*[new_capacity]; // NOLINT
+ new (std::nothrow) Cluster* [new_capacity]; // NOLINT
if (!clusters)
return false;
@@ -2908,7 +2748,7 @@
clusters[i] = cluster_list_[i];
}
- delete [] cluster_list_;
+ delete[] cluster_list_;
cluster_list_ = clusters;
cluster_list_capacity_ = new_capacity;
@@ -2968,8 +2808,7 @@
}
bool Segment::DoNewClusterProcessing(uint64 track_number,
- uint64 frame_timestamp_ns,
- bool is_key) {
+ uint64 frame_timestamp_ns, bool is_key) {
for (;;) {
// Based on the characteristics of the current frame and current
// cluster, decide whether to create a new cluster.
@@ -2977,12 +2816,12 @@
if (result < 0) // error
return false;
- // Always set force_new_cluster_ to false after TestFrame.
- force_new_cluster_ = false;
+ // Always set force_new_cluster_ to false after TestFrame.
+ force_new_cluster_ = false;
- // A non-zero result means create a new cluster.
- if (result > 0 && !MakeNewCluster(frame_timestamp_ns))
- return false;
+ // A non-zero result means create a new cluster.
+ if (result > 0 && !MakeNewCluster(frame_timestamp_ns))
+ return false;
// Write queued (audio) frames.
const int frame_count = WriteFramesAll();
@@ -3051,14 +2890,14 @@
return false;
#ifdef _MSC_VER
- strcpy_s(str, length-strlen(ext_chk), chunking_base_name_);
+ strcpy_s(str, length - strlen(ext_chk), chunking_base_name_);
strcat_s(str, length, ext_chk);
#else
strcpy(str, chunking_base_name_);
strcat(str, ext_chk);
#endif
- delete [] *name;
+ delete[] * name;
*name = str;
return true;
@@ -3093,7 +2932,7 @@
if (new_capacity < 1)
return false;
- Frame** const frames = new (std::nothrow) Frame*[new_capacity]; // NOLINT
+ Frame** const frames = new (std::nothrow) Frame* [new_capacity]; // NOLINT
if (!frames)
return false;
@@ -3101,7 +2940,7 @@
frames[i] = frames_[i];
}
- delete [] frames_;
+ delete[] frames_;
frames_ = frames;
frames_capacity_ = new_capacity;
}
@@ -3118,7 +2957,7 @@
if (cluster_list_size_ < 1)
return -1;
- Cluster* const cluster = cluster_list_[cluster_list_size_-1];
+ Cluster* const cluster = cluster_list_[cluster_list_size_ - 1];
if (!cluster)
return -1;
@@ -3131,19 +2970,14 @@
const uint64 frame_timecode = frame_timestamp / timecode_scale;
if (frame->discard_padding() > 0) {
- if (!cluster->AddFrameWithDiscardPadding(frame->frame(),
- frame->length(),
- frame->discard_padding(),
- frame->track_number(),
- frame_timecode,
- frame->is_key())) {
+ if (!cluster->AddFrameWithDiscardPadding(
+ frame->frame(), frame->length(), frame->discard_padding(),
+ frame->track_number(), frame_timecode, frame->is_key())) {
return -1;
}
} else {
- if (!cluster->AddFrame(frame->frame(),
- frame->length(),
- frame->track_number(),
- frame_timecode,
+ if (!cluster->AddFrame(frame->frame(), frame->length(),
+ frame->track_number(), frame_timecode,
frame->is_key())) {
return -1;
}
@@ -3175,7 +3009,7 @@
if (!frames_)
return false;
- Cluster* const cluster = cluster_list_[cluster_list_size_-1];
+ Cluster* const cluster = cluster_list_[cluster_list_size_ - 1];
if (!cluster)
return false;
@@ -3190,25 +3024,21 @@
if (frame_curr->timestamp() > timestamp)
break;
- const Frame* const frame_prev = frames_[i-1];
+ const Frame* const frame_prev = frames_[i - 1];
const uint64 frame_timestamp = frame_prev->timestamp();
const uint64 frame_timecode = frame_timestamp / timecode_scale;
const int64 discard_padding = frame_prev->discard_padding();
if (discard_padding > 0) {
- if (!cluster->AddFrameWithDiscardPadding(frame_prev->frame(),
- frame_prev->length(),
- discard_padding,
- frame_prev->track_number(),
- frame_timecode,
- frame_prev->is_key())) {
+ if (!cluster->AddFrameWithDiscardPadding(
+ frame_prev->frame(), frame_prev->length(), discard_padding,
+ frame_prev->track_number(), frame_timecode,
+ frame_prev->is_key())) {
return false;
}
} else {
- if (!cluster->AddFrame(frame_prev->frame(),
- frame_prev->length(),
- frame_prev->track_number(),
- frame_timecode,
+ if (!cluster->AddFrame(frame_prev->frame(), frame_prev->length(),
+ frame_prev->track_number(), frame_timecode,
frame_prev->is_key())) {
return false;
}
@@ -3232,7 +3062,7 @@
const int32 new_frames_size = frames_size_ - shift_left;
for (int32 i = 0; i < new_frames_size; ++i) {
- frames_[i] = frames_[i+shift_left];
+ frames_[i] = frames_[i + shift_left];
}
frames_size_ = new_frames_size;
diff --git a/third_party/libwebm/mkvmuxer.hpp b/third_party/libwebm/mkvmuxer.hpp
index 63a315e..1c1c310 100644
--- a/third_party/libwebm/mkvmuxer.hpp
+++ b/third_party/libwebm/mkvmuxer.hpp
@@ -15,7 +15,7 @@
// http://www.webmproject.org/code/specs/container/.
namespace mkvparser {
- class IMkvReader;
+class IMkvReader;
} // end namespace
namespace mkvmuxer {
@@ -60,8 +60,8 @@
bool WriteEbmlHeader(IMkvWriter* writer);
// Copies in Chunk from source to destination between the given byte positions
-bool ChunkedCopy(mkvparser::IMkvReader* source, IMkvWriter* dst,
- int64 start, int64 size);
+bool ChunkedCopy(mkvparser::IMkvReader* source, IMkvWriter* dst, int64 start,
+ int64 size);
///////////////////////////////////////////////////////////////
// Class to hold data the will be written to a block.
@@ -74,8 +74,7 @@
bool Init(const uint8* frame, uint64 length);
// Copies |additional| data into |additional_|. Returns true on success.
- bool AddAdditionalData(const uint8* additional, uint64 length,
- uint64 add_id);
+ bool AddAdditionalData(const uint8* additional, uint64 length, uint64 add_id);
uint64 add_id() const { return add_id_; }
const uint8* additional() const { return additional_; }
@@ -223,9 +222,7 @@
// ContentEncAESSettings element
class ContentEncAESSettings {
public:
- enum {
- kCTR = 1
- };
+ enum { kCTR = 1 };
ContentEncAESSettings();
~ContentEncAESSettings() {}
@@ -353,6 +350,10 @@
seek_pre_roll_ = seek_pre_roll;
}
uint64 seek_pre_roll() const { return seek_pre_roll_; }
+ void set_default_duration(uint64 default_duration) {
+ default_duration_ = default_duration;
+ }
+ uint64 default_duration() const { return default_duration_; }
uint64 codec_private_length() const { return codec_private_length_; }
uint32 content_encoding_entries_size() const {
@@ -360,7 +361,7 @@
}
private:
- // Track element names
+ // Track element names.
char* codec_id_;
uint8* codec_private_;
char* language_;
@@ -371,6 +372,7 @@
uint64 uid_;
uint64 codec_delay_;
uint64 seek_pre_roll_;
+ uint64 default_duration_;
// Size of the CodecPrivate data in bytes.
uint64 codec_private_length_;
@@ -391,16 +393,13 @@
// Supported modes for stereo 3D.
enum StereoMode {
kMono = 0,
- kSideBySideLeftIsFirst = 1,
- kTopBottomRightIsFirst = 2,
- kTopBottomLeftIsFirst = 3,
+ kSideBySideLeftIsFirst = 1,
+ kTopBottomRightIsFirst = 2,
+ kTopBottomLeftIsFirst = 3,
kSideBySideRightIsFirst = 11
};
- enum AlphaMode {
- kNoAlpha = 0,
- kAlpha = 1
- };
+ enum AlphaMode { kNoAlpha = 0, kAlpha = 1 };
// The |seed| parameter is used to synthesize a UID for the track.
explicit VideoTrack(unsigned int* seed);
@@ -484,10 +483,7 @@
class Tracks {
public:
// Audio and video type defined by the Matroska specs.
- enum {
- kVideo = 0x1,
- kAudio = 0x2
- };
+ enum { kVideo = 0x1, kAudio = 0x2 };
// Opus, Vorbis, VP8, and VP9 codec ids defined by the Matroska specs.
static const char kOpusCodecId[];
static const char kVorbisCodecId[];
@@ -544,8 +540,7 @@
// Converts the nanosecond start and stop times of this chapter to
// their corresponding timecode values, and stores them that way.
- void set_time(const Segment& segment,
- uint64 start_time_ns,
+ void set_time(const Segment& segment, uint64 start_time_ns,
uint64 end_time_ns);
// Sets the uid for this chapter. Primarily used to enable
@@ -568,9 +563,7 @@
// http://www.iana.org/domains/root/db/
//
// The function returns false if the string could not be allocated.
- bool add_string(const char* title,
- const char* language,
- const char* country);
+ bool add_string(const char* title, const char* language, const char* country);
private:
friend class Chapters;
@@ -724,9 +717,7 @@
// timecode: Absolute (not relative to cluster) timestamp of the
// frame, expressed in timecode units.
// is_key: Flag telling whether or not this frame is a key frame.
- bool AddFrame(const uint8* frame,
- uint64 length,
- uint64 track_number,
+ bool AddFrame(const uint8* frame, uint64 length, uint64 track_number,
uint64 timecode, // timecode units (absolute)
bool is_key);
@@ -743,14 +734,10 @@
// abs_timecode: Absolute (not relative to cluster) timestamp of the
// frame, expressed in timecode units.
// is_key: Flag telling whether or not this frame is a key frame.
- bool AddFrameWithAdditional(const uint8* frame,
- uint64 length,
- const uint8* additional,
- uint64 additional_length,
- uint64 add_id,
- uint64 track_number,
- uint64 abs_timecode,
- bool is_key);
+ bool AddFrameWithAdditional(const uint8* frame, uint64 length,
+ const uint8* additional, uint64 additional_length,
+ uint64 add_id, uint64 track_number,
+ uint64 abs_timecode, bool is_key);
// Adds a frame to be output in the file. The frame is written out through
// |writer_| if successful. Returns true on success.
@@ -763,12 +750,9 @@
// abs_timecode: Absolute (not relative to cluster) timestamp of the
// frame, expressed in timecode units.
// is_key: Flag telling whether or not this frame is a key frame.
- bool AddFrameWithDiscardPadding(const uint8* frame,
- uint64 length,
- int64 discard_padding,
- uint64 track_number,
- uint64 abs_timecode,
- bool is_key);
+ bool AddFrameWithDiscardPadding(const uint8* frame, uint64 length,
+ int64 discard_padding, uint64 track_number,
+ uint64 abs_timecode, bool is_key);
// Writes a frame of metadata to the output medium; returns true on
// success.
@@ -784,11 +768,8 @@
// The metadata frame is written as a block group, with a duration
// sub-element but no reference time sub-elements (indicating that
// it is considered a keyframe, per Matroska semantics).
- bool AddMetadata(const uint8* frame,
- uint64 length,
- uint64 track_number,
- uint64 timecode, // timecode units (absolute)
- uint64 duration); // timecode units
+ bool AddMetadata(const uint8* frame, uint64 length, uint64 track_number,
+ uint64 timecode, uint64 duration);
// Increments the size of the cluster's data in bytes.
void AddPayloadSize(uint64 size);
@@ -809,34 +790,26 @@
private:
// Signature that matches either of WriteSimpleBlock or WriteMetadataBlock
// in the muxer utilities package.
- typedef uint64 (*WriteBlock)(IMkvWriter* writer,
- const uint8* data,
- uint64 length,
- uint64 track_number,
- int64 timecode,
- uint64 generic_arg);
+ typedef uint64 (*WriteBlock)(IMkvWriter* writer, const uint8* data,
+ uint64 length, uint64 track_number,
+ int64 timecode, uint64 generic_arg);
// Signature that matches WriteBlockWithAdditional
// in the muxer utilities package.
- typedef uint64 (*WriteBlockAdditional)(IMkvWriter* writer,
- const uint8* data,
- uint64 length,
- const uint8* additional,
+ typedef uint64 (*WriteBlockAdditional)(IMkvWriter* writer, const uint8* data,
+ uint64 length, const uint8* additional,
uint64 add_id,
uint64 additional_length,
- uint64 track_number,
- int64 timecode,
+ uint64 track_number, int64 timecode,
uint64 is_key);
// Signature that matches WriteBlockWithDiscardPadding
// in the muxer utilities package.
typedef uint64 (*WriteBlockDiscardPadding)(IMkvWriter* writer,
- const uint8* data,
- uint64 length,
+ const uint8* data, uint64 length,
int64 discard_padding,
uint64 track_number,
- int64 timecode,
- uint64 is_key);
+ int64 timecode, uint64 is_key);
// Utility method that confirms that blocks can still be added, and that the
// cluster header has been written. Used by |DoWriteBlock*|. Returns true
@@ -858,27 +831,20 @@
int64 GetRelativeTimecode(int64 abs_timecode) const;
// Used to implement AddFrame and AddMetadata.
- bool DoWriteBlock(const uint8* frame,
- uint64 length,
- uint64 track_number,
- uint64 absolute_timecode,
- uint64 generic_arg,
+ bool DoWriteBlock(const uint8* frame, uint64 length, uint64 track_number,
+ uint64 absolute_timecode, uint64 generic_arg,
WriteBlock write_block);
// Used to implement AddFrameWithAdditional
- bool DoWriteBlockWithAdditional(const uint8* frame,
- uint64 length,
+ bool DoWriteBlockWithAdditional(const uint8* frame, uint64 length,
const uint8* additional,
- uint64 additional_length,
- uint64 add_id,
- uint64 track_number,
- uint64 absolute_timecode,
+ uint64 additional_length, uint64 add_id,
+ uint64 track_number, uint64 absolute_timecode,
uint64 generic_arg,
WriteBlockAdditional write_block);
// Used to implement AddFrameWithDiscardPadding
- bool DoWriteBlockWithDiscardPadding(const uint8* frame,
- uint64 length,
+ bool DoWriteBlockWithDiscardPadding(const uint8* frame, uint64 length,
int64 discard_padding,
uint64 track_number,
uint64 absolute_timecode,
@@ -993,6 +959,8 @@
uint64 timecode_scale() const { return timecode_scale_; }
void set_writing_app(const char* app);
const char* writing_app() const { return writing_app_; }
+ void set_date_utc(int64 date_utc) { date_utc_ = date_utc; }
+ int64 date_utc() const { return date_utc_; }
private:
// Segment Information element names.
@@ -1004,6 +972,8 @@
uint64 timecode_scale_;
// Initially set to libwebm-%d.%d.%d.%d, major, minor, build, revision.
char* writing_app_;
+ // LLONG_MIN when DateUTC is not set.
+ int64 date_utc_;
// The file position of the duration element.
int64 duration_pos_;
@@ -1019,10 +989,7 @@
// |Init| must be called before any other method in this class.
class Segment {
public:
- enum Mode {
- kLive = 0x1,
- kFile = 0x2
- };
+ enum Mode { kLive = 0x1, kFile = 0x2 };
enum CuesPosition {
kAfterClusters = 0x0, // Position Cues after Clusters - Default
@@ -1070,11 +1037,8 @@
// functions.
// timestamp: Timestamp of the frame in nanoseconds from 0.
// is_key: Flag telling whether or not this frame is a key frame.
- bool AddFrame(const uint8* frame,
- uint64 length,
- uint64 track_number,
- uint64 timestamp_ns,
- bool is_key);
+ bool AddFrame(const uint8* frame, uint64 length, uint64 track_number,
+ uint64 timestamp_ns, bool is_key);
// Writes a frame of metadata to the output medium; returns true on
// success.
@@ -1090,11 +1054,8 @@
// The metadata frame is written as a block group, with a duration
// sub-element but no reference time sub-elements (indicating that
// it is considered a keyframe, per Matroska semantics).
- bool AddMetadata(const uint8* frame,
- uint64 length,
- uint64 track_number,
- uint64 timestamp_ns,
- uint64 duration_ns);
+ bool AddMetadata(const uint8* frame, uint64 length, uint64 track_number,
+ uint64 timestamp_ns, uint64 duration_ns);
// Writes a frame with additional data to the output medium; returns true on
// success.
@@ -1109,14 +1070,10 @@
// timestamp: Absolute timestamp of the frame, expressed in nanosecond
// units.
// is_key: Flag telling whether or not this frame is a key frame.
- bool AddFrameWithAdditional(const uint8* frame,
- uint64 length,
- const uint8* additional,
- uint64 additional_length,
- uint64 add_id,
- uint64 track_number,
- uint64 timestamp,
- bool is_key);
+ bool AddFrameWithAdditional(const uint8* frame, uint64 length,
+ const uint8* additional, uint64 additional_length,
+ uint64 add_id, uint64 track_number,
+ uint64 timestamp, bool is_key);
// Writes a frame with DiscardPadding to the output medium; returns true on
// success.
@@ -1129,12 +1086,9 @@
// timestamp: Absolute timestamp of the frame, expressed in nanosecond
// units.
// is_key: Flag telling whether or not this frame is a key frame.
- bool AddFrameWithDiscardPadding(const uint8* frame,
- uint64 length,
- int64 discard_padding,
- uint64 track_number,
- uint64 timestamp,
- bool is_key);
+ bool AddFrameWithDiscardPadding(const uint8* frame, uint64 length,
+ int64 discard_padding, uint64 track_number,
+ uint64 timestamp, bool is_key);
// Writes a Frame to the output medium. Chooses the correct way of writing
// the frame (Block vs SimpleBlock) based on the parameters passed.
@@ -1268,7 +1222,6 @@
// was necessary but creation was not successful.
bool DoNewClusterProcessing(uint64 track_num, uint64 timestamp_ns, bool key);
-
// Adjusts Cue Point values (to place Cues before Clusters) so that they
// reflect the correct offsets.
void MoveCuesBeforeClusters();
@@ -1398,6 +1351,6 @@
LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Segment);
};
-} //end namespace mkvmuxer
+} // end namespace mkvmuxer
-#endif //MKVMUXER_HPP
+#endif // MKVMUXER_HPP
diff --git a/third_party/libwebm/mkvmuxertypes.hpp b/third_party/libwebm/mkvmuxertypes.hpp
index 2c66fd2..d0fc9fe 100644
--- a/third_party/libwebm/mkvmuxertypes.hpp
+++ b/third_party/libwebm/mkvmuxertypes.hpp
@@ -1,30 +1,30 @@
-// Copyright (c) 2012 The WebM project authors. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the LICENSE file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-
-#ifndef MKVMUXERTYPES_HPP
-#define MKVMUXERTYPES_HPP
-
-// Copied from Chromium basictypes.h
-// A macro to disallow the copy constructor and operator= functions
-// This should be used in the private: declarations for a class
-#define LIBWEBM_DISALLOW_COPY_AND_ASSIGN(TypeName) \
- TypeName(const TypeName&); \
- void operator=(const TypeName&)
-
-namespace mkvmuxer {
-
-typedef unsigned char uint8;
-typedef short int16;
-typedef int int32;
-typedef unsigned int uint32;
-typedef long long int64;
-typedef unsigned long long uint64;
-
-} //end namespace mkvmuxer
-
-#endif // MKVMUXERTYPES_HPP
+// Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+
+#ifndef MKVMUXERTYPES_HPP
+#define MKVMUXERTYPES_HPP
+
+// Copied from Chromium basictypes.h
+// A macro to disallow the copy constructor and operator= functions
+// This should be used in the private: declarations for a class
+#define LIBWEBM_DISALLOW_COPY_AND_ASSIGN(TypeName) \
+ TypeName(const TypeName&); \
+ void operator=(const TypeName&)
+
+namespace mkvmuxer {
+
+typedef unsigned char uint8;
+typedef short int16;
+typedef int int32;
+typedef unsigned int uint32;
+typedef long long int64;
+typedef unsigned long long uint64;
+
+} // end namespace mkvmuxer
+
+#endif // MKVMUXERTYPES_HPP
diff --git a/third_party/libwebm/mkvmuxerutil.cpp b/third_party/libwebm/mkvmuxerutil.cpp
index 18060e9..3fb9bc9 100644
--- a/third_party/libwebm/mkvmuxerutil.cpp
+++ b/third_party/libwebm/mkvmuxerutil.cpp
@@ -29,6 +29,13 @@
namespace mkvmuxer {
+namespace {
+
+// Date elements are always 8 octets in size.
+const int kDateElementSize = 8;
+
+} // namespace
+
int32 GetCodedUIntSize(uint64 value) {
if (value < 0x000000000000007FULL)
return 1;
@@ -92,7 +99,7 @@
return ebml_size;
}
-uint64 EbmlElementSize(uint64 type, float /* value */ ) {
+uint64 EbmlElementSize(uint64 type, float /* value */) {
// Size of EBML ID
uint64 ebml_size = GetUIntSize(type);
@@ -137,6 +144,19 @@
return ebml_size;
}
+uint64 EbmlDateElementSize(uint64 type, int64 value) {
+ // Size of EBML ID
+ uint64 ebml_size = GetUIntSize(type);
+
+ // Datasize
+ ebml_size += kDateElementSize;
+
+ // Size of Datasize
+ ebml_size++;
+
+ return ebml_size;
+}
+
int32 SerializeInt(IMkvWriter* writer, int64 value, int32 size) {
if (!writer || size < 1 || size > 8)
return -1;
@@ -302,9 +322,7 @@
return true;
}
-bool WriteEbmlElement(IMkvWriter* writer,
- uint64 type,
- const uint8* value,
+bool WriteEbmlElement(IMkvWriter* writer, uint64 type, const uint8* value,
uint64 size) {
if (!writer || !value || size < 1)
return false;
@@ -321,12 +339,24 @@
return true;
}
-uint64 WriteSimpleBlock(IMkvWriter* writer,
- const uint8* data,
- uint64 length,
- uint64 track_number,
- int64 timecode,
- uint64 is_key) {
+bool WriteEbmlDateElement(IMkvWriter* writer, uint64 type, int64 value) {
+ if (!writer)
+ return false;
+
+ if (WriteID(writer, type))
+ return false;
+
+ if (WriteUInt(writer, kDateElementSize))
+ return false;
+
+ if (SerializeInt(writer, value, kDateElementSize))
+ return false;
+
+ return true;
+}
+
+uint64 WriteSimpleBlock(IMkvWriter* writer, const uint8* data, uint64 length,
+ uint64 track_number, int64 timecode, uint64 is_key) {
if (!writer)
return false;
@@ -372,7 +402,7 @@
return 0;
const uint64 element_size =
- GetUIntSize(kMkvSimpleBlock) + GetCodedUIntSize(size) + 4 + length;
+ GetUIntSize(kMkvSimpleBlock) + GetCodedUIntSize(size) + 4 + length;
return element_size;
}
@@ -391,11 +421,8 @@
// Duration size
// (duration payload)
//
-uint64 WriteMetadataBlock(IMkvWriter* writer,
- const uint8* data,
- uint64 length,
- uint64 track_number,
- int64 timecode,
+uint64 WriteMetadataBlock(IMkvWriter* writer, const uint8* data, uint64 length,
+ uint64 track_number, int64 timecode,
uint64 duration) {
// We don't backtrack when writing to the stream, so we must
// pre-compute the BlockGroup size, by summing the sizes of each
@@ -487,47 +514,37 @@
// 1 (Denotes Alpha)
// BlockAdditional
// Data
-uint64 WriteBlockWithAdditional(IMkvWriter* writer,
- const uint8* data,
- uint64 length,
- const uint8* additional,
- uint64 additional_length,
- uint64 add_id,
- uint64 track_number,
- int64 timecode,
+uint64 WriteBlockWithAdditional(IMkvWriter* writer, const uint8* data,
+ uint64 length, const uint8* additional,
+ uint64 additional_length, uint64 add_id,
+ uint64 track_number, int64 timecode,
uint64 is_key) {
if (!data || !additional || length < 1 || additional_length < 1)
return 0;
const uint64 block_payload_size = 4 + length;
- const uint64 block_elem_size = EbmlMasterElementSize(kMkvBlock,
- block_payload_size) +
- block_payload_size;
- const uint64 block_additional_elem_size = EbmlElementSize(kMkvBlockAdditional,
- additional,
- additional_length);
+ const uint64 block_elem_size =
+ EbmlMasterElementSize(kMkvBlock, block_payload_size) + block_payload_size;
+ const uint64 block_additional_elem_size =
+ EbmlElementSize(kMkvBlockAdditional, additional, additional_length);
const uint64 block_addid_elem_size = EbmlElementSize(kMkvBlockAddID, add_id);
- const uint64 block_more_payload_size = block_addid_elem_size +
- block_additional_elem_size;
- const uint64 block_more_elem_size = EbmlMasterElementSize(
- kMkvBlockMore,
- block_more_payload_size) +
- block_more_payload_size;
+ const uint64 block_more_payload_size =
+ block_addid_elem_size + block_additional_elem_size;
+ const uint64 block_more_elem_size =
+ EbmlMasterElementSize(kMkvBlockMore, block_more_payload_size) +
+ block_more_payload_size;
const uint64 block_additions_payload_size = block_more_elem_size;
- const uint64 block_additions_elem_size = EbmlMasterElementSize(
- kMkvBlockAdditions,
- block_additions_payload_size) +
- block_additions_payload_size;
- const uint64 block_group_payload_size = block_elem_size +
- block_additions_elem_size;
- const uint64 block_group_elem_size = EbmlMasterElementSize(
- kMkvBlockGroup,
- block_group_payload_size) +
- block_group_payload_size;
+ const uint64 block_additions_elem_size =
+ EbmlMasterElementSize(kMkvBlockAdditions, block_additions_payload_size) +
+ block_additions_payload_size;
+ const uint64 block_group_payload_size =
+ block_elem_size + block_additions_elem_size;
+ const uint64 block_group_elem_size =
+ EbmlMasterElementSize(kMkvBlockGroup, block_group_payload_size) +
+ block_group_payload_size;
- if (!WriteEbmlMasterElement(writer, kMkvBlockGroup,
- block_group_payload_size))
+ if (!WriteEbmlMasterElement(writer, kMkvBlockGroup, block_group_payload_size))
return 0;
if (!WriteEbmlMasterElement(writer, kMkvBlock, block_payload_size))
@@ -558,8 +575,8 @@
if (!WriteEbmlElement(writer, kMkvBlockAddID, add_id))
return 0;
- if (!WriteEbmlElement(writer, kMkvBlockAdditional,
- additional, additional_length))
+ if (!WriteEbmlElement(writer, kMkvBlockAdditional, additional,
+ additional_length))
return 0;
return block_group_elem_size;
@@ -571,31 +588,25 @@
// Block
// Data
// DiscardPadding
-uint64 WriteBlockWithDiscardPadding(IMkvWriter* writer,
- const uint8* data,
- uint64 length,
- int64 discard_padding,
- uint64 track_number,
- int64 timecode,
+uint64 WriteBlockWithDiscardPadding(IMkvWriter* writer, const uint8* data,
+ uint64 length, int64 discard_padding,
+ uint64 track_number, int64 timecode,
uint64 is_key) {
if (!data || length < 1 || discard_padding <= 0)
return 0;
const uint64 block_payload_size = 4 + length;
- const uint64 block_elem_size = EbmlMasterElementSize(kMkvBlock,
- block_payload_size) +
- block_payload_size;
- const uint64 discard_padding_elem_size = EbmlElementSize(kMkvDiscardPadding,
- discard_padding);
- const uint64 block_group_payload_size = block_elem_size +
- discard_padding_elem_size;
- const uint64 block_group_elem_size = EbmlMasterElementSize(
- kMkvBlockGroup,
- block_group_payload_size) +
- block_group_payload_size;
+ const uint64 block_elem_size =
+ EbmlMasterElementSize(kMkvBlock, block_payload_size) + block_payload_size;
+ const uint64 discard_padding_elem_size =
+ EbmlElementSize(kMkvDiscardPadding, discard_padding);
+ const uint64 block_group_payload_size =
+ block_elem_size + discard_padding_elem_size;
+ const uint64 block_group_elem_size =
+ EbmlMasterElementSize(kMkvBlockGroup, block_group_payload_size) +
+ block_group_payload_size;
- if (!WriteEbmlMasterElement(writer, kMkvBlockGroup,
- block_group_payload_size))
+ if (!WriteEbmlMasterElement(writer, kMkvBlockGroup, block_group_payload_size))
return 0;
if (!WriteEbmlMasterElement(writer, kMkvBlock, block_payload_size))
@@ -634,9 +645,9 @@
return false;
// Subtract one for the void ID and the coded size.
- uint64 void_entry_size = size - 1 - GetCodedUIntSize(size-1);
- uint64 void_size = EbmlMasterElementSize(kMkvVoid, void_entry_size) +
- void_entry_size;
+ uint64 void_entry_size = size - 1 - GetCodedUIntSize(size - 1);
+ uint64 void_size =
+ EbmlMasterElementSize(kMkvVoid, void_entry_size) + void_entry_size;
if (void_size != size)
return 0;
@@ -684,13 +695,13 @@
for (int i = 0; i < 7; ++i) { // avoid problems with 8-byte values
uid <<= 8;
- // TODO(fgalligan): Move random number generation to platform specific code.
+// TODO(fgalligan): Move random number generation to platform specific code.
#ifdef _MSC_VER
(void)seed;
unsigned int random_value;
const errno_t e = rand_s(&random_value);
(void)e;
- const int32 nn = random_value;
+ const int32 nn = random_value;
#elif __ANDROID__
int32 temp_num = 1;
int fd = open("/dev/urandom", O_RDONLY);
diff --git a/third_party/libwebm/mkvmuxerutil.hpp b/third_party/libwebm/mkvmuxerutil.hpp
index d196ad3..a092abe 100644
--- a/third_party/libwebm/mkvmuxerutil.hpp
+++ b/third_party/libwebm/mkvmuxerutil.hpp
@@ -30,6 +30,7 @@
uint64 EbmlElementSize(uint64 type, float value);
uint64 EbmlElementSize(uint64 type, const char* value);
uint64 EbmlElementSize(uint64 type, const uint8* value, uint64 size);
+uint64 EbmlDateElementSize(uint64 type, int64 value);
// Creates an EBML coded number from |value| and writes it out. The size of
// the coded number is determined by the value of |value|. |value| must not
@@ -52,10 +53,9 @@
bool WriteEbmlElement(IMkvWriter* writer, uint64 type, uint64 value);
bool WriteEbmlElement(IMkvWriter* writer, uint64 type, float value);
bool WriteEbmlElement(IMkvWriter* writer, uint64 type, const char* value);
-bool WriteEbmlElement(IMkvWriter* writer,
- uint64 type,
- const uint8* value,
+bool WriteEbmlElement(IMkvWriter* writer, uint64 type, const uint8* value,
uint64 size);
+bool WriteEbmlDateElement(IMkvWriter* writer, uint64 type, int64 value);
// Output an Mkv Simple Block.
// Inputs:
@@ -67,12 +67,8 @@
// timecode: Relative timecode of the Block. Only values in the
// range [0, 2^15) are permitted.
// is_key: Non-zero value specifies that frame is a key frame.
-uint64 WriteSimpleBlock(IMkvWriter* writer,
- const uint8* data,
- uint64 length,
- uint64 track_number,
- int64 timecode,
- uint64 is_key);
+uint64 WriteSimpleBlock(IMkvWriter* writer, const uint8* data, uint64 length,
+ uint64 track_number, int64 timecode, uint64 is_key);
// Output a metadata keyframe, using a Block Group element.
// Inputs:
@@ -84,11 +80,8 @@
// timecode Timecode of frame, relative to cluster timecode. Only
// values in the range [0, 2^15) are permitted.
// duration_timecode Duration of frame, using timecode units.
-uint64 WriteMetadataBlock(IMkvWriter* writer,
- const uint8* data,
- uint64 length,
- uint64 track_number,
- int64 timecode,
+uint64 WriteMetadataBlock(IMkvWriter* writer, const uint8* data, uint64 length,
+ uint64 track_number, int64 timecode,
uint64 duration_timecode);
// Output an Mkv Block with BlockAdditional data.
@@ -104,14 +97,10 @@
// timecode: Relative timecode of the Block. Only values in the
// range [0, 2^15) are permitted.
// is_key: Non-zero value specifies that frame is a key frame.
-uint64 WriteBlockWithAdditional(IMkvWriter* writer,
- const uint8* data,
- uint64 length,
- const uint8* additional,
- uint64 additional_length,
- uint64 add_id,
- uint64 track_number,
- int64 timecode,
+uint64 WriteBlockWithAdditional(IMkvWriter* writer, const uint8* data,
+ uint64 length, const uint8* additional,
+ uint64 additional_length, uint64 add_id,
+ uint64 track_number, int64 timecode,
uint64 is_key);
// Output an Mkv Block with a DiscardPadding element.
@@ -125,12 +114,9 @@
// timecode: Relative timecode of the Block. Only values in the
// range [0, 2^15) are permitted.
// is_key: Non-zero value specifies that frame is a key frame.
-uint64 WriteBlockWithDiscardPadding(IMkvWriter* writer,
- const uint8* data,
- uint64 length,
- int64 discard_padding,
- uint64 track_number,
- int64 timecode,
+uint64 WriteBlockWithDiscardPadding(IMkvWriter* writer, const uint8* data,
+ uint64 length, int64 discard_padding,
+ uint64 track_number, int64 timecode,
uint64 is_key);
// Output a void element. |size| must be the entire size in bytes that will be
@@ -146,6 +132,6 @@
// the random-number generator (see POSIX rand_r() for semantics).
uint64 MakeUID(unsigned int* seed);
-} //end namespace mkvmuxer
+} // end namespace mkvmuxer
-#endif // MKVMUXERUTIL_HPP
+#endif // MKVMUXERUTIL_HPP
diff --git a/third_party/libwebm/mkvparser.cpp b/third_party/libwebm/mkvparser.cpp
index b41456a..441f165 100644
--- a/third_party/libwebm/mkvparser.cpp
+++ b/third_party/libwebm/mkvparser.cpp
@@ -14,1383 +14,1204 @@
#ifdef _MSC_VER
// Disable MSVC warnings that suggest making code non-portable.
-#pragma warning(disable:4996)
+#pragma warning(disable : 4996)
#endif
-mkvparser::IMkvReader::~IMkvReader()
-{
+mkvparser::IMkvReader::~IMkvReader() {}
+
+void mkvparser::GetVersion(int& major, int& minor, int& build, int& revision) {
+ major = 1;
+ minor = 0;
+ build = 0;
+ revision = 28;
}
-void mkvparser::GetVersion(int& major, int& minor, int& build, int& revision)
-{
- major = 1;
- minor = 0;
- build = 0;
- revision = 27;
-}
+long long mkvparser::ReadUInt(IMkvReader* pReader, long long pos, long& len) {
+ assert(pReader);
+ assert(pos >= 0);
-long long mkvparser::ReadUInt(IMkvReader* pReader, long long pos, long& len)
-{
- assert(pReader);
- assert(pos >= 0);
+ int status;
- int status;
+ //#ifdef _DEBUG
+ // long long total, available;
+ // status = pReader->Length(&total, &available);
+ // assert(status >= 0);
+ // assert((total < 0) || (available <= total));
+ // assert(pos < available);
+ // assert((available - pos) >= 1); //assume here max u-int len is 8
+ //#endif
-//#ifdef _DEBUG
-// long long total, available;
-// status = pReader->Length(&total, &available);
-// assert(status >= 0);
-// assert((total < 0) || (available <= total));
-// assert(pos < available);
-// assert((available - pos) >= 1); //assume here max u-int len is 8
-//#endif
+ len = 1;
- len = 1;
+ unsigned char b;
- unsigned char b;
+ status = pReader->Read(pos, 1, &b);
+ if (status < 0) // error or underflow
+ return status;
+
+ if (status > 0) // interpreted as "underflow"
+ return E_BUFFER_NOT_FULL;
+
+ if (b == 0) // we can't handle u-int values larger than 8 bytes
+ return E_FILE_FORMAT_INVALID;
+
+ unsigned char m = 0x80;
+
+ while (!(b & m)) {
+ m >>= 1;
+ ++len;
+ }
+
+ //#ifdef _DEBUG
+ // assert((available - pos) >= len);
+ //#endif
+
+ long long result = b & (~m);
+ ++pos;
+
+ for (int i = 1; i < len; ++i) {
status = pReader->Read(pos, 1, &b);
- if (status < 0) //error or underflow
- return status;
-
- if (status > 0) //interpreted as "underflow"
- return E_BUFFER_NOT_FULL;
-
- if (b == 0) //we can't handle u-int values larger than 8 bytes
- return E_FILE_FORMAT_INVALID;
-
- unsigned char m = 0x80;
-
- while (!(b & m))
- {
- m >>= 1;
- ++len;
+ if (status < 0) {
+ len = 1;
+ return status;
}
-//#ifdef _DEBUG
-// assert((available - pos) >= len);
-//#endif
+ if (status > 0) {
+ len = 1;
+ return E_BUFFER_NOT_FULL;
+ }
- long long result = b & (~m);
+ result <<= 8;
+ result |= b;
+
++pos;
+ }
- for (int i = 1; i < len; ++i)
- {
- status = pReader->Read(pos, 1, &b);
-
- if (status < 0)
- {
- len = 1;
- return status;
- }
-
- if (status > 0)
- {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
-
- result <<= 8;
- result |= b;
-
- ++pos;
- }
-
- return result;
+ return result;
}
-long long mkvparser::GetUIntLength(
- IMkvReader* pReader,
- long long pos,
- long& len)
-{
- assert(pReader);
- assert(pos >= 0);
+long long mkvparser::GetUIntLength(IMkvReader* pReader, long long pos,
+ long& len) {
+ assert(pReader);
+ assert(pos >= 0);
- long long total, available;
+ long long total, available;
- int status = pReader->Length(&total, &available);
- assert(status >= 0);
- assert((total < 0) || (available <= total));
+ int status = pReader->Length(&total, &available);
+ assert(status >= 0);
+ assert((total < 0) || (available <= total));
- len = 1;
+ len = 1;
- if (pos >= available)
- return pos; //too few bytes available
+ if (pos >= available)
+ return pos; // too few bytes available
+ unsigned char b;
+
+ status = pReader->Read(pos, 1, &b);
+
+ if (status < 0)
+ return status;
+
+ assert(status == 0);
+
+ if (b == 0) // we can't handle u-int values larger than 8 bytes
+ return E_FILE_FORMAT_INVALID;
+
+ unsigned char m = 0x80;
+
+ while (!(b & m)) {
+ m >>= 1;
+ ++len;
+ }
+
+ return 0; // success
+}
+
+long long mkvparser::UnserializeUInt(IMkvReader* pReader, long long pos,
+ long long size) {
+ assert(pReader);
+ assert(pos >= 0);
+
+ if ((size <= 0) || (size > 8))
+ return E_FILE_FORMAT_INVALID;
+
+ long long result = 0;
+
+ for (long long i = 0; i < size; ++i) {
unsigned char b;
- status = pReader->Read(pos, 1, &b);
+ const long status = pReader->Read(pos, 1, &b);
if (status < 0)
- return status;
+ return status;
- assert(status == 0);
+ result <<= 8;
+ result |= b;
- if (b == 0) //we can't handle u-int values larger than 8 bytes
- return E_FILE_FORMAT_INVALID;
+ ++pos;
+ }
- unsigned char m = 0x80;
-
- while (!(b & m))
- {
- m >>= 1;
- ++len;
- }
-
- return 0; //success
+ return result;
}
+long mkvparser::UnserializeFloat(IMkvReader* pReader, long long pos,
+ long long size_, double& result) {
+ assert(pReader);
+ assert(pos >= 0);
-long long mkvparser::UnserializeUInt(
- IMkvReader* pReader,
- long long pos,
- long long size)
-{
- assert(pReader);
- assert(pos >= 0);
+ if ((size_ != 4) && (size_ != 8))
+ return E_FILE_FORMAT_INVALID;
- if ((size <= 0) || (size > 8))
- return E_FILE_FORMAT_INVALID;
+ const long size = static_cast<long>(size_);
- long long result = 0;
+ unsigned char buf[8];
- for (long long i = 0; i < size; ++i)
- {
- unsigned char b;
+ const int status = pReader->Read(pos, size, buf);
- const long status = pReader->Read(pos, 1, &b);
+ if (status < 0) // error
+ return status;
- if (status < 0)
- return status;
+ if (size == 4) {
+ union {
+ float f;
+ unsigned long ff;
+ };
- result <<= 8;
- result |= b;
+ ff = 0;
- ++pos;
+ for (int i = 0;;) {
+ ff |= buf[i];
+
+ if (++i >= 4)
+ break;
+
+ ff <<= 8;
}
- return result;
+ result = f;
+ } else {
+ assert(size == 8);
+
+ union {
+ double d;
+ unsigned long long dd;
+ };
+
+ dd = 0;
+
+ for (int i = 0;;) {
+ dd |= buf[i];
+
+ if (++i >= 8)
+ break;
+
+ dd <<= 8;
+ }
+
+ result = d;
+ }
+
+ return 0;
}
+long mkvparser::UnserializeInt(IMkvReader* pReader, long long pos, long size,
+ long long& result) {
+ assert(pReader);
+ assert(pos >= 0);
+ assert(size > 0);
+ assert(size <= 8);
-long mkvparser::UnserializeFloat(
- IMkvReader* pReader,
- long long pos,
- long long size_,
- double& result)
-{
- assert(pReader);
- assert(pos >= 0);
+ {
+ signed char b;
- if ((size_ != 4) && (size_ != 8))
- return E_FILE_FORMAT_INVALID;
+ const long status = pReader->Read(pos, 1, (unsigned char*)&b);
- const long size = static_cast<long>(size_);
+ if (status < 0)
+ return status;
- unsigned char buf[8];
+ result = b;
- const int status = pReader->Read(pos, size, buf);
+ ++pos;
+ }
- if (status < 0) //error
- return status;
+ for (long i = 1; i < size; ++i) {
+ unsigned char b;
- if (size == 4)
- {
- union
- {
- float f;
- unsigned long ff;
- };
+ const long status = pReader->Read(pos, 1, &b);
- ff = 0;
+ if (status < 0)
+ return status;
- for (int i = 0;;)
- {
- ff |= buf[i];
+ result <<= 8;
+ result |= b;
- if (++i >= 4)
- break;
+ ++pos;
+ }
- ff <<= 8;
- }
-
- result = f;
- }
- else
- {
- assert(size == 8);
-
- union
- {
- double d;
- unsigned long long dd;
- };
-
- dd = 0;
-
- for (int i = 0;;)
- {
- dd |= buf[i];
-
- if (++i >= 8)
- break;
-
- dd <<= 8;
- }
-
- result = d;
- }
-
- return 0;
+ return 0; // success
}
+long mkvparser::UnserializeString(IMkvReader* pReader, long long pos,
+ long long size_, char*& str) {
+ delete[] str;
+ str = NULL;
-long mkvparser::UnserializeInt(
- IMkvReader* pReader,
- long long pos,
- long size,
- long long& result)
-{
- assert(pReader);
- assert(pos >= 0);
- assert(size > 0);
- assert(size <= 8);
+ if (size_ >= LONG_MAX) // we need (size+1) chars
+ return E_FILE_FORMAT_INVALID;
- {
- signed char b;
+ const long size = static_cast<long>(size_);
- const long status = pReader->Read(pos, 1, (unsigned char*)&b);
+ str = new (std::nothrow) char[size + 1];
- if (status < 0)
- return status;
+ if (str == NULL)
+ return -1;
- result = b;
+ unsigned char* const buf = reinterpret_cast<unsigned char*>(str);
- ++pos;
- }
+ const long status = pReader->Read(pos, size, buf);
- for (long i = 1; i < size; ++i)
- {
- unsigned char b;
-
- const long status = pReader->Read(pos, 1, &b);
-
- if (status < 0)
- return status;
-
- result <<= 8;
- result |= b;
-
- ++pos;
- }
-
- return 0; //success
-}
-
-
-long mkvparser::UnserializeString(
- IMkvReader* pReader,
- long long pos,
- long long size_,
- char*& str)
-{
+ if (status) {
delete[] str;
str = NULL;
- if (size_ >= LONG_MAX) //we need (size+1) chars
- return E_FILE_FORMAT_INVALID;
+ return status;
+ }
- const long size = static_cast<long>(size_);
+ str[size] = '\0';
- str = new (std::nothrow) char[size+1];
-
- if (str == NULL)
- return -1;
-
- unsigned char* const buf = reinterpret_cast<unsigned char*>(str);
-
- const long status = pReader->Read(pos, size, buf);
-
- if (status)
- {
- delete[] str;
- str = NULL;
-
- return status;
- }
-
- str[size] = '\0';
-
- return 0; //success
+ return 0; // success
}
+long mkvparser::ParseElementHeader(IMkvReader* pReader, long long& pos,
+ long long stop, long long& id,
+ long long& size) {
+ if ((stop >= 0) && (pos >= stop))
+ return E_FILE_FORMAT_INVALID;
-long mkvparser::ParseElementHeader(
- IMkvReader* pReader,
- long long& pos,
- long long stop,
- long long& id,
- long long& size)
-{
- if ((stop >= 0) && (pos >= stop))
- return E_FILE_FORMAT_INVALID;
+ long len;
- long len;
+ id = ReadUInt(pReader, pos, len);
- id = ReadUInt(pReader, pos, len);
+ if (id < 0)
+ return E_FILE_FORMAT_INVALID;
- if (id < 0)
- return E_FILE_FORMAT_INVALID;
+ pos += len; // consume id
- pos += len; //consume id
+ if ((stop >= 0) && (pos >= stop))
+ return E_FILE_FORMAT_INVALID;
- if ((stop >= 0) && (pos >= stop))
- return E_FILE_FORMAT_INVALID;
+ size = ReadUInt(pReader, pos, len);
- size = ReadUInt(pReader, pos, len);
+ if (size < 0)
+ return E_FILE_FORMAT_INVALID;
- if (size < 0)
- return E_FILE_FORMAT_INVALID;
+ pos += len; // consume length of size
- pos += len; //consume length of size
+ // pos now designates payload
- //pos now designates payload
+ if ((stop >= 0) && ((pos + size) > stop))
+ return E_FILE_FORMAT_INVALID;
- if ((stop >= 0) && ((pos + size) > stop))
- return E_FILE_FORMAT_INVALID;
-
- return 0; //success
+ return 0; // success
}
+bool mkvparser::Match(IMkvReader* pReader, long long& pos, unsigned long id_,
+ long long& val) {
+ assert(pReader);
+ assert(pos >= 0);
-bool mkvparser::Match(
- IMkvReader* pReader,
- long long& pos,
- unsigned long id_,
- long long& val)
-{
- assert(pReader);
- assert(pos >= 0);
+ long long total, available;
- long long total, available;
+ const long status = pReader->Length(&total, &available);
+ assert(status >= 0);
+ assert((total < 0) || (available <= total));
+ if (status < 0)
+ return false;
- const long status = pReader->Length(&total, &available);
- assert(status >= 0);
- assert((total < 0) || (available <= total));
- if (status < 0)
- return false;
+ long len;
- long len;
+ const long long id = ReadUInt(pReader, pos, len);
+ assert(id >= 0);
+ assert(len > 0);
+ assert(len <= 8);
+ assert((pos + len) <= available);
- const long long id = ReadUInt(pReader, pos, len);
- assert(id >= 0);
- assert(len > 0);
- assert(len <= 8);
- assert((pos + len) <= available);
+ if ((unsigned long)id != id_)
+ return false;
- if ((unsigned long)id != id_)
- return false;
+ pos += len; // consume id
- pos += len; //consume id
+ const long long size = ReadUInt(pReader, pos, len);
+ assert(size >= 0);
+ assert(size <= 8);
+ assert(len > 0);
+ assert(len <= 8);
+ assert((pos + len) <= available);
- const long long size = ReadUInt(pReader, pos, len);
- assert(size >= 0);
- assert(size <= 8);
- assert(len > 0);
- assert(len <= 8);
- assert((pos + len) <= available);
+ pos += len; // consume length of size of payload
- pos += len; //consume length of size of payload
+ val = UnserializeUInt(pReader, pos, size);
+ assert(val >= 0);
- val = UnserializeUInt(pReader, pos, size);
- assert(val >= 0);
+ pos += size; // consume size of payload
- pos += size; //consume size of payload
-
- return true;
+ return true;
}
-bool mkvparser::Match(
- IMkvReader* pReader,
- long long& pos,
- unsigned long id_,
- unsigned char*& buf,
- size_t& buflen)
-{
- assert(pReader);
- assert(pos >= 0);
+bool mkvparser::Match(IMkvReader* pReader, long long& pos, unsigned long id_,
+ unsigned char*& buf, size_t& buflen) {
+ assert(pReader);
+ assert(pos >= 0);
- long long total, available;
+ long long total, available;
- long status = pReader->Length(&total, &available);
- assert(status >= 0);
- assert((total < 0) || (available <= total));
- if (status < 0)
- return false;
+ long status = pReader->Length(&total, &available);
+ assert(status >= 0);
+ assert((total < 0) || (available <= total));
+ if (status < 0)
+ return false;
- long len;
- const long long id = ReadUInt(pReader, pos, len);
- assert(id >= 0);
- assert(len > 0);
- assert(len <= 8);
- assert((pos + len) <= available);
+ long len;
+ const long long id = ReadUInt(pReader, pos, len);
+ assert(id >= 0);
+ assert(len > 0);
+ assert(len <= 8);
+ assert((pos + len) <= available);
- if ((unsigned long)id != id_)
- return false;
+ if ((unsigned long)id != id_)
+ return false;
- pos += len; //consume id
+ pos += len; // consume id
- const long long size_ = ReadUInt(pReader, pos, len);
- assert(size_ >= 0);
- assert(len > 0);
- assert(len <= 8);
- assert((pos + len) <= available);
+ const long long size_ = ReadUInt(pReader, pos, len);
+ assert(size_ >= 0);
+ assert(len > 0);
+ assert(len <= 8);
+ assert((pos + len) <= available);
- pos += len; //consume length of size of payload
- assert((pos + size_) <= available);
+ pos += len; // consume length of size of payload
+ assert((pos + size_) <= available);
- const long buflen_ = static_cast<long>(size_);
+ const long buflen_ = static_cast<long>(size_);
- buf = new (std::nothrow) unsigned char[buflen_];
- assert(buf); //TODO
+ buf = new (std::nothrow) unsigned char[buflen_];
+ assert(buf); // TODO
- status = pReader->Read(pos, buflen_, buf);
- assert(status == 0); //TODO
+ status = pReader->Read(pos, buflen_, buf);
+ assert(status == 0); // TODO
- buflen = buflen_;
+ buflen = buflen_;
- pos += size_; //consume size of payload
- return true;
+ pos += size_; // consume size of payload
+ return true;
}
+namespace mkvparser {
-namespace mkvparser
-{
+EBMLHeader::EBMLHeader() : m_docType(NULL) { Init(); }
-EBMLHeader::EBMLHeader() :
- m_docType(NULL)
-{
- Init();
-}
+EBMLHeader::~EBMLHeader() { delete[] m_docType; }
-EBMLHeader::~EBMLHeader()
-{
+void EBMLHeader::Init() {
+ m_version = 1;
+ m_readVersion = 1;
+ m_maxIdLength = 4;
+ m_maxSizeLength = 8;
+
+ if (m_docType) {
delete[] m_docType;
+ m_docType = NULL;
+ }
+
+ m_docTypeVersion = 1;
+ m_docTypeReadVersion = 1;
}
-void EBMLHeader::Init()
-{
- m_version = 1;
- m_readVersion = 1;
- m_maxIdLength = 4;
- m_maxSizeLength = 8;
+long long EBMLHeader::Parse(IMkvReader* pReader, long long& pos) {
+ assert(pReader);
- if (m_docType)
- {
- delete[] m_docType;
- m_docType = NULL;
- }
+ long long total, available;
- m_docTypeVersion = 1;
- m_docTypeReadVersion = 1;
-}
+ long status = pReader->Length(&total, &available);
-long long EBMLHeader::Parse(
- IMkvReader* pReader,
- long long& pos)
-{
- assert(pReader);
+ if (status < 0) // error
+ return status;
- long long total, available;
+ pos = 0;
+ long long end = (available >= 1024) ? 1024 : available;
- long status = pReader->Length(&total, &available);
+ for (;;) {
+ unsigned char b = 0;
- if (status < 0) //error
+ while (pos < end) {
+ status = pReader->Read(pos, 1, &b);
+
+ if (status < 0) // error
return status;
- pos = 0;
- long long end = (available >= 1024) ? 1024 : available;
+ if (b == 0x1A)
+ break;
- for (;;)
- {
- unsigned char b = 0;
-
- while (pos < end)
- {
- status = pReader->Read(pos, 1, &b);
-
- if (status < 0) //error
- return status;
-
- if (b == 0x1A)
- break;
-
- ++pos;
- }
-
- if (b != 0x1A)
- {
- if (pos >= 1024)
- return E_FILE_FORMAT_INVALID; //don't bother looking anymore
-
- if ((total >= 0) && ((total - available) < 5))
- return E_FILE_FORMAT_INVALID;
-
- return available + 5; //5 = 4-byte ID + 1st byte of size
- }
-
- if ((total >= 0) && ((total - pos) < 5))
- return E_FILE_FORMAT_INVALID;
-
- if ((available - pos) < 5)
- return pos + 5; //try again later
-
- long len;
-
- const long long result = ReadUInt(pReader, pos, len);
-
- if (result < 0) //error
- return result;
-
- if (result == 0x0A45DFA3) //EBML Header ID
- {
- pos += len; //consume ID
- break;
- }
-
- ++pos; //throw away just the 0x1A byte, and try again
+ ++pos;
}
- //pos designates start of size field
+ if (b != 0x1A) {
+ if (pos >= 1024)
+ return E_FILE_FORMAT_INVALID; // don't bother looking anymore
- //get length of size field
+ if ((total >= 0) && ((total - available) < 5))
+ return E_FILE_FORMAT_INVALID;
+ return available + 5; // 5 = 4-byte ID + 1st byte of size
+ }
+
+ if ((total >= 0) && ((total - pos) < 5))
+ return E_FILE_FORMAT_INVALID;
+
+ if ((available - pos) < 5)
+ return pos + 5; // try again later
+
+ long len;
+
+ const long long result = ReadUInt(pReader, pos, len);
+
+ if (result < 0) // error
+ return result;
+
+ if (result == 0x0A45DFA3) { // EBML Header ID
+ pos += len; // consume ID
+ break;
+ }
+
+ ++pos; // throw away just the 0x1A byte, and try again
+ }
+
+ // pos designates start of size field
+
+ // get length of size field
+
+ long len;
+ long long result = GetUIntLength(pReader, pos, len);
+
+ if (result < 0) // error
+ return result;
+
+ if (result > 0) // need more data
+ return result;
+
+ assert(len > 0);
+ assert(len <= 8);
+
+ if ((total >= 0) && ((total - pos) < len))
+ return E_FILE_FORMAT_INVALID;
+
+ if ((available - pos) < len)
+ return pos + len; // try again later
+
+ // get the EBML header size
+
+ result = ReadUInt(pReader, pos, len);
+
+ if (result < 0) // error
+ return result;
+
+ pos += len; // consume size field
+
+ // pos now designates start of payload
+
+ if ((total >= 0) && ((total - pos) < result))
+ return E_FILE_FORMAT_INVALID;
+
+ if ((available - pos) < result)
+ return pos + result;
+
+ end = pos + result;
+
+ Init();
+
+ while (pos < end) {
+ long long id, size;
+
+ status = ParseElementHeader(pReader, pos, end, id, size);
+
+ if (status < 0) // error
+ return status;
+
+ if (size == 0) // weird
+ return E_FILE_FORMAT_INVALID;
+
+ if (id == 0x0286) { // version
+ m_version = UnserializeUInt(pReader, pos, size);
+
+ if (m_version <= 0)
+ return E_FILE_FORMAT_INVALID;
+ } else if (id == 0x02F7) { // read version
+ m_readVersion = UnserializeUInt(pReader, pos, size);
+
+ if (m_readVersion <= 0)
+ return E_FILE_FORMAT_INVALID;
+ } else if (id == 0x02F2) { // max id length
+ m_maxIdLength = UnserializeUInt(pReader, pos, size);
+
+ if (m_maxIdLength <= 0)
+ return E_FILE_FORMAT_INVALID;
+ } else if (id == 0x02F3) { // max size length
+ m_maxSizeLength = UnserializeUInt(pReader, pos, size);
+
+ if (m_maxSizeLength <= 0)
+ return E_FILE_FORMAT_INVALID;
+ } else if (id == 0x0282) { // doctype
+ if (m_docType)
+ return E_FILE_FORMAT_INVALID;
+
+ status = UnserializeString(pReader, pos, size, m_docType);
+
+ if (status) // error
+ return status;
+ } else if (id == 0x0287) { // doctype version
+ m_docTypeVersion = UnserializeUInt(pReader, pos, size);
+
+ if (m_docTypeVersion <= 0)
+ return E_FILE_FORMAT_INVALID;
+ } else if (id == 0x0285) { // doctype read version
+ m_docTypeReadVersion = UnserializeUInt(pReader, pos, size);
+
+ if (m_docTypeReadVersion <= 0)
+ return E_FILE_FORMAT_INVALID;
+ }
+
+ pos += size;
+ }
+
+ assert(pos == end);
+ return 0;
+}
+
+Segment::Segment(IMkvReader* pReader, long long elem_start,
+ // long long elem_size,
+ long long start, long long size)
+ : m_pReader(pReader),
+ m_element_start(elem_start),
+ // m_element_size(elem_size),
+ m_start(start),
+ m_size(size),
+ m_pos(start),
+ m_pUnknownSize(0),
+ m_pSeekHead(NULL),
+ m_pInfo(NULL),
+ m_pTracks(NULL),
+ m_pCues(NULL),
+ m_pChapters(NULL),
+ m_clusters(NULL),
+ m_clusterCount(0),
+ m_clusterPreloadCount(0),
+ m_clusterSize(0) {}
+
+Segment::~Segment() {
+ const long count = m_clusterCount + m_clusterPreloadCount;
+
+ Cluster** i = m_clusters;
+ Cluster** j = m_clusters + count;
+
+ while (i != j) {
+ Cluster* const p = *i++;
+ assert(p);
+
+ delete p;
+ }
+
+ delete[] m_clusters;
+
+ delete m_pTracks;
+ delete m_pInfo;
+ delete m_pCues;
+ delete m_pChapters;
+ delete m_pSeekHead;
+}
+
+long long Segment::CreateInstance(IMkvReader* pReader, long long pos,
+ Segment*& pSegment) {
+ assert(pReader);
+ assert(pos >= 0);
+
+ pSegment = NULL;
+
+ long long total, available;
+
+ const long status = pReader->Length(&total, &available);
+
+ if (status < 0) // error
+ return status;
+
+ if (available < 0)
+ return -1;
+
+ if ((total >= 0) && (available > total))
+ return -1;
+
+ // I would assume that in practice this loop would execute
+ // exactly once, but we allow for other elements (e.g. Void)
+ // to immediately follow the EBML header. This is fine for
+ // the source filter case (since the entire file is available),
+ // but in the splitter case over a network we should probably
+ // just give up early. We could for example decide only to
+ // execute this loop a maximum of, say, 10 times.
+ // TODO:
+ // There is an implied "give up early" by only parsing up
+ // to the available limit. We do do that, but only if the
+ // total file size is unknown. We could decide to always
+ // use what's available as our limit (irrespective of whether
+ // we happen to know the total file length). This would have
+ // as its sense "parse this much of the file before giving up",
+ // which a slightly different sense from "try to parse up to
+ // 10 EMBL elements before giving up".
+
+ for (;;) {
+ if ((total >= 0) && (pos >= total))
+ return E_FILE_FORMAT_INVALID;
+
+ // Read ID
long len;
long long result = GetUIntLength(pReader, pos, len);
- if (result < 0) //error
- return result;
+ if (result) // error, or too few available bytes
+ return result;
- if (result > 0) //need more data
- return result;
+ if ((total >= 0) && ((pos + len) > total))
+ return E_FILE_FORMAT_INVALID;
- assert(len > 0);
- assert(len <= 8);
+ if ((pos + len) > available)
+ return pos + len;
- if ((total >= 0) && ((total - pos) < len))
- return E_FILE_FORMAT_INVALID;
+ const long long idpos = pos;
+ const long long id = ReadUInt(pReader, pos, len);
- if ((available - pos) < len)
- return pos + len; //try again later
+ if (id < 0) // error
+ return id;
- //get the EBML header size
+ pos += len; // consume ID
- result = ReadUInt(pReader, pos, len);
+ // Read Size
- if (result < 0) //error
- return result;
+ result = GetUIntLength(pReader, pos, len);
- pos += len; //consume size field
+ if (result) // error, or too few available bytes
+ return result;
- //pos now designates start of payload
+ if ((total >= 0) && ((pos + len) > total))
+ return E_FILE_FORMAT_INVALID;
- if ((total >= 0) && ((total - pos) < result))
- return E_FILE_FORMAT_INVALID;
+ if ((pos + len) > available)
+ return pos + len;
- if ((available - pos) < result)
- return pos + result;
+ long long size = ReadUInt(pReader, pos, len);
- end = pos + result;
+ if (size < 0) // error
+ return size;
- Init();
+ pos += len; // consume length of size of element
- while (pos < end)
- {
- long long id, size;
+ // Pos now points to start of payload
- status = ParseElementHeader(
- pReader,
- pos,
- end,
- id,
- size);
+ // Handle "unknown size" for live streaming of webm files.
+ const long long unknown_size = (1LL << (7 * len)) - 1;
- if (status < 0) //error
- return status;
+ if (id == 0x08538067) { // Segment ID
+ if (size == unknown_size)
+ size = -1;
- if (size == 0) //weird
- return E_FILE_FORMAT_INVALID;
+ else if (total < 0)
+ size = -1;
- if (id == 0x0286) //version
- {
- m_version = UnserializeUInt(pReader, pos, size);
+ else if ((pos + size) > total)
+ size = -1;
- if (m_version <= 0)
- return E_FILE_FORMAT_INVALID;
- }
- else if (id == 0x02F7) //read version
- {
- m_readVersion = UnserializeUInt(pReader, pos, size);
+ pSegment = new (std::nothrow) Segment(pReader, idpos,
+ // elem_size
+ pos, size);
- if (m_readVersion <= 0)
- return E_FILE_FORMAT_INVALID;
- }
- else if (id == 0x02F2) //max id length
- {
- m_maxIdLength = UnserializeUInt(pReader, pos, size);
+ if (pSegment == 0)
+ return -1; // generic error
- if (m_maxIdLength <= 0)
- return E_FILE_FORMAT_INVALID;
- }
- else if (id == 0x02F3) //max size length
- {
- m_maxSizeLength = UnserializeUInt(pReader, pos, size);
-
- if (m_maxSizeLength <= 0)
- return E_FILE_FORMAT_INVALID;
- }
- else if (id == 0x0282) //doctype
- {
- if (m_docType)
- return E_FILE_FORMAT_INVALID;
-
- status = UnserializeString(pReader, pos, size, m_docType);
-
- if (status) //error
- return status;
- }
- else if (id == 0x0287) //doctype version
- {
- m_docTypeVersion = UnserializeUInt(pReader, pos, size);
-
- if (m_docTypeVersion <= 0)
- return E_FILE_FORMAT_INVALID;
- }
- else if (id == 0x0285) //doctype read version
- {
- m_docTypeReadVersion = UnserializeUInt(pReader, pos, size);
-
- if (m_docTypeReadVersion <= 0)
- return E_FILE_FORMAT_INVALID;
- }
-
- pos += size;
+ return 0; // success
}
- assert(pos == end);
- return 0;
+ if (size == unknown_size)
+ return E_FILE_FORMAT_INVALID;
+
+ if ((total >= 0) && ((pos + size) > total))
+ return E_FILE_FORMAT_INVALID;
+
+ if ((pos + size) > available)
+ return pos + size;
+
+ pos += size; // consume payload
+ }
}
+long long Segment::ParseHeaders() {
+ // Outermost (level 0) segment object has been constructed,
+ // and pos designates start of payload. We need to find the
+ // inner (level 1) elements.
+ long long total, available;
-Segment::Segment(
- IMkvReader* pReader,
- long long elem_start,
- //long long elem_size,
- long long start,
- long long size) :
- m_pReader(pReader),
- m_element_start(elem_start),
- //m_element_size(elem_size),
- m_start(start),
- m_size(size),
- m_pos(start),
- m_pUnknownSize(0),
- m_pSeekHead(NULL),
- m_pInfo(NULL),
- m_pTracks(NULL),
- m_pCues(NULL),
- m_pChapters(NULL),
- m_clusters(NULL),
- m_clusterCount(0),
- m_clusterPreloadCount(0),
- m_clusterSize(0)
-{
-}
+ const int status = m_pReader->Length(&total, &available);
+ if (status < 0) // error
+ return status;
-Segment::~Segment()
-{
- const long count = m_clusterCount + m_clusterPreloadCount;
+ assert((total < 0) || (available <= total));
- Cluster** i = m_clusters;
- Cluster** j = m_clusters + count;
+ const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size;
+ assert((segment_stop < 0) || (total < 0) || (segment_stop <= total));
+ assert((segment_stop < 0) || (m_pos <= segment_stop));
- while (i != j)
- {
- Cluster* const p = *i++;
- assert(p);
+ for (;;) {
+ if ((total >= 0) && (m_pos >= total))
+ break;
- delete p;
- }
+ if ((segment_stop >= 0) && (m_pos >= segment_stop))
+ break;
- delete[] m_clusters;
+ long long pos = m_pos;
+ const long long element_start = pos;
- delete m_pTracks;
- delete m_pInfo;
- delete m_pCues;
- delete m_pChapters;
- delete m_pSeekHead;
-}
+ if ((pos + 1) > available)
+ return (pos + 1);
+ long len;
+ long long result = GetUIntLength(m_pReader, pos, len);
-long long Segment::CreateInstance(
- IMkvReader* pReader,
- long long pos,
- Segment*& pSegment)
-{
- assert(pReader);
- assert(pos >= 0);
+ if (result < 0) // error
+ return result;
- pSegment = NULL;
+ if (result > 0) // underflow (weird)
+ return (pos + 1);
- long long total, available;
+ if ((segment_stop >= 0) && ((pos + len) > segment_stop))
+ return E_FILE_FORMAT_INVALID;
- const long status = pReader->Length(&total, &available);
+ if ((pos + len) > available)
+ return pos + len;
- if (status < 0) //error
- return status;
+ const long long idpos = pos;
+ const long long id = ReadUInt(m_pReader, idpos, len);
- if (available < 0)
+ if (id < 0) // error
+ return id;
+
+ if (id == 0x0F43B675) // Cluster ID
+ break;
+
+ pos += len; // consume ID
+
+ if ((pos + 1) > available)
+ return (pos + 1);
+
+ // Read Size
+ result = GetUIntLength(m_pReader, pos, len);
+
+ if (result < 0) // error
+ return result;
+
+ if (result > 0) // underflow (weird)
+ return (pos + 1);
+
+ if ((segment_stop >= 0) && ((pos + len) > segment_stop))
+ return E_FILE_FORMAT_INVALID;
+
+ if ((pos + len) > available)
+ return pos + len;
+
+ const long long size = ReadUInt(m_pReader, pos, len);
+
+ if (size < 0) // error
+ return size;
+
+ pos += len; // consume length of size of element
+
+ const long long element_size = size + pos - element_start;
+
+ // Pos now points to start of payload
+
+ if ((segment_stop >= 0) && ((pos + size) > segment_stop))
+ return E_FILE_FORMAT_INVALID;
+
+ // We read EBML elements either in total or nothing at all.
+
+ if ((pos + size) > available)
+ return pos + size;
+
+ if (id == 0x0549A966) { // Segment Info ID
+ if (m_pInfo)
+ return E_FILE_FORMAT_INVALID;
+
+ m_pInfo = new (std::nothrow)
+ SegmentInfo(this, pos, size, element_start, element_size);
+
+ if (m_pInfo == NULL)
return -1;
- if ((total >= 0) && (available > total))
+ const long status = m_pInfo->Parse();
+
+ if (status)
+ return status;
+ } else if (id == 0x0654AE6B) { // Tracks ID
+ if (m_pTracks)
+ return E_FILE_FORMAT_INVALID;
+
+ m_pTracks = new (std::nothrow)
+ Tracks(this, pos, size, element_start, element_size);
+
+ if (m_pTracks == NULL)
return -1;
- //I would assume that in practice this loop would execute
- //exactly once, but we allow for other elements (e.g. Void)
- //to immediately follow the EBML header. This is fine for
- //the source filter case (since the entire file is available),
- //but in the splitter case over a network we should probably
- //just give up early. We could for example decide only to
- //execute this loop a maximum of, say, 10 times.
- //TODO:
- //There is an implied "give up early" by only parsing up
- //to the available limit. We do do that, but only if the
- //total file size is unknown. We could decide to always
- //use what's available as our limit (irrespective of whether
- //we happen to know the total file length). This would have
- //as its sense "parse this much of the file before giving up",
- //which a slightly different sense from "try to parse up to
- //10 EMBL elements before giving up".
+ const long status = m_pTracks->Parse();
- for (;;)
- {
- if ((total >= 0) && (pos >= total))
- return E_FILE_FORMAT_INVALID;
-
- //Read ID
- long len;
- long long result = GetUIntLength(pReader, pos, len);
-
- if (result) //error, or too few available bytes
- return result;
-
- if ((total >= 0) && ((pos + len) > total))
- return E_FILE_FORMAT_INVALID;
-
- if ((pos + len) > available)
- return pos + len;
-
- const long long idpos = pos;
- const long long id = ReadUInt(pReader, pos, len);
-
- if (id < 0) //error
- return id;
-
- pos += len; //consume ID
-
- //Read Size
-
- result = GetUIntLength(pReader, pos, len);
-
- if (result) //error, or too few available bytes
- return result;
-
- if ((total >= 0) && ((pos + len) > total))
- return E_FILE_FORMAT_INVALID;
-
- if ((pos + len) > available)
- return pos + len;
-
- long long size = ReadUInt(pReader, pos, len);
-
- if (size < 0) //error
- return size;
-
- pos += len; //consume length of size of element
-
- //Pos now points to start of payload
-
- //Handle "unknown size" for live streaming of webm files.
- const long long unknown_size = (1LL << (7 * len)) - 1;
-
- if (id == 0x08538067) //Segment ID
- {
- if (size == unknown_size)
- size = -1;
-
- else if (total < 0)
- size = -1;
-
- else if ((pos + size) > total)
- size = -1;
-
- pSegment = new (std::nothrow) Segment(
- pReader,
- idpos,
- //elem_size
- pos,
- size);
-
- if (pSegment == 0)
- return -1; //generic error
-
- return 0; //success
- }
-
- if (size == unknown_size)
- return E_FILE_FORMAT_INVALID;
-
- if ((total >= 0) && ((pos + size) > total))
- return E_FILE_FORMAT_INVALID;
-
- if ((pos + size) > available)
- return pos + size;
-
- pos += size; //consume payload
- }
-}
-
-
-long long Segment::ParseHeaders()
-{
- //Outermost (level 0) segment object has been constructed,
- //and pos designates start of payload. We need to find the
- //inner (level 1) elements.
- long long total, available;
-
- const int status = m_pReader->Length(&total, &available);
-
- if (status < 0) //error
+ if (status)
return status;
+ } else if (id == 0x0C53BB6B) { // Cues ID
+ if (m_pCues == NULL) {
+ m_pCues = new (std::nothrow)
+ Cues(this, pos, size, element_start, element_size);
- assert((total < 0) || (available <= total));
+ if (m_pCues == NULL)
+ return -1;
+ }
+ } else if (id == 0x014D9B74) { // SeekHead ID
+ if (m_pSeekHead == NULL) {
+ m_pSeekHead = new (std::nothrow)
+ SeekHead(this, pos, size, element_start, element_size);
- const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size;
- assert((segment_stop < 0) || (total < 0) || (segment_stop <= total));
- assert((segment_stop < 0) || (m_pos <= segment_stop));
+ if (m_pSeekHead == NULL)
+ return -1;
- for (;;)
- {
- if ((total >= 0) && (m_pos >= total))
- break;
+ const long status = m_pSeekHead->Parse();
- if ((segment_stop >= 0) && (m_pos >= segment_stop))
- break;
+ if (status)
+ return status;
+ }
+ } else if (id == 0x0043A770) { // Chapters ID
+ if (m_pChapters == NULL) {
+ m_pChapters = new (std::nothrow)
+ Chapters(this, pos, size, element_start, element_size);
- long long pos = m_pos;
- const long long element_start = pos;
+ if (m_pChapters == NULL)
+ return -1;
- if ((pos + 1) > available)
- return (pos + 1);
+ const long status = m_pChapters->Parse();
- long len;
- long long result = GetUIntLength(m_pReader, pos, len);
-
- if (result < 0) //error
- return result;
-
- if (result > 0) //underflow (weird)
- return (pos + 1);
-
- if ((segment_stop >= 0) && ((pos + len) > segment_stop))
- return E_FILE_FORMAT_INVALID;
-
- if ((pos + len) > available)
- return pos + len;
-
- const long long idpos = pos;
- const long long id = ReadUInt(m_pReader, idpos, len);
-
- if (id < 0) //error
- return id;
-
- if (id == 0x0F43B675) //Cluster ID
- break;
-
- pos += len; //consume ID
-
- if ((pos + 1) > available)
- return (pos + 1);
-
- //Read Size
- result = GetUIntLength(m_pReader, pos, len);
-
- if (result < 0) //error
- return result;
-
- if (result > 0) //underflow (weird)
- return (pos + 1);
-
- if ((segment_stop >= 0) && ((pos + len) > segment_stop))
- return E_FILE_FORMAT_INVALID;
-
- if ((pos + len) > available)
- return pos + len;
-
- const long long size = ReadUInt(m_pReader, pos, len);
-
- if (size < 0) //error
- return size;
-
- pos += len; //consume length of size of element
-
- const long long element_size = size + pos - element_start;
-
- //Pos now points to start of payload
-
- if ((segment_stop >= 0) && ((pos + size) > segment_stop))
- return E_FILE_FORMAT_INVALID;
-
- //We read EBML elements either in total or nothing at all.
-
- if ((pos + size) > available)
- return pos + size;
-
- if (id == 0x0549A966) //Segment Info ID
- {
- if (m_pInfo)
- return E_FILE_FORMAT_INVALID;
-
- m_pInfo = new (std::nothrow) SegmentInfo(
- this,
- pos,
- size,
- element_start,
- element_size);
-
- if (m_pInfo == NULL)
- return -1;
-
- const long status = m_pInfo->Parse();
-
- if (status)
- return status;
- }
- else if (id == 0x0654AE6B) //Tracks ID
- {
- if (m_pTracks)
- return E_FILE_FORMAT_INVALID;
-
- m_pTracks = new (std::nothrow) Tracks(this,
- pos,
- size,
- element_start,
- element_size);
-
- if (m_pTracks == NULL)
- return -1;
-
- const long status = m_pTracks->Parse();
-
- if (status)
- return status;
- }
- else if (id == 0x0C53BB6B) //Cues ID
- {
- if (m_pCues == NULL)
- {
- m_pCues = new (std::nothrow) Cues(
- this,
- pos,
- size,
- element_start,
- element_size);
-
- if (m_pCues == NULL)
- return -1;
- }
- }
- else if (id == 0x014D9B74) //SeekHead ID
- {
- if (m_pSeekHead == NULL)
- {
- m_pSeekHead = new (std::nothrow) SeekHead(
- this,
- pos,
- size,
- element_start,
- element_size);
-
- if (m_pSeekHead == NULL)
- return -1;
-
- const long status = m_pSeekHead->Parse();
-
- if (status)
- return status;
- }
- }
- else if (id == 0x0043A770) //Chapters ID
- {
- if (m_pChapters == NULL)
- {
- m_pChapters = new (std::nothrow) Chapters(
- this,
- pos,
- size,
- element_start,
- element_size);
-
- if (m_pChapters == NULL)
- return -1;
-
- const long status = m_pChapters->Parse();
-
- if (status)
- return status;
- }
- }
-
- m_pos = pos + size; //consume payload
+ if (status)
+ return status;
+ }
}
- assert((segment_stop < 0) || (m_pos <= segment_stop));
+ m_pos = pos + size; // consume payload
+ }
- if (m_pInfo == NULL) //TODO: liberalize this behavior
- return E_FILE_FORMAT_INVALID;
+ assert((segment_stop < 0) || (m_pos <= segment_stop));
- if (m_pTracks == NULL)
- return E_FILE_FORMAT_INVALID;
+ if (m_pInfo == NULL) // TODO: liberalize this behavior
+ return E_FILE_FORMAT_INVALID;
- return 0; //success
+ if (m_pTracks == NULL)
+ return E_FILE_FORMAT_INVALID;
+
+ return 0; // success
}
+long Segment::LoadCluster(long long& pos, long& len) {
+ for (;;) {
+ const long result = DoLoadCluster(pos, len);
-long Segment::LoadCluster(
- long long& pos,
- long& len)
-{
- for (;;)
- {
- const long result = DoLoadCluster(pos, len);
+ if (result <= 1)
+ return result;
+ }
+}
- if (result <= 1)
- return result;
+long Segment::DoLoadCluster(long long& pos, long& len) {
+ if (m_pos < 0)
+ return DoLoadClusterUnknownSize(pos, len);
+
+ long long total, avail;
+
+ long status = m_pReader->Length(&total, &avail);
+
+ if (status < 0) // error
+ return status;
+
+ assert((total < 0) || (avail <= total));
+
+ const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size;
+
+ long long cluster_off = -1; // offset relative to start of segment
+ long long cluster_size = -1; // size of cluster payload
+
+ for (;;) {
+ if ((total >= 0) && (m_pos >= total))
+ return 1; // no more clusters
+
+ if ((segment_stop >= 0) && (m_pos >= segment_stop))
+ return 1; // no more clusters
+
+ pos = m_pos;
+
+ // Read ID
+
+ if ((pos + 1) > avail) {
+ len = 1;
+ return E_BUFFER_NOT_FULL;
}
-}
+ long long result = GetUIntLength(m_pReader, pos, len);
-long Segment::DoLoadCluster(
- long long& pos,
- long& len)
-{
- if (m_pos < 0)
- return DoLoadClusterUnknownSize(pos, len);
+ if (result < 0) // error
+ return static_cast<long>(result);
- long long total, avail;
+ if (result > 0) // weird
+ return E_BUFFER_NOT_FULL;
- long status = m_pReader->Length(&total, &avail);
+ if ((segment_stop >= 0) && ((pos + len) > segment_stop))
+ return E_FILE_FORMAT_INVALID;
- if (status < 0) //error
- return status;
+ if ((pos + len) > avail)
+ return E_BUFFER_NOT_FULL;
- assert((total < 0) || (avail <= total));
+ const long long idpos = pos;
+ const long long id = ReadUInt(m_pReader, idpos, len);
- const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size;
+ if (id < 0) // error (or underflow)
+ return static_cast<long>(id);
- long long cluster_off = -1; //offset relative to start of segment
- long long cluster_size = -1; //size of cluster payload
+ pos += len; // consume ID
- for (;;)
- {
- if ((total >= 0) && (m_pos >= total))
- return 1; //no more clusters
+ // Read Size
- if ((segment_stop >= 0) && (m_pos >= segment_stop))
- return 1; //no more clusters
+ if ((pos + 1) > avail) {
+ len = 1;
+ return E_BUFFER_NOT_FULL;
+ }
- pos = m_pos;
+ result = GetUIntLength(m_pReader, pos, len);
- //Read ID
+ if (result < 0) // error
+ return static_cast<long>(result);
- if ((pos + 1) > avail)
- {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
+ if (result > 0) // weird
+ return E_BUFFER_NOT_FULL;
- long long result = GetUIntLength(m_pReader, pos, len);
+ if ((segment_stop >= 0) && ((pos + len) > segment_stop))
+ return E_FILE_FORMAT_INVALID;
- if (result < 0) //error
- return static_cast<long>(result);
+ if ((pos + len) > avail)
+ return E_BUFFER_NOT_FULL;
- if (result > 0) //weird
- return E_BUFFER_NOT_FULL;
+ const long long size = ReadUInt(m_pReader, pos, len);
- if ((segment_stop >= 0) && ((pos + len) > segment_stop))
- return E_FILE_FORMAT_INVALID;
+ if (size < 0) // error
+ return static_cast<long>(size);
- if ((pos + len) > avail)
- return E_BUFFER_NOT_FULL;
+ pos += len; // consume length of size of element
- const long long idpos = pos;
- const long long id = ReadUInt(m_pReader, idpos, len);
+ // pos now points to start of payload
- if (id < 0) //error (or underflow)
- return static_cast<long>(id);
+ if (size == 0) { // weird
+ m_pos = pos;
+ continue;
+ }
- pos += len; //consume ID
+ const long long unknown_size = (1LL << (7 * len)) - 1;
- //Read Size
-
- if ((pos + 1) > avail)
- {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
-
- result = GetUIntLength(m_pReader, pos, len);
-
- if (result < 0) //error
- return static_cast<long>(result);
-
- if (result > 0) //weird
- return E_BUFFER_NOT_FULL;
-
- if ((segment_stop >= 0) && ((pos + len) > segment_stop))
- return E_FILE_FORMAT_INVALID;
-
- if ((pos + len) > avail)
- return E_BUFFER_NOT_FULL;
-
- const long long size = ReadUInt(m_pReader, pos, len);
-
- if (size < 0) //error
- return static_cast<long>(size);
-
- pos += len; //consume length of size of element
-
- //pos now points to start of payload
-
- if (size == 0) //weird
- {
- m_pos = pos;
- continue;
- }
-
- const long long unknown_size = (1LL << (7 * len)) - 1;
-
-#if 0 //we must handle this to support live webm
+#if 0 // we must handle this to support live webm
if (size == unknown_size)
return E_FILE_FORMAT_INVALID; //TODO: allow this
#endif
- if ((segment_stop >= 0) &&
- (size != unknown_size) &&
- ((pos + size) > segment_stop))
- {
- return E_FILE_FORMAT_INVALID;
- }
+ if ((segment_stop >= 0) && (size != unknown_size) &&
+ ((pos + size) > segment_stop)) {
+ return E_FILE_FORMAT_INVALID;
+ }
-#if 0 //commented-out, to support incremental cluster parsing
+#if 0 // commented-out, to support incremental cluster parsing
len = static_cast<long>(size);
if ((pos + size) > avail)
return E_BUFFER_NOT_FULL;
#endif
- if (id == 0x0C53BB6B) //Cues ID
- {
- if (size == unknown_size)
- return E_FILE_FORMAT_INVALID; //TODO: liberalize
+ if (id == 0x0C53BB6B) { // Cues ID
+ if (size == unknown_size)
+ return E_FILE_FORMAT_INVALID; // TODO: liberalize
- if (m_pCues == NULL)
- {
- const long long element_size = (pos - idpos) + size;
+ if (m_pCues == NULL) {
+ const long long element_size = (pos - idpos) + size;
- m_pCues = new Cues(this,
- pos,
- size,
- idpos,
- element_size);
- assert(m_pCues); //TODO
- }
+ m_pCues = new Cues(this, pos, size, idpos, element_size);
+ assert(m_pCues); // TODO
+ }
- m_pos = pos + size; //consume payload
- continue;
- }
-
- if (id != 0x0F43B675) //Cluster ID
- {
- if (size == unknown_size)
- return E_FILE_FORMAT_INVALID; //TODO: liberalize
-
- m_pos = pos + size; //consume payload
- continue;
- }
-
- //We have a cluster.
-
- cluster_off = idpos - m_start; //relative pos
-
- if (size != unknown_size)
- cluster_size = size;
-
- break;
+ m_pos = pos + size; // consume payload
+ continue;
}
- assert(cluster_off >= 0); //have cluster
+ if (id != 0x0F43B675) { // Cluster ID
+ if (size == unknown_size)
+ return E_FILE_FORMAT_INVALID; // TODO: liberalize
- long long pos_;
- long len_;
-
- status = Cluster::HasBlockEntries(this, cluster_off, pos_, len_);
-
- if (status < 0) //error, or underflow
- {
- pos = pos_;
- len = len_;
-
- return status;
+ m_pos = pos + size; // consume payload
+ continue;
}
- //status == 0 means "no block entries found"
- //status > 0 means "found at least one block entry"
+ // We have a cluster.
- //TODO:
- //The issue here is that the segment increments its own
- //pos ptr past the most recent cluster parsed, and then
- //starts from there to parse the next cluster. If we
- //don't know the size of the current cluster, then we
- //must either parse its payload (as we do below), looking
- //for the cluster (or cues) ID to terminate the parse.
- //This isn't really what we want: rather, we really need
- //a way to create the curr cluster object immediately.
- //The pity is that cluster::parse can determine its own
- //boundary, and we largely duplicate that same logic here.
- //
- //Maybe we need to get rid of our look-ahead preloading
- //in source::parse???
- //
- //As we're parsing the blocks in the curr cluster
- //(in cluster::parse), we should have some way to signal
- //to the segment that we have determined the boundary,
- //so it can adjust its own segment::m_pos member.
- //
- //The problem is that we're asserting in asyncreadinit,
- //because we adjust the pos down to the curr seek pos,
- //and the resulting adjusted len is > 2GB. I'm suspicious
- //that this is even correct, but even if it is, we can't
- //be loading that much data in the cache anyway.
+ cluster_off = idpos - m_start; // relative pos
- const long idx = m_clusterCount;
+ if (size != unknown_size)
+ cluster_size = size;
- if (m_clusterPreloadCount > 0)
- {
- assert(idx < m_clusterSize);
+ break;
+ }
- Cluster* const pCluster = m_clusters[idx];
- assert(pCluster);
- assert(pCluster->m_index < 0);
+ assert(cluster_off >= 0); // have cluster
- const long long off = pCluster->GetPosition();
- assert(off >= 0);
+ long long pos_;
+ long len_;
- if (off == cluster_off) //preloaded already
- {
- if (status == 0) //no entries found
- return E_FILE_FORMAT_INVALID;
+ status = Cluster::HasBlockEntries(this, cluster_off, pos_, len_);
- if (cluster_size >= 0)
- pos += cluster_size;
- else
- {
- const long long element_size = pCluster->GetElementSize();
+ if (status < 0) { // error, or underflow
+ pos = pos_;
+ len = len_;
- if (element_size <= 0)
- return E_FILE_FORMAT_INVALID; //TODO: handle this case
+ return status;
+ }
- pos = pCluster->m_element_start + element_size;
- }
+ // status == 0 means "no block entries found"
+ // status > 0 means "found at least one block entry"
- pCluster->m_index = idx; //move from preloaded to loaded
- ++m_clusterCount;
- --m_clusterPreloadCount;
+ // TODO:
+ // The issue here is that the segment increments its own
+ // pos ptr past the most recent cluster parsed, and then
+ // starts from there to parse the next cluster. If we
+ // don't know the size of the current cluster, then we
+ // must either parse its payload (as we do below), looking
+ // for the cluster (or cues) ID to terminate the parse.
+ // This isn't really what we want: rather, we really need
+ // a way to create the curr cluster object immediately.
+ // The pity is that cluster::parse can determine its own
+ // boundary, and we largely duplicate that same logic here.
+ //
+ // Maybe we need to get rid of our look-ahead preloading
+ // in source::parse???
+ //
+ // As we're parsing the blocks in the curr cluster
+ //(in cluster::parse), we should have some way to signal
+ // to the segment that we have determined the boundary,
+ // so it can adjust its own segment::m_pos member.
+ //
+ // The problem is that we're asserting in asyncreadinit,
+ // because we adjust the pos down to the curr seek pos,
+ // and the resulting adjusted len is > 2GB. I'm suspicious
+ // that this is even correct, but even if it is, we can't
+ // be loading that much data in the cache anyway.
- m_pos = pos; //consume payload
- assert((segment_stop < 0) || (m_pos <= segment_stop));
+ const long idx = m_clusterCount;
- return 0; //success
- }
- }
-
- if (status == 0) //no entries found
- {
- if (cluster_size < 0)
- return E_FILE_FORMAT_INVALID; //TODO: handle this
-
- pos += cluster_size;
-
- if ((total >= 0) && (pos >= total))
- {
- m_pos = total;
- return 1; //no more clusters
- }
-
- if ((segment_stop >= 0) && (pos >= segment_stop))
- {
- m_pos = segment_stop;
- return 1; //no more clusters
- }
-
- m_pos = pos;
- return 2; //try again
- }
-
- //status > 0 means we have an entry
-
- Cluster* const pCluster = Cluster::Create(this,
- idx,
- cluster_off);
- //element_size);
- assert(pCluster);
-
- AppendCluster(pCluster);
- assert(m_clusters);
+ if (m_clusterPreloadCount > 0) {
assert(idx < m_clusterSize);
- assert(m_clusters[idx] == pCluster);
- if (cluster_size >= 0)
- {
+ Cluster* const pCluster = m_clusters[idx];
+ assert(pCluster);
+ assert(pCluster->m_index < 0);
+
+ const long long off = pCluster->GetPosition();
+ assert(off >= 0);
+
+ if (off == cluster_off) { // preloaded already
+ if (status == 0) // no entries found
+ return E_FILE_FORMAT_INVALID;
+
+ if (cluster_size >= 0)
pos += cluster_size;
+ else {
+ const long long element_size = pCluster->GetElementSize();
- m_pos = pos;
- assert((segment_stop < 0) || (m_pos <= segment_stop));
+ if (element_size <= 0)
+ return E_FILE_FORMAT_INVALID; // TODO: handle this case
- return 0;
+ pos = pCluster->m_element_start + element_size;
+ }
+
+ pCluster->m_index = idx; // move from preloaded to loaded
+ ++m_clusterCount;
+ --m_clusterPreloadCount;
+
+ m_pos = pos; // consume payload
+ assert((segment_stop < 0) || (m_pos <= segment_stop));
+
+ return 0; // success
+ }
+ }
+
+ if (status == 0) { // no entries found
+ if (cluster_size < 0)
+ return E_FILE_FORMAT_INVALID; // TODO: handle this
+
+ pos += cluster_size;
+
+ if ((total >= 0) && (pos >= total)) {
+ m_pos = total;
+ return 1; // no more clusters
}
- m_pUnknownSize = pCluster;
- m_pos = -pos;
+ if ((segment_stop >= 0) && (pos >= segment_stop)) {
+ m_pos = segment_stop;
+ return 1; // no more clusters
+ }
- return 0; //partial success, since we have a new cluster
+ m_pos = pos;
+ return 2; // try again
+ }
- //status == 0 means "no block entries found"
+ // status > 0 means we have an entry
- //pos designates start of payload
- //m_pos has NOT been adjusted yet (in case we need to come back here)
+ Cluster* const pCluster = Cluster::Create(this, idx, cluster_off);
+ // element_size);
+ assert(pCluster);
+
+ AppendCluster(pCluster);
+ assert(m_clusters);
+ assert(idx < m_clusterSize);
+ assert(m_clusters[idx] == pCluster);
+
+ if (cluster_size >= 0) {
+ pos += cluster_size;
+
+ m_pos = pos;
+ assert((segment_stop < 0) || (m_pos <= segment_stop));
+
+ return 0;
+ }
+
+ m_pUnknownSize = pCluster;
+ m_pos = -pos;
+
+ return 0; // partial success, since we have a new cluster
+
+// status == 0 means "no block entries found"
+
+// pos designates start of payload
+// m_pos has NOT been adjusted yet (in case we need to come back here)
#if 0
- if (cluster_size < 0) //unknown size
- {
+ if (cluster_size < 0) { //unknown size
const long long payload_pos = pos; //absolute pos of cluster payload
- for (;;) //determine cluster size
- {
+ for (;;) { //determine cluster size
if ((total >= 0) && (pos >= total))
break;
@@ -1523,16 +1344,11 @@
return 2; //try to find another cluster
#endif
-
}
-
-long Segment::DoLoadClusterUnknownSize(
- long long& pos,
- long& len)
-{
- assert(m_pos < 0);
- assert(m_pUnknownSize);
+long Segment::DoLoadClusterUnknownSize(long long& pos, long& len) {
+ assert(m_pos < 0);
+ assert(m_pUnknownSize);
#if 0
assert(m_pUnknownSize->GetElementSize() < 0); //TODO: verify this
@@ -1559,8 +1375,7 @@
long long element_size = -1;
- for (;;) //determine cluster size
- {
+ for (;;) { //determine cluster size
if ((total >= 0) && (pos >= total))
{
element_size = total - element_start;
@@ -1609,8 +1424,7 @@
//that we have exhausted the sub-element's inside the cluster
//whose ID we parsed earlier.
- if ((id == 0x0F43B675) || (id == 0x0C53BB6B)) //Cluster ID or Cues ID
- {
+ if ((id == 0x0F43B675) || (id == 0x0C53BB6B)) { //Cluster ID or Cues ID
element_size = pos - element_start;
assert(element_size > 0);
@@ -1687,348 +1501,299 @@
return 2; //continue parsing
#else
- const long status = m_pUnknownSize->Parse(pos, len);
+ const long status = m_pUnknownSize->Parse(pos, len);
- if (status < 0) //error or underflow
- return status;
+ if (status < 0) // error or underflow
+ return status;
- if (status == 0) //parsed a block
- return 2; //continue parsing
+ if (status == 0) // parsed a block
+ return 2; // continue parsing
- assert(status > 0); //nothing left to parse of this cluster
+ assert(status > 0); // nothing left to parse of this cluster
- const long long start = m_pUnknownSize->m_element_start;
+ const long long start = m_pUnknownSize->m_element_start;
- const long long size = m_pUnknownSize->GetElementSize();
- assert(size >= 0);
+ const long long size = m_pUnknownSize->GetElementSize();
+ assert(size >= 0);
- pos = start + size;
- m_pos = pos;
+ pos = start + size;
+ m_pos = pos;
- m_pUnknownSize = 0;
+ m_pUnknownSize = 0;
- return 2; //continue parsing
+ return 2; // continue parsing
#endif
}
+void Segment::AppendCluster(Cluster* pCluster) {
+ assert(pCluster);
+ assert(pCluster->m_index >= 0);
-void Segment::AppendCluster(Cluster* pCluster)
-{
- assert(pCluster);
- assert(pCluster->m_index >= 0);
+ const long count = m_clusterCount + m_clusterPreloadCount;
- const long count = m_clusterCount + m_clusterPreloadCount;
+ long& size = m_clusterSize;
+ assert(size >= count);
- long& size = m_clusterSize;
- assert(size >= count);
+ const long idx = pCluster->m_index;
+ assert(idx == m_clusterCount);
- const long idx = pCluster->m_index;
- assert(idx == m_clusterCount);
+ if (count >= size) {
+ const long n = (size <= 0) ? 2048 : 2 * size;
- if (count >= size)
- {
- const long n = (size <= 0) ? 2048 : 2*size;
+ Cluster** const qq = new Cluster* [n];
+ Cluster** q = qq;
- Cluster** const qq = new Cluster*[n];
- Cluster** q = qq;
+ Cluster** p = m_clusters;
+ Cluster** const pp = p + count;
- Cluster** p = m_clusters;
- Cluster** const pp = p + count;
+ while (p != pp)
+ *q++ = *p++;
- while (p != pp)
- *q++ = *p++;
+ delete[] m_clusters;
- delete[] m_clusters;
+ m_clusters = qq;
+ size = n;
+ }
- m_clusters = qq;
- size = n;
- }
-
- if (m_clusterPreloadCount > 0)
- {
- assert(m_clusters);
-
- Cluster** const p = m_clusters + m_clusterCount;
- assert(*p);
- assert((*p)->m_index < 0);
-
- Cluster** q = p + m_clusterPreloadCount;
- assert(q < (m_clusters + size));
-
- for (;;)
- {
- Cluster** const qq = q - 1;
- assert((*qq)->m_index < 0);
-
- *q = *qq;
- q = qq;
-
- if (q == p)
- break;
- }
- }
-
- m_clusters[idx] = pCluster;
- ++m_clusterCount;
-}
-
-
-void Segment::PreloadCluster(Cluster* pCluster, ptrdiff_t idx)
-{
- assert(pCluster);
- assert(pCluster->m_index < 0);
- assert(idx >= m_clusterCount);
-
- const long count = m_clusterCount + m_clusterPreloadCount;
-
- long& size = m_clusterSize;
- assert(size >= count);
-
- if (count >= size)
- {
- const long n = (size <= 0) ? 2048 : 2*size;
-
- Cluster** const qq = new Cluster*[n];
- Cluster** q = qq;
-
- Cluster** p = m_clusters;
- Cluster** const pp = p + count;
-
- while (p != pp)
- *q++ = *p++;
-
- delete[] m_clusters;
-
- m_clusters = qq;
- size = n;
- }
-
+ if (m_clusterPreloadCount > 0) {
assert(m_clusters);
- Cluster** const p = m_clusters + idx;
+ Cluster** const p = m_clusters + m_clusterCount;
+ assert(*p);
+ assert((*p)->m_index < 0);
- Cluster** q = m_clusters + count;
- assert(q >= p);
+ Cluster** q = p + m_clusterPreloadCount;
assert(q < (m_clusters + size));
- while (q > p)
- {
- Cluster** const qq = q - 1;
- assert((*qq)->m_index < 0);
+ for (;;) {
+ Cluster** const qq = q - 1;
+ assert((*qq)->m_index < 0);
- *q = *qq;
- q = qq;
+ *q = *qq;
+ q = qq;
+
+ if (q == p)
+ break;
}
+ }
- m_clusters[idx] = pCluster;
- ++m_clusterPreloadCount;
+ m_clusters[idx] = pCluster;
+ ++m_clusterCount;
}
+void Segment::PreloadCluster(Cluster* pCluster, ptrdiff_t idx) {
+ assert(pCluster);
+ assert(pCluster->m_index < 0);
+ assert(idx >= m_clusterCount);
-long Segment::Load()
-{
- assert(m_clusters == NULL);
- assert(m_clusterSize == 0);
- assert(m_clusterCount == 0);
- //assert(m_size >= 0);
+ const long count = m_clusterCount + m_clusterPreloadCount;
- //Outermost (level 0) segment object has been constructed,
- //and pos designates start of payload. We need to find the
- //inner (level 1) elements.
+ long& size = m_clusterSize;
+ assert(size >= count);
- const long long header_status = ParseHeaders();
+ if (count >= size) {
+ const long n = (size <= 0) ? 2048 : 2 * size;
- if (header_status < 0) //error
- return static_cast<long>(header_status);
+ Cluster** const qq = new Cluster* [n];
+ Cluster** q = qq;
- if (header_status > 0) //underflow
- return E_BUFFER_NOT_FULL;
+ Cluster** p = m_clusters;
+ Cluster** const pp = p + count;
- assert(m_pInfo);
- assert(m_pTracks);
+ while (p != pp)
+ *q++ = *p++;
- for (;;)
- {
- const int status = LoadCluster();
+ delete[] m_clusters;
- if (status < 0) //error
- return status;
+ m_clusters = qq;
+ size = n;
+ }
- if (status >= 1) //no more clusters
- return 0;
- }
+ assert(m_clusters);
+
+ Cluster** const p = m_clusters + idx;
+
+ Cluster** q = m_clusters + count;
+ assert(q >= p);
+ assert(q < (m_clusters + size));
+
+ while (q > p) {
+ Cluster** const qq = q - 1;
+ assert((*qq)->m_index < 0);
+
+ *q = *qq;
+ q = qq;
+ }
+
+ m_clusters[idx] = pCluster;
+ ++m_clusterPreloadCount;
}
+long Segment::Load() {
+ assert(m_clusters == NULL);
+ assert(m_clusterSize == 0);
+ assert(m_clusterCount == 0);
+ // assert(m_size >= 0);
-SeekHead::SeekHead(
- Segment* pSegment,
- long long start,
- long long size_,
- long long element_start,
- long long element_size) :
- m_pSegment(pSegment),
- m_start(start),
- m_size(size_),
- m_element_start(element_start),
- m_element_size(element_size),
- m_entries(0),
- m_entry_count(0),
- m_void_elements(0),
- m_void_element_count(0)
-{
+ // Outermost (level 0) segment object has been constructed,
+ // and pos designates start of payload. We need to find the
+ // inner (level 1) elements.
+
+ const long long header_status = ParseHeaders();
+
+ if (header_status < 0) // error
+ return static_cast<long>(header_status);
+
+ if (header_status > 0) // underflow
+ return E_BUFFER_NOT_FULL;
+
+ assert(m_pInfo);
+ assert(m_pTracks);
+
+ for (;;) {
+ const int status = LoadCluster();
+
+ if (status < 0) // error
+ return status;
+
+ if (status >= 1) // no more clusters
+ return 0;
+ }
}
+SeekHead::SeekHead(Segment* pSegment, long long start, long long size_,
+ long long element_start, long long element_size)
+ : m_pSegment(pSegment),
+ m_start(start),
+ m_size(size_),
+ m_element_start(element_start),
+ m_element_size(element_size),
+ m_entries(0),
+ m_entry_count(0),
+ m_void_elements(0),
+ m_void_element_count(0) {}
-SeekHead::~SeekHead()
-{
- delete[] m_entries;
- delete[] m_void_elements;
+SeekHead::~SeekHead() {
+ delete[] m_entries;
+ delete[] m_void_elements;
}
+long SeekHead::Parse() {
+ IMkvReader* const pReader = m_pSegment->m_pReader;
-long SeekHead::Parse()
-{
- IMkvReader* const pReader = m_pSegment->m_pReader;
+ long long pos = m_start;
+ const long long stop = m_start + m_size;
- long long pos = m_start;
- const long long stop = m_start + m_size;
+ // first count the seek head entries
- //first count the seek head entries
+ int entry_count = 0;
+ int void_element_count = 0;
- int entry_count = 0;
- int void_element_count = 0;
+ while (pos < stop) {
+ long long id, size;
- while (pos < stop)
- {
- long long id, size;
+ const long status = ParseElementHeader(pReader, pos, stop, id, size);
- const long status = ParseElementHeader(
- pReader,
- pos,
- stop,
- id,
- size);
+ if (status < 0) // error
+ return status;
- if (status < 0) //error
- return status;
+ if (id == 0x0DBB) // SeekEntry ID
+ ++entry_count;
+ else if (id == 0x6C) // Void ID
+ ++void_element_count;
- if (id == 0x0DBB) //SeekEntry ID
- ++entry_count;
- else if (id == 0x6C) //Void ID
- ++void_element_count;
+ pos += size; // consume payload
+ assert(pos <= stop);
+ }
- pos += size; //consume payload
- assert(pos <= stop);
+ assert(pos == stop);
+
+ m_entries = new (std::nothrow) Entry[entry_count];
+
+ if (m_entries == NULL)
+ return -1;
+
+ m_void_elements = new (std::nothrow) VoidElement[void_element_count];
+
+ if (m_void_elements == NULL)
+ return -1;
+
+ // now parse the entries and void elements
+
+ Entry* pEntry = m_entries;
+ VoidElement* pVoidElement = m_void_elements;
+
+ pos = m_start;
+
+ while (pos < stop) {
+ const long long idpos = pos;
+
+ long long id, size;
+
+ const long status = ParseElementHeader(pReader, pos, stop, id, size);
+
+ if (status < 0) // error
+ return status;
+
+ if (id == 0x0DBB) { // SeekEntry ID
+ if (ParseEntry(pReader, pos, size, pEntry)) {
+ Entry& e = *pEntry++;
+
+ e.element_start = idpos;
+ e.element_size = (pos + size) - idpos;
+ }
+ } else if (id == 0x6C) { // Void ID
+ VoidElement& e = *pVoidElement++;
+
+ e.element_start = idpos;
+ e.element_size = (pos + size) - idpos;
}
- assert(pos == stop);
+ pos += size; // consume payload
+ assert(pos <= stop);
+ }
- m_entries = new (std::nothrow) Entry[entry_count];
+ assert(pos == stop);
- if (m_entries == NULL)
- return -1;
+ ptrdiff_t count_ = ptrdiff_t(pEntry - m_entries);
+ assert(count_ >= 0);
+ assert(count_ <= entry_count);
- m_void_elements = new (std::nothrow) VoidElement[void_element_count];
+ m_entry_count = static_cast<int>(count_);
- if (m_void_elements == NULL)
- return -1;
+ count_ = ptrdiff_t(pVoidElement - m_void_elements);
+ assert(count_ >= 0);
+ assert(count_ <= void_element_count);
- //now parse the entries and void elements
+ m_void_element_count = static_cast<int>(count_);
- Entry* pEntry = m_entries;
- VoidElement* pVoidElement = m_void_elements;
+ return 0;
+}
- pos = m_start;
+int SeekHead::GetCount() const { return m_entry_count; }
- while (pos < stop)
- {
- const long long idpos = pos;
-
- long long id, size;
-
- const long status = ParseElementHeader(
- pReader,
- pos,
- stop,
- id,
- size);
-
- if (status < 0) //error
- return status;
-
- if (id == 0x0DBB) //SeekEntry ID
- {
- if (ParseEntry(pReader, pos, size, pEntry))
- {
- Entry& e = *pEntry++;
-
- e.element_start = idpos;
- e.element_size = (pos + size) - idpos;
- }
- }
- else if (id == 0x6C) //Void ID
- {
- VoidElement& e = *pVoidElement++;
-
- e.element_start = idpos;
- e.element_size = (pos + size) - idpos;
- }
-
- pos += size; //consume payload
- assert(pos <= stop);
- }
-
- assert(pos == stop);
-
- ptrdiff_t count_ = ptrdiff_t(pEntry - m_entries);
- assert(count_ >= 0);
- assert(count_ <= entry_count);
-
- m_entry_count = static_cast<int>(count_);
-
- count_ = ptrdiff_t(pVoidElement - m_void_elements);
- assert(count_ >= 0);
- assert(count_ <= void_element_count);
-
- m_void_element_count = static_cast<int>(count_);
-
+const SeekHead::Entry* SeekHead::GetEntry(int idx) const {
+ if (idx < 0)
return 0;
+
+ if (idx >= m_entry_count)
+ return 0;
+
+ return m_entries + idx;
}
+int SeekHead::GetVoidElementCount() const { return m_void_element_count; }
-int SeekHead::GetCount() const
-{
- return m_entry_count;
+const SeekHead::VoidElement* SeekHead::GetVoidElement(int idx) const {
+ if (idx < 0)
+ return 0;
+
+ if (idx >= m_void_element_count)
+ return 0;
+
+ return m_void_elements + idx;
}
-const SeekHead::Entry* SeekHead::GetEntry(int idx) const
-{
- if (idx < 0)
- return 0;
-
- if (idx >= m_entry_count)
- return 0;
-
- return m_entries + idx;
-}
-
-int SeekHead::GetVoidElementCount() const
-{
- return m_void_element_count;
-}
-
-const SeekHead::VoidElement* SeekHead::GetVoidElement(int idx) const
-{
- if (idx < 0)
- return 0;
-
- if (idx >= m_void_element_count)
- return 0;
-
- return m_void_elements + idx;
-}
-
-
#if 0
void Segment::ParseCues(long long off)
{
@@ -2078,133 +1843,122 @@
//os << "Segment::ParseCues (end)" << endl;
}
#else
-long Segment::ParseCues(
- long long off,
- long long& pos,
- long& len)
-{
- if (m_pCues)
- return 0; //success
+long Segment::ParseCues(long long off, long long& pos, long& len) {
+ if (m_pCues)
+ return 0; // success
- if (off < 0)
- return -1;
+ if (off < 0)
+ return -1;
- long long total, avail;
+ long long total, avail;
- const int status = m_pReader->Length(&total, &avail);
+ const int status = m_pReader->Length(&total, &avail);
- if (status < 0) //error
- return status;
+ if (status < 0) // error
+ return status;
- assert((total < 0) || (avail <= total));
+ assert((total < 0) || (avail <= total));
- pos = m_start + off;
+ pos = m_start + off;
- if ((total < 0) || (pos >= total))
- return 1; //don't bother parsing cues
+ if ((total < 0) || (pos >= total))
+ return 1; // don't bother parsing cues
- const long long element_start = pos;
- const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size;
+ const long long element_start = pos;
+ const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size;
- if ((pos + 1) > avail)
- {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
+ if ((pos + 1) > avail) {
+ len = 1;
+ return E_BUFFER_NOT_FULL;
+ }
- long long result = GetUIntLength(m_pReader, pos, len);
+ long long result = GetUIntLength(m_pReader, pos, len);
- if (result < 0) //error
- return static_cast<long>(result);
+ if (result < 0) // error
+ return static_cast<long>(result);
- if (result > 0) //underflow (weird)
- {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
+ if (result > 0) // underflow (weird)
+ {
+ len = 1;
+ return E_BUFFER_NOT_FULL;
+ }
- if ((segment_stop >= 0) && ((pos + len) > segment_stop))
- return E_FILE_FORMAT_INVALID;
+ if ((segment_stop >= 0) && ((pos + len) > segment_stop))
+ return E_FILE_FORMAT_INVALID;
- if ((pos + len) > avail)
- return E_BUFFER_NOT_FULL;
+ if ((pos + len) > avail)
+ return E_BUFFER_NOT_FULL;
- const long long idpos = pos;
+ const long long idpos = pos;
- const long long id = ReadUInt(m_pReader, idpos, len);
+ const long long id = ReadUInt(m_pReader, idpos, len);
- if (id != 0x0C53BB6B) //Cues ID
- return E_FILE_FORMAT_INVALID;
+ if (id != 0x0C53BB6B) // Cues ID
+ return E_FILE_FORMAT_INVALID;
- pos += len; //consume ID
- assert((segment_stop < 0) || (pos <= segment_stop));
+ pos += len; // consume ID
+ assert((segment_stop < 0) || (pos <= segment_stop));
- //Read Size
+ // Read Size
- if ((pos + 1) > avail)
- {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
+ if ((pos + 1) > avail) {
+ len = 1;
+ return E_BUFFER_NOT_FULL;
+ }
- result = GetUIntLength(m_pReader, pos, len);
+ result = GetUIntLength(m_pReader, pos, len);
- if (result < 0) //error
- return static_cast<long>(result);
+ if (result < 0) // error
+ return static_cast<long>(result);
- if (result > 0) //underflow (weird)
- {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
+ if (result > 0) // underflow (weird)
+ {
+ len = 1;
+ return E_BUFFER_NOT_FULL;
+ }
- if ((segment_stop >= 0) && ((pos + len) > segment_stop))
- return E_FILE_FORMAT_INVALID;
+ if ((segment_stop >= 0) && ((pos + len) > segment_stop))
+ return E_FILE_FORMAT_INVALID;
- if ((pos + len) > avail)
- return E_BUFFER_NOT_FULL;
+ if ((pos + len) > avail)
+ return E_BUFFER_NOT_FULL;
- const long long size = ReadUInt(m_pReader, pos, len);
+ const long long size = ReadUInt(m_pReader, pos, len);
- if (size < 0) //error
- return static_cast<long>(size);
+ if (size < 0) // error
+ return static_cast<long>(size);
- if (size == 0) //weird, although technically not illegal
- return 1; //done
+ if (size == 0) // weird, although technically not illegal
+ return 1; // done
- pos += len; //consume length of size of element
- assert((segment_stop < 0) || (pos <= segment_stop));
+ pos += len; // consume length of size of element
+ assert((segment_stop < 0) || (pos <= segment_stop));
- //Pos now points to start of payload
+ // Pos now points to start of payload
- const long long element_stop = pos + size;
+ const long long element_stop = pos + size;
- if ((segment_stop >= 0) && (element_stop > segment_stop))
- return E_FILE_FORMAT_INVALID;
+ if ((segment_stop >= 0) && (element_stop > segment_stop))
+ return E_FILE_FORMAT_INVALID;
- if ((total >= 0) && (element_stop > total))
- return 1; //don't bother parsing anymore
+ if ((total >= 0) && (element_stop > total))
+ return 1; // don't bother parsing anymore
- len = static_cast<long>(size);
+ len = static_cast<long>(size);
- if (element_stop > avail)
- return E_BUFFER_NOT_FULL;
+ if (element_stop > avail)
+ return E_BUFFER_NOT_FULL;
- const long long element_size = element_stop - element_start;
+ const long long element_size = element_stop - element_start;
- m_pCues = new (std::nothrow) Cues(
- this,
- pos,
- size,
- element_start,
- element_size);
- assert(m_pCues); //TODO
+ m_pCues =
+ new (std::nothrow) Cues(this, pos, size, element_start, element_size);
+ assert(m_pCues); // TODO
- return 0; //success
+ return 0; // success
}
#endif
-
#if 0
void Segment::ParseSeekEntry(
long long start,
@@ -2264,304 +2018,269 @@
ParseCues(seekOff);
}
#else
-bool SeekHead::ParseEntry(
- IMkvReader* pReader,
- long long start,
- long long size_,
- Entry* pEntry)
-{
- if (size_ <= 0)
- return false;
+bool SeekHead::ParseEntry(IMkvReader* pReader, long long start, long long size_,
+ Entry* pEntry) {
+ if (size_ <= 0)
+ return false;
- long long pos = start;
- const long long stop = start + size_;
+ long long pos = start;
+ const long long stop = start + size_;
- long len;
+ long len;
- //parse the container for the level-1 element ID
+ // parse the container for the level-1 element ID
- const long long seekIdId = ReadUInt(pReader, pos, len);
- //seekIdId;
+ const long long seekIdId = ReadUInt(pReader, pos, len);
+ // seekIdId;
- if (seekIdId != 0x13AB) //SeekID ID
- return false;
+ if (seekIdId != 0x13AB) // SeekID ID
+ return false;
- if ((pos + len) > stop)
- return false;
+ if ((pos + len) > stop)
+ return false;
- pos += len; //consume SeekID id
+ pos += len; // consume SeekID id
- const long long seekIdSize = ReadUInt(pReader, pos, len);
+ const long long seekIdSize = ReadUInt(pReader, pos, len);
- if (seekIdSize <= 0)
- return false;
+ if (seekIdSize <= 0)
+ return false;
- if ((pos + len) > stop)
- return false;
+ if ((pos + len) > stop)
+ return false;
- pos += len; //consume size of field
+ pos += len; // consume size of field
- if ((pos + seekIdSize) > stop)
- return false;
+ if ((pos + seekIdSize) > stop)
+ return false;
- //Note that the SeekId payload really is serialized
- //as a "Matroska integer", not as a plain binary value.
- //In fact, Matroska requires that ID values in the
- //stream exactly match the binary representation as listed
- //in the Matroska specification.
- //
- //This parser is more liberal, and permits IDs to have
- //any width. (This could make the representation in the stream
- //different from what's in the spec, but it doesn't matter here,
- //since we always normalize "Matroska integer" values.)
+ // Note that the SeekId payload really is serialized
+ // as a "Matroska integer", not as a plain binary value.
+ // In fact, Matroska requires that ID values in the
+ // stream exactly match the binary representation as listed
+ // in the Matroska specification.
+ //
+ // This parser is more liberal, and permits IDs to have
+ // any width. (This could make the representation in the stream
+ // different from what's in the spec, but it doesn't matter here,
+ // since we always normalize "Matroska integer" values.)
- pEntry->id = ReadUInt(pReader, pos, len); //payload
+ pEntry->id = ReadUInt(pReader, pos, len); // payload
- if (pEntry->id <= 0)
- return false;
+ if (pEntry->id <= 0)
+ return false;
- if (len != seekIdSize)
- return false;
+ if (len != seekIdSize)
+ return false;
- pos += seekIdSize; //consume SeekID payload
+ pos += seekIdSize; // consume SeekID payload
- const long long seekPosId = ReadUInt(pReader, pos, len);
+ const long long seekPosId = ReadUInt(pReader, pos, len);
- if (seekPosId != 0x13AC) //SeekPos ID
- return false;
+ if (seekPosId != 0x13AC) // SeekPos ID
+ return false;
- if ((pos + len) > stop)
- return false;
+ if ((pos + len) > stop)
+ return false;
- pos += len; //consume id
+ pos += len; // consume id
- const long long seekPosSize = ReadUInt(pReader, pos, len);
+ const long long seekPosSize = ReadUInt(pReader, pos, len);
- if (seekPosSize <= 0)
- return false;
+ if (seekPosSize <= 0)
+ return false;
- if ((pos + len) > stop)
- return false;
+ if ((pos + len) > stop)
+ return false;
- pos += len; //consume size
+ pos += len; // consume size
- if ((pos + seekPosSize) > stop)
- return false;
+ if ((pos + seekPosSize) > stop)
+ return false;
- pEntry->pos = UnserializeUInt(pReader, pos, seekPosSize);
+ pEntry->pos = UnserializeUInt(pReader, pos, seekPosSize);
- if (pEntry->pos < 0)
- return false;
+ if (pEntry->pos < 0)
+ return false;
- pos += seekPosSize; //consume payload
+ pos += seekPosSize; // consume payload
- if (pos != stop)
- return false;
+ if (pos != stop)
+ return false;
- return true;
+ return true;
}
#endif
+Cues::Cues(Segment* pSegment, long long start_, long long size_,
+ long long element_start, long long element_size)
+ : m_pSegment(pSegment),
+ m_start(start_),
+ m_size(size_),
+ m_element_start(element_start),
+ m_element_size(element_size),
+ m_cue_points(NULL),
+ m_count(0),
+ m_preload_count(0),
+ m_pos(start_) {}
-Cues::Cues(
- Segment* pSegment,
- long long start_,
- long long size_,
- long long element_start,
- long long element_size) :
- m_pSegment(pSegment),
- m_start(start_),
- m_size(size_),
- m_element_start(element_start),
- m_element_size(element_size),
- m_cue_points(NULL),
- m_count(0),
- m_preload_count(0),
- m_pos(start_)
-{
+Cues::~Cues() {
+ const long n = m_count + m_preload_count;
+
+ CuePoint** p = m_cue_points;
+ CuePoint** const q = p + n;
+
+ while (p != q) {
+ CuePoint* const pCP = *p++;
+ assert(pCP);
+
+ delete pCP;
+ }
+
+ delete[] m_cue_points;
}
+long Cues::GetCount() const {
+ if (m_cue_points == NULL)
+ return -1;
-Cues::~Cues()
-{
- const long n = m_count + m_preload_count;
+ return m_count; // TODO: really ignore preload count?
+}
- CuePoint** p = m_cue_points;
- CuePoint** const q = p + n;
+bool Cues::DoneParsing() const {
+ const long long stop = m_start + m_size;
+ return (m_pos >= stop);
+}
- while (p != q)
- {
- CuePoint* const pCP = *p++;
- assert(pCP);
+void Cues::Init() const {
+ if (m_cue_points)
+ return;
- delete pCP;
- }
+ assert(m_count == 0);
+ assert(m_preload_count == 0);
+
+ IMkvReader* const pReader = m_pSegment->m_pReader;
+
+ const long long stop = m_start + m_size;
+ long long pos = m_start;
+
+ long cue_points_size = 0;
+
+ while (pos < stop) {
+ const long long idpos = pos;
+
+ long len;
+
+ const long long id = ReadUInt(pReader, pos, len);
+ assert(id >= 0); // TODO
+ assert((pos + len) <= stop);
+
+ pos += len; // consume ID
+
+ const long long size = ReadUInt(pReader, pos, len);
+ assert(size >= 0);
+ assert((pos + len) <= stop);
+
+ pos += len; // consume Size field
+ assert((pos + size) <= stop);
+
+ if (id == 0x3B) // CuePoint ID
+ PreloadCuePoint(cue_points_size, idpos);
+
+ pos += size; // consume payload
+ assert(pos <= stop);
+ }
+}
+
+void Cues::PreloadCuePoint(long& cue_points_size, long long pos) const {
+ assert(m_count == 0);
+
+ if (m_preload_count >= cue_points_size) {
+ const long n = (cue_points_size <= 0) ? 2048 : 2 * cue_points_size;
+
+ CuePoint** const qq = new CuePoint* [n];
+ CuePoint** q = qq; // beginning of target
+
+ CuePoint** p = m_cue_points; // beginning of source
+ CuePoint** const pp = p + m_preload_count; // end of source
+
+ while (p != pp)
+ *q++ = *p++;
delete[] m_cue_points;
+
+ m_cue_points = qq;
+ cue_points_size = n;
+ }
+
+ CuePoint* const pCP = new CuePoint(m_preload_count, pos);
+ m_cue_points[m_preload_count++] = pCP;
}
+bool Cues::LoadCuePoint() const {
+ // odbgstream os;
+ // os << "Cues::LoadCuePoint" << endl;
-long Cues::GetCount() const
-{
- if (m_cue_points == NULL)
- return -1;
+ const long long stop = m_start + m_size;
- return m_count; //TODO: really ignore preload count?
-}
+ if (m_pos >= stop)
+ return false; // nothing else to do
+ Init();
-bool Cues::DoneParsing() const
-{
- const long long stop = m_start + m_size;
- return (m_pos >= stop);
-}
+ IMkvReader* const pReader = m_pSegment->m_pReader;
+ while (m_pos < stop) {
+ const long long idpos = m_pos;
-void Cues::Init() const
-{
- if (m_cue_points)
- return;
+ long len;
- assert(m_count == 0);
- assert(m_preload_count == 0);
+ const long long id = ReadUInt(pReader, m_pos, len);
+ assert(id >= 0); // TODO
+ assert((m_pos + len) <= stop);
- IMkvReader* const pReader = m_pSegment->m_pReader;
+ m_pos += len; // consume ID
- const long long stop = m_start + m_size;
- long long pos = m_start;
+ const long long size = ReadUInt(pReader, m_pos, len);
+ assert(size >= 0);
+ assert((m_pos + len) <= stop);
- long cue_points_size = 0;
+ m_pos += len; // consume Size field
+ assert((m_pos + size) <= stop);
- while (pos < stop)
- {
- const long long idpos = pos;
+ if (id != 0x3B) { // CuePoint ID
+ m_pos += size; // consume payload
+ assert(m_pos <= stop);
- long len;
-
- const long long id = ReadUInt(pReader, pos, len);
- assert(id >= 0); //TODO
- assert((pos + len) <= stop);
-
- pos += len; //consume ID
-
- const long long size = ReadUInt(pReader, pos, len);
- assert(size >= 0);
- assert((pos + len) <= stop);
-
- pos += len; //consume Size field
- assert((pos + size) <= stop);
-
- if (id == 0x3B) //CuePoint ID
- PreloadCuePoint(cue_points_size, idpos);
-
- pos += size; //consume payload
- assert(pos <= stop);
- }
-}
-
-
-void Cues::PreloadCuePoint(
- long& cue_points_size,
- long long pos) const
-{
- assert(m_count == 0);
-
- if (m_preload_count >= cue_points_size)
- {
- const long n = (cue_points_size <= 0) ? 2048 : 2*cue_points_size;
-
- CuePoint** const qq = new CuePoint*[n];
- CuePoint** q = qq; //beginning of target
-
- CuePoint** p = m_cue_points; //beginning of source
- CuePoint** const pp = p + m_preload_count; //end of source
-
- while (p != pp)
- *q++ = *p++;
-
- delete[] m_cue_points;
-
- m_cue_points = qq;
- cue_points_size = n;
+ continue;
}
- CuePoint* const pCP = new CuePoint(m_preload_count, pos);
- m_cue_points[m_preload_count++] = pCP;
+ assert(m_preload_count > 0);
+
+ CuePoint* const pCP = m_cue_points[m_count];
+ assert(pCP);
+ assert((pCP->GetTimeCode() >= 0) || (-pCP->GetTimeCode() == idpos));
+ if (pCP->GetTimeCode() < 0 && (-pCP->GetTimeCode() != idpos))
+ return false;
+
+ pCP->Load(pReader);
+ ++m_count;
+ --m_preload_count;
+
+ m_pos += size; // consume payload
+ assert(m_pos <= stop);
+
+ return true; // yes, we loaded a cue point
+ }
+
+ // return (m_pos < stop);
+ return false; // no, we did not load a cue point
}
-
-bool Cues::LoadCuePoint() const
-{
- //odbgstream os;
- //os << "Cues::LoadCuePoint" << endl;
-
- const long long stop = m_start + m_size;
-
- if (m_pos >= stop)
- return false; //nothing else to do
-
- Init();
-
- IMkvReader* const pReader = m_pSegment->m_pReader;
-
- while (m_pos < stop)
- {
- const long long idpos = m_pos;
-
- long len;
-
- const long long id = ReadUInt(pReader, m_pos, len);
- assert(id >= 0); //TODO
- assert((m_pos + len) <= stop);
-
- m_pos += len; //consume ID
-
- const long long size = ReadUInt(pReader, m_pos, len);
- assert(size >= 0);
- assert((m_pos + len) <= stop);
-
- m_pos += len; //consume Size field
- assert((m_pos + size) <= stop);
-
- if (id != 0x3B) //CuePoint ID
- {
- m_pos += size; //consume payload
- assert(m_pos <= stop);
-
- continue;
- }
-
- assert(m_preload_count > 0);
-
- CuePoint* const pCP = m_cue_points[m_count];
- assert(pCP);
- assert((pCP->GetTimeCode() >= 0) || (-pCP->GetTimeCode() == idpos));
- if (pCP->GetTimeCode() < 0 && (-pCP->GetTimeCode() != idpos))
- return false;
-
- pCP->Load(pReader);
- ++m_count;
- --m_preload_count;
-
- m_pos += size; //consume payload
- assert(m_pos <= stop);
-
- return true; //yes, we loaded a cue point
- }
-
- //return (m_pos < stop);
- return false; //no, we did not load a cue point
-}
-
-
-bool Cues::Find(
- long long time_ns,
- const Track* pTrack,
- const CuePoint*& pCP,
- const CuePoint::TrackPosition*& pTP) const
-{
- assert(time_ns >= 0);
- assert(pTrack);
+bool Cues::Find(long long time_ns, const Track* pTrack, const CuePoint*& pCP,
+ const CuePoint::TrackPosition*& pTP) const {
+ assert(time_ns >= 0);
+ assert(pTrack);
#if 0
LoadCuePoint(); //establish invariant
@@ -2619,71 +2338,68 @@
assert(pCP);
assert(pCP->GetTime(m_pSegment) <= time_ns);
#else
- if (m_cue_points == NULL)
- return false;
+ if (m_cue_points == NULL)
+ return false;
- if (m_count == 0)
- return false;
+ if (m_count == 0)
+ return false;
- CuePoint** const ii = m_cue_points;
- CuePoint** i = ii;
+ CuePoint** const ii = m_cue_points;
+ CuePoint** i = ii;
- CuePoint** const jj = ii + m_count;
- CuePoint** j = jj;
+ CuePoint** const jj = ii + m_count;
+ CuePoint** j = jj;
- pCP = *i;
- assert(pCP);
+ pCP = *i;
+ assert(pCP);
- if (time_ns <= pCP->GetTime(m_pSegment))
- {
- pTP = pCP->Find(pTrack);
- return (pTP != NULL);
- }
-
- while (i < j)
- {
- //INVARIANT:
- //[ii, i) <= time_ns
- //[i, j) ?
- //[j, jj) > time_ns
-
- CuePoint** const k = i + (j - i) / 2;
- assert(k < jj);
-
- CuePoint* const pCP = *k;
- assert(pCP);
-
- const long long t = pCP->GetTime(m_pSegment);
-
- if (t <= time_ns)
- i = k + 1;
- else
- j = k;
-
- assert(i <= j);
- }
-
- assert(i == j);
- assert(i <= jj);
- assert(i > ii);
-
- pCP = *--i;
- assert(pCP);
- assert(pCP->GetTime(m_pSegment) <= time_ns);
-#endif
-
- //TODO: here and elsewhere, it's probably not correct to search
- //for the cue point with this time, and then search for a matching
- //track. In principle, the matching track could be on some earlier
- //cue point, and with our current algorithm, we'd miss it. To make
- //this bullet-proof, we'd need to create a secondary structure,
- //with a list of cue points that apply to a track, and then search
- //that track-based structure for a matching cue point.
-
+ if (time_ns <= pCP->GetTime(m_pSegment)) {
pTP = pCP->Find(pTrack);
return (pTP != NULL);
-}
+ }
+ while (i < j) {
+ // INVARIANT:
+ //[ii, i) <= time_ns
+ //[i, j) ?
+ //[j, jj) > time_ns
+
+ CuePoint** const k = i + (j - i) / 2;
+ assert(k < jj);
+
+ CuePoint* const pCP = *k;
+ assert(pCP);
+
+ const long long t = pCP->GetTime(m_pSegment);
+
+ if (t <= time_ns)
+ i = k + 1;
+ else
+ j = k;
+
+ assert(i <= j);
+ }
+
+ assert(i == j);
+ assert(i <= jj);
+ assert(i > ii);
+
+ pCP = *--i;
+ assert(pCP);
+ assert(pCP->GetTime(m_pSegment) <= time_ns);
+#endif
+
+ // TODO: here and elsewhere, it's probably not correct to search
+ // for the cue point with this time, and then search for a matching
+ // track. In principle, the matching track could be on some earlier
+ // cue point, and with our current algorithm, we'd miss it. To make
+ // this bullet-proof, we'd need to create a secondary structure,
+ // with a list of cue points that apply to a track, and then search
+ // that track-based structure for a matching cue point.
+
+ pTP = pCP->Find(pTrack);
+ return (pTP != NULL);
+}
#if 0
bool Cues::FindNext(
@@ -2744,14 +2460,12 @@
}
#endif
+const CuePoint* Cues::GetFirst() const {
+ if (m_cue_points == NULL)
+ return NULL;
-const CuePoint* Cues::GetFirst() const
-{
- if (m_cue_points == NULL)
- return NULL;
-
- if (m_count == 0)
- return NULL;
+ if (m_count == 0)
+ return NULL;
#if 0
LoadCuePoint(); //init cues
@@ -2762,24 +2476,22 @@
return NULL;
#endif
- CuePoint* const* const pp = m_cue_points;
- assert(pp);
+ CuePoint* const* const pp = m_cue_points;
+ assert(pp);
- CuePoint* const pCP = pp[0];
- assert(pCP);
- assert(pCP->GetTimeCode() >= 0);
+ CuePoint* const pCP = pp[0];
+ assert(pCP);
+ assert(pCP->GetTimeCode() >= 0);
- return pCP;
+ return pCP;
}
+const CuePoint* Cues::GetLast() const {
+ if (m_cue_points == NULL)
+ return NULL;
-const CuePoint* Cues::GetLast() const
-{
- if (m_cue_points == NULL)
- return NULL;
-
- if (m_count <= 0)
- return NULL;
+ if (m_count <= 0)
+ return NULL;
#if 0
LoadCuePoint(); //init cues
@@ -2800,28 +2512,26 @@
pCP->Load(m_pSegment->m_pReader);
assert(pCP->GetTimeCode() >= 0);
#else
- const long index = m_count - 1;
+ const long index = m_count - 1;
- CuePoint* const* const pp = m_cue_points;
- assert(pp);
+ CuePoint* const* const pp = m_cue_points;
+ assert(pp);
- CuePoint* const pCP = pp[index];
- assert(pCP);
- assert(pCP->GetTimeCode() >= 0);
+ CuePoint* const pCP = pp[index];
+ assert(pCP);
+ assert(pCP->GetTimeCode() >= 0);
#endif
- return pCP;
+ return pCP;
}
+const CuePoint* Cues::GetNext(const CuePoint* pCurr) const {
+ if (pCurr == NULL)
+ return NULL;
-const CuePoint* Cues::GetNext(const CuePoint* pCurr) const
-{
- if (pCurr == NULL)
- return NULL;
-
- assert(pCurr->GetTimeCode() >= 0);
- assert(m_cue_points);
- assert(m_count >= 1);
+ assert(pCurr->GetTimeCode() >= 0);
+ assert(m_cue_points);
+ assert(m_count >= 1);
#if 0
const size_t count = m_count + m_preload_count;
@@ -2843,386 +2553,347 @@
pNext->Load(m_pSegment->m_pReader);
#else
- long index = pCurr->m_index;
- assert(index < m_count);
+ long index = pCurr->m_index;
+ assert(index < m_count);
- CuePoint* const* const pp = m_cue_points;
- assert(pp);
- assert(pp[index] == pCurr);
+ CuePoint* const* const pp = m_cue_points;
+ assert(pp);
+ assert(pp[index] == pCurr);
- ++index;
+ ++index;
- if (index >= m_count)
- return NULL;
+ if (index >= m_count)
+ return NULL;
- CuePoint* const pNext = pp[index];
- assert(pNext);
- assert(pNext->GetTimeCode() >= 0);
+ CuePoint* const pNext = pp[index];
+ assert(pNext);
+ assert(pNext->GetTimeCode() >= 0);
#endif
- return pNext;
+ return pNext;
}
+const BlockEntry* Cues::GetBlock(const CuePoint* pCP,
+ const CuePoint::TrackPosition* pTP) const {
+ if (pCP == NULL)
+ return NULL;
-const BlockEntry* Cues::GetBlock(
- const CuePoint* pCP,
- const CuePoint::TrackPosition* pTP) const
-{
- if (pCP == NULL)
- return NULL;
+ if (pTP == NULL)
+ return NULL;
- if (pTP == NULL)
- return NULL;
-
- return m_pSegment->GetBlock(*pCP, *pTP);
+ return m_pSegment->GetBlock(*pCP, *pTP);
}
+const BlockEntry* Segment::GetBlock(const CuePoint& cp,
+ const CuePoint::TrackPosition& tp) {
+ Cluster** const ii = m_clusters;
+ Cluster** i = ii;
-const BlockEntry* Segment::GetBlock(
- const CuePoint& cp,
- const CuePoint::TrackPosition& tp)
-{
- Cluster** const ii = m_clusters;
- Cluster** i = ii;
+ const long count = m_clusterCount + m_clusterPreloadCount;
- const long count = m_clusterCount + m_clusterPreloadCount;
+ Cluster** const jj = ii + count;
+ Cluster** j = jj;
- Cluster** const jj = ii + count;
- Cluster** j = jj;
+ while (i < j) {
+ // INVARIANT:
+ //[ii, i) < pTP->m_pos
+ //[i, j) ?
+ //[j, jj) > pTP->m_pos
- while (i < j)
- {
- //INVARIANT:
- //[ii, i) < pTP->m_pos
- //[i, j) ?
- //[j, jj) > pTP->m_pos
+ Cluster** const k = i + (j - i) / 2;
+ assert(k < jj);
- Cluster** const k = i + (j - i) / 2;
- assert(k < jj);
-
- Cluster* const pCluster = *k;
- assert(pCluster);
-
- //const long long pos_ = pCluster->m_pos;
- //assert(pos_);
- //const long long pos = pos_ * ((pos_ < 0) ? -1 : 1);
-
- const long long pos = pCluster->GetPosition();
- assert(pos >= 0);
-
- if (pos < tp.m_pos)
- i = k + 1;
- else if (pos > tp.m_pos)
- j = k;
- else
- return pCluster->GetEntry(cp, tp);
- }
-
- assert(i == j);
- //assert(Cluster::HasBlockEntries(this, tp.m_pos));
-
- Cluster* const pCluster = Cluster::Create(this, -1, tp.m_pos); //, -1);
+ Cluster* const pCluster = *k;
assert(pCluster);
- const ptrdiff_t idx = i - m_clusters;
+ // const long long pos_ = pCluster->m_pos;
+ // assert(pos_);
+ // const long long pos = pos_ * ((pos_ < 0) ? -1 : 1);
- PreloadCluster(pCluster, idx);
- assert(m_clusters);
- assert(m_clusterPreloadCount > 0);
- assert(m_clusters[idx] == pCluster);
+ const long long pos = pCluster->GetPosition();
+ assert(pos >= 0);
- return pCluster->GetEntry(cp, tp);
+ if (pos < tp.m_pos)
+ i = k + 1;
+ else if (pos > tp.m_pos)
+ j = k;
+ else
+ return pCluster->GetEntry(cp, tp);
+ }
+
+ assert(i == j);
+ // assert(Cluster::HasBlockEntries(this, tp.m_pos));
+
+ Cluster* const pCluster = Cluster::Create(this, -1, tp.m_pos); //, -1);
+ assert(pCluster);
+
+ const ptrdiff_t idx = i - m_clusters;
+
+ PreloadCluster(pCluster, idx);
+ assert(m_clusters);
+ assert(m_clusterPreloadCount > 0);
+ assert(m_clusters[idx] == pCluster);
+
+ return pCluster->GetEntry(cp, tp);
}
+const Cluster* Segment::FindOrPreloadCluster(long long requested_pos) {
+ if (requested_pos < 0)
+ return 0;
-const Cluster* Segment::FindOrPreloadCluster(long long requested_pos)
-{
- if (requested_pos < 0)
- return 0;
+ Cluster** const ii = m_clusters;
+ Cluster** i = ii;
- Cluster** const ii = m_clusters;
- Cluster** i = ii;
+ const long count = m_clusterCount + m_clusterPreloadCount;
- const long count = m_clusterCount + m_clusterPreloadCount;
+ Cluster** const jj = ii + count;
+ Cluster** j = jj;
- Cluster** const jj = ii + count;
- Cluster** j = jj;
+ while (i < j) {
+ // INVARIANT:
+ //[ii, i) < pTP->m_pos
+ //[i, j) ?
+ //[j, jj) > pTP->m_pos
- while (i < j)
- {
- //INVARIANT:
- //[ii, i) < pTP->m_pos
- //[i, j) ?
- //[j, jj) > pTP->m_pos
+ Cluster** const k = i + (j - i) / 2;
+ assert(k < jj);
- Cluster** const k = i + (j - i) / 2;
- assert(k < jj);
-
- Cluster* const pCluster = *k;
- assert(pCluster);
-
- //const long long pos_ = pCluster->m_pos;
- //assert(pos_);
- //const long long pos = pos_ * ((pos_ < 0) ? -1 : 1);
-
- const long long pos = pCluster->GetPosition();
- assert(pos >= 0);
-
- if (pos < requested_pos)
- i = k + 1;
- else if (pos > requested_pos)
- j = k;
- else
- return pCluster;
- }
-
- assert(i == j);
- //assert(Cluster::HasBlockEntries(this, tp.m_pos));
-
- Cluster* const pCluster = Cluster::Create(
- this,
- -1,
- requested_pos);
- //-1);
+ Cluster* const pCluster = *k;
assert(pCluster);
- const ptrdiff_t idx = i - m_clusters;
+ // const long long pos_ = pCluster->m_pos;
+ // assert(pos_);
+ // const long long pos = pos_ * ((pos_ < 0) ? -1 : 1);
- PreloadCluster(pCluster, idx);
- assert(m_clusters);
- assert(m_clusterPreloadCount > 0);
- assert(m_clusters[idx] == pCluster);
+ const long long pos = pCluster->GetPosition();
+ assert(pos >= 0);
- return pCluster;
+ if (pos < requested_pos)
+ i = k + 1;
+ else if (pos > requested_pos)
+ j = k;
+ else
+ return pCluster;
+ }
+
+ assert(i == j);
+ // assert(Cluster::HasBlockEntries(this, tp.m_pos));
+
+ Cluster* const pCluster = Cluster::Create(this, -1, requested_pos);
+ //-1);
+ assert(pCluster);
+
+ const ptrdiff_t idx = i - m_clusters;
+
+ PreloadCluster(pCluster, idx);
+ assert(m_clusters);
+ assert(m_clusterPreloadCount > 0);
+ assert(m_clusters[idx] == pCluster);
+
+ return pCluster;
}
-
-CuePoint::CuePoint(long idx, long long pos) :
- m_element_start(0),
- m_element_size(0),
- m_index(idx),
- m_timecode(-1 * pos),
- m_track_positions(NULL),
- m_track_positions_count(0)
-{
- assert(pos > 0);
+CuePoint::CuePoint(long idx, long long pos)
+ : m_element_start(0),
+ m_element_size(0),
+ m_index(idx),
+ m_timecode(-1 * pos),
+ m_track_positions(NULL),
+ m_track_positions_count(0) {
+ assert(pos > 0);
}
+CuePoint::~CuePoint() { delete[] m_track_positions; }
-CuePoint::~CuePoint()
-{
- delete[] m_track_positions;
-}
+void CuePoint::Load(IMkvReader* pReader) {
+ // odbgstream os;
+ // os << "CuePoint::Load(begin): timecode=" << m_timecode << endl;
+ if (m_timecode >= 0) // already loaded
+ return;
-void CuePoint::Load(IMkvReader* pReader)
-{
- //odbgstream os;
- //os << "CuePoint::Load(begin): timecode=" << m_timecode << endl;
+ assert(m_track_positions == NULL);
+ assert(m_track_positions_count == 0);
- if (m_timecode >= 0) //already loaded
- return;
+ long long pos_ = -m_timecode;
+ const long long element_start = pos_;
- assert(m_track_positions == NULL);
- assert(m_track_positions_count == 0);
+ long long stop;
- long long pos_ = -m_timecode;
- const long long element_start = pos_;
+ {
+ long len;
- long long stop;
+ const long long id = ReadUInt(pReader, pos_, len);
+ assert(id == 0x3B); // CuePoint ID
+ if (id != 0x3B)
+ return;
- {
- long len;
+ pos_ += len; // consume ID
- const long long id = ReadUInt(pReader, pos_, len);
- assert(id == 0x3B); //CuePoint ID
- if (id != 0x3B)
- return;
+ const long long size = ReadUInt(pReader, pos_, len);
+ assert(size >= 0);
- pos_ += len; //consume ID
+ pos_ += len; // consume Size field
+ // pos_ now points to start of payload
- const long long size = ReadUInt(pReader, pos_, len);
- assert(size >= 0);
+ stop = pos_ + size;
+ }
- pos_ += len; //consume Size field
- //pos_ now points to start of payload
+ const long long element_size = stop - element_start;
- stop = pos_ + size;
+ long long pos = pos_;
+
+ // First count number of track positions
+
+ while (pos < stop) {
+ long len;
+
+ const long long id = ReadUInt(pReader, pos, len);
+ assert(id >= 0); // TODO
+ assert((pos + len) <= stop);
+
+ pos += len; // consume ID
+
+ const long long size = ReadUInt(pReader, pos, len);
+ assert(size >= 0);
+ assert((pos + len) <= stop);
+
+ pos += len; // consume Size field
+ assert((pos + size) <= stop);
+
+ if (id == 0x33) // CueTime ID
+ m_timecode = UnserializeUInt(pReader, pos, size);
+
+ else if (id == 0x37) // CueTrackPosition(s) ID
+ ++m_track_positions_count;
+
+ pos += size; // consume payload
+ assert(pos <= stop);
+ }
+
+ assert(m_timecode >= 0);
+ assert(m_track_positions_count > 0);
+
+ // os << "CuePoint::Load(cont'd): idpos=" << idpos
+ // << " timecode=" << m_timecode
+ // << endl;
+
+ m_track_positions = new TrackPosition[m_track_positions_count];
+
+ // Now parse track positions
+
+ TrackPosition* p = m_track_positions;
+ pos = pos_;
+
+ while (pos < stop) {
+ long len;
+
+ const long long id = ReadUInt(pReader, pos, len);
+ assert(id >= 0); // TODO
+ assert((pos + len) <= stop);
+
+ pos += len; // consume ID
+
+ const long long size = ReadUInt(pReader, pos, len);
+ assert(size >= 0);
+ assert((pos + len) <= stop);
+
+ pos += len; // consume Size field
+ assert((pos + size) <= stop);
+
+ if (id == 0x37) { // CueTrackPosition(s) ID
+ TrackPosition& tp = *p++;
+ tp.Parse(pReader, pos, size);
}
- const long long element_size = stop - element_start;
+ pos += size; // consume payload
+ assert(pos <= stop);
+ }
- long long pos = pos_;
+ assert(size_t(p - m_track_positions) == m_track_positions_count);
- //First count number of track positions
-
- while (pos < stop)
- {
- long len;
-
- const long long id = ReadUInt(pReader, pos, len);
- assert(id >= 0); //TODO
- assert((pos + len) <= stop);
-
- pos += len; //consume ID
-
- const long long size = ReadUInt(pReader, pos, len);
- assert(size >= 0);
- assert((pos + len) <= stop);
-
- pos += len; //consume Size field
- assert((pos + size) <= stop);
-
- if (id == 0x33) //CueTime ID
- m_timecode = UnserializeUInt(pReader, pos, size);
-
- else if (id == 0x37) //CueTrackPosition(s) ID
- ++m_track_positions_count;
-
- pos += size; //consume payload
- assert(pos <= stop);
- }
-
- assert(m_timecode >= 0);
- assert(m_track_positions_count > 0);
-
- //os << "CuePoint::Load(cont'd): idpos=" << idpos
- // << " timecode=" << m_timecode
- // << endl;
-
- m_track_positions = new TrackPosition[m_track_positions_count];
-
- //Now parse track positions
-
- TrackPosition* p = m_track_positions;
- pos = pos_;
-
- while (pos < stop)
- {
- long len;
-
- const long long id = ReadUInt(pReader, pos, len);
- assert(id >= 0); //TODO
- assert((pos + len) <= stop);
-
- pos += len; //consume ID
-
- const long long size = ReadUInt(pReader, pos, len);
- assert(size >= 0);
- assert((pos + len) <= stop);
-
- pos += len; //consume Size field
- assert((pos + size) <= stop);
-
- if (id == 0x37) //CueTrackPosition(s) ID
- {
- TrackPosition& tp = *p++;
- tp.Parse(pReader, pos, size);
- }
-
- pos += size; //consume payload
- assert(pos <= stop);
- }
-
- assert(size_t(p - m_track_positions) == m_track_positions_count);
-
- m_element_start = element_start;
- m_element_size = element_size;
+ m_element_start = element_start;
+ m_element_size = element_size;
}
+void CuePoint::TrackPosition::Parse(IMkvReader* pReader, long long start_,
+ long long size_) {
+ const long long stop = start_ + size_;
+ long long pos = start_;
+ m_track = -1;
+ m_pos = -1;
+ m_block = 1; // default
-void CuePoint::TrackPosition::Parse(
- IMkvReader* pReader,
- long long start_,
- long long size_)
-{
- const long long stop = start_ + size_;
- long long pos = start_;
+ while (pos < stop) {
+ long len;
- m_track = -1;
- m_pos = -1;
- m_block = 1; //default
+ const long long id = ReadUInt(pReader, pos, len);
+ assert(id >= 0); // TODO
+ assert((pos + len) <= stop);
- while (pos < stop)
- {
- long len;
+ pos += len; // consume ID
- const long long id = ReadUInt(pReader, pos, len);
- assert(id >= 0); //TODO
- assert((pos + len) <= stop);
+ const long long size = ReadUInt(pReader, pos, len);
+ assert(size >= 0);
+ assert((pos + len) <= stop);
- pos += len; //consume ID
+ pos += len; // consume Size field
+ assert((pos + size) <= stop);
- const long long size = ReadUInt(pReader, pos, len);
- assert(size >= 0);
- assert((pos + len) <= stop);
+ if (id == 0x77) // CueTrack ID
+ m_track = UnserializeUInt(pReader, pos, size);
- pos += len; //consume Size field
- assert((pos + size) <= stop);
+ else if (id == 0x71) // CueClusterPos ID
+ m_pos = UnserializeUInt(pReader, pos, size);
- if (id == 0x77) //CueTrack ID
- m_track = UnserializeUInt(pReader, pos, size);
+ else if (id == 0x1378) // CueBlockNumber
+ m_block = UnserializeUInt(pReader, pos, size);
- else if (id == 0x71) //CueClusterPos ID
- m_pos = UnserializeUInt(pReader, pos, size);
+ pos += size; // consume payload
+ assert(pos <= stop);
+ }
- else if (id == 0x1378) //CueBlockNumber
- m_block = UnserializeUInt(pReader, pos, size);
-
- pos += size; //consume payload
- assert(pos <= stop);
- }
-
- assert(m_pos >= 0);
- assert(m_track > 0);
- //assert(m_block > 0);
+ assert(m_pos >= 0);
+ assert(m_track > 0);
+ // assert(m_block > 0);
}
+const CuePoint::TrackPosition* CuePoint::Find(const Track* pTrack) const {
+ assert(pTrack);
-const CuePoint::TrackPosition* CuePoint::Find(const Track* pTrack) const
-{
- assert(pTrack);
+ const long long n = pTrack->GetNumber();
- const long long n = pTrack->GetNumber();
+ const TrackPosition* i = m_track_positions;
+ const TrackPosition* const j = i + m_track_positions_count;
- const TrackPosition* i = m_track_positions;
- const TrackPosition* const j = i + m_track_positions_count;
+ while (i != j) {
+ const TrackPosition& p = *i++;
- while (i != j)
- {
- const TrackPosition& p = *i++;
+ if (p.m_track == n)
+ return &p;
+ }
- if (p.m_track == n)
- return &p;
- }
-
- return NULL; //no matching track number found
+ return NULL; // no matching track number found
}
+long long CuePoint::GetTimeCode() const { return m_timecode; }
-long long CuePoint::GetTimeCode() const
-{
- return m_timecode;
+long long CuePoint::GetTime(const Segment* pSegment) const {
+ assert(pSegment);
+ assert(m_timecode >= 0);
+
+ const SegmentInfo* const pInfo = pSegment->GetInfo();
+ assert(pInfo);
+
+ const long long scale = pInfo->GetTimeCodeScale();
+ assert(scale >= 1);
+
+ const long long time = scale * m_timecode;
+
+ return time;
}
-long long CuePoint::GetTime(const Segment* pSegment) const
-{
- assert(pSegment);
- assert(m_timecode >= 0);
-
- const SegmentInfo* const pInfo = pSegment->GetInfo();
- assert(pInfo);
-
- const long long scale = pInfo->GetTimeCodeScale();
- assert(scale >= 1);
-
- const long long time = scale * m_timecode;
-
- return time;
-}
-
-
#if 0
long long Segment::Unparsed() const
{
@@ -3237,808 +2908,745 @@
return result;
}
#else
-bool Segment::DoneParsing() const
-{
- if (m_size < 0)
- {
- long long total, avail;
+bool Segment::DoneParsing() const {
+ if (m_size < 0) {
+ long long total, avail;
- const int status = m_pReader->Length(&total, &avail);
+ const int status = m_pReader->Length(&total, &avail);
- if (status < 0) //error
- return true; //must assume done
+ if (status < 0) // error
+ return true; // must assume done
- if (total < 0)
- return false; //assume live stream
+ if (total < 0)
+ return false; // assume live stream
- return (m_pos >= total);
- }
+ return (m_pos >= total);
+ }
- const long long stop = m_start + m_size;
+ const long long stop = m_start + m_size;
- return (m_pos >= stop);
+ return (m_pos >= stop);
}
#endif
+const Cluster* Segment::GetFirst() const {
+ if ((m_clusters == NULL) || (m_clusterCount <= 0))
+ return &m_eos;
-const Cluster* Segment::GetFirst() const
-{
- if ((m_clusters == NULL) || (m_clusterCount <= 0))
- return &m_eos;
+ Cluster* const pCluster = m_clusters[0];
+ assert(pCluster);
- Cluster* const pCluster = m_clusters[0];
- assert(pCluster);
-
- return pCluster;
+ return pCluster;
}
+const Cluster* Segment::GetLast() const {
+ if ((m_clusters == NULL) || (m_clusterCount <= 0))
+ return &m_eos;
-const Cluster* Segment::GetLast() const
-{
- if ((m_clusters == NULL) || (m_clusterCount <= 0))
- return &m_eos;
+ const long idx = m_clusterCount - 1;
- const long idx = m_clusterCount - 1;
+ Cluster* const pCluster = m_clusters[idx];
+ assert(pCluster);
- Cluster* const pCluster = m_clusters[idx];
- assert(pCluster);
-
- return pCluster;
+ return pCluster;
}
+unsigned long Segment::GetCount() const { return m_clusterCount; }
-unsigned long Segment::GetCount() const
-{
- return m_clusterCount;
-}
+const Cluster* Segment::GetNext(const Cluster* pCurr) {
+ assert(pCurr);
+ assert(pCurr != &m_eos);
+ assert(m_clusters);
+ long idx = pCurr->m_index;
-const Cluster* Segment::GetNext(const Cluster* pCurr)
-{
- assert(pCurr);
- assert(pCurr != &m_eos);
- assert(m_clusters);
+ if (idx >= 0) {
+ assert(m_clusterCount > 0);
+ assert(idx < m_clusterCount);
+ assert(pCurr == m_clusters[idx]);
- long idx = pCurr->m_index;
+ ++idx;
- if (idx >= 0)
- {
- assert(m_clusterCount > 0);
- assert(idx < m_clusterCount);
- assert(pCurr == m_clusters[idx]);
+ if (idx >= m_clusterCount)
+ return &m_eos; // caller will LoadCluster as desired
- ++idx;
-
- if (idx >= m_clusterCount)
- return &m_eos; //caller will LoadCluster as desired
-
- Cluster* const pNext = m_clusters[idx];
- assert(pNext);
- assert(pNext->m_index >= 0);
- assert(pNext->m_index == idx);
-
- return pNext;
- }
-
- assert(m_clusterPreloadCount > 0);
-
- long long pos = pCurr->m_element_start;
-
- assert(m_size >= 0); //TODO
- const long long stop = m_start + m_size; //end of segment
-
- {
- long len;
-
- long long result = GetUIntLength(m_pReader, pos, len);
- assert(result == 0);
- assert((pos + len) <= stop); //TODO
- if (result != 0)
- return NULL;
-
- const long long id = ReadUInt(m_pReader, pos, len);
- assert(id == 0x0F43B675); //Cluster ID
- if (id != 0x0F43B675)
- return NULL;
-
- pos += len; //consume ID
-
- //Read Size
- result = GetUIntLength(m_pReader, pos, len);
- assert(result == 0); //TODO
- assert((pos + len) <= stop); //TODO
-
- const long long size = ReadUInt(m_pReader, pos, len);
- assert(size > 0); //TODO
- //assert((pCurr->m_size <= 0) || (pCurr->m_size == size));
-
- pos += len; //consume length of size of element
- assert((pos + size) <= stop); //TODO
-
- //Pos now points to start of payload
-
- pos += size; //consume payload
- }
-
- long long off_next = 0;
-
- while (pos < stop)
- {
- long len;
-
- long long result = GetUIntLength(m_pReader, pos, len);
- assert(result == 0);
- assert((pos + len) <= stop); //TODO
- if (result != 0)
- return NULL;
-
- const long long idpos = pos; //pos of next (potential) cluster
-
- const long long id = ReadUInt(m_pReader, idpos, len);
- assert(id > 0); //TODO
-
- pos += len; //consume ID
-
- //Read Size
- result = GetUIntLength(m_pReader, pos, len);
- assert(result == 0); //TODO
- assert((pos + len) <= stop); //TODO
-
- const long long size = ReadUInt(m_pReader, pos, len);
- assert(size >= 0); //TODO
-
- pos += len; //consume length of size of element
- assert((pos + size) <= stop); //TODO
-
- //Pos now points to start of payload
-
- if (size == 0) //weird
- continue;
-
- if (id == 0x0F43B675) //Cluster ID
- {
- const long long off_next_ = idpos - m_start;
-
- long long pos_;
- long len_;
-
- const long status = Cluster::HasBlockEntries(
- this,
- off_next_,
- pos_,
- len_);
-
- assert(status >= 0);
-
- if (status > 0)
- {
- off_next = off_next_;
- break;
- }
- }
-
- pos += size; //consume payload
- }
-
- if (off_next <= 0)
- return 0;
-
- Cluster** const ii = m_clusters + m_clusterCount;
- Cluster** i = ii;
-
- Cluster** const jj = ii + m_clusterPreloadCount;
- Cluster** j = jj;
-
- while (i < j)
- {
- //INVARIANT:
- //[0, i) < pos_next
- //[i, j) ?
- //[j, jj) > pos_next
-
- Cluster** const k = i + (j - i) / 2;
- assert(k < jj);
-
- Cluster* const pNext = *k;
- assert(pNext);
- assert(pNext->m_index < 0);
-
- //const long long pos_ = pNext->m_pos;
- //assert(pos_);
- //pos = pos_ * ((pos_ < 0) ? -1 : 1);
-
- pos = pNext->GetPosition();
-
- if (pos < off_next)
- i = k + 1;
- else if (pos > off_next)
- j = k;
- else
- return pNext;
- }
-
- assert(i == j);
-
- Cluster* const pNext = Cluster::Create(this,
- -1,
- off_next);
+ Cluster* const pNext = m_clusters[idx];
assert(pNext);
-
- const ptrdiff_t idx_next = i - m_clusters; //insertion position
-
- PreloadCluster(pNext, idx_next);
- assert(m_clusters);
- assert(idx_next < m_clusterSize);
- assert(m_clusters[idx_next] == pNext);
+ assert(pNext->m_index >= 0);
+ assert(pNext->m_index == idx);
return pNext;
-}
+ }
+ assert(m_clusterPreloadCount > 0);
-long Segment::ParseNext(
- const Cluster* pCurr,
- const Cluster*& pResult,
- long long& pos,
- long& len)
-{
- assert(pCurr);
- assert(!pCurr->EOS());
- assert(m_clusters);
+ long long pos = pCurr->m_element_start;
- pResult = 0;
+ assert(m_size >= 0); // TODO
+ const long long stop = m_start + m_size; // end of segment
- if (pCurr->m_index >= 0) //loaded (not merely preloaded)
- {
- assert(m_clusters[pCurr->m_index] == pCurr);
+ {
+ long len;
- const long next_idx = pCurr->m_index + 1;
+ long long result = GetUIntLength(m_pReader, pos, len);
+ assert(result == 0);
+ assert((pos + len) <= stop); // TODO
+ if (result != 0)
+ return NULL;
- if (next_idx < m_clusterCount)
- {
- pResult = m_clusters[next_idx];
- return 0; //success
- }
+ const long long id = ReadUInt(m_pReader, pos, len);
+ assert(id == 0x0F43B675); // Cluster ID
+ if (id != 0x0F43B675)
+ return NULL;
- //curr cluster is last among loaded
+ pos += len; // consume ID
- const long result = LoadCluster(pos, len);
+ // Read Size
+ result = GetUIntLength(m_pReader, pos, len);
+ assert(result == 0); // TODO
+ assert((pos + len) <= stop); // TODO
- if (result < 0) //error or underflow
- return result;
+ const long long size = ReadUInt(m_pReader, pos, len);
+ assert(size > 0); // TODO
+ // assert((pCurr->m_size <= 0) || (pCurr->m_size == size));
- if (result > 0) //no more clusters
- {
- //pResult = &m_eos;
- return 1;
- }
+ pos += len; // consume length of size of element
+ assert((pos + size) <= stop); // TODO
- pResult = GetLast();
- return 0; //success
+ // Pos now points to start of payload
+
+ pos += size; // consume payload
+ }
+
+ long long off_next = 0;
+
+ while (pos < stop) {
+ long len;
+
+ long long result = GetUIntLength(m_pReader, pos, len);
+ assert(result == 0);
+ assert((pos + len) <= stop); // TODO
+ if (result != 0)
+ return NULL;
+
+ const long long idpos = pos; // pos of next (potential) cluster
+
+ const long long id = ReadUInt(m_pReader, idpos, len);
+ assert(id > 0); // TODO
+
+ pos += len; // consume ID
+
+ // Read Size
+ result = GetUIntLength(m_pReader, pos, len);
+ assert(result == 0); // TODO
+ assert((pos + len) <= stop); // TODO
+
+ const long long size = ReadUInt(m_pReader, pos, len);
+ assert(size >= 0); // TODO
+
+ pos += len; // consume length of size of element
+ assert((pos + size) <= stop); // TODO
+
+ // Pos now points to start of payload
+
+ if (size == 0) // weird
+ continue;
+
+ if (id == 0x0F43B675) { // Cluster ID
+ const long long off_next_ = idpos - m_start;
+
+ long long pos_;
+ long len_;
+
+ const long status = Cluster::HasBlockEntries(this, off_next_, pos_, len_);
+
+ assert(status >= 0);
+
+ if (status > 0) {
+ off_next = off_next_;
+ break;
+ }
}
- assert(m_pos > 0);
+ pos += size; // consume payload
+ }
- long long total, avail;
+ if (off_next <= 0)
+ return 0;
- long status = m_pReader->Length(&total, &avail);
+ Cluster** const ii = m_clusters + m_clusterCount;
+ Cluster** i = ii;
- if (status < 0) //error
- return status;
+ Cluster** const jj = ii + m_clusterPreloadCount;
+ Cluster** j = jj;
- assert((total < 0) || (avail <= total));
+ while (i < j) {
+ // INVARIANT:
+ //[0, i) < pos_next
+ //[i, j) ?
+ //[j, jj) > pos_next
- const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size;
+ Cluster** const k = i + (j - i) / 2;
+ assert(k < jj);
- //interrogate curr cluster
+ Cluster* const pNext = *k;
+ assert(pNext);
+ assert(pNext->m_index < 0);
- pos = pCurr->m_element_start;
+ // const long long pos_ = pNext->m_pos;
+ // assert(pos_);
+ // pos = pos_ * ((pos_ < 0) ? -1 : 1);
- if (pCurr->m_element_size >= 0)
- pos += pCurr->m_element_size;
+ pos = pNext->GetPosition();
+
+ if (pos < off_next)
+ i = k + 1;
+ else if (pos > off_next)
+ j = k;
else
- {
- if ((pos + 1) > avail)
- {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
+ return pNext;
+ }
- long long result = GetUIntLength(m_pReader, pos, len);
+ assert(i == j);
- if (result < 0) //error
- return static_cast<long>(result);
+ Cluster* const pNext = Cluster::Create(this, -1, off_next);
+ assert(pNext);
- if (result > 0) //weird
- return E_BUFFER_NOT_FULL;
+ const ptrdiff_t idx_next = i - m_clusters; // insertion position
- if ((segment_stop >= 0) && ((pos + len) > segment_stop))
- return E_FILE_FORMAT_INVALID;
+ PreloadCluster(pNext, idx_next);
+ assert(m_clusters);
+ assert(idx_next < m_clusterSize);
+ assert(m_clusters[idx_next] == pNext);
- if ((pos + len) > avail)
- return E_BUFFER_NOT_FULL;
-
- const long long id = ReadUInt(m_pReader, pos, len);
-
- if (id != 0x0F43B675) //weird: not Cluster ID
- return -1;
-
- pos += len; //consume ID
-
- //Read Size
-
- if ((pos + 1) > avail)
- {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
-
- result = GetUIntLength(m_pReader, pos, len);
-
- if (result < 0) //error
- return static_cast<long>(result);
-
- if (result > 0) //weird
- return E_BUFFER_NOT_FULL;
-
- if ((segment_stop >= 0) && ((pos + len) > segment_stop))
- return E_FILE_FORMAT_INVALID;
-
- if ((pos + len) > avail)
- return E_BUFFER_NOT_FULL;
-
- const long long size = ReadUInt(m_pReader, pos, len);
-
- if (size < 0) //error
- return static_cast<long>(size);
-
- pos += len; //consume size field
-
- const long long unknown_size = (1LL << (7 * len)) - 1;
-
- if (size == unknown_size) //TODO: should never happen
- return E_FILE_FORMAT_INVALID; //TODO: resolve this
-
- //assert((pCurr->m_size <= 0) || (pCurr->m_size == size));
-
- if ((segment_stop >= 0) && ((pos + size) > segment_stop))
- return E_FILE_FORMAT_INVALID;
-
- //Pos now points to start of payload
-
- pos += size; //consume payload (that is, the current cluster)
- assert((segment_stop < 0) || (pos <= segment_stop));
-
- //By consuming the payload, we are assuming that the curr
- //cluster isn't interesting. That is, we don't bother checking
- //whether the payload of the curr cluster is less than what
- //happens to be available (obtained via IMkvReader::Length).
- //Presumably the caller has already dispensed with the current
- //cluster, and really does want the next cluster.
- }
-
- //pos now points to just beyond the last fully-loaded cluster
-
- for (;;)
- {
- const long status = DoParseNext(pResult, pos, len);
-
- if (status <= 1)
- return status;
- }
+ return pNext;
}
+long Segment::ParseNext(const Cluster* pCurr, const Cluster*& pResult,
+ long long& pos, long& len) {
+ assert(pCurr);
+ assert(!pCurr->EOS());
+ assert(m_clusters);
-long Segment::DoParseNext(
- const Cluster*& pResult,
- long long& pos,
- long& len)
-{
- long long total, avail;
+ pResult = 0;
- long status = m_pReader->Length(&total, &avail);
+ if (pCurr->m_index >= 0) { // loaded (not merely preloaded)
+ assert(m_clusters[pCurr->m_index] == pCurr);
- if (status < 0) //error
- return status;
+ const long next_idx = pCurr->m_index + 1;
- assert((total < 0) || (avail <= total));
+ if (next_idx < m_clusterCount) {
+ pResult = m_clusters[next_idx];
+ return 0; // success
+ }
- const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size;
+ // curr cluster is last among loaded
- //Parse next cluster. This is strictly a parsing activity.
- //Creation of a new cluster object happens later, after the
- //parsing is done.
+ const long result = LoadCluster(pos, len);
- long long off_next = 0;
- long long cluster_size = -1;
+ if (result < 0) // error or underflow
+ return result;
- for (;;)
+ if (result > 0) // no more clusters
{
- if ((total >= 0) && (pos >= total))
- return 1; //EOF
+ // pResult = &m_eos;
+ return 1;
+ }
- if ((segment_stop >= 0) && (pos >= segment_stop))
- return 1; //EOF
+ pResult = GetLast();
+ return 0; // success
+ }
- if ((pos + 1) > avail)
- {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
+ assert(m_pos > 0);
- long long result = GetUIntLength(m_pReader, pos, len);
+ long long total, avail;
- if (result < 0) //error
- return static_cast<long>(result);
+ long status = m_pReader->Length(&total, &avail);
- if (result > 0) //weird
- return E_BUFFER_NOT_FULL;
+ if (status < 0) // error
+ return status;
- if ((segment_stop >= 0) && ((pos + len) > segment_stop))
- return E_FILE_FORMAT_INVALID;
+ assert((total < 0) || (avail <= total));
- if ((pos + len) > avail)
- return E_BUFFER_NOT_FULL;
+ const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size;
- const long long idpos = pos; //absolute
- const long long idoff = pos - m_start; //relative
+ // interrogate curr cluster
- const long long id = ReadUInt(m_pReader, idpos, len); //absolute
+ pos = pCurr->m_element_start;
- if (id < 0) //error
- return static_cast<long>(id);
+ if (pCurr->m_element_size >= 0)
+ pos += pCurr->m_element_size;
+ else {
+ if ((pos + 1) > avail) {
+ len = 1;
+ return E_BUFFER_NOT_FULL;
+ }
- if (id == 0) //weird
- return -1; //generic error
+ long long result = GetUIntLength(m_pReader, pos, len);
- pos += len; //consume ID
+ if (result < 0) // error
+ return static_cast<long>(result);
- //Read Size
+ if (result > 0) // weird
+ return E_BUFFER_NOT_FULL;
- if ((pos + 1) > avail)
- {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
+ if ((segment_stop >= 0) && ((pos + len) > segment_stop))
+ return E_FILE_FORMAT_INVALID;
- result = GetUIntLength(m_pReader, pos, len);
+ if ((pos + len) > avail)
+ return E_BUFFER_NOT_FULL;
- if (result < 0) //error
- return static_cast<long>(result);
+ const long long id = ReadUInt(m_pReader, pos, len);
- if (result > 0) //weird
- return E_BUFFER_NOT_FULL;
+ if (id != 0x0F43B675) // weird: not Cluster ID
+ return -1;
- if ((segment_stop >= 0) && ((pos + len) > segment_stop))
- return E_FILE_FORMAT_INVALID;
+ pos += len; // consume ID
- if ((pos + len) > avail)
- return E_BUFFER_NOT_FULL;
+ // Read Size
- const long long size = ReadUInt(m_pReader, pos, len);
+ if ((pos + 1) > avail) {
+ len = 1;
+ return E_BUFFER_NOT_FULL;
+ }
- if (size < 0) //error
- return static_cast<long>(size);
+ result = GetUIntLength(m_pReader, pos, len);
- pos += len; //consume length of size of element
+ if (result < 0) // error
+ return static_cast<long>(result);
- //Pos now points to start of payload
+ if (result > 0) // weird
+ return E_BUFFER_NOT_FULL;
- if (size == 0) //weird
- continue;
+ if ((segment_stop >= 0) && ((pos + len) > segment_stop))
+ return E_FILE_FORMAT_INVALID;
- const long long unknown_size = (1LL << (7 * len)) - 1;
+ if ((pos + len) > avail)
+ return E_BUFFER_NOT_FULL;
- if ((segment_stop >= 0) &&
- (size != unknown_size) &&
- ((pos + size) > segment_stop))
- {
- return E_FILE_FORMAT_INVALID;
- }
+ const long long size = ReadUInt(m_pReader, pos, len);
- if (id == 0x0C53BB6B) //Cues ID
- {
- if (size == unknown_size)
- return E_FILE_FORMAT_INVALID;
+ if (size < 0) // error
+ return static_cast<long>(size);
- const long long element_stop = pos + size;
+ pos += len; // consume size field
- if ((segment_stop >= 0) && (element_stop > segment_stop))
- return E_FILE_FORMAT_INVALID;
+ const long long unknown_size = (1LL << (7 * len)) - 1;
- const long long element_start = idpos;
- const long long element_size = element_stop - element_start;
+ if (size == unknown_size) // TODO: should never happen
+ return E_FILE_FORMAT_INVALID; // TODO: resolve this
- if (m_pCues == NULL)
- {
- m_pCues = new Cues(this,
- pos,
- size,
- element_start,
- element_size);
- assert(m_pCues); //TODO
- }
+ // assert((pCurr->m_size <= 0) || (pCurr->m_size == size));
- pos += size; //consume payload
- assert((segment_stop < 0) || (pos <= segment_stop));
+ if ((segment_stop >= 0) && ((pos + size) > segment_stop))
+ return E_FILE_FORMAT_INVALID;
- continue;
- }
+ // Pos now points to start of payload
- if (id != 0x0F43B675) //not a Cluster ID
- {
- if (size == unknown_size)
- return E_FILE_FORMAT_INVALID;
+ pos += size; // consume payload (that is, the current cluster)
+ assert((segment_stop < 0) || (pos <= segment_stop));
- pos += size; //consume payload
- assert((segment_stop < 0) || (pos <= segment_stop));
+ // By consuming the payload, we are assuming that the curr
+ // cluster isn't interesting. That is, we don't bother checking
+ // whether the payload of the curr cluster is less than what
+ // happens to be available (obtained via IMkvReader::Length).
+ // Presumably the caller has already dispensed with the current
+ // cluster, and really does want the next cluster.
+ }
- continue;
- }
+ // pos now points to just beyond the last fully-loaded cluster
-#if 0 //this is commented-out to support incremental cluster parsing
+ for (;;) {
+ const long status = DoParseNext(pResult, pos, len);
+
+ if (status <= 1)
+ return status;
+ }
+}
+
+long Segment::DoParseNext(const Cluster*& pResult, long long& pos, long& len) {
+ long long total, avail;
+
+ long status = m_pReader->Length(&total, &avail);
+
+ if (status < 0) // error
+ return status;
+
+ assert((total < 0) || (avail <= total));
+
+ const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size;
+
+ // Parse next cluster. This is strictly a parsing activity.
+ // Creation of a new cluster object happens later, after the
+ // parsing is done.
+
+ long long off_next = 0;
+ long long cluster_size = -1;
+
+ for (;;) {
+ if ((total >= 0) && (pos >= total))
+ return 1; // EOF
+
+ if ((segment_stop >= 0) && (pos >= segment_stop))
+ return 1; // EOF
+
+ if ((pos + 1) > avail) {
+ len = 1;
+ return E_BUFFER_NOT_FULL;
+ }
+
+ long long result = GetUIntLength(m_pReader, pos, len);
+
+ if (result < 0) // error
+ return static_cast<long>(result);
+
+ if (result > 0) // weird
+ return E_BUFFER_NOT_FULL;
+
+ if ((segment_stop >= 0) && ((pos + len) > segment_stop))
+ return E_FILE_FORMAT_INVALID;
+
+ if ((pos + len) > avail)
+ return E_BUFFER_NOT_FULL;
+
+ const long long idpos = pos; // absolute
+ const long long idoff = pos - m_start; // relative
+
+ const long long id = ReadUInt(m_pReader, idpos, len); // absolute
+
+ if (id < 0) // error
+ return static_cast<long>(id);
+
+ if (id == 0) // weird
+ return -1; // generic error
+
+ pos += len; // consume ID
+
+ // Read Size
+
+ if ((pos + 1) > avail) {
+ len = 1;
+ return E_BUFFER_NOT_FULL;
+ }
+
+ result = GetUIntLength(m_pReader, pos, len);
+
+ if (result < 0) // error
+ return static_cast<long>(result);
+
+ if (result > 0) // weird
+ return E_BUFFER_NOT_FULL;
+
+ if ((segment_stop >= 0) && ((pos + len) > segment_stop))
+ return E_FILE_FORMAT_INVALID;
+
+ if ((pos + len) > avail)
+ return E_BUFFER_NOT_FULL;
+
+ const long long size = ReadUInt(m_pReader, pos, len);
+
+ if (size < 0) // error
+ return static_cast<long>(size);
+
+ pos += len; // consume length of size of element
+
+ // Pos now points to start of payload
+
+ if (size == 0) // weird
+ continue;
+
+ const long long unknown_size = (1LL << (7 * len)) - 1;
+
+ if ((segment_stop >= 0) && (size != unknown_size) &&
+ ((pos + size) > segment_stop)) {
+ return E_FILE_FORMAT_INVALID;
+ }
+
+ if (id == 0x0C53BB6B) { // Cues ID
+ if (size == unknown_size)
+ return E_FILE_FORMAT_INVALID;
+
+ const long long element_stop = pos + size;
+
+ if ((segment_stop >= 0) && (element_stop > segment_stop))
+ return E_FILE_FORMAT_INVALID;
+
+ const long long element_start = idpos;
+ const long long element_size = element_stop - element_start;
+
+ if (m_pCues == NULL) {
+ m_pCues = new Cues(this, pos, size, element_start, element_size);
+ assert(m_pCues); // TODO
+ }
+
+ pos += size; // consume payload
+ assert((segment_stop < 0) || (pos <= segment_stop));
+
+ continue;
+ }
+
+ if (id != 0x0F43B675) { // not a Cluster ID
+ if (size == unknown_size)
+ return E_FILE_FORMAT_INVALID;
+
+ pos += size; // consume payload
+ assert((segment_stop < 0) || (pos <= segment_stop));
+
+ continue;
+ }
+
+#if 0 // this is commented-out to support incremental cluster parsing
len = static_cast<long>(size);
if (element_stop > avail)
return E_BUFFER_NOT_FULL;
#endif
- //We have a cluster.
+ // We have a cluster.
- off_next = idoff;
+ off_next = idoff;
- if (size != unknown_size)
- cluster_size = size;
+ if (size != unknown_size)
+ cluster_size = size;
+ break;
+ }
+
+ assert(off_next > 0); // have cluster
+
+ // We have parsed the next cluster.
+ // We have not created a cluster object yet. What we need
+ // to do now is determine whether it has already be preloaded
+ //(in which case, an object for this cluster has already been
+ // created), and if not, create a new cluster object.
+
+ Cluster** const ii = m_clusters + m_clusterCount;
+ Cluster** i = ii;
+
+ Cluster** const jj = ii + m_clusterPreloadCount;
+ Cluster** j = jj;
+
+ while (i < j) {
+ // INVARIANT:
+ //[0, i) < pos_next
+ //[i, j) ?
+ //[j, jj) > pos_next
+
+ Cluster** const k = i + (j - i) / 2;
+ assert(k < jj);
+
+ const Cluster* const pNext = *k;
+ assert(pNext);
+ assert(pNext->m_index < 0);
+
+ pos = pNext->GetPosition();
+ assert(pos >= 0);
+
+ if (pos < off_next)
+ i = k + 1;
+ else if (pos > off_next)
+ j = k;
+ else {
+ pResult = pNext;
+ return 0; // success
+ }
+ }
+
+ assert(i == j);
+
+ long long pos_;
+ long len_;
+
+ status = Cluster::HasBlockEntries(this, off_next, pos_, len_);
+
+ if (status < 0) { // error or underflow
+ pos = pos_;
+ len = len_;
+
+ return status;
+ }
+
+ if (status > 0) { // means "found at least one block entry"
+ Cluster* const pNext = Cluster::Create(this,
+ -1, // preloaded
+ off_next);
+ // element_size);
+ assert(pNext);
+
+ const ptrdiff_t idx_next = i - m_clusters; // insertion position
+
+ PreloadCluster(pNext, idx_next);
+ assert(m_clusters);
+ assert(idx_next < m_clusterSize);
+ assert(m_clusters[idx_next] == pNext);
+
+ pResult = pNext;
+ return 0; // success
+ }
+
+ // status == 0 means "no block entries found"
+
+ if (cluster_size < 0) { // unknown size
+ const long long payload_pos = pos; // absolute pos of cluster payload
+
+ for (;;) { // determine cluster size
+ if ((total >= 0) && (pos >= total))
break;
- }
- assert(off_next > 0); //have cluster
+ if ((segment_stop >= 0) && (pos >= segment_stop))
+ break; // no more clusters
- //We have parsed the next cluster.
- //We have not created a cluster object yet. What we need
- //to do now is determine whether it has already be preloaded
- //(in which case, an object for this cluster has already been
- //created), and if not, create a new cluster object.
+ // Read ID
- Cluster** const ii = m_clusters + m_clusterCount;
- Cluster** i = ii;
+ if ((pos + 1) > avail) {
+ len = 1;
+ return E_BUFFER_NOT_FULL;
+ }
- Cluster** const jj = ii + m_clusterPreloadCount;
- Cluster** j = jj;
+ long long result = GetUIntLength(m_pReader, pos, len);
- while (i < j)
- {
- //INVARIANT:
- //[0, i) < pos_next
- //[i, j) ?
- //[j, jj) > pos_next
+ if (result < 0) // error
+ return static_cast<long>(result);
- Cluster** const k = i + (j - i) / 2;
- assert(k < jj);
+ if (result > 0) // weird
+ return E_BUFFER_NOT_FULL;
- const Cluster* const pNext = *k;
- assert(pNext);
- assert(pNext->m_index < 0);
+ if ((segment_stop >= 0) && ((pos + len) > segment_stop))
+ return E_FILE_FORMAT_INVALID;
- pos = pNext->GetPosition();
- assert(pos >= 0);
+ if ((pos + len) > avail)
+ return E_BUFFER_NOT_FULL;
- if (pos < off_next)
- i = k + 1;
- else if (pos > off_next)
- j = k;
- else
- {
- pResult = pNext;
- return 0; //success
- }
- }
+ const long long idpos = pos;
+ const long long id = ReadUInt(m_pReader, idpos, len);
- assert(i == j);
+ if (id < 0) // error (or underflow)
+ return static_cast<long>(id);
- long long pos_;
- long len_;
+ // This is the distinguished set of ID's we use to determine
+ // that we have exhausted the sub-element's inside the cluster
+ // whose ID we parsed earlier.
- status = Cluster::HasBlockEntries(this, off_next, pos_, len_);
+ if (id == 0x0F43B675) // Cluster ID
+ break;
- if (status < 0) //error or underflow
- {
- pos = pos_;
- len = len_;
+ if (id == 0x0C53BB6B) // Cues ID
+ break;
- return status;
- }
+ pos += len; // consume ID (of sub-element)
- if (status > 0) //means "found at least one block entry"
- {
- Cluster* const pNext = Cluster::Create(this,
- -1, //preloaded
- off_next);
- //element_size);
- assert(pNext);
+ // Read Size
- const ptrdiff_t idx_next = i - m_clusters; //insertion position
+ if ((pos + 1) > avail) {
+ len = 1;
+ return E_BUFFER_NOT_FULL;
+ }
- PreloadCluster(pNext, idx_next);
- assert(m_clusters);
- assert(idx_next < m_clusterSize);
- assert(m_clusters[idx_next] == pNext);
+ result = GetUIntLength(m_pReader, pos, len);
- pResult = pNext;
- return 0; //success
- }
+ if (result < 0) // error
+ return static_cast<long>(result);
- //status == 0 means "no block entries found"
+ if (result > 0) // weird
+ return E_BUFFER_NOT_FULL;
- if (cluster_size < 0) //unknown size
- {
- const long long payload_pos = pos; //absolute pos of cluster payload
+ if ((segment_stop >= 0) && ((pos + len) > segment_stop))
+ return E_FILE_FORMAT_INVALID;
- for (;;) //determine cluster size
- {
- if ((total >= 0) && (pos >= total))
- break;
+ if ((pos + len) > avail)
+ return E_BUFFER_NOT_FULL;
- if ((segment_stop >= 0) && (pos >= segment_stop))
- break; //no more clusters
+ const long long size = ReadUInt(m_pReader, pos, len);
- //Read ID
+ if (size < 0) // error
+ return static_cast<long>(size);
- if ((pos + 1) > avail)
- {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
+ pos += len; // consume size field of element
- long long result = GetUIntLength(m_pReader, pos, len);
+ // pos now points to start of sub-element's payload
- if (result < 0) //error
- return static_cast<long>(result);
+ if (size == 0) // weird
+ continue;
- if (result > 0) //weird
- return E_BUFFER_NOT_FULL;
+ const long long unknown_size = (1LL << (7 * len)) - 1;
- if ((segment_stop >= 0) && ((pos + len) > segment_stop))
- return E_FILE_FORMAT_INVALID;
+ if (size == unknown_size)
+ return E_FILE_FORMAT_INVALID; // not allowed for sub-elements
- if ((pos + len) > avail)
- return E_BUFFER_NOT_FULL;
+ if ((segment_stop >= 0) && ((pos + size) > segment_stop)) // weird
+ return E_FILE_FORMAT_INVALID;
- const long long idpos = pos;
- const long long id = ReadUInt(m_pReader, idpos, len);
+ pos += size; // consume payload of sub-element
+ assert((segment_stop < 0) || (pos <= segment_stop));
+ } // determine cluster size
- if (id < 0) //error (or underflow)
- return static_cast<long>(id);
+ cluster_size = pos - payload_pos;
+ assert(cluster_size >= 0); // TODO: handle cluster_size = 0
- //This is the distinguished set of ID's we use to determine
- //that we have exhausted the sub-element's inside the cluster
- //whose ID we parsed earlier.
+ pos = payload_pos; // reset and re-parse original cluster
+ }
- if (id == 0x0F43B675) //Cluster ID
- break;
+ pos += cluster_size; // consume payload
+ assert((segment_stop < 0) || (pos <= segment_stop));
- if (id == 0x0C53BB6B) //Cues ID
- break;
-
- pos += len; //consume ID (of sub-element)
-
- //Read Size
-
- if ((pos + 1) > avail)
- {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
-
- result = GetUIntLength(m_pReader, pos, len);
-
- if (result < 0) //error
- return static_cast<long>(result);
-
- if (result > 0) //weird
- return E_BUFFER_NOT_FULL;
-
- if ((segment_stop >= 0) && ((pos + len) > segment_stop))
- return E_FILE_FORMAT_INVALID;
-
- if ((pos + len) > avail)
- return E_BUFFER_NOT_FULL;
-
- const long long size = ReadUInt(m_pReader, pos, len);
-
- if (size < 0) //error
- return static_cast<long>(size);
-
- pos += len; //consume size field of element
-
- //pos now points to start of sub-element's payload
-
- if (size == 0) //weird
- continue;
-
- const long long unknown_size = (1LL << (7 * len)) - 1;
-
- if (size == unknown_size)
- return E_FILE_FORMAT_INVALID; //not allowed for sub-elements
-
- if ((segment_stop >= 0) && ((pos + size) > segment_stop)) //weird
- return E_FILE_FORMAT_INVALID;
-
- pos += size; //consume payload of sub-element
- assert((segment_stop < 0) || (pos <= segment_stop));
- } //determine cluster size
-
- cluster_size = pos - payload_pos;
- assert(cluster_size >= 0); //TODO: handle cluster_size = 0
-
- pos = payload_pos; //reset and re-parse original cluster
- }
-
- pos += cluster_size; //consume payload
- assert((segment_stop < 0) || (pos <= segment_stop));
-
- return 2; //try to find a cluster that follows next
+ return 2; // try to find a cluster that follows next
}
+const Cluster* Segment::FindCluster(long long time_ns) const {
+ if ((m_clusters == NULL) || (m_clusterCount <= 0))
+ return &m_eos;
-const Cluster* Segment::FindCluster(long long time_ns) const
-{
- if ((m_clusters == NULL) || (m_clusterCount <= 0))
- return &m_eos;
+ {
+ Cluster* const pCluster = m_clusters[0];
+ assert(pCluster);
+ assert(pCluster->m_index == 0);
- {
- Cluster* const pCluster = m_clusters[0];
- assert(pCluster);
- assert(pCluster->m_index == 0);
+ if (time_ns <= pCluster->GetTime())
+ return pCluster;
+ }
- if (time_ns <= pCluster->GetTime())
- return pCluster;
- }
+ // Binary search of cluster array
- //Binary search of cluster array
+ long i = 0;
+ long j = m_clusterCount;
- long i = 0;
- long j = m_clusterCount;
+ while (i < j) {
+ // INVARIANT:
+ //[0, i) <= time_ns
+ //[i, j) ?
+ //[j, m_clusterCount) > time_ns
- while (i < j)
- {
- //INVARIANT:
- //[0, i) <= time_ns
- //[i, j) ?
- //[j, m_clusterCount) > time_ns
-
- const long k = i + (j - i) / 2;
- assert(k < m_clusterCount);
-
- Cluster* const pCluster = m_clusters[k];
- assert(pCluster);
- assert(pCluster->m_index == k);
-
- const long long t = pCluster->GetTime();
-
- if (t <= time_ns)
- i = k + 1;
- else
- j = k;
-
- assert(i <= j);
- }
-
- assert(i == j);
- assert(i > 0);
- assert(i <= m_clusterCount);
-
- const long k = i - 1;
+ const long k = i + (j - i) / 2;
+ assert(k < m_clusterCount);
Cluster* const pCluster = m_clusters[k];
assert(pCluster);
assert(pCluster->m_index == k);
- assert(pCluster->GetTime() <= time_ns);
- return pCluster;
+ const long long t = pCluster->GetTime();
+
+ if (t <= time_ns)
+ i = k + 1;
+ else
+ j = k;
+
+ assert(i <= j);
+ }
+
+ assert(i == j);
+ assert(i > 0);
+ assert(i <= m_clusterCount);
+
+ const long k = i - 1;
+
+ Cluster* const pCluster = m_clusters[k];
+ assert(pCluster);
+ assert(pCluster->m_index == k);
+ assert(pCluster->GetTime() <= time_ns);
+
+ return pCluster;
}
-
#if 0
const BlockEntry* Segment::Seek(
long long time_ns,
@@ -4064,8 +3672,7 @@
Cluster** const j = i + m_clusterCount;
- if (pTrack->GetType() == 2) //audio
- {
+ if (pTrack->GetType() == 2) { //audio
//TODO: we could decide to use cues for this, as we do for video.
//But we only use it for video because looking around for a keyframe
//can get expensive. Audio doesn't require anything special so a
@@ -4184,7 +3791,6 @@
}
#endif
-
#if 0
bool Segment::SearchCues(
long long time_ns,
@@ -4215,844 +3821,592 @@
}
#endif
+const Tracks* Segment::GetTracks() const { return m_pTracks; }
-const Tracks* Segment::GetTracks() const
-{
- return m_pTracks;
+const SegmentInfo* Segment::GetInfo() const { return m_pInfo; }
+
+const Cues* Segment::GetCues() const { return m_pCues; }
+
+const Chapters* Segment::GetChapters() const { return m_pChapters; }
+
+const SeekHead* Segment::GetSeekHead() const { return m_pSeekHead; }
+
+long long Segment::GetDuration() const {
+ assert(m_pInfo);
+ return m_pInfo->GetDuration();
}
+Chapters::Chapters(Segment* pSegment, long long payload_start,
+ long long payload_size, long long element_start,
+ long long element_size)
+ : m_pSegment(pSegment),
+ m_start(payload_start),
+ m_size(payload_size),
+ m_element_start(element_start),
+ m_element_size(element_size),
+ m_editions(NULL),
+ m_editions_size(0),
+ m_editions_count(0) {}
-const SegmentInfo* Segment::GetInfo() const
-{
- return m_pInfo;
+Chapters::~Chapters() {
+ while (m_editions_count > 0) {
+ Edition& e = m_editions[--m_editions_count];
+ e.Clear();
+ }
}
+long Chapters::Parse() {
+ IMkvReader* const pReader = m_pSegment->m_pReader;
-const Cues* Segment::GetCues() const
-{
- return m_pCues;
-}
+ long long pos = m_start; // payload start
+ const long long stop = pos + m_size; // payload stop
+ while (pos < stop) {
+ long long id, size;
-const Chapters* Segment::GetChapters() const
-{
- return m_pChapters;
-}
+ long status = ParseElementHeader(pReader, pos, stop, id, size);
+ if (status < 0) // error
+ return status;
-const SeekHead* Segment::GetSeekHead() const
-{
- return m_pSeekHead;
-}
+ if (size == 0) // weird
+ continue;
+ if (id == 0x05B9) { // EditionEntry ID
+ status = ParseEdition(pos, size);
-long long Segment::GetDuration() const
-{
- assert(m_pInfo);
- return m_pInfo->GetDuration();
-}
-
-
-Chapters::Chapters(
- Segment* pSegment,
- long long payload_start,
- long long payload_size,
- long long element_start,
- long long element_size) :
- m_pSegment(pSegment),
- m_start(payload_start),
- m_size(payload_size),
- m_element_start(element_start),
- m_element_size(element_size),
- m_editions(NULL),
- m_editions_size(0),
- m_editions_count(0)
-{
-}
-
-
-Chapters::~Chapters()
-{
- while (m_editions_count > 0)
- {
- Edition& e = m_editions[--m_editions_count];
- e.Clear();
- }
-}
-
-
-long Chapters::Parse()
-{
- IMkvReader* const pReader = m_pSegment->m_pReader;
-
- long long pos = m_start; // payload start
- const long long stop = pos + m_size; // payload stop
-
- while (pos < stop)
- {
- long long id, size;
-
- long status = ParseElementHeader(
- pReader,
- pos,
- stop,
- id,
- size);
-
- if (status < 0) // error
- return status;
-
- if (size == 0) // weird
- continue;
-
- if (id == 0x05B9) // EditionEntry ID
- {
- status = ParseEdition(pos, size);
-
- if (status < 0) // error
- return status;
- }
-
- pos += size;
- assert(pos <= stop);
+ if (status < 0) // error
+ return status;
}
- assert(pos == stop);
- return 0;
+ pos += size;
+ assert(pos <= stop);
+ }
+
+ assert(pos == stop);
+ return 0;
}
+int Chapters::GetEditionCount() const { return m_editions_count; }
-int Chapters::GetEditionCount() const
-{
- return m_editions_count;
+const Chapters::Edition* Chapters::GetEdition(int idx) const {
+ if (idx < 0)
+ return NULL;
+
+ if (idx >= m_editions_count)
+ return NULL;
+
+ return m_editions + idx;
}
+bool Chapters::ExpandEditionsArray() {
+ if (m_editions_size > m_editions_count)
+ return true; // nothing else to do
-const Chapters::Edition* Chapters::GetEdition(int idx) const
-{
- if (idx < 0)
- return NULL;
+ const int size = (m_editions_size == 0) ? 1 : 2 * m_editions_size;
- if (idx >= m_editions_count)
- return NULL;
+ Edition* const editions = new (std::nothrow) Edition[size];
- return m_editions + idx;
+ if (editions == NULL)
+ return false;
+
+ for (int idx = 0; idx < m_editions_count; ++idx) {
+ m_editions[idx].ShallowCopy(editions[idx]);
+ }
+
+ delete[] m_editions;
+ m_editions = editions;
+
+ m_editions_size = size;
+ return true;
}
+long Chapters::ParseEdition(long long pos, long long size) {
+ if (!ExpandEditionsArray())
+ return -1;
-bool Chapters::ExpandEditionsArray()
-{
- if (m_editions_size > m_editions_count)
- return true; // nothing else to do
+ Edition& e = m_editions[m_editions_count++];
+ e.Init();
- const int size = (m_editions_size == 0) ? 1 : 2 * m_editions_size;
+ return e.Parse(m_pSegment->m_pReader, pos, size);
+}
- Edition* const editions = new (std::nothrow) Edition[size];
+Chapters::Edition::Edition() {}
- if (editions == NULL)
- return false;
+Chapters::Edition::~Edition() {}
- for (int idx = 0; idx < m_editions_count; ++idx)
- {
- m_editions[idx].ShallowCopy(editions[idx]);
+int Chapters::Edition::GetAtomCount() const { return m_atoms_count; }
+
+const Chapters::Atom* Chapters::Edition::GetAtom(int index) const {
+ if (index < 0)
+ return NULL;
+
+ if (index >= m_atoms_count)
+ return NULL;
+
+ return m_atoms + index;
+}
+
+void Chapters::Edition::Init() {
+ m_atoms = NULL;
+ m_atoms_size = 0;
+ m_atoms_count = 0;
+}
+
+void Chapters::Edition::ShallowCopy(Edition& rhs) const {
+ rhs.m_atoms = m_atoms;
+ rhs.m_atoms_size = m_atoms_size;
+ rhs.m_atoms_count = m_atoms_count;
+}
+
+void Chapters::Edition::Clear() {
+ while (m_atoms_count > 0) {
+ Atom& a = m_atoms[--m_atoms_count];
+ a.Clear();
+ }
+
+ delete[] m_atoms;
+ m_atoms = NULL;
+
+ m_atoms_size = 0;
+}
+
+long Chapters::Edition::Parse(IMkvReader* pReader, long long pos,
+ long long size) {
+ const long long stop = pos + size;
+
+ while (pos < stop) {
+ long long id, size;
+
+ long status = ParseElementHeader(pReader, pos, stop, id, size);
+
+ if (status < 0) // error
+ return status;
+
+ if (size == 0) // weird
+ continue;
+
+ if (id == 0x36) { // Atom ID
+ status = ParseAtom(pReader, pos, size);
+
+ if (status < 0) // error
+ return status;
}
- delete[] m_editions;
- m_editions = editions;
+ pos += size;
+ assert(pos <= stop);
+ }
- m_editions_size = size;
- return true;
+ assert(pos == stop);
+ return 0;
}
+long Chapters::Edition::ParseAtom(IMkvReader* pReader, long long pos,
+ long long size) {
+ if (!ExpandAtomsArray())
+ return -1;
-long Chapters::ParseEdition(
- long long pos,
- long long size)
-{
- if (!ExpandEditionsArray())
- return -1;
+ Atom& a = m_atoms[m_atoms_count++];
+ a.Init();
- Edition& e = m_editions[m_editions_count++];
- e.Init();
-
- return e.Parse(m_pSegment->m_pReader, pos, size);
+ return a.Parse(pReader, pos, size);
}
+bool Chapters::Edition::ExpandAtomsArray() {
+ if (m_atoms_size > m_atoms_count)
+ return true; // nothing else to do
-Chapters::Edition::Edition()
-{
+ const int size = (m_atoms_size == 0) ? 1 : 2 * m_atoms_size;
+
+ Atom* const atoms = new (std::nothrow) Atom[size];
+
+ if (atoms == NULL)
+ return false;
+
+ for (int idx = 0; idx < m_atoms_count; ++idx) {
+ m_atoms[idx].ShallowCopy(atoms[idx]);
+ }
+
+ delete[] m_atoms;
+ m_atoms = atoms;
+
+ m_atoms_size = size;
+ return true;
}
+Chapters::Atom::Atom() {}
-Chapters::Edition::~Edition()
-{
+Chapters::Atom::~Atom() {}
+
+unsigned long long Chapters::Atom::GetUID() const { return m_uid; }
+
+const char* Chapters::Atom::GetStringUID() const { return m_string_uid; }
+
+long long Chapters::Atom::GetStartTimecode() const { return m_start_timecode; }
+
+long long Chapters::Atom::GetStopTimecode() const { return m_stop_timecode; }
+
+long long Chapters::Atom::GetStartTime(const Chapters* pChapters) const {
+ return GetTime(pChapters, m_start_timecode);
}
-
-int Chapters::Edition::GetAtomCount() const
-{
- return m_atoms_count;
+long long Chapters::Atom::GetStopTime(const Chapters* pChapters) const {
+ return GetTime(pChapters, m_stop_timecode);
}
+int Chapters::Atom::GetDisplayCount() const { return m_displays_count; }
-const Chapters::Atom* Chapters::Edition::GetAtom(int index) const
-{
- if (index < 0)
- return NULL;
+const Chapters::Display* Chapters::Atom::GetDisplay(int index) const {
+ if (index < 0)
+ return NULL;
- if (index >= m_atoms_count)
- return NULL;
+ if (index >= m_displays_count)
+ return NULL;
- return m_atoms + index;
+ return m_displays + index;
}
+void Chapters::Atom::Init() {
+ m_string_uid = NULL;
+ m_uid = 0;
+ m_start_timecode = -1;
+ m_stop_timecode = -1;
-void Chapters::Edition::Init()
-{
- m_atoms = NULL;
- m_atoms_size = 0;
- m_atoms_count = 0;
+ m_displays = NULL;
+ m_displays_size = 0;
+ m_displays_count = 0;
}
+void Chapters::Atom::ShallowCopy(Atom& rhs) const {
+ rhs.m_string_uid = m_string_uid;
+ rhs.m_uid = m_uid;
+ rhs.m_start_timecode = m_start_timecode;
+ rhs.m_stop_timecode = m_stop_timecode;
-void Chapters::Edition::ShallowCopy(Edition& rhs) const
-{
- rhs.m_atoms = m_atoms;
- rhs.m_atoms_size = m_atoms_size;
- rhs.m_atoms_count = m_atoms_count;
+ rhs.m_displays = m_displays;
+ rhs.m_displays_size = m_displays_size;
+ rhs.m_displays_count = m_displays_count;
}
+void Chapters::Atom::Clear() {
+ delete[] m_string_uid;
+ m_string_uid = NULL;
-void Chapters::Edition::Clear()
-{
- while (m_atoms_count > 0)
- {
- Atom& a = m_atoms[--m_atoms_count];
- a.Clear();
+ while (m_displays_count > 0) {
+ Display& d = m_displays[--m_displays_count];
+ d.Clear();
+ }
+
+ delete[] m_displays;
+ m_displays = NULL;
+
+ m_displays_size = 0;
+}
+
+long Chapters::Atom::Parse(IMkvReader* pReader, long long pos, long long size) {
+ const long long stop = pos + size;
+
+ while (pos < stop) {
+ long long id, size;
+
+ long status = ParseElementHeader(pReader, pos, stop, id, size);
+
+ if (status < 0) // error
+ return status;
+
+ if (size == 0) // weird
+ continue;
+
+ if (id == 0x00) { // Display ID
+ status = ParseDisplay(pReader, pos, size);
+
+ if (status < 0) // error
+ return status;
+ } else if (id == 0x1654) { // StringUID ID
+ status = UnserializeString(pReader, pos, size, m_string_uid);
+
+ if (status < 0) // error
+ return status;
+ } else if (id == 0x33C4) { // UID ID
+ const long long val = UnserializeUInt(pReader, pos, size);
+
+ if (val < 0) // error
+ return static_cast<long>(val);
+
+ m_uid = val;
+ } else if (id == 0x11) { // TimeStart ID
+ const long long val = UnserializeUInt(pReader, pos, size);
+
+ if (val < 0) // error
+ return static_cast<long>(val);
+
+ m_start_timecode = val;
+ } else if (id == 0x12) { // TimeEnd ID
+ const long long val = UnserializeUInt(pReader, pos, size);
+
+ if (val < 0) // error
+ return static_cast<long>(val);
+
+ m_stop_timecode = val;
}
- delete[] m_atoms;
- m_atoms = NULL;
+ pos += size;
+ assert(pos <= stop);
+ }
- m_atoms_size = 0;
+ assert(pos == stop);
+ return 0;
}
+long long Chapters::Atom::GetTime(const Chapters* pChapters,
+ long long timecode) {
+ if (pChapters == NULL)
+ return -1;
-long Chapters::Edition::Parse(
- IMkvReader* pReader,
- long long pos,
- long long size)
-{
- const long long stop = pos + size;
+ Segment* const pSegment = pChapters->m_pSegment;
- while (pos < stop)
- {
- long long id, size;
+ if (pSegment == NULL) // weird
+ return -1;
- long status = ParseElementHeader(
- pReader,
- pos,
- stop,
- id,
- size);
+ const SegmentInfo* const pInfo = pSegment->GetInfo();
- if (status < 0) // error
- return status;
+ if (pInfo == NULL)
+ return -1;
- if (size == 0) // weird
- continue;
+ const long long timecode_scale = pInfo->GetTimeCodeScale();
- if (id == 0x36) // Atom ID
- {
- status = ParseAtom(pReader, pos, size);
+ if (timecode_scale < 1) // weird
+ return -1;
- if (status < 0) // error
- return status;
- }
+ if (timecode < 0)
+ return -1;
- pos += size;
- assert(pos <= stop);
+ const long long result = timecode_scale * timecode;
+
+ return result;
+}
+
+long Chapters::Atom::ParseDisplay(IMkvReader* pReader, long long pos,
+ long long size) {
+ if (!ExpandDisplaysArray())
+ return -1;
+
+ Display& d = m_displays[m_displays_count++];
+ d.Init();
+
+ return d.Parse(pReader, pos, size);
+}
+
+bool Chapters::Atom::ExpandDisplaysArray() {
+ if (m_displays_size > m_displays_count)
+ return true; // nothing else to do
+
+ const int size = (m_displays_size == 0) ? 1 : 2 * m_displays_size;
+
+ Display* const displays = new (std::nothrow) Display[size];
+
+ if (displays == NULL)
+ return false;
+
+ for (int idx = 0; idx < m_displays_count; ++idx) {
+ m_displays[idx].ShallowCopy(displays[idx]);
+ }
+
+ delete[] m_displays;
+ m_displays = displays;
+
+ m_displays_size = size;
+ return true;
+}
+
+Chapters::Display::Display() {}
+
+Chapters::Display::~Display() {}
+
+const char* Chapters::Display::GetString() const { return m_string; }
+
+const char* Chapters::Display::GetLanguage() const { return m_language; }
+
+const char* Chapters::Display::GetCountry() const { return m_country; }
+
+void Chapters::Display::Init() {
+ m_string = NULL;
+ m_language = NULL;
+ m_country = NULL;
+}
+
+void Chapters::Display::ShallowCopy(Display& rhs) const {
+ rhs.m_string = m_string;
+ rhs.m_language = m_language;
+ rhs.m_country = m_country;
+}
+
+void Chapters::Display::Clear() {
+ delete[] m_string;
+ m_string = NULL;
+
+ delete[] m_language;
+ m_language = NULL;
+
+ delete[] m_country;
+ m_country = NULL;
+}
+
+long Chapters::Display::Parse(IMkvReader* pReader, long long pos,
+ long long size) {
+ const long long stop = pos + size;
+
+ while (pos < stop) {
+ long long id, size;
+
+ long status = ParseElementHeader(pReader, pos, stop, id, size);
+
+ if (status < 0) // error
+ return status;
+
+ if (size == 0) // weird
+ continue;
+
+ if (id == 0x05) { // ChapterString ID
+ status = UnserializeString(pReader, pos, size, m_string);
+
+ if (status)
+ return status;
+ } else if (id == 0x037C) { // ChapterLanguage ID
+ status = UnserializeString(pReader, pos, size, m_language);
+
+ if (status)
+ return status;
+ } else if (id == 0x037E) { // ChapterCountry ID
+ status = UnserializeString(pReader, pos, size, m_country);
+
+ if (status)
+ return status;
}
- assert(pos == stop);
- return 0;
+ pos += size;
+ assert(pos <= stop);
+ }
+
+ assert(pos == stop);
+ return 0;
}
+SegmentInfo::SegmentInfo(Segment* pSegment, long long start, long long size_,
+ long long element_start, long long element_size)
+ : m_pSegment(pSegment),
+ m_start(start),
+ m_size(size_),
+ m_element_start(element_start),
+ m_element_size(element_size),
+ m_pMuxingAppAsUTF8(NULL),
+ m_pWritingAppAsUTF8(NULL),
+ m_pTitleAsUTF8(NULL) {}
-long Chapters::Edition::ParseAtom(
- IMkvReader* pReader,
- long long pos,
- long long size)
-{
- if (!ExpandAtomsArray())
- return -1;
+SegmentInfo::~SegmentInfo() {
+ delete[] m_pMuxingAppAsUTF8;
+ m_pMuxingAppAsUTF8 = NULL;
- Atom& a = m_atoms[m_atoms_count++];
- a.Init();
+ delete[] m_pWritingAppAsUTF8;
+ m_pWritingAppAsUTF8 = NULL;
- return a.Parse(pReader, pos, size);
+ delete[] m_pTitleAsUTF8;
+ m_pTitleAsUTF8 = NULL;
}
+long SegmentInfo::Parse() {
+ assert(m_pMuxingAppAsUTF8 == NULL);
+ assert(m_pWritingAppAsUTF8 == NULL);
+ assert(m_pTitleAsUTF8 == NULL);
-bool Chapters::Edition::ExpandAtomsArray()
-{
- if (m_atoms_size > m_atoms_count)
- return true; // nothing else to do
+ IMkvReader* const pReader = m_pSegment->m_pReader;
- const int size = (m_atoms_size == 0) ? 1 : 2 * m_atoms_size;
+ long long pos = m_start;
+ const long long stop = m_start + m_size;
- Atom* const atoms = new (std::nothrow) Atom[size];
+ m_timecodeScale = 1000000;
+ m_duration = -1;
- if (atoms == NULL)
- return false;
+ while (pos < stop) {
+ long long id, size;
- for (int idx = 0; idx < m_atoms_count; ++idx)
- {
- m_atoms[idx].ShallowCopy(atoms[idx]);
+ const long status = ParseElementHeader(pReader, pos, stop, id, size);
+
+ if (status < 0) // error
+ return status;
+
+ if (id == 0x0AD7B1) { // Timecode Scale
+ m_timecodeScale = UnserializeUInt(pReader, pos, size);
+
+ if (m_timecodeScale <= 0)
+ return E_FILE_FORMAT_INVALID;
+ } else if (id == 0x0489) { // Segment duration
+ const long status = UnserializeFloat(pReader, pos, size, m_duration);
+
+ if (status < 0)
+ return status;
+
+ if (m_duration < 0)
+ return E_FILE_FORMAT_INVALID;
+ } else if (id == 0x0D80) { // MuxingApp
+ const long status =
+ UnserializeString(pReader, pos, size, m_pMuxingAppAsUTF8);
+
+ if (status)
+ return status;
+ } else if (id == 0x1741) { // WritingApp
+ const long status =
+ UnserializeString(pReader, pos, size, m_pWritingAppAsUTF8);
+
+ if (status)
+ return status;
+ } else if (id == 0x3BA9) { // Title
+ const long status = UnserializeString(pReader, pos, size, m_pTitleAsUTF8);
+
+ if (status)
+ return status;
}
- delete[] m_atoms;
- m_atoms = atoms;
+ pos += size;
+ assert(pos <= stop);
+ }
- m_atoms_size = size;
- return true;
+ assert(pos == stop);
+
+ return 0;
}
+long long SegmentInfo::GetTimeCodeScale() const { return m_timecodeScale; }
-Chapters::Atom::Atom()
-{
+long long SegmentInfo::GetDuration() const {
+ if (m_duration < 0)
+ return -1;
+
+ assert(m_timecodeScale >= 1);
+
+ const double dd = double(m_duration) * double(m_timecodeScale);
+ const long long d = static_cast<long long>(dd);
+
+ return d;
}
-
-Chapters::Atom::~Atom()
-{
+const char* SegmentInfo::GetMuxingAppAsUTF8() const {
+ return m_pMuxingAppAsUTF8;
}
-
-unsigned long long Chapters::Atom::GetUID() const
-{
- return m_uid;
+const char* SegmentInfo::GetWritingAppAsUTF8() const {
+ return m_pWritingAppAsUTF8;
}
-
-const char* Chapters::Atom::GetStringUID() const
-{
- return m_string_uid;
-}
-
-
-long long Chapters::Atom::GetStartTimecode() const
-{
- return m_start_timecode;
-}
-
-
-long long Chapters::Atom::GetStopTimecode() const
-{
- return m_stop_timecode;
-}
-
-
-long long Chapters::Atom::GetStartTime(const Chapters* pChapters) const
-{
- return GetTime(pChapters, m_start_timecode);
-}
-
-
-long long Chapters::Atom::GetStopTime(const Chapters* pChapters) const
-{
- return GetTime(pChapters, m_stop_timecode);
-}
-
-
-int Chapters::Atom::GetDisplayCount() const
-{
- return m_displays_count;
-}
-
-
-const Chapters::Display* Chapters::Atom::GetDisplay(int index) const
-{
- if (index < 0)
- return NULL;
-
- if (index >= m_displays_count)
- return NULL;
-
- return m_displays + index;
-}
-
-
-void Chapters::Atom::Init()
-{
- m_string_uid = NULL;
- m_uid = 0;
- m_start_timecode = -1;
- m_stop_timecode = -1;
-
- m_displays = NULL;
- m_displays_size = 0;
- m_displays_count = 0;
-}
-
-
-void Chapters::Atom::ShallowCopy(Atom& rhs) const
-{
- rhs.m_string_uid = m_string_uid;
- rhs.m_uid = m_uid;
- rhs.m_start_timecode = m_start_timecode;
- rhs.m_stop_timecode = m_stop_timecode;
-
- rhs.m_displays = m_displays;
- rhs.m_displays_size = m_displays_size;
- rhs.m_displays_count = m_displays_count;
-}
-
-
-void Chapters::Atom::Clear()
-{
- delete[] m_string_uid;
- m_string_uid = NULL;
-
- while (m_displays_count > 0)
- {
- Display& d = m_displays[--m_displays_count];
- d.Clear();
- }
-
- delete[] m_displays;
- m_displays = NULL;
-
- m_displays_size = 0;
-}
-
-
-long Chapters::Atom::Parse(
- IMkvReader* pReader,
- long long pos,
- long long size)
-{
- const long long stop = pos + size;
-
- while (pos < stop)
- {
- long long id, size;
-
- long status = ParseElementHeader(
- pReader,
- pos,
- stop,
- id,
- size);
-
- if (status < 0) // error
- return status;
-
- if (size == 0) // weird
- continue;
-
- if (id == 0x00) // Display ID
- {
- status = ParseDisplay(pReader, pos, size);
-
- if (status < 0) // error
- return status;
- }
- else if (id == 0x1654) // StringUID ID
- {
- status = UnserializeString(pReader, pos, size, m_string_uid);
-
- if (status < 0) // error
- return status;
- }
- else if (id == 0x33C4) // UID ID
- {
- const long long val = UnserializeUInt(pReader, pos, size);
-
- if (val < 0) // error
- return static_cast<long>(val);
-
- m_uid = val;
- }
- else if (id == 0x11) // TimeStart ID
- {
- const long long val = UnserializeUInt(pReader, pos, size);
-
- if (val < 0) // error
- return static_cast<long>(val);
-
- m_start_timecode = val;
- }
- else if (id == 0x12) // TimeEnd ID
- {
- const long long val = UnserializeUInt(pReader, pos, size);
-
- if (val < 0) // error
- return static_cast<long>(val);
-
- m_stop_timecode = val;
- }
-
- pos += size;
- assert(pos <= stop);
- }
-
- assert(pos == stop);
- return 0;
-}
-
-
-long long Chapters::Atom::GetTime(
- const Chapters* pChapters,
- long long timecode)
-{
- if (pChapters == NULL)
- return -1;
-
- Segment* const pSegment = pChapters->m_pSegment;
-
- if (pSegment == NULL) // weird
- return -1;
-
- const SegmentInfo* const pInfo = pSegment->GetInfo();
-
- if (pInfo == NULL)
- return -1;
-
- const long long timecode_scale = pInfo->GetTimeCodeScale();
-
- if (timecode_scale < 1) // weird
- return -1;
-
- if (timecode < 0)
- return -1;
-
- const long long result = timecode_scale * timecode;
-
- return result;
-}
-
-
-long Chapters::Atom::ParseDisplay(
- IMkvReader* pReader,
- long long pos,
- long long size)
-{
- if (!ExpandDisplaysArray())
- return -1;
-
- Display& d = m_displays[m_displays_count++];
- d.Init();
-
- return d.Parse(pReader, pos, size);
-}
-
-
-bool Chapters::Atom::ExpandDisplaysArray()
-{
- if (m_displays_size > m_displays_count)
- return true; // nothing else to do
-
- const int size = (m_displays_size == 0) ? 1 : 2 * m_displays_size;
-
- Display* const displays = new (std::nothrow) Display[size];
-
- if (displays == NULL)
- return false;
-
- for (int idx = 0; idx < m_displays_count; ++idx)
- {
- m_displays[idx].ShallowCopy(displays[idx]);
- }
-
- delete[] m_displays;
- m_displays = displays;
-
- m_displays_size = size;
- return true;
-}
-
-
-Chapters::Display::Display()
-{
-}
-
-
-Chapters::Display::~Display()
-{
-}
-
-
-const char* Chapters::Display::GetString() const
-{
- return m_string;
-}
-
-
-const char* Chapters::Display::GetLanguage() const
-{
- return m_language;
-}
-
-
-const char* Chapters::Display::GetCountry() const
-{
- return m_country;
-}
-
-
-void Chapters::Display::Init()
-{
- m_string = NULL;
- m_language = NULL;
- m_country = NULL;
-}
-
-
-void Chapters::Display::ShallowCopy(Display& rhs) const
-{
- rhs.m_string = m_string;
- rhs.m_language = m_language;
- rhs.m_country = m_country;
-}
-
-
-void Chapters::Display::Clear()
-{
- delete[] m_string;
- m_string = NULL;
-
- delete[] m_language;
- m_language = NULL;
-
- delete[] m_country;
- m_country = NULL;
-}
-
-
-long Chapters::Display::Parse(
- IMkvReader* pReader,
- long long pos,
- long long size)
-{
- const long long stop = pos + size;
-
- while (pos < stop)
- {
- long long id, size;
-
- long status = ParseElementHeader(
- pReader,
- pos,
- stop,
- id,
- size);
-
- if (status < 0) // error
- return status;
-
- if (size == 0) // weird
- continue;
-
- if (id == 0x05) // ChapterString ID
- {
- status = UnserializeString(pReader, pos, size, m_string);
-
- if (status)
- return status;
- }
- else if (id == 0x037C) // ChapterLanguage ID
- {
- status = UnserializeString(pReader, pos, size, m_language);
-
- if (status)
- return status;
- }
- else if (id == 0x037E) // ChapterCountry ID
- {
- status = UnserializeString(pReader, pos, size, m_country);
-
- if (status)
- return status;
- }
-
- pos += size;
- assert(pos <= stop);
- }
-
- assert(pos == stop);
- return 0;
-}
-
-
-SegmentInfo::SegmentInfo(
- Segment* pSegment,
- long long start,
- long long size_,
- long long element_start,
- long long element_size) :
- m_pSegment(pSegment),
- m_start(start),
- m_size(size_),
- m_element_start(element_start),
- m_element_size(element_size),
- m_pMuxingAppAsUTF8(NULL),
- m_pWritingAppAsUTF8(NULL),
- m_pTitleAsUTF8(NULL)
-{
-}
-
-SegmentInfo::~SegmentInfo()
-{
- delete[] m_pMuxingAppAsUTF8;
- m_pMuxingAppAsUTF8 = NULL;
-
- delete[] m_pWritingAppAsUTF8;
- m_pWritingAppAsUTF8 = NULL;
-
- delete[] m_pTitleAsUTF8;
- m_pTitleAsUTF8 = NULL;
-}
-
-
-long SegmentInfo::Parse()
-{
- assert(m_pMuxingAppAsUTF8 == NULL);
- assert(m_pWritingAppAsUTF8 == NULL);
- assert(m_pTitleAsUTF8 == NULL);
-
- IMkvReader* const pReader = m_pSegment->m_pReader;
-
- long long pos = m_start;
- const long long stop = m_start + m_size;
-
- m_timecodeScale = 1000000;
- m_duration = -1;
-
- while (pos < stop)
- {
- long long id, size;
-
- const long status = ParseElementHeader(
- pReader,
- pos,
- stop,
- id,
- size);
-
- if (status < 0) //error
- return status;
-
- if (id == 0x0AD7B1) //Timecode Scale
- {
- m_timecodeScale = UnserializeUInt(pReader, pos, size);
-
- if (m_timecodeScale <= 0)
- return E_FILE_FORMAT_INVALID;
- }
- else if (id == 0x0489) //Segment duration
- {
- const long status = UnserializeFloat(
- pReader,
- pos,
- size,
- m_duration);
-
- if (status < 0)
- return status;
-
- if (m_duration < 0)
- return E_FILE_FORMAT_INVALID;
- }
- else if (id == 0x0D80) //MuxingApp
- {
- const long status = UnserializeString(
- pReader,
- pos,
- size,
- m_pMuxingAppAsUTF8);
-
- if (status)
- return status;
- }
- else if (id == 0x1741) //WritingApp
- {
- const long status = UnserializeString(
- pReader,
- pos,
- size,
- m_pWritingAppAsUTF8);
-
- if (status)
- return status;
- }
- else if (id == 0x3BA9) //Title
- {
- const long status = UnserializeString(
- pReader,
- pos,
- size,
- m_pTitleAsUTF8);
-
- if (status)
- return status;
- }
-
- pos += size;
- assert(pos <= stop);
- }
-
- assert(pos == stop);
-
- return 0;
-}
-
-
-long long SegmentInfo::GetTimeCodeScale() const
-{
- return m_timecodeScale;
-}
-
-
-long long SegmentInfo::GetDuration() const
-{
- if (m_duration < 0)
- return -1;
-
- assert(m_timecodeScale >= 1);
-
- const double dd = double(m_duration) * double(m_timecodeScale);
- const long long d = static_cast<long long>(dd);
-
- return d;
-}
-
-const char* SegmentInfo::GetMuxingAppAsUTF8() const
-{
- return m_pMuxingAppAsUTF8;
-}
-
-
-const char* SegmentInfo::GetWritingAppAsUTF8() const
-{
- return m_pWritingAppAsUTF8;
-}
-
-const char* SegmentInfo::GetTitleAsUTF8() const
-{
- return m_pTitleAsUTF8;
-}
+const char* SegmentInfo::GetTitleAsUTF8() const { return m_pTitleAsUTF8; }
///////////////////////////////////////////////////////////////
// ContentEncoding element
ContentEncoding::ContentCompression::ContentCompression()
- : algo(0),
- settings(NULL),
- settings_len(0) {
-}
+ : algo(0), settings(NULL), settings_len(0) {}
ContentEncoding::ContentCompression::~ContentCompression() {
- delete [] settings;
+ delete[] settings;
}
ContentEncoding::ContentEncryption::ContentEncryption()
@@ -5064,13 +4418,12 @@
sig_key_id(NULL),
sig_key_id_len(0),
sig_algo(0),
- sig_hash_algo(0) {
-}
+ sig_hash_algo(0) {}
ContentEncoding::ContentEncryption::~ContentEncryption() {
- delete [] key_id;
- delete [] signature;
- delete [] sig_key_id;
+ delete[] key_id;
+ delete[] signature;
+ delete[] sig_key_id;
}
ContentEncoding::ContentEncoding()
@@ -5080,8 +4433,7 @@
encryption_entries_end_(NULL),
encoding_order_(0),
encoding_scope_(1),
- encoding_type_(0) {
-}
+ encoding_type_(0) {}
ContentEncoding::~ContentEncoding() {
ContentCompression** comp_i = compression_entries_;
@@ -5092,7 +4444,7 @@
delete comp;
}
- delete [] compression_entries_;
+ delete[] compression_entries_;
ContentEncryption** enc_i = encryption_entries_;
ContentEncryption** const enc_j = encryption_entries_end_;
@@ -5102,10 +4454,9 @@
delete enc;
}
- delete [] encryption_entries_;
+ delete[] encryption_entries_;
}
-
const ContentEncoding::ContentCompression*
ContentEncoding::GetCompressionByIndex(unsigned long idx) const {
const ptrdiff_t count = compression_entries_end_ - compression_entries_;
@@ -5124,8 +4475,8 @@
return static_cast<unsigned long>(count);
}
-const ContentEncoding::ContentEncryption*
-ContentEncoding::GetEncryptionByIndex(unsigned long idx) const {
+const ContentEncoding::ContentEncryption* ContentEncoding::GetEncryptionByIndex(
+ unsigned long idx) const {
const ptrdiff_t count = encryption_entries_end_ - encryption_entries_;
assert(count >= 0);
@@ -5143,9 +4494,7 @@
}
long ContentEncoding::ParseContentEncAESSettingsEntry(
- long long start,
- long long size,
- IMkvReader* pReader,
+ long long start, long long size, IMkvReader* pReader,
ContentEncAESSettings* aes) {
assert(pReader);
assert(aes);
@@ -5155,12 +4504,8 @@
while (pos < stop) {
long long id, size;
- const long status = ParseElementHeader(pReader,
- pos,
- stop,
- id,
- size);
- if (status < 0) //error
+ const long status = ParseElementHeader(pReader, pos, stop, id, size);
+ if (status < 0) // error
return status;
if (id == 0x7E8) {
@@ -5170,15 +4515,14 @@
return E_FILE_FORMAT_INVALID;
}
- pos += size; //consume payload
+ pos += size; // consume payload
assert(pos <= stop);
}
return 0;
}
-long ContentEncoding::ParseContentEncodingEntry(long long start,
- long long size,
+long ContentEncoding::ParseContentEncodingEntry(long long start, long long size,
IMkvReader* pReader) {
assert(pReader);
@@ -5191,12 +4535,8 @@
while (pos < stop) {
long long id, size;
- const long status = ParseElementHeader(pReader,
- pos,
- stop,
- id,
- size);
- if (status < 0) //error
+ const long status = ParseElementHeader(pReader, pos, stop, id, size);
+ if (status < 0) // error
return status;
if (id == 0x1034) // ContentCompression ID
@@ -5205,7 +4545,7 @@
if (id == 0x1035) // ContentEncryption ID
++encryption_count;
- pos += size; //consume payload
+ pos += size; // consume payload
assert(pos <= stop);
}
@@ -5214,7 +4554,7 @@
if (compression_count > 0) {
compression_entries_ =
- new (std::nothrow) ContentCompression*[compression_count];
+ new (std::nothrow) ContentCompression* [compression_count];
if (!compression_entries_)
return -1;
compression_entries_end_ = compression_entries_;
@@ -5222,9 +4562,9 @@
if (encryption_count > 0) {
encryption_entries_ =
- new (std::nothrow) ContentEncryption*[encryption_count];
+ new (std::nothrow) ContentEncryption* [encryption_count];
if (!encryption_entries_) {
- delete [] compression_entries_;
+ delete[] compression_entries_;
return -1;
}
encryption_entries_end_ = encryption_entries_;
@@ -5233,12 +4573,8 @@
pos = start;
while (pos < stop) {
long long id, size;
- long status = ParseElementHeader(pReader,
- pos,
- stop,
- id,
- size);
- if (status < 0) //error
+ long status = ParseElementHeader(pReader, pos, stop, id, size);
+ if (status < 0) // error
return status;
if (id == 0x1031) {
@@ -5255,7 +4591,7 @@
} else if (id == 0x1034) {
// ContentCompression ID
ContentCompression* const compression =
- new (std::nothrow) ContentCompression();
+ new (std::nothrow) ContentCompression();
if (!compression)
return -1;
@@ -5280,7 +4616,7 @@
*encryption_entries_end_++ = encryption;
}
- pos += size; //consume payload
+ pos += size; // consume payload
assert(pos <= stop);
}
@@ -5288,11 +4624,9 @@
return 0;
}
-long ContentEncoding::ParseCompressionEntry(
- long long start,
- long long size,
- IMkvReader* pReader,
- ContentCompression* compression) {
+long ContentEncoding::ParseCompressionEntry(long long start, long long size,
+ IMkvReader* pReader,
+ ContentCompression* compression) {
assert(pReader);
assert(compression);
@@ -5303,12 +4637,8 @@
while (pos < stop) {
long long id, size;
- const long status = ParseElementHeader(pReader,
- pos,
- stop,
- id,
- size);
- if (status < 0) //error
+ const long status = ParseElementHeader(pReader, pos, stop, id, size);
+ if (status < 0) // error
return status;
if (id == 0x254) {
@@ -5329,9 +4659,10 @@
if (buf == NULL)
return -1;
- const int read_status = pReader->Read(pos, buflen, buf);
+ const int read_status =
+ pReader->Read(pos, static_cast<long>(buflen), buf);
if (read_status) {
- delete [] buf;
+ delete[] buf;
return status;
}
@@ -5339,7 +4670,7 @@
compression->settings_len = buflen;
}
- pos += size; //consume payload
+ pos += size; // consume payload
assert(pos <= stop);
}
@@ -5350,11 +4681,9 @@
return 0;
}
-long ContentEncoding::ParseEncryptionEntry(
- long long start,
- long long size,
- IMkvReader* pReader,
- ContentEncryption* encryption) {
+long ContentEncoding::ParseEncryptionEntry(long long start, long long size,
+ IMkvReader* pReader,
+ ContentEncryption* encryption) {
assert(pReader);
assert(encryption);
@@ -5363,12 +4692,8 @@
while (pos < stop) {
long long id, size;
- const long status = ParseElementHeader(pReader,
- pos,
- stop,
- id,
- size);
- if (status < 0) //error
+ const long status = ParseElementHeader(pReader, pos, stop, id, size);
+ if (status < 0) // error
return status;
if (id == 0x7E1) {
@@ -5378,7 +4703,7 @@
return E_FILE_FORMAT_INVALID;
} else if (id == 0x7E2) {
// ContentEncKeyID
- delete[] encryption->key_id;
+ delete[] encryption -> key_id;
encryption->key_id = NULL;
encryption->key_id_len = 0;
@@ -5391,9 +4716,10 @@
if (buf == NULL)
return -1;
- const int read_status = pReader->Read(pos, buflen, buf);
+ const int read_status =
+ pReader->Read(pos, static_cast<long>(buflen), buf);
if (read_status) {
- delete [] buf;
+ delete[] buf;
return status;
}
@@ -5401,7 +4727,7 @@
encryption->key_id_len = buflen;
} else if (id == 0x7E3) {
// ContentSignature
- delete[] encryption->signature;
+ delete[] encryption -> signature;
encryption->signature = NULL;
encryption->signature_len = 0;
@@ -5414,9 +4740,10 @@
if (buf == NULL)
return -1;
- const int read_status = pReader->Read(pos, buflen, buf);
+ const int read_status =
+ pReader->Read(pos, static_cast<long>(buflen), buf);
if (read_status) {
- delete [] buf;
+ delete[] buf;
return status;
}
@@ -5424,7 +4751,7 @@
encryption->signature_len = buflen;
} else if (id == 0x7E4) {
// ContentSigKeyID
- delete[] encryption->sig_key_id;
+ delete[] encryption -> sig_key_id;
encryption->sig_key_id = NULL;
encryption->sig_key_id_len = 0;
@@ -5437,9 +4764,10 @@
if (buf == NULL)
return -1;
- const int read_status = pReader->Read(pos, buflen, buf);
+ const int read_status =
+ pReader->Read(pos, static_cast<long>(buflen), buf);
if (read_status) {
- delete [] buf;
+ delete[] buf;
return status;
}
@@ -5454,400 +4782,322 @@
} else if (id == 0x7E7) {
// ContentEncAESSettings
const long status = ParseContentEncAESSettingsEntry(
- pos,
- size,
- pReader,
- &encryption->aes_settings);
+ pos, size, pReader, &encryption->aes_settings);
if (status)
return status;
}
- pos += size; //consume payload
+ pos += size; // consume payload
assert(pos <= stop);
}
return 0;
}
-Track::Track(
- Segment* pSegment,
- long long element_start,
- long long element_size) :
- m_pSegment(pSegment),
- m_element_start(element_start),
- m_element_size(element_size),
- content_encoding_entries_(NULL),
- content_encoding_entries_end_(NULL)
-{
+Track::Track(Segment* pSegment, long long element_start, long long element_size)
+ : m_pSegment(pSegment),
+ m_element_start(element_start),
+ m_element_size(element_size),
+ content_encoding_entries_(NULL),
+ content_encoding_entries_end_(NULL) {}
+
+Track::~Track() {
+ Info& info = const_cast<Info&>(m_info);
+ info.Clear();
+
+ ContentEncoding** i = content_encoding_entries_;
+ ContentEncoding** const j = content_encoding_entries_end_;
+
+ while (i != j) {
+ ContentEncoding* const encoding = *i++;
+ delete encoding;
+ }
+
+ delete[] content_encoding_entries_;
}
-Track::~Track()
-{
- Info& info = const_cast<Info&>(m_info);
- info.Clear();
+long Track::Create(Segment* pSegment, const Info& info, long long element_start,
+ long long element_size, Track*& pResult) {
+ if (pResult)
+ return -1;
- ContentEncoding** i = content_encoding_entries_;
- ContentEncoding** const j = content_encoding_entries_end_;
+ Track* const pTrack =
+ new (std::nothrow) Track(pSegment, element_start, element_size);
- while (i != j) {
- ContentEncoding* const encoding = *i++;
- delete encoding;
- }
+ if (pTrack == NULL)
+ return -1; // generic error
- delete [] content_encoding_entries_;
+ const int status = info.Copy(pTrack->m_info);
+
+ if (status) { // error
+ delete pTrack;
+ return status;
+ }
+
+ pResult = pTrack;
+ return 0; // success
}
-long Track::Create(
- Segment* pSegment,
- const Info& info,
- long long element_start,
- long long element_size,
- Track*& pResult)
-{
- if (pResult)
- return -1;
+Track::Info::Info()
+ : uid(0),
+ defaultDuration(0),
+ codecDelay(0),
+ seekPreRoll(0),
+ nameAsUTF8(NULL),
+ language(NULL),
+ codecId(NULL),
+ codecNameAsUTF8(NULL),
+ codecPrivate(NULL),
+ codecPrivateSize(0),
+ lacing(false) {}
- Track* const pTrack = new (std::nothrow) Track(pSegment,
- element_start,
- element_size);
+Track::Info::~Info() { Clear(); }
- if (pTrack == NULL)
- return -1; //generic error
+void Track::Info::Clear() {
+ delete[] nameAsUTF8;
+ nameAsUTF8 = NULL;
- const int status = info.Copy(pTrack->m_info);
+ delete[] language;
+ language = NULL;
- if (status) // error
- {
- delete pTrack;
- return status;
- }
+ delete[] codecId;
+ codecId = NULL;
- pResult = pTrack;
- return 0; //success
+ delete[] codecPrivate;
+ codecPrivate = NULL;
+ codecPrivateSize = 0;
+
+ delete[] codecNameAsUTF8;
+ codecNameAsUTF8 = NULL;
}
-Track::Info::Info():
- uid(0),
- defaultDuration(0),
- codecDelay(0),
- seekPreRoll(0),
- nameAsUTF8(NULL),
- language(NULL),
- codecId(NULL),
- codecNameAsUTF8(NULL),
- codecPrivate(NULL),
- codecPrivateSize(0),
- lacing(false)
-{
-}
+int Track::Info::CopyStr(char* Info::*str, Info& dst_) const {
+ if (str == static_cast<char * Info::*>(NULL))
+ return -1;
-Track::Info::~Info()
-{
- Clear();
-}
+ char*& dst = dst_.*str;
-void Track::Info::Clear()
-{
- delete[] nameAsUTF8;
- nameAsUTF8 = NULL;
+ if (dst) // should be NULL already
+ return -1;
- delete[] language;
- language = NULL;
+ const char* const src = this->*str;
- delete[] codecId;
- codecId = NULL;
-
- delete[] codecPrivate;
- codecPrivate = NULL;
- codecPrivateSize = 0;
-
- delete[] codecNameAsUTF8;
- codecNameAsUTF8 = NULL;
-}
-
-int Track::Info::CopyStr(char* Info::*str, Info& dst_) const
-{
- if (str == static_cast<char* Info::*>(NULL))
- return -1;
-
- char*& dst = dst_.*str;
-
- if (dst) //should be NULL already
- return -1;
-
- const char* const src = this->*str;
-
- if (src == NULL)
- return 0;
-
- const size_t len = strlen(src);
-
- dst = new (std::nothrow) char[len+1];
-
- if (dst == NULL)
- return -1;
-
- strcpy(dst, src);
-
+ if (src == NULL)
return 0;
+
+ const size_t len = strlen(src);
+
+ dst = new (std::nothrow) char[len + 1];
+
+ if (dst == NULL)
+ return -1;
+
+ strcpy(dst, src);
+
+ return 0;
}
+int Track::Info::Copy(Info& dst) const {
+ if (&dst == this)
+ return 0;
-int Track::Info::Copy(Info& dst) const
-{
- if (&dst == this)
- return 0;
+ dst.type = type;
+ dst.number = number;
+ dst.defaultDuration = defaultDuration;
+ dst.codecDelay = codecDelay;
+ dst.seekPreRoll = seekPreRoll;
+ dst.uid = uid;
+ dst.lacing = lacing;
+ dst.settings = settings;
- dst.type = type;
- dst.number = number;
- dst.defaultDuration = defaultDuration;
- dst.codecDelay = codecDelay;
- dst.seekPreRoll = seekPreRoll;
- dst.uid = uid;
- dst.lacing = lacing;
- dst.settings = settings;
+ // We now copy the string member variables from src to dst.
+ // This involves memory allocation so in principle the operation
+ // can fail (indeed, that's why we have Info::Copy), so we must
+ // report this to the caller. An error return from this function
+ // therefore implies that the copy was only partially successful.
- //We now copy the string member variables from src to dst.
- //This involves memory allocation so in principle the operation
- //can fail (indeed, that's why we have Info::Copy), so we must
- //report this to the caller. An error return from this function
- //therefore implies that the copy was only partially successful.
+ if (int status = CopyStr(&Info::nameAsUTF8, dst))
+ return status;
- if (int status = CopyStr(&Info::nameAsUTF8, dst))
- return status;
+ if (int status = CopyStr(&Info::language, dst))
+ return status;
- if (int status = CopyStr(&Info::language, dst))
- return status;
+ if (int status = CopyStr(&Info::codecId, dst))
+ return status;
- if (int status = CopyStr(&Info::codecId, dst))
- return status;
+ if (int status = CopyStr(&Info::codecNameAsUTF8, dst))
+ return status;
- if (int status = CopyStr(&Info::codecNameAsUTF8, dst))
- return status;
+ if (codecPrivateSize > 0) {
+ if (codecPrivate == NULL)
+ return -1;
- if (codecPrivateSize > 0)
- {
- if (codecPrivate == NULL)
- return -1;
+ if (dst.codecPrivate)
+ return -1;
- if (dst.codecPrivate)
- return -1;
+ if (dst.codecPrivateSize != 0)
+ return -1;
- if (dst.codecPrivateSize != 0)
- return -1;
+ dst.codecPrivate = new (std::nothrow) unsigned char[codecPrivateSize];
- dst.codecPrivate = new (std::nothrow) unsigned char[codecPrivateSize];
+ if (dst.codecPrivate == NULL)
+ return -1;
- if (dst.codecPrivate == NULL)
- return -1;
+ memcpy(dst.codecPrivate, codecPrivate, codecPrivateSize);
+ dst.codecPrivateSize = codecPrivateSize;
+ }
- memcpy(dst.codecPrivate, codecPrivate, codecPrivateSize);
- dst.codecPrivateSize = codecPrivateSize;
+ return 0;
+}
+
+const BlockEntry* Track::GetEOS() const { return &m_eos; }
+
+long Track::GetType() const { return m_info.type; }
+
+long Track::GetNumber() const { return m_info.number; }
+
+unsigned long long Track::GetUid() const { return m_info.uid; }
+
+const char* Track::GetNameAsUTF8() const { return m_info.nameAsUTF8; }
+
+const char* Track::GetLanguage() const { return m_info.language; }
+
+const char* Track::GetCodecNameAsUTF8() const { return m_info.codecNameAsUTF8; }
+
+const char* Track::GetCodecId() const { return m_info.codecId; }
+
+const unsigned char* Track::GetCodecPrivate(size_t& size) const {
+ size = m_info.codecPrivateSize;
+ return m_info.codecPrivate;
+}
+
+bool Track::GetLacing() const { return m_info.lacing; }
+
+unsigned long long Track::GetDefaultDuration() const {
+ return m_info.defaultDuration;
+}
+
+unsigned long long Track::GetCodecDelay() const { return m_info.codecDelay; }
+
+unsigned long long Track::GetSeekPreRoll() const { return m_info.seekPreRoll; }
+
+long Track::GetFirst(const BlockEntry*& pBlockEntry) const {
+ const Cluster* pCluster = m_pSegment->GetFirst();
+
+ for (int i = 0;;) {
+ if (pCluster == NULL) {
+ pBlockEntry = GetEOS();
+ return 1;
}
- return 0;
-}
-
-const BlockEntry* Track::GetEOS() const
-{
- return &m_eos;
-}
-
-long Track::GetType() const
-{
- return m_info.type;
-}
-
-long Track::GetNumber() const
-{
- return m_info.number;
-}
-
-unsigned long long Track::GetUid() const
-{
- return m_info.uid;
-}
-
-const char* Track::GetNameAsUTF8() const
-{
- return m_info.nameAsUTF8;
-}
-
-const char* Track::GetLanguage() const
-{
- return m_info.language;
-}
-
-const char* Track::GetCodecNameAsUTF8() const
-{
- return m_info.codecNameAsUTF8;
-}
-
-
-const char* Track::GetCodecId() const
-{
- return m_info.codecId;
-}
-
-const unsigned char* Track::GetCodecPrivate(size_t& size) const
-{
- size = m_info.codecPrivateSize;
- return m_info.codecPrivate;
-}
-
-
-bool Track::GetLacing() const
-{
- return m_info.lacing;
-}
-
-unsigned long long Track::GetDefaultDuration() const
-{
- return m_info.defaultDuration;
-}
-
-unsigned long long Track::GetCodecDelay() const
-{
- return m_info.codecDelay;
-}
-
-unsigned long long Track::GetSeekPreRoll() const
-{
- return m_info.seekPreRoll;
-}
-
-long Track::GetFirst(const BlockEntry*& pBlockEntry) const
-{
- const Cluster* pCluster = m_pSegment->GetFirst();
-
- for (int i = 0; ; )
- {
- if (pCluster == NULL)
- {
- pBlockEntry = GetEOS();
- return 1;
- }
-
- if (pCluster->EOS())
- {
+ if (pCluster->EOS()) {
#if 0
- if (m_pSegment->Unparsed() <= 0) //all clusters have been loaded
- {
+ if (m_pSegment->Unparsed() <= 0) { //all clusters have been loaded
pBlockEntry = GetEOS();
return 1;
}
#else
- if (m_pSegment->DoneParsing())
- {
- pBlockEntry = GetEOS();
- return 1;
- }
+ if (m_pSegment->DoneParsing()) {
+ pBlockEntry = GetEOS();
+ return 1;
+ }
#endif
- pBlockEntry = 0;
- return E_BUFFER_NOT_FULL;
- }
-
- long status = pCluster->GetFirst(pBlockEntry);
-
- if (status < 0) //error
- return status;
-
- if (pBlockEntry == 0) //empty cluster
- {
- pCluster = m_pSegment->GetNext(pCluster);
- continue;
- }
-
- for (;;)
- {
- const Block* const pBlock = pBlockEntry->GetBlock();
- assert(pBlock);
-
- const long long tn = pBlock->GetTrackNumber();
-
- if ((tn == m_info.number) && VetEntry(pBlockEntry))
- return 0;
-
- const BlockEntry* pNextEntry;
-
- status = pCluster->GetNext(pBlockEntry, pNextEntry);
-
- if (status < 0) //error
- return status;
-
- if (pNextEntry == 0)
- break;
-
- pBlockEntry = pNextEntry;
- }
-
- ++i;
-
- if (i >= 100)
- break;
-
- pCluster = m_pSegment->GetNext(pCluster);
+ pBlockEntry = 0;
+ return E_BUFFER_NOT_FULL;
}
- //NOTE: if we get here, it means that we didn't find a block with
- //a matching track number. We interpret that as an error (which
- //might be too conservative).
+ long status = pCluster->GetFirst(pBlockEntry);
- pBlockEntry = GetEOS(); //so we can return a non-NULL value
- return 1;
-}
+ if (status < 0) // error
+ return status;
+ if (pBlockEntry == 0) { // empty cluster
+ pCluster = m_pSegment->GetNext(pCluster);
+ continue;
+ }
-long Track::GetNext(
- const BlockEntry* pCurrEntry,
- const BlockEntry*& pNextEntry) const
-{
- assert(pCurrEntry);
- assert(!pCurrEntry->EOS()); //?
+ for (;;) {
+ const Block* const pBlock = pBlockEntry->GetBlock();
+ assert(pBlock);
- const Block* const pCurrBlock = pCurrEntry->GetBlock();
- assert(pCurrBlock && pCurrBlock->GetTrackNumber() == m_info.number);
- if (!pCurrBlock || pCurrBlock->GetTrackNumber() != m_info.number)
- return -1;
+ const long long tn = pBlock->GetTrackNumber();
- const Cluster* pCluster = pCurrEntry->GetCluster();
- assert(pCluster);
- assert(!pCluster->EOS());
+ if ((tn == m_info.number) && VetEntry(pBlockEntry))
+ return 0;
- long status = pCluster->GetNext(pCurrEntry, pNextEntry);
+ const BlockEntry* pNextEntry;
- if (status < 0) //error
+ status = pCluster->GetNext(pBlockEntry, pNextEntry);
+
+ if (status < 0) // error
return status;
- for (int i = 0; ; )
- {
- while (pNextEntry)
- {
- const Block* const pNextBlock = pNextEntry->GetBlock();
- assert(pNextBlock);
+ if (pNextEntry == 0)
+ break;
- if (pNextBlock->GetTrackNumber() == m_info.number)
- return 0;
+ pBlockEntry = pNextEntry;
+ }
- pCurrEntry = pNextEntry;
+ ++i;
- status = pCluster->GetNext(pCurrEntry, pNextEntry);
+ if (i >= 100)
+ break;
- if (status < 0) //error
- return status;
- }
+ pCluster = m_pSegment->GetNext(pCluster);
+ }
- pCluster = m_pSegment->GetNext(pCluster);
+ // NOTE: if we get here, it means that we didn't find a block with
+ // a matching track number. We interpret that as an error (which
+ // might be too conservative).
- if (pCluster == NULL)
- {
- pNextEntry = GetEOS();
- return 1;
- }
+ pBlockEntry = GetEOS(); // so we can return a non-NULL value
+ return 1;
+}
- if (pCluster->EOS())
- {
+long Track::GetNext(const BlockEntry* pCurrEntry,
+ const BlockEntry*& pNextEntry) const {
+ assert(pCurrEntry);
+ assert(!pCurrEntry->EOS()); //?
+
+ const Block* const pCurrBlock = pCurrEntry->GetBlock();
+ assert(pCurrBlock && pCurrBlock->GetTrackNumber() == m_info.number);
+ if (!pCurrBlock || pCurrBlock->GetTrackNumber() != m_info.number)
+ return -1;
+
+ const Cluster* pCluster = pCurrEntry->GetCluster();
+ assert(pCluster);
+ assert(!pCluster->EOS());
+
+ long status = pCluster->GetNext(pCurrEntry, pNextEntry);
+
+ if (status < 0) // error
+ return status;
+
+ for (int i = 0;;) {
+ while (pNextEntry) {
+ const Block* const pNextBlock = pNextEntry->GetBlock();
+ assert(pNextBlock);
+
+ if (pNextBlock->GetTrackNumber() == m_info.number)
+ return 0;
+
+ pCurrEntry = pNextEntry;
+
+ status = pCluster->GetNext(pCurrEntry, pNextEntry);
+
+ if (status < 0) // error
+ return status;
+ }
+
+ pCluster = m_pSegment->GetNext(pCluster);
+
+ if (pCluster == NULL) {
+ pNextEntry = GetEOS();
+ return 1;
+ }
+
+ if (pCluster->EOS()) {
#if 0
if (m_pSegment->Unparsed() <= 0) //all clusters have been loaded
{
@@ -5855,155 +5105,148 @@
return 1;
}
#else
- if (m_pSegment->DoneParsing())
- {
- pNextEntry = GetEOS();
- return 1;
- }
+ if (m_pSegment->DoneParsing()) {
+ pNextEntry = GetEOS();
+ return 1;
+ }
#endif
- //TODO: there is a potential O(n^2) problem here: we tell the
- //caller to (pre)load another cluster, which he does, but then he
- //calls GetNext again, which repeats the same search. This is
- //a pathological case, since the only way it can happen is if
- //there exists a long sequence of clusters none of which contain a
- // block from this track. One way around this problem is for the
- //caller to be smarter when he loads another cluster: don't call
- //us back until you have a cluster that contains a block from this
- //track. (Of course, that's not cheap either, since our caller
- //would have to scan the each cluster as it's loaded, so that
- //would just push back the problem.)
+ // TODO: there is a potential O(n^2) problem here: we tell the
+ // caller to (pre)load another cluster, which he does, but then he
+ // calls GetNext again, which repeats the same search. This is
+ // a pathological case, since the only way it can happen is if
+ // there exists a long sequence of clusters none of which contain a
+ // block from this track. One way around this problem is for the
+ // caller to be smarter when he loads another cluster: don't call
+ // us back until you have a cluster that contains a block from this
+ // track. (Of course, that's not cheap either, since our caller
+ // would have to scan the each cluster as it's loaded, so that
+ // would just push back the problem.)
- pNextEntry = NULL;
- return E_BUFFER_NOT_FULL;
- }
-
- status = pCluster->GetFirst(pNextEntry);
-
- if (status < 0) //error
- return status;
-
- if (pNextEntry == NULL) //empty cluster
- continue;
-
- ++i;
-
- if (i >= 100)
- break;
+ pNextEntry = NULL;
+ return E_BUFFER_NOT_FULL;
}
- //NOTE: if we get here, it means that we didn't find a block with
- //a matching track number after lots of searching, so we give
- //up trying.
+ status = pCluster->GetFirst(pNextEntry);
- pNextEntry = GetEOS(); //so we can return a non-NULL value
- return 1;
+ if (status < 0) // error
+ return status;
+
+ if (pNextEntry == NULL) // empty cluster
+ continue;
+
+ ++i;
+
+ if (i >= 100)
+ break;
+ }
+
+ // NOTE: if we get here, it means that we didn't find a block with
+ // a matching track number after lots of searching, so we give
+ // up trying.
+
+ pNextEntry = GetEOS(); // so we can return a non-NULL value
+ return 1;
}
-bool Track::VetEntry(const BlockEntry* pBlockEntry) const
-{
- assert(pBlockEntry);
- const Block* const pBlock = pBlockEntry->GetBlock();
- assert(pBlock);
- assert(pBlock->GetTrackNumber() == m_info.number);
- if (!pBlock || pBlock->GetTrackNumber() != m_info.number)
- return false;
+bool Track::VetEntry(const BlockEntry* pBlockEntry) const {
+ assert(pBlockEntry);
+ const Block* const pBlock = pBlockEntry->GetBlock();
+ assert(pBlock);
+ assert(pBlock->GetTrackNumber() == m_info.number);
+ if (!pBlock || pBlock->GetTrackNumber() != m_info.number)
+ return false;
- // This function is used during a seek to determine whether the
- // frame is a valid seek target. This default function simply
- // returns true, which means all frames are valid seek targets.
- // It gets overridden by the VideoTrack class, because only video
- // keyframes can be used as seek target.
+ // This function is used during a seek to determine whether the
+ // frame is a valid seek target. This default function simply
+ // returns true, which means all frames are valid seek targets.
+ // It gets overridden by the VideoTrack class, because only video
+ // keyframes can be used as seek target.
- return true;
+ return true;
}
-long Track::Seek(
- long long time_ns,
- const BlockEntry*& pResult) const
-{
- const long status = GetFirst(pResult);
+long Track::Seek(long long time_ns, const BlockEntry*& pResult) const {
+ const long status = GetFirst(pResult);
- if (status < 0) //buffer underflow, etc
- return status;
+ if (status < 0) // buffer underflow, etc
+ return status;
- assert(pResult);
+ assert(pResult);
- if (pResult->EOS())
- return 0;
+ if (pResult->EOS())
+ return 0;
- const Cluster* pCluster = pResult->GetCluster();
+ const Cluster* pCluster = pResult->GetCluster();
+ assert(pCluster);
+ assert(pCluster->GetIndex() >= 0);
+
+ if (time_ns <= pResult->GetBlock()->GetTime(pCluster))
+ return 0;
+
+ Cluster** const clusters = m_pSegment->m_clusters;
+ assert(clusters);
+
+ const long count = m_pSegment->GetCount(); // loaded only, not preloaded
+ assert(count > 0);
+
+ Cluster** const i = clusters + pCluster->GetIndex();
+ assert(i);
+ assert(*i == pCluster);
+ assert(pCluster->GetTime() <= time_ns);
+
+ Cluster** const j = clusters + count;
+
+ Cluster** lo = i;
+ Cluster** hi = j;
+
+ while (lo < hi) {
+ // INVARIANT:
+ //[i, lo) <= time_ns
+ //[lo, hi) ?
+ //[hi, j) > time_ns
+
+ Cluster** const mid = lo + (hi - lo) / 2;
+ assert(mid < hi);
+
+ pCluster = *mid;
assert(pCluster);
assert(pCluster->GetIndex() >= 0);
+ assert(pCluster->GetIndex() == long(mid - m_pSegment->m_clusters));
- if (time_ns <= pResult->GetBlock()->GetTime(pCluster))
- return 0;
+ const long long t = pCluster->GetTime();
- Cluster** const clusters = m_pSegment->m_clusters;
- assert(clusters);
+ if (t <= time_ns)
+ lo = mid + 1;
+ else
+ hi = mid;
- const long count = m_pSegment->GetCount(); //loaded only, not preloaded
- assert(count > 0);
+ assert(lo <= hi);
+ }
- Cluster** const i = clusters + pCluster->GetIndex();
- assert(i);
- assert(*i == pCluster);
+ assert(lo == hi);
+ assert(lo > i);
+ assert(lo <= j);
+
+ while (lo > i) {
+ pCluster = *--lo;
+ assert(pCluster);
assert(pCluster->GetTime() <= time_ns);
- Cluster** const j = clusters + count;
+ pResult = pCluster->GetEntry(this);
- Cluster** lo = i;
- Cluster** hi = j;
+ if ((pResult != 0) && !pResult->EOS())
+ return 0;
- while (lo < hi)
- {
- //INVARIANT:
- //[i, lo) <= time_ns
- //[lo, hi) ?
- //[hi, j) > time_ns
+ // landed on empty cluster (no entries)
+ }
- Cluster** const mid = lo + (hi - lo) / 2;
- assert(mid < hi);
-
- pCluster = *mid;
- assert(pCluster);
- assert(pCluster->GetIndex() >= 0);
- assert(pCluster->GetIndex() == long(mid - m_pSegment->m_clusters));
-
- const long long t = pCluster->GetTime();
-
- if (t <= time_ns)
- lo = mid + 1;
- else
- hi = mid;
-
- assert(lo <= hi);
- }
-
- assert(lo == hi);
- assert(lo > i);
- assert(lo <= j);
-
- while (lo > i)
- {
- pCluster = *--lo;
- assert(pCluster);
- assert(pCluster->GetTime() <= time_ns);
-
- pResult = pCluster->GetEntry(this);
-
- if ((pResult != 0) && !pResult->EOS())
- return 0;
-
- //landed on empty cluster (no entries)
- }
-
- pResult = GetEOS(); //weird
- return 0;
+ pResult = GetEOS(); // weird
+ return 0;
}
-const ContentEncoding*
-Track::GetContentEncodingByIndex(unsigned long idx) const {
+const ContentEncoding* Track::GetContentEncodingByIndex(
+ unsigned long idx) const {
const ptrdiff_t count =
content_encoding_entries_end_ - content_encoding_entries_;
assert(count >= 0);
@@ -6033,27 +5276,22 @@
int count = 0;
while (pos < stop) {
long long id, size;
- const long status = ParseElementHeader(pReader,
- pos,
- stop,
- id,
- size);
- if (status < 0) //error
+ const long status = ParseElementHeader(pReader, pos, stop, id, size);
+ if (status < 0) // error
return status;
-
- //pos now designates start of element
+ // pos now designates start of element
if (id == 0x2240) // ContentEncoding ID
++count;
- pos += size; //consume payload
+ pos += size; // consume payload
assert(pos <= stop);
}
if (count <= 0)
return -1;
- content_encoding_entries_ = new (std::nothrow) ContentEncoding*[count];
+ content_encoding_entries_ = new (std::nothrow) ContentEncoding* [count];
if (!content_encoding_entries_)
return -1;
@@ -6062,24 +5300,18 @@
pos = start;
while (pos < stop) {
long long id, size;
- long status = ParseElementHeader(pReader,
- pos,
- stop,
- id,
- size);
- if (status < 0) //error
+ long status = ParseElementHeader(pReader, pos, stop, id, size);
+ if (status < 0) // error
return status;
- //pos now designates start of element
- if (id == 0x2240) { // ContentEncoding ID
+ // pos now designates start of element
+ if (id == 0x2240) { // ContentEncoding ID
ContentEncoding* const content_encoding =
new (std::nothrow) ContentEncoding();
if (!content_encoding)
return -1;
- status = content_encoding->ParseContentEncodingEntry(pos,
- size,
- pReader);
+ status = content_encoding->ParseContentEncodingEntry(pos, size, pReader);
if (status) {
delete content_encoding;
return status;
@@ -6088,7 +5320,7 @@
*content_encoding_entries_end_++ = content_encoding;
}
- pos += size; //consume payload
+ pos += size; // consume payload
assert(pos <= stop);
}
@@ -6097,219 +5329,175 @@
return 0;
}
-Track::EOSBlock::EOSBlock() :
- BlockEntry(NULL, LONG_MIN)
-{
-}
+Track::EOSBlock::EOSBlock() : BlockEntry(NULL, LONG_MIN) {}
-BlockEntry::Kind Track::EOSBlock::GetKind() const
-{
- return kBlockEOS;
-}
+BlockEntry::Kind Track::EOSBlock::GetKind() const { return kBlockEOS; }
+const Block* Track::EOSBlock::GetBlock() const { return NULL; }
-const Block* Track::EOSBlock::GetBlock() const
-{
- return NULL;
-}
+VideoTrack::VideoTrack(Segment* pSegment, long long element_start,
+ long long element_size)
+ : Track(pSegment, element_start, element_size) {}
+long VideoTrack::Parse(Segment* pSegment, const Info& info,
+ long long element_start, long long element_size,
+ VideoTrack*& pResult) {
+ if (pResult)
+ return -1;
-VideoTrack::VideoTrack(
- Segment* pSegment,
- long long element_start,
- long long element_size) :
- Track(pSegment, element_start, element_size)
-{
-}
+ if (info.type != Track::kVideo)
+ return -1;
+ long long width = 0;
+ long long height = 0;
+ double rate = 0.0;
-long VideoTrack::Parse(
- Segment* pSegment,
- const Info& info,
- long long element_start,
- long long element_size,
- VideoTrack*& pResult)
-{
- if (pResult)
- return -1;
+ IMkvReader* const pReader = pSegment->m_pReader;
- if (info.type != Track::kVideo)
- return -1;
+ const Settings& s = info.settings;
+ assert(s.start >= 0);
+ assert(s.size >= 0);
- long long width = 0;
- long long height = 0;
- double rate = 0.0;
+ long long pos = s.start;
+ assert(pos >= 0);
- IMkvReader* const pReader = pSegment->m_pReader;
+ const long long stop = pos + s.size;
- const Settings& s = info.settings;
- assert(s.start >= 0);
- assert(s.size >= 0);
+ while (pos < stop) {
+ long long id, size;
- long long pos = s.start;
- assert(pos >= 0);
+ const long status = ParseElementHeader(pReader, pos, stop, id, size);
- const long long stop = pos + s.size;
+ if (status < 0) // error
+ return status;
- while (pos < stop)
- {
- long long id, size;
+ if (id == 0x30) { // pixel width
+ width = UnserializeUInt(pReader, pos, size);
- const long status = ParseElementHeader(
- pReader,
- pos,
- stop,
- id,
- size);
+ if (width <= 0)
+ return E_FILE_FORMAT_INVALID;
+ } else if (id == 0x3A) { // pixel height
+ height = UnserializeUInt(pReader, pos, size);
- if (status < 0) //error
- return status;
+ if (height <= 0)
+ return E_FILE_FORMAT_INVALID;
+ } else if (id == 0x0383E3) { // frame rate
+ const long status = UnserializeFloat(pReader, pos, size, rate);
- if (id == 0x30) //pixel width
- {
- width = UnserializeUInt(pReader, pos, size);
-
- if (width <= 0)
- return E_FILE_FORMAT_INVALID;
- }
- else if (id == 0x3A) //pixel height
- {
- height = UnserializeUInt(pReader, pos, size);
-
- if (height <= 0)
- return E_FILE_FORMAT_INVALID;
- }
- else if (id == 0x0383E3) //frame rate
- {
- const long status = UnserializeFloat(
- pReader,
- pos,
- size,
- rate);
-
- if (status < 0)
- return status;
-
- if (rate <= 0)
- return E_FILE_FORMAT_INVALID;
- }
-
- pos += size; //consume payload
- assert(pos <= stop);
- }
-
- assert(pos == stop);
-
- VideoTrack* const pTrack = new (std::nothrow) VideoTrack(pSegment,
- element_start,
- element_size);
-
- if (pTrack == NULL)
- return -1; //generic error
-
- const int status = info.Copy(pTrack->m_info);
-
- if (status) // error
- {
- delete pTrack;
- return status;
- }
-
- pTrack->m_width = width;
- pTrack->m_height = height;
- pTrack->m_rate = rate;
-
- pResult = pTrack;
- return 0; //success
-}
-
-
-bool VideoTrack::VetEntry(const BlockEntry* pBlockEntry) const
-{
- return Track::VetEntry(pBlockEntry) && pBlockEntry->GetBlock()->IsKey();
-}
-
-long VideoTrack::Seek(
- long long time_ns,
- const BlockEntry*& pResult) const
-{
- const long status = GetFirst(pResult);
-
- if (status < 0) //buffer underflow, etc
+ if (status < 0)
return status;
- assert(pResult);
+ if (rate <= 0)
+ return E_FILE_FORMAT_INVALID;
+ }
- if (pResult->EOS())
- return 0;
+ pos += size; // consume payload
+ assert(pos <= stop);
+ }
- const Cluster* pCluster = pResult->GetCluster();
+ assert(pos == stop);
+
+ VideoTrack* const pTrack =
+ new (std::nothrow) VideoTrack(pSegment, element_start, element_size);
+
+ if (pTrack == NULL)
+ return -1; // generic error
+
+ const int status = info.Copy(pTrack->m_info);
+
+ if (status) { // error
+ delete pTrack;
+ return status;
+ }
+
+ pTrack->m_width = width;
+ pTrack->m_height = height;
+ pTrack->m_rate = rate;
+
+ pResult = pTrack;
+ return 0; // success
+}
+
+bool VideoTrack::VetEntry(const BlockEntry* pBlockEntry) const {
+ return Track::VetEntry(pBlockEntry) && pBlockEntry->GetBlock()->IsKey();
+}
+
+long VideoTrack::Seek(long long time_ns, const BlockEntry*& pResult) const {
+ const long status = GetFirst(pResult);
+
+ if (status < 0) // buffer underflow, etc
+ return status;
+
+ assert(pResult);
+
+ if (pResult->EOS())
+ return 0;
+
+ const Cluster* pCluster = pResult->GetCluster();
+ assert(pCluster);
+ assert(pCluster->GetIndex() >= 0);
+
+ if (time_ns <= pResult->GetBlock()->GetTime(pCluster))
+ return 0;
+
+ Cluster** const clusters = m_pSegment->m_clusters;
+ assert(clusters);
+
+ const long count = m_pSegment->GetCount(); // loaded only, not pre-loaded
+ assert(count > 0);
+
+ Cluster** const i = clusters + pCluster->GetIndex();
+ assert(i);
+ assert(*i == pCluster);
+ assert(pCluster->GetTime() <= time_ns);
+
+ Cluster** const j = clusters + count;
+
+ Cluster** lo = i;
+ Cluster** hi = j;
+
+ while (lo < hi) {
+ // INVARIANT:
+ //[i, lo) <= time_ns
+ //[lo, hi) ?
+ //[hi, j) > time_ns
+
+ Cluster** const mid = lo + (hi - lo) / 2;
+ assert(mid < hi);
+
+ pCluster = *mid;
assert(pCluster);
assert(pCluster->GetIndex() >= 0);
+ assert(pCluster->GetIndex() == long(mid - m_pSegment->m_clusters));
- if (time_ns <= pResult->GetBlock()->GetTime(pCluster))
- return 0;
+ const long long t = pCluster->GetTime();
- Cluster** const clusters = m_pSegment->m_clusters;
- assert(clusters);
+ if (t <= time_ns)
+ lo = mid + 1;
+ else
+ hi = mid;
- const long count = m_pSegment->GetCount(); //loaded only, not pre-loaded
- assert(count > 0);
+ assert(lo <= hi);
+ }
- Cluster** const i = clusters + pCluster->GetIndex();
- assert(i);
- assert(*i == pCluster);
- assert(pCluster->GetTime() <= time_ns);
+ assert(lo == hi);
+ assert(lo > i);
+ assert(lo <= j);
- Cluster** const j = clusters + count;
+ pCluster = *--lo;
+ assert(pCluster);
+ assert(pCluster->GetTime() <= time_ns);
- Cluster** lo = i;
- Cluster** hi = j;
+ pResult = pCluster->GetEntry(this, time_ns);
- while (lo < hi)
- {
- //INVARIANT:
- //[i, lo) <= time_ns
- //[lo, hi) ?
- //[hi, j) > time_ns
+ if ((pResult != 0) && !pResult->EOS()) // found a keyframe
+ return 0;
- Cluster** const mid = lo + (hi - lo) / 2;
- assert(mid < hi);
-
- pCluster = *mid;
- assert(pCluster);
- assert(pCluster->GetIndex() >= 0);
- assert(pCluster->GetIndex() == long(mid - m_pSegment->m_clusters));
-
- const long long t = pCluster->GetTime();
-
- if (t <= time_ns)
- lo = mid + 1;
- else
- hi = mid;
-
- assert(lo <= hi);
- }
-
- assert(lo == hi);
- assert(lo > i);
- assert(lo <= j);
-
+ while (lo != i) {
pCluster = *--lo;
assert(pCluster);
assert(pCluster->GetTime() <= time_ns);
- pResult = pCluster->GetEntry(this, time_ns);
-
- if ((pResult != 0) && !pResult->EOS()) //found a keyframe
- return 0;
-
- while (lo != i)
- {
- pCluster = *--lo;
- assert(pCluster);
- assert(pCluster->GetTime() <= time_ns);
-
#if 0
//TODO:
//We need to handle the case when a cluster
@@ -6318,651 +5506,501 @@
//good enough.
pResult = pCluster->GetMaxKey(this);
#else
- pResult = pCluster->GetEntry(this, time_ns);
+ pResult = pCluster->GetEntry(this, time_ns);
#endif
- if ((pResult != 0) && !pResult->EOS())
- return 0;
+ if ((pResult != 0) && !pResult->EOS())
+ return 0;
+ }
+
+ // weird: we're on the first cluster, but no keyframe found
+ // should never happen but we must return something anyway
+
+ pResult = GetEOS();
+ return 0;
+}
+
+long long VideoTrack::GetWidth() const { return m_width; }
+
+long long VideoTrack::GetHeight() const { return m_height; }
+
+double VideoTrack::GetFrameRate() const { return m_rate; }
+
+AudioTrack::AudioTrack(Segment* pSegment, long long element_start,
+ long long element_size)
+ : Track(pSegment, element_start, element_size) {}
+
+long AudioTrack::Parse(Segment* pSegment, const Info& info,
+ long long element_start, long long element_size,
+ AudioTrack*& pResult) {
+ if (pResult)
+ return -1;
+
+ if (info.type != Track::kAudio)
+ return -1;
+
+ IMkvReader* const pReader = pSegment->m_pReader;
+
+ const Settings& s = info.settings;
+ assert(s.start >= 0);
+ assert(s.size >= 0);
+
+ long long pos = s.start;
+ assert(pos >= 0);
+
+ const long long stop = pos + s.size;
+
+ double rate = 8000.0; // MKV default
+ long long channels = 1;
+ long long bit_depth = 0;
+
+ while (pos < stop) {
+ long long id, size;
+
+ long status = ParseElementHeader(pReader, pos, stop, id, size);
+
+ if (status < 0) // error
+ return status;
+
+ if (id == 0x35) { // Sample Rate
+ status = UnserializeFloat(pReader, pos, size, rate);
+
+ if (status < 0)
+ return status;
+
+ if (rate <= 0)
+ return E_FILE_FORMAT_INVALID;
+ } else if (id == 0x1F) { // Channel Count
+ channels = UnserializeUInt(pReader, pos, size);
+
+ if (channels <= 0)
+ return E_FILE_FORMAT_INVALID;
+ } else if (id == 0x2264) { // Bit Depth
+ bit_depth = UnserializeUInt(pReader, pos, size);
+
+ if (bit_depth <= 0)
+ return E_FILE_FORMAT_INVALID;
}
- //weird: we're on the first cluster, but no keyframe found
- //should never happen but we must return something anyway
+ pos += size; // consume payload
+ assert(pos <= stop);
+ }
- pResult = GetEOS();
- return 0;
+ assert(pos == stop);
+
+ AudioTrack* const pTrack =
+ new (std::nothrow) AudioTrack(pSegment, element_start, element_size);
+
+ if (pTrack == NULL)
+ return -1; // generic error
+
+ const int status = info.Copy(pTrack->m_info);
+
+ if (status) {
+ delete pTrack;
+ return status;
+ }
+
+ pTrack->m_rate = rate;
+ pTrack->m_channels = channels;
+ pTrack->m_bitDepth = bit_depth;
+
+ pResult = pTrack;
+ return 0; // success
}
+double AudioTrack::GetSamplingRate() const { return m_rate; }
-long long VideoTrack::GetWidth() const
-{
- return m_width;
-}
+long long AudioTrack::GetChannels() const { return m_channels; }
+long long AudioTrack::GetBitDepth() const { return m_bitDepth; }
-long long VideoTrack::GetHeight() const
-{
- return m_height;
-}
+Tracks::Tracks(Segment* pSegment, long long start, long long size_,
+ long long element_start, long long element_size)
+ : m_pSegment(pSegment),
+ m_start(start),
+ m_size(size_),
+ m_element_start(element_start),
+ m_element_size(element_size),
+ m_trackEntries(NULL),
+ m_trackEntriesEnd(NULL) {}
+long Tracks::Parse() {
+ assert(m_trackEntries == NULL);
+ assert(m_trackEntriesEnd == NULL);
-double VideoTrack::GetFrameRate() const
-{
- return m_rate;
-}
+ const long long stop = m_start + m_size;
+ IMkvReader* const pReader = m_pSegment->m_pReader;
+ int count = 0;
+ long long pos = m_start;
-AudioTrack::AudioTrack(
- Segment* pSegment,
- long long element_start,
- long long element_size) :
- Track(pSegment, element_start, element_size)
-{
-}
+ while (pos < stop) {
+ long long id, size;
+ const long status = ParseElementHeader(pReader, pos, stop, id, size);
-long AudioTrack::Parse(
- Segment* pSegment,
- const Info& info,
- long long element_start,
- long long element_size,
- AudioTrack*& pResult)
-{
- if (pResult)
- return -1;
+ if (status < 0) // error
+ return status;
- if (info.type != Track::kAudio)
- return -1;
+ if (size == 0) // weird
+ continue;
- IMkvReader* const pReader = pSegment->m_pReader;
+ if (id == 0x2E) // TrackEntry ID
+ ++count;
- const Settings& s = info.settings;
- assert(s.start >= 0);
- assert(s.size >= 0);
+ pos += size; // consume payload
+ assert(pos <= stop);
+ }
- long long pos = s.start;
- assert(pos >= 0);
+ assert(pos == stop);
- const long long stop = pos + s.size;
+ if (count <= 0)
+ return 0; // success
- double rate = 8000.0; // MKV default
- long long channels = 1;
- long long bit_depth = 0;
+ m_trackEntries = new (std::nothrow) Track* [count];
- while (pos < stop)
- {
- long long id, size;
+ if (m_trackEntries == NULL)
+ return -1;
- long status = ParseElementHeader(
- pReader,
- pos,
- stop,
- id,
- size);
+ m_trackEntriesEnd = m_trackEntries;
- if (status < 0) //error
- return status;
+ pos = m_start;
- if (id == 0x35) //Sample Rate
- {
- status = UnserializeFloat(pReader, pos, size, rate);
+ while (pos < stop) {
+ const long long element_start = pos;
- if (status < 0)
- return status;
+ long long id, payload_size;
- if (rate <= 0)
- return E_FILE_FORMAT_INVALID;
- }
- else if (id == 0x1F) //Channel Count
- {
- channels = UnserializeUInt(pReader, pos, size);
+ const long status =
+ ParseElementHeader(pReader, pos, stop, id, payload_size);
- if (channels <= 0)
- return E_FILE_FORMAT_INVALID;
- }
- else if (id == 0x2264) //Bit Depth
- {
- bit_depth = UnserializeUInt(pReader, pos, size);
+ if (status < 0) // error
+ return status;
- if (bit_depth <= 0)
- return E_FILE_FORMAT_INVALID;
- }
+ if (payload_size == 0) // weird
+ continue;
- pos += size; //consume payload
- assert(pos <= stop);
+ const long long payload_stop = pos + payload_size;
+ assert(payload_stop <= stop); // checked in ParseElement
+
+ const long long element_size = payload_stop - element_start;
+
+ if (id == 0x2E) { // TrackEntry ID
+ Track*& pTrack = *m_trackEntriesEnd;
+ pTrack = NULL;
+
+ const long status = ParseTrackEntry(pos, payload_size, element_start,
+ element_size, pTrack);
+
+ if (status)
+ return status;
+
+ if (pTrack)
+ ++m_trackEntriesEnd;
}
- assert(pos == stop);
+ pos = payload_stop;
+ assert(pos <= stop);
+ }
- AudioTrack* const pTrack = new (std::nothrow) AudioTrack(pSegment,
- element_start,
- element_size);
+ assert(pos == stop);
- if (pTrack == NULL)
- return -1; //generic error
+ return 0; // success
+}
- const int status = info.Copy(pTrack->m_info);
+unsigned long Tracks::GetTracksCount() const {
+ const ptrdiff_t result = m_trackEntriesEnd - m_trackEntries;
+ assert(result >= 0);
+
+ return static_cast<unsigned long>(result);
+}
+
+long Tracks::ParseTrackEntry(long long track_start, long long track_size,
+ long long element_start, long long element_size,
+ Track*& pResult) const {
+ if (pResult)
+ return -1;
+
+ IMkvReader* const pReader = m_pSegment->m_pReader;
+
+ long long pos = track_start;
+ const long long track_stop = track_start + track_size;
+
+ Track::Info info;
+
+ info.type = 0;
+ info.number = 0;
+ info.uid = 0;
+ info.defaultDuration = 0;
+
+ Track::Settings v;
+ v.start = -1;
+ v.size = -1;
+
+ Track::Settings a;
+ a.start = -1;
+ a.size = -1;
+
+ Track::Settings e; // content_encodings_settings;
+ e.start = -1;
+ e.size = -1;
+
+ long long lacing = 1; // default is true
+
+ while (pos < track_stop) {
+ long long id, size;
+
+ const long status = ParseElementHeader(pReader, pos, track_stop, id, size);
+
+ if (status < 0) // error
+ return status;
+
+ if (size < 0)
+ return E_FILE_FORMAT_INVALID;
+
+ const long long start = pos;
+
+ if (id == 0x60) { // VideoSettings ID
+ v.start = start;
+ v.size = size;
+ } else if (id == 0x61) { // AudioSettings ID
+ a.start = start;
+ a.size = size;
+ } else if (id == 0x2D80) { // ContentEncodings ID
+ e.start = start;
+ e.size = size;
+ } else if (id == 0x33C5) { // Track UID
+ if (size > 8)
+ return E_FILE_FORMAT_INVALID;
+
+ info.uid = 0;
+
+ long long pos_ = start;
+ const long long pos_end = start + size;
+
+ while (pos_ != pos_end) {
+ unsigned char b;
+
+ const int status = pReader->Read(pos_, 1, &b);
+
+ if (status)
+ return status;
+
+ info.uid <<= 8;
+ info.uid |= b;
+
+ ++pos_;
+ }
+ } else if (id == 0x57) { // Track Number
+ const long long num = UnserializeUInt(pReader, pos, size);
+
+ if ((num <= 0) || (num > 127))
+ return E_FILE_FORMAT_INVALID;
+
+ info.number = static_cast<long>(num);
+ } else if (id == 0x03) { // Track Type
+ const long long type = UnserializeUInt(pReader, pos, size);
+
+ if ((type <= 0) || (type > 254))
+ return E_FILE_FORMAT_INVALID;
+
+ info.type = static_cast<long>(type);
+ } else if (id == 0x136E) { // Track Name
+ const long status =
+ UnserializeString(pReader, pos, size, info.nameAsUTF8);
+
+ if (status)
+ return status;
+ } else if (id == 0x02B59C) { // Track Language
+ const long status = UnserializeString(pReader, pos, size, info.language);
+
+ if (status)
+ return status;
+ } else if (id == 0x03E383) { // Default Duration
+ const long long duration = UnserializeUInt(pReader, pos, size);
+
+ if (duration < 0)
+ return E_FILE_FORMAT_INVALID;
+
+ info.defaultDuration = static_cast<unsigned long long>(duration);
+ } else if (id == 0x06) { // CodecID
+ const long status = UnserializeString(pReader, pos, size, info.codecId);
+
+ if (status)
+ return status;
+ } else if (id == 0x1C) { // lacing
+ lacing = UnserializeUInt(pReader, pos, size);
+
+ if ((lacing < 0) || (lacing > 1))
+ return E_FILE_FORMAT_INVALID;
+ } else if (id == 0x23A2) { // Codec Private
+ delete[] info.codecPrivate;
+ info.codecPrivate = NULL;
+ info.codecPrivateSize = 0;
+
+ const size_t buflen = static_cast<size_t>(size);
+
+ if (buflen) {
+ typedef unsigned char* buf_t;
+
+ const buf_t buf = new (std::nothrow) unsigned char[buflen];
+
+ if (buf == NULL)
+ return -1;
+
+ const int status = pReader->Read(pos, static_cast<long>(buflen), buf);
+
+ if (status) {
+ delete[] buf;
+ return status;
+ }
+
+ info.codecPrivate = buf;
+ info.codecPrivateSize = buflen;
+ }
+ } else if (id == 0x058688) { // Codec Name
+ const long status =
+ UnserializeString(pReader, pos, size, info.codecNameAsUTF8);
+
+ if (status)
+ return status;
+ } else if (id == 0x16AA) { // Codec Delay
+ info.codecDelay = UnserializeUInt(pReader, pos, size);
+ } else if (id == 0x16BB) { // Seek Pre Roll
+ info.seekPreRoll = UnserializeUInt(pReader, pos, size);
+ }
+
+ pos += size; // consume payload
+ assert(pos <= track_stop);
+ }
+
+ assert(pos == track_stop);
+
+ if (info.number <= 0) // not specified
+ return E_FILE_FORMAT_INVALID;
+
+ if (GetTrackByNumber(info.number))
+ return E_FILE_FORMAT_INVALID;
+
+ if (info.type <= 0) // not specified
+ return E_FILE_FORMAT_INVALID;
+
+ info.lacing = (lacing > 0) ? true : false;
+
+ if (info.type == Track::kVideo) {
+ if (v.start < 0)
+ return E_FILE_FORMAT_INVALID;
+
+ if (a.start >= 0)
+ return E_FILE_FORMAT_INVALID;
+
+ info.settings = v;
+
+ VideoTrack* pTrack = NULL;
+
+ const long status = VideoTrack::Parse(m_pSegment, info, element_start,
+ element_size, pTrack);
if (status)
- {
- delete pTrack;
- return status;
- }
-
- pTrack->m_rate = rate;
- pTrack->m_channels = channels;
- pTrack->m_bitDepth = bit_depth;
+ return status;
pResult = pTrack;
- return 0; //success
+ assert(pResult);
+
+ if (e.start >= 0)
+ pResult->ParseContentEncodingsEntry(e.start, e.size);
+ } else if (info.type == Track::kAudio) {
+ if (a.start < 0)
+ return E_FILE_FORMAT_INVALID;
+
+ if (v.start >= 0)
+ return E_FILE_FORMAT_INVALID;
+
+ info.settings = a;
+
+ AudioTrack* pTrack = NULL;
+
+ const long status = AudioTrack::Parse(m_pSegment, info, element_start,
+ element_size, pTrack);
+
+ if (status)
+ return status;
+
+ pResult = pTrack;
+ assert(pResult);
+
+ if (e.start >= 0)
+ pResult->ParseContentEncodingsEntry(e.start, e.size);
+ } else {
+ // neither video nor audio - probably metadata or subtitles
+
+ if (a.start >= 0)
+ return E_FILE_FORMAT_INVALID;
+
+ if (v.start >= 0)
+ return E_FILE_FORMAT_INVALID;
+
+ if (e.start >= 0)
+ return E_FILE_FORMAT_INVALID;
+
+ info.settings.start = -1;
+ info.settings.size = 0;
+
+ Track* pTrack = NULL;
+
+ const long status =
+ Track::Create(m_pSegment, info, element_start, element_size, pTrack);
+
+ if (status)
+ return status;
+
+ pResult = pTrack;
+ assert(pResult);
+ }
+
+ return 0; // success
}
+Tracks::~Tracks() {
+ Track** i = m_trackEntries;
+ Track** const j = m_trackEntriesEnd;
-double AudioTrack::GetSamplingRate() const
-{
- return m_rate;
+ while (i != j) {
+ Track* const pTrack = *i++;
+ delete pTrack;
+ }
+
+ delete[] m_trackEntries;
}
+const Track* Tracks::GetTrackByNumber(long tn) const {
+ if (tn < 0)
+ return NULL;
-long long AudioTrack::GetChannels() const
-{
- return m_channels;
+ Track** i = m_trackEntries;
+ Track** const j = m_trackEntriesEnd;
+
+ while (i != j) {
+ Track* const pTrack = *i++;
+
+ if (pTrack == NULL)
+ continue;
+
+ if (tn == pTrack->GetNumber())
+ return pTrack;
+ }
+
+ return NULL; // not found
}
-long long AudioTrack::GetBitDepth() const
-{
- return m_bitDepth;
-}
+const Track* Tracks::GetTrackByIndex(unsigned long idx) const {
+ const ptrdiff_t count = m_trackEntriesEnd - m_trackEntries;
-Tracks::Tracks(
- Segment* pSegment,
- long long start,
- long long size_,
- long long element_start,
- long long element_size) :
- m_pSegment(pSegment),
- m_start(start),
- m_size(size_),
- m_element_start(element_start),
- m_element_size(element_size),
- m_trackEntries(NULL),
- m_trackEntriesEnd(NULL)
-{
-}
+ if (idx >= static_cast<unsigned long>(count))
+ return NULL;
-
-long Tracks::Parse()
-{
- assert(m_trackEntries == NULL);
- assert(m_trackEntriesEnd == NULL);
-
- const long long stop = m_start + m_size;
- IMkvReader* const pReader = m_pSegment->m_pReader;
-
- int count = 0;
- long long pos = m_start;
-
- while (pos < stop)
- {
- long long id, size;
-
- const long status = ParseElementHeader(
- pReader,
- pos,
- stop,
- id,
- size);
-
- if (status < 0) //error
- return status;
-
- if (size == 0) //weird
- continue;
-
- if (id == 0x2E) //TrackEntry ID
- ++count;
-
- pos += size; //consume payload
- assert(pos <= stop);
- }
-
- assert(pos == stop);
-
- if (count <= 0)
- return 0; //success
-
- m_trackEntries = new (std::nothrow) Track*[count];
-
- if (m_trackEntries == NULL)
- return -1;
-
- m_trackEntriesEnd = m_trackEntries;
-
- pos = m_start;
-
- while (pos < stop)
- {
- const long long element_start = pos;
-
- long long id, payload_size;
-
- const long status = ParseElementHeader(
- pReader,
- pos,
- stop,
- id,
- payload_size);
-
- if (status < 0) //error
- return status;
-
- if (payload_size == 0) //weird
- continue;
-
- const long long payload_stop = pos + payload_size;
- assert(payload_stop <= stop); //checked in ParseElement
-
- const long long element_size = payload_stop - element_start;
-
- if (id == 0x2E) //TrackEntry ID
- {
- Track*& pTrack = *m_trackEntriesEnd;
- pTrack = NULL;
-
- const long status = ParseTrackEntry(
- pos,
- payload_size,
- element_start,
- element_size,
- pTrack);
-
- if (status)
- return status;
-
- if (pTrack)
- ++m_trackEntriesEnd;
- }
-
- pos = payload_stop;
- assert(pos <= stop);
- }
-
- assert(pos == stop);
-
- return 0; //success
-}
-
-
-unsigned long Tracks::GetTracksCount() const
-{
- const ptrdiff_t result = m_trackEntriesEnd - m_trackEntries;
- assert(result >= 0);
-
- return static_cast<unsigned long>(result);
-}
-
-long Tracks::ParseTrackEntry(
- long long track_start,
- long long track_size,
- long long element_start,
- long long element_size,
- Track*& pResult) const
-{
- if (pResult)
- return -1;
-
- IMkvReader* const pReader = m_pSegment->m_pReader;
-
- long long pos = track_start;
- const long long track_stop = track_start + track_size;
-
- Track::Info info;
-
- info.type = 0;
- info.number = 0;
- info.uid = 0;
- info.defaultDuration = 0;
-
- Track::Settings v;
- v.start = -1;
- v.size = -1;
-
- Track::Settings a;
- a.start = -1;
- a.size = -1;
-
- Track::Settings e; //content_encodings_settings;
- e.start = -1;
- e.size = -1;
-
- long long lacing = 1; //default is true
-
- while (pos < track_stop)
- {
- long long id, size;
-
- const long status = ParseElementHeader(
- pReader,
- pos,
- track_stop,
- id,
- size);
-
- if (status < 0) //error
- return status;
-
- if (size < 0)
- return E_FILE_FORMAT_INVALID;
-
- const long long start = pos;
-
- if (id == 0x60) // VideoSettings ID
- {
- v.start = start;
- v.size = size;
- }
- else if (id == 0x61) // AudioSettings ID
- {
- a.start = start;
- a.size = size;
- }
- else if (id == 0x2D80) // ContentEncodings ID
- {
- e.start = start;
- e.size = size;
- }
- else if (id == 0x33C5) //Track UID
- {
- if (size > 8)
- return E_FILE_FORMAT_INVALID;
-
- info.uid = 0;
-
- long long pos_ = start;
- const long long pos_end = start + size;
-
- while (pos_ != pos_end)
- {
- unsigned char b;
-
- const int status = pReader->Read(pos_, 1, &b);
-
- if (status)
- return status;
-
- info.uid <<= 8;
- info.uid |= b;
-
- ++pos_;
- }
- }
- else if (id == 0x57) //Track Number
- {
- const long long num = UnserializeUInt(pReader, pos, size);
-
- if ((num <= 0) || (num > 127))
- return E_FILE_FORMAT_INVALID;
-
- info.number = static_cast<long>(num);
- }
- else if (id == 0x03) //Track Type
- {
- const long long type = UnserializeUInt(pReader, pos, size);
-
- if ((type <= 0) || (type > 254))
- return E_FILE_FORMAT_INVALID;
-
- info.type = static_cast<long>(type);
- }
- else if (id == 0x136E) //Track Name
- {
- const long status = UnserializeString(
- pReader,
- pos,
- size,
- info.nameAsUTF8);
-
- if (status)
- return status;
- }
- else if (id == 0x02B59C) //Track Language
- {
- const long status = UnserializeString(
- pReader,
- pos,
- size,
- info.language);
-
- if (status)
- return status;
- }
- else if (id == 0x03E383) //Default Duration
- {
- const long long duration = UnserializeUInt(pReader, pos, size);
-
- if (duration < 0)
- return E_FILE_FORMAT_INVALID;
-
- info.defaultDuration = static_cast<unsigned long long>(duration);
- }
- else if (id == 0x06) //CodecID
- {
- const long status = UnserializeString(
- pReader,
- pos,
- size,
- info.codecId);
-
- if (status)
- return status;
- }
- else if (id == 0x1C) //lacing
- {
- lacing = UnserializeUInt(pReader, pos, size);
-
- if ((lacing < 0) || (lacing > 1))
- return E_FILE_FORMAT_INVALID;
- }
- else if (id == 0x23A2) //Codec Private
- {
- delete[] info.codecPrivate;
- info.codecPrivate = NULL;
- info.codecPrivateSize = 0;
-
- const size_t buflen = static_cast<size_t>(size);
-
- if (buflen)
- {
- typedef unsigned char* buf_t;
-
- const buf_t buf = new (std::nothrow) unsigned char[buflen];
-
- if (buf == NULL)
- return -1;
-
- const int status = pReader->Read(pos, buflen, buf);
-
- if (status)
- {
- delete[] buf;
- return status;
- }
-
- info.codecPrivate = buf;
- info.codecPrivateSize = buflen;
- }
- }
- else if (id == 0x058688) //Codec Name
- {
- const long status = UnserializeString(
- pReader,
- pos,
- size,
- info.codecNameAsUTF8);
-
- if (status)
- return status;
- }
- else if (id == 0x16AA) //Codec Delay
- {
- info.codecDelay = UnserializeUInt(pReader, pos, size);
-
- }
- else if (id == 0x16BB) //Seek Pre Roll
- {
- info.seekPreRoll = UnserializeUInt(pReader, pos, size);
- }
-
- pos += size; //consume payload
- assert(pos <= track_stop);
- }
-
- assert(pos == track_stop);
-
- if (info.number <= 0) //not specified
- return E_FILE_FORMAT_INVALID;
-
- if (GetTrackByNumber(info.number))
- return E_FILE_FORMAT_INVALID;
-
- if (info.type <= 0) //not specified
- return E_FILE_FORMAT_INVALID;
-
- info.lacing = (lacing > 0) ? true : false;
-
- if (info.type == Track::kVideo)
- {
- if (v.start < 0)
- return E_FILE_FORMAT_INVALID;
-
- if (a.start >= 0)
- return E_FILE_FORMAT_INVALID;
-
- info.settings = v;
-
- VideoTrack* pTrack = NULL;
-
- const long status = VideoTrack::Parse(m_pSegment,
- info,
- element_start,
- element_size,
- pTrack);
-
- if (status)
- return status;
-
- pResult = pTrack;
- assert(pResult);
-
- if (e.start >= 0)
- pResult->ParseContentEncodingsEntry(e.start, e.size);
- }
- else if (info.type == Track::kAudio)
- {
- if (a.start < 0)
- return E_FILE_FORMAT_INVALID;
-
- if (v.start >= 0)
- return E_FILE_FORMAT_INVALID;
-
- info.settings = a;
-
- AudioTrack* pTrack = NULL;
-
- const long status = AudioTrack::Parse(m_pSegment,
- info,
- element_start,
- element_size,
- pTrack);
-
- if (status)
- return status;
-
- pResult = pTrack;
- assert(pResult);
-
- if (e.start >= 0)
- pResult->ParseContentEncodingsEntry(e.start, e.size);
- }
- else
- {
- // neither video nor audio - probably metadata or subtitles
-
- if (a.start >= 0)
- return E_FILE_FORMAT_INVALID;
-
- if (v.start >= 0)
- return E_FILE_FORMAT_INVALID;
-
- if (e.start >= 0)
- return E_FILE_FORMAT_INVALID;
-
- info.settings.start = -1;
- info.settings.size = 0;
-
- Track* pTrack = NULL;
-
- const long status = Track::Create(m_pSegment,
- info,
- element_start,
- element_size,
- pTrack);
-
- if (status)
- return status;
-
- pResult = pTrack;
- assert(pResult);
- }
-
- return 0; //success
-}
-
-
-Tracks::~Tracks()
-{
- Track** i = m_trackEntries;
- Track** const j = m_trackEntriesEnd;
-
- while (i != j)
- {
- Track* const pTrack = *i++;
- delete pTrack;
- }
-
- delete[] m_trackEntries;
-}
-
-const Track* Tracks::GetTrackByNumber(long tn) const
-{
- if (tn < 0)
- return NULL;
-
- Track** i = m_trackEntries;
- Track** const j = m_trackEntriesEnd;
-
- while (i != j)
- {
- Track* const pTrack = *i++;
-
- if (pTrack == NULL)
- continue;
-
- if (tn == pTrack->GetNumber())
- return pTrack;
- }
-
- return NULL; //not found
-}
-
-
-const Track* Tracks::GetTrackByIndex(unsigned long idx) const
-{
- const ptrdiff_t count = m_trackEntriesEnd - m_trackEntries;
-
- if (idx >= static_cast<unsigned long>(count))
- return NULL;
-
- return m_trackEntries[idx];
+ return m_trackEntries[idx];
}
#if 0
@@ -6984,104 +6022,100 @@
}
#endif
+long Cluster::Load(long long& pos, long& len) const {
+ assert(m_pSegment);
+ assert(m_pos >= m_element_start);
-long Cluster::Load(long long& pos, long& len) const
-{
- assert(m_pSegment);
- assert(m_pos >= m_element_start);
+ if (m_timecode >= 0) // at least partially loaded
+ return 0;
- if (m_timecode >= 0) //at least partially loaded
- return 0;
+ assert(m_pos == m_element_start);
+ assert(m_element_size < 0);
- assert(m_pos == m_element_start);
- assert(m_element_size < 0);
+ IMkvReader* const pReader = m_pSegment->m_pReader;
- IMkvReader* const pReader = m_pSegment->m_pReader;
+ long long total, avail;
- long long total, avail;
+ const int status = pReader->Length(&total, &avail);
- const int status = pReader->Length(&total, &avail);
+ if (status < 0) // error
+ return status;
- if (status < 0) //error
- return status;
+ assert((total < 0) || (avail <= total));
+ assert((total < 0) || (m_pos <= total)); // TODO: verify this
- assert((total < 0) || (avail <= total));
- assert((total < 0) || (m_pos <= total)); //TODO: verify this
+ pos = m_pos;
- pos = m_pos;
+ long long cluster_size = -1;
- long long cluster_size = -1;
-
- {
- if ((pos + 1) > avail)
- {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
-
- long long result = GetUIntLength(pReader, pos, len);
-
- if (result < 0) //error or underflow
- return static_cast<long>(result);
-
- if (result > 0) //underflow (weird)
- return E_BUFFER_NOT_FULL;
-
- //if ((pos + len) > segment_stop)
- // return E_FILE_FORMAT_INVALID;
-
- if ((pos + len) > avail)
- return E_BUFFER_NOT_FULL;
-
- const long long id_ = ReadUInt(pReader, pos, len);
-
- if (id_ < 0) //error
- return static_cast<long>(id_);
-
- if (id_ != 0x0F43B675) //Cluster ID
- return E_FILE_FORMAT_INVALID;
-
- pos += len; //consume id
-
- //read cluster size
-
- if ((pos + 1) > avail)
- {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
-
- result = GetUIntLength(pReader, pos, len);
-
- if (result < 0) //error
- return static_cast<long>(result);
-
- if (result > 0) //weird
- return E_BUFFER_NOT_FULL;
-
- //if ((pos + len) > segment_stop)
- // return E_FILE_FORMAT_INVALID;
-
- if ((pos + len) > avail)
- return E_BUFFER_NOT_FULL;
-
- const long long size = ReadUInt(pReader, pos, len);
-
- if (size < 0) //error
- return static_cast<long>(cluster_size);
-
- if (size == 0)
- return E_FILE_FORMAT_INVALID; //TODO: verify this
-
- pos += len; //consume length of size of element
-
- const long long unknown_size = (1LL << (7 * len)) - 1;
-
- if (size != unknown_size)
- cluster_size = size;
+ {
+ if ((pos + 1) > avail) {
+ len = 1;
+ return E_BUFFER_NOT_FULL;
}
- //pos points to start of payload
+ long long result = GetUIntLength(pReader, pos, len);
+
+ if (result < 0) // error or underflow
+ return static_cast<long>(result);
+
+ if (result > 0) // underflow (weird)
+ return E_BUFFER_NOT_FULL;
+
+ // if ((pos + len) > segment_stop)
+ // return E_FILE_FORMAT_INVALID;
+
+ if ((pos + len) > avail)
+ return E_BUFFER_NOT_FULL;
+
+ const long long id_ = ReadUInt(pReader, pos, len);
+
+ if (id_ < 0) // error
+ return static_cast<long>(id_);
+
+ if (id_ != 0x0F43B675) // Cluster ID
+ return E_FILE_FORMAT_INVALID;
+
+ pos += len; // consume id
+
+ // read cluster size
+
+ if ((pos + 1) > avail) {
+ len = 1;
+ return E_BUFFER_NOT_FULL;
+ }
+
+ result = GetUIntLength(pReader, pos, len);
+
+ if (result < 0) // error
+ return static_cast<long>(result);
+
+ if (result > 0) // weird
+ return E_BUFFER_NOT_FULL;
+
+ // if ((pos + len) > segment_stop)
+ // return E_FILE_FORMAT_INVALID;
+
+ if ((pos + len) > avail)
+ return E_BUFFER_NOT_FULL;
+
+ const long long size = ReadUInt(pReader, pos, len);
+
+ if (size < 0) // error
+ return static_cast<long>(cluster_size);
+
+ if (size == 0)
+ return E_FILE_FORMAT_INVALID; // TODO: verify this
+
+ pos += len; // consume length of size of element
+
+ const long long unknown_size = (1LL << (7 * len)) - 1;
+
+ if (size != unknown_size)
+ cluster_size = size;
+ }
+
+// pos points to start of payload
#if 0
len = static_cast<long>(size_);
@@ -7090,403 +6124,376 @@
return E_BUFFER_NOT_FULL;
#endif
- long long timecode = -1;
- long long new_pos = -1;
- bool bBlock = false;
+ long long timecode = -1;
+ long long new_pos = -1;
+ bool bBlock = false;
- long long cluster_stop = (cluster_size < 0) ? -1 : pos + cluster_size;
+ long long cluster_stop = (cluster_size < 0) ? -1 : pos + cluster_size;
- for (;;)
- {
- if ((cluster_stop >= 0) && (pos >= cluster_stop))
- break;
+ for (;;) {
+ if ((cluster_stop >= 0) && (pos >= cluster_stop))
+ break;
- //Parse ID
+ // Parse ID
- if ((pos + 1) > avail)
- {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
-
- long long result = GetUIntLength(pReader, pos, len);
-
- if (result < 0) //error
- return static_cast<long>(result);
-
- if (result > 0) //weird
- return E_BUFFER_NOT_FULL;
-
- if ((cluster_stop >= 0) && ((pos + len) > cluster_stop))
- return E_FILE_FORMAT_INVALID;
-
- if ((pos + len) > avail)
- return E_BUFFER_NOT_FULL;
-
- const long long id = ReadUInt(pReader, pos, len);
-
- if (id < 0) //error
- return static_cast<long>(id);
-
- if (id == 0)
- return E_FILE_FORMAT_INVALID;
-
- //This is the distinguished set of ID's we use to determine
- //that we have exhausted the sub-element's inside the cluster
- //whose ID we parsed earlier.
-
- if (id == 0x0F43B675) //Cluster ID
- break;
-
- if (id == 0x0C53BB6B) //Cues ID
- break;
-
- pos += len; //consume ID field
-
- //Parse Size
-
- if ((pos + 1) > avail)
- {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
-
- result = GetUIntLength(pReader, pos, len);
-
- if (result < 0) //error
- return static_cast<long>(result);
-
- if (result > 0) //weird
- return E_BUFFER_NOT_FULL;
-
- if ((cluster_stop >= 0) && ((pos + len) > cluster_stop))
- return E_FILE_FORMAT_INVALID;
-
- if ((pos + len) > avail)
- return E_BUFFER_NOT_FULL;
-
- const long long size = ReadUInt(pReader, pos, len);
-
- if (size < 0) //error
- return static_cast<long>(size);
-
- const long long unknown_size = (1LL << (7 * len)) - 1;
-
- if (size == unknown_size)
- return E_FILE_FORMAT_INVALID;
-
- pos += len; //consume size field
-
- if ((cluster_stop >= 0) && (pos > cluster_stop))
- return E_FILE_FORMAT_INVALID;
-
- //pos now points to start of payload
-
- if (size == 0) //weird
- continue;
-
- if ((cluster_stop >= 0) && ((pos + size) > cluster_stop))
- return E_FILE_FORMAT_INVALID;
-
- if (id == 0x67) //TimeCode ID
- {
- len = static_cast<long>(size);
-
- if ((pos + size) > avail)
- return E_BUFFER_NOT_FULL;
-
- timecode = UnserializeUInt(pReader, pos, size);
-
- if (timecode < 0) //error (or underflow)
- return static_cast<long>(timecode);
-
- new_pos = pos + size;
-
- if (bBlock)
- break;
- }
- else if (id == 0x20) //BlockGroup ID
- {
- bBlock = true;
- break;
- }
- else if (id == 0x23) //SimpleBlock ID
- {
- bBlock = true;
- break;
- }
-
- pos += size; //consume payload
- assert((cluster_stop < 0) || (pos <= cluster_stop));
- }
-
- assert((cluster_stop < 0) || (pos <= cluster_stop));
-
- if (timecode < 0) //no timecode found
- return E_FILE_FORMAT_INVALID;
-
- if (!bBlock)
- return E_FILE_FORMAT_INVALID;
-
- m_pos = new_pos; //designates position just beyond timecode payload
- m_timecode = timecode; // m_timecode >= 0 means we're partially loaded
-
- if (cluster_size >= 0)
- m_element_size = cluster_stop - m_element_start;
-
- return 0;
-}
-
-
-long Cluster::Parse(long long& pos, long& len) const
-{
- long status = Load(pos, len);
-
- if (status < 0)
- return status;
-
- assert(m_pos >= m_element_start);
- assert(m_timecode >= 0);
- //assert(m_size > 0);
- //assert(m_element_size > m_size);
-
- const long long cluster_stop =
- (m_element_size < 0) ? -1 : m_element_start + m_element_size;
-
- if ((cluster_stop >= 0) && (m_pos >= cluster_stop))
- return 1; //nothing else to do
-
- IMkvReader* const pReader = m_pSegment->m_pReader;
-
- long long total, avail;
-
- status = pReader->Length(&total, &avail);
-
- if (status < 0) //error
- return status;
-
- assert((total < 0) || (avail <= total));
-
- pos = m_pos;
-
- for (;;)
- {
- if ((cluster_stop >= 0) && (pos >= cluster_stop))
- break;
-
- if ((total >= 0) && (pos >= total))
- {
- if (m_element_size < 0)
- m_element_size = pos - m_element_start;
-
- break;
- }
-
- //Parse ID
-
- if ((pos + 1) > avail)
- {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
-
- long long result = GetUIntLength(pReader, pos, len);
-
- if (result < 0) //error
- return static_cast<long>(result);
-
- if (result > 0) //weird
- return E_BUFFER_NOT_FULL;
-
- if ((cluster_stop >= 0) && ((pos + len) > cluster_stop))
- return E_FILE_FORMAT_INVALID;
-
- if ((pos + len) > avail)
- return E_BUFFER_NOT_FULL;
-
- const long long id = ReadUInt(pReader, pos, len);
-
- if (id < 0) //error
- return static_cast<long>(id);
-
- if (id == 0) //weird
- return E_FILE_FORMAT_INVALID;
-
- //This is the distinguished set of ID's we use to determine
- //that we have exhausted the sub-element's inside the cluster
- //whose ID we parsed earlier.
-
- if ((id == 0x0F43B675) || (id == 0x0C53BB6B)) //Cluster or Cues ID
- {
- if (m_element_size < 0)
- m_element_size = pos - m_element_start;
-
- break;
- }
-
- pos += len; //consume ID field
-
- //Parse Size
-
- if ((pos + 1) > avail)
- {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
-
- result = GetUIntLength(pReader, pos, len);
-
- if (result < 0) //error
- return static_cast<long>(result);
-
- if (result > 0) //weird
- return E_BUFFER_NOT_FULL;
-
- if ((cluster_stop >= 0) && ((pos + len) > cluster_stop))
- return E_FILE_FORMAT_INVALID;
-
- if ((pos + len) > avail)
- return E_BUFFER_NOT_FULL;
-
- const long long size = ReadUInt(pReader, pos, len);
-
- if (size < 0) //error
- return static_cast<long>(size);
-
- const long long unknown_size = (1LL << (7 * len)) - 1;
-
- if (size == unknown_size)
- return E_FILE_FORMAT_INVALID;
-
- pos += len; //consume size field
-
- if ((cluster_stop >= 0) && (pos > cluster_stop))
- return E_FILE_FORMAT_INVALID;
-
- //pos now points to start of payload
-
- if (size == 0) //weird
- continue;
-
- //const long long block_start = pos;
- const long long block_stop = pos + size;
-
- if (cluster_stop >= 0)
- {
- if (block_stop > cluster_stop)
- {
- if ((id == 0x20) || (id == 0x23))
- return E_FILE_FORMAT_INVALID;
-
- pos = cluster_stop;
- break;
- }
- }
- else if ((total >= 0) && (block_stop > total))
- {
- m_element_size = total - m_element_start;
- pos = total;
- break;
- }
- else if (block_stop > avail)
- {
- len = static_cast<long>(size);
- return E_BUFFER_NOT_FULL;
- }
-
- Cluster* const this_ = const_cast<Cluster*>(this);
-
- if (id == 0x20) //BlockGroup
- return this_->ParseBlockGroup(size, pos, len);
-
- if (id == 0x23) //SimpleBlock
- return this_->ParseSimpleBlock(size, pos, len);
-
- pos += size; //consume payload
- assert((cluster_stop < 0) || (pos <= cluster_stop));
- }
-
- assert(m_element_size > 0);
-
- m_pos = pos;
- assert((cluster_stop < 0) || (m_pos <= cluster_stop));
-
- if (m_entries_count > 0)
- {
- const long idx = m_entries_count - 1;
-
- const BlockEntry* const pLast = m_entries[idx];
- assert(pLast);
-
- const Block* const pBlock = pLast->GetBlock();
- assert(pBlock);
-
- const long long start = pBlock->m_start;
-
- if ((total >= 0) && (start > total))
- return -1; //defend against trucated stream
-
- const long long size = pBlock->m_size;
-
- const long long stop = start + size;
- assert((cluster_stop < 0) || (stop <= cluster_stop));
-
- if ((total >= 0) && (stop > total))
- return -1; //defend against trucated stream
- }
-
- return 1; //no more entries
-}
-
-
-long Cluster::ParseSimpleBlock(
- long long block_size,
- long long& pos,
- long& len)
-{
- const long long block_start = pos;
- const long long block_stop = pos + block_size;
-
- IMkvReader* const pReader = m_pSegment->m_pReader;
-
- long long total, avail;
-
- long status = pReader->Length(&total, &avail);
-
- if (status < 0) //error
- return status;
-
- assert((total < 0) || (avail <= total));
-
- //parse track number
-
- if ((pos + 1) > avail)
- {
- len = 1;
- return E_BUFFER_NOT_FULL;
+ if ((pos + 1) > avail) {
+ len = 1;
+ return E_BUFFER_NOT_FULL;
}
long long result = GetUIntLength(pReader, pos, len);
- if (result < 0) //error
- return static_cast<long>(result);
+ if (result < 0) // error
+ return static_cast<long>(result);
- if (result > 0) //weird
- return E_BUFFER_NOT_FULL;
+ if (result > 0) // weird
+ return E_BUFFER_NOT_FULL;
- if ((pos + len) > block_stop)
- return E_FILE_FORMAT_INVALID;
+ if ((cluster_stop >= 0) && ((pos + len) > cluster_stop))
+ return E_FILE_FORMAT_INVALID;
if ((pos + len) > avail)
+ return E_BUFFER_NOT_FULL;
+
+ const long long id = ReadUInt(pReader, pos, len);
+
+ if (id < 0) // error
+ return static_cast<long>(id);
+
+ if (id == 0)
+ return E_FILE_FORMAT_INVALID;
+
+ // This is the distinguished set of ID's we use to determine
+ // that we have exhausted the sub-element's inside the cluster
+ // whose ID we parsed earlier.
+
+ if (id == 0x0F43B675) // Cluster ID
+ break;
+
+ if (id == 0x0C53BB6B) // Cues ID
+ break;
+
+ pos += len; // consume ID field
+
+ // Parse Size
+
+ if ((pos + 1) > avail) {
+ len = 1;
+ return E_BUFFER_NOT_FULL;
+ }
+
+ result = GetUIntLength(pReader, pos, len);
+
+ if (result < 0) // error
+ return static_cast<long>(result);
+
+ if (result > 0) // weird
+ return E_BUFFER_NOT_FULL;
+
+ if ((cluster_stop >= 0) && ((pos + len) > cluster_stop))
+ return E_FILE_FORMAT_INVALID;
+
+ if ((pos + len) > avail)
+ return E_BUFFER_NOT_FULL;
+
+ const long long size = ReadUInt(pReader, pos, len);
+
+ if (size < 0) // error
+ return static_cast<long>(size);
+
+ const long long unknown_size = (1LL << (7 * len)) - 1;
+
+ if (size == unknown_size)
+ return E_FILE_FORMAT_INVALID;
+
+ pos += len; // consume size field
+
+ if ((cluster_stop >= 0) && (pos > cluster_stop))
+ return E_FILE_FORMAT_INVALID;
+
+ // pos now points to start of payload
+
+ if (size == 0) // weird
+ continue;
+
+ if ((cluster_stop >= 0) && ((pos + size) > cluster_stop))
+ return E_FILE_FORMAT_INVALID;
+
+ if (id == 0x67) { // TimeCode ID
+ len = static_cast<long>(size);
+
+ if ((pos + size) > avail)
return E_BUFFER_NOT_FULL;
- const long long track = ReadUInt(pReader, pos, len);
+ timecode = UnserializeUInt(pReader, pos, size);
- if (track < 0) //error
- return static_cast<long>(track);
+ if (timecode < 0) // error (or underflow)
+ return static_cast<long>(timecode);
- if (track == 0)
- return E_FILE_FORMAT_INVALID;
+ new_pos = pos + size;
+
+ if (bBlock)
+ break;
+ } else if (id == 0x20) { // BlockGroup ID
+ bBlock = true;
+ break;
+ } else if (id == 0x23) { // SimpleBlock ID
+ bBlock = true;
+ break;
+ }
+
+ pos += size; // consume payload
+ assert((cluster_stop < 0) || (pos <= cluster_stop));
+ }
+
+ assert((cluster_stop < 0) || (pos <= cluster_stop));
+
+ if (timecode < 0) // no timecode found
+ return E_FILE_FORMAT_INVALID;
+
+ if (!bBlock)
+ return E_FILE_FORMAT_INVALID;
+
+ m_pos = new_pos; // designates position just beyond timecode payload
+ m_timecode = timecode; // m_timecode >= 0 means we're partially loaded
+
+ if (cluster_size >= 0)
+ m_element_size = cluster_stop - m_element_start;
+
+ return 0;
+}
+
+long Cluster::Parse(long long& pos, long& len) const {
+ long status = Load(pos, len);
+
+ if (status < 0)
+ return status;
+
+ assert(m_pos >= m_element_start);
+ assert(m_timecode >= 0);
+ // assert(m_size > 0);
+ // assert(m_element_size > m_size);
+
+ const long long cluster_stop =
+ (m_element_size < 0) ? -1 : m_element_start + m_element_size;
+
+ if ((cluster_stop >= 0) && (m_pos >= cluster_stop))
+ return 1; // nothing else to do
+
+ IMkvReader* const pReader = m_pSegment->m_pReader;
+
+ long long total, avail;
+
+ status = pReader->Length(&total, &avail);
+
+ if (status < 0) // error
+ return status;
+
+ assert((total < 0) || (avail <= total));
+
+ pos = m_pos;
+
+ for (;;) {
+ if ((cluster_stop >= 0) && (pos >= cluster_stop))
+ break;
+
+ if ((total >= 0) && (pos >= total)) {
+ if (m_element_size < 0)
+ m_element_size = pos - m_element_start;
+
+ break;
+ }
+
+ // Parse ID
+
+ if ((pos + 1) > avail) {
+ len = 1;
+ return E_BUFFER_NOT_FULL;
+ }
+
+ long long result = GetUIntLength(pReader, pos, len);
+
+ if (result < 0) // error
+ return static_cast<long>(result);
+
+ if (result > 0) // weird
+ return E_BUFFER_NOT_FULL;
+
+ if ((cluster_stop >= 0) && ((pos + len) > cluster_stop))
+ return E_FILE_FORMAT_INVALID;
+
+ if ((pos + len) > avail)
+ return E_BUFFER_NOT_FULL;
+
+ const long long id = ReadUInt(pReader, pos, len);
+
+ if (id < 0) // error
+ return static_cast<long>(id);
+
+ if (id == 0) // weird
+ return E_FILE_FORMAT_INVALID;
+
+ // This is the distinguished set of ID's we use to determine
+ // that we have exhausted the sub-element's inside the cluster
+ // whose ID we parsed earlier.
+
+ if ((id == 0x0F43B675) || (id == 0x0C53BB6B)) { // Cluster or Cues ID
+ if (m_element_size < 0)
+ m_element_size = pos - m_element_start;
+
+ break;
+ }
+
+ pos += len; // consume ID field
+
+ // Parse Size
+
+ if ((pos + 1) > avail) {
+ len = 1;
+ return E_BUFFER_NOT_FULL;
+ }
+
+ result = GetUIntLength(pReader, pos, len);
+
+ if (result < 0) // error
+ return static_cast<long>(result);
+
+ if (result > 0) // weird
+ return E_BUFFER_NOT_FULL;
+
+ if ((cluster_stop >= 0) && ((pos + len) > cluster_stop))
+ return E_FILE_FORMAT_INVALID;
+
+ if ((pos + len) > avail)
+ return E_BUFFER_NOT_FULL;
+
+ const long long size = ReadUInt(pReader, pos, len);
+
+ if (size < 0) // error
+ return static_cast<long>(size);
+
+ const long long unknown_size = (1LL << (7 * len)) - 1;
+
+ if (size == unknown_size)
+ return E_FILE_FORMAT_INVALID;
+
+ pos += len; // consume size field
+
+ if ((cluster_stop >= 0) && (pos > cluster_stop))
+ return E_FILE_FORMAT_INVALID;
+
+ // pos now points to start of payload
+
+ if (size == 0) // weird
+ continue;
+
+ // const long long block_start = pos;
+ const long long block_stop = pos + size;
+
+ if (cluster_stop >= 0) {
+ if (block_stop > cluster_stop) {
+ if ((id == 0x20) || (id == 0x23))
+ return E_FILE_FORMAT_INVALID;
+
+ pos = cluster_stop;
+ break;
+ }
+ } else if ((total >= 0) && (block_stop > total)) {
+ m_element_size = total - m_element_start;
+ pos = total;
+ break;
+ } else if (block_stop > avail) {
+ len = static_cast<long>(size);
+ return E_BUFFER_NOT_FULL;
+ }
+
+ Cluster* const this_ = const_cast<Cluster*>(this);
+
+ if (id == 0x20) // BlockGroup
+ return this_->ParseBlockGroup(size, pos, len);
+
+ if (id == 0x23) // SimpleBlock
+ return this_->ParseSimpleBlock(size, pos, len);
+
+ pos += size; // consume payload
+ assert((cluster_stop < 0) || (pos <= cluster_stop));
+ }
+
+ assert(m_element_size > 0);
+
+ m_pos = pos;
+ assert((cluster_stop < 0) || (m_pos <= cluster_stop));
+
+ if (m_entries_count > 0) {
+ const long idx = m_entries_count - 1;
+
+ const BlockEntry* const pLast = m_entries[idx];
+ assert(pLast);
+
+ const Block* const pBlock = pLast->GetBlock();
+ assert(pBlock);
+
+ const long long start = pBlock->m_start;
+
+ if ((total >= 0) && (start > total))
+ return -1; // defend against trucated stream
+
+ const long long size = pBlock->m_size;
+
+ const long long stop = start + size;
+ assert((cluster_stop < 0) || (stop <= cluster_stop));
+
+ if ((total >= 0) && (stop > total))
+ return -1; // defend against trucated stream
+ }
+
+ return 1; // no more entries
+}
+
+long Cluster::ParseSimpleBlock(long long block_size, long long& pos,
+ long& len) {
+ const long long block_start = pos;
+ const long long block_stop = pos + block_size;
+
+ IMkvReader* const pReader = m_pSegment->m_pReader;
+
+ long long total, avail;
+
+ long status = pReader->Length(&total, &avail);
+
+ if (status < 0) // error
+ return status;
+
+ assert((total < 0) || (avail <= total));
+
+ // parse track number
+
+ if ((pos + 1) > avail) {
+ len = 1;
+ return E_BUFFER_NOT_FULL;
+ }
+
+ long long result = GetUIntLength(pReader, pos, len);
+
+ if (result < 0) // error
+ return static_cast<long>(result);
+
+ if (result > 0) // weird
+ return E_BUFFER_NOT_FULL;
+
+ if ((pos + len) > block_stop)
+ return E_FILE_FORMAT_INVALID;
+
+ if ((pos + len) > avail)
+ return E_BUFFER_NOT_FULL;
+
+ const long long track = ReadUInt(pReader, pos, len);
+
+ if (track < 0) // error
+ return static_cast<long>(track);
+
+ if (track == 0)
+ return E_FILE_FORMAT_INVALID;
#if 0
//TODO(matthewjheaney)
@@ -7518,228 +6525,213 @@
return E_FILE_FORMAT_INVALID;
#endif
- pos += len; //consume track number
+ pos += len; // consume track number
- if ((pos + 2) > block_stop)
- return E_FILE_FORMAT_INVALID;
+ if ((pos + 2) > block_stop)
+ return E_FILE_FORMAT_INVALID;
- if ((pos + 2) > avail)
- {
- len = 2;
- return E_BUFFER_NOT_FULL;
- }
+ if ((pos + 2) > avail) {
+ len = 2;
+ return E_BUFFER_NOT_FULL;
+ }
- pos += 2; //consume timecode
+ pos += 2; // consume timecode
- if ((pos + 1) > block_stop)
- return E_FILE_FORMAT_INVALID;
+ if ((pos + 1) > block_stop)
+ return E_FILE_FORMAT_INVALID;
- if ((pos + 1) > avail)
- {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
+ if ((pos + 1) > avail) {
+ len = 1;
+ return E_BUFFER_NOT_FULL;
+ }
- unsigned char flags;
+ unsigned char flags;
- status = pReader->Read(pos, 1, &flags);
+ status = pReader->Read(pos, 1, &flags);
- if (status < 0) //error or underflow
- {
- len = 1;
- return status;
- }
+ if (status < 0) { // error or underflow
+ len = 1;
+ return status;
+ }
- ++pos; //consume flags byte
- assert(pos <= avail);
+ ++pos; // consume flags byte
+ assert(pos <= avail);
- if (pos >= block_stop)
- return E_FILE_FORMAT_INVALID;
+ if (pos >= block_stop)
+ return E_FILE_FORMAT_INVALID;
- const int lacing = int(flags & 0x06) >> 1;
+ const int lacing = int(flags & 0x06) >> 1;
- if ((lacing != 0) && (block_stop > avail))
- {
- len = static_cast<long>(block_stop - pos);
- return E_BUFFER_NOT_FULL;
- }
+ if ((lacing != 0) && (block_stop > avail)) {
+ len = static_cast<long>(block_stop - pos);
+ return E_BUFFER_NOT_FULL;
+ }
- status = CreateBlock(0x23, //simple block id
- block_start, block_size,
- 0); //DiscardPadding
+ status = CreateBlock(0x23, // simple block id
+ block_start, block_size,
+ 0); // DiscardPadding
- if (status != 0)
- return status;
+ if (status != 0)
+ return status;
- m_pos = block_stop;
+ m_pos = block_stop;
- return 0; //success
+ return 0; // success
}
+long Cluster::ParseBlockGroup(long long payload_size, long long& pos,
+ long& len) {
+ const long long payload_start = pos;
+ const long long payload_stop = pos + payload_size;
-long Cluster::ParseBlockGroup(
- long long payload_size,
- long long& pos,
- long& len)
-{
- const long long payload_start = pos;
- const long long payload_stop = pos + payload_size;
+ IMkvReader* const pReader = m_pSegment->m_pReader;
- IMkvReader* const pReader = m_pSegment->m_pReader;
+ long long total, avail;
- long long total, avail;
+ long status = pReader->Length(&total, &avail);
- long status = pReader->Length(&total, &avail);
+ if (status < 0) // error
+ return status;
- if (status < 0) //error
- return status;
+ assert((total < 0) || (avail <= total));
- assert((total < 0) || (avail <= total));
+ if ((total >= 0) && (payload_stop > total))
+ return E_FILE_FORMAT_INVALID;
- if ((total >= 0) && (payload_stop > total))
- return E_FILE_FORMAT_INVALID;
+ if (payload_stop > avail) {
+ len = static_cast<long>(payload_size);
+ return E_BUFFER_NOT_FULL;
+ }
- if (payload_stop > avail)
- {
- len = static_cast<long>(payload_size);
- return E_BUFFER_NOT_FULL;
+ long long discard_padding = 0;
+
+ while (pos < payload_stop) {
+ // parse sub-block element ID
+
+ if ((pos + 1) > avail) {
+ len = 1;
+ return E_BUFFER_NOT_FULL;
}
- long long discard_padding = 0;
+ long long result = GetUIntLength(pReader, pos, len);
- while (pos < payload_stop)
- {
- //parse sub-block element ID
+ if (result < 0) // error
+ return static_cast<long>(result);
- if ((pos + 1) > avail)
- {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
+ if (result > 0) // weird
+ return E_BUFFER_NOT_FULL;
- long long result = GetUIntLength(pReader, pos, len);
+ if ((pos + len) > payload_stop)
+ return E_FILE_FORMAT_INVALID;
- if (result < 0) //error
- return static_cast<long>(result);
+ if ((pos + len) > avail)
+ return E_BUFFER_NOT_FULL;
- if (result > 0) //weird
- return E_BUFFER_NOT_FULL;
+ const long long id = ReadUInt(pReader, pos, len);
- if ((pos + len) > payload_stop)
- return E_FILE_FORMAT_INVALID;
+ if (id < 0) // error
+ return static_cast<long>(id);
- if ((pos + len) > avail)
- return E_BUFFER_NOT_FULL;
+ if (id == 0) // not a value ID
+ return E_FILE_FORMAT_INVALID;
- const long long id = ReadUInt(pReader, pos, len);
+ pos += len; // consume ID field
- if (id < 0) //error
- return static_cast<long>(id);
+ // Parse Size
- if (id == 0) //not a value ID
- return E_FILE_FORMAT_INVALID;
+ if ((pos + 1) > avail) {
+ len = 1;
+ return E_BUFFER_NOT_FULL;
+ }
- pos += len; //consume ID field
+ result = GetUIntLength(pReader, pos, len);
- //Parse Size
+ if (result < 0) // error
+ return static_cast<long>(result);
- if ((pos + 1) > avail)
- {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
+ if (result > 0) // weird
+ return E_BUFFER_NOT_FULL;
- result = GetUIntLength(pReader, pos, len);
+ if ((pos + len) > payload_stop)
+ return E_FILE_FORMAT_INVALID;
- if (result < 0) //error
- return static_cast<long>(result);
+ if ((pos + len) > avail)
+ return E_BUFFER_NOT_FULL;
- if (result > 0) //weird
- return E_BUFFER_NOT_FULL;
+ const long long size = ReadUInt(pReader, pos, len);
- if ((pos + len) > payload_stop)
- return E_FILE_FORMAT_INVALID;
+ if (size < 0) // error
+ return static_cast<long>(size);
- if ((pos + len) > avail)
- return E_BUFFER_NOT_FULL;
+ pos += len; // consume size field
- const long long size = ReadUInt(pReader, pos, len);
+ // pos now points to start of sub-block group payload
- if (size < 0) //error
- return static_cast<long>(size);
+ if (pos > payload_stop)
+ return E_FILE_FORMAT_INVALID;
- pos += len; //consume size field
+ if (size == 0) // weird
+ continue;
- //pos now points to start of sub-block group payload
+ const long long unknown_size = (1LL << (7 * len)) - 1;
- if (pos > payload_stop)
- return E_FILE_FORMAT_INVALID;
+ if (size == unknown_size)
+ return E_FILE_FORMAT_INVALID;
- if (size == 0) //weird
- continue;
+ if (id == 0x35A2) { // DiscardPadding
+ result = GetUIntLength(pReader, pos, len);
- const long long unknown_size = (1LL << (7 * len)) - 1;
+ if (result < 0) // error
+ return static_cast<long>(result);
- if (size == unknown_size)
- return E_FILE_FORMAT_INVALID;
+ status = UnserializeInt(pReader, pos, len, discard_padding);
- if (id == 0x35A2) //DiscardPadding
- {
- result = GetUIntLength(pReader, pos, len);
+ if (status < 0) // error
+ return status;
+ }
- if (result < 0) //error
- return static_cast<long>(result);
+ if (id != 0x21) { // sub-part of BlockGroup is not a Block
+ pos += size; // consume sub-part of block group
- status = UnserializeInt(pReader, pos, len, discard_padding);
+ if (pos > payload_stop)
+ return E_FILE_FORMAT_INVALID;
- if (status < 0) //error
- return status;
- }
+ continue;
+ }
- if (id != 0x21) //sub-part of BlockGroup is not a Block
- {
- pos += size; //consume sub-part of block group
+ const long long block_stop = pos + size;
- if (pos > payload_stop)
- return E_FILE_FORMAT_INVALID;
+ if (block_stop > payload_stop)
+ return E_FILE_FORMAT_INVALID;
- continue;
- }
+ // parse track number
- const long long block_stop = pos + size;
+ if ((pos + 1) > avail) {
+ len = 1;
+ return E_BUFFER_NOT_FULL;
+ }
- if (block_stop > payload_stop)
- return E_FILE_FORMAT_INVALID;
+ result = GetUIntLength(pReader, pos, len);
- //parse track number
+ if (result < 0) // error
+ return static_cast<long>(result);
- if ((pos + 1) > avail)
- {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
+ if (result > 0) // weird
+ return E_BUFFER_NOT_FULL;
- result = GetUIntLength(pReader, pos, len);
+ if ((pos + len) > block_stop)
+ return E_FILE_FORMAT_INVALID;
- if (result < 0) //error
- return static_cast<long>(result);
+ if ((pos + len) > avail)
+ return E_BUFFER_NOT_FULL;
- if (result > 0) //weird
- return E_BUFFER_NOT_FULL;
+ const long long track = ReadUInt(pReader, pos, len);
- if ((pos + len) > block_stop)
- return E_FILE_FORMAT_INVALID;
+ if (track < 0) // error
+ return static_cast<long>(track);
- if ((pos + len) > avail)
- return E_BUFFER_NOT_FULL;
-
- const long long track = ReadUInt(pReader, pos, len);
-
- if (track < 0) //error
- return static_cast<long>(track);
-
- if (track == 0)
- return E_FILE_FORMAT_INVALID;
+ if (track == 0)
+ return E_FILE_FORMAT_INVALID;
#if 0
//TODO(matthewjheaney)
@@ -7771,213 +6763,173 @@
return E_FILE_FORMAT_INVALID;
#endif
- pos += len; //consume track number
+ pos += len; // consume track number
- if ((pos + 2) > block_stop)
- return E_FILE_FORMAT_INVALID;
+ if ((pos + 2) > block_stop)
+ return E_FILE_FORMAT_INVALID;
- if ((pos + 2) > avail)
- {
- len = 2;
- return E_BUFFER_NOT_FULL;
- }
-
- pos += 2; //consume timecode
-
- if ((pos + 1) > block_stop)
- return E_FILE_FORMAT_INVALID;
-
- if ((pos + 1) > avail)
- {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
-
- unsigned char flags;
-
- status = pReader->Read(pos, 1, &flags);
-
- if (status < 0) //error or underflow
- {
- len = 1;
- return status;
- }
-
- ++pos; //consume flags byte
- assert(pos <= avail);
-
- if (pos >= block_stop)
- return E_FILE_FORMAT_INVALID;
-
- const int lacing = int(flags & 0x06) >> 1;
-
- if ((lacing != 0) && (block_stop > avail))
- {
- len = static_cast<long>(block_stop - pos);
- return E_BUFFER_NOT_FULL;
- }
-
- pos = block_stop; //consume block-part of block group
- assert(pos <= payload_stop);
+ if ((pos + 2) > avail) {
+ len = 2;
+ return E_BUFFER_NOT_FULL;
}
- assert(pos == payload_stop);
+ pos += 2; // consume timecode
- status = CreateBlock(0x20, //BlockGroup ID
- payload_start, payload_size,
- discard_padding);
- if (status != 0)
- return status;
+ if ((pos + 1) > block_stop)
+ return E_FILE_FORMAT_INVALID;
- m_pos = payload_stop;
-
- return 0; //success
-}
-
-
-long Cluster::GetEntry(long index, const mkvparser::BlockEntry*& pEntry) const
-{
- assert(m_pos >= m_element_start);
-
- pEntry = NULL;
-
- if (index < 0)
- return -1; //generic error
-
- if (m_entries_count < 0)
- return E_BUFFER_NOT_FULL;
-
- assert(m_entries);
- assert(m_entries_size > 0);
- assert(m_entries_count <= m_entries_size);
-
- if (index < m_entries_count)
- {
- pEntry = m_entries[index];
- assert(pEntry);
-
- return 1; //found entry
+ if ((pos + 1) > avail) {
+ len = 1;
+ return E_BUFFER_NOT_FULL;
}
- if (m_element_size < 0) //we don't know cluster end yet
- return E_BUFFER_NOT_FULL; //underflow
+ unsigned char flags;
- const long long element_stop = m_element_start + m_element_size;
+ status = pReader->Read(pos, 1, &flags);
- if (m_pos >= element_stop)
- return 0; //nothing left to parse
-
- return E_BUFFER_NOT_FULL; //underflow, since more remains to be parsed
-}
-
-
-Cluster* Cluster::Create(
- Segment* pSegment,
- long idx,
- long long off)
- //long long element_size)
-{
- assert(pSegment);
- assert(off >= 0);
-
- const long long element_start = pSegment->m_start + off;
-
- Cluster* const pCluster = new Cluster(pSegment,
- idx,
- element_start);
- //element_size);
- assert(pCluster);
-
- return pCluster;
-}
-
-
-Cluster::Cluster() :
- m_pSegment(NULL),
- m_element_start(0),
- m_index(0),
- m_pos(0),
- m_element_size(0),
- m_timecode(0),
- m_entries(NULL),
- m_entries_size(0),
- m_entries_count(0) //means "no entries"
-{
-}
-
-
-Cluster::Cluster(
- Segment* pSegment,
- long idx,
- long long element_start
- /* long long element_size */ ) :
- m_pSegment(pSegment),
- m_element_start(element_start),
- m_index(idx),
- m_pos(element_start),
- m_element_size(-1 /* element_size */ ),
- m_timecode(-1),
- m_entries(NULL),
- m_entries_size(0),
- m_entries_count(-1) //means "has not been parsed yet"
-{
-}
-
-
-Cluster::~Cluster()
-{
- if (m_entries_count <= 0)
- return;
-
- BlockEntry** i = m_entries;
- BlockEntry** const j = m_entries + m_entries_count;
-
- while (i != j)
- {
- BlockEntry* p = *i++;
- assert(p);
-
- delete p;
+ if (status < 0) { // error or underflow
+ len = 1;
+ return status;
}
- delete[] m_entries;
+ ++pos; // consume flags byte
+ assert(pos <= avail);
+
+ if (pos >= block_stop)
+ return E_FILE_FORMAT_INVALID;
+
+ const int lacing = int(flags & 0x06) >> 1;
+
+ if ((lacing != 0) && (block_stop > avail)) {
+ len = static_cast<long>(block_stop - pos);
+ return E_BUFFER_NOT_FULL;
+ }
+
+ pos = block_stop; // consume block-part of block group
+ assert(pos <= payload_stop);
+ }
+
+ assert(pos == payload_stop);
+
+ status = CreateBlock(0x20, // BlockGroup ID
+ payload_start, payload_size, discard_padding);
+ if (status != 0)
+ return status;
+
+ m_pos = payload_stop;
+
+ return 0; // success
}
+long Cluster::GetEntry(long index, const mkvparser::BlockEntry*& pEntry) const {
+ assert(m_pos >= m_element_start);
-bool Cluster::EOS() const
+ pEntry = NULL;
+
+ if (index < 0)
+ return -1; // generic error
+
+ if (m_entries_count < 0)
+ return E_BUFFER_NOT_FULL;
+
+ assert(m_entries);
+ assert(m_entries_size > 0);
+ assert(m_entries_count <= m_entries_size);
+
+ if (index < m_entries_count) {
+ pEntry = m_entries[index];
+ assert(pEntry);
+
+ return 1; // found entry
+ }
+
+ if (m_element_size < 0) // we don't know cluster end yet
+ return E_BUFFER_NOT_FULL; // underflow
+
+ const long long element_stop = m_element_start + m_element_size;
+
+ if (m_pos >= element_stop)
+ return 0; // nothing left to parse
+
+ return E_BUFFER_NOT_FULL; // underflow, since more remains to be parsed
+}
+
+Cluster* Cluster::Create(Segment* pSegment, long idx, long long off)
+// long long element_size)
{
- return (m_pSegment == NULL);
+ assert(pSegment);
+ assert(off >= 0);
+
+ const long long element_start = pSegment->m_start + off;
+
+ Cluster* const pCluster = new Cluster(pSegment, idx, element_start);
+ // element_size);
+ assert(pCluster);
+
+ return pCluster;
}
+Cluster::Cluster()
+ : m_pSegment(NULL),
+ m_element_start(0),
+ m_index(0),
+ m_pos(0),
+ m_element_size(0),
+ m_timecode(0),
+ m_entries(NULL),
+ m_entries_size(0),
+ m_entries_count(0) // means "no entries"
+{}
-long Cluster::GetIndex() const
-{
- return m_index;
+Cluster::Cluster(Segment* pSegment, long idx, long long element_start
+ /* long long element_size */)
+ : m_pSegment(pSegment),
+ m_element_start(element_start),
+ m_index(idx),
+ m_pos(element_start),
+ m_element_size(-1 /* element_size */),
+ m_timecode(-1),
+ m_entries(NULL),
+ m_entries_size(0),
+ m_entries_count(-1) // means "has not been parsed yet"
+{}
+
+Cluster::~Cluster() {
+ if (m_entries_count <= 0)
+ return;
+
+ BlockEntry** i = m_entries;
+ BlockEntry** const j = m_entries + m_entries_count;
+
+ while (i != j) {
+ BlockEntry* p = *i++;
+ assert(p);
+
+ delete p;
+ }
+
+ delete[] m_entries;
}
+bool Cluster::EOS() const { return (m_pSegment == NULL); }
-long long Cluster::GetPosition() const
-{
- const long long pos = m_element_start - m_pSegment->m_start;
- assert(pos >= 0);
+long Cluster::GetIndex() const { return m_index; }
- return pos;
+long long Cluster::GetPosition() const {
+ const long long pos = m_element_start - m_pSegment->m_start;
+ assert(pos >= 0);
+
+ return pos;
}
-
-long long Cluster::GetElementSize() const
-{
- return m_element_size;
-}
-
+long long Cluster::GetElementSize() const { return m_element_size; }
#if 0
bool Cluster::HasBlockEntries(
const Segment* pSegment,
- long long off) //relative to start of segment payload
-{
+ long long off) {
assert(pSegment);
- assert(off >= 0); //relative to segment
+ assert(off >= 0); //relative to start of segment payload
IMkvReader* const pReader = pSegment->m_pReader;
@@ -8034,631 +6986,558 @@
}
#endif
-
long Cluster::HasBlockEntries(
const Segment* pSegment,
- long long off, //relative to start of segment payload
- long long& pos,
- long& len)
-{
- assert(pSegment);
- assert(off >= 0); //relative to segment
+ long long off, // relative to start of segment payload
+ long long& pos, long& len) {
+ assert(pSegment);
+ assert(off >= 0); // relative to segment
- IMkvReader* const pReader = pSegment->m_pReader;
+ IMkvReader* const pReader = pSegment->m_pReader;
- long long total, avail;
+ long long total, avail;
- long status = pReader->Length(&total, &avail);
+ long status = pReader->Length(&total, &avail);
- if (status < 0) //error
- return status;
+ if (status < 0) // error
+ return status;
- assert((total < 0) || (avail <= total));
+ assert((total < 0) || (avail <= total));
- pos = pSegment->m_start + off; //absolute
+ pos = pSegment->m_start + off; // absolute
- if ((total >= 0) && (pos >= total))
- return 0; //we don't even have a complete cluster
+ if ((total >= 0) && (pos >= total))
+ return 0; // we don't even have a complete cluster
- const long long segment_stop =
- (pSegment->m_size < 0) ? -1 : pSegment->m_start + pSegment->m_size;
+ const long long segment_stop =
+ (pSegment->m_size < 0) ? -1 : pSegment->m_start + pSegment->m_size;
- long long cluster_stop = -1; //interpreted later to mean "unknown size"
+ long long cluster_stop = -1; // interpreted later to mean "unknown size"
- {
- if ((pos + 1) > avail)
- {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
-
- long long result = GetUIntLength(pReader, pos, len);
-
- if (result < 0) //error
- return static_cast<long>(result);
-
- if (result > 0) //need more data
- return E_BUFFER_NOT_FULL;
-
- if ((segment_stop >= 0) && ((pos + len) > segment_stop))
- return E_FILE_FORMAT_INVALID;
-
- if ((total >= 0) && ((pos + len) > total))
- return 0;
-
- if ((pos + len) > avail)
- return E_BUFFER_NOT_FULL;
-
- const long long id = ReadUInt(pReader, pos, len);
-
- if (id < 0) //error
- return static_cast<long>(id);
-
- if (id != 0x0F43B675) //weird: not cluster ID
- return -1; //generic error
-
- pos += len; //consume Cluster ID field
-
- //read size field
-
- if ((pos + 1) > avail)
- {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
-
- result = GetUIntLength(pReader, pos, len);
-
- if (result < 0) //error
- return static_cast<long>(result);
-
- if (result > 0) //weird
- return E_BUFFER_NOT_FULL;
-
- if ((segment_stop >= 0) && ((pos + len) > segment_stop))
- return E_FILE_FORMAT_INVALID;
-
- if ((total >= 0) && ((pos + len) > total))
- return 0;
-
- if ((pos + len) > avail)
- return E_BUFFER_NOT_FULL;
-
- const long long size = ReadUInt(pReader, pos, len);
-
- if (size < 0) //error
- return static_cast<long>(size);
-
- if (size == 0)
- return 0; //cluster does not have entries
-
- pos += len; //consume size field
-
- //pos now points to start of payload
-
- const long long unknown_size = (1LL << (7 * len)) - 1;
-
- if (size != unknown_size)
- {
- cluster_stop = pos + size;
- assert(cluster_stop >= 0);
-
- if ((segment_stop >= 0) && (cluster_stop > segment_stop))
- return E_FILE_FORMAT_INVALID;
-
- if ((total >= 0) && (cluster_stop > total))
- //return E_FILE_FORMAT_INVALID; //too conservative
- return 0; //cluster does not have any entries
- }
+ {
+ if ((pos + 1) > avail) {
+ len = 1;
+ return E_BUFFER_NOT_FULL;
}
- for (;;)
- {
- if ((cluster_stop >= 0) && (pos >= cluster_stop))
- return 0; //no entries detected
+ long long result = GetUIntLength(pReader, pos, len);
- if ((pos + 1) > avail)
- {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
+ if (result < 0) // error
+ return static_cast<long>(result);
- long long result = GetUIntLength(pReader, pos, len);
+ if (result > 0) // need more data
+ return E_BUFFER_NOT_FULL;
- if (result < 0) //error
- return static_cast<long>(result);
+ if ((segment_stop >= 0) && ((pos + len) > segment_stop))
+ return E_FILE_FORMAT_INVALID;
- if (result > 0) //need more data
- return E_BUFFER_NOT_FULL;
+ if ((total >= 0) && ((pos + len) > total))
+ return 0;
- if ((cluster_stop >= 0) && ((pos + len) > cluster_stop))
- return E_FILE_FORMAT_INVALID;
+ if ((pos + len) > avail)
+ return E_BUFFER_NOT_FULL;
- if ((pos + len) > avail)
- return E_BUFFER_NOT_FULL;
+ const long long id = ReadUInt(pReader, pos, len);
- const long long id = ReadUInt(pReader, pos, len);
+ if (id < 0) // error
+ return static_cast<long>(id);
- if (id < 0) //error
- return static_cast<long>(id);
+ if (id != 0x0F43B675) // weird: not cluster ID
+ return -1; // generic error
- //This is the distinguished set of ID's we use to determine
- //that we have exhausted the sub-element's inside the cluster
- //whose ID we parsed earlier.
+ pos += len; // consume Cluster ID field
- if (id == 0x0F43B675) //Cluster ID
- return 0; //no entries found
+ // read size field
- if (id == 0x0C53BB6B) //Cues ID
- return 0; //no entries found
-
- pos += len; //consume id field
-
- if ((cluster_stop >= 0) && (pos >= cluster_stop))
- return E_FILE_FORMAT_INVALID;
-
- //read size field
-
- if ((pos + 1) > avail)
- {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
-
- result = GetUIntLength(pReader, pos, len);
-
- if (result < 0) //error
- return static_cast<long>(result);
-
- if (result > 0) //underflow
- return E_BUFFER_NOT_FULL;
-
- if ((cluster_stop >= 0) && ((pos + len) > cluster_stop))
- return E_FILE_FORMAT_INVALID;
-
- if ((pos + len) > avail)
- return E_BUFFER_NOT_FULL;
-
- const long long size = ReadUInt(pReader, pos, len);
-
- if (size < 0) //error
- return static_cast<long>(size);
-
- pos += len; //consume size field
-
- //pos now points to start of payload
-
- if ((cluster_stop >= 0) && (pos > cluster_stop))
- return E_FILE_FORMAT_INVALID;
-
- if (size == 0) //weird
- continue;
-
- const long long unknown_size = (1LL << (7 * len)) - 1;
-
- if (size == unknown_size)
- return E_FILE_FORMAT_INVALID; //not supported inside cluster
-
- if ((cluster_stop >= 0) && ((pos + size) > cluster_stop))
- return E_FILE_FORMAT_INVALID;
-
- if (id == 0x20) //BlockGroup ID
- return 1; //have at least one entry
-
- if (id == 0x23) //SimpleBlock ID
- return 1; //have at least one entry
-
- pos += size; //consume payload
- assert((cluster_stop < 0) || (pos <= cluster_stop));
+ if ((pos + 1) > avail) {
+ len = 1;
+ return E_BUFFER_NOT_FULL;
}
+
+ result = GetUIntLength(pReader, pos, len);
+
+ if (result < 0) // error
+ return static_cast<long>(result);
+
+ if (result > 0) // weird
+ return E_BUFFER_NOT_FULL;
+
+ if ((segment_stop >= 0) && ((pos + len) > segment_stop))
+ return E_FILE_FORMAT_INVALID;
+
+ if ((total >= 0) && ((pos + len) > total))
+ return 0;
+
+ if ((pos + len) > avail)
+ return E_BUFFER_NOT_FULL;
+
+ const long long size = ReadUInt(pReader, pos, len);
+
+ if (size < 0) // error
+ return static_cast<long>(size);
+
+ if (size == 0)
+ return 0; // cluster does not have entries
+
+ pos += len; // consume size field
+
+ // pos now points to start of payload
+
+ const long long unknown_size = (1LL << (7 * len)) - 1;
+
+ if (size != unknown_size) {
+ cluster_stop = pos + size;
+ assert(cluster_stop >= 0);
+
+ if ((segment_stop >= 0) && (cluster_stop > segment_stop))
+ return E_FILE_FORMAT_INVALID;
+
+ if ((total >= 0) && (cluster_stop > total))
+ // return E_FILE_FORMAT_INVALID; //too conservative
+ return 0; // cluster does not have any entries
+ }
+ }
+
+ for (;;) {
+ if ((cluster_stop >= 0) && (pos >= cluster_stop))
+ return 0; // no entries detected
+
+ if ((pos + 1) > avail) {
+ len = 1;
+ return E_BUFFER_NOT_FULL;
+ }
+
+ long long result = GetUIntLength(pReader, pos, len);
+
+ if (result < 0) // error
+ return static_cast<long>(result);
+
+ if (result > 0) // need more data
+ return E_BUFFER_NOT_FULL;
+
+ if ((cluster_stop >= 0) && ((pos + len) > cluster_stop))
+ return E_FILE_FORMAT_INVALID;
+
+ if ((pos + len) > avail)
+ return E_BUFFER_NOT_FULL;
+
+ const long long id = ReadUInt(pReader, pos, len);
+
+ if (id < 0) // error
+ return static_cast<long>(id);
+
+ // This is the distinguished set of ID's we use to determine
+ // that we have exhausted the sub-element's inside the cluster
+ // whose ID we parsed earlier.
+
+ if (id == 0x0F43B675) // Cluster ID
+ return 0; // no entries found
+
+ if (id == 0x0C53BB6B) // Cues ID
+ return 0; // no entries found
+
+ pos += len; // consume id field
+
+ if ((cluster_stop >= 0) && (pos >= cluster_stop))
+ return E_FILE_FORMAT_INVALID;
+
+ // read size field
+
+ if ((pos + 1) > avail) {
+ len = 1;
+ return E_BUFFER_NOT_FULL;
+ }
+
+ result = GetUIntLength(pReader, pos, len);
+
+ if (result < 0) // error
+ return static_cast<long>(result);
+
+ if (result > 0) // underflow
+ return E_BUFFER_NOT_FULL;
+
+ if ((cluster_stop >= 0) && ((pos + len) > cluster_stop))
+ return E_FILE_FORMAT_INVALID;
+
+ if ((pos + len) > avail)
+ return E_BUFFER_NOT_FULL;
+
+ const long long size = ReadUInt(pReader, pos, len);
+
+ if (size < 0) // error
+ return static_cast<long>(size);
+
+ pos += len; // consume size field
+
+ // pos now points to start of payload
+
+ if ((cluster_stop >= 0) && (pos > cluster_stop))
+ return E_FILE_FORMAT_INVALID;
+
+ if (size == 0) // weird
+ continue;
+
+ const long long unknown_size = (1LL << (7 * len)) - 1;
+
+ if (size == unknown_size)
+ return E_FILE_FORMAT_INVALID; // not supported inside cluster
+
+ if ((cluster_stop >= 0) && ((pos + size) > cluster_stop))
+ return E_FILE_FORMAT_INVALID;
+
+ if (id == 0x20) // BlockGroup ID
+ return 1; // have at least one entry
+
+ if (id == 0x23) // SimpleBlock ID
+ return 1; // have at least one entry
+
+ pos += size; // consume payload
+ assert((cluster_stop < 0) || (pos <= cluster_stop));
+ }
}
+long long Cluster::GetTimeCode() const {
+ long long pos;
+ long len;
-long long Cluster::GetTimeCode() const
-{
+ const long status = Load(pos, len);
+
+ if (status < 0) // error
+ return status;
+
+ return m_timecode;
+}
+
+long long Cluster::GetTime() const {
+ const long long tc = GetTimeCode();
+
+ if (tc < 0)
+ return tc;
+
+ const SegmentInfo* const pInfo = m_pSegment->GetInfo();
+ assert(pInfo);
+
+ const long long scale = pInfo->GetTimeCodeScale();
+ assert(scale >= 1);
+
+ const long long t = m_timecode * scale;
+
+ return t;
+}
+
+long long Cluster::GetFirstTime() const {
+ const BlockEntry* pEntry;
+
+ const long status = GetFirst(pEntry);
+
+ if (status < 0) // error
+ return status;
+
+ if (pEntry == NULL) // empty cluster
+ return GetTime();
+
+ const Block* const pBlock = pEntry->GetBlock();
+ assert(pBlock);
+
+ return pBlock->GetTime(this);
+}
+
+long long Cluster::GetLastTime() const {
+ const BlockEntry* pEntry;
+
+ const long status = GetLast(pEntry);
+
+ if (status < 0) // error
+ return status;
+
+ if (pEntry == NULL) // empty cluster
+ return GetTime();
+
+ const Block* const pBlock = pEntry->GetBlock();
+ assert(pBlock);
+
+ return pBlock->GetTime(this);
+}
+
+long Cluster::CreateBlock(long long id,
+ long long pos, // absolute pos of payload
+ long long size, long long discard_padding) {
+ assert((id == 0x20) || (id == 0x23)); // BlockGroup or SimpleBlock
+
+ if (m_entries_count < 0) { // haven't parsed anything yet
+ assert(m_entries == NULL);
+ assert(m_entries_size == 0);
+
+ m_entries_size = 1024;
+ m_entries = new BlockEntry* [m_entries_size];
+
+ m_entries_count = 0;
+ } else {
+ assert(m_entries);
+ assert(m_entries_size > 0);
+ assert(m_entries_count <= m_entries_size);
+
+ if (m_entries_count >= m_entries_size) {
+ const long entries_size = 2 * m_entries_size;
+
+ BlockEntry** const entries = new BlockEntry* [entries_size];
+ assert(entries);
+
+ BlockEntry** src = m_entries;
+ BlockEntry** const src_end = src + m_entries_count;
+
+ BlockEntry** dst = entries;
+
+ while (src != src_end)
+ *dst++ = *src++;
+
+ delete[] m_entries;
+
+ m_entries = entries;
+ m_entries_size = entries_size;
+ }
+ }
+
+ if (id == 0x20) // BlockGroup ID
+ return CreateBlockGroup(pos, size, discard_padding);
+ else // SimpleBlock ID
+ return CreateSimpleBlock(pos, size);
+}
+
+long Cluster::CreateBlockGroup(long long start_offset, long long size,
+ long long discard_padding) {
+ assert(m_entries);
+ assert(m_entries_size > 0);
+ assert(m_entries_count >= 0);
+ assert(m_entries_count < m_entries_size);
+
+ IMkvReader* const pReader = m_pSegment->m_pReader;
+
+ long long pos = start_offset;
+ const long long stop = start_offset + size;
+
+ // For WebM files, there is a bias towards previous reference times
+ //(in order to support alt-ref frames, which refer back to the previous
+ // keyframe). Normally a 0 value is not possible, but here we tenatively
+ // allow 0 as the value of a reference frame, with the interpretation
+ // that this is a "previous" reference time.
+
+ long long prev = 1; // nonce
+ long long next = 0; // nonce
+ long long duration = -1; // really, this is unsigned
+
+ long long bpos = -1;
+ long long bsize = -1;
+
+ while (pos < stop) {
+ long len;
+ const long long id = ReadUInt(pReader, pos, len);
+ assert(id >= 0); // TODO
+ assert((pos + len) <= stop);
+
+ pos += len; // consume ID
+
+ const long long size = ReadUInt(pReader, pos, len);
+ assert(size >= 0); // TODO
+ assert((pos + len) <= stop);
+
+ pos += len; // consume size
+
+ if (id == 0x21) { // Block ID
+ if (bpos < 0) { // Block ID
+ bpos = pos;
+ bsize = size;
+ }
+ } else if (id == 0x1B) { // Duration ID
+ assert(size <= 8);
+
+ duration = UnserializeUInt(pReader, pos, size);
+ assert(duration >= 0); // TODO
+ } else if (id == 0x7B) { // ReferenceBlock
+ assert(size <= 8);
+ const long size_ = static_cast<long>(size);
+
+ long long time;
+
+ long status = UnserializeInt(pReader, pos, size_, time);
+ assert(status == 0);
+ if (status != 0)
+ return -1;
+
+ if (time <= 0) // see note above
+ prev = time;
+ else // weird
+ next = time;
+ }
+
+ pos += size; // consume payload
+ assert(pos <= stop);
+ }
+
+ assert(pos == stop);
+ assert(bpos >= 0);
+ assert(bsize >= 0);
+
+ const long idx = m_entries_count;
+
+ BlockEntry** const ppEntry = m_entries + idx;
+ BlockEntry*& pEntry = *ppEntry;
+
+ pEntry = new (std::nothrow)
+ BlockGroup(this, idx, bpos, bsize, prev, next, duration, discard_padding);
+
+ if (pEntry == NULL)
+ return -1; // generic error
+
+ BlockGroup* const p = static_cast<BlockGroup*>(pEntry);
+
+ const long status = p->Parse();
+
+ if (status == 0) { // success
+ ++m_entries_count;
+ return 0;
+ }
+
+ delete pEntry;
+ pEntry = 0;
+
+ return status;
+}
+
+long Cluster::CreateSimpleBlock(long long st, long long sz) {
+ assert(m_entries);
+ assert(m_entries_size > 0);
+ assert(m_entries_count >= 0);
+ assert(m_entries_count < m_entries_size);
+
+ const long idx = m_entries_count;
+
+ BlockEntry** const ppEntry = m_entries + idx;
+ BlockEntry*& pEntry = *ppEntry;
+
+ pEntry = new (std::nothrow) SimpleBlock(this, idx, st, sz);
+
+ if (pEntry == NULL)
+ return -1; // generic error
+
+ SimpleBlock* const p = static_cast<SimpleBlock*>(pEntry);
+
+ const long status = p->Parse();
+
+ if (status == 0) {
+ ++m_entries_count;
+ return 0;
+ }
+
+ delete pEntry;
+ pEntry = 0;
+
+ return status;
+}
+
+long Cluster::GetFirst(const BlockEntry*& pFirst) const {
+ if (m_entries_count <= 0) {
long long pos;
long len;
- const long status = Load(pos, len);
+ const long status = Parse(pos, len);
- if (status < 0) //error
- return status;
-
- return m_timecode;
-}
-
-
-long long Cluster::GetTime() const
-{
- const long long tc = GetTimeCode();
-
- if (tc < 0)
- return tc;
-
- const SegmentInfo* const pInfo = m_pSegment->GetInfo();
- assert(pInfo);
-
- const long long scale = pInfo->GetTimeCodeScale();
- assert(scale >= 1);
-
- const long long t = m_timecode * scale;
-
- return t;
-}
-
-
-long long Cluster::GetFirstTime() const
-{
- const BlockEntry* pEntry;
-
- const long status = GetFirst(pEntry);
-
- if (status < 0) //error
- return status;
-
- if (pEntry == NULL) //empty cluster
- return GetTime();
-
- const Block* const pBlock = pEntry->GetBlock();
- assert(pBlock);
-
- return pBlock->GetTime(this);
-}
-
-
-long long Cluster::GetLastTime() const
-{
- const BlockEntry* pEntry;
-
- const long status = GetLast(pEntry);
-
- if (status < 0) //error
- return status;
-
- if (pEntry == NULL) //empty cluster
- return GetTime();
-
- const Block* const pBlock = pEntry->GetBlock();
- assert(pBlock);
-
- return pBlock->GetTime(this);
-}
-
-
-long Cluster::CreateBlock(
- long long id,
- long long pos, //absolute pos of payload
- long long size,
- long long discard_padding)
-{
- assert((id == 0x20) || (id == 0x23)); //BlockGroup or SimpleBlock
-
- if (m_entries_count < 0) //haven't parsed anything yet
- {
- assert(m_entries == NULL);
- assert(m_entries_size == 0);
-
- m_entries_size = 1024;
- m_entries = new BlockEntry*[m_entries_size];
-
- m_entries_count = 0;
- }
- else
- {
- assert(m_entries);
- assert(m_entries_size > 0);
- assert(m_entries_count <= m_entries_size);
-
- if (m_entries_count >= m_entries_size)
- {
- const long entries_size = 2 * m_entries_size;
-
- BlockEntry** const entries = new BlockEntry*[entries_size];
- assert(entries);
-
- BlockEntry** src = m_entries;
- BlockEntry** const src_end = src + m_entries_count;
-
- BlockEntry** dst = entries;
-
- while (src != src_end)
- *dst++ = *src++;
-
- delete[] m_entries;
-
- m_entries = entries;
- m_entries_size = entries_size;
- }
+ if (status < 0) { // error
+ pFirst = NULL;
+ return status;
}
- if (id == 0x20) //BlockGroup ID
- return CreateBlockGroup(pos, size, discard_padding);
- else //SimpleBlock ID
- return CreateSimpleBlock(pos, size);
+ if (m_entries_count <= 0) { // empty cluster
+ pFirst = NULL;
+ return 0;
+ }
+ }
+
+ assert(m_entries);
+
+ pFirst = m_entries[0];
+ assert(pFirst);
+
+ return 0; // success
}
+long Cluster::GetLast(const BlockEntry*& pLast) const {
+ for (;;) {
+ long long pos;
+ long len;
-long Cluster::CreateBlockGroup(
- long long start_offset,
- long long size,
- long long discard_padding)
-{
- assert(m_entries);
- assert(m_entries_size > 0);
- assert(m_entries_count >= 0);
- assert(m_entries_count < m_entries_size);
+ const long status = Parse(pos, len);
- IMkvReader* const pReader = m_pSegment->m_pReader;
-
- long long pos = start_offset;
- const long long stop = start_offset + size;
-
- //For WebM files, there is a bias towards previous reference times
- //(in order to support alt-ref frames, which refer back to the previous
- //keyframe). Normally a 0 value is not possible, but here we tenatively
- //allow 0 as the value of a reference frame, with the interpretation
- //that this is a "previous" reference time.
-
- long long prev = 1; //nonce
- long long next = 0; //nonce
- long long duration = -1; //really, this is unsigned
-
- long long bpos = -1;
- long long bsize = -1;
-
- while (pos < stop)
- {
- long len;
- const long long id = ReadUInt(pReader, pos, len);
- assert(id >= 0); //TODO
- assert((pos + len) <= stop);
-
- pos += len; //consume ID
-
- const long long size = ReadUInt(pReader, pos, len);
- assert(size >= 0); //TODO
- assert((pos + len) <= stop);
-
- pos += len; //consume size
-
- if (id == 0x21) //Block ID
- {
- if (bpos < 0) //Block ID
- {
- bpos = pos;
- bsize = size;
- }
- }
- else if (id == 0x1B) //Duration ID
- {
- assert(size <= 8);
-
- duration = UnserializeUInt(pReader, pos, size);
- assert(duration >= 0); //TODO
- }
- else if (id == 0x7B) //ReferenceBlock
- {
- assert(size <= 8);
- const long size_ = static_cast<long>(size);
-
- long long time;
-
- long status = UnserializeInt(pReader, pos, size_, time);
- assert(status == 0);
- if (status != 0)
- return -1;
-
- if (time <= 0) //see note above
- prev = time;
- else //weird
- next = time;
- }
-
- pos += size; //consume payload
- assert(pos <= stop);
+ if (status < 0) { // error
+ pLast = NULL;
+ return status;
}
- assert(pos == stop);
- assert(bpos >= 0);
- assert(bsize >= 0);
+ if (status > 0) // no new block
+ break;
+ }
- const long idx = m_entries_count;
-
- BlockEntry** const ppEntry = m_entries + idx;
- BlockEntry*& pEntry = *ppEntry;
-
- pEntry = new (std::nothrow) BlockGroup(
- this,
- idx,
- bpos,
- bsize,
- prev,
- next,
- duration,
- discard_padding);
-
- if (pEntry == NULL)
- return -1; //generic error
-
- BlockGroup* const p = static_cast<BlockGroup*>(pEntry);
-
- const long status = p->Parse();
-
- if (status == 0) //success
- {
- ++m_entries_count;
- return 0;
- }
-
- delete pEntry;
- pEntry = 0;
-
- return status;
-}
-
-
-
-long Cluster::CreateSimpleBlock(
- long long st,
- long long sz)
-{
- assert(m_entries);
- assert(m_entries_size > 0);
- assert(m_entries_count >= 0);
- assert(m_entries_count < m_entries_size);
-
- const long idx = m_entries_count;
-
- BlockEntry** const ppEntry = m_entries + idx;
- BlockEntry*& pEntry = *ppEntry;
-
- pEntry = new (std::nothrow) SimpleBlock(this, idx, st, sz);
-
- if (pEntry == NULL)
- return -1; //generic error
-
- SimpleBlock* const p = static_cast<SimpleBlock*>(pEntry);
-
- const long status = p->Parse();
-
- if (status == 0)
- {
- ++m_entries_count;
- return 0;
- }
-
- delete pEntry;
- pEntry = 0;
-
- return status;
-}
-
-
-long Cluster::GetFirst(const BlockEntry*& pFirst) const
-{
- if (m_entries_count <= 0)
- {
- long long pos;
- long len;
-
- const long status = Parse(pos, len);
-
- if (status < 0) //error
- {
- pFirst = NULL;
- return status;
- }
-
- if (m_entries_count <= 0) //empty cluster
- {
- pFirst = NULL;
- return 0;
- }
- }
-
- assert(m_entries);
-
- pFirst = m_entries[0];
- assert(pFirst);
-
- return 0; //success
-}
-
-long Cluster::GetLast(const BlockEntry*& pLast) const
-{
- for (;;)
- {
- long long pos;
- long len;
-
- const long status = Parse(pos, len);
-
- if (status < 0) //error
- {
- pLast = NULL;
- return status;
- }
-
- if (status > 0) //no new block
- break;
- }
-
- if (m_entries_count <= 0)
- {
- pLast = NULL;
- return 0;
- }
-
- assert(m_entries);
-
- const long idx = m_entries_count - 1;
-
- pLast = m_entries[idx];
- assert(pLast);
-
+ if (m_entries_count <= 0) {
+ pLast = NULL;
return 0;
+ }
+
+ assert(m_entries);
+
+ const long idx = m_entries_count - 1;
+
+ pLast = m_entries[idx];
+ assert(pLast);
+
+ return 0;
}
+long Cluster::GetNext(const BlockEntry* pCurr, const BlockEntry*& pNext) const {
+ assert(pCurr);
+ assert(m_entries);
+ assert(m_entries_count > 0);
-long Cluster::GetNext(
- const BlockEntry* pCurr,
- const BlockEntry*& pNext) const
-{
- assert(pCurr);
+ size_t idx = pCurr->GetIndex();
+ assert(idx < size_t(m_entries_count));
+ assert(m_entries[idx] == pCurr);
+
+ ++idx;
+
+ if (idx >= size_t(m_entries_count)) {
+ long long pos;
+ long len;
+
+ const long status = Parse(pos, len);
+
+ if (status < 0) { // error
+ pNext = NULL;
+ return status;
+ }
+
+ if (status > 0) {
+ pNext = NULL;
+ return 0;
+ }
+
assert(m_entries);
assert(m_entries_count > 0);
-
- size_t idx = pCurr->GetIndex();
assert(idx < size_t(m_entries_count));
- assert(m_entries[idx] == pCurr);
+ }
- ++idx;
+ pNext = m_entries[idx];
+ assert(pNext);
- if (idx >= size_t(m_entries_count))
- {
- long long pos;
- long len;
-
- const long status = Parse(pos, len);
-
- if (status < 0) //error
- {
- pNext = NULL;
- return status;
- }
-
- if (status > 0)
- {
- pNext = NULL;
- return 0;
- }
-
- assert(m_entries);
- assert(m_entries_count > 0);
- assert(idx < size_t(m_entries_count));
- }
-
- pNext = m_entries[idx];
- assert(pNext);
-
- return 0;
+ return 0;
}
+long Cluster::GetEntryCount() const { return m_entries_count; }
-long Cluster::GetEntryCount() const
-{
- return m_entries_count;
-}
+const BlockEntry* Cluster::GetEntry(const Track* pTrack,
+ long long time_ns) const {
+ assert(pTrack);
-
-const BlockEntry* Cluster::GetEntry(
- const Track* pTrack,
- long long time_ns) const
-{
- assert(pTrack);
-
- if (m_pSegment == NULL) //this is the special EOS cluster
- return pTrack->GetEOS();
+ if (m_pSegment == NULL) // this is the special EOS cluster
+ return pTrack->GetEOS();
#if 0
@@ -8711,76 +7590,66 @@
#else
- const BlockEntry* pResult = pTrack->GetEOS();
+ const BlockEntry* pResult = pTrack->GetEOS();
- long index = 0;
+ long index = 0;
- for (;;)
- {
- if (index >= m_entries_count)
- {
- long long pos;
- long len;
+ for (;;) {
+ if (index >= m_entries_count) {
+ long long pos;
+ long len;
- const long status = Parse(pos, len);
- assert(status >= 0);
+ const long status = Parse(pos, len);
+ assert(status >= 0);
- if (status > 0) //completely parsed, and no more entries
- return pResult;
+ if (status > 0) // completely parsed, and no more entries
+ return pResult;
- if (status < 0) //should never happen
- return 0;
+ if (status < 0) // should never happen
+ return 0;
- assert(m_entries);
- assert(index < m_entries_count);
- }
-
- const BlockEntry* const pEntry = m_entries[index];
- assert(pEntry);
- assert(!pEntry->EOS());
-
- const Block* const pBlock = pEntry->GetBlock();
- assert(pBlock);
-
- if (pBlock->GetTrackNumber() != pTrack->GetNumber())
- {
- ++index;
- continue;
- }
-
- if (pTrack->VetEntry(pEntry))
- {
- if (time_ns < 0) //just want first candidate block
- return pEntry;
-
- const long long ns = pBlock->GetTime(this);
-
- if (ns > time_ns)
- return pResult;
-
- pResult = pEntry; //have a candidate
- }
- else if (time_ns >= 0)
- {
- const long long ns = pBlock->GetTime(this);
-
- if (ns > time_ns)
- return pResult;
- }
-
- ++index;
+ assert(m_entries);
+ assert(index < m_entries_count);
}
+ const BlockEntry* const pEntry = m_entries[index];
+ assert(pEntry);
+ assert(!pEntry->EOS());
+
+ const Block* const pBlock = pEntry->GetBlock();
+ assert(pBlock);
+
+ if (pBlock->GetTrackNumber() != pTrack->GetNumber()) {
+ ++index;
+ continue;
+ }
+
+ if (pTrack->VetEntry(pEntry)) {
+ if (time_ns < 0) // just want first candidate block
+ return pEntry;
+
+ const long long ns = pBlock->GetTime(this);
+
+ if (ns > time_ns)
+ return pResult;
+
+ pResult = pEntry; // have a candidate
+ } else if (time_ns >= 0) {
+ const long long ns = pBlock->GetTime(this);
+
+ if (ns > time_ns)
+ return pResult;
+ }
+
+ ++index;
+ }
+
#endif
}
-
-const BlockEntry*
-Cluster::GetEntry(
- const CuePoint& cp,
- const CuePoint::TrackPosition& tp) const
-{
- assert(m_pSegment);
+const BlockEntry* Cluster::GetEntry(const CuePoint& cp,
+ const CuePoint::TrackPosition& tp) const {
+ assert(m_pSegment);
#if 0
@@ -8871,114 +7740,105 @@
#else
- const long long tc = cp.GetTimeCode();
+ const long long tc = cp.GetTimeCode();
- if (tp.m_block > 0)
- {
- const long block = static_cast<long>(tp.m_block);
- const long index = block - 1;
+ if (tp.m_block > 0) {
+ const long block = static_cast<long>(tp.m_block);
+ const long index = block - 1;
- while (index >= m_entries_count)
- {
- long long pos;
- long len;
+ while (index >= m_entries_count) {
+ long long pos;
+ long len;
- const long status = Parse(pos, len);
+ const long status = Parse(pos, len);
- if (status < 0) //TODO: can this happen?
- return NULL;
+ if (status < 0) // TODO: can this happen?
+ return NULL;
- if (status > 0) //nothing remains to be parsed
- return NULL;
- }
-
- const BlockEntry* const pEntry = m_entries[index];
- assert(pEntry);
- assert(!pEntry->EOS());
-
- const Block* const pBlock = pEntry->GetBlock();
- assert(pBlock);
-
- if ((pBlock->GetTrackNumber() == tp.m_track) &&
- (pBlock->GetTimeCode(this) == tc))
- {
- return pEntry;
- }
+ if (status > 0) // nothing remains to be parsed
+ return NULL;
}
- long index = 0;
+ const BlockEntry* const pEntry = m_entries[index];
+ assert(pEntry);
+ assert(!pEntry->EOS());
- for (;;)
- {
- if (index >= m_entries_count)
- {
- long long pos;
- long len;
+ const Block* const pBlock = pEntry->GetBlock();
+ assert(pBlock);
- const long status = Parse(pos, len);
-
- if (status < 0) //TODO: can this happen?
- return NULL;
-
- if (status > 0) //nothing remains to be parsed
- return NULL;
-
- assert(m_entries);
- assert(index < m_entries_count);
- }
-
- const BlockEntry* const pEntry = m_entries[index];
- assert(pEntry);
- assert(!pEntry->EOS());
-
- const Block* const pBlock = pEntry->GetBlock();
- assert(pBlock);
-
- if (pBlock->GetTrackNumber() != tp.m_track)
- {
- ++index;
- continue;
- }
-
- const long long tc_ = pBlock->GetTimeCode(this);
-
- if (tc_ < tc)
- {
- ++index;
- continue;
- }
-
- if (tc_ > tc)
- return NULL;
-
- const Tracks* const pTracks = m_pSegment->GetTracks();
- assert(pTracks);
-
- const long tn = static_cast<long>(tp.m_track);
- const Track* const pTrack = pTracks->GetTrackByNumber(tn);
-
- if (pTrack == NULL)
- return NULL;
-
- const long long type = pTrack->GetType();
-
- if (type == 2) //audio
- return pEntry;
-
- if (type != 1) //not video
- return NULL;
-
- if (!pBlock->IsKey())
- return NULL;
-
- return pEntry;
+ if ((pBlock->GetTrackNumber() == tp.m_track) &&
+ (pBlock->GetTimeCode(this) == tc)) {
+ return pEntry;
}
+ }
+
+ long index = 0;
+
+ for (;;) {
+ if (index >= m_entries_count) {
+ long long pos;
+ long len;
+
+ const long status = Parse(pos, len);
+
+ if (status < 0) // TODO: can this happen?
+ return NULL;
+
+ if (status > 0) // nothing remains to be parsed
+ return NULL;
+
+ assert(m_entries);
+ assert(index < m_entries_count);
+ }
+
+ const BlockEntry* const pEntry = m_entries[index];
+ assert(pEntry);
+ assert(!pEntry->EOS());
+
+ const Block* const pBlock = pEntry->GetBlock();
+ assert(pBlock);
+
+ if (pBlock->GetTrackNumber() != tp.m_track) {
+ ++index;
+ continue;
+ }
+
+ const long long tc_ = pBlock->GetTimeCode(this);
+
+ if (tc_ < tc) {
+ ++index;
+ continue;
+ }
+
+ if (tc_ > tc)
+ return NULL;
+
+ const Tracks* const pTracks = m_pSegment->GetTracks();
+ assert(pTracks);
+
+ const long tn = static_cast<long>(tp.m_track);
+ const Track* const pTrack = pTracks->GetTrackByNumber(tn);
+
+ if (pTrack == NULL)
+ return NULL;
+
+ const long long type = pTrack->GetType();
+
+ if (type == 2) // audio
+ return pEntry;
+
+ if (type != 1) // not video
+ return NULL;
+
+ if (!pBlock->IsKey())
+ return NULL;
+
+ return pEntry;
+ }
#endif
-
}
-
#if 0
const BlockEntry* Cluster::GetMaxKey(const VideoTrack* pTrack) const
{
@@ -9015,97 +7875,46 @@
}
#endif
+BlockEntry::BlockEntry(Cluster* p, long idx) : m_pCluster(p), m_index(idx) {}
-BlockEntry::BlockEntry(Cluster* p, long idx) :
- m_pCluster(p),
- m_index(idx)
-{
+BlockEntry::~BlockEntry() {}
+
+bool BlockEntry::EOS() const { return (GetKind() == kBlockEOS); }
+
+const Cluster* BlockEntry::GetCluster() const { return m_pCluster; }
+
+long BlockEntry::GetIndex() const { return m_index; }
+
+SimpleBlock::SimpleBlock(Cluster* pCluster, long idx, long long start,
+ long long size)
+ : BlockEntry(pCluster, idx), m_block(start, size, 0) {}
+
+long SimpleBlock::Parse() { return m_block.Parse(m_pCluster); }
+
+BlockEntry::Kind SimpleBlock::GetKind() const { return kBlockSimple; }
+
+const Block* SimpleBlock::GetBlock() const { return &m_block; }
+
+BlockGroup::BlockGroup(Cluster* pCluster, long idx, long long block_start,
+ long long block_size, long long prev, long long next,
+ long long duration, long long discard_padding)
+ : BlockEntry(pCluster, idx),
+ m_block(block_start, block_size, discard_padding),
+ m_prev(prev),
+ m_next(next),
+ m_duration(duration) {}
+
+long BlockGroup::Parse() {
+ const long status = m_block.Parse(m_pCluster);
+
+ if (status)
+ return status;
+
+ m_block.SetKey((m_prev > 0) && (m_next <= 0));
+
+ return 0;
}
-
-BlockEntry::~BlockEntry()
-{
-}
-
-
-bool BlockEntry::EOS() const
-{
- return (GetKind() == kBlockEOS);
-}
-
-
-const Cluster* BlockEntry::GetCluster() const
-{
- return m_pCluster;
-}
-
-
-long BlockEntry::GetIndex() const
-{
- return m_index;
-}
-
-
-SimpleBlock::SimpleBlock(
- Cluster* pCluster,
- long idx,
- long long start,
- long long size) :
- BlockEntry(pCluster, idx),
- m_block(start, size, 0)
-{
-}
-
-
-long SimpleBlock::Parse()
-{
- return m_block.Parse(m_pCluster);
-}
-
-
-BlockEntry::Kind SimpleBlock::GetKind() const
-{
- return kBlockSimple;
-}
-
-
-const Block* SimpleBlock::GetBlock() const
-{
- return &m_block;
-}
-
-
-BlockGroup::BlockGroup(
- Cluster* pCluster,
- long idx,
- long long block_start,
- long long block_size,
- long long prev,
- long long next,
- long long duration,
- long long discard_padding) :
- BlockEntry(pCluster, idx),
- m_block(block_start, block_size, discard_padding),
- m_prev(prev),
- m_next(next),
- m_duration(duration)
-{
-}
-
-
-long BlockGroup::Parse()
-{
- const long status = m_block.Parse(m_pCluster);
-
- if (status)
- return status;
-
- m_block.SetKey((m_prev > 0) && (m_next <= 0));
-
- return 0;
-}
-
-
#if 0
void BlockGroup::ParseBlock(long long start, long long size)
{
@@ -9122,496 +7931,428 @@
}
#endif
+BlockEntry::Kind BlockGroup::GetKind() const { return kBlockGroup; }
-BlockEntry::Kind BlockGroup::GetKind() const
-{
- return kBlockGroup;
-}
+const Block* BlockGroup::GetBlock() const { return &m_block; }
+long long BlockGroup::GetPrevTimeCode() const { return m_prev; }
-const Block* BlockGroup::GetBlock() const
-{
- return &m_block;
-}
+long long BlockGroup::GetNextTimeCode() const { return m_next; }
+long long BlockGroup::GetDurationTimeCode() const { return m_duration; }
-long long BlockGroup::GetPrevTimeCode() const
-{
- return m_prev;
-}
+Block::Block(long long start, long long size_, long long discard_padding)
+ : m_start(start),
+ m_size(size_),
+ m_track(0),
+ m_timecode(-1),
+ m_flags(0),
+ m_frames(NULL),
+ m_frame_count(-1),
+ m_discard_padding(discard_padding) {}
+Block::~Block() { delete[] m_frames; }
-long long BlockGroup::GetNextTimeCode() const
-{
- return m_next;
-}
+long Block::Parse(const Cluster* pCluster) {
+ if (pCluster == NULL)
+ return -1;
-long long BlockGroup::GetDurationTimeCode() const
-{
- return m_duration;
-}
+ if (pCluster->m_pSegment == NULL)
+ return -1;
-Block::Block(long long start, long long size_, long long discard_padding) :
- m_start(start),
- m_size(size_),
- m_track(0),
- m_timecode(-1),
- m_flags(0),
- m_frames(NULL),
- m_frame_count(-1),
- m_discard_padding(discard_padding)
-{
-}
+ assert(m_start >= 0);
+ assert(m_size >= 0);
+ assert(m_track <= 0);
+ assert(m_frames == NULL);
+ assert(m_frame_count <= 0);
+ long long pos = m_start;
+ const long long stop = m_start + m_size;
-Block::~Block()
-{
- delete[] m_frames;
-}
+ long len;
+ IMkvReader* const pReader = pCluster->m_pSegment->m_pReader;
-long Block::Parse(const Cluster* pCluster)
-{
- if (pCluster == NULL)
- return -1;
+ m_track = ReadUInt(pReader, pos, len);
- if (pCluster->m_pSegment == NULL)
- return -1;
+ if (m_track <= 0)
+ return E_FILE_FORMAT_INVALID;
- assert(m_start >= 0);
- assert(m_size >= 0);
- assert(m_track <= 0);
- assert(m_frames == NULL);
- assert(m_frame_count <= 0);
+ if ((pos + len) > stop)
+ return E_FILE_FORMAT_INVALID;
- long long pos = m_start;
- const long long stop = m_start + m_size;
+ pos += len; // consume track number
- long len;
+ if ((stop - pos) < 2)
+ return E_FILE_FORMAT_INVALID;
- IMkvReader* const pReader = pCluster->m_pSegment->m_pReader;
+ long status;
+ long long value;
- m_track = ReadUInt(pReader, pos, len);
+ status = UnserializeInt(pReader, pos, 2, value);
- if (m_track <= 0)
- return E_FILE_FORMAT_INVALID;
+ if (status)
+ return E_FILE_FORMAT_INVALID;
- if ((pos + len) > stop)
- return E_FILE_FORMAT_INVALID;
+ if (value < SHRT_MIN)
+ return E_FILE_FORMAT_INVALID;
- pos += len; //consume track number
+ if (value > SHRT_MAX)
+ return E_FILE_FORMAT_INVALID;
- if ((stop - pos) < 2)
- return E_FILE_FORMAT_INVALID;
+ m_timecode = static_cast<short>(value);
- long status;
- long long value;
+ pos += 2;
- status = UnserializeInt(pReader, pos, 2, value);
+ if ((stop - pos) <= 0)
+ return E_FILE_FORMAT_INVALID;
- if (status)
- return E_FILE_FORMAT_INVALID;
+ status = pReader->Read(pos, 1, &m_flags);
- if (value < SHRT_MIN)
- return E_FILE_FORMAT_INVALID;
+ if (status)
+ return E_FILE_FORMAT_INVALID;
- if (value > SHRT_MAX)
- return E_FILE_FORMAT_INVALID;
+ const int lacing = int(m_flags & 0x06) >> 1;
- m_timecode = static_cast<short>(value);
+ ++pos; // consume flags byte
- pos += 2;
+ if (lacing == 0) { // no lacing
+ if (pos > stop)
+ return E_FILE_FORMAT_INVALID;
- if ((stop - pos) <= 0)
- return E_FILE_FORMAT_INVALID;
-
- status = pReader->Read(pos, 1, &m_flags);
-
- if (status)
- return E_FILE_FORMAT_INVALID;
-
- const int lacing = int(m_flags & 0x06) >> 1;
-
- ++pos; //consume flags byte
-
- if (lacing == 0) //no lacing
- {
- if (pos > stop)
- return E_FILE_FORMAT_INVALID;
-
- m_frame_count = 1;
- m_frames = new Frame[m_frame_count];
-
- Frame& f = m_frames[0];
- f.pos = pos;
-
- const long long frame_size = stop - pos;
-
- if (frame_size > LONG_MAX)
- return E_FILE_FORMAT_INVALID;
-
- f.len = static_cast<long>(frame_size);
-
- return 0; //success
- }
-
- if (pos >= stop)
- return E_FILE_FORMAT_INVALID;
-
- unsigned char biased_count;
-
- status = pReader->Read(pos, 1, &biased_count);
-
- if (status)
- return E_FILE_FORMAT_INVALID;
-
- ++pos; //consume frame count
- assert(pos <= stop);
-
- m_frame_count = int(biased_count) + 1;
-
+ m_frame_count = 1;
m_frames = new Frame[m_frame_count];
- assert(m_frames);
- if (lacing == 1) //Xiph
- {
- Frame* pf = m_frames;
- Frame* const pf_end = pf + m_frame_count;
+ Frame& f = m_frames[0];
+ f.pos = pos;
- long size = 0;
- int frame_count = m_frame_count;
+ const long long frame_size = stop - pos;
- while (frame_count > 1)
- {
- long frame_size = 0;
+ if (frame_size > LONG_MAX)
+ return E_FILE_FORMAT_INVALID;
- for (;;)
- {
- unsigned char val;
+ f.len = static_cast<long>(frame_size);
- if (pos >= stop)
- return E_FILE_FORMAT_INVALID;
+ return 0; // success
+ }
- status = pReader->Read(pos, 1, &val);
+ if (pos >= stop)
+ return E_FILE_FORMAT_INVALID;
- if (status)
- return E_FILE_FORMAT_INVALID;
+ unsigned char biased_count;
- ++pos; //consume xiph size byte
+ status = pReader->Read(pos, 1, &biased_count);
- frame_size += val;
+ if (status)
+ return E_FILE_FORMAT_INVALID;
- if (val < 255)
- break;
- }
+ ++pos; // consume frame count
+ assert(pos <= stop);
- Frame& f = *pf++;
- assert(pf < pf_end);
+ m_frame_count = int(biased_count) + 1;
- f.pos = 0; //patch later
+ m_frames = new Frame[m_frame_count];
+ assert(m_frames);
- f.len = frame_size;
- size += frame_size; //contribution of this frame
+ if (lacing == 1) { // Xiph
+ Frame* pf = m_frames;
+ Frame* const pf_end = pf + m_frame_count;
- --frame_count;
- }
+ long size = 0;
+ int frame_count = m_frame_count;
- assert(pf < pf_end);
- assert(pos <= stop);
+ while (frame_count > 1) {
+ long frame_size = 0;
- {
- Frame& f = *pf++;
-
- if (pf != pf_end)
- return E_FILE_FORMAT_INVALID;
-
- f.pos = 0; //patch later
-
- const long long total_size = stop - pos;
-
- if (total_size < size)
- return E_FILE_FORMAT_INVALID;
-
- const long long frame_size = total_size - size;
-
- if (frame_size > LONG_MAX)
- return E_FILE_FORMAT_INVALID;
-
- f.len = static_cast<long>(frame_size);
- }
-
- pf = m_frames;
- while (pf != pf_end)
- {
- Frame& f = *pf++;
- assert((pos + f.len) <= stop);
-
- f.pos = pos;
- pos += f.len;
- }
-
- assert(pos == stop);
- }
- else if (lacing == 2) //fixed-size lacing
- {
- const long long total_size = stop - pos;
-
- if ((total_size % m_frame_count) != 0)
- return E_FILE_FORMAT_INVALID;
-
- const long long frame_size = total_size / m_frame_count;
-
- if (frame_size > LONG_MAX)
- return E_FILE_FORMAT_INVALID;
-
- Frame* pf = m_frames;
- Frame* const pf_end = pf + m_frame_count;
-
- while (pf != pf_end)
- {
- assert((pos + frame_size) <= stop);
-
- Frame& f = *pf++;
-
- f.pos = pos;
- f.len = static_cast<long>(frame_size);
-
- pos += frame_size;
- }
-
- assert(pos == stop);
- }
- else
- {
- assert(lacing == 3); //EBML lacing
+ for (;;) {
+ unsigned char val;
if (pos >= stop)
- return E_FILE_FORMAT_INVALID;
+ return E_FILE_FORMAT_INVALID;
- long size = 0;
- int frame_count = m_frame_count;
+ status = pReader->Read(pos, 1, &val);
- long long frame_size = ReadUInt(pReader, pos, len);
+ if (status)
+ return E_FILE_FORMAT_INVALID;
- if (frame_size < 0)
- return E_FILE_FORMAT_INVALID;
+ ++pos; // consume xiph size byte
- if (frame_size > LONG_MAX)
- return E_FILE_FORMAT_INVALID;
+ frame_size += val;
- if ((pos + len) > stop)
- return E_FILE_FORMAT_INVALID;
+ if (val < 255)
+ break;
+ }
- pos += len; //consume length of size of first frame
+ Frame& f = *pf++;
+ assert(pf < pf_end);
- if ((pos + frame_size) > stop)
- return E_FILE_FORMAT_INVALID;
+ f.pos = 0; // patch later
- Frame* pf = m_frames;
- Frame* const pf_end = pf + m_frame_count;
+ f.len = frame_size;
+ size += frame_size; // contribution of this frame
- {
- Frame& curr = *pf;
-
- curr.pos = 0; //patch later
-
- curr.len = static_cast<long>(frame_size);
- size += curr.len; //contribution of this frame
- }
-
- --frame_count;
-
- while (frame_count > 1)
- {
- if (pos >= stop)
- return E_FILE_FORMAT_INVALID;
-
- assert(pf < pf_end);
-
- const Frame& prev = *pf++;
- assert(prev.len == frame_size);
- if (prev.len != frame_size)
- return E_FILE_FORMAT_INVALID;
-
- assert(pf < pf_end);
-
- Frame& curr = *pf;
-
- curr.pos = 0; //patch later
-
- const long long delta_size_ = ReadUInt(pReader, pos, len);
-
- if (delta_size_ < 0)
- return E_FILE_FORMAT_INVALID;
-
- if ((pos + len) > stop)
- return E_FILE_FORMAT_INVALID;
-
- pos += len; //consume length of (delta) size
- assert(pos <= stop);
-
- const int exp = 7*len - 1;
- const long long bias = (1LL << exp) - 1LL;
- const long long delta_size = delta_size_ - bias;
-
- frame_size += delta_size;
-
- if (frame_size < 0)
- return E_FILE_FORMAT_INVALID;
-
- if (frame_size > LONG_MAX)
- return E_FILE_FORMAT_INVALID;
-
- curr.len = static_cast<long>(frame_size);
- size += curr.len; //contribution of this frame
-
- --frame_count;
- }
-
- {
- assert(pos <= stop);
- assert(pf < pf_end);
-
- const Frame& prev = *pf++;
- assert(prev.len == frame_size);
- if (prev.len != frame_size)
- return E_FILE_FORMAT_INVALID;
-
- assert(pf < pf_end);
-
- Frame& curr = *pf++;
- assert(pf == pf_end);
-
- curr.pos = 0; //patch later
-
- const long long total_size = stop - pos;
-
- if (total_size < size)
- return E_FILE_FORMAT_INVALID;
-
- frame_size = total_size - size;
-
- if (frame_size > LONG_MAX)
- return E_FILE_FORMAT_INVALID;
-
- curr.len = static_cast<long>(frame_size);
- }
-
- pf = m_frames;
- while (pf != pf_end)
- {
- Frame& f = *pf++;
- assert((pos + f.len) <= stop);
-
- f.pos = pos;
- pos += f.len;
- }
-
- assert(pos == stop);
+ --frame_count;
}
- return 0; //success
+ assert(pf < pf_end);
+ assert(pos <= stop);
+
+ {
+ Frame& f = *pf++;
+
+ if (pf != pf_end)
+ return E_FILE_FORMAT_INVALID;
+
+ f.pos = 0; // patch later
+
+ const long long total_size = stop - pos;
+
+ if (total_size < size)
+ return E_FILE_FORMAT_INVALID;
+
+ const long long frame_size = total_size - size;
+
+ if (frame_size > LONG_MAX)
+ return E_FILE_FORMAT_INVALID;
+
+ f.len = static_cast<long>(frame_size);
+ }
+
+ pf = m_frames;
+ while (pf != pf_end) {
+ Frame& f = *pf++;
+ assert((pos + f.len) <= stop);
+
+ f.pos = pos;
+ pos += f.len;
+ }
+
+ assert(pos == stop);
+ } else if (lacing == 2) { // fixed-size lacing
+ const long long total_size = stop - pos;
+
+ if ((total_size % m_frame_count) != 0)
+ return E_FILE_FORMAT_INVALID;
+
+ const long long frame_size = total_size / m_frame_count;
+
+ if (frame_size > LONG_MAX)
+ return E_FILE_FORMAT_INVALID;
+
+ Frame* pf = m_frames;
+ Frame* const pf_end = pf + m_frame_count;
+
+ while (pf != pf_end) {
+ assert((pos + frame_size) <= stop);
+
+ Frame& f = *pf++;
+
+ f.pos = pos;
+ f.len = static_cast<long>(frame_size);
+
+ pos += frame_size;
+ }
+
+ assert(pos == stop);
+ } else {
+ assert(lacing == 3); // EBML lacing
+
+ if (pos >= stop)
+ return E_FILE_FORMAT_INVALID;
+
+ long size = 0;
+ int frame_count = m_frame_count;
+
+ long long frame_size = ReadUInt(pReader, pos, len);
+
+ if (frame_size < 0)
+ return E_FILE_FORMAT_INVALID;
+
+ if (frame_size > LONG_MAX)
+ return E_FILE_FORMAT_INVALID;
+
+ if ((pos + len) > stop)
+ return E_FILE_FORMAT_INVALID;
+
+ pos += len; // consume length of size of first frame
+
+ if ((pos + frame_size) > stop)
+ return E_FILE_FORMAT_INVALID;
+
+ Frame* pf = m_frames;
+ Frame* const pf_end = pf + m_frame_count;
+
+ {
+ Frame& curr = *pf;
+
+ curr.pos = 0; // patch later
+
+ curr.len = static_cast<long>(frame_size);
+ size += curr.len; // contribution of this frame
+ }
+
+ --frame_count;
+
+ while (frame_count > 1) {
+ if (pos >= stop)
+ return E_FILE_FORMAT_INVALID;
+
+ assert(pf < pf_end);
+
+ const Frame& prev = *pf++;
+ assert(prev.len == frame_size);
+ if (prev.len != frame_size)
+ return E_FILE_FORMAT_INVALID;
+
+ assert(pf < pf_end);
+
+ Frame& curr = *pf;
+
+ curr.pos = 0; // patch later
+
+ const long long delta_size_ = ReadUInt(pReader, pos, len);
+
+ if (delta_size_ < 0)
+ return E_FILE_FORMAT_INVALID;
+
+ if ((pos + len) > stop)
+ return E_FILE_FORMAT_INVALID;
+
+ pos += len; // consume length of (delta) size
+ assert(pos <= stop);
+
+ const int exp = 7 * len - 1;
+ const long long bias = (1LL << exp) - 1LL;
+ const long long delta_size = delta_size_ - bias;
+
+ frame_size += delta_size;
+
+ if (frame_size < 0)
+ return E_FILE_FORMAT_INVALID;
+
+ if (frame_size > LONG_MAX)
+ return E_FILE_FORMAT_INVALID;
+
+ curr.len = static_cast<long>(frame_size);
+ size += curr.len; // contribution of this frame
+
+ --frame_count;
+ }
+
+ {
+ assert(pos <= stop);
+ assert(pf < pf_end);
+
+ const Frame& prev = *pf++;
+ assert(prev.len == frame_size);
+ if (prev.len != frame_size)
+ return E_FILE_FORMAT_INVALID;
+
+ assert(pf < pf_end);
+
+ Frame& curr = *pf++;
+ assert(pf == pf_end);
+
+ curr.pos = 0; // patch later
+
+ const long long total_size = stop - pos;
+
+ if (total_size < size)
+ return E_FILE_FORMAT_INVALID;
+
+ frame_size = total_size - size;
+
+ if (frame_size > LONG_MAX)
+ return E_FILE_FORMAT_INVALID;
+
+ curr.len = static_cast<long>(frame_size);
+ }
+
+ pf = m_frames;
+ while (pf != pf_end) {
+ Frame& f = *pf++;
+ assert((pos + f.len) <= stop);
+
+ f.pos = pos;
+ pos += f.len;
+ }
+
+ assert(pos == stop);
+ }
+
+ return 0; // success
}
+long long Block::GetTimeCode(const Cluster* pCluster) const {
+ if (pCluster == 0)
+ return m_timecode;
-long long Block::GetTimeCode(const Cluster* pCluster) const
-{
- if (pCluster == 0)
- return m_timecode;
+ const long long tc0 = pCluster->GetTimeCode();
+ assert(tc0 >= 0);
- const long long tc0 = pCluster->GetTimeCode();
- assert(tc0 >= 0);
+ const long long tc = tc0 + m_timecode;
- const long long tc = tc0 + m_timecode;
-
- return tc; //unscaled timecode units
+ return tc; // unscaled timecode units
}
+long long Block::GetTime(const Cluster* pCluster) const {
+ assert(pCluster);
-long long Block::GetTime(const Cluster* pCluster) const
-{
- assert(pCluster);
+ const long long tc = GetTimeCode(pCluster);
- const long long tc = GetTimeCode(pCluster);
+ const Segment* const pSegment = pCluster->m_pSegment;
+ const SegmentInfo* const pInfo = pSegment->GetInfo();
+ assert(pInfo);
- const Segment* const pSegment = pCluster->m_pSegment;
- const SegmentInfo* const pInfo = pSegment->GetInfo();
- assert(pInfo);
+ const long long scale = pInfo->GetTimeCodeScale();
+ assert(scale >= 1);
- const long long scale = pInfo->GetTimeCodeScale();
- assert(scale >= 1);
+ const long long ns = tc * scale;
- const long long ns = tc * scale;
-
- return ns;
+ return ns;
}
+long long Block::GetTrackNumber() const { return m_track; }
-long long Block::GetTrackNumber() const
-{
- return m_track;
+bool Block::IsKey() const {
+ return ((m_flags & static_cast<unsigned char>(1 << 7)) != 0);
}
-
-bool Block::IsKey() const
-{
- return ((m_flags & static_cast<unsigned char>(1 << 7)) != 0);
+void Block::SetKey(bool bKey) {
+ if (bKey)
+ m_flags |= static_cast<unsigned char>(1 << 7);
+ else
+ m_flags &= 0x7F;
}
+bool Block::IsInvisible() const { return bool(int(m_flags & 0x08) != 0); }
-void Block::SetKey(bool bKey)
-{
- if (bKey)
- m_flags |= static_cast<unsigned char>(1 << 7);
- else
- m_flags &= 0x7F;
+Block::Lacing Block::GetLacing() const {
+ const int value = int(m_flags & 0x06) >> 1;
+ return static_cast<Lacing>(value);
}
+int Block::GetFrameCount() const { return m_frame_count; }
-bool Block::IsInvisible() const
-{
- return bool(int(m_flags & 0x08) != 0);
+const Block::Frame& Block::GetFrame(int idx) const {
+ assert(idx >= 0);
+ assert(idx < m_frame_count);
+
+ const Frame& f = m_frames[idx];
+ assert(f.pos > 0);
+ assert(f.len > 0);
+
+ return f;
}
+long Block::Frame::Read(IMkvReader* pReader, unsigned char* buf) const {
+ assert(pReader);
+ assert(buf);
-Block::Lacing Block::GetLacing() const
-{
- const int value = int(m_flags & 0x06) >> 1;
- return static_cast<Lacing>(value);
+ const long status = pReader->Read(pos, len, buf);
+ return status;
}
+long long Block::GetDiscardPadding() const { return m_discard_padding; }
-int Block::GetFrameCount() const
-{
- return m_frame_count;
-}
-
-
-const Block::Frame& Block::GetFrame(int idx) const
-{
- assert(idx >= 0);
- assert(idx < m_frame_count);
-
- const Frame& f = m_frames[idx];
- assert(f.pos > 0);
- assert(f.len > 0);
-
- return f;
-}
-
-
-long Block::Frame::Read(IMkvReader* pReader, unsigned char* buf) const
-{
- assert(pReader);
- assert(buf);
-
- const long status = pReader->Read(pos, len, buf);
- return status;
-}
-
-long long Block::GetDiscardPadding() const
-{
- return m_discard_padding;
-}
-
-} //end namespace mkvparser
+} // end namespace mkvparser
diff --git a/third_party/libwebm/mkvparser.hpp b/third_party/libwebm/mkvparser.hpp
index 7184d26..3e17d07 100644
--- a/third_party/libwebm/mkvparser.hpp
+++ b/third_party/libwebm/mkvparser.hpp
@@ -13,19 +13,18 @@
#include <cstdio>
#include <cstddef>
-namespace mkvparser
-{
+namespace mkvparser {
const int E_FILE_FORMAT_INVALID = -2;
const int E_BUFFER_NOT_FULL = -3;
-class IMkvReader
-{
-public:
- virtual int Read(long long pos, long len, unsigned char* buf) = 0;
- virtual int Length(long long* total, long long* available) = 0;
-protected:
- virtual ~IMkvReader();
+class IMkvReader {
+ public:
+ virtual int Read(long long pos, long len, unsigned char* buf) = 0;
+ virtual int Length(long long* total, long long* available) = 0;
+
+ protected:
+ virtual ~IMkvReader();
};
long long GetUIntLength(IMkvReader*, long long, long&);
@@ -35,170 +34,148 @@
long UnserializeFloat(IMkvReader*, long long pos, long long size, double&);
long UnserializeInt(IMkvReader*, long long pos, long len, long long& result);
-long UnserializeString(
- IMkvReader*,
- long long pos,
- long long size,
- char*& str);
+long UnserializeString(IMkvReader*, long long pos, long long size, char*& str);
-long ParseElementHeader(
- IMkvReader* pReader,
- long long& pos, //consume id and size fields
- long long stop, //if you know size of element's parent
- long long& id,
- long long& size);
+long ParseElementHeader(IMkvReader* pReader,
+ long long& pos, // consume id and size fields
+ long long stop, // if you know size of element's parent
+ long long& id, long long& size);
bool Match(IMkvReader*, long long&, unsigned long, long long&);
bool Match(IMkvReader*, long long&, unsigned long, unsigned char*&, size_t&);
void GetVersion(int& major, int& minor, int& build, int& revision);
-struct EBMLHeader
-{
- EBMLHeader();
- ~EBMLHeader();
- long long m_version;
- long long m_readVersion;
- long long m_maxIdLength;
- long long m_maxSizeLength;
- char* m_docType;
- long long m_docTypeVersion;
- long long m_docTypeReadVersion;
+struct EBMLHeader {
+ EBMLHeader();
+ ~EBMLHeader();
+ long long m_version;
+ long long m_readVersion;
+ long long m_maxIdLength;
+ long long m_maxSizeLength;
+ char* m_docType;
+ long long m_docTypeVersion;
+ long long m_docTypeReadVersion;
- long long Parse(IMkvReader*, long long&);
- void Init();
+ long long Parse(IMkvReader*, long long&);
+ void Init();
};
-
class Segment;
class Track;
class Cluster;
-class Block
-{
- Block(const Block&);
- Block& operator=(const Block&);
+class Block {
+ Block(const Block&);
+ Block& operator=(const Block&);
-public:
- const long long m_start;
- const long long m_size;
+ public:
+ const long long m_start;
+ const long long m_size;
- Block(long long start, long long size, long long discard_padding);
- ~Block();
+ Block(long long start, long long size, long long discard_padding);
+ ~Block();
- long Parse(const Cluster*);
+ long Parse(const Cluster*);
- long long GetTrackNumber() const;
- long long GetTimeCode(const Cluster*) const; //absolute, but not scaled
- long long GetTime(const Cluster*) const; //absolute, and scaled (ns)
- bool IsKey() const;
- void SetKey(bool);
- bool IsInvisible() const;
+ long long GetTrackNumber() const;
+ long long GetTimeCode(const Cluster*) const; // absolute, but not scaled
+ long long GetTime(const Cluster*) const; // absolute, and scaled (ns)
+ bool IsKey() const;
+ void SetKey(bool);
+ bool IsInvisible() const;
- enum Lacing { kLacingNone, kLacingXiph, kLacingFixed, kLacingEbml };
- Lacing GetLacing() const;
+ enum Lacing { kLacingNone, kLacingXiph, kLacingFixed, kLacingEbml };
+ Lacing GetLacing() const;
- int GetFrameCount() const; //to index frames: [0, count)
+ int GetFrameCount() const; // to index frames: [0, count)
- struct Frame
- {
- long long pos; //absolute offset
- long len;
+ struct Frame {
+ long long pos; // absolute offset
+ long len;
- long Read(IMkvReader*, unsigned char*) const;
- };
+ long Read(IMkvReader*, unsigned char*) const;
+ };
- const Frame& GetFrame(int frame_index) const;
+ const Frame& GetFrame(int frame_index) const;
- long long GetDiscardPadding() const;
+ long long GetDiscardPadding() const;
-private:
- long long m_track; //Track::Number()
- short m_timecode; //relative to cluster
- unsigned char m_flags;
+ private:
+ long long m_track; // Track::Number()
+ short m_timecode; // relative to cluster
+ unsigned char m_flags;
- Frame* m_frames;
- int m_frame_count;
+ Frame* m_frames;
+ int m_frame_count;
-protected:
- const long long m_discard_padding;
+ protected:
+ const long long m_discard_padding;
};
+class BlockEntry {
+ BlockEntry(const BlockEntry&);
+ BlockEntry& operator=(const BlockEntry&);
-class BlockEntry
-{
- BlockEntry(const BlockEntry&);
- BlockEntry& operator=(const BlockEntry&);
+ protected:
+ BlockEntry(Cluster*, long index);
-protected:
- BlockEntry(Cluster*, long index);
+ public:
+ virtual ~BlockEntry();
-public:
- virtual ~BlockEntry();
+ bool EOS() const;
+ const Cluster* GetCluster() const;
+ long GetIndex() const;
+ virtual const Block* GetBlock() const = 0;
- bool EOS() const;
- const Cluster* GetCluster() const;
- long GetIndex() const;
- virtual const Block* GetBlock() const = 0;
+ enum Kind { kBlockEOS, kBlockSimple, kBlockGroup };
+ virtual Kind GetKind() const = 0;
- enum Kind { kBlockEOS, kBlockSimple, kBlockGroup };
- virtual Kind GetKind() const = 0;
-
-protected:
- Cluster* const m_pCluster;
- const long m_index;
-
+ protected:
+ Cluster* const m_pCluster;
+ const long m_index;
};
+class SimpleBlock : public BlockEntry {
+ SimpleBlock(const SimpleBlock&);
+ SimpleBlock& operator=(const SimpleBlock&);
-class SimpleBlock : public BlockEntry
-{
- SimpleBlock(const SimpleBlock&);
- SimpleBlock& operator=(const SimpleBlock&);
+ public:
+ SimpleBlock(Cluster*, long index, long long start, long long size);
+ long Parse();
-public:
- SimpleBlock(Cluster*, long index, long long start, long long size);
- long Parse();
+ Kind GetKind() const;
+ const Block* GetBlock() const;
- Kind GetKind() const;
- const Block* GetBlock() const;
-
-protected:
- Block m_block;
-
+ protected:
+ Block m_block;
};
+class BlockGroup : public BlockEntry {
+ BlockGroup(const BlockGroup&);
+ BlockGroup& operator=(const BlockGroup&);
-class BlockGroup : public BlockEntry
-{
- BlockGroup(const BlockGroup&);
- BlockGroup& operator=(const BlockGroup&);
+ public:
+ BlockGroup(Cluster*, long index,
+ long long block_start, // absolute pos of block's payload
+ long long block_size, // size of block's payload
+ long long prev, long long next, long long duration,
+ long long discard_padding);
-public:
- BlockGroup(
- Cluster*,
- long index,
- long long block_start, //absolute pos of block's payload
- long long block_size, //size of block's payload
- long long prev,
- long long next,
- long long duration,
- long long discard_padding);
+ long Parse();
- long Parse();
+ Kind GetKind() const;
+ const Block* GetBlock() const;
- Kind GetKind() const;
- const Block* GetBlock() const;
+ long long GetPrevTimeCode() const; // relative to block's time
+ long long GetNextTimeCode() const; // as above
+ long long GetDurationTimeCode() const;
- long long GetPrevTimeCode() const; //relative to block's time
- long long GetNextTimeCode() const; //as above
- long long GetDurationTimeCode() const;
-
-private:
- Block m_block;
- const long long m_prev;
- const long long m_next;
- const long long m_duration;
+ private:
+ Block m_block;
+ const long long m_prev;
+ const long long m_next;
+ const long long m_duration;
};
///////////////////////////////////////////////////////////////
@@ -206,635 +183,552 @@
// Elements used to describe if the track data has been encrypted or
// compressed with zlib or header stripping.
class ContentEncoding {
-public:
- enum {
- kCTR = 1
- };
+ public:
+ enum { kCTR = 1 };
- ContentEncoding();
- ~ContentEncoding();
+ ContentEncoding();
+ ~ContentEncoding();
- // ContentCompression element names
- struct ContentCompression {
- ContentCompression();
- ~ContentCompression();
+ // ContentCompression element names
+ struct ContentCompression {
+ ContentCompression();
+ ~ContentCompression();
- unsigned long long algo;
- unsigned char* settings;
- long long settings_len;
- };
+ unsigned long long algo;
+ unsigned char* settings;
+ long long settings_len;
+ };
- // ContentEncAESSettings element names
- struct ContentEncAESSettings {
- ContentEncAESSettings() : cipher_mode(kCTR) {}
- ~ContentEncAESSettings() {}
+ // ContentEncAESSettings element names
+ struct ContentEncAESSettings {
+ ContentEncAESSettings() : cipher_mode(kCTR) {}
+ ~ContentEncAESSettings() {}
- unsigned long long cipher_mode;
- };
+ unsigned long long cipher_mode;
+ };
- // ContentEncryption element names
- struct ContentEncryption {
- ContentEncryption();
- ~ContentEncryption();
+ // ContentEncryption element names
+ struct ContentEncryption {
+ ContentEncryption();
+ ~ContentEncryption();
- unsigned long long algo;
- unsigned char* key_id;
- long long key_id_len;
- unsigned char* signature;
- long long signature_len;
- unsigned char* sig_key_id;
- long long sig_key_id_len;
- unsigned long long sig_algo;
- unsigned long long sig_hash_algo;
+ unsigned long long algo;
+ unsigned char* key_id;
+ long long key_id_len;
+ unsigned char* signature;
+ long long signature_len;
+ unsigned char* sig_key_id;
+ long long sig_key_id_len;
+ unsigned long long sig_algo;
+ unsigned long long sig_hash_algo;
- ContentEncAESSettings aes_settings;
- };
+ ContentEncAESSettings aes_settings;
+ };
- // Returns ContentCompression represented by |idx|. Returns NULL if |idx|
- // is out of bounds.
- const ContentCompression* GetCompressionByIndex(unsigned long idx) const;
+ // Returns ContentCompression represented by |idx|. Returns NULL if |idx|
+ // is out of bounds.
+ const ContentCompression* GetCompressionByIndex(unsigned long idx) const;
- // Returns number of ContentCompression elements in this ContentEncoding
- // element.
- unsigned long GetCompressionCount() const;
+ // Returns number of ContentCompression elements in this ContentEncoding
+ // element.
+ unsigned long GetCompressionCount() const;
- // Parses the ContentCompression element from |pReader|. |start| is the
- // starting offset of the ContentCompression payload. |size| is the size in
- // bytes of the ContentCompression payload. |compression| is where the parsed
- // values will be stored.
- long ParseCompressionEntry(long long start,
- long long size,
- IMkvReader* pReader,
- ContentCompression* compression);
+ // Parses the ContentCompression element from |pReader|. |start| is the
+ // starting offset of the ContentCompression payload. |size| is the size in
+ // bytes of the ContentCompression payload. |compression| is where the parsed
+ // values will be stored.
+ long ParseCompressionEntry(long long start, long long size,
+ IMkvReader* pReader,
+ ContentCompression* compression);
- // Returns ContentEncryption represented by |idx|. Returns NULL if |idx|
- // is out of bounds.
- const ContentEncryption* GetEncryptionByIndex(unsigned long idx) const;
+ // Returns ContentEncryption represented by |idx|. Returns NULL if |idx|
+ // is out of bounds.
+ const ContentEncryption* GetEncryptionByIndex(unsigned long idx) const;
- // Returns number of ContentEncryption elements in this ContentEncoding
- // element.
- unsigned long GetEncryptionCount() const;
+ // Returns number of ContentEncryption elements in this ContentEncoding
+ // element.
+ unsigned long GetEncryptionCount() const;
- // Parses the ContentEncAESSettings element from |pReader|. |start| is the
- // starting offset of the ContentEncAESSettings payload. |size| is the
- // size in bytes of the ContentEncAESSettings payload. |encryption| is
- // where the parsed values will be stored.
- long ParseContentEncAESSettingsEntry(long long start,
- long long size,
- IMkvReader* pReader,
- ContentEncAESSettings* aes);
+ // Parses the ContentEncAESSettings element from |pReader|. |start| is the
+ // starting offset of the ContentEncAESSettings payload. |size| is the
+ // size in bytes of the ContentEncAESSettings payload. |encryption| is
+ // where the parsed values will be stored.
+ long ParseContentEncAESSettingsEntry(long long start, long long size,
+ IMkvReader* pReader,
+ ContentEncAESSettings* aes);
- // Parses the ContentEncoding element from |pReader|. |start| is the
- // starting offset of the ContentEncoding payload. |size| is the size in
- // bytes of the ContentEncoding payload. Returns true on success.
- long ParseContentEncodingEntry(long long start,
- long long size,
- IMkvReader* pReader);
+ // Parses the ContentEncoding element from |pReader|. |start| is the
+ // starting offset of the ContentEncoding payload. |size| is the size in
+ // bytes of the ContentEncoding payload. Returns true on success.
+ long ParseContentEncodingEntry(long long start, long long size,
+ IMkvReader* pReader);
- // Parses the ContentEncryption element from |pReader|. |start| is the
- // starting offset of the ContentEncryption payload. |size| is the size in
- // bytes of the ContentEncryption payload. |encryption| is where the parsed
- // values will be stored.
- long ParseEncryptionEntry(long long start,
- long long size,
- IMkvReader* pReader,
- ContentEncryption* encryption);
+ // Parses the ContentEncryption element from |pReader|. |start| is the
+ // starting offset of the ContentEncryption payload. |size| is the size in
+ // bytes of the ContentEncryption payload. |encryption| is where the parsed
+ // values will be stored.
+ long ParseEncryptionEntry(long long start, long long size,
+ IMkvReader* pReader, ContentEncryption* encryption);
- unsigned long long encoding_order() const { return encoding_order_; }
- unsigned long long encoding_scope() const { return encoding_scope_; }
- unsigned long long encoding_type() const { return encoding_type_; }
+ unsigned long long encoding_order() const { return encoding_order_; }
+ unsigned long long encoding_scope() const { return encoding_scope_; }
+ unsigned long long encoding_type() const { return encoding_type_; }
-private:
- // Member variables for list of ContentCompression elements.
- ContentCompression** compression_entries_;
- ContentCompression** compression_entries_end_;
+ private:
+ // Member variables for list of ContentCompression elements.
+ ContentCompression** compression_entries_;
+ ContentCompression** compression_entries_end_;
- // Member variables for list of ContentEncryption elements.
- ContentEncryption** encryption_entries_;
- ContentEncryption** encryption_entries_end_;
+ // Member variables for list of ContentEncryption elements.
+ ContentEncryption** encryption_entries_;
+ ContentEncryption** encryption_entries_end_;
- // ContentEncoding element names
- unsigned long long encoding_order_;
- unsigned long long encoding_scope_;
- unsigned long long encoding_type_;
+ // ContentEncoding element names
+ unsigned long long encoding_order_;
+ unsigned long long encoding_scope_;
+ unsigned long long encoding_type_;
- // LIBWEBM_DISALLOW_COPY_AND_ASSIGN(ContentEncoding);
- ContentEncoding(const ContentEncoding&);
- ContentEncoding& operator=(const ContentEncoding&);
+ // LIBWEBM_DISALLOW_COPY_AND_ASSIGN(ContentEncoding);
+ ContentEncoding(const ContentEncoding&);
+ ContentEncoding& operator=(const ContentEncoding&);
};
-class Track
-{
- Track(const Track&);
- Track& operator=(const Track&);
+class Track {
+ Track(const Track&);
+ Track& operator=(const Track&);
-public:
- class Info;
- static long Create(
- Segment*,
- const Info&,
- long long element_start,
- long long element_size,
- Track*&);
+ public:
+ class Info;
+ static long Create(Segment*, const Info&, long long element_start,
+ long long element_size, Track*&);
- enum Type {
- kVideo = 1,
- kAudio = 2,
- kSubtitle = 0x11,
- kMetadata = 0x21
- };
+ enum Type { kVideo = 1, kAudio = 2, kSubtitle = 0x11, kMetadata = 0x21 };
- Segment* const m_pSegment;
- const long long m_element_start;
- const long long m_element_size;
- virtual ~Track();
+ Segment* const m_pSegment;
+ const long long m_element_start;
+ const long long m_element_size;
+ virtual ~Track();
- long GetType() const;
- long GetNumber() const;
- unsigned long long GetUid() const;
- const char* GetNameAsUTF8() const;
+ long GetType() const;
+ long GetNumber() const;
+ unsigned long long GetUid() const;
+ const char* GetNameAsUTF8() const;
+ const char* GetLanguage() const;
+ const char* GetCodecNameAsUTF8() const;
+ const char* GetCodecId() const;
+ const unsigned char* GetCodecPrivate(size_t&) const;
+ bool GetLacing() const;
+ unsigned long long GetDefaultDuration() const;
+ unsigned long long GetCodecDelay() const;
+ unsigned long long GetSeekPreRoll() const;
+
+ const BlockEntry* GetEOS() const;
+
+ struct Settings {
+ long long start;
+ long long size;
+ };
+
+ class Info {
+ public:
+ Info();
+ ~Info();
+ int Copy(Info&) const;
+ void Clear();
+ long type;
+ long number;
+ unsigned long long uid;
+ unsigned long long defaultDuration;
+ unsigned long long codecDelay;
+ unsigned long long seekPreRoll;
+ char* nameAsUTF8;
+ char* language;
+ char* codecId;
+ char* codecNameAsUTF8;
+ unsigned char* codecPrivate;
+ size_t codecPrivateSize;
+ bool lacing;
+ Settings settings;
+
+ private:
+ Info(const Info&);
+ Info& operator=(const Info&);
+ int CopyStr(char* Info::*str, Info&) const;
+ };
+
+ long GetFirst(const BlockEntry*&) const;
+ long GetNext(const BlockEntry* pCurr, const BlockEntry*& pNext) const;
+ virtual bool VetEntry(const BlockEntry*) const;
+ virtual long Seek(long long time_ns, const BlockEntry*&) const;
+
+ const ContentEncoding* GetContentEncodingByIndex(unsigned long idx) const;
+ unsigned long GetContentEncodingCount() const;
+
+ long ParseContentEncodingsEntry(long long start, long long size);
+
+ protected:
+ Track(Segment*, long long element_start, long long element_size);
+
+ Info m_info;
+
+ class EOSBlock : public BlockEntry {
+ public:
+ EOSBlock();
+
+ Kind GetKind() const;
+ const Block* GetBlock() const;
+ };
+
+ EOSBlock m_eos;
+
+ private:
+ ContentEncoding** content_encoding_entries_;
+ ContentEncoding** content_encoding_entries_end_;
+};
+
+class VideoTrack : public Track {
+ VideoTrack(const VideoTrack&);
+ VideoTrack& operator=(const VideoTrack&);
+
+ VideoTrack(Segment*, long long element_start, long long element_size);
+
+ public:
+ static long Parse(Segment*, const Info&, long long element_start,
+ long long element_size, VideoTrack*&);
+
+ long long GetWidth() const;
+ long long GetHeight() const;
+ double GetFrameRate() const;
+
+ bool VetEntry(const BlockEntry*) const;
+ long Seek(long long time_ns, const BlockEntry*&) const;
+
+ private:
+ long long m_width;
+ long long m_height;
+ double m_rate;
+};
+
+class AudioTrack : public Track {
+ AudioTrack(const AudioTrack&);
+ AudioTrack& operator=(const AudioTrack&);
+
+ AudioTrack(Segment*, long long element_start, long long element_size);
+
+ public:
+ static long Parse(Segment*, const Info&, long long element_start,
+ long long element_size, AudioTrack*&);
+
+ double GetSamplingRate() const;
+ long long GetChannels() const;
+ long long GetBitDepth() const;
+
+ private:
+ double m_rate;
+ long long m_channels;
+ long long m_bitDepth;
+};
+
+class Tracks {
+ Tracks(const Tracks&);
+ Tracks& operator=(const Tracks&);
+
+ public:
+ Segment* const m_pSegment;
+ const long long m_start;
+ const long long m_size;
+ const long long m_element_start;
+ const long long m_element_size;
+
+ Tracks(Segment*, long long start, long long size, long long element_start,
+ long long element_size);
+
+ ~Tracks();
+
+ long Parse();
+
+ unsigned long GetTracksCount() const;
+
+ const Track* GetTrackByNumber(long tn) const;
+ const Track* GetTrackByIndex(unsigned long idx) const;
+
+ private:
+ Track** m_trackEntries;
+ Track** m_trackEntriesEnd;
+
+ long ParseTrackEntry(long long payload_start, long long payload_size,
+ long long element_start, long long element_size,
+ Track*&) const;
+};
+
+class Chapters {
+ Chapters(const Chapters&);
+ Chapters& operator=(const Chapters&);
+
+ public:
+ Segment* const m_pSegment;
+ const long long m_start;
+ const long long m_size;
+ const long long m_element_start;
+ const long long m_element_size;
+
+ Chapters(Segment*, long long payload_start, long long payload_size,
+ long long element_start, long long element_size);
+
+ ~Chapters();
+
+ long Parse();
+
+ class Atom;
+ class Edition;
+
+ class Display {
+ friend class Atom;
+ Display();
+ Display(const Display&);
+ ~Display();
+ Display& operator=(const Display&);
+
+ public:
+ const char* GetString() const;
const char* GetLanguage() const;
- const char* GetCodecNameAsUTF8() const;
- const char* GetCodecId() const;
- const unsigned char* GetCodecPrivate(size_t&) const;
- bool GetLacing() const;
- unsigned long long GetDefaultDuration() const;
- unsigned long long GetCodecDelay() const;
- unsigned long long GetSeekPreRoll() const;
+ const char* GetCountry() const;
- const BlockEntry* GetEOS() const;
+ private:
+ void Init();
+ void ShallowCopy(Display&) const;
+ void Clear();
+ long Parse(IMkvReader*, long long pos, long long size);
- struct Settings
- {
- long long start;
- long long size;
- };
+ char* m_string;
+ char* m_language;
+ char* m_country;
+ };
- class Info
- {
- public:
- Info();
- ~Info();
- int Copy(Info&) const;
- void Clear();
- long type;
- long number;
- unsigned long long uid;
- unsigned long long defaultDuration;
- unsigned long long codecDelay;
- unsigned long long seekPreRoll;
- char* nameAsUTF8;
- char* language;
- char* codecId;
- char* codecNameAsUTF8;
- unsigned char* codecPrivate;
- size_t codecPrivateSize;
- bool lacing;
- Settings settings;
+ class Atom {
+ friend class Edition;
+ Atom();
+ Atom(const Atom&);
+ ~Atom();
+ Atom& operator=(const Atom&);
- private:
- Info(const Info&);
- Info& operator=(const Info&);
- int CopyStr(char* Info::*str, Info&) const;
- };
+ public:
+ unsigned long long GetUID() const;
+ const char* GetStringUID() const;
- long GetFirst(const BlockEntry*&) const;
- long GetNext(const BlockEntry* pCurr, const BlockEntry*& pNext) const;
- virtual bool VetEntry(const BlockEntry*) const;
- virtual long Seek(long long time_ns, const BlockEntry*&) const;
+ long long GetStartTimecode() const;
+ long long GetStopTimecode() const;
- const ContentEncoding* GetContentEncodingByIndex(unsigned long idx) const;
- unsigned long GetContentEncodingCount() const;
+ long long GetStartTime(const Chapters*) const;
+ long long GetStopTime(const Chapters*) const;
- long ParseContentEncodingsEntry(long long start, long long size);
+ int GetDisplayCount() const;
+ const Display* GetDisplay(int index) const;
-protected:
- Track(
- Segment*,
- long long element_start,
- long long element_size);
+ private:
+ void Init();
+ void ShallowCopy(Atom&) const;
+ void Clear();
+ long Parse(IMkvReader*, long long pos, long long size);
+ static long long GetTime(const Chapters*, long long timecode);
- Info m_info;
+ long ParseDisplay(IMkvReader*, long long pos, long long size);
+ bool ExpandDisplaysArray();
- class EOSBlock : public BlockEntry
- {
- public:
- EOSBlock();
+ char* m_string_uid;
+ unsigned long long m_uid;
+ long long m_start_timecode;
+ long long m_stop_timecode;
- Kind GetKind() const;
- const Block* GetBlock() const;
- };
+ Display* m_displays;
+ int m_displays_size;
+ int m_displays_count;
+ };
- EOSBlock m_eos;
+ class Edition {
+ friend class Chapters;
+ Edition();
+ Edition(const Edition&);
+ ~Edition();
+ Edition& operator=(const Edition&);
-private:
- ContentEncoding** content_encoding_entries_;
- ContentEncoding** content_encoding_entries_end_;
+ public:
+ int GetAtomCount() const;
+ const Atom* GetAtom(int index) const;
+
+ private:
+ void Init();
+ void ShallowCopy(Edition&) const;
+ void Clear();
+ long Parse(IMkvReader*, long long pos, long long size);
+
+ long ParseAtom(IMkvReader*, long long pos, long long size);
+ bool ExpandAtomsArray();
+
+ Atom* m_atoms;
+ int m_atoms_size;
+ int m_atoms_count;
+ };
+
+ int GetEditionCount() const;
+ const Edition* GetEdition(int index) const;
+
+ private:
+ long ParseEdition(long long pos, long long size);
+ bool ExpandEditionsArray();
+
+ Edition* m_editions;
+ int m_editions_size;
+ int m_editions_count;
};
+class SegmentInfo {
+ SegmentInfo(const SegmentInfo&);
+ SegmentInfo& operator=(const SegmentInfo&);
-class VideoTrack : public Track
-{
- VideoTrack(const VideoTrack&);
- VideoTrack& operator=(const VideoTrack&);
+ public:
+ Segment* const m_pSegment;
+ const long long m_start;
+ const long long m_size;
+ const long long m_element_start;
+ const long long m_element_size;
- VideoTrack(
- Segment*,
- long long element_start,
- long long element_size);
+ SegmentInfo(Segment*, long long start, long long size,
+ long long element_start, long long element_size);
-public:
- static long Parse(
- Segment*,
- const Info&,
- long long element_start,
- long long element_size,
- VideoTrack*&);
+ ~SegmentInfo();
- long long GetWidth() const;
- long long GetHeight() const;
- double GetFrameRate() const;
+ long Parse();
- bool VetEntry(const BlockEntry*) const;
- long Seek(long long time_ns, const BlockEntry*&) const;
+ long long GetTimeCodeScale() const;
+ long long GetDuration() const; // scaled
+ const char* GetMuxingAppAsUTF8() const;
+ const char* GetWritingAppAsUTF8() const;
+ const char* GetTitleAsUTF8() const;
-private:
- long long m_width;
- long long m_height;
- double m_rate;
-
+ private:
+ long long m_timecodeScale;
+ double m_duration;
+ char* m_pMuxingAppAsUTF8;
+ char* m_pWritingAppAsUTF8;
+ char* m_pTitleAsUTF8;
};
+class SeekHead {
+ SeekHead(const SeekHead&);
+ SeekHead& operator=(const SeekHead&);
-class AudioTrack : public Track
-{
- AudioTrack(const AudioTrack&);
- AudioTrack& operator=(const AudioTrack&);
+ public:
+ Segment* const m_pSegment;
+ const long long m_start;
+ const long long m_size;
+ const long long m_element_start;
+ const long long m_element_size;
- AudioTrack(
- Segment*,
- long long element_start,
- long long element_size);
-public:
- static long Parse(
- Segment*,
- const Info&,
- long long element_start,
- long long element_size,
- AudioTrack*&);
+ SeekHead(Segment*, long long start, long long size, long long element_start,
+ long long element_size);
- double GetSamplingRate() const;
- long long GetChannels() const;
- long long GetBitDepth() const;
+ ~SeekHead();
-private:
- double m_rate;
- long long m_channels;
- long long m_bitDepth;
-};
+ long Parse();
+ struct Entry {
+ // the SeekHead entry payload
+ long long id;
+ long long pos;
-class Tracks
-{
- Tracks(const Tracks&);
- Tracks& operator=(const Tracks&);
+ // absolute pos of SeekEntry ID
+ long long element_start;
-public:
- Segment* const m_pSegment;
- const long long m_start;
- const long long m_size;
- const long long m_element_start;
- const long long m_element_size;
+ // SeekEntry ID size + size size + payload
+ long long element_size;
+ };
- Tracks(
- Segment*,
- long long start,
- long long size,
- long long element_start,
- long long element_size);
+ int GetCount() const;
+ const Entry* GetEntry(int idx) const;
- ~Tracks();
+ struct VoidElement {
+ // absolute pos of Void ID
+ long long element_start;
- long Parse();
+ // ID size + size size + payload size
+ long long element_size;
+ };
- unsigned long GetTracksCount() const;
+ int GetVoidElementCount() const;
+ const VoidElement* GetVoidElement(int idx) const;
- const Track* GetTrackByNumber(long tn) const;
- const Track* GetTrackByIndex(unsigned long idx) const;
+ private:
+ Entry* m_entries;
+ int m_entry_count;
-private:
- Track** m_trackEntries;
- Track** m_trackEntriesEnd;
+ VoidElement* m_void_elements;
+ int m_void_element_count;
- long ParseTrackEntry(
- long long payload_start,
- long long payload_size,
- long long element_start,
- long long element_size,
- Track*&) const;
-
-};
-
-
-class Chapters
-{
- Chapters(const Chapters&);
- Chapters& operator=(const Chapters&);
-
-public:
- Segment* const m_pSegment;
- const long long m_start;
- const long long m_size;
- const long long m_element_start;
- const long long m_element_size;
-
- Chapters(
- Segment*,
- long long payload_start,
- long long payload_size,
- long long element_start,
- long long element_size);
-
- ~Chapters();
-
- long Parse();
-
- class Atom;
- class Edition;
-
- class Display
- {
- friend class Atom;
- Display();
- Display(const Display&);
- ~Display();
- Display& operator=(const Display&);
- public:
- const char* GetString() const;
- const char* GetLanguage() const;
- const char* GetCountry() const;
- private:
- void Init();
- void ShallowCopy(Display&) const;
- void Clear();
- long Parse(IMkvReader*, long long pos, long long size);
-
- char* m_string;
- char* m_language;
- char* m_country;
- };
-
- class Atom
- {
- friend class Edition;
- Atom();
- Atom(const Atom&);
- ~Atom();
- Atom& operator=(const Atom&);
- public:
- unsigned long long GetUID() const;
- const char* GetStringUID() const;
-
- long long GetStartTimecode() const;
- long long GetStopTimecode() const;
-
- long long GetStartTime(const Chapters*) const;
- long long GetStopTime(const Chapters*) const;
-
- int GetDisplayCount() const;
- const Display* GetDisplay(int index) const;
- private:
- void Init();
- void ShallowCopy(Atom&) const;
- void Clear();
- long Parse(IMkvReader*, long long pos, long long size);
- static long long GetTime(const Chapters*, long long timecode);
-
- long ParseDisplay(IMkvReader*, long long pos, long long size);
- bool ExpandDisplaysArray();
-
- char* m_string_uid;
- unsigned long long m_uid;
- long long m_start_timecode;
- long long m_stop_timecode;
-
- Display* m_displays;
- int m_displays_size;
- int m_displays_count;
- };
-
- class Edition
- {
- friend class Chapters;
- Edition();
- Edition(const Edition&);
- ~Edition();
- Edition& operator=(const Edition&);
- public:
- int GetAtomCount() const;
- const Atom* GetAtom(int index) const;
- private:
- void Init();
- void ShallowCopy(Edition&) const;
- void Clear();
- long Parse(IMkvReader*, long long pos, long long size);
-
- long ParseAtom(IMkvReader*, long long pos, long long size);
- bool ExpandAtomsArray();
-
- Atom* m_atoms;
- int m_atoms_size;
- int m_atoms_count;
- };
-
- int GetEditionCount() const;
- const Edition* GetEdition(int index) const;
-
-private:
- long ParseEdition(long long pos, long long size);
- bool ExpandEditionsArray();
-
- Edition* m_editions;
- int m_editions_size;
- int m_editions_count;
-
-};
-
-
-class SegmentInfo
-{
- SegmentInfo(const SegmentInfo&);
- SegmentInfo& operator=(const SegmentInfo&);
-
-public:
- Segment* const m_pSegment;
- const long long m_start;
- const long long m_size;
- const long long m_element_start;
- const long long m_element_size;
-
- SegmentInfo(
- Segment*,
- long long start,
- long long size,
- long long element_start,
- long long element_size);
-
- ~SegmentInfo();
-
- long Parse();
-
- long long GetTimeCodeScale() const;
- long long GetDuration() const; //scaled
- const char* GetMuxingAppAsUTF8() const;
- const char* GetWritingAppAsUTF8() const;
- const char* GetTitleAsUTF8() const;
-
-private:
- long long m_timecodeScale;
- double m_duration;
- char* m_pMuxingAppAsUTF8;
- char* m_pWritingAppAsUTF8;
- char* m_pTitleAsUTF8;
-};
-
-
-class SeekHead
-{
- SeekHead(const SeekHead&);
- SeekHead& operator=(const SeekHead&);
-
-public:
- Segment* const m_pSegment;
- const long long m_start;
- const long long m_size;
- const long long m_element_start;
- const long long m_element_size;
-
- SeekHead(
- Segment*,
- long long start,
- long long size,
- long long element_start,
- long long element_size);
-
- ~SeekHead();
-
- long Parse();
-
- struct Entry
- {
- //the SeekHead entry payload
- long long id;
- long long pos;
-
- //absolute pos of SeekEntry ID
- long long element_start;
-
- //SeekEntry ID size + size size + payload
- long long element_size;
- };
-
- int GetCount() const;
- const Entry* GetEntry(int idx) const;
-
- struct VoidElement
- {
- //absolute pos of Void ID
- long long element_start;
-
- //ID size + size size + payload size
- long long element_size;
- };
-
- int GetVoidElementCount() const;
- const VoidElement* GetVoidElement(int idx) const;
-
-private:
- Entry* m_entries;
- int m_entry_count;
-
- VoidElement* m_void_elements;
- int m_void_element_count;
-
- static bool ParseEntry(
- IMkvReader*,
- long long pos, //payload
- long long size,
- Entry*);
-
+ static bool ParseEntry(IMkvReader*,
+ long long pos, // payload
+ long long size, Entry*);
};
class Cues;
-class CuePoint
-{
- friend class Cues;
+class CuePoint {
+ friend class Cues;
- CuePoint(long, long long);
- ~CuePoint();
+ CuePoint(long, long long);
+ ~CuePoint();
- CuePoint(const CuePoint&);
- CuePoint& operator=(const CuePoint&);
+ CuePoint(const CuePoint&);
+ CuePoint& operator=(const CuePoint&);
-public:
- long long m_element_start;
- long long m_element_size;
+ public:
+ long long m_element_start;
+ long long m_element_size;
- void Load(IMkvReader*);
+ void Load(IMkvReader*);
- long long GetTimeCode() const; //absolute but unscaled
- long long GetTime(const Segment*) const; //absolute and scaled (ns units)
+ long long GetTimeCode() const; // absolute but unscaled
+ long long GetTime(const Segment*) const; // absolute and scaled (ns units)
- struct TrackPosition
- {
- long long m_track;
- long long m_pos; //of cluster
- long long m_block;
- //codec_state //defaults to 0
- //reference = clusters containing req'd referenced blocks
- // reftime = timecode of the referenced block
+ struct TrackPosition {
+ long long m_track;
+ long long m_pos; // of cluster
+ long long m_block;
+ // codec_state //defaults to 0
+ // reference = clusters containing req'd referenced blocks
+ // reftime = timecode of the referenced block
- void Parse(IMkvReader*, long long, long long);
- };
+ void Parse(IMkvReader*, long long, long long);
+ };
- const TrackPosition* Find(const Track*) const;
+ const TrackPosition* Find(const Track*) const;
-private:
- const long m_index;
- long long m_timecode;
- TrackPosition* m_track_positions;
- size_t m_track_positions_count;
-
+ private:
+ const long m_index;
+ long long m_timecode;
+ TrackPosition* m_track_positions;
+ size_t m_track_positions_count;
};
+class Cues {
+ friend class Segment;
-class Cues
-{
- friend class Segment;
+ Cues(Segment*, long long start, long long size, long long element_start,
+ long long element_size);
+ ~Cues();
- Cues(
- Segment*,
- long long start,
- long long size,
- long long element_start,
- long long element_size);
- ~Cues();
+ Cues(const Cues&);
+ Cues& operator=(const Cues&);
- Cues(const Cues&);
- Cues& operator=(const Cues&);
+ public:
+ Segment* const m_pSegment;
+ const long long m_start;
+ const long long m_size;
+ const long long m_element_start;
+ const long long m_element_size;
-public:
- Segment* const m_pSegment;
- const long long m_start;
- const long long m_size;
- const long long m_element_start;
- const long long m_element_size;
-
- bool Find( //lower bound of time_ns
- long long time_ns,
- const Track*,
- const CuePoint*&,
- const CuePoint::TrackPosition*&) const;
+ bool Find( // lower bound of time_ns
+ long long time_ns, const Track*, const CuePoint*&,
+ const CuePoint::TrackPosition*&) const;
#if 0
bool FindNext( //upper_bound of time_ns
@@ -844,165 +738,144 @@
const CuePoint::TrackPosition*&) const;
#endif
- const CuePoint* GetFirst() const;
- const CuePoint* GetLast() const;
- const CuePoint* GetNext(const CuePoint*) const;
+ const CuePoint* GetFirst() const;
+ const CuePoint* GetLast() const;
+ const CuePoint* GetNext(const CuePoint*) const;
- const BlockEntry* GetBlock(
- const CuePoint*,
- const CuePoint::TrackPosition*) const;
+ const BlockEntry* GetBlock(const CuePoint*,
+ const CuePoint::TrackPosition*) const;
- bool LoadCuePoint() const;
- long GetCount() const; //loaded only
- //long GetTotal() const; //loaded + preloaded
- bool DoneParsing() const;
+ bool LoadCuePoint() const;
+ long GetCount() const; // loaded only
+ // long GetTotal() const; //loaded + preloaded
+ bool DoneParsing() const;
-private:
- void Init() const;
- void PreloadCuePoint(long&, long long) const;
+ private:
+ void Init() const;
+ void PreloadCuePoint(long&, long long) const;
- mutable CuePoint** m_cue_points;
- mutable long m_count;
- mutable long m_preload_count;
- mutable long long m_pos;
-
+ mutable CuePoint** m_cue_points;
+ mutable long m_count;
+ mutable long m_preload_count;
+ mutable long long m_pos;
};
+class Cluster {
+ friend class Segment;
-class Cluster
-{
- friend class Segment;
+ Cluster(const Cluster&);
+ Cluster& operator=(const Cluster&);
- Cluster(const Cluster&);
- Cluster& operator=(const Cluster&);
+ public:
+ Segment* const m_pSegment;
-public:
- Segment* const m_pSegment;
+ public:
+ static Cluster* Create(Segment*,
+ long index, // index in segment
+ long long off); // offset relative to segment
+ // long long element_size);
-public:
- static Cluster* Create(
- Segment*,
- long index, //index in segment
- long long off); //offset relative to segment
- //long long element_size);
+ Cluster(); // EndOfStream
+ ~Cluster();
- Cluster(); //EndOfStream
- ~Cluster();
+ bool EOS() const;
- bool EOS() const;
+ long long GetTimeCode() const; // absolute, but not scaled
+ long long GetTime() const; // absolute, and scaled (nanosecond units)
+ long long GetFirstTime() const; // time (ns) of first (earliest) block
+ long long GetLastTime() const; // time (ns) of last (latest) block
- long long GetTimeCode() const; //absolute, but not scaled
- long long GetTime() const; //absolute, and scaled (nanosecond units)
- long long GetFirstTime() const; //time (ns) of first (earliest) block
- long long GetLastTime() const; //time (ns) of last (latest) block
+ long GetFirst(const BlockEntry*&) const;
+ long GetLast(const BlockEntry*&) const;
+ long GetNext(const BlockEntry* curr, const BlockEntry*& next) const;
- long GetFirst(const BlockEntry*&) const;
- long GetLast(const BlockEntry*&) const;
- long GetNext(const BlockEntry* curr, const BlockEntry*& next) const;
+ const BlockEntry* GetEntry(const Track*, long long ns = -1) const;
+ const BlockEntry* GetEntry(const CuePoint&,
+ const CuePoint::TrackPosition&) const;
+ // const BlockEntry* GetMaxKey(const VideoTrack*) const;
- const BlockEntry* GetEntry(const Track*, long long ns = -1) const;
- const BlockEntry* GetEntry(
- const CuePoint&,
- const CuePoint::TrackPosition&) const;
- //const BlockEntry* GetMaxKey(const VideoTrack*) const;
+ // static bool HasBlockEntries(const Segment*, long long);
-// static bool HasBlockEntries(const Segment*, long long);
+ static long HasBlockEntries(const Segment*, long long idoff, long long& pos,
+ long& size);
- static long HasBlockEntries(
- const Segment*,
- long long idoff,
- long long& pos,
- long& size);
+ long GetEntryCount() const;
- long GetEntryCount() const;
+ long Load(long long& pos, long& size) const;
- long Load(long long& pos, long& size) const;
+ long Parse(long long& pos, long& size) const;
+ long GetEntry(long index, const mkvparser::BlockEntry*&) const;
- long Parse(long long& pos, long& size) const;
- long GetEntry(long index, const mkvparser::BlockEntry*&) const;
+ protected:
+ Cluster(Segment*, long index, long long element_start);
+ // long long element_size);
-protected:
- Cluster(
- Segment*,
- long index,
- long long element_start);
- //long long element_size);
+ public:
+ const long long m_element_start;
+ long long GetPosition() const; // offset relative to segment
-public:
- const long long m_element_start;
- long long GetPosition() const; //offset relative to segment
+ long GetIndex() const;
+ long long GetElementSize() const;
+ // long long GetPayloadSize() const;
- long GetIndex() const;
- long long GetElementSize() const;
- //long long GetPayloadSize() const;
+ // long long Unparsed() const;
- //long long Unparsed() const;
+ private:
+ long m_index;
+ mutable long long m_pos;
+ // mutable long long m_size;
+ mutable long long m_element_size;
+ mutable long long m_timecode;
+ mutable BlockEntry** m_entries;
+ mutable long m_entries_size;
+ mutable long m_entries_count;
-private:
- long m_index;
- mutable long long m_pos;
- //mutable long long m_size;
- mutable long long m_element_size;
- mutable long long m_timecode;
- mutable BlockEntry** m_entries;
- mutable long m_entries_size;
- mutable long m_entries_count;
+ long ParseSimpleBlock(long long, long long&, long&);
+ long ParseBlockGroup(long long, long long&, long&);
- long ParseSimpleBlock(long long, long long&, long&);
- long ParseBlockGroup(long long, long long&, long&);
-
- long CreateBlock(long long id, long long pos, long long size,
- long long discard_padding);
- long CreateBlockGroup(long long start_offset, long long size,
- long long discard_padding);
- long CreateSimpleBlock(long long, long long);
-
+ long CreateBlock(long long id, long long pos, long long size,
+ long long discard_padding);
+ long CreateBlockGroup(long long start_offset, long long size,
+ long long discard_padding);
+ long CreateSimpleBlock(long long, long long);
};
+class Segment {
+ friend class Cues;
+ friend class Track;
+ friend class VideoTrack;
-class Segment
-{
- friend class Cues;
- friend class Track;
- friend class VideoTrack;
+ Segment(const Segment&);
+ Segment& operator=(const Segment&);
- Segment(const Segment&);
- Segment& operator=(const Segment&);
+ private:
+ Segment(IMkvReader*, long long elem_start,
+ // long long elem_size,
+ long long pos, long long size);
-private:
- Segment(
- IMkvReader*,
- long long elem_start,
- //long long elem_size,
- long long pos,
- long long size);
+ public:
+ IMkvReader* const m_pReader;
+ const long long m_element_start;
+ // const long long m_element_size;
+ const long long m_start; // posn of segment payload
+ const long long m_size; // size of segment payload
+ Cluster m_eos; // TODO: make private?
-public:
- IMkvReader* const m_pReader;
- const long long m_element_start;
- //const long long m_element_size;
- const long long m_start; //posn of segment payload
- const long long m_size; //size of segment payload
- Cluster m_eos; //TODO: make private?
+ static long long CreateInstance(IMkvReader*, long long, Segment*&);
+ ~Segment();
- static long long CreateInstance(IMkvReader*, long long, Segment*&);
- ~Segment();
+ long Load(); // loads headers and all clusters
- long Load(); //loads headers and all clusters
+ // for incremental loading
+ // long long Unparsed() const;
+ bool DoneParsing() const;
+ long long ParseHeaders(); // stops when first cluster is found
+ // long FindNextCluster(long long& pos, long& size) const;
+ long LoadCluster(long long& pos, long& size); // load one cluster
+ long LoadCluster();
- //for incremental loading
- //long long Unparsed() const;
- bool DoneParsing() const;
- long long ParseHeaders(); //stops when first cluster is found
- //long FindNextCluster(long long& pos, long& size) const;
- long LoadCluster(long long& pos, long& size); //load one cluster
- long LoadCluster();
-
- long ParseNext(
- const Cluster* pCurr,
- const Cluster*& pNext,
- long long& pos,
- long& size);
+ long ParseNext(const Cluster* pCurr, const Cluster*& pNext, long long& pos,
+ long& size);
#if 0
//This pair parses one cluster, but only changes the state of the
@@ -1011,69 +884,62 @@
bool AddCluster(long long cluster_pos, long long new_pos);
#endif
- const SeekHead* GetSeekHead() const;
- const Tracks* GetTracks() const;
- const SegmentInfo* GetInfo() const;
- const Cues* GetCues() const;
- const Chapters* GetChapters() const;
+ const SeekHead* GetSeekHead() const;
+ const Tracks* GetTracks() const;
+ const SegmentInfo* GetInfo() const;
+ const Cues* GetCues() const;
+ const Chapters* GetChapters() const;
- long long GetDuration() const;
+ long long GetDuration() const;
- unsigned long GetCount() const;
- const Cluster* GetFirst() const;
- const Cluster* GetLast() const;
- const Cluster* GetNext(const Cluster*);
+ unsigned long GetCount() const;
+ const Cluster* GetFirst() const;
+ const Cluster* GetLast() const;
+ const Cluster* GetNext(const Cluster*);
- const Cluster* FindCluster(long long time_nanoseconds) const;
- //const BlockEntry* Seek(long long time_nanoseconds, const Track*) const;
+ const Cluster* FindCluster(long long time_nanoseconds) const;
+ // const BlockEntry* Seek(long long time_nanoseconds, const Track*) const;
- const Cluster* FindOrPreloadCluster(long long pos);
+ const Cluster* FindOrPreloadCluster(long long pos);
- long ParseCues(
- long long cues_off, //offset relative to start of segment
- long long& parse_pos,
- long& parse_len);
+ long ParseCues(long long cues_off, // offset relative to start of segment
+ long long& parse_pos, long& parse_len);
-private:
+ private:
+ long long m_pos; // absolute file posn; what has been consumed so far
+ Cluster* m_pUnknownSize;
- long long m_pos; //absolute file posn; what has been consumed so far
- Cluster* m_pUnknownSize;
+ SeekHead* m_pSeekHead;
+ SegmentInfo* m_pInfo;
+ Tracks* m_pTracks;
+ Cues* m_pCues;
+ Chapters* m_pChapters;
+ Cluster** m_clusters;
+ long m_clusterCount; // number of entries for which m_index >= 0
+ long m_clusterPreloadCount; // number of entries for which m_index < 0
+ long m_clusterSize; // array size
- SeekHead* m_pSeekHead;
- SegmentInfo* m_pInfo;
- Tracks* m_pTracks;
- Cues* m_pCues;
- Chapters* m_pChapters;
- Cluster** m_clusters;
- long m_clusterCount; //number of entries for which m_index >= 0
- long m_clusterPreloadCount; //number of entries for which m_index < 0
- long m_clusterSize; //array size
+ long DoLoadCluster(long long&, long&);
+ long DoLoadClusterUnknownSize(long long&, long&);
+ long DoParseNext(const Cluster*&, long long&, long&);
- long DoLoadCluster(long long&, long&);
- long DoLoadClusterUnknownSize(long long&, long&);
- long DoParseNext(const Cluster*&, long long&, long&);
+ void AppendCluster(Cluster*);
+ void PreloadCluster(Cluster*, ptrdiff_t);
- void AppendCluster(Cluster*);
- void PreloadCluster(Cluster*, ptrdiff_t);
+ // void ParseSeekHead(long long pos, long long size);
+ // void ParseSeekEntry(long long pos, long long size);
+ // void ParseCues(long long);
- //void ParseSeekHead(long long pos, long long size);
- //void ParseSeekEntry(long long pos, long long size);
- //void ParseCues(long long);
-
- const BlockEntry* GetBlock(
- const CuePoint&,
- const CuePoint::TrackPosition&);
-
+ const BlockEntry* GetBlock(const CuePoint&, const CuePoint::TrackPosition&);
};
-} //end namespace mkvparser
+} // end namespace mkvparser
-inline long mkvparser::Segment::LoadCluster()
-{
- long long pos;
- long size;
+inline long mkvparser::Segment::LoadCluster() {
+ long long pos;
+ long size;
- return LoadCluster(pos, size);
+ return LoadCluster(pos, size);
}
-#endif //MKVPARSER_HPP
+#endif // MKVPARSER_HPP
diff --git a/third_party/libwebm/mkvreader.cpp b/third_party/libwebm/mkvreader.cpp
index b4b2459..eaf9e0a 100644
--- a/third_party/libwebm/mkvreader.cpp
+++ b/third_party/libwebm/mkvreader.cpp
@@ -10,17 +10,11 @@
#include <cassert>
-namespace mkvparser
-{
+namespace mkvparser {
-MkvReader::MkvReader() :
- m_file(NULL),
- reader_owns_file_(true) {
-}
+MkvReader::MkvReader() : m_file(NULL), reader_owns_file_(true) {}
-MkvReader::MkvReader(FILE* fp) :
- m_file(fp),
- reader_owns_file_(false) {
+MkvReader::MkvReader(FILE* fp) : m_file(fp), reader_owns_file_(false) {
GetFileSize();
}
@@ -30,114 +24,109 @@
m_file = NULL;
}
-int MkvReader::Open(const char* fileName)
-{
- if (fileName == NULL)
- return -1;
+int MkvReader::Open(const char* fileName) {
+ if (fileName == NULL)
+ return -1;
- if (m_file)
- return -1;
+ if (m_file)
+ return -1;
#ifdef _MSC_VER
- const errno_t e = fopen_s(&m_file, fileName, "rb");
+ const errno_t e = fopen_s(&m_file, fileName, "rb");
- if (e)
- return -1; //error
+ if (e)
+ return -1; // error
#else
- m_file = fopen(fileName, "rb");
+ m_file = fopen(fileName, "rb");
- if (m_file == NULL)
- return -1;
+ if (m_file == NULL)
+ return -1;
#endif
- return !GetFileSize();
+ return !GetFileSize();
}
bool MkvReader::GetFileSize() {
- if (m_file == NULL)
- return false;
+ if (m_file == NULL)
+ return false;
#ifdef _MSC_VER
- int status = _fseeki64(m_file, 0L, SEEK_END);
+ int status = _fseeki64(m_file, 0L, SEEK_END);
- if (status)
- return false; //error
+ if (status)
+ return false; // error
- m_length = _ftelli64(m_file);
+ m_length = _ftelli64(m_file);
#else
- fseek(m_file, 0L, SEEK_END);
- m_length = ftell(m_file);
+ fseek(m_file, 0L, SEEK_END);
+ m_length = ftell(m_file);
#endif
- assert(m_length >= 0);
+ assert(m_length >= 0);
- if (m_length < 0)
- return false;
+ if (m_length < 0)
+ return false;
#ifdef _MSC_VER
- status = _fseeki64(m_file, 0L, SEEK_SET);
+ status = _fseeki64(m_file, 0L, SEEK_SET);
- if (status)
- return false; //error
+ if (status)
+ return false; // error
#else
- fseek(m_file, 0L, SEEK_SET);
+ fseek(m_file, 0L, SEEK_SET);
#endif
- return true;
+ return true;
}
-void MkvReader::Close()
-{
- if (m_file != NULL)
- {
- fclose(m_file);
- m_file = NULL;
- }
+void MkvReader::Close() {
+ if (m_file != NULL) {
+ fclose(m_file);
+ m_file = NULL;
+ }
}
-int MkvReader::Length(long long* total, long long* available)
-{
- if (m_file == NULL)
- return -1;
+int MkvReader::Length(long long* total, long long* available) {
+ if (m_file == NULL)
+ return -1;
- if (total)
- *total = m_length;
+ if (total)
+ *total = m_length;
- if (available)
- *available = m_length;
+ if (available)
+ *available = m_length;
+ return 0;
+}
+
+int MkvReader::Read(long long offset, long len, unsigned char* buffer) {
+ if (m_file == NULL)
+ return -1;
+
+ if (offset < 0)
+ return -1;
+
+ if (len < 0)
+ return -1;
+
+ if (len == 0)
return 0;
-}
-int MkvReader::Read(long long offset, long len, unsigned char* buffer)
-{
- if (m_file == NULL)
- return -1;
-
- if (offset < 0)
- return -1;
-
- if (len < 0)
- return -1;
-
- if (len == 0)
- return 0;
-
- if (offset >= m_length)
- return -1;
+ if (offset >= m_length)
+ return -1;
#ifdef _MSC_VER
- const int status = _fseeki64(m_file, offset, SEEK_SET);
+ const int status = _fseeki64(m_file, offset, SEEK_SET);
- if (status)
- return -1; //error
+ if (status)
+ return -1; // error
#else
- fseek(m_file, offset, SEEK_SET);
+ fseek(m_file, offset, SEEK_SET);
#endif
- const size_t size = fread(buffer, 1, len, m_file);
+ const size_t size = fread(buffer, 1, len, m_file);
- if (size < size_t(len))
- return -1; //error
+ if (size < size_t(len))
+ return -1; // error
- return 0; //success
+ return 0; // success
}
-} //end namespace mkvparser
+} // end namespace mkvparser
diff --git a/third_party/libwebm/mkvreader.hpp b/third_party/libwebm/mkvreader.hpp
index 8ebdd99..82ebad5 100644
--- a/third_party/libwebm/mkvreader.hpp
+++ b/third_party/libwebm/mkvreader.hpp
@@ -12,35 +12,34 @@
#include "mkvparser.hpp"
#include <cstdio>
-namespace mkvparser
-{
+namespace mkvparser {
-class MkvReader : public IMkvReader
-{
- MkvReader(const MkvReader&);
- MkvReader& operator=(const MkvReader&);
-public:
- MkvReader();
- MkvReader(FILE* fp);
- virtual ~MkvReader();
+class MkvReader : public IMkvReader {
+ public:
+ MkvReader();
+ explicit MkvReader(FILE* fp);
+ virtual ~MkvReader();
- int Open(const char*);
- void Close();
+ int Open(const char*);
+ void Close();
- virtual int Read(long long position, long length, unsigned char* buffer);
- virtual int Length(long long* total, long long* available);
-private:
+ virtual int Read(long long position, long length, unsigned char* buffer);
+ virtual int Length(long long* total, long long* available);
- // Determines the size of the file. This is called either by the constructor
- // or by the Open function depending on file ownership. Returns true on
- // success.
- bool GetFileSize();
+ private:
+ MkvReader(const MkvReader&);
+ MkvReader& operator=(const MkvReader&);
- long long m_length;
- FILE* m_file;
- bool reader_owns_file_;
+ // Determines the size of the file. This is called either by the constructor
+ // or by the Open function depending on file ownership. Returns true on
+ // success.
+ bool GetFileSize();
+
+ long long m_length;
+ FILE* m_file;
+ bool reader_owns_file_;
};
-} //end namespace mkvparser
+} // end namespace mkvparser
-#endif //MKVREADER_HPP
+#endif // MKVREADER_HPP
diff --git a/third_party/libwebm/mkvwriter.cpp b/third_party/libwebm/mkvwriter.cpp
index 8de89a4..75d4350 100644
--- a/third_party/libwebm/mkvwriter.cpp
+++ b/third_party/libwebm/mkvwriter.cpp
@@ -16,15 +16,11 @@
namespace mkvmuxer {
-MkvWriter::MkvWriter() : file_(NULL), writer_owns_file_(true) {
-}
+MkvWriter::MkvWriter() : file_(NULL), writer_owns_file_(true) {}
-MkvWriter::MkvWriter(FILE* fp): file_(fp), writer_owns_file_(false) {
-}
+MkvWriter::MkvWriter(FILE* fp) : file_(fp), writer_owns_file_(false) {}
-MkvWriter::~MkvWriter() {
- Close();
-}
+MkvWriter::~MkvWriter() { Close(); }
int32 MkvWriter::Write(const void* buffer, uint32 length) {
if (!file_)
@@ -70,9 +66,9 @@
return 0;
#ifdef _MSC_VER
- return _ftelli64(file_);
+ return _ftelli64(file_);
#else
- return ftell(file_);
+ return ftell(file_);
#endif
}
@@ -81,17 +77,14 @@
return -1;
#ifdef _MSC_VER
- return _fseeki64(file_, position, SEEK_SET);
+ return _fseeki64(file_, position, SEEK_SET);
#else
- return fseek(file_, position, SEEK_SET);
+ return fseek(file_, position, SEEK_SET);
#endif
}
-bool MkvWriter::Seekable() const {
- return true;
-}
+bool MkvWriter::Seekable() const { return true; }
-void MkvWriter::ElementStartNotify(uint64, int64) {
-}
+void MkvWriter::ElementStartNotify(uint64, int64) {}
} // namespace mkvmuxer
diff --git a/third_party/libwebm/mkvwriter.hpp b/third_party/libwebm/mkvwriter.hpp
index 524e0f7..684560c 100644
--- a/third_party/libwebm/mkvwriter.hpp
+++ b/third_party/libwebm/mkvwriter.hpp
@@ -20,7 +20,7 @@
class MkvWriter : public IMkvWriter {
public:
MkvWriter();
- MkvWriter(FILE* fp);
+ explicit MkvWriter(FILE* fp);
virtual ~MkvWriter();
// IMkvWriter interface
@@ -46,6 +46,6 @@
LIBWEBM_DISALLOW_COPY_AND_ASSIGN(MkvWriter);
};
-} //end namespace mkvmuxer
+} // end namespace mkvmuxer
-#endif // MKVWRITER_HPP
+#endif // MKVWRITER_HPP
diff --git a/third_party/libwebm/webmids.hpp b/third_party/libwebm/webmids.hpp
index 65fab96..eeb52d8 100644
--- a/third_party/libwebm/webmids.hpp
+++ b/third_party/libwebm/webmids.hpp
@@ -12,130 +12,130 @@
namespace mkvmuxer {
enum MkvId {
- kMkvEBML = 0x1A45DFA3,
- kMkvEBMLVersion = 0x4286,
- kMkvEBMLReadVersion = 0x42F7,
- kMkvEBMLMaxIDLength = 0x42F2,
- kMkvEBMLMaxSizeLength = 0x42F3,
- kMkvDocType = 0x4282,
- kMkvDocTypeVersion = 0x4287,
- kMkvDocTypeReadVersion = 0x4285,
- kMkvVoid = 0xEC,
- kMkvSignatureSlot = 0x1B538667,
- kMkvSignatureAlgo = 0x7E8A,
- kMkvSignatureHash = 0x7E9A,
- kMkvSignaturePublicKey = 0x7EA5,
- kMkvSignature = 0x7EB5,
- kMkvSignatureElements = 0x7E5B,
- kMkvSignatureElementList = 0x7E7B,
- kMkvSignedElement = 0x6532,
- //segment
- kMkvSegment = 0x18538067,
- //Meta Seek Information
- kMkvSeekHead = 0x114D9B74,
- kMkvSeek = 0x4DBB,
- kMkvSeekID = 0x53AB,
- kMkvSeekPosition = 0x53AC,
- //Segment Information
- kMkvInfo = 0x1549A966,
- kMkvTimecodeScale = 0x2AD7B1,
- kMkvDuration = 0x4489,
- kMkvDateUTC = 0x4461,
- kMkvMuxingApp = 0x4D80,
- kMkvWritingApp = 0x5741,
- //Cluster
- kMkvCluster = 0x1F43B675,
- kMkvTimecode = 0xE7,
- kMkvPrevSize = 0xAB,
- kMkvBlockGroup = 0xA0,
- kMkvBlock = 0xA1,
- kMkvBlockDuration = 0x9B,
- kMkvReferenceBlock = 0xFB,
- kMkvLaceNumber = 0xCC,
- kMkvSimpleBlock = 0xA3,
- kMkvBlockAdditions = 0x75A1,
- kMkvBlockMore = 0xA6,
- kMkvBlockAddID = 0xEE,
- kMkvBlockAdditional = 0xA5,
- kMkvDiscardPadding = 0x75A2,
- //Track
- kMkvTracks = 0x1654AE6B,
- kMkvTrackEntry = 0xAE,
- kMkvTrackNumber = 0xD7,
- kMkvTrackUID = 0x73C5,
- kMkvTrackType = 0x83,
- kMkvFlagEnabled = 0xB9,
- kMkvFlagDefault = 0x88,
- kMkvFlagForced = 0x55AA,
- kMkvFlagLacing = 0x9C,
- kMkvDefaultDuration = 0x23E383,
- kMkvMaxBlockAdditionID = 0x55EE,
- kMkvName = 0x536E,
- kMkvLanguage = 0x22B59C,
- kMkvCodecID = 0x86,
- kMkvCodecPrivate = 0x63A2,
- kMkvCodecName = 0x258688,
- kMkvCodecDelay = 0x56AA,
- kMkvSeekPreRoll = 0x56BB,
- //video
- kMkvVideo = 0xE0,
- kMkvFlagInterlaced = 0x9A,
- kMkvStereoMode = 0x53B8,
- kMkvAlphaMode = 0x53C0,
- kMkvPixelWidth = 0xB0,
- kMkvPixelHeight = 0xBA,
- kMkvPixelCropBottom = 0x54AA,
- kMkvPixelCropTop = 0x54BB,
- kMkvPixelCropLeft = 0x54CC,
- kMkvPixelCropRight = 0x54DD,
- kMkvDisplayWidth = 0x54B0,
- kMkvDisplayHeight = 0x54BA,
- kMkvDisplayUnit = 0x54B2,
- kMkvAspectRatioType = 0x54B3,
- kMkvFrameRate = 0x2383E3,
- //end video
- //audio
- kMkvAudio = 0xE1,
- kMkvSamplingFrequency = 0xB5,
+ kMkvEBML = 0x1A45DFA3,
+ kMkvEBMLVersion = 0x4286,
+ kMkvEBMLReadVersion = 0x42F7,
+ kMkvEBMLMaxIDLength = 0x42F2,
+ kMkvEBMLMaxSizeLength = 0x42F3,
+ kMkvDocType = 0x4282,
+ kMkvDocTypeVersion = 0x4287,
+ kMkvDocTypeReadVersion = 0x4285,
+ kMkvVoid = 0xEC,
+ kMkvSignatureSlot = 0x1B538667,
+ kMkvSignatureAlgo = 0x7E8A,
+ kMkvSignatureHash = 0x7E9A,
+ kMkvSignaturePublicKey = 0x7EA5,
+ kMkvSignature = 0x7EB5,
+ kMkvSignatureElements = 0x7E5B,
+ kMkvSignatureElementList = 0x7E7B,
+ kMkvSignedElement = 0x6532,
+ // segment
+ kMkvSegment = 0x18538067,
+ // Meta Seek Information
+ kMkvSeekHead = 0x114D9B74,
+ kMkvSeek = 0x4DBB,
+ kMkvSeekID = 0x53AB,
+ kMkvSeekPosition = 0x53AC,
+ // Segment Information
+ kMkvInfo = 0x1549A966,
+ kMkvTimecodeScale = 0x2AD7B1,
+ kMkvDuration = 0x4489,
+ kMkvDateUTC = 0x4461,
+ kMkvMuxingApp = 0x4D80,
+ kMkvWritingApp = 0x5741,
+ // Cluster
+ kMkvCluster = 0x1F43B675,
+ kMkvTimecode = 0xE7,
+ kMkvPrevSize = 0xAB,
+ kMkvBlockGroup = 0xA0,
+ kMkvBlock = 0xA1,
+ kMkvBlockDuration = 0x9B,
+ kMkvReferenceBlock = 0xFB,
+ kMkvLaceNumber = 0xCC,
+ kMkvSimpleBlock = 0xA3,
+ kMkvBlockAdditions = 0x75A1,
+ kMkvBlockMore = 0xA6,
+ kMkvBlockAddID = 0xEE,
+ kMkvBlockAdditional = 0xA5,
+ kMkvDiscardPadding = 0x75A2,
+ // Track
+ kMkvTracks = 0x1654AE6B,
+ kMkvTrackEntry = 0xAE,
+ kMkvTrackNumber = 0xD7,
+ kMkvTrackUID = 0x73C5,
+ kMkvTrackType = 0x83,
+ kMkvFlagEnabled = 0xB9,
+ kMkvFlagDefault = 0x88,
+ kMkvFlagForced = 0x55AA,
+ kMkvFlagLacing = 0x9C,
+ kMkvDefaultDuration = 0x23E383,
+ kMkvMaxBlockAdditionID = 0x55EE,
+ kMkvName = 0x536E,
+ kMkvLanguage = 0x22B59C,
+ kMkvCodecID = 0x86,
+ kMkvCodecPrivate = 0x63A2,
+ kMkvCodecName = 0x258688,
+ kMkvCodecDelay = 0x56AA,
+ kMkvSeekPreRoll = 0x56BB,
+ // video
+ kMkvVideo = 0xE0,
+ kMkvFlagInterlaced = 0x9A,
+ kMkvStereoMode = 0x53B8,
+ kMkvAlphaMode = 0x53C0,
+ kMkvPixelWidth = 0xB0,
+ kMkvPixelHeight = 0xBA,
+ kMkvPixelCropBottom = 0x54AA,
+ kMkvPixelCropTop = 0x54BB,
+ kMkvPixelCropLeft = 0x54CC,
+ kMkvPixelCropRight = 0x54DD,
+ kMkvDisplayWidth = 0x54B0,
+ kMkvDisplayHeight = 0x54BA,
+ kMkvDisplayUnit = 0x54B2,
+ kMkvAspectRatioType = 0x54B3,
+ kMkvFrameRate = 0x2383E3,
+ // end video
+ // audio
+ kMkvAudio = 0xE1,
+ kMkvSamplingFrequency = 0xB5,
kMkvOutputSamplingFrequency = 0x78B5,
- kMkvChannels = 0x9F,
- kMkvBitDepth = 0x6264,
- //end audio
- //ContentEncodings
- kMkvContentEncodings = 0x6D80,
- kMkvContentEncoding = 0x6240,
- kMkvContentEncodingOrder = 0x5031,
- kMkvContentEncodingScope = 0x5032,
- kMkvContentEncodingType = 0x5033,
- kMkvContentEncryption = 0x5035,
- kMkvContentEncAlgo = 0x47E1,
- kMkvContentEncKeyID = 0x47E2,
- kMkvContentEncAESSettings = 0x47E7,
- kMkvAESSettingsCipherMode = 0x47E8,
+ kMkvChannels = 0x9F,
+ kMkvBitDepth = 0x6264,
+ // end audio
+ // ContentEncodings
+ kMkvContentEncodings = 0x6D80,
+ kMkvContentEncoding = 0x6240,
+ kMkvContentEncodingOrder = 0x5031,
+ kMkvContentEncodingScope = 0x5032,
+ kMkvContentEncodingType = 0x5033,
+ kMkvContentEncryption = 0x5035,
+ kMkvContentEncAlgo = 0x47E1,
+ kMkvContentEncKeyID = 0x47E2,
+ kMkvContentEncAESSettings = 0x47E7,
+ kMkvAESSettingsCipherMode = 0x47E8,
kMkvAESSettingsCipherInitData = 0x47E9,
- //end ContentEncodings
- //Cueing Data
- kMkvCues = 0x1C53BB6B,
- kMkvCuePoint = 0xBB,
- kMkvCueTime = 0xB3,
- kMkvCueTrackPositions = 0xB7,
- kMkvCueTrack = 0xF7,
- kMkvCueClusterPosition = 0xF1,
- kMkvCueBlockNumber = 0x5378,
- //Chapters
- kMkvChapters = 0x1043A770,
- kMkvEditionEntry = 0x45B9,
- kMkvChapterAtom = 0xB6,
- kMkvChapterUID = 0x73C4,
- kMkvChapterStringUID = 0x5654,
- kMkvChapterTimeStart = 0x91,
- kMkvChapterTimeEnd = 0x92,
- kMkvChapterDisplay = 0x80,
- kMkvChapString = 0x85,
- kMkvChapLanguage = 0x437C,
- kMkvChapCountry = 0x437E
+ // end ContentEncodings
+ // Cueing Data
+ kMkvCues = 0x1C53BB6B,
+ kMkvCuePoint = 0xBB,
+ kMkvCueTime = 0xB3,
+ kMkvCueTrackPositions = 0xB7,
+ kMkvCueTrack = 0xF7,
+ kMkvCueClusterPosition = 0xF1,
+ kMkvCueBlockNumber = 0x5378,
+ // Chapters
+ kMkvChapters = 0x1043A770,
+ kMkvEditionEntry = 0x45B9,
+ kMkvChapterAtom = 0xB6,
+ kMkvChapterUID = 0x73C4,
+ kMkvChapterStringUID = 0x5654,
+ kMkvChapterTimeStart = 0x91,
+ kMkvChapterTimeEnd = 0x92,
+ kMkvChapterDisplay = 0x80,
+ kMkvChapString = 0x85,
+ kMkvChapLanguage = 0x437C,
+ kMkvChapCountry = 0x437E
};
} // end namespace mkvmuxer
-#endif // WEBMIDS_HPP
+#endif // WEBMIDS_HPP
diff --git a/third_party/x86inc/x86inc.asm b/third_party/x86inc/x86inc.asm
index 9e4918a..5b22b94 100644
--- a/third_party/x86inc/x86inc.asm
+++ b/third_party/x86inc/x86inc.asm
@@ -592,16 +592,20 @@
CAT_XDEFINE cglobaled_, %1, 1
%endif
%xdefine current_function %1
- %ifidn __OUTPUT_FORMAT__,elf
- global %1:function hidden
- %elifidn __OUTPUT_FORMAT__,elf32
- global %1:function hidden
- %elifidn __OUTPUT_FORMAT__,elf64
- global %1:function hidden
- %elifidn __OUTPUT_FORMAT__,macho32
- global %1:private_extern
- %elifidn __OUTPUT_FORMAT__,macho64
- global %1:private_extern
+ %ifdef CHROMIUM
+ %ifidn __OUTPUT_FORMAT__,elf
+ global %1:function hidden
+ %elifidn __OUTPUT_FORMAT__,elf32
+ global %1:function hidden
+ %elifidn __OUTPUT_FORMAT__,elf64
+ global %1:function hidden
+ %elifidn __OUTPUT_FORMAT__,macho32
+ global %1:private_extern
+ %elifidn __OUTPUT_FORMAT__,macho64
+ global %1:private_extern
+ %else
+ global %1
+ %endif
%else
global %1
%endif
diff --git a/vp8/common/arm/dequantize_arm.c b/vp8/common/arm/dequantize_arm.c
index 70e72aa..3e37e08 100644
--- a/vp8/common/arm/dequantize_arm.c
+++ b/vp8/common/arm/dequantize_arm.c
@@ -12,7 +12,7 @@
#include "vpx_config.h"
#include "vp8/common/blockd.h"
-#if HAVE_NEON
+#if HAVE_NEON_ASM
extern void vp8_dequantize_b_loop_neon(short *Q, short *DQC, short *DQ);
#endif
@@ -20,7 +20,7 @@
extern void vp8_dequantize_b_loop_v6(short *Q, short *DQC, short *DQ);
#endif
-#if HAVE_NEON
+#if HAVE_NEON_ASM
void vp8_dequantize_b_neon(BLOCKD *d, short *DQC)
{
diff --git a/vp8/common/arm/loopfilter_arm.c b/vp8/common/arm/loopfilter_arm.c
index 3bdc967..5d693c6 100644
--- a/vp8/common/arm/loopfilter_arm.c
+++ b/vp8/common/arm/loopfilter_arm.c
@@ -25,7 +25,7 @@
extern prototype_loopfilter(vp8_mbloop_filter_vertical_edge_armv6);
#endif
-#if HAVE_NEON
+#if HAVE_NEON_ASM
typedef void loopfilter_y_neon(unsigned char *src, int pitch,
unsigned char blimit, unsigned char limit, unsigned char thresh);
typedef void loopfilter_uv_neon(unsigned char *u, int pitch,
@@ -118,7 +118,7 @@
}
#endif
-#if HAVE_NEON
+#if HAVE_NEON_ASM
/* NEON loopfilter functions */
/* Horizontal MB filtering */
void vp8_loop_filter_mbh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
diff --git a/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm b/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm
index e3ea91f..a8730aa 100644
--- a/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm
+++ b/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm
@@ -26,6 +26,7 @@
|vp8_build_intra_predictors_mby_neon_func| PROC
push {r4-r8, lr}
+ vpush {d8-d15}
cmp r3, #0
beq case_dc_pred
@@ -37,8 +38,8 @@
beq case_tm_pred
case_dc_pred
- ldr r4, [sp, #24] ; Up
- ldr r5, [sp, #28] ; Left
+ ldr r4, [sp, #88] ; Up
+ ldr r5, [sp, #92] ; Left
; Default the DC average to 128
mov r12, #128
@@ -143,6 +144,7 @@
vst1.u8 {q0}, [r1]!
vst1.u8 {q0}, [r1]!
+ vpop {d8-d15}
pop {r4-r8,pc}
case_v_pred
; Copy down above row
@@ -165,6 +167,7 @@
vst1.u8 {q0}, [r1]!
vst1.u8 {q0}, [r1]!
vst1.u8 {q0}, [r1]!
+ vpop {d8-d15}
pop {r4-r8,pc}
case_h_pred
@@ -224,6 +227,7 @@
vst1.u8 {q2}, [r1]!
vst1.u8 {q3}, [r1]!
+ vpop {d8-d15}
pop {r4-r8,pc}
case_tm_pred
@@ -293,6 +297,7 @@
subs r12, r12, #1
bne case_tm_pred_loop
+ vpop {d8-d15}
pop {r4-r8,pc}
ENDP
@@ -307,6 +312,7 @@
|vp8_build_intra_predictors_mby_s_neon_func| PROC
push {r4-r8, lr}
+ vpush {d8-d15}
mov r1, r0 ; unsigned char *ypred_ptr = x->dst.y_buffer; //x->Predictor;
@@ -320,8 +326,8 @@
beq case_tm_pred_s
case_dc_pred_s
- ldr r4, [sp, #24] ; Up
- ldr r5, [sp, #28] ; Left
+ ldr r4, [sp, #88] ; Up
+ ldr r5, [sp, #92] ; Left
; Default the DC average to 128
mov r12, #128
@@ -426,6 +432,7 @@
vst1.u8 {q0}, [r1], r2
vst1.u8 {q0}, [r1], r2
+ vpop {d8-d15}
pop {r4-r8,pc}
case_v_pred_s
; Copy down above row
@@ -448,6 +455,8 @@
vst1.u8 {q0}, [r1], r2
vst1.u8 {q0}, [r1], r2
vst1.u8 {q0}, [r1], r2
+
+ vpop {d8-d15}
pop {r4-r8,pc}
case_h_pred_s
@@ -507,6 +516,7 @@
vst1.u8 {q2}, [r1], r2
vst1.u8 {q3}, [r1], r2
+ vpop {d8-d15}
pop {r4-r8,pc}
case_tm_pred_s
@@ -576,6 +586,7 @@
subs r12, r12, #1
bne case_tm_pred_loop_s
+ vpop {d8-d15}
pop {r4-r8,pc}
ENDP
diff --git a/vp8/common/arm/neon/idct_dequant_0_2x_neon.asm b/vp8/common/arm/neon/idct_dequant_0_2x_neon.asm
deleted file mode 100644
index 6c29c55..0000000
--- a/vp8/common/arm/neon/idct_dequant_0_2x_neon.asm
+++ /dev/null
@@ -1,79 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
-;
-
-
- EXPORT |idct_dequant_0_2x_neon|
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-;void idct_dequant_0_2x_neon(short *q, short dq,
-; unsigned char *dst, int stride);
-; r0 *q
-; r1 dq
-; r2 *dst
-; r3 stride
-|idct_dequant_0_2x_neon| PROC
- push {r4, r5}
-
- add r12, r2, #4
- vld1.32 {d2[0]}, [r2], r3
- vld1.32 {d8[0]}, [r12], r3
- vld1.32 {d2[1]}, [r2], r3
- vld1.32 {d8[1]}, [r12], r3
- vld1.32 {d4[0]}, [r2], r3
- vld1.32 {d10[0]}, [r12], r3
- vld1.32 {d4[1]}, [r2], r3
- vld1.32 {d10[1]}, [r12], r3
-
- ldrh r12, [r0] ; lo q
- ldrh r4, [r0, #32] ; hi q
- mov r5, #0
- strh r5, [r0]
- strh r5, [r0, #32]
-
- sxth r12, r12 ; lo
- mul r0, r12, r1
- add r0, r0, #4
- asr r0, r0, #3
- vdup.16 q0, r0
- sxth r4, r4 ; hi
- mul r0, r4, r1
- add r0, r0, #4
- asr r0, r0, #3
- vdup.16 q3, r0
-
- vaddw.u8 q1, q0, d2 ; lo
- vaddw.u8 q2, q0, d4
- vaddw.u8 q4, q3, d8 ; hi
- vaddw.u8 q5, q3, d10
-
- sub r2, r2, r3, lsl #2 ; dst - 4*stride
- add r0, r2, #4
-
- vqmovun.s16 d2, q1 ; lo
- vqmovun.s16 d4, q2
- vqmovun.s16 d8, q4 ; hi
- vqmovun.s16 d10, q5
-
- vst1.32 {d2[0]}, [r2], r3 ; lo
- vst1.32 {d8[0]}, [r0], r3 ; hi
- vst1.32 {d2[1]}, [r2], r3
- vst1.32 {d8[1]}, [r0], r3
- vst1.32 {d4[0]}, [r2], r3
- vst1.32 {d10[0]}, [r0], r3
- vst1.32 {d4[1]}, [r2]
- vst1.32 {d10[1]}, [r0]
-
- pop {r4, r5}
- bx lr
-
- ENDP ; |idct_dequant_0_2x_neon|
- END
diff --git a/vp8/common/arm/neon/idct_dequant_0_2x_neon.c b/vp8/common/arm/neon/idct_dequant_0_2x_neon.c
new file mode 100644
index 0000000..967c322
--- /dev/null
+++ b/vp8/common/arm/neon/idct_dequant_0_2x_neon.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+void idct_dequant_0_2x_neon(
+ int16_t *q,
+ int16_t dq,
+ unsigned char *dst,
+ int stride) {
+ unsigned char *dst0;
+ int i, a0, a1;
+ int16x8x2_t q2Add;
+ int32x2_t d2s32, d4s32;
+ uint8x8_t d2u8, d4u8;
+ uint16x8_t q1u16, q2u16;
+
+ a0 = ((q[0] * dq) + 4) >> 3;
+ a1 = ((q[16] * dq) + 4) >> 3;
+ q[0] = q[16] = 0;
+ q2Add.val[0] = vdupq_n_s16((int16_t)a0);
+ q2Add.val[1] = vdupq_n_s16((int16_t)a1);
+
+ for (i = 0; i < 2; i++, dst += 4) {
+ dst0 = dst;
+ d2s32 = vld1_lane_s32((const int32_t *)dst0, d2s32, 0);
+ dst0 += stride;
+ d2s32 = vld1_lane_s32((const int32_t *)dst0, d2s32, 1);
+ dst0 += stride;
+ d4s32 = vld1_lane_s32((const int32_t *)dst0, d4s32, 0);
+ dst0 += stride;
+ d4s32 = vld1_lane_s32((const int32_t *)dst0, d4s32, 1);
+
+ q1u16 = vaddw_u8(vreinterpretq_u16_s16(q2Add.val[i]),
+ vreinterpret_u8_s32(d2s32));
+ q2u16 = vaddw_u8(vreinterpretq_u16_s16(q2Add.val[i]),
+ vreinterpret_u8_s32(d4s32));
+
+ d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16));
+ d4u8 = vqmovun_s16(vreinterpretq_s16_u16(q2u16));
+
+ d2s32 = vreinterpret_s32_u8(d2u8);
+ d4s32 = vreinterpret_s32_u8(d4u8);
+
+ dst0 = dst;
+ vst1_lane_s32((int32_t *)dst0, d2s32, 0);
+ dst0 += stride;
+ vst1_lane_s32((int32_t *)dst0, d2s32, 1);
+ dst0 += stride;
+ vst1_lane_s32((int32_t *)dst0, d4s32, 0);
+ dst0 += stride;
+ vst1_lane_s32((int32_t *)dst0, d4s32, 1);
+ }
+ return;
+}
diff --git a/vp8/common/arm/neon/idct_dequant_full_2x_neon.asm b/vp8/common/arm/neon/idct_dequant_full_2x_neon.asm
deleted file mode 100644
index d5dce63..0000000
--- a/vp8/common/arm/neon/idct_dequant_full_2x_neon.asm
+++ /dev/null
@@ -1,196 +0,0 @@
-;
-; Copyright (c) 2010 The Webm project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |idct_dequant_full_2x_neon|
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-;void idct_dequant_full_2x_neon(short *q, short *dq,
-; unsigned char *dst, int stride);
-; r0 *q,
-; r1 *dq,
-; r2 *dst
-; r3 stride
-|idct_dequant_full_2x_neon| PROC
- vld1.16 {q0, q1}, [r1] ; dq (same l/r)
- vld1.16 {q2, q3}, [r0] ; l q
- add r0, r0, #32
- vld1.16 {q4, q5}, [r0] ; r q
- add r12, r2, #4
-
- ; interleave the predictors
- vld1.32 {d28[0]}, [r2], r3 ; l pre
- vld1.32 {d28[1]}, [r12], r3 ; r pre
- vld1.32 {d29[0]}, [r2], r3
- vld1.32 {d29[1]}, [r12], r3
- vld1.32 {d30[0]}, [r2], r3
- vld1.32 {d30[1]}, [r12], r3
- vld1.32 {d31[0]}, [r2], r3
- vld1.32 {d31[1]}, [r12]
-
- adr r1, cospi8sqrt2minus1 ; pointer to the first constant
-
- ; dequant: q[i] = q[i] * dq[i]
- vmul.i16 q2, q2, q0
- vmul.i16 q3, q3, q1
- vmul.i16 q4, q4, q0
- vmul.i16 q5, q5, q1
-
- vld1.16 {d0}, [r1]
-
- ; q2: l0r0 q3: l8r8
- ; q4: l4r4 q5: l12r12
- vswp d5, d8
- vswp d7, d10
-
- ; _CONSTANTS_ * 4,12 >> 16
- ; q6: 4 * sinpi : c1/temp1
- ; q7: 12 * sinpi : d1/temp2
- ; q8: 4 * cospi
- ; q9: 12 * cospi
- vqdmulh.s16 q6, q4, d0[2] ; sinpi8sqrt2
- vqdmulh.s16 q7, q5, d0[2]
- vqdmulh.s16 q8, q4, d0[0] ; cospi8sqrt2minus1
- vqdmulh.s16 q9, q5, d0[0]
-
- vqadd.s16 q10, q2, q3 ; a1 = 0 + 8
- vqsub.s16 q11, q2, q3 ; b1 = 0 - 8
-
- ; vqdmulh only accepts signed values. this was a problem because
- ; our constant had the high bit set, and was treated as a negative value.
- ; vqdmulh also doubles the value before it shifts by 16. we need to
- ; compensate for this. in the case of sinpi8sqrt2, the lowest bit is 0,
- ; so we can shift the constant without losing precision. this avoids
- ; shift again afterward, but also avoids the sign issue. win win!
- ; for cospi8sqrt2minus1 the lowest bit is 1, so we lose precision if we
- ; pre-shift it
- vshr.s16 q8, q8, #1
- vshr.s16 q9, q9, #1
-
- ; q4: 4 + 4 * cospi : d1/temp1
- ; q5: 12 + 12 * cospi : c1/temp2
- vqadd.s16 q4, q4, q8
- vqadd.s16 q5, q5, q9
-
- ; c1 = temp1 - temp2
- ; d1 = temp1 + temp2
- vqsub.s16 q2, q6, q5
- vqadd.s16 q3, q4, q7
-
- ; [0]: a1+d1
- ; [1]: b1+c1
- ; [2]: b1-c1
- ; [3]: a1-d1
- vqadd.s16 q4, q10, q3
- vqadd.s16 q5, q11, q2
- vqsub.s16 q6, q11, q2
- vqsub.s16 q7, q10, q3
-
- ; rotate
- vtrn.32 q4, q6
- vtrn.32 q5, q7
- vtrn.16 q4, q5
- vtrn.16 q6, q7
- ; idct loop 2
- ; q4: l 0, 4, 8,12 r 0, 4, 8,12
- ; q5: l 1, 5, 9,13 r 1, 5, 9,13
- ; q6: l 2, 6,10,14 r 2, 6,10,14
- ; q7: l 3, 7,11,15 r 3, 7,11,15
-
- ; q8: 1 * sinpi : c1/temp1
- ; q9: 3 * sinpi : d1/temp2
- ; q10: 1 * cospi
- ; q11: 3 * cospi
- vqdmulh.s16 q8, q5, d0[2] ; sinpi8sqrt2
- vqdmulh.s16 q9, q7, d0[2]
- vqdmulh.s16 q10, q5, d0[0] ; cospi8sqrt2minus1
- vqdmulh.s16 q11, q7, d0[0]
-
- vqadd.s16 q2, q4, q6 ; a1 = 0 + 2
- vqsub.s16 q3, q4, q6 ; b1 = 0 - 2
-
- ; see note on shifting above
- vshr.s16 q10, q10, #1
- vshr.s16 q11, q11, #1
-
- ; q10: 1 + 1 * cospi : d1/temp1
- ; q11: 3 + 3 * cospi : c1/temp2
- vqadd.s16 q10, q5, q10
- vqadd.s16 q11, q7, q11
-
- ; q8: c1 = temp1 - temp2
- ; q9: d1 = temp1 + temp2
- vqsub.s16 q8, q8, q11
- vqadd.s16 q9, q10, q9
-
- ; a1+d1
- ; b1+c1
- ; b1-c1
- ; a1-d1
- vqadd.s16 q4, q2, q9
- vqadd.s16 q5, q3, q8
- vqsub.s16 q6, q3, q8
- vqsub.s16 q7, q2, q9
-
- ; +4 >> 3 (rounding)
- vrshr.s16 q4, q4, #3 ; lo
- vrshr.s16 q5, q5, #3
- vrshr.s16 q6, q6, #3 ; hi
- vrshr.s16 q7, q7, #3
-
- vtrn.32 q4, q6
- vtrn.32 q5, q7
- vtrn.16 q4, q5
- vtrn.16 q6, q7
-
- ; adding pre
- ; input is still packed. pre was read interleaved
- vaddw.u8 q4, q4, d28
- vaddw.u8 q5, q5, d29
- vaddw.u8 q6, q6, d30
- vaddw.u8 q7, q7, d31
-
- vmov.i16 q14, #0
- vmov q15, q14
- vst1.16 {q14, q15}, [r0] ; write over high input
- sub r0, r0, #32
- vst1.16 {q14, q15}, [r0] ; write over low input
-
- sub r2, r2, r3, lsl #2 ; dst - 4*stride
- add r1, r2, #4 ; hi
-
- ;saturate and narrow
- vqmovun.s16 d0, q4 ; lo
- vqmovun.s16 d1, q5
- vqmovun.s16 d2, q6 ; hi
- vqmovun.s16 d3, q7
-
- vst1.32 {d0[0]}, [r2], r3 ; lo
- vst1.32 {d0[1]}, [r1], r3 ; hi
- vst1.32 {d1[0]}, [r2], r3
- vst1.32 {d1[1]}, [r1], r3
- vst1.32 {d2[0]}, [r2], r3
- vst1.32 {d2[1]}, [r1], r3
- vst1.32 {d3[0]}, [r2]
- vst1.32 {d3[1]}, [r1]
-
- bx lr
-
- ENDP ; |idct_dequant_full_2x_neon|
-
-; Constant Pool
-cospi8sqrt2minus1 DCD 0x4e7b
-; because the lowest bit in 0x8a8c is 0, we can pre-shift this
-sinpi8sqrt2 DCD 0x4546
-
- END
diff --git a/vp8/common/arm/neon/idct_dequant_full_2x_neon.c b/vp8/common/arm/neon/idct_dequant_full_2x_neon.c
new file mode 100644
index 0000000..a60ed46
--- /dev/null
+++ b/vp8/common/arm/neon/idct_dequant_full_2x_neon.c
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+static const int16_t cospi8sqrt2minus1 = 20091;
+static const int16_t sinpi8sqrt2 = 17734;
+// because the lowest bit in 0x8a8c is 0, we can pre-shift this
+
+void idct_dequant_full_2x_neon(
+ int16_t *q,
+ int16_t *dq,
+ unsigned char *dst,
+ int stride) {
+ unsigned char *dst0, *dst1;
+ int32x2_t d28, d29, d30, d31;
+ int16x8_t q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11;
+ int16x8_t qEmpty = vdupq_n_s16(0);
+ int32x4x2_t q2tmp0, q2tmp1;
+ int16x8x2_t q2tmp2, q2tmp3;
+ int16x4_t dLow0, dLow1, dHigh0, dHigh1;
+
+ d28 = d29 = d30 = d31 = vdup_n_s32(0);
+
+ // load dq
+ q0 = vld1q_s16(dq);
+ dq += 8;
+ q1 = vld1q_s16(dq);
+
+ // load q
+ q2 = vld1q_s16(q);
+ vst1q_s16(q, qEmpty);
+ q += 8;
+ q3 = vld1q_s16(q);
+ vst1q_s16(q, qEmpty);
+ q += 8;
+ q4 = vld1q_s16(q);
+ vst1q_s16(q, qEmpty);
+ q += 8;
+ q5 = vld1q_s16(q);
+ vst1q_s16(q, qEmpty);
+
+ // load src from dst
+ dst0 = dst;
+ dst1 = dst + 4;
+ d28 = vld1_lane_s32((const int32_t *)dst0, d28, 0);
+ dst0 += stride;
+ d28 = vld1_lane_s32((const int32_t *)dst1, d28, 1);
+ dst1 += stride;
+ d29 = vld1_lane_s32((const int32_t *)dst0, d29, 0);
+ dst0 += stride;
+ d29 = vld1_lane_s32((const int32_t *)dst1, d29, 1);
+ dst1 += stride;
+
+ d30 = vld1_lane_s32((const int32_t *)dst0, d30, 0);
+ dst0 += stride;
+ d30 = vld1_lane_s32((const int32_t *)dst1, d30, 1);
+ dst1 += stride;
+ d31 = vld1_lane_s32((const int32_t *)dst0, d31, 0);
+ d31 = vld1_lane_s32((const int32_t *)dst1, d31, 1);
+
+ q2 = vmulq_s16(q2, q0);
+ q3 = vmulq_s16(q3, q1);
+ q4 = vmulq_s16(q4, q0);
+ q5 = vmulq_s16(q5, q1);
+
+ // vswp
+ dLow0 = vget_low_s16(q2);
+ dHigh0 = vget_high_s16(q2);
+ dLow1 = vget_low_s16(q4);
+ dHigh1 = vget_high_s16(q4);
+ q2 = vcombine_s16(dLow0, dLow1);
+ q4 = vcombine_s16(dHigh0, dHigh1);
+
+ dLow0 = vget_low_s16(q3);
+ dHigh0 = vget_high_s16(q3);
+ dLow1 = vget_low_s16(q5);
+ dHigh1 = vget_high_s16(q5);
+ q3 = vcombine_s16(dLow0, dLow1);
+ q5 = vcombine_s16(dHigh0, dHigh1);
+
+ q6 = vqdmulhq_n_s16(q4, sinpi8sqrt2);
+ q7 = vqdmulhq_n_s16(q5, sinpi8sqrt2);
+ q8 = vqdmulhq_n_s16(q4, cospi8sqrt2minus1);
+ q9 = vqdmulhq_n_s16(q5, cospi8sqrt2minus1);
+
+ q10 = vqaddq_s16(q2, q3);
+ q11 = vqsubq_s16(q2, q3);
+
+ q8 = vshrq_n_s16(q8, 1);
+ q9 = vshrq_n_s16(q9, 1);
+
+ q4 = vqaddq_s16(q4, q8);
+ q5 = vqaddq_s16(q5, q9);
+
+ q2 = vqsubq_s16(q6, q5);
+ q3 = vqaddq_s16(q7, q4);
+
+ q4 = vqaddq_s16(q10, q3);
+ q5 = vqaddq_s16(q11, q2);
+ q6 = vqsubq_s16(q11, q2);
+ q7 = vqsubq_s16(q10, q3);
+
+ q2tmp0 = vtrnq_s32(vreinterpretq_s32_s16(q4), vreinterpretq_s32_s16(q6));
+ q2tmp1 = vtrnq_s32(vreinterpretq_s32_s16(q5), vreinterpretq_s32_s16(q7));
+ q2tmp2 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[0]),
+ vreinterpretq_s16_s32(q2tmp1.val[0]));
+ q2tmp3 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[1]),
+ vreinterpretq_s16_s32(q2tmp1.val[1]));
+
+ // loop 2
+ q8 = vqdmulhq_n_s16(q2tmp2.val[1], sinpi8sqrt2);
+ q9 = vqdmulhq_n_s16(q2tmp3.val[1], sinpi8sqrt2);
+ q10 = vqdmulhq_n_s16(q2tmp2.val[1], cospi8sqrt2minus1);
+ q11 = vqdmulhq_n_s16(q2tmp3.val[1], cospi8sqrt2minus1);
+
+ q2 = vqaddq_s16(q2tmp2.val[0], q2tmp3.val[0]);
+ q3 = vqsubq_s16(q2tmp2.val[0], q2tmp3.val[0]);
+
+ q10 = vshrq_n_s16(q10, 1);
+ q11 = vshrq_n_s16(q11, 1);
+
+ q10 = vqaddq_s16(q2tmp2.val[1], q10);
+ q11 = vqaddq_s16(q2tmp3.val[1], q11);
+
+ q8 = vqsubq_s16(q8, q11);
+ q9 = vqaddq_s16(q9, q10);
+
+ q4 = vqaddq_s16(q2, q9);
+ q5 = vqaddq_s16(q3, q8);
+ q6 = vqsubq_s16(q3, q8);
+ q7 = vqsubq_s16(q2, q9);
+
+ q4 = vrshrq_n_s16(q4, 3);
+ q5 = vrshrq_n_s16(q5, 3);
+ q6 = vrshrq_n_s16(q6, 3);
+ q7 = vrshrq_n_s16(q7, 3);
+
+ q2tmp0 = vtrnq_s32(vreinterpretq_s32_s16(q4), vreinterpretq_s32_s16(q6));
+ q2tmp1 = vtrnq_s32(vreinterpretq_s32_s16(q5), vreinterpretq_s32_s16(q7));
+ q2tmp2 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[0]),
+ vreinterpretq_s16_s32(q2tmp1.val[0]));
+ q2tmp3 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[1]),
+ vreinterpretq_s16_s32(q2tmp1.val[1]));
+
+ q4 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp2.val[0]),
+ vreinterpret_u8_s32(d28)));
+ q5 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp2.val[1]),
+ vreinterpret_u8_s32(d29)));
+ q6 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp3.val[0]),
+ vreinterpret_u8_s32(d30)));
+ q7 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp3.val[1]),
+ vreinterpret_u8_s32(d31)));
+
+ d28 = vreinterpret_s32_u8(vqmovun_s16(q4));
+ d29 = vreinterpret_s32_u8(vqmovun_s16(q5));
+ d30 = vreinterpret_s32_u8(vqmovun_s16(q6));
+ d31 = vreinterpret_s32_u8(vqmovun_s16(q7));
+
+ dst0 = dst;
+ dst1 = dst + 4;
+ vst1_lane_s32((int32_t *)dst0, d28, 0);
+ dst0 += stride;
+ vst1_lane_s32((int32_t *)dst1, d28, 1);
+ dst1 += stride;
+ vst1_lane_s32((int32_t *)dst0, d29, 0);
+ dst0 += stride;
+ vst1_lane_s32((int32_t *)dst1, d29, 1);
+ dst1 += stride;
+
+ vst1_lane_s32((int32_t *)dst0, d30, 0);
+ dst0 += stride;
+ vst1_lane_s32((int32_t *)dst1, d30, 1);
+ dst1 += stride;
+ vst1_lane_s32((int32_t *)dst0, d31, 0);
+ vst1_lane_s32((int32_t *)dst1, d31, 1);
+ return;
+}
diff --git a/vp8/common/arm/neon/iwalsh_neon.asm b/vp8/common/arm/neon/iwalsh_neon.asm
deleted file mode 100644
index e8ea2a6..0000000
--- a/vp8/common/arm/neon/iwalsh_neon.asm
+++ /dev/null
@@ -1,87 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
- EXPORT |vp8_short_inv_walsh4x4_neon|
-
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA |.text|, CODE, READONLY ; name this block of code
-
-;short vp8_short_inv_walsh4x4_neon(short *input, short *mb_dqcoeff)
-|vp8_short_inv_walsh4x4_neon| PROC
-
- ; read in all four lines of values: d0->d3
- vld1.i16 {q0-q1}, [r0@128]
-
- ; first for loop
- vadd.s16 d4, d0, d3 ;a = [0] + [12]
- vadd.s16 d6, d1, d2 ;b = [4] + [8]
- vsub.s16 d5, d0, d3 ;d = [0] - [12]
- vsub.s16 d7, d1, d2 ;c = [4] - [8]
-
- vadd.s16 q0, q2, q3 ; a+b d+c
- vsub.s16 q1, q2, q3 ; a-b d-c
-
- vtrn.32 d0, d2 ;d0: 0 1 8 9
- ;d2: 2 3 10 11
- vtrn.32 d1, d3 ;d1: 4 5 12 13
- ;d3: 6 7 14 15
-
- vtrn.16 d0, d1 ;d0: 0 4 8 12
- ;d1: 1 5 9 13
- vtrn.16 d2, d3 ;d2: 2 6 10 14
- ;d3: 3 7 11 15
-
- ; second for loop
-
- vadd.s16 d4, d0, d3 ;a = [0] + [3]
- vadd.s16 d6, d1, d2 ;b = [1] + [2]
- vsub.s16 d5, d0, d3 ;d = [0] - [3]
- vsub.s16 d7, d1, d2 ;c = [1] - [2]
-
- vmov.i16 q8, #3
-
- vadd.s16 q0, q2, q3 ; a+b d+c
- vsub.s16 q1, q2, q3 ; a-b d-c
-
- vadd.i16 q0, q0, q8 ;e/f += 3
- vadd.i16 q1, q1, q8 ;g/h += 3
-
- vshr.s16 q0, q0, #3 ;e/f >> 3
- vshr.s16 q1, q1, #3 ;g/h >> 3
-
- mov r2, #64
- add r3, r1, #32
-
- vst1.i16 d0[0], [r1],r2
- vst1.i16 d1[0], [r3],r2
- vst1.i16 d2[0], [r1],r2
- vst1.i16 d3[0], [r3],r2
-
- vst1.i16 d0[1], [r1],r2
- vst1.i16 d1[1], [r3],r2
- vst1.i16 d2[1], [r1],r2
- vst1.i16 d3[1], [r3],r2
-
- vst1.i16 d0[2], [r1],r2
- vst1.i16 d1[2], [r3],r2
- vst1.i16 d2[2], [r1],r2
- vst1.i16 d3[2], [r3],r2
-
- vst1.i16 d0[3], [r1],r2
- vst1.i16 d1[3], [r3],r2
- vst1.i16 d2[3], [r1]
- vst1.i16 d3[3], [r3]
-
- bx lr
- ENDP ; |vp8_short_inv_walsh4x4_neon|
-
- END
diff --git a/vp8/common/arm/neon/iwalsh_neon.c b/vp8/common/arm/neon/iwalsh_neon.c
new file mode 100644
index 0000000..6ea9dd7
--- /dev/null
+++ b/vp8/common/arm/neon/iwalsh_neon.c
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+void vp8_short_inv_walsh4x4_neon(
+ int16_t *input,
+ int16_t *mb_dqcoeff) {
+ int16x8_t q0s16, q1s16, q2s16, q3s16;
+ int16x4_t d4s16, d5s16, d6s16, d7s16;
+ int16x4x2_t v2tmp0, v2tmp1;
+ int32x2x2_t v2tmp2, v2tmp3;
+ int16x8_t qAdd3;
+
+ q0s16 = vld1q_s16(input);
+ q1s16 = vld1q_s16(input + 8);
+
+ // 1st for loop
+ d4s16 = vadd_s16(vget_low_s16(q0s16), vget_high_s16(q1s16));
+ d6s16 = vadd_s16(vget_high_s16(q0s16), vget_low_s16(q1s16));
+ d5s16 = vsub_s16(vget_low_s16(q0s16), vget_high_s16(q1s16));
+ d7s16 = vsub_s16(vget_high_s16(q0s16), vget_low_s16(q1s16));
+
+ q2s16 = vcombine_s16(d4s16, d5s16);
+ q3s16 = vcombine_s16(d6s16, d7s16);
+
+ q0s16 = vaddq_s16(q2s16, q3s16);
+ q1s16 = vsubq_s16(q2s16, q3s16);
+
+ v2tmp2 = vtrn_s32(vreinterpret_s32_s16(vget_low_s16(q0s16)),
+ vreinterpret_s32_s16(vget_low_s16(q1s16)));
+ v2tmp3 = vtrn_s32(vreinterpret_s32_s16(vget_high_s16(q0s16)),
+ vreinterpret_s32_s16(vget_high_s16(q1s16)));
+ v2tmp0 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[0]),
+ vreinterpret_s16_s32(v2tmp3.val[0]));
+ v2tmp1 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[1]),
+ vreinterpret_s16_s32(v2tmp3.val[1]));
+
+ // 2nd for loop
+ d4s16 = vadd_s16(v2tmp0.val[0], v2tmp1.val[1]);
+ d6s16 = vadd_s16(v2tmp0.val[1], v2tmp1.val[0]);
+ d5s16 = vsub_s16(v2tmp0.val[0], v2tmp1.val[1]);
+ d7s16 = vsub_s16(v2tmp0.val[1], v2tmp1.val[0]);
+ q2s16 = vcombine_s16(d4s16, d5s16);
+ q3s16 = vcombine_s16(d6s16, d7s16);
+
+ qAdd3 = vdupq_n_s16(3);
+
+ q0s16 = vaddq_s16(q2s16, q3s16);
+ q1s16 = vsubq_s16(q2s16, q3s16);
+
+ q0s16 = vaddq_s16(q0s16, qAdd3);
+ q1s16 = vaddq_s16(q1s16, qAdd3);
+
+ q0s16 = vshrq_n_s16(q0s16, 3);
+ q1s16 = vshrq_n_s16(q1s16, 3);
+
+ // store
+ vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 0);
+ mb_dqcoeff += 16;
+ vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 0);
+ mb_dqcoeff += 16;
+ vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 0);
+ mb_dqcoeff += 16;
+ vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 0);
+ mb_dqcoeff += 16;
+
+ vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 1);
+ mb_dqcoeff += 16;
+ vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 1);
+ mb_dqcoeff += 16;
+ vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 1);
+ mb_dqcoeff += 16;
+ vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 1);
+ mb_dqcoeff += 16;
+
+ vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 2);
+ mb_dqcoeff += 16;
+ vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 2);
+ mb_dqcoeff += 16;
+ vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 2);
+ mb_dqcoeff += 16;
+ vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 2);
+ mb_dqcoeff += 16;
+
+ vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 3);
+ mb_dqcoeff += 16;
+ vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 3);
+ mb_dqcoeff += 16;
+ vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 3);
+ mb_dqcoeff += 16;
+ vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 3);
+ mb_dqcoeff += 16;
+ return;
+}
diff --git a/vp8/common/arm/neon/loopfilter_neon.asm b/vp8/common/arm/neon/loopfilter_neon.asm
index e44be0a..c4f09c7 100644
--- a/vp8/common/arm/neon/loopfilter_neon.asm
+++ b/vp8/common/arm/neon/loopfilter_neon.asm
@@ -24,10 +24,12 @@
; sp unsigned char thresh,
|vp8_loop_filter_horizontal_edge_y_neon| PROC
push {lr}
+ vpush {d8-d15}
+
vdup.u8 q0, r2 ; duplicate blimit
vdup.u8 q1, r3 ; duplicate limit
sub r2, r0, r1, lsl #2 ; move src pointer down by 4 lines
- ldr r3, [sp, #4] ; load thresh
+ ldr r3, [sp, #68] ; load thresh
add r12, r2, r1
add r1, r1, r1
@@ -52,6 +54,7 @@
vst1.u8 {q7}, [r2@128], r1 ; store oq0
vst1.u8 {q8}, [r12@128], r1 ; store oq1
+ vpop {d8-d15}
pop {pc}
ENDP ; |vp8_loop_filter_horizontal_edge_y_neon|
@@ -64,10 +67,12 @@
; sp+4 unsigned char *v
|vp8_loop_filter_horizontal_edge_uv_neon| PROC
push {lr}
+ vpush {d8-d15}
+
vdup.u8 q0, r2 ; duplicate blimit
vdup.u8 q1, r3 ; duplicate limit
- ldr r12, [sp, #4] ; load thresh
- ldr r2, [sp, #8] ; load v ptr
+ ldr r12, [sp, #68] ; load thresh
+ ldr r2, [sp, #72] ; load v ptr
vdup.u8 q2, r12 ; duplicate thresh
sub r3, r0, r1, lsl #2 ; move u pointer down by 4 lines
@@ -104,6 +109,7 @@
vst1.u8 {d16}, [r0@64] ; store u oq1
vst1.u8 {d17}, [r2@64] ; store v oq1
+ vpop {d8-d15}
pop {pc}
ENDP ; |vp8_loop_filter_horizontal_edge_uv_neon|
@@ -120,11 +126,13 @@
|vp8_loop_filter_vertical_edge_y_neon| PROC
push {lr}
+ vpush {d8-d15}
+
vdup.u8 q0, r2 ; duplicate blimit
vdup.u8 q1, r3 ; duplicate limit
sub r2, r0, #4 ; src ptr down by 4 columns
add r1, r1, r1
- ldr r3, [sp, #4] ; load thresh
+ ldr r3, [sp, #68] ; load thresh
add r12, r2, r1, asr #1
vld1.u8 {d6}, [r2], r1
@@ -194,6 +202,7 @@
vst4.8 {d14[6], d15[6], d16[6], d17[6]}, [r0]
vst4.8 {d14[7], d15[7], d16[7], d17[7]}, [r12]
+ vpop {d8-d15}
pop {pc}
ENDP ; |vp8_loop_filter_vertical_edge_y_neon|
@@ -210,9 +219,11 @@
; sp+4 unsigned char *v
|vp8_loop_filter_vertical_edge_uv_neon| PROC
push {lr}
+ vpush {d8-d15}
+
vdup.u8 q0, r2 ; duplicate blimit
sub r12, r0, #4 ; move u pointer down by 4 columns
- ldr r2, [sp, #8] ; load v ptr
+ ldr r2, [sp, #72] ; load v ptr
vdup.u8 q1, r3 ; duplicate limit
sub r3, r2, #4 ; move v pointer down by 4 columns
@@ -233,7 +244,7 @@
vld1.u8 {d20}, [r12]
vld1.u8 {d21}, [r3]
- ldr r12, [sp, #4] ; load thresh
+ ldr r12, [sp, #68] ; load thresh
;transpose to 8x16 matrix
vtrn.32 q3, q7
@@ -281,6 +292,7 @@
vst4.8 {d10[7], d11[7], d12[7], d13[7]}, [r0]
vst4.8 {d14[7], d15[7], d16[7], d17[7]}, [r2]
+ vpop {d8-d15}
pop {pc}
ENDP ; |vp8_loop_filter_vertical_edge_uv_neon|
diff --git a/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm b/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm
deleted file mode 100644
index adf848b..0000000
--- a/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm
+++ /dev/null
@@ -1,117 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- ;EXPORT |vp8_loop_filter_simple_horizontal_edge_neon|
- EXPORT |vp8_loop_filter_bhs_neon|
- EXPORT |vp8_loop_filter_mbhs_neon|
- ARM
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-
-; r0 unsigned char *s, PRESERVE
-; r1 int p, PRESERVE
-; q1 limit, PRESERVE
-
-|vp8_loop_filter_simple_horizontal_edge_neon| PROC
-
- sub r3, r0, r1, lsl #1 ; move src pointer down by 2 lines
-
- vld1.u8 {q7}, [r0@128], r1 ; q0
- vld1.u8 {q5}, [r3@128], r1 ; p0
- vld1.u8 {q8}, [r0@128] ; q1
- vld1.u8 {q6}, [r3@128] ; p1
-
- vabd.u8 q15, q6, q7 ; abs(p0 - q0)
- vabd.u8 q14, q5, q8 ; abs(p1 - q1)
-
- vqadd.u8 q15, q15, q15 ; abs(p0 - q0) * 2
- vshr.u8 q14, q14, #1 ; abs(p1 - q1) / 2
- vmov.u8 q0, #0x80 ; 0x80
- vmov.s16 q13, #3
- vqadd.u8 q15, q15, q14 ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
-
- veor q7, q7, q0 ; qs0: q0 offset to convert to a signed value
- veor q6, q6, q0 ; ps0: p0 offset to convert to a signed value
- veor q5, q5, q0 ; ps1: p1 offset to convert to a signed value
- veor q8, q8, q0 ; qs1: q1 offset to convert to a signed value
-
- vcge.u8 q15, q1, q15 ; (abs(p0 - q0)*2 + abs(p1-q1)/2 > limit)*-1
-
- vsubl.s8 q2, d14, d12 ; ( qs0 - ps0)
- vsubl.s8 q3, d15, d13
-
- vqsub.s8 q4, q5, q8 ; q4: vp8_filter = vp8_signed_char_clamp(ps1-qs1)
-
- vmul.s16 q2, q2, q13 ; 3 * ( qs0 - ps0)
- vmul.s16 q3, q3, q13
-
- vmov.u8 q10, #0x03 ; 0x03
- vmov.u8 q9, #0x04 ; 0x04
-
- vaddw.s8 q2, q2, d8 ; vp8_filter + 3 * ( qs0 - ps0)
- vaddw.s8 q3, q3, d9
-
- vqmovn.s16 d8, q2 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
- vqmovn.s16 d9, q3
-
- vand q14, q4, q15 ; vp8_filter &= mask
-
- vqadd.s8 q2, q14, q10 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3)
- vqadd.s8 q3, q14, q9 ; Filter1 = vp8_signed_char_clamp(vp8_filter+4)
- vshr.s8 q2, q2, #3 ; Filter2 >>= 3
- vshr.s8 q4, q3, #3 ; Filter1 >>= 3
-
- sub r0, r0, r1
-
- ;calculate output
- vqadd.s8 q11, q6, q2 ; u = vp8_signed_char_clamp(ps0 + Filter2)
- vqsub.s8 q10, q7, q4 ; u = vp8_signed_char_clamp(qs0 - Filter1)
-
- veor q6, q11, q0 ; *op0 = u^0x80
- veor q7, q10, q0 ; *oq0 = u^0x80
-
- vst1.u8 {q6}, [r3@128] ; store op0
- vst1.u8 {q7}, [r0@128] ; store oq0
-
- bx lr
- ENDP ; |vp8_loop_filter_simple_horizontal_edge_neon|
-
-; r0 unsigned char *y
-; r1 int ystride
-; r2 const unsigned char *blimit
-
-|vp8_loop_filter_bhs_neon| PROC
- push {r4, lr}
- ldrb r3, [r2] ; load blim from mem
- vdup.s8 q1, r3 ; duplicate blim
-
- add r0, r0, r1, lsl #2 ; src = y_ptr + 4 * y_stride
- bl vp8_loop_filter_simple_horizontal_edge_neon
- ; vp8_loop_filter_simple_horizontal_edge_neon preserves r0, r1 and q1
- add r0, r0, r1, lsl #2 ; src = y_ptr + 8* y_stride
- bl vp8_loop_filter_simple_horizontal_edge_neon
- add r0, r0, r1, lsl #2 ; src = y_ptr + 12 * y_stride
- pop {r4, lr}
- b vp8_loop_filter_simple_horizontal_edge_neon
- ENDP ;|vp8_loop_filter_bhs_neon|
-
-; r0 unsigned char *y
-; r1 int ystride
-; r2 const unsigned char *blimit
-
-|vp8_loop_filter_mbhs_neon| PROC
- ldrb r3, [r2] ; load blim from mem
- vdup.s8 q1, r3 ; duplicate mblim
- b vp8_loop_filter_simple_horizontal_edge_neon
- ENDP ;|vp8_loop_filter_bhs_neon|
-
- END
diff --git a/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c b/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c
new file mode 100644
index 0000000..b25686f
--- /dev/null
+++ b/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+#include "./vpx_config.h"
+
+static INLINE void vp8_loop_filter_simple_horizontal_edge_neon(
+ unsigned char *s,
+ int p,
+ const unsigned char *blimit) {
+ uint8_t *sp;
+ uint8x16_t qblimit, q0u8;
+ uint8x16_t q5u8, q6u8, q7u8, q8u8, q9u8, q10u8, q14u8, q15u8;
+ int16x8_t q2s16, q3s16, q13s16;
+ int8x8_t d8s8, d9s8;
+ int8x16_t q2s8, q3s8, q4s8, q10s8, q11s8, q14s8;
+
+ qblimit = vdupq_n_u8(*blimit);
+
+ sp = s - (p << 1);
+ q5u8 = vld1q_u8(sp);
+ sp += p;
+ q6u8 = vld1q_u8(sp);
+ sp += p;
+ q7u8 = vld1q_u8(sp);
+ sp += p;
+ q8u8 = vld1q_u8(sp);
+
+ q15u8 = vabdq_u8(q6u8, q7u8);
+ q14u8 = vabdq_u8(q5u8, q8u8);
+
+ q15u8 = vqaddq_u8(q15u8, q15u8);
+ q14u8 = vshrq_n_u8(q14u8, 1);
+ q0u8 = vdupq_n_u8(0x80);
+ q13s16 = vdupq_n_s16(3);
+ q15u8 = vqaddq_u8(q15u8, q14u8);
+
+ q5u8 = veorq_u8(q5u8, q0u8);
+ q6u8 = veorq_u8(q6u8, q0u8);
+ q7u8 = veorq_u8(q7u8, q0u8);
+ q8u8 = veorq_u8(q8u8, q0u8);
+
+ q15u8 = vcgeq_u8(qblimit, q15u8);
+
+ q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7u8)),
+ vget_low_s8(vreinterpretq_s8_u8(q6u8)));
+ q3s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7u8)),
+ vget_high_s8(vreinterpretq_s8_u8(q6u8)));
+
+ q4s8 = vqsubq_s8(vreinterpretq_s8_u8(q5u8),
+ vreinterpretq_s8_u8(q8u8));
+
+ q2s16 = vmulq_s16(q2s16, q13s16);
+ q3s16 = vmulq_s16(q3s16, q13s16);
+
+ q10u8 = vdupq_n_u8(3);
+ q9u8 = vdupq_n_u8(4);
+
+ q2s16 = vaddw_s8(q2s16, vget_low_s8(q4s8));
+ q3s16 = vaddw_s8(q3s16, vget_high_s8(q4s8));
+
+ d8s8 = vqmovn_s16(q2s16);
+ d9s8 = vqmovn_s16(q3s16);
+ q4s8 = vcombine_s8(d8s8, d9s8);
+
+ q14s8 = vandq_s8(q4s8, vreinterpretq_s8_u8(q15u8));
+
+ q2s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q10u8));
+ q3s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q9u8));
+ q2s8 = vshrq_n_s8(q2s8, 3);
+ q3s8 = vshrq_n_s8(q3s8, 3);
+
+ q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q6u8), q2s8);
+ q10s8 = vqsubq_s8(vreinterpretq_s8_u8(q7u8), q3s8);
+
+ q6u8 = veorq_u8(vreinterpretq_u8_s8(q11s8), q0u8);
+ q7u8 = veorq_u8(vreinterpretq_u8_s8(q10s8), q0u8);
+
+ vst1q_u8(s, q7u8);
+ s -= p;
+ vst1q_u8(s, q6u8);
+ return;
+}
+
+void vp8_loop_filter_bhs_neon(
+ unsigned char *y_ptr,
+ int y_stride,
+ const unsigned char *blimit) {
+ y_ptr += y_stride * 4;
+ vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit);
+ y_ptr += y_stride * 4;
+ vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit);
+ y_ptr += y_stride * 4;
+ vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit);
+ return;
+}
+
+void vp8_loop_filter_mbhs_neon(
+ unsigned char *y_ptr,
+ int y_stride,
+ const unsigned char *blimit) {
+ vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit);
+ return;
+}
diff --git a/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm b/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm
index e690df2..78d13c8 100644
--- a/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm
+++ b/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm
@@ -9,7 +9,6 @@
;
- ;EXPORT |vp8_loop_filter_simple_vertical_edge_neon|
EXPORT |vp8_loop_filter_bvs_neon|
EXPORT |vp8_loop_filter_mbvs_neon|
ARM
@@ -22,6 +21,8 @@
; q1 limit, PRESERVE
|vp8_loop_filter_simple_vertical_edge_neon| PROC
+ vpush {d8-d15}
+
sub r0, r0, #2 ; move src pointer down by 2 columns
add r12, r1, r1
add r3, r0, r1
@@ -120,6 +121,7 @@
vst2.8 {d14[6], d15[6]}, [r0], r12
vst2.8 {d14[7], d15[7]}, [r3]
+ vpop {d8-d15}
bx lr
ENDP ; |vp8_loop_filter_simple_vertical_edge_neon|
diff --git a/vp8/common/arm/neon/mbloopfilter_neon.asm b/vp8/common/arm/neon/mbloopfilter_neon.asm
deleted file mode 100644
index f41c156..0000000
--- a/vp8/common/arm/neon/mbloopfilter_neon.asm
+++ /dev/null
@@ -1,469 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8_mbloop_filter_horizontal_edge_y_neon|
- EXPORT |vp8_mbloop_filter_horizontal_edge_uv_neon|
- EXPORT |vp8_mbloop_filter_vertical_edge_y_neon|
- EXPORT |vp8_mbloop_filter_vertical_edge_uv_neon|
- ARM
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-
-; void vp8_mbloop_filter_horizontal_edge_y_neon(unsigned char *src, int pitch,
-; const unsigned char *blimit,
-; const unsigned char *limit,
-; const unsigned char *thresh)
-; r0 unsigned char *src,
-; r1 int pitch,
-; r2 unsigned char blimit
-; r3 unsigned char limit
-; sp unsigned char thresh,
-|vp8_mbloop_filter_horizontal_edge_y_neon| PROC
- push {lr}
- add r1, r1, r1 ; double stride
- ldr r12, [sp, #4] ; load thresh
- sub r0, r0, r1, lsl #1 ; move src pointer down by 4 lines
- vdup.u8 q2, r12 ; thresh
- add r12, r0, r1, lsr #1 ; move src pointer up by 1 line
-
- vld1.u8 {q3}, [r0@128], r1 ; p3
- vld1.u8 {q4}, [r12@128], r1 ; p2
- vld1.u8 {q5}, [r0@128], r1 ; p1
- vld1.u8 {q6}, [r12@128], r1 ; p0
- vld1.u8 {q7}, [r0@128], r1 ; q0
- vld1.u8 {q8}, [r12@128], r1 ; q1
- vld1.u8 {q9}, [r0@128], r1 ; q2
- vld1.u8 {q10}, [r12@128], r1 ; q3
-
- bl vp8_mbloop_filter_neon
-
- sub r12, r12, r1, lsl #2
- add r0, r12, r1, lsr #1
-
- vst1.u8 {q4}, [r12@128],r1 ; store op2
- vst1.u8 {q5}, [r0@128],r1 ; store op1
- vst1.u8 {q6}, [r12@128], r1 ; store op0
- vst1.u8 {q7}, [r0@128],r1 ; store oq0
- vst1.u8 {q8}, [r12@128] ; store oq1
- vst1.u8 {q9}, [r0@128] ; store oq2
-
- pop {pc}
- ENDP ; |vp8_mbloop_filter_horizontal_edge_y_neon|
-
-; void vp8_mbloop_filter_horizontal_edge_uv_neon(unsigned char *u, int pitch,
-; const unsigned char *blimit,
-; const unsigned char *limit,
-; const unsigned char *thresh,
-; unsigned char *v)
-; r0 unsigned char *u,
-; r1 int pitch,
-; r2 unsigned char blimit
-; r3 unsigned char limit
-; sp unsigned char thresh,
-; sp+4 unsigned char *v
-
-|vp8_mbloop_filter_horizontal_edge_uv_neon| PROC
- push {lr}
- ldr r12, [sp, #4] ; load thresh
- sub r0, r0, r1, lsl #2 ; move u pointer down by 4 lines
- vdup.u8 q2, r12 ; thresh
- ldr r12, [sp, #8] ; load v ptr
- sub r12, r12, r1, lsl #2 ; move v pointer down by 4 lines
-
- vld1.u8 {d6}, [r0@64], r1 ; p3
- vld1.u8 {d7}, [r12@64], r1 ; p3
- vld1.u8 {d8}, [r0@64], r1 ; p2
- vld1.u8 {d9}, [r12@64], r1 ; p2
- vld1.u8 {d10}, [r0@64], r1 ; p1
- vld1.u8 {d11}, [r12@64], r1 ; p1
- vld1.u8 {d12}, [r0@64], r1 ; p0
- vld1.u8 {d13}, [r12@64], r1 ; p0
- vld1.u8 {d14}, [r0@64], r1 ; q0
- vld1.u8 {d15}, [r12@64], r1 ; q0
- vld1.u8 {d16}, [r0@64], r1 ; q1
- vld1.u8 {d17}, [r12@64], r1 ; q1
- vld1.u8 {d18}, [r0@64], r1 ; q2
- vld1.u8 {d19}, [r12@64], r1 ; q2
- vld1.u8 {d20}, [r0@64], r1 ; q3
- vld1.u8 {d21}, [r12@64], r1 ; q3
-
- bl vp8_mbloop_filter_neon
-
- sub r0, r0, r1, lsl #3
- sub r12, r12, r1, lsl #3
-
- add r0, r0, r1
- add r12, r12, r1
-
- vst1.u8 {d8}, [r0@64], r1 ; store u op2
- vst1.u8 {d9}, [r12@64], r1 ; store v op2
- vst1.u8 {d10}, [r0@64], r1 ; store u op1
- vst1.u8 {d11}, [r12@64], r1 ; store v op1
- vst1.u8 {d12}, [r0@64], r1 ; store u op0
- vst1.u8 {d13}, [r12@64], r1 ; store v op0
- vst1.u8 {d14}, [r0@64], r1 ; store u oq0
- vst1.u8 {d15}, [r12@64], r1 ; store v oq0
- vst1.u8 {d16}, [r0@64], r1 ; store u oq1
- vst1.u8 {d17}, [r12@64], r1 ; store v oq1
- vst1.u8 {d18}, [r0@64], r1 ; store u oq2
- vst1.u8 {d19}, [r12@64], r1 ; store v oq2
-
- pop {pc}
- ENDP ; |vp8_mbloop_filter_horizontal_edge_uv_neon|
-
-; void vp8_mbloop_filter_vertical_edge_y_neon(unsigned char *src, int pitch,
-; const unsigned char *blimit,
-; const unsigned char *limit,
-; const unsigned char *thresh)
-; r0 unsigned char *src,
-; r1 int pitch,
-; r2 unsigned char blimit
-; r3 unsigned char limit
-; sp unsigned char thresh,
-|vp8_mbloop_filter_vertical_edge_y_neon| PROC
- push {lr}
- ldr r12, [sp, #4] ; load thresh
- sub r0, r0, #4 ; move src pointer down by 4 columns
- vdup.s8 q2, r12 ; thresh
- add r12, r0, r1, lsl #3 ; move src pointer down by 8 lines
-
- vld1.u8 {d6}, [r0], r1 ; load first 8-line src data
- vld1.u8 {d7}, [r12], r1 ; load second 8-line src data
- vld1.u8 {d8}, [r0], r1
- vld1.u8 {d9}, [r12], r1
- vld1.u8 {d10}, [r0], r1
- vld1.u8 {d11}, [r12], r1
- vld1.u8 {d12}, [r0], r1
- vld1.u8 {d13}, [r12], r1
- vld1.u8 {d14}, [r0], r1
- vld1.u8 {d15}, [r12], r1
- vld1.u8 {d16}, [r0], r1
- vld1.u8 {d17}, [r12], r1
- vld1.u8 {d18}, [r0], r1
- vld1.u8 {d19}, [r12], r1
- vld1.u8 {d20}, [r0], r1
- vld1.u8 {d21}, [r12], r1
-
- ;transpose to 8x16 matrix
- vtrn.32 q3, q7
- vtrn.32 q4, q8
- vtrn.32 q5, q9
- vtrn.32 q6, q10
-
- vtrn.16 q3, q5
- vtrn.16 q4, q6
- vtrn.16 q7, q9
- vtrn.16 q8, q10
-
- vtrn.8 q3, q4
- vtrn.8 q5, q6
- vtrn.8 q7, q8
- vtrn.8 q9, q10
-
- sub r0, r0, r1, lsl #3
-
- bl vp8_mbloop_filter_neon
-
- sub r12, r12, r1, lsl #3
-
- ;transpose to 16x8 matrix
- vtrn.32 q3, q7
- vtrn.32 q4, q8
- vtrn.32 q5, q9
- vtrn.32 q6, q10
-
- vtrn.16 q3, q5
- vtrn.16 q4, q6
- vtrn.16 q7, q9
- vtrn.16 q8, q10
-
- vtrn.8 q3, q4
- vtrn.8 q5, q6
- vtrn.8 q7, q8
- vtrn.8 q9, q10
-
- ;store op2, op1, op0, oq0, oq1, oq2
- vst1.8 {d6}, [r0], r1
- vst1.8 {d7}, [r12], r1
- vst1.8 {d8}, [r0], r1
- vst1.8 {d9}, [r12], r1
- vst1.8 {d10}, [r0], r1
- vst1.8 {d11}, [r12], r1
- vst1.8 {d12}, [r0], r1
- vst1.8 {d13}, [r12], r1
- vst1.8 {d14}, [r0], r1
- vst1.8 {d15}, [r12], r1
- vst1.8 {d16}, [r0], r1
- vst1.8 {d17}, [r12], r1
- vst1.8 {d18}, [r0], r1
- vst1.8 {d19}, [r12], r1
- vst1.8 {d20}, [r0]
- vst1.8 {d21}, [r12]
-
- pop {pc}
- ENDP ; |vp8_mbloop_filter_vertical_edge_y_neon|
-
-; void vp8_mbloop_filter_vertical_edge_uv_neon(unsigned char *u, int pitch,
-; const unsigned char *blimit,
-; const unsigned char *limit,
-; const unsigned char *thresh,
-; unsigned char *v)
-; r0 unsigned char *u,
-; r1 int pitch,
-; r2 const signed char *flimit,
-; r3 const signed char *limit,
-; sp const signed char *thresh,
-; sp+4 unsigned char *v
-|vp8_mbloop_filter_vertical_edge_uv_neon| PROC
- push {lr}
- ldr r12, [sp, #4] ; load thresh
- sub r0, r0, #4 ; move u pointer down by 4 columns
- vdup.u8 q2, r12 ; thresh
- ldr r12, [sp, #8] ; load v ptr
- sub r12, r12, #4 ; move v pointer down by 4 columns
-
- vld1.u8 {d6}, [r0], r1 ;load u data
- vld1.u8 {d7}, [r12], r1 ;load v data
- vld1.u8 {d8}, [r0], r1
- vld1.u8 {d9}, [r12], r1
- vld1.u8 {d10}, [r0], r1
- vld1.u8 {d11}, [r12], r1
- vld1.u8 {d12}, [r0], r1
- vld1.u8 {d13}, [r12], r1
- vld1.u8 {d14}, [r0], r1
- vld1.u8 {d15}, [r12], r1
- vld1.u8 {d16}, [r0], r1
- vld1.u8 {d17}, [r12], r1
- vld1.u8 {d18}, [r0], r1
- vld1.u8 {d19}, [r12], r1
- vld1.u8 {d20}, [r0], r1
- vld1.u8 {d21}, [r12], r1
-
- ;transpose to 8x16 matrix
- vtrn.32 q3, q7
- vtrn.32 q4, q8
- vtrn.32 q5, q9
- vtrn.32 q6, q10
-
- vtrn.16 q3, q5
- vtrn.16 q4, q6
- vtrn.16 q7, q9
- vtrn.16 q8, q10
-
- vtrn.8 q3, q4
- vtrn.8 q5, q6
- vtrn.8 q7, q8
- vtrn.8 q9, q10
-
- sub r0, r0, r1, lsl #3
-
- bl vp8_mbloop_filter_neon
-
- sub r12, r12, r1, lsl #3
-
- ;transpose to 16x8 matrix
- vtrn.32 q3, q7
- vtrn.32 q4, q8
- vtrn.32 q5, q9
- vtrn.32 q6, q10
-
- vtrn.16 q3, q5
- vtrn.16 q4, q6
- vtrn.16 q7, q9
- vtrn.16 q8, q10
-
- vtrn.8 q3, q4
- vtrn.8 q5, q6
- vtrn.8 q7, q8
- vtrn.8 q9, q10
-
- ;store op2, op1, op0, oq0, oq1, oq2
- vst1.8 {d6}, [r0], r1
- vst1.8 {d7}, [r12], r1
- vst1.8 {d8}, [r0], r1
- vst1.8 {d9}, [r12], r1
- vst1.8 {d10}, [r0], r1
- vst1.8 {d11}, [r12], r1
- vst1.8 {d12}, [r0], r1
- vst1.8 {d13}, [r12], r1
- vst1.8 {d14}, [r0], r1
- vst1.8 {d15}, [r12], r1
- vst1.8 {d16}, [r0], r1
- vst1.8 {d17}, [r12], r1
- vst1.8 {d18}, [r0], r1
- vst1.8 {d19}, [r12], r1
- vst1.8 {d20}, [r0]
- vst1.8 {d21}, [r12]
-
- pop {pc}
- ENDP ; |vp8_mbloop_filter_vertical_edge_uv_neon|
-
-; void vp8_mbloop_filter_neon()
-; This is a helper function for the macroblock loopfilters. The individual
-; functions do the necessary load, transpose (if necessary), preserve (if
-; necessary) and store.
-
-; r0,r1 PRESERVE
-; r2 mblimit
-; r3 limit
-
-; q2 thresh
-; q3 p3 PRESERVE
-; q4 p2
-; q5 p1
-; q6 p0
-; q7 q0
-; q8 q1
-; q9 q2
-; q10 q3 PRESERVE
-
-|vp8_mbloop_filter_neon| PROC
-
- ; vp8_filter_mask
- vabd.u8 q11, q3, q4 ; abs(p3 - p2)
- vabd.u8 q12, q4, q5 ; abs(p2 - p1)
- vabd.u8 q13, q5, q6 ; abs(p1 - p0)
- vabd.u8 q14, q8, q7 ; abs(q1 - q0)
- vabd.u8 q1, q9, q8 ; abs(q2 - q1)
- vabd.u8 q0, q10, q9 ; abs(q3 - q2)
-
- vmax.u8 q11, q11, q12
- vmax.u8 q12, q13, q14
- vmax.u8 q1, q1, q0
- vmax.u8 q15, q11, q12
-
- vabd.u8 q12, q6, q7 ; abs(p0 - q0)
-
- ; vp8_hevmask
- vcgt.u8 q13, q13, q2 ; (abs(p1 - p0) > thresh) * -1
- vcgt.u8 q14, q14, q2 ; (abs(q1 - q0) > thresh) * -1
- vmax.u8 q15, q15, q1
-
- vdup.u8 q1, r3 ; limit
- vdup.u8 q2, r2 ; mblimit
-
- vmov.u8 q0, #0x80 ; 0x80
-
- vcge.u8 q15, q1, q15
-
- vabd.u8 q1, q5, q8 ; a = abs(p1 - q1)
- vqadd.u8 q12, q12, q12 ; b = abs(p0 - q0) * 2
- vmov.u16 q11, #3 ; #3
-
- ; vp8_filter
- ; convert to signed
- veor q7, q7, q0 ; qs0
- vshr.u8 q1, q1, #1 ; a = a / 2
- veor q6, q6, q0 ; ps0
- veor q5, q5, q0 ; ps1
-
- vqadd.u8 q12, q12, q1 ; a = b + a
-
- veor q8, q8, q0 ; qs1
- veor q4, q4, q0 ; ps2
- veor q9, q9, q0 ; qs2
-
- vorr q14, q13, q14 ; vp8_hevmask
-
- vcge.u8 q12, q2, q12 ; (a > flimit * 2 + limit) * -1
-
- vsubl.s8 q2, d14, d12 ; qs0 - ps0
- vsubl.s8 q13, d15, d13
-
- vqsub.s8 q1, q5, q8 ; vp8_filter = clamp(ps1-qs1)
-
- vmul.i16 q2, q2, q11 ; 3 * ( qs0 - ps0)
-
- vand q15, q15, q12 ; vp8_filter_mask
-
- vmul.i16 q13, q13, q11
-
- vmov.u8 q12, #3 ; #3
-
- vaddw.s8 q2, q2, d2 ; vp8_filter + 3 * ( qs0 - ps0)
- vaddw.s8 q13, q13, d3
-
- vmov.u8 q11, #4 ; #4
-
- ; vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0))
- vqmovn.s16 d2, q2
- vqmovn.s16 d3, q13
-
- vand q1, q1, q15 ; vp8_filter &= mask
-
- vmov.u16 q15, #63 ; #63
-
- vand q13, q1, q14 ; Filter2 &= hev
-
- vqadd.s8 q2, q13, q11 ; Filter1 = clamp(Filter2+4)
- vqadd.s8 q13, q13, q12 ; Filter2 = clamp(Filter2+3)
-
- vmov q0, q15
-
- vshr.s8 q2, q2, #3 ; Filter1 >>= 3
- vshr.s8 q13, q13, #3 ; Filter2 >>= 3
-
- vmov q11, q15
- vmov q12, q15
-
- vqsub.s8 q7, q7, q2 ; qs0 = clamp(qs0 - Filter1)
-
- vqadd.s8 q6, q6, q13 ; ps0 = clamp(ps0 + Filter2)
-
- vbic q1, q1, q14 ; vp8_filter &= ~hev
-
- ; roughly 1/7th difference across boundary
- ; roughly 2/7th difference across boundary
- ; roughly 3/7th difference across boundary
-
- vmov.u8 d5, #9 ; #9
- vmov.u8 d4, #18 ; #18
-
- vmov q13, q15
- vmov q14, q15
-
- vmlal.s8 q0, d2, d5 ; 63 + Filter2 * 9
- vmlal.s8 q11, d3, d5
- vmov.u8 d5, #27 ; #27
- vmlal.s8 q12, d2, d4 ; 63 + Filter2 * 18
- vmlal.s8 q13, d3, d4
- vmlal.s8 q14, d2, d5 ; 63 + Filter2 * 27
- vmlal.s8 q15, d3, d5
-
- vqshrn.s16 d0, q0, #7 ; u = clamp((63 + Filter2 * 9)>>7)
- vqshrn.s16 d1, q11, #7
- vqshrn.s16 d24, q12, #7 ; u = clamp((63 + Filter2 * 18)>>7)
- vqshrn.s16 d25, q13, #7
- vqshrn.s16 d28, q14, #7 ; u = clamp((63 + Filter2 * 27)>>7)
- vqshrn.s16 d29, q15, #7
-
- vmov.u8 q1, #0x80 ; 0x80
-
- vqsub.s8 q11, q9, q0 ; s = clamp(qs2 - u)
- vqadd.s8 q0, q4, q0 ; s = clamp(ps2 + u)
- vqsub.s8 q13, q8, q12 ; s = clamp(qs1 - u)
- vqadd.s8 q12, q5, q12 ; s = clamp(ps1 + u)
- vqsub.s8 q15, q7, q14 ; s = clamp(qs0 - u)
- vqadd.s8 q14, q6, q14 ; s = clamp(ps0 + u)
-
- veor q9, q11, q1 ; *oq2 = s^0x80
- veor q4, q0, q1 ; *op2 = s^0x80
- veor q8, q13, q1 ; *oq1 = s^0x80
- veor q5, q12, q1 ; *op2 = s^0x80
- veor q7, q15, q1 ; *oq0 = s^0x80
- veor q6, q14, q1 ; *op0 = s^0x80
-
- bx lr
- ENDP ; |vp8_mbloop_filter_neon|
-
-;-----------------
-
- END
diff --git a/vp8/common/arm/neon/mbloopfilter_neon.c b/vp8/common/arm/neon/mbloopfilter_neon.c
new file mode 100644
index 0000000..5351f4b
--- /dev/null
+++ b/vp8/common/arm/neon/mbloopfilter_neon.c
@@ -0,0 +1,625 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+#include "./vpx_config.h"
+
+static INLINE void vp8_mbloop_filter_neon(
+ uint8x16_t qblimit, // mblimit
+ uint8x16_t qlimit, // limit
+ uint8x16_t qthresh, // thresh
+ uint8x16_t q3, // p2
+ uint8x16_t q4, // p2
+ uint8x16_t q5, // p1
+ uint8x16_t q6, // p0
+ uint8x16_t q7, // q0
+ uint8x16_t q8, // q1
+ uint8x16_t q9, // q2
+ uint8x16_t q10, // q3
+ uint8x16_t *q4r, // p1
+ uint8x16_t *q5r, // p1
+ uint8x16_t *q6r, // p0
+ uint8x16_t *q7r, // q0
+ uint8x16_t *q8r, // q1
+ uint8x16_t *q9r) { // q1
+ uint8x16_t q0u8, q1u8, q11u8, q12u8, q13u8, q14u8, q15u8;
+ int16x8_t q0s16, q2s16, q11s16, q12s16, q13s16, q14s16, q15s16;
+ int8x16_t q1s8, q6s8, q7s8, q2s8, q11s8, q13s8;
+ uint16x8_t q0u16, q11u16, q12u16, q13u16, q14u16, q15u16;
+ int8x16_t q0s8, q12s8, q14s8, q15s8;
+ int8x8_t d0, d1, d2, d3, d4, d5, d24, d25, d28, d29;
+
+ q11u8 = vabdq_u8(q3, q4);
+ q12u8 = vabdq_u8(q4, q5);
+ q13u8 = vabdq_u8(q5, q6);
+ q14u8 = vabdq_u8(q8, q7);
+ q1u8 = vabdq_u8(q9, q8);
+ q0u8 = vabdq_u8(q10, q9);
+
+ q11u8 = vmaxq_u8(q11u8, q12u8);
+ q12u8 = vmaxq_u8(q13u8, q14u8);
+ q1u8 = vmaxq_u8(q1u8, q0u8);
+ q15u8 = vmaxq_u8(q11u8, q12u8);
+
+ q12u8 = vabdq_u8(q6, q7);
+
+ // vp8_hevmask
+ q13u8 = vcgtq_u8(q13u8, qthresh);
+ q14u8 = vcgtq_u8(q14u8, qthresh);
+ q15u8 = vmaxq_u8(q15u8, q1u8);
+
+ q15u8 = vcgeq_u8(qlimit, q15u8);
+
+ q1u8 = vabdq_u8(q5, q8);
+ q12u8 = vqaddq_u8(q12u8, q12u8);
+
+ // vp8_filter() function
+ // convert to signed
+ q0u8 = vdupq_n_u8(0x80);
+ q9 = veorq_u8(q9, q0u8);
+ q8 = veorq_u8(q8, q0u8);
+ q7 = veorq_u8(q7, q0u8);
+ q6 = veorq_u8(q6, q0u8);
+ q5 = veorq_u8(q5, q0u8);
+ q4 = veorq_u8(q4, q0u8);
+
+ q1u8 = vshrq_n_u8(q1u8, 1);
+ q12u8 = vqaddq_u8(q12u8, q1u8);
+
+ q14u8 = vorrq_u8(q13u8, q14u8);
+ q12u8 = vcgeq_u8(qblimit, q12u8);
+
+ q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7)),
+ vget_low_s8(vreinterpretq_s8_u8(q6)));
+ q13s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7)),
+ vget_high_s8(vreinterpretq_s8_u8(q6)));
+
+ q1s8 = vqsubq_s8(vreinterpretq_s8_u8(q5),
+ vreinterpretq_s8_u8(q8));
+
+ q11s16 = vdupq_n_s16(3);
+ q2s16 = vmulq_s16(q2s16, q11s16);
+ q13s16 = vmulq_s16(q13s16, q11s16);
+
+ q15u8 = vandq_u8(q15u8, q12u8);
+
+ q2s16 = vaddw_s8(q2s16, vget_low_s8(q1s8));
+ q13s16 = vaddw_s8(q13s16, vget_high_s8(q1s8));
+
+ q12u8 = vdupq_n_u8(3);
+ q11u8 = vdupq_n_u8(4);
+ // vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0))
+ d2 = vqmovn_s16(q2s16);
+ d3 = vqmovn_s16(q13s16);
+ q1s8 = vcombine_s8(d2, d3);
+ q1s8 = vandq_s8(q1s8, vreinterpretq_s8_u8(q15u8));
+ q13s8 = vandq_s8(q1s8, vreinterpretq_s8_u8(q14u8));
+
+ q2s8 = vqaddq_s8(q13s8, vreinterpretq_s8_u8(q11u8));
+ q13s8 = vqaddq_s8(q13s8, vreinterpretq_s8_u8(q12u8));
+ q2s8 = vshrq_n_s8(q2s8, 3);
+ q13s8 = vshrq_n_s8(q13s8, 3);
+
+ q7s8 = vqsubq_s8(vreinterpretq_s8_u8(q7), q2s8);
+ q6s8 = vqaddq_s8(vreinterpretq_s8_u8(q6), q13s8);
+
+ q1s8 = vbicq_s8(q1s8, vreinterpretq_s8_u8(q14u8));
+
+ q0u16 = q11u16 = q12u16 = q13u16 = q14u16 = q15u16 = vdupq_n_u16(63);
+ d5 = vdup_n_s8(9);
+ d4 = vdup_n_s8(18);
+
+ q0s16 = vmlal_s8(vreinterpretq_s16_u16(q0u16), vget_low_s8(q1s8), d5);
+ q11s16 = vmlal_s8(vreinterpretq_s16_u16(q11u16), vget_high_s8(q1s8), d5);
+ d5 = vdup_n_s8(27);
+ q12s16 = vmlal_s8(vreinterpretq_s16_u16(q12u16), vget_low_s8(q1s8), d4);
+ q13s16 = vmlal_s8(vreinterpretq_s16_u16(q13u16), vget_high_s8(q1s8), d4);
+ q14s16 = vmlal_s8(vreinterpretq_s16_u16(q14u16), vget_low_s8(q1s8), d5);
+ q15s16 = vmlal_s8(vreinterpretq_s16_u16(q15u16), vget_high_s8(q1s8), d5);
+
+ d0 = vqshrn_n_s16(q0s16 , 7);
+ d1 = vqshrn_n_s16(q11s16, 7);
+ d24 = vqshrn_n_s16(q12s16, 7);
+ d25 = vqshrn_n_s16(q13s16, 7);
+ d28 = vqshrn_n_s16(q14s16, 7);
+ d29 = vqshrn_n_s16(q15s16, 7);
+
+ q0s8 = vcombine_s8(d0, d1);
+ q12s8 = vcombine_s8(d24, d25);
+ q14s8 = vcombine_s8(d28, d29);
+
+ q11s8 = vqsubq_s8(vreinterpretq_s8_u8(q9), q0s8);
+ q0s8 = vqaddq_s8(vreinterpretq_s8_u8(q4), q0s8);
+ q13s8 = vqsubq_s8(vreinterpretq_s8_u8(q8), q12s8);
+ q12s8 = vqaddq_s8(vreinterpretq_s8_u8(q5), q12s8);
+ q15s8 = vqsubq_s8((q7s8), q14s8);
+ q14s8 = vqaddq_s8((q6s8), q14s8);
+
+ q1u8 = vdupq_n_u8(0x80);
+ *q9r = veorq_u8(vreinterpretq_u8_s8(q11s8), q1u8);
+ *q8r = veorq_u8(vreinterpretq_u8_s8(q13s8), q1u8);
+ *q7r = veorq_u8(vreinterpretq_u8_s8(q15s8), q1u8);
+ *q6r = veorq_u8(vreinterpretq_u8_s8(q14s8), q1u8);
+ *q5r = veorq_u8(vreinterpretq_u8_s8(q12s8), q1u8);
+ *q4r = veorq_u8(vreinterpretq_u8_s8(q0s8), q1u8);
+ return;
+}
+
+void vp8_mbloop_filter_horizontal_edge_y_neon(
+ unsigned char *src,
+ int pitch,
+ unsigned char blimit,
+ unsigned char limit,
+ unsigned char thresh) {
+ uint8x16_t qblimit, qlimit, qthresh, q3, q4;
+ uint8x16_t q5, q6, q7, q8, q9, q10;
+
+ qblimit = vdupq_n_u8(blimit);
+ qlimit = vdupq_n_u8(limit);
+ qthresh = vdupq_n_u8(thresh);
+
+ src -= (pitch << 2);
+
+ q3 = vld1q_u8(src);
+ src += pitch;
+ q4 = vld1q_u8(src);
+ src += pitch;
+ q5 = vld1q_u8(src);
+ src += pitch;
+ q6 = vld1q_u8(src);
+ src += pitch;
+ q7 = vld1q_u8(src);
+ src += pitch;
+ q8 = vld1q_u8(src);
+ src += pitch;
+ q9 = vld1q_u8(src);
+ src += pitch;
+ q10 = vld1q_u8(src);
+
+ vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
+ q5, q6, q7, q8, q9, q10,
+ &q4, &q5, &q6, &q7, &q8, &q9);
+
+ src -= (pitch * 6);
+ vst1q_u8(src, q4);
+ src += pitch;
+ vst1q_u8(src, q5);
+ src += pitch;
+ vst1q_u8(src, q6);
+ src += pitch;
+ vst1q_u8(src, q7);
+ src += pitch;
+ vst1q_u8(src, q8);
+ src += pitch;
+ vst1q_u8(src, q9);
+ return;
+}
+
+void vp8_mbloop_filter_horizontal_edge_uv_neon(
+ unsigned char *u,
+ int pitch,
+ unsigned char blimit,
+ unsigned char limit,
+ unsigned char thresh,
+ unsigned char *v) {
+ uint8x16_t qblimit, qlimit, qthresh, q3, q4;
+ uint8x16_t q5, q6, q7, q8, q9, q10;
+ uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
+ uint8x8_t d15, d16, d17, d18, d19, d20, d21;
+
+ qblimit = vdupq_n_u8(blimit);
+ qlimit = vdupq_n_u8(limit);
+ qthresh = vdupq_n_u8(thresh);
+
+ u -= (pitch << 2);
+ v -= (pitch << 2);
+
+ d6 = vld1_u8(u);
+ u += pitch;
+ d7 = vld1_u8(v);
+ v += pitch;
+ d8 = vld1_u8(u);
+ u += pitch;
+ d9 = vld1_u8(v);
+ v += pitch;
+ d10 = vld1_u8(u);
+ u += pitch;
+ d11 = vld1_u8(v);
+ v += pitch;
+ d12 = vld1_u8(u);
+ u += pitch;
+ d13 = vld1_u8(v);
+ v += pitch;
+ d14 = vld1_u8(u);
+ u += pitch;
+ d15 = vld1_u8(v);
+ v += pitch;
+ d16 = vld1_u8(u);
+ u += pitch;
+ d17 = vld1_u8(v);
+ v += pitch;
+ d18 = vld1_u8(u);
+ u += pitch;
+ d19 = vld1_u8(v);
+ v += pitch;
+ d20 = vld1_u8(u);
+ d21 = vld1_u8(v);
+
+ q3 = vcombine_u8(d6, d7);
+ q4 = vcombine_u8(d8, d9);
+ q5 = vcombine_u8(d10, d11);
+ q6 = vcombine_u8(d12, d13);
+ q7 = vcombine_u8(d14, d15);
+ q8 = vcombine_u8(d16, d17);
+ q9 = vcombine_u8(d18, d19);
+ q10 = vcombine_u8(d20, d21);
+
+ vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
+ q5, q6, q7, q8, q9, q10,
+ &q4, &q5, &q6, &q7, &q8, &q9);
+
+ u -= (pitch * 6);
+ v -= (pitch * 6);
+ vst1_u8(u, vget_low_u8(q4));
+ u += pitch;
+ vst1_u8(v, vget_high_u8(q4));
+ v += pitch;
+ vst1_u8(u, vget_low_u8(q5));
+ u += pitch;
+ vst1_u8(v, vget_high_u8(q5));
+ v += pitch;
+ vst1_u8(u, vget_low_u8(q6));
+ u += pitch;
+ vst1_u8(v, vget_high_u8(q6));
+ v += pitch;
+ vst1_u8(u, vget_low_u8(q7));
+ u += pitch;
+ vst1_u8(v, vget_high_u8(q7));
+ v += pitch;
+ vst1_u8(u, vget_low_u8(q8));
+ u += pitch;
+ vst1_u8(v, vget_high_u8(q8));
+ v += pitch;
+ vst1_u8(u, vget_low_u8(q9));
+ vst1_u8(v, vget_high_u8(q9));
+ return;
+}
+
+void vp8_mbloop_filter_vertical_edge_y_neon(
+ unsigned char *src,
+ int pitch,
+ unsigned char blimit,
+ unsigned char limit,
+ unsigned char thresh) {
+ unsigned char *s1, *s2;
+ uint8x16_t qblimit, qlimit, qthresh, q3, q4;
+ uint8x16_t q5, q6, q7, q8, q9, q10;
+ uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
+ uint8x8_t d15, d16, d17, d18, d19, d20, d21;
+ uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3;
+ uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7;
+ uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11;
+
+ qblimit = vdupq_n_u8(blimit);
+ qlimit = vdupq_n_u8(limit);
+ qthresh = vdupq_n_u8(thresh);
+
+ s1 = src - 4;
+ s2 = s1 + 8 * pitch;
+ d6 = vld1_u8(s1);
+ s1 += pitch;
+ d7 = vld1_u8(s2);
+ s2 += pitch;
+ d8 = vld1_u8(s1);
+ s1 += pitch;
+ d9 = vld1_u8(s2);
+ s2 += pitch;
+ d10 = vld1_u8(s1);
+ s1 += pitch;
+ d11 = vld1_u8(s2);
+ s2 += pitch;
+ d12 = vld1_u8(s1);
+ s1 += pitch;
+ d13 = vld1_u8(s2);
+ s2 += pitch;
+ d14 = vld1_u8(s1);
+ s1 += pitch;
+ d15 = vld1_u8(s2);
+ s2 += pitch;
+ d16 = vld1_u8(s1);
+ s1 += pitch;
+ d17 = vld1_u8(s2);
+ s2 += pitch;
+ d18 = vld1_u8(s1);
+ s1 += pitch;
+ d19 = vld1_u8(s2);
+ s2 += pitch;
+ d20 = vld1_u8(s1);
+ d21 = vld1_u8(s2);
+
+ q3 = vcombine_u8(d6, d7);
+ q4 = vcombine_u8(d8, d9);
+ q5 = vcombine_u8(d10, d11);
+ q6 = vcombine_u8(d12, d13);
+ q7 = vcombine_u8(d14, d15);
+ q8 = vcombine_u8(d16, d17);
+ q9 = vcombine_u8(d18, d19);
+ q10 = vcombine_u8(d20, d21);
+
+ q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
+ q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
+ q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
+ q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
+
+ q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
+ vreinterpretq_u16_u32(q2tmp2.val[0]));
+ q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
+ vreinterpretq_u16_u32(q2tmp3.val[0]));
+ q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
+ vreinterpretq_u16_u32(q2tmp2.val[1]));
+ q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
+ vreinterpretq_u16_u32(q2tmp3.val[1]));
+
+ q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
+ vreinterpretq_u8_u16(q2tmp5.val[0]));
+ q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
+ vreinterpretq_u8_u16(q2tmp5.val[1]));
+ q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
+ vreinterpretq_u8_u16(q2tmp7.val[0]));
+ q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
+ vreinterpretq_u8_u16(q2tmp7.val[1]));
+
+ q3 = q2tmp8.val[0];
+ q4 = q2tmp8.val[1];
+ q5 = q2tmp9.val[0];
+ q6 = q2tmp9.val[1];
+ q7 = q2tmp10.val[0];
+ q8 = q2tmp10.val[1];
+ q9 = q2tmp11.val[0];
+ q10 = q2tmp11.val[1];
+
+ vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
+ q5, q6, q7, q8, q9, q10,
+ &q4, &q5, &q6, &q7, &q8, &q9);
+
+ q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
+ q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
+ q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
+ q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
+
+ q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
+ vreinterpretq_u16_u32(q2tmp2.val[0]));
+ q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
+ vreinterpretq_u16_u32(q2tmp3.val[0]));
+ q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
+ vreinterpretq_u16_u32(q2tmp2.val[1]));
+ q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
+ vreinterpretq_u16_u32(q2tmp3.val[1]));
+
+ q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
+ vreinterpretq_u8_u16(q2tmp5.val[0]));
+ q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
+ vreinterpretq_u8_u16(q2tmp5.val[1]));
+ q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
+ vreinterpretq_u8_u16(q2tmp7.val[0]));
+ q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
+ vreinterpretq_u8_u16(q2tmp7.val[1]));
+
+ q3 = q2tmp8.val[0];
+ q4 = q2tmp8.val[1];
+ q5 = q2tmp9.val[0];
+ q6 = q2tmp9.val[1];
+ q7 = q2tmp10.val[0];
+ q8 = q2tmp10.val[1];
+ q9 = q2tmp11.val[0];
+ q10 = q2tmp11.val[1];
+
+ s1 -= 7 * pitch;
+ s2 -= 7 * pitch;
+
+ vst1_u8(s1, vget_low_u8(q3));
+ s1 += pitch;
+ vst1_u8(s2, vget_high_u8(q3));
+ s2 += pitch;
+ vst1_u8(s1, vget_low_u8(q4));
+ s1 += pitch;
+ vst1_u8(s2, vget_high_u8(q4));
+ s2 += pitch;
+ vst1_u8(s1, vget_low_u8(q5));
+ s1 += pitch;
+ vst1_u8(s2, vget_high_u8(q5));
+ s2 += pitch;
+ vst1_u8(s1, vget_low_u8(q6));
+ s1 += pitch;
+ vst1_u8(s2, vget_high_u8(q6));
+ s2 += pitch;
+ vst1_u8(s1, vget_low_u8(q7));
+ s1 += pitch;
+ vst1_u8(s2, vget_high_u8(q7));
+ s2 += pitch;
+ vst1_u8(s1, vget_low_u8(q8));
+ s1 += pitch;
+ vst1_u8(s2, vget_high_u8(q8));
+ s2 += pitch;
+ vst1_u8(s1, vget_low_u8(q9));
+ s1 += pitch;
+ vst1_u8(s2, vget_high_u8(q9));
+ s2 += pitch;
+ vst1_u8(s1, vget_low_u8(q10));
+ vst1_u8(s2, vget_high_u8(q10));
+ return;
+}
+
+void vp8_mbloop_filter_vertical_edge_uv_neon(
+ unsigned char *u,
+ int pitch,
+ unsigned char blimit,
+ unsigned char limit,
+ unsigned char thresh,
+ unsigned char *v) {
+ unsigned char *us, *ud;
+ unsigned char *vs, *vd;
+ uint8x16_t qblimit, qlimit, qthresh, q3, q4;
+ uint8x16_t q5, q6, q7, q8, q9, q10;
+ uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
+ uint8x8_t d15, d16, d17, d18, d19, d20, d21;
+ uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3;
+ uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7;
+ uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11;
+
+ qblimit = vdupq_n_u8(blimit);
+ qlimit = vdupq_n_u8(limit);
+ qthresh = vdupq_n_u8(thresh);
+
+ us = u - 4;
+ vs = v - 4;
+ d6 = vld1_u8(us);
+ us += pitch;
+ d7 = vld1_u8(vs);
+ vs += pitch;
+ d8 = vld1_u8(us);
+ us += pitch;
+ d9 = vld1_u8(vs);
+ vs += pitch;
+ d10 = vld1_u8(us);
+ us += pitch;
+ d11 = vld1_u8(vs);
+ vs += pitch;
+ d12 = vld1_u8(us);
+ us += pitch;
+ d13 = vld1_u8(vs);
+ vs += pitch;
+ d14 = vld1_u8(us);
+ us += pitch;
+ d15 = vld1_u8(vs);
+ vs += pitch;
+ d16 = vld1_u8(us);
+ us += pitch;
+ d17 = vld1_u8(vs);
+ vs += pitch;
+ d18 = vld1_u8(us);
+ us += pitch;
+ d19 = vld1_u8(vs);
+ vs += pitch;
+ d20 = vld1_u8(us);
+ d21 = vld1_u8(vs);
+
+ q3 = vcombine_u8(d6, d7);
+ q4 = vcombine_u8(d8, d9);
+ q5 = vcombine_u8(d10, d11);
+ q6 = vcombine_u8(d12, d13);
+ q7 = vcombine_u8(d14, d15);
+ q8 = vcombine_u8(d16, d17);
+ q9 = vcombine_u8(d18, d19);
+ q10 = vcombine_u8(d20, d21);
+
+ q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
+ q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
+ q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
+ q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
+
+ q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
+ vreinterpretq_u16_u32(q2tmp2.val[0]));
+ q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
+ vreinterpretq_u16_u32(q2tmp3.val[0]));
+ q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
+ vreinterpretq_u16_u32(q2tmp2.val[1]));
+ q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
+ vreinterpretq_u16_u32(q2tmp3.val[1]));
+
+ q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
+ vreinterpretq_u8_u16(q2tmp5.val[0]));
+ q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
+ vreinterpretq_u8_u16(q2tmp5.val[1]));
+ q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
+ vreinterpretq_u8_u16(q2tmp7.val[0]));
+ q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
+ vreinterpretq_u8_u16(q2tmp7.val[1]));
+
+ q3 = q2tmp8.val[0];
+ q4 = q2tmp8.val[1];
+ q5 = q2tmp9.val[0];
+ q6 = q2tmp9.val[1];
+ q7 = q2tmp10.val[0];
+ q8 = q2tmp10.val[1];
+ q9 = q2tmp11.val[0];
+ q10 = q2tmp11.val[1];
+
+ vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
+ q5, q6, q7, q8, q9, q10,
+ &q4, &q5, &q6, &q7, &q8, &q9);
+
+ q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
+ q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
+ q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
+ q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
+
+ q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
+ vreinterpretq_u16_u32(q2tmp2.val[0]));
+ q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
+ vreinterpretq_u16_u32(q2tmp3.val[0]));
+ q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
+ vreinterpretq_u16_u32(q2tmp2.val[1]));
+ q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
+ vreinterpretq_u16_u32(q2tmp3.val[1]));
+
+ q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
+ vreinterpretq_u8_u16(q2tmp5.val[0]));
+ q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
+ vreinterpretq_u8_u16(q2tmp5.val[1]));
+ q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
+ vreinterpretq_u8_u16(q2tmp7.val[0]));
+ q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
+ vreinterpretq_u8_u16(q2tmp7.val[1]));
+
+ q3 = q2tmp8.val[0];
+ q4 = q2tmp8.val[1];
+ q5 = q2tmp9.val[0];
+ q6 = q2tmp9.val[1];
+ q7 = q2tmp10.val[0];
+ q8 = q2tmp10.val[1];
+ q9 = q2tmp11.val[0];
+ q10 = q2tmp11.val[1];
+
+ ud = u - 4;
+ vst1_u8(ud, vget_low_u8(q3));
+ ud += pitch;
+ vst1_u8(ud, vget_low_u8(q4));
+ ud += pitch;
+ vst1_u8(ud, vget_low_u8(q5));
+ ud += pitch;
+ vst1_u8(ud, vget_low_u8(q6));
+ ud += pitch;
+ vst1_u8(ud, vget_low_u8(q7));
+ ud += pitch;
+ vst1_u8(ud, vget_low_u8(q8));
+ ud += pitch;
+ vst1_u8(ud, vget_low_u8(q9));
+ ud += pitch;
+ vst1_u8(ud, vget_low_u8(q10));
+
+ vd = v - 4;
+ vst1_u8(vd, vget_high_u8(q3));
+ vd += pitch;
+ vst1_u8(vd, vget_high_u8(q4));
+ vd += pitch;
+ vst1_u8(vd, vget_high_u8(q5));
+ vd += pitch;
+ vst1_u8(vd, vget_high_u8(q6));
+ vd += pitch;
+ vst1_u8(vd, vget_high_u8(q7));
+ vd += pitch;
+ vst1_u8(vd, vget_high_u8(q8));
+ vd += pitch;
+ vst1_u8(vd, vget_high_u8(q9));
+ vd += pitch;
+ vst1_u8(vd, vget_high_u8(q10));
+ return;
+}
diff --git a/vp8/common/arm/neon/sad16_neon.asm b/vp8/common/arm/neon/sad16_neon.asm
deleted file mode 100644
index d7c590e..0000000
--- a/vp8/common/arm/neon/sad16_neon.asm
+++ /dev/null
@@ -1,207 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8_sad16x16_neon|
- EXPORT |vp8_sad16x8_neon|
-
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-
-; r0 unsigned char *src_ptr
-; r1 int src_stride
-; r2 unsigned char *ref_ptr
-; r3 int ref_stride
-|vp8_sad16x16_neon| PROC
-;;
- vld1.8 {q0}, [r0], r1
- vld1.8 {q4}, [r2], r3
-
- vld1.8 {q1}, [r0], r1
- vld1.8 {q5}, [r2], r3
-
- vabdl.u8 q12, d0, d8
- vabdl.u8 q13, d1, d9
-
- vld1.8 {q2}, [r0], r1
- vld1.8 {q6}, [r2], r3
-
- vabal.u8 q12, d2, d10
- vabal.u8 q13, d3, d11
-
- vld1.8 {q3}, [r0], r1
- vld1.8 {q7}, [r2], r3
-
- vabal.u8 q12, d4, d12
- vabal.u8 q13, d5, d13
-
-;;
- vld1.8 {q0}, [r0], r1
- vld1.8 {q4}, [r2], r3
-
- vabal.u8 q12, d6, d14
- vabal.u8 q13, d7, d15
-
- vld1.8 {q1}, [r0], r1
- vld1.8 {q5}, [r2], r3
-
- vabal.u8 q12, d0, d8
- vabal.u8 q13, d1, d9
-
- vld1.8 {q2}, [r0], r1
- vld1.8 {q6}, [r2], r3
-
- vabal.u8 q12, d2, d10
- vabal.u8 q13, d3, d11
-
- vld1.8 {q3}, [r0], r1
- vld1.8 {q7}, [r2], r3
-
- vabal.u8 q12, d4, d12
- vabal.u8 q13, d5, d13
-
-;;
- vld1.8 {q0}, [r0], r1
- vld1.8 {q4}, [r2], r3
-
- vabal.u8 q12, d6, d14
- vabal.u8 q13, d7, d15
-
- vld1.8 {q1}, [r0], r1
- vld1.8 {q5}, [r2], r3
-
- vabal.u8 q12, d0, d8
- vabal.u8 q13, d1, d9
-
- vld1.8 {q2}, [r0], r1
- vld1.8 {q6}, [r2], r3
-
- vabal.u8 q12, d2, d10
- vabal.u8 q13, d3, d11
-
- vld1.8 {q3}, [r0], r1
- vld1.8 {q7}, [r2], r3
-
- vabal.u8 q12, d4, d12
- vabal.u8 q13, d5, d13
-
-;;
- vld1.8 {q0}, [r0], r1
- vld1.8 {q4}, [r2], r3
-
- vabal.u8 q12, d6, d14
- vabal.u8 q13, d7, d15
-
- vld1.8 {q1}, [r0], r1
- vld1.8 {q5}, [r2], r3
-
- vabal.u8 q12, d0, d8
- vabal.u8 q13, d1, d9
-
- vld1.8 {q2}, [r0], r1
- vld1.8 {q6}, [r2], r3
-
- vabal.u8 q12, d2, d10
- vabal.u8 q13, d3, d11
-
- vld1.8 {q3}, [r0]
- vld1.8 {q7}, [r2]
-
- vabal.u8 q12, d4, d12
- vabal.u8 q13, d5, d13
-
- vabal.u8 q12, d6, d14
- vabal.u8 q13, d7, d15
-
- vadd.u16 q0, q12, q13
-
- vpaddl.u16 q1, q0
- vpaddl.u32 q0, q1
-
- vadd.u32 d0, d0, d1
-
- vmov.32 r0, d0[0]
-
- bx lr
-
- ENDP
-
-;==============================
-;unsigned int vp8_sad16x8_c(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride)
-|vp8_sad16x8_neon| PROC
- vld1.8 {q0}, [r0], r1
- vld1.8 {q4}, [r2], r3
-
- vld1.8 {q1}, [r0], r1
- vld1.8 {q5}, [r2], r3
-
- vabdl.u8 q12, d0, d8
- vabdl.u8 q13, d1, d9
-
- vld1.8 {q2}, [r0], r1
- vld1.8 {q6}, [r2], r3
-
- vabal.u8 q12, d2, d10
- vabal.u8 q13, d3, d11
-
- vld1.8 {q3}, [r0], r1
- vld1.8 {q7}, [r2], r3
-
- vabal.u8 q12, d4, d12
- vabal.u8 q13, d5, d13
-
- vld1.8 {q0}, [r0], r1
- vld1.8 {q4}, [r2], r3
-
- vabal.u8 q12, d6, d14
- vabal.u8 q13, d7, d15
-
- vld1.8 {q1}, [r0], r1
- vld1.8 {q5}, [r2], r3
-
- vabal.u8 q12, d0, d8
- vabal.u8 q13, d1, d9
-
- vld1.8 {q2}, [r0], r1
- vld1.8 {q6}, [r2], r3
-
- vabal.u8 q12, d2, d10
- vabal.u8 q13, d3, d11
-
- vld1.8 {q3}, [r0], r1
- vld1.8 {q7}, [r2], r3
-
- vabal.u8 q12, d4, d12
- vabal.u8 q13, d5, d13
-
- vabal.u8 q12, d6, d14
- vabal.u8 q13, d7, d15
-
- vadd.u16 q0, q12, q13
-
- vpaddl.u16 q1, q0
- vpaddl.u32 q0, q1
-
- vadd.u32 d0, d0, d1
-
- vmov.32 r0, d0[0]
-
- bx lr
-
- ENDP
-
- END
diff --git a/vp8/common/arm/neon/sad8_neon.asm b/vp8/common/arm/neon/sad8_neon.asm
deleted file mode 100644
index 23ba6df..0000000
--- a/vp8/common/arm/neon/sad8_neon.asm
+++ /dev/null
@@ -1,209 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8_sad8x8_neon|
- EXPORT |vp8_sad8x16_neon|
- EXPORT |vp8_sad4x4_neon|
-
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-; unsigned int vp8_sad8x8_c(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride)
-
-|vp8_sad8x8_neon| PROC
- vld1.8 {d0}, [r0], r1
- vld1.8 {d8}, [r2], r3
-
- vld1.8 {d2}, [r0], r1
- vld1.8 {d10}, [r2], r3
-
- vabdl.u8 q12, d0, d8
-
- vld1.8 {d4}, [r0], r1
- vld1.8 {d12}, [r2], r3
-
- vabal.u8 q12, d2, d10
-
- vld1.8 {d6}, [r0], r1
- vld1.8 {d14}, [r2], r3
-
- vabal.u8 q12, d4, d12
-
- vld1.8 {d0}, [r0], r1
- vld1.8 {d8}, [r2], r3
-
- vabal.u8 q12, d6, d14
-
- vld1.8 {d2}, [r0], r1
- vld1.8 {d10}, [r2], r3
-
- vabal.u8 q12, d0, d8
-
- vld1.8 {d4}, [r0], r1
- vld1.8 {d12}, [r2], r3
-
- vabal.u8 q12, d2, d10
-
- vld1.8 {d6}, [r0], r1
- vld1.8 {d14}, [r2], r3
-
- vabal.u8 q12, d4, d12
- vabal.u8 q12, d6, d14
-
- vpaddl.u16 q1, q12
- vpaddl.u32 q0, q1
- vadd.u32 d0, d0, d1
-
- vmov.32 r0, d0[0]
-
- bx lr
-
- ENDP
-
-;============================
-;unsigned int vp8_sad8x16_c(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride)
-
-|vp8_sad8x16_neon| PROC
- vld1.8 {d0}, [r0], r1
- vld1.8 {d8}, [r2], r3
-
- vld1.8 {d2}, [r0], r1
- vld1.8 {d10}, [r2], r3
-
- vabdl.u8 q12, d0, d8
-
- vld1.8 {d4}, [r0], r1
- vld1.8 {d12}, [r2], r3
-
- vabal.u8 q12, d2, d10
-
- vld1.8 {d6}, [r0], r1
- vld1.8 {d14}, [r2], r3
-
- vabal.u8 q12, d4, d12
-
- vld1.8 {d0}, [r0], r1
- vld1.8 {d8}, [r2], r3
-
- vabal.u8 q12, d6, d14
-
- vld1.8 {d2}, [r0], r1
- vld1.8 {d10}, [r2], r3
-
- vabal.u8 q12, d0, d8
-
- vld1.8 {d4}, [r0], r1
- vld1.8 {d12}, [r2], r3
-
- vabal.u8 q12, d2, d10
-
- vld1.8 {d6}, [r0], r1
- vld1.8 {d14}, [r2], r3
-
- vabal.u8 q12, d4, d12
-
- vld1.8 {d0}, [r0], r1
- vld1.8 {d8}, [r2], r3
-
- vabal.u8 q12, d6, d14
-
- vld1.8 {d2}, [r0], r1
- vld1.8 {d10}, [r2], r3
-
- vabal.u8 q12, d0, d8
-
- vld1.8 {d4}, [r0], r1
- vld1.8 {d12}, [r2], r3
-
- vabal.u8 q12, d2, d10
-
- vld1.8 {d6}, [r0], r1
- vld1.8 {d14}, [r2], r3
-
- vabal.u8 q12, d4, d12
-
- vld1.8 {d0}, [r0], r1
- vld1.8 {d8}, [r2], r3
-
- vabal.u8 q12, d6, d14
-
- vld1.8 {d2}, [r0], r1
- vld1.8 {d10}, [r2], r3
-
- vabal.u8 q12, d0, d8
-
- vld1.8 {d4}, [r0], r1
- vld1.8 {d12}, [r2], r3
-
- vabal.u8 q12, d2, d10
-
- vld1.8 {d6}, [r0], r1
- vld1.8 {d14}, [r2], r3
-
- vabal.u8 q12, d4, d12
- vabal.u8 q12, d6, d14
-
- vpaddl.u16 q1, q12
- vpaddl.u32 q0, q1
- vadd.u32 d0, d0, d1
-
- vmov.32 r0, d0[0]
-
- bx lr
-
- ENDP
-
-;===========================
-;unsigned int vp8_sad4x4_c(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride)
-
-|vp8_sad4x4_neon| PROC
- vld1.8 {d0}, [r0], r1
- vld1.8 {d8}, [r2], r3
-
- vld1.8 {d2}, [r0], r1
- vld1.8 {d10}, [r2], r3
-
- vabdl.u8 q12, d0, d8
-
- vld1.8 {d4}, [r0], r1
- vld1.8 {d12}, [r2], r3
-
- vabal.u8 q12, d2, d10
-
- vld1.8 {d6}, [r0], r1
- vld1.8 {d14}, [r2], r3
-
- vabal.u8 q12, d4, d12
- vabal.u8 q12, d6, d14
-
- vpaddl.u16 d1, d24
- vpaddl.u32 d0, d1
- vmov.32 r0, d0[0]
-
- bx lr
-
- ENDP
-
- END
diff --git a/vp8/common/arm/neon/sad_neon.c b/vp8/common/arm/neon/sad_neon.c
new file mode 100644
index 0000000..6595ac0
--- /dev/null
+++ b/vp8/common/arm/neon/sad_neon.c
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+unsigned int vp8_sad8x8_neon(
+ unsigned char *src_ptr,
+ int src_stride,
+ unsigned char *ref_ptr,
+ int ref_stride) {
+ uint8x8_t d0, d8;
+ uint16x8_t q12;
+ uint32x4_t q1;
+ uint64x2_t q3;
+ uint32x2_t d5;
+ int i;
+
+ d0 = vld1_u8(src_ptr);
+ src_ptr += src_stride;
+ d8 = vld1_u8(ref_ptr);
+ ref_ptr += ref_stride;
+ q12 = vabdl_u8(d0, d8);
+
+ for (i = 0; i < 7; i++) {
+ d0 = vld1_u8(src_ptr);
+ src_ptr += src_stride;
+ d8 = vld1_u8(ref_ptr);
+ ref_ptr += ref_stride;
+ q12 = vabal_u8(q12, d0, d8);
+ }
+
+ q1 = vpaddlq_u16(q12);
+ q3 = vpaddlq_u32(q1);
+ d5 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)),
+ vreinterpret_u32_u64(vget_high_u64(q3)));
+
+ return vget_lane_u32(d5, 0);
+}
+
+unsigned int vp8_sad8x16_neon(
+ unsigned char *src_ptr,
+ int src_stride,
+ unsigned char *ref_ptr,
+ int ref_stride) {
+ uint8x8_t d0, d8;
+ uint16x8_t q12;
+ uint32x4_t q1;
+ uint64x2_t q3;
+ uint32x2_t d5;
+ int i;
+
+ d0 = vld1_u8(src_ptr);
+ src_ptr += src_stride;
+ d8 = vld1_u8(ref_ptr);
+ ref_ptr += ref_stride;
+ q12 = vabdl_u8(d0, d8);
+
+ for (i = 0; i < 15; i++) {
+ d0 = vld1_u8(src_ptr);
+ src_ptr += src_stride;
+ d8 = vld1_u8(ref_ptr);
+ ref_ptr += ref_stride;
+ q12 = vabal_u8(q12, d0, d8);
+ }
+
+ q1 = vpaddlq_u16(q12);
+ q3 = vpaddlq_u32(q1);
+ d5 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)),
+ vreinterpret_u32_u64(vget_high_u64(q3)));
+
+ return vget_lane_u32(d5, 0);
+}
+
+unsigned int vp8_sad4x4_neon(
+ unsigned char *src_ptr,
+ int src_stride,
+ unsigned char *ref_ptr,
+ int ref_stride) {
+ uint8x8_t d0, d8;
+ uint16x8_t q12;
+ uint32x2_t d1;
+ uint64x1_t d3;
+ int i;
+
+ d0 = vld1_u8(src_ptr);
+ src_ptr += src_stride;
+ d8 = vld1_u8(ref_ptr);
+ ref_ptr += ref_stride;
+ q12 = vabdl_u8(d0, d8);
+
+ for (i = 0; i < 3; i++) {
+ d0 = vld1_u8(src_ptr);
+ src_ptr += src_stride;
+ d8 = vld1_u8(ref_ptr);
+ ref_ptr += ref_stride;
+ q12 = vabal_u8(q12, d0, d8);
+ }
+
+ d1 = vpaddl_u16(vget_low_u16(q12));
+ d3 = vpaddl_u32(d1);
+
+ return vget_lane_u32(vreinterpret_u32_u64(d3), 0);
+}
+
+unsigned int vp8_sad16x16_neon(
+ unsigned char *src_ptr,
+ int src_stride,
+ unsigned char *ref_ptr,
+ int ref_stride) {
+ uint8x16_t q0, q4;
+ uint16x8_t q12, q13;
+ uint32x4_t q1;
+ uint64x2_t q3;
+ uint32x2_t d5;
+ int i;
+
+ q0 = vld1q_u8(src_ptr);
+ src_ptr += src_stride;
+ q4 = vld1q_u8(ref_ptr);
+ ref_ptr += ref_stride;
+ q12 = vabdl_u8(vget_low_u8(q0), vget_low_u8(q4));
+ q13 = vabdl_u8(vget_high_u8(q0), vget_high_u8(q4));
+
+ for (i = 0; i < 15; i++) {
+ q0 = vld1q_u8(src_ptr);
+ src_ptr += src_stride;
+ q4 = vld1q_u8(ref_ptr);
+ ref_ptr += ref_stride;
+ q12 = vabal_u8(q12, vget_low_u8(q0), vget_low_u8(q4));
+ q13 = vabal_u8(q13, vget_high_u8(q0), vget_high_u8(q4));
+ }
+
+ q12 = vaddq_u16(q12, q13);
+ q1 = vpaddlq_u16(q12);
+ q3 = vpaddlq_u32(q1);
+ d5 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)),
+ vreinterpret_u32_u64(vget_high_u64(q3)));
+
+ return vget_lane_u32(d5, 0);
+}
+
+unsigned int vp8_sad16x8_neon(
+ unsigned char *src_ptr,
+ int src_stride,
+ unsigned char *ref_ptr,
+ int ref_stride) {
+ uint8x16_t q0, q4;
+ uint16x8_t q12, q13;
+ uint32x4_t q1;
+ uint64x2_t q3;
+ uint32x2_t d5;
+ int i;
+
+ q0 = vld1q_u8(src_ptr);
+ src_ptr += src_stride;
+ q4 = vld1q_u8(ref_ptr);
+ ref_ptr += ref_stride;
+ q12 = vabdl_u8(vget_low_u8(q0), vget_low_u8(q4));
+ q13 = vabdl_u8(vget_high_u8(q0), vget_high_u8(q4));
+
+ for (i = 0; i < 7; i++) {
+ q0 = vld1q_u8(src_ptr);
+ src_ptr += src_stride;
+ q4 = vld1q_u8(ref_ptr);
+ ref_ptr += ref_stride;
+ q12 = vabal_u8(q12, vget_low_u8(q0), vget_low_u8(q4));
+ q13 = vabal_u8(q13, vget_high_u8(q0), vget_high_u8(q4));
+ }
+
+ q12 = vaddq_u16(q12, q13);
+ q1 = vpaddlq_u16(q12);
+ q3 = vpaddlq_u32(q1);
+ d5 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)),
+ vreinterpret_u32_u64(vget_high_u64(q3)));
+
+ return vget_lane_u32(d5, 0);
+}
diff --git a/vp8/common/arm/neon/save_reg_neon.asm b/vp8/common/arm/neon/save_reg_neon.asm
deleted file mode 100644
index fd7002e..0000000
--- a/vp8/common/arm/neon/save_reg_neon.asm
+++ /dev/null
@@ -1,36 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8_push_neon|
- EXPORT |vp8_pop_neon|
-
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-
-|vp8_push_neon| PROC
- vst1.i64 {d8, d9, d10, d11}, [r0]!
- vst1.i64 {d12, d13, d14, d15}, [r0]!
- bx lr
-
- ENDP
-
-|vp8_pop_neon| PROC
- vld1.i64 {d8, d9, d10, d11}, [r0]!
- vld1.i64 {d12, d13, d14, d15}, [r0]!
- bx lr
-
- ENDP
-
- END
-
diff --git a/vp8/common/arm/neon/shortidct4x4llm_neon.asm b/vp8/common/arm/neon/shortidct4x4llm_neon.asm
deleted file mode 100644
index 67d2ab0..0000000
--- a/vp8/common/arm/neon/shortidct4x4llm_neon.asm
+++ /dev/null
@@ -1,139 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8_short_idct4x4llm_neon|
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-
-;*************************************************************
-;void vp8_short_idct4x4llm_c(short *input, unsigned char *pred, int pitch,
-; unsigned char *dst, int stride)
-;r0 short * input
-;r1 short * pred
-;r2 int pitch
-;r3 unsigned char dst
-;sp int stride
-;*************************************************************
-
-; static const int cospi8sqrt2minus1=20091;
-; static const int sinpi8sqrt2 =35468;
-; static const int rounding = 0;
-
-; Optimization note: The resulted data from dequantization are signed
-; 13-bit data that is in the range of [-4096, 4095]. This allows to
-; use "vqdmulh"(neon) instruction since it won't go out of range
-; (13+16+1=30bits<32bits). This instruction gives the high half
-; result of the multiplication that is needed in IDCT.
-
-|vp8_short_idct4x4llm_neon| PROC
- adr r12, idct_coeff
- vld1.16 {q1, q2}, [r0]
- vld1.16 {d0}, [r12]
-
- vswp d3, d4 ;q2(vp[4] vp[12])
- ldr r0, [sp] ; stride
-
- vqdmulh.s16 q3, q2, d0[2]
- vqdmulh.s16 q4, q2, d0[0]
-
- vqadd.s16 d12, d2, d3 ;a1
- vqsub.s16 d13, d2, d3 ;b1
-
- vshr.s16 q3, q3, #1
- vshr.s16 q4, q4, #1
-
- vqadd.s16 q3, q3, q2 ;modify since sinpi8sqrt2 > 65536/2 (negtive number)
- vqadd.s16 q4, q4, q2
-
- ;d6 - c1:temp1
- ;d7 - d1:temp2
- ;d8 - d1:temp1
- ;d9 - c1:temp2
-
- vqsub.s16 d10, d6, d9 ;c1
- vqadd.s16 d11, d7, d8 ;d1
-
- vqadd.s16 d2, d12, d11
- vqadd.s16 d3, d13, d10
- vqsub.s16 d4, d13, d10
- vqsub.s16 d5, d12, d11
-
- vtrn.32 d2, d4
- vtrn.32 d3, d5
- vtrn.16 d2, d3
- vtrn.16 d4, d5
-
- vswp d3, d4
-
- vqdmulh.s16 q3, q2, d0[2]
- vqdmulh.s16 q4, q2, d0[0]
-
- vqadd.s16 d12, d2, d3 ;a1
- vqsub.s16 d13, d2, d3 ;b1
-
- vshr.s16 q3, q3, #1
- vshr.s16 q4, q4, #1
-
- vqadd.s16 q3, q3, q2 ;modify since sinpi8sqrt2 > 65536/2 (negtive number)
- vqadd.s16 q4, q4, q2
-
- vqsub.s16 d10, d6, d9 ;c1
- vqadd.s16 d11, d7, d8 ;d1
-
- vqadd.s16 d2, d12, d11
- vqadd.s16 d3, d13, d10
- vqsub.s16 d4, d13, d10
- vqsub.s16 d5, d12, d11
-
- vrshr.s16 d2, d2, #3
- vrshr.s16 d3, d3, #3
- vrshr.s16 d4, d4, #3
- vrshr.s16 d5, d5, #3
-
- vtrn.32 d2, d4
- vtrn.32 d3, d5
- vtrn.16 d2, d3
- vtrn.16 d4, d5
-
- ; load prediction data
- vld1.32 d6[0], [r1], r2
- vld1.32 d6[1], [r1], r2
- vld1.32 d7[0], [r1], r2
- vld1.32 d7[1], [r1], r2
-
- ; add prediction and residual
- vaddw.u8 q1, q1, d6
- vaddw.u8 q2, q2, d7
-
- vqmovun.s16 d1, q1
- vqmovun.s16 d2, q2
-
- ; store to destination
- vst1.32 d1[0], [r3], r0
- vst1.32 d1[1], [r3], r0
- vst1.32 d2[0], [r3], r0
- vst1.32 d2[1], [r3], r0
-
- bx lr
-
- ENDP
-
-;-----------------
-
-idct_coeff
- DCD 0x4e7b4e7b, 0x8a8c8a8c
-
-;20091, 20091, 35468, 35468
-
- END
diff --git a/vp8/common/arm/neon/shortidct4x4llm_neon.c b/vp8/common/arm/neon/shortidct4x4llm_neon.c
new file mode 100644
index 0000000..373afa6
--- /dev/null
+++ b/vp8/common/arm/neon/shortidct4x4llm_neon.c
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+static const int16_t cospi8sqrt2minus1 = 20091;
+static const int16_t sinpi8sqrt2 = 35468;
+
+void vp8_short_idct4x4llm_neon(
+ int16_t *input,
+ unsigned char *pred_ptr,
+ int pred_stride,
+ unsigned char *dst_ptr,
+ int dst_stride) {
+ int i;
+ uint32x2_t d6u32 = vdup_n_u32(0);
+ uint8x8_t d1u8;
+ int16x4_t d2, d3, d4, d5, d10, d11, d12, d13;
+ uint16x8_t q1u16;
+ int16x8_t q1s16, q2s16, q3s16, q4s16;
+ int32x2x2_t v2tmp0, v2tmp1;
+ int16x4x2_t v2tmp2, v2tmp3;
+
+ d2 = vld1_s16(input);
+ d3 = vld1_s16(input + 4);
+ d4 = vld1_s16(input + 8);
+ d5 = vld1_s16(input + 12);
+
+ // 1st for loop
+ q1s16 = vcombine_s16(d2, d4); // Swap d3 d4 here
+ q2s16 = vcombine_s16(d3, d5);
+
+ q3s16 = vqdmulhq_n_s16(q2s16, sinpi8sqrt2);
+ q4s16 = vqdmulhq_n_s16(q2s16, cospi8sqrt2minus1);
+
+ d12 = vqadd_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // a1
+ d13 = vqsub_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // b1
+
+ q3s16 = vshrq_n_s16(q3s16, 1);
+ q4s16 = vshrq_n_s16(q4s16, 1);
+
+ q3s16 = vqaddq_s16(q3s16, q2s16);
+ q4s16 = vqaddq_s16(q4s16, q2s16);
+
+ d10 = vqsub_s16(vget_low_s16(q3s16), vget_high_s16(q4s16)); // c1
+ d11 = vqadd_s16(vget_high_s16(q3s16), vget_low_s16(q4s16)); // d1
+
+ d2 = vqadd_s16(d12, d11);
+ d3 = vqadd_s16(d13, d10);
+ d4 = vqsub_s16(d13, d10);
+ d5 = vqsub_s16(d12, d11);
+
+ v2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4));
+ v2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
+ v2tmp2 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[0]),
+ vreinterpret_s16_s32(v2tmp1.val[0]));
+ v2tmp3 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[1]),
+ vreinterpret_s16_s32(v2tmp1.val[1]));
+
+ // 2nd for loop
+ q1s16 = vcombine_s16(v2tmp2.val[0], v2tmp3.val[0]);
+ q2s16 = vcombine_s16(v2tmp2.val[1], v2tmp3.val[1]);
+
+ q3s16 = vqdmulhq_n_s16(q2s16, sinpi8sqrt2);
+ q4s16 = vqdmulhq_n_s16(q2s16, cospi8sqrt2minus1);
+
+ d12 = vqadd_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // a1
+ d13 = vqsub_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // b1
+
+ q3s16 = vshrq_n_s16(q3s16, 1);
+ q4s16 = vshrq_n_s16(q4s16, 1);
+
+ q3s16 = vqaddq_s16(q3s16, q2s16);
+ q4s16 = vqaddq_s16(q4s16, q2s16);
+
+ d10 = vqsub_s16(vget_low_s16(q3s16), vget_high_s16(q4s16)); // c1
+ d11 = vqadd_s16(vget_high_s16(q3s16), vget_low_s16(q4s16)); // d1
+
+ d2 = vqadd_s16(d12, d11);
+ d3 = vqadd_s16(d13, d10);
+ d4 = vqsub_s16(d13, d10);
+ d5 = vqsub_s16(d12, d11);
+
+ d2 = vrshr_n_s16(d2, 3);
+ d3 = vrshr_n_s16(d3, 3);
+ d4 = vrshr_n_s16(d4, 3);
+ d5 = vrshr_n_s16(d5, 3);
+
+ v2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4));
+ v2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
+ v2tmp2 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[0]),
+ vreinterpret_s16_s32(v2tmp1.val[0]));
+ v2tmp3 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[1]),
+ vreinterpret_s16_s32(v2tmp1.val[1]));
+
+ q1s16 = vcombine_s16(v2tmp2.val[0], v2tmp2.val[1]);
+ q2s16 = vcombine_s16(v2tmp3.val[0], v2tmp3.val[1]);
+
+ // dc_only_idct_add
+ for (i = 0; i < 2; i++, q1s16 = q2s16) {
+ d6u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d6u32, 0);
+ pred_ptr += pred_stride;
+ d6u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d6u32, 1);
+ pred_ptr += pred_stride;
+
+ q1u16 = vaddw_u8(vreinterpretq_u16_s16(q1s16),
+ vreinterpret_u8_u32(d6u32));
+ d1u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16));
+
+ vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d1u8), 0);
+ dst_ptr += dst_stride;
+ vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d1u8), 1);
+ dst_ptr += dst_stride;
+ }
+ return;
+}
diff --git a/vp8/common/arm/neon/sixtappredict16x16_neon.asm b/vp8/common/arm/neon/sixtappredict16x16_neon.asm
deleted file mode 100644
index 9fdafd3..0000000
--- a/vp8/common/arm/neon/sixtappredict16x16_neon.asm
+++ /dev/null
@@ -1,490 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8_sixtap_predict16x16_neon|
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-
-filter16_coeff
- DCD 0, 0, 128, 0, 0, 0, 0, 0
- DCD 0, -6, 123, 12, -1, 0, 0, 0
- DCD 2, -11, 108, 36, -8, 1, 0, 0
- DCD 0, -9, 93, 50, -6, 0, 0, 0
- DCD 3, -16, 77, 77, -16, 3, 0, 0
- DCD 0, -6, 50, 93, -9, 0, 0, 0
- DCD 1, -8, 36, 108, -11, 2, 0, 0
- DCD 0, -1, 12, 123, -6, 0, 0, 0
-
-; r0 unsigned char *src_ptr,
-; r1 int src_pixels_per_line,
-; r2 int xoffset,
-; r3 int yoffset,
-; r4 unsigned char *dst_ptr,
-; stack(r5) int dst_pitch
-
-;Note: To take advantage of 8-bit mulplication instruction in NEON. First apply abs() to
-; filter coeffs to make them u8. Then, use vmlsl for negtive coeffs. After multiplication,
-; the result can be negtive. So, I treat the result as s16. But, since it is also possible
-; that the result can be a large positive number (> 2^15-1), which could be confused as a
-; negtive number. To avoid that error, apply filter coeffs in the order of 0, 1, 4 ,5 ,2,
-; which ensures that the result stays in s16 range. Finally, saturated add the result by
-; applying 3rd filter coeff. Same applys to other filter functions.
-
-|vp8_sixtap_predict16x16_neon| PROC
- push {r4-r5, lr}
-
- adr r12, filter16_coeff
- ldr r4, [sp, #12] ;load parameters from stack
- ldr r5, [sp, #16] ;load parameters from stack
-
- cmp r2, #0 ;skip first_pass filter if xoffset=0
- beq secondpass_filter16x16_only
-
- add r2, r12, r2, lsl #5 ;calculate filter location
-
- cmp r3, #0 ;skip second_pass filter if yoffset=0
-
- vld1.s32 {q14, q15}, [r2] ;load first_pass filter
-
- beq firstpass_filter16x16_only
-
- sub sp, sp, #336 ;reserve space on stack for temporary storage
- mov lr, sp
-
- vabs.s32 q12, q14
- vabs.s32 q13, q15
-
- mov r2, #7 ;loop counter
- sub r0, r0, #2 ;move srcptr back to (line-2) and (column-2)
- sub r0, r0, r1, lsl #1
-
- vdup.8 d0, d24[0] ;first_pass filter (d0-d5)
- vdup.8 d1, d24[4]
- vdup.8 d2, d25[0]
- vdup.8 d3, d25[4]
- vdup.8 d4, d26[0]
- vdup.8 d5, d26[4]
-
-;First Pass: output_height lines x output_width columns (21x16)
-filt_blk2d_fp16x16_loop_neon
- vld1.u8 {d6, d7, d8}, [r0], r1 ;load src data
- vld1.u8 {d9, d10, d11}, [r0], r1
- vld1.u8 {d12, d13, d14}, [r0], r1
-
- pld [r0]
- pld [r0, r1]
- pld [r0, r1, lsl #1]
-
- vmull.u8 q8, d6, d0 ;(src_ptr[-2] * vp8_filter[0])
- vmull.u8 q9, d7, d0
- vmull.u8 q10, d9, d0
- vmull.u8 q11, d10, d0
- vmull.u8 q12, d12, d0
- vmull.u8 q13, d13, d0
-
- vext.8 d28, d6, d7, #1 ;construct src_ptr[-1]
- vext.8 d29, d9, d10, #1
- vext.8 d30, d12, d13, #1
-
- vmlsl.u8 q8, d28, d1 ;-(src_ptr[-1] * vp8_filter[1])
- vmlsl.u8 q10, d29, d1
- vmlsl.u8 q12, d30, d1
-
- vext.8 d28, d7, d8, #1
- vext.8 d29, d10, d11, #1
- vext.8 d30, d13, d14, #1
-
- vmlsl.u8 q9, d28, d1 ;-(src_ptr[-1] * vp8_filter[1])
- vmlsl.u8 q11, d29, d1
- vmlsl.u8 q13, d30, d1
-
- vext.8 d28, d6, d7, #4 ;construct src_ptr[2]
- vext.8 d29, d9, d10, #4
- vext.8 d30, d12, d13, #4
-
- vmlsl.u8 q8, d28, d4 ;-(src_ptr[2] * vp8_filter[4])
- vmlsl.u8 q10, d29, d4
- vmlsl.u8 q12, d30, d4
-
- vext.8 d28, d7, d8, #4
- vext.8 d29, d10, d11, #4
- vext.8 d30, d13, d14, #4
-
- vmlsl.u8 q9, d28, d4 ;-(src_ptr[2] * vp8_filter[4])
- vmlsl.u8 q11, d29, d4
- vmlsl.u8 q13, d30, d4
-
- vext.8 d28, d6, d7, #5 ;construct src_ptr[3]
- vext.8 d29, d9, d10, #5
- vext.8 d30, d12, d13, #5
-
- vmlal.u8 q8, d28, d5 ;(src_ptr[3] * vp8_filter[5])
- vmlal.u8 q10, d29, d5
- vmlal.u8 q12, d30, d5
-
- vext.8 d28, d7, d8, #5
- vext.8 d29, d10, d11, #5
- vext.8 d30, d13, d14, #5
-
- vmlal.u8 q9, d28, d5 ;(src_ptr[3] * vp8_filter[5])
- vmlal.u8 q11, d29, d5
- vmlal.u8 q13, d30, d5
-
- vext.8 d28, d6, d7, #2 ;construct src_ptr[0]
- vext.8 d29, d9, d10, #2
- vext.8 d30, d12, d13, #2
-
- vmlal.u8 q8, d28, d2 ;(src_ptr[0] * vp8_filter[2])
- vmlal.u8 q10, d29, d2
- vmlal.u8 q12, d30, d2
-
- vext.8 d28, d7, d8, #2
- vext.8 d29, d10, d11, #2
- vext.8 d30, d13, d14, #2
-
- vmlal.u8 q9, d28, d2 ;(src_ptr[0] * vp8_filter[2])
- vmlal.u8 q11, d29, d2
- vmlal.u8 q13, d30, d2
-
- vext.8 d28, d6, d7, #3 ;construct src_ptr[1]
- vext.8 d29, d9, d10, #3
- vext.8 d30, d12, d13, #3
-
- vext.8 d15, d7, d8, #3
- vext.8 d31, d10, d11, #3
- vext.8 d6, d13, d14, #3
-
- vmull.u8 q4, d28, d3 ;(src_ptr[1] * vp8_filter[3])
- vmull.u8 q5, d29, d3
- vmull.u8 q6, d30, d3
-
- vqadd.s16 q8, q4 ;sum of all (src_data*filter_parameters)
- vqadd.s16 q10, q5
- vqadd.s16 q12, q6
-
- vmull.u8 q6, d15, d3 ;(src_ptr[1] * vp8_filter[3])
- vmull.u8 q7, d31, d3
- vmull.u8 q3, d6, d3
-
- subs r2, r2, #1
-
- vqadd.s16 q9, q6
- vqadd.s16 q11, q7
- vqadd.s16 q13, q3
-
- vqrshrun.s16 d6, q8, #7 ;shift/round/saturate to u8
- vqrshrun.s16 d7, q9, #7
- vqrshrun.s16 d8, q10, #7
- vqrshrun.s16 d9, q11, #7
- vqrshrun.s16 d10, q12, #7
- vqrshrun.s16 d11, q13, #7
-
- vst1.u8 {d6, d7, d8}, [lr]! ;store result
- vst1.u8 {d9, d10, d11}, [lr]!
-
- bne filt_blk2d_fp16x16_loop_neon
-
-;Second pass: 16x16
-;secondpass_filter - do first 8-columns and then second 8-columns
- add r3, r12, r3, lsl #5
- sub lr, lr, #336
-
- vld1.s32 {q5, q6}, [r3] ;load second_pass filter
- mov r3, #2 ;loop counter
-
- vabs.s32 q7, q5
- vabs.s32 q8, q6
-
- mov r2, #16
-
- vdup.8 d0, d14[0] ;second_pass filter parameters (d0-d5)
- vdup.8 d1, d14[4]
- vdup.8 d2, d15[0]
- vdup.8 d3, d15[4]
- vdup.8 d4, d16[0]
- vdup.8 d5, d16[4]
-
-filt_blk2d_sp16x16_outloop_neon
- vld1.u8 {d18}, [lr], r2 ;load src data
- vld1.u8 {d19}, [lr], r2
- vld1.u8 {d20}, [lr], r2
- vld1.u8 {d21}, [lr], r2
- mov r12, #4 ;loop counter
- vld1.u8 {d22}, [lr], r2
-
-secondpass_inner_loop_neon
- vld1.u8 {d23}, [lr], r2 ;load src data
- vld1.u8 {d24}, [lr], r2
- vld1.u8 {d25}, [lr], r2
- vld1.u8 {d26}, [lr], r2
-
- vmull.u8 q3, d18, d0 ;(src_ptr[-2] * vp8_filter[0])
- vmull.u8 q4, d19, d0
- vmull.u8 q5, d20, d0
- vmull.u8 q6, d21, d0
-
- vmlsl.u8 q3, d19, d1 ;-(src_ptr[-1] * vp8_filter[1])
- vmlsl.u8 q4, d20, d1
- vmlsl.u8 q5, d21, d1
- vmlsl.u8 q6, d22, d1
-
- vmlsl.u8 q3, d22, d4 ;-(src_ptr[2] * vp8_filter[4])
- vmlsl.u8 q4, d23, d4
- vmlsl.u8 q5, d24, d4
- vmlsl.u8 q6, d25, d4
-
- vmlal.u8 q3, d20, d2 ;(src_ptr[0] * vp8_filter[2])
- vmlal.u8 q4, d21, d2
- vmlal.u8 q5, d22, d2
- vmlal.u8 q6, d23, d2
-
- vmlal.u8 q3, d23, d5 ;(src_ptr[3] * vp8_filter[5])
- vmlal.u8 q4, d24, d5
- vmlal.u8 q5, d25, d5
- vmlal.u8 q6, d26, d5
-
- vmull.u8 q7, d21, d3 ;(src_ptr[1] * vp8_filter[3])
- vmull.u8 q8, d22, d3
- vmull.u8 q9, d23, d3
- vmull.u8 q10, d24, d3
-
- subs r12, r12, #1
-
- vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters)
- vqadd.s16 q8, q4
- vqadd.s16 q9, q5
- vqadd.s16 q10, q6
-
- vqrshrun.s16 d6, q7, #7 ;shift/round/saturate to u8
- vqrshrun.s16 d7, q8, #7
- vqrshrun.s16 d8, q9, #7
- vqrshrun.s16 d9, q10, #7
-
- vst1.u8 {d6}, [r4], r5 ;store result
- vmov q9, q11
- vst1.u8 {d7}, [r4], r5
- vmov q10, q12
- vst1.u8 {d8}, [r4], r5
- vmov d22, d26
- vst1.u8 {d9}, [r4], r5
-
- bne secondpass_inner_loop_neon
-
- subs r3, r3, #1
- sub lr, lr, #336
- add lr, lr, #8
-
- sub r4, r4, r5, lsl #4
- add r4, r4, #8
-
- bne filt_blk2d_sp16x16_outloop_neon
-
- add sp, sp, #336
- pop {r4-r5,pc}
-
-;--------------------
-firstpass_filter16x16_only
- vabs.s32 q12, q14
- vabs.s32 q13, q15
-
- mov r2, #8 ;loop counter
- sub r0, r0, #2 ;move srcptr back to (column-2)
-
- vdup.8 d0, d24[0] ;first_pass filter (d0-d5)
- vdup.8 d1, d24[4]
- vdup.8 d2, d25[0]
- vdup.8 d3, d25[4]
- vdup.8 d4, d26[0]
- vdup.8 d5, d26[4]
-
-;First Pass: output_height lines x output_width columns (16x16)
-filt_blk2d_fpo16x16_loop_neon
- vld1.u8 {d6, d7, d8}, [r0], r1 ;load src data
- vld1.u8 {d9, d10, d11}, [r0], r1
-
- pld [r0]
- pld [r0, r1]
-
- vmull.u8 q6, d6, d0 ;(src_ptr[-2] * vp8_filter[0])
- vmull.u8 q7, d7, d0
- vmull.u8 q8, d9, d0
- vmull.u8 q9, d10, d0
-
- vext.8 d20, d6, d7, #1 ;construct src_ptr[-1]
- vext.8 d21, d9, d10, #1
- vext.8 d22, d7, d8, #1
- vext.8 d23, d10, d11, #1
- vext.8 d24, d6, d7, #4 ;construct src_ptr[2]
- vext.8 d25, d9, d10, #4
- vext.8 d26, d7, d8, #4
- vext.8 d27, d10, d11, #4
- vext.8 d28, d6, d7, #5 ;construct src_ptr[3]
- vext.8 d29, d9, d10, #5
-
- vmlsl.u8 q6, d20, d1 ;-(src_ptr[-1] * vp8_filter[1])
- vmlsl.u8 q8, d21, d1
- vmlsl.u8 q7, d22, d1 ;-(src_ptr[-1] * vp8_filter[1])
- vmlsl.u8 q9, d23, d1
- vmlsl.u8 q6, d24, d4 ;-(src_ptr[2] * vp8_filter[4])
- vmlsl.u8 q8, d25, d4
- vmlsl.u8 q7, d26, d4 ;-(src_ptr[2] * vp8_filter[4])
- vmlsl.u8 q9, d27, d4
- vmlal.u8 q6, d28, d5 ;(src_ptr[3] * vp8_filter[5])
- vmlal.u8 q8, d29, d5
-
- vext.8 d20, d7, d8, #5
- vext.8 d21, d10, d11, #5
- vext.8 d22, d6, d7, #2 ;construct src_ptr[0]
- vext.8 d23, d9, d10, #2
- vext.8 d24, d7, d8, #2
- vext.8 d25, d10, d11, #2
-
- vext.8 d26, d6, d7, #3 ;construct src_ptr[1]
- vext.8 d27, d9, d10, #3
- vext.8 d28, d7, d8, #3
- vext.8 d29, d10, d11, #3
-
- vmlal.u8 q7, d20, d5 ;(src_ptr[3] * vp8_filter[5])
- vmlal.u8 q9, d21, d5
- vmlal.u8 q6, d22, d2 ;(src_ptr[0] * vp8_filter[2])
- vmlal.u8 q8, d23, d2
- vmlal.u8 q7, d24, d2 ;(src_ptr[0] * vp8_filter[2])
- vmlal.u8 q9, d25, d2
-
- vmull.u8 q10, d26, d3 ;(src_ptr[1] * vp8_filter[3])
- vmull.u8 q11, d27, d3
- vmull.u8 q12, d28, d3 ;(src_ptr[1] * vp8_filter[3])
- vmull.u8 q15, d29, d3
-
- vqadd.s16 q6, q10 ;sum of all (src_data*filter_parameters)
- vqadd.s16 q8, q11
- vqadd.s16 q7, q12
- vqadd.s16 q9, q15
-
- subs r2, r2, #1
-
- vqrshrun.s16 d6, q6, #7 ;shift/round/saturate to u8
- vqrshrun.s16 d7, q7, #7
- vqrshrun.s16 d8, q8, #7
- vqrshrun.s16 d9, q9, #7
-
- vst1.u8 {q3}, [r4], r5 ;store result
- vst1.u8 {q4}, [r4], r5
-
- bne filt_blk2d_fpo16x16_loop_neon
-
- pop {r4-r5,pc}
-
-;--------------------
-secondpass_filter16x16_only
-;Second pass: 16x16
- add r3, r12, r3, lsl #5
- sub r0, r0, r1, lsl #1
-
- vld1.s32 {q5, q6}, [r3] ;load second_pass filter
- mov r3, #2 ;loop counter
-
- vabs.s32 q7, q5
- vabs.s32 q8, q6
-
- vdup.8 d0, d14[0] ;second_pass filter parameters (d0-d5)
- vdup.8 d1, d14[4]
- vdup.8 d2, d15[0]
- vdup.8 d3, d15[4]
- vdup.8 d4, d16[0]
- vdup.8 d5, d16[4]
-
-filt_blk2d_spo16x16_outloop_neon
- vld1.u8 {d18}, [r0], r1 ;load src data
- vld1.u8 {d19}, [r0], r1
- vld1.u8 {d20}, [r0], r1
- vld1.u8 {d21}, [r0], r1
- mov r12, #4 ;loop counter
- vld1.u8 {d22}, [r0], r1
-
-secondpass_only_inner_loop_neon
- vld1.u8 {d23}, [r0], r1 ;load src data
- vld1.u8 {d24}, [r0], r1
- vld1.u8 {d25}, [r0], r1
- vld1.u8 {d26}, [r0], r1
-
- vmull.u8 q3, d18, d0 ;(src_ptr[-2] * vp8_filter[0])
- vmull.u8 q4, d19, d0
- vmull.u8 q5, d20, d0
- vmull.u8 q6, d21, d0
-
- vmlsl.u8 q3, d19, d1 ;-(src_ptr[-1] * vp8_filter[1])
- vmlsl.u8 q4, d20, d1
- vmlsl.u8 q5, d21, d1
- vmlsl.u8 q6, d22, d1
-
- vmlsl.u8 q3, d22, d4 ;-(src_ptr[2] * vp8_filter[4])
- vmlsl.u8 q4, d23, d4
- vmlsl.u8 q5, d24, d4
- vmlsl.u8 q6, d25, d4
-
- vmlal.u8 q3, d20, d2 ;(src_ptr[0] * vp8_filter[2])
- vmlal.u8 q4, d21, d2
- vmlal.u8 q5, d22, d2
- vmlal.u8 q6, d23, d2
-
- vmlal.u8 q3, d23, d5 ;(src_ptr[3] * vp8_filter[5])
- vmlal.u8 q4, d24, d5
- vmlal.u8 q5, d25, d5
- vmlal.u8 q6, d26, d5
-
- vmull.u8 q7, d21, d3 ;(src_ptr[1] * vp8_filter[3])
- vmull.u8 q8, d22, d3
- vmull.u8 q9, d23, d3
- vmull.u8 q10, d24, d3
-
- subs r12, r12, #1
-
- vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters)
- vqadd.s16 q8, q4
- vqadd.s16 q9, q5
- vqadd.s16 q10, q6
-
- vqrshrun.s16 d6, q7, #7 ;shift/round/saturate to u8
- vqrshrun.s16 d7, q8, #7
- vqrshrun.s16 d8, q9, #7
- vqrshrun.s16 d9, q10, #7
-
- vst1.u8 {d6}, [r4], r5 ;store result
- vmov q9, q11
- vst1.u8 {d7}, [r4], r5
- vmov q10, q12
- vst1.u8 {d8}, [r4], r5
- vmov d22, d26
- vst1.u8 {d9}, [r4], r5
-
- bne secondpass_only_inner_loop_neon
-
- subs r3, r3, #1
- sub r0, r0, r1, lsl #4
- sub r0, r0, r1, lsl #2
- sub r0, r0, r1
- add r0, r0, #8
-
- sub r4, r4, r5, lsl #4
- add r4, r4, #8
-
- bne filt_blk2d_spo16x16_outloop_neon
-
- pop {r4-r5,pc}
-
- ENDP
-
-;-----------------
- END
diff --git a/vp8/common/arm/neon/sixtappredict4x4_neon.asm b/vp8/common/arm/neon/sixtappredict4x4_neon.asm
deleted file mode 100644
index a4222bc..0000000
--- a/vp8/common/arm/neon/sixtappredict4x4_neon.asm
+++ /dev/null
@@ -1,422 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8_sixtap_predict4x4_neon|
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-
-filter4_coeff
- DCD 0, 0, 128, 0, 0, 0, 0, 0
- DCD 0, -6, 123, 12, -1, 0, 0, 0
- DCD 2, -11, 108, 36, -8, 1, 0, 0
- DCD 0, -9, 93, 50, -6, 0, 0, 0
- DCD 3, -16, 77, 77, -16, 3, 0, 0
- DCD 0, -6, 50, 93, -9, 0, 0, 0
- DCD 1, -8, 36, 108, -11, 2, 0, 0
- DCD 0, -1, 12, 123, -6, 0, 0, 0
-
-; r0 unsigned char *src_ptr,
-; r1 int src_pixels_per_line,
-; r2 int xoffset,
-; r3 int yoffset,
-; stack(r4) unsigned char *dst_ptr,
-; stack(lr) int dst_pitch
-
-|vp8_sixtap_predict4x4_neon| PROC
- push {r4, lr}
-
- adr r12, filter4_coeff
- ldr r4, [sp, #8] ;load parameters from stack
- ldr lr, [sp, #12] ;load parameters from stack
-
- cmp r2, #0 ;skip first_pass filter if xoffset=0
- beq secondpass_filter4x4_only
-
- add r2, r12, r2, lsl #5 ;calculate filter location
-
- cmp r3, #0 ;skip second_pass filter if yoffset=0
- vld1.s32 {q14, q15}, [r2] ;load first_pass filter
-
- beq firstpass_filter4x4_only
-
- vabs.s32 q12, q14 ;get abs(filer_parameters)
- vabs.s32 q13, q15
-
- sub r0, r0, #2 ;go back 2 columns of src data
- sub r0, r0, r1, lsl #1 ;go back 2 lines of src data
-
-;First pass: output_height lines x output_width columns (9x4)
- vld1.u8 {q3}, [r0], r1 ;load first 4-line src data
- vdup.8 d0, d24[0] ;first_pass filter (d0-d5)
- vld1.u8 {q4}, [r0], r1
- vdup.8 d1, d24[4]
- vld1.u8 {q5}, [r0], r1
- vdup.8 d2, d25[0]
- vld1.u8 {q6}, [r0], r1
- vdup.8 d3, d25[4]
- vdup.8 d4, d26[0]
- vdup.8 d5, d26[4]
-
- pld [r0]
- pld [r0, r1]
- pld [r0, r1, lsl #1]
-
- vext.8 d18, d6, d7, #5 ;construct src_ptr[3]
- vext.8 d19, d8, d9, #5
- vext.8 d20, d10, d11, #5
- vext.8 d21, d12, d13, #5
-
- vswp d7, d8 ;discard 2nd half data after src_ptr[3] is done
- vswp d11, d12
-
- vzip.32 d18, d19 ;put 2-line data in 1 register (src_ptr[3])
- vzip.32 d20, d21
- vmull.u8 q7, d18, d5 ;(src_ptr[3] * vp8_filter[5])
- vmull.u8 q8, d20, d5
-
- vmov q4, q3 ;keep original src data in q4 q6
- vmov q6, q5
-
- vzip.32 d6, d7 ;construct src_ptr[-2], and put 2-line data together
- vzip.32 d10, d11
- vshr.u64 q9, q4, #8 ;construct src_ptr[-1]
- vshr.u64 q10, q6, #8
- vmlal.u8 q7, d6, d0 ;+(src_ptr[-2] * vp8_filter[0])
- vmlal.u8 q8, d10, d0
-
- vzip.32 d18, d19 ;put 2-line data in 1 register (src_ptr[-1])
- vzip.32 d20, d21
- vshr.u64 q3, q4, #32 ;construct src_ptr[2]
- vshr.u64 q5, q6, #32
- vmlsl.u8 q7, d18, d1 ;-(src_ptr[-1] * vp8_filter[1])
- vmlsl.u8 q8, d20, d1
-
- vzip.32 d6, d7 ;put 2-line data in 1 register (src_ptr[2])
- vzip.32 d10, d11
- vshr.u64 q9, q4, #16 ;construct src_ptr[0]
- vshr.u64 q10, q6, #16
- vmlsl.u8 q7, d6, d4 ;-(src_ptr[2] * vp8_filter[4])
- vmlsl.u8 q8, d10, d4
-
- vzip.32 d18, d19 ;put 2-line data in 1 register (src_ptr[0])
- vzip.32 d20, d21
- vshr.u64 q3, q4, #24 ;construct src_ptr[1]
- vshr.u64 q5, q6, #24
- vmlal.u8 q7, d18, d2 ;(src_ptr[0] * vp8_filter[2])
- vmlal.u8 q8, d20, d2
-
- vzip.32 d6, d7 ;put 2-line data in 1 register (src_ptr[1])
- vzip.32 d10, d11
- vmull.u8 q9, d6, d3 ;(src_ptr[1] * vp8_filter[3])
- vmull.u8 q10, d10, d3
-
- vld1.u8 {q3}, [r0], r1 ;load rest 5-line src data
- vld1.u8 {q4}, [r0], r1
-
- vqadd.s16 q7, q9 ;sum of all (src_data*filter_parameters)
- vqadd.s16 q8, q10
-
- vld1.u8 {q5}, [r0], r1
- vld1.u8 {q6}, [r0], r1
-
- vqrshrun.s16 d27, q7, #7 ;shift/round/saturate to u8
- vqrshrun.s16 d28, q8, #7
-
- ;First Pass on rest 5-line data
- vld1.u8 {q11}, [r0], r1
-
- vext.8 d18, d6, d7, #5 ;construct src_ptr[3]
- vext.8 d19, d8, d9, #5
- vext.8 d20, d10, d11, #5
- vext.8 d21, d12, d13, #5
-
- vswp d7, d8 ;discard 2nd half data after src_ptr[3] is done
- vswp d11, d12
-
- vzip.32 d18, d19 ;put 2-line data in 1 register (src_ptr[3])
- vzip.32 d20, d21
- vext.8 d31, d22, d23, #5 ;construct src_ptr[3]
- vmull.u8 q7, d18, d5 ;(src_ptr[3] * vp8_filter[5])
- vmull.u8 q8, d20, d5
- vmull.u8 q12, d31, d5 ;(src_ptr[3] * vp8_filter[5])
-
- vmov q4, q3 ;keep original src data in q4 q6
- vmov q6, q5
-
- vzip.32 d6, d7 ;construct src_ptr[-2], and put 2-line data together
- vzip.32 d10, d11
- vshr.u64 q9, q4, #8 ;construct src_ptr[-1]
- vshr.u64 q10, q6, #8
-
- vmlal.u8 q7, d6, d0 ;+(src_ptr[-2] * vp8_filter[0])
- vmlal.u8 q8, d10, d0
- vmlal.u8 q12, d22, d0 ;(src_ptr[-2] * vp8_filter[0])
-
- vzip.32 d18, d19 ;put 2-line data in 1 register (src_ptr[-1])
- vzip.32 d20, d21
- vshr.u64 q3, q4, #32 ;construct src_ptr[2]
- vshr.u64 q5, q6, #32
- vext.8 d31, d22, d23, #1 ;construct src_ptr[-1]
-
- vmlsl.u8 q7, d18, d1 ;-(src_ptr[-1] * vp8_filter[1])
- vmlsl.u8 q8, d20, d1
- vmlsl.u8 q12, d31, d1 ;-(src_ptr[-1] * vp8_filter[1])
-
- vzip.32 d6, d7 ;put 2-line data in 1 register (src_ptr[2])
- vzip.32 d10, d11
- vshr.u64 q9, q4, #16 ;construct src_ptr[0]
- vshr.u64 q10, q6, #16
- vext.8 d31, d22, d23, #4 ;construct src_ptr[2]
-
- vmlsl.u8 q7, d6, d4 ;-(src_ptr[2] * vp8_filter[4])
- vmlsl.u8 q8, d10, d4
- vmlsl.u8 q12, d31, d4 ;-(src_ptr[2] * vp8_filter[4])
-
- vzip.32 d18, d19 ;put 2-line data in 1 register (src_ptr[0])
- vzip.32 d20, d21
- vshr.u64 q3, q4, #24 ;construct src_ptr[1]
- vshr.u64 q5, q6, #24
- vext.8 d31, d22, d23, #2 ;construct src_ptr[0]
-
- vmlal.u8 q7, d18, d2 ;(src_ptr[0] * vp8_filter[2])
- vmlal.u8 q8, d20, d2
- vmlal.u8 q12, d31, d2 ;(src_ptr[0] * vp8_filter[2])
-
- vzip.32 d6, d7 ;put 2-line data in 1 register (src_ptr[1])
- vzip.32 d10, d11
- vext.8 d31, d22, d23, #3 ;construct src_ptr[1]
- vmull.u8 q9, d6, d3 ;(src_ptr[1] * vp8_filter[3])
- vmull.u8 q10, d10, d3
- vmull.u8 q11, d31, d3 ;(src_ptr[1] * vp8_filter[3])
-
- add r3, r12, r3, lsl #5
-
- vqadd.s16 q7, q9 ;sum of all (src_data*filter_parameters)
- vqadd.s16 q8, q10
- vqadd.s16 q12, q11
-
- vext.8 d23, d27, d28, #4
- vld1.s32 {q5, q6}, [r3] ;load second_pass filter
-
- vqrshrun.s16 d29, q7, #7 ;shift/round/saturate to u8
- vqrshrun.s16 d30, q8, #7
- vqrshrun.s16 d31, q12, #7
-
-;Second pass: 4x4
- vabs.s32 q7, q5
- vabs.s32 q8, q6
-
- vext.8 d24, d28, d29, #4
- vext.8 d25, d29, d30, #4
- vext.8 d26, d30, d31, #4
-
- vdup.8 d0, d14[0] ;second_pass filter parameters (d0-d5)
- vdup.8 d1, d14[4]
- vdup.8 d2, d15[0]
- vdup.8 d3, d15[4]
- vdup.8 d4, d16[0]
- vdup.8 d5, d16[4]
-
- vmull.u8 q3, d27, d0 ;(src_ptr[-2] * vp8_filter[0])
- vmull.u8 q4, d28, d0
-
- vmull.u8 q5, d25, d5 ;(src_ptr[3] * vp8_filter[5])
- vmull.u8 q6, d26, d5
-
- vmlsl.u8 q3, d29, d4 ;-(src_ptr[2] * vp8_filter[4])
- vmlsl.u8 q4, d30, d4
-
- vmlsl.u8 q5, d23, d1 ;-(src_ptr[-1] * vp8_filter[1])
- vmlsl.u8 q6, d24, d1
-
- vmlal.u8 q3, d28, d2 ;(src_ptr[0] * vp8_filter[2])
- vmlal.u8 q4, d29, d2
-
- vmlal.u8 q5, d24, d3 ;(src_ptr[1] * vp8_filter[3])
- vmlal.u8 q6, d25, d3
-
- add r0, r4, lr
- add r1, r0, lr
- add r2, r1, lr
-
- vqadd.s16 q5, q3 ;sum of all (src_data*filter_parameters)
- vqadd.s16 q6, q4
-
- vqrshrun.s16 d3, q5, #7 ;shift/round/saturate to u8
- vqrshrun.s16 d4, q6, #7
-
- vst1.32 {d3[0]}, [r4] ;store result
- vst1.32 {d3[1]}, [r0]
- vst1.32 {d4[0]}, [r1]
- vst1.32 {d4[1]}, [r2]
-
- pop {r4, pc}
-
-
-;---------------------
-firstpass_filter4x4_only
- vabs.s32 q12, q14 ;get abs(filer_parameters)
- vabs.s32 q13, q15
-
- sub r0, r0, #2 ;go back 2 columns of src data
-
-;First pass: output_height lines x output_width columns (4x4)
- vld1.u8 {q3}, [r0], r1 ;load first 4-line src data
- vdup.8 d0, d24[0] ;first_pass filter (d0-d5)
- vld1.u8 {q4}, [r0], r1
- vdup.8 d1, d24[4]
- vld1.u8 {q5}, [r0], r1
- vdup.8 d2, d25[0]
- vld1.u8 {q6}, [r0], r1
-
- vdup.8 d3, d25[4]
- vdup.8 d4, d26[0]
- vdup.8 d5, d26[4]
-
- vext.8 d18, d6, d7, #5 ;construct src_ptr[3]
- vext.8 d19, d8, d9, #5
- vext.8 d20, d10, d11, #5
- vext.8 d21, d12, d13, #5
-
- vswp d7, d8 ;discard 2nd half data after src_ptr[3] is done
- vswp d11, d12
-
- vzip.32 d18, d19 ;put 2-line data in 1 register (src_ptr[3])
- vzip.32 d20, d21
- vmull.u8 q7, d18, d5 ;(src_ptr[3] * vp8_filter[5])
- vmull.u8 q8, d20, d5
-
- vmov q4, q3 ;keep original src data in q4 q6
- vmov q6, q5
-
- vzip.32 d6, d7 ;construct src_ptr[-2], and put 2-line data together
- vzip.32 d10, d11
- vshr.u64 q9, q4, #8 ;construct src_ptr[-1]
- vshr.u64 q10, q6, #8
- vmlal.u8 q7, d6, d0 ;+(src_ptr[-2] * vp8_filter[0])
- vmlal.u8 q8, d10, d0
-
- vzip.32 d18, d19 ;put 2-line data in 1 register (src_ptr[-1])
- vzip.32 d20, d21
- vshr.u64 q3, q4, #32 ;construct src_ptr[2]
- vshr.u64 q5, q6, #32
- vmlsl.u8 q7, d18, d1 ;-(src_ptr[-1] * vp8_filter[1])
- vmlsl.u8 q8, d20, d1
-
- vzip.32 d6, d7 ;put 2-line data in 1 register (src_ptr[2])
- vzip.32 d10, d11
- vshr.u64 q9, q4, #16 ;construct src_ptr[0]
- vshr.u64 q10, q6, #16
- vmlsl.u8 q7, d6, d4 ;-(src_ptr[2] * vp8_filter[4])
- vmlsl.u8 q8, d10, d4
-
- vzip.32 d18, d19 ;put 2-line data in 1 register (src_ptr[0])
- vzip.32 d20, d21
- vshr.u64 q3, q4, #24 ;construct src_ptr[1]
- vshr.u64 q5, q6, #24
- vmlal.u8 q7, d18, d2 ;(src_ptr[0] * vp8_filter[2])
- vmlal.u8 q8, d20, d2
-
- vzip.32 d6, d7 ;put 2-line data in 1 register (src_ptr[1])
- vzip.32 d10, d11
- vmull.u8 q9, d6, d3 ;(src_ptr[1] * vp8_filter[3])
- vmull.u8 q10, d10, d3
-
- add r0, r4, lr
- add r1, r0, lr
- add r2, r1, lr
-
- vqadd.s16 q7, q9 ;sum of all (src_data*filter_parameters)
- vqadd.s16 q8, q10
-
- vqrshrun.s16 d27, q7, #7 ;shift/round/saturate to u8
- vqrshrun.s16 d28, q8, #7
-
- vst1.32 {d27[0]}, [r4] ;store result
- vst1.32 {d27[1]}, [r0]
- vst1.32 {d28[0]}, [r1]
- vst1.32 {d28[1]}, [r2]
-
- pop {r4, pc}
-
-
-;---------------------
-secondpass_filter4x4_only
- sub r0, r0, r1, lsl #1
- add r3, r12, r3, lsl #5
-
- vld1.32 {d27[0]}, [r0], r1 ;load src data
- vld1.s32 {q5, q6}, [r3] ;load second_pass filter
- vld1.32 {d27[1]}, [r0], r1
- vabs.s32 q7, q5
- vld1.32 {d28[0]}, [r0], r1
- vabs.s32 q8, q6
- vld1.32 {d28[1]}, [r0], r1
- vdup.8 d0, d14[0] ;second_pass filter parameters (d0-d5)
- vld1.32 {d29[0]}, [r0], r1
- vdup.8 d1, d14[4]
- vld1.32 {d29[1]}, [r0], r1
- vdup.8 d2, d15[0]
- vld1.32 {d30[0]}, [r0], r1
- vdup.8 d3, d15[4]
- vld1.32 {d30[1]}, [r0], r1
- vdup.8 d4, d16[0]
- vld1.32 {d31[0]}, [r0], r1
- vdup.8 d5, d16[4]
-
- vext.8 d23, d27, d28, #4
- vext.8 d24, d28, d29, #4
- vext.8 d25, d29, d30, #4
- vext.8 d26, d30, d31, #4
-
- vmull.u8 q3, d27, d0 ;(src_ptr[-2] * vp8_filter[0])
- vmull.u8 q4, d28, d0
-
- vmull.u8 q5, d25, d5 ;(src_ptr[3] * vp8_filter[5])
- vmull.u8 q6, d26, d5
-
- vmlsl.u8 q3, d29, d4 ;-(src_ptr[2] * vp8_filter[4])
- vmlsl.u8 q4, d30, d4
-
- vmlsl.u8 q5, d23, d1 ;-(src_ptr[-1] * vp8_filter[1])
- vmlsl.u8 q6, d24, d1
-
- vmlal.u8 q3, d28, d2 ;(src_ptr[0] * vp8_filter[2])
- vmlal.u8 q4, d29, d2
-
- vmlal.u8 q5, d24, d3 ;(src_ptr[1] * vp8_filter[3])
- vmlal.u8 q6, d25, d3
-
- add r0, r4, lr
- add r1, r0, lr
- add r2, r1, lr
-
- vqadd.s16 q5, q3 ;sum of all (src_data*filter_parameters)
- vqadd.s16 q6, q4
-
- vqrshrun.s16 d3, q5, #7 ;shift/round/saturate to u8
- vqrshrun.s16 d4, q6, #7
-
- vst1.32 {d3[0]}, [r4] ;store result
- vst1.32 {d3[1]}, [r0]
- vst1.32 {d4[0]}, [r1]
- vst1.32 {d4[1]}, [r2]
-
- pop {r4, pc}
-
- ENDP
-
-;-----------------
-
- END
diff --git a/vp8/common/arm/neon/sixtappredict8x4_neon.asm b/vp8/common/arm/neon/sixtappredict8x4_neon.asm
deleted file mode 100644
index a57ec01..0000000
--- a/vp8/common/arm/neon/sixtappredict8x4_neon.asm
+++ /dev/null
@@ -1,473 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8_sixtap_predict8x4_neon|
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-
-filter8_coeff
- DCD 0, 0, 128, 0, 0, 0, 0, 0
- DCD 0, -6, 123, 12, -1, 0, 0, 0
- DCD 2, -11, 108, 36, -8, 1, 0, 0
- DCD 0, -9, 93, 50, -6, 0, 0, 0
- DCD 3, -16, 77, 77, -16, 3, 0, 0
- DCD 0, -6, 50, 93, -9, 0, 0, 0
- DCD 1, -8, 36, 108, -11, 2, 0, 0
- DCD 0, -1, 12, 123, -6, 0, 0, 0
-
-; r0 unsigned char *src_ptr,
-; r1 int src_pixels_per_line,
-; r2 int xoffset,
-; r3 int yoffset,
-; r4 unsigned char *dst_ptr,
-; stack(r5) int dst_pitch
-
-|vp8_sixtap_predict8x4_neon| PROC
- push {r4-r5, lr}
-
- adr r12, filter8_coeff
- ldr r4, [sp, #12] ;load parameters from stack
- ldr r5, [sp, #16] ;load parameters from stack
-
- cmp r2, #0 ;skip first_pass filter if xoffset=0
- beq secondpass_filter8x4_only
-
- add r2, r12, r2, lsl #5 ;calculate filter location
-
- cmp r3, #0 ;skip second_pass filter if yoffset=0
-
- vld1.s32 {q14, q15}, [r2] ;load first_pass filter
-
- beq firstpass_filter8x4_only
-
- sub sp, sp, #32 ;reserve space on stack for temporary storage
- vabs.s32 q12, q14
- vabs.s32 q13, q15
-
- sub r0, r0, #2 ;move srcptr back to (line-2) and (column-2)
- mov lr, sp
- sub r0, r0, r1, lsl #1
-
- vdup.8 d0, d24[0] ;first_pass filter (d0-d5)
- vdup.8 d1, d24[4]
- vdup.8 d2, d25[0]
-
-;First pass: output_height lines x output_width columns (9x8)
- vld1.u8 {q3}, [r0], r1 ;load src data
- vdup.8 d3, d25[4]
- vld1.u8 {q4}, [r0], r1
- vdup.8 d4, d26[0]
- vld1.u8 {q5}, [r0], r1
- vdup.8 d5, d26[4]
- vld1.u8 {q6}, [r0], r1
-
- pld [r0]
- pld [r0, r1]
- pld [r0, r1, lsl #1]
-
- vmull.u8 q7, d6, d0 ;(src_ptr[-2] * vp8_filter[0])
- vmull.u8 q8, d8, d0
- vmull.u8 q9, d10, d0
- vmull.u8 q10, d12, d0
-
- vext.8 d28, d6, d7, #1 ;construct src_ptr[-1]
- vext.8 d29, d8, d9, #1
- vext.8 d30, d10, d11, #1
- vext.8 d31, d12, d13, #1
-
- vmlsl.u8 q7, d28, d1 ;-(src_ptr[-1] * vp8_filter[1])
- vmlsl.u8 q8, d29, d1
- vmlsl.u8 q9, d30, d1
- vmlsl.u8 q10, d31, d1
-
- vext.8 d28, d6, d7, #4 ;construct src_ptr[2]
- vext.8 d29, d8, d9, #4
- vext.8 d30, d10, d11, #4
- vext.8 d31, d12, d13, #4
-
- vmlsl.u8 q7, d28, d4 ;-(src_ptr[2] * vp8_filter[4])
- vmlsl.u8 q8, d29, d4
- vmlsl.u8 q9, d30, d4
- vmlsl.u8 q10, d31, d4
-
- vext.8 d28, d6, d7, #2 ;construct src_ptr[0]
- vext.8 d29, d8, d9, #2
- vext.8 d30, d10, d11, #2
- vext.8 d31, d12, d13, #2
-
- vmlal.u8 q7, d28, d2 ;(src_ptr[0] * vp8_filter[2])
- vmlal.u8 q8, d29, d2
- vmlal.u8 q9, d30, d2
- vmlal.u8 q10, d31, d2
-
- vext.8 d28, d6, d7, #5 ;construct src_ptr[3]
- vext.8 d29, d8, d9, #5
- vext.8 d30, d10, d11, #5
- vext.8 d31, d12, d13, #5
-
- vmlal.u8 q7, d28, d5 ;(src_ptr[3] * vp8_filter[5])
- vmlal.u8 q8, d29, d5
- vmlal.u8 q9, d30, d5
- vmlal.u8 q10, d31, d5
-
- vext.8 d28, d6, d7, #3 ;construct src_ptr[1]
- vext.8 d29, d8, d9, #3
- vext.8 d30, d10, d11, #3
- vext.8 d31, d12, d13, #3
-
- vmull.u8 q3, d28, d3 ;(src_ptr[1] * vp8_filter[3])
- vmull.u8 q4, d29, d3
- vmull.u8 q5, d30, d3
- vmull.u8 q6, d31, d3
-
- vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters)
- vqadd.s16 q8, q4
- vqadd.s16 q9, q5
- vqadd.s16 q10, q6
-
- vld1.u8 {q3}, [r0], r1 ;load src data
-
- vqrshrun.s16 d22, q7, #7 ;shift/round/saturate to u8
- vqrshrun.s16 d23, q8, #7
- vqrshrun.s16 d24, q9, #7
- vqrshrun.s16 d25, q10, #7
-
- vld1.u8 {q4}, [r0], r1
- vst1.u8 {d22}, [lr]! ;store result
- vld1.u8 {q5}, [r0], r1
- vst1.u8 {d23}, [lr]!
- vld1.u8 {q6}, [r0], r1
- vst1.u8 {d24}, [lr]!
- vld1.u8 {q7}, [r0], r1
- vst1.u8 {d25}, [lr]!
-
- ;first_pass filtering on the rest 5-line data
- vmull.u8 q8, d6, d0 ;(src_ptr[-2] * vp8_filter[0])
- vmull.u8 q9, d8, d0
- vmull.u8 q10, d10, d0
- vmull.u8 q11, d12, d0
- vmull.u8 q12, d14, d0
-
- vext.8 d27, d6, d7, #1 ;construct src_ptr[-1]
- vext.8 d28, d8, d9, #1
- vext.8 d29, d10, d11, #1
- vext.8 d30, d12, d13, #1
- vext.8 d31, d14, d15, #1
-
- vmlsl.u8 q8, d27, d1 ;-(src_ptr[-1] * vp8_filter[1])
- vmlsl.u8 q9, d28, d1
- vmlsl.u8 q10, d29, d1
- vmlsl.u8 q11, d30, d1
- vmlsl.u8 q12, d31, d1
-
- vext.8 d27, d6, d7, #4 ;construct src_ptr[2]
- vext.8 d28, d8, d9, #4
- vext.8 d29, d10, d11, #4
- vext.8 d30, d12, d13, #4
- vext.8 d31, d14, d15, #4
-
- vmlsl.u8 q8, d27, d4 ;-(src_ptr[2] * vp8_filter[4])
- vmlsl.u8 q9, d28, d4
- vmlsl.u8 q10, d29, d4
- vmlsl.u8 q11, d30, d4
- vmlsl.u8 q12, d31, d4
-
- vext.8 d27, d6, d7, #2 ;construct src_ptr[0]
- vext.8 d28, d8, d9, #2
- vext.8 d29, d10, d11, #2
- vext.8 d30, d12, d13, #2
- vext.8 d31, d14, d15, #2
-
- vmlal.u8 q8, d27, d2 ;(src_ptr[0] * vp8_filter[2])
- vmlal.u8 q9, d28, d2
- vmlal.u8 q10, d29, d2
- vmlal.u8 q11, d30, d2
- vmlal.u8 q12, d31, d2
-
- vext.8 d27, d6, d7, #5 ;construct src_ptr[3]
- vext.8 d28, d8, d9, #5
- vext.8 d29, d10, d11, #5
- vext.8 d30, d12, d13, #5
- vext.8 d31, d14, d15, #5
-
- vmlal.u8 q8, d27, d5 ;(src_ptr[3] * vp8_filter[5])
- vmlal.u8 q9, d28, d5
- vmlal.u8 q10, d29, d5
- vmlal.u8 q11, d30, d5
- vmlal.u8 q12, d31, d5
-
- vext.8 d27, d6, d7, #3 ;construct src_ptr[1]
- vext.8 d28, d8, d9, #3
- vext.8 d29, d10, d11, #3
- vext.8 d30, d12, d13, #3
- vext.8 d31, d14, d15, #3
-
- vmull.u8 q3, d27, d3 ;(src_ptr[1] * vp8_filter[3])
- vmull.u8 q4, d28, d3
- vmull.u8 q5, d29, d3
- vmull.u8 q6, d30, d3
- vmull.u8 q7, d31, d3
-
- vqadd.s16 q8, q3 ;sum of all (src_data*filter_parameters)
- vqadd.s16 q9, q4
- vqadd.s16 q10, q5
- vqadd.s16 q11, q6
- vqadd.s16 q12, q7
-
- vqrshrun.s16 d26, q8, #7 ;shift/round/saturate to u8
- vqrshrun.s16 d27, q9, #7
- vqrshrun.s16 d28, q10, #7
- vqrshrun.s16 d29, q11, #7 ;load intermediate data from stack
- vqrshrun.s16 d30, q12, #7
-
-;Second pass: 8x4
-;secondpass_filter
- add r3, r12, r3, lsl #5
- sub lr, lr, #32
-
- vld1.s32 {q5, q6}, [r3] ;load second_pass filter
- vld1.u8 {q11}, [lr]!
-
- vabs.s32 q7, q5
- vabs.s32 q8, q6
-
- vld1.u8 {q12}, [lr]!
-
- vdup.8 d0, d14[0] ;second_pass filter parameters (d0-d5)
- vdup.8 d1, d14[4]
- vdup.8 d2, d15[0]
- vdup.8 d3, d15[4]
- vdup.8 d4, d16[0]
- vdup.8 d5, d16[4]
-
- vmull.u8 q3, d22, d0 ;(src_ptr[-2] * vp8_filter[0])
- vmull.u8 q4, d23, d0
- vmull.u8 q5, d24, d0
- vmull.u8 q6, d25, d0
-
- vmlsl.u8 q3, d23, d1 ;-(src_ptr[-1] * vp8_filter[1])
- vmlsl.u8 q4, d24, d1
- vmlsl.u8 q5, d25, d1
- vmlsl.u8 q6, d26, d1
-
- vmlsl.u8 q3, d26, d4 ;-(src_ptr[2] * vp8_filter[4])
- vmlsl.u8 q4, d27, d4
- vmlsl.u8 q5, d28, d4
- vmlsl.u8 q6, d29, d4
-
- vmlal.u8 q3, d24, d2 ;(src_ptr[0] * vp8_filter[2])
- vmlal.u8 q4, d25, d2
- vmlal.u8 q5, d26, d2
- vmlal.u8 q6, d27, d2
-
- vmlal.u8 q3, d27, d5 ;(src_ptr[3] * vp8_filter[5])
- vmlal.u8 q4, d28, d5
- vmlal.u8 q5, d29, d5
- vmlal.u8 q6, d30, d5
-
- vmull.u8 q7, d25, d3 ;(src_ptr[1] * vp8_filter[3])
- vmull.u8 q8, d26, d3
- vmull.u8 q9, d27, d3
- vmull.u8 q10, d28, d3
-
- vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters)
- vqadd.s16 q8, q4
- vqadd.s16 q9, q5
- vqadd.s16 q10, q6
-
- vqrshrun.s16 d6, q7, #7 ;shift/round/saturate to u8
- vqrshrun.s16 d7, q8, #7
- vqrshrun.s16 d8, q9, #7
- vqrshrun.s16 d9, q10, #7
-
- vst1.u8 {d6}, [r4], r5 ;store result
- vst1.u8 {d7}, [r4], r5
- vst1.u8 {d8}, [r4], r5
- vst1.u8 {d9}, [r4], r5
-
- add sp, sp, #32
- pop {r4-r5,pc}
-
-;--------------------
-firstpass_filter8x4_only
- vabs.s32 q12, q14
- vabs.s32 q13, q15
-
- sub r0, r0, #2 ;move srcptr back to (line-2) and (column-2)
- vld1.u8 {q3}, [r0], r1 ;load src data
-
- vdup.8 d0, d24[0] ;first_pass filter (d0-d5)
- vld1.u8 {q4}, [r0], r1
- vdup.8 d1, d24[4]
- vld1.u8 {q5}, [r0], r1
- vdup.8 d2, d25[0]
- vld1.u8 {q6}, [r0], r1
- vdup.8 d3, d25[4]
- vdup.8 d4, d26[0]
- vdup.8 d5, d26[4]
-
-;First pass: output_height lines x output_width columns (4x8)
- pld [r0]
- pld [r0, r1]
- pld [r0, r1, lsl #1]
-
- vmull.u8 q7, d6, d0 ;(src_ptr[-2] * vp8_filter[0])
- vmull.u8 q8, d8, d0
- vmull.u8 q9, d10, d0
- vmull.u8 q10, d12, d0
-
- vext.8 d28, d6, d7, #1 ;construct src_ptr[-1]
- vext.8 d29, d8, d9, #1
- vext.8 d30, d10, d11, #1
- vext.8 d31, d12, d13, #1
-
- vmlsl.u8 q7, d28, d1 ;-(src_ptr[-1] * vp8_filter[1])
- vmlsl.u8 q8, d29, d1
- vmlsl.u8 q9, d30, d1
- vmlsl.u8 q10, d31, d1
-
- vext.8 d28, d6, d7, #4 ;construct src_ptr[2]
- vext.8 d29, d8, d9, #4
- vext.8 d30, d10, d11, #4
- vext.8 d31, d12, d13, #4
-
- vmlsl.u8 q7, d28, d4 ;-(src_ptr[2] * vp8_filter[4])
- vmlsl.u8 q8, d29, d4
- vmlsl.u8 q9, d30, d4
- vmlsl.u8 q10, d31, d4
-
- vext.8 d28, d6, d7, #2 ;construct src_ptr[0]
- vext.8 d29, d8, d9, #2
- vext.8 d30, d10, d11, #2
- vext.8 d31, d12, d13, #2
-
- vmlal.u8 q7, d28, d2 ;(src_ptr[0] * vp8_filter[2])
- vmlal.u8 q8, d29, d2
- vmlal.u8 q9, d30, d2
- vmlal.u8 q10, d31, d2
-
- vext.8 d28, d6, d7, #5 ;construct src_ptr[3]
- vext.8 d29, d8, d9, #5
- vext.8 d30, d10, d11, #5
- vext.8 d31, d12, d13, #5
-
- vmlal.u8 q7, d28, d5 ;(src_ptr[3] * vp8_filter[5])
- vmlal.u8 q8, d29, d5
- vmlal.u8 q9, d30, d5
- vmlal.u8 q10, d31, d5
-
- vext.8 d28, d6, d7, #3 ;construct src_ptr[1]
- vext.8 d29, d8, d9, #3
- vext.8 d30, d10, d11, #3
- vext.8 d31, d12, d13, #3
-
- vmull.u8 q3, d28, d3 ;(src_ptr[1] * vp8_filter[3])
- vmull.u8 q4, d29, d3
- vmull.u8 q5, d30, d3
- vmull.u8 q6, d31, d3
-
- vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters)
- vqadd.s16 q8, q4
- vqadd.s16 q9, q5
- vqadd.s16 q10, q6
-
- vqrshrun.s16 d22, q7, #7 ;shift/round/saturate to u8
- vqrshrun.s16 d23, q8, #7
- vqrshrun.s16 d24, q9, #7
- vqrshrun.s16 d25, q10, #7
-
- vst1.u8 {d22}, [r4], r5 ;store result
- vst1.u8 {d23}, [r4], r5
- vst1.u8 {d24}, [r4], r5
- vst1.u8 {d25}, [r4], r5
-
- pop {r4-r5,pc}
-
-;---------------------
-secondpass_filter8x4_only
-;Second pass: 8x4
- add r3, r12, r3, lsl #5
- sub r0, r0, r1, lsl #1
- vld1.s32 {q5, q6}, [r3] ;load second_pass filter
- vabs.s32 q7, q5
- vabs.s32 q8, q6
-
- vld1.u8 {d22}, [r0], r1
- vld1.u8 {d23}, [r0], r1
- vld1.u8 {d24}, [r0], r1
- vdup.8 d0, d14[0] ;second_pass filter parameters (d0-d5)
- vld1.u8 {d25}, [r0], r1
- vdup.8 d1, d14[4]
- vld1.u8 {d26}, [r0], r1
- vdup.8 d2, d15[0]
- vld1.u8 {d27}, [r0], r1
- vdup.8 d3, d15[4]
- vld1.u8 {d28}, [r0], r1
- vdup.8 d4, d16[0]
- vld1.u8 {d29}, [r0], r1
- vdup.8 d5, d16[4]
- vld1.u8 {d30}, [r0], r1
-
- vmull.u8 q3, d22, d0 ;(src_ptr[-2] * vp8_filter[0])
- vmull.u8 q4, d23, d0
- vmull.u8 q5, d24, d0
- vmull.u8 q6, d25, d0
-
- vmlsl.u8 q3, d23, d1 ;-(src_ptr[-1] * vp8_filter[1])
- vmlsl.u8 q4, d24, d1
- vmlsl.u8 q5, d25, d1
- vmlsl.u8 q6, d26, d1
-
- vmlsl.u8 q3, d26, d4 ;-(src_ptr[2] * vp8_filter[4])
- vmlsl.u8 q4, d27, d4
- vmlsl.u8 q5, d28, d4
- vmlsl.u8 q6, d29, d4
-
- vmlal.u8 q3, d24, d2 ;(src_ptr[0] * vp8_filter[2])
- vmlal.u8 q4, d25, d2
- vmlal.u8 q5, d26, d2
- vmlal.u8 q6, d27, d2
-
- vmlal.u8 q3, d27, d5 ;(src_ptr[3] * vp8_filter[5])
- vmlal.u8 q4, d28, d5
- vmlal.u8 q5, d29, d5
- vmlal.u8 q6, d30, d5
-
- vmull.u8 q7, d25, d3 ;(src_ptr[1] * vp8_filter[3])
- vmull.u8 q8, d26, d3
- vmull.u8 q9, d27, d3
- vmull.u8 q10, d28, d3
-
- vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters)
- vqadd.s16 q8, q4
- vqadd.s16 q9, q5
- vqadd.s16 q10, q6
-
- vqrshrun.s16 d6, q7, #7 ;shift/round/saturate to u8
- vqrshrun.s16 d7, q8, #7
- vqrshrun.s16 d8, q9, #7
- vqrshrun.s16 d9, q10, #7
-
- vst1.u8 {d6}, [r4], r5 ;store result
- vst1.u8 {d7}, [r4], r5
- vst1.u8 {d8}, [r4], r5
- vst1.u8 {d9}, [r4], r5
-
- pop {r4-r5,pc}
-
- ENDP
-
-;-----------------
-
- END
diff --git a/vp8/common/arm/neon/sixtappredict8x8_neon.asm b/vp8/common/arm/neon/sixtappredict8x8_neon.asm
deleted file mode 100644
index 00ed5ae..0000000
--- a/vp8/common/arm/neon/sixtappredict8x8_neon.asm
+++ /dev/null
@@ -1,524 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8_sixtap_predict8x8_neon|
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-
-filter8_coeff
- DCD 0, 0, 128, 0, 0, 0, 0, 0
- DCD 0, -6, 123, 12, -1, 0, 0, 0
- DCD 2, -11, 108, 36, -8, 1, 0, 0
- DCD 0, -9, 93, 50, -6, 0, 0, 0
- DCD 3, -16, 77, 77, -16, 3, 0, 0
- DCD 0, -6, 50, 93, -9, 0, 0, 0
- DCD 1, -8, 36, 108, -11, 2, 0, 0
- DCD 0, -1, 12, 123, -6, 0, 0, 0
-
-; r0 unsigned char *src_ptr,
-; r1 int src_pixels_per_line,
-; r2 int xoffset,
-; r3 int yoffset,
-; stack(r4) unsigned char *dst_ptr,
-; stack(r5) int dst_pitch
-
-|vp8_sixtap_predict8x8_neon| PROC
- push {r4-r5, lr}
-
- adr r12, filter8_coeff
-
- ldr r4, [sp, #12] ;load parameters from stack
- ldr r5, [sp, #16] ;load parameters from stack
-
- cmp r2, #0 ;skip first_pass filter if xoffset=0
- beq secondpass_filter8x8_only
-
- add r2, r12, r2, lsl #5 ;calculate filter location
-
- cmp r3, #0 ;skip second_pass filter if yoffset=0
-
- vld1.s32 {q14, q15}, [r2] ;load first_pass filter
-
- beq firstpass_filter8x8_only
-
- sub sp, sp, #64 ;reserve space on stack for temporary storage
- mov lr, sp
-
- vabs.s32 q12, q14
- vabs.s32 q13, q15
-
- mov r2, #2 ;loop counter
- sub r0, r0, #2 ;move srcptr back to (line-2) and (column-2)
- sub r0, r0, r1, lsl #1
-
- vdup.8 d0, d24[0] ;first_pass filter (d0-d5)
- vdup.8 d1, d24[4]
- vdup.8 d2, d25[0]
-
-;First pass: output_height lines x output_width columns (13x8)
- vld1.u8 {q3}, [r0], r1 ;load src data
- vdup.8 d3, d25[4]
- vld1.u8 {q4}, [r0], r1
- vdup.8 d4, d26[0]
- vld1.u8 {q5}, [r0], r1
- vdup.8 d5, d26[4]
- vld1.u8 {q6}, [r0], r1
-
-filt_blk2d_fp8x8_loop_neon
- pld [r0]
- pld [r0, r1]
- pld [r0, r1, lsl #1]
-
- vmull.u8 q7, d6, d0 ;(src_ptr[-2] * vp8_filter[0])
- vmull.u8 q8, d8, d0
- vmull.u8 q9, d10, d0
- vmull.u8 q10, d12, d0
-
- vext.8 d28, d6, d7, #1 ;construct src_ptr[-1]
- vext.8 d29, d8, d9, #1
- vext.8 d30, d10, d11, #1
- vext.8 d31, d12, d13, #1
-
- vmlsl.u8 q7, d28, d1 ;-(src_ptr[-1] * vp8_filter[1])
- vmlsl.u8 q8, d29, d1
- vmlsl.u8 q9, d30, d1
- vmlsl.u8 q10, d31, d1
-
- vext.8 d28, d6, d7, #4 ;construct src_ptr[2]
- vext.8 d29, d8, d9, #4
- vext.8 d30, d10, d11, #4
- vext.8 d31, d12, d13, #4
-
- vmlsl.u8 q7, d28, d4 ;-(src_ptr[2] * vp8_filter[4])
- vmlsl.u8 q8, d29, d4
- vmlsl.u8 q9, d30, d4
- vmlsl.u8 q10, d31, d4
-
- vext.8 d28, d6, d7, #2 ;construct src_ptr[0]
- vext.8 d29, d8, d9, #2
- vext.8 d30, d10, d11, #2
- vext.8 d31, d12, d13, #2
-
- vmlal.u8 q7, d28, d2 ;(src_ptr[0] * vp8_filter[2])
- vmlal.u8 q8, d29, d2
- vmlal.u8 q9, d30, d2
- vmlal.u8 q10, d31, d2
-
- vext.8 d28, d6, d7, #5 ;construct src_ptr[3]
- vext.8 d29, d8, d9, #5
- vext.8 d30, d10, d11, #5
- vext.8 d31, d12, d13, #5
-
- vmlal.u8 q7, d28, d5 ;(src_ptr[3] * vp8_filter[5])
- vmlal.u8 q8, d29, d5
- vmlal.u8 q9, d30, d5
- vmlal.u8 q10, d31, d5
-
- vext.8 d28, d6, d7, #3 ;construct src_ptr[1]
- vext.8 d29, d8, d9, #3
- vext.8 d30, d10, d11, #3
- vext.8 d31, d12, d13, #3
-
- vmull.u8 q3, d28, d3 ;(src_ptr[1] * vp8_filter[3])
- vmull.u8 q4, d29, d3
- vmull.u8 q5, d30, d3
- vmull.u8 q6, d31, d3
-
- subs r2, r2, #1
-
- vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters)
- vqadd.s16 q8, q4
- vqadd.s16 q9, q5
- vqadd.s16 q10, q6
-
- vld1.u8 {q3}, [r0], r1 ;load src data
-
- vqrshrun.s16 d22, q7, #7 ;shift/round/saturate to u8
- vqrshrun.s16 d23, q8, #7
- vqrshrun.s16 d24, q9, #7
- vqrshrun.s16 d25, q10, #7
-
- vst1.u8 {d22}, [lr]! ;store result
- vld1.u8 {q4}, [r0], r1
- vst1.u8 {d23}, [lr]!
- vld1.u8 {q5}, [r0], r1
- vst1.u8 {d24}, [lr]!
- vld1.u8 {q6}, [r0], r1
- vst1.u8 {d25}, [lr]!
-
- bne filt_blk2d_fp8x8_loop_neon
-
- ;first_pass filtering on the rest 5-line data
- ;vld1.u8 {q3}, [r0], r1 ;load src data
- ;vld1.u8 {q4}, [r0], r1
- ;vld1.u8 {q5}, [r0], r1
- ;vld1.u8 {q6}, [r0], r1
- vld1.u8 {q7}, [r0], r1
-
- vmull.u8 q8, d6, d0 ;(src_ptr[-2] * vp8_filter[0])
- vmull.u8 q9, d8, d0
- vmull.u8 q10, d10, d0
- vmull.u8 q11, d12, d0
- vmull.u8 q12, d14, d0
-
- vext.8 d27, d6, d7, #1 ;construct src_ptr[-1]
- vext.8 d28, d8, d9, #1
- vext.8 d29, d10, d11, #1
- vext.8 d30, d12, d13, #1
- vext.8 d31, d14, d15, #1
-
- vmlsl.u8 q8, d27, d1 ;-(src_ptr[-1] * vp8_filter[1])
- vmlsl.u8 q9, d28, d1
- vmlsl.u8 q10, d29, d1
- vmlsl.u8 q11, d30, d1
- vmlsl.u8 q12, d31, d1
-
- vext.8 d27, d6, d7, #4 ;construct src_ptr[2]
- vext.8 d28, d8, d9, #4
- vext.8 d29, d10, d11, #4
- vext.8 d30, d12, d13, #4
- vext.8 d31, d14, d15, #4
-
- vmlsl.u8 q8, d27, d4 ;-(src_ptr[2] * vp8_filter[4])
- vmlsl.u8 q9, d28, d4
- vmlsl.u8 q10, d29, d4
- vmlsl.u8 q11, d30, d4
- vmlsl.u8 q12, d31, d4
-
- vext.8 d27, d6, d7, #2 ;construct src_ptr[0]
- vext.8 d28, d8, d9, #2
- vext.8 d29, d10, d11, #2
- vext.8 d30, d12, d13, #2
- vext.8 d31, d14, d15, #2
-
- vmlal.u8 q8, d27, d2 ;(src_ptr[0] * vp8_filter[2])
- vmlal.u8 q9, d28, d2
- vmlal.u8 q10, d29, d2
- vmlal.u8 q11, d30, d2
- vmlal.u8 q12, d31, d2
-
- vext.8 d27, d6, d7, #5 ;construct src_ptr[3]
- vext.8 d28, d8, d9, #5
- vext.8 d29, d10, d11, #5
- vext.8 d30, d12, d13, #5
- vext.8 d31, d14, d15, #5
-
- vmlal.u8 q8, d27, d5 ;(src_ptr[3] * vp8_filter[5])
- vmlal.u8 q9, d28, d5
- vmlal.u8 q10, d29, d5
- vmlal.u8 q11, d30, d5
- vmlal.u8 q12, d31, d5
-
- vext.8 d27, d6, d7, #3 ;construct src_ptr[1]
- vext.8 d28, d8, d9, #3
- vext.8 d29, d10, d11, #3
- vext.8 d30, d12, d13, #3
- vext.8 d31, d14, d15, #3
-
- vmull.u8 q3, d27, d3 ;(src_ptr[1] * vp8_filter[3])
- vmull.u8 q4, d28, d3
- vmull.u8 q5, d29, d3
- vmull.u8 q6, d30, d3
- vmull.u8 q7, d31, d3
-
- vqadd.s16 q8, q3 ;sum of all (src_data*filter_parameters)
- vqadd.s16 q9, q4
- vqadd.s16 q10, q5
- vqadd.s16 q11, q6
- vqadd.s16 q12, q7
-
- add r3, r12, r3, lsl #5
-
- vqrshrun.s16 d26, q8, #7 ;shift/round/saturate to u8
- sub lr, lr, #64
- vqrshrun.s16 d27, q9, #7
- vld1.u8 {q9}, [lr]! ;load intermediate data from stack
- vqrshrun.s16 d28, q10, #7
- vld1.u8 {q10}, [lr]!
-
- vld1.s32 {q5, q6}, [r3] ;load second_pass filter
-
- vqrshrun.s16 d29, q11, #7
- vld1.u8 {q11}, [lr]!
-
- vabs.s32 q7, q5
- vabs.s32 q8, q6
-
- vqrshrun.s16 d30, q12, #7
- vld1.u8 {q12}, [lr]!
-
-;Second pass: 8x8
- mov r3, #2 ;loop counter
-
- vdup.8 d0, d14[0] ;second_pass filter parameters (d0-d5)
- vdup.8 d1, d14[4]
- vdup.8 d2, d15[0]
- vdup.8 d3, d15[4]
- vdup.8 d4, d16[0]
- vdup.8 d5, d16[4]
-
-filt_blk2d_sp8x8_loop_neon
- vmull.u8 q3, d18, d0 ;(src_ptr[-2] * vp8_filter[0])
- vmull.u8 q4, d19, d0
- vmull.u8 q5, d20, d0
- vmull.u8 q6, d21, d0
-
- vmlsl.u8 q3, d19, d1 ;-(src_ptr[-1] * vp8_filter[1])
- vmlsl.u8 q4, d20, d1
- vmlsl.u8 q5, d21, d1
- vmlsl.u8 q6, d22, d1
-
- vmlsl.u8 q3, d22, d4 ;-(src_ptr[2] * vp8_filter[4])
- vmlsl.u8 q4, d23, d4
- vmlsl.u8 q5, d24, d4
- vmlsl.u8 q6, d25, d4
-
- vmlal.u8 q3, d20, d2 ;(src_ptr[0] * vp8_filter[2])
- vmlal.u8 q4, d21, d2
- vmlal.u8 q5, d22, d2
- vmlal.u8 q6, d23, d2
-
- vmlal.u8 q3, d23, d5 ;(src_ptr[3] * vp8_filter[5])
- vmlal.u8 q4, d24, d5
- vmlal.u8 q5, d25, d5
- vmlal.u8 q6, d26, d5
-
- vmull.u8 q7, d21, d3 ;(src_ptr[1] * vp8_filter[3])
- vmull.u8 q8, d22, d3
- vmull.u8 q9, d23, d3
- vmull.u8 q10, d24, d3
-
- subs r3, r3, #1
-
- vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters)
- vqadd.s16 q8, q4
- vqadd.s16 q9, q5
- vqadd.s16 q10, q6
-
- vqrshrun.s16 d6, q7, #7 ;shift/round/saturate to u8
- vqrshrun.s16 d7, q8, #7
- vqrshrun.s16 d8, q9, #7
- vqrshrun.s16 d9, q10, #7
-
- vmov q9, q11
- vst1.u8 {d6}, [r4], r5 ;store result
- vmov q10, q12
- vst1.u8 {d7}, [r4], r5
- vmov q11, q13
- vst1.u8 {d8}, [r4], r5
- vmov q12, q14
- vst1.u8 {d9}, [r4], r5
- vmov d26, d30
-
- bne filt_blk2d_sp8x8_loop_neon
-
- add sp, sp, #64
- pop {r4-r5,pc}
-
-;---------------------
-firstpass_filter8x8_only
- ;add r2, r12, r2, lsl #5 ;calculate filter location
- ;vld1.s32 {q14, q15}, [r2] ;load first_pass filter
- vabs.s32 q12, q14
- vabs.s32 q13, q15
-
- mov r2, #2 ;loop counter
- sub r0, r0, #2 ;move srcptr back to (line-2) and (column-2)
-
- vdup.8 d0, d24[0] ;first_pass filter (d0-d5)
- vdup.8 d1, d24[4]
- vdup.8 d2, d25[0]
- vdup.8 d3, d25[4]
- vdup.8 d4, d26[0]
- vdup.8 d5, d26[4]
-
-;First pass: output_height lines x output_width columns (8x8)
-filt_blk2d_fpo8x8_loop_neon
- vld1.u8 {q3}, [r0], r1 ;load src data
- vld1.u8 {q4}, [r0], r1
- vld1.u8 {q5}, [r0], r1
- vld1.u8 {q6}, [r0], r1
-
- pld [r0]
- pld [r0, r1]
- pld [r0, r1, lsl #1]
-
- vmull.u8 q7, d6, d0 ;(src_ptr[-2] * vp8_filter[0])
- vmull.u8 q8, d8, d0
- vmull.u8 q9, d10, d0
- vmull.u8 q10, d12, d0
-
- vext.8 d28, d6, d7, #1 ;construct src_ptr[-1]
- vext.8 d29, d8, d9, #1
- vext.8 d30, d10, d11, #1
- vext.8 d31, d12, d13, #1
-
- vmlsl.u8 q7, d28, d1 ;-(src_ptr[-1] * vp8_filter[1])
- vmlsl.u8 q8, d29, d1
- vmlsl.u8 q9, d30, d1
- vmlsl.u8 q10, d31, d1
-
- vext.8 d28, d6, d7, #4 ;construct src_ptr[2]
- vext.8 d29, d8, d9, #4
- vext.8 d30, d10, d11, #4
- vext.8 d31, d12, d13, #4
-
- vmlsl.u8 q7, d28, d4 ;-(src_ptr[2] * vp8_filter[4])
- vmlsl.u8 q8, d29, d4
- vmlsl.u8 q9, d30, d4
- vmlsl.u8 q10, d31, d4
-
- vext.8 d28, d6, d7, #2 ;construct src_ptr[0]
- vext.8 d29, d8, d9, #2
- vext.8 d30, d10, d11, #2
- vext.8 d31, d12, d13, #2
-
- vmlal.u8 q7, d28, d2 ;(src_ptr[0] * vp8_filter[2])
- vmlal.u8 q8, d29, d2
- vmlal.u8 q9, d30, d2
- vmlal.u8 q10, d31, d2
-
- vext.8 d28, d6, d7, #5 ;construct src_ptr[3]
- vext.8 d29, d8, d9, #5
- vext.8 d30, d10, d11, #5
- vext.8 d31, d12, d13, #5
-
- vmlal.u8 q7, d28, d5 ;(src_ptr[3] * vp8_filter[5])
- vmlal.u8 q8, d29, d5
- vmlal.u8 q9, d30, d5
- vmlal.u8 q10, d31, d5
-
- vext.8 d28, d6, d7, #3 ;construct src_ptr[1]
- vext.8 d29, d8, d9, #3
- vext.8 d30, d10, d11, #3
- vext.8 d31, d12, d13, #3
-
- vmull.u8 q3, d28, d3 ;(src_ptr[1] * vp8_filter[3])
- vmull.u8 q4, d29, d3
- vmull.u8 q5, d30, d3
- vmull.u8 q6, d31, d3
- ;
- vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters)
- vqadd.s16 q8, q4
- vqadd.s16 q9, q5
- vqadd.s16 q10, q6
-
- subs r2, r2, #1
-
- vqrshrun.s16 d22, q7, #7 ;shift/round/saturate to u8
- vqrshrun.s16 d23, q8, #7
- vqrshrun.s16 d24, q9, #7
- vqrshrun.s16 d25, q10, #7
-
- vst1.u8 {d22}, [r4], r5 ;store result
- vst1.u8 {d23}, [r4], r5
- vst1.u8 {d24}, [r4], r5
- vst1.u8 {d25}, [r4], r5
-
- bne filt_blk2d_fpo8x8_loop_neon
-
- pop {r4-r5,pc}
-
-;---------------------
-secondpass_filter8x8_only
- sub r0, r0, r1, lsl #1
- add r3, r12, r3, lsl #5
-
- vld1.u8 {d18}, [r0], r1 ;load src data
- vld1.s32 {q5, q6}, [r3] ;load second_pass filter
- vld1.u8 {d19}, [r0], r1
- vabs.s32 q7, q5
- vld1.u8 {d20}, [r0], r1
- vabs.s32 q8, q6
- vld1.u8 {d21}, [r0], r1
- mov r3, #2 ;loop counter
- vld1.u8 {d22}, [r0], r1
- vdup.8 d0, d14[0] ;second_pass filter parameters (d0-d5)
- vld1.u8 {d23}, [r0], r1
- vdup.8 d1, d14[4]
- vld1.u8 {d24}, [r0], r1
- vdup.8 d2, d15[0]
- vld1.u8 {d25}, [r0], r1
- vdup.8 d3, d15[4]
- vld1.u8 {d26}, [r0], r1
- vdup.8 d4, d16[0]
- vld1.u8 {d27}, [r0], r1
- vdup.8 d5, d16[4]
- vld1.u8 {d28}, [r0], r1
- vld1.u8 {d29}, [r0], r1
- vld1.u8 {d30}, [r0], r1
-
-;Second pass: 8x8
-filt_blk2d_spo8x8_loop_neon
- vmull.u8 q3, d18, d0 ;(src_ptr[-2] * vp8_filter[0])
- vmull.u8 q4, d19, d0
- vmull.u8 q5, d20, d0
- vmull.u8 q6, d21, d0
-
- vmlsl.u8 q3, d19, d1 ;-(src_ptr[-1] * vp8_filter[1])
- vmlsl.u8 q4, d20, d1
- vmlsl.u8 q5, d21, d1
- vmlsl.u8 q6, d22, d1
-
- vmlsl.u8 q3, d22, d4 ;-(src_ptr[2] * vp8_filter[4])
- vmlsl.u8 q4, d23, d4
- vmlsl.u8 q5, d24, d4
- vmlsl.u8 q6, d25, d4
-
- vmlal.u8 q3, d20, d2 ;(src_ptr[0] * vp8_filter[2])
- vmlal.u8 q4, d21, d2
- vmlal.u8 q5, d22, d2
- vmlal.u8 q6, d23, d2
-
- vmlal.u8 q3, d23, d5 ;(src_ptr[3] * vp8_filter[5])
- vmlal.u8 q4, d24, d5
- vmlal.u8 q5, d25, d5
- vmlal.u8 q6, d26, d5
-
- vmull.u8 q7, d21, d3 ;(src_ptr[1] * vp8_filter[3])
- vmull.u8 q8, d22, d3
- vmull.u8 q9, d23, d3
- vmull.u8 q10, d24, d3
-
- subs r3, r3, #1
-
- vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters)
- vqadd.s16 q8, q4
- vqadd.s16 q9, q5
- vqadd.s16 q10, q6
-
- vqrshrun.s16 d6, q7, #7 ;shift/round/saturate to u8
- vqrshrun.s16 d7, q8, #7
- vqrshrun.s16 d8, q9, #7
- vqrshrun.s16 d9, q10, #7
-
- vmov q9, q11
- vst1.u8 {d6}, [r4], r5 ;store result
- vmov q10, q12
- vst1.u8 {d7}, [r4], r5
- vmov q11, q13
- vst1.u8 {d8}, [r4], r5
- vmov q12, q14
- vst1.u8 {d9}, [r4], r5
- vmov d26, d30
-
- bne filt_blk2d_spo8x8_loop_neon
-
- pop {r4-r5,pc}
-
- ENDP
-
-;-----------------
-
- END
diff --git a/vp8/common/arm/neon/sixtappredict_neon.c b/vp8/common/arm/neon/sixtappredict_neon.c
new file mode 100644
index 0000000..7a4d9e0
--- /dev/null
+++ b/vp8/common/arm/neon/sixtappredict_neon.c
@@ -0,0 +1,1752 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+#ifdef _MSC_VER
+#define __builtin_prefetch(x)
+#endif
+
+static const int8_t vp8_sub_pel_filters[8][8] = {
+ {0, 0, 128, 0, 0, 0, 0, 0}, /* note that 1/8 pel positionyys are */
+ {0, -6, 123, 12, -1, 0, 0, 0}, /* just as per alpha -0.5 bicubic */
+ {2, -11, 108, 36, -8, 1, 0, 0}, /* New 1/4 pel 6 tap filter */
+ {0, -9, 93, 50, -6, 0, 0, 0},
+ {3, -16, 77, 77, -16, 3, 0, 0}, /* New 1/2 pel 6 tap filter */
+ {0, -6, 50, 93, -9, 0, 0, 0},
+ {1, -8, 36, 108, -11, 2, 0, 0}, /* New 1/4 pel 6 tap filter */
+ {0, -1, 12, 123, -6, 0, 0, 0},
+};
+
+void vp8_sixtap_predict4x4_neon(
+ unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch) {
+ unsigned char *src;
+ uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d18u8, d19u8, d20u8, d21u8;
+ uint8x8_t d23u8, d24u8, d25u8, d26u8, d27u8, d28u8, d29u8, d30u8, d31u8;
+ int8x8_t dtmps8, d0s8, d1s8, d2s8, d3s8, d4s8, d5s8;
+ uint32x2_t d27u32, d28u32, d29u32, d30u32, d31u32;
+ uint16x8_t q3u16, q4u16, q5u16, q6u16, q7u16;
+ uint16x8_t q8u16, q9u16, q10u16, q11u16, q12u16;
+ int16x8_t q3s16, q4s16, q5s16, q6s16, q7s16;
+ int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16;
+ uint8x16_t q3u8, q4u8, q5u8, q6u8, q11u8;
+ uint64x2_t q3u64, q4u64, q5u64, q6u64, q9u64, q10u64;
+ uint32x2x2_t d0u32x2, d1u32x2;
+
+ if (xoffset == 0) { // secondpass_filter4x4_only
+ // load second_pass filter
+ dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]);
+ d0s8 = vdup_lane_s8(dtmps8, 0);
+ d1s8 = vdup_lane_s8(dtmps8, 1);
+ d2s8 = vdup_lane_s8(dtmps8, 2);
+ d3s8 = vdup_lane_s8(dtmps8, 3);
+ d4s8 = vdup_lane_s8(dtmps8, 4);
+ d5s8 = vdup_lane_s8(dtmps8, 5);
+ d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8));
+ d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8));
+ d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8));
+ d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8));
+ d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8));
+ d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8));
+
+ // load src data
+ src = src_ptr - src_pixels_per_line * 2;
+ d27u32 = vld1_lane_u32((const uint32_t *)src, d27u32, 0);
+ src += src_pixels_per_line;
+ d27u32 = vld1_lane_u32((const uint32_t *)src, d27u32, 1);
+ src += src_pixels_per_line;
+ d28u32 = vld1_lane_u32((const uint32_t *)src, d28u32, 0);
+ src += src_pixels_per_line;
+ d28u32 = vld1_lane_u32((const uint32_t *)src, d28u32, 1);
+ src += src_pixels_per_line;
+ d29u32 = vld1_lane_u32((const uint32_t *)src, d29u32, 0);
+ src += src_pixels_per_line;
+ d29u32 = vld1_lane_u32((const uint32_t *)src, d29u32, 1);
+ src += src_pixels_per_line;
+ d30u32 = vld1_lane_u32((const uint32_t *)src, d30u32, 0);
+ src += src_pixels_per_line;
+ d30u32 = vld1_lane_u32((const uint32_t *)src, d30u32, 1);
+ src += src_pixels_per_line;
+ d31u32 = vld1_lane_u32((const uint32_t *)src, d31u32, 0);
+
+ d27u8 = vreinterpret_u8_u32(d27u32);
+ d28u8 = vreinterpret_u8_u32(d28u32);
+ d29u8 = vreinterpret_u8_u32(d29u32);
+ d30u8 = vreinterpret_u8_u32(d30u32);
+ d31u8 = vreinterpret_u8_u32(d31u32);
+
+ d23u8 = vext_u8(d27u8, d28u8, 4);
+ d24u8 = vext_u8(d28u8, d29u8, 4);
+ d25u8 = vext_u8(d29u8, d30u8, 4);
+ d26u8 = vext_u8(d30u8, d31u8, 4);
+
+ q3u16 = vmull_u8(d27u8, d0u8);
+ q4u16 = vmull_u8(d28u8, d0u8);
+ q5u16 = vmull_u8(d25u8, d5u8);
+ q6u16 = vmull_u8(d26u8, d5u8);
+
+ q3u16 = vmlsl_u8(q3u16, d29u8, d4u8);
+ q4u16 = vmlsl_u8(q4u16, d30u8, d4u8);
+ q5u16 = vmlsl_u8(q5u16, d23u8, d1u8);
+ q6u16 = vmlsl_u8(q6u16, d24u8, d1u8);
+
+ q3u16 = vmlal_u8(q3u16, d28u8, d2u8);
+ q4u16 = vmlal_u8(q4u16, d29u8, d2u8);
+ q5u16 = vmlal_u8(q5u16, d24u8, d3u8);
+ q6u16 = vmlal_u8(q6u16, d25u8, d3u8);
+
+ q3s16 = vreinterpretq_s16_u16(q3u16);
+ q4s16 = vreinterpretq_s16_u16(q4u16);
+ q5s16 = vreinterpretq_s16_u16(q5u16);
+ q6s16 = vreinterpretq_s16_u16(q6u16);
+
+ q5s16 = vqaddq_s16(q5s16, q3s16);
+ q6s16 = vqaddq_s16(q6s16, q4s16);
+
+ d3u8 = vqrshrun_n_s16(q5s16, 7);
+ d4u8 = vqrshrun_n_s16(q6s16, 7);
+
+ vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 0);
+ dst_ptr += dst_pitch;
+ vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 1);
+ dst_ptr += dst_pitch;
+ vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d4u8), 0);
+ dst_ptr += dst_pitch;
+ vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d4u8), 1);
+ return;
+ }
+
+ // load first_pass filter
+ dtmps8 = vld1_s8(vp8_sub_pel_filters[xoffset]);
+ d0s8 = vdup_lane_s8(dtmps8, 0);
+ d1s8 = vdup_lane_s8(dtmps8, 1);
+ d2s8 = vdup_lane_s8(dtmps8, 2);
+ d3s8 = vdup_lane_s8(dtmps8, 3);
+ d4s8 = vdup_lane_s8(dtmps8, 4);
+ d5s8 = vdup_lane_s8(dtmps8, 5);
+ d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8));
+ d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8));
+ d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8));
+ d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8));
+ d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8));
+ d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8));
+
+ // First pass: output_height lines x output_width columns (9x4)
+
+ if (yoffset == 0) // firstpass_filter4x4_only
+ src = src_ptr - 2;
+ else
+ src = src_ptr - 2 - (src_pixels_per_line * 2);
+
+ q3u8 = vld1q_u8(src);
+ src += src_pixels_per_line;
+ q4u8 = vld1q_u8(src);
+ src += src_pixels_per_line;
+ q5u8 = vld1q_u8(src);
+ src += src_pixels_per_line;
+ q6u8 = vld1q_u8(src);
+ src += src_pixels_per_line;
+
+ d18u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5);
+ d19u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5);
+ d20u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5);
+ d21u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5);
+
+ // vswp here
+ q3u8 = vcombine_u8(vget_low_u8(q3u8), vget_low_u8(q4u8));
+ q5u8 = vcombine_u8(vget_low_u8(q5u8), vget_low_u8(q6u8));
+
+ d0u32x2 = vzip_u32(vreinterpret_u32_u8(d18u8), // d18 d19
+ vreinterpret_u32_u8(d19u8));
+ d1u32x2 = vzip_u32(vreinterpret_u32_u8(d20u8), // d20 d21
+ vreinterpret_u32_u8(d21u8));
+ q7u16 = vmull_u8(vreinterpret_u8_u32(d0u32x2.val[0]), d5u8);
+ q8u16 = vmull_u8(vreinterpret_u8_u32(d1u32x2.val[0]), d5u8);
+
+ // keep original src data in q4 q6
+ q4u64 = vreinterpretq_u64_u8(q3u8);
+ q6u64 = vreinterpretq_u64_u8(q5u8);
+
+ d0u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q3u8)), // d6 d7
+ vreinterpret_u32_u8(vget_high_u8(q3u8)));
+ d1u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q5u8)), // d10 d11
+ vreinterpret_u32_u8(vget_high_u8(q5u8)));
+ q9u64 = vshrq_n_u64(q4u64, 8);
+ q10u64 = vshrq_n_u64(q6u64, 8);
+ q7u16 = vmlal_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d0u8);
+ q8u16 = vmlal_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d0u8);
+
+ d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q9u64)), // d18 d19
+ vreinterpret_u32_u64(vget_high_u64(q9u64)));
+ d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q10u64)), // d20 d211
+ vreinterpret_u32_u64(vget_high_u64(q10u64)));
+ q3u64 = vshrq_n_u64(q4u64, 32);
+ q5u64 = vshrq_n_u64(q6u64, 32);
+ q7u16 = vmlsl_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d1u8);
+ q8u16 = vmlsl_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d1u8);
+
+ d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q3u64)), // d6 d7
+ vreinterpret_u32_u64(vget_high_u64(q3u64)));
+ d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q5u64)), // d10 d11
+ vreinterpret_u32_u64(vget_high_u64(q5u64)));
+ q9u64 = vshrq_n_u64(q4u64, 16);
+ q10u64 = vshrq_n_u64(q6u64, 16);
+ q7u16 = vmlsl_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d4u8);
+ q8u16 = vmlsl_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d4u8);
+
+ d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q9u64)), // d18 d19
+ vreinterpret_u32_u64(vget_high_u64(q9u64)));
+ d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q10u64)), // d20 d211
+ vreinterpret_u32_u64(vget_high_u64(q10u64)));
+ q3u64 = vshrq_n_u64(q4u64, 24);
+ q5u64 = vshrq_n_u64(q6u64, 24);
+ q7u16 = vmlal_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d2u8);
+ q8u16 = vmlal_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d2u8);
+
+ d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q3u64)), // d6 d7
+ vreinterpret_u32_u64(vget_high_u64(q3u64)));
+ d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q5u64)), // d10 d11
+ vreinterpret_u32_u64(vget_high_u64(q5u64)));
+ q9u16 = vmull_u8(vreinterpret_u8_u32(d0u32x2.val[0]), d3u8);
+ q10u16 = vmull_u8(vreinterpret_u8_u32(d1u32x2.val[0]), d3u8);
+
+ q7s16 = vreinterpretq_s16_u16(q7u16);
+ q8s16 = vreinterpretq_s16_u16(q8u16);
+ q9s16 = vreinterpretq_s16_u16(q9u16);
+ q10s16 = vreinterpretq_s16_u16(q10u16);
+ q7s16 = vqaddq_s16(q7s16, q9s16);
+ q8s16 = vqaddq_s16(q8s16, q10s16);
+
+ d27u8 = vqrshrun_n_s16(q7s16, 7);
+ d28u8 = vqrshrun_n_s16(q8s16, 7);
+
+ if (yoffset == 0) { // firstpass_filter4x4_only
+ vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d27u8), 0);
+ dst_ptr += dst_pitch;
+ vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d27u8), 1);
+ dst_ptr += dst_pitch;
+ vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d28u8), 0);
+ dst_ptr += dst_pitch;
+ vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d28u8), 1);
+ return;
+ }
+
+ // First Pass on rest 5-line data
+ q3u8 = vld1q_u8(src);
+ src += src_pixels_per_line;
+ q4u8 = vld1q_u8(src);
+ src += src_pixels_per_line;
+ q5u8 = vld1q_u8(src);
+ src += src_pixels_per_line;
+ q6u8 = vld1q_u8(src);
+ src += src_pixels_per_line;
+ q11u8 = vld1q_u8(src);
+
+ d18u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5);
+ d19u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5);
+ d20u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5);
+ d21u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5);
+
+ // vswp here
+ q3u8 = vcombine_u8(vget_low_u8(q3u8), vget_low_u8(q4u8));
+ q5u8 = vcombine_u8(vget_low_u8(q5u8), vget_low_u8(q6u8));
+
+ d0u32x2 = vzip_u32(vreinterpret_u32_u8(d18u8), // d18 d19
+ vreinterpret_u32_u8(d19u8));
+ d1u32x2 = vzip_u32(vreinterpret_u32_u8(d20u8), // d20 d21
+ vreinterpret_u32_u8(d21u8));
+ d31u8 = vext_u8(vget_low_u8(q11u8), vget_high_u8(q11u8), 5);
+ q7u16 = vmull_u8(vreinterpret_u8_u32(d0u32x2.val[0]), d5u8);
+ q8u16 = vmull_u8(vreinterpret_u8_u32(d1u32x2.val[0]), d5u8);
+ q12u16 = vmull_u8(d31u8, d5u8);
+
+ q4u64 = vreinterpretq_u64_u8(q3u8);
+ q6u64 = vreinterpretq_u64_u8(q5u8);
+
+ d0u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q3u8)), // d6 d7
+ vreinterpret_u32_u8(vget_high_u8(q3u8)));
+ d1u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q5u8)), // d10 d11
+ vreinterpret_u32_u8(vget_high_u8(q5u8)));
+ q9u64 = vshrq_n_u64(q4u64, 8);
+ q10u64 = vshrq_n_u64(q6u64, 8);
+ q7u16 = vmlal_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d0u8);
+ q8u16 = vmlal_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d0u8);
+ q12u16 = vmlal_u8(q12u16, vget_low_u8(q11u8), d0u8);
+
+ d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q9u64)), // d18 d19
+ vreinterpret_u32_u64(vget_high_u64(q9u64)));
+ d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q10u64)), // d20 d211
+ vreinterpret_u32_u64(vget_high_u64(q10u64)));
+ q3u64 = vshrq_n_u64(q4u64, 32);
+ q5u64 = vshrq_n_u64(q6u64, 32);
+ d31u8 = vext_u8(vget_low_u8(q11u8), vget_high_u8(q11u8), 1);
+ q7u16 = vmlsl_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d1u8);
+ q8u16 = vmlsl_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d1u8);
+ q12u16 = vmlsl_u8(q12u16, d31u8, d1u8);
+
+ d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q3u64)), // d6 d7
+ vreinterpret_u32_u64(vget_high_u64(q3u64)));
+ d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q5u64)), // d10 d11
+ vreinterpret_u32_u64(vget_high_u64(q5u64)));
+ q9u64 = vshrq_n_u64(q4u64, 16);
+ q10u64 = vshrq_n_u64(q6u64, 16);
+ d31u8 = vext_u8(vget_low_u8(q11u8), vget_high_u8(q11u8), 4);
+ q7u16 = vmlsl_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d4u8);
+ q8u16 = vmlsl_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d4u8);
+ q12u16 = vmlsl_u8(q12u16, d31u8, d4u8);
+
+ d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q9u64)), // d18 d19
+ vreinterpret_u32_u64(vget_high_u64(q9u64)));
+ d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q10u64)), // d20 d211
+ vreinterpret_u32_u64(vget_high_u64(q10u64)));
+ q3u64 = vshrq_n_u64(q4u64, 24);
+ q5u64 = vshrq_n_u64(q6u64, 24);
+ d31u8 = vext_u8(vget_low_u8(q11u8), vget_high_u8(q11u8), 2);
+ q7u16 = vmlal_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d2u8);
+ q8u16 = vmlal_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d2u8);
+ q12u16 = vmlal_u8(q12u16, d31u8, d2u8);
+
+ d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q3u64)), // d6 d7
+ vreinterpret_u32_u64(vget_high_u64(q3u64)));
+ d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q5u64)), // d10 d11
+ vreinterpret_u32_u64(vget_high_u64(q5u64)));
+ d31u8 = vext_u8(vget_low_u8(q11u8), vget_high_u8(q11u8), 3);
+ q9u16 = vmull_u8(vreinterpret_u8_u32(d0u32x2.val[0]), d3u8);
+ q10u16 = vmull_u8(vreinterpret_u8_u32(d1u32x2.val[0]), d3u8);
+ q11u16 = vmull_u8(d31u8, d3u8);
+
+ q7s16 = vreinterpretq_s16_u16(q7u16);
+ q8s16 = vreinterpretq_s16_u16(q8u16);
+ q9s16 = vreinterpretq_s16_u16(q9u16);
+ q10s16 = vreinterpretq_s16_u16(q10u16);
+ q11s16 = vreinterpretq_s16_u16(q11u16);
+ q12s16 = vreinterpretq_s16_u16(q12u16);
+ q7s16 = vqaddq_s16(q7s16, q9s16);
+ q8s16 = vqaddq_s16(q8s16, q10s16);
+ q12s16 = vqaddq_s16(q12s16, q11s16);
+
+ d29u8 = vqrshrun_n_s16(q7s16, 7);
+ d30u8 = vqrshrun_n_s16(q8s16, 7);
+ d31u8 = vqrshrun_n_s16(q12s16, 7);
+
+ // Second pass: 4x4
+ dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]);
+ d0s8 = vdup_lane_s8(dtmps8, 0);
+ d1s8 = vdup_lane_s8(dtmps8, 1);
+ d2s8 = vdup_lane_s8(dtmps8, 2);
+ d3s8 = vdup_lane_s8(dtmps8, 3);
+ d4s8 = vdup_lane_s8(dtmps8, 4);
+ d5s8 = vdup_lane_s8(dtmps8, 5);
+ d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8));
+ d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8));
+ d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8));
+ d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8));
+ d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8));
+ d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8));
+
+ d23u8 = vext_u8(d27u8, d28u8, 4);
+ d24u8 = vext_u8(d28u8, d29u8, 4);
+ d25u8 = vext_u8(d29u8, d30u8, 4);
+ d26u8 = vext_u8(d30u8, d31u8, 4);
+
+ q3u16 = vmull_u8(d27u8, d0u8);
+ q4u16 = vmull_u8(d28u8, d0u8);
+ q5u16 = vmull_u8(d25u8, d5u8);
+ q6u16 = vmull_u8(d26u8, d5u8);
+
+ q3u16 = vmlsl_u8(q3u16, d29u8, d4u8);
+ q4u16 = vmlsl_u8(q4u16, d30u8, d4u8);
+ q5u16 = vmlsl_u8(q5u16, d23u8, d1u8);
+ q6u16 = vmlsl_u8(q6u16, d24u8, d1u8);
+
+ q3u16 = vmlal_u8(q3u16, d28u8, d2u8);
+ q4u16 = vmlal_u8(q4u16, d29u8, d2u8);
+ q5u16 = vmlal_u8(q5u16, d24u8, d3u8);
+ q6u16 = vmlal_u8(q6u16, d25u8, d3u8);
+
+ q3s16 = vreinterpretq_s16_u16(q3u16);
+ q4s16 = vreinterpretq_s16_u16(q4u16);
+ q5s16 = vreinterpretq_s16_u16(q5u16);
+ q6s16 = vreinterpretq_s16_u16(q6u16);
+
+ q5s16 = vqaddq_s16(q5s16, q3s16);
+ q6s16 = vqaddq_s16(q6s16, q4s16);
+
+ d3u8 = vqrshrun_n_s16(q5s16, 7);
+ d4u8 = vqrshrun_n_s16(q6s16, 7);
+
+ vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 0);
+ dst_ptr += dst_pitch;
+ vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 1);
+ dst_ptr += dst_pitch;
+ vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d4u8), 0);
+ dst_ptr += dst_pitch;
+ vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d4u8), 1);
+ return;
+}
+
+void vp8_sixtap_predict8x4_neon(
+ unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch) {
+ unsigned char *src;
+ uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8;
+ uint8x8_t d22u8, d23u8, d24u8, d25u8, d26u8;
+ uint8x8_t d27u8, d28u8, d29u8, d30u8, d31u8;
+ int8x8_t dtmps8, d0s8, d1s8, d2s8, d3s8, d4s8, d5s8;
+ uint16x8_t q3u16, q4u16, q5u16, q6u16, q7u16;
+ uint16x8_t q8u16, q9u16, q10u16, q11u16, q12u16;
+ int16x8_t q3s16, q4s16, q5s16, q6s16, q7s16;
+ int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16;
+ uint8x16_t q3u8, q4u8, q5u8, q6u8, q7u8;
+
+ if (xoffset == 0) { // secondpass_filter8x4_only
+ // load second_pass filter
+ dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]);
+ d0s8 = vdup_lane_s8(dtmps8, 0);
+ d1s8 = vdup_lane_s8(dtmps8, 1);
+ d2s8 = vdup_lane_s8(dtmps8, 2);
+ d3s8 = vdup_lane_s8(dtmps8, 3);
+ d4s8 = vdup_lane_s8(dtmps8, 4);
+ d5s8 = vdup_lane_s8(dtmps8, 5);
+ d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8));
+ d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8));
+ d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8));
+ d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8));
+ d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8));
+ d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8));
+
+ // load src data
+ src = src_ptr - src_pixels_per_line * 2;
+ d22u8 = vld1_u8(src);
+ src += src_pixels_per_line;
+ d23u8 = vld1_u8(src);
+ src += src_pixels_per_line;
+ d24u8 = vld1_u8(src);
+ src += src_pixels_per_line;
+ d25u8 = vld1_u8(src);
+ src += src_pixels_per_line;
+ d26u8 = vld1_u8(src);
+ src += src_pixels_per_line;
+ d27u8 = vld1_u8(src);
+ src += src_pixels_per_line;
+ d28u8 = vld1_u8(src);
+ src += src_pixels_per_line;
+ d29u8 = vld1_u8(src);
+ src += src_pixels_per_line;
+ d30u8 = vld1_u8(src);
+
+ q3u16 = vmull_u8(d22u8, d0u8);
+ q4u16 = vmull_u8(d23u8, d0u8);
+ q5u16 = vmull_u8(d24u8, d0u8);
+ q6u16 = vmull_u8(d25u8, d0u8);
+
+ q3u16 = vmlsl_u8(q3u16, d23u8, d1u8);
+ q4u16 = vmlsl_u8(q4u16, d24u8, d1u8);
+ q5u16 = vmlsl_u8(q5u16, d25u8, d1u8);
+ q6u16 = vmlsl_u8(q6u16, d26u8, d1u8);
+
+ q3u16 = vmlsl_u8(q3u16, d26u8, d4u8);
+ q4u16 = vmlsl_u8(q4u16, d27u8, d4u8);
+ q5u16 = vmlsl_u8(q5u16, d28u8, d4u8);
+ q6u16 = vmlsl_u8(q6u16, d29u8, d4u8);
+
+ q3u16 = vmlal_u8(q3u16, d24u8, d2u8);
+ q4u16 = vmlal_u8(q4u16, d25u8, d2u8);
+ q5u16 = vmlal_u8(q5u16, d26u8, d2u8);
+ q6u16 = vmlal_u8(q6u16, d27u8, d2u8);
+
+ q3u16 = vmlal_u8(q3u16, d27u8, d5u8);
+ q4u16 = vmlal_u8(q4u16, d28u8, d5u8);
+ q5u16 = vmlal_u8(q5u16, d29u8, d5u8);
+ q6u16 = vmlal_u8(q6u16, d30u8, d5u8);
+
+ q7u16 = vmull_u8(d25u8, d3u8);
+ q8u16 = vmull_u8(d26u8, d3u8);
+ q9u16 = vmull_u8(d27u8, d3u8);
+ q10u16 = vmull_u8(d28u8, d3u8);
+
+ q3s16 = vreinterpretq_s16_u16(q3u16);
+ q4s16 = vreinterpretq_s16_u16(q4u16);
+ q5s16 = vreinterpretq_s16_u16(q5u16);
+ q6s16 = vreinterpretq_s16_u16(q6u16);
+ q7s16 = vreinterpretq_s16_u16(q7u16);
+ q8s16 = vreinterpretq_s16_u16(q8u16);
+ q9s16 = vreinterpretq_s16_u16(q9u16);
+ q10s16 = vreinterpretq_s16_u16(q10u16);
+
+ q7s16 = vqaddq_s16(q7s16, q3s16);
+ q8s16 = vqaddq_s16(q8s16, q4s16);
+ q9s16 = vqaddq_s16(q9s16, q5s16);
+ q10s16 = vqaddq_s16(q10s16, q6s16);
+
+ d6u8 = vqrshrun_n_s16(q7s16, 7);
+ d7u8 = vqrshrun_n_s16(q8s16, 7);
+ d8u8 = vqrshrun_n_s16(q9s16, 7);
+ d9u8 = vqrshrun_n_s16(q10s16, 7);
+
+ vst1_u8(dst_ptr, d6u8);
+ dst_ptr += dst_pitch;
+ vst1_u8(dst_ptr, d7u8);
+ dst_ptr += dst_pitch;
+ vst1_u8(dst_ptr, d8u8);
+ dst_ptr += dst_pitch;
+ vst1_u8(dst_ptr, d9u8);
+ return;
+ }
+
+ // load first_pass filter
+ dtmps8 = vld1_s8(vp8_sub_pel_filters[xoffset]);
+ d0s8 = vdup_lane_s8(dtmps8, 0);
+ d1s8 = vdup_lane_s8(dtmps8, 1);
+ d2s8 = vdup_lane_s8(dtmps8, 2);
+ d3s8 = vdup_lane_s8(dtmps8, 3);
+ d4s8 = vdup_lane_s8(dtmps8, 4);
+ d5s8 = vdup_lane_s8(dtmps8, 5);
+ d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8));
+ d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8));
+ d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8));
+ d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8));
+ d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8));
+ d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8));
+
+ // First pass: output_height lines x output_width columns (9x4)
+ if (yoffset == 0) // firstpass_filter4x4_only
+ src = src_ptr - 2;
+ else
+ src = src_ptr - 2 - (src_pixels_per_line * 2);
+ q3u8 = vld1q_u8(src);
+ src += src_pixels_per_line;
+ q4u8 = vld1q_u8(src);
+ src += src_pixels_per_line;
+ q5u8 = vld1q_u8(src);
+ src += src_pixels_per_line;
+ q6u8 = vld1q_u8(src);
+
+ q7u16 = vmull_u8(vget_low_u8(q3u8), d0u8);
+ q8u16 = vmull_u8(vget_low_u8(q4u8), d0u8);
+ q9u16 = vmull_u8(vget_low_u8(q5u8), d0u8);
+ q10u16 = vmull_u8(vget_low_u8(q6u8), d0u8);
+
+ d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1);
+ d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1);
+ d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1);
+ d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 1);
+
+ q7u16 = vmlsl_u8(q7u16, d28u8, d1u8);
+ q8u16 = vmlsl_u8(q8u16, d29u8, d1u8);
+ q9u16 = vmlsl_u8(q9u16, d30u8, d1u8);
+ q10u16 = vmlsl_u8(q10u16, d31u8, d1u8);
+
+ d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 4);
+ d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 4);
+ d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 4);
+ d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 4);
+
+ q7u16 = vmlsl_u8(q7u16, d28u8, d4u8);
+ q8u16 = vmlsl_u8(q8u16, d29u8, d4u8);
+ q9u16 = vmlsl_u8(q9u16, d30u8, d4u8);
+ q10u16 = vmlsl_u8(q10u16, d31u8, d4u8);
+
+ d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 2);
+ d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 2);
+ d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 2);
+ d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 2);
+
+ q7u16 = vmlal_u8(q7u16, d28u8, d2u8);
+ q8u16 = vmlal_u8(q8u16, d29u8, d2u8);
+ q9u16 = vmlal_u8(q9u16, d30u8, d2u8);
+ q10u16 = vmlal_u8(q10u16, d31u8, d2u8);
+
+ d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5);
+ d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5);
+ d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5);
+ d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5);
+
+ q7u16 = vmlal_u8(q7u16, d28u8, d5u8);
+ q8u16 = vmlal_u8(q8u16, d29u8, d5u8);
+ q9u16 = vmlal_u8(q9u16, d30u8, d5u8);
+ q10u16 = vmlal_u8(q10u16, d31u8, d5u8);
+
+ d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 3);
+ d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 3);
+ d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 3);
+ d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 3);
+
+ q3u16 = vmull_u8(d28u8, d3u8);
+ q4u16 = vmull_u8(d29u8, d3u8);
+ q5u16 = vmull_u8(d30u8, d3u8);
+ q6u16 = vmull_u8(d31u8, d3u8);
+
+ q3s16 = vreinterpretq_s16_u16(q3u16);
+ q4s16 = vreinterpretq_s16_u16(q4u16);
+ q5s16 = vreinterpretq_s16_u16(q5u16);
+ q6s16 = vreinterpretq_s16_u16(q6u16);
+ q7s16 = vreinterpretq_s16_u16(q7u16);
+ q8s16 = vreinterpretq_s16_u16(q8u16);
+ q9s16 = vreinterpretq_s16_u16(q9u16);
+ q10s16 = vreinterpretq_s16_u16(q10u16);
+
+ q7s16 = vqaddq_s16(q7s16, q3s16);
+ q8s16 = vqaddq_s16(q8s16, q4s16);
+ q9s16 = vqaddq_s16(q9s16, q5s16);
+ q10s16 = vqaddq_s16(q10s16, q6s16);
+
+ d22u8 = vqrshrun_n_s16(q7s16, 7);
+ d23u8 = vqrshrun_n_s16(q8s16, 7);
+ d24u8 = vqrshrun_n_s16(q9s16, 7);
+ d25u8 = vqrshrun_n_s16(q10s16, 7);
+
+ if (yoffset == 0) { // firstpass_filter8x4_only
+ vst1_u8(dst_ptr, d22u8);
+ dst_ptr += dst_pitch;
+ vst1_u8(dst_ptr, d23u8);
+ dst_ptr += dst_pitch;
+ vst1_u8(dst_ptr, d24u8);
+ dst_ptr += dst_pitch;
+ vst1_u8(dst_ptr, d25u8);
+ return;
+ }
+
+ // First Pass on rest 5-line data
+ src += src_pixels_per_line;
+ q3u8 = vld1q_u8(src);
+ src += src_pixels_per_line;
+ q4u8 = vld1q_u8(src);
+ src += src_pixels_per_line;
+ q5u8 = vld1q_u8(src);
+ src += src_pixels_per_line;
+ q6u8 = vld1q_u8(src);
+ src += src_pixels_per_line;
+ q7u8 = vld1q_u8(src);
+
+ q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8);
+ q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8);
+ q10u16 = vmull_u8(vget_low_u8(q5u8), d0u8);
+ q11u16 = vmull_u8(vget_low_u8(q6u8), d0u8);
+ q12u16 = vmull_u8(vget_low_u8(q7u8), d0u8);
+
+ d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1);
+ d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1);
+ d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1);
+ d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 1);
+ d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 1);
+
+ q8u16 = vmlsl_u8(q8u16, d27u8, d1u8);
+ q9u16 = vmlsl_u8(q9u16, d28u8, d1u8);
+ q10u16 = vmlsl_u8(q10u16, d29u8, d1u8);
+ q11u16 = vmlsl_u8(q11u16, d30u8, d1u8);
+ q12u16 = vmlsl_u8(q12u16, d31u8, d1u8);
+
+ d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 4);
+ d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 4);
+ d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 4);
+ d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 4);
+ d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 4);
+
+ q8u16 = vmlsl_u8(q8u16, d27u8, d4u8);
+ q9u16 = vmlsl_u8(q9u16, d28u8, d4u8);
+ q10u16 = vmlsl_u8(q10u16, d29u8, d4u8);
+ q11u16 = vmlsl_u8(q11u16, d30u8, d4u8);
+ q12u16 = vmlsl_u8(q12u16, d31u8, d4u8);
+
+ d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 2);
+ d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 2);
+ d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 2);
+ d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 2);
+ d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 2);
+
+ q8u16 = vmlal_u8(q8u16, d27u8, d2u8);
+ q9u16 = vmlal_u8(q9u16, d28u8, d2u8);
+ q10u16 = vmlal_u8(q10u16, d29u8, d2u8);
+ q11u16 = vmlal_u8(q11u16, d30u8, d2u8);
+ q12u16 = vmlal_u8(q12u16, d31u8, d2u8);
+
+ d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5);
+ d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5);
+ d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5);
+ d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5);
+ d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 5);
+
+ q8u16 = vmlal_u8(q8u16, d27u8, d5u8);
+ q9u16 = vmlal_u8(q9u16, d28u8, d5u8);
+ q10u16 = vmlal_u8(q10u16, d29u8, d5u8);
+ q11u16 = vmlal_u8(q11u16, d30u8, d5u8);
+ q12u16 = vmlal_u8(q12u16, d31u8, d5u8);
+
+ d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 3);
+ d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 3);
+ d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 3);
+ d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 3);
+ d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 3);
+
+ q3u16 = vmull_u8(d27u8, d3u8);
+ q4u16 = vmull_u8(d28u8, d3u8);
+ q5u16 = vmull_u8(d29u8, d3u8);
+ q6u16 = vmull_u8(d30u8, d3u8);
+ q7u16 = vmull_u8(d31u8, d3u8);
+
+ q3s16 = vreinterpretq_s16_u16(q3u16);
+ q4s16 = vreinterpretq_s16_u16(q4u16);
+ q5s16 = vreinterpretq_s16_u16(q5u16);
+ q6s16 = vreinterpretq_s16_u16(q6u16);
+ q7s16 = vreinterpretq_s16_u16(q7u16);
+ q8s16 = vreinterpretq_s16_u16(q8u16);
+ q9s16 = vreinterpretq_s16_u16(q9u16);
+ q10s16 = vreinterpretq_s16_u16(q10u16);
+ q11s16 = vreinterpretq_s16_u16(q11u16);
+ q12s16 = vreinterpretq_s16_u16(q12u16);
+
+ q8s16 = vqaddq_s16(q8s16, q3s16);
+ q9s16 = vqaddq_s16(q9s16, q4s16);
+ q10s16 = vqaddq_s16(q10s16, q5s16);
+ q11s16 = vqaddq_s16(q11s16, q6s16);
+ q12s16 = vqaddq_s16(q12s16, q7s16);
+
+ d26u8 = vqrshrun_n_s16(q8s16, 7);
+ d27u8 = vqrshrun_n_s16(q9s16, 7);
+ d28u8 = vqrshrun_n_s16(q10s16, 7);
+ d29u8 = vqrshrun_n_s16(q11s16, 7);
+ d30u8 = vqrshrun_n_s16(q12s16, 7);
+
+ // Second pass: 8x4
+ dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]);
+ d0s8 = vdup_lane_s8(dtmps8, 0);
+ d1s8 = vdup_lane_s8(dtmps8, 1);
+ d2s8 = vdup_lane_s8(dtmps8, 2);
+ d3s8 = vdup_lane_s8(dtmps8, 3);
+ d4s8 = vdup_lane_s8(dtmps8, 4);
+ d5s8 = vdup_lane_s8(dtmps8, 5);
+ d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8));
+ d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8));
+ d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8));
+ d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8));
+ d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8));
+ d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8));
+
+ q3u16 = vmull_u8(d22u8, d0u8);
+ q4u16 = vmull_u8(d23u8, d0u8);
+ q5u16 = vmull_u8(d24u8, d0u8);
+ q6u16 = vmull_u8(d25u8, d0u8);
+
+ q3u16 = vmlsl_u8(q3u16, d23u8, d1u8);
+ q4u16 = vmlsl_u8(q4u16, d24u8, d1u8);
+ q5u16 = vmlsl_u8(q5u16, d25u8, d1u8);
+ q6u16 = vmlsl_u8(q6u16, d26u8, d1u8);
+
+ q3u16 = vmlsl_u8(q3u16, d26u8, d4u8);
+ q4u16 = vmlsl_u8(q4u16, d27u8, d4u8);
+ q5u16 = vmlsl_u8(q5u16, d28u8, d4u8);
+ q6u16 = vmlsl_u8(q6u16, d29u8, d4u8);
+
+ q3u16 = vmlal_u8(q3u16, d24u8, d2u8);
+ q4u16 = vmlal_u8(q4u16, d25u8, d2u8);
+ q5u16 = vmlal_u8(q5u16, d26u8, d2u8);
+ q6u16 = vmlal_u8(q6u16, d27u8, d2u8);
+
+ q3u16 = vmlal_u8(q3u16, d27u8, d5u8);
+ q4u16 = vmlal_u8(q4u16, d28u8, d5u8);
+ q5u16 = vmlal_u8(q5u16, d29u8, d5u8);
+ q6u16 = vmlal_u8(q6u16, d30u8, d5u8);
+
+ q7u16 = vmull_u8(d25u8, d3u8);
+ q8u16 = vmull_u8(d26u8, d3u8);
+ q9u16 = vmull_u8(d27u8, d3u8);
+ q10u16 = vmull_u8(d28u8, d3u8);
+
+ q3s16 = vreinterpretq_s16_u16(q3u16);
+ q4s16 = vreinterpretq_s16_u16(q4u16);
+ q5s16 = vreinterpretq_s16_u16(q5u16);
+ q6s16 = vreinterpretq_s16_u16(q6u16);
+ q7s16 = vreinterpretq_s16_u16(q7u16);
+ q8s16 = vreinterpretq_s16_u16(q8u16);
+ q9s16 = vreinterpretq_s16_u16(q9u16);
+ q10s16 = vreinterpretq_s16_u16(q10u16);
+
+ q7s16 = vqaddq_s16(q7s16, q3s16);
+ q8s16 = vqaddq_s16(q8s16, q4s16);
+ q9s16 = vqaddq_s16(q9s16, q5s16);
+ q10s16 = vqaddq_s16(q10s16, q6s16);
+
+ d6u8 = vqrshrun_n_s16(q7s16, 7);
+ d7u8 = vqrshrun_n_s16(q8s16, 7);
+ d8u8 = vqrshrun_n_s16(q9s16, 7);
+ d9u8 = vqrshrun_n_s16(q10s16, 7);
+
+ vst1_u8(dst_ptr, d6u8);
+ dst_ptr += dst_pitch;
+ vst1_u8(dst_ptr, d7u8);
+ dst_ptr += dst_pitch;
+ vst1_u8(dst_ptr, d8u8);
+ dst_ptr += dst_pitch;
+ vst1_u8(dst_ptr, d9u8);
+ return;
+}
+
+void vp8_sixtap_predict8x8_neon(
+ unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch) {
+ unsigned char *src, *tmpp;
+ unsigned char tmp[64];
+ int i;
+ uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8;
+ uint8x8_t d18u8, d19u8, d20u8, d21u8, d22u8, d23u8, d24u8, d25u8;
+ uint8x8_t d26u8, d27u8, d28u8, d29u8, d30u8, d31u8;
+ int8x8_t dtmps8, d0s8, d1s8, d2s8, d3s8, d4s8, d5s8;
+ uint16x8_t q3u16, q4u16, q5u16, q6u16, q7u16;
+ uint16x8_t q8u16, q9u16, q10u16, q11u16, q12u16;
+ int16x8_t q3s16, q4s16, q5s16, q6s16, q7s16;
+ int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16;
+ uint8x16_t q3u8, q4u8, q5u8, q6u8, q7u8, q9u8, q10u8, q11u8, q12u8;
+
+ if (xoffset == 0) { // secondpass_filter8x8_only
+ // load second_pass filter
+ dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]);
+ d0s8 = vdup_lane_s8(dtmps8, 0);
+ d1s8 = vdup_lane_s8(dtmps8, 1);
+ d2s8 = vdup_lane_s8(dtmps8, 2);
+ d3s8 = vdup_lane_s8(dtmps8, 3);
+ d4s8 = vdup_lane_s8(dtmps8, 4);
+ d5s8 = vdup_lane_s8(dtmps8, 5);
+ d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8));
+ d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8));
+ d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8));
+ d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8));
+ d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8));
+ d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8));
+
+ // load src data
+ src = src_ptr - src_pixels_per_line * 2;
+ d18u8 = vld1_u8(src);
+ src += src_pixels_per_line;
+ d19u8 = vld1_u8(src);
+ src += src_pixels_per_line;
+ d20u8 = vld1_u8(src);
+ src += src_pixels_per_line;
+ d21u8 = vld1_u8(src);
+ src += src_pixels_per_line;
+ d22u8 = vld1_u8(src);
+ src += src_pixels_per_line;
+ d23u8 = vld1_u8(src);
+ src += src_pixels_per_line;
+ d24u8 = vld1_u8(src);
+ src += src_pixels_per_line;
+ d25u8 = vld1_u8(src);
+ src += src_pixels_per_line;
+ d26u8 = vld1_u8(src);
+ src += src_pixels_per_line;
+ d27u8 = vld1_u8(src);
+ src += src_pixels_per_line;
+ d28u8 = vld1_u8(src);
+ src += src_pixels_per_line;
+ d29u8 = vld1_u8(src);
+ src += src_pixels_per_line;
+ d30u8 = vld1_u8(src);
+
+ for (i = 2; i > 0; i--) {
+ q3u16 = vmull_u8(d18u8, d0u8);
+ q4u16 = vmull_u8(d19u8, d0u8);
+ q5u16 = vmull_u8(d20u8, d0u8);
+ q6u16 = vmull_u8(d21u8, d0u8);
+
+ q3u16 = vmlsl_u8(q3u16, d19u8, d1u8);
+ q4u16 = vmlsl_u8(q4u16, d20u8, d1u8);
+ q5u16 = vmlsl_u8(q5u16, d21u8, d1u8);
+ q6u16 = vmlsl_u8(q6u16, d22u8, d1u8);
+
+ q3u16 = vmlsl_u8(q3u16, d22u8, d4u8);
+ q4u16 = vmlsl_u8(q4u16, d23u8, d4u8);
+ q5u16 = vmlsl_u8(q5u16, d24u8, d4u8);
+ q6u16 = vmlsl_u8(q6u16, d25u8, d4u8);
+
+ q3u16 = vmlal_u8(q3u16, d20u8, d2u8);
+ q4u16 = vmlal_u8(q4u16, d21u8, d2u8);
+ q5u16 = vmlal_u8(q5u16, d22u8, d2u8);
+ q6u16 = vmlal_u8(q6u16, d23u8, d2u8);
+
+ q3u16 = vmlal_u8(q3u16, d23u8, d5u8);
+ q4u16 = vmlal_u8(q4u16, d24u8, d5u8);
+ q5u16 = vmlal_u8(q5u16, d25u8, d5u8);
+ q6u16 = vmlal_u8(q6u16, d26u8, d5u8);
+
+ q7u16 = vmull_u8(d21u8, d3u8);
+ q8u16 = vmull_u8(d22u8, d3u8);
+ q9u16 = vmull_u8(d23u8, d3u8);
+ q10u16 = vmull_u8(d24u8, d3u8);
+
+ q3s16 = vreinterpretq_s16_u16(q3u16);
+ q4s16 = vreinterpretq_s16_u16(q4u16);
+ q5s16 = vreinterpretq_s16_u16(q5u16);
+ q6s16 = vreinterpretq_s16_u16(q6u16);
+ q7s16 = vreinterpretq_s16_u16(q7u16);
+ q8s16 = vreinterpretq_s16_u16(q8u16);
+ q9s16 = vreinterpretq_s16_u16(q9u16);
+ q10s16 = vreinterpretq_s16_u16(q10u16);
+
+ q7s16 = vqaddq_s16(q7s16, q3s16);
+ q8s16 = vqaddq_s16(q8s16, q4s16);
+ q9s16 = vqaddq_s16(q9s16, q5s16);
+ q10s16 = vqaddq_s16(q10s16, q6s16);
+
+ d6u8 = vqrshrun_n_s16(q7s16, 7);
+ d7u8 = vqrshrun_n_s16(q8s16, 7);
+ d8u8 = vqrshrun_n_s16(q9s16, 7);
+ d9u8 = vqrshrun_n_s16(q10s16, 7);
+
+ d18u8 = d22u8;
+ d19u8 = d23u8;
+ d20u8 = d24u8;
+ d21u8 = d25u8;
+ d22u8 = d26u8;
+ d23u8 = d27u8;
+ d24u8 = d28u8;
+ d25u8 = d29u8;
+ d26u8 = d30u8;
+
+ vst1_u8(dst_ptr, d6u8);
+ dst_ptr += dst_pitch;
+ vst1_u8(dst_ptr, d7u8);
+ dst_ptr += dst_pitch;
+ vst1_u8(dst_ptr, d8u8);
+ dst_ptr += dst_pitch;
+ vst1_u8(dst_ptr, d9u8);
+ dst_ptr += dst_pitch;
+ }
+ return;
+ }
+
+ // load first_pass filter
+ dtmps8 = vld1_s8(vp8_sub_pel_filters[xoffset]);
+ d0s8 = vdup_lane_s8(dtmps8, 0);
+ d1s8 = vdup_lane_s8(dtmps8, 1);
+ d2s8 = vdup_lane_s8(dtmps8, 2);
+ d3s8 = vdup_lane_s8(dtmps8, 3);
+ d4s8 = vdup_lane_s8(dtmps8, 4);
+ d5s8 = vdup_lane_s8(dtmps8, 5);
+ d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8));
+ d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8));
+ d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8));
+ d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8));
+ d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8));
+ d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8));
+
+ // First pass: output_height lines x output_width columns (9x4)
+ if (yoffset == 0) // firstpass_filter4x4_only
+ src = src_ptr - 2;
+ else
+ src = src_ptr - 2 - (src_pixels_per_line * 2);
+
+ tmpp = tmp;
+ for (i = 2; i > 0; i--) {
+ q3u8 = vld1q_u8(src);
+ src += src_pixels_per_line;
+ q4u8 = vld1q_u8(src);
+ src += src_pixels_per_line;
+ q5u8 = vld1q_u8(src);
+ src += src_pixels_per_line;
+ q6u8 = vld1q_u8(src);
+ src += src_pixels_per_line;
+
+ __builtin_prefetch(src);
+ __builtin_prefetch(src + src_pixels_per_line);
+ __builtin_prefetch(src + src_pixels_per_line * 2);
+
+ q7u16 = vmull_u8(vget_low_u8(q3u8), d0u8);
+ q8u16 = vmull_u8(vget_low_u8(q4u8), d0u8);
+ q9u16 = vmull_u8(vget_low_u8(q5u8), d0u8);
+ q10u16 = vmull_u8(vget_low_u8(q6u8), d0u8);
+
+ d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1);
+ d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1);
+ d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1);
+ d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 1);
+
+ q7u16 = vmlsl_u8(q7u16, d28u8, d1u8);
+ q8u16 = vmlsl_u8(q8u16, d29u8, d1u8);
+ q9u16 = vmlsl_u8(q9u16, d30u8, d1u8);
+ q10u16 = vmlsl_u8(q10u16, d31u8, d1u8);
+
+ d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 4);
+ d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 4);
+ d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 4);
+ d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 4);
+
+ q7u16 = vmlsl_u8(q7u16, d28u8, d4u8);
+ q8u16 = vmlsl_u8(q8u16, d29u8, d4u8);
+ q9u16 = vmlsl_u8(q9u16, d30u8, d4u8);
+ q10u16 = vmlsl_u8(q10u16, d31u8, d4u8);
+
+ d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 2);
+ d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 2);
+ d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 2);
+ d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 2);
+
+ q7u16 = vmlal_u8(q7u16, d28u8, d2u8);
+ q8u16 = vmlal_u8(q8u16, d29u8, d2u8);
+ q9u16 = vmlal_u8(q9u16, d30u8, d2u8);
+ q10u16 = vmlal_u8(q10u16, d31u8, d2u8);
+
+ d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5);
+ d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5);
+ d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5);
+ d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5);
+
+ q7u16 = vmlal_u8(q7u16, d28u8, d5u8);
+ q8u16 = vmlal_u8(q8u16, d29u8, d5u8);
+ q9u16 = vmlal_u8(q9u16, d30u8, d5u8);
+ q10u16 = vmlal_u8(q10u16, d31u8, d5u8);
+
+ d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 3);
+ d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 3);
+ d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 3);
+ d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 3);
+
+ q3u16 = vmull_u8(d28u8, d3u8);
+ q4u16 = vmull_u8(d29u8, d3u8);
+ q5u16 = vmull_u8(d30u8, d3u8);
+ q6u16 = vmull_u8(d31u8, d3u8);
+
+ q3s16 = vreinterpretq_s16_u16(q3u16);
+ q4s16 = vreinterpretq_s16_u16(q4u16);
+ q5s16 = vreinterpretq_s16_u16(q5u16);
+ q6s16 = vreinterpretq_s16_u16(q6u16);
+ q7s16 = vreinterpretq_s16_u16(q7u16);
+ q8s16 = vreinterpretq_s16_u16(q8u16);
+ q9s16 = vreinterpretq_s16_u16(q9u16);
+ q10s16 = vreinterpretq_s16_u16(q10u16);
+
+ q7s16 = vqaddq_s16(q7s16, q3s16);
+ q8s16 = vqaddq_s16(q8s16, q4s16);
+ q9s16 = vqaddq_s16(q9s16, q5s16);
+ q10s16 = vqaddq_s16(q10s16, q6s16);
+
+ d22u8 = vqrshrun_n_s16(q7s16, 7);
+ d23u8 = vqrshrun_n_s16(q8s16, 7);
+ d24u8 = vqrshrun_n_s16(q9s16, 7);
+ d25u8 = vqrshrun_n_s16(q10s16, 7);
+
+ if (yoffset == 0) { // firstpass_filter8x4_only
+ vst1_u8(dst_ptr, d22u8);
+ dst_ptr += dst_pitch;
+ vst1_u8(dst_ptr, d23u8);
+ dst_ptr += dst_pitch;
+ vst1_u8(dst_ptr, d24u8);
+ dst_ptr += dst_pitch;
+ vst1_u8(dst_ptr, d25u8);
+ dst_ptr += dst_pitch;
+ } else {
+ vst1_u8(tmpp, d22u8);
+ tmpp += 8;
+ vst1_u8(tmpp, d23u8);
+ tmpp += 8;
+ vst1_u8(tmpp, d24u8);
+ tmpp += 8;
+ vst1_u8(tmpp, d25u8);
+ tmpp += 8;
+ }
+ }
+ if (yoffset == 0)
+ return;
+
+ // First Pass on rest 5-line data
+ q3u8 = vld1q_u8(src);
+ src += src_pixels_per_line;
+ q4u8 = vld1q_u8(src);
+ src += src_pixels_per_line;
+ q5u8 = vld1q_u8(src);
+ src += src_pixels_per_line;
+ q6u8 = vld1q_u8(src);
+ src += src_pixels_per_line;
+ q7u8 = vld1q_u8(src);
+
+ q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8);
+ q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8);
+ q10u16 = vmull_u8(vget_low_u8(q5u8), d0u8);
+ q11u16 = vmull_u8(vget_low_u8(q6u8), d0u8);
+ q12u16 = vmull_u8(vget_low_u8(q7u8), d0u8);
+
+ d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1);
+ d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1);
+ d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1);
+ d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 1);
+ d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 1);
+
+ q8u16 = vmlsl_u8(q8u16, d27u8, d1u8);
+ q9u16 = vmlsl_u8(q9u16, d28u8, d1u8);
+ q10u16 = vmlsl_u8(q10u16, d29u8, d1u8);
+ q11u16 = vmlsl_u8(q11u16, d30u8, d1u8);
+ q12u16 = vmlsl_u8(q12u16, d31u8, d1u8);
+
+ d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 4);
+ d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 4);
+ d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 4);
+ d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 4);
+ d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 4);
+
+ q8u16 = vmlsl_u8(q8u16, d27u8, d4u8);
+ q9u16 = vmlsl_u8(q9u16, d28u8, d4u8);
+ q10u16 = vmlsl_u8(q10u16, d29u8, d4u8);
+ q11u16 = vmlsl_u8(q11u16, d30u8, d4u8);
+ q12u16 = vmlsl_u8(q12u16, d31u8, d4u8);
+
+ d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 2);
+ d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 2);
+ d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 2);
+ d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 2);
+ d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 2);
+
+ q8u16 = vmlal_u8(q8u16, d27u8, d2u8);
+ q9u16 = vmlal_u8(q9u16, d28u8, d2u8);
+ q10u16 = vmlal_u8(q10u16, d29u8, d2u8);
+ q11u16 = vmlal_u8(q11u16, d30u8, d2u8);
+ q12u16 = vmlal_u8(q12u16, d31u8, d2u8);
+
+ d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5);
+ d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5);
+ d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5);
+ d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5);
+ d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 5);
+
+ q8u16 = vmlal_u8(q8u16, d27u8, d5u8);
+ q9u16 = vmlal_u8(q9u16, d28u8, d5u8);
+ q10u16 = vmlal_u8(q10u16, d29u8, d5u8);
+ q11u16 = vmlal_u8(q11u16, d30u8, d5u8);
+ q12u16 = vmlal_u8(q12u16, d31u8, d5u8);
+
+ d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 3);
+ d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 3);
+ d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 3);
+ d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 3);
+ d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 3);
+
+ q3u16 = vmull_u8(d27u8, d3u8);
+ q4u16 = vmull_u8(d28u8, d3u8);
+ q5u16 = vmull_u8(d29u8, d3u8);
+ q6u16 = vmull_u8(d30u8, d3u8);
+ q7u16 = vmull_u8(d31u8, d3u8);
+
+ q3s16 = vreinterpretq_s16_u16(q3u16);
+ q4s16 = vreinterpretq_s16_u16(q4u16);
+ q5s16 = vreinterpretq_s16_u16(q5u16);
+ q6s16 = vreinterpretq_s16_u16(q6u16);
+ q7s16 = vreinterpretq_s16_u16(q7u16);
+ q8s16 = vreinterpretq_s16_u16(q8u16);
+ q9s16 = vreinterpretq_s16_u16(q9u16);
+ q10s16 = vreinterpretq_s16_u16(q10u16);
+ q11s16 = vreinterpretq_s16_u16(q11u16);
+ q12s16 = vreinterpretq_s16_u16(q12u16);
+
+ q8s16 = vqaddq_s16(q8s16, q3s16);
+ q9s16 = vqaddq_s16(q9s16, q4s16);
+ q10s16 = vqaddq_s16(q10s16, q5s16);
+ q11s16 = vqaddq_s16(q11s16, q6s16);
+ q12s16 = vqaddq_s16(q12s16, q7s16);
+
+ d26u8 = vqrshrun_n_s16(q8s16, 7);
+ d27u8 = vqrshrun_n_s16(q9s16, 7);
+ d28u8 = vqrshrun_n_s16(q10s16, 7);
+ d29u8 = vqrshrun_n_s16(q11s16, 7);
+ d30u8 = vqrshrun_n_s16(q12s16, 7);
+
+ // Second pass: 8x8
+ dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]);
+ d0s8 = vdup_lane_s8(dtmps8, 0);
+ d1s8 = vdup_lane_s8(dtmps8, 1);
+ d2s8 = vdup_lane_s8(dtmps8, 2);
+ d3s8 = vdup_lane_s8(dtmps8, 3);
+ d4s8 = vdup_lane_s8(dtmps8, 4);
+ d5s8 = vdup_lane_s8(dtmps8, 5);
+ d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8));
+ d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8));
+ d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8));
+ d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8));
+ d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8));
+ d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8));
+
+ tmpp = tmp;
+ q9u8 = vld1q_u8(tmpp);
+ tmpp += 16;
+ q10u8 = vld1q_u8(tmpp);
+ tmpp += 16;
+ q11u8 = vld1q_u8(tmpp);
+ tmpp += 16;
+ q12u8 = vld1q_u8(tmpp);
+
+ d18u8 = vget_low_u8(q9u8);
+ d19u8 = vget_high_u8(q9u8);
+ d20u8 = vget_low_u8(q10u8);
+ d21u8 = vget_high_u8(q10u8);
+ d22u8 = vget_low_u8(q11u8);
+ d23u8 = vget_high_u8(q11u8);
+ d24u8 = vget_low_u8(q12u8);
+ d25u8 = vget_high_u8(q12u8);
+
+ for (i = 2; i > 0; i--) {
+ q3u16 = vmull_u8(d18u8, d0u8);
+ q4u16 = vmull_u8(d19u8, d0u8);
+ q5u16 = vmull_u8(d20u8, d0u8);
+ q6u16 = vmull_u8(d21u8, d0u8);
+
+ q3u16 = vmlsl_u8(q3u16, d19u8, d1u8);
+ q4u16 = vmlsl_u8(q4u16, d20u8, d1u8);
+ q5u16 = vmlsl_u8(q5u16, d21u8, d1u8);
+ q6u16 = vmlsl_u8(q6u16, d22u8, d1u8);
+
+ q3u16 = vmlsl_u8(q3u16, d22u8, d4u8);
+ q4u16 = vmlsl_u8(q4u16, d23u8, d4u8);
+ q5u16 = vmlsl_u8(q5u16, d24u8, d4u8);
+ q6u16 = vmlsl_u8(q6u16, d25u8, d4u8);
+
+ q3u16 = vmlal_u8(q3u16, d20u8, d2u8);
+ q4u16 = vmlal_u8(q4u16, d21u8, d2u8);
+ q5u16 = vmlal_u8(q5u16, d22u8, d2u8);
+ q6u16 = vmlal_u8(q6u16, d23u8, d2u8);
+
+ q3u16 = vmlal_u8(q3u16, d23u8, d5u8);
+ q4u16 = vmlal_u8(q4u16, d24u8, d5u8);
+ q5u16 = vmlal_u8(q5u16, d25u8, d5u8);
+ q6u16 = vmlal_u8(q6u16, d26u8, d5u8);
+
+ q7u16 = vmull_u8(d21u8, d3u8);
+ q8u16 = vmull_u8(d22u8, d3u8);
+ q9u16 = vmull_u8(d23u8, d3u8);
+ q10u16 = vmull_u8(d24u8, d3u8);
+
+ q3s16 = vreinterpretq_s16_u16(q3u16);
+ q4s16 = vreinterpretq_s16_u16(q4u16);
+ q5s16 = vreinterpretq_s16_u16(q5u16);
+ q6s16 = vreinterpretq_s16_u16(q6u16);
+ q7s16 = vreinterpretq_s16_u16(q7u16);
+ q8s16 = vreinterpretq_s16_u16(q8u16);
+ q9s16 = vreinterpretq_s16_u16(q9u16);
+ q10s16 = vreinterpretq_s16_u16(q10u16);
+
+ q7s16 = vqaddq_s16(q7s16, q3s16);
+ q8s16 = vqaddq_s16(q8s16, q4s16);
+ q9s16 = vqaddq_s16(q9s16, q5s16);
+ q10s16 = vqaddq_s16(q10s16, q6s16);
+
+ d6u8 = vqrshrun_n_s16(q7s16, 7);
+ d7u8 = vqrshrun_n_s16(q8s16, 7);
+ d8u8 = vqrshrun_n_s16(q9s16, 7);
+ d9u8 = vqrshrun_n_s16(q10s16, 7);
+
+ d18u8 = d22u8;
+ d19u8 = d23u8;
+ d20u8 = d24u8;
+ d21u8 = d25u8;
+ d22u8 = d26u8;
+ d23u8 = d27u8;
+ d24u8 = d28u8;
+ d25u8 = d29u8;
+ d26u8 = d30u8;
+
+ vst1_u8(dst_ptr, d6u8);
+ dst_ptr += dst_pitch;
+ vst1_u8(dst_ptr, d7u8);
+ dst_ptr += dst_pitch;
+ vst1_u8(dst_ptr, d8u8);
+ dst_ptr += dst_pitch;
+ vst1_u8(dst_ptr, d9u8);
+ dst_ptr += dst_pitch;
+ }
+ return;
+}
+
+void vp8_sixtap_predict16x16_neon(
+ unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch) {
+ unsigned char *src, *src_tmp, *dst, *tmpp;
+ unsigned char tmp[336];
+ int i, j;
+ uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8;
+ uint8x8_t d10u8, d11u8, d12u8, d13u8, d14u8, d15u8, d18u8, d19u8;
+ uint8x8_t d20u8, d21u8, d22u8, d23u8, d24u8, d25u8, d26u8, d27u8;
+ uint8x8_t d28u8, d29u8, d30u8, d31u8;
+ int8x8_t dtmps8, d0s8, d1s8, d2s8, d3s8, d4s8, d5s8;
+ uint8x16_t q3u8, q4u8;
+ uint16x8_t q3u16, q4u16, q5u16, q6u16, q7u16, q8u16, q9u16, q10u16;
+ uint16x8_t q11u16, q12u16, q13u16, q15u16;
+ int16x8_t q3s16, q4s16, q5s16, q6s16, q7s16, q8s16, q9s16, q10s16;
+ int16x8_t q11s16, q12s16, q13s16, q15s16;
+
+ if (xoffset == 0) { // secondpass_filter8x8_only
+ // load second_pass filter
+ dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]);
+ d0s8 = vdup_lane_s8(dtmps8, 0);
+ d1s8 = vdup_lane_s8(dtmps8, 1);
+ d2s8 = vdup_lane_s8(dtmps8, 2);
+ d3s8 = vdup_lane_s8(dtmps8, 3);
+ d4s8 = vdup_lane_s8(dtmps8, 4);
+ d5s8 = vdup_lane_s8(dtmps8, 5);
+ d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8));
+ d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8));
+ d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8));
+ d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8));
+ d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8));
+ d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8));
+
+ // load src data
+ src_tmp = src_ptr - src_pixels_per_line * 2;
+ for (i = 0; i < 2; i++) {
+ src = src_tmp + i * 8;
+ dst = dst_ptr + i * 8;
+ d18u8 = vld1_u8(src);
+ src += src_pixels_per_line;
+ d19u8 = vld1_u8(src);
+ src += src_pixels_per_line;
+ d20u8 = vld1_u8(src);
+ src += src_pixels_per_line;
+ d21u8 = vld1_u8(src);
+ src += src_pixels_per_line;
+ d22u8 = vld1_u8(src);
+ src += src_pixels_per_line;
+ for (j = 0; j < 4; j++) {
+ d23u8 = vld1_u8(src);
+ src += src_pixels_per_line;
+ d24u8 = vld1_u8(src);
+ src += src_pixels_per_line;
+ d25u8 = vld1_u8(src);
+ src += src_pixels_per_line;
+ d26u8 = vld1_u8(src);
+ src += src_pixels_per_line;
+
+ q3u16 = vmull_u8(d18u8, d0u8);
+ q4u16 = vmull_u8(d19u8, d0u8);
+ q5u16 = vmull_u8(d20u8, d0u8);
+ q6u16 = vmull_u8(d21u8, d0u8);
+
+ q3u16 = vmlsl_u8(q3u16, d19u8, d1u8);
+ q4u16 = vmlsl_u8(q4u16, d20u8, d1u8);
+ q5u16 = vmlsl_u8(q5u16, d21u8, d1u8);
+ q6u16 = vmlsl_u8(q6u16, d22u8, d1u8);
+
+ q3u16 = vmlsl_u8(q3u16, d22u8, d4u8);
+ q4u16 = vmlsl_u8(q4u16, d23u8, d4u8);
+ q5u16 = vmlsl_u8(q5u16, d24u8, d4u8);
+ q6u16 = vmlsl_u8(q6u16, d25u8, d4u8);
+
+ q3u16 = vmlal_u8(q3u16, d20u8, d2u8);
+ q4u16 = vmlal_u8(q4u16, d21u8, d2u8);
+ q5u16 = vmlal_u8(q5u16, d22u8, d2u8);
+ q6u16 = vmlal_u8(q6u16, d23u8, d2u8);
+
+ q3u16 = vmlal_u8(q3u16, d23u8, d5u8);
+ q4u16 = vmlal_u8(q4u16, d24u8, d5u8);
+ q5u16 = vmlal_u8(q5u16, d25u8, d5u8);
+ q6u16 = vmlal_u8(q6u16, d26u8, d5u8);
+
+ q7u16 = vmull_u8(d21u8, d3u8);
+ q8u16 = vmull_u8(d22u8, d3u8);
+ q9u16 = vmull_u8(d23u8, d3u8);
+ q10u16 = vmull_u8(d24u8, d3u8);
+
+ q3s16 = vreinterpretq_s16_u16(q3u16);
+ q4s16 = vreinterpretq_s16_u16(q4u16);
+ q5s16 = vreinterpretq_s16_u16(q5u16);
+ q6s16 = vreinterpretq_s16_u16(q6u16);
+ q7s16 = vreinterpretq_s16_u16(q7u16);
+ q8s16 = vreinterpretq_s16_u16(q8u16);
+ q9s16 = vreinterpretq_s16_u16(q9u16);
+ q10s16 = vreinterpretq_s16_u16(q10u16);
+
+ q7s16 = vqaddq_s16(q7s16, q3s16);
+ q8s16 = vqaddq_s16(q8s16, q4s16);
+ q9s16 = vqaddq_s16(q9s16, q5s16);
+ q10s16 = vqaddq_s16(q10s16, q6s16);
+
+ d6u8 = vqrshrun_n_s16(q7s16, 7);
+ d7u8 = vqrshrun_n_s16(q8s16, 7);
+ d8u8 = vqrshrun_n_s16(q9s16, 7);
+ d9u8 = vqrshrun_n_s16(q10s16, 7);
+
+ d18u8 = d22u8;
+ d19u8 = d23u8;
+ d20u8 = d24u8;
+ d21u8 = d25u8;
+ d22u8 = d26u8;
+
+ vst1_u8(dst, d6u8);
+ dst += dst_pitch;
+ vst1_u8(dst, d7u8);
+ dst += dst_pitch;
+ vst1_u8(dst, d8u8);
+ dst += dst_pitch;
+ vst1_u8(dst, d9u8);
+ dst += dst_pitch;
+ }
+ }
+ return;
+ }
+
+ // load first_pass filter
+ dtmps8 = vld1_s8(vp8_sub_pel_filters[xoffset]);
+ d0s8 = vdup_lane_s8(dtmps8, 0);
+ d1s8 = vdup_lane_s8(dtmps8, 1);
+ d2s8 = vdup_lane_s8(dtmps8, 2);
+ d3s8 = vdup_lane_s8(dtmps8, 3);
+ d4s8 = vdup_lane_s8(dtmps8, 4);
+ d5s8 = vdup_lane_s8(dtmps8, 5);
+ d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8));
+ d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8));
+ d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8));
+ d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8));
+ d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8));
+ d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8));
+
+ // First pass: output_height lines x output_width columns (9x4)
+ if (yoffset == 0) { // firstpass_filter4x4_only
+ src = src_ptr - 2;
+ dst = dst_ptr;
+ for (i = 0; i < 8; i++) {
+ d6u8 = vld1_u8(src);
+ d7u8 = vld1_u8(src + 8);
+ d8u8 = vld1_u8(src + 16);
+ src += src_pixels_per_line;
+ d9u8 = vld1_u8(src);
+ d10u8 = vld1_u8(src + 8);
+ d11u8 = vld1_u8(src + 16);
+ src += src_pixels_per_line;
+
+ __builtin_prefetch(src);
+ __builtin_prefetch(src + src_pixels_per_line);
+
+ q6u16 = vmull_u8(d6u8, d0u8);
+ q7u16 = vmull_u8(d7u8, d0u8);
+ q8u16 = vmull_u8(d9u8, d0u8);
+ q9u16 = vmull_u8(d10u8, d0u8);
+
+ d20u8 = vext_u8(d6u8, d7u8, 1);
+ d21u8 = vext_u8(d9u8, d10u8, 1);
+ d22u8 = vext_u8(d7u8, d8u8, 1);
+ d23u8 = vext_u8(d10u8, d11u8, 1);
+ d24u8 = vext_u8(d6u8, d7u8, 4);
+ d25u8 = vext_u8(d9u8, d10u8, 4);
+ d26u8 = vext_u8(d7u8, d8u8, 4);
+ d27u8 = vext_u8(d10u8, d11u8, 4);
+ d28u8 = vext_u8(d6u8, d7u8, 5);
+ d29u8 = vext_u8(d9u8, d10u8, 5);
+
+ q6u16 = vmlsl_u8(q6u16, d20u8, d1u8);
+ q8u16 = vmlsl_u8(q8u16, d21u8, d1u8);
+ q7u16 = vmlsl_u8(q7u16, d22u8, d1u8);
+ q9u16 = vmlsl_u8(q9u16, d23u8, d1u8);
+ q6u16 = vmlsl_u8(q6u16, d24u8, d4u8);
+ q8u16 = vmlsl_u8(q8u16, d25u8, d4u8);
+ q7u16 = vmlsl_u8(q7u16, d26u8, d4u8);
+ q9u16 = vmlsl_u8(q9u16, d27u8, d4u8);
+ q6u16 = vmlal_u8(q6u16, d28u8, d5u8);
+ q8u16 = vmlal_u8(q8u16, d29u8, d5u8);
+
+ d20u8 = vext_u8(d7u8, d8u8, 5);
+ d21u8 = vext_u8(d10u8, d11u8, 5);
+ d22u8 = vext_u8(d6u8, d7u8, 2);
+ d23u8 = vext_u8(d9u8, d10u8, 2);
+ d24u8 = vext_u8(d7u8, d8u8, 2);
+ d25u8 = vext_u8(d10u8, d11u8, 2);
+ d26u8 = vext_u8(d6u8, d7u8, 3);
+ d27u8 = vext_u8(d9u8, d10u8, 3);
+ d28u8 = vext_u8(d7u8, d8u8, 3);
+ d29u8 = vext_u8(d10u8, d11u8, 3);
+
+ q7u16 = vmlal_u8(q7u16, d20u8, d5u8);
+ q9u16 = vmlal_u8(q9u16, d21u8, d5u8);
+ q6u16 = vmlal_u8(q6u16, d22u8, d2u8);
+ q8u16 = vmlal_u8(q8u16, d23u8, d2u8);
+ q7u16 = vmlal_u8(q7u16, d24u8, d2u8);
+ q9u16 = vmlal_u8(q9u16, d25u8, d2u8);
+
+ q10u16 = vmull_u8(d26u8, d3u8);
+ q11u16 = vmull_u8(d27u8, d3u8);
+ q12u16 = vmull_u8(d28u8, d3u8);
+ q15u16 = vmull_u8(d29u8, d3u8);
+
+ q6s16 = vreinterpretq_s16_u16(q6u16);
+ q7s16 = vreinterpretq_s16_u16(q7u16);
+ q8s16 = vreinterpretq_s16_u16(q8u16);
+ q9s16 = vreinterpretq_s16_u16(q9u16);
+ q10s16 = vreinterpretq_s16_u16(q10u16);
+ q11s16 = vreinterpretq_s16_u16(q11u16);
+ q12s16 = vreinterpretq_s16_u16(q12u16);
+ q15s16 = vreinterpretq_s16_u16(q15u16);
+
+ q6s16 = vqaddq_s16(q6s16, q10s16);
+ q8s16 = vqaddq_s16(q8s16, q11s16);
+ q7s16 = vqaddq_s16(q7s16, q12s16);
+ q9s16 = vqaddq_s16(q9s16, q15s16);
+
+ d6u8 = vqrshrun_n_s16(q6s16, 7);
+ d7u8 = vqrshrun_n_s16(q7s16, 7);
+ d8u8 = vqrshrun_n_s16(q8s16, 7);
+ d9u8 = vqrshrun_n_s16(q9s16, 7);
+
+ q3u8 = vcombine_u8(d6u8, d7u8);
+ q4u8 = vcombine_u8(d8u8, d9u8);
+ vst1q_u8(dst, q3u8);
+ dst += dst_pitch;
+ vst1q_u8(dst, q4u8);
+ dst += dst_pitch;
+ }
+ return;
+ }
+
+ src = src_ptr - 2 - src_pixels_per_line * 2;
+ tmpp = tmp;
+ for (i = 0; i < 7; i++) {
+ d6u8 = vld1_u8(src);
+ d7u8 = vld1_u8(src + 8);
+ d8u8 = vld1_u8(src + 16);
+ src += src_pixels_per_line;
+ d9u8 = vld1_u8(src);
+ d10u8 = vld1_u8(src + 8);
+ d11u8 = vld1_u8(src + 16);
+ src += src_pixels_per_line;
+ d12u8 = vld1_u8(src);
+ d13u8 = vld1_u8(src + 8);
+ d14u8 = vld1_u8(src + 16);
+ src += src_pixels_per_line;
+
+ __builtin_prefetch(src);
+ __builtin_prefetch(src + src_pixels_per_line);
+ __builtin_prefetch(src + src_pixels_per_line * 2);
+
+ q8u16 = vmull_u8(d6u8, d0u8);
+ q9u16 = vmull_u8(d7u8, d0u8);
+ q10u16 = vmull_u8(d9u8, d0u8);
+ q11u16 = vmull_u8(d10u8, d0u8);
+ q12u16 = vmull_u8(d12u8, d0u8);
+ q13u16 = vmull_u8(d13u8, d0u8);
+
+ d28u8 = vext_u8(d6u8, d7u8, 1);
+ d29u8 = vext_u8(d9u8, d10u8, 1);
+ d30u8 = vext_u8(d12u8, d13u8, 1);
+ q8u16 = vmlsl_u8(q8u16, d28u8, d1u8);
+ q10u16 = vmlsl_u8(q10u16, d29u8, d1u8);
+ q12u16 = vmlsl_u8(q12u16, d30u8, d1u8);
+ d28u8 = vext_u8(d7u8, d8u8, 1);
+ d29u8 = vext_u8(d10u8, d11u8, 1);
+ d30u8 = vext_u8(d13u8, d14u8, 1);
+ q9u16 = vmlsl_u8(q9u16, d28u8, d1u8);
+ q11u16 = vmlsl_u8(q11u16, d29u8, d1u8);
+ q13u16 = vmlsl_u8(q13u16, d30u8, d1u8);
+
+ d28u8 = vext_u8(d6u8, d7u8, 4);
+ d29u8 = vext_u8(d9u8, d10u8, 4);
+ d30u8 = vext_u8(d12u8, d13u8, 4);
+ q8u16 = vmlsl_u8(q8u16, d28u8, d4u8);
+ q10u16 = vmlsl_u8(q10u16, d29u8, d4u8);
+ q12u16 = vmlsl_u8(q12u16, d30u8, d4u8);
+ d28u8 = vext_u8(d7u8, d8u8, 4);
+ d29u8 = vext_u8(d10u8, d11u8, 4);
+ d30u8 = vext_u8(d13u8, d14u8, 4);
+ q9u16 = vmlsl_u8(q9u16, d28u8, d4u8);
+ q11u16 = vmlsl_u8(q11u16, d29u8, d4u8);
+ q13u16 = vmlsl_u8(q13u16, d30u8, d4u8);
+
+ d28u8 = vext_u8(d6u8, d7u8, 5);
+ d29u8 = vext_u8(d9u8, d10u8, 5);
+ d30u8 = vext_u8(d12u8, d13u8, 5);
+ q8u16 = vmlal_u8(q8u16, d28u8, d5u8);
+ q10u16 = vmlal_u8(q10u16, d29u8, d5u8);
+ q12u16 = vmlal_u8(q12u16, d30u8, d5u8);
+ d28u8 = vext_u8(d7u8, d8u8, 5);
+ d29u8 = vext_u8(d10u8, d11u8, 5);
+ d30u8 = vext_u8(d13u8, d14u8, 5);
+ q9u16 = vmlal_u8(q9u16, d28u8, d5u8);
+ q11u16 = vmlal_u8(q11u16, d29u8, d5u8);
+ q13u16 = vmlal_u8(q13u16, d30u8, d5u8);
+
+ d28u8 = vext_u8(d6u8, d7u8, 2);
+ d29u8 = vext_u8(d9u8, d10u8, 2);
+ d30u8 = vext_u8(d12u8, d13u8, 2);
+ q8u16 = vmlal_u8(q8u16, d28u8, d2u8);
+ q10u16 = vmlal_u8(q10u16, d29u8, d2u8);
+ q12u16 = vmlal_u8(q12u16, d30u8, d2u8);
+ d28u8 = vext_u8(d7u8, d8u8, 2);
+ d29u8 = vext_u8(d10u8, d11u8, 2);
+ d30u8 = vext_u8(d13u8, d14u8, 2);
+ q9u16 = vmlal_u8(q9u16, d28u8, d2u8);
+ q11u16 = vmlal_u8(q11u16, d29u8, d2u8);
+ q13u16 = vmlal_u8(q13u16, d30u8, d2u8);
+
+ d28u8 = vext_u8(d6u8, d7u8, 3);
+ d29u8 = vext_u8(d9u8, d10u8, 3);
+ d30u8 = vext_u8(d12u8, d13u8, 3);
+ d15u8 = vext_u8(d7u8, d8u8, 3);
+ d31u8 = vext_u8(d10u8, d11u8, 3);
+ d6u8 = vext_u8(d13u8, d14u8, 3);
+ q4u16 = vmull_u8(d28u8, d3u8);
+ q5u16 = vmull_u8(d29u8, d3u8);
+ q6u16 = vmull_u8(d30u8, d3u8);
+ q4s16 = vreinterpretq_s16_u16(q4u16);
+ q5s16 = vreinterpretq_s16_u16(q5u16);
+ q6s16 = vreinterpretq_s16_u16(q6u16);
+ q8s16 = vreinterpretq_s16_u16(q8u16);
+ q10s16 = vreinterpretq_s16_u16(q10u16);
+ q12s16 = vreinterpretq_s16_u16(q12u16);
+ q8s16 = vqaddq_s16(q8s16, q4s16);
+ q10s16 = vqaddq_s16(q10s16, q5s16);
+ q12s16 = vqaddq_s16(q12s16, q6s16);
+
+ q6u16 = vmull_u8(d15u8, d3u8);
+ q7u16 = vmull_u8(d31u8, d3u8);
+ q3u16 = vmull_u8(d6u8, d3u8);
+ q3s16 = vreinterpretq_s16_u16(q3u16);
+ q6s16 = vreinterpretq_s16_u16(q6u16);
+ q7s16 = vreinterpretq_s16_u16(q7u16);
+ q9s16 = vreinterpretq_s16_u16(q9u16);
+ q11s16 = vreinterpretq_s16_u16(q11u16);
+ q13s16 = vreinterpretq_s16_u16(q13u16);
+ q9s16 = vqaddq_s16(q9s16, q6s16);
+ q11s16 = vqaddq_s16(q11s16, q7s16);
+ q13s16 = vqaddq_s16(q13s16, q3s16);
+
+ d6u8 = vqrshrun_n_s16(q8s16, 7);
+ d7u8 = vqrshrun_n_s16(q9s16, 7);
+ d8u8 = vqrshrun_n_s16(q10s16, 7);
+ d9u8 = vqrshrun_n_s16(q11s16, 7);
+ d10u8 = vqrshrun_n_s16(q12s16, 7);
+ d11u8 = vqrshrun_n_s16(q13s16, 7);
+
+ vst1_u8(tmpp, d6u8);
+ tmpp += 8;
+ vst1_u8(tmpp, d7u8);
+ tmpp += 8;
+ vst1_u8(tmpp, d8u8);
+ tmpp += 8;
+ vst1_u8(tmpp, d9u8);
+ tmpp += 8;
+ vst1_u8(tmpp, d10u8);
+ tmpp += 8;
+ vst1_u8(tmpp, d11u8);
+ tmpp += 8;
+ }
+
+ // Second pass: 16x16
+ dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]);
+ d0s8 = vdup_lane_s8(dtmps8, 0);
+ d1s8 = vdup_lane_s8(dtmps8, 1);
+ d2s8 = vdup_lane_s8(dtmps8, 2);
+ d3s8 = vdup_lane_s8(dtmps8, 3);
+ d4s8 = vdup_lane_s8(dtmps8, 4);
+ d5s8 = vdup_lane_s8(dtmps8, 5);
+ d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8));
+ d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8));
+ d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8));
+ d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8));
+ d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8));
+ d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8));
+
+ for (i = 0; i < 2; i++) {
+ dst = dst_ptr + 8 * i;
+ tmpp = tmp + 8 * i;
+ d18u8 = vld1_u8(tmpp);
+ tmpp += 16;
+ d19u8 = vld1_u8(tmpp);
+ tmpp += 16;
+ d20u8 = vld1_u8(tmpp);
+ tmpp += 16;
+ d21u8 = vld1_u8(tmpp);
+ tmpp += 16;
+ d22u8 = vld1_u8(tmpp);
+ tmpp += 16;
+ for (j = 0; j < 4; j++) {
+ d23u8 = vld1_u8(tmpp);
+ tmpp += 16;
+ d24u8 = vld1_u8(tmpp);
+ tmpp += 16;
+ d25u8 = vld1_u8(tmpp);
+ tmpp += 16;
+ d26u8 = vld1_u8(tmpp);
+ tmpp += 16;
+
+ q3u16 = vmull_u8(d18u8, d0u8);
+ q4u16 = vmull_u8(d19u8, d0u8);
+ q5u16 = vmull_u8(d20u8, d0u8);
+ q6u16 = vmull_u8(d21u8, d0u8);
+
+ q3u16 = vmlsl_u8(q3u16, d19u8, d1u8);
+ q4u16 = vmlsl_u8(q4u16, d20u8, d1u8);
+ q5u16 = vmlsl_u8(q5u16, d21u8, d1u8);
+ q6u16 = vmlsl_u8(q6u16, d22u8, d1u8);
+
+ q3u16 = vmlsl_u8(q3u16, d22u8, d4u8);
+ q4u16 = vmlsl_u8(q4u16, d23u8, d4u8);
+ q5u16 = vmlsl_u8(q5u16, d24u8, d4u8);
+ q6u16 = vmlsl_u8(q6u16, d25u8, d4u8);
+
+ q3u16 = vmlal_u8(q3u16, d20u8, d2u8);
+ q4u16 = vmlal_u8(q4u16, d21u8, d2u8);
+ q5u16 = vmlal_u8(q5u16, d22u8, d2u8);
+ q6u16 = vmlal_u8(q6u16, d23u8, d2u8);
+
+ q3u16 = vmlal_u8(q3u16, d23u8, d5u8);
+ q4u16 = vmlal_u8(q4u16, d24u8, d5u8);
+ q5u16 = vmlal_u8(q5u16, d25u8, d5u8);
+ q6u16 = vmlal_u8(q6u16, d26u8, d5u8);
+
+ q7u16 = vmull_u8(d21u8, d3u8);
+ q8u16 = vmull_u8(d22u8, d3u8);
+ q9u16 = vmull_u8(d23u8, d3u8);
+ q10u16 = vmull_u8(d24u8, d3u8);
+
+ q3s16 = vreinterpretq_s16_u16(q3u16);
+ q4s16 = vreinterpretq_s16_u16(q4u16);
+ q5s16 = vreinterpretq_s16_u16(q5u16);
+ q6s16 = vreinterpretq_s16_u16(q6u16);
+ q7s16 = vreinterpretq_s16_u16(q7u16);
+ q8s16 = vreinterpretq_s16_u16(q8u16);
+ q9s16 = vreinterpretq_s16_u16(q9u16);
+ q10s16 = vreinterpretq_s16_u16(q10u16);
+
+ q7s16 = vqaddq_s16(q7s16, q3s16);
+ q8s16 = vqaddq_s16(q8s16, q4s16);
+ q9s16 = vqaddq_s16(q9s16, q5s16);
+ q10s16 = vqaddq_s16(q10s16, q6s16);
+
+ d6u8 = vqrshrun_n_s16(q7s16, 7);
+ d7u8 = vqrshrun_n_s16(q8s16, 7);
+ d8u8 = vqrshrun_n_s16(q9s16, 7);
+ d9u8 = vqrshrun_n_s16(q10s16, 7);
+
+ d18u8 = d22u8;
+ d19u8 = d23u8;
+ d20u8 = d24u8;
+ d21u8 = d25u8;
+ d22u8 = d26u8;
+
+ vst1_u8(dst, d6u8);
+ dst += dst_pitch;
+ vst1_u8(dst, d7u8);
+ dst += dst_pitch;
+ vst1_u8(dst, d8u8);
+ dst += dst_pitch;
+ vst1_u8(dst, d9u8);
+ dst += dst_pitch;
+ }
+ }
+ return;
+}
diff --git a/vp8/common/arm/neon/variance_neon.asm b/vp8/common/arm/neon/variance_neon.asm
deleted file mode 100644
index e3b4832..0000000
--- a/vp8/common/arm/neon/variance_neon.asm
+++ /dev/null
@@ -1,276 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8_variance16x16_neon|
- EXPORT |vp8_variance16x8_neon|
- EXPORT |vp8_variance8x16_neon|
- EXPORT |vp8_variance8x8_neon|
-
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-
-; r0 unsigned char *src_ptr
-; r1 int source_stride
-; r2 unsigned char *ref_ptr
-; r3 int recon_stride
-; stack unsigned int *sse
-|vp8_variance16x16_neon| PROC
- vmov.i8 q8, #0 ;q8 - sum
- vmov.i8 q9, #0 ;q9, q10 - sse
- vmov.i8 q10, #0
-
- mov r12, #8
-
-variance16x16_neon_loop
- vld1.8 {q0}, [r0], r1 ;Load up source and reference
- vld1.8 {q2}, [r2], r3
- vld1.8 {q1}, [r0], r1
- vld1.8 {q3}, [r2], r3
-
- vsubl.u8 q11, d0, d4 ;calculate diff
- vsubl.u8 q12, d1, d5
- vsubl.u8 q13, d2, d6
- vsubl.u8 q14, d3, d7
-
- ;VPADAL adds adjacent pairs of elements of a vector, and accumulates
- ;the results into the elements of the destination vector. The explanation
- ;in ARM guide is wrong.
- vpadal.s16 q8, q11 ;calculate sum
- vmlal.s16 q9, d22, d22 ;calculate sse
- vmlal.s16 q10, d23, d23
-
- subs r12, r12, #1
-
- vpadal.s16 q8, q12
- vmlal.s16 q9, d24, d24
- vmlal.s16 q10, d25, d25
- vpadal.s16 q8, q13
- vmlal.s16 q9, d26, d26
- vmlal.s16 q10, d27, d27
- vpadal.s16 q8, q14
- vmlal.s16 q9, d28, d28
- vmlal.s16 q10, d29, d29
-
- bne variance16x16_neon_loop
-
- vadd.u32 q10, q9, q10 ;accumulate sse
- vpaddl.s32 q0, q8 ;accumulate sum
-
- ldr r12, [sp] ;load *sse from stack
-
- vpaddl.u32 q1, q10
- vadd.s64 d0, d0, d1
- vadd.u64 d1, d2, d3
-
- ;vmov.32 r0, d0[0] ;this instruction costs a lot
- ;vmov.32 r1, d1[0]
- ;mul r0, r0, r0
- ;str r1, [r12]
- ;sub r0, r1, r0, lsr #8
-
- ; while sum is signed, sum * sum is always positive and must be treated as
- ; unsigned to avoid propagating the sign bit.
- vmull.s32 q5, d0, d0
- vst1.32 {d1[0]}, [r12] ;store sse
- vshr.u32 d10, d10, #8
- vsub.u32 d0, d1, d10
-
- vmov.32 r0, d0[0] ;return
- bx lr
-
- ENDP
-
-;================================
-;unsigned int vp8_variance16x8_c(
-; unsigned char *src_ptr,
-; int source_stride,
-; unsigned char *ref_ptr,
-; int recon_stride,
-; unsigned int *sse)
-|vp8_variance16x8_neon| PROC
- vmov.i8 q8, #0 ;q8 - sum
- vmov.i8 q9, #0 ;q9, q10 - sse
- vmov.i8 q10, #0
-
- mov r12, #4
-
-variance16x8_neon_loop
- vld1.8 {q0}, [r0], r1 ;Load up source and reference
- vld1.8 {q2}, [r2], r3
- vld1.8 {q1}, [r0], r1
- vld1.8 {q3}, [r2], r3
-
- vsubl.u8 q11, d0, d4 ;calculate diff
- vsubl.u8 q12, d1, d5
- vsubl.u8 q13, d2, d6
- vsubl.u8 q14, d3, d7
-
- vpadal.s16 q8, q11 ;calculate sum
- vmlal.s16 q9, d22, d22 ;calculate sse
- vmlal.s16 q10, d23, d23
-
- subs r12, r12, #1
-
- vpadal.s16 q8, q12
- vmlal.s16 q9, d24, d24
- vmlal.s16 q10, d25, d25
- vpadal.s16 q8, q13
- vmlal.s16 q9, d26, d26
- vmlal.s16 q10, d27, d27
- vpadal.s16 q8, q14
- vmlal.s16 q9, d28, d28
- vmlal.s16 q10, d29, d29
-
- bne variance16x8_neon_loop
-
- vadd.u32 q10, q9, q10 ;accumulate sse
- vpaddl.s32 q0, q8 ;accumulate sum
-
- ldr r12, [sp] ;load *sse from stack
-
- vpaddl.u32 q1, q10
- vadd.s64 d0, d0, d1
- vadd.u64 d1, d2, d3
-
- vmull.s32 q5, d0, d0
- vst1.32 {d1[0]}, [r12] ;store sse
- vshr.u32 d10, d10, #7
- vsub.u32 d0, d1, d10
-
- vmov.32 r0, d0[0] ;return
- bx lr
-
- ENDP
-
-;=================================
-;unsigned int vp8_variance8x16_c(
-; unsigned char *src_ptr,
-; int source_stride,
-; unsigned char *ref_ptr,
-; int recon_stride,
-; unsigned int *sse)
-
-|vp8_variance8x16_neon| PROC
- vmov.i8 q8, #0 ;q8 - sum
- vmov.i8 q9, #0 ;q9, q10 - sse
- vmov.i8 q10, #0
-
- mov r12, #8
-
-variance8x16_neon_loop
- vld1.8 {d0}, [r0], r1 ;Load up source and reference
- vld1.8 {d4}, [r2], r3
- vld1.8 {d2}, [r0], r1
- vld1.8 {d6}, [r2], r3
-
- vsubl.u8 q11, d0, d4 ;calculate diff
- vsubl.u8 q12, d2, d6
-
- vpadal.s16 q8, q11 ;calculate sum
- vmlal.s16 q9, d22, d22 ;calculate sse
- vmlal.s16 q10, d23, d23
-
- subs r12, r12, #1
-
- vpadal.s16 q8, q12
- vmlal.s16 q9, d24, d24
- vmlal.s16 q10, d25, d25
-
- bne variance8x16_neon_loop
-
- vadd.u32 q10, q9, q10 ;accumulate sse
- vpaddl.s32 q0, q8 ;accumulate sum
-
- ldr r12, [sp] ;load *sse from stack
-
- vpaddl.u32 q1, q10
- vadd.s64 d0, d0, d1
- vadd.u64 d1, d2, d3
-
- vmull.s32 q5, d0, d0
- vst1.32 {d1[0]}, [r12] ;store sse
- vshr.u32 d10, d10, #7
- vsub.u32 d0, d1, d10
-
- vmov.32 r0, d0[0] ;return
- bx lr
-
- ENDP
-
-;==================================
-; r0 unsigned char *src_ptr
-; r1 int source_stride
-; r2 unsigned char *ref_ptr
-; r3 int recon_stride
-; stack unsigned int *sse
-|vp8_variance8x8_neon| PROC
- vmov.i8 q8, #0 ;q8 - sum
- vmov.i8 q9, #0 ;q9, q10 - sse
- vmov.i8 q10, #0
-
- mov r12, #2
-
-variance8x8_neon_loop
- vld1.8 {d0}, [r0], r1 ;Load up source and reference
- vld1.8 {d4}, [r2], r3
- vld1.8 {d1}, [r0], r1
- vld1.8 {d5}, [r2], r3
- vld1.8 {d2}, [r0], r1
- vld1.8 {d6}, [r2], r3
- vld1.8 {d3}, [r0], r1
- vld1.8 {d7}, [r2], r3
-
- vsubl.u8 q11, d0, d4 ;calculate diff
- vsubl.u8 q12, d1, d5
- vsubl.u8 q13, d2, d6
- vsubl.u8 q14, d3, d7
-
- vpadal.s16 q8, q11 ;calculate sum
- vmlal.s16 q9, d22, d22 ;calculate sse
- vmlal.s16 q10, d23, d23
-
- subs r12, r12, #1
-
- vpadal.s16 q8, q12
- vmlal.s16 q9, d24, d24
- vmlal.s16 q10, d25, d25
- vpadal.s16 q8, q13
- vmlal.s16 q9, d26, d26
- vmlal.s16 q10, d27, d27
- vpadal.s16 q8, q14
- vmlal.s16 q9, d28, d28
- vmlal.s16 q10, d29, d29
-
- bne variance8x8_neon_loop
-
- vadd.u32 q10, q9, q10 ;accumulate sse
- vpaddl.s32 q0, q8 ;accumulate sum
-
- ldr r12, [sp] ;load *sse from stack
-
- vpaddl.u32 q1, q10
- vadd.s64 d0, d0, d1
- vadd.u64 d1, d2, d3
-
- vmull.s32 q5, d0, d0
- vst1.32 {d1[0]}, [r12] ;store sse
- vshr.u32 d10, d10, #6
- vsub.u32 d0, d1, d10
-
- vmov.32 r0, d0[0] ;return
- bx lr
-
- ENDP
-
- END
diff --git a/vp8/common/arm/neon/variance_neon.c b/vp8/common/arm/neon/variance_neon.c
new file mode 100644
index 0000000..afd2dc3
--- /dev/null
+++ b/vp8/common/arm/neon/variance_neon.c
@@ -0,0 +1,323 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+#ifdef _MSC_VER
+#define __builtin_prefetch(x)
+#endif
+
+unsigned int vp8_variance16x16_neon(
+ const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ int i;
+ int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16;
+ uint32x2_t d0u32, d10u32;
+ int64x1_t d0s64, d1s64;
+ uint8x16_t q0u8, q1u8, q2u8, q3u8;
+ uint16x8_t q11u16, q12u16, q13u16, q14u16;
+ int32x4_t q8s32, q9s32, q10s32;
+ int64x2_t q0s64, q1s64, q5s64;
+
+ q8s32 = vdupq_n_s32(0);
+ q9s32 = vdupq_n_s32(0);
+ q10s32 = vdupq_n_s32(0);
+
+ for (i = 0; i < 8; i++) {
+ q0u8 = vld1q_u8(src_ptr);
+ src_ptr += source_stride;
+ q1u8 = vld1q_u8(src_ptr);
+ src_ptr += source_stride;
+ __builtin_prefetch(src_ptr);
+
+ q2u8 = vld1q_u8(ref_ptr);
+ ref_ptr += recon_stride;
+ q3u8 = vld1q_u8(ref_ptr);
+ ref_ptr += recon_stride;
+ __builtin_prefetch(ref_ptr);
+
+ q11u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q2u8));
+ q12u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q2u8));
+ q13u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q3u8));
+ q14u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q3u8));
+
+ d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
+ d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
+ q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16));
+ q9s32 = vmlal_s16(q9s32, d22s16, d22s16);
+ q10s32 = vmlal_s16(q10s32, d23s16, d23s16);
+
+ d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
+ d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
+ q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16));
+ q9s32 = vmlal_s16(q9s32, d24s16, d24s16);
+ q10s32 = vmlal_s16(q10s32, d25s16, d25s16);
+
+ d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
+ d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
+ q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q13u16));
+ q9s32 = vmlal_s16(q9s32, d26s16, d26s16);
+ q10s32 = vmlal_s16(q10s32, d27s16, d27s16);
+
+ d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16));
+ d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16));
+ q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q14u16));
+ q9s32 = vmlal_s16(q9s32, d28s16, d28s16);
+ q10s32 = vmlal_s16(q10s32, d29s16, d29s16);
+ }
+
+ q10s32 = vaddq_s32(q10s32, q9s32);
+ q0s64 = vpaddlq_s32(q8s32);
+ q1s64 = vpaddlq_s32(q10s32);
+
+ d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64));
+ d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
+
+ q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64),
+ vreinterpret_s32_s64(d0s64));
+ vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0);
+
+ d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 8);
+ d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32);
+
+ return vget_lane_u32(d0u32, 0);
+}
+
+unsigned int vp8_variance16x8_neon(
+ const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ int i;
+ int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16;
+ uint32x2_t d0u32, d10u32;
+ int64x1_t d0s64, d1s64;
+ uint8x16_t q0u8, q1u8, q2u8, q3u8;
+ uint16x8_t q11u16, q12u16, q13u16, q14u16;
+ int32x4_t q8s32, q9s32, q10s32;
+ int64x2_t q0s64, q1s64, q5s64;
+
+ q8s32 = vdupq_n_s32(0);
+ q9s32 = vdupq_n_s32(0);
+ q10s32 = vdupq_n_s32(0);
+
+ for (i = 0; i < 4; i++) { // variance16x8_neon_loop
+ q0u8 = vld1q_u8(src_ptr);
+ src_ptr += source_stride;
+ q1u8 = vld1q_u8(src_ptr);
+ src_ptr += source_stride;
+ __builtin_prefetch(src_ptr);
+
+ q2u8 = vld1q_u8(ref_ptr);
+ ref_ptr += recon_stride;
+ q3u8 = vld1q_u8(ref_ptr);
+ ref_ptr += recon_stride;
+ __builtin_prefetch(ref_ptr);
+
+ q11u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q2u8));
+ q12u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q2u8));
+ q13u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q3u8));
+ q14u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q3u8));
+
+ d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
+ d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
+ q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16));
+ q9s32 = vmlal_s16(q9s32, d22s16, d22s16);
+ q10s32 = vmlal_s16(q10s32, d23s16, d23s16);
+
+ d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
+ d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
+ q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16));
+ q9s32 = vmlal_s16(q9s32, d24s16, d24s16);
+ q10s32 = vmlal_s16(q10s32, d25s16, d25s16);
+
+ d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
+ d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
+ q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q13u16));
+ q9s32 = vmlal_s16(q9s32, d26s16, d26s16);
+ q10s32 = vmlal_s16(q10s32, d27s16, d27s16);
+
+ d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16));
+ d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16));
+ q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q14u16));
+ q9s32 = vmlal_s16(q9s32, d28s16, d28s16);
+ q10s32 = vmlal_s16(q10s32, d29s16, d29s16);
+ }
+
+ q10s32 = vaddq_s32(q10s32, q9s32);
+ q0s64 = vpaddlq_s32(q8s32);
+ q1s64 = vpaddlq_s32(q10s32);
+
+ d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64));
+ d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
+
+ q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64),
+ vreinterpret_s32_s64(d0s64));
+ vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0);
+
+ d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 7);
+ d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32);
+
+ return vget_lane_u32(d0u32, 0);
+}
+
+unsigned int vp8_variance8x16_neon(
+ const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ int i;
+ uint8x8_t d0u8, d2u8, d4u8, d6u8;
+ int16x4_t d22s16, d23s16, d24s16, d25s16;
+ uint32x2_t d0u32, d10u32;
+ int64x1_t d0s64, d1s64;
+ uint16x8_t q11u16, q12u16;
+ int32x4_t q8s32, q9s32, q10s32;
+ int64x2_t q0s64, q1s64, q5s64;
+
+ q8s32 = vdupq_n_s32(0);
+ q9s32 = vdupq_n_s32(0);
+ q10s32 = vdupq_n_s32(0);
+
+ for (i = 0; i < 8; i++) { // variance8x16_neon_loop
+ d0u8 = vld1_u8(src_ptr);
+ src_ptr += source_stride;
+ d2u8 = vld1_u8(src_ptr);
+ src_ptr += source_stride;
+ __builtin_prefetch(src_ptr);
+
+ d4u8 = vld1_u8(ref_ptr);
+ ref_ptr += recon_stride;
+ d6u8 = vld1_u8(ref_ptr);
+ ref_ptr += recon_stride;
+ __builtin_prefetch(ref_ptr);
+
+ q11u16 = vsubl_u8(d0u8, d4u8);
+ q12u16 = vsubl_u8(d2u8, d6u8);
+
+ d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
+ d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
+ q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16));
+ q9s32 = vmlal_s16(q9s32, d22s16, d22s16);
+ q10s32 = vmlal_s16(q10s32, d23s16, d23s16);
+
+ d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
+ d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
+ q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16));
+ q9s32 = vmlal_s16(q9s32, d24s16, d24s16);
+ q10s32 = vmlal_s16(q10s32, d25s16, d25s16);
+ }
+
+ q10s32 = vaddq_s32(q10s32, q9s32);
+ q0s64 = vpaddlq_s32(q8s32);
+ q1s64 = vpaddlq_s32(q10s32);
+
+ d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64));
+ d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
+
+ q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64),
+ vreinterpret_s32_s64(d0s64));
+ vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0);
+
+ d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 7);
+ d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32);
+
+ return vget_lane_u32(d0u32, 0);
+}
+
+unsigned int vp8_variance8x8_neon(
+ const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ int i;
+ uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8;
+ int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16;
+ uint32x2_t d0u32, d10u32;
+ int64x1_t d0s64, d1s64;
+ uint16x8_t q11u16, q12u16, q13u16, q14u16;
+ int32x4_t q8s32, q9s32, q10s32;
+ int64x2_t q0s64, q1s64, q5s64;
+
+ q8s32 = vdupq_n_s32(0);
+ q9s32 = vdupq_n_s32(0);
+ q10s32 = vdupq_n_s32(0);
+
+ for (i = 0; i < 2; i++) { // variance8x8_neon_loop
+ d0u8 = vld1_u8(src_ptr);
+ src_ptr += source_stride;
+ d1u8 = vld1_u8(src_ptr);
+ src_ptr += source_stride;
+ d2u8 = vld1_u8(src_ptr);
+ src_ptr += source_stride;
+ d3u8 = vld1_u8(src_ptr);
+ src_ptr += source_stride;
+
+ d4u8 = vld1_u8(ref_ptr);
+ ref_ptr += recon_stride;
+ d5u8 = vld1_u8(ref_ptr);
+ ref_ptr += recon_stride;
+ d6u8 = vld1_u8(ref_ptr);
+ ref_ptr += recon_stride;
+ d7u8 = vld1_u8(ref_ptr);
+ ref_ptr += recon_stride;
+
+ q11u16 = vsubl_u8(d0u8, d4u8);
+ q12u16 = vsubl_u8(d1u8, d5u8);
+ q13u16 = vsubl_u8(d2u8, d6u8);
+ q14u16 = vsubl_u8(d3u8, d7u8);
+
+ d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
+ d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
+ q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16));
+ q9s32 = vmlal_s16(q9s32, d22s16, d22s16);
+ q10s32 = vmlal_s16(q10s32, d23s16, d23s16);
+
+ d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
+ d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
+ q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16));
+ q9s32 = vmlal_s16(q9s32, d24s16, d24s16);
+ q10s32 = vmlal_s16(q10s32, d25s16, d25s16);
+
+ d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
+ d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
+ q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q13u16));
+ q9s32 = vmlal_s16(q9s32, d26s16, d26s16);
+ q10s32 = vmlal_s16(q10s32, d27s16, d27s16);
+
+ d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16));
+ d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16));
+ q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q14u16));
+ q9s32 = vmlal_s16(q9s32, d28s16, d28s16);
+ q10s32 = vmlal_s16(q10s32, d29s16, d29s16);
+ }
+
+ q10s32 = vaddq_s32(q10s32, q9s32);
+ q0s64 = vpaddlq_s32(q8s32);
+ q1s64 = vpaddlq_s32(q10s32);
+
+ d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64));
+ d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
+
+ q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64),
+ vreinterpret_s32_s64(d0s64));
+ vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0);
+
+ d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 6);
+ d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32);
+
+ return vget_lane_u32(d0u32, 0);
+}
diff --git a/vp8/common/arm/neon/vp8_subpixelvariance16x16_neon.asm b/vp8/common/arm/neon/vp8_subpixelvariance16x16_neon.asm
index 9d22c52..adc5b7e 100644
--- a/vp8/common/arm/neon/vp8_subpixelvariance16x16_neon.asm
+++ b/vp8/common/arm/neon/vp8_subpixelvariance16x16_neon.asm
@@ -31,11 +31,12 @@
|vp8_sub_pixel_variance16x16_neon_func| PROC
push {r4-r6, lr}
+ vpush {d8-d15}
adr r12, bilinear_taps_coeff
- ldr r4, [sp, #16] ;load *dst_ptr from stack
- ldr r5, [sp, #20] ;load dst_pixels_per_line from stack
- ldr r6, [sp, #24] ;load *sse from stack
+ ldr r4, [sp, #80] ;load *dst_ptr from stack
+ ldr r5, [sp, #84] ;load dst_pixels_per_line from stack
+ ldr r6, [sp, #88] ;load *sse from stack
cmp r2, #0 ;skip first_pass filter if xoffset=0
beq secondpass_bfilter16x16_only
@@ -416,6 +417,7 @@
add sp, sp, #528
vmov.32 r0, d0[0] ;return
+ vpop {d8-d15}
pop {r4-r6,pc}
ENDP
diff --git a/vp8/common/arm/neon/vp8_subpixelvariance16x16s_neon.asm b/vp8/common/arm/neon/vp8_subpixelvariance16x16s_neon.asm
index 155be4f..b0829af 100644
--- a/vp8/common/arm/neon/vp8_subpixelvariance16x16s_neon.asm
+++ b/vp8/common/arm/neon/vp8_subpixelvariance16x16s_neon.asm
@@ -31,9 +31,10 @@
;================================================
|vp8_variance_halfpixvar16x16_h_neon| PROC
push {lr}
+ vpush {d8-d15}
mov r12, #4 ;loop counter
- ldr lr, [sp, #4] ;load *sse from stack
+ ldr lr, [sp, #68] ;load *sse from stack
vmov.i8 q8, #0 ;q8 - sum
vmov.i8 q9, #0 ;q9, q10 - sse
vmov.i8 q10, #0
@@ -116,6 +117,8 @@
vsub.u32 d0, d1, d10
vmov.32 r0, d0[0] ;return
+
+ vpop {d8-d15}
pop {pc}
ENDP
@@ -131,11 +134,12 @@
;================================================
|vp8_variance_halfpixvar16x16_v_neon| PROC
push {lr}
+ vpush {d8-d15}
mov r12, #4 ;loop counter
vld1.u8 {q0}, [r0], r1 ;load src data
- ldr lr, [sp, #4] ;load *sse from stack
+ ldr lr, [sp, #68] ;load *sse from stack
vmov.i8 q8, #0 ;q8 - sum
vmov.i8 q9, #0 ;q9, q10 - sse
@@ -212,6 +216,8 @@
vsub.u32 d0, d1, d10
vmov.32 r0, d0[0] ;return
+
+ vpop {d8-d15}
pop {pc}
ENDP
@@ -227,10 +233,11 @@
;================================================
|vp8_variance_halfpixvar16x16_hv_neon| PROC
push {lr}
+ vpush {d8-d15}
vld1.u8 {d0, d1, d2, d3}, [r0], r1 ;load src data
- ldr lr, [sp, #4] ;load *sse from stack
+ ldr lr, [sp, #68] ;load *sse from stack
vmov.i8 q13, #0 ;q8 - sum
vext.8 q1, q0, q1, #1 ;construct src_ptr[1]
@@ -331,6 +338,8 @@
vsub.u32 d0, d1, d10
vmov.32 r0, d0[0] ;return
+
+ vpop {d8-d15}
pop {pc}
ENDP
@@ -349,10 +358,11 @@
|vp8_sub_pixel_variance16x16s_neon| PROC
push {r4, lr}
+ vpush {d8-d15}
- ldr r4, [sp, #8] ;load *dst_ptr from stack
- ldr r12, [sp, #12] ;load dst_pixels_per_line from stack
- ldr lr, [sp, #16] ;load *sse from stack
+ ldr r4, [sp, #72] ;load *dst_ptr from stack
+ ldr r12, [sp, #76] ;load dst_pixels_per_line from stack
+ ldr lr, [sp, #80] ;load *sse from stack
cmp r2, #0 ;skip first_pass filter if xoffset=0
beq secondpass_bfilter16x16s_only
@@ -566,6 +576,7 @@
add sp, sp, #256
vmov.32 r0, d0[0] ;return
+ vpop {d8-d15}
pop {r4, pc}
ENDP
diff --git a/vp8/common/arm/neon/vp8_subpixelvariance8x8_neon.asm b/vp8/common/arm/neon/vp8_subpixelvariance8x8_neon.asm
index f6b6847..9d9f9e0 100644
--- a/vp8/common/arm/neon/vp8_subpixelvariance8x8_neon.asm
+++ b/vp8/common/arm/neon/vp8_subpixelvariance8x8_neon.asm
@@ -26,11 +26,12 @@
|vp8_sub_pixel_variance8x8_neon| PROC
push {r4-r5, lr}
+ vpush {d8-d15}
adr r12, bilinear_taps_coeff
- ldr r4, [sp, #12] ;load *dst_ptr from stack
- ldr r5, [sp, #16] ;load dst_pixels_per_line from stack
- ldr lr, [sp, #20] ;load *sse from stack
+ ldr r4, [sp, #76] ;load *dst_ptr from stack
+ ldr r5, [sp, #80] ;load dst_pixels_per_line from stack
+ ldr lr, [sp, #84] ;load *sse from stack
cmp r2, #0 ;skip first_pass filter if xoffset=0
beq skip_firstpass_filter
@@ -210,6 +211,8 @@
vsub.u32 d0, d1, d10
vmov.32 r0, d0[0] ;return
+
+ vpop {d8-d15}
pop {r4-r5, pc}
ENDP
diff --git a/vp8/common/arm/reconintra_arm.c b/vp8/common/arm/reconintra_arm.c
index 2874896..765fc3a 100644
--- a/vp8/common/arm/reconintra_arm.c
+++ b/vp8/common/arm/reconintra_arm.c
@@ -14,7 +14,7 @@
#include "vp8/common/blockd.h"
#include "vpx_mem/vpx_mem.h"
-#if HAVE_NEON
+#if HAVE_NEON_ARM
extern void vp8_build_intra_predictors_mby_neon_func(
unsigned char *y_buffer,
unsigned char *ypred_ptr,
diff --git a/vp8/common/arm/variance_arm.c b/vp8/common/arm/variance_arm.c
index 467a509..e3f7083 100644
--- a/vp8/common/arm/variance_arm.c
+++ b/vp8/common/arm/variance_arm.c
@@ -95,7 +95,7 @@
#endif /* HAVE_MEDIA */
-#if HAVE_NEON
+#if HAVE_NEON_ASM
extern unsigned int vp8_sub_pixel_variance16x16_neon_func
(
diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c
index 7d0fbf6..8e546d5 100644
--- a/vp8/common/postproc.c
+++ b/vp8/common/postproc.c
@@ -310,6 +310,7 @@
}
}
+#if CONFIG_POSTPROC
static void vp8_de_mblock(YV12_BUFFER_CONFIG *post,
int q)
{
@@ -382,6 +383,7 @@
vp8_yv12_copy_frame(source, post);
}
}
+#endif
#if !(CONFIG_TEMPORAL_DENOISING)
void vp8_de_noise(VP8_COMMON *cm,
diff --git a/vp8/common/rtcd_defs.pl b/vp8/common/rtcd_defs.pl
index 130d965..dcf5b8e 100644
--- a/vp8/common/rtcd_defs.pl
+++ b/vp8/common/rtcd_defs.pl
@@ -29,8 +29,9 @@
# Dequant
#
add_proto qw/void vp8_dequantize_b/, "struct blockd*, short *dqc";
-specialize qw/vp8_dequantize_b mmx media neon/;
+specialize qw/vp8_dequantize_b mmx media neon_asm/;
$vp8_dequantize_b_media=vp8_dequantize_b_v6;
+$vp8_dequantize_b_neon_asm=vp8_dequantize_b_neon;
add_proto qw/void vp8_dequant_idct_add/, "short *input, short *dq, unsigned char *output, int stride";
specialize qw/vp8_dequant_idct_add mmx media neon dspr2/;
@@ -38,70 +39,76 @@
$vp8_dequant_idct_add_dspr2=vp8_dequant_idct_add_dspr2;
add_proto qw/void vp8_dequant_idct_add_y_block/, "short *q, short *dq, unsigned char *dst, int stride, char *eobs";
-specialize qw/vp8_dequant_idct_add_y_block mmx sse2 media neon dspr2/;
+specialize qw/vp8_dequant_idct_add_y_block mmx sse2 media neon_asm dspr2/;
$vp8_dequant_idct_add_y_block_media=vp8_dequant_idct_add_y_block_v6;
+$vp8_dequant_idct_add_y_block_neon_asm=vp8_dequant_idct_add_y_block_neon;
$vp8_dequant_idct_add_y_block_dspr2=vp8_dequant_idct_add_y_block_dspr2;
add_proto qw/void vp8_dequant_idct_add_uv_block/, "short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs";
-specialize qw/vp8_dequant_idct_add_uv_block mmx sse2 media neon dspr2/;
+specialize qw/vp8_dequant_idct_add_uv_block mmx sse2 media neon_asm dspr2/;
$vp8_dequant_idct_add_uv_block_media=vp8_dequant_idct_add_uv_block_v6;
+$vp8_dequant_idct_add_uv_block_neon_asm=vp8_dequant_idct_add_uv_block_neon;
$vp8_dequant_idct_add_y_block_dspr2=vp8_dequant_idct_add_y_block_dspr2;
#
# Loopfilter
#
add_proto qw/void vp8_loop_filter_mbv/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
-specialize qw/vp8_loop_filter_mbv mmx sse2 media neon dspr2/;
+specialize qw/vp8_loop_filter_mbv mmx sse2 media neon_asm dspr2/;
$vp8_loop_filter_mbv_media=vp8_loop_filter_mbv_armv6;
+$vp8_loop_filter_mbv_neon_asm=vp8_loop_filter_mbv_neon;
$vp8_loop_filter_mbv_dspr2=vp8_loop_filter_mbv_dspr2;
add_proto qw/void vp8_loop_filter_bv/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
-specialize qw/vp8_loop_filter_bv mmx sse2 media neon dspr2/;
+specialize qw/vp8_loop_filter_bv mmx sse2 media neon_asm dspr2/;
$vp8_loop_filter_bv_media=vp8_loop_filter_bv_armv6;
+$vp8_loop_filter_bv_neon_asm=vp8_loop_filter_bv_neon;
$vp8_loop_filter_bv_dspr2=vp8_loop_filter_bv_dspr2;
add_proto qw/void vp8_loop_filter_mbh/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
-specialize qw/vp8_loop_filter_mbh mmx sse2 media neon dspr2/;
+specialize qw/vp8_loop_filter_mbh mmx sse2 media neon_asm dspr2/;
$vp8_loop_filter_mbh_media=vp8_loop_filter_mbh_armv6;
+$vp8_loop_filter_mbh_neon_asm=vp8_loop_filter_mbh_neon;
$vp8_loop_filter_mbh_dspr2=vp8_loop_filter_mbh_dspr2;
add_proto qw/void vp8_loop_filter_bh/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
-specialize qw/vp8_loop_filter_bh mmx sse2 media neon dspr2/;
+specialize qw/vp8_loop_filter_bh mmx sse2 media neon_asm dspr2/;
$vp8_loop_filter_bh_media=vp8_loop_filter_bh_armv6;
+$vp8_loop_filter_bh_neon_asm=vp8_loop_filter_bh_neon;
$vp8_loop_filter_bh_dspr2=vp8_loop_filter_bh_dspr2;
add_proto qw/void vp8_loop_filter_simple_mbv/, "unsigned char *y, int ystride, const unsigned char *blimit";
-specialize qw/vp8_loop_filter_simple_mbv mmx sse2 media neon/;
+specialize qw/vp8_loop_filter_simple_mbv mmx sse2 media neon_asm/;
$vp8_loop_filter_simple_mbv_c=vp8_loop_filter_simple_vertical_edge_c;
$vp8_loop_filter_simple_mbv_mmx=vp8_loop_filter_simple_vertical_edge_mmx;
$vp8_loop_filter_simple_mbv_sse2=vp8_loop_filter_simple_vertical_edge_sse2;
$vp8_loop_filter_simple_mbv_media=vp8_loop_filter_simple_vertical_edge_armv6;
-$vp8_loop_filter_simple_mbv_neon=vp8_loop_filter_mbvs_neon;
+$vp8_loop_filter_simple_mbv_neon_asm=vp8_loop_filter_mbvs_neon;
add_proto qw/void vp8_loop_filter_simple_mbh/, "unsigned char *y, int ystride, const unsigned char *blimit";
-specialize qw/vp8_loop_filter_simple_mbh mmx sse2 media neon/;
+specialize qw/vp8_loop_filter_simple_mbh mmx sse2 media neon_asm/;
$vp8_loop_filter_simple_mbh_c=vp8_loop_filter_simple_horizontal_edge_c;
$vp8_loop_filter_simple_mbh_mmx=vp8_loop_filter_simple_horizontal_edge_mmx;
$vp8_loop_filter_simple_mbh_sse2=vp8_loop_filter_simple_horizontal_edge_sse2;
$vp8_loop_filter_simple_mbh_media=vp8_loop_filter_simple_horizontal_edge_armv6;
-$vp8_loop_filter_simple_mbh_neon=vp8_loop_filter_mbhs_neon;
+$vp8_loop_filter_simple_mbh_neon_asm=vp8_loop_filter_mbhs_neon;
add_proto qw/void vp8_loop_filter_simple_bv/, "unsigned char *y, int ystride, const unsigned char *blimit";
-specialize qw/vp8_loop_filter_simple_bv mmx sse2 media neon/;
+specialize qw/vp8_loop_filter_simple_bv mmx sse2 media neon_asm/;
$vp8_loop_filter_simple_bv_c=vp8_loop_filter_bvs_c;
$vp8_loop_filter_simple_bv_mmx=vp8_loop_filter_bvs_mmx;
$vp8_loop_filter_simple_bv_sse2=vp8_loop_filter_bvs_sse2;
$vp8_loop_filter_simple_bv_media=vp8_loop_filter_bvs_armv6;
-$vp8_loop_filter_simple_bv_neon=vp8_loop_filter_bvs_neon;
+$vp8_loop_filter_simple_bv_neon_asm=vp8_loop_filter_bvs_neon;
add_proto qw/void vp8_loop_filter_simple_bh/, "unsigned char *y, int ystride, const unsigned char *blimit";
-specialize qw/vp8_loop_filter_simple_bh mmx sse2 media neon/;
+specialize qw/vp8_loop_filter_simple_bh mmx sse2 media neon_asm/;
$vp8_loop_filter_simple_bh_c=vp8_loop_filter_bhs_c;
$vp8_loop_filter_simple_bh_mmx=vp8_loop_filter_bhs_mmx;
$vp8_loop_filter_simple_bh_sse2=vp8_loop_filter_bhs_sse2;
$vp8_loop_filter_simple_bh_media=vp8_loop_filter_bhs_armv6;
-$vp8_loop_filter_simple_bh_neon=vp8_loop_filter_bhs_neon;
+$vp8_loop_filter_simple_bh_neon_asm=vp8_loop_filter_bhs_neon;
#
# IDCT
@@ -269,9 +276,10 @@
$vp8_sub_pixel_variance4x4_sse2=vp8_sub_pixel_variance4x4_wmt;
add_proto qw/unsigned int vp8_sub_pixel_variance8x8/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse";
-specialize qw/vp8_sub_pixel_variance8x8 mmx sse2 media neon/;
+specialize qw/vp8_sub_pixel_variance8x8 mmx sse2 media neon_asm/;
$vp8_sub_pixel_variance8x8_sse2=vp8_sub_pixel_variance8x8_wmt;
$vp8_sub_pixel_variance8x8_media=vp8_sub_pixel_variance8x8_armv6;
+$vp8_sub_pixel_variance8x8_neon_asm=vp8_sub_pixel_variance8x8_neon;
add_proto qw/unsigned int vp8_sub_pixel_variance8x16/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse";
specialize qw/vp8_sub_pixel_variance8x16 mmx sse2/;
@@ -282,24 +290,28 @@
$vp8_sub_pixel_variance16x8_sse2=vp8_sub_pixel_variance16x8_wmt;
add_proto qw/unsigned int vp8_sub_pixel_variance16x16/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse";
-specialize qw/vp8_sub_pixel_variance16x16 mmx sse2 ssse3 media neon/;
+specialize qw/vp8_sub_pixel_variance16x16 mmx sse2 ssse3 media neon_asm/;
$vp8_sub_pixel_variance16x16_sse2=vp8_sub_pixel_variance16x16_wmt;
$vp8_sub_pixel_variance16x16_media=vp8_sub_pixel_variance16x16_armv6;
+$vp8_sub_pixel_variance16x16_neon_asm=vp8_sub_pixel_variance16x16_neon;
add_proto qw/unsigned int vp8_variance_halfpixvar16x16_h/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp8_variance_halfpixvar16x16_h mmx sse2 media neon/;
+specialize qw/vp8_variance_halfpixvar16x16_h mmx sse2 media neon_asm/;
$vp8_variance_halfpixvar16x16_h_sse2=vp8_variance_halfpixvar16x16_h_wmt;
$vp8_variance_halfpixvar16x16_h_media=vp8_variance_halfpixvar16x16_h_armv6;
+$vp8_variance_halfpixvar16x16_h_neon_asm=vp8_variance_halfpixvar16x16_h_neon;
add_proto qw/unsigned int vp8_variance_halfpixvar16x16_v/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp8_variance_halfpixvar16x16_v mmx sse2 media neon/;
+specialize qw/vp8_variance_halfpixvar16x16_v mmx sse2 media neon_asm/;
$vp8_variance_halfpixvar16x16_v_sse2=vp8_variance_halfpixvar16x16_v_wmt;
$vp8_variance_halfpixvar16x16_v_media=vp8_variance_halfpixvar16x16_v_armv6;
+$vp8_variance_halfpixvar16x16_v_neon_asm=vp8_variance_halfpixvar16x16_v_neon;
add_proto qw/unsigned int vp8_variance_halfpixvar16x16_hv/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp8_variance_halfpixvar16x16_hv mmx sse2 media neon/;
+specialize qw/vp8_variance_halfpixvar16x16_hv mmx sse2 media neon_asm/;
$vp8_variance_halfpixvar16x16_hv_sse2=vp8_variance_halfpixvar16x16_hv_wmt;
$vp8_variance_halfpixvar16x16_hv_media=vp8_variance_halfpixvar16x16_hv_armv6;
+$vp8_variance_halfpixvar16x16_hv_neon_asm=vp8_variance_halfpixvar16x16_hv_neon;
#
# Single block SAD
@@ -402,12 +414,14 @@
$vp8_sub_pixel_mse16x16_sse2=vp8_sub_pixel_mse16x16_wmt;
add_proto qw/unsigned int vp8_mse16x16/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp8_mse16x16 mmx sse2 media neon/;
+specialize qw/vp8_mse16x16 mmx sse2 media neon_asm/;
$vp8_mse16x16_sse2=vp8_mse16x16_wmt;
$vp8_mse16x16_media=vp8_mse16x16_armv6;
+$vp8_mse16x16_neon_asm=vp8_mse16x16_neon;
add_proto qw/unsigned int vp8_get4x4sse_cs/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride";
-specialize qw/vp8_get4x4sse_cs mmx neon/;
+specialize qw/vp8_get4x4sse_cs mmx neon_asm/;
+$vp8_get4x4sse_cs_neon_asm=vp8_get4x4sse_cs_neon;
#
# Block copy
@@ -434,16 +448,19 @@
# Forward DCT
#
add_proto qw/void vp8_short_fdct4x4/, "short *input, short *output, int pitch";
-specialize qw/vp8_short_fdct4x4 mmx sse2 media neon/;
+specialize qw/vp8_short_fdct4x4 mmx sse2 media neon_asm/;
$vp8_short_fdct4x4_media=vp8_short_fdct4x4_armv6;
+$vp8_short_fdct4x4_neon_asm=vp8_short_fdct4x4_neon;
add_proto qw/void vp8_short_fdct8x4/, "short *input, short *output, int pitch";
-specialize qw/vp8_short_fdct8x4 mmx sse2 media neon/;
+specialize qw/vp8_short_fdct8x4 mmx sse2 media neon_asm/;
$vp8_short_fdct8x4_media=vp8_short_fdct8x4_armv6;
+$vp8_short_fdct8x4_neon_asm=vp8_short_fdct8x4_neon;
add_proto qw/void vp8_short_walsh4x4/, "short *input, short *output, int pitch";
-specialize qw/vp8_short_walsh4x4 sse2 media neon/;
+specialize qw/vp8_short_walsh4x4 sse2 media neon_asm/;
$vp8_short_walsh4x4_media=vp8_short_walsh4x4_armv6;
+$vp8_short_walsh4x4_neon_asm=vp8_short_walsh4x4_neon;
#
# Quantizer
@@ -454,14 +471,16 @@
#$vp8_regular_quantize_b_sse4_1=vp8_regular_quantize_b_sse4;
add_proto qw/void vp8_fast_quantize_b/, "struct block *, struct blockd *";
-specialize qw/vp8_fast_quantize_b sse2 ssse3 media neon/;
+specialize qw/vp8_fast_quantize_b sse2 ssse3 media neon_asm/;
$vp8_fast_quantize_b_media=vp8_fast_quantize_b_armv6;
+$vp8_fast_quantize_b_neon_asm=vp8_fast_quantize_b_neon;
add_proto qw/void vp8_regular_quantize_b_pair/, "struct block *b1, struct block *b2, struct blockd *d1, struct blockd *d2";
# no asm yet
add_proto qw/void vp8_fast_quantize_b_pair/, "struct block *b1, struct block *b2, struct blockd *d1, struct blockd *d2";
-specialize qw/vp8_fast_quantize_b_pair neon/;
+specialize qw/vp8_fast_quantize_b_pair neon_asm/;
+$vp8_fast_quantize_b_pair_neon_asm=vp8_fast_quantize_b_pair_neon;
add_proto qw/void vp8_quantize_mb/, "struct macroblock *";
specialize qw/vp8_quantize_mb neon/;
@@ -488,16 +507,19 @@
$vp8_mbuverror_sse2=vp8_mbuverror_xmm;
add_proto qw/void vp8_subtract_b/, "struct block *be, struct blockd *bd, int pitch";
-specialize qw/vp8_subtract_b mmx sse2 media neon/;
+specialize qw/vp8_subtract_b mmx sse2 media neon_asm/;
$vp8_subtract_b_media=vp8_subtract_b_armv6;
+$vp8_subtract_b_neon_asm=vp8_subtract_b_neon;
add_proto qw/void vp8_subtract_mby/, "short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride";
-specialize qw/vp8_subtract_mby mmx sse2 media neon/;
+specialize qw/vp8_subtract_mby mmx sse2 media neon_asm/;
$vp8_subtract_mby_media=vp8_subtract_mby_armv6;
+$vp8_subtract_mby_neon_asm=vp8_subtract_mby_neon;
add_proto qw/void vp8_subtract_mbuv/, "short *diff, unsigned char *usrc, unsigned char *vsrc, int src_stride, unsigned char *upred, unsigned char *vpred, int pred_stride";
-specialize qw/vp8_subtract_mbuv mmx sse2 media neon/;
+specialize qw/vp8_subtract_mbuv mmx sse2 media neon_asm/;
$vp8_subtract_mbuv_media=vp8_subtract_mbuv_armv6;
+$vp8_subtract_mbuv_neon_asm=vp8_subtract_mbuv_neon;
#
# Motion search
@@ -526,7 +548,8 @@
# Pick Loopfilter
#
add_proto qw/void vp8_yv12_copy_partial_frame/, "struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc";
-specialize qw/vp8_yv12_copy_partial_frame neon/;
+specialize qw/vp8_yv12_copy_partial_frame neon_asm/;
+$vp8_yv12_copy_partial_frame_neon_asm=vp8_yv12_copy_partial_frame_neon;
#
# Denoiser filter
diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c
index 2d9e343..29fea61 100644
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@@ -178,12 +178,6 @@
return pbi->common.error.error_code;
}
-/*For ARM NEON, d8-d15 are callee-saved registers, and need to be saved by us.*/
-#if HAVE_NEON
-extern void vp8_push_neon(int64_t *store);
-extern void vp8_pop_neon(int64_t *store);
-#endif
-
static int get_free_fb (VP8_COMMON *cm)
{
int i;
@@ -307,9 +301,6 @@
const uint8_t *source,
int64_t time_stamp)
{
-#if HAVE_NEON
- int64_t dx_store_reg[8];
-#endif
VP8_COMMON *cm = &pbi->common;
int retcode = -1;
@@ -319,15 +310,6 @@
if(retcode <= 0)
return retcode;
-#if HAVE_NEON
-#if CONFIG_RUNTIME_CPU_DETECT
- if (cm->cpu_caps & HAS_NEON)
-#endif
- {
- vp8_push_neon(dx_store_reg);
- }
-#endif
-
cm->new_fb_idx = get_free_fb (cm);
/* setup reference frames for vp8_decode_frame */
@@ -403,15 +385,6 @@
pbi->last_time_stamp = time_stamp;
decode_exit:
-#if HAVE_NEON
-#if CONFIG_RUNTIME_CPU_DETECT
- if (cm->cpu_caps & HAS_NEON)
-#endif
- {
- vp8_pop_neon(dx_store_reg);
- }
-#endif
-
pbi->common.error.setjmp = 0;
return retcode;
}
diff --git a/vp8/encoder/arm/neon/denoising_neon.c b/vp8/encoder/arm/neon/denoising_neon.c
index 3f85397..23dc0a9 100644
--- a/vp8/encoder/arm/neon/denoising_neon.c
+++ b/vp8/encoder/arm/neon/denoising_neon.c
@@ -68,14 +68,11 @@
int mc_running_avg_y_stride = mc_running_avg->y_stride;
unsigned char *running_avg_y = running_avg->y_buffer + y_offset;
int running_avg_y_stride = running_avg->y_stride;
+ int64x2_t v_sum_diff_total = vdupq_n_s64(0);
/* Go over lines. */
int i;
- int sum_diff = 0;
for (i = 0; i < 16; ++i) {
- int8x16_t v_sum_diff = vdupq_n_s8(0);
- uint8x16_t v_running_avg_y;
-
/* Load inputs. */
const uint8x16_t v_sig = vld1q_u8(sig);
const uint8x16_t v_mc_running_avg_y = vld1q_u8(mc_running_avg_y);
@@ -117,12 +114,9 @@
v_abs_adjustment);
const uint8x16_t v_neg_adjustment = vandq_u8(v_diff_neg_mask,
v_abs_adjustment);
- v_running_avg_y = vqaddq_u8(v_sig, v_pos_adjustment);
+
+ uint8x16_t v_running_avg_y = vqaddq_u8(v_sig, v_pos_adjustment);
v_running_avg_y = vqsubq_u8(v_running_avg_y, v_neg_adjustment);
- v_sum_diff = vqaddq_s8(v_sum_diff,
- vreinterpretq_s8_u8(v_pos_adjustment));
- v_sum_diff = vqsubq_s8(v_sum_diff,
- vreinterpretq_s8_u8(v_neg_adjustment));
/* Store results. */
vst1q_u8(running_avg_y, v_running_avg_y);
@@ -131,23 +125,19 @@
* for this macroblock.
*/
{
- int s0 = vgetq_lane_s8(v_sum_diff, 0) +
- vgetq_lane_s8(v_sum_diff, 1) +
- vgetq_lane_s8(v_sum_diff, 2) +
- vgetq_lane_s8(v_sum_diff, 3);
- int s1 = vgetq_lane_s8(v_sum_diff, 4) +
- vgetq_lane_s8(v_sum_diff, 5) +
- vgetq_lane_s8(v_sum_diff, 6) +
- vgetq_lane_s8(v_sum_diff, 7);
- int s2 = vgetq_lane_s8(v_sum_diff, 8) +
- vgetq_lane_s8(v_sum_diff, 9) +
- vgetq_lane_s8(v_sum_diff, 10) +
- vgetq_lane_s8(v_sum_diff, 11);
- int s3 = vgetq_lane_s8(v_sum_diff, 12) +
- vgetq_lane_s8(v_sum_diff, 13) +
- vgetq_lane_s8(v_sum_diff, 14) +
- vgetq_lane_s8(v_sum_diff, 15);
- sum_diff += s0 + s1+ s2 + s3;
+ const int8x16_t v_sum_diff =
+ vqsubq_s8(vreinterpretq_s8_u8(v_pos_adjustment),
+ vreinterpretq_s8_u8(v_neg_adjustment));
+
+ const int16x8_t fe_dc_ba_98_76_54_32_10 = vpaddlq_s8(v_sum_diff);
+
+ const int32x4_t fedc_ba98_7654_3210 =
+ vpaddlq_s16(fe_dc_ba_98_76_54_32_10);
+
+ const int64x2_t fedcba98_76543210 =
+ vpaddlq_s32(fedc_ba98_7654_3210);
+
+ v_sum_diff_total = vqaddq_s64(v_sum_diff_total, fedcba98_76543210);
}
/* Update pointers for next iteration. */
@@ -157,11 +147,20 @@
}
/* Too much adjustments => copy block. */
- if (abs(sum_diff) > SUM_DIFF_THRESHOLD)
- return COPY_BLOCK;
+ {
+ const int64x1_t x = vqadd_s64(vget_high_s64(v_sum_diff_total),
+ vget_low_s64(v_sum_diff_total));
+ const int s0 = vget_lane_s32(vabs_s32(vreinterpret_s32_s64(x)), 0);
+
+ if (s0 > SUM_DIFF_THRESHOLD)
+ return COPY_BLOCK;
+ }
/* Tell above level that block was filtered. */
- vp8_copy_mem16x16(running_avg->y_buffer + y_offset, running_avg_y_stride,
- signal->thismb, sig_stride);
+ running_avg_y -= running_avg_y_stride * 16;
+ sig -= sig_stride * 16;
+
+ vp8_copy_mem16x16(running_avg_y, running_avg_y_stride, sig, sig_stride);
+
return FILTER_BLOCK;
}
diff --git a/vp8/encoder/arm/neon/subtract_neon.asm b/vp8/encoder/arm/neon/subtract_neon.asm
index 5bda786..840cb33 100644
--- a/vp8/encoder/arm/neon/subtract_neon.asm
+++ b/vp8/encoder/arm/neon/subtract_neon.asm
@@ -65,8 +65,10 @@
; unsigned char *pred, int pred_stride)
|vp8_subtract_mby_neon| PROC
push {r4-r7}
+ vpush {d8-d15}
+
mov r12, #4
- ldr r4, [sp, #16] ; pred_stride
+ ldr r4, [sp, #80] ; pred_stride
mov r6, #32 ; "diff" stride x2
add r5, r0, #16 ; second diff pointer
@@ -101,6 +103,7 @@
subs r12, r12, #1
bne subtract_mby_loop
+ vpop {d8-d15}
pop {r4-r7}
bx lr
ENDP
@@ -112,9 +115,11 @@
|vp8_subtract_mbuv_neon| PROC
push {r4-r7}
- ldr r4, [sp, #16] ; upred
- ldr r5, [sp, #20] ; vpred
- ldr r6, [sp, #24] ; pred_stride
+ vpush {d8-d15}
+
+ ldr r4, [sp, #80] ; upred
+ ldr r5, [sp, #84] ; vpred
+ ldr r6, [sp, #88] ; pred_stride
add r0, r0, #512 ; short *udiff = diff + 256;
mov r12, #32 ; "diff" stride x2
add r7, r0, #16 ; second diff pointer
@@ -191,6 +196,7 @@
vst1.16 {q14}, [r0], r12
vst1.16 {q15}, [r7], r12
+ vpop {d8-d15}
pop {r4-r7}
bx lr
diff --git a/vp8/encoder/arm/neon/vp8_memcpy_neon.asm b/vp8/encoder/arm/neon/vp8_memcpy_neon.asm
index 5b9f11e..d219e2d 100644
--- a/vp8/encoder/arm/neon/vp8_memcpy_neon.asm
+++ b/vp8/encoder/arm/neon/vp8_memcpy_neon.asm
@@ -21,6 +21,7 @@
;void vp8_memcpy_partial_neon(unsigned char *dst_ptr, unsigned char *src_ptr,
; int sz);
|vp8_memcpy_partial_neon| PROC
+ vpush {d8-d15}
;pld [r1] ;preload pred data
;pld [r1, #128]
;pld [r1, #256]
@@ -64,6 +65,7 @@
bne extra_copy_neon_loop
done_copy_neon_loop
+ vpop {d8-d15}
bx lr
ENDP
diff --git a/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm b/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm
index 55edbf5..f82af3e 100644
--- a/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm
+++ b/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm
@@ -27,6 +27,8 @@
;from vp8_variance().
|vp8_mse16x16_neon| PROC
+ vpush {q7}
+
vmov.i8 q7, #0 ;q7, q8, q9, q10 - sse
vmov.i8 q8, #0
vmov.i8 q9, #0
@@ -62,7 +64,7 @@
vadd.u32 q7, q7, q8
vadd.u32 q9, q9, q10
- ldr r12, [sp] ;load *sse from stack
+ ldr r12, [sp, #16] ;load *sse from stack
vadd.u32 q10, q7, q9
vpaddl.u32 q1, q10
@@ -71,6 +73,7 @@
vst1.32 {d0[0]}, [r12]
vmov.32 r0, d0[0]
+ vpop {q7}
bx lr
ENDP
@@ -82,6 +85,8 @@
; r2 unsigned char *ref_ptr,
; r3 int recon_stride
|vp8_get4x4sse_cs_neon| PROC
+ vpush {q7}
+
vld1.8 {d0}, [r0], r1 ;Load up source and reference
vld1.8 {d4}, [r2], r3
vld1.8 {d1}, [r0], r1
@@ -109,6 +114,8 @@
vadd.u64 d0, d2, d3
vmov.32 r0, d0[0]
+
+ vpop {q7}
bx lr
ENDP
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 32c5997..e95e44f 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -4820,33 +4820,11 @@
}
#endif
-/* For ARM NEON, d8-d15 are callee-saved registers, and need to be saved. */
-#if HAVE_NEON
-extern void vp8_push_neon(int64_t *store);
-extern void vp8_pop_neon(int64_t *store);
-#endif
-
-
int vp8_receive_raw_frame(VP8_COMP *cpi, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, int64_t time_stamp, int64_t end_time)
{
-#if HAVE_NEON
- int64_t store_reg[8];
-#if CONFIG_RUNTIME_CPU_DETECT
- VP8_COMMON *cm = &cpi->common;
-#endif
-#endif
struct vpx_usec_timer timer;
int res = 0;
-#if HAVE_NEON
-#if CONFIG_RUNTIME_CPU_DETECT
- if (cm->cpu_caps & HAS_NEON)
-#endif
- {
- vp8_push_neon(store_reg);
- }
-#endif
-
vpx_usec_timer_start(&timer);
/* Reinit the lookahead buffer if the frame size changes */
@@ -4863,15 +4841,6 @@
vpx_usec_timer_mark(&timer);
cpi->time_receive_data += vpx_usec_timer_elapsed(&timer);
-#if HAVE_NEON
-#if CONFIG_RUNTIME_CPU_DETECT
- if (cm->cpu_caps & HAS_NEON)
-#endif
- {
- vp8_pop_neon(store_reg);
- }
-#endif
-
return res;
}
@@ -4892,9 +4861,6 @@
int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, unsigned char *dest_end, int64_t *time_stamp, int64_t *time_end, int flush)
{
-#if HAVE_NEON
- int64_t store_reg[8];
-#endif
VP8_COMMON *cm;
struct vpx_usec_timer tsctimer;
struct vpx_usec_timer ticktimer;
@@ -4914,15 +4880,6 @@
cpi->common.error.setjmp = 1;
-#if HAVE_NEON
-#if CONFIG_RUNTIME_CPU_DETECT
- if (cm->cpu_caps & HAS_NEON)
-#endif
- {
- vp8_push_neon(store_reg);
- }
-#endif
-
vpx_usec_timer_start(&cmptimer);
cpi->source = NULL;
@@ -5005,14 +4962,6 @@
#endif
-#if HAVE_NEON
-#if CONFIG_RUNTIME_CPU_DETECT
- if (cm->cpu_caps & HAS_NEON)
-#endif
- {
- vp8_pop_neon(store_reg);
- }
-#endif
return -1;
}
@@ -5278,7 +5227,7 @@
int y_samples = orig->y_height * orig->y_width ;
int uv_samples = orig->uv_height * orig->uv_width ;
int t_samples = y_samples + 2 * uv_samples;
- double sq_error, sq_error2;
+ double sq_error;
ye = calc_plane_error(orig->y_buffer, orig->y_stride,
recon->y_buffer, recon->y_stride, orig->y_width, orig->y_height);
@@ -5301,6 +5250,7 @@
#if CONFIG_POSTPROC
{
YV12_BUFFER_CONFIG *pp = &cm->post_proc_buffer;
+ double sq_error2;
double frame_psnr2, frame_ssim2 = 0;
double weight = 0;
@@ -5416,15 +5366,6 @@
#endif
#endif
-#if HAVE_NEON
-#if CONFIG_RUNTIME_CPU_DETECT
- if (cm->cpu_caps & HAS_NEON)
-#endif
- {
- vp8_pop_neon(store_reg);
- }
-#endif
-
cpi->common.error.setjmp = 0;
return 0;
diff --git a/vp8/encoder/temporal_filter.c b/vp8/encoder/temporal_filter.c
index 513b2bf..4dc0d95 100644
--- a/vp8/encoder/temporal_filter.c
+++ b/vp8/encoder/temporal_filter.c
@@ -98,6 +98,7 @@
unsigned int i, j, k;
int modifier;
int byte = 0;
+ const int rounding = strength > 0 ? 1 << (strength - 1) : 0;
for (i = 0,k = 0; i < block_size; i++)
{
@@ -114,7 +115,7 @@
*/
modifier *= modifier;
modifier *= 3;
- modifier += 1 << (strength - 1);
+ modifier += rounding;
modifier >>= strength;
if (modifier > 16)
diff --git a/vp8/encoder/x86/quantize_ssse3.asm b/vp8/encoder/x86/quantize_ssse3.asm
deleted file mode 100644
index 7b1dc11..0000000
--- a/vp8/encoder/x86/quantize_ssse3.asm
+++ /dev/null
@@ -1,138 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
-;
-
-
-%include "vpx_ports/x86_abi_support.asm"
-%include "vp8_asm_enc_offsets.asm"
-
-
-; void vp8_fast_quantize_b_ssse3 | arg
-; (BLOCK *b, | 0
-; BLOCKD *d) | 1
-;
-
-global sym(vp8_fast_quantize_b_ssse3) PRIVATE
-sym(vp8_fast_quantize_b_ssse3):
- push rbp
- mov rbp, rsp
- GET_GOT rbx
-
-%if ABI_IS_32BIT
- push rdi
- push rsi
-%else
- %if LIBVPX_YASM_WIN64
- push rdi
- push rsi
- %endif
-%endif
- ; end prolog
-
-%if ABI_IS_32BIT
- mov rdi, arg(0) ; BLOCK *b
- mov rsi, arg(1) ; BLOCKD *d
-%else
- %if LIBVPX_YASM_WIN64
- mov rdi, rcx ; BLOCK *b
- mov rsi, rdx ; BLOCKD *d
- %else
- ;mov rdi, rdi ; BLOCK *b
- ;mov rsi, rsi ; BLOCKD *d
- %endif
-%endif
-
- mov rax, [rdi + vp8_block_coeff]
- mov rcx, [rdi + vp8_block_round]
- mov rdx, [rdi + vp8_block_quant_fast]
-
- ; coeff
- movdqa xmm0, [rax]
- movdqa xmm4, [rax + 16]
-
- ; round
- movdqa xmm2, [rcx]
- movdqa xmm3, [rcx + 16]
-
- movdqa xmm1, xmm0
- movdqa xmm5, xmm4
-
- ; sz = z >> 15
- psraw xmm0, 15
- psraw xmm4, 15
-
- pabsw xmm1, xmm1
- pabsw xmm5, xmm5
-
- paddw xmm1, xmm2
- paddw xmm5, xmm3
-
- ; quant_fast
- pmulhw xmm1, [rdx]
- pmulhw xmm5, [rdx + 16]
-
- mov rax, [rsi + vp8_blockd_qcoeff]
- mov rdi, [rsi + vp8_blockd_dequant]
- mov rcx, [rsi + vp8_blockd_dqcoeff]
-
- movdqa xmm2, xmm1 ;store y for getting eob
- movdqa xmm3, xmm5
-
- pxor xmm1, xmm0
- pxor xmm5, xmm4
- psubw xmm1, xmm0
- psubw xmm5, xmm4
-
- movdqa [rax], xmm1
- movdqa [rax + 16], xmm5
-
- movdqa xmm0, [rdi]
- movdqa xmm4, [rdi + 16]
-
- pmullw xmm0, xmm1
- pmullw xmm4, xmm5
- pxor xmm1, xmm1
-
- pcmpgtw xmm2, xmm1 ;calculate eob
- pcmpgtw xmm3, xmm1
- packsswb xmm2, xmm3
- pshufb xmm2, [GLOBAL(zz_shuf)]
-
- pmovmskb edx, xmm2
-
- movdqa [rcx], xmm0 ;store dqcoeff
- movdqa [rcx + 16], xmm4 ;store dqcoeff
- mov rcx, [rsi + vp8_blockd_eob]
-
- bsr eax, edx ;count 0
- add eax, 1
-
- cmp edx, 0 ;if all 0, eob=0
- cmove eax, edx
-
- mov BYTE PTR [rcx], al ;store eob
-
- ; begin epilog
-%if ABI_IS_32BIT
- pop rsi
- pop rdi
-%else
- %if LIBVPX_YASM_WIN64
- pop rsi
- pop rdi
- %endif
-%endif
-
- RESTORE_GOT
- pop rbp
- ret
-
-SECTION_RODATA
-align 16
-zz_shuf:
- db 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
diff --git a/vp8/encoder/x86/quantize_ssse3.c b/vp8/encoder/x86/quantize_ssse3.c
new file mode 100644
index 0000000..448217f
--- /dev/null
+++ b/vp8/encoder/x86/quantize_ssse3.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <tmmintrin.h> /* SSSE3 */
+
+#include "vp8/encoder/block.h"
+
+/* bitscan reverse (bsr) */
+#if defined(_MSC_VER)
+#include <intrin.h>
+#pragma intrinsic(_BitScanReverse)
+static int bsr(int mask) {
+ int eob;
+ _BitScanReverse(&eob, mask);
+ eob++;
+ if (mask == 0)
+ eob = 0;
+ return eob;
+}
+#else
+static int bsr(int mask) {
+ int eob;
+#if defined(__GNUC__) && __GNUC__
+ __asm__ __volatile__("bsr %1, %0" : "=r" (eob) : "r" (mask) : "flags");
+#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
+ asm volatile("bsr %1, %0" : "=r" (eob) : "r" (mask) : "flags");
+#endif
+ eob++;
+ if (mask == 0)
+ eob = 0;
+ return eob;
+}
+#endif
+
+void vp8_fast_quantize_b_ssse3(BLOCK *b, BLOCKD *d) {
+ int eob, mask;
+
+ __m128i z0 = _mm_load_si128((__m128i *)(b->coeff));
+ __m128i z1 = _mm_load_si128((__m128i *)(b->coeff + 8));
+ __m128i round0 = _mm_load_si128((__m128i *)(b->round));
+ __m128i round1 = _mm_load_si128((__m128i *)(b->round + 8));
+ __m128i quant_fast0 = _mm_load_si128((__m128i *)(b->quant_fast));
+ __m128i quant_fast1 = _mm_load_si128((__m128i *)(b->quant_fast + 8));
+ __m128i dequant0 = _mm_load_si128((__m128i *)(d->dequant));
+ __m128i dequant1 = _mm_load_si128((__m128i *)(d->dequant + 8));
+
+ __m128i sz0, sz1, x, x0, x1, y0, y1, zeros, abs0, abs1;
+
+ DECLARE_ALIGNED(16, const uint8_t, pshufb_zig_zag_mask[16]) =
+ { 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 };
+ __m128i zig_zag = _mm_load_si128((const __m128i *)pshufb_zig_zag_mask);
+
+ /* sign of z: z >> 15 */
+ sz0 = _mm_srai_epi16(z0, 15);
+ sz1 = _mm_srai_epi16(z1, 15);
+
+ /* x = abs(z) */
+ x0 = _mm_abs_epi16(z0);
+ x1 = _mm_abs_epi16(z1);
+
+ /* x += round */
+ x0 = _mm_add_epi16(x0, round0);
+ x1 = _mm_add_epi16(x1, round1);
+
+ /* y = (x * quant) >> 16 */
+ y0 = _mm_mulhi_epi16(x0, quant_fast0);
+ y1 = _mm_mulhi_epi16(x1, quant_fast1);
+
+ /* ASM saves Y for EOB */
+ /* I think we can ignore that because adding the sign doesn't change anything
+ * and multiplying 0 by dequant is OK as well */
+ abs0 = y0;
+ abs1 = y1;
+
+ /* Restore the sign bit. */
+ y0 = _mm_xor_si128(y0, sz0);
+ y1 = _mm_xor_si128(y1, sz1);
+ x0 = _mm_sub_epi16(y0, sz0);
+ x1 = _mm_sub_epi16(y1, sz1);
+
+ /* qcoeff = x */
+ _mm_store_si128((__m128i *)(d->qcoeff), x0);
+ _mm_store_si128((__m128i *)(d->qcoeff + 8), x1);
+
+ /* x * dequant */
+ x0 = _mm_mullo_epi16(x0, dequant0);
+ x1 = _mm_mullo_epi16(x1, dequant1);
+
+ /* dqcoeff = x * dequant */
+ _mm_store_si128((__m128i *)(d->dqcoeff), x0);
+ _mm_store_si128((__m128i *)(d->dqcoeff + 8), x1);
+
+ zeros = _mm_setzero_si128();
+
+ x0 = _mm_cmpgt_epi16(abs0, zeros);
+ x1 = _mm_cmpgt_epi16(abs1, zeros);
+
+ x = _mm_packs_epi16(x0, x1);
+
+ x = _mm_shuffle_epi8(x, zig_zag);
+
+ mask = _mm_movemask_epi8(x);
+
+ eob = bsr(mask);
+
+ *d->eob = 0xFF & eob;
+}
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk
index dfb54a5..e58cdad 100644
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -129,7 +129,6 @@
# common (c)
VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/filter_arm.c
VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/loopfilter_arm.c
-VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/reconintra_arm.c
VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/dequantize_arm.c
VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/variance_arm.c
@@ -159,27 +158,14 @@
VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6$(ASM)
# common (neon)
-VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/iwalsh_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/loopfilter_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/loopfiltersimplehorizontaledge_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/loopfiltersimpleverticaledge_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/mbloopfilter_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/shortidct4x4llm_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/sad8_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/sad16_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/sixtappredict4x4_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/sixtappredict8x4_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/sixtappredict8x8_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/sixtappredict16x16_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/buildintrapredictorsmby_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/save_reg_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/idct_dequant_full_2x_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/idct_dequant_0_2x_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/idct_blk_neon.c
-VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/variance_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp8_subpixelvariance8x8_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp8_subpixelvariance16x16_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp8_subpixelvariance16x16s_neon$(ASM)
+VP8_COMMON_SRCS-$(ARCH_NEON_ASM) += common/arm/reconintra_arm.c
+VP8_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/loopfilter_neon$(ASM)
+VP8_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/loopfiltersimpleverticaledge_neon$(ASM)
+VP8_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/buildintrapredictorsmby_neon$(ASM)
+VP8_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/idct_blk_neon.c
+VP8_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp8_subpixelvariance8x8_neon$(ASM)
+VP8_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp8_subpixelvariance16x16_neon$(ASM)
+VP8_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp8_subpixelvariance16x16s_neon$(ASM)
# common (neon intrinsics)
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/bilinearpredict_neon.c
@@ -187,6 +173,14 @@
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dc_only_idct_add_neon.c
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dequant_idct_neon.c
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dequantizeb_neon.c
-
+VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/idct_dequant_full_2x_neon.c
+VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/iwalsh_neon.c
+VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/loopfiltersimplehorizontaledge_neon.c
+VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/mbloopfilter_neon.c
+VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/sad_neon.c
+VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/shortidct4x4llm_neon.c
+VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/sixtappredict_neon.c
+VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/idct_dequant_0_2x_neon.c
+VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/variance_neon.c
$(eval $(call rtcd_h_template,vp8_rtcd,vp8/common/rtcd_defs.pl))
diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk
index d7c6dd1..607382b 100644
--- a/vp8/vp8cx.mk
+++ b/vp8/vp8cx.mk
@@ -88,6 +88,7 @@
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/fwalsh_sse2.asm
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.c
+VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/quantize_ssse3.c
ifeq ($(CONFIG_TEMPORAL_DENOISING),yes)
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/denoising_sse2.c
@@ -96,7 +97,6 @@
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/subtract_sse2.asm
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp8_enc_stubs_sse2.c
-VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/quantize_ssse3.asm
VP8_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/quantize_sse4.asm
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_mmx.asm
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/encodeopt.asm
diff --git a/vp8/vp8cx_arm.mk b/vp8/vp8cx_arm.mk
index 398172a..5733048 100644
--- a/vp8/vp8cx_arm.mk
+++ b/vp8/vp8cx_arm.mk
@@ -35,11 +35,12 @@
#File list for neon
# encoder
-VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/fastquantizeb_neon$(ASM)
-VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/picklpf_arm.c
+VP8_CX_SRCS-$(HAVE_NEON_ASM) += encoder/arm/neon/fastquantizeb_neon$(ASM)
+VP8_CX_SRCS-$(HAVE_NEON_ASM) += encoder/arm/neon/picklpf_arm.c
+VP8_CX_SRCS-$(HAVE_NEON_ASM) += encoder/arm/neon/shortfdct_neon$(ASM)
+VP8_CX_SRCS-$(HAVE_NEON_ASM) += encoder/arm/neon/subtract_neon$(ASM)
+VP8_CX_SRCS-$(HAVE_NEON_ASM) += encoder/arm/neon/vp8_mse16x16_neon$(ASM)
+VP8_CX_SRCS-$(HAVE_NEON_ASM) += encoder/arm/neon/vp8_memcpy_neon$(ASM)
+VP8_CX_SRCS-$(HAVE_NEON_ASM) += encoder/arm/neon/vp8_shortwalsh4x4_neon$(ASM)
+
VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/denoising_neon.c
-VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/shortfdct_neon$(ASM)
-VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/subtract_neon$(ASM)
-VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp8_mse16x16_neon$(ASM)
-VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp8_memcpy_neon$(ASM)
-VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp8_shortwalsh4x4_neon$(ASM)
diff --git a/vp9/common/arm/neon/vp9_idct8x8_add_neon.asm b/vp9/common/arm/neon/vp9_idct8x8_add_neon.asm
index 5476400..ab5bb69 100644
--- a/vp9/common/arm/neon/vp9_idct8x8_add_neon.asm
+++ b/vp9/common/arm/neon/vp9_idct8x8_add_neon.asm
@@ -9,7 +9,7 @@
;
EXPORT |vp9_idct8x8_64_add_neon|
- EXPORT |vp9_idct8x8_10_add_neon|
+ EXPORT |vp9_idct8x8_12_add_neon|
ARM
REQUIRE8
PRESERVE8
@@ -310,13 +310,13 @@
bx lr
ENDP ; |vp9_idct8x8_64_add_neon|
-;void vp9_idct8x8_10_add_neon(int16_t *input, uint8_t *dest, int dest_stride)
+;void vp9_idct8x8_12_add_neon(int16_t *input, uint8_t *dest, int dest_stride)
;
; r0 int16_t input
; r1 uint8_t *dest
; r2 int dest_stride)
-|vp9_idct8x8_10_add_neon| PROC
+|vp9_idct8x8_12_add_neon| PROC
push {r4-r9}
vpush {d8-d15}
vld1.s16 {q8,q9}, [r0]!
@@ -514,6 +514,6 @@
vpop {d8-d15}
pop {r4-r9}
bx lr
- ENDP ; |vp9_idct8x8_10_add_neon|
+ ENDP ; |vp9_idct8x8_12_add_neon|
END
diff --git a/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c b/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c
index acccaea..fc44ffa 100644
--- a/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c
@@ -617,7 +617,7 @@
}
}
-void vp9_idct8x8_10_add_dspr2(const int16_t *input, uint8_t *dest,
+void vp9_idct8x8_12_add_dspr2(const int16_t *input, uint8_t *dest,
int dest_stride) {
DECLARE_ALIGNED(32, int16_t, out[8 * 8]);
int16_t *outptr = out;
diff --git a/vp9/common/vp9_idct.c b/vp9/common/vp9_idct.c
index 20b78bf..856d41e 100644
--- a/vp9/common/vp9_idct.c
+++ b/vp9/common/vp9_idct.c
@@ -421,7 +421,7 @@
}
}
-void vp9_idct8x8_10_add_c(const int16_t *input, uint8_t *dest, int stride) {
+void vp9_idct8x8_12_add_c(const int16_t *input, uint8_t *dest, int stride) {
int16_t out[8 * 8] = { 0 };
int16_t *outptr = out;
int i, j;
@@ -1348,8 +1348,8 @@
if (eob == 1)
// DC only DCT coefficient
vp9_idct8x8_1_add(input, dest, stride);
- else if (eob <= 10)
- vp9_idct8x8_10_add(input, dest, stride);
+ else if (eob <= 12)
+ vp9_idct8x8_12_add(input, dest, stride);
else
vp9_idct8x8_64_add(input, dest, stride);
}
diff --git a/vp9/common/vp9_idct.h b/vp9/common/vp9_idct.h
index ceca795..d868776 100644
--- a/vp9/common/vp9_idct.h
+++ b/vp9/common/vp9_idct.h
@@ -33,9 +33,6 @@
#define pair_set_epi16(a, b) \
_mm_set_epi16(b, a, b, a, b, a, b, a)
-#define pair_set_epi32(a, b) \
- _mm_set_epi32(b, a, b, a)
-
// Constants:
// for (int i = 1; i< 32; ++i)
// printf("static const int cospi_%d_64 = %.0f;\n", i,
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index 295dd78..7754763 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -12,7 +12,7 @@
/* Encoder forward decls */
struct macroblock;
struct vp9_variance_vtable;
-
+struct search_site_config;
struct mv;
union int_mv;
struct yv12_buffer_config;
@@ -58,7 +58,8 @@
specialize qw/vp9_d63_predictor_4x4/, "$ssse3_x86inc";
add_proto qw/void vp9_h_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_h_predictor_4x4 neon dspr2/, "$ssse3_x86inc";
+specialize qw/vp9_h_predictor_4x4 neon_asm dspr2/, "$ssse3_x86inc";
+$vp9_h_predictor_4x4_neon_asm=vp9_h_predictor_4x4_neon;
add_proto qw/void vp9_d117_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d117_predictor_4x4/;
@@ -70,10 +71,12 @@
specialize qw/vp9_d153_predictor_4x4/, "$ssse3_x86inc";
add_proto qw/void vp9_v_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_v_predictor_4x4 neon/, "$sse_x86inc";
+specialize qw/vp9_v_predictor_4x4 neon_asm/, "$sse_x86inc";
+$vp9_v_predictor_4x4_neon_asm=vp9_v_predictor_4x4_neon;
add_proto qw/void vp9_tm_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_tm_predictor_4x4 neon dspr2/, "$sse_x86inc";
+specialize qw/vp9_tm_predictor_4x4 neon_asm dspr2/, "$sse_x86inc";
+$vp9_tm_predictor_4x4_neon_asm=vp9_tm_predictor_4x4_neon;
add_proto qw/void vp9_dc_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_predictor_4x4 dspr2/, "$sse_x86inc";
@@ -97,7 +100,8 @@
specialize qw/vp9_d63_predictor_8x8/, "$ssse3_x86inc";
add_proto qw/void vp9_h_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_h_predictor_8x8 neon dspr2/, "$ssse3_x86inc";
+specialize qw/vp9_h_predictor_8x8 neon_asm dspr2/, "$ssse3_x86inc";
+$vp9_h_predictor_8x8_neon_asm=vp9_h_predictor_8x8_neon;
add_proto qw/void vp9_d117_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d117_predictor_8x8/;
@@ -109,10 +113,12 @@
specialize qw/vp9_d153_predictor_8x8/, "$ssse3_x86inc";
add_proto qw/void vp9_v_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_v_predictor_8x8 neon/, "$sse_x86inc";
+specialize qw/vp9_v_predictor_8x8 neon_asm/, "$sse_x86inc";
+$vp9_v_predictor_8x8_neon_asm=vp9_v_predictor_8x8_neon;
add_proto qw/void vp9_tm_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_tm_predictor_8x8 neon dspr2/, "$sse2_x86inc";
+specialize qw/vp9_tm_predictor_8x8 neon_asm dspr2/, "$sse2_x86inc";
+$vp9_tm_predictor_8x8_neon_asm=vp9_tm_predictor_8x8_neon;
add_proto qw/void vp9_dc_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_predictor_8x8 dspr2/, "$sse_x86inc";
@@ -136,7 +142,8 @@
specialize qw/vp9_d63_predictor_16x16/, "$ssse3_x86inc";
add_proto qw/void vp9_h_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_h_predictor_16x16 neon dspr2/, "$ssse3_x86inc";
+specialize qw/vp9_h_predictor_16x16 neon_asm dspr2/, "$ssse3_x86inc";
+$vp9_h_predictor_16x16_neon_asm=vp9_h_predictor_16x16_neon;
add_proto qw/void vp9_d117_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d117_predictor_16x16/;
@@ -148,10 +155,12 @@
specialize qw/vp9_d153_predictor_16x16/, "$ssse3_x86inc";
add_proto qw/void vp9_v_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_v_predictor_16x16 neon/, "$sse2_x86inc";
+specialize qw/vp9_v_predictor_16x16 neon_asm/, "$sse2_x86inc";
+$vp9_v_predictor_16x16_neon_asm=vp9_v_predictor_16x16_neon;
add_proto qw/void vp9_tm_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_tm_predictor_16x16 neon/, "$sse2_x86inc";
+specialize qw/vp9_tm_predictor_16x16 neon_asm/, "$sse2_x86inc";
+$vp9_tm_predictor_16x16_neon_asm=vp9_tm_predictor_16x16_neon;
add_proto qw/void vp9_dc_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_predictor_16x16 dspr2/, "$sse2_x86inc";
@@ -175,7 +184,8 @@
specialize qw/vp9_d63_predictor_32x32/, "$ssse3_x86inc";
add_proto qw/void vp9_h_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_h_predictor_32x32 neon/, "$ssse3_x86inc";
+specialize qw/vp9_h_predictor_32x32 neon_asm/, "$ssse3_x86inc";
+$vp9_h_predictor_32x32_neon_asm=vp9_h_predictor_32x32_neon;
add_proto qw/void vp9_d117_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d117_predictor_32x32/;
@@ -187,10 +197,12 @@
specialize qw/vp9_d153_predictor_32x32/;
add_proto qw/void vp9_v_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_v_predictor_32x32 neon/, "$sse2_x86inc";
+specialize qw/vp9_v_predictor_32x32 neon_asm/, "$sse2_x86inc";
+$vp9_v_predictor_32x32_neon_asm=vp9_v_predictor_32x32_neon;
add_proto qw/void vp9_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_tm_predictor_32x32 neon/, "$sse2_x86_64";
+specialize qw/vp9_tm_predictor_32x32 neon_asm/, "$sse2_x86_64";
+$vp9_tm_predictor_32x32_neon_asm=vp9_tm_predictor_32x32_neon;
add_proto qw/void vp9_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_predictor_32x32/, "$sse2_x86inc";
@@ -208,37 +220,48 @@
# Loopfilter
#
add_proto qw/void vp9_lpf_vertical_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
-specialize qw/vp9_lpf_vertical_16 sse2 neon dspr2/;
+specialize qw/vp9_lpf_vertical_16 sse2 neon_asm dspr2/;
+$vp9_lpf_vertical_16_neon_asm=vp9_lpf_vertical_16_neon;
add_proto qw/void vp9_lpf_vertical_16_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
-specialize qw/vp9_lpf_vertical_16_dual sse2 neon dspr2/;
+specialize qw/vp9_lpf_vertical_16_dual sse2 neon_asm dspr2/;
+$vp9_lpf_vertical_16_dual_neon_asm=vp9_lpf_vertical_16_dual_neon;
add_proto qw/void vp9_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
-specialize qw/vp9_lpf_vertical_8 sse2 neon dspr2/;
+specialize qw/vp9_lpf_vertical_8 sse2 neon_asm dspr2/;
+$vp9_lpf_vertical_8_neon_asm=vp9_lpf_vertical_8_neon;
add_proto qw/void vp9_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
-specialize qw/vp9_lpf_vertical_8_dual sse2 neon dspr2/;
+specialize qw/vp9_lpf_vertical_8_dual sse2 neon_asm dspr2/;
+$vp9_lpf_vertical_8_dual_neon_asm=vp9_lpf_vertical_8_dual_neon;
add_proto qw/void vp9_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
-specialize qw/vp9_lpf_vertical_4 mmx neon dspr2/;
+specialize qw/vp9_lpf_vertical_4 mmx neon_asm dspr2/;
+$vp9_lpf_vertical_4_neon_asm=vp9_lpf_vertical_4_neon;
add_proto qw/void vp9_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
-specialize qw/vp9_lpf_vertical_4_dual sse2 neon dspr2/;
+specialize qw/vp9_lpf_vertical_4_dual sse2 neon_asm dspr2/;
+$vp9_lpf_vertical_4_dual_neon_asm=vp9_lpf_vertical_4_dual_neon;
add_proto qw/void vp9_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
-specialize qw/vp9_lpf_horizontal_16 sse2 avx2 neon dspr2/;
+specialize qw/vp9_lpf_horizontal_16 sse2 avx2 neon_asm dspr2/;
+$vp9_lpf_horizontal_16_neon_asm=vp9_lpf_horizontal_16_neon;
add_proto qw/void vp9_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
-specialize qw/vp9_lpf_horizontal_8 sse2 neon dspr2/;
+specialize qw/vp9_lpf_horizontal_8 sse2 neon_asm dspr2/;
+$vp9_lpf_horizontal_8_neon_asm=vp9_lpf_horizontal_8_neon;
add_proto qw/void vp9_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
-specialize qw/vp9_lpf_horizontal_8_dual sse2 neon dspr2/;
+specialize qw/vp9_lpf_horizontal_8_dual sse2 neon_asm dspr2/;
+$vp9_lpf_horizontal_8_dual_neon_asm=vp9_lpf_horizontal_8_dual_neon;
add_proto qw/void vp9_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
-specialize qw/vp9_lpf_horizontal_4 mmx neon dspr2/;
+specialize qw/vp9_lpf_horizontal_4 mmx neon_asm dspr2/;
+$vp9_lpf_horizontal_4_neon_asm=vp9_lpf_horizontal_4_neon;
add_proto qw/void vp9_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
-specialize qw/vp9_lpf_horizontal_4_dual sse2 neon dspr2/;
+specialize qw/vp9_lpf_horizontal_4_dual sse2 neon_asm dspr2/;
+$vp9_lpf_horizontal_4_dual_neon_asm=vp9_lpf_horizontal_4_dual_neon;
#
# post proc
@@ -274,71 +297,91 @@
# Sub Pixel Filters
#
add_proto qw/void vp9_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve_copy neon dspr2/, "$sse2_x86inc";
+specialize qw/vp9_convolve_copy neon_asm dspr2/, "$sse2_x86inc";
+$vp9_convolve_copy_neon_asm=vp9_convolve_copy_neon;
add_proto qw/void vp9_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve_avg neon dspr2/, "$sse2_x86inc";
+specialize qw/vp9_convolve_avg neon_asm dspr2/, "$sse2_x86inc";
+$vp9_convolve_avg_neon_asm=vp9_convolve_avg_neon;
add_proto qw/void vp9_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve8 sse2 ssse3 avx2 neon dspr2/;
+specialize qw/vp9_convolve8 sse2 ssse3 avx2 neon_asm dspr2/;
+$vp9_convolve8_neon_asm=vp9_convolve8_neon;
add_proto qw/void vp9_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve8_horiz sse2 ssse3 avx2 neon dspr2/;
+specialize qw/vp9_convolve8_horiz sse2 ssse3 avx2 neon_asm dspr2/;
+$vp9_convolve8_horiz_neon_asm=vp9_convolve8_horiz_neon;
add_proto qw/void vp9_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve8_vert sse2 ssse3 avx2 neon dspr2/;
+specialize qw/vp9_convolve8_vert sse2 ssse3 avx2 neon_asm dspr2/;
+$vp9_convolve8_vert_neon_asm=vp9_convolve8_vert_neon;
add_proto qw/void vp9_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve8_avg sse2 ssse3 neon dspr2/;
+specialize qw/vp9_convolve8_avg sse2 ssse3 neon_asm dspr2/;
+$vp9_convolve8_avg_neon_asm=vp9_convolve8_avg_neon;
add_proto qw/void vp9_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve8_avg_horiz sse2 ssse3 neon dspr2/;
+specialize qw/vp9_convolve8_avg_horiz sse2 ssse3 neon_asm dspr2/;
+$vp9_convolve8_avg_horiz_neon_asm=vp9_convolve8_avg_horiz_neon;
add_proto qw/void vp9_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve8_avg_vert sse2 ssse3 neon dspr2/;
+specialize qw/vp9_convolve8_avg_vert sse2 ssse3 neon_asm dspr2/;
+$vp9_convolve8_avg_vert_neon_asm=vp9_convolve8_avg_vert_neon;
#
# dct
#
add_proto qw/void vp9_idct4x4_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
-specialize qw/vp9_idct4x4_1_add sse2 neon dspr2/;
+specialize qw/vp9_idct4x4_1_add sse2 neon_asm dspr2/;
+$vp9_idct4x4_1_add_neon_asm=vp9_idct4x4_1_add_neon;
add_proto qw/void vp9_idct4x4_16_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
-specialize qw/vp9_idct4x4_16_add sse2 neon dspr2/;
+specialize qw/vp9_idct4x4_16_add sse2 neon_asm dspr2/;
+$vp9_idct4x4_16_add_neon_asm=vp9_idct4x4_16_add_neon;
add_proto qw/void vp9_idct8x8_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
-specialize qw/vp9_idct8x8_1_add sse2 neon dspr2/;
+specialize qw/vp9_idct8x8_1_add sse2 neon_asm dspr2/;
+$vp9_idct8x8_1_add_neon_asm=vp9_idct8x8_1_add_neon;
add_proto qw/void vp9_idct8x8_64_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
-specialize qw/vp9_idct8x8_64_add sse2 neon dspr2/;
+specialize qw/vp9_idct8x8_64_add sse2 neon_asm dspr2/, "$ssse3_x86_64";
+$vp9_idct8x8_64_add_neon_asm=vp9_idct8x8_64_add_neon;
-add_proto qw/void vp9_idct8x8_10_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
-specialize qw/vp9_idct8x8_10_add sse2 neon dspr2/;
+add_proto qw/void vp9_idct8x8_12_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_idct8x8_12_add sse2 neon_asm dspr2/, "$ssse3_x86_64";
+$vp9_idct8x8_12_add_neon_asm=vp9_idct8x8_12_add_neon;
add_proto qw/void vp9_idct16x16_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
-specialize qw/vp9_idct16x16_1_add sse2 neon dspr2/;
+specialize qw/vp9_idct16x16_1_add sse2 neon_asm dspr2/;
+$vp9_idct16x16_1_add_neon_asm=vp9_idct16x16_1_add_neon;
add_proto qw/void vp9_idct16x16_256_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
-specialize qw/vp9_idct16x16_256_add sse2 neon dspr2/;
+specialize qw/vp9_idct16x16_256_add sse2 neon_asm dspr2/;
+$vp9_idct16x16_256_add_neon_asm=vp9_idct16x16_256_add_neon;
add_proto qw/void vp9_idct16x16_10_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
-specialize qw/vp9_idct16x16_10_add sse2 neon dspr2/;
+specialize qw/vp9_idct16x16_10_add sse2 neon_asm dspr2/;
+$vp9_idct16x16_10_add_neon_asm=vp9_idct16x16_10_add_neon;
add_proto qw/void vp9_idct32x32_1024_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
-specialize qw/vp9_idct32x32_1024_add sse2 neon dspr2/;
+specialize qw/vp9_idct32x32_1024_add sse2 neon_asm dspr2/;
+$vp9_idct32x32_1024_add_neon_asm=vp9_idct32x32_1024_add_neon;
add_proto qw/void vp9_idct32x32_34_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
-specialize qw/vp9_idct32x32_34_add sse2 neon dspr2/;
-$vp9_idct32x32_34_add_neon=vp9_idct32x32_1024_add_neon;
+specialize qw/vp9_idct32x32_34_add sse2 neon_asm dspr2/;
+$vp9_idct32x32_34_add_neon_asm=vp9_idct32x32_1024_add_neon;
add_proto qw/void vp9_idct32x32_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
-specialize qw/vp9_idct32x32_1_add sse2 neon dspr2/;
+specialize qw/vp9_idct32x32_1_add sse2 neon_asm dspr2/;
+$vp9_idct32x32_1_add_neon_asm=vp9_idct32x32_1_add_neon;
add_proto qw/void vp9_iht4x4_16_add/, "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type";
-specialize qw/vp9_iht4x4_16_add sse2 neon dspr2/;
+specialize qw/vp9_iht4x4_16_add sse2 neon_asm dspr2/;
+$vp9_iht4x4_16_add_neon_asm=vp9_iht4x4_16_add_neon;
add_proto qw/void vp9_iht8x8_64_add/, "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type";
-specialize qw/vp9_iht8x8_64_add sse2 neon dspr2/;
+specialize qw/vp9_iht8x8_64_add sse2 neon_asm dspr2/;
+$vp9_iht8x8_64_add_neon_asm=vp9_iht8x8_64_add_neon;
add_proto qw/void vp9_iht16x16_256_add/, "const int16_t *input, uint8_t *output, int pitch, int tx_type";
specialize qw/vp9_iht16x16_256_add sse2 dspr2/;
@@ -379,10 +422,6 @@
add_proto qw/unsigned int vp9_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_variance16x16 mmx/, "$sse2_x86inc", "$avx2_x86inc";
-add_proto qw/void vp9_get_sse_sum_16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
-specialize qw/vp9_get_sse_sum_16x16 sse2/;
-$vp9_get_sse_sum_16x16_sse2=vp9_get16x16var_sse2;
-
add_proto qw/unsigned int vp9_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_variance16x8 mmx/, "$sse2_x86inc";
@@ -392,10 +431,6 @@
add_proto qw/unsigned int vp9_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_variance8x8 mmx/, "$sse2_x86inc";
-add_proto qw/void vp9_get_sse_sum_8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
-specialize qw/vp9_get_sse_sum_8x8 sse2/;
-$vp9_get_sse_sum_8x8_sse2=vp9_get8x8var_sse2;
-
add_proto qw/unsigned int vp9_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_variance8x4/, "$sse2_x86inc";
@@ -563,33 +598,6 @@
add_proto qw/unsigned int vp9_sad4x4_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
specialize qw/vp9_sad4x4_avg/, "$sse_x86inc";
-add_proto qw/unsigned int vp9_variance_halfpixvar16x16_h/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance_halfpixvar16x16_h/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vp9_variance_halfpixvar16x16_v/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance_halfpixvar16x16_v/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vp9_variance_halfpixvar16x16_hv/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance_halfpixvar16x16_hv/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vp9_variance_halfpixvar64x64_h/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance_halfpixvar64x64_h/;
-
-add_proto qw/unsigned int vp9_variance_halfpixvar64x64_v/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance_halfpixvar64x64_v/;
-
-add_proto qw/unsigned int vp9_variance_halfpixvar64x64_hv/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance_halfpixvar64x64_hv/;
-
-add_proto qw/unsigned int vp9_variance_halfpixvar32x32_h/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance_halfpixvar32x32_h/;
-
-add_proto qw/unsigned int vp9_variance_halfpixvar32x32_v/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance_halfpixvar32x32_v/;
-
-add_proto qw/unsigned int vp9_variance_halfpixvar32x32_hv/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance_halfpixvar32x32_hv/;
-
add_proto qw/void vp9_sad64x64x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array";
specialize qw/vp9_sad64x64x3/;
@@ -728,13 +736,13 @@
specialize qw/vp9_fht16x16 sse2 avx2/;
add_proto qw/void vp9_fwht4x4/, "const int16_t *input, int16_t *output, int stride";
-specialize qw/vp9_fwht4x4/;
+specialize qw/vp9_fwht4x4/, "$mmx_x86inc";
add_proto qw/void vp9_fdct4x4/, "const int16_t *input, int16_t *output, int stride";
specialize qw/vp9_fdct4x4 sse2 avx2/;
add_proto qw/void vp9_fdct8x8/, "const int16_t *input, int16_t *output, int stride";
-specialize qw/vp9_fdct8x8 sse2 avx2/;
+specialize qw/vp9_fdct8x8 sse2 avx2/, "$ssse3_x86_64";
add_proto qw/void vp9_fdct16x16/, "const int16_t *input, int16_t *output, int stride";
specialize qw/vp9_fdct16x16 sse2 avx2/;
@@ -757,11 +765,11 @@
specialize qw/vp9_refining_search_sad sse3/;
$vp9_refining_search_sad_sse3=vp9_refining_search_sadx4;
-add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
+add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
specialize qw/vp9_diamond_search_sad sse3/;
$vp9_diamond_search_sad_sse3=vp9_diamond_search_sadx4;
-add_proto qw/int vp9_full_range_search/, "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
+add_proto qw/int vp9_full_range_search/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
specialize qw/vp9_full_range_search/;
add_proto qw/void vp9_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
diff --git a/vp9/common/vp9_tapify.py b/vp9/common/vp9_tapify.py
deleted file mode 100644
index 99529cf..0000000
--- a/vp9/common/vp9_tapify.py
+++ /dev/null
@@ -1,106 +0,0 @@
-"""
- * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
-"""
-#!/usr/bin/env python
-import sys,string,os,re,math,numpy
-scale = 2**16
-def dist(p1,p2):
- x1,y1 = p1
- x2,y2 = p2
- if x1==x2 and y1==y2 :
- return 1.0
- return 1/ math.sqrt((x1-x2)*(x1-x2)+(y1-y2)*(y1-y2))
-
-def gettaps(p):
- def l(b):
- return int(math.floor(b))
- def h(b):
- return int(math.ceil(b))
- def t(b,p,s):
- return int((scale*dist(b,p)+s/2)/s)
- r,c = p
- ul=[l(r),l(c)]
- ur=[l(r),h(c)]
- ll=[h(r),l(c)]
- lr=[h(r),h(c)]
- sum = dist(ul,p)+dist(ur,p)+dist(ll,p)+dist(lr,p)
- t4 = scale - t(ul,p,sum) - t(ur,p,sum) - t(ll,p,sum);
- return [[ul,t(ul,p,sum)],[ur,t(ur,p,sum)],
- [ll,t(ll,p,sum)],[lr,t4]]
-
-def print_mb_taps(angle,blocksize):
- theta = angle / 57.2957795;
- affine = [[math.cos(theta),-math.sin(theta)],
- [math.sin(theta),math.cos(theta)]]
- radius = (float(blocksize)-1)/2
- print " // angle of",angle,"degrees"
- for y in range(blocksize) :
- for x in range(blocksize) :
- r,c = numpy.dot(affine,[y-radius, x-radius])
- tps = gettaps([r+radius,c+radius])
- for t in tps :
- p,t = t
- tr,tc = p
- print " %2d, %2d, %5d, " % (tr,tc,t,),
- print " // %2d,%2d " % (y,x)
-
-i=float(sys.argv[1])
-while i <= float(sys.argv[2]) :
- print_mb_taps(i,float(sys.argv[4]))
- i=i+float(sys.argv[3])
-"""
-
-taps = []
-pt=dict()
-ptr=dict()
-for y in range(16) :
- for x in range(16) :
- r,c = numpy.dot(affine,[y-7.5, x-7.5])
- tps = gettaps([r+7.5,c+7.5])
- j=0
- for tp in tps :
- p,i = tp
- r,c = p
- pt[y,x,j]= [p,i]
- try:
- ptr[r,j,c].append([y,x])
- except:
- ptr[r,j,c]=[[y,x]]
- j = j+1
-
-for key in sorted(pt.keys()) :
- print key,pt[key]
-
-lr = -99
-lj = -99
-lc = 0
-
-shuf=""
-mask=""
-for r,j,c in sorted(ptr.keys()) :
- for y,x in ptr[r,j,c] :
- if lr != r or lj != j :
- print "shuf_"+str(lr)+"_"+str(lj)+"_"+shuf.ljust(16,"0"), lc
- shuf=""
- lc = 0
- for i in range(lc,c-1) :
- shuf = shuf +"0"
- shuf = shuf + hex(x)[2]
- lc =c
- break
- lr = r
- lj = j
-# print r,j,c,ptr[r,j,c]
-# print
-
-for r,j,c in sorted(ptr.keys()) :
- for y,x in ptr[r,j,c] :
- print r,j,c,y,x
- break
-"""
diff --git a/vp9/common/x86/vp9_idct_intrin_sse2.c b/vp9/common/x86/vp9_idct_intrin_sse2.c
index 13a5b5a..0231726 100644
--- a/vp9/common/x86/vp9_idct_intrin_sse2.c
+++ b/vp9/common/x86/vp9_idct_intrin_sse2.c
@@ -995,7 +995,7 @@
RECON_AND_STORE(dest, in[7]);
}
-void vp9_idct8x8_10_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
+void vp9_idct8x8_12_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
const __m128i zero = _mm_setzero_si128();
const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
const __m128i final_rounding = _mm_set1_epi16(1<<4);
diff --git a/vp9/common/x86/vp9_idct_ssse3.asm b/vp9/common/x86/vp9_idct_ssse3.asm
new file mode 100644
index 0000000..2c10607
--- /dev/null
+++ b/vp9/common/x86/vp9_idct_ssse3.asm
@@ -0,0 +1,300 @@
+;
+; Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+%include "third_party/x86inc/x86inc.asm"
+
+; This file provides SSSE3 version of the inverse transformation. Part
+; of the functions are originally derived from the ffmpeg project.
+; Note that the current version applies to x86 64-bit only.
+
+SECTION_RODATA
+
+pw_11585x2: times 8 dw 23170
+pd_8192: times 4 dd 8192
+pw_16: times 8 dw 16
+
+%macro TRANSFORM_COEFFS 2
+pw_%1_%2: dw %1, %2, %1, %2, %1, %2, %1, %2
+pw_m%2_%1: dw -%2, %1, -%2, %1, -%2, %1, -%2, %1
+%endmacro
+
+TRANSFORM_COEFFS 6270, 15137
+TRANSFORM_COEFFS 3196, 16069
+TRANSFORM_COEFFS 13623, 9102
+
+%macro PAIR_PP_COEFFS 2
+dpw_%1_%2: dw %1, %1, %1, %1, %2, %2, %2, %2
+%endmacro
+
+%macro PAIR_MP_COEFFS 2
+dpw_m%1_%2: dw -%1, -%1, -%1, -%1, %2, %2, %2, %2
+%endmacro
+
+%macro PAIR_MM_COEFFS 2
+dpw_m%1_m%2: dw -%1, -%1, -%1, -%1, -%2, -%2, -%2, -%2
+%endmacro
+
+PAIR_PP_COEFFS 30274, 12540
+PAIR_PP_COEFFS 6392, 32138
+PAIR_MP_COEFFS 18204, 27246
+
+PAIR_PP_COEFFS 12540, 12540
+PAIR_PP_COEFFS 30274, 30274
+PAIR_PP_COEFFS 6392, 6392
+PAIR_PP_COEFFS 32138, 32138
+PAIR_MM_COEFFS 18204, 18204
+PAIR_PP_COEFFS 27246, 27246
+
+SECTION .text
+
+%if ARCH_X86_64
+%macro SUM_SUB 3
+ psubw m%3, m%1, m%2
+ paddw m%1, m%2
+ SWAP %2, %3
+%endmacro
+
+; butterfly operation
+%macro MUL_ADD_2X 6 ; dst1, dst2, src, round, coefs1, coefs2
+ pmaddwd m%1, m%3, %5
+ pmaddwd m%2, m%3, %6
+ paddd m%1, %4
+ paddd m%2, %4
+ psrad m%1, 14
+ psrad m%2, 14
+%endmacro
+
+%macro BUTTERFLY_4X 7 ; dst1, dst2, coef1, coef2, round, tmp1, tmp2
+ punpckhwd m%6, m%2, m%1
+ MUL_ADD_2X %7, %6, %6, %5, [pw_m%4_%3], [pw_%3_%4]
+ punpcklwd m%2, m%1
+ MUL_ADD_2X %1, %2, %2, %5, [pw_m%4_%3], [pw_%3_%4]
+ packssdw m%1, m%7
+ packssdw m%2, m%6
+%endmacro
+
+; matrix transpose
+%macro INTERLEAVE_2X 4
+ punpckh%1 m%4, m%2, m%3
+ punpckl%1 m%2, m%3
+ SWAP %3, %4
+%endmacro
+
+%macro TRANSPOSE8X8 9
+ INTERLEAVE_2X wd, %1, %2, %9
+ INTERLEAVE_2X wd, %3, %4, %9
+ INTERLEAVE_2X wd, %5, %6, %9
+ INTERLEAVE_2X wd, %7, %8, %9
+
+ INTERLEAVE_2X dq, %1, %3, %9
+ INTERLEAVE_2X dq, %2, %4, %9
+ INTERLEAVE_2X dq, %5, %7, %9
+ INTERLEAVE_2X dq, %6, %8, %9
+
+ INTERLEAVE_2X qdq, %1, %5, %9
+ INTERLEAVE_2X qdq, %3, %7, %9
+ INTERLEAVE_2X qdq, %2, %6, %9
+ INTERLEAVE_2X qdq, %4, %8, %9
+
+ SWAP %2, %5
+ SWAP %4, %7
+%endmacro
+
+%macro IDCT8_1D 0
+ SUM_SUB 0, 4, 9
+ BUTTERFLY_4X 2, 6, 6270, 15137, m8, 9, 10
+ pmulhrsw m0, m12
+ pmulhrsw m4, m12
+ BUTTERFLY_4X 1, 7, 3196, 16069, m8, 9, 10
+ BUTTERFLY_4X 5, 3, 13623, 9102, m8, 9, 10
+
+ SUM_SUB 1, 5, 9
+ SUM_SUB 7, 3, 9
+ SUM_SUB 0, 6, 9
+ SUM_SUB 4, 2, 9
+ SUM_SUB 3, 5, 9
+ pmulhrsw m3, m12
+ pmulhrsw m5, m12
+
+ SUM_SUB 0, 7, 9
+ SUM_SUB 4, 3, 9
+ SUM_SUB 2, 5, 9
+ SUM_SUB 6, 1, 9
+
+ SWAP 3, 6
+ SWAP 1, 4
+%endmacro
+
+; This macro handles 8 pixels per line
+%macro ADD_STORE_8P_2X 5; src1, src2, tmp1, tmp2, zero
+ paddw m%1, m11
+ paddw m%2, m11
+ psraw m%1, 5
+ psraw m%2, 5
+
+ movh m%3, [outputq]
+ movh m%4, [outputq + strideq]
+ punpcklbw m%3, m%5
+ punpcklbw m%4, m%5
+ paddw m%3, m%1
+ paddw m%4, m%2
+ packuswb m%3, m%5
+ packuswb m%4, m%5
+ movh [outputq], m%3
+ movh [outputq + strideq], m%4
+%endmacro
+
+INIT_XMM ssse3
+; full inverse 8x8 2D-DCT transform
+cglobal idct8x8_64_add, 3, 5, 13, input, output, stride
+ mova m8, [pd_8192]
+ mova m11, [pw_16]
+ mova m12, [pw_11585x2]
+
+ lea r3, [2 * strideq]
+
+ mova m0, [inputq + 0]
+ mova m1, [inputq + 16]
+ mova m2, [inputq + 32]
+ mova m3, [inputq + 48]
+ mova m4, [inputq + 64]
+ mova m5, [inputq + 80]
+ mova m6, [inputq + 96]
+ mova m7, [inputq + 112]
+
+ TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9
+ IDCT8_1D
+ TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9
+ IDCT8_1D
+
+ pxor m12, m12
+ ADD_STORE_8P_2X 0, 1, 9, 10, 12
+ lea outputq, [outputq + r3]
+ ADD_STORE_8P_2X 2, 3, 9, 10, 12
+ lea outputq, [outputq + r3]
+ ADD_STORE_8P_2X 4, 5, 9, 10, 12
+ lea outputq, [outputq + r3]
+ ADD_STORE_8P_2X 6, 7, 9, 10, 12
+
+ RET
+
+; inverse 8x8 2D-DCT transform with only first 10 coeffs non-zero
+cglobal idct8x8_12_add, 3, 5, 13, input, output, stride
+ mova m8, [pd_8192]
+ mova m11, [pw_16]
+ mova m12, [pw_11585x2]
+
+ lea r3, [2 * strideq]
+
+ mova m0, [inputq + 0]
+ mova m1, [inputq + 16]
+ mova m2, [inputq + 32]
+ mova m3, [inputq + 48]
+
+ punpcklwd m0, m1
+ punpcklwd m2, m3
+ punpckhdq m9, m0, m2
+ punpckldq m0, m2
+ SWAP 2, 9
+
+ ; m0 -> [0], [0]
+ ; m1 -> [1], [1]
+ ; m2 -> [2], [2]
+ ; m3 -> [3], [3]
+ punpckhqdq m10, m0, m0
+ punpcklqdq m0, m0
+ punpckhqdq m9, m2, m2
+ punpcklqdq m2, m2
+ SWAP 1, 10
+ SWAP 3, 9
+
+ pmulhrsw m0, m12
+ pmulhrsw m2, [dpw_30274_12540]
+ pmulhrsw m1, [dpw_6392_32138]
+ pmulhrsw m3, [dpw_m18204_27246]
+
+ SUM_SUB 0, 2, 9
+ SUM_SUB 1, 3, 9
+
+ punpcklqdq m9, m3, m3
+ punpckhqdq m5, m3, m9
+
+ SUM_SUB 3, 5, 9
+ punpckhqdq m5, m3
+ pmulhrsw m5, m12
+
+ punpckhqdq m9, m1, m5
+ punpcklqdq m1, m5
+ SWAP 5, 9
+
+ SUM_SUB 0, 5, 9
+ SUM_SUB 2, 1, 9
+
+ punpckhqdq m3, m0, m0
+ punpckhqdq m4, m1, m1
+ punpckhqdq m6, m5, m5
+ punpckhqdq m7, m2, m2
+
+ punpcklwd m0, m3
+ punpcklwd m7, m2
+ punpcklwd m1, m4
+ punpcklwd m6, m5
+
+ punpckhdq m4, m0, m7
+ punpckldq m0, m7
+ punpckhdq m10, m1, m6
+ punpckldq m5, m1, m6
+
+ punpckhqdq m1, m0, m5
+ punpcklqdq m0, m5
+ punpckhqdq m3, m4, m10
+ punpcklqdq m2, m4, m10
+
+
+ pmulhrsw m0, m12
+ pmulhrsw m6, m2, [dpw_30274_30274]
+ pmulhrsw m4, m2, [dpw_12540_12540]
+
+ pmulhrsw m7, m1, [dpw_32138_32138]
+ pmulhrsw m1, [dpw_6392_6392]
+ pmulhrsw m5, m3, [dpw_m18204_m18204]
+ pmulhrsw m3, [dpw_27246_27246]
+
+ mova m2, m0
+ SUM_SUB 0, 6, 9
+ SUM_SUB 2, 4, 9
+ SUM_SUB 1, 5, 9
+ SUM_SUB 7, 3, 9
+
+ SUM_SUB 3, 5, 9
+ pmulhrsw m3, m12
+ pmulhrsw m5, m12
+
+ SUM_SUB 0, 7, 9
+ SUM_SUB 2, 3, 9
+ SUM_SUB 4, 5, 9
+ SUM_SUB 6, 1, 9
+
+ SWAP 3, 6
+ SWAP 1, 2
+ SWAP 2, 4
+
+
+ pxor m12, m12
+ ADD_STORE_8P_2X 0, 1, 9, 10, 12
+ lea outputq, [outputq + r3]
+ ADD_STORE_8P_2X 2, 3, 9, 10, 12
+ lea outputq, [outputq + r3]
+ ADD_STORE_8P_2X 4, 5, 9, 10, 12
+ lea outputq, [outputq + r3]
+ ADD_STORE_8P_2X 6, 7, 9, 10, 12
+
+ RET
+
+%endif
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index 1cc7ab7..9dd0c44 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -677,7 +677,7 @@
static void decode_tile(VP9Decoder *pbi, const TileInfo *const tile,
vp9_reader *r) {
- const int num_threads = pbi->oxcf.max_threads;
+ const int num_threads = pbi->max_threads;
VP9_COMMON *const cm = &pbi->common;
int mi_row, mi_col;
MACROBLOCKD *xd = &pbi->mb;
@@ -828,8 +828,7 @@
// Decode tiles using data from tile_buffers
for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
- const int col = pbi->oxcf.inv_tile_order ? tile_cols - tile_col - 1
- : tile_col;
+ const int col = pbi->inv_tile_order ? tile_cols - tile_col - 1 : tile_col;
const int last_tile = tile_row == tile_rows - 1 &&
col == tile_cols - 1;
const TileBuffer *const buf = &tile_buffers[tile_row][col];
@@ -887,7 +886,7 @@
const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
const int tile_cols = 1 << cm->log2_tile_cols;
const int tile_rows = 1 << cm->log2_tile_rows;
- const int num_workers = MIN(pbi->oxcf.max_threads & ~1, tile_cols);
+ const int num_workers = MIN(pbi->max_threads & ~1, tile_cols);
TileBuffer tile_buffers[1 << 6];
int n;
int final_worker = -1;
@@ -899,7 +898,7 @@
// TODO(jzern): See if we can remove the restriction of passing in max
// threads to the decoder.
if (pbi->num_tile_workers == 0) {
- const int num_threads = pbi->oxcf.max_threads & ~1;
+ const int num_threads = pbi->max_threads & ~1;
int i;
// TODO(jzern): Allocate one less worker, as in the current code we only
// use num_threads - 1 workers.
@@ -1328,7 +1327,7 @@
CHECK_MEM_ERROR(cm, pbi->lf_worker.data1,
vpx_memalign(32, sizeof(LFWorkerData)));
pbi->lf_worker.hook = (VP9WorkerHook)vp9_loop_filter_worker;
- if (pbi->oxcf.max_threads > 1 && !vp9_worker_reset(&pbi->lf_worker)) {
+ if (pbi->max_threads > 1 && !vp9_worker_reset(&pbi->lf_worker)) {
vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
"Loop filter thread creation failed");
}
@@ -1353,7 +1352,7 @@
// TODO(jzern): remove frame_parallel_decoding_mode restriction for
// single-frame tile decoding.
- if (pbi->oxcf.max_threads > 1 && tile_rows == 1 && tile_cols > 1 &&
+ if (pbi->max_threads > 1 && tile_rows == 1 && tile_cols > 1 &&
cm->frame_parallel_decoding_mode) {
*p_data_end = decode_tiles_mt(pbi, data + first_partition_size, data_end);
} else {
@@ -1370,15 +1369,17 @@
"A stream must start with a complete key frame");
}
- if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode) {
- vp9_adapt_coef_probs(cm);
+ if (!new_fb->corrupted) {
+ if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode) {
+ vp9_adapt_coef_probs(cm);
- if (!frame_is_intra_only(cm)) {
- vp9_adapt_mode_probs(cm);
- vp9_adapt_mv_probs(cm, cm->allow_high_precision_mv);
+ if (!frame_is_intra_only(cm)) {
+ vp9_adapt_mode_probs(cm);
+ vp9_adapt_mv_probs(cm, cm->allow_high_precision_mv);
+ }
+ } else {
+ debug_check_frame_counts(cm);
}
- } else {
- debug_check_frame_counts(cm);
}
if (cm->refresh_frame_context)
diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c
index faf710c..8f72aa4 100644
--- a/vp9/decoder/vp9_decoder.c
+++ b/vp9/decoder/vp9_decoder.c
@@ -32,74 +32,6 @@
#include "vp9/decoder/vp9_detokenize.h"
#include "vp9/decoder/vp9_dthread.h"
-#define WRITE_RECON_BUFFER 0
-#if WRITE_RECON_BUFFER == 1
-static void recon_write_yuv_frame(const char *name,
- const YV12_BUFFER_CONFIG *s,
- int w, int _h) {
- FILE *yuv_file = fopen(name, "ab");
- const uint8_t *src = s->y_buffer;
- int h = _h;
-
- do {
- fwrite(src, w, 1, yuv_file);
- src += s->y_stride;
- } while (--h);
-
- src = s->u_buffer;
- h = (_h + 1) >> 1;
- w = (w + 1) >> 1;
-
- do {
- fwrite(src, w, 1, yuv_file);
- src += s->uv_stride;
- } while (--h);
-
- src = s->v_buffer;
- h = (_h + 1) >> 1;
-
- do {
- fwrite(src, w, 1, yuv_file);
- src += s->uv_stride;
- } while (--h);
-
- fclose(yuv_file);
-}
-#endif
-#if WRITE_RECON_BUFFER == 2
-void write_dx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame) {
- // write the frame
- FILE *yframe;
- int i;
- char filename[255];
-
- snprintf(filename, sizeof(filename)-1, "dx\\y%04d.raw", this_frame);
- yframe = fopen(filename, "wb");
-
- for (i = 0; i < frame->y_height; i++)
- fwrite(frame->y_buffer + i * frame->y_stride,
- frame->y_width, 1, yframe);
-
- fclose(yframe);
- snprintf(filename, sizeof(filename)-1, "dx\\u%04d.raw", this_frame);
- yframe = fopen(filename, "wb");
-
- for (i = 0; i < frame->uv_height; i++)
- fwrite(frame->u_buffer + i * frame->uv_stride,
- frame->uv_width, 1, yframe);
-
- fclose(yframe);
- snprintf(filename, sizeof(filename)-1, "dx\\v%04d.raw", this_frame);
- yframe = fopen(filename, "wb");
-
- for (i = 0; i < frame->uv_height; i++)
- fwrite(frame->v_buffer + i * frame->uv_stride,
- frame->uv_width, 1, yframe);
-
- fclose(yframe);
-}
-#endif
-
void vp9_initialize_dec() {
static int init_done = 0;
@@ -110,7 +42,7 @@
}
}
-VP9Decoder *vp9_decoder_create(const VP9DecoderConfig *oxcf) {
+VP9Decoder *vp9_decoder_create() {
VP9Decoder *const pbi = vpx_memalign(32, sizeof(*pbi));
VP9_COMMON *const cm = pbi ? &pbi->common : NULL;
@@ -134,7 +66,6 @@
vpx_memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map));
cm->current_video_frame = 0;
- pbi->oxcf = *oxcf;
pbi->ready_for_new_data = 1;
pbi->decoded_key_frame = 0;
@@ -348,15 +279,6 @@
swap_frame_buffers(pbi);
-#if WRITE_RECON_BUFFER == 2
- if (cm->show_frame)
- write_dx_frame_to_file(cm->frame_to_show,
- cm->current_video_frame);
- else
- write_dx_frame_to_file(cm->frame_to_show,
- cm->current_video_frame + 1000);
-#endif
-
if (!pbi->do_loopfilter_inline) {
// If multiple threads are used to decode tiles, then we use those threads
// to do parallel loopfiltering.
@@ -367,21 +289,6 @@
}
}
-#if WRITE_RECON_BUFFER == 2
- if (cm->show_frame)
- write_dx_frame_to_file(cm->frame_to_show,
- cm->current_video_frame + 2000);
- else
- write_dx_frame_to_file(cm->frame_to_show,
- cm->current_video_frame + 3000);
-#endif
-
-#if WRITE_RECON_BUFFER == 1
- if (cm->show_frame)
- recon_write_yuv_frame("recon.yuv", cm->frame_to_show,
- cm->width, cm->height);
-#endif
-
vp9_clear_system_state();
cm->last_width = cm->width;
@@ -423,10 +330,6 @@
ret = vp9_post_proc_frame(&pbi->common, sd, flags);
#else
*sd = *pbi->common.frame_to_show;
- sd->y_width = pbi->common.width;
- sd->y_height = pbi->common.height;
- sd->uv_width = sd->y_width >> pbi->common.subsampling_x;
- sd->uv_height = sd->y_height >> pbi->common.subsampling_y;
ret = 0;
#endif /*!CONFIG_POSTPROC*/
vp9_clear_system_state();
diff --git a/vp9/decoder/vp9_decoder.h b/vp9/decoder/vp9_decoder.h
index ebcbb90..66b0f15 100644
--- a/vp9/decoder/vp9_decoder.h
+++ b/vp9/decoder/vp9_decoder.h
@@ -27,21 +27,11 @@
extern "C" {
#endif
-typedef struct VP9DecoderConfig {
- int width;
- int height;
- int version;
- int max_threads;
- int inv_tile_order;
-} VP9DecoderConfig;
-
typedef struct VP9Decoder {
DECLARE_ALIGNED(16, MACROBLOCKD, mb);
DECLARE_ALIGNED(16, VP9_COMMON, common);
- VP9DecoderConfig oxcf;
-
int64_t last_time_stamp;
int ready_for_new_data;
@@ -59,6 +49,9 @@
vpx_decrypt_cb decrypt_cb;
void *decrypt_state;
+
+ int max_threads;
+ int inv_tile_order;
} VP9Decoder;
void vp9_initialize_dec();
@@ -83,8 +76,7 @@
int vp9_get_reference_dec(struct VP9Decoder *pbi,
int index, YV12_BUFFER_CONFIG **fb);
-
-struct VP9Decoder *vp9_decoder_create(const VP9DecoderConfig *oxcf);
+struct VP9Decoder *vp9_decoder_create();
void vp9_decoder_remove(struct VP9Decoder *pbi);
diff --git a/vp9/decoder/vp9_dthread.c b/vp9/decoder/vp9_dthread.c
index 9098063..019b0ff 100644
--- a/vp9/decoder/vp9_dthread.c
+++ b/vp9/decoder/vp9_dthread.c
@@ -140,7 +140,7 @@
// Number of superblock rows and cols
const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
const int tile_cols = 1 << cm->log2_tile_cols;
- const int num_workers = MIN(pbi->oxcf.max_threads & ~1, tile_cols);
+ const int num_workers = MIN(pbi->max_threads & ~1, tile_cols);
int i;
// Allocate memory used in thread synchronization.
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index c5a85c9..35d2ecf 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -1006,9 +1006,10 @@
found = cm->width == cfg->y_crop_width &&
cm->height == cfg->y_crop_height;
- // TODO(ivan): This prevents a bug while more than 3 buffers are used. Do it
- // in a better way.
- if (cpi->use_svc) {
+ // Set "found" to 0 for temporal svc and for spatial svc key frame
+ if (cpi->use_svc &&
+ (cpi->svc.number_spatial_layers == 1 ||
+ cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame)) {
found = 0;
}
vp9_wb_write_bit(wb, found);
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index fcf2a04..2ccf4f8 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -20,12 +20,6 @@
extern "C" {
#endif
-// motion search site
-typedef struct {
- MV mv;
- int offset;
-} search_site;
-
// Structure to hold snapshot of coding context during the mode picking process
typedef struct {
MODE_INFO mic;
@@ -108,10 +102,6 @@
int skip_optimize;
int q_index;
- search_site *ss;
- int ss_count;
- int searches_per_step;
-
int errorperbit;
int sadperbit16;
int sadperbit4;
@@ -139,12 +129,6 @@
int *nmvsadcost_hp[2];
int **mvsadcost;
- int mbmode_cost[INTRA_MODES];
- unsigned inter_mode_cost[INTER_MODE_CONTEXTS][INTER_MODES];
- int intra_uv_mode_cost[FRAME_TYPES][INTRA_MODES];
- int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES];
- int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS];
-
// These define limits to motion vector components to prevent them
// from extending outside the UMV borders
int mv_col_min;
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 2e44f7d..fba9465 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -70,6 +70,18 @@
128, 128, 128, 128, 128, 128, 128, 128
};
+static void get_sse_sum_8x8(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ unsigned int *sse, int *sum) {
+ variance(src, src_stride, ref, ref_stride, 8, 8, sse, sum);
+}
+
+static void get_sse_sum_16x16(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ unsigned int *sse, int *sum) {
+ variance(src, src_stride, ref, ref_stride, 16, 16, sse, sum);
+}
+
static unsigned int get_sby_perpixel_variance(VP9_COMP *cpi,
const struct buf_2d *ref,
BLOCK_SIZE bs) {
@@ -242,7 +254,6 @@
}
static void set_block_size(VP9_COMP * const cpi,
- const TileInfo *const tile,
int mi_row, int mi_col,
BLOCK_SIZE bsize) {
if (cpi->common.mi_cols > mi_col && cpi->common.mi_rows > mi_row) {
@@ -365,11 +376,9 @@
static int set_vt_partitioning(VP9_COMP *cpi,
void *data,
- const TileInfo *const tile,
BLOCK_SIZE bsize,
int mi_row,
- int mi_col,
- int mi_size) {
+ int mi_col) {
VP9_COMMON * const cm = &cpi->common;
variance_node vt;
const int block_width = num_8x8_blocks_wide_lookup[bsize];
@@ -386,7 +395,7 @@
if (mi_col + block_width / 2 < cm->mi_cols &&
mi_row + block_height / 2 < cm->mi_rows &&
vt.part_variances->none.variance < threshold) {
- set_block_size(cpi, tile, mi_row, mi_col, bsize);
+ set_block_size(cpi, mi_row, mi_col, bsize);
return 1;
}
@@ -395,8 +404,8 @@
vt.part_variances->vert[0].variance < threshold &&
vt.part_variances->vert[1].variance < threshold) {
BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_VERT);
- set_block_size(cpi, tile, mi_row, mi_col, subsize);
- set_block_size(cpi, tile, mi_row, mi_col + block_width / 2, subsize);
+ set_block_size(cpi, mi_row, mi_col, subsize);
+ set_block_size(cpi, mi_row, mi_col + block_width / 2, subsize);
return 1;
}
@@ -405,8 +414,8 @@
vt.part_variances->horz[0].variance < threshold &&
vt.part_variances->horz[1].variance < threshold) {
BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_HORZ);
- set_block_size(cpi, tile, mi_row, mi_col, subsize);
- set_block_size(cpi, tile, mi_row + block_height / 2, mi_col, subsize);
+ set_block_size(cpi, mi_row, mi_col, subsize);
+ set_block_size(cpi, mi_row + block_height / 2, mi_col, subsize);
return 1;
}
return 0;
@@ -475,8 +484,8 @@
unsigned int sse = 0;
int sum = 0;
if (x_idx < pixels_wide && y_idx < pixels_high)
- vp9_get_sse_sum_8x8(s + y_idx * sp + x_idx, sp,
- d + y_idx * dp + x_idx, dp, &sse, &sum);
+ get_sse_sum_8x8(s + y_idx * sp + x_idx, sp,
+ d + y_idx * dp + x_idx, dp, &sse, &sum);
fill_variance(sse, sum, 64, &vst->split[k].part_variances.none);
}
}
@@ -493,13 +502,13 @@
// Now go through the entire structure, splitting every block size until
// we get to one that's got a variance lower than our threshold, or we
// hit 8x8.
- if (!set_vt_partitioning(cpi, &vt, tile, BLOCK_64X64,
- mi_row, mi_col, 8)) {
+ if (!set_vt_partitioning(cpi, &vt, BLOCK_64X64,
+ mi_row, mi_col)) {
for (i = 0; i < 4; ++i) {
const int x32_idx = ((i & 1) << 2);
const int y32_idx = ((i >> 1) << 2);
- if (!set_vt_partitioning(cpi, &vt.split[i], tile, BLOCK_32X32,
- (mi_row + y32_idx), (mi_col + x32_idx), 4)) {
+ if (!set_vt_partitioning(cpi, &vt.split[i], BLOCK_32X32,
+ (mi_row + y32_idx), (mi_col + x32_idx))) {
for (j = 0; j < 4; ++j) {
const int x16_idx = ((j & 1) << 1);
const int y16_idx = ((j >> 1) << 1);
@@ -509,7 +518,7 @@
#ifdef DISABLE_8X8_VAR_BASED_PARTITION
if (mi_row + y32_idx + y16_idx + 1 < cm->mi_rows &&
mi_row + x32_idx + x16_idx + 1 < cm->mi_cols) {
- set_block_size(cpi, tile,
+ set_block_size(cpi,
(mi_row + y32_idx + y16_idx),
(mi_col + x32_idx + x16_idx),
BLOCK_16X16);
@@ -517,7 +526,7 @@
for (k = 0; k < 4; ++k) {
const int x8_idx = (k & 1);
const int y8_idx = (k >> 1);
- set_block_size(cpi, tile,
+ set_block_size(cpi,
(mi_row + y32_idx + y16_idx + y8_idx),
(mi_col + x32_idx + x16_idx + x8_idx),
BLOCK_8X8);
@@ -531,7 +540,7 @@
for (k = 0; k < 4; ++k) {
const int x8_idx = (k & 1);
const int y8_idx = (k >> 1);
- set_block_size(cpi, tile,
+ set_block_size(cpi,
(mi_row + y32_idx + y16_idx + y8_idx),
(mi_col + x32_idx + x16_idx + x8_idx),
BLOCK_8X8);
@@ -560,11 +569,6 @@
return act < (8 << 12) ? MIN(act, 5 << 12) : act;
}
-// Stub for alternative experimental activity measures.
-static unsigned int alt_activity_measure(MACROBLOCK *x, int use_dc_pred) {
- return vp9_encode_intra(x, use_dc_pred);
-}
-
static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
int mi_row, int mi_col, BLOCK_SIZE bsize,
int output_enabled) {
@@ -1216,10 +1220,9 @@
int b_offset = b_mi_row * MI_SIZE * src_stride +
b_mi_col * MI_SIZE;
- vp9_get_sse_sum_16x16(src + b_offset,
- src_stride,
- pre_src + b_offset,
- pre_stride, &d16[j].sse, &d16[j].sum);
+ get_sse_sum_16x16(src + b_offset, src_stride,
+ pre_src + b_offset, pre_stride,
+ &d16[j].sse, &d16[j].sum);
d16[j].var = d16[j].sse -
(((uint32_t)d16[j].sum * d16[j].sum) >> 8);
@@ -1450,8 +1453,7 @@
MODE_INFO **mi_8x8,
TOKENEXTRA **tp, int mi_row, int mi_col,
BLOCK_SIZE bsize, int *rate, int64_t *dist,
- int do_recon, PC_TREE *pc_tree,
- int block) {
+ int do_recon, PC_TREE *pc_tree) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
@@ -1618,7 +1620,7 @@
rd_use_partition(cpi, tile, mi_8x8 + jj * bss * mis + ii * bss, tp,
mi_row + y_idx, mi_col + x_idx, subsize, &rt, &dt,
- i != 3, pc_tree->split[i], i);
+ i != 3, pc_tree->split[i]);
if (rt == INT_MAX || dt == INT64_MAX) {
last_part_rate = INT_MAX;
last_part_dist = INT64_MAX;
@@ -1865,6 +1867,67 @@
*max_block_size = max_size;
}
+static void auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile,
+ int mi_row, int mi_col,
+ BLOCK_SIZE *min_block_size,
+ BLOCK_SIZE *max_block_size) {
+ VP9_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &cpi->mb.e_mbd;
+ MODE_INFO **mi_8x8 = xd->mi;
+ const int left_in_image = xd->left_available && mi_8x8[-1];
+ const int above_in_image = xd->up_available &&
+ mi_8x8[-xd->mi_stride];
+ int row8x8_remaining = tile->mi_row_end - mi_row;
+ int col8x8_remaining = tile->mi_col_end - mi_col;
+ int bh, bw;
+ BLOCK_SIZE min_size = BLOCK_32X32;
+ BLOCK_SIZE max_size = BLOCK_8X8;
+ int bsl = mi_width_log2_lookup[BLOCK_64X64];
+ int search_range_ctrl = (((mi_row + mi_col) >> bsl) +
+ cpi->sf.chessboard_index) & 0x01;
+ // Trap case where we do not have a prediction.
+ if (search_range_ctrl &&
+ (left_in_image || above_in_image || cm->frame_type != KEY_FRAME)) {
+ int block;
+ MODE_INFO **mi;
+ BLOCK_SIZE sb_type;
+
+ // Find the min and max partition sizes used in the left SB64.
+ if (left_in_image) {
+ MODE_INFO *cur_mi;
+ mi = &mi_8x8[-1];
+ for (block = 0; block < MI_BLOCK_SIZE; ++block) {
+ cur_mi = mi[block * xd->mi_stride];
+ sb_type = cur_mi ? cur_mi->mbmi.sb_type : 0;
+ min_size = MIN(min_size, sb_type);
+ max_size = MAX(max_size, sb_type);
+ }
+ }
+ // Find the min and max partition sizes used in the above SB64.
+ if (above_in_image) {
+ mi = &mi_8x8[-xd->mi_stride * MI_BLOCK_SIZE];
+ for (block = 0; block < MI_BLOCK_SIZE; ++block) {
+ sb_type = mi[block] ? mi[block]->mbmi.sb_type : 0;
+ min_size = MIN(min_size, sb_type);
+ max_size = MAX(max_size, sb_type);
+ }
+ }
+
+ min_size = min_partition_size[min_size];
+ max_size = find_partition_size(max_size, row8x8_remaining, col8x8_remaining,
+ &bh, &bw);
+ min_size = MIN(min_size, max_size);
+ min_size = MAX(min_size, BLOCK_8X8);
+ max_size = MIN(max_size, BLOCK_32X32);
+ } else {
+ min_size = BLOCK_8X8;
+ max_size = BLOCK_32X32;
+ }
+
+ *min_block_size = min_size;
+ *max_block_size = max_size;
+}
+
static INLINE void store_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
vpx_memcpy(ctx->pred_mv, x->pred_mv, sizeof(x->pred_mv));
}
@@ -1880,7 +1943,7 @@
TOKENEXTRA **tp, int mi_row,
int mi_col, BLOCK_SIZE bsize, int *rate,
int64_t *dist, int do_recon, int64_t best_rd,
- PC_TREE *pc_tree, int block) {
+ PC_TREE *pc_tree) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
@@ -2032,7 +2095,7 @@
rd_pick_partition(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx,
subsize, &this_rate, &this_dist, i != 3,
- best_rd - sum_rd, pc_tree->split[i], i);
+ best_rd - sum_rd, pc_tree->split[i]);
if (this_rate == INT_MAX) {
sum_rd = INT64_MAX;
@@ -2241,18 +2304,18 @@
set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col,
sf->always_this_block_size);
rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
- &dummy_rate, &dummy_dist, 1, x->pc_root, 0);
+ &dummy_rate, &dummy_dist, 1, x->pc_root);
} else if (sf->partition_search_type == VAR_BASED_FIXED_PARTITION) {
BLOCK_SIZE bsize;
set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
bsize = get_rd_var_based_fixed_partition(cpi, mi_row, mi_col);
set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize);
rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
- &dummy_rate, &dummy_dist, 1, x->pc_root, 0);
+ &dummy_rate, &dummy_dist, 1, x->pc_root);
} else if (sf->partition_search_type == VAR_BASED_PARTITION) {
choose_partitioning(cpi, tile, mi_row, mi_col);
rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
- &dummy_rate, &dummy_dist, 1, x->pc_root, 0);
+ &dummy_rate, &dummy_dist, 1, x->pc_root);
} else {
if ((cm->current_video_frame
% sf->last_partitioning_redo_frequency) == 0
@@ -2271,8 +2334,7 @@
&sf->max_partition_size);
}
rd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64,
- &dummy_rate, &dummy_dist, 1, INT64_MAX, x->pc_root,
- 0);
+ &dummy_rate, &dummy_dist, 1, INT64_MAX, x->pc_root);
} else {
if (sf->constrain_copy_partition &&
sb_has_motion(cm, prev_mi_8x8))
@@ -2281,7 +2343,7 @@
else
copy_partitioning(cm, mi_8x8, prev_mi_8x8);
rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
- &dummy_rate, &dummy_dist, 1, x->pc_root, 0);
+ &dummy_rate, &dummy_dist, 1, x->pc_root);
}
}
} else {
@@ -2293,7 +2355,7 @@
&sf->max_partition_size);
}
rd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64,
- &dummy_rate, &dummy_dist, 1, INT64_MAX, x->pc_root, 0);
+ &dummy_rate, &dummy_dist, 1, INT64_MAX, x->pc_root);
}
}
}
@@ -2617,9 +2679,7 @@
if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
continue;
-
load_pred_mv(x, ctx);
-
nonrd_pick_partition(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx,
subsize, &this_rate, &this_dist, 0,
best_rd - sum_rd, pc_tree->split[i]);
@@ -2909,6 +2969,10 @@
case REFERENCE_PARTITION:
if (cpi->sf.partition_check ||
!is_background(cpi, tile, mi_row, mi_col)) {
+ set_modeinfo_offsets(cm, xd, mi_row, mi_col);
+ auto_partition_range(cpi, tile, mi_row, mi_col,
+ &cpi->sf.min_partition_size,
+ &cpi->sf.max_partition_size);
nonrd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64,
&dummy_rate, &dummy_dist, 1, INT64_MAX,
x->pc_root);
@@ -3057,6 +3121,23 @@
#endif
}
+static INTERP_FILTER get_interp_filter(
+ const int64_t threshes[SWITCHABLE_FILTER_CONTEXTS], int is_alt_ref) {
+ if (!is_alt_ref &&
+ threshes[EIGHTTAP_SMOOTH] > threshes[EIGHTTAP] &&
+ threshes[EIGHTTAP_SMOOTH] > threshes[EIGHTTAP_SHARP] &&
+ threshes[EIGHTTAP_SMOOTH] > threshes[SWITCHABLE - 1]) {
+ return EIGHTTAP_SMOOTH;
+ } else if (threshes[EIGHTTAP_SHARP] > threshes[EIGHTTAP] &&
+ threshes[EIGHTTAP_SHARP] > threshes[SWITCHABLE - 1]) {
+ return EIGHTTAP_SHARP;
+ } else if (threshes[EIGHTTAP] > threshes[SWITCHABLE - 1]) {
+ return EIGHTTAP;
+ } else {
+ return SWITCHABLE;
+ }
+}
+
void vp9_encode_frame(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
RD_OPT *const rd_opt = &cpi->rd;
@@ -3092,59 +3173,41 @@
// that for subsequent frames.
// It does the same analysis for transform size selection also.
const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi);
- const int64_t *mode_thresh = rd_opt->prediction_type_threshes[frame_type];
- const int64_t *filter_thresh = rd_opt->filter_threshes[frame_type];
+ int64_t *const mode_thrs = rd_opt->prediction_type_threshes[frame_type];
+ int64_t *const filter_thrs = rd_opt->filter_threshes[frame_type];
+ int *const tx_thrs = rd_opt->tx_select_threshes[frame_type];
+ const int is_alt_ref = frame_type == ALTREF_FRAME;
/* prediction (compound, single or hybrid) mode selection */
- if (frame_type == ALTREF_FRAME || !cm->allow_comp_inter_inter)
+ if (is_alt_ref || !cm->allow_comp_inter_inter)
cm->reference_mode = SINGLE_REFERENCE;
- else if (mode_thresh[COMPOUND_REFERENCE] > mode_thresh[SINGLE_REFERENCE] &&
- mode_thresh[COMPOUND_REFERENCE] >
- mode_thresh[REFERENCE_MODE_SELECT] &&
+ else if (mode_thrs[COMPOUND_REFERENCE] > mode_thrs[SINGLE_REFERENCE] &&
+ mode_thrs[COMPOUND_REFERENCE] >
+ mode_thrs[REFERENCE_MODE_SELECT] &&
check_dual_ref_flags(cpi) &&
cpi->static_mb_pct == 100)
cm->reference_mode = COMPOUND_REFERENCE;
- else if (mode_thresh[SINGLE_REFERENCE] > mode_thresh[REFERENCE_MODE_SELECT])
+ else if (mode_thrs[SINGLE_REFERENCE] > mode_thrs[REFERENCE_MODE_SELECT])
cm->reference_mode = SINGLE_REFERENCE;
else
cm->reference_mode = REFERENCE_MODE_SELECT;
- if (cm->interp_filter == SWITCHABLE) {
- if (frame_type != ALTREF_FRAME &&
- filter_thresh[EIGHTTAP_SMOOTH] > filter_thresh[EIGHTTAP] &&
- filter_thresh[EIGHTTAP_SMOOTH] > filter_thresh[EIGHTTAP_SHARP] &&
- filter_thresh[EIGHTTAP_SMOOTH] > filter_thresh[SWITCHABLE - 1]) {
- cm->interp_filter = EIGHTTAP_SMOOTH;
- } else if (filter_thresh[EIGHTTAP_SHARP] > filter_thresh[EIGHTTAP] &&
- filter_thresh[EIGHTTAP_SHARP] > filter_thresh[SWITCHABLE - 1]) {
- cm->interp_filter = EIGHTTAP_SHARP;
- } else if (filter_thresh[EIGHTTAP] > filter_thresh[SWITCHABLE - 1]) {
- cm->interp_filter = EIGHTTAP;
- }
- }
+ if (cm->interp_filter == SWITCHABLE)
+ cm->interp_filter = get_interp_filter(filter_thrs, is_alt_ref);
encode_frame_internal(cpi);
- for (i = 0; i < REFERENCE_MODES; ++i) {
- const int diff = (int) (rd_opt->comp_pred_diff[i] / cm->MBs);
- rd_opt->prediction_type_threshes[frame_type][i] += diff;
- rd_opt->prediction_type_threshes[frame_type][i] >>= 1;
- }
+ for (i = 0; i < REFERENCE_MODES; ++i)
+ mode_thrs[i] = (mode_thrs[i] + rd_opt->comp_pred_diff[i] / cm->MBs) / 2;
- for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
- const int64_t diff = rd_opt->filter_diff[i] / cm->MBs;
- rd_opt->filter_threshes[frame_type][i] =
- (rd_opt->filter_threshes[frame_type][i] + diff) / 2;
- }
+ for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
+ filter_thrs[i] = (filter_thrs[i] + rd_opt->filter_diff[i] / cm->MBs) / 2;
for (i = 0; i < TX_MODES; ++i) {
int64_t pd = rd_opt->tx_select_diff[i];
- int diff;
if (i == TX_MODE_SELECT)
pd -= RDCOST(cpi->mb.rdmult, cpi->mb.rddiv, 2048 * (TX_SIZES - 1), 0);
- diff = (int) (pd / cm->MBs);
- rd_opt->tx_select_threshes[frame_type][i] += diff;
- rd_opt->tx_select_threshes[frame_type][i] /= 2;
+ tx_thrs[i] = (tx_thrs[i] + (int)(pd / cm->MBs)) / 2;
}
if (cm->reference_mode == REFERENCE_MODE_SELECT) {
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index baa4665..3b231b7 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -99,7 +99,7 @@
}
static int optimize_b(MACROBLOCK *mb, int plane, int block,
- BLOCK_SIZE plane_bsize, TX_SIZE tx_size, int ctx) {
+ TX_SIZE tx_size, int ctx) {
MACROBLOCKD *const xd = &mb->e_mbd;
struct macroblock_plane *const p = &mb->plane[plane];
struct macroblockd_plane *const pd = &xd->plane[plane];
@@ -381,7 +381,7 @@
if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
const int ctx = combine_entropy_contexts(*a, *l);
- *a = *l = optimize_b(x, plane, block, plane_bsize, tx_size, ctx) > 0;
+ *a = *l = optimize_b(x, plane, block, tx_size, ctx) > 0;
} else {
*a = *l = p->eobs[block] > 0;
}
@@ -601,15 +601,3 @@
vp9_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block_intra,
&arg);
}
-
-int vp9_encode_intra(MACROBLOCK *x, int use_16x16_pred) {
- MB_MODE_INFO * mbmi = &x->e_mbd.mi[0]->mbmi;
- x->skip_encode = 0;
- mbmi->mode = DC_PRED;
- mbmi->ref_frame[0] = INTRA_FRAME;
- mbmi->tx_size = use_16x16_pred ? (mbmi->sb_type >= BLOCK_16X16 ? TX_16X16
- : TX_8X8)
- : TX_4X4;
- vp9_encode_intra_block_plane(x, mbmi->sb_type, 0);
- return vp9_get_mb_ss(x->plane[0].src_diff);
-}
diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h
index edef1e2..8021459 100644
--- a/vp9/encoder/vp9_encodemb.h
+++ b/vp9/encoder/vp9_encodemb.h
@@ -34,8 +34,6 @@
void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane);
-int vp9_encode_intra(MACROBLOCK *x, int use_16x16_pred);
-
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 6816f55..5d38447 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -115,22 +115,6 @@
}
}
-static void setup_key_frame(VP9_COMP *cpi) {
- vp9_setup_past_independence(&cpi->common);
-
- // All buffers are implicitly updated on key frames.
- cpi->refresh_golden_frame = 1;
- cpi->refresh_alt_ref_frame = 1;
-}
-
-static void setup_inter_frame(VP9_COMMON *cm) {
- if (cm->error_resilient_mode || cm->intra_only)
- vp9_setup_past_independence(cm);
-
- assert(cm->frame_context_idx < FRAME_CONTEXTS);
- cm->fc = cm->frame_contexts[cm->frame_context_idx];
-}
-
static void setup_frame(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
// Set up entropy context depending on frame type. The decoder mandates
@@ -138,17 +122,21 @@
// frames where the error_resilient_mode or intra_only flag is set. For
// other inter-frames the encoder currently uses only two contexts;
// context 1 for ALTREF frames and context 0 for the others.
- if (cm->frame_type == KEY_FRAME) {
- setup_key_frame(cpi);
+ if (frame_is_intra_only(cm) || cm->error_resilient_mode) {
+ vp9_setup_past_independence(cm);
} else {
- if (!cm->intra_only && !cm->error_resilient_mode && !cpi->use_svc)
- cm->frame_context_idx = cpi->refresh_alt_ref_frame;
- setup_inter_frame(cm);
+ if (!cpi->use_svc)
+ cm->frame_context_idx = cpi->refresh_alt_ref_frame;
+ }
+
+ if (cm->frame_type == KEY_FRAME) {
+ cpi->refresh_golden_frame = 1;
+ cpi->refresh_alt_ref_frame = 1;
+ } else {
+ cm->fc = cm->frame_contexts[cm->frame_context_idx];
}
}
-
-
void vp9_initialize_enc() {
static int init_done = 0;
@@ -162,6 +150,7 @@
vp9_rc_init_minq_luts();
vp9_entropy_mv_init();
vp9_entropy_mode_init();
+ vp9_temporal_filter_init();
init_done = 1;
}
}
@@ -501,9 +490,9 @@
int y_stride = cpi->scaled_source.y_stride;
if (cpi->sf.search_method == NSTEP) {
- vp9_init3smotion_compensation(&cpi->mb, y_stride);
+ vp9_init3smotion_compensation(&cpi->ss_cfg, y_stride);
} else if (cpi->sf.search_method == DIAMOND) {
- vp9_init_dsmotion_compensation(&cpi->mb, y_stride);
+ vp9_init_dsmotion_compensation(&cpi->ss_cfg, y_stride);
}
}
@@ -536,7 +525,6 @@
static void init_config(struct VP9_COMP *cpi, VP9EncoderConfig *oxcf) {
VP9_COMMON *const cm = &cpi->common;
- int i;
cpi->oxcf = *oxcf;
@@ -571,10 +559,6 @@
cpi->alt_fb_idx = 2;
set_tile_limits(cpi);
-
- cpi->fixed_divide[0] = 0;
- for (i = 1; i < 512; i++)
- cpi->fixed_divide[i] = 0x80000 / i;
}
static int get_pass(MODE mode) {
@@ -765,7 +749,7 @@
VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf) {
- int i, j;
+ unsigned int i, j;
VP9_COMP *const cpi = vpx_memalign(32, sizeof(VP9_COMP));
VP9_COMMON *const cm = cpi != NULL ? &cpi->common : NULL;
@@ -782,9 +766,6 @@
cm->error.setjmp = 1;
- CHECK_MEM_ERROR(cm, cpi->mb.ss, vpx_calloc(sizeof(search_site),
- (MAX_MVSEARCH_STEPS * 8) + 1));
-
vp9_rtcd();
cpi->use_svc = 0;
@@ -973,95 +954,73 @@
cpi->rd.thresh_freq_fact[i][j] = 32;
}
-#define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SVFHH, SVFHV, SVFHHV, \
- SDX3F, SDX8F, SDX4DF)\
+#define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX3F, SDX8F, SDX4DF)\
cpi->fn_ptr[BT].sdf = SDF; \
cpi->fn_ptr[BT].sdaf = SDAF; \
cpi->fn_ptr[BT].vf = VF; \
cpi->fn_ptr[BT].svf = SVF; \
cpi->fn_ptr[BT].svaf = SVAF; \
- cpi->fn_ptr[BT].svf_halfpix_h = SVFHH; \
- cpi->fn_ptr[BT].svf_halfpix_v = SVFHV; \
- cpi->fn_ptr[BT].svf_halfpix_hv = SVFHHV; \
cpi->fn_ptr[BT].sdx3f = SDX3F; \
cpi->fn_ptr[BT].sdx8f = SDX8F; \
cpi->fn_ptr[BT].sdx4df = SDX4DF;
BFP(BLOCK_32X16, vp9_sad32x16, vp9_sad32x16_avg,
vp9_variance32x16, vp9_sub_pixel_variance32x16,
- vp9_sub_pixel_avg_variance32x16, NULL, NULL,
- NULL, NULL, NULL,
- vp9_sad32x16x4d)
+ vp9_sub_pixel_avg_variance32x16, NULL, NULL, vp9_sad32x16x4d)
BFP(BLOCK_16X32, vp9_sad16x32, vp9_sad16x32_avg,
vp9_variance16x32, vp9_sub_pixel_variance16x32,
- vp9_sub_pixel_avg_variance16x32, NULL, NULL,
- NULL, NULL, NULL,
- vp9_sad16x32x4d)
+ vp9_sub_pixel_avg_variance16x32, NULL, NULL, vp9_sad16x32x4d)
BFP(BLOCK_64X32, vp9_sad64x32, vp9_sad64x32_avg,
vp9_variance64x32, vp9_sub_pixel_variance64x32,
- vp9_sub_pixel_avg_variance64x32, NULL, NULL,
- NULL, NULL, NULL,
- vp9_sad64x32x4d)
+ vp9_sub_pixel_avg_variance64x32, NULL, NULL, vp9_sad64x32x4d)
BFP(BLOCK_32X64, vp9_sad32x64, vp9_sad32x64_avg,
vp9_variance32x64, vp9_sub_pixel_variance32x64,
- vp9_sub_pixel_avg_variance32x64, NULL, NULL,
- NULL, NULL, NULL,
- vp9_sad32x64x4d)
+ vp9_sub_pixel_avg_variance32x64, NULL, NULL, vp9_sad32x64x4d)
BFP(BLOCK_32X32, vp9_sad32x32, vp9_sad32x32_avg,
vp9_variance32x32, vp9_sub_pixel_variance32x32,
- vp9_sub_pixel_avg_variance32x32, vp9_variance_halfpixvar32x32_h,
- vp9_variance_halfpixvar32x32_v,
- vp9_variance_halfpixvar32x32_hv, vp9_sad32x32x3, vp9_sad32x32x8,
+ vp9_sub_pixel_avg_variance32x32, vp9_sad32x32x3, vp9_sad32x32x8,
vp9_sad32x32x4d)
BFP(BLOCK_64X64, vp9_sad64x64, vp9_sad64x64_avg,
vp9_variance64x64, vp9_sub_pixel_variance64x64,
- vp9_sub_pixel_avg_variance64x64, vp9_variance_halfpixvar64x64_h,
- vp9_variance_halfpixvar64x64_v,
- vp9_variance_halfpixvar64x64_hv, vp9_sad64x64x3, vp9_sad64x64x8,
+ vp9_sub_pixel_avg_variance64x64, vp9_sad64x64x3, vp9_sad64x64x8,
vp9_sad64x64x4d)
BFP(BLOCK_16X16, vp9_sad16x16, vp9_sad16x16_avg,
vp9_variance16x16, vp9_sub_pixel_variance16x16,
- vp9_sub_pixel_avg_variance16x16, vp9_variance_halfpixvar16x16_h,
- vp9_variance_halfpixvar16x16_v,
- vp9_variance_halfpixvar16x16_hv, vp9_sad16x16x3, vp9_sad16x16x8,
+ vp9_sub_pixel_avg_variance16x16, vp9_sad16x16x3, vp9_sad16x16x8,
vp9_sad16x16x4d)
BFP(BLOCK_16X8, vp9_sad16x8, vp9_sad16x8_avg,
vp9_variance16x8, vp9_sub_pixel_variance16x8,
- vp9_sub_pixel_avg_variance16x8, NULL, NULL, NULL,
+ vp9_sub_pixel_avg_variance16x8,
vp9_sad16x8x3, vp9_sad16x8x8, vp9_sad16x8x4d)
BFP(BLOCK_8X16, vp9_sad8x16, vp9_sad8x16_avg,
vp9_variance8x16, vp9_sub_pixel_variance8x16,
- vp9_sub_pixel_avg_variance8x16, NULL, NULL, NULL,
+ vp9_sub_pixel_avg_variance8x16,
vp9_sad8x16x3, vp9_sad8x16x8, vp9_sad8x16x4d)
BFP(BLOCK_8X8, vp9_sad8x8, vp9_sad8x8_avg,
vp9_variance8x8, vp9_sub_pixel_variance8x8,
- vp9_sub_pixel_avg_variance8x8, NULL, NULL, NULL,
+ vp9_sub_pixel_avg_variance8x8,
vp9_sad8x8x3, vp9_sad8x8x8, vp9_sad8x8x4d)
BFP(BLOCK_8X4, vp9_sad8x4, vp9_sad8x4_avg,
vp9_variance8x4, vp9_sub_pixel_variance8x4,
- vp9_sub_pixel_avg_variance8x4, NULL, NULL,
- NULL, NULL, vp9_sad8x4x8,
- vp9_sad8x4x4d)
+ vp9_sub_pixel_avg_variance8x4, NULL, vp9_sad8x4x8, vp9_sad8x4x4d)
BFP(BLOCK_4X8, vp9_sad4x8, vp9_sad4x8_avg,
vp9_variance4x8, vp9_sub_pixel_variance4x8,
- vp9_sub_pixel_avg_variance4x8, NULL, NULL,
- NULL, NULL, vp9_sad4x8x8,
- vp9_sad4x8x4d)
+ vp9_sub_pixel_avg_variance4x8, NULL, vp9_sad4x8x8, vp9_sad4x8x4d)
BFP(BLOCK_4X4, vp9_sad4x4, vp9_sad4x4_avg,
vp9_variance4x4, vp9_sub_pixel_variance4x4,
- vp9_sub_pixel_avg_variance4x4, NULL, NULL, NULL,
+ vp9_sub_pixel_avg_variance4x4,
vp9_sad4x4x3, vp9_sad4x4x8, vp9_sad4x4x4d)
cpi->full_search_sad = vp9_full_search_sad;
@@ -1079,15 +1038,11 @@
cm->error.setjmp = 0;
-#ifdef MODE_TEST_HIT_STATS
- vp9_zero(cpi->mode_test_hits);
-#endif
-
return cpi;
}
void vp9_remove_compressor(VP9_COMP *cpi) {
- int i;
+ unsigned int i;
if (!cpi)
return;
@@ -1141,34 +1096,6 @@
#endif
-#ifdef MODE_TEST_HIT_STATS
- if (cpi->pass != 1) {
- double norm_per_pixel_mode_tests = 0;
- double norm_counts[BLOCK_SIZES];
- int i;
- int sb64_per_frame;
- int norm_factors[BLOCK_SIZES] =
- {256, 128, 128, 64, 32, 32, 16, 8, 8, 4, 2, 2, 1};
- FILE *f = fopen("mode_hit_stats.stt", "a");
-
- // On average, how many mode tests do we do
- for (i = 0; i < BLOCK_SIZES; ++i) {
- norm_counts[i] = (double)cpi->mode_test_hits[i] /
- (double)norm_factors[i];
- norm_per_pixel_mode_tests += norm_counts[i];
- }
- // Convert to a number per 64x64 and per frame
- sb64_per_frame = ((cpi->common.height + 63) / 64) *
- ((cpi->common.width + 63) / 64);
- norm_per_pixel_mode_tests =
- norm_per_pixel_mode_tests /
- (double)(cpi->common.current_video_frame * sb64_per_frame);
-
- fprintf(f, "%6.4f\n", norm_per_pixel_mode_tests);
- fclose(f);
- }
-#endif
-
#if 0
{
printf("\n_pick_loop_filter_level:%d\n", cpi->time_pick_lpf / 1000);
@@ -1182,7 +1109,6 @@
}
dealloc_compressor_data(cpi);
- vpx_free(cpi->mb.ss);
vpx_free(cpi->tok);
for (i = 0; i < sizeof(cpi->mbgraph_stats) /
@@ -1444,77 +1370,67 @@
}
#endif
-static void scale_and_extend_frame_nonnormative(YV12_BUFFER_CONFIG *src_fb,
- YV12_BUFFER_CONFIG *dst_fb) {
- const int in_w = src_fb->y_crop_width;
- const int in_h = src_fb->y_crop_height;
- const int out_w = dst_fb->y_crop_width;
- const int out_h = dst_fb->y_crop_height;
- const int in_w_uv = src_fb->uv_crop_width;
- const int in_h_uv = src_fb->uv_crop_height;
- const int out_w_uv = dst_fb->uv_crop_width;
- const int out_h_uv = dst_fb->uv_crop_height;
+static void scale_and_extend_frame_nonnormative(const YV12_BUFFER_CONFIG *src,
+ YV12_BUFFER_CONFIG *dst) {
+ // TODO(dkovalev): replace YV12_BUFFER_CONFIG with vpx_image_t
int i;
+ const uint8_t *const srcs[4] = {src->y_buffer, src->u_buffer, src->v_buffer,
+ src->alpha_buffer};
+ const int src_strides[4] = {src->y_stride, src->uv_stride, src->uv_stride,
+ src->alpha_stride};
+ const int src_widths[4] = {src->y_crop_width, src->uv_crop_width,
+ src->uv_crop_width, src->y_crop_width};
+ const int src_heights[4] = {src->y_crop_height, src->uv_crop_height,
+ src->uv_crop_height, src->y_crop_height};
+ uint8_t *const dsts[4] = {dst->y_buffer, dst->u_buffer, dst->v_buffer,
+ dst->alpha_buffer};
+ const int dst_strides[4] = {dst->y_stride, dst->uv_stride, dst->uv_stride,
+ dst->alpha_stride};
+ const int dst_widths[4] = {dst->y_crop_width, dst->uv_crop_width,
+ dst->uv_crop_width, dst->y_crop_width};
+ const int dst_heights[4] = {dst->y_crop_height, dst->uv_crop_height,
+ dst->uv_crop_height, dst->y_crop_height};
- uint8_t *srcs[4] = {src_fb->y_buffer, src_fb->u_buffer, src_fb->v_buffer,
- src_fb->alpha_buffer};
- int src_strides[4] = {src_fb->y_stride, src_fb->uv_stride, src_fb->uv_stride,
- src_fb->alpha_stride};
+ for (i = 0; i < MAX_MB_PLANE; ++i)
+ vp9_resize_plane(srcs[i], src_heights[i], src_widths[i], src_strides[i],
+ dsts[i], dst_heights[i], dst_widths[i], dst_strides[i]);
- uint8_t *dsts[4] = {dst_fb->y_buffer, dst_fb->u_buffer, dst_fb->v_buffer,
- dst_fb->alpha_buffer};
- int dst_strides[4] = {dst_fb->y_stride, dst_fb->uv_stride, dst_fb->uv_stride,
- dst_fb->alpha_stride};
-
- for (i = 0; i < MAX_MB_PLANE; ++i) {
- if (i == 0 || i == 3) {
- // Y and alpha planes
- vp9_resize_plane(srcs[i], in_h, in_w, src_strides[i],
- dsts[i], out_h, out_w, dst_strides[i]);
- } else {
- // Chroma planes
- vp9_resize_plane(srcs[i], in_h_uv, in_w_uv, src_strides[i],
- dsts[i], out_h_uv, out_w_uv, dst_strides[i]);
- }
- }
// TODO(hkuang): Call C version explicitly
// as neon version only expand border size 32.
- vp8_yv12_extend_frame_borders_c(dst_fb);
+ vp8_yv12_extend_frame_borders_c(dst);
}
-static void scale_and_extend_frame(YV12_BUFFER_CONFIG *src_fb,
- YV12_BUFFER_CONFIG *dst_fb) {
- const int in_w = src_fb->y_crop_width;
- const int in_h = src_fb->y_crop_height;
- const int out_w = dst_fb->y_crop_width;
- const int out_h = dst_fb->y_crop_height;
+static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src,
+ YV12_BUFFER_CONFIG *dst) {
+ const int src_w = src->y_crop_width;
+ const int src_h = src->y_crop_height;
+ const int dst_w = dst->y_crop_width;
+ const int dst_h = dst->y_crop_height;
+ const uint8_t *const srcs[4] = {src->y_buffer, src->u_buffer, src->v_buffer,
+ src->alpha_buffer};
+ const int src_strides[4] = {src->y_stride, src->uv_stride, src->uv_stride,
+ src->alpha_stride};
+ uint8_t *const dsts[4] = {dst->y_buffer, dst->u_buffer, dst->v_buffer,
+ dst->alpha_buffer};
+ const int dst_strides[4] = {dst->y_stride, dst->uv_stride, dst->uv_stride,
+ dst->alpha_stride};
int x, y, i;
- uint8_t *srcs[4] = {src_fb->y_buffer, src_fb->u_buffer, src_fb->v_buffer,
- src_fb->alpha_buffer};
- int src_strides[4] = {src_fb->y_stride, src_fb->uv_stride, src_fb->uv_stride,
- src_fb->alpha_stride};
-
- uint8_t *dsts[4] = {dst_fb->y_buffer, dst_fb->u_buffer, dst_fb->v_buffer,
- dst_fb->alpha_buffer};
- int dst_strides[4] = {dst_fb->y_stride, dst_fb->uv_stride, dst_fb->uv_stride,
- dst_fb->alpha_stride};
-
- for (y = 0; y < out_h; y += 16) {
- for (x = 0; x < out_w; x += 16) {
+ for (y = 0; y < dst_h; y += 16) {
+ for (x = 0; x < dst_w; x += 16) {
for (i = 0; i < MAX_MB_PLANE; ++i) {
const int factor = (i == 0 || i == 3 ? 1 : 2);
- const int x_q4 = x * (16 / factor) * in_w / out_w;
- const int y_q4 = y * (16 / factor) * in_h / out_h;
+ const int x_q4 = x * (16 / factor) * src_w / dst_w;
+ const int y_q4 = y * (16 / factor) * src_h / dst_h;
const int src_stride = src_strides[i];
const int dst_stride = dst_strides[i];
- uint8_t *src = srcs[i] + y / factor * in_h / out_h * src_stride +
- x / factor * in_w / out_w;
- uint8_t *dst = dsts[i] + y / factor * dst_stride + x / factor;
+ const uint8_t *src_ptr = srcs[i] + (y / factor) * src_h / dst_h *
+ src_stride + (x / factor) * src_w / dst_w;
+ uint8_t *dst_ptr = dsts[i] + (y / factor) * dst_stride + (x / factor);
- vp9_convolve8(src, src_stride, dst, dst_stride,
- vp9_sub_pel_filters_8[x_q4 & 0xf], 16 * in_w / out_w,
- vp9_sub_pel_filters_8[y_q4 & 0xf], 16 * in_h / out_h,
+ vp9_convolve8(src_ptr, src_stride, dst_ptr, dst_stride,
+ vp9_sub_pel_filters_8[x_q4 & 0xf], 16 * src_w / dst_w,
+ vp9_sub_pel_filters_8[y_q4 & 0xf], 16 * src_h / dst_h,
16 / factor, 16 / factor);
}
}
@@ -1522,7 +1438,7 @@
// TODO(hkuang): Call C version explicitly
// as neon version only expand border size 32.
- vp8_yv12_extend_frame_borders_c(dst_fb);
+ vp8_yv12_extend_frame_borders_c(dst);
}
static int find_fp_qindex() {
@@ -1701,7 +1617,7 @@
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
const int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)];
- YV12_BUFFER_CONFIG *const ref = &cm->frame_bufs[idx].buf;
+ const YV12_BUFFER_CONFIG *const ref = &cm->frame_bufs[idx].buf;
if (ref->y_crop_width != cm->width ||
ref->y_crop_height != cm->height) {
@@ -2133,7 +2049,7 @@
cm->lf.mode_ref_delta_update = 0;
// Initialize cpi->mv_step_param to default based on max resolution.
- cpi->mv_step_param = vp9_init_search_range(cpi, max_mv_def);
+ cpi->mv_step_param = vp9_init_search_range(sf, max_mv_def);
// Initialize cpi->max_mv_magnitude and cpi->mv_step_param if appropriate.
if (sf->auto_mv_step_size) {
if (frame_is_intra_only(cm)) {
@@ -2145,7 +2061,7 @@
// Allow mv_steps to correspond to twice the max mv magnitude found
// in the previous frame, capped by the default max_mv_magnitude based
// on resolution.
- cpi->mv_step_param = vp9_init_search_range(cpi, MIN(max_mv_def, 2 *
+ cpi->mv_step_param = vp9_init_search_range(sf, MIN(max_mv_def, 2 *
cpi->max_mv_magnitude));
cpi->max_mv_magnitude = 0;
}
@@ -2308,9 +2224,6 @@
}
}
-#if 0
- output_frame_level_debug_stats(cpi);
-#endif
if (cpi->refresh_golden_frame == 1)
cpi->frame_flags |= FRAMEFLAGS_GOLDEN;
else
@@ -2326,6 +2239,10 @@
cm->last_frame_type = cm->frame_type;
vp9_rc_postencode_update(cpi, *size);
+#if 0
+ output_frame_level_debug_stats(cpi);
+#endif
+
if (cm->frame_type == KEY_FRAME) {
// Tell the caller that the frame was coded as a key frame
*frame_flags = cpi->frame_flags | FRAMEFLAGS_KEY;
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index 8f32494..f48909e 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -40,8 +40,6 @@
extern "C" {
#endif
-// #define MODE_TEST_HIT_STATS
-
#define DEFAULT_GF_INTERVAL 10
#define MAX_MODES 30
@@ -276,6 +274,7 @@
int arnr_max_frames;
int arnr_strength;
+ int arnr_type;
int tile_columns;
int tile_rows;
@@ -443,7 +442,6 @@
YV12_BUFFER_CONFIG alt_ref_buffer;
YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS];
- int fixed_divide[512];
#if CONFIG_INTERNAL_STATS
unsigned int mode_chosen_counts[MAX_MODES];
@@ -497,6 +495,14 @@
int frame_flags;
+ search_site_config ss_cfg;
+
+ int mbmode_cost[INTRA_MODES];
+ unsigned inter_mode_cost[INTER_MODE_CONTEXTS][INTER_MODES];
+ int intra_uv_mode_cost[FRAME_TYPES][INTRA_MODES];
+ int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES];
+ int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS];
+
#if CONFIG_MULTIPLE_ARF
// ARF tracking variables.
int multi_arf_enabled;
@@ -509,11 +515,6 @@
int this_frame_weight;
int max_arf_level;
#endif
-
-#ifdef MODE_TEST_HIT_STATS
- // Debug / test stats
- int64_t mode_test_hits[BLOCK_SIZES];
-#endif
} VP9_COMP;
void vp9_initialize_enc();
@@ -592,7 +593,8 @@
// alt ref frames tend to be coded at a higher than ambient quality
static INLINE int frame_is_boosted(const VP9_COMP *cpi) {
return frame_is_intra_only(&cpi->common) || cpi->refresh_alt_ref_frame ||
- (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref);
+ (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref) ||
+ vp9_is_upper_layer_key_frame(cpi);
}
static INLINE int get_token_alloc(int mb_rows, int mb_cols) {
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 3d4b962..43ae7c1 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -61,7 +61,7 @@
#define MIN_GF_INTERVAL 4
#endif
-#define LONG_TERM_VBR_CORRECTION
+// #define LONG_TERM_VBR_CORRECTION
static void swap_yv12(YV12_BUFFER_CONFIG *a, YV12_BUFFER_CONFIG *b) {
YV12_BUFFER_CONFIG temp = *a;
@@ -418,7 +418,7 @@
v_fn_ptr.vf = get_block_variance_fn(bsize);
// Center the initial step/diamond search on best mv.
- tmp_err = cpi->diamond_search_sad(x, &ref_mv_full, &tmp_mv,
+ tmp_err = cpi->diamond_search_sad(x, &cpi->ss_cfg, &ref_mv_full, &tmp_mv,
step_param,
x->sadperbit16, &num00, &v_fn_ptr, ref_mv);
if (tmp_err < INT_MAX)
@@ -441,7 +441,7 @@
if (num00) {
--num00;
} else {
- tmp_err = cpi->diamond_search_sad(x, &ref_mv_full, &tmp_mv,
+ tmp_err = cpi->diamond_search_sad(x, &cpi->ss_cfg, &ref_mv_full, &tmp_mv,
step_param + n, x->sadperbit16,
&num00, &v_fn_ptr, ref_mv);
if (tmp_err < INT_MAX)
@@ -604,7 +604,13 @@
}
// Do intra 16x16 prediction.
- this_error = vp9_encode_intra(x, use_dc_pred);
+ x->skip_encode = 0;
+ xd->mi[0]->mbmi.mode = DC_PRED;
+ xd->mi[0]->mbmi.tx_size = use_dc_pred ?
+ (bsize >= BLOCK_16X16 ? TX_16X16 : TX_8X8) : TX_4X4;
+ vp9_encode_intra_block_plane(x, bsize, 0);
+ this_error = vp9_get_mb_ss(x->plane[0].src_diff);
+
if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
vp9_clear_system_state();
this_error = (int)(this_error * error_weight);
@@ -920,12 +926,19 @@
const int target_norm_bits_per_mb = ((uint64_t)section_target_bandwidth <<
BPER_MB_NORMBITS) / num_mbs;
int q;
+ int is_svc_upper_layer = 0;
+ if (cpi->use_svc && cpi->svc.number_temporal_layers == 1 &&
+ cpi->svc.spatial_layer_id > 0) {
+ is_svc_upper_layer = 1;
+ }
// Try and pick a max Q that will be high enough to encode the
// content at the given rate.
for (q = rc->best_quality; q < rc->worst_quality; ++q) {
- const double factor = calc_correction_factor(err_per_mb, ERR_DIVISOR,
- 0.5, 0.90, q);
+ const double factor =
+ calc_correction_factor(err_per_mb, ERR_DIVISOR,
+ is_svc_upper_layer ? 0.8 : 0.5,
+ is_svc_upper_layer ? 1.0 : 0.90, q);
const int bits_per_mb = vp9_rc_bits_per_mb(INTER_FRAME, q,
factor * speed_term);
if (bits_per_mb <= target_norm_bits_per_mb)
@@ -1404,6 +1417,58 @@
}
#endif
+// Calculate a section intra ratio used in setting max loop filter.
+static void calculate_section_intra_ratio(struct twopass_rc *twopass,
+ const FIRSTPASS_STATS *start_pos,
+ int section_length) {
+ FIRSTPASS_STATS next_frame = { 0 };
+ FIRSTPASS_STATS sectionstats = { 0 };
+ int i;
+
+ reset_fpf_position(twopass, start_pos);
+
+ for (i = 0; i < section_length; ++i) {
+ input_stats(twopass, &next_frame);
+ accumulate_stats(§ionstats, &next_frame);
+ }
+
+ avg_stats(§ionstats);
+
+ twopass->section_intra_rating =
+ (int)(sectionstats.intra_error /
+ DOUBLE_DIVIDE_CHECK(sectionstats.coded_error));
+
+ reset_fpf_position(twopass, start_pos);
+}
+
+// Calculate the total bits to allocate in this GF/ARF group.
+static int64_t calculate_total_gf_group_bits(VP9_COMP *cpi,
+ double gf_group_err) {
+ const RATE_CONTROL *const rc = &cpi->rc;
+ const struct twopass_rc *const twopass = &cpi->twopass;
+ const int max_bits = frame_max_bits(rc, &cpi->oxcf);
+ int64_t total_group_bits;
+
+ // Calculate the bits to be allocated to the group as a whole.
+ if ((twopass->kf_group_bits > 0) && (twopass->kf_group_error_left > 0)) {
+ total_group_bits = (int64_t)(twopass->kf_group_bits *
+ (gf_group_err / twopass->kf_group_error_left));
+ } else {
+ total_group_bits = 0;
+ }
+
+ // Clamp odd edge cases.
+ total_group_bits = (total_group_bits < 0) ?
+ 0 : (total_group_bits > twopass->kf_group_bits) ?
+ twopass->kf_group_bits : total_group_bits;
+
+ // Clip based on user supplied data rate variability limit.
+ if (total_group_bits > (int64_t)max_bits * rc->baseline_gf_interval)
+ total_group_bits = (int64_t)max_bits * rc->baseline_gf_interval;
+
+ return total_group_bits;
+}
+
// Analyse and define a gf/arf group.
static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
RATE_CONTROL *const rc = &cpi->rc;
@@ -1429,8 +1494,6 @@
double mv_in_out_accumulator = 0.0;
double abs_mv_in_out_accumulator = 0.0;
double mv_ratio_accumulator_thresh;
- // Max bits for a single frame.
- const int max_bits = frame_max_bits(rc, oxcf);
unsigned int allow_alt_ref = oxcf->play_alternate && oxcf->lag_in_frames;
int f_boost = 0;
@@ -1645,21 +1708,8 @@
#endif
#endif
- // Calculate the bits to be allocated to the group as a whole.
- if (twopass->kf_group_bits > 0 && twopass->kf_group_error_left > 0) {
- twopass->gf_group_bits = (int64_t)(twopass->kf_group_bits *
- (gf_group_err / twopass->kf_group_error_left));
- } else {
- twopass->gf_group_bits = 0;
- }
- twopass->gf_group_bits = (twopass->gf_group_bits < 0) ?
- 0 : (twopass->gf_group_bits > twopass->kf_group_bits) ?
- twopass->kf_group_bits : twopass->gf_group_bits;
-
- // Clip cpi->twopass.gf_group_bits based on user supplied data rate
- // variability limit, cpi->oxcf.two_pass_vbrmax_section.
- if (twopass->gf_group_bits > (int64_t)max_bits * rc->baseline_gf_interval)
- twopass->gf_group_bits = (int64_t)max_bits * rc->baseline_gf_interval;
+ // Calculate the bits to be allocated to the gf/arf group as a whole
+ twopass->gf_group_bits = calculate_total_gf_group_bits(cpi, gf_group_err);
// Reset the file position.
reset_fpf_position(twopass, start_pos);
@@ -1727,8 +1777,8 @@
twopass->gf_bits = gf_bits;
}
if (i == 1 ||
- (!rc->source_alt_ref_pending &&
- cpi->common.frame_type != KEY_FRAME)) {
+ (!rc->source_alt_ref_pending && cpi->common.frame_type != KEY_FRAME &&
+ !vp9_is_upper_layer_key_frame(cpi))) {
// Calculate the per frame bit target for this frame.
vp9_rc_set_frame_target(cpi, gf_bits);
}
@@ -1769,24 +1819,9 @@
}
}
+ // Calculate a section intra ratio used in setting max loop filter.
if (cpi->common.frame_type != KEY_FRAME) {
- FIRSTPASS_STATS sectionstats;
-
- zero_stats(§ionstats);
- reset_fpf_position(twopass, start_pos);
-
- for (i = 0; i < rc->baseline_gf_interval; ++i) {
- input_stats(twopass, &next_frame);
- accumulate_stats(§ionstats, &next_frame);
- }
-
- avg_stats(§ionstats);
-
- twopass->section_intra_rating = (int)
- (sectionstats.intra_error /
- DOUBLE_DIVIDE_CHECK(sectionstats.coded_error));
-
- reset_fpf_position(twopass, start_pos);
+ calculate_section_intra_ratio(twopass, start_pos, rc->baseline_gf_interval);
}
}
@@ -2082,25 +2117,8 @@
}
}
- {
- FIRSTPASS_STATS sectionstats;
-
- zero_stats(§ionstats);
- reset_fpf_position(twopass, start_position);
-
- for (i = 0; i < rc->frames_to_key; ++i) {
- input_stats(twopass, &next_frame);
- accumulate_stats(§ionstats, &next_frame);
- }
-
- avg_stats(§ionstats);
-
- twopass->section_intra_rating = (int) (sectionstats.intra_error /
- DOUBLE_DIVIDE_CHECK(sectionstats.coded_error));
- }
-
- // Reset the first pass file position.
- reset_fpf_position(twopass, start_position);
+ // Calculate a section intra ratio used in setting max loop filter.
+ calculate_section_intra_ratio(twopass, start_position, rc->frames_to_key);
// Work out how many bits to allocate for the key frame itself.
if (1) {
@@ -2290,11 +2308,16 @@
this_frame_copy = this_frame;
find_next_key_frame(cpi, &this_frame_copy);
// Don't place key frame in any enhancement layers in spatial svc
- if (cpi->use_svc && cpi->svc.number_temporal_layers == 1 &&
- cpi->svc.spatial_layer_id > 0) {
- cm->frame_type = INTER_FRAME;
+ if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) {
+ lc->is_key_frame = 1;
+ if (cpi->svc.spatial_layer_id > 0) {
+ cm->frame_type = INTER_FRAME;
+ }
}
} else {
+ if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) {
+ lc->is_key_frame = 0;
+ }
cm->frame_type = INTER_FRAME;
}
@@ -2384,17 +2407,19 @@
const double progress =
(double)(cpi->twopass.stats_in - cpi->twopass.stats_in_start) /
(cpi->twopass.stats_in_end - cpi->twopass.stats_in_start);
- const int bits_used = progress * cpi->rc.this_frame_target +
- (1.0 - progress) * cpi->rc.projected_frame_size;
+ const int bits_used = (int)(progress * rc->this_frame_target +
+ (1.0 - progress) * rc->projected_frame_size);
#endif
cpi->twopass.bits_left -= bits_used;
cpi->twopass.bits_left = MAX(cpi->twopass.bits_left, 0);
#ifdef LONG_TERM_VBR_CORRECTION
- if (cpi->common.frame_type != KEY_FRAME) {
+ if (cpi->common.frame_type != KEY_FRAME &&
+ !vp9_is_upper_layer_key_frame(cpi)) {
#else
- if (cpi->common.frame_type == KEY_FRAME) {
+ if (cpi->common.frame_type == KEY_FRAME ||
+ vp9_is_upper_layer_key_frame(cpi)) {
// For key frames kf_group_bits already had the target bits subtracted out.
// So now update to the correct value based on the actual bits used.
cpi->twopass.kf_group_bits += cpi->rc.this_frame_target - bits_used;
diff --git a/vp9/encoder/vp9_lookahead.c b/vp9/encoder/vp9_lookahead.c
index a298f1c..abe71e6 100644
--- a/vp9/encoder/vp9_lookahead.c
+++ b/vp9/encoder/vp9_lookahead.c
@@ -18,6 +18,9 @@
#include "vp9/encoder/vp9_extend.h"
#include "vp9/encoder/vp9_lookahead.h"
+// The max of past frames we want to keep in the queue.
+#define MAX_PRE_FRAMES 1
+
struct lookahead_ctx {
unsigned int max_sz; /* Absolute size of the queue */
unsigned int sz; /* Number of buffers currently in the queue */
diff --git a/vp9/encoder/vp9_lookahead.h b/vp9/encoder/vp9_lookahead.h
index 046c533..ff63c0d 100644
--- a/vp9/encoder/vp9_lookahead.h
+++ b/vp9/encoder/vp9_lookahead.h
@@ -20,9 +20,6 @@
#define MAX_LAG_BUFFERS 25
-// The max of past frames we want to keep in the queue.
-#define MAX_PRE_FRAMES 1
-
struct lookahead_entry {
YV12_BUFFER_CONFIG img;
int64_t ts_start;
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index 07a04cd..43c8ab8 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -51,7 +51,7 @@
x->mv_row_max = row_max;
}
-int vp9_init_search_range(VP9_COMP *cpi, int size) {
+int vp9_init_search_range(const SPEED_FEATURES *sf, int size) {
int sr = 0;
// Minimum search size no matter what the passed in value.
@@ -60,8 +60,8 @@
while ((size << sr) < MAX_FULL_PEL_VAL)
sr++;
- sr += cpi->sf.reduce_first_step_size;
- sr = MIN(sr, (cpi->sf.max_step_search_steps - 2));
+ sr += sf->reduce_first_step_size;
+ sr = MIN(sr, (sf->max_step_search_steps - 2));
return sr;
}
@@ -101,32 +101,32 @@
return 0;
}
-void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride) {
+void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride) {
int len, ss_count = 1;
- x->ss[0].mv.col = x->ss[0].mv.row = 0;
- x->ss[0].offset = 0;
+ cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0;
+ cfg->ss[0].offset = 0;
for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
// Generate offsets for 4 search sites per step.
const MV ss_mvs[] = {{-len, 0}, {len, 0}, {0, -len}, {0, len}};
int i;
for (i = 0; i < 4; ++i) {
- search_site *const ss = &x->ss[ss_count++];
+ search_site *const ss = &cfg->ss[ss_count++];
ss->mv = ss_mvs[i];
ss->offset = ss->mv.row * stride + ss->mv.col;
}
}
- x->ss_count = ss_count;
- x->searches_per_step = 4;
+ cfg->ss_count = ss_count;
+ cfg->searches_per_step = 4;
}
-void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) {
+void vp9_init3smotion_compensation(search_site_config *cfg, int stride) {
int len, ss_count = 1;
- x->ss[0].mv.col = x->ss[0].mv.row = 0;
- x->ss[0].offset = 0;
+ cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0;
+ cfg->ss[0].offset = 0;
for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
// Generate offsets for 8 search sites per step.
@@ -136,14 +136,14 @@
};
int i;
for (i = 0; i < 8; ++i) {
- search_site *const ss = &x->ss[ss_count++];
+ search_site *const ss = &cfg->ss[ss_count++];
ss->mv = ss_mvs[i];
ss->offset = ss->mv.row * stride + ss->mv.col;
}
}
- x->ss_count = ss_count;
- x->searches_per_step = 8;
+ cfg->ss_count = ss_count;
+ cfg->searches_per_step = 8;
}
/*
@@ -871,97 +871,78 @@
#undef CHECK_BETTER
-int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv,
+int vp9_full_range_search_c(const MACROBLOCK *x,
+ const search_site_config *cfg,
+ MV *ref_mv, MV *best_mv,
int search_param, int sad_per_bit, int *num00,
const vp9_variance_fn_ptr_t *fn_ptr,
const MV *center_mv) {
const MACROBLOCKD *const xd = &x->e_mbd;
- const uint8_t *what = x->plane[0].src.buf;
- const int what_stride = x->plane[0].src.stride;
- const uint8_t *in_what;
- const int in_what_stride = xd->plane[0].pre[0].stride;
-
- unsigned int bestsad = INT_MAX;
- int ref_row, ref_col;
-
- unsigned int thissad;
+ const struct buf_2d *const what = &x->plane[0].src;
+ const struct buf_2d *const in_what = &xd->plane[0].pre[0];
+ const int range = 64;
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
- int tr, tc;
- int best_tr = 0;
- int best_tc = 0;
- int range = 64;
-
- int start_col, end_col;
- int start_row, end_row;
- int i;
+ unsigned int best_sad = INT_MAX;
+ int r, c, i;
+ int start_col, end_col, start_row, end_row;
clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
- ref_row = ref_mv->row;
- ref_col = ref_mv->col;
+ *best_mv = *ref_mv;
*num00 = 11;
- best_mv->row = ref_row;
- best_mv->col = ref_col;
+ best_sad = fn_ptr->sdf(what->buf, what->stride,
+ get_buf_from_mv(in_what, ref_mv), in_what->stride,
+ 0x7fffffff) +
+ mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
+ start_row = MAX(-range, x->mv_row_min - ref_mv->row);
+ start_col = MAX(-range, x->mv_col_min - ref_mv->col);
+ end_row = MIN(range, x->mv_row_max - ref_mv->row);
+ end_col = MIN(range, x->mv_col_max - ref_mv->col);
- // Work out the start point for the search
- in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col;
+ for (r = start_row; r <= end_row; ++r) {
+ for (c = start_col; c <= end_col; c += 4) {
+ if (c + 3 <= end_col) {
+ unsigned int sads[4];
+ const uint8_t *addrs[4];
+ for (i = 0; i < 4; ++i) {
+ const MV mv = {ref_mv->row + r, ref_mv->col + c + i};
+ addrs[i] = get_buf_from_mv(in_what, &mv);
+ }
- // Check the starting position
- bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff)
- + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
-
- start_row = MAX(-range, x->mv_row_min - ref_row);
- start_col = MAX(-range, x->mv_col_min - ref_col);
- end_row = MIN(range, x->mv_row_max - ref_row);
- end_col = MIN(range, x->mv_col_max - ref_col);
-
- for (tr = start_row; tr <= end_row; ++tr) {
- for (tc = start_col; tc <= end_col; tc += 4) {
- if ((tc + 3) <= end_col) {
- unsigned int sad_array[4];
- unsigned char const *addr_ref[4];
- for (i = 0; i < 4; ++i)
- addr_ref[i] = in_what + tr * in_what_stride + tc + i;
-
- fn_ptr->sdx4df(what, what_stride, addr_ref, in_what_stride, sad_array);
+ fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads);
for (i = 0; i < 4; ++i) {
- if (sad_array[i] < bestsad) {
- const MV this_mv = {ref_row + tr, ref_col + tc + i};
- thissad = sad_array[i] +
- mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
- if (thissad < bestsad) {
- bestsad = thissad;
- best_tr = tr;
- best_tc = tc + i;
+ if (sads[i] < best_sad) {
+ const MV mv = {ref_mv->row + r, ref_mv->col + c + i};
+ const unsigned int sad = sads[i] +
+ mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
+ if (sad < best_sad) {
+ best_sad = sad;
+ *best_mv = mv;
}
}
}
} else {
- for (i = 0; i < end_col - tc; ++i) {
- const uint8_t *check_here = in_what + tr * in_what_stride + tc + i;
- thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
- bestsad);
-
- if (thissad < bestsad) {
- const MV this_mv = {ref_row + tr, ref_col + tc + i};
- thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
-
- if (thissad < bestsad) {
- bestsad = thissad;
- best_tr = tr;
- best_tc = tc + i;
+ for (i = 0; i < end_col - c; ++i) {
+ const MV mv = {ref_mv->row + r, ref_mv->col + c + i};
+ unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
+ get_buf_from_mv(in_what, &mv), in_what->stride, best_sad);
+ if (sad < best_sad) {
+ sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
+ if (sad < best_sad) {
+ best_sad = sad;
+ *best_mv = mv;
}
}
}
}
}
}
- best_mv->row += best_tr;
- best_mv->col += best_tc;
- return bestsad;
+
+ return best_sad;
}
int vp9_diamond_search_sad_c(const MACROBLOCK *x,
+ const search_site_config *cfg,
MV *ref_mv, MV *best_mv,
int search_param, int sad_per_bit, int *num00,
const vp9_variance_fn_ptr_t *fn_ptr,
@@ -973,8 +954,8 @@
// of iterations
// 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 =
// (MAX_FIRST_STEP/4) pel... etc.
- const search_site *const ss = &x->ss[search_param * x->searches_per_step];
- const int tot_steps = (x->ss_count / x->searches_per_step) - search_param;
+ const search_site *const ss = &cfg->ss[search_param * cfg->searches_per_step];
+ const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param;
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
const uint8_t *best_address, *in_what_ref;
int best_sad = INT_MAX;
@@ -996,7 +977,7 @@
i = 1;
for (step = 0; step < tot_steps; step++) {
- for (j = 0; j < x->searches_per_step; j++) {
+ for (j = 0; j < cfg->searches_per_step; j++) {
const MV mv = {best_mv->row + ss[i].mv.row,
best_mv->col + ss[i].mv.col};
if (is_mv_in(x, &mv)) {
@@ -1050,6 +1031,7 @@
}
int vp9_diamond_search_sadx4(const MACROBLOCK *x,
+ const search_site_config *cfg,
MV *ref_mv, MV *best_mv, int search_param,
int sad_per_bit, int *num00,
const vp9_variance_fn_ptr_t *fn_ptr,
@@ -1075,8 +1057,8 @@
// 0 = initial step (MAX_FIRST_STEP) pel
// 1 = (MAX_FIRST_STEP/2) pel,
// 2 = (MAX_FIRST_STEP/4) pel...
- const search_site *ss = &x->ss[search_param * x->searches_per_step];
- const int tot_steps = (x->ss_count / x->searches_per_step) - search_param;
+ const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step];
+ const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param;
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
@@ -1112,7 +1094,7 @@
if (all_in) {
unsigned int sad_array[4];
- for (j = 0; j < x->searches_per_step; j += 4) {
+ for (j = 0; j < cfg->searches_per_step; j += 4) {
unsigned char const *block_offset[4];
for (t = 0; t < 4; t++)
@@ -1135,7 +1117,7 @@
}
}
} else {
- for (j = 0; j < x->searches_per_step; j++) {
+ for (j = 0; j < cfg->searches_per_step; j++) {
// Trap illegal vectors
const MV this_mv = {best_mv->row + ss[i].mv.row,
best_mv->col + ss[i].mv.col};
@@ -1202,7 +1184,7 @@
const MV *ref_mv, MV *dst_mv) {
MV temp_mv;
int thissme, n, num00 = 0;
- int bestsme = cpi->diamond_search_sad(x, mvp_full, &temp_mv,
+ int bestsme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv,
step_param, sadpb, &n,
fn_ptr, ref_mv);
if (bestsme < INT_MAX)
@@ -1220,7 +1202,7 @@
if (num00) {
num00--;
} else {
- thissme = cpi->diamond_search_sad(x, mvp_full, &temp_mv,
+ thissme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv,
step_param + n, sadpb, &num00,
fn_ptr, ref_mv);
if (thissme < INT_MAX)
@@ -1355,108 +1337,89 @@
int sad_per_bit, int distance,
const vp9_variance_fn_ptr_t *fn_ptr,
const MV *center_mv, MV *best_mv) {
+ int r;
const MACROBLOCKD *const xd = &x->e_mbd;
- const uint8_t *const what = x->plane[0].src.buf;
- const int what_stride = x->plane[0].src.stride;
- const uint8_t *const in_what = xd->plane[0].pre[0].buf;
- const int in_what_stride = xd->plane[0].pre[0].stride;
- MV this_mv;
- unsigned int bestsad = INT_MAX;
- int r, c;
- int ref_row = ref_mv->row;
- int ref_col = ref_mv->col;
-
- // Apply further limits to prevent us looking using vectors that stretch
- // beyond the UMV border
- const int row_min = MAX(ref_row - distance, x->mv_row_min);
- const int row_max = MIN(ref_row + distance, x->mv_row_max);
- const int col_min = MAX(ref_col - distance, x->mv_col_min);
- const int col_max = MIN(ref_col + distance, x->mv_col_max);
- DECLARE_ALIGNED_ARRAY(16, uint32_t, sad_array8, 8);
- unsigned int sad_array[3];
+ const struct buf_2d *const what = &x->plane[0].src;
+ const struct buf_2d *const in_what = &xd->plane[0].pre[0];
+ const int row_min = MAX(ref_mv->row - distance, x->mv_row_min);
+ const int row_max = MIN(ref_mv->row + distance, x->mv_row_max);
+ const int col_min = MAX(ref_mv->col - distance, x->mv_col_min);
+ const int col_max = MIN(ref_mv->col + distance, x->mv_col_max);
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
+ unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride,
+ get_buf_from_mv(in_what, ref_mv), in_what->stride, 0x7fffffff) +
+ mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
+ *best_mv = *ref_mv;
- // Work out the mid point for the search
- const uint8_t *bestaddress = &in_what[ref_row * in_what_stride + ref_col];
+ for (r = row_min; r < row_max; ++r) {
+ int c = col_min;
+ const uint8_t *check_here = &in_what->buf[r * in_what->stride + c];
- best_mv->row = ref_row;
- best_mv->col = ref_col;
+ if (fn_ptr->sdx8f != NULL) {
+ while ((c + 7) < col_max) {
+ int i;
+ unsigned int sads[8];
- // Baseline value at the center
- bestsad = fn_ptr->sdf(what, what_stride,
- bestaddress, in_what_stride, 0x7fffffff)
- + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
+ fn_ptr->sdx8f(what->buf, what->stride, check_here, in_what->stride,
+ sads);
- for (r = row_min; r < row_max; r++) {
- const uint8_t *check_here = &in_what[r * in_what_stride + col_min];
- this_mv.row = r;
- c = col_min;
-
- while ((c + 7) < col_max) {
- int i;
-
- fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8);
-
- for (i = 0; i < 8; i++) {
- unsigned int thissad = (unsigned int)sad_array8[i];
-
- if (thissad < bestsad) {
- this_mv.col = c;
- thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
- if (thissad < bestsad) {
- bestsad = thissad;
- best_mv->row = r;
- best_mv->col = c;
+ for (i = 0; i < 8; ++i) {
+ unsigned int sad = sads[i];
+ if (sad < best_sad) {
+ const MV mv = {r, c};
+ sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
+ if (sad < best_sad) {
+ best_sad = sad;
+ *best_mv = mv;
+ }
}
+ ++check_here;
+ ++c;
}
-
- check_here++;
- c++;
}
}
- while ((c + 2) < col_max && fn_ptr->sdx3f != NULL) {
- int i;
+ if (fn_ptr->sdx3f != NULL) {
+ while ((c + 2) < col_max) {
+ int i;
+ unsigned int sads[3];
- fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
+ fn_ptr->sdx3f(what->buf, what->stride, check_here, in_what->stride,
+ sads);
- for (i = 0; i < 3; i++) {
- unsigned int thissad = sad_array[i];
-
- if (thissad < bestsad) {
- this_mv.col = c;
- thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
- if (thissad < bestsad) {
- bestsad = thissad;
- best_mv->row = r;
- best_mv->col = c;
+ for (i = 0; i < 3; ++i) {
+ unsigned int sad = sads[i];
+ if (sad < best_sad) {
+ const MV mv = {r, c};
+ sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
+ if (sad < best_sad) {
+ best_sad = sad;
+ *best_mv = mv;
+ }
}
+ ++check_here;
+ ++c;
}
-
- check_here++;
- c++;
}
}
while (c < col_max) {
- unsigned int thissad = fn_ptr->sdf(what, what_stride,
- check_here, in_what_stride, bestsad);
-
- if (thissad < bestsad) {
- this_mv.col = c;
- thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
- if (thissad < bestsad) {
- bestsad = thissad;
- best_mv->row = r;
- best_mv->col = c;
+ unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
+ check_here, in_what->stride, best_sad);
+ if (sad < best_sad) {
+ const MV mv = {r, c};
+ sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
+ if (sad < best_sad) {
+ best_sad = sad;
+ *best_mv = mv;
}
}
-
- check_here++;
- c++;
+ ++check_here;
+ ++c;
}
}
- return bestsad;
+
+ return best_sad;
}
int vp9_refining_search_sad_c(const MACROBLOCK *x,
diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h
index 70d7985..827957d 100644
--- a/vp9/encoder/vp9_mcomp.h
+++ b/vp9/encoder/vp9_mcomp.h
@@ -31,6 +31,20 @@
// for Block_16x16
#define BORDER_MV_PIXELS_B16 (16 + VP9_INTERP_EXTEND)
+// motion search site
+typedef struct search_site {
+ MV mv;
+ int offset;
+} search_site;
+
+typedef struct search_site_config {
+ search_site ss[8 * MAX_MVSEARCH_STEPS + 1];
+ int ss_count;
+ int searches_per_step;
+} search_site_config;
+
+void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride);
+void vp9_init3smotion_compensation(search_site_config *cfg, int stride);
void vp9_set_mv_search_range(MACROBLOCK *x, const MV *mv);
int vp9_mv_bit_cost(const MV *mv, const MV *ref,
@@ -46,11 +60,11 @@
const uint8_t *second_pred,
const vp9_variance_fn_ptr_t *vfp,
int use_mvcost);
-void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride);
-void vp9_init3smotion_compensation(MACROBLOCK *x, int stride);
struct VP9_COMP;
-int vp9_init_search_range(struct VP9_COMP *cpi, int size);
+struct SPEED_FEATURES;
+
+int vp9_init_search_range(const struct SPEED_FEATURES *sf, int size);
// Runs sequence of diamond searches in smaller steps for RD
int vp9_full_pixel_diamond(const struct VP9_COMP *cpi, MACROBLOCK *x,
@@ -119,6 +133,7 @@
const MV *center_mv);
typedef int (*vp9_diamond_search_fn_t)(const MACROBLOCK *x,
+ const search_site_config *cfg,
MV *ref_mv, MV *best_mv,
int search_param, int sad_per_bit,
int *num00,
diff --git a/vp9/encoder/vp9_picklpf.c b/vp9/encoder/vp9_picklpf.c
index 7c42bb8..a4b280c 100644
--- a/vp9/encoder/vp9_picklpf.c
+++ b/vp9/encoder/vp9_picklpf.c
@@ -24,8 +24,12 @@
#include "vp9/encoder/vp9_quantize.h"
static int get_max_filter_level(const VP9_COMP *cpi) {
- return cpi->twopass.section_intra_rating > 8 ? MAX_LOOP_FILTER * 3 / 4
- : MAX_LOOP_FILTER;
+ if (cpi->pass == 2) {
+ return cpi->twopass.section_intra_rating > 8 ? MAX_LOOP_FILTER * 3 / 4
+ : MAX_LOOP_FILTER;
+ } else {
+ return MAX_LOOP_FILTER;
+ }
}
@@ -77,8 +81,8 @@
// Bias against raising loop filter in favor of lowering it.
int bias = (best_err >> (15 - (filt_mid / 8))) * filter_step;
- if (cpi->twopass.section_intra_rating < 20)
- bias = bias * cpi->twopass.section_intra_rating / 20;
+ if ((cpi->pass == 2) && (cpi->twopass.section_intra_rating < 20))
+ bias = (bias * cpi->twopass.section_intra_rating) / 20;
// yx, bias less for large block size
if (cm->tx_mode != ONLY_4X4)
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 56eb944..78fba73 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -27,7 +27,6 @@
#include "vp9/encoder/vp9_rdopt.h"
static void full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
- const TileInfo *const tile,
BLOCK_SIZE bsize, int mi_row, int mi_col,
int_mv *tmp_mv, int *rate_mv) {
MACROBLOCKD *xd = &x->e_mbd;
@@ -85,39 +84,9 @@
mvp_full.col >>= 3;
mvp_full.row >>= 3;
- if (cpi->sf.search_method == FAST_DIAMOND) {
- // NOTE: this returns SAD
- vp9_fast_dia_search(x, &mvp_full, step_param, sadpb, 0,
- &cpi->fn_ptr[bsize], 1,
- &ref_mv, &tmp_mv->as_mv);
- } else if (cpi->sf.search_method == FAST_HEX) {
- // NOTE: this returns SAD
- vp9_fast_hex_search(x, &mvp_full, step_param, sadpb, 0,
- &cpi->fn_ptr[bsize], 1,
- &ref_mv, &tmp_mv->as_mv);
- } else if (cpi->sf.search_method == HEX) {
- // NOTE: this returns SAD
- vp9_hex_search(x, &mvp_full, step_param, sadpb, 1,
- &cpi->fn_ptr[bsize], 1,
- &ref_mv, &tmp_mv->as_mv);
- } else if (cpi->sf.search_method == SQUARE) {
- // NOTE: this returns SAD
- vp9_square_search(x, &mvp_full, step_param, sadpb, 1,
- &cpi->fn_ptr[bsize], 1,
- &ref_mv, &tmp_mv->as_mv);
- } else if (cpi->sf.search_method == BIGDIA) {
- // NOTE: this returns SAD
- vp9_bigdia_search(x, &mvp_full, step_param, sadpb, 1,
- &cpi->fn_ptr[bsize], 1,
- &ref_mv, &tmp_mv->as_mv);
- } else {
- int further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
- // NOTE: this returns variance
- vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
- sadpb, further_steps, 1,
- &cpi->fn_ptr[bsize],
- &ref_mv, &tmp_mv->as_mv);
- }
+ full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb, &ref_mv,
+ &tmp_mv->as_mv, INT_MAX, 0);
+
x->mv_col_min = tmp_col_min;
x->mv_col_max = tmp_col_max;
x->mv_row_min = tmp_row_min;
@@ -137,7 +106,6 @@
}
static void sub_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
- const TileInfo *const tile,
BLOCK_SIZE bsize, int mi_row, int mi_col,
MV *tmp_mv) {
MACROBLOCKD *xd = &x->e_mbd;
@@ -191,10 +159,16 @@
struct macroblock_plane *const p = &x->plane[0];
struct macroblockd_plane *const pd = &xd->plane[0];
- int var = cpi->fn_ptr[bsize].vf(p->src.buf, p->src.stride,
- pd->dst.buf, pd->dst.stride, &sse);
+ unsigned int var = cpi->fn_ptr[bsize].vf(p->src.buf, p->src.stride,
+ pd->dst.buf, pd->dst.stride, &sse);
- vp9_model_rd_from_var_lapndz(sse + var, 1 << num_pels_log2_lookup[bsize],
+ // TODO(jingning) This is a temporary solution to account for frames with
+ // light changes. Need to customize the rate-distortion modeling for non-RD
+ // mode decision.
+ if ((sse >> 3) > var)
+ sse = var;
+
+ vp9_model_rd_from_var_lapndz(var + sse, 1 << num_pels_log2_lookup[bsize],
pd->dequant[1] >> 3, &rate, &dist);
*out_rate_sum = rate;
*out_dist_sum = dist << 3;
@@ -314,18 +288,18 @@
if (this_rd < (int64_t)(1 << num_pels_log2_lookup[bsize]))
continue;
- full_pixel_motion_search(cpi, x, tile, bsize, mi_row, mi_col,
+ full_pixel_motion_search(cpi, x, bsize, mi_row, mi_col,
&frame_mv[NEWMV][ref_frame], &rate_mv);
if (frame_mv[NEWMV][ref_frame].as_int == INVALID_MV)
continue;
- rate_mode = x->inter_mode_cost[mbmi->mode_context[ref_frame]]
- [INTER_OFFSET(this_mode)];
+ rate_mode = cpi->inter_mode_cost[mbmi->mode_context[ref_frame]]
+ [INTER_OFFSET(this_mode)];
if (RDCOST(x->rdmult, x->rddiv, rate_mv + rate_mode, 0) > best_rd)
continue;
- sub_pixel_motion_search(cpi, x, tile, bsize, mi_row, mi_col,
+ sub_pixel_motion_search(cpi, x, bsize, mi_row, mi_col,
&frame_mv[NEWMV][ref_frame].as_mv);
}
@@ -355,24 +329,24 @@
model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[EIGHTTAP],
&pf_dist[EIGHTTAP]);
tmp_rdcost1 = RDCOST(x->rdmult, x->rddiv,
- vp9_get_switchable_rate(x) + pf_rate[EIGHTTAP],
+ vp9_get_switchable_rate(cpi) + pf_rate[EIGHTTAP],
pf_dist[EIGHTTAP]);
mbmi->interp_filter = EIGHTTAP_SHARP;
vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[EIGHTTAP_SHARP],
&pf_dist[EIGHTTAP_SHARP]);
- tmp_rdcost2 = RDCOST(x->rdmult, x->rddiv,
- vp9_get_switchable_rate(x) + pf_rate[EIGHTTAP_SHARP],
- pf_dist[EIGHTTAP_SHARP]);
+ tmp_rdcost2 = RDCOST(x->rdmult, x->rddiv, vp9_get_switchable_rate(cpi) +
+ pf_rate[EIGHTTAP_SHARP],
+ pf_dist[EIGHTTAP_SHARP]);
mbmi->interp_filter = EIGHTTAP_SMOOTH;
vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[EIGHTTAP_SMOOTH],
&pf_dist[EIGHTTAP_SMOOTH]);
- tmp_rdcost3 = RDCOST(x->rdmult, x->rddiv,
- vp9_get_switchable_rate(x) + pf_rate[EIGHTTAP_SMOOTH],
- pf_dist[EIGHTTAP_SMOOTH]);
+ tmp_rdcost3 = RDCOST(x->rdmult, x->rddiv, vp9_get_switchable_rate(cpi) +
+ pf_rate[EIGHTTAP_SMOOTH],
+ pf_dist[EIGHTTAP_SMOOTH]);
if (tmp_rdcost2 < tmp_rdcost1) {
if (tmp_rdcost2 < tmp_rdcost3)
@@ -395,7 +369,7 @@
}
rate += rate_mv;
- rate += x->inter_mode_cost[mbmi->mode_context[ref_frame]]
+ rate += cpi->inter_mode_cost[mbmi->mode_context[ref_frame]]
[INTER_OFFSET(this_mode)];
this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist);
@@ -426,7 +400,7 @@
&pd->dst.buf[0], pd->dst.stride, 0, 0, 0);
model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist);
- rate += x->mbmode_cost[this_mode];
+ rate += cpi->mbmode_cost[this_mode];
rate += intra_cost_penalty;
this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist);
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index 6ebd9f3..0f4c4da 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -40,14 +40,14 @@
#define MIN_BPB_FACTOR 0.005
#define MAX_BPB_FACTOR 50
+#define FRAME_OVERHEAD_BITS 200
+
// Tables relating active max Q to active min Q
static int kf_low_motion_minq[QINDEX_RANGE];
static int kf_high_motion_minq[QINDEX_RANGE];
-static int gf_low_motion_minq[QINDEX_RANGE];
-static int gf_high_motion_minq[QINDEX_RANGE];
+static int arfgf_low_motion_minq[QINDEX_RANGE];
+static int arfgf_high_motion_minq[QINDEX_RANGE];
static int inter_minq[QINDEX_RANGE];
-static int afq_low_motion_minq[QINDEX_RANGE];
-static int afq_high_motion_minq[QINDEX_RANGE];
static int gf_high = 2000;
static int gf_low = 400;
static int kf_high = 5000;
@@ -79,13 +79,11 @@
for (i = 0; i < QINDEX_RANGE; i++) {
const double maxq = vp9_convert_qindex_to_q(i);
- kf_low_motion_minq[i] = get_minq_index(maxq, 0.000001, -0.0004, 0.15);
+ kf_low_motion_minq[i] = get_minq_index(maxq, 0.000001, -0.0004, 0.125);
kf_high_motion_minq[i] = get_minq_index(maxq, 0.000002, -0.0012, 0.50);
- gf_low_motion_minq[i] = get_minq_index(maxq, 0.0000015, -0.0009, 0.32);
- gf_high_motion_minq[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.50);
- afq_low_motion_minq[i] = get_minq_index(maxq, 0.0000015, -0.0009, 0.33);
- afq_high_motion_minq[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.55);
- inter_minq[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.75);
+ arfgf_low_motion_minq[i] = get_minq_index(maxq, 0.0000015, -0.0009, 0.30);
+ arfgf_high_motion_minq[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.50);
+ inter_minq[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.90);
}
}
@@ -546,7 +544,7 @@
}
active_best_quality = get_active_quality(
q, rc->gfu_boost, gf_low, gf_high,
- gf_low_motion_minq, gf_high_motion_minq);
+ arfgf_low_motion_minq, arfgf_high_motion_minq);
} else {
// Use the lower of active_worst_quality and recent/average Q.
if (cm->current_video_frame > 1) {
@@ -674,17 +672,12 @@
if (oxcf->rc_mode == RC_MODE_CONSTRAINED_QUALITY) {
if (q < cq_level)
q = cq_level;
- if (rc->frames_since_key > 1) {
- active_best_quality = get_active_quality(q, rc->gfu_boost,
- gf_low, gf_high,
- afq_low_motion_minq,
- afq_high_motion_minq);
- } else {
- active_best_quality = get_active_quality(q, rc->gfu_boost,
- gf_low, gf_high,
- gf_low_motion_minq,
- gf_high_motion_minq);
- }
+
+ active_best_quality = get_active_quality(q, rc->gfu_boost,
+ gf_low, gf_high,
+ arfgf_low_motion_minq,
+ arfgf_high_motion_minq);
+
// Constrained quality use slightly lower active best.
active_best_quality = active_best_quality * 15 / 16;
@@ -692,20 +685,14 @@
if (!cpi->refresh_alt_ref_frame) {
active_best_quality = cq_level;
} else {
- if (rc->frames_since_key > 1) {
- active_best_quality = get_active_quality(
- q, rc->gfu_boost, gf_low, gf_high,
- afq_low_motion_minq, afq_high_motion_minq);
- } else {
- active_best_quality = get_active_quality(
- q, rc->gfu_boost, gf_low, gf_high,
- gf_low_motion_minq, gf_high_motion_minq);
- }
+ active_best_quality = get_active_quality(
+ q, rc->gfu_boost, gf_low, gf_high,
+ arfgf_low_motion_minq, arfgf_high_motion_minq);
}
} else {
active_best_quality = get_active_quality(
q, rc->gfu_boost, gf_low, gf_high,
- gf_low_motion_minq, gf_high_motion_minq);
+ arfgf_low_motion_minq, arfgf_high_motion_minq);
}
} else {
if (oxcf->rc_mode == RC_MODE_CONSTANT_QUALITY) {
@@ -807,7 +794,7 @@
int active_worst_quality = cpi->twopass.active_worst_quality;
int q;
- if (frame_is_intra_only(cm)) {
+ if (frame_is_intra_only(cm) || vp9_is_upper_layer_key_frame(cpi)) {
#if !CONFIG_MULTIPLE_ARF
// Handle the special case for key frames forced when we have75 reached
// the maximum key frame interval. Here force the Q to a range
@@ -865,17 +852,12 @@
if (oxcf->rc_mode == RC_MODE_CONSTRAINED_QUALITY) {
if (q < cq_level)
q = cq_level;
- if (rc->frames_since_key > 1) {
- active_best_quality = get_active_quality(q, rc->gfu_boost,
- gf_low, gf_high,
- afq_low_motion_minq,
- afq_high_motion_minq);
- } else {
- active_best_quality = get_active_quality(q, rc->gfu_boost,
- gf_low, gf_high,
- gf_low_motion_minq,
- gf_high_motion_minq);
- }
+
+ active_best_quality = get_active_quality(q, rc->gfu_boost,
+ gf_low, gf_high,
+ arfgf_low_motion_minq,
+ arfgf_high_motion_minq);
+
// Constrained quality use slightly lower active best.
active_best_quality = active_best_quality * 15 / 16;
@@ -883,20 +865,14 @@
if (!cpi->refresh_alt_ref_frame) {
active_best_quality = cq_level;
} else {
- if (rc->frames_since_key > 1) {
- active_best_quality = get_active_quality(
- q, rc->gfu_boost, gf_low, gf_high,
- afq_low_motion_minq, afq_high_motion_minq);
- } else {
- active_best_quality = get_active_quality(
- q, rc->gfu_boost, gf_low, gf_high,
- gf_low_motion_minq, gf_high_motion_minq);
- }
+ active_best_quality = get_active_quality(
+ q, rc->gfu_boost, gf_low, gf_high,
+ arfgf_low_motion_minq, arfgf_high_motion_minq);
}
} else {
active_best_quality = get_active_quality(
q, rc->gfu_boost, gf_low, gf_high,
- gf_low_motion_minq, gf_high_motion_minq);
+ arfgf_low_motion_minq, arfgf_high_motion_minq);
}
} else {
if (oxcf->rc_mode == RC_MODE_CONSTANT_QUALITY) {
@@ -928,7 +904,8 @@
vp9_clear_system_state();
// Limit Q range for the adaptive loop.
- if (cm->frame_type == KEY_FRAME && !rc->this_key_frame_forced) {
+ if ((cm->frame_type == KEY_FRAME || vp9_is_upper_layer_key_frame(cpi)) &&
+ !rc->this_key_frame_forced) {
qdelta = vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type,
active_worst_quality, 2.0);
} else if (!rc->is_src_frame_alt_ref &&
@@ -995,11 +972,7 @@
q = rc_pick_q_and_bounds_two_pass(cpi, bottom_index, top_index);
}
- // Q of 0 is disabled because we force tx size to be
- // 16x16...
if (cpi->sf.use_nonrd_pick_mode) {
- if (q == 0)
- q++;
if (cpi->sf.force_frame_boost == 1)
q -= cpi->sf.max_delta_qindex;
@@ -1305,11 +1278,26 @@
cpi->oxcf.key_freq == 0))) {
cm->frame_type = KEY_FRAME;
rc->source_alt_ref_active = 0;
+
+ if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) {
+ cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame = 1;
+ }
+
if (cpi->pass == 0 && cpi->oxcf.rc_mode == RC_MODE_CBR) {
target = calc_iframe_target_size_one_pass_cbr(cpi);
}
} else {
cm->frame_type = INTER_FRAME;
+
+ if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) {
+ LAYER_CONTEXT *lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id];
+ if (cpi->svc.spatial_layer_id == 0) {
+ lc->is_key_frame = 0;
+ } else {
+ lc->is_key_frame = cpi->svc.layer_context[0].is_key_frame;
+ }
+ }
+
if (cpi->pass == 0 && cpi->oxcf.rc_mode == RC_MODE_CBR) {
target = calc_pframe_target_size_one_pass_cbr(cpi);
}
diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h
index 8203661..b1cc676 100644
--- a/vp9/encoder/vp9_ratectrl.h
+++ b/vp9/encoder/vp9_ratectrl.h
@@ -20,8 +20,6 @@
extern "C" {
#endif
-#define FRAME_OVERHEAD_BITS 200
-
// Bits Per MB at different Q (Multiplied by 512)
#define BPER_MB_NORMBITS 9
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index f309aac..22e19fe 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -152,24 +152,23 @@
}
static void fill_mode_costs(VP9_COMP *cpi) {
- MACROBLOCK *const x = &cpi->mb;
const FRAME_CONTEXT *const fc = &cpi->common.fc;
int i, j;
for (i = 0; i < INTRA_MODES; i++)
for (j = 0; j < INTRA_MODES; j++)
- vp9_cost_tokens((int *)x->y_mode_costs[i][j], vp9_kf_y_mode_prob[i][j],
+ vp9_cost_tokens(cpi->y_mode_costs[i][j], vp9_kf_y_mode_prob[i][j],
vp9_intra_mode_tree);
// TODO(rbultje) separate tables for superblock costing?
- vp9_cost_tokens(x->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree);
- vp9_cost_tokens(x->intra_uv_mode_cost[KEY_FRAME],
+ vp9_cost_tokens(cpi->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree);
+ vp9_cost_tokens(cpi->intra_uv_mode_cost[KEY_FRAME],
vp9_kf_uv_mode_prob[TM_PRED], vp9_intra_mode_tree);
- vp9_cost_tokens(x->intra_uv_mode_cost[INTER_FRAME],
+ vp9_cost_tokens(cpi->intra_uv_mode_cost[INTER_FRAME],
fc->uv_mode_prob[TM_PRED], vp9_intra_mode_tree);
for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
- vp9_cost_tokens((int *)x->switchable_interp_costs[i],
+ vp9_cost_tokens(cpi->switchable_interp_costs[i],
fc->switchable_interp_prob[i], vp9_switchable_interp_tree);
}
@@ -313,7 +312,7 @@
&cm->fc.nmvc, cm->allow_high_precision_mv);
for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
- vp9_cost_tokens((int *)x->inter_mode_cost[i],
+ vp9_cost_tokens((int *)cpi->inter_mode_cost[i],
cm->fc.inter_mode_probs[i], vp9_inter_mode_tree);
}
}
@@ -807,7 +806,7 @@
{INT64_MAX, INT64_MAX},
{INT64_MAX, INT64_MAX},
{INT64_MAX, INT64_MAX}};
- int n, m;
+ TX_SIZE n, m;
int s0, s1;
const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
int64_t best_rd = INT64_MAX;
@@ -890,7 +889,7 @@
{INT64_MAX, INT64_MAX},
{INT64_MAX, INT64_MAX},
{INT64_MAX, INT64_MAX}};
- int n, m;
+ TX_SIZE n, m;
int s0, s1;
double scale_rd[TX_SIZES] = {1.73, 1.44, 1.20, 1.00};
const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
@@ -962,7 +961,7 @@
vp9_subtract_plane(x, bs, 0);
- if (cpi->sf.tx_size_search_method == USE_LARGESTALL) {
+ if (cpi->sf.tx_size_search_method == USE_LARGESTALL || xd->lossless) {
vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse,
ref_best_rd, bs);
@@ -1000,7 +999,7 @@
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
assert(bs == mbmi->sb_type);
- if (cpi->sf.tx_size_search_method != USE_FULL_RD) {
+ if (cpi->sf.tx_size_search_method != USE_FULL_RD || xd->lossless) {
vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse,
ref_best_rd, bs);
@@ -1187,7 +1186,7 @@
int tot_rate_y = 0;
int64_t total_rd = 0;
ENTROPY_CONTEXT t_above[4], t_left[4];
- const int *bmode_costs = mb->mbmode_cost;
+ const int *bmode_costs = cpi->mbmode_cost;
vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));
@@ -1203,7 +1202,7 @@
const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, i);
const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, i);
- bmode_costs = mb->y_mode_costs[A][L];
+ bmode_costs = cpi->y_mode_costs[A][L];
}
this_rd = rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs,
@@ -1250,7 +1249,7 @@
int64_t this_distortion, this_rd;
TX_SIZE best_tx = TX_4X4;
int i;
- int *bmode_costs = x->mbmode_cost;
+ int *bmode_costs = cpi->mbmode_cost;
if (cpi->sf.tx_size_search_method == USE_FULL_RD)
for (i = 0; i < TX_MODES; i++)
@@ -1269,7 +1268,7 @@
const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0);
const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0);
- bmode_costs = x->y_mode_costs[A][L];
+ bmode_costs = cpi->y_mode_costs[A][L];
}
mic->mbmi.mode = mode;
@@ -1378,7 +1377,7 @@
if (this_rate_tokenonly == INT_MAX)
continue;
this_rate = this_rate_tokenonly +
- x->intra_uv_mode_cost[cpi->common.frame_type][mode];
+ cpi->intra_uv_mode_cost[cpi->common.frame_type][mode];
this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
if (this_rd < best_rd) {
@@ -1426,7 +1425,7 @@
x->e_mbd.mi[0]->mbmi.uv_mode = DC_PRED;
super_block_uvrd(cpi, x, rate_tokenonly, distortion,
skippable, &unused, bsize, INT64_MAX);
- *rate = *rate_tokenonly + x->intra_uv_mode_cost[cm->frame_type][DC_PRED];
+ *rate = *rate_tokenonly + cpi->intra_uv_mode_cost[cm->frame_type][DC_PRED];
return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
}
@@ -1460,7 +1459,7 @@
// Don't account for mode here if segment skip is enabled.
if (!vp9_segfeature_active(&cpi->common.seg, segment_id, SEG_LVL_SKIP)) {
assert(is_inter_mode(mode));
- return x->inter_mode_cost[mode_context][INTER_OFFSET(mode)];
+ return cpi->inter_mode_cost[mode_context][INTER_OFFSET(mode)];
} else {
return 0;
}
@@ -1736,7 +1735,6 @@
const BLOCK_SIZE bsize = mbmi->sb_type;
const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
- vp9_variance_fn_ptr_t *v_fn_ptr = &cpi->fn_ptr[bsize];
ENTROPY_CONTEXT t_above[2], t_left[2];
int subpelmv = 1, have_ref = 0;
const int has_second_rf = has_second_ref(mbmi);
@@ -1806,9 +1804,8 @@
// motion search for newmv (single predictor case only)
if (!has_second_rf && this_mode == NEWMV &&
seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) {
- int_mv *const new_mv = &mode_mv[NEWMV][0];
+ MV *const new_mv = &mode_mv[NEWMV][0].as_mv;
int step_param = 0;
- int further_steps;
int thissme, bestsme = INT_MAX;
int sadpb = x->sadperbit4;
MV mvp_full;
@@ -1836,8 +1833,8 @@
// Take wtd average of the step_params based on the last frame's
// max mv magnitude and the best ref mvs of the current block for
// the given reference.
- step_param = (vp9_init_search_range(cpi, max_mv) +
- cpi->mv_step_param) >> 1;
+ step_param = (vp9_init_search_range(&cpi->sf, max_mv) +
+ cpi->mv_step_param) / 2;
} else {
step_param = cpi->mv_step_param;
}
@@ -1851,48 +1848,14 @@
step_param = MAX(step_param, 8);
}
- further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
// adjust src pointer for this block
mi_buf_shift(x, i);
vp9_set_mv_search_range(x, &bsi->ref_mv[0]->as_mv);
- if (cpi->sf.search_method == HEX) {
- bestsme = vp9_hex_search(x, &mvp_full,
- step_param,
- sadpb, 1, v_fn_ptr, 1,
- &bsi->ref_mv[0]->as_mv,
- &new_mv->as_mv);
- if (bestsme < INT_MAX)
- bestsme = vp9_get_mvpred_var(x, &new_mv->as_mv,
- &bsi->ref_mv[0]->as_mv,
- v_fn_ptr, 1);
- } else if (cpi->sf.search_method == SQUARE) {
- bestsme = vp9_square_search(x, &mvp_full,
- step_param,
- sadpb, 1, v_fn_ptr, 1,
- &bsi->ref_mv[0]->as_mv,
- &new_mv->as_mv);
- if (bestsme < INT_MAX)
- bestsme = vp9_get_mvpred_var(x, &new_mv->as_mv,
- &bsi->ref_mv[0]->as_mv,
- v_fn_ptr, 1);
- } else if (cpi->sf.search_method == BIGDIA) {
- bestsme = vp9_bigdia_search(x, &mvp_full,
- step_param,
- sadpb, 1, v_fn_ptr, 1,
- &bsi->ref_mv[0]->as_mv,
- &new_mv->as_mv);
- if (bestsme < INT_MAX)
- bestsme = vp9_get_mvpred_var(x, &new_mv->as_mv,
- &bsi->ref_mv[0]->as_mv,
- v_fn_ptr, 1);
- } else {
- bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
- sadpb, further_steps, 0, v_fn_ptr,
- &bsi->ref_mv[0]->as_mv,
- &new_mv->as_mv);
- }
+ bestsme = full_pixel_search(cpi, x, bsize, &mvp_full, step_param,
+ sadpb, &bsi->ref_mv[0]->as_mv, new_mv,
+ INT_MAX, 1);
// Should we do a full search (best quality only)
if (is_best_mode(cpi->oxcf.mode)) {
@@ -1901,26 +1864,26 @@
clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max,
x->mv_row_min, x->mv_row_max);
thissme = cpi->full_search_sad(x, &mvp_full,
- sadpb, 16, v_fn_ptr,
+ sadpb, 16, &cpi->fn_ptr[bsize],
&bsi->ref_mv[0]->as_mv,
&best_mv->as_mv);
if (thissme < bestsme) {
bestsme = thissme;
- new_mv->as_int = best_mv->as_int;
+ *new_mv = best_mv->as_mv;
} else {
// The full search result is actually worse so re-instate the
// previous best vector
- best_mv->as_int = new_mv->as_int;
+ best_mv->as_mv = *new_mv;
}
}
if (bestsme < INT_MAX) {
int distortion;
cpi->find_fractional_mv_step(x,
- &new_mv->as_mv,
+ new_mv,
&bsi->ref_mv[0]->as_mv,
cm->allow_high_precision_mv,
- x->errorperbit, v_fn_ptr,
+ x->errorperbit, &cpi->fn_ptr[bsize],
cpi->sf.subpel_force_stop,
cpi->sf.subpel_iters_per_step,
x->nmvjointcost, x->mvcost,
@@ -1928,11 +1891,11 @@
&x->pred_sse[mbmi->ref_frame[0]]);
// save motion search result for use in compound prediction
- seg_mvs[i][mbmi->ref_frame[0]].as_int = new_mv->as_int;
+ seg_mvs[i][mbmi->ref_frame[0]].as_mv = *new_mv;
}
if (cpi->sf.adaptive_motion_search)
- x->pred_mv[mbmi->ref_frame[0]].as_int = new_mv->as_int;
+ x->pred_mv[mbmi->ref_frame[0]].as_mv = *new_mv;
// restore src pointers
mi_buf_restore(x, orig_src, orig_pre);
@@ -2334,12 +2297,12 @@
return (scaled_idx != ref_idx) ? &cm->frame_bufs[scaled_idx].buf : NULL;
}
-int vp9_get_switchable_rate(const MACROBLOCK *x) {
- const MACROBLOCKD *const xd = &x->e_mbd;
+int vp9_get_switchable_rate(const VP9_COMP *cpi) {
+ const MACROBLOCKD *const xd = &cpi->mb.e_mbd;
const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
const int ctx = vp9_get_pred_context_switchable_interp(xd);
return SWITCHABLE_INTERP_RATE_FACTOR *
- x->switchable_interp_costs[ctx][mbmi->interp_filter];
+ cpi->switchable_interp_costs[ctx][mbmi->interp_filter];
}
static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
@@ -2351,7 +2314,7 @@
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
int bestsme = INT_MAX;
- int further_steps, step_param;
+ int step_param;
int sadpb = x->sadperbit16;
MV mvp_full;
int ref = mbmi->ref_frame[0];
@@ -2389,8 +2352,8 @@
// Take wtd average of the step_params based on the last frame's
// max mv magnitude and that based on the best ref mvs of the current
// block for the given reference.
- step_param = (vp9_init_search_range(cpi, x->max_mv_context[ref]) +
- cpi->mv_step_param) >> 1;
+ step_param = (vp9_init_search_range(&cpi->sf, x->max_mv_context[ref]) +
+ cpi->mv_step_param) / 2;
} else {
step_param = cpi->mv_step_param;
}
@@ -2431,50 +2394,8 @@
mvp_full.col >>= 3;
mvp_full.row >>= 3;
- // Further step/diamond searches as necessary
- further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
-
- if (cpi->sf.search_method == FAST_DIAMOND) {
- bestsme = vp9_fast_dia_search(x, &mvp_full, step_param, sadpb, 0,
- &cpi->fn_ptr[bsize], 1,
- &ref_mv, &tmp_mv->as_mv);
- if (bestsme < INT_MAX)
- bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv,
- &cpi->fn_ptr[bsize], 1);
- } else if (cpi->sf.search_method == FAST_HEX) {
- bestsme = vp9_fast_hex_search(x, &mvp_full, step_param, sadpb, 0,
- &cpi->fn_ptr[bsize], 1,
- &ref_mv, &tmp_mv->as_mv);
- if (bestsme < INT_MAX)
- bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv,
- &cpi->fn_ptr[bsize], 1);
- } else if (cpi->sf.search_method == HEX) {
- bestsme = vp9_hex_search(x, &mvp_full, step_param, sadpb, 1,
- &cpi->fn_ptr[bsize], 1,
- &ref_mv, &tmp_mv->as_mv);
- if (bestsme < INT_MAX)
- bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv,
- &cpi->fn_ptr[bsize], 1);
- } else if (cpi->sf.search_method == SQUARE) {
- bestsme = vp9_square_search(x, &mvp_full, step_param, sadpb, 1,
- &cpi->fn_ptr[bsize], 1,
- &ref_mv, &tmp_mv->as_mv);
- if (bestsme < INT_MAX)
- bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv,
- &cpi->fn_ptr[bsize], 1);
- } else if (cpi->sf.search_method == BIGDIA) {
- bestsme = vp9_bigdia_search(x, &mvp_full, step_param, sadpb, 1,
- &cpi->fn_ptr[bsize], 1,
- &ref_mv, &tmp_mv->as_mv);
- if (bestsme < INT_MAX)
- bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv,
- &cpi->fn_ptr[bsize], 1);
- } else {
- bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
- sadpb, further_steps, 1,
- &cpi->fn_ptr[bsize],
- &ref_mv, &tmp_mv->as_mv);
- }
+ bestsme = full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb,
+ &ref_mv, &tmp_mv->as_mv, INT_MAX, 1);
x->mv_col_min = tmp_col_min;
x->mv_col_max = tmp_col_max;
@@ -2788,7 +2709,7 @@
int j;
int64_t rs_rd;
mbmi->interp_filter = i;
- rs = vp9_get_switchable_rate(x);
+ rs = vp9_get_switchable_rate(cpi);
rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
if (i > 0 && intpel_mv) {
@@ -2858,7 +2779,7 @@
// Set the appropriate filter
mbmi->interp_filter = cm->interp_filter != SWITCHABLE ?
cm->interp_filter : *best_filter;
- rs = cm->interp_filter == SWITCHABLE ? vp9_get_switchable_rate(x) : 0;
+ rs = cm->interp_filter == SWITCHABLE ? vp9_get_switchable_rate(cpi) : 0;
if (pred_exists) {
if (best_needs_copy) {
@@ -2888,7 +2809,7 @@
}
if (cm->interp_filter == SWITCHABLE)
- *rate2 += vp9_get_switchable_rate(x);
+ *rate2 += vp9_get_switchable_rate(cpi);
if (!is_comp_pred) {
if (!x->in_active_map) {
@@ -3250,9 +3171,8 @@
// an unfiltered alternative. We allow near/nearest as well
// because they may result in zero-zero MVs but be cheaper.
if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
- const int altref_zero_mask =
+ mode_skip_mask =
~((1 << THR_NEARESTA) | (1 << THR_NEARA) | (1 << THR_ZEROA));
- mode_skip_mask |= altref_zero_mask;
if (frame_mv[NEARMV][ALTREF_FRAME].as_int != 0)
mode_skip_mask |= (1 << THR_NEARA);
if (frame_mv[NEARESTMV][ALTREF_FRAME].as_int != 0)
@@ -3410,12 +3330,6 @@
for (i = 0; i < TX_MODES; ++i)
tx_cache[i] = INT64_MAX;
-#ifdef MODE_TEST_HIT_STATS
- // TEST/DEBUG CODE
- // Keep a rcord of the number of test hits at each size
- cpi->mode_test_hits[bsize]++;
-#endif
-
if (ref_frame == INTRA_FRAME) {
TX_SIZE uv_tx;
intra_super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL,
@@ -3436,7 +3350,7 @@
skippable = skippable && skip_uv[uv_tx];
mbmi->uv_mode = mode_uv[uv_tx];
- rate2 = rate_y + x->mbmode_cost[mbmi->mode] + rate_uv_intra[uv_tx];
+ rate2 = rate_y + cpi->mbmode_cost[mbmi->mode] + rate_uv_intra[uv_tx];
if (this_mode != DC_PRED && this_mode != TM_PRED)
rate2 += intra_cost_penalty;
distortion2 = distortion_y + distortion_uv;
@@ -3954,12 +3868,6 @@
xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
}
-#ifdef MODE_TEST_HIT_STATS
- // TEST/DEBUG CODE
- // Keep a rcord of the number of test hits at each size
- cpi->mode_test_hits[bsize]++;
-#endif
-
if (ref_frame == INTRA_FRAME) {
int rate;
if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y,
@@ -4035,7 +3943,7 @@
if (tmp_rd == INT64_MAX)
continue;
- rs = vp9_get_switchable_rate(x);
+ rs = vp9_get_switchable_rate(cpi);
rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
rd_opt->filter_cache[switchable_filter_index] = tmp_rd;
rd_opt->filter_cache[SWITCHABLE_FILTERS] =
@@ -4113,7 +4021,7 @@
distortion2 += distortion;
if (cm->interp_filter == SWITCHABLE)
- rate2 += vp9_get_switchable_rate(x);
+ rate2 += vp9_get_switchable_rate(cpi);
if (!mode_excluded)
mode_excluded = comp_pred ? cm->reference_mode == SINGLE_REFERENCE
diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h
index cd622d6..b6b51e5 100644
--- a/vp9/encoder/vp9_rdopt.h
+++ b/vp9/encoder/vp9_rdopt.h
@@ -40,7 +40,7 @@
unsigned int qstep, int *rate,
int64_t *dist);
-int vp9_get_switchable_rate(const MACROBLOCK *x);
+int vp9_get_switchable_rate(const VP9_COMP *cpi);
void vp9_setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
const TileInfo *const tile,
@@ -87,6 +87,49 @@
void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi);
+static INLINE int full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x,
+ BLOCK_SIZE bsize, MV *mvp_full,
+ int step_param, int error_per_bit,
+ const MV *ref_mv, MV *tmp_mv,
+ int var_max, int rd) {
+ int var = 0;
+
+ if (cpi->sf.search_method == FAST_DIAMOND) {
+ var = vp9_fast_dia_search(x, mvp_full, step_param, error_per_bit, 0,
+ &cpi->fn_ptr[bsize], 1, ref_mv, tmp_mv);
+ if (rd && var < var_max)
+ var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, &cpi->fn_ptr[bsize], 1);
+ } else if (cpi->sf.search_method == FAST_HEX) {
+ var = vp9_fast_hex_search(x, mvp_full, step_param, error_per_bit, 0,
+ &cpi->fn_ptr[bsize], 1, ref_mv, tmp_mv);
+ if (rd && var < var_max)
+ var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, &cpi->fn_ptr[bsize], 1);
+ } else if (cpi->sf.search_method == HEX) {
+ var = vp9_hex_search(x, mvp_full, step_param, error_per_bit, 1,
+ &cpi->fn_ptr[bsize], 1, ref_mv, tmp_mv);
+ if (rd && var < var_max)
+ var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, &cpi->fn_ptr[bsize], 1);
+ } else if (cpi->sf.search_method == SQUARE) {
+ var = vp9_square_search(x, mvp_full, step_param, error_per_bit, 1,
+ &cpi->fn_ptr[bsize], 1, ref_mv, tmp_mv);
+ if (rd && var < var_max)
+ var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, &cpi->fn_ptr[bsize], 1);
+ } else if (cpi->sf.search_method == BIGDIA) {
+ var = vp9_bigdia_search(x, mvp_full, step_param, error_per_bit, 1,
+ &cpi->fn_ptr[bsize], 1, ref_mv, tmp_mv);
+ if (rd && var < var_max)
+ var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, &cpi->fn_ptr[bsize], 1);
+ } else {
+ int further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
+
+ var = vp9_full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit,
+ further_steps, 1, &cpi->fn_ptr[bsize],
+ ref_mv, tmp_mv);
+ }
+
+ return var;
+}
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h
index cff99a6..46806c9 100644
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -129,7 +129,7 @@
ONE_LOOP_REDUCED = 2
} FAST_COEFF_UPDATE;
-typedef struct {
+typedef struct SPEED_FEATURES {
// Frame level coding parameter update
int frame_parameter_update;
@@ -176,7 +176,7 @@
// a log search that iterates 4 times (check around mv for last for best
// error of combined predictor then check around mv for alt). If 0 we
// we just use the best motion vector found for each frame by itself.
- int comp_inter_joint_search_thresh;
+ BLOCK_SIZE comp_inter_joint_search_thresh;
// This variable is used to cap the maximum number of times we skip testing a
// mode to be evaluated. A high value means we will be faster.
diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c
index 2379f35..2e98fa7 100644
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -220,3 +220,10 @@
: &svc->layer_context[svc->spatial_layer_id];
++lc->current_video_frame_in_layer;
}
+
+int vp9_is_upper_layer_key_frame(const VP9_COMP *const cpi) {
+ return cpi->use_svc &&
+ cpi->svc.number_temporal_layers == 1 &&
+ cpi->svc.spatial_layer_id > 0 &&
+ cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame;
+}
diff --git a/vp9/encoder/vp9_svc_layercontext.h b/vp9/encoder/vp9_svc_layercontext.h
index 2abed30..74d9c1c 100644
--- a/vp9/encoder/vp9_svc_layercontext.h
+++ b/vp9/encoder/vp9_svc_layercontext.h
@@ -30,6 +30,7 @@
struct twopass_rc twopass;
struct vpx_fixed_buf rc_twopass_stats_in;
unsigned int current_video_frame_in_layer;
+ int is_key_frame;
} LAYER_CONTEXT;
typedef struct {
@@ -73,6 +74,9 @@
// Increment number of video frames in layer
void vp9_inc_frame_in_layer(SVC *svc);
+// Check if current layer is key frame in spatial upper layer
+int vp9_is_upper_layer_key_frame(const struct VP9_COMP *const cpi);
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index ca93391..6eff200 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -27,6 +27,8 @@
#include "vpx_ports/vpx_timer.h"
#include "vpx_scale/vpx_scale.h"
+static int fixed_divide[512];
+
static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd,
uint8_t *y_mb_ptr,
uint8_t *u_mb_ptr,
@@ -78,6 +80,14 @@
kernel, mv_precision_uv, x, y);
}
+void vp9_temporal_filter_init() {
+ int i;
+
+ fixed_divide[0] = 0;
+ for (i = 1; i < 512; ++i)
+ fixed_divide[i] = 0x80000 / i;
+}
+
void vp9_temporal_filter_apply_c(uint8_t *frame1,
unsigned int stride,
uint8_t *frame2,
@@ -89,6 +99,7 @@
unsigned int i, j, k;
int modifier;
int byte = 0;
+ const int rounding = strength > 0 ? 1 << (strength - 1) : 0;
for (i = 0, k = 0; i < block_size; i++) {
for (j = 0; j < block_size; j++, k++) {
@@ -101,7 +112,7 @@
// modifier = (int)roundf(coeff > 16 ? 0 : 16-coeff);
modifier *= modifier;
modifier *= 3;
- modifier += 1 << (strength - 1);
+ modifier += rounding;
modifier >>= strength;
if (modifier > 16)
@@ -294,7 +305,7 @@
for (i = 0, k = 0; i < 16; i++) {
for (j = 0; j < 16; j++, k++) {
unsigned int pval = accumulator[k] + (count[k] >> 1);
- pval *= cpi->fixed_divide[count[k]];
+ pval *= fixed_divide[count[k]];
pval >>= 19;
dst1[byte] = (uint8_t)pval;
@@ -315,13 +326,13 @@
// U
unsigned int pval = accumulator[k] + (count[k] >> 1);
- pval *= cpi->fixed_divide[count[k]];
+ pval *= fixed_divide[count[k]];
pval >>= 19;
dst1[byte] = (uint8_t)pval;
// V
pval = accumulator[m] + (count[m] >> 1);
- pval *= cpi->fixed_divide[count[m]];
+ pval *= fixed_divide[count[m]];
pval >>= 19;
dst2[byte] = (uint8_t)pval;
@@ -345,33 +356,74 @@
void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance) {
VP9_COMMON *const cm = &cpi->common;
int frame = 0;
+ int frames_to_blur_backward = 0;
+ int frames_to_blur_forward = 0;
int frames_to_blur = 0;
int start_frame = 0;
int strength = cpi->active_arnr_strength;
+ int blur_type = cpi->oxcf.arnr_type;
int max_frames = cpi->active_arnr_frames;
- int frames_to_blur_backward = distance;
- int frames_to_blur_forward = vp9_lookahead_depth(cpi->lookahead)
- - (distance + 1);
+ const int num_frames_backward = distance;
+ const int num_frames_forward = vp9_lookahead_depth(cpi->lookahead)
+ - (num_frames_backward + 1);
struct scale_factors sf;
- // Determine which input frames to filter.
- if (frames_to_blur_forward > frames_to_blur_backward)
- frames_to_blur_forward = frames_to_blur_backward;
+ switch (blur_type) {
+ case 1:
+ // Backward Blur
+ frames_to_blur_backward = num_frames_backward;
- if (frames_to_blur_backward > frames_to_blur_forward)
- frames_to_blur_backward = frames_to_blur_forward;
+ if (frames_to_blur_backward >= max_frames)
+ frames_to_blur_backward = max_frames - 1;
- // When max_frames is even we have 1 more frame backward than forward
- if (frames_to_blur_forward > (max_frames - 1) / 2)
- frames_to_blur_forward = (max_frames - 1) / 2;
+ frames_to_blur = frames_to_blur_backward + 1;
+ break;
- if (frames_to_blur_backward > (max_frames / 2))
- frames_to_blur_backward = max_frames / 2;
+ case 2:
+ // Forward Blur
+ frames_to_blur_forward = num_frames_forward;
- frames_to_blur = frames_to_blur_backward + frames_to_blur_forward + 1;
+ if (frames_to_blur_forward >= max_frames)
+ frames_to_blur_forward = max_frames - 1;
+
+ frames_to_blur = frames_to_blur_forward + 1;
+ break;
+
+ case 3:
+ default:
+ // Center Blur
+ frames_to_blur_forward = num_frames_forward;
+ frames_to_blur_backward = num_frames_backward;
+
+ if (frames_to_blur_forward > frames_to_blur_backward)
+ frames_to_blur_forward = frames_to_blur_backward;
+
+ if (frames_to_blur_backward > frames_to_blur_forward)
+ frames_to_blur_backward = frames_to_blur_forward;
+
+ // When max_frames is even we have 1 more frame backward than forward
+ if (frames_to_blur_forward > (max_frames - 1) / 2)
+ frames_to_blur_forward = ((max_frames - 1) / 2);
+
+ if (frames_to_blur_backward > (max_frames / 2))
+ frames_to_blur_backward = (max_frames / 2);
+
+ frames_to_blur = frames_to_blur_backward + frames_to_blur_forward + 1;
+ break;
+ }
start_frame = distance + frames_to_blur_forward;
+#ifdef DEBUGFWG
+ // DEBUG FWG
+ printf(
+ "max:%d FBCK:%d FFWD:%d ftb:%d ftbbck:%d ftbfwd:%d sei:%d lasei:%d "
+ "start:%d",
+ max_frames, num_frames_backward, num_frames_forward, frames_to_blur,
+ frames_to_blur_backward, frames_to_blur_forward, cpi->source_encode_index,
+ cpi->last_alt_ref_sei, start_frame);
+#endif
+
// Setup scaling factors. Scaling on each of the arnr frames is not supported
vp9_setup_scale_factors_for_frame(&sf,
get_frame_new_buffer(cm)->y_crop_width,
@@ -380,7 +432,7 @@
// Setup frame pointers, NULL indicates frame not included in filter
vp9_zero(cpi->frames);
- for (frame = 0; frame < frames_to_blur; ++frame) {
+ for (frame = 0; frame < frames_to_blur; frame++) {
int which_buffer = start_frame - frame;
struct lookahead_entry *buf = vp9_lookahead_peek(cpi->lookahead,
which_buffer);
@@ -394,11 +446,11 @@
void vp9_configure_arnr_filter(VP9_COMP *cpi,
const unsigned int frames_to_arnr,
const int group_boost) {
- int q;
int half_gf_int;
int frames_after_arf;
- int frames_bwd;
- int frames_fwd = (cpi->oxcf.arnr_max_frames - 1) >> 1;
+ int frames_bwd = cpi->oxcf.arnr_max_frames - 1;
+ int frames_fwd = cpi->oxcf.arnr_max_frames - 1;
+ int q;
// Define the arnr filter width for this group of frames. We only
// filter frames that lie within a distance of half the GF interval
@@ -410,26 +462,47 @@
frames_after_arf = vp9_lookahead_depth(cpi->lookahead)
- frames_to_arnr - 1;
- if (frames_fwd > frames_after_arf)
- frames_fwd = frames_after_arf;
- if (frames_fwd > half_gf_int)
- frames_fwd = half_gf_int;
+ switch (cpi->oxcf.arnr_type) {
+ case 1: // Backward filter
+ frames_fwd = 0;
+ if (frames_bwd > half_gf_int)
+ frames_bwd = half_gf_int;
+ break;
- frames_bwd = frames_fwd;
+ case 2: // Forward filter
+ if (frames_fwd > half_gf_int)
+ frames_fwd = half_gf_int;
+ if (frames_fwd > frames_after_arf)
+ frames_fwd = frames_after_arf;
+ frames_bwd = 0;
+ break;
- // For even length filter there is one more frame backward
- // than forward: e.g. len=6 ==> bbbAff, len=7 ==> bbbAfff.
- if (frames_bwd < half_gf_int)
- frames_bwd += (cpi->oxcf.arnr_max_frames + 1) & 0x1;
+ case 3: // Centered filter
+ default:
+ frames_fwd >>= 1;
+ if (frames_fwd > frames_after_arf)
+ frames_fwd = frames_after_arf;
+ if (frames_fwd > half_gf_int)
+ frames_fwd = half_gf_int;
+
+ frames_bwd = frames_fwd;
+
+ // For even length filter there is one more frame backward
+ // than forward: e.g. len=6 ==> bbbAff, len=7 ==> bbbAfff.
+ if (frames_bwd < half_gf_int)
+ frames_bwd += (cpi->oxcf.arnr_max_frames + 1) & 0x1;
+ break;
+ }
cpi->active_arnr_frames = frames_bwd + 1 + frames_fwd;
// Adjust the strength based on active max q
if (cpi->common.current_video_frame > 1)
- q = ((int)vp9_convert_qindex_to_q(cpi->rc.avg_frame_qindex[INTER_FRAME]));
+ q = ((int)vp9_convert_qindex_to_q(
+ cpi->rc.avg_frame_qindex[INTER_FRAME]));
else
- q = ((int)vp9_convert_qindex_to_q(cpi->rc.avg_frame_qindex[KEY_FRAME]));
-
+ q = ((int)vp9_convert_qindex_to_q(
+ cpi->rc.avg_frame_qindex[KEY_FRAME]));
if (q > 16) {
cpi->active_arnr_strength = cpi->oxcf.arnr_strength;
} else {
diff --git a/vp9/encoder/vp9_temporal_filter.h b/vp9/encoder/vp9_temporal_filter.h
index 3028d78..9453dc1 100644
--- a/vp9/encoder/vp9_temporal_filter.h
+++ b/vp9/encoder/vp9_temporal_filter.h
@@ -15,6 +15,7 @@
extern "C" {
#endif
+void vp9_temporal_filter_init();
void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance);
void vp9_configure_arnr_filter(VP9_COMP *cpi,
const unsigned int frames_to_arnr,
diff --git a/vp9/encoder/vp9_variance.c b/vp9/encoder/vp9_variance.c
index 520ee44..91d8ea4 100644
--- a/vp9/encoder/vp9_variance.c
+++ b/vp9/encoder/vp9_variance.c
@@ -113,12 +113,9 @@
unsigned int vp9_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
const uint8_t *b, int b_stride, \
unsigned int *sse) { \
- unsigned int var; \
- int avg; \
-\
- variance(a, a_stride, b, b_stride, W, H, &var, &avg); \
- *sse = var; \
- return var - (((int64_t)avg * avg) / (W * H)); \
+ int sum; \
+ variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
+ return *sse - (((int64_t)sum * sum) / (W * H)); \
}
#define SUBPIX_VAR(W, H) \
@@ -159,69 +156,36 @@
return vp9_variance##W##x##H##_c(temp3, W, dst, dst_stride, sse); \
}
-
-void vp9_get_sse_sum_16x16_c(const uint8_t *src_ptr, int source_stride,
- const uint8_t *ref_ptr, int ref_stride,
- unsigned int *sse, int *sum) {
- variance(src_ptr, source_stride, ref_ptr, ref_stride, 16, 16, sse, sum);
-}
-
-void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride,
- const uint8_t *ref_ptr, int ref_stride,
- unsigned int *sse, int *sum) {
- variance(src_ptr, source_stride, ref_ptr, ref_stride, 8, 8, sse, sum);
-}
-
-unsigned int vp9_mse16x16_c(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int recon_stride,
+unsigned int vp9_mse16x16_c(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
unsigned int *sse) {
- unsigned int var;
- int avg;
-
- variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
- *sse = var;
- return var;
+ int sum;
+ variance(src, src_stride, ref, ref_stride, 16, 16, sse, &sum);
+ return *sse;
}
-unsigned int vp9_mse16x8_c(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int recon_stride,
+unsigned int vp9_mse16x8_c(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
unsigned int *sse) {
- unsigned int var;
- int avg;
-
- variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg);
- *sse = var;
- return var;
+ int sum;
+ variance(src, src_stride, ref, ref_stride, 16, 8, sse, &sum);
+ return *sse;
}
-unsigned int vp9_mse8x16_c(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int recon_stride,
+unsigned int vp9_mse8x16_c(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
unsigned int *sse) {
- unsigned int var;
- int avg;
-
- variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg);
- *sse = var;
- return var;
+ int sum;
+ variance(src, src_stride, ref, ref_stride, 8, 16, sse, &sum);
+ return *sse;
}
-unsigned int vp9_mse8x8_c(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int recon_stride,
+unsigned int vp9_mse8x8_c(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
unsigned int *sse) {
- unsigned int var;
- int avg;
-
- variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg);
- *sse = var;
- return var;
+ int sum;
+ variance(src, src_stride, ref, ref_stride, 8, 8, sse, &sum);
+ return *sse;
}
VAR(4, 4)
@@ -276,87 +240,6 @@
SUBPIX_VAR(64, 64)
SUBPIX_AVG_VAR(64, 64)
-unsigned int vp9_variance_halfpixvar16x16_h_c(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int recon_stride,
- unsigned int *sse) {
- return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 8, 0,
- ref_ptr, recon_stride, sse);
-}
-
-unsigned int vp9_variance_halfpixvar32x32_h_c(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int recon_stride,
- unsigned int *sse) {
- return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 0,
- ref_ptr, recon_stride, sse);
-}
-
-unsigned int vp9_variance_halfpixvar64x64_h_c(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int recon_stride,
- unsigned int *sse) {
- return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 8, 0,
- ref_ptr, recon_stride, sse);
-}
-
-unsigned int vp9_variance_halfpixvar16x16_v_c(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int recon_stride,
- unsigned int *sse) {
- return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 0, 8,
- ref_ptr, recon_stride, sse);
-}
-
-unsigned int vp9_variance_halfpixvar32x32_v_c(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int recon_stride,
- unsigned int *sse) {
- return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 0, 8,
- ref_ptr, recon_stride, sse);
-}
-
-unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int recon_stride,
- unsigned int *sse) {
- return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 0, 8,
- ref_ptr, recon_stride, sse);
-}
-
-unsigned int vp9_variance_halfpixvar16x16_hv_c(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int recon_stride,
- unsigned int *sse) {
- return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 8, 8,
- ref_ptr, recon_stride, sse);
-}
-
-unsigned int vp9_variance_halfpixvar32x32_hv_c(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int recon_stride,
- unsigned int *sse) {
- return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 8,
- ref_ptr, recon_stride, sse);
-}
-
-unsigned int vp9_variance_halfpixvar64x64_hv_c(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int recon_stride,
- unsigned int *sse) {
- return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 8, 8,
- ref_ptr, recon_stride, sse);
-}
-
void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width,
int height, const uint8_t *ref, int ref_stride) {
int i, j;
diff --git a/vp9/encoder/vp9_variance.h b/vp9/encoder/vp9_variance.h
index 4c8be71..c47fe13 100644
--- a/vp9/encoder/vp9_variance.h
+++ b/vp9/encoder/vp9_variance.h
@@ -69,22 +69,12 @@
unsigned int *sse,
const uint8_t *second_pred);
-typedef unsigned int (*vp9_getmbss_fn_t)(const short *);
-
-typedef unsigned int (*vp9_get16x16prederror_fn_t)(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int ref_stride);
-
typedef struct vp9_variance_vtable {
vp9_sad_fn_t sdf;
vp9_sad_avg_fn_t sdaf;
vp9_variance_fn_t vf;
vp9_subpixvariance_fn_t svf;
vp9_subp_avg_variance_fn_t svaf;
- vp9_variance_fn_t svf_halfpix_h;
- vp9_variance_fn_t svf_halfpix_v;
- vp9_variance_fn_t svf_halfpix_hv;
vp9_sad_multi_fn_t sdx3f;
vp9_sad_multi_fn_t sdx8f;
vp9_sad_multi_d_fn_t sdx4df;
diff --git a/vp9/encoder/x86/vp9_dct32x32_sse2.c b/vp9/encoder/x86/vp9_dct32x32_sse2.c
index 2d59775..42fdbbd 100644
--- a/vp9/encoder/x86/vp9_dct32x32_sse2.c
+++ b/vp9/encoder/x86/vp9_dct32x32_sse2.c
@@ -12,6 +12,9 @@
#include "vp9/common/vp9_idct.h" // for cospi constants
#include "vpx_ports/mem.h"
+#define pair_set_epi32(a, b) \
+ _mm_set_epi32(b, a, b, a)
+
#if FDCT32x32_HIGH_PRECISION
static INLINE __m128i k_madd_epi32(__m128i a, __m128i b) {
__m128i buf0, buf1;
diff --git a/vp9/encoder/x86/vp9_dct_mmx.asm b/vp9/encoder/x86/vp9_dct_mmx.asm
new file mode 100644
index 0000000..f71181c
--- /dev/null
+++ b/vp9/encoder/x86/vp9_dct_mmx.asm
@@ -0,0 +1,70 @@
+;
+; Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+%include "third_party/x86inc/x86inc.asm"
+
+SECTION .text
+
+%macro TRANSFORM_COLS 0
+ paddw m0, m1
+ movq m4, m0
+ psubw m3, m2
+ psubw m4, m3
+ psraw m4, 1
+ movq m5, m4
+ psubw m5, m1 ;b1
+ psubw m4, m2 ;c1
+ psubw m0, m4
+ paddw m3, m5
+ ; m0 a0
+ SWAP 1, 4 ; m1 c1
+ SWAP 2, 3 ; m2 d1
+ SWAP 3, 5 ; m3 b1
+%endmacro
+
+%macro TRANSPOSE_4X4 0
+ movq m4, m0
+ movq m5, m2
+ punpcklwd m4, m1
+ punpckhwd m0, m1
+ punpcklwd m5, m3
+ punpckhwd m2, m3
+ movq m1, m4
+ movq m3, m0
+ punpckldq m1, m5
+ punpckhdq m4, m5
+ punpckldq m3, m2
+ punpckhdq m0, m2
+ SWAP 2, 3, 0, 1, 4
+%endmacro
+
+INIT_MMX mmx
+cglobal fwht4x4, 3, 4, 8, input, output, stride
+ lea r3q, [inputq + strideq*4]
+ movq m0, [inputq] ;a1
+ movq m1, [inputq + strideq*2] ;b1
+ movq m2, [r3q] ;c1
+ movq m3, [r3q + strideq*2] ;d1
+
+ TRANSFORM_COLS
+ TRANSPOSE_4X4
+ TRANSFORM_COLS
+ TRANSPOSE_4X4
+
+ psllw m0, 2
+ psllw m1, 2
+ psllw m2, 2
+ psllw m3, 2
+
+ movq [outputq], m0
+ movq [outputq + 8], m1
+ movq [outputq + 16], m2
+ movq [outputq + 24], m3
+
+ RET
diff --git a/vp9/encoder/x86/vp9_dct_ssse3.asm b/vp9/encoder/x86/vp9_dct_ssse3.asm
new file mode 100644
index 0000000..8723a71
--- /dev/null
+++ b/vp9/encoder/x86/vp9_dct_ssse3.asm
@@ -0,0 +1,174 @@
+;
+; Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+%include "third_party/x86inc/x86inc.asm"
+
+; This file provides SSSE3 version of the forward transformation. Part
+; of the macro definitions are originally derived from the ffmpeg project.
+; The current version applies to x86 64-bit only.
+
+SECTION_RODATA
+
+pw_11585x2: times 8 dw 23170
+pd_8192: times 4 dd 8192
+
+%macro TRANSFORM_COEFFS 2
+pw_%1_%2: dw %1, %2, %1, %2, %1, %2, %1, %2
+pw_%2_m%1: dw %2, -%1, %2, -%1, %2, -%1, %2, -%1
+%endmacro
+
+TRANSFORM_COEFFS 15137, 6270
+TRANSFORM_COEFFS 16069, 3196
+TRANSFORM_COEFFS 9102, 13623
+
+SECTION .text
+
+%if ARCH_X86_64
+%macro SUM_SUB 3
+ psubw m%3, m%1, m%2
+ paddw m%1, m%2
+ SWAP %2, %3
+%endmacro
+
+; butterfly operation
+%macro MUL_ADD_2X 6 ; dst1, dst2, src, round, coefs1, coefs2
+ pmaddwd m%1, m%3, %5
+ pmaddwd m%2, m%3, %6
+ paddd m%1, %4
+ paddd m%2, %4
+ psrad m%1, 14
+ psrad m%2, 14
+%endmacro
+
+%macro BUTTERFLY_4X 7 ; dst1, dst2, coef1, coef2, round, tmp1, tmp2
+ punpckhwd m%6, m%2, m%1
+ MUL_ADD_2X %7, %6, %6, %5, [pw_%4_%3], [pw_%3_m%4]
+ punpcklwd m%2, m%1
+ MUL_ADD_2X %1, %2, %2, %5, [pw_%4_%3], [pw_%3_m%4]
+ packssdw m%1, m%7
+ packssdw m%2, m%6
+%endmacro
+
+; matrix transpose
+%macro INTERLEAVE_2X 4
+ punpckh%1 m%4, m%2, m%3
+ punpckl%1 m%2, m%3
+ SWAP %3, %4
+%endmacro
+
+%macro TRANSPOSE8X8 9
+ INTERLEAVE_2X wd, %1, %2, %9
+ INTERLEAVE_2X wd, %3, %4, %9
+ INTERLEAVE_2X wd, %5, %6, %9
+ INTERLEAVE_2X wd, %7, %8, %9
+
+ INTERLEAVE_2X dq, %1, %3, %9
+ INTERLEAVE_2X dq, %2, %4, %9
+ INTERLEAVE_2X dq, %5, %7, %9
+ INTERLEAVE_2X dq, %6, %8, %9
+
+ INTERLEAVE_2X qdq, %1, %5, %9
+ INTERLEAVE_2X qdq, %3, %7, %9
+ INTERLEAVE_2X qdq, %2, %6, %9
+ INTERLEAVE_2X qdq, %4, %8, %9
+
+ SWAP %2, %5
+ SWAP %4, %7
+%endmacro
+
+; 1D forward 8x8 DCT transform
+%macro FDCT8_1D 0
+ SUM_SUB 0, 7, 9
+ SUM_SUB 1, 6, 9
+ SUM_SUB 2, 5, 9
+ SUM_SUB 3, 4, 9
+
+ SUM_SUB 0, 3, 9
+ SUM_SUB 1, 2, 9
+ SUM_SUB 6, 5, 9
+ SUM_SUB 0, 1, 9
+
+ BUTTERFLY_4X 2, 3, 6270, 15137, m8, 9, 10
+
+ pmulhrsw m6, m12
+ pmulhrsw m5, m12
+ pmulhrsw m0, m12
+ pmulhrsw m1, m12
+
+ SUM_SUB 4, 5, 9
+ SUM_SUB 7, 6, 9
+ BUTTERFLY_4X 4, 7, 3196, 16069, m8, 9, 10
+ BUTTERFLY_4X 5, 6, 13623, 9102, m8, 9, 10
+ SWAP 1, 4
+ SWAP 3, 6
+%endmacro
+
+%macro DIVIDE_ROUND_2X 4 ; dst1, dst2, tmp1, tmp2
+ psraw m%3, m%1, 15
+ psraw m%4, m%2, 15
+ psubw m%1, m%3
+ psubw m%2, m%4
+ psraw m%1, 1
+ psraw m%2, 1
+%endmacro
+
+INIT_XMM ssse3
+cglobal fdct8x8, 3, 5, 13, input, output, stride
+
+ mova m8, [pd_8192]
+ mova m12, [pw_11585x2]
+ pxor m11, m11
+
+ lea r3, [2 * strideq]
+ lea r4, [4 * strideq]
+ mova m0, [inputq]
+ mova m1, [inputq + r3]
+ lea inputq, [inputq + r4]
+ mova m2, [inputq]
+ mova m3, [inputq + r3]
+ lea inputq, [inputq + r4]
+ mova m4, [inputq]
+ mova m5, [inputq + r3]
+ lea inputq, [inputq + r4]
+ mova m6, [inputq]
+ mova m7, [inputq + r3]
+
+ ; left shift by 2 to increase forward transformation precision
+ psllw m0, 2
+ psllw m1, 2
+ psllw m2, 2
+ psllw m3, 2
+ psllw m4, 2
+ psllw m5, 2
+ psllw m6, 2
+ psllw m7, 2
+
+ ; column transform
+ FDCT8_1D
+ TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9
+
+ FDCT8_1D
+ TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9
+
+ DIVIDE_ROUND_2X 0, 1, 9, 10
+ DIVIDE_ROUND_2X 2, 3, 9, 10
+ DIVIDE_ROUND_2X 4, 5, 9, 10
+ DIVIDE_ROUND_2X 6, 7, 9, 10
+
+ mova [outputq + 0], m0
+ mova [outputq + 16], m1
+ mova [outputq + 32], m2
+ mova [outputq + 48], m3
+ mova [outputq + 64], m4
+ mova [outputq + 80], m5
+ mova [outputq + 96], m6
+ mova [outputq + 112], m7
+
+ RET
+%endif
diff --git a/vp9/encoder/x86/vp9_subpel_variance_impl_sse2.asm b/vp9/encoder/x86/vp9_subpel_variance_impl_sse2.asm
deleted file mode 100644
index 2ecc23e..0000000
--- a/vp9/encoder/x86/vp9_subpel_variance_impl_sse2.asm
+++ /dev/null
@@ -1,337 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-%include "vpx_ports/x86_abi_support.asm"
-
-;void vp9_half_horiz_vert_variance16x_h_sse2
-;(
-; unsigned char *ref_ptr,
-; int ref_pixels_per_line,
-; unsigned char *src_ptr,
-; int src_pixels_per_line,
-; unsigned int Height,
-; int *sum,
-; unsigned int *sumsquared
-;)
-global sym(vp9_half_horiz_vert_variance16x_h_sse2) PRIVATE
-sym(vp9_half_horiz_vert_variance16x_h_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 7
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- pxor xmm6, xmm6 ; error accumulator
- pxor xmm7, xmm7 ; sse eaccumulator
- mov rsi, arg(0) ;ref_ptr ;
-
- mov rdi, arg(2) ;src_ptr ;
- movsxd rcx, dword ptr arg(4) ;Height ;
- movsxd rax, dword ptr arg(1) ;ref_pixels_per_line
- movsxd rdx, dword ptr arg(3) ;src_pixels_per_line
-
- pxor xmm0, xmm0 ;
-
- movdqu xmm5, XMMWORD PTR [rsi]
- movdqu xmm3, XMMWORD PTR [rsi+1]
- pavgb xmm5, xmm3 ; xmm5 = avg(xmm1,xmm3) horizontal line 1
-
- lea rsi, [rsi + rax]
-
-.half_horiz_vert_variance16x_h_1:
- movdqu xmm1, XMMWORD PTR [rsi] ;
- movdqu xmm2, XMMWORD PTR [rsi+1] ;
- pavgb xmm1, xmm2 ; xmm1 = avg(xmm1,xmm3) horizontal line i+1
-
- pavgb xmm5, xmm1 ; xmm = vertical average of the above
-
- movdqa xmm4, xmm5
- punpcklbw xmm5, xmm0 ; xmm5 = words of above
- punpckhbw xmm4, xmm0
-
- movq xmm3, QWORD PTR [rdi] ; xmm3 = d0,d1,d2..d7
- punpcklbw xmm3, xmm0 ; xmm3 = words of above
- psubw xmm5, xmm3 ; xmm5 -= xmm3
-
- movq xmm3, QWORD PTR [rdi+8]
- punpcklbw xmm3, xmm0
- psubw xmm4, xmm3
-
- paddw xmm6, xmm5 ; xmm6 += accumulated column differences
- paddw xmm6, xmm4
- pmaddwd xmm5, xmm5 ; xmm5 *= xmm5
- pmaddwd xmm4, xmm4
- paddd xmm7, xmm5 ; xmm7 += accumulated square column differences
- paddd xmm7, xmm4
-
- movdqa xmm5, xmm1 ; save xmm1 for use on the next row
-
- lea rsi, [rsi + rax]
- lea rdi, [rdi + rdx]
-
- sub rcx, 1 ;
- jnz .half_horiz_vert_variance16x_h_1 ;
-
- pxor xmm1, xmm1
- pxor xmm5, xmm5
-
- punpcklwd xmm0, xmm6
- punpckhwd xmm1, xmm6
- psrad xmm0, 16
- psrad xmm1, 16
- paddd xmm0, xmm1
- movdqa xmm1, xmm0
-
- movdqa xmm6, xmm7
- punpckldq xmm6, xmm5
- punpckhdq xmm7, xmm5
- paddd xmm6, xmm7
-
- punpckldq xmm0, xmm5
- punpckhdq xmm1, xmm5
- paddd xmm0, xmm1
-
- movdqa xmm7, xmm6
- movdqa xmm1, xmm0
-
- psrldq xmm7, 8
- psrldq xmm1, 8
-
- paddd xmm6, xmm7
- paddd xmm0, xmm1
-
- mov rsi, arg(5) ;[Sum]
- mov rdi, arg(6) ;[SSE]
-
- movd [rsi], xmm0
- movd [rdi], xmm6
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void vp9_half_vert_variance16x_h_sse2
-;(
-; unsigned char *ref_ptr,
-; int ref_pixels_per_line,
-; unsigned char *src_ptr,
-; int src_pixels_per_line,
-; unsigned int Height,
-; int *sum,
-; unsigned int *sumsquared
-;)
-global sym(vp9_half_vert_variance16x_h_sse2) PRIVATE
-sym(vp9_half_vert_variance16x_h_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 7
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- pxor xmm6, xmm6 ; error accumulator
- pxor xmm7, xmm7 ; sse eaccumulator
- mov rsi, arg(0) ;ref_ptr
-
- mov rdi, arg(2) ;src_ptr
- movsxd rcx, dword ptr arg(4) ;Height
- movsxd rax, dword ptr arg(1) ;ref_pixels_per_line
- movsxd rdx, dword ptr arg(3) ;src_pixels_per_line
-
- movdqu xmm5, XMMWORD PTR [rsi]
- lea rsi, [rsi + rax ]
- pxor xmm0, xmm0
-
-.half_vert_variance16x_h_1:
- movdqu xmm3, XMMWORD PTR [rsi]
-
- pavgb xmm5, xmm3 ; xmm5 = avg(xmm1,xmm3)
- movdqa xmm4, xmm5
- punpcklbw xmm5, xmm0
- punpckhbw xmm4, xmm0
-
- movq xmm2, QWORD PTR [rdi]
- punpcklbw xmm2, xmm0
- psubw xmm5, xmm2
- movq xmm2, QWORD PTR [rdi+8]
- punpcklbw xmm2, xmm0
- psubw xmm4, xmm2
-
- paddw xmm6, xmm5 ; xmm6 += accumulated column differences
- paddw xmm6, xmm4
- pmaddwd xmm5, xmm5 ; xmm5 *= xmm5
- pmaddwd xmm4, xmm4
- paddd xmm7, xmm5 ; xmm7 += accumulated square column differences
- paddd xmm7, xmm4
-
- movdqa xmm5, xmm3
-
- lea rsi, [rsi + rax]
- lea rdi, [rdi + rdx]
-
- sub rcx, 1
- jnz .half_vert_variance16x_h_1
-
- pxor xmm1, xmm1
- pxor xmm5, xmm5
-
- punpcklwd xmm0, xmm6
- punpckhwd xmm1, xmm6
- psrad xmm0, 16
- psrad xmm1, 16
- paddd xmm0, xmm1
- movdqa xmm1, xmm0
-
- movdqa xmm6, xmm7
- punpckldq xmm6, xmm5
- punpckhdq xmm7, xmm5
- paddd xmm6, xmm7
-
- punpckldq xmm0, xmm5
- punpckhdq xmm1, xmm5
- paddd xmm0, xmm1
-
- movdqa xmm7, xmm6
- movdqa xmm1, xmm0
-
- psrldq xmm7, 8
- psrldq xmm1, 8
-
- paddd xmm6, xmm7
- paddd xmm0, xmm1
-
- mov rsi, arg(5) ;[Sum]
- mov rdi, arg(6) ;[SSE]
-
- movd [rsi], xmm0
- movd [rdi], xmm6
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void vp9_half_horiz_variance16x_h_sse2
-;(
-; unsigned char *ref_ptr,
-; int ref_pixels_per_line,
-; unsigned char *src_ptr,
-; int src_pixels_per_line,
-; unsigned int Height,
-; int *sum,
-; unsigned int *sumsquared
-;)
-global sym(vp9_half_horiz_variance16x_h_sse2) PRIVATE
-sym(vp9_half_horiz_variance16x_h_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 7
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- pxor xmm6, xmm6 ; error accumulator
- pxor xmm7, xmm7 ; sse eaccumulator
- mov rsi, arg(0) ;ref_ptr ;
-
- mov rdi, arg(2) ;src_ptr ;
- movsxd rcx, dword ptr arg(4) ;Height ;
- movsxd rax, dword ptr arg(1) ;ref_pixels_per_line
- movsxd rdx, dword ptr arg(3) ;src_pixels_per_line
-
- pxor xmm0, xmm0 ;
-
-.half_horiz_variance16x_h_1:
- movdqu xmm5, XMMWORD PTR [rsi] ; xmm5 = s0,s1,s2..s15
- movdqu xmm3, XMMWORD PTR [rsi+1] ; xmm3 = s1,s2,s3..s16
-
- pavgb xmm5, xmm3 ; xmm5 = avg(xmm1,xmm3)
- movdqa xmm1, xmm5
- punpcklbw xmm5, xmm0 ; xmm5 = words of above
- punpckhbw xmm1, xmm0
-
- movq xmm3, QWORD PTR [rdi] ; xmm3 = d0,d1,d2..d7
- punpcklbw xmm3, xmm0 ; xmm3 = words of above
- movq xmm2, QWORD PTR [rdi+8]
- punpcklbw xmm2, xmm0
-
- psubw xmm5, xmm3 ; xmm5 -= xmm3
- psubw xmm1, xmm2
- paddw xmm6, xmm5 ; xmm6 += accumulated column differences
- paddw xmm6, xmm1
- pmaddwd xmm5, xmm5 ; xmm5 *= xmm5
- pmaddwd xmm1, xmm1
- paddd xmm7, xmm5 ; xmm7 += accumulated square column differences
- paddd xmm7, xmm1
-
- lea rsi, [rsi + rax]
- lea rdi, [rdi + rdx]
-
- sub rcx, 1 ;
- jnz .half_horiz_variance16x_h_1 ;
-
- pxor xmm1, xmm1
- pxor xmm5, xmm5
-
- punpcklwd xmm0, xmm6
- punpckhwd xmm1, xmm6
- psrad xmm0, 16
- psrad xmm1, 16
- paddd xmm0, xmm1
- movdqa xmm1, xmm0
-
- movdqa xmm6, xmm7
- punpckldq xmm6, xmm5
- punpckhdq xmm7, xmm5
- paddd xmm6, xmm7
-
- punpckldq xmm0, xmm5
- punpckhdq xmm1, xmm5
- paddd xmm0, xmm1
-
- movdqa xmm7, xmm6
- movdqa xmm1, xmm0
-
- psrldq xmm7, 8
- psrldq xmm1, 8
-
- paddd xmm6, xmm7
- paddd xmm0, xmm1
-
- mov rsi, arg(5) ;[Sum]
- mov rdi, arg(6) ;[SSE]
-
- movd [rsi], xmm0
- movd [rdi], xmm6
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
diff --git a/vp9/encoder/x86/vp9_variance_impl_sse2.asm b/vp9/encoder/x86/vp9_variance_impl_sse2.asm
index 2c50881..4830412 100644
--- a/vp9/encoder/x86/vp9_variance_impl_sse2.asm
+++ b/vp9/encoder/x86/vp9_variance_impl_sse2.asm
@@ -398,337 +398,4 @@
pop rbp
ret
-;void vp9_half_horiz_vert_variance8x_h_sse2
-;(
-; unsigned char *ref_ptr,
-; int ref_pixels_per_line,
-; unsigned char *src_ptr,
-; int src_pixels_per_line,
-; unsigned int Height,
-; int *sum,
-; unsigned int *sumsquared
-;)
-global sym(vp9_half_horiz_vert_variance8x_h_sse2) PRIVATE
-sym(vp9_half_horiz_vert_variance8x_h_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 7
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
-%if ABI_IS_32BIT=0
- movsxd r8, dword ptr arg(1) ;ref_pixels_per_line
- movsxd r9, dword ptr arg(3) ;src_pixels_per_line
-%endif
-
- pxor xmm6, xmm6 ; error accumulator
- pxor xmm7, xmm7 ; sse eaccumulator
- mov rsi, arg(0) ;ref_ptr ;
-
- mov rdi, arg(2) ;src_ptr ;
- movsxd rcx, dword ptr arg(4) ;Height ;
- movsxd rax, dword ptr arg(1) ;ref_pixels_per_line
-
- pxor xmm0, xmm0 ;
-
- movq xmm5, QWORD PTR [rsi] ; xmm5 = s0,s1,s2..s8
- movq xmm3, QWORD PTR [rsi+1] ; xmm3 = s1,s2,s3..s9
- pavgb xmm5, xmm3 ; xmm5 = avg(xmm1,xmm3) horizontal line 1
-
-%if ABI_IS_32BIT
- add rsi, dword ptr arg(1) ;ref_pixels_per_line ; next source
-%else
- add rsi, r8
-%endif
-
-.half_horiz_vert_variance8x_h_1:
-
- movq xmm1, QWORD PTR [rsi] ;
- movq xmm2, QWORD PTR [rsi+1] ;
- pavgb xmm1, xmm2 ; xmm1 = avg(xmm1,xmm3) horizontal line i+1
-
- pavgb xmm5, xmm1 ; xmm = vertical average of the above
- punpcklbw xmm5, xmm0 ; xmm5 = words of above
-
- movq xmm3, QWORD PTR [rdi] ; xmm3 = d0,d1,d2..d8
- punpcklbw xmm3, xmm0 ; xmm3 = words of above
-
- psubw xmm5, xmm3 ; xmm5 -= xmm3
- paddw xmm6, xmm5 ; xmm6 += accumulated column differences
- pmaddwd xmm5, xmm5 ; xmm5 *= xmm5
- paddd xmm7, xmm5 ; xmm7 += accumulated square column differences
-
- movdqa xmm5, xmm1 ; save xmm1 for use on the next row
-
-%if ABI_IS_32BIT
- add esi, dword ptr arg(1) ;ref_pixels_per_line ; next source
- add edi, dword ptr arg(3) ;src_pixels_per_line ; next destination
-%else
- add rsi, r8
- add rdi, r9
-%endif
-
- sub rcx, 1 ;
- jnz .half_horiz_vert_variance8x_h_1 ;
-
- movdq2q mm6, xmm6 ;
- movdq2q mm7, xmm7 ;
-
- psrldq xmm6, 8
- psrldq xmm7, 8
-
- movdq2q mm2, xmm6
- movdq2q mm3, xmm7
-
- paddw mm6, mm2
- paddd mm7, mm3
-
- pxor mm3, mm3 ;
- pxor mm2, mm2 ;
-
- punpcklwd mm2, mm6 ;
- punpckhwd mm3, mm6 ;
-
- paddd mm2, mm3 ;
- movq mm6, mm2 ;
-
- psrlq mm6, 32 ;
- paddd mm2, mm6 ;
-
- psrad mm2, 16 ;
- movq mm4, mm7 ;
-
- psrlq mm4, 32 ;
- paddd mm4, mm7 ;
-
- mov rsi, arg(5) ; sum
- mov rdi, arg(6) ; sumsquared
-
- movd [rsi], mm2 ;
- movd [rdi], mm4 ;
-
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void vp9_half_vert_variance8x_h_sse2
-;(
-; unsigned char *ref_ptr,
-; int ref_pixels_per_line,
-; unsigned char *src_ptr,
-; int src_pixels_per_line,
-; unsigned int Height,
-; int *sum,
-; unsigned int *sumsquared
-;)
-global sym(vp9_half_vert_variance8x_h_sse2) PRIVATE
-sym(vp9_half_vert_variance8x_h_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 7
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
-%if ABI_IS_32BIT=0
- movsxd r8, dword ptr arg(1) ;ref_pixels_per_line
- movsxd r9, dword ptr arg(3) ;src_pixels_per_line
-%endif
-
- pxor xmm6, xmm6 ; error accumulator
- pxor xmm7, xmm7 ; sse eaccumulator
- mov rsi, arg(0) ;ref_ptr ;
-
- mov rdi, arg(2) ;src_ptr ;
- movsxd rcx, dword ptr arg(4) ;Height ;
- movsxd rax, dword ptr arg(1) ;ref_pixels_per_line
-
- pxor xmm0, xmm0 ;
-.half_vert_variance8x_h_1:
- movq xmm5, QWORD PTR [rsi] ; xmm5 = s0,s1,s2..s8
- movq xmm3, QWORD PTR [rsi+rax] ; xmm3 = s1,s2,s3..s9
-
- pavgb xmm5, xmm3 ; xmm5 = avg(xmm1,xmm3)
- punpcklbw xmm5, xmm0 ; xmm5 = words of above
-
- movq xmm3, QWORD PTR [rdi] ; xmm3 = d0,d1,d2..d8
- punpcklbw xmm3, xmm0 ; xmm3 = words of above
-
- psubw xmm5, xmm3 ; xmm5 -= xmm3
- paddw xmm6, xmm5 ; xmm6 += accumulated column differences
- pmaddwd xmm5, xmm5 ; xmm5 *= xmm5
- paddd xmm7, xmm5 ; xmm7 += accumulated square column differences
-
-%if ABI_IS_32BIT
- add esi, dword ptr arg(1) ;ref_pixels_per_line ; next source
- add edi, dword ptr arg(3) ;src_pixels_per_line ; next destination
-%else
- add rsi, r8
- add rdi, r9
-%endif
-
- sub rcx, 1 ;
- jnz .half_vert_variance8x_h_1 ;
-
- movdq2q mm6, xmm6 ;
- movdq2q mm7, xmm7 ;
-
- psrldq xmm6, 8
- psrldq xmm7, 8
-
- movdq2q mm2, xmm6
- movdq2q mm3, xmm7
-
- paddw mm6, mm2
- paddd mm7, mm3
-
- pxor mm3, mm3 ;
- pxor mm2, mm2 ;
-
- punpcklwd mm2, mm6 ;
- punpckhwd mm3, mm6 ;
-
- paddd mm2, mm3 ;
- movq mm6, mm2 ;
-
- psrlq mm6, 32 ;
- paddd mm2, mm6 ;
-
- psrad mm2, 16 ;
- movq mm4, mm7 ;
-
- psrlq mm4, 32 ;
- paddd mm4, mm7 ;
-
- mov rsi, arg(5) ; sum
- mov rdi, arg(6) ; sumsquared
-
- movd [rsi], mm2 ;
- movd [rdi], mm4 ;
-
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-;void vp9_half_horiz_variance8x_h_sse2
-;(
-; unsigned char *ref_ptr,
-; int ref_pixels_per_line,
-; unsigned char *src_ptr,
-; int src_pixels_per_line,
-; unsigned int Height,
-; int *sum,
-; unsigned int *sumsquared
-;)
-global sym(vp9_half_horiz_variance8x_h_sse2) PRIVATE
-sym(vp9_half_horiz_variance8x_h_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 7
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
-%if ABI_IS_32BIT=0
- movsxd r8, dword ptr arg(1) ;ref_pixels_per_line
- movsxd r9, dword ptr arg(3) ;src_pixels_per_line
-%endif
-
- pxor xmm6, xmm6 ; error accumulator
- pxor xmm7, xmm7 ; sse eaccumulator
- mov rsi, arg(0) ;ref_ptr ;
-
- mov rdi, arg(2) ;src_ptr ;
- movsxd rcx, dword ptr arg(4) ;Height ;
-
- pxor xmm0, xmm0 ;
-.half_horiz_variance8x_h_1:
- movq xmm5, QWORD PTR [rsi] ; xmm5 = s0,s1,s2..s8
- movq xmm3, QWORD PTR [rsi+1] ; xmm3 = s1,s2,s3..s9
-
- pavgb xmm5, xmm3 ; xmm5 = avg(xmm1,xmm3)
- punpcklbw xmm5, xmm0 ; xmm5 = words of above
-
- movq xmm3, QWORD PTR [rdi] ; xmm3 = d0,d1,d2..d8
- punpcklbw xmm3, xmm0 ; xmm3 = words of above
-
- psubw xmm5, xmm3 ; xmm5 -= xmm3
- paddw xmm6, xmm5 ; xmm6 += accumulated column differences
- pmaddwd xmm5, xmm5 ; xmm5 *= xmm5
- paddd xmm7, xmm5 ; xmm7 += accumulated square column differences
-
-%if ABI_IS_32BIT
- add esi, dword ptr arg(1) ;ref_pixels_per_line ; next source
- add edi, dword ptr arg(3) ;src_pixels_per_line ; next destination
-%else
- add rsi, r8
- add rdi, r9
-%endif
- sub rcx, 1 ;
- jnz .half_horiz_variance8x_h_1 ;
-
- movdq2q mm6, xmm6 ;
- movdq2q mm7, xmm7 ;
-
- psrldq xmm6, 8
- psrldq xmm7, 8
-
- movdq2q mm2, xmm6
- movdq2q mm3, xmm7
-
- paddw mm6, mm2
- paddd mm7, mm3
-
- pxor mm3, mm3 ;
- pxor mm2, mm2 ;
-
- punpcklwd mm2, mm6 ;
- punpckhwd mm3, mm6 ;
-
- paddd mm2, mm3 ;
- movq mm6, mm2 ;
-
- psrlq mm6, 32 ;
- paddd mm2, mm6 ;
-
- psrad mm2, 16 ;
- movq mm4, mm7 ;
-
- psrlq mm4, 32 ;
- paddd mm4, mm7 ;
-
- mov rsi, arg(5) ; sum
- mov rdi, arg(6) ; sumsquared
-
- movd [rsi], mm2 ;
- movd [rdi], mm4 ;
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
diff --git a/vp9/encoder/x86/vp9_variance_sse2.c b/vp9/encoder/x86/vp9_variance_sse2.c
index 9e65694..41f2259 100644
--- a/vp9/encoder/x86/vp9_variance_sse2.c
+++ b/vp9/encoder/x86/vp9_variance_sse2.c
@@ -42,66 +42,6 @@
unsigned int *SSE,
int *Sum
);
-void vp9_half_horiz_vert_variance8x_h_sse2
-(
- const unsigned char *ref_ptr,
- int ref_pixels_per_line,
- const unsigned char *src_ptr,
- int src_pixels_per_line,
- unsigned int Height,
- int *sum,
- unsigned int *sumsquared
-);
-void vp9_half_horiz_vert_variance16x_h_sse2
-(
- const unsigned char *ref_ptr,
- int ref_pixels_per_line,
- const unsigned char *src_ptr,
- int src_pixels_per_line,
- unsigned int Height,
- int *sum,
- unsigned int *sumsquared
-);
-void vp9_half_horiz_variance8x_h_sse2
-(
- const unsigned char *ref_ptr,
- int ref_pixels_per_line,
- const unsigned char *src_ptr,
- int src_pixels_per_line,
- unsigned int Height,
- int *sum,
- unsigned int *sumsquared
-);
-void vp9_half_horiz_variance16x_h_sse2
-(
- const unsigned char *ref_ptr,
- int ref_pixels_per_line,
- const unsigned char *src_ptr,
- int src_pixels_per_line,
- unsigned int Height,
- int *sum,
- unsigned int *sumsquared
-);
-void vp9_half_vert_variance8x_h_sse2
-(
- const unsigned char *ref_ptr,
- int ref_pixels_per_line,
- const unsigned char *src_ptr,
- int src_pixels_per_line,
- unsigned int Height,
- int *sum,
- unsigned int *sumsquared
-);
-void vp9_half_vert_variance16x_h_sse2
-(
- const unsigned char *ref_ptr,
- int ref_pixels_per_line,
- const unsigned char *src_ptr,
- int src_pixels_per_line,
- unsigned int Height,
- int *sum,
- unsigned int *sumsquared
-);
typedef unsigned int (*get_var_sse2) (
const unsigned char *src_ptr,
@@ -494,58 +434,3 @@
#undef FNS
#undef FN
-
-unsigned int vp9_variance_halfpixvar16x16_h_sse2(
- const unsigned char *src_ptr,
- int src_pixels_per_line,
- const unsigned char *dst_ptr,
- int dst_pixels_per_line,
- unsigned int *sse) {
- int xsum0;
- unsigned int xxsum0;
-
- vp9_half_horiz_variance16x_h_sse2(
- src_ptr, src_pixels_per_line,
- dst_ptr, dst_pixels_per_line, 16,
- &xsum0, &xxsum0);
-
- *sse = xxsum0;
- return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));
-}
-
-
-unsigned int vp9_variance_halfpixvar16x16_v_sse2(
- const unsigned char *src_ptr,
- int src_pixels_per_line,
- const unsigned char *dst_ptr,
- int dst_pixels_per_line,
- unsigned int *sse) {
- int xsum0;
- unsigned int xxsum0;
- vp9_half_vert_variance16x_h_sse2(
- src_ptr, src_pixels_per_line,
- dst_ptr, dst_pixels_per_line, 16,
- &xsum0, &xxsum0);
-
- *sse = xxsum0;
- return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));
-}
-
-
-unsigned int vp9_variance_halfpixvar16x16_hv_sse2(
- const unsigned char *src_ptr,
- int src_pixels_per_line,
- const unsigned char *dst_ptr,
- int dst_pixels_per_line,
- unsigned int *sse) {
- int xsum0;
- unsigned int xxsum0;
-
- vp9_half_horiz_vert_variance16x_h_sse2(
- src_ptr, src_pixels_per_line,
- dst_ptr, dst_pixels_per_line, 16,
- &xsum0, &xxsum0);
-
- *sse = xxsum0;
- return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));
-}
diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk
index b1ba0b1..3b4d6b9 100644
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -120,28 +120,32 @@
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.c
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_convolve_neon.c
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_idct16x16_neon.c
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_loopfilter_16_neon.c
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_convolve8_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_convolve8_avg_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_loopfilter_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_loopfilter_16_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_dc_only_idct_add_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_idct4x4_1_add_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_idct4x4_add_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_idct8x8_1_add_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_idct8x8_add_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_idct16x16_1_add_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_idct16x16_add_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_idct32x32_1_add_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_idct32x32_add_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht4x4_add_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht8x8_add_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_mb_lpf_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_copy_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_avg_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_save_reg_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_reconintra_neon$(ASM)
+ifeq ($(ARCH_X86_64), yes)
+VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_idct_ssse3.asm
+endif
+
+VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_convolve_neon.c
+VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_idct16x16_neon.c
+VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_loopfilter_16_neon.c
+VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_convolve8_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_convolve8_avg_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_loopfilter_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_loopfilter_16_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_dc_only_idct_add_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_idct4x4_1_add_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_idct4x4_add_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_idct8x8_1_add_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_idct8x8_add_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_idct16x16_1_add_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_idct16x16_add_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_idct32x32_1_add_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_idct32x32_add_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_iht4x4_add_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_iht8x8_add_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_mb_lpf_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_copy_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_avg_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_save_reg_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_reconintra_neon$(ASM)
$(eval $(call rtcd_h_template,vp9_rtcd,vp9/common/vp9_rtcd_defs.pl))
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index f7c0a64..c4058bb 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -30,6 +30,7 @@
unsigned int tile_rows;
unsigned int arnr_max_frames;
unsigned int arnr_strength;
+ unsigned int arnr_type;
vp8e_tuning tuning;
unsigned int cq_level; // constrained quality level
unsigned int rc_max_intra_bitrate_pct;
@@ -41,7 +42,7 @@
};
struct extraconfig_map {
- int usage;
+ unsigned int usage;
struct vp9_extracfg cfg;
};
@@ -59,6 +60,7 @@
0, // tile_rows
7, // arnr_max_frames
5, // arnr_strength
+ 3, // arnr_type
VP8_TUNE_PSNR, // tuning
10, // cq_level
0, // rc_max_intra_bitrate_pct
@@ -201,6 +203,7 @@
RANGE_CHECK_HI(extra_cfg, sharpness, 7);
RANGE_CHECK(extra_cfg, arnr_max_frames, 0, 15);
RANGE_CHECK_HI(extra_cfg, arnr_strength, 6);
+ RANGE_CHECK(extra_cfg, arnr_type, 1, 3);
RANGE_CHECK(extra_cfg, cq_level, 0, 63);
// TODO(yaowu): remove this when ssim tuning is implemented for vp9
@@ -242,7 +245,8 @@
layer_id = (int)stats->spatial_layer_id;
if (layer_id >= cfg->ss_number_layers
- ||(int)(stats->count + 0.5) != n_packets_per_layer[layer_id] - 1)
+ ||(unsigned int)(stats->count + 0.5) !=
+ n_packets_per_layer[layer_id] - 1)
ERROR("rc_twopass_stats_in missing EOS stats packet");
}
} else {
@@ -364,6 +368,7 @@
oxcf->arnr_max_frames = extra_cfg->arnr_max_frames;
oxcf->arnr_strength = extra_cfg->arnr_strength;
+ oxcf->arnr_type = extra_cfg->arnr_type;
oxcf->tuning = extra_cfg->tuning;
@@ -494,6 +499,7 @@
MAP(VP9E_SET_TILE_ROWS, extra_cfg.tile_rows);
MAP(VP8E_SET_ARNR_MAXFRAMES, extra_cfg.arnr_max_frames);
MAP(VP8E_SET_ARNR_STRENGTH, extra_cfg.arnr_strength);
+ MAP(VP8E_SET_ARNR_TYPE, extra_cfg.arnr_type);
MAP(VP8E_SET_TUNING, extra_cfg.tuning);
MAP(VP8E_SET_CQ_LEVEL, extra_cfg.cq_level);
MAP(VP8E_SET_MAX_INTRA_BITRATE_PCT, extra_cfg.rc_max_intra_bitrate_pct);
@@ -1107,6 +1113,7 @@
{VP9E_SET_TILE_ROWS, ctrl_set_param},
{VP8E_SET_ARNR_MAXFRAMES, ctrl_set_param},
{VP8E_SET_ARNR_STRENGTH, ctrl_set_param},
+ {VP8E_SET_ARNR_TYPE, ctrl_set_param},
{VP8E_SET_TUNING, ctrl_set_param},
{VP8E_SET_CQ_LEVEL, ctrl_set_param},
{VP8E_SET_MAX_INTRA_BITRATE_PCT, ctrl_set_param},
diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c
index 06b4823..6198250 100644
--- a/vp9/vp9_dx_iface.c
+++ b/vp9/vp9_dx_iface.c
@@ -245,17 +245,13 @@
}
static void init_decoder(vpx_codec_alg_priv_t *ctx) {
- VP9DecoderConfig oxcf;
- oxcf.width = ctx->si.w;
- oxcf.height = ctx->si.h;
- oxcf.version = 9;
- oxcf.max_threads = ctx->cfg.threads;
- oxcf.inv_tile_order = ctx->invert_tile_order;
-
- ctx->pbi = vp9_decoder_create(&oxcf);
+ ctx->pbi = vp9_decoder_create();
if (ctx->pbi == NULL)
return;
+ ctx->pbi->max_threads = ctx->cfg.threads;
+ ctx->pbi->inv_tile_order = ctx->invert_tile_order;
+
vp9_initialize_dec();
// If postprocessing was enabled by the application and a
@@ -375,80 +371,70 @@
}
}
+static vpx_codec_err_t decode_one_iter(vpx_codec_alg_priv_t *ctx,
+ const uint8_t **data_start_ptr,
+ const uint8_t *data_end,
+ uint32_t frame_size, void *user_priv,
+ long deadline) {
+ const vpx_codec_err_t res = decode_one(ctx, data_start_ptr, frame_size,
+ user_priv, deadline);
+ if (res != VPX_CODEC_OK)
+ return res;
+
+ // Account for suboptimal termination by the encoder.
+ while (*data_start_ptr < data_end) {
+ const uint8_t marker = read_marker(ctx->decrypt_cb, ctx->decrypt_state,
+ *data_start_ptr);
+ if (marker)
+ break;
+ (*data_start_ptr)++;
+ }
+
+ return VPX_CODEC_OK;
+}
+
static vpx_codec_err_t decoder_decode(vpx_codec_alg_priv_t *ctx,
const uint8_t *data, unsigned int data_sz,
void *user_priv, long deadline) {
const uint8_t *data_start = data;
- const uint8_t *data_end = data + data_sz;
- vpx_codec_err_t res = VPX_CODEC_OK;
- uint32_t sizes[8];
- int frames_this_pts, frame_count = 0;
+ const uint8_t *const data_end = data + data_sz;
+ vpx_codec_err_t res;
+ uint32_t frame_sizes[8];
+ int frame_count;
if (data == NULL || data_sz == 0)
return VPX_CODEC_INVALID_PARAM;
- parse_superframe_index(data, data_sz, sizes, &frames_this_pts,
+ parse_superframe_index(data, data_sz, frame_sizes, &frame_count,
ctx->decrypt_cb, ctx->decrypt_state);
- do {
- if (data_sz) {
- uint8_t marker = read_marker(ctx->decrypt_cb, ctx->decrypt_state,
- data_start);
- // Skip over the superframe index, if present
- if ((marker & 0xe0) == 0xc0) {
- const uint32_t frames = (marker & 0x7) + 1;
- const uint32_t mag = ((marker >> 3) & 0x3) + 1;
- const uint32_t index_sz = 2 + mag * frames;
+ if (frame_count > 0) {
+ int i;
- if (data_sz >= index_sz) {
- uint8_t marker2 = read_marker(ctx->decrypt_cb, ctx->decrypt_state,
- data_start + index_sz - 1);
- if (marker2 == marker) {
- data_start += index_sz;
- data_sz -= index_sz;
- if (data_start < data_end)
- continue;
- else
- break;
- }
- }
- }
- }
-
- // Use the correct size for this frame, if an index is present.
- if (frames_this_pts) {
- uint32_t this_sz = sizes[frame_count];
-
- if (data_sz < this_sz) {
+ for (i = 0; i < frame_count; ++i) {
+ const uint32_t frame_size = frame_sizes[i];
+ if (data_start < data ||
+ frame_size > (uint32_t)(data_end - data_start)) {
ctx->base.err_detail = "Invalid frame size in index";
return VPX_CODEC_CORRUPT_FRAME;
}
- data_sz = this_sz;
- frame_count++;
+ res = decode_one_iter(ctx, &data_start, data_end, frame_size,
+ user_priv, deadline);
+ if (res != VPX_CODEC_OK)
+ return res;
}
-
- res = decode_one(ctx, &data_start, data_sz, user_priv, deadline);
- assert(data_start >= data);
- assert(data_start <= data_end);
-
- // Early exit if there was a decode error
- if (res)
- break;
-
- // Account for suboptimal termination by the encoder.
+ } else {
while (data_start < data_end) {
- uint8_t marker3 = read_marker(ctx->decrypt_cb, ctx->decrypt_state,
- data_start);
- if (marker3)
- break;
- data_start++;
+ res = decode_one_iter(ctx, &data_start, data_end,
+ (uint32_t)(data_end - data_start),
+ user_priv, deadline);
+ if (res != VPX_CODEC_OK)
+ return res;
}
+ }
- data_sz = (unsigned int)(data_end - data_start);
- } while (data_start < data_end);
-
- return res;
+ return VPX_CODEC_OK;
}
static vpx_image_t *decoder_get_frame(vpx_codec_alg_priv_t *ctx,
diff --git a/vp9/vp9_iface_common.h b/vp9/vp9_iface_common.h
index 58256b2..d60883c 100644
--- a/vp9/vp9_iface_common.h
+++ b/vp9/vp9_iface_common.h
@@ -16,9 +16,11 @@
* the Y, U, and V planes, nor other alignment adjustments that
* might be representable by a YV12_BUFFER_CONFIG, so we just
* initialize all the fields.*/
- int bps = 12;
- if (yv12->uv_height == yv12->y_height) {
- if (yv12->uv_width == yv12->y_width) {
+ const int ss_x = yv12->uv_crop_width < yv12->y_crop_width;
+ const int ss_y = yv12->uv_crop_height < yv12->y_crop_height;
+ int bps;
+ if (!ss_y) {
+ if (!ss_x) {
img->fmt = VPX_IMG_FMT_I444;
bps = 24;
} else {
@@ -27,13 +29,14 @@
}
} else {
img->fmt = VPX_IMG_FMT_I420;
+ bps = 12;
}
img->w = yv12->y_stride;
img->h = ALIGN_POWER_OF_TWO(yv12->y_height + 2 * VP9_ENC_BORDER_IN_PIXELS, 3);
img->d_w = yv12->y_crop_width;
img->d_h = yv12->y_crop_height;
- img->x_chroma_shift = yv12->uv_width < yv12->y_width;
- img->y_chroma_shift = yv12->uv_height < yv12->y_height;
+ img->x_chroma_shift = ss_x;
+ img->y_chroma_shift = ss_y;
img->planes[VPX_PLANE_Y] = yv12->y_buffer;
img->planes[VPX_PLANE_U] = yv12->u_buffer;
img->planes[VPX_PLANE_V] = yv12->v_buffer;
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index c444fe4..bc9a478 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -96,12 +96,12 @@
VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_variance_impl_intrin_avx2.c
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad4d_sse2.asm
VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_sad4d_intrin_avx2.c
-VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subpel_variance_impl_sse2.asm
VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_temporal_filter_apply_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE3) += encoder/x86/vp9_sad_sse3.asm
ifeq ($(CONFIG_USE_X86INC),yes)
+VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_dct_mmx.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_error_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subtract_sse2.asm
@@ -112,6 +112,7 @@
ifeq ($(ARCH_X86_64),yes)
VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_quantize_ssse3.asm
+VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_dct_ssse3.asm
endif
VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_sad_ssse3.asm
VP9_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/vp9_sad_sse4.asm
diff --git a/vpx/src/svc_encodeframe.c b/vpx/src/svc_encodeframe.c
index 76aacd2..b874be7 100644
--- a/vpx/src/svc_encodeframe.c
+++ b/vpx/src/svc_encodeframe.c
@@ -234,7 +234,8 @@
si->message_buffer[0] = '\0';
}
-static int svc_log(SvcContext *svc_ctx, int level, const char *fmt, ...) {
+static int svc_log(SvcContext *svc_ctx, SVC_LOG_LEVEL level,
+ const char *fmt, ...) {
char buf[512];
int retval = 0;
va_list ap;
@@ -1000,8 +1001,10 @@
(int)si->frame_size, (int)pts);
}
}
- ++si->frame_within_gop;
- ++si->encode_frame_count;
+ if (rawimg != NULL) {
+ ++si->frame_within_gop;
+ ++si->encode_frame_count;
+ }
return VPX_CODEC_OK;
}
diff --git a/vpx/vp8cx.h b/vpx/vp8cx.h
index 8944a26..67cbdb1 100644
--- a/vpx/vp8cx.h
+++ b/vpx/vp8cx.h
@@ -160,8 +160,12 @@
scale as used by the rc_*_quantizer config
parameters */
VP8E_SET_ARNR_MAXFRAMES, /**< control function to set the max number of frames blurred creating arf*/
- VP8E_SET_ARNR_STRENGTH, /**< control function to set the filter strength for the arf */
- VP8E_SET_ARNR_TYPE, /**< control function to set the type of filter to use for the arf*/
+ VP8E_SET_ARNR_STRENGTH, //!< control function to set the filter
+ //!< strength for the arf
+
+ /*!\deprecated control function to set the filter type to use for the arf */
+ VP8E_SET_ARNR_TYPE,
+
VP8E_SET_TUNING, /**< control function to set visual tuning */
/*!\brief control function to set constrained quality level
*
@@ -347,7 +351,7 @@
VPX_CTRL_USE_TYPE(VP8E_SET_ARNR_MAXFRAMES, unsigned int)
VPX_CTRL_USE_TYPE(VP8E_SET_ARNR_STRENGTH, unsigned int)
-VPX_CTRL_USE_TYPE(VP8E_SET_ARNR_TYPE, unsigned int)
+VPX_CTRL_USE_TYPE_DEPRECATED(VP8E_SET_ARNR_TYPE, unsigned int)
VPX_CTRL_USE_TYPE(VP8E_SET_TUNING, int) /* vp8e_tuning */
VPX_CTRL_USE_TYPE(VP8E_SET_CQ_LEVEL, unsigned int)
diff --git a/vpx/vpx_image.h b/vpx/vpx_image.h
index 8d0f4ec..d45b003 100644
--- a/vpx/vpx_image.h
+++ b/vpx/vpx_image.h
@@ -34,7 +34,7 @@
#define VPX_IMG_FMT_PLANAR 0x100 /**< Image is a planar format */
#define VPX_IMG_FMT_UV_FLIP 0x200 /**< V plane precedes U plane in memory */
#define VPX_IMG_FMT_HAS_ALPHA 0x400 /**< Image has an alpha channel component */
-
+#define VPX_IMG_FMT_HIGH 0x800 /**< Image uses 16bit framebuffer */
/*!\brief List of supported image formats */
typedef enum vpx_img_fmt {
@@ -58,7 +58,10 @@
VPX_IMG_FMT_VPXI420 = VPX_IMG_FMT_PLANAR | 4,
VPX_IMG_FMT_I422 = VPX_IMG_FMT_PLANAR | 5,
VPX_IMG_FMT_I444 = VPX_IMG_FMT_PLANAR | 6,
- VPX_IMG_FMT_444A = VPX_IMG_FMT_PLANAR | VPX_IMG_FMT_HAS_ALPHA | 7
+ VPX_IMG_FMT_444A = VPX_IMG_FMT_PLANAR | VPX_IMG_FMT_HAS_ALPHA | 7,
+ VPX_IMG_FMT_I42016 = VPX_IMG_FMT_I420 | VPX_IMG_FMT_HIGH,
+ VPX_IMG_FMT_I42216 = VPX_IMG_FMT_I422 | VPX_IMG_FMT_HIGH,
+ VPX_IMG_FMT_I44416 = VPX_IMG_FMT_I444 | VPX_IMG_FMT_HIGH
} vpx_img_fmt_t; /**< alias for enum vpx_img_fmt */
#if !defined(VPX_CODEC_DISABLE_COMPAT) || !VPX_CODEC_DISABLE_COMPAT
diff --git a/vpx_ports/arm_cpudetect.c b/vpx_ports/arm_cpudetect.c
index 542ff67..fa0e030 100644
--- a/vpx_ports/arm_cpudetect.c
+++ b/vpx_ports/arm_cpudetect.c
@@ -12,6 +12,13 @@
#include <string.h>
#include "arm.h"
+#ifdef WINAPI_FAMILY
+#include <winapifamily.h>
+#if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
+#define getenv(x) NULL
+#endif
+#endif
+
static int arm_cpu_env_flags(int *flags) {
char *env;
env = getenv("VPX_SIMD_CAPS");
diff --git a/vpx_scale/vpx_scale.mk b/vpx_scale/vpx_scale.mk
index ded8e0b..95e7483 100644
--- a/vpx_scale/vpx_scale.mk
+++ b/vpx_scale/vpx_scale.mk
@@ -10,10 +10,10 @@
SCALE_SRCS-yes += vpx_scale_rtcd.pl
#neon
-SCALE_SRCS-$(HAVE_NEON) += arm/neon/vp8_vpxyv12_copyframe_func_neon$(ASM)
-SCALE_SRCS-$(HAVE_NEON) += arm/neon/vp8_vpxyv12_copysrcframe_func_neon$(ASM)
-SCALE_SRCS-$(HAVE_NEON) += arm/neon/vp8_vpxyv12_extendframeborders_neon$(ASM)
-SCALE_SRCS-$(HAVE_NEON) += arm/neon/yv12extend_arm.c
+SCALE_SRCS-$(HAVE_NEON_ASM) += arm/neon/vp8_vpxyv12_copyframe_func_neon$(ASM)
+SCALE_SRCS-$(HAVE_NEON_ASM) += arm/neon/vp8_vpxyv12_copysrcframe_func_neon$(ASM)
+SCALE_SRCS-$(HAVE_NEON_ASM) += arm/neon/vp8_vpxyv12_extendframeborders_neon$(ASM)
+SCALE_SRCS-$(HAVE_NEON_ASM) += arm/neon/yv12extend_arm.c
#mips(dspr2)
SCALE_SRCS-$(HAVE_DSPR2) += mips/dspr2/yv12extend_dspr2.c
diff --git a/vpx_scale/vpx_scale_rtcd.pl b/vpx_scale/vpx_scale_rtcd.pl
index 8c92570..2e3f1ff 100644
--- a/vpx_scale/vpx_scale_rtcd.pl
+++ b/vpx_scale/vpx_scale_rtcd.pl
@@ -17,10 +17,12 @@
}
add_proto qw/void vp8_yv12_extend_frame_borders/, "struct yv12_buffer_config *ybf";
-specialize qw/vp8_yv12_extend_frame_borders neon/;
+specialize qw/vp8_yv12_extend_frame_borders neon_asm/;
+$vp8_yv12_extend_frame_borders_neon_asm=vp8_yv12_extend_frame_borders_neon;
add_proto qw/void vp8_yv12_copy_frame/, "const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc";
-specialize qw/vp8_yv12_copy_frame neon/;
+specialize qw/vp8_yv12_copy_frame neon_asm/;
+$vp8_yv12_copy_frame_neon_asm=vp8_yv12_copy_frame_neon;
add_proto qw/void vpx_yv12_copy_y/, "const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc";
diff --git a/vpxdec.c b/vpxdec.c
index 6356961..ed37c70 100644
--- a/vpxdec.c
+++ b/vpxdec.c
@@ -33,7 +33,9 @@
#include "./md5_utils.h"
#include "./tools_common.h"
+#if CONFIG_WEBM_IO
#include "./webmdec.h"
+#endif
#include "./y4menc.h"
static const char *exec_name;
@@ -528,9 +530,11 @@
struct VpxDecInputContext input = {0};
struct VpxInputContext vpx_input_ctx = {0};
+#if CONFIG_WEBM_IO
struct WebmInputContext webm_ctx = {0};
- input.vpx_input_ctx = &vpx_input_ctx;
input.webm_ctx = &webm_ctx;
+#endif
+ input.vpx_input_ctx = &vpx_input_ctx;
/* Parse command line */
exec_name = argv_[0];
diff --git a/vpxenc.c b/vpxenc.c
index 3b8591e..96a7ab6 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -42,7 +42,9 @@
#include "./rate_hist.h"
#include "./vpxstats.h"
#include "./warnings.h"
+#if CONFIG_WEBM_IO
#include "./webmenc.h"
+#endif
#include "./y4minput.h"
/* Swallow warnings about unused results of fread/fwrite */
@@ -207,6 +209,7 @@
"Frame width");
static const arg_def_t height = ARG_DEF("h", "height", 1,
"Frame height");
+#if CONFIG_WEBM_IO
static const struct arg_enum_list stereo_mode_enum[] = {
{"mono", STEREO_FORMAT_MONO},
{"left-right", STEREO_FORMAT_LEFT_RIGHT},
@@ -217,6 +220,7 @@
};
static const arg_def_t stereo_mode = ARG_DEF_ENUM(NULL, "stereo-mode", 1,
"Stereo 3D video format", stereo_mode_enum);
+#endif
static const arg_def_t timebase = ARG_DEF(NULL, "timebase", 1,
"Output timestamp precision (fractional seconds)");
static const arg_def_t error_resilient = ARG_DEF(NULL, "error-resilient", 1,
@@ -226,7 +230,11 @@
static const arg_def_t *global_args[] = {
&use_yv12, &use_i420, &usage, &threads, &profile,
- &width, &height, &stereo_mode, &timebase, &framerate,
+ &width, &height,
+#if CONFIG_WEBM_IO
+ &stereo_mode,
+#endif
+ &timebase, &framerate,
&error_resilient,
&lag_in_frames, NULL
};
@@ -363,7 +371,7 @@
static const arg_def_t *vp9_args[] = {
&cpu_used, &auto_altref, &noise_sens, &sharpness, &static_thresh,
- &tile_cols, &tile_rows, &arnr_maxframes, &arnr_strength,
+ &tile_cols, &tile_rows, &arnr_maxframes, &arnr_strength, &arnr_type,
&tune_ssim, &cq_level, &max_intra_rate_pct, &lossless,
&frame_parallel_decoding, &aq_mode, &frame_periodic_boost,
NULL
@@ -372,7 +380,7 @@
VP8E_SET_CPUUSED, VP8E_SET_ENABLEAUTOALTREF,
VP8E_SET_NOISE_SENSITIVITY, VP8E_SET_SHARPNESS, VP8E_SET_STATIC_THRESHOLD,
VP9E_SET_TILE_COLUMNS, VP9E_SET_TILE_ROWS,
- VP8E_SET_ARNR_MAXFRAMES, VP8E_SET_ARNR_STRENGTH,
+ VP8E_SET_ARNR_MAXFRAMES, VP8E_SET_ARNR_STRENGTH, VP8E_SET_ARNR_TYPE,
VP8E_SET_TUNING, VP8E_SET_CQ_LEVEL, VP8E_SET_MAX_INTRA_BITRATE_PCT,
VP9E_SET_LOSSLESS, VP9E_SET_FRAME_PARALLEL_DECODING, VP9E_SET_AQ_MODE,
VP9E_SET_FRAME_PERIODIC_BOOST,
@@ -554,6 +562,11 @@
NELEMENTS(vp9_arg_ctrl_map))
#endif
+#if !CONFIG_WEBM_IO
+typedef int stereo_format_t;
+struct EbmlGlobal { int debug; };
+#endif
+
/* Per-stream configuration */
struct stream_config {
struct vpx_codec_enc_cfg cfg;
@@ -792,9 +805,9 @@
stream->config.cfg.g_h = 0;
/* Initialize remaining stream parameters */
- stream->config.stereo_fmt = STEREO_FORMAT_MONO;
stream->config.write_webm = 1;
#if CONFIG_WEBM_IO
+ stream->config.stereo_fmt = STEREO_FORMAT_MONO;
stream->ebml.last_pts_ns = -1;
stream->ebml.writer = NULL;
stream->ebml.segment = NULL;
@@ -869,8 +882,10 @@
config->cfg.g_w = arg_parse_uint(&arg);
} else if (arg_match(&arg, &height, argi)) {
config->cfg.g_h = arg_parse_uint(&arg);
+#if CONFIG_WEBM_IO
} else if (arg_match(&arg, &stereo_mode, argi)) {
config->stereo_fmt = arg_parse_enum_or_int(&arg);
+#endif
} else if (arg_match(&arg, &timebase, argi)) {
config->cfg.g_timebase = arg_parse_rational(&arg);
validate_positive_rational(arg.name, &config->cfg.g_timebase);
diff --git a/webmdec.c b/webmdec.c
deleted file mode 100644
index 93a8d9f..0000000
--- a/webmdec.c
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "./webmdec.h"
-
-#include <stdarg.h>
-
-#include "third_party/nestegg/include/nestegg/nestegg.h"
-
-static int nestegg_read_cb(void *buffer, size_t length, void *userdata) {
- FILE *f = userdata;
-
- if (fread(buffer, 1, length, f) < length) {
- if (ferror(f))
- return -1;
- if (feof(f))
- return 0;
- }
- return 1;
-}
-
-static int nestegg_seek_cb(int64_t offset, int whence, void *userdata) {
- switch (whence) {
- case NESTEGG_SEEK_SET:
- whence = SEEK_SET;
- break;
- case NESTEGG_SEEK_CUR:
- whence = SEEK_CUR;
- break;
- case NESTEGG_SEEK_END:
- whence = SEEK_END;
- break;
- };
- return fseek(userdata, (int32_t)offset, whence) ? -1 : 0;
-}
-
-static int64_t nestegg_tell_cb(void *userdata) {
- return ftell(userdata);
-}
-
-static void nestegg_log_cb(nestegg *context,
- unsigned int severity,
- char const *format, ...) {
- va_list ap;
- va_start(ap, format);
- vfprintf(stderr, format, ap);
- fprintf(stderr, "\n");
- va_end(ap);
-}
-
-int file_is_webm(struct WebmInputContext *webm_ctx,
- struct VpxInputContext *vpx_ctx) {
- uint32_t i, n;
- int track_type = -1;
- int codec_id;
-
- nestegg_io io = {nestegg_read_cb, nestegg_seek_cb, nestegg_tell_cb, 0};
- nestegg_video_params params;
-
- io.userdata = vpx_ctx->file;
- if (nestegg_init(&webm_ctx->nestegg_ctx, io, NULL, -1))
- goto fail;
-
- if (nestegg_track_count(webm_ctx->nestegg_ctx, &n))
- goto fail;
-
- for (i = 0; i < n; i++) {
- track_type = nestegg_track_type(webm_ctx->nestegg_ctx, i);
-
- if (track_type == NESTEGG_TRACK_VIDEO)
- break;
- else if (track_type < 0)
- goto fail;
- }
-
- codec_id = nestegg_track_codec_id(webm_ctx->nestegg_ctx, i);
- if (codec_id == NESTEGG_CODEC_VP8) {
- vpx_ctx->fourcc = VP8_FOURCC;
- } else if (codec_id == NESTEGG_CODEC_VP9) {
- vpx_ctx->fourcc = VP9_FOURCC;
- } else {
- fprintf(stderr, "Not VPx video, quitting.\n");
- goto fail;
- }
-
- webm_ctx->video_track = i;
-
- if (nestegg_track_video_params(webm_ctx->nestegg_ctx, i, ¶ms))
- goto fail;
-
- vpx_ctx->framerate.denominator = 0;
- vpx_ctx->framerate.numerator = 0;
- vpx_ctx->width = params.width;
- vpx_ctx->height = params.height;
-
- return 1;
-
- fail:
- webm_ctx->nestegg_ctx = NULL;
- rewind(vpx_ctx->file);
-
- return 0;
-}
-
-int webm_read_frame(struct WebmInputContext *webm_ctx,
- uint8_t **buffer,
- size_t *bytes_in_buffer,
- size_t *buffer_size) {
- if (webm_ctx->chunk >= webm_ctx->chunks) {
- uint32_t track;
- int status;
-
- do {
- /* End of this packet, get another. */
- if (webm_ctx->pkt) {
- nestegg_free_packet(webm_ctx->pkt);
- webm_ctx->pkt = NULL;
- }
-
- status = nestegg_read_packet(webm_ctx->nestegg_ctx, &webm_ctx->pkt);
- if (status <= 0)
- return status ? status : 1;
-
- if (nestegg_packet_track(webm_ctx->pkt, &track))
- return -1;
- } while (track != webm_ctx->video_track);
-
- if (nestegg_packet_count(webm_ctx->pkt, &webm_ctx->chunks))
- return -1;
-
- webm_ctx->chunk = 0;
- }
-
- if (nestegg_packet_data(webm_ctx->pkt, webm_ctx->chunk,
- buffer, bytes_in_buffer)) {
- return -1;
- }
-
- webm_ctx->chunk++;
- return 0;
-}
-
-int webm_guess_framerate(struct WebmInputContext *webm_ctx,
- struct VpxInputContext *vpx_ctx) {
- uint32_t i;
- uint64_t tstamp = 0;
-
- /* Check to see if we can seek before we parse any data. */
- if (nestegg_track_seek(webm_ctx->nestegg_ctx, webm_ctx->video_track, 0)) {
- fprintf(stderr, "Failed to guess framerate (no Cues), set to 30fps.\n");
- vpx_ctx->framerate.numerator = 30;
- vpx_ctx->framerate.denominator = 1;
- return 0;
- }
-
- /* Guess the framerate. Read up to 1 second, or 50 video packets,
- * whichever comes first.
- */
- for (i = 0; tstamp < 1000000000 && i < 50;) {
- nestegg_packet *pkt;
- uint32_t track;
-
- if (nestegg_read_packet(webm_ctx->nestegg_ctx, &pkt) <= 0)
- break;
-
- nestegg_packet_track(pkt, &track);
- if (track == webm_ctx->video_track) {
- nestegg_packet_tstamp(pkt, &tstamp);
- ++i;
- }
-
- nestegg_free_packet(pkt);
- }
-
- if (nestegg_track_seek(webm_ctx->nestegg_ctx, webm_ctx->video_track, 0))
- goto fail;
-
- vpx_ctx->framerate.numerator = (i - 1) * 1000000;
- vpx_ctx->framerate.denominator = (int)(tstamp / 1000);
- return 0;
-
- fail:
- nestegg_destroy(webm_ctx->nestegg_ctx);
- webm_ctx->nestegg_ctx = NULL;
- rewind(vpx_ctx->file);
- return 1;
-}
-
-void webm_free(struct WebmInputContext *webm_ctx) {
- if (webm_ctx && webm_ctx->nestegg_ctx) {
- if (webm_ctx->pkt)
- nestegg_free_packet(webm_ctx->pkt);
- nestegg_destroy(webm_ctx->nestegg_ctx);
- }
-}
diff --git a/webmdec.cc b/webmdec.cc
new file mode 100644
index 0000000..4383e8e
--- /dev/null
+++ b/webmdec.cc
@@ -0,0 +1,219 @@
+/*
+ * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./webmdec.h"
+
+#include <cstring>
+#include <cstdio>
+
+#include "third_party/libwebm/mkvparser.hpp"
+#include "third_party/libwebm/mkvreader.hpp"
+
+namespace {
+
+void reset(struct WebmInputContext *const webm_ctx) {
+ if (webm_ctx->reader != NULL) {
+ mkvparser::MkvReader *const reader =
+ reinterpret_cast<mkvparser::MkvReader*>(webm_ctx->reader);
+ delete reader;
+ }
+ if (webm_ctx->segment != NULL) {
+ mkvparser::Segment *const segment =
+ reinterpret_cast<mkvparser::Segment*>(webm_ctx->segment);
+ delete segment;
+ }
+ if (webm_ctx->buffer != NULL) {
+ delete[] webm_ctx->buffer;
+ }
+ webm_ctx->reader = NULL;
+ webm_ctx->segment = NULL;
+ webm_ctx->buffer = NULL;
+ webm_ctx->cluster = NULL;
+ webm_ctx->block_entry = NULL;
+ webm_ctx->block = NULL;
+ webm_ctx->block_frame_index = 0;
+ webm_ctx->video_track_index = 0;
+ webm_ctx->timestamp_ns = 0;
+}
+
+void get_first_cluster(struct WebmInputContext *const webm_ctx) {
+ mkvparser::Segment *const segment =
+ reinterpret_cast<mkvparser::Segment*>(webm_ctx->segment);
+ const mkvparser::Cluster *const cluster = segment->GetFirst();
+ webm_ctx->cluster = cluster;
+}
+
+void rewind_and_reset(struct WebmInputContext *const webm_ctx,
+ struct VpxInputContext *const vpx_ctx) {
+ rewind(vpx_ctx->file);
+ reset(webm_ctx);
+}
+
+} // namespace
+
+int file_is_webm(struct WebmInputContext *webm_ctx,
+ struct VpxInputContext *vpx_ctx) {
+ mkvparser::MkvReader *const reader = new mkvparser::MkvReader(vpx_ctx->file);
+ webm_ctx->reader = reader;
+
+ mkvparser::EBMLHeader header;
+ long long pos = 0;
+ if (header.Parse(reader, pos) < 0) {
+ rewind_and_reset(webm_ctx, vpx_ctx);
+ return 0;
+ }
+
+ mkvparser::Segment* segment;
+ if (mkvparser::Segment::CreateInstance(reader, pos, segment)) {
+ rewind_and_reset(webm_ctx, vpx_ctx);
+ return 0;
+ }
+ webm_ctx->segment = segment;
+ if (segment->Load() < 0) {
+ rewind_and_reset(webm_ctx, vpx_ctx);
+ return 0;
+ }
+
+ const mkvparser::Tracks *const tracks = segment->GetTracks();
+ const mkvparser::VideoTrack* video_track = NULL;
+ for (unsigned long i = 0; i < tracks->GetTracksCount(); ++i) {
+ const mkvparser::Track* const track = tracks->GetTrackByIndex(i);
+ if (track->GetType() == mkvparser::Track::kVideo) {
+ video_track = static_cast<const mkvparser::VideoTrack*>(track);
+ webm_ctx->video_track_index = track->GetNumber();
+ break;
+ }
+ }
+
+ if (video_track == NULL) {
+ rewind_and_reset(webm_ctx, vpx_ctx);
+ return 0;
+ }
+
+ if (!strncmp(video_track->GetCodecId(), "V_VP8", 5)) {
+ vpx_ctx->fourcc = VP8_FOURCC;
+ } else if (!strncmp(video_track->GetCodecId(), "V_VP9", 5)) {
+ vpx_ctx->fourcc = VP9_FOURCC;
+ } else {
+ rewind_and_reset(webm_ctx, vpx_ctx);
+ return 0;
+ }
+
+ vpx_ctx->framerate.denominator = 0;
+ vpx_ctx->framerate.numerator = 0;
+ vpx_ctx->width = static_cast<uint32_t>(video_track->GetWidth());
+ vpx_ctx->height = static_cast<uint32_t>(video_track->GetHeight());
+
+ get_first_cluster(webm_ctx);
+
+ return 1;
+}
+
+int webm_read_frame(struct WebmInputContext *webm_ctx,
+ uint8_t **buffer,
+ size_t *bytes_in_buffer,
+ size_t *buffer_size) {
+ mkvparser::Segment *const segment =
+ reinterpret_cast<mkvparser::Segment*>(webm_ctx->segment);
+ const mkvparser::Cluster* cluster =
+ reinterpret_cast<const mkvparser::Cluster*>(webm_ctx->cluster);
+ const mkvparser::Block *block =
+ reinterpret_cast<const mkvparser::Block*>(webm_ctx->block);
+ const mkvparser::BlockEntry *block_entry =
+ reinterpret_cast<const mkvparser::BlockEntry*>(webm_ctx->block_entry);
+ bool block_entry_eos = false;
+ do {
+ long status = 0;
+ bool get_new_block = false;
+ if (block_entry == NULL && !block_entry_eos) {
+ status = cluster->GetFirst(block_entry);
+ get_new_block = true;
+ } else if (block_entry_eos || block_entry->EOS()) {
+ cluster = segment->GetNext(cluster);
+ if (cluster == NULL || cluster->EOS()) {
+ *bytes_in_buffer = 0;
+ return 1;
+ }
+ status = cluster->GetFirst(block_entry);
+ block_entry_eos = false;
+ get_new_block = true;
+ } else if (block == NULL ||
+ webm_ctx->block_frame_index == block->GetFrameCount() ||
+ block->GetTrackNumber() != webm_ctx->video_track_index) {
+ status = cluster->GetNext(block_entry, block_entry);
+ if (block_entry == NULL || block_entry->EOS()) {
+ block_entry_eos = true;
+ continue;
+ }
+ get_new_block = true;
+ }
+ if (status) {
+ return -1;
+ }
+ if (get_new_block) {
+ block = block_entry->GetBlock();
+ webm_ctx->block_frame_index = 0;
+ }
+ } while (block->GetTrackNumber() != webm_ctx->video_track_index ||
+ block_entry_eos);
+
+ webm_ctx->cluster = cluster;
+ webm_ctx->block_entry = block_entry;
+ webm_ctx->block = block;
+
+ const mkvparser::Block::Frame& frame =
+ block->GetFrame(webm_ctx->block_frame_index);
+ ++webm_ctx->block_frame_index;
+ if (frame.len > static_cast<long>(*buffer_size)) {
+ delete[] *buffer;
+ *buffer = new uint8_t[frame.len];
+ if (*buffer == NULL) {
+ return -1;
+ }
+ *buffer_size = frame.len;
+ webm_ctx->buffer = *buffer;
+ }
+ *bytes_in_buffer = frame.len;
+ webm_ctx->timestamp_ns = block->GetTime(cluster);
+
+ mkvparser::MkvReader *const reader =
+ reinterpret_cast<mkvparser::MkvReader*>(webm_ctx->reader);
+ return frame.Read(reader, *buffer) ? -1 : 0;
+}
+
+int webm_guess_framerate(struct WebmInputContext *webm_ctx,
+ struct VpxInputContext *vpx_ctx) {
+ uint32_t i = 0;
+ uint8_t *buffer = NULL;
+ size_t bytes_in_buffer = 0;
+ size_t buffer_size = 0;
+ while (webm_ctx->timestamp_ns < 1000000000 && i < 50) {
+ if (webm_read_frame(webm_ctx, &buffer, &bytes_in_buffer, &buffer_size)) {
+ break;
+ }
+ ++i;
+ }
+ vpx_ctx->framerate.numerator = (i - 1) * 1000000;
+ vpx_ctx->framerate.denominator =
+ static_cast<int>(webm_ctx->timestamp_ns / 1000);
+ delete[] buffer;
+
+ get_first_cluster(webm_ctx);
+ webm_ctx->block = NULL;
+ webm_ctx->block_entry = NULL;
+ webm_ctx->block_frame_index = 0;
+ webm_ctx->timestamp_ns = 0;
+
+ return 0;
+}
+
+void webm_free(struct WebmInputContext *webm_ctx) {
+ reset(webm_ctx);
+}
diff --git a/webmdec.h b/webmdec.h
index 108c6ad..29b815d 100644
--- a/webmdec.h
+++ b/webmdec.h
@@ -16,34 +16,53 @@
extern "C" {
#endif
-struct nestegg;
-struct nestegg_packet;
struct VpxInputContext;
struct WebmInputContext {
- uint32_t chunk;
- uint32_t chunks;
- uint32_t video_track;
- struct nestegg *nestegg_ctx;
- struct nestegg_packet *pkt;
+ void *reader;
+ void *segment;
+ uint8_t *buffer;
+ const void *cluster;
+ const void *block_entry;
+ const void *block;
+ int block_frame_index;
+ int video_track_index;
+ uint64_t timestamp_ns;
};
+// Checks if the input is a WebM file. If so, initializes WebMInputContext so
+// that webm_read_frame can be called to retrieve a video frame.
+// Returns 1 on success and 0 on failure or input is not WebM file.
+// TODO(vigneshv): Refactor this function into two smaller functions specific
+// to their task.
int file_is_webm(struct WebmInputContext *webm_ctx,
struct VpxInputContext *vpx_ctx);
-/* Reads a WebM video frame. Return values:
- * 0 - Success
- * 1 - End of File
- * -1 - Error
- */
+// Reads a WebM Video Frame. Memory for the buffer is created, owned and managed
+// by this function. For the first call, |buffer| should be NULL and
+// |*bytes_in_buffer| should be 0. Once all the frames are read and used,
+// webm_free() should be called, otherwise there will be a leak.
+// Parameters:
+// webm_ctx - WebmInputContext object
+// buffer - pointer where the frame data will be filled.
+// bytes_in_buffer - pointer to buffer size.
+// buffer_size - unused TODO(vigneshv): remove this
+// Return values:
+// 0 - Success
+// 1 - End of Stream
+// -1 - Error
+// TODO(vigneshv): Make the return values consistent across all functions in
+// this file.
int webm_read_frame(struct WebmInputContext *webm_ctx,
uint8_t **buffer,
size_t *bytes_in_buffer,
size_t *buffer_size);
+// Guesses the frame rate of the input file based on the container timestamps.
int webm_guess_framerate(struct WebmInputContext *webm_ctx,
struct VpxInputContext *vpx_ctx);
+// Resets the WebMInputContext.
void webm_free(struct WebmInputContext *webm_ctx);
#ifdef __cplusplus