Merge "Use reconstructed pixels for intra prediction"
diff --git a/build/arm-msvs/obj_int_extract.bat b/build/arm-msvs/obj_int_extract.bat
deleted file mode 100644
index c0987bc..0000000
--- a/build/arm-msvs/obj_int_extract.bat
+++ /dev/null
@@ -1,18 +0,0 @@
-REM Copyright (c) 2013 The WebM project authors. All Rights Reserved.
-REM
-REM Use of this source code is governed by a BSD-style license
-REM that can be found in the LICENSE file in the root of the source
-REM tree. An additional intellectual property rights grant can be found
-REM in the file PATENTS. All contributing project authors may
-REM be found in the AUTHORS file in the root of the source tree.
-echo on
-
-REM Arguments:
-REM %1 - Relative path to the directory containing the vp8 and vpx_scale
-REM source directories.
-REM %2 - Path to obj_int_extract.exe.
-cl /I. /I%1 /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%~1/vp8/encoder/vp8_asm_enc_offsets.c"
-%2\obj_int_extract.exe rvds "vp8_asm_enc_offsets.obj" > "vp8_asm_enc_offsets.asm"
-
-cl /I. /I%1 /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%~1/vpx_scale/vpx_scale_asm_offsets.c"
-%2\obj_int_extract.exe rvds "vpx_scale_asm_offsets.obj" > "vpx_scale_asm_offsets.asm"
diff --git a/build/make/Android.mk b/build/make/Android.mk
index 816334e..d897b44 100644
--- a/build/make/Android.mk
+++ b/build/make/Android.mk
@@ -43,7 +43,7 @@
# will remove any NEON dependency.
# To change to building armeabi, run ./libvpx/configure again, but with
-# --target=arm5te-android-gcc and modify the Application.mk file to
+# --target=armv6-android-gcc and modify the Application.mk file to
# set APP_ABI := armeabi
#
# Running ndk-build will build libvpx and include it in your project.
@@ -60,7 +60,7 @@
include $(CONFIG_DIR)libs-armv7-android-gcc.mk
LOCAL_ARM_MODE := arm
else ifeq ($(TARGET_ARCH_ABI),armeabi)
- include $(CONFIG_DIR)libs-armv5te-android-gcc.mk
+ include $(CONFIG_DIR)libs-armv6-android-gcc.mk
LOCAL_ARM_MODE := arm
else ifeq ($(TARGET_ARCH_ABI),arm64-v8a)
include $(CONFIG_DIR)libs-armv8-android-gcc.mk
@@ -91,51 +91,8 @@
# like x86inc.asm and x86_abi_support.asm
LOCAL_ASMFLAGS := -I$(LIBVPX_PATH)
-# -----------------------------------------------------------------------------
-# Template : asm_offsets_template
-# Arguments : 1: assembly offsets file to be created
-# 2: c file to base assembly offsets on
-# Returns : None
-# Usage : $(eval $(call asm_offsets_template,<asmfile>, <srcfile>
-# Rationale : Create offsets at compile time using for structures that are
-# defined in c, but used in assembly functions.
-# -----------------------------------------------------------------------------
-define asm_offsets_template
-
-_SRC:=$(2)
-_OBJ:=$(ASM_CNV_PATH)/$$(notdir $(2)).S
-
-_FLAGS = $$($$(my)CFLAGS) \
- $$(call get-src-file-target-cflags,$(2)) \
- $$(call host-c-includes,$$(LOCAL_C_INCLUDES) $$(CONFIG_DIR)) \
- $$(LOCAL_CFLAGS) \
- $$(NDK_APP_CFLAGS) \
- $$(call host-c-includes,$$($(my)C_INCLUDES)) \
- -DINLINE_ASM \
- -S \
-
-_TEXT = "Compile $$(call get-src-file-text,$(2))"
-_CC = $$(TARGET_CC)
-
-$$(eval $$(call ev-build-file))
-
-$(1) : $$(_OBJ) $(2)
- @mkdir -p $$(dir $$@)
- @grep $(OFFSET_PATTERN) $$< | tr -d '\#' | $(CONFIG_DIR)$(ASM_CONVERSION) > $$@
-endef
-
-# Use ads2gas script to convert from RVCT format to GAS format. This
-# puts the processed file under $(ASM_CNV_PATH). Local clean rule
-# to handle removing these
-ifeq ($(CONFIG_VP8_ENCODER), yes)
- ASM_CNV_OFFSETS_DEPEND += $(ASM_CNV_PATH)/vp8_asm_enc_offsets.asm
-endif
-ifeq ($(HAVE_NEON_ASM), yes)
- ASM_CNV_OFFSETS_DEPEND += $(ASM_CNV_PATH)/vpx_scale_asm_offsets.asm
-endif
-
.PRECIOUS: %.asm.s
-$(ASM_CNV_PATH)/libvpx/%.asm.s: $(LIBVPX_PATH)/%.asm $(ASM_CNV_OFFSETS_DEPEND)
+$(ASM_CNV_PATH)/libvpx/%.asm.s: $(LIBVPX_PATH)/%.asm
@mkdir -p $(dir $@)
@$(CONFIG_DIR)$(ASM_CONVERSION) <$< > $@
@@ -224,24 +181,11 @@
clean:
@echo "Clean: ads2gas files [$(TARGET_ARCH_ABI)]"
@$(RM) $(CODEC_SRCS_ASM_ADS2GAS) $(CODEC_SRCS_ASM_NEON_ADS2GAS)
- @$(RM) $(patsubst %.asm, %.*, $(ASM_CNV_OFFSETS_DEPEND))
@$(RM) -r $(ASM_CNV_PATH)
@$(RM) $(CLEAN-OBJS)
include $(BUILD_SHARED_LIBRARY)
-ifeq ($(HAVE_NEON), yes)
- $(eval $(call asm_offsets_template,\
- $(ASM_CNV_PATH)/vpx_scale_asm_offsets.asm, \
- $(LIBVPX_PATH)/vpx_scale/vpx_scale_asm_offsets.c))
-endif
-
-ifeq ($(CONFIG_VP8_ENCODER), yes)
- $(eval $(call asm_offsets_template,\
- $(ASM_CNV_PATH)/vp8_asm_enc_offsets.asm, \
- $(LIBVPX_PATH)/vp8/encoder/vp8_asm_enc_offsets.c))
-endif
-
ifeq ($(CONFIG_RUNTIME_CPU_DETECT),yes)
$(call import-module,cpufeatures)
endif
diff --git a/build/make/Makefile b/build/make/Makefile
index ed90397..da5721a 100644
--- a/build/make/Makefile
+++ b/build/make/Makefile
@@ -217,14 +217,6 @@
endif
#
-# Rule to extract assembly constants from C sources
-#
-obj_int_extract: build/make/obj_int_extract.c
- $(if $(quiet),@echo " [HOSTCC] $@")
- $(qexec)$(HOSTCC) -I. -I$(SRC_PATH_BARE) -o $@ $<
-CLEAN-OBJS += obj_int_extract
-
-#
# Utility functions
#
pairmap=$(if $(strip $(2)),\
@@ -424,11 +416,7 @@
DIST-SRCS-$(CONFIG_MSVS) += build/make/gen_msvs_sln.sh
DIST-SRCS-$(CONFIG_MSVS) += build/make/gen_msvs_vcxproj.sh
DIST-SRCS-$(CONFIG_MSVS) += build/make/msvs_common.sh
- DIST-SRCS-$(CONFIG_MSVS) += build/x86-msvs/obj_int_extract.bat
- DIST-SRCS-$(CONFIG_MSVS) += build/arm-msvs/obj_int_extract.bat
DIST-SRCS-$(CONFIG_RVCT) += build/make/armlink_adapter.sh
- # Include obj_int_extract if we use offsets from *_asm_*_offsets
- DIST-SRCS-$(ARCH_ARM)$(ARCH_X86)$(ARCH_X86_64) += build/make/obj_int_extract.c
DIST-SRCS-$(ARCH_ARM) += build/make/ads2gas.pl
DIST-SRCS-$(ARCH_ARM) += build/make/ads2gas_apple.pl
DIST-SRCS-$(ARCH_ARM) += build/make/ads2armasm_ms.pl
diff --git a/build/make/configure.sh b/build/make/configure.sh
index 56e9f44..6763688 100644
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -822,18 +822,12 @@
soft_enable neon
soft_enable neon_asm
soft_enable media
- soft_enable edsp
soft_enable fast_unaligned
;;
armv6)
soft_enable media
- soft_enable edsp
soft_enable fast_unaligned
;;
- armv5te)
- soft_enable edsp
- disable_feature fast_unaligned
- ;;
esac
asm_conversion_cmd="cat"
diff --git a/build/make/gen_msvs_proj.sh b/build/make/gen_msvs_proj.sh
index 7907225..dcce782 100755
--- a/build/make/gen_msvs_proj.sh
+++ b/build/make/gen_msvs_proj.sh
@@ -295,23 +295,8 @@
case "$target" in
x86*)
case "$name" in
- obj_int_extract)
- tag Tool \
- Name="VCCLCompilerTool" \
- Optimization="0" \
- AdditionalIncludeDirectories="$incs" \
- PreprocessorDefinitions="WIN32;DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE" \
- RuntimeLibrary="$debug_runtime" \
- WarningLevel="3" \
- DebugInformationFormat="1" \
- $warn_64bit \
- ;;
vpx)
tag Tool \
- Name="VCPreBuildEventTool" \
- CommandLine="call obj_int_extract.bat "$src_path_bare" $plat_no_ws\\\$(ConfigurationName)" \
-
- tag Tool \
Name="VCCLCompilerTool" \
Optimization="0" \
AdditionalIncludeDirectories="$incs" \
@@ -347,11 +332,6 @@
case "$target" in
x86*)
case "$name" in
- obj_int_extract)
- tag Tool \
- Name="VCLinkerTool" \
- GenerateDebugInformation="true" \
- ;;
*)
tag Tool \
Name="VCLinkerTool" \
@@ -400,25 +380,8 @@
case "$target" in
x86*)
case "$name" in
- obj_int_extract)
- tag Tool \
- Name="VCCLCompilerTool" \
- Optimization="2" \
- FavorSizeorSpeed="1" \
- AdditionalIncludeDirectories="$incs" \
- PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE" \
- RuntimeLibrary="$release_runtime" \
- UsePrecompiledHeader="0" \
- WarningLevel="3" \
- DebugInformationFormat="0" \
- $warn_64bit \
- ;;
vpx)
tag Tool \
- Name="VCPreBuildEventTool" \
- CommandLine="call obj_int_extract.bat "$src_path_bare" $plat_no_ws\\\$(ConfigurationName)" \
-
- tag Tool \
Name="VCCLCompilerTool" \
Optimization="2" \
FavorSizeorSpeed="1" \
@@ -456,11 +419,6 @@
case "$target" in
x86*)
case "$name" in
- obj_int_extract)
- tag Tool \
- Name="VCLinkerTool" \
- GenerateDebugInformation="true" \
- ;;
*)
tag Tool \
Name="VCLinkerTool" \
diff --git a/build/make/gen_msvs_vcxproj.sh b/build/make/gen_msvs_vcxproj.sh
index 56b9a3b..643ebd6 100755
--- a/build/make/gen_msvs_vcxproj.sh
+++ b/build/make/gen_msvs_vcxproj.sh
@@ -262,15 +262,9 @@
asm_Release_cmdline="yasm -Xvc -f win32 ${yasmincs} "%(FullPath)""
;;
arm*)
+ platforms[0]="ARM"
asm_Debug_cmdline="armasm -nologo "%(FullPath)""
asm_Release_cmdline="armasm -nologo "%(FullPath)""
- if [ "$name" = "obj_int_extract" ]; then
- # We don't want to build this tool for the target architecture,
- # but for an architecture we can run locally during the build.
- platforms[0]="Win32"
- else
- platforms[0]="ARM"
- fi
;;
*) die "Unsupported target $target!"
;;
@@ -400,23 +394,13 @@
if [ "$hostplat" == "ARM" ]; then
hostplat=Win32
fi
- open_tag PreBuildEvent
- tag_content Command "call obj_int_extract.bat "$src_path_bare" $hostplat\\\$(Configuration)"
- close_tag PreBuildEvent
fi
open_tag ClCompile
if [ "$config" = "Debug" ]; then
opt=Disabled
runtime=$debug_runtime
curlibs=$debug_libs
- case "$name" in
- obj_int_extract)
- debug=DEBUG
- ;;
- *)
- debug=_DEBUG
- ;;
- esac
+ debug=_DEBUG
else
opt=MaxSpeed
runtime=$release_runtime
@@ -424,14 +408,7 @@
tag_content FavorSizeOrSpeed Speed
debug=NDEBUG
fi
- case "$name" in
- obj_int_extract)
- extradefines=";_CONSOLE"
- ;;
- *)
- extradefines=";$defines"
- ;;
- esac
+ extradefines=";$defines"
tag_content Optimization $opt
tag_content AdditionalIncludeDirectories "$incs;%(AdditionalIncludeDirectories)"
tag_content PreprocessorDefinitions "WIN32;$debug;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE$extradefines;%(PreprocessorDefinitions)"
@@ -451,10 +428,6 @@
case "$proj_kind" in
exe)
open_tag Link
- if [ "$name" != "obj_int_extract" ]; then
- tag_content AdditionalDependencies "$curlibs;%(AdditionalDependencies)"
- tag_content AdditionalLibraryDirectories "$libdirs;%(AdditionalLibraryDirectories)"
- fi
tag_content GenerateDebugInformation true
# Console is the default normally, but if
# AppContainerApplication is set, we need to override it.
diff --git a/build/make/rtcd.pl b/build/make/rtcd.pl
index 0872414..bfc91a5 100755
--- a/build/make/rtcd.pl
+++ b/build/make/rtcd.pl
@@ -379,14 +379,11 @@
}
close CONFIG_FILE;
mips;
-} elsif ($opts{arch} eq 'armv5te') {
- @ALL_ARCHS = filter(qw/edsp/);
- arm;
} elsif ($opts{arch} eq 'armv6') {
- @ALL_ARCHS = filter(qw/edsp media/);
+ @ALL_ARCHS = filter(qw/media/);
arm;
} elsif ($opts{arch} eq 'armv7') {
- @ALL_ARCHS = filter(qw/edsp media neon_asm neon/);
+ @ALL_ARCHS = filter(qw/media neon_asm neon/);
@REQUIRES = filter(keys %required ? keys %required : qw/media/);
&require(@REQUIRES);
arm;
diff --git a/build/x86-msvs/obj_int_extract.bat b/build/x86-msvs/obj_int_extract.bat
deleted file mode 100644
index dfa3b90..0000000
--- a/build/x86-msvs/obj_int_extract.bat
+++ /dev/null
@@ -1,15 +0,0 @@
-REM Copyright (c) 2011 The WebM project authors. All Rights Reserved.
-REM
-REM Use of this source code is governed by a BSD-style license
-REM that can be found in the LICENSE file in the root of the source
-REM tree. An additional intellectual property rights grant can be found
-REM in the file PATENTS. All contributing project authors may
-REM be found in the AUTHORS file in the root of the source tree.
-echo on
-
-REM Arguments:
-REM %1 - Relative path to the directory containing the vp8 source directory.
-REM %2 - Path to obj_int_extract.exe.
-cl /I. /I%1 /nologo /c "%~1/vp8/encoder/vp8_asm_enc_offsets.c"
-%2\obj_int_extract.exe rvds "vp8_asm_enc_offsets.obj" > "vp8_asm_enc_offsets.asm"
-
diff --git a/configure b/configure
index 3ed976c..b374fa0 100755
--- a/configure
+++ b/configure
@@ -56,6 +56,8 @@
${toggle_postproc_visualizer} macro block / block level visualizers
${toggle_multi_res_encoding} enable multiple-resolution encoding
${toggle_temporal_denoising} enable temporal denoising and disable the spatial denoiser
+ ${toggle_vp9_temporal_denoising}
+ enable vp9 temporal denoising
${toggle_webm_io} enable input from and output to WebM container
${toggle_libyuv} enable libyuv
@@ -279,7 +281,6 @@
"
EXPERIMENT_LIST="
spatial_svc
- vp9_temporal_denoising
fp_mb_stats
emulate_hardware
"
@@ -334,6 +335,7 @@
encode_perf_tests
multi_res_encoding
temporal_denoising
+ vp9_temporal_denoising
coefficient_range_checking
vp9_highbitdepth
experimental
@@ -393,6 +395,7 @@
encode_perf_tests
multi_res_encoding
temporal_denoising
+ vp9_temporal_denoising
coefficient_range_checking
vp9_highbitdepth
experimental
@@ -451,8 +454,6 @@
enabled child || write_common_config_banner
enabled universal || write_common_target_config_h ${BUILD_PFX}vpx_config.h
- # TODO: add host tools target (obj_int_extract, etc)
-
# For fat binaries, call configure recursively to configure for each
# binary architecture to be included.
if enabled universal; then
diff --git a/examples/vp9_spatial_svc_encoder.c b/examples/vp9_spatial_svc_encoder.c
index a4a9edb..a193c67 100644
--- a/examples/vp9_spatial_svc_encoder.c
+++ b/examples/vp9_spatial_svc_encoder.c
@@ -125,7 +125,7 @@
}
static void parse_command_line(int argc, const char **argv_,
- AppInput *app_input, SvcContext *svc_ctx,
+ AppInput *app_input, SvcContext_t *svc_ctx,
vpx_codec_enc_cfg_t *enc_cfg) {
struct arg arg = {0};
char **argv = NULL;
@@ -322,7 +322,7 @@
VpxVideoInfo info = {0};
vpx_codec_ctx_t codec;
vpx_codec_enc_cfg_t enc_cfg;
- SvcContext svc_ctx;
+ SvcContext_t svc_ctx;
uint32_t i;
uint32_t frame_cnt = 0;
vpx_image_t raw;
diff --git a/libs.mk b/libs.mk
index f9f2d80..38b4b11 100644
--- a/libs.mk
+++ b/libs.mk
@@ -18,32 +18,6 @@
endif
#
-# Calculate platform- and compiler-specific offsets for hand coded assembly
-#
-ifeq ($(filter icc gcc,$(TGT_CC)), $(TGT_CC))
-OFFSET_PATTERN:='^[a-zA-Z0-9_]* EQU'
-define asm_offsets_template
-$$(BUILD_PFX)$(1): $$(BUILD_PFX)$(2).S
- @echo " [CREATE] $$@"
- $$(qexec)LC_ALL=C grep $$(OFFSET_PATTERN) $$< | tr -d '$$$$\#' $$(ADS2GAS) > $$@
-$$(BUILD_PFX)$(2).S: $(2)
-CLEAN-OBJS += $$(BUILD_PFX)$(1) $(2).S
-endef
-else
- ifeq ($(filter rvct,$(TGT_CC)), $(TGT_CC))
-define asm_offsets_template
-$$(BUILD_PFX)$(1): obj_int_extract
-$$(BUILD_PFX)$(1): $$(BUILD_PFX)$(2).o
- @echo " [CREATE] $$@"
- $$(qexec)./obj_int_extract rvds $$< $$(ADS2GAS) > $$@
-OBJS-yes += $$(BUILD_PFX)$(2).o
-CLEAN-OBJS += $$(BUILD_PFX)$(1)
-$$(filter %$$(ASM).o,$$(OBJS-yes)): $$(BUILD_PFX)$(1)
-endef
-endif # rvct
-endif # !gcc
-
-#
# Rule to generate runtime cpu detection files
#
define rtcd_h_template
@@ -212,26 +186,6 @@
ifeq ($(CONFIG_EXTERNAL_BUILD),yes)
ifeq ($(CONFIG_MSVS),yes)
-obj_int_extract.bat: $(SRC_PATH_BARE)/build/$(MSVS_ARCH_DIR)/obj_int_extract.bat
- @cp $^ $@
-
-obj_int_extract.$(VCPROJ_SFX): obj_int_extract.bat
-obj_int_extract.$(VCPROJ_SFX): $(SRC_PATH_BARE)/build/make/obj_int_extract.c
- @echo " [CREATE] $@"
- $(qexec)$(GEN_VCPROJ) \
- --exe \
- --target=$(TOOLCHAIN) \
- --name=obj_int_extract \
- --ver=$(CONFIG_VS_VERSION) \
- --proj-guid=E1360C65-D375-4335-8057-7ED99CC3F9B2 \
- --src-path-bare="$(SRC_PATH_BARE)" \
- $(if $(CONFIG_STATIC_MSVCRT),--static-crt) \
- --out=$@ $^ \
- -I. \
- -I"$(SRC_PATH_BARE)" \
-
-PROJECTS-$(BUILD_LIBVPX) += obj_int_extract.$(VCPROJ_SFX)
-
vpx.def: $(call enabled,CODEC_EXPORTS)
@echo " [CREATE] $@"
$(qexec)$(SRC_PATH_BARE)/build/make/gen_msvs_def.sh\
@@ -246,7 +200,7 @@
vpx_config.asm \
vpx_ports/x86_abi_support.asm \
-vpx.$(VCPROJ_SFX): $(CODEC_SRCS) vpx.def obj_int_extract.$(VCPROJ_SFX)
+vpx.$(VCPROJ_SFX): $(CODEC_SRCS) vpx.def
@echo " [CREATE] $@"
$(qexec)$(GEN_VCPROJ) \
$(if $(CONFIG_SHARED),--dll,--lib) \
@@ -377,7 +331,7 @@
endif
#
-# Add assembler dependencies for configuration and offsets
+# Add assembler dependencies for configuration.
#
$(filter %.s.o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm
$(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm
diff --git a/solution.mk b/solution.mk
index 2c8d29a..145adc0 100644
--- a/solution.mk
+++ b/solution.mk
@@ -9,7 +9,7 @@
##
# libvpx reverse dependencies (targets that depend on libvpx)
-VPX_NONDEPS=$(addsuffix .$(VCPROJ_SFX),vpx gtest obj_int_extract)
+VPX_NONDEPS=$(addsuffix .$(VCPROJ_SFX),vpx gtest)
VPX_RDEPS=$(foreach vcp,\
$(filter-out $(VPX_NONDEPS),$^), --dep=$(vcp:.$(VCPROJ_SFX)=):vpx)
@@ -17,7 +17,6 @@
@echo " [CREATE] $@"
$(SRC_PATH_BARE)/build/make/gen_msvs_sln.sh \
$(if $(filter vpx.$(VCPROJ_SFX),$^),$(VPX_RDEPS)) \
- --dep=vpx:obj_int_extract \
--dep=test_libvpx:gtest \
--ver=$(CONFIG_VS_VERSION)\
--out=$@ $^
diff --git a/test/convolve_test.cc b/test/convolve_test.cc
index 470c436..b0f6052 100644
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@@ -1488,7 +1488,15 @@
wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8,
wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8,
wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8, 8);
-INSTANTIATE_TEST_CASE_P(SSE2_8, ConvolveTest, ::testing::Values(
+const ConvolveFunctions convolve10_sse2(
+ wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10,
+ wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10,
+ wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10, 10);
+const ConvolveFunctions convolve12_sse2(
+ wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12,
+ wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12,
+ wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12, 12);
+INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values(
make_tuple(4, 4, &convolve8_sse2),
make_tuple(8, 4, &convolve8_sse2),
make_tuple(4, 8, &convolve8_sse2),
@@ -1501,12 +1509,7 @@
make_tuple(32, 32, &convolve8_sse2),
make_tuple(64, 32, &convolve8_sse2),
make_tuple(32, 64, &convolve8_sse2),
- make_tuple(64, 64, &convolve8_sse2)));
-const ConvolveFunctions convolve10_sse2(
- wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10,
- wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10,
- wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10, 10);
-INSTANTIATE_TEST_CASE_P(SSE2_10, ConvolveTest, ::testing::Values(
+ make_tuple(64, 64, &convolve8_sse2),
make_tuple(4, 4, &convolve10_sse2),
make_tuple(8, 4, &convolve10_sse2),
make_tuple(4, 8, &convolve10_sse2),
@@ -1519,12 +1522,7 @@
make_tuple(32, 32, &convolve10_sse2),
make_tuple(64, 32, &convolve10_sse2),
make_tuple(32, 64, &convolve10_sse2),
- make_tuple(64, 64, &convolve10_sse2)));
-const ConvolveFunctions convolve12_sse2(
- wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12,
- wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12,
- wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12, 12);
-INSTANTIATE_TEST_CASE_P(SSE2_12, ConvolveTest, ::testing::Values(
+ make_tuple(64, 64, &convolve10_sse2),
make_tuple(4, 4, &convolve12_sse2),
make_tuple(8, 4, &convolve12_sse2),
make_tuple(4, 8, &convolve12_sse2),
diff --git a/test/invalid_file_test.cc b/test/invalid_file_test.cc
index b61d490..039b726 100644
--- a/test/invalid_file_test.cc
+++ b/test/invalid_file_test.cc
@@ -151,6 +151,7 @@
{4, "invalid-vp90-2-08-tile_1x4_frame_parallel_all_key.webm"},
{4, "invalid-"
"vp90-2-08-tile_1x2_frame_parallel.webm.ivf.s47039_r01-05_b6-.ivf"},
+ {4, "invalid-vp90-2-08-tile_1x8_frame_parallel.webm.ivf.s288_r01-05_b6-.ivf"},
{2, "invalid-vp90-2-09-aq2.webm.ivf.s3984_r01-05_b6-.v2.ivf"},
{4, "invalid-vp90-2-09-subpixel-00.ivf.s19552_r01-05_b6-.v2.ivf"},
};
diff --git a/test/lpf_8_test.cc b/test/lpf_8_test.cc
index b9f3d3d..4324cb9 100644
--- a/test/lpf_8_test.cc
+++ b/test/lpf_8_test.cc
@@ -474,7 +474,7 @@
#if HAVE_SSE2
#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
- SSE2_C_COMPARE_SINGLE, Loop8Test6Param,
+ SSE2, Loop8Test6Param,
::testing::Values(
make_tuple(&vp9_highbd_lpf_horizontal_4_sse2,
&vp9_highbd_lpf_horizontal_4_c, 8),
@@ -511,29 +511,7 @@
make_tuple(&vp9_highbd_lpf_vertical_8_sse2,
&vp9_highbd_lpf_vertical_8_c, 12),
make_tuple(&wrapper_vertical_16_sse2,
- &wrapper_vertical_16_c, 12)));
-#else
-INSTANTIATE_TEST_CASE_P(
- SSE2_C_COMPARE_SINGLE, Loop8Test6Param,
- ::testing::Values(
- make_tuple(&vp9_lpf_horizontal_8_sse2, &vp9_lpf_horizontal_8_c, 8),
- make_tuple(&vp9_lpf_horizontal_16_sse2, &vp9_lpf_horizontal_16_c, 8),
- make_tuple(&vp9_lpf_vertical_8_sse2, &vp9_lpf_vertical_8_c, 8)));
-#endif // CONFIG_VP9_HIGHBITDEPTH
-#endif
-
-#if HAVE_AVX2 && (!CONFIG_VP9_HIGHBITDEPTH)
-INSTANTIATE_TEST_CASE_P(
- AVX2_C_COMPARE_SINGLE, Loop8Test6Param,
- ::testing::Values(
- make_tuple(&vp9_lpf_horizontal_16_avx2, &vp9_lpf_horizontal_16_c, 8)));
-#endif
-
-#if HAVE_SSE2
-#if CONFIG_VP9_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(
- SSE2_C_COMPARE_DUAL, Loop8Test6Param,
- ::testing::Values(
+ &wrapper_vertical_16_c, 12),
make_tuple(&wrapper_vertical_16_dual_sse2,
&wrapper_vertical_16_dual_c, 8),
make_tuple(&wrapper_vertical_16_dual_sse2,
@@ -542,16 +520,26 @@
&wrapper_vertical_16_dual_c, 12)));
#else
INSTANTIATE_TEST_CASE_P(
- SSE2_C_COMPARE_DUAL, Loop8Test6Param,
+ SSE2, Loop8Test6Param,
::testing::Values(
+ make_tuple(&vp9_lpf_horizontal_8_sse2, &vp9_lpf_horizontal_8_c, 8),
+ make_tuple(&vp9_lpf_horizontal_16_sse2, &vp9_lpf_horizontal_16_c, 8),
+ make_tuple(&vp9_lpf_vertical_8_sse2, &vp9_lpf_vertical_8_c, 8),
make_tuple(&wrapper_vertical_16_sse2, &wrapper_vertical_16_c, 8)));
#endif // CONFIG_VP9_HIGHBITDEPTH
-#endif // HAVE_SSE2
+#endif
+
+#if HAVE_AVX2 && (!CONFIG_VP9_HIGHBITDEPTH)
+INSTANTIATE_TEST_CASE_P(
+ AVX2, Loop8Test6Param,
+ ::testing::Values(
+ make_tuple(&vp9_lpf_horizontal_16_avx2, &vp9_lpf_horizontal_16_c, 8)));
+#endif
#if HAVE_SSE2
#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
- SSE_C_COMPARE_DUAL, Loop8Test9Param,
+ SSE2, Loop8Test9Param,
::testing::Values(
make_tuple(&vp9_highbd_lpf_horizontal_4_dual_sse2,
&vp9_highbd_lpf_horizontal_4_dual_c, 8),
@@ -579,7 +567,7 @@
&vp9_highbd_lpf_vertical_8_dual_c, 12)));
#else
INSTANTIATE_TEST_CASE_P(
- SSE_C_COMPARE_DUAL, Loop8Test9Param,
+ SSE2, Loop8Test9Param,
::testing::Values(
make_tuple(&vp9_lpf_horizontal_4_dual_sse2,
&vp9_lpf_horizontal_4_dual_c, 8),
diff --git a/test/partial_idct_test.cc b/test/partial_idct_test.cc
index 536273e..39d5854 100644
--- a/test/partial_idct_test.cc
+++ b/test/partial_idct_test.cc
@@ -294,17 +294,6 @@
TX_4X4, 1)));
#endif
-#if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH && \
- !CONFIG_EMULATE_HARDWARE
-INSTANTIATE_TEST_CASE_P(
- SSSE3_64, PartialIDctTest,
- ::testing::Values(
- make_tuple(&vp9_fdct8x8_c,
- &vp9_idct8x8_64_add_c,
- &vp9_idct8x8_12_add_ssse3,
- TX_8X8, 12)));
-#endif
-
#if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
SSSE3, PartialIDctTest,
@@ -312,6 +301,14 @@
make_tuple(&vp9_fdct16x16_c,
&vp9_idct16x16_256_add_c,
&vp9_idct16x16_10_add_ssse3,
- TX_16X16, 10)));
-#endif
+ TX_16X16, 10)
+#if ARCH_X86_64
+ ,
+ make_tuple(&vp9_fdct8x8_c,
+ &vp9_idct8x8_64_add_c,
+ &vp9_idct8x8_12_add_ssse3,
+ TX_8X8, 12)
+#endif // ARCH_X86_64
+ ));
+#endif // HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
} // namespace
diff --git a/test/quantize_test.cc b/test/quantize_test.cc
index 3e717ec..756d0f6 100644
--- a/test/quantize_test.cc
+++ b/test/quantize_test.cc
@@ -181,12 +181,6 @@
&vp8_regular_quantize_b_c)));
#endif // HAVE_SSE4_1
-#if HAVE_MEDIA
-INSTANTIATE_TEST_CASE_P(MEDIA, QuantizeTest,
- ::testing::Values(make_tuple(&vp8_fast_quantize_b_armv6,
- &vp8_fast_quantize_b_c)));
-#endif // HAVE_MEDIA
-
#if HAVE_NEON
INSTANTIATE_TEST_CASE_P(NEON, QuantizeTest,
::testing::Values(make_tuple(&vp8_fast_quantize_b_neon,
diff --git a/test/sad_test.cc b/test/sad_test.cc
index c7042fe..c7eaddd 100644
--- a/test/sad_test.cc
+++ b/test/sad_test.cc
@@ -27,6 +27,7 @@
#include "test/register_state_check.h"
#include "test/util.h"
#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "vpx/vpx_codec.h"
#if CONFIG_VP8_ENCODER
@@ -35,42 +36,83 @@
const unsigned char *reference_ptr,
int reference_stride,
unsigned int max_sad);
-typedef std::tr1::tuple<int, int, SadMxNFunc> SadMxNParam;
+typedef std::tr1::tuple<int, int, SadMxNFunc, int> SadMxNParam;
#endif
#if CONFIG_VP9_ENCODER
typedef unsigned int (*SadMxNVp9Func)(const unsigned char *source_ptr,
int source_stride,
const unsigned char *reference_ptr,
int reference_stride);
-typedef std::tr1::tuple<int, int, SadMxNVp9Func> SadMxNVp9Param;
+typedef std::tr1::tuple<int, int, SadMxNVp9Func, int> SadMxNVp9Param;
+typedef uint32_t (*SadMxNAvgVp9Func)(const uint8_t *source_ptr,
+ int source_stride,
+ const uint8_t *reference_ptr,
+ int reference_stride,
+ const uint8_t *second_pred);
+typedef std::tr1::tuple<int, int, SadMxNAvgVp9Func, int> SadMxNAvgVp9Param;
#endif
typedef void (*SadMxNx4Func)(const uint8_t *src_ptr,
int src_stride,
- const unsigned char *const ref_ptr[],
+ const uint8_t *const ref_ptr[],
int ref_stride,
- unsigned int *sad_array);
-typedef std::tr1::tuple<int, int, SadMxNx4Func> SadMxNx4Param;
+ uint32_t *sad_array);
+typedef std::tr1::tuple<int, int, SadMxNx4Func, int> SadMxNx4Param;
using libvpx_test::ACMRandom;
namespace {
class SADTestBase : public ::testing::Test {
public:
- SADTestBase(int width, int height) : width_(width), height_(height) {}
+ SADTestBase(int width, int height, int bit_depth) :
+ width_(width), height_(height), bd_(bit_depth) {}
static void SetUpTestCase() {
+#if CONFIG_VP9_HIGHBITDEPTH
+ source_data8_ = reinterpret_cast<uint8_t*>(
+ vpx_memalign(kDataAlignment, kDataBlockSize));
+ reference_data8_ = reinterpret_cast<uint8_t*>(
+ vpx_memalign(kDataAlignment, kDataBufferSize));
+ second_pred8_ = reinterpret_cast<uint8_t*>(
+ vpx_memalign(kDataAlignment, 64*64));
+ source_data16_ = reinterpret_cast<uint16_t*>(
+ vpx_memalign(kDataAlignment, kDataBlockSize*sizeof(uint16_t)));
+ reference_data16_ = reinterpret_cast<uint16_t*>(
+ vpx_memalign(kDataAlignment, kDataBufferSize*sizeof(uint16_t)));
+ second_pred16_ = reinterpret_cast<uint16_t*>(
+ vpx_memalign(kDataAlignment, 64*64*sizeof(uint16_t)));
+#else
source_data_ = reinterpret_cast<uint8_t*>(
vpx_memalign(kDataAlignment, kDataBlockSize));
reference_data_ = reinterpret_cast<uint8_t*>(
vpx_memalign(kDataAlignment, kDataBufferSize));
+ second_pred_ = reinterpret_cast<uint8_t*>(
+ vpx_memalign(kDataAlignment, 64*64));
+#endif
}
static void TearDownTestCase() {
+#if CONFIG_VP9_HIGHBITDEPTH
+ vpx_free(source_data8_);
+ source_data8_ = NULL;
+ vpx_free(reference_data8_);
+ reference_data8_ = NULL;
+ vpx_free(second_pred8_);
+ second_pred8_ = NULL;
+ vpx_free(source_data16_);
+ source_data16_ = NULL;
+ vpx_free(reference_data16_);
+ reference_data16_ = NULL;
+ vpx_free(second_pred16_);
+ second_pred16_ = NULL;
+#else
vpx_free(source_data_);
source_data_ = NULL;
vpx_free(reference_data_);
reference_data_ = NULL;
+ vpx_free(second_pred_);
+ second_pred_ = NULL;
+#endif
}
virtual void TearDown() {
@@ -84,25 +126,71 @@
static const int kDataBufferSize = 4 * kDataBlockSize;
virtual void SetUp() {
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (bd_ == -1) {
+ use_high_bit_depth_ = false;
+ bit_depth_ = VPX_BITS_8;
+ source_data_ = source_data8_;
+ reference_data_ = reference_data8_;
+ second_pred_ = second_pred8_;
+ } else {
+ use_high_bit_depth_ = true;
+ bit_depth_ = static_cast<vpx_bit_depth_t>(bd_);
+ source_data_ = CONVERT_TO_BYTEPTR(source_data16_);
+ reference_data_ = CONVERT_TO_BYTEPTR(reference_data16_);
+ second_pred_ = CONVERT_TO_BYTEPTR(second_pred16_);
+ }
+#endif
+ mask_ = (1 << bit_depth_) - 1;
source_stride_ = (width_ + 31) & ~31;
reference_stride_ = width_ * 2;
rnd_.Reset(ACMRandom::DeterministicSeed());
}
- virtual uint8_t* GetReference(int block_idx) {
+ virtual uint8_t *GetReference(int block_idx) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (!use_high_bit_depth_) {
+ return reference_data_ + block_idx * kDataBlockSize;
+ } else {
+ return CONVERT_TO_BYTEPTR(CONVERT_TO_SHORTPTR(reference_data_) +
+ block_idx * kDataBlockSize);
+ }
+#else
return reference_data_ + block_idx * kDataBlockSize;
+#endif
}
// Sum of Absolute Differences. Given two blocks, calculate the absolute
// difference between two pixels in the same relative location; accumulate.
unsigned int ReferenceSAD(unsigned int max_sad, int block_idx) {
unsigned int sad = 0;
- const uint8_t* const reference = GetReference(block_idx);
-
+#if CONFIG_VP9_HIGHBITDEPTH
+ const uint8_t *const reference8 = GetReference(block_idx);
+ const uint8_t *const source8 = source_data_;
+ const uint16_t *const reference16 =
+ CONVERT_TO_SHORTPTR(GetReference(block_idx));
+ const uint16_t *const source16 = CONVERT_TO_SHORTPTR(source_data_);
+#else
+ const uint8_t *const reference = GetReference(block_idx);
+ const uint8_t *const source = source_data_;
+#endif
for (int h = 0; h < height_; ++h) {
for (int w = 0; w < width_; ++w) {
- sad += abs(source_data_[h * source_stride_ + w]
- - reference[h * reference_stride_ + w]);
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (!use_high_bit_depth_) {
+ sad +=
+ abs(source8[h * source_stride_ + w] -
+ reference8[h * reference_stride_ + w]);
+ } else {
+ sad +=
+ abs(source16[h * source_stride_ + w] -
+ reference16[h * reference_stride_ + w]);
+ }
+#else
+ sad +=
+ abs(source[h * source_stride_ + w] -
+ reference[h * reference_stride_ + w]);
+#endif
}
if (sad > max_sad) {
break;
@@ -111,26 +199,107 @@
return sad;
}
- void FillConstant(uint8_t *data, int stride, uint8_t fill_constant) {
+ // Sum of Absolute Differences Average. Given two blocks, and a prediction
+ // calculate the absolute difference between one pixel and average of the
+ // corresponding and predicted pixels; accumulate.
+ unsigned int ReferenceSADavg(unsigned int max_sad, int block_idx) {
+ unsigned int sad = 0;
+#if CONFIG_VP9_HIGHBITDEPTH
+ const uint8_t *const reference8 = GetReference(block_idx);
+ const uint8_t *const source8 = source_data_;
+ const uint8_t *const second_pred8 = second_pred_;
+ const uint16_t *const reference16 =
+ CONVERT_TO_SHORTPTR(GetReference(block_idx));
+ const uint16_t *const source16 = CONVERT_TO_SHORTPTR(source_data_);
+ const uint16_t *const second_pred16 = CONVERT_TO_SHORTPTR(second_pred_);
+#else
+ const uint8_t *const reference = GetReference(block_idx);
+ const uint8_t *const source = source_data_;
+ const uint8_t *const second_pred = second_pred_;
+#endif
for (int h = 0; h < height_; ++h) {
for (int w = 0; w < width_; ++w) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (!use_high_bit_depth_) {
+ const int tmp = second_pred8[h * width_ + w] +
+ reference8[h * reference_stride_ + w];
+ const uint8_t comp_pred = ROUND_POWER_OF_TWO(tmp, 1);
+ sad += abs(source8[h * source_stride_ + w] - comp_pred);
+ } else {
+ const int tmp = second_pred16[h * width_ + w] +
+ reference16[h * reference_stride_ + w];
+ const uint16_t comp_pred = ROUND_POWER_OF_TWO(tmp, 1);
+ sad += abs(source16[h * source_stride_ + w] - comp_pred);
+ }
+#else
+ const int tmp = second_pred[h * width_ + w] +
+ reference[h * reference_stride_ + w];
+ const uint8_t comp_pred = ROUND_POWER_OF_TWO(tmp, 1);
+ sad += abs(source[h * source_stride_ + w] - comp_pred);
+#endif
+ }
+ if (sad > max_sad) {
+ break;
+ }
+ }
+ return sad;
+ }
+
+ void FillConstant(uint8_t *data, int stride, uint16_t fill_constant) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ uint8_t *data8 = data;
+ uint16_t *data16 = CONVERT_TO_SHORTPTR(data);
+#endif
+ for (int h = 0; h < height_; ++h) {
+ for (int w = 0; w < width_; ++w) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (!use_high_bit_depth_) {
+ data8[h * stride + w] = fill_constant;
+ } else {
+ data16[h * stride + w] = fill_constant;
+ }
+#else
data[h * stride + w] = fill_constant;
+#endif
}
}
}
void FillRandom(uint8_t *data, int stride) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ uint8_t *data8 = data;
+ uint16_t *data16 = CONVERT_TO_SHORTPTR(data);
+#endif
for (int h = 0; h < height_; ++h) {
for (int w = 0; w < width_; ++w) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (!use_high_bit_depth_) {
+ data8[h * stride + w] = rnd_.Rand8();
+ } else {
+ data16[h * stride + w] = rnd_.Rand16() & mask_;
+ }
+#else
data[h * stride + w] = rnd_.Rand8();
+#endif
}
}
}
- int width_, height_;
- static uint8_t* source_data_;
+ int width_, height_, mask_, bd_;
+ vpx_bit_depth_t bit_depth_;
+ static uint8_t *source_data_;
+ static uint8_t *reference_data_;
+ static uint8_t *second_pred_;
int source_stride_;
- static uint8_t* reference_data_;
+#if CONFIG_VP9_HIGHBITDEPTH
+ bool use_high_bit_depth_;
+ static uint8_t *source_data8_;
+ static uint8_t *reference_data8_;
+ static uint8_t *second_pred8_;
+ static uint16_t *source_data16_;
+ static uint16_t *reference_data16_;
+ static uint16_t *second_pred16_;
+#endif
int reference_stride_;
ACMRandom rnd_;
@@ -140,11 +309,11 @@
: public SADTestBase,
public ::testing::WithParamInterface<SadMxNx4Param> {
public:
- SADx4Test() : SADTestBase(GET_PARAM(0), GET_PARAM(1)) {}
+ SADx4Test() : SADTestBase(GET_PARAM(0), GET_PARAM(1), GET_PARAM(3)) {}
protected:
void SADs(unsigned int *results) {
- const uint8_t* refs[] = {GetReference(0), GetReference(1),
+ const uint8_t *refs[] = {GetReference(0), GetReference(1),
GetReference(2), GetReference(3)};
ASM_REGISTER_STATE_CHECK(GET_PARAM(2)(source_data_, source_stride_,
@@ -169,12 +338,12 @@
: public SADTestBase,
public ::testing::WithParamInterface<SadMxNParam> {
public:
- SADTest() : SADTestBase(GET_PARAM(0), GET_PARAM(1)) {}
+ SADTest() : SADTestBase(GET_PARAM(0), GET_PARAM(1), GET_PARAM(3)) {}
protected:
unsigned int SAD(unsigned int max_sad, int block_idx) {
unsigned int ret;
- const uint8_t* const reference = GetReference(block_idx);
+ const uint8_t *const reference = GetReference(block_idx);
ASM_REGISTER_STATE_CHECK(ret = GET_PARAM(2)(source_data_, source_stride_,
reference, reference_stride_,
@@ -201,12 +370,12 @@
: public SADTestBase,
public ::testing::WithParamInterface<SadMxNVp9Param> {
public:
- SADVP9Test() : SADTestBase(GET_PARAM(0), GET_PARAM(1)) {}
+ SADVP9Test() : SADTestBase(GET_PARAM(0), GET_PARAM(1), GET_PARAM(3)) {}
protected:
unsigned int SAD(int block_idx) {
unsigned int ret;
- const uint8_t* const reference = GetReference(block_idx);
+ const uint8_t *const reference = GetReference(block_idx);
ASM_REGISTER_STATE_CHECK(ret = GET_PARAM(2)(source_data_, source_stride_,
reference, reference_stride_));
@@ -220,20 +389,54 @@
ASSERT_EQ(reference_sad, exp_sad);
}
};
+
+class SADavgVP9Test
+ : public SADTestBase,
+ public ::testing::WithParamInterface<SadMxNAvgVp9Param> {
+ public:
+ SADavgVP9Test() : SADTestBase(GET_PARAM(0), GET_PARAM(1), GET_PARAM(3)) {}
+
+ protected:
+ unsigned int SAD_avg(int block_idx) {
+ unsigned int ret;
+ const uint8_t *const reference = GetReference(block_idx);
+
+ ASM_REGISTER_STATE_CHECK(ret = GET_PARAM(2)(source_data_, source_stride_,
+ reference, reference_stride_,
+ second_pred_));
+ return ret;
+ }
+
+ void CheckSAD() {
+ const unsigned int reference_sad = ReferenceSADavg(UINT_MAX, 0);
+ const unsigned int exp_sad = SAD_avg(0);
+
+ ASSERT_EQ(reference_sad, exp_sad);
+ }
+};
#endif // CONFIG_VP9_ENCODER
-uint8_t* SADTestBase::source_data_ = NULL;
-uint8_t* SADTestBase::reference_data_ = NULL;
+uint8_t *SADTestBase::source_data_ = NULL;
+uint8_t *SADTestBase::reference_data_ = NULL;
+uint8_t *SADTestBase::second_pred_ = NULL;
+#if CONFIG_VP9_ENCODER && CONFIG_VP9_HIGHBITDEPTH
+uint8_t *SADTestBase::source_data8_ = NULL;
+uint8_t *SADTestBase::reference_data8_ = NULL;
+uint8_t *SADTestBase::second_pred8_ = NULL;
+uint16_t *SADTestBase::source_data16_ = NULL;
+uint16_t *SADTestBase::reference_data16_ = NULL;
+uint16_t *SADTestBase::second_pred16_ = NULL;
+#endif
#if CONFIG_VP8_ENCODER
TEST_P(SADTest, MaxRef) {
FillConstant(source_data_, source_stride_, 0);
- FillConstant(reference_data_, reference_stride_, 255);
+ FillConstant(reference_data_, reference_stride_, mask_);
CheckSAD(UINT_MAX);
}
TEST_P(SADTest, MaxSrc) {
- FillConstant(source_data_, source_stride_, 255);
+ FillConstant(source_data_, source_stride_, mask_);
FillConstant(reference_data_, reference_stride_, 0);
CheckSAD(UINT_MAX);
}
@@ -270,7 +473,7 @@
TEST_P(SADTest, MaxSAD) {
// Verify that, when max_sad is set, the implementation does not return a
// value lower than the reference.
- FillConstant(source_data_, source_stride_, 255);
+ FillConstant(source_data_, source_stride_, mask_);
FillConstant(reference_data_, reference_stride_, 0);
CheckSAD(128);
}
@@ -279,12 +482,12 @@
#if CONFIG_VP9_ENCODER
TEST_P(SADVP9Test, MaxRef) {
FillConstant(source_data_, source_stride_, 0);
- FillConstant(reference_data_, reference_stride_, 255);
+ FillConstant(reference_data_, reference_stride_, mask_);
CheckSAD();
}
TEST_P(SADVP9Test, MaxSrc) {
- FillConstant(source_data_, source_stride_, 255);
+ FillConstant(source_data_, source_stride_, mask_);
FillConstant(reference_data_, reference_stride_, 0);
CheckSAD();
}
@@ -317,19 +520,64 @@
CheckSAD();
source_stride_ = tmp_stride;
}
+
+TEST_P(SADavgVP9Test, MaxRef) {
+ FillConstant(source_data_, source_stride_, 0);
+ FillConstant(reference_data_, reference_stride_, mask_);
+ FillConstant(second_pred_, width_, 0);
+ CheckSAD();
+}
+TEST_P(SADavgVP9Test, MaxSrc) {
+ FillConstant(source_data_, source_stride_, mask_);
+ FillConstant(reference_data_, reference_stride_, 0);
+ FillConstant(second_pred_, width_, 0);
+ CheckSAD();
+}
+
+TEST_P(SADavgVP9Test, ShortRef) {
+ const int tmp_stride = reference_stride_;
+ reference_stride_ >>= 1;
+ FillRandom(source_data_, source_stride_);
+ FillRandom(reference_data_, reference_stride_);
+ FillRandom(second_pred_, width_);
+ CheckSAD();
+ reference_stride_ = tmp_stride;
+}
+
+TEST_P(SADavgVP9Test, UnalignedRef) {
+ // The reference frame, but not the source frame, may be unaligned for
+ // certain types of searches.
+ const int tmp_stride = reference_stride_;
+ reference_stride_ -= 1;
+ FillRandom(source_data_, source_stride_);
+ FillRandom(reference_data_, reference_stride_);
+ FillRandom(second_pred_, width_);
+ CheckSAD();
+ reference_stride_ = tmp_stride;
+}
+
+TEST_P(SADavgVP9Test, ShortSrc) {
+ const int tmp_stride = source_stride_;
+ source_stride_ >>= 1;
+ FillRandom(source_data_, source_stride_);
+ FillRandom(reference_data_, reference_stride_);
+ FillRandom(second_pred_, width_);
+ CheckSAD();
+ source_stride_ = tmp_stride;
+}
#endif // CONFIG_VP9_ENCODER
TEST_P(SADx4Test, MaxRef) {
FillConstant(source_data_, source_stride_, 0);
- FillConstant(GetReference(0), reference_stride_, 255);
- FillConstant(GetReference(1), reference_stride_, 255);
- FillConstant(GetReference(2), reference_stride_, 255);
- FillConstant(GetReference(3), reference_stride_, 255);
+ FillConstant(GetReference(0), reference_stride_, mask_);
+ FillConstant(GetReference(1), reference_stride_, mask_);
+ FillConstant(GetReference(2), reference_stride_, mask_);
+ FillConstant(GetReference(3), reference_stride_, mask_);
CheckSADs();
}
TEST_P(SADx4Test, MaxSrc) {
- FillConstant(source_data_, source_stride_, 255);
+ FillConstant(source_data_, source_stride_, mask_);
FillConstant(GetReference(0), reference_stride_, 0);
FillConstant(GetReference(1), reference_stride_, 0);
FillConstant(GetReference(2), reference_stride_, 0);
@@ -375,6 +623,18 @@
source_stride_ = tmp_stride;
}
+TEST_P(SADx4Test, SrcAlignedByWidth) {
+ uint8_t * tmp_source_data = source_data_;
+ source_data_ += width_;
+ FillRandom(source_data_, source_stride_);
+ FillRandom(GetReference(0), reference_stride_);
+ FillRandom(GetReference(1), reference_stride_);
+ FillRandom(GetReference(2), reference_stride_);
+ FillRandom(GetReference(3), reference_stride_);
+ CheckSADs();
+ source_data_ = tmp_source_data;
+}
+
using std::tr1::make_tuple;
//------------------------------------------------------------------------------
@@ -386,11 +646,11 @@
const SadMxNFunc sad_8x8_c = vp8_sad8x8_c;
const SadMxNFunc sad_4x4_c = vp8_sad4x4_c;
const SadMxNParam c_tests[] = {
- make_tuple(16, 16, sad_16x16_c),
- make_tuple(8, 16, sad_8x16_c),
- make_tuple(16, 8, sad_16x8_c),
- make_tuple(8, 8, sad_8x8_c),
- make_tuple(4, 4, sad_4x4_c),
+ make_tuple(16, 16, sad_16x16_c, -1),
+ make_tuple(8, 16, sad_8x16_c, -1),
+ make_tuple(16, 8, sad_16x8_c, -1),
+ make_tuple(8, 8, sad_8x8_c, -1),
+ make_tuple(4, 4, sad_4x4_c, -1),
};
INSTANTIATE_TEST_CASE_P(C, SADTest, ::testing::ValuesIn(c_tests));
#endif // CONFIG_VP8_ENCODER
@@ -406,15 +666,15 @@
const SadMxNVp9Func sad_4x8_c_vp9 = vp9_sad4x8_c;
const SadMxNVp9Func sad_4x4_c_vp9 = vp9_sad4x4_c;
const SadMxNVp9Param c_vp9_tests[] = {
- make_tuple(64, 64, sad_64x64_c_vp9),
- make_tuple(32, 32, sad_32x32_c_vp9),
- make_tuple(16, 16, sad_16x16_c_vp9),
- make_tuple(8, 16, sad_8x16_c_vp9),
- make_tuple(16, 8, sad_16x8_c_vp9),
- make_tuple(8, 8, sad_8x8_c_vp9),
- make_tuple(8, 4, sad_8x4_c_vp9),
- make_tuple(4, 8, sad_4x8_c_vp9),
- make_tuple(4, 4, sad_4x4_c_vp9),
+ make_tuple(64, 64, sad_64x64_c_vp9, -1),
+ make_tuple(32, 32, sad_32x32_c_vp9, -1),
+ make_tuple(16, 16, sad_16x16_c_vp9, -1),
+ make_tuple(8, 16, sad_8x16_c_vp9, -1),
+ make_tuple(16, 8, sad_16x8_c_vp9, -1),
+ make_tuple(8, 8, sad_8x8_c_vp9, -1),
+ make_tuple(8, 4, sad_8x4_c_vp9, -1),
+ make_tuple(4, 8, sad_4x8_c_vp9, -1),
+ make_tuple(4, 4, sad_4x4_c_vp9, -1),
};
INSTANTIATE_TEST_CASE_P(C, SADVP9Test, ::testing::ValuesIn(c_vp9_tests));
@@ -432,19 +692,186 @@
const SadMxNx4Func sad_4x8x4d_c = vp9_sad4x8x4d_c;
const SadMxNx4Func sad_4x4x4d_c = vp9_sad4x4x4d_c;
INSTANTIATE_TEST_CASE_P(C, SADx4Test, ::testing::Values(
- make_tuple(64, 64, sad_64x64x4d_c),
- make_tuple(64, 32, sad_64x32x4d_c),
- make_tuple(32, 64, sad_32x64x4d_c),
- make_tuple(32, 32, sad_32x32x4d_c),
- make_tuple(32, 16, sad_32x16x4d_c),
- make_tuple(16, 32, sad_16x32x4d_c),
- make_tuple(16, 16, sad_16x16x4d_c),
- make_tuple(16, 8, sad_16x8x4d_c),
- make_tuple(8, 16, sad_8x16x4d_c),
- make_tuple(8, 8, sad_8x8x4d_c),
- make_tuple(8, 4, sad_8x4x4d_c),
- make_tuple(4, 8, sad_4x8x4d_c),
- make_tuple(4, 4, sad_4x4x4d_c)));
+ make_tuple(64, 64, sad_64x64x4d_c, -1),
+ make_tuple(64, 32, sad_64x32x4d_c, -1),
+ make_tuple(32, 64, sad_32x64x4d_c, -1),
+ make_tuple(32, 32, sad_32x32x4d_c, -1),
+ make_tuple(32, 16, sad_32x16x4d_c, -1),
+ make_tuple(16, 32, sad_16x32x4d_c, -1),
+ make_tuple(16, 16, sad_16x16x4d_c, -1),
+ make_tuple(16, 8, sad_16x8x4d_c, -1),
+ make_tuple(8, 16, sad_8x16x4d_c, -1),
+ make_tuple(8, 8, sad_8x8x4d_c, -1),
+ make_tuple(8, 4, sad_8x4x4d_c, -1),
+ make_tuple(4, 8, sad_4x8x4d_c, -1),
+ make_tuple(4, 4, sad_4x4x4d_c, -1)));
+
+#if CONFIG_VP9_HIGHBITDEPTH
+const SadMxNVp9Func highbd_sad_64x64_c_vp9 = vp9_highbd_sad64x64_c;
+const SadMxNVp9Func highbd_sad_32x32_c_vp9 = vp9_highbd_sad32x32_c;
+const SadMxNVp9Func highbd_sad_16x16_c_vp9 = vp9_highbd_sad16x16_c;
+const SadMxNVp9Func highbd_sad_8x16_c_vp9 = vp9_highbd_sad8x16_c;
+const SadMxNVp9Func highbd_sad_16x8_c_vp9 = vp9_highbd_sad16x8_c;
+const SadMxNVp9Func highbd_sad_8x8_c_vp9 = vp9_highbd_sad8x8_c;
+const SadMxNVp9Func highbd_sad_8x4_c_vp9 = vp9_highbd_sad8x4_c;
+const SadMxNVp9Func highbd_sad_4x8_c_vp9 = vp9_highbd_sad4x8_c;
+const SadMxNVp9Func highbd_sad_4x4_c_vp9 = vp9_highbd_sad4x4_c;
+const SadMxNVp9Param c_vp9_highbd_8_tests[] = {
+ make_tuple(64, 64, highbd_sad_64x64_c_vp9, 8),
+ make_tuple(32, 32, highbd_sad_32x32_c_vp9, 8),
+ make_tuple(16, 16, highbd_sad_16x16_c_vp9, 8),
+ make_tuple(8, 16, highbd_sad_8x16_c_vp9, 8),
+ make_tuple(16, 8, highbd_sad_16x8_c_vp9, 8),
+ make_tuple(8, 8, highbd_sad_8x8_c_vp9, 8),
+ make_tuple(8, 4, highbd_sad_8x4_c_vp9, 8),
+ make_tuple(4, 8, highbd_sad_4x8_c_vp9, 8),
+ make_tuple(4, 4, highbd_sad_4x4_c_vp9, 8),
+};
+INSTANTIATE_TEST_CASE_P(C_8, SADVP9Test,
+ ::testing::ValuesIn(c_vp9_highbd_8_tests));
+
+const SadMxNVp9Param c_vp9_highbd_10_tests[] = {
+ make_tuple(64, 64, highbd_sad_64x64_c_vp9, 10),
+ make_tuple(32, 32, highbd_sad_32x32_c_vp9, 10),
+ make_tuple(16, 16, highbd_sad_16x16_c_vp9, 10),
+ make_tuple(8, 16, highbd_sad_8x16_c_vp9, 10),
+ make_tuple(16, 8, highbd_sad_16x8_c_vp9, 10),
+ make_tuple(8, 8, highbd_sad_8x8_c_vp9, 10),
+ make_tuple(8, 4, highbd_sad_8x4_c_vp9, 10),
+ make_tuple(4, 8, highbd_sad_4x8_c_vp9, 10),
+ make_tuple(4, 4, highbd_sad_4x4_c_vp9, 10),
+};
+INSTANTIATE_TEST_CASE_P(C_10, SADVP9Test,
+ ::testing::ValuesIn(c_vp9_highbd_10_tests));
+
+const SadMxNVp9Param c_vp9_highbd_12_tests[] = {
+ make_tuple(64, 64, highbd_sad_64x64_c_vp9, 12),
+ make_tuple(32, 32, highbd_sad_32x32_c_vp9, 12),
+ make_tuple(16, 16, highbd_sad_16x16_c_vp9, 12),
+ make_tuple(8, 16, highbd_sad_8x16_c_vp9, 12),
+ make_tuple(16, 8, highbd_sad_16x8_c_vp9, 12),
+ make_tuple(8, 8, highbd_sad_8x8_c_vp9, 12),
+ make_tuple(8, 4, highbd_sad_8x4_c_vp9, 12),
+ make_tuple(4, 8, highbd_sad_4x8_c_vp9, 12),
+ make_tuple(4, 4, highbd_sad_4x4_c_vp9, 12),
+};
+INSTANTIATE_TEST_CASE_P(C_12, SADVP9Test,
+ ::testing::ValuesIn(c_vp9_highbd_12_tests));
+
+const SadMxNAvgVp9Func highbd_sad8x4_avg_c_vp9 = vp9_highbd_sad8x4_avg_c;
+const SadMxNAvgVp9Func highbd_sad8x8_avg_c_vp9 = vp9_highbd_sad8x8_avg_c;
+const SadMxNAvgVp9Func highbd_sad8x16_avg_c_vp9 = vp9_highbd_sad8x16_avg_c;
+const SadMxNAvgVp9Func highbd_sad16x8_avg_c_vp9 = vp9_highbd_sad16x8_avg_c;
+const SadMxNAvgVp9Func highbd_sad16x16_avg_c_vp9 = vp9_highbd_sad16x16_avg_c;
+const SadMxNAvgVp9Func highbd_sad16x32_avg_c_vp9 = vp9_highbd_sad16x32_avg_c;
+const SadMxNAvgVp9Func highbd_sad32x16_avg_c_vp9 = vp9_highbd_sad32x16_avg_c;
+const SadMxNAvgVp9Func highbd_sad32x32_avg_c_vp9 = vp9_highbd_sad32x32_avg_c;
+const SadMxNAvgVp9Func highbd_sad32x64_avg_c_vp9 = vp9_highbd_sad32x64_avg_c;
+const SadMxNAvgVp9Func highbd_sad64x32_avg_c_vp9 = vp9_highbd_sad64x32_avg_c;
+const SadMxNAvgVp9Func highbd_sad64x64_avg_c_vp9 = vp9_highbd_sad64x64_avg_c;
+SadMxNAvgVp9Param avg_c_vp9_highbd_8_tests[] = {
+ make_tuple(8, 4, highbd_sad8x4_avg_c_vp9, 8),
+ make_tuple(8, 8, highbd_sad8x8_avg_c_vp9, 8),
+ make_tuple(8, 16, highbd_sad8x16_avg_c_vp9, 8),
+ make_tuple(16, 8, highbd_sad16x8_avg_c_vp9, 8),
+ make_tuple(16, 16, highbd_sad16x16_avg_c_vp9, 8),
+ make_tuple(16, 32, highbd_sad16x32_avg_c_vp9, 8),
+ make_tuple(32, 16, highbd_sad32x16_avg_c_vp9, 8),
+ make_tuple(32, 32, highbd_sad32x32_avg_c_vp9, 8),
+ make_tuple(32, 64, highbd_sad32x64_avg_c_vp9, 8),
+ make_tuple(64, 32, highbd_sad64x32_avg_c_vp9, 8),
+ make_tuple(64, 64, highbd_sad64x64_avg_c_vp9, 8)};
+INSTANTIATE_TEST_CASE_P(C_8, SADavgVP9Test,
+ ::testing::ValuesIn(avg_c_vp9_highbd_8_tests));
+
+SadMxNAvgVp9Param avg_c_vp9_highbd_10_tests[] = {
+ make_tuple(8, 4, highbd_sad8x4_avg_c_vp9, 10),
+ make_tuple(8, 8, highbd_sad8x8_avg_c_vp9, 10),
+ make_tuple(8, 16, highbd_sad8x16_avg_c_vp9, 10),
+ make_tuple(16, 8, highbd_sad16x8_avg_c_vp9, 10),
+ make_tuple(16, 16, highbd_sad16x16_avg_c_vp9, 10),
+ make_tuple(16, 32, highbd_sad16x32_avg_c_vp9, 10),
+ make_tuple(32, 16, highbd_sad32x16_avg_c_vp9, 10),
+ make_tuple(32, 32, highbd_sad32x32_avg_c_vp9, 10),
+ make_tuple(32, 64, highbd_sad32x64_avg_c_vp9, 10),
+ make_tuple(64, 32, highbd_sad64x32_avg_c_vp9, 10),
+ make_tuple(64, 64, highbd_sad64x64_avg_c_vp9, 10)};
+INSTANTIATE_TEST_CASE_P(C_10, SADavgVP9Test,
+ ::testing::ValuesIn(avg_c_vp9_highbd_10_tests));
+
+SadMxNAvgVp9Param avg_c_vp9_highbd_12_tests[] = {
+ make_tuple(8, 4, highbd_sad8x4_avg_c_vp9, 12),
+ make_tuple(8, 8, highbd_sad8x8_avg_c_vp9, 12),
+ make_tuple(8, 16, highbd_sad8x16_avg_c_vp9, 12),
+ make_tuple(16, 8, highbd_sad16x8_avg_c_vp9, 12),
+ make_tuple(16, 16, highbd_sad16x16_avg_c_vp9, 12),
+ make_tuple(16, 32, highbd_sad16x32_avg_c_vp9, 12),
+ make_tuple(32, 16, highbd_sad32x16_avg_c_vp9, 12),
+ make_tuple(32, 32, highbd_sad32x32_avg_c_vp9, 12),
+ make_tuple(32, 64, highbd_sad32x64_avg_c_vp9, 12),
+ make_tuple(64, 32, highbd_sad64x32_avg_c_vp9, 12),
+ make_tuple(64, 64, highbd_sad64x64_avg_c_vp9, 12)};
+INSTANTIATE_TEST_CASE_P(C_12, SADavgVP9Test,
+ ::testing::ValuesIn(avg_c_vp9_highbd_12_tests));
+
+const SadMxNx4Func highbd_sad_64x64x4d_c = vp9_highbd_sad64x64x4d_c;
+const SadMxNx4Func highbd_sad_64x32x4d_c = vp9_highbd_sad64x32x4d_c;
+const SadMxNx4Func highbd_sad_32x64x4d_c = vp9_highbd_sad32x64x4d_c;
+const SadMxNx4Func highbd_sad_32x32x4d_c = vp9_highbd_sad32x32x4d_c;
+const SadMxNx4Func highbd_sad_32x16x4d_c = vp9_highbd_sad32x16x4d_c;
+const SadMxNx4Func highbd_sad_16x32x4d_c = vp9_highbd_sad16x32x4d_c;
+const SadMxNx4Func highbd_sad_16x16x4d_c = vp9_highbd_sad16x16x4d_c;
+const SadMxNx4Func highbd_sad_16x8x4d_c = vp9_highbd_sad16x8x4d_c;
+const SadMxNx4Func highbd_sad_8x16x4d_c = vp9_highbd_sad8x16x4d_c;
+const SadMxNx4Func highbd_sad_8x8x4d_c = vp9_highbd_sad8x8x4d_c;
+const SadMxNx4Func highbd_sad_8x4x4d_c = vp9_highbd_sad8x4x4d_c;
+const SadMxNx4Func highbd_sad_4x8x4d_c = vp9_highbd_sad4x8x4d_c;
+const SadMxNx4Func highbd_sad_4x4x4d_c = vp9_highbd_sad4x4x4d_c;
+INSTANTIATE_TEST_CASE_P(C_8, SADx4Test, ::testing::Values(
+ make_tuple(64, 64, highbd_sad_64x64x4d_c, 8),
+ make_tuple(64, 32, highbd_sad_64x32x4d_c, 8),
+ make_tuple(32, 64, highbd_sad_32x64x4d_c, 8),
+ make_tuple(32, 32, highbd_sad_32x32x4d_c, 8),
+ make_tuple(32, 16, highbd_sad_32x16x4d_c, 8),
+ make_tuple(16, 32, highbd_sad_16x32x4d_c, 8),
+ make_tuple(16, 16, highbd_sad_16x16x4d_c, 8),
+ make_tuple(16, 8, highbd_sad_16x8x4d_c, 8),
+ make_tuple(8, 16, highbd_sad_8x16x4d_c, 8),
+ make_tuple(8, 8, highbd_sad_8x8x4d_c, 8),
+ make_tuple(8, 4, highbd_sad_8x4x4d_c, 8),
+ make_tuple(4, 8, highbd_sad_4x8x4d_c, 8),
+ make_tuple(4, 4, highbd_sad_4x4x4d_c, 8)));
+
+INSTANTIATE_TEST_CASE_P(C_10, SADx4Test, ::testing::Values(
+ make_tuple(64, 64, highbd_sad_64x64x4d_c, 10),
+ make_tuple(64, 32, highbd_sad_64x32x4d_c, 10),
+ make_tuple(32, 64, highbd_sad_32x64x4d_c, 10),
+ make_tuple(32, 32, highbd_sad_32x32x4d_c, 10),
+ make_tuple(32, 16, highbd_sad_32x16x4d_c, 10),
+ make_tuple(16, 32, highbd_sad_16x32x4d_c, 10),
+ make_tuple(16, 16, highbd_sad_16x16x4d_c, 10),
+ make_tuple(16, 8, highbd_sad_16x8x4d_c, 10),
+ make_tuple(8, 16, highbd_sad_8x16x4d_c, 10),
+ make_tuple(8, 8, highbd_sad_8x8x4d_c, 10),
+ make_tuple(8, 4, highbd_sad_8x4x4d_c, 10),
+ make_tuple(4, 8, highbd_sad_4x8x4d_c, 10),
+ make_tuple(4, 4, highbd_sad_4x4x4d_c, 10)));
+
+INSTANTIATE_TEST_CASE_P(C_12, SADx4Test, ::testing::Values(
+ make_tuple(64, 64, highbd_sad_64x64x4d_c, 12),
+ make_tuple(64, 32, highbd_sad_64x32x4d_c, 12),
+ make_tuple(32, 64, highbd_sad_32x64x4d_c, 12),
+ make_tuple(32, 32, highbd_sad_32x32x4d_c, 12),
+ make_tuple(32, 16, highbd_sad_32x16x4d_c, 12),
+ make_tuple(16, 32, highbd_sad_16x32x4d_c, 12),
+ make_tuple(16, 16, highbd_sad_16x16x4d_c, 12),
+ make_tuple(16, 8, highbd_sad_16x8x4d_c, 12),
+ make_tuple(8, 16, highbd_sad_8x16x4d_c, 12),
+ make_tuple(8, 8, highbd_sad_8x8x4d_c, 12),
+ make_tuple(8, 4, highbd_sad_8x4x4d_c, 12),
+ make_tuple(4, 8, highbd_sad_4x8x4d_c, 12),
+ make_tuple(4, 4, highbd_sad_4x4x4d_c, 12)));
+#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // CONFIG_VP9_ENCODER
//------------------------------------------------------------------------------
@@ -453,7 +880,7 @@
#if CONFIG_VP8_ENCODER
const SadMxNFunc sad_16x16_armv6 = vp8_sad16x16_armv6;
INSTANTIATE_TEST_CASE_P(MEDIA, SADTest, ::testing::Values(
- make_tuple(16, 16, sad_16x16_armv6)));
+ make_tuple(16, 16, sad_16x16_armv6, -1)));
#endif // CONFIG_VP8_ENCODER
#endif // HAVE_MEDIA
@@ -465,11 +892,11 @@
const SadMxNFunc sad_8x8_neon = vp8_sad8x8_neon;
const SadMxNFunc sad_4x4_neon = vp8_sad4x4_neon;
INSTANTIATE_TEST_CASE_P(NEON, SADTest, ::testing::Values(
- make_tuple(16, 16, sad_16x16_neon),
- make_tuple(8, 16, sad_8x16_neon),
- make_tuple(16, 8, sad_16x8_neon),
- make_tuple(8, 8, sad_8x8_neon),
- make_tuple(4, 4, sad_4x4_neon)));
+ make_tuple(16, 16, sad_16x16_neon, -1),
+ make_tuple(8, 16, sad_8x16_neon, -1),
+ make_tuple(16, 8, sad_16x8_neon, -1),
+ make_tuple(8, 8, sad_8x8_neon, -1),
+ make_tuple(4, 4, sad_4x4_neon, -1)));
#endif // CONFIG_VP8_ENCODER
#if CONFIG_VP9_ENCODER
const SadMxNVp9Func sad_64x64_neon_vp9 = vp9_sad64x64_neon;
@@ -477,10 +904,10 @@
const SadMxNVp9Func sad_16x16_neon_vp9 = vp9_sad16x16_neon;
const SadMxNVp9Func sad_8x8_neon_vp9 = vp9_sad8x8_neon;
const SadMxNVp9Param neon_vp9_tests[] = {
- make_tuple(64, 64, sad_64x64_neon_vp9),
- make_tuple(32, 32, sad_32x32_neon_vp9),
- make_tuple(16, 16, sad_16x16_neon_vp9),
- make_tuple(8, 8, sad_8x8_neon_vp9),
+ make_tuple(64, 64, sad_64x64_neon_vp9, -1),
+ make_tuple(32, 32, sad_32x32_neon_vp9, -1),
+ make_tuple(16, 16, sad_16x16_neon_vp9, -1),
+ make_tuple(8, 8, sad_8x8_neon_vp9, -1),
};
INSTANTIATE_TEST_CASE_P(NEON, SADVP9Test, ::testing::ValuesIn(neon_vp9_tests));
#endif // CONFIG_VP9_ENCODER
@@ -496,11 +923,11 @@
const SadMxNFunc sad_8x8_mmx = vp8_sad8x8_mmx;
const SadMxNFunc sad_4x4_mmx = vp8_sad4x4_mmx;
const SadMxNParam mmx_tests[] = {
- make_tuple(16, 16, sad_16x16_mmx),
- make_tuple(8, 16, sad_8x16_mmx),
- make_tuple(16, 8, sad_16x8_mmx),
- make_tuple(8, 8, sad_8x8_mmx),
- make_tuple(4, 4, sad_4x4_mmx),
+ make_tuple(16, 16, sad_16x16_mmx, -1),
+ make_tuple(8, 16, sad_8x16_mmx, -1),
+ make_tuple(16, 8, sad_16x8_mmx, -1),
+ make_tuple(8, 8, sad_8x8_mmx, -1),
+ make_tuple(4, 4, sad_4x4_mmx, -1),
};
INSTANTIATE_TEST_CASE_P(MMX, SADTest, ::testing::ValuesIn(mmx_tests));
#endif // CONFIG_VP8_ENCODER
@@ -513,14 +940,14 @@
const SadMxNVp9Func sad_4x4_sse_vp9 = vp9_sad4x4_sse;
const SadMxNVp9Func sad_4x8_sse_vp9 = vp9_sad4x8_sse;
INSTANTIATE_TEST_CASE_P(SSE, SADVP9Test, ::testing::Values(
- make_tuple(4, 4, sad_4x4_sse_vp9),
- make_tuple(4, 8, sad_4x8_sse_vp9)));
+ make_tuple(4, 4, sad_4x4_sse_vp9, -1),
+ make_tuple(4, 8, sad_4x8_sse_vp9, -1)));
const SadMxNx4Func sad_4x8x4d_sse = vp9_sad4x8x4d_sse;
const SadMxNx4Func sad_4x4x4d_sse = vp9_sad4x4x4d_sse;
INSTANTIATE_TEST_CASE_P(SSE, SADx4Test, ::testing::Values(
- make_tuple(4, 8, sad_4x8x4d_sse),
- make_tuple(4, 4, sad_4x4x4d_sse)));
+ make_tuple(4, 8, sad_4x8x4d_sse, -1),
+ make_tuple(4, 4, sad_4x4x4d_sse, -1)));
#endif // CONFIG_USE_X86INC
#endif // CONFIG_VP9_ENCODER
#endif // HAVE_SSE
@@ -533,11 +960,11 @@
const SadMxNFunc sad_8x8_wmt = vp8_sad8x8_wmt;
const SadMxNFunc sad_4x4_wmt = vp8_sad4x4_wmt;
const SadMxNParam sse2_tests[] = {
- make_tuple(16, 16, sad_16x16_wmt),
- make_tuple(8, 16, sad_8x16_wmt),
- make_tuple(16, 8, sad_16x8_wmt),
- make_tuple(8, 8, sad_8x8_wmt),
- make_tuple(4, 4, sad_4x4_wmt),
+ make_tuple(16, 16, sad_16x16_wmt, -1),
+ make_tuple(8, 16, sad_8x16_wmt, -1),
+ make_tuple(16, 8, sad_16x8_wmt, -1),
+ make_tuple(8, 8, sad_8x8_wmt, -1),
+ make_tuple(4, 4, sad_4x4_wmt, -1),
};
INSTANTIATE_TEST_CASE_P(SSE2, SADTest, ::testing::ValuesIn(sse2_tests));
#endif // CONFIG_VP8_ENCODER
@@ -555,20 +982,6 @@
const SadMxNVp9Func sad_8x16_sse2_vp9 = vp9_sad8x16_sse2;
const SadMxNVp9Func sad_8x8_sse2_vp9 = vp9_sad8x8_sse2;
const SadMxNVp9Func sad_8x4_sse2_vp9 = vp9_sad8x4_sse2;
-const SadMxNVp9Param sse2_vp9_tests[] = {
- make_tuple(64, 64, sad_64x64_sse2_vp9),
- make_tuple(64, 32, sad_64x32_sse2_vp9),
- make_tuple(32, 64, sad_32x64_sse2_vp9),
- make_tuple(32, 32, sad_32x32_sse2_vp9),
- make_tuple(32, 16, sad_32x16_sse2_vp9),
- make_tuple(16, 32, sad_16x32_sse2_vp9),
- make_tuple(16, 16, sad_16x16_sse2_vp9),
- make_tuple(16, 8, sad_16x8_sse2_vp9),
- make_tuple(8, 16, sad_8x16_sse2_vp9),
- make_tuple(8, 8, sad_8x8_sse2_vp9),
- make_tuple(8, 4, sad_8x4_sse2_vp9),
-};
-INSTANTIATE_TEST_CASE_P(SSE2, SADVP9Test, ::testing::ValuesIn(sse2_vp9_tests));
const SadMxNx4Func sad_64x64x4d_sse2 = vp9_sad64x64x4d_sse2;
const SadMxNx4Func sad_64x32x4d_sse2 = vp9_sad64x32x4d_sse2;
@@ -581,18 +994,214 @@
const SadMxNx4Func sad_8x16x4d_sse2 = vp9_sad8x16x4d_sse2;
const SadMxNx4Func sad_8x8x4d_sse2 = vp9_sad8x8x4d_sse2;
const SadMxNx4Func sad_8x4x4d_sse2 = vp9_sad8x4x4d_sse2;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+const SadMxNVp9Func highbd_sad8x4_sse2_vp9 = vp9_highbd_sad8x4_sse2;
+const SadMxNVp9Func highbd_sad8x8_sse2_vp9 = vp9_highbd_sad8x8_sse2;
+const SadMxNVp9Func highbd_sad8x16_sse2_vp9 = vp9_highbd_sad8x16_sse2;
+const SadMxNVp9Func highbd_sad16x8_sse2_vp9 = vp9_highbd_sad16x8_sse2;
+const SadMxNVp9Func highbd_sad16x16_sse2_vp9 = vp9_highbd_sad16x16_sse2;
+const SadMxNVp9Func highbd_sad16x32_sse2_vp9 = vp9_highbd_sad16x32_sse2;
+const SadMxNVp9Func highbd_sad32x16_sse2_vp9 = vp9_highbd_sad32x16_sse2;
+const SadMxNVp9Func highbd_sad32x32_sse2_vp9 = vp9_highbd_sad32x32_sse2;
+const SadMxNVp9Func highbd_sad32x64_sse2_vp9 = vp9_highbd_sad32x64_sse2;
+const SadMxNVp9Func highbd_sad64x32_sse2_vp9 = vp9_highbd_sad64x32_sse2;
+const SadMxNVp9Func highbd_sad64x64_sse2_vp9 = vp9_highbd_sad64x64_sse2;
+
+INSTANTIATE_TEST_CASE_P(SSE2, SADVP9Test, ::testing::Values(
+ make_tuple(64, 64, sad_64x64_sse2_vp9, -1),
+ make_tuple(64, 32, sad_64x32_sse2_vp9, -1),
+ make_tuple(32, 64, sad_32x64_sse2_vp9, -1),
+ make_tuple(32, 32, sad_32x32_sse2_vp9, -1),
+ make_tuple(32, 16, sad_32x16_sse2_vp9, -1),
+ make_tuple(16, 32, sad_16x32_sse2_vp9, -1),
+ make_tuple(16, 16, sad_16x16_sse2_vp9, -1),
+ make_tuple(16, 8, sad_16x8_sse2_vp9, -1),
+ make_tuple(8, 16, sad_8x16_sse2_vp9, -1),
+ make_tuple(8, 8, sad_8x8_sse2_vp9, -1),
+ make_tuple(8, 4, sad_8x4_sse2_vp9, -1),
+ make_tuple(8, 4, highbd_sad8x4_sse2_vp9, 8),
+ make_tuple(8, 8, highbd_sad8x8_sse2_vp9, 8),
+ make_tuple(8, 16, highbd_sad8x16_sse2_vp9, 8),
+ make_tuple(16, 8, highbd_sad16x8_sse2_vp9, 8),
+ make_tuple(16, 16, highbd_sad16x16_sse2_vp9, 8),
+ make_tuple(16, 32, highbd_sad16x32_sse2_vp9, 8),
+ make_tuple(32, 16, highbd_sad32x16_sse2_vp9, 8),
+ make_tuple(32, 32, highbd_sad32x32_sse2_vp9, 8),
+ make_tuple(32, 64, highbd_sad32x64_sse2_vp9, 8),
+ make_tuple(64, 32, highbd_sad64x32_sse2_vp9, 8),
+ make_tuple(64, 64, highbd_sad64x64_sse2_vp9, 8),
+ make_tuple(8, 4, highbd_sad8x4_sse2_vp9, 10),
+ make_tuple(8, 8, highbd_sad8x8_sse2_vp9, 10),
+ make_tuple(8, 16, highbd_sad8x16_sse2_vp9, 10),
+ make_tuple(16, 8, highbd_sad16x8_sse2_vp9, 10),
+ make_tuple(16, 16, highbd_sad16x16_sse2_vp9, 10),
+ make_tuple(16, 32, highbd_sad16x32_sse2_vp9, 10),
+ make_tuple(32, 16, highbd_sad32x16_sse2_vp9, 10),
+ make_tuple(32, 32, highbd_sad32x32_sse2_vp9, 10),
+ make_tuple(32, 64, highbd_sad32x64_sse2_vp9, 10),
+ make_tuple(64, 32, highbd_sad64x32_sse2_vp9, 10),
+ make_tuple(64, 64, highbd_sad64x64_sse2_vp9, 10),
+ make_tuple(8, 4, highbd_sad8x4_sse2_vp9, 12),
+ make_tuple(8, 8, highbd_sad8x8_sse2_vp9, 12),
+ make_tuple(8, 16, highbd_sad8x16_sse2_vp9, 12),
+ make_tuple(16, 8, highbd_sad16x8_sse2_vp9, 12),
+ make_tuple(16, 16, highbd_sad16x16_sse2_vp9, 12),
+ make_tuple(16, 32, highbd_sad16x32_sse2_vp9, 12),
+ make_tuple(32, 16, highbd_sad32x16_sse2_vp9, 12),
+ make_tuple(32, 32, highbd_sad32x32_sse2_vp9, 12),
+ make_tuple(32, 64, highbd_sad32x64_sse2_vp9, 12),
+ make_tuple(64, 32, highbd_sad64x32_sse2_vp9, 12),
+ make_tuple(64, 64, highbd_sad64x64_sse2_vp9, 12)));
+
+const SadMxNAvgVp9Func highbd_sad8x4_avg_sse2_vp9 = vp9_highbd_sad8x4_avg_sse2;
+const SadMxNAvgVp9Func highbd_sad8x8_avg_sse2_vp9 = vp9_highbd_sad8x8_avg_sse2;
+const SadMxNAvgVp9Func highbd_sad8x16_avg_sse2_vp9 =
+ vp9_highbd_sad8x16_avg_sse2;
+const SadMxNAvgVp9Func highbd_sad16x8_avg_sse2_vp9 =
+ vp9_highbd_sad16x8_avg_sse2;
+const SadMxNAvgVp9Func highbd_sad16x16_avg_sse2_vp9 =
+ vp9_highbd_sad16x16_avg_sse2;
+const SadMxNAvgVp9Func highbd_sad16x32_avg_sse2_vp9 =
+ vp9_highbd_sad16x32_avg_sse2;
+const SadMxNAvgVp9Func highbd_sad32x16_avg_sse2_vp9 =
+ vp9_highbd_sad32x16_avg_sse2;
+const SadMxNAvgVp9Func highbd_sad32x32_avg_sse2_vp9 =
+ vp9_highbd_sad32x32_avg_sse2;
+const SadMxNAvgVp9Func highbd_sad32x64_avg_sse2_vp9 =
+ vp9_highbd_sad32x64_avg_sse2;
+const SadMxNAvgVp9Func highbd_sad64x32_avg_sse2_vp9 =
+ vp9_highbd_sad64x32_avg_sse2;
+const SadMxNAvgVp9Func highbd_sad64x64_avg_sse2_vp9 =
+ vp9_highbd_sad64x64_avg_sse2;
+
+INSTANTIATE_TEST_CASE_P(SSE2, SADavgVP9Test, ::testing::Values(
+ make_tuple(8, 4, highbd_sad8x4_avg_sse2_vp9, 8),
+ make_tuple(8, 8, highbd_sad8x8_avg_sse2_vp9, 8),
+ make_tuple(8, 16, highbd_sad8x16_avg_sse2_vp9, 8),
+ make_tuple(16, 8, highbd_sad16x8_avg_sse2_vp9, 8),
+ make_tuple(16, 16, highbd_sad16x16_avg_sse2_vp9, 8),
+ make_tuple(16, 32, highbd_sad16x32_avg_sse2_vp9, 8),
+ make_tuple(32, 16, highbd_sad32x16_avg_sse2_vp9, 8),
+ make_tuple(32, 32, highbd_sad32x32_avg_sse2_vp9, 8),
+ make_tuple(32, 64, highbd_sad32x64_avg_sse2_vp9, 8),
+ make_tuple(64, 32, highbd_sad64x32_avg_sse2_vp9, 8),
+ make_tuple(64, 64, highbd_sad64x64_avg_sse2_vp9, 8),
+ make_tuple(8, 4, highbd_sad8x4_avg_sse2_vp9, 10),
+ make_tuple(8, 8, highbd_sad8x8_avg_sse2_vp9, 10),
+ make_tuple(8, 16, highbd_sad8x16_avg_sse2_vp9, 10),
+ make_tuple(16, 8, highbd_sad16x8_avg_sse2_vp9, 10),
+ make_tuple(16, 16, highbd_sad16x16_avg_sse2_vp9, 10),
+ make_tuple(16, 32, highbd_sad16x32_avg_sse2_vp9, 10),
+ make_tuple(32, 16, highbd_sad32x16_avg_sse2_vp9, 10),
+ make_tuple(32, 32, highbd_sad32x32_avg_sse2_vp9, 10),
+ make_tuple(32, 64, highbd_sad32x64_avg_sse2_vp9, 10),
+ make_tuple(64, 32, highbd_sad64x32_avg_sse2_vp9, 10),
+ make_tuple(64, 64, highbd_sad64x64_avg_sse2_vp9, 10),
+ make_tuple(8, 4, highbd_sad8x4_avg_sse2_vp9, 12),
+ make_tuple(8, 8, highbd_sad8x8_avg_sse2_vp9, 12),
+ make_tuple(8, 16, highbd_sad8x16_avg_sse2_vp9, 12),
+ make_tuple(16, 8, highbd_sad16x8_avg_sse2_vp9, 12),
+ make_tuple(16, 16, highbd_sad16x16_avg_sse2_vp9, 12),
+ make_tuple(16, 32, highbd_sad16x32_avg_sse2_vp9, 12),
+ make_tuple(32, 16, highbd_sad32x16_avg_sse2_vp9, 12),
+ make_tuple(32, 32, highbd_sad32x32_avg_sse2_vp9, 12),
+ make_tuple(32, 64, highbd_sad32x64_avg_sse2_vp9, 12),
+ make_tuple(64, 32, highbd_sad64x32_avg_sse2_vp9, 12),
+ make_tuple(64, 64, highbd_sad64x64_avg_sse2_vp9, 12)));
+
+const SadMxNx4Func highbd_sad_64x64x4d_sse2 = vp9_highbd_sad64x64x4d_sse2;
+const SadMxNx4Func highbd_sad_64x32x4d_sse2 = vp9_highbd_sad64x32x4d_sse2;
+const SadMxNx4Func highbd_sad_32x64x4d_sse2 = vp9_highbd_sad32x64x4d_sse2;
+const SadMxNx4Func highbd_sad_32x32x4d_sse2 = vp9_highbd_sad32x32x4d_sse2;
+const SadMxNx4Func highbd_sad_32x16x4d_sse2 = vp9_highbd_sad32x16x4d_sse2;
+const SadMxNx4Func highbd_sad_16x32x4d_sse2 = vp9_highbd_sad16x32x4d_sse2;
+const SadMxNx4Func highbd_sad_16x16x4d_sse2 = vp9_highbd_sad16x16x4d_sse2;
+const SadMxNx4Func highbd_sad_16x8x4d_sse2 = vp9_highbd_sad16x8x4d_sse2;
+const SadMxNx4Func highbd_sad_8x16x4d_sse2 = vp9_highbd_sad8x16x4d_sse2;
+const SadMxNx4Func highbd_sad_8x8x4d_sse2 = vp9_highbd_sad8x8x4d_sse2;
+const SadMxNx4Func highbd_sad_8x4x4d_sse2 = vp9_highbd_sad8x4x4d_sse2;
+const SadMxNx4Func highbd_sad_4x8x4d_sse2 = vp9_highbd_sad4x8x4d_sse2;
+const SadMxNx4Func highbd_sad_4x4x4d_sse2 = vp9_highbd_sad4x4x4d_sse2;
+
INSTANTIATE_TEST_CASE_P(SSE2, SADx4Test, ::testing::Values(
- make_tuple(64, 64, sad_64x64x4d_sse2),
- make_tuple(64, 32, sad_64x32x4d_sse2),
- make_tuple(32, 64, sad_32x64x4d_sse2),
- make_tuple(32, 32, sad_32x32x4d_sse2),
- make_tuple(32, 16, sad_32x16x4d_sse2),
- make_tuple(16, 32, sad_16x32x4d_sse2),
- make_tuple(16, 16, sad_16x16x4d_sse2),
- make_tuple(16, 8, sad_16x8x4d_sse2),
- make_tuple(8, 16, sad_8x16x4d_sse2),
- make_tuple(8, 8, sad_8x8x4d_sse2),
- make_tuple(8, 4, sad_8x4x4d_sse2)));
+ make_tuple(64, 64, sad_64x64x4d_sse2, -1),
+ make_tuple(64, 32, sad_64x32x4d_sse2, -1),
+ make_tuple(32, 64, sad_32x64x4d_sse2, -1),
+ make_tuple(32, 32, sad_32x32x4d_sse2, -1),
+ make_tuple(32, 16, sad_32x16x4d_sse2, -1),
+ make_tuple(16, 32, sad_16x32x4d_sse2, -1),
+ make_tuple(16, 16, sad_16x16x4d_sse2, -1),
+ make_tuple(16, 8, sad_16x8x4d_sse2, -1),
+ make_tuple(8, 16, sad_8x16x4d_sse2, -1),
+ make_tuple(8, 8, sad_8x8x4d_sse2, -1),
+ make_tuple(8, 4, sad_8x4x4d_sse2, -1),
+ make_tuple(64, 64, highbd_sad_64x64x4d_sse2, 8),
+ make_tuple(64, 32, highbd_sad_64x32x4d_sse2, 8),
+ make_tuple(32, 64, highbd_sad_32x64x4d_sse2, 8),
+ make_tuple(32, 32, highbd_sad_32x32x4d_sse2, 8),
+ make_tuple(32, 16, highbd_sad_32x16x4d_sse2, 8),
+ make_tuple(16, 32, highbd_sad_16x32x4d_sse2, 8),
+ make_tuple(16, 16, highbd_sad_16x16x4d_sse2, 8),
+ make_tuple(16, 8, highbd_sad_16x8x4d_sse2, 8),
+ make_tuple(8, 16, highbd_sad_8x16x4d_sse2, 8),
+ make_tuple(8, 8, highbd_sad_8x8x4d_sse2, 8),
+ make_tuple(8, 4, highbd_sad_8x4x4d_sse2, 8),
+ make_tuple(4, 8, highbd_sad_4x8x4d_sse2, 8),
+ make_tuple(4, 4, highbd_sad_4x4x4d_sse2, 8),
+ make_tuple(64, 64, highbd_sad_64x64x4d_sse2, 10),
+ make_tuple(64, 32, highbd_sad_64x32x4d_sse2, 10),
+ make_tuple(32, 64, highbd_sad_32x64x4d_sse2, 10),
+ make_tuple(32, 32, highbd_sad_32x32x4d_sse2, 10),
+ make_tuple(32, 16, highbd_sad_32x16x4d_sse2, 10),
+ make_tuple(16, 32, highbd_sad_16x32x4d_sse2, 10),
+ make_tuple(16, 16, highbd_sad_16x16x4d_sse2, 10),
+ make_tuple(16, 8, highbd_sad_16x8x4d_sse2, 10),
+ make_tuple(8, 16, highbd_sad_8x16x4d_sse2, 10),
+ make_tuple(8, 8, highbd_sad_8x8x4d_sse2, 10),
+ make_tuple(8, 4, highbd_sad_8x4x4d_sse2, 10),
+ make_tuple(4, 8, highbd_sad_4x8x4d_sse2, 10),
+ make_tuple(4, 4, highbd_sad_4x4x4d_sse2, 10),
+ make_tuple(64, 64, highbd_sad_64x64x4d_sse2, 12),
+ make_tuple(64, 32, highbd_sad_64x32x4d_sse2, 12),
+ make_tuple(32, 64, highbd_sad_32x64x4d_sse2, 12),
+ make_tuple(32, 32, highbd_sad_32x32x4d_sse2, 12),
+ make_tuple(32, 16, highbd_sad_32x16x4d_sse2, 12),
+ make_tuple(16, 32, highbd_sad_16x32x4d_sse2, 12),
+ make_tuple(16, 16, highbd_sad_16x16x4d_sse2, 12),
+ make_tuple(16, 8, highbd_sad_16x8x4d_sse2, 12),
+ make_tuple(8, 16, highbd_sad_8x16x4d_sse2, 12),
+ make_tuple(8, 8, highbd_sad_8x8x4d_sse2, 12),
+ make_tuple(8, 4, highbd_sad_8x4x4d_sse2, 12),
+ make_tuple(4, 8, highbd_sad_4x8x4d_sse2, 12),
+ make_tuple(4, 4, highbd_sad_4x4x4d_sse2, 12)));
+#else
+INSTANTIATE_TEST_CASE_P(SSE2, SADVP9Test, ::testing::Values(
+ make_tuple(64, 64, sad_64x64_sse2_vp9, -1),
+ make_tuple(64, 32, sad_64x32_sse2_vp9, -1),
+ make_tuple(32, 64, sad_32x64_sse2_vp9, -1),
+ make_tuple(32, 32, sad_32x32_sse2_vp9, -1),
+ make_tuple(32, 16, sad_32x16_sse2_vp9, -1),
+ make_tuple(16, 32, sad_16x32_sse2_vp9, -1),
+ make_tuple(16, 16, sad_16x16_sse2_vp9, -1),
+ make_tuple(16, 8, sad_16x8_sse2_vp9, -1),
+ make_tuple(8, 16, sad_8x16_sse2_vp9, -1),
+ make_tuple(8, 8, sad_8x8_sse2_vp9, -1),
+ make_tuple(8, 4, sad_8x4_sse2_vp9, -1)));
+
+INSTANTIATE_TEST_CASE_P(SSE2, SADx4Test, ::testing::Values(
+ make_tuple(64, 64, sad_64x64x4d_sse2, -1),
+ make_tuple(64, 32, sad_64x32x4d_sse2, -1),
+ make_tuple(32, 64, sad_32x64x4d_sse2, -1),
+ make_tuple(32, 32, sad_32x32x4d_sse2, -1),
+ make_tuple(32, 16, sad_32x16x4d_sse2, -1),
+ make_tuple(16, 32, sad_16x32x4d_sse2, -1),
+ make_tuple(16, 16, sad_16x16x4d_sse2, -1),
+ make_tuple(16, 8, sad_16x8x4d_sse2, -1),
+ make_tuple(8, 16, sad_8x16x4d_sse2, -1),
+ make_tuple(8, 8, sad_8x8x4d_sse2, -1),
+ make_tuple(8, 4, sad_8x4x4d_sse2, -1)));
+#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // CONFIG_USE_X86INC
#endif // CONFIG_VP9_ENCODER
#endif // HAVE_SSE2
@@ -605,11 +1214,11 @@
const SadMxNx4Func sad_8x8x4d_sse3 = vp8_sad8x8x4d_sse3;
const SadMxNx4Func sad_4x4x4d_sse3 = vp8_sad4x4x4d_sse3;
INSTANTIATE_TEST_CASE_P(SSE3, SADx4Test, ::testing::Values(
- make_tuple(16, 16, sad_16x16x4d_sse3),
- make_tuple(16, 8, sad_16x8x4d_sse3),
- make_tuple(8, 16, sad_8x16x4d_sse3),
- make_tuple(8, 8, sad_8x8x4d_sse3),
- make_tuple(4, 4, sad_4x4x4d_sse3)));
+ make_tuple(16, 16, sad_16x16x4d_sse3, -1),
+ make_tuple(16, 8, sad_16x8x4d_sse3, -1),
+ make_tuple(8, 16, sad_8x16x4d_sse3, -1),
+ make_tuple(8, 8, sad_8x8x4d_sse3, -1),
+ make_tuple(4, 4, sad_4x4x4d_sse3, -1)));
#endif // CONFIG_VP8_ENCODER
#endif // HAVE_SSE3
@@ -618,32 +1227,18 @@
#if CONFIG_VP8_ENCODER
const SadMxNFunc sad_16x16_sse3 = vp8_sad16x16_sse3;
INSTANTIATE_TEST_CASE_P(SSE3, SADTest, ::testing::Values(
- make_tuple(16, 16, sad_16x16_sse3)));
+ make_tuple(16, 16, sad_16x16_sse3, -1)));
#endif // CONFIG_VP8_ENCODER
#endif // CONFIG_USE_X86INC
#endif // HAVE_SSSE3
#if HAVE_AVX2
#if CONFIG_VP9_ENCODER
-const SadMxNVp9Func sad_64x64_avx2_vp9 = vp9_sad64x64_avx2;
-const SadMxNVp9Func sad_64x32_avx2_vp9 = vp9_sad64x32_avx2;
-const SadMxNVp9Func sad_32x64_avx2_vp9 = vp9_sad32x64_avx2;
-const SadMxNVp9Func sad_32x32_avx2_vp9 = vp9_sad32x32_avx2;
-const SadMxNVp9Func sad_32x16_avx2_vp9 = vp9_sad32x16_avx2;
-const SadMxNVp9Param avx2_vp9_tests[] = {
- make_tuple(64, 64, sad_64x64_avx2_vp9),
- make_tuple(64, 32, sad_64x32_avx2_vp9),
- make_tuple(32, 64, sad_32x64_avx2_vp9),
- make_tuple(32, 32, sad_32x32_avx2_vp9),
- make_tuple(32, 16, sad_32x16_avx2_vp9),
-};
-INSTANTIATE_TEST_CASE_P(AVX2, SADVP9Test, ::testing::ValuesIn(avx2_vp9_tests));
-
const SadMxNx4Func sad_64x64x4d_avx2 = vp9_sad64x64x4d_avx2;
const SadMxNx4Func sad_32x32x4d_avx2 = vp9_sad32x32x4d_avx2;
INSTANTIATE_TEST_CASE_P(AVX2, SADx4Test, ::testing::Values(
- make_tuple(32, 32, sad_32x32x4d_avx2),
- make_tuple(64, 64, sad_64x64x4d_avx2)));
+ make_tuple(32, 32, sad_32x32x4d_avx2, -1),
+ make_tuple(64, 64, sad_64x64x4d_avx2, -1)));
#endif // CONFIG_VP9_ENCODER
#endif // HAVE_AVX2
diff --git a/test/test-data.mk b/test/test-data.mk
index e2da193..0a40eb5 100644
--- a/test/test-data.mk
+++ b/test/test-data.mk
@@ -695,6 +695,8 @@
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-05-resize.ivf.s59293_r01-05_b6-.ivf.res
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-08-tile_1x2_frame_parallel.webm.ivf.s47039_r01-05_b6-.ivf
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-08-tile_1x2_frame_parallel.webm.ivf.s47039_r01-05_b6-.ivf.res
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-08-tile_1x8_frame_parallel.webm.ivf.s288_r01-05_b6-.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-08-tile_1x8_frame_parallel.webm.ivf.s288_r01-05_b6-.ivf.res
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-08-tile_1x4_frame_parallel_all_key.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-08-tile_1x4_frame_parallel_all_key.webm.res
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-09-aq2.webm.ivf.s3984_r01-05_b6-.v2.ivf
diff --git a/test/test-data.sha1 b/test/test-data.sha1
index 3d1cd65..428bd56 100644
--- a/test/test-data.sha1
+++ b/test/test-data.sha1
@@ -706,6 +706,8 @@
717da707afcaa1f692ff1946f291054eb75a4f06 screendata.y4m
b7c1296630cdf1a7ef493d15ff4f9eb2999202f6 invalid-vp90-2-08-tile_1x2_frame_parallel.webm.ivf.s47039_r01-05_b6-.ivf
0a3884edb3fd8f9d9b500223e650f7de257b67d8 invalid-vp90-2-08-tile_1x2_frame_parallel.webm.ivf.s47039_r01-05_b6-.ivf.res
+359e138dfb66863828397b77000ea7a83c844d02 invalid-vp90-2-08-tile_1x8_frame_parallel.webm.ivf.s288_r01-05_b6-.ivf
+bbd33de01c17b165b4ce00308e8a19a942023ab8 invalid-vp90-2-08-tile_1x8_frame_parallel.webm.ivf.s288_r01-05_b6-.ivf.res
fac89b5735be8a86b0dc05159f996a5c3208ae32 invalid-vp90-2-09-aq2.webm.ivf.s3984_r01-05_b6-.v2.ivf
0a3884edb3fd8f9d9b500223e650f7de257b67d8 invalid-vp90-2-09-aq2.webm.ivf.s3984_r01-05_b6-.v2.ivf.res
4506dfdcdf8ee4250924b075a0dcf1f070f72e5a invalid-vp90-2-09-subpixel-00.ivf.s19552_r01-05_b6-.v2.ivf
diff --git a/vp8/common/rtcd_defs.pl b/vp8/common/rtcd_defs.pl
index 6756008..63fde4c 100644
--- a/vp8/common/rtcd_defs.pl
+++ b/vp8/common/rtcd_defs.pl
@@ -454,8 +454,7 @@
specialize qw/vp8_regular_quantize_b sse2 sse4_1/;
add_proto qw/void vp8_fast_quantize_b/, "struct block *, struct blockd *";
-specialize qw/vp8_fast_quantize_b sse2 ssse3 media neon/;
-$vp8_fast_quantize_b_media=vp8_fast_quantize_b_armv6;
+specialize qw/vp8_fast_quantize_b sse2 ssse3 neon/;
#
# Block subtraction
@@ -473,16 +472,13 @@
$vp8_mbuverror_sse2=vp8_mbuverror_xmm;
add_proto qw/void vp8_subtract_b/, "struct block *be, struct blockd *bd, int pitch";
-specialize qw/vp8_subtract_b mmx sse2 media neon/;
-$vp8_subtract_b_media=vp8_subtract_b_armv6;
+specialize qw/vp8_subtract_b mmx sse2 neon/;
add_proto qw/void vp8_subtract_mby/, "short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride";
-specialize qw/vp8_subtract_mby mmx sse2 media neon/;
-$vp8_subtract_mby_media=vp8_subtract_mby_armv6;
+specialize qw/vp8_subtract_mby mmx sse2 neon/;
add_proto qw/void vp8_subtract_mbuv/, "short *diff, unsigned char *usrc, unsigned char *vsrc, int src_stride, unsigned char *upred, unsigned char *vpred, int pred_stride";
-specialize qw/vp8_subtract_mbuv mmx sse2 media neon/;
-$vp8_subtract_mbuv_media=vp8_subtract_mbuv_armv6;
+specialize qw/vp8_subtract_mbuv mmx sse2 neon/;
#
# Motion search
diff --git a/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm b/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm
deleted file mode 100644
index 4abe818..0000000
--- a/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm
+++ /dev/null
@@ -1,310 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8_start_encode|
- EXPORT |vp8_encode_bool|
- EXPORT |vp8_stop_encode|
- EXPORT |vp8_encode_value|
- IMPORT |vp8_validate_buffer_arm|
-
- INCLUDE vp8_asm_enc_offsets.asm
-
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA |.text|, CODE, READONLY
-
- ; macro for validating write buffer position
- ; needs vp8_writer in r0
- ; start shall not be in r1
- MACRO
- VALIDATE_POS $start, $pos
- push {r0-r3, r12, lr} ; rest of regs are preserved by subroutine call
- ldr r2, [r0, #vp8_writer_buffer_end]
- ldr r3, [r0, #vp8_writer_error]
- mov r1, $pos
- mov r0, $start
- bl vp8_validate_buffer_arm
- pop {r0-r3, r12, lr}
- MEND
-
-; r0 BOOL_CODER *br
-; r1 unsigned char *source
-; r2 unsigned char *source_end
-|vp8_start_encode| PROC
- str r2, [r0, #vp8_writer_buffer_end]
- mov r12, #0
- mov r3, #255
- mvn r2, #23
- str r12, [r0, #vp8_writer_lowvalue]
- str r3, [r0, #vp8_writer_range]
- str r2, [r0, #vp8_writer_count]
- str r12, [r0, #vp8_writer_pos]
- str r1, [r0, #vp8_writer_buffer]
- bx lr
- ENDP
-
-; r0 BOOL_CODER *br
-; r1 int bit
-; r2 int probability
-|vp8_encode_bool| PROC
- push {r4-r10, lr}
-
- mov r4, r2
-
- ldr r2, [r0, #vp8_writer_lowvalue]
- ldr r5, [r0, #vp8_writer_range]
- ldr r3, [r0, #vp8_writer_count]
-
- sub r7, r5, #1 ; range-1
-
- cmp r1, #0
- mul r6, r4, r7 ; ((range-1) * probability)
-
- mov r7, #1
- add r4, r7, r6, lsr #8 ; 1 + (((range-1) * probability) >> 8)
-
- addne r2, r2, r4 ; if (bit) lowvalue += split
- subne r4, r5, r4 ; if (bit) range = range-split
-
- ; Counting the leading zeros is used to normalize range.
- clz r6, r4
- sub r6, r6, #24 ; shift
-
- ; Flag is set on the sum of count. This flag is used later
- ; to determine if count >= 0
- adds r3, r3, r6 ; count += shift
- lsl r5, r4, r6 ; range <<= shift
- bmi token_count_lt_zero ; if(count >= 0)
-
- sub r6, r6, r3 ; offset = shift - count
- sub r4, r6, #1 ; offset-1
- lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
- bpl token_high_bit_not_set
-
- ldr r4, [r0, #vp8_writer_pos] ; x
- sub r4, r4, #1 ; x = w->pos-1
- b token_zero_while_start
-token_zero_while_loop
- mov r9, #0
- strb r9, [r7, r4] ; w->buffer[x] =(unsigned char)0
- sub r4, r4, #1 ; x--
-token_zero_while_start
- cmp r4, #0
- ldrge r7, [r0, #vp8_writer_buffer]
- ldrb r1, [r7, r4]
- cmpge r1, #0xff
- beq token_zero_while_loop
-
- ldr r7, [r0, #vp8_writer_buffer]
- ldrb r9, [r7, r4] ; w->buffer[x]
- add r9, r9, #1
- strb r9, [r7, r4] ; w->buffer[x] + 1
-token_high_bit_not_set
- rsb r4, r6, #24 ; 24-offset
- ldr r9, [r0, #vp8_writer_buffer]
- lsr r7, r2, r4 ; lowvalue >> (24-offset)
- ldr r4, [r0, #vp8_writer_pos] ; w->pos
- lsl r2, r2, r6 ; lowvalue <<= offset
- mov r6, r3 ; shift = count
- add r1, r4, #1 ; w->pos++
- bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
- str r1, [r0, #vp8_writer_pos]
- sub r3, r3, #8 ; count -= 8
-
- VALIDATE_POS r9, r1 ; validate_buffer at pos
-
- strb r7, [r9, r4] ; w->buffer[w->pos++]
-
-token_count_lt_zero
- lsl r2, r2, r6 ; lowvalue <<= shift
-
- str r2, [r0, #vp8_writer_lowvalue]
- str r5, [r0, #vp8_writer_range]
- str r3, [r0, #vp8_writer_count]
- pop {r4-r10, pc}
- ENDP
-
-; r0 BOOL_CODER *br
-|vp8_stop_encode| PROC
- push {r4-r10, lr}
-
- ldr r2, [r0, #vp8_writer_lowvalue]
- ldr r5, [r0, #vp8_writer_range]
- ldr r3, [r0, #vp8_writer_count]
-
- mov r10, #32
-
-stop_encode_loop
- sub r7, r5, #1 ; range-1
-
- mov r4, r7, lsl #7 ; ((range-1) * 128)
-
- mov r7, #1
- add r4, r7, r4, lsr #8 ; 1 + (((range-1) * 128) >> 8)
-
- ; Counting the leading zeros is used to normalize range.
- clz r6, r4
- sub r6, r6, #24 ; shift
-
- ; Flag is set on the sum of count. This flag is used later
- ; to determine if count >= 0
- adds r3, r3, r6 ; count += shift
- lsl r5, r4, r6 ; range <<= shift
- bmi token_count_lt_zero_se ; if(count >= 0)
-
- sub r6, r6, r3 ; offset = shift - count
- sub r4, r6, #1 ; offset-1
- lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
- bpl token_high_bit_not_set_se
-
- ldr r4, [r0, #vp8_writer_pos] ; x
- sub r4, r4, #1 ; x = w->pos-1
- b token_zero_while_start_se
-token_zero_while_loop_se
- mov r9, #0
- strb r9, [r7, r4] ; w->buffer[x] =(unsigned char)0
- sub r4, r4, #1 ; x--
-token_zero_while_start_se
- cmp r4, #0
- ldrge r7, [r0, #vp8_writer_buffer]
- ldrb r1, [r7, r4]
- cmpge r1, #0xff
- beq token_zero_while_loop_se
-
- ldr r7, [r0, #vp8_writer_buffer]
- ldrb r9, [r7, r4] ; w->buffer[x]
- add r9, r9, #1
- strb r9, [r7, r4] ; w->buffer[x] + 1
-token_high_bit_not_set_se
- rsb r4, r6, #24 ; 24-offset
- ldr r9, [r0, #vp8_writer_buffer]
- lsr r7, r2, r4 ; lowvalue >> (24-offset)
- ldr r4, [r0, #vp8_writer_pos] ; w->pos
- lsl r2, r2, r6 ; lowvalue <<= offset
- mov r6, r3 ; shift = count
- add r1, r4, #1 ; w->pos++
- bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
- str r1, [r0, #vp8_writer_pos]
- sub r3, r3, #8 ; count -= 8
-
- VALIDATE_POS r9, r1 ; validate_buffer at pos
-
- strb r7, [r9, r4] ; w->buffer[w->pos++]
-
-token_count_lt_zero_se
- lsl r2, r2, r6 ; lowvalue <<= shift
-
- subs r10, r10, #1
- bne stop_encode_loop
-
- str r2, [r0, #vp8_writer_lowvalue]
- str r5, [r0, #vp8_writer_range]
- str r3, [r0, #vp8_writer_count]
- pop {r4-r10, pc}
-
- ENDP
-
-; r0 BOOL_CODER *br
-; r1 int data
-; r2 int bits
-|vp8_encode_value| PROC
- push {r4-r12, lr}
-
- mov r10, r2
-
- ldr r2, [r0, #vp8_writer_lowvalue]
- ldr r5, [r0, #vp8_writer_range]
- ldr r3, [r0, #vp8_writer_count]
-
- rsb r4, r10, #32 ; 32-n
-
- ; v is kept in r1 during the token pack loop
- lsl r1, r1, r4 ; r1 = v << 32 - n
-
-encode_value_loop
- sub r7, r5, #1 ; range-1
-
- ; Decisions are made based on the bit value shifted
- ; off of v, so set a flag here based on this.
- ; This value is refered to as "bb"
- lsls r1, r1, #1 ; bit = v >> n
- mov r4, r7, lsl #7 ; ((range-1) * 128)
-
- mov r7, #1
- add r4, r7, r4, lsr #8 ; 1 + (((range-1) * 128) >> 8)
-
- addcs r2, r2, r4 ; if (bit) lowvalue += split
- subcs r4, r5, r4 ; if (bit) range = range-split
-
- ; Counting the leading zeros is used to normalize range.
- clz r6, r4
- sub r6, r6, #24 ; shift
-
- ; Flag is set on the sum of count. This flag is used later
- ; to determine if count >= 0
- adds r3, r3, r6 ; count += shift
- lsl r5, r4, r6 ; range <<= shift
- bmi token_count_lt_zero_ev ; if(count >= 0)
-
- sub r6, r6, r3 ; offset = shift - count
- sub r4, r6, #1 ; offset-1
- lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
- bpl token_high_bit_not_set_ev
-
- ldr r4, [r0, #vp8_writer_pos] ; x
- sub r4, r4, #1 ; x = w->pos-1
- b token_zero_while_start_ev
-token_zero_while_loop_ev
- mov r9, #0
- strb r9, [r7, r4] ; w->buffer[x] =(unsigned char)0
- sub r4, r4, #1 ; x--
-token_zero_while_start_ev
- cmp r4, #0
- ldrge r7, [r0, #vp8_writer_buffer]
- ldrb r11, [r7, r4]
- cmpge r11, #0xff
- beq token_zero_while_loop_ev
-
- ldr r7, [r0, #vp8_writer_buffer]
- ldrb r9, [r7, r4] ; w->buffer[x]
- add r9, r9, #1
- strb r9, [r7, r4] ; w->buffer[x] + 1
-token_high_bit_not_set_ev
- rsb r4, r6, #24 ; 24-offset
- ldr r9, [r0, #vp8_writer_buffer]
- lsr r7, r2, r4 ; lowvalue >> (24-offset)
- ldr r4, [r0, #vp8_writer_pos] ; w->pos
- lsl r2, r2, r6 ; lowvalue <<= offset
- mov r6, r3 ; shift = count
- add r11, r4, #1 ; w->pos++
- bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
- str r11, [r0, #vp8_writer_pos]
- sub r3, r3, #8 ; count -= 8
-
- VALIDATE_POS r9, r11 ; validate_buffer at pos
-
- strb r7, [r9, r4] ; w->buffer[w->pos++]
-
-token_count_lt_zero_ev
- lsl r2, r2, r6 ; lowvalue <<= shift
-
- subs r10, r10, #1
- bne encode_value_loop
-
- str r2, [r0, #vp8_writer_lowvalue]
- str r5, [r0, #vp8_writer_range]
- str r3, [r0, #vp8_writer_count]
- pop {r4-r12, pc}
- ENDP
-
- END
diff --git a/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm
deleted file mode 100644
index 90a141c..0000000
--- a/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm
+++ /dev/null
@@ -1,317 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8cx_pack_tokens_armv5|
- IMPORT |vp8_validate_buffer_arm|
-
- INCLUDE vp8_asm_enc_offsets.asm
-
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA |.text|, CODE, READONLY
-
-
- ; macro for validating write buffer position
- ; needs vp8_writer in r0
- ; start shall not be in r1
- MACRO
- VALIDATE_POS $start, $pos
- push {r0-r3, r12, lr} ; rest of regs are preserved by subroutine call
- ldr r2, [r0, #vp8_writer_buffer_end]
- ldr r3, [r0, #vp8_writer_error]
- mov r1, $pos
- mov r0, $start
- bl vp8_validate_buffer_arm
- pop {r0-r3, r12, lr}
- MEND
-
-
-; r0 vp8_writer *w
-; r1 const TOKENEXTRA *p
-; r2 int xcount
-; r3 vp8_coef_encodings
-; s0 vp8_extra_bits
-; s1 vp8_coef_tree
-|vp8cx_pack_tokens_armv5| PROC
- push {r4-r12, lr}
- sub sp, sp, #16
-
- ; Add size of xcount * sizeof (TOKENEXTRA) to get stop
- ; sizeof (TOKENEXTRA) is 8
- add r2, r1, r2, lsl #3 ; stop = p + xcount*sizeof(TOKENEXTRA)
- str r2, [sp, #0]
- str r3, [sp, #8] ; save vp8_coef_encodings
- ldr r2, [r0, #vp8_writer_lowvalue]
- ldr r5, [r0, #vp8_writer_range]
- ldr r3, [r0, #vp8_writer_count]
- b check_p_lt_stop
-
-while_p_lt_stop
- ldrb r6, [r1, #tokenextra_token] ; t
- ldr r4, [sp, #8] ; vp8_coef_encodings
- mov lr, #0
- add r4, r4, r6, lsl #3 ; a = vp8_coef_encodings + t
- ldr r9, [r1, #tokenextra_context_tree] ; pp
-
- ldrb r7, [r1, #tokenextra_skip_eob_node]
-
- ldr r6, [r4, #vp8_token_value] ; v
- ldr r8, [r4, #vp8_token_len] ; n
-
- ; vp8 specific skip_eob_node
- cmp r7, #0
- movne lr, #2 ; i = 2
- subne r8, r8, #1 ; --n
-
- rsb r4, r8, #32 ; 32-n
- ldr r10, [sp, #60] ; vp8_coef_tree
-
- ; v is kept in r12 during the token pack loop
- lsl r12, r6, r4 ; r12 = v << 32 - n
-
-; loop start
-token_loop
- ldrb r4, [r9, lr, asr #1] ; pp [i>>1]
- sub r7, r5, #1 ; range-1
-
- ; Decisions are made based on the bit value shifted
- ; off of v, so set a flag here based on this.
- ; This value is refered to as "bb"
- lsls r12, r12, #1 ; bb = v >> n
- mul r6, r4, r7 ; ((range-1) * pp[i>>1]))
-
- ; bb can only be 0 or 1. So only execute this statement
- ; if bb == 1, otherwise it will act like i + 0
- addcs lr, lr, #1 ; i + bb
-
- mov r7, #1
- ldrsb lr, [r10, lr] ; i = vp8_coef_tree[i+bb]
- add r4, r7, r6, lsr #8 ; 1 + (((range-1) * pp[i>>1]) >> 8)
-
- addcs r2, r2, r4 ; if (bb) lowvalue += split
- subcs r4, r5, r4 ; if (bb) range = range-split
-
- ; Counting the leading zeros is used to normalize range.
- clz r6, r4
- sub r6, r6, #24 ; shift
-
- ; Flag is set on the sum of count. This flag is used later
- ; to determine if count >= 0
- adds r3, r3, r6 ; count += shift
- lsl r5, r4, r6 ; range <<= shift
- bmi token_count_lt_zero ; if(count >= 0)
-
- sub r6, r6, r3 ; offset = shift - count
- sub r4, r6, #1 ; offset-1
- lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
- bpl token_high_bit_not_set
-
- ldr r4, [r0, #vp8_writer_pos] ; x
- sub r4, r4, #1 ; x = w->pos-1
- b token_zero_while_start
-token_zero_while_loop
- mov r10, #0
- strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0
- sub r4, r4, #1 ; x--
-token_zero_while_start
- cmp r4, #0
- ldrge r7, [r0, #vp8_writer_buffer]
- ldrb r11, [r7, r4]
- cmpge r11, #0xff
- beq token_zero_while_loop
-
- ldr r7, [r0, #vp8_writer_buffer]
- ldrb r10, [r7, r4] ; w->buffer[x]
- add r10, r10, #1
- strb r10, [r7, r4] ; w->buffer[x] + 1
-token_high_bit_not_set
- rsb r4, r6, #24 ; 24-offset
- ldr r10, [r0, #vp8_writer_buffer]
- lsr r7, r2, r4 ; lowvalue >> (24-offset)
- ldr r4, [r0, #vp8_writer_pos] ; w->pos
- lsl r2, r2, r6 ; lowvalue <<= offset
- mov r6, r3 ; shift = count
- add r11, r4, #1 ; w->pos++
- bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
- str r11, [r0, #vp8_writer_pos]
- sub r3, r3, #8 ; count -= 8
-
- VALIDATE_POS r10, r11 ; validate_buffer at pos
-
- strb r7, [r10, r4] ; w->buffer[w->pos++]
-
- ; r10 is used earlier in the loop, but r10 is used as
- ; temp variable here. So after r10 is used, reload
- ; vp8_coef_tree_dcd into r10
- ldr r10, [sp, #60] ; vp8_coef_tree
-
-token_count_lt_zero
- lsl r2, r2, r6 ; lowvalue <<= shift
-
- subs r8, r8, #1 ; --n
- bne token_loop
-
- ldrb r6, [r1, #tokenextra_token] ; t
- ldr r7, [sp, #56] ; vp8_extra_bits
- ; Add t * sizeof (vp8_extra_bit_struct) to get the desired
- ; element. Here vp8_extra_bit_struct == 16
- add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t
-
- ldr r4, [r12, #vp8_extra_bit_struct_base_val]
- cmp r4, #0
- beq skip_extra_bits
-
-; if( b->base_val)
- ldr r8, [r12, #vp8_extra_bit_struct_len] ; L
- ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra
- cmp r8, #0 ; if( L)
- beq no_extra_bits
-
- ldr r9, [r12, #vp8_extra_bit_struct_prob]
- asr r7, lr, #1 ; v=e>>1
-
- ldr r10, [r12, #vp8_extra_bit_struct_tree]
- str r10, [sp, #4] ; b->tree
-
- rsb r4, r8, #32
- lsl r12, r7, r4
-
- mov lr, #0 ; i = 0
-
-extra_bits_loop
- ldrb r4, [r9, lr, asr #1] ; pp[i>>1]
- sub r7, r5, #1 ; range-1
- lsls r12, r12, #1 ; v >> n
- mul r6, r4, r7 ; (range-1) * pp[i>>1]
- addcs lr, lr, #1 ; i + bb
-
- mov r7, #1
- ldrsb lr, [r10, lr] ; i = b->tree[i+bb]
- add r4, r7, r6, lsr #8 ; split = 1 + (((range-1) * pp[i>>1]) >> 8)
-
- addcs r2, r2, r4 ; if (bb) lowvalue += split
- subcs r4, r5, r4 ; if (bb) range = range-split
-
- clz r6, r4
- sub r6, r6, #24
-
- adds r3, r3, r6 ; count += shift
- lsl r5, r4, r6 ; range <<= shift
- bmi extra_count_lt_zero ; if(count >= 0)
-
- sub r6, r6, r3 ; offset= shift - count
- sub r4, r6, #1 ; offset-1
- lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
- bpl extra_high_bit_not_set
-
- ldr r4, [r0, #vp8_writer_pos] ; x
- sub r4, r4, #1 ; x = w->pos - 1
- b extra_zero_while_start
-extra_zero_while_loop
- mov r10, #0
- strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0
- sub r4, r4, #1 ; x--
-extra_zero_while_start
- cmp r4, #0
- ldrge r7, [r0, #vp8_writer_buffer]
- ldrb r11, [r7, r4]
- cmpge r11, #0xff
- beq extra_zero_while_loop
-
- ldr r7, [r0, #vp8_writer_buffer]
- ldrb r10, [r7, r4]
- add r10, r10, #1
- strb r10, [r7, r4]
-extra_high_bit_not_set
- rsb r4, r6, #24 ; 24-offset
- ldr r10, [r0, #vp8_writer_buffer]
- lsr r7, r2, r4 ; lowvalue >> (24-offset)
- ldr r4, [r0, #vp8_writer_pos]
- lsl r2, r2, r6 ; lowvalue <<= offset
- mov r6, r3 ; shift = count
- add r11, r4, #1 ; w->pos++
- bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
- str r11, [r0, #vp8_writer_pos]
- sub r3, r3, #8 ; count -= 8
-
- VALIDATE_POS r10, r11 ; validate_buffer at pos
-
- strb r7, [r10, r4] ; w->buffer[w->pos++]=(lowvalue >> (24-offset))
- ldr r10, [sp, #4] ; b->tree
-extra_count_lt_zero
- lsl r2, r2, r6
-
- subs r8, r8, #1 ; --n
- bne extra_bits_loop ; while (n)
-
-no_extra_bits
- ldr lr, [r1, #4] ; e = p->Extra
- add r4, r5, #1 ; range + 1
- tst lr, #1
- lsr r4, r4, #1 ; split = (range + 1) >> 1
- addne r2, r2, r4 ; lowvalue += split
- subne r4, r5, r4 ; range = range-split
- tst r2, #0x80000000 ; lowvalue & 0x80000000
- lsl r5, r4, #1 ; range <<= 1
- beq end_high_bit_not_set
-
- ldr r4, [r0, #vp8_writer_pos]
- mov r7, #0
- sub r4, r4, #1
- b end_zero_while_start
-end_zero_while_loop
- strb r7, [r6, r4]
- sub r4, r4, #1 ; x--
-end_zero_while_start
- cmp r4, #0
- ldrge r6, [r0, #vp8_writer_buffer]
- ldrb r12, [r6, r4]
- cmpge r12, #0xff
- beq end_zero_while_loop
-
- ldr r6, [r0, #vp8_writer_buffer]
- ldrb r7, [r6, r4]
- add r7, r7, #1
- strb r7, [r6, r4]
-end_high_bit_not_set
- adds r3, r3, #1 ; ++count
- lsl r2, r2, #1 ; lowvalue <<= 1
- bne end_count_zero
-
- ldr r4, [r0, #vp8_writer_pos]
- mvn r3, #7
- ldr r7, [r0, #vp8_writer_buffer]
- lsr r6, r2, #24 ; lowvalue >> 24
- add r12, r4, #1 ; w->pos++
- bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
- str r12, [r0, #vp8_writer_pos]
-
- VALIDATE_POS r7, r12 ; validate_buffer at pos
-
- strb r6, [r7, r4]
-end_count_zero
-skip_extra_bits
- add r1, r1, #TOKENEXTRA_SZ ; ++p
-check_p_lt_stop
- ldr r4, [sp, #0] ; stop
- cmp r1, r4 ; while( p < stop)
- bcc while_p_lt_stop
-
- str r2, [r0, #vp8_writer_lowvalue]
- str r5, [r0, #vp8_writer_range]
- str r3, [r0, #vp8_writer_count]
- add sp, sp, #16
- pop {r4-r12, pc}
- ENDP
-
- END
diff --git a/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm
deleted file mode 100644
index 3a8d17a..0000000
--- a/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm
+++ /dev/null
@@ -1,352 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8cx_pack_mb_row_tokens_armv5|
- IMPORT |vp8_validate_buffer_arm|
-
- INCLUDE vp8_asm_enc_offsets.asm
-
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA |.text|, CODE, READONLY
-
-
- ; macro for validating write buffer position
- ; needs vp8_writer in r0
- ; start shall not be in r1
- MACRO
- VALIDATE_POS $start, $pos
- push {r0-r3, r12, lr} ; rest of regs are preserved by subroutine call
- ldr r2, [r0, #vp8_writer_buffer_end]
- ldr r3, [r0, #vp8_writer_error]
- mov r1, $pos
- mov r0, $start
- bl vp8_validate_buffer_arm
- pop {r0-r3, r12, lr}
- MEND
-
-; r0 VP8_COMP *cpi
-; r1 vp8_writer *w
-; r2 vp8_coef_encodings
-; r3 vp8_extra_bits
-; s0 vp8_coef_tree
-
-|vp8cx_pack_mb_row_tokens_armv5| PROC
- push {r4-r12, lr}
- sub sp, sp, #24
-
- ; Compute address of cpi->common.mb_rows
- ldr r4, _VP8_COMP_common_
- ldr r6, _VP8_COMMON_MBrows_
- add r4, r0, r4
-
- ldr r5, [r4, r6] ; load up mb_rows
-
- str r2, [sp, #20] ; save vp8_coef_encodings
- str r5, [sp, #12] ; save mb_rows
- str r3, [sp, #8] ; save vp8_extra_bits
-
- ldr r4, _VP8_COMP_tplist_
- add r4, r0, r4
- ldr r7, [r4, #0] ; dereference cpi->tp_list
-
- mov r0, r1 ; keep same as other loops
-
- ldr r2, [r0, #vp8_writer_lowvalue]
- ldr r5, [r0, #vp8_writer_range]
- ldr r3, [r0, #vp8_writer_count]
-
-mb_row_loop
-
- ldr r1, [r7, #tokenlist_start]
- ldr r9, [r7, #tokenlist_stop]
- str r9, [sp, #0] ; save stop for later comparison
- str r7, [sp, #16] ; tokenlist address for next time
-
- b check_p_lt_stop
-
- ; actuall work gets done here!
-
-while_p_lt_stop
- ldrb r6, [r1, #tokenextra_token] ; t
- ldr r4, [sp, #20] ; vp8_coef_encodings
- mov lr, #0
- add r4, r4, r6, lsl #3 ; a = vp8_coef_encodings + t
- ldr r9, [r1, #tokenextra_context_tree] ; pp
-
- ldrb r7, [r1, #tokenextra_skip_eob_node]
-
- ldr r6, [r4, #vp8_token_value] ; v
- ldr r8, [r4, #vp8_token_len] ; n
-
- ; vp8 specific skip_eob_node
- cmp r7, #0
- movne lr, #2 ; i = 2
- subne r8, r8, #1 ; --n
-
- rsb r4, r8, #32 ; 32-n
- ldr r10, [sp, #64] ; vp8_coef_tree
-
- ; v is kept in r12 during the token pack loop
- lsl r12, r6, r4 ; r12 = v << 32 - n
-
-; loop start
-token_loop
- ldrb r4, [r9, lr, asr #1] ; pp [i>>1]
- sub r7, r5, #1 ; range-1
-
- ; Decisions are made based on the bit value shifted
- ; off of v, so set a flag here based on this.
- ; This value is refered to as "bb"
- lsls r12, r12, #1 ; bb = v >> n
- mul r6, r4, r7 ; ((range-1) * pp[i>>1]))
-
- ; bb can only be 0 or 1. So only execute this statement
- ; if bb == 1, otherwise it will act like i + 0
- addcs lr, lr, #1 ; i + bb
-
- mov r7, #1
- ldrsb lr, [r10, lr] ; i = vp8_coef_tree[i+bb]
- add r4, r7, r6, lsr #8 ; 1 + (((range-1) * pp[i>>1]) >> 8)
-
- addcs r2, r2, r4 ; if (bb) lowvalue += split
- subcs r4, r5, r4 ; if (bb) range = range-split
-
- ; Counting the leading zeros is used to normalize range.
- clz r6, r4
- sub r6, r6, #24 ; shift
-
- ; Flag is set on the sum of count. This flag is used later
- ; to determine if count >= 0
- adds r3, r3, r6 ; count += shift
- lsl r5, r4, r6 ; range <<= shift
- bmi token_count_lt_zero ; if(count >= 0)
-
- sub r6, r6, r3 ; offset = shift - count
- sub r4, r6, #1 ; offset-1
- lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
- bpl token_high_bit_not_set
-
- ldr r4, [r0, #vp8_writer_pos] ; x
- sub r4, r4, #1 ; x = w->pos-1
- b token_zero_while_start
-token_zero_while_loop
- mov r10, #0
- strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0
- sub r4, r4, #1 ; x--
-token_zero_while_start
- cmp r4, #0
- ldrge r7, [r0, #vp8_writer_buffer]
- ldrb r11, [r7, r4]
- cmpge r11, #0xff
- beq token_zero_while_loop
-
- ldr r7, [r0, #vp8_writer_buffer]
- ldrb r10, [r7, r4] ; w->buffer[x]
- add r10, r10, #1
- strb r10, [r7, r4] ; w->buffer[x] + 1
-token_high_bit_not_set
- rsb r4, r6, #24 ; 24-offset
- ldr r10, [r0, #vp8_writer_buffer]
- lsr r7, r2, r4 ; lowvalue >> (24-offset)
- ldr r4, [r0, #vp8_writer_pos] ; w->pos
- lsl r2, r2, r6 ; lowvalue <<= offset
- mov r6, r3 ; shift = count
- add r11, r4, #1 ; w->pos++
- bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
- str r11, [r0, #vp8_writer_pos]
- sub r3, r3, #8 ; count -= 8
-
- VALIDATE_POS r10, r11 ; validate_buffer at pos
-
- strb r7, [r10, r4] ; w->buffer[w->pos++]
-
- ; r10 is used earlier in the loop, but r10 is used as
- ; temp variable here. So after r10 is used, reload
- ; vp8_coef_tree_dcd into r10
- ldr r10, [sp, #64] ; vp8_coef_tree
-
-token_count_lt_zero
- lsl r2, r2, r6 ; lowvalue <<= shift
-
- subs r8, r8, #1 ; --n
- bne token_loop
-
- ldrb r6, [r1, #tokenextra_token] ; t
- ldr r7, [sp, #8] ; vp8_extra_bits
- ; Add t * sizeof (vp8_extra_bit_struct) to get the desired
- ; element. Here vp8_extra_bit_struct == 16
- add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t
-
- ldr r4, [r12, #vp8_extra_bit_struct_base_val]
- cmp r4, #0
- beq skip_extra_bits
-
-; if( b->base_val)
- ldr r8, [r12, #vp8_extra_bit_struct_len] ; L
- ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra
- cmp r8, #0 ; if( L)
- beq no_extra_bits
-
- ldr r9, [r12, #vp8_extra_bit_struct_prob]
- asr r7, lr, #1 ; v=e>>1
-
- ldr r10, [r12, #vp8_extra_bit_struct_tree]
- str r10, [sp, #4] ; b->tree
-
- rsb r4, r8, #32
- lsl r12, r7, r4
-
- mov lr, #0 ; i = 0
-
-extra_bits_loop
- ldrb r4, [r9, lr, asr #1] ; pp[i>>1]
- sub r7, r5, #1 ; range-1
- lsls r12, r12, #1 ; v >> n
- mul r6, r4, r7 ; (range-1) * pp[i>>1]
- addcs lr, lr, #1 ; i + bb
-
- mov r7, #1
- ldrsb lr, [r10, lr] ; i = b->tree[i+bb]
- add r4, r7, r6, lsr #8 ; split = 1 + (((range-1) * pp[i>>1]) >> 8)
-
- addcs r2, r2, r4 ; if (bb) lowvalue += split
- subcs r4, r5, r4 ; if (bb) range = range-split
-
- clz r6, r4
- sub r6, r6, #24
-
- adds r3, r3, r6 ; count += shift
- lsl r5, r4, r6 ; range <<= shift
- bmi extra_count_lt_zero ; if(count >= 0)
-
- sub r6, r6, r3 ; offset= shift - count
- sub r4, r6, #1 ; offset-1
- lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
- bpl extra_high_bit_not_set
-
- ldr r4, [r0, #vp8_writer_pos] ; x
- sub r4, r4, #1 ; x = w->pos - 1
- b extra_zero_while_start
-extra_zero_while_loop
- mov r10, #0
- strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0
- sub r4, r4, #1 ; x--
-extra_zero_while_start
- cmp r4, #0
- ldrge r7, [r0, #vp8_writer_buffer]
- ldrb r11, [r7, r4]
- cmpge r11, #0xff
- beq extra_zero_while_loop
-
- ldr r7, [r0, #vp8_writer_buffer]
- ldrb r10, [r7, r4]
- add r10, r10, #1
- strb r10, [r7, r4]
-extra_high_bit_not_set
- rsb r4, r6, #24 ; 24-offset
- ldr r10, [r0, #vp8_writer_buffer]
- lsr r7, r2, r4 ; lowvalue >> (24-offset)
- ldr r4, [r0, #vp8_writer_pos]
- lsl r2, r2, r6 ; lowvalue <<= offset
- mov r6, r3 ; shift = count
- add r11, r4, #1 ; w->pos++
- bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
- str r11, [r0, #vp8_writer_pos]
- sub r3, r3, #8 ; count -= 8
-
- VALIDATE_POS r10, r11 ; validate_buffer at pos
-
- strb r7, [r10, r4] ; w->buffer[w->pos++]=(lowvalue >> (24-offset))
- ldr r10, [sp, #4] ; b->tree
-extra_count_lt_zero
- lsl r2, r2, r6
-
- subs r8, r8, #1 ; --n
- bne extra_bits_loop ; while (n)
-
-no_extra_bits
- ldr lr, [r1, #4] ; e = p->Extra
- add r4, r5, #1 ; range + 1
- tst lr, #1
- lsr r4, r4, #1 ; split = (range + 1) >> 1
- addne r2, r2, r4 ; lowvalue += split
- subne r4, r5, r4 ; range = range-split
- tst r2, #0x80000000 ; lowvalue & 0x80000000
- lsl r5, r4, #1 ; range <<= 1
- beq end_high_bit_not_set
-
- ldr r4, [r0, #vp8_writer_pos]
- mov r7, #0
- sub r4, r4, #1
- b end_zero_while_start
-end_zero_while_loop
- strb r7, [r6, r4]
- sub r4, r4, #1 ; x--
-end_zero_while_start
- cmp r4, #0
- ldrge r6, [r0, #vp8_writer_buffer]
- ldrb r12, [r6, r4]
- cmpge r12, #0xff
- beq end_zero_while_loop
-
- ldr r6, [r0, #vp8_writer_buffer]
- ldrb r7, [r6, r4]
- add r7, r7, #1
- strb r7, [r6, r4]
-end_high_bit_not_set
- adds r3, r3, #1 ; ++count
- lsl r2, r2, #1 ; lowvalue <<= 1
- bne end_count_zero
-
- ldr r4, [r0, #vp8_writer_pos]
- mvn r3, #7
- ldr r7, [r0, #vp8_writer_buffer]
- lsr r6, r2, #24 ; lowvalue >> 24
- add r12, r4, #1 ; w->pos++
- bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
- str r12, [r0, #vp8_writer_pos]
-
- VALIDATE_POS r7, r12 ; validate_buffer at pos
-
- strb r6, [r7, r4]
-end_count_zero
-skip_extra_bits
- add r1, r1, #TOKENEXTRA_SZ ; ++p
-check_p_lt_stop
- ldr r4, [sp, #0] ; stop
- cmp r1, r4 ; while( p < stop)
- bcc while_p_lt_stop
-
- ldr r6, [sp, #12] ; mb_rows
- ldr r7, [sp, #16] ; tokenlist address
- subs r6, r6, #1
- add r7, r7, #TOKENLIST_SZ ; next element in the array
- str r6, [sp, #12]
- bne mb_row_loop
-
- str r2, [r0, #vp8_writer_lowvalue]
- str r5, [r0, #vp8_writer_range]
- str r3, [r0, #vp8_writer_count]
- add sp, sp, #24
- pop {r4-r12, pc}
- ENDP
-
-_VP8_COMP_common_
- DCD vp8_comp_common
-_VP8_COMMON_MBrows_
- DCD vp8_common_mb_rows
-_VP8_COMP_tplist_
- DCD vp8_comp_tplist
-
- END
diff --git a/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm
deleted file mode 100644
index e9aa495..0000000
--- a/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm
+++ /dev/null
@@ -1,471 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8cx_pack_tokens_into_partitions_armv5|
- IMPORT |vp8_validate_buffer_arm|
-
- INCLUDE vp8_asm_enc_offsets.asm
-
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA |.text|, CODE, READONLY
-
- ; macro for validating write buffer position
- ; needs vp8_writer in r0
- ; start shall not be in r1
- MACRO
- VALIDATE_POS $start, $pos
- push {r0-r3, r12, lr} ; rest of regs are preserved by subroutine call
- ldr r2, [r0, #vp8_writer_buffer_end]
- ldr r3, [r0, #vp8_writer_error]
- mov r1, $pos
- mov r0, $start
- bl vp8_validate_buffer_arm
- pop {r0-r3, r12, lr}
- MEND
-
-; r0 VP8_COMP *cpi
-; r1 unsigned char *cx_data
-; r2 const unsigned char *cx_data_end
-; r3 int num_part
-; s0 vp8_coef_encodings
-; s1 vp8_extra_bits,
-; s2 const vp8_tree_index *
-
-|vp8cx_pack_tokens_into_partitions_armv5| PROC
- push {r4-r12, lr}
- sub sp, sp, #40
-
- ; Compute address of cpi->common.mb_rows
- ldr r4, _VP8_COMP_common_
- ldr r6, _VP8_COMMON_MBrows_
- add r4, r0, r4
-
- ldr r5, [r4, r6] ; load up mb_rows
-
- str r5, [sp, #36] ; save mb_rows
- str r1, [sp, #24] ; save ptr = cx_data
- str r3, [sp, #20] ; save num_part
- str r2, [sp, #8] ; save cx_data_end
-
- ldr r4, _VP8_COMP_tplist_
- add r4, r0, r4
- ldr r7, [r4, #0] ; dereference cpi->tp_list
- str r7, [sp, #32] ; store start of cpi->tp_list
-
- ldr r11, _VP8_COMP_bc_ ; load up vp8_writer out of cpi
- add r0, r0, r11
-
- mov r11, #0
- str r11, [sp, #28] ; i
-
-numparts_loop
- ldr r2, _vp8_writer_sz_ ; load up sizeof(vp8_writer)
- add r0, r2 ; bc[i + 1]
-
- ldr r10, [sp, #24] ; ptr
- ldr r5, [sp, #36] ; move mb_rows to the counting section
- subs r5, r5, r11 ; move start point with each partition
- ; mb_rows starts at i
- str r5, [sp, #12]
-
- ; Reset all of the VP8 Writer data for each partition that
- ; is processed.
- ; start_encode
-
- ldr r3, [sp, #8]
- str r3, [r0, #vp8_writer_buffer_end]
-
- mov r2, #0 ; vp8_writer_lowvalue
- mov r5, #255 ; vp8_writer_range
- mvn r3, #23 ; vp8_writer_count
-
- str r2, [r0, #vp8_writer_pos]
- str r10, [r0, #vp8_writer_buffer]
-
- ble end_partition ; if (mb_rows <= 0) end partition
-
-mb_row_loop
-
- ldr r1, [r7, #tokenlist_start]
- ldr r9, [r7, #tokenlist_stop]
- str r9, [sp, #0] ; save stop for later comparison
- str r7, [sp, #16] ; tokenlist address for next time
-
- b check_p_lt_stop
-
- ; actual work gets done here!
-
-while_p_lt_stop
- ldrb r6, [r1, #tokenextra_token] ; t
- ldr r4, [sp, #80] ; vp8_coef_encodings
- mov lr, #0
- add r4, r4, r6, lsl #3 ; a = vp8_coef_encodings + t
- ldr r9, [r1, #tokenextra_context_tree] ; pp
-
- ldrb r7, [r1, #tokenextra_skip_eob_node]
-
- ldr r6, [r4, #vp8_token_value] ; v
- ldr r8, [r4, #vp8_token_len] ; n
-
- ; vp8 specific skip_eob_node
- cmp r7, #0
- movne lr, #2 ; i = 2
- subne r8, r8, #1 ; --n
-
- rsb r4, r8, #32 ; 32-n
- ldr r10, [sp, #88] ; vp8_coef_tree
-
- ; v is kept in r12 during the token pack loop
- lsl r12, r6, r4 ; r12 = v << 32 - n
-
-; loop start
-token_loop
- ldrb r4, [r9, lr, asr #1] ; pp [i>>1]
- sub r7, r5, #1 ; range-1
-
- ; Decisions are made based on the bit value shifted
- ; off of v, so set a flag here based on this.
- ; This value is refered to as "bb"
- lsls r12, r12, #1 ; bb = v >> n
- mul r6, r4, r7 ; ((range-1) * pp[i>>1]))
-
- ; bb can only be 0 or 1. So only execute this statement
- ; if bb == 1, otherwise it will act like i + 0
- addcs lr, lr, #1 ; i + bb
-
- mov r7, #1
- ldrsb lr, [r10, lr] ; i = vp8_coef_tree[i+bb]
- add r4, r7, r6, lsr #8 ; 1 + (((range-1) * pp[i>>1]) >> 8)
-
- addcs r2, r2, r4 ; if (bb) lowvalue += split
- subcs r4, r5, r4 ; if (bb) range = range-split
-
- ; Counting the leading zeros is used to normalize range.
- clz r6, r4
- sub r6, r6, #24 ; shift
-
- ; Flag is set on the sum of count. This flag is used later
- ; to determine if count >= 0
- adds r3, r3, r6 ; count += shift
- lsl r5, r4, r6 ; range <<= shift
- bmi token_count_lt_zero ; if(count >= 0)
-
- sub r6, r6, r3 ; offset = shift - count
- sub r4, r6, #1 ; offset-1
- lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
- bpl token_high_bit_not_set
-
- ldr r4, [r0, #vp8_writer_pos] ; x
- sub r4, r4, #1 ; x = w->pos-1
- b token_zero_while_start
-token_zero_while_loop
- mov r10, #0
- strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0
- sub r4, r4, #1 ; x--
-token_zero_while_start
- cmp r4, #0
- ldrge r7, [r0, #vp8_writer_buffer]
- ldrb r11, [r7, r4]
- cmpge r11, #0xff
- beq token_zero_while_loop
-
- ldr r7, [r0, #vp8_writer_buffer]
- ldrb r10, [r7, r4] ; w->buffer[x]
- add r10, r10, #1
- strb r10, [r7, r4] ; w->buffer[x] + 1
-token_high_bit_not_set
- rsb r4, r6, #24 ; 24-offset
- ldr r10, [r0, #vp8_writer_buffer]
- lsr r7, r2, r4 ; lowvalue >> (24-offset)
- ldr r4, [r0, #vp8_writer_pos] ; w->pos
- lsl r2, r2, r6 ; lowvalue <<= offset
- mov r6, r3 ; shift = count
- add r11, r4, #1 ; w->pos++
- bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
- str r11, [r0, #vp8_writer_pos]
- sub r3, r3, #8 ; count -= 8
-
- VALIDATE_POS r10, r11 ; validate_buffer at pos
-
- strb r7, [r10, r4] ; w->buffer[w->pos++]
-
- ; r10 is used earlier in the loop, but r10 is used as
- ; temp variable here. So after r10 is used, reload
- ; vp8_coef_tree_dcd into r10
- ldr r10, [sp, #88] ; vp8_coef_tree
-
-token_count_lt_zero
- lsl r2, r2, r6 ; lowvalue <<= shift
-
- subs r8, r8, #1 ; --n
- bne token_loop
-
- ldrb r6, [r1, #tokenextra_token] ; t
- ldr r7, [sp, #84] ; vp8_extra_bits
- ; Add t * sizeof (vp8_extra_bit_struct) to get the desired
- ; element. Here vp8_extra_bit_struct == 16
- add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t
-
- ldr r4, [r12, #vp8_extra_bit_struct_base_val]
- cmp r4, #0
- beq skip_extra_bits
-
-; if( b->base_val)
- ldr r8, [r12, #vp8_extra_bit_struct_len] ; L
- ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra
- cmp r8, #0 ; if( L)
- beq no_extra_bits
-
- ldr r9, [r12, #vp8_extra_bit_struct_prob]
- asr r7, lr, #1 ; v=e>>1
-
- ldr r10, [r12, #vp8_extra_bit_struct_tree]
- str r10, [sp, #4] ; b->tree
-
- rsb r4, r8, #32
- lsl r12, r7, r4
-
- mov lr, #0 ; i = 0
-
-extra_bits_loop
- ldrb r4, [r9, lr, asr #1] ; pp[i>>1]
- sub r7, r5, #1 ; range-1
- lsls r12, r12, #1 ; v >> n
- mul r6, r4, r7 ; (range-1) * pp[i>>1]
- addcs lr, lr, #1 ; i + bb
-
- mov r7, #1
- ldrsb lr, [r10, lr] ; i = b->tree[i+bb]
- add r4, r7, r6, lsr #8 ; split = 1 + (((range-1) * pp[i>>1]) >> 8)
-
- addcs r2, r2, r4 ; if (bb) lowvalue += split
- subcs r4, r5, r4 ; if (bb) range = range-split
-
- clz r6, r4
- sub r6, r6, #24
-
- adds r3, r3, r6 ; count += shift
- lsl r5, r4, r6 ; range <<= shift
- bmi extra_count_lt_zero ; if(count >= 0)
-
- sub r6, r6, r3 ; offset= shift - count
- sub r4, r6, #1 ; offset-1
- lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
- bpl extra_high_bit_not_set
-
- ldr r4, [r0, #vp8_writer_pos] ; x
- sub r4, r4, #1 ; x = w->pos - 1
- b extra_zero_while_start
-extra_zero_while_loop
- mov r10, #0
- strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0
- sub r4, r4, #1 ; x--
-extra_zero_while_start
- cmp r4, #0
- ldrge r7, [r0, #vp8_writer_buffer]
- ldrb r11, [r7, r4]
- cmpge r11, #0xff
- beq extra_zero_while_loop
-
- ldr r7, [r0, #vp8_writer_buffer]
- ldrb r10, [r7, r4]
- add r10, r10, #1
- strb r10, [r7, r4]
-extra_high_bit_not_set
- rsb r4, r6, #24 ; 24-offset
- ldr r10, [r0, #vp8_writer_buffer]
- lsr r7, r2, r4 ; lowvalue >> (24-offset)
- ldr r4, [r0, #vp8_writer_pos]
- lsl r2, r2, r6 ; lowvalue <<= offset
- mov r6, r3 ; shift = count
- add r11, r4, #1 ; w->pos++
- bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
- str r11, [r0, #vp8_writer_pos]
- sub r3, r3, #8 ; count -= 8
-
- VALIDATE_POS r10, r11 ; validate_buffer at pos
-
- strb r7, [r10, r4] ; w->buffer[w->pos++]=(lowvalue >> (24-offset))
- ldr r10, [sp, #4] ; b->tree
-extra_count_lt_zero
- lsl r2, r2, r6
-
- subs r8, r8, #1 ; --n
- bne extra_bits_loop ; while (n)
-
-no_extra_bits
- ldr lr, [r1, #4] ; e = p->Extra
- add r4, r5, #1 ; range + 1
- tst lr, #1
- lsr r4, r4, #1 ; split = (range + 1) >> 1
- addne r2, r2, r4 ; lowvalue += split
- subne r4, r5, r4 ; range = range-split
- tst r2, #0x80000000 ; lowvalue & 0x80000000
- lsl r5, r4, #1 ; range <<= 1
- beq end_high_bit_not_set
-
- ldr r4, [r0, #vp8_writer_pos]
- mov r7, #0
- sub r4, r4, #1
- b end_zero_while_start
-end_zero_while_loop
- strb r7, [r6, r4]
- sub r4, r4, #1 ; x--
-end_zero_while_start
- cmp r4, #0
- ldrge r6, [r0, #vp8_writer_buffer]
- ldrb r12, [r6, r4]
- cmpge r12, #0xff
- beq end_zero_while_loop
-
- ldr r6, [r0, #vp8_writer_buffer]
- ldrb r7, [r6, r4]
- add r7, r7, #1
- strb r7, [r6, r4]
-end_high_bit_not_set
- adds r3, r3, #1 ; ++count
- lsl r2, r2, #1 ; lowvalue <<= 1
- bne end_count_zero
-
- ldr r4, [r0, #vp8_writer_pos]
- mvn r3, #7 ; count = -8
- ldr r7, [r0, #vp8_writer_buffer]
- lsr r6, r2, #24 ; lowvalue >> 24
- add r12, r4, #1 ; w->pos++
- bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
- str r12, [r0, #vp8_writer_pos]
-
- VALIDATE_POS r7, r12 ; validate_buffer at pos
-
- strb r6, [r7, r4]
-end_count_zero
-skip_extra_bits
- add r1, r1, #TOKENEXTRA_SZ ; ++p
-check_p_lt_stop
- ldr r4, [sp, #0] ; stop
- cmp r1, r4 ; while( p < stop)
- bcc while_p_lt_stop
-
- ldr r10, [sp, #20] ; num_parts
- mov r1, #TOKENLIST_SZ
- mul r1, r10, r1
-
- ldr r6, [sp, #12] ; mb_rows
- ldr r7, [sp, #16] ; tokenlist address
- subs r6, r6, r10
- add r7, r7, r1 ; next element in the array
- str r6, [sp, #12]
- bgt mb_row_loop
-
-end_partition
- mov r12, #32
-
-stop_encode_loop
- sub r7, r5, #1 ; range-1
-
- mov r4, r7, lsl #7 ; ((range-1) * 128)
-
- mov r7, #1
- add r4, r7, r4, lsr #8 ; 1 + (((range-1) * 128) >> 8)
-
- ; Counting the leading zeros is used to normalize range.
- clz r6, r4
- sub r6, r6, #24 ; shift
-
- ; Flag is set on the sum of count. This flag is used later
- ; to determine if count >= 0
- adds r3, r3, r6 ; count += shift
- lsl r5, r4, r6 ; range <<= shift
- bmi token_count_lt_zero_se ; if(count >= 0)
-
- sub r6, r6, r3 ; offset = shift - count
- sub r4, r6, #1 ; offset-1
- lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
- bpl token_high_bit_not_set_se
-
- ldr r4, [r0, #vp8_writer_pos] ; x
- sub r4, r4, #1 ; x = w->pos-1
- b token_zero_while_start_se
-token_zero_while_loop_se
- mov r10, #0
- strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0
- sub r4, r4, #1 ; x--
-token_zero_while_start_se
- cmp r4, #0
- ldrge r7, [r0, #vp8_writer_buffer]
- ldrb r11, [r7, r4]
- cmpge r11, #0xff
- beq token_zero_while_loop_se
-
- ldr r7, [r0, #vp8_writer_buffer]
- ldrb r10, [r7, r4] ; w->buffer[x]
- add r10, r10, #1
- strb r10, [r7, r4] ; w->buffer[x] + 1
-token_high_bit_not_set_se
- rsb r4, r6, #24 ; 24-offset
- ldr r10, [r0, #vp8_writer_buffer]
- lsr r7, r2, r4 ; lowvalue >> (24-offset)
- ldr r4, [r0, #vp8_writer_pos] ; w->pos
- lsl r2, r2, r6 ; lowvalue <<= offset
- mov r6, r3 ; shift = count
- add r11, r4, #1 ; w->pos++
- bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
- str r11, [r0, #vp8_writer_pos]
- sub r3, r3, #8 ; count -= 8
-
- VALIDATE_POS r10, r11 ; validate_buffer at pos
-
- strb r7, [r10, r4] ; w->buffer[w->pos++]
-
-token_count_lt_zero_se
- lsl r2, r2, r6 ; lowvalue <<= shift
-
- subs r12, r12, #1
- bne stop_encode_loop
-
- ldr r4, [r0, #vp8_writer_pos] ; w->pos
- ldr r12, [sp, #24] ; ptr
- add r12, r12, r4 ; ptr += w->pos
- str r12, [sp, #24]
-
- ldr r11, [sp, #28] ; i
- ldr r10, [sp, #20] ; num_parts
-
- add r11, r11, #1 ; i++
- str r11, [sp, #28]
-
- ldr r7, [sp, #32] ; cpi->tp_list[i]
- mov r1, #TOKENLIST_SZ
- add r7, r7, r1 ; next element in cpi->tp_list
- str r7, [sp, #32] ; cpi->tp_list[i+1]
-
- cmp r10, r11
- bgt numparts_loop
-
- add sp, sp, #40
- pop {r4-r12, pc}
- ENDP
-
-_VP8_COMP_common_
- DCD vp8_comp_common
-_VP8_COMMON_MBrows_
- DCD vp8_common_mb_rows
-_VP8_COMP_tplist_
- DCD vp8_comp_tplist
-_VP8_COMP_bc_
- DCD vp8_comp_bc
-_vp8_writer_sz_
- DCD vp8_writer_sz
-
- END
diff --git a/vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm b/vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm
deleted file mode 100644
index de35a1e..0000000
--- a/vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm
+++ /dev/null
@@ -1,225 +0,0 @@
-;
-; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8_fast_quantize_b_armv6|
-
- INCLUDE vp8_asm_enc_offsets.asm
-
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-
-; r0 BLOCK *b
-; r1 BLOCKD *d
-|vp8_fast_quantize_b_armv6| PROC
- stmfd sp!, {r1, r4-r11, lr}
-
- ldr r3, [r0, #vp8_block_coeff] ; coeff
- ldr r4, [r0, #vp8_block_quant_fast] ; quant_fast
- ldr r5, [r0, #vp8_block_round] ; round
- ldr r6, [r1, #vp8_blockd_qcoeff] ; qcoeff
- ldr r7, [r1, #vp8_blockd_dqcoeff] ; dqcoeff
- ldr r8, [r1, #vp8_blockd_dequant] ; dequant
-
- ldr r2, loop_count ; loop_count=0x1000000. 'lsls' instruction
- ; is used to update the counter so that
- ; it can be used to mark nonzero
- ; quantized coefficient pairs.
-
- mov r1, #0 ; flags for quantized coeffs
-
- ; PART 1: quantization and dequantization loop
-loop
- ldr r9, [r3], #4 ; [z1 | z0]
- ldr r10, [r5], #4 ; [r1 | r0]
- ldr r11, [r4], #4 ; [q1 | q0]
-
- ssat16 lr, #1, r9 ; [sz1 | sz0]
- eor r9, r9, lr ; [z1 ^ sz1 | z0 ^ sz0]
- ssub16 r9, r9, lr ; x = (z ^ sz) - sz
- sadd16 r9, r9, r10 ; [x1+r1 | x0+r0]
-
- ldr r12, [r3], #4 ; [z3 | z2]
-
- smulbb r0, r9, r11 ; [(x0+r0)*q0]
- smultt r9, r9, r11 ; [(x1+r1)*q1]
-
- ldr r10, [r5], #4 ; [r3 | r2]
-
- ssat16 r11, #1, r12 ; [sz3 | sz2]
- eor r12, r12, r11 ; [z3 ^ sz3 | z2 ^ sz2]
- pkhtb r0, r9, r0, asr #16 ; [y1 | y0]
- ldr r9, [r4], #4 ; [q3 | q2]
- ssub16 r12, r12, r11 ; x = (z ^ sz) - sz
-
- sadd16 r12, r12, r10 ; [x3+r3 | x2+r2]
-
- eor r0, r0, lr ; [(y1 ^ sz1) | (y0 ^ sz0)]
-
- smulbb r10, r12, r9 ; [(x2+r2)*q2]
- smultt r12, r12, r9 ; [(x3+r3)*q3]
-
- ssub16 r0, r0, lr ; x = (y ^ sz) - sz
-
- cmp r0, #0 ; check if zero
- orrne r1, r1, r2, lsr #24 ; add flag for nonzero coeffs
-
- str r0, [r6], #4 ; *qcoeff++ = x
- ldr r9, [r8], #4 ; [dq1 | dq0]
-
- pkhtb r10, r12, r10, asr #16 ; [y3 | y2]
- eor r10, r10, r11 ; [(y3 ^ sz3) | (y2 ^ sz2)]
- ssub16 r10, r10, r11 ; x = (y ^ sz) - sz
-
- cmp r10, #0 ; check if zero
- orrne r1, r1, r2, lsr #23 ; add flag for nonzero coeffs
-
- str r10, [r6], #4 ; *qcoeff++ = x
- ldr r11, [r8], #4 ; [dq3 | dq2]
-
- smulbb r12, r0, r9 ; [x0*dq0]
- smultt r0, r0, r9 ; [x1*dq1]
-
- smulbb r9, r10, r11 ; [x2*dq2]
- smultt r10, r10, r11 ; [x3*dq3]
-
- lsls r2, r2, #2 ; update loop counter
- strh r12, [r7, #0] ; dqcoeff[0] = [x0*dq0]
- strh r0, [r7, #2] ; dqcoeff[1] = [x1*dq1]
- strh r9, [r7, #4] ; dqcoeff[2] = [x2*dq2]
- strh r10, [r7, #6] ; dqcoeff[3] = [x3*dq3]
- add r7, r7, #8 ; dqcoeff += 8
- bne loop
-
- ; PART 2: check position for eob...
- ldr r11, [sp, #0] ; restore BLOCKD pointer
- mov lr, #0 ; init eob
- cmp r1, #0 ; coeffs after quantization?
- ldr r12, [r11, #vp8_blockd_eob]
- beq end ; skip eob calculations if all zero
-
- ldr r0, [r11, #vp8_blockd_qcoeff]
-
- ; check shortcut for nonzero qcoeffs
- tst r1, #0x80
- bne quant_coeff_15_14
- tst r1, #0x20
- bne quant_coeff_13_11
- tst r1, #0x8
- bne quant_coeff_12_7
- tst r1, #0x40
- bne quant_coeff_10_9
- tst r1, #0x10
- bne quant_coeff_8_3
- tst r1, #0x2
- bne quant_coeff_6_5
- tst r1, #0x4
- bne quant_coeff_4_2
- b quant_coeff_1_0
-
-quant_coeff_15_14
- ldrh r2, [r0, #30] ; rc=15, i=15
- mov lr, #16
- cmp r2, #0
- bne end
-
- ldrh r3, [r0, #28] ; rc=14, i=14
- mov lr, #15
- cmp r3, #0
- bne end
-
-quant_coeff_13_11
- ldrh r2, [r0, #22] ; rc=11, i=13
- mov lr, #14
- cmp r2, #0
- bne end
-
-quant_coeff_12_7
- ldrh r3, [r0, #14] ; rc=7, i=12
- mov lr, #13
- cmp r3, #0
- bne end
-
- ldrh r2, [r0, #20] ; rc=10, i=11
- mov lr, #12
- cmp r2, #0
- bne end
-
-quant_coeff_10_9
- ldrh r3, [r0, #26] ; rc=13, i=10
- mov lr, #11
- cmp r3, #0
- bne end
-
- ldrh r2, [r0, #24] ; rc=12, i=9
- mov lr, #10
- cmp r2, #0
- bne end
-
-quant_coeff_8_3
- ldrh r3, [r0, #18] ; rc=9, i=8
- mov lr, #9
- cmp r3, #0
- bne end
-
- ldrh r2, [r0, #12] ; rc=6, i=7
- mov lr, #8
- cmp r2, #0
- bne end
-
-quant_coeff_6_5
- ldrh r3, [r0, #6] ; rc=3, i=6
- mov lr, #7
- cmp r3, #0
- bne end
-
- ldrh r2, [r0, #4] ; rc=2, i=5
- mov lr, #6
- cmp r2, #0
- bne end
-
-quant_coeff_4_2
- ldrh r3, [r0, #10] ; rc=5, i=4
- mov lr, #5
- cmp r3, #0
- bne end
-
- ldrh r2, [r0, #16] ; rc=8, i=3
- mov lr, #4
- cmp r2, #0
- bne end
-
- ldrh r3, [r0, #8] ; rc=4, i=2
- mov lr, #3
- cmp r3, #0
- bne end
-
-quant_coeff_1_0
- ldrh r2, [r0, #2] ; rc=1, i=1
- mov lr, #2
- cmp r2, #0
- bne end
-
- mov lr, #1 ; rc=0, i=0
-
-end
- strb lr, [r12]
- ldmfd sp!, {r1, r4-r11, pc}
-
- ENDP
-
-loop_count
- DCD 0x1000000
-
- END
-
diff --git a/vp8/encoder/arm/armv6/vp8_subtract_armv6.asm b/vp8/encoder/arm/armv6/vp8_subtract_armv6.asm
deleted file mode 100644
index 05746cf..0000000
--- a/vp8/encoder/arm/armv6/vp8_subtract_armv6.asm
+++ /dev/null
@@ -1,272 +0,0 @@
-;
-; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8_subtract_mby_armv6|
- EXPORT |vp8_subtract_mbuv_armv6|
- EXPORT |vp8_subtract_b_armv6|
-
- INCLUDE vp8_asm_enc_offsets.asm
-
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-
-; r0 BLOCK *be
-; r1 BLOCKD *bd
-; r2 int pitch
-|vp8_subtract_b_armv6| PROC
-
- stmfd sp!, {r4-r9}
-
- ldr r4, [r0, #vp8_block_base_src]
- ldr r5, [r0, #vp8_block_src]
- ldr r6, [r0, #vp8_block_src_diff]
-
- ldr r3, [r4]
- ldr r7, [r0, #vp8_block_src_stride]
- add r3, r3, r5 ; src = *base_src + src
- ldr r8, [r1, #vp8_blockd_predictor]
-
- mov r9, #4 ; loop count
-
-loop_block
-
- ldr r0, [r3], r7 ; src
- ldr r1, [r8], r2 ; pred
-
- uxtb16 r4, r0 ; [s2 | s0]
- uxtb16 r5, r1 ; [p2 | p0]
- uxtb16 r0, r0, ror #8 ; [s3 | s1]
- uxtb16 r1, r1, ror #8 ; [p3 | p1]
-
- usub16 r4, r4, r5 ; [d2 | d0]
- usub16 r5, r0, r1 ; [d3 | d1]
-
- subs r9, r9, #1 ; decrement loop counter
-
- pkhbt r0, r4, r5, lsl #16 ; [d1 | d0]
- pkhtb r1, r5, r4, asr #16 ; [d3 | d2]
-
- str r0, [r6, #0] ; diff
- str r1, [r6, #4] ; diff
-
- add r6, r6, r2, lsl #1 ; update diff pointer
- bne loop_block
-
- ldmfd sp!, {r4-r9}
- mov pc, lr
-
- ENDP
-
-
-; r0 short *diff
-; r1 unsigned char *usrc
-; r2 unsigned char *vsrc
-; r3 int src_stride
-; sp unsigned char *upred
-; sp unsigned char *vpred
-; sp int pred_stride
-|vp8_subtract_mbuv_armv6| PROC
-
- stmfd sp!, {r4-r11}
-
- add r0, r0, #512 ; set *diff point to Cb
- mov r4, #8 ; loop count
- ldr r5, [sp, #32] ; upred
- ldr r12, [sp, #40] ; pred_stride
-
- ; Subtract U block
-loop_u
- ldr r6, [r1] ; usrc (A)
- ldr r7, [r5] ; upred (A)
-
- uxtb16 r8, r6 ; [s2 | s0] (A)
- uxtb16 r9, r7 ; [p2 | p0] (A)
- uxtb16 r10, r6, ror #8 ; [s3 | s1] (A)
- uxtb16 r11, r7, ror #8 ; [p3 | p1] (A)
-
- usub16 r6, r8, r9 ; [d2 | d0] (A)
- usub16 r7, r10, r11 ; [d3 | d1] (A)
-
- ldr r10, [r1, #4] ; usrc (B)
- ldr r11, [r5, #4] ; upred (B)
-
- pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A)
- pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (A)
-
- str r8, [r0], #4 ; diff (A)
- uxtb16 r8, r10 ; [s2 | s0] (B)
- str r9, [r0], #4 ; diff (A)
-
- uxtb16 r9, r11 ; [p2 | p0] (B)
- uxtb16 r10, r10, ror #8 ; [s3 | s1] (B)
- uxtb16 r11, r11, ror #8 ; [p3 | p1] (B)
-
- usub16 r6, r8, r9 ; [d2 | d0] (B)
- usub16 r7, r10, r11 ; [d3 | d1] (B)
-
- add r1, r1, r3 ; update usrc pointer
- add r5, r5, r12 ; update upred pointer
-
- pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B)
- pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (B)
-
- str r8, [r0], #4 ; diff (B)
- subs r4, r4, #1 ; update loop counter
- str r9, [r0], #4 ; diff (B)
-
- bne loop_u
-
- ldr r5, [sp, #36] ; vpred
- mov r4, #8 ; loop count
-
- ; Subtract V block
-loop_v
- ldr r6, [r2] ; vsrc (A)
- ldr r7, [r5] ; vpred (A)
-
- uxtb16 r8, r6 ; [s2 | s0] (A)
- uxtb16 r9, r7 ; [p2 | p0] (A)
- uxtb16 r10, r6, ror #8 ; [s3 | s1] (A)
- uxtb16 r11, r7, ror #8 ; [p3 | p1] (A)
-
- usub16 r6, r8, r9 ; [d2 | d0] (A)
- usub16 r7, r10, r11 ; [d3 | d1] (A)
-
- ldr r10, [r2, #4] ; vsrc (B)
- ldr r11, [r5, #4] ; vpred (B)
-
- pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A)
- pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (A)
-
- str r8, [r0], #4 ; diff (A)
- uxtb16 r8, r10 ; [s2 | s0] (B)
- str r9, [r0], #4 ; diff (A)
-
- uxtb16 r9, r11 ; [p2 | p0] (B)
- uxtb16 r10, r10, ror #8 ; [s3 | s1] (B)
- uxtb16 r11, r11, ror #8 ; [p3 | p1] (B)
-
- usub16 r6, r8, r9 ; [d2 | d0] (B)
- usub16 r7, r10, r11 ; [d3 | d1] (B)
-
- add r2, r2, r3 ; update vsrc pointer
- add r5, r5, r12 ; update vpred pointer
-
- pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B)
- pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (B)
-
- str r8, [r0], #4 ; diff (B)
- subs r4, r4, #1 ; update loop counter
- str r9, [r0], #4 ; diff (B)
-
- bne loop_v
-
- ldmfd sp!, {r4-r11}
- bx lr
-
- ENDP
-
-
-; r0 short *diff
-; r1 unsigned char *src
-; r2 int src_stride
-; r3 unsigned char *pred
-; sp int pred_stride
-|vp8_subtract_mby_armv6| PROC
-
- stmfd sp!, {r4-r11}
- ldr r12, [sp, #32] ; pred_stride
- mov r4, #16
-loop
- ldr r6, [r1] ; src (A)
- ldr r7, [r3] ; pred (A)
-
- uxtb16 r8, r6 ; [s2 | s0] (A)
- uxtb16 r9, r7 ; [p2 | p0] (A)
- uxtb16 r10, r6, ror #8 ; [s3 | s1] (A)
- uxtb16 r11, r7, ror #8 ; [p3 | p1] (A)
-
- usub16 r6, r8, r9 ; [d2 | d0] (A)
- usub16 r7, r10, r11 ; [d3 | d1] (A)
-
- ldr r10, [r1, #4] ; src (B)
- ldr r11, [r3, #4] ; pred (B)
-
- pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A)
- pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (A)
-
- str r8, [r0], #4 ; diff (A)
- uxtb16 r8, r10 ; [s2 | s0] (B)
- str r9, [r0], #4 ; diff (A)
-
- uxtb16 r9, r11 ; [p2 | p0] (B)
- uxtb16 r10, r10, ror #8 ; [s3 | s1] (B)
- uxtb16 r11, r11, ror #8 ; [p3 | p1] (B)
-
- usub16 r6, r8, r9 ; [d2 | d0] (B)
- usub16 r7, r10, r11 ; [d3 | d1] (B)
-
- ldr r10, [r1, #8] ; src (C)
- ldr r11, [r3, #8] ; pred (C)
-
- pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B)
- pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (B)
-
- str r8, [r0], #4 ; diff (B)
- uxtb16 r8, r10 ; [s2 | s0] (C)
- str r9, [r0], #4 ; diff (B)
-
- uxtb16 r9, r11 ; [p2 | p0] (C)
- uxtb16 r10, r10, ror #8 ; [s3 | s1] (C)
- uxtb16 r11, r11, ror #8 ; [p3 | p1] (C)
-
- usub16 r6, r8, r9 ; [d2 | d0] (C)
- usub16 r7, r10, r11 ; [d3 | d1] (C)
-
- ldr r10, [r1, #12] ; src (D)
- ldr r11, [r3, #12] ; pred (D)
-
- pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (C)
- pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (C)
-
- str r8, [r0], #4 ; diff (C)
- uxtb16 r8, r10 ; [s2 | s0] (D)
- str r9, [r0], #4 ; diff (C)
-
- uxtb16 r9, r11 ; [p2 | p0] (D)
- uxtb16 r10, r10, ror #8 ; [s3 | s1] (D)
- uxtb16 r11, r11, ror #8 ; [p3 | p1] (D)
-
- usub16 r6, r8, r9 ; [d2 | d0] (D)
- usub16 r7, r10, r11 ; [d3 | d1] (D)
-
- add r1, r1, r2 ; update src pointer
- add r3, r3, r12 ; update pred pointer
-
- pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (D)
- pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (D)
-
- str r8, [r0], #4 ; diff (D)
- subs r4, r4, #1 ; update loop counter
- str r9, [r0], #4 ; diff (D)
-
- bne loop
-
- ldmfd sp!, {r4-r11}
- bx lr
-
- ENDP
-
- END
-
diff --git a/vp8/encoder/arm/boolhuff_arm.c b/vp8/encoder/arm/boolhuff_arm.c
deleted file mode 100644
index 17a941b..0000000
--- a/vp8/encoder/arm/boolhuff_arm.c
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "vp8/encoder/boolhuff.h"
-#include "vpx/internal/vpx_codec_internal.h"
-
-const unsigned int vp8_prob_cost[256] =
-{
- 2047, 2047, 1791, 1641, 1535, 1452, 1385, 1328, 1279, 1235, 1196, 1161, 1129, 1099, 1072, 1046,
- 1023, 1000, 979, 959, 940, 922, 905, 889, 873, 858, 843, 829, 816, 803, 790, 778,
- 767, 755, 744, 733, 723, 713, 703, 693, 684, 675, 666, 657, 649, 641, 633, 625,
- 617, 609, 602, 594, 587, 580, 573, 567, 560, 553, 547, 541, 534, 528, 522, 516,
- 511, 505, 499, 494, 488, 483, 477, 472, 467, 462, 457, 452, 447, 442, 437, 433,
- 428, 424, 419, 415, 410, 406, 401, 397, 393, 389, 385, 381, 377, 373, 369, 365,
- 361, 357, 353, 349, 346, 342, 338, 335, 331, 328, 324, 321, 317, 314, 311, 307,
- 304, 301, 297, 294, 291, 288, 285, 281, 278, 275, 272, 269, 266, 263, 260, 257,
- 255, 252, 249, 246, 243, 240, 238, 235, 232, 229, 227, 224, 221, 219, 216, 214,
- 211, 208, 206, 203, 201, 198, 196, 194, 191, 189, 186, 184, 181, 179, 177, 174,
- 172, 170, 168, 165, 163, 161, 159, 156, 154, 152, 150, 148, 145, 143, 141, 139,
- 137, 135, 133, 131, 129, 127, 125, 123, 121, 119, 117, 115, 113, 111, 109, 107,
- 105, 103, 101, 99, 97, 95, 93, 92, 90, 88, 86, 84, 82, 81, 79, 77,
- 75, 73, 72, 70, 68, 66, 65, 63, 61, 60, 58, 56, 55, 53, 51, 50,
- 48, 46, 45, 43, 41, 40, 38, 37, 35, 33, 32, 30, 29, 27, 25, 24,
- 22, 21, 19, 18, 16, 15, 13, 12, 10, 9, 7, 6, 4, 3, 1, 1
-};
-
-int vp8_validate_buffer_arm(const unsigned char *start,
- size_t len,
- const unsigned char *end,
- struct vpx_internal_error_info *error)
-{
- return validate_buffer(start, len, end, error);
-}
diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c
index 9d0e69c..f9096f9 100644
--- a/vp8/encoder/bitstream.c
+++ b/vp8/encoder/bitstream.c
@@ -159,7 +159,7 @@
);
}
-void vp8_pack_tokens_c(vp8_writer *w, const TOKENEXTRA *p, int xcount)
+void vp8_pack_tokens(vp8_writer *w, const TOKENEXTRA *p, int xcount)
{
const TOKENEXTRA *stop = p + xcount;
unsigned int split;
@@ -374,7 +374,7 @@
}
-static void pack_tokens_into_partitions_c(VP8_COMP *cpi, unsigned char *cx_data,
+static void pack_tokens_into_partitions(VP8_COMP *cpi, unsigned char *cx_data,
unsigned char * cx_data_end,
int num_part)
{
@@ -398,7 +398,7 @@
const TOKENEXTRA *stop = cpi->tplist[mb_row].stop;
int tokens = (int)(stop - p);
- vp8_pack_tokens_c(w, p, tokens);
+ vp8_pack_tokens(w, p, tokens);
}
vp8_stop_encode(w);
@@ -407,7 +407,7 @@
}
-static void pack_mb_row_tokens_c(VP8_COMP *cpi, vp8_writer *w)
+static void pack_mb_row_tokens(VP8_COMP *cpi, vp8_writer *w)
{
int mb_row;
@@ -417,7 +417,7 @@
const TOKENEXTRA *stop = cpi->tplist[mb_row].stop;
int tokens = (int)(stop - p);
- vp8_pack_tokens_c(w, p, tokens);
+ vp8_pack_tokens(w, p, tokens);
}
}
@@ -1676,7 +1676,7 @@
pack_mb_row_tokens(cpi, &cpi->bc[1]);
else
#endif
- pack_tokens(&cpi->bc[1], cpi->tok, cpi->tok_count);
+ vp8_pack_tokens(&cpi->bc[1], cpi->tok, cpi->tok_count);
vp8_stop_encode(&cpi->bc[1]);
diff --git a/vp8/encoder/bitstream.h b/vp8/encoder/bitstream.h
index 66f4bf6..de69805 100644
--- a/vp8/encoder/bitstream.h
+++ b/vp8/encoder/bitstream.h
@@ -16,36 +16,7 @@
extern "C" {
#endif
-#if HAVE_EDSP
-void vp8cx_pack_tokens_armv5(vp8_writer *w, const TOKENEXTRA *p, int xcount,
- vp8_token *,
- const vp8_extra_bit_struct *,
- const vp8_tree_index *);
-void vp8cx_pack_tokens_into_partitions_armv5(VP8_COMP *,
- unsigned char * cx_data,
- const unsigned char *cx_data_end,
- int num_parts,
- vp8_token *,
- const vp8_extra_bit_struct *,
- const vp8_tree_index *);
-void vp8cx_pack_mb_row_tokens_armv5(VP8_COMP *cpi, vp8_writer *w,
- vp8_token *,
- const vp8_extra_bit_struct *,
- const vp8_tree_index *);
-# define pack_tokens(a,b,c) \
- vp8cx_pack_tokens_armv5(a,b,c,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
-# define pack_tokens_into_partitions(a,b,c,d) \
- vp8cx_pack_tokens_into_partitions_armv5(a,b,c,d,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
-# define pack_mb_row_tokens(a,b) \
- vp8cx_pack_mb_row_tokens_armv5(a,b,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
-#else
-
-void vp8_pack_tokens_c(vp8_writer *w, const TOKENEXTRA *p, int xcount);
-
-# define pack_tokens(a,b,c) vp8_pack_tokens_c(a,b,c)
-# define pack_tokens_into_partitions(a,b,c,d) pack_tokens_into_partitions_c(a,b,c,d)
-# define pack_mb_row_tokens(a,b) pack_mb_row_tokens_c(a,b)
-#endif
+void vp8_pack_tokens(vp8_writer *w, const TOKENEXTRA *p, int xcount);
#ifdef __cplusplus
} // extern "C"
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index 85813b6..2a3f69c 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -574,7 +574,7 @@
/* pack tokens for this MB */
{
int tok_count = *tp - tp_start;
- pack_tokens(w, tp_start, tok_count);
+ vp8_pack_tokens(w, tp_start, tok_count);
}
#endif
/* Increment pointer into gf usage flags structure. */
diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c
index 7814679..3598a7a 100644
--- a/vp8/encoder/ethreading.c
+++ b/vp8/encoder/ethreading.c
@@ -261,7 +261,7 @@
/* pack tokens for this MB */
{
int tok_count = tp - tp_start;
- pack_tokens(w, tp_start, tok_count);
+ vp8_pack_tokens(w, tp_start, tok_count);
}
#else
cpi->tplist[mb_row].stop = tp;
diff --git a/vp8/encoder/vp8_asm_enc_offsets.c b/vp8/encoder/vp8_asm_enc_offsets.c
index a4169b3..ce8a670 100644
--- a/vp8/encoder/vp8_asm_enc_offsets.c
+++ b/vp8/encoder/vp8_asm_enc_offsets.c
@@ -10,84 +10,7 @@
#include "vpx_ports/asm_offsets.h"
-#include "vpx_config.h"
-#include "block.h"
-#include "vp8/common/blockd.h"
-#include "onyx_int.h"
-#include "treewriter.h"
-#include "tokenize.h"
BEGIN
-/* regular quantize */
-DEFINE(vp8_block_coeff, offsetof(BLOCK, coeff));
-DEFINE(vp8_block_zbin, offsetof(BLOCK, zbin));
-DEFINE(vp8_block_round, offsetof(BLOCK, round));
-DEFINE(vp8_block_quant, offsetof(BLOCK, quant));
-DEFINE(vp8_block_quant_fast, offsetof(BLOCK, quant_fast));
-DEFINE(vp8_block_zbin_extra, offsetof(BLOCK, zbin_extra));
-DEFINE(vp8_block_zrun_zbin_boost, offsetof(BLOCK, zrun_zbin_boost));
-DEFINE(vp8_block_quant_shift, offsetof(BLOCK, quant_shift));
-
-DEFINE(vp8_blockd_qcoeff, offsetof(BLOCKD, qcoeff));
-DEFINE(vp8_blockd_dequant, offsetof(BLOCKD, dequant));
-DEFINE(vp8_blockd_dqcoeff, offsetof(BLOCKD, dqcoeff));
-DEFINE(vp8_blockd_eob, offsetof(BLOCKD, eob));
-
-/* subtract */
-DEFINE(vp8_block_base_src, offsetof(BLOCK, base_src));
-DEFINE(vp8_block_src, offsetof(BLOCK, src));
-DEFINE(vp8_block_src_diff, offsetof(BLOCK, src_diff));
-DEFINE(vp8_block_src_stride, offsetof(BLOCK, src_stride));
-
-DEFINE(vp8_blockd_predictor, offsetof(BLOCKD, predictor));
-
-/* pack tokens */
-DEFINE(vp8_writer_lowvalue, offsetof(vp8_writer, lowvalue));
-DEFINE(vp8_writer_range, offsetof(vp8_writer, range));
-DEFINE(vp8_writer_count, offsetof(vp8_writer, count));
-DEFINE(vp8_writer_pos, offsetof(vp8_writer, pos));
-DEFINE(vp8_writer_buffer, offsetof(vp8_writer, buffer));
-DEFINE(vp8_writer_buffer_end, offsetof(vp8_writer, buffer_end));
-DEFINE(vp8_writer_error, offsetof(vp8_writer, error));
-
-DEFINE(tokenextra_token, offsetof(TOKENEXTRA, Token));
-DEFINE(tokenextra_extra, offsetof(TOKENEXTRA, Extra));
-DEFINE(tokenextra_context_tree, offsetof(TOKENEXTRA, context_tree));
-DEFINE(tokenextra_skip_eob_node, offsetof(TOKENEXTRA, skip_eob_node));
-DEFINE(TOKENEXTRA_SZ, sizeof(TOKENEXTRA));
-
-DEFINE(vp8_extra_bit_struct_sz, sizeof(vp8_extra_bit_struct));
-
-DEFINE(vp8_token_value, offsetof(vp8_token, value));
-DEFINE(vp8_token_len, offsetof(vp8_token, Len));
-
-DEFINE(vp8_extra_bit_struct_tree, offsetof(vp8_extra_bit_struct, tree));
-DEFINE(vp8_extra_bit_struct_prob, offsetof(vp8_extra_bit_struct, prob));
-DEFINE(vp8_extra_bit_struct_len, offsetof(vp8_extra_bit_struct, Len));
-DEFINE(vp8_extra_bit_struct_base_val, offsetof(vp8_extra_bit_struct, base_val));
-
-DEFINE(vp8_comp_tplist, offsetof(VP8_COMP, tplist));
-DEFINE(vp8_comp_common, offsetof(VP8_COMP, common));
-DEFINE(vp8_comp_bc , offsetof(VP8_COMP, bc));
-DEFINE(vp8_writer_sz , sizeof(vp8_writer));
-
-DEFINE(tokenlist_start, offsetof(TOKENLIST, start));
-DEFINE(tokenlist_stop, offsetof(TOKENLIST, stop));
-DEFINE(TOKENLIST_SZ, sizeof(TOKENLIST));
-
-DEFINE(vp8_common_mb_rows, offsetof(VP8_COMMON, mb_rows));
-
END
-
-/* add asserts for any offset that is not supported by assembly code
- * add asserts for any size that is not supported by assembly code
-
- * These are used in vp8cx_pack_tokens. They are hard coded so if their sizes
- * change they will have to be adjusted.
- */
-
-#if HAVE_EDSP
-ct_assert(TOKENEXTRA_SZ, sizeof(TOKENEXTRA) == 8)
-ct_assert(vp8_extra_bit_struct_sz, sizeof(vp8_extra_bit_struct) == 16)
-#endif
diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk
index a0dbdcf..5e4ef05 100644
--- a/vp8/vp8cx.mk
+++ b/vp8/vp8cx.mk
@@ -75,7 +75,6 @@
VP8_CX_SRCS-yes += encoder/temporal_filter.c
VP8_CX_SRCS-$(CONFIG_MULTI_RES_ENCODING) += encoder/mr_dissim.c
VP8_CX_SRCS-$(CONFIG_MULTI_RES_ENCODING) += encoder/mr_dissim.h
-VP8_CX_SRCS-yes += encoder/vp8_asm_enc_offsets.c
ifeq ($(CONFIG_REALTIME_ONLY),yes)
VP8_CX_SRCS_REMOVE-yes += encoder/firstpass.c
@@ -107,6 +106,3 @@
endif
VP8_CX_SRCS-yes := $(filter-out $(VP8_CX_SRCS_REMOVE-yes),$(VP8_CX_SRCS-yes))
-
-$(eval $(call asm_offsets_template,\
- vp8_asm_enc_offsets.asm, $(VP8_PREFIX)encoder/vp8_asm_enc_offsets.c))
diff --git a/vp8/vp8cx_arm.mk b/vp8/vp8cx_arm.mk
index 2c2b871..0500301 100644
--- a/vp8/vp8cx_arm.mk
+++ b/vp8/vp8cx_arm.mk
@@ -15,20 +15,9 @@
# encoder
VP8_CX_SRCS-$(ARCH_ARM) += encoder/arm/dct_arm.c
-#File list for edsp
-# encoder
-VP8_CX_SRCS-$(HAVE_EDSP) += encoder/arm/boolhuff_arm.c
-VP8_CX_SRCS_REMOVE-$(HAVE_EDSP) += encoder/boolhuff.c
-VP8_CX_SRCS-$(HAVE_EDSP) += encoder/arm/armv5te/boolhuff_armv5te$(ASM)
-VP8_CX_SRCS-$(HAVE_EDSP) += encoder/arm/armv5te/vp8_packtokens_armv5$(ASM)
-VP8_CX_SRCS-$(HAVE_EDSP) += encoder/arm/armv5te/vp8_packtokens_mbrow_armv5$(ASM)
-VP8_CX_SRCS-$(HAVE_EDSP) += encoder/arm/armv5te/vp8_packtokens_partitions_armv5$(ASM)
-
#File list for media
# encoder
-VP8_CX_SRCS-$(HAVE_MEDIA) += encoder/arm/armv6/vp8_subtract_armv6$(ASM)
VP8_CX_SRCS-$(HAVE_MEDIA) += encoder/arm/armv6/vp8_short_fdct4x4_armv6$(ASM)
-VP8_CX_SRCS-$(HAVE_MEDIA) += encoder/arm/armv6/vp8_fast_quantize_b_armv6$(ASM)
VP8_CX_SRCS-$(HAVE_MEDIA) += encoder/arm/armv6/vp8_mse16x16_armv6$(ASM)
VP8_CX_SRCS-$(HAVE_MEDIA) += encoder/arm/armv6/walsh_v6$(ASM)
diff --git a/vp9/common/vp9_alloccommon.c b/vp9/common/vp9_alloccommon.c
index 5b8b2a9..284d3a2 100644
--- a/vp9/common/vp9_alloccommon.c
+++ b/vp9/common/vp9_alloccommon.c
@@ -17,17 +17,6 @@
#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/common/vp9_systemdependent.h"
-static void clear_mi_border(const VP9_COMMON *cm, MODE_INFO *mi) {
- int i;
-
- // Top border row
- vpx_memset(mi, 0, sizeof(*mi) * cm->mi_stride);
-
- // Left border column
- for (i = 1; i < cm->mi_rows + 1; ++i)
- vpx_memset(&mi[i * cm->mi_stride], 0, sizeof(*mi));
-}
-
void vp9_set_mb_mi(VP9_COMMON *cm, int width, int height) {
const int aligned_width = ALIGN_POWER_OF_TWO(width, MI_SIZE_LOG2);
const int aligned_height = ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2);
@@ -41,48 +30,6 @@
cm->MBs = cm->mb_rows * cm->mb_cols;
}
-static void setup_mi(VP9_COMMON *cm) {
- cm->mi = cm->mip + cm->mi_stride + 1;
- cm->prev_mi = cm->prev_mip + cm->mi_stride + 1;
-
- vpx_memset(cm->mip, 0, cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mip));
- clear_mi_border(cm, cm->prev_mip);
-}
-
-static int alloc_mi(VP9_COMMON *cm, int mi_size) {
- int i;
-
- for (i = 0; i < 2; ++i) {
- cm->mip_array[i] =
- (MODE_INFO *)vpx_calloc(mi_size, sizeof(MODE_INFO));
- if (cm->mip_array[i] == NULL)
- return 1;
- }
-
- cm->mi_alloc_size = mi_size;
-
- // Init the index.
- cm->mi_idx = 0;
- cm->prev_mi_idx = 1;
-
- cm->mip = cm->mip_array[cm->mi_idx];
- cm->prev_mip = cm->mip_array[cm->prev_mi_idx];
-
- return 0;
-}
-
-static void free_mi(VP9_COMMON *cm) {
- int i;
-
- for (i = 0; i < 2; ++i) {
- vpx_free(cm->mip_array[i]);
- cm->mip_array[i] = NULL;
- }
-
- cm->mip = NULL;
- cm->prev_mip = NULL;
-}
-
void vp9_free_ref_frame_buffers(VP9_COMMON *cm) {
int i;
@@ -101,14 +48,11 @@
}
void vp9_free_context_buffers(VP9_COMMON *cm) {
- free_mi(cm);
-
+ cm->free_mi(cm);
vpx_free(cm->last_frame_seg_map);
cm->last_frame_seg_map = NULL;
-
vpx_free(cm->above_context);
cm->above_context = NULL;
-
vpx_free(cm->above_seg_context);
cm->above_seg_context = NULL;
}
@@ -117,7 +61,7 @@
vp9_free_context_buffers(cm);
vp9_set_mb_mi(cm, width, height);
- if (alloc_mi(cm, cm->mi_stride * calc_mi_size(cm->mi_rows)))
+ if (cm->alloc_mi(cm, cm->mi_stride * calc_mi_size(cm->mi_rows)))
goto fail;
cm->last_frame_seg_map = (uint8_t *)vpx_calloc(cm->mi_rows * cm->mi_cols, 1);
@@ -201,25 +145,15 @@
vp9_free_ref_frame_buffers(cm);
vp9_free_context_buffers(cm);
vp9_free_internal_frame_buffers(&cm->int_frame_buffers);
+
+ vpx_free(cm->fc);
+ cm->fc = NULL;
+ vpx_free(cm->frame_contexts);
+ cm->frame_contexts = NULL;
}
void vp9_init_context_buffers(VP9_COMMON *cm) {
- setup_mi(cm);
+ cm->setup_mi(cm);
if (cm->last_frame_seg_map)
vpx_memset(cm->last_frame_seg_map, 0, cm->mi_rows * cm->mi_cols);
}
-
-void vp9_swap_mi_and_prev_mi(VP9_COMMON *cm) {
- // Swap indices.
- const int tmp = cm->mi_idx;
- cm->mi_idx = cm->prev_mi_idx;
- cm->prev_mi_idx = tmp;
-
- // Current mip will be the prev_mip for the next frame.
- cm->mip = cm->mip_array[cm->mi_idx];
- cm->prev_mip = cm->mip_array[cm->prev_mi_idx];
-
- // Update the upper left visible macroblock ptrs.
- cm->mi = cm->mip + cm->mi_stride + 1;
- cm->prev_mi = cm->prev_mip + cm->mi_stride + 1;
-}
diff --git a/vp9/common/vp9_alloccommon.h b/vp9/common/vp9_alloccommon.h
index c5b893f..955bb9e 100644
--- a/vp9/common/vp9_alloccommon.h
+++ b/vp9/common/vp9_alloccommon.h
@@ -31,7 +31,6 @@
void vp9_free_state_buffers(struct VP9Common *cm);
void vp9_set_mb_mi(struct VP9Common *cm, int width, int height);
-void vp9_swap_mi_and_prev_mi(struct VP9Common *cm);
#ifdef __cplusplus
} // extern "C"
diff --git a/vp9/common/vp9_entropymode.c b/vp9/common/vp9_entropymode.c
index 1a24572..d7610ed 100644
--- a/vp9/common/vp9_entropymode.c
+++ b/vp9/common/vp9_entropymode.c
@@ -464,7 +464,8 @@
cm->frame_contexts[cm->frame_context_idx] = *cm->fc;
}
- if (frame_is_intra_only(cm))
+ // prev_mip will only be allocated in encoder.
+ if (frame_is_intra_only(cm) && cm->prev_mip)
vpx_memset(cm->prev_mip, 0, cm->mi_stride * (cm->mi_rows + 1) *
sizeof(*cm->prev_mip));
diff --git a/vp9/common/vp9_enums.h b/vp9/common/vp9_enums.h
index f83d21f..7454dd4 100644
--- a/vp9/common/vp9_enums.h
+++ b/vp9/common/vp9_enums.h
@@ -104,7 +104,7 @@
BT_709 = 2, // YUV
SMPTE_170 = 3, // YUV
SMPTE_240 = 4, // YUV
- RESERVED_1 = 5,
+ BT_2020 = 5, // YUV
RESERVED_2 = 6,
SRGB = 7 // RGB
} COLOR_SPACE;
diff --git a/vp9/common/vp9_idct.c b/vp9/common/vp9_idct.c
index cc261ab..b2c78c7 100644
--- a/vp9/common/vp9_idct.c
+++ b/vp9/common/vp9_idct.c
@@ -276,10 +276,10 @@
static void iadst4(const tran_low_t *input, tran_low_t *output) {
tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;
- tran_high_t x0 = input[0];
- tran_high_t x1 = input[1];
- tran_high_t x2 = input[2];
- tran_high_t x3 = input[3];
+ tran_low_t x0 = input[0];
+ tran_low_t x1 = input[1];
+ tran_low_t x2 = input[2];
+ tran_low_t x3 = input[3];
if (!(x0 | x1 | x2 | x3)) {
output[0] = output[1] = output[2] = output[3] = 0;
@@ -295,24 +295,19 @@
s6 = sinpi_4_9 * x3;
s7 = x0 - x2 + x3;
- x0 = s0 + s3 + s5;
- x1 = s1 - s4 - s6;
- x2 = sinpi_3_9 * s7;
- x3 = s2;
-
- s0 = x0 + x3;
- s1 = x1 + x3;
- s2 = x2;
- s3 = x0 + x1 - x3;
+ s0 = s0 + s3 + s5;
+ s1 = s1 - s4 - s6;
+ s3 = s2;
+ s2 = sinpi_3_9 * s7;
// 1-D transform scaling factor is sqrt(2).
// The overall dynamic range is 14b (input) + 14b (multiplication scaling)
// + 1b (addition) = 29b.
// Hence the output bit depth is 15b.
- output[0] = WRAPLOW(dct_const_round_shift(s0), 8);
- output[1] = WRAPLOW(dct_const_round_shift(s1), 8);
+ output[0] = WRAPLOW(dct_const_round_shift(s0 + s3), 8);
+ output[1] = WRAPLOW(dct_const_round_shift(s1 + s3), 8);
output[2] = WRAPLOW(dct_const_round_shift(s2), 8);
- output[3] = WRAPLOW(dct_const_round_shift(s3), 8);
+ output[3] = WRAPLOW(dct_const_round_shift(s0 + s1 - s3), 8);
}
void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
@@ -1702,10 +1697,10 @@
static void highbd_iadst4(const tran_low_t *input, tran_low_t *output, int bd) {
tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;
- tran_high_t x0 = input[0];
- tran_high_t x1 = input[1];
- tran_high_t x2 = input[2];
- tran_high_t x3 = input[3];
+ tran_low_t x0 = input[0];
+ tran_low_t x1 = input[1];
+ tran_low_t x2 = input[2];
+ tran_low_t x3 = input[3];
(void) bd;
if (!(x0 | x1 | x2 | x3)) {
@@ -1720,26 +1715,21 @@
s4 = sinpi_1_9 * x2;
s5 = sinpi_2_9 * x3;
s6 = sinpi_4_9 * x3;
- s7 = x0 - x2 + x3;
+ s7 = (tran_high_t)(x0 - x2 + x3);
- x0 = s0 + s3 + s5;
- x1 = s1 - s4 - s6;
- x2 = sinpi_3_9 * s7;
- x3 = s2;
-
- s0 = x0 + x3;
- s1 = x1 + x3;
- s2 = x2;
- s3 = x0 + x1 - x3;
+ s0 = s0 + s3 + s5;
+ s1 = s1 - s4 - s6;
+ s3 = s2;
+ s2 = sinpi_3_9 * s7;
// 1-D transform scaling factor is sqrt(2).
// The overall dynamic range is 14b (input) + 14b (multiplication scaling)
// + 1b (addition) = 29b.
// Hence the output bit depth is 15b.
- output[0] = WRAPLOW(dct_const_round_shift(s0), bd);
- output[1] = WRAPLOW(dct_const_round_shift(s1), bd);
+ output[0] = WRAPLOW(dct_const_round_shift(s0 + s3), bd);
+ output[1] = WRAPLOW(dct_const_round_shift(s1 + s3), bd);
output[2] = WRAPLOW(dct_const_round_shift(s2), bd);
- output[3] = WRAPLOW(dct_const_round_shift(s3), bd);
+ output[3] = WRAPLOW(dct_const_round_shift(s0 + s1 - s3), bd);
}
void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
@@ -1779,14 +1769,14 @@
static void highbd_iadst8(const tran_low_t *input, tran_low_t *output, int bd) {
tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;
- tran_high_t x0 = input[7];
- tran_high_t x1 = input[0];
- tran_high_t x2 = input[5];
- tran_high_t x3 = input[2];
- tran_high_t x4 = input[3];
- tran_high_t x5 = input[4];
- tran_high_t x6 = input[1];
- tran_high_t x7 = input[6];
+ tran_low_t x0 = input[7];
+ tran_low_t x1 = input[0];
+ tran_low_t x2 = input[5];
+ tran_low_t x3 = input[2];
+ tran_low_t x4 = input[3];
+ tran_low_t x5 = input[4];
+ tran_low_t x6 = input[1];
+ tran_low_t x7 = input[6];
(void) bd;
if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) {
@@ -2113,22 +2103,22 @@
tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8;
tran_high_t s9, s10, s11, s12, s13, s14, s15;
- tran_high_t x0 = input[15];
- tran_high_t x1 = input[0];
- tran_high_t x2 = input[13];
- tran_high_t x3 = input[2];
- tran_high_t x4 = input[11];
- tran_high_t x5 = input[4];
- tran_high_t x6 = input[9];
- tran_high_t x7 = input[6];
- tran_high_t x8 = input[7];
- tran_high_t x9 = input[8];
- tran_high_t x10 = input[5];
- tran_high_t x11 = input[10];
- tran_high_t x12 = input[3];
- tran_high_t x13 = input[12];
- tran_high_t x14 = input[1];
- tran_high_t x15 = input[14];
+ tran_low_t x0 = input[15];
+ tran_low_t x1 = input[0];
+ tran_low_t x2 = input[13];
+ tran_low_t x3 = input[2];
+ tran_low_t x4 = input[11];
+ tran_low_t x5 = input[4];
+ tran_low_t x6 = input[9];
+ tran_low_t x7 = input[6];
+ tran_low_t x8 = input[7];
+ tran_low_t x9 = input[8];
+ tran_low_t x10 = input[5];
+ tran_low_t x11 = input[10];
+ tran_low_t x12 = input[3];
+ tran_low_t x13 = input[12];
+ tran_low_t x14 = input[1];
+ tran_low_t x15 = input[14];
(void) bd;
if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index b3a6590..ae69c0c 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -96,7 +96,6 @@
#endif
YV12_BUFFER_CONFIG *frame_to_show;
-
RefCntBuffer frame_bufs[FRAME_BUFFERS];
RefCntBuffer *prev_frame;
@@ -149,17 +148,20 @@
/* We allocate a MODE_INFO struct for each macroblock, together with
an extra row on top and column on the left to simplify prediction. */
-
- int mi_idx;
- int prev_mi_idx;
int mi_alloc_size;
- MODE_INFO *mip_array[2];
-
MODE_INFO *mip; /* Base of allocated array */
MODE_INFO *mi; /* Corresponds to upper left visible macroblock */
+
+ // TODO(agrange): Move prev_mi into encoder structure.
+ // prev_mip and prev_mi will only be allocated in VP9 encoder.
MODE_INFO *prev_mip; /* MODE_INFO array 'mip' from last decoded frame */
MODE_INFO *prev_mi; /* 'mi' from last frame (points into prev_mip) */
+ // Separate mi functions between encoder and decoder.
+ int (*alloc_mi)(struct VP9Common *cm, int mi_size);
+ void (*free_mi)(struct VP9Common *cm);
+ void (*setup_mi)(struct VP9Common *cm);
+
// Whether to use previous frame's motion vectors for prediction.
int use_prev_frame_mvs;
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index 0530f3a..f3922b1 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -1652,37 +1652,37 @@
specialize qw/vp9_highbd_12_sub_pixel_avg_variance4x4/;
add_proto qw/unsigned int vp9_highbd_sad64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vp9_highbd_sad64x64/;
+ specialize qw/vp9_highbd_sad64x64/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_sad32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vp9_highbd_sad32x64/;
+ specialize qw/vp9_highbd_sad32x64/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_sad64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vp9_highbd_sad64x32/;
+ specialize qw/vp9_highbd_sad64x32/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_sad32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vp9_highbd_sad32x16/;
+ specialize qw/vp9_highbd_sad32x16/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_sad16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vp9_highbd_sad16x32/;
+ specialize qw/vp9_highbd_sad16x32/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_sad32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vp9_highbd_sad32x32/;
+ specialize qw/vp9_highbd_sad32x32/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_sad16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vp9_highbd_sad16x16/;
+ specialize qw/vp9_highbd_sad16x16/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_sad16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vp9_highbd_sad16x8/;
+ specialize qw/vp9_highbd_sad16x8/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_sad8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vp9_highbd_sad8x16/;
+ specialize qw/vp9_highbd_sad8x16/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_sad8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vp9_highbd_sad8x8/;
+ specialize qw/vp9_highbd_sad8x8/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_sad8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vp9_highbd_sad8x4/;
+ specialize qw/vp9_highbd_sad8x4/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_sad4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
specialize qw/vp9_highbd_sad4x8/;
@@ -1691,37 +1691,37 @@
specialize qw/vp9_highbd_sad4x4/;
add_proto qw/unsigned int vp9_highbd_sad64x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vp9_highbd_sad64x64_avg/;
+ specialize qw/vp9_highbd_sad64x64_avg/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_sad32x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vp9_highbd_sad32x64_avg/;
+ specialize qw/vp9_highbd_sad32x64_avg/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_sad64x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vp9_highbd_sad64x32_avg/;
+ specialize qw/vp9_highbd_sad64x32_avg/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_sad32x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vp9_highbd_sad32x16_avg/;
+ specialize qw/vp9_highbd_sad32x16_avg/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_sad16x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vp9_highbd_sad16x32_avg/;
+ specialize qw/vp9_highbd_sad16x32_avg/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_sad32x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vp9_highbd_sad32x32_avg/;
+ specialize qw/vp9_highbd_sad32x32_avg/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_sad16x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vp9_highbd_sad16x16_avg/;
+ specialize qw/vp9_highbd_sad16x16_avg/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_sad16x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vp9_highbd_sad16x8_avg/;
+ specialize qw/vp9_highbd_sad16x8_avg/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_sad8x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vp9_highbd_sad8x16_avg/;
+ specialize qw/vp9_highbd_sad8x16_avg/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_sad8x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vp9_highbd_sad8x8_avg/;
+ specialize qw/vp9_highbd_sad8x8_avg/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_sad8x4_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vp9_highbd_sad8x4_avg/;
+ specialize qw/vp9_highbd_sad8x4_avg/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_sad4x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
specialize qw/vp9_highbd_sad4x8_avg/;
@@ -1778,44 +1778,43 @@
specialize qw/vp9_highbd_sad4x4x8/;
add_proto qw/void vp9_highbd_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
- specialize qw/vp9_highbd_sad64x64x4d/;
+ specialize qw/vp9_highbd_sad64x64x4d sse2/;
add_proto qw/void vp9_highbd_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
- specialize qw/vp9_highbd_sad32x64x4d/;
+ specialize qw/vp9_highbd_sad32x64x4d sse2/;
add_proto qw/void vp9_highbd_sad64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
- specialize qw/vp9_highbd_sad64x32x4d/;
+ specialize qw/vp9_highbd_sad64x32x4d sse2/;
add_proto qw/void vp9_highbd_sad32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
- specialize qw/vp9_highbd_sad32x16x4d/;
+ specialize qw/vp9_highbd_sad32x16x4d sse2/;
add_proto qw/void vp9_highbd_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
- specialize qw/vp9_highbd_sad16x32x4d/;
+ specialize qw/vp9_highbd_sad16x32x4d sse2/;
add_proto qw/void vp9_highbd_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
- specialize qw/vp9_highbd_sad32x32x4d/;
+ specialize qw/vp9_highbd_sad32x32x4d sse2/;
add_proto qw/void vp9_highbd_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
- specialize qw/vp9_highbd_sad16x16x4d/;
+ specialize qw/vp9_highbd_sad16x16x4d sse2/;
add_proto qw/void vp9_highbd_sad16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
- specialize qw/vp9_highbd_sad16x8x4d/;
+ specialize qw/vp9_highbd_sad16x8x4d sse2/;
add_proto qw/void vp9_highbd_sad8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
- specialize qw/vp9_highbd_sad8x16x4d/;
+ specialize qw/vp9_highbd_sad8x16x4d sse2/;
add_proto qw/void vp9_highbd_sad8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
- specialize qw/vp9_highbd_sad8x8x4d/;
+ specialize qw/vp9_highbd_sad8x8x4d sse2/;
- # TODO(jingning): need to convert these 4x8/8x4 functions into sse2 form
add_proto qw/void vp9_highbd_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
- specialize qw/vp9_highbd_sad8x4x4d/;
+ specialize qw/vp9_highbd_sad8x4x4d sse2/;
add_proto qw/void vp9_highbd_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
- specialize qw/vp9_highbd_sad4x8x4d/;
+ specialize qw/vp9_highbd_sad4x8x4d sse2/;
add_proto qw/void vp9_highbd_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
- specialize qw/vp9_highbd_sad4x4x4d/;
+ specialize qw/vp9_highbd_sad4x4x4d sse2/;
add_proto qw/unsigned int vp9_highbd_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
specialize qw/vp9_highbd_mse16x16/;
diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c
index 1968165..16f3cd4 100644
--- a/vp9/decoder/vp9_decoder.c
+++ b/vp9/decoder/vp9_decoder.c
@@ -43,6 +43,24 @@
}
}
+static void vp9_dec_setup_mi(VP9_COMMON *cm) {
+ cm->mi = cm->mip + cm->mi_stride + 1;
+ vpx_memset(cm->mip, 0, cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mip));
+}
+
+static int vp9_dec_alloc_mi(VP9_COMMON *cm, int mi_size) {
+ cm->mip = vpx_calloc(mi_size, sizeof(*cm->mip));
+ if (!cm->mip)
+ return 1;
+ cm->mi_alloc_size = mi_size;
+ return 0;
+}
+
+static void vp9_dec_free_mi(VP9_COMMON *cm) {
+ vpx_free(cm->mip);
+ cm->mip = NULL;
+}
+
VP9Decoder *vp9_decoder_create() {
VP9Decoder *const pbi = vpx_memalign(32, sizeof(*pbi));
VP9_COMMON *const cm = pbi ? &pbi->common : NULL;
@@ -77,6 +95,10 @@
cm->bit_depth = VPX_BITS_8;
cm->dequant_bit_depth = VPX_BITS_8;
+ cm->alloc_mi = vp9_dec_alloc_mi;
+ cm->free_mi = vp9_dec_free_mi;
+ cm->setup_mi = vp9_dec_setup_mi;
+
// vp9_init_dequantizer() is first called here. Add check in
// frame_init_dequantizer() to avoid unnecessary calling of
// vp9_init_dequantizer() for every frame.
@@ -95,11 +117,6 @@
VP9_COMMON *const cm = &pbi->common;
int i;
- vpx_free(cm->fc);
- cm->fc = NULL;
- vpx_free(cm->frame_contexts);
- cm->frame_contexts = NULL;
-
vp9_get_worker_interface()->end(&pbi->lf_worker);
vpx_free(pbi->lf_worker.data1);
vpx_free(pbi->tile_data);
diff --git a/vp9/encoder/vp9_aq_variance.c b/vp9/encoder/vp9_aq_variance.c
index 7d75f09..7ee9662 100644
--- a/vp9/encoder/vp9_aq_variance.c
+++ b/vp9/encoder/vp9_aq_variance.c
@@ -52,14 +52,6 @@
return RDMULT_RATIO(energy);
}
-double vp9_vaq_inv_q_ratio(int energy) {
- ENERGY_IN_BOUNDS(energy);
-
- vp9_clear_system_state();
-
- return Q_RATIO(-energy);
-}
-
void vp9_vaq_init() {
int i;
double base_ratio;
diff --git a/vp9/encoder/vp9_aq_variance.h b/vp9/encoder/vp9_aq_variance.h
index d1a459f..73ce2eb 100644
--- a/vp9/encoder/vp9_aq_variance.h
+++ b/vp9/encoder/vp9_aq_variance.h
@@ -20,7 +20,6 @@
unsigned int vp9_vaq_segment_id(int energy);
double vp9_vaq_rdmult_ratio(int energy);
-double vp9_vaq_inv_q_ratio(int energy);
void vp9_vaq_init();
void vp9_vaq_frame_setup(VP9_COMP *cpi);
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 377d1c0..ccab05b 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -1282,36 +1282,6 @@
}
}
-static int is_background(const VP9_COMP *cpi, const TileInfo *const tile,
- int mi_row, int mi_col) {
- // This assumes the input source frames are of the same dimension.
- const int row8x8_remaining = tile->mi_row_end - mi_row;
- const int col8x8_remaining = tile->mi_col_end - mi_col;
- const int x = mi_col * MI_SIZE;
- const int y = mi_row * MI_SIZE;
- const int src_stride = cpi->Source->y_stride;
- const uint8_t *const src = &cpi->Source->y_buffer[y * src_stride + x];
- const int pre_stride = cpi->Last_Source->y_stride;
- const uint8_t *const pre = &cpi->Last_Source->y_buffer[y * pre_stride + x];
- int this_sad = 0;
- int threshold = 0;
-
- if (row8x8_remaining >= MI_BLOCK_SIZE &&
- col8x8_remaining >= MI_BLOCK_SIZE) {
- this_sad = cpi->fn_ptr[BLOCK_64X64].sdf(src, src_stride, pre, pre_stride);
- threshold = (1 << 12);
- } else {
- int r, c;
- for (r = 0; r < row8x8_remaining; r += 2)
- for (c = 0; c < col8x8_remaining; c += 2)
- this_sad += cpi->fn_ptr[BLOCK_16X16].sdf(src, src_stride,
- pre, pre_stride);
- threshold = (row8x8_remaining * col8x8_remaining) << 6;
- }
-
- return this_sad < 2 * threshold;
-}
-
static void update_state_rt(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
int mi_row, int mi_col, int bsize) {
VP9_COMMON *const cm = &cpi->common;
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 1d9fe5e..fd7c974 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -139,6 +139,47 @@
}
}
+static void vp9_enc_setup_mi(VP9_COMMON *cm) {
+ int i;
+ cm->mi = cm->mip + cm->mi_stride + 1;
+ vpx_memset(cm->mip, 0, cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mip));
+ cm->prev_mi = cm->prev_mip + cm->mi_stride + 1;
+ // Clear top border row
+ vpx_memset(cm->prev_mip, 0, sizeof(*cm->prev_mip) * cm->mi_stride);
+ // Clear left border column
+ for (i = 1; i < cm->mi_rows + 1; ++i)
+ vpx_memset(&cm->prev_mip[i * cm->mi_stride], 0, sizeof(*cm->prev_mip));
+}
+
+static int vp9_enc_alloc_mi(VP9_COMMON *cm, int mi_size) {
+ cm->mip = vpx_calloc(mi_size, sizeof(*cm->mip));
+ if (!cm->mip)
+ return 1;
+ cm->prev_mip = vpx_calloc(mi_size, sizeof(*cm->prev_mip));
+ if (!cm->prev_mip)
+ return 1;
+ cm->mi_alloc_size = mi_size;
+ return 0;
+}
+
+static void vp9_enc_free_mi(VP9_COMMON *cm) {
+ vpx_free(cm->mip);
+ cm->mip = NULL;
+ vpx_free(cm->prev_mip);
+ cm->prev_mip = NULL;
+}
+
+static void vp9_swap_mi_and_prev_mi(VP9_COMMON *cm) {
+ // Current mip will be the prev_mip for the next frame.
+ MODE_INFO *temp = cm->prev_mip;
+ cm->prev_mip = cm->mip;
+ cm->mip = temp;
+
+ // Update the upper left visible macroblock ptrs.
+ cm->mi = cm->mip + cm->mi_stride + 1;
+ cm->prev_mi = cm->prev_mip + cm->mi_stride + 1;
+}
+
void vp9_initialize_enc() {
static int init_done = 0;
@@ -160,10 +201,6 @@
VP9_COMMON *const cm = &cpi->common;
int i;
- vpx_free(cm->fc);
- cm->fc = NULL;
- vpx_free(cm->frame_contexts);
- cm->frame_contexts = NULL;
vpx_free(cpi->tile_data);
cpi->tile_data = NULL;
@@ -1380,6 +1417,9 @@
}
cm->error.setjmp = 1;
+ cm->alloc_mi = vp9_enc_alloc_mi;
+ cm->free_mi = vp9_enc_free_mi;
+ cm->setup_mi = vp9_enc_setup_mi;
CHECK_MEM_ERROR(cm, cm->fc,
(FRAME_CONTEXT *)vpx_calloc(1, sizeof(*cm->fc)));
@@ -1689,12 +1729,13 @@
}
void vp9_remove_compressor(VP9_COMP *cpi) {
+ VP9_COMMON *const cm = &cpi->common;
unsigned int i;
if (!cpi)
return;
- if (cpi && (cpi->common.current_video_frame > 0)) {
+ if (cpi && (cm->current_video_frame > 0)) {
#if CONFIG_INTERNAL_STATS
vp9_clear_system_state();
@@ -1777,7 +1818,7 @@
}
#endif
- vp9_remove_common(&cpi->common);
+ vp9_remove_common(cm);
vpx_free(cpi);
#if CONFIG_VP9_TEMPORAL_DENOISING
@@ -3594,6 +3635,13 @@
if (source != NULL) {
cm->show_frame = 1;
cm->intra_only = 0;
+ // if the flags indicate intra frame, but if the current picture is for
+ // non-zero spatial layer, it should not be an intra picture.
+ // TODO(Won Kap): this needs to change if per-layer intra frame is
+ // allowed.
+ if ((source->flags | VPX_EFLAG_FORCE_KF) && cpi->svc.spatial_layer_id) {
+ source->flags &= ~(unsigned int)(VPX_EFLAG_FORCE_KF);
+ }
// Check to see if the frame should be encoded as an arf overlay.
check_src_altref(cpi, source);
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index c8c784b..327082f 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -53,6 +53,7 @@
#define MIN_KF_BOOST 300
#define NEW_MV_MODE_PENALTY 32
#define SVC_FACTOR_PT_LOW 0.45
+#define DARK_THRESH 64
#define DOUBLE_DIVIDE_CHECK(x) ((x) < 0 ? (x) - 0.000001 : (x) + 0.000001)
@@ -106,10 +107,11 @@
FILE *fpfile;
fpfile = fopen("firstpass.stt", "a");
- fprintf(fpfile, "%12.0f %12.0f %12.0f %12.0f %12.4f %12.4f"
+ fprintf(fpfile, "%12.0f %12.4f %12.0f %12.0f %12.0f %12.4f %12.4f"
"%12.4f %12.4f %12.4f %12.4f %12.4f %12.4f %12.4f"
"%12.0f %12.0f %12.4f %12.0f %12.0f %12.4f\n",
stats->frame,
+ stats->weight,
stats->intra_error,
stats->coded_error,
stats->sr_coded_error,
@@ -144,7 +146,8 @@
#endif
static void zero_stats(FIRSTPASS_STATS *section) {
- section->frame = 0.0;
+ section->frame = 0.0;
+ section->weight = 0.0;
section->intra_error = 0.0;
section->coded_error = 0.0;
section->sr_coded_error = 0.0;
@@ -168,6 +171,7 @@
static void accumulate_stats(FIRSTPASS_STATS *section,
const FIRSTPASS_STATS *frame) {
section->frame += frame->frame;
+ section->weight += frame->weight;
section->spatial_layer_id = frame->spatial_layer_id;
section->intra_error += frame->intra_error;
section->coded_error += frame->coded_error;
@@ -191,6 +195,7 @@
static void subtract_stats(FIRSTPASS_STATS *section,
const FIRSTPASS_STATS *frame) {
section->frame -= frame->frame;
+ section->weight -= frame->weight;
section->intra_error -= frame->intra_error;
section->coded_error -= frame->coded_error;
section->sr_coded_error -= frame->sr_coded_error;
@@ -217,10 +222,11 @@
const VP9EncoderConfig *oxcf,
const FIRSTPASS_STATS *this_frame) {
const FIRSTPASS_STATS *const stats = &twopass->total_stats;
- const double av_err = stats->coded_error / stats->count;
- const double modified_error = av_err *
- pow(this_frame->coded_error / DOUBLE_DIVIDE_CHECK(av_err),
- oxcf->two_pass_vbrbias / 100.0);
+ const double av_weight = stats->weight / stats->count;
+ const double av_err = (stats->coded_error * av_weight) / stats->count;
+ const double modified_error =
+ av_err * pow(this_frame->coded_error * this_frame->weight /
+ DOUBLE_DIVIDE_CHECK(av_err), oxcf->two_pass_vbrbias / 100.0);
return fclamp(modified_error,
twopass->modified_error_min, twopass->modified_error_max);
}
@@ -480,6 +486,8 @@
const YV12_BUFFER_CONFIG *first_ref_buf = lst_yv12;
LAYER_CONTEXT *const lc = is_two_pass_svc(cpi) ?
&cpi->svc.layer_context[cpi->svc.spatial_layer_id] : NULL;
+ double intra_factor = 0.0;
+ double brightness_factor = 0.0;
#if CONFIG_FP_MB_STATS
if (cpi->use_fp_mb_stats) {
@@ -585,8 +593,10 @@
for (mb_col = 0; mb_col < cm->mb_cols; ++mb_col) {
int this_error;
const int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row);
- double error_weight = 1.0;
const BLOCK_SIZE bsize = get_bsize(cm, mb_row, mb_col);
+ double log_intra;
+ int level_sample;
+
#if CONFIG_FP_MB_STATS
const int mb_index = mb_row * cm->mb_cols + mb_col;
#endif
@@ -604,11 +614,6 @@
mb_col << 1, num_8x8_blocks_wide_lookup[bsize],
cm->mi_rows, cm->mi_cols);
- if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
- const int energy = vp9_block_energy(cpi, x, bsize);
- error_weight = vp9_vaq_inv_q_ratio(energy);
- }
-
// Do intra 16x16 prediction.
x->skip_encode = 0;
xd->mi[0].src_mi->mbmi.mode = DC_PRED;
@@ -635,10 +640,24 @@
}
#endif // CONFIG_VP9_HIGHBITDEPTH
- if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
- vp9_clear_system_state();
- this_error = (int)(this_error * error_weight);
- }
+ log_intra = log(this_error + 1.0);
+ if (log_intra < 10.0)
+ intra_factor += 1.0 + ((10.0 - log_intra) * 0.05);
+ else
+ intra_factor += 1.0;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cm->use_highbitdepth)
+ level_sample = CONVERT_TO_SHORTPTR(x->plane[0].src.buf)[0];
+ else
+ level_sample = x->plane[0].src.buf[0];
+#else
+ level_sample = x->plane[0].src.buf[0];
+#endif
+ if ((level_sample < DARK_THRESH) && (log_intra < 9.0))
+ brightness_factor += 1.0 + (0.01 * (DARK_THRESH - level_sample));
+ else
+ brightness_factor += 1.0;
// Intrapenalty below deals with situations where the intra and inter
// error scores are very low (e.g. a plain black frame).
@@ -711,20 +730,12 @@
// Test last reference frame using the previous best mv as the
// starting point (best reference) for the search.
first_pass_motion_search(cpi, x, &best_ref_mv, &mv, &motion_error);
- if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
- vp9_clear_system_state();
- motion_error = (int)(motion_error * error_weight);
- }
// If the current best reference mv is not centered on 0,0 then do a
// 0,0 based search as well.
if (!is_zero_mv(&best_ref_mv)) {
tmp_err = INT_MAX;
first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv, &tmp_err);
- if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
- vp9_clear_system_state();
- tmp_err = (int)(tmp_err * error_weight);
- }
if (tmp_err < motion_error) {
motion_error = tmp_err;
@@ -755,10 +766,6 @@
first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv,
&gf_motion_error);
- if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
- vp9_clear_system_state();
- gf_motion_error = (int)(gf_motion_error * error_weight);
- }
if (gf_motion_error < motion_error && gf_motion_error < this_error)
++second_ref_count;
@@ -943,6 +950,10 @@
cpi->oxcf.allow_spatial_resampling ? cpi->initial_mbs : cpi->common.MBs;
const double min_err = 200 * sqrt(num_mbs);
+ intra_factor = intra_factor / (double)num_mbs;
+ brightness_factor = brightness_factor / (double)num_mbs;
+ fps.weight = intra_factor * brightness_factor;
+
fps.frame = cm->current_video_frame;
fps.spatial_layer_id = cpi->svc.spatial_layer_id;
fps.coded_error = (double)(coded_error >> 8) + min_err;
@@ -1094,7 +1105,8 @@
// content at the given rate.
for (q = rc->best_quality; q < rc->worst_quality; ++q) {
const double factor =
- calc_correction_factor(err_per_mb, ERR_DIVISOR - ediv_size_correction,
+ calc_correction_factor(err_per_mb,
+ ERR_DIVISOR - ediv_size_correction,
is_svc_upper_layer ? SVC_FACTOR_PT_LOW :
FACTOR_PT_LOW, FACTOR_PT_HIGH, q,
cpi->common.bit_depth);
@@ -1319,14 +1331,14 @@
const double lq =
vp9_convert_qindex_to_q(cpi->rc.avg_frame_qindex[INTER_FRAME],
cpi->common.bit_depth);
- const double boost_correction = MIN((0.5 + (lq * 0.015)), 1.5);
+ const double boost_q_correction = MIN((0.5 + (lq * 0.015)), 1.5);
const int num_mbs =
cpi->oxcf.allow_spatial_resampling ? cpi->initial_mbs : cpi->common.MBs;
// Underlying boost factor is based on inter error ratio.
frame_boost = (BASELINE_ERR_PER_MB * num_mbs) /
DOUBLE_DIVIDE_CHECK(this_frame->coded_error);
- frame_boost = frame_boost * BOOST_FACTOR * boost_correction;
+ frame_boost = frame_boost * BOOST_FACTOR * boost_q_correction;
// Increase boost for frames where new data coming into frame (e.g. zoom out).
// Slightly reduce boost if there is a net balance of motion out of the frame
@@ -1337,7 +1349,7 @@
else
frame_boost += frame_boost * (this_frame_mv_in_out / 2.0);
- return MIN(frame_boost, max_boost * boost_correction);
+ return MIN(frame_boost, max_boost * boost_q_correction);
}
static int calc_arf_boost(VP9_COMP *cpi, int offset,
diff --git a/vp9/encoder/vp9_firstpass.h b/vp9/encoder/vp9_firstpass.h
index e21d869..0a8f756 100644
--- a/vp9/encoder/vp9_firstpass.h
+++ b/vp9/encoder/vp9_firstpass.h
@@ -43,6 +43,7 @@
typedef struct {
double frame;
+ double weight;
double intra_error;
double coded_error;
double sr_coded_error;
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index af67328..358bedd 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -779,6 +779,9 @@
// Check that a prediction mode has been selected.
assert(best_rdc.rdcost < INT64_MAX);
+
+ if (x->skip)
+ break;
}
mbmi->mode = best_mode;
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index 8a5b611..6f16579 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -369,8 +369,8 @@
return rc->rate_correction_factors[rf_lvl];
} else {
if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) &&
- !rc->is_src_frame_alt_ref &&
- !(cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR))
+ !rc->is_src_frame_alt_ref && !cpi->use_svc &&
+ cpi->oxcf.rc_mode != VPX_CBR)
return rc->rate_correction_factors[GF_ARF_STD];
else
return rc->rate_correction_factors[INTER_NORMAL];
@@ -388,8 +388,8 @@
rc->rate_correction_factors[rf_lvl] = factor;
} else {
if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) &&
- !rc->is_src_frame_alt_ref &&
- !(cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR))
+ !rc->is_src_frame_alt_ref && !cpi->use_svc &&
+ cpi->oxcf.rc_mode != VPX_CBR)
rc->rate_correction_factors[GF_ARF_STD] = factor;
else
rc->rate_correction_factors[INTER_NORMAL] = factor;
@@ -1186,8 +1186,8 @@
// Post encode loop adjustment of Q prediction.
vp9_rc_update_rate_correction_factors(
- cpi, (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF ||
- oxcf->rc_mode == VPX_CBR) ? 2 : 0);
+ cpi, (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) ? 2 :
+ ((oxcf->rc_mode == VPX_CBR) ? 1 : 0));
// Keep a record of last Q and ambient average Q.
if (cm->frame_type == KEY_FRAME) {
diff --git a/vp9/encoder/x86/vp9_highbd_sad4d_sse2.asm b/vp9/encoder/x86/vp9_highbd_sad4d_sse2.asm
new file mode 100644
index 0000000..986efb1
--- /dev/null
+++ b/vp9/encoder/x86/vp9_highbd_sad4d_sse2.asm
@@ -0,0 +1,284 @@
+;
+; Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+%include "third_party/x86inc/x86inc.asm"
+
+SECTION .text
+
+; HIGH_PROCESS_4x2x4 first, off_{first,second}_{src,ref}, advance_at_end
+%macro HIGH_PROCESS_4x2x4 5-6 0
+ movh m0, [srcq +%2*2]
+%if %1 == 1
+ movu m4, [ref1q+%3*2]
+ movu m5, [ref2q+%3*2]
+ movu m6, [ref3q+%3*2]
+ movu m7, [ref4q+%3*2]
+ movhps m0, [srcq +%4*2]
+ movhps m4, [ref1q+%5*2]
+ movhps m5, [ref2q+%5*2]
+ movhps m6, [ref3q+%5*2]
+ movhps m7, [ref4q+%5*2]
+ mova m3, m0
+ mova m2, m0
+ psubusw m3, m4
+ psubusw m2, m5
+ psubusw m4, m0
+ psubusw m5, m0
+ por m4, m3
+ por m5, m2
+ pmaddwd m4, m1
+ pmaddwd m5, m1
+ mova m3, m0
+ mova m2, m0
+ psubusw m3, m6
+ psubusw m2, m7
+ psubusw m6, m0
+ psubusw m7, m0
+ por m6, m3
+ por m7, m2
+ pmaddwd m6, m1
+ pmaddwd m7, m1
+%else
+ movu m2, [ref1q+%3*2]
+ movhps m0, [srcq +%4*2]
+ movhps m2, [ref1q+%5*2]
+ mova m3, m0
+ psubusw m3, m2
+ psubusw m2, m0
+ por m2, m3
+ pmaddwd m2, m1
+ paddd m4, m2
+
+ movu m2, [ref2q+%3*2]
+ mova m3, m0
+ movhps m2, [ref2q+%5*2]
+ psubusw m3, m2
+ psubusw m2, m0
+ por m2, m3
+ pmaddwd m2, m1
+ paddd m5, m2
+
+ movu m2, [ref3q+%3*2]
+ mova m3, m0
+ movhps m2, [ref3q+%5*2]
+ psubusw m3, m2
+ psubusw m2, m0
+ por m2, m3
+ pmaddwd m2, m1
+ paddd m6, m2
+
+ movu m2, [ref4q+%3*2]
+ mova m3, m0
+ movhps m2, [ref4q+%5*2]
+ psubusw m3, m2
+ psubusw m2, m0
+ por m2, m3
+ pmaddwd m2, m1
+ paddd m7, m2
+%endif
+%if %6 == 1
+ lea srcq, [srcq +src_strideq*4]
+ lea ref1q, [ref1q+ref_strideq*4]
+ lea ref2q, [ref2q+ref_strideq*4]
+ lea ref3q, [ref3q+ref_strideq*4]
+ lea ref4q, [ref4q+ref_strideq*4]
+%endif
+%endmacro
+
+; HIGH_PROCESS_8x2x4 first, off_{first,second}_{src,ref}, advance_at_end
+%macro HIGH_PROCESS_8x2x4 5-6 0
+ ; 1st 8 px
+ mova m0, [srcq +%2*2]
+%if %1 == 1
+ movu m4, [ref1q+%3*2]
+ movu m5, [ref2q+%3*2]
+ movu m6, [ref3q+%3*2]
+ movu m7, [ref4q+%3*2]
+ mova m3, m0
+ mova m2, m0
+ psubusw m3, m4
+ psubusw m2, m5
+ psubusw m4, m0
+ psubusw m5, m0
+ por m4, m3
+ por m5, m2
+ pmaddwd m4, m1
+ pmaddwd m5, m1
+ mova m3, m0
+ mova m2, m0
+ psubusw m3, m6
+ psubusw m2, m7
+ psubusw m6, m0
+ psubusw m7, m0
+ por m6, m3
+ por m7, m2
+ pmaddwd m6, m1
+ pmaddwd m7, m1
+%else
+ mova m3, m0
+ movu m2, [ref1q+%3*2]
+ psubusw m3, m2
+ psubusw m2, m0
+ por m2, m3
+ mova m3, m0
+ pmaddwd m2, m1
+ paddd m4, m2
+ movu m2, [ref2q+%3*2]
+ psubusw m3, m2
+ psubusw m2, m0
+ por m2, m3
+ mova m3, m0
+ pmaddwd m2, m1
+ paddd m5, m2
+ movu m2, [ref3q+%3*2]
+ psubusw m3, m2
+ psubusw m2, m0
+ por m2, m3
+ mova m3, m0
+ pmaddwd m2, m1
+ paddd m6, m2
+ movu m2, [ref4q+%3*2]
+ psubusw m3, m2
+ psubusw m2, m0
+ por m2, m3
+ pmaddwd m2, m1
+ paddd m7, m2
+%endif
+
+ ; 2nd 8 px
+ mova m0, [srcq +(%4)*2]
+ mova m3, m0
+ movu m2, [ref1q+(%5)*2]
+ psubusw m3, m2
+ psubusw m2, m0
+ por m2, m3
+ mova m3, m0
+ pmaddwd m2, m1
+ paddd m4, m2
+ movu m2, [ref2q+(%5)*2]
+ psubusw m3, m2
+ psubusw m2, m0
+ por m2, m3
+ mova m3, m0
+ pmaddwd m2, m1
+ paddd m5, m2
+ movu m2, [ref3q+(%5)*2]
+ psubusw m3, m2
+ psubusw m2, m0
+ por m2, m3
+ mova m3, m0
+ pmaddwd m2, m1
+ paddd m6, m2
+ movu m2, [ref4q+(%5)*2]
+ psubusw m3, m2
+ psubusw m2, m0
+%if %6 == 1
+ lea srcq, [srcq +src_strideq*4]
+ lea ref1q, [ref1q+ref_strideq*4]
+ lea ref2q, [ref2q+ref_strideq*4]
+ lea ref3q, [ref3q+ref_strideq*4]
+ lea ref4q, [ref4q+ref_strideq*4]
+%endif
+ por m2, m3
+ pmaddwd m2, m1
+ paddd m7, m2
+%endmacro
+
+; HIGH_PROCESS_16x2x4 first, off_{first,second}_{src,ref}, advance_at_end
+%macro HIGH_PROCESS_16x2x4 5-6 0
+ HIGH_PROCESS_8x2x4 %1, %2, %3, (%2 + 8), (%3 + 8)
+ HIGH_PROCESS_8x2x4 0, %4, %5, (%4 + 8), (%5 + 8), %6
+%endmacro
+
+; HIGH_PROCESS_32x2x4 first, off_{first,second}_{src,ref}, advance_at_end
+%macro HIGH_PROCESS_32x2x4 5-6 0
+ HIGH_PROCESS_16x2x4 %1, %2, %3, (%2 + 16), (%3 + 16)
+ HIGH_PROCESS_16x2x4 0, %4, %5, (%4 + 16), (%5 + 16), %6
+%endmacro
+
+; HIGH_PROCESS_64x2x4 first, off_{first,second}_{src,ref}, advance_at_end
+%macro HIGH_PROCESS_64x2x4 5-6 0
+ HIGH_PROCESS_32x2x4 %1, %2, %3, (%2 + 32), (%3 + 32)
+ HIGH_PROCESS_32x2x4 0, %4, %5, (%4 + 32), (%5 + 32), %6
+%endmacro
+
+; void vp9_highbd_sadNxNx4d_sse2(uint8_t *src, int src_stride,
+; uint8_t *ref[4], int ref_stride,
+; unsigned int res[4]);
+; where NxN = 64x64, 32x32, 16x16, 16x8, 8x16 or 8x8
+%macro HIGH_SADNXN4D 2
+%if UNIX64
+cglobal highbd_sad%1x%2x4d, 5, 9, 8, src, src_stride, ref1, ref_stride, \
+ res, ref2, ref3, ref4, one
+%else
+cglobal highbd_sad%1x%2x4d, 4, 8, 8, src, src_stride, ref1, ref_stride, \
+ ref2, ref3, ref4, one
+%endif
+
+ movsxdifnidn src_strideq, src_strided
+ movsxdifnidn ref_strideq, ref_strided
+ mov ref2q, [ref1q+gprsize*1]
+ mov ref3q, [ref1q+gprsize*2]
+ mov ref4q, [ref1q+gprsize*3]
+ mov ref1q, [ref1q+gprsize*0]
+
+; convert byte pointers to short pointers
+ shl srcq, 1
+ shl ref2q, 1
+ shl ref3q, 1
+ shl ref4q, 1
+ shl ref1q, 1
+
+ mov oned, 0x00010001
+ movd m1, oned
+ pshufd m1, m1, 0x0
+
+ HIGH_PROCESS_%1x2x4 1, 0, 0, src_strideq, ref_strideq, 1
+%rep (%2-4)/2
+ HIGH_PROCESS_%1x2x4 0, 0, 0, src_strideq, ref_strideq, 1
+%endrep
+ HIGH_PROCESS_%1x2x4 0, 0, 0, src_strideq, ref_strideq, 0
+ ; N.B. HIGH_PROCESS outputs dwords (32 bits)
+ ; so in high bit depth even the smallest width (4) needs 128bits i.e. XMM
+ movhlps m0, m4
+ movhlps m1, m5
+ movhlps m2, m6
+ movhlps m3, m7
+ paddd m4, m0
+ paddd m5, m1
+ paddd m6, m2
+ paddd m7, m3
+ punpckldq m4, m5
+ punpckldq m6, m7
+ movhlps m0, m4
+ movhlps m1, m6
+ paddd m4, m0
+ paddd m6, m1
+ punpcklqdq m4, m6
+ movifnidn r4, r4mp
+ movu [r4], m4
+ RET
+%endmacro
+
+
+INIT_XMM sse2
+HIGH_SADNXN4D 64, 64
+HIGH_SADNXN4D 64, 32
+HIGH_SADNXN4D 32, 64
+HIGH_SADNXN4D 32, 32
+HIGH_SADNXN4D 32, 16
+HIGH_SADNXN4D 16, 32
+HIGH_SADNXN4D 16, 16
+HIGH_SADNXN4D 16, 8
+HIGH_SADNXN4D 8, 16
+HIGH_SADNXN4D 8, 8
+HIGH_SADNXN4D 8, 4
+HIGH_SADNXN4D 4, 8
+HIGH_SADNXN4D 4, 4
diff --git a/vp9/encoder/x86/vp9_highbd_sad_sse2.asm b/vp9/encoder/x86/vp9_highbd_sad_sse2.asm
new file mode 100644
index 0000000..c895ac0
--- /dev/null
+++ b/vp9/encoder/x86/vp9_highbd_sad_sse2.asm
@@ -0,0 +1,363 @@
+;
+; Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+%include "third_party/x86inc/x86inc.asm"
+
+SECTION .text
+
+%macro HIGH_SAD_FN 4
+%if %4 == 0
+%if %3 == 5
+cglobal highbd_sad%1x%2, 4, %3, 7, src, src_stride, ref, ref_stride, n_rows
+%else ; %3 == 7
+cglobal highbd_sad%1x%2, 4, %3, 7, src, src_stride, ref, ref_stride, \
+ src_stride3, ref_stride3, n_rows
+%endif ; %3 == 5/7
+%else ; avg
+%if %3 == 5
+cglobal highbd_sad%1x%2_avg, 5, 1 + %3, 7, src, src_stride, ref, ref_stride, \
+ second_pred, n_rows
+%else ; %3 == 7
+cglobal highbd_sad%1x%2_avg, 5, ARCH_X86_64 + %3, 7, src, src_stride, \
+ ref, ref_stride, \
+ second_pred, \
+ src_stride3, ref_stride3
+%if ARCH_X86_64
+%define n_rowsd r7d
+%else ; x86-32
+%define n_rowsd dword r0m
+%endif ; x86-32/64
+%endif ; %3 == 5/7
+%endif ; avg/sad
+ movsxdifnidn src_strideq, src_strided
+ movsxdifnidn ref_strideq, ref_strided
+%if %3 == 7
+ lea src_stride3q, [src_strideq*3]
+ lea ref_stride3q, [ref_strideq*3]
+%endif ; %3 == 7
+; convert src, ref & second_pred to short ptrs (from byte ptrs)
+ shl srcq, 1
+ shl refq, 1
+%if %4 == 1
+ shl second_predq, 1
+%endif
+%endmacro
+
+; unsigned int vp9_highbd_sad64x{32,64}_sse2(uint8_t *src, int src_stride,
+;                                    uint8_t *ref, int ref_stride);
+%macro HIGH_SAD64XN 1-2 0
+ HIGH_SAD_FN 64, %1, 5, %2
+ mov n_rowsd, %1
+ pxor m0, m0
+ pxor m6, m6
+
+.loop:
+ ; first half of each row
+ movu m1, [refq]
+ movu m2, [refq+16]
+ movu m3, [refq+32]
+ movu m4, [refq+48]
+%if %2 == 1
+ pavgw m1, [second_predq+mmsize*0]
+ pavgw m2, [second_predq+mmsize*1]
+ pavgw m3, [second_predq+mmsize*2]
+ pavgw m4, [second_predq+mmsize*3]
+ lea second_predq, [second_predq+mmsize*4]
+%endif
+ mova m5, [srcq]
+ psubusw m5, m1
+ psubusw m1, [srcq]
+ por m1, m5
+ mova m5, [srcq+16]
+ psubusw m5, m2
+ psubusw m2, [srcq+16]
+ por m2, m5
+ mova m5, [srcq+32]
+ psubusw m5, m3
+ psubusw m3, [srcq+32]
+ por m3, m5
+ mova m5, [srcq+48]
+ psubusw m5, m4
+ psubusw m4, [srcq+48]
+ por m4, m5
+ paddw m1, m2
+ paddw m3, m4
+ movhlps m2, m1
+ movhlps m4, m3
+ paddw m1, m2
+ paddw m3, m4
+ punpcklwd m1, m6
+ punpcklwd m3, m6
+ paddd m0, m1
+ paddd m0, m3
+ ; second half of each row
+ movu m1, [refq+64]
+ movu m2, [refq+80]
+ movu m3, [refq+96]
+ movu m4, [refq+112]
+%if %2 == 1
+ pavgw m1, [second_predq+mmsize*0]
+ pavgw m2, [second_predq+mmsize*1]
+ pavgw m3, [second_predq+mmsize*2]
+ pavgw m4, [second_predq+mmsize*3]
+ lea second_predq, [second_predq+mmsize*4]
+%endif
+ mova m5, [srcq+64]
+ psubusw m5, m1
+ psubusw m1, [srcq+64]
+ por m1, m5
+ mova m5, [srcq+80]
+ psubusw m5, m2
+ psubusw m2, [srcq+80]
+ por m2, m5
+ mova m5, [srcq+96]
+ psubusw m5, m3
+ psubusw m3, [srcq+96]
+ por m3, m5
+ mova m5, [srcq+112]
+ psubusw m5, m4
+ psubusw m4, [srcq+112]
+ por m4, m5
+ paddw m1, m2
+ paddw m3, m4
+ movhlps m2, m1
+ movhlps m4, m3
+ paddw m1, m2
+ paddw m3, m4
+ punpcklwd m1, m6
+ punpcklwd m3, m6
+ lea refq, [refq+ref_strideq*2]
+ paddd m0, m1
+ lea srcq, [srcq+src_strideq*2]
+ paddd m0, m3
+
+ dec n_rowsd
+ jg .loop
+
+ movhlps m1, m0
+ paddd m0, m1
+ punpckldq m0, m6
+ movhlps m1, m0
+ paddd m0, m1
+ movd eax, m0
+ RET
+%endmacro
+
+INIT_XMM sse2
+HIGH_SAD64XN 64 ; highbd_sad64x64_sse2
+HIGH_SAD64XN 32 ; highbd_sad64x32_sse2
+HIGH_SAD64XN 64, 1 ; highbd_sad64x64_avg_sse2
+HIGH_SAD64XN 32, 1 ; highbd_sad64x32_avg_sse2
+
+
+; unsigned int vp9_highbd_sad32x{16,32,64}_sse2(uint8_t *src, int src_stride,
+; uint8_t *ref, int ref_stride);
+%macro HIGH_SAD32XN 1-2 0
+ HIGH_SAD_FN 32, %1, 5, %2
+ mov n_rowsd, %1
+ pxor m0, m0
+ pxor m6, m6
+
+.loop:
+ movu m1, [refq]
+ movu m2, [refq+16]
+ movu m3, [refq+32]
+ movu m4, [refq+48]
+%if %2 == 1
+ pavgw m1, [second_predq+mmsize*0]
+ pavgw m2, [second_predq+mmsize*1]
+ pavgw m3, [second_predq+mmsize*2]
+ pavgw m4, [second_predq+mmsize*3]
+ lea second_predq, [second_predq+mmsize*4]
+%endif
+ mova m5, [srcq]
+ psubusw m5, m1
+ psubusw m1, [srcq]
+ por m1, m5
+ mova m5, [srcq+16]
+ psubusw m5, m2
+ psubusw m2, [srcq+16]
+ por m2, m5
+ mova m5, [srcq+32]
+ psubusw m5, m3
+ psubusw m3, [srcq+32]
+ por m3, m5
+ mova m5, [srcq+48]
+ psubusw m5, m4
+ psubusw m4, [srcq+48]
+ por m4, m5
+ paddw m1, m2
+ paddw m3, m4
+ movhlps m2, m1
+ movhlps m4, m3
+ paddw m1, m2
+ paddw m3, m4
+ punpcklwd m1, m6
+ punpcklwd m3, m6
+ lea refq, [refq+ref_strideq*2]
+ paddd m0, m1
+ lea srcq, [srcq+src_strideq*2]
+ paddd m0, m3
+ dec n_rowsd
+ jg .loop
+
+ movhlps m1, m0
+ paddd m0, m1
+ punpckldq m0, m6
+ movhlps m1, m0
+ paddd m0, m1
+ movd eax, m0
+ RET
+%endmacro
+
+INIT_XMM sse2
+HIGH_SAD32XN 64 ; highbd_sad32x64_sse2
+HIGH_SAD32XN 32 ; highbd_sad32x32_sse2
+HIGH_SAD32XN 16 ; highbd_sad32x16_sse2
+HIGH_SAD32XN 64, 1 ; highbd_sad32x64_avg_sse2
+HIGH_SAD32XN 32, 1 ; highbd_sad32x32_avg_sse2
+HIGH_SAD32XN 16, 1 ; highbd_sad32x16_avg_sse2
+
+; unsigned int vp9_highbd_sad16x{8,16,32}_sse2(uint8_t *src, int src_stride,
+; uint8_t *ref, int ref_stride);
+%macro HIGH_SAD16XN 1-2 0
+ HIGH_SAD_FN 16, %1, 5, %2
+ mov n_rowsd, %1/2
+ pxor m0, m0
+ pxor m6, m6
+
+.loop:
+ movu m1, [refq]
+ movu m2, [refq+16]
+ movu m3, [refq+ref_strideq*2]
+ movu m4, [refq+ref_strideq*2+16]
+%if %2 == 1
+ pavgw m1, [second_predq+mmsize*0]
+ pavgw m2, [second_predq+16]
+ pavgw m3, [second_predq+mmsize*2]
+ pavgw m4, [second_predq+mmsize*2+16]
+ lea second_predq, [second_predq+mmsize*4]
+%endif
+ mova m5, [srcq]
+ psubusw m5, m1
+ psubusw m1, [srcq]
+ por m1, m5
+ mova m5, [srcq+16]
+ psubusw m5, m2
+ psubusw m2, [srcq+16]
+ por m2, m5
+ mova m5, [srcq+src_strideq*2]
+ psubusw m5, m3
+ psubusw m3, [srcq+src_strideq*2]
+ por m3, m5
+ mova m5, [srcq+src_strideq*2+16]
+ psubusw m5, m4
+ psubusw m4, [srcq+src_strideq*2+16]
+ por m4, m5
+ paddw m1, m2
+ paddw m3, m4
+ movhlps m2, m1
+ movhlps m4, m3
+ paddw m1, m2
+ paddw m3, m4
+ punpcklwd m1, m6
+ punpcklwd m3, m6
+ lea refq, [refq+ref_strideq*4]
+ paddd m0, m1
+ lea srcq, [srcq+src_strideq*4]
+ paddd m0, m3
+ dec n_rowsd
+ jg .loop
+
+ movhlps m1, m0
+ paddd m0, m1
+ punpckldq m0, m6
+ movhlps m1, m0
+ paddd m0, m1
+ movd eax, m0
+ RET
+%endmacro
+
+INIT_XMM sse2
+HIGH_SAD16XN 32 ; highbd_sad16x32_sse2
+HIGH_SAD16XN 16 ; highbd_sad16x16_sse2
+HIGH_SAD16XN 8 ; highbd_sad16x8_sse2
+HIGH_SAD16XN 32, 1 ; highbd_sad16x32_avg_sse2
+HIGH_SAD16XN 16, 1 ; highbd_sad16x16_avg_sse2
+HIGH_SAD16XN 8, 1 ; highbd_sad16x8_avg_sse2
+
+
+; unsigned int vp9_highbd_sad8x{4,8,16}_sse2(uint8_t *src, int src_stride,
+; uint8_t *ref, int ref_stride);
+%macro HIGH_SAD8XN 1-2 0
+ HIGH_SAD_FN 8, %1, 7, %2
+ mov n_rowsd, %1/4
+ pxor m0, m0
+ pxor m6, m6
+
+.loop:
+ movu m1, [refq]
+ movu m2, [refq+ref_strideq*2]
+ movu m3, [refq+ref_strideq*4]
+ movu m4, [refq+ref_stride3q*2]
+%if %2 == 1
+ pavgw m1, [second_predq+mmsize*0]
+ pavgw m2, [second_predq+mmsize*1]
+ pavgw m3, [second_predq+mmsize*2]
+ pavgw m4, [second_predq+mmsize*3]
+ lea second_predq, [second_predq+mmsize*4]
+%endif
+ mova m5, [srcq]
+ psubusw m5, m1
+ psubusw m1, [srcq]
+ por m1, m5
+ mova m5, [srcq+src_strideq*2]
+ psubusw m5, m2
+ psubusw m2, [srcq+src_strideq*2]
+ por m2, m5
+ mova m5, [srcq+src_strideq*4]
+ psubusw m5, m3
+ psubusw m3, [srcq+src_strideq*4]
+ por m3, m5
+ mova m5, [srcq+src_stride3q*2]
+ psubusw m5, m4
+ psubusw m4, [srcq+src_stride3q*2]
+ por m4, m5
+ paddw m1, m2
+ paddw m3, m4
+ movhlps m2, m1
+ movhlps m4, m3
+ paddw m1, m2
+ paddw m3, m4
+ punpcklwd m1, m6
+ punpcklwd m3, m6
+ lea refq, [refq+ref_strideq*8]
+ paddd m0, m1
+ lea srcq, [srcq+src_strideq*8]
+ paddd m0, m3
+ dec n_rowsd
+ jg .loop
+
+ movhlps m1, m0
+ paddd m0, m1
+ punpckldq m0, m6
+ movhlps m1, m0
+ paddd m0, m1
+ movd eax, m0
+ RET
+%endmacro
+
+INIT_XMM sse2
+HIGH_SAD8XN 16 ; highbd_sad8x16_sse2
+HIGH_SAD8XN 8 ; highbd_sad8x8_sse2
+HIGH_SAD8XN 4 ; highbd_sad8x4_sse2
+HIGH_SAD8XN 16, 1 ; highbd_sad8x16_avg_sse2
+HIGH_SAD8XN 8, 1 ; highbd_sad8x8_avg_sse2
+HIGH_SAD8XN 4, 1 ; highbd_sad8x4_avg_sse2
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index d3c2a13..5948699 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -83,6 +83,7 @@
vp8_postproc_cfg_t preview_ppcfg;
vpx_codec_pkt_list_decl(256) pkt_list;
unsigned int fixed_kf_cntr;
+ vpx_codec_priv_output_cx_pkt_cb_pair_t output_cx_pkt_cb;
};
static VP9_REFFRAME ref_frame_to_vp9_reframe(vpx_ref_frame_type_t frame) {
@@ -994,6 +995,24 @@
ctx->pending_frame_magnitude |= size;
cx_data += size;
cx_data_sz -= size;
+
+ if (ctx->output_cx_pkt_cb.output_cx_pkt) {
+ pkt.kind = VPX_CODEC_CX_FRAME_PKT;
+ pkt.data.frame.pts = ticks_to_timebase_units(timebase,
+ dst_time_stamp);
+ pkt.data.frame.duration =
+ (unsigned long)ticks_to_timebase_units(timebase,
+ dst_end_time_stamp - dst_time_stamp);
+ pkt.data.frame.flags = get_frame_pkt_flags(cpi, lib_flags);
+ pkt.data.frame.buf = ctx->pending_cx_data;
+ pkt.data.frame.sz = size;
+ ctx->pending_cx_data = NULL;
+ ctx->pending_cx_data_sz = 0;
+ ctx->pending_frame_count = 0;
+ ctx->pending_frame_magnitude = 0;
+ ctx->output_cx_pkt_cb.output_cx_pkt(
+ &pkt, ctx->output_cx_pkt_cb.user_priv);
+ }
continue;
}
@@ -1009,7 +1028,9 @@
ctx->pending_frame_sizes[ctx->pending_frame_count++] = size;
ctx->pending_frame_magnitude |= size;
ctx->pending_cx_data_sz += size;
- size += write_superframe_index(ctx);
+      // Write the superframe index only when no output packet callback is
+      // registered; a callback consumes each frame individually instead.
+ if (!ctx->output_cx_pkt_cb.output_cx_pkt)
+ size += write_superframe_index(ctx);
pkt.data.frame.buf = ctx->pending_cx_data;
pkt.data.frame.sz = ctx->pending_cx_data_sz;
ctx->pending_cx_data = NULL;
@@ -1021,11 +1042,16 @@
pkt.data.frame.sz = size;
}
pkt.data.frame.partition_id = -1;
- vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt);
+
+ if(ctx->output_cx_pkt_cb.output_cx_pkt)
+ ctx->output_cx_pkt_cb.output_cx_pkt(&pkt, ctx->output_cx_pkt_cb.user_priv);
+ else
+ vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt);
+
cx_data += size;
cx_data_sz -= size;
#if CONFIG_SPATIAL_SVC
- if (is_two_pass_svc(cpi)) {
+ if (is_two_pass_svc(cpi) && !ctx->output_cx_pkt_cb.output_cx_pkt) {
vpx_codec_cx_pkt_t pkt_sizes, pkt_psnr;
int i;
vp9_zero(pkt_sizes);
@@ -1038,7 +1064,9 @@
pkt_psnr.data.layer_psnr[i] = lc->psnr_pkt;
lc->layer_size = 0;
}
+
vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt_sizes);
+
vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt_psnr);
}
#endif
@@ -1239,6 +1267,18 @@
return VPX_CODEC_OK;
}
+static vpx_codec_err_t ctrl_get_svc_layer_id(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ vpx_svc_layer_id_t *data = va_arg(args, vpx_svc_layer_id_t *);
+ VP9_COMP *const cpi = (VP9_COMP *)ctx->cpi;
+ SVC *const svc = &cpi->svc;
+
+ data->spatial_layer_id = svc->spatial_layer_id;
+ data->temporal_layer_id = svc->temporal_layer_id;
+
+ return VPX_CODEC_OK;
+}
+
static vpx_codec_err_t ctrl_set_svc_parameters(vpx_codec_alg_priv_t *ctx,
va_list args) {
VP9_COMP *const cpi = ctx->cpi;
@@ -1257,6 +1297,16 @@
return VPX_CODEC_OK;
}
+static vpx_codec_err_t ctrl_register_cx_callback(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ vpx_codec_priv_output_cx_pkt_cb_pair_t *cbp =
+ (vpx_codec_priv_output_cx_pkt_cb_pair_t *)va_arg(args, void *);
+ ctx->output_cx_pkt_cb.output_cx_pkt = cbp->output_cx_pkt;
+ ctx->output_cx_pkt_cb.user_priv = cbp->user_priv;
+
+ return VPX_CODEC_OK;
+}
+
static vpx_codec_err_t ctrl_set_tune_content(vpx_codec_alg_priv_t *ctx,
va_list args) {
struct vp9_extracfg extra_cfg = ctx->extra_cfg;
@@ -1296,6 +1346,7 @@
{VP9E_SET_FRAME_PERIODIC_BOOST, ctrl_set_frame_periodic_boost},
{VP9E_SET_SVC, ctrl_set_svc},
{VP9E_SET_SVC_PARAMETERS, ctrl_set_svc_parameters},
+ {VP9E_REGISTER_CX_CALLBACK, ctrl_register_cx_callback},
{VP9E_SET_SVC_LAYER_ID, ctrl_set_svc_layer_id},
{VP9E_SET_TUNE_CONTENT, ctrl_set_tune_content},
{VP9E_SET_NOISE_SENSITIVITY, ctrl_set_noise_sensitivity},
@@ -1304,6 +1355,7 @@
{VP8E_GET_LAST_QUANTIZER, ctrl_get_quantizer},
{VP8E_GET_LAST_QUANTIZER_64, ctrl_get_quantizer64},
{VP9_GET_REFERENCE, ctrl_get_reference},
+ {VP9E_GET_SVC_LAYER_ID, ctrl_get_svc_layer_id},
{ -1, NULL},
};
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index e72cb00..b3a3774 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -102,6 +102,9 @@
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_temporal_filter_apply_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE3) += encoder/x86/vp9_sad_sse3.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_quantize_sse2.c
+ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
+VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_highbd_sad4d_sse2.asm
+endif
ifeq ($(CONFIG_USE_X86INC),yes)
VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_dct_mmx.asm
@@ -110,6 +113,9 @@
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subtract_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_variance_sse2.c
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subpel_variance.asm
+ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
+VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_highbd_sad_sse2.asm
+endif
endif
ifeq ($(ARCH_X86_64),yes)
diff --git a/vpx/src/svc_encodeframe.c b/vpx/src/svc_encodeframe.c
index fa3409c..4606f08 100644
--- a/vpx/src/svc_encodeframe.c
+++ b/vpx/src/svc_encodeframe.c
@@ -44,8 +44,6 @@
#define SVC_REFERENCE_FRAMES 8
#define SUPERFRAME_SLOTS (8)
#define SUPERFRAME_BUFFER_SIZE (SUPERFRAME_SLOTS * sizeof(uint32_t) + 2)
-#define OPTION_BUFFER_SIZE 1024
-#define COMPONENTS 4 // psnr & sse statistics maintained for total, y, u, v
#define MAX_QUANTIZER 63
@@ -81,61 +79,35 @@
struct FrameData *next;
} FrameData;
-typedef struct SvcInternal {
- char options[OPTION_BUFFER_SIZE]; // set by vpx_svc_set_options
-
- // values extracted from option, quantizers
- vpx_svc_extra_cfg_t svc_params;
- int enable_auto_alt_ref[VPX_SS_MAX_LAYERS];
- int bitrates[VPX_SS_MAX_LAYERS];
-
- // accumulated statistics
- double psnr_sum[VPX_SS_MAX_LAYERS][COMPONENTS]; // total/Y/U/V
- uint64_t sse_sum[VPX_SS_MAX_LAYERS][COMPONENTS];
- uint32_t bytes_sum[VPX_SS_MAX_LAYERS];
-
- // codec encoding values
- int width; // width of highest layer
- int height; // height of highest layer
- int kf_dist; // distance between keyframes
-
- // state variables
- int psnr_pkt_received;
- int layer;
- int use_multiple_frame_contexts;
-
- char message_buffer[2048];
- vpx_codec_ctx_t *codec_ctx;
-} SvcInternal;
-
-static SvcInternal *get_svc_internal(SvcContext *svc_ctx) {
+static SvcInternal_t *get_svc_internal(SvcContext_t *svc_ctx) {
if (svc_ctx == NULL) return NULL;
if (svc_ctx->internal == NULL) {
- SvcInternal *const si = (SvcInternal *)malloc(sizeof(*si));
+ SvcInternal_t *const si = (SvcInternal_t *)malloc(sizeof(*si));
if (si != NULL) {
memset(si, 0, sizeof(*si));
}
svc_ctx->internal = si;
}
- return (SvcInternal *)svc_ctx->internal;
+ return (SvcInternal_t *)svc_ctx->internal;
}
-static const SvcInternal *get_const_svc_internal(const SvcContext *svc_ctx) {
+static const SvcInternal_t *get_const_svc_internal(
+ const SvcContext_t *svc_ctx) {
if (svc_ctx == NULL) return NULL;
- return (const SvcInternal *)svc_ctx->internal;
+ return (const SvcInternal_t *)svc_ctx->internal;
}
-static void svc_log_reset(SvcContext *svc_ctx) {
- SvcInternal *const si = (SvcInternal *)svc_ctx->internal;
+static void svc_log_reset(SvcContext_t *svc_ctx) {
+ SvcInternal_t *const si = (SvcInternal_t *)svc_ctx->internal;
si->message_buffer[0] = '\0';
}
-static int svc_log(SvcContext *svc_ctx, SVC_LOG_LEVEL level,
+static int svc_log(SvcContext_t *svc_ctx, SVC_LOG_LEVEL level,
const char *fmt, ...) {
char buf[512];
int retval = 0;
va_list ap;
- SvcInternal *const si = get_svc_internal(svc_ctx);
+ SvcInternal_t *const si = get_svc_internal(svc_ctx);
if (level > svc_ctx->log_level) {
return retval;
@@ -183,7 +155,7 @@
return VPX_CODEC_OK;
}
-static vpx_codec_err_t parse_layer_options_from_string(SvcContext *svc_ctx,
+static vpx_codec_err_t parse_layer_options_from_string(SvcContext_t *svc_ctx,
LAYER_OPTION_TYPE type,
const char *input,
int *option0,
@@ -228,12 +200,12 @@
* quantizers=<q1>,<q2>,...
* svc_mode = [i|ip|alt_ip|gf]
*/
-static vpx_codec_err_t parse_options(SvcContext *svc_ctx, const char *options) {
+static vpx_codec_err_t parse_options(SvcContext_t *svc_ctx, const char *options) {
char *input_string;
char *option_name;
char *option_value;
char *input_ptr;
- SvcInternal *const si = get_svc_internal(svc_ctx);
+ SvcInternal_t *const si = get_svc_internal(svc_ctx);
vpx_codec_err_t res = VPX_CODEC_OK;
int i, alt_ref_enabled = 0;
@@ -315,8 +287,9 @@
return res;
}
-vpx_codec_err_t vpx_svc_set_options(SvcContext *svc_ctx, const char *options) {
- SvcInternal *const si = get_svc_internal(svc_ctx);
+vpx_codec_err_t vpx_svc_set_options(SvcContext_t *svc_ctx,
+ const char *options) {
+ SvcInternal_t *const si = get_svc_internal(svc_ctx);
if (svc_ctx == NULL || options == NULL || si == NULL) {
return VPX_CODEC_INVALID_PARAM;
}
@@ -325,10 +298,10 @@
return VPX_CODEC_OK;
}
-void assign_layer_bitrates(const SvcContext *svc_ctx,
+void assign_layer_bitrates(const SvcContext_t *svc_ctx,
vpx_codec_enc_cfg_t *const enc_cfg) {
int i;
- const SvcInternal *const si = get_const_svc_internal(svc_ctx);
+ const SvcInternal_t *const si = get_const_svc_internal(svc_ctx);
if (si->bitrates[0] != 0) {
enc_cfg->rc_target_bitrate = 0;
@@ -359,12 +332,12 @@
}
}
-vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,
+vpx_codec_err_t vpx_svc_init(SvcContext_t *svc_ctx, vpx_codec_ctx_t *codec_ctx,
vpx_codec_iface_t *iface,
vpx_codec_enc_cfg_t *enc_cfg) {
vpx_codec_err_t res;
int i;
- SvcInternal *const si = get_svc_internal(svc_ctx);
+ SvcInternal_t *const si = get_svc_internal(svc_ctx);
if (svc_ctx == NULL || codec_ctx == NULL || iface == NULL ||
enc_cfg == NULL) {
return VPX_CODEC_INVALID_PARAM;
@@ -454,13 +427,15 @@
* Encode a frame into multiple layers
* Create a superframe containing the individual layers
*/
-vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,
- struct vpx_image *rawimg, vpx_codec_pts_t pts,
+vpx_codec_err_t vpx_svc_encode(SvcContext_t *svc_ctx,
+ vpx_codec_ctx_t *codec_ctx,
+ struct vpx_image *rawimg,
+ vpx_codec_pts_t pts,
int64_t duration, int deadline) {
vpx_codec_err_t res;
vpx_codec_iter_t iter;
const vpx_codec_cx_pkt_t *cx_pkt;
- SvcInternal *const si = get_svc_internal(svc_ctx);
+ SvcInternal_t *const si = get_svc_internal(svc_ctx);
if (svc_ctx == NULL || codec_ctx == NULL || si == NULL) {
return VPX_CODEC_INVALID_PARAM;
}
@@ -523,8 +498,8 @@
return VPX_CODEC_OK;
}
-const char *vpx_svc_get_message(const SvcContext *svc_ctx) {
- const SvcInternal *const si = get_const_svc_internal(svc_ctx);
+const char *vpx_svc_get_message(const SvcContext_t *svc_ctx) {
+ const SvcInternal_t *const si = get_const_svc_internal(svc_ctx);
if (svc_ctx == NULL || si == NULL) return NULL;
return si->message_buffer;
}
@@ -535,7 +510,7 @@
}
// dump accumulated statistics and reset accumulated values
-const char *vpx_svc_dump_statistics(SvcContext *svc_ctx) {
+const char *vpx_svc_dump_statistics(SvcContext_t *svc_ctx) {
int number_of_frames;
int i, j;
uint32_t bytes_total = 0;
@@ -544,7 +519,7 @@
double mse[COMPONENTS];
double y_scale;
- SvcInternal *const si = get_svc_internal(svc_ctx);
+ SvcInternal_t *const si = get_svc_internal(svc_ctx);
if (svc_ctx == NULL || si == NULL) return NULL;
svc_log_reset(svc_ctx);
@@ -594,12 +569,12 @@
return vpx_svc_get_message(svc_ctx);
}
-void vpx_svc_release(SvcContext *svc_ctx) {
- SvcInternal *si;
+void vpx_svc_release(SvcContext_t *svc_ctx) {
+ SvcInternal_t *si;
if (svc_ctx == NULL) return;
// do not use get_svc_internal as it will unnecessarily allocate an
- // SvcInternal if it was not already allocated
- si = (SvcInternal *)svc_ctx->internal;
+ // SvcInternal_t if it was not already allocated
+ si = (SvcInternal_t *)svc_ctx->internal;
if (si != NULL) {
free(si);
svc_ctx->internal = NULL;
diff --git a/vpx/svc_context.h b/vpx/svc_context.h
index 61b5f4b..c2abec4 100644
--- a/vpx/svc_context.h
+++ b/vpx/svc_context.h
@@ -29,7 +29,7 @@
SVC_LOG_DEBUG
} SVC_LOG_LEVEL;
-typedef struct {
+typedef struct SvcContext {
// public interface to svc_command options
int spatial_layers; // number of spatial layers
int temporal_layers; // number of temporal layers
@@ -39,7 +39,37 @@
// private storage for vpx_svc_encode
void *internal;
-} SvcContext;
+} SvcContext_t;
+
+#define OPTION_BUFFER_SIZE 1024
+#define COMPONENTS 4 // psnr & sse statistics maintained for total, y, u, v
+
+typedef struct SvcInternal {
+ char options[OPTION_BUFFER_SIZE]; // set by vpx_svc_set_options
+
+ // values extracted from option, quantizers
+ vpx_svc_extra_cfg_t svc_params;
+ int enable_auto_alt_ref[VPX_SS_MAX_LAYERS];
+ int bitrates[VPX_SS_MAX_LAYERS];
+
+ // accumulated statistics
+ double psnr_sum[VPX_SS_MAX_LAYERS][COMPONENTS]; // total/Y/U/V
+ uint64_t sse_sum[VPX_SS_MAX_LAYERS][COMPONENTS];
+ uint32_t bytes_sum[VPX_SS_MAX_LAYERS];
+
+ // codec encoding values
+ int width; // width of highest layer
+ int height; // height of highest layer
+ int kf_dist; // distance between keyframes
+
+ // state variables
+ int psnr_pkt_received;
+ int layer;
+ int use_multiple_frame_contexts;
+
+ char message_buffer[2048];
+ vpx_codec_ctx_t *codec_ctx;
+} SvcInternal_t;
/**
* Set SVC options
@@ -49,35 +79,38 @@
* scaling-factors=<n1>/<d1>,<n2>/<d2>,...
* quantizers=<q1>,<q2>,...
*/
-vpx_codec_err_t vpx_svc_set_options(SvcContext *svc_ctx, const char *options);
+vpx_codec_err_t vpx_svc_set_options(SvcContext_t *svc_ctx, const char *options);
/**
* initialize SVC encoding
*/
-vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,
+vpx_codec_err_t vpx_svc_init(SvcContext_t *svc_ctx,
+ vpx_codec_ctx_t *codec_ctx,
vpx_codec_iface_t *iface,
vpx_codec_enc_cfg_t *cfg);
/**
* encode a frame of video with multiple layers
*/
-vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,
- struct vpx_image *rawimg, vpx_codec_pts_t pts,
+vpx_codec_err_t vpx_svc_encode(SvcContext_t *svc_ctx,
+ vpx_codec_ctx_t *codec_ctx,
+ struct vpx_image *rawimg,
+ vpx_codec_pts_t pts,
int64_t duration, int deadline);
/**
* finished with svc encoding, release allocated resources
*/
-void vpx_svc_release(SvcContext *svc_ctx);
+void vpx_svc_release(SvcContext_t *svc_ctx);
/**
* dump accumulated statistics and reset accumulated values
*/
-const char *vpx_svc_dump_statistics(SvcContext *svc_ctx);
+const char *vpx_svc_dump_statistics(SvcContext_t *svc_ctx);
/**
* get status message from previous encode
*/
-const char *vpx_svc_get_message(const SvcContext *svc_ctx);
+const char *vpx_svc_get_message(const SvcContext_t *svc_ctx);
#ifdef __cplusplus
} // extern "C"
diff --git a/vpx/vp8cx.h b/vpx/vp8cx.h
index 67e00fb..0a45dbb 100644
--- a/vpx/vp8cx.h
+++ b/vpx/vp8cx.h
@@ -237,13 +237,16 @@
VP9E_SET_SVC,
VP9E_SET_SVC_PARAMETERS,
+
/*!\brief control function to set svc layer for spatial and temporal.
* \note Valid ranges: 0..#vpx_codec_enc_cfg::ss_number_layers for spatial
* layer and 0..#vpx_codec_enc_cfg::ts_number_layers for
* temporal layer.
*/
VP9E_SET_SVC_LAYER_ID,
- VP9E_SET_TUNE_CONTENT
+ VP9E_SET_TUNE_CONTENT,
+ VP9E_GET_SVC_LAYER_ID,
+ VP9E_REGISTER_CX_CALLBACK,
};
/*!\brief vpx 1-D scaling mode
@@ -365,6 +368,7 @@
VPX_CTRL_USE_TYPE(VP9E_SET_SVC, int)
VPX_CTRL_USE_TYPE(VP9E_SET_SVC_PARAMETERS, void *)
+VPX_CTRL_USE_TYPE(VP9E_REGISTER_CX_CALLBACK, void *)
VPX_CTRL_USE_TYPE(VP9E_SET_SVC_LAYER_ID, vpx_svc_layer_id_t *)
VPX_CTRL_USE_TYPE(VP8E_SET_CPUUSED, int)
@@ -385,6 +389,7 @@
VPX_CTRL_USE_TYPE(VP8E_GET_LAST_QUANTIZER, int *)
VPX_CTRL_USE_TYPE(VP8E_GET_LAST_QUANTIZER_64, int *)
+VPX_CTRL_USE_TYPE(VP9E_GET_SVC_LAYER_ID, vpx_svc_layer_id_t *)
VPX_CTRL_USE_TYPE(VP8E_SET_MAX_INTRA_BITRATE_PCT, unsigned int)
VPX_CTRL_USE_TYPE(VP8E_SET_MAX_INTER_BITRATE_PCT, unsigned int)
diff --git a/vpx/vpx_encoder.h b/vpx/vpx_encoder.h
index 044243d..da5bd06 100644
--- a/vpx/vpx_encoder.h
+++ b/vpx/vpx_encoder.h
@@ -220,6 +220,22 @@
} vpx_codec_cx_pkt_t; /**< alias for struct vpx_codec_cx_pkt */
+ /*!\brief Encoder return output buffer callback
+ *
+ * This callback function, when registered, returns with packets when each
+ * spatial layer is encoded.
+ */
+ // putting the definitions here for now. (agrange: find if there
+ // is a better place for this)
+ typedef void (* vpx_codec_enc_output_cx_pkt_cb_fn_t)(vpx_codec_cx_pkt_t *pkt,
+ void *user_data);
+
+ /*!\brief Callback function pointer / user data pair storage */
+ typedef struct vpx_codec_enc_output_cx_cb_pair {
+ vpx_codec_enc_output_cx_pkt_cb_fn_t output_cx_pkt;
+ void *user_priv;
+ } vpx_codec_priv_output_cx_pkt_cb_pair_t;
+
/*!\brief Rational Number
*
* This structure holds a fractional value.
diff --git a/vpx_ports/arm_cpudetect.c b/vpx_ports/arm_cpudetect.c
index f03feff..8a4b8af 100644
--- a/vpx_ports/arm_cpudetect.c
+++ b/vpx_ports/arm_cpudetect.c
@@ -49,9 +49,6 @@
return flags;
}
mask = arm_cpu_env_mask();
-#if HAVE_EDSP
- flags |= HAS_EDSP;
-#endif /* HAVE_EDSP */
#if HAVE_MEDIA
flags |= HAS_MEDIA;
#endif /* HAVE_MEDIA */
@@ -78,17 +75,6 @@
* instructions via their assembled hex code.
* All of these instructions should be essentially nops.
*/
-#if HAVE_EDSP
- if (mask & HAS_EDSP) {
- __try {
- /*PLD [r13]*/
- __emit(0xF5DDF000);
- flags |= HAS_EDSP;
- } __except (GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION) {
- /*Ignore exception.*/
- }
- }
-#endif /* HAVE_EDSP */
#if HAVE_MEDIA
if (mask & HAS_MEDIA)
__try {
@@ -127,9 +113,6 @@
mask = arm_cpu_env_mask();
features = android_getCpuFeatures();
-#if HAVE_EDSP
- flags |= HAS_EDSP;
-#endif /* HAVE_EDSP */
#if HAVE_MEDIA
flags |= HAS_MEDIA;
#endif /* HAVE_MEDIA */
@@ -163,23 +146,15 @@
*/
char buf[512];
while (fgets(buf, 511, fin) != NULL) {
-#if HAVE_EDSP || HAVE_NEON || HAVE_NEON_ASM
+#if HAVE_NEON || HAVE_NEON_ASM
if (memcmp(buf, "Features", 8) == 0) {
char *p;
-#if HAVE_EDSP
- p = strstr(buf, " edsp");
- if (p != NULL && (p[5] == ' ' || p[5] == '\n')) {
- flags |= HAS_EDSP;
- }
-#endif /* HAVE_EDSP */
-#if HAVE_NEON || HAVE_NEON_ASM
p = strstr(buf, " neon");
if (p != NULL && (p[5] == ' ' || p[5] == '\n')) {
flags |= HAS_NEON;
}
-#endif /* HAVE_NEON || HAVE_NEON_ASM */
}
-#endif /* HAVE_EDSP || HAVE_NEON || HAVE_NEON_ASM */
+#endif /* HAVE_NEON || HAVE_NEON_ASM */
#if HAVE_MEDIA
if (memcmp(buf, "CPU architecture:", 17) == 0) {
int version;
diff --git a/vpx_scale/vpx_scale.mk b/vpx_scale/vpx_scale.mk
index 0a1594b..92d4991 100644
--- a/vpx_scale/vpx_scale.mk
+++ b/vpx_scale/vpx_scale.mk
@@ -5,7 +5,6 @@
SCALE_SRCS-yes += generic/yv12config.c
SCALE_SRCS-yes += generic/yv12extend.c
SCALE_SRCS-$(CONFIG_SPATIAL_RESAMPLING) += generic/gen_scalers.c
-SCALE_SRCS-yes += vpx_scale_asm_offsets.c
SCALE_SRCS-yes += vpx_scale_rtcd.c
SCALE_SRCS-yes += vpx_scale_rtcd.pl
@@ -14,7 +13,4 @@
SCALE_SRCS-no += $(SCALE_SRCS_REMOVE-yes)
-$(eval $(call asm_offsets_template,\
- vpx_scale_asm_offsets.asm, vpx_scale/vpx_scale_asm_offsets.c))
-
$(eval $(call rtcd_h_template,vpx_scale_rtcd,vpx_scale/vpx_scale_rtcd.pl))
diff --git a/vpx_scale/vpx_scale_asm_offsets.c b/vpx_scale/vpx_scale_asm_offsets.c
index caa9e80..ce8a670 100644
--- a/vpx_scale/vpx_scale_asm_offsets.c
+++ b/vpx_scale/vpx_scale_asm_offsets.c
@@ -9,32 +9,8 @@
*/
-#include "./vpx_config.h"
-#include "vpx/vpx_codec.h"
#include "vpx_ports/asm_offsets.h"
-#include "vpx_scale/yv12config.h"
BEGIN
-/* vpx_scale */
-DEFINE(yv12_buffer_config_y_width, offsetof(YV12_BUFFER_CONFIG, y_width));
-DEFINE(yv12_buffer_config_y_height, offsetof(YV12_BUFFER_CONFIG, y_height));
-DEFINE(yv12_buffer_config_y_stride, offsetof(YV12_BUFFER_CONFIG, y_stride));
-DEFINE(yv12_buffer_config_uv_width, offsetof(YV12_BUFFER_CONFIG, uv_width));
-DEFINE(yv12_buffer_config_uv_height, offsetof(YV12_BUFFER_CONFIG, uv_height));
-DEFINE(yv12_buffer_config_uv_stride, offsetof(YV12_BUFFER_CONFIG, uv_stride));
-DEFINE(yv12_buffer_config_y_buffer, offsetof(YV12_BUFFER_CONFIG, y_buffer));
-DEFINE(yv12_buffer_config_u_buffer, offsetof(YV12_BUFFER_CONFIG, u_buffer));
-DEFINE(yv12_buffer_config_v_buffer, offsetof(YV12_BUFFER_CONFIG, v_buffer));
-DEFINE(yv12_buffer_config_border, offsetof(YV12_BUFFER_CONFIG, border));
-DEFINE(VP8BORDERINPIXELS_VAL, VP8BORDERINPIXELS);
-
END
-
-/* add asserts for any offset that is not supported by assembly code */
-/* add asserts for any size that is not supported by assembly code */
-
-#if HAVE_NEON
-/* vp8_yv12_extend_frame_borders_neon makes several assumptions based on this */
-ct_assert(VP8BORDERINPIXELS_VAL, VP8BORDERINPIXELS == 32)
-#endif