Merge branch 'vp9-preview' of review:webm/libvpx
Merge the vp9-preview branch into master.
Change-Id: If700b9054676f24bed9deb59050af546c1ca5296
diff --git a/args.c b/args.c
index 37ba778..9dabc9b 100644
--- a/args.c
+++ b/args.c
@@ -25,241 +25,214 @@
#endif
-struct arg arg_init(char **argv)
-{
- struct arg a;
+struct arg arg_init(char **argv) {
+ struct arg a;
- a.argv = argv;
- a.argv_step = 1;
- a.name = NULL;
- a.val = NULL;
- a.def = NULL;
- return a;
+ a.argv = argv;
+ a.argv_step = 1;
+ a.name = NULL;
+ a.val = NULL;
+ a.def = NULL;
+ return a;
}
-int arg_match(struct arg *arg_, const struct arg_def *def, char **argv)
-{
- struct arg arg;
+int arg_match(struct arg *arg_, const struct arg_def *def, char **argv) {
+ struct arg arg;
- if (!argv[0] || argv[0][0] != '-')
- return 0;
+ if (!argv[0] || argv[0][0] != '-')
+ return 0;
- arg = arg_init(argv);
+ arg = arg_init(argv);
- if (def->short_name
- && strlen(arg.argv[0]) == strlen(def->short_name) + 1
- && !strcmp(arg.argv[0] + 1, def->short_name))
- {
+ if (def->short_name
+ && strlen(arg.argv[0]) == strlen(def->short_name) + 1
+ && !strcmp(arg.argv[0] + 1, def->short_name)) {
- arg.name = arg.argv[0] + 1;
- arg.val = def->has_val ? arg.argv[1] : NULL;
- arg.argv_step = def->has_val ? 2 : 1;
+ arg.name = arg.argv[0] + 1;
+ arg.val = def->has_val ? arg.argv[1] : NULL;
+ arg.argv_step = def->has_val ? 2 : 1;
+ } else if (def->long_name) {
+ const size_t name_len = strlen(def->long_name);
+
+ if (strlen(arg.argv[0]) >= name_len + 2
+ && arg.argv[0][1] == '-'
+ && !strncmp(arg.argv[0] + 2, def->long_name, name_len)
+ && (arg.argv[0][name_len + 2] == '='
+ || arg.argv[0][name_len + 2] == '\0')) {
+
+ arg.name = arg.argv[0] + 2;
+ arg.val = arg.name[name_len] == '=' ? arg.name + name_len + 1 : NULL;
+ arg.argv_step = 1;
}
+ }
+
+ if (arg.name && !arg.val && def->has_val)
+ die("Error: option %s requires argument.\n", arg.name);
+
+ if (arg.name && arg.val && !def->has_val)
+ die("Error: option %s requires no argument.\n", arg.name);
+
+ if (arg.name
+ && (arg.val || !def->has_val)) {
+ arg.def = def;
+ *arg_ = arg;
+ return 1;
+ }
+
+ return 0;
+}
+
+
+const char *arg_next(struct arg *arg) {
+ if (arg->argv[0])
+ arg->argv += arg->argv_step;
+
+ return *arg->argv;
+}
+
+
+char **argv_dup(int argc, const char **argv) {
+ char **new_argv = malloc((argc + 1) * sizeof(*argv));
+
+ memcpy(new_argv, argv, argc * sizeof(*argv));
+ new_argv[argc] = NULL;
+ return new_argv;
+}
+
+
+void arg_show_usage(FILE *fp, const struct arg_def *const *defs) {
+ char option_text[40] = {0};
+
+ for (; *defs; defs++) {
+ const struct arg_def *def = *defs;
+ char *short_val = def->has_val ? " <arg>" : "";
+ char *long_val = def->has_val ? "=<arg>" : "";
+
+ if (def->short_name && def->long_name) {
+ char *comma = def->has_val ? "," : ", ";
+
+ snprintf(option_text, 37, "-%s%s%s --%s%6s",
+ def->short_name, short_val, comma,
+ def->long_name, long_val);
+ } else if (def->short_name)
+ snprintf(option_text, 37, "-%s%s",
+ def->short_name, short_val);
else if (def->long_name)
- {
- const size_t name_len = strlen(def->long_name);
+ snprintf(option_text, 37, " --%s%s",
+ def->long_name, long_val);
- if (strlen(arg.argv[0]) >= name_len + 2
- && arg.argv[0][1] == '-'
- && !strncmp(arg.argv[0] + 2, def->long_name, name_len)
- && (arg.argv[0][name_len+2] == '='
- || arg.argv[0][name_len+2] == '\0'))
- {
+ fprintf(fp, " %-37s\t%s\n", option_text, def->desc);
- arg.name = arg.argv[0] + 2;
- arg.val = arg.name[name_len] == '=' ? arg.name + name_len + 1 : NULL;
- arg.argv_step = 1;
- }
+ if (def->enums) {
+ const struct arg_enum_list *listptr;
+
+ fprintf(fp, " %-37s\t ", "");
+
+ for (listptr = def->enums; listptr->name; listptr++)
+ fprintf(fp, "%s%s", listptr->name,
+ listptr[1].name ? ", " : "\n");
}
-
- if (arg.name && !arg.val && def->has_val)
- die("Error: option %s requires argument.\n", arg.name);
-
- if (arg.name && arg.val && !def->has_val)
- die("Error: option %s requires no argument.\n", arg.name);
-
- if (arg.name
- && (arg.val || !def->has_val))
- {
- arg.def = def;
- *arg_ = arg;
- return 1;
- }
-
- return 0;
+ }
}
-const char *arg_next(struct arg *arg)
-{
- if (arg->argv[0])
- arg->argv += arg->argv_step;
+unsigned int arg_parse_uint(const struct arg *arg) {
+ long int rawval;
+ char *endptr;
- return *arg->argv;
+ rawval = strtol(arg->val, &endptr, 10);
+
+ if (arg->val[0] != '\0' && endptr[0] == '\0') {
+ if (rawval >= 0 && rawval <= UINT_MAX)
+ return rawval;
+
+ die("Option %s: Value %ld out of range for unsigned int\n",
+ arg->name, rawval);
+ }
+
+ die("Option %s: Invalid character '%c'\n", arg->name, *endptr);
+ return 0;
}
-char **argv_dup(int argc, const char **argv)
-{
- char **new_argv = malloc((argc + 1) * sizeof(*argv));
+int arg_parse_int(const struct arg *arg) {
+ long int rawval;
+ char *endptr;
- memcpy(new_argv, argv, argc * sizeof(*argv));
- new_argv[argc] = NULL;
- return new_argv;
+ rawval = strtol(arg->val, &endptr, 10);
+
+ if (arg->val[0] != '\0' && endptr[0] == '\0') {
+ if (rawval >= INT_MIN && rawval <= INT_MAX)
+ return rawval;
+
+ die("Option %s: Value %ld out of range for signed int\n",
+ arg->name, rawval);
+ }
+
+ die("Option %s: Invalid character '%c'\n", arg->name, *endptr);
+ return 0;
}
-void arg_show_usage(FILE *fp, const struct arg_def *const *defs)
-{
- char option_text[40] = {0};
-
- for (; *defs; defs++)
- {
- const struct arg_def *def = *defs;
- char *short_val = def->has_val ? " <arg>" : "";
- char *long_val = def->has_val ? "=<arg>" : "";
-
- if (def->short_name && def->long_name)
- {
- char *comma = def->has_val ? "," : ", ";
-
- snprintf(option_text, 37, "-%s%s%s --%s%6s",
- def->short_name, short_val, comma,
- def->long_name, long_val);
- }
- else if (def->short_name)
- snprintf(option_text, 37, "-%s%s",
- def->short_name, short_val);
- else if (def->long_name)
- snprintf(option_text, 37, " --%s%s",
- def->long_name, long_val);
-
- fprintf(fp, " %-37s\t%s\n", option_text, def->desc);
-
- if(def->enums)
- {
- const struct arg_enum_list *listptr;
-
- fprintf(fp, " %-37s\t ", "");
-
- for(listptr = def->enums; listptr->name; listptr++)
- fprintf(fp, "%s%s", listptr->name,
- listptr[1].name ? ", " : "\n");
- }
- }
-}
-
-
-unsigned int arg_parse_uint(const struct arg *arg)
-{
- long int rawval;
- char *endptr;
-
- rawval = strtol(arg->val, &endptr, 10);
-
- if (arg->val[0] != '\0' && endptr[0] == '\0')
- {
- if (rawval >= 0 && rawval <= UINT_MAX)
- return rawval;
-
- die("Option %s: Value %ld out of range for unsigned int\n",
- arg->name, rawval);
- }
-
- die("Option %s: Invalid character '%c'\n", arg->name, *endptr);
- return 0;
-}
-
-
-int arg_parse_int(const struct arg *arg)
-{
- long int rawval;
- char *endptr;
-
- rawval = strtol(arg->val, &endptr, 10);
-
- if (arg->val[0] != '\0' && endptr[0] == '\0')
- {
- if (rawval >= INT_MIN && rawval <= INT_MAX)
- return rawval;
-
- die("Option %s: Value %ld out of range for signed int\n",
- arg->name, rawval);
- }
-
- die("Option %s: Invalid character '%c'\n", arg->name, *endptr);
- return 0;
-}
-
-
-struct vpx_rational
-{
- int num; /**< fraction numerator */
- int den; /**< fraction denominator */
+struct vpx_rational {
+ int num; /**< fraction numerator */
+ int den; /**< fraction denominator */
};
-struct vpx_rational arg_parse_rational(const struct arg *arg)
-{
- long int rawval;
- char *endptr;
- struct vpx_rational rat;
+struct vpx_rational arg_parse_rational(const struct arg *arg) {
+ long int rawval;
+ char *endptr;
+ struct vpx_rational rat;
- /* parse numerator */
- rawval = strtol(arg->val, &endptr, 10);
+ /* parse numerator */
+ rawval = strtol(arg->val, &endptr, 10);
- if (arg->val[0] != '\0' && endptr[0] == '/')
- {
- if (rawval >= INT_MIN && rawval <= INT_MAX)
- rat.num = rawval;
- else die("Option %s: Value %ld out of range for signed int\n",
- arg->name, rawval);
- }
- else die("Option %s: Expected / at '%c'\n", arg->name, *endptr);
+ if (arg->val[0] != '\0' && endptr[0] == '/') {
+ if (rawval >= INT_MIN && rawval <= INT_MAX)
+ rat.num = rawval;
+ else die("Option %s: Value %ld out of range for signed int\n",
+ arg->name, rawval);
+ } else die("Option %s: Expected / at '%c'\n", arg->name, *endptr);
- /* parse denominator */
- rawval = strtol(endptr + 1, &endptr, 10);
+ /* parse denominator */
+ rawval = strtol(endptr + 1, &endptr, 10);
- if (arg->val[0] != '\0' && endptr[0] == '\0')
- {
- if (rawval >= INT_MIN && rawval <= INT_MAX)
- rat.den = rawval;
- else die("Option %s: Value %ld out of range for signed int\n",
- arg->name, rawval);
- }
- else die("Option %s: Invalid character '%c'\n", arg->name, *endptr);
+ if (arg->val[0] != '\0' && endptr[0] == '\0') {
+ if (rawval >= INT_MIN && rawval <= INT_MAX)
+ rat.den = rawval;
+ else die("Option %s: Value %ld out of range for signed int\n",
+ arg->name, rawval);
+ } else die("Option %s: Invalid character '%c'\n", arg->name, *endptr);
- return rat;
+ return rat;
}
-int arg_parse_enum(const struct arg *arg)
-{
- const struct arg_enum_list *listptr;
- long int rawval;
- char *endptr;
+int arg_parse_enum(const struct arg *arg) {
+ const struct arg_enum_list *listptr;
+ long int rawval;
+ char *endptr;
- /* First see if the value can be parsed as a raw value */
- rawval = strtol(arg->val, &endptr, 10);
- if (arg->val[0] != '\0' && endptr[0] == '\0')
- {
- /* Got a raw value, make sure it's valid */
- for(listptr = arg->def->enums; listptr->name; listptr++)
- if(listptr->val == rawval)
- return rawval;
- }
+ /* First see if the value can be parsed as a raw value */
+ rawval = strtol(arg->val, &endptr, 10);
+ if (arg->val[0] != '\0' && endptr[0] == '\0') {
+ /* Got a raw value, make sure it's valid */
+ for (listptr = arg->def->enums; listptr->name; listptr++)
+ if (listptr->val == rawval)
+ return rawval;
+ }
- /* Next see if it can be parsed as a string */
- for(listptr = arg->def->enums; listptr->name; listptr++)
- if(!strcmp(arg->val, listptr->name))
- return listptr->val;
+ /* Next see if it can be parsed as a string */
+ for (listptr = arg->def->enums; listptr->name; listptr++)
+ if (!strcmp(arg->val, listptr->name))
+ return listptr->val;
- die("Option %s: Invalid value '%s'\n", arg->name, arg->val);
- return 0;
+ die("Option %s: Invalid value '%s'\n", arg->name, arg->val);
+ return 0;
}
-int arg_parse_enum_or_int(const struct arg *arg)
-{
- if(arg->def->enums)
- return arg_parse_enum(arg);
- return arg_parse_int(arg);
+int arg_parse_enum_or_int(const struct arg *arg) {
+ if (arg->def->enums)
+ return arg_parse_enum(arg);
+ return arg_parse_int(arg);
}
diff --git a/args.h b/args.h
index 7963fa6..ad591af 100644
--- a/args.h
+++ b/args.h
@@ -13,29 +13,26 @@
#define ARGS_H
#include <stdio.h>
-struct arg
-{
- char **argv;
- const char *name;
- const char *val;
- unsigned int argv_step;
- const struct arg_def *def;
+struct arg {
+ char **argv;
+ const char *name;
+ const char *val;
+ unsigned int argv_step;
+ const struct arg_def *def;
};
-struct arg_enum_list
-{
- const char *name;
- int val;
+struct arg_enum_list {
+ const char *name;
+ int val;
};
#define ARG_ENUM_LIST_END {0}
-typedef struct arg_def
-{
- const char *short_name;
- const char *long_name;
- int has_val;
- const char *desc;
- const struct arg_enum_list *enums;
+typedef struct arg_def {
+ const char *short_name;
+ const char *long_name;
+ int has_val;
+ const char *desc;
+ const struct arg_enum_list *enums;
} arg_def_t;
#define ARG_DEF(s,l,v,d) {s,l,v,d, NULL}
#define ARG_DEF_ENUM(s,l,v,d,e) {s,l,v,d,e}
diff --git a/build/make/Android.mk b/build/make/Android.mk
index c6b9cf9..db0cebf 100644
--- a/build/make/Android.mk
+++ b/build/make/Android.mk
@@ -208,4 +208,3 @@
ifeq ($(CONFIG_RUNTIME_CPU_DETECT),yes)
$(call import-module,cpufeatures)
endif
-
diff --git a/build/make/Makefile b/build/make/Makefile
index 1088c84..92113cc 100644
--- a/build/make/Makefile
+++ b/build/make/Makefile
@@ -253,10 +253,25 @@
$(if $(quiet),@echo " [LD] $$@")
$(qexec)$$(LD) -shared $$(LDFLAGS) \
-Wl,--no-undefined -Wl,-soname,$$(SONAME) \
- -Wl,--version-script,$$(SO_VERSION_SCRIPT) -o $$@ \
- $$(filter %.o,$$?) $$(extralibs)
+ -Wl,--version-script,$$(EXPORTS_FILE) -o $$@ \
+ $$(filter %.o,$$^) $$(extralibs)
endef
+define dl_template
+# Not using a pattern rule here because we don't want to generate empty
+# archives when they are listed as a dependency in files not responsible
+# for creating them.
+$(1):
+ $(if $(quiet),@echo " [LD] $$@")
+ $(qexec)$$(LD) -dynamiclib $$(LDFLAGS) \
+ -exported_symbols_list $$(EXPORTS_FILE) \
+ -Wl,-headerpad_max_install_names,-compatibility_version,1.0,-current_version,$$(VERSION_MAJOR) \
+ -o $$@ \
+ $$(filter %.o,$$^) $$(extralibs)
+endef
+
+
+
define lipo_lib_template
$(1): $(addsuffix /$(1),$(FAT_ARCHS))
$(if $(quiet),@echo " [LIPO] $$@")
@@ -321,6 +336,7 @@
@touch $@
$(foreach lib,$(filter %_g.a,$(LIBS)),$(eval $(call archive_template,$(lib))))
$(foreach lib,$(filter %so.$(VERSION_MAJOR).$(VERSION_MINOR).$(VERSION_PATCH),$(LIBS)),$(eval $(call so_template,$(lib))))
+$(foreach lib,$(filter %$(VERSION_MAJOR).dylib,$(LIBS)),$(eval $(call dl_template,$(lib))))
INSTALL-LIBS=$(call cond_enabled,CONFIG_INSTALL_LIBS,INSTALL-LIBS)
ifeq ($(MAKECMDGOALS),dist)
diff --git a/build/make/configure.sh b/build/make/configure.sh
index c99a01c..e27af96 100755
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -277,6 +277,7 @@
# Toolchain Check Functions
#
check_cmd() {
+ enabled external_build && return
log "$@"
"$@" >>${logfile} 2>&1
}
@@ -435,10 +436,10 @@
EOF
if enabled rvct; then cat >> $1 << EOF
-fmt_deps = sed -e 's;^__image.axf;\$(dir \$@)\$(notdir \$<).o \$@;' #hide
+fmt_deps = sed -e 's;^__image.axf;\$\${@:.d=.o} \$\$@;' #hide
EOF
else cat >> $1 << EOF
-fmt_deps = sed -e 's;^\([a-zA-Z0-9_]*\)\.o;\$(dir \$@)\1\$(suffix \$<).o \$@;'
+fmt_deps = sed -e 's;^\([a-zA-Z0-9_]*\)\.o;\$\${@:.d=.o} \$\$@;'
EOF
fi
@@ -1001,7 +1002,11 @@
soft_enable sse2
soft_enable sse3
soft_enable ssse3
- soft_enable sse4_1
+ if enabled gcc && ! disabled sse4_1 && ! check_cflags -msse4; then
+ RTCD_OPTIONS="${RTCD_OPTIONS}--disable-sse4_1 "
+ else
+ soft_enable sse4_1
+ fi
case ${tgt_os} in
win*)
@@ -1176,9 +1181,6 @@
;;
esac
- # for sysconf(3) and friends.
- check_header unistd.h
-
# glibc needs these
if enabled linux; then
add_cflags -D_LARGEFILE_SOURCE
diff --git a/build/make/gen_msvs_proj.sh b/build/make/gen_msvs_proj.sh
index 6d42941..947cc62 100755
--- a/build/make/gen_msvs_proj.sh
+++ b/build/make/gen_msvs_proj.sh
@@ -26,6 +26,7 @@
--help Print this message
--exe Generate a project for building an Application
--lib Generate a project for creating a static library
+ --dll Generate a project for creating a dll
--static-crt Use the static C runtime (/MT)
--target=isa-os-cc Target specifier (required)
--out=filename Write output to a file [stdout]
@@ -142,7 +143,9 @@
if [ "${f##*.}" == "$pat" ]; then
unset file_list[i]
+ objf=$(echo ${f%.*}.obj | sed -e 's/^[\./]\+//g' -e 's,/,_,g')
open_tag File RelativePath="./$f"
+
if [ "$pat" == "asm" ] && $asm_use_custom_step; then
for plat in "${platforms[@]}"; do
for cfg in Debug Release; do
@@ -152,14 +155,27 @@
tag Tool \
Name="VCCustomBuildTool" \
Description="Assembling \$(InputFileName)" \
- CommandLine="$(eval echo \$asm_${cfg}_cmdline)" \
- Outputs="\$(InputName).obj" \
+ CommandLine="$(eval echo \$asm_${cfg}_cmdline) -o \$(IntDir)$objf" \
+ Outputs="\$(IntDir)$objf" \
close_tag FileConfiguration
done
done
fi
+ if [ "$pat" == "c" ] || [ "$pat" == "cc" ] ; then
+ for plat in "${platforms[@]}"; do
+ for cfg in Debug Release; do
+ open_tag FileConfiguration \
+ Name="${cfg}|${plat}" \
+ tag Tool \
+ Name="VCCLCompilerTool" \
+ ObjectFile="\$(IntDir)$objf" \
+
+ close_tag FileConfiguration
+ done
+ done
+ fi
close_tag File
break
@@ -190,6 +206,8 @@
;;
--exe) proj_kind="exe"
;;
+ --dll) proj_kind="dll"
+ ;;
--lib) proj_kind="lib"
;;
--src-path-bare=*) src_path_bare="$optval"
@@ -244,8 +262,10 @@
asm_use_custom_step=$uses_asm
;;
8) vs_ver_id="8.00"
+ asm_use_custom_step=$uses_asm
;;
9) vs_ver_id="9.00"
+ asm_use_custom_step=$uses_asm
;;
esac
@@ -284,10 +304,11 @@
case "$target" in
x86_64*)
platforms[0]="x64"
+ asm_Debug_cmdline="yasm -Xvc -g cv8 -f \$(PlatformName) ${yasmincs} "\$(InputPath)""
+ asm_Release_cmdline="yasm -Xvc -f \$(PlatformName) ${yasmincs} "\$(InputPath)""
;;
x86*)
platforms[0]="Win32"
- # these are only used by vs7
asm_Debug_cmdline="yasm -Xvc -g cv8 -f \$(PlatformName) ${yasmincs} "\$(InputPath)""
asm_Release_cmdline="yasm -Xvc -f \$(PlatformName) ${yasmincs} "\$(InputPath)""
;;
@@ -299,6 +320,8 @@
case "$proj_kind" in
exe) vs_ConfigurationType=1
;;
+ dll) vs_ConfigurationType=2
+ ;;
*) vs_ConfigurationType=4
;;
esac
@@ -318,13 +341,6 @@
done
close_tag Platforms
- open_tag ToolFiles
- case "$target" in
- x86*) $uses_asm && tag ToolFile RelativePath="$self_dirname/../x86-msvs/yasm.rules"
- ;;
- esac
- close_tag ToolFiles
-
open_tag Configurations
for plat in "${platforms[@]}"; do
plat_no_ws=`echo $plat | sed 's/[^A-Za-z0-9_]/_/g'`
diff --git a/build/make/obj_int_extract.c b/build/make/obj_int_extract.c
index bf317bd..f86cec2 100644
--- a/build/make/obj_int_extract.c
+++ b/build/make/obj_int_extract.c
@@ -17,21 +17,19 @@
#include "vpx_config.h"
#include "vpx/vpx_integer.h"
-typedef enum
-{
- OUTPUT_FMT_PLAIN,
- OUTPUT_FMT_RVDS,
- OUTPUT_FMT_GAS,
+typedef enum {
+ OUTPUT_FMT_PLAIN,
+ OUTPUT_FMT_RVDS,
+ OUTPUT_FMT_GAS,
} output_fmt_t;
-int log_msg(const char *fmt, ...)
-{
- int res;
- va_list ap;
- va_start(ap, fmt);
- res = vfprintf(stderr, fmt, ap);
- va_end(ap);
- return res;
+int log_msg(const char *fmt, ...) {
+ int res;
+ va_list ap;
+ va_start(ap, fmt);
+ res = vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ return res;
}
#if defined(__GNUC__) && __GNUC__
@@ -40,175 +38,148 @@
#include <mach-o/loader.h>
#include <mach-o/nlist.h>
-int parse_macho(uint8_t *base_buf, size_t sz)
-{
- int i, j;
- struct mach_header header;
- uint8_t *buf = base_buf;
- int base_data_section = 0;
- int bits = 0;
+int parse_macho(uint8_t *base_buf, size_t sz) {
+ int i, j;
+ struct mach_header header;
+ uint8_t *buf = base_buf;
+ int base_data_section = 0;
+ int bits = 0;
- /* We can read in mach_header for 32 and 64 bit architectures
- * because it's identical to mach_header_64 except for the last
- * element (uint32_t reserved), which we don't use. Then, when
- * we know which architecture we're looking at, increment buf
- * appropriately.
- */
- memcpy(&header, buf, sizeof(struct mach_header));
+ /* We can read in mach_header for 32 and 64 bit architectures
+ * because it's identical to mach_header_64 except for the last
+ * element (uint32_t reserved), which we don't use. Then, when
+ * we know which architecture we're looking at, increment buf
+ * appropriately.
+ */
+ memcpy(&header, buf, sizeof(struct mach_header));
- if (header.magic == MH_MAGIC)
- {
- if (header.cputype == CPU_TYPE_ARM
- || header.cputype == CPU_TYPE_X86)
- {
- bits = 32;
- buf += sizeof(struct mach_header);
- }
- else
- {
- log_msg("Bad cputype for object file. Currently only tested for CPU_TYPE_[ARM|X86].\n");
- goto bail;
- }
+ if (header.magic == MH_MAGIC) {
+ if (header.cputype == CPU_TYPE_ARM
+ || header.cputype == CPU_TYPE_X86) {
+ bits = 32;
+ buf += sizeof(struct mach_header);
+ } else {
+ log_msg("Bad cputype for object file. Currently only tested for CPU_TYPE_[ARM|X86].\n");
+ goto bail;
}
- else if (header.magic == MH_MAGIC_64)
- {
- if (header.cputype == CPU_TYPE_X86_64)
- {
- bits = 64;
- buf += sizeof(struct mach_header_64);
- }
- else
- {
- log_msg("Bad cputype for object file. Currently only tested for CPU_TYPE_X86_64.\n");
- goto bail;
- }
+ } else if (header.magic == MH_MAGIC_64) {
+ if (header.cputype == CPU_TYPE_X86_64) {
+ bits = 64;
+ buf += sizeof(struct mach_header_64);
+ } else {
+ log_msg("Bad cputype for object file. Currently only tested for CPU_TYPE_X86_64.\n");
+ goto bail;
}
- else
- {
- log_msg("Bad magic number for object file. 0x%x or 0x%x expected, 0x%x found.\n",
- MH_MAGIC, MH_MAGIC_64, header.magic);
+ } else {
+ log_msg("Bad magic number for object file. 0x%x or 0x%x expected, 0x%x found.\n",
+ MH_MAGIC, MH_MAGIC_64, header.magic);
+ goto bail;
+ }
+
+ if (header.filetype != MH_OBJECT) {
+ log_msg("Bad filetype for object file. Currently only tested for MH_OBJECT.\n");
+ goto bail;
+ }
+
+ for (i = 0; i < header.ncmds; i++) {
+ struct load_command lc;
+
+ memcpy(&lc, buf, sizeof(struct load_command));
+
+ if (lc.cmd == LC_SEGMENT) {
+ uint8_t *seg_buf = buf;
+ struct section s;
+ struct segment_command seg_c;
+
+ memcpy(&seg_c, seg_buf, sizeof(struct segment_command));
+ seg_buf += sizeof(struct segment_command);
+
+ /* Although each section is given its own offset, nlist.n_value
+ * references the offset of the first section. This isn't
+ * apparent without debug information because the offset of the
+ * data section is the same as the first section. However, with
+ * debug sections mixed in, the offset of the debug section
+ * increases but n_value still references the first section.
+ */
+ if (seg_c.nsects < 1) {
+ log_msg("Not enough sections\n");
goto bail;
- }
+ }
- if (header.filetype != MH_OBJECT)
- {
- log_msg("Bad filetype for object file. Currently only tested for MH_OBJECT.\n");
+ memcpy(&s, seg_buf, sizeof(struct section));
+ base_data_section = s.offset;
+ } else if (lc.cmd == LC_SEGMENT_64) {
+ uint8_t *seg_buf = buf;
+ struct section_64 s;
+ struct segment_command_64 seg_c;
+
+ memcpy(&seg_c, seg_buf, sizeof(struct segment_command_64));
+ seg_buf += sizeof(struct segment_command_64);
+
+ /* Explanation in LC_SEGMENT */
+ if (seg_c.nsects < 1) {
+ log_msg("Not enough sections\n");
goto bail;
+ }
+
+ memcpy(&s, seg_buf, sizeof(struct section_64));
+ base_data_section = s.offset;
+ } else if (lc.cmd == LC_SYMTAB) {
+ if (base_data_section != 0) {
+ struct symtab_command sc;
+ uint8_t *sym_buf = base_buf;
+ uint8_t *str_buf = base_buf;
+
+ memcpy(&sc, buf, sizeof(struct symtab_command));
+
+ if (sc.cmdsize != sizeof(struct symtab_command)) {
+ log_msg("Can't find symbol table!\n");
+ goto bail;
+ }
+
+ sym_buf += sc.symoff;
+ str_buf += sc.stroff;
+
+ for (j = 0; j < sc.nsyms; j++) {
+ /* Location of string is calculated each time from the
+ * start of the string buffer. On darwin the symbols
+ * are prefixed by "_", so we bump the pointer by 1.
+ * The target value is defined as an int in asm_*_offsets.c,
+ * which is 4 bytes on all targets we currently use.
+ */
+ if (bits == 32) {
+ struct nlist nl;
+ int val;
+
+ memcpy(&nl, sym_buf, sizeof(struct nlist));
+ sym_buf += sizeof(struct nlist);
+
+ memcpy(&val, base_buf + base_data_section + nl.n_value,
+ sizeof(val));
+ printf("%-40s EQU %5d\n",
+ str_buf + nl.n_un.n_strx + 1, val);
+ } else { /* if (bits == 64) */
+ struct nlist_64 nl;
+ int val;
+
+ memcpy(&nl, sym_buf, sizeof(struct nlist_64));
+ sym_buf += sizeof(struct nlist_64);
+
+ memcpy(&val, base_buf + base_data_section + nl.n_value,
+ sizeof(val));
+ printf("%-40s EQU %5d\n",
+ str_buf + nl.n_un.n_strx + 1, val);
+ }
+ }
+ }
}
- for (i = 0; i < header.ncmds; i++)
- {
- struct load_command lc;
+ buf += lc.cmdsize;
+ }
- memcpy(&lc, buf, sizeof(struct load_command));
-
- if (lc.cmd == LC_SEGMENT)
- {
- uint8_t *seg_buf = buf;
- struct section s;
- struct segment_command seg_c;
-
- memcpy(&seg_c, seg_buf, sizeof(struct segment_command));
- seg_buf += sizeof(struct segment_command);
-
- /* Although each section is given it's own offset, nlist.n_value
- * references the offset of the first section. This isn't
- * apparent without debug information because the offset of the
- * data section is the same as the first section. However, with
- * debug sections mixed in, the offset of the debug section
- * increases but n_value still references the first section.
- */
- if (seg_c.nsects < 1)
- {
- log_msg("Not enough sections\n");
- goto bail;
- }
-
- memcpy(&s, seg_buf, sizeof(struct section));
- base_data_section = s.offset;
- }
- else if (lc.cmd == LC_SEGMENT_64)
- {
- uint8_t *seg_buf = buf;
- struct section_64 s;
- struct segment_command_64 seg_c;
-
- memcpy(&seg_c, seg_buf, sizeof(struct segment_command_64));
- seg_buf += sizeof(struct segment_command_64);
-
- /* Explanation in LG_SEGMENT */
- if (seg_c.nsects < 1)
- {
- log_msg("Not enough sections\n");
- goto bail;
- }
-
- memcpy(&s, seg_buf, sizeof(struct section_64));
- base_data_section = s.offset;
- }
- else if (lc.cmd == LC_SYMTAB)
- {
- if (base_data_section != 0)
- {
- struct symtab_command sc;
- uint8_t *sym_buf = base_buf;
- uint8_t *str_buf = base_buf;
-
- memcpy(&sc, buf, sizeof(struct symtab_command));
-
- if (sc.cmdsize != sizeof(struct symtab_command))
- {
- log_msg("Can't find symbol table!\n");
- goto bail;
- }
-
- sym_buf += sc.symoff;
- str_buf += sc.stroff;
-
- for (j = 0; j < sc.nsyms; j++)
- {
- /* Location of string is cacluated each time from the
- * start of the string buffer. On darwin the symbols
- * are prefixed by "_", so we bump the pointer by 1.
- * The target value is defined as an int in asm_*_offsets.c,
- * which is 4 bytes on all targets we currently use.
- */
- if (bits == 32)
- {
- struct nlist nl;
- int val;
-
- memcpy(&nl, sym_buf, sizeof(struct nlist));
- sym_buf += sizeof(struct nlist);
-
- memcpy(&val, base_buf + base_data_section + nl.n_value,
- sizeof(val));
- printf("%-40s EQU %5d\n",
- str_buf + nl.n_un.n_strx + 1, val);
- }
- else /* if (bits == 64) */
- {
- struct nlist_64 nl;
- int val;
-
- memcpy(&nl, sym_buf, sizeof(struct nlist_64));
- sym_buf += sizeof(struct nlist_64);
-
- memcpy(&val, base_buf + base_data_section + nl.n_value,
- sizeof(val));
- printf("%-40s EQU %5d\n",
- str_buf + nl.n_un.n_strx + 1, val);
- }
- }
- }
- }
-
- buf += lc.cmdsize;
- }
-
- return 0;
+ return 0;
bail:
- return 1;
+ return 1;
}
@@ -216,448 +187,400 @@
#include "elf.h"
#define COPY_STRUCT(dst, buf, ofst, sz) do {\
- if(ofst + sizeof((*(dst))) > sz) goto bail;\
- memcpy(dst, buf+ofst, sizeof((*(dst))));\
- } while(0)
+ if(ofst + sizeof((*(dst))) > sz) goto bail;\
+ memcpy(dst, buf+ofst, sizeof((*(dst))));\
+ } while(0)
#define ENDIAN_ASSIGN(val, memb) do {\
- if(!elf->le_data) {log_msg("Big Endian data not supported yet!\n");goto bail;}\
- (val) = (memb);\
- } while(0)
+ if(!elf->le_data) {log_msg("Big Endian data not supported yet!\n");goto bail;}\
+ (val) = (memb);\
+ } while(0)
#define ENDIAN_ASSIGN_IN_PLACE(memb) do {\
- ENDIAN_ASSIGN(memb, memb);\
- } while(0)
+ ENDIAN_ASSIGN(memb, memb);\
+ } while(0)
-typedef struct
-{
- uint8_t *buf; /* Buffer containing ELF data */
- size_t sz; /* Buffer size */
- int le_data; /* Data is little-endian */
- unsigned char e_ident[EI_NIDENT]; /* Magic number and other info */
- int bits; /* 32 or 64 */
- Elf32_Ehdr hdr32;
- Elf64_Ehdr hdr64;
+typedef struct {
+ uint8_t *buf; /* Buffer containing ELF data */
+ size_t sz; /* Buffer size */
+ int le_data; /* Data is little-endian */
+ unsigned char e_ident[EI_NIDENT]; /* Magic number and other info */
+ int bits; /* 32 or 64 */
+ Elf32_Ehdr hdr32;
+ Elf64_Ehdr hdr64;
} elf_obj_t;
-int parse_elf_header(elf_obj_t *elf)
-{
- int res;
- /* Verify ELF Magic numbers */
- COPY_STRUCT(&elf->e_ident, elf->buf, 0, elf->sz);
- res = elf->e_ident[EI_MAG0] == ELFMAG0;
- res &= elf->e_ident[EI_MAG1] == ELFMAG1;
- res &= elf->e_ident[EI_MAG2] == ELFMAG2;
- res &= elf->e_ident[EI_MAG3] == ELFMAG3;
- res &= elf->e_ident[EI_CLASS] == ELFCLASS32
- || elf->e_ident[EI_CLASS] == ELFCLASS64;
- res &= elf->e_ident[EI_DATA] == ELFDATA2LSB;
+int parse_elf_header(elf_obj_t *elf) {
+ int res;
+ /* Verify ELF Magic numbers */
+ COPY_STRUCT(&elf->e_ident, elf->buf, 0, elf->sz);
+ res = elf->e_ident[EI_MAG0] == ELFMAG0;
+ res &= elf->e_ident[EI_MAG1] == ELFMAG1;
+ res &= elf->e_ident[EI_MAG2] == ELFMAG2;
+ res &= elf->e_ident[EI_MAG3] == ELFMAG3;
+ res &= elf->e_ident[EI_CLASS] == ELFCLASS32
+ || elf->e_ident[EI_CLASS] == ELFCLASS64;
+ res &= elf->e_ident[EI_DATA] == ELFDATA2LSB;
- if (!res) goto bail;
+ if (!res) goto bail;
- elf->le_data = elf->e_ident[EI_DATA] == ELFDATA2LSB;
+ elf->le_data = elf->e_ident[EI_DATA] == ELFDATA2LSB;
- /* Read in relevant values */
- if (elf->e_ident[EI_CLASS] == ELFCLASS32)
- {
- elf->bits = 32;
- COPY_STRUCT(&elf->hdr32, elf->buf, 0, elf->sz);
+ /* Read in relevant values */
+ if (elf->e_ident[EI_CLASS] == ELFCLASS32) {
+ elf->bits = 32;
+ COPY_STRUCT(&elf->hdr32, elf->buf, 0, elf->sz);
- ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_type);
- ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_machine);
- ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_version);
- ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_entry);
- ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_phoff);
- ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_shoff);
- ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_flags);
- ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_ehsize);
- ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_phentsize);
- ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_phnum);
- ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_shentsize);
- ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_shnum);
- ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_shstrndx);
- }
- else /* if (elf->e_ident[EI_CLASS] == ELFCLASS64) */
- {
- elf->bits = 64;
- COPY_STRUCT(&elf->hdr64, elf->buf, 0, elf->sz);
+ ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_type);
+ ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_machine);
+ ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_version);
+ ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_entry);
+ ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_phoff);
+ ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_shoff);
+ ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_flags);
+ ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_ehsize);
+ ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_phentsize);
+ ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_phnum);
+ ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_shentsize);
+ ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_shnum);
+ ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_shstrndx);
+ } else { /* if (elf->e_ident[EI_CLASS] == ELFCLASS64) */
+ elf->bits = 64;
+ COPY_STRUCT(&elf->hdr64, elf->buf, 0, elf->sz);
- ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_type);
- ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_machine);
- ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_version);
- ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_entry);
- ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_phoff);
- ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_shoff);
- ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_flags);
- ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_ehsize);
- ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_phentsize);
- ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_phnum);
- ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_shentsize);
- ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_shnum);
- ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_shstrndx);
- }
+ ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_type);
+ ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_machine);
+ ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_version);
+ ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_entry);
+ ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_phoff);
+ ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_shoff);
+ ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_flags);
+ ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_ehsize);
+ ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_phentsize);
+ ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_phnum);
+ ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_shentsize);
+ ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_shnum);
+ ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_shstrndx);
+ }
- return 0;
+ return 0;
bail:
- log_msg("Failed to parse ELF file header");
- return 1;
+ log_msg("Failed to parse ELF file header");
+ return 1;
}
-int parse_elf_section(elf_obj_t *elf, int idx, Elf32_Shdr *hdr32, Elf64_Shdr *hdr64)
-{
- if (hdr32)
- {
- if (idx >= elf->hdr32.e_shnum)
- goto bail;
-
- COPY_STRUCT(hdr32, elf->buf, elf->hdr32.e_shoff + idx * elf->hdr32.e_shentsize,
- elf->sz);
- ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_name);
- ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_type);
- ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_flags);
- ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_addr);
- ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_offset);
- ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_size);
- ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_link);
- ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_info);
- ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_addralign);
- ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_entsize);
- }
- else /* if (hdr64) */
- {
- if (idx >= elf->hdr64.e_shnum)
- goto bail;
-
- COPY_STRUCT(hdr64, elf->buf, elf->hdr64.e_shoff + idx * elf->hdr64.e_shentsize,
- elf->sz);
- ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_name);
- ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_type);
- ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_flags);
- ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_addr);
- ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_offset);
- ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_size);
- ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_link);
- ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_info);
- ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_addralign);
- ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_entsize);
- }
-
- return 0;
-bail:
- return 1;
-}
-
-char *parse_elf_string_table(elf_obj_t *elf, int s_idx, int idx)
-{
- if (elf->bits == 32)
- {
- Elf32_Shdr shdr;
-
- if (parse_elf_section(elf, s_idx, &shdr, NULL))
- {
- log_msg("Failed to parse ELF string table: section %d, index %d\n",
- s_idx, idx);
- return "";
- }
-
- return (char *)(elf->buf + shdr.sh_offset + idx);
- }
- else /* if (elf->bits == 64) */
- {
- Elf64_Shdr shdr;
-
- if (parse_elf_section(elf, s_idx, NULL, &shdr))
- {
- log_msg("Failed to parse ELF string table: section %d, index %d\n",
- s_idx, idx);
- return "";
- }
-
- return (char *)(elf->buf + shdr.sh_offset + idx);
- }
-}
-
-int parse_elf_symbol(elf_obj_t *elf, unsigned int ofst, Elf32_Sym *sym32, Elf64_Sym *sym64)
-{
- if (sym32)
- {
- COPY_STRUCT(sym32, elf->buf, ofst, elf->sz);
- ENDIAN_ASSIGN_IN_PLACE(sym32->st_name);
- ENDIAN_ASSIGN_IN_PLACE(sym32->st_value);
- ENDIAN_ASSIGN_IN_PLACE(sym32->st_size);
- ENDIAN_ASSIGN_IN_PLACE(sym32->st_info);
- ENDIAN_ASSIGN_IN_PLACE(sym32->st_other);
- ENDIAN_ASSIGN_IN_PLACE(sym32->st_shndx);
- }
- else /* if (sym64) */
- {
- COPY_STRUCT(sym64, elf->buf, ofst, elf->sz);
- ENDIAN_ASSIGN_IN_PLACE(sym64->st_name);
- ENDIAN_ASSIGN_IN_PLACE(sym64->st_value);
- ENDIAN_ASSIGN_IN_PLACE(sym64->st_size);
- ENDIAN_ASSIGN_IN_PLACE(sym64->st_info);
- ENDIAN_ASSIGN_IN_PLACE(sym64->st_other);
- ENDIAN_ASSIGN_IN_PLACE(sym64->st_shndx);
- }
- return 0;
-bail:
- return 1;
-}
-
-int parse_elf(uint8_t *buf, size_t sz, output_fmt_t mode)
-{
- elf_obj_t elf;
- unsigned int ofst;
- int i;
- Elf32_Off strtab_off32;
- Elf64_Off strtab_off64; /* save String Table offset for later use */
-
- memset(&elf, 0, sizeof(elf));
- elf.buf = buf;
- elf.sz = sz;
-
- /* Parse Header */
- if (parse_elf_header(&elf))
+int parse_elf_section(elf_obj_t *elf, int idx, Elf32_Shdr *hdr32, Elf64_Shdr *hdr64) {
+ if (hdr32) {
+ if (idx >= elf->hdr32.e_shnum)
goto bail;
- if (elf.bits == 32)
- {
- Elf32_Shdr shdr;
- for (i = 0; i < elf.hdr32.e_shnum; i++)
- {
- parse_elf_section(&elf, i, &shdr, NULL);
+ COPY_STRUCT(hdr32, elf->buf, elf->hdr32.e_shoff + idx * elf->hdr32.e_shentsize,
+ elf->sz);
+ ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_name);
+ ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_type);
+ ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_flags);
+ ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_addr);
+ ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_offset);
+ ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_size);
+ ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_link);
+ ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_info);
+ ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_addralign);
+ ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_entsize);
+ } else { /* if (hdr64) */
+ if (idx >= elf->hdr64.e_shnum)
+ goto bail;
- if (shdr.sh_type == SHT_STRTAB)
- {
- char strtsb_name[128];
+ COPY_STRUCT(hdr64, elf->buf, elf->hdr64.e_shoff + idx * elf->hdr64.e_shentsize,
+ elf->sz);
+ ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_name);
+ ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_type);
+ ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_flags);
+ ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_addr);
+ ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_offset);
+ ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_size);
+ ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_link);
+ ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_info);
+ ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_addralign);
+ ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_entsize);
+ }
- strcpy(strtsb_name, (char *)(elf.buf + shdr.sh_offset + shdr.sh_name));
-
- if (!(strcmp(strtsb_name, ".shstrtab")))
- {
- /* log_msg("found section: %s\n", strtsb_name); */
- strtab_off32 = shdr.sh_offset;
- break;
- }
- }
- }
- }
- else /* if (elf.bits == 64) */
- {
- Elf64_Shdr shdr;
- for (i = 0; i < elf.hdr64.e_shnum; i++)
- {
- parse_elf_section(&elf, i, NULL, &shdr);
-
- if (shdr.sh_type == SHT_STRTAB)
- {
- char strtsb_name[128];
-
- strcpy(strtsb_name, (char *)(elf.buf + shdr.sh_offset + shdr.sh_name));
-
- if (!(strcmp(strtsb_name, ".shstrtab")))
- {
- /* log_msg("found section: %s\n", strtsb_name); */
- strtab_off64 = shdr.sh_offset;
- break;
- }
- }
- }
- }
-
- /* Parse all Symbol Tables */
- if (elf.bits == 32)
- {
- Elf32_Shdr shdr;
- for (i = 0; i < elf.hdr32.e_shnum; i++)
- {
- parse_elf_section(&elf, i, &shdr, NULL);
-
- if (shdr.sh_type == SHT_SYMTAB)
- {
- for (ofst = shdr.sh_offset;
- ofst < shdr.sh_offset + shdr.sh_size;
- ofst += shdr.sh_entsize)
- {
- Elf32_Sym sym;
-
- parse_elf_symbol(&elf, ofst, &sym, NULL);
-
- /* For all OBJECTS (data objects), extract the value from the
- * proper data segment.
- */
- /* if (ELF32_ST_TYPE(sym.st_info) == STT_OBJECT && sym.st_name)
- log_msg("found data object %s\n",
- parse_elf_string_table(&elf,
- shdr.sh_link,
- sym.st_name));
- */
-
- if (ELF32_ST_TYPE(sym.st_info) == STT_OBJECT
- && sym.st_size == 4)
- {
- Elf32_Shdr dhdr;
- int val = 0;
- char section_name[128];
-
- parse_elf_section(&elf, sym.st_shndx, &dhdr, NULL);
-
- /* For explanition - refer to _MSC_VER version of code */
- strcpy(section_name, (char *)(elf.buf + strtab_off32 + dhdr.sh_name));
- /* log_msg("Section_name: %s, Section_type: %d\n", section_name, dhdr.sh_type); */
-
- if (strcmp(section_name, ".bss"))
- {
- if (sizeof(val) != sym.st_size)
- {
- /* The target value is declared as an int in
- * asm_*_offsets.c, which is 4 bytes on all
- * targets we currently use. Complain loudly if
- * this is not true.
- */
- log_msg("Symbol size is wrong\n");
- goto bail;
- }
-
- memcpy(&val,
- elf.buf + dhdr.sh_offset + sym.st_value,
- sym.st_size);
- }
-
- if (!elf.le_data)
- {
- log_msg("Big Endian data not supported yet!\n");
- goto bail;
- }
-
- switch (mode)
- {
- case OUTPUT_FMT_RVDS:
- printf("%-40s EQU %5d\n",
- parse_elf_string_table(&elf,
- shdr.sh_link,
- sym.st_name),
- val);
- break;
- case OUTPUT_FMT_GAS:
- printf(".equ %-40s, %5d\n",
- parse_elf_string_table(&elf,
- shdr.sh_link,
- sym.st_name),
- val);
- break;
- default:
- printf("%s = %d\n",
- parse_elf_string_table(&elf,
- shdr.sh_link,
- sym.st_name),
- val);
- }
- }
- }
- }
- }
- }
- else /* if (elf.bits == 64) */
- {
- Elf64_Shdr shdr;
- for (i = 0; i < elf.hdr64.e_shnum; i++)
- {
- parse_elf_section(&elf, i, NULL, &shdr);
-
- if (shdr.sh_type == SHT_SYMTAB)
- {
- for (ofst = shdr.sh_offset;
- ofst < shdr.sh_offset + shdr.sh_size;
- ofst += shdr.sh_entsize)
- {
- Elf64_Sym sym;
-
- parse_elf_symbol(&elf, ofst, NULL, &sym);
-
- /* For all OBJECTS (data objects), extract the value from the
- * proper data segment.
- */
- /* if (ELF64_ST_TYPE(sym.st_info) == STT_OBJECT && sym.st_name)
- log_msg("found data object %s\n",
- parse_elf_string_table(&elf,
- shdr.sh_link,
- sym.st_name));
- */
-
- if (ELF64_ST_TYPE(sym.st_info) == STT_OBJECT
- && sym.st_size == 4)
- {
- Elf64_Shdr dhdr;
- int val = 0;
- char section_name[128];
-
- parse_elf_section(&elf, sym.st_shndx, NULL, &dhdr);
-
- /* For explanition - refer to _MSC_VER version of code */
- strcpy(section_name, (char *)(elf.buf + strtab_off64 + dhdr.sh_name));
- /* log_msg("Section_name: %s, Section_type: %d\n", section_name, dhdr.sh_type); */
-
- if ((strcmp(section_name, ".bss")))
- {
- if (sizeof(val) != sym.st_size)
- {
- /* The target value is declared as an int in
- * asm_*_offsets.c, which is 4 bytes on all
- * targets we currently use. Complain loudly if
- * this is not true.
- */
- log_msg("Symbol size is wrong\n");
- goto bail;
- }
-
- memcpy(&val,
- elf.buf + dhdr.sh_offset + sym.st_value,
- sym.st_size);
- }
-
- if (!elf.le_data)
- {
- log_msg("Big Endian data not supported yet!\n");
- goto bail;
- }
-
- switch (mode)
- {
- case OUTPUT_FMT_RVDS:
- printf("%-40s EQU %5d\n",
- parse_elf_string_table(&elf,
- shdr.sh_link,
- sym.st_name),
- val);
- break;
- case OUTPUT_FMT_GAS:
- printf(".equ %-40s, %5d\n",
- parse_elf_string_table(&elf,
- shdr.sh_link,
- sym.st_name),
- val);
- break;
- default:
- printf("%s = %d\n",
- parse_elf_string_table(&elf,
- shdr.sh_link,
- sym.st_name),
- val);
- }
- }
- }
- }
- }
- }
-
- if (mode == OUTPUT_FMT_RVDS)
- printf(" END\n");
-
- return 0;
+ return 0;
bail:
- log_msg("Parse error: File does not appear to be valid ELF32 or ELF64\n");
- return 1;
+ return 1;
+}
+
+char *parse_elf_string_table(elf_obj_t *elf, int s_idx, int idx) {
+ if (elf->bits == 32) {
+ Elf32_Shdr shdr;
+
+ if (parse_elf_section(elf, s_idx, &shdr, NULL)) {
+ log_msg("Failed to parse ELF string table: section %d, index %d\n",
+ s_idx, idx);
+ return "";
+ }
+
+ return (char *)(elf->buf + shdr.sh_offset + idx);
+ } else { /* if (elf->bits == 64) */
+ Elf64_Shdr shdr;
+
+ if (parse_elf_section(elf, s_idx, NULL, &shdr)) {
+ log_msg("Failed to parse ELF string table: section %d, index %d\n",
+ s_idx, idx);
+ return "";
+ }
+
+ return (char *)(elf->buf + shdr.sh_offset + idx);
+ }
+}
+
+int parse_elf_symbol(elf_obj_t *elf, unsigned int ofst, Elf32_Sym *sym32, Elf64_Sym *sym64) {
+ if (sym32) {
+ COPY_STRUCT(sym32, elf->buf, ofst, elf->sz);
+ ENDIAN_ASSIGN_IN_PLACE(sym32->st_name);
+ ENDIAN_ASSIGN_IN_PLACE(sym32->st_value);
+ ENDIAN_ASSIGN_IN_PLACE(sym32->st_size);
+ ENDIAN_ASSIGN_IN_PLACE(sym32->st_info);
+ ENDIAN_ASSIGN_IN_PLACE(sym32->st_other);
+ ENDIAN_ASSIGN_IN_PLACE(sym32->st_shndx);
+ } else { /* if (sym64) */
+ COPY_STRUCT(sym64, elf->buf, ofst, elf->sz);
+ ENDIAN_ASSIGN_IN_PLACE(sym64->st_name);
+ ENDIAN_ASSIGN_IN_PLACE(sym64->st_value);
+ ENDIAN_ASSIGN_IN_PLACE(sym64->st_size);
+ ENDIAN_ASSIGN_IN_PLACE(sym64->st_info);
+ ENDIAN_ASSIGN_IN_PLACE(sym64->st_other);
+ ENDIAN_ASSIGN_IN_PLACE(sym64->st_shndx);
+ }
+ return 0;
+bail:
+ return 1;
+}
+
+int parse_elf(uint8_t *buf, size_t sz, output_fmt_t mode) {
+ elf_obj_t elf;
+ unsigned int ofst;
+ int i;
+ Elf32_Off strtab_off32;
+ Elf64_Off strtab_off64; /* save String Table offset for later use */
+
+ memset(&elf, 0, sizeof(elf));
+ elf.buf = buf;
+ elf.sz = sz;
+
+ /* Parse Header */
+ if (parse_elf_header(&elf))
+ goto bail;
+
+ if (elf.bits == 32) {
+ Elf32_Shdr shdr;
+ for (i = 0; i < elf.hdr32.e_shnum; i++) {
+ parse_elf_section(&elf, i, &shdr, NULL);
+
+ if (shdr.sh_type == SHT_STRTAB) {
+ char strtsb_name[128];
+
+ strcpy(strtsb_name, (char *)(elf.buf + shdr.sh_offset + shdr.sh_name));
+
+ if (!(strcmp(strtsb_name, ".shstrtab"))) {
+ /* log_msg("found section: %s\n", strtsb_name); */
+ strtab_off32 = shdr.sh_offset;
+ break;
+ }
+ }
+ }
+ } else { /* if (elf.bits == 64) */
+ Elf64_Shdr shdr;
+ for (i = 0; i < elf.hdr64.e_shnum; i++) {
+ parse_elf_section(&elf, i, NULL, &shdr);
+
+ if (shdr.sh_type == SHT_STRTAB) {
+ char strtsb_name[128];
+
+ strcpy(strtsb_name, (char *)(elf.buf + shdr.sh_offset + shdr.sh_name));
+
+ if (!(strcmp(strtsb_name, ".shstrtab"))) {
+ /* log_msg("found section: %s\n", strtsb_name); */
+ strtab_off64 = shdr.sh_offset;
+ break;
+ }
+ }
+ }
+ }
+
+ /* Parse all Symbol Tables */
+ if (elf.bits == 32) {
+ Elf32_Shdr shdr;
+ for (i = 0; i < elf.hdr32.e_shnum; i++) {
+ parse_elf_section(&elf, i, &shdr, NULL);
+
+ if (shdr.sh_type == SHT_SYMTAB) {
+ for (ofst = shdr.sh_offset;
+ ofst < shdr.sh_offset + shdr.sh_size;
+ ofst += shdr.sh_entsize) {
+ Elf32_Sym sym;
+
+ parse_elf_symbol(&elf, ofst, &sym, NULL);
+
+ /* For all OBJECTS (data objects), extract the value from the
+ * proper data segment.
+ */
+ /* if (ELF32_ST_TYPE(sym.st_info) == STT_OBJECT && sym.st_name)
+ log_msg("found data object %s\n",
+ parse_elf_string_table(&elf,
+ shdr.sh_link,
+ sym.st_name));
+ */
+
+ if (ELF32_ST_TYPE(sym.st_info) == STT_OBJECT
+ && sym.st_size == 4) {
+ Elf32_Shdr dhdr;
+ int val = 0;
+ char section_name[128];
+
+ parse_elf_section(&elf, sym.st_shndx, &dhdr, NULL);
+
+ /* For explanation - refer to _MSC_VER version of code */
+ strcpy(section_name, (char *)(elf.buf + strtab_off32 + dhdr.sh_name));
+ /* log_msg("Section_name: %s, Section_type: %d\n", section_name, dhdr.sh_type); */
+
+ if (strcmp(section_name, ".bss")) {
+ if (sizeof(val) != sym.st_size) {
+ /* The target value is declared as an int in
+ * asm_*_offsets.c, which is 4 bytes on all
+ * targets we currently use. Complain loudly if
+ * this is not true.
+ */
+ log_msg("Symbol size is wrong\n");
+ goto bail;
+ }
+
+ memcpy(&val,
+ elf.buf + dhdr.sh_offset + sym.st_value,
+ sym.st_size);
+ }
+
+ if (!elf.le_data) {
+ log_msg("Big Endian data not supported yet!\n");
+ goto bail;
+ }
+
+ switch (mode) {
+ case OUTPUT_FMT_RVDS:
+ printf("%-40s EQU %5d\n",
+ parse_elf_string_table(&elf,
+ shdr.sh_link,
+ sym.st_name),
+ val);
+ break;
+ case OUTPUT_FMT_GAS:
+ printf(".equ %-40s, %5d\n",
+ parse_elf_string_table(&elf,
+ shdr.sh_link,
+ sym.st_name),
+ val);
+ break;
+ default:
+ printf("%s = %d\n",
+ parse_elf_string_table(&elf,
+ shdr.sh_link,
+ sym.st_name),
+ val);
+ }
+ }
+ }
+ }
+ }
+ } else { /* if (elf.bits == 64) */
+ Elf64_Shdr shdr;
+ for (i = 0; i < elf.hdr64.e_shnum; i++) {
+ parse_elf_section(&elf, i, NULL, &shdr);
+
+ if (shdr.sh_type == SHT_SYMTAB) {
+ for (ofst = shdr.sh_offset;
+ ofst < shdr.sh_offset + shdr.sh_size;
+ ofst += shdr.sh_entsize) {
+ Elf64_Sym sym;
+
+ parse_elf_symbol(&elf, ofst, NULL, &sym);
+
+ /* For all OBJECTS (data objects), extract the value from the
+ * proper data segment.
+ */
+ /* if (ELF64_ST_TYPE(sym.st_info) == STT_OBJECT && sym.st_name)
+ log_msg("found data object %s\n",
+ parse_elf_string_table(&elf,
+ shdr.sh_link,
+ sym.st_name));
+ */
+
+ if (ELF64_ST_TYPE(sym.st_info) == STT_OBJECT
+ && sym.st_size == 4) {
+ Elf64_Shdr dhdr;
+ int val = 0;
+ char section_name[128];
+
+ parse_elf_section(&elf, sym.st_shndx, NULL, &dhdr);
+
+ /* For explanation - refer to _MSC_VER version of code */
+ strcpy(section_name, (char *)(elf.buf + strtab_off64 + dhdr.sh_name));
+ /* log_msg("Section_name: %s, Section_type: %d\n", section_name, dhdr.sh_type); */
+
+ if ((strcmp(section_name, ".bss"))) {
+ if (sizeof(val) != sym.st_size) {
+ /* The target value is declared as an int in
+ * asm_*_offsets.c, which is 4 bytes on all
+ * targets we currently use. Complain loudly if
+ * this is not true.
+ */
+ log_msg("Symbol size is wrong\n");
+ goto bail;
+ }
+
+ memcpy(&val,
+ elf.buf + dhdr.sh_offset + sym.st_value,
+ sym.st_size);
+ }
+
+ if (!elf.le_data) {
+ log_msg("Big Endian data not supported yet!\n");
+ goto bail;
+ }
+
+ switch (mode) {
+ case OUTPUT_FMT_RVDS:
+ printf("%-40s EQU %5d\n",
+ parse_elf_string_table(&elf,
+ shdr.sh_link,
+ sym.st_name),
+ val);
+ break;
+ case OUTPUT_FMT_GAS:
+ printf(".equ %-40s, %5d\n",
+ parse_elf_string_table(&elf,
+ shdr.sh_link,
+ sym.st_name),
+ val);
+ break;
+ default:
+ printf("%s = %d\n",
+ parse_elf_string_table(&elf,
+ shdr.sh_link,
+ sym.st_name),
+ val);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (mode == OUTPUT_FMT_RVDS)
+ printf(" END\n");
+
+ return 0;
+bail:
+ log_msg("Parse error: File does not appear to be valid ELF32 or ELF64\n");
+ return 1;
}
#endif
@@ -671,244 +594,222 @@
#define get_le32(x) ((*(x)) | (*(x+1)) << 8 |(*(x+2)) << 16 | (*(x+3)) << 24 )
#define get_le16(x) ((*(x)) | (*(x+1)) << 8)
-int parse_coff(uint8_t *buf, size_t sz)
-{
- unsigned int nsections, symtab_ptr, symtab_sz, strtab_ptr;
- unsigned int sectionrawdata_ptr;
- unsigned int i;
- uint8_t *ptr;
- uint32_t symoffset;
+int parse_coff(uint8_t *buf, size_t sz) {
+ unsigned int nsections, symtab_ptr, symtab_sz, strtab_ptr;
+ unsigned int sectionrawdata_ptr;
+ unsigned int i;
+ uint8_t *ptr;
+ uint32_t symoffset;
- char **sectionlist; //this array holds all section names in their correct order.
- //it is used to check if the symbol is in .bss or .rdata section.
+ char **sectionlist; // this array holds all section names in their correct order.
+ // it is used to check if the symbol is in .bss or .rdata section.
- nsections = get_le16(buf + 2);
- symtab_ptr = get_le32(buf + 8);
- symtab_sz = get_le32(buf + 12);
- strtab_ptr = symtab_ptr + symtab_sz * 18;
+ nsections = get_le16(buf + 2);
+ symtab_ptr = get_le32(buf + 8);
+ symtab_sz = get_le32(buf + 12);
+ strtab_ptr = symtab_ptr + symtab_sz * 18;
- if (nsections > 96)
- {
- log_msg("Too many sections\n");
- return 1;
+ if (nsections > 96) {
+ log_msg("Too many sections\n");
+ return 1;
+ }
+
+ sectionlist = malloc(nsections * sizeof(sectionlist));
+
+ if (sectionlist == NULL) {
+ log_msg("Allocating first level of section list failed\n");
+ return 1;
+ }
+
+ // log_msg("COFF: Found %u symbols in %u sections.\n", symtab_sz, nsections);
+
+ /*
+ The size of optional header is always zero for an obj file. So, the section header
+ follows the file header immediately.
+ */
+
+ ptr = buf + 20; // section header
+
+ for (i = 0; i < nsections; i++) {
+ char sectionname[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
+ strncpy(sectionname, ptr, 8);
+ // log_msg("COFF: Parsing section %s\n",sectionname);
+
+ sectionlist[i] = malloc(strlen(sectionname) + 1);
+
+ if (sectionlist[i] == NULL) {
+ log_msg("Allocating storage for %s failed\n", sectionname);
+ goto bail;
+ }
+ strcpy(sectionlist[i], sectionname);
+
+ if (!strcmp(sectionname, ".rdata")) sectionrawdata_ptr = get_le32(ptr + 20);
+
+ ptr += 40;
+ }
+
+ // log_msg("COFF: Symbol table at offset %u\n", symtab_ptr);
+ // log_msg("COFF: raw data pointer ofset for section .rdata is %u\n", sectionrawdata_ptr);
+
+ /* The compiler puts the data with non-zero offset in .rdata section, but puts the data with
+ zero offset in .bss section. So, if the data is in .bss section, set offset=0.
+ Note from Wiki: In an object module compiled from C, the bss section contains
+ the local variables (but not functions) that were declared with the static keyword,
+ except for those with non-zero initial values. (In C, static variables are initialized
+ to zero by default.) It also contains the non-local (both extern and static) variables
+ that are also initialized to zero (either explicitly or by default).
+ */
+ // move to symbol table
+ /* COFF symbol table:
+ offset field
+ 0 Name(*)
+ 8 Value
+ 12 SectionNumber
+ 14 Type
+ 16 StorageClass
+ 17 NumberOfAuxSymbols
+ */
+ ptr = buf + symtab_ptr;
+
+ for (i = 0; i < symtab_sz; i++) {
+ int16_t section = get_le16(ptr + 12); // section number
+
+ if (section > 0 && ptr[16] == 2) {
+ // if(section > 0 && ptr[16] == 3 && get_le32(ptr+8)) {
+
+ if (get_le32(ptr)) {
+ char name[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
+ strncpy(name, ptr, 8);
+ // log_msg("COFF: Parsing symbol %s\n",name);
+ /* The 64bit Windows compiler doesn't prefix with an _.
+ * Check what's there, and bump if necessary
+ */
+ if (name[0] == '_')
+ printf("%-40s EQU ", name + 1);
+ else
+ printf("%-40s EQU ", name);
+ } else {
+ // log_msg("COFF: Parsing symbol %s\n",
+ // buf + strtab_ptr + get_le32(ptr+4));
+ if ((buf + strtab_ptr + get_le32(ptr + 4))[0] == '_')
+ printf("%-40s EQU ",
+ buf + strtab_ptr + get_le32(ptr + 4) + 1);
+ else
+ printf("%-40s EQU ", buf + strtab_ptr + get_le32(ptr + 4));
+ }
+
+ if (!(strcmp(sectionlist[section - 1], ".bss"))) {
+ symoffset = 0;
+ } else {
+ symoffset = get_le32(buf + sectionrawdata_ptr + get_le32(ptr + 8));
+ }
+
+ // log_msg(" Section: %d\n",section);
+ // log_msg(" Class: %d\n",ptr[16]);
+ // log_msg(" Address: %u\n",get_le32(ptr+8));
+ // log_msg(" Offset: %u\n", symoffset);
+
+ printf("%5d\n", symoffset);
}
- sectionlist = malloc(nsections * sizeof(sectionlist));
+ ptr += 18;
+ }
- if (sectionlist == NULL)
- {
- log_msg("Allocating first level of section list failed\n");
- return 1;
- }
+ printf(" END\n");
- //log_msg("COFF: Found %u symbols in %u sections.\n", symtab_sz, nsections);
+ for (i = 0; i < nsections; i++) {
+ free(sectionlist[i]);
+ }
- /*
- The size of optional header is always zero for an obj file. So, the section header
- follows the file header immediately.
- */
+ free(sectionlist);
- ptr = buf + 20; //section header
-
- for (i = 0; i < nsections; i++)
- {
- char sectionname[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
- strncpy(sectionname, ptr, 8);
- //log_msg("COFF: Parsing section %s\n",sectionname);
-
- sectionlist[i] = malloc(strlen(sectionname) + 1);
-
- if (sectionlist[i] == NULL)
- {
- log_msg("Allocating storage for %s failed\n", sectionname);
- goto bail;
- }
- strcpy(sectionlist[i], sectionname);
-
- if (!strcmp(sectionname, ".rdata")) sectionrawdata_ptr = get_le32(ptr + 20);
-
- ptr += 40;
- }
-
- //log_msg("COFF: Symbol table at offset %u\n", symtab_ptr);
- //log_msg("COFF: raw data pointer ofset for section .rdata is %u\n", sectionrawdata_ptr);
-
- /* The compiler puts the data with non-zero offset in .rdata section, but puts the data with
- zero offset in .bss section. So, if the data in in .bss section, set offset=0.
- Note from Wiki: In an object module compiled from C, the bss section contains
- the local variables (but not functions) that were declared with the static keyword,
- except for those with non-zero initial values. (In C, static variables are initialized
- to zero by default.) It also contains the non-local (both extern and static) variables
- that are also initialized to zero (either explicitly or by default).
- */
- //move to symbol table
- /* COFF symbol table:
- offset field
- 0 Name(*)
- 8 Value
- 12 SectionNumber
- 14 Type
- 16 StorageClass
- 17 NumberOfAuxSymbols
- */
- ptr = buf + symtab_ptr;
-
- for (i = 0; i < symtab_sz; i++)
- {
- int16_t section = get_le16(ptr + 12); //section number
-
- if (section > 0 && ptr[16] == 2)
- {
- //if(section > 0 && ptr[16] == 3 && get_le32(ptr+8)) {
-
- if (get_le32(ptr))
- {
- char name[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
- strncpy(name, ptr, 8);
- //log_msg("COFF: Parsing symbol %s\n",name);
- /* The 64bit Windows compiler doesn't prefix with an _.
- * Check what's there, and bump if necessary
- */
- if (name[0] == '_')
- printf("%-40s EQU ", name + 1);
- else
- printf("%-40s EQU ", name);
- }
- else
- {
- //log_msg("COFF: Parsing symbol %s\n",
- // buf + strtab_ptr + get_le32(ptr+4));
- if ((buf + strtab_ptr + get_le32(ptr + 4))[0] == '_')
- printf("%-40s EQU ",
- buf + strtab_ptr + get_le32(ptr + 4) + 1);
- else
- printf("%-40s EQU ", buf + strtab_ptr + get_le32(ptr + 4));
- }
-
- if (!(strcmp(sectionlist[section-1], ".bss")))
- {
- symoffset = 0;
- }
- else
- {
- symoffset = get_le32(buf + sectionrawdata_ptr + get_le32(ptr + 8));
- }
-
- //log_msg(" Section: %d\n",section);
- //log_msg(" Class: %d\n",ptr[16]);
- //log_msg(" Address: %u\n",get_le32(ptr+8));
- //log_msg(" Offset: %u\n", symoffset);
-
- printf("%5d\n", symoffset);
- }
-
- ptr += 18;
- }
-
- printf(" END\n");
-
- for (i = 0; i < nsections; i++)
- {
- free(sectionlist[i]);
- }
-
- free(sectionlist);
-
- return 0;
+ return 0;
bail:
- for (i = 0; i < nsections; i++)
- {
- free(sectionlist[i]);
- }
+ for (i = 0; i < nsections; i++) {
+ free(sectionlist[i]);
+ }
- free(sectionlist);
+ free(sectionlist);
- return 1;
+ return 1;
}
#endif /* defined(_MSC_VER) || defined(__MINGW32__) || defined(__CYGWIN__) */
-int main(int argc, char **argv)
-{
- output_fmt_t mode = OUTPUT_FMT_PLAIN;
- const char *f;
- uint8_t *file_buf;
- int res;
- FILE *fp;
- long int file_size;
+int main(int argc, char **argv) {
+ output_fmt_t mode = OUTPUT_FMT_PLAIN;
+ const char *f;
+ uint8_t *file_buf;
+ int res;
+ FILE *fp;
+ long int file_size;
- if (argc < 2 || argc > 3)
- {
- fprintf(stderr, "Usage: %s [output format] <obj file>\n\n", argv[0]);
- fprintf(stderr, " <obj file>\tobject file to parse\n");
- fprintf(stderr, "Output Formats:\n");
- fprintf(stderr, " gas - compatible with GNU assembler\n");
- fprintf(stderr, " rvds - compatible with armasm\n");
- goto bail;
- }
+ if (argc < 2 || argc > 3) {
+ fprintf(stderr, "Usage: %s [output format] <obj file>\n\n", argv[0]);
+ fprintf(stderr, " <obj file>\tobject file to parse\n");
+ fprintf(stderr, "Output Formats:\n");
+ fprintf(stderr, " gas - compatible with GNU assembler\n");
+ fprintf(stderr, " rvds - compatible with armasm\n");
+ goto bail;
+ }
- f = argv[2];
+ f = argv[2];
- if (!strcmp(argv[1], "rvds"))
- mode = OUTPUT_FMT_RVDS;
- else if (!strcmp(argv[1], "gas"))
- mode = OUTPUT_FMT_GAS;
- else
- f = argv[1];
+ if (!strcmp(argv[1], "rvds"))
+ mode = OUTPUT_FMT_RVDS;
+ else if (!strcmp(argv[1], "gas"))
+ mode = OUTPUT_FMT_GAS;
+ else
+ f = argv[1];
- fp = fopen(f, "rb");
+ fp = fopen(f, "rb");
- if (!fp)
- {
- perror("Unable to open file");
- goto bail;
- }
+ if (!fp) {
+ perror("Unable to open file");
+ goto bail;
+ }
- if (fseek(fp, 0, SEEK_END))
- {
- perror("stat");
- goto bail;
- }
+ if (fseek(fp, 0, SEEK_END)) {
+ perror("stat");
+ goto bail;
+ }
- file_size = ftell(fp);
- file_buf = malloc(file_size);
+ file_size = ftell(fp);
+ file_buf = malloc(file_size);
- if (!file_buf)
- {
- perror("malloc");
- goto bail;
- }
+ if (!file_buf) {
+ perror("malloc");
+ goto bail;
+ }
- rewind(fp);
+ rewind(fp);
- if (fread(file_buf, sizeof(char), file_size, fp) != file_size)
- {
- perror("read");
- goto bail;
- }
+ if (fread(file_buf, sizeof(char), file_size, fp) != file_size) {
+ perror("read");
+ goto bail;
+ }
- if (fclose(fp))
- {
- perror("close");
- goto bail;
- }
+ if (fclose(fp)) {
+ perror("close");
+ goto bail;
+ }
#if defined(__GNUC__) && __GNUC__
#if defined(__MACH__)
- res = parse_macho(file_buf, file_size);
+ res = parse_macho(file_buf, file_size);
#elif defined(__ELF__)
- res = parse_elf(file_buf, file_size, mode);
+ res = parse_elf(file_buf, file_size, mode);
#endif
#endif
#if defined(_MSC_VER) || defined(__MINGW32__) || defined(__CYGWIN__)
- res = parse_coff(file_buf, file_size);
+ res = parse_coff(file_buf, file_size);
#endif
- free(file_buf);
+ free(file_buf);
- if (!res)
- return EXIT_SUCCESS;
+ if (!res)
+ return EXIT_SUCCESS;
bail:
- return EXIT_FAILURE;
+ return EXIT_FAILURE;
}
diff --git a/build/x86-msvs/obj_int_extract.bat b/build/x86-msvs/obj_int_extract.bat
index 1bb8653..70b39f6 100644
--- a/build/x86-msvs/obj_int_extract.bat
+++ b/build/x86-msvs/obj_int_extract.bat
@@ -7,9 +7,17 @@
REM be found in the AUTHORS file in the root of the source tree.
echo on
+cl /I "./" /I "%1" /nologo /c "%1/vp9/common/vp9_asm_com_offsets.c"
+cl /I "./" /I "%1" /nologo /c "%1/vp9/decoder/vp9_asm_dec_offsets.c"
+cl /I "./" /I "%1" /nologo /c "%1/vp9/encoder/vp9_asm_enc_offsets.c"
+obj_int_extract.exe rvds "vp9_asm_com_offsets.obj" > "vp9_asm_com_offsets.asm"
+obj_int_extract.exe rvds "vp9_asm_dec_offsets.obj" > "vp9_asm_dec_offsets.asm"
+obj_int_extract.exe rvds "vp9_asm_enc_offsets.obj" > "vp9_asm_enc_offsets.asm"
+
cl /I "./" /I "%1" /nologo /c "%1/vp8/common/asm_com_offsets.c"
cl /I "./" /I "%1" /nologo /c "%1/vp8/decoder/asm_dec_offsets.c"
cl /I "./" /I "%1" /nologo /c "%1/vp8/encoder/asm_enc_offsets.c"
-obj_int_extract.exe rvds "asm_com_offsets.obj" > "asm_com_offsets.asm"
-obj_int_extract.exe rvds "asm_dec_offsets.obj" > "asm_dec_offsets.asm"
-obj_int_extract.exe rvds "asm_enc_offsets.obj" > "asm_enc_offsets.asm"
+obj_int_extract.exe rvds "asm_com_offsets.obj" > "vp8_asm_com_offsets.asm"
+obj_int_extract.exe rvds "asm_dec_offsets.obj" > "vp8_asm_dec_offsets.asm"
+obj_int_extract.exe rvds "asm_enc_offsets.obj" > "vp8_asm_enc_offsets.asm"
+
diff --git a/build/x86-msvs/yasm.rules b/build/x86-msvs/yasm.rules
deleted file mode 100644
index ee1fefb..0000000
--- a/build/x86-msvs/yasm.rules
+++ /dev/null
@@ -1,115 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<VisualStudioToolFile
- Name="Yasm"
- Version="8.00"
- >
- <Rules>
- <CustomBuildRule
- Name="YASM"
- DisplayName="Yasm Assembler"
- CommandLine="yasm -Xvc -f $(PlatformName) [AllOptions] [AdditionalOptions] [Inputs]"
- Outputs="[$ObjectFileName]"
- FileExtensions="*.asm"
- ExecutionDescription="Assembling $(InputFileName)"
- ShowOnlyRuleProperties="false"
- >
- <Properties>
- <StringProperty
- Name="Defines"
- DisplayName="Definitions"
- Category="Pre-Defined Symbols"
- Description="Specify pre-defined symbols ('symbol' or 'symbol = value') "
- Switch="-D [value]"
- Delimited="true"
- Inheritable="true"
- />
- <StringProperty
- Name="IncludePaths"
- DisplayName="Include Paths"
- Category="Configuration"
- Description="Set the paths for any additional include files"
- Switch="-I [value]"
- Delimited="true"
- Inheritable="true"
- />
- <StringProperty
- Name="UnDefines"
- DisplayName="Remove Definitions"
- Category="Pre-Defined Symbols"
- Description="Remove pre-defined symbols "
- Switch="-U [value]"
- Delimited="true"
- Inheritable="true"
- />
- <StringProperty
- Name="ObjectFileName"
- DisplayName="Object File Name"
- Category="Output"
- Description="Select the output file name"
- Switch="-o [value]"
- DefaultValue="$(IntDir)\$(InputName).obj"
- />
- <StringProperty
- Name="ListFileName"
- DisplayName="List File Name"
- Category="Output"
- Description="Select an output listing by setting its file name"
- Switch="-l [value]"
- />
- <StringProperty
- Name="PreIncludeFile"
- DisplayName="Pre Include File"
- Category="Configuration"
- Description="Select a pre-included file by setting its name"
- Switch="-P [value]"
- />
- <BooleanProperty
- Name="Debug"
- DisplayName="Debug Information"
- Category="Output"
- Description="Generate debugging information"
- Switch="-g cv8"
- />
- <EnumProperty
- Name="PreProc"
- DisplayName="Pre-Processor"
- Category="Configuration"
- Description="Select the pre-processor ('nasm' or 'raw')"
- >
- <Values>
- <EnumValue
- Value="0"
- Switch="-rnasm"
- DisplayName="Nasm "
- />
- <EnumValue
- Value="1"
- Switch="-rraw"
- DisplayName="Raw"
- />
- </Values>
- </EnumProperty>
- <EnumProperty
- Name="Parser"
- DisplayName="Parser"
- Category="Configuration"
- Description="Select the parser for Intel ('nasm') or AT&T ( 'gas') syntax"
- >
- <Values>
- <EnumValue
- Value="0"
- Switch="-pnasm"
- DisplayName="Nasm"
- />
- <EnumValue
- Value="1"
- Switch="-pgas"
- DisplayName="Gas"
- />
- </Values>
- </EnumProperty>
- </Properties>
- </CustomBuildRule>
- </Rules>
-</VisualStudioToolFile>
-
diff --git a/configure b/configure
index be36e56..8bbd7ba 100755
--- a/configure
+++ b/configure
@@ -34,6 +34,7 @@
${toggle_md5} support for output of checksum data
${toggle_static_msvcrt} use static MSVCRT (VS builds only)
${toggle_vp8} VP8 codec support
+ ${toggle_vp9} VP9 codec support
${toggle_internal_stats} output of encoder internal stats for debug, if supported (encoders)
${toggle_mem_tracker} track memory usage
${toggle_postproc} postprocessing
@@ -176,19 +177,24 @@
enable temporal_denoising
[ -d ${source_path}/../include ] && enable alt_tree_layout
-for d in vp8; do
+for d in vp8 vp9; do
[ -d ${source_path}/${d} ] && disable alt_tree_layout;
done
if ! enabled alt_tree_layout; then
# development environment
[ -d ${source_path}/vp8 ] && CODECS="${CODECS} vp8_encoder vp8_decoder"
+[ -d ${source_path}/vp9 ] && CODECS="${CODECS} vp9_encoder vp9_decoder"
else
# customer environment
[ -f ${source_path}/../include/vpx/vp8cx.h ] && CODECS="${CODECS} vp8_encoder"
[ -f ${source_path}/../include/vpx/vp8dx.h ] && CODECS="${CODECS} vp8_decoder"
+[ -f ${source_path}/../include/vpx/vp9cx.h ] && CODECS="${CODECS} vp9_encoder"
+[ -f ${source_path}/../include/vpx/vp9dx.h ] && CODECS="${CODECS} vp9_decoder"
[ -f ${source_path}/../include/vpx/vp8cx.h ] || disable vp8_encoder
[ -f ${source_path}/../include/vpx/vp8dx.h ] || disable vp8_decoder
+[ -f ${source_path}/../include/vpx/vp9cx.h ] || disable vp9_encoder
+[ -f ${source_path}/../include/vpx/vp9dx.h ] || disable vp9_decoder
[ -f ${source_path}/../lib/*/*mt.lib ] && soft_enable static_msvcrt
fi
@@ -230,6 +236,18 @@
sys_mman_h
unistd_h
"
+EXPERIMENT_LIST="
+ csm
+ comp_intra_pred
+ superblocks
+ pred_filter
+ lossless
+ subpelrefmv
+ new_mvref
+ implicit_segmentation
+ newbintramodes
+ comp_interintra_pred
+"
CONFIG_LIST="
external_build
install_docs
@@ -276,8 +294,11 @@
unit_tests
multi_res_encoding
temporal_denoising
+ experimental
+ ${EXPERIMENT_LIST}
"
CMDLINE_SELECT="
+ external_build
extra_warnings
werror
install_docs
@@ -322,6 +343,7 @@
unit_tests
multi_res_encoding
temporal_denoising
+ experimental
"
process_cmdline() {
@@ -329,6 +351,18 @@
optval="${opt#*=}"
case "$opt" in
--disable-codecs) for c in ${CODECS}; do disable $c; done ;;
+ --enable-?*|--disable-?*)
+ eval `echo "$opt" | sed 's/--/action=/;s/-/ option=/;s/-/_/g'`
+ if echo "${EXPERIMENT_LIST}" | grep "^ *$option\$" >/dev/null; then
+ if enabled experimental; then
+ $action $option
+ else
+ log_echo "Ignoring $opt -- not in experimental mode."
+ fi
+ else
+ process_common_cmdline $opt
+ fi
+ ;;
*) process_common_cmdline "$opt"
;;
esac
@@ -464,7 +498,7 @@
fi
fi
fi
- if [ -z "$CC" ]; then
+ if [ -z "$CC" ] || enabled external_build; then
echo "Bypassing toolchain for environment detection."
enable external_build
check_header() {
@@ -473,6 +507,7 @@
shift
var=`echo $header | sed 's/[^A-Za-z0-9_]/_/g'`
disable $var
+ # Headers common to all environments
case $header in
stdio.h)
true;
@@ -484,6 +519,25 @@
done
${result:-true}
esac && enable $var
+
+ # Specialize windows and POSIX environments.
+ case $toolchain in
+ *-win*-*)
+ case $header-$toolchain in
+ stdint*-gcc) true;;
+ *) false;;
+ esac && enable $var
+ ;;
+ *)
+ case $header in
+ stdint.h) true;;
+ pthread.h) true;;
+ sys/mman.h) true;;
+ unistd.h) true;;
+ *) false;;
+ esac && enable $var
+ esac
+ enabled $var
}
check_ld() {
true
@@ -497,6 +551,7 @@
check_header stdint.h
check_header pthread.h
check_header sys/mman.h
+ check_header unistd.h # for sysconf(3) and friends.
check_header vpx/vpx_integer.h -I${source_path} && enable vpx_ports
}
@@ -537,6 +592,7 @@
check_add_cflags -Wpointer-arith
check_add_cflags -Wtype-limits
check_add_cflags -Wcast-qual
+ check_add_cflags -Wvla
check_add_cflags -Wimplicit-function-declaration
check_add_cflags -Wuninitialized
check_add_cflags -Wunused-variable
diff --git a/example_xma.c b/example_xma.c
index 72eb470..7aa8798 100644
--- a/example_xma.c
+++ b/example_xma.c
@@ -18,197 +18,174 @@
#include "vpx_config.h"
#include "vpx/vpx_decoder.h"
#include "vpx/vpx_integer.h"
-#if CONFIG_VP8_DECODER
+#if CONFIG_VP9_DECODER
#include "vpx/vp8dx.h"
#endif
static char *exec_name;
static int verbose = 0;
-static const struct
-{
- const char *name;
- const vpx_codec_iface_t *iface;
-} ifaces[] =
-{
-#if CONFIG_VP8_DECODER
- {"vp8", &vpx_codec_vp8_dx_algo},
+static const struct {
+ const char *name;
+ const vpx_codec_iface_t *iface;
+} ifaces[] = {
+#if CONFIG_VP9_DECODER
+ {"vp9", &vpx_codec_vp8_dx_algo},
#endif
};
-static void usage_exit(void)
-{
- int i;
+static void usage_exit(void) {
+ int i;
- printf("Usage: %s <options>\n\n"
- "Options:\n"
- "\t--codec <name>\tCodec to use (default=%s)\n"
- "\t-h <height>\tHeight of the simulated video frame, in pixels\n"
- "\t-w <width> \tWidth of the simulated video frame, in pixels\n"
- "\t-v \tVerbose mode (show individual segment sizes)\n"
- "\t--help \tShow this message\n"
- "\n"
- "Included decoders:\n"
- "\n",
- exec_name,
- ifaces[0].name);
+ printf("Usage: %s <options>\n\n"
+ "Options:\n"
+ "\t--codec <name>\tCodec to use (default=%s)\n"
+ "\t-h <height>\tHeight of the simulated video frame, in pixels\n"
+ "\t-w <width> \tWidth of the simulated video frame, in pixels\n"
+ "\t-v \tVerbose mode (show individual segment sizes)\n"
+ "\t--help \tShow this message\n"
+ "\n"
+ "Included decoders:\n"
+ "\n",
+ exec_name,
+ ifaces[0].name);
- for (i = 0; i < sizeof(ifaces) / sizeof(ifaces[0]); i++)
- printf(" %-6s - %s\n",
- ifaces[i].name,
- vpx_codec_iface_name(ifaces[i].iface));
+ for (i = 0; i < sizeof(ifaces) / sizeof(ifaces[0]); i++)
+ printf(" %-6s - %s\n",
+ ifaces[i].name,
+ vpx_codec_iface_name(ifaces[i].iface));
- exit(EXIT_FAILURE);
+ exit(EXIT_FAILURE);
}
-static void usage_error(const char *fmt, ...)
-{
- va_list ap;
- va_start(ap, fmt);
- vprintf(fmt, ap);
- printf("\n");
- usage_exit();
+static void usage_error(const char *fmt, ...) {
+ va_list ap;
+ va_start(ap, fmt);
+ vprintf(fmt, ap);
+ printf("\n");
+ usage_exit();
}
-void my_mem_dtor(vpx_codec_mmap_t *mmap)
-{
- if (verbose)
- printf("freeing segment %d\n", mmap->id);
+void my_mem_dtor(vpx_codec_mmap_t *mmap) {
+ if (verbose)
+ printf("freeing segment %d\n", mmap->id);
- free(mmap->priv);
+ free(mmap->priv);
}
-int main(int argc, char **argv)
-{
- vpx_codec_ctx_t decoder;
- vpx_codec_iface_t *iface = ifaces[0].iface;
- vpx_codec_iter_t iter;
- vpx_codec_dec_cfg_t cfg;
- vpx_codec_err_t res = VPX_CODEC_OK;
- unsigned int alloc_sz = 0;
- unsigned int w = 352;
- unsigned int h = 288;
- int i;
+int main(int argc, char **argv) {
+ vpx_codec_ctx_t decoder;
+ vpx_codec_iface_t *iface = ifaces[0].iface;
+ vpx_codec_iter_t iter;
+ vpx_codec_dec_cfg_t cfg;
+ vpx_codec_err_t res = VPX_CODEC_OK;
+ unsigned int alloc_sz = 0;
+ unsigned int w = 352;
+ unsigned int h = 288;
+ int i;
- exec_name = argv[0];
+ exec_name = argv[0];
- for (i = 1; i < argc; i++)
- {
- if (!strcmp(argv[i], "--codec"))
- {
- if (i + 1 < argc)
- {
- int j, k = -1;
+ for (i = 1; i < argc; i++) {
+ if (!strcmp(argv[i], "--codec")) {
+ if (i + 1 < argc) {
+ int j, k = -1;
- i++;
+ i++;
- for (j = 0; j < sizeof(ifaces) / sizeof(ifaces[0]); j++)
- if (!strcmp(ifaces[j].name, argv[i]))
- k = j;
+ for (j = 0; j < sizeof(ifaces) / sizeof(ifaces[0]); j++)
+ if (!strcmp(ifaces[j].name, argv[i]))
+ k = j;
- if (k >= 0)
- iface = ifaces[k].iface;
- else
- usage_error("Error: Unrecognized argument (%s) to --codec\n",
- argv[i]);
- }
- else
- usage_error("Error: Option --codec requires argument.\n");
- }
- else if (!strcmp(argv[i], "-v"))
- verbose = 1;
- else if (!strcmp(argv[i], "-h"))
- if (i + 1 < argc)
- {
- h = atoi(argv[++i]);
- }
- else
- usage_error("Error: Option -h requires argument.\n");
- else if (!strcmp(argv[i], "-w"))
- if (i + 1 < argc)
- {
- w = atoi(argv[++i]);
- }
- else
- usage_error("Error: Option -w requires argument.\n");
- else if (!strcmp(argv[i], "--help"))
- usage_exit();
+ if (k >= 0)
+ iface = ifaces[k].iface;
else
- usage_error("Error: Unrecognized option %s\n\n", argv[i]);
- }
+ usage_error("Error: Unrecognized argument (%s) to --codec\n",
+ argv[i]);
+ } else
+ usage_error("Error: Option --codec requires argument.\n");
+ } else if (!strcmp(argv[i], "-v"))
+ verbose = 1;
+ else if (!strcmp(argv[i], "-h"))
+ if (i + 1 < argc) {
+ h = atoi(argv[++i]);
+ } else
+ usage_error("Error: Option -h requires argument.\n");
+ else if (!strcmp(argv[i], "-w"))
+ if (i + 1 < argc) {
+ w = atoi(argv[++i]);
+ } else
+ usage_error("Error: Option -w requires argument.\n");
+ else if (!strcmp(argv[i], "--help"))
+ usage_exit();
+ else
+ usage_error("Error: Unrecognized option %s\n\n", argv[i]);
+ }
- if (argc == 1)
- printf("Using built-in defaults. For options, rerun with --help\n\n");
+ if (argc == 1)
+ printf("Using built-in defaults. For options, rerun with --help\n\n");
- /* XMA mode is not supported on all decoders! */
- if (!(vpx_codec_get_caps(iface) & VPX_CODEC_CAP_XMA))
- {
- printf("%s does not support XMA mode!\n", vpx_codec_iface_name(iface));
+ /* XMA mode is not supported on all decoders! */
+ if (!(vpx_codec_get_caps(iface) & VPX_CODEC_CAP_XMA)) {
+ printf("%s does not support XMA mode!\n", vpx_codec_iface_name(iface));
+ return EXIT_FAILURE;
+ }
+
+ /* The codec knows how much memory to allocate based on the size of the
+ * encoded frames. This data can be parsed from the bitstream with
+ * vpx_codec_peek_stream_info() if a bitstream is available. Otherwise,
+ * a fixed size can be used that will be the upper limit on the frame
+ * size the decoder can decode.
+ */
+ cfg.w = w;
+ cfg.h = h;
+
+ /* Initialize the decoder in XMA mode. */
+ if (vpx_codec_dec_init(&decoder, iface, &cfg, VPX_CODEC_USE_XMA)) {
+ printf("Failed to initialize decoder in XMA mode: %s\n", vpx_codec_error(&decoder));
+ return EXIT_FAILURE;
+ }
+
+ /* Iterate through the list of memory maps, allocating them with the
+ * requested alignment.
+ */
+ iter = NULL;
+
+ do {
+ vpx_codec_mmap_t mmap;
+ unsigned int align;
+
+ res = vpx_codec_get_mem_map(&decoder, &mmap, &iter);
+ align = mmap.align ? mmap.align - 1 : 0;
+
+ if (!res) {
+ if (verbose)
+ printf("Allocating segment %u, size %lu, align %u %s\n",
+ mmap.id, mmap.sz, mmap.align,
+ mmap.flags & VPX_CODEC_MEM_ZERO ? "(ZEROED)" : "");
+
+ if (mmap.flags & VPX_CODEC_MEM_ZERO)
+ mmap.priv = calloc(1, mmap.sz + align);
+ else
+ mmap.priv = malloc(mmap.sz + align);
+
+ mmap.base = (void *)((((uintptr_t)mmap.priv) + align) & ~(uintptr_t)align);
+ mmap.dtor = my_mem_dtor;
+ alloc_sz += mmap.sz + align;
+
+ if (vpx_codec_set_mem_map(&decoder, &mmap, 1)) {
+ printf("Failed to set mmap: %s\n", vpx_codec_error(&decoder));
return EXIT_FAILURE;
+ }
+ } else if (res != VPX_CODEC_LIST_END) {
+ printf("Failed to get mmap: %s\n", vpx_codec_error(&decoder));
+ return EXIT_FAILURE;
}
+ } while (res != VPX_CODEC_LIST_END);
- /* The codec knows how much memory to allocate based on the size of the
- * encoded frames. This data can be parsed from the bitstream with
- * vpx_codec_peek_stream_info() if a bitstream is available. Otherwise,
- * a fixed size can be used that will be the upper limit on the frame
- * size the decoder can decode.
- */
- cfg.w = w;
- cfg.h = h;
-
- /* Initialize the decoder in XMA mode. */
- if (vpx_codec_dec_init(&decoder, iface, &cfg, VPX_CODEC_USE_XMA))
- {
- printf("Failed to initialize decoder in XMA mode: %s\n", vpx_codec_error(&decoder));
- return EXIT_FAILURE;
- }
-
- /* Iterate through the list of memory maps, allocating them with the
- * requested alignment.
- */
- iter = NULL;
-
- do
- {
- vpx_codec_mmap_t mmap;
- unsigned int align;
-
- res = vpx_codec_get_mem_map(&decoder, &mmap, &iter);
- align = mmap.align ? mmap.align - 1 : 0;
-
- if (!res)
- {
- if (verbose)
- printf("Allocating segment %u, size %lu, align %u %s\n",
- mmap.id, mmap.sz, mmap.align,
- mmap.flags & VPX_CODEC_MEM_ZERO ? "(ZEROED)" : "");
-
- if (mmap.flags & VPX_CODEC_MEM_ZERO)
- mmap.priv = calloc(1, mmap.sz + align);
- else
- mmap.priv = malloc(mmap.sz + align);
-
- mmap.base = (void *)((((uintptr_t)mmap.priv) + align) & ~(uintptr_t)align);
- mmap.dtor = my_mem_dtor;
- alloc_sz += mmap.sz + align;
-
- if (vpx_codec_set_mem_map(&decoder, &mmap, 1))
- {
- printf("Failed to set mmap: %s\n", vpx_codec_error(&decoder));
- return EXIT_FAILURE;
- }
- }
- else if (res != VPX_CODEC_LIST_END)
- {
- printf("Failed to get mmap: %s\n", vpx_codec_error(&decoder));
- return EXIT_FAILURE;
- }
- }
- while (res != VPX_CODEC_LIST_END);
-
- printf("%s\n %d bytes external memory required for %dx%d.\n",
- decoder.name, alloc_sz, cfg.w, cfg.h);
- vpx_codec_destroy(&decoder);
- return EXIT_SUCCESS;
+ printf("%s\n %d bytes external memory required for %dx%d.\n",
+ decoder.name, alloc_sz, cfg.w, cfg.h);
+ vpx_codec_destroy(&decoder);
+ return EXIT_SUCCESS;
}
diff --git a/examples.mk b/examples.mk
index 90913e6..f1cc42b 100644
--- a/examples.mk
+++ b/examples.mk
@@ -38,7 +38,7 @@
vpxenc.SRCS += libmkv/EbmlWriter.h
vpxenc.GUID = 548DEC74-7A15-4B2B-AFC3-AA102E7C25C1
vpxenc.DESCRIPTION = Full featured encoder
-UTILS-$(CONFIG_ENCODERS) += vp8_scalable_patterns.c
+UTILS-$(CONFIG_VP8_ENCODER) += vp8_scalable_patterns.c
vp8_scalable_patterns.GUID = 0D6A210B-F482-4D6F-8570-4A9C01ACC88C
vp8_scalable_patterns.DESCRIPTION = Temporal Scalability Encoder
@@ -56,37 +56,37 @@
#example_xma.GUID = A955FC4A-73F1-44F7-135E-30D84D32F022
#example_xma.DESCRIPTION = External Memory Allocation mode usage
-GEN_EXAMPLES-$(CONFIG_DECODERS) += simple_decoder.c
+GEN_EXAMPLES-$(CONFIG_VP8_DECODER) += simple_decoder.c
simple_decoder.GUID = D3BBF1E9-2427-450D-BBFF-B2843C1D44CC
simple_decoder.DESCRIPTION = Simplified decoder loop
-GEN_EXAMPLES-$(CONFIG_DECODERS) += postproc.c
+GEN_EXAMPLES-$(CONFIG_VP8_DECODER) += postproc.c
postproc.GUID = 65E33355-F35E-4088-884D-3FD4905881D7
postproc.DESCRIPTION = Decoder postprocessor control
-GEN_EXAMPLES-$(CONFIG_DECODERS) += decode_to_md5.c
+GEN_EXAMPLES-$(CONFIG_VP8_DECODER) += decode_to_md5.c
decode_to_md5.SRCS += md5_utils.h md5_utils.c
decode_to_md5.GUID = 59120B9B-2735-4BFE-B022-146CA340FE42
decode_to_md5.DESCRIPTION = Frame by frame MD5 checksum
-GEN_EXAMPLES-$(CONFIG_ENCODERS) += simple_encoder.c
+GEN_EXAMPLES-$(CONFIG_VP8_ENCODER) += simple_encoder.c
simple_encoder.GUID = 4607D299-8A71-4D2C-9B1D-071899B6FBFD
simple_encoder.DESCRIPTION = Simplified encoder loop
-GEN_EXAMPLES-$(CONFIG_ENCODERS) += twopass_encoder.c
+GEN_EXAMPLES-$(CONFIG_VP8_ENCODER) += twopass_encoder.c
twopass_encoder.GUID = 73494FA6-4AF9-4763-8FBB-265C92402FD8
twopass_encoder.DESCRIPTION = Two-pass encoder loop
-GEN_EXAMPLES-$(CONFIG_ENCODERS) += force_keyframe.c
+GEN_EXAMPLES-$(CONFIG_VP8_ENCODER) += force_keyframe.c
force_keyframe.GUID = 3C67CADF-029F-4C86-81F5-D6D4F51177F0
force_keyframe.DESCRIPTION = Force generation of keyframes
ifeq ($(CONFIG_DECODERS),yes)
-GEN_EXAMPLES-$(CONFIG_ENCODERS) += decode_with_drops.c
+GEN_EXAMPLES-$(CONFIG_VP8_ENCODER) += decode_with_drops.c
endif
decode_with_drops.GUID = CE5C53C4-8DDA-438A-86ED-0DDD3CDB8D26
decode_with_drops.DESCRIPTION = Drops frames while decoding
-ifeq ($(CONFIG_DECODERS),yes)
+ifeq ($(CONFIG_VP8_DECODER),yes)
GEN_EXAMPLES-$(CONFIG_ERROR_CONCEALMENT) += decode_with_partial_drops.c
endif
decode_with_partial_drops.GUID = 61C2D026-5754-46AC-916F-1343ECC5537E
decode_with_partial_drops.DESCRIPTION = Drops parts of frames while decoding
-GEN_EXAMPLES-$(CONFIG_ENCODERS) += error_resilient.c
+GEN_EXAMPLES-$(CONFIG_VP8_ENCODER) += error_resilient.c
error_resilient.GUID = DF5837B9-4145-4F92-A031-44E4F832E00C
error_resilient.DESCRIPTION = Error Resiliency Feature
@@ -115,9 +115,11 @@
# when building for bare-metal targets
ifeq ($(CONFIG_OS_SUPPORT), yes)
CODEC_EXTRA_LIBS-$(CONFIG_VP8) += m
+CODEC_EXTRA_LIBS-$(CONFIG_VP9) += m
else
ifeq ($(CONFIG_GCC), yes)
CODEC_EXTRA_LIBS-$(CONFIG_VP8) += m
+ CODEC_EXTRA_LIBS-$(CONFIG_VP9) += m
endif
endif
#
@@ -136,6 +138,8 @@
LIB_PATH-yes += $(if $(BUILD_PFX),$(BUILD_PFX),.)
INC_PATH-$(CONFIG_VP8_DECODER) += $(SRC_PATH_BARE)/vp8
INC_PATH-$(CONFIG_VP8_ENCODER) += $(SRC_PATH_BARE)/vp8
+ INC_PATH-$(CONFIG_VP9_DECODER) += $(SRC_PATH_BARE)/vp9
+ INC_PATH-$(CONFIG_VP9_ENCODER) += $(SRC_PATH_BARE)/vp9
LIB_PATH := $(call enabled,LIB_PATH)
INC_PATH := $(call enabled,INC_PATH)
endif
@@ -179,7 +183,8 @@
# Instantiate linker template for all examples.
CODEC_LIB=$(if $(CONFIG_DEBUG_LIBS),vpx_g,vpx)
-CODEC_LIB_SUF=$(if $(CONFIG_SHARED),.so,.a)
+SHARED_LIB_SUF=$(if $(filter darwin%,$(TGT_OS)),.dylib,.so)
+CODEC_LIB_SUF=$(if $(CONFIG_SHARED),$(SHARED_LIB_SUF),.a)
$(foreach bin,$(BINS-yes),\
$(if $(BUILD_OBJS),$(eval $(bin):\
$(LIB_PATH)/lib$(CODEC_LIB)$(CODEC_LIB_SUF)))\
@@ -209,7 +214,7 @@
# Set up additional MSVS environment
ifeq ($(CONFIG_MSVS),yes)
-CODEC_LIB=$(if $(CONFIG_STATIC_MSVCRT),vpxmt,vpxmd)
+CODEC_LIB=$(if $(CONFIG_SHARED),vpx,$(if $(CONFIG_STATIC_MSVCRT),vpxmt,vpxmd))
# This variable uses deferred expansion intentionally, since the results of
# $(wildcard) may change during the course of the Make.
VS_PLATFORMS = $(foreach d,$(wildcard */Release/$(CODEC_LIB).lib),$(word 1,$(subst /, ,$(d))))
diff --git a/examples/decoder_tmpl.c b/examples/decoder_tmpl.c
index 8194f0a..597fea2 100644
--- a/examples/decoder_tmpl.c
+++ b/examples/decoder_tmpl.c
@@ -12,6 +12,7 @@
/*
@*INTRODUCTION
*/
+#include "vpx_config.h"
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
diff --git a/examples/decoder_tmpl.txt b/examples/decoder_tmpl.txt
index e652a63..3d230a5 100644
--- a/examples/decoder_tmpl.txt
+++ b/examples/decoder_tmpl.txt
@@ -1,7 +1,7 @@
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_INCLUDES
#define VPX_CODEC_DISABLE_COMPAT 1
#include "vpx/vpx_decoder.h"
-#include "vpx/vp8dx.h"
+#include "vpx/vp9dx.h"
#define interface (vpx_codec_vp8_dx())
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_INCLUDES
diff --git a/examples/encoder_tmpl.txt b/examples/encoder_tmpl.txt
index 1afbd8b..9f8f4af 100644
--- a/examples/encoder_tmpl.txt
+++ b/examples/encoder_tmpl.txt
@@ -1,7 +1,7 @@
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ENC_INCLUDES
#define VPX_CODEC_DISABLE_COMPAT 1
#include "vpx/vpx_encoder.h"
-#include "vpx/vp8cx.h"
+#include "vpx/vp9cx.h"
#define interface (vpx_codec_vp8_cx())
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ENC_INCLUDES
diff --git a/examples/postproc.txt b/examples/postproc.txt
index 51b251a..e00bf59 100644
--- a/examples/postproc.txt
+++ b/examples/postproc.txt
@@ -51,7 +51,7 @@
postprocessors. VP8 is one example. The following sample code toggles
postprocessing on and off every 15 frames.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PRE_DECODE
-#if CONFIG_VP8_DECODER
+#if CONFIG_VP9_DECODER
if(frame_cnt%30 == 1) {
vp8_postproc_cfg_t pp = {0, 0, 0};
diff --git a/libmkv/EbmlBufferWriter.c b/libmkv/EbmlBufferWriter.c
index d9b04a8..574e478 100644
--- a/libmkv/EbmlBufferWriter.c
+++ b/libmkv/EbmlBufferWriter.c
@@ -1,60 +1,54 @@
-//#include <strmif.h>
+// #include <strmif.h>
#include "EbmlBufferWriter.h"
#include "EbmlWriter.h"
-//#include <cassert>
-//#include <limits>
-//#include <malloc.h> //_alloca
+// #include <cassert>
+// #include <limits>
+// #include <malloc.h> //_alloca
#include <stdlib.h>
#include <wchar.h>
#include <string.h>
-void Ebml_Write(EbmlGlobal *glob, const void *buffer_in, unsigned long len)
-{
- unsigned char *src = glob->buf;
- src += glob->offset;
- memcpy(src, buffer_in, len);
- glob->offset += len;
+void Ebml_Write(EbmlGlobal *glob, const void *buffer_in, unsigned long len) {
+ unsigned char *src = glob->buf;
+ src += glob->offset;
+ memcpy(src, buffer_in, len);
+ glob->offset += len;
}
-static void _Serialize(EbmlGlobal *glob, const unsigned char *p, const unsigned char *q)
-{
- while (q != p)
- {
- --q;
+static void _Serialize(EbmlGlobal *glob, const unsigned char *p, const unsigned char *q) {
+ while (q != p) {
+ --q;
- unsigned long cbWritten;
- memcpy(&(glob->buf[glob->offset]), q, 1);
- glob->offset ++;
- }
+ unsigned long cbWritten;
+ memcpy(&(glob->buf[glob->offset]), q, 1);
+ glob->offset++;
+ }
}
-void Ebml_Serialize(EbmlGlobal *glob, const void *buffer_in, unsigned long len)
-{
- //assert(buf);
+void Ebml_Serialize(EbmlGlobal *glob, const void *buffer_in, unsigned long len) {
+ // assert(buf);
- const unsigned char *const p = (const unsigned char *)(buffer_in);
- const unsigned char *const q = p + len;
+ const unsigned char *const p = (const unsigned char *)(buffer_in);
+ const unsigned char *const q = p + len;
- _Serialize(glob, p, q);
+ _Serialize(glob, p, q);
}
-void Ebml_StartSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc, unsigned long class_id)
-{
- Ebml_WriteID(glob, class_id);
- ebmlLoc->offset = glob->offset;
- //todo this is always taking 8 bytes, this may need later optimization
- unsigned long long unknownLen = 0x01FFFFFFFFFFFFFFLLU;
- Ebml_Serialize(glob, (void *)&unknownLen, 8); //this is a key that says lenght unknown
+void Ebml_StartSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc, unsigned long class_id) {
+ Ebml_WriteID(glob, class_id);
+ ebmlLoc->offset = glob->offset;
+ // todo this is always taking 8 bytes, this may need later optimization
+ unsigned long long unknownLen = 0x01FFFFFFFFFFFFFFLLU;
+ Ebml_Serialize(glob, (void *)&unknownLen, 8); // this is a key that says length unknown
}
-void Ebml_EndSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc)
-{
- unsigned long long size = glob->offset - ebmlLoc->offset - 8;
- unsigned long long curOffset = glob->offset;
- glob->offset = ebmlLoc->offset;
- size |= 0x0100000000000000LLU;
- Ebml_Serialize(glob, &size, 8);
- glob->offset = curOffset;
+void Ebml_EndSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc) {
+ unsigned long long size = glob->offset - ebmlLoc->offset - 8;
+ unsigned long long curOffset = glob->offset;
+ glob->offset = ebmlLoc->offset;
+ size |= 0x0100000000000000LLU;
+ Ebml_Serialize(glob, &size, 8);
+ glob->offset = curOffset;
}
diff --git a/libmkv/EbmlBufferWriter.h b/libmkv/EbmlBufferWriter.h
index ba0a9b3..acd5c2a 100644
--- a/libmkv/EbmlBufferWriter.h
+++ b/libmkv/EbmlBufferWriter.h
@@ -1,16 +1,14 @@
#ifndef EBMLBUFFERWRITER_HPP
#define EBMLBUFFERWRITER_HPP
-typedef struct
-{
- unsigned long long offset;
+typedef struct {
+ unsigned long long offset;
} EbmlLoc;
-typedef struct
-{
- unsigned char *buf;
- unsigned int length;
- unsigned int offset;
+typedef struct {
+ unsigned char *buf;
+ unsigned int length;
+ unsigned int offset;
} EbmlGlobal;
diff --git a/libmkv/EbmlIDs.h b/libmkv/EbmlIDs.h
index e3ce585..4920bf9 100644
--- a/libmkv/EbmlIDs.h
+++ b/libmkv/EbmlIDs.h
@@ -12,35 +12,34 @@
/* Commenting out values not available in webm, but available in matroska */
-enum mkv
-{
- EBML = 0x1A45DFA3,
- EBMLVersion = 0x4286,
- EBMLReadVersion = 0x42F7,
- EBMLMaxIDLength = 0x42F2,
- EBMLMaxSizeLength = 0x42F3,
- DocType = 0x4282,
- DocTypeVersion = 0x4287,
- DocTypeReadVersion = 0x4285,
+enum mkv {
+ EBML = 0x1A45DFA3,
+ EBMLVersion = 0x4286,
+ EBMLReadVersion = 0x42F7,
+ EBMLMaxIDLength = 0x42F2,
+ EBMLMaxSizeLength = 0x42F3,
+ DocType = 0x4282,
+ DocTypeVersion = 0x4287,
+ DocTypeReadVersion = 0x4285,
/* CRC_32 = 0xBF, */
- Void = 0xEC,
- SignatureSlot = 0x1B538667,
- SignatureAlgo = 0x7E8A,
- SignatureHash = 0x7E9A,
- SignaturePublicKey = 0x7EA5,
- Signature = 0x7EB5,
- SignatureElements = 0x7E5B,
- SignatureElementList = 0x7E7B,
- SignedElement = 0x6532,
- /* segment */
- Segment = 0x18538067,
- /* Meta Seek Information */
- SeekHead = 0x114D9B74,
- Seek = 0x4DBB,
- SeekID = 0x53AB,
- SeekPosition = 0x53AC,
- /* Segment Information */
- Info = 0x1549A966,
+ Void = 0xEC,
+ SignatureSlot = 0x1B538667,
+ SignatureAlgo = 0x7E8A,
+ SignatureHash = 0x7E9A,
+ SignaturePublicKey = 0x7EA5,
+ Signature = 0x7EB5,
+ SignatureElements = 0x7E5B,
+ SignatureElementList = 0x7E7B,
+ SignedElement = 0x6532,
+ /* segment */
+ Segment = 0x18538067,
+ /* Meta Seek Information */
+ SeekHead = 0x114D9B74,
+ Seek = 0x4DBB,
+ SeekID = 0x53AB,
+ SeekPosition = 0x53AC,
+ /* Segment Information */
+ Info = 0x1549A966,
/* SegmentUID = 0x73A4, */
/* SegmentFilename = 0x7384, */
/* PrevUID = 0x3CB923, */
@@ -52,61 +51,61 @@
/* ChapterTranslateEditionUID = 0x69FC, */
/* ChapterTranslateCodec = 0x69BF, */
/* ChapterTranslateID = 0x69A5, */
- TimecodeScale = 0x2AD7B1,
- Segment_Duration = 0x4489,
- DateUTC = 0x4461,
+ TimecodeScale = 0x2AD7B1,
+ Segment_Duration = 0x4489,
+ DateUTC = 0x4461,
/* Title = 0x7BA9, */
- MuxingApp = 0x4D80,
- WritingApp = 0x5741,
- /* Cluster */
- Cluster = 0x1F43B675,
- Timecode = 0xE7,
+ MuxingApp = 0x4D80,
+ WritingApp = 0x5741,
+ /* Cluster */
+ Cluster = 0x1F43B675,
+ Timecode = 0xE7,
/* SilentTracks = 0x5854, */
/* SilentTrackNumber = 0x58D7, */
/* Position = 0xA7, */
- PrevSize = 0xAB,
- BlockGroup = 0xA0,
- Block = 0xA1,
+ PrevSize = 0xAB,
+ BlockGroup = 0xA0,
+ Block = 0xA1,
/* BlockVirtual = 0xA2, */
/* BlockAdditions = 0x75A1, */
/* BlockMore = 0xA6, */
/* BlockAddID = 0xEE, */
/* BlockAdditional = 0xA5, */
- BlockDuration = 0x9B,
+ BlockDuration = 0x9B,
/* ReferencePriority = 0xFA, */
- ReferenceBlock = 0xFB,
+ ReferenceBlock = 0xFB,
/* ReferenceVirtual = 0xFD, */
/* CodecState = 0xA4, */
/* Slices = 0x8E, */
/* TimeSlice = 0xE8, */
- LaceNumber = 0xCC,
+ LaceNumber = 0xCC,
/* FrameNumber = 0xCD, */
/* BlockAdditionID = 0xCB, */
/* MkvDelay = 0xCE, */
/* Cluster_Duration = 0xCF, */
- SimpleBlock = 0xA3,
+ SimpleBlock = 0xA3,
/* EncryptedBlock = 0xAF, */
- /* Track */
- Tracks = 0x1654AE6B,
- TrackEntry = 0xAE,
- TrackNumber = 0xD7,
- TrackUID = 0x73C5,
- TrackType = 0x83,
- FlagEnabled = 0xB9,
- FlagDefault = 0x88,
- FlagForced = 0x55AA,
- FlagLacing = 0x9C,
+ /* Track */
+ Tracks = 0x1654AE6B,
+ TrackEntry = 0xAE,
+ TrackNumber = 0xD7,
+ TrackUID = 0x73C5,
+ TrackType = 0x83,
+ FlagEnabled = 0xB9,
+ FlagDefault = 0x88,
+ FlagForced = 0x55AA,
+ FlagLacing = 0x9C,
/* MinCache = 0x6DE7, */
/* MaxCache = 0x6DF8, */
- DefaultDuration = 0x23E383,
+ DefaultDuration = 0x23E383,
/* TrackTimecodeScale = 0x23314F, */
/* TrackOffset = 0x537F, */
/* MaxBlockAdditionID = 0x55EE, */
- Name = 0x536E,
- Language = 0x22B59C,
- CodecID = 0x86,
- CodecPrivate = 0x63A2,
- CodecName = 0x258688,
+ Name = 0x536E,
+ Language = 0x22B59C,
+ CodecID = 0x86,
+ CodecPrivate = 0x63A2,
+ CodecName = 0x258688,
/* AttachmentLink = 0x7446, */
/* CodecSettings = 0x3A9697, */
/* CodecInfoURL = 0x3B4040, */
@@ -117,33 +116,33 @@
/* TrackTranslateEditionUID = 0x66FC, */
/* TrackTranslateCodec = 0x66BF, */
/* TrackTranslateTrackID = 0x66A5, */
- /* video */
- Video = 0xE0,
- FlagInterlaced = 0x9A,
- StereoMode = 0x53B8,
- PixelWidth = 0xB0,
- PixelHeight = 0xBA,
- PixelCropBottom = 0x54AA,
- PixelCropTop = 0x54BB,
- PixelCropLeft = 0x54CC,
- PixelCropRight = 0x54DD,
- DisplayWidth = 0x54B0,
- DisplayHeight = 0x54BA,
- DisplayUnit = 0x54B2,
- AspectRatioType = 0x54B3,
+ /* video */
+ Video = 0xE0,
+ FlagInterlaced = 0x9A,
+ StereoMode = 0x53B8,
+ PixelWidth = 0xB0,
+ PixelHeight = 0xBA,
+ PixelCropBottom = 0x54AA,
+ PixelCropTop = 0x54BB,
+ PixelCropLeft = 0x54CC,
+ PixelCropRight = 0x54DD,
+ DisplayWidth = 0x54B0,
+ DisplayHeight = 0x54BA,
+ DisplayUnit = 0x54B2,
+ AspectRatioType = 0x54B3,
/* ColourSpace = 0x2EB524, */
/* GammaValue = 0x2FB523, */
- FrameRate = 0x2383E3,
- /* end video */
- /* audio */
- Audio = 0xE1,
- SamplingFrequency = 0xB5,
- OutputSamplingFrequency = 0x78B5,
- Channels = 0x9F,
+ FrameRate = 0x2383E3,
+ /* end video */
+ /* audio */
+ Audio = 0xE1,
+ SamplingFrequency = 0xB5,
+ OutputSamplingFrequency = 0x78B5,
+ Channels = 0x9F,
/* ChannelPositions = 0x7D7B, */
- BitDepth = 0x6264,
- /* end audio */
- /* content encoding */
+ BitDepth = 0x6264,
+ /* end audio */
+ /* content encoding */
/* ContentEncodings = 0x6d80, */
/* ContentEncoding = 0x6240, */
/* ContentEncodingOrder = 0x5031, */
@@ -159,22 +158,22 @@
/* ContentSigKeyID = 0x47e4, */
/* ContentSigAlgo = 0x47e5, */
/* ContentSigHashAlgo = 0x47e6, */
- /* end content encoding */
- /* Cueing Data */
- Cues = 0x1C53BB6B,
- CuePoint = 0xBB,
- CueTime = 0xB3,
- CueTrackPositions = 0xB7,
- CueTrack = 0xF7,
- CueClusterPosition = 0xF1,
- CueBlockNumber = 0x5378
+ /* end content encoding */
+ /* Cueing Data */
+ Cues = 0x1C53BB6B,
+ CuePoint = 0xBB,
+ CueTime = 0xB3,
+ CueTrackPositions = 0xB7,
+ CueTrack = 0xF7,
+ CueClusterPosition = 0xF1,
+ CueBlockNumber = 0x5378
/* CueCodecState = 0xEA, */
/* CueReference = 0xDB, */
/* CueRefTime = 0x96, */
/* CueRefCluster = 0x97, */
/* CueRefNumber = 0x535F, */
/* CueRefCodecState = 0xEB, */
- /* Attachment */
+ /* Attachment */
/* Attachments = 0x1941A469, */
/* AttachedFile = 0x61A7, */
/* FileDescription = 0x467E, */
@@ -183,7 +182,7 @@
/* FileData = 0x465C, */
/* FileUID = 0x46AE, */
/* FileReferral = 0x4675, */
- /* Chapters */
+ /* Chapters */
/* Chapters = 0x1043A770, */
/* EditionEntry = 0x45B9, */
/* EditionUID = 0x45BC, */
@@ -211,7 +210,7 @@
/* ChapProcessCommand = 0x6911, */
/* ChapProcessTime = 0x6922, */
/* ChapProcessData = 0x6933, */
- /* Tagging */
+ /* Tagging */
/* Tags = 0x1254C367, */
/* Tag = 0x7373, */
/* Targets = 0x63C0, */
diff --git a/libmkv/EbmlWriter.c b/libmkv/EbmlWriter.c
index d70f06e..5fc5ed2 100644
--- a/libmkv/EbmlWriter.c
+++ b/libmkv/EbmlWriter.c
@@ -18,158 +18,140 @@
#define LITERALU64(n) n##LLU
#endif
-void Ebml_WriteLen(EbmlGlobal *glob, int64_t val)
-{
- /* TODO check and make sure we are not > than 0x0100000000000000LLU */
- unsigned char size = 8; /* size in bytes to output */
+void Ebml_WriteLen(EbmlGlobal *glob, int64_t val) {
+ /* TODO: check that val is not greater than 0x0100000000000000LLU */
+ unsigned char size = 8; /* size in bytes to output */
- /* mask to compare for byte size */
- int64_t minVal = 0xff;
+ /* mask to compare for byte size */
+ int64_t minVal = 0xff;
- for (size = 1; size < 8; size ++)
- {
- if (val < minVal)
- break;
+ for (size = 1; size < 8; size ++) {
+ if (val < minVal)
+ break;
- minVal = (minVal << 7);
+ minVal = (minVal << 7);
+ }
+
+ val |= (((uint64_t)0x80) << ((size - 1) * 7));
+
+ Ebml_Serialize(glob, (void *) &val, sizeof(val), size);
+}
+
+void Ebml_WriteString(EbmlGlobal *glob, const char *str) {
+ const size_t size_ = strlen(str);
+ const uint64_t size = size_;
+ Ebml_WriteLen(glob, size);
+ /* TODO: it's not clear from the spec whether the nul terminator
+ * should be serialized too. For now we omit the null terminator.
+ */
+ Ebml_Write(glob, str, (unsigned long)size);
+}
+
+void Ebml_WriteUTF8(EbmlGlobal *glob, const wchar_t *wstr) {
+ const size_t strlen = wcslen(wstr);
+
+ /* TODO: it's not clear from the spec whether the nul terminator
+ * should be serialized too. For now we omit it (wcslen() does not count it).
+ */
+ const uint64_t size = strlen;
+
+ Ebml_WriteLen(glob, size);
+ Ebml_Write(glob, wstr, (unsigned long)size);
+}
+
+void Ebml_WriteID(EbmlGlobal *glob, unsigned long class_id) {
+ int len;
+
+ if (class_id >= 0x01000000)
+ len = 4;
+ else if (class_id >= 0x00010000)
+ len = 3;
+ else if (class_id >= 0x00000100)
+ len = 2;
+ else
+ len = 1;
+
+ Ebml_Serialize(glob, (void *)&class_id, sizeof(class_id), len);
+}
+
+void Ebml_SerializeUnsigned64(EbmlGlobal *glob, unsigned long class_id, uint64_t ui) {
+ unsigned char sizeSerialized = 8 | 0x80;
+ Ebml_WriteID(glob, class_id);
+ Ebml_Serialize(glob, &sizeSerialized, sizeof(sizeSerialized), 1);
+ Ebml_Serialize(glob, &ui, sizeof(ui), 8);
+}
+
+void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned long ui) {
+ unsigned char size = 8; /* size in bytes to output */
+ unsigned char sizeSerialized = 0;
+ unsigned long minVal;
+
+ Ebml_WriteID(glob, class_id);
+ minVal = 0x7fLU; /* mask to compare for byte size */
+
+ for (size = 1; size < 4; size ++) {
+ if (ui < minVal) {
+ break;
}
- val |= (((uint64_t)0x80) << ((size - 1) * 7));
+ minVal <<= 7;
+ }
- Ebml_Serialize(glob, (void *) &val, sizeof(val), size);
-}
-
-void Ebml_WriteString(EbmlGlobal *glob, const char *str)
-{
- const size_t size_ = strlen(str);
- const uint64_t size = size_;
- Ebml_WriteLen(glob, size);
- /* TODO: it's not clear from the spec whether the nul terminator
- * should be serialized too. For now we omit the null terminator.
- */
- Ebml_Write(glob, str, (unsigned long)size);
-}
-
-void Ebml_WriteUTF8(EbmlGlobal *glob, const wchar_t *wstr)
-{
- const size_t strlen = wcslen(wstr);
-
- /* TODO: it's not clear from the spec whether the nul terminator
- * should be serialized too. For now we include it.
- */
- const uint64_t size = strlen;
-
- Ebml_WriteLen(glob, size);
- Ebml_Write(glob, wstr, (unsigned long)size);
-}
-
-void Ebml_WriteID(EbmlGlobal *glob, unsigned long class_id)
-{
- int len;
-
- if (class_id >= 0x01000000)
- len = 4;
- else if (class_id >= 0x00010000)
- len = 3;
- else if (class_id >= 0x00000100)
- len = 2;
- else
- len = 1;
-
- Ebml_Serialize(glob, (void *)&class_id, sizeof(class_id), len);
-}
-
-void Ebml_SerializeUnsigned64(EbmlGlobal *glob, unsigned long class_id, uint64_t ui)
-{
- unsigned char sizeSerialized = 8 | 0x80;
- Ebml_WriteID(glob, class_id);
- Ebml_Serialize(glob, &sizeSerialized, sizeof(sizeSerialized), 1);
- Ebml_Serialize(glob, &ui, sizeof(ui), 8);
-}
-
-void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned long ui)
-{
- unsigned char size = 8; /* size in bytes to output */
- unsigned char sizeSerialized = 0;
- unsigned long minVal;
-
- Ebml_WriteID(glob, class_id);
- minVal = 0x7fLU; /* mask to compare for byte size */
-
- for (size = 1; size < 4; size ++)
- {
- if (ui < minVal)
- {
- break;
- }
-
- minVal <<= 7;
- }
-
- sizeSerialized = 0x80 | size;
- Ebml_Serialize(glob, &sizeSerialized, sizeof(sizeSerialized), 1);
- Ebml_Serialize(glob, &ui, sizeof(ui), size);
+ sizeSerialized = 0x80 | size;
+ Ebml_Serialize(glob, &sizeSerialized, sizeof(sizeSerialized), 1);
+ Ebml_Serialize(glob, &ui, sizeof(ui), size);
}
/* TODO: perhaps this is a poor name for this id serializer helper function */
-void Ebml_SerializeBinary(EbmlGlobal *glob, unsigned long class_id, unsigned long bin)
-{
- int size;
- for (size=4; size > 1; size--)
- {
- if (bin & 0x000000ff << ((size-1) * 8))
- break;
- }
- Ebml_WriteID(glob, class_id);
- Ebml_WriteLen(glob, size);
- Ebml_WriteID(glob, bin);
+void Ebml_SerializeBinary(EbmlGlobal *glob, unsigned long class_id, unsigned long bin) {
+ int size;
+ for (size = 4; size > 1; size--) {
+ if (bin & 0x000000ff << ((size - 1) * 8))
+ break;
+ }
+ Ebml_WriteID(glob, class_id);
+ Ebml_WriteLen(glob, size);
+ Ebml_WriteID(glob, bin);
}
-void Ebml_SerializeFloat(EbmlGlobal *glob, unsigned long class_id, double d)
-{
- unsigned char len = 0x88;
+void Ebml_SerializeFloat(EbmlGlobal *glob, unsigned long class_id, double d) {
+ unsigned char len = 0x88;
- Ebml_WriteID(glob, class_id);
- Ebml_Serialize(glob, &len, sizeof(len), 1);
- Ebml_Serialize(glob, &d, sizeof(d), 8);
+ Ebml_WriteID(glob, class_id);
+ Ebml_Serialize(glob, &len, sizeof(len), 1);
+ Ebml_Serialize(glob, &d, sizeof(d), 8);
}
-void Ebml_WriteSigned16(EbmlGlobal *glob, short val)
-{
- signed long out = ((val & 0x003FFFFF) | 0x00200000) << 8;
- Ebml_Serialize(glob, &out, sizeof(out), 3);
+void Ebml_WriteSigned16(EbmlGlobal *glob, short val) {
+ signed long out = ((val & 0x003FFFFF) | 0x00200000) << 8;
+ Ebml_Serialize(glob, &out, sizeof(out), 3);
}
-void Ebml_SerializeString(EbmlGlobal *glob, unsigned long class_id, const char *s)
-{
- Ebml_WriteID(glob, class_id);
- Ebml_WriteString(glob, s);
+void Ebml_SerializeString(EbmlGlobal *glob, unsigned long class_id, const char *s) {
+ Ebml_WriteID(glob, class_id);
+ Ebml_WriteString(glob, s);
}
-void Ebml_SerializeUTF8(EbmlGlobal *glob, unsigned long class_id, wchar_t *s)
-{
- Ebml_WriteID(glob, class_id);
- Ebml_WriteUTF8(glob, s);
+void Ebml_SerializeUTF8(EbmlGlobal *glob, unsigned long class_id, wchar_t *s) {
+ Ebml_WriteID(glob, class_id);
+ Ebml_WriteUTF8(glob, s);
}
-void Ebml_SerializeData(EbmlGlobal *glob, unsigned long class_id, unsigned char *data, unsigned long data_length)
-{
- Ebml_WriteID(glob, class_id);
- Ebml_WriteLen(glob, data_length);
- Ebml_Write(glob, data, data_length);
+void Ebml_SerializeData(EbmlGlobal *glob, unsigned long class_id, unsigned char *data, unsigned long data_length) {
+ Ebml_WriteID(glob, class_id);
+ Ebml_WriteLen(glob, data_length);
+ Ebml_Write(glob, data, data_length);
}
-void Ebml_WriteVoid(EbmlGlobal *glob, unsigned long vSize)
-{
- unsigned char tmp = 0;
- unsigned long i = 0;
+void Ebml_WriteVoid(EbmlGlobal *glob, unsigned long vSize) {
+ unsigned char tmp = 0;
+ unsigned long i = 0;
- Ebml_WriteID(glob, 0xEC);
- Ebml_WriteLen(glob, vSize);
+ Ebml_WriteID(glob, 0xEC);
+ Ebml_WriteLen(glob, vSize);
- for (i = 0; i < vSize; i++)
- {
- Ebml_Write(glob, &tmp, 1);
- }
+ for (i = 0; i < vSize; i++) {
+ Ebml_Write(glob, &tmp, 1);
+ }
}
/* TODO Serialize Date */
diff --git a/libmkv/WebMElement.c b/libmkv/WebMElement.c
index 0ef5100..2f79a3c 100644
--- a/libmkv/WebMElement.c
+++ b/libmkv/WebMElement.c
@@ -14,106 +14,100 @@
#define kVorbisPrivateMaxSize 4000
-void writeHeader(EbmlGlobal *glob)
-{
- EbmlLoc start;
- Ebml_StartSubElement(glob, &start, EBML);
- Ebml_SerializeUnsigned(glob, EBMLVersion, 1);
- Ebml_SerializeUnsigned(glob, EBMLReadVersion, 1); //EBML Read Version
- Ebml_SerializeUnsigned(glob, EBMLMaxIDLength, 4); //EBML Max ID Length
- Ebml_SerializeUnsigned(glob, EBMLMaxSizeLength, 8); //EBML Max Size Length
- Ebml_SerializeString(glob, DocType, "webm"); //Doc Type
- Ebml_SerializeUnsigned(glob, DocTypeVersion, 2); //Doc Type Version
- Ebml_SerializeUnsigned(glob, DocTypeReadVersion, 2); //Doc Type Read Version
- Ebml_EndSubElement(glob, &start);
+void writeHeader(EbmlGlobal *glob) {
+ EbmlLoc start;
+ Ebml_StartSubElement(glob, &start, EBML);
+ Ebml_SerializeUnsigned(glob, EBMLVersion, 1);
+ Ebml_SerializeUnsigned(glob, EBMLReadVersion, 1); // EBML Read Version
+ Ebml_SerializeUnsigned(glob, EBMLMaxIDLength, 4); // EBML Max ID Length
+ Ebml_SerializeUnsigned(glob, EBMLMaxSizeLength, 8); // EBML Max Size Length
+ Ebml_SerializeString(glob, DocType, "webm"); // Doc Type
+ Ebml_SerializeUnsigned(glob, DocTypeVersion, 2); // Doc Type Version
+ Ebml_SerializeUnsigned(glob, DocTypeReadVersion, 2); // Doc Type Read Version
+ Ebml_EndSubElement(glob, &start);
}
void writeSimpleBlock(EbmlGlobal *glob, unsigned char trackNumber, short timeCode,
int isKeyframe, unsigned char lacingFlag, int discardable,
- unsigned char *data, unsigned long dataLength)
-{
- Ebml_WriteID(glob, SimpleBlock);
- unsigned long blockLength = 4 + dataLength;
- blockLength |= 0x10000000; //TODO check length < 0x0FFFFFFFF
- Ebml_Serialize(glob, &blockLength, sizeof(blockLength), 4);
- trackNumber |= 0x80; //TODO check track nubmer < 128
- Ebml_Write(glob, &trackNumber, 1);
- //Ebml_WriteSigned16(glob, timeCode,2); //this is 3 bytes
- Ebml_Serialize(glob, &timeCode, sizeof(timeCode), 2);
- unsigned char flags = 0x00 | (isKeyframe ? 0x80 : 0x00) | (lacingFlag << 1) | discardable;
- Ebml_Write(glob, &flags, 1);
- Ebml_Write(glob, data, dataLength);
+ unsigned char *data, unsigned long dataLength) {
+ Ebml_WriteID(glob, SimpleBlock);
+ unsigned long blockLength = 4 + dataLength;
+ blockLength |= 0x10000000; // TODO check length < 0x0FFFFFFFF
+ Ebml_Serialize(glob, &blockLength, sizeof(blockLength), 4);
+ trackNumber |= 0x80; // TODO check track number < 128
+ Ebml_Write(glob, &trackNumber, 1);
+ // Ebml_WriteSigned16(glob, timeCode,2); //this is 3 bytes
+ Ebml_Serialize(glob, &timeCode, sizeof(timeCode), 2);
+ unsigned char flags = 0x00 | (isKeyframe ? 0x80 : 0x00) | (lacingFlag << 1) | discardable;
+ Ebml_Write(glob, &flags, 1);
+ Ebml_Write(glob, data, dataLength);
}
-static UInt64 generateTrackID(unsigned int trackNumber)
-{
- UInt64 t = time(NULL) * trackNumber;
- UInt64 r = rand();
- r = r << 32;
- r += rand();
- UInt64 rval = t ^ r;
- return rval;
+static UInt64 generateTrackID(unsigned int trackNumber) {
+ UInt64 t = time(NULL) * trackNumber;
+ UInt64 r = rand();
+ r = r << 32;
+ r += rand();
+ UInt64 rval = t ^ r;
+ return rval;
}
void writeVideoTrack(EbmlGlobal *glob, unsigned int trackNumber, int flagLacing,
char *codecId, unsigned int pixelWidth, unsigned int pixelHeight,
- double frameRate)
-{
- EbmlLoc start;
- Ebml_StartSubElement(glob, &start, TrackEntry);
- Ebml_SerializeUnsigned(glob, TrackNumber, trackNumber);
- UInt64 trackID = generateTrackID(trackNumber);
- Ebml_SerializeUnsigned(glob, TrackUID, trackID);
- Ebml_SerializeString(glob, CodecName, "VP8"); //TODO shouldn't be fixed
+ double frameRate) {
+ EbmlLoc start;
+ Ebml_StartSubElement(glob, &start, TrackEntry);
+ Ebml_SerializeUnsigned(glob, TrackNumber, trackNumber);
+ UInt64 trackID = generateTrackID(trackNumber);
+ Ebml_SerializeUnsigned(glob, TrackUID, trackID);
+ Ebml_SerializeString(glob, CodecName, "VP8"); // TODO shouldn't be fixed
- Ebml_SerializeUnsigned(glob, TrackType, 1); //video is always 1
- Ebml_SerializeString(glob, CodecID, codecId);
- {
- EbmlLoc videoStart;
- Ebml_StartSubElement(glob, &videoStart, Video);
- Ebml_SerializeUnsigned(glob, PixelWidth, pixelWidth);
- Ebml_SerializeUnsigned(glob, PixelHeight, pixelHeight);
- Ebml_SerializeFloat(glob, FrameRate, frameRate);
- Ebml_EndSubElement(glob, &videoStart); //Video
- }
- Ebml_EndSubElement(glob, &start); //Track Entry
+ Ebml_SerializeUnsigned(glob, TrackType, 1); // video is always 1
+ Ebml_SerializeString(glob, CodecID, codecId);
+ {
+ EbmlLoc videoStart;
+ Ebml_StartSubElement(glob, &videoStart, Video);
+ Ebml_SerializeUnsigned(glob, PixelWidth, pixelWidth);
+ Ebml_SerializeUnsigned(glob, PixelHeight, pixelHeight);
+ Ebml_SerializeFloat(glob, FrameRate, frameRate);
+ Ebml_EndSubElement(glob, &videoStart); // Video
+ }
+ Ebml_EndSubElement(glob, &start); // Track Entry
}
void writeAudioTrack(EbmlGlobal *glob, unsigned int trackNumber, int flagLacing,
char *codecId, double samplingFrequency, unsigned int channels,
- unsigned char *private, unsigned long privateSize)
-{
- EbmlLoc start;
- Ebml_StartSubElement(glob, &start, TrackEntry);
- Ebml_SerializeUnsigned(glob, TrackNumber, trackNumber);
- UInt64 trackID = generateTrackID(trackNumber);
- Ebml_SerializeUnsigned(glob, TrackUID, trackID);
- Ebml_SerializeUnsigned(glob, TrackType, 2); //audio is always 2
- //I am using defaults for thesed required fields
- /* Ebml_SerializeUnsigned(glob, FlagEnabled, 1);
- Ebml_SerializeUnsigned(glob, FlagDefault, 1);
- Ebml_SerializeUnsigned(glob, FlagForced, 1);
- Ebml_SerializeUnsigned(glob, FlagLacing, flagLacing);*/
- Ebml_SerializeString(glob, CodecID, codecId);
- Ebml_SerializeData(glob, CodecPrivate, private, privateSize);
+ unsigned char *private, unsigned long privateSize) {
+ EbmlLoc start;
+ Ebml_StartSubElement(glob, &start, TrackEntry);
+ Ebml_SerializeUnsigned(glob, TrackNumber, trackNumber);
+ UInt64 trackID = generateTrackID(trackNumber);
+ Ebml_SerializeUnsigned(glob, TrackUID, trackID);
+ Ebml_SerializeUnsigned(glob, TrackType, 2); // audio is always 2
+ // I am using defaults for these required fields
+ /* Ebml_SerializeUnsigned(glob, FlagEnabled, 1);
+ Ebml_SerializeUnsigned(glob, FlagDefault, 1);
+ Ebml_SerializeUnsigned(glob, FlagForced, 1);
+ Ebml_SerializeUnsigned(glob, FlagLacing, flagLacing);*/
+ Ebml_SerializeString(glob, CodecID, codecId);
+ Ebml_SerializeData(glob, CodecPrivate, private, privateSize);
- Ebml_SerializeString(glob, CodecName, "VORBIS"); //fixed for now
- {
- EbmlLoc AudioStart;
- Ebml_StartSubElement(glob, &AudioStart, Audio);
- Ebml_SerializeFloat(glob, SamplingFrequency, samplingFrequency);
- Ebml_SerializeUnsigned(glob, Channels, channels);
- Ebml_EndSubElement(glob, &AudioStart);
- }
- Ebml_EndSubElement(glob, &start);
+ Ebml_SerializeString(glob, CodecName, "VORBIS"); // fixed for now
+ {
+ EbmlLoc AudioStart;
+ Ebml_StartSubElement(glob, &AudioStart, Audio);
+ Ebml_SerializeFloat(glob, SamplingFrequency, samplingFrequency);
+ Ebml_SerializeUnsigned(glob, Channels, channels);
+ Ebml_EndSubElement(glob, &AudioStart);
+ }
+ Ebml_EndSubElement(glob, &start);
}
-void writeSegmentInformation(EbmlGlobal *ebml, EbmlLoc* startInfo, unsigned long timeCodeScale, double duration)
-{
- Ebml_StartSubElement(ebml, startInfo, Info);
- Ebml_SerializeUnsigned(ebml, TimecodeScale, timeCodeScale);
- Ebml_SerializeFloat(ebml, Segment_Duration, duration * 1000.0); //Currently fixed to using milliseconds
- Ebml_SerializeString(ebml, 0x4D80, "QTmuxingAppLibWebM-0.0.1");
- Ebml_SerializeString(ebml, 0x5741, "QTwritingAppLibWebM-0.0.1");
- Ebml_EndSubElement(ebml, startInfo);
+void writeSegmentInformation(EbmlGlobal *ebml, EbmlLoc *startInfo, unsigned long timeCodeScale, double duration) {
+ Ebml_StartSubElement(ebml, startInfo, Info);
+ Ebml_SerializeUnsigned(ebml, TimecodeScale, timeCodeScale);
+ Ebml_SerializeFloat(ebml, Segment_Duration, duration * 1000.0); // Currently fixed to using milliseconds
+ Ebml_SerializeString(ebml, 0x4D80, "QTmuxingAppLibWebM-0.0.1");
+ Ebml_SerializeString(ebml, 0x5741, "QTwritingAppLibWebM-0.0.1");
+ Ebml_EndSubElement(ebml, startInfo);
}
/*
@@ -142,7 +136,7 @@
Ebml_SerializeString(ebml_out, 0x7384, segmentInformation.filename);
Ebml_SerializeUnsigned(ebml_out, 0x2AD7B1, segmentInformation.TimecodeScale);
Ebml_SerializeUnsigned(ebml_out, 0x4489, segmentInformation.Duration);
- //TODO date
+ // TODO date
Ebml_SerializeWString(ebml_out, 0x4D80, L"MKVMUX");
Ebml_SerializeWString(ebml_out, 0x5741, segmentInformation.WritingApp);
}
@@ -173,9 +167,9 @@
void Mkv_WriteVideoTrack(Ebml& ebml_out, TrackStruct & track, VideoTrackStruct& video)
{
EbmlLoc trackHeadLoc, videoHeadLoc;
- Ebml_StartSubElement(ebml_out, trackHeadLoc, 0xAE); //start Track
+ Ebml_StartSubElement(ebml_out, trackHeadLoc, 0xAE); // start Track
Mkv_WriteGenericTrackData(ebml_out, track);
- Ebml_StartSubElement(ebml_out, videoHeadLoc, 0xE0); //start Video
+ Ebml_StartSubElement(ebml_out, videoHeadLoc, 0xE0); // start Video
Ebml_SerializeUnsigned(ebml_out, 0x9A, video.FlagInterlaced ? 1 :0);
Ebml_SerializeUnsigned(ebml_out, 0xB0, video.PixelWidth);
Ebml_SerializeUnsigned(ebml_out, 0xBA, video.PixelHeight);
@@ -193,7 +187,7 @@
EbmlLoc trackHeadLoc, audioHeadLoc;
Ebml_StartSubElement(ebml_out, trackHeadLoc, 0xAE);
Mkv_WriteGenericTrackData(ebml_out, track);
- Ebml_StartSubElement(ebml_out, audioHeadLoc, 0xE0); //start Audio
+ Ebml_StartSubElement(ebml_out, audioHeadLoc, 0xE0); // start Audio
Ebml_SerializeFloat(ebml_out, 0xB5, video.SamplingFrequency);
Ebml_SerializeUnsigned(ebml_out, 0x9F, video.Channels);
Ebml_SerializeUnsigned(ebml_out, 0x6264, video.BitDepth);
@@ -213,7 +207,7 @@
Ebml_Write1UInt(ebml_out, block.TrackNumber);
Ebml_WriteSigned16(ebml_out,block.TimeCode);
unsigned char flags = 0x00 | (block.iskey ? 0x80:0x00) | (block.lacing << 1) | block.discardable;
- Ebml_Write1UInt(ebml_out, flags); //TODO this may be the wrong function
+ Ebml_Write1UInt(ebml_out, flags); // TODO this may be the wrong function
Ebml_Serialize(ebml_out, block.data, block.dataLength);
Ebml_EndSubElement(ebml_out,ebmlLoc);
}
diff --git a/libmkv/WebMElement.h b/libmkv/WebMElement.h
index b4208f2..d9ad0a0 100644
--- a/libmkv/WebMElement.h
+++ b/libmkv/WebMElement.h
@@ -17,8 +17,8 @@
// these are helper functions
void writeHeader(EbmlGlobal *ebml);
-void writeSegmentInformation(EbmlGlobal *ebml, EbmlLoc* startInfo , unsigned long timeCodeScale, double duration);
-//this function is a helper only, it assumes a lot of defaults
+void writeSegmentInformation(EbmlGlobal *ebml, EbmlLoc *startInfo, unsigned long timeCodeScale, double duration);
+// this function is a helper only, it assumes a lot of defaults
void writeVideoTrack(EbmlGlobal *ebml, unsigned int trackNumber, int flagLacing,
char *codecId, unsigned int pixelWidth, unsigned int pixelHeight,
double frameRate);
diff --git a/libmkv/testlibmkv.c b/libmkv/testlibmkv.c
index 7edfc43..97bcf95 100644
--- a/libmkv/testlibmkv.c
+++ b/libmkv/testlibmkv.c
@@ -13,51 +13,50 @@
#include "WebMElement.h"
#include <stdio.h>
-int main(int argc, char *argv[])
-{
- //init the datatype we're using for ebml output
- unsigned char data[8192];
- EbmlGlobal ebml;
- ebml.buf = data;
- ebml.offset = 0;
- ebml.length = 8192;
+int main(int argc, char *argv[]) {
+ // init the datatype we're using for ebml output
+ unsigned char data[8192];
+ EbmlGlobal ebml;
+ ebml.buf = data;
+ ebml.offset = 0;
+ ebml.length = 8192;
- writeHeader(&ebml);
+ writeHeader(&ebml);
+ {
+ EbmlLoc startSegment;
+ Ebml_StartSubElement(&ebml, &startSegment, Segment); // segment
{
- EbmlLoc startSegment;
- Ebml_StartSubElement(&ebml, &startSegment, Segment); //segment
- {
- //segment info
- EbmlLoc startInfo;
- Ebml_StartSubElement(&ebml, &startInfo, Info);
- Ebml_SerializeString(&ebml, 0x4D80, "muxingAppLibMkv");
- Ebml_SerializeString(&ebml, 0x5741, "writingAppLibMkv");
- Ebml_EndSubElement(&ebml, &startInfo);
- }
-
- {
- EbmlLoc trackStart;
- Ebml_StartSubElement(&ebml, &trackStart, Tracks);
- writeVideoTrack(&ebml, 1, 1, "V_MS/VFW/FOURCC", 320, 240, 29.97);
- //writeAudioTrack(&ebml,2,1, "A_VORBIS", 32000, 1, NULL, 0);
- Ebml_EndSubElement(&ebml, &trackStart);
- }
-
- {
- EbmlLoc clusterStart;
- Ebml_StartSubElement(&ebml, &clusterStart, Cluster); //cluster
- Ebml_SerializeUnsigned(&ebml, Timecode, 0);
-
- unsigned char someData[4] = {1, 2, 3, 4};
- writeSimpleBlock(&ebml, 1, 0, 1, 0, 0, someData, 4);
- Ebml_EndSubElement(&ebml, &clusterStart);
- } //end cluster
- Ebml_EndSubElement(&ebml, &startSegment);
+ // segment info
+ EbmlLoc startInfo;
+ Ebml_StartSubElement(&ebml, &startInfo, Info);
+ Ebml_SerializeString(&ebml, 0x4D80, "muxingAppLibMkv");
+ Ebml_SerializeString(&ebml, 0x5741, "writingAppLibMkv");
+ Ebml_EndSubElement(&ebml, &startInfo);
}
- //dump ebml stuff to the file
- FILE *file_out = fopen("test.mkv", "wb");
- size_t bytesWritten = fwrite(data, 1, ebml.offset, file_out);
- fclose(file_out);
- return 0;
+ {
+ EbmlLoc trackStart;
+ Ebml_StartSubElement(&ebml, &trackStart, Tracks);
+ writeVideoTrack(&ebml, 1, 1, "V_MS/VFW/FOURCC", 320, 240, 29.97);
+ // writeAudioTrack(&ebml,2,1, "A_VORBIS", 32000, 1, NULL, 0);
+ Ebml_EndSubElement(&ebml, &trackStart);
+ }
+
+ {
+ EbmlLoc clusterStart;
+ Ebml_StartSubElement(&ebml, &clusterStart, Cluster); // cluster
+ Ebml_SerializeUnsigned(&ebml, Timecode, 0);
+
+ unsigned char someData[4] = {1, 2, 3, 4};
+ writeSimpleBlock(&ebml, 1, 0, 1, 0, 0, someData, 4);
+ Ebml_EndSubElement(&ebml, &clusterStart);
+ } // end cluster
+ Ebml_EndSubElement(&ebml, &startSegment);
+ }
+
+ // dump ebml stuff to the file
+ FILE *file_out = fopen("test.mkv", "wb");
+ size_t bytesWritten = fwrite(data, 1, ebml.offset, file_out);
+ fclose(file_out);
+ return 0;
}
\ No newline at end of file
diff --git a/libs.mk b/libs.mk
index 4115dd8..1f0ade3 100644
--- a/libs.mk
+++ b/libs.mk
@@ -17,6 +17,47 @@
ASM:=.asm
endif
+
+#
+# Calculate platform- and compiler-specific offsets for hand coded assembly
+#
+ifeq ($(filter icc gcc,$(TGT_CC)), $(TGT_CC))
+OFFSET_PATTERN:='^[a-zA-Z0-9_]* EQU'
+define asm_offsets_template
+$$(BUILD_PFX)$(1): $$(BUILD_PFX)$(2).S
+ @echo " [CREATE] $$@"
+ $$(qexec)LC_ALL=C grep $$(OFFSET_PATTERN) $$< | tr -d '$$$$\#' $$(ADS2GAS) > $$@
+$$(BUILD_PFX)$(2).S: $(2)
+CLEAN-OBJS += $$(BUILD_PFX)$(1) $(2).S
+endef
+else
+ ifeq ($(filter rvct,$(TGT_CC)), $(TGT_CC))
+define asm_offsets_template
+$$(BUILD_PFX)$(1): obj_int_extract
+$$(BUILD_PFX)$(1): $$(BUILD_PFX)$(2).o
+ @echo " [CREATE] $$@"
+ $$(qexec)./obj_int_extract rvds $$< $$(ADS2GAS) > $$@
+OBJS-yes += $$(BUILD_PFX)$(2).o
+CLEAN-OBJS += $$(BUILD_PFX)$(1)
+$$(filter %$$(ASM).o,$$(OBJS-yes)): $$(BUILD_PFX)$(1)
+endef
+endif # rvct
+endif # !gcc
+
+#
+# Rule to generate runtime cpu detection files
+#
+define rtcd_h_template
+$$(BUILD_PFX)$(1).h: $$(SRC_PATH_BARE)/$(2)
+ @echo " [CREATE] $$@"
+ $$(qexec)$$(SRC_PATH_BARE)/build/make/rtcd.sh --arch=$$(TGT_ISA) \
+ --sym=$(1) \
+ --config=$$(target)$$(if $$(FAT_ARCHS),,-$$(TOOLCHAIN)).mk \
+ $$(RTCD_OPTIONS) $$^ > $$@
+CLEAN-OBJS += $$(BUILD_PFX)$(1).h
+RTCD += $$(BUILD_PFX)$(1).h
+endef
+
CODEC_SRCS-yes += CHANGELOG
CODEC_SRCS-yes += libs.mk
@@ -40,9 +81,12 @@
include $(SRC_PATH_BARE)/vpx_ports/vpx_ports.mk
CODEC_SRCS-yes += $(addprefix vpx_ports/,$(call enabled,PORTS_SRCS))
+ifneq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),)
+ VP8_PREFIX=vp8/
+ include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8_common.mk
+endif
ifeq ($(CONFIG_VP8_ENCODER),yes)
- VP8_PREFIX=vp8/
include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8cx.mk
CODEC_SRCS-yes += $(addprefix $(VP8_PREFIX),$(call enabled,VP8_CX_SRCS))
CODEC_EXPORTS-yes += $(addprefix $(VP8_PREFIX),$(VP8_CX_EXPORTS))
@@ -52,7 +96,6 @@
endif
ifeq ($(CONFIG_VP8_DECODER),yes)
- VP8_PREFIX=vp8/
include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8dx.mk
CODEC_SRCS-yes += $(addprefix $(VP8_PREFIX),$(call enabled,VP8_DX_SRCS))
CODEC_EXPORTS-yes += $(addprefix $(VP8_PREFIX),$(VP8_DX_EXPORTS))
@@ -61,6 +104,35 @@
CODEC_DOC_SECTIONS += vp8 vp8_decoder
endif
+ifneq ($(CONFIG_VP9_ENCODER)$(CONFIG_VP9_DECODER),)
+ VP9_PREFIX=vp9/
+ include $(SRC_PATH_BARE)/$(VP9_PREFIX)vp9_common.mk
+endif
+
+ifeq ($(CONFIG_VP9_ENCODER),yes)
+ VP9_PREFIX=vp9/
+ include $(SRC_PATH_BARE)/$(VP9_PREFIX)vp9cx.mk
+ CODEC_SRCS-yes += $(addprefix $(VP9_PREFIX),$(call enabled,VP9_CX_SRCS))
+ CODEC_EXPORTS-yes += $(addprefix $(VP9_PREFIX),$(VP9_CX_EXPORTS))
+ CODEC_SRCS-yes += $(VP9_PREFIX)vp9cx.mk vpx/vp8.h vpx/vp8cx.h
+ INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8cx.h
+ INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP9_PREFIX)/%
+ CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8cx.h
+ CODEC_DOC_SECTIONS += vp9 vp9_encoder
+endif
+
+ifeq ($(CONFIG_VP9_DECODER),yes)
+ VP9_PREFIX=vp9/
+ include $(SRC_PATH_BARE)/$(VP9_PREFIX)vp9dx.mk
+ CODEC_SRCS-yes += $(addprefix $(VP9_PREFIX),$(call enabled,VP9_DX_SRCS))
+ CODEC_EXPORTS-yes += $(addprefix $(VP9_PREFIX),$(VP9_DX_EXPORTS))
+ CODEC_SRCS-yes += $(VP9_PREFIX)vp9dx.mk vpx/vp8.h vpx/vp8dx.h
+ INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8dx.h
+ INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP9_PREFIX)/%
+ CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8dx.h
+ CODEC_DOC_SECTIONS += vp9 vp9_decoder
+endif
+
ifeq ($(CONFIG_ENCODERS),yes)
CODEC_DOC_SECTIONS += encoder
@@ -91,8 +163,11 @@
CODEC_SRCS-$(BUILD_LIBVPX) += build/make/version.sh
CODEC_SRCS-$(BUILD_LIBVPX) += build/make/rtcd.sh
+CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/emmintrin_compat.h
+CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/vpx_once.h
CODEC_SRCS-$(BUILD_LIBVPX) += $(BUILD_PFX)vpx_config.c
INSTALL-SRCS-no += $(BUILD_PFX)vpx_config.c
+CODEC_SRCS-$(BUILD_LIBVPX) += third_party/x86inc/x86inc.asm
CODEC_EXPORTS-$(BUILD_LIBVPX) += vpx/exports_com
CODEC_EXPORTS-$(CONFIG_ENCODERS) += vpx/exports_enc
CODEC_EXPORTS-$(CONFIG_DECODERS) += vpx/exports_dec
@@ -116,7 +191,7 @@
INSTALL-LIBS-$(CONFIG_DEBUG_LIBS) += $(LIBSUBDIR)/libvpx_g.a
endif
-CODEC_SRCS=$(filter-out %_test.cc,$(call enabled,CODEC_SRCS))
+CODEC_SRCS=$(call enabled,CODEC_SRCS)
INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(CODEC_SRCS)
INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(call enabled,CODEC_EXPORTS)
@@ -158,8 +233,8 @@
vpx.vcproj: $(CODEC_SRCS) vpx.def
@echo " [CREATE] $@"
$(qexec)$(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \
- --lib \
- --target=$(TOOLCHAIN) \
+ $(if $(CONFIG_SHARED),--dll,--lib) \
+ --target=$(TOOLCHAIN) \
$(if $(CONFIG_STATIC_MSVCRT),--static-crt) \
--name=vpx \
--proj-guid=DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74 \
@@ -171,7 +246,7 @@
PROJECTS-$(BUILD_LIBVPX) += vpx.vcproj
vpx.vcproj: vpx_config.asm
-vpx.vcproj: vpx_rtcd.h
+vpx.vcproj: $(RTCD)
endif
else
@@ -180,17 +255,29 @@
LIBS-$(if $(BUILD_LIBVPX),$(CONFIG_STATIC)) += $(BUILD_PFX)libvpx.a $(BUILD_PFX)libvpx_g.a
$(BUILD_PFX)libvpx_g.a: $(LIBVPX_OBJS)
+
BUILD_LIBVPX_SO := $(if $(BUILD_LIBVPX),$(CONFIG_SHARED))
+
+ifeq ($(filter darwin%,$(TGT_OS)),$(TGT_OS))
+LIBVPX_SO := libvpx.$(VERSION_MAJOR).dylib
+EXPORT_FILE := libvpx.syms
+LIBVPX_SO_SYMLINKS := $(addprefix $(LIBSUBDIR)/, \
+ libvpx.dylib )
+else
LIBVPX_SO := libvpx.so.$(VERSION_MAJOR).$(VERSION_MINOR).$(VERSION_PATCH)
-LIBS-$(BUILD_LIBVPX_SO) += $(BUILD_PFX)$(LIBVPX_SO)\
- $(notdir $(LIBVPX_SO_SYMLINKS))
-$(BUILD_PFX)$(LIBVPX_SO): $(LIBVPX_OBJS) libvpx.ver
-$(BUILD_PFX)$(LIBVPX_SO): extralibs += -lm
-$(BUILD_PFX)$(LIBVPX_SO): SONAME = libvpx.so.$(VERSION_MAJOR)
-$(BUILD_PFX)$(LIBVPX_SO): SO_VERSION_SCRIPT = libvpx.ver
+EXPORT_FILE := libvpx.ver
+SYM_LINK := libvpx.so
LIBVPX_SO_SYMLINKS := $(addprefix $(LIBSUBDIR)/, \
libvpx.so libvpx.so.$(VERSION_MAJOR) \
libvpx.so.$(VERSION_MAJOR).$(VERSION_MINOR))
+endif
+
+LIBS-$(BUILD_LIBVPX_SO) += $(BUILD_PFX)$(LIBVPX_SO)\
+ $(notdir $(LIBVPX_SO_SYMLINKS))
+$(BUILD_PFX)$(LIBVPX_SO): $(LIBVPX_OBJS) $(EXPORT_FILE)
+$(BUILD_PFX)$(LIBVPX_SO): extralibs += -lm
+$(BUILD_PFX)$(LIBVPX_SO): SONAME = libvpx.so.$(VERSION_MAJOR)
+$(BUILD_PFX)$(LIBVPX_SO): EXPORTS_FILE = $(EXPORT_FILE)
libvpx.ver: $(call enabled,CODEC_EXPORTS)
@echo " [CREATE] $@"
@@ -199,10 +286,16 @@
$(qexec)echo "local: *; };" >> $@
CLEAN-OBJS += libvpx.ver
+libvpx.syms: $(call enabled,CODEC_EXPORTS)
+ @echo " [CREATE] $@"
+ $(qexec)awk '{print "_"$$2}' $^ >$@
+CLEAN-OBJS += libvpx.syms
+
define libvpx_symlink_template
$(1): $(2)
- @echo " [LN] $$@"
- $(qexec)ln -sf $(LIBVPX_SO) $$@
+ @echo " [LN] $(2) $$@"
+ $(qexec)mkdir -p $$(dir $$@)
+ $(qexec)ln -sf $(2) $$@
endef
$(eval $(call libvpx_symlink_template,\
@@ -210,10 +303,12 @@
$(BUILD_PFX)$(LIBVPX_SO)))
$(eval $(call libvpx_symlink_template,\
$(addprefix $(DIST_DIR)/,$(LIBVPX_SO_SYMLINKS)),\
- $(DIST_DIR)/$(LIBSUBDIR)/$(LIBVPX_SO)))
+ $(LIBVPX_SO)))
-INSTALL-LIBS-$(CONFIG_SHARED) += $(LIBVPX_SO_SYMLINKS)
-INSTALL-LIBS-$(CONFIG_SHARED) += $(LIBSUBDIR)/$(LIBVPX_SO)
+
+INSTALL-LIBS-$(BUILD_LIBVPX_SO) += $(LIBVPX_SO_SYMLINKS)
+INSTALL-LIBS-$(BUILD_LIBVPX_SO) += $(LIBSUBDIR)/$(LIBVPX_SO)
+
LIBS-$(BUILD_LIBVPX) += vpx.pc
vpx.pc: config.mk libs.mk
@@ -229,7 +324,7 @@
$(qexec)echo 'Version: $(VERSION_MAJOR).$(VERSION_MINOR).$(VERSION_PATCH)' >> $@
$(qexec)echo 'Requires:' >> $@
$(qexec)echo 'Conflicts:' >> $@
- $(qexec)echo 'Libs: -L$${libdir} -lvpx' >> $@
+ $(qexec)echo 'Libs: -L$${libdir} -lvpx -lm' >> $@
$(qexec)echo 'Libs.private: -lm -lpthread' >> $@
$(qexec)echo 'Cflags: -I$${includedir}' >> $@
INSTALL-LIBS-yes += $(LIBSUBDIR)/pkgconfig/vpx.pc
@@ -265,71 +360,10 @@
$(filter %.s.o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm
$(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm
-#
-# Calculate platform- and compiler-specific offsets for hand coded assembly
-#
-
-OFFSET_PATTERN:='^[a-zA-Z0-9_]* EQU'
-
-ifeq ($(filter icc gcc,$(TGT_CC)), $(TGT_CC))
- $(BUILD_PFX)asm_com_offsets.asm: $(BUILD_PFX)$(VP8_PREFIX)common/asm_com_offsets.c.S
- @echo " [CREATE] $@"
- $(qexec)LC_ALL=C grep $(OFFSET_PATTERN) $< | tr -d '$$\#' $(ADS2GAS) > $@
- $(BUILD_PFX)$(VP8_PREFIX)common/asm_com_offsets.c.S: $(VP8_PREFIX)common/asm_com_offsets.c
- CLEAN-OBJS += $(BUILD_PFX)asm_com_offsets.asm $(BUILD_PFX)$(VP8_PREFIX)common/asm_com_offsets.c.S
-
- $(BUILD_PFX)asm_enc_offsets.asm: $(BUILD_PFX)$(VP8_PREFIX)encoder/asm_enc_offsets.c.S
- @echo " [CREATE] $@"
- $(qexec)LC_ALL=C grep $(OFFSET_PATTERN) $< | tr -d '$$\#' $(ADS2GAS) > $@
- $(BUILD_PFX)$(VP8_PREFIX)encoder/asm_enc_offsets.c.S: $(VP8_PREFIX)encoder/asm_enc_offsets.c
- CLEAN-OBJS += $(BUILD_PFX)asm_enc_offsets.asm $(BUILD_PFX)$(VP8_PREFIX)encoder/asm_enc_offsets.c.S
-
- $(BUILD_PFX)asm_dec_offsets.asm: $(BUILD_PFX)$(VP8_PREFIX)decoder/asm_dec_offsets.c.S
- @echo " [CREATE] $@"
- $(qexec)LC_ALL=C grep $(OFFSET_PATTERN) $< | tr -d '$$\#' $(ADS2GAS) > $@
- $(BUILD_PFX)$(VP8_PREFIX)decoder/asm_dec_offsets.c.S: $(VP8_PREFIX)decoder/asm_dec_offsets.c
- CLEAN-OBJS += $(BUILD_PFX)asm_dec_offsets.asm $(BUILD_PFX)$(VP8_PREFIX)decoder/asm_dec_offsets.c.S
-else
- ifeq ($(filter rvct,$(TGT_CC)), $(TGT_CC))
- asm_com_offsets.asm: obj_int_extract
- asm_com_offsets.asm: $(VP8_PREFIX)common/asm_com_offsets.c.o
- @echo " [CREATE] $@"
- $(qexec)./obj_int_extract rvds $< $(ADS2GAS) > $@
- OBJS-yes += $(VP8_PREFIX)common/asm_com_offsets.c.o
- CLEAN-OBJS += asm_com_offsets.asm
- $(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_com_offsets.asm
-
- asm_enc_offsets.asm: obj_int_extract
- asm_enc_offsets.asm: $(VP8_PREFIX)encoder/asm_enc_offsets.c.o
- @echo " [CREATE] $@"
- $(qexec)./obj_int_extract rvds $< $(ADS2GAS) > $@
- OBJS-yes += $(VP8_PREFIX)encoder/asm_enc_offsets.c.o
- CLEAN-OBJS += asm_enc_offsets.asm
- $(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_enc_offsets.asm
-
- asm_dec_offsets.asm: obj_int_extract
- asm_dec_offsets.asm: $(VP8_PREFIX)decoder/asm_dec_offsets.c.o
- @echo " [CREATE] $@"
- $(qexec)./obj_int_extract rvds $< $(ADS2GAS) > $@
- OBJS-yes += $(VP8_PREFIX)decoder/asm_dec_offsets.c.o
- CLEAN-OBJS += asm_dec_offsets.asm
- $(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_dec_offsets.asm
- endif
-endif
$(shell $(SRC_PATH_BARE)/build/make/version.sh "$(SRC_PATH_BARE)" $(BUILD_PFX)vpx_version.h)
CLEAN-OBJS += $(BUILD_PFX)vpx_version.h
-#
-# Rule to generate runtime cpu detection files
-#
-$(BUILD_PFX)vpx_rtcd.h: $(SRC_PATH_BARE)/$(sort $(filter %rtcd_defs.sh,$(CODEC_SRCS)))
- @echo " [CREATE] $@"
- $(qexec)$(SRC_PATH_BARE)/build/make/rtcd.sh --arch=$(TGT_ISA) \
- --sym=vpx_rtcd \
- --config=$(target)$(if $(FAT_ARCHS),,-$(TOOLCHAIN)).mk \
- $(RTCD_OPTIONS) $^ > $@
-CLEAN-OBJS += $(BUILD_PFX)vpx_rtcd.h
##
## libvpx test directives
@@ -375,6 +409,7 @@
--proj-guid=EC00E1EC-AF68-4D92-A255-181690D1C9B1 \
--ver=$(CONFIG_VS_VERSION) \
--src-path-bare="$(SRC_PATH_BARE)" \
+ -D_VARIADIC_MAX=10 \
--out=gtest.vcproj $(SRC_PATH_BARE)/third_party/googletest/src/src/gtest-all.cc \
-I. -I"$(SRC_PATH_BARE)/third_party/googletest/src/include" -I"$(SRC_PATH_BARE)/third_party/googletest/src"
@@ -386,6 +421,7 @@
--exe \
--target=$(TOOLCHAIN) \
--name=test_libvpx \
+ -D_VARIADIC_MAX=10 \
--proj-guid=CD837F5F-52D8-4314-A370-895D614166A7 \
--ver=$(CONFIG_VS_VERSION) \
$(if $(CONFIG_STATIC_MSVCRT),--static-crt) \
@@ -450,5 +486,8 @@
@echo "INCLUDE_PATH += ." >> $@;
@echo "ENABLED_SECTIONS += $(sort $(CODEC_DOC_SECTIONS))" >> $@
-## Generate vpx_rtcd.h for all objects
-$(OBJS-yes:.o=.d): $(BUILD_PFX)vpx_rtcd.h
+## Generate rtcd.h for all objects
+$(OBJS-yes:.o=.d): $(RTCD)
+
+## Update the global src list
+SRCS += $(CODEC_SRCS) $(LIBVPX_TEST_SRCS) $(GTEST_SRCS)
diff --git a/md5_utils.c b/md5_utils.c
index 9a584fa..8fb26e2 100644
--- a/md5_utils.c
+++ b/md5_utils.c
@@ -25,25 +25,22 @@
#include "md5_utils.h"
void
-byteSwap(UWORD32 *buf, unsigned words)
-{
- md5byte *p;
+byteSwap(UWORD32 *buf, unsigned words) {
+ md5byte *p;
- /* Only swap bytes for big endian machines */
- int i = 1;
+ /* Only swap bytes for big endian machines */
+ int i = 1;
- if (*(char *)&i == 1)
- return;
+ if (*(char *)&i == 1)
+ return;
- p = (md5byte *)buf;
+ p = (md5byte *)buf;
- do
- {
- *buf++ = (UWORD32)((unsigned)p[3] << 8 | p[2]) << 16 |
- ((unsigned)p[1] << 8 | p[0]);
- p += 4;
- }
- while (--words);
+ do {
+ *buf++ = (UWORD32)((unsigned)p[3] << 8 | p[2]) << 16 |
+ ((unsigned)p[1] << 8 | p[0]);
+ p += 4;
+ } while (--words);
}
/*
@@ -51,15 +48,14 @@
* initialization constants.
*/
void
-MD5Init(struct MD5Context *ctx)
-{
- ctx->buf[0] = 0x67452301;
- ctx->buf[1] = 0xefcdab89;
- ctx->buf[2] = 0x98badcfe;
- ctx->buf[3] = 0x10325476;
+MD5Init(struct MD5Context *ctx) {
+ ctx->buf[0] = 0x67452301;
+ ctx->buf[1] = 0xefcdab89;
+ ctx->buf[2] = 0x98badcfe;
+ ctx->buf[3] = 0x10325476;
- ctx->bytes[0] = 0;
- ctx->bytes[1] = 0;
+ ctx->bytes[0] = 0;
+ ctx->bytes[1] = 0;
}
/*
@@ -67,44 +63,41 @@
* of bytes.
*/
void
-MD5Update(struct MD5Context *ctx, md5byte const *buf, unsigned len)
-{
- UWORD32 t;
+MD5Update(struct MD5Context *ctx, md5byte const *buf, unsigned len) {
+ UWORD32 t;
- /* Update byte count */
+ /* Update byte count */
- t = ctx->bytes[0];
+ t = ctx->bytes[0];
- if ((ctx->bytes[0] = t + len) < t)
- ctx->bytes[1]++; /* Carry from low to high */
+ if ((ctx->bytes[0] = t + len) < t)
+ ctx->bytes[1]++; /* Carry from low to high */
- t = 64 - (t & 0x3f); /* Space available in ctx->in (at least 1) */
+ t = 64 - (t & 0x3f); /* Space available in ctx->in (at least 1) */
- if (t > len)
- {
- memcpy((md5byte *)ctx->in + 64 - t, buf, len);
- return;
- }
+ if (t > len) {
+ memcpy((md5byte *)ctx->in + 64 - t, buf, len);
+ return;
+ }
- /* First chunk is an odd size */
- memcpy((md5byte *)ctx->in + 64 - t, buf, t);
+ /* First chunk is an odd size */
+ memcpy((md5byte *)ctx->in + 64 - t, buf, t);
+ byteSwap(ctx->in, 16);
+ MD5Transform(ctx->buf, ctx->in);
+ buf += t;
+ len -= t;
+
+ /* Process data in 64-byte chunks */
+ while (len >= 64) {
+ memcpy(ctx->in, buf, 64);
byteSwap(ctx->in, 16);
MD5Transform(ctx->buf, ctx->in);
- buf += t;
- len -= t;
+ buf += 64;
+ len -= 64;
+ }
- /* Process data in 64-byte chunks */
- while (len >= 64)
- {
- memcpy(ctx->in, buf, 64);
- byteSwap(ctx->in, 16);
- MD5Transform(ctx->buf, ctx->in);
- buf += 64;
- len -= 64;
- }
-
- /* Handle any remaining bytes of data. */
- memcpy(ctx->in, buf, len);
+ /* Handle any remaining bytes of data. */
+ memcpy(ctx->in, buf, len);
}
/*
@@ -112,37 +105,35 @@
* 1 0* (64-bit count of bits processed, MSB-first)
*/
void
-MD5Final(md5byte digest[16], struct MD5Context *ctx)
-{
- int count = ctx->bytes[0] & 0x3f; /* Number of bytes in ctx->in */
- md5byte *p = (md5byte *)ctx->in + count;
+MD5Final(md5byte digest[16], struct MD5Context *ctx) {
+ int count = ctx->bytes[0] & 0x3f; /* Number of bytes in ctx->in */
+ md5byte *p = (md5byte *)ctx->in + count;
- /* Set the first char of padding to 0x80. There is always room. */
- *p++ = 0x80;
+ /* Set the first char of padding to 0x80. There is always room. */
+ *p++ = 0x80;
- /* Bytes of padding needed to make 56 bytes (-8..55) */
- count = 56 - 1 - count;
+ /* Bytes of padding needed to make 56 bytes (-8..55) */
+ count = 56 - 1 - count;
- if (count < 0) /* Padding forces an extra block */
- {
- memset(p, 0, count + 8);
- byteSwap(ctx->in, 16);
- MD5Transform(ctx->buf, ctx->in);
- p = (md5byte *)ctx->in;
- count = 56;
- }
-
- memset(p, 0, count);
- byteSwap(ctx->in, 14);
-
- /* Append length in bits and transform */
- ctx->in[14] = ctx->bytes[0] << 3;
- ctx->in[15] = ctx->bytes[1] << 3 | ctx->bytes[0] >> 29;
+ if (count < 0) { /* Padding forces an extra block */
+ memset(p, 0, count + 8);
+ byteSwap(ctx->in, 16);
MD5Transform(ctx->buf, ctx->in);
+ p = (md5byte *)ctx->in;
+ count = 56;
+ }
- byteSwap(ctx->buf, 4);
- memcpy(digest, ctx->buf, 16);
- memset(ctx, 0, sizeof(*ctx)); /* In case it's sensitive */
+ memset(p, 0, count);
+ byteSwap(ctx->in, 14);
+
+ /* Append length in bits and transform */
+ ctx->in[14] = ctx->bytes[0] << 3;
+ ctx->in[15] = ctx->bytes[1] << 3 | ctx->bytes[0] >> 29;
+ MD5Transform(ctx->buf, ctx->in);
+
+ byteSwap(ctx->buf, 4);
+ memcpy(digest, ctx->buf, 16);
+ memset(ctx, 0, sizeof(*ctx)); /* In case it's sensitive */
}
#ifndef ASM_MD5
@@ -157,7 +148,7 @@
/* This is the central step in the MD5 algorithm. */
#define MD5STEP(f,w,x,y,z,in,s) \
- (w += f(x,y,z) + in, w = (w<<s | w>>(32-s)) + x)
+ (w += f(x,y,z) + in, w = (w<<s | w>>(32-s)) + x)
/*
* The core of the MD5 algorithm, this alters an existing MD5 hash to
@@ -165,87 +156,86 @@
* the data and converts bytes into longwords for this routine.
*/
void
-MD5Transform(UWORD32 buf[4], UWORD32 const in[16])
-{
- register UWORD32 a, b, c, d;
+MD5Transform(UWORD32 buf[4], UWORD32 const in[16]) {
+ register UWORD32 a, b, c, d;
- a = buf[0];
- b = buf[1];
- c = buf[2];
- d = buf[3];
+ a = buf[0];
+ b = buf[1];
+ c = buf[2];
+ d = buf[3];
- MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7);
- MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12);
- MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17);
- MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22);
- MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7);
- MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12);
- MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17);
- MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22);
- MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7);
- MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12);
- MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17);
- MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22);
- MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7);
- MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12);
- MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17);
- MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22);
+ MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7);
+ MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12);
+ MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17);
+ MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22);
+ MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7);
+ MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12);
+ MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17);
+ MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22);
+ MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7);
+ MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12);
+ MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17);
+ MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22);
+ MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7);
+ MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12);
+ MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17);
+ MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22);
- MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5);
- MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9);
- MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14);
- MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20);
- MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5);
- MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9);
- MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14);
- MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20);
- MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5);
- MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9);
- MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14);
- MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20);
- MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5);
- MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9);
- MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14);
- MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20);
+ MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5);
+ MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9);
+ MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14);
+ MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20);
+ MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5);
+ MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9);
+ MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14);
+ MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20);
+ MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5);
+ MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9);
+ MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14);
+ MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20);
+ MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5);
+ MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9);
+ MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14);
+ MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20);
- MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4);
- MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11);
- MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16);
- MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23);
- MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4);
- MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11);
- MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16);
- MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23);
- MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4);
- MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11);
- MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16);
- MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23);
- MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4);
- MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11);
- MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16);
- MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23);
+ MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4);
+ MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11);
+ MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16);
+ MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23);
+ MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4);
+ MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11);
+ MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16);
+ MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23);
+ MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4);
+ MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11);
+ MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16);
+ MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23);
+ MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4);
+ MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11);
+ MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16);
+ MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23);
- MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6);
- MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10);
- MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15);
- MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21);
- MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6);
- MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10);
- MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15);
- MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21);
- MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6);
- MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10);
- MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15);
- MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21);
- MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6);
- MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10);
- MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15);
- MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21);
+ MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6);
+ MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10);
+ MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15);
+ MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21);
+ MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6);
+ MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10);
+ MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15);
+ MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21);
+ MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6);
+ MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10);
+ MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15);
+ MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21);
+ MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6);
+ MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10);
+ MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15);
+ MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21);
- buf[0] += a;
- buf[1] += b;
- buf[2] += c;
- buf[3] += d;
+ buf[0] += a;
+ buf[1] += b;
+ buf[2] += c;
+ buf[3] += d;
}
#endif
diff --git a/md5_utils.h b/md5_utils.h
index 5ca1b5f..81792c4 100644
--- a/md5_utils.h
+++ b/md5_utils.h
@@ -27,11 +27,10 @@
#define UWORD32 unsigned int
typedef struct MD5Context MD5Context;
-struct MD5Context
-{
- UWORD32 buf[4];
- UWORD32 bytes[2];
- UWORD32 in[16];
+struct MD5Context {
+ UWORD32 buf[4];
+ UWORD32 bytes[2];
+ UWORD32 in[16];
};
void MD5Init(struct MD5Context *context);
diff --git a/nestegg/include/nestegg/nestegg.h b/nestegg/include/nestegg/nestegg.h
index 7447d14..6510694 100644
--- a/nestegg/include/nestegg/nestegg.h
+++ b/nestegg/include/nestegg/nestegg.h
@@ -67,6 +67,7 @@
#define NESTEGG_CODEC_VP8 0 /**< Track uses Google On2 VP8 codec. */
#define NESTEGG_CODEC_VORBIS 1 /**< Track uses Xiph Vorbis codec. */
+#define NESTEGG_CODEC_VP9 2 /**< Track uses Google VP9 codec. */
#define NESTEGG_SEEK_SET 0 /**< Seek offset relative to beginning of stream. */
#define NESTEGG_SEEK_CUR 1 /**< Seek offset relative to current position in stream. */
diff --git a/nestegg/src/nestegg.c b/nestegg/src/nestegg.c
index cc87788..ae87e8f 100644
--- a/nestegg/src/nestegg.c
+++ b/nestegg/src/nestegg.c
@@ -127,6 +127,7 @@
/* Track IDs */
#define TRACK_ID_VP8 "V_VP8"
+#define TRACK_ID_VP9 "V_VP9"
#define TRACK_ID_VORBIS "A_VORBIS"
enum vint_mask {
@@ -1669,6 +1670,9 @@
if (strcmp(codec_id, TRACK_ID_VP8) == 0)
return NESTEGG_CODEC_VP8;
+ if (strcmp(codec_id, TRACK_ID_VP9) == 0)
+ return NESTEGG_CODEC_VP9;
+
if (strcmp(codec_id, TRACK_ID_VORBIS) == 0)
return NESTEGG_CODEC_VORBIS;
diff --git a/test/dct16x16_test.cc b/test/dct16x16_test.cc
new file mode 100644
index 0000000..64bf0bb
--- /dev/null
+++ b/test/dct16x16_test.cc
@@ -0,0 +1,356 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+extern "C" {
+#include "vp9/common/entropy.h"
+#include "vp9_rtcd.h"
+}
+
+#include "acm_random.h"
+#include "vpx/vpx_integer.h"
+
+using libvpx_test::ACMRandom;
+
+namespace {
+
+const double PI = 3.1415926535898;
+void reference2_16x16_idct_2d(double *input, double *output) {
+ double x;
+ for (int l = 0; l < 16; ++l) {
+ for (int k = 0; k < 16; ++k) {
+ double s = 0;
+ for (int i = 0; i < 16; ++i) {
+ for (int j = 0; j < 16; ++j) {
+ x=cos(PI*j*(l+0.5)/16.0)*cos(PI*i*(k+0.5)/16.0)*input[i*16+j]/256;
+ if (i != 0)
+ x *= sqrt(2.0);
+ if (j != 0)
+ x *= sqrt(2.0);
+ s += x;
+ }
+ }
+ output[k*16+l] = s;
+ }
+ }
+}
+
+
+static const double C1 = 0.995184726672197;
+static const double C2 = 0.98078528040323;
+static const double C3 = 0.956940335732209;
+static const double C4 = 0.923879532511287;
+static const double C5 = 0.881921264348355;
+static const double C6 = 0.831469612302545;
+static const double C7 = 0.773010453362737;
+static const double C8 = 0.707106781186548;
+static const double C9 = 0.634393284163646;
+static const double C10 = 0.555570233019602;
+static const double C11 = 0.471396736825998;
+static const double C12 = 0.38268343236509;
+static const double C13 = 0.290284677254462;
+static const double C14 = 0.195090322016128;
+static const double C15 = 0.098017140329561;
+
+static void butterfly_16x16_dct_1d(double input[16], double output[16]) {
+ double step[16];
+ double intermediate[16];
+ double temp1, temp2;
+
+ // step 1
+ step[ 0] = input[0] + input[15];
+ step[ 1] = input[1] + input[14];
+ step[ 2] = input[2] + input[13];
+ step[ 3] = input[3] + input[12];
+ step[ 4] = input[4] + input[11];
+ step[ 5] = input[5] + input[10];
+ step[ 6] = input[6] + input[ 9];
+ step[ 7] = input[7] + input[ 8];
+ step[ 8] = input[7] - input[ 8];
+ step[ 9] = input[6] - input[ 9];
+ step[10] = input[5] - input[10];
+ step[11] = input[4] - input[11];
+ step[12] = input[3] - input[12];
+ step[13] = input[2] - input[13];
+ step[14] = input[1] - input[14];
+ step[15] = input[0] - input[15];
+
+ // step 2
+ output[0] = step[0] + step[7];
+ output[1] = step[1] + step[6];
+ output[2] = step[2] + step[5];
+ output[3] = step[3] + step[4];
+ output[4] = step[3] - step[4];
+ output[5] = step[2] - step[5];
+ output[6] = step[1] - step[6];
+ output[7] = step[0] - step[7];
+
+ temp1 = step[ 8]*C7;
+ temp2 = step[15]*C9;
+ output[ 8] = temp1 + temp2;
+
+ temp1 = step[ 9]*C11;
+ temp2 = step[14]*C5;
+ output[ 9] = temp1 - temp2;
+
+ temp1 = step[10]*C3;
+ temp2 = step[13]*C13;
+ output[10] = temp1 + temp2;
+
+ temp1 = step[11]*C15;
+ temp2 = step[12]*C1;
+ output[11] = temp1 - temp2;
+
+ temp1 = step[11]*C1;
+ temp2 = step[12]*C15;
+ output[12] = temp2 + temp1;
+
+ temp1 = step[10]*C13;
+ temp2 = step[13]*C3;
+ output[13] = temp2 - temp1;
+
+ temp1 = step[ 9]*C5;
+ temp2 = step[14]*C11;
+ output[14] = temp2 + temp1;
+
+ temp1 = step[ 8]*C9;
+ temp2 = step[15]*C7;
+ output[15] = temp2 - temp1;
+
+ // step 3
+ step[ 0] = output[0] + output[3];
+ step[ 1] = output[1] + output[2];
+ step[ 2] = output[1] - output[2];
+ step[ 3] = output[0] - output[3];
+
+ temp1 = output[4]*C14;
+ temp2 = output[7]*C2;
+ step[ 4] = temp1 + temp2;
+
+ temp1 = output[5]*C10;
+ temp2 = output[6]*C6;
+ step[ 5] = temp1 + temp2;
+
+ temp1 = output[5]*C6;
+ temp2 = output[6]*C10;
+ step[ 6] = temp2 - temp1;
+
+ temp1 = output[4]*C2;
+ temp2 = output[7]*C14;
+ step[ 7] = temp2 - temp1;
+
+ step[ 8] = output[ 8] + output[11];
+ step[ 9] = output[ 9] + output[10];
+ step[10] = output[ 9] - output[10];
+ step[11] = output[ 8] - output[11];
+
+ step[12] = output[12] + output[15];
+ step[13] = output[13] + output[14];
+ step[14] = output[13] - output[14];
+ step[15] = output[12] - output[15];
+
+ // step 4
+ output[ 0] = (step[ 0] + step[ 1]);
+ output[ 8] = (step[ 0] - step[ 1]);
+
+ temp1 = step[2]*C12;
+ temp2 = step[3]*C4;
+ temp1 = temp1 + temp2;
+ output[ 4] = 2*(temp1*C8);
+
+ temp1 = step[2]*C4;
+ temp2 = step[3]*C12;
+ temp1 = temp2 - temp1;
+ output[12] = 2*(temp1*C8);
+
+ output[ 2] = 2*((step[4] + step[ 5])*C8);
+ output[14] = 2*((step[7] - step[ 6])*C8);
+
+ temp1 = step[4] - step[5];
+ temp2 = step[6] + step[7];
+ output[ 6] = (temp1 + temp2);
+ output[10] = (temp1 - temp2);
+
+ intermediate[8] = step[8] + step[14];
+ intermediate[9] = step[9] + step[15];
+
+ temp1 = intermediate[8]*C12;
+ temp2 = intermediate[9]*C4;
+ temp1 = temp1 - temp2;
+ output[3] = 2*(temp1*C8);
+
+ temp1 = intermediate[8]*C4;
+ temp2 = intermediate[9]*C12;
+ temp1 = temp2 + temp1;
+ output[13] = 2*(temp1*C8);
+
+ output[ 9] = 2*((step[10] + step[11])*C8);
+
+ intermediate[11] = step[10] - step[11];
+ intermediate[12] = step[12] + step[13];
+ intermediate[13] = step[12] - step[13];
+ intermediate[14] = step[ 8] - step[14];
+ intermediate[15] = step[ 9] - step[15];
+
+ output[15] = (intermediate[11] + intermediate[12]);
+ output[ 1] = -(intermediate[11] - intermediate[12]);
+
+ output[ 7] = 2*(intermediate[13]*C8);
+
+ temp1 = intermediate[14]*C12;
+ temp2 = intermediate[15]*C4;
+ temp1 = temp1 - temp2;
+ output[11] = -2*(temp1*C8);
+
+ temp1 = intermediate[14]*C4;
+ temp2 = intermediate[15]*C12;
+ temp1 = temp2 + temp1;
+ output[ 5] = 2*(temp1*C8);
+}
+
+static void reference_16x16_dct_1d(double in[16], double out[16]) {
+ const double kPi = 3.141592653589793238462643383279502884;
+ const double kInvSqrt2 = 0.707106781186547524400844362104;
+ for (int k = 0; k < 16; k++) {
+ out[k] = 0.0;
+ for (int n = 0; n < 16; n++)
+ out[k] += in[n]*cos(kPi*(2*n+1)*k/32.0);
+ if (k == 0)
+ out[k] = out[k]*kInvSqrt2;
+ }
+}
+
+void reference_16x16_dct_2d(int16_t input[16*16], double output[16*16]) {
+ // First transform columns
+ for (int i = 0; i < 16; ++i) {
+ double temp_in[16], temp_out[16];
+ for (int j = 0; j < 16; ++j)
+ temp_in[j] = input[j*16 + i];
+ butterfly_16x16_dct_1d(temp_in, temp_out);
+ for (int j = 0; j < 16; ++j)
+ output[j*16 + i] = temp_out[j];
+ }
+ // Then transform rows
+ for (int i = 0; i < 16; ++i) {
+ double temp_in[16], temp_out[16];
+ for (int j = 0; j < 16; ++j)
+ temp_in[j] = output[j + i*16];
+ butterfly_16x16_dct_1d(temp_in, temp_out);
+    // Halve each result to match the output scaling of vp9_short_fdct16x16_c
+ for (int j = 0; j < 16; ++j)
+ output[j + i*16] = temp_out[j]/2;
+ }
+}
+
+
+TEST(VP9Idct16x16Test, AccuracyCheck) {
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ const int count_test_block = 1000;
+ for (int i = 0; i < count_test_block; ++i) {
+ int16_t in[256], coeff[256];
+ int16_t out_c[256];
+ double out_r[256];
+
+ // Initialize a test block with input range [-255, 255].
+ for (int j = 0; j < 256; ++j)
+ in[j] = rnd.Rand8() - rnd.Rand8();
+
+ reference_16x16_dct_2d(in, out_r);
+ for (int j = 0; j < 256; j++)
+ coeff[j] = round(out_r[j]);
+ vp9_short_idct16x16_c(coeff, out_c, 32);
+ for (int j = 0; j < 256; ++j) {
+ const int diff = out_c[j] - in[j];
+ const int error = diff * diff;
+ EXPECT_GE(1, error)
+ << "Error: 16x16 IDCT has error " << error
+ << " at index " << j;
+ }
+
+ vp9_short_fdct16x16_c(in, out_c, 32);
+ for (int j = 0; j < 256; ++j) {
+ const double diff = coeff[j] - out_c[j];
+ const double error = diff * diff;
+ EXPECT_GE(1.0, error)
+ << "Error: 16x16 FDCT has error " << error
+ << " at index " << j;
+ }
+ }
+}
+
+TEST(VP9Fdct16x16Test, AccuracyCheck) {
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ int max_error = 0;
+ double total_error = 0;
+ const int count_test_block = 1000;
+ for (int i = 0; i < count_test_block; ++i) {
+ int16_t test_input_block[256];
+ int16_t test_temp_block[256];
+ int16_t test_output_block[256];
+
+ // Initialize a test block with input range [-255, 255].
+ for (int j = 0; j < 256; ++j)
+ test_input_block[j] = rnd.Rand8() - rnd.Rand8();
+
+ const int pitch = 32;
+ vp9_short_fdct16x16_c(test_input_block, test_temp_block, pitch);
+ vp9_short_idct16x16_c(test_temp_block, test_output_block, pitch);
+
+ for (int j = 0; j < 256; ++j) {
+ const int diff = test_input_block[j] - test_output_block[j];
+ const int error = diff * diff;
+ if (max_error < error)
+ max_error = error;
+ total_error += error;
+ }
+ }
+
+ EXPECT_GE(1, max_error)
+ << "Error: 16x16 FDCT/IDCT has an individual roundtrip error > 1";
+
+ EXPECT_GE(count_test_block/10, total_error)
+ << "Error: 16x16 FDCT/IDCT has average roundtrip error > 1/10 per block";
+}
+
+TEST(VP9Fdct16x16Test, CoeffSizeCheck) {
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ const int count_test_block = 1000;
+ for (int i = 0; i < count_test_block; ++i) {
+ int16_t input_block[256], input_extreme_block[256];
+ int16_t output_block[256], output_extreme_block[256];
+
+ // Initialize a test block with input range [-255, 255].
+ for (int j = 0; j < 256; ++j) {
+ input_block[j] = rnd.Rand8() - rnd.Rand8();
+ input_extreme_block[j] = rnd.Rand8() % 2 ? 255 : -255;
+ }
+ if (i == 0)
+ for (int j = 0; j < 256; ++j)
+ input_extreme_block[j] = 255;
+
+ const int pitch = 32;
+ vp9_short_fdct16x16_c(input_block, output_block, pitch);
+ vp9_short_fdct16x16_c(input_extreme_block, output_extreme_block, pitch);
+
+ // The minimum quant value is 4.
+ for (int j = 0; j < 256; ++j) {
+ EXPECT_GE(4*DCT_MAX_VALUE, abs(output_block[j]))
+ << "Error: 16x16 FDCT has coefficient larger than 4*DCT_MAX_VALUE";
+ EXPECT_GE(4*DCT_MAX_VALUE, abs(output_extreme_block[j]))
+ << "Error: 16x16 FDCT extreme has coefficient larger than 4*DCT_MAX_VALUE";
+ }
+ }
+}
+} // namespace
diff --git a/test/fdct4x4_test.cc b/test/fdct4x4_test.cc
index 619b23d..ebec890 100644
--- a/test/fdct4x4_test.cc
+++ b/test/fdct4x4_test.cc
@@ -1,79 +1,31 @@
/*
-* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
-*
-* Use of this source code is governed by a BSD-style license
-* that can be found in the LICENSE file in the root of the source
-* tree. An additional intellectual property rights grant can be found
-* in the file PATENTS. All contributing project authors may
-* be found in the AUTHORS file in the root of the source tree.
-*/
-
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
#include <math.h>
-#include <stddef.h>
-#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <sys/types.h>
+#include "third_party/googletest/src/include/gtest/gtest.h"
extern "C" {
-#include "vpx_rtcd.h"
+#include "vp9_rtcd.h"
}
-#include "test/acm_random.h"
-#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "acm_random.h"
#include "vpx/vpx_integer.h"
-
-namespace {
-
-const int cospi8sqrt2minus1 = 20091;
-const int sinpi8sqrt2 = 35468;
-
-void reference_idct4x4(const int16_t *input, int16_t *output) {
- const int16_t *ip = input;
- int16_t *op = output;
-
- for (int i = 0; i < 4; ++i) {
- const int a1 = ip[0] + ip[8];
- const int b1 = ip[0] - ip[8];
- const int temp1 = (ip[4] * sinpi8sqrt2) >> 16;
- const int temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1) >> 16);
- const int c1 = temp1 - temp2;
- const int temp3 = ip[4] + ((ip[4] * cospi8sqrt2minus1) >> 16);
- const int temp4 = (ip[12] * sinpi8sqrt2) >> 16;
- const int d1 = temp3 + temp4;
- op[0] = a1 + d1;
- op[12] = a1 - d1;
- op[4] = b1 + c1;
- op[8] = b1 - c1;
- ++ip;
- ++op;
- }
- ip = output;
- op = output;
- for (int i = 0; i < 4; ++i) {
- const int a1 = ip[0] + ip[2];
- const int b1 = ip[0] - ip[2];
- const int temp1 = (ip[1] * sinpi8sqrt2) >> 16;
- const int temp2 = ip[3] + ((ip[3] * cospi8sqrt2minus1) >> 16);
- const int c1 = temp1 - temp2;
- const int temp3 = ip[1] + ((ip[1] * cospi8sqrt2minus1) >> 16);
- const int temp4 = (ip[3] * sinpi8sqrt2) >> 16;
- const int d1 = temp3 + temp4;
- op[0] = (a1 + d1 + 4) >> 3;
- op[3] = (a1 - d1 + 4) >> 3;
- op[1] = (b1 + c1 + 4) >> 3;
- op[2] = (b1 - c1 + 4) >> 3;
- ip += 4;
- op += 4;
- }
-}
-
using libvpx_test::ACMRandom;
-TEST(Vp8FdctTest, SignBiasCheck) {
+namespace {
+
+TEST(Vp9FdctTest, SignBiasCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
int16_t test_input_block[16];
int16_t test_output_block[16];
@@ -88,7 +40,9 @@
for (int j = 0; j < 16; ++j)
test_input_block[j] = rnd.Rand8() - rnd.Rand8();
- vp8_short_fdct4x4_c(test_input_block, test_output_block, pitch);
+ // TODO(Yaowu): this should be converted to a parameterized test
+ // to test optimized versions of this function.
+ vp9_short_fdct4x4_c(test_input_block, test_output_block, pitch);
for (int j = 0; j < 16; ++j) {
if (test_output_block[j] < 0)
@@ -98,13 +52,13 @@
}
}
- bool bias_acceptable = true;
- for (int j = 0; j < 16; ++j)
- bias_acceptable = bias_acceptable &&
- (abs(count_sign_block[j][0] - count_sign_block[j][1]) < 10000);
-
- EXPECT_EQ(true, bias_acceptable)
- << "Error: 4x4 FDCT has a sign bias > 1% for input range [-255, 255]";
+ for (int j = 0; j < 16; ++j) {
+ const bool bias_acceptable = (abs(count_sign_block[j][0] -
+ count_sign_block[j][1]) < 10000);
+ EXPECT_TRUE(bias_acceptable)
+ << "Error: 4x4 FDCT has a sign bias > 1%"
+ << " for input range [-255, 255] at index " << j;
+ }
memset(count_sign_block, 0, sizeof(count_sign_block));
@@ -113,7 +67,9 @@
for (int j = 0; j < 16; ++j)
test_input_block[j] = (rnd.Rand8() >> 4) - (rnd.Rand8() >> 4);
- vp8_short_fdct4x4_c(test_input_block, test_output_block, pitch);
+ // TODO(Yaowu): this should be converted to a parameterized test
+ // to test optimized versions of this function.
+ vp9_short_fdct4x4_c(test_input_block, test_output_block, pitch);
for (int j = 0; j < 16; ++j) {
if (test_output_block[j] < 0)
@@ -123,16 +79,16 @@
}
}
- bias_acceptable = true;
- for (int j = 0; j < 16; ++j)
- bias_acceptable = bias_acceptable &&
- (abs(count_sign_block[j][0] - count_sign_block[j][1]) < 100000);
-
- EXPECT_EQ(true, bias_acceptable)
- << "Error: 4x4 FDCT has a sign bias > 10% for input range [-15, 15]";
+ for (int j = 0; j < 16; ++j) {
+ const bool bias_acceptable = (abs(count_sign_block[j][0] -
+ count_sign_block[j][1]) < 100000);
+ EXPECT_TRUE(bias_acceptable)
+ << "Error: 4x4 FDCT has a sign bias > 10%"
+ << " for input range [-15, 15] at index " << j;
+ }
};
-TEST(Vp8FdctTest, RoundTripErrorCheck) {
+TEST(Vp9FdctTest, RoundTripErrorCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
int max_error = 0;
double total_error = 0;
@@ -146,9 +102,25 @@
for (int j = 0; j < 16; ++j)
test_input_block[j] = rnd.Rand8() - rnd.Rand8();
+ // TODO(Yaowu): this should be converted to a parameterized test
+ // to test optimized versions of this function.
const int pitch = 8;
- vp8_short_fdct4x4_c(test_input_block, test_temp_block, pitch);
- reference_idct4x4(test_temp_block, test_output_block);
+ vp9_short_fdct4x4_c(test_input_block, test_temp_block, pitch);
+
+ for (int j = 0; j < 16; ++j) {
+ if(test_temp_block[j] > 0) {
+ test_temp_block[j] += 2;
+ test_temp_block[j] /= 4;
+ test_temp_block[j] *= 4;
+ } else {
+ test_temp_block[j] -= 2;
+ test_temp_block[j] /= 4;
+ test_temp_block[j] *= 4;
+ }
+ }
+
+ // Because the bitstream is not frozen yet, use the idct in the codebase.
+ vp9_short_idct4x4llm_c(test_temp_block, test_output_block, pitch);
for (int j = 0; j < 16; ++j) {
const int diff = test_input_block[j] - test_output_block[j];
@@ -158,12 +130,11 @@
total_error += error;
}
}
-
- EXPECT_GE(1, max_error )
- << "Error: FDCT/IDCT has an individual roundtrip error > 1";
+ EXPECT_GE(1, max_error)
+ << "Error: FDCT/IDCT has an individual roundtrip error > 1";
EXPECT_GE(count_test_block, total_error)
- << "Error: FDCT/IDCT has average roundtrip error > 1 per block";
+ << "Error: FDCT/IDCT has average roundtrip error > 1 per block";
};
} // namespace
diff --git a/test/fdct8x8_test.cc b/test/fdct8x8_test.cc
new file mode 100644
index 0000000..d82f7c3
--- /dev/null
+++ b/test/fdct8x8_test.cc
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+extern "C" {
+#include "vp9_rtcd.h"
+}
+
+#include "acm_random.h"
+#include "vpx/vpx_integer.h"
+
+using libvpx_test::ACMRandom;
+
+namespace {
+
+TEST(VP9Fdct8x8Test, SignBiasCheck) {  // each FDCT output should be negative about as often as positive
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ int16_t test_input_block[64];
+ int16_t test_output_block[64];
+ const int pitch = 16;
+ int count_sign_block[64][2];  // per coefficient: [0] counts negative outputs, [1] positive outputs
+ const int count_test_block = 100000;
+
+ memset(count_sign_block, 0, sizeof(count_sign_block));
+
+ for (int i = 0; i < count_test_block; ++i) {
+ // Initialize a test block with input range [-255, 255].
+ for (int j = 0; j < 64; ++j)
+ test_input_block[j] = rnd.Rand8() - rnd.Rand8();
+
+ vp9_short_fdct8x8_c(test_input_block, test_output_block, pitch);
+
+ for (int j = 0; j < 64; ++j) {
+ if (test_output_block[j] < 0)
+ ++count_sign_block[j][0];
+ else if (test_output_block[j] > 0)  // exact zeros land in neither bucket
+ ++count_sign_block[j][1];
+ }
+ }
+
+ for (int j = 0; j < 64; ++j) {
+ const bool bias_acceptable = (abs(count_sign_block[j][0] -
+ count_sign_block[j][1]) < 1000);  // 1000 is 1% of count_test_block
+ EXPECT_TRUE(bias_acceptable)
+ << "Error: 8x8 FDCT has a sign bias > 1%"
+ << " for input range [-255, 255] at index " << j;
+ }
+
+ memset(count_sign_block, 0, sizeof(count_sign_block));  // restart the tally for the second input range
+
+ for (int i = 0; i < count_test_block; ++i) {
+ // Initialize a test block with input range [-15, 15].
+ for (int j = 0; j < 64; ++j)
+ test_input_block[j] = (rnd.Rand8() >> 4) - (rnd.Rand8() >> 4);
+
+ vp9_short_fdct8x8_c(test_input_block, test_output_block, pitch);
+
+ for (int j = 0; j < 64; ++j) {
+ if (test_output_block[j] < 0)
+ ++count_sign_block[j][0];
+ else if (test_output_block[j] > 0)  // exact zeros land in neither bucket
+ ++count_sign_block[j][1];
+ }
+ }
+
+ for (int j = 0; j < 64; ++j) {
+ const bool bias_acceptable = (abs(count_sign_block[j][0] -
+ count_sign_block[j][1]) < 10000);  // 10000 is 10% of count_test_block
+ EXPECT_TRUE(bias_acceptable)
+ << "Error: 8x8 FDCT has a sign bias > 10%"
+ << " for input range [-15, 15] at index " << j;
+ }
+};
+
+TEST(VP9Fdct8x8Test, RoundTripErrorCheck) {  // FDCT -> coefficient rounding -> IDCT must reproduce the input closely
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ int max_error = 0;  // largest squared per-sample difference seen so far
+ double total_error = 0;  // sum of squared differences over every sample of every block
+ const int count_test_block = 100000;
+ for (int i = 0; i < count_test_block; ++i) {
+ int16_t test_input_block[64];
+ int16_t test_temp_block[64];
+ int16_t test_output_block[64];
+
+ // Initialize a test block with input range [-255, 255].
+ for (int j = 0; j < 64; ++j)
+ test_input_block[j] = rnd.Rand8() - rnd.Rand8();
+
+ const int pitch = 16;
+ vp9_short_fdct8x8_c(test_input_block, test_temp_block, pitch);
+ for (int j = 0; j < 64; ++j){  // round each coefficient to the nearest multiple of 4 (ties away from zero, given truncating division)
+ if(test_temp_block[j] > 0) {
+ test_temp_block[j] += 2;
+ test_temp_block[j] /= 4;
+ test_temp_block[j] *= 4;
+ } else {
+ test_temp_block[j] -= 2;
+ test_temp_block[j] /= 4;
+ test_temp_block[j] *= 4;
+ }
+ }
+ vp9_short_idct8x8_c(test_temp_block, test_output_block, pitch);  // NOTE(review): the rounding above presumably mimics quantization - confirm
+
+ for (int j = 0; j < 64; ++j) {
+ const int diff = test_input_block[j] - test_output_block[j];
+ const int error = diff * diff;
+ if (max_error < error)
+ max_error = error;
+ total_error += error;
+ }
+ }
+
+ EXPECT_GE(1, max_error)
+ << "Error: 8x8 FDCT/IDCT has an individual roundtrip error > 1";
+
+ EXPECT_GE(count_test_block/5, total_error)
+ << "Error: 8x8 FDCT/IDCT has average roundtrip error > 1/5 per block";
+};
+
+TEST(VP9Fdct8x8Test, ExtremalCheck) {
+  // Round-trips blocks made only of the extreme sample values -255 and 255.
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  int max_error = 0;       // largest squared per-sample difference seen
+  double total_error = 0;  // sum of squared differences over all blocks
+  const int count_test_block = 100000;
+  for (int i = 0; i < count_test_block; ++i) {
+    int16_t test_input_block[64];
+    int16_t test_temp_block[64];
+    int16_t test_output_block[64];
+
+    // Initialize a test block with input range {-255, 255}.
+    for (int j = 0; j < 64; ++j)
+      test_input_block[j] = rnd.Rand8() % 2 ? 255 : -255;
+
+    const int pitch = 16;
+    vp9_short_fdct8x8_c(test_input_block, test_temp_block, pitch);
+    vp9_short_idct8x8_c(test_temp_block, test_output_block, pitch);
+
+    for (int j = 0; j < 64; ++j) {
+      const int diff = test_input_block[j] - test_output_block[j];
+      const int error = diff * diff;
+      if (max_error < error)
+        max_error = error;
+      total_error += error;
+    }
+  }
+  // Both bounds describe the whole run, so check once here, not per block.
+  EXPECT_GE(1, max_error)
+      << "Error: Extremal 8x8 FDCT/IDCT has an"
+      << " individual roundtrip error > 1";
+  EXPECT_GE(count_test_block/5, total_error)
+      << "Error: Extremal 8x8 FDCT/IDCT has average"
+      << " roundtrip error > 1/5 per block";
+}
+
+} // namespace
diff --git a/test/idct8x8_test.cc b/test/idct8x8_test.cc
new file mode 100644
index 0000000..ab71e85
--- /dev/null
+++ b/test/idct8x8_test.cc
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+extern "C" {
+#include "vp9_rtcd.h"
+}
+
+#include "acm_random.h"
+#include "vpx/vpx_integer.h"
+
+using libvpx_test::ACMRandom;
+
+namespace {
+
+#ifdef _MSC_VER  // NOTE(review): presumably because MSVC's <math.h> did not provide round() - confirm
+static int round(double x) {  // rounds to the nearest integer, halves away from zero
+ if(x < 0)
+ return (int)ceil(x - 0.5);
+ else
+ return (int)floor(x + 0.5);
+}
+#endif
+
+void reference_dct_1d(double input[8], double output[8]) {  // 8-point floating-point DCT of input into output
+ const double kPi = 3.141592653589793238462643383279502884;
+ const double kInvSqrt2 = 0.707106781186547524400844362104;
+ for (int k = 0; k < 8; k++) {
+ output[k] = 0.0;
+ for (int n = 0; n < 8; n++)
+ output[k] += input[n]*cos(kPi*(2*n+1)*k/16.0);  // output[k] = sum over n of input[n]*cos(pi*(2n+1)*k/16)
+ if (k == 0)
+ output[k] = output[k]*kInvSqrt2;  // only the k == 0 (DC) term is scaled by 1/sqrt(2)
+ }
+}
+
+void reference_dct_2d(int16_t input[64], double output[64]) {  // 8x8 floating-point DCT; both arrays are indexed as row*8 + column
+ // First transform columns
+ for (int i = 0; i < 8; ++i) {
+ double temp_in[8], temp_out[8];
+ for (int j = 0; j < 8; ++j)
+ temp_in[j] = input[j*8 + i];  // gather column i
+ reference_dct_1d(temp_in, temp_out);
+ for (int j = 0; j < 8; ++j)
+ output[j*8 + i] = temp_out[j];
+ }
+ // Then transform rows
+ for (int i = 0; i < 8; ++i) {
+ double temp_in[8], temp_out[8];
+ for (int j = 0; j < 8; ++j)
+ temp_in[j] = output[j + i*8];  // gather row i of the column-transformed data
+ reference_dct_1d(temp_in, temp_out);
+ for (int j = 0; j < 8; ++j)
+ output[j + i*8] = temp_out[j];
+ }
+ // Scale every coefficient by 2. NOTE(review): presumably to match the scaling of vp9_short_fdct8x8_c output - confirm
+ for (int i = 0; i < 64; ++i)
+ output[i] *= 2;
+}
+
+void reference_idct_1d(double input[8], double output[8]) {  // 8-point floating-point inverse DCT of input into output
+ const double kPi = 3.141592653589793238462643383279502884;
+ const double kSqrt2 = 1.414213562373095048801688724209698;
+ for (int k = 0; k < 8; k++) {
+ output[k] = 0.0;
+ for (int n = 0; n < 8; n++) {
+ output[k] += input[n]*cos(kPi*(2*k+1)*n/16.0);
+ if (n == 0)  // at this point the sum holds only the n == 0 term, so just that term is divided by sqrt(2)
+ output[k] = output[k]/kSqrt2;
+ }
+ }
+}
+
+void reference_idct_2d(double input[64], int16_t output[64]) {  // 8x8 floating-point inverse DCT, rounded into int16_t output
+ double out[64], out2[64];  // out: after the row pass; out2: after the column pass
+ // First transform rows
+ for (int i = 0; i < 8; ++i) {
+ double temp_in[8], temp_out[8];
+ for (int j = 0; j < 8; ++j)
+ temp_in[j] = input[j + i*8];  // gather row i
+ reference_idct_1d(temp_in, temp_out);
+ for (int j = 0; j < 8; ++j)
+ out[j + i*8] = temp_out[j];
+ }
+ // Then transform columns
+ for (int i = 0; i < 8; ++i) {
+ double temp_in[8], temp_out[8];
+ for (int j = 0; j < 8; ++j)
+ temp_in[j] = out[j*8 + i];  // gather column i of the row-transformed data
+ reference_idct_1d(temp_in, temp_out);
+ for (int j = 0; j < 8; ++j)
+ out2[j*8 + i] = temp_out[j];
+ }
+ for (int i = 0; i < 64; ++i)
+ output[i] = round(out2[i]/32);  // NOTE(review): /32 presumably undoes the combined forward/inverse scaling - confirm
+}
+
+TEST(VP9Idct8x8Test, AccuracyCheck) {  // compares the integer 8x8 FDCT and IDCT against the floating-point reference DCT
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ const int count_test_block = 10000;
+ for (int i = 0; i < count_test_block; ++i) {
+ int16_t input[64], coeff[64];
+ int16_t output_c[64];  // result of the C implementation under test
+ double output_r[64];  // result of the floating-point reference
+
+ // Initialize a test block with input range [-255, 255].
+ for (int j = 0; j < 64; ++j)
+ input[j] = rnd.Rand8() - rnd.Rand8();
+
+ const int pitch = 16;
+ vp9_short_fdct8x8_c(input, output_c, pitch);
+ reference_dct_2d(input, output_r);
+
+ for (int j = 0; j < 64; ++j) {
+ const double diff = output_c[j] - output_r[j];
+ const double error = diff * diff;
+ // An error in a DCT coefficient isn't that bad, so a squared error of
+ // up to 2.0 is tolerated. We care more about the reconstructed pixels.
+ EXPECT_GE(2.0, error)
+ << "Error: 8x8 FDCT/IDCT has error " << error
+ << " at index " << j;
+ }
+
+#if 0
+ // Disabled: tests that the reference iDCT and fDCT match.
+ reference_dct_2d(input, output_r);
+ reference_idct_2d(output_r, output_c);
+ for (int j = 0; j < 64; ++j) {
+ const int diff = output_c[j] -input[j];
+ const int error = diff * diff;
+ EXPECT_EQ(0, error)
+ << "Error: 8x8 FDCT/IDCT has error " << error
+ << " at index " << j;
+ }
+#endif
+ reference_dct_2d(input, output_r);  // feed the IDCT under test with rounded reference coefficients
+ for (int j = 0; j < 64; ++j)
+ coeff[j] = round(output_r[j]);
+ vp9_short_idct8x8_c(coeff, output_c, pitch);
+ for (int j = 0; j < 64; ++j) {
+ const int diff = output_c[j] -input[j];
+ const int error = diff * diff;
+ EXPECT_GE(1, error)  // reconstructed samples may differ from the input by at most 1
+ << "Error: 8x8 FDCT/IDCT has error " << error
+ << " at index " << j;
+ }
+ }
+}
+
+} // namespace
diff --git a/test/idctllm_test.cc b/test/idctllm_test.cc
index 1be5fa0..d6fdffe 100644
--- a/test/idctllm_test.cc
+++ b/test/idctllm_test.cc
@@ -11,7 +11,7 @@
extern "C" {
#include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
}
#include "test/register_state_check.h"
#include "third_party/googletest/src/include/gtest/gtest.h"
diff --git a/test/intrapred_test.cc b/test/intrapred_test.cc
index 4c16c3f..1493990 100644
--- a/test/intrapred_test.cc
+++ b/test/intrapred_test.cc
@@ -15,7 +15,7 @@
#include "third_party/googletest/src/include/gtest/gtest.h"
extern "C" {
#include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
#include "vp8/common/blockd.h"
#include "vpx_mem/vpx_mem.h"
}
diff --git a/test/pp_filter_test.cc b/test/pp_filter_test.cc
index 9227449..412a574 100644
--- a/test/pp_filter_test.cc
+++ b/test/pp_filter_test.cc
@@ -11,7 +11,7 @@
#include "third_party/googletest/src/include/gtest/gtest.h"
extern "C" {
#include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
#include "vpx/vpx_integer.h"
#include "vpx_mem/vpx_mem.h"
}
diff --git a/test/sad_test.cc b/test/sad_test.cc
index 5a0653b..72741a9 100644
--- a/test/sad_test.cc
+++ b/test/sad_test.cc
@@ -15,7 +15,7 @@
extern "C" {
#include "./vpx_config.h"
-#include "./vpx_rtcd.h"
+#include "./vp8_rtcd.h"
#include "vp8/common/blockd.h"
#include "vpx_mem/vpx_mem.h"
}
diff --git a/test/sixtap_predict_test.cc b/test/sixtap_predict_test.cc
index c9dcceb..2d4581d 100644
--- a/test/sixtap_predict_test.cc
+++ b/test/sixtap_predict_test.cc
@@ -17,7 +17,7 @@
#include "third_party/googletest/src/include/gtest/gtest.h"
extern "C" {
#include "./vpx_config.h"
-#include "./vpx_rtcd.h"
+#include "./vp8_rtcd.h"
#include "vpx/vpx_integer.h"
#include "vpx_mem/vpx_mem.h"
}
diff --git a/test/subtract_test.cc b/test/subtract_test.cc
index 60acf81..e7d1073 100644
--- a/test/subtract_test.cc
+++ b/test/subtract_test.cc
@@ -13,7 +13,7 @@
#include "test/register_state_check.h"
extern "C" {
#include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
#include "vp8/common/blockd.h"
#include "vp8/encoder/block.h"
#include "vpx_mem/vpx_mem.h"
diff --git a/test/test.mk b/test/test.mk
index 982be5b..cb15fce 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -1,6 +1,7 @@
-LIBVPX_TEST_SRCS-yes += acm_random.h
LIBVPX_TEST_SRCS-yes += register_state_check.h
LIBVPX_TEST_SRCS-yes += test.mk
+LIBVPX_TEST_SRCS-yes += acm_random.h
+
LIBVPX_TEST_SRCS-yes += test_libvpx.cc
LIBVPX_TEST_SRCS-yes += util.h
LIBVPX_TEST_SRCS-yes += video_source.h
@@ -34,12 +35,14 @@
##
ifeq ($(CONFIG_SHARED),)
+## VP8
+ifneq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),)
+
# These tests require both the encoder and decoder to be built.
ifeq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),yesyes)
-LIBVPX_TEST_SRCS-yes += boolcoder_test.cc
+LIBVPX_TEST_SRCS-yes += vp8_boolcoder_test.cc
endif
-LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += fdct4x4_test.cc
LIBVPX_TEST_SRCS-yes += idctllm_test.cc
LIBVPX_TEST_SRCS-yes += intrapred_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_POSTPROC) += pp_filter_test.cc
@@ -47,6 +50,27 @@
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += set_roi.cc
LIBVPX_TEST_SRCS-yes += sixtap_predict_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += subtract_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += vp8_fdct4x4_test.cc
+
+endif # VP8
+
+## VP9
+ifneq ($(CONFIG_VP9_ENCODER)$(CONFIG_VP9_DECODER),)
+
+# These tests require both the encoder and decoder to be built.
+ifeq ($(CONFIG_VP9_ENCODER)$(CONFIG_VP9_DECODER),yesyes)
+LIBVPX_TEST_SRCS-yes += vp9_boolcoder_test.cc
+
+# IDCT test currently depends on FDCT function
+LIBVPX_TEST_SRCS-yes += idct8x8_test.cc
+endif
+
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc
+#LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct16x16_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc
+endif # VP9
+
endif
diff --git a/test/test_libvpx.cc b/test/test_libvpx.cc
index cfd5d28..5610c26 100644
--- a/test/test_libvpx.cc
+++ b/test/test_libvpx.cc
@@ -9,11 +9,17 @@
*/
#include <string>
#include "vpx_config.h"
-#if ARCH_X86 || ARCH_X86_64
extern "C" {
+#if ARCH_X86 || ARCH_X86_64
#include "vpx_ports/x86.h"
-}
#endif
+#if CONFIG_VP8
+extern void vp8_rtcd();
+#endif
+#if CONFIG_VP9
+extern void vp9_rtcd();
+#endif
+}
#include "third_party/googletest/src/include/gtest/gtest.h"
static void append_gtest_filter(const char *str) {
@@ -27,19 +33,29 @@
#if ARCH_X86 || ARCH_X86_64
const int simd_caps = x86_simd_caps();
- if(!(simd_caps & HAS_MMX))
+ if (!(simd_caps & HAS_MMX))
append_gtest_filter(":-MMX/*");
- if(!(simd_caps & HAS_SSE))
+ if (!(simd_caps & HAS_SSE))
append_gtest_filter(":-SSE/*");
- if(!(simd_caps & HAS_SSE2))
+ if (!(simd_caps & HAS_SSE2))
append_gtest_filter(":-SSE2/*");
- if(!(simd_caps & HAS_SSE3))
+ if (!(simd_caps & HAS_SSE3))
append_gtest_filter(":-SSE3/*");
- if(!(simd_caps & HAS_SSSE3))
+ if (!(simd_caps & HAS_SSSE3))
append_gtest_filter(":-SSSE3/*");
- if(!(simd_caps & HAS_SSE4_1))
+ if (!(simd_caps & HAS_SSE4_1))
append_gtest_filter(":-SSE4_1/*");
#endif
+#if !CONFIG_SHARED
+ /* Shared library builds don't support whitebox tests that exercise internal symbols. */
+#if CONFIG_VP8
+ vp8_rtcd();
+#endif
+#if CONFIG_VP9
+ vp9_rtcd();
+#endif
+#endif
+
return RUN_ALL_TESTS();
}
diff --git a/test/variance_test.cc b/test/variance_test.cc
new file mode 100644
index 0000000..83f1139
--- /dev/null
+++ b/test/variance_test.cc
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include <stdlib.h>
+#include <new>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "vpx_config.h"
+extern "C" {
+#include "vp9/encoder/vp9_variance.h"
+#include "vpx/vpx_integer.h"
+#include "vp9_rtcd.h"
+}
+
+namespace {
+
+using ::std::tr1::get;
+using ::std::tr1::make_tuple;
+using ::std::tr1::tuple;
+
+class VP9VarianceTest :  // fixture parameterized by (block width, block height, variance function)
+ public ::testing::TestWithParam<tuple<int, int, vp9_variance_fn_t> > {
+ public:
+ virtual void SetUp() {  // unpacks the parameter tuple and allocates the two pixel buffers
+ const tuple<int, int, vp9_variance_fn_t>& params = GetParam();
+ width_ = get<0>(params);
+ height_ = get<1>(params);
+ variance_ = get<2>(params);
+
+ block_size_ = width_ * height_;
+ src_ = new uint8_t[block_size_];
+ ref_ = new uint8_t[block_size_];
+ ASSERT_TRUE(src_ != NULL);  // NOTE(review): plain new[] throws on failure instead of returning NULL, so this cannot fail as written
+ ASSERT_TRUE(ref_ != NULL);
+ }
+
+ virtual void TearDown() {  // releases the buffers allocated in SetUp()
+ delete[] src_;
+ delete[] ref_;
+ }
+
+ protected:
+ uint8_t* src_;  // source block, block_size_ bytes
+ uint8_t* ref_;  // reference block, block_size_ bytes
+ int width_;
+ int height_;
+ int block_size_;  // width_ * height_
+ vp9_variance_fn_t variance_;  // function under test
+};
+
+TEST_P(VP9VarianceTest, Zero) {  // constant src vs. constant ref => variance 0
+  for (int i = 0; i <= 255; ++i) {
+    memset(src_, i, block_size_);
+    for (int j = 0; j <= 255; ++j) {
+      memset(ref_, j, block_size_);
+      unsigned int sse;  // output argument of variance_(); not checked here
+      const unsigned int var = variance_(src_, width_, ref_, width_, &sse);
+      EXPECT_EQ(0u, var) << "src values: " << i << " ref values: " << j;
+    }
+  }
+}
+
+TEST_P(VP9VarianceTest, OneQuarter) {  // src is all 255; ref is 255 in its first half and 0 in its second half
+ memset(src_, 255, block_size_);
+ const int half = block_size_ / 2;
+ memset(ref_, 255, half);
+ memset(ref_ + half, 0, half);
+ unsigned int sse;  // output argument of variance_(); not checked here
+ const unsigned int var = variance_(src_, width_, ref_, width_, &sse);
+ const unsigned int expected = block_size_ * 255 * 255 / 4;  // NOTE(review): equals sse - sum^2/N for this input, assuming that is what variance_ returns - confirm
+ EXPECT_EQ(expected, var);
+}
+
+const vp9_variance_fn_t variance4x4_c = vp9_variance4x4_c;  // typed aliases used as tuple elements below
+const vp9_variance_fn_t variance8x8_c = vp9_variance8x8_c;
+const vp9_variance_fn_t variance8x16_c = vp9_variance8x16_c;
+const vp9_variance_fn_t variance16x8_c = vp9_variance16x8_c;
+const vp9_variance_fn_t variance16x16_c = vp9_variance16x16_c;
+INSTANTIATE_TEST_CASE_P(  // each tuple is (width, height, function), in the order SetUp() reads them
+ C, VP9VarianceTest,
+ ::testing::Values(make_tuple(4, 4, variance4x4_c),
+ make_tuple(8, 8, variance8x8_c),
+ make_tuple(8, 16, variance8x16_c),
+ make_tuple(16, 8, variance16x8_c),
+ make_tuple(16, 16, variance16x16_c)));
+
+#if HAVE_MMX  // same block sizes, run against the _mmx functions
+const vp9_variance_fn_t variance4x4_mmx = vp9_variance4x4_mmx;
+const vp9_variance_fn_t variance8x8_mmx = vp9_variance8x8_mmx;
+const vp9_variance_fn_t variance8x16_mmx = vp9_variance8x16_mmx;
+const vp9_variance_fn_t variance16x8_mmx = vp9_variance16x8_mmx;
+const vp9_variance_fn_t variance16x16_mmx = vp9_variance16x16_mmx;
+INSTANTIATE_TEST_CASE_P(
+ MMX, VP9VarianceTest,
+ ::testing::Values(make_tuple(4, 4, variance4x4_mmx),
+ make_tuple(8, 8, variance8x8_mmx),
+ make_tuple(8, 16, variance8x16_mmx),
+ make_tuple(16, 8, variance16x8_mmx),
+ make_tuple(16, 16, variance16x16_mmx)));
+#endif
+
+#if HAVE_SSE2  // NOTE(review): the _wmt functions are presumably the SSE2 versions - confirm
+const vp9_variance_fn_t variance4x4_wmt = vp9_variance4x4_wmt;
+const vp9_variance_fn_t variance8x8_wmt = vp9_variance8x8_wmt;
+const vp9_variance_fn_t variance8x16_wmt = vp9_variance8x16_wmt;
+const vp9_variance_fn_t variance16x8_wmt = vp9_variance16x8_wmt;
+const vp9_variance_fn_t variance16x16_wmt = vp9_variance16x16_wmt;
+INSTANTIATE_TEST_CASE_P(
+ SSE2, VP9VarianceTest,
+ ::testing::Values(make_tuple(4, 4, variance4x4_wmt),
+ make_tuple(8, 8, variance8x8_wmt),
+ make_tuple(8, 16, variance8x16_wmt),
+ make_tuple(16, 8, variance16x8_wmt),
+ make_tuple(16, 16, variance16x16_wmt)));
+#endif
+} // namespace
diff --git a/test/boolcoder_test.cc b/test/vp8_boolcoder_test.cc
similarity index 100%
rename from test/boolcoder_test.cc
rename to test/vp8_boolcoder_test.cc
diff --git a/test/vp8_fdct4x4_test.cc b/test/vp8_fdct4x4_test.cc
new file mode 100644
index 0000000..3c60011
--- /dev/null
+++ b/test/vp8_fdct4x4_test.cc
@@ -0,0 +1,169 @@
+/*
+* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+*
+* Use of this source code is governed by a BSD-style license
+* that can be found in the LICENSE file in the root of the source
+* tree. An additional intellectual property rights grant can be found
+* in the file PATENTS. All contributing project authors may
+* be found in the AUTHORS file in the root of the source tree.
+*/
+
+
+#include <math.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+
+
+extern "C" {
+#include "vp8_rtcd.h"
+}
+
+#include "test/acm_random.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "vpx/vpx_integer.h"
+
+
+namespace {
+
+const int cospi8sqrt2minus1 = 20091;  // 20091/65536 ~= sqrt(2)*cos(pi/8) - 1; used with >> 16
+const int sinpi8sqrt2 = 35468;  // 35468/65536 ~= sqrt(2)*sin(pi/8); used with >> 16
+
+void reference_idct4x4(const int16_t *input, int16_t *output) {  // integer 4x4 inverse DCT of input (16 values) into output
+ const int16_t *ip = input;
+ int16_t *op = output;
+
+ for (int i = 0; i < 4; ++i) {  // first pass: one column per iteration (elements 0, 4, 8, 12 from ip)
+ const int a1 = ip[0] + ip[8];
+ const int b1 = ip[0] - ip[8];
+ const int temp1 = (ip[4] * sinpi8sqrt2) >> 16;
+ const int temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1) >> 16);  // x + x*(c - 1) gives x*c
+ const int c1 = temp1 - temp2;
+ const int temp3 = ip[4] + ((ip[4] * cospi8sqrt2minus1) >> 16);
+ const int temp4 = (ip[12] * sinpi8sqrt2) >> 16;
+ const int d1 = temp3 + temp4;
+ op[0] = a1 + d1;
+ op[12] = a1 - d1;
+ op[4] = b1 + c1;
+ op[8] = b1 - c1;
+ ++ip;
+ ++op;
+ }
+ ip = output;  // second pass reads and rewrites output in place
+ op = output;
+ for (int i = 0; i < 4; ++i) {  // second pass: one row of four consecutive values per iteration
+ const int a1 = ip[0] + ip[2];
+ const int b1 = ip[0] - ip[2];
+ const int temp1 = (ip[1] * sinpi8sqrt2) >> 16;
+ const int temp2 = ip[3] + ((ip[3] * cospi8sqrt2minus1) >> 16);
+ const int c1 = temp1 - temp2;
+ const int temp3 = ip[1] + ((ip[1] * cospi8sqrt2minus1) >> 16);
+ const int temp4 = (ip[3] * sinpi8sqrt2) >> 16;
+ const int d1 = temp3 + temp4;
+ op[0] = (a1 + d1 + 4) >> 3;  // add 4 then shift right by 3: divide by 8 with rounding
+ op[3] = (a1 - d1 + 4) >> 3;
+ op[1] = (b1 + c1 + 4) >> 3;
+ op[2] = (b1 - c1 + 4) >> 3;
+ ip += 4;
+ op += 4;
+ }
+}
+
+using libvpx_test::ACMRandom;
+
+TEST(Vp8FdctTest, SignBiasCheck) {  // each FDCT output should be negative about as often as positive
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ int16_t test_input_block[16];
+ int16_t test_output_block[16];
+ const int pitch = 8;
+ int count_sign_block[16][2];  // per coefficient: [0] counts negative outputs, [1] positive outputs
+ const int count_test_block = 1000000;
+
+ memset(count_sign_block, 0, sizeof(count_sign_block));
+
+ for (int i = 0; i < count_test_block; ++i) {
+ // Initialize a test block with input range [-255, 255].
+ for (int j = 0; j < 16; ++j)
+ test_input_block[j] = rnd.Rand8() - rnd.Rand8();
+
+ vp8_short_fdct4x4_c(test_input_block, test_output_block, pitch);
+
+ for (int j = 0; j < 16; ++j) {
+ if (test_output_block[j] < 0)
+ ++count_sign_block[j][0];
+ else if (test_output_block[j] > 0)  // exact zeros land in neither bucket
+ ++count_sign_block[j][1];
+ }
+ }
+
+ bool bias_acceptable = true;  // stays true only if every coefficient passes
+ for (int j = 0; j < 16; ++j)
+ bias_acceptable = bias_acceptable &&
+ (abs(count_sign_block[j][0] - count_sign_block[j][1]) < 10000);  // 10000 is 1% of count_test_block
+
+ EXPECT_EQ(true, bias_acceptable)
+ << "Error: 4x4 FDCT has a sign bias > 1% for input range [-255, 255]";
+
+ memset(count_sign_block, 0, sizeof(count_sign_block));  // restart the tally for the second input range
+
+ for (int i = 0; i < count_test_block; ++i) {
+ // Initialize a test block with input range [-15, 15].
+ for (int j = 0; j < 16; ++j)
+ test_input_block[j] = (rnd.Rand8() >> 4) - (rnd.Rand8() >> 4);
+
+ vp8_short_fdct4x4_c(test_input_block, test_output_block, pitch);
+
+ for (int j = 0; j < 16; ++j) {
+ if (test_output_block[j] < 0)
+ ++count_sign_block[j][0];
+ else if (test_output_block[j] > 0)  // exact zeros land in neither bucket
+ ++count_sign_block[j][1];
+ }
+ }
+
+ bias_acceptable = true;
+ for (int j = 0; j < 16; ++j)
+ bias_acceptable = bias_acceptable &&
+ (abs(count_sign_block[j][0] - count_sign_block[j][1]) < 100000);  // 100000 is 10% of count_test_block
+
+ EXPECT_EQ(true, bias_acceptable)
+ << "Error: 4x4 FDCT has a sign bias > 10% for input range [-15, 15]";
+};
+
+TEST(Vp8FdctTest, RoundTripErrorCheck) {  // FDCT followed by the reference IDCT must reproduce the input closely
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ int max_error = 0;  // largest squared per-sample difference seen so far
+ double total_error = 0;  // sum of squared differences over every sample of every block
+ const int count_test_block = 1000000;
+ for (int i = 0; i < count_test_block; ++i) {
+ int16_t test_input_block[16];
+ int16_t test_temp_block[16];
+ int16_t test_output_block[16];
+
+ // Initialize a test block with input range [-255, 255].
+ for (int j = 0; j < 16; ++j)
+ test_input_block[j] = rnd.Rand8() - rnd.Rand8();
+
+ const int pitch = 8;
+ vp8_short_fdct4x4_c(test_input_block, test_temp_block, pitch);
+ reference_idct4x4(test_temp_block, test_output_block);  // inverse transform uses the local reference, not a library IDCT
+
+ for (int j = 0; j < 16; ++j) {
+ const int diff = test_input_block[j] - test_output_block[j];
+ const int error = diff * diff;
+ if (max_error < error)
+ max_error = error;
+ total_error += error;
+ }
+ }
+
+ EXPECT_GE(1, max_error )
+ << "Error: FDCT/IDCT has an individual roundtrip error > 1";
+
+ EXPECT_GE(count_test_block, total_error)
+ << "Error: FDCT/IDCT has average roundtrip error > 1 per block";
+};
+
+} // namespace
diff --git a/test/vp9_boolcoder_test.cc b/test/vp9_boolcoder_test.cc
new file mode 100644
index 0000000..bde28a5
--- /dev/null
+++ b/test/vp9_boolcoder_test.cc
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+extern "C" {
+#include "vp9/encoder/vp9_boolhuff.h"
+#include "vp9/decoder/vp9_dboolhuff.h"
+}
+
+#include "acm_random.h"
+#include "vpx/vpx_integer.h"
+
+using libvpx_test::ACMRandom;
+
+namespace {
+const int num_tests = 10;
+} // namespace
+
+TEST(VP9, TestBitIO) {  // encodes a bit sequence with the bool coder, decodes it, and expects the same bits back
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ for (int n = 0; n < num_tests; ++n) {
+ for (int method = 0; method <= 7; ++method) { // each method selects a different pattern of probabilities
+ const int bits_to_test = 1000;
+ uint8_t probas[bits_to_test];
+
+ for (int i = 0; i < bits_to_test; ++i) {
+ const int parity = i & 1;
+ probas[i] =
+ (method == 0) ? 0 : (method == 1) ? 255 :
+ (method == 2) ? 128 :
+ (method == 3) ? rnd.Rand8() :
+ (method == 4) ? (parity ? 0 : 255) :
+ // alternate between low and high probabilities:
+ (method == 5) ? (parity ? rnd(128) : 255 - rnd(128)) :
+ (method == 6) ?
+ (parity ? rnd(64) : 255 - rnd(64)) :
+ (parity ? rnd(32) : 255 - rnd(32));
+ }
+ for (int bit_method = 0; bit_method <= 3; ++bit_method) {  // 0: all zeros, 1: all ones, 2: alternating, 3: random
+ const int random_seed = 6432;
+ const int buffer_size = 10000;
+ ACMRandom bit_rnd(random_seed);
+ BOOL_CODER bw;
+ uint8_t bw_buffer[buffer_size];
+ vp9_start_encode(&bw, bw_buffer);
+
+ int bit = (bit_method == 0) ? 0 : (bit_method == 1) ? 1 : 0;
+ for (int i = 0; i < bits_to_test; ++i) {
+ if (bit_method == 2) {
+ bit = (i & 1);
+ } else if (bit_method == 3) {
+ bit = bit_rnd(2);
+ }
+ encode_bool(&bw, bit, static_cast<int>(probas[i]));
+ }
+
+ vp9_stop_encode(&bw);
+
+ BOOL_DECODER br;
+ vp9_start_decode(&br, bw_buffer, buffer_size);  // passes the full buffer size, not the number of bytes written
+ bit_rnd.Reset(random_seed);  // replay the same random bit sequence that was encoded
+ for (int i = 0; i < bits_to_test; ++i) {
+ if (bit_method == 2) {
+ bit = (i & 1);
+ } else if (bit_method == 3) {
+ bit = bit_rnd(2);
+ }
+ GTEST_ASSERT_EQ(decode_bool(&br, probas[i]), bit)
+ << "pos: " << i << " / " << bits_to_test
+ << " bit_method: " << bit_method
+ << " method: " << method;
+ }
+ }
+ }
+ }
+}
diff --git a/third_party/x86inc/LICENSE b/third_party/x86inc/LICENSE
new file mode 100644
index 0000000..7d07645
--- /dev/null
+++ b/third_party/x86inc/LICENSE
@@ -0,0 +1,18 @@
+Copyright (C) 2005-2012 x264 project
+
+Authors: Loren Merritt <lorenm@u.washington.edu>
+ Anton Mitrofanov <BugMaster@narod.ru>
+ Jason Garrett-Glaser <darkshikari@gmail.com>
+ Henrik Gramner <hengar-6@student.ltu.se>
+
+Permission to use, copy, modify, and/or distribute this software for any
+purpose with or without fee is hereby granted, provided that the above
+copyright notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
diff --git a/third_party/x86inc/README.webm b/third_party/x86inc/README.webm
new file mode 100644
index 0000000..02cd9ab
--- /dev/null
+++ b/third_party/x86inc/README.webm
@@ -0,0 +1,11 @@
+URL: http://git.videolan.org/?p=x264.git
+Version: 999b753ff0f4dc872077f4fa90d465e948cbe656
+License: ISC
+License File: LICENSE
+
+Description:
+x264/libav's framework for x86 assembly. Contains a variety of macros and
+defines that let the same assembly source build and run across platforms.
+
+Local Modifications:
+Some modifications to allow PIC to work with x86inc.
diff --git a/third_party/x86inc/x86inc.asm b/third_party/x86inc/x86inc.asm
new file mode 100644
index 0000000..d9927ec
--- /dev/null
+++ b/third_party/x86inc/x86inc.asm
@@ -0,0 +1,1118 @@
+;*****************************************************************************
+;* x86inc.asm: x264asm abstraction layer
+;*****************************************************************************
+;* Copyright (C) 2005-2012 x264 project
+;*
+;* Authors: Loren Merritt <lorenm@u.washington.edu>
+;* Anton Mitrofanov <BugMaster@narod.ru>
+;* Jason Garrett-Glaser <darkshikari@gmail.com>
+;* Henrik Gramner <hengar-6@student.ltu.se>
+;*
+;* Permission to use, copy, modify, and/or distribute this software for any
+;* purpose with or without fee is hereby granted, provided that the above
+;* copyright notice and this permission notice appear in all copies.
+;*
+;* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+;* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+;* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+;* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+;* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+;* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+;* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+;*****************************************************************************
+
+; This is a header file for the x264ASM assembly language, which uses
+; NASM/YASM syntax combined with a large number of macros to provide easy
+; abstraction between different calling conventions (x86_32, win64, linux64).
+; It also has various other useful features to simplify writing the kind of
+; DSP functions that are most often used in x264.
+
+; Unlike the rest of x264, this file is available under an ISC license, as it
+; has significant usefulness outside of x264 and we want it to be available
+; to the largest audience possible. Of course, if you modify it for your own
+; purposes to add a new feature, we strongly encourage contributing a patch
+; as this feature might be useful for others as well. Send patches or ideas
+; to x264-devel@videolan.org .
+
+%include "vpx_config.asm"
+
+%define program_name vp9
+
+
+; Select the 64-bit calling convention. Both flags stay 0 on 32-bit builds; on
+; x86-64 exactly one of them becomes 1, chosen from the assembler output format.
+%define UNIX64 0
+%define WIN64 0
+%if ARCH_X86_64
+ ; NOTE(review): win32 presumably appears here when a 32-bit object format is
+ ; requested together with a 64-bit machine setting -- confirm.
+ %ifidn __OUTPUT_FORMAT__,win32
+ %define WIN64 1
+ %elifidn __OUTPUT_FORMAT__,win64
+ %define WIN64 1
+ %elifidn __OUTPUT_FORMAT__,x64
+ %define WIN64 1
+ %else
+ ; Every other output format on x86-64 is treated as the Unix convention.
+ %define UNIX64 1
+ %endif
+%endif
+
+; mangle(x): symbol name as it must be emitted for the current output format.
+; ELF (elf, elf32, elf64) and x64 keep the name unchanged; every other format
+; gets a leading underscore.
+%ifidn __OUTPUT_FORMAT__,elf32
+ %define mangle(x) x
+%elifidn __OUTPUT_FORMAT__,elf64
+ %define mangle(x) x
+%elifidn __OUTPUT_FORMAT__,elf
+ %define mangle(x) x
+%elifidn __OUTPUT_FORMAT__,x64
+ %define mangle(x) x
+%else
+ %define mangle(x) _ %+ x
+%endif
+
+; FIXME: All of the 64bit asm functions that take a stride as an argument
+; via register, assume that the high dword of that register is filled with 0.
+; This is true in practice (since we never do any 64bit arithmetic on strides,
+; and x264's strides are all positive), but is not guaranteed by the ABI.
+
+; Name of the .rodata section.
+; Kludge: Something on OS X fails to align .rodata even given an align attribute,
+; so use a different read-only section.
+; SECTION_RODATA [align]: switch to the section used for read-only data.
+; The optional argument is the section alignment and defaults to 16.
+%macro SECTION_RODATA 0-1 16
+ %ifidn __OUTPUT_FORMAT__,macho64
+ ; Mach-O: constants are placed in .text instead of .rodata (see kludge note above).
+ SECTION .text align=%1
+ %elifidn __OUTPUT_FORMAT__,macho
+ SECTION .text align=%1
+ ; NOTE(review): fakegot looks like an anchor label for 32-bit Mach-O PIC
+ ; addressing -- confirm against its users.
+ fakegot:
+ %elifidn __OUTPUT_FORMAT__,aout
+ ; No align attribute is passed for aout.
+ section .text
+ %else
+ SECTION .rodata align=%1
+ %endif
+%endmacro
+
+; aout does not support align=
+%macro SECTION_TEXT 0-1 16
+ %ifidn __OUTPUT_FORMAT__,aout
+ SECTION .text
+ %else
+ SECTION .text align=%1
+ %endif
+%endmacro
+
+%if WIN64
+ %define PIC
+%elifidn __OUTPUT_FORMAT__,macho64
+ %define PIC
+%elif ARCH_X86_64 == 0
+; x86_32 doesn't require PIC.
+; Some distros prefer shared objects to be PIC, but nothing breaks if
+; the code contains a few textrels, so we'll skip that complexity.
+ %undef PIC
+%elif CONFIG_PIC
+ %define PIC
+%endif
+%ifdef PIC
+ default rel
+%endif
+
+; Always use long nops (reduces 0x90 spam in disassembly on x86_32)
+CPU amdnop
+
+; Macros to eliminate most code duplication between x86_32 and x86_64:
+; Currently this works only for leaf functions which load all their arguments
+; into registers at the start, and make no other use of the stack. Luckily that
+; covers most of x264's asm.
+
+; PROLOGUE:
+; %1 = number of arguments. loads them from stack if needed.
+; %2 = number of registers used. pushes callee-saved regs if needed.
+; %3 = number of xmm registers used. pushes callee-saved xmm regs if needed.
+; %4 = list of names to define to registers
+; PROLOGUE can also be invoked by adding the same options to cglobal
+
+; e.g.
+; cglobal foo, 2,3,0, dst, src, tmp
+; declares a function (foo), taking two args (dst and src) and one local variable (tmp)
+
+; TODO Some functions can use some args directly from the stack. If they're the
+; last args then you can just not declare them, but if they're in the middle
+; we need a more flexible macro.
+
+; RET:
+; Pops anything that was pushed by PROLOGUE, and returns.
+
+; REP_RET:
+; Same, but if it doesn't pop anything it becomes a 2-byte ret, for athlons
+; which are slow when a normal ret follows a branch.
+
+; registers:
+; rN and rNq are the native-size register holding function argument N
+; rNd, rNw, rNb are dword, word, and byte size
+; rNm is the original location of arg N (a register or on the stack), dword
+; rNmp is native size
+
+; DECLARE_REG n, qreg, dreg, wreg, breg [, stack_off]
+; Binds argument slot n to a register:
+;   r<n>q / r<n>d / r<n>w / r<n>b -> the given qword/dword/word/byte names
+;   r<n>                          -> the native-size name (second parameter)
+;   r<n>m / r<n>mp                -> where the argument lives on function entry
+; With five parameters the argument arrives in the register itself. With a
+; sixth parameter it arrives on the stack at that offset from the stack pointer
+; (plus the running stack_offset).
+%macro DECLARE_REG 5-6
+ %define r%1q %2
+ %define r%1d %3
+ %define r%1w %4
+ %define r%1b %5
+ %if %0 == 5
+ ; Register argument: the "memory" forms simply alias the register.
+ %define r%1m %3
+ %define r%1mp %2
+ %elif ARCH_X86_64 ; memory
+ %define r%1m [rsp + stack_offset + %6]
+ %define r%1mp qword r %+ %1m
+ %else
+ %define r%1m [esp + stack_offset + %6]
+ %define r%1mp dword r %+ %1m
+ %endif
+ %define r%1 %2
+%endmacro
+
+; DECLARE_REG_SIZE name16, name8
+; Defines size-suffixed aliases for a named general-purpose register, for both
+; the r<name> and e<name> spellings: q -> r<name>, d -> e<name>, w -> <name>,
+; b -> name8. On 32-bit builds r<name> itself is also mapped to e<name>.
+%macro DECLARE_REG_SIZE 2
+ %define r%1q r%1
+ %define e%1q r%1
+ %define r%1d e%1
+ %define e%1d e%1
+ %define r%1w %1
+ %define e%1w %1
+ %define r%1b %2
+ %define e%1b %2
+%if ARCH_X86_64 == 0
+ %define r%1 e%1
+%endif
+%endmacro
+
+DECLARE_REG_SIZE ax, al
+DECLARE_REG_SIZE bx, bl
+DECLARE_REG_SIZE cx, cl
+DECLARE_REG_SIZE dx, dl
+DECLARE_REG_SIZE si, sil
+DECLARE_REG_SIZE di, dil
+DECLARE_REG_SIZE bp, bpl
+
+; t# defines for when per-arch register allocation is more complex than just function arguments
+
+; DECLARE_REG_TMP a, b, c, ...
+; Maps the temporaries t0, t1, t2, ... onto r<a>, r<b>, r<c>, ... in the order
+; the indices are listed.
+%macro DECLARE_REG_TMP 1-*
+ %assign %%i 0
+ %rep %0
+ CAT_XDEFINE t, %%i, r%1
+ %assign %%i %%i+1
+ %rotate 1
+ %endrep
+%endmacro
+
+%macro DECLARE_REG_TMP_SIZE 0-*
+ %rep %0
+ %define t%1q t%1 %+ q
+ %define t%1d t%1 %+ d
+ %define t%1w t%1 %+ w
+ %define t%1b t%1 %+ b
+ %rotate 1
+ %endrep
+%endmacro
+
+DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
+
+%if ARCH_X86_64
+ %define gprsize 8
+%else
+ %define gprsize 4
+%endif
+
+; PUSH reg: push a register and grow stack_offset by one GPR slot, so that
+; stack-relative argument references (r<n>m) stay correct afterwards.
+%macro PUSH 1
+ push %1
+ %assign stack_offset stack_offset+gprsize
+%endmacro
+
+; POP reg: pop a register and shrink stack_offset by one GPR slot (inverse of PUSH).
+%macro POP 1
+ pop %1
+ %assign stack_offset stack_offset-gprsize
+%endmacro
+
+; PUSH_IF_USED n, ...: for each listed register index below regs_used, PUSH r<n>.
+; Going through PUSH means stack_offset is updated for every register saved.
+%macro PUSH_IF_USED 1-*
+ %rep %0
+ %if %1 < regs_used
+ PUSH r%1
+ %endif
+ %rotate 1
+ %endrep
+%endmacro
+
+; POP_IF_USED n, ...: for each listed register index below regs_used, pop r<n>.
+; Unlike PUSH_IF_USED this emits the plain pop instruction, so stack_offset is
+; left untouched.
+; NOTE(review): presumably deliberate, so that RET can be expanded several
+; times within one function -- confirm.
+%macro POP_IF_USED 1-*
+ %rep %0
+ %if %1 < regs_used
+ pop r%1
+ %endif
+ %rotate 1
+ %endrep
+%endmacro
+
+; LOAD_IF_USED n, ...: for each listed argument index below num_args, load the
+; argument from its entry location (r<n>mp) into its register r<n>.
+%macro LOAD_IF_USED 1-*
+ %rep %0
+ %if %1 < num_args
+ mov r%1, r %+ %1 %+ mp
+ %endif
+ %rotate 1
+ %endrep
+%endmacro
+
+; SUB dst, amount: emits sub; when the destination is rsp it also grows
+; stack_offset by the same amount so stack-relative arguments stay addressable.
+%macro SUB 2
+ sub %1, %2
+ %ifidn %1, rsp
+ %assign stack_offset stack_offset+(%2)
+ %endif
+%endmacro
+
+; ADD dst, amount: emits add; when the destination is rsp it also shrinks
+; stack_offset by the same amount (inverse of SUB).
+%macro ADD 2
+ add %1, %2
+ %ifidn %1, rsp
+ %assign stack_offset stack_offset-(%2)
+ %endif
+%endmacro
+
+%macro movifnidn 2
+ %ifnidn %1, %2
+ mov %1, %2
+ %endif
+%endmacro
+
+%macro movsxdifnidn 2
+ %ifnidn %1, %2
+ movsxd %1, %2
+ %endif
+%endmacro
+
+%macro ASSERT 1
+ %if (%1) == 0
+ %error assert failed
+ %endif
+%endmacro
+
+; DEFINE_ARGS name0, name1, ...
+; Gives symbolic names to the argument registers: for the i-th name it defines
+; <name>q, <name>d, <name>w, <name>b, <name>m and <name>mp as aliases of
+; r<i>q, r<i>d, r<i>w, r<i>b, r<i>m and r<i>mp. Names from a previous
+; invocation (tracked through arg_name<i> and n_arg_names) are undefined first,
+; so the macro can be called again to rename registers.
+%macro DEFINE_ARGS 0-*
+ %ifdef n_arg_names
+ ; Drop every alias created by the previous invocation.
+ %assign %%i 0
+ %rep n_arg_names
+ CAT_UNDEF arg_name %+ %%i, q
+ CAT_UNDEF arg_name %+ %%i, d
+ CAT_UNDEF arg_name %+ %%i, w
+ CAT_UNDEF arg_name %+ %%i, b
+ CAT_UNDEF arg_name %+ %%i, m
+ CAT_UNDEF arg_name %+ %%i, mp
+ CAT_UNDEF arg_name, %%i
+ %assign %%i %%i+1
+ %endrep
+ %endif
+
+ ; Save stack_offset and hide it while the aliases are created; it is restored below.
+ %xdefine %%stack_offset stack_offset
+ %undef stack_offset ; so that the current value of stack_offset doesn't get baked in by xdefine
+ %assign %%i 0
+ %rep %0
+ %xdefine %1q r %+ %%i %+ q
+ %xdefine %1d r %+ %%i %+ d
+ %xdefine %1w r %+ %%i %+ w
+ %xdefine %1b r %+ %%i %+ b
+ %xdefine %1m r %+ %%i %+ m
+ %xdefine %1mp r %+ %%i %+ mp
+ ; Remember the name so the next invocation can undefine it.
+ CAT_XDEFINE arg_name, %%i, %1
+ %assign %%i %%i+1
+ %rotate 1
+ %endrep
+ %xdefine stack_offset %%stack_offset
+ %assign n_arg_names %0
+%endmacro
+
+%if WIN64 ; Windows x64 ;=================================================
+
+DECLARE_REG 0, rcx, ecx, cx, cl
+DECLARE_REG 1, rdx, edx, dx, dl
+DECLARE_REG 2, R8, R8D, R8W, R8B
+DECLARE_REG 3, R9, R9D, R9W, R9B
+DECLARE_REG 4, R10, R10D, R10W, R10B, 40
+DECLARE_REG 5, R11, R11D, R11W, R11B, 48
+DECLARE_REG 6, rax, eax, ax, al, 56
+DECLARE_REG 7, rdi, edi, di, dil, 64
+DECLARE_REG 8, rsi, esi, si, sil, 72
+DECLARE_REG 9, rbx, ebx, bx, bl, 80
+DECLARE_REG 10, rbp, ebp, bp, bpl, 88
+DECLARE_REG 11, R12, R12D, R12W, R12B, 96
+DECLARE_REG 12, R13, R13D, R13W, R13B, 104
+DECLARE_REG 13, R14, R14D, R14W, R14B, 112
+DECLARE_REG 14, R15, R15D, R15W, R15B, 120
+
+; Win64 PROLOGUE: argument count, register count, xmm register count (default 0),
+; then the argument names handed to DEFINE_ARGS.
+; Pushes r7-r14 when they are in use, spills xmm registers through
+; WIN64_SPILL_XMM (skipped when mmsize is 8) and loads arguments 4 and up from
+; their entry locations; arguments 0-3 are declared above as arriving in registers.
+%macro PROLOGUE 2-4+ 0 ; #args, #regs, #xmm_regs, arg_names...
+ %assign num_args %1
+ %assign regs_used %2
+ ASSERT regs_used >= num_args
+ ASSERT regs_used <= 15
+ PUSH_IF_USED 7, 8, 9, 10, 11, 12, 13, 14
+ %if mmsize == 8
+ %assign xmm_regs_used 0
+ %else
+ WIN64_SPILL_XMM %3
+ %endif
+ LOAD_IF_USED 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14
+ DEFINE_ARGS %4
+%endmacro
+
+; WIN64_SPILL_XMM n: records n as xmm_regs_used and, when n exceeds 6, reserves
+; (n-6)*16+16 bytes of stack through SUB (which also updates stack_offset) and
+; stores xmm6 .. xmm(n-1) there, highest register first.
+%macro WIN64_SPILL_XMM 1
+ %assign xmm_regs_used %1
+ ASSERT xmm_regs_used <= 16
+ %if xmm_regs_used > 6
+ SUB rsp, (xmm_regs_used-6)*16+16
+ %assign %%i xmm_regs_used
+ %rep (xmm_regs_used-6)
+ %assign %%i %%i-1
+ ; NOTE(review): the (~stack_offset&8) term adds 0 or 8; presumably it keeps
+ ; the movdqa address 16-byte aligned, with the extra 16 bytes reserved above
+ ; as slack -- confirm.
+ movdqa [rsp + (%%i-6)*16+(~stack_offset&8)], xmm %+ %%i
+ %endrep
+ %endif
+%endmacro
+
+; WIN64_RESTORE_XMM_INTERNAL base: reloads xmm6 .. xmm(xmm_regs_used-1) from the
+; save area addressed through the given base register and then releases the
+; area by adding its size to that register. Neither stack_offset nor
+; xmm_regs_used is assigned here.
+%macro WIN64_RESTORE_XMM_INTERNAL 1
+ %if xmm_regs_used > 6
+ %assign %%i xmm_regs_used
+ %rep (xmm_regs_used-6)
+ %assign %%i %%i-1
+ movdqa xmm %+ %%i, [%1 + (%%i-6)*16+(~stack_offset&8)]
+ %endrep
+ add %1, (xmm_regs_used-6)*16+16
+ %endif
+%endmacro
+
+; WIN64_RESTORE_XMM base: restores the xmm registers spilled by WIN64_SPILL_XMM,
+; releases the save area and brings the assembly-time bookkeeping back to the
+; state before the spill (stack_offset reduced by the reserved size,
+; xmm_regs_used reset to 0).
+;
+; WIN64_SPILL_XMM reserves (xmm_regs_used-6)*16+16 bytes, and only when more than
+; six xmm registers are used, so exactly that amount is subtracted here under
+; the same condition. The previous unparenthesised expression left stack_offset
+; 32 bytes too large after a spill and also changed it when nothing had been
+; spilled.
+%macro WIN64_RESTORE_XMM 1
+ WIN64_RESTORE_XMM_INTERNAL %1
+ %if xmm_regs_used > 6
+ %assign stack_offset stack_offset-((xmm_regs_used-6)*16+16)
+ %endif
+ %assign xmm_regs_used 0
+%endmacro
+
+; Win64 RET: reloads spilled xmm registers relative to rsp, pops r14..r7 where
+; they were pushed by PROLOGUE (reverse push order) and returns. Uses the
+; INTERNAL restore, so stack_offset and xmm_regs_used keep their values.
+%macro RET 0
+ WIN64_RESTORE_XMM_INTERNAL rsp
+ POP_IF_USED 14, 13, 12, 11, 10, 9, 8, 7
+ ret
+%endmacro
+
+; Win64 REP_RET: a full RET when the epilogue has something to restore (a pushed
+; GPR, i.e. regs_used > 7, or spilled xmm registers); otherwise the two-byte
+; "rep ret" described at the top of this file.
+%macro REP_RET 0
+ %if regs_used > 7 || xmm_regs_used > 6
+ RET
+ %else
+ rep ret
+ %endif
+%endmacro
+
+%elif ARCH_X86_64 ; *nix x64 ;=============================================
+
+DECLARE_REG 0, rdi, edi, di, dil
+DECLARE_REG 1, rsi, esi, si, sil
+DECLARE_REG 2, rdx, edx, dx, dl
+DECLARE_REG 3, rcx, ecx, cx, cl
+DECLARE_REG 4, R8, R8D, R8W, R8B
+DECLARE_REG 5, R9, R9D, R9W, R9B
+DECLARE_REG 6, rax, eax, ax, al, 8
+DECLARE_REG 7, R10, R10D, R10W, R10B, 16
+DECLARE_REG 8, R11, R11D, R11W, R11B, 24
+DECLARE_REG 9, rbx, ebx, bx, bl, 32
+DECLARE_REG 10, rbp, ebp, bp, bpl, 40
+DECLARE_REG 11, R12, R12D, R12W, R12B, 48
+DECLARE_REG 12, R13, R13D, R13W, R13B, 56
+DECLARE_REG 13, R14, R14D, R14W, R14B, 64
+DECLARE_REG 14, R15, R15D, R15W, R15B, 72
+
+; Unix64 PROLOGUE: argument count, register count, xmm register count (not used
+; by this variant), then the argument names handed to DEFINE_ARGS.
+; Pushes r9-r14 when they are in use and loads arguments 6 and up from their
+; entry locations; arguments 0-5 are declared above as arriving in registers.
+%macro PROLOGUE 2-4+ ; #args, #regs, #xmm_regs, arg_names...
+ %assign num_args %1
+ %assign regs_used %2
+ ASSERT regs_used >= num_args
+ ASSERT regs_used <= 15
+ PUSH_IF_USED 9, 10, 11, 12, 13, 14
+ LOAD_IF_USED 6, 7, 8, 9, 10, 11, 12, 13, 14
+ DEFINE_ARGS %4
+%endmacro
+
+%macro RET 0
+ POP_IF_USED 14, 13, 12, 11, 10, 9
+ ret
+%endmacro
+
+%macro REP_RET 0
+ %if regs_used > 9
+ RET
+ %else
+ rep ret
+ %endif
+%endmacro
+
+%else ; X86_32 ;==============================================================
+
+DECLARE_REG 0, eax, eax, ax, al, 4
+DECLARE_REG 1, ecx, ecx, cx, cl, 8
+DECLARE_REG 2, edx, edx, dx, dl, 12
+DECLARE_REG 3, ebx, ebx, bx, bl, 16
+DECLARE_REG 4, esi, esi, si, null, 20
+DECLARE_REG 5, edi, edi, di, null, 24
+DECLARE_REG 6, ebp, ebp, bp, null, 28
+%define rsp esp
+
+%macro DECLARE_ARG 1-*
+ %rep %0
+ %define r%1m [esp + stack_offset + 4*%1 + 4]
+ %define r%1mp dword r%1m
+ %rotate 1
+ %endrep
+%endmacro
+
+DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
+
+; x86_32 PROLOGUE: argument count, register count, xmm register count (not used
+; by this variant), then the argument names handed to DEFINE_ARGS.
+; Only seven registers (r0-r6) are declared on this target, so a larger request
+; is clamped to 7. Pushes r3-r6 when they are in use and loads every declared
+; argument from its entry location.
+%macro PROLOGUE 2-4+ ; #args, #regs, #xmm_regs, arg_names...
+ %assign num_args %1
+ %assign regs_used %2
+ %if regs_used > 7
+ %assign regs_used 7
+ %endif
+ ASSERT regs_used >= num_args
+ PUSH_IF_USED 3, 4, 5, 6
+ LOAD_IF_USED 0, 1, 2, 3, 4, 5, 6
+ DEFINE_ARGS %4
+%endmacro
+
+%macro RET 0
+ POP_IF_USED 6, 5, 4, 3
+ ret
+%endmacro
+
+%macro REP_RET 0
+ %if regs_used > 3
+ RET
+ %else
+ rep ret
+ %endif
+%endmacro
+
+%endif ;======================================================================
+
+%if WIN64 == 0
+%macro WIN64_SPILL_XMM 1
+%endmacro
+%macro WIN64_RESTORE_XMM 1
+%endmacro
+%endif
+
+;=============================================================================
+; arch-independent part
+;=============================================================================
+
+%assign function_align 16
+
+; Begin a function.
+; Applies any symbol mangling needed for C linkage, and sets up a define such that
+; subsequent uses of the function name automatically refer to the mangled version.
+; Appends cpuflags to the function name if cpuflags has been specified.
+; cglobal appends the current SUFFIX to the function name and forwards the name,
+; plus the optional PROLOGUE arguments, to cglobal_internal.
+%macro cglobal 1-2+ ; name, [PROLOGUE args]
+%if %0 == 1
+ cglobal_internal %1 %+ SUFFIX
+%else
+ cglobal_internal %1 %+ SUFFIX, %2
+%endif
+%endmacro
+; cglobal_internal name, [PROLOGUE args]
+; Declares and opens a function: defines the name as its mangled,
+; program_name-prefixed symbol (once per name), exports the symbol, aligns to
+; function_align, emits the label, resets stack_offset to 0 and runs PROLOGUE
+; when arguments were supplied.
+%macro cglobal_internal 1-2+
+ %ifndef cglobaled_%1
+ ; First sighting of this name: set up the mangled alias and mark it as known.
+ %xdefine %1 mangle(program_name %+ _ %+ %1)
+ %xdefine %1.skip_prologue %1 %+ .skip_prologue
+ CAT_XDEFINE cglobaled_, %1, 1
+ %endif
+ %xdefine current_function %1
+ ; ELF output formats export the symbol as a function with hidden visibility.
+ %ifidn __OUTPUT_FORMAT__,elf
+ global %1:function hidden
+ %elifidn __OUTPUT_FORMAT__,elf32
+ global %1:function hidden
+ %elifidn __OUTPUT_FORMAT__,elf64
+ global %1:function hidden
+ %else
+ global %1
+ %endif
+ align function_align
+ %1:
+ RESET_MM_PERMUTATION ; not really needed, but makes disassembly somewhat nicer
+ %assign stack_offset 0
+ %if %0 > 1
+ PROLOGUE %2
+ %endif
+%endmacro
+
+%macro cextern 1
+ %xdefine %1 mangle(program_name %+ _ %+ %1)
+ CAT_XDEFINE cglobaled_, %1, 1
+ extern %1
+%endmacro
+
+; like cextern, but without the prefix
+%macro cextern_naked 1
+ %xdefine %1 mangle(%1)
+ CAT_XDEFINE cglobaled_, %1, 1
+ extern %1
+%endmacro
+
+%macro const 2+
+ %xdefine %1 mangle(program_name %+ _ %+ %1)
+ global %1
+ %1: %2
+%endmacro
+
+; This is needed for ELF, otherwise the GNU linker assumes the stack is
+; executable by default.
+%ifidn __OUTPUT_FORMAT__,elf
+SECTION .note.GNU-stack noalloc noexec nowrite progbits
+%elifidn __OUTPUT_FORMAT__,elf32
+SECTION .note.GNU-stack noalloc noexec nowrite progbits
+%elifidn __OUTPUT_FORMAT__,elf64
+SECTION .note.GNU-stack noalloc noexec nowrite progbits
+%endif
+
+; cpuflags
+
+%assign cpuflags_mmx (1<<0)
+%assign cpuflags_mmx2 (1<<1) | cpuflags_mmx
+%assign cpuflags_3dnow (1<<2) | cpuflags_mmx
+%assign cpuflags_3dnow2 (1<<3) | cpuflags_3dnow
+%assign cpuflags_sse (1<<4) | cpuflags_mmx2
+%assign cpuflags_sse2 (1<<5) | cpuflags_sse
+%assign cpuflags_sse2slow (1<<6) | cpuflags_sse2
+%assign cpuflags_sse3 (1<<7) | cpuflags_sse2
+%assign cpuflags_ssse3 (1<<8) | cpuflags_sse3
+%assign cpuflags_sse4 (1<<9) | cpuflags_ssse3
+%assign cpuflags_sse42 (1<<10)| cpuflags_sse4
+%assign cpuflags_avx (1<<11)| cpuflags_sse42
+%assign cpuflags_xop (1<<12)| cpuflags_avx
+%assign cpuflags_fma4 (1<<13)| cpuflags_avx
+
+%assign cpuflags_cache32 (1<<16)
+%assign cpuflags_cache64 (1<<17)
+%assign cpuflags_slowctz (1<<18)
+%assign cpuflags_lzcnt (1<<19)
+%assign cpuflags_misalign (1<<20)
+%assign cpuflags_aligned (1<<21) ; not a cpu feature, but a function variant
+%assign cpuflags_atom (1<<22)
+
+%define cpuflag(x) ((cpuflags & (cpuflags_ %+ x)) == (cpuflags_ %+ x))
+%define notcpuflag(x) ((cpuflags & (cpuflags_ %+ x)) != (cpuflags_ %+ x))
+
+; Takes up to 2 cpuflags from the above list.
+; All subsequent functions (up to the next INIT_CPUFLAGS) are built for the specified cpu.
+; You shouldn't need to invoke this macro directly; it's a subroutine for INIT_MMX & co.
+; With at least one flag: sets cpuname (the flag names joined by an underscore),
+; cpuflags (the OR of the matching cpuflags_ masks) and SUFFIX (underscore plus
+; cpuname), then adjusts avx_enabled and the mova/movu/movnta aliases.
+; With no flag: SUFFIX becomes empty and cpuname/cpuflags are undefined.
+%macro INIT_CPUFLAGS 0-2
+ %if %0 >= 1
+ %xdefine cpuname %1
+ %assign cpuflags cpuflags_%1
+ %if %0 >= 2
+ %xdefine cpuname %1_%2
+ %assign cpuflags cpuflags | cpuflags_%2
+ %endif
+ %xdefine SUFFIX _ %+ cpuname
+ %if cpuflag(avx)
+ %assign avx_enabled 1
+ %endif
+ ; 16-byte registers without sse2: switch the move aliases to the ps forms.
+ %if mmsize == 16 && notcpuflag(sse2)
+ %define mova movaps
+ %define movu movups
+ %define movnta movntps
+ %endif
+ ; The "aligned" variant turns unaligned moves into aligned ones; otherwise a
+ ; first flag of sse3 selects lddqu for unaligned loads.
+ %if cpuflag(aligned)
+ %define movu mova
+ %elifidn %1, sse3
+ %define movu lddqu
+ %endif
+ %else
+ %xdefine SUFFIX
+ %undef cpuname
+ %undef cpuflags
+ %endif
+%endmacro
+
+; merge mmx and sse*
+
+%macro CAT_XDEFINE 3
+ %xdefine %1%2 %3
+%endmacro
+
+%macro CAT_UNDEF 2
+ %undef %1%2
+%endmacro
+
+; INIT_MMX [cpuflags]: selects 8-byte MMX registers for the code that follows.
+; Sets mmsize to 8 and num_mmregs to 8, points the mova/movu/movh/movnta aliases
+; at their MMX forms, maps m0-m7 to mm0-mm7 (with the reverse nmm<i> lookup) and
+; finally applies the optional cpuflags through INIT_CPUFLAGS.
+%macro INIT_MMX 0-1+
+ %assign avx_enabled 0
+ ; RESET_MM_PERMUTATION re-runs this macro with the same arguments.
+ %define RESET_MM_PERMUTATION INIT_MMX %1
+ %define mmsize 8
+ %define num_mmregs 8
+ %define mova movq
+ %define movu movq
+ %define movh movd
+ %define movnta movntq
+ %assign %%i 0
+ %rep 8
+ CAT_XDEFINE m, %%i, mm %+ %%i
+ CAT_XDEFINE nmm, %%i, %%i
+ %assign %%i %%i+1
+ %endrep
+ ; The counter continues from 8 here: m8-m15 and nmm8-nmm15 are undefined.
+ %rep 8
+ CAT_UNDEF m, %%i
+ CAT_UNDEF nmm, %%i
+ %assign %%i %%i+1
+ %endrep
+ INIT_CPUFLAGS %1
+%endmacro
+
+%macro INIT_XMM 0-1+
+ %assign avx_enabled 0
+ %define RESET_MM_PERMUTATION INIT_XMM %1
+ %define mmsize 16
+ %define num_mmregs 8
+ %if ARCH_X86_64
+ %define num_mmregs 16
+ %endif
+ %define mova movdqa
+ %define movu movdqu
+ %define movh movq
+ %define movnta movntdq
+ %assign %%i 0
+ %rep num_mmregs
+ CAT_XDEFINE m, %%i, xmm %+ %%i
+ CAT_XDEFINE nxmm, %%i, %%i
+ %assign %%i %%i+1
+ %endrep
+ INIT_CPUFLAGS %1
+%endmacro
+
+; FIXME: INIT_AVX can be replaced by INIT_XMM avx
+%macro INIT_AVX 0
+ INIT_XMM
+ %assign avx_enabled 1
+ %define PALIGNR PALIGNR_SSSE3
+ %define RESET_MM_PERMUTATION INIT_AVX
+%endmacro
+
+%macro INIT_YMM 0-1+
+ %assign avx_enabled 1
+ %define RESET_MM_PERMUTATION INIT_YMM %1
+ %define mmsize 32
+ %define num_mmregs 8
+ %if ARCH_X86_64
+ %define num_mmregs 16
+ %endif
+ %define mova vmovaps
+ %define movu vmovups
+ %undef movh
+ %define movnta vmovntps
+ %assign %%i 0
+ %rep num_mmregs
+ CAT_XDEFINE m, %%i, ymm %+ %%i
+ CAT_XDEFINE nymm, %%i, %%i
+ %assign %%i %%i+1
+ %endrep
+ INIT_CPUFLAGS %1
+%endmacro
+
+INIT_XMM
+
+; I often want to use macros that permute their arguments. e.g. there's no
+; efficient way to implement butterfly or transpose or dct without swapping some
+; arguments.
+;
+; I would like to not have to manually keep track of the permutations:
+; If I insert a permutation in the middle of a function, it should automatically
+; change everything that follows. For more complex macros I may also have multiple
+; implementations, e.g. the SSE2 and SSSE3 versions may have different permutations.
+;
+; Hence these macros. Insert a PERMUTE or some SWAPs at the end of a macro that
+; permutes its arguments. It's equivalent to exchanging the contents of the
+; registers, except that this way you exchange the register names instead, so it
+; doesn't cost any cycles.
+
+%macro PERMUTE 2-* ; takes a list of pairs to swap
+%rep %0/2
+ %xdefine tmp%2 m%2
+ %xdefine ntmp%2 nm%2
+ %rotate 2
+%endrep
+%rep %0/2
+ %xdefine m%1 tmp%2
+ %xdefine nm%1 ntmp%2
+ %undef tmp%2
+ %undef ntmp%2
+ %rotate 2
+%endrep
+%endmacro
+
+; SWAP a, b, c, ...: walks the argument list and exchanges the register names of
+; each adjacent pair in turn, keeping the reverse lookup (n<register> -> index)
+; in sync. Arguments are normally register indices; the else branch handles
+; arguments that are register names instead.
+%macro SWAP 2-* ; swaps a single chain (sometimes more concise than pairs)
+%rep %0-1
+%ifdef m%1
+ ; Index form: exchange the two m<index> definitions through a temporary.
+ %xdefine tmp m%1
+ %xdefine m%1 m%2
+ %xdefine m%2 tmp
+ CAT_XDEFINE n, m%1, %1
+ CAT_XDEFINE n, m%2, %2
+%else
+ ; If we were called as "SWAP m0,m1" rather than "SWAP 0,1" infer the original numbers here.
+ ; Be careful using this mode in nested macros though, as in some cases there may be
+ ; other copies of m# that have already been dereferenced and don't get updated correctly.
+ %xdefine %%n1 n %+ %1
+ %xdefine %%n2 n %+ %2
+ %xdefine tmp m %+ %%n1
+ CAT_XDEFINE m, %%n1, m %+ %%n2
+ CAT_XDEFINE m, %%n2, tmp
+ CAT_XDEFINE n, m %+ %%n1, %%n1
+ CAT_XDEFINE n, m %+ %%n2, %%n2
+%endif
+ %undef tmp
+ %rotate 1
+%endrep
+%endmacro
+
+; If SAVE_MM_PERMUTATION is placed at the end of a function, then any later
+; calls to that function will automatically load the permutation, so values can
+; be returned in mmregs.
+%macro SAVE_MM_PERMUTATION 0-1
+ %if %0
+ %xdefine %%f %1_m
+ %else
+ %xdefine %%f current_function %+ _m
+ %endif
+ %assign %%i 0
+ %rep num_mmregs
+ CAT_XDEFINE %%f, %%i, m %+ %%i
+ %assign %%i %%i+1
+ %endrep
+%endmacro
+
+%macro LOAD_MM_PERMUTATION 1 ; name to load from
+ %ifdef %1_m0
+ %assign %%i 0
+ %rep num_mmregs
+ CAT_XDEFINE m, %%i, %1_m %+ %%i
+ CAT_XDEFINE n, m %+ %%i, %%i
+ %assign %%i %%i+1
+ %endrep
+ %endif
+%endmacro
+
+; Append cpuflags to the callee's name iff the appended name is known and the plain name isn't
+; call target: wrapper around the call instruction that hands call_internal
+; both the plain name and the name with the current SUFFIX appended.
+%macro call 1
+ call_internal %1, %1 %+ SUFFIX
+%endmacro
+; call_internal plain, suffixed: calls the plain name unless it is unknown to
+; cglobal while the suffixed name is known, in which case the suffixed name is
+; called. Afterwards loads the callee's saved register permutation, if any.
+%macro call_internal 2
+ %xdefine %%i %1
+ %ifndef cglobaled_%1
+ %ifdef cglobaled_%2
+ %xdefine %%i %2
+ %endif
+ %endif
+ call %%i
+ LOAD_MM_PERMUTATION %%i
+%endmacro
+
+; Substitutions that reduce instruction size but are functionally equivalent
+; add dst, src: identical to the add instruction except that a literal 128 is
+; emitted as "sub dst, -128"; -128 fits a sign-extended 8-bit immediate while
+; +128 does not, so the encoding is shorter.
+%macro add 2
+ %ifnum %2
+ %if %2==128
+ sub %1, -128
+ %else
+ add %1, %2
+ %endif
+ %else
+ add %1, %2
+ %endif
+%endmacro
+
+; sub dst, src: identical to the sub instruction except that a literal 128 is
+; emitted as "add dst, -128"; -128 fits a sign-extended 8-bit immediate while
+; +128 does not, so the encoding is shorter.
+%macro sub 2
+ %ifnum %2
+ %if %2==128
+ add %1, -128
+ %else
+ sub %1, %2
+ %endif
+ %else
+ sub %1, %2
+ %endif
+%endmacro
+
+;=============================================================================
+; AVX abstraction layer
+;=============================================================================
+
+%assign i 0
+%rep 16
+ %if i < 8
+ CAT_XDEFINE sizeofmm, i, 8
+ %endif
+ CAT_XDEFINE sizeofxmm, i, 16
+ CAT_XDEFINE sizeofymm, i, 32
+%assign i i+1
+%endrep
+%undef i
+
+;%1 == instruction
+;%2 == 1 if float, 0 if int
+;%3 == 1 if 4-operand (xmm, xmm, xmm, imm), 0 if 2- or 3-operand (xmm, xmm, xmm)
+;%4 == number of operands given
+;%5+: operands
+; Emits one instruction either in its v-prefixed AVX form or in its legacy form,
+; inserting a register copy when a separate destination has to be emulated.
+%macro RUN_AVX_INSTR 6-7+
+ ; Operand size comes from the destination when it is an identifier with a
+ ; sizeof definition, otherwise from the current mmsize.
+ %ifid %5
+ %define %%size sizeof%5
+ %else
+ %define %%size mmsize
+ %endif
+ %if %%size==32
+ ; 32-byte operands: always the v-prefixed form.
+ %if %0 >= 7
+ v%1 %5, %6, %7
+ %else
+ v%1 %5, %6
+ %endif
+ %else
+ ; Pick the register-copy instruction used by the emulation below.
+ %if %%size==8
+ %define %%regmov movq
+ %elif %2
+ %define %%regmov movaps
+ %else
+ %define %%regmov movdqa
+ %endif
+
+ ; Enough operands for a separate destination.
+ %if %4>=3+%3
+ %ifnidn %5, %6
+ %if avx_enabled && sizeof%5==16
+ v%1 %5, %6, %7
+ %else
+ ; Legacy emulation: copy the first source into the destination, then operate in place.
+ %%regmov %5, %6
+ %1 %5, %7
+ %endif
+ %else
+ ; Destination equals the first source: the in-place form is enough.
+ %1 %5, %7
+ %endif
+ %elif %3
+ %1 %5, %6, %7
+ %else
+ %1 %5, %6
+ %endif
+ %endif
+%endmacro
+
+; 3arg AVX ops with a memory arg can only have it in src2,
+; whereas SSE emulation of 3arg prefers to have it in src1 (i.e. the mov).
+; So, if the op is symmetric and the wrong one is memory, swap them.
+%macro RUN_AVX_INSTR1 8
+ %assign %%swap 0
+ %if avx_enabled
+ %ifnid %6
+ %assign %%swap 1
+ %endif
+ %elifnidn %5, %6
+ %ifnid %7
+ %assign %%swap 1
+ %endif
+ %endif
+ %if %%swap && %3 == 0 && %8 == 1
+ RUN_AVX_INSTR %1, %2, %3, %4, %5, %7, %6
+ %else
+ RUN_AVX_INSTR %1, %2, %3, %4, %5, %6, %7
+ %endif
+%endmacro
+
+;%1 == instruction
+;%2 == 1 if float, 0 if int
+;%3 == 1 if 4-operand (xmm, xmm, xmm, imm), 0 if 3-operand (xmm, xmm, xmm)
+;%4 == 1 if symmetric (i.e. doesn't matter which src arg is which), 0 if not
+; Defines a macro with the same name as the instruction. The generated macro
+; accepts two to five operands; unused operand slots default to the placeholder
+; fnord, and the trailing defaults carry the instruction name and the three
+; flags given here. It counts the real operands by looking for the first
+; placeholder and forwards to RUN_AVX_INSTR (RUN_AVX_INSTR1 for the
+; three-operand case, which may swap the sources of a symmetric instruction).
+%macro AVX_INSTR 4
+ %macro %1 2-9 fnord, fnord, fnord, %1, %2, %3, %4
+ %ifidn %3, fnord
+ RUN_AVX_INSTR %6, %7, %8, 2, %1, %2
+ %elifidn %4, fnord
+ RUN_AVX_INSTR1 %6, %7, %8, 3, %1, %2, %3, %9
+ %elifidn %5, fnord
+ RUN_AVX_INSTR %6, %7, %8, 4, %1, %2, %3, %4
+ %else
+ RUN_AVX_INSTR %6, %7, %8, 5, %1, %2, %3, %4, %5
+ %endif
+ %endmacro
+%endmacro
+
+AVX_INSTR addpd, 1, 0, 1
+AVX_INSTR addps, 1, 0, 1
+AVX_INSTR addsd, 1, 0, 1
+AVX_INSTR addss, 1, 0, 1
+AVX_INSTR addsubpd, 1, 0, 0
+AVX_INSTR addsubps, 1, 0, 0
+AVX_INSTR andpd, 1, 0, 1
+AVX_INSTR andps, 1, 0, 1
+AVX_INSTR andnpd, 1, 0, 0
+AVX_INSTR andnps, 1, 0, 0
+AVX_INSTR blendpd, 1, 0, 0
+AVX_INSTR blendps, 1, 0, 0
+AVX_INSTR blendvpd, 1, 0, 0
+AVX_INSTR blendvps, 1, 0, 0
+AVX_INSTR cmppd, 1, 0, 0
+AVX_INSTR cmpps, 1, 0, 0
+AVX_INSTR cmpsd, 1, 0, 0
+AVX_INSTR cmpss, 1, 0, 0
+AVX_INSTR cvtdq2ps, 1, 0, 0
+AVX_INSTR cvtps2dq, 1, 0, 0
+AVX_INSTR divpd, 1, 0, 0
+AVX_INSTR divps, 1, 0, 0
+AVX_INSTR divsd, 1, 0, 0
+AVX_INSTR divss, 1, 0, 0
+AVX_INSTR dppd, 1, 1, 0
+AVX_INSTR dpps, 1, 1, 0
+AVX_INSTR haddpd, 1, 0, 0
+AVX_INSTR haddps, 1, 0, 0
+AVX_INSTR hsubpd, 1, 0, 0
+AVX_INSTR hsubps, 1, 0, 0
+AVX_INSTR maxpd, 1, 0, 1
+AVX_INSTR maxps, 1, 0, 1
+AVX_INSTR maxsd, 1, 0, 1
+AVX_INSTR maxss, 1, 0, 1
+AVX_INSTR minpd, 1, 0, 1
+AVX_INSTR minps, 1, 0, 1
+AVX_INSTR minsd, 1, 0, 1
+AVX_INSTR minss, 1, 0, 1
+AVX_INSTR movhlps, 1, 0, 0
+AVX_INSTR movlhps, 1, 0, 0
+AVX_INSTR movsd, 1, 0, 0
+AVX_INSTR movss, 1, 0, 0
+AVX_INSTR mpsadbw, 0, 1, 0
+AVX_INSTR mulpd, 1, 0, 1
+AVX_INSTR mulps, 1, 0, 1
+AVX_INSTR mulsd, 1, 0, 1
+AVX_INSTR mulss, 1, 0, 1
+AVX_INSTR orpd, 1, 0, 1
+AVX_INSTR orps, 1, 0, 1
+AVX_INSTR packsswb, 0, 0, 0
+AVX_INSTR packssdw, 0, 0, 0
+AVX_INSTR packuswb, 0, 0, 0
+AVX_INSTR packusdw, 0, 0, 0
+AVX_INSTR paddb, 0, 0, 1
+AVX_INSTR paddw, 0, 0, 1
+AVX_INSTR paddd, 0, 0, 1
+AVX_INSTR paddq, 0, 0, 1
+AVX_INSTR paddsb, 0, 0, 1
+AVX_INSTR paddsw, 0, 0, 1
+AVX_INSTR paddusb, 0, 0, 1
+AVX_INSTR paddusw, 0, 0, 1
+AVX_INSTR palignr, 0, 1, 0
+AVX_INSTR pand, 0, 0, 1
+AVX_INSTR pandn, 0, 0, 0
+AVX_INSTR pavgb, 0, 0, 1
+AVX_INSTR pavgw, 0, 0, 1
+AVX_INSTR pblendvb, 0, 0, 0
+AVX_INSTR pblendw, 0, 1, 0
+AVX_INSTR pcmpestri, 0, 0, 0
+AVX_INSTR pcmpestrm, 0, 0, 0
+AVX_INSTR pcmpistri, 0, 0, 0
+AVX_INSTR pcmpistrm, 0, 0, 0
+AVX_INSTR pcmpeqb, 0, 0, 1
+AVX_INSTR pcmpeqw, 0, 0, 1
+AVX_INSTR pcmpeqd, 0, 0, 1
+AVX_INSTR pcmpeqq, 0, 0, 1
+AVX_INSTR pcmpgtb, 0, 0, 0
+AVX_INSTR pcmpgtw, 0, 0, 0
+AVX_INSTR pcmpgtd, 0, 0, 0
+AVX_INSTR pcmpgtq, 0, 0, 0
+AVX_INSTR phaddw, 0, 0, 0
+AVX_INSTR phaddd, 0, 0, 0
+AVX_INSTR phaddsw, 0, 0, 0
+AVX_INSTR phsubw, 0, 0, 0
+AVX_INSTR phsubd, 0, 0, 0
+AVX_INSTR phsubsw, 0, 0, 0
+AVX_INSTR pmaddwd, 0, 0, 1
+AVX_INSTR pmaddubsw, 0, 0, 0
+AVX_INSTR pmaxsb, 0, 0, 1
+AVX_INSTR pmaxsw, 0, 0, 1
+AVX_INSTR pmaxsd, 0, 0, 1
+AVX_INSTR pmaxub, 0, 0, 1
+AVX_INSTR pmaxuw, 0, 0, 1
+AVX_INSTR pmaxud, 0, 0, 1
+AVX_INSTR pminsb, 0, 0, 1
+AVX_INSTR pminsw, 0, 0, 1
+AVX_INSTR pminsd, 0, 0, 1
+AVX_INSTR pminub, 0, 0, 1
+AVX_INSTR pminuw, 0, 0, 1
+AVX_INSTR pminud, 0, 0, 1
+AVX_INSTR pmulhuw, 0, 0, 1
+AVX_INSTR pmulhrsw, 0, 0, 1
+AVX_INSTR pmulhw, 0, 0, 1
+AVX_INSTR pmullw, 0, 0, 1
+AVX_INSTR pmulld, 0, 0, 1
+AVX_INSTR pmuludq, 0, 0, 1
+AVX_INSTR pmuldq, 0, 0, 1
+AVX_INSTR por, 0, 0, 1
+AVX_INSTR psadbw, 0, 0, 1
+AVX_INSTR pshufb, 0, 0, 0
+AVX_INSTR psignb, 0, 0, 0
+AVX_INSTR psignw, 0, 0, 0
+AVX_INSTR psignd, 0, 0, 0
+AVX_INSTR psllw, 0, 0, 0
+AVX_INSTR pslld, 0, 0, 0
+AVX_INSTR psllq, 0, 0, 0
+AVX_INSTR pslldq, 0, 0, 0
+AVX_INSTR psraw, 0, 0, 0
+AVX_INSTR psrad, 0, 0, 0
+AVX_INSTR psrlw, 0, 0, 0
+AVX_INSTR psrld, 0, 0, 0
+AVX_INSTR psrlq, 0, 0, 0
+AVX_INSTR psrldq, 0, 0, 0
+AVX_INSTR psubb, 0, 0, 0
+AVX_INSTR psubw, 0, 0, 0
+AVX_INSTR psubd, 0, 0, 0
+AVX_INSTR psubq, 0, 0, 0
+AVX_INSTR psubsb, 0, 0, 0
+AVX_INSTR psubsw, 0, 0, 0
+AVX_INSTR psubusb, 0, 0, 0
+AVX_INSTR psubusw, 0, 0, 0
+AVX_INSTR punpckhbw, 0, 0, 0
+AVX_INSTR punpckhwd, 0, 0, 0
+AVX_INSTR punpckhdq, 0, 0, 0
+AVX_INSTR punpckhqdq, 0, 0, 0
+AVX_INSTR punpcklbw, 0, 0, 0
+AVX_INSTR punpcklwd, 0, 0, 0
+AVX_INSTR punpckldq, 0, 0, 0
+AVX_INSTR punpcklqdq, 0, 0, 0
+AVX_INSTR pxor, 0, 0, 1
+AVX_INSTR shufps, 1, 1, 0
+AVX_INSTR subpd, 1, 0, 0
+AVX_INSTR subps, 1, 0, 0
+AVX_INSTR subsd, 1, 0, 0
+AVX_INSTR subss, 1, 0, 0
+AVX_INSTR unpckhpd, 1, 0, 0
+AVX_INSTR unpckhps, 1, 0, 0
+AVX_INSTR unpcklpd, 1, 0, 0
+AVX_INSTR unpcklps, 1, 0, 0
+AVX_INSTR xorpd, 1, 0, 1
+AVX_INSTR xorps, 1, 0, 1
+
+; 3DNow instructions, for sharing code between AVX, SSE and 3DN
+AVX_INSTR pfadd, 1, 0, 1
+AVX_INSTR pfsub, 1, 0, 0
+AVX_INSTR pfmul, 1, 0, 1
+
+; base-4 constants for shuffles
+%assign i 0
+%rep 256
+ %assign j ((i>>6)&3)*1000 + ((i>>4)&3)*100 + ((i>>2)&3)*10 + (i&3)
+ %if j < 10
+ CAT_XDEFINE q000, j, i
+ %elif j < 100
+ CAT_XDEFINE q00, j, i
+ %elif j < 1000
+ CAT_XDEFINE q0, j, i
+ %else
+ CAT_XDEFINE q, j, i
+ %endif
+%assign i i+1
+%endrep
+%undef i
+%undef j
+
+; FMA_INSTR fused, mul, add
+; Defines a macro named after the fused multiply-accumulate instruction taking
+; (dst, src1, src2, addend). With XOP available the v-prefixed fused instruction
+; is emitted; otherwise it is emulated as "mul dst, src1, src2" followed by
+; "add dst, addend".
+;
+; The emulation overwrites dst before the addend is read, so it cannot produce
+; the right result when dst and addend are the same register. That case is now
+; rejected at assembly time instead of silently generating wrong code.
+%macro FMA_INSTR 3
+ %macro %1 4-7 %1, %2, %3
+ %if cpuflag(xop)
+ v%5 %1, %2, %3, %4
+ %elifnidn %1, %4
+ %6 %1, %2, %3
+ %7 %1, %4
+ %else
+ %error non-xop emulation of a fused multiply-add needs dst to differ from the addend
+ %endif
+ %endmacro
+%endmacro
+
+FMA_INSTR pmacsdd, pmulld, paddd
+FMA_INSTR pmacsww, pmullw, paddw
+FMA_INSTR pmadcswd, pmaddwd, paddd
diff --git a/tools/all_builds.py b/tools/all_builds.py
new file mode 100755
index 0000000..d1f0c80
--- /dev/null
+++ b/tools/all_builds.py
@@ -0,0 +1,72 @@
+#!/usr/bin/python
+
+import getopt
+import subprocess
+import sys
+
+LONG_OPTIONS = ["shard=", "shards="]
+BASE_COMMAND = "./configure --enable-internal-stats --enable-experimental"
+
+def RunCommand(command):
+ run = subprocess.Popen(command, shell=True)
+ output = run.communicate()
+ if run.returncode:
+ print "Non-zero return code: " + str(run.returncode) + " => exiting!"
+ sys.exit(1)
+
+def list_of_experiments():
+ experiments = []
+ configure_file = open("configure")
+ list_start = False
+ for line in configure_file.read().split("\n"):
+ if line == 'EXPERIMENT_LIST="':
+ list_start = True
+ elif line == '"':
+ list_start = False
+ elif list_start:
+ currently_broken = ["csm"]
+ experiment = line[4:]
+ if experiment not in currently_broken:
+ experiments.append(experiment)
+ return experiments
+
+def main(argv):
+ # Parse arguments
+ options = {"--shard": 0, "--shards": 1}
+ if "--" in argv:
+ opt_end_index = argv.index("--")
+ else:
+ opt_end_index = len(argv)
+ try:
+ o, _ = getopt.getopt(argv[1:opt_end_index], None, LONG_OPTIONS)
+ except getopt.GetoptError, err:
+ print str(err)
+ print "Usage: %s [--shard=<n> --shards=<n>] -- [configure flag ...]"%argv[0]
+ sys.exit(2)
+
+ options.update(o)
+ extra_args = argv[opt_end_index + 1:]
+
+ # Shard experiment list
+ shard = int(options["--shard"])
+ shards = int(options["--shards"])
+ experiments = list_of_experiments()
+ base_command = " ".join([BASE_COMMAND] + extra_args)
+ configs = [base_command]
+ configs += ["%s --enable-%s" % (base_command, e) for e in experiments]
+ my_configs = zip(configs, range(len(configs)))
+ my_configs = filter(lambda x: x[1] % shards == shard, my_configs)
+ my_configs = [e[0] for e in my_configs]
+
+ # Run configs for this shard
+ for config in my_configs:
+ test_build(config)
+
+def test_build(configure_command):
+ print "\033[34m\033[47mTesting %s\033[0m" % (configure_command)
+ RunCommand(configure_command)
+ RunCommand("make clean")
+ RunCommand("make")
+
+if __name__ == "__main__":
+ main(sys.argv)
diff --git a/tools/cpplint.py b/tools/cpplint.py
new file mode 100755
index 0000000..526b955
--- /dev/null
+++ b/tools/cpplint.py
@@ -0,0 +1,3361 @@
+#!/usr/bin/python
+#
+# Copyright (c) 2009 Google Inc. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# Here are some issues that I've had people identify in my code during reviews,
+# that I think are possible to flag automatically in a lint tool. If these were
+# caught by lint, it would save time both for myself and that of my reviewers.
+# Most likely, some of these are beyond the scope of the current lint framework,
+# but I think it is valuable to retain these wish-list items even if they cannot
+# be immediately implemented.
+#
+# Suggestions
+# -----------
+# - Check for no 'explicit' for multi-arg ctor
+# - Check for boolean assign RHS in parens
+# - Check for ctor initializer-list colon position and spacing
+# - Check that if there's a ctor, there should be a dtor
+# - Check accessors that return non-pointer member variables are
+# declared const
+# - Check accessors that return non-const pointer member vars are
+# *not* declared const
+# - Check for using public includes for testing
+# - Check for spaces between brackets in one-line inline method
+# - Check for no assert()
+# - Check for spaces surrounding operators
+# - Check for 0 in pointer context (should be NULL)
+# - Check for 0 in char context (should be '\0')
+# - Check for camel-case method name conventions for methods
+# that are not simple inline getters and setters
+# - Check that base classes have virtual destructors
+# - Put " // namespace" after } that closes a namespace, with
+#   namespace's name after 'namespace' if it is named.
+# - Do not indent namespace contents
+# - Avoid inlining non-trivial constructors in header files
+# - Include base/basictypes.h if DISALLOW_EVIL_CONSTRUCTORS is used
+# - Check for old-school (void) cast for call-sites of functions
+# ignored return value
+# - Check gUnit usage of anonymous namespace
+# - Check for class declaration order (typedefs, consts, enums,
+# ctor(s?), dtor, friend declarations, methods, member vars)
+#
+
+"""Does google-lint on c++ files.
+
+The goal of this script is to identify places in the code that *may*
+be in non-compliance with google style. It does not attempt to fix
+up these problems -- the point is to educate. It does also not
+attempt to find all problems, or to ensure that everything it does
+find is legitimately a problem.
+
+In particular, we can get very confused by /* and // inside strings!
+We do a small hack, which is to ignore //'s with "'s after them on the
+same line, but it is far from perfect (in either direction).
+"""
+
+import codecs
+import getopt
+import math # for log
+import os
+import re
+import sre_compile
+import string
+import sys
+import unicodedata
+
+
+_USAGE = """
+Syntax: cpplint.py [--verbose=#] [--output=vs7] [--filter=-x,+y,...]
+ [--counting=total|toplevel|detailed]
+ <file> [file] ...
+
+ The style guidelines this tries to follow are those in
+ http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml
+
+ Every problem is given a confidence score from 1-5, with 5 meaning we are
+ certain of the problem, and 1 meaning it could be a legitimate construct.
+ This will miss some errors, and is not a substitute for a code review.
+
+ To suppress false-positive errors of a certain category, add a
+ 'NOLINT(category)' comment to the line. NOLINT or NOLINT(*)
+ suppresses errors of all categories on that line.
+
+ The files passed in will be linted; at least one file must be provided.
+ Linted extensions are .cc, .cpp, and .h. Other file types will be ignored.
+
+ Flags:
+
+ output=vs7
+ By default, the output is formatted to ease emacs parsing. Visual Studio
+ compatible output (vs7) may also be used. Other formats are unsupported.
+
+ verbose=#
+ Specify a number 0-5 to restrict errors to certain verbosity levels.
+
+ filter=-x,+y,...
+ Specify a comma-separated list of category-filters to apply: only
+ error messages whose category names pass the filters will be printed.
+ (Category names are printed with the message and look like
+ "[whitespace/indent]".) Filters are evaluated left to right.
+ "-FOO" and "FOO" means "do not print categories that start with FOO".
+ "+FOO" means "do print categories that start with FOO".
+
+ Examples: --filter=-whitespace,+whitespace/braces
+ --filter=whitespace,runtime/printf,+runtime/printf_format
+ --filter=-,+build/include_what_you_use
+
+ To see a list of all the categories used in cpplint, pass no arg:
+ --filter=
+
+ counting=total|toplevel|detailed
+ The total number of errors found is always printed. If
+ 'toplevel' is provided, then the count of errors in each of
+ the top-level categories like 'build' and 'whitespace' will
+ also be printed. If 'detailed' is provided, then a count
+ is provided for each category like 'build/class'.
+"""
+
+# We categorize each error message we print. Here are the categories.
+# We want an explicit list so we can list them all in cpplint --filter=.
+# If you add a new error message with a new category, add it to the list
+# here! cpplint_unittest.py should tell you if you forget to do this.
+# \ used for clearer layout -- pylint: disable-msg=C6013
+_ERROR_CATEGORIES = [
+ 'build/class',
+ 'build/deprecated',
+ 'build/endif_comment',
+ 'build/explicit_make_pair',
+ 'build/forward_decl',
+ 'build/header_guard',
+ 'build/include',
+ 'build/include_alpha',
+ 'build/include_order',
+ 'build/include_what_you_use',
+ 'build/namespaces',
+ 'build/printf_format',
+ 'build/storage_class',
+ 'legal/copyright',
+ 'readability/braces',
+ 'readability/casting',
+ 'readability/check',
+ 'readability/constructors',
+ 'readability/fn_size',
+ 'readability/function',
+ 'readability/multiline_comment',
+ 'readability/multiline_string',
+ 'readability/nolint',
+ 'readability/streams',
+ 'readability/todo',
+ 'readability/utf8',
+ 'runtime/arrays',
+ 'runtime/casting',
+ 'runtime/explicit',
+ 'runtime/int',
+ 'runtime/init',
+ 'runtime/invalid_increment',
+ 'runtime/member_string_references',
+ 'runtime/memset',
+ 'runtime/operator',
+ 'runtime/printf',
+ 'runtime/printf_format',
+ 'runtime/references',
+ 'runtime/rtti',
+ 'runtime/sizeof',
+ 'runtime/string',
+ 'runtime/threadsafe_fn',
+ 'runtime/virtual',
+ 'whitespace/blank_line',
+ 'whitespace/braces',
+ 'whitespace/comma',
+ 'whitespace/comments',
+ 'whitespace/end_of_line',
+ 'whitespace/ending_newline',
+ 'whitespace/indent',
+ 'whitespace/labels',
+ 'whitespace/line_length',
+ 'whitespace/newline',
+ 'whitespace/operators',
+ 'whitespace/parens',
+ 'whitespace/semicolon',
+ 'whitespace/tab',
+ 'whitespace/todo'
+ ]
+
+# The default state of the category filter. This is overridden by the --filter=
+# flag. By default all errors are on, so only add here categories that should be
+# off by default (i.e., categories that must be enabled by the --filter= flags).
+# All entries here should start with a '-' or '+', as in the --filter= flag.
+_DEFAULT_FILTERS = ['-build/include_alpha']
+
+# We used to check for high-bit characters, but after much discussion we
+# decided those were OK, as long as they were in UTF-8 and didn't represent
+# hard-coded international strings, which belong in a separate i18n file.
+
+# Headers that we consider STL headers.
+_STL_HEADERS = frozenset([
+ 'algobase.h', 'algorithm', 'alloc.h', 'bitset', 'deque', 'exception',
+ 'function.h', 'functional', 'hash_map', 'hash_map.h', 'hash_set',
+ 'hash_set.h', 'iterator', 'list', 'list.h', 'map', 'memory', 'new',
+ 'pair.h', 'pthread_alloc', 'queue', 'set', 'set.h', 'sstream', 'stack',
+ 'stl_alloc.h', 'stl_relops.h', 'type_traits.h',
+ 'utility', 'vector', 'vector.h',
+ ])
+
+
+# Non-STL C++ system headers.
+_CPP_HEADERS = frozenset([
+ 'algo.h', 'builtinbuf.h', 'bvector.h', 'cassert', 'cctype',
+ 'cerrno', 'cfloat', 'ciso646', 'climits', 'clocale', 'cmath',
+ 'complex', 'complex.h', 'csetjmp', 'csignal', 'cstdarg', 'cstddef',
+ 'cstdio', 'cstdlib', 'cstring', 'ctime', 'cwchar', 'cwctype',
+ 'defalloc.h', 'deque.h', 'editbuf.h', 'exception', 'fstream',
+ 'fstream.h', 'hashtable.h', 'heap.h', 'indstream.h', 'iomanip',
+ 'iomanip.h', 'ios', 'iosfwd', 'iostream', 'iostream.h', 'istream',
+ 'istream.h', 'iterator.h', 'limits', 'map.h', 'multimap.h', 'multiset.h',
+ 'numeric', 'ostream', 'ostream.h', 'parsestream.h', 'pfstream.h',
+ 'PlotFile.h', 'procbuf.h', 'pthread_alloc.h', 'rope', 'rope.h',
+ 'ropeimpl.h', 'SFile.h', 'slist', 'slist.h', 'stack.h', 'stdexcept',
+ 'stdiostream.h', 'streambuf.h', 'stream.h', 'strfile.h', 'string',
+ 'strstream', 'strstream.h', 'tempbuf.h', 'tree.h', 'typeinfo', 'valarray',
+ ])
+
+
+# Assertion macros. These are defined in base/logging.h and
+# testing/base/gunit.h. Note that the _M versions need to come first
+# for substring matching to work.
+_CHECK_MACROS = [
+ 'DCHECK', 'CHECK',
+ 'EXPECT_TRUE_M', 'EXPECT_TRUE',
+ 'ASSERT_TRUE_M', 'ASSERT_TRUE',
+ 'EXPECT_FALSE_M', 'EXPECT_FALSE',
+ 'ASSERT_FALSE_M', 'ASSERT_FALSE',
+ ]
+
+# Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE
+_CHECK_REPLACEMENT = dict([(m, {}) for m in _CHECK_MACROS])
+
+for op, replacement in [('==', 'EQ'), ('!=', 'NE'),
+ ('>=', 'GE'), ('>', 'GT'),
+ ('<=', 'LE'), ('<', 'LT')]:
+ _CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_%s' % replacement
+ _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_%s' % replacement
+ _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_%s' % replacement
+ _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_%s' % replacement
+ _CHECK_REPLACEMENT['EXPECT_TRUE_M'][op] = 'EXPECT_%s_M' % replacement
+ _CHECK_REPLACEMENT['ASSERT_TRUE_M'][op] = 'ASSERT_%s_M' % replacement
+
+for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'),
+ ('>=', 'LT'), ('>', 'LE'),
+ ('<=', 'GT'), ('<', 'GE')]:
+ _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_%s' % inv_replacement
+ _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_%s' % inv_replacement
+ _CHECK_REPLACEMENT['EXPECT_FALSE_M'][op] = 'EXPECT_%s_M' % inv_replacement
+ _CHECK_REPLACEMENT['ASSERT_FALSE_M'][op] = 'ASSERT_%s_M' % inv_replacement
+
+
+# These constants define types of headers for use with
+# _IncludeState.CheckNextIncludeOrder().
+_C_SYS_HEADER = 1
+_CPP_SYS_HEADER = 2
+_LIKELY_MY_HEADER = 3
+_POSSIBLE_MY_HEADER = 4
+_OTHER_HEADER = 5
+
+
+_regexp_compile_cache = {}
+
+# Finds occurrences of NOLINT or NOLINT(...).
+_RE_SUPPRESSION = re.compile(r'\bNOLINT\b(\([^)]*\))?')
+
+# {str, set(int)}: a map from error categories to sets of linenumbers
+# on which those errors are expected and should be suppressed.
+_error_suppressions = {}
+
+def ParseNolintSuppressions(filename, raw_line, linenum, error):
+  """Records any NOLINT suppression found on one raw source line.
+
+  A bare NOLINT or NOLINT(*) suppresses every category on the line and is
+  stored under the key None.  NOLINT(category) suppresses only that
+  category; a category that is not in _ERROR_CATEGORIES is reported
+  through `error` instead of being stored.
+
+  Args:
+    filename: str, the name of the input file.
+    raw_line: str, the line of input text, with comments.
+    linenum: int, the number of the current line.
+    error: function, an error handler.
+  """
+  # FIXME(adonovan): "NOLINT(" is misparsed as NOLINT(*).
+  found = _RE_SUPPRESSION.search(raw_line)
+  if not found:
+    return
+
+  spec = found.group(1)
+  if spec is None or spec == '(*)':  # => "suppress all"
+    _error_suppressions.setdefault(None, set()).add(linenum)
+    return
+
+  if not (spec.startswith('(') and spec.endswith(')')):
+    return
+
+  # Strip the surrounding parentheses to get the bare category name.
+  name = spec[1:-1]
+  if name in _ERROR_CATEGORIES:
+    _error_suppressions.setdefault(name, set()).add(linenum)
+  else:
+    error(filename, linenum, 'readability/nolint', 5,
+          'Unknown NOLINT error category: %s' % name)
+
+
+def ResetNolintSuppressions():
+ "Resets the set of NOLINT suppressions to empty."
+ _error_suppressions.clear()
+
+
+def IsErrorSuppressedByNolint(category, linenum):
+ """Returns true if the specified error category is suppressed on this line.
+
+ Consults the global error_suppressions map populated by
+ ParseNolintSuppressions/ResetNolintSuppressions.
+
+ Args:
+ category: str, the category of the error.
+ linenum: int, the current line number.
+ Returns:
+ bool, True iff the error should be suppressed due to a NOLINT comment.
+ """
+ return (linenum in _error_suppressions.get(category, set()) or
+ linenum in _error_suppressions.get(None, set()))
+
+def Match(pattern, s):
+ """Matches the string with the pattern, caching the compiled regexp."""
+ # The regexp compilation caching is inlined in both Match and Search for
+ # performance reasons; factoring it out into a separate function turns out
+ # to be noticeably expensive.
+ if not pattern in _regexp_compile_cache:
+ _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
+ return _regexp_compile_cache[pattern].match(s)
+
+
+def Search(pattern, s):
+ """Searches the string for the pattern, caching the compiled regexp."""
+ if not pattern in _regexp_compile_cache:
+ _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
+ return _regexp_compile_cache[pattern].search(s)
+
+
+class _IncludeState(dict):
+ """Tracks line numbers for includes, and the order in which includes appear.
+
+ As a dict, an _IncludeState object serves as a mapping between include
+ filename and line number on which that file was included.
+
+ Call CheckNextIncludeOrder() once for each header in the file, passing
+ in the type constants defined above. Calls in an illegal order will
+ raise an _IncludeError with an appropriate error message.
+
+ """
+ # self._section will move monotonically through this set. If it ever
+ # needs to move backwards, CheckNextIncludeOrder will raise an error.
+ _INITIAL_SECTION = 0
+ _MY_H_SECTION = 1
+ _C_SECTION = 2
+ _CPP_SECTION = 3
+ _OTHER_H_SECTION = 4
+
+ _TYPE_NAMES = {
+ _C_SYS_HEADER: 'C system header',
+ _CPP_SYS_HEADER: 'C++ system header',
+ _LIKELY_MY_HEADER: 'header this file implements',
+ _POSSIBLE_MY_HEADER: 'header this file may implement',
+ _OTHER_HEADER: 'other header',
+ }
+ _SECTION_NAMES = {
+ _INITIAL_SECTION: "... nothing. (This can't be an error.)",
+ _MY_H_SECTION: 'a header this file implements',
+ _C_SECTION: 'C system header',
+ _CPP_SECTION: 'C++ system header',
+ _OTHER_H_SECTION: 'other header',
+ }
+
+ def __init__(self):
+ dict.__init__(self)
+ # The name of the current section.
+ self._section = self._INITIAL_SECTION
+ # The path of last found header.
+ self._last_header = ''
+
+ def CanonicalizeAlphabeticalOrder(self, header_path):
+ """Returns a path canonicalized for alphabetical comparison.
+
+ - replaces "-" with "_" so they both cmp the same.
+ - removes '-inl' since we don't require them to be after the main header.
+ - lowercase everything, just in case.
+
+ Args:
+ header_path: Path to be canonicalized.
+
+ Returns:
+ Canonicalized path.
+ """
+ return header_path.replace('-inl.h', '.h').replace('-', '_').lower()
+
+ def IsInAlphabeticalOrder(self, header_path):
+ """Check if a header is in alphabetical order with the previous header.
+
+ Args:
+ header_path: Header to be checked.
+
+ Returns:
+ Returns true if the header is in alphabetical order.
+ """
+ canonical_header = self.CanonicalizeAlphabeticalOrder(header_path)
+ if self._last_header > canonical_header:
+ return False
+ self._last_header = canonical_header
+ return True
+
+ def CheckNextIncludeOrder(self, header_type):
+ """Returns a non-empty error message if the next header is out of order.
+
+ This function also updates the internal state to be ready to check
+ the next include.
+
+ Args:
+ header_type: One of the _XXX_HEADER constants defined above.
+
+ Returns:
+ The empty string if the header is in the right order, or an
+ error message describing what's wrong.
+
+ """
+ error_message = ('Found %s after %s' %
+ (self._TYPE_NAMES[header_type],
+ self._SECTION_NAMES[self._section]))
+
+ last_section = self._section
+
+ if header_type == _C_SYS_HEADER:
+ if self._section <= self._C_SECTION:
+ self._section = self._C_SECTION
+ else:
+ self._last_header = ''
+ return error_message
+ elif header_type == _CPP_SYS_HEADER:
+ if self._section <= self._CPP_SECTION:
+ self._section = self._CPP_SECTION
+ else:
+ self._last_header = ''
+ return error_message
+ elif header_type == _LIKELY_MY_HEADER:
+ if self._section <= self._MY_H_SECTION:
+ self._section = self._MY_H_SECTION
+ else:
+ self._section = self._OTHER_H_SECTION
+ elif header_type == _POSSIBLE_MY_HEADER:
+ if self._section <= self._MY_H_SECTION:
+ self._section = self._MY_H_SECTION
+ else:
+ # This will always be the fallback because we're not sure
+ # enough that the header is associated with this file.
+ self._section = self._OTHER_H_SECTION
+ else:
+ assert header_type == _OTHER_HEADER
+ self._section = self._OTHER_H_SECTION
+
+ if last_section != self._section:
+ self._last_header = ''
+
+ return ''
+
+
+class _CppLintState(object):
+ """Maintains module-wide state.."""
+
+ def __init__(self):
+ self.verbose_level = 1 # global setting.
+ self.error_count = 0 # global count of reported errors
+ # filters to apply when emitting error messages
+ self.filters = _DEFAULT_FILTERS[:]
+ self.counting = 'total' # In what way are we counting errors?
+ self.errors_by_category = {} # string to int dict storing error counts
+
+ # output format:
+ # "emacs" - format that emacs can parse (default)
+ # "vs7" - format that Microsoft Visual Studio 7 can parse
+ self.output_format = 'emacs'
+
+ def SetOutputFormat(self, output_format):
+ """Sets the output format for errors."""
+ self.output_format = output_format
+
+ def SetVerboseLevel(self, level):
+ """Sets the module's verbosity, and returns the previous setting."""
+ last_verbose_level = self.verbose_level
+ self.verbose_level = level
+ return last_verbose_level
+
+ def SetCountingStyle(self, counting_style):
+ """Sets the module's counting options."""
+ self.counting = counting_style
+
+ def SetFilters(self, filters):
+ """Sets the error-message filters.
+
+ These filters are applied when deciding whether to emit a given
+ error message.
+
+ Args:
+ filters: A string of comma-separated filters (eg "+whitespace/indent").
+ Each filter should start with + or -; else we die.
+
+ Raises:
+ ValueError: The comma-separated filters did not all start with '+' or '-'.
+ E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter"
+ """
+ # Default filters always have less priority than the flag ones.
+ self.filters = _DEFAULT_FILTERS[:]
+ for filt in filters.split(','):
+ clean_filt = filt.strip()
+ if clean_filt:
+ self.filters.append(clean_filt)
+ for filt in self.filters:
+ if not (filt.startswith('+') or filt.startswith('-')):
+ raise ValueError('Every filter in --filters must start with + or -'
+ ' (%s does not)' % filt)
+
+ def ResetErrorCounts(self):
+ """Sets the module's error statistic back to zero."""
+ self.error_count = 0
+ self.errors_by_category = {}
+
+ def IncrementErrorCount(self, category):
+ """Bumps the module's error statistic."""
+ self.error_count += 1
+ if self.counting in ('toplevel', 'detailed'):
+ if self.counting != 'detailed':
+ category = category.split('/')[0]
+ if category not in self.errors_by_category:
+ self.errors_by_category[category] = 0
+ self.errors_by_category[category] += 1
+
+ def PrintErrorCounts(self):
+ """Print a summary of errors by category, and the total."""
+ for category, count in self.errors_by_category.iteritems():
+ sys.stderr.write('Category \'%s\' errors found: %d\n' %
+ (category, count))
+ sys.stderr.write('Total errors found: %d\n' % self.error_count)
+
+_cpplint_state = _CppLintState()
+
+
+def _OutputFormat():
+  """Returns the output format stored on the module-level _cpplint_state."""
+  return _cpplint_state.output_format
+
+
+def _SetOutputFormat(output_format):
+  """Sets the module's output format.
+
+  Args:
+    output_format: The value forwarded to _cpplint_state.SetOutputFormat().
+  """
+  _cpplint_state.SetOutputFormat(output_format)
+
+
+def _VerboseLevel():
+  """Returns the verbosity level stored on the module-level _cpplint_state."""
+  return _cpplint_state.verbose_level
+
+
+def _SetVerboseLevel(level):
+  """Sets the module's verbosity, and returns the previous setting.
+
+  Args:
+    level: The new verbosity level.
+
+  Returns:
+    Whatever _cpplint_state.SetVerboseLevel() returns, i.e. the level that
+    was in effect before this call.
+  """
+  return _cpplint_state.SetVerboseLevel(level)
+
+
+def _SetCountingStyle(level):
+  """Sets the module's counting options.
+
+  Args:
+    level: The counting style (despite the parameter name, not a verbosity
+      level); it is passed unchanged to _cpplint_state.SetCountingStyle().
+  """
+  _cpplint_state.SetCountingStyle(level)
+
+
+def _Filters():
+  """Returns the module's list of output filters, as a list.
+
+  The returned object is the list held by _cpplint_state itself, not a copy.
+  """
+  return _cpplint_state.filters
+
+
+def _SetFilters(filters):
+  """Sets the module's error-message filters.
+
+  These filters are applied when deciding whether to emit a given
+  error message.
+
+  Args:
+    filters: A string of comma-separated filters (eg "+whitespace/indent").
+             Each filter should start with + or -; otherwise the ValueError
+             raised by _CppLintState.SetFilters() propagates to the caller.
+  """
+  _cpplint_state.SetFilters(filters)
+
+
+class _FunctionState(object):
+ """Tracks current function name and the number of lines in its body."""
+
+ _NORMAL_TRIGGER = 250 # for --v=0, 500 for --v=1, etc.
+ _TEST_TRIGGER = 400 # about 50% more than _NORMAL_TRIGGER.
+
+ def __init__(self):
+ self.in_a_function = False
+ self.lines_in_function = 0
+ self.current_function = ''
+
+ def Begin(self, function_name):
+ """Start analyzing function body.
+
+ Args:
+ function_name: The name of the function being tracked.
+ """
+ self.in_a_function = True
+ self.lines_in_function = 0
+ self.current_function = function_name
+
+ def Count(self):
+ """Count line in current function body."""
+ if self.in_a_function:
+ self.lines_in_function += 1
+
+ def Check(self, error, filename, linenum):
+ """Report if too many lines in function body.
+
+ Args:
+ error: The function to call with any errors found.
+ filename: The name of the current file.
+ linenum: The number of the line to check.
+ """
+ if Match(r'T(EST|est)', self.current_function):
+ base_trigger = self._TEST_TRIGGER
+ else:
+ base_trigger = self._NORMAL_TRIGGER
+ trigger = base_trigger * 2**_VerboseLevel()
+
+ if self.lines_in_function > trigger:
+ error_level = int(math.log(self.lines_in_function / base_trigger, 2))
+ # 50 => 0, 100 => 1, 200 => 2, 400 => 3, 800 => 4, 1600 => 5, ...
+ if error_level > 5:
+ error_level = 5
+ error(filename, linenum, 'readability/fn_size', error_level,
+ 'Small and focused functions are preferred:'
+ ' %s has %d non-comment lines'
+ ' (error triggered by exceeding %d lines).' % (
+ self.current_function, self.lines_in_function, trigger))
+
+ def End(self):
+ """Stop analyzing function body."""
+ self.in_a_function = False
+
+
+class _IncludeError(Exception):
+ """Indicates a problem with the include order in a file."""
+ pass
+
+
+class FileInfo:
+ """Provides utility functions for filenames.
+
+ FileInfo provides easy access to the components of a file's path
+ relative to the project root.
+ """
+
+ def __init__(self, filename):
+ self._filename = filename
+
+ def FullName(self):
+ """Make Windows paths like Unix."""
+ return os.path.abspath(self._filename).replace('\\', '/')
+
+ def RepositoryName(self):
+ """FullName after removing the local path to the repository.
+
+ If we have a real absolute path name here we can try to do something smart:
+ detecting the root of the checkout and truncating /path/to/checkout from
+ the name so that we get header guards that don't include things like
+ "C:\Documents and Settings\..." or "/home/username/..." in them and thus
+ people on different computers who have checked the source out to different
+ locations won't see bogus errors.
+ """
+ fullname = self.FullName()
+
+ if os.path.exists(fullname):
+ project_dir = os.path.dirname(fullname)
+
+ if os.path.exists(os.path.join(project_dir, ".svn")):
+ # If there's a .svn file in the current directory, we recursively look
+ # up the directory tree for the top of the SVN checkout
+ root_dir = project_dir
+ one_up_dir = os.path.dirname(root_dir)
+ while os.path.exists(os.path.join(one_up_dir, ".svn")):
+ root_dir = os.path.dirname(root_dir)
+ one_up_dir = os.path.dirname(one_up_dir)
+
+ prefix = os.path.commonprefix([root_dir, project_dir])
+ return fullname[len(prefix) + 1:]
+
+ # Not SVN <= 1.6? Try to find a git, hg, or svn top level directory by
+ # searching up from the current path.
+ root_dir = os.path.dirname(fullname)
+ while (root_dir != os.path.dirname(root_dir) and
+ not os.path.exists(os.path.join(root_dir, ".git")) and
+ not os.path.exists(os.path.join(root_dir, ".hg")) and
+ not os.path.exists(os.path.join(root_dir, ".svn"))):
+ root_dir = os.path.dirname(root_dir)
+
+ if (os.path.exists(os.path.join(root_dir, ".git")) or
+ os.path.exists(os.path.join(root_dir, ".hg")) or
+ os.path.exists(os.path.join(root_dir, ".svn"))):
+ prefix = os.path.commonprefix([root_dir, project_dir])
+ return fullname[len(prefix) + 1:]
+
+ # Don't know what to do; header guard warnings may be wrong...
+ return fullname
+
+ def Split(self):
+ """Splits the file into the directory, basename, and extension.
+
+ For 'chrome/browser/browser.cc', Split() would
+ return ('chrome/browser', 'browser', '.cc')
+
+ Returns:
+ A tuple of (directory, basename, extension).
+ """
+
+ googlename = self.RepositoryName()
+ project, rest = os.path.split(googlename)
+ return (project,) + os.path.splitext(rest)
+
+ def BaseName(self):
+ """File base name - text after the final slash, before the final period."""
+ return self.Split()[1]
+
+ def Extension(self):
+ """File extension - text following the final period."""
+ return self.Split()[2]
+
+ def NoExtension(self):
+ """File has no source file extension."""
+ return '/'.join(self.Split()[0:2])
+
+ def IsSource(self):
+ """File has a source file extension."""
+ return self.Extension()[1:] in ('c', 'cc', 'cpp', 'cxx')
+
+
+def _ShouldPrintError(category, confidence, linenum):
+ """If confidence >= verbose, category passes filter and is not suppressed."""
+
+ # There are three ways we might decide not to print an error message:
+ # a "NOLINT(category)" comment appears in the source,
+ # the verbosity level isn't high enough, or the filters filter it out.
+ if IsErrorSuppressedByNolint(category, linenum):
+ return False
+ if confidence < _cpplint_state.verbose_level:
+ return False
+
+ is_filtered = False
+ for one_filter in _Filters():
+ if one_filter.startswith('-'):
+ if category.startswith(one_filter[1:]):
+ is_filtered = True
+ elif one_filter.startswith('+'):
+ if category.startswith(one_filter[1:]):
+ is_filtered = False
+ else:
+ assert False # should have been checked for in SetFilter.
+ if is_filtered:
+ return False
+
+ return True
+
+
+def Error(filename, linenum, category, confidence, message):
+  """Reports one lint error on stderr and counts it, unless it is filtered.
+
+  Besides the location, the report carries our confidence in the error:
+  how certain we are that this is a legitimate style regression and not a
+  misidentification or a use that's sometimes justified.
+
+  False positives can be suppressed with "NOLINT(category)" comments on
+  the offending line; those are parsed into _error_suppressions.
+
+  Args:
+    filename: The name of the file containing the error.
+    linenum: The number of the line containing the error.
+    category: A string used to describe the "category" this bug
+      falls under: "whitespace", say, or "runtime". Categories
+      may have a hierarchy separated by slashes: "whitespace/indent".
+    confidence: A number from 1-5 representing a confidence score for
+      the error, with 5 meaning that we are certain of the problem,
+      and 1 meaning that it could be a legitimate construct.
+    message: The error message.
+  """
+  if not _ShouldPrintError(category, confidence, linenum):
+    return
+
+  _cpplint_state.IncrementErrorCount(category)
+  # Only the location part differs between the two output formats.
+  if _cpplint_state.output_format == 'vs7':
+    template = '%s(%s): %s [%s] [%d]\n'
+  else:
+    template = '%s:%s: %s [%s] [%d]\n'
+  sys.stderr.write(template % (
+      filename, linenum, message, category, confidence))
+
+
+# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard.
+_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
+ r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)')
+# Matches strings. Escape codes should already be removed by ESCAPES.
+_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES = re.compile(r'"[^"]*"')
+# Matches characters. Escape codes should already be removed by ESCAPES.
+_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES = re.compile(r"'.'")
+# Matches multi-line C++ comments.
+# This RE is a little bit more complicated than one might expect, because we
+# have to take care of space removals tools so we can handle comments inside
+# statements better.
+# The current rule is: We only clear spaces from both sides when we're at the
+# end of the line. Otherwise, we try to remove spaces from the right side,
+# if this doesn't work we try on left side but only if there's a non-character
+# on the right.
+_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
+ r"""(\s*/\*.*\*/\s*$|
+ /\*.*\*/\s+|
+ \s+/\*.*\*/(?=\W)|
+ /\*.*\*/)""", re.VERBOSE)
+
+
+def IsCppString(line):
+ """Does line terminate so, that the next symbol is in string constant.
+
+ This function does not consider single-line nor multi-line comments.
+
+ Args:
+ line: is a partial line of code starting from the 0..n.
+
+ Returns:
+ True, if next character appended to 'line' is inside a
+ string constant.
+ """
+
+ line = line.replace(r'\\', 'XX') # after this, \\" does not match to \"
+ return ((line.count('"') - line.count(r'\"') - line.count("'\"'")) & 1) == 1
+
+
+def FindNextMultiLineCommentStart(lines, lineix):
+ """Find the beginning marker for a multiline comment."""
+ while lineix < len(lines):
+ if lines[lineix].strip().startswith('/*'):
+ # Only return this marker if the comment goes beyond this line
+ if lines[lineix].strip().find('*/', 2) < 0:
+ return lineix
+ lineix += 1
+ return len(lines)
+
+
+def FindNextMultiLineCommentEnd(lines, lineix):
+ """We are inside a comment, find the end marker."""
+ while lineix < len(lines):
+ if lines[lineix].strip().endswith('*/'):
+ return lineix
+ lineix += 1
+ return len(lines)
+
+
+def RemoveMultiLineCommentsFromRange(lines, begin, end):
+ """Clears a range of lines for multi-line comments."""
+ # Having // dummy comments makes the lines non-empty, so we will not get
+ # unnecessary blank line warnings later in the code.
+ for i in range(begin, end):
+ lines[i] = '// dummy'
+
+
+def RemoveMultiLineComments(filename, lines, error):
+  """Blanks out every multi-line (C-style) comment in `lines`, in place.
+
+  Args:
+    filename: The name of the current file, used for error reporting.
+    lines: The list of source lines; it is modified in place.
+    error: The function to call when a comment is never closed.
+  """
+  position = 0
+  while position < len(lines):
+    start = FindNextMultiLineCommentStart(lines, position)
+    if start >= len(lines):
+      break  # No further multi-line comment.
+    stop = FindNextMultiLineCommentEnd(lines, start)
+    if stop >= len(lines):
+      # Error line numbers are 1-based, list indices are 0-based.
+      error(filename, start + 1, 'readability/multiline_comment', 5,
+            'Could not find end of multi-line comment')
+      break
+    RemoveMultiLineCommentsFromRange(lines, start, stop + 1)
+    position = stop + 1
+
+
+ def CleanseComments(line):
+ """Removes //-comments and single-line C-style /* */ comments.
+
+ Only the first '//' on the line is considered, and it is treated as a
+ comment start only when IsCppString reports that the text before it does
+ not end inside a string constant.
+
+ Args:
+ line: A line of C++ source.
+
+ Returns:
+ The line with single-line comments removed.
+ """
+ commentpos = line.find('//')
+ if commentpos != -1 and not IsCppString(line[:commentpos]):
+ # Drop the comment and any whitespace that separated it from the code.
+ line = line[:commentpos].rstrip()
+ # get rid of /* ... */
+ return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
+
+
+ class CleansedLines(object):
+ """Holds 3 copies of all lines with different preprocessing applied to them.
+
+ 1) elided member contains lines without strings and comments,
+ 2) lines member contains lines without comments, and
+ 3) raw_lines member contains all the lines without processing.
+ All these three members are of <type 'list'>, and of the same length.
+ """
+
+ def __init__(self, lines):
+ """Builds the cleansed copies from the given list of lines.
+
+ Args:
+ lines: A list of source lines; kept by reference as raw_lines.
+ """
+ self.elided = []
+ self.lines = []
+ self.raw_lines = lines
+ self.num_lines = len(lines)
+ for linenum in range(len(lines)):
+ self.lines.append(CleanseComments(lines[linenum]))
+ # Strings are collapsed before comments are stripped, so a '//' inside
+ # a string literal is gone by the time CleanseComments looks for it.
+ elided = self._CollapseStrings(lines[linenum])
+ self.elided.append(CleanseComments(elided))
+
+ def NumLines(self):
+ """Returns the number of lines represented."""
+ return self.num_lines
+
+ @staticmethod
+ def _CollapseStrings(elided):
+ """Collapses strings and chars on a line to simple "" or '' blocks.
+
+ We nix strings first so we're not fooled by text like '"http://"'
+
+ Lines matching _RE_PATTERN_INCLUDE are returned unchanged.
+
+ Args:
+ elided: The line being processed.
+
+ Returns:
+ The line with collapsed strings.
+ """
+ if not _RE_PATTERN_INCLUDE.match(elided):
+ # Remove escaped characters first to make quote/single quote collapsing
+ # basic. Things that look like escaped characters shouldn't occur
+ # outside of strings and chars.
+ elided = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)
+ elided = _RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES.sub("''", elided)
+ elided = _RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES.sub('""', elided)
+ return elided
+
+
+ def CloseExpression(clean_lines, linenum, pos):
+   """If input points to ( or { or [, finds the position that closes it.
+
+   If lines[linenum][pos] points to a '(' or '{' or '[', finds the
+   linenum/pos that correspond to the closing of the expression.
+
+   Delimiters are balanced by counting them over whole elided lines, so
+   delimiters of the same kind that appear before pos on the starting line
+   are included in the count.
+
+   Args:
+     clean_lines: A CleansedLines instance containing the file.
+     linenum: The number of the line to check.
+     pos: A position on the line.
+
+   Returns:
+     A tuple (line, linenum, pos) pointer *past* the closing brace, or
+     (line, len(lines), -1) if we never find a close.  Note we ignore
+     strings and comments when matching; and the line we return is the
+     'cleansed' line at linenum.
+   """
+
+   line = clean_lines.elided[linenum]
+   startchar = line[pos]
+   if startchar not in '({[':
+     return (line, clean_lines.NumLines(), -1)
+   endchar = {'(': ')', '[': ']', '{': '}'}[startchar]
+
+   num_open = line.count(startchar) - line.count(endchar)
+   # Stop at the last line: advancing past it would index beyond elided.
+   while linenum < clean_lines.NumLines() - 1 and num_open > 0:
+     linenum += 1
+     line = clean_lines.elided[linenum]
+     num_open += line.count(startchar) - line.count(endchar)
+   if num_open > 0:
+     # Ran out of lines with the expression still open.
+     return (line, clean_lines.NumLines(), -1)
+   # OK, now find the endchar that actually got us back to even
+   endpos = len(line)
+   while num_open >= 0:
+     # Search for the closer matching startchar, not only ')'.
+     endpos = line.rfind(endchar, 0, endpos)
+     num_open -= 1                 # chopped off another closer
+   return (line, linenum, endpos + 1)
+
+
+ def CheckForCopyright(filename, lines, error):
+ """Logs an error if no Copyright message appears at the top of the file.
+
+ Args:
+ filename: The name of the current file.
+ lines: An array of strings, each representing a line of the file.
+ error: The function to call with any errors found.
+ """
+
+ # We'll say it should occur by line 10. Don't forget there's a
+ # dummy line at the front.
+ for line in xrange(1, min(len(lines), 11)):
+ # Case-insensitive search; stop at the first line that mentions it.
+ if re.search(r'Copyright', lines[line], re.I): break
+ else: # means no copyright line was found
+ error(filename, 0, 'legal/copyright', 5,
+ 'No copyright message found. '
+ 'You should have a line: "Copyright [year] <Copyright Owner>"')
+
+
+ def GetHeaderGuardCPPVariable(filename):
+ """Returns the CPP variable that should be used as a header guard.
+
+ Args:
+ filename: The name of a C++ header file.
+
+ Returns:
+ The CPP variable that should be used as a header guard in the
+ named file.
+
+ """
+
+ # Restores original filename in case that cpplint is invoked from Emacs's
+ # flymake.
+ filename = re.sub(r'_flymake\.h$', '.h', filename)
+
+ fileinfo = FileInfo(filename)
+ # Replace '-', '.', '/' and whitespace in the repository-relative name with
+ # '_', upper-case the result and append a trailing '_'.
+ return re.sub(r'[-./\s]', '_', fileinfo.RepositoryName()).upper() + '_'
+
+
+ def CheckForHeaderGuard(filename, lines, error):
+ """Checks that the file contains a header guard.
+
+ Logs an error if no #ifndef header guard is present. For other
+ headers, checks that the full pathname is used.
+
+ The first #ifndef and first #define in the file are taken as the guard and
+ the last line starting with #endif as its end. A guard or #endif comment
+ that carries one extra trailing underscore is still reported, but at error
+ level 0 instead of 5.
+
+ Args:
+ filename: The name of the C++ header file.
+ lines: An array of strings, each representing a line of the file.
+ error: The function to call with any errors found.
+ """
+
+ cppvar = GetHeaderGuardCPPVariable(filename)
+
+ ifndef = None
+ ifndef_linenum = 0
+ define = None
+ endif = None
+ endif_linenum = 0
+ for linenum, line in enumerate(lines):
+ linesplit = line.split()
+ if len(linesplit) >= 2:
+ # find the first occurrence of #ifndef and #define, save arg
+ if not ifndef and linesplit[0] == '#ifndef':
+ # set ifndef to the header guard presented on the #ifndef line.
+ ifndef = linesplit[1]
+ ifndef_linenum = linenum
+ if not define and linesplit[0] == '#define':
+ define = linesplit[1]
+ # find the last occurrence of #endif, save entire line
+ if line.startswith('#endif'):
+ endif = line
+ endif_linenum = linenum
+
+ if not ifndef:
+ error(filename, 0, 'build/header_guard', 5,
+ 'No #ifndef header guard found, suggested CPP variable is: %s' %
+ cppvar)
+ return
+
+ if not define:
+ error(filename, 0, 'build/header_guard', 5,
+ 'No #define header guard found, suggested CPP variable is: %s' %
+ cppvar)
+ return
+
+ # The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__
+ # for backward compatibility.
+ if ifndef != cppvar:
+ error_level = 0
+ if ifndef != cppvar + '_':
+ error_level = 5
+
+ # Let any NOLINT marker on the #ifndef line be registered before the
+ # error is reported.
+ ParseNolintSuppressions(filename, lines[ifndef_linenum], ifndef_linenum,
+ error)
+ error(filename, ifndef_linenum, 'build/header_guard', error_level,
+ '#ifndef header guard has wrong style, please use: %s' % cppvar)
+
+ if define != ifndef:
+ error(filename, 0, 'build/header_guard', 5,
+ '#ifndef and #define don\'t match, suggested CPP variable is: %s' %
+ cppvar)
+ return
+
+ # The closing line must be exactly '#endif // <guard>'; the double
+ # underscore variant is tolerated at level 0, as for the #ifndef above.
+ if endif != ('#endif // %s' % cppvar):
+ error_level = 0
+ if endif != ('#endif // %s' % (cppvar + '_')):
+ error_level = 5
+
+ ParseNolintSuppressions(filename, lines[endif_linenum], endif_linenum,
+ error)
+ error(filename, endif_linenum, 'build/header_guard', error_level,
+ '#endif line should be "#endif // %s"' % cppvar)
+
+
+ def CheckForUnicodeReplacementCharacters(filename, lines, error):
+   """Reports every line that contains the Unicode replacement character.
+
+   U+FFFD in a line means the file either held invalid UTF-8 (the likely
+   cause) or held replacement characters outright (which it should not).
+   Invalid UTF-8 next to a newline can throw off the reported line numbers.
+
+   Args:
+     filename: The name of the current file.
+     lines: An array of strings, each representing a line of the file.
+     error: The function to call with any errors found.
+   """
+   replacement_char = u'\ufffd'
+   for linenum in range(len(lines)):
+     if replacement_char in lines[linenum]:
+       error(filename, linenum, 'readability/utf8', 5,
+             'Line contains invalid UTF-8 (or Unicode replacement character).')
+
+
+ def CheckForNewlineAtEOF(filename, lines, error):
+   """Reports a file whose last line is not terminated by a newline.
+
+   Args:
+     filename: The name of the current file.
+     lines: An array of strings, each representing a line of the file.
+     error: The function to call with any errors found.
+   """
+
+   # lines was produced by appending two newlines to the original file
+   # contents and splitting on \n.  The file therefore ends in \n exactly
+   # when the last-but-two element exists and is empty.
+   ends_with_newline = len(lines) >= 3 and not lines[-2]
+   if ends_with_newline:
+     return
+   error(filename, len(lines) - 2, 'whitespace/ending_newline', 5,
+         'Could not find a newline character at the end of the file.')
+
+
+ def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error):
+ """Logs an error if we see /* ... */ or "..." that extend past one line.
+
+ /* ... */ comments are legit inside macros, for one line.
+ Otherwise, we prefer // comments, so it's ok to warn about the
+ other. Likewise, it's ok for strings to extend across multiple
+ lines, as long as a line continuation character (backslash)
+ terminates each line. Although not currently prohibited by the C++
+ style guide, it's ugly and unnecessary. We don't do well with either
+ in this lint program, so we warn about both.
+
+ Args:
+ filename: The name of the current file.
+ clean_lines: A CleansedLines instance containing the file.
+ linenum: The number of the line to check.
+ error: The function to call with any errors found.
+ """
+ line = clean_lines.elided[linenum]
+
+ # Remove all \\ (escaped backslashes) from the line. They are OK, and the
+ # second (escaped) slash may trigger later \" detection erroneously.
+ line = line.replace('\\\\', '')
+
+ # More comment openers than closers on this line: a comment is left open.
+ if line.count('/*') > line.count('*/'):
+ error(filename, linenum, 'readability/multiline_comment', 5,
+ 'Complex multi-line /*...*/-style comment found. '
+ 'Lint may give bogus warnings. '
+ 'Consider replacing these with //-style comments, '
+ 'with #if 0...#endif, '
+ 'or with more clearly structured multi-line comments.')
+
+ # An odd number of unescaped double quotes: a string is left open.
+ if (line.count('"') - line.count('\\"')) % 2:
+ error(filename, linenum, 'readability/multiline_string', 5,
+ 'Multi-line string ("...") found. This lint script doesn\'t '
+ 'do well with such strings, and may give bogus warnings. They\'re '
+ 'ugly and unnecessary, and you should use concatenation instead".')
+
+
+ # Pairs of (thread-unsafe function, suggested replacement) consulted by
+ # CheckPosixThreading. Each name includes its opening '('.
+ threading_list = (
+ ('asctime(', 'asctime_r('),
+ ('ctime(', 'ctime_r('),
+ ('getgrgid(', 'getgrgid_r('),
+ ('getgrnam(', 'getgrnam_r('),
+ ('getlogin(', 'getlogin_r('),
+ ('getpwnam(', 'getpwnam_r('),
+ ('getpwuid(', 'getpwuid_r('),
+ ('gmtime(', 'gmtime_r('),
+ ('localtime(', 'localtime_r('),
+ ('rand(', 'rand_r('),
+ ('readdir(', 'readdir_r('),
+ ('strtok(', 'strtok_r('),
+ ('ttyname(', 'ttyname_r('),
+ )
+
+
+ def CheckPosixThreading(filename, clean_lines, linenum, error):
+ """Checks for calls to thread-unsafe functions.
+
+ Much code has been originally written without consideration of
+ multi-threading. Also, engineers are relying on their old experience;
+ they have learned posix before threading extensions were added. These
+ tests guide the engineers to use thread-safe functions (when using
+ posix directly).
+
+ Only the first occurrence of each function name on the line is examined.
+
+ Args:
+ filename: The name of the current file.
+ clean_lines: A CleansedLines instance containing the file.
+ linenum: The number of the line to check.
+ error: The function to call with any errors found.
+ """
+ line = clean_lines.elided[linenum]
+ for single_thread_function, multithread_safe_function in threading_list:
+ ix = line.find(single_thread_function)
+ # Report only when the name is not the tail of a longer identifier and is
+ # not reached through '.' or '>' (the last character of '->').
+ # Comparisons made explicit for clarity -- pylint: disable-msg=C6403
+ if ix >= 0 and (ix == 0 or (not line[ix - 1].isalnum() and
+ line[ix - 1] not in ('_', '.', '>'))):
+ error(filename, linenum, 'runtime/threadsafe_fn', 2,
+ 'Consider using ' + multithread_safe_function +
+ '...) instead of ' + single_thread_function +
+ '...) for improved thread safety.')
+
+
+ # Matches invalid increment: *count++, which moves pointer instead of
+ # incrementing a value. The pattern also matches the decrement form
+ # (*count--) and requires the statement to start the line, apart from
+ # leading whitespace.
+ _RE_PATTERN_INVALID_INCREMENT = re.compile(
+ r'^\s*\*\w+(\+\+|--);')
+
+
+ def CheckInvalidIncrement(filename, clean_lines, linenum, error):
+   """Flags statements such as *count++ that step the pointer, not the value.
+
+   In the function
+     void increment_counter(int* count) {
+       *count++;
+     }
+   the statement effectively performs count++, moving the pointer.  The
+   forms ++*count, (*count)++ or *count += 1 change the pointed-to value.
+
+   Args:
+     filename: The name of the current file.
+     clean_lines: A CleansedLines instance containing the file.
+     linenum: The number of the line to check.
+     error: The function to call with any errors found.
+   """
+   if not _RE_PATTERN_INVALID_INCREMENT.match(clean_lines.elided[linenum]):
+     return
+   error(filename, linenum, 'runtime/invalid_increment', 5,
+         'Changing pointer instead of value (or unused value of operator*).')
+
+
+ class _ClassInfo(object):
+ """Stores information about a class."""
+
+ def __init__(self, name, clean_lines, linenum):
+ """Records the class name and start line and estimates its last line.
+
+ Args:
+ name: The name of the class as written in its declaration.
+ clean_lines: A CleansedLines instance containing the file.
+ linenum: The number of the line where the declaration was seen.
+ """
+ self.name = name
+ self.linenum = linenum
+ # The fields below start at these defaults and are updated by
+ # CheckForNonStandardConstructs as later lines of the class are seen.
+ self.seen_open_brace = False
+ self.is_derived = False
+ self.virtual_method_linenumber = None
+ self.has_virtual_destructor = False
+ self.brace_depth = 0
+
+ # Try to find the end of the class. This will be confused by things like:
+ # class A {
+ # } *x = { ...
+ #
+ # But it's still good enough for CheckSectionSpacing.
+ self.last_line = 0
+ depth = 0
+ for i in range(linenum, clean_lines.NumLines()):
+ line = clean_lines.lines[i]
+ depth += line.count('{') - line.count('}')
+ # The first line on which braces balance is taken as the last line; a
+ # declaration line with no braces at all therefore ends the search
+ # immediately.
+ if not depth:
+ self.last_line = i
+ break
+
+
+ class _ClassState(object):
+ """Holds the current state of the parse relating to class declarations.
+
+ It maintains a stack of _ClassInfos representing the parser's guess
+ as to the current nesting of class declarations. The innermost class
+ is at the top (back) of the stack. Typically, the stack will either
+ be empty or have exactly one entry.
+ """
+
+ def __init__(self):
+ # List of _ClassInfo objects; the last element is the innermost class.
+ self.classinfo_stack = []
+
+ def CheckFinished(self, filename, error):
+ """Checks that all classes have been completely parsed.
+
+ Call this when all lines in a file have been processed.
+ Only the outermost unfinished class (bottom of the stack) is reported.
+
+ Args:
+ filename: The name of the current file.
+ error: The function to call with any errors found.
+ """
+ if self.classinfo_stack:
+ # Note: This test can result in false positives if #ifdef constructs
+ # get in the way of brace matching. See the testBuildClass test in
+ # cpplint_unittest.py for an example of this.
+ error(filename, self.classinfo_stack[0].linenum, 'build/class', 5,
+ 'Failed to find complete declaration of class %s' %
+ self.classinfo_stack[0].name)
+
+
+ def CheckForNonStandardConstructs(filename, clean_lines, linenum,
+ class_state, error):
+ """Logs an error if we see certain non-ANSI constructs ignored by gcc-2.
+
+ Complain about several constructs which gcc-2 accepts, but which are
+ not standard C++. Warning about these in lint is one way to ease the
+ transition to new compilers.
+ - put storage class first (e.g. "static const" instead of "const static").
+ - "%lld" instead of "%qd" in printf-type functions.
+ - "%1$d" is non-standard in printf-type functions.
+ - "\%" is an undefined character escape sequence.
+ - text after #endif is not allowed.
+ - invalid inner-style forward declaration.
+ - >? and <? operators, and their >?= and <?= cousins.
+ - classes with virtual methods need virtual destructors (compiler warning
+ available, but not turned on yet.)
+
+ Additionally, check for constructor/destructor style violations and reference
+ members, as it is very convenient to do so while checking for
+ gcc-2 compliance.
+
+ Args:
+ filename: The name of the current file.
+ clean_lines: A CleansedLines instance containing the file.
+ linenum: The number of the line to check.
+ class_state: A _ClassState instance which maintains information about
+ the current stack of nested class declarations being parsed.
+ error: A callable to which errors are reported, which takes 4 arguments:
+ filename, line number, error level, and message
+ """
+
+ # Remove comments from the line, but leave in strings for now.
+ line = clean_lines.lines[linenum]
+
+ if Search(r'printf\s*\(.*".*%[-+ ]?\d*q', line):
+ error(filename, linenum, 'runtime/printf_format', 3,
+ '%q in format strings is deprecated. Use %ll instead.')
+
+ if Search(r'printf\s*\(.*".*%\d+\$', line):
+ error(filename, linenum, 'runtime/printf_format', 2,
+ '%N$ formats are unconventional. Try rewriting to avoid them.')
+
+ # Remove escaped backslashes before looking for undefined escapes.
+ line = line.replace('\\\\', '')
+
+ if Search(r'("|\').*\\(%|\[|\(|{)', line):
+ error(filename, linenum, 'build/printf_format', 3,
+ '%, [, (, and { are undefined character escapes. Unescape them.')
+
+ # For the rest, work with both comments and strings removed.
+ line = clean_lines.elided[linenum]
+
+ if Search(r'\b(const|volatile|void|char|short|int|long'
+ r'|float|double|signed|unsigned'
+ r'|schar|u?int8|u?int16|u?int32|u?int64)'
+ r'\s+(auto|register|static|extern|typedef)\b',
+ line):
+ error(filename, linenum, 'build/storage_class', 5,
+ 'Storage class (static, extern, typedef, etc) should be first.')
+
+ if Match(r'\s*#\s*endif\s*[^/\s]+', line):
+ error(filename, linenum, 'build/endif_comment', 5,
+ 'Uncommented text after #endif is non-standard. Use a comment.')
+
+ if Match(r'\s*class\s+(\w+\s*::\s*)+\w+\s*;', line):
+ error(filename, linenum, 'build/forward_decl', 5,
+ 'Inner-style forward declarations are invalid. Remove this line.')
+
+ if Search(r'(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?',
+ line):
+ error(filename, linenum, 'build/deprecated', 3,
+ '>? and <? (max and min) operators are non-standard and deprecated.')
+
+ if Search(r'^\s*const\s*string\s*&\s*\w+\s*;', line):
+ # TODO(unknown): Could it be expanded safely to arbitrary references,
+ # without triggering too many false positives? The first
+ # attempt triggered 5 warnings for mostly benign code in the regtest, hence
+ # the restriction.
+ # Here's the original regexp, for the reference:
+ # type_name = r'\w+((\s*::\s*\w+)|(\s*<\s*\w+?\s*>))?'
+ # r'\s*const\s*' + type_name + '\s*&\s*\w+\s*;'
+ error(filename, linenum, 'runtime/member_string_references', 2,
+ 'const string& members are dangerous. It is much better to use '
+ 'alternatives, such as pointers or simple constants.')
+
+ # Track class entry and exit, and attempt to find cases within the
+ # class declaration that don't meet the C++ style
+ # guidelines. Tracking is very dependent on the code matching Google
+ # style guidelines, but it seems to perform well enough in testing
+ # to be a worthwhile addition to the checks.
+ classinfo_stack = class_state.classinfo_stack
+ # Look for a class declaration. The regexp accounts for decorated classes
+ # such as in:
+ # class LOCKABLE API Object {
+ # };
+ class_decl_match = Match(
+ r'\s*(template\s*<[\w\s<>,:]*>\s*)?'
+ '(class|struct)\s+([A-Z_]+\s+)*(\w+(::\w+)*)', line)
+ if class_decl_match:
+ # Group 4 is the (possibly ::-qualified) class name.
+ classinfo_stack.append(_ClassInfo(
+ class_decl_match.group(4), clean_lines, linenum))
+
+ # Everything else in this function uses the top of the stack if it's
+ # not empty.
+ if not classinfo_stack:
+ return
+
+ classinfo = classinfo_stack[-1]
+
+ # If the opening brace hasn't been seen look for it and also
+ # parent class declarations.
+ if not classinfo.seen_open_brace:
+ # If the line has a ';' in it, assume it's a forward declaration or
+ # a single-line class declaration, which we won't process.
+ if line.find(';') != -1:
+ classinfo_stack.pop()
+ return
+ classinfo.seen_open_brace = (line.find('{') != -1)
+ # Look for a bare ':'
+ if Search('(^|[^:]):($|[^:])', line):
+ classinfo.is_derived = True
+ if not classinfo.seen_open_brace:
+ return # Everything else in this function is for after open brace
+
+ # The class may have been declared with namespace or classname qualifiers.
+ # The constructor and destructor will not have those qualifiers.
+ base_classname = classinfo.name.split('::')[-1]
+
+ # Look for single-argument constructors that aren't marked explicit.
+ # Technically a valid construct, but against style.
+ args = Match(r'\s+(?:inline\s+)?%s\s*\(([^,()]+)\)'
+ % re.escape(base_classname),
+ line)
+ # A lone 'void' parameter list and a parameter that is a (possibly const)
+ # reference to the class itself are exempt from the warning.
+ if (args and
+ args.group(1) != 'void' and
+ not Match(r'(const\s+)?%s\s*(?:<\w+>\s*)?&' % re.escape(base_classname),
+ args.group(1).strip())):
+ error(filename, linenum, 'runtime/explicit', 5,
+ 'Single-argument constructors should be marked explicit.')
+
+ # Look for methods declared virtual.
+ if Search(r'\bvirtual\b', line):
+ # Overwritten on every match, so this ends up holding the most recent
+ # line that mentions 'virtual'.
+ classinfo.virtual_method_linenumber = linenum
+ # Only look for a destructor declaration on the same line. It would
+ # be extremely unlikely for the destructor declaration to occupy
+ # more than one line.
+ if Search(r'~%s\s*\(' % base_classname, line):
+ classinfo.has_virtual_destructor = True
+
+ # Look for class end.
+ brace_depth = classinfo.brace_depth
+ brace_depth = brace_depth + line.count('{') - line.count('}')
+ if brace_depth <= 0:
+ classinfo = classinfo_stack.pop()
+ # Try to detect missing virtual destructor declarations.
+ # For now, only warn if a non-derived class with virtual methods lacks
+ # a virtual destructor. This is to make it less likely that people will
+ # declare derived virtual destructors without declaring the base
+ # destructor virtual.
+ if ((classinfo.virtual_method_linenumber is not None) and
+ (not classinfo.has_virtual_destructor) and
+ (not classinfo.is_derived)): # Only warn for base classes
+ error(filename, classinfo.linenum, 'runtime/virtual', 4,
+ 'The class %s probably needs a virtual destructor due to '
+ 'having virtual method(s), one declared at line %d.'
+ % (classinfo.name, classinfo.virtual_method_linenumber))
+ else:
+ # Still inside the class: remember the depth for the next line.
+ classinfo.brace_depth = brace_depth
+
+
+ def CheckSpacingForFunctionCall(filename, line, linenum, error):
+ """Checks for the correctness of various spacing around function calls.
+
+ All findings are reported under the 'whitespace/parens' category.
+
+ Args:
+ filename: The name of the current file.
+ line: The text of the line to check.
+ linenum: The number of the line to check.
+ error: The function to call with any errors found.
+ """
+
+ # Since function calls often occur inside if/for/while/switch
+ # expressions - which have their own, more liberal conventions - we
+ # first see if we should be looking inside such an expression for a
+ # function call, to which we can apply more strict standards.
+ fncall = line # if there's no control flow construct, look at whole line
+ for pattern in (r'\bif\s*\((.*)\)\s*{',
+ r'\bfor\s*\((.*)\)\s*{',
+ r'\bwhile\s*\((.*)\)\s*[{;]',
+ r'\bswitch\s*\((.*)\)\s*{'):
+ match = Search(pattern, line)
+ if match:
+ fncall = match.group(1) # look inside the parens for function calls
+ break
+
+ # Except in if/for/while/switch, there should never be space
+ # immediately inside parens (eg "f( 3, 4 )"). We make an exception
+ # for nested parens ( (a+b) + c ). Likewise, there should never be
+ # a space before a ( when it's a function argument. I assume it's a
+ # function argument when the char before the whitespace is legal in
+ # a function name (alnum + _) and we're not starting a macro. Also ignore
+ # pointers and references to arrays and functions coz they're too tricky:
+ # we use a very simple way to recognize these:
+ # " (something)(maybe-something)" or
+ # " (something)(maybe-something," or
+ # " (something)[something]"
+ # Note that we assume the contents of [] to be short enough that
+ # they'll never need to wrap.
+ if ( # Ignore control structures.
+ not Search(r'\b(if|for|while|switch|return|delete)\b', fncall) and
+ # Ignore pointers/references to functions.
+ not Search(r' \([^)]+\)\([^)]*(\)|,$)', fncall) and
+ # Ignore pointers/references to arrays.
+ not Search(r' \([^)]+\)\[[^\]]+\]', fncall)):
+ # The negative lookaheads below skip a '(' that is followed only by
+ # whitespace and a line-continuation backslash.
+ if Search(r'\w\s*\(\s(?!\s*\\$)', fncall): # a ( used for a fn call
+ error(filename, linenum, 'whitespace/parens', 4,
+ 'Extra space after ( in function call')
+ elif Search(r'\(\s+(?!(\s*\\)|\()', fncall):
+ error(filename, linenum, 'whitespace/parens', 2,
+ 'Extra space after (')
+ if (Search(r'\w\s+\(', fncall) and
+ not Search(r'#\s*define|typedef', fncall)):
+ error(filename, linenum, 'whitespace/parens', 4,
+ 'Extra space before ( in function call')
+ # If the ) is followed only by a newline or a { + newline, assume it's
+ # part of a control statement (if/while/etc), and don't complain
+ if Search(r'[^)]\s+\)\s*[^{\s]', fncall):
+ # If the closing parenthesis is preceded by only whitespaces,
+ # try to give a more descriptive error message.
+ if Search(r'^\s+\)', fncall):
+ error(filename, linenum, 'whitespace/parens', 2,
+ 'Closing ) should be moved to the previous line')
+ else:
+ error(filename, linenum, 'whitespace/parens', 2,
+ 'Extra space before )')
+
+
+ def IsBlankLine(line):
+   """Tells whether a line has no visible content.
+
+   A line counts as blank when it is empty or is made up solely of
+   whitespace characters.
+
+   Args:
+     line: A line of a string.
+
+   Returns:
+     True, if the given line is blank.
+   """
+   if not line:
+     return True
+   return line.isspace()
+
+
+ def CheckForFunctionLengths(filename, clean_lines, linenum,
+ function_state, error):
+ """Reports for long function bodies.
+
+ For an overview why this is done, see:
+ http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions
+
+ Uses a simplistic algorithm assuming other style guidelines
+ (especially spacing) are followed.
+ Only checks unindented functions, so class members are unchecked.
+ Trivial bodies are unchecked, so constructors with huge initializer lists
+ may be missed.
+ Blank/comment lines are not counted so as to avoid encouraging the removal
+ of vertical space and comments just to get through a lint check.
+ NOLINT *on the last line of a function* disables this check.
+
+ Args:
+ filename: The name of the current file.
+ clean_lines: A CleansedLines instance containing the file.
+ linenum: The number of the line to check.
+ function_state: Current function name and lines in body so far.
+ error: The function to call with any errors found.
+ """
+ lines = clean_lines.lines
+ line = lines[linenum]
+ raw = clean_lines.raw_lines
+ raw_line = raw[linenum]
+ # Accumulates the header lines scanned below so that TEST(...) parameters
+ # can be extracted even when they span several lines.
+ joined_line = ''
+
+ starting_func = False
+ # Match anchors at column 0, which is why indented code is not treated as
+ # the start of a function.
+ regexp = r'(\w(\w|::|\*|\&|\s)*)\(' # decls * & space::name( ...
+ match_result = Match(regexp, line)
+ if match_result:
+ # If the name is all caps and underscores, figure it's a macro and
+ # ignore it, unless it's TEST or TEST_F.
+ function_name = match_result.group(1).split()[-1]
+ if function_name == 'TEST' or function_name == 'TEST_F' or (
+ not Match(r'[A-Z_]+$', function_name)):
+ starting_func = True
+
+ if starting_func:
+ body_found = False
+ # Scan forward from this line for whichever comes first: a ';' or '}'
+ # (declaration or trivial body) or a '{' (start of a real body).
+ for start_linenum in xrange(linenum, clean_lines.NumLines()):
+ start_line = lines[start_linenum]
+ joined_line += ' ' + start_line.lstrip()
+ if Search(r'(;|})', start_line): # Declarations and trivial functions
+ body_found = True
+ break # ... ignore
+ elif Search(r'{', start_line):
+ body_found = True
+ function = Search(r'((\w|:)*)\(', line).group(1)
+ if Match(r'TEST', function): # Handle TEST... macros
+ parameter_regexp = Search(r'(\(.*\))', joined_line)
+ if parameter_regexp: # Ignore bad syntax
+ function += parameter_regexp.group(1)
+ else:
+ function += '()'
+ function_state.Begin(function)
+ break
+ if not body_found:
+ # No body for the function (or evidence of a non-function) was found.
+ error(filename, linenum, 'readability/fn_size', 5,
+ 'Lint failed to find start of function body.')
+ elif Match(r'^\}\s*$', line): # function end
+ function_state.Check(error, filename, linenum)
+ function_state.End()
+ elif not Match(r'^\s*$', line):
+ function_state.Count() # Count non-blank/non-comment lines.
+
+
+ # Matches a TODO at the start of a // comment. Group 1 is the whitespace
+ # between '//' and 'TODO', group 2 the optional parenthesized text after
+ # 'TODO', and group 3 the optional whitespace character (or end of string)
+ # that follows the optional colon.
+ _RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO(\(.+?\))?:?(\s|$)?')
+
+
+ def CheckComment(comment, filename, linenum, error):
+ """Checks for common mistakes in TODO comments.
+
+ Comments that do not match _RE_PATTERN_TODO are ignored. For those that
+ do, up to three independent errors may be reported: too much whitespace
+ before TODO, a missing username, and a missing space after the TODO.
+
+ Args:
+ comment: The text of the comment from the line in question.
+ filename: The name of the current file.
+ linenum: The number of the line to check.
+ error: The function to call with any errors found.
+ """
+ match = _RE_PATTERN_TODO.match(comment)
+ if match:
+ # One whitespace is correct; zero whitespace is handled elsewhere.
+ leading_whitespace = match.group(1)
+ if len(leading_whitespace) > 1:
+ error(filename, linenum, 'whitespace/todo', 2,
+ 'Too many spaces before TODO')
+
+ username = match.group(2)
+ if not username:
+ error(filename, linenum, 'readability/todo', 2,
+ 'Missing username in TODO; it should look like '
+ '"// TODO(my_username): Stuff."')
+
+ middle_whitespace = match.group(3)
+ # Group 3 is ' ' for a single space, '' when the comment ends right after
+ # the TODO, and None when other text follows immediately; anything but the
+ # first two is reported.
+ # Comparisons made explicit for correctness -- pylint: disable-msg=C6403
+ if middle_whitespace != ' ' and middle_whitespace != '':
+ error(filename, linenum, 'whitespace/todo', 2,
+ 'TODO(my_username) should be followed by a space')
+
+
+def CheckSpacing(filename, clean_lines, linenum, error):
+ """Checks for the correctness of various spacing issues in the code.
+
+ Things we check for: spaces around operators, spaces after
+ if/for/while/switch, no spaces around parens in function calls, two
+ spaces between code and comment, don't start a block with a blank
+ line, don't end a function with a blank line, don't add a blank line
+ after public/protected/private, don't have too many blank lines in a row.
+
+ Args:
+ filename: The name of the current file.
+ clean_lines: A CleansedLines instance containing the file.
+ linenum: The number of the line to check.
+ error: The function to call with any errors found.
+ """
+
+ raw = clean_lines.raw_lines
+ line = raw[linenum]
+
+ # Before nixing comments, check if the line is blank for no good
+ # reason. This includes the first line after a block is opened, and
+ # blank lines at the end of a function (ie, right before a line like '}'
+ if IsBlankLine(line):
+ elided = clean_lines.elided
+ prev_line = elided[linenum - 1]
+ prevbrace = prev_line.rfind('{')
+ # TODO(unknown): Don't complain if line before blank line, and line after,
+ # both start with alnums and are indented the same amount.
+ # This ignores whitespace at the start of a namespace block
+ # because those are not usually indented.
+ if (prevbrace != -1 and prev_line[prevbrace:].find('}') == -1
+ and prev_line[:prevbrace].find('namespace') == -1):
+ # OK, we have a blank line at the start of a code block. Before we
+ # complain, we check if it is an exception to the rule: The previous
+ # non-empty line has the parameters of a function header that are indented
+ # 4 spaces (because they did not fit in a 80 column line when placed on
+ # the same line as the function name). We also check for the case where
+ # the previous line is indented 6 spaces, which may happen when the
+ # initializers of a constructor do not fit into a 80 column line.
+ exception = False
+ if Match(r' {6}\w', prev_line): # Initializer list?
+ # We are looking for the opening column of initializer list, which
+ # should be indented 4 spaces to cause 6 space indentation afterwards.
+ search_position = linenum-2
+ while (search_position >= 0
+ and Match(r' {6}\w', elided[search_position])):
+ search_position -= 1
+ exception = (search_position >= 0
+ and elided[search_position][:5] == ' :')
+ else:
+ # Search for the function arguments or an initializer list. We use a
+ # simple heuristic here: If the line is indented 4 spaces; and we have a
+ # closing paren, without the opening paren, followed by an opening brace
+ # or colon (for initializer lists) we assume that it is the last line of
+ # a function header. If we have a colon indented 4 spaces, it is an
+ # initializer list.
+ exception = (Match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)',
+ prev_line)
+ or Match(r' {4}:', prev_line))
+
+ if not exception:
+ error(filename, linenum, 'whitespace/blank_line', 2,
+ 'Blank line at the start of a code block. Is this needed?')
+ # This doesn't ignore whitespace at the end of a namespace block
+ # because that is too hard without pairing open/close braces;
+ # however, a special exception is made for namespace closing
+ # brackets which have a comment containing "namespace".
+ #
+ # Also, ignore blank lines at the end of a block in a long if-else
+ # chain, like this:
+ # if (condition1) {
+ # // Something followed by a blank line
+ #
+ # } else if (condition2) {
+ # // Something else
+ # }
+ if linenum + 1 < clean_lines.NumLines():
+ next_line = raw[linenum + 1]
+ if (next_line
+ and Match(r'\s*}', next_line)
+ and next_line.find('namespace') == -1
+ and next_line.find('} else ') == -1):
+ error(filename, linenum, 'whitespace/blank_line', 3,
+ 'Blank line at the end of a code block. Is this needed?')
+
+ matched = Match(r'\s*(public|protected|private):', prev_line)
+ if matched:
+ error(filename, linenum, 'whitespace/blank_line', 3,
+ 'Do not leave a blank line after "%s:"' % matched.group(1))
+
+ # Next, we complain if there's a comment too near the text
+ commentpos = line.find('//')
+ if commentpos != -1:
+ # Check if the // may be in quotes. If so, ignore it
+ # Comparisons made explicit for clarity -- pylint: disable-msg=C6403
+ if (line.count('"', 0, commentpos) -
+ line.count('\\"', 0, commentpos)) % 2 == 0: # not in quotes
+ # Allow one space for new scopes, two spaces otherwise:
+ if (not Match(r'^\s*{ //', line) and
+ ((commentpos >= 1 and
+ line[commentpos-1] not in string.whitespace) or
+ (commentpos >= 2 and
+ line[commentpos-2] not in string.whitespace))):
+ error(filename, linenum, 'whitespace/comments', 2,
+ 'At least two spaces is best between code and comments')
+ # There should always be a space between the // and the comment
+ commentend = commentpos + 2
+ if commentend < len(line) and not line[commentend] == ' ':
+ # but some lines are exceptions -- e.g. if they're big
+ # comment delimiters like:
+ # //----------------------------------------------------------
+ # or are an empty C++ style Doxygen comment, like:
+ # ///
+ # or they begin with multiple slashes followed by a space:
+ # //////// Header comment
+ match = (Search(r'[=/-]{4,}\s*$', line[commentend:]) or
+ Search(r'^/$', line[commentend:]) or
+ Search(r'^/+ ', line[commentend:]))
+ if not match:
+ error(filename, linenum, 'whitespace/comments', 4,
+ 'Should have a space between // and comment')
+ CheckComment(line[commentpos:], filename, linenum, error)
+
+ line = clean_lines.elided[linenum] # get rid of comments and strings
+
+ # Don't try to do spacing checks for operator methods
+ line = re.sub(r'operator(==|!=|<|<<|<=|>=|>>|>)\(', 'operator\(', line)
+
+ # We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )".
+ # Otherwise not. Note we only check for non-spaces on *both* sides;
+ # sometimes people put non-spaces on one side when aligning ='s among
+ # many lines (not that this is behavior that I approve of...)
+ if Search(r'[\w.]=[\w.]', line) and not Search(r'\b(if|while) ', line):
+ error(filename, linenum, 'whitespace/operators', 4,
+ 'Missing spaces around =')
+
+ # It's ok not to have spaces around binary operators like + - * /, but if
+ # there's too little whitespace, we get concerned. It's hard to tell,
+ # though, so we punt on this one for now. TODO.
+
+ # You should always have whitespace around binary operators.
+ # Alas, we can't test < or > because they're legitimately used sans spaces
+ # (a->b, vector<int> a). The only time we can tell is a < with no >, and
+ # only if it's not template params list spilling into the next line.
+ match = Search(r'[^<>=!\s](==|!=|<=|>=)[^<>=!\s]', line)
+ if not match:
+ # Note that while it seems that the '<[^<]*' term in the following
+ # regexp could be simplified to '<.*', which would indeed match
+ # the same class of strings, the [^<] means that searching for the
+ # regexp takes linear rather than quadratic time.
+ if not Search(r'<[^<]*,\s*$', line): # template params spill
+ match = Search(r'[^<>=!\s](<)[^<>=!\s]([^>]|->)*$', line)
+ if match:
+ error(filename, linenum, 'whitespace/operators', 3,
+ 'Missing spaces around %s' % match.group(1))
+ # We allow no-spaces around << and >> when used like this: 10<<20, but
+ # not otherwise (particularly, not when used as streams)
+ match = Search(r'[^0-9\s](<<|>>)[^0-9\s]', line)
+ if match:
+ error(filename, linenum, 'whitespace/operators', 3,
+ 'Missing spaces around %s' % match.group(1))
+
+ # There shouldn't be space around unary operators
+ match = Search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line)
+ if match:
+ error(filename, linenum, 'whitespace/operators', 4,
+ 'Extra space for operator %s' % match.group(1))
+
+ # A pet peeve of mine: no spaces after an if, while, switch, or for
+ match = Search(r' (if\(|for\(|while\(|switch\()', line)
+ if match:
+ error(filename, linenum, 'whitespace/parens', 5,
+ 'Missing space before ( in %s' % match.group(1))
+
+ # For if/for/while/switch, the left and right parens should be
+ # consistent about how many spaces are inside the parens, and
+ # there should either be zero or one spaces inside the parens.
+ # We don't want: "if ( foo)" or "if ( foo )".
+ # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed.
+ match = Search(r'\b(if|for|while|switch)\s*'
+ r'\(([ ]*)(.).*[^ ]+([ ]*)\)\s*{\s*$',
+ line)
+ if match:
+ if len(match.group(2)) != len(match.group(4)):
+ if not (match.group(3) == ';' and
+ len(match.group(2)) == 1 + len(match.group(4)) or
+ not match.group(2) and Search(r'\bfor\s*\(.*; \)', line)):
+ error(filename, linenum, 'whitespace/parens', 5,
+ 'Mismatching spaces inside () in %s' % match.group(1))
+ if not len(match.group(2)) in [0, 1]:
+ error(filename, linenum, 'whitespace/parens', 5,
+ 'Should have zero or one spaces inside ( and ) in %s' %
+ match.group(1))
+
+ # You should always have a space after a comma (either as fn arg or operator)
+ if Search(r',[^\s]', line):
+ error(filename, linenum, 'whitespace/comma', 3,
+ 'Missing space after ,')
+
+ # You should always have a space after a semicolon
+ # except for few corner cases
+ # TODO(unknown): clarify if 'if (1) { return 1;}' is requires one more
+ # space after ;
+ if Search(r';[^\s};\\)/]', line):
+ error(filename, linenum, 'whitespace/semicolon', 3,
+ 'Missing space after ;')
+
+ # Next we will look for issues with function calls.
+ CheckSpacingForFunctionCall(filename, line, linenum, error)
+
+ # Except after an opening paren, or after another opening brace (in case of
+ # an initializer list, for instance), you should have spaces before your
+ # braces. And since you should never have braces at the beginning of a line,
+ # this is an easy test.
+ if Search(r'[^ ({]{', line):
+ error(filename, linenum, 'whitespace/braces', 5,
+ 'Missing space before {')
+
+ # Make sure '} else {' has spaces.
+ if Search(r'}else', line):
+ error(filename, linenum, 'whitespace/braces', 5,
+ 'Missing space before else')
+
+ # You shouldn't have spaces before your brackets, except maybe after
+ # 'delete []' or 'new char * []'.
+ if Search(r'\w\s+\[', line) and not Search(r'delete\s+\[', line):
+ error(filename, linenum, 'whitespace/braces', 5,
+ 'Extra space before [')
+
+ # You shouldn't have a space before a semicolon at the end of the line.
+ # There's a special case for "for" since the style guide allows space before
+ # the semicolon there.
+ if Search(r':\s*;\s*$', line):
+ error(filename, linenum, 'whitespace/semicolon', 5,
+ 'Semicolon defining empty statement. Use { } instead.')
+ elif Search(r'^\s*;\s*$', line):
+ error(filename, linenum, 'whitespace/semicolon', 5,
+ 'Line contains only semicolon. If this should be an empty statement, '
+ 'use { } instead.')
+ elif (Search(r'\s+;\s*$', line) and
+ not Search(r'\bfor\b', line)):
+ error(filename, linenum, 'whitespace/semicolon', 5,
+ 'Extra space before last semicolon. If this should be an empty '
+ 'statement, use { } instead.')
+
+
+def CheckSectionSpacing(filename, clean_lines, class_info, linenum, error):
+  """Flags access specifiers that are not preceded by a blank line.
+
+  The only check made here is that a "public:", "protected:" or "private:"
+  line in a class of more than 25 lines has a blank line directly above it.
+
+  Args:
+    filename: The name of the current file.
+    clean_lines: A CleansedLines instance containing the file.
+    class_info: A _ClassInfo object describing the enclosing class.
+    linenum: The number of the line to check.
+    error: The function to call with any errors found.
+  """
+  class_start = class_info.linenum
+  # Classes of 25 lines or fewer are exempt, since they fit on one terminal
+  # screen.  The original note adds that last_line is zero when the end of
+  # the class was not found, in which case this test skips the check too.
+  if class_info.last_line - class_start <= 24:
+    return
+
+  # Nothing to check on the line that opens the class; this covers one-line
+  # definitions such as
+  #   class Foo { public: ... };
+  if linenum <= class_start:
+    return
+
+  section = Match(r'\s*(public|protected|private):', clean_lines.lines[linenum])
+  if not section:
+    return
+
+  # A blank line above is what we want.  A line mentioning "class" or
+  # "struct" is accepted as well: either we are at the beginning of the
+  # class, or an inner class is being forward-declared just above.
+  line_above = clean_lines.lines[linenum - 1]
+  if IsBlankLine(line_above) or Search(r'\b(class|struct)\b', line_above):
+    return
+
+  # The class head may span several lines, e.g.
+  #   class Derived
+  #       : public Base {
+  # so look for the first line from the class start that ends with '{'.
+  head_end = class_start
+  for candidate in range(class_start, linenum):
+    if Search(r'\{\s*$', clean_lines.lines[candidate]):
+      head_end = candidate
+      break
+
+  if head_end < linenum - 1:
+    error(filename, linenum, 'whitespace/blank_line', 3,
+          '"%s:" should be preceded by a blank line' % section.group(1))
+
+
+def GetPreviousNonBlankLine(clean_lines, linenum):
+  """Finds the closest non-blank line above the given line.
+
+  Args:
+    clean_lines: A CleansedLines instance containing the file contents.
+    linenum: The number of the line to check.
+
+  Returns:
+    A (contents, line number) tuple for the nearest line above linenum whose
+    elided text is not blank.  If every line above is blank, or there is no
+    line above at all, the tuple is ('', -1).
+  """
+  # Walk upwards from the line just above linenum to the top of the file.
+  for candidate in range(linenum - 1, -1, -1):
+    text = clean_lines.elided[candidate]
+    if IsBlankLine(text):
+      continue
+    return (text, candidate)
+
+  # Ran off the top of the file without finding anything.
+  return ('', -1)
+
+
+def CheckBraces(filename, clean_lines, linenum, error):
+  """Looks for misplaced braces (e.g. at the end of line).
+
+  Also checks how else and do clauses are laid out relative to their braces,
+  and reports a ';' after a closing brace where it looks unnecessary.
+
+  Args:
+    filename: The name of the current file.
+    clean_lines: A CleansedLines instance containing the file.
+    linenum: The number of the line to check.
+    error: The function to call with any errors found.
+  """
+  line = clean_lines.elided[linenum]  # get rid of comments and strings
+
+  if Match(r'\s*{\s*$', line):
+    # A brace alone on a line is allowed when it opens an explicit new scope
+    # (commonly used to control the lifetime of stack-allocated variables).
+    # This is not detected perfectly: we simply don't complain if the previous
+    # non-blank line ends with ';', ':', '{', or '}'.
+    prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
+    if not Search(r'[;:}{]\s*$', prevline):
+      error(filename, linenum, 'whitespace/braces', 4,
+            '{ should almost always be at the end of the previous line')
+
+  # An else clause should be on the same line as the preceding closing brace.
+  if Match(r'\s*else\s*', line):
+    prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
+    if Match(r'\s*}\s*$', prevline):
+      error(filename, linenum, 'whitespace/newline', 4,
+            'An else should appear on the same line as the preceding }')
+
+  # If braces come on one side of an else, they should be on both.
+  # However, we have to worry about "else if" that spans multiple lines!
+  if Search(r'}\s*else[^{]*$', line) or Match(r'[^}]*else\s*{', line):
+    if Search(r'}\s*else if([^{]*)$', line):  # could be multi-line if
+      # Find the ( after the if, then look for a { after its closing paren.
+      pos = line.find('else if')
+      pos = line.find('(', pos)
+      if pos > 0:
+        (endline, _, endpos) = CloseExpression(clean_lines, linenum, pos)
+        if endline[endpos:].find('{') == -1:  # must be brace after if
+          error(filename, linenum, 'readability/braces', 5,
+                'If an else has a brace on one side, it should have it on both')
+    else:  # common case: else not followed by a multi-line if
+      error(filename, linenum, 'readability/braces', 5,
+            'If an else has a brace on one side, it should have it on both')
+
+  # Likewise, an else should never have the else clause on the same line
+  if Search(r'\belse [^\s{]', line) and not Search(r'\belse if\b', line):
+    error(filename, linenum, 'whitespace/newline', 4,
+          'Else clause should never be on same line as else (use 2 lines)')
+
+  # In the same way, a do/while should never be on one line
+  if Match(r'\s*do [^\s{]', line):
+    error(filename, linenum, 'whitespace/newline', 4,
+          'do/while clauses should not be on a single line')
+
+  # Braces shouldn't be followed by a ; unless they're defining a struct or
+  # initializing an array.  We can't tell in general, so while the text looks
+  # like ' {...};' we prepend earlier non-blank lines that contain no ';'.
+  prevlinenum = linenum
+  while True:
+    (prevline, prevlinenum) = GetPreviousNonBlankLine(clean_lines, prevlinenum)
+    if Match(r'\s+{.*}\s*;', line) and not prevline.count(';'):
+      line = prevline + line
+    else:
+      break
+  if (Search(r'{.*}\s*;', line) and
+      line.count('{') == line.count('}') and
+      not Search(r'struct|class|enum|\s*=\s*{', line)):
+    error(filename, linenum, 'readability/braces', 4,
+          "You don't need a ; after a }")
+
+
+def ReplaceableCheck(operator, macro, line):
+  """Tells whether a basic CHECK could use a comparison-specific macro.
+
+  For example CHECK(a == b) can become CHECK_EQ, and likewise for CHECK_GE,
+  CHECK_GT, CHECK_LE, CHECK_LT and CHECK_NE.
+
+  Args:
+    operator: The C++ operator used in the CHECK.
+    macro: The CHECK or EXPECT macro being called.
+    line: The current source line.
+
+  Returns:
+    True if the CHECK can be replaced with a more specific one; otherwise a
+    false value (None when the pattern does not match, else False).
+  """
+
+  # Decimal and hex integers, strings, and chars (in that order).
+  literal = r'([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')'
+
+  # One side of the operator has to look like a literal, since
+  # CHECK(x == iterator) won't compile.  Not every case where a more
+  # specific CHECK is possible gets caught this way, but that is less
+  # annoying than dealing with extraneous warnings.
+  literal_on_left = r'\s*' + literal + r'\s*' + operator + r'[^<>].*'
+  literal_on_right = r'.*[^<>]' + operator + r'\s*' + literal + r'\s*\)'
+  # Both alternatives follow the macro name and its opening parenthesis.
+  macro_call = (r'\s*' + macro + r'\((' +
+                literal_on_left + '|' + literal_on_right + ')')
+
+  # CHECK(x == NULL) and the like are left alone because CHECK_EQ(x, NULL)
+  # won't compile (requires a cast).  More complex boolean expressions
+  # involving && or ||, such as CHECK(a == b || c == d), are left alone too.
+  return Match(macro_call, line) and not Search(r'NULL|&&|\|\|', line)
+
+
+def CheckCheck(filename, clean_lines, linenum, error):
+  """Suggests comparison-specific variants of CHECK and EXPECT macros.
+
+  Args:
+    filename: The name of the current file.
+    clean_lines: A CleansedLines instance containing the file.
+    linenum: The number of the line to check.
+    error: The function to call with any errors found.
+  """
+
+  # The first macro from _CHECK_MACROS found in the raw line decides which
+  # replacements get suggested; '' means that none of them occurs.
+  found = [macro for macro in _CHECK_MACROS
+           if macro in clean_lines.raw_lines[linenum]]
+  current_macro = found[0] if found else ''
+  if not current_macro:
+    # Don't waste time here if line doesn't contain 'CHECK' or 'EXPECT'
+    return
+
+  line = clean_lines.elided[linenum]  # get rid of comments and strings
+
+  # Encourage replacing plain CHECKs with CHECK_EQ/CHECK_NE/etc.  Only the
+  # first operator that qualifies is reported.
+  for operator in ('==', '!=', '>=', '>', '<=', '<'):
+    if not ReplaceableCheck(operator, current_macro, line):
+      continue
+    replacement = _CHECK_REPLACEMENT[current_macro][operator]
+    error(filename, linenum, 'readability/check', 2,
+          'Consider using %s instead of %s(a %s b)' % (
+              replacement, current_macro, operator))
+    break
+
+
+def GetLineWidth(line):
+  """Determines the width of the line in column positions.
+
+  Args:
+    line: A string, which may be a Unicode string.
+
+  Returns:
+    The width of the line in column positions: len() for a byte string, else
+    the NFC-normalized text with wide chars counted as 2 and combining as 0.
+  """
+  # Python 3 has no separate `unicode` type; there every str is Unicode text.
+  text_type = str if str is not bytes else unicode
+  if not isinstance(line, text_type):
+    return len(line)
+  width = 0
+  for uc in unicodedata.normalize('NFC', line):
+    if unicodedata.east_asian_width(uc) in ('W', 'F'):
+      width += 2
+    elif not unicodedata.combining(uc):
+      width += 1
+  return width
+
+
+def CheckStyle(filename, clean_lines, linenum, file_extension, class_state,
+               error):
+  """Checks rules from the 'C++ style rules' section of cppguide.html.
+
+  Most of these rules are hard to test (naming, comment style), but we
+  do what we can. In particular we check for 2-space indents, line lengths,
+  tab usage, spaces inside code, etc.
+
+  Args:
+    filename: The name of the current file.
+    clean_lines: A CleansedLines instance containing the file.
+    linenum: The number of the line to check.
+    file_extension: The extension (without the dot) of the filename.
+    class_state: May be None or empty.  When it has a non-empty
+        classinfo_stack, the top entry is passed to CheckSectionSpacing.
+    error: The function to call with any errors found.
+  """
+  raw_lines = clean_lines.raw_lines
+  line = raw_lines[linenum]
+
+  if line.find('\t') != -1:
+    error(filename, linenum, 'whitespace/tab', 1,
+          'Tab found; better to use spaces')
+
+  # One or three blank spaces at the beginning of the line is weird; it's
+  # hard to reconcile that with 2-space indents.
+  # NOTE: here are the conditions rob pike used for his tests. Mine aren't
+  # as sophisticated, but it may be worth becoming so: RLENGTH==initial_spaces
+  # if(RLENGTH > 20) complain = 0;
+  # if(match($0, " +(error|private|public|protected):")) complain = 0;
+  # if(match(prev, "&& *$")) complain = 0;
+  # if(match(prev, "\\|\\| *$")) complain = 0;
+  # if(match(prev, "[\",=><] *$")) complain = 0;
+  # if(match($0, " <<")) complain = 0;
+  # if(match(prev, " +for \\(")) complain = 0;
+  # if(prevodd && match(prevprev, " +for \\(")) complain = 0;
+  initial_spaces = 0
+  cleansed_line = clean_lines.elided[linenum]
+  while initial_spaces < len(line) and line[initial_spaces] == ' ':
+    initial_spaces += 1
+  if line and line[-1].isspace():
+    error(filename, linenum, 'whitespace/end_of_line', 4,
+          'Line ends in whitespace. Consider deleting these extra spaces.')
+  # There are certain situations we allow one space, notably for labels
+  elif ((initial_spaces == 1 or initial_spaces == 3) and
+        not Match(r'\s*\w+\s*:\s*$', cleansed_line)):
+    error(filename, linenum, 'whitespace/indent', 3,
+          'Weird number of spaces at line-start. '
+          'Are you using a 2-space indent?')
+  # Labels should always be indented at least one space.
+  elif not initial_spaces and line[:2] != '//' and Search(r'[^:]:\s*$',
+                                                          line):
+    error(filename, linenum, 'whitespace/labels', 4,
+          'Labels should always be indented at least one space. '
+          'If this is a member-initializer list in a constructor or '
+          'the base class list in a class definition, the colon should '
+          'be on the following line.')
+
+  # Check if the line is a header guard.
+  is_header_guard = False
+  if file_extension == 'h':
+    cppvar = GetHeaderGuardCPPVariable(filename)
+    if (line.startswith('#ifndef %s' % cppvar) or
+        line.startswith('#define %s' % cppvar) or
+        line.startswith('#endif // %s' % cppvar)):
+      is_header_guard = True
+  # #include lines and header guards can be long, since there's no clean way to
+  # split them.
+  #
+  # URLs can be long too. It's possible to split these, but it makes them
+  # harder to cut&paste.
+  #
+  # The "$Id:...$" comment may also get very long without it being the
+  # developer's fault.
+  if (not line.startswith('#include') and not is_header_guard and
+      not Match(r'^\s*//.*http(s?)://\S*$', line) and
+      not Match(r'^// \$Id:.*#[0-9]+ \$$', line)):
+    line_width = GetLineWidth(line)
+    if line_width > 100:
+      error(filename, linenum, 'whitespace/line_length', 4,
+            'Lines should very rarely be longer than 100 characters')
+    elif line_width > 80:
+      error(filename, linenum, 'whitespace/line_length', 2,
+            'Lines should be <= 80 characters long')
+
+  if (cleansed_line.count(';') > 1 and
+      # for loops are allowed two ;'s (and may run over two lines).
+      cleansed_line.find('for') == -1 and
+      (GetPreviousNonBlankLine(clean_lines, linenum)[0].find('for') == -1 or
+       GetPreviousNonBlankLine(clean_lines, linenum)[0].find(';') != -1) and
+      # It's ok to have many commands in a switch case that fits in 1 line
+      not ((cleansed_line.find('case ') != -1 or
+            cleansed_line.find('default:') != -1) and
+           cleansed_line.find('break;') != -1)):
+    error(filename, linenum, 'whitespace/newline', 4,
+          'More than one command on the same line')
+
+  # Some more style checks
+  CheckBraces(filename, clean_lines, linenum, error)
+  CheckSpacing(filename, clean_lines, linenum, error)
+  CheckCheck(filename, clean_lines, linenum, error)
+  if class_state and class_state.classinfo_stack:
+    CheckSectionSpacing(filename, clean_lines,
+                        class_state.classinfo_stack[-1], linenum, error)
+
+
+# Matches a quoted #include naming a bare .h file, i.e. one with no '/' in it.
+_RE_PATTERN_INCLUDE_NEW_STYLE = re.compile(r'#include +"[^/]+\.h"')
+# Matches an #include line; group 1 is the opening '<' or '"' and group 2 is
+# the included path.
+_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$')
+# Matches the first component of a filename, i.e. everything before the first
+# '-', '_' or '.' ('foo' for 'foo.cc', 'foo-bar_baz.cc' and 'foo_bar-baz.cc').
+_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
+
+
+def _DropCommonSuffixes(filename):
+  """Strips a trailing test/inline marker, or else the extension, from a name.
+
+  For example:
+    >>> _DropCommonSuffixes('foo/foo-inl.h')
+    'foo/foo'
+    >>> _DropCommonSuffixes('foo/bar/foo.cc')
+    'foo/bar/foo'
+    >>> _DropCommonSuffixes('foo/foo_internal.h')
+    'foo/foo'
+    >>> _DropCommonSuffixes('foo/foo_unusualinternal.h')
+    'foo/foo_unusualinternal'
+
+  Args:
+    filename: The input filename.
+
+  Returns:
+    The filename with the common suffix, or else the extension, removed.
+  """
+  for suffix in ('test.cc', 'regtest.cc', 'unittest.cc',
+                 'inl.h', 'impl.h', 'internal.h'):
+    cut = len(filename) - len(suffix) - 1  # where the '-' or '_' must sit
+    if cut >= 0 and filename[cut] in '-_' and filename.endswith(suffix):
+      return filename[:cut]
+  return os.path.splitext(filename)[0]
+
+
+def _IsTestFilename(filename):
+  """Reports whether a filename carries one of the test-file suffixes.
+
+  Args:
+    filename: The input filename.
+
+  Returns:
+    True if 'filename' ends with _test.cc, _unittest.cc or _regtest.cc,
+    False otherwise.
+  """
+  # str.endswith accepts a tuple and succeeds if any entry matches.
+  test_suffixes = ('_test.cc',
+                   '_unittest.cc',
+                   '_regtest.cc')
+  return filename.endswith(test_suffixes)
+
+
+def _ClassifyInclude(fileinfo, include, is_system):
+  """Decides which of the header categories an #include belongs to.
+
+  Args:
+    fileinfo: The current file cpplint is running over. A FileInfo instance.
+    include: The path to a #included file.
+    is_system: True if the #include used <> rather than "".
+
+  Returns:
+    One of the _XXX_HEADER constants.
+
+  For example:
+    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True)
+    _C_SYS_HEADER
+    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True)
+    _CPP_SYS_HEADER
+    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False)
+    _LIKELY_MY_HEADER
+    >>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'),
+    ...                  'bar/foo_other_ext.h', False)
+    _POSSIBLE_MY_HEADER
+    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False)
+    _OTHER_HEADER
+  """
+
+  # System includes are either C++ headers (anything listed in _STL_HEADERS
+  # or _CPP_HEADERS) or, failing that, C headers.
+  is_cpp_header = include in _STL_HEADERS or include in _CPP_HEADERS
+  if is_system:
+    if is_cpp_header:
+      return _CPP_SYS_HEADER
+    return _C_SYS_HEADER
+
+  # Compare the target file and the include with common suffixes dropped.
+  # Equal basenames plus an include that lives in the target's directory (or
+  # in the "public" directory next to it) mean the include is likely to be
+  # owned by the target file.
+  target_path = _DropCommonSuffixes(fileinfo.RepositoryName())
+  target_dir, target_base = os.path.split(target_path)
+  include_path = _DropCommonSuffixes(include)
+  include_dir, include_base = os.path.split(include_path)
+  if target_base == include_base:
+    public_dir = os.path.normpath(target_dir + '/../public')
+    if include_dir in (target_dir, public_dir):
+      return _LIKELY_MY_HEADER
+
+  # When only the first basename component is shared, the target may still
+  # be implementing the include.  Such a header is allowed to come first,
+  # but we never complain if it is not there.  _RE_FIRST_COMPONENT can fail
+  # to match, hence the checks on both match objects.
+  target_head = _RE_FIRST_COMPONENT.match(target_base)
+  include_head = _RE_FIRST_COMPONENT.match(include_base)
+  if target_head and include_head:
+    if target_head.group(0) == include_head.group(0):
+      return _POSSIBLE_MY_HEADER
+
+  # Nothing ties the include to the target file.
+  return _OTHER_HEADER
+
+
+
+def CheckIncludeLine(filename, clean_lines, linenum, include_state, error):
+  """Applies the checks that are specific to #include lines.
+
+  Strings on #include lines are NOT removed from the elided line, which
+  makes some tasks easier.  To prevent false positives, checks from
+  CheckLanguage that apply to #include lines must therefore live here.
+
+  Args:
+    filename: The name of the current file.
+    clean_lines: A CleansedLines instance containing the file.
+    linenum: The number of the line to check.
+    include_state: An _IncludeState instance in which the headers are inserted.
+    error: The function to call with any errors found.
+  """
+
+  fileinfo = FileInfo(filename)
+  line = clean_lines.lines[linenum]
+
+  # "include" should use the new style "foo/bar.h" instead of just "bar.h"
+  if _RE_PATTERN_INCLUDE_NEW_STYLE.search(line):
+    error(filename, linenum, 'build/include', 4,
+          'Include the directory when naming .h files')
+
+  # Everything below needs the delimiter and path of a well-formed #include.
+  # _RE_PATTERN_INCLUDE is anchored with ^, so a single search() serves both
+  # the duplicate/ordering checks and the stream check; lines that do not
+  # match have nothing further to report.
+  parsed = _RE_PATTERN_INCLUDE.search(line)
+  if not parsed:
+    return
+  include = parsed.group(2)
+  is_system = (parsed.group(1) == '<')
+
+  if include in include_state:
+    # Including a file more than once is okay in a handful of cases, but in
+    # general it is not.
+    error(filename, linenum, 'build/include', 4,
+          '"%s" already included at %s:%s' %
+          (include, filename, include_state[include]))
+  else:
+    include_state[include] = linenum
+
+    # Headers should appear in this order:
+    #   1) for foo.cc, foo.h (preferred location)
+    #   2) c system files
+    #   3) cpp system files
+    #   4) for foo.cc, foo.h (deprecated location)
+    #   5) other google headers
+    # Each include statement is classified as one of those 5 types.  The
+    # include_state object keeps track of the highest type seen so far and
+    # complains when a lower type shows up after it.
+    order_problem = include_state.CheckNextIncludeOrder(
+        _ClassifyInclude(fileinfo, include, is_system))
+    if order_problem:
+      error(filename, linenum, 'build/include_order', 4,
+            '%s. Should be: %s.h, c system, c++ system, other.' %
+            (order_problem, fileinfo.BaseName()))
+    if not include_state.IsInAlphabeticalOrder(include):
+      error(filename, linenum, 'build/include_alpha', 4,
+            'Include "%s" not in alphabetical order' % include)
+
+  # Look for any of the stream classes that are part of standard C++.
+  if Match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$', include):
+    # Many unit tests use cout, so we exempt them.
+    if not _IsTestFilename(filename):
+      error(filename, linenum, 'readability/streams', 3,
+            'Streams are highly discouraged.')
+
+
+def _GetTextInside(text, start_pattern):
+  r"""Retrieves all the text between matching open and close parentheses.
+
+  Given a string of lines and a regular expression string, retrieve all the text
+  following the expression and between opening punctuation symbols like
+  (, [, or {, and the matching close-punctuation symbol. Nested occurrences
+  of the punctuations are handled properly, so for the text like
+    printf(a(), b(c()));
+  a call to _GetTextInside(text, r'printf\(') will return 'a(), b(c())'.
+  start_pattern must match string having an open punctuation symbol at the end.
+
+  Args:
+    text: The lines to extract text. Its comments and strings must be elided.
+          It can be single line and can span multiple lines.
+    start_pattern: The regexp string indicating where to start extracting
+                   the text.
+  Returns:
+    The extracted text.
+    None if either the opening string or ending punctuation could not be found.
+  """
+  # TODO(sugawarayu): Audit cpplint.py to see what places could be profitably
+  # rewritten to use _GetTextInside (and use inferior regexp matching today).
+
+  # Give opening punctuations to get the matching close-punctuations.
+  matching_punctuation = {'(': ')', '{': '}', '[': ']'}
+  # values() rather than itervalues() so this runs on Python 2 and 3 alike.
+  closing_punctuation = set(matching_punctuation.values())
+
+  # Find the position to start extracting text.
+  match = re.search(start_pattern, text, re.M)
+  if not match:  # start_pattern not found in text.
+    return None
+  start_position = match.end(0)
+
+  assert start_position > 0, (
+      'start_pattern must ends with an opening punctuation.')
+  assert text[start_position - 1] in matching_punctuation, (
+      'start_pattern must ends with an opening punctuation.')
+  # Stack of closing punctuations we expect to have in text after position.
+  punctuation_stack = [matching_punctuation[text[start_position - 1]]]
+  position = start_position
+  while punctuation_stack and position < len(text):
+    if text[position] == punctuation_stack[-1]:
+      punctuation_stack.pop()
+    elif text[position] in closing_punctuation:
+      # A closing punctuation without matching opening punctuations.
+      return None
+    elif text[position] in matching_punctuation:
+      punctuation_stack.append(matching_punctuation[text[position]])
+    position += 1
+  if punctuation_stack:
+    # Opening punctuations left without matching close-punctuations.
+    return None
+  # punctuations match.
+  return text[start_position:position - 1]
+
+
+def CheckLanguage(filename, clean_lines, linenum, file_extension, include_state,
+ error):
+ """Checks rules from the 'C++ language rules' section of cppguide.html.
+
+ Some of these rules are hard to test (function overloading, using
+ uint32 inappropriately), but we do the best we can.
+
+ Args:
+ filename: The name of the current file.
+ clean_lines: A CleansedLines instance containing the file.
+ linenum: The number of the line to check.
+ file_extension: The extension (without the dot) of the filename.
+ include_state: An _IncludeState instance in which the headers are inserted.
+ error: The function to call with any errors found.
+ """
+ # If the line is empty or consists of entirely a comment, no need to
+ # check it.
+ line = clean_lines.elided[linenum]
+ if not line:
+ return
+
+ match = _RE_PATTERN_INCLUDE.search(line)
+ if match:
+ CheckIncludeLine(filename, clean_lines, linenum, include_state, error)
+ return
+
+ # Create an extended_line, which is the concatenation of the current and
+ # next lines, for more effective checking of code that may span more than one
+ # line.
+ if linenum + 1 < clean_lines.NumLines():
+ extended_line = line + clean_lines.elided[linenum + 1]
+ else:
+ extended_line = line
+
+ # Make Windows paths like Unix.
+ fullname = os.path.abspath(filename).replace('\\', '/')
+
+ # TODO(unknown): figure out if they're using default arguments in fn proto.
+
+ # Check for non-const references in functions. This is tricky because &
+ # is also used to take the address of something. We allow <> for templates,
+ # (ignoring whatever is between the braces) and : for classes.
+ # These are complicated re's. They try to capture the following:
+ # paren (for fn-prototype start), typename, &, varname. For the const
+ # version, we're willing for const to be before typename or after
+ # Don't check the implementation on same line.
+ fnline = line.split('{', 1)[0]
+ if (len(re.findall(r'\([^()]*\b(?:[\w:]|<[^()]*>)+(\s?&|&\s?)\w+', fnline)) >
+ len(re.findall(r'\([^()]*\bconst\s+(?:typename\s+)?(?:struct\s+)?'
+ r'(?:[\w:]|<[^()]*>)+(\s?&|&\s?)\w+', fnline)) +
+ len(re.findall(r'\([^()]*\b(?:[\w:]|<[^()]*>)+\s+const(\s?&|&\s?)[\w]+',
+ fnline))):
+
+ # We allow non-const references in a few standard places, like functions
+ # called "swap()" or iostream operators like "<<" or ">>".
+ if not Search(
+ r'(swap|Swap|operator[<>][<>])\s*\(\s*(?:[\w:]|<.*>)+\s*&',
+ fnline):
+ error(filename, linenum, 'runtime/references', 2,
+ 'Is this a non-const reference? '
+ 'If so, make const or use a pointer.')
+
+ # Check to see if they're using a conversion function cast.
+ # I just try to capture the most common basic types, though there are more.
+ # Parameterless conversion functions, such as bool(), are allowed as they are
+ # probably a member operator declaration or default constructor.
+ match = Search(
+ r'(\bnew\s+)?\b' # Grab 'new' operator, if it's there
+ r'(int|float|double|bool|char|int32|uint32|int64|uint64)\([^)]', line)
+ if match:
+ # gMock methods are defined using some variant of MOCK_METHODx(name, type)
+ # where type may be float(), int(string), etc. Without context they are
+ # virtually indistinguishable from int(x) casts. Likewise, gMock's
+ # MockCallback takes a template parameter of the form return_type(arg_type),
+ # which looks much like the cast we're trying to detect.
+ if (match.group(1) is None and # If new operator, then this isn't a cast
+ not (Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line) or
+ Match(r'^\s*MockCallback<.*>', line))):
+ error(filename, linenum, 'readability/casting', 4,
+ 'Using deprecated casting style. '
+ 'Use static_cast<%s>(...) instead' %
+ match.group(2))
+
+ CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
+ 'static_cast',
+ r'\((int|float|double|bool|char|u?int(16|32|64))\)', error)
+
+ # This doesn't catch all cases. Consider (const char * const)"hello".
+ #
+ # (char *) "foo" should always be a const_cast (reinterpret_cast won't
+ # compile).
+ if CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
+ 'const_cast', r'\((char\s?\*+\s?)\)\s*"', error):
+ pass
+ else:
+ # Check pointer casts for other than string constants
+ CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
+ 'reinterpret_cast', r'\((\w+\s?\*+\s?)\)', error)
+
+ # In addition, we look for people taking the address of a cast. This
+ # is dangerous -- casts can assign to temporaries, so the pointer doesn't
+ # point where you think.
+ if Search(
+ r'(&\([^)]+\)[\w(])|(&(static|dynamic|reinterpret)_cast\b)', line):
+ error(filename, linenum, 'runtime/casting', 4,
+ ('Are you taking an address of a cast? '
+ 'This is dangerous: could be a temp var. '
+ 'Take the address before doing the cast, rather than after'))
+
+ # Check for people declaring static/global STL strings at the top level.
+ # This is dangerous because the C++ language does not guarantee that
+ # globals with constructors are initialized before the first access.
+ match = Match(
+ r'((?:|static +)(?:|const +))string +([a-zA-Z0-9_:]+)\b(.*)',
+ line)
+ # Make sure it's not a function.
+ # Function template specialization looks like: "string foo<Type>(...".
+ # Class template definitions look like: "string Foo<Type>::Method(...".
+ if match and not Match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)?\s*\(([^"]|$)',
+ match.group(3)):
+ error(filename, linenum, 'runtime/string', 4,
+ 'For a static/global string constant, use a C style string instead: '
+ '"%schar %s[]".' %
+ (match.group(1), match.group(2)))
+
+ # Check that we're not using RTTI outside of testing code.
+ if Search(r'\bdynamic_cast<', line) and not _IsTestFilename(filename):
+ error(filename, linenum, 'runtime/rtti', 5,
+ 'Do not use dynamic_cast<>. If you need to cast within a class '
+ "hierarchy, use static_cast<> to upcast. Google doesn't support "
+ 'RTTI.')
+
+ if Search(r'\b([A-Za-z0-9_]*_)\(\1\)', line):
+ error(filename, linenum, 'runtime/init', 4,
+ 'You seem to be initializing a member variable with itself.')
+
+ if file_extension == 'h':
+ # TODO(unknown): check that 1-arg constructors are explicit.
+ # How to tell it's a constructor?
+ # (handled in CheckForNonStandardConstructs for now)
+ # TODO(unknown): check that classes have DISALLOW_EVIL_CONSTRUCTORS
+ # (level 1 error)
+ pass
+
+ # Check if people are using the verboten C basic types. The only exception
+ # we regularly allow is "unsigned short port" for port.
+ if Search(r'\bshort port\b', line):
+ if not Search(r'\bunsigned short port\b', line):
+ error(filename, linenum, 'runtime/int', 4,
+ 'Use "unsigned short" for ports, not "short"')
+ else:
+ match = Search(r'\b(short|long(?! +double)|long long)\b', line)
+ if match:
+ error(filename, linenum, 'runtime/int', 4,
+ 'Use int16/int64/etc, rather than the C type %s' % match.group(1))
+
+ # When snprintf is used, the second argument shouldn't be a literal.
+ match = Search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line)
+ if match and match.group(2) != '0':
+ # If 2nd arg is zero, snprintf is used to calculate size.
+ error(filename, linenum, 'runtime/printf', 3,
+ 'If you can, use sizeof(%s) instead of %s as the 2nd arg '
+ 'to snprintf.' % (match.group(1), match.group(2)))
+
+ # Check if some verboten C functions are being used.
+ if Search(r'\bsprintf\b', line):
+ error(filename, linenum, 'runtime/printf', 5,
+ 'Never use sprintf. Use snprintf instead.')
+ match = Search(r'\b(strcpy|strcat)\b', line)
+ if match:
+ error(filename, linenum, 'runtime/printf', 4,
+ 'Almost always, snprintf is better than %s' % match.group(1))
+
+ if Search(r'\bsscanf\b', line):
+ error(filename, linenum, 'runtime/printf', 1,
+ 'sscanf can be ok, but is slow and can overflow buffers.')
+
+ # Check if some verboten operator overloading is going on
+ # TODO(unknown): catch out-of-line unary operator&:
+ # class X {};
+ # int operator&(const X& x) { return 42; } // unary operator&
+ # The trick is it's hard to tell apart from binary operator&:
+ # class Y { int operator&(const Y& x) { return 23; } }; // binary operator&
+ if Search(r'\boperator\s*&\s*\(\s*\)', line):
+ error(filename, linenum, 'runtime/operator', 4,
+ 'Unary operator& is dangerous. Do not use it.')
+
+ # Check for suspicious usage of "if" like
+ # } if (a == b) {
+ if Search(r'\}\s*if\s*\(', line):
+ error(filename, linenum, 'readability/braces', 4,
+ 'Did you mean "else if"? If not, start a new line for "if".')
+
+ # Check for potential format string bugs like printf(foo).
+ # We constrain the pattern not to pick things like DocidForPrintf(foo).
+ # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
+ # TODO(sugawarayu): Catch the following case. Need to change the calling
+ # convention of the whole function to process multiple line to handle it.
+ # printf(
+ # boy_this_is_a_really_long_variable_that_cannot_fit_on_the_prev_line);
+ printf_args = _GetTextInside(line, r'(?i)\b(string)?printf\s*\(')
+ if printf_args:
+ match = Match(r'([\w.\->()]+)$', printf_args)
+ if match:
+ function_name = re.search(r'\b((?:string)?printf)\s*\(',
+ line, re.I).group(1)
+ error(filename, linenum, 'runtime/printf', 4,
+ 'Potential format string bug. Do %s("%%s", %s) instead.'
+ % (function_name, match.group(1)))
+
+ # Check for potential memset bugs like memset(buf, sizeof(buf), 0).
+ match = Search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line)
+ if match and not Match(r"^''|-?[0-9]+|0x[0-9A-Fa-f]$", match.group(2)):
+ error(filename, linenum, 'runtime/memset', 4,
+ 'Did you mean "memset(%s, 0, %s)"?'
+ % (match.group(1), match.group(2)))
+
+ if Search(r'\busing namespace\b', line):
+ error(filename, linenum, 'build/namespaces', 5,
+ 'Do not use namespace using-directives. '
+ 'Use using-declarations instead.')
+
+ # Detect variable-length arrays.
+ match = Match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line)
+ if (match and match.group(2) != 'return' and match.group(2) != 'delete' and
+ match.group(3).find(']') == -1):
+ # Split the size using space and arithmetic operators as delimiters.
+ # If any of the resulting tokens are not compile time constants then
+ # report the error.
+ tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>]', match.group(3))
+ is_const = True
+ skip_next = False
+ for tok in tokens:
+ if skip_next:
+ skip_next = False
+ continue
+
+ if Search(r'sizeof\(.+\)', tok): continue
+ if Search(r'arraysize\(\w+\)', tok): continue
+
+ tok = tok.lstrip('(')
+ tok = tok.rstrip(')')
+ if not tok: continue
+ if Match(r'\d+', tok): continue
+ if Match(r'0[xX][0-9a-fA-F]+', tok): continue
+ if Match(r'k[A-Z0-9]\w*', tok): continue
+ if Match(r'(.+::)?k[A-Z0-9]\w*', tok): continue
+ if Match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue
+ # A catch all for tricky sizeof cases, including 'sizeof expression',
+ # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
+ # requires skipping the next token because we split on ' ' and '*'.
+ if tok.startswith('sizeof'):
+ skip_next = True
+ continue
+ is_const = False
+ break
+ if not is_const:
+ error(filename, linenum, 'runtime/arrays', 1,
+ 'Do not use variable-length arrays. Use an appropriately named '
+ "('k' followed by CamelCase) compile-time constant for the size.")
+
+ # If DISALLOW_EVIL_CONSTRUCTORS, DISALLOW_COPY_AND_ASSIGN, or
+ # DISALLOW_IMPLICIT_CONSTRUCTORS is present, then it should be the last thing
+ # in the class declaration.
+ match = Match(
+ (r'\s*'
+ r'(DISALLOW_(EVIL_CONSTRUCTORS|COPY_AND_ASSIGN|IMPLICIT_CONSTRUCTORS))'
+ r'\(.*\);$'),
+ line)
+ if match and linenum + 1 < clean_lines.NumLines():
+ next_line = clean_lines.elided[linenum + 1]
+ # We allow some, but not all, declarations of variables to be present
+ # in the statement that defines the class. The [\w\*,\s]* fragment of
+ # the regular expression below allows users to declare instances of
+ # the class or pointers to instances, but not less common types such
+ # as function pointers or arrays. It's a tradeoff between allowing
+ # reasonable code and avoiding trying to parse more C++ using regexps.
+ if not Search(r'^\s*}[\w\*,\s]*;', next_line):
+ error(filename, linenum, 'readability/constructors', 3,
+ match.group(1) + ' should be the last thing in the class')
+
+ # Check for use of unnamed namespaces in header files. Registration
+ # macros are typically OK, so we allow use of "namespace {" on lines
+ # that end with backslashes.
+ if (file_extension == 'h'
+ and Search(r'\bnamespace\s*{', line)
+ and line[-1] != '\\'):
+ error(filename, linenum, 'build/namespaces', 4,
+ 'Do not use unnamed namespaces in header files. See '
+ 'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
+ ' for more information.')
+
+
+def CheckCStyleCast(filename, linenum, line, raw_line, cast_type, pattern,
+ error):
+ """Checks for a C-style cast by looking for the pattern.
+
+ This also handles sizeof(type) warnings, due to similarity of content.
+ A match that is followed by text resembling the rest of a function
+ declaration is reported as an unnamed parameter rather than as a cast.
+
+ Args:
+ filename: The name of the current file.
+ linenum: The number of the line to check.
+ line: The line of code to check.
+ raw_line: The raw line of code to check, with comments.
+ cast_type: The string for the C++ cast to recommend. This is either
+ reinterpret_cast, static_cast, or const_cast, depending.
+ pattern: The regular expression used to find C-style casts. Its first
+ capture group must be the type being cast to; that group is used both
+ to locate the text preceding the cast and in the reported message.
+ error: The function to call with any errors found.
+
+ Returns:
+ True if an error was emitted.
+ False otherwise.
+ """
+ match = Search(pattern, line)
+ if not match:
+ return False
+
+ # e.g., sizeof(int)
+ # Inspect the text that precedes the cast. The slice stops one character
+ # before group 1; for the patterns passed by the callers in this file,
+ # group 1 starts right after the opening paren, so the paren is dropped.
+ sizeof_match = Match(r'.*sizeof\s*$', line[0:match.start(1) - 1])
+ if sizeof_match:
+ error(filename, linenum, 'runtime/sizeof', 1,
+ 'Using sizeof(type). Use sizeof(varname) instead if possible')
+ return True
+
+ # Everything on the line after the matched "(type)" text.
+ remainder = line[match.end(0):]
+
+ # The close paren is for function pointers as arguments to a function.
+ # eg, void foo(void (*bar)(int));
+ # The semicolon check is a more basic function check; also possibly a
+ # function pointer typedef.
+ # eg, void foo(int); or void foo(int) const;
+ # The equals check is for function pointer assignment.
+ # eg, void *(*foo)(int) = ...
+ # The > is for MockCallback<...> ...
+ #
+ # Right now, this will only catch cases where there's a single argument, and
+ # it's unnamed. It should probably be expanded to check for multiple
+ # arguments with some unnamed.
+ function_match = Match(r'\s*(\)|=|(const)?\s*(;|\{|throw\(\)|>))', remainder)
+ if function_match:
+ # group(3) is unset when the remainder starts with ')' or '='; otherwise
+ # it is one of ';', '{', 'throw()' or '>'. For the latter three the
+ # warning is only issued when the raw line mentions neither
+ # 'MockCallback<' nor a '/*' comment opener.
+ if (not function_match.group(3) or
+ function_match.group(3) == ';' or
+ ('MockCallback<' not in raw_line and
+ '/*' not in raw_line)):
+ error(filename, linenum, 'readability/function', 3,
+ 'All parameters should be named in a function')
+ return True
+
+ # At this point, all that should be left is actual casts.
+ error(filename, linenum, 'readability/casting', 4,
+ 'Using C-style cast. Use %s<%s>(...) instead' %
+ (cast_type, match.group(1)))
+
+ return True
+
+
+# Table of (header, template names) pairs. Each entry names a header and the
+# tuple of template identifiers associated with it; the table is expanded into
+# one compiled regex per template name when _re_pattern_templates is built.
+_HEADERS_CONTAINING_TEMPLATES = (
+ ('<deque>', ('deque',)),
+ ('<functional>', ('unary_function', 'binary_function',
+ 'plus', 'minus', 'multiplies', 'divides', 'modulus',
+ 'negate',
+ 'equal_to', 'not_equal_to', 'greater', 'less',
+ 'greater_equal', 'less_equal',
+ 'logical_and', 'logical_or', 'logical_not',
+ 'unary_negate', 'not1', 'binary_negate', 'not2',
+ 'bind1st', 'bind2nd',
+ 'pointer_to_unary_function',
+ 'pointer_to_binary_function',
+ 'ptr_fun',
+ 'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
+ 'mem_fun_ref_t',
+ 'const_mem_fun_t', 'const_mem_fun1_t',
+ 'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
+ 'mem_fun_ref',
+ )),
+ ('<limits>', ('numeric_limits',)),
+ ('<list>', ('list',)),
+ ('<map>', ('map', 'multimap',)),
+ ('<memory>', ('allocator',)),
+ ('<queue>', ('queue', 'priority_queue',)),
+ ('<set>', ('set', 'multiset',)),
+ ('<stack>', ('stack',)),
+ ('<string>', ('char_traits', 'basic_string',)),
+ ('<utility>', ('pair',)),
+ ('<vector>', ('vector',)),
+
+ # gcc extensions.
+ # Note: std::hash is their hash, ::hash is our hash
+ ('<hash_map>', ('hash_map', 'hash_multimap',)),
+ ('<hash_set>', ('hash_set', 'hash_multiset',)),
+ ('<slist>', ('slist',)),
+ )
+
+# Matches the bare word "string" anywhere on a line.
+_RE_PATTERN_STRING = re.compile(r'\bstring\b')
+
+# List of (compiled regex, function name, '<algorithm>') triples, one per
+# <algorithm> function whose use should be detected.
+_re_pattern_algorithm_header = []
+for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap',
+ 'transform'):
+ # Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or
+ # type::max().
+ _re_pattern_algorithm_header.append(
+ (re.compile(r'[^>.]\b' + _template + r'(<.*?>)?\([^\)]'),
+ _template,
+ '<algorithm>'))
+
+# List of (compiled regex, 'name<>', header) triples, one per template name
+# listed in _HEADERS_CONTAINING_TEMPLATES. Each regex looks for the template
+# name followed by optional whitespace and an opening angle bracket.
+_re_pattern_templates = []
+for _header, _templates in _HEADERS_CONTAINING_TEMPLATES:
+ for _template in _templates:
+ _re_pattern_templates.append(
+ (re.compile(r'(\<|\b)' + _template + r'\s*\<'),
+ _template + '<>',
+ _header))
+
+
+def FilesBelongToSameModule(filename_cc, filename_h):
+  """Decide whether a .cc file and a header belong to the same module.
+
+  Two files are treated as one 'module' when, after normalisation, the path
+  of the source file ends with the path of the header:
+    * the '.cc' / '.h' extensions are removed;
+    * a trailing '_unittest' or '_test' is removed from the source name;
+    * a trailing '-inl' is removed from the header name;
+    * '/public/' and '/internal/' path components collapse to '/'.
+  So foo.h, foo-inl.h, foo.cc, foo_test.cc and foo_unittest.cc in one
+  directory are the same module, as are some/path/public/xyzzy and
+  some/path/internal/xyzzy.
+
+  When filename_cc carries a longer path than filename_h (for example
+  '/absolute/path/to/base/sysinfo.cc' versus 'base/sysinfo.h'), the leading
+  part of the source path that the header lacks is returned as well, so the
+  caller can prepend it to open the header. The real include paths are not
+  available here, hence this guesswork.
+
+  Known bugs: tools/base/bar.cc and base/bar.h are reported as the same
+  module, so some false positives are possible. This should be sufficiently
+  rare in practice.
+
+  Args:
+    filename_cc: path of the .cc file.
+    filename_h: path of the header file.
+
+  Returns:
+    Tuple with a bool and a string:
+      bool: True if filename_cc and filename_h belong to the same module.
+      string: the additional prefix needed to open the header file ('' when
+        the files do not belong together).
+  """
+  # Anything that is not a (.cc, .h) pair can never match.
+  if not (filename_cc.endswith('.cc') and filename_h.endswith('.h')):
+    return (False, '')
+
+  source_stem = filename_cc[:-len('.cc')]
+  # Only one test suffix is stripped; '_unittest' takes precedence.
+  for test_suffix in ('_unittest', '_test'):
+    if source_stem.endswith(test_suffix):
+      source_stem = source_stem[:-len(test_suffix)]
+      break
+
+  header_stem = filename_h[:-len('.h')]
+  if header_stem.endswith('-inl'):
+    header_stem = header_stem[:-len('-inl')]
+
+  for visibility_dir in ('/public/', '/internal/'):
+    source_stem = source_stem.replace(visibility_dir, '/')
+    header_stem = header_stem.replace(visibility_dir, '/')
+
+  if not source_stem.endswith(header_stem):
+    return (False, '')
+  return (True, source_stem[:-len(header_stem)])
+
+
+def UpdateIncludeState(filename, include_state, io=codecs):
+  """Fill up the include_state with new includes found from the file.
+
+  Every line of the header is stripped of comments and searched for an
+  #include; each included name not already present in include_state is added
+  with a 'filename:linenum' string as its value. Existing keys are left
+  untouched. The opened header is closed again before returning.
+
+  Args:
+    filename: the name of the header to read.
+    include_state: an _IncludeState instance in which the headers are inserted.
+    io: The io factory to use to read the file. Provided for testability.
+
+  Returns:
+    True if the header could be opened and was scanned. False if opening it
+    raised IOError.
+  """
+  try:
+    headerfile = io.open(filename, 'r', 'utf8', 'replace')
+  except IOError:
+    return False
+
+  try:
+    linenum = 0
+    for line in headerfile:
+      linenum += 1
+      clean_line = CleanseComments(line)
+      match = _RE_PATTERN_INCLUDE.search(clean_line)
+      if match:
+        include = match.group(2)
+        # The value formatting is cute, but not really used right now.
+        # What matters here is that the key is in include_state.
+        include_state.setdefault(include, '%s:%d' % (filename, linenum))
+  finally:
+    # An injected io factory may hand back a plain iterable of lines rather
+    # than a file object, so only close what is closable.
+    close = getattr(headerfile, 'close', None)
+    if close is not None:
+      close()
+  return True
+
+
+def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error,
+ io=codecs):
+ """Reports for missing stl includes.
+
+ This function will output warnings to make sure you are including the headers
+ necessary for the stl containers and functions that you use. We only give one
+ reason to include a header. For example, if you use both equal_to<> and
+ less<> in a .h file, only one (the latter in the file) of these will be
+ reported as a reason to include the <functional>.
+
+ The caller's include_state is not modified: the function works on a copy,
+ which it extends with the includes found in headers that belong to the same
+ module as this file.
+
+ Args:
+ filename: The name of the current file.
+ clean_lines: A CleansedLines instance containing the file.
+ include_state: An _IncludeState instance.
+ error: The function to call with any errors found.
+ io: The IO factory to use to read the header file. Provided for unittest
+ injection.
+ """
+ required = {} # A map of header name to linenumber and the template entity.
+ # Example of required: { '<functional>': (1219, 'less<>') }
+
+ # Pass 1: scan every line and record, per header, the last line that needs it
+ # (later matches overwrite earlier ones for the same header).
+ for linenum in xrange(clean_lines.NumLines()):
+ line = clean_lines.elided[linenum]
+ # Skip empty lines and preprocessor directives.
+ if not line or line[0] == '#':
+ continue
+
+ # String is special -- it is a non-templatized type in STL.
+ matched = _RE_PATTERN_STRING.search(line)
+ if matched:
+ # Don't warn about strings in non-STL namespaces:
+ # (We check only the first match per line; good enough.)
+ prefix = line[:matched.start()]
+ if prefix.endswith('std::') or not prefix.endswith('::'):
+ required['<string>'] = (linenum, 'string')
+
+ for pattern, template, header in _re_pattern_algorithm_header:
+ if pattern.search(line):
+ required[header] = (linenum, template)
+
+ # The following function is just a speed up, no semantics are changed.
+ if not '<' in line: # Reduces the cpu time usage by skipping lines.
+ continue
+
+ for pattern, template, header in _re_pattern_templates:
+ if pattern.search(line):
+ required[header] = (linenum, template)
+
+ # The policy is that if you #include something in foo.h you don't need to
+ # include it again in foo.cc. Here, we will look at possible includes.
+ # Let's copy the include_state so it is only messed up within this function.
+ include_state = include_state.copy()
+
+ # Did we find the header for this file (if any) and successfully load it?
+ header_found = False
+
+ # Use the absolute path so that matching works properly.
+ abs_filename = FileInfo(filename).FullName()
+
+ # For Emacs's flymake.
+ # If cpplint is invoked from Emacs's flymake, a temporary file is generated
+ # by flymake and that file name might end with '_flymake.cc'. In that case,
+ # restore original file name here so that the corresponding header file can be
+ # found.
+ # e.g. If the file name is 'foo_flymake.cc', we should search for 'foo.h'
+ # instead of 'foo_flymake.h'
+ abs_filename = re.sub(r'_flymake\.cc$', '.cc', abs_filename)
+
+ # include_state is modified during iteration, so we iterate over a copy of
+ # the keys.
+ # NOTE(review): this relies on keys() returning a list snapshot, as it does
+ # in Python 2 (which the xrange call above implies) -- confirm before porting.
+ header_keys = include_state.keys()
+ for header in header_keys:
+ (same_module, common_path) = FilesBelongToSameModule(abs_filename, header)
+ fullpath = common_path + header
+ if same_module and UpdateIncludeState(fullpath, include_state, io):
+ header_found = True
+
+ # If we can't find the header file for a .cc, assume it's because we don't
+ # know where to look. In that case we'll give up as we're not sure they
+ # didn't include it in the .h file.
+ # TODO(unknown): Do a better job of finding .h files so we are confident that
+ # not having the .h file means there isn't one.
+ if filename.endswith('.cc') and not header_found:
+ return
+
+ # All the lines have been processed, report the errors found.
+ # Keys of `required` carry their <> delimiters; include_state keys are
+ # compared without them.
+ for required_header_unstripped in required:
+ template = required[required_header_unstripped][1]
+ if required_header_unstripped.strip('<>"') not in include_state:
+ error(filename, required[required_header_unstripped][0],
+ 'build/include_what_you_use', 4,
+ 'Add #include ' + required_header_unstripped + ' for ' + template)
+
+
+# Finds make_pair followed (after optional whitespace) by an explicit
+# template argument list.
+_RE_PATTERN_EXPLICIT_MAKEPAIR = re.compile(r'\bmake_pair\s*<')
+
+
+def CheckMakePairUsesDeduction(filename, clean_lines, linenum, error):
+  """Warn when make_pair is called with explicit template arguments.
+
+  G++ 4.6 in C++0x mode fails badly if make_pair's template arguments are
+  specified explicitly, and such use isn't intended in any case. The check
+  runs against the raw (uncleansed) text of the line.
+
+  Args:
+    filename: The name of the current file.
+    clean_lines: A CleansedLines instance containing the file.
+    linenum: The number of the line to check.
+    error: The function to call with any errors found.
+  """
+  raw_line = clean_lines.raw_lines[linenum]
+  if _RE_PATTERN_EXPLICIT_MAKEPAIR.search(raw_line) is None:
+    return
+
+  error(filename, linenum, 'build/explicit_make_pair',
+        4,  # 4 = high confidence
+        'Omit template arguments from make_pair OR use pair directly OR'
+        ' if appropriate, construct a pair directly')
+
+
+def ProcessLine(filename, file_extension,
+                clean_lines, line, include_state, function_state,
+                class_state, error, extra_check_functions=None):
+  """Processes a single line in the file.
+
+  Runs the NOLINT parser and every built-in per-line check, in a fixed order,
+  followed by any caller-supplied checks.
+
+  Args:
+    filename: Filename of the file that is being processed.
+    file_extension: The extension (dot not included) of the file.
+    clean_lines: The cleansed lines of the file; its raw_lines attribute is
+                 read here and the object is handed to every check.
+    line: Number of line being processed.
+    include_state: An _IncludeState instance in which the headers are inserted.
+    function_state: A _FunctionState instance which counts function lines, etc.
+    class_state: A _ClassState instance which maintains information about
+                 the current stack of nested class declarations being parsed.
+    error: A callable to which errors are reported. The checks in this file
+           call it as error(filename, linenum, category, confidence, message).
+    extra_check_functions: An optional sequence of additional check functions
+                           that will be run on each source line. Each function
+                           takes 4 arguments: filename, clean_lines, line,
+                           error. None (the default) means no extra checks.
+  """
+  raw_lines = clean_lines.raw_lines
+  ParseNolintSuppressions(filename, raw_lines[line], line, error)
+  CheckForFunctionLengths(filename, clean_lines, line, function_state, error)
+  CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error)
+  CheckStyle(filename, clean_lines, line, file_extension, class_state, error)
+  CheckLanguage(filename, clean_lines, line, file_extension, include_state,
+                error)
+  CheckForNonStandardConstructs(filename, clean_lines, line,
+                                class_state, error)
+  CheckPosixThreading(filename, clean_lines, line, error)
+  CheckInvalidIncrement(filename, clean_lines, line, error)
+  CheckMakePairUsesDeduction(filename, clean_lines, line, error)
+  # A None default avoids sharing one mutable list object between calls.
+  for check_fn in extra_check_functions or ():
+    check_fn(filename, clean_lines, line, error)
+
+def ProcessFileData(filename, file_extension, lines, error,
+                    extra_check_functions=None):
+  """Performs lint checks and reports any errors to the given error function.
+
+  Whole-file checks (copyright, header guard, include-what-you-use, unicode
+  replacement characters, newline at EOF) run around a per-line pass that
+  calls ProcessLine for every cleansed line.
+
+  Args:
+    filename: Filename of the file that is being processed.
+    file_extension: The extension (dot not included) of the file.
+    lines: An array of strings, each representing a line of the file, with the
+           last element being empty if the file is terminated with a newline.
+    error: A callable to which errors are reported. The checks in this file
+           call it as error(filename, linenum, category, confidence, message).
+    extra_check_functions: An optional sequence of additional check functions
+                           that will be run on each source line. Each function
+                           takes 4 arguments: filename, clean_lines, line,
+                           error. None (the default) means no extra checks.
+  """
+  # Use a fresh list per call instead of a shared mutable default.
+  if extra_check_functions is None:
+    extra_check_functions = []
+
+  # Sentinel lines make reported line numbers 1-based and give the file a
+  # known last line.
+  lines = (['// marker so line numbers and indices both start at 1'] + lines +
+           ['// marker so line numbers end in a known way'])
+
+  include_state = _IncludeState()
+  function_state = _FunctionState()
+  class_state = _ClassState()
+
+  ResetNolintSuppressions()
+
+  CheckForCopyright(filename, lines, error)
+
+  if file_extension == 'h':
+    CheckForHeaderGuard(filename, lines, error)
+
+  RemoveMultiLineComments(filename, lines, error)
+  clean_lines = CleansedLines(lines)
+  for line in xrange(clean_lines.NumLines()):
+    ProcessLine(filename, file_extension, clean_lines, line,
+                include_state, function_state, class_state, error,
+                extra_check_functions)
+  class_state.CheckFinished(filename, error)
+
+  CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error)
+
+  # We check here rather than inside ProcessLine so that we see raw
+  # lines rather than "cleaned" lines.
+  CheckForUnicodeReplacementCharacters(filename, lines, error)
+
+  CheckForNewlineAtEOF(filename, lines, error)
+
+def ProcessFile(filename, vlevel, extra_check_functions=None):
+  """Does google-lint on a single file.
+
+  Reads the file (or stdin when filename is '-'), strips trailing carriage
+  returns, and lints it when its extension is cc, h or cpp. Progress and
+  skip messages are written to sys.stderr.
+
+  Args:
+    filename: The name of the file to parse.
+
+    vlevel: The level of errors to report. Every error of confidence
+    >= verbose_level will be reported. 0 is a good default.
+
+    extra_check_functions: An optional sequence of additional check functions
+                           that will be run on each source line. Each function
+                           takes 4 arguments: filename, clean_lines, line,
+                           error. None (the default) means no extra checks.
+  """
+
+  _SetVerboseLevel(vlevel)
+
+  # Use a fresh list per call instead of a shared mutable default.
+  if extra_check_functions is None:
+    extra_check_functions = []
+
+  try:
+    # Support the UNIX convention of using "-" for stdin. Note that
+    # we are not opening the file with universal newline support
+    # (which codecs doesn't support anyway), so the resulting lines do
+    # contain trailing '\r' characters if we are reading a file that
+    # has CRLF endings.
+    # If after the split a trailing '\r' is present, it is removed
+    # below. If it is not expected to be present (i.e. os.linesep !=
+    # '\r\n' as in Windows), a warning is issued below if this file
+    # is processed.
+
+    if filename == '-':
+      lines = codecs.StreamReaderWriter(sys.stdin,
+                                        codecs.getreader('utf8'),
+                                        codecs.getwriter('utf8'),
+                                        'replace').read().split('\n')
+    else:
+      # Close the handle explicitly instead of leaving it to the garbage
+      # collector; many files may be linted in one run.
+      source_file = codecs.open(filename, 'r', 'utf8', 'replace')
+      try:
+        lines = source_file.read().split('\n')
+      finally:
+        source_file.close()
+
+    carriage_return_found = False
+    # Remove trailing '\r'.
+    for linenum in range(len(lines)):
+      if lines[linenum].endswith('\r'):
+        lines[linenum] = lines[linenum].rstrip('\r')
+        carriage_return_found = True
+
+  except IOError:
+    sys.stderr.write(
+        "Skipping input '%s': Can't open for reading\n" % filename)
+    return
+
+  # Note, if no dot is found, this will give the entire filename as the ext.
+  file_extension = filename[filename.rfind('.') + 1:]
+
+  # When reading from stdin, the extension is unknown, so no cpplint tests
+  # should rely on the extension.
+  if (filename != '-' and file_extension != 'cc' and file_extension != 'h'
+      and file_extension != 'cpp'):
+    sys.stderr.write('Ignoring %s; not a .cc or .h file\n' % filename)
+  else:
+    ProcessFileData(filename, file_extension, lines, Error,
+                    extra_check_functions)
+    if carriage_return_found and os.linesep != '\r\n':
+      # Use 0 for linenum since outputting only one error for potentially
+      # several lines.
+      # The two literals are concatenated, so the first one must end with a
+      # space to keep "found;" and "better" apart.
+      Error(filename, 0, 'whitespace/newline', 1,
+            'One or more unexpected \\r (^M) found; '
+            'better to use only a \\n')
+
+  sys.stderr.write('Done processing %s\n' % filename)
+
+
+def PrintUsage(message):
+  """Writes the usage text to stderr and terminates the process.
+
+  Args:
+    message: Optional error text. When given, the process exits with a
+      'FATAL ERROR' message built from it; otherwise it exits with status 1.
+  """
+  sys.stderr.write(_USAGE)
+  # sys.exit accepts either a message string or an integer status.
+  exit_arg = ('\nFATAL ERROR: ' + message) if message else 1
+  sys.exit(exit_arg)
+
+
+def PrintCategories():
+  """Writes every known error category to stderr and exits with status 0.
+
+  These are the categories used to filter messages via --filter. One
+  category is printed per line.
+  """
+  listing = []
+  for category in _ERROR_CATEGORIES:
+    listing.append(' %s\n' % category)
+  sys.stderr.write(''.join(listing))
+  sys.exit(0)
+
+
+def ParseArguments(args):
+  """Parses the command line arguments.
+
+  Recognised long options are --help, --output, --verbose, --counting and
+  --filter. As side effects this sets the output format, verbosity level,
+  filters and counting style. Invalid input is reported through PrintUsage.
+
+  Args:
+    args: The command line arguments, without the program name.
+
+  Returns:
+    The list of filenames to lint.
+  """
+  long_options = ['help', 'output=', 'verbose=', 'counting=', 'filter=']
+  try:
+    (parsed_options, filenames) = getopt.getopt(args, '', long_options)
+  except getopt.GetoptError:
+    PrintUsage('Invalid arguments.')
+
+  # Start from the current settings so unspecified options keep their value.
+  verbosity = _VerboseLevel()
+  output_format = _OutputFormat()
+  filters = ''
+  counting_style = ''
+
+  for (flag, value) in parsed_options:
+    if flag == '--help':
+      PrintUsage(None)
+    elif flag == '--output':
+      if value not in ('emacs', 'vs7'):
+        PrintUsage('The only allowed output formats are emacs and vs7.')
+      output_format = value
+    elif flag == '--verbose':
+      verbosity = int(value)
+    elif flag == '--filter':
+      filters = value
+      # An empty --filter= asks for the list of categories.
+      if not filters:
+        PrintCategories()
+    elif flag == '--counting':
+      if value not in ('total', 'toplevel', 'detailed'):
+        PrintUsage('Valid counting options are total, toplevel, and detailed')
+      counting_style = value
+
+  if not filenames:
+    PrintUsage('No files were specified.')
+
+  _SetOutputFormat(output_format)
+  _SetVerboseLevel(verbosity)
+  _SetFilters(filters)
+  _SetCountingStyle(counting_style)
+
+  return filenames
+
+
+def main():
+  """Command-line entry point: lint every file named in sys.argv.
+
+  Exits with a true status when at least one error was counted.
+  """
+  targets = ParseArguments(sys.argv[1:])
+
+  # Change stderr to write with replacement characters so we don't die
+  # if we try to print something containing non-ASCII characters.
+  sys.stderr = codecs.StreamReaderWriter(
+      sys.stderr, codecs.getreader('utf8'), codecs.getwriter('utf8'),
+      'replace')
+
+  _cpplint_state.ResetErrorCounts()
+  for target in targets:
+    ProcessFile(target, _cpplint_state.verbose_level)
+  _cpplint_state.PrintErrorCounts()
+
+  found_errors = _cpplint_state.error_count > 0
+  sys.exit(found_errors)
+
+
+if __name__ == '__main__':
+  main()
diff --git a/tools/diff.py b/tools/diff.py
new file mode 100644
index 0000000..a42a4dc
--- /dev/null
+++ b/tools/diff.py
@@ -0,0 +1,127 @@
+#!/usr/bin/env python
+## Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+##
+## Use of this source code is governed by a BSD-style license
+## that can be found in the LICENSE file in the root of the source
+## tree. An additional intellectual property rights grant can be found
+## in the file PATENTS. All contributing project authors may
+## be found in the AUTHORS file in the root of the source tree.
+##
+"""Classes for representing diff pieces."""
+
+__author__ = "jkoleszar@google.com"
+
+import re
+
+
+class DiffLines(object):
+  """The lines of one side (old or new) of a diff hunk."""
+
+  def __init__(self, filename, offset, length):
+    self.filename = filename
+    self.offset = offset
+    self.length = length
+    self.lines = []
+    self.delta_line_nums = []
+
+  def Append(self, line):
+    """Stores a diff line without its marker, noting non-context lines."""
+    index = len(self.lines)
+    is_context = line[0] == " "
+    if not is_context:
+      # Record the file line number of every added/removed line.
+      self.delta_line_nums.append(self.offset + index)
+    self.lines.append(line[1:])
+    assert index + 1 <= self.length
+
+  def Complete(self):
+    """True once as many lines were appended as the hunk range announced."""
+    return self.length == len(self.lines)
+
+  def __contains__(self, item):
+    """Tests whether line number `item` lies inside this side's range."""
+    last = self.offset + self.length - 1
+    return self.offset <= item <= last
+
+
+class DiffHunk(object):
+  """A single diff hunk: a left (old) and a right (new) DiffLines."""
+
+  def __init__(self, header, file_a, file_b, start_a, len_a, start_b, len_b):
+    self.header = header
+    self.left = DiffLines(file_a, start_a, len_a)
+    self.right = DiffLines(file_b, start_b, len_b)
+    self.lines = []
+
+  def Append(self, line):
+    """Routes a hunk line to the side(s) it belongs to and records it."""
+    marker = line[0]
+    assert marker in ("-", "+", " "), (
+        "Unrecognized character at start of diff line "
+        "%r" % marker)
+    # Context lines (" ") belong to both sides, left first.
+    if marker in ("-", " "):
+      self.left.Append(line)
+    if marker in ("+", " "):
+      self.right.Append(line)
+    self.lines.append(line)
+
+  def Complete(self):
+    """True when both sides have received all of their lines."""
+    return self.left.Complete() and self.right.Complete()
+
+  def __repr__(self):
+    longest = max(self.left.length, self.right.length)
+    return "DiffHunk(%s, %s, len %d)" % (
+        self.left.filename, self.right.filename, longest)
+
+
+def ParseDiffHunks(stream):
+  """Walk a file-like object, yielding DiffHunks as they're parsed.
+
+  Lines outside a hunk are scanned for "--- <file>" / "+++ <file>" headers
+  and for "@@ -start[,len] +start[,len]" range lines; anything else there is
+  ignored. Once a range line is seen, the following lines are fed to the
+  hunk until both of its sides are complete.
+
+  Args:
+    stream: Object with a readline() method returning "" at end of input.
+
+  Yields:
+    One DiffHunk per completely parsed hunk.
+
+  Raises:
+    AssertionError: if the input ends in the middle of a hunk.
+  """
+
+  file_regex = re.compile(r"(\+\+\+|---) (\S+)")
+  range_regex = re.compile(r"@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))?")
+  hunk = None
+  while True:
+    line = stream.readline()
+    if not line:
+      break
+
+    if hunk is None:
+      # Parse file names
+      diff_file = file_regex.match(line)
+      if diff_file:
+        if line.startswith("---"):
+          a_line = line
+          a = diff_file.group(2)
+          continue
+        if line.startswith("+++"):
+          b_line = line
+          b = diff_file.group(2)
+          continue
+
+      # Parse offset/lengths
+      diffrange = range_regex.match(line)
+      if diffrange:
+        # The first number of each range is always the start line. When the
+        # ",len" part is omitted the range covers exactly one line.
+        start_a = int(diffrange.group(1))
+        if diffrange.group(2):
+          len_a = int(diffrange.group(3))
+        else:
+          len_a = 1
+
+        start_b = int(diffrange.group(4))
+        if diffrange.group(5):
+          len_b = int(diffrange.group(6))
+        else:
+          len_b = 1
+
+        header = [a_line, b_line, line]
+        hunk = DiffHunk(header, a, b, start_a, len_a, start_b, len_b)
+    else:
+      # "\ No newline at end of file" annotates the previous line and is not
+      # itself part of either side, so it must not be counted.
+      if line.startswith("\\"):
+        continue
+
+      # Add the current line to the hunk
+      hunk.Append(line)
+
+      # See if the whole hunk has been parsed. If so, yield it and prepare
+      # for the next hunk.
+      if hunk.Complete():
+        yield hunk
+        hunk = None
+
+  # Partial hunks are a parse error
+  assert hunk is None
diff --git a/tools/ftfy.sh b/tools/ftfy.sh
index c5cfdea..92059f5 100755
--- a/tools/ftfy.sh
+++ b/tools/ftfy.sh
@@ -29,12 +29,13 @@
vpx_style() {
- astyle --style=bsd --min-conditional-indent=0 --break-blocks \
- --pad-oper --pad-header --unpad-paren \
- --align-pointer=name \
- --indent-preprocessor --convert-tabs --indent-labels \
- --suffix=none --quiet "$@"
- sed -i "" 's/[[:space:]]\{1,\},/,/g' "$@"
+ # Each argument matching *.h, *.c or *.cc is handed to vpx-astyle.sh on its
+ # own; arguments with any other name are skipped.
+ for f; do
+ case "$f" in
+ *.h|*.c|*.cc)
+ "${dirname_self}"/vpx-astyle.sh "$f"
+ ;;
+ esac
+ done
}
@@ -119,8 +120,7 @@
git show > "${ORIG_DIFF}"
# Apply the style guide on new and modified files and collect its diff
-for f in $(git diff HEAD^ --name-only -M90 --diff-filter=AM \
- | grep '\.[ch]$'); do
+for f in $(git diff HEAD^ --name-only -M90 --diff-filter=AM); do
case "$f" in
third_party/*) continue;;
nestegg/*) continue;;
diff --git a/tools/intersect-diffs.py b/tools/intersect-diffs.py
index be9dea5..4dbafa9 100755
--- a/tools/intersect-diffs.py
+++ b/tools/intersect-diffs.py
@@ -16,121 +16,9 @@
__author__ = "jkoleszar@google.com"
-import re
import sys
-
-class DiffLines(object):
- """A container for one half of a diff."""
-
- def __init__(self, filename, offset, length):
- self.filename = filename
- self.offset = offset
- self.length = length
- self.lines = []
- self.delta_line_nums = []
-
- def Append(self, line):
- l = len(self.lines)
- if line[0] != " ":
- self.delta_line_nums.append(self.offset + l)
- self.lines.append(line[1:])
- assert l+1 <= self.length
-
- def Complete(self):
- return len(self.lines) == self.length
-
- def __contains__(self, item):
- return item >= self.offset and item <= self.offset + self.length - 1
-
-
-class DiffHunk(object):
- """A container for one diff hunk, consisting of two DiffLines."""
-
- def __init__(self, header, file_a, file_b, start_a, len_a, start_b, len_b):
- self.header = header
- self.left = DiffLines(file_a, start_a, len_a)
- self.right = DiffLines(file_b, start_b, len_b)
- self.lines = []
-
- def Append(self, line):
- """Adds a line to the DiffHunk and its DiffLines children."""
- if line[0] == "-":
- self.left.Append(line)
- elif line[0] == "+":
- self.right.Append(line)
- elif line[0] == " ":
- self.left.Append(line)
- self.right.Append(line)
- else:
- assert False, ("Unrecognized character at start of diff line "
- "%r" % line[0])
- self.lines.append(line)
-
- def Complete(self):
- return self.left.Complete() and self.right.Complete()
-
- def __repr__(self):
- return "DiffHunk(%s, %s, len %d)" % (
- self.left.filename, self.right.filename,
- max(self.left.length, self.right.length))
-
-
-def ParseDiffHunks(stream):
- """Walk a file-like object, yielding DiffHunks as they're parsed."""
-
- file_regex = re.compile(r"(\+\+\+|---) (\S+)")
- range_regex = re.compile(r"@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))?")
- hunk = None
- while True:
- line = stream.readline()
- if not line:
- break
-
- if hunk is None:
- # Parse file names
- diff_file = file_regex.match(line)
- if diff_file:
- if line.startswith("---"):
- a_line = line
- a = diff_file.group(2)
- continue
- if line.startswith("+++"):
- b_line = line
- b = diff_file.group(2)
- continue
-
- # Parse offset/lengths
- diffrange = range_regex.match(line)
- if diffrange:
- if diffrange.group(2):
- start_a = int(diffrange.group(1))
- len_a = int(diffrange.group(3))
- else:
- start_a = 1
- len_a = int(diffrange.group(1))
-
- if diffrange.group(5):
- start_b = int(diffrange.group(4))
- len_b = int(diffrange.group(6))
- else:
- start_b = 1
- len_b = int(diffrange.group(4))
-
- header = [a_line, b_line, line]
- hunk = DiffHunk(header, a, b, start_a, len_a, start_b, len_b)
- else:
- # Add the current line to the hunk
- hunk.Append(line)
-
- # See if the whole hunk has been parsed. If so, yield it and prepare
- # for the next hunk.
- if hunk.Complete():
- yield hunk
- hunk = None
-
- # Partial hunks are a parse error
- assert hunk is None
+import diff
def FormatDiffHunks(hunks):
@@ -162,8 +50,8 @@
def main():
- old_hunks = [x for x in ParseDiffHunks(open(sys.argv[1], "r"))]
- new_hunks = [x for x in ParseDiffHunks(open(sys.argv[2], "r"))]
+ old_hunks = [x for x in diff.ParseDiffHunks(open(sys.argv[1], "r"))]
+ new_hunks = [x for x in diff.ParseDiffHunks(open(sys.argv[2], "r"))]
out_hunks = []
# Join the right hand side of the older diff with the left hand side of the
diff --git a/tools/lint-hunks.py b/tools/lint-hunks.py
new file mode 100755
index 0000000..b15a691
--- /dev/null
+++ b/tools/lint-hunks.py
@@ -0,0 +1,144 @@
+#!/usr/bin/python
+## Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+##
+## Use of this source code is governed by a BSD-style license
+## that can be found in the LICENSE file in the root of the source
+## tree. An additional intellectual property rights grant can be found
+## in the file PATENTS. All contributing project authors may
+## be found in the AUTHORS file in the root of the source tree.
+##
+"""Performs style checking on each diff hunk."""
+import getopt
+import os
+import StringIO
+import subprocess
+import sys
+
+import diff
+
+
+# Option specifications handed to getopt.getopt() in main().
+SHORT_OPTIONS = "h"
+LONG_OPTIONS = ["help"]
+
+# git command lines run by main(); DIFF_CMD and SHOW_CMD get a revision
+# argument appended before use, the other two are run as-is.
+TOPLEVEL_CMD = ["git", "rev-parse", "--show-toplevel"]
+DIFF_CMD = ["git", "diff"]
+DIFF_INDEX_CMD = ["git", "diff-index", "-u", "HEAD", "--"]
+SHOW_CMD = ["git", "show"]
+# Filter expressions, joined with "," into cpplint's --filter argument.
+CPPLINT_FILTERS = ["-readability/casting", "-runtime/int"]
+
+
+class Usage(Exception):
+ """Command line usage error; main() prints it to stderr and returns 2."""
+ pass
+
+
+class SubprocessException(Exception):
+  """Error for a command that failed; the message quotes the command line."""
+
+  def __init__(self, args):
+    command_line = " ".join(args)
+    super(SubprocessException, self).__init__(
+        "Failed to execute '%s'" % command_line)
+
+
+class Subprocess(subprocess.Popen):
+  """Popen that checks the child's return code when communicate() finishes.
+
+  expected_returncode may be a single value, a container of accepted values,
+  or None to disable the check.
+  """
+
+  def __init__(self, args, expected_returncode=0, **kwargs):
+    self._args = args
+    self._expected_returncode = expected_returncode
+    super(Subprocess, self).__init__(args, **kwargs)
+
+  def communicate(self, *args, **kwargs):
+    """Like Popen.communicate(), raising SubprocessException on a bad status."""
+    output = super(Subprocess, self).communicate(*args, **kwargs)
+    expected = self._expected_returncode
+    if expected is None:
+      return output
+    try:
+      matched = self.returncode in expected
+    except TypeError:
+      # Not a container: treat it as the single accepted return code.
+      matched = self.returncode == expected
+    if not matched:
+      raise SubprocessException(self._args)
+    return output
+
+
+def main(argv=None):
+  """Runs cpplint on files touched by a diff, reporting only touched lines.
+
+  Without a positional argument the diff comes from DIFF_INDEX_CMD and file
+  contents are read from the working tree. With one argument (a revision)
+  the diff is "git diff <rev>^!" and contents come from
+  "git show <rev>:<file>". Only files ending in .c, .h or .cc are linted,
+  and only warnings on lines the diff added or changed are printed.
+
+  Args:
+    argv: Full argument vector including the program name; defaults to
+      sys.argv.
+
+  Returns:
+    1 if a relevant lint warning was printed, 2 on a usage error, otherwise
+    None (which sys.exit() turns into status 0).
+
+  Raises:
+    SubprocessException: if git or cpplint exits with an unexpected status.
+  """
+  if argv is None:
+    argv = sys.argv
+  try:
+    try:
+      opts, args = getopt.getopt(argv[1:], SHORT_OPTIONS, LONG_OPTIONS)
+    except getopt.error, msg:
+      raise Usage(msg)
+
+    # process options
+    for o, _ in opts:
+      if o in ("-h", "--help"):
+        print __doc__
+        sys.exit(0)
+
+    if args and len(args) > 1:
+      print __doc__
+      sys.exit(0)
+
+    # Find the fully qualified path to the root of the tree
+    tl = Subprocess(TOPLEVEL_CMD, stdout=subprocess.PIPE)
+    tl = tl.communicate()[0].strip()
+
+    # See if we're working on the index or not.
+    if args:
+      diff_cmd = DIFF_CMD + [args[0] + "^!"]
+    else:
+      diff_cmd = DIFF_INDEX_CMD
+
+    # Build the command line to execute cpplint
+    cpplint_cmd = [os.path.join(tl, "tools", "cpplint.py"),
+                   "--filter=" + ",".join(CPPLINT_FILTERS),
+                   "-"]
+
+    # Get a list of all affected lines
+    file_affected_line_map = {}
+    p = Subprocess(diff_cmd, stdout=subprocess.PIPE)
+    stdout = p.communicate()[0]
+    for hunk in diff.ParseDiffHunks(StringIO.StringIO(stdout)):
+      # Drop the two-character "b/" prefix of the right-hand file name.
+      filename = hunk.right.filename[2:]
+      if filename not in file_affected_line_map:
+        file_affected_line_map[filename] = set()
+      file_affected_line_map[filename].update(hunk.right.delta_line_nums)
+
+    # Run each affected file through cpplint
+    lint_failed = False
+    for filename, affected_lines in file_affected_line_map.iteritems():
+      if filename.split(".")[-1] not in ("c", "h", "cc"):
+        continue
+
+      if args:
+        # File contents come from git
+        show_cmd = SHOW_CMD + [args[0] + ":" + filename]
+        show = Subprocess(show_cmd, stdout=subprocess.PIPE)
+        lint = Subprocess(cpplint_cmd, expected_returncode=(0, 1),
+                          stdin=show.stdout, stderr=subprocess.PIPE)
+        # cpplint now owns the read end of the pipe. Closing our copy lets
+        # "git show" get SIGPIPE if cpplint exits early, so the wait()
+        # below cannot block on a full pipe.
+        show.stdout.close()
+        try:
+          lint_out = lint.communicate()[1]
+        finally:
+          # Reap the producer so it does not linger as a zombie.
+          show.wait()
+      else:
+        # File contents come from the working tree. Read (and close) the
+        # file before spawning cpplint so a read error leaves no child
+        # process behind.
+        source = open(os.path.join(tl, filename))
+        try:
+          stdin = source.read()
+        finally:
+          source.close()
+        lint = Subprocess(cpplint_cmd, expected_returncode=(0, 1),
+                          stdin=subprocess.PIPE, stderr=subprocess.PIPE)
+        lint_out = lint.communicate(stdin)[1]
+
+      # cpplint reports stdin as file "-": keep only "-:<line>:<message>".
+      for line in lint_out.split("\n"):
+        fields = line.split(":")
+        if fields[0] != "-":
+          continue
+        warning_line_num = int(fields[1])
+        if warning_line_num in affected_lines:
+          print "%s:%d:%s"%(filename, warning_line_num,
+                            ":".join(fields[2:]))
+          lint_failed = True
+
+    # Set exit code if any relevant lint errors seen
+    if lint_failed:
+      return 1
+
+  except Usage, err:
+    print >>sys.stderr, err
+    print >>sys.stderr, "for help use --help"
+    return 2
+
+if __name__ == "__main__":
+  sys.exit(main())
diff --git a/tools/vpx-astyle.sh b/tools/vpx-astyle.sh
new file mode 100755
index 0000000..6340426
--- /dev/null
+++ b/tools/vpx-astyle.sh
@@ -0,0 +1,27 @@
+#!/bin/sh
+# Restyles the files named on the command line: astyle runs first, then a
+# series of sed substitutions is applied to the same files.
+set -e
+astyle --style=java --indent=spaces=2 --indent-switches\
+ --min-conditional-indent=0 \
+ --pad-oper --pad-header --unpad-paren \
+ --align-pointer=name \
+ --indent-preprocessor --convert-tabs --indent-labels \
+ --suffix=none --quiet --max-instatement-indent=80 "$@"
+# Disabled, too greedy?
+#sed -i 's;[[:space:]]\{1,\}\[;[;g' "$@"
+
+# sed_i ARGS...: runs "sed -i ARGS". If a bare "sed -i" complains that the
+# option requires an argument, an empty argument ('') is passed for it.
+sed_i() {
+ # Incompatible sed parameter parsing.
+ if sed -i 2>&1 | grep -q 'requires an argument'; then
+ sed -i '' "$@"
+ else
+ sed -i "$@"
+ fi
+}
+
+# Substitutions, in order:
+#  1. delete whitespace in front of ',' and ';'
+#  2. delete whitespace in front of a two-character run of '+'/'-' followed
+#     by ';' (e.g. "++;" or "--;")
+#  3. delete a ',' (and whitespace after it) directly in front of '}'
+#  4. insert a space after '//' when text follows it immediately
+#  5. prefix a line consisting only of public:/private:/protected: with a
+#     space
+#  6. delete trailing whitespace
+sed_i -e 's/[[:space:]]\{1,\}\([,;]\)/\1/g' \
+ -e 's/[[:space:]]\{1,\}\([+-]\{2\};\)/\1/g' \
+ -e 's/,[[:space:]]*}/}/g' \
+ -e 's;//\([^/[:space:]].*$\);// \1;g' \
+ -e 's/^\(public\|private\|protected\):$/ \1:/g' \
+ -e 's/[[:space:]]\{1,\}$//g' \
+ "$@"
diff --git a/tools_common.c b/tools_common.c
index 6f95028..92de794 100644
--- a/tools_common.c
+++ b/tools_common.c
@@ -20,11 +20,10 @@
#endif
#endif
-FILE* set_binary_mode(FILE *stream)
-{
- (void)stream;
+/* Calls _setmode(..., _O_BINARY) on the stream when _WIN32 or __OS2__ is
+ * defined; in every build the same stream pointer is returned. */
+FILE *set_binary_mode(FILE *stream) {
+ (void)stream;
#if defined(_WIN32) || defined(__OS2__)
- _setmode(_fileno(stream), _O_BINARY);
+ _setmode(_fileno(stream), _O_BINARY);
#endif
- return stream;
+ return stream;
}
diff --git a/tools_common.h b/tools_common.h
index 80c9747..9e56149 100644
--- a/tools_common.h
+++ b/tools_common.h
@@ -11,6 +11,6 @@
#define TOOLS_COMMON_H
/* Sets a stdio stream into binary mode */
-FILE* set_binary_mode(FILE *stream);
+FILE *set_binary_mode(FILE *stream);
#endif
diff --git a/vp8/common/arm/armv6/idct_blk_v6.c b/vp8/common/arm/armv6/idct_blk_v6.c
index 6002c0f..c94f84a 100644
--- a/vp8/common/arm/armv6/idct_blk_v6.c
+++ b/vp8/common/arm/armv6/idct_blk_v6.c
@@ -9,7 +9,7 @@
*/
#include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
void vp8_dequant_idct_add_y_block_v6(short *q, short *dq,
diff --git a/vp8/common/arm/bilinearfilter_arm.c b/vp8/common/arm/bilinearfilter_arm.c
index c63073c..799c8bd 100644
--- a/vp8/common/arm/bilinearfilter_arm.c
+++ b/vp8/common/arm/bilinearfilter_arm.c
@@ -9,7 +9,7 @@
*/
#include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
#include <math.h>
#include "vp8/common/filter.h"
#include "bilinearfilter_arm.h"
diff --git a/vp8/common/arm/filter_arm.c b/vp8/common/arm/filter_arm.c
index 148951a..7fe3967 100644
--- a/vp8/common/arm/filter_arm.c
+++ b/vp8/common/arm/filter_arm.c
@@ -10,7 +10,7 @@
#include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
#include <math.h>
#include "vp8/common/filter.h"
#include "vpx_ports/mem.h"
diff --git a/vp8/common/arm/loopfilter_arm.c b/vp8/common/arm/loopfilter_arm.c
index b8f9bd9..3bdc967 100644
--- a/vp8/common/arm/loopfilter_arm.c
+++ b/vp8/common/arm/loopfilter_arm.c
@@ -10,7 +10,7 @@
#include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
#include "vp8/common/loopfilter.h"
#include "vp8/common/onyxc_int.h"
diff --git a/vp8/common/arm/neon/idct_blk_neon.c b/vp8/common/arm/neon/idct_blk_neon.c
index ee7f223..fb327a7 100644
--- a/vp8/common/arm/neon/idct_blk_neon.c
+++ b/vp8/common/arm/neon/idct_blk_neon.c
@@ -9,7 +9,7 @@
*/
#include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
/* place these declarations here because we don't want to maintain them
* outside of this scope
diff --git a/vp8/common/arm/reconintra_arm.c b/vp8/common/arm/reconintra_arm.c
index 121e090..2874896 100644
--- a/vp8/common/arm/reconintra_arm.c
+++ b/vp8/common/arm/reconintra_arm.c
@@ -10,7 +10,7 @@
#include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
#include "vp8/common/blockd.h"
#include "vpx_mem/vpx_mem.h"
diff --git a/vp8/common/arm/variance_arm.c b/vp8/common/arm/variance_arm.c
index 891d767..467a509 100644
--- a/vp8/common/arm/variance_arm.c
+++ b/vp8/common/arm/variance_arm.c
@@ -9,7 +9,7 @@
*/
#include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
#include "vp8/common/variance.h"
#include "vp8/common/filter.h"
diff --git a/vp8/common/asm_com_offsets.c b/vp8/common/asm_com_offsets.c
index ae22b5f..7bab90f 100644
--- a/vp8/common/asm_com_offsets.c
+++ b/vp8/common/asm_com_offsets.c
@@ -12,7 +12,6 @@
#include "vpx_config.h"
#include "vpx/vpx_codec.h"
#include "vpx_ports/asm_offsets.h"
-#include "vpx_scale/yv12config.h"
#include "vp8/common/blockd.h"
#if CONFIG_POSTPROC
@@ -21,19 +20,6 @@
BEGIN
-/* vpx_scale */
-DEFINE(yv12_buffer_config_y_width, offsetof(YV12_BUFFER_CONFIG, y_width));
-DEFINE(yv12_buffer_config_y_height, offsetof(YV12_BUFFER_CONFIG, y_height));
-DEFINE(yv12_buffer_config_y_stride, offsetof(YV12_BUFFER_CONFIG, y_stride));
-DEFINE(yv12_buffer_config_uv_width, offsetof(YV12_BUFFER_CONFIG, uv_width));
-DEFINE(yv12_buffer_config_uv_height, offsetof(YV12_BUFFER_CONFIG, uv_height));
-DEFINE(yv12_buffer_config_uv_stride, offsetof(YV12_BUFFER_CONFIG, uv_stride));
-DEFINE(yv12_buffer_config_y_buffer, offsetof(YV12_BUFFER_CONFIG, y_buffer));
-DEFINE(yv12_buffer_config_u_buffer, offsetof(YV12_BUFFER_CONFIG, u_buffer));
-DEFINE(yv12_buffer_config_v_buffer, offsetof(YV12_BUFFER_CONFIG, v_buffer));
-DEFINE(yv12_buffer_config_border, offsetof(YV12_BUFFER_CONFIG, border));
-DEFINE(VP8BORDERINPIXELS_VAL, VP8BORDERINPIXELS);
-
#if CONFIG_POSTPROC
/* mfqe.c / filter_by_weight */
DEFINE(MFQE_PRECISION_VAL, MFQE_PRECISION);
@@ -58,11 +44,6 @@
ct_assert(B_HU_PRED, B_HU_PRED == 9);
#endif
-#if HAVE_NEON
-/* vp8_yv12_extend_frame_borders_neon makes several assumptions based on this */
-ct_assert(VP8BORDERINPIXELS_VAL, VP8BORDERINPIXELS == 32)
-#endif
-
#if HAVE_SSE2
#if CONFIG_POSTPROC
/* vp8_filter_by_weight16x16 and 8x8 */
diff --git a/vp8/common/dequantize.c b/vp8/common/dequantize.c
index 8eda486..6e2f69a 100644
--- a/vp8/common/dequantize.c
+++ b/vp8/common/dequantize.c
@@ -10,7 +10,7 @@
#include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
#include "vp8/common/blockd.h"
#include "vpx_mem/vpx_mem.h"
diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c
index 5a6ac7b..2de019d 100644
--- a/vp8/common/generic/systemdependent.c
+++ b/vp8/common/generic/systemdependent.c
@@ -10,7 +10,7 @@
#include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
#if ARCH_ARM
#include "vpx_ports/arm.h"
#elif ARCH_X86 || ARCH_X86_64
diff --git a/vp8/common/idct_blk.c b/vp8/common/idct_blk.c
index 0b058c7..8edfffb 100644
--- a/vp8/common/idct_blk.c
+++ b/vp8/common/idct_blk.c
@@ -9,7 +9,7 @@
*/
#include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
void vp8_dequant_idct_add_c(short *input, short *dq,
unsigned char *dest, int stride);
diff --git a/vp8/common/invtrans.h b/vp8/common/invtrans.h
index d048665..9262640 100644
--- a/vp8/common/invtrans.h
+++ b/vp8/common/invtrans.h
@@ -13,7 +13,7 @@
#define __INC_INVTRANS_H
#include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
#include "blockd.h"
#include "onyxc_int.h"
diff --git a/vp8/common/loopfilter.c b/vp8/common/loopfilter.c
index 41b4f12..2b1ee85 100644
--- a/vp8/common/loopfilter.c
+++ b/vp8/common/loopfilter.c
@@ -10,7 +10,7 @@
#include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
#include "loopfilter.h"
#include "onyxc_int.h"
#include "vpx_mem/vpx_mem.h"
diff --git a/vp8/common/loopfilter.h b/vp8/common/loopfilter.h
index b3af2d6..1e47f34 100644
--- a/vp8/common/loopfilter.h
+++ b/vp8/common/loopfilter.h
@@ -14,7 +14,7 @@
#include "vpx_ports/mem.h"
#include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
#define MAX_LOOP_FILTER 63
/* fraction of total macroblock rows to be used in fast filter level picking */
diff --git a/vp8/common/mfqe.c b/vp8/common/mfqe.c
index 3dff150..8a8f92f 100644
--- a/vp8/common/mfqe.c
+++ b/vp8/common/mfqe.c
@@ -20,7 +20,7 @@
#include "postproc.h"
#include "variance.h"
#include "vpx_mem/vpx_mem.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
#include "vpx_scale/yv12config.h"
#include <limits.h>
diff --git a/vp8/common/mips/dspr2/dequantize_dspr2.c b/vp8/common/mips/dspr2/dequantize_dspr2.c
index 6823325..619ee80 100644
--- a/vp8/common/mips/dspr2/dequantize_dspr2.c
+++ b/vp8/common/mips/dspr2/dequantize_dspr2.c
@@ -10,7 +10,7 @@
#include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
#include "vpx_mem/vpx_mem.h"
#if HAVE_DSPR2
diff --git a/vp8/common/mips/dspr2/filter_dspr2.c b/vp8/common/mips/dspr2/filter_dspr2.c
index 71fdcd7..ace5d40 100644
--- a/vp8/common/mips/dspr2/filter_dspr2.c
+++ b/vp8/common/mips/dspr2/filter_dspr2.c
@@ -10,7 +10,7 @@
#include <stdlib.h>
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
#include "vpx_ports/mem.h"
#if HAVE_DSPR2
diff --git a/vp8/common/mips/dspr2/idct_blk_dspr2.c b/vp8/common/mips/dspr2/idct_blk_dspr2.c
index 1e0ebd1..ab938cd 100644
--- a/vp8/common/mips/dspr2/idct_blk_dspr2.c
+++ b/vp8/common/mips/dspr2/idct_blk_dspr2.c
@@ -9,7 +9,7 @@
*/
#include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
#if HAVE_DSPR2
diff --git a/vp8/common/mips/dspr2/idctllm_dspr2.c b/vp8/common/mips/dspr2/idctllm_dspr2.c
index 25b7936..2eff710 100644
--- a/vp8/common/mips/dspr2/idctllm_dspr2.c
+++ b/vp8/common/mips/dspr2/idctllm_dspr2.c
@@ -8,7 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
#if HAVE_DSPR2
#define CROP_WIDTH 256
diff --git a/vp8/common/mips/dspr2/loopfilter_filters_dspr2.c b/vp8/common/mips/dspr2/loopfilter_filters_dspr2.c
index b8e5e4d..9ae6bc8 100644
--- a/vp8/common/mips/dspr2/loopfilter_filters_dspr2.c
+++ b/vp8/common/mips/dspr2/loopfilter_filters_dspr2.c
@@ -10,7 +10,7 @@
#include <stdlib.h>
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
#include "vp8/common/onyxc_int.h"
#if HAVE_DSPR2
diff --git a/vp8/common/mips/dspr2/reconinter_dspr2.c b/vp8/common/mips/dspr2/reconinter_dspr2.c
index a5239a3..a14b397 100644
--- a/vp8/common/mips/dspr2/reconinter_dspr2.c
+++ b/vp8/common/mips/dspr2/reconinter_dspr2.c
@@ -10,7 +10,7 @@
#include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
#include "vpx/vpx_integer.h"
#if HAVE_DSPR2
diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h
index 5325bac..03c9718 100644
--- a/vp8/common/onyxc_int.h
+++ b/vp8/common/onyxc_int.h
@@ -13,7 +13,7 @@
#define __INC_VP8C_INT_H
#include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
#include "vpx/internal/vpx_codec_internal.h"
#include "loopfilter.h"
#include "entropymv.h"
diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c
index 80fa530..c6442c9 100644
--- a/vp8/common/postproc.c
+++ b/vp8/common/postproc.c
@@ -10,7 +10,8 @@
#include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
+#include "vpx_scale_rtcd.h"
#include "vpx_scale/yv12config.h"
#include "postproc.h"
#include "common.h"
diff --git a/vp8/common/ppc/systemdependent.c b/vp8/common/ppc/systemdependent.c
index 87f4cac..6899c0e 100644
--- a/vp8/common/ppc/systemdependent.c
+++ b/vp8/common/ppc/systemdependent.c
@@ -12,13 +12,8 @@
#include "subpixel.h"
#include "loopfilter.h"
#include "recon.h"
-#include "idct.h"
#include "onyxc_int.h"
-void (*vp8_short_idct4x4)(short *input, short *output, int pitch);
-void (*vp8_short_idct4x4_1)(short *input, short *output, int pitch);
-void (*vp8_dc_only_idct)(short input_dc, short *output, int pitch);
-
extern void (*vp8_post_proc_down_and_across_mb_row)(
unsigned char *src_ptr,
unsigned char *dst_ptr,
diff --git a/vp8/common/reconinter.c b/vp8/common/reconinter.c
index 3da3bc7..43f84d0 100644
--- a/vp8/common/reconinter.c
+++ b/vp8/common/reconinter.c
@@ -11,7 +11,7 @@
#include <limits.h>
#include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
#include "vpx/vpx_integer.h"
#include "blockd.h"
#include "reconinter.h"
diff --git a/vp8/common/reconintra.c b/vp8/common/reconintra.c
index 4067a68..a851215 100644
--- a/vp8/common/reconintra.c
+++ b/vp8/common/reconintra.c
@@ -10,7 +10,7 @@
#include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
#include "vpx_mem/vpx_mem.h"
#include "blockd.h"
diff --git a/vp8/common/reconintra4x4.c b/vp8/common/reconintra4x4.c
index 7bb8d0a..3d4f2c4 100644
--- a/vp8/common/reconintra4x4.c
+++ b/vp8/common/reconintra4x4.c
@@ -10,17 +10,17 @@
#include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
#include "blockd.h"
void vp8_intra4x4_predict_c(unsigned char *Above,
unsigned char *yleft, int left_stride,
- B_PREDICTION_MODE b_mode,
+ int _b_mode,
unsigned char *dst, int dst_stride,
unsigned char top_left)
{
int i, r, c;
-
+ B_PREDICTION_MODE b_mode = (B_PREDICTION_MODE)_b_mode;
unsigned char Left[4];
Left[0] = yleft[0];
Left[1] = yleft[left_stride];
diff --git a/vp8/common/rtcd.c b/vp8/common/rtcd.c
index 01dad46..0b371b0 100644
--- a/vp8/common/rtcd.c
+++ b/vp8/common/rtcd.c
@@ -9,97 +9,13 @@
*/
#include "vpx_config.h"
#define RTCD_C
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
+#include "vpx_ports/vpx_once.h"
-#if CONFIG_MULTITHREAD && defined(_WIN32)
-#include <windows.h>
-#include <stdlib.h>
-static void once(void (*func)(void))
+extern void vpx_scale_rtcd(void);
+
+void vp8_rtcd()
{
- static CRITICAL_SECTION *lock;
- static LONG waiters;
- static int done;
- void *lock_ptr = &lock;
-
- /* If the initialization is complete, return early. This isn't just an
- * optimization, it prevents races on the destruction of the global
- * lock.
- */
- if(done)
- return;
-
- InterlockedIncrement(&waiters);
-
- /* Get a lock. We create one and try to make it the one-true-lock,
- * throwing it away if we lost the race.
- */
-
- {
- /* Scope to protect access to new_lock */
- CRITICAL_SECTION *new_lock = malloc(sizeof(CRITICAL_SECTION));
- InitializeCriticalSection(new_lock);
- if (InterlockedCompareExchangePointer(lock_ptr, new_lock, NULL) != NULL)
- {
- DeleteCriticalSection(new_lock);
- free(new_lock);
- }
- }
-
- /* At this point, we have a lock that can be synchronized on. We don't
- * care which thread actually performed the allocation.
- */
-
- EnterCriticalSection(lock);
-
- if (!done)
- {
- func();
- done = 1;
- }
-
- LeaveCriticalSection(lock);
-
- /* Last one out should free resources. The destructed objects are
- * protected by checking if(done) above.
- */
- if(!InterlockedDecrement(&waiters))
- {
- DeleteCriticalSection(lock);
- free(lock);
- lock = NULL;
- }
-}
-
-
-#elif CONFIG_MULTITHREAD && HAVE_PTHREAD_H
-#include <pthread.h>
-static void once(void (*func)(void))
-{
- static pthread_once_t lock = PTHREAD_ONCE_INIT;
- pthread_once(&lock, func);
-}
-
-
-#else
-/* No-op version that performs no synchronization. vpx_rtcd() is idempotent,
- * so as long as your platform provides atomic loads/stores of pointers
- * no synchronization is strictly necessary.
- */
-
-static void once(void (*func)(void))
-{
- static int done;
-
- if(!done)
- {
- func();
- done = 1;
- }
-}
-#endif
-
-
-void vpx_rtcd()
-{
+ vpx_scale_rtcd();
once(setup_rtcd_internal);
}
diff --git a/vp8/common/rtcd_defs.sh b/vp8/common/rtcd_defs.sh
index 0f950f8..4eb96b7 100644
--- a/vp8/common/rtcd_defs.sh
+++ b/vp8/common/rtcd_defs.sh
@@ -1,6 +1,8 @@
-common_forward_decls() {
+vp8_common_forward_decls() {
cat <<EOF
-#include "vp8/common/blockd.h"
+/*
+ * VP8
+ */
struct blockd;
struct macroblockd;
@@ -14,7 +16,7 @@
struct yv12_buffer_config;
EOF
}
-forward_decls common_forward_decls
+forward_decls vp8_common_forward_decls
#
# Dequant
@@ -146,7 +148,7 @@
prototype void vp8_build_intra_predictors_mbuv_s "struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride"
specialize vp8_build_intra_predictors_mbuv_s sse2 ssse3
-prototype void vp8_intra4x4_predict "unsigned char *Above, unsigned char *yleft, int left_stride, B_PREDICTION_MODE b_mode, unsigned char *dst, int dst_stride, unsigned char top_left"
+prototype void vp8_intra4x4_predict "unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left"
specialize vp8_intra4x4_predict media
vp8_intra4x4_predict_media=vp8_intra4x4_predict_armv6
@@ -530,39 +532,3 @@
# End of encoder only functions
fi
-
-# Scaler functions
-if [ "CONFIG_SPATIAL_RESAMPLING" != "yes" ]; then
- prototype void vp8_horizontal_line_4_5_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"
- prototype void vp8_vertical_band_4_5_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
- prototype void vp8_last_vertical_band_4_5_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
- prototype void vp8_horizontal_line_2_3_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"
- prototype void vp8_vertical_band_2_3_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
- prototype void vp8_last_vertical_band_2_3_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
- prototype void vp8_horizontal_line_3_5_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"
- prototype void vp8_vertical_band_3_5_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
- prototype void vp8_last_vertical_band_3_5_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
- prototype void vp8_horizontal_line_3_4_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"
- prototype void vp8_vertical_band_3_4_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
- prototype void vp8_last_vertical_band_3_4_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
- prototype void vp8_horizontal_line_1_2_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"
- prototype void vp8_vertical_band_1_2_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
- prototype void vp8_last_vertical_band_1_2_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
- prototype void vp8_horizontal_line_5_4_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"
- prototype void vp8_vertical_band_5_4_scale "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
- prototype void vp8_horizontal_line_5_3_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"
- prototype void vp8_vertical_band_5_3_scale "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
- prototype void vp8_horizontal_line_2_1_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"
- prototype void vp8_vertical_band_2_1_scale "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
- prototype void vp8_vertical_band_2_1_scale_i "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
-fi
-
-prototype void vp8_yv12_extend_frame_borders "struct yv12_buffer_config *ybf"
-specialize vp8_yv12_extend_frame_borders neon
-
-prototype void vp8_yv12_copy_frame "struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc"
-specialize vp8_yv12_copy_frame neon
-
-prototype void vp8_yv12_copy_y "struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc"
-specialize vp8_yv12_copy_y neon
-
diff --git a/vp8/common/x86/idct_blk_mmx.c b/vp8/common/x86/idct_blk_mmx.c
index 4adf3f5..49b2013 100644
--- a/vp8/common/x86/idct_blk_mmx.c
+++ b/vp8/common/x86/idct_blk_mmx.c
@@ -9,7 +9,7 @@
*/
#include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
#include "vp8/common/blockd.h"
extern void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q);
diff --git a/vp8/common/x86/idct_blk_sse2.c b/vp8/common/x86/idct_blk_sse2.c
index 056e052..ae96ec8 100644
--- a/vp8/common/x86/idct_blk_sse2.c
+++ b/vp8/common/x86/idct_blk_sse2.c
@@ -9,7 +9,7 @@
*/
#include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
void vp8_idct_dequant_0_2x_sse2
(short *q, short *dq ,
diff --git a/vp8/common/x86/recon_wrapper_sse2.c b/vp8/common/x86/recon_wrapper_sse2.c
index b482faa..65f4251 100644
--- a/vp8/common/x86/recon_wrapper_sse2.c
+++ b/vp8/common/x86/recon_wrapper_sse2.c
@@ -9,7 +9,7 @@
*/
#include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
#include "vpx_mem/vpx_mem.h"
#include "vp8/common/blockd.h"
diff --git a/vp8/common/x86/vp8_asm_stubs.c b/vp8/common/x86/vp8_asm_stubs.c
index 3437a23..c0416b7 100644
--- a/vp8/common/x86/vp8_asm_stubs.c
+++ b/vp8/common/x86/vp8_asm_stubs.c
@@ -10,7 +10,7 @@
#include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
#include "vpx_ports/mem.h"
#include "filter_x86.h"
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index a4a00f6..a22f372 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -10,7 +10,8 @@
#include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
+#include "./vpx_scale_rtcd.h"
#include "onyxd_int.h"
#include "vp8/common/header.h"
#include "vp8/common/reconintra4x4.h"
diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c
index 8d6871b..459e34e 100644
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@@ -25,6 +25,7 @@
#include <assert.h>
#include "vp8/common/quant_common.h"
+#include "./vpx_scale_rtcd.h"
#include "vpx_scale/vpxscale.h"
#include "vp8/common/systemdependent.h"
#include "vpx_ports/vpx_timer.h"
diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c
index 88c06be..b18cb50 100644
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@@ -10,7 +10,7 @@
#include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
#if !defined(WIN32) && CONFIG_OS_SUPPORT == 1
# include <unistd.h>
#endif
diff --git a/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm b/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm
index a644a00..4abe818 100644
--- a/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm
+++ b/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm
@@ -15,7 +15,7 @@
EXPORT |vp8_encode_value|
IMPORT |vp8_validate_buffer_arm|
- INCLUDE asm_enc_offsets.asm
+ INCLUDE vp8_asm_enc_offsets.asm
ARM
REQUIRE8
diff --git a/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm
index a1cd467..90a141c 100644
--- a/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm
+++ b/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm
@@ -12,7 +12,7 @@
EXPORT |vp8cx_pack_tokens_armv5|
IMPORT |vp8_validate_buffer_arm|
- INCLUDE asm_enc_offsets.asm
+ INCLUDE vp8_asm_enc_offsets.asm
ARM
REQUIRE8
diff --git a/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm
index 1fa5e6c..3a8d17a 100644
--- a/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm
+++ b/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm
@@ -12,7 +12,7 @@
EXPORT |vp8cx_pack_mb_row_tokens_armv5|
IMPORT |vp8_validate_buffer_arm|
- INCLUDE asm_enc_offsets.asm
+ INCLUDE vp8_asm_enc_offsets.asm
ARM
REQUIRE8
diff --git a/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm
index 90a98fe..e9aa495 100644
--- a/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm
+++ b/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm
@@ -12,7 +12,7 @@
EXPORT |vp8cx_pack_tokens_into_partitions_armv5|
IMPORT |vp8_validate_buffer_arm|
- INCLUDE asm_enc_offsets.asm
+ INCLUDE vp8_asm_enc_offsets.asm
ARM
REQUIRE8
diff --git a/vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm b/vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm
index d61f5d9..de35a1e 100644
--- a/vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm
+++ b/vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm
@@ -11,7 +11,7 @@
EXPORT |vp8_fast_quantize_b_armv6|
- INCLUDE asm_enc_offsets.asm
+ INCLUDE vp8_asm_enc_offsets.asm
ARM
REQUIRE8
diff --git a/vp8/encoder/arm/armv6/vp8_subtract_armv6.asm b/vp8/encoder/arm/armv6/vp8_subtract_armv6.asm
index f329f8f..05746cf 100644
--- a/vp8/encoder/arm/armv6/vp8_subtract_armv6.asm
+++ b/vp8/encoder/arm/armv6/vp8_subtract_armv6.asm
@@ -13,7 +13,7 @@
EXPORT |vp8_subtract_mbuv_armv6|
EXPORT |vp8_subtract_b_armv6|
- INCLUDE asm_enc_offsets.asm
+ INCLUDE vp8_asm_enc_offsets.asm
ARM
REQUIRE8
diff --git a/vp8/encoder/arm/dct_arm.c b/vp8/encoder/arm/dct_arm.c
index af0fb27..f71300d 100644
--- a/vp8/encoder/arm/dct_arm.c
+++ b/vp8/encoder/arm/dct_arm.c
@@ -9,7 +9,7 @@
*/
#include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
#if HAVE_MEDIA
diff --git a/vp8/encoder/arm/neon/fastquantizeb_neon.asm b/vp8/encoder/arm/neon/fastquantizeb_neon.asm
index 1430588..9374310 100644
--- a/vp8/encoder/arm/neon/fastquantizeb_neon.asm
+++ b/vp8/encoder/arm/neon/fastquantizeb_neon.asm
@@ -12,7 +12,7 @@
EXPORT |vp8_fast_quantize_b_neon|
EXPORT |vp8_fast_quantize_b_pair_neon|
- INCLUDE asm_enc_offsets.asm
+ INCLUDE vp8_asm_enc_offsets.asm
ARM
REQUIRE8
diff --git a/vp8/encoder/arm/neon/subtract_neon.asm b/vp8/encoder/arm/neon/subtract_neon.asm
index 91a328c..5bda786 100644
--- a/vp8/encoder/arm/neon/subtract_neon.asm
+++ b/vp8/encoder/arm/neon/subtract_neon.asm
@@ -12,7 +12,7 @@
EXPORT |vp8_subtract_mby_neon|
EXPORT |vp8_subtract_mbuv_neon|
- INCLUDE asm_enc_offsets.asm
+ INCLUDE vp8_asm_enc_offsets.asm
ARM
REQUIRE8
diff --git a/vp8/encoder/arm/quantize_arm.c b/vp8/encoder/arm/quantize_arm.c
index 8999e34..80d9ad0 100644
--- a/vp8/encoder/arm/quantize_arm.c
+++ b/vp8/encoder/arm/quantize_arm.c
@@ -10,7 +10,7 @@
#include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
#include "vp8/encoder/block.h"
#include <math.h>
#include "vpx_mem/vpx_mem.h"
diff --git a/vp8/encoder/denoising.c b/vp8/encoder/denoising.c
index f3faa22..1ee1cb5 100644
--- a/vp8/encoder/denoising.c
+++ b/vp8/encoder/denoising.c
@@ -13,7 +13,7 @@
#include "vp8/common/reconinter.h"
#include "vpx/vpx_integer.h"
#include "vpx_mem/vpx_mem.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
static const unsigned int NOISE_MOTION_THRESHOLD = 25 * 25;
/* SSE_DIFF_THRESHOLD is selected as ~95% confidence assuming
diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c
index 340dd63..cfa4cb9 100644
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -10,7 +10,7 @@
#include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
#include "quantize.h"
#include "vp8/common/reconintra4x4.h"
#include "encodemb.h"
diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c
index 7d494f2..7ed2fe1 100644
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -10,7 +10,7 @@
#include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
#include "encodemb.h"
#include "vp8/common/reconinter.h"
#include "quantize.h"
diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index 30bf8a6..5e41ec8 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -12,6 +12,7 @@
#include <limits.h>
#include <stdio.h>
+#include "./vpx_scale_rtcd.h"
#include "block.h"
#include "onyx_int.h"
#include "vp8/common/variance.h"
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 4680f39..9837485 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -10,6 +10,7 @@
#include "vpx_config.h"
+#include "./vpx_scale_rtcd.h"
#include "vp8/common/onyxc_int.h"
#include "vp8/common/blockd.h"
#include "onyx_int.h"
diff --git a/vp8/encoder/picklpf.c b/vp8/encoder/picklpf.c
index 4121349..8114ec3 100644
--- a/vp8/encoder/picklpf.c
+++ b/vp8/encoder/picklpf.c
@@ -9,6 +9,7 @@
*/
+#include "./vpx_scale_rtcd.h"
#include "vp8/common/onyxc_int.h"
#include "onyx_int.h"
#include "quantize.h"
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index ceb817c..f0ec7b6 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -14,7 +14,7 @@
#include <limits.h>
#include <assert.h>
#include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
#include "vp8/common/pragmas.h"
#include "tokenize.h"
#include "treewriter.h"
diff --git a/vp8/encoder/x86/denoising_sse2.c b/vp8/encoder/x86/denoising_sse2.c
index c1ac6c1..cceb826 100644
--- a/vp8/encoder/x86/denoising_sse2.c
+++ b/vp8/encoder/x86/denoising_sse2.c
@@ -12,9 +12,10 @@
#include "vp8/common/reconinter.h"
#include "vpx/vpx_integer.h"
#include "vpx_mem/vpx_mem.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
#include <emmintrin.h>
+#include "vpx_ports/emmintrin_compat.h"
union sum_union {
__m128i v;
diff --git a/vp8/encoder/x86/quantize_sse2.asm b/vp8/encoder/x86/quantize_sse2.asm
index 724e54c..fe9464b 100644
--- a/vp8/encoder/x86/quantize_sse2.asm
+++ b/vp8/encoder/x86/quantize_sse2.asm
@@ -9,7 +9,7 @@
%include "vpx_ports/x86_abi_support.asm"
-%include "asm_enc_offsets.asm"
+%include "vp8_asm_enc_offsets.asm"
; void vp8_regular_quantize_b_sse2 | arg
diff --git a/vp8/encoder/x86/quantize_sse4.asm b/vp8/encoder/x86/quantize_sse4.asm
index f0e5d40..f211464 100644
--- a/vp8/encoder/x86/quantize_sse4.asm
+++ b/vp8/encoder/x86/quantize_sse4.asm
@@ -9,7 +9,7 @@
%include "vpx_ports/x86_abi_support.asm"
-%include "asm_enc_offsets.asm"
+%include "vp8_asm_enc_offsets.asm"
; void vp8_regular_quantize_b_sse4 | arg
diff --git a/vp8/encoder/x86/quantize_ssse3.asm b/vp8/encoder/x86/quantize_ssse3.asm
index dd526f4..3536889 100644
--- a/vp8/encoder/x86/quantize_ssse3.asm
+++ b/vp8/encoder/x86/quantize_ssse3.asm
@@ -9,7 +9,7 @@
%include "vpx_ports/x86_abi_support.asm"
-%include "asm_enc_offsets.asm"
+%include "vp8_asm_enc_offsets.asm"
; void vp8_fast_quantize_b_ssse3 | arg
diff --git a/vp8/encoder/x86/vp8_enc_stubs_mmx.c b/vp8/encoder/x86/vp8_enc_stubs_mmx.c
index da25f52..cf3d8ca 100644
--- a/vp8/encoder/x86/vp8_enc_stubs_mmx.c
+++ b/vp8/encoder/x86/vp8_enc_stubs_mmx.c
@@ -10,7 +10,7 @@
#include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
#include "vpx_ports/x86.h"
#include "vp8/encoder/block.h"
diff --git a/vp8/encoder/x86/vp8_enc_stubs_sse2.c b/vp8/encoder/x86/vp8_enc_stubs_sse2.c
index 68db815..3dfbee3 100644
--- a/vp8/encoder/x86/vp8_enc_stubs_sse2.c
+++ b/vp8/encoder/x86/vp8_enc_stubs_sse2.c
@@ -10,7 +10,7 @@
#include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
#include "vpx_ports/x86.h"
#include "vp8/encoder/block.h"
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk
index a328f46..2f73420 100644
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -191,3 +191,8 @@
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp8_subpixelvariance8x8_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp8_subpixelvariance16x16_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp8_subpixelvariance16x16s_neon$(ASM)
+
+$(eval $(call asm_offsets_template,\
+ vp8_asm_com_offsets.asm, $(VP8_PREFIX)common/asm_com_offsets.c))
+
+$(eval $(call rtcd_h_template,vp8_rtcd,vp8/common/rtcd_defs.sh))
diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index eeac3a8..83eecba 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -9,7 +9,7 @@
*/
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
#include "vpx/vpx_codec.h"
#include "vpx/internal/vpx_codec_internal.h"
#include "vpx_version.h"
@@ -582,7 +582,7 @@
struct VP8_COMP *optr;
- vpx_rtcd();
+ vp8_rtcd();
if (!ctx->priv)
{
diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c
index c13d697..01482fc 100644
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c
@@ -11,7 +11,7 @@
#include <stdlib.h>
#include <string.h>
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
#include "vpx/vpx_decoder.h"
#include "vpx/vp8dx.h"
#include "vpx/internal/vpx_codec_internal.h"
@@ -194,7 +194,7 @@
vpx_codec_err_t res = VPX_CODEC_OK;
(void) data;
- vpx_rtcd();
+ vp8_rtcd();
/* This function only allocates space for the vpx_codec_alg_priv_t
* structure. More memory may be required at the time the stream
diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk
index 0ae2f10..0659407 100644
--- a/vp8/vp8cx.mk
+++ b/vp8/vp8cx.mk
@@ -9,8 +9,6 @@
##
-include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8_common.mk
-
VP8_CX_EXPORTS += exports_enc
VP8_CX_SRCS-yes += $(VP8_COMMON_SRCS-yes)
@@ -97,6 +95,7 @@
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/denoising_sse2.c
ifeq ($(HAVE_SSE2),yes)
vp8/encoder/x86/denoising_sse2.c.o: CFLAGS += -msse2
+vp8/encoder/x86/denoising_sse2.c.d: CFLAGS += -msse2
endif
endif
@@ -115,3 +114,6 @@
VP8_CX_SRCS-yes := $(filter-out $(VP8_CX_SRCS_REMOVE-yes),$(VP8_CX_SRCS-yes))
+
+$(eval $(call asm_offsets_template,\
+ vp8_asm_enc_offsets.asm, $(VP8_PREFIX)encoder/asm_enc_offsets.c))
diff --git a/vp8/vp8dx.mk b/vp8/vp8dx.mk
index dd39190..8be4c7b 100644
--- a/vp8/vp8dx.mk
+++ b/vp8/vp8dx.mk
@@ -9,8 +9,6 @@
##
-include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8_common.mk
-
VP8_DX_EXPORTS += exports_dec
VP8_DX_SRCS-yes += $(VP8_COMMON_SRCS-yes)
@@ -22,30 +20,6 @@
VP8_DX_SRCS-yes += vp8_dx_iface.c
-# common
-#define ARM
-#define DISABLE_THREAD
-
-#INCLUDES += algo/vpx_common/vpx_mem/include
-#INCLUDES += common
-#INCLUDES += common
-#INCLUDES += common
-#INCLUDES += common
-#INCLUDES += decoder
-
-
-
-# decoder
-#define ARM
-#define DISABLE_THREAD
-
-#INCLUDES += algo/vpx_common/vpx_mem/include
-#INCLUDES += common
-#INCLUDES += common
-#INCLUDES += common
-#INCLUDES += common
-#INCLUDES += decoder
-
VP8_DX_SRCS-yes += decoder/asm_dec_offsets.c
VP8_DX_SRCS-yes += decoder/dboolhuff.c
VP8_DX_SRCS-yes += decoder/decodemv.c
@@ -64,3 +38,6 @@
VP8_DX_SRCS-$(CONFIG_MULTITHREAD) += decoder/threading.c
VP8_DX_SRCS-yes := $(filter-out $(VP8_DX_SRCS_REMOVE-yes),$(VP8_DX_SRCS-yes))
+
+$(eval $(call asm_offsets_template,\
+ vp8_asm_dec_offsets.asm, $(VP8_PREFIX)decoder/asm_dec_offsets.c))
diff --git a/vp9/common/generic/vp9_systemdependent.c b/vp9/common/generic/vp9_systemdependent.c
new file mode 100644
index 0000000..b02f3f0
--- /dev/null
+++ b/vp9/common/generic/vp9_systemdependent.c
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "./vpx_config.h"
+#include "vp9_rtcd.h"
+#include "vp9/common/vp9_subpixel.h"
+#include "vp9/common/vp9_loopfilter.h"
+#include "vp9/common/vp9_onyxc_int.h"
+
+void vp9_machine_specific_config(VP9_COMMON *ctx) {
+ vp9_rtcd(); /* ctx is unused; this call is the only work done here */
+}
diff --git a/vp9/common/ppc/vp9_copy_altivec.asm b/vp9/common/ppc/vp9_copy_altivec.asm
new file mode 100644
index 0000000..a4ce915
--- /dev/null
+++ b/vp9/common/ppc/vp9_copy_altivec.asm
@@ -0,0 +1,47 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+ .globl copy_mem16x16_ppc
+
+;# r3 unsigned char *src
+;# r4 int src_stride
+;# r5 unsigned char *dst
+;# r6 int dst_stride
+
+;# Make the assumption that input will not be aligned,
+;# but the output will be. So two reads and a perm
+;# for the input, but only one store for the output.
+copy_mem16x16_ppc:
+ mfspr r11, 256 ;# get old VRSAVE
+ oris r12, r11, 0xe000 ;# flag v0-v2 (the vectors used below) as live
+ mtspr 256, r12 ;# set VRSAVE
+
+ li r10, 16 ;# 16 = row count and offset of the second source vector
+ mtctr r10 ;# CTR = 16 loop iterations, one per row
+
+cp_16x16_loop:
+ lvsl v0, 0, r3 ;# permutate value for alignment
+
+ lvx v1, 0, r3 ;# aligned vector containing the first source byte
+ lvx v2, r10, r3 ;# the aligned vector 16 bytes further on
+
+ vperm v1, v1, v2, v0 ;# pick the 16 bytes starting at r3 out of v1:v2
+
+ stvx v1, 0, r5 ;# one store per row; dst is assumed to be aligned
+
+ add r3, r3, r4 ;# increment source pointer
+ add r5, r5, r6 ;# increment destination pointer
+
+ bdnz cp_16x16_loop ;# decrement CTR and loop while it is non-zero
+
+ mtspr 256, r11 ;# reset old VRSAVE
+
+ blr
diff --git a/vp9/common/ppc/vp9_filter_altivec.asm b/vp9/common/ppc/vp9_filter_altivec.asm
new file mode 100644
index 0000000..4da2e94
--- /dev/null
+++ b/vp9/common/ppc/vp9_filter_altivec.asm
@@ -0,0 +1,1013 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+ .globl sixtap_predict_ppc
+ .globl sixtap_predict8x4_ppc
+ .globl sixtap_predict8x8_ppc
+ .globl sixtap_predict16x16_ppc
+
+.macro load_c V, LABEL, OFF, R0, R1
+ lis \R0, \LABEL@ha ;# R0 = high-adjusted upper half of LABEL's address
+ la \R1, \LABEL@l(\R0) ;# R1 = full address of LABEL (R0 + low half)
+ lvx \V, \OFF, \R1 ;# V = 16-byte vector at LABEL + OFF; R0 and R1 are clobbered
+.endm
+
+.macro load_hfilter V0, V1
+ load_c \V0, HFilter, r5, r9, r10 ;# V0 = vector at HFilter + r5; r10 is left holding the address of HFilter
+
+ addi r5, r5, 16 ;# step to the following 16 bytes (r5 is modified)
+ lvx \V1, r5, r10 ;# V1 = vector at HFilter + r5 (second half of the entry)
+.endm
+
+;# Vertical filtering
+.macro Vprolog
+ load_c v0, VFilter, r6, r3, r10
+
+ vspltish v5, 8
+ vspltish v6, 3
+ vslh v6, v5, v6 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040
+
+ vspltb v1, v0, 1
+ vspltb v2, v0, 2
+ vspltb v3, v0, 3
+ vspltb v4, v0, 4
+ vspltb v5, v0, 5
+ vspltb v0, v0, 0
+.endm
+
+.macro vpre_load
+ Vprolog
+ li r10, 16
+ lvx v10, 0, r9 ;# v10..v14 = first 5 rows
+ lvx v11, r10, r9
+ addi r9, r9, 32
+ lvx v12, 0, r9
+ lvx v13, r10, r9
+ addi r9, r9, 32
+ lvx v14, 0, r9
+.endm
+
+.macro Msum Re, Ro, V, T, TMP
+ ;# (Re,Ro) += (V*T)
+ vmuleub \TMP, \V, \T ;# even-byte products; trashes TMP (v8 or v25 at the call sites)
+ vadduhm \Re, \Re, \TMP ;# Re = evens, saturation unnecessary
+ vmuloub \TMP, \V, \T ;# odd-byte products, reusing TMP
+ vadduhm \Ro, \Ro, \TMP ;# Ro = odds
+.endm
+
+.macro vinterp_no_store P0 P1 P2 P3 P4 P5
+ vmuleub v8, \P0, v0 ;# 64 + 4 positive taps
+ vadduhm v16, v6, v8
+ vmuloub v8, \P0, v0
+ vadduhm v17, v6, v8
+ Msum v16, v17, \P2, v2, v8
+ Msum v16, v17, \P3, v3, v8
+ Msum v16, v17, \P5, v5, v8
+
+ vmuleub v18, \P1, v1 ;# 2 negative taps
+ vmuloub v19, \P1, v1
+ Msum v18, v19, \P4, v4, v8
+
+ vsubuhs v16, v16, v18 ;# subtract neg from pos
+ vsubuhs v17, v17, v19
+ vsrh v16, v16, v7 ;# divide by 128
+ vsrh v17, v17, v7 ;# v16 v17 = evens, odds
+ vmrghh v18, v16, v17 ;# v18 v19 = 16-bit result in order
+ vmrglh v19, v16, v17
+ vpkuhus \P0, v18, v19 ;# P0 = 8-bit result
+.endm
+
+.macro vinterp_no_store_8x8 P0 P1 P2 P3 P4 P5
+ vmuleub v24, \P0, v13 ;# 64 + 4 positive taps
+ vadduhm v21, v20, v24
+ vmuloub v24, \P0, v13
+ vadduhm v22, v20, v24
+ Msum v21, v22, \P2, v15, v25
+ Msum v21, v22, \P3, v16, v25
+ Msum v21, v22, \P5, v18, v25
+
+ vmuleub v23, \P1, v14 ;# 2 negative taps
+ vmuloub v24, \P1, v14
+ Msum v23, v24, \P4, v17, v25
+
+ vsubuhs v21, v21, v23 ;# subtract neg from pos
+ vsubuhs v22, v22, v24
+ vsrh v21, v21, v19 ;# divide by 128
+ vsrh v22, v22, v19 ;# v21 v22 = evens, odds
+ vmrghh v23, v21, v22 ;# v23 v24 = 16-bit result in order
+ vmrglh v24, v21, v22
+ vpkuhus \P0, v23, v24 ;# P0 = 8-bit result
+.endm
+
+
+.macro Vinterp P0 P1 P2 P3 P4 P5
+ vinterp_no_store \P0, \P1, \P2, \P3, \P4, \P5
+ stvx \P0, 0, r7
+ add r7, r7, r8 ;# 33 ops per 16 pels
+.endm
+
+
+.macro luma_v P0, P1, P2, P3, P4, P5
+ addi r9, r9, 16 ;# P5 = newest input row
+ lvx \P5, 0, r9
+ Vinterp \P0, \P1, \P2, \P3, \P4, \P5
+.endm
+
+.macro luma_vtwo
+ luma_v v10, v11, v12, v13, v14, v15
+ luma_v v11, v12, v13, v14, v15, v10
+.endm
+
+.macro luma_vfour
+ luma_vtwo
+ luma_v v12, v13, v14, v15, v10, v11
+ luma_v v13, v14, v15, v10, v11, v12
+.endm
+
+.macro luma_vsix
+ luma_vfour
+ luma_v v14, v15, v10, v11, v12, v13
+ luma_v v15, v10, v11, v12, v13, v14
+.endm
+
+.macro Interp4 R I I4
+ vmsummbm \R, v13, \I, v15
+ vmsummbm \R, v14, \I4, \R
+.endm
+
+.macro Read8x8 VD, RS, RP, increment_counter
+ lvsl v21, 0, \RS ;# permutate value for alignment
+
+ ;# input to filter is 21 bytes wide, output is 16 bytes.
+ ;# input can span three vectors if not aligned correctly.
+ lvx \VD, 0, \RS
+ lvx v20, r10, \RS
+
+.if \increment_counter
+ add \RS, \RS, \RP
+.endif
+
+ vperm \VD, \VD, v20, v21
+.endm
+
+.macro interp_8x8 R
+ vperm v20, \R, \R, v16 ;# v20 = 0123 1234 2345 3456
+ vperm v21, \R, \R, v17 ;# v21 = 4567 5678 6789 789A
+ Interp4 v20, v20, v21 ;# v20 = result 0 1 2 3
+ vperm \R, \R, \R, v18 ;# R = 89AB 9ABC ABCx BCxx
+ Interp4 v21, v21, \R ;# v21 = result 4 5 6 7
+
+ vpkswus \R, v20, v21 ;# R = 0 1 2 3 4 5 6 7
+ vsrh \R, \R, v19
+
+ vpkuhus \R, \R, \R ;# saturate and pack
+
+.endm
+
+.macro Read4x4 VD, RS, RP, increment_counter
+ lvsl v21, 0, \RS ;# permutate value for alignment
+
+ ;# input to filter is 21 bytes wide, output is 16 bytes.
+ ;# input can span three vectors if not aligned correctly.
+ lvx v20, 0, \RS
+
+.if \increment_counter
+ add \RS, \RS, \RP
+.endif
+
+ vperm \VD, v20, v20, v21
+.endm
+ .text
+
+ .align 2
+;# r3 unsigned char * src
+;# r4 int src_pitch
+;# r5 int x_offset
+;# r6 int y_offset
+;# r7 unsigned char * dst
+;# r8 int dst_pitch
+sixtap_predict_ppc:
+ mfspr r11, 256 ;# get old VRSAVE
+ oris r12, r11, 0xff87
+ ori r12, r12, 0xffc0
+ mtspr 256, r12 ;# set VRSAVE
+
+ stwu r1,-32(r1) ;# create space on the stack
+
+ slwi. r5, r5, 5 ;# index into horizontal filter array
+
+ vspltish v19, 7
+
+ ;# If there isn't any filtering to be done for the horizontal, then
+ ;# just skip to the second pass.
+ beq- vertical_only_4x4
+
+ ;# load up horizontal filter
+ load_hfilter v13, v14
+
+ ;# rounding added in on the multiply
+ vspltisw v16, 8
+ vspltisw v15, 3
+ vslw v15, v16, v15 ;# 0x00000040000000400000004000000040
+
+ ;# Load up permutation constants
+ load_c v16, B_0123, 0, r9, r10
+ load_c v17, B_4567, 0, r9, r10
+ load_c v18, B_89AB, 0, r9, r10
+
+ ;# Back off input buffer by 2 bytes. Need 2 before and 3 after
+ addi r3, r3, -2
+
+ addi r9, r3, 0
+ li r10, 16
+ Read8x8 v2, r3, r4, 1
+ Read8x8 v3, r3, r4, 1
+ Read8x8 v4, r3, r4, 1
+ Read8x8 v5, r3, r4, 1
+
+ slwi. r6, r6, 4 ;# index into vertical filter array
+
+ ;# filter a line
+ interp_8x8 v2
+ interp_8x8 v3
+ interp_8x8 v4
+ interp_8x8 v5
+
+ ;# Finished filtering main horizontal block. If there is no
+ ;# vertical filtering, jump to storing the data. Otherwise
+ ;# load up and filter the additional 5 lines that are needed
+ ;# for the vertical filter.
+ beq- store_4x4
+
+ ;# only needed if there is a vertical filter present
+ ;# if the second filter is not null then need to back off by 2*pitch
+ sub r9, r9, r4
+ sub r9, r9, r4
+
+ Read8x8 v0, r9, r4, 1
+ Read8x8 v1, r9, r4, 0
+ Read8x8 v6, r3, r4, 1
+ Read8x8 v7, r3, r4, 1
+ Read8x8 v8, r3, r4, 0
+
+ interp_8x8 v0
+ interp_8x8 v1
+ interp_8x8 v6
+ interp_8x8 v7
+ interp_8x8 v8
+
+ b second_pass_4x4
+
+vertical_only_4x4:
+ ;# only needed if there is a vertical filter present
+ ;# if the second filter is not null then need to back off by 2*pitch
+ sub r3, r3, r4
+ sub r3, r3, r4
+ li r10, 16
+
+ Read8x8 v0, r3, r4, 1
+ Read8x8 v1, r3, r4, 1
+ Read8x8 v2, r3, r4, 1
+ Read8x8 v3, r3, r4, 1
+ Read8x8 v4, r3, r4, 1
+ Read8x8 v5, r3, r4, 1
+ Read8x8 v6, r3, r4, 1
+ Read8x8 v7, r3, r4, 1
+ Read8x8 v8, r3, r4, 0
+
+ slwi r6, r6, 4 ;# index into vertical filter array
+
+second_pass_4x4:
+ load_c v20, b_hilo_4x4, 0, r9, r10
+ load_c v21, b_hilo, 0, r9, r10
+
+ ;# reposition input so that it can go through the
+ ;# filtering phase with one pass.
+ vperm v0, v0, v1, v20 ;# 0 1 x x
+ vperm v2, v2, v3, v20 ;# 2 3 x x
+ vperm v4, v4, v5, v20 ;# 4 5 x x
+ vperm v6, v6, v7, v20 ;# 6 7 x x
+
+ vperm v0, v0, v2, v21 ;# 0 1 2 3
+ vperm v4, v4, v6, v21 ;# 4 5 6 7
+
+ vsldoi v1, v0, v4, 4
+ vsldoi v2, v0, v4, 8
+ vsldoi v3, v0, v4, 12
+
+ vsldoi v5, v4, v8, 4
+
+ load_c v13, VFilter, r6, r9, r10
+
+ vspltish v15, 8
+ vspltish v20, 3
+ vslh v20, v15, v20 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040
+
+ vspltb v14, v13, 1
+ vspltb v15, v13, 2
+ vspltb v16, v13, 3
+ vspltb v17, v13, 4
+ vspltb v18, v13, 5
+ vspltb v13, v13, 0
+
+ vinterp_no_store_8x8 v0, v1, v2, v3, v4, v5
+
+ stvx v0, 0, r1
+
+ lwz r0, 0(r1)
+ stw r0, 0(r7)
+ add r7, r7, r8
+
+ lwz r0, 4(r1)
+ stw r0, 0(r7)
+ add r7, r7, r8
+
+ lwz r0, 8(r1)
+ stw r0, 0(r7)
+ add r7, r7, r8
+
+ lwz r0, 12(r1)
+ stw r0, 0(r7)
+
+ b exit_4x4
+
+store_4x4:
+
+ stvx v2, 0, r1
+ lwz r0, 0(r1)
+ stw r0, 0(r7)
+ add r7, r7, r8
+
+ stvx v3, 0, r1
+ lwz r0, 0(r1)
+ stw r0, 0(r7)
+ add r7, r7, r8
+
+ stvx v4, 0, r1
+ lwz r0, 0(r1)
+ stw r0, 0(r7)
+ add r7, r7, r8
+
+ stvx v5, 0, r1
+ lwz r0, 0(r1)
+ stw r0, 0(r7)
+
+exit_4x4:
+
+ addi r1, r1, 32 ;# recover stack
+
+ mtspr 256, r11 ;# reset old VRSAVE
+
+ blr
+
+.macro w_8x8 V, D, R, P
+ stvx \V, 0, r1 ;# spill V to the scratch area at the top of the stack
+ lwz \R, 0(r1)
+ stw \R, 0(\D) ;# first 4 bytes of the row, written through D (was hard-coded r7)
+ lwz \R, 4(r1)
+ stw \R, 4(\D) ;# next 4 bytes of the row
+ add \D, \D, \P ;# advance destination D by pitch P
+.endm
+
+ .align 2
+;# r3 unsigned char * src
+;# r4 int src_pitch
+;# r5 int x_offset
+;# r6 int y_offset
+;# r7 unsigned char * dst
+;# r8 int dst_pitch
+
+sixtap_predict8x4_ppc:
+ mfspr r11, 256 ;# get old VRSAVE
+ oris r12, r11, 0xffff
+ ori r12, r12, 0xffc0
+ mtspr 256, r12 ;# set VRSAVE
+
+ stwu r1,-32(r1) ;# create space on the stack
+
+ slwi. r5, r5, 5 ;# index into horizontal filter array
+
+ vspltish v19, 7
+
+ ;# If there isn't any filtering to be done for the horizontal, then
+ ;# just skip to the second pass.
+ beq- second_pass_pre_copy_8x4
+
+ load_hfilter v13, v14
+
+ ;# rounding added in on the multiply
+ vspltisw v16, 8
+ vspltisw v15, 3
+ vslw v15, v16, v15 ;# 0x00000040000000400000004000000040
+
+ ;# Load up permutation constants
+ load_c v16, B_0123, 0, r9, r10
+ load_c v17, B_4567, 0, r9, r10
+ load_c v18, B_89AB, 0, r9, r10
+
+ ;# Back off input buffer by 2 bytes. Need 2 before and 3 after
+ addi r3, r3, -2
+
+ addi r9, r3, 0
+ li r10, 16
+ Read8x8 v2, r3, r4, 1
+ Read8x8 v3, r3, r4, 1
+ Read8x8 v4, r3, r4, 1
+ Read8x8 v5, r3, r4, 1
+
+ slwi. r6, r6, 4 ;# index into vertical filter array
+
+ ;# filter a line
+ interp_8x8 v2
+ interp_8x8 v3
+ interp_8x8 v4
+ interp_8x8 v5
+
+ ;# Finished filtering main horizontal block. If there is no
+ ;# vertical filtering, jump to storing the data. Otherwise
+ ;# load up and filter the additional 5 lines that are needed
+ ;# for the vertical filter.
+ beq- store_8x4
+
+ ;# only needed if there is a vertical filter present
+ ;# if the second filter is not null then need to back off by 2*pitch
+ sub r9, r9, r4
+ sub r9, r9, r4
+
+ Read8x8 v0, r9, r4, 1
+ Read8x8 v1, r9, r4, 0
+ Read8x8 v6, r3, r4, 1
+ Read8x8 v7, r3, r4, 1
+ Read8x8 v8, r3, r4, 0
+
+ interp_8x8 v0
+ interp_8x8 v1
+ interp_8x8 v6
+ interp_8x8 v7
+ interp_8x8 v8
+
+ b second_pass_8x4
+
+second_pass_pre_copy_8x4:
+ ;# only needed if there is a vertical filter present
+ ;# if the second filter is not null then need to back off by 2*pitch
+ sub r3, r3, r4
+ sub r3, r3, r4
+ li r10, 16
+
+ Read8x8 v0, r3, r4, 1
+ Read8x8 v1, r3, r4, 1
+ Read8x8 v2, r3, r4, 1
+ Read8x8 v3, r3, r4, 1
+ Read8x8 v4, r3, r4, 1
+ Read8x8 v5, r3, r4, 1
+ Read8x8 v6, r3, r4, 1
+ Read8x8 v7, r3, r4, 1
+ Read8x8 v8, r3, r4, 1
+
+ slwi r6, r6, 4 ;# index into vertical filter array
+
+second_pass_8x4:
+ load_c v13, VFilter, r6, r9, r10
+
+ vspltish v15, 8
+ vspltish v20, 3
+ vslh v20, v15, v20 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040
+
+ vspltb v14, v13, 1
+ vspltb v15, v13, 2
+ vspltb v16, v13, 3
+ vspltb v17, v13, 4
+ vspltb v18, v13, 5
+ vspltb v13, v13, 0
+
+ vinterp_no_store_8x8 v0, v1, v2, v3, v4, v5
+ vinterp_no_store_8x8 v1, v2, v3, v4, v5, v6
+ vinterp_no_store_8x8 v2, v3, v4, v5, v6, v7
+ vinterp_no_store_8x8 v3, v4, v5, v6, v7, v8
+
+ cmpi cr0, r8, 8
+ beq cr0, store_aligned_8x4
+
+ w_8x8 v0, r7, r0, r8
+ w_8x8 v1, r7, r0, r8
+ w_8x8 v2, r7, r0, r8
+ w_8x8 v3, r7, r0, r8
+
+ b exit_8x4
+
+store_aligned_8x4:
+
+ load_c v10, b_hilo, 0, r9, r10
+
+ vperm v0, v0, v1, v10
+ vperm v2, v2, v3, v10
+
+ stvx v0, 0, r7
+ addi r7, r7, 16
+ stvx v2, 0, r7
+
+ b exit_8x4
+
+store_8x4:
+ cmpi cr0, r8, 8
+ beq cr0, store_aligned2_8x4
+
+ w_8x8 v2, r7, r0, r8
+ w_8x8 v3, r7, r0, r8
+ w_8x8 v4, r7, r0, r8
+ w_8x8 v5, r7, r0, r8
+
+ b exit_8x4
+
+store_aligned2_8x4:
+ load_c v10, b_hilo, 0, r9, r10
+
+ vperm v2, v2, v3, v10
+ vperm v4, v4, v5, v10
+
+ stvx v2, 0, r7
+ addi r7, r7, 16
+ stvx v4, 0, r7
+
+exit_8x4:
+
+ addi r1, r1, 32 ;# recover stack
+
+ mtspr 256, r11 ;# reset old VRSAVE
+
+
+ blr
+
+ .align 2
+;# r3 unsigned char * src
+;# r4 int src_pitch
+;# r5 int x_offset
+;# r6 int y_offset
+;# r7 unsigned char * dst
+;# r8 int dst_pitch
+
+;# Because the width that needs to be filtered will fit in a single altivec
+;# register there is no need to loop. Everything can stay in registers.
+sixtap_predict8x8_ppc:
+ mfspr r11, 256 ;# get old VRSAVE
+ oris r12, r11, 0xffff
+ ori r12, r12, 0xffc0
+ mtspr 256, r12 ;# set VRSAVE
+
+ stwu r1,-32(r1) ;# create space on the stack
+
+ slwi. r5, r5, 5 ;# index into horizontal filter array
+
+ vspltish v19, 7
+
+ ;# If there isn't any filtering to be done for the horizontal, then
+ ;# just skip to the second pass.
+ beq- second_pass_pre_copy_8x8
+
+ load_hfilter v13, v14
+
+ ;# rounding added in on the multiply
+ vspltisw v16, 8
+ vspltisw v15, 3
+ vslw v15, v16, v15 ;# 0x00000040000000400000004000000040
+
+ ;# Load up permutation constants
+ load_c v16, B_0123, 0, r9, r10
+ load_c v17, B_4567, 0, r9, r10
+ load_c v18, B_89AB, 0, r9, r10
+
+ ;# Back off input buffer by 2 bytes. Need 2 before and 3 after
+ addi r3, r3, -2
+
+ addi r9, r3, 0
+ li r10, 16
+ Read8x8 v2, r3, r4, 1
+ Read8x8 v3, r3, r4, 1
+ Read8x8 v4, r3, r4, 1
+ Read8x8 v5, r3, r4, 1
+ Read8x8 v6, r3, r4, 1
+ Read8x8 v7, r3, r4, 1
+ Read8x8 v8, r3, r4, 1
+ Read8x8 v9, r3, r4, 1
+
+ slwi. r6, r6, 4 ;# index into vertical filter array
+
+ ;# filter a line
+ interp_8x8 v2
+ interp_8x8 v3
+ interp_8x8 v4
+ interp_8x8 v5
+ interp_8x8 v6
+ interp_8x8 v7
+ interp_8x8 v8
+ interp_8x8 v9
+
+ ;# Finished filtering main horizontal block. If there is no
+ ;# vertical filtering, jump to storing the data. Otherwise
+ ;# load up and filter the additional 5 lines that are needed
+ ;# for the vertical filter.
+ beq- store_8x8
+
+ ;# only needed if there is a vertical filter present
+ ;# if the second filter is not null then need to back off by 2*pitch
+ sub r9, r9, r4
+ sub r9, r9, r4
+
+ Read8x8 v0, r9, r4, 1
+ Read8x8 v1, r9, r4, 0
+ Read8x8 v10, r3, r4, 1
+ Read8x8 v11, r3, r4, 1
+ Read8x8 v12, r3, r4, 0
+
+ interp_8x8 v0
+ interp_8x8 v1
+ interp_8x8 v10
+ interp_8x8 v11
+ interp_8x8 v12
+
+ b second_pass_8x8
+
+second_pass_pre_copy_8x8:
+ ;# only needed if there is a vertical filter present
+ ;# if the second filter is not null then need to back off by 2*pitch
+ sub r3, r3, r4
+ sub r3, r3, r4
+ li r10, 16
+
+ Read8x8 v0, r3, r4, 1
+ Read8x8 v1, r3, r4, 1
+ Read8x8 v2, r3, r4, 1
+ Read8x8 v3, r3, r4, 1
+ Read8x8 v4, r3, r4, 1
+ Read8x8 v5, r3, r4, 1
+ Read8x8 v6, r3, r4, 1
+ Read8x8 v7, r3, r4, 1
+ Read8x8 v8, r3, r4, 1
+ Read8x8 v9, r3, r4, 1
+ Read8x8 v10, r3, r4, 1
+ Read8x8 v11, r3, r4, 1
+ Read8x8 v12, r3, r4, 0
+
+ slwi r6, r6, 4 ;# index into vertical filter array
+
+second_pass_8x8:
+ load_c v13, VFilter, r6, r9, r10
+
+ vspltish v15, 8
+ vspltish v20, 3
+ vslh v20, v15, v20 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040
+
+ vspltb v14, v13, 1
+ vspltb v15, v13, 2
+ vspltb v16, v13, 3
+ vspltb v17, v13, 4
+ vspltb v18, v13, 5
+ vspltb v13, v13, 0
+
+ vinterp_no_store_8x8 v0, v1, v2, v3, v4, v5
+ vinterp_no_store_8x8 v1, v2, v3, v4, v5, v6
+ vinterp_no_store_8x8 v2, v3, v4, v5, v6, v7
+ vinterp_no_store_8x8 v3, v4, v5, v6, v7, v8
+ vinterp_no_store_8x8 v4, v5, v6, v7, v8, v9
+ vinterp_no_store_8x8 v5, v6, v7, v8, v9, v10
+ vinterp_no_store_8x8 v6, v7, v8, v9, v10, v11
+ vinterp_no_store_8x8 v7, v8, v9, v10, v11, v12
+
+ cmpi cr0, r8, 8
+ beq cr0, store_aligned_8x8
+
+ w_8x8 v0, r7, r0, r8
+ w_8x8 v1, r7, r0, r8
+ w_8x8 v2, r7, r0, r8
+ w_8x8 v3, r7, r0, r8
+ w_8x8 v4, r7, r0, r8
+ w_8x8 v5, r7, r0, r8
+ w_8x8 v6, r7, r0, r8
+ w_8x8 v7, r7, r0, r8
+
+ b exit_8x8
+
+store_aligned_8x8:
+
+ load_c v10, b_hilo, 0, r9, r10
+
+ vperm v0, v0, v1, v10
+ vperm v2, v2, v3, v10
+ vperm v4, v4, v5, v10
+ vperm v6, v6, v7, v10
+
+ stvx v0, 0, r7
+ addi r7, r7, 16
+ stvx v2, 0, r7
+ addi r7, r7, 16
+ stvx v4, 0, r7
+ addi r7, r7, 16
+ stvx v6, 0, r7
+
+ b exit_8x8
+
+store_8x8:
+ cmpi cr0, r8, 8
+ beq cr0, store_aligned2_8x8
+
+ w_8x8 v2, r7, r0, r8
+ w_8x8 v3, r7, r0, r8
+ w_8x8 v4, r7, r0, r8
+ w_8x8 v5, r7, r0, r8
+ w_8x8 v6, r7, r0, r8
+ w_8x8 v7, r7, r0, r8
+ w_8x8 v8, r7, r0, r8
+ w_8x8 v9, r7, r0, r8
+
+ b exit_8x8
+
+store_aligned2_8x8:
+ load_c v10, b_hilo, 0, r9, r10
+
+ vperm v2, v2, v3, v10
+ vperm v4, v4, v5, v10
+ vperm v6, v6, v7, v10
+ vperm v8, v8, v9, v10
+
+ stvx v2, 0, r7
+ addi r7, r7, 16
+ stvx v4, 0, r7
+ addi r7, r7, 16
+ stvx v6, 0, r7
+ addi r7, r7, 16
+ stvx v8, 0, r7
+
+exit_8x8:
+
+ addi r1, r1, 32 ;# recover stack
+
+ mtspr 256, r11 ;# reset old VRSAVE
+
+ blr
+
+ .align 2
+;# r3 unsigned char * src
+;# r4 int src_pitch
+;# r5 int x_offset
+;# r6 int y_offset
+;# r7 unsigned char * dst
+;# r8 int dst_pitch
+
+;# Two pass filtering. First pass is Horizontal edges, second pass is vertical
+;# edges. One of the filters can be null, but both won't be. Needs to use a
+;# temporary buffer because the source buffer can't be modified and the buffer
+;# for the destination is not large enough to hold the temporary data.
+;#
+;# The temporary buffer lives in this routine's 416-byte stack frame, starting
+;# at 48(r1), and holds up to 21 rows of 16 bytes each.
+sixtap_predict16x16_ppc:
+ mfspr r11, 256 ;# get old VRSAVE
+ oris r12, r11, 0xffff
+ ori r12, r12, 0xf000 ;# mark v0-v19 as in use
+ mtspr 256, r12 ;# set VRSAVE
+
+ stwu r1,-416(r1) ;# create space on the stack
+
+ ;# Three possibilities
+ ;# 1. First filter is null. Copy 21 source rows to the temp buffer
+ ;# unfiltered (copy_horizontal_16x21), then run the second pass.
+ ;# 2. Second filter is null. Don't use a temp buffer.
+ ;# 3. Neither are null, use temp buffer.
+
+ ;# First Pass (horizontal edge)
+ ;# setup pointers for src
+ ;# if possibility (1) then jump to copy_horizontal_16x21, which copies the
+ ;# source rows unfiltered. this is based on if x_offset is 0.
+
+ ;# load up horizontal filter
+ slwi. r5, r5, 5 ;# index into horizontal filter array (sets cr0)
+
+ ;# NOTE(review): load_hfilter is defined outside this view; the beq- below
+ ;# relies on it leaving cr0 as set by the slwi. above.
+ load_hfilter v4, v5
+
+ beq- copy_horizontal_16x21
+
+ ;# Back off input buffer by 2 bytes. Need 2 before and 3 after
+ addi r3, r3, -2
+
+ slwi. r6, r6, 4 ;# index into vertical filter array (sets cr0)
+
+ ;# setup constants
+ ;# v14 permutation value for alignment
+ load_c v14, b_hperm, 0, r9, r10
+
+ ;# These statements are guessing that there won't be a second pass,
+ ;# but if there is then inside the bypass they need to be set
+ li r0, 16 ;# prepare for no vertical filter
+
+ ;# Change the output pointer and pitch to be the actual
+ ;# destination instead of a temporary buffer.
+ addi r9, r7, 0
+ addi r5, r8, 0
+
+ ;# no vertical filter, so write the output from the first pass
+ ;# directly into the output buffer.
+ beq- no_vertical_filter_bypass
+
+ ;# if the second filter is not null then need to back off by 2*pitch
+ sub r3, r3, r4
+ sub r3, r3, r4
+
+ ;# setup counter for the number of lines that are going to be filtered
+ li r0, 21
+
+ ;# use the stack as temporary storage
+ la r9, 48(r1)
+ li r5, 16
+
+no_vertical_filter_bypass:
+
+ mtctr r0 ;# loop count: 16 rows, or 21 with a vertical pass
+
+ ;# rounding added in on the multiply
+ vspltisw v10, 8
+ vspltisw v12, 3
+ vslw v12, v10, v12 ;# 0x00000040000000400000004000000040
+
+ ;# downshift by 7 ( divide by 128 ) at the end
+ vspltish v13, 7
+
+ ;# index to the next set of vectors in the row.
+ li r10, 16
+ li r12, 32
+
+horizontal_loop_16x16:
+
+ lvsl v15, 0, r3 ;# permutate value for alignment
+
+ ;# input to filter is 21 bytes wide, output is 16 bytes.
+ ;# input can span three vectors if not aligned correctly.
+ lvx v1, 0, r3
+ lvx v2, r10, r3
+ lvx v3, r12, r3
+
+ vperm v8, v1, v2, v15
+ vperm v9, v2, v3, v15 ;# v8 v9 = 21 input pixels left-justified
+
+ vsldoi v11, v8, v9, 4
+
+ ;# set 0
+ vmsummbm v6, v4, v8, v12 ;# taps times elements
+ vmsummbm v0, v5, v11, v6
+
+ ;# set 1
+ vsldoi v10, v8, v9, 1
+ vsldoi v11, v8, v9, 5
+
+ vmsummbm v6, v4, v10, v12
+ vmsummbm v1, v5, v11, v6
+
+ ;# set 2
+ vsldoi v10, v8, v9, 2
+ vsldoi v11, v8, v9, 6
+
+ vmsummbm v6, v4, v10, v12
+ vmsummbm v2, v5, v11, v6
+
+ ;# set 3
+ vsldoi v10, v8, v9, 3
+ vsldoi v11, v8, v9, 7
+
+ vmsummbm v6, v4, v10, v12
+ vmsummbm v3, v5, v11, v6
+
+ vpkswus v0, v0, v1 ;# v0 = 0 4 8 C 1 5 9 D (16-bit)
+ vpkswus v1, v2, v3 ;# v1 = 2 6 A E 3 7 B F
+
+ vsrh v0, v0, v13 ;# divide v0, v1 by 128
+ vsrh v1, v1, v13
+
+ vpkuhus v0, v0, v1 ;# v0 = scrambled 8-bit result
+ vperm v0, v0, v0, v14 ;# v0 = correctly-ordered result
+
+ stvx v0, 0, r9 ;# to dst or to the stack buffer, see r9/r5 setup
+ add r9, r9, r5
+
+ add r3, r3, r4
+
+ bdnz horizontal_loop_16x16
+
+ ;# check again to see if vertical filter needs to be done.
+ cmpi cr0, r6, 0
+ beq cr0, end_16x16
+
+ ;# yes there is, so go to the second pass
+ b second_pass_16x16
+
+copy_horizontal_16x21:
+ li r10, 21
+ mtctr r10 ;# copy 21 rows
+
+ li r10, 16
+
+ ;# back off by 2*pitch, as is done above when both filters are present
+ sub r3, r3, r4
+ sub r3, r3, r4
+
+ ;# this is done above if there is a horizontal filter,
+ ;# if not it needs to be done down here.
+ slwi r6, r6, 4 ;# index into vertical filter array
+
+ ;# always write to the stack when doing a horizontal copy
+ la r9, 48(r1)
+
+copy_horizontal_loop_16x21:
+ lvsl v15, 0, r3 ;# permutate value for alignment
+
+ lvx v1, 0, r3
+ lvx v2, r10, r3
+
+ vperm v8, v1, v2, v15 ;# v8 = 16 source bytes starting at r3
+
+ stvx v8, 0, r9
+ addi r9, r9, 16
+
+ add r3, r3, r4
+
+ bdnz copy_horizontal_loop_16x21
+
+second_pass_16x16:
+
+ ;# always read from the stack when doing a vertical filter
+ la r9, 48(r1)
+
+ ;# downshift by 7 ( divide by 128 ) at the end
+ vspltish v7, 7
+
+ ;# NOTE(review): vpre_load, luma_vsix and luma_vfour are macros defined
+ ;# outside this view; presumably they filter 6 + 6 + 4 = 16 output rows
+ ;# from the stack buffer into dst -- confirm against their definitions.
+ vpre_load
+
+ luma_vsix
+ luma_vsix
+ luma_vfour
+
+end_16x16:
+
+ addi r1, r1, 416 ;# recover stack
+
+ mtspr 256, r11 ;# reset old VRSAVE
+
+ blr
+
+ .data
+
+ .align 4
+HFilter:
+ .byte 0, 0,128, 0, 0, 0,128, 0, 0, 0,128, 0, 0, 0,128, 0
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ .byte 0, -6,123, 12, 0, -6,123, 12, 0, -6,123, 12, 0, -6,123, 12
+ .byte -1, 0, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0
+ .byte 2,-11,108, 36, 2,-11,108, 36, 2,-11,108, 36, 2,-11,108, 36
+ .byte -8, 1, 0, 0, -8, 1, 0, 0, -8, 1, 0, 0, -8, 1, 0, 0
+ .byte 0, -9, 93, 50, 0, -9, 93, 50, 0, -9, 93, 50, 0, -9, 93, 50
+ .byte -6, 0, 0, 0, -6, 0, 0, 0, -6, 0, 0, 0, -6, 0, 0, 0
+ .byte 3,-16, 77, 77, 3,-16, 77, 77, 3,-16, 77, 77, 3,-16, 77, 77
+ .byte -16, 3, 0, 0,-16, 3, 0, 0,-16, 3, 0, 0,-16, 3, 0, 0
+ .byte 0, -6, 50, 93, 0, -6, 50, 93, 0, -6, 50, 93, 0, -6, 50, 93
+ .byte -9, 0, 0, 0, -9, 0, 0, 0, -9, 0, 0, 0, -9, 0, 0, 0
+ .byte 1, -8, 36,108, 1, -8, 36,108, 1, -8, 36,108, 1, -8, 36,108
+ .byte -11, 2, 0, 0,-11, 2, 0, 0,-11, 2, 0, 0,-11, 2, 0, 0
+ .byte 0, -1, 12,123, 0, -1, 12,123, 0, -1, 12,123, 0, -1, 12,123
+ .byte -6, 0, 0, 0, -6, 0, 0, 0, -6, 0, 0, 0, -6, 0, 0, 0
+
+ .align 4
+VFilter:
+ .byte 0, 0,128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ .byte 0, 6,123, 12, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ .byte 2, 11,108, 36, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ .byte 0, 9, 93, 50, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ .byte 3, 16, 77, 77, 16, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ .byte 0, 6, 50, 93, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ .byte 1, 8, 36,108, 11, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ .byte 0, 1, 12,123, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+
+ .align 4
+b_hperm:
+ .byte 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15
+
+ .align 4
+B_0123:
+ .byte 0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6
+
+ .align 4
+B_4567:
+ .byte 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10
+
+ .align 4
+B_89AB:
+ .byte 8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 11, 12, 13, 14
+
+ .align 4
+b_hilo:
+ .byte 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23
+
+ .align 4
+b_hilo_4x4:
+ .byte 0, 1, 2, 3, 16, 17, 18, 19, 0, 0, 0, 0, 0, 0, 0, 0
diff --git a/vp9/common/ppc/vp9_filter_bilinear_altivec.asm b/vp9/common/ppc/vp9_filter_bilinear_altivec.asm
new file mode 100644
index 0000000..fd8aa66
--- /dev/null
+++ b/vp9/common/ppc/vp9_filter_bilinear_altivec.asm
@@ -0,0 +1,677 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+ .globl bilinear_predict4x4_ppc
+ .globl bilinear_predict8x4_ppc
+ .globl bilinear_predict8x8_ppc
+ .globl bilinear_predict16x16_ppc
+
+;# Loads the 16-byte vector at LABEL + OFF into V.
+;# OFF is either the literal 0 or a register holding a byte offset.
+;# R0 and R1 are scratch registers; on exit R1 holds the address of LABEL.
+.macro load_c V, LABEL, OFF, R0, R1
+ lis \R0, \LABEL@ha ;# high-adjusted half of the address
+ la \R1, \LABEL@l(\R0) ;# R1 = full address of LABEL
+ lvx \V, \OFF, \R1
+.endm
+
+;# Loads the pair of vertical filter tap vectors selected by r6:
+;# V0 = vfilter_b[r6], V1 = vfilter_b[r6 + 16].
+;# r6 is a byte offset into vfilter_b and is advanced by 16.
+;# Clobbers r9 and r10 (r10 is left holding the address of vfilter_b).
+.macro load_vfilter V0, V1
+ load_c \V0, vfilter_b, r6, r9, r10
+
+ addi r6, r6, 16
+ lvx \V1, r6, r10
+.endm
+
+;# Common prologue for the horizontal pass.
+;# Branches to jump_label when x_offset (r5) is 0; in that case only r10,
+;# r12 and v19 have been set up and r6 is still the unscaled y_offset.
+;# Otherwise loads the horizontal taps (v20), the output permute (v28) and
+;# the rounding constant (v18), scales r6 into a byte offset into the
+;# vertical filter array, and leaves cr0 reflecting whether y_offset is 0.
+;# Clobbers r0 and r9.
+.macro HProlog jump_label
+ ;# load up horizontal filter
+ slwi. r5, r5, 4 ;# index into horizontal filter array
+
+ ;# index to the next set of vectors in the row.
+ li r10, 16
+ li r12, 32
+
+ ;# downshift by 7 ( divide by 128 ) at the end
+ vspltish v19, 7
+
+ ;# If there isn't any filtering to be done for the horizontal, then
+ ;# just skip to the second pass.
+ beq \jump_label
+
+ load_c v20, hfilter_b, r5, r9, r0
+
+ ;# setup constants
+ ;# v28 permutation value for reordering the filtered output
+ load_c v28, b_hperm_b, 0, r9, r0
+
+ ;# rounding added in on the multiply
+ vspltisw v21, 8
+ vspltisw v18, 3
+ vslw v18, v21, v18 ;# 0x00000040000000400000004000000040
+
+ ;# sets cr0; callers test it after the horizontal pass to decide whether
+ ;# a vertical pass is needed
+ slwi. r6, r6, 5 ;# index into vertical filter array
+.endm
+
+;# Filters a horizontal line of 8 pixels
+;# expects:
+;# v10 perm selecting bytes 0123 1234 2345 3456 of the input
+;# v11 perm selecting bytes 4567 5678 6789 789A of the input
+;# v18 rounding
+;# v19 shift
+;# v20 filter taps
+;# v21 input pixels, left-justified
+;# uses:
+;# v24 tmp
+;# v25 tmp
+;# produces:
+;# \V filtered pixels 0-7 in bytes 0-7 (repeated in bytes 8-15)
+;#
+.macro HFilter V
+ vperm v24, v21, v21, v10 ;# v24 = 0123 1234 2345 3456
+ vperm v25, v21, v21, v11 ;# v25 = 4567 5678 6789 789A
+
+ vmsummbm v24, v20, v24, v18 ;# taps times elements, plus rounding
+ vmsummbm v25, v20, v25, v18
+
+ vpkswus v24, v24, v25 ;# v24 = pixels 0-7 (16-bit)
+
+ vsrh v24, v24, v19 ;# divide v24 by 128
+
+ vpkuhus \V, v24, v24 ;# \V = 8-bit result in bytes 0-7 and 8-15
+.endm
+
+;# Loads one unaligned row at r3 and filters it with HFilter into V.
+;# When increment_counter is non-zero, r3 is advanced by the pitch in r4.
+;# expects r10 = 16; clobbers v17, v21, v22 (plus HFilter's temporaries).
+.macro hfilter_8 V, increment_counter
+ lvsl v17, 0, r3 ;# permutate value for alignment
+
+ ;# input to filter is 9 bytes wide, output is 8 bytes.
+ lvx v21, 0, r3
+ lvx v22, r10, r3
+
+.if \increment_counter
+ add r3, r3, r4
+.endif
+ vperm v21, v21, v22, v17 ;# v21 = input pixels left-justified
+
+ HFilter \V
+.endm
+
+
+;# Loads 16 unaligned bytes starting at r3 into V without filtering.
+;# When increment_counter is non-zero, r3 is advanced by the pitch in r4.
+;# expects r10 = 16; clobbers v17, v21, v22.
+.macro load_and_align_8 V, increment_counter
+ lvsl v17, 0, r3 ;# permutate value for alignment
+
+ ;# the unaligned input can span two vectors, so load both and
+ ;# merge them with the alignment permute.
+ lvx v21, 0, r3
+ lvx v22, r10, r3
+
+.if \increment_counter
+ add r3, r3, r4
+.endif
+
+ vperm \V, v21, v22, v17
+.endm
+
+;# Stores vector V at the address in r7 with stvx.
+;# When increment_counter is non-zero, r7 is advanced by the pitch in r8.
+.macro write_aligned_8 V, increment_counter
+ stvx \V, 0, r7
+
+.if \increment_counter
+ add r7, r7, r8
+.endif
+.endm
+
+;# Vertical two-tap filter over one row of 16 pixels:
+;# \P0 = (\P0 * v20 + \P1 * v21 + v18) >> v19 for every byte.
+;# expects:
+;# v18 rounding (per halfword)
+;# v19 shift
+;# v20 tap applied to \P0
+;# v21 tap applied to \P1
+;# clobbers v22-v25
+.macro vfilter_16 P0 P1
+ vmuleub v22, \P0, v20 ;# even bytes of P0 times first tap
+ vadduhm v22, v18, v22 ;# add in rounding
+ vmuloub v23, \P0, v20 ;# odd bytes of P0 times first tap
+ vadduhm v23, v18, v23
+
+ vmuleub v24, \P1, v21
+ vadduhm v22, v22, v24 ;# Re = evens, saturation unnecessary
+ vmuloub v25, \P1, v21
+ vadduhm v23, v23, v25 ;# Ro = odds
+
+ vsrh v22, v22, v19 ;# divide by 128
+ vsrh v23, v23, v19 ;# v22 v23 = evens, odds
+ vmrghh \P0, v22, v23 ;# P0 v23 = 16-bit result in order
+ vmrglh v23, v22, v23
+ vpkuhus \P0, \P0, v23 ;# P0 = 8-bit result
+.endm
+
+
+;# Writes the first 8 bytes of vector V to the address in D, then advances
+;# D by the pitch in P.
+;# V vector holding the row in bytes 0-7
+;# D destination pointer register
+;# R scratch general purpose register
+;# P register holding the destination pitch
+;# The vector is bounced through the 16 bytes at 0(r1), so the caller must
+;# have created a stack frame whose base may be overwritten.
+.macro w_8x8 V, D, R, P
+ stvx \V, 0, r1
+ lwz \R, 0(r1)
+ stw \R, 0(\D) ;# store through D rather than a hard-coded r7
+ lwz \R, 4(r1)
+ stw \R, 4(\D)
+ add \D, \D, \P
+.endm
+
+
+ .align 2
+;# r3 unsigned char * src
+;# r4 int src_pitch
+;# r5 int x_offset
+;# r6 int y_offset
+;# r7 unsigned char * dst
+;# r8 int dst_pitch
+;#
+;# Two pass bilinear prediction of a 4x4 block. The horizontal pass is
+;# skipped when x_offset is 0; the vertical pass is skipped when x_offset is
+;# non-zero and y_offset is 0. Four bytes are written to each of 4 dst rows.
+bilinear_predict4x4_ppc:
+ mfspr r11, 256 ;# get old VRSAVE
+ oris r12, r11, 0xf830 ;# v0-v4, v10-v11
+ ori r12, r12, 0xfff8 ;# v16-v28
+ mtspr 256, r12 ;# set VRSAVE
+
+ stwu r1,-32(r1) ;# create space on the stack
+
+ HProlog second_pass_4x4_pre_copy_b
+
+ ;# Load up permutation constants
+ load_c v10, b_0123_b, 0, r9, r12
+ load_c v11, b_4567_b, 0, r9, r12
+
+ hfilter_8 v0, 1
+ hfilter_8 v1, 1
+ hfilter_8 v2, 1
+ hfilter_8 v3, 1
+
+ ;# Finished filtering main horizontal block. If there is no
+ ;# vertical filtering, jump to storing the data. Otherwise
+ ;# load up and filter the additional line that is needed
+ ;# for the vertical filter.
+ ;# (cr0 still holds the y_offset test made at the end of HProlog)
+ beq store_out_4x4_b
+
+ hfilter_8 v4, 0
+
+ b second_pass_4x4_b
+
+second_pass_4x4_pre_copy_b:
+ ;# no horizontal filter: HProlog branched here before scaling r6
+ slwi r6, r6, 5 ;# index into vertical filter array
+
+ load_and_align_8 v0, 1
+ load_and_align_8 v1, 1
+ load_and_align_8 v2, 1
+ load_and_align_8 v3, 1
+ load_and_align_8 v4, 1
+
+second_pass_4x4_b:
+ ;# rounding for the vertical pass, one per halfword
+ vspltish v20, 8
+ vspltish v18, 3
+ vslh v18, v20, v18 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040
+
+ load_vfilter v20, v21
+
+ vfilter_16 v0, v1
+ vfilter_16 v1, v2
+ vfilter_16 v2, v3
+ vfilter_16 v3, v4
+
+store_out_4x4_b:
+
+ ;# each row is spilled to the stack frame so that its first
+ ;# 4 bytes can be moved to dst through r0
+ stvx v0, 0, r1
+ lwz r0, 0(r1)
+ stw r0, 0(r7)
+ add r7, r7, r8
+
+ stvx v1, 0, r1
+ lwz r0, 0(r1)
+ stw r0, 0(r7)
+ add r7, r7, r8
+
+ stvx v2, 0, r1
+ lwz r0, 0(r1)
+ stw r0, 0(r7)
+ add r7, r7, r8
+
+ stvx v3, 0, r1
+ lwz r0, 0(r1)
+ stw r0, 0(r7)
+
+exit_4x4:
+
+ addi r1, r1, 32 ;# recover stack
+ mtspr 256, r11 ;# reset old VRSAVE
+
+ blr
+
+ .align 2
+;# r3 unsigned char * src
+;# r4 int src_pitch
+;# r5 int x_offset
+;# r6 int y_offset
+;# r7 unsigned char * dst
+;# r8 int dst_pitch
+;#
+;# Two pass bilinear prediction of a block 8 pixels wide and 4 rows high.
+;# The horizontal pass is skipped when x_offset is 0; the vertical pass is
+;# skipped when x_offset is non-zero and y_offset is 0.
+bilinear_predict8x4_ppc:
+ mfspr r11, 256 ;# get old VRSAVE
+ oris r12, r11, 0xf830 ;# v0-v4, v10-v11
+ ori r12, r12, 0xfff8 ;# v16-v28
+ mtspr 256, r12 ;# set VRSAVE
+
+ stwu r1,-32(r1) ;# create space on the stack
+
+ HProlog second_pass_8x4_pre_copy_b
+
+ ;# Load up permutation constants
+ load_c v10, b_0123_b, 0, r9, r12
+ load_c v11, b_4567_b, 0, r9, r12
+
+ hfilter_8 v0, 1
+ hfilter_8 v1, 1
+ hfilter_8 v2, 1
+ hfilter_8 v3, 1
+
+ ;# Finished filtering main horizontal block. If there is no
+ ;# vertical filtering, jump to storing the data. Otherwise
+ ;# load up and filter the additional line that is needed
+ ;# for the vertical filter.
+ ;# (cr0 still holds the y_offset test made at the end of HProlog)
+ beq store_out_8x4_b
+
+ hfilter_8 v4, 0
+
+ b second_pass_8x4_b
+
+second_pass_8x4_pre_copy_b:
+ ;# no horizontal filter: HProlog branched here before scaling r6
+ slwi r6, r6, 5 ;# index into vertical filter array
+
+ load_and_align_8 v0, 1
+ load_and_align_8 v1, 1
+ load_and_align_8 v2, 1
+ load_and_align_8 v3, 1
+ load_and_align_8 v4, 1
+
+second_pass_8x4_b:
+ ;# rounding for the vertical pass, one per halfword
+ vspltish v20, 8
+ vspltish v18, 3
+ vslh v18, v20, v18 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040
+
+ load_vfilter v20, v21
+
+ vfilter_16 v0, v1
+ vfilter_16 v1, v2
+ vfilter_16 v2, v3
+ vfilter_16 v3, v4
+
+store_out_8x4_b:
+
+ ;# a dst pitch of 8 means two rows fill one 16-byte vector, so
+ ;# pairs of rows can be merged and written with a single stvx
+ cmpi cr0, r8, 8
+ beq cr0, store_aligned_8x4_b
+
+ w_8x8 v0, r7, r0, r8
+ w_8x8 v1, r7, r0, r8
+ w_8x8 v2, r7, r0, r8
+ w_8x8 v3, r7, r0, r8
+
+ b exit_8x4
+
+store_aligned_8x4_b:
+ load_c v10, b_hilo_b, 0, r9, r10
+
+ vperm v0, v0, v1, v10 ;# bytes 0-7 of v0 followed by bytes 0-7 of v1
+ vperm v2, v2, v3, v10
+
+ stvx v0, 0, r7
+ addi r7, r7, 16
+ stvx v2, 0, r7
+
+exit_8x4:
+
+ addi r1, r1, 32 ;# recover stack
+ mtspr 256, r11 ;# reset old VRSAVE
+
+ blr
+
+ .align 2
+;# r3 unsigned char * src
+;# r4 int src_pitch
+;# r5 int x_offset
+;# r6 int y_offset
+;# r7 unsigned char * dst
+;# r8 int dst_pitch
+;#
+;# Two pass bilinear prediction of an 8x8 block. The horizontal pass is
+;# skipped when x_offset is 0; the vertical pass is skipped when x_offset is
+;# non-zero and y_offset is 0.
+bilinear_predict8x8_ppc:
+ mfspr r11, 256 ;# get old VRSAVE
+ oris r12, r11, 0xfff0 ;# v0-v11
+ ori r12, r12, 0xffff ;# v16-v31
+ mtspr 256, r12 ;# set VRSAVE
+
+ stwu r1,-32(r1) ;# create space on the stack
+
+ HProlog second_pass_8x8_pre_copy_b
+
+ ;# Load up permutation constants
+ load_c v10, b_0123_b, 0, r9, r12
+ load_c v11, b_4567_b, 0, r9, r12
+
+ hfilter_8 v0, 1
+ hfilter_8 v1, 1
+ hfilter_8 v2, 1
+ hfilter_8 v3, 1
+ hfilter_8 v4, 1
+ hfilter_8 v5, 1
+ hfilter_8 v6, 1
+ hfilter_8 v7, 1
+
+ ;# Finished filtering main horizontal block. If there is no
+ ;# vertical filtering, jump to storing the data. Otherwise
+ ;# load up and filter the additional line that is needed
+ ;# for the vertical filter.
+ ;# (cr0 still holds the y_offset test made at the end of HProlog)
+ beq store_out_8x8_b
+
+ hfilter_8 v8, 0
+
+ b second_pass_8x8_b
+
+second_pass_8x8_pre_copy_b:
+ ;# no horizontal filter: HProlog branched here before scaling r6
+ slwi r6, r6, 5 ;# index into vertical filter array
+
+ load_and_align_8 v0, 1
+ load_and_align_8 v1, 1
+ load_and_align_8 v2, 1
+ load_and_align_8 v3, 1
+ load_and_align_8 v4, 1
+ load_and_align_8 v5, 1
+ load_and_align_8 v6, 1
+ load_and_align_8 v7, 1
+ load_and_align_8 v8, 0
+
+second_pass_8x8_b:
+ ;# rounding for the vertical pass, one per halfword
+ vspltish v20, 8
+ vspltish v18, 3
+ vslh v18, v20, v18 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040
+
+ load_vfilter v20, v21
+
+ vfilter_16 v0, v1
+ vfilter_16 v1, v2
+ vfilter_16 v2, v3
+ vfilter_16 v3, v4
+ vfilter_16 v4, v5
+ vfilter_16 v5, v6
+ vfilter_16 v6, v7
+ vfilter_16 v7, v8
+
+store_out_8x8_b:
+
+ ;# a dst pitch of 8 means two rows fill one 16-byte vector, so
+ ;# pairs of rows can be merged and written with a single stvx
+ cmpi cr0, r8, 8
+ beq cr0, store_aligned_8x8_b
+
+ w_8x8 v0, r7, r0, r8
+ w_8x8 v1, r7, r0, r8
+ w_8x8 v2, r7, r0, r8
+ w_8x8 v3, r7, r0, r8
+ w_8x8 v4, r7, r0, r8
+ w_8x8 v5, r7, r0, r8
+ w_8x8 v6, r7, r0, r8
+ w_8x8 v7, r7, r0, r8
+
+ b exit_8x8
+
+store_aligned_8x8_b:
+ load_c v10, b_hilo_b, 0, r9, r10
+
+ vperm v0, v0, v1, v10 ;# bytes 0-7 of v0 followed by bytes 0-7 of v1
+ vperm v2, v2, v3, v10
+ vperm v4, v4, v5, v10
+ vperm v6, v6, v7, v10
+
+ stvx v0, 0, r7
+ addi r7, r7, 16
+ stvx v2, 0, r7
+ addi r7, r7, 16
+ stvx v4, 0, r7
+ addi r7, r7, 16
+ stvx v6, 0, r7
+
+exit_8x8:
+
+ addi r1, r1, 32 ;# recover stack
+ mtspr 256, r11 ;# reset old VRSAVE
+
+ blr
+
+;# Filters a horizontal line of 16 pixels into \V
+;# expects:
+;# r3 src_ptr
+;# r4 pitch
+;# r10 16
+;# r12 32
+;# v17 perm input
+;# v18 rounding
+;# v19 shift
+;# v20 filter taps
+;# v21 tmp
+;# v22 tmp
+;# v23 tmp
+;# v24 tmp
+;# v25 tmp
+;# v26 tmp
+;# v27 tmp
+;# v28 perm output
+;#
+;# When increment_counter is non-zero, r3 is advanced by the pitch in r4.
+.macro hfilter_16 V, increment_counter
+
+ lvsl v17, 0, r3 ;# permutate value for alignment
+
+ ;# the filter reads bytes 0-18 of the row (v21 plus the first
+ ;# 3 bytes of v22), output is 16 bytes.
+ ;# input can span three vectors if not aligned correctly.
+ lvx v21, 0, r3
+ lvx v22, r10, r3
+ lvx v23, r12, r3
+
+.if \increment_counter
+ add r3, r3, r4
+.endif
+ vperm v21, v21, v22, v17
+ vperm v22, v22, v23, v17 ;# v21 v22 = input pixels left-justified
+
+ ;# set 0
+ vmsummbm v24, v20, v21, v18 ;# taps times elements
+
+ ;# set 1
+ vsldoi v23, v21, v22, 1
+ vmsummbm v25, v20, v23, v18
+
+ ;# set 2
+ vsldoi v23, v21, v22, 2
+ vmsummbm v26, v20, v23, v18
+
+ ;# set 3
+ vsldoi v23, v21, v22, 3
+ vmsummbm v27, v20, v23, v18
+
+ vpkswus v24, v24, v25 ;# v24 = 0 4 8 C 1 5 9 D (16-bit)
+ vpkswus v25, v26, v27 ;# v25 = 2 6 A E 3 7 B F
+
+ vsrh v24, v24, v19 ;# divide v24, v25 by 128
+ vsrh v25, v25, v19
+
+ vpkuhus \V, v24, v25 ;# \V = scrambled 8-bit result
+ ;# b_hperm_b only holds indices 0-15, so the second source operand (v0)
+ ;# is never selected by this permute.
+ vperm \V, \V, v0, v28 ;# \V = correctly-ordered result
+.endm
+
+;# Loads 16 unaligned bytes starting at r3 into V without filtering.
+;# When increment_counter is non-zero, r3 is advanced by the pitch in r4.
+;# expects r10 = 16; clobbers v17, v21, v22.
+.macro load_and_align_16 V, increment_counter
+ lvsl v17, 0, r3 ;# permutate value for alignment
+
+ ;# the unaligned input can span two vectors, so load both and
+ ;# merge them with the alignment permute.
+ lvx v21, 0, r3
+ lvx v22, r10, r3
+
+.if \increment_counter
+ add r3, r3, r4
+.endif
+
+ vperm \V, v21, v22, v17
+.endm
+
+;# Stores the 16-byte vector V at the address in r7 with stvx.
+;# When increment_counter is non-zero, r7 is advanced by the pitch in r8.
+;# NOTE(review): stvx ignores the low four address bits, so r7 is presumably
+;# always 16-byte aligned here -- confirm against the callers.
+.macro write_16 V, increment_counter
+ stvx \V, 0, r7
+
+.if \increment_counter
+ add r7, r7, r8
+.endif
+.endm
+
+ .align 2
+;# r3 unsigned char * src
+;# r4 int src_pitch
+;# r5 int x_offset
+;# r6 int y_offset
+;# r7 unsigned char * dst
+;# r8 int dst_pitch
+;#
+;# Two pass bilinear prediction of a 16x16 block. The horizontal pass is
+;# skipped when x_offset is 0; the vertical pass is skipped when x_offset is
+;# non-zero and y_offset is 0. All 17 intermediate rows are kept in v0-v16,
+;# so no stack frame is created.
+bilinear_predict16x16_ppc:
+ mfspr r11, 256 ;# get old VRSAVE
+ oris r12, r11, 0xffff
+ ori r12, r12, 0xfff8 ;# mark v0-v28 as in use
+ mtspr 256, r12 ;# set VRSAVE
+
+ HProlog second_pass_16x16_pre_copy_b
+
+ hfilter_16 v0, 1
+ hfilter_16 v1, 1
+ hfilter_16 v2, 1
+ hfilter_16 v3, 1
+ hfilter_16 v4, 1
+ hfilter_16 v5, 1
+ hfilter_16 v6, 1
+ hfilter_16 v7, 1
+ hfilter_16 v8, 1
+ hfilter_16 v9, 1
+ hfilter_16 v10, 1
+ hfilter_16 v11, 1
+ hfilter_16 v12, 1
+ hfilter_16 v13, 1
+ hfilter_16 v14, 1
+ hfilter_16 v15, 1
+
+ ;# Finished filtering main horizontal block. If there is no
+ ;# vertical filtering, jump to storing the data. Otherwise
+ ;# load up and filter the additional line that is needed
+ ;# for the vertical filter.
+ ;# (cr0 still holds the y_offset test made at the end of HProlog)
+ beq store_out_16x16_b
+
+ hfilter_16 v16, 0
+
+ b second_pass_16x16_b
+
+second_pass_16x16_pre_copy_b:
+ ;# no horizontal filter: HProlog branched here before scaling r6
+ slwi r6, r6, 5 ;# index into vertical filter array
+
+ load_and_align_16 v0, 1
+ load_and_align_16 v1, 1
+ load_and_align_16 v2, 1
+ load_and_align_16 v3, 1
+ load_and_align_16 v4, 1
+ load_and_align_16 v5, 1
+ load_and_align_16 v6, 1
+ load_and_align_16 v7, 1
+ load_and_align_16 v8, 1
+ load_and_align_16 v9, 1
+ load_and_align_16 v10, 1
+ load_and_align_16 v11, 1
+ load_and_align_16 v12, 1
+ load_and_align_16 v13, 1
+ load_and_align_16 v14, 1
+ load_and_align_16 v15, 1
+ load_and_align_16 v16, 0
+
+second_pass_16x16_b:
+ ;# rounding for the vertical pass, one per halfword
+ vspltish v20, 8
+ vspltish v18, 3
+ vslh v18, v20, v18 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040
+
+ load_vfilter v20, v21
+
+ vfilter_16 v0, v1
+ vfilter_16 v1, v2
+ vfilter_16 v2, v3
+ vfilter_16 v3, v4
+ vfilter_16 v4, v5
+ vfilter_16 v5, v6
+ vfilter_16 v6, v7
+ vfilter_16 v7, v8
+ vfilter_16 v8, v9
+ vfilter_16 v9, v10
+ vfilter_16 v10, v11
+ vfilter_16 v11, v12
+ vfilter_16 v12, v13
+ vfilter_16 v13, v14
+ vfilter_16 v14, v15
+ vfilter_16 v15, v16
+
+store_out_16x16_b:
+
+ ;# one full vector per row, written straight to dst
+ write_16 v0, 1
+ write_16 v1, 1
+ write_16 v2, 1
+ write_16 v3, 1
+ write_16 v4, 1
+ write_16 v5, 1
+ write_16 v6, 1
+ write_16 v7, 1
+ write_16 v8, 1
+ write_16 v9, 1
+ write_16 v10, 1
+ write_16 v11, 1
+ write_16 v12, 1
+ write_16 v13, 1
+ write_16 v14, 1
+ write_16 v15, 0
+
+ mtspr 256, r11 ;# reset old VRSAVE
+
+ blr
+
+ .data
+
+ .align 4
+hfilter_b:
+ .byte 128, 0, 0, 0,128, 0, 0, 0,128, 0, 0, 0,128, 0, 0, 0
+ .byte 112, 16, 0, 0,112, 16, 0, 0,112, 16, 0, 0,112, 16, 0, 0
+ .byte 96, 32, 0, 0, 96, 32, 0, 0, 96, 32, 0, 0, 96, 32, 0, 0
+ .byte 80, 48, 0, 0, 80, 48, 0, 0, 80, 48, 0, 0, 80, 48, 0, 0
+ .byte 64, 64, 0, 0, 64, 64, 0, 0, 64, 64, 0, 0, 64, 64, 0, 0
+ .byte 48, 80, 0, 0, 48, 80, 0, 0, 48, 80, 0, 0, 48, 80, 0, 0
+ .byte 32, 96, 0, 0, 32, 96, 0, 0, 32, 96, 0, 0, 32, 96, 0, 0
+ .byte 16,112, 0, 0, 16,112, 0, 0, 16,112, 0, 0, 16,112, 0, 0
+
+ .align 4
+vfilter_b:
+ .byte 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ .byte 112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112
+ .byte 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
+ .byte 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96
+ .byte 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
+ .byte 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80
+ .byte 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48
+ .byte 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
+ .byte 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
+ .byte 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48
+ .byte 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80
+ .byte 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
+ .byte 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96
+ .byte 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
+ .byte 112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112
+
+ .align 4
+b_hperm_b:
+ .byte 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15
+
+ .align 4
+b_0123_b:
+ .byte 0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6
+
+ .align 4
+b_4567_b:
+ .byte 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10
+
+ ;# Permute mask joining bytes 0-7 of the first source with bytes 0-7 of
+ ;# the second. It is fetched with lvx, which ignores the low four address
+ ;# bits, so request 16-byte alignment explicitly like the other tables
+ ;# instead of relying on the sizes of the tables that precede it.
+ .align 4
+b_hilo_b:
+ .byte 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23
diff --git a/vp9/common/ppc/vp9_idctllm_altivec.asm b/vp9/common/ppc/vp9_idctllm_altivec.asm
new file mode 100644
index 0000000..117d9cf
--- /dev/null
+++ b/vp9/common/ppc/vp9_idctllm_altivec.asm
@@ -0,0 +1,189 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+ .globl short_idct4x4llm_ppc
+
+;# Loads the 16-byte vector at LABEL + OFF into V.
+;# OFF is either the literal 0 or a register holding a byte offset.
+;# R0 and R1 are scratch registers; on exit R1 holds the address of LABEL.
+.macro load_c V, LABEL, OFF, R0, R1
+ lis \R0, \LABEL@ha ;# high-adjusted half of the address
+ la \R1, \LABEL@l(\R0) ;# R1 = full address of LABEL
+ lvx \V, \OFF, \R1
+.endm
+
+;# r3 short *input
+;# r4 short *output
+;# r5 int pitch
+;#
+;# 4x4 inverse transform done as two passes with a transpose in between.
+;# The 16 input shorts are read as two vectors from r3; the result is
+;# rounded with (x + 4) >> 3 and written as 4 rows of 4 shorts, with r4
+;# advanced by the pitch in r5 (in bytes) between rows.
+ .align 2
+short_idct4x4llm_ppc:
+ mfspr r11, 256 ;# get old VRSAVE
+ oris r12, r11, 0xfff8 ;# mark v0-v12 as in use
+ mtspr 256, r12 ;# set VRSAVE
+
+ load_c v8, sinpi8sqrt2, 0, r9, r10
+ load_c v9, cospi8sqrt2minus1, 0, r9, r10
+ load_c v10, hi_hi, 0, r9, r10
+ load_c v11, lo_lo, 0, r9, r10
+ load_c v12, shift_16, 0, r9, r10
+
+ li r10, 16
+ lvx v0, 0, r3 ;# input ip[0], ip[ 4]
+ lvx v1, r10, r3 ;# input ip[8], ip[12]
+
+ ;# first pass
+ vupkhsh v2, v0
+ vupkhsh v3, v1
+ vaddsws v6, v2, v3 ;# a1 = ip[0]+ip[8]
+ vsubsws v7, v2, v3 ;# b1 = ip[0]-ip[8]
+
+ ;# Each product below is formed as ((x * K) >> 16) + x. For
+ ;# sinpi8sqrt2, K = 35468 does not fit in a signed halfword, so the
+ ;# signed multiply sees K - 65536 and adding x back gives x * 35468 >> 16.
+ vupklsh v0, v0
+ vmulosh v4, v0, v8
+ vsraw v4, v4, v12
+ vaddsws v4, v4, v0 ;# ip[ 4] * sin(pi/8) * sqrt(2)
+
+ vupklsh v1, v1
+ vmulosh v5, v1, v9
+ vsraw v5, v5, v12 ;# ip[12] * cos(pi/8) * sqrt(2)
+ vaddsws v5, v5, v1
+
+ vsubsws v4, v4, v5 ;# c1
+
+ vmulosh v3, v1, v8
+ vsraw v3, v3, v12
+ vaddsws v3, v3, v1 ;# ip[12] * sin(pi/8) * sqrt(2)
+
+ vmulosh v5, v0, v9
+ vsraw v5, v5, v12 ;# ip[ 4] * cos(pi/8) * sqrt(2)
+ vaddsws v5, v5, v0
+
+ vaddsws v3, v3, v5 ;# d1
+
+ vaddsws v0, v6, v3 ;# a1 + d1
+ vsubsws v3, v6, v3 ;# a1 - d1
+
+ vaddsws v1, v7, v4 ;# b1 + c1
+ vsubsws v2, v7, v4 ;# b1 - c1
+
+ ;# transpose input
+ vmrghw v4, v0, v1 ;# a0 b0 a1 b1
+ vmrghw v5, v2, v3 ;# c0 d0 c1 d1
+
+ vmrglw v6, v0, v1 ;# a2 b2 a3 b3
+ vmrglw v7, v2, v3 ;# c2 d2 c3 d3
+
+ vperm v0, v4, v5, v10 ;# a0 b0 c0 d0
+ vperm v1, v4, v5, v11 ;# a1 b1 c1 d1
+
+ vperm v2, v6, v7, v10 ;# a2 b2 c2 d2
+ vperm v3, v6, v7, v11 ;# a3 b3 c3 d3
+
+ ;# second pass
+ vaddsws v6, v0, v2 ;# a1 = ip[0]+ip[8]
+ vsubsws v7, v0, v2 ;# b1 = ip[0]-ip[8]
+
+ vmulosh v4, v1, v8
+ vsraw v4, v4, v12
+ vaddsws v4, v4, v1 ;# ip[ 4] * sin(pi/8) * sqrt(2)
+
+ vmulosh v5, v3, v9
+ vsraw v5, v5, v12 ;# ip[12] * cos(pi/8) * sqrt(2)
+ vaddsws v5, v5, v3
+
+ vsubsws v4, v4, v5 ;# c1
+
+ vmulosh v2, v3, v8
+ vsraw v2, v2, v12
+ vaddsws v2, v2, v3 ;# ip[12] * sin(pi/8) * sqrt(2)
+
+ vmulosh v5, v1, v9
+ vsraw v5, v5, v12 ;# ip[ 4] * cos(pi/8) * sqrt(2)
+ vaddsws v5, v5, v1
+
+ vaddsws v3, v2, v5 ;# d1
+
+ vaddsws v0, v6, v3 ;# a1 + d1
+ vsubsws v3, v6, v3 ;# a1 - d1
+
+ vaddsws v1, v7, v4 ;# b1 + c1
+ vsubsws v2, v7, v4 ;# b1 - c1
+
+ vspltish v6, 4 ;# rounding
+ vspltish v7, 3 ;# shift
+
+ vpkswss v0, v0, v1
+ vpkswss v1, v2, v3
+
+ vaddshs v0, v0, v6 ;# (x + 4) >> 3
+ vaddshs v1, v1, v6
+
+ vsrah v0, v0, v7
+ vsrah v1, v1, v7
+
+ ;# transpose output
+ vmrghh v2, v0, v1 ;# a0 c0 a1 c1 a2 c2 a3 c3
+ vmrglh v3, v0, v1 ;# b0 d0 b1 d1 b2 d2 b3 d3
+
+ vmrghh v0, v2, v3 ;# a0 b0 c0 d0 a1 b1 c1 d1
+ vmrglh v1, v2, v3 ;# a2 b2 c2 d2 a3 b3 c3 d3
+
+ ;# only the first 16 bytes of this frame are used, as scratch for
+ ;# moving the result vectors out through r6
+ stwu r1,-416(r1) ;# create space on the stack
+
+ ;# rows 0 and 1 come from v0
+ stvx v0, 0, r1
+ lwz r6, 0(r1)
+ stw r6, 0(r4)
+ lwz r6, 4(r1)
+ stw r6, 4(r4)
+
+ add r4, r4, r5
+
+ lwz r6, 8(r1)
+ stw r6, 0(r4)
+ lwz r6, 12(r1)
+ stw r6, 4(r4)
+
+ add r4, r4, r5
+
+ ;# rows 2 and 3 come from v1
+ stvx v1, 0, r1
+ lwz r6, 0(r1)
+ stw r6, 0(r4)
+ lwz r6, 4(r1)
+ stw r6, 4(r4)
+
+ add r4, r4, r5
+
+ lwz r6, 8(r1)
+ stw r6, 0(r4)
+ lwz r6, 12(r1)
+ stw r6, 4(r4)
+
+ addi r1, r1, 416 ;# recover stack
+
+ mtspr 256, r11 ;# reset old VRSAVE
+
+ blr
+
+ .align 4
+sinpi8sqrt2:
+ .short 35468, 35468, 35468, 35468, 35468, 35468, 35468, 35468
+
+ .align 4
+cospi8sqrt2minus1:
+ .short 20091, 20091, 20091, 20091, 20091, 20091, 20091, 20091
+
+ .align 4
+shift_16:
+ .long 16, 16, 16, 16
+
+ .align 4
+hi_hi:
+ .byte 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23
+
+ .align 4
+lo_lo:
+ .byte 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31
diff --git a/vp9/common/ppc/vp9_loopfilter_altivec.c b/vp9/common/ppc/vp9_loopfilter_altivec.c
new file mode 100644
index 0000000..599070a
--- /dev/null
+++ b/vp9/common/ppc/vp9_loopfilter_altivec.c
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vp9/common/vp9_loopfilter.h"
+#include "vp9/common/vp9_onyxc_int.h"
+
+/* Signature of the filters that work on a single (y) plane. */
+typedef void loop_filter_function_y_ppc(unsigned char *s, /* source pointer */
+                                        int p,            /* pitch */
+                                        const signed char *flimit,
+                                        const signed char *limit,
+                                        const signed char *thresh);
+
+/* Signature of the filters that work on the u and v planes together. */
+typedef void loop_filter_function_uv_ppc(unsigned char *u, /* source pointer */
+                                         unsigned char *v, /* source pointer */
+                                         int p,            /* pitch */
+                                         const signed char *flimit,
+                                         const signed char *limit,
+                                         const signed char *thresh);
+
+/* Signature of the "simple" filters, which only take flimit. */
+typedef void loop_filter_function_s_ppc(unsigned char *s, /* source pointer */
+                                        int p,            /* pitch */
+                                        const signed char *flimit);
+
+/* Routines declared here and defined elsewhere. */
+loop_filter_function_y_ppc mbloop_filter_horizontal_edge_y_ppc,
+                           mbloop_filter_vertical_edge_y_ppc,
+                           loop_filter_horizontal_edge_y_ppc,
+                           loop_filter_vertical_edge_y_ppc;
+
+loop_filter_function_uv_ppc mbloop_filter_horizontal_edge_uv_ppc,
+                            mbloop_filter_vertical_edge_uv_ppc,
+                            loop_filter_horizontal_edge_uv_ppc,
+                            loop_filter_vertical_edge_uv_ppc;
+
+loop_filter_function_s_ppc loop_filter_simple_horizontal_edge_ppc,
+                           loop_filter_simple_vertical_edge_ppc;
+
+// Horizontal MB filtering: the y plane is always filtered, the u/v planes
+// only when u_ptr is non-null.
+void loop_filter_mbh_ppc(unsigned char *y_ptr,
+                         unsigned char *u_ptr,
+                         unsigned char *v_ptr,
+                         int y_stride,
+                         int uv_stride,
+                         loop_filter_info *lfi) {
+  mbloop_filter_horizontal_edge_y_ppc(y_ptr, y_stride,
+                                      lfi->mbflim, lfi->lim, lfi->thr);
+
+  // Nothing more to do when no u plane was supplied.
+  if (!u_ptr) {
+    return;
+  }
+
+  mbloop_filter_horizontal_edge_uv_ppc(u_ptr, v_ptr, uv_stride,
+                                       lfi->mbflim, lfi->lim, lfi->thr);
+}
+
+// Simple horizontal MB filtering: only the y plane is touched; the u/v
+// arguments exist to match the other filter entry points and are ignored.
+void loop_filter_mbhs_ppc(unsigned char *y_ptr,
+                          unsigned char *u_ptr,
+                          unsigned char *v_ptr,
+                          int y_stride,
+                          int uv_stride,
+                          loop_filter_info *lfi) {
+  (void)u_ptr;
+  (void)v_ptr;
+  (void)uv_stride;
+
+  loop_filter_simple_horizontal_edge_ppc(y_ptr, y_stride, lfi->mbflim);
+}
+
+// Vertical MB filtering: the y plane is always filtered, the u/v planes
+// only when u_ptr is non-null.
+void loop_filter_mbv_ppc(unsigned char *y_ptr,
+                         unsigned char *u_ptr,
+                         unsigned char *v_ptr,
+                         int y_stride,
+                         int uv_stride,
+                         loop_filter_info *lfi) {
+  mbloop_filter_vertical_edge_y_ppc(y_ptr, y_stride,
+                                    lfi->mbflim, lfi->lim, lfi->thr);
+
+  // Nothing more to do when no u plane was supplied.
+  if (!u_ptr) {
+    return;
+  }
+
+  mbloop_filter_vertical_edge_uv_ppc(u_ptr, v_ptr, uv_stride,
+                                     lfi->mbflim, lfi->lim, lfi->thr);
+}
+
+// Simple vertical MB filtering: only the y plane is touched; the u/v
+// arguments exist to match the other filter entry points and are ignored.
+void loop_filter_mbvs_ppc(unsigned char *y_ptr,
+                          unsigned char *u_ptr,
+                          unsigned char *v_ptr,
+                          int y_stride,
+                          int uv_stride,
+                          loop_filter_info *lfi) {
+  (void)u_ptr;
+  (void)v_ptr;
+  (void)uv_stride;
+
+  loop_filter_simple_vertical_edge_ppc(y_ptr, y_stride, lfi->mbflim);
+}
+
+// Horizontal B filtering: filters the y edges at rows 4, 8 and 12, then the
+// u/v edge at row 4 when u_ptr is non-null.
+void loop_filter_bh_ppc(unsigned char *y_ptr,
+                        unsigned char *u_ptr,
+                        unsigned char *v_ptr,
+                        int y_stride,
+                        int uv_stride,
+                        loop_filter_info *lfi) {
+  int row;
+
+  // These should all be done at once with one call, instead of 3
+  for (row = 4; row <= 12; row += 4) {
+    loop_filter_horizontal_edge_y_ppc(y_ptr + row * y_stride, y_stride,
+                                      lfi->flim, lfi->lim, lfi->thr);
+  }
+
+  if (!u_ptr) {
+    return;
+  }
+
+  loop_filter_horizontal_edge_uv_ppc(u_ptr + 4 * uv_stride,
+                                     v_ptr + 4 * uv_stride, uv_stride,
+                                     lfi->flim, lfi->lim, lfi->thr);
+}
+
+// Simple horizontal B filtering: filters the y edges at rows 4, 8 and 12.
+// The u/v arguments are ignored.
+void loop_filter_bhs_ppc(unsigned char *y_ptr,
+                         unsigned char *u_ptr,
+                         unsigned char *v_ptr,
+                         int y_stride,
+                         int uv_stride,
+                         loop_filter_info *lfi) {
+  int row;
+
+  (void)u_ptr;
+  (void)v_ptr;
+  (void)uv_stride;
+
+  for (row = 4; row <= 12; row += 4) {
+    loop_filter_simple_horizontal_edge_ppc(y_ptr + row * y_stride, y_stride,
+                                           lfi->flim);
+  }
+}
+
+// Vertical B filtering: one call on the y plane starting at y_ptr, then the
+// u/v edge at column 4 when u_ptr is non-null.
+void loop_filter_bv_ppc(unsigned char *y_ptr,
+                        unsigned char *u_ptr,
+                        unsigned char *v_ptr,
+                        int y_stride,
+                        int uv_stride,
+                        loop_filter_info *lfi) {
+  loop_filter_vertical_edge_y_ppc(y_ptr, y_stride,
+                                  lfi->flim, lfi->lim, lfi->thr);
+
+  if (!u_ptr) {
+    return;
+  }
+
+  loop_filter_vertical_edge_uv_ppc(u_ptr + 4, v_ptr + 4, uv_stride,
+                                   lfi->flim, lfi->lim, lfi->thr);
+}
+
+// Simple vertical B filtering: filters the y edges at columns 4, 8 and 12.
+// The u/v arguments are ignored.
+void loop_filter_bvs_ppc(unsigned char *y_ptr,
+                         unsigned char *u_ptr,
+                         unsigned char *v_ptr,
+                         int y_stride,
+                         int uv_stride,
+                         loop_filter_info *lfi) {
+  int col;
+
+  (void)u_ptr;
+  (void)v_ptr;
+  (void)uv_stride;
+
+  for (col = 4; col <= 12; col += 4) {
+    loop_filter_simple_vertical_edge_ppc(y_ptr + col, y_stride, lfi->flim);
+  }
+}
diff --git a/vp9/common/ppc/vp9_loopfilter_filters_altivec.asm b/vp9/common/ppc/vp9_loopfilter_filters_altivec.asm
new file mode 100644
index 0000000..61df4e9
--- /dev/null
+++ b/vp9/common/ppc/vp9_loopfilter_filters_altivec.asm
@@ -0,0 +1,1253 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+ .globl mbloop_filter_horizontal_edge_y_ppc
+ .globl loop_filter_horizontal_edge_y_ppc
+ .globl mbloop_filter_vertical_edge_y_ppc
+ .globl loop_filter_vertical_edge_y_ppc
+
+ .globl mbloop_filter_horizontal_edge_uv_ppc
+ .globl loop_filter_horizontal_edge_uv_ppc
+ .globl mbloop_filter_vertical_edge_uv_ppc
+ .globl loop_filter_vertical_edge_uv_ppc
+
+ .globl loop_filter_simple_horizontal_edge_ppc
+ .globl loop_filter_simple_vertical_edge_ppc
+
+ .text
+;# We often need to perform transposes (and other transpose-like operations)
+;# on matrices of data. This is simplified by the fact that we usually
+;# operate on hunks of data whose dimensions are powers of 2, or at least
+;# divisible by highish powers of 2.
+;#
+;# These operations can be very confusing. They become more straightforward
+;# when we think of them as permutations of address bits: Concatenate a
+;# group of vector registers and think of it as occupying a block of
+;# memory beginning at address zero. The low four bits 0...3 of the
+;# address then correspond to position within a register, the higher-order
+;# address bits select the register.
+;#
+;# Although register selection, at the code level, is arbitrary, things
+;# are simpler if we use contiguous ranges of register numbers, simpler
+;# still if the low-order bits of the register number correspond to
+;# conceptual address bits. We do this whenever reasonable.
+;#
+;# A 16x16 transpose can then be thought of as an operation on
+;# a 256-element block of memory. It takes 8 bits 0...7 to address this
+;# memory and the effect of a transpose is to interchange address bit
+;# 0 with 4, 1 with 5, 2 with 6, and 3 with 7. Bits 0...3 index the
+;# column, which is interchanged with the row addressed by bits 4..7.
+;#
+;# The altivec merge instructions provide a rapid means of effecting
+;# many of these transforms. They operate at three widths (8,16,32).
+;# Writing V(x) for vector register #x, paired merges permute address
+;# indices as follows.
+;#
+;# 0->1 1->2 2->3 3->(4+d) (4+s)->0:
+;#
+;# vmrghb V( x), V( y), V( y + (1<<s))
+;# vmrglb V( x + (1<<d)), V( y), V( y + (1<<s))
+;#
+;#
+;# =0= 1->2 2->3 3->(4+d) (4+s)->1:
+;#
+;# vmrghh V( x), V( y), V( y + (1<<s))
+;# vmrglh V( x + (1<<d)), V( y), V( y + (1<<s))
+;#
+;#
+;# =0= =1= 2->3 3->(4+d) (4+s)->2:
+;#
+;# vmrghw V( x), V( y), V( y + (1<<s))
+;# vmrglw V( x + (1<<d)), V( y), V( y + (1<<s))
+;#
+;#
+;# Unfortunately, there is no doubleword merge instruction.
+;# The following sequence uses "vperm" as a substitute.
+;# Assuming that the selection masks b_hihi and b_lolo (defined in LFppc.c)
+;# are in registers Vhihi and Vlolo, we can also effect the permutation
+;#
+;# =0= =1= =2= 3->(4+d) (4+s)->3 by the sequence:
+;#
+;# vperm V( x), V( y), V( y + (1<<s)), Vhihi
+;# vperm V( x + (1<<d)), V( y), V( y + (1<<s)), Vlolo
+;#
+;#
+;# Except for bits s and d, the other relationships between register
+;# number (= high-order part of address) bits are at the disposal of
+;# the programmer.
+;#
+
+;# To avoid excess transposes, we filter all 3 vertical luma subblock
+;# edges together. This requires a single 16x16 transpose, which, in
+;# the above language, amounts to the following permutation of address
+;# indices: 0<->4 1<->5 2<->6 3<->7, which we accomplish by
+;# 4 iterations of the cyclic transform 0->1->2->3->4->5->6->7->0.
+;#
+;# Except for the fact that the destination registers get written
+;# before we are done referencing the old contents, the cyclic transform
+;# is effected by
+;#
+;# x = 0; do {
+;# vmrghb V(2x), V(x), V(x+8);
+;# vmrglb V(2x+1), V(x), V(x+8);
+;# } while( ++x < 8);
+;#
+;# For clarity, and because we can afford it, we do this transpose
+;# using all 32 registers, alternating the banks 0..15 and 16 .. 31,
+;# leaving the final result in 16 .. 31, as the lower registers are
+;# used in the filtering itself.
+;#
+.macro Tpair A, B, X, Y
+ vmrghb \A, \X, \Y ;# A = interleaved bytes from the high halves of X and Y
+ vmrglb \B, \X, \Y ;# B = interleaved bytes from the low halves of X and Y
+.endm
+
+;# Each step takes 8*2 = 16 instructions
+;# t16_even: one merge pass, sources v0..v15, results v16..v31.
+.macro t16_even
+ Tpair v16,v17, v0,v8
+ Tpair v18,v19, v1,v9
+ Tpair v20,v21, v2,v10
+ Tpair v22,v23, v3,v11
+ Tpair v24,v25, v4,v12
+ Tpair v26,v27, v5,v13
+ Tpair v28,v29, v6,v14
+ Tpair v30,v31, v7,v15
+.endm
+;# t16_odd: one merge pass, sources v16..v31, results v0..v15.
+.macro t16_odd
+ Tpair v0,v1, v16,v24
+ Tpair v2,v3, v17,v25
+ Tpair v4,v5, v18,v26
+ Tpair v6,v7, v19,v27
+ Tpair v8,v9, v20,v28
+ Tpair v10,v11, v21,v29
+ Tpair v12,v13, v22,v30
+ Tpair v14,v15, v23,v31
+.endm
+
+;# Whole transpose takes 4*16 = 64 instructions
+;# Input and result are both in v16..v31; v0..v15 are overwritten.
+.macro t16_full
+ t16_odd ;# v16..v31 -> v0..v15
+ t16_even ;# v0..v15 -> v16..v31
+ t16_odd ;# v16..v31 -> v0..v15
+ t16_even ;# v0..v15 -> v16..v31 (final result)
+.endm
+
+;# Vertical edge filtering requires transposes. For the simple filter,
+;# we need to convert 16 rows of 4 pels each into 4 registers of 16 pels
+;# each. Writing 0 ... 63 for the pixel indices, the desired result is:
+;#
+;# v0 = 0 1 ... 14 15
+;# v1 = 16 17 ... 30 31
+;# v2 = 32 33 ... 46 47
+;# v3 = 48 49 ... 62 63
+;#
+;# In frame-buffer memory, the layout is:
+;#
+;# 0 16 32 48
+;# 1 17 33 49
+;# ...
+;# 15 31 47 63.
+;#
+;# We begin by reading the data 32 bits at a time (using scalar operations)
+;# into a temporary array, reading the rows of the array into vector registers,
+;# with the following layout:
+;#
+;# v0 = 0 16 32 48 4 20 36 52 8 24 40 56 12 28 44 60
+;# v1 = 1 17 33 49 5 21 ... 45 61
+;# v2 = 2 18 ... 46 62
+;# v3 = 3 19 ... 47 63
+;#
+;# From the "address-bit" perspective discussed above, we simply need to
+;# interchange bits 0 <-> 4 and 1 <-> 5, leaving bits 2 and 3 alone.
+;# In other words, we transpose each of the four 4x4 submatrices.
+;#
+;# This transformation is its own inverse, and we need to perform it
+;# again before writing the pixels back into the frame buffer.
+;#
+;# It acts in place on registers v0...v3, uses v4...v7 as temporaries,
+;# and takes the b_hihi/b_lolo selectors defined above in the registers
+;# passed as its two arguments. We think of both groups of 4 registers
+;# as having "addresses" {0,1,2,3} * 16.
+;#
+.macro Transpose4times4x4 Vlo, Vhi
+ ;# Vlo/Vhi name the registers holding the vperm selectors used in the last step.
+ ;# d=s=0 0->1 1->2 2->3 3->4 4->0 =5=
+
+ vmrghb v4, v0, v1 ;# byte merges: v0..v3 -> v4..v7
+ vmrglb v5, v0, v1
+ vmrghb v6, v2, v3
+ vmrglb v7, v2, v3
+
+ ;# d=0 s=1 =0= 1->2 2->3 3->4 4->5 5->1
+
+ vmrghh v0, v4, v6 ;# halfword merges: v4..v7 -> v0..v3
+ vmrglh v1, v4, v6
+ vmrghh v2, v5, v7
+ vmrglh v3, v5, v7
+
+ ;# d=s=0 =0= =1= 2->3 3->4 4->2 =5=
+
+ vmrghw v4, v0, v1 ;# word merges: v0..v3 -> v4..v7
+ vmrglw v5, v0, v1
+ vmrghw v6, v2, v3
+ vmrglw v7, v2, v3
+
+ ;# d=0 s=1 =0= =1= =2= 3->4 4->5 5->3
+
+ vperm v0, v4, v6, \Vlo ;# doubleword selects: v4..v7 -> v0..v3 (result)
+ vperm v1, v4, v6, \Vhi
+ vperm v2, v5, v7, \Vlo
+ vperm v3, v5, v7, \Vhi
+.endm
+;# end Transpose4times4x4
+
+
+;# Normal mb vertical edge filter transpose.
+;#
+;# We read 8 columns of data, initially in the following pattern:
+;#
+;# (0,0) (1,0) ... (7,0) (0,1) (1,1) ... (7,1)
+;# (0,2) (1,2) ... (7,2) (0,3) (1,3) ... (7,3)
+;# ...
+;# (0,14) (1,14) .. (7,14) (0,15) (1,15) .. (7,15)
+;#
+;# and wish to convert to:
+;#
+;# (0,0) ... (0,15)
+;# (1,0) ... (1,15)
+;# ...
+;# (7,0) ... (7,15).
+;#
+;# In "address bit" language, we wish to map
+;#
+;# 0->4 1->5 2->6 3->0 4->1 5->2 6->3, i.e., I -> (I+4) mod 7.
+;#
+;# This can be accomplished by 4 iterations of the cyclic transform
+;#
+;# I -> (I+1) mod 7;
+;#
+;# each iteration can be realized by (d=0, s=2):
+;#
+;# x = 0; do Tpair( V(2x),V(2x+1), V(x),V(x+4)) while( ++x < 4);
+;#
+;# The input/output is in registers v0...v7. We use v10...v17 as mirrors;
+;# preserving v8 = sign converter.
+;#
+;# Inverse transpose is similar, except here I -> (I+3) mod 7 and the
+;# result lands in the "mirror" registers v10...v17
+;# t8x16_odd: one merge pass, sources v0..v7, results in the mirrors v10..v17.
+.macro t8x16_odd
+ Tpair v10, v11, v0, v4
+ Tpair v12, v13, v1, v5
+ Tpair v14, v15, v2, v6
+ Tpair v16, v17, v3, v7
+.endm
+;# t8x16_even: one merge pass, sources v10..v17, results v0..v7.
+.macro t8x16_even
+ Tpair v0, v1, v10, v14
+ Tpair v2, v3, v11, v15
+ Tpair v4, v5, v12, v16
+ Tpair v6, v7, v13, v17
+.endm
+;# transpose8x16_fwd: four merge passes; input and result in v0..v7, v10..v17 overwritten.
+.macro transpose8x16_fwd
+ t8x16_odd
+ t8x16_even
+ t8x16_odd
+ t8x16_even
+.endm
+;# transpose8x16_inv: three merge passes; input in v0..v7, result left in v10..v17.
+.macro transpose8x16_inv
+ t8x16_odd
+ t8x16_even
+ t8x16_odd
+.endm
+;# Transpose16x16: four byte-merge passes; input and result in v16..v31, v0..v15 overwritten.
+.macro Transpose16x16
+ vmrghb v0, v16, v24 ;# pass 1: v16..v31 -> v0..v15
+ vmrglb v1, v16, v24
+ vmrghb v2, v17, v25
+ vmrglb v3, v17, v25
+ vmrghb v4, v18, v26
+ vmrglb v5, v18, v26
+ vmrghb v6, v19, v27
+ vmrglb v7, v19, v27
+ vmrghb v8, v20, v28
+ vmrglb v9, v20, v28
+ vmrghb v10, v21, v29
+ vmrglb v11, v21, v29
+ vmrghb v12, v22, v30
+ vmrglb v13, v22, v30
+ vmrghb v14, v23, v31
+ vmrglb v15, v23, v31
+ vmrghb v16, v0, v8 ;# pass 2: v0..v15 -> v16..v31
+ vmrglb v17, v0, v8
+ vmrghb v18, v1, v9
+ vmrglb v19, v1, v9
+ vmrghb v20, v2, v10
+ vmrglb v21, v2, v10
+ vmrghb v22, v3, v11
+ vmrglb v23, v3, v11
+ vmrghb v24, v4, v12
+ vmrglb v25, v4, v12
+ vmrghb v26, v5, v13
+ vmrglb v27, v5, v13
+ vmrghb v28, v6, v14
+ vmrglb v29, v6, v14
+ vmrghb v30, v7, v15
+ vmrglb v31, v7, v15
+ vmrghb v0, v16, v24 ;# pass 3: v16..v31 -> v0..v15
+ vmrglb v1, v16, v24
+ vmrghb v2, v17, v25
+ vmrglb v3, v17, v25
+ vmrghb v4, v18, v26
+ vmrglb v5, v18, v26
+ vmrghb v6, v19, v27
+ vmrglb v7, v19, v27
+ vmrghb v8, v20, v28
+ vmrglb v9, v20, v28
+ vmrghb v10, v21, v29
+ vmrglb v11, v21, v29
+ vmrghb v12, v22, v30
+ vmrglb v13, v22, v30
+ vmrghb v14, v23, v31
+ vmrglb v15, v23, v31
+ vmrghb v16, v0, v8 ;# pass 4: v0..v15 -> v16..v31 (final result)
+ vmrglb v17, v0, v8
+ vmrghb v18, v1, v9
+ vmrglb v19, v1, v9
+ vmrghb v20, v2, v10
+ vmrglb v21, v2, v10
+ vmrghb v22, v3, v11
+ vmrglb v23, v3, v11
+ vmrghb v24, v4, v12
+ vmrglb v25, v4, v12
+ vmrghb v26, v5, v13
+ vmrglb v27, v5, v13
+ vmrghb v28, v6, v14
+ vmrglb v29, v6, v14
+ vmrghb v30, v7, v15
+ vmrglb v31, v7, v15
+.endm
+
+;# load_g loads a global vector (whose address is stored in the memory word
+;# named by the operand Gptr) into vector register Vreg. Trashes r0
+.macro load_g Vreg, Gptr
+ lwz r0, \Gptr ;# r0 = address word read from Gptr
+ lvx \Vreg, 0, r0 ;# Vreg = the 16 bytes at that address
+.endm
+
+;# exploit the saturation here. if the answer is negative
+;# it will be clamped to 0. orring 0 with a positive
+;# number will be the positive number (abs)
+;# RES = abs( A-B), trashes TMP
+.macro Abs RES, TMP, A, B
+ vsububs \RES, \A, \B ;# RES = A-B, clamped to 0 where B > A
+ vsububs \TMP, \B, \A ;# TMP = B-A, clamped to 0 where A > B
+ vor \RES, \RES, \TMP ;# at most one of the two is non-zero, so OR gives |A-B|
+.endm
+
+;# RES = Max( RES, abs( A-B)), trashes TMP
+.macro max_abs RES, TMP, A, B
+ vsububs \TMP, \A, \B ;# TMP = saturated A-B (0 where B > A)
+ vmaxub \RES, \RES, \TMP ;# fold it into the running maximum
+ vsububs \TMP, \B, \A ;# TMP = saturated B-A (0 where A > B)
+ vmaxub \RES, \RES, \TMP ;# RES now also covers |A-B|
+.endm
+;# Masks: in v0..v7 = P3..Q3, v8/v9/v10 = flimit/limit/thresh; out v10 = hev, v8 = "exceeded" mask.
+.macro Masks
+ ;# build masks
+ ;# input is all 8 bit unsigned (0-255). need to
+ ;# do abs(vala-valb) > limit. but no need to compare each
+ ;# value to the limit. find the max of the absolute differences
+ ;# and compare that to the limit.
+ ;# First hev
+ Abs v14, v13, v2, v3 ;# |P1 - P0|
+ max_abs v14, v13, v5, v4 ;# |Q1 - Q0|
+
+ vcmpgtub v10, v14, v10 ;# HEV = true if thresh exceeded
+
+ ;# Next limit (v14 keeps accumulating, so the two hev differences are included)
+ max_abs v14, v13, v0, v1 ;# |P3 - P2|
+ max_abs v14, v13, v1, v2 ;# |P2 - P1|
+ max_abs v14, v13, v6, v5 ;# |Q2 - Q1|
+ max_abs v14, v13, v7, v6 ;# |Q3 - Q2|
+
+ vcmpgtub v9, v14, v9 ;# R = true if limit exceeded
+
+ ;# flimit
+ Abs v14, v13, v3, v4 ;# |P0 - Q0|
+
+ vcmpgtub v8, v14, v8 ;# X = true if flimit exceeded
+
+ vor v8, v8, v9 ;# v8 = true if flimit or limit exceeded (callers apply it with vandc)
+ ;# done building masks; v9, v13 and v14 have been trashed
+.endm
+;# build_constants: load flimit/limit/thresh vectors from addresses RFL/RLI/RTH into FL/LI/TH.
+.macro build_constants RFL, RLI, RTH, FL, LI, TH
+ ;# build constants
+ lvx \FL, 0, \RFL ;# flimit
+ lvx \LI, 0, \RLI ;# limit
+ lvx \TH, 0, \RTH ;# thresh
+
+ vspltisb v11, 8 ;# v11 = 8 in every byte
+ vspltisb v12, 4 ;# v12 = shift count 4 (v12 is trashed)
+ vslb v11, v11, v12 ;# 0x80808080808080808080808080808080
+.endm
+;# load_data_y: r3 = first row below the edge, r4 = stride; loads P3..Q3 into v0..v7.
+.macro load_data_y
+ ;# setup strides/pointers to be able to access
+ ;# all of the data (overwrites r5, r6 and r7)
+ add r5, r4, r4 ;# r5 = 2 * stride
+ sub r6, r3, r5 ;# r6 -> 2 rows back
+ neg r7, r4 ;# r7 = -stride
+
+ ;# load 16 pixels worth of data to work on
+ sub r0, r6, r5 ;# r0 -> 4 rows back (temp)
+ lvx v0, 0, r0 ;# P3 (read only)
+ lvx v1, r7, r6 ;# P2
+ lvx v2, 0, r6 ;# P1
+ lvx v3, r7, r3 ;# P0
+ lvx v4, 0, r3 ;# Q0
+ lvx v5, r4, r3 ;# Q1
+ lvx v6, r5, r3 ;# Q2
+ add r0, r3, r5 ;# r0 -> 2 rows fwd (temp)
+ lvx v7, r4, r0 ;# Q3 (read only)
+.endm
+;# common_adjust: inner-tap adjustment of P0/Q0, used by SBFilter, Fil and vp8_simple_filter.
+;# Expects
+;# v8 == mask (set where filtering is suppressed), v10 == HEV, v11 == 0x80 splat
+;# v13, v14, v15 == tmp; on exit v13 holds f1 = c(f+4) >> 3
+;# v8 and v9 are overwritten with the constants 3 and 4
+.macro common_adjust P0, Q0, P1, Q1, HEV_PRESENT
+ vxor \P1, \P1, v11 ;# SP1
+ vxor \P0, \P0, v11 ;# SP0
+ vxor \Q0, \Q0, v11 ;# SQ0
+ vxor \Q1, \Q1, v11 ;# SQ1
+
+ vsubsbs v13, \P1, \Q1 ;# f = c (P1 - Q1)
+.if \HEV_PRESENT
+ vand v13, v13, v10 ;# f &= hev
+.endif
+ vsubsbs v14, \Q0, \P0 ;# -126 <= X = Q0-P0 <= +126
+ vaddsbs v13, v13, v14
+ vaddsbs v13, v13, v14
+ vaddsbs v13, v13, v14 ;# A = c( c(P1-Q1) + 3*(Q0-P0))
+
+ vandc v13, v13, v8 ;# f &= ~mask (zero where a limit was exceeded)
+
+ vspltisb v8, 3
+ vspltisb v9, 4
+
+ vaddsbs v14, v13, v9 ;# f1 = c (f+4)
+ vaddsbs v15, v13, v8 ;# f2 = c (f+3)
+
+ vsrab v13, v14, v8 ;# f1 >>= 3
+ vsrab v15, v15, v8 ;# f2 >>= 3
+
+ vsubsbs \Q0, \Q0, v13 ;# u1 = c (SQ0 - f1)
+ vaddsbs \P0, \P0, v15 ;# u2 = c (SP0 + f2); all four pels stay in signed form
+.endm
+;# vp8_mbfilter: Masks + wide filter on v1..v6 (P2..Q2); needs v11 == 0x80, trashes v8..v10, v12..v15.
+.macro vp8_mbfilter
+ Masks
+
+ ;# start the filtering here
+ vxor v1, v1, v11 ;# SP2
+ vxor v2, v2, v11 ;# SP1
+ vxor v3, v3, v11 ;# SP0
+ vxor v4, v4, v11 ;# SQ0
+ vxor v5, v5, v11 ;# SQ1
+ vxor v6, v6, v11 ;# SQ2
+
+ ;# add outer taps if we have high edge variance
+ vsubsbs v13, v2, v5 ;# f = c (SP1-SQ1)
+
+ vsubsbs v14, v4, v3 ;# SQ0-SP0
+ vaddsbs v13, v13, v14
+ vaddsbs v13, v13, v14
+ vaddsbs v13, v13, v14 ;# f = c( c(SP1-SQ1) + 3*(SQ0-SP0))
+
+ vandc v13, v13, v8 ;# f &= ~mask (zero where a limit was exceeded)
+ vand v15, v13, v10 ;# f2 = f & hev
+
+ ;# save bottom 3 bits so that we round one side +4 and the other +3
+ vspltisb v8, 3
+ vspltisb v9, 4
+
+ vaddsbs v14, v15, v9 ;# f1 = c (f+4)
+ vaddsbs v15, v15, v8 ;# f2 = c (f+3)
+
+ vsrab v14, v14, v8 ;# f1 >>= 3
+ vsrab v15, v15, v8 ;# f2 >>= 3
+
+ vsubsbs v4, v4, v14 ;# u1 = c (SQ0 - f1)
+ vaddsbs v3, v3, v15 ;# u2 = c (SP0 + f2)
+
+ ;# only apply wider filter if not high edge variance
+ vandc v13, v13, v10 ;# f &= ~hev
+
+ vspltisb v9, 2
+ vnor v8, v8, v8 ;# v8 = ~3 = 0xfc in every byte
+ vsrb v9, v8, v9 ;# 0x3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f
+ vupkhsb v9, v9 ;# 0x003f003f003f003f003f003f003f003f
+ vspltisb v8, 9
+
+ ;# roughly 1/7th difference across boundary
+ vspltish v10, 7
+ vmulosb v14, v8, v13 ;# 9 * f for the odd bytes, as halfwords
+ vmulesb v15, v8, v13 ;# 9 * f for the even bytes, as halfwords
+ vaddshs v14, v14, v9 ;# += 63
+ vaddshs v15, v15, v9
+ vsrah v14, v14, v10 ;# >>= 7
+ vsrah v15, v15, v10
+ vmrglh v10, v15, v14 ;# re-interleave even/odd results: pels 8..15
+ vmrghh v15, v15, v14 ;# re-interleave even/odd results: pels 0..7
+
+ vpkshss v10, v15, v10 ;# X = saturated down to bytes
+
+ vsubsbs v6, v6, v10 ;# subtract from Q and add to P
+ vaddsbs v1, v1, v10
+
+ vxor v6, v6, v11 ;# Q2 back to unsigned pels
+ vxor v1, v1, v11 ;# P2 back to unsigned pels
+
+ ;# roughly 2/7th difference across boundary
+ vspltish v10, 7
+ vaddubm v12, v8, v8 ;# v12 = 18 in every byte
+ vmulosb v14, v12, v13 ;# 18 * f for the odd bytes, as halfwords
+ vmulesb v15, v12, v13 ;# 18 * f for the even bytes, as halfwords
+ vaddshs v14, v14, v9 ;# += 63
+ vaddshs v15, v15, v9
+ vsrah v14, v14, v10 ;# >>= 7
+ vsrah v15, v15, v10
+ vmrglh v10, v15, v14
+ vmrghh v15, v15, v14
+
+ vpkshss v10, v15, v10 ;# X = saturated down to bytes
+
+ vsubsbs v5, v5, v10 ;# subtract from Q and add to P
+ vaddsbs v2, v2, v10
+
+ vxor v5, v5, v11 ;# Q1 back to unsigned pels
+ vxor v2, v2, v11 ;# P1 back to unsigned pels
+
+ ;# roughly 3/7th difference across boundary
+ vspltish v10, 7
+ vaddubm v12, v12, v8 ;# v12 = 27 in every byte
+ vmulosb v14, v12, v13 ;# 27 * f for the odd bytes, as halfwords
+ vmulesb v15, v12, v13 ;# 27 * f for the even bytes, as halfwords
+ vaddshs v14, v14, v9 ;# += 63
+ vaddshs v15, v15, v9
+ vsrah v14, v14, v10 ;# >>= 7
+ vsrah v15, v15, v10
+ vmrglh v10, v15, v14
+ vmrghh v15, v15, v14
+
+ vpkshss v10, v15, v10 ;# X = saturated down to bytes
+
+ vsubsbs v4, v4, v10 ;# subtract from Q and add to P
+ vaddsbs v3, v3, v10
+
+ vxor v4, v4, v11 ;# Q0 back to unsigned pels
+ vxor v3, v3, v11 ;# P0 back to unsigned pels
+.endm
+;# SBFilter: Masks + common_adjust, then adjusts P1/Q1; updates v2..v5 (P1, P0, Q0, Q1).
+.macro SBFilter
+ Masks
+
+ common_adjust v3, v4, v2, v5, 1
+
+ ;# outer tap adjustments (v13 holds f1 from common_adjust)
+ vspltisb v8, 1
+
+ vaddubm v13, v13, v8 ;# f += 1
+ vsrab v13, v13, v8 ;# f >>= 1
+
+ vandc v13, v13, v10 ;# f &= ~hev
+
+ vsubsbs v5, v5, v13 ;# u1 = c (SQ1 - f)
+ vaddsbs v2, v2, v13 ;# u2 = c (SP1 + f)
+
+ vxor v2, v2, v11 ;# convert P1, P0, Q0, Q1 back to unsigned pels
+ vxor v3, v3, v11
+ vxor v4, v4, v11
+ vxor v5, v5, v11
+.endm
+;# mbloop variant: same register arguments as loop_filter_horizontal_edge_y_ppc; stores P2..Q2.
+ .align 2
+mbloop_filter_horizontal_edge_y_ppc:
+ mfspr r11, 256 ;# get old VRSAVE
+ oris r12, r11, 0xffff ;# mark v0..v15 as in use
+ mtspr 256, r12 ;# set VRSAVE
+
+ build_constants r5, r6, r7, v8, v9, v10 ;# must come first: load_data_y reuses r5..r7
+
+ load_data_y
+
+ vp8_mbfilter
+
+ stvx v1, r7, r6 ;# P2
+ stvx v2, 0, r6 ;# P1
+ stvx v3, r7, r3 ;# P0
+ stvx v4, 0, r3 ;# Q0
+ stvx v5, r4, r3 ;# Q1
+ stvx v6, r5, r3 ;# Q2
+
+ mtspr 256, r11 ;# reset old VRSAVE
+
+ blr
+;# Horizontal luma edge, SBFilter: 16 pels wide, updates the two rows on each side of the edge.
+ .align 2
+;# r3 unsigned char *s
+;# r4 int p
+;# r5 const signed char *flimit
+;# r6 const signed char *limit
+;# r7 const signed char *thresh
+loop_filter_horizontal_edge_y_ppc:
+ mfspr r11, 256 ;# get old VRSAVE
+ oris r12, r11, 0xffff ;# mark v0..v15 as in use
+ mtspr 256, r12 ;# set VRSAVE
+
+ build_constants r5, r6, r7, v8, v9, v10 ;# must come first: load_data_y reuses r5..r7
+
+ load_data_y
+
+ SBFilter
+
+ stvx v2, 0, r6 ;# P1
+ stvx v3, r7, r3 ;# P0
+ stvx v4, 0, r3 ;# Q0
+ stvx v5, r4, r3 ;# Q1
+
+ mtspr 256, r11 ;# reset old VRSAVE
+
+ blr
+
+;# Filtering a vertical mb. Each mb is aligned on a 16 byte boundary.
+;# So we can read in an entire mb aligned. However if we want to filter the mb
+;# edge we run into problems. For the loopfilter we require 4 bytes before the mb
+;# and 4 after for a total of 8 bytes. Reading 16 bytes in order to get 4 is a bit
+;# of a waste. So this is an even uglier way to get around that.
+;# Using the regular register file words are read in and then saved back out to
+;# memory to align and order them up. Then they are read in using the
+;# vector register file.
+.macro RLVmb V, R
+ lwzux r0, r3, r4 ;# r3 += stride; read the 4 pels at r3
+ stw r0, 4(\R) ;# -> bytes 4..7 of the temp vector
+ lwz r0,-4(r3) ;# read the 4 pels just before r3
+ stw r0, 0(\R) ;# -> bytes 0..3
+ lwzux r0, r3, r4 ;# next row, same pattern
+ stw r0,12(\R) ;# -> bytes 12..15
+ lwz r0,-4(r3)
+ stw r0, 8(\R) ;# -> bytes 8..11
+ lvx \V, 0, \R ;# V = 8 pels from each of the two rows
+.endm
+;# WLVmb: spill V to the temp at R and write its two 8-pel rows back, stepping r3 by r4.
+.macro WLVmb V, R
+ stvx \V, 0, \R
+ lwz r0,12(\R) ;# second row of the pair is written first
+ stwux r0, r3, r4 ;# r3 += r4; write the 4 pels at r3
+ lwz r0, 8(\R)
+ stw r0,-4(r3) ;# write the 4 pels just before r3
+ lwz r0, 4(\R) ;# then the first row of the pair
+ stwux r0, r3, r4
+ lwz r0, 0(\R)
+ stw r0,-4(r3)
+.endm
+;# Vertical luma edge, vp8_mbfilter: gathers 8 pels around the edge for 16 rows via scalar loads.
+ .align 2
+;# r3 unsigned char *s
+;# r4 int p
+;# r5 const signed char *flimit
+;# r6 const signed char *limit
+;# r7 const signed char *thresh
+mbloop_filter_vertical_edge_y_ppc:
+ mfspr r11, 256 ;# get old VRSAVE
+ oris r12, r11, 0xffff ;# mark v0..v15 as in use
+ ori r12, r12, 0xc000 ;# plus v16 and v17
+ mtspr 256, r12 ;# set VRSAVE
+
+ la r9, -48(r1) ;# temporary space for reading in vectors
+ sub r3, r3, r4 ;# back up one row: RLVmb advances before each read
+
+ RLVmb v0, r9
+ RLVmb v1, r9
+ RLVmb v2, r9
+ RLVmb v3, r9
+ RLVmb v4, r9
+ RLVmb v5, r9
+ RLVmb v6, r9
+ RLVmb v7, r9
+
+ transpose8x16_fwd ;# v0..v7 = the 8 columns P3..Q3, 16 rows each
+
+ build_constants r5, r6, r7, v8, v9, v10
+
+ vp8_mbfilter
+
+ transpose8x16_inv ;# result rows land in v10..v17
+
+ add r3, r3, r4 ;# step one row past the last row read
+ neg r4, r4 ;# and walk back up: rows are written in reverse order
+
+ WLVmb v17, r9
+ WLVmb v16, r9
+ WLVmb v15, r9
+ WLVmb v14, r9
+ WLVmb v13, r9
+ WLVmb v12, r9
+ WLVmb v11, r9
+ WLVmb v10, r9
+
+ mtspr 256, r11 ;# reset old VRSAVE
+
+ blr
+;# RL: load one vector from the address in R, then advance R by P.
+.macro RL V, R, P
+ lvx \V, 0, \R
+ add \R, \R, \P
+.endm
+;# WL: store one vector to the address in R, then advance R by P.
+.macro WL V, R, P
+ stvx \V, 0, \R
+ add \R, \R, \P
+.endm
+;# Fil: one subblock edge. Needs v0/v2/v3 = thresh/limit/flimit, v1 = 1s, v4 = |P0-P1|, v11 = 0x80.
+.macro Fil P3, P2, P1, P0, Q0, Q1, Q2, Q3
+ ;# K = |P0-P1| already
+ Abs v14, v13, \Q0, \Q1 ;# M = |Q0-Q1|
+ vmaxub v14, v14, v4 ;# M = max( |P0-P1|, |Q0-Q1|)
+ vcmpgtub v10, v14, v0 ;# v10 = hev: true if thresh exceeded
+
+ Abs v4, v5, \Q2, \Q3 ;# K = |Q2-Q3| = next |P0-P1|
+
+ max_abs v14, v13, \Q1, \Q2 ;# M = max( M, |Q1-Q2|)
+ max_abs v14, v13, \P1, \P2 ;# M = max( M, |P1-P2|)
+ max_abs v14, v13, \P2, \P3 ;# M = max( M, |P2-P3|)
+
+ vmaxub v14, v14, v4 ;# M = max interior abs diff
+ vcmpgtub v9, v14, v2 ;# M = true if int_l exceeded
+
+ Abs v14, v13, \P0, \Q0 ;# X = Abs( P0-Q0)
+ vcmpgtub v8, v14, v3 ;# X = true if edge_l exceeded
+ vor v8, v8, v9 ;# M = true if edge_l or int_l exceeded
+
+ ;# replace P1,Q1 w/signed versions
+ common_adjust \P0, \Q0, \P1, \Q1, 1
+
+ vaddubm v13, v13, v1 ;# -16 <= M <= 15, saturation irrelevant
+ vsrab v13, v13, v1
+ vandc v13, v13, v10 ;# adjust P1,Q1 by (M+1)>>1 if ! hev
+ vsubsbs \Q1, \Q1, v13
+ vaddsbs \P1, \P1, v13
+
+ vxor \P1, \P1, v11 ;# P1
+ vxor \P0, \P0, v11 ;# P0
+ vxor \Q0, \Q0, v11 ;# Q0
+ vxor \Q1, \Q1, v11 ;# Q1
+.endm
+
+;# Vertical luma subblock edges: loads 16 rows, transposes, filters three edges, writes all back.
+ .align 2
+;# r3 unsigned char *s
+;# r4 int p
+;# r5 const signed char *flimit
+;# r6 const signed char *limit
+;# r7 const signed char *thresh
+loop_filter_vertical_edge_y_ppc:
+ mfspr r11, 256 ;# get old VRSAVE
+ oris r12, r11, 0xffff ;# mark v0..v15 as in use
+ ori r12, r12, 0xffff ;# and v16..v31
+ mtspr 256, r12 ;# set VRSAVE
+
+ addi r9, r3, 0 ;# r9 = walking row pointer (r3 is kept for the write-back)
+ RL v16, r9, r4
+ RL v17, r9, r4
+ RL v18, r9, r4
+ RL v19, r9, r4
+ RL v20, r9, r4
+ RL v21, r9, r4
+ RL v22, r9, r4
+ RL v23, r9, r4
+ RL v24, r9, r4
+ RL v25, r9, r4
+ RL v26, r9, r4
+ RL v27, r9, r4
+ RL v28, r9, r4
+ RL v29, r9, r4
+ RL v30, r9, r4
+ lvx v31, 0, r9
+
+ Transpose16x16 ;# v16..v31 now hold columns 0..15
+
+ vspltisb v1, 1 ;# constant 1 used by Fil
+
+ build_constants r5, r6, r7, v3, v2, v0 ;# v3 = flimit, v2 = limit, v0 = thresh
+
+ Abs v4, v5, v19, v18 ;# K(v4) = first |P0-P1|
+
+ Fil v16, v17, v18, v19, v20, v21, v22, v23 ;# edge between columns 3 and 4
+ Fil v20, v21, v22, v23, v24, v25, v26, v27 ;# edge between columns 7 and 8
+ Fil v24, v25, v26, v27, v28, v29, v30, v31 ;# edge between columns 11 and 12
+
+ Transpose16x16 ;# back to row order in v16..v31
+
+ addi r9, r3, 0
+ WL v16, r9, r4
+ WL v17, r9, r4
+ WL v18, r9, r4
+ WL v19, r9, r4
+ WL v20, r9, r4
+ WL v21, r9, r4
+ WL v22, r9, r4
+ WL v23, r9, r4
+ WL v24, r9, r4
+ WL v25, r9, r4
+ WL v26, r9, r4
+ WL v27, r9, r4
+ WL v28, r9, r4
+ WL v29, r9, r4
+ WL v30, r9, r4
+ stvx v31, 0, r9
+
+ mtspr 256, r11 ;# reset old VRSAVE
+
+ blr
+
+;# -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- UV FILTERING -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+.macro active_chroma_sel V
+ andi. r7, r3, 8 ;# bit 3 of the U address: 0 or 8
+ add r7, r7, r7 ;# selects selectors: byte offset 0 or 16 into _chromaSelectors
+ lis r12, _chromaSelectors@ha
+ la r0, _chromaSelectors@l(r12) ;# r0 = &_chromaSelectors (r12 is trashed)
+ lwzux r0, r7, r0 ;# leave selector addr in r7
+
+ lvx \V, 0, r0 ;# mask to concatenate active U,V pels
+.endm
+;# hread_uv: load the U (r3) and V (r4) lines at offset Offs and combine them through VMask.
+.macro hread_uv Dest, U, V, Offs, VMask
+ lvx \U, \Offs, r3
+ lvx \V, \Offs, r4
+ vperm \Dest, \U, \V, \VMask ;# Dest = active part of U then V
+.endm
+;# hwrite_uv: merge New into the saved U and V lines (Umask/Vmask) and store them at Offs.
+.macro hwrite_uv New, U, V, Offs, Umask, Vmask
+ vperm \U, \New, \U, \Umask ;# Combine new pels with siblings
+ vperm \V, \New, \V, \Vmask
+ stvx \U, \Offs, r3 ;# Write to frame buffer
+ stvx \V, \Offs, r4
+.endm
+;# load_chroma_h: r3 = u, r4 = v, r5 = stride; fills v0..v7 (P3..Q3), saves raw lines in v16..v27.
+;# Process U,V in parallel.
+.macro load_chroma_h
+ neg r9, r5 ;# r9 = -1 * stride
+ add r8, r9, r9 ;# r8 = -2 * stride
+ add r10, r5, r5 ;# r10 = 2 * stride
+
+ active_chroma_sel v12 ;# also leaves the selector-row address in r7
+
+ ;# P3, Q3 are read-only; need not save addresses or sibling pels
+ add r6, r8, r8 ;# r6 = -4 * stride
+ hread_uv v0, v14, v15, r6, v12
+ add r6, r10, r5 ;# r6 = 3 * stride
+ hread_uv v7, v14, v15, r6, v12
+
+ ;# Others are read/write; save addresses and sibling pels
+
+ add r6, r8, r9 ;# r6 = -3 * stride
+ hread_uv v1, v16, v17, r6, v12
+ hread_uv v2, v18, v19, r8, v12
+ hread_uv v3, v20, v21, r9, v12
+ hread_uv v4, v22, v23, 0, v12
+ hread_uv v5, v24, v25, r5, v12
+ hread_uv v6, v26, v27, r10, v12
+.endm
+;# uresult_sel: load the U write-back selector; r7 must still point at the selector row.
+.macro uresult_sel V
+ load_g \V, 4(r7)
+.endm
+;# vresult_sel: load the V write-back selector; r7 must still point at the selector row.
+.macro vresult_sel V
+ load_g \V, 8(r7)
+.endm
+;# store_chroma_h: uses r5/r8/r9 and v18..v25 as left by load_chroma_h; loads v11/v12 selectors.
+;# always write P1,P0,Q0,Q1
+.macro store_chroma_h
+ uresult_sel v11
+ vresult_sel v12
+ hwrite_uv v2, v18, v19, r8, v11, v12 ;# P1
+ hwrite_uv v3, v20, v21, r9, v11, v12 ;# P0
+ hwrite_uv v4, v22, v23, 0, v11, v12 ;# Q0
+ hwrite_uv v5, v24, v25, r5, v11, v12 ;# Q1
+.endm
+;# Horizontal chroma edge, vp8_mbfilter: U and V are filtered together; writes P2..Q2.
+ .align 2
+;# r3 unsigned char *u
+;# r4 unsigned char *v
+;# r5 int p
+;# r6 const signed char *flimit
+;# r7 const signed char *limit
+;# r8 const signed char *thresh
+mbloop_filter_horizontal_edge_uv_ppc:
+ mfspr r11, 256 ;# get old VRSAVE
+ oris r12, r11, 0xffff ;# mark v0..v15 as in use
+ ori r12, r12, 0xffff ;# and v16..v31
+ mtspr 256, r12 ;# set VRSAVE
+
+ build_constants r6, r7, r8, v8, v9, v10 ;# must come first: load_chroma_h reuses r6..r8
+
+ load_chroma_h
+
+ vp8_mbfilter
+
+ store_chroma_h
+
+ hwrite_uv v1, v16, v17, r6, v11, v12 ;# v1 == P2
+ hwrite_uv v6, v26, v27, r10, v11, v12 ;# v6 == Q2
+
+ mtspr 256, r11 ;# reset old VRSAVE
+
+ blr
+;# Horizontal chroma edge, SBFilter: U and V are filtered together; writes P1..Q1.
+ .align 2
+;# r3 unsigned char *u
+;# r4 unsigned char *v
+;# r5 int p
+;# r6 const signed char *flimit
+;# r7 const signed char *limit
+;# r8 const signed char *thresh
+loop_filter_horizontal_edge_uv_ppc:
+ mfspr r11, 256 ;# get old VRSAVE
+ oris r12, r11, 0xffff ;# mark v0..v15 as in use
+ ori r12, r12, 0xffff ;# and v16..v31
+ mtspr 256, r12 ;# set VRSAVE
+
+ build_constants r6, r7, r8, v8, v9, v10 ;# must come first: load_chroma_h reuses r6..r8
+
+ load_chroma_h
+
+ SBFilter
+
+ store_chroma_h
+
+ mtspr 256, r11 ;# reset old VRSAVE
+
+ blr
+;# R: advance r3 (U) and r4 (V) by r5 and gather 8 pels around each pointer into V via temp R.
+.macro R V, R
+ lwzux r0, r3, r5 ;# r3 += stride; U pels at r3
+ stw r0, 4(\R)
+ lwz r0,-4(r3) ;# U pels just before r3
+ stw r0, 0(\R)
+ lwzux r0, r4, r5 ;# r4 += stride; V pels at r4
+ stw r0,12(\R)
+ lwz r0,-4(r4) ;# V pels just before r4
+ stw r0, 8(\R)
+ lvx \V, 0, \R ;# V = 8 U pels then 8 V pels
+.endm
+
+;# W: spill V to temp R and write its V half (via r4) and U half (via r3), stepping both by r5.
+.macro W V, R
+ stvx \V, 0, \R
+ lwz r0,12(\R)
+ stwux r0, r4, r5 ;# r4 += r5; V pels at r4
+ lwz r0, 8(\R)
+ stw r0,-4(r4) ;# V pels just before r4
+ lwz r0, 4(\R)
+ stwux r0, r3, r5 ;# r3 += r5; U pels at r3
+ lwz r0, 0(\R)
+ stw r0,-4(r3) ;# U pels just before r3
+.endm
+;# chroma_vread: read 8 rows of U and V around the edge via temp R, then transpose into v0..v7.
+.macro chroma_vread R
+ sub r3, r3, r5 ;# back up one line for simplicity
+ sub r4, r4, r5
+
+ R v0, \R
+ R v1, \R
+ R v2, \R
+ R v3, \R
+ R v4, \R
+ R v5, \R
+ R v6, \R
+ R v7, \R
+
+ transpose8x16_fwd
+.endm
+;# chroma_vwrite: inverse-transpose v0..v7 into v10..v17 and write the 8 U/V rows back.
+.macro chroma_vwrite R
+ ;# r3/r4 must still be where chroma_vread left them; r5 is negated here
+ transpose8x16_inv
+
+ add r3, r3, r5
+ add r4, r4, r5
+ neg r5, r5 ;# Write rows back in reverse order
+
+ W v17, \R
+ W v16, \R
+ W v15, \R
+ W v14, \R
+ W v13, \R
+ W v12, \R
+ W v11, \R
+ W v10, \R
+.endm
+;# Vertical chroma edge, vp8_mbfilter: U and V rows are gathered and filtered together.
+ .align 2
+;# r3 unsigned char *u
+;# r4 unsigned char *v
+;# r5 int p
+;# r6 const signed char *flimit
+;# r7 const signed char *limit
+;# r8 const signed char *thresh
+mbloop_filter_vertical_edge_uv_ppc:
+ mfspr r11, 256 ;# get old VRSAVE
+ oris r12, r11, 0xffff ;# mark v0..v15 as in use
+ ori r12, r12, 0xc000 ;# plus v16 and v17
+ mtspr 256, r12 ;# set VRSAVE
+
+ la r9, -48(r1) ;# temporary space for reading in vectors
+
+ chroma_vread r9
+
+ build_constants r6, r7, r8, v8, v9, v10
+
+ vp8_mbfilter
+
+ chroma_vwrite r9
+
+ mtspr 256, r11 ;# reset old VRSAVE
+
+ blr
+;# Vertical chroma edge, SBFilter: U and V rows are gathered and filtered together.
+ .align 2
+;# r3 unsigned char *u
+;# r4 unsigned char *v
+;# r5 int p
+;# r6 const signed char *flimit
+;# r7 const signed char *limit
+;# r8 const signed char *thresh
+loop_filter_vertical_edge_uv_ppc:
+ mfspr r11, 256 ;# get old VRSAVE
+ oris r12, r11, 0xffff ;# mark v0..v15 as in use
+ ori r12, r12, 0xc000 ;# plus v16 and v17
+ mtspr 256, r12 ;# set VRSAVE
+
+ la r9, -48(r1) ;# temporary space for reading in vectors
+
+ chroma_vread r9
+
+ build_constants r6, r7, r8, v8, v9, v10
+
+ SBFilter
+
+ chroma_vwrite r9
+
+ mtspr 256, r11 ;# reset old VRSAVE
+
+ blr
+
+;# -=-=-=-=-=-=-=-=-=-=-=-=-=-= SIMPLE LOOP FILTER =-=-=-=-=-=-=-=-=-=-=-=-=-=-
+;# vp8_simple_filter: v0..v3 = P1, P0, Q0, Q1; v8 = flimit; v11 = 0x80. Updates v1 and v2.
+.macro vp8_simple_filter
+ Abs v14, v13, v1, v2 ;# M = abs( P0 - Q0)
+ vcmpgtub v8, v14, v8 ;# v8 = true if _over_ limit
+
+ ;# common_adjust leaves v0 and v3 (P1, Q1) in signed form; not undone here
+ common_adjust v1, v2, v0, v3, 0
+
+ vxor v1, v1, v11
+ vxor v2, v2, v11 ;# cvt Q0, P0 back to pels
+.endm
+;# simple_vertical: filter the 64-byte temp array at r5 in place; trashes r0, r7, r8, r12.
+.macro simple_vertical
+ addi r8, 0, 16 ;# r8 = 16
+ addi r7, r5, 32 ;# r7 -> second half of the temp array
+
+ lvx v0, 0, r5
+ lvx v1, r8, r5
+ lvx v2, 0, r7
+ lvx v3, r8, r7
+
+ lis r12, _B_hihi@ha
+ la r0, _B_hihi@l(r12)
+ lvx v16, 0, r0 ;# v16 = _B_hihi selector
+
+ lis r12, _B_lolo@ha
+ la r0, _B_lolo@l(r12)
+ lvx v17, 0, r0 ;# v17 = _B_lolo selector
+
+ Transpose4times4x4 v16, v17
+ vp8_simple_filter
+
+ vxor v0, v0, v11
+ vxor v3, v3, v11 ;# cvt P1, Q1 back to pels
+
+ Transpose4times4x4 v16, v17
+
+ stvx v0, 0, r5
+ stvx v1, r8, r5
+ stvx v2, 0, r7
+ stvx v3, r8, r7
+.endm
+;# Simple filter on one horizontal edge, 16 pels wide; only P0 and Q0 are written back.
+ .align 2
+;# r3 unsigned char *s
+;# r4 int p
+;# r5 const signed char *flimit
+loop_filter_simple_horizontal_edge_ppc:
+ mfspr r11, 256 ;# get old VRSAVE
+ oris r12, r11, 0xffff ;# mark v0..v15 as in use
+ mtspr 256, r12 ;# set VRSAVE
+
+ ;# build constants
+ lvx v8, 0, r5 ;# flimit
+
+ vspltisb v11, 8
+ vspltisb v12, 4
+ vslb v11, v11, v12 ;# 0x80808080808080808080808080808080
+
+ neg r5, r4 ;# r5 = -1 * stride
+ add r6, r5, r5 ;# r6 = -2 * stride
+
+ lvx v0, r6, r3 ;# v0 = P1 = 16 pels two rows above edge
+ lvx v1, r5, r3 ;# v1 = P0 = 16 pels one row above edge
+ lvx v2, 0, r3 ;# v2 = Q0 = 16 pels one row below edge
+ lvx v3, r4, r3 ;# v3 = Q1 = 16 pels two rows below edge
+
+ vp8_simple_filter
+
+ stvx v1, r5, r3 ;# store P0
+ stvx v2, 0, r3 ;# store Q0
+
+ mtspr 256, r11 ;# reset old VRSAVE
+
+ blr
+;# RLV: store the word in r0 at temp word Offs, then step r7 by r4 and read the next row's word.
+.macro RLV Offs
+ stw r0, (\Offs*4)(r5)
+ lwzux r0, r7, r4
+.endm
+;# WLV: read temp word Offs, then step r7 by r4 and write the word to that row.
+.macro WLV Offs
+ lwz r0, (\Offs*4)(r5)
+ stwux r0, r7, r4
+.endm
+;# Simple filter on one vertical edge: 16 rows of 4 pels, staged through a stack temp array.
+ .align 2
+;# r3 unsigned char *s
+;# r4 int p
+;# r5 const signed char *flimit
+loop_filter_simple_vertical_edge_ppc:
+ mfspr r11, 256 ;# get old VRSAVE
+ oris r12, r11, 0xffff ;# mark v0..v15 as in use
+ ori r12, r12, 0xc000 ;# plus v16 and v17
+ mtspr 256, r12 ;# set VRSAVE
+
+ ;# build constants
+ lvx v8, 0, r5 ;# flimit
+
+ vspltisb v11, 8
+ vspltisb v12, 4
+ vslb v11, v11, v12 ;# 0x80808080808080808080808080808080
+
+ la r5, -96(r1) ;# temporary space for reading in vectors
+
+ ;# Store 4 pels at word "Offs" in temp array, then advance r7
+ ;# to next row and read another 4 pels from the frame buffer.
+
+ subi r7, r3, 2 ;# r7 -> 2 pels before start
+ lwzx r0, 0, r7 ;# read first 4 pels
+
+ ;# 16 unaligned word accesses
+ RLV 0
+ RLV 4
+ RLV 8
+ RLV 12
+ RLV 1
+ RLV 5
+ RLV 9
+ RLV 13
+ RLV 2
+ RLV 6
+ RLV 10
+ RLV 14
+ RLV 3
+ RLV 7
+ RLV 11
+
+ stw r0, (15*4)(r5) ;# write last 4 pels
+
+ simple_vertical
+
+ ;# Read temp array, write frame buffer.
+ subi r7, r3, 2 ;# r7 -> 2 pels before start
+ lwzx r0, 0, r5 ;# read/write first 4 pels
+ stwx r0, 0, r7
+
+ WLV 4
+ WLV 8
+ WLV 12
+ WLV 1
+ WLV 5
+ WLV 9
+ WLV 13
+ WLV 2
+ WLV 6
+ WLV 10
+ WLV 14
+ WLV 3
+ WLV 7
+ WLV 11
+ WLV 15
+
+ mtspr 256, r11 ;# reset old VRSAVE
+
+ blr
+
+ .data
+;# Two 16-byte rows of selector addresses; active_chroma_sel picks a row from bit 3 of the U address.
+_chromaSelectors:
+ .long _B_hihi ;# row 0, +0: read selector
+ .long _B_Ures0 ;# row 0, +4: U write-back selector
+ .long _B_Vres0 ;# row 0, +8: V write-back selector
+ .long 0 ;# padding to 16 bytes
+ .long _B_lolo ;# row 1, +0: read selector
+ .long _B_Ures8 ;# row 1, +4: U write-back selector
+ .long _B_Vres8 ;# row 1, +8: V write-back selector
+ .long 0 ;# padding
+
+ .align 4
+_B_Vres8:
+ .byte 16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15 ;# 2nd operand bytes 0..7, then 1st operand bytes 8..15
+
+ .align 4
+_B_Ures8:
+ .byte 16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7 ;# 2nd operand bytes 0..7, then 1st operand bytes 0..7
+
+ .align 4
+_B_lolo:
+ .byte 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 ;# low halves of both operands
+
+ .align 4
+_B_Vres0:
+ .byte 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 ;# 1st operand bytes 8..15, then 2nd operand bytes 8..15
+ .align 4
+_B_Ures0:
+ .byte 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 ;# 1st operand bytes 0..7, then 2nd operand bytes 8..15
+
+ .align 4
+_B_hihi:
+ .byte 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 ;# high halves of both operands
diff --git a/vp9/common/ppc/vp9_platform_altivec.asm b/vp9/common/ppc/vp9_platform_altivec.asm
new file mode 100644
index 0000000..f81d86f
--- /dev/null
+++ b/vp9/common/ppc/vp9_platform_altivec.asm
@@ -0,0 +1,59 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+ .globl save_platform_context
+ .globl restore_platform_context
+;# W: store vector V at the address in P, then advance P by 16 bytes.
+.macro W V P
+ stvx \V, 0, \P
+ addi \P, \P, 16
+.endm
+;# R: load vector V from the address in P, then advance P by 16 bytes.
+.macro R V P
+ lvx \V, 0, \P
+ addi \P, \P, 16
+.endm
+
+;# r3 context_ptr: v20..v31 are stored to 12 consecutive 16-byte slots; r3 is advanced past them
+ .align 2
+save_platform_context:
+ W v20, r3
+ W v21, r3
+ W v22, r3
+ W v23, r3
+ W v24, r3
+ W v25, r3
+ W v26, r3
+ W v27, r3
+ W v28, r3
+ W v29, r3
+ W v30, r3
+ W v31, r3
+
+ blr
+
+;# r3 context_ptr: v20..v31 are reloaded from 12 consecutive 16-byte slots; r3 is advanced past them
+ .align 2
+restore_platform_context:
+ R v20, r3
+ R v21, r3
+ R v22, r3
+ R v23, r3
+ R v24, r3
+ R v25, r3
+ R v26, r3
+ R v27, r3
+ R v28, r3
+ R v29, r3
+ R v30, r3
+ R v31, r3
+
+ blr
diff --git a/vp9/common/ppc/vp9_recon_altivec.asm b/vp9/common/ppc/vp9_recon_altivec.asm
new file mode 100644
index 0000000..dd39e05
--- /dev/null
+++ b/vp9/common/ppc/vp9_recon_altivec.asm
@@ -0,0 +1,175 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+ .globl recon4b_ppc
+ .globl recon2b_ppc
+ .globl recon_b_ppc
+
+.macro row_of16 Diff Pred Dst Stride
+ lvx v1, 0, \Pred ;# v1 = pred = p0..p15
+ addi \Pred, \Pred, 16 ;# next pred
+ vmrghb v2, v0, v1 ;# v2 = 16-bit p0..p7
+ lvx v3, 0, \Diff ;# v3 = d0..d7
+ vaddshs v2, v2, v3 ;# v2 = r0..r7
+ vmrglb v1, v0, v1 ;# v1 = 16-bit p8..p15
+ lvx v3, r8, \Diff ;# v3 = d8..d15
+ addi \Diff, \Diff, 32 ;# next diff
+ vaddshs v3, v3, v1 ;# v3 = r8..r15
+ vpkshus v2, v2, v3 ;# v2 = 8-bit r0..r15
+ stvx v2, 0, \Dst ;# to dst
+ add \Dst, \Dst, \Stride ;# next dst
+.endm
+
+ .text
+ .align 2
+;# r3 = short *diff_ptr,
+;# r4 = unsigned char *pred_ptr,
+;# r5 = unsigned char *dst_ptr,
+;# r6 = int stride
+recon4b_ppc:
+ mfspr r0, 256 ;# get old VRSAVE
+ stw r0, -8(r1) ;# save old VRSAVE to stack
+ oris r0, r0, 0xf000
+ mtspr 256,r0 ;# set VRSAVE
+
+ vxor v0, v0, v0
+ li r8, 16
+
+ row_of16 r3, r4, r5, r6
+ row_of16 r3, r4, r5, r6
+ row_of16 r3, r4, r5, r6
+ row_of16 r3, r4, r5, r6
+
+ lwz r12, -8(r1) ;# restore old VRSAVE from stack
+ mtspr 256, r12 ;# reset old VRSAVE
+
+ blr
+
+.macro two_rows_of8 Diff Pred Dst Stride write_first_four_pels
+ lvx v1, 0, \Pred ;# v1 = pred = p0..p15
+ vmrghb v2, v0, v1 ;# v2 = 16-bit p0..p7
+ lvx v3, 0, \Diff ;# v3 = d0..d7
+ vaddshs v2, v2, v3 ;# v2 = r0..r7
+ vmrglb v1, v0, v1 ;# v1 = 16-bit p8..p15
+ lvx v3, r8, \Diff ;# v3 = d8..d15
+ vaddshs v3, v3, v1 ;# v3 = r8..r15
+ vpkshus v2, v2, v3 ;# v2 = 8-bit r0..r15
+ stvx v2, 0, r10 ;# 2 rows to buf (r10); copied to dst word by word below
+ lwz r0, 0(r10) ;# first word of the packed result
+.if \write_first_four_pels
+ stw r0, 0(\Dst) ;# store at dst, dst unchanged
+ .else
+ stwux r0, \Dst, \Stride ;# store at dst + stride and update dst
+.endif
+ lwz r0, 4(r10)
+ stw r0, 4(\Dst) ;# second word, same row
+ lwz r0, 8(r10)
+ stwux r0, \Dst, \Stride ;# advance dst to next row
+ lwz r0, 12(r10)
+ stw r0, 4(\Dst) ;# fourth word, same row as the third
+.endm
+
+ .align 2
+;# r3 = short *diff_ptr,
+;# r4 = unsigned char *pred_ptr,
+;# r5 = unsigned char *dst_ptr,
+;# r6 = int stride
+
+recon2b_ppc:
+ mfspr r0, 256 ;# get old VRSAVE
+ stw r0, -8(r1) ;# save old VRSAVE to stack
+ oris r0, r0, 0xf000
+ mtspr 256,r0 ;# set VRSAVE
+
+ vxor v0, v0, v0
+ li r8, 16
+
+ la r10, -48(r1) ;# buf
+
+ two_rows_of8 r3, r4, r5, r6, 1
+
+ addi r4, r4, 16; ;# next pred
+ addi r3, r3, 32; ;# next diff
+
+ two_rows_of8 r3, r4, r5, r6, 0
+
+ lwz r12, -8(r1) ;# restore old VRSAVE from stack
+ mtspr 256, r12 ;# reset old VRSAVE
+
+ blr
+
+.macro get_two_diff_rows
+ stw r0, 0(r10)
+ lwz r0, 4(r3)
+ stw r0, 4(r10)
+ lwzu r0, 32(r3)
+ stw r0, 8(r10)
+ lwz r0, 4(r3)
+ stw r0, 12(r10)
+ lvx v3, 0, r10
+.endm
+
+ .align 2
+;# r3 = short *diff_ptr,
+;# r4 = unsigned char *pred_ptr,
+;# r5 = unsigned char *dst_ptr,
+;# r6 = int stride
+recon_b_ppc:
+ mfspr r0, 256 ;# get old VRSAVE
+ stw r0, -8(r1) ;# save old VRSAVE to stack
+ oris r0, r0, 0xf000
+ mtspr 256,r0 ;# set VRSAVE
+
+ vxor v0, v0, v0
+
+ la r10, -48(r1) ;# buf
+
+ lwz r0, 0(r4)
+ stw r0, 0(r10)
+ lwz r0, 16(r4)
+ stw r0, 4(r10)
+ lwz r0, 32(r4)
+ stw r0, 8(r10)
+ lwz r0, 48(r4)
+ stw r0, 12(r10)
+
+ lvx v1, 0, r10; ;# v1 = pred = p0..p15
+
+ lwz r0, 0(r3) ;# v3 = d0..d7
+
+ get_two_diff_rows
+
+ vmrghb v2, v0, v1; ;# v2 = 16-bit p0..p7
+ vaddshs v2, v2, v3; ;# v2 = r0..r7
+
+ lwzu r0, 32(r3) ;# v3 = d8..d15
+
+ get_two_diff_rows
+
+ vmrglb v1, v0, v1; ;# v1 = 16-bit p8..p15
+ vaddshs v3, v3, v1; ;# v3 = r8..r15
+
+ vpkshus v2, v2, v3; ;# v2 = 8-bit r0..r15
+ stvx v2, 0, r10; ;# 16 pels to dst from buf
+
+ lwz r0, 0(r10)
+ stw r0, 0(r5)
+ lwz r0, 4(r10)
+ stwux r0, r5, r6
+ lwz r0, 8(r10)
+ stwux r0, r5, r6
+ lwz r0, 12(r10)
+ stwx r0, r5, r6
+
+ lwz r12, -8(r1) ;# restore old VRSAVE from stack
+ mtspr 256, r12 ;# reset old VRSAVE
+
+ blr
diff --git a/vp9/common/ppc/vp9_systemdependent.c b/vp9/common/ppc/vp9_systemdependent.c
new file mode 100644
index 0000000..106a2b7
--- /dev/null
+++ b/vp9/common/ppc/vp9_systemdependent.c
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vp9/common/vp9_subpixel.h"
+#include "vp9/common/vp9_loopfilter.h"
+#include "recon.h"
+#include "vp9/common/vp9_onyxc_int.h"
+
+void (*vp8_short_idct4x4)(short *input, short *output, int pitch);
+void (*vp8_short_idct4x4_1)(short *input, short *output, int pitch);
+void (*vp8_dc_only_idct)(short input_dc, short *output, int pitch);
+
+extern void (*vp9_post_proc_down_and_across)(
+ unsigned char *src_ptr,
+ unsigned char *dst_ptr,
+ int src_pixels_per_line,
+ int dst_pixels_per_line,
+ int rows,
+ int cols,
+ int flimit
+);
+
+extern void (*vp9_mbpost_proc_down)(unsigned char *dst, int pitch, int rows, int cols, int flimit);
+extern void vp9_mbpost_proc_down_c(unsigned char *dst, int pitch, int rows, int cols, int flimit);
+extern void (*vp9_mbpost_proc_across_ip)(unsigned char *src, int pitch, int rows, int cols, int flimit);
+extern void vp9_mbpost_proc_across_ip_c(unsigned char *src, int pitch, int rows, int cols, int flimit);
+
+extern void vp9_post_proc_down_and_across_c
+(
+ unsigned char *src_ptr,
+ unsigned char *dst_ptr,
+ int src_pixels_per_line,
+ int dst_pixels_per_line,
+ int rows,
+ int cols,
+ int flimit
+);
+void vp9_plane_add_noise_c(unsigned char *Start, unsigned int Width, unsigned int Height, int Pitch, int q, int a);
+
+extern copy_mem_block_function *vp9_copy_mem16x16;
+extern copy_mem_block_function *vp9_copy_mem8x8;
+extern copy_mem_block_function *vp9_copy_mem8x4;
+
+// PPC
+extern subpixel_predict_function sixtap_predict_ppc;
+extern subpixel_predict_function sixtap_predict8x4_ppc;
+extern subpixel_predict_function sixtap_predict8x8_ppc;
+extern subpixel_predict_function sixtap_predict16x16_ppc;
+extern subpixel_predict_function bilinear_predict4x4_ppc;
+extern subpixel_predict_function bilinear_predict8x4_ppc;
+extern subpixel_predict_function bilinear_predict8x8_ppc;
+extern subpixel_predict_function bilinear_predict16x16_ppc;
+
+extern copy_mem_block_function copy_mem16x16_ppc;
+
+void recon_b_ppc(short *diff_ptr, unsigned char *pred_ptr, unsigned char *dst_ptr, int stride);
+void recon2b_ppc(short *diff_ptr, unsigned char *pred_ptr, unsigned char *dst_ptr, int stride);
+void recon4b_ppc(short *diff_ptr, unsigned char *pred_ptr, unsigned char *dst_ptr, int stride);
+
+extern void short_idct4x4llm_ppc(short *input, short *output, int pitch);
+
+// Generic C
+extern subpixel_predict_function vp9_sixtap_predict_c;
+extern subpixel_predict_function vp9_sixtap_predict8x4_c;
+extern subpixel_predict_function vp9_sixtap_predict8x8_c;
+extern subpixel_predict_function vp9_sixtap_predict16x16_c;
+extern subpixel_predict_function vp9_bilinear_predict4x4_c;
+extern subpixel_predict_function vp9_bilinear_predict8x4_c;
+extern subpixel_predict_function vp9_bilinear_predict8x8_c;
+extern subpixel_predict_function vp9_bilinear_predict16x16_c;
+
+extern copy_mem_block_function vp9_copy_mem16x16_c;
+extern copy_mem_block_function vp9_copy_mem8x8_c;
+extern copy_mem_block_function vp9_copy_mem8x4_c;
+
+void vp9_recon_b_c(short *diff_ptr, unsigned char *pred_ptr, unsigned char *dst_ptr, int stride);
+void vp9_recon2b_c(short *diff_ptr, unsigned char *pred_ptr, unsigned char *dst_ptr, int stride);
+void vp9_recon4b_c(short *diff_ptr, unsigned char *pred_ptr, unsigned char *dst_ptr, int stride);
+
+extern void vp9_short_idct4x4llm_1_c(short *input, short *output, int pitch);
+extern void vp9_short_idct4x4llm_c(short *input, short *output, int pitch);
+extern void vp8_dc_only_idct_c(short input_dc, short *output, int pitch);
+
+// PPC
+extern loop_filter_block_function loop_filter_mbv_ppc;
+extern loop_filter_block_function loop_filter_bv_ppc;
+extern loop_filter_block_function loop_filter_mbh_ppc;
+extern loop_filter_block_function loop_filter_bh_ppc;
+
+extern loop_filter_block_function loop_filter_mbvs_ppc;
+extern loop_filter_block_function loop_filter_bvs_ppc;
+extern loop_filter_block_function loop_filter_mbhs_ppc;
+extern loop_filter_block_function loop_filter_bhs_ppc;
+
+// Generic C
+extern loop_filter_block_function vp9_loop_filter_mbv_c;
+extern loop_filter_block_function vp9_loop_filter_bv_c;
+extern loop_filter_block_function vp9_loop_filter_mbh_c;
+extern loop_filter_block_function vp9_loop_filter_bh_c;
+
+extern loop_filter_block_function vp9_loop_filter_mbvs_c;
+extern loop_filter_block_function vp9_loop_filter_bvs_c;
+extern loop_filter_block_function vp9_loop_filter_mbhs_c;
+extern loop_filter_block_function vp9_loop_filter_bhs_c;
+
+extern loop_filter_block_function *vp8_lf_mbvfull;
+extern loop_filter_block_function *vp8_lf_mbhfull;
+extern loop_filter_block_function *vp8_lf_bvfull;
+extern loop_filter_block_function *vp8_lf_bhfull;
+
+extern loop_filter_block_function *vp8_lf_mbvsimple;
+extern loop_filter_block_function *vp8_lf_mbhsimple;
+extern loop_filter_block_function *vp8_lf_bvsimple;
+extern loop_filter_block_function *vp8_lf_bhsimple;
+
+void vp9_clear_c(void) {
+}
+
+void vp9_machine_specific_config(void) {
+ // Pure C:
+ vp9_clear_system_state = vp9_clear_c;
+ vp9_recon_b = vp9_recon_b_c;
+ vp9_recon4b = vp9_recon4b_c;
+ vp9_recon2b = vp9_recon2b_c;
+
+ vp9_bilinear_predict16x16 = bilinear_predict16x16_ppc;
+ vp9_bilinear_predict8x8 = bilinear_predict8x8_ppc;
+ vp9_bilinear_predict8x4 = bilinear_predict8x4_ppc;
+ vp8_bilinear_predict = bilinear_predict4x4_ppc;
+
+ vp9_sixtap_predict16x16 = sixtap_predict16x16_ppc;
+ vp9_sixtap_predict8x8 = sixtap_predict8x8_ppc;
+ vp9_sixtap_predict8x4 = sixtap_predict8x4_ppc;
+ vp9_sixtap_predict = sixtap_predict_ppc;
+
+ vp8_short_idct4x4_1 = vp9_short_idct4x4llm_1_c;
+ vp8_short_idct4x4 = short_idct4x4llm_ppc;
+ vp8_dc_only_idct = vp8_dc_only_idct_c;
+
+ vp8_lf_mbvfull = loop_filter_mbv_ppc;
+ vp8_lf_bvfull = loop_filter_bv_ppc;
+ vp8_lf_mbhfull = loop_filter_mbh_ppc;
+ vp8_lf_bhfull = loop_filter_bh_ppc;
+
+ vp8_lf_mbvsimple = loop_filter_mbvs_ppc;
+ vp8_lf_bvsimple = loop_filter_bvs_ppc;
+ vp8_lf_mbhsimple = loop_filter_mbhs_ppc;
+ vp8_lf_bhsimple = loop_filter_bhs_ppc;
+
+ vp9_post_proc_down_and_across = vp9_post_proc_down_and_across_c;
+ vp9_mbpost_proc_down = vp9_mbpost_proc_down_c;
+ vp9_mbpost_proc_across_ip = vp9_mbpost_proc_across_ip_c;
+ vp9_plane_add_noise = vp9_plane_add_noise_c;
+
+ vp9_copy_mem16x16 = copy_mem16x16_ppc;
+ vp9_copy_mem8x8 = vp9_copy_mem8x8_c;
+ vp9_copy_mem8x4 = vp9_copy_mem8x4_c;
+
+}
diff --git a/vp9/common/vp9_alloccommon.c b/vp9/common/vp9_alloccommon.c
new file mode 100644
index 0000000..141d5f7
--- /dev/null
+++ b/vp9/common/vp9_alloccommon.c
@@ -0,0 +1,223 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "./vpx_config.h"
+#include "vp9/common/vp9_blockd.h"
+#include "vpx_mem/vpx_mem.h"
+#include "vp9/common/vp9_onyxc_int.h"
+#include "vp9/common/vp9_findnearmv.h"
+#include "vp9/common/vp9_entropymode.h"
+#include "vp9/common/vp9_entropymv.h"
+#include "vp9/common/vp9_systemdependent.h"
+
+
+void vp9_update_mode_info_border(VP9_COMMON *cpi, MODE_INFO *mi_base) {
+  const int row_stride = cpi->mode_info_stride;
+  int row;
+
+  // Zero the whole first row of the array (the top border).
+  vpx_memset(mi_base, 0, sizeof(*mi_base) * row_stride);
+
+  // Zero the first element of rows 1..mb_rows (the left border column).
+  for (row = 1; row <= cpi->mb_rows; row++) {
+    vpx_memset(mi_base + row * row_stride, 0, sizeof(*mi_base));
+  }
+}
+
+void vp9_update_mode_info_in_image(VP9_COMMON *cpi, MODE_INFO *mi) {
+ int i, j;
+
+ // For each in image mode_info element set the in image flag to 1
+ for (i = 0; i < cpi->mb_rows; i++) {
+ for (j = 0; j < cpi->mb_cols; j++) {
+ mi->mbmi.mb_in_image = 1;
+ mi++; // Next element in the row
+ }
+
+ mi++; // Step over border element at start of next row
+ }
+}
+
+void vp9_de_alloc_frame_buffers(VP9_COMMON *oci) {
+  int i;
+  // Releases the frame buffers and the mode-info / context arrays held by oci.
+  for (i = 0; i < NUM_YV12_BUFFERS; i++)
+    vp8_yv12_de_alloc_frame_buffer(&oci->yv12_fb[i]);
+
+  vp8_yv12_de_alloc_frame_buffer(&oci->temp_scale_frame);
+  vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer);
+
+  vpx_free(oci->above_context);
+  vpx_free(oci->mip);
+  vpx_free(oci->prev_mip);
+  // mi / prev_mi point into mip / prev_mip, so clear them with their owners.
+  oci->above_context = 0;
+  oci->mip = 0;
+  oci->prev_mip = 0;
+  oci->mi = oci->prev_mi = 0;
+}
+
+int vp9_alloc_frame_buffers(VP9_COMMON *oci, int width, int height) {
+ int i;
+ // Returns 0 on success, 1 if any allocation fails (everything is freed first).
+ vp9_de_alloc_frame_buffers(oci);
+
+ /* our internal buffers are always multiples of 16 */
+ if ((width & 0xf) != 0)
+ width += 16 - (width & 0xf);
+
+ if ((height & 0xf) != 0)
+ height += 16 - (height & 0xf);
+
+ // Frame buffers, each allocated with a VP9BORDERINPIXELS border.
+ for (i = 0; i < NUM_YV12_BUFFERS; i++) {
+ oci->fb_idx_ref_cnt[i] = 0;
+ oci->yv12_fb[i].flags = 0;
+ if (vp8_yv12_alloc_frame_buffer(&oci->yv12_fb[i], width, height,
+ VP9BORDERINPIXELS) < 0) {
+ vp9_de_alloc_frame_buffers(oci);
+ return 1;
+ }
+ }
+ // Buffers 0..3 start as new/last/golden/altref, one reference each.
+ oci->new_fb_idx = 0;
+ oci->lst_fb_idx = 1;
+ oci->gld_fb_idx = 2;
+ oci->alt_fb_idx = 3;
+
+ oci->fb_idx_ref_cnt[0] = 1;
+ oci->fb_idx_ref_cnt[1] = 1;
+ oci->fb_idx_ref_cnt[2] = 1;
+ oci->fb_idx_ref_cnt[3] = 1;
+
+ if (vp8_yv12_alloc_frame_buffer(&oci->temp_scale_frame, width, 16,
+ VP9BORDERINPIXELS) < 0) {
+ vp9_de_alloc_frame_buffers(oci);
+ return 1;
+ }
+
+ if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer, width, height,
+ VP9BORDERINPIXELS) < 0) {
+ vp9_de_alloc_frame_buffers(oci);
+ return 1;
+ }
+ // Dimensions in 16x16 units; the mode-info array has one extra row and column.
+ oci->mb_rows = height >> 4;
+ oci->mb_cols = width >> 4;
+ oci->MBs = oci->mb_rows * oci->mb_cols;
+ oci->mode_info_stride = oci->mb_cols + 1;
+ oci->mip = vpx_calloc((oci->mb_cols + 1) * (oci->mb_rows + 1), sizeof(MODE_INFO));
+
+ if (!oci->mip) {
+ vp9_de_alloc_frame_buffers(oci);
+ return 1;
+ }
+ // mi skips the first (border) row and the first (border) element of the next row.
+ oci->mi = oci->mip + oci->mode_info_stride + 1;
+
+ /* allocate memory for last frame MODE_INFO array */
+
+ oci->prev_mip = vpx_calloc((oci->mb_cols + 1) * (oci->mb_rows + 1), sizeof(MODE_INFO));
+
+ if (!oci->prev_mip) {
+ vp9_de_alloc_frame_buffers(oci);
+ return 1;
+ }
+
+ oci->prev_mi = oci->prev_mip + oci->mode_info_stride + 1;
+
+ oci->above_context = vpx_calloc(sizeof(ENTROPY_CONTEXT_PLANES) * oci->mb_cols, 1);
+
+ if (!oci->above_context) {
+ vp9_de_alloc_frame_buffers(oci);
+ return 1;
+ }
+
+ vp9_update_mode_info_border(oci, oci->mip);
+ vp9_update_mode_info_in_image(oci, oci->mi);
+
+ return 0;
+}
+void vp9_setup_version(VP9_COMMON *cm) {
+ if (cm->version & 0x4) {
+ if (!CONFIG_EXPERIMENTAL)
+ vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
+ "Bitstream was created by an experimental "
+ "encoder");
+ cm->experimental = 1;
+ }
+
+ switch (cm->version & 0x3) {
+ case 0:
+ cm->no_lpf = 0;
+ cm->filter_type = NORMAL_LOOPFILTER;
+ cm->use_bilinear_mc_filter = 0;
+ cm->full_pixel = 0;
+ break;
+ case 1:
+ cm->no_lpf = 0;
+ cm->filter_type = SIMPLE_LOOPFILTER;
+ cm->use_bilinear_mc_filter = 1;
+ cm->full_pixel = 0;
+ break;
+ case 2:
+ case 3:
+ cm->no_lpf = 1;
+ cm->filter_type = NORMAL_LOOPFILTER;
+ cm->use_bilinear_mc_filter = 1;
+ cm->full_pixel = 0;
+ break;
+ // Full pel only code deprecated in experimental code base
+ // case 3:
+ // cm->no_lpf = 1;
+ // cm->filter_type = SIMPLE_LOOPFILTER;
+ // cm->use_bilinear_mc_filter = 1;
+ // cm->full_pixel = 1;
+ // break;
+ }
+}
+void vp9_create_common(VP9_COMMON *oci) {
+ vp9_machine_specific_config(oci);
+
+ vp9_init_mbmode_probs(oci);
+
+ vp9_default_bmode_probs(oci->fc.bmode_prob);
+
+ oci->txfm_mode = ONLY_4X4;
+ oci->mb_no_coeff_skip = 1;
+ oci->comp_pred_mode = HYBRID_PREDICTION;
+ oci->no_lpf = 0;
+ oci->filter_type = NORMAL_LOOPFILTER;
+ oci->use_bilinear_mc_filter = 0;
+ oci->full_pixel = 0;
+ oci->clr_type = REG_YUV;
+ oci->clamp_type = RECON_CLAMP_REQUIRED;
+
+ /* Initialise reference frame sign bias structure to defaults */
+ vpx_memset(oci->ref_frame_sign_bias, 0, sizeof(oci->ref_frame_sign_bias));
+
+ /* Default disable buffer to buffer copying */
+ oci->copy_buffer_to_gf = 0;
+ oci->copy_buffer_to_arf = 0;
+ oci->kf_ymode_probs_update = 0;
+}
+
+void vp9_remove_common(VP9_COMMON *oci) {
+ vp9_de_alloc_frame_buffers(oci);
+}
+
+void vp9_initialize_common(void) {
+  vp9_coef_tree_initialize();
+  // Takes no arguments; (void) makes this definition a real prototype.
+  vp9_entropy_mode_init();
+
+  vp9_entropy_mv_init();
+}
diff --git a/vp9/common/vp9_alloccommon.h b/vp9/common/vp9_alloccommon.h
new file mode 100644
index 0000000..3a37dc3
--- /dev/null
+++ b/vp9/common/vp9_alloccommon.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_COMMON_VP9_ALLOCCOMMON_H_
+#define VP9_COMMON_VP9_ALLOCCOMMON_H_
+
+#include "vp9/common/vp9_onyxc_int.h"
+
+void vp9_create_common(VP9_COMMON *oci);
+void vp9_remove_common(VP9_COMMON *oci);
+void vp9_de_alloc_frame_buffers(VP9_COMMON *oci);
+int vp9_alloc_frame_buffers(VP9_COMMON *oci, int width, int height);
+void vp9_setup_version(VP9_COMMON *oci);
+
+void vp9_update_mode_info_border(VP9_COMMON *cpi, MODE_INFO *mi_base);
+void vp9_update_mode_info_in_image(VP9_COMMON *cpi, MODE_INFO *mi);
+
+#endif
diff --git a/vp9/common/vp9_asm_com_offsets.c b/vp9/common/vp9_asm_com_offsets.c
new file mode 100644
index 0000000..94ccb6e
--- /dev/null
+++ b/vp9/common/vp9_asm_com_offsets.c
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_config.h"
+#include "vpx/vpx_codec.h"
+#include "vpx_ports/asm_offsets.h"
+
+BEGIN
+
+END
+
+/* add asserts for any offset that is not supported by assembly code */
+/* add asserts for any size that is not supported by assembly code */
diff --git a/vp9/common/vp9_blockd.c b/vp9/common/vp9_blockd.c
new file mode 100644
index 0000000..60aedf8
--- /dev/null
+++ b/vp9/common/vp9_blockd.c
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vp9/common/vp9_blockd.h"
+#include "vpx_mem/vpx_mem.h"
+
+
+const unsigned char vp9_block2left[25] = {
+ 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8
+};
+const unsigned char vp9_block2above[25] = {
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8
+};
+
+const unsigned char vp9_block2left_8x8[25] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 6, 6, 6, 6, 8
+};
+const unsigned char vp9_block2above_8x8[25] = {
+ 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 2, 2, 2, 2, 4, 4, 4, 4, 6, 6, 6, 6, 8
+};
+
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
new file mode 100644
index 0000000..fd20e09
--- /dev/null
+++ b/vp9/common/vp9_blockd.h
@@ -0,0 +1,653 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_COMMON_VP9_BLOCKD_H_
+#define VP9_COMMON_VP9_BLOCKD_H_
+
+void vpx_log(const char *format, ...);
+
+#include "./vpx_config.h"
+#include "vpx_scale/yv12config.h"
+#include "vp9/common/vp9_mv.h"
+#include "vp9/common/vp9_treecoder.h"
+#include "vp9/common/vp9_subpixel.h"
+#include "vpx_ports/mem.h"
+#include "vp9/common/vp9_common.h"
+
+#define TRUE 1
+#define FALSE 0
+
+// #define MODE_STATS
+
+/*#define DCPRED 1*/
+#define DCPREDSIMTHRESH 0
+#define DCPREDCNTTHRESH 3
+
+#define MB_FEATURE_TREE_PROBS 3
+#define PREDICTION_PROBS 3
+
+#define MBSKIP_CONTEXTS 3
+
+#define MAX_MB_SEGMENTS 4
+
+#define MAX_REF_LF_DELTAS 4
+#define MAX_MODE_LF_DELTAS 4
+
+/* Segment Feature Masks */
+#define SEGMENT_DELTADATA 0
+#define SEGMENT_ABSDATA 1
+#define MAX_MV_REFS 9
+
+typedef struct {
+ int r, c;
+} POS;
+
+typedef enum PlaneType {
+ PLANE_TYPE_Y_NO_DC = 0,
+ PLANE_TYPE_Y2,
+ PLANE_TYPE_UV,
+ PLANE_TYPE_Y_WITH_DC,
+} PLANE_TYPE;
+
+typedef char ENTROPY_CONTEXT;
+typedef struct {
+ ENTROPY_CONTEXT y1[4];
+ ENTROPY_CONTEXT u[2];
+ ENTROPY_CONTEXT v[2];
+ ENTROPY_CONTEXT y2;
+} ENTROPY_CONTEXT_PLANES;
+
+extern const unsigned char vp9_block2left[25];
+extern const unsigned char vp9_block2above[25];
+extern const unsigned char vp9_block2left_8x8[25];
+extern const unsigned char vp9_block2above_8x8[25];
+
+#define VP9_COMBINEENTROPYCONTEXTS( Dest, A, B) \
+ Dest = ((A)!=0) + ((B)!=0);
+
+typedef enum {
+ KEY_FRAME = 0,
+ INTER_FRAME = 1
+} FRAME_TYPE;
+
+typedef enum
+{
+ SIXTAP = 0,
+ BILINEAR = 1,
+ EIGHTTAP = 2,
+ EIGHTTAP_SHARP = 3,
+ SWITCHABLE /* should be the last one */
+} INTERPOLATIONFILTERTYPE;
+
+typedef enum
+{
+ DC_PRED, /* average of above and left pixels */
+ V_PRED, /* vertical prediction */
+ H_PRED, /* horizontal prediction */
+ D45_PRED, /* Directional 45 deg prediction [anti-clockwise from 0 deg hor] */
+ D135_PRED, /* Directional 135 deg prediction [anti-clockwise from 0 deg hor] */
+ D117_PRED, /* Directional 112 deg prediction [anti-clockwise from 0 deg hor] */
+ D153_PRED, /* Directional 157 deg prediction [anti-clockwise from 0 deg hor] */
+ D27_PRED, /* Directional 22 deg prediction [anti-clockwise from 0 deg hor] */
+ D63_PRED, /* Directional 67 deg prediction [anti-clockwise from 0 deg hor] */
+ TM_PRED, /* Truemotion prediction */
+ I8X8_PRED, /* 8x8 based prediction, each 8x8 has its own prediction mode */
+ B_PRED, /* block based prediction, each block has its own prediction mode */
+
+ NEARESTMV,
+ NEARMV,
+ ZEROMV,
+ NEWMV,
+ SPLITMV,
+
+ MB_MODE_COUNT
+} MB_PREDICTION_MODE;
+
+// Segment level features.
+typedef enum {
+ SEG_LVL_ALT_Q = 0, // Use alternate Quantizer ....
+ SEG_LVL_ALT_LF = 1, // Use alternate loop filter value...
+ SEG_LVL_REF_FRAME = 2, // Optional Segment reference frame
+ SEG_LVL_MODE = 3, // Optional Segment mode
+ SEG_LVL_EOB = 4, // EOB end stop marker.
+ SEG_LVL_TRANSFORM = 5, // Block transform size.
+ SEG_LVL_MAX = 6 // Number of MB level features supported
+
+} SEG_LVL_FEATURES;
+
+// Transform sizes.
+typedef enum {
+ TX_4X4, // 4x4 dct transform
+ TX_8X8, // 8x8 dct transform
+ TX_16X16, // 16x16 dct transform
+ TX_SIZE_MAX // Number of different transforms available
+} TX_SIZE;
+
+typedef enum {
+ DCT_DCT = 0, // DCT in both horizontal and vertical
+ ADST_DCT = 1, // ADST in vertical, DCT in horizontal
+ DCT_ADST = 2, // DCT in vertical, ADST in horizontal
+ ADST_ADST = 3 // ADST in both directions
+} TX_TYPE;
+
+#define VP9_YMODES (B_PRED + 1)
+#define VP9_UV_MODES (TM_PRED + 1)
+#define VP9_I8X8_MODES (TM_PRED + 1)
+#define VP9_I32X32_MODES (TM_PRED + 1)
+
+#define VP9_MVREFS (1 + SPLITMV - NEARESTMV)
+
+#if CONFIG_LOSSLESS
+#define WHT_UPSCALE_FACTOR 3
+#define Y2_WHT_UPSCALE_FACTOR 2
+#endif
+
+typedef enum {
+ B_DC_PRED, /* average of above and left pixels */
+ B_TM_PRED,
+
+ B_VE_PRED, /* vertical prediction */
+ B_HE_PRED, /* horizontal prediction */
+
+ B_LD_PRED,
+ B_RD_PRED,
+
+ B_VR_PRED,
+ B_VL_PRED,
+ B_HD_PRED,
+ B_HU_PRED,
+#if CONFIG_NEWBINTRAMODES
+ B_CONTEXT_PRED,
+#endif
+
+ LEFT4X4,
+ ABOVE4X4,
+ ZERO4X4,
+ NEW4X4,
+
+ B_MODE_COUNT
+} B_PREDICTION_MODE;
+
+#define VP9_BINTRAMODES (LEFT4X4)
+#define VP9_SUBMVREFS (1 + NEW4X4 - LEFT4X4)
+
+#if CONFIG_NEWBINTRAMODES
+/* The number of B_PRED intra modes that are replaced by B_CONTEXT_PRED */
+#define CONTEXT_PRED_REPLACEMENTS 0
+#define VP9_KF_BINTRAMODES (VP9_BINTRAMODES - 1)
+#define VP9_NKF_BINTRAMODES (VP9_BINTRAMODES - CONTEXT_PRED_REPLACEMENTS)
+#else
+#define VP9_KF_BINTRAMODES (VP9_BINTRAMODES) /* 10 */
+#define VP9_NKF_BINTRAMODES (VP9_BINTRAMODES) /* 10 */
+#endif
+
+typedef enum {
+ PARTITIONING_16X8 = 0,
+ PARTITIONING_8X16,
+ PARTITIONING_8X8,
+ PARTITIONING_4X4,
+ NB_PARTITIONINGS,
+} SPLITMV_PARTITIONING_TYPE;
+
+/* For keyframes, intra block modes are predicted by the (already decoded)
+ modes for the Y blocks to the left and above us; for interframes, there
+ is a single probability table. */
+
+union b_mode_info {
+ struct {
+ B_PREDICTION_MODE first;
+ TX_TYPE tx_type;
+#if CONFIG_COMP_INTRA_PRED
+ B_PREDICTION_MODE second;
+#endif
+#if CONFIG_NEWBINTRAMODES
+ B_PREDICTION_MODE context;
+#endif
+ } as_mode;
+ struct {
+ int_mv first;
+ int_mv second;
+ } as_mv;
+};
+
+typedef enum {
+ NONE = -1,
+ INTRA_FRAME = 0,
+ LAST_FRAME = 1,
+ GOLDEN_FRAME = 2,
+ ALTREF_FRAME = 3,
+ MAX_REF_FRAMES = 4
+} MV_REFERENCE_FRAME;
+
+typedef struct {
+ MB_PREDICTION_MODE mode, uv_mode;
+#if CONFIG_COMP_INTRA_PRED
+ MB_PREDICTION_MODE second_mode, second_uv_mode;
+#endif
+#if CONFIG_COMP_INTERINTRA_PRED
+ MB_PREDICTION_MODE interintra_mode, interintra_uv_mode;
+#endif
+ MV_REFERENCE_FRAME ref_frame, second_ref_frame;
+ TX_SIZE txfm_size;
+ int_mv mv[2]; // for each reference frame used
+ int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REFS];
+ int_mv best_mv, best_second_mv;
+#if CONFIG_NEW_MVREF
+ int best_index, best_second_index;
+#endif
+
+ int mb_mode_context[MAX_REF_FRAMES];
+
+ SPLITMV_PARTITIONING_TYPE partitioning;
+ unsigned char mb_skip_coeff; /* does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens */
+ unsigned char need_to_clamp_mvs;
+ unsigned char need_to_clamp_secondmv;
+ unsigned char segment_id; /* Which set of segmentation parameters should be used for this MB */
+
+ // Flags used for prediction status of various bitstream signals
+ unsigned char seg_id_predicted;
+ unsigned char ref_predicted;
+
+ // Indicates if the mb is part of the image (1) vs border (0)
+ // This can be useful in determining whether the MB provides
+ // a valid predictor
+ unsigned char mb_in_image;
+
+#if CONFIG_PRED_FILTER
+ // Flag to turn prediction signal filter on(1)/off(0 ) at the MB level
+ unsigned int pred_filter_enabled;
+#endif
+ INTERPOLATIONFILTERTYPE interp_filter;
+
+#if CONFIG_SUPERBLOCKS
+ // FIXME need a SB array of 4 MB_MODE_INFOs that
+ // only needs one encoded_as_sb.
+ unsigned char encoded_as_sb;
+#endif
+} MB_MODE_INFO;
+
+typedef struct {
+ MB_MODE_INFO mbmi;
+ union b_mode_info bmi[16];
+} MODE_INFO;
+
+typedef struct blockd {
+ short *qcoeff;
+ short *dqcoeff;
+ unsigned char *predictor;
+ short *diff;
+ short *dequant;
+
+ /* 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries */
+ unsigned char **base_pre;
+ unsigned char **base_second_pre;
+ int pre;
+ int pre_stride;
+
+ unsigned char **base_dst;
+ int dst;
+ int dst_stride;
+
+ int eob;
+
+ union b_mode_info bmi;
+} BLOCKD;
+
+typedef struct macroblockd {
+ DECLARE_ALIGNED(16, short, diff[400]); /* from idct diff */
+ DECLARE_ALIGNED(16, unsigned char, predictor[384]);
+ DECLARE_ALIGNED(16, short, qcoeff[400]);
+ DECLARE_ALIGNED(16, short, dqcoeff[400]);
+ DECLARE_ALIGNED(16, unsigned short, eobs[25]);
+
+ /* 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries. */
+ BLOCKD block[25];
+ int fullpixel_mask;
+
+ YV12_BUFFER_CONFIG pre; /* Filtered copy of previous frame reconstruction */
+ struct {
+ uint8_t *y_buffer, *u_buffer, *v_buffer;
+ } second_pre;
+ YV12_BUFFER_CONFIG dst;
+
+ MODE_INFO *prev_mode_info_context;
+ MODE_INFO *mode_info_context;
+ int mode_info_stride;
+
+ FRAME_TYPE frame_type;
+
+ int up_available;
+ int left_available;
+
+ /* Y,U,V,Y2 */
+ ENTROPY_CONTEXT_PLANES *above_context;
+ ENTROPY_CONTEXT_PLANES *left_context;
+
+ /* 0 indicates segmentation at MB level is not enabled. Otherwise the individual bits indicate which features are active. */
+ unsigned char segmentation_enabled;
+
+ /* 0 (do not update) 1 (update) the macroblock segmentation map. */
+ unsigned char update_mb_segmentation_map;
+
+ /* 0 (do not update) 1 (update) the macroblock segmentation feature data. */
+ unsigned char update_mb_segmentation_data;
+
+ /* Presumably 0 (SEGMENT_DELTADATA) or 1 (SEGMENT_ABSDATA): whether segment feature data are deltas or absolute values -- TODO confirm. */
+ unsigned char mb_segment_abs_delta;
+
+ /* Per frame flags that define which MB level features (such as quantizer or loop filter level) */
+ /* are enabled and, when enabled, the probabilities used to decode the per MB flags in MB_MODE_INFO */
+
+ // Probability Tree used to code Segment number
+ vp9_prob mb_segment_tree_probs[MB_FEATURE_TREE_PROBS];
+
+#if CONFIG_NEW_MVREF
+ vp9_prob mb_mv_ref_id_probs[MAX_REF_FRAMES][3];
+#endif
+
+ // Segment features
+ signed char segment_feature_data[MAX_MB_SEGMENTS][SEG_LVL_MAX];
+ unsigned int segment_feature_mask[MAX_MB_SEGMENTS];
+
+ /* mode_based Loop filter adjustment */
+ unsigned char mode_ref_lf_delta_enabled;
+ unsigned char mode_ref_lf_delta_update;
+
+ /* Delta values have the range +/- MAX_LOOP_FILTER */
+ signed char last_ref_lf_deltas[MAX_REF_LF_DELTAS]; /* 0 = Intra, Last, GF, ARF */
+ signed char ref_lf_deltas[MAX_REF_LF_DELTAS]; /* 0 = Intra, Last, GF, ARF */
+ signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS]; /* 0 = BPRED, ZERO_MV, MV, SPLIT */
+ signed char mode_lf_deltas[MAX_MODE_LF_DELTAS]; /* 0 = BPRED, ZERO_MV, MV, SPLIT */
+
+ /* Distance of MB away from frame edges */
+ int mb_to_left_edge;
+ int mb_to_right_edge;
+ int mb_to_top_edge;
+ int mb_to_bottom_edge;
+
+ unsigned int frames_since_golden;
+ unsigned int frames_till_alt_ref_frame;
+
+ /* Inverse transform function pointers. */
+ void (*inv_xform4x4_1_x8)(short *input, short *output, int pitch);
+ void (*inv_xform4x4_x8)(short *input, short *output, int pitch);
+ void (*inv_walsh4x4_1)(short *in, short *out);
+ void (*inv_walsh4x4_lossless)(short *in, short *out);
+
+
+ vp9_subpix_fn_t subpixel_predict;
+ vp9_subpix_fn_t subpixel_predict8x4;
+ vp9_subpix_fn_t subpixel_predict8x8;
+ vp9_subpix_fn_t subpixel_predict16x16;
+ vp9_subpix_fn_t subpixel_predict_avg;
+ vp9_subpix_fn_t subpixel_predict_avg8x4;
+ vp9_subpix_fn_t subpixel_predict_avg8x8;
+ vp9_subpix_fn_t subpixel_predict_avg16x16;
+ int allow_high_precision_mv;
+
+ int corrupted;
+
+#if !CONFIG_SUPERBLOCKS && (ARCH_X86 || ARCH_X86_64)
+ /* This is an intermediate buffer currently used in sub-pixel motion search
+ * to keep a copy of the reference area. This buffer can be used for other
+ * purpose.
+ */
+ DECLARE_ALIGNED(32, unsigned char, y_buf[22 * 32]);
+#endif
+
+ int mb_index; // Index of the MB in the SB (0..3)
+ int q_index;
+
+} MACROBLOCKD;
+
+#define ACTIVE_HT 110 // quantization stepsize threshold
+
+#define ACTIVE_HT8 300
+
+#define ACTIVE_HT16 300
+
+// Convert an intra MB_PREDICTION_MODE to the matching B_PREDICTION_MODE.
+static B_PREDICTION_MODE pred_mode_conv(MB_PREDICTION_MODE mode) {
+  B_PREDICTION_MODE b_mode = B_DC_PRED;  // defined result if assert() is compiled out
+  switch (mode) {
+    case DC_PRED:
+      b_mode = B_DC_PRED;
+      break;
+    case V_PRED:
+      b_mode = B_VE_PRED;
+      break;
+    case H_PRED:
+      b_mode = B_HE_PRED;
+      break;
+    case TM_PRED:
+      b_mode = B_TM_PRED;
+      break;
+    case D45_PRED:
+      b_mode = B_LD_PRED;
+      break;
+    case D135_PRED:
+      b_mode = B_RD_PRED;
+      break;
+    case D117_PRED:
+      b_mode = B_VR_PRED;
+      break;
+    case D153_PRED:
+      b_mode = B_HD_PRED;
+      break;
+    case D27_PRED:
+      b_mode = B_HU_PRED;
+      break;
+    case D63_PRED:
+      b_mode = B_VL_PRED;
+      break;
+    default:
+      // Mode not mapped here: trap in debug builds, else keep the B_DC_PRED fallback.
+      assert(0);
+      break;
+  }
+  return b_mode;
+}
+
+// Map a B_PREDICTION_MODE to a TX_TYPE; modes not listed map to DCT_DCT.
+static TX_TYPE txfm_map(B_PREDICTION_MODE bmode) {
+  // Start from DCT_DCT so that no path returns an indeterminate value.
+  TX_TYPE tx_type = DCT_DCT;
+  switch (bmode) {
+    case B_TM_PRED :
+    case B_RD_PRED :
+      tx_type = ADST_ADST;
+      break;
+
+    case B_VE_PRED :
+    case B_VR_PRED :
+      tx_type = ADST_DCT;
+      break;
+
+    case B_HE_PRED :
+    case B_HD_PRED :
+    case B_HU_PRED :
+      tx_type = DCT_ADST;
+      break;
+
+#if CONFIG_NEWBINTRAMODES
+    case B_CONTEXT_PRED:
+      assert(0);  // asserts in debug builds; keeps DCT_DCT when asserts are off
+      break;
+#endif
+
+    default :
+      tx_type = DCT_DCT;
+      break;
+  }
+  return tx_type;
+}
+
+#define USE_ADST_FOR_I16X16_8X8 0
+#define USE_ADST_FOR_I16X16_4X4 0
+#define USE_ADST_FOR_I8X8_4X4 1
+#define USE_ADST_PERIPHERY_ONLY 1
+// Returns the transform type to use for 4x4 block b of xd (default DCT_DCT).
+static TX_TYPE get_tx_type_4x4(const MACROBLOCKD *xd, const BLOCKD *b) {
+ // TODO(debargha): explore different patterns for ADST usage when blocksize
+ // is smaller than the prediction size
+ TX_TYPE tx_type = DCT_DCT;
+ int ib = (int)(b - xd->block);  // index of b within xd->block[]
+ if (ib >= 16)  // indices 16 and above always get DCT_DCT
+ return tx_type;
+#if CONFIG_SUPERBLOCKS
+ // TODO(rbultje, debargha): Explore ADST usage for superblocks
+ if (xd->mode_info_context->mbmi.encoded_as_sb)
+ return tx_type;
+#endif
+ if (xd->mode_info_context->mbmi.mode == B_PRED &&
+ xd->q_index < ACTIVE_HT) {
+ tx_type = txfm_map(  // map the mode stored in the block itself
+#if CONFIG_NEWBINTRAMODES
+ b->bmi.as_mode.first == B_CONTEXT_PRED ? b->bmi.as_mode.context :
+#endif
+ b->bmi.as_mode.first);
+ } else if (xd->mode_info_context->mbmi.mode == I8X8_PRED &&
+ xd->q_index < ACTIVE_HT) {
+#if USE_ADST_FOR_I8X8_4X4
+#if USE_ADST_PERIPHERY_ONLY
+ // Use ADST for periphery blocks only
+ int ic = (ib & 10);  // keeps bits 1 and 3 of ib: one of 0, 2, 8, 10
+ b += ic - ib;  // b now points at xd->block[ic]
+ tx_type = (ic != 10) ?
+ txfm_map(pred_mode_conv((MB_PREDICTION_MODE)b->bmi.as_mode.first)) :
+ DCT_DCT;
+#else
+ // Use ADST
+ tx_type = txfm_map(pred_mode_conv(
+ (MB_PREDICTION_MODE)b->bmi.as_mode.first));
+#endif
+#else
+ // Use 2D DCT
+ tx_type = DCT_DCT;
+#endif
+ } else if (xd->mode_info_context->mbmi.mode < I8X8_PRED &&
+ xd->q_index < ACTIVE_HT) {
+#if USE_ADST_FOR_I16X16_4X4
+#if USE_ADST_PERIPHERY_ONLY
+ // Use ADST for periphery blocks only (ib below 4, or ib a multiple of 4)
+ tx_type = (ib < 4 || ((ib & 3) == 0)) ?
+ txfm_map(pred_mode_conv(xd->mode_info_context->mbmi.mode)) : DCT_DCT;
+#else
+ // Use ADST
+ tx_type = txfm_map(pred_mode_conv(xd->mode_info_context->mbmi.mode));
+#endif
+#else
+ // Use 2D DCT
+ tx_type = DCT_DCT;
+#endif
+ }
+ return tx_type;
+}
+// Returns the transform type to use for 8x8 block b of xd (default DCT_DCT).
+static TX_TYPE get_tx_type_8x8(const MACROBLOCKD *xd, const BLOCKD *b) {
+ // TODO(debargha): explore different patterns for ADST usage when blocksize
+ // is smaller than the prediction size
+ TX_TYPE tx_type = DCT_DCT;
+ int ib = (int)(b - xd->block);  // index of b within xd->block[]
+ if (ib >= 16)  // indices 16 and above always get DCT_DCT
+ return tx_type;
+#if CONFIG_SUPERBLOCKS
+ // TODO(rbultje, debargha): Explore ADST usage for superblocks
+ if (xd->mode_info_context->mbmi.encoded_as_sb)
+ return tx_type;
+#endif
+ if (xd->mode_info_context->mbmi.mode == I8X8_PRED &&
+ xd->q_index < ACTIVE_HT8) {
+ // TODO(rbultje): MB_PREDICTION_MODE / B_PREDICTION_MODE should be merged
+ // or the relationship otherwise modified to address this type conversion.
+ tx_type = txfm_map(pred_mode_conv(
+ (MB_PREDICTION_MODE)b->bmi.as_mode.first));
+ } else if (xd->mode_info_context->mbmi.mode < I8X8_PRED &&
+ xd->q_index < ACTIVE_HT8) {
+#if USE_ADST_FOR_I8X8_4X4  // NOTE(review): USE_ADST_FOR_I16X16_8X8 meant? confirm
+#if USE_ADST_PERIPHERY_ONLY
+ // Use ADST for periphery blocks only (every block index except 10)
+ tx_type = (ib != 10) ?
+ txfm_map(pred_mode_conv(xd->mode_info_context->mbmi.mode)) : DCT_DCT;
+#else
+ // Use ADST
+ tx_type = txfm_map(pred_mode_conv(xd->mode_info_context->mbmi.mode));
+#endif
+#else
+ // Use 2D DCT
+ tx_type = DCT_DCT;
+#endif
+ }
+ return tx_type;
+}
+
+// Transform type for a 16x16 transform: DCT_DCT, or the txfm_map() result when
+// mbmi.mode is below I8X8_PRED and q_index is below ACTIVE_HT16.
+static TX_TYPE get_tx_type_16x16(const MACROBLOCKD *xd, const BLOCKD *b) {
+  const int block_idx = (int)(b - xd->block);
+  if (block_idx >= 16)
+    return DCT_DCT;
+#if CONFIG_SUPERBLOCKS
+  // TODO(rbultje, debargha): Explore ADST usage for superblocks
+  if (xd->mode_info_context->mbmi.encoded_as_sb)
+    return DCT_DCT;
+#endif
+  if (xd->mode_info_context->mbmi.mode >= I8X8_PRED ||
+      xd->q_index >= ACTIVE_HT16)
+    return DCT_DCT;
+  return txfm_map(pred_mode_conv(xd->mode_info_context->mbmi.mode));
+}
+
+// Dispatch on mbmi.txfm_size to the matching get_tx_type_NxN() helper. Blocks
+// at index 16 and above, and any other transform size, get DCT_DCT.
+static TX_TYPE get_tx_type(const MACROBLOCKD *xd, const BLOCKD *b) {
+  const int block_idx = (int)(b - xd->block);
+  if (block_idx >= 16)
+    return DCT_DCT;
+  if (xd->mode_info_context->mbmi.txfm_size == TX_16X16)
+    return get_tx_type_16x16(xd, b);
+  if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) {
+    // Keep bit 3, move bit 2 down to bit 1: the result is one of 0, 2, 8, 10.
+    const int idx_8x8 = (block_idx & 8) + ((block_idx & 4) >> 1);
+    return get_tx_type_8x8(xd, &xd->block[idx_8x8]);
+  }
+  if (xd->mode_info_context->mbmi.txfm_size == TX_4X4)
+    return get_tx_type_4x4(xd, b);
+  return DCT_DCT;
+}
+
+// Returns 0 for SPLITMV, I8X8_PRED, B_PRED, TX_16X16 or a non-DCT_DCT block 0.
+static int get_2nd_order_usage(const MACROBLOCKD *xd) {
+  if (xd->mode_info_context->mbmi.mode == SPLITMV ||
+      xd->mode_info_context->mbmi.mode == I8X8_PRED ||
+      xd->mode_info_context->mbmi.mode == B_PRED ||
+      xd->mode_info_context->mbmi.txfm_size == TX_16X16)
+    return 0;
+  return get_tx_type(xd, xd->block) == DCT_DCT;
+}
+
+extern void vp9_build_block_doffsets(MACROBLOCKD *xd);
+extern void vp9_setup_block_dptrs(MACROBLOCKD *xd);
+
+// Copies mode_info_context->bmi[0..15] into xd->block[0..15].bmi when the MB
+// mode is SPLITMV, I8X8_PRED or B_PRED; does nothing for any other mode.
+static void update_blockd_bmi(MACROBLOCKD *xd) {
+  int blk;
+
+  if (xd->mode_info_context->mbmi.mode != SPLITMV &&
+      xd->mode_info_context->mbmi.mode != I8X8_PRED &&
+      xd->mode_info_context->mbmi.mode != B_PRED)
+    return;
+
+  for (blk = 0; blk < 16; ++blk)
+    xd->block[blk].bmi = xd->mode_info_context->bmi[blk];
+}
+#endif /* __INC_BLOCKD_H */
diff --git a/vp9/common/vp9_coefupdateprobs.h b/vp9/common/vp9_coefupdateprobs.h
new file mode 100644
index 0000000..cd7eabf
--- /dev/null
+++ b/vp9/common/vp9_coefupdateprobs.h
@@ -0,0 +1,16 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/* Update probabilities for the nodes in the token entropy tree.
+ Generated file included by vp9_entropy.c */
+#define COEF_UPDATE_PROB 252
+#define COEF_UPDATE_PROB_8X8 252
+#define COEF_UPDATE_PROB_16X16 252
diff --git a/vp9/common/vp9_common.h b/vp9/common/vp9_common.h
new file mode 100644
index 0000000..893a3f8
--- /dev/null
+++ b/vp9/common/vp9_common.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_COMMON_VP9_COMMON_H_
+#define VP9_COMMON_VP9_COMMON_H_
+
+#include <assert.h>
+#include "vpx_config.h"
+/* Interface header for common constant data structures and lookup tables */
+
+#include "vpx_mem/vpx_mem.h"
+
+#include "vp9/common/vp9_common_types.h"
+
+/* Copy between fixed-size arrays of equal size; for structs just assign. */
+#define vp9_copy(Dest, Src) { \
+    assert(sizeof(Dest) == sizeof(Src)); \
+    vpx_memcpy((Dest), (Src), sizeof(Src)); \
+  }
+
+/* Use this for variably-sized arrays: copies N elements from Src to Dest.
+ * Arguments are parenthesized so that expressions such as a + b are safe. */
+#define vp9_copy_array(Dest, Src, N) { \
+    assert(sizeof(*(Dest)) == sizeof(*(Src))); \
+    vpx_memcpy((Dest), (Src), (N) * sizeof(*(Src))); \
+  }
+
+/* Zero-fill helpers. Each expands to a full statement, trailing ';' included. */
+#define vp9_zero(Dest) vpx_memset(&(Dest), 0, sizeof(Dest));
+
+#define vp9_zero_array(Dest, N) vpx_memset((Dest), 0, (N) * sizeof(*(Dest)));
+
+#endif /* VP9_COMMON_VP9_COMMON_H_ */
diff --git a/vp9/common/vp9_common_types.h b/vp9/common/vp9_common_types.h
new file mode 100644
index 0000000..8982898
--- /dev/null
+++ b/vp9/common/vp9_common_types.h
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_COMMON_VP9_COMMON_TYPES_H_
+#define VP9_COMMON_VP9_COMMON_TYPES_H_
+
+#define TRUE 1
+#define FALSE 0
+
+#endif
diff --git a/vp9/common/vp9_context.c b/vp9/common/vp9_context.c
new file mode 100644
index 0000000..271b455
--- /dev/null
+++ b/vp9/common/vp9_context.c
@@ -0,0 +1,397 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vp9/common/vp9_entropy.h"
+
+/* *** GENERATED FILE: DO NOT EDIT *** */
+
+#if 0
+int Contexts[vp8_coef_counter_dimen];
+
+const int default_contexts[vp8_coef_counter_dimen] = {
+ {
+ // Block Type ( 0 )
+ {
+ // Coeff Band ( 0 )
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ },
+ {
+ // Coeff Band ( 1 )
+ {30190, 26544, 225, 24, 4, 0, 0, 0, 0, 0, 0, 4171593},
+ {26846, 25157, 1241, 130, 26, 6, 1, 0, 0, 0, 0, 149987},
+ {10484, 9538, 1006, 160, 36, 18, 0, 0, 0, 0, 0, 15104},
+ },
+ {
+ // Coeff Band ( 2 )
+ {25842, 40456, 1126, 83, 11, 2, 0, 0, 0, 0, 0, 0},
+ {9338, 8010, 512, 73, 7, 3, 2, 0, 0, 0, 0, 43294},
+ {1047, 751, 149, 31, 13, 6, 1, 0, 0, 0, 0, 879},
+ },
+ {
+ // Coeff Band ( 3 )
+ {26136, 9826, 252, 13, 0, 0, 0, 0, 0, 0, 0, 0},
+ {8134, 5574, 191, 14, 2, 0, 0, 0, 0, 0, 0, 35302},
+ { 605, 677, 116, 9, 1, 0, 0, 0, 0, 0, 0, 611},
+ },
+ {
+ // Coeff Band ( 4 )
+ {10263, 15463, 283, 17, 0, 0, 0, 0, 0, 0, 0, 0},
+ {2773, 2191, 128, 9, 2, 2, 0, 0, 0, 0, 0, 10073},
+ { 134, 125, 32, 4, 0, 2, 0, 0, 0, 0, 0, 50},
+ },
+ {
+ // Coeff Band ( 5 )
+ {10483, 2663, 23, 1, 0, 0, 0, 0, 0, 0, 0, 0},
+ {2137, 1251, 27, 1, 1, 0, 0, 0, 0, 0, 0, 14362},
+ { 116, 156, 14, 2, 1, 0, 0, 0, 0, 0, 0, 190},
+ },
+ {
+ // Coeff Band ( 6 )
+ {40977, 27614, 412, 28, 0, 0, 0, 0, 0, 0, 0, 0},
+ {6113, 5213, 261, 22, 3, 0, 0, 0, 0, 0, 0, 26164},
+ { 382, 312, 50, 14, 2, 0, 0, 0, 0, 0, 0, 345},
+ },
+ {
+ // Coeff Band ( 7 )
+ { 0, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 0, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 319},
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8},
+ },
+ },
+ {
+ // Block Type ( 1 )
+ {
+ // Coeff Band ( 0 )
+ {3268, 19382, 1043, 250, 93, 82, 49, 26, 17, 8, 25, 82289},
+ {8758, 32110, 5436, 1832, 827, 668, 420, 153, 24, 0, 3, 52914},
+ {9337, 23725, 8487, 3954, 2107, 1836, 1069, 399, 59, 0, 0, 18620},
+ },
+ {
+ // Coeff Band ( 1 )
+ {12419, 8420, 452, 62, 9, 1, 0, 0, 0, 0, 0, 0},
+ {11715, 8705, 693, 92, 15, 7, 2, 0, 0, 0, 0, 53988},
+ {7603, 8585, 2306, 778, 270, 145, 39, 5, 0, 0, 0, 9136},
+ },
+ {
+ // Coeff Band ( 2 )
+ {15938, 14335, 1207, 184, 55, 13, 4, 1, 0, 0, 0, 0},
+ {7415, 6829, 1138, 244, 71, 26, 7, 0, 0, 0, 0, 9980},
+ {1580, 1824, 655, 241, 89, 46, 10, 2, 0, 0, 0, 429},
+ },
+ {
+ // Coeff Band ( 3 )
+ {19453, 5260, 201, 19, 0, 0, 0, 0, 0, 0, 0, 0},
+ {9173, 3758, 213, 22, 1, 1, 0, 0, 0, 0, 0, 9820},
+ {1689, 1277, 276, 51, 17, 4, 0, 0, 0, 0, 0, 679},
+ },
+ {
+ // Coeff Band ( 4 )
+ {12076, 10667, 620, 85, 19, 9, 5, 0, 0, 0, 0, 0},
+ {4665, 3625, 423, 55, 19, 9, 0, 0, 0, 0, 0, 5127},
+ { 415, 440, 143, 34, 20, 7, 2, 0, 0, 0, 0, 101},
+ },
+ {
+ // Coeff Band ( 5 )
+ {12183, 4846, 115, 11, 1, 0, 0, 0, 0, 0, 0, 0},
+ {4226, 3149, 177, 21, 2, 0, 0, 0, 0, 0, 0, 7157},
+ { 375, 621, 189, 51, 11, 4, 1, 0, 0, 0, 0, 198},
+ },
+ {
+ // Coeff Band ( 6 )
+ {61658, 37743, 1203, 94, 10, 3, 0, 0, 0, 0, 0, 0},
+ {15514, 11563, 903, 111, 14, 5, 0, 0, 0, 0, 0, 25195},
+ { 929, 1077, 291, 78, 14, 7, 1, 0, 0, 0, 0, 507},
+ },
+ {
+ // Coeff Band ( 7 )
+ { 0, 990, 15, 3, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 0, 412, 13, 0, 0, 0, 0, 0, 0, 0, 0, 1641},
+ { 0, 18, 7, 1, 0, 0, 0, 0, 0, 0, 0, 30},
+ },
+ },
+ {
+ // Block Type ( 2 )
+ {
+ // Coeff Band ( 0 )
+ { 953, 24519, 628, 120, 28, 12, 4, 0, 0, 0, 0, 2248798},
+ {1525, 25654, 2647, 617, 239, 143, 42, 5, 0, 0, 0, 66837},
+ {1180, 11011, 3001, 1237, 532, 448, 239, 54, 5, 0, 0, 7122},
+ },
+ {
+ // Coeff Band ( 1 )
+ {1356, 2220, 67, 10, 4, 1, 0, 0, 0, 0, 0, 0},
+ {1450, 2544, 102, 18, 4, 3, 0, 0, 0, 0, 0, 57063},
+ {1182, 2110, 470, 130, 41, 21, 0, 0, 0, 0, 0, 6047},
+ },
+ {
+ // Coeff Band ( 2 )
+ { 370, 3378, 200, 30, 5, 4, 1, 0, 0, 0, 0, 0},
+ { 293, 1006, 131, 29, 11, 0, 0, 0, 0, 0, 0, 5404},
+ { 114, 387, 98, 23, 4, 8, 1, 0, 0, 0, 0, 236},
+ },
+ {
+ // Coeff Band ( 3 )
+ { 579, 194, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 395, 213, 5, 1, 0, 0, 0, 0, 0, 0, 0, 4157},
+ { 119, 122, 4, 0, 0, 0, 0, 0, 0, 0, 0, 300},
+ },
+ {
+ // Coeff Band ( 4 )
+ { 38, 557, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 21, 114, 12, 1, 0, 0, 0, 0, 0, 0, 0, 427},
+ { 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7},
+ },
+ {
+ // Coeff Band ( 5 )
+ { 52, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 18, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 652},
+ { 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30},
+ },
+ {
+ // Coeff Band ( 6 )
+ { 640, 569, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 25, 77, 2, 0, 0, 0, 0, 0, 0, 0, 0, 517},
+ { 4, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3},
+ },
+ {
+ // Coeff Band ( 7 )
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ },
+ },
+ {
+ // Block Type ( 3 )
+ {
+ // Coeff Band ( 0 )
+ {2506, 20161, 2707, 767, 261, 178, 107, 30, 14, 3, 0, 100694},
+ {8806, 36478, 8817, 3268, 1280, 850, 401, 114, 42, 0, 0, 58572},
+ {11003, 27214, 11798, 5716, 2482, 2072, 1048, 175, 32, 0, 0, 19284},
+ },
+ {
+ // Coeff Band ( 1 )
+ {9738, 11313, 959, 205, 70, 18, 11, 1, 0, 0, 0, 0},
+ {12628, 15085, 1507, 273, 52, 19, 9, 0, 0, 0, 0, 54280},
+ {10701, 15846, 5561, 1926, 813, 570, 249, 36, 0, 0, 0, 6460},
+ },
+ {
+ // Coeff Band ( 2 )
+ {6781, 22539, 2784, 634, 182, 123, 20, 4, 0, 0, 0, 0},
+ {6263, 11544, 2649, 790, 259, 168, 27, 5, 0, 0, 0, 20539},
+ {3109, 4075, 2031, 896, 457, 386, 158, 29, 0, 0, 0, 1138},
+ },
+ {
+ // Coeff Band ( 3 )
+ {11515, 4079, 465, 73, 5, 14, 2, 0, 0, 0, 0, 0},
+ {9361, 5834, 650, 96, 24, 8, 4, 0, 0, 0, 0, 22181},
+ {4343, 3974, 1360, 415, 132, 96, 14, 1, 0, 0, 0, 1267},
+ },
+ {
+ // Coeff Band ( 4 )
+ {4787, 9297, 823, 168, 44, 12, 4, 0, 0, 0, 0, 0},
+ {3619, 4472, 719, 198, 60, 31, 3, 0, 0, 0, 0, 8401},
+ {1157, 1175, 483, 182, 88, 31, 8, 0, 0, 0, 0, 268},
+ },
+ {
+ // Coeff Band ( 5 )
+ {8299, 1226, 32, 5, 1, 0, 0, 0, 0, 0, 0, 0},
+ {3502, 1568, 57, 4, 1, 1, 0, 0, 0, 0, 0, 9811},
+ {1055, 1070, 166, 29, 6, 1, 0, 0, 0, 0, 0, 527},
+ },
+ {
+ // Coeff Band ( 6 )
+ {27414, 27927, 1989, 347, 69, 26, 0, 0, 0, 0, 0, 0},
+ {5876, 10074, 1574, 341, 91, 24, 4, 0, 0, 0, 0, 21954},
+ {1571, 2171, 778, 324, 124, 65, 16, 0, 0, 0, 0, 979},
+ },
+ {
+ // Coeff Band ( 7 )
+ { 0, 29, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 0, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 459},
+ { 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13},
+ },
+ },
+};
+
+// Update probabilities for the nodes in the token entropy tree.
+const vp9_prob tree_update_probs[vp9_coef_tree_dimen] = {
+ {
+ {
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ {
+ {176, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {223, 241, 252, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {249, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 244, 252, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {234, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 246, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {239, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {251, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {251, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 254, 253, 255, 254, 255, 255, 255, 255, 255, 255, },
+ {250, 255, 254, 255, 254, 255, 255, 255, 255, 255, 255, },
+ {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ },
+ {
+ {
+ {217, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {225, 252, 241, 253, 255, 255, 254, 255, 255, 255, 255, },
+ {234, 250, 241, 250, 253, 255, 253, 254, 255, 255, 255, },
+ },
+ {
+ {255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {223, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {238, 253, 254, 254, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {249, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {247, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ },
+ {
+ {
+ {186, 251, 250, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {234, 251, 244, 254, 255, 255, 255, 255, 255, 255, 255, },
+ {251, 251, 243, 253, 254, 255, 254, 255, 255, 255, 255, },
+ },
+ {
+ {255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {236, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {251, 253, 253, 254, 254, 255, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ },
+ {
+ {
+ {248, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {250, 254, 252, 254, 255, 255, 255, 255, 255, 255, 255, },
+ {248, 254, 249, 253, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {246, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {252, 254, 251, 254, 254, 255, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 254, 252, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {248, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {253, 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {245, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {253, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 251, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {249, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {255, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 255, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ {
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+ },
+ },
+};
+#endif
diff --git a/vp9/common/vp9_debugmodes.c b/vp9/common/vp9_debugmodes.c
new file mode 100644
index 0000000..76318d2
--- /dev/null
+++ b/vp9/common/vp9_debugmodes.c
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdio.h>
+#include "vp9/common/vp9_blockd.h"
+
+/* Debug helper: appends a dump of the modes and motion vectors stored in mi to
+ * the file "mvs.stt" (opened relative to the working directory). mi is indexed
+ * with a stride of cols + 1 entries per macroblock row, i.e. one entry is
+ * skipped after each row of cols macroblocks. frame is only used to label the
+ * section headings. Nothing is written, and the function simply returns, if
+ * the file cannot be opened. */
+void vp9_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols,
+                                        int frame) {
+  int mb_row;
+  int mb_col;
+  int mb_index = 0;
+  FILE *mvs = fopen("mvs.stt", "a");
+
+  /* fopen() returns NULL on failure; passing that to fprintf() is undefined */
+  if (mvs == NULL)
+    return;
+
+  /* print out the macroblock Y modes */
+  fprintf(mvs, "Mb Modes for Frame %d\n", frame);
+
+  for (mb_row = 0; mb_row < rows; mb_row++) {
+    for (mb_col = 0; mb_col < cols; mb_col++) {
+      fprintf(mvs, "%2d ", mi[mb_index].mbmi.mode);
+      mb_index++;
+    }
+
+    fprintf(mvs, "\n");
+    mb_index++;  /* step over the extra entry at the end of each row */
+  }
+
+  fprintf(mvs, "\n");
+
+  /* print out the macroblock reference frames */
+  mb_index = 0;
+  fprintf(mvs, "Mb mv ref for Frame %d\n", frame);
+
+  for (mb_row = 0; mb_row < rows; mb_row++) {
+    for (mb_col = 0; mb_col < cols; mb_col++) {
+      fprintf(mvs, "%2d ", mi[mb_index].mbmi.ref_frame);
+      mb_index++;
+    }
+
+    fprintf(mvs, "\n");
+    mb_index++;
+  }
+
+  fprintf(mvs, "\n");
+
+  /* print out the macroblock UV modes */
+  mb_index = 0;
+  fprintf(mvs, "UV Modes for Frame %d\n", frame);
+
+  for (mb_row = 0; mb_row < rows; mb_row++) {
+    for (mb_col = 0; mb_col < cols; mb_col++) {
+      fprintf(mvs, "%2d ", mi[mb_index].mbmi.uv_mode);
+      mb_index++;
+    }
+
+    mb_index++;
+    fprintf(mvs, "\n");
+  }
+
+  fprintf(mvs, "\n");
+
+  /* print out the block modes (mb_index is recomputed for every 4x4 block) */
+  fprintf(mvs, "Mbs for Frame %d\n", frame);
+  {
+    int b_row;
+
+    for (b_row = 0; b_row < 4 * rows; b_row++) {
+      int b_col;
+      int bindex;
+
+      for (b_col = 0; b_col < 4 * cols; b_col++) {
+        mb_index = (b_row >> 2) * (cols + 1) + (b_col >> 2);
+        bindex = (b_row & 3) * 4 + (b_col & 3);
+
+        if (mi[mb_index].mbmi.mode == B_PRED) {
+          fprintf(mvs, "%2d ", mi[mb_index].bmi[bindex].as_mode.first);
+#if CONFIG_COMP_INTRA_PRED
+          fprintf(mvs, "%2d ", mi[mb_index].bmi[bindex].as_mode.second);
+#endif
+        } else {
+          fprintf(mvs, "xx ");
+        }
+      }
+
+      fprintf(mvs, "\n");
+    }
+  }
+  fprintf(mvs, "\n");
+
+  /* print out the macroblock mvs */
+  mb_index = 0;
+  fprintf(mvs, "MVs for Frame %d\n", frame);
+
+  for (mb_row = 0; mb_row < rows; mb_row++) {
+    for (mb_col = 0; mb_col < cols; mb_col++) {
+      fprintf(mvs, "%5d:%-5d", mi[mb_index].mbmi.mv[0].as_mv.row / 2,
+              mi[mb_index].mbmi.mv[0].as_mv.col / 2);
+      mb_index++;
+    }
+
+    mb_index++;
+    fprintf(mvs, "\n");
+  }
+
+  fprintf(mvs, "\n");
+
+  /* print out the block mvs (mb_index is recomputed for every 4x4 block) */
+  fprintf(mvs, "MVs for Frame %d\n", frame);
+  {
+    int b_row;
+
+    for (b_row = 0; b_row < 4 * rows; b_row++) {
+      int b_col;
+      int bindex;
+
+      for (b_col = 0; b_col < 4 * cols; b_col++) {
+        mb_index = (b_row >> 2) * (cols + 1) + (b_col >> 2);
+        bindex = (b_row & 3) * 4 + (b_col & 3);
+        fprintf(mvs, "%3d:%-3d ",
+                mi[mb_index].bmi[bindex].as_mv.first.as_mv.row,
+                mi[mb_index].bmi[bindex].as_mv.first.as_mv.col);
+      }
+
+      fprintf(mvs, "\n");
+    }
+  }
+  fprintf(mvs, "\n");
+
+  fclose(mvs);
+}
diff --git a/vp9/common/vp9_default_coef_probs.h b/vp9/common/vp9_default_coef_probs.h
new file mode 100644
index 0000000..52fb02f
--- /dev/null
+++ b/vp9/common/vp9_default_coef_probs.h
@@ -0,0 +1,1377 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+*/
+
+
+/*Generated file, included by vp9_entropy.c*/
+
+
+static const vp9_prob default_coef_probs [BLOCK_TYPES] /* default coefficient probabilities; outermost index: block type (4 listed below) */
+ [COEF_BANDS] /* coefficient band (8 listed per block type) */
+ [PREV_COEF_CONTEXTS] /* context row (4 listed per band) */
+ [ENTROPY_NODES] = { /* 11 probability values per row */
+ {
+ /* Block Type ( 0 ) */
+ {
+ /* Coeff Band ( 0 ) -- every entry is 128 */
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+ },
+ {
+ /* Coeff Band ( 1 )*/
+ { 253, 136, 254, 255, 228, 219, 128, 128, 128, 128, 128 },
+ { 189, 129, 242, 255, 227, 213, 255, 219, 128, 128, 128 },
+ { 106, 126, 227, 252, 214, 209, 255, 255, 128, 128, 128 },
+ { 90, 116, 227, 252, 214, 209, 255, 255, 128, 128, 128 },
+ },
+ {
+ /* Coeff Band ( 2 )*/
+ { 1, 98, 248, 255, 236, 226, 255, 255, 128, 128, 128 },
+ { 181, 133, 238, 254, 221, 234, 255, 154, 128, 128, 128 },
+ { 78, 134, 202, 247, 198, 180, 255, 219, 128, 128, 128 },
+ { 64, 128, 202, 247, 198, 180, 255, 219, 128, 128, 128 },
+ },
+ {
+ /* Coeff Band ( 3 )*/
+ { 1, 185, 249, 255, 243, 255, 128, 128, 128, 128, 128 },
+ { 184, 150, 247, 255, 236, 224, 128, 128, 128, 128, 128 },
+ { 77, 110, 216, 255, 236, 230, 128, 128, 128, 128, 128 },
+ { 64, 100, 216, 255, 236, 230, 128, 128, 128, 128, 128 },
+ },
+ {
+ /* Coeff Band ( 4 )*/
+ { 1, 101, 251, 255, 241, 255, 128, 128, 128, 128, 128 },
+ { 170, 139, 241, 252, 236, 209, 255, 255, 128, 128, 128 },
+ { 37, 116, 196, 243, 228, 255, 255, 255, 128, 128, 128 },
+ { 28, 110, 196, 243, 228, 255, 255, 255, 128, 128, 128 },
+ },
+ {
+ /* Coeff Band ( 5 )*/
+ { 1, 204, 254, 255, 245, 255, 128, 128, 128, 128, 128 },
+ { 207, 160, 250, 255, 238, 128, 128, 128, 128, 128, 128 },
+ { 102, 103, 231, 255, 211, 171, 128, 128, 128, 128, 128 },
+ { 90, 90, 231, 255, 211, 171, 128, 128, 128, 128, 128 },
+ },
+ {
+ /* Coeff Band ( 6 )*/
+ { 1, 152, 252, 255, 240, 255, 128, 128, 128, 128, 128 },
+ { 177, 135, 243, 255, 234, 225, 128, 128, 128, 128, 128 },
+ { 80, 129, 211, 255, 194, 224, 128, 128, 128, 128, 128 },
+ { 64, 120, 211, 255, 194, 224, 128, 128, 128, 128, 128 },
+ },
+ {
+ /* Coeff Band ( 7 )*/
+ { 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 246, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+ }
+ },
+ {
+ /* Block Type ( 1 ) */
+ {
+ /* Coeff Band ( 0 )*/
+ { 198, 35, 237, 223, 193, 187, 162, 160, 145, 155, 62 },
+ { 131, 45, 198, 221, 172, 176, 220, 157, 252, 221, 1 },
+ { 68, 47, 146, 208, 149, 167, 221, 162, 255, 223, 128 },
+ { 48, 32, 146, 208, 149, 167, 221, 162, 255, 223, 128 },
+ },
+ {
+ /* Coeff Band ( 1 )*/
+ { 1, 149, 241, 255, 221, 224, 255, 255, 128, 128, 128 },
+ { 184, 141, 234, 253, 222, 220, 255, 199, 128, 128, 128 },
+ { 81, 99, 181, 242, 176, 190, 249, 202, 255, 255, 128 },
+ { 66, 90, 181, 242, 176, 190, 249, 202, 255, 255, 128 },
+ },
+ {
+ /* Coeff Band ( 2 )*/
+ { 1, 129, 232, 253, 214, 197, 242, 196, 255, 255, 128 },
+ { 99, 121, 210, 250, 201, 198, 255, 202, 128, 128, 128 },
+ { 23, 91, 163, 242, 170, 187, 247, 210, 255, 255, 128 },
+ { 18, 80, 163, 242, 170, 187, 247, 210, 255, 255, 128 },
+ },
+ {
+ /* Coeff Band ( 3 )*/
+ { 1, 200, 246, 255, 234, 255, 128, 128, 128, 128, 128 },
+ { 109, 178, 241, 255, 231, 245, 255, 255, 128, 128, 128 },
+ { 44, 130, 201, 253, 205, 192, 255, 255, 128, 128, 128 },
+ { 36, 120, 201, 253, 205, 192, 255, 255, 128, 128, 128 },
+ },
+ {
+ /* Coeff Band ( 4 )*/
+ { 1, 132, 239, 251, 219, 209, 255, 165, 128, 128, 128 },
+ { 94, 136, 225, 251, 218, 190, 255, 255, 128, 128, 128 },
+ { 22, 100, 174, 245, 186, 161, 255, 199, 128, 128, 128 },
+ { 18, 90, 174, 245, 186, 161, 255, 199, 128, 128, 128 },
+ },
+ {
+ /* Coeff Band ( 5 )*/
+ { 1, 182, 249, 255, 232, 235, 128, 128, 128, 128, 128 },
+ { 124, 143, 241, 255, 227, 234, 128, 128, 128, 128, 128 },
+ { 35, 77, 181, 251, 193, 211, 255, 205, 128, 128, 128 },
+ { 28, 70, 181, 251, 193, 211, 255, 205, 128, 128, 128 },
+ },
+ {
+ /* Coeff Band ( 6 )*/
+ { 1, 157, 247, 255, 236, 231, 255, 255, 128, 128, 128 },
+ { 121, 141, 235, 255, 225, 227, 255, 255, 128, 128, 128 },
+ { 45, 99, 188, 251, 195, 217, 255, 224, 128, 128, 128 },
+ { 40, 90, 188, 251, 195, 217, 255, 224, 128, 128, 128 },
+ },
+ {
+ /* Coeff Band ( 7 )*/
+ { 1, 1, 251, 255, 213, 255, 128, 128, 128, 128, 128 },
+ { 203, 1, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
+ { 137, 1, 177, 255, 224, 255, 128, 128, 128, 128, 128 },
+ { 137, 1, 177, 255, 224, 255, 128, 128, 128, 128, 128 },
+ }
+ },
+ {
+ /* Block Type ( 2 ) */
+ {
+ /* Coeff Band ( 0 )*/
+ { 253, 9, 248, 251, 207, 208, 255, 192, 128, 128, 128 },
+ { 175, 13, 224, 243, 193, 185, 249, 198, 255, 255, 128 },
+ { 73, 17, 171, 221, 161, 179, 236, 167, 255, 234, 128 },
+ { 64, 17, 171, 221, 161, 179, 236, 167, 255, 234, 128 },
+ },
+ {
+ /* Coeff Band ( 1 )*/
+ { 1, 95, 247, 253, 212, 183, 255, 255, 128, 128, 128 },
+ { 239, 90, 244, 250, 211, 209, 255, 255, 128, 128, 128 },
+ { 155, 77, 195, 248, 188, 195, 255, 255, 128, 128, 128 },
+ { 140, 70, 195, 248, 188, 195, 255, 255, 128, 128, 128 },
+ },
+ {
+ /* Coeff Band ( 2 )*/
+ { 1, 24, 239, 251, 218, 219, 255, 205, 128, 128, 128 },
+ { 201, 51, 219, 255, 196, 186, 128, 128, 128, 128, 128 },
+ { 69, 46, 190, 239, 201, 218, 255, 228, 128, 128, 128 },
+ { 60, 40, 190, 239, 201, 218, 255, 228, 128, 128, 128 },
+ },
+ {
+ /* Coeff Band ( 3 )*/
+ { 1, 191, 251, 255, 255, 128, 128, 128, 128, 128, 128 },
+ { 223, 165, 249, 255, 213, 255, 128, 128, 128, 128, 128 },
+ { 141, 124, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
+ { 132, 118, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
+ },
+ {
+ /* Coeff Band ( 4 )*/
+ { 1, 16, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
+ { 190, 36, 230, 255, 236, 255, 128, 128, 128, 128, 128 },
+ { 149, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 149, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+ },
+ {
+ /* Coeff Band ( 5 )*/
+ { 1, 226, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 247, 192, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+ },
+ {
+ /* Coeff Band ( 6 )*/
+ { 1, 134, 252, 255, 255, 128, 128, 128, 128, 128, 128 },
+ { 213, 62, 250, 255, 255, 128, 128, 128, 128, 128, 128 },
+ { 55, 93, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 48, 85, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+ },
+ {
+ /* Coeff Band ( 7 ) -- every entry is 128 */
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+ }
+ },
+ {
+ /* Block Type ( 3 ) */
+ {
+ /* Coeff Band ( 0 )*/
+ { 202, 24, 213, 235, 186, 191, 220, 160, 240, 175, 255 },
+ { 126, 38, 182, 232, 169, 184, 228, 174, 255, 187, 128 },
+ { 63, 48, 138, 219, 151, 178, 240, 170, 255, 216, 128 },
+ { 54, 40, 138, 219, 151, 178, 240, 170, 255, 216, 128 },
+ },
+ {
+ /* Coeff Band ( 1 )*/
+ { 1, 112, 230, 250, 199, 191, 247, 159, 255, 255, 128 },
+ { 166, 109, 228, 252, 211, 215, 255, 174, 128, 128, 128 },
+ { 44, 84, 162, 232, 172, 180, 245, 178, 255, 255, 128 },
+ { 32, 70, 162, 232, 172, 180, 245, 178, 255, 255, 128 },
+ },
+ {
+ /* Coeff Band ( 2 )*/
+ { 1, 52, 220, 246, 198, 199, 249, 220, 255, 255, 128 },
+ { 124, 74, 191, 243, 183, 193, 250, 221, 255, 255, 128 },
+ { 24, 71, 130, 219, 154, 170, 243, 182, 255, 255, 128 },
+ { 24, 71, 130, 219, 154, 170, 243, 182, 255, 255, 128 },
+ },
+ {
+ /* Coeff Band ( 3 )*/
+ { 1, 182, 225, 249, 219, 240, 255, 224, 128, 128, 128 },
+ { 149, 150, 226, 252, 216, 205, 255, 171, 128, 128, 128 },
+ { 28, 108, 170, 242, 183, 194, 254, 223, 255, 255, 128 },
+ { 26, 104, 170, 242, 183, 194, 254, 223, 255, 255, 128 },
+ },
+ {
+ /* Coeff Band ( 4 )*/
+ { 1, 81, 230, 252, 204, 203, 255, 192, 128, 128, 128 },
+ { 123, 102, 209, 247, 188, 196, 255, 233, 128, 128, 128 },
+ { 20, 95, 153, 243, 164, 173, 255, 203, 128, 128, 128 },
+ { 20, 95, 153, 243, 164, 173, 255, 203, 128, 128, 128 },
+ },
+ {
+ /* Coeff Band ( 5 )*/
+ { 1, 222, 248, 255, 216, 213, 128, 128, 128, 128, 128 },
+ { 168, 175, 246, 252, 235, 205, 255, 255, 128, 128, 128 },
+ { 47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128 },
+ { 47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128 },
+ },
+ {
+ /* Coeff Band ( 6 )*/
+ { 1, 121, 236, 253, 212, 214, 255, 255, 128, 128, 128 },
+ { 141, 84, 213, 252, 201, 202, 255, 219, 128, 128, 128 },
+ { 42, 80, 160, 240, 162, 185, 255, 205, 128, 128, 128 },
+ { 42, 80, 160, 240, 162, 185, 255, 205, 128, 128, 128 },
+ },
+ {
+ /* Coeff Band ( 7 )*/
+ { 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 244, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 238, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 238, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+ }
+ }
+};
+
+static const vp9_prob default_hybrid_coef_probs [BLOCK_TYPES] /* outermost index: block type (4 listed below); NOTE(review): entries appear identical to default_coef_probs -- confirm */
+ [COEF_BANDS] /* coefficient band (8 listed per block type) */
+ [PREV_COEF_CONTEXTS] /* context row (4 listed per band) */
+ [ENTROPY_NODES] = { /* 11 probability values per row */
+ {
+ /* Block Type ( 0 ) */
+ {
+ /* Coeff Band ( 0 ) -- every entry is 128 */
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+ },
+ {
+ /* Coeff Band ( 1 )*/
+ { 253, 136, 254, 255, 228, 219, 128, 128, 128, 128, 128 },
+ { 189, 129, 242, 255, 227, 213, 255, 219, 128, 128, 128 },
+ { 106, 126, 227, 252, 214, 209, 255, 255, 128, 128, 128 },
+ { 90, 116, 227, 252, 214, 209, 255, 255, 128, 128, 128 },
+ },
+ {
+ /* Coeff Band ( 2 )*/
+ { 1, 98, 248, 255, 236, 226, 255, 255, 128, 128, 128 },
+ { 181, 133, 238, 254, 221, 234, 255, 154, 128, 128, 128 },
+ { 78, 134, 202, 247, 198, 180, 255, 219, 128, 128, 128 },
+ { 64, 128, 202, 247, 198, 180, 255, 219, 128, 128, 128 },
+ },
+ {
+ /* Coeff Band ( 3 )*/
+ { 1, 185, 249, 255, 243, 255, 128, 128, 128, 128, 128 },
+ { 184, 150, 247, 255, 236, 224, 128, 128, 128, 128, 128 },
+ { 77, 110, 216, 255, 236, 230, 128, 128, 128, 128, 128 },
+ { 64, 100, 216, 255, 236, 230, 128, 128, 128, 128, 128 },
+ },
+ {
+ /* Coeff Band ( 4 )*/
+ { 1, 101, 251, 255, 241, 255, 128, 128, 128, 128, 128 },
+ { 170, 139, 241, 252, 236, 209, 255, 255, 128, 128, 128 },
+ { 37, 116, 196, 243, 228, 255, 255, 255, 128, 128, 128 },
+ { 28, 110, 196, 243, 228, 255, 255, 255, 128, 128, 128 },
+ },
+ {
+ /* Coeff Band ( 5 )*/
+ { 1, 204, 254, 255, 245, 255, 128, 128, 128, 128, 128 },
+ { 207, 160, 250, 255, 238, 128, 128, 128, 128, 128, 128 },
+ { 102, 103, 231, 255, 211, 171, 128, 128, 128, 128, 128 },
+ { 90, 90, 231, 255, 211, 171, 128, 128, 128, 128, 128 },
+ },
+ {
+ /* Coeff Band ( 6 )*/
+ { 1, 152, 252, 255, 240, 255, 128, 128, 128, 128, 128 },
+ { 177, 135, 243, 255, 234, 225, 128, 128, 128, 128, 128 },
+ { 80, 129, 211, 255, 194, 224, 128, 128, 128, 128, 128 },
+ { 64, 120, 211, 255, 194, 224, 128, 128, 128, 128, 128 },
+ },
+ {
+ /* Coeff Band ( 7 )*/
+ { 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 246, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+ }
+ },
+ {
+ /* Block Type ( 1 ) */
+ {
+ /* Coeff Band ( 0 )*/
+ { 198, 35, 237, 223, 193, 187, 162, 160, 145, 155, 62 },
+ { 131, 45, 198, 221, 172, 176, 220, 157, 252, 221, 1 },
+ { 68, 47, 146, 208, 149, 167, 221, 162, 255, 223, 128 },
+ { 48, 32, 146, 208, 149, 167, 221, 162, 255, 223, 128 },
+ },
+ {
+ /* Coeff Band ( 1 )*/
+ { 1, 149, 241, 255, 221, 224, 255, 255, 128, 128, 128 },
+ { 184, 141, 234, 253, 222, 220, 255, 199, 128, 128, 128 },
+ { 81, 99, 181, 242, 176, 190, 249, 202, 255, 255, 128 },
+ { 66, 90, 181, 242, 176, 190, 249, 202, 255, 255, 128 },
+ },
+ {
+ /* Coeff Band ( 2 )*/
+ { 1, 129, 232, 253, 214, 197, 242, 196, 255, 255, 128 },
+ { 99, 121, 210, 250, 201, 198, 255, 202, 128, 128, 128 },
+ { 23, 91, 163, 242, 170, 187, 247, 210, 255, 255, 128 },
+ { 18, 80, 163, 242, 170, 187, 247, 210, 255, 255, 128 },
+ },
+ {
+ /* Coeff Band ( 3 )*/
+ { 1, 200, 246, 255, 234, 255, 128, 128, 128, 128, 128 },
+ { 109, 178, 241, 255, 231, 245, 255, 255, 128, 128, 128 },
+ { 44, 130, 201, 253, 205, 192, 255, 255, 128, 128, 128 },
+ { 36, 120, 201, 253, 205, 192, 255, 255, 128, 128, 128 },
+ },
+ {
+ /* Coeff Band ( 4 )*/
+ { 1, 132, 239, 251, 219, 209, 255, 165, 128, 128, 128 },
+ { 94, 136, 225, 251, 218, 190, 255, 255, 128, 128, 128 },
+ { 22, 100, 174, 245, 186, 161, 255, 199, 128, 128, 128 },
+ { 18, 90, 174, 245, 186, 161, 255, 199, 128, 128, 128 },
+ },
+ {
+ /* Coeff Band ( 5 )*/
+ { 1, 182, 249, 255, 232, 235, 128, 128, 128, 128, 128 },
+ { 124, 143, 241, 255, 227, 234, 128, 128, 128, 128, 128 },
+ { 35, 77, 181, 251, 193, 211, 255, 205, 128, 128, 128 },
+ { 28, 70, 181, 251, 193, 211, 255, 205, 128, 128, 128 },
+ },
+ {
+ /* Coeff Band ( 6 )*/
+ { 1, 157, 247, 255, 236, 231, 255, 255, 128, 128, 128 },
+ { 121, 141, 235, 255, 225, 227, 255, 255, 128, 128, 128 },
+ { 45, 99, 188, 251, 195, 217, 255, 224, 128, 128, 128 },
+ { 40, 90, 188, 251, 195, 217, 255, 224, 128, 128, 128 },
+ },
+ {
+ /* Coeff Band ( 7 )*/
+ { 1, 1, 251, 255, 213, 255, 128, 128, 128, 128, 128 },
+ { 203, 1, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
+ { 137, 1, 177, 255, 224, 255, 128, 128, 128, 128, 128 },
+ { 137, 1, 177, 255, 224, 255, 128, 128, 128, 128, 128 },
+ }
+ },
+ {
+ /* Block Type ( 2 ) */
+ {
+ /* Coeff Band ( 0 )*/
+ { 253, 9, 248, 251, 207, 208, 255, 192, 128, 128, 128 },
+ { 175, 13, 224, 243, 193, 185, 249, 198, 255, 255, 128 },
+ { 73, 17, 171, 221, 161, 179, 236, 167, 255, 234, 128 },
+ { 64, 17, 171, 221, 161, 179, 236, 167, 255, 234, 128 },
+ },
+ {
+ /* Coeff Band ( 1 )*/
+ { 1, 95, 247, 253, 212, 183, 255, 255, 128, 128, 128 },
+ { 239, 90, 244, 250, 211, 209, 255, 255, 128, 128, 128 },
+ { 155, 77, 195, 248, 188, 195, 255, 255, 128, 128, 128 },
+ { 140, 70, 195, 248, 188, 195, 255, 255, 128, 128, 128 },
+ },
+ {
+ /* Coeff Band ( 2 )*/
+ { 1, 24, 239, 251, 218, 219, 255, 205, 128, 128, 128 },
+ { 201, 51, 219, 255, 196, 186, 128, 128, 128, 128, 128 },
+ { 69, 46, 190, 239, 201, 218, 255, 228, 128, 128, 128 },
+ { 60, 40, 190, 239, 201, 218, 255, 228, 128, 128, 128 },
+ },
+ {
+ /* Coeff Band ( 3 )*/
+ { 1, 191, 251, 255, 255, 128, 128, 128, 128, 128, 128 },
+ { 223, 165, 249, 255, 213, 255, 128, 128, 128, 128, 128 },
+ { 141, 124, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
+ { 132, 118, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
+ },
+ {
+ /* Coeff Band ( 4 )*/
+ { 1, 16, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
+ { 190, 36, 230, 255, 236, 255, 128, 128, 128, 128, 128 },
+ { 149, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 149, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+ },
+ {
+ /* Coeff Band ( 5 )*/
+ { 1, 226, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 247, 192, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+ },
+ {
+ /* Coeff Band ( 6 )*/
+ { 1, 134, 252, 255, 255, 128, 128, 128, 128, 128, 128 },
+ { 213, 62, 250, 255, 255, 128, 128, 128, 128, 128, 128 },
+ { 55, 93, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 48, 85, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+ },
+ {
+ /* Coeff Band ( 7 ) -- every entry is 128 */
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+ }
+ },
+ {
+ /* Block Type ( 3 ) */
+ {
+ /* Coeff Band ( 0 )*/
+ { 202, 24, 213, 235, 186, 191, 220, 160, 240, 175, 255 },
+ { 126, 38, 182, 232, 169, 184, 228, 174, 255, 187, 128 },
+ { 63, 48, 138, 219, 151, 178, 240, 170, 255, 216, 128 },
+ { 54, 40, 138, 219, 151, 178, 240, 170, 255, 216, 128 },
+ },
+ {
+ /* Coeff Band ( 1 )*/
+ { 1, 112, 230, 250, 199, 191, 247, 159, 255, 255, 128 },
+ { 166, 109, 228, 252, 211, 215, 255, 174, 128, 128, 128 },
+ { 44, 84, 162, 232, 172, 180, 245, 178, 255, 255, 128 },
+ { 32, 70, 162, 232, 172, 180, 245, 178, 255, 255, 128 },
+ },
+ {
+ /* Coeff Band ( 2 )*/
+ { 1, 52, 220, 246, 198, 199, 249, 220, 255, 255, 128 },
+ { 124, 74, 191, 243, 183, 193, 250, 221, 255, 255, 128 },
+ { 24, 71, 130, 219, 154, 170, 243, 182, 255, 255, 128 },
+ { 24, 71, 130, 219, 154, 170, 243, 182, 255, 255, 128 },
+ },
+ {
+ /* Coeff Band ( 3 )*/
+ { 1, 182, 225, 249, 219, 240, 255, 224, 128, 128, 128 },
+ { 149, 150, 226, 252, 216, 205, 255, 171, 128, 128, 128 },
+ { 28, 108, 170, 242, 183, 194, 254, 223, 255, 255, 128 },
+ { 26, 104, 170, 242, 183, 194, 254, 223, 255, 255, 128 },
+ },
+ {
+ /* Coeff Band ( 4 )*/
+ { 1, 81, 230, 252, 204, 203, 255, 192, 128, 128, 128 },
+ { 123, 102, 209, 247, 188, 196, 255, 233, 128, 128, 128 },
+ { 20, 95, 153, 243, 164, 173, 255, 203, 128, 128, 128 },
+ { 20, 95, 153, 243, 164, 173, 255, 203, 128, 128, 128 },
+ },
+ {
+ /* Coeff Band ( 5 )*/
+ { 1, 222, 248, 255, 216, 213, 128, 128, 128, 128, 128 },
+ { 168, 175, 246, 252, 235, 205, 255, 255, 128, 128, 128 },
+ { 47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128 },
+ { 47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128 },
+ },
+ {
+ /* Coeff Band ( 6 )*/
+ { 1, 121, 236, 253, 212, 214, 255, 255, 128, 128, 128 },
+ { 141, 84, 213, 252, 201, 202, 255, 219, 128, 128, 128 },
+ { 42, 80, 160, 240, 162, 185, 255, 205, 128, 128, 128 },
+ { 42, 80, 160, 240, 162, 185, 255, 205, 128, 128, 128 },
+ },
+ {
+ /* Coeff Band ( 7 )*/
+ { 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 244, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 238, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 238, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+ }
+ }
+};
+
+static const vp9_prob
+default_coef_probs_8x8[BLOCK_TYPES_8X8] /* default coefficient probabilities; outermost index: block type (4 listed below) */
+[COEF_BANDS] /* coefficient band (8 listed per block type) */
+[PREV_COEF_CONTEXTS] /* context row (4 listed per band, except block Type 1 / Coeff Band 0 below) */
+[ENTROPY_NODES] = { /* 11 probability values per row */
+ {
+ /* block Type 0 */
+ {
+ /* Coeff Band 0 -- every entry is 128 */
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}
+ },
+ {
+ /* Coeff Band 1 */
+ { 60, 140, 195, 255, 212, 214, 128, 128, 128, 128, 128},
+ { 75, 221, 231, 255, 203, 255, 128, 128, 128, 128, 128},
+ { 9, 212, 196, 251, 197, 207, 255, 185, 128, 128, 128},
+ { 9, 212, 196, 251, 197, 207, 255, 185, 128, 128, 128}
+ },
+ {
+ /* Coeff Band 2 */
+ { 1, 227, 226, 255, 215, 215, 128, 128, 128, 128, 128},
+ { 5, 163, 209, 255, 212, 212, 255, 255, 128, 128, 128},
+ { 1, 133, 203, 255, 210, 220, 255, 255, 128, 128, 128},
+ { 1, 133, 203, 255, 210, 220, 255, 255, 128, 128, 128}
+ },
+ {
+ /* Coeff Band 3 */
+ { 1, 226, 225, 255, 228, 236, 128, 128, 128, 128, 128},
+ { 6, 163, 208, 255, 224, 234, 255, 255, 128, 128, 128},
+ { 1, 122, 196, 253, 212, 248, 255, 255, 128, 128, 128},
+ { 1, 122, 196, 253, 212, 248, 255, 255, 128, 128, 128}
+ },
+ {
+ /* Coeff Band 4 */
+ { 1, 222, 197, 254, 193, 216, 255, 236, 128, 128, 128},
+ { 7, 140, 163, 251, 195, 211, 255, 238, 128, 128, 128},
+ { 1, 91, 152, 249, 181, 197, 255, 239, 128, 128, 128},
+ { 1, 91, 152, 249, 181, 197, 255, 239, 128, 128, 128}
+ },
+ {
+ /* Coeff Band 5 */
+ { 1, 226, 218, 255, 216, 241, 255, 255, 128, 128, 128},
+ { 6, 154, 191, 255, 218, 240, 255, 255, 128, 128, 128},
+ { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128},
+ { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128}
+ },
+ {
+ /* Coeff Band 6 */
+ { 1, 221, 217, 255, 208, 217, 255, 232, 128, 128, 128},
+ { 11, 155, 189, 254, 203, 211, 255, 249, 128, 128, 128},
+ { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128},
+ { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128}
+ },
+ {
+ /* Coeff Band 7 */
+ { 1, 207, 235, 255, 232, 240, 128, 128, 128, 128, 128},
+ { 58, 161, 216, 255, 229, 235, 255, 255, 128, 128, 128},
+ { 8, 133, 204, 255, 219, 231, 255, 255, 128, 128, 128},
+ { 8, 133, 204, 255, 219, 231, 255, 255, 128, 128, 128}
+ }
+ },
+ {
+ /* block Type 1 */
+ {
+ /* Coeff Band 0 -- NOTE(review): only three context rows are listed here, while every other band in this table lists four; under C aggregate-initialization rules any unlisted row is zero-initialized. Confirm this is intended. */
+ { 134, 152, 233, 224, 234, 52, 255, 166, 128, 128, 128},
+ { 97, 132, 185, 234, 186, 189, 197, 171, 255, 212, 128},
+ { 84, 110, 185, 237, 182, 182, 145, 145, 255, 255, 128}
+ },
+ {
+ /* Coeff Band 1 */
+ { 1, 124, 213, 247, 192, 212, 255, 255, 128, 128, 128},
+ { 88, 111, 178, 254, 189, 211, 255, 255, 128, 128, 128},
+ { 12, 59, 129, 236, 150, 179, 239, 195, 255, 255, 128},
+ { 12, 59, 129, 236, 150, 179, 239, 195, 255, 255, 128}
+ },
+ {
+ /* Coeff Band 2 */
+ { 1, 102, 225, 255, 210, 240, 128, 128, 128, 128, 128},
+ { 110, 78, 195, 254, 200, 191, 255, 255, 128, 128, 128},
+ { 37, 63, 177, 255, 194, 195, 128, 128, 128, 128, 128},
+ { 37, 63, 177, 255, 194, 195, 128, 128, 128, 128, 128}
+ },
+ {
+ /* Coeff Band 3 */
+ { 1, 1, 229, 255, 202, 224, 128, 128, 128, 128, 128},
+ { 150, 1, 192, 255, 206, 226, 128, 128, 128, 128, 128},
+ { 75, 1, 138, 255, 172, 228, 128, 128, 128, 128, 128},
+ { 75, 1, 138, 255, 172, 228, 128, 128, 128, 128, 128}
+ },
+ {
+ /* Coeff Band 4 -- every entry is 128 */
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}
+ },
+ {
+ /* Coeff Band 5 -- every entry is 128 */
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}
+ },
+ {
+ /* Coeff Band 6 -- every entry is 128 */
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}
+ },
+ {
+ /* Coeff Band 7 -- every entry is 128 */
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}
+ }
+ },
+ {
+ /* block Type 2 */
+ {
+ /* Coeff Band 0 */
+ { 11, 181, 226, 199, 183, 255, 255, 255, 128, 128, 128},
+ { 2, 147, 185, 248, 163, 180, 255, 236, 128, 128, 128},
+ { 1, 123, 157, 238, 154, 176, 255, 226, 255, 255, 128},
+ { 1, 123, 157, 238, 154, 176, 255, 226, 255, 255, 128}
+ },
+ {
+ /* Coeff Band 1 */
+ { 1, 150, 191, 246, 174, 188, 255, 235, 128, 128, 128},
+ { 1, 125, 166, 245, 165, 185, 255, 234, 128, 128, 128},
+ { 1, 79, 125, 240, 148, 179, 255, 234, 255, 255, 128},
+ { 1, 79, 125, 240, 148, 179, 255, 234, 255, 255, 128}
+ },
+ {
+ /* Coeff Band 2 */
+ { 1, 146, 184, 242, 167, 183, 255, 230, 255, 255, 128},
+ { 1, 119, 160, 239, 156, 178, 255, 231, 255, 255, 128},
+ { 1, 75, 115, 234, 142, 173, 255, 225, 255, 255, 128},
+ { 1, 75, 115, 234, 142, 173, 255, 225, 255, 255, 128}
+ },
+ {
+ /* Coeff Band 3 */
+ { 1, 150, 188, 244, 169, 183, 255, 233, 255, 255, 128},
+ { 1, 123, 162, 243, 161, 180, 255, 233, 128, 128, 128},
+ { 1, 76, 120, 238, 148, 178, 255, 230, 255, 255, 128},
+ { 1, 76, 120, 238, 148, 178, 255, 230, 255, 255, 128}
+ },
+ {
+ /* Coeff Band 4 */
+ { 1, 163, 202, 252, 188, 204, 255, 248, 128, 128, 128},
+ { 1, 136, 180, 251, 181, 201, 255, 246, 128, 128, 128},
+ { 1, 92, 146, 249, 170, 197, 255, 245, 128, 128, 128},
+ { 1, 92, 146, 249, 170, 197, 255, 245, 128, 128, 128}
+ },
+ {
+ /* Coeff Band 5 */
+ { 1, 156, 195, 249, 179, 193, 255, 241, 255, 255, 128},
+ { 1, 128, 169, 248, 171, 192, 255, 242, 255, 255, 128},
+ { 1, 84, 132, 245, 158, 187, 255, 240, 255, 255, 128},
+ { 1, 84, 132, 245, 158, 187, 255, 240, 255, 255, 128}
+ },
+ {
+ /* Coeff Band 6 */
+ { 1, 36, 71, 251, 192, 201, 255, 243, 255, 255, 128},
+ { 1, 49, 185, 250, 184, 199, 255, 242, 128, 128, 128},
+ { 1, 95, 147, 247, 168, 190, 255, 239, 255, 255, 128},
+ { 1, 95, 147, 247, 168, 190, 255, 239, 255, 255, 128}
+ },
+ {
+ /* Coeff Band 7 */
+ { 1, 19, 98, 255, 218, 222, 255, 255, 128, 128, 128},
+ { 36, 50, 210, 255, 212, 221, 255, 255, 128, 128, 128},
+ { 6, 117, 180, 254, 199, 216, 255, 251, 128, 128, 128},
+ { 6, 117, 180, 254, 199, 216, 255, 251, 128, 128, 128}
+ }
+ },
+ { /* block Type 3 */
+ { /* Coeff Band 0 -- the fourth row is all 128 */
+ { 192, 18, 155, 172, 145, 164, 192, 135, 246, 223, 255},
+ { 94, 29, 97, 131, 131, 153, 171, 121, 250, 190, 255},
+ { 25, 29, 63, 128, 119, 147, 168, 124, 251, 183, 255},
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}
+ },
+ { /* Coeff Band 1 */
+ { 1, 108, 192, 220, 186, 173, 255, 194, 255, 255, 128},
+ { 123, 104, 188, 221, 165, 171, 247, 180, 255, 255, 128},
+ { 23, 76, 152, 216, 154, 166, 226, 182, 255, 209, 128},
+ { 1, 26, 52, 162, 109, 152, 208, 144, 255, 231, 128}
+ },
+ { /* Coeff Band 2 */
+ { 1, 57, 179, 220, 156, 175, 210, 158, 255, 223, 128},
+ { 48, 57, 134, 212, 151, 170, 219, 185, 255, 248, 128},
+ { 4, 35, 63, 189, 120, 156, 221, 159, 255, 241, 128},
+ { 1, 17, 23, 110, 97, 143, 187, 120, 255, 234, 128}
+ },
+ { /* Coeff Band 3 */
+ { 1, 115, 205, 243, 182, 187, 254, 218, 255, 255, 128},
+ { 80, 101, 186, 241, 183, 186, 249, 182, 255, 255, 128},
+ { 10, 81, 144, 229, 164, 175, 241, 185, 255, 255, 128},
+ { 1, 44, 81, 192, 130, 148, 240, 180, 255, 255, 128}
+ },
+ { /* Coeff Band 4 */
+ { 1, 161, 207, 249, 187, 176, 255, 180, 128, 128, 128},
+ { 79, 148, 196, 240, 186, 182, 253, 171, 255, 255, 128},
+ { 14, 111, 171, 233, 170, 178, 235, 204, 255, 255, 128},
+ { 1, 63, 103, 202, 143, 162, 240, 178, 255, 255, 128}
+ },
+ { /* Coeff Band 5 */
+ { 1, 101, 202, 239, 185, 184, 252, 186, 255, 255, 128},
+ { 43, 67, 166, 237, 178, 190, 246, 194, 255, 255, 128},
+ { 4, 49, 85, 220, 140, 168, 253, 182, 255, 255, 128},
+ { 1, 24, 35, 144, 93, 135, 239, 159, 255, 253, 128}
+ },
+ { /* Coeff Band 6 */
+ { 1, 212, 243, 255, 240, 234, 255, 255, 128, 128, 128},
+ { 98, 168, 234, 255, 229, 234, 255, 255, 128, 128, 128},
+ { 19, 127, 199, 255, 212, 198, 255, 255, 128, 128, 128},
+ { 1, 103, 162, 253, 186, 151, 255, 255, 128, 128, 128}
+ },
+ { /* Coeff Band 7 -- the fourth row is all 128 */
+ { 1, 188, 253, 255, 255, 128, 128, 128, 128, 128, 128},
+ { 191, 68, 242, 255, 255, 128, 128, 128, 128, 128, 128},
+ { 8, 132, 255, 128, 128, 128, 128, 128, 128, 128, 128},
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}
+ }
+ }
+};
+
+static const vp9_prob
+default_hybrid_coef_probs_8x8[BLOCK_TYPES_8X8]
+ [COEF_BANDS]
+ [PREV_COEF_CONTEXTS]
+ [ENTROPY_NODES] = {
+ {
+ /* block Type 0 */
+ {
+ /* Coeff Band 0 */
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}
+ },
+ {
+ /* Coeff Band 1 */
+ { 60, 140, 195, 255, 212, 214, 128, 128, 128, 128, 128},
+ { 75, 221, 231, 255, 203, 255, 128, 128, 128, 128, 128},
+ { 9, 212, 196, 251, 197, 207, 255, 185, 128, 128, 128},
+ { 9, 212, 196, 251, 197, 207, 255, 185, 128, 128, 128}
+ },
+ {
+ /* Coeff Band 2 */
+ { 1, 227, 226, 255, 215, 215, 128, 128, 128, 128, 128},
+ { 5, 163, 209, 255, 212, 212, 255, 255, 128, 128, 128},
+ { 1, 133, 203, 255, 210, 220, 255, 255, 128, 128, 128},
+ { 1, 133, 203, 255, 210, 220, 255, 255, 128, 128, 128}
+ },
+ {
+ /* Coeff Band 3 */
+ { 1, 226, 225, 255, 228, 236, 128, 128, 128, 128, 128},
+ { 6, 163, 208, 255, 224, 234, 255, 255, 128, 128, 128},
+ { 1, 122, 196, 253, 212, 248, 255, 255, 128, 128, 128},
+ { 1, 122, 196, 253, 212, 248, 255, 255, 128, 128, 128}
+ },
+ {
+ /* Coeff Band 4 */
+ { 1, 222, 197, 254, 193, 216, 255, 236, 128, 128, 128},
+ { 7, 140, 163, 251, 195, 211, 255, 238, 128, 128, 128},
+ { 1, 91, 152, 249, 181, 197, 255, 239, 128, 128, 128},
+ { 1, 91, 152, 249, 181, 197, 255, 239, 128, 128, 128}
+ },
+ {
+ /* Coeff Band 5 */
+ { 1, 226, 218, 255, 216, 241, 255, 255, 128, 128, 128},
+ { 6, 154, 191, 255, 218, 240, 255, 255, 128, 128, 128},
+ { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128},
+ { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128}
+ },
+ {
+ /* Coeff Band 6 */
+ { 1, 221, 217, 255, 208, 217, 255, 232, 128, 128, 128},
+ { 11, 155, 189, 254, 203, 211, 255, 249, 128, 128, 128},
+ { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128},
+ { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128}
+ },
+ {
+ /* Coeff Band 7 */
+ { 1, 207, 235, 255, 232, 240, 128, 128, 128, 128, 128},
+ { 58, 161, 216, 255, 229, 235, 255, 255, 128, 128, 128},
+ { 8, 133, 204, 255, 219, 231, 255, 255, 128, 128, 128},
+ { 8, 133, 204, 255, 219, 231, 255, 255, 128, 128, 128}
+ }
+ },
+ {
+ /* block Type 1 */
+ {
+ /* Coeff Band 0 */
+ { 134, 152, 233, 224, 234, 52, 255, 166, 128, 128, 128},
+ { 97, 132, 185, 234, 186, 189, 197, 171, 255, 212, 128},
+ { 84, 110, 185, 237, 182, 182, 145, 145, 255, 255, 128}
+ },
+ {
+ /* Coeff Band 1 */
+ { 1, 124, 213, 247, 192, 212, 255, 255, 128, 128, 128},
+ { 88, 111, 178, 254, 189, 211, 255, 255, 128, 128, 128},
+ { 12, 59, 129, 236, 150, 179, 239, 195, 255, 255, 128},
+ { 12, 59, 129, 236, 150, 179, 239, 195, 255, 255, 128}
+ },
+ {
+ /* Coeff Band 2 */
+ { 1, 102, 225, 255, 210, 240, 128, 128, 128, 128, 128},
+ { 110, 78, 195, 254, 200, 191, 255, 255, 128, 128, 128},
+ { 37, 63, 177, 255, 194, 195, 128, 128, 128, 128, 128},
+ { 37, 63, 177, 255, 194, 195, 128, 128, 128, 128, 128}
+ },
+ {
+ /* Coeff Band 3 */
+ { 1, 1, 229, 255, 202, 224, 128, 128, 128, 128, 128},
+ { 150, 1, 192, 255, 206, 226, 128, 128, 128, 128, 128},
+ { 75, 1, 138, 255, 172, 228, 128, 128, 128, 128, 128},
+ { 75, 1, 138, 255, 172, 228, 128, 128, 128, 128, 128}
+ },
+ {
+ /* Coeff Band 4 */
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}
+ },
+ {
+ /* Coeff Band 5 */
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}
+ },
+ {
+ /* Coeff Band 6 */
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}
+ },
+ {
+ /* Coeff Band 7 */
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}
+ }
+ },
+ {
+ /* block Type 2 */
+ {
+ /* Coeff Band 0 */
+ { 11, 181, 226, 199, 183, 255, 255, 255, 128, 128, 128},
+ { 2, 147, 185, 248, 163, 180, 255, 236, 128, 128, 128},
+ { 1, 123, 157, 238, 154, 176, 255, 226, 255, 255, 128},
+ { 1, 123, 157, 238, 154, 176, 255, 226, 255, 255, 128}
+ },
+ {
+ /* Coeff Band 1 */
+ { 1, 150, 191, 246, 174, 188, 255, 235, 128, 128, 128},
+ { 1, 125, 166, 245, 165, 185, 255, 234, 128, 128, 128},
+ { 1, 79, 125, 240, 148, 179, 255, 234, 255, 255, 128},
+ { 1, 79, 125, 240, 148, 179, 255, 234, 255, 255, 128}
+ },
+ {
+ /* Coeff Band 2 */
+ { 1, 146, 184, 242, 167, 183, 255, 230, 255, 255, 128},
+ { 1, 119, 160, 239, 156, 178, 255, 231, 255, 255, 128},
+ { 1, 75, 115, 234, 142, 173, 255, 225, 255, 255, 128},
+ { 1, 75, 115, 234, 142, 173, 255, 225, 255, 255, 128}
+ },
+ {
+ /* Coeff Band 3 */
+ { 1, 150, 188, 244, 169, 183, 255, 233, 255, 255, 128},
+ { 1, 123, 162, 243, 161, 180, 255, 233, 128, 128, 128},
+ { 1, 76, 120, 238, 148, 178, 255, 230, 255, 255, 128},
+ { 1, 76, 120, 238, 148, 178, 255, 230, 255, 255, 128}
+ },
+ {
+ /* Coeff Band 4 */
+ { 1, 163, 202, 252, 188, 204, 255, 248, 128, 128, 128},
+ { 1, 136, 180, 251, 181, 201, 255, 246, 128, 128, 128},
+ { 1, 92, 146, 249, 170, 197, 255, 245, 128, 128, 128},
+ { 1, 92, 146, 249, 170, 197, 255, 245, 128, 128, 128}
+ },
+ {
+ /* Coeff Band 5 */
+ { 1, 156, 195, 249, 179, 193, 255, 241, 255, 255, 128},
+ { 1, 128, 169, 248, 171, 192, 255, 242, 255, 255, 128},
+ { 1, 84, 132, 245, 158, 187, 255, 240, 255, 255, 128},
+ { 1, 84, 132, 245, 158, 187, 255, 240, 255, 255, 128}
+ },
+ {
+ /* Coeff Band 6 */
+ { 1, 36, 71, 251, 192, 201, 255, 243, 255, 255, 128},
+ { 1, 49, 185, 250, 184, 199, 255, 242, 128, 128, 128},
+ { 1, 95, 147, 247, 168, 190, 255, 239, 255, 255, 128},
+ { 1, 95, 147, 247, 168, 190, 255, 239, 255, 255, 128}
+ },
+ {
+ /* Coeff Band 7 */
+ { 1, 19, 98, 255, 218, 222, 255, 255, 128, 128, 128},
+ { 36, 50, 210, 255, 212, 221, 255, 255, 128, 128, 128},
+ { 6, 117, 180, 254, 199, 216, 255, 251, 128, 128, 128},
+ { 6, 117, 180, 254, 199, 216, 255, 251, 128, 128, 128}
+ }
+ },
+ { /* block Type 3 */
+ { /* Coeff Band 0 */
+ { 192, 18, 155, 172, 145, 164, 192, 135, 246, 223, 255},
+ { 94, 29, 97, 131, 131, 153, 171, 121, 250, 190, 255},
+ { 25, 29, 63, 128, 119, 147, 168, 124, 251, 183, 255},
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}
+ },
+ { /* Coeff Band 1 */
+ { 1, 108, 192, 220, 186, 173, 255, 194, 255, 255, 128},
+ { 123, 104, 188, 221, 165, 171, 247, 180, 255, 255, 128},
+ { 23, 76, 152, 216, 154, 166, 226, 182, 255, 209, 128},
+ { 1, 26, 52, 162, 109, 152, 208, 144, 255, 231, 128}
+ },
+ { /* Coeff Band 2 */
+ { 1, 57, 179, 220, 156, 175, 210, 158, 255, 223, 128},
+ { 48, 57, 134, 212, 151, 170, 219, 185, 255, 248, 128},
+ { 4, 35, 63, 189, 120, 156, 221, 159, 255, 241, 128},
+ { 1, 17, 23, 110, 97, 143, 187, 120, 255, 234, 128}
+ },
+ { /* Coeff Band 3 */
+ { 1, 115, 205, 243, 182, 187, 254, 218, 255, 255, 128},
+ { 80, 101, 186, 241, 183, 186, 249, 182, 255, 255, 128},
+ { 10, 81, 144, 229, 164, 175, 241, 185, 255, 255, 128},
+ { 1, 44, 81, 192, 130, 148, 240, 180, 255, 255, 128}
+ },
+ { /* Coeff Band 4 */
+ { 1, 161, 207, 249, 187, 176, 255, 180, 128, 128, 128},
+ { 79, 148, 196, 240, 186, 182, 253, 171, 255, 255, 128},
+ { 14, 111, 171, 233, 170, 178, 235, 204, 255, 255, 128},
+ { 1, 63, 103, 202, 143, 162, 240, 178, 255, 255, 128}
+ },
+ { /* Coeff Band 5 */
+ { 1, 101, 202, 239, 185, 184, 252, 186, 255, 255, 128},
+ { 43, 67, 166, 237, 178, 190, 246, 194, 255, 255, 128},
+ { 4, 49, 85, 220, 140, 168, 253, 182, 255, 255, 128},
+ { 1, 24, 35, 144, 93, 135, 239, 159, 255, 253, 128}
+ },
+ { /* Coeff Band 6 */
+ { 1, 212, 243, 255, 240, 234, 255, 255, 128, 128, 128},
+ { 98, 168, 234, 255, 229, 234, 255, 255, 128, 128, 128},
+ { 19, 127, 199, 255, 212, 198, 255, 255, 128, 128, 128},
+ { 1, 103, 162, 253, 186, 151, 255, 255, 128, 128, 128}
+ },
+ { /* Coeff Band 7 */
+ { 1, 188, 253, 255, 255, 128, 128, 128, 128, 128, 128},
+ { 191, 68, 242, 255, 255, 128, 128, 128, 128, 128, 128},
+ { 8, 132, 255, 128, 128, 128, 128, 128, 128, 128, 128},
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}
+ }
+ }
+};
+
+static const vp9_prob
+ default_coef_probs_16x16[BLOCK_TYPES_16X16]
+ [COEF_BANDS]
+ [PREV_COEF_CONTEXTS]
+ [ENTROPY_NODES] = {
+ { /* block Type 0 */
+ { /* Coeff Band 0 */
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}
+ },
+ { /* Coeff Band 1 */
+ { 60, 140, 195, 255, 212, 214, 128, 128, 128, 128, 128},
+ { 75, 221, 231, 255, 203, 255, 128, 128, 128, 128, 128},
+ { 9, 212, 196, 251, 197, 207, 255, 185, 128, 128, 128},
+ { 9, 212, 196, 251, 197, 207, 255, 185, 128, 128, 128}
+ },
+ { /* Coeff Band 2 */
+ { 1, 227, 226, 255, 215, 215, 128, 128, 128, 128, 128},
+ { 5, 163, 209, 255, 212, 212, 255, 255, 128, 128, 128},
+ { 1, 133, 203, 255, 210, 220, 255, 255, 128, 128, 128},
+ { 1, 133, 203, 255, 210, 220, 255, 255, 128, 128, 128}
+ },
+ { /* Coeff Band 3 */
+ { 1, 226, 225, 255, 228, 236, 128, 128, 128, 128, 128},
+ { 6, 163, 208, 255, 224, 234, 255, 255, 128, 128, 128},
+ { 1, 122, 196, 253, 212, 248, 255, 255, 128, 128, 128},
+ { 1, 122, 196, 253, 212, 248, 255, 255, 128, 128, 128}
+ },
+ { /* Coeff Band 4 */
+ { 1, 222, 197, 254, 193, 216, 255, 236, 128, 128, 128},
+ { 7, 140, 163, 251, 195, 211, 255, 238, 128, 128, 128},
+ { 1, 91, 152, 249, 181, 197, 255, 239, 128, 128, 128},
+ { 1, 91, 152, 249, 181, 197, 255, 239, 128, 128, 128}
+ },
+ { /* Coeff Band 5 */
+ { 1, 226, 218, 255, 216, 241, 255, 255, 128, 128, 128},
+ { 6, 154, 191, 255, 218, 240, 255, 255, 128, 128, 128},
+ { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128},
+ { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128}
+ },
+ { /* Coeff Band 6 */
+ { 1, 221, 217, 255, 208, 217, 255, 232, 128, 128, 128},
+ { 11, 155, 189, 254, 203, 211, 255, 249, 128, 128, 128},
+ { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128},
+ { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128}
+ },
+ { /* Coeff Band 7 */
+ { 1, 207, 235, 255, 232, 240, 128, 128, 128, 128, 128},
+ { 58, 161, 216, 255, 229, 235, 255, 255, 128, 128, 128},
+ { 8, 133, 204, 255, 219, 231, 255, 255, 128, 128, 128},
+ { 8, 133, 204, 255, 219, 231, 255, 255, 128, 128, 128}
+ }
+ },
+ { /* block Type 1 */
+ { /* Coeff Band 0 */
+ { 1, 30, 103, 204, 142, 168, 235, 161, 255, 228, 128},
+ { 1, 35, 90, 192, 130, 161, 227, 158, 255, 226, 255},
+ { 1, 36, 78, 180, 122, 156, 221, 153, 255, 222, 255},
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}
+ },
+ { /* Coeff Band 1 */
+ { 1, 163, 228, 253, 212, 194, 255, 205, 128, 128, 128},
+ { 67, 160, 226, 253, 210, 202, 245, 172, 255, 255, 128},
+ { 51, 147, 219, 251, 207, 207, 255, 217, 128, 128, 128},
+ { 25, 107, 175, 245, 183, 190, 254, 209, 255, 255, 128}
+ },
+ { /* Coeff Band 2 */
+ { 1, 66, 170, 240, 177, 186, 252, 203, 255, 245, 128},
+ { 23, 64, 145, 230, 161, 177, 252, 198, 255, 255, 128},
+ { 6, 51, 99, 208, 135, 163, 249, 178, 255, 248, 128},
+ { 1, 33, 59, 161, 104, 151, 238, 164, 255, 237, 128}
+ },
+ { /* Coeff Band 3 */
+ { 1, 76, 216, 250, 198, 199, 255, 226, 255, 255, 128},
+ { 86, 83, 200, 247, 189, 193, 255, 224, 255, 255, 128},
+ { 30, 75, 164, 242, 172, 184, 254, 218, 255, 255, 128},
+ { 3, 54, 103, 227, 140, 172, 253, 201, 255, 255, 128}
+ },
+ { /* Coeff Band 4 */
+ { 1, 241, 247, 255, 233, 223, 255, 255, 128, 128, 128},
+ { 78, 212, 242, 255, 226, 230, 255, 255, 128, 128, 128},
+ { 10, 167, 224, 255, 217, 225, 255, 128, 128, 128, 128},
+ { 1, 104, 176, 250, 166, 219, 255, 255, 128, 128, 128}
+ },
+ { /* Coeff Band 5 */
+ { 1, 194, 241, 254, 228, 214, 248, 237, 255, 255, 128},
+ { 95, 133, 228, 254, 218, 215, 255, 229, 128, 128, 128},
+ { 24, 119, 201, 252, 202, 205, 255, 229, 128, 128, 128},
+ { 1, 88, 155, 246, 183, 193, 255, 205, 128, 128, 128}
+ },
+ { /* Coeff Band 6 */
+ { 1, 204, 236, 255, 222, 220, 255, 239, 128, 128, 128},
+ { 126, 105, 225, 254, 214, 217, 255, 254, 128, 128, 128},
+ { 44, 86, 190, 251, 197, 204, 255, 233, 128, 128, 128},
+ { 6, 71, 130, 240, 164, 188, 255, 246, 128, 128, 128}
+ },
+ { /* Coeff Band 7 */
+ { 1, 195, 250, 255, 239, 197, 128, 128, 128, 128, 128},
+ { 167, 102, 249, 255, 234, 255, 128, 128, 128, 128, 128},
+ { 65, 91, 222, 255, 217, 255, 128, 128, 128, 128, 128},
+ { 1, 59, 128, 255, 154, 255, 128, 128, 128, 128, 128}
+ }
+ },
+ { /* block Type 2 */
+ { /* Coeff Band 0 */
+ { 1, 30, 103, 204, 142, 168, 235, 161, 255, 228, 128},
+ { 1, 35, 90, 192, 130, 161, 227, 158, 255, 226, 255},
+ { 1, 36, 78, 180, 122, 156, 221, 153, 255, 222, 255},
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}
+ },
+ { /* Coeff Band 1 */
+ { 1, 163, 228, 253, 212, 194, 255, 205, 128, 128, 128},
+ { 67, 160, 226, 253, 210, 202, 245, 172, 255, 255, 128},
+ { 51, 147, 219, 251, 207, 207, 255, 217, 128, 128, 128},
+ { 25, 107, 175, 245, 183, 190, 254, 209, 255, 255, 128}
+ },
+ { /* Coeff Band 2 */
+ { 1, 66, 170, 240, 177, 186, 252, 203, 255, 245, 128},
+ { 23, 64, 145, 230, 161, 177, 252, 198, 255, 255, 128},
+ { 6, 51, 99, 208, 135, 163, 249, 178, 255, 248, 128},
+ { 1, 33, 59, 161, 104, 151, 238, 164, 255, 237, 128}
+ },
+ { /* Coeff Band 3 */
+ { 1, 76, 216, 250, 198, 199, 255, 226, 255, 255, 128},
+ { 86, 83, 200, 247, 189, 193, 255, 224, 255, 255, 128},
+ { 30, 75, 164, 242, 172, 184, 254, 218, 255, 255, 128},
+ { 3, 54, 103, 227, 140, 172, 253, 201, 255, 255, 128}
+ },
+ { /* Coeff Band 4 */
+ { 1, 241, 247, 255, 233, 223, 255, 255, 128, 128, 128},
+ { 78, 212, 242, 255, 226, 230, 255, 255, 128, 128, 128},
+ { 10, 167, 224, 255, 217, 225, 255, 128, 128, 128, 128},
+ { 1, 104, 176, 250, 166, 219, 255, 255, 128, 128, 128}
+ },
+ { /* Coeff Band 5 */
+ { 1, 194, 241, 254, 228, 214, 248, 237, 255, 255, 128},
+ { 95, 133, 228, 254, 218, 215, 255, 229, 128, 128, 128},
+ { 24, 119, 201, 252, 202, 205, 255, 229, 128, 128, 128},
+ { 1, 88, 155, 246, 183, 193, 255, 205, 128, 128, 128}
+ },
+ { /* Coeff Band 6 */
+ { 1, 204, 236, 255, 222, 220, 255, 239, 128, 128, 128},
+ { 126, 105, 225, 254, 214, 217, 255, 254, 128, 128, 128},
+ { 44, 86, 190, 251, 197, 204, 255, 233, 128, 128, 128},
+ { 6, 71, 130, 240, 164, 188, 255, 246, 128, 128, 128}
+ },
+ { /* Coeff Band 7 */
+ { 1, 195, 250, 255, 239, 197, 128, 128, 128, 128, 128},
+ { 167, 102, 249, 255, 234, 255, 128, 128, 128, 128, 128},
+ { 65, 91, 222, 255, 217, 255, 128, 128, 128, 128, 128},
+ { 1, 59, 128, 255, 154, 255, 128, 128, 128, 128, 128}
+ }
+ },
+ { /* block Type 3 */
+ { /* Coeff Band 0 */
+ { 17, 105, 227, 195, 164, 170, 168, 137, 221, 160, 184},
+ { 6, 92, 166, 193, 158, 169, 179, 142, 236, 175, 200},
+ { 2, 68, 118, 193, 147, 168, 187, 149, 241, 178, 247},
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}
+ },
+ { /* Coeff Band 1 */
+ { 1, 193, 221, 246, 198, 194, 244, 176, 255, 192, 128},
+ { 112, 160, 209, 244, 196, 194, 243, 175, 255, 209, 128},
+ { 45, 123, 175, 240, 184, 195, 239, 178, 255, 218, 255},
+ { 16, 53, 75, 169, 119, 152, 209, 146, 255, 219, 255}
+ },
+ { /* Coeff Band 2 */
+ { 1, 141, 183, 240, 176, 187, 246, 198, 255, 218, 128},
+ { 36, 97, 150, 231, 161, 180, 243, 191, 255, 217, 255},
+ { 8, 65, 111, 210, 143, 166, 230, 167, 255, 224, 255},
+ { 2, 35, 61, 157, 113, 149, 208, 142, 255, 217, 255}
+ },
+ { /* Coeff Band 3 */
+ { 1, 173, 196, 245, 184, 191, 252, 211, 255, 240, 128},
+ { 35, 119, 175, 242, 177, 187, 252, 209, 255, 235, 128},
+ { 4, 88, 141, 234, 161, 180, 249, 200, 255, 228, 128},
+ { 1, 57, 95, 203, 133, 161, 235, 167, 255, 231, 255}
+ },
+ { /* Coeff Band 4 */
+ { 1, 208, 227, 249, 209, 204, 248, 188, 255, 248, 128},
+ { 28, 162, 211, 247, 203, 200, 252, 188, 255, 232, 128},
+ { 5, 114, 174, 238, 182, 189, 245, 184, 255, 238, 128},
+ { 1, 61, 100, 205, 136, 164, 235, 163, 255, 239, 128}
+ },
+ { /* Coeff Band 5 */
+ { 1, 195, 218, 252, 208, 207, 250, 205, 255, 245, 128},
+ { 22, 141, 196, 249, 198, 201, 250, 202, 255, 244, 128},
+ { 2, 105, 163, 240, 178, 189, 246, 191, 255, 246, 128},
+ { 1, 70, 112, 206, 144, 167, 232, 162, 255, 239, 128}
+ },
+ { /* Coeff Band 6 */
+ { 1, 204, 215, 251, 204, 203, 255, 222, 255, 225, 128},
+ { 15, 140, 194, 249, 194, 199, 254, 221, 255, 253, 128},
+ { 1, 95, 153, 243, 172, 188, 254, 213, 255, 248, 128},
+ { 1, 59, 99, 216, 135, 166, 247, 190, 255, 237, 255}
+ },
+ { /* Coeff Band 7 */
+ { 1, 7, 231, 255, 227, 223, 255, 240, 255, 255, 128},
+ { 15, 157, 217, 255, 218, 219, 255, 239, 255, 255, 128},
+ { 1, 114, 182, 252, 198, 207, 255, 235, 255, 255, 128},
+ { 1, 71, 122, 238, 154, 181, 255, 216, 255, 255, 128}
+ }
+ }
+};
+
+static const vp9_prob
+ default_hybrid_coef_probs_16x16[BLOCK_TYPES_16X16]
+ [COEF_BANDS]
+ [PREV_COEF_CONTEXTS]
+ [ENTROPY_NODES] = {
+ { /* block Type 0 */
+ { /* Coeff Band 0 */
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}
+ },
+ { /* Coeff Band 1 */
+ { 60, 140, 195, 255, 212, 214, 128, 128, 128, 128, 128},
+ { 75, 221, 231, 255, 203, 255, 128, 128, 128, 128, 128},
+ { 9, 212, 196, 251, 197, 207, 255, 185, 128, 128, 128},
+ { 9, 212, 196, 251, 197, 207, 255, 185, 128, 128, 128}
+ },
+ { /* Coeff Band 2 */
+ { 1, 227, 226, 255, 215, 215, 128, 128, 128, 128, 128},
+ { 5, 163, 209, 255, 212, 212, 255, 255, 128, 128, 128},
+ { 1, 133, 203, 255, 210, 220, 255, 255, 128, 128, 128},
+ { 1, 133, 203, 255, 210, 220, 255, 255, 128, 128, 128}
+ },
+ { /* Coeff Band 3 */
+ { 1, 226, 225, 255, 228, 236, 128, 128, 128, 128, 128},
+ { 6, 163, 208, 255, 224, 234, 255, 255, 128, 128, 128},
+ { 1, 122, 196, 253, 212, 248, 255, 255, 128, 128, 128},
+ { 1, 122, 196, 253, 212, 248, 255, 255, 128, 128, 128}
+ },
+ { /* Coeff Band 4 */
+ { 1, 222, 197, 254, 193, 216, 255, 236, 128, 128, 128},
+ { 7, 140, 163, 251, 195, 211, 255, 238, 128, 128, 128},
+ { 1, 91, 152, 249, 181, 197, 255, 239, 128, 128, 128},
+ { 1, 91, 152, 249, 181, 197, 255, 239, 128, 128, 128}
+ },
+ { /* Coeff Band 5 */
+ { 1, 226, 218, 255, 216, 241, 255, 255, 128, 128, 128},
+ { 6, 154, 191, 255, 218, 240, 255, 255, 128, 128, 128},
+ { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128},
+ { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128}
+ },
+ { /* Coeff Band 6 */
+ { 1, 221, 217, 255, 208, 217, 255, 232, 128, 128, 128},
+ { 11, 155, 189, 254, 203, 211, 255, 249, 128, 128, 128},
+ { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128},
+ { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128}
+ },
+ { /* Coeff Band 7 */
+ { 1, 207, 235, 255, 232, 240, 128, 128, 128, 128, 128},
+ { 58, 161, 216, 255, 229, 235, 255, 255, 128, 128, 128},
+ { 8, 133, 204, 255, 219, 231, 255, 255, 128, 128, 128},
+ { 8, 133, 204, 255, 219, 231, 255, 255, 128, 128, 128}
+ }
+ },
+ { /* block Type 1 */
+ { /* Coeff Band 0 */
+ { 1, 30, 103, 204, 142, 168, 235, 161, 255, 228, 128},
+ { 1, 35, 90, 192, 130, 161, 227, 158, 255, 226, 255},
+ { 1, 36, 78, 180, 122, 156, 221, 153, 255, 222, 255},
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}
+ },
+ { /* Coeff Band 1 */
+ { 1, 163, 228, 253, 212, 194, 255, 205, 128, 128, 128},
+ { 67, 160, 226, 253, 210, 202, 245, 172, 255, 255, 128},
+ { 51, 147, 219, 251, 207, 207, 255, 217, 128, 128, 128},
+ { 25, 107, 175, 245, 183, 190, 254, 209, 255, 255, 128}
+ },
+ { /* Coeff Band 2 */
+ { 1, 66, 170, 240, 177, 186, 252, 203, 255, 245, 128},
+ { 23, 64, 145, 230, 161, 177, 252, 198, 255, 255, 128},
+ { 6, 51, 99, 208, 135, 163, 249, 178, 255, 248, 128},
+ { 1, 33, 59, 161, 104, 151, 238, 164, 255, 237, 128}
+ },
+ { /* Coeff Band 3 */
+ { 1, 76, 216, 250, 198, 199, 255, 226, 255, 255, 128},
+ { 86, 83, 200, 247, 189, 193, 255, 224, 255, 255, 128},
+ { 30, 75, 164, 242, 172, 184, 254, 218, 255, 255, 128},
+ { 3, 54, 103, 227, 140, 172, 253, 201, 255, 255, 128}
+ },
+ { /* Coeff Band 4 */
+ { 1, 241, 247, 255, 233, 223, 255, 255, 128, 128, 128},
+ { 78, 212, 242, 255, 226, 230, 255, 255, 128, 128, 128},
+ { 10, 167, 224, 255, 217, 225, 255, 128, 128, 128, 128},
+ { 1, 104, 176, 250, 166, 219, 255, 255, 128, 128, 128}
+ },
+ { /* Coeff Band 5 */
+ { 1, 194, 241, 254, 228, 214, 248, 237, 255, 255, 128},
+ { 95, 133, 228, 254, 218, 215, 255, 229, 128, 128, 128},
+ { 24, 119, 201, 252, 202, 205, 255, 229, 128, 128, 128},
+ { 1, 88, 155, 246, 183, 193, 255, 205, 128, 128, 128}
+ },
+ { /* Coeff Band 6 */
+ { 1, 204, 236, 255, 222, 220, 255, 239, 128, 128, 128},
+ { 126, 105, 225, 254, 214, 217, 255, 254, 128, 128, 128},
+ { 44, 86, 190, 251, 197, 204, 255, 233, 128, 128, 128},
+ { 6, 71, 130, 240, 164, 188, 255, 246, 128, 128, 128}
+ },
+ { /* Coeff Band 7 */
+ { 1, 195, 250, 255, 239, 197, 128, 128, 128, 128, 128},
+ { 167, 102, 249, 255, 234, 255, 128, 128, 128, 128, 128},
+ { 65, 91, 222, 255, 217, 255, 128, 128, 128, 128, 128},
+ { 1, 59, 128, 255, 154, 255, 128, 128, 128, 128, 128}
+ }
+ },
+ { /* block Type 2 */
+ { /* Coeff Band 0 */
+ { 1, 30, 103, 204, 142, 168, 235, 161, 255, 228, 128},
+ { 1, 35, 90, 192, 130, 161, 227, 158, 255, 226, 255},
+ { 1, 36, 78, 180, 122, 156, 221, 153, 255, 222, 255},
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}
+ },
+ { /* Coeff Band 1 */
+ { 1, 163, 228, 253, 212, 194, 255, 205, 128, 128, 128},
+ { 67, 160, 226, 253, 210, 202, 245, 172, 255, 255, 128},
+ { 51, 147, 219, 251, 207, 207, 255, 217, 128, 128, 128},
+ { 25, 107, 175, 245, 183, 190, 254, 209, 255, 255, 128}
+ },
+ { /* Coeff Band 2 */
+ { 1, 66, 170, 240, 177, 186, 252, 203, 255, 245, 128},
+ { 23, 64, 145, 230, 161, 177, 252, 198, 255, 255, 128},
+ { 6, 51, 99, 208, 135, 163, 249, 178, 255, 248, 128},
+ { 1, 33, 59, 161, 104, 151, 238, 164, 255, 237, 128}
+ },
+ { /* Coeff Band 3 */
+ { 1, 76, 216, 250, 198, 199, 255, 226, 255, 255, 128},
+ { 86, 83, 200, 247, 189, 193, 255, 224, 255, 255, 128},
+ { 30, 75, 164, 242, 172, 184, 254, 218, 255, 255, 128},
+ { 3, 54, 103, 227, 140, 172, 253, 201, 255, 255, 128}
+ },
+ { /* Coeff Band 4 */
+ { 1, 241, 247, 255, 233, 223, 255, 255, 128, 128, 128},
+ { 78, 212, 242, 255, 226, 230, 255, 255, 128, 128, 128},
+ { 10, 167, 224, 255, 217, 225, 255, 128, 128, 128, 128},
+ { 1, 104, 176, 250, 166, 219, 255, 255, 128, 128, 128}
+ },
+ { /* Coeff Band 5 */
+ { 1, 194, 241, 254, 228, 214, 248, 237, 255, 255, 128},
+ { 95, 133, 228, 254, 218, 215, 255, 229, 128, 128, 128},
+ { 24, 119, 201, 252, 202, 205, 255, 229, 128, 128, 128},
+ { 1, 88, 155, 246, 183, 193, 255, 205, 128, 128, 128}
+ },
+ { /* Coeff Band 6 */
+ { 1, 204, 236, 255, 222, 220, 255, 239, 128, 128, 128},
+ { 126, 105, 225, 254, 214, 217, 255, 254, 128, 128, 128},
+ { 44, 86, 190, 251, 197, 204, 255, 233, 128, 128, 128},
+ { 6, 71, 130, 240, 164, 188, 255, 246, 128, 128, 128}
+ },
+ { /* Coeff Band 7 */
+ { 1, 195, 250, 255, 239, 197, 128, 128, 128, 128, 128},
+ { 167, 102, 249, 255, 234, 255, 128, 128, 128, 128, 128},
+ { 65, 91, 222, 255, 217, 255, 128, 128, 128, 128, 128},
+ { 1, 59, 128, 255, 154, 255, 128, 128, 128, 128, 128}
+ }
+ },
+ { /* block Type 3 */
+ { /* Coeff Band 0 */
+ { 17, 105, 227, 195, 164, 170, 168, 137, 221, 160, 184},
+ { 6, 92, 166, 193, 158, 169, 179, 142, 236, 175, 200},
+ { 2, 68, 118, 193, 147, 168, 187, 149, 241, 178, 247},
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}
+ },
+ { /* Coeff Band 1 */
+ { 1, 193, 221, 246, 198, 194, 244, 176, 255, 192, 128},
+ { 112, 160, 209, 244, 196, 194, 243, 175, 255, 209, 128},
+ { 45, 123, 175, 240, 184, 195, 239, 178, 255, 218, 255},
+ { 16, 53, 75, 169, 119, 152, 209, 146, 255, 219, 255}
+ },
+ { /* Coeff Band 2 */
+ { 1, 141, 183, 240, 176, 187, 246, 198, 255, 218, 128},
+ { 36, 97, 150, 231, 161, 180, 243, 191, 255, 217, 255},
+ { 8, 65, 111, 210, 143, 166, 230, 167, 255, 224, 255},
+ { 2, 35, 61, 157, 113, 149, 208, 142, 255, 217, 255}
+ },
+ { /* Coeff Band 3 */
+ { 1, 173, 196, 245, 184, 191, 252, 211, 255, 240, 128},
+ { 35, 119, 175, 242, 177, 187, 252, 209, 255, 235, 128},
+ { 4, 88, 141, 234, 161, 180, 249, 200, 255, 228, 128},
+ { 1, 57, 95, 203, 133, 161, 235, 167, 255, 231, 255}
+ },
+ { /* Coeff Band 4 */
+ { 1, 208, 227, 249, 209, 204, 248, 188, 255, 248, 128},
+ { 28, 162, 211, 247, 203, 200, 252, 188, 255, 232, 128},
+ { 5, 114, 174, 238, 182, 189, 245, 184, 255, 238, 128},
+ { 1, 61, 100, 205, 136, 164, 235, 163, 255, 239, 128}
+ },
+ { /* Coeff Band 5 */
+ { 1, 195, 218, 252, 208, 207, 250, 205, 255, 245, 128},
+ { 22, 141, 196, 249, 198, 201, 250, 202, 255, 244, 128},
+ { 2, 105, 163, 240, 178, 189, 246, 191, 255, 246, 128},
+ { 1, 70, 112, 206, 144, 167, 232, 162, 255, 239, 128}
+ },
+ { /* Coeff Band 6 */
+ { 1, 204, 215, 251, 204, 203, 255, 222, 255, 225, 128},
+ { 15, 140, 194, 249, 194, 199, 254, 221, 255, 253, 128},
+ { 1, 95, 153, 243, 172, 188, 254, 213, 255, 248, 128},
+ { 1, 59, 99, 216, 135, 166, 247, 190, 255, 237, 255}
+ },
+ { /* Coeff Band 7 */
+ { 1, 7, 231, 255, 227, 223, 255, 240, 255, 255, 128},
+ { 15, 157, 217, 255, 218, 219, 255, 239, 255, 255, 128},
+ { 1, 114, 182, 252, 198, 207, 255, 235, 255, 255, 128},
+ { 1, 71, 122, 238, 154, 181, 255, 216, 255, 255, 128}
+ }
+ }
+};
diff --git a/vp9/common/vp9_entropy.c b/vp9/common/vp9_entropy.c
new file mode 100644
index 0000000..4832b4e
--- /dev/null
+++ b/vp9/common/vp9_entropy.c
@@ -0,0 +1,447 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include <stdio.h>
+
+#include "vp9/common/vp9_entropy.h"
+#include "string.h"
+#include "vp9/common/vp9_blockd.h"
+#include "vp9/common/vp9_onyxc_int.h"
+#include "vp9/common/vp9_entropymode.h"
+#include "vpx_mem/vpx_mem.h"
+
+#define uchar unsigned char /* typedefs can clash */
+#define uint unsigned int
+
+typedef const uchar cuchar;
+typedef const uint cuint;
+
+typedef vp9_prob Prob;
+
+#include "vp9/common/vp9_coefupdateprobs.h"
+
+const int vp9_i8x8_block[4] = {0, 2, 8, 10};
+/* vp9_norm[v], v >= 1: left shifts needed to bring v into 128..255; [0] = 0. */
+DECLARE_ALIGNED(16, const unsigned char, vp9_norm[256]) = {
+ 0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+DECLARE_ALIGNED(16, const int, vp9_coef_bands[16]) = {
+ 0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7
+};
+
+DECLARE_ALIGNED(16, cuchar, vp9_prev_token_class[MAX_ENTROPY_TOKENS]) = {
+ 0, 1, 2, 2, 3, 3, 3, 3, 3, 3, 3, 0
+};
+
+DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d[16]) = {
+ 0, 1, 4, 8,
+ 5, 2, 3, 6,
+ 9, 12, 13, 10,
+ 7, 11, 14, 15,
+};
+
+DECLARE_ALIGNED(16, const int, vp9_col_scan[16]) = {
+ 0, 4, 8, 12,
+ 1, 5, 9, 13,
+ 2, 6, 10, 14,
+ 3, 7, 11, 15
+};
+DECLARE_ALIGNED(16, const int, vp9_row_scan[16]) = {
+ 0, 1, 2, 3,
+ 4, 5, 6, 7,
+ 8, 9, 10, 11,
+ 12, 13, 14, 15
+};
+
+
+DECLARE_ALIGNED(64, const int, vp9_coef_bands_8x8[64]) = { 0, 1, 2, 3, 5, 4, 4, 5,
+ 5, 3, 6, 3, 5, 4, 6, 6,
+ 6, 5, 5, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7
+ };
+DECLARE_ALIGNED(64, const int, vp9_default_zig_zag1d_8x8[64]) = {
+ 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5,
+ 12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28,
+ 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51,
+ 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63,
+};
+
+// Table can be optimized: every entry from index 36 onward is 7.
+DECLARE_ALIGNED(16, const int, vp9_coef_bands_16x16[256]) = {
+ 0, 1, 2, 3, 5, 4, 4, 5, 5, 3, 6, 3, 5, 4, 6, 6,
+ 6, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+};
+DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_16x16[256]) = {
+ 0, 1, 16, 32, 17, 2, 3, 18, 33, 48, 64, 49, 34, 19, 4, 5,
+ 20, 35, 50, 65, 80, 96, 81, 66, 51, 36, 21, 6, 7, 22, 37, 52,
+ 67, 82, 97, 112, 128, 113, 98, 83, 68, 53, 38, 23, 8, 9, 24, 39,
+ 54, 69, 84, 99, 114, 129, 144, 160, 145, 130, 115, 100, 85, 70, 55, 40,
+ 25, 10, 11, 26, 41, 56, 71, 86, 101, 116, 131, 146, 161, 176, 192, 177,
+ 162, 147, 132, 117, 102, 87, 72, 57, 42, 27, 12, 13, 28, 43, 58, 73,
+ 88, 103, 118, 133, 148, 163, 178, 193, 208, 224, 209, 194, 179, 164, 149, 134,
+ 119, 104, 89, 74, 59, 44, 29, 14, 15, 30, 45, 60, 75, 90, 105, 120,
+ 135, 150, 165, 180, 195, 210, 225, 240, 241, 226, 211, 196, 181, 166, 151, 136,
+ 121, 106, 91, 76, 61, 46, 31, 47, 62, 77, 92, 107, 122, 137, 152, 167,
+ 182, 197, 212, 227, 242, 243, 228, 213, 198, 183, 168, 153, 138, 123, 108, 93,
+ 78, 63, 79, 94, 109, 124, 139, 154, 169, 184, 199, 214, 229, 244, 245, 230,
+ 215, 200, 185, 170, 155, 140, 125, 110, 95, 111, 126, 141, 156, 171, 186, 201,
+ 216, 231, 246, 247, 232, 217, 202, 187, 172, 157, 142, 127, 143, 158, 173, 188,
+ 203, 218, 233, 248, 249, 234, 219, 204, 189, 174, 159, 175, 190, 205, 220, 235,
+ 250, 251, 236, 221, 206, 191, 207, 222, 237, 252, 253, 238, 223, 239, 254, 255,
+};
+
+
+/* Array indices are identical to previously-existing CONTEXT_NODE indices */
+
+const vp9_tree_index vp9_coef_tree[ 22] = /* corresponding _CONTEXT_NODEs */
+{
+ -DCT_EOB_TOKEN, 2, /* 0 = EOB */
+ -ZERO_TOKEN, 4, /* 1 = ZERO */
+ -ONE_TOKEN, 6, /* 2 = ONE */
+ 8, 12, /* 3 = LOW_VAL */
+ -TWO_TOKEN, 10, /* 4 = TWO */
+ -THREE_TOKEN, -FOUR_TOKEN, /* 5 = THREE */
+ 14, 16, /* 6 = HIGH_LOW */
+ -DCT_VAL_CATEGORY1, -DCT_VAL_CATEGORY2, /* 7 = CAT_ONE */
+ 18, 20, /* 8 = CAT_THREEFOUR */
+ -DCT_VAL_CATEGORY3, -DCT_VAL_CATEGORY4, /* 9 = CAT_THREE */
+ -DCT_VAL_CATEGORY5, -DCT_VAL_CATEGORY6 /* 10 = CAT_FIVE */
+};
+
+struct vp9_token_struct vp9_coef_encodings[MAX_ENTROPY_TOKENS];
+
+/* Trees for extra bits. Probabilities are constant and
+ do not depend on previously encoded bits */
+
+static const Prob Pcat1[] = { 159};
+static const Prob Pcat2[] = { 165, 145};
+static const Prob Pcat3[] = { 173, 148, 140};
+static const Prob Pcat4[] = { 176, 155, 140, 135};
+static const Prob Pcat5[] = { 180, 157, 141, 134, 130};
+static const Prob Pcat6[] =
+{ 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129};
+
+static vp9_tree_index cat1[2], cat2[4], cat3[6], cat4[8], cat5[10], cat6[26];
+
+/* Writes n pairs into p: each pair indexes the next one; the last holds 0. */
+static void init_bit_tree(vp9_tree_index *p, int n) {
+  int node;
+
+  /* Both slots of pair (node - 1) receive the index of pair node. */
+  for (node = 1; node < n; ++node, p += 2)
+    p[0] = p[1] = node << 1;
+
+  p[0] = p[1] = 0;  /* last pair: both slots are 0 */
+}
+
+static void init_bit_trees(void) {  /* fill every catN tree; 2 slots per pair */
+  init_bit_tree(cat1, 1);   /*  1 pair  -> cat1[2]  */
+  init_bit_tree(cat2, 2);   /*  2 pairs -> cat2[4]  */
+  init_bit_tree(cat3, 3);   /*  3 pairs -> cat3[6]  */
+  init_bit_tree(cat4, 4);   /*  4 pairs -> cat4[8]  */
+  init_bit_tree(cat5, 5);   /*  5 pairs -> cat5[10] */
+  init_bit_tree(cat6, 13);  /* 13 pairs -> cat6[26] */
+}
+
+vp9_extra_bit_struct vp9_extra_bits[12] = {
+ { 0, 0, 0, 0},
+ { 0, 0, 0, 1},
+ { 0, 0, 0, 2},
+ { 0, 0, 0, 3},
+ { 0, 0, 0, 4},
+ { cat1, Pcat1, 1, 5},
+ { cat2, Pcat2, 2, 7},
+ { cat3, Pcat3, 3, 11},
+ { cat4, Pcat4, 4, 19},
+ { cat5, Pcat5, 5, 35},
+ { cat6, Pcat6, 13, 67},
+ { 0, 0, 0, 0}
+};
+
+#include "vp9/common/vp9_default_coef_probs.h"
+
+void vp9_default_coef_probs(VP9_COMMON *pc) {
+ vpx_memcpy(pc->fc.coef_probs, default_coef_probs,
+ sizeof(pc->fc.coef_probs));
+ vpx_memcpy(pc->fc.hybrid_coef_probs, default_hybrid_coef_probs,
+ sizeof(pc->fc.hybrid_coef_probs));
+
+ vpx_memcpy(pc->fc.coef_probs_8x8, default_coef_probs_8x8,
+ sizeof(pc->fc.coef_probs_8x8));
+ vpx_memcpy(pc->fc.hybrid_coef_probs_8x8, default_hybrid_coef_probs_8x8,
+ sizeof(pc->fc.hybrid_coef_probs_8x8));
+
+ vpx_memcpy(pc->fc.coef_probs_16x16, default_coef_probs_16x16,
+ sizeof(pc->fc.coef_probs_16x16));
+ vpx_memcpy(pc->fc.hybrid_coef_probs_16x16,
+ default_hybrid_coef_probs_16x16,
+ sizeof(pc->fc.hybrid_coef_probs_16x16));
+}
+
+void vp9_coef_tree_initialize(void) {  /* explicit (void): takes no arguments */
+  init_bit_trees();  /* fill the static cat1..cat6 trees */
+  vp9_tokens_from_tree(vp9_coef_encodings, vp9_coef_tree);
+}
+
+// #define COEF_COUNT_TESTING
+
+#define COEF_COUNT_SAT 24
+#define COEF_MAX_UPDATE_FACTOR 112
+#define COEF_COUNT_SAT_KEY 24
+#define COEF_MAX_UPDATE_FACTOR_KEY 112
+#define COEF_COUNT_SAT_AFTER_KEY 24
+#define COEF_MAX_UPDATE_FACTOR_AFTER_KEY 128
+
+void vp9_adapt_coef_probs(VP9_COMMON *cm) {
+ int t, i, j, k, count;
+ unsigned int branch_ct[ENTROPY_NODES][2];
+ vp9_prob coef_probs[ENTROPY_NODES];
+ int update_factor; /* denominator 256 */
+ int factor;
+ int count_sat;
+
+ // printf("Frame type: %d\n", cm->frame_type);
+ if (cm->frame_type == KEY_FRAME) {
+ update_factor = COEF_MAX_UPDATE_FACTOR_KEY;
+ count_sat = COEF_COUNT_SAT_KEY;
+ } else if (cm->last_frame_type == KEY_FRAME) {
+ update_factor = COEF_MAX_UPDATE_FACTOR_AFTER_KEY; /* adapt quickly */
+ count_sat = COEF_COUNT_SAT_AFTER_KEY;
+ } else {
+ update_factor = COEF_MAX_UPDATE_FACTOR;
+ count_sat = COEF_COUNT_SAT;
+ }
+
+#ifdef COEF_COUNT_TESTING
+ {
+ printf("static const unsigned int\ncoef_counts"
+ "[BLOCK_TYPES] [COEF_BANDS]"
+ "[PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS] = {\n");
+ for (i = 0; i < BLOCK_TYPES; ++i) {
+ printf(" {\n");
+ for (j = 0; j < COEF_BANDS; ++j) {
+ printf(" {\n");
+ for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
+ printf(" {");
+ for (t = 0; t < MAX_ENTROPY_TOKENS; ++t)
+ printf("%d, ", cm->fc.coef_counts[i][j][k][t]);
+ printf("},\n");
+ }
+ printf(" },\n");
+ }
+ printf(" },\n");
+ }
+ printf("};\n");
+ printf("static const unsigned int\ncoef_counts_8x8"
+ "[BLOCK_TYPES_8X8] [COEF_BANDS]"
+ "[PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS] = {\n");
+ for (i = 0; i < BLOCK_TYPES_8X8; ++i) {
+ printf(" {\n");
+ for (j = 0; j < COEF_BANDS; ++j) {
+ printf(" {\n");
+ for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
+ printf(" {");
+ for (t = 0; t < MAX_ENTROPY_TOKENS; ++t)
+ printf("%d, ", cm->fc.coef_counts_8x8[i][j][k][t]);
+ printf("},\n");
+ }
+ printf(" },\n");
+ }
+ printf(" },\n");
+ }
+ printf("};\n");
+ printf("static const unsigned int\nhybrid_coef_counts"
+ "[BLOCK_TYPES] [COEF_BANDS]"
+ "[PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS] = {\n");
+ for (i = 0; i < BLOCK_TYPES; ++i) {
+ printf(" {\n");
+ for (j = 0; j < COEF_BANDS; ++j) {
+ printf(" {\n");
+ for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
+ printf(" {");
+ for (t = 0; t < MAX_ENTROPY_TOKENS; ++t)
+ printf("%d, ", cm->fc.hybrid_coef_counts[i][j][k][t]);
+ printf("},\n");
+ }
+ printf(" },\n");
+ }
+ printf(" },\n");
+ }
+ printf("};\n");
+ }
+#endif
+
+ for (i = 0; i < BLOCK_TYPES; ++i)
+ for (j = 0; j < COEF_BANDS; ++j)
+ for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
+ if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0)))
+ continue;
+ vp9_tree_probs_from_distribution(
+ MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree,
+ coef_probs, branch_ct, cm->fc.coef_counts [i][j][k],
+ 256, 1);
+ for (t = 0; t < ENTROPY_NODES; ++t) {
+ int prob;
+ count = branch_ct[t][0] + branch_ct[t][1];
+ count = count > count_sat ? count_sat : count;
+ factor = (update_factor * count / count_sat);
+ prob = ((int)cm->fc.pre_coef_probs[i][j][k][t] * (256 - factor) +
+ (int)coef_probs[t] * factor + 128) >> 8;
+ if (prob <= 0) cm->fc.coef_probs[i][j][k][t] = 1;
+ else if (prob > 255) cm->fc.coef_probs[i][j][k][t] = 255;
+ else cm->fc.coef_probs[i][j][k][t] = prob;
+ }
+ }
+
+ for (i = 0; i < BLOCK_TYPES; ++i)
+ for (j = 0; j < COEF_BANDS; ++j)
+ for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
+ if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0)))
+ continue;
+ vp9_tree_probs_from_distribution(
+ MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree,
+ coef_probs, branch_ct, cm->fc.hybrid_coef_counts [i][j][k],
+ 256, 1);
+ for (t = 0; t < ENTROPY_NODES; ++t) {
+ int prob;
+ count = branch_ct[t][0] + branch_ct[t][1];
+ count = count > count_sat ? count_sat : count;
+ factor = (update_factor * count / count_sat);
+ prob = ((int)cm->fc.pre_hybrid_coef_probs[i][j][k][t] * (256 - factor) +
+ (int)coef_probs[t] * factor + 128) >> 8;
+ if (prob <= 0) cm->fc.hybrid_coef_probs[i][j][k][t] = 1;
+ else if (prob > 255) cm->fc.hybrid_coef_probs[i][j][k][t] = 255;
+ else cm->fc.hybrid_coef_probs[i][j][k][t] = prob;
+ }
+ }
+
+ for (i = 0; i < BLOCK_TYPES_8X8; ++i)
+ for (j = 0; j < COEF_BANDS; ++j)
+ for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
+ if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0)))
+ continue;
+ vp9_tree_probs_from_distribution(
+ MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree,
+ coef_probs, branch_ct, cm->fc.coef_counts_8x8 [i][j][k],
+ 256, 1);
+ for (t = 0; t < ENTROPY_NODES; ++t) {
+ int prob;
+ count = branch_ct[t][0] + branch_ct[t][1];
+ count = count > count_sat ? count_sat : count;
+ factor = (update_factor * count / count_sat);
+ prob = ((int)cm->fc.pre_coef_probs_8x8[i][j][k][t] * (256 - factor) +
+ (int)coef_probs[t] * factor + 128) >> 8;
+ if (prob <= 0) cm->fc.coef_probs_8x8[i][j][k][t] = 1;
+ else if (prob > 255) cm->fc.coef_probs_8x8[i][j][k][t] = 255;
+ else cm->fc.coef_probs_8x8[i][j][k][t] = prob;
+ }
+ }
+
+ for (i = 0; i < BLOCK_TYPES_8X8; ++i)
+ for (j = 0; j < COEF_BANDS; ++j)
+ for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
+ if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0)))
+ continue;
+ vp9_tree_probs_from_distribution(
+ MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree,
+ coef_probs, branch_ct, cm->fc.hybrid_coef_counts_8x8 [i][j][k],
+ 256, 1);
+ for (t = 0; t < ENTROPY_NODES; ++t) {
+ int prob;
+ count = branch_ct[t][0] + branch_ct[t][1];
+ count = count > count_sat ? count_sat : count;
+ factor = (update_factor * count / count_sat);
+ prob = ((int)cm->fc.pre_hybrid_coef_probs_8x8[i][j][k][t] *
+ (256 - factor) +
+ (int)coef_probs[t] * factor + 128) >> 8;
+ if (prob <= 0) cm->fc.hybrid_coef_probs_8x8[i][j][k][t] = 1;
+ else if (prob > 255) cm->fc.hybrid_coef_probs_8x8[i][j][k][t] = 255;
+ else cm->fc.hybrid_coef_probs_8x8[i][j][k][t] = prob;
+ }
+ }
+
+ for (i = 0; i < BLOCK_TYPES_16X16; ++i)
+ for (j = 0; j < COEF_BANDS; ++j)
+ for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
+ if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0)))
+ continue;
+ vp9_tree_probs_from_distribution(
+ MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree,
+ coef_probs, branch_ct, cm->fc.coef_counts_16x16[i][j][k], 256, 1);
+ for (t = 0; t < ENTROPY_NODES; ++t) {
+ int prob;
+ count = branch_ct[t][0] + branch_ct[t][1];
+ count = count > count_sat ? count_sat : count;
+ factor = (update_factor * count / count_sat);
+ prob = ((int)cm->fc.pre_coef_probs_16x16[i][j][k][t] *
+ (256 - factor) +
+ (int)coef_probs[t] * factor + 128) >> 8;
+ if (prob <= 0) cm->fc.coef_probs_16x16[i][j][k][t] = 1;
+ else if (prob > 255) cm->fc.coef_probs_16x16[i][j][k][t] = 255;
+ else cm->fc.coef_probs_16x16[i][j][k][t] = prob;
+ }
+ }
+
+ for (i = 0; i < BLOCK_TYPES_16X16; ++i)
+ for (j = 0; j < COEF_BANDS; ++j)
+ for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
+ if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0)))
+ continue;
+ vp9_tree_probs_from_distribution(
+ MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree,
+ coef_probs, branch_ct, cm->fc.hybrid_coef_counts_16x16[i][j][k], 256, 1);
+ for (t = 0; t < ENTROPY_NODES; ++t) {
+ int prob;
+ count = branch_ct[t][0] + branch_ct[t][1];
+ count = count > count_sat ? count_sat : count;
+ factor = (update_factor * count / count_sat);
+ prob = ((int)cm->fc.pre_hybrid_coef_probs_16x16[i][j][k][t] * (256 - factor) +
+ (int)coef_probs[t] * factor + 128) >> 8;
+ if (prob <= 0) cm->fc.hybrid_coef_probs_16x16[i][j][k][t] = 1;
+ else if (prob > 255) cm->fc.hybrid_coef_probs_16x16[i][j][k][t] = 255;
+ else cm->fc.hybrid_coef_probs_16x16[i][j][k][t] = prob;
+ }
+ }
+}
diff --git a/vp9/common/vp9_entropy.h b/vp9/common/vp9_entropy.h
new file mode 100644
index 0000000..3c74de7
--- /dev/null
+++ b/vp9/common/vp9_entropy.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_COMMON_VP9_ENTROPY_H_
+#define VP9_COMMON_VP9_ENTROPY_H_
+
+#include "vp9/common/vp9_treecoder.h"
+#include "vp9/common/vp9_blockd.h"
+#include "vp9/common/vp9_common.h"
+#include "vp9/common/vp9_coefupdateprobs.h"
+
+extern const int vp9_i8x8_block[4];
+
+/* Coefficient token alphabet */
+
+#define ZERO_TOKEN 0 /* 0 Extra Bits 0+0 */
+#define ONE_TOKEN 1 /* 1 Extra Bits 0+1 */
+#define TWO_TOKEN 2 /* 2 Extra Bits 0+1 */
+#define THREE_TOKEN 3 /* 3 Extra Bits 0+1 */
+#define FOUR_TOKEN 4 /* 4 Extra Bits 0+1 */
+#define DCT_VAL_CATEGORY1 5 /* 5-6 Extra Bits 1+1 */
+#define DCT_VAL_CATEGORY2 6 /* 7-10 Extra Bits 2+1 */
+#define DCT_VAL_CATEGORY3 7 /* 11-18 Extra Bits 3+1 */
+#define DCT_VAL_CATEGORY4 8 /* 19-34 Extra Bits 4+1 */
+#define DCT_VAL_CATEGORY5 9 /* 35-66 Extra Bits 5+1 */
+#define DCT_VAL_CATEGORY6 10 /* 67+ Extra Bits 13+1 */
+#define DCT_EOB_TOKEN 11 /* EOB Extra Bits 0+0 */
+#define MAX_ENTROPY_TOKENS 12
+#define ENTROPY_NODES 11
+#define EOSB_TOKEN 127 /* Not signalled, encoder only */
+
+#define INTER_MODE_CONTEXTS 7
+
+extern const vp9_tree_index vp9_coef_tree[];
+
+extern struct vp9_token_struct vp9_coef_encodings[MAX_ENTROPY_TOKENS];
+
+typedef struct {
+ vp9_tree_p tree;
+ const vp9_prob *prob;
+ int Len;
+ int base_val;
+} vp9_extra_bit_struct;
+
+extern vp9_extra_bit_struct vp9_extra_bits[MAX_ENTROPY_TOKENS]; /* one entry per token value, indexed by token */
+
+#define PROB_UPDATE_BASELINE_COST 7
+
+#define MAX_PROB 255
+#define DCT_MAX_VALUE 8192
+
+/* Coefficients are predicted via a 3-dimensional probability table. */
+
+/* Outside dimension. 0 = Y no DC, 1 = Y2, 2 = UV, 3 = Y with DC */
+#define BLOCK_TYPES 4
+
+#define BLOCK_TYPES_8X8 4
+
+#define BLOCK_TYPES_16X16 4
+
+/* Middle dimension is a coarsening of the coefficient's
+ position within the 4x4 DCT. */
+
+#define COEF_BANDS 8
+extern DECLARE_ALIGNED(16, const int, vp9_coef_bands[16]);
+extern DECLARE_ALIGNED(64, const int, vp9_coef_bands_8x8[64]);
+extern DECLARE_ALIGNED(16, const int, vp9_coef_bands_16x16[256]);
+
+/* Inside dimension is a 3-valued measure of nearby complexity, that is,
+ the extent to which nearby coefficients are nonzero. For the first
+ coefficient (DC, unless block type is 0), we look at the (already encoded)
+ blocks above and to the left of the current block. The context index is
+ then the number (0, 1, or 2) of these blocks having nonzero coefficients.
+ After decoding a coefficient, the measure is roughly the size of the
+ most recently decoded coefficient (0 for 0, 1 for 1, 2 for >1).
+ Note that the intuitive meaning of this measure changes as coefficients
+ are decoded, e.g., prior to the first token, a zero means that my neighbors
+ are empty while, after the first token, because of the use of end-of-block,
+ a zero means we just decoded a zero and hence guarantees that a non-zero
+ coefficient will appear later in this block. However, this shift
+ in meaning is perfectly OK because our context depends also on the
+ coefficient band (and since zigzag positions 0, 1, and 2 are in
+ distinct bands). */
+
+/*# define DC_TOKEN_CONTEXTS 3*/ /* 00, 0!0, !0!0 */
+#define PREV_COEF_CONTEXTS 4
+
+#define SUBEXP_PARAM 4 /* Subexponential code parameter */
+#define MODULUS_PARAM 13 /* Modulus parameter */
+
+extern DECLARE_ALIGNED(16, const unsigned char, vp9_prev_token_class[MAX_ENTROPY_TOKENS]);
+
+struct VP9Common;
+void vp9_default_coef_probs(struct VP9Common *);
+extern DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d[16]);
+
+extern DECLARE_ALIGNED(16, const int, vp9_col_scan[16]);
+extern DECLARE_ALIGNED(16, const int, vp9_row_scan[16]);
+
+extern DECLARE_ALIGNED(64, const int, vp9_default_zig_zag1d_8x8[64]);
+void vp9_coef_tree_initialize(void);
+
+extern DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_16x16[256]);
+void vp9_adapt_coef_probs(struct VP9Common *);
+
+#endif
diff --git a/vp9/common/vp9_entropymode.c b/vp9/common/vp9_entropymode.c
new file mode 100644
index 0000000..cda2176
--- /dev/null
+++ b/vp9/common/vp9_entropymode.c
@@ -0,0 +1,705 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vp9/common/vp9_onyxc_int.h"
+#include "vp9/common/vp9_modecont.h"
+#include "vpx_mem/vpx_mem.h"
+
+
+static const unsigned int kf_y_mode_cts[8][VP9_YMODES] = {
+ /* DC V H D45 135 117 153 D27 D63 TM i8x8 BPRED */
+ {12, 6, 5, 5, 5, 5, 5, 5, 5, 2, 22, 200},
+ {25, 13, 13, 7, 7, 7, 7, 7, 7, 6, 27, 160},
+ {31, 17, 18, 8, 8, 8, 8, 8, 8, 9, 26, 139},
+ {40, 22, 23, 8, 8, 8, 8, 8, 8, 12, 27, 116},
+ {53, 26, 28, 8, 8, 8, 8, 8, 8, 13, 26, 94},
+ {68, 33, 35, 8, 8, 8, 8, 8, 8, 17, 20, 68},
+ {78, 38, 38, 8, 8, 8, 8, 8, 8, 19, 16, 52},
+ {89, 42, 42, 8, 8, 8, 8, 8, 8, 21, 12, 34},
+};
+
+static const unsigned int y_mode_cts [VP9_YMODES] = {
+ /* DC V H D45 135 117 153 D27 D63 TM i8x8 BPRED */
+ 98, 19, 15, 14, 14, 14, 14, 12, 12, 13, 16, 70
+};
+
+static const unsigned int uv_mode_cts [VP9_YMODES] [VP9_UV_MODES] = {
+ /* DC V H D45 135 117 153 D27 D63 TM */
+ { 200, 15, 15, 10, 10, 10, 10, 10, 10, 6}, /* DC */
+ { 130, 75, 10, 10, 10, 10, 10, 10, 10, 6}, /* V */
+ { 130, 10, 75, 10, 10, 10, 10, 10, 10, 6}, /* H */
+ { 130, 15, 10, 75, 10, 10, 10, 10, 10, 6}, /* D45 */
+ { 150, 15, 10, 10, 75, 10, 10, 10, 10, 6}, /* D135 */
+ { 150, 15, 10, 10, 10, 75, 10, 10, 10, 6}, /* D117 */
+ { 150, 15, 10, 10, 10, 10, 75, 10, 10, 6}, /* D153 */
+ { 150, 15, 10, 10, 10, 10, 10, 75, 10, 6}, /* D27 */
+ { 150, 15, 10, 10, 10, 10, 10, 10, 75, 6}, /* D63 */
+ { 160, 30, 30, 10, 10, 10, 10, 10, 10, 16}, /* TM */
+ { 132, 46, 40, 10, 10, 10, 10, 10, 10, 18}, /* i8x8 - never used */
+ { 150, 35, 41, 10, 10, 10, 10, 10, 10, 10}, /* BPRED */
+};
+
+static const unsigned int i8x8_mode_cts [VP9_I8X8_MODES] = {
+ /* DC V H D45 135 117 153 D27 D63 TM */
+ 73, 49, 61, 30, 30, 30, 30, 30, 30, 13
+};
+
+static const unsigned int kf_uv_mode_cts [VP9_YMODES] [VP9_UV_MODES] = {
+ // DC V H D45 135 117 153 D27 D63 TM
+ { 160, 24, 24, 20, 20, 20, 20, 20, 20, 8}, /* DC */
+ { 102, 64, 30, 20, 20, 20, 20, 20, 20, 10}, /* V */
+ { 102, 30, 64, 20, 20, 20, 20, 20, 20, 10}, /* H */
+ { 102, 33, 20, 64, 20, 20, 20, 20, 20, 14}, /* D45 */
+ { 102, 33, 20, 20, 64, 20, 20, 20, 20, 14}, /* D135 */
+ { 122, 33, 20, 20, 20, 64, 20, 20, 20, 14}, /* D117 */
+ { 102, 33, 20, 20, 20, 20, 64, 20, 20, 14}, /* D153 */
+ { 102, 33, 20, 20, 20, 20, 20, 64, 20, 14}, /* D27 */
+ { 102, 33, 20, 20, 20, 20, 20, 20, 64, 14}, /* D63 */
+ { 132, 36, 30, 20, 20, 20, 20, 20, 20, 18}, /* TM */
+ { 122, 41, 35, 20, 20, 20, 20, 20, 20, 18}, /* i8x8 - never used */
+ { 122, 41, 35, 20, 20, 20, 20, 20, 20, 18}, /* BPRED */
+};
+
+static const unsigned int bmode_cts[VP9_NKF_BINTRAMODES] = {
+#if CONFIG_NEWBINTRAMODES
+#if CONTEXT_PRED_REPLACEMENTS == 6
+ /* DC TM VE HE CONTEXT */
+ 43891, 17694, 10036, 3920, 20000
+#elif CONTEXT_PRED_REPLACEMENTS == 4
+ /* DC TM VE HE LD RD CONTEXT */
+ 43891, 17694, 10036, 3920, 3363, 2546, 14000
+#elif CONTEXT_PRED_REPLACEMENTS == 0
+ /* DC TM VE HE LD RD VR VL HD HU CONTEXT */
+ 43891, 17694, 10036, 3920, 3363, 2546, 5119, 3221, 2471, 1723, 50000
+#endif
+#else
+ /* DC TM VE HE LD RD VR VL HD HU */
+ 43891, 17694, 10036, 3920, 3363, 2546, 5119, 3221, 2471, 1723
+#endif
+};
+
+typedef enum {
+ SUBMVREF_NORMAL,
+ SUBMVREF_LEFT_ZED,
+ SUBMVREF_ABOVE_ZED,
+ SUBMVREF_LEFT_ABOVE_SAME,
+ SUBMVREF_LEFT_ABOVE_ZED
+} sumvfref_t;
+
+int vp9_mv_cont(const int_mv *l, const int_mv *a) {
+  /* Map the left (l) and above (a) motion vectors to a SUBMVREF_* context
+   * by comparing their as_int values. */
+  const int left_is_zero = (l->as_int == 0);
+  const int above_is_zero = (a->as_int == 0);
+
+  if (l->as_int == a->as_int) {
+    /* Equal vectors: separate "both zero" from "equal but nonzero". */
+    return left_is_zero ? SUBMVREF_LEFT_ABOVE_ZED : SUBMVREF_LEFT_ABOVE_SAME;
+  }
+
+  /* The vectors differ here, so at most one of them is zero; the above
+   * vector takes priority over the left one. */
+  if (above_is_zero)
+    return SUBMVREF_ABOVE_ZED;
+  if (left_is_zero)
+    return SUBMVREF_LEFT_ZED;
+  return SUBMVREF_NORMAL;
+}
+
+const vp9_prob vp9_sub_mv_ref_prob [VP9_SUBMVREFS - 1] = { 180, 162, 25};
+
+const vp9_prob vp9_sub_mv_ref_prob2 [SUBMVREF_COUNT][VP9_SUBMVREFS - 1] = {
+ { 147, 136, 18 },
+ { 106, 145, 1 },
+ { 179, 121, 1 },
+ { 223, 1, 34 },
+ { 208, 1, 1 }
+};
+
+vp9_mbsplit vp9_mbsplits [VP9_NUMMBSPLITS] = {
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 1, 1, 1, 1,
+ 1, 1, 1, 1,
+ }, {
+ 0, 0, 1, 1,
+ 0, 0, 1, 1,
+ 0, 0, 1, 1,
+ 0, 0, 1, 1,
+ }, {
+ 0, 0, 1, 1,
+ 0, 0, 1, 1,
+ 2, 2, 3, 3,
+ 2, 2, 3, 3,
+ }, {
+ 0, 1, 2, 3,
+ 4, 5, 6, 7,
+ 8, 9, 10, 11,
+ 12, 13, 14, 15,
+ },
+};
+
+const int vp9_mbsplit_count [VP9_NUMMBSPLITS] = { 2, 2, 4, 16};
+
+const vp9_prob vp9_mbsplit_probs [VP9_NUMMBSPLITS - 1] = { 110, 111, 150};
+
+/* Array indices are identical to previously-existing INTRAMODECONTEXTNODES. */
+
+const vp9_tree_index vp9_kf_bmode_tree[VP9_KF_BINTRAMODES * 2 - 2] = {
+ -B_DC_PRED, 2, /* 0 = DC_NODE */
+ -B_TM_PRED, 4, /* 1 = TM_NODE */
+ -B_VE_PRED, 6, /* 2 = VE_NODE */
+ 8, 12, /* 3 = COM_NODE */
+ -B_HE_PRED, 10, /* 4 = HE_NODE */
+ -B_RD_PRED, -B_VR_PRED, /* 5 = RD_NODE */
+ -B_LD_PRED, 14, /* 6 = LD_NODE */
+ -B_VL_PRED, 16, /* 7 = VL_NODE */
+ -B_HD_PRED, -B_HU_PRED /* 8 = HD_NODE */
+};
+
+const vp9_tree_index vp9_bmode_tree[VP9_NKF_BINTRAMODES * 2 - 2] = {
+#if CONFIG_NEWBINTRAMODES
+#if CONTEXT_PRED_REPLACEMENTS == 6
+ -B_DC_PRED, 2,
+ -B_TM_PRED, 4,
+ 6, -(B_CONTEXT_PRED - CONTEXT_PRED_REPLACEMENTS),
+ -B_VE_PRED, -B_HE_PRED
+#elif CONTEXT_PRED_REPLACEMENTS == 4
+ -B_DC_PRED, 2,
+ -B_TM_PRED, 4,
+ 6, 8,
+ -B_VE_PRED, -B_HE_PRED,
+ 10, -(B_CONTEXT_PRED - CONTEXT_PRED_REPLACEMENTS),
+ -B_RD_PRED, -B_LD_PRED,
+#elif CONTEXT_PRED_REPLACEMENTS == 0
+ -B_DC_PRED, 2, /* 0 = DC_NODE */
+ -B_TM_PRED, 4, /* 1 = TM_NODE */
+ -B_VE_PRED, 6, /* 2 = VE_NODE */
+ 8, 12, /* 3 = COM_NODE */
+ -B_HE_PRED, 10, /* 4 = HE_NODE */
+ -B_RD_PRED, -B_VR_PRED, /* 5 = RD_NODE */
+ -B_LD_PRED, 14, /* 6 = LD_NODE */
+ -B_VL_PRED, 16, /* 7 = VL_NODE */
+ -B_HD_PRED, 18,
+ -B_HU_PRED, -B_CONTEXT_PRED
+#endif
+#else
+ -B_DC_PRED, 2, /* 0 = DC_NODE */
+ -B_TM_PRED, 4, /* 1 = TM_NODE */
+ -B_VE_PRED, 6, /* 2 = VE_NODE */
+ 8, 12, /* 3 = COM_NODE */
+ -B_HE_PRED, 10, /* 4 = HE_NODE */
+ -B_RD_PRED, -B_VR_PRED, /* 5 = RD_NODE */
+ -B_LD_PRED, 14, /* 6 = LD_NODE */
+ -B_VL_PRED, 16, /* 7 = VL_NODE */
+ -B_HD_PRED, -B_HU_PRED /* 8 = HD_NODE */
+#endif
+};
+
+/* Again, these trees use the same probability indices as their
+ explicitly-programmed predecessors. */
+const vp9_tree_index vp9_ymode_tree[VP9_YMODES * 2 - 2] = {
+ 2, 14,
+ -DC_PRED, 4,
+ 6, 8,
+ -D45_PRED, -D135_PRED,
+ 10, 12,
+ -D117_PRED, -D153_PRED,
+ -D27_PRED, -D63_PRED,
+ 16, 18,
+ -V_PRED, -H_PRED,
+ -TM_PRED, 20,
+ -B_PRED, -I8X8_PRED
+};
+
+const vp9_tree_index vp9_kf_ymode_tree[VP9_YMODES * 2 - 2] = {
+ 2, 14,
+ -DC_PRED, 4,
+ 6, 8,
+ -D45_PRED, -D135_PRED,
+ 10, 12,
+ -D117_PRED, -D153_PRED,
+ -D27_PRED, -D63_PRED,
+ 16, 18,
+ -V_PRED, -H_PRED,
+ -TM_PRED, 20,
+ -B_PRED, -I8X8_PRED
+};
+
+const vp9_tree_index vp9_i8x8_mode_tree[VP9_I8X8_MODES * 2 - 2] = {
+ 2, 14,
+ -DC_PRED, 4,
+ 6, 8,
+ -D45_PRED, -D135_PRED,
+ 10, 12,
+ -D117_PRED, -D153_PRED,
+ -D27_PRED, -D63_PRED,
+ -V_PRED, 16,
+ -H_PRED, -TM_PRED
+};
+
+const vp9_tree_index vp9_uv_mode_tree[VP9_UV_MODES * 2 - 2] = {
+ 2, 14,
+ -DC_PRED, 4,
+ 6, 8,
+ -D45_PRED, -D135_PRED,
+ 10, 12,
+ -D117_PRED, -D153_PRED,
+ -D27_PRED, -D63_PRED,
+ -V_PRED, 16,
+ -H_PRED, -TM_PRED
+};
+
+const vp9_tree_index vp9_mbsplit_tree[6] = {
+ -PARTITIONING_4X4, 2,
+ -PARTITIONING_8X8, 4,
+ -PARTITIONING_16X8, -PARTITIONING_8X16,
+};
+
+const vp9_tree_index vp9_mv_ref_tree[8] = {
+ -ZEROMV, 2,
+ -NEARESTMV, 4,
+ -NEARMV, 6,
+ -NEWMV, -SPLITMV
+};
+
+#if CONFIG_SUPERBLOCKS
+const vp9_tree_index vp9_sb_mv_ref_tree[6] = {
+ -ZEROMV, 2,
+ -NEARESTMV, 4,
+ -NEARMV, -NEWMV
+};
+#endif
+
+const vp9_tree_index vp9_sub_mv_ref_tree[6] = {
+ -LEFT4X4, 2,
+ -ABOVE4X4, 4,
+ -ZERO4X4, -NEW4X4
+};
+
+struct vp9_token_struct vp9_bmode_encodings[VP9_NKF_BINTRAMODES];
+struct vp9_token_struct vp9_kf_bmode_encodings[VP9_KF_BINTRAMODES];
+struct vp9_token_struct vp9_ymode_encodings[VP9_YMODES];
+#if CONFIG_SUPERBLOCKS
+struct vp9_token_struct vp9_sb_ymode_encodings[VP9_I32X32_MODES];
+struct vp9_token_struct vp9_sb_kf_ymode_encodings[VP9_I32X32_MODES];
+#endif
+struct vp9_token_struct vp9_kf_ymode_encodings[VP9_YMODES];
+struct vp9_token_struct vp9_uv_mode_encodings[VP9_UV_MODES];
+struct vp9_token_struct vp9_i8x8_mode_encodings[VP9_I8X8_MODES];
+struct vp9_token_struct vp9_mbsplit_encodings[VP9_NUMMBSPLITS];
+
+struct vp9_token_struct vp9_mv_ref_encoding_array[VP9_MVREFS];
+#if CONFIG_SUPERBLOCKS
+struct vp9_token_struct vp9_sb_mv_ref_encoding_array[VP9_MVREFS];
+#endif
+struct vp9_token_struct vp9_sub_mv_ref_encoding_array[VP9_SUBMVREFS];
+
+void vp9_init_mbmode_probs(VP9_COMMON *x) {
+ unsigned int bct [VP9_YMODES] [2]; /* num Ymodes > num UV modes */
+
+ vp9_tree_probs_from_distribution(VP9_YMODES, vp9_ymode_encodings,
+ vp9_ymode_tree, x->fc.ymode_prob,
+ bct, y_mode_cts, 256, 1);
+#if CONFIG_SUPERBLOCKS
+ vp9_tree_probs_from_distribution(VP9_I32X32_MODES, vp9_sb_ymode_encodings,
+ vp9_sb_ymode_tree, x->fc.sb_ymode_prob,
+ bct, y_mode_cts, 256, 1);
+#endif
+ {
+ int i;
+ for (i = 0; i < 8; i++) {
+ vp9_tree_probs_from_distribution(VP9_YMODES, vp9_kf_ymode_encodings,
+ vp9_kf_ymode_tree, x->kf_ymode_prob[i],
+ bct, kf_y_mode_cts[i], 256, 1);
+#if CONFIG_SUPERBLOCKS
+ vp9_tree_probs_from_distribution(VP9_I32X32_MODES,
+ vp9_sb_kf_ymode_encodings,
+ vp9_sb_kf_ymode_tree,
+ x->sb_kf_ymode_prob[i], bct,
+ kf_y_mode_cts[i], 256, 1);
+#endif
+ }
+ }
+ {
+ int i;
+ for (i = 0; i < VP9_YMODES; i++) {
+ vp9_tree_probs_from_distribution(VP9_UV_MODES, vp9_uv_mode_encodings,
+ vp9_uv_mode_tree, x->kf_uv_mode_prob[i],
+ bct, kf_uv_mode_cts[i], 256, 1);
+ vp9_tree_probs_from_distribution(VP9_UV_MODES, vp9_uv_mode_encodings,
+ vp9_uv_mode_tree, x->fc.uv_mode_prob[i],
+ bct, uv_mode_cts[i], 256, 1);
+ }
+ }
+
+ vp9_tree_probs_from_distribution(VP9_I8X8_MODES, vp9_i8x8_mode_encodings,
+ vp9_i8x8_mode_tree, x->fc.i8x8_mode_prob,
+ bct, i8x8_mode_cts, 256, 1);
+
+ vpx_memcpy(x->fc.sub_mv_ref_prob, vp9_sub_mv_ref_prob2,
+ sizeof(vp9_sub_mv_ref_prob2));
+ vpx_memcpy(x->fc.mbsplit_prob, vp9_mbsplit_probs, sizeof(vp9_mbsplit_probs));
+ vpx_memcpy(x->fc.switchable_interp_prob, vp9_switchable_interp_prob,
+ sizeof(vp9_switchable_interp_prob));
+#if CONFIG_COMP_INTERINTRA_PRED
+ x->fc.interintra_prob = VP9_DEF_INTERINTRA_PROB;
+#endif
+}
+
+
+static void intra_bmode_probs_from_distribution(
+ vp9_prob p[VP9_NKF_BINTRAMODES - 1],
+ unsigned int branch_ct[VP9_NKF_BINTRAMODES - 1][2],
+ const unsigned int events[VP9_NKF_BINTRAMODES]) {
+ vp9_tree_probs_from_distribution(VP9_NKF_BINTRAMODES, vp9_bmode_encodings,
+ vp9_bmode_tree, p, branch_ct, events, 256, 1);
+}
+
+void vp9_default_bmode_probs(vp9_prob p[VP9_NKF_BINTRAMODES - 1]) {
+ unsigned int branch_ct[VP9_NKF_BINTRAMODES - 1][2];
+ intra_bmode_probs_from_distribution(p, branch_ct, bmode_cts);
+}
+
+static void intra_kf_bmode_probs_from_distribution(
+ vp9_prob p[VP9_KF_BINTRAMODES - 1],
+ unsigned int branch_ct[VP9_KF_BINTRAMODES - 1][2],
+ const unsigned int events[VP9_KF_BINTRAMODES]) {
+ vp9_tree_probs_from_distribution(VP9_KF_BINTRAMODES, vp9_kf_bmode_encodings,
+ vp9_kf_bmode_tree, p, branch_ct, events, 256, 1);
+}
+
+void vp9_kf_default_bmode_probs(vp9_prob p[VP9_KF_BINTRAMODES]
+ [VP9_KF_BINTRAMODES]
+ [VP9_KF_BINTRAMODES - 1]) {
+ unsigned int branch_ct[VP9_KF_BINTRAMODES - 1][2];
+ int i, j;
+
+ for (i = 0; i < VP9_KF_BINTRAMODES; ++i) {
+ for (j = 0; j < VP9_KF_BINTRAMODES; ++j) {
+ intra_kf_bmode_probs_from_distribution(
+ p[i][j], branch_ct, vp9_kf_default_bmode_counts[i][j]);
+ }
+ }
+}
+
+#if VP9_SWITCHABLE_FILTERS == 3
+const vp9_tree_index vp9_switchable_interp_tree[VP9_SWITCHABLE_FILTERS*2-2] = {
+ -0, 2,
+ -1, -2
+};
+struct vp9_token_struct vp9_switchable_interp_encodings[VP9_SWITCHABLE_FILTERS];
+const INTERPOLATIONFILTERTYPE vp9_switchable_interp[VP9_SWITCHABLE_FILTERS] = {
+ EIGHTTAP, SIXTAP, EIGHTTAP_SHARP};
+const int vp9_switchable_interp_map[SWITCHABLE+1] = {1, -1, 0, 2, -1};
+const vp9_prob vp9_switchable_interp_prob [VP9_SWITCHABLE_FILTERS+1]
+ [VP9_SWITCHABLE_FILTERS-1] = {
+ {248, 192}, { 32, 248}, { 32, 32}, {192, 160}
+};
+#elif VP9_SWITCHABLE_FILTERS == 2
+const vp9_tree_index vp9_switchable_interp_tree[VP9_SWITCHABLE_FILTERS*2-2] = {
+ -0, -1,
+};
+struct vp9_token_struct vp9_switchable_interp_encodings[VP9_SWITCHABLE_FILTERS];
+const vp9_prob vp9_switchable_interp_prob [VP9_SWITCHABLE_FILTERS+1]
+ [VP9_SWITCHABLE_FILTERS-1] = {
+ {248},
+ { 64},
+ {192},
+};
+const INTERPOLATIONFILTERTYPE vp9_switchable_interp[VP9_SWITCHABLE_FILTERS] = {
+ EIGHTTAP, EIGHTTAP_SHARP};
+const int vp9_switchable_interp_map[SWITCHABLE+1] = {-1, -1, 0, 1, -1}; //8, 8s
+#endif
+
+void vp9_entropy_mode_init(void) {  /* fills the *_encodings token tables */
+  vp9_tokens_from_tree(vp9_kf_bmode_encodings, vp9_kf_bmode_tree);
+  vp9_tokens_from_tree(vp9_bmode_encodings, vp9_bmode_tree);
+  vp9_tokens_from_tree(vp9_ymode_encodings, vp9_ymode_tree);
+  vp9_tokens_from_tree(vp9_kf_ymode_encodings, vp9_kf_ymode_tree);
+#if CONFIG_SUPERBLOCKS
+  vp9_tokens_from_tree(vp9_sb_ymode_encodings, vp9_sb_ymode_tree);
+  vp9_tokens_from_tree(vp9_sb_kf_ymode_encodings, vp9_sb_kf_ymode_tree);
+#endif
+  vp9_tokens_from_tree(vp9_uv_mode_encodings, vp9_uv_mode_tree);
+  vp9_tokens_from_tree(vp9_i8x8_mode_encodings, vp9_i8x8_mode_tree);
+  vp9_tokens_from_tree(vp9_mbsplit_encodings, vp9_mbsplit_tree);
+  vp9_tokens_from_tree(vp9_switchable_interp_encodings,
+                       vp9_switchable_interp_tree);
+  /* MV-ref tables: last argument is presumably the lowest symbol (confirm). */
+  vp9_tokens_from_tree_offset(vp9_mv_ref_encoding_array,
+                              vp9_mv_ref_tree, NEARESTMV);
+#if CONFIG_SUPERBLOCKS
+  vp9_tokens_from_tree_offset(vp9_sb_mv_ref_encoding_array,
+                              vp9_sb_mv_ref_tree, NEARESTMV);
+#endif
+  vp9_tokens_from_tree_offset(vp9_sub_mv_ref_encoding_array,
+                              vp9_sub_mv_ref_tree, LEFT4X4);
+}
+
+void vp9_init_mode_contexts(VP9_COMMON *pc) {
+ vpx_memset(pc->fc.mv_ref_ct, 0, sizeof(pc->fc.mv_ref_ct));
+ vpx_memcpy(pc->fc.vp9_mode_contexts,
+ vp9_default_mode_contexts,
+ sizeof(vp9_default_mode_contexts));
+}
+
+void vp9_accum_mv_refs(VP9_COMMON *pc,
+                       MB_PREDICTION_MODE m,
+                       const int context) {
+  /* Updates pc->fc.mv_ref_ct[context] for one inter prediction mode m.
+   *
+   * The counters form a chain of binary decisions, one per entry of
+   * chain[] below: at level n, [n][0] counts "m == chain[n]" and [n][1]
+   * counts "m is none of chain[0..n]".  Only the levels up to and
+   * including the first match are incremented. */
+  static const MB_PREDICTION_MODE chain[4] = {
+    ZEROMV, NEARESTMV, NEARMV, NEWMV
+  };
+  unsigned int (*const level_ct)[2] = pc->fc.mv_ref_ct[context];
+  int level;
+
+  for (level = 0; level < 4; ++level) {
+    if (m == chain[level]) {
+      /* Matched: record the hit and stop; deeper levels stay untouched. */
+      ++level_ct[level][0];
+      return;
+    }
+
+    /* No match at this level: record the miss and move on.  A miss at
+     * the last level accounts for every mode not listed in chain[]. */
+    ++level_ct[level][1];
+  }
+  /* Falling out of the loop means m matched none of the four modes. */
+}
+
+#define MVREF_COUNT_SAT 20
+#define MVREF_MAX_UPDATE_FACTOR 128
+void vp9_update_mode_context(VP9_COMMON *pc) {
+  /* Blend each stored mode-context value toward the probability observed
+   * in mv_ref_ct, weighting the observation by the (saturated) number of
+   * samples counted for that branch. */
+  unsigned int (*const counts)[4][2] = pc->fc.mv_ref_ct;
+  int (*const contexts)[4] = pc->fc.vp9_mode_contexts;
+  int ctx, branch;
+
+  for (ctx = 0; ctx < INTER_MODE_CONTEXTS; ctx++) {
+    for (branch = 0; branch < 4; branch++) {
+      const unsigned int *const ct = counts[ctx][branch];
+      int total = ct[0] + ct[1];
+      int observed, weight, blended;
+
+      /* With no samples at all, fall back to an even 128/256 split. */
+      observed = total > 0 ? 256 * ct[0] / total : 128;
+      if (total > MVREF_COUNT_SAT)
+        total = MVREF_COUNT_SAT;
+      weight = (MVREF_MAX_UPDATE_FACTOR * total / MVREF_COUNT_SAT);
+      blended = (contexts[ctx][branch] * (256 - weight) +
+                 observed * weight + 128) >> 8;
+      contexts[ctx][branch] = clip_prob(blended);
+    }
+  }
+}
+
+#ifdef MODE_STATS
+#include "vp9/common/vp9_modecont.h"
+void print_mode_contexts(VP9_COMMON *pc) { /* debug dump to stdout; compiled only when MODE_STATS is defined */
+ int j, i;
+ printf("\n====================\n");
+ for (j = 0; j < INTER_MODE_CONTEXTS; j++) { /* first table: one row per inter mode context */
+ for (i = 0; i < 4; i++) {
+ printf("%4d ", pc->fc.mode_context[j][i]); /* NOTE(review): the rest of this file uses fc.vp9_mode_contexts -- confirm fc.mode_context exists */
+ }
+ printf("\n");
+ }
+ printf("====================\n");
+ for (j = 0; j < INTER_MODE_CONTEXTS; j++) { /* second table, same layout */
+ for (i = 0; i < 4; i++) {
+ printf("%4d ", pc->fc.mode_context_a[j][i]); /* NOTE(review): confirm fc.mode_context_a exists; not referenced elsewhere in this file */
+ }
+ printf("\n");
+ }
+}
+#endif
+
+// #define MODE_COUNT_TESTING
+#define MODE_COUNT_SAT 20
+#define MODE_MAX_UPDATE_FACTOR 144
+void vp9_adapt_mode_probs(VP9_COMMON *cm) {
+ int i, t, count, factor;
+ unsigned int branch_ct[32][2];
+ vp9_prob ymode_probs[VP9_YMODES - 1];
+#if CONFIG_SUPERBLOCKS
+ vp9_prob sb_ymode_probs[VP9_I32X32_MODES - 1];
+#endif
+ vp9_prob uvmode_probs[VP9_UV_MODES - 1];
+ vp9_prob bmode_probs[VP9_NKF_BINTRAMODES - 1];
+ vp9_prob i8x8_mode_probs[VP9_I8X8_MODES - 1];
+ vp9_prob sub_mv_ref_probs[VP9_SUBMVREFS - 1];
+ vp9_prob mbsplit_probs[VP9_NUMMBSPLITS - 1];
+#if CONFIG_COMP_INTERINTRA_PRED
+ vp9_prob interintra_prob;
+#endif
+#ifdef MODE_COUNT_TESTING
+ printf("static const unsigned int\nymode_counts"
+ "[VP9_YMODES] = {\n");
+ for (t = 0; t < VP9_YMODES; ++t) printf("%d, ", cm->fc.ymode_counts[t]);
+ printf("};\n");
+ printf("static const unsigned int\nuv_mode_counts"
+ "[VP9_YMODES] [VP9_UV_MODES] = {\n");
+ for (i = 0; i < VP9_YMODES; ++i) {
+ printf(" {");
+ for (t = 0; t < VP9_UV_MODES; ++t) printf("%d, ", cm->fc.uv_mode_counts[i][t]);
+ printf("},\n");
+ }
+ printf("};\n");
+ printf("static const unsigned int\nbmode_counts"
+ "[VP9_NKF_BINTRAMODES] = {\n");
+ for (t = 0; t < VP9_NKF_BINTRAMODES; ++t)
+ printf("%d, ", cm->fc.bmode_counts[t]);
+ printf("};\n");
+ printf("static const unsigned int\ni8x8_mode_counts"
+ "[VP9_I8X8_MODES] = {\n");
+ for (t = 0; t < VP9_I8X8_MODES; ++t) printf("%d, ", cm->fc.i8x8_mode_counts[t]);
+ printf("};\n");
+ printf("static const unsigned int\nsub_mv_ref_counts"
+ "[SUBMVREF_COUNT] [VP9_SUBMVREFS] = {\n");
+ for (i = 0; i < SUBMVREF_COUNT; ++i) {
+ printf(" {");
+ for (t = 0; t < VP9_SUBMVREFS; ++t) printf("%d, ", cm->fc.sub_mv_ref_counts[i][t]);
+ printf("},\n");
+ }
+ printf("};\n");
+ printf("static const unsigned int\nmbsplit_counts"
+ "[VP9_NUMMBSPLITS] = {\n");
+ for (t = 0; t < VP9_NUMMBSPLITS; ++t) printf("%d, ", cm->fc.mbsplit_counts[t]);
+ printf("};\n");
+#if CONFIG_COMP_INTERINTRA_PRED
+ printf("static const unsigned int\ninterintra_counts"
+ "[2] = {\n");
+ for (t = 0; t < 2; ++t) printf("%d, ", cm->fc.interintra_counts[t]);
+ printf("};\n");
+#endif
+#endif
+ vp9_tree_probs_from_distribution(
+ VP9_YMODES, vp9_ymode_encodings, vp9_ymode_tree,
+ ymode_probs, branch_ct, cm->fc.ymode_counts,
+ 256, 1);
+ for (t = 0; t < VP9_YMODES - 1; ++t) {
+ int prob;
+ count = branch_ct[t][0] + branch_ct[t][1];
+ count = count > MODE_COUNT_SAT ? MODE_COUNT_SAT : count;
+ factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT);
+ prob = ((int)cm->fc.pre_ymode_prob[t] * (256 - factor) +
+ (int)ymode_probs[t] * factor + 128) >> 8;
+ cm->fc.ymode_prob[t] = clip_prob(prob);
+ }
+#if CONFIG_SUPERBLOCKS
+ vp9_tree_probs_from_distribution(VP9_I32X32_MODES,
+ vp9_sb_ymode_encodings, vp9_sb_ymode_tree,
+ sb_ymode_probs, branch_ct,
+ cm->fc.sb_ymode_counts,
+ 256, 1);
+ for (t = 0; t < VP9_I32X32_MODES - 1; ++t) {
+ int prob;
+ count = branch_ct[t][0] + branch_ct[t][1];
+ count = count > MODE_COUNT_SAT ? MODE_COUNT_SAT : count;
+ factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT);
+ prob = ((int)cm->fc.pre_sb_ymode_prob[t] * (256 - factor) +
+ (int)sb_ymode_probs[t] * factor + 128) >> 8;
+ cm->fc.sb_ymode_prob[t] = clip_prob(prob);
+ }
+#endif
+ for (i = 0; i < VP9_YMODES; ++i) {
+ vp9_tree_probs_from_distribution(VP9_UV_MODES, vp9_uv_mode_encodings,
+ vp9_uv_mode_tree, uvmode_probs, branch_ct,
+ cm->fc.uv_mode_counts[i], 256, 1);
+ for (t = 0; t < VP9_UV_MODES - 1; ++t) {
+ int prob;
+ count = branch_ct[t][0] + branch_ct[t][1];
+ count = count > MODE_COUNT_SAT ? MODE_COUNT_SAT : count;
+ factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT);
+ prob = ((int)cm->fc.pre_uv_mode_prob[i][t] * (256 - factor) +
+ (int)uvmode_probs[t] * factor + 128) >> 8;
+ cm->fc.uv_mode_prob[i][t] = clip_prob(prob);
+ }
+ }
+ vp9_tree_probs_from_distribution(VP9_NKF_BINTRAMODES, vp9_bmode_encodings,
+ vp9_bmode_tree, bmode_probs, branch_ct,
+ cm->fc.bmode_counts, 256, 1);
+ for (t = 0; t < VP9_NKF_BINTRAMODES - 1; ++t) {
+ int prob;
+ count = branch_ct[t][0] + branch_ct[t][1];
+ count = count > MODE_COUNT_SAT ? MODE_COUNT_SAT : count;
+ factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT);
+ prob = ((int)cm->fc.pre_bmode_prob[t] * (256 - factor) +
+ (int)bmode_probs[t] * factor + 128) >> 8;
+ cm->fc.bmode_prob[t] = clip_prob(prob);
+ }
+ vp9_tree_probs_from_distribution(VP9_I8X8_MODES, vp9_i8x8_mode_encodings,
+ vp9_i8x8_mode_tree, i8x8_mode_probs,
+ branch_ct, cm->fc.i8x8_mode_counts, 256, 1);
+ for (t = 0; t < VP9_I8X8_MODES - 1; ++t) {
+ int prob;
+ count = branch_ct[t][0] + branch_ct[t][1];
+ count = count > MODE_COUNT_SAT ? MODE_COUNT_SAT : count;
+ factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT);
+ prob = ((int)cm->fc.pre_i8x8_mode_prob[t] * (256 - factor) +
+ (int)i8x8_mode_probs[t] * factor + 128) >> 8;
+ cm->fc.i8x8_mode_prob[t] = clip_prob(prob);
+ }
+ for (i = 0; i < SUBMVREF_COUNT; ++i) {
+ vp9_tree_probs_from_distribution(VP9_SUBMVREFS,
+ vp9_sub_mv_ref_encoding_array,
+ vp9_sub_mv_ref_tree, sub_mv_ref_probs,
+ branch_ct, cm->fc.sub_mv_ref_counts[i],
+ 256, 1);
+ for (t = 0; t < VP9_SUBMVREFS - 1; ++t) {
+ int prob;
+ count = branch_ct[t][0] + branch_ct[t][1];
+ count = count > MODE_COUNT_SAT ? MODE_COUNT_SAT : count;
+ factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT);
+ prob = ((int)cm->fc.pre_sub_mv_ref_prob[i][t] * (256 - factor) +
+ (int)sub_mv_ref_probs[t] * factor + 128) >> 8;
+ cm->fc.sub_mv_ref_prob[i][t] = clip_prob(prob);
+ }
+ }
+ vp9_tree_probs_from_distribution(VP9_NUMMBSPLITS, vp9_mbsplit_encodings,
+ vp9_mbsplit_tree, mbsplit_probs, branch_ct,
+ cm->fc.mbsplit_counts, 256, 1);
+ for (t = 0; t < VP9_NUMMBSPLITS - 1; ++t) {
+ int prob;
+ count = branch_ct[t][0] + branch_ct[t][1];
+ count = count > MODE_COUNT_SAT ? MODE_COUNT_SAT : count;
+ factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT);
+ prob = ((int)cm->fc.pre_mbsplit_prob[t] * (256 - factor) +
+ (int)mbsplit_probs[t] * factor + 128) >> 8;
+ cm->fc.mbsplit_prob[t] = clip_prob(prob);
+ }
+#if CONFIG_COMP_INTERINTRA_PRED
+ if (cm->use_interintra) {
+ int prob;
+ interintra_prob = vp9_bin_prob_from_distribution(cm->fc.interintra_counts);
+ count = cm->fc.interintra_counts[0] + cm->fc.interintra_counts[1];
+ count = count > MODE_COUNT_SAT ? MODE_COUNT_SAT : count;
+ factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT);
+ prob = ((int)cm->fc.pre_interintra_prob * (256 - factor) +
+ (int)interintra_prob * factor + 128) >> 8;
+ if (prob <= 0)
+ cm->fc.interintra_prob = 1;
+ else if (prob > 255)
+ cm->fc.interintra_prob = 255;
+ else
+ cm->fc.interintra_prob = prob;
+ }
+#endif
+}
diff --git a/vp9/common/vp9_entropymode.h b/vp9/common/vp9_entropymode.h
new file mode 100644
index 0000000..78ec325
--- /dev/null
+++ b/vp9/common/vp9_entropymode.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_COMMON_VP9_ENTROPYMODE_H_
+#define VP9_COMMON_VP9_ENTROPYMODE_H_
+
+#include "vp9/common/vp9_blockd.h"
+#include "vp9/common/vp9_treecoder.h"
+
+#define SUBMVREF_COUNT 5
+#define VP9_NUMMBSPLITS 4
+#if CONFIG_COMP_INTRA_PRED
+#define DEFAULT_COMP_INTRA_PROB 32
+#endif
+
+#if CONFIG_COMP_INTERINTRA_PRED
+#define VP9_DEF_INTERINTRA_PROB 248
+#define VP9_UPD_INTERINTRA_PROB 192
+// whether to use a separate uv mode (1) or use the same as the y mode (0)
+#define SEPARATE_INTERINTRA_UV 0
+#endif
+
+typedef const int vp9_mbsplit[16];
+
+extern vp9_mbsplit vp9_mbsplits[VP9_NUMMBSPLITS];
+
+extern const int vp9_mbsplit_count[VP9_NUMMBSPLITS]; /* # of subsets */
+
+extern const vp9_prob vp9_mbsplit_probs[VP9_NUMMBSPLITS - 1];
+
+extern int vp9_mv_cont(const int_mv *l, const int_mv *a);
+
+extern const vp9_prob vp9_sub_mv_ref_prob[VP9_SUBMVREFS - 1];
+
+extern const vp9_prob vp9_sub_mv_ref_prob2[SUBMVREF_COUNT][VP9_SUBMVREFS - 1];
+
+extern const unsigned int vp9_kf_default_bmode_counts[VP9_KF_BINTRAMODES]
+ [VP9_KF_BINTRAMODES]
+ [VP9_KF_BINTRAMODES];
+
+extern const vp9_tree_index vp9_bmode_tree[];
+extern const vp9_tree_index vp9_kf_bmode_tree[];
+
+extern const vp9_tree_index vp9_ymode_tree[];
+extern const vp9_tree_index vp9_kf_ymode_tree[];
+extern const vp9_tree_index vp9_uv_mode_tree[];
+#define vp9_sb_ymode_tree vp9_uv_mode_tree
+#define vp9_sb_kf_ymode_tree vp9_uv_mode_tree
+extern const vp9_tree_index vp9_i8x8_mode_tree[];
+extern const vp9_tree_index vp9_mbsplit_tree[];
+extern const vp9_tree_index vp9_mv_ref_tree[];
+extern const vp9_tree_index vp9_sb_mv_ref_tree[];
+extern const vp9_tree_index vp9_sub_mv_ref_tree[];
+
+extern struct vp9_token_struct vp9_bmode_encodings[VP9_NKF_BINTRAMODES];
+extern struct vp9_token_struct vp9_kf_bmode_encodings[VP9_KF_BINTRAMODES];
+extern struct vp9_token_struct vp9_ymode_encodings[VP9_YMODES];
+extern struct vp9_token_struct vp9_sb_ymode_encodings[VP9_I32X32_MODES];
+extern struct vp9_token_struct vp9_sb_kf_ymode_encodings[VP9_I32X32_MODES];
+extern struct vp9_token_struct vp9_kf_ymode_encodings[VP9_YMODES];
+extern struct vp9_token_struct vp9_i8x8_mode_encodings[VP9_I8X8_MODES];
+extern struct vp9_token_struct vp9_uv_mode_encodings[VP9_UV_MODES];
+extern struct vp9_token_struct vp9_mbsplit_encodings[VP9_NUMMBSPLITS];
+
+/* Inter mode values do not start at zero */
+
+extern struct vp9_token_struct vp9_mv_ref_encoding_array[VP9_MVREFS];
+extern struct vp9_token_struct vp9_sb_mv_ref_encoding_array[VP9_MVREFS];
+extern struct vp9_token_struct vp9_sub_mv_ref_encoding_array[VP9_SUBMVREFS];
+
+void vp9_entropy_mode_init(void);
+
+struct VP9Common;
+
+void vp9_init_mbmode_probs(struct VP9Common *x);
+
+extern void vp9_init_mode_contexts(struct VP9Common *pc);
+
+extern void vp9_update_mode_context(struct VP9Common *pc);
+
+extern void vp9_accum_mv_refs(struct VP9Common *pc,
+ MB_PREDICTION_MODE m,
+ const int context);
+
+void vp9_default_bmode_probs(vp9_prob dest[VP9_NKF_BINTRAMODES - 1]);
+
+void vp9_kf_default_bmode_probs(vp9_prob dest[VP9_KF_BINTRAMODES]
+ [VP9_KF_BINTRAMODES]
+ [VP9_KF_BINTRAMODES - 1]);
+
+void vp9_adapt_mode_probs(struct VP9Common *);
+
+#define VP9_SWITCHABLE_FILTERS 2 /* number of switchable filters */
+
+extern const INTERPOLATIONFILTERTYPE vp9_switchable_interp
+ [VP9_SWITCHABLE_FILTERS];
+
+extern const int vp9_switchable_interp_map[SWITCHABLE + 1];
+
+extern const vp9_tree_index vp9_switchable_interp_tree
+ [2 * (VP9_SWITCHABLE_FILTERS - 1)];
+
+extern struct vp9_token_struct vp9_switchable_interp_encodings
+ [VP9_SWITCHABLE_FILTERS];
+
+extern const vp9_prob vp9_switchable_interp_prob[VP9_SWITCHABLE_FILTERS + 1]
+ [VP9_SWITCHABLE_FILTERS - 1];
+
+#endif
diff --git a/vp9/common/vp9_entropymv.c b/vp9/common/vp9_entropymv.c
new file mode 100644
index 0000000..94543c5
--- /dev/null
+++ b/vp9/common/vp9_entropymv.c
@@ -0,0 +1,469 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vp9/common/vp9_onyxc_int.h"
+#include "vp9/common/vp9_entropymv.h"
+
+//#define MV_COUNT_TESTING
+
+#define MV_COUNT_SAT 16
+#define MV_MAX_UPDATE_FACTOR 160
+
+#if CONFIG_NEW_MVREF
+/* Integer pel reference mv threshold for use of high-precision 1/8 mv */
+#define COMPANDED_MVREF_THRESH 1000000
+#else
+/* Integer pel reference mv threshold for use of high-precision 1/8 mv */
+#define COMPANDED_MVREF_THRESH 8
+#endif
+
+/* Smooth or bias the mv-counts before prob computation */
+/* #define SMOOTH_MV_COUNTS */
+
+const vp9_tree_index vp9_mv_joint_tree[2 * MV_JOINTS - 2] = {
+ -MV_JOINT_ZERO, 2,
+ -MV_JOINT_HNZVZ, 4,
+ -MV_JOINT_HZVNZ, -MV_JOINT_HNZVNZ
+};
+struct vp9_token_struct vp9_mv_joint_encodings[MV_JOINTS];
+
+const vp9_tree_index vp9_mv_class_tree[2 * MV_CLASSES - 2] = {
+ -MV_CLASS_0, 2,
+ -MV_CLASS_1, 4,
+ 6, 8,
+ -MV_CLASS_2, -MV_CLASS_3,
+ 10, 12,
+ -MV_CLASS_4, -MV_CLASS_5,
+ -MV_CLASS_6, -MV_CLASS_7,
+};
+struct vp9_token_struct vp9_mv_class_encodings[MV_CLASSES];
+
+const vp9_tree_index vp9_mv_class0_tree [2 * CLASS0_SIZE - 2] = {
+ -0, -1,
+};
+struct vp9_token_struct vp9_mv_class0_encodings[CLASS0_SIZE];
+
+const vp9_tree_index vp9_mv_fp_tree [2 * 4 - 2] = {
+ -0, 2,
+ -1, 4,
+ -2, -3
+};
+struct vp9_token_struct vp9_mv_fp_encodings[4];
+
+const nmv_context vp9_default_nmv_context = {
+ {32, 64, 96},
+ {
+ { /* vert component */
+ 128, /* sign */
+ {224, 144, 192, 168, 192, 176, 192}, /* class */
+ {216}, /* class0 */
+ {136, 140, 148, 160, 176, 192, 224}, /* bits */
+ {{128, 128, 64}, {96, 112, 64}}, /* class0_fp */
+ {64, 96, 64}, /* fp */
+ 160, /* class0_hp bit */
+ 128, /* hp */
+ },
+ { /* hor component */
+ 128, /* sign */
+ {216, 128, 176, 160, 176, 176, 192}, /* class */
+ {208}, /* class0 */
+ {136, 140, 148, 160, 176, 192, 224}, /* bits */
+ {{128, 128, 64}, {96, 112, 64}}, /* class0_fp */
+ {64, 96, 64}, /* fp */
+ 160, /* class0_hp bit */
+ 128, /* hp */
+ }
+ },
+};
+
+/* Classifies mv by which of its two components (row, col) are nonzero. */
+MV_JOINT_TYPE vp9_get_mv_joint(MV mv) {
+  if (mv.row == 0)
+    return mv.col == 0 ? MV_JOINT_ZERO : MV_JOINT_HNZVZ;
+  return mv.col == 0 ? MV_JOINT_HZVNZ : MV_JOINT_HNZVNZ;
+}
+
+#define mv_class_base(c) ((c) ? (CLASS0_SIZE << ((c) + 2)) : 0)  /* first z of class c */
+
+/* Maps z (magnitude - 1) to its class; *offset, if given, gets z - base. */
+MV_CLASS_TYPE vp9_get_mv_class(int z, int *offset) {
+  MV_CLASS_TYPE c = MV_CLASS_7;  /* defined result even if assert() is off */
+  if (z < CLASS0_SIZE * 8) c = MV_CLASS_0;
+  else if (z < CLASS0_SIZE * 16) c = MV_CLASS_1;
+  else if (z < CLASS0_SIZE * 32) c = MV_CLASS_2;
+  else if (z < CLASS0_SIZE * 64) c = MV_CLASS_3;
+  else if (z < CLASS0_SIZE * 128) c = MV_CLASS_4;
+  else if (z < CLASS0_SIZE * 256) c = MV_CLASS_5;
+  else if (z < CLASS0_SIZE * 512) c = MV_CLASS_6;
+  else assert(z < CLASS0_SIZE * 1024);  /* larger z is a caller bug */
+  if (offset)
+    *offset = z - mv_class_base(c);
+  return c;
+}
+
+/* Returns 1 when both components of ref, reduced to integer pel with >> 3,
+ * have magnitude below COMPANDED_MVREF_THRESH; otherwise returns 0. */
+int vp9_use_nmv_hp(const MV *ref) {
+  const int row_pel = abs(ref->row) >> 3;
+  const int col_pel = abs(ref->col) >> 3;
+  return row_pel < COMPANDED_MVREF_THRESH && col_pel < COMPANDED_MVREF_THRESH;
+}
+
+int vp9_get_mv_mag(MV_CLASS_TYPE c, int offset) {
+ return mv_class_base(c) + offset;  /* class base plus offset within that class */
+}
+
+/* Adds incr to the raw histogram bin of the nonzero component value v
+ * (bin index MV_MAX + v). The usehp argument is not used here. */
+static void increment_nmv_component_count(int v, nmv_component_counts *mvcomp,
+                                          int incr, int usehp) {
+  assert(v != 0);  /* should not be zero */
+  mvcomp->mvcount[v + MV_MAX] += incr;
+}
+
+/* Adds incr to every derived counter selected by the nonzero component
+ * value v: sign, class, integer bits (class0[] for class 0, bits[][] for
+ * the rest), fractional pel, and the high-precision bit when usehp is set. */
+static void increment_nmv_component(int v,
+                                    nmv_component_counts *mvcomp,
+                                    int incr,
+                                    int usehp) {
+  int is_neg, mag_m1, mv_class, off;
+  int int_pel, frac_pel, hp_bit;
+
+  assert(v != 0);  /* should not be zero */
+  is_neg = v < 0;
+  mag_m1 = (is_neg ? -v : v) - 1;  /* magnitude - 1 */
+  mv_class = vp9_get_mv_class(mag_m1, &off);
+
+  int_pel = off >> 3;         /* int mv data */
+  frac_pel = (off >> 1) & 3;  /* fractional pel mv data */
+  hp_bit = off & 1;           /* high precision mv data */
+
+  /* Sign and class are counted for every value. */
+  mvcomp->sign[is_neg] += incr;
+  mvcomp->classes[mv_class] += incr;
+
+  if (mv_class == MV_CLASS_0) {
+    /* Class 0 has dedicated integer, fractional and hp counters. */
+    mvcomp->class0[int_pel] += incr;
+    mvcomp->class0_fp[int_pel][frac_pel] += incr;
+    if (usehp)
+      mvcomp->class0_hp[hp_bit] += incr;
+  } else {
+    /* Other classes count each of the class's integer bits separately. */
+    const int num_bits = mv_class + CLASS0_BITS - 1;  /* number of bits */
+    int bit;
+
+    for (bit = 0; bit < num_bits; ++bit)
+      mvcomp->bits[bit][(int_pel >> bit) & 1] += incr;
+    mvcomp->fp[frac_pel] += incr;
+    if (usehp)
+      mvcomp->hp[hp_bit] += incr;
+  }
+}
+
+#ifdef SMOOTH_MV_COUNTS  /* off by default: the #define near the top is commented out */
+static void smooth_counts(nmv_component_counts *mvcomp) {
+ static const int flen = 3; // (filter_length + 1) / 2
+ static const int fval[] = {8, 3, 1};  /* centre tap, then the two side taps; 8 + 2 * (3 + 1) = 16 */
+ static const int fvalbits = 4;  /* 1 << 4 == 16, the sum of all taps */
+ int i;
+ unsigned int smvcount[MV_VALS];
+ vpx_memcpy(smvcount, mvcomp->mvcount, sizeof(smvcount));  /* the filter reads this snapshot */
+ smvcount[MV_MAX] = (smvcount[MV_MAX - 1] + smvcount[MV_MAX + 1]) >> 1;  /* bin MV_MAX: mean of its neighbours */
+ for (i = flen - 1; i <= MV_VALS - flen; ++i) {  /* the flen - 1 bins at each end are not rewritten */
+ int j, s = smvcount[i] * fval[0];
+ for (j = 1; j < flen; ++j)
+ s += (smvcount[i - j] + smvcount[i + j]) * fval[j];
+ mvcomp->mvcount[i] = (s + (1 << (fvalbits - 1))) >> fvalbits;  /* divide by 16 with rounding */
+ }
+}
+#endif
+
+static void counts_to_context(nmv_component_counts *mvcomp, int usehp) {
+  int mag;  /* rebuilds the counters after mvcount[] from mvcount[] itself */
+  vpx_memset(mvcomp->sign, 0, sizeof(*mvcomp) - sizeof(mvcomp->mvcount));
+  for (mag = 1; mag <= MV_MAX; ++mag) {
+    increment_nmv_component(-mag, mvcomp, mvcomp->mvcount[MV_MAX - mag], usehp);
+    increment_nmv_component(mag, mvcomp, mvcomp->mvcount[MV_MAX + mag], usehp);
+  }
+}
+
+/* Records one motion vector in mvctx: bumps its joint-type counter and the
+ * raw histogram of each nonzero component (comps[0] = row, comps[1] = col). */
+void vp9_increment_nmv(const MV *mv, const MV *ref, nmv_context_counts *mvctx,
+                       int usehp) {
+  const MV_JOINT_TYPE joint = vp9_get_mv_joint(*mv);
+  mvctx->joints[joint]++;
+  usehp = usehp && vp9_use_nmv_hp(ref);
+  if (joint == MV_JOINT_HZVNZ || joint == MV_JOINT_HNZVNZ)
+    increment_nmv_component_count(mv->row, &mvctx->comps[0], 1, usehp);
+  if (joint == MV_JOINT_HNZVZ || joint == MV_JOINT_HNZVNZ)
+    increment_nmv_component_count(mv->col, &mvctx->comps[1], 1, usehp);
+}
+
+/* Moves prep toward newp by a weight that grows with the sample count in ct
+ * (saturating at MV_COUNT_SAT) and stores it in *dest; no-op if ct is empty. */
+static void adapt_prob(vp9_prob *dest, vp9_prob prep, vp9_prob newp,
+                       unsigned int ct[2]) {
+  int factor, prob;
+  int count = ct[0] + ct[1];
+  if (!count) return;
+  if (count > MV_COUNT_SAT) count = MV_COUNT_SAT;
+  factor = MV_MAX_UPDATE_FACTOR * count / MV_COUNT_SAT;
+  prob = ((int)prep * (256 - factor) + (int)newp * factor + 128) >> 8;
+  if (prob == 0) prob = 1;  /* a stored probability is never zero */
+  if (prob > 255) prob = 255;
+  *dest = prob;
+}
+
+void vp9_counts_process(nmv_context_counts *NMVcount, int usehp) {
+ counts_to_context(&NMVcount->comps[0], usehp);  /* rebuild the derived counters of */
+ counts_to_context(&NMVcount->comps[1], usehp);  /* both components from mvcount[] */
+}
+
+void vp9_counts_to_nmv_context(
+ nmv_context_counts *NMVcount,  /* in/out: derived counters are rebuilt first */
+ nmv_context *prob,  /* out: probabilities computed from the counts */
+ int usehp,  /* nonzero: also produce the class0_hp / hp outputs */
+ unsigned int (*branch_ct_joint)[2],  /* out, like every branch_ct_* below */
+ unsigned int (*branch_ct_sign)[2],
+ unsigned int (*branch_ct_classes)[MV_CLASSES - 1][2],
+ unsigned int (*branch_ct_class0)[CLASS0_SIZE - 1][2],
+ unsigned int (*branch_ct_bits)[MV_OFFSET_BITS][2],
+ unsigned int (*branch_ct_class0_fp)[CLASS0_SIZE][4 - 1][2],
+ unsigned int (*branch_ct_fp)[4 - 1][2],
+ unsigned int (*branch_ct_class0_hp)[2],
+ unsigned int (*branch_ct_hp)[2]) {
+ int i, j, k;
+ vp9_counts_process(NMVcount, usehp);  /* turn the raw mvcount[] histograms into per-symbol counts */
+ vp9_tree_probs_from_distribution(MV_JOINTS,
+ vp9_mv_joint_encodings,
+ vp9_mv_joint_tree,
+ prob->joints,
+ branch_ct_joint,
+ NMVcount->joints,
+ 256, 1);
+ for (i = 0; i < 2; ++i) {  /* i indexes comps[] in both NMVcount and prob */
+ prob->comps[i].sign =
+ vp9_bin_prob_from_distribution(NMVcount->comps[i].sign);
+ branch_ct_sign[i][0] = NMVcount->comps[i].sign[0];  /* two-way symbol: branch counts are */
+ branch_ct_sign[i][1] = NMVcount->comps[i].sign[1];  /* just the two raw counts */
+ vp9_tree_probs_from_distribution(MV_CLASSES,
+ vp9_mv_class_encodings,
+ vp9_mv_class_tree,
+ prob->comps[i].classes,
+ branch_ct_classes[i],
+ NMVcount->comps[i].classes,
+ 256, 1);
+ vp9_tree_probs_from_distribution(CLASS0_SIZE,
+ vp9_mv_class0_encodings,
+ vp9_mv_class0_tree,
+ prob->comps[i].class0,
+ branch_ct_class0[i],
+ NMVcount->comps[i].class0,
+ 256, 1);
+ for (j = 0; j < MV_OFFSET_BITS; ++j) {
+ prob->comps[i].bits[j] = vp9_bin_prob_from_distribution(
+ NMVcount->comps[i].bits[j]);
+ branch_ct_bits[i][j][0] = NMVcount->comps[i].bits[j][0];
+ branch_ct_bits[i][j][1] = NMVcount->comps[i].bits[j][1];
+ }
+ }
+ for (i = 0; i < 2; ++i) {  /* fractional-pel symbols share vp9_mv_fp_tree */
+ for (k = 0; k < CLASS0_SIZE; ++k) {
+ vp9_tree_probs_from_distribution(4,
+ vp9_mv_fp_encodings,
+ vp9_mv_fp_tree,
+ prob->comps[i].class0_fp[k],
+ branch_ct_class0_fp[i][k],
+ NMVcount->comps[i].class0_fp[k],
+ 256, 1);
+ }
+ vp9_tree_probs_from_distribution(4,
+ vp9_mv_fp_encodings,
+ vp9_mv_fp_tree,
+ prob->comps[i].fp,
+ branch_ct_fp[i],
+ NMVcount->comps[i].fp,
+ 256, 1);
+ }
+ if (usehp) {  /* otherwise the class0_hp / hp outputs are not written at all */
+ for (i = 0; i < 2; ++i) {
+ prob->comps[i].class0_hp = vp9_bin_prob_from_distribution(
+ NMVcount->comps[i].class0_hp);
+ branch_ct_class0_hp[i][0] = NMVcount->comps[i].class0_hp[0];
+ branch_ct_class0_hp[i][1] = NMVcount->comps[i].class0_hp[1];
+
+ prob->comps[i].hp =
+ vp9_bin_prob_from_distribution(NMVcount->comps[i].hp);
+ branch_ct_hp[i][0] = NMVcount->comps[i].hp[0];
+ branch_ct_hp[i][1] = NMVcount->comps[i].hp[1];
+ }
+ }
+}
+
+void vp9_adapt_nmv_probs(VP9_COMMON *cm, int usehp) {
+ int i, j, k;
+ nmv_context prob;  /* estimates derived from cm->fc.NMVcount */
+ unsigned int branch_ct_joint[MV_JOINTS - 1][2];
+ unsigned int branch_ct_sign[2][2];
+ unsigned int branch_ct_classes[2][MV_CLASSES - 1][2];
+ unsigned int branch_ct_class0[2][CLASS0_SIZE - 1][2];
+ unsigned int branch_ct_bits[2][MV_OFFSET_BITS][2];
+ unsigned int branch_ct_class0_fp[2][CLASS0_SIZE][4 - 1][2];
+ unsigned int branch_ct_fp[2][4 - 1][2];
+ unsigned int branch_ct_class0_hp[2][2];
+ unsigned int branch_ct_hp[2][2];
+#ifdef MV_COUNT_TESTING  /* debug dump; off unless the #define near the top is enabled */
+ printf("joints count: ");
+ for (j = 0; j < MV_JOINTS; ++j) printf("%d ", cm->fc.NMVcount.joints[j]);
+ printf("\n"); fflush(stdout);
+ printf("signs count:\n");  /* NOTE(review): sign..hp are rebuilt only further down, so they may be stale here -- confirm */
+ for (i = 0; i < 2; ++i)
+ printf("%d/%d ", cm->fc.NMVcount.comps[i].sign[0], cm->fc.NMVcount.comps[i].sign[1]);
+ printf("\n"); fflush(stdout);
+ printf("classes count:\n");
+ for (i = 0; i < 2; ++i) {
+ for (j = 0; j < MV_CLASSES; ++j)
+ printf("%d ", cm->fc.NMVcount.comps[i].classes[j]);
+ printf("\n"); fflush(stdout);
+ }
+ printf("class0 count:\n");
+ for (i = 0; i < 2; ++i) {
+ for (j = 0; j < CLASS0_SIZE; ++j)
+ printf("%d ", cm->fc.NMVcount.comps[i].class0[j]);
+ printf("\n"); fflush(stdout);
+ }
+ printf("bits count:\n");
+ for (i = 0; i < 2; ++i) {
+ for (j = 0; j < MV_OFFSET_BITS; ++j)
+ printf("%d/%d ", cm->fc.NMVcount.comps[i].bits[j][0],
+ cm->fc.NMVcount.comps[i].bits[j][1]);
+ printf("\n"); fflush(stdout);
+ }
+ printf("class0_fp count:\n");
+ for (i = 0; i < 2; ++i) {
+ for (j = 0; j < CLASS0_SIZE; ++j) {
+ printf("{");
+ for (k = 0; k < 4; ++k)
+ printf("%d ", cm->fc.NMVcount.comps[i].class0_fp[j][k]);
+ printf("}, ");
+ }
+ printf("\n"); fflush(stdout);
+ }
+ printf("fp count:\n");
+ for (i = 0; i < 2; ++i) {
+ for (j = 0; j < 4; ++j)
+ printf("%d ", cm->fc.NMVcount.comps[i].fp[j]);
+ printf("\n"); fflush(stdout);
+ }
+ if (usehp) {
+ printf("class0_hp count:\n");
+ for (i = 0; i < 2; ++i)
+ printf("%d/%d ", cm->fc.NMVcount.comps[i].class0_hp[0],
+ cm->fc.NMVcount.comps[i].class0_hp[1]);
+ printf("\n"); fflush(stdout);
+ printf("hp count:\n");
+ for (i = 0; i < 2; ++i)
+ printf("%d/%d ", cm->fc.NMVcount.comps[i].hp[0],
+ cm->fc.NMVcount.comps[i].hp[1]);
+ printf("\n"); fflush(stdout);
+ }
+#endif
+#ifdef SMOOTH_MV_COUNTS
+ smooth_counts(&cm->fc.NMVcount.comps[0]);
+ smooth_counts(&cm->fc.NMVcount.comps[1]);
+#endif
+ vp9_counts_to_nmv_context(&cm->fc.NMVcount,  /* fills prob and every branch_ct_* array */
+ &prob,
+ usehp,
+ branch_ct_joint,
+ branch_ct_sign,
+ branch_ct_classes,
+ branch_ct_class0,
+ branch_ct_bits,
+ branch_ct_class0_fp,
+ branch_ct_fp,
+ branch_ct_class0_hp,
+ branch_ct_hp);
+
+ for (j = 0; j < MV_JOINTS - 1; ++j) {  /* from here on: move each pre_nmvc value toward prob, write to nmvc */
+ adapt_prob(&cm->fc.nmvc.joints[j],
+ cm->fc.pre_nmvc.joints[j],
+ prob.joints[j],
+ branch_ct_joint[j]);
+ }
+ for (i = 0; i < 2; ++i) {
+ adapt_prob(&cm->fc.nmvc.comps[i].sign,
+ cm->fc.pre_nmvc.comps[i].sign,
+ prob.comps[i].sign,
+ branch_ct_sign[i]);
+ for (j = 0; j < MV_CLASSES - 1; ++j) {
+ adapt_prob(&cm->fc.nmvc.comps[i].classes[j],
+ cm->fc.pre_nmvc.comps[i].classes[j],
+ prob.comps[i].classes[j],
+ branch_ct_classes[i][j]);
+ }
+ for (j = 0; j < CLASS0_SIZE - 1; ++j) {
+ adapt_prob(&cm->fc.nmvc.comps[i].class0[j],
+ cm->fc.pre_nmvc.comps[i].class0[j],
+ prob.comps[i].class0[j],
+ branch_ct_class0[i][j]);
+ }
+ for (j = 0; j < MV_OFFSET_BITS; ++j) {
+ adapt_prob(&cm->fc.nmvc.comps[i].bits[j],
+ cm->fc.pre_nmvc.comps[i].bits[j],
+ prob.comps[i].bits[j],
+ branch_ct_bits[i][j]);
+ }
+ }
+ for (i = 0; i < 2; ++i) {
+ for (j = 0; j < CLASS0_SIZE; ++j) {
+ for (k = 0; k < 3; ++k) {
+ adapt_prob(&cm->fc.nmvc.comps[i].class0_fp[j][k],
+ cm->fc.pre_nmvc.comps[i].class0_fp[j][k],
+ prob.comps[i].class0_fp[j][k],
+ branch_ct_class0_fp[i][j][k]);
+ }
+ }
+ for (j = 0; j < 3; ++j) {
+ adapt_prob(&cm->fc.nmvc.comps[i].fp[j],
+ cm->fc.pre_nmvc.comps[i].fp[j],
+ prob.comps[i].fp[j],
+ branch_ct_fp[i][j]);
+ }
+ }
+ if (usehp) {  /* prob's hp fields and the hp branch counts are only filled when usehp is set */
+ for (i = 0; i < 2; ++i) {
+ adapt_prob(&cm->fc.nmvc.comps[i].class0_hp,
+ cm->fc.pre_nmvc.comps[i].class0_hp,
+ prob.comps[i].class0_hp,
+ branch_ct_class0_hp[i]);
+ adapt_prob(&cm->fc.nmvc.comps[i].hp,
+ cm->fc.pre_nmvc.comps[i].hp,
+ prob.comps[i].hp,
+ branch_ct_hp[i]);
+ }
+ }
+}
+
+void vp9_entropy_mv_init(void) {  /* takes no arguments; (void) makes that a prototype */
+  /* Hand each mv tree, with its encodings table, to vp9_tokens_from_tree(). */
+  vp9_tokens_from_tree(vp9_mv_joint_encodings, vp9_mv_joint_tree);
+  vp9_tokens_from_tree(vp9_mv_class_encodings, vp9_mv_class_tree);
+  vp9_tokens_from_tree(vp9_mv_class0_encodings, vp9_mv_class0_tree);
+  vp9_tokens_from_tree(vp9_mv_fp_encodings, vp9_mv_fp_tree); }
+
+void vp9_init_mv_probs(VP9_COMMON *cm) {  /* overwrite cm->fc.nmvc with vp9_default_nmv_context */
+ vpx_memcpy(&cm->fc.nmvc, &vp9_default_nmv_context, sizeof(nmv_context));
+}
diff --git a/vp9/common/vp9_entropymv.h b/vp9/common/vp9_entropymv.h
new file mode 100644
index 0000000..66126da
--- /dev/null
+++ b/vp9/common/vp9_entropymv.h
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_COMMON_VP9_ENTROPYMV_H_
+#define VP9_COMMON_VP9_ENTROPYMV_H_
+
+#include "vp9/common/vp9_treecoder.h"
+#include "vpx_config.h"
+#include "vp9/common/vp9_blockd.h"
+
+struct VP9Common;
+
+void vp9_entropy_mv_init(void);  /* (void): a real prototype, so calls are arity-checked */
+void vp9_init_mv_probs(struct VP9Common *cm);
+
+void vp9_adapt_nmv_probs(struct VP9Common *cm, int usehp);
+int vp9_use_nmv_hp(const MV *ref);
+
+#define VP9_NMV_UPDATE_PROB 255
+//#define MV_GROUP_UPDATE
+
+#define LOW_PRECISION_MV_UPDATE /* Use 7 bit forward update */
+
+/* Symbols for coding which components are zero jointly */
+#define MV_JOINTS 4
+typedef enum {
+ MV_JOINT_ZERO = 0, /* Zero vector */
+ MV_JOINT_HNZVZ = 1, /* Vert zero, hor nonzero */
+ MV_JOINT_HZVNZ = 2, /* Hor zero, vert nonzero */
+ MV_JOINT_HNZVNZ = 3, /* Both components nonzero */
+} MV_JOINT_TYPE;
+
+extern const vp9_tree_index vp9_mv_joint_tree[2 * MV_JOINTS - 2];
+extern struct vp9_token_struct vp9_mv_joint_encodings [MV_JOINTS];
+
+/* Symbols for coding magnitude class of nonzero components */
+#define MV_CLASSES 8
+typedef enum {
+ MV_CLASS_0 = 0, /* (0, 2] integer pel */
+ MV_CLASS_1 = 1, /* (2, 4] integer pel */
+ MV_CLASS_2 = 2, /* (4, 8] integer pel */
+ MV_CLASS_3 = 3, /* (8, 16] integer pel */
+ MV_CLASS_4 = 4, /* (16, 32] integer pel */
+ MV_CLASS_5 = 5, /* (32, 64] integer pel */
+ MV_CLASS_6 = 6, /* (64, 128] integer pel */
+ MV_CLASS_7 = 7, /* (128, 256] integer pel */
+} MV_CLASS_TYPE;
+
+extern const vp9_tree_index vp9_mv_class_tree[2 * MV_CLASSES - 2];
+extern struct vp9_token_struct vp9_mv_class_encodings [MV_CLASSES];
+
+#define CLASS0_BITS 1 /* bits at integer precision for class 0 */
+#define CLASS0_SIZE (1 << CLASS0_BITS)
+#define MV_OFFSET_BITS (MV_CLASSES + CLASS0_BITS - 2)
+
+#define MV_MAX_BITS (MV_CLASSES + CLASS0_BITS + 2)
+#define MV_MAX ((1 << MV_MAX_BITS) - 1)
+#define MV_VALS ((MV_MAX << 1) + 1)
+
+extern const vp9_tree_index vp9_mv_class0_tree[2 * CLASS0_SIZE - 2];
+extern struct vp9_token_struct vp9_mv_class0_encodings[CLASS0_SIZE];
+
+extern const vp9_tree_index vp9_mv_fp_tree[2 * 4 - 2];
+extern struct vp9_token_struct vp9_mv_fp_encodings[4];
+
+typedef struct {
+ vp9_prob sign;
+ vp9_prob classes[MV_CLASSES - 1];
+ vp9_prob class0[CLASS0_SIZE - 1];
+ vp9_prob bits[MV_OFFSET_BITS];
+ vp9_prob class0_fp[CLASS0_SIZE][4 - 1];
+ vp9_prob fp[4 - 1];
+ vp9_prob class0_hp;
+ vp9_prob hp;
+} nmv_component;
+
+typedef struct {
+ vp9_prob joints[MV_JOINTS - 1];
+ nmv_component comps[2];
+} nmv_context;
+
+MV_JOINT_TYPE vp9_get_mv_joint(MV mv);
+MV_CLASS_TYPE vp9_get_mv_class(int z, int *offset);
+int vp9_get_mv_mag(MV_CLASS_TYPE c, int offset);
+
+
+typedef struct {
+ unsigned int mvcount[MV_VALS];  /* raw histogram; value v is counted in bin MV_MAX + v */
+ unsigned int sign[2];  /* sign..hp: cleared and rebuilt from mvcount by counts_to_context() */
+ unsigned int classes[MV_CLASSES];
+ unsigned int class0[CLASS0_SIZE];
+ unsigned int bits[MV_OFFSET_BITS][2];
+ unsigned int class0_fp[CLASS0_SIZE][4];
+ unsigned int fp[4];
+ unsigned int class0_hp[2];
+ unsigned int hp[2];  /* layout note: that clear is one memset from sign, so mvcount must stay the only member before sign */
+} nmv_component_counts;
+
+typedef struct {
+ unsigned int joints[MV_JOINTS];
+ nmv_component_counts comps[2];
+} nmv_context_counts;
+
+void vp9_increment_nmv(const MV *mv, const MV *ref, nmv_context_counts *mvctx,
+ int usehp);
+extern const nmv_context vp9_default_nmv_context;
+void vp9_counts_to_nmv_context(
+ nmv_context_counts *NMVcount,
+ nmv_context *prob,
+ int usehp,
+ unsigned int (*branch_ct_joint)[2],
+ unsigned int (*branch_ct_sign)[2],
+ unsigned int (*branch_ct_classes)[MV_CLASSES - 1][2],
+ unsigned int (*branch_ct_class0)[CLASS0_SIZE - 1][2],
+ unsigned int (*branch_ct_bits)[MV_OFFSET_BITS][2],
+ unsigned int (*branch_ct_class0_fp)[CLASS0_SIZE][4 - 1][2],
+ unsigned int (*branch_ct_fp)[4 - 1][2],
+ unsigned int (*branch_ct_class0_hp)[2],
+ unsigned int (*branch_ct_hp)[2]);
+void vp9_counts_process(nmv_context_counts *NMVcount, int usehp);
+#endif
diff --git a/vp9/common/vp9_extend.c b/vp9/common/vp9_extend.c
new file mode 100644
index 0000000..61c7abf
--- /dev/null
+++ b/vp9/common/vp9_extend.c
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vp9/common/vp9_extend.h"
+#include "vpx_mem/vpx_mem.h"
+
+/* Copies a w x h plane from s to d, then pads the copy in d by replicating
+ * its edge pixels: el/er columns to the left/right of every row and et/eb
+ * rows above/below. The border rows are copied from rows that already
+ * carry their side borders, so the corners are filled too. Writes begin
+ * el bytes to the left of d and et rows above it.
+ */
+static void copy_and_extend_plane(unsigned char *s, /* source */
+                                  int sp, /* source pitch */
+                                  unsigned char *d, /* destination */
+                                  int dp, /* destination pitch */
+                                  int h, /* height */
+                                  int w, /* width */
+                                  int et, /* extend top border */
+                                  int el, /* extend left border */
+                                  int eb, /* extend bottom border */
+                                  int er) { /* extend right border */
+  unsigned char *src_row = s;
+  unsigned char *dst_row = d;
+  unsigned char *first_row;
+  unsigned char *last_row;
+  const int full_width = el + er + w;
+  int row;
+
+  /* Pass 1, once per image row: left border from the first pixel, the
+   * pixels themselves, right border from the last pixel. */
+  for (row = 0; row < h; row++) {
+    vpx_memset(dst_row - el, src_row[0], el);
+    vpx_memcpy(dst_row, src_row, w);
+    vpx_memset(dst_row + w, src_row[w - 1], er);
+    src_row += sp;
+    dst_row += dp;
+  }
+
+  /* Pass 2 works on full destination rows, starting at the left border,
+   * and copies the first and last of them outwards. */
+  first_row = d - el;
+  last_row = first_row + dp * (h - 1);
+
+  /* Top border: et copies of the first extended row, topmost first. */
+  for (row = -et; row < 0; row++) {
+    vpx_memcpy(first_row + dp * row, first_row, full_width);
+  }
+
+  /* Bottom border: eb copies of the last extended row. */
+  for (row = 1; row <= eb; row++) {
+    vpx_memcpy(last_row + dp * row, last_row, full_width);
+  }
+
+  /* Nothing is returned; all output is written through d. */
+}
+
+/* Copies all three planes of src into dst and extends each by dst's border
+ * plus whatever extra width/height dst has beyond src. */
+void vp9_copy_and_extend_frame(YV12_BUFFER_CONFIG *src,
+                               YV12_BUFFER_CONFIG *dst) {
+  const int y_border = dst->border;
+  const int uv_border = dst->border >> 1;
+  /* Bottom/right absorb any size difference between dst and src. */
+  const int y_eb = y_border + dst->y_height - src->y_height;
+  const int y_er = y_border + dst->y_width - src->y_width;
+  const int uv_eb = uv_border + dst->uv_height - src->uv_height;
+  const int uv_er = uv_border + dst->uv_width - src->uv_width;
+
+  copy_and_extend_plane(src->y_buffer, src->y_stride,
+                        dst->y_buffer, dst->y_stride,
+                        src->y_height, src->y_width,
+                        y_border, y_border, y_eb, y_er);
+
+  copy_and_extend_plane(src->u_buffer, src->uv_stride,
+                        dst->u_buffer, dst->uv_stride,
+                        src->uv_height, src->uv_width,
+                        uv_border, uv_border, uv_eb, uv_er);
+
+  copy_and_extend_plane(src->v_buffer, src->uv_stride,
+                        dst->v_buffer, dst->uv_stride,
+                        src->uv_height, src->uv_width,
+                        uv_border, uv_border, uv_eb, uv_er);
+}
+
+/* Copies the srcw x srch rectangle at (srcx, srcy) from src to the same
+ * position in dst, extending only the sides of the rectangle that lie on
+ * the edge of the source frame. */
+void vp9_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src,
+                                         YV12_BUFFER_CONFIG *dst,
+                                         int srcy, int srcx,
+                                         int srch, int srcw) {
+  // If a side is not touching the frame boundary then don't extend it.
+  int et = srcy ? 0 : dst->border;
+  int el = srcx ? 0 : dst->border;
+  int eb = (srcy + srch != src->y_height)
+               ? 0 : dst->border + dst->y_height - src->y_height;
+  int er = (srcx + srcw != src->y_width)
+               ? 0 : dst->border + dst->y_width - src->y_width;
+  const int src_y_offset = srcy * src->y_stride + srcx;
+  const int dst_y_offset = srcy * dst->y_stride + srcx;
+  /* NOTE(review): the chroma row offset halves srcy * stride, which looks
+   * like it assumes an even srcy -- confirm against callers. */
+  const int src_uv_offset = ((srcy * src->uv_stride) >> 1) + (srcx >> 1);
+  const int dst_uv_offset = ((srcy * dst->uv_stride) >> 1) + (srcx >> 1);
+
+  /* Y plane, using the rectangle as given. */
+  copy_and_extend_plane(src->y_buffer + src_y_offset,
+                        src->y_stride,
+                        dst->y_buffer + dst_y_offset,
+                        dst->y_stride,
+                        srch, srcw,
+                        et, el, eb, er);
+
+  /* U and V planes: every extent is halved with (x + 1) >> 1. */
+  et = (et + 1) >> 1;
+  el = (el + 1) >> 1;
+  eb = (eb + 1) >> 1;
+  er = (er + 1) >> 1;
+  srch = (srch + 1) >> 1;
+  srcw = (srcw + 1) >> 1;
+
+  copy_and_extend_plane(src->u_buffer + src_uv_offset,
+                        src->uv_stride,
+                        dst->u_buffer + dst_uv_offset,
+                        dst->uv_stride,
+                        srch, srcw,
+                        et, el, eb, er);
+
+  copy_and_extend_plane(src->v_buffer + src_uv_offset,
+                        src->uv_stride,
+                        dst->v_buffer + dst_uv_offset,
+                        dst->uv_stride,
+                        srch, srcw,
+                        et, el, eb, er);
+}
+
+/* Note the extension is only for the last two rows touched here, for intra
+ * prediction purposes. Starting from the given pointers, rows 14 and 15 of
+ * Y and rows 6 and 7 of U and V each get the byte at offset -1 copied into
+ * offsets 0..3.
+ */
+void vp9_extend_mb_row(YV12_BUFFER_CONFIG *ybf, unsigned char *YPtr,
+                       unsigned char *UPtr, unsigned char *VPtr) {
+  int row;
+  int i;
+
+  for (row = 0; row < 2; row++) {
+    /* Row starts for this pass: 14 + row for Y, 6 + row for U and V. */
+    unsigned char *const y = YPtr + ybf->y_stride * (14 + row);
+    unsigned char *const u = UPtr + ybf->uv_stride * (6 + row);
+    unsigned char *const v = VPtr + ybf->uv_stride * (6 + row);
+
+    /* Replicate the byte just before each row start into the next four
+     * positions of that row. */
+    for (i = 0; i < 4; i++) {
+      y[i] = y[-1];
+      u[i] = u[-1];
+      v[i] = v[-1];
+    }
+  }
+}
diff --git a/vp9/common/vp9_extend.h b/vp9/common/vp9_extend.h
new file mode 100644
index 0000000..55036f8
--- /dev/null
+++ b/vp9/common/vp9_extend.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_COMMON_VP9_EXTEND_H_
+#define VP9_COMMON_VP9_EXTEND_H_
+
+#include "vpx_scale/yv12config.h"
+
+void vp9_extend_mb_row(YV12_BUFFER_CONFIG *ybf, unsigned char *YPtr,
+ unsigned char *UPtr, unsigned char *VPtr);
+
+void vp9_copy_and_extend_frame(YV12_BUFFER_CONFIG *src,
+ YV12_BUFFER_CONFIG *dst);
+
+void vp9_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src,
+ YV12_BUFFER_CONFIG *dst,
+ int srcy, int srcx,
+ int srch, int srcw);
+
+#endif  // VP9_COMMON_VP9_EXTEND_H_
diff --git a/vp9/common/vp9_filter.c b/vp9/common/vp9_filter.c
new file mode 100644
index 0000000..a59d1e6
--- /dev/null
+++ b/vp9/common/vp9_filter.c
@@ -0,0 +1,1157 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include <stdlib.h>
+#include "vp9/common/vp9_filter.h"
+#include "vpx_ports/mem.h"
+#include "vp9_rtcd.h"
+
+DECLARE_ALIGNED(16, const short, vp9_bilinear_filters[SUBPEL_SHIFTS][2]) = {
+ { 128, 0 },
+ { 120, 8 },
+ { 112, 16 },
+ { 104, 24 },
+ { 96, 32 },
+ { 88, 40 },
+ { 80, 48 },
+ { 72, 56 },
+ { 64, 64 },
+ { 56, 72 },
+ { 48, 80 },
+ { 40, 88 },
+ { 32, 96 },
+ { 24, 104 },
+ { 16, 112 },
+ { 8, 120 }
+};
+
+#define FILTER_ALPHA 0
+#define FILTER_ALPHA_SHARP 1
+DECLARE_ALIGNED(16, const short, vp9_sub_pel_filters_8[SUBPEL_SHIFTS][8]) = {
+#if FILTER_ALPHA == 0
+ /* Lagrangian interpolation filter */
+ { 0, 0, 0, 128, 0, 0, 0, 0},
+ { 0, 1, -5, 126, 8, -3, 1, 0},
+ { -1, 3, -10, 122, 18, -6, 2, 0},
+ { -1, 4, -13, 118, 27, -9, 3, -1},
+ { -1, 4, -16, 112, 37, -11, 4, -1},
+ { -1, 5, -18, 105, 48, -14, 4, -1},
+ { -1, 5, -19, 97, 58, -16, 5, -1},
+ { -1, 6, -19, 88, 68, -18, 5, -1},
+ { -1, 6, -19, 78, 78, -19, 6, -1},
+ { -1, 5, -18, 68, 88, -19, 6, -1},
+ { -1, 5, -16, 58, 97, -19, 5, -1},
+ { -1, 4, -14, 48, 105, -18, 5, -1},
+ { -1, 4, -11, 37, 112, -16, 4, -1},
+ { -1, 3, -9, 27, 118, -13, 4, -1},
+ { 0, 2, -6, 18, 122, -10, 3, -1},
+ { 0, 1, -3, 8, 126, -5, 1, 0}
+#elif FILTER_ALPHA == 50
+ /* Generated using MATLAB:
+ * alpha = 0.5;
+ * b=intfilt(8,4,alpha);
+ * bi=round(128*b);
+ * ba=flipud(reshape([bi 0], 8, 8));
+ * disp(num2str(ba, '%d,'))
+ */
+ { 0, 0, 0, 128, 0, 0, 0, 0},
+ { 0, 1, -5, 126, 8, -3, 1, 0},
+ { 0, 2, -10, 122, 18, -6, 2, 0},
+ { -1, 3, -13, 118, 27, -9, 3, 0},
+ { -1, 4, -16, 112, 37, -11, 3, 0},
+ { -1, 5, -17, 104, 48, -14, 4, -1},
+ { -1, 5, -18, 96, 58, -16, 5, -1},
+ { -1, 5, -19, 88, 68, -17, 5, -1},
+ { -1, 5, -18, 78, 78, -18, 5, -1},
+ { -1, 5, -17, 68, 88, -19, 5, -1},
+ { -1, 5, -16, 58, 96, -18, 5, -1},
+ { -1, 4, -14, 48, 104, -17, 5, -1},
+ { 0, 3, -11, 37, 112, -16, 4, -1},
+ { 0, 3, -9, 27, 118, -13, 3, -1},
+ { 0, 2, -6, 18, 122, -10, 2, 0},
+ { 0, 1, -3, 8, 126, -5, 1, 0}
+#endif /* FILTER_ALPHA */
+};
+
+DECLARE_ALIGNED(16, const short, vp9_sub_pel_filters_8s[SUBPEL_SHIFTS][8]) = {
+#if FILTER_ALPHA_SHARP == 1
+ /* dct based filter */
+ {0, 0, 0, 128, 0, 0, 0, 0},
+ {-1, 3, -7, 127, 8, -3, 1, 0},
+ {-2, 5, -13, 125, 17, -6, 3, -1},
+ {-3, 7, -17, 121, 27, -10, 5, -2},
+ {-4, 9, -20, 115, 37, -13, 6, -2},
+ {-4, 10, -23, 108, 48, -16, 8, -3},
+ {-4, 10, -24, 100, 59, -19, 9, -3},
+ {-4, 11, -24, 90, 70, -21, 10, -4},
+ {-4, 11, -23, 80, 80, -23, 11, -4},
+ {-4, 10, -21, 70, 90, -24, 11, -4},
+ {-3, 9, -19, 59, 100, -24, 10, -4},
+ {-3, 8, -16, 48, 108, -23, 10, -4},
+ {-2, 6, -13, 37, 115, -20, 9, -4},
+ {-2, 5, -10, 27, 121, -17, 7, -3},
+ {-1, 3, -6, 17, 125, -13, 5, -2},
+ {0, 1, -3, 8, 127, -7, 3, -1}
+#elif FILTER_ALPHA_SHARP == 75
+ /* alpha = 0.75 */
+ {0, 0, 0, 128, 0, 0, 0, 0},
+ {-1, 2, -6, 126, 9, -3, 2, -1},
+ {-1, 4, -11, 123, 18, -7, 3, -1},
+ {-2, 6, -16, 119, 28, -10, 5, -2},
+ {-2, 7, -19, 113, 38, -13, 6, -2},
+ {-3, 8, -21, 106, 49, -16, 7, -2},
+ {-3, 9, -22, 99, 59, -19, 8, -3},
+ {-3, 9, -23, 90, 70, -21, 9, -3},
+ {-3, 9, -22, 80, 80, -22, 9, -3},
+ {-3, 9, -21, 70, 90, -23, 9, -3},
+ {-3, 8, -19, 59, 99, -22, 9, -3},
+ {-2, 7, -16, 49, 106, -21, 8, -3},
+ {-2, 6, -13, 38, 113, -19, 7, -2},
+ {-2, 5, -10, 28, 119, -16, 6, -2},
+ {-1, 3, -7, 18, 123, -11, 4, -1},
+ {-1, 2, -3, 9, 126, -6, 2, -1}
+#endif /* FILTER_ALPHA_SHARP */
+};
+
+DECLARE_ALIGNED(16, const short, vp9_sub_pel_filters_6[SUBPEL_SHIFTS][6]) = {
+ {0, 0, 128, 0, 0, 0},
+ {1, -5, 125, 8, -2, 1},
+ {1, -8, 122, 17, -5, 1},
+ {2, -11, 116, 27, -8, 2},
+ {3, -14, 110, 37, -10, 2},
+ {3, -15, 103, 47, -12, 2},
+ {3, -16, 95, 57, -14, 3},
+ {3, -16, 86, 67, -15, 3},
+ {3, -16, 77, 77, -16, 3},
+ {3, -15, 67, 86, -16, 3},
+ {3, -14, 57, 95, -16, 3},
+ {2, -12, 47, 103, -15, 3},
+ {2, -10, 37, 110, -14, 3},
+ {2, -8, 27, 116, -11, 2},
+ {1, -5, 17, 122, -8, 1},
+ {1, -2, 8, 125, -5, 1}
+};
+
+static void filter_block2d_first_pass_6(unsigned char *src_ptr, /* 6-tap pass: byte input, clamped int output */
+ int *output_ptr, /* packed output: output_width ints per row */
+ unsigned int src_pixels_per_line, /* distance between source rows */
+ unsigned int pixel_step, /* distance between the six taps */
+ unsigned int output_height,
+ unsigned int output_width,
+ const short *vp9_filter) { /* six taps; taps 0-1 read before src_ptr, taps 3-5 after it */
+ unsigned int i, j;
+ int Temp;
+
+ for (i = 0; i < output_height; i++) {
+ for (j = 0; j < output_width; j++) {
+ Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp9_filter[0]) +
+ ((int)src_ptr[-1 * (int)pixel_step] * vp9_filter[1]) +
+ ((int)src_ptr[0] * vp9_filter[2]) +
+ ((int)src_ptr[pixel_step] * vp9_filter[3]) +
+ ((int)src_ptr[2 * pixel_step] * vp9_filter[4]) +
+ ((int)src_ptr[3 * pixel_step] * vp9_filter[5]) +
+ (VP9_FILTER_WEIGHT >> 1); /* Rounding */
+
+ /* Normalize back to 0-255: shift out the filter scale, then clamp */
+ Temp = Temp >> VP9_FILTER_SHIFT;
+
+ if (Temp < 0)
+ Temp = 0;
+ else if (Temp > 255)
+ Temp = 255;
+
+ output_ptr[j] = Temp;
+ src_ptr++;
+ }
+
+ /* Next row: src_ptr already advanced output_width, so add the remainder of the source row */
+ src_ptr += src_pixels_per_line - output_width;
+ output_ptr += output_width;
+ }
+}
+
+static void filter_block2d_second_pass_6(int *src_ptr, /* 6-tap pass: int input, clamped byte output */
+ unsigned char *output_ptr,
+ int output_pitch, /* distance between output rows */
+ unsigned int src_pixels_per_line, /* distance between input rows */
+ unsigned int pixel_step, /* distance between the six taps */
+ unsigned int output_height,
+ unsigned int output_width,
+ const short *vp9_filter) { /* six taps; taps 0-1 read before src_ptr, taps 3-5 after it */
+ unsigned int i, j;
+ int Temp;
+
+ for (i = 0; i < output_height; i++) {
+ for (j = 0; j < output_width; j++) {
+ /* Apply filter */
+ Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp9_filter[0]) +
+ ((int)src_ptr[-1 * (int)pixel_step] * vp9_filter[1]) +
+ ((int)src_ptr[0] * vp9_filter[2]) +
+ ((int)src_ptr[pixel_step] * vp9_filter[3]) +
+ ((int)src_ptr[2 * pixel_step] * vp9_filter[4]) +
+ ((int)src_ptr[3 * pixel_step] * vp9_filter[5]) +
+ (VP9_FILTER_WEIGHT >> 1); /* Rounding */
+
+ /* Normalize back to 0-255: shift out the filter scale, then clamp */
+ Temp = Temp >> VP9_FILTER_SHIFT;
+
+ if (Temp < 0)
+ Temp = 0;
+ else if (Temp > 255)
+ Temp = 255;
+
+ output_ptr[j] = (unsigned char)Temp;
+ src_ptr++;
+ }
+
+ /* Start next row: input advances by the rest of its row, output by its pitch */
+ src_ptr += src_pixels_per_line - output_width;
+ output_ptr += output_pitch;
+ }
+}
+
+/*
+ * The only functional difference between filter_block2d_second_pass_6()
+ * and this function is that filter_block2d_second_pass_6() does a sixtap
+ * filter on the input and stores it in the output. This function
+ * (filter_block2d_second_pass_avg_6()) does a sixtap filter on the input,
+ * and then averages that with the content already present in the output
+ * ((filter_result + dest + 1) >> 1) and stores that in the output.
+ */
+static void filter_block2d_second_pass_avg_6(int *src_ptr,
+ unsigned char *output_ptr, /* read and written: holds the values to average with */
+ int output_pitch,
+ unsigned int src_pixels_per_line,
+ unsigned int pixel_step,
+ unsigned int output_height,
+ unsigned int output_width,
+ const short *vp9_filter) {
+ unsigned int i, j;
+ int Temp;
+
+ for (i = 0; i < output_height; i++) {
+ for (j = 0; j < output_width; j++) {
+ /* Apply filter */
+ Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp9_filter[0]) +
+ ((int)src_ptr[-1 * (int)pixel_step] * vp9_filter[1]) +
+ ((int)src_ptr[0] * vp9_filter[2]) +
+ ((int)src_ptr[pixel_step] * vp9_filter[3]) +
+ ((int)src_ptr[2 * pixel_step] * vp9_filter[4]) +
+ ((int)src_ptr[3 * pixel_step] * vp9_filter[5]) +
+ (VP9_FILTER_WEIGHT >> 1); /* Rounding */
+
+ /* Normalize back to 0-255: shift out the filter scale, then clamp */
+ Temp = Temp >> VP9_FILTER_SHIFT;
+
+ if (Temp < 0)
+ Temp = 0;
+ else if (Temp > 255)
+ Temp = 255;
+
+ output_ptr[j] = (unsigned char)((output_ptr[j] + Temp + 1) >> 1); /* rounded average with existing output */
+ src_ptr++;
+ }
+
+ /* Start next row */
+ src_ptr += src_pixels_per_line - output_width;
+ output_ptr += output_pitch;
+ }
+}
+
+#define Interp_Extend 3 /* used below to size FData and to offset src_ptr/FData by (Interp_Extend - 1) rows */
+static void filter_block2d_6(unsigned char *src_ptr,
+ unsigned char *output_ptr,
+ unsigned int src_pixels_per_line,
+ int output_pitch,
+ const short *HFilter,
+ const short *VFilter) {
+ int FData[(3 + Interp_Extend * 2) * 4]; /* Temp data buffer: 9 rows of 4 first-pass values */
+
+ /* First filter 1-D horizontally: 9 rows of 4 outputs, starting 2 rows above src_ptr */
+ filter_block2d_first_pass_6(src_ptr - ((Interp_Extend - 1) * src_pixels_per_line), FData, src_pixels_per_line, 1,
+ 3 + Interp_Extend * 2, 4, HFilter);
+
+ /* then filter vertically: 4x4 outputs from FData row 2 onward, taps one FData row (4 ints) apart */
+ filter_block2d_second_pass_6(FData + 4 * (Interp_Extend - 1), output_ptr, output_pitch, 4, 4, 4, 4, VFilter);
+}
+
+
+void vp9_sixtap_predict_c(unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch) {
+ const short *HFilter;
+ const short *VFilter;
+
+ HFilter = vp9_sub_pel_filters_6[xoffset]; /* 6 tap */
+ VFilter = vp9_sub_pel_filters_6[yoffset]; /* 6 tap */
+
+ filter_block2d_6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter);
+}
+
+/*
+ * The difference between filter_block2d_6() and filter_block2d_avg_6() is
+ * that filter_block2d_6() does a 6-tap filter and stores it in the output
+ * buffer, whereas filter_block2d_avg_6() does the same 6-tap filter, and
+ * then averages that with the content already present in the output
+ * ((filter_result + dest + 1) >> 1) and stores that in the output.
+ */
+static void filter_block2d_avg_6(unsigned char *src_ptr,
+ unsigned char *output_ptr,
+ unsigned int src_pixels_per_line,
+ int output_pitch,
+ const short *HFilter,
+ const short *VFilter) {
+ int FData[(3 + Interp_Extend * 2) * 4]; /* Temp data buffer: 9 rows of 4 first-pass values */
+
+ /* First filter 1-D horizontally: 9 rows of 4 outputs, starting 2 rows above src_ptr */
+ filter_block2d_first_pass_6(src_ptr - ((Interp_Extend - 1) * src_pixels_per_line),
+ FData, src_pixels_per_line, 1,
+ 3 + Interp_Extend * 2, 4, HFilter);
+
+ /* then filter vertically and average into the existing 4x4 output */
+ filter_block2d_second_pass_avg_6(FData + 4 * (Interp_Extend - 1), output_ptr,
+ output_pitch, 4, 4, 4, 4, VFilter);
+}
+
+void vp9_sixtap_predict_avg_c
+(
+ unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch
+) {
+ const short *HFilter;
+ const short *VFilter;
+
+ HFilter = vp9_sub_pel_filters_6[xoffset]; /* 6 tap */
+ VFilter = vp9_sub_pel_filters_6[yoffset]; /* 6 tap */
+
+ filter_block2d_avg_6(src_ptr, dst_ptr, src_pixels_per_line,
+ dst_pitch, HFilter, VFilter);
+}
+
+void vp9_sixtap_predict8x8_c
+(
+ unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch
+) {
+ const short *HFilter;
+ const short *VFilter;
+ // FData holds 13 rows of 8 first-pass values: 2 rows above the block, 8 block rows, 3 rows below.
+ int FData[(7 + Interp_Extend * 2) * 8]; /* Temp data buffer used in filtering */
+
+ HFilter = vp9_sub_pel_filters_6[xoffset]; /* 6 tap, row selected by xoffset */
+ VFilter = vp9_sub_pel_filters_6[yoffset]; /* 6 tap, row selected by yoffset */
+
+ /* First filter 1-D horizontally... */
+ filter_block2d_first_pass_6(src_ptr - ((Interp_Extend - 1) * src_pixels_per_line), FData, src_pixels_per_line, 1,
+ 7 + Interp_Extend * 2, 8, HFilter);
+
+
+ /* then filter vertically... */
+ filter_block2d_second_pass_6(FData + 8 * (Interp_Extend - 1), dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);
+
+}
+
+void vp9_sixtap_predict_avg8x8_c
+(
+ unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch
+) {
+ const short *HFilter;
+ const short *VFilter;
+ // FData holds 13 rows of 8 first-pass values: 2 rows above the block, 8 block rows, 3 rows below.
+ int FData[(7 + Interp_Extend * 2) * 8]; /* Temp data buffer used in filtering */
+
+ HFilter = vp9_sub_pel_filters_6[xoffset]; /* 6 tap, row selected by xoffset */
+ VFilter = vp9_sub_pel_filters_6[yoffset]; /* 6 tap, row selected by yoffset */
+
+ /* First filter 1-D horizontally... */
+ filter_block2d_first_pass_6(src_ptr - ((Interp_Extend - 1) * src_pixels_per_line), FData, src_pixels_per_line, 1,
+ 7 + Interp_Extend * 2, 8, HFilter);
+
+ /* then filter vertically, averaging the result into what dst_ptr already holds */
+ filter_block2d_second_pass_avg_6(FData + 8 * (Interp_Extend - 1), dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);
+}
+
+void vp9_sixtap_predict8x4_c
+(
+ unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch
+) {
+ const short *HFilter;
+ const short *VFilter;
+ // FData holds 9 rows of 8 first-pass values: 2 rows above the block, 4 block rows, 3 rows below.
+ int FData[(3 + Interp_Extend * 2) * 8]; /* Temp data buffer used in filtering */
+
+ HFilter = vp9_sub_pel_filters_6[xoffset]; /* 6 tap, row selected by xoffset */
+ VFilter = vp9_sub_pel_filters_6[yoffset]; /* 6 tap, row selected by yoffset */
+
+ /* First filter 1-D horizontally... */
+ filter_block2d_first_pass_6(src_ptr - ((Interp_Extend - 1) * src_pixels_per_line), FData, src_pixels_per_line, 1,
+ 3 + Interp_Extend * 2, 8, HFilter);
+
+
+ /* then filter vertically: 4 output rows of 8 */
+ filter_block2d_second_pass_6(FData + 8 * (Interp_Extend - 1), dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter);
+
+}
+
+void vp9_sixtap_predict16x16_c
+(
+ unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch
+) {
+ const short *HFilter;
+ const short *VFilter;
+ // FData holds 21 rows of 16 first-pass values: 2 rows above the block, 16 block rows, 3 rows below.
+ int FData[(15 + Interp_Extend * 2) * 16]; /* Temp data buffer used in filtering */
+
+
+ HFilter = vp9_sub_pel_filters_6[xoffset]; /* 6 tap, row selected by xoffset */
+ VFilter = vp9_sub_pel_filters_6[yoffset]; /* 6 tap, row selected by yoffset */
+
+ /* First filter 1-D horizontally... */
+ filter_block2d_first_pass_6(src_ptr - ((Interp_Extend - 1) * src_pixels_per_line), FData, src_pixels_per_line, 1,
+ 15 + Interp_Extend * 2, 16, HFilter);
+
+ /* then filter vertically... */
+ filter_block2d_second_pass_6(FData + 16 * (Interp_Extend - 1), dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter);
+
+}
+
+void vp9_sixtap_predict_avg16x16_c
+(
+ unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch
+) {
+ const short *HFilter;
+ const short *VFilter;
+ // FData holds 21 rows of 16 first-pass values: 2 rows above the block, 16 block rows, 3 rows below.
+ int FData[(15 + Interp_Extend * 2) * 16]; /* Temp data buffer used in filtering */
+
+ HFilter = vp9_sub_pel_filters_6[xoffset]; /* 6 tap, row selected by xoffset */
+ VFilter = vp9_sub_pel_filters_6[yoffset]; /* 6 tap, row selected by yoffset */
+
+ /* First filter 1-D horizontally... */
+ filter_block2d_first_pass_6(src_ptr - ((Interp_Extend - 1) * src_pixels_per_line), FData,
+ src_pixels_per_line, 1, 15 + Interp_Extend * 2, 16, HFilter);
+
+ /* then filter vertically, averaging the result into what dst_ptr already holds */
+ filter_block2d_second_pass_avg_6(FData + 16 * (Interp_Extend - 1), dst_ptr, dst_pitch,
+ 16, 16, 16, 16, VFilter);
+}
+
+typedef enum {
+ VPX_FILTER_4x4 = 0,
+ VPX_FILTER_8x8 = 1,
+ VPX_FILTER_8x4 = 2,
+ VPX_FILTER_16x16 = 3,
+} filter_size_t;
+
+static const unsigned int filter_size_to_wh[][2] = {
+ {4, 4},
+ {8, 8},
+ {8, 4},
+ {16,16},
+};
+
+static void filter_block2d_8_c(const unsigned char *src_ptr,
+ const unsigned int src_stride,
+ const short *HFilter,
+ const short *VFilter,
+ const filter_size_t filter_size,
+ unsigned char *dst_ptr,
+ unsigned int dst_stride) {
+ const unsigned int output_width = filter_size_to_wh[filter_size][0];
+ const unsigned int output_height = filter_size_to_wh[filter_size][1];
+
+ // 2-D separable 8-tap filter. Between passes, we use an intermediate buffer
+ // whose height is extended to have enough horizontally filtered values as
+ // input for the vertical pass. This buffer is allocated to be big enough for
+ // the largest block type we support.
+ const int kInterp_Extend = 4;
+ const unsigned int intermediate_height =
+ (kInterp_Extend - 1) + output_height + kInterp_Extend;
+
+ /* Size of intermediate_buffer is max_intermediate_height * filter_max_width,
+ * where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
+ * + kInterp_Extend
+ * = 3 + 16 + 4
+ * = 23
+ * and filter_max_width = 16
+ */
+ unsigned char intermediate_buffer[23 * 16];
+ const int intermediate_next_stride = 1 - (int)(intermediate_height * output_width); // negative step, computed as int rather than via unsigned wrap-around
+
+ // Horizontal pass (src -> transposed intermediate).
+ {
+ unsigned char *output_ptr = intermediate_buffer;
+ const int src_next_row_stride = src_stride - output_width;
+ unsigned int i, j;
+ src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1); // start 3 rows up and 3 pixels left so the taps are src_ptr[0..7]
+ for (i = 0; i < intermediate_height; i++) {
+ for (j = 0; j < output_width; j++) {
+ // Apply filter...
+ int temp = ((int)src_ptr[0] * HFilter[0]) +
+ ((int)src_ptr[1] * HFilter[1]) +
+ ((int)src_ptr[2] * HFilter[2]) +
+ ((int)src_ptr[3] * HFilter[3]) +
+ ((int)src_ptr[4] * HFilter[4]) +
+ ((int)src_ptr[5] * HFilter[5]) +
+ ((int)src_ptr[6] * HFilter[6]) +
+ ((int)src_ptr[7] * HFilter[7]) +
+ (VP9_FILTER_WEIGHT >> 1); // Rounding
+
+ // Normalize back to 0-255...
+ temp >>= VP9_FILTER_SHIFT;
+ if (temp < 0) {
+ temp = 0;
+ } else if (temp > 255) {
+ temp = 255;
+ }
+ src_ptr++;
+ *output_ptr = temp;
+ output_ptr += intermediate_height; // transposed store: next output pixel goes to the next column
+ }
+ src_ptr += src_next_row_stride;
+ output_ptr += intermediate_next_stride; // rewind to the first column, one element further down
+ }
+ }
+
+ // Vertical pass (transposed intermediate -> dst).
+ {
+ const unsigned char *inter_ptr = intermediate_buffer; // own name: does not shadow the src_ptr parameter
+ const int dst_next_row_stride = dst_stride - output_width;
+ unsigned int i, j;
+ for (i = 0; i < output_height; i++) {
+ for (j = 0; j < output_width; j++) {
+ // Apply filter (the 8 vertical taps are contiguous in the transposed buffer)...
+ int temp = ((int)inter_ptr[0] * VFilter[0]) +
+ ((int)inter_ptr[1] * VFilter[1]) +
+ ((int)inter_ptr[2] * VFilter[2]) +
+ ((int)inter_ptr[3] * VFilter[3]) +
+ ((int)inter_ptr[4] * VFilter[4]) +
+ ((int)inter_ptr[5] * VFilter[5]) +
+ ((int)inter_ptr[6] * VFilter[6]) +
+ ((int)inter_ptr[7] * VFilter[7]) +
+ (VP9_FILTER_WEIGHT >> 1); // Rounding
+
+ // Normalize back to 0-255...
+ temp >>= VP9_FILTER_SHIFT;
+ if (temp < 0) {
+ temp = 0;
+ } else if (temp > 255) {
+ temp = 255;
+ }
+
+ inter_ptr += intermediate_height;
+ *dst_ptr++ = (unsigned char)temp;
+ }
+ inter_ptr += intermediate_next_stride;
+ dst_ptr += dst_next_row_stride;
+ }
+ }
+}
+
+void vp9_filter_block2d_4x4_8_c(const unsigned char *src_ptr,
+ const unsigned int src_stride,
+ const short *HFilter_aligned16,
+ const short *VFilter_aligned16,
+ unsigned char *dst_ptr,
+ unsigned int dst_stride) {
+ filter_block2d_8_c(src_ptr, src_stride,
+ HFilter_aligned16, VFilter_aligned16,
+ VPX_FILTER_4x4, dst_ptr, dst_stride);
+}
+
+void vp9_filter_block2d_8x4_8_c(const unsigned char *src_ptr,
+ const unsigned int src_stride,
+ const short *HFilter_aligned16,
+ const short *VFilter_aligned16,
+ unsigned char *dst_ptr,
+ unsigned int dst_stride) {
+ filter_block2d_8_c(src_ptr, src_stride,
+ HFilter_aligned16, VFilter_aligned16,
+ VPX_FILTER_8x4, dst_ptr, dst_stride);
+}
+
+void vp9_filter_block2d_8x8_8_c(const unsigned char *src_ptr,
+ const unsigned int src_stride,
+ const short *HFilter_aligned16,
+ const short *VFilter_aligned16,
+ unsigned char *dst_ptr,
+ unsigned int dst_stride) {
+ filter_block2d_8_c(src_ptr, src_stride,
+ HFilter_aligned16, VFilter_aligned16,
+ VPX_FILTER_8x8, dst_ptr, dst_stride);
+}
+
+void vp9_filter_block2d_16x16_8_c(const unsigned char *src_ptr,
+ const unsigned int src_stride,
+ const short *HFilter_aligned16,
+ const short *VFilter_aligned16,
+ unsigned char *dst_ptr,
+ unsigned int dst_stride) {
+ filter_block2d_8_c(src_ptr, src_stride,
+ HFilter_aligned16, VFilter_aligned16,
+ VPX_FILTER_16x16, dst_ptr, dst_stride);
+}
+
+static void block2d_average_c(unsigned char *src,
+ unsigned int src_stride,
+ unsigned char *output_ptr,
+ unsigned int output_stride,
+ const filter_size_t filter_size) {
+ const unsigned int output_width = filter_size_to_wh[filter_size][0];
+ const unsigned int output_height = filter_size_to_wh[filter_size][1];
+
+ unsigned int i, j;
+ for (i = 0; i < output_height; i++) {
+ for (j = 0; j < output_width; j++) {
+ output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1;
+ }
+ output_ptr += output_stride;
+ }
+}
+
+#define block2d_average block2d_average_c
+
+void vp9_eighttap_predict_c(unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch) {
+ const short *HFilter;
+ const short *VFilter;
+
+ HFilter = vp9_sub_pel_filters_8[xoffset];
+ VFilter = vp9_sub_pel_filters_8[yoffset];
+
+ vp9_filter_block2d_4x4_8(src_ptr, src_pixels_per_line,
+ HFilter, VFilter,
+ dst_ptr, dst_pitch);
+}
+
+void vp9_eighttap_predict_avg4x4_c(unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch) {
+ const short *HFilter = vp9_sub_pel_filters_8[xoffset];
+ const short *VFilter = vp9_sub_pel_filters_8[yoffset];
+ unsigned char tmp[4 * 4];
+
+ vp9_filter_block2d_4x4_8(src_ptr, src_pixels_per_line,
+ HFilter, VFilter,
+ tmp, 4);
+ block2d_average(tmp, 4, dst_ptr, dst_pitch, VPX_FILTER_4x4);
+}
+
+void vp9_eighttap_predict_sharp_c(unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch) {
+ const short *HFilter;
+ const short *VFilter;
+
+ HFilter = vp9_sub_pel_filters_8s[xoffset];
+ VFilter = vp9_sub_pel_filters_8s[yoffset];
+
+ vp9_filter_block2d_4x4_8(src_ptr, src_pixels_per_line,
+ HFilter, VFilter,
+ dst_ptr, dst_pitch);
+}
+
+void vp9_eighttap_predict_avg4x4_sharp_c(unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch) {
+ const short *HFilter = vp9_sub_pel_filters_8s[xoffset];
+ const short *VFilter = vp9_sub_pel_filters_8s[yoffset];
+ unsigned char tmp[4 * 4];
+
+ vp9_filter_block2d_4x4_8(src_ptr, src_pixels_per_line,
+ HFilter, VFilter,
+ tmp, 4);
+ block2d_average(tmp, 4, dst_ptr, dst_pitch, VPX_FILTER_4x4);
+}
+
+void vp9_eighttap_predict8x8_c(unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch) {
+ const short *HFilter = vp9_sub_pel_filters_8[xoffset];
+ const short *VFilter = vp9_sub_pel_filters_8[yoffset];
+
+ vp9_filter_block2d_8x8_8(src_ptr, src_pixels_per_line,
+ HFilter, VFilter,
+ dst_ptr, dst_pitch);
+}
+
+void vp9_eighttap_predict8x8_sharp_c(unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch) {
+ const short *HFilter = vp9_sub_pel_filters_8s[xoffset];
+ const short *VFilter = vp9_sub_pel_filters_8s[yoffset];
+
+ vp9_filter_block2d_8x8_8(src_ptr, src_pixels_per_line,
+ HFilter, VFilter,
+ dst_ptr, dst_pitch);
+}
+
+void vp9_eighttap_predict_avg8x8_c(unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch) {
+ unsigned char tmp[8 * 8];
+ const short *HFilter = vp9_sub_pel_filters_8[xoffset];
+ const short *VFilter = vp9_sub_pel_filters_8[yoffset];
+
+ vp9_filter_block2d_8x8_8(src_ptr, src_pixels_per_line,
+ HFilter, VFilter,
+ tmp, 8);
+ block2d_average(tmp, 8, dst_ptr, dst_pitch, VPX_FILTER_8x8);
+}
+
+void vp9_eighttap_predict_avg8x8_sharp_c(unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch) {
+ unsigned char tmp[8 * 8];
+ const short *HFilter = vp9_sub_pel_filters_8s[xoffset];
+ const short *VFilter = vp9_sub_pel_filters_8s[yoffset];
+
+ vp9_filter_block2d_8x8_8(src_ptr, src_pixels_per_line,
+ HFilter, VFilter,
+ tmp, 8);
+ block2d_average(tmp, 8, dst_ptr, dst_pitch, VPX_FILTER_8x8);
+}
+
+void vp9_eighttap_predict8x4_c(unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch) {
+ const short *HFilter = vp9_sub_pel_filters_8[xoffset];
+ const short *VFilter = vp9_sub_pel_filters_8[yoffset];
+
+ vp9_filter_block2d_8x4_8(src_ptr, src_pixels_per_line,
+ HFilter, VFilter,
+ dst_ptr, dst_pitch);
+}
+
+void vp9_eighttap_predict8x4_sharp_c(unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch) {
+ const short *HFilter = vp9_sub_pel_filters_8s[xoffset];
+ const short *VFilter = vp9_sub_pel_filters_8s[yoffset];
+
+ vp9_filter_block2d_8x4_8(src_ptr, src_pixels_per_line,
+ HFilter, VFilter,
+ dst_ptr, dst_pitch);
+}
+
+void vp9_eighttap_predict16x16_c(unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch) {
+ const short *HFilter = vp9_sub_pel_filters_8[xoffset];
+ const short *VFilter = vp9_sub_pel_filters_8[yoffset];
+
+ vp9_filter_block2d_16x16_8(src_ptr, src_pixels_per_line,
+ HFilter, VFilter,
+ dst_ptr, dst_pitch);
+}
+
+void vp9_eighttap_predict16x16_sharp_c(unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch) {
+ const short *HFilter = vp9_sub_pel_filters_8s[xoffset];
+ const short *VFilter = vp9_sub_pel_filters_8s[yoffset];
+
+ vp9_filter_block2d_16x16_8(src_ptr, src_pixels_per_line,
+ HFilter, VFilter,
+ dst_ptr, dst_pitch);
+}
+
+void vp9_eighttap_predict_avg16x16_c(unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch) {
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, tmp, 16 * 16);
+ const short *HFilter = vp9_sub_pel_filters_8[xoffset];
+ const short *VFilter = vp9_sub_pel_filters_8[yoffset];
+
+ vp9_filter_block2d_16x16_8(src_ptr, src_pixels_per_line,
+ HFilter, VFilter,
+ tmp, 16);
+ block2d_average(tmp, 16, dst_ptr, dst_pitch, VPX_FILTER_16x16);
+}
+
+void vp9_eighttap_predict_avg16x16_sharp_c(unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch) {
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, tmp, 16 * 16);
+ const short *HFilter = vp9_sub_pel_filters_8s[xoffset];
+ const short *VFilter = vp9_sub_pel_filters_8s[yoffset];
+
+ vp9_filter_block2d_16x16_8(src_ptr, src_pixels_per_line,
+ HFilter, VFilter,
+ tmp, 16);
+ block2d_average(tmp, 16, dst_ptr, dst_pitch, VPX_FILTER_16x16);
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : filter_block2d_bil_first_pass
+ *
+ * INPUTS : UINT8 *src_ptr : Pointer to source block.
+ * UINT32 src_stride : Stride of source block.
+ * UINT32 height : Number of rows to produce.
+ * UINT32 width : Block width.
+ * INT16 *vp9_filter : Array of 2 bi-linear filter taps.
+ *
+ * OUTPUTS : UINT16 *dst_ptr : Pointer to filtered block (width values per row).
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block
+ * in the horizontal direction to produce the filtered output
+ * block. Used to implement first-pass of 2-D separable filter.
+ *
+ * SPECIAL NOTES : Produces 16-bit output, rounded and shifted by VP9_FILTER_SHIFT.
+ * Two filter taps should sum to VP9_FILTER_WEIGHT.
+ *
+ ****************************************************************************/
+static void filter_block2d_bil_first_pass(unsigned char *src_ptr,
+ unsigned short *dst_ptr,
+ unsigned int src_stride,
+ unsigned int height,
+ unsigned int width,
+ const short *vp9_filter) {
+ unsigned int i, j;
+
+ for (i = 0; i < height; i++) {
+ for (j = 0; j < width; j++) {
+ /* Apply bilinear filter to the current pixel and its right-hand neighbour */
+ dst_ptr[j] = (((int)src_ptr[0] * vp9_filter[0]) +
+ ((int)src_ptr[1] * vp9_filter[1]) +
+ (VP9_FILTER_WEIGHT / 2)) >> VP9_FILTER_SHIFT;
+ src_ptr++;
+ }
+
+ /* Next row: source skips the rest of its stride, output rows are packed */
+ src_ptr += src_stride - width;
+ dst_ptr += width;
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : filter_block2d_bil_second_pass
+ *
+ * INPUTS : UINT16 *src_ptr : Pointer to source block.
+ * UINT32 dst_pitch : Destination block pitch.
+ * UINT32 height : Block height.
+ * UINT32 width : Block width.
+ * INT16 *vp9_filter : Array of 2 bi-linear filter taps.
+ *
+ * OUTPUTS : UINT8 *dst_ptr : Pointer to filtered block.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block
+ * in the vertical direction to produce the filtered output
+ * block. Used to implement second-pass of 2-D separable filter.
+ *
+ * SPECIAL NOTES : Requires 16-bit input as produced by filter_block2d_bil_first_pass.
+ * Two filter taps should sum to VP9_FILTER_WEIGHT.
+ *
+ ****************************************************************************/
+static void filter_block2d_bil_second_pass(unsigned short *src_ptr,
+ unsigned char *dst_ptr,
+ int dst_pitch,
+ unsigned int height,
+ unsigned int width,
+ const short *vp9_filter) {
+ unsigned int i, j;
+ int Temp;
+
+ for (i = 0; i < height; i++) {
+ for (j = 0; j < width; j++) {
+ /* Apply filter: src_ptr[width] is the value one packed row below src_ptr[0] */
+ Temp = ((int)src_ptr[0] * vp9_filter[0]) +
+ ((int)src_ptr[width] * vp9_filter[1]) +
+ (VP9_FILTER_WEIGHT / 2);
+ dst_ptr[j] = (unsigned int)(Temp >> VP9_FILTER_SHIFT);
+ src_ptr++;
+ }
+
+ /* Next row: input rows are packed, so only the output pointer needs adjusting */
+ dst_ptr += dst_pitch;
+ }
+}
+
+/*
+ * As before for filter_block2d_second_pass_avg_6(), the functional difference
+ * between filter_block2d_bil_second_pass() and filter_block2d_bil_second_pass_avg()
+ * is that filter_block2d_bil_second_pass() does a bilinear filter on input
+ * and stores the result in output; filter_block2d_bil_second_pass_avg(),
+ * instead, does a bilinear filter on input, averages the resulting value
+ * with the values already present in the output and stores the result of
+ * that back into the output ((filter_result + dest + 1) >> 1).
+ */
+static void filter_block2d_bil_second_pass_avg(unsigned short *src_ptr,
+ unsigned char *dst_ptr, /* read and written: holds the values to average with */
+ int dst_pitch,
+ unsigned int height,
+ unsigned int width,
+ const short *vp9_filter) {
+ unsigned int i, j;
+ int Temp;
+
+ for (i = 0; i < height; i++) {
+ for (j = 0; j < width; j++) {
+ /* Apply filter: src_ptr[width] is the value one packed row below src_ptr[0] */
+ Temp = ((int)src_ptr[0] * vp9_filter[0]) +
+ ((int)src_ptr[width] * vp9_filter[1]) +
+ (VP9_FILTER_WEIGHT / 2);
+ dst_ptr[j] = (unsigned int)(((Temp >> VP9_FILTER_SHIFT) + dst_ptr[j] + 1) >> 1);
+ src_ptr++;
+ }
+
+ /* Next row: input rows are packed, so only the output pointer needs adjusting */
+ dst_ptr += dst_pitch;
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : filter_block2d_bil
+ *
+ * INPUTS : UINT8 *src_ptr : Pointer to source block.
+ * UINT32 src_pitch : Stride of source block.
+ * UINT32 dst_pitch : Stride of destination block.
+ * INT16 *HFilter : Array of 2 horizontal filter taps.
+ * INT16 *VFilter : Array of 2 vertical filter taps.
+ * INT32 Width : Block width
+ * INT32 Height : Block height
+ *
+ * OUTPUTS : UINT8 *dst_ptr : Pointer to filtered block.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : 2-D filters an input block by applying a 2-tap
+ * bi-linear filter horizontally followed by a 2-tap
+ * bi-linear filter vertically on the result.
+ *
+ * SPECIAL NOTES : The largest block size that can be handled here is 16x16 (FData is 17 * 16).
+ *
+ ****************************************************************************/
+static void filter_block2d_bil(unsigned char *src_ptr,
+ unsigned char *dst_ptr,
+ unsigned int src_pitch,
+ unsigned int dst_pitch,
+ const short *HFilter,
+ const short *VFilter,
+ int Width,
+ int Height) {
+
+ unsigned short FData[17 * 16]; /* Temp data buffer used in filtering */
+
+ /* First filter 1-D horizontally: Height + 1 rows, since the vertical taps read one row below */
+ filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width, HFilter);
+
+ /* then 1-D vertically... */
+ filter_block2d_bil_second_pass(FData, dst_ptr, dst_pitch, Height, Width, VFilter);
+}
+
+static void filter_block2d_bil_avg(unsigned char *src_ptr, /* same two passes as filter_block2d_bil(), averaging into dst_ptr */
+ unsigned char *dst_ptr,
+ unsigned int src_pitch,
+ unsigned int dst_pitch,
+ const short *HFilter,
+ const short *VFilter,
+ int Width,
+ int Height) {
+ unsigned short FData[17 * 16]; /* Temp data buffer used in filtering: room for 17 rows of 16 */
+
+ /* First filter 1-D horizontally: Height + 1 rows, since the vertical taps read one row below */
+ filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width, HFilter);
+
+ /* then 1-D vertically, averaging with the existing contents of dst_ptr */
+ filter_block2d_bil_second_pass_avg(FData, dst_ptr, dst_pitch, Height, Width, VFilter);
+}
+
+void vp9_bilinear_predict4x4_c(unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch) {
+ const short *HFilter;
+ const short *VFilter;
+
+ HFilter = vp9_bilinear_filters[xoffset];
+ VFilter = vp9_bilinear_filters[yoffset];
+
+ filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4);
+}
+
+void vp9_bilinear_predict_avg4x4_c(unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch) {
+ const short *HFilter;
+ const short *VFilter;
+
+ HFilter = vp9_bilinear_filters[xoffset];
+ VFilter = vp9_bilinear_filters[yoffset];
+
+ filter_block2d_bil_avg(src_ptr, dst_ptr, src_pixels_per_line,
+ dst_pitch, HFilter, VFilter, 4, 4);
+}
+
+void vp9_bilinear_predict8x8_c(unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch) {
+ const short *HFilter;
+ const short *VFilter;
+
+ HFilter = vp9_bilinear_filters[xoffset];
+ VFilter = vp9_bilinear_filters[yoffset];
+
+ filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8);
+
+}
+
+void vp9_bilinear_predict_avg8x8_c(unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch) {
+ const short *HFilter;
+ const short *VFilter;
+
+ HFilter = vp9_bilinear_filters[xoffset];
+ VFilter = vp9_bilinear_filters[yoffset];
+
+ filter_block2d_bil_avg(src_ptr, dst_ptr, src_pixels_per_line,
+ dst_pitch, HFilter, VFilter, 8, 8);
+}
+
+void vp9_bilinear_predict8x4_c(unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch) {
+ const short *HFilter;
+ const short *VFilter;
+
+ HFilter = vp9_bilinear_filters[xoffset];
+ VFilter = vp9_bilinear_filters[yoffset];
+
+ filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4);
+
+}
+
+void vp9_bilinear_predict16x16_c(unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch) {
+ const short *HFilter;
+ const short *VFilter;
+
+ HFilter = vp9_bilinear_filters[xoffset];
+ VFilter = vp9_bilinear_filters[yoffset];
+
+ filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16);
+}
+
+void vp9_bilinear_predict_avg16x16_c(unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch) {
+ const short *HFilter;
+ const short *VFilter;
+
+ HFilter = vp9_bilinear_filters[xoffset];
+ VFilter = vp9_bilinear_filters[yoffset];
+
+ filter_block2d_bil_avg(src_ptr, dst_ptr, src_pixels_per_line,
+ dst_pitch, HFilter, VFilter, 16, 16);
+}
diff --git a/vp9/common/vp9_filter.h b/vp9/common/vp9_filter.h
new file mode 100644
index 0000000..545d39a
--- /dev/null
+++ b/vp9/common/vp9_filter.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_COMMON_VP9_FILTER_H_
+#define VP9_COMMON_VP9_FILTER_H_
+
+#include "vpx_config.h"
+#include "vpx_scale/yv12config.h"
+
+#define BLOCK_HEIGHT_WIDTH 4
+#define VP9_FILTER_WEIGHT 128
+#define VP9_FILTER_SHIFT 7
+
+#define SUBPEL_SHIFTS 16
+
+extern const short vp9_bilinear_filters[SUBPEL_SHIFTS][2];
+extern const short vp9_sub_pel_filters_6[SUBPEL_SHIFTS][6];
+extern const short vp9_sub_pel_filters_8[SUBPEL_SHIFTS][8];
+extern const short vp9_sub_pel_filters_8s[SUBPEL_SHIFTS][8];
+
+#endif  // VP9_COMMON_VP9_FILTER_H_
diff --git a/vp9/common/vp9_findnearmv.c b/vp9/common/vp9_findnearmv.c
new file mode 100644
index 0000000..02c28fb
--- /dev/null
+++ b/vp9/common/vp9_findnearmv.c
@@ -0,0 +1,298 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vp9/common/vp9_findnearmv.h"
+#include "vp9/common/vp9_sadmxn.h"
+#include "vp9/common/vp9_subpelvar.h"
+#include <limits.h>
+
+const unsigned char vp9_mbsplit_offset[4][16] = {
+ { 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 0, 2, 8, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}
+};
+
+/* Steps odd row/col values one unit toward zero unless high precision is
+ * enabled (usehp) and vp9_use_nmv_hp() accepts this vector. */
+static void lower_mv_precision(int_mv *mv, int usehp) {
+  if (usehp && vp9_use_nmv_hp(&mv->as_mv))
+    return;
+  if (mv->as_mv.row & 1)
+    mv->as_mv.row += (mv->as_mv.row > 0) ? -1 : 1;
+  if (mv->as_mv.col & 1) mv->as_mv.col += (mv->as_mv.col > 0) ? -1 : 1;
+}
+
+/* Copies the four probabilities stored for `context` into p and returns p. */
+vp9_prob *vp9_mv_ref_probs(VP9_COMMON *pc,
+                           vp9_prob p[4], const int context) {
+  int i;
+
+  for (i = 0; i < 4; ++i)
+    p[i] = pc->fc.vp9_mode_contexts[context][i];
+  return p;
+}
+
+#define SP(x) (((x) & 7) << 1)
+/* Thin wrapper: sad_mx_n_c() with the trailing size arguments 3 and 16. */
+unsigned int vp9_sad3x16_c(const unsigned char *src_ptr, int src_stride,
+                           const unsigned char *ref_ptr, int ref_stride) {
+  const int m = 3, n = 16;
+  return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, m, n);
+}
+/* Thin wrapper: sad_mx_n_c() with the trailing size arguments 16 and 3. */
+unsigned int vp9_sad16x3_c(const unsigned char *src_ptr, int src_stride,
+                           const unsigned char *ref_ptr, int ref_stride) {
+  const int m = 16, n = 3;
+  return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, m, n);
+}
+
+#if CONFIG_SUBPELREFMV
+/* 2x16 variance: variance() is handed sse and a local sum; the result is
+ * *sse minus (sum * sum) >> 5, i.e. divided by the 32 pixels of the block. */
+unsigned int vp9_variance2x16_c(const unsigned char *src_ptr,
+                                const int source_stride,
+                                const unsigned char *ref_ptr,
+                                const int recon_stride, unsigned int *sse) {
+  int total; variance(src_ptr, source_stride, ref_ptr, recon_stride, 2, 16, sse, &total);
+  return *sse - (((unsigned int)total * total) >> 5);
+}
+
+/* 16x2 variance: variance() is handed sse and a local sum; the result is
+ * *sse minus (sum * sum) >> 5, i.e. divided by the 32 pixels of the block. */
+unsigned int vp9_variance16x2_c(const unsigned char *src_ptr,
+                                const int source_stride,
+                                const unsigned char *ref_ptr,
+                                const int recon_stride, unsigned int *sse) {
+  int total; variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 2, sse, &total);
+  return *sse - (((unsigned int)total * total) >> 5);
+}
+
+/* Sub-pixel 16x2 variance: runs the two bilinear filter passes over src_ptr
+ * and returns vp9_variance16x2_c() of the filtered block against dst_ptr. */
+unsigned int vp9_sub_pixel_variance16x2_c(const unsigned char *src_ptr,
+                                          const int src_pixels_per_line,
+                                          const int xoffset,
+                                          const int yoffset,
+                                          const unsigned char *dst_ptr,
+                                          const int dst_pixels_per_line,
+                                          unsigned int *sse) {
+  const short *const h_taps = vp9_bilinear_filters[xoffset];
+  const short *const v_taps = vp9_bilinear_filters[yoffset];
+  unsigned short first_pass[16 * 3];  // scratch written by the first pass
+  unsigned char filtered[2 * 16];     // output of the second pass
+
+  var_filter_block2d_bil_first_pass(src_ptr, first_pass, src_pixels_per_line,
+                                    1, 3, 16, h_taps);
+  var_filter_block2d_bil_second_pass(first_pass, filtered, 16, 16, 2, 16,
+                                     v_taps);
+  return vp9_variance16x2_c(filtered, 16, dst_ptr, dst_pixels_per_line, sse);
+}
+
+/* Sub-pixel 2x16 variance: runs the two bilinear filter passes over src_ptr
+ * and returns vp9_variance2x16_c() of the filtered block against dst_ptr. */
+unsigned int vp9_sub_pixel_variance2x16_c(const unsigned char *src_ptr,
+                                          const int src_pixels_per_line,
+                                          const int xoffset,
+                                          const int yoffset,
+                                          const unsigned char *dst_ptr,
+                                          const int dst_pixels_per_line,
+                                          unsigned int *sse) {
+  const short *const h_taps = vp9_bilinear_filters[xoffset];
+  const short *const v_taps = vp9_bilinear_filters[yoffset];
+  unsigned short first_pass[2 * 17];  // scratch written by the first pass
+  unsigned char filtered[2 * 16];     // output of the second pass
+
+  var_filter_block2d_bil_first_pass(src_ptr, first_pass, src_pixels_per_line,
+                                    1, 17, 2, h_taps);
+  var_filter_block2d_bil_second_pass(first_pass, filtered, 2, 2, 16, 2,
+                                     v_taps);
+  return vp9_variance2x16_c(filtered, 2, dst_ptr, dst_pixels_per_line, sse);
+}
+#endif
+
+/* Scores up to four candidate motion vectors from mvlist against the rows
+ * above and the columns left of the current block, sorts them by score and
+ * derives best_mv, nearest and near. mvlist is overwritten with the sorted,
+ * clamped list (entries that were not scored come back as 0,0). */
+void vp9_find_best_ref_mvs(MACROBLOCKD *xd,
+ unsigned char *ref_y_buffer,
+ int ref_y_stride,
+ int_mv *mvlist,
+ int_mv *best_mv,
+ int_mv *nearest,
+ int_mv *near) {
+ int i, j;
+ unsigned char *above_src;
+ unsigned char *left_src;
+ unsigned char *above_ref;
+ unsigned char *left_ref;
+ unsigned int score;
+#if CONFIG_SUBPELREFMV
+ unsigned int sse;
+#endif
+ unsigned int ref_scores[MAX_MV_REFS] = {0};
+ int_mv sorted_mvs[MAX_MV_REFS];
+ int zero_seen = FALSE;
+
+ // Default all to 0,0 if nothing else available
+ best_mv->as_int = nearest->as_int = near->as_int = 0;
+ vpx_memset(sorted_mvs, 0, sizeof(sorted_mvs));
+
+#if CONFIG_SUBPELREFMV
+ above_src = xd->dst.y_buffer - xd->dst.y_stride * 2;
+ left_src = xd->dst.y_buffer - 2;
+ above_ref = ref_y_buffer - ref_y_stride * 2;
+ left_ref = ref_y_buffer - 2;
+#else
+ above_src = xd->dst.y_buffer - xd->dst.y_stride * 3;
+ left_src = xd->dst.y_buffer - 3;
+ above_ref = ref_y_buffer - ref_y_stride * 3;
+ left_ref = ref_y_buffer - 3;
+#endif
+
+ //for(i = 0; i < MAX_MV_REFS; ++i) {
+ // Limit search to the predicted best 4
+ for(i = 0; i < 4; ++i) {
+ int_mv this_mv;
+ int offset = 0;
+ int row_offset, col_offset;
+
+ this_mv.as_int = mvlist[i].as_int;
+
+ // If we see a 0,0 vector for a second time we have reached the end of
+ // the list of valid candidate vectors.
+ if (!this_mv.as_int && zero_seen)
+ break;
+
+ zero_seen = zero_seen || !this_mv.as_int;
+
+ clamp_mv(&this_mv,
+ xd->mb_to_left_edge - LEFT_TOP_MARGIN + 24,
+ xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
+ xd->mb_to_top_edge - LEFT_TOP_MARGIN + 24,
+ xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);
+ // Score: border pixels in xd->dst versus the reference displaced by this_mv.
+#if CONFIG_SUBPELREFMV
+ row_offset = this_mv.as_mv.row >> 3;
+ col_offset = this_mv.as_mv.col >> 3;
+ offset = ref_y_stride * row_offset + col_offset;
+ score = 0;
+ if (xd->up_available) {
+ vp9_sub_pixel_variance16x2_c(above_ref + offset, ref_y_stride,
+ SP(this_mv.as_mv.col),
+ SP(this_mv.as_mv.row),
+ above_src, xd->dst.y_stride, &sse);
+ score += sse;
+#if CONFIG_SUPERBLOCKS
+ if (xd->mode_info_context->mbmi.encoded_as_sb) {
+ vp9_sub_pixel_variance16x2_c(above_ref + offset + 16,
+ ref_y_stride,
+ SP(this_mv.as_mv.col),
+ SP(this_mv.as_mv.row),
+ above_src + 16, xd->dst.y_stride, &sse);
+ score += sse;
+ }
+#endif
+ }
+ if (xd->left_available) {
+ vp9_sub_pixel_variance2x16_c(left_ref + offset, ref_y_stride,
+ SP(this_mv.as_mv.col),
+ SP(this_mv.as_mv.row),
+ left_src, xd->dst.y_stride, &sse);
+ score += sse;
+#if CONFIG_SUPERBLOCKS
+ if (xd->mode_info_context->mbmi.encoded_as_sb) {
+ vp9_sub_pixel_variance2x16_c(left_ref + offset + ref_y_stride * 16,
+ ref_y_stride,
+ SP(this_mv.as_mv.col),
+ SP(this_mv.as_mv.row),
+ left_src + xd->dst.y_stride * 16,
+ xd->dst.y_stride, &sse);
+ score += sse;
+ }
+#endif
+ }
+#else
+ row_offset = (this_mv.as_mv.row > 0) ?
+ ((this_mv.as_mv.row + 3) >> 3):((this_mv.as_mv.row + 4) >> 3);
+ col_offset = (this_mv.as_mv.col > 0) ?
+ ((this_mv.as_mv.col + 3) >> 3):((this_mv.as_mv.col + 4) >> 3);
+ offset = ref_y_stride * row_offset + col_offset;
+ score = 0;
+ if (xd->up_available) {
+ score += vp9_sad16x3(above_src, xd->dst.y_stride,
+ above_ref + offset, ref_y_stride);
+#if CONFIG_SUPERBLOCKS
+ if (xd->mode_info_context->mbmi.encoded_as_sb) {
+ score += vp9_sad16x3(above_src + 16, xd->dst.y_stride,
+ above_ref + offset + 16, ref_y_stride);
+ }
+#endif
+ }
+ if (xd->left_available) {
+ score += vp9_sad3x16(left_src, xd->dst.y_stride,
+ left_ref + offset, ref_y_stride);
+#if CONFIG_SUPERBLOCKS
+ if (xd->mode_info_context->mbmi.encoded_as_sb) {
+ score += vp9_sad3x16(left_src + xd->dst.y_stride * 16,
+ xd->dst.y_stride,
+ left_ref + offset + ref_y_stride * 16,
+ ref_y_stride);
+ }
+#endif
+ }
+#endif
+ // Insert the entry into the sorted list (insertion sort, ascending score).
+ ref_scores[i] = score;
+ sorted_mvs[i].as_int = this_mv.as_int;
+ j = i;
+ while (j > 0) {
+ if (ref_scores[j] < ref_scores[j-1]) {
+ ref_scores[j] = ref_scores[j-1];
+ sorted_mvs[j].as_int = sorted_mvs[j-1].as_int;
+ ref_scores[j-1] = score;
+ sorted_mvs[j-1].as_int = this_mv.as_int;
+ j--;
+ } else
+ break;
+ }
+ }
+
+ // Make sure all the candidates are properly clamped etc
+ for (i = 0; i < 4; ++i) {
+ lower_mv_precision(&sorted_mvs[i], xd->allow_high_precision_mv);
+ clamp_mv2(&sorted_mvs[i], xd);
+ }
+
+ // Set the best mv to the first entry in the sorted list
+ best_mv->as_int = sorted_mvs[0].as_int;
+
+ // Provided that there are non zero vectors available there will not
+ // be more than one 0,0 entry in the sorted list.
+ // The best ref mv is always set to the first entry (which gave the best
+ // results). The nearest is set to the first non zero vector if available and
+ // near to the second non zero vector if available.
+ // We do not use 0,0 as a nearest or near as 0,0 has its own mode.
+ if ( sorted_mvs[0].as_int ) {
+ nearest->as_int = sorted_mvs[0].as_int;
+ if ( sorted_mvs[1].as_int )
+ near->as_int = sorted_mvs[1].as_int;
+ else
+ near->as_int = sorted_mvs[2].as_int;
+ } else {
+ nearest->as_int = sorted_mvs[1].as_int;
+ near->as_int = sorted_mvs[2].as_int;
+ }
+
+ // Copy back the re-ordered mv list
+ vpx_memcpy(mvlist, sorted_mvs, sizeof(sorted_mvs));
+}
diff --git a/vp9/common/vp9_findnearmv.h b/vp9/common/vp9_findnearmv.h
new file mode 100644
index 0000000..8dab553
--- /dev/null
+++ b/vp9/common/vp9_findnearmv.h
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_COMMON_VP9_FINDNEARMV_H_
+#define VP9_COMMON_VP9_FINDNEARMV_H_
+
+#include "vp9/common/vp9_mv.h"
+#include "vp9/common/vp9_blockd.h"
+#include "vp9/common/vp9_treecoder.h"
+#include "vp9/common/vp9_onyxc_int.h"
+
+/* Check a list of motion vectors by SAD score, using a number of rows of
+ * pixels above and a number of columns of pixels to the left, and select the
+ * one with the best score to use as the reference motion vector.
+ */
+void vp9_find_best_ref_mvs(MACROBLOCKD *xd,
+ unsigned char *ref_y_buffer,
+ int ref_y_stride,
+ int_mv *mvlist,
+ int_mv *best_mv,
+ int_mv *nearest,
+ int_mv *near);
+
+/* Negates both components of *mvp when refmb_ref_frame_sign_bias differs
+ * from ref_frame_sign_bias[refframe]; otherwise the vector is left as is. */
+static void mv_bias(int refmb_ref_frame_sign_bias, int refframe,
+                    int_mv *mvp, const int *ref_frame_sign_bias) {
+  const int flip = refmb_ref_frame_sign_bias != ref_frame_sign_bias[refframe];
+
+  if (flip) {
+    mvp->as_mv.row *= -1;
+    mvp->as_mv.col *= -1;
+  }
+}
+
+#define LEFT_TOP_MARGIN (16 << 3)
+#define RIGHT_BOTTOM_MARGIN (16 << 3)
+
+/* Clamps mv in place: col against the left then the right edge, row against
+ * the top then the bottom edge (lower bound first in each direction). */
+static void clamp_mv(int_mv *mv,
+                     int mb_to_left_edge, int mb_to_right_edge,
+                     int mb_to_top_edge, int mb_to_bottom_edge) {
+  if (mv->as_mv.col < mb_to_left_edge)
+    mv->as_mv.col = mb_to_left_edge;
+  if (mv->as_mv.col > mb_to_right_edge)
+    mv->as_mv.col = mb_to_right_edge;
+  if (mv->as_mv.row < mb_to_top_edge)
+    mv->as_mv.row = mb_to_top_edge;
+  if (mv->as_mv.row > mb_to_bottom_edge)
+    mv->as_mv.row = mb_to_bottom_edge;
+}
+
+static void clamp_mv2(int_mv *mv, const MACROBLOCKD *xd) {
+  const int min_col = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
+  const int max_col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
+  const int min_row = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
+  const int max_row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
+  clamp_mv(mv, min_col, max_col, min_row, max_row);  /* margin-widened edges */
+}
+
+/* Returns 1 when mv lies outside the given edges in either axis, else 0. */
+static unsigned int check_mv_bounds(int_mv *mv,
+                                    int mb_to_left_edge, int mb_to_right_edge,
+                                    int mb_to_top_edge, int mb_to_bottom_edge) {
+  const int col = mv->as_mv.col;
+  const int row = mv->as_mv.row;
+  const int col_outside = (col < mb_to_left_edge) || (col > mb_to_right_edge);
+  const int row_outside = (row < mb_to_top_edge) || (row > mb_to_bottom_edge);
+  return col_outside || row_outside;
+}
+
+vp9_prob *vp9_mv_ref_probs(VP9_COMMON *pc,
+ vp9_prob p[VP9_MVREFS - 1],
+ const int context);
+
+extern const unsigned char vp9_mbsplit_offset[4][16];
+
+/* Motion vector (as_int) of the sub-block left of sub-block b. When b is on
+ * the left edge ((b & 3) == 0) the MB before cur_mb supplies it: its mv[0],
+ * or for a SPLITMV MB the sub-block at index b + 3. */
+static int left_block_mv(const MODE_INFO *cur_mb, int b) {
+  if ((b & 3) == 0) {
+    const MODE_INFO *const left_mb = cur_mb - 1;
+    if (left_mb->mbmi.mode != SPLITMV)
+      return left_mb->mbmi.mv[0].as_int;
+    return left_mb->bmi[b + 3].as_mv.first.as_int;
+  }
+  return cur_mb->bmi[b - 1].as_mv.first.as_int;
+}
+
+/* Same lookup as left_block_mv(), but the second motion vector is returned
+ * whenever the MB supplying the value has second_ref_frame > 0. */
+static int left_block_second_mv(const MODE_INFO *cur_mb, int b) {
+  const int on_left_edge = (b & 3) == 0;
+  const MODE_INFO *const mb = on_left_edge ? cur_mb - 1 : cur_mb;
+  const int use_second = mb->mbmi.second_ref_frame > 0;
+
+  if (on_left_edge) {
+    if (mb->mbmi.mode != SPLITMV)
+      return use_second ? mb->mbmi.mv[1].as_int : mb->mbmi.mv[0].as_int;
+    b += 4;
+  }
+  return use_second ? mb->bmi[b - 1].as_mv.second.as_int
+                    : mb->bmi[b - 1].as_mv.first.as_int;
+}
+
+/* Motion vector (as_int) of the sub-block above sub-block b. When b is in
+ * the top row ((b >> 2) == 0) the MB mi_stride entries before cur_mb supplies
+ * it: its mv[0], or for a SPLITMV MB the sub-block at index b + 12. */
+static int above_block_mv(const MODE_INFO *cur_mb, int b, int mi_stride) {
+  if ((b >> 2) == 0) {
+    const MODE_INFO *const above_mb = cur_mb - mi_stride;
+    if (above_mb->mbmi.mode != SPLITMV)
+      return above_mb->mbmi.mv[0].as_int;
+    return above_mb->bmi[b + 12].as_mv.first.as_int;
+  }
+  return cur_mb->bmi[b - 4].as_mv.first.as_int;
+}
+
+/* Same lookup as above_block_mv(), but the second motion vector is returned
+ * whenever the MB supplying the value has second_ref_frame > 0. */
+static int above_block_second_mv(const MODE_INFO *cur_mb, int b, int mi_stride) {
+  const int on_top_edge = (b >> 2) == 0;
+  const MODE_INFO *const mb = on_top_edge ? cur_mb - mi_stride : cur_mb;
+  const int use_second = mb->mbmi.second_ref_frame > 0;
+
+  if (on_top_edge) {
+    if (mb->mbmi.mode != SPLITMV)
+      return use_second ? mb->mbmi.mv[1].as_int : mb->mbmi.mv[0].as_int;
+    b += 16;
+  }
+  return use_second ? mb->bmi[b - 4].as_mv.second.as_int
+                    : mb->bmi[b - 4].as_mv.first.as_int;
+}
+
+/* Prediction mode of the sub-block left of sub-block b. On the left edge the
+ * value comes from the MB before cur_mb, mapped according to that MB's mode;
+ * B_DC_PRED is returned for modes not handled explicitly. */
+static B_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb, int b) {
+  const MODE_INFO *left_mb;
+  if (b & 3)
+    return cur_mb->bmi[b - 1].as_mode.first;
+
+  left_mb = cur_mb - 1;
+  if (left_mb->mbmi.mode < I8X8_PRED)
+    return pred_mode_conv(left_mb->mbmi.mode);
+  if (left_mb->mbmi.mode == I8X8_PRED)
+    return pred_mode_conv(
+        (MB_PREDICTION_MODE)left_mb->bmi[3 + b].as_mode.first);
+  if (left_mb->mbmi.mode == B_PRED)
+    return left_mb->bmi[3 + b].as_mode.first;
+  return B_DC_PRED;
+}
+
+/* Prediction mode of the sub-block above sub-block b. In the top row the
+ * value comes from the MB mi_stride entries before cur_mb, mapped according
+ * to that MB's mode; B_DC_PRED is returned for modes not handled explicitly. */
+static B_PREDICTION_MODE above_block_mode(const MODE_INFO *cur_mb,
+                                          int b, int mi_stride) {
+  const MODE_INFO *above_mb;
+
+  if (b >> 2)
+    return cur_mb->bmi[b - 4].as_mode.first;
+
+  above_mb = cur_mb - mi_stride;
+  if (above_mb->mbmi.mode < I8X8_PRED)
+    return pred_mode_conv(above_mb->mbmi.mode);
+  if (above_mb->mbmi.mode == I8X8_PRED)
+    return pred_mode_conv(
+        (MB_PREDICTION_MODE)above_mb->bmi[12 + b].as_mode.first);
+  if (above_mb->mbmi.mode == B_PRED)
+    return above_mb->bmi[12 + b].as_mode.first;
+  return B_DC_PRED;
+}
+
+#endif
diff --git a/vp9/common/vp9_header.h b/vp9/common/vp9_header.h
new file mode 100644
index 0000000..00dd17e
--- /dev/null
+++ b/vp9/common/vp9_header.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_COMMON_VP9_HEADER_H_
+#define VP9_COMMON_VP9_HEADER_H_
+
+/* 24 bits total (1 + 3 + 1 + 19) when PACKET_TESTING is not defined */
+typedef struct {
+ unsigned int type: 1;
+ unsigned int version: 3;
+ unsigned int show_frame: 1;
+
+ /* 19-bit field: first partition lengths below 2^19 bytes (= 4 megabits) fit */
+
+ unsigned int first_partition_length_in_bytes: 19;
+
+#ifdef PACKET_TESTING
+ unsigned int frame_number;
+ unsigned int update_gold: 1;
+ unsigned int uses_gold: 1;
+ unsigned int update_last: 1;
+ unsigned int uses_last: 1;
+#endif
+
+} VP9_HEADER;
+
+#ifdef PACKET_TESTING
+#define VP9_HEADER_SIZE 8
+#else
+#define VP9_HEADER_SIZE 3
+#endif
+
+
+#endif
diff --git a/vp9/common/vp9_idctllm.c b/vp9/common/vp9_idctllm.c
new file mode 100644
index 0000000..893f378
--- /dev/null
+++ b/vp9/common/vp9_idctllm.c
@@ -0,0 +1,1550 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/****************************************************************************
+ * Notes:
+ *
+ * This implementation makes use of 16 bit fixed point versions of two multiply
+ * constants:
+ * 1. sqrt(2) * cos (pi/8)
+ * 2. sqrt(2) * sin (pi/8)
+ * Because the first constant is bigger than 1, to maintain the same 16 bit
+ * fixed point precision as the second one, we use a trick of
+ * x * a = x + x*(a-1)
+ * so
+ * x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1).
+ **************************************************************************/
+#include <assert.h>
+#include <math.h>
+#include "./vpx_config.h"
+#include "vp9/common/vp9_systemdependent.h"
+
+#include "vp9/common/vp9_blockd.h"
+
+static const int cospi8sqrt2minus1 = 20091;
+static const int sinpi8sqrt2 = 35468;
+static const int rounding = 0;
+
+static const int16_t idct_i4[16] = {
+ 8192, 10703, 8192, 4433,
+ 8192, 4433, -8192, -10703,
+ 8192, -4433, -8192, 10703,
+ 8192, -10703, 8192, -4433
+};
+
+static const int16_t iadst_i4[16] = {
+ 3736, 9459, 10757, 7021,
+ 7021, 9459, -3736, -10757,
+ 9459, 0, -9459, 9459,
+ 10757, -9459, 7021, -3736
+};
+
+static const int16_t idct_i8[64] = {
+ 5793, 8035, 7568, 6811,
+ 5793, 4551, 3135, 1598,
+ 5793, 6811, 3135, -1598,
+ -5793, -8035, -7568, -4551,
+ 5793, 4551, -3135, -8035,
+ -5793, 1598, 7568, 6811,
+ 5793, 1598, -7568, -4551,
+ 5793, 6811, -3135, -8035,
+ 5793, -1598, -7568, 4551,
+ 5793, -6811, -3135, 8035,
+ 5793, -4551, -3135, 8035,
+ -5793, -1598, 7568, -6811,
+ 5793, -6811, 3135, 1598,
+ -5793, 8035, -7568, 4551,
+ 5793, -8035, 7568, -6811,
+ 5793, -4551, 3135, -1598
+};
+
+static const int16_t iadst_i8[64] = {
+ 1460, 4184, 6342, 7644,
+ 7914, 7114, 5354, 2871,
+ 2871, 7114, 7644, 4184,
+ -1460, -6342, -7914, -5354,
+ 4184, 7914, 2871, -5354,
+ -7644, -1460, 6342, 7114,
+ 5354, 6342, -4184, -7114,
+ 2871, 7644, -1460, -7914,
+ 6342, 2871, -7914, 1460,
+ 7114, -5354, -4184, 7644,
+ 7114, -1460, -5354, 7914,
+ -4184, -2871, 7644, -6342,
+ 7644, -5354, 1460, 2871,
+ -6342, 7914, -7114, 4184,
+ 7914, -7644, 7114, -6342,
+ 5354, -4184, 2871, -1460
+};
+
+
+
+static const int16_t idct_i16[256] = {
+ 4096, 5765, 5681, 5543, 5352, 5109, 4816, 4478,
+ 4096, 3675, 3218, 2731, 2217, 1682, 1130, 568,
+ 4096, 5543, 4816, 3675, 2217, 568, -1130, -2731,
+ -4096, -5109, -5681, -5765, -5352, -4478, -3218, -1682,
+ 4096, 5109, 3218, 568, -2217, -4478, -5681, -5543,
+ -4096, -1682, 1130, 3675, 5352, 5765, 4816, 2731,
+ 4096, 4478, 1130, -2731, -5352, -5543, -3218, 568,
+ 4096, 5765, 4816, 1682, -2217, -5109, -5681, -3675,
+ 4096, 3675, -1130, -5109, -5352, -1682, 3218, 5765,
+ 4096, -568, -4816, -5543, -2217, 2731, 5681, 4478,
+ 4096, 2731, -3218, -5765, -2217, 3675, 5681, 1682,
+ -4096, -5543, -1130, 4478, 5352, 568, -4816, -5109,
+ 4096, 1682, -4816, -4478, 2217, 5765, 1130, -5109,
+ -4096, 2731, 5681, 568, -5352, -3675, 3218, 5543,
+ 4096, 568, -5681, -1682, 5352, 2731, -4816, -3675,
+ 4096, 4478, -3218, -5109, 2217, 5543, -1130, -5765,
+ 4096, -568, -5681, 1682, 5352, -2731, -4816, 3675,
+ 4096, -4478, -3218, 5109, 2217, -5543, -1130, 5765,
+ 4096, -1682, -4816, 4478, 2217, -5765, 1130, 5109,
+ -4096, -2731, 5681, -568, -5352, 3675, 3218, -5543,
+ 4096, -2731, -3218, 5765, -2217, -3675, 5681, -1682,
+ -4096, 5543, -1130, -4478, 5352, -568, -4816, 5109,
+ 4096, -3675, -1130, 5109, -5352, 1682, 3218, -5765,
+ 4096, 568, -4816, 5543, -2217, -2731, 5681, -4478,
+ 4096, -4478, 1130, 2731, -5352, 5543, -3218, -568,
+ 4096, -5765, 4816, -1682, -2217, 5109, -5681, 3675,
+ 4096, -5109, 3218, -568, -2217, 4478, -5681, 5543,
+ -4096, 1682, 1130, -3675, 5352, -5765, 4816, -2731,
+ 4096, -5543, 4816, -3675, 2217, -568, -1130, 2731,
+ -4096, 5109, -5681, 5765, -5352, 4478, -3218, 1682,
+ 4096, -5765, 5681, -5543, 5352, -5109, 4816, -4478,
+ 4096, -3675, 3218, -2731, 2217, -1682, 1130, -568
+};
+
+static const int16_t iadst_i16[256] = {
+ 542, 1607, 2614, 3526, 4311, 4940, 5390, 5646,
+ 5698, 5543, 5189, 4646, 3936, 3084, 2120, 1080,
+ 1080, 3084, 4646, 5543, 5646, 4940, 3526, 1607,
+ -542, -2614, -4311, -5390, -5698, -5189, -3936, -2120,
+ 1607, 4311, 5646, 5189, 3084, 0, -3084, -5189,
+ -5646, -4311, -1607, 1607, 4311, 5646, 5189, 3084,
+ 2120, 5189, 5390, 2614, -1607, -4940, -5543, -3084,
+ 1080, 4646, 5646, 3526, -542, -4311, -5698, -3936,
+ 2614, 5646, 3936, -1080, -5189, -4940, -542, 4311,
+ 5543, 2120, -3084, -5698, -3526, 1607, 5390, 4646,
+ 3084, 5646, 1607, -4311, -5189, 0, 5189, 4311,
+ -1607, -5646, -3084, 3084, 5646, 1607, -4311, -5189,
+ 3526, 5189, -1080, -5698, -1607, 4940, 3936, -3084,
+ -5390, 542, 5646, 2120, -4646, -4311, 2614, 5543,
+ 3936, 4311, -3526, -4646, 3084, 4940, -2614, -5189,
+ 2120, 5390, -1607, -5543, 1080, 5646, -542, -5698,
+ 4311, 3084, -5189, -1607, 5646, 0, -5646, 1607,
+ 5189, -3084, -4311, 4311, 3084, -5189, -1607, 5646,
+ 4646, 1607, -5698, 2120, 4311, -4940, -1080, 5646,
+ -2614, -3936, 5189, 542, -5543, 3084, 3526, -5390,
+ 4940, 0, -4940, 4940, 0, -4940, 4940, 0,
+ -4940, 4940, 0, -4940, 4940, 0, -4940, 4940,
+ 5189, -1607, -3084, 5646, -4311, 0, 4311, -5646,
+ 3084, 1607, -5189, 5189, -1607, -3084, 5646, -4311,
+ 5390, -3084, -542, 3936, -5646, 4940, -2120, -1607,
+ 4646, -5698, 4311, -1080, -2614, 5189, -5543, 3526,
+ 5543, -4311, 2120, 542, -3084, 4940, -5698, 5189,
+ -3526, 1080, 1607, -3936, 5390, -5646, 4646, -2614,
+ 5646, -5189, 4311, -3084, 1607, 0, -1607, 3084,
+ -4311, 5189, -5646, 5646, -5189, 4311, -3084, 1607,
+ 5698, -5646, 5543, -5390, 5189, -4940, 4646, -4311,
+ 3936, -3526, 3084, -2614, 2120, -1607, 1080, -542
+};
+
+
+/* Converted the transforms to integer form. */
+#define VERTICAL_SHIFT 14 // 16
+#define VERTICAL_ROUNDING ((1 << (VERTICAL_SHIFT - 1)) - 1)
+#define HORIZONTAL_SHIFT 17 // 15
+#define HORIZONTAL_ROUNDING ((1 << (HORIZONTAL_SHIFT - 1)) - 1)
+/* Inverse hybrid transform of a tx_dim x tx_dim block (tx_dim 4 or 8; any
+ * other value selects the 16-point tables). tx_type chooses the ADST or DCT
+ * basis for the vertical and the horizontal pass. Output rows are pitch / 2
+ * int16_t apart. For tx_dim > 4, eobs below 36 / 10 / 3 limits both passes
+ * to the leading 8 / 4 / 2 coefficients per row and column. An unknown
+ * tx_type asserts and, in NDEBUG builds, returns without writing output. */
+void vp9_ihtllm_c(const int16_t *input, int16_t *output, int pitch,
+                  TX_TYPE tx_type, int tx_dim, uint16_t eobs) {
+  int i, j, k;
+  int nz_dim;
+  int16_t imbuf[256];
+
+  int16_t *op = output;
+  int16_t *im = &imbuf[0];
+
+  /* Basis tables matching tx_dim, shared by every tx_type case below. */
+  const int16_t *const dct_tab =
+      (tx_dim == 4) ? idct_i4 : ((tx_dim == 8) ? idct_i8 : idct_i16);
+  const int16_t *const adst_tab =
+      (tx_dim == 4) ? iadst_i4 : ((tx_dim == 8) ? iadst_i8 : iadst_i16);
+
+  /* pointers to vertical and horizontal transforms. */
+  const int16_t *ptv = NULL, *pth = NULL;
+  int shortpitch = pitch >> 1;
+
+  switch (tx_type) {
+    case ADST_ADST:
+      ptv = pth = adst_tab;
+      break;
+    case ADST_DCT:
+      ptv = adst_tab;
+      pth = dct_tab;
+      break;
+    case DCT_ADST:
+      ptv = dct_tab;
+      pth = adst_tab;
+      break;
+    case DCT_DCT:
+      ptv = pth = dct_tab;
+      break;
+    default:
+      /* Unknown type: without this return an NDEBUG build would go on to
+       * dereference the NULL table pointers. */
+      assert(0);
+      return;
+  }
+
+  nz_dim = tx_dim;
+  if (tx_dim > 4 && eobs < 36) {
+    /* Reduced pass size: start from an all-zero intermediate buffer. */
+    vpx_memset(imbuf, 0, sizeof(imbuf));
+    if (eobs < 3)
+      nz_dim = 2;
+    else if (eobs < 10)
+      nz_dim = 4;
+    else
+      nz_dim = 8;
+  }
+
+  /* vertical transformation */
+  for (j = 0; j < tx_dim; j++) {
+    for (i = 0; i < nz_dim; i++) {
+      int temp = 0;
+
+      for (k = 0; k < nz_dim; k++)
+        temp += ptv[k] * input[i + k * tx_dim];
+
+      im[i] = (int16_t)((temp + VERTICAL_ROUNDING) >> VERTICAL_SHIFT);
+    }
+    im += tx_dim;
+    ptv += tx_dim;
+  }
+
+  /* horizontal transformation */
+  im = &imbuf[0];
+
+  for (j = 0; j < tx_dim; j++) {
+    const int16_t *pthc = pth;
+
+    for (i = 0; i < tx_dim; i++) {
+      int temp = 0;
+
+      for (k = 0; k < nz_dim; k++)
+        temp += im[k] * pthc[k];
+
+      op[i] = (int16_t)((temp + HORIZONTAL_ROUNDING) >> HORIZONTAL_SHIFT);
+      pthc += tx_dim;
+    }
+
+    im += tx_dim;
+    op += shortpitch;
+  }
+}
+
+void vp9_short_idct4x4llm_c(short *input, short *output, int pitch) {
+ int i;
+ int a1, b1, c1, d1;
+ /* 4x4 inverse transform done in two 1-D passes over input / output. */
+ short *ip = input;
+ short *op = output;
+ int temp1, temp2;
+ int shortpitch = pitch >> 1; /* pitch is halved to step rows of shorts */
+ /* Pass 1: transform each column of input (ip[0], ip[4], ip[8], ip[12]). */
+ for (i = 0; i < 4; i++) {
+ a1 = ip[0] + ip[8];
+ b1 = ip[0] - ip[8];
+
+ temp1 = (ip[4] * sinpi8sqrt2 + rounding) >> 16;
+ temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1 + rounding) >> 16);
+ c1 = temp1 - temp2;
+
+ temp1 = ip[4] + ((ip[4] * cospi8sqrt2minus1 + rounding) >> 16);
+ temp2 = (ip[12] * sinpi8sqrt2 + rounding) >> 16;
+ d1 = temp1 + temp2;
+
+ op[shortpitch * 0] = a1 + d1;
+ op[shortpitch * 3] = a1 - d1;
+
+ op[shortpitch * 1] = b1 + c1;
+ op[shortpitch * 2] = b1 - c1;
+
+ ip++;
+ op++;
+ }
+ /* Pass 2: transform each row of output in place, applying (x + 16) >> 5. */
+ ip = output;
+ op = output;
+
+ for (i = 0; i < 4; i++) {
+ a1 = ip[0] + ip[2];
+ b1 = ip[0] - ip[2];
+
+ temp1 = (ip[1] * sinpi8sqrt2 + rounding) >> 16;
+ temp2 = ip[3] + ((ip[3] * cospi8sqrt2minus1 + rounding) >> 16);
+ c1 = temp1 - temp2;
+
+ temp1 = ip[1] + ((ip[1] * cospi8sqrt2minus1 + rounding) >> 16);
+ temp2 = (ip[3] * sinpi8sqrt2 + rounding) >> 16;
+ d1 = temp1 + temp2;
+
+ op[0] = (a1 + d1 + 16) >> 5;
+ op[3] = (a1 - d1 + 16) >> 5;
+
+ op[1] = (b1 + c1 + 16) >> 5;
+ op[2] = (b1 - c1 + 16) >> 5;
+
+ ip += shortpitch;
+ op += shortpitch;
+ }
+}
+
+/* DC-only shortcut of the 4x4 inverse transform: every entry of the 4x4
+ * output block receives (input[0] + 16) >> 5. Output rows are pitch / 2
+ * shorts apart. */
+void vp9_short_idct4x4llm_1_c(short *input, short *output, int pitch) {
+  const int row_step = pitch >> 1;
+  const int dc = (input[0] + 16) >> 5;
+  int row, col;
+
+  for (row = 0; row < 4; row++) {
+    short *const out_row = output + row * row_step;
+    for (col = 0; col < 4; col++)
+      out_row[col] = dc;
+  }
+}
+
+/* Adds the DC-only residual (input_dc + 16) >> 5 to a 4x4 block of
+ * predictor pixels and stores each sum, saturated to [0, 255], in dst_ptr.
+ * pitch steps pred_ptr from row to row, stride steps dst_ptr. */
+void vp9_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr,
+                            unsigned char *dst_ptr, int pitch, int stride) {
+  const int dc = (input_dc + 16) >> 5;
+  int row, col;
+
+  for (row = 0; row < 4; row++) {
+    const unsigned char *const pred_row = pred_ptr + row * pitch;
+    unsigned char *const dst_row = dst_ptr + row * stride;
+
+    for (col = 0; col < 4; col++) {
+      const int sum = dc + pred_row[col];
+
+      /* Saturate to the 8-bit pixel range before storing. */
+      dst_row[col] = (unsigned char)(sum < 0 ? 0 : (sum > 255 ? 255 : sum));
+    }
+
+  }
+
+}
+
+void vp9_short_inv_walsh4x4_c(short *input, short *output) {
+ int i;
+ int a1, b1, c1, d1;
+ short *ip = input;
+ short *op = output;
+ /* Two-pass 4x4 inverse Walsh transform. Pass 1: along each row of input. */
+ for (i = 0; i < 4; i++) {
+ a1 = ((ip[0] + ip[3]));
+ b1 = ((ip[1] + ip[2]));
+ c1 = ((ip[1] - ip[2]));
+ d1 = ((ip[0] - ip[3]));
+
+ op[0] = (a1 + b1 + 1) >> 1; /* only this output adds 1 before the shift */
+ op[1] = (c1 + d1) >> 1;
+ op[2] = (a1 - b1) >> 1;
+ op[3] = (d1 - c1) >> 1;
+
+ ip += 4;
+ op += 4;
+ }
+ /* Pass 2: the same butterfly down each column of output, done in place. */
+ ip = output;
+ op = output;
+ for (i = 0; i < 4; i++) {
+ a1 = ip[0] + ip[12];
+ b1 = ip[4] + ip[8];
+ c1 = ip[4] - ip[8];
+ d1 = ip[0] - ip[12];
+ op[0] = (a1 + b1 + 1) >> 1;
+ op[4] = (c1 + d1) >> 1;
+ op[8] = (a1 - b1) >> 1;
+ op[12] = (d1 - c1) >> 1;
+ ip++;
+ op++;
+ }
+}
+
+/* Inverse Walsh transform for the case where only in[0] is used: a first
+ * stage builds four intermediate values in tmp, a second stage expands each
+ * of them down its column of the 4x4 output (row stride 4). */
+void vp9_short_inv_walsh4x4_1_c(short *in, short *out) {
+  short tmp[4];
+  int col, row;
+
+  /* In both stages only the first entry adds 1 before the shift. */
+  tmp[0] = (in[0] + 1) >> 1;
+  for (col = 1; col < 4; col++)
+    tmp[col] = in[0] >> 1;
+
+  for (col = 0; col < 4; col++) {
+    out[col] = (tmp[col] + 1) >> 1;
+    for (row = 1; row < 4; row++)
+      out[row * 4 + col] = tmp[col] >> 1;
+  }
+}
+
+#if CONFIG_LOSSLESS
+void vp9_short_inv_walsh4x4_lossless_c(short *input, short *output) {
+ int i;
+ int a1, b1, c1, d1;
+ short *ip = input;
+ short *op = output;
+ /* Pass 1: along each row; sums are shifted down by Y2_WHT_UPSCALE_FACTOR. */
+ for (i = 0; i < 4; i++) {
+ a1 = ((ip[0] + ip[3])) >> Y2_WHT_UPSCALE_FACTOR;
+ b1 = ((ip[1] + ip[2])) >> Y2_WHT_UPSCALE_FACTOR;
+ c1 = ((ip[1] - ip[2])) >> Y2_WHT_UPSCALE_FACTOR;
+ d1 = ((ip[0] - ip[3])) >> Y2_WHT_UPSCALE_FACTOR;
+
+ op[0] = (a1 + b1 + 1) >> 1;
+ op[1] = (c1 + d1) >> 1;
+ op[2] = (a1 - b1) >> 1;
+ op[3] = (d1 - c1) >> 1;
+
+ ip += 4;
+ op += 4;
+ }
+ /* Pass 2: down each column of output, in place; results are shifted back up. */
+ ip = output;
+ op = output;
+ for (i = 0; i < 4; i++) {
+ a1 = ip[0] + ip[12];
+ b1 = ip[4] + ip[8];
+ c1 = ip[4] - ip[8];
+ d1 = ip[0] - ip[12];
+
+
+ op[0] = ((a1 + b1 + 1) >> 1) << Y2_WHT_UPSCALE_FACTOR;
+ op[4] = ((c1 + d1) >> 1) << Y2_WHT_UPSCALE_FACTOR;
+ op[8] = ((a1 - b1) >> 1) << Y2_WHT_UPSCALE_FACTOR;
+ op[12] = ((d1 - c1) >> 1) << Y2_WHT_UPSCALE_FACTOR;
+
+ ip++;
+ op++;
+ }
+}
+
+/* DC-only inverse Walsh transform, lossless flavour: in[0] is shifted down
+ * by Y2_WHT_UPSCALE_FACTOR on entry and every output value is shifted back
+ * up by the same amount. Output is a 4x4 block with row stride 4. */
+void vp9_short_inv_walsh4x4_1_lossless_c(short *in, short *out) {
+  const int dc = in[0] >> Y2_WHT_UPSCALE_FACTOR;
+  short tmp[4];
+  int col, row;
+
+  tmp[0] = (dc + 1) >> 1;
+  for (col = 1; col < 4; col++)
+    tmp[col] = dc >> 1;
+
+  for (col = 0; col < 4; col++) {
+    out[col] = ((tmp[col] + 1) >> 1) << Y2_WHT_UPSCALE_FACTOR;
+    for (row = 1; row < 4; row++)
+      out[row * 4 + col] = (tmp[col] >> 1) << Y2_WHT_UPSCALE_FACTOR;
+  }
+}
+
+void vp9_short_inv_walsh4x4_x8_c(short *input, short *output, int pitch) {
+ int i;
+ int a1, b1, c1, d1;
+ short *ip = input;
+ short *op = output;
+ int shortpitch = pitch >> 1; /* output rows are pitch / 2 shorts apart */
+ /* Pass 1: along each input row; sums are shifted down by WHT_UPSCALE_FACTOR. */
+ for (i = 0; i < 4; i++) {
+ a1 = ((ip[0] + ip[3])) >> WHT_UPSCALE_FACTOR;
+ b1 = ((ip[1] + ip[2])) >> WHT_UPSCALE_FACTOR;
+ c1 = ((ip[1] - ip[2])) >> WHT_UPSCALE_FACTOR;
+ d1 = ((ip[0] - ip[3])) >> WHT_UPSCALE_FACTOR;
+
+ op[0] = (a1 + b1 + 1) >> 1;
+ op[1] = (c1 + d1) >> 1;
+ op[2] = (a1 - b1) >> 1;
+ op[3] = (d1 - c1) >> 1;
+
+ ip += 4;
+ op += shortpitch;
+ }
+ /* Pass 2: down each column of output (stride shortpitch), done in place. */
+ ip = output;
+ op = output;
+ for (i = 0; i < 4; i++) {
+ a1 = ip[shortpitch * 0] + ip[shortpitch * 3];
+ b1 = ip[shortpitch * 1] + ip[shortpitch * 2];
+ c1 = ip[shortpitch * 1] - ip[shortpitch * 2];
+ d1 = ip[shortpitch * 0] - ip[shortpitch * 3];
+
+
+ op[shortpitch * 0] = (a1 + b1 + 1) >> 1;
+ op[shortpitch * 1] = (c1 + d1) >> 1;
+ op[shortpitch * 2] = (a1 - b1) >> 1;
+ op[shortpitch * 3] = (d1 - c1) >> 1;
+
+ ip++;
+ op++;
+ }
+}
+
+/* DC-only inverse Walsh transform writing a 4x4 block whose rows are
+ * pitch / 2 shorts apart. in[0] is first shifted down by WHT_UPSCALE_FACTOR;
+ * the outputs are not shifted back up. */
+void vp9_short_inv_walsh4x4_1_x8_c(short *in, short *out, int pitch) {
+  const int row_step = pitch >> 1;
+  const int dc = in[0] >> WHT_UPSCALE_FACTOR;
+  short tmp[4];
+  int col, row;
+
+  /* In both stages only the first entry adds 1 before the shift. */
+  tmp[0] = (dc + 1) >> 1;
+  for (col = 1; col < 4; col++)
+    tmp[col] = dc >> 1;
+
+  for (col = 0; col < 4; col++) {
+    out[col] = (tmp[col] + 1) >> 1;
+    for (row = 1; row < 4; row++)
+      out[row * row_step + col] = tmp[col] >> 1;
+  }
+}
+
+/* Expands input_dc with vp9_short_inv_walsh4x4_1_x8_c() into a 4x4 residual,
+ * adds it to the predictor block and stores each sum, saturated to
+ * [0, 255], in dst_ptr. pitch steps pred_ptr per row, stride steps dst_ptr. */
+void vp9_dc_only_inv_walsh_add_c(short input_dc, unsigned char *pred_ptr,
+                                 unsigned char *dst_ptr,
+                                 int pitch, int stride) {
+  short residual[16];
+  int row, col;
+
+  /* The pitch argument is halved by the callee: 4 shorts per residual row. */
+  vp9_short_inv_walsh4x4_1_x8_c(&input_dc, residual, 4 << 1);
+
+  for (row = 0; row < 4; row++) {
+    const unsigned char *const pred_row = pred_ptr + row * pitch;
+    unsigned char *const dst_row = dst_ptr + row * stride;
+
+    for (col = 0; col < 4; col++) {
+      const int sum = residual[row * 4 + col] + pred_row[col];
+
+      dst_row[col] = (unsigned char)(sum < 0 ? 0 : (sum > 255 ? 255 : sum));
+    }
+  }
+}
+#endif
+
+/* DC-only add for an 8x8 block: (input_dc + 16) >> 5 is added to every
+ * predictor pixel and the sum, saturated to [0, 255], is stored in dst_ptr.
+ * The block is walked as four 4x4 quadrants in the order top-left,
+ * top-right, bottom-left, bottom-right. */
+void vp9_dc_only_idct_add_8x8_c(short input_dc,
+                                unsigned char *pred_ptr,
+                                unsigned char *dst_ptr,
+                                int pitch, int stride) {
+  const int dc = (input_dc + 16) >> 5;
+  int quad, row, col;
+
+  for (quad = 0; quad < 4; quad++) {
+    /* Top-left corner of this quadrant inside the 8x8 block. */
+    const int x0 = (quad % 2) * 4;
+    const int y0 = (quad / 2) * 4;
+
+    for (row = 0; row < 4; row++) {
+      const unsigned char *const pred_row = pred_ptr + (y0 + row) * pitch + x0;
+      unsigned char *const dst_row = dst_ptr + (y0 + row) * stride + x0;
+
+      for (col = 0; col < 4; col++) {
+        const int sum = dc + pred_row[col];
+
+        dst_row[col] = (unsigned char)(sum < 0 ? 0 : (sum > 255 ? 255 : sum));
+      }
+    }
+  }
+
+}
+
+#define W1 2841 /* 2048*sqrt(2)*cos(1*pi/16) */
+#define W2 2676 /* 2048*sqrt(2)*cos(2*pi/16) */
+#define W3 2408 /* 2048*sqrt(2)*cos(3*pi/16) */
+#define W5 1609 /* 2048*sqrt(2)*cos(5*pi/16) */
+#define W6 1108 /* 2048*sqrt(2)*cos(6*pi/16) */
+#define W7 565 /* 2048*sqrt(2)*cos(7*pi/16) */
+
+/* row (horizontal) IDCT, applied in place to the 8 values blk[0..7]
+ *
+ * dst[k] = sum(l = 0..7) c[l] * src[l] * cos((pi / 8) * (k + 1/2) * l)
+ *
+ * where: c[0] = 128
+ *        c[1..7] = 128*sqrt(2) */
+
+static void idctrow(int *blk) {
+ int x0, x1, x2, x3, x4, x5, x6, x7, x8;
+ /* shortcut: blk[1..7] are all zero, so every output is blk[0] << 3 */
+ if (!((x1 = blk[4] << 11) | (x2 = blk[6]) | (x3 = blk[2]) |
+ (x4 = blk[1]) | (x5 = blk[7]) | (x6 = blk[5]) | (x7 = blk[3]))) {
+ blk[0] = blk[1] = blk[2] = blk[3] = blk[4]
+ = blk[5] = blk[6] = blk[7] = blk[0] << 3;
+ return;
+ }
+
+ x0 = (blk[0] << 11) + 128; /* for proper rounding in the fourth stage */
+ /* first stage */
+ x8 = W7 * (x4 + x5);
+ x4 = x8 + (W1 - W7) * x4;
+ x5 = x8 - (W1 + W7) * x5;
+ x8 = W3 * (x6 + x7);
+ x6 = x8 - (W3 - W5) * x6;
+ x7 = x8 - (W3 + W5) * x7;
+
+ /* second stage */
+ x8 = x0 + x1;
+ x0 -= x1;
+ x1 = W6 * (x3 + x2);
+ x2 = x1 - (W2 + W6) * x2;
+ x3 = x1 + (W2 - W6) * x3;
+ x1 = x4 + x6;
+ x4 -= x6;
+ x6 = x5 + x7;
+ x5 -= x7;
+
+ /* third stage */
+ x7 = x8 + x3;
+ x8 -= x3;
+ x3 = x0 + x2;
+ x0 -= x2;
+ x2 = (181 * (x4 + x5) + 128) >> 8;
+ x4 = (181 * (x4 - x5) + 128) >> 8;
+
+ /* fourth stage */
+ blk[0] = (x7 + x1) >> 8;
+ blk[1] = (x3 + x2) >> 8;
+ blk[2] = (x0 + x4) >> 8;
+ blk[3] = (x8 + x6) >> 8;
+ blk[4] = (x8 - x6) >> 8;
+ blk[5] = (x0 - x4) >> 8;
+ blk[6] = (x3 - x2) >> 8;
+ blk[7] = (x7 - x1) >> 8;
+}
+
+/* column (vertical) IDCT, applied in place to blk[8*0], blk[8*1], ..., blk[8*7]
+ *
+ * dst[8*k] = sum(l = 0..7) c[l] * src[8*l] * cos((pi / 8) * (k + 1/2) * l)
+ *
+ * where: c[0] = 1/1024
+ *        c[1..7] = (1/1024)*sqrt(2) */
+static void idctcol(int *blk) {
+ int x0, x1, x2, x3, x4, x5, x6, x7, x8;
+
+ /* shortcut: rows 1..7 of this column are zero, so all outputs share one value */
+ if (!((x1 = (blk[8 * 4] << 8)) | (x2 = blk[8 * 6]) | (x3 = blk[8 * 2]) |
+ (x4 = blk[8 * 1]) | (x5 = blk[8 * 7]) | (x6 = blk[8 * 5]) |
+ (x7 = blk[8 * 3]))) {
+ blk[8 * 0] = blk[8 * 1] = blk[8 * 2] = blk[8 * 3]
+ = blk[8 * 4] = blk[8 * 5] = blk[8 * 6]
+ = blk[8 * 7] = ((blk[8 * 0] + 32) >> 6);
+ return;
+ }
+
+ x0 = (blk[8 * 0] << 8) + 16384;
+
+ /* first stage */
+ x8 = W7 * (x4 + x5) + 4;
+ x4 = (x8 + (W1 - W7) * x4) >> 3;
+ x5 = (x8 - (W1 + W7) * x5) >> 3;
+ x8 = W3 * (x6 + x7) + 4;
+ x6 = (x8 - (W3 - W5) * x6) >> 3;
+ x7 = (x8 - (W3 + W5) * x7) >> 3;
+
+ /* second stage */
+ x8 = x0 + x1;
+ x0 -= x1;
+ x1 = W6 * (x3 + x2) + 4;
+ x2 = (x1 - (W2 + W6) * x2) >> 3;
+ x3 = (x1 + (W2 - W6) * x3) >> 3;
+ x1 = x4 + x6;
+ x4 -= x6;
+ x6 = x5 + x7;
+ x5 -= x7;
+
+ /* third stage */
+ x7 = x8 + x3;
+ x8 -= x3;
+ x3 = x0 + x2;
+ x0 -= x2;
+ x2 = (181 * (x4 + x5) + 128) >> 8;
+ x4 = (181 * (x4 - x5) + 128) >> 8;
+
+ /* fourth stage */
+ blk[8 * 0] = (x7 + x1) >> 14;
+ blk[8 * 1] = (x3 + x2) >> 14;
+ blk[8 * 2] = (x0 + x4) >> 14;
+ blk[8 * 3] = (x8 + x6) >> 14;
+ blk[8 * 4] = (x8 - x6) >> 14;
+ blk[8 * 5] = (x0 - x4) >> 14;
+ blk[8 * 6] = (x3 - x2) >> 14;
+ blk[8 * 7] = (x7 - x1) >> 14;
+}
+
+#define TX_DIM 8
+/* 8x8 inverse DCT: reads 64 packed coefs, writes 8 rows of 8 values to block. */
+void vp9_short_idct8x8_c(short *coefs, short *block, int pitch) {
+ int X[TX_DIM * TX_DIM];
+ int i, j;
+ int shortpitch = pitch >> 1;
+ /* Pre-scale each coefficient, (c + 1 + (c < 0)) >> 2, into the int buffer. */
+ for (i = 0; i < TX_DIM; i++) {
+ for (j = 0; j < TX_DIM; j++) {
+ X[i * TX_DIM + j] = (int)(coefs[i * TX_DIM + j] + 1
+ + (coefs[i * TX_DIM + j] < 0)) >> 2;
+ }
+ }
+ for (i = 0; i < 8; i++)
+ idctrow(X + 8 * i);
+ for (i = 0; i < 8; i++)
+ idctcol(X + i);
+ /* Halve the result and store it; output rows are shortpitch shorts apart. */
+ for (i = 0; i < TX_DIM; i++) {
+ for (j = 0; j < TX_DIM; j++) {
+ block[i * shortpitch + j] = X[i * TX_DIM + j] >> 1;
+ }
+ }
+}
+
+/* Row IDCT when only first 4 coefficients are non-zero. */
+static void idctrow10(int *blk) {
+ int x0, x1, x2, x3, x4, x5, x6, x7, x8;
+
+ /* shortcut: blk[1..7] are all zero, so every output is blk[0] << 3 */
+ if (!((x1 = blk[4] << 11) | (x2 = blk[6]) | (x3 = blk[2]) |
+ (x4 = blk[1]) | (x5 = blk[7]) | (x6 = blk[5]) | (x7 = blk[3]))) {
+ blk[0] = blk[1] = blk[2] = blk[3] = blk[4]
+ = blk[5] = blk[6] = blk[7] = blk[0] << 3;
+ return;
+ }
+ /* blk[4..7] are taken as zero below: x1, x2, x5, x6 are overwritten unread */
+ x0 = (blk[0] << 11) + 128; /* for proper rounding in the fourth stage */
+ /* first stage */
+ x5 = W7 * x4;
+ x4 = W1 * x4;
+ x6 = W3 * x7;
+ x7 = -W5 * x7;
+
+ /* second stage */
+ x2 = W6 * x3;
+ x3 = W2 * x3;
+ x1 = x4 + x6;
+ x4 -= x6;
+ x6 = x5 + x7;
+ x5 -= x7;
+
+ /* third stage */
+ x7 = x0 + x3;
+ x8 = x0 - x3;
+ x3 = x0 + x2;
+ x0 -= x2;
+ x2 = (181 * (x4 + x5) + 128) >> 8;
+ x4 = (181 * (x4 - x5) + 128) >> 8;
+
+ /* fourth stage */
+ blk[0] = (x7 + x1) >> 8;
+ blk[1] = (x3 + x2) >> 8;
+ blk[2] = (x0 + x4) >> 8;
+ blk[3] = (x8 + x6) >> 8;
+ blk[4] = (x8 - x6) >> 8;
+ blk[5] = (x0 - x4) >> 8;
+ blk[6] = (x3 - x2) >> 8;
+ blk[7] = (x7 - x1) >> 8;
+}
+
+/* Column (vertical) IDCT when only first 4 coefficients are non-zero. */
+static void idctcol10(int *blk) {
+ int x0, x1, x2, x3, x4, x5, x6, x7, x8;
+
+ /* shortcut: rows 1..7 of this column are zero, so all outputs share one value */
+ if (!((x1 = (blk[8 * 4] << 8)) | (x2 = blk[8 * 6]) | (x3 = blk[8 * 2]) |
+ (x4 = blk[8 * 1]) | (x5 = blk[8 * 7]) | (x6 = blk[8 * 5]) |
+ (x7 = blk[8 * 3]))) {
+ blk[8 * 0] = blk[8 * 1] = blk[8 * 2] = blk[8 * 3]
+ = blk[8 * 4] = blk[8 * 5] = blk[8 * 6]
+ = blk[8 * 7] = ((blk[8 * 0] + 32) >> 6);
+ return;
+ }
+ /* rows 4..7 are taken as zero below: x1, x2, x5, x6 are overwritten unread */
+ x0 = (blk[8 * 0] << 8) + 16384;
+
+ /* first stage */
+ x5 = (W7 * x4 + 4) >> 3;
+ x4 = (W1 * x4 + 4) >> 3;
+ x6 = (W3 * x7 + 4) >> 3;
+ x7 = (-W5 * x7 + 4) >> 3;
+
+ /* second stage */
+ x2 = (W6 * x3 + 4) >> 3;
+ x3 = (W2 * x3 + 4) >> 3;
+ x1 = x4 + x6;
+ x4 -= x6;
+ x6 = x5 + x7;
+ x5 -= x7;
+
+ /* third stage */
+ x7 = x0 + x3;
+ x8 = x0 - x3;
+ x3 = x0 + x2;
+ x0 -= x2;
+ x2 = (181 * (x4 + x5) + 128) >> 8;
+ x4 = (181 * (x4 - x5) + 128) >> 8;
+
+ /* fourth stage */
+ blk[8 * 0] = (x7 + x1) >> 14;
+ blk[8 * 1] = (x3 + x2) >> 14;
+ blk[8 * 2] = (x0 + x4) >> 14;
+ blk[8 * 3] = (x8 + x6) >> 14;
+ blk[8 * 4] = (x8 - x6) >> 14;
+ blk[8 * 5] = (x0 - x4) >> 14;
+ blk[8 * 6] = (x3 - x2) >> 14;
+ blk[8 * 7] = (x7 - x1) >> 14;
+}
+/* 8x8 inverse DCT for blocks whose non-zero coefs all lie in the top-left 4x4. */
+void vp9_short_idct10_8x8_c(short *coefs, short *block, int pitch) {
+ int X[TX_DIM * TX_DIM];
+ int i, j;
+ int shortpitch = pitch >> 1;
+ /* Pre-scale each coefficient, (c + 1 + (c < 0)) >> 2, into the int buffer. */
+ for (i = 0; i < TX_DIM; i++) {
+ for (j = 0; j < TX_DIM; j++) {
+ X[i * TX_DIM + j] = (int)(coefs[i * TX_DIM + j] + 1
+ + (coefs[i * TX_DIM + j] < 0)) >> 2;
+ }
+ }
+
+ /* Do first 4 row idct only since non-zero dct coefficients are all in
+ * upper-left 4x4 area. */
+ for (i = 0; i < 4; i++)
+ idctrow10(X + 8 * i);
+
+ for (i = 0; i < 8; i++)
+ idctcol10(X + i);
+
+ for (i = 0; i < TX_DIM; i++) {
+ for (j = 0; j < TX_DIM; j++) {
+ block[i * shortpitch + j] = X[i * TX_DIM + j] >> 1;
+ }
+ }
+}
+/* 2x2 inverse Haar on input[0,1,4,8] -> output[0,1,4,8]; pitch is unused. */
+void vp9_short_ihaar2x2_c(short *input, short *output, int pitch) {
+ int i;
+ short *ip = input; // 0,1, 4, 8
+ short *op = output;
+ for (i = 0; i < 16; i++) {
+ op[i] = 0;
+ }
+ /* output was zeroed above before input is read, so the two must not alias */
+ op[0] = (ip[0] + ip[1] + ip[4] + ip[8] + 1) >> 1;
+ op[1] = (ip[0] - ip[1] + ip[4] - ip[8]) >> 1;
+ op[4] = (ip[0] + ip[1] - ip[4] - ip[8]) >> 1;
+ op[8] = (ip[0] - ip[1] - ip[4] + ip[8]) >> 1;
+}
+
+
+#if 0
+// Keep a really bad float version as reference for now.
+void vp9_short_idct16x16_c(short *input, short *output, int pitch) {
+
+ vp9_clear_system_state(); // Make it simd safe : __asm emms;
+ {
+ double x;
+ const int short_pitch = pitch >> 1;
+ int i, j, k, l;
+ for (l = 0; l < 16; ++l) {
+ for (k = 0; k < 16; ++k) {
+ double s = 0;
+ for (i = 0; i < 16; ++i) {
+ for (j = 0; j < 16; ++j) {
+ x=cos(PI*j*(l+0.5)/16.0)*cos(PI*i*(k+0.5)/16.0)*input[i*16+j]/32;
+ if (i != 0)
+ x *= sqrt(2.0);
+ if (j != 0)
+ x *= sqrt(2.0);
+ s += x;
+ }
+ }
+ output[k*short_pitch+l] = (short)round(s);
+ }
+ }
+ }
+ vp9_clear_system_state(); // Make it simd safe : __asm emms;
+}
+#endif
+
+#define TEST_INT_16x16_IDCT 1
+#if !TEST_INT_16x16_IDCT
+static const double C1 = 0.995184726672197;
+static const double C2 = 0.98078528040323;
+static const double C3 = 0.956940335732209;
+static const double C4 = 0.923879532511287;
+static const double C5 = 0.881921264348355;
+static const double C6 = 0.831469612302545;
+static const double C7 = 0.773010453362737;
+static const double C8 = 0.707106781186548;
+static const double C9 = 0.634393284163646;
+static const double C10 = 0.555570233019602;
+static const double C11 = 0.471396736825998;
+static const double C12 = 0.38268343236509;
+static const double C13 = 0.290284677254462;
+static const double C14 = 0.195090322016128;
+static const double C15 = 0.098017140329561;
+
+
+static void butterfly_16x16_idct_1d(double input[16], double output[16]) {
+
+ vp9_clear_system_state(); // Make it simd safe : __asm emms;
+ {
+ double step[16];
+ double intermediate[16];
+ double temp1, temp2;
+
+
+ // step 1 and 2
+ step[ 0] = input[0] + input[8];
+ step[ 1] = input[0] - input[8];
+
+ temp1 = input[4]*C12;
+ temp2 = input[12]*C4;
+
+ temp1 -= temp2;
+ temp1 *= C8;
+
+ step[ 2] = 2*(temp1);
+
+ temp1 = input[4]*C4;
+ temp2 = input[12]*C12;
+ temp1 += temp2;
+ temp1 = (temp1);
+ temp1 *= C8;
+ step[ 3] = 2*(temp1);
+
+ temp1 = input[2]*C8;
+ temp1 = 2*(temp1);
+ temp2 = input[6] + input[10];
+
+ step[ 4] = temp1 + temp2;
+ step[ 5] = temp1 - temp2;
+
+ temp1 = input[14]*C8;
+ temp1 = 2*(temp1);
+ temp2 = input[6] - input[10];
+
+ step[ 6] = temp2 - temp1;
+ step[ 7] = temp2 + temp1;
+
+ // for odd input
+ temp1 = input[3]*C12;
+ temp2 = input[13]*C4;
+ temp1 += temp2;
+ temp1 = (temp1);
+ temp1 *= C8;
+ intermediate[ 8] = 2*(temp1);
+
+ temp1 = input[3]*C4;
+ temp2 = input[13]*C12;
+ temp2 -= temp1;
+ temp2 = (temp2);
+ temp2 *= C8;
+ intermediate[ 9] = 2*(temp2);
+
+ intermediate[10] = 2*(input[9]*C8);
+ intermediate[11] = input[15] - input[1];
+ intermediate[12] = input[15] + input[1];
+ intermediate[13] = 2*((input[7]*C8));
+
+ temp1 = input[11]*C12;
+ temp2 = input[5]*C4;
+ temp2 -= temp1;
+ temp2 = (temp2);
+ temp2 *= C8;
+ intermediate[14] = 2*(temp2);
+
+ temp1 = input[11]*C4;
+ temp2 = input[5]*C12;
+ temp1 += temp2;
+ temp1 = (temp1);
+ temp1 *= C8;
+ intermediate[15] = 2*(temp1);
+
+ step[ 8] = intermediate[ 8] + intermediate[14];
+ step[ 9] = intermediate[ 9] + intermediate[15];
+ step[10] = intermediate[10] + intermediate[11];
+ step[11] = intermediate[10] - intermediate[11];
+ step[12] = intermediate[12] + intermediate[13];
+ step[13] = intermediate[12] - intermediate[13];
+ step[14] = intermediate[ 8] - intermediate[14];
+ step[15] = intermediate[ 9] - intermediate[15];
+
+ // step 3
+ output[0] = step[ 0] + step[ 3];
+ output[1] = step[ 1] + step[ 2];
+ output[2] = step[ 1] - step[ 2];
+ output[3] = step[ 0] - step[ 3];
+
+ temp1 = step[ 4]*C14;
+ temp2 = step[ 7]*C2;
+ temp1 -= temp2;
+ output[4] = (temp1);
+
+ temp1 = step[ 4]*C2;
+ temp2 = step[ 7]*C14;
+ temp1 += temp2;
+ output[7] = (temp1);
+
+ temp1 = step[ 5]*C10;
+ temp2 = step[ 6]*C6;
+ temp1 -= temp2;
+ output[5] = (temp1);
+
+ temp1 = step[ 5]*C6;
+ temp2 = step[ 6]*C10;
+ temp1 += temp2;
+ output[6] = (temp1);
+
+ output[8] = step[ 8] + step[11];
+ output[9] = step[ 9] + step[10];
+ output[10] = step[ 9] - step[10];
+ output[11] = step[ 8] - step[11];
+ output[12] = step[12] + step[15];
+ output[13] = step[13] + step[14];
+ output[14] = step[13] - step[14];
+ output[15] = step[12] - step[15];
+
+ // output 4
+ step[ 0] = output[0] + output[7];
+ step[ 1] = output[1] + output[6];
+ step[ 2] = output[2] + output[5];
+ step[ 3] = output[3] + output[4];
+ step[ 4] = output[3] - output[4];
+ step[ 5] = output[2] - output[5];
+ step[ 6] = output[1] - output[6];
+ step[ 7] = output[0] - output[7];
+
+ temp1 = output[8]*C7;
+ temp2 = output[15]*C9;
+ temp1 -= temp2;
+ step[ 8] = (temp1);
+
+ temp1 = output[9]*C11;
+ temp2 = output[14]*C5;
+ temp1 += temp2;
+ step[ 9] = (temp1);
+
+ temp1 = output[10]*C3;
+ temp2 = output[13]*C13;
+ temp1 -= temp2;
+ step[10] = (temp1);
+
+ temp1 = output[11]*C15;
+ temp2 = output[12]*C1;
+ temp1 += temp2;
+ step[11] = (temp1);
+
+ temp1 = output[11]*C1;
+ temp2 = output[12]*C15;
+ temp2 -= temp1;
+ step[12] = (temp2);
+
+ temp1 = output[10]*C13;
+ temp2 = output[13]*C3;
+ temp1 += temp2;
+ step[13] = (temp1);
+
+ temp1 = output[9]*C5;
+ temp2 = output[14]*C11;
+ temp2 -= temp1;
+ step[14] = (temp2);
+
+ temp1 = output[8]*C9;
+ temp2 = output[15]*C7;
+ temp1 += temp2;
+ step[15] = (temp1);
+
+ // step 5
+ output[0] = (step[0] + step[15]);
+ output[1] = (step[1] + step[14]);
+ output[2] = (step[2] + step[13]);
+ output[3] = (step[3] + step[12]);
+ output[4] = (step[4] + step[11]);
+ output[5] = (step[5] + step[10]);
+ output[6] = (step[6] + step[ 9]);
+ output[7] = (step[7] + step[ 8]);
+
+ output[15] = (step[0] - step[15]);
+ output[14] = (step[1] - step[14]);
+ output[13] = (step[2] - step[13]);
+ output[12] = (step[3] - step[12]);
+ output[11] = (step[4] - step[11]);
+ output[10] = (step[5] - step[10]);
+ output[9] = (step[6] - step[ 9]);
+ output[8] = (step[7] - step[ 8]);
+ }
+ vp9_clear_system_state(); // Make it simd safe : __asm emms;
+}
+
+// Remove once an int version of iDCT is written
+#if 0
+void reference_16x16_idct_1d(double input[16], double output[16]) {
+
+ vp9_clear_system_state(); // Make it simd safe : __asm emms;
+ {
+ const double kPi = 3.141592653589793238462643383279502884;
+ const double kSqrt2 = 1.414213562373095048801688724209698;
+ for (int k = 0; k < 16; k++) {
+ output[k] = 0.0;
+ for (int n = 0; n < 16; n++) {
+ output[k] += input[n]*cos(kPi*(2*k+1)*n/32.0);
+ if (n == 0)
+ output[k] = output[k]/kSqrt2;
+ }
+ }
+ }
+ vp9_clear_system_state(); // Make it simd safe : __asm emms;
+}
+#endif
+
+void vp9_short_idct16x16_c(short *input, short *output, int pitch) {
+
+ vp9_clear_system_state(); // Make it simd safe : __asm emms;
+ {
+ double out[16*16], out2[16*16];
+ const int short_pitch = pitch >> 1;
+ int i, j;
+ // First transform rows
+ for (i = 0; i < 16; ++i) {
+ double temp_in[16], temp_out[16];
+ for (j = 0; j < 16; ++j)
+ temp_in[j] = input[j + i*short_pitch];
+ butterfly_16x16_idct_1d(temp_in, temp_out);
+ for (j = 0; j < 16; ++j)
+ out[j + i*16] = temp_out[j];
+ }
+ // Then transform columns
+ for (i = 0; i < 16; ++i) {
+ double temp_in[16], temp_out[16];
+ for (j = 0; j < 16; ++j)
+ temp_in[j] = out[j*16 + i];
+ butterfly_16x16_idct_1d(temp_in, temp_out);
+ for (j = 0; j < 16; ++j)
+ out2[j*16 + i] = temp_out[j];
+ }
+ for (i = 0; i < 16*16; ++i)
+ output[i] = round(out2[i]/128);
+ }
+ vp9_clear_system_state(); // Make it simd safe : __asm emms;
+}
+
+#else
+static const int16_t C1 = 16305; /* Ck = round(16384 * cos(k * pi / 32)) */
+static const int16_t C2 = 16069;
+static const int16_t C3 = 15679;
+static const int16_t C4 = 15137;
+static const int16_t C5 = 14449;
+static const int16_t C6 = 13623;
+static const int16_t C7 = 12665;
+static const int16_t C8 = 11585;
+static const int16_t C9 = 10394;
+static const int16_t C10 = 9102;
+static const int16_t C11 = 7723;
+static const int16_t C12 = 6270;
+static const int16_t C13 = 4756;
+static const int16_t C14 = 3196;
+static const int16_t C15 = 1606;
+/* Fixed-point shift amounts; each *_ROUNDING is half of the matching divisor. */
+#define INITIAL_SHIFT 2
+#define INITIAL_ROUNDING (1 << (INITIAL_SHIFT - 1))
+#define RIGHT_SHIFT 14
+#define RIGHT_ROUNDING (1 << (RIGHT_SHIFT - 1))
+// 16-point 1-D inverse DCT in fixed point; outputs are >> last_shift_bits.
+static void butterfly_16x16_idct_1d(int16_t input[16], int16_t output[16],
+ int last_shift_bits) {
+ int16_t step[16];
+ int intermediate[16]; // only entries 8..15 are used
+ int temp1, temp2;
+
+ int step1_shift = RIGHT_SHIFT + INITIAL_SHIFT;
+ int step1_rounding = 1 << (step1_shift - 1);
+ int last_rounding = 0;
+
+ if (last_shift_bits > 0)
+ last_rounding = 1 << (last_shift_bits - 1);
+
+ // step 1 and 2
+ step[ 0] = (input[0] + input[8] + INITIAL_ROUNDING) >> INITIAL_SHIFT;
+ step[ 1] = (input[0] - input[8] + INITIAL_ROUNDING) >> INITIAL_SHIFT;
+
+ temp1 = input[4] * C12;
+ temp2 = input[12] * C4;
+ temp1 = (temp1 - temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
+ temp1 *= C8;
+ step[ 2] = (2 * (temp1) + step1_rounding) >> step1_shift;
+
+ temp1 = input[4] * C4;
+ temp2 = input[12] * C12;
+ temp1 = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
+ temp1 *= C8;
+ step[ 3] = (2 * (temp1) + step1_rounding) >> step1_shift;
+
+ temp1 = input[2] * C8;
+ temp1 = (2 * (temp1) + RIGHT_ROUNDING) >> RIGHT_SHIFT;
+ temp2 = input[6] + input[10];
+ step[ 4] = (temp1 + temp2 + INITIAL_ROUNDING) >> INITIAL_SHIFT;
+ step[ 5] = (temp1 - temp2 + INITIAL_ROUNDING) >> INITIAL_SHIFT;
+
+ temp1 = input[14] * C8;
+ temp1 = (2 * (temp1) + RIGHT_ROUNDING) >> RIGHT_SHIFT;
+ temp2 = input[6] - input[10];
+ step[ 6] = (temp2 - temp1 + INITIAL_ROUNDING) >> INITIAL_SHIFT;
+ step[ 7] = (temp2 + temp1 + INITIAL_ROUNDING) >> INITIAL_SHIFT;
+
+ // for odd input
+ temp1 = input[3] * C12;
+ temp2 = input[13] * C4;
+ temp1 = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
+ temp1 *= C8;
+ intermediate[ 8] = (2 * (temp1) + RIGHT_ROUNDING) >> RIGHT_SHIFT;
+
+ temp1 = input[3] * C4;
+ temp2 = input[13] * C12;
+ temp2 = (temp2 - temp1 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
+ temp2 *= C8;
+ intermediate[ 9] = (2 * (temp2) + RIGHT_ROUNDING) >> RIGHT_SHIFT;
+
+ intermediate[10] = (2 * (input[9] * C8) + RIGHT_ROUNDING) >> RIGHT_SHIFT;
+ intermediate[11] = input[15] - input[1];
+ intermediate[12] = input[15] + input[1];
+ intermediate[13] = (2 * (input[7] * C8) + RIGHT_ROUNDING) >> RIGHT_SHIFT;
+
+ temp1 = input[11] * C12;
+ temp2 = input[5] * C4;
+ temp2 = (temp2 - temp1 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
+ temp2 *= C8;
+ intermediate[14] = (2 * (temp2) + RIGHT_ROUNDING) >> RIGHT_SHIFT;
+
+ temp1 = input[11] * C4;
+ temp2 = input[5] * C12;
+ temp1 = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
+ temp1 *= C8;
+ intermediate[15] = (2 * (temp1) + RIGHT_ROUNDING) >> RIGHT_SHIFT;
+
+ step[ 8] = (intermediate[ 8] + intermediate[14] + INITIAL_ROUNDING)
+ >> INITIAL_SHIFT;
+ step[ 9] = (intermediate[ 9] + intermediate[15] + INITIAL_ROUNDING)
+ >> INITIAL_SHIFT;
+ step[10] = (intermediate[10] + intermediate[11] + INITIAL_ROUNDING)
+ >> INITIAL_SHIFT;
+ step[11] = (intermediate[10] - intermediate[11] + INITIAL_ROUNDING)
+ >> INITIAL_SHIFT;
+ step[12] = (intermediate[12] + intermediate[13] + INITIAL_ROUNDING)
+ >> INITIAL_SHIFT;
+ step[13] = (intermediate[12] - intermediate[13] + INITIAL_ROUNDING)
+ >> INITIAL_SHIFT;
+ step[14] = (intermediate[ 8] - intermediate[14] + INITIAL_ROUNDING)
+ >> INITIAL_SHIFT;
+ step[15] = (intermediate[ 9] - intermediate[15] + INITIAL_ROUNDING)
+ >> INITIAL_SHIFT;
+
+ // step 3
+ output[0] = step[ 0] + step[ 3];
+ output[1] = step[ 1] + step[ 2];
+ output[2] = step[ 1] - step[ 2];
+ output[3] = step[ 0] - step[ 3];
+
+ temp1 = step[ 4] * C14;
+ temp2 = step[ 7] * C2;
+ output[4] = (temp1 - temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
+
+ temp1 = step[ 4] * C2;
+ temp2 = step[ 7] * C14;
+ output[7] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
+
+ temp1 = step[ 5] * C10;
+ temp2 = step[ 6] * C6;
+ output[5] = (temp1 - temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
+
+ temp1 = step[ 5] * C6;
+ temp2 = step[ 6] * C10;
+ output[6] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
+
+ output[8] = step[ 8] + step[11];
+ output[9] = step[ 9] + step[10];
+ output[10] = step[ 9] - step[10];
+ output[11] = step[ 8] - step[11];
+ output[12] = step[12] + step[15];
+ output[13] = step[13] + step[14];
+ output[14] = step[13] - step[14];
+ output[15] = step[12] - step[15];
+
+ // step 4
+ step[ 0] = output[0] + output[7];
+ step[ 1] = output[1] + output[6];
+ step[ 2] = output[2] + output[5];
+ step[ 3] = output[3] + output[4];
+ step[ 4] = output[3] - output[4];
+ step[ 5] = output[2] - output[5];
+ step[ 6] = output[1] - output[6];
+ step[ 7] = output[0] - output[7];
+
+ temp1 = output[8] * C7;
+ temp2 = output[15] * C9;
+ step[ 8] = (temp1 - temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
+
+ temp1 = output[9] * C11;
+ temp2 = output[14] * C5;
+ step[ 9] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
+
+ temp1 = output[10] * C3;
+ temp2 = output[13] * C13;
+ step[10] = (temp1 - temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
+
+ temp1 = output[11] * C15;
+ temp2 = output[12] * C1;
+ step[11] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
+
+ temp1 = output[11] * C1;
+ temp2 = output[12] * C15;
+ step[12] = (temp2 - temp1 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
+
+ temp1 = output[10] * C13;
+ temp2 = output[13] * C3;
+ step[13] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
+
+ temp1 = output[9] * C5;
+ temp2 = output[14] * C11;
+ step[14] = (temp2 - temp1 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
+
+ temp1 = output[8] * C9;
+ temp2 = output[15] * C7;
+ step[15] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
+
+ // step 5
+ output[0] = (step[0] + step[15] + last_rounding) >> last_shift_bits;
+ output[1] = (step[1] + step[14] + last_rounding) >> last_shift_bits;
+ output[2] = (step[2] + step[13] + last_rounding) >> last_shift_bits;
+ output[3] = (step[3] + step[12] + last_rounding) >> last_shift_bits;
+ output[4] = (step[4] + step[11] + last_rounding) >> last_shift_bits;
+ output[5] = (step[5] + step[10] + last_rounding) >> last_shift_bits;
+ output[6] = (step[6] + step[ 9] + last_rounding) >> last_shift_bits;
+ output[7] = (step[7] + step[ 8] + last_rounding) >> last_shift_bits;
+
+ output[15] = (step[0] - step[15] + last_rounding) >> last_shift_bits;
+ output[14] = (step[1] - step[14] + last_rounding) >> last_shift_bits;
+ output[13] = (step[2] - step[13] + last_rounding) >> last_shift_bits;
+ output[12] = (step[3] - step[12] + last_rounding) >> last_shift_bits;
+ output[11] = (step[4] - step[11] + last_rounding) >> last_shift_bits;
+ output[10] = (step[5] - step[10] + last_rounding) >> last_shift_bits;
+ output[9] = (step[6] - step[ 9] + last_rounding) >> last_shift_bits;
+ output[8] = (step[7] - step[ 8] + last_rounding) >> last_shift_bits;
+}
+// 16x16 inverse DCT: a row pass with no final shift, then a column pass >> 3.
+void vp9_short_idct16x16_c(int16_t *input, int16_t *output, int pitch) {
+ int16_t out[16 * 16];
+ int16_t *outptr = &out[0];
+ const int short_pitch = pitch >> 1;
+ int i, j;
+ int16_t temp_in[16], temp_out[16];
+
+ // First transform rows
+ for (i = 0; i < 16; ++i) {
+ butterfly_16x16_idct_1d(input, outptr, 0);
+ input += short_pitch;
+ outptr += 16;
+ }
+ // NOTE(review): output below uses a fixed row stride of 16, not short_pitch.
+ // Then transform columns
+ for (i = 0; i < 16; ++i) {
+ for (j = 0; j < 16; ++j)
+ temp_in[j] = out[j * 16 + i];
+ butterfly_16x16_idct_1d(temp_in, temp_out, 3);
+ for (j = 0; j < 16; ++j)
+ output[j * 16 + i] = temp_out[j];
+ }
+}
+
+/* 1-D butterfly used when at most 10 dct coefficients are non-zero: only
+ * input[0..3] are read, the other twelve inputs are treated as zero.
+ */
+static void butterfly_16x16_idct10_1d(int16_t input[16], int16_t output[16],
+ int last_shift_bits) {
+ int16_t step[16] = {0};
+ int intermediate[16] = {0}; // only entries 8 and 9 are used
+ int temp1, temp2;
+ int last_rounding = 0;
+
+ if (last_shift_bits > 0)
+ last_rounding = 1 << (last_shift_bits - 1);
+
+ // step 1 and 2
+ step[ 0] = (input[0] + INITIAL_ROUNDING) >> INITIAL_SHIFT;
+ step[ 1] = (input[0] + INITIAL_ROUNDING) >> INITIAL_SHIFT;
+
+ temp1 = (2 * (input[2] * C8) + RIGHT_ROUNDING) >> RIGHT_SHIFT;
+ step[ 4] = (temp1 + INITIAL_ROUNDING) >> INITIAL_SHIFT;
+ step[ 5] = (temp1 + INITIAL_ROUNDING) >> INITIAL_SHIFT;
+
+ // for odd input
+ temp1 = (input[3] * C12 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
+ temp1 *= C8;
+ intermediate[ 8] = (2 * (temp1) + RIGHT_ROUNDING) >> RIGHT_SHIFT;
+
+ temp1 = (-input[3] * C4 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
+ temp1 *= C8;
+ intermediate[ 9] = (2 * (temp1) + RIGHT_ROUNDING) >> RIGHT_SHIFT;
+
+ step[ 8] = (intermediate[ 8] + INITIAL_ROUNDING) >> INITIAL_SHIFT;
+ step[ 9] = (intermediate[ 9] + INITIAL_ROUNDING) >> INITIAL_SHIFT;
+ step[10] = (-input[1] + INITIAL_ROUNDING) >> INITIAL_SHIFT;
+ step[11] = (input[1] + INITIAL_ROUNDING) >> INITIAL_SHIFT;
+ step[12] = (input[1] + INITIAL_ROUNDING) >> INITIAL_SHIFT;
+ step[13] = (input[1] + INITIAL_ROUNDING) >> INITIAL_SHIFT;
+ step[14] = (intermediate[ 8] + INITIAL_ROUNDING) >> INITIAL_SHIFT;
+ step[15] = (intermediate[ 9] + INITIAL_ROUNDING) >> INITIAL_SHIFT;
+
+ // step 3
+ output[0] = step[ 0];
+ output[1] = step[ 1];
+ output[2] = step[ 1];
+ output[3] = step[ 0];
+
+ temp1 = step[ 4] * C14;
+ output[4] = (temp1 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
+
+ temp1 = step[ 4] * C2;
+ output[7] = (temp1 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
+
+ temp1 = step[ 5] * C10;
+ output[5] = (temp1 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
+
+ temp1 = step[ 5] * C6;
+ output[6] = (temp1 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
+
+ output[8] = step[ 8] + step[11];
+ output[9] = step[ 9] + step[10];
+ output[10] = step[ 9] - step[10];
+ output[11] = step[ 8] - step[11];
+ output[12] = step[12] + step[15];
+ output[13] = step[13] + step[14];
+ output[14] = step[13] - step[14];
+ output[15] = step[12] - step[15];
+
+ // step 4
+ step[ 0] = output[0] + output[7];
+ step[ 1] = output[1] + output[6];
+ step[ 2] = output[2] + output[5];
+ step[ 3] = output[3] + output[4];
+ step[ 4] = output[3] - output[4];
+ step[ 5] = output[2] - output[5];
+ step[ 6] = output[1] - output[6];
+ step[ 7] = output[0] - output[7];
+
+ temp1 = output[8] * C7;
+ temp2 = output[15] * C9;
+ step[ 8] = (temp1 - temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
+
+ temp1 = output[9] * C11;
+ temp2 = output[14] * C5;
+ step[ 9] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
+
+ temp1 = output[10] * C3;
+ temp2 = output[13] * C13;
+ step[10] = (temp1 - temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
+
+ temp1 = output[11] * C15;
+ temp2 = output[12] * C1;
+ step[11] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
+
+ temp1 = output[11] * C1;
+ temp2 = output[12] * C15;
+ step[12] = (temp2 - temp1 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
+
+ temp1 = output[10] * C13;
+ temp2 = output[13] * C3;
+ step[13] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
+
+ temp1 = output[9] * C5;
+ temp2 = output[14] * C11;
+ step[14] = (temp2 - temp1 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
+
+ temp1 = output[8] * C9;
+ temp2 = output[15] * C7;
+ step[15] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
+
+ // step 5
+ output[0] = (step[0] + step[15] + last_rounding) >> last_shift_bits;
+ output[1] = (step[1] + step[14] + last_rounding) >> last_shift_bits;
+ output[2] = (step[2] + step[13] + last_rounding) >> last_shift_bits;
+ output[3] = (step[3] + step[12] + last_rounding) >> last_shift_bits;
+ output[4] = (step[4] + step[11] + last_rounding) >> last_shift_bits;
+ output[5] = (step[5] + step[10] + last_rounding) >> last_shift_bits;
+ output[6] = (step[6] + step[ 9] + last_rounding) >> last_shift_bits;
+ output[7] = (step[7] + step[ 8] + last_rounding) >> last_shift_bits;
+
+ output[15] = (step[0] - step[15] + last_rounding) >> last_shift_bits;
+ output[14] = (step[1] - step[14] + last_rounding) >> last_shift_bits;
+ output[13] = (step[2] - step[13] + last_rounding) >> last_shift_bits;
+ output[12] = (step[3] - step[12] + last_rounding) >> last_shift_bits;
+ output[11] = (step[4] - step[11] + last_rounding) >> last_shift_bits;
+ output[10] = (step[5] - step[10] + last_rounding) >> last_shift_bits;
+ output[9] = (step[6] - step[ 9] + last_rounding) >> last_shift_bits;
+ output[8] = (step[7] - step[ 8] + last_rounding) >> last_shift_bits;
+}
+// 16x16 inverse DCT for blocks whose non-zero coefficients sit in the top-left 4x4.
+void vp9_short_idct10_16x16_c(int16_t *input, int16_t *output, int pitch) {
+ int16_t out[16 * 16];
+ int16_t *outptr = &out[0];
+ const int short_pitch = pitch >> 1;
+ int i, j;
+ int16_t temp_in[16], temp_out[16];
+
+ /* First transform rows. Since all non-zero dct coefficients are in
+ * upper-left 4x4 area, we only need to calculate first 4 rows here.
+ */
+ vpx_memset(out, 0, sizeof(out));
+ for (i = 0; i < 4; ++i) {
+ butterfly_16x16_idct10_1d(input, outptr, 0);
+ input += short_pitch;
+ outptr += 16;
+ }
+ // NOTE(review): output below uses a fixed row stride of 16, not short_pitch.
+ // Then transform columns
+ for (i = 0; i < 16; ++i) {
+ for (j = 0; j < 16; ++j)
+ temp_in[j] = out[j*16 + i];
+ butterfly_16x16_idct10_1d(temp_in, temp_out, 3);
+ for (j = 0; j < 16; ++j)
+ output[j*16 + i] = temp_out[j];
+ }
+}
+#undef INITIAL_SHIFT
+#undef INITIAL_ROUNDING
+#undef RIGHT_SHIFT
+#undef RIGHT_ROUNDING
+#endif
diff --git a/vp9/common/vp9_implicit_segmentation.c b/vp9/common/vp9_implicit_segmentation.c
new file mode 100644
index 0000000..472c3d1
--- /dev/null
+++ b/vp9/common/vp9_implicit_segmentation.c
@@ -0,0 +1,255 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vp9/common/vp9_onyxc_int.h"
+
+#define MAX_REGIONS 24000
+#ifndef NULL
+#define NULL 0
+#endif
+
+#define min_mbs_in_region 3
+
+// Node of an equivalence list for connected component labeling. The same
+// type is also used for the per-label table (labels[]) in this file.
+struct list_el {
+ int label; // label id; merge() keeps each list sorted by this field
+ int seg_value; // property value shared by the macroblocks under this label
+ int count; // macroblocks counted; merge() stores the sum on the list head
+ struct list_el *next; // next node, or (in labels[]) the head of the list
+};
+typedef struct list_el item;
+
+// statistics gathered for one connected segment (region) of macroblocks
+typedef struct {
+ int min_x; // bounding box, in macroblock columns (min_x, max_x)
+ int min_y; // and macroblock rows (min_y, max_y)
+ int max_x;
+ int max_y;
+ long long sum_x; // sum of the column indices of all members
+ long long sum_y; // sum of the row indices of all members
+ int pixels; // member count; incremented once per macroblock, not per pixel
+ int seg_value; // property value the region was grown on
+ int label; // final region label
+} segment_info;
+
+
+typedef enum { // which per-macroblock value segment_via_mode_info() groups on
+ SEGMENT_MODE, // mbmi.mode
+ SEGMENT_MV, // mbmi.mv[0].as_int (a fixed marker value for intra blocks)
+ SEGMENT_REFFRAME, // mbmi.ref_frame
+ SEGMENT_SKIPPED // mbmi.mb_skip_coeff
+} SEGMENT_TYPE;
+
+
+// Merges the sorted equivalence lists of labels u and v into one list and
+// repoints every member label's entry in labels[] at the merged head, which
+// also receives the combined count. Both lists must be non-empty.
+void merge(item *labels, int u, int v) {
+ item *a = labels[u].next;
+ item *b = labels[v].next;
+ item c; // dummy head: only c.next is ever used
+ item *it = &c;
+ int count;
+
+ // check if they are already merged
+ if (u == v || a == b)
+ return;
+ // totals live on the list heads, so add them before the lists are relinked
+ count = a->count + b->count;
+
+ // merge 2 sorted linked lists.
+ while (a != NULL && b != NULL) {
+ if (a->label < b->label) {
+ it->next = a;
+ a = a->next;
+ } else {
+ it->next = b;
+ b = b->next;
+ }
+
+ it = it->next;
+ }
+ // one list is exhausted: append whatever remains of the other
+ if (a == NULL)
+ it->next = b;
+ else
+ it->next = a;
+
+ it = c.next;
+
+ // make sure every equivalence in the linked list points to this new ll
+ while (it != NULL) {
+ labels[it->label].next = c.next;
+ it = it->next;
+ }
+ c.next->count = count;
+
+}
+// Labels connected runs of macroblocks that share one property (selected by
+// 'how', a SEGMENT_TYPE value), gathers per-region bounding-box statistics and
+// prints the label map next to each macroblock's first mv. Nothing is returned.
+void segment_via_mode_info(VP9_COMMON *oci, int how) {
+  MODE_INFO *mi = oci->mi;
+  int i, j;
+  int mb_index = 0;
+
+  int label = 1;
+  int pitch = oci->mb_cols;
+
+  // holds linked list equivalences
+  // the max should probably be allocated at a higher level in oci
+  item equivalences[MAX_REGIONS];
+  int eq_ptr = 0;
+  item labels[MAX_REGIONS];
+  segment_info segments[MAX_REGIONS];
+  int label_count = 1;
+  int labeling[400 * 300];
+  int *lp = labeling;
+
+  // labeling[] holds one entry per macroblock; refuse frames that do not fit
+  if (oci->mb_rows * oci->mb_cols > 400 * 300)
+    return;
+  memset(labels, 0, sizeof(labels));
+  memset(segments, 0, sizeof(segments));
+
+  /* Go through each macroblock first pass labelling */
+  for (i = 0; i < oci->mb_rows; i++, lp += pitch) {
+    for (j = 0; j < oci->mb_cols; j++) {
+      // int above seg_value, left seg_value, this seg_value...
+      int a = -1, l = -1, n = -1;
+
+      // above label, left label (-1 when that neighbour is outside the frame)
+      int al = -1, ll = -1;
+      if (i) {
+        al = lp[j - pitch];
+        a = labels[al].next->seg_value;
+      }
+      if (j) {
+        ll = lp[j - 1];
+        l = labels[ll].next->seg_value;
+      }
+
+      // what setting are we going to do the implicit segmentation on
+      switch (how) {
+        case SEGMENT_MODE:
+          n = mi[mb_index].mbmi.mode;
+          break;
+        case SEGMENT_MV:
+          n = mi[mb_index].mbmi.mv[0].as_int;
+          if (mi[mb_index].mbmi.ref_frame == INTRA_FRAME)
+            n = -9999999;
+          break;
+        case SEGMENT_REFFRAME:
+          n = mi[mb_index].mbmi.ref_frame;
+          break;
+        case SEGMENT_SKIPPED:
+          n = mi[mb_index].mbmi.mb_skip_coeff;
+          break;
+      }
+
+      // above and left both exist and have the same seg_value (n may be -1)
+      if (al >= 0 && ll >= 0 && n == a && n == l) {
+        // pick the lowest label
+        lp[j] = (al < ll ? al : ll);
+        labels[lp[j]].next->count++;
+
+        // merge the above and left equivalencies
+        merge(labels, al, ll);
+      }
+      // this matches above seg_value
+      else if (al >= 0 && n == a) {
+        // give it the same label as above
+        lp[j] = al;
+        labels[al].next->count++;
+      }
+      // this matches left seg_value
+      else if (ll >= 0 && n == l) {
+        // give it the same label as left
+        lp[j] = ll;
+        labels[ll].next->count++;
+      } else {
+        // matches neither neighbour: open a new label while a slot is left
+        item *nl;
+        if (label >= MAX_REGIONS)
+          return;
+        nl = &equivalences[eq_ptr++];
+        lp[j] = label;
+        nl->label = label;
+        nl->next = 0;
+        nl->seg_value = n;
+        nl->count = 1;
+        labels[label].next = nl;
+        label++;
+      }
+      mb_index++;
+    }
+    mb_index++;
+  }
+
+  // give new labels to regions
+  for (i = 1; i < label; i++)
+    if (labels[i].next->count > min_mbs_in_region &&
+        labels[labels[i].next->label].label == 0) {
+      segment_info *cs = &segments[label_count];
+      cs->label = label_count;
+      labels[labels[i].next->label].label = label_count++;
+      labels[labels[i].next->label].seg_value = labels[i].next->seg_value;
+      cs->seg_value = labels[labels[i].next->label].seg_value;
+      cs->min_x = oci->mb_cols;
+      cs->min_y = oci->mb_rows;
+      cs->max_x = 0;
+      cs->max_y = 0;
+      cs->sum_x = 0;
+      cs->sum_y = 0;
+      cs->pixels = 0;
+    }
+  lp = labeling;
+
+  // this is just to gather stats...
+  for (i = 0; i < oci->mb_rows; i++, lp += pitch) {
+    for (j = 0; j < oci->mb_cols; j++) {
+      segment_info *cs;
+      int oldlab = labels[lp[j]].next->label;
+      int lab = labels[oldlab].label;
+      lp[j] = lab;
+      cs = &segments[lab];
+
+      cs->min_x = (j < cs->min_x ? j : cs->min_x);
+      cs->max_x = (j > cs->max_x ? j : cs->max_x);
+      cs->min_y = (i < cs->min_y ? i : cs->min_y);
+      cs->max_y = (i > cs->max_y ? i : cs->max_y);
+      cs->sum_x += j;
+      cs->sum_y += i;
+      cs->pixels++;
+    }
+  }
+
+  {
+    lp = labeling;
+    printf("labelling \n");
+    mb_index = 0;
+    for (i = 0; i < oci->mb_rows; i++, lp += pitch) {
+      for (j = 0; j < oci->mb_cols; j++) {
+        printf("%4d", lp[j]);
+      }
+      printf(" ");
+      for (j = 0; j < oci->mb_cols; j++, mb_index++) {
+        // printf("%3d",mi[mb_index].mbmi.mode );
+        printf("%4d:%4d", mi[mb_index].mbmi.mv[0].as_mv.row,
+               mi[mb_index].mbmi.mv[0].as_mv.col);
+      }
+      printf("\n");
+      ++mb_index;
+    }
+    printf("\n");
+  }
+}
+
diff --git a/vp9/common/vp9_invtrans.c b/vp9/common/vp9_invtrans.c
new file mode 100644
index 0000000..eff9198
--- /dev/null
+++ b/vp9/common/vp9_invtrans.c
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vp9/common/vp9_invtrans.h"
+#include "./vp9_rtcd.h"
+
+static void recon_dcblock(MACROBLOCKD *xd) {
+ BLOCKD *b = &xd->block[24];
+ int i;
+
+ for (i = 0; i < 16; i++) {
+ xd->block[i].dqcoeff[0] = b->diff[i];
+ }
+}
+
+static void recon_dcblock_8x8(MACROBLOCKD *xd) {
+ BLOCKD *b = &xd->block[24]; // for coeff 0, 2, 8, 10
+
+ xd->block[0].dqcoeff[0] = b->diff[0];
+ xd->block[4].dqcoeff[0] = b->diff[1];
+ xd->block[8].dqcoeff[0] = b->diff[4];
+ xd->block[12].dqcoeff[0] = b->diff[8];
+}
+
+void vp9_inverse_transform_b_4x4(MACROBLOCKD *xd, int block, int pitch) {
+ BLOCKD *b = &xd->block[block];
+ if (b->eob <= 1)
+ xd->inv_xform4x4_1_x8(b->dqcoeff, b->diff, pitch);
+ else
+ xd->inv_xform4x4_x8(b->dqcoeff, b->diff, pitch);
+}
+
+void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd) {
+ int i;
+ BLOCKD *blockd = xd->block;
+ int has_2nd_order = get_2nd_order_usage(xd);
+
+ if (has_2nd_order) {
+ /* do 2nd order transform on the dc block */
+ vp9_short_inv_walsh4x4(blockd[24].dqcoeff, blockd[24].diff);
+ recon_dcblock(xd);
+ }
+
+ for (i = 0; i < 16; i++) {
+ TX_TYPE tx_type = get_tx_type_4x4(xd, &xd->block[i]);
+ if (tx_type != DCT_DCT) {
+ vp9_ihtllm(xd->block[i].dqcoeff, xd->block[i].diff, 32,
+ tx_type, 4, xd->block[i].eob);
+ } else {
+ vp9_inverse_transform_b_4x4(xd, i, 32);
+ }
+ }
+}
+
+void vp9_inverse_transform_mbuv_4x4(MACROBLOCKD *xd) {
+ int i;
+
+ for (i = 16; i < 24; i++) {
+ vp9_inverse_transform_b_4x4(xd, i, 16);
+ }
+}
+
+void vp9_inverse_transform_mb_4x4(MACROBLOCKD *xd) {
+ vp9_inverse_transform_mby_4x4(xd);
+ vp9_inverse_transform_mbuv_4x4(xd);
+}
+
+void vp9_inverse_transform_b_8x8(short *input_dqcoeff, short *output_coeff,
+ int pitch) {
+ vp9_short_idct8x8(input_dqcoeff, output_coeff, pitch);
+}
+
+void vp9_inverse_transform_mby_8x8(MACROBLOCKD *xd) {
+ int i;
+ BLOCKD *blockd = xd->block;
+ int has_2nd_order = get_2nd_order_usage(xd);
+
+ if (has_2nd_order) {
+ // do 2nd order transform on the dc block
+ vp9_short_ihaar2x2(blockd[24].dqcoeff, blockd[24].diff, 8);
+ recon_dcblock_8x8(xd); // need to change for 8x8
+ }
+
+ for (i = 0; i < 9; i += 8) {
+ TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[i]);
+ if (tx_type != DCT_DCT) {
+ vp9_ihtllm(xd->block[i].dqcoeff, xd->block[i].diff, 32, tx_type, 8,
+ xd->block[i].eob);
+ } else {
+ vp9_inverse_transform_b_8x8(&blockd[i].dqcoeff[0],
+ &blockd[i].diff[0], 32);
+ }
+ }
+ for (i = 2; i < 11; i += 8) {
+ TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[i]);
+ if (tx_type != DCT_DCT) {
+ vp9_ihtllm(xd->block[i + 2].dqcoeff, xd->block[i].diff, 32, tx_type, 8,
+ xd->block[i + 2].eob);
+ } else {
+ vp9_inverse_transform_b_8x8(&blockd[i + 2].dqcoeff[0],
+ &blockd[i].diff[0], 32);
+ }
+ }
+}
+
+void vp9_inverse_transform_mbuv_8x8(MACROBLOCKD *xd) {
+ int i;
+ BLOCKD *blockd = xd->block;
+
+ for (i = 16; i < 24; i += 4) {
+ vp9_inverse_transform_b_8x8(&blockd[i].dqcoeff[0],
+ &blockd[i].diff[0], 16);
+ }
+}
+
+void vp9_inverse_transform_mb_8x8(MACROBLOCKD *xd) {
+ vp9_inverse_transform_mby_8x8(xd);
+ vp9_inverse_transform_mbuv_8x8(xd);
+}
+
+void vp9_inverse_transform_b_16x16(short *input_dqcoeff,
+ short *output_coeff, int pitch) {
+ vp9_short_idct16x16(input_dqcoeff, output_coeff, pitch);
+}
+
+void vp9_inverse_transform_mby_16x16(MACROBLOCKD *xd) {
+ BLOCKD *bd = &xd->block[0];
+ TX_TYPE tx_type = get_tx_type_16x16(xd, bd);
+ if (tx_type != DCT_DCT) {
+ vp9_ihtllm(bd->dqcoeff, bd->diff, 32, tx_type, 16, bd->eob);
+ } else {
+ vp9_inverse_transform_b_16x16(&xd->block[0].dqcoeff[0],
+ &xd->block[0].diff[0], 32);
+ }
+}
+
+void vp9_inverse_transform_mb_16x16(MACROBLOCKD *xd) {
+ vp9_inverse_transform_mby_16x16(xd);
+ vp9_inverse_transform_mbuv_8x8(xd);
+}
diff --git a/vp9/common/vp9_invtrans.h b/vp9/common/vp9_invtrans.h
new file mode 100644
index 0000000..4474ba4
--- /dev/null
+++ b/vp9/common/vp9_invtrans.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_COMMON_VP9_INVTRANS_H_
+#define VP9_COMMON_VP9_INVTRANS_H_
+
+#include "./vpx_config.h"
+#include "vp9/common/vp9_blockd.h"
+
+extern void vp9_inverse_transform_b_4x4(MACROBLOCKD *xd, int block, int pitch);
+
+extern void vp9_inverse_transform_mb_4x4(MACROBLOCKD *xd);
+
+extern void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd);
+
+extern void vp9_inverse_transform_mbuv_4x4(MACROBLOCKD *xd);
+
+extern void vp9_inverse_transform_b_8x8(short *input_dqcoeff,
+ short *output_coeff, int pitch);
+
+extern void vp9_inverse_transform_mb_8x8(MACROBLOCKD *xd);
+
+extern void vp9_inverse_transform_mby_8x8(MACROBLOCKD *xd);
+
+extern void vp9_inverse_transform_mbuv_8x8(MACROBLOCKD *xd);
+
+extern void vp9_inverse_transform_b_16x16(short *input_dqcoeff,
+ short *output_coeff, int pitch);
+
+extern void vp9_inverse_transform_mb_16x16(MACROBLOCKD *xd);
+
+extern void vp9_inverse_transform_mby_16x16(MACROBLOCKD *xd);
+
+#endif // VP9_COMMON_VP9_INVTRANS_H_
diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c
new file mode 100644
index 0000000..8d4d014
--- /dev/null
+++ b/vp9/common/vp9_loopfilter.c
@@ -0,0 +1,524 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vpx_config.h"
+#include "vp9/common/vp9_loopfilter.h"
+#include "vp9/common/vp9_onyxc_int.h"
+#include "vpx_mem/vpx_mem.h"
+
+#include "vp9/common/vp9_seg_common.h"
+
+static void lf_init_lut(loop_filter_info_n *lfi) {
+ int filt_lvl;
+
+ for (filt_lvl = 0; filt_lvl <= MAX_LOOP_FILTER; filt_lvl++) {
+ if (filt_lvl >= 40) {
+ lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 2;
+ lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 3;
+ } else if (filt_lvl >= 20) {
+ lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 1;
+ lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 2;
+ } else if (filt_lvl >= 15) {
+ lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 1;
+ lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 1;
+ } else {
+ lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 0;
+ lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 0;
+ }
+ }
+
+ lfi->mode_lf_lut[DC_PRED] = 1;
+ lfi->mode_lf_lut[D45_PRED] = 1;
+ lfi->mode_lf_lut[D135_PRED] = 1;
+ lfi->mode_lf_lut[D117_PRED] = 1;
+ lfi->mode_lf_lut[D153_PRED] = 1;
+ lfi->mode_lf_lut[D27_PRED] = 1;
+ lfi->mode_lf_lut[D63_PRED] = 1;
+ lfi->mode_lf_lut[V_PRED] = 1;
+ lfi->mode_lf_lut[H_PRED] = 1;
+ lfi->mode_lf_lut[TM_PRED] = 1;
+ lfi->mode_lf_lut[B_PRED] = 0;
+ lfi->mode_lf_lut[I8X8_PRED] = 0;
+ lfi->mode_lf_lut[ZEROMV] = 1;
+ lfi->mode_lf_lut[NEARESTMV] = 2;
+ lfi->mode_lf_lut[NEARMV] = 2;
+ lfi->mode_lf_lut[NEWMV] = 2;
+ lfi->mode_lf_lut[SPLITMV] = 3;
+}
+
+void vp9_loop_filter_update_sharpness(loop_filter_info_n *lfi,
+ int sharpness_lvl) {
+ int i;
+
+ /* For each possible value for the loop filter fill out limits */
+ for (i = 0; i <= MAX_LOOP_FILTER; i++) {
+ int filt_lvl = i;
+ int block_inside_limit = 0;
+
+ /* Set loop filter parameters that control sharpness. */
+ block_inside_limit = filt_lvl >> (sharpness_lvl > 0);
+ block_inside_limit = block_inside_limit >> (sharpness_lvl > 4);
+
+ if (sharpness_lvl > 0) {
+ if (block_inside_limit > (9 - sharpness_lvl))
+ block_inside_limit = (9 - sharpness_lvl);
+ }
+
+ if (block_inside_limit < 1)
+ block_inside_limit = 1;
+
+ vpx_memset(lfi->lim[i], block_inside_limit, SIMD_WIDTH);
+ vpx_memset(lfi->blim[i], (2 * filt_lvl + block_inside_limit),
+ SIMD_WIDTH);
+ vpx_memset(lfi->mblim[i], (2 * (filt_lvl + 2) + block_inside_limit),
+ SIMD_WIDTH);
+ }
+}
+
+void vp9_loop_filter_init(VP9_COMMON *cm) {
+ loop_filter_info_n *lfi = &cm->lf_info;
+ int i;
+
+ /* init limits for given sharpness*/
+ vp9_loop_filter_update_sharpness(lfi, cm->sharpness_level);
+ cm->last_sharpness_level = cm->sharpness_level;
+
+ /* init LUT for lvl and hev thr picking */
+ lf_init_lut(lfi);
+
+ /* init hev threshold const vectors */
+ for (i = 0; i < 4; i++) {
+ vpx_memset(lfi->hev_thr[i], i, SIMD_WIDTH);
+ }
+}
+
+void vp9_loop_filter_frame_init(VP9_COMMON *cm,
+ MACROBLOCKD *xd,
+ int default_filt_lvl) {
+ int seg, /* segment number */
+ ref, /* index in ref_lf_deltas */
+ mode; /* index in mode_lf_deltas */
+
+ loop_filter_info_n *lfi = &cm->lf_info;
+
+ /* update limits if sharpness has changed */
+ if (cm->last_sharpness_level != cm->sharpness_level) {
+ vp9_loop_filter_update_sharpness(lfi, cm->sharpness_level);
+ cm->last_sharpness_level = cm->sharpness_level;
+ }
+
+ for (seg = 0; seg < MAX_MB_SEGMENTS; seg++) {
+ int lvl_seg = default_filt_lvl;
+ int lvl_ref, lvl_mode;
+
+
+ // Set the baseline filter values for each segment
+ if (vp9_segfeature_active(xd, seg, SEG_LVL_ALT_LF)) {
+ /* Abs value */
+ if (xd->mb_segment_abs_delta == SEGMENT_ABSDATA) {
+ lvl_seg = vp9_get_segdata(xd, seg, SEG_LVL_ALT_LF);
+ } else { /* Delta Value */
+ lvl_seg += vp9_get_segdata(xd, seg, SEG_LVL_ALT_LF);
+ lvl_seg = (lvl_seg > 0) ? ((lvl_seg > 63) ? 63 : lvl_seg) : 0;
+ }
+ }
+
+ if (!xd->mode_ref_lf_delta_enabled) {
+ /* we could get rid of this if we assume that deltas are set to
+ * zero when not in use; encoder always uses deltas
+ */
+ vpx_memset(lfi->lvl[seg][0], lvl_seg, 4 * 4);
+ continue;
+ }
+
+ lvl_ref = lvl_seg;
+
+ /* INTRA_FRAME */
+ ref = INTRA_FRAME;
+
+ /* Apply delta for reference frame */
+ lvl_ref += xd->ref_lf_deltas[ref];
+
+ /* Apply delta for Intra modes */
+ mode = 0; /* B_PRED */
+ /* Only the split mode BPRED has a further special case */
+ lvl_mode = lvl_ref + xd->mode_lf_deltas[mode];
+ lvl_mode = (lvl_mode > 0) ? (lvl_mode > 63 ? 63 : lvl_mode) : 0; /* clamp */
+
+ lfi->lvl[seg][ref][mode] = lvl_mode;
+
+ mode = 1; /* all the rest of Intra modes */
+ lvl_mode = (lvl_ref > 0) ? (lvl_ref > 63 ? 63 : lvl_ref) : 0; /* clamp */
+ lfi->lvl[seg][ref][mode] = lvl_mode;
+
+ /* LAST, GOLDEN, ALT */
+ for (ref = 1; ref < MAX_REF_FRAMES; ref++) {
+ int lvl_ref = lvl_seg;
+
+ /* Apply delta for reference frame */
+ lvl_ref += xd->ref_lf_deltas[ref];
+
+ /* Apply delta for Inter modes */
+ for (mode = 1; mode < 4; mode++) {
+ lvl_mode = lvl_ref + xd->mode_lf_deltas[mode];
+ lvl_mode = (lvl_mode > 0) ? (lvl_mode > 63 ? 63 : lvl_mode) : 0; /* clamp */
+
+ lfi->lvl[seg][ref][mode] = lvl_mode;
+ }
+ }
+ }
+}
+
+void vp9_loop_filter_frame(VP9_COMMON *cm, MACROBLOCKD *xd) {
+ YV12_BUFFER_CONFIG *post = cm->frame_to_show;
+ loop_filter_info_n *lfi_n = &cm->lf_info;
+ struct loop_filter_info lfi;
+
+ FRAME_TYPE frame_type = cm->frame_type;
+
+ int mb_row;
+ int mb_col;
+
+ int filter_level;
+
+ unsigned char *y_ptr, *u_ptr, *v_ptr;
+
+ /* Point at base of Mb MODE_INFO list */
+ const MODE_INFO *mode_info_context = cm->mi;
+
+ /* Initialize the loop filter for this frame. */
+ vp9_loop_filter_frame_init(cm, xd, cm->filter_level);
+
+ /* Set up the buffer pointers */
+ y_ptr = post->y_buffer;
+ u_ptr = post->u_buffer;
+ v_ptr = post->v_buffer;
+
+ /* filter each macroblock */
+ for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) {
+ for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
+ int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
+ mode_info_context->mbmi.mode != I8X8_PRED &&
+ mode_info_context->mbmi.mode != SPLITMV &&
+ mode_info_context->mbmi.mb_skip_coeff);
+
+ const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode];
+ const int seg = mode_info_context->mbmi.segment_id;
+ const int ref_frame = mode_info_context->mbmi.ref_frame;
+ int tx_type = mode_info_context->mbmi.txfm_size;
+ filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
+
+ if (filter_level) {
+ if (cm->filter_type == NORMAL_LOOPFILTER) {
+ const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
+ lfi.mblim = lfi_n->mblim[filter_level];
+ lfi.blim = lfi_n->blim[filter_level];
+ lfi.lim = lfi_n->lim[filter_level];
+ lfi.hev_thr = lfi_n->hev_thr[hev_index];
+
+ if (mb_col > 0
+#if CONFIG_SUPERBLOCKS
+ && !((mb_col & 1) && mode_info_context->mbmi.encoded_as_sb &&
+ mode_info_context[0].mbmi.mb_skip_coeff &&
+ mode_info_context[-1].mbmi.mb_skip_coeff)
+#endif
+ )
+ vp9_loop_filter_mbv(y_ptr, u_ptr, v_ptr, post->y_stride,
+ post->uv_stride, &lfi);
+
+ if (!skip_lf && tx_type != TX_16X16) {
+ if (tx_type == TX_8X8)
+ vp9_loop_filter_bv8x8(y_ptr, u_ptr, v_ptr, post->y_stride,
+ post->uv_stride, &lfi);
+ else
+ vp9_loop_filter_bv(y_ptr, u_ptr, v_ptr, post->y_stride,
+ post->uv_stride, &lfi);
+
+ }
+
+ /* don't apply across umv border */
+ if (mb_row > 0
+#if CONFIG_SUPERBLOCKS
+ && !((mb_row & 1) && mode_info_context->mbmi.encoded_as_sb &&
+ mode_info_context[0].mbmi.mb_skip_coeff &&
+ mode_info_context[-cm->mode_info_stride].mbmi.mb_skip_coeff)
+#endif
+ )
+ vp9_loop_filter_mbh(y_ptr, u_ptr, v_ptr, post->y_stride,
+ post->uv_stride, &lfi);
+
+ if (!skip_lf && tx_type != TX_16X16) {
+ if (tx_type == TX_8X8)
+ vp9_loop_filter_bh8x8(y_ptr, u_ptr, v_ptr, post->y_stride,
+ post->uv_stride, &lfi);
+ else
+ vp9_loop_filter_bh(y_ptr, u_ptr, v_ptr, post->y_stride,
+ post->uv_stride, &lfi);
+ }
+ } else {
+ // FIXME: Not 8x8 aware
+ if (mb_col > 0
+#if CONFIG_SUPERBLOCKS
+ && !((mb_col & 1) && mode_info_context->mbmi.encoded_as_sb &&
+ mode_info_context[0].mbmi.mb_skip_coeff &&
+ mode_info_context[-1].mbmi.mb_skip_coeff)
+#endif
+ )
+ vp9_loop_filter_simple_mbv(y_ptr, post->y_stride,
+ lfi_n->mblim[filter_level]);
+
+ if (!skip_lf)
+ vp9_loop_filter_simple_bv(y_ptr, post->y_stride,
+ lfi_n->blim[filter_level]);
+
+ /* don't apply across umv border */
+ if (mb_row > 0
+#if CONFIG_SUPERBLOCKS
+ && !((mb_row & 1) && mode_info_context->mbmi.encoded_as_sb &&
+ mode_info_context[0].mbmi.mb_skip_coeff &&
+ mode_info_context[-cm->mode_info_stride].mbmi.mb_skip_coeff)
+#endif
+ )
+ vp9_loop_filter_simple_mbh(y_ptr, post->y_stride,
+ lfi_n->mblim[filter_level]);
+
+ if (!skip_lf)
+ vp9_loop_filter_simple_bh(y_ptr, post->y_stride,
+ lfi_n->blim[filter_level]);
+ }
+ }
+
+ y_ptr += 16;
+ u_ptr += 8;
+ v_ptr += 8;
+
+ mode_info_context++; /* step to next MB */
+ }
+
+ y_ptr += post->y_stride * 16 - post->y_width;
+ u_ptr += post->uv_stride * 8 - post->uv_width;
+ v_ptr += post->uv_stride * 8 - post->uv_width;
+
+ mode_info_context++; /* Skip border mb */
+ }
+}
+
+void vp9_loop_filter_frame_yonly(VP9_COMMON *cm, MACROBLOCKD *xd,
+ int default_filt_lvl) {
+ YV12_BUFFER_CONFIG *post = cm->frame_to_show;
+
+ unsigned char *y_ptr;
+ int mb_row;
+ int mb_col;
+
+ loop_filter_info_n *lfi_n = &cm->lf_info;
+ struct loop_filter_info lfi;
+
+ int filter_level;
+ FRAME_TYPE frame_type = cm->frame_type;
+
+ /* Point at base of Mb MODE_INFO list */
+ const MODE_INFO *mode_info_context = cm->mi;
+
+#if 0
+ if (default_filt_lvl == 0) /* no filter applied */
+ return;
+#endif
+
+ /* Initialize the loop filter for this frame. */
+ vp9_loop_filter_frame_init(cm, xd, default_filt_lvl);
+
+ /* Set up the buffer pointers */
+ y_ptr = post->y_buffer;
+
+ /* filter each macroblock */
+ for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) {
+ for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
+ int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
+ mode_info_context->mbmi.mode != I8X8_PRED &&
+ mode_info_context->mbmi.mode != SPLITMV &&
+ mode_info_context->mbmi.mb_skip_coeff);
+
+ const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode];
+ const int seg = mode_info_context->mbmi.segment_id;
+ const int ref_frame = mode_info_context->mbmi.ref_frame;
+ int tx_type = mode_info_context->mbmi.txfm_size;
+ filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
+
+ if (filter_level) {
+ if (cm->filter_type == NORMAL_LOOPFILTER) {
+ const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
+ lfi.mblim = lfi_n->mblim[filter_level];
+ lfi.blim = lfi_n->blim[filter_level];
+ lfi.lim = lfi_n->lim[filter_level];
+ lfi.hev_thr = lfi_n->hev_thr[hev_index];
+
+ if (mb_col > 0)
+ vp9_loop_filter_mbv(y_ptr, 0, 0, post->y_stride, 0, &lfi);
+
+ if (!skip_lf && tx_type != TX_16X16) {
+ if (tx_type == TX_8X8)
+ vp9_loop_filter_bv8x8(y_ptr, 0, 0, post->y_stride, 0, &lfi);
+ else
+ vp9_loop_filter_bv(y_ptr, 0, 0, post->y_stride, 0, &lfi);
+ }
+
+ /* don't apply across umv border */
+ if (mb_row > 0)
+ vp9_loop_filter_mbh(y_ptr, 0, 0, post->y_stride, 0, &lfi);
+
+ if (!skip_lf && tx_type != TX_16X16) {
+ if (tx_type == TX_8X8)
+ vp9_loop_filter_bh8x8(y_ptr, 0, 0, post->y_stride, 0, &lfi);
+ else
+ vp9_loop_filter_bh(y_ptr, 0, 0, post->y_stride, 0, &lfi);
+ }
+ } else {
+ // FIXME: Not 8x8 aware
+ if (mb_col > 0)
+ vp9_loop_filter_simple_mbv(y_ptr, post->y_stride,
+ lfi_n->mblim[filter_level]);
+
+ if (!skip_lf)
+ vp9_loop_filter_simple_bv(y_ptr, post->y_stride,
+ lfi_n->blim[filter_level]);
+
+ /* don't apply across umv border */
+ if (mb_row > 0)
+ vp9_loop_filter_simple_mbh(y_ptr, post->y_stride,
+ lfi_n->mblim[filter_level]);
+
+ if (!skip_lf)
+ vp9_loop_filter_simple_bh(y_ptr, post->y_stride,
+ lfi_n->blim[filter_level]);
+ }
+ }
+
+ y_ptr += 16;
+ mode_info_context++; /* step to next MB */
+ }
+
+ y_ptr += post->y_stride * 16 - post->y_width;
+ mode_info_context++; /* Skip border mb */
+ }
+}
+
+void vp9_loop_filter_partial_frame(VP9_COMMON *cm, MACROBLOCKD *xd,
+ int default_filt_lvl) {
+ YV12_BUFFER_CONFIG *post = cm->frame_to_show;
+
+ unsigned char *y_ptr;
+ int mb_row;
+ int mb_col;
+ int mb_cols = post->y_width >> 4;
+
+ int linestocopy, i;
+
+ loop_filter_info_n *lfi_n = &cm->lf_info;
+ struct loop_filter_info lfi;
+
+ int filter_level;
+ int alt_flt_enabled = xd->segmentation_enabled;
+ FRAME_TYPE frame_type = cm->frame_type;
+
+ const MODE_INFO *mode_info_context;
+
+ int lvl_seg[MAX_MB_SEGMENTS];
+
+ mode_info_context = cm->mi + (post->y_height >> 5) * (mb_cols + 1);
+
+ /* >> 4: pixel rows -> 16-row macroblock rows; >> 3: keep only 1/8 of them */
+ linestocopy = (post->y_height >> (4 + 3));
+
+ if (linestocopy < 1)
+ linestocopy = 1;
+
+ linestocopy <<= 4;
+
+ /* Note the baseline filter values for each segment */
+ /* See vp9_loop_filter_frame_init. Rather than call that for each change
+ * to default_filt_lvl, copy the relevant calculation here.
+ */
+ if (alt_flt_enabled) {
+ for (i = 0; i < MAX_MB_SEGMENTS; i++) {
+ /* Abs value */
+ if (xd->mb_segment_abs_delta == SEGMENT_ABSDATA) {
+ lvl_seg[i] = vp9_get_segdata(xd, i, SEG_LVL_ALT_LF);
+ }
+ /* Delta Value */
+ else {
+ lvl_seg[i] = default_filt_lvl +
+ vp9_get_segdata(xd, i, SEG_LVL_ALT_LF);
+ lvl_seg[i] = (lvl_seg[i] > 0) ?
+ ((lvl_seg[i] > 63) ? 63 : lvl_seg[i]) : 0;
+ }
+ }
+ }
+
+ /* Set up the buffer pointers */
+ y_ptr = post->y_buffer + (post->y_height >> 5) * 16 * post->y_stride;
+
+ /* filter each macroblock */
+ for (mb_row = 0; mb_row < (linestocopy >> 4); mb_row++) {
+ for (mb_col = 0; mb_col < mb_cols; mb_col++) {
+ int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
+ mode_info_context->mbmi.mode != I8X8_PRED &&
+ mode_info_context->mbmi.mode != SPLITMV &&
+ mode_info_context->mbmi.mb_skip_coeff);
+
+ if (alt_flt_enabled)
+ filter_level = lvl_seg[mode_info_context->mbmi.segment_id];
+ else
+ filter_level = default_filt_lvl;
+
+ if (filter_level) {
+ if (cm->filter_type == NORMAL_LOOPFILTER) {
+ const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
+ lfi.mblim = lfi_n->mblim[filter_level];
+ lfi.blim = lfi_n->blim[filter_level];
+ lfi.lim = lfi_n->lim[filter_level];
+ lfi.hev_thr = lfi_n->hev_thr[hev_index];
+
+ if (mb_col > 0)
+ vp9_loop_filter_mbv(y_ptr, 0, 0, post->y_stride, 0, &lfi);
+
+ if (!skip_lf)
+ vp9_loop_filter_bv(y_ptr, 0, 0, post->y_stride, 0, &lfi);
+
+ vp9_loop_filter_mbh(y_ptr, 0, 0, post->y_stride, 0, &lfi);
+
+ if (!skip_lf)
+ vp9_loop_filter_bh(y_ptr, 0, 0, post->y_stride, 0, &lfi);
+ } else {
+ if (mb_col > 0)
+ vp9_loop_filter_simple_mbv (y_ptr, post->y_stride,
+ lfi_n->mblim[filter_level]);
+
+ if (!skip_lf)
+ vp9_loop_filter_simple_bv(y_ptr, post->y_stride,
+ lfi_n->blim[filter_level]);
+
+ vp9_loop_filter_simple_mbh(y_ptr, post->y_stride,
+ lfi_n->mblim[filter_level]);
+
+ if (!skip_lf)
+ vp9_loop_filter_simple_bh(y_ptr, post->y_stride,
+ lfi_n->blim[filter_level]);
+ }
+ }
+
+ y_ptr += 16;
+ mode_info_context += 1; /* step to next MB */
+ }
+
+ y_ptr += post->y_stride * 16 - post->y_width;
+ mode_info_context += 1; /* Skip border mb */
+ }
+}
diff --git a/vp9/common/vp9_loopfilter.h b/vp9/common/vp9_loopfilter.h
new file mode 100644
index 0000000..23df2d8
--- /dev/null
+++ b/vp9/common/vp9_loopfilter.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_COMMON_VP9_LOOPFILTER_H_
+#define VP9_COMMON_VP9_LOOPFILTER_H_
+
+#include "vpx_ports/mem.h"
+#include "vpx_config.h"
+#include "vp9/common/vp9_blockd.h"
+
+#define MAX_LOOP_FILTER 63
+
+typedef enum {
+ NORMAL_LOOPFILTER = 0,
+ SIMPLE_LOOPFILTER = 1
+} LOOPFILTERTYPE;
+
+#define SIMD_WIDTH 16
+
+/* Need to align this structure so when it is declared and
+ * passed it can be loaded into vector registers.
+ */
+typedef struct {
+ DECLARE_ALIGNED(SIMD_WIDTH, unsigned char,
+ mblim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
+ DECLARE_ALIGNED(SIMD_WIDTH, unsigned char,
+ blim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
+ DECLARE_ALIGNED(SIMD_WIDTH, unsigned char,
+ lim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
+ DECLARE_ALIGNED(SIMD_WIDTH, unsigned char,
+ hev_thr[4][SIMD_WIDTH]);
+ unsigned char lvl[4][4][4];
+ unsigned char hev_thr_lut[2][MAX_LOOP_FILTER + 1];
+ unsigned char mode_lf_lut[MB_MODE_COUNT];
+} loop_filter_info_n;
+
+struct loop_filter_info {
+ const unsigned char *mblim;
+ const unsigned char *blim;
+ const unsigned char *lim;
+ const unsigned char *hev_thr;
+};
+
+#define prototype_loopfilter(sym) \
+ void sym(unsigned char *src, int pitch, const unsigned char *blimit,\
+ const unsigned char *limit, const unsigned char *thresh, int count)
+
+#define prototype_loopfilter_block(sym) \
+ void sym(unsigned char *y, unsigned char *u, unsigned char *v, \
+ int ystride, int uv_stride, struct loop_filter_info *lfi)
+
+#define prototype_simple_loopfilter(sym) \
+ void sym(unsigned char *y, int ystride, const unsigned char *blimit)
+
+#if ARCH_X86 || ARCH_X86_64
+#include "x86/vp9_loopfilter_x86.h"
+#endif
+
+typedef void loop_filter_uvfunction(unsigned char *u, /* source pointer */
+ int p, /* pitch */
+ const unsigned char *blimit,
+ const unsigned char *limit,
+ const unsigned char *thresh,
+ unsigned char *v);
+
+/* assorted loopfilter functions which get used elsewhere */
+struct VP9Common;
+struct macroblockd;
+
+void vp9_loop_filter_init(struct VP9Common *cm);
+
+void vp9_loop_filter_frame_init(struct VP9Common *cm,
+ struct macroblockd *mbd,
+ int default_filt_lvl);
+
+void vp9_loop_filter_frame(struct VP9Common *cm, struct macroblockd *mbd);
+
+void vp9_loop_filter_partial_frame(struct VP9Common *cm,
+ struct macroblockd *mbd,
+ int default_filt_lvl);
+
+void vp9_loop_filter_frame_yonly(struct VP9Common *cm,
+ struct macroblockd *mbd,
+ int default_filt_lvl);
+
+void vp9_loop_filter_update_sharpness(loop_filter_info_n *lfi,
+ int sharpness_lvl);
+
+#endif // VP9_COMMON_VP9_LOOPFILTER_H_
diff --git a/vp9/common/vp9_loopfilter_filters.c b/vp9/common/vp9_loopfilter_filters.c
new file mode 100644
index 0000000..b8cfb9c
--- /dev/null
+++ b/vp9/common/vp9_loopfilter_filters.c
@@ -0,0 +1,480 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdlib.h>
+#include "vpx_config.h"
+#include "vp9/common/vp9_loopfilter.h"
+#include "vp9/common/vp9_onyxc_int.h"
+
+typedef unsigned char uc;
+
+static __inline signed char signed_char_clamp(int t) {
+ t = (t < -128 ? -128 : t);
+ t = (t > 127 ? 127 : t);
+ return (signed char) t;
+}
+
+
+/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
+static __inline signed char filter_mask(uc limit, uc blimit,
+ uc p3, uc p2, uc p1, uc p0,
+ uc q0, uc q1, uc q2, uc q3) {
+ signed char mask = 0;
+ mask |= (abs(p3 - p2) > limit) * -1;
+ mask |= (abs(p2 - p1) > limit) * -1;
+ mask |= (abs(p1 - p0) > limit) * -1;
+ mask |= (abs(q1 - q0) > limit) * -1;
+ mask |= (abs(q2 - q1) > limit) * -1;
+ mask |= (abs(q3 - q2) > limit) * -1;
+ mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1;
+ mask = ~mask;
+ return mask;
+}
+
+/* is there high variance internal edge ( 11111111 yes, 00000000 no) */
+static __inline signed char hevmask(uc thresh, uc p1, uc p0, uc q0, uc q1) {
+ signed char hev = 0;
+ hev |= (abs(p1 - p0) > thresh) * -1;
+ hev |= (abs(q1 - q0) > thresh) * -1;
+ return hev;
+}
+
+static __inline void filter(signed char mask, uc hev, uc *op1,
+ uc *op0, uc *oq0, uc *oq1)
+
+{
+ signed char ps0, qs0;
+ signed char ps1, qs1;
+ signed char filter, Filter1, Filter2;
+ signed char u;
+
+ ps1 = (signed char) * op1 ^ 0x80;
+ ps0 = (signed char) * op0 ^ 0x80;
+ qs0 = (signed char) * oq0 ^ 0x80;
+ qs1 = (signed char) * oq1 ^ 0x80;
+
+ /* add outer taps if we have high edge variance */
+ filter = signed_char_clamp(ps1 - qs1);
+ filter &= hev;
+
+ /* inner taps */
+ filter = signed_char_clamp(filter + 3 * (qs0 - ps0));
+ filter &= mask;
+
+ /* save bottom 3 bits so that we round one side +4 and the other +3
+ * if it equals 4 we'll set to adjust by -1 to account for the fact
+ * we'd round 3 the other way
+ */
+ Filter1 = signed_char_clamp(filter + 4);
+ Filter2 = signed_char_clamp(filter + 3);
+ Filter1 >>= 3;
+ Filter2 >>= 3;
+ u = signed_char_clamp(qs0 - Filter1);
+ *oq0 = u ^ 0x80;
+ u = signed_char_clamp(ps0 + Filter2);
+ *op0 = u ^ 0x80;
+ filter = Filter1;
+
+ /* outer tap adjustments */
+ filter += 1;
+ filter >>= 1;
+ filter &= ~hev;
+
+ u = signed_char_clamp(qs1 - filter);
+ *oq1 = u ^ 0x80;
+ u = signed_char_clamp(ps1 + filter);
+ *op1 = u ^ 0x80;
+
+}
+
+void vp9_loop_filter_horizontal_edge_c
+(
+ unsigned char *s,
+ int p, /* pitch */
+ const unsigned char *blimit,
+ const unsigned char *limit,
+ const unsigned char *thresh,
+ int count
+) {
+ int hev = 0; /* high edge variance */
+ signed char mask = 0;
+ int i = 0;
+
+ /* loop filter designed to work using chars so that we can make maximum use
+ * of 8 bit simd instructions.
+ */
+ do {
+ mask = filter_mask(limit[0], blimit[0],
+ s[-4 * p], s[-3 * p], s[-2 * p], s[-1 * p],
+ s[0 * p], s[1 * p], s[2 * p], s[3 * p]);
+
+ hev = hevmask(thresh[0], s[-2 * p], s[-1 * p], s[0 * p], s[1 * p]);
+
+ filter(mask, hev, s - 2 * p, s - 1 * p, s, s + 1 * p);
+
+ ++s;
+ } while (++i < count * 8);
+}
+
+void vp9_loop_filter_vertical_edge_c(unsigned char *s,
+ int p,
+ const unsigned char *blimit,
+ const unsigned char *limit,
+ const unsigned char *thresh,
+ int count) {
+ int hev = 0; /* high edge variance */
+ signed char mask = 0;
+ int i = 0;
+
+ /* loop filter designed to work using chars so that we can make maximum use
+ * of 8 bit simd instructions.
+ */
+ do {
+ mask = filter_mask(limit[0], blimit[0],
+ s[-4], s[-3], s[-2], s[-1],
+ s[0], s[1], s[2], s[3]);
+
+ hev = hevmask(thresh[0], s[-2], s[-1], s[0], s[1]);
+
+ filter(mask, hev, s - 2, s - 1, s, s + 1);
+
+ s += p;
+ } while (++i < count * 8);
+}
+static __inline signed char flatmask(uc thresh,
+ uc p4, uc p3, uc p2, uc p1, uc p0,
+ uc q0, uc q1, uc q2, uc q3, uc q4) {
+ signed char flat = 0;
+ flat |= (abs(p1 - p0) > 1) * -1;
+ flat |= (abs(q1 - q0) > 1) * -1;
+ flat |= (abs(p0 - p2) > 1) * -1;
+ flat |= (abs(q0 - q2) > 1) * -1;
+ flat |= (abs(p3 - p0) > 1) * -1;
+ flat |= (abs(q3 - q0) > 1) * -1;
+ flat |= (abs(p4 - p0) > 1) * -1;
+ flat |= (abs(q4 - q0) > 1) * -1;
+ flat = ~flat;
+ return flat;
+}
+
+static __inline void mbfilter(signed char mask, uc hev, uc flat,
+ uc *op4, uc *op3, uc *op2, uc *op1, uc *op0,
+ uc *oq0, uc *oq1, uc *oq2, uc *oq3, uc *oq4) {
+ /* use a 7 tap filter [1, 1, 1, 2, 1, 1, 1] for flat line */
+ if (flat && mask) {
+ unsigned char p0, q0;
+ unsigned char p1, q1;
+ unsigned char p2, q2;
+ unsigned char p3, q3;
+ unsigned char p4, q4;
+
+ p4 = *op4;
+ p3 = *op3;
+ p2 = *op2;
+ p1 = *op1;
+ p0 = *op0;
+ q0 = *oq0;
+ q1 = *oq1;
+ q2 = *oq2;
+ q3 = *oq3;
+ q4 = *oq4;
+
+ *op2 = (p4 + p4 + p3 + p2 + p2 + p1 + p0 + q0 + 4) >> 3;
+ *op1 = (p4 + p3 + p2 + p1 + p1 + p0 + q0 + q1 + 4) >> 3;
+ *op0 = (p3 + p2 + p1 + p0 + p0 + q0 + q1 + q2 + 4) >> 3;
+ *oq0 = (p2 + p1 + p0 + q0 + q0 + q1 + q2 + q3 + 4) >> 3;
+ *oq1 = (p1 + p0 + q0 + q1 + q1 + q2 + q3 + q4 + 4) >> 3;
+ *oq2 = (p0 + q0 + q1 + q2 + q2 + q3 + q4 + q4 + 4) >> 3;
+ } else {
+ signed char ps0, qs0;
+ signed char ps1, qs1;
+ signed char filter, Filter1, Filter2;
+ signed char u;
+
+ ps1 = (signed char) * op1 ^ 0x80;
+ ps0 = (signed char) * op0 ^ 0x80;
+ qs0 = (signed char) * oq0 ^ 0x80;
+ qs1 = (signed char) * oq1 ^ 0x80;
+
+ /* add outer taps if we have high edge variance */
+ filter = signed_char_clamp(ps1 - qs1);
+ filter &= hev;
+
+ /* inner taps */
+ filter = signed_char_clamp(filter + 3 * (qs0 - ps0));
+ filter &= mask;
+
+ Filter1 = signed_char_clamp(filter + 4);
+ Filter2 = signed_char_clamp(filter + 3);
+ Filter1 >>= 3;
+ Filter2 >>= 3;
+
+ u = signed_char_clamp(qs0 - Filter1);
+ *oq0 = u ^ 0x80;
+ u = signed_char_clamp(ps0 + Filter2);
+ *op0 = u ^ 0x80;
+ filter = Filter1;
+
+ /* outer tap adjustments */
+ filter += 1;
+ filter >>= 1;
+ filter &= ~hev;
+
+ u = signed_char_clamp(qs1 - filter);
+ *oq1 = u ^ 0x80;
+ u = signed_char_clamp(ps1 + filter);
+ *op1 = u ^ 0x80;
+ }
+}
+void vp9_mbloop_filter_horizontal_edge_c
+(
+ unsigned char *s,
+ int p,
+ const unsigned char *blimit,
+ const unsigned char *limit,
+ const unsigned char *thresh,
+ int count
+) {
+ signed char hev = 0; /* high edge variance */
+ signed char mask = 0;
+ signed char flat = 0;
+ int i = 0;
+
+ /* loop filter designed to work using chars so that we can make maximum use
+ * of 8 bit simd instructions.
+ */
+ do {
+
+ mask = filter_mask(limit[0], blimit[0],
+ s[-4 * p], s[-3 * p], s[-2 * p], s[-1 * p],
+ s[ 0 * p], s[ 1 * p], s[ 2 * p], s[ 3 * p]);
+
+ hev = hevmask(thresh[0], s[-2 * p], s[-1 * p], s[0 * p], s[1 * p]);
+
+ flat = flatmask(thresh[0],
+ s[-5 * p], s[-4 * p], s[-3 * p], s[-2 * p], s[-1 * p],
+ s[ 0 * p], s[ 1 * p], s[ 2 * p], s[ 3 * p], s[ 4 * p]);
+ mbfilter(mask, hev, flat,
+ s - 5 * p, s - 4 * p, s - 3 * p, s - 2 * p, s - 1 * p,
+ s, s + 1 * p, s + 2 * p, s + 3 * p, s + 4 * p);
+
+ ++s;
+ } while (++i < count * 8);
+
+}
+void vp9_mbloop_filter_vertical_edge_c
+(
+ unsigned char *s, /* first pixel on the q side of the edge; s[-5]..s[4] are accessed */
+ int p, /* pitch: byte distance from one row to the next */
+ const unsigned char *blimit, /* only element [0] is read */
+ const unsigned char *limit, /* only element [0] is read */
+ const unsigned char *thresh, /* only element [0] is read (by both hevmask and flatmask) */
+ int count /* number of 8-pixel groups: count * 8 rows are processed */
+) {
+ signed char hev = 0; /* high edge variance */
+ signed char mask = 0;
+ signed char flat = 0;
+ int i = 0;
+
+ do {
+
+ mask = filter_mask(limit[0], blimit[0],
+ s[-4], s[-3], s[-2], s[-1],
+ s[0], s[1], s[2], s[3]);
+
+ hev = hevmask(thresh[0], s[-2], s[-1], s[0], s[1]);
+ flat = flatmask(thresh[0],
+ s[-5], s[-4], s[-3], s[-2], s[-1],
+ s[ 0], s[ 1], s[ 2], s[ 3], s[ 4]);
+ mbfilter(mask, hev, flat,
+ s - 5, s - 4, s - 3, s - 2, s - 1,
+ s, s + 1, s + 2, s + 3, s + 4); /* five pixels on each side of the edge are handed to the filter */
+ s += p; /* next row down the vertical edge */
+ } while (++i < count * 8);
+
+}
+
+/* Simple-filter decision: returns -1 (all bits set) when |p0-q0|*2 + |p1-q1|/2 <= blimit, otherwise 0. */
+static __inline signed char simple_filter_mask(uc blimit,
+ uc p1, uc p0,
+ uc q0, uc q1) {
+ /* Why does this cause problems for win32?
+ * error C2143: syntax error : missing ';' before 'type'
+ * (void) limit;
+ */
+ signed char mask = (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= blimit) * -1; /* comparison yields 0/1; * -1 turns 1 into 0xff */
+ return mask;
+}
+
+static __inline void simple_filter(signed char mask, /* 0 disables the adjustment, all-ones enables it */
+ uc *op1, uc *op0, /* p side: op1 is only read, op0 is read and rewritten */
+ uc *oq0, uc *oq1) { /* q side: oq1 is only read, oq0 is read and rewritten */
+ signed char filter, Filter1, Filter2;
+ signed char p1 = (signed char) * op1 ^ 0x80; /* ^ 0x80 toggles the top bit to re-bias the unsigned pixel into signed range */
+ signed char p0 = (signed char) * op0 ^ 0x80;
+ signed char q0 = (signed char) * oq0 ^ 0x80;
+ signed char q1 = (signed char) * oq1 ^ 0x80;
+ signed char u;
+
+ filter = signed_char_clamp(p1 - q1); /* signed_char_clamp is defined elsewhere; presumably saturates to signed char range */
+ filter = signed_char_clamp(filter + 3 * (q0 - p0));
+ filter &= mask; /* zero the correction when the mask says "do not filter" */
+
+ /* the two sides use different rounding offsets: +4 for q0, +3 for p0, both then shifted right by 3 */
+ Filter1 = signed_char_clamp(filter + 4);
+ Filter1 >>= 3;
+ u = signed_char_clamp(q0 - Filter1);
+ *oq0 = u ^ 0x80; /* toggle the top bit again to return to the stored pixel representation */
+
+ Filter2 = signed_char_clamp(filter + 3);
+ Filter2 >>= 3;
+ u = signed_char_clamp(p0 + Filter2);
+ *op0 = u ^ 0x80;
+}
+
+void vp9_loop_filter_simple_horizontal_edge_c
+(
+ unsigned char *s, /* first pixel below the edge; rows s - 2p .. s + p are accessed */
+ int p, /* pitch: byte distance between rows */
+ const unsigned char *blimit /* only element [0] is read */
+) {
+ signed char mask = 0;
+ int i = 0;
+
+ do {
+ mask = simple_filter_mask(blimit[0],
+ s[-2 * p], s[-1 * p],
+ s[0 * p], s[1 * p]);
+ simple_filter(mask,
+ s - 2 * p, s - 1 * p,
+ s, s + 1 * p);
+ ++s; /* next column */
+ } while (++i < 16); /* always 16 columns, independent of any count argument */
+}
+
+void vp9_loop_filter_simple_vertical_edge_c
+(
+ unsigned char *s, /* first pixel right of the edge; s[-2] .. s[1] are accessed */
+ int p, /* pitch: byte distance between rows */
+ const unsigned char *blimit /* only element [0] is read */
+) {
+ signed char mask = 0;
+ int i = 0;
+
+ do {
+ mask = simple_filter_mask(blimit[0], s[-2], s[-1], s[0], s[1]);
+ simple_filter(mask, s - 2, s - 1, s, s + 1);
+ s += p; /* next row */
+ } while (++i < 16); /* always 16 rows */
+
+}
+
+/* Vertical MB filtering: macroblock filter on the edge at y_ptr (count 2) and, when non-NULL, at u_ptr / v_ptr (count 1), using lfi->mblim. */
+void vp9_loop_filter_mbv_c(unsigned char *y_ptr, unsigned char *u_ptr,
+ unsigned char *v_ptr, int y_stride, int uv_stride,
+ struct loop_filter_info *lfi) {
+ vp9_mbloop_filter_vertical_edge_c(y_ptr, y_stride,
+ lfi->mblim, lfi->lim, lfi->hev_thr, 2);
+
+ if (u_ptr)
+ vp9_mbloop_filter_vertical_edge_c(u_ptr, uv_stride,
+ lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+
+ if (v_ptr)
+ vp9_mbloop_filter_vertical_edge_c(v_ptr, uv_stride,
+ lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+}
+
+/* Vertical B filtering: normal filter on the luma edges at columns 4, 8 and 12 and, when non-NULL, the chroma edge at column 4, using lfi->blim. */
+void vp9_loop_filter_bv_c(unsigned char *y_ptr, unsigned char *u_ptr,
+ unsigned char *v_ptr, int y_stride, int uv_stride,
+ struct loop_filter_info *lfi) {
+ vp9_loop_filter_vertical_edge_c(y_ptr + 4, y_stride,
+ lfi->blim, lfi->lim, lfi->hev_thr, 2);
+ vp9_loop_filter_vertical_edge_c(y_ptr + 8, y_stride,
+ lfi->blim, lfi->lim, lfi->hev_thr, 2);
+ vp9_loop_filter_vertical_edge_c(y_ptr + 12, y_stride,
+ lfi->blim, lfi->lim, lfi->hev_thr, 2);
+
+ if (u_ptr)
+ vp9_loop_filter_vertical_edge_c(u_ptr + 4, uv_stride,
+ lfi->blim, lfi->lim, lfi->hev_thr, 1);
+
+ if (v_ptr)
+ vp9_loop_filter_vertical_edge_c(v_ptr + 4, uv_stride,
+ lfi->blim, lfi->lim, lfi->hev_thr, 1);
+}
+
+/* Horizontal MB filtering: macroblock filter on the edge at y_ptr (count 2) and, when non-NULL, at u_ptr / v_ptr (count 1), using lfi->mblim. */
+void vp9_loop_filter_mbh_c(unsigned char *y_ptr, unsigned char *u_ptr,
+ unsigned char *v_ptr, int y_stride, int uv_stride,
+ struct loop_filter_info *lfi) {
+ vp9_mbloop_filter_horizontal_edge_c(y_ptr, y_stride,
+ lfi->mblim, lfi->lim, lfi->hev_thr, 2);
+
+ if (u_ptr)
+ vp9_mbloop_filter_horizontal_edge_c(u_ptr, uv_stride,
+ lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+
+ if (v_ptr)
+ vp9_mbloop_filter_horizontal_edge_c(v_ptr, uv_stride,
+ lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+}
+
+/* Horizontal B filtering: normal filter on the luma edges at rows 4, 8 and 12 and, when non-NULL, the chroma edge at row 4, using lfi->blim. */
+void vp9_loop_filter_bh_c(unsigned char *y_ptr, unsigned char *u_ptr,
+ unsigned char *v_ptr, int y_stride, int uv_stride,
+ struct loop_filter_info *lfi) {
+ vp9_loop_filter_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride,
+ lfi->blim, lfi->lim, lfi->hev_thr, 2);
+ vp9_loop_filter_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride,
+ lfi->blim, lfi->lim, lfi->hev_thr, 2);
+ vp9_loop_filter_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride,
+ lfi->blim, lfi->lim, lfi->hev_thr, 2);
+
+ if (u_ptr)
+ vp9_loop_filter_horizontal_edge_c(u_ptr + 4 * uv_stride, uv_stride,
+ lfi->blim, lfi->lim, lfi->hev_thr, 1);
+
+ if (v_ptr)
+ vp9_loop_filter_horizontal_edge_c(v_ptr + 4 * uv_stride, uv_stride,
+ lfi->blim, lfi->lim, lfi->hev_thr, 1);
+}
+
+void vp9_loop_filter_bh8x8_c(unsigned char *y_ptr, unsigned char *u_ptr,
+ unsigned char *v_ptr, int y_stride, int uv_stride,
+ struct loop_filter_info *lfi) { /* u_ptr, v_ptr and uv_stride are accepted but unused: luma only */
+ vp9_mbloop_filter_horizontal_edge_c(
+ y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); /* single edge at row 8, macroblock filter with the block limit */
+}
+
+void vp9_loop_filter_bhs_c(unsigned char *y_ptr, int y_stride,
+ const unsigned char *blimit) { /* simple filter on the horizontal luma edges at rows 4, 8 and 12 */
+ vp9_loop_filter_simple_horizontal_edge_c(y_ptr + 4 * y_stride,
+ y_stride, blimit);
+ vp9_loop_filter_simple_horizontal_edge_c(y_ptr + 8 * y_stride,
+ y_stride, blimit);
+ vp9_loop_filter_simple_horizontal_edge_c(y_ptr + 12 * y_stride,
+ y_stride, blimit);
+}
+
+void vp9_loop_filter_bv8x8_c(unsigned char *y_ptr, unsigned char *u_ptr,
+ unsigned char *v_ptr, int y_stride, int uv_stride,
+ struct loop_filter_info *lfi) { /* u_ptr, v_ptr and uv_stride are accepted but unused: luma only */
+ vp9_mbloop_filter_vertical_edge_c(
+ y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); /* single edge at column 8, macroblock filter with the block limit */
+}
+
+void vp9_loop_filter_bvs_c(unsigned char *y_ptr, int y_stride,
+ const unsigned char *blimit) { /* simple filter on the vertical luma edges at columns 4, 8 and 12 */
+ vp9_loop_filter_simple_vertical_edge_c(y_ptr + 4, y_stride, blimit);
+ vp9_loop_filter_simple_vertical_edge_c(y_ptr + 8, y_stride, blimit);
+ vp9_loop_filter_simple_vertical_edge_c(y_ptr + 12, y_stride, blimit);
+}
diff --git a/vp9/common/vp9_maskingmv.c b/vp9/common/vp9_maskingmv.c
new file mode 100644
index 0000000..f1151e3
--- /dev/null
+++ b/vp9/common/vp9_maskingmv.c
@@ -0,0 +1,806 @@
+/*
+ ============================================================================
+ Name : vp9_maskingmv.c
+ Author : jimbankoski
+ Version :
+ Copyright : Your copyright notice
+ Description : Experimental colour-segmentation masks with masked motion search and prediction
+ ============================================================================
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+extern unsigned int vp9_sad16x16_sse3(
+ unsigned char *src_ptr,
+ int src_stride,
+ unsigned char *ref_ptr,
+ int ref_stride,
+ int max_err);
+
+extern void vp9_sad16x16x3_sse3(
+ unsigned char *src_ptr,
+ int src_stride,
+ unsigned char *ref_ptr,
+ int ref_stride,
+ int *results);
+
+extern int vp8_growmaskmb_sse3(
+ unsigned char *om,
+ unsigned char *nm);
+
+extern void vp8_makemask_sse3(
+ unsigned char *y,
+ unsigned char *u,
+ unsigned char *v,
+ unsigned char *ym,
+ int yp,
+ int uvp,
+ int ys,
+ int us,
+ int vs,
+ int yt,
+ int ut,
+ int vt);
+
+unsigned int vp9_sad16x16_unmasked_wmt(
+ unsigned char *src_ptr,
+ int src_stride,
+ unsigned char *ref_ptr,
+ int ref_stride,
+ unsigned char *mask);
+
+unsigned int vp9_sad16x16_masked_wmt(
+ unsigned char *src_ptr,
+ int src_stride,
+ unsigned char *ref_ptr,
+ int ref_stride,
+ unsigned char *mask);
+
+unsigned int vp8_masked_predictor_wmt(
+ unsigned char *masked,
+ unsigned char *unmasked,
+ int src_stride,
+ unsigned char *dst_ptr,
+ int dst_stride,
+ unsigned char *mask);
+unsigned int vp8_masked_predictor_uv_wmt(
+ unsigned char *masked,
+ unsigned char *unmasked,
+ int src_stride,
+ unsigned char *dst_ptr,
+ int dst_stride,
+ unsigned char *mask);
+unsigned int vp8_uv_from_y_mask(
+ unsigned char *ymask,
+ unsigned char *uvmask);
+int yp = 16;
+unsigned char sxy[] = {
+ 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
+ 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
+ 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
+ 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
+ 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
+ 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
+ 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
+ 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
+ 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
+ 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
+ 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
+ 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
+ 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
+ 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
+ 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
+ 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90
+};
+
+unsigned char sts[] = {
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+};
+unsigned char str[] = {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+};
+
+unsigned char y[] = {
+ 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40,
+ 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40,
+ 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40,
+ 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40,
+ 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40,
+ 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40,
+ 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40,
+ 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40,
+ 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40,
+ 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40,
+ 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40,
+ 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40,
+ 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40,
+ 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40,
+ 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40,
+ 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40
+};
+int uvp = 8;
+unsigned char u[] = {
+ 90, 80, 70, 70, 90, 90, 90, 17,
+ 90, 80, 70, 70, 90, 90, 90, 17,
+ 84, 70, 70, 90, 90, 90, 17, 17,
+ 84, 70, 70, 90, 90, 90, 17, 17,
+ 80, 70, 70, 90, 90, 90, 17, 17,
+ 90, 80, 70, 70, 90, 90, 90, 17,
+ 90, 80, 70, 70, 90, 90, 90, 17,
+ 90, 80, 70, 70, 90, 90, 90, 17
+};
+
+unsigned char v[] = {
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80
+};
+
+unsigned char ym[256];
+unsigned char uvm[64];
+typedef struct {
+ unsigned char y;
+ unsigned char yt;
+ unsigned char u;
+ unsigned char ut;
+ unsigned char v;
+ unsigned char vt;
+ unsigned char use;
+} COLOR_SEG_ELEMENT;
+
+/*
+COLOR_SEG_ELEMENT segmentation[]=
+{
+ { 60,4,80,17,80,10, 1},
+ { 40,4,15,10,80,10, 1},
+};
+*/
+
+COLOR_SEG_ELEMENT segmentation[] = {
+ { 79, 44, 92, 44, 237, 60, 1},
+};
+
+unsigned char pixel_mask(unsigned char y, unsigned char u, unsigned char v, // returns 255 if (y,u,v) matches any segment, else 0
+ COLOR_SEG_ELEMENT sgm[], // segments to test against
+ int c) { // number of entries of sgm to test
+ COLOR_SEG_ELEMENT *s = sgm;
+ unsigned char m = 0;
+ int i;
+ for (i = 0; i < c; i++, s++)
+ m |= (abs(y - s->y) < s->yt && // a match needs all three components strictly within their thresholds
+ abs(u - s->u) < s->ut &&
+ abs(v - s->v) < s->vt ? 255 : 0);
+
+ return m;
+}
+int neighbors[256][8];  // per pixel of a 16x16 block: indices of its (up to 8) adjacent pixels, padded with the pixel's own index
+int makeneighbors(void) {  // fills neighbors[][]; always returns 0
+ int i, j;
+ for (i = 0; i < 256; i++) {
+ int r = (i >> 4), c = (i & 15);  // row / column of pixel i in the 16x16 grid
+ int ni = 0;
+ for (j = 0; j < 8; j++)
+ neighbors[i][j] = i;  // default every slot to "self" so border pixels with fewer neighbours stay valid
+ for (j = 0; j < 256; j++) {
+ int nr = (j >> 4), nc = (j & 15);
+ if (j != i && abs(nr - r) < 2 && abs(nc - c) < 2)  // skip the pixel itself: at most 8 hits, so the 8-slot row cannot overflow
+ neighbors[i][ni++] = j;
+ }
+ }
+ return 0;
+}
+void grow_ymask(unsigned char *ym) {  // in-place: each of the 256 mask bytes becomes the OR of itself and its neighbors[] entries
+ unsigned char nym[256];  // scratch copy so every pixel reads the un-grown mask
+ int i, j;
+
+ for (i = 0; i < 256; i++) {
+ nym[i] = ym[i];
+ for (j = 0; j < 8; j++) {
+ nym[i] |= ym[neighbors[i][j]];  // relies on makeneighbors() having filled the table beforehand
+ }
+ }
+ for (i = 0; i < 256; i++)
+ ym[i] = nym[i];
+}
+void make_mb_mask(unsigned char *y, unsigned char *u, unsigned char *v,  // top-left of the 16x16 luma / 8x8 chroma block
+ unsigned char *ym, unsigned char *uvm,  // outputs: 256-byte luma mask (row stride 16) and 64-byte chroma mask (row stride 8)
+ int yp, int uvp,  // luma / chroma pitch of the input planes
+ COLOR_SEG_ELEMENT sgm[],
+ int count) {  // number of entries in sgm
+ int r, c;
+ unsigned char *oym = ym;  // remember the start of the luma mask for grow_ymask
+
+ memset(ym, 20, 256);  // placeholder fill; every byte is overwritten by the loop below
+ for (r = 0; r < 8; r++, uvm += 8, u += uvp, v += uvp, y += (yp + yp), ym += 32)  // one chroma row = two luma rows per iteration
+ for (c = 0; c < 8; c++) {
+ int y1 = y[c << 1];
+ int u1 = u[c];
+ int v1 = v[c];
+ int m = pixel_mask(y1, u1, v1, sgm, count);
+ uvm[c] = m;  // chroma mask is taken from the top-left luma sample of each 2x2 group
+ ym[c << 1] = uvm[c]; // = pixel_mask(y[c<<1],u[c],v[c],sgm,count);
+ ym[(c << 1) + 1] = pixel_mask(y[1 + (c << 1)], u[c], v[c], sgm, count);
+ ym[(c << 1) + 16] = pixel_mask(y[yp + (c << 1)], u[c], v[c], sgm, count);
+ ym[(c << 1) + 17] = pixel_mask(y[1 + yp + (c << 1)], u[c], v[c], sgm, count);
+ }
+ grow_ymask(oym);  // only the luma mask is grown; uvm keeps the raw values
+}
+
+int masked_sad(unsigned char *src, int p, unsigned char *dst, int dp,  // src/dst: 16x16 blocks with pitches p / dp
+ unsigned char *ym) {  // 256-byte mask, row stride 16
+ int i, j;
+ unsigned sad = 0;
+ for (i = 0; i < 16; i++, src += p, dst += dp, ym += 16)
+ for (j = 0; j < 16; j++)
+ if (ym[j])  // only pixels whose mask byte is non-zero contribute
+ sad += abs(src[j] - dst[j]);
+
+ return sad;
+}
+
+int compare_masks(unsigned char *sym, unsigned char *ym) {  // returns how many of the 256 mask bytes differ
+ int i, j;
+ unsigned sad = 0;  // a mismatch count despite the name
+ for (i = 0; i < 16; i++, sym += 16, ym += 16)
+ for (j = 0; j < 16; j++)
+ sad += (sym[j] != ym[j] ? 1 : 0);
+
+ return sad;
+}
+int unmasked_sad(unsigned char *src, int p, unsigned char *dst, int dp,  // src/dst: 16x16 blocks with pitches p / dp
+ unsigned char *ym) {  // 256-byte mask, row stride 16
+ int i, j;
+ unsigned sad = 0;
+ for (i = 0; i < 16; i++, src += p, dst += dp, ym += 16)
+ for (j = 0; j < 16; j++)
+ if (!ym[j])  // complement of masked_sad: only pixels whose mask byte is zero contribute
+ sad += abs(src[j] - dst[j]);
+
+ return sad;
+}
+int masked_motion_search(unsigned char *y, unsigned char *u, unsigned char *v,  // source block planes
+ int yp, int uvp,  // source luma / chroma pitch
+ unsigned char *dy, unsigned char *du, unsigned char *dv,  // reference planes at the co-located position; offsets in [-32, 31] are read
+ int dyp, int duvp,  // reference luma / chroma pitch
+ COLOR_SEG_ELEMENT sgm[],
+ int count,  // number of entries in sgm
+ int *mi,  // out: row offset chosen for the masked region
+ int *mj,  // out: column offset chosen for the masked region
+ int *ui,  // out: row offset chosen for the unmasked region
+ int *uj,  // out: column offset chosen for the unmasked region
+ int *wm) {  // out: 1 if the second strategy (mask match first) won, else 0
+ int i, j;
+
+ unsigned char ym[256];
+ unsigned char uvm[64];
+ unsigned char dym[256];
+ unsigned char duvm[64];
+ unsigned int e = 0;
+ int beste = 256;
+ int bmi = -32, bmj = -32;
+ int bui = -32, buj = -32;
+ int beste1 = 256;
+ int bmi1 = -32, bmj1 = -32;
+ int bui1 = -32, buj1 = -32;
+ int obeste;
+
+ // strategy 1: find the best unmasked vector first, then the best masked vector
+ beste = 0xffffffff;  // beste is an int; the e < beste tests compare as unsigned because e is unsigned
+
+ // find best unmasked mv
+ for (i = -32; i < 32; i++) {
+ unsigned char *dyz = i * dyp + dy;
+ unsigned char *duz = i / 2 * duvp + du;
+ unsigned char *dvz = i / 2 * duvp + dv;
+ for (j = -32; j < 32; j++) {
+ // mask is rebuilt from the reference block at every candidate offset
+ make_mb_mask(dyz + j, duz + j / 2, dvz + j / 2, dym, duvm, dyp, duvp, sgm, count);
+
+ e = unmasked_sad(y, yp, dyz + j, dyp, dym);
+
+ if (e < beste) {
+ bui = i;
+ buj = j;
+ beste = e;
+ }
+ }
+ }
+ // bui=0;buj=0;
+ // best mv masked destination
+ make_mb_mask(dy + bui * dyp + buj, du + bui / 2 * duvp + buj / 2, dv + bui / 2 * duvp + buj / 2,
+ dym, duvm, dyp, duvp, sgm, count);
+
+ obeste = beste;  // keep the unmasked cost so it can be added to the masked cost below
+ beste = 0xffffffff;
+
+ // find best masked
+ for (i = -32; i < 32; i++) {
+ unsigned char *dyz = i * dyp + dy;
+ for (j = -32; j < 32; j++) {
+ e = masked_sad(y, yp, dyz + j, dyp, dym);
+
+ if (e < beste) {
+ bmi = i;
+ bmj = j;
+ beste = e;
+ }
+ }
+ }
+ beste1 = beste + obeste;  // total cost of strategy 1
+ bmi1 = bmi;
+ bmj1 = bmj;
+ bui1 = bui;
+ buj1 = buj;
+
+ beste = 0xffffffff;
+ // strategy 2: build the mask from the source block and look for the reference offset whose mask matches best
+ make_mb_mask(y, u, v, ym, uvm, yp, uvp, sgm, count);
+
+ // find best mask
+ for (i = -32; i < 32; i++) {
+ unsigned char *dyz = i * dyp + dy;
+ unsigned char *duz = i / 2 * duvp + du;
+ unsigned char *dvz = i / 2 * duvp + dv;
+ for (j = -32; j < 32; j++) {
+ // mask is rebuilt from the reference block at every candidate offset
+ make_mb_mask(dyz + j, duz + j / 2, dvz + j / 2, dym, duvm, dyp, duvp, sgm, count);
+
+ e = compare_masks(ym, dym);
+
+ if (e < beste) {
+ bmi = i;
+ bmj = j;
+ beste = e;
+ }
+ }
+ }
+
+
+ // best mv masked destination
+ make_mb_mask(dy + bmi * dyp + bmj, du + bmi / 2 * duvp + bmj / 2, dv + bmi / 2 * duvp + bmj / 2,
+ dym, duvm, dyp, duvp, sgm, count);
+
+ obeste = masked_sad(y, yp, dy + bmi * dyp + bmj, dyp, dym);
+
+ beste = 0xffffffff;
+
+ // find best unmasked mv
+ for (i = -32; i < 32; i++) {
+ unsigned char *dyz = i * dyp + dy;
+ for (j = -32; j < 32; j++) {
+ e = unmasked_sad(y, yp, dyz + j, dyp, dym);
+
+ if (e < beste) {
+ bui = i;
+ buj = j;
+ beste = e;
+ }
+ }
+ }
+ beste += obeste;  // total cost of strategy 2
+
+
+ if (beste < beste1) {  // report whichever strategy had the lower combined cost
+ *mi = bmi;
+ *mj = bmj;
+ *ui = bui;
+ *uj = buj;
+ *wm = 1;
+ } else {
+ *mi = bmi1;
+ *mj = bmj1;
+ *ui = bui1;
+ *uj = buj1;
+ *wm = 0;
+
+ }
+ return 0;
+}
+
+int predict(unsigned char *src, int p, unsigned char *dst, int dp,  // two 16x16 candidate blocks with pitches p / dp
+ unsigned char *ym, unsigned char *prd) {  // ym: 256-byte mask; prd: 256-byte output, row stride 16
+ int i, j;
+ for (i = 0; i < 16; i++, src += p, dst += dp, ym += 16, prd += 16)
+ for (j = 0; j < 16; j++)
+ prd[j] = (ym[j] ? src[j] : dst[j]);  // masked pixels come from src, the rest from dst
+ return 0;  // always 0
+}
+
+int fast_masked_motion_search(unsigned char *y, unsigned char *u, unsigned char *v,  // source block planes
+ int yp, int uvp,  // source luma / chroma pitch
+ unsigned char *dy, unsigned char *du, unsigned char *dv,  // reference planes at the co-located position; offsets in [-32, 31] are read
+ int dyp, int duvp,  // reference luma / chroma pitch
+ COLOR_SEG_ELEMENT sgm[],  // only sgm[0] is used
+ int count,  // unused here
+ int *mi,  // out: row offset chosen for the masked region
+ int *mj,  // out: column offset chosen for the masked region
+ int *ui,  // out: row offset chosen for the unmasked region
+ int *uj,  // out: column offset chosen for the unmasked region
+ int *wm) {  // out: 1 if the second strategy (mask match first) won, else 0
+ int i, j;
+
+ unsigned char ym[256];
+ unsigned char ym2[256];
+ unsigned char uvm[64];
+ unsigned char dym2[256];
+ unsigned char dym[256];
+ unsigned char duvm[64];
+ unsigned int e = 0;
+ int beste = 256;
+ int bmi = -32, bmj = -32;
+ int bui = -32, buj = -32;
+ int beste1 = 256;
+ int bmi1 = -32, bmj1 = -32;
+ int bui1 = -32, buj1 = -32;
+ int obeste;
+
+ // strategy 1: find the best unmasked vector first, then the best masked vector
+ beste = 0xffffffff;  // beste is an int; the e < beste tests compare as unsigned because e is unsigned
+
+#if 0
+ for (i = 0; i < 16; i++) {
+ unsigned char *dy = i * yp + y;
+ for (j = 0; j < 16; j++)
+ printf("%2x", dy[j]);
+ printf("\n");
+ }
+ printf("\n");
+
+ for (i = -32; i < 48; i++) {
+ unsigned char *dyz = i * dyp + dy;
+ for (j = -32; j < 48; j++)
+ printf("%2x", dyz[j]);
+ printf("\n");
+ }
+#endif
+
+ // find best unmasked mv
+ for (i = -32; i < 32; i++) {
+ unsigned char *dyz = i * dyp + dy;
+ unsigned char *duz = i / 2 * duvp + du;
+ unsigned char *dvz = i / 2 * duvp + dv;
+ for (j = -32; j < 32; j++) {
+ // mask is rebuilt from the reference block at every candidate offset
+ vp8_makemask_sse3(dyz + j, duz + j / 2, dvz + j / 2, dym, dyp, duvp,
+ sgm[0].y, sgm[0].u, sgm[0].v,
+ sgm[0].yt, sgm[0].ut, sgm[0].vt);
+
+ vp8_growmaskmb_sse3(dym, dym2);
+
+ e = vp9_sad16x16_unmasked_wmt(y, yp, dyz + j, dyp, dym2);
+
+ if (e < beste) {
+ bui = i;
+ buj = j;
+ beste = e;
+ }
+ }
+ }
+ // bui=0;buj=0;
+ // best mv masked destination
+
+ vp8_makemask_sse3(dy + bui * dyp + buj, du + bui / 2 * duvp + buj / 2, dv + bui / 2 * duvp + buj / 2,
+ dym, dyp, duvp,
+ sgm[0].y, sgm[0].u, sgm[0].v,
+ sgm[0].yt, sgm[0].ut, sgm[0].vt);
+
+ vp8_growmaskmb_sse3(dym, dym2);
+
+ obeste = beste;  // keep the unmasked cost so it can be added to the masked cost below
+ beste = 0xffffffff;
+
+ // find best masked
+ for (i = -32; i < 32; i++) {
+ unsigned char *dyz = i * dyp + dy;
+ for (j = -32; j < 32; j++) {
+ e = vp9_sad16x16_masked_wmt(y, yp, dyz + j, dyp, dym2);
+ if (e < beste) {
+ bmi = i;
+ bmj = j;
+ beste = e;
+ }
+ }
+ }
+ beste1 = beste + obeste;  // total cost of strategy 1
+ bmi1 = bmi;
+ bmj1 = bmj;
+ bui1 = bui;
+ buj1 = buj;
+ beste = 0xffffffff;  // restart the running minimum so the mask-difference search below is not compared against the SAD above (mirrors masked_motion_search)
+ // strategy 2: build the mask from the source block and look for the reference offset whose mask matches best
+ vp8_makemask_sse3(y, u, v,
+ ym, yp, uvp,
+ sgm[0].y, sgm[0].u, sgm[0].v,
+ sgm[0].yt, sgm[0].ut, sgm[0].vt);
+
+ vp8_growmaskmb_sse3(ym, ym2);
+
+ // find best mask
+ for (i = -32; i < 32; i++) {
+ unsigned char *dyz = i * dyp + dy;
+ unsigned char *duz = i / 2 * duvp + du;
+ unsigned char *dvz = i / 2 * duvp + dv;
+ for (j = -32; j < 32; j++) {
+ // mask is rebuilt from the reference block at every candidate offset
+ vp8_makemask_sse3(dyz + j, duz + j / 2, dvz + j / 2, dym, dyp, duvp,
+ sgm[0].y, sgm[0].u, sgm[0].v,
+ sgm[0].yt, sgm[0].ut, sgm[0].vt);
+
+ vp8_growmaskmb_sse3(dym, dym2);
+
+ e = compare_masks(ym2, dym2);
+
+ if (e < beste) {
+ bmi = i;
+ bmj = j;
+ beste = e;
+ }
+ }
+ }
+
+ vp8_makemask_sse3(dy + bmi * dyp + bmj, du + bmi / 2 * duvp + bmj / 2, dv + bmi / 2 * duvp + bmj / 2,
+ dym, dyp, duvp,
+ sgm[0].y, sgm[0].u, sgm[0].v,
+ sgm[0].yt, sgm[0].ut, sgm[0].vt);
+
+ vp8_growmaskmb_sse3(dym, dym2);
+
+ obeste = vp9_sad16x16_masked_wmt(y, yp, dy + bmi * dyp + bmj, dyp, dym2);
+
+ beste = 0xffffffff;
+
+ // find best unmasked mv
+ for (i = -32; i < 32; i++) {
+ unsigned char *dyz = i * dyp + dy;
+ for (j = -32; j < 32; j++) {
+ e = vp9_sad16x16_unmasked_wmt(y, yp, dyz + j, dyp, dym2);
+
+ if (e < beste) {
+ bui = i;
+ buj = j;
+ beste = e;
+ }
+ }
+ }
+ beste += obeste;  // total cost of strategy 2
+
+ if (beste < beste1) {  // report whichever strategy had the lower combined cost
+ *mi = bmi;
+ *mj = bmj;
+ *ui = bui;
+ *uj = buj;
+ *wm = 1;
+ } else {
+ *mi = bmi1;
+ *mj = bmj1;
+ *ui = bui1;
+ *uj = buj1;
+ *wm = 0;
+ beste = beste1;
+
+ }
+ return beste;  // combined cost of the winning strategy
+}
+
+int predict_all(unsigned char *ym, unsigned char *um, unsigned char *vm,  // reference planes at the block position (NOT masks, despite the names)
+ int ymp, int uvmp,  // reference luma / chroma pitch
+ unsigned char *yp, unsigned char *up, unsigned char *vp,  // output prediction planes
+ int ypp, int uvpp,  // output luma / chroma pitch
+ COLOR_SEG_ELEMENT sgm[],  // only sgm[0] is used
+ int count,  // unused here
+ int mi,  // row offset for the masked region
+ int mj,  // column offset for the masked region
+ int ui,  // row offset for the unmasked region
+ int uj,  // column offset for the unmasked region
+ int wm) {  // non-zero: build the mask at the masked offset; zero: at the unmasked offset
+ int i, j;
+ unsigned char dym[256];
+ unsigned char dym2[256];
+ unsigned char duvm[64];
+ unsigned char *yu = ym, *uu = um, *vu = vm;  // copies taken before ym/um/vm are displaced below
+
+ unsigned char *dym3 = dym2;  // alias for the grown luma mask
+
+ ym += mi * ymp + mj;  // ym/um/vm now point at the masked-region candidate
+ um += mi / 2 * uvmp + mj / 2;
+ vm += mi / 2 * uvmp + mj / 2;
+
+ yu += ui * ymp + uj;  // yu/uu/vu point at the unmasked-region candidate
+ uu += ui / 2 * uvmp + uj / 2;
+ vu += ui / 2 * uvmp + uj / 2;
+
+ // build the mask from whichever candidate the search derived it from
+ if (wm)
+ vp8_makemask_sse3(ym, um, vm, dym, ymp, uvmp,
+ sgm[0].y, sgm[0].u, sgm[0].v,
+ sgm[0].yt, sgm[0].ut, sgm[0].vt);
+ else
+ vp8_makemask_sse3(yu, uu, vu, dym, ymp, uvmp,
+ sgm[0].y, sgm[0].u, sgm[0].v,
+ sgm[0].yt, sgm[0].ut, sgm[0].vt);
+
+ vp8_growmaskmb_sse3(dym, dym2);
+ vp8_masked_predictor_wmt(ym, yu, ymp, yp, ypp, dym3);
+ vp8_uv_from_y_mask(dym3, duvm);  // chroma mask is derived from the grown luma mask
+ vp8_masked_predictor_uv_wmt(um, uu, uvmp, up, uvpp, duvm);
+ vp8_masked_predictor_uv_wmt(vm, vu, uvmp, vp, uvpp, duvm);
+
+ return 0;  // always 0
+}
+
+unsigned char f0p[1280 * 720 * 3 / 2];
+unsigned char f1p[1280 * 720 * 3 / 2];
+unsigned char prd[1280 * 720 * 3 / 2];
+unsigned char msk[1280 * 720 * 3 / 2];
+
+
+int mainz(int argc, char *argv[]) {
+ // Test driver. argv[1] = input file, argv[2] = output file, argv[3] = width, argv[4] = height. Returns 0.
+ FILE *f = fopen(argv[1], "rb");  // NOTE(review): argc, fopen results and w/h vs. the fixed 1280x720 buffers are not validated
+ FILE *g = fopen(argv[2], "wb");
+ int w = atoi(argv[3]), h = atoi(argv[4]);
+ int y_stride = w, uv_stride = w / 2;
+ int r, c;
+ unsigned char *f0 = f0p, *f1 = f1p, *t;
+ unsigned char ym[256], uvm[64];
+ unsigned char ym2[256], uvm2[64];
+ unsigned char ym3[256], uvm3[64];
+ int a, b;
+
+ COLOR_SEG_ELEMENT last = { 20, 20, 20, 20, 230, 20, 1}, best = { 20, 20, 20, 20, 230, 20, 1};  // best starts equal to last so predict_all never reads an indeterminate segment
+#if 0
+ makeneighbors();
+ COLOR_SEG_ELEMENT segmentation[] = {
+ { 60, 4, 80, 17, 80, 10, 1},
+ { 40, 4, 15, 10, 80, 10, 1},
+ };
+ make_mb_mask(y, u, v, ym2, uvm2, 16, 8, segmentation, 1);
+
+ vp8_makemask_sse3(y, u, v, ym, (int) 16, (int) 8,
+ (int) segmentation[0].y, (int) segmentation[0].u, (int) segmentation[0].v,
+ segmentation[0].yt, segmentation[0].ut, segmentation[0].vt);
+
+ vp8_growmaskmb_sse3(ym, ym3);
+
+ a = vp9_sad16x16_masked_wmt(str, 16, sts, 16, ym3);
+ b = vp9_sad16x16_unmasked_wmt(str, 16, sts, 16, ym3);
+
+ vp8_masked_predictor_wmt(str, sts, 16, ym, 16, ym3);
+
+ vp8_uv_from_y_mask(ym3, uvm3);
+
+ return 4;
+#endif
+ makeneighbors();
+
+
+ memset(prd, 128, w * h * 3 / 2);  // start the prediction frame as mid-grey
+
+ fread(f0, w * h * 3 / 2, 1, f);  // first frame; frames are w*h luma bytes followed by two quarter-size chroma planes
+
+ while (!feof(f)) {
+ unsigned char *ys = f1, *yd = f0, *yp = prd;  // s = newly read frame, d = previous frame, p = prediction output
+ unsigned char *us = f1 + w * h, *ud = f0 + w * h, *up = prd + w * h;
+ unsigned char *vs = f1 + w * h * 5 / 4, *vd = f0 + w * h * 5 / 4, *vp = prd + w * h * 5 / 4;
+ fread(f1, w * h * 3 / 2, 1, f);
+
+ ys += 32 * y_stride;  // skip a 32-pixel border so the +/-32 search window stays inside the frame
+ yd += 32 * y_stride;
+ yp += 32 * y_stride;
+ us += 16 * uv_stride;
+ ud += 16 * uv_stride;
+ up += 16 * uv_stride;
+ vs += 16 * uv_stride;
+ vd += 16 * uv_stride;
+ vp += 16 * uv_stride;
+ for (r = 32; r < h - 32; r += 16,
+ ys += 16 * w, yd += 16 * w, yp += 16 * w,
+ us += 8 * uv_stride, ud += 8 * uv_stride, up += 8 * uv_stride,
+ vs += 8 * uv_stride, vd += 8 * uv_stride, vp += 8 * uv_stride) {
+ for (c = 32; c < w - 32; c += 16) {
+ int mi, mj, ui, uj, wm;
+ int bmi, bmj, bui, buj, bwm;
+ unsigned char ym[256];
+
+ if (vp9_sad16x16_sse3(ys + c, y_stride, yd + c, y_stride, 0xffff) == 0)
+ bmi = bmj = bui = buj = bwm = 0;  // identical block: zero motion, keep the previous segment
+ else {
+ COLOR_SEG_ELEMENT cs[5];
+ int j;
+ unsigned int beste = 0xfffffff;
+ unsigned int bestj = 0;
+
+ // try color from last mb segmentation
+ cs[0] = last;
+
+ // try color segs from 4 pixels in mb recon as segmentation
+ cs[1].y = yd[c + y_stride + 1];
+ cs[1].u = ud[c / 2 + uv_stride];
+ cs[1].v = vd[c / 2 + uv_stride];
+ cs[1].yt = cs[1].ut = cs[1].vt = 20;
+ cs[2].y = yd[c + w + 14];
+ cs[2].u = ud[c / 2 + uv_stride + 7];
+ cs[2].v = vd[c / 2 + uv_stride + 7];
+ cs[2].yt = cs[2].ut = cs[2].vt = 20;
+ cs[3].y = yd[c + w * 14 + 1];
+ cs[3].u = ud[c / 2 + uv_stride * 7];
+ cs[3].v = vd[c / 2 + uv_stride * 7];
+ cs[3].yt = cs[3].ut = cs[3].vt = 20;
+ cs[4].y = yd[c + w * 14 + 14];
+ cs[4].u = ud[c / 2 + uv_stride * 7 + 7];
+ cs[4].v = vd[c / 2 + uv_stride * 7 + 7];
+ cs[4].yt = cs[4].ut = cs[4].vt = 20;
+
+ for (j = 0; j < 5; j++) {
+ int e;
+
+ e = fast_masked_motion_search(
+ ys + c, us + c / 2, vs + c / 2, y_stride, uv_stride,
+ yd + c, ud + c / 2, vd + c / 2, y_stride, uv_stride,
+ &cs[j], 1, &mi, &mj, &ui, &uj, &wm);
+
+ if (e < beste) {  // e is converted to unsigned for this comparison
+ bmi = mi;
+ bmj = mj;
+ bui = ui;
+ buj = uj, bwm = wm;
+ bestj = j;
+ beste = e;
+ }
+ }
+ best = cs[bestj];
+ // best = segmentation[0];
+ last = best;
+ }
+ predict_all(yd + c, ud + c / 2, vd + c / 2, w, uv_stride,
+ yp + c, up + c / 2, vp + c / 2, w, uv_stride,
+ &best, 1, bmi, bmj, bui, buj, bwm);
+
+ }
+ }
+ fwrite(prd, w * h * 3 / 2, 1, g);
+ t = f0;  // swap buffers: the frame just read becomes the reference for the next one
+ f0 = f1;
+ f1 = t;
+
+ }
+ fclose(f);
+ fclose(g);
+ return 0;
+}
diff --git a/vp9/common/vp9_mbpitch.c b/vp9/common/vp9_mbpitch.c
new file mode 100644
index 0000000..1107402
--- /dev/null
+++ b/vp9/common/vp9_mbpitch.c
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vp9/common/vp9_blockd.h"
+
+typedef enum {
+ PRED = 0,
+ DEST = 1
+} BLOCKSET;
+
+static void setup_block
+(
+ BLOCKD *b, /* block descriptor to fill in */
+ int mv_stride, /* unused in this function */
+ unsigned char **base, /* address of the plane's buffer pointer */
+ unsigned char **base2, /* second predictor's buffer pointer; only stored when bs != DEST */
+ int Stride, /* row pitch of the plane */
+ int offset, /* offset of this block from *base */
+ BLOCKSET bs /* DEST fills the dst_* fields, anything else the pre_* fields */
+) {
+
+ if (bs == DEST) {
+ b->dst_stride = Stride;
+ b->dst = offset;
+ b->base_dst = base;
+ } else {
+ b->pre_stride = Stride;
+ b->pre = offset;
+ b->base_pre = base;
+ b->base_second_pre = base2;
+ }
+
+}
+
+
+static void setup_macroblock(MACROBLOCKD *xd, BLOCKSET bs) { /* fills the dst_* (DEST) or pre_* (PRED) fields of blocks 0..23 */
+ int block;
+
+ unsigned char **y, **u, **v;
+ unsigned char **y2 = NULL, **u2 = NULL, **v2 = NULL; /* second-predictor planes; stay NULL for DEST */
+ BLOCKD *blockd = xd->block;
+ int stride;
+
+ if (bs == DEST) {
+ y = &xd->dst.y_buffer;
+ u = &xd->dst.u_buffer;
+ v = &xd->dst.v_buffer;
+ } else {
+ y = &xd->pre.y_buffer;
+ u = &xd->pre.u_buffer;
+ v = &xd->pre.v_buffer;
+
+ y2 = &xd->second_pre.y_buffer;
+ u2 = &xd->second_pre.u_buffer;
+ v2 = &xd->second_pre.v_buffer;
+ }
+
+ stride = xd->dst.y_stride; /* NOTE(review): the dst stride is used for PRED too - presumably pre and dst share a stride; confirm */
+ for (block = 0; block < 16; block++) { /* y blocks */
+ setup_block(&blockd[block], stride, y, y2, stride,
+ (block >> 2) * 4 * stride + (block & 3) * 4, bs); /* 4x4 grid of 4x4 blocks */
+ }
+
+ stride = xd->dst.uv_stride;
+ for (block = 16; block < 20; block++) { /* U and V blocks */
+ setup_block(&blockd[block], stride, u, u2, stride,
+ ((block - 16) >> 1) * 4 * stride + (block & 1) * 4, bs); /* 2x2 grid of 4x4 blocks */
+
+ setup_block(&blockd[block + 4], stride, v, v2, stride,
+ ((block - 16) >> 1) * 4 * stride + (block & 1) * 4, bs); /* V blocks are entries 20..23 */
+ }
+}
+
+void vp9_setup_block_dptrs(MACROBLOCKD *xd) { /* points each block's diff/predictor/qcoeff/dqcoeff into the macroblock-level arrays */
+ int r, c;
+ BLOCKD *blockd = xd->block;
+
+ for (r = 0; r < 4; r++) { /* blocks 0..15: 4x4 grid inside a 16-wide area at offset 0 */
+ for (c = 0; c < 4; c++) {
+ blockd[r * 4 + c].diff = &xd->diff[r * 4 * 16 + c * 4];
+ blockd[r * 4 + c].predictor = xd->predictor + r * 4 * 16 + c * 4;
+ }
+ }
+
+ for (r = 0; r < 2; r++) { /* blocks 16..19: 2x2 grid inside an 8-wide area at offset 256 */
+ for (c = 0; c < 2; c++) {
+ blockd[16 + r * 2 + c].diff = &xd->diff[256 + r * 4 * 8 + c * 4];
+ blockd[16 + r * 2 + c].predictor =
+ xd->predictor + 256 + r * 4 * 8 + c * 4;
+
+ }
+ }
+
+ for (r = 0; r < 2; r++) { /* blocks 20..23: 2x2 grid inside an 8-wide area at offset 320 */
+ for (c = 0; c < 2; c++) {
+ blockd[20 + r * 2 + c].diff = &xd->diff[320 + r * 4 * 8 + c * 4];
+ blockd[20 + r * 2 + c].predictor =
+ xd->predictor + 320 + r * 4 * 8 + c * 4;
+
+ }
+ }
+
+ blockd[24].diff = &xd->diff[384]; /* block 24 gets a diff pointer only; its predictor is not set here */
+
+ for (r = 0; r < 25; r++) { /* 16 coefficients per block for all 25 blocks */
+ blockd[r].qcoeff = xd->qcoeff + r * 16;
+ blockd[r].dqcoeff = xd->dqcoeff + r * 16;
+ }
+}
+
+void vp9_build_block_doffsets(MACROBLOCKD *xd) {
+
+ /* fill in per-block stride, offset and base pointers: destination first, then predictor */
+ setup_macroblock(xd, DEST);
+ setup_macroblock(xd, PRED);
+}
diff --git a/vp9/common/vp9_modecont.c b/vp9/common/vp9_modecont.c
new file mode 100644
index 0000000..f7f2b90
--- /dev/null
+++ b/vp9/common/vp9_modecont.c
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vp9/common/vp9_entropy.h"
+
+const int vp9_default_mode_contexts[INTER_MODE_CONTEXTS][4] = {
+ {223, 1, 1, 237}, // 0,0 best: Only candidate
+ {87, 166, 26, 219}, // 0,0 best: non zero candidates
+ {89, 67, 18, 125}, // 0,0 best: non zero candidates, split
+ {16, 141, 69, 226}, // strong nz candidate(s), no split
+ {35, 122, 14, 227}, // weak nz candidate(s), no split
+ {14, 122, 22, 164}, // strong nz candidate(s), split
+ {16, 70, 9, 183}, // weak nz candidate(s), split
+};
diff --git a/vp9/common/vp9_modecont.h b/vp9/common/vp9_modecont.h
new file mode 100644
index 0000000..122eb12
--- /dev/null
+++ b/vp9/common/vp9_modecont.h
@@ -0,0 +1,16 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_COMMON_VP9_MODECONT_H_
+#define VP9_COMMON_VP9_MODECONT_H_
+/* Table lives in vp9_modecont.c; includer must supply INTER_MODE_CONTEXTS. */
+extern const int vp9_default_mode_contexts[INTER_MODE_CONTEXTS][4];
+#endif
diff --git a/vp9/common/vp9_modecontext.c b/vp9/common/vp9_modecontext.c
new file mode 100644
index 0000000..ab91c0b
--- /dev/null
+++ b/vp9/common/vp9_modecontext.c
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vp9/common/vp9_entropymode.h"
+
+const unsigned int vp9_kf_default_bmode_counts[VP9_KF_BINTRAMODES]
+ [VP9_KF_BINTRAMODES]
+ [VP9_KF_BINTRAMODES] = {
+ {
+ /*Above Mode : 0*/
+ { 43438, 2195, 470, 316, 615, 171, 217, 412, 124, 160, }, /* left_mode 0 */
+ { 5722, 2751, 296, 291, 81, 68, 80, 101, 100, 170, }, /* left_mode 1 */
+ { 1629, 201, 307, 25, 47, 16, 34, 72, 19, 28, }, /* left_mode 2 */
+ { 332, 266, 36, 500, 20, 65, 23, 14, 154, 106, }, /* left_mode 3 */
+ { 450, 97, 10, 24, 117, 10, 2, 12, 8, 71, }, /* left_mode 4 */
+ { 384, 49, 29, 44, 12, 162, 51, 5, 87, 42, }, /* left_mode 5 */
+ { 495, 53, 157, 27, 14, 57, 180, 17, 17, 34, }, /* left_mode 6 */
+ { 695, 64, 62, 9, 27, 5, 3, 147, 10, 26, }, /* left_mode 7 */
+ { 230, 54, 20, 124, 16, 125, 29, 12, 283, 37, }, /* left_mode 8 */
+ { 260, 87, 21, 120, 32, 16, 33, 16, 33, 203, }, /* left_mode 9 */
+ },
+ {
+ /*Above Mode : 1*/
+ { 3934, 2573, 355, 137, 128, 87, 133, 117, 37, 27, }, /* left_mode 0 */
+ { 1036, 1929, 278, 135, 27, 37, 48, 55, 41, 91, }, /* left_mode 1 */
+ { 223, 256, 253, 15, 13, 9, 28, 64, 3, 3, }, /* left_mode 2 */
+ { 120, 129, 17, 316, 15, 11, 9, 4, 53, 74, }, /* left_mode 3 */
+ { 129, 58, 6, 11, 38, 2, 0, 5, 2, 67, }, /* left_mode 4 */
+ { 53, 22, 11, 16, 8, 26, 14, 3, 19, 12, }, /* left_mode 5 */
+ { 59, 26, 61, 11, 4, 9, 35, 13, 8, 8, }, /* left_mode 6 */
+ { 101, 52, 40, 8, 5, 2, 8, 59, 2, 20, }, /* left_mode 7 */
+ { 48, 34, 10, 52, 8, 15, 6, 6, 63, 20, }, /* left_mode 8 */
+ { 96, 48, 22, 63, 11, 14, 5, 8, 9, 96, }, /* left_mode 9 */
+ },
+ {
+ /*Above Mode : 2*/
+ { 709, 461, 506, 36, 27, 33, 151, 98, 24, 6, }, /* left_mode 0 */
+ { 201, 375, 442, 27, 13, 8, 46, 58, 6, 19, }, /* left_mode 1 */
+ { 122, 140, 417, 4, 13, 3, 33, 59, 4, 2, }, /* left_mode 2 */
+ { 36, 17, 22, 16, 6, 8, 12, 17, 9, 21, }, /* left_mode 3 */
+ { 51, 15, 7, 1, 14, 0, 4, 5, 3, 22, }, /* left_mode 4 */
+ { 18, 11, 30, 9, 7, 20, 11, 5, 2, 6, }, /* left_mode 5 */
+ { 38, 21, 103, 9, 4, 12, 79, 13, 2, 5, }, /* left_mode 6 */
+ { 64, 17, 66, 2, 12, 4, 2, 65, 4, 5, }, /* left_mode 7 */
+ { 14, 7, 7, 16, 3, 11, 4, 13, 15, 16, }, /* left_mode 8 */
+ { 36, 8, 32, 9, 9, 4, 14, 7, 6, 24, }, /* left_mode 9 */
+ },
+ {
+ /*Above Mode : 3*/
+ { 1340, 173, 36, 119, 30, 10, 13, 10, 20, 26, }, /* left_mode 0 */
+ { 156, 293, 26, 108, 5, 16, 2, 4, 23, 30, }, /* left_mode 1 */
+ { 60, 34, 13, 7, 3, 3, 0, 8, 4, 5, }, /* left_mode 2 */
+ { 72, 64, 1, 235, 3, 9, 2, 7, 28, 38, }, /* left_mode 3 */
+ { 29, 14, 1, 3, 5, 0, 2, 2, 5, 13, }, /* left_mode 4 */
+ { 22, 7, 4, 11, 2, 5, 1, 2, 6, 4, }, /* left_mode 5 */
+ { 18, 14, 5, 6, 4, 3, 14, 0, 9, 2, }, /* left_mode 6 */
+ { 41, 10, 7, 1, 2, 0, 0, 10, 2, 1, }, /* left_mode 7 */
+ { 23, 19, 2, 33, 1, 5, 2, 0, 51, 8, }, /* left_mode 8 */
+ { 33, 26, 7, 53, 3, 9, 3, 3, 9, 19, }, /* left_mode 9 */
+ },
+ {
+ /*Above Mode : 4*/
+ { 410, 165, 43, 31, 66, 15, 30, 54, 8, 17, }, /* left_mode 0 */
+ { 115, 64, 27, 18, 30, 7, 11, 15, 4, 19, }, /* left_mode 1 */
+ { 31, 23, 25, 1, 7, 2, 2, 10, 0, 5, }, /* left_mode 2 */
+ { 17, 4, 1, 6, 8, 2, 7, 5, 5, 21, }, /* left_mode 3 */
+ { 120, 12, 1, 2, 83, 3, 0, 4, 1, 40, }, /* left_mode 4 */
+ { 4, 3, 1, 2, 1, 2, 5, 0, 3, 6, }, /* left_mode 5 */
+ { 10, 2, 13, 6, 6, 6, 8, 2, 4, 5, }, /* left_mode 6 */
+ { 58, 10, 5, 1, 28, 1, 1, 33, 1, 9, }, /* left_mode 7 */
+ { 8, 2, 1, 4, 2, 5, 1, 1, 2, 10, }, /* left_mode 8 */
+ { 76, 7, 5, 7, 18, 2, 2, 0, 5, 45, }, /* left_mode 9 */
+ },
+ {
+ /*Above Mode : 5*/
+ { 444, 46, 47, 20, 14, 110, 60, 14, 60, 7, }, /* left_mode 0 */
+ { 59, 57, 25, 18, 3, 17, 21, 6, 14, 6, }, /* left_mode 1 */
+ { 24, 17, 20, 6, 4, 13, 7, 2, 3, 2, }, /* left_mode 2 */
+ { 13, 11, 5, 14, 4, 9, 2, 4, 15, 7, }, /* left_mode 3 */
+ { 8, 5, 2, 1, 4, 0, 1, 1, 2, 12, }, /* left_mode 4 */
+ { 19, 5, 5, 7, 4, 40, 6, 3, 10, 4, }, /* left_mode 5 */
+ { 16, 5, 9, 1, 1, 16, 26, 2, 10, 4, }, /* left_mode 6 */
+ { 11, 4, 8, 1, 1, 4, 4, 5, 4, 1, }, /* left_mode 7 */
+ { 15, 1, 3, 7, 3, 21, 7, 1, 34, 5, }, /* left_mode 8 */
+ { 18, 5, 1, 3, 4, 3, 7, 1, 2, 9, }, /* left_mode 9 */
+ },
+ {
+ /*Above Mode : 6*/
+ { 476, 149, 94, 13, 14, 77, 291, 27, 23, 3, }, /* left_mode 0 */
+ { 79, 83, 42, 14, 2, 12, 63, 2, 4, 14, }, /* left_mode 1 */
+ { 43, 36, 55, 1, 3, 8, 42, 11, 5, 1, }, /* left_mode 2 */
+ { 9, 9, 6, 16, 1, 5, 6, 3, 11, 10, }, /* left_mode 3 */
+ { 10, 3, 1, 3, 10, 1, 0, 1, 1, 4, }, /* left_mode 4 */
+ { 14, 6, 15, 5, 1, 20, 25, 2, 5, 0, }, /* left_mode 5 */
+ { 28, 7, 51, 1, 0, 8, 127, 6, 2, 5, }, /* left_mode 6 */
+ { 13, 3, 3, 2, 3, 1, 2, 8, 1, 2, }, /* left_mode 7 */
+ { 10, 3, 3, 3, 3, 8, 2, 2, 9, 3, }, /* left_mode 8 */
+ { 13, 7, 11, 4, 0, 4, 6, 2, 5, 8, }, /* left_mode 9 */
+ },
+ {
+ /*Above Mode : 7*/
+ { 376, 135, 119, 6, 32, 8, 31, 224, 9, 3, }, /* left_mode 0 */
+ { 93, 60, 54, 6, 13, 7, 8, 92, 2, 12, }, /* left_mode 1 */
+ { 74, 36, 84, 0, 3, 2, 9, 67, 2, 1, }, /* left_mode 2 */
+ { 19, 4, 4, 8, 8, 2, 4, 7, 6, 16, }, /* left_mode 3 */
+ { 51, 7, 4, 1, 77, 3, 0, 14, 1, 15, }, /* left_mode 4 */
+ { 7, 7, 5, 7, 4, 7, 4, 5, 0, 3, }, /* left_mode 5 */
+ { 18, 2, 19, 2, 2, 4, 12, 11, 1, 2, }, /* left_mode 6 */
+ { 129, 6, 27, 1, 21, 3, 0, 189, 0, 6, }, /* left_mode 7 */
+ { 9, 1, 2, 8, 3, 7, 0, 5, 3, 3, }, /* left_mode 8 */
+ { 20, 4, 5, 10, 4, 2, 7, 17, 3, 16, }, /* left_mode 9 */
+ },
+ {
+ /*Above Mode : 8*/
+ { 617, 68, 34, 79, 11, 27, 25, 14, 75, 13, }, /* left_mode 0 */
+ { 51, 82, 21, 26, 6, 12, 13, 1, 26, 16, }, /* left_mode 1 */
+ { 29, 9, 12, 11, 3, 7, 1, 10, 2, 2, }, /* left_mode 2 */
+ { 17, 19, 11, 74, 4, 3, 2, 0, 58, 13, }, /* left_mode 3 */
+ { 10, 1, 1, 3, 4, 1, 0, 2, 1, 8, }, /* left_mode 4 */
+ { 14, 4, 5, 5, 1, 13, 2, 0, 27, 8, }, /* left_mode 5 */
+ { 10, 3, 5, 4, 1, 7, 6, 4, 5, 1, }, /* left_mode 6 */
+ { 10, 2, 6, 2, 1, 1, 1, 4, 2, 1, }, /* left_mode 7 */
+ { 14, 8, 5, 23, 2, 12, 6, 2, 117, 5, }, /* left_mode 8 */
+ { 9, 6, 2, 19, 1, 6, 3, 2, 9, 9, }, /* left_mode 9 */
+ },
+ {
+ /*Above Mode : 9*/
+ { 680, 73, 22, 38, 42, 5, 11, 9, 6, 28, }, /* left_mode 0 */
+ { 113, 112, 21, 22, 10, 2, 8, 4, 6, 42, }, /* left_mode 1 */
+ { 44, 20, 24, 6, 5, 4, 3, 3, 1, 2, }, /* left_mode 2 */
+ { 40, 23, 7, 71, 5, 2, 4, 1, 7, 22, }, /* left_mode 3 */
+ { 85, 9, 4, 4, 17, 2, 0, 3, 2, 23, }, /* left_mode 4 */
+ { 13, 4, 2, 6, 1, 7, 0, 1, 7, 6, }, /* left_mode 5 */
+ { 26, 6, 8, 3, 2, 3, 8, 1, 5, 4, }, /* left_mode 6 */
+ { 54, 8, 9, 6, 7, 0, 1, 11, 1, 3, }, /* left_mode 7 */
+ { 9, 10, 4, 13, 2, 5, 4, 2, 14, 8, }, /* left_mode 8 */
+ { 92, 9, 5, 19, 15, 3, 3, 1, 6, 58, }, /* left_mode 9 */
+ },
+};
diff --git a/vp9/common/vp9_mv.h b/vp9/common/vp9_mv.h
new file mode 100644
index 0000000..f084b11
--- /dev/null
+++ b/vp9/common/vp9_mv.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_COMMON_VP9_MV_H_
+#define VP9_COMMON_VP9_MV_H_
+#include "vpx/vpx_integer.h"
+
+typedef struct {
+ short row; /* vertical component */
+ short col; /* horizontal component */
+} MV;
+
+typedef union int_mv {
+ uint32_t as_int; /* raw 32-bit view sharing storage with as_mv */
+ MV as_mv; /* the row/col pair */
+} int_mv; /* facilitates faster equality tests and copies */
+
+#endif
diff --git a/vp9/common/vp9_mvref_common.c b/vp9/common/vp9_mvref_common.c
new file mode 100644
index 0000000..bf60630
--- /dev/null
+++ b/vp9/common/vp9_mvref_common.c
@@ -0,0 +1,398 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vp9/common/vp9_mvref_common.h"
+
+#define MVREF_NEIGHBOURS 8
+static int mb_mv_ref_search[MVREF_NEIGHBOURS][2] = { // {col, row} offsets
+ {0, -1}, {-1, 0}, {-1, -1}, {0, -2},
+ {-2, 0}, {-1, -2}, {-2, -1}, {-2, -2}
+};
+static int mb_ref_distance_weight[MVREF_NEIGHBOURS] = // one per offset above
+ { 3, 3, 2, 1, 1, 1, 1, 1 };
+#if CONFIG_SUPERBLOCKS
+static int sb_mv_ref_search[MVREF_NEIGHBOURS][2] = { // {col, row} offsets
+ {0, -1}, {-1, 0}, {1, -1}, {-1, 1},
+ {-1, -1}, {0, -2}, {-2, 0}, {-1, -2}
+};
+static int sb_ref_distance_weight[MVREF_NEIGHBOURS] = // one per offset above
+ { 3, 3, 2, 2, 2, 1, 1, 1 };
+#endif
+// Clamps *mv so it reaches at most MV_BORDER beyond the edges stored in xd.
+#define MV_BORDER (16 << 3) // Allow 16 pels in 1/8th pel units
+static void clamp_mv(const MACROBLOCKD *xd, int_mv *mv) {
+  const int col_min = xd->mb_to_left_edge - MV_BORDER;
+  const int col_max = xd->mb_to_right_edge + MV_BORDER;
+  const int row_min = xd->mb_to_top_edge - MV_BORDER;
+  const int row_max = xd->mb_to_bottom_edge + MV_BORDER;
+  MV *const v = &mv->as_mv;
+
+  if (v->col < col_min) v->col = col_min;
+  else if (v->col > col_max) v->col = col_max;
+  if (v->row < row_min) v->row = row_min;
+  else if (v->row > row_max) v->row = row_max;
+}
+
+
+// Gets a best matching candidate reference motion vector
+// from the given mode info structure (if available).
+//
+// First choice, written to *c_mv and *c_ref_frame, in priority order:
+//   1. mv[0] if the candidate's ref_frame equals the target ref_frame;
+//   2. mv[1] if the candidate's second_ref_frame equals the target;
+//   3. mv[0] if the candidate's ref_frame is above INTRA_FRAME;
+//   4. mv[1] if the candidate's second_ref_frame is above INTRA_FRAME.
+//
+// Second choice, written to *c2_mv and *c2_ref_frame: the candidate's other
+// vector, offered only when its reference frame is above INTRA_FRAME and the
+// vector is non zero and differs from the first choice. Otherwise *c2_mv is
+// zero and *c2_ref_frame is INTRA_FRAME.
+//
+// Returns TRUE when a first choice was found. On FALSE, *c_mv and
+// *c_ref_frame are left untouched.
+static int get_candidate_mvref(
+  const MODE_INFO *candidate_mi,
+  MV_REFERENCE_FRAME ref_frame,
+  MV_REFERENCE_FRAME *c_ref_frame,
+  int_mv *c_mv,
+  MV_REFERENCE_FRAME *c2_ref_frame,
+  int_mv *c2_mv
+) {
+  int first;  // Index into mbmi.mv[] of the vector used as first choice.
+  int other;  // Index of the candidate's remaining vector.
+
+  // Until proven otherwise there is no second choice.
+  c2_mv->as_int = 0;
+  *c2_ref_frame = INTRA_FRAME;
+
+  // Select the first choice; the order of these tests is the priority order.
+  if (ref_frame == candidate_mi->mbmi.ref_frame) {
+    first = 0;
+    *c_ref_frame = ref_frame;
+  } else if (ref_frame == candidate_mi->mbmi.second_ref_frame) {
+    first = 1;
+    *c_ref_frame = ref_frame;
+  } else if (candidate_mi->mbmi.ref_frame > INTRA_FRAME) {
+    first = 0;
+    *c_ref_frame = candidate_mi->mbmi.ref_frame;
+  } else if (candidate_mi->mbmi.second_ref_frame > INTRA_FRAME) {
+    // Only the second ref mv is valid. (Should not trigger in current code
+    // base given current possible compound prediction options.)
+    first = 1;
+    *c_ref_frame = candidate_mi->mbmi.second_ref_frame;
+  } else {
+    return FALSE;
+  }
+  other = 1 - first;
+  c_mv->as_int = candidate_mi->mbmi.mv[first].as_int;
+
+  // Offer the other vector as second choice. In case 4 above the first
+  // reference frame is not above INTRA_FRAME, so nothing is offered there.
+  if (candidate_mi->mbmi.mv[other].as_int != 0 &&
+      candidate_mi->mbmi.mv[other].as_int != c_mv->as_int) {
+    if (first == 0 && candidate_mi->mbmi.second_ref_frame > INTRA_FRAME) {
+      c2_mv->as_int = candidate_mi->mbmi.mv[1].as_int;
+      *c2_ref_frame = candidate_mi->mbmi.second_ref_frame;
+    } else if (first == 1 && candidate_mi->mbmi.ref_frame > INTRA_FRAME) {
+      c2_mv->as_int = candidate_mi->mbmi.mv[0].as_int;
+      *c2_ref_frame = candidate_mi->mbmi.ref_frame;
+    }
+  }
+
+  return TRUE;
+}
+
+// Prepares a candidate vector for use with this_ref_frame. If the candidate
+// comes from a different reference frame whose sign bias differs, both of its
+// components are negated. The vector is then clamped (see clamp_mv) so that
+// it does not point out of the frame buffer.
+static void scale_mv(
+  MACROBLOCKD *xd,
+  MV_REFERENCE_FRAME this_ref_frame,
+  MV_REFERENCE_FRAME candidate_ref_frame,
+  int_mv *candidate_mv,
+  int *ref_sign_bias
+) {
+  const int frames_differ = (candidate_ref_frame != this_ref_frame);
+
+  // Sign inversion where appropriate.
+  if (frames_differ &&
+      ref_sign_bias[candidate_ref_frame] != ref_sign_bias[this_ref_frame]) {
+    MV *const v = &candidate_mv->as_mv;
+    v->row = -v->row;
+    v->col = -v->col;
+  }
+
+  // Scaling by frame distance for differing reference frames is switched
+  // off. The inactive code is retained below for reference only.
+  /*
+  if (frames_differ) {
+    int frame_distances[MAX_REF_FRAMES];
+    frame_distances[INTRA_FRAME] = 1;  // should never be used
+    frame_distances[LAST_FRAME] = 1;
+    frame_distances[GOLDEN_FRAME] =
+      (xd->frames_since_golden) ? xd->frames_since_golden : 1;
+    frame_distances[ALTREF_FRAME] =
+      (xd->frames_till_alt_ref_frame) ? xd->frames_till_alt_ref_frame : 1;
+    if (frame_distances[this_ref_frame] &&
+        frame_distances[candidate_ref_frame]) {
+      candidate_mv->as_mv.row =
+        (short)(((int)(candidate_mv->as_mv.row) *
+                 frame_distances[this_ref_frame]) /
+                frame_distances[candidate_ref_frame]);
+      candidate_mv->as_mv.col =
+        (short)(((int)(candidate_mv->as_mv.col) *
+                 frame_distances[this_ref_frame]) /
+                frame_distances[candidate_ref_frame]);
+    }
+  }
+  */
+
+  // Clamp the MV so it does not point out of the frame buffer
+  clamp_mv(xd, candidate_mv);
+}
+
+// Adds a new candidate reference vector to the list if indeed it is new.
+// If it is not new then the score of the existing candidate that it matches
+// is increased by weight instead. A match means identical as_int values.
+//
+// The new or updated entry then moves towards the front of the list while
+// its score is strictly greater than the score of the entry ahead of it.
+//
+// *index holds the number of used entries and grows by one on an append;
+// the caller must provide room for that extra entry.
+static void addmv_and_shuffle(
+  int_mv *mv_list,
+  int *mv_scores,
+  int *index,
+  int_mv candidate_mv,
+  int weight
+) {
+  // Number of entries in use on entry.
+  const int used = *index;
+  int pos;
+
+  // Scan from the newest entry back to the oldest for a duplicate.
+  for (pos = used - 1; pos >= 0; --pos) {
+    if (mv_list[pos].as_int == candidate_mv.as_int)
+      break;
+  }
+
+  if (pos >= 0) {
+    // Duplicate: just raise the score of the existing entry.
+    mv_scores[pos] += weight;
+  } else {
+    // New vector: append it with the given weight as its initial score.
+    pos = used;
+    mv_list[pos].as_int = candidate_mv.as_int;
+    mv_scores[pos] = weight;
+    *index = used + 1;
+  }
+
+  // Restore the ordering so that the highest scoring mvs are at the top.
+  for (; pos > 0 && mv_scores[pos] > mv_scores[pos - 1]; --pos) {
+    // Swap entries pos and pos - 1.
+    const int lower_score = mv_scores[pos - 1];
+    const int_mv lower_mv = mv_list[pos - 1];
+
+    mv_scores[pos - 1] = mv_scores[pos];
+    mv_list[pos - 1] = mv_list[pos];
+    mv_scores[pos] = lower_score;
+    mv_list[pos] = lower_mv;
+  }
+}
+
+// Searches the neighbourhood of a given MB/SB, plus the last frame entry
+// lf_here, and fills mv_ref_list[MAX_MV_REFS] with candidate reference
+// vectors, best score first. Also sets mbmi->mb_mode_context[ref_frame].
+void vp9_find_mv_refs(
+  MACROBLOCKD *xd,
+  MODE_INFO *here,
+  MODE_INFO *lf_here,
+  MV_REFERENCE_FRAME ref_frame,
+  int_mv *mv_ref_list,
+  int *ref_sign_bias
+) {
+  const int last_frame_weight = 2;  // Base weight of lf_here candidates.
+  int i;
+  MODE_INFO *candidate_mi;
+  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
+  int_mv candidate_mvs[MAX_MV_REFS];
+  int_mv c_refmv;
+  MV_REFERENCE_FRAME c_ref_frame;
+  int_mv c2_refmv;
+  MV_REFERENCE_FRAME c2_ref_frame;
+  int candidate_scores[MAX_MV_REFS];
+  int index = 0;
+  int split_count = 0;
+  int ref_weight = 0;
+  int valid_mv_ref;
+  int (*mv_ref_search)[2];
+  int *ref_distance_weight;
+
+  // Blank the reference vector lists and other local structures.
+  vpx_memset(mv_ref_list, 0, sizeof(int_mv) * MAX_MV_REFS);
+  vpx_memset(candidate_mvs, 0, sizeof(int_mv) * MAX_MV_REFS);
+  vpx_memset(candidate_scores, 0, sizeof(candidate_scores));
+
+#if CONFIG_SUPERBLOCKS
+  if (mbmi->encoded_as_sb) {
+    mv_ref_search = sb_mv_ref_search;
+    ref_distance_weight = sb_ref_distance_weight;
+  } else {
+    mv_ref_search = mb_mv_ref_search;
+    ref_distance_weight = mb_ref_distance_weight;
+  }
+#else
+  mv_ref_search = mb_mv_ref_search;
+  ref_distance_weight = mb_ref_distance_weight;
+#endif
+  // Populate a list with candidate reference vectors from the first two
+  // spatial neighbours. (Offsets use * 128, not << 7: they can be negative.)
+  for (i = 0; i < 2; ++i) {
+    if (((mv_ref_search[i][0] * 128) >= xd->mb_to_left_edge) &&
+        ((mv_ref_search[i][1] * 128) >= xd->mb_to_top_edge)) {
+
+      candidate_mi = here + mv_ref_search[i][0] +
+                     (mv_ref_search[i][1] * xd->mode_info_stride);
+
+      valid_mv_ref = get_candidate_mvref(candidate_mi, ref_frame,
+                                         &c_ref_frame, &c_refmv,
+                                         &c2_ref_frame, &c2_refmv);
+
+      // If there is a valid MV candidate then add it to the list
+      if (valid_mv_ref) {
+        scale_mv(xd, ref_frame, c_ref_frame, &c_refmv, ref_sign_bias);
+        ref_weight = ref_distance_weight[i] +
+                     ((c_ref_frame == ref_frame) << 4);
+        split_count += (candidate_mi->mbmi.mode == SPLITMV);
+
+        addmv_and_shuffle(candidate_mvs, candidate_scores,
+                          &index, c_refmv, ref_weight);
+
+        // If there is a second valid mv then add it as well.
+        if (c2_ref_frame > INTRA_FRAME) {
+          scale_mv(xd, ref_frame, c2_ref_frame, &c2_refmv, ref_sign_bias);
+          ref_weight = ref_distance_weight[i] +
+                       ((c2_ref_frame == ref_frame) << 4);
+
+          addmv_and_shuffle(candidate_mvs, candidate_scores,
+                            &index, c2_refmv, ref_weight);
+        }
+      }
+    }
+  }
+
+  // Look at the corresponding vector in the last frame
+  candidate_mi = lf_here;
+  valid_mv_ref = get_candidate_mvref(candidate_mi, ref_frame,
+                                     &c_ref_frame, &c_refmv,
+                                     &c2_ref_frame, &c2_refmv);
+
+  // If there is a valid MV candidate then add it to the list
+  if (valid_mv_ref) {
+    scale_mv(xd, ref_frame, c_ref_frame, &c_refmv, ref_sign_bias);
+    ref_weight = last_frame_weight + ((c_ref_frame == ref_frame) << 4);
+    addmv_and_shuffle(candidate_mvs, candidate_scores,
+                      &index, c_refmv, ref_weight);
+
+    // If there is a second valid mv then add it as well (same base weight).
+    if (c2_ref_frame > INTRA_FRAME) {
+      scale_mv(xd, ref_frame, c2_ref_frame, &c2_refmv, ref_sign_bias);
+      ref_weight = last_frame_weight +
+                   ((c2_ref_frame == ref_frame) << 4);
+
+      addmv_and_shuffle(candidate_mvs, candidate_scores,
+                        &index, c2_refmv, ref_weight);
+    }
+  }
+
+  // Continue with the remaining spatial neighbours for as long as fewer
+  // than MAX_MV_REFS - 2 candidates have been collected.
+  for (i = 2; (i < MVREF_NEIGHBOURS) && (index < (MAX_MV_REFS - 2)); ++i) {
+    if (((mv_ref_search[i][0] * 128) >= xd->mb_to_left_edge) &&
+        ((mv_ref_search[i][1] * 128) >= xd->mb_to_top_edge)) {
+
+      candidate_mi = here + mv_ref_search[i][0] +
+                     (mv_ref_search[i][1] * xd->mode_info_stride);
+
+      valid_mv_ref = get_candidate_mvref(candidate_mi, ref_frame,
+                                         &c_ref_frame, &c_refmv,
+                                         &c2_ref_frame, &c2_refmv);
+
+      // If there is a valid MV candidate then add it to the list
+      if (valid_mv_ref) {
+        scale_mv(xd, ref_frame, c_ref_frame, &c_refmv, ref_sign_bias);
+        ref_weight = ref_distance_weight[i] +
+                     ((c_ref_frame == ref_frame) << 4);
+
+        addmv_and_shuffle(candidate_mvs, candidate_scores,
+                          &index, c_refmv, ref_weight);
+
+        // If there is a second valid mv then add it as well.
+        if (c2_ref_frame > INTRA_FRAME) {
+          scale_mv(xd, ref_frame, c2_ref_frame, &c2_refmv, ref_sign_bias);
+          ref_weight = ref_distance_weight[i] +
+                       ((c2_ref_frame == ref_frame) << 4);
+
+          addmv_and_shuffle(candidate_mvs, candidate_scores,
+                            &index, c2_refmv, ref_weight);
+        }
+      }
+    }
+  }
+
+  // Make sure we are able to add 0,0
+  if (index > (MAX_MV_REFS - 1)) {
+    index = (MAX_MV_REFS - 1);
+  }
+
+  // Define inter mode coding context.
+  // 0,0 was best
+  if (candidate_mvs[0].as_int == 0) {
+    // 0,0 is only candidate
+    if (index <= 1) {
+      mbmi->mb_mode_context[ref_frame] = 0;
+    // non zero candidates available, no split mv
+    } else if (split_count == 0) {
+      mbmi->mb_mode_context[ref_frame] = 1;
+    } else {
+      mbmi->mb_mode_context[ref_frame] = 2;
+    }
+  // Non zero best, No Split MV cases
+  } else if (split_count == 0) {
+    if (candidate_scores[0] >= 32) {
+      mbmi->mb_mode_context[ref_frame] = 3;
+    } else {
+      mbmi->mb_mode_context[ref_frame] = 4;
+    }
+  // Non zero best, some split mv
+  } else {
+    if (candidate_scores[0] >= 32) {
+      mbmi->mb_mode_context[ref_frame] = 5;
+    } else {
+      mbmi->mb_mode_context[ref_frame] = 6;
+    }
+  }
+
+  // 0,0 is always a valid reference: append it if it is not yet listed.
+  for (i = 0; i < index; ++i) {
+    if (candidate_mvs[i].as_int == 0)
+      break;
+  }
+  if (i == index) {
+    c_refmv.as_int = 0;
+    addmv_and_shuffle(candidate_mvs, candidate_scores,
+                      &index, c_refmv, candidate_scores[3] + 1);
+  }
+
+  // Copy over the candidate list.
+  vpx_memcpy(mv_ref_list, candidate_mvs, sizeof(candidate_mvs));
+}
diff --git a/vp9/common/vp9_mvref_common.h b/vp9/common/vp9_mvref_common.h
new file mode 100644
index 0000000..1938352
--- /dev/null
+++ b/vp9/common/vp9_mvref_common.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vp9/common/vp9_onyxc_int.h"
+#include "vp9/common/vp9_blockd.h"
+
+
+#ifndef VP9_COMMON_VP9_MVREF_COMMON_H_
+#define VP9_COMMON_VP9_MVREF_COMMON_H_
+
+void vp9_find_mv_refs(
+ MACROBLOCKD *xd, /* supplies edge distances, mode info stride and context */
+ MODE_INFO *here, /* current entry; neighbours are read relative to it */
+ MODE_INFO *lf_here, /* entry read for the last frame candidate */
+ MV_REFERENCE_FRAME ref_frame, /* target reference frame */
+ int_mv * mv_ref_list, /* out: MAX_MV_REFS entries */
+ int *ref_sign_bias /* indexed by reference frame */
+);
+
+#endif
+
diff --git a/vp9/common/vp9_onyx.h b/vp9/common/vp9_onyx.h
new file mode 100644
index 0000000..a6bd951
--- /dev/null
+++ b/vp9/common/vp9_onyx.h
@@ -0,0 +1,225 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_COMMON_VP9_ONYX_H_
+#define VP9_COMMON_VP9_ONYX_H_
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#include "vpx/internal/vpx_codec_internal.h"
+#include "vpx/vp8cx.h"
+#include "vpx_scale/yv12config.h"
+#include "vp9/common/vp9_type_aliases.h"
+#include "vp9/common/vp9_ppflags.h"
+ typedef int *VP9_PTR;
+
+ /* Create/destroy static data structures. */
+
+ typedef enum {
+ NORMAL = 0, /* ratio 1/1 in Scale2Ratio() */
+ FOURFIVE = 1, /* ratio 4/5 */
+ THREEFIVE = 2, /* ratio 3/5 */
+ ONETWO = 3 /* ratio 1/2 */
+
+ } VPX_SCALING;
+
+ typedef enum {
+ VP9_LAST_FLAG = 1,
+ VP9_GOLD_FLAG = 2,
+ VP9_ALT_FLAG = 4
+ } VP9_REFFRAME;
+
+
+ typedef enum {
+ USAGE_STREAM_FROM_SERVER = 0x0,
+ USAGE_LOCAL_FILE_PLAYBACK = 0x1,
+ USAGE_CONSTRAINED_QUALITY = 0x2
+ } END_USAGE;
+
+
+ typedef enum {
+ MODE_GOODQUALITY = 0x1, /* presumably "(1)" in the VP9_CONFIG Mode notes */
+ MODE_BESTQUALITY = 0x2, /* presumably "(2)": One Pass - Best Quality */
+ MODE_FIRSTPASS = 0x3, /* presumably "(3)": Two Pass - First Pass */
+ MODE_SECONDPASS = 0x4, /* presumably "(4)": Two Pass - Second Pass */
+ MODE_SECONDPASS_BEST = 0x5, /* presumably "(5)": Second Pass Best */
+ } MODE;
+
+ typedef enum {
+ FRAMEFLAGS_KEY = 1,
+ FRAMEFLAGS_GOLDEN = 2,
+ FRAMEFLAGS_ALTREF = 4,
+ } FRAMETYPE_FLAGS;
+
+
+#include <assert.h>
+  static __inline void Scale2Ratio(int mode, int *hr, int *hs) {
+    /* Converts a VPX_SCALING mode into the ratio *hr / *hs.
+     * NORMAL gives 1/1, FOURFIVE 4/5, THREEFIVE 3/5 and ONETWO 1/2.
+     * Any other value also yields 1/1 and then trips assert(0). */
+    int numerator = 1;
+    int denominator = 1;
+    int known = 1;
+
+    if (mode == FOURFIVE) {
+      numerator = 4;
+      denominator = 5;
+    } else if (mode == THREEFIVE) {
+      numerator = 3;
+      denominator = 5;
+    } else if (mode == ONETWO) {
+      denominator = 2;
+    } else if (mode != NORMAL) {
+      known = 0;
+    }
+
+    *hr = numerator;
+    *hs = denominator;
+    if (!known)
+      assert(0);
+  }
+
+ typedef struct {
+ int Version; // 4 versions of bitstream defined: 0 best quality/slowest decode, 3 lowest quality/fastest decode
+ int Width; // width of data passed to the compressor
+ int Height; // height of data passed to the compressor
+ double frame_rate; // set to passed in framerate
+ int target_bandwidth; // bandwidth to be used in kilobits per second
+
+ int noise_sensitivity; // parameter used for applying pre processing blur: recommendation 0
+ int Sharpness; // parameter used for sharpening output: recommendation 0
+ int cpu_used;
+ unsigned int rc_max_intra_bitrate_pct;
+
+ // mode ->
+ // (0)=Realtime/Live Encoding. This mode is optimized for realtime encoding (for example, capturing
+ // a television signal or feed from a live camera). ( speed setting controls how fast )
+ // (1)=Good Quality Fast Encoding. The encoder balances quality with the amount of time it takes to
+ // encode the output. ( speed setting controls how fast )
+ // (2)=One Pass - Best Quality. The encoder places priority on the quality of the output over encoding
+ // speed. The output is compressed at the highest possible quality. This option takes the longest
+ // amount of time to encode. ( speed setting ignored )
+ // (3)=Two Pass - First Pass. The encoder generates a file of statistics for use in the second encoding
+ // pass. ( speed setting controls how fast )
+ // (4)=Two Pass - Second Pass. The encoder uses the statistics that were generated in the first encoding
+ // pass to create the compressed output. ( speed setting controls how fast )
+ // (5)=Two Pass - Second Pass Best. The encoder uses the statistics that were generated in the first
+ // encoding pass to create the compressed output using the highest possible quality, and taking a
+ // longer amount of time to encode. ( speed setting ignored )
+ int Mode; // one of the values listed above
+
+ // Key Framing Operations
+ int auto_key; // automatically detect cut scenes and set the keyframes
+ int key_freq; // maximum distance to key frame.
+
+ int allow_lag; // allow lagged compression (if 0, lag_in_frames is ignored)
+ int lag_in_frames; // how many frames lag before we start encoding
+
+ // ----------------------------------------------------------------
+ // DATARATE CONTROL OPTIONS
+
+ int end_usage; // vbr or cbr (presumably an END_USAGE value)
+
+ // buffer targeting aggressiveness
+ int under_shoot_pct;
+ int over_shoot_pct;
+
+ // buffering parameters
+ int starting_buffer_level; // in seconds
+ int optimal_buffer_level;
+ int maximum_buffer_size;
+
+ // controlling quality
+ int fixed_q;
+ int worst_allowed_q;
+ int best_allowed_q;
+ int cq_level;
+ int lossless;
+
+ // two pass datarate control
+ int two_pass_vbrbias; // two pass datarate control tweaks
+ int two_pass_vbrmin_section;
+ int two_pass_vbrmax_section;
+ // END DATARATE CONTROL OPTIONS
+ // ----------------------------------------------------------------
+
+
+ // These parameters are not meant for use in final builds; do not use them.
+ int play_alternate;
+ int alt_freq;
+
+ int encode_breakout; // early breakout encode threshold: for video conferencing 800 is recommended
+
+ int arnr_max_frames;
+ int arnr_strength;
+ int arnr_type;
+
+ struct vpx_fixed_buf two_pass_stats_in;
+ struct vpx_codec_pkt_list *output_pkt_list;
+
+ vp8e_tuning tuning;
+ } VP9_CONFIG;
+
+
+  void vp9_initialize_enc(void);  /* takes no arguments */
+
+ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf);
+ void vp9_remove_compressor(VP9_PTR *comp);
+
+ void vp9_change_config(VP9_PTR onyx, VP9_CONFIG *oxcf);
+
+// Receive a frame's worth of data. The caller can assume that a copy of this
+// frame is made, and not just a copy of the pointer.
+ int vp9_receive_raw_frame(VP9_PTR comp, unsigned int frame_flags,
+ YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
+ int64_t end_time_stamp);
+
+ int vp9_get_compressed_data(VP9_PTR comp, unsigned int *frame_flags,
+ unsigned long *size, unsigned char *dest,
+ int64_t *time_stamp, int64_t *time_end,
+ int flush);
+
+ int vp9_get_preview_raw_frame(VP9_PTR comp, YV12_BUFFER_CONFIG *dest,
+ vp9_ppflags_t *flags);
+
+ int vp9_use_as_reference(VP9_PTR comp, int ref_frame_flags);
+
+ int vp9_update_reference(VP9_PTR comp, int ref_frame_flags);
+
+ int vp9_get_reference_enc(VP9_PTR comp, VP9_REFFRAME ref_frame_flag,
+ YV12_BUFFER_CONFIG *sd);
+
+ int vp9_set_reference_enc(VP9_PTR comp, VP9_REFFRAME ref_frame_flag,
+ YV12_BUFFER_CONFIG *sd);
+
+ int vp9_update_entropy(VP9_PTR comp, int update);
+
+ int vp9_set_roimap(VP9_PTR comp, unsigned char *map,
+ unsigned int rows, unsigned int cols,
+ int delta_q[4], int delta_lf[4],
+ unsigned int threshold[4]);
+
+ int vp9_set_active_map(VP9_PTR comp, unsigned char *map,
+ unsigned int rows, unsigned int cols);
+
+ int vp9_set_internal_size(VP9_PTR comp,
+ VPX_SCALING horiz_mode, VPX_SCALING vert_mode);
+
+ int vp9_get_quantizer(VP9_PTR c);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // VP9_COMMON_VP9_ONYX_H_
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
new file mode 100644
index 0000000..0b6de7f
--- /dev/null
+++ b/vp9/common/vp9_onyxc_int.h
@@ -0,0 +1,306 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_COMMON_VP9_ONYXC_INT_H_
+#define VP9_COMMON_VP9_ONYXC_INT_H_
+
+#include "vpx_config.h"
+#include "vpx/internal/vpx_codec_internal.h"
+#include "vp9_rtcd.h"
+#include "vp9/common/vp9_loopfilter.h"
+#include "vp9/common/vp9_entropymv.h"
+#include "vp9/common/vp9_entropy.h"
+#include "vp9/common/vp9_entropymode.h"
+#if CONFIG_POSTPROC
+#include "vp9/common/vp9_postproc.h"
+#endif
+
+/*#ifdef PACKET_TESTING*/
+#include "vp9/common/vp9_header.h"
+/*#endif*/
+
+/* Create/destroy static data structures. */
+
+void vp9_initialize_common(void);
+
+#define MINQ 0
+
+#define MAXQ 255
+#define QINDEX_BITS 8
+
+#define QINDEX_RANGE (MAXQ + 1)
+
+#define NUM_YV12_BUFFERS 4
+
+#define COMP_PRED_CONTEXTS 2
+
+typedef struct frame_contexts {
+ vp9_prob bmode_prob[VP9_NKF_BINTRAMODES - 1];
+ vp9_prob ymode_prob[VP9_YMODES - 1]; /* interframe intra mode probs */
+#if CONFIG_SUPERBLOCKS
+ vp9_prob sb_ymode_prob[VP9_I32X32_MODES - 1];
+#endif
+ vp9_prob uv_mode_prob[VP9_YMODES][VP9_UV_MODES - 1];
+ vp9_prob i8x8_mode_prob[VP9_I8X8_MODES - 1];
+ vp9_prob sub_mv_ref_prob[SUBMVREF_COUNT][VP9_SUBMVREFS - 1];
+ vp9_prob mbsplit_prob[VP9_NUMMBSPLITS - 1];
+ vp9_prob coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
+ vp9_prob hybrid_coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
+ vp9_prob coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
+ vp9_prob hybrid_coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
+ vp9_prob coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
+ vp9_prob hybrid_coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
+
+ nmv_context nmvc;
+ nmv_context pre_nmvc;
+ vp9_prob pre_bmode_prob[VP9_NKF_BINTRAMODES - 1];
+ vp9_prob pre_ymode_prob[VP9_YMODES - 1]; /* interframe intra mode probs */
+#if CONFIG_SUPERBLOCKS
+ vp9_prob pre_sb_ymode_prob[VP9_I32X32_MODES - 1];
+#endif
+ vp9_prob pre_uv_mode_prob[VP9_YMODES][VP9_UV_MODES - 1];
+ vp9_prob pre_i8x8_mode_prob[VP9_I8X8_MODES - 1];
+ vp9_prob pre_sub_mv_ref_prob[SUBMVREF_COUNT][VP9_SUBMVREFS - 1];
+ vp9_prob pre_mbsplit_prob[VP9_NUMMBSPLITS - 1];
+ unsigned int bmode_counts[VP9_NKF_BINTRAMODES];
+ unsigned int ymode_counts[VP9_YMODES]; /* interframe intra mode probs */
+#if CONFIG_SUPERBLOCKS
+ unsigned int sb_ymode_counts[VP9_I32X32_MODES];
+#endif
+ unsigned int uv_mode_counts[VP9_YMODES][VP9_UV_MODES];
+ unsigned int i8x8_mode_counts[VP9_I8X8_MODES]; /* interframe intra probs */
+ unsigned int sub_mv_ref_counts[SUBMVREF_COUNT][VP9_SUBMVREFS];
+ unsigned int mbsplit_counts[VP9_NUMMBSPLITS];
+
+ vp9_prob pre_coef_probs [BLOCK_TYPES] [COEF_BANDS]
+ [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
+ vp9_prob pre_hybrid_coef_probs [BLOCK_TYPES] [COEF_BANDS]
+ [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
+
+ vp9_prob pre_coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS]
+ [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
+ vp9_prob pre_hybrid_coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS]
+ [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
+
+ vp9_prob pre_coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS]
+ [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
+ vp9_prob pre_hybrid_coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS]
+ [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
+
+ unsigned int coef_counts [BLOCK_TYPES] [COEF_BANDS]
+ [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
+ unsigned int hybrid_coef_counts [BLOCK_TYPES] [COEF_BANDS]
+ [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
+
+ unsigned int coef_counts_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS]
+ [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
+ unsigned int hybrid_coef_counts_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS]
+ [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
+
+ unsigned int coef_counts_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS]
+ [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
+ unsigned int hybrid_coef_counts_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS]
+ [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
+
+ nmv_context_counts NMVcount;
+ vp9_prob switchable_interp_prob[VP9_SWITCHABLE_FILTERS + 1]
+ [VP9_SWITCHABLE_FILTERS - 1];
+#if CONFIG_COMP_INTERINTRA_PRED
+ unsigned int interintra_counts[2];
+ vp9_prob interintra_prob;
+ vp9_prob pre_interintra_prob;
+#endif
+
+ int vp9_mode_contexts[INTER_MODE_CONTEXTS][4];
+ unsigned int mv_ref_ct[INTER_MODE_CONTEXTS][4][2];
+} FRAME_CONTEXT;
+
+typedef enum {
+ RECON_CLAMP_REQUIRED = 0,
+ RECON_CLAMP_NOTREQUIRED = 1
+} CLAMP_TYPE;
+
+typedef enum {
+ SINGLE_PREDICTION_ONLY = 0,
+ COMP_PREDICTION_ONLY = 1,
+ HYBRID_PREDICTION = 2,
+ NB_PREDICTION_TYPES = 3,
+} COMPPREDMODE_TYPE;
+
+typedef enum {
+ ONLY_4X4 = 0,
+ ALLOW_8X8 = 1,
+ ALLOW_16X16 = 2,
+ TX_MODE_SELECT = 3,
+ NB_TXFM_MODES = 4,
+} TXFM_MODE;
+
+typedef struct VP9Common {
+ struct vpx_internal_error_info error;
+
+ DECLARE_ALIGNED(16, short, Y1dequant[QINDEX_RANGE][16]);
+ DECLARE_ALIGNED(16, short, Y2dequant[QINDEX_RANGE][16]);
+ DECLARE_ALIGNED(16, short, UVdequant[QINDEX_RANGE][16]);
+
+ int Width;
+ int Height;
+ int horiz_scale;
+ int vert_scale;
+
+ YUV_TYPE clr_type;
+ CLAMP_TYPE clamp_type;
+
+ YV12_BUFFER_CONFIG *frame_to_show;
+
+ YV12_BUFFER_CONFIG yv12_fb[NUM_YV12_BUFFERS];
+ int fb_idx_ref_cnt[NUM_YV12_BUFFERS];
+ int new_fb_idx, lst_fb_idx, gld_fb_idx, alt_fb_idx;
+
+ YV12_BUFFER_CONFIG post_proc_buffer;
+ YV12_BUFFER_CONFIG temp_scale_frame;
+
+
+ FRAME_TYPE last_frame_type; /* Save last frame's frame type for motion search. */
+ FRAME_TYPE frame_type;
+
+ int show_frame;
+
+ int frame_flags;
+ int MBs;
+ int mb_rows;
+ int mb_cols;
+ int mode_info_stride;
+
+ /* profile settings */
+ int experimental;
+ int mb_no_coeff_skip;
+ TXFM_MODE txfm_mode;
+ COMPPREDMODE_TYPE comp_pred_mode;
+ int no_lpf;
+ int use_bilinear_mc_filter;
+ int full_pixel;
+
+ int base_qindex;
+ int last_kf_gf_q; /* Q used on the last GF or KF */
+
+ int y1dc_delta_q;
+ int y2dc_delta_q;
+ int y2ac_delta_q;
+ int uvdc_delta_q;
+ int uvac_delta_q;
+
+ unsigned int frames_since_golden;
+ unsigned int frames_till_alt_ref_frame;
+
+ /* We allocate a MODE_INFO struct for each macroblock, together with
+ an extra row on top and column on the left to simplify prediction. */
+
+ MODE_INFO *mip; /* Base of allocated array */
+ MODE_INFO *mi; /* Corresponds to upper left visible macroblock */
+ MODE_INFO *prev_mip; /* MODE_INFO array 'mip' from last decoded frame */
+ MODE_INFO *prev_mi; /* 'mi' from last frame (points into prev_mip) */
+
+
+ // Persistent mb segment id map used in prediction.
+ unsigned char *last_frame_seg_map;
+
+ INTERPOLATIONFILTERTYPE mcomp_filter_type;
+ LOOPFILTERTYPE filter_type;
+
+ loop_filter_info_n lf_info;
+
+ int filter_level;
+ int last_sharpness_level;
+ int sharpness_level;
+
+ int refresh_last_frame; /* Two state 0 = NO, 1 = YES */
+ int refresh_golden_frame; /* Two state 0 = NO, 1 = YES */
+ int refresh_alt_ref_frame; /* Two state 0 = NO, 1 = YES */
+
+ int copy_buffer_to_gf; /* 0 none, 1 Last to GF, 2 ARF to GF */
+ int copy_buffer_to_arf; /* 0 none, 1 Last to ARF, 2 GF to ARF */
+
+ int refresh_entropy_probs; /* Two state 0 = NO, 1 = YES */
+
+ int ref_frame_sign_bias[MAX_REF_FRAMES]; /* Two state 0, 1 */
+
+ /* Y,U,V,Y2 */
+ ENTROPY_CONTEXT_PLANES *above_context; /* row of context for each plane */
+ ENTROPY_CONTEXT_PLANES left_context[2]; /* (up to) 4 contexts "" */
+
+ /* keyframe block modes are predicted by their above, left neighbors */
+
+ vp9_prob kf_bmode_prob[VP9_KF_BINTRAMODES]
+ [VP9_KF_BINTRAMODES]
+ [VP9_KF_BINTRAMODES - 1];
+ vp9_prob kf_ymode_prob[8][VP9_YMODES - 1]; /* keyframe "" */
+#if CONFIG_SUPERBLOCKS
+ vp9_prob sb_kf_ymode_prob[8][VP9_I32X32_MODES - 1];
+#endif
+ int kf_ymode_probs_index;
+ int kf_ymode_probs_update;
+ vp9_prob kf_uv_mode_prob[VP9_YMODES] [VP9_UV_MODES - 1];
+
+ vp9_prob prob_intra_coded;
+ vp9_prob prob_last_coded;
+ vp9_prob prob_gf_coded;
+#if CONFIG_SUPERBLOCKS
+ vp9_prob sb_coded;
+#endif
+
+ // Context probabilities when using predictive coding of segment id
+ vp9_prob segment_pred_probs[PREDICTION_PROBS];
+ unsigned char temporal_update;
+
+ // Context probabilities for reference frame prediction
+ unsigned char ref_scores[MAX_REF_FRAMES];
+ vp9_prob ref_pred_probs[PREDICTION_PROBS];
+ vp9_prob mod_refprobs[MAX_REF_FRAMES][PREDICTION_PROBS];
+
+ vp9_prob prob_comppred[COMP_PRED_CONTEXTS];
+
+ // FIXME contextualize
+ vp9_prob prob_tx[TX_SIZE_MAX - 1];
+
+ vp9_prob mbskip_pred_probs[MBSKIP_CONTEXTS];
+
+ FRAME_CONTEXT lfc_a; /* last alt ref entropy */
+ FRAME_CONTEXT lfc; /* last frame entropy */
+ FRAME_CONTEXT fc; /* this frame entropy */
+
+ unsigned int current_video_frame;
+ int near_boffset[3];
+ int version;
+
+#ifdef PACKET_TESTING
+ VP9_HEADER oh;
+#endif
+ double bitrate;
+ double framerate;
+
+#if CONFIG_POSTPROC
+ struct postproc_state postproc_state;
+#endif
+
+#if CONFIG_PRED_FILTER
+ /* Prediction filter variables */
+ int pred_filter_mode; // 0=disabled at the frame level (no MB filtered)
+ // 1=enabled at the frame level (all MB filtered)
+ // 2=specified per MB (1=filtered, 0=non-filtered)
+ vp9_prob prob_pred_filter_off;
+#endif
+#if CONFIG_COMP_INTERINTRA_PRED
+ int use_interintra;
+#endif
+
+} VP9_COMMON;
+
+#endif // VP9_COMMON_VP9_ONYXC_INT_H_
diff --git a/vp9/common/vp9_onyxd.h b/vp9/common/vp9_onyxd.h
new file mode 100644
index 0000000..063e5a8
--- /dev/null
+++ b/vp9/common/vp9_onyxd.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_COMMON_VP9_ONYXD_H_
+#define VP9_COMMON_VP9_ONYXD_H_
+
+
+/* Create/destroy static data structures. */
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+#include "vp9/common/vp9_type_aliases.h"
+#include "vpx_scale/yv12config.h"
+#include "vp9/common/vp9_ppflags.h"
+#include "vpx_ports/mem.h"
+#include "vpx/vpx_codec.h"
+
+ typedef void *VP9D_PTR;
+ typedef struct {
+ int Width;
+ int Height;
+ int Version;
+ int postprocess;
+ int max_threads;
+ int input_partition;
+ } VP9D_CONFIG;
+ typedef enum {
+ VP9_LAST_FLAG = 1,
+ VP9_GOLD_FLAG = 2,
+ VP9_ALT_FLAG = 4
+ } VP9_REFFRAME;
+
+ void vp9_initialize_dec(void);
+
+ int vp9_receive_compressed_data(VP9D_PTR comp, unsigned long size,
+ const unsigned char **dest,
+ int64_t time_stamp);
+
+ int vp9_get_raw_frame(VP9D_PTR comp, YV12_BUFFER_CONFIG *sd,
+ int64_t *time_stamp, int64_t *time_end_stamp,
+ vp9_ppflags_t *flags);
+
+ vpx_codec_err_t vp9_get_reference_dec(VP9D_PTR comp,
+ VP9_REFFRAME ref_frame_flag,
+ YV12_BUFFER_CONFIG *sd);
+
+ vpx_codec_err_t vp9_set_reference_dec(VP9D_PTR comp,
+ VP9_REFFRAME ref_frame_flag,
+ YV12_BUFFER_CONFIG *sd);
+
+ VP9D_PTR vp9_create_decompressor(VP9D_CONFIG *oxcf);
+
+ void vp9_remove_decompressor(VP9D_PTR comp);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // VP9_COMMON_VP9_ONYXD_H_
diff --git a/vp9/common/vp9_postproc.c b/vp9/common/vp9_postproc.c
new file mode 100644
index 0000000..192166b
--- /dev/null
+++ b/vp9/common/vp9_postproc.c
@@ -0,0 +1,1012 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "./vpx_config.h"
+#include "vpx_scale/yv12config.h"
+#include "vp9/common/vp9_postproc.h"
+#include "vp9/common/vp9_textblit.h"
+#include "vpx_scale/vpxscale.h"
+#include "vp9/common/vp9_systemdependent.h"
+#include "./vp9_rtcd.h"
+#include "./vpx_scale_rtcd.h"
+
+
+#include <math.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#define RGB_TO_YUV(t) /* expands to three comma-separated values: Y, U, V */ \
+  ( (0.257*(float)((t) >> 16)) + (0.504*(float)((t) >> 8 & 0xff)) + \
+    (0.098*(float)((t) & 0xff)) + 16), \
+  (-(0.148*(float)((t) >> 16)) - (0.291*(float)((t) >> 8 & 0xff)) + \
+    (0.439*(float)((t) & 0xff)) + 128), \
+  ( (0.439*(float)((t) >> 16)) - (0.368*(float)((t) >> 8 & 0xff)) - \
+    (0.071*(float)((t) & 0xff)) + 128)
+
+/* global constants */
+#if 0 && CONFIG_POSTPROC_VISUALIZER
+static const unsigned char MB_PREDICTION_MODE_colors[MB_MODE_COUNT][3] = {
+  { RGB_TO_YUV(0x98FB98) },   /* PaleGreen */
+  { RGB_TO_YUV(0x00FF00) },   /* Green */
+  { RGB_TO_YUV(0xADFF2F) },   /* GreenYellow */
+  { RGB_TO_YUV(0x8F0000) },   /* Dark Red */
+  { RGB_TO_YUV(0x008F8F) },   /* Dark Cyan */
+  { RGB_TO_YUV(0x008F8F) },   /* Dark Cyan */
+  { RGB_TO_YUV(0x008F8F) },   /* Dark Cyan */
+  { RGB_TO_YUV(0x8F0000) },   /* Dark Red */
+  { RGB_TO_YUV(0x8F0000) },   /* Dark Red */
+  { RGB_TO_YUV(0x228B22) },   /* ForestGreen */
+  { RGB_TO_YUV(0x006400) },   /* DarkGreen */
+  { RGB_TO_YUV(0x98F5FF) },   /* Cadet Blue */
+  { RGB_TO_YUV(0x6CA6CD) },   /* Sky Blue */
+  { RGB_TO_YUV(0x00008B) },   /* Dark blue */
+  { RGB_TO_YUV(0x551A8B) },   /* Purple */
+  { RGB_TO_YUV(0xFF0000) },   /* Red */
+  { RGB_TO_YUV(0xCC33FF) },   /* Magenta */
+};  /* one Y, U, V triple per row, produced by RGB_TO_YUV */
+
+static const unsigned char B_PREDICTION_MODE_colors[B_MODE_COUNT][3] = {
+ { RGB_TO_YUV(0x6633ff) }, /* Purple */
+ { RGB_TO_YUV(0xcc33ff) }, /* Magenta */
+ { RGB_TO_YUV(0xff33cc) }, /* Pink */
+ { RGB_TO_YUV(0xff3366) }, /* Coral */
+ { RGB_TO_YUV(0x3366ff) }, /* Blue */
+ { RGB_TO_YUV(0xed00f5) }, /* Dark Blue -- NOTE(review): 0xed00f5 is magenta-like; confirm value */
+ { RGB_TO_YUV(0x2e00b8) }, /* Dark Purple */
+ { RGB_TO_YUV(0xff6633) }, /* Orange */
+ { RGB_TO_YUV(0x33ccff) }, /* Light Blue */
+ { RGB_TO_YUV(0x8ab800) }, /* Green */
+ { RGB_TO_YUV(0xffcc33) }, /* Light Orange */
+ { RGB_TO_YUV(0x33ffcc) }, /* Aqua */
+ { RGB_TO_YUV(0x66ff33) }, /* Light Green */
+ { RGB_TO_YUV(0xccff33) }, /* Yellow */
+};
+
+static const unsigned char MV_REFERENCE_FRAME_colors[MAX_REF_FRAMES][3] = {
+ { RGB_TO_YUV(0x00ff00) }, /* Green */
+ { RGB_TO_YUV(0x0000ff) }, /* Blue */
+ { RGB_TO_YUV(0xffff00) }, /* Yellow */
+ { RGB_TO_YUV(0xff0000) }, /* Red */
+};
+#endif
+
+static const short kernel5[] = {  /* 5-tap smoothing weights; they sum to 8 */
+ 1, 1, 4, 1, 1
+};
+
+const short vp9_rv[] = {
+ 8, 5, 2, 2, 8, 12, 4, 9, 8, 3,
+ 0, 3, 9, 0, 0, 0, 8, 3, 14, 4,
+ 10, 1, 11, 14, 1, 14, 9, 6, 12, 11,
+ 8, 6, 10, 0, 0, 8, 9, 0, 3, 14,
+ 8, 11, 13, 4, 2, 9, 0, 3, 9, 6,
+ 1, 2, 3, 14, 13, 1, 8, 2, 9, 7,
+ 3, 3, 1, 13, 13, 6, 6, 5, 2, 7,
+ 11, 9, 11, 8, 7, 3, 2, 0, 13, 13,
+ 14, 4, 12, 5, 12, 10, 8, 10, 13, 10,
+ 4, 14, 4, 10, 0, 8, 11, 1, 13, 7,
+ 7, 14, 6, 14, 13, 2, 13, 5, 4, 4,
+ 0, 10, 0, 5, 13, 2, 12, 7, 11, 13,
+ 8, 0, 4, 10, 7, 2, 7, 2, 2, 5,
+ 3, 4, 7, 3, 3, 14, 14, 5, 9, 13,
+ 3, 14, 3, 6, 3, 0, 11, 8, 13, 1,
+ 13, 1, 12, 0, 10, 9, 7, 6, 2, 8,
+ 5, 2, 13, 7, 1, 13, 14, 7, 6, 7,
+ 9, 6, 10, 11, 7, 8, 7, 5, 14, 8,
+ 4, 4, 0, 8, 7, 10, 0, 8, 14, 11,
+ 3, 12, 5, 7, 14, 3, 14, 5, 2, 6,
+ 11, 12, 12, 8, 0, 11, 13, 1, 2, 0,
+ 5, 10, 14, 7, 8, 0, 4, 11, 0, 8,
+ 0, 3, 10, 5, 8, 0, 11, 6, 7, 8,
+ 10, 7, 13, 9, 2, 5, 1, 5, 10, 2,
+ 4, 3, 5, 6, 10, 8, 9, 4, 11, 14,
+ 0, 10, 0, 5, 13, 2, 12, 7, 11, 13,
+ 8, 0, 4, 10, 7, 2, 7, 2, 2, 5,
+ 3, 4, 7, 3, 3, 14, 14, 5, 9, 13,
+ 3, 14, 3, 6, 3, 0, 11, 8, 13, 1,
+ 13, 1, 12, 0, 10, 9, 7, 6, 2, 8,
+ 5, 2, 13, 7, 1, 13, 14, 7, 6, 7,
+ 9, 6, 10, 11, 7, 8, 7, 5, 14, 8,
+ 4, 4, 0, 8, 7, 10, 0, 8, 14, 11,
+ 3, 12, 5, 7, 14, 3, 14, 5, 2, 6,
+ 11, 12, 12, 8, 0, 11, 13, 1, 2, 0,
+ 5, 10, 14, 7, 8, 0, 4, 11, 0, 8,
+ 0, 3, 10, 5, 8, 0, 11, 6, 7, 8,
+ 10, 7, 13, 9, 2, 5, 1, 5, 10, 2,
+ 4, 3, 5, 6, 10, 8, 9, 4, 11, 14,
+ 3, 8, 3, 7, 8, 5, 11, 4, 12, 3,
+ 11, 9, 14, 8, 14, 13, 4, 3, 1, 2,
+ 14, 6, 5, 4, 4, 11, 4, 6, 2, 1,
+ 5, 8, 8, 12, 13, 5, 14, 10, 12, 13,
+ 0, 9, 5, 5, 11, 10, 13, 9, 10, 13,
+};
+
+
+/* Smooths one plane with a 5-tap filter, first down each column and then
+ * across each row; a pixel is kept as-is if any tap differs by > flimit. */
+void vp9_post_proc_down_and_across_c(unsigned char *src_ptr,
+                                     unsigned char *dst_ptr,
+                                     int src_pixels_per_line,
+                                     int dst_pixels_per_line,
+                                     int rows,
+                                     int cols,
+                                     int flimit) {
+  unsigned char *p_src, *p_dst;
+  int row;
+  int col;
+  int i;
+  int v;
+  /* The vertical taps index the source plane, so they use its stride. */
+  const int pitch = src_pixels_per_line;
+  unsigned char d[8];  /* holds across results until they are 2 pixels old */
+
+  for (row = 0; row < rows; row++) {
+    /* post_proc_down for one row */
+    p_src = src_ptr;
+    p_dst = dst_ptr;
+
+    for (col = 0; col < cols; col++) {
+      int kernel = 4;  /* rounding bias for the >> 3 below */
+
+      v = p_src[col];
+
+      for (i = -2; i <= 2; i++) {
+        if (abs(v - p_src[col + i * pitch]) > flimit)
+          goto down_skip_convolve;
+
+        kernel += kernel5[2 + i] * p_src[col + i * pitch];
+      }
+
+      v = (kernel >> 3);
+    down_skip_convolve:
+      p_dst[col] = v;
+    }
+
+    /* now post_proc_across */
+    p_src = dst_ptr;
+    p_dst = dst_ptr;
+
+    for (i = 0; i < 8; i++)
+      d[i] = p_src[i];
+
+    for (col = 0; col < cols; col++) {
+      int kernel = 4;
+      v = p_src[col];
+
+      d[col & 7] = v;
+
+      for (i = -2; i <= 2; i++) {
+        if (abs(v - p_src[col + i]) > flimit)
+          goto across_skip_convolve;
+
+        kernel += kernel5[2 + i] * p_src[col + i];
+      }
+
+      d[col & 7] = (kernel >> 3);
+    across_skip_convolve:
+
+      if (col >= 2)
+        p_dst[col - 2] = d[(col - 2) & 7];
+    }
+
+    /* handle the last two pixels */
+    p_dst[col - 2] = d[(col - 2) & 7];
+    p_dst[col - 1] = d[(col - 1) & 7];
+
+    /* next row; source and destination strides may differ, so each
+     * pointer advances by its own */
+    src_ptr += pitch;
+    dst_ptr += dst_pixels_per_line;
+  }
+}
+
+static int q2mbl(int x) {
+  /* Map a quantizer to a filter limit; inputs below 20 are treated as 20. */
+  const int q = (x < 20) ? 20 : x;
+  const int scaled = 50 + (q - 50) * 10 / 8;
+  return scaled * scaled / 3;
+}
+
+void vp9_mbpost_proc_across_ip_c(unsigned char *src, int pitch,
+                                 int rows, int cols, int flimit) {
+  /* In-place horizontal smoothing.  A pixel is replaced by the rounded mean
+   * of its 15-pixel window plus itself when 15 * sumsq - sum^2 is below
+   * flimit.  Each row reads s[-8 .. cols + 14] and writes s[-8 .. cols - 1]. */
+  int r, c, i;
+  unsigned char *s = src;
+  unsigned char d[16];  /* results wait here until they are 8 pixels old */
+
+  for (r = 0; r < rows; r++) {
+    int sumsq = 0;
+    int sum = 0;
+
+    for (i = -8; i <= 6; i++) {
+      sumsq += s[i] * s[i];
+      sum += s[i];
+      d[i + 8] = 0;
+    }
+    d[15] = 0;  /* read at c == 7, before this row has stored to it */
+
+    for (c = 0; c < cols + 8; c++) {
+      int x = s[c + 7] - s[c - 8];
+      int y = s[c + 7] + s[c - 8];
+
+      sum += x;
+      sumsq += x * y;
+      d[c & 15] = s[c];
+      if (sumsq * 15 - sum * sum < flimit) {
+        d[c & 15] = (8 + sum + s[c]) >> 4;
+      }
+
+      s[c - 8] = d[(c - 8) & 15];
+    }
+
+    s += pitch;
+  }
+}
+
+void vp9_mbpost_proc_down_c(unsigned char *dst, int pitch,
+                            int rows, int cols, int flimit) {
+  /* In-place vertical smoothing; vp9_rv supplies a varying rounding term. */
+  int r, c, i;
+  const short *rv3 = &vp9_rv[63 & rand()];
+
+  for (c = 0; c < cols; c++) {
+    unsigned char *s = &dst[c];
+    int sumsq = 0;
+    int sum = 0;
+    /* zeroed: d[8..15] are stored to rows -8..-1 before being assigned */
+    unsigned char d[16] = { 0 };
+    const short *rv2 = rv3 + ((c * 17) & 127);
+
+    for (i = -8; i <= 6; i++) {
+      sumsq += s[i * pitch] * s[i * pitch];
+      sum += s[i * pitch];
+    }
+
+    for (r = 0; r < rows + 8; r++) {
+      sumsq += s[7 * pitch] * s[ 7 * pitch] - s[-8 * pitch] * s[-8 * pitch];
+      sum += s[7 * pitch] - s[-8 * pitch];
+      d[r & 15] = s[0];
+      if (sumsq * 15 - sum * sum < flimit) {
+        d[r & 15] = (rv2[r & 127] + sum + s[0]) >> 4;
+      }
+      s[-8 * pitch] = d[(r - 8) & 15];
+      s += pitch;
+    }
+  }
+}
+
+static void deblock_and_de_macro_block(YV12_BUFFER_CONFIG *source,
+                                       YV12_BUFFER_CONFIG *post,
+                                       int q,
+                                       int low_var_thresh,
+                                       int flag) {
+  /* Deblock luma, smooth it across rows and down columns, deblock chroma. */
+  const double level = 6.0e-05 * q * q * q - .0067 * q * q + .306 * q + .0065;
+  const int limit = (int)(level + .5);
+  const int mb_limit = q2mbl(q);
+  (void) low_var_thresh;
+  (void) flag;
+
+  vp9_post_proc_down_and_across(source->y_buffer, post->y_buffer,
+                                source->y_stride, post->y_stride,
+                                source->y_height, source->y_width, limit);
+  vp9_mbpost_proc_across_ip(post->y_buffer, post->y_stride, post->y_height,
+                            post->y_width, mb_limit);
+  vp9_mbpost_proc_down(post->y_buffer, post->y_stride, post->y_height,
+                       post->y_width, mb_limit);
+
+  vp9_post_proc_down_and_across(source->u_buffer, post->u_buffer,
+                                source->uv_stride, post->uv_stride,
+                                source->uv_height, source->uv_width, limit);
+  vp9_post_proc_down_and_across(source->v_buffer, post->v_buffer,
+                                source->uv_stride, post->uv_stride,
+                                source->uv_height, source->uv_width, limit);
+}
+
+void vp9_deblock(YV12_BUFFER_CONFIG *source,
+                 YV12_BUFFER_CONFIG *post,
+                 int q,
+                 int low_var_thresh,
+                 int flag) {
+  /* Run the down-and-across filter from source into post on Y, U and V. */
+  const double level = 6.0e-05 * q * q * q - .0067 * q * q + .306 * q + .0065;
+  const int limit = (int)(level + .5);
+  (void) low_var_thresh;
+  (void) flag;
+
+  vp9_post_proc_down_and_across(source->y_buffer, post->y_buffer,
+                                source->y_stride, post->y_stride,
+                                source->y_height, source->y_width, limit);
+
+  vp9_post_proc_down_and_across(source->u_buffer, post->u_buffer,
+                                source->uv_stride, post->uv_stride,
+                                source->uv_height, source->uv_width, limit);
+  vp9_post_proc_down_and_across(source->v_buffer, post->v_buffer,
+                                source->uv_stride, post->uv_stride,
+                                source->uv_height, source->uv_width, limit);
+}
+
+void vp9_de_noise(YV12_BUFFER_CONFIG *src,
+                  YV12_BUFFER_CONFIG *post,
+                  int q,
+                  int low_var_thresh,
+                  int flag) {
+  /* Filter src in place, leaving a 2-pixel margin on every side untouched. */
+  const double level = 6.0e-05 * q * q * q - .0067 * q * q + .306 * q + .0065;
+  const int limit = (int)(level + .5);
+  const int y_off = 2 * src->y_stride + 2;    /* two rows down, two across */
+  const int uv_off = 2 * src->uv_stride + 2;
+  (void) post;
+  (void) low_var_thresh;
+  (void) flag;
+
+  vp9_post_proc_down_and_across(src->y_buffer + y_off, src->y_buffer + y_off,
+                                src->y_stride, src->y_stride,
+                                src->y_height - 4, src->y_width - 4, limit);
+
+  vp9_post_proc_down_and_across(src->u_buffer + uv_off, src->u_buffer + uv_off,
+                                src->uv_stride, src->uv_stride,
+                                src->uv_height - 4, src->uv_width - 4, limit);
+
+  vp9_post_proc_down_and_across(src->v_buffer + uv_off, src->v_buffer + uv_off,
+                                src->uv_stride, src->uv_stride,
+                                src->uv_height - 4, src->uv_width - 4, limit);
+}
+
+double vp9_gaussian(double sigma, double mu, double x) {
+  const double d = x - mu, norm = 1 / (sigma * sqrt(2.0 * 3.14159265));
+  return norm * exp(-d * d / (2 * sigma * sigma));  /* normal density at x */
+}
+
+static void fillrd(struct postproc_state *state, int q, int a) {
+  /* Rebuilds state->noise and the three clamp arrays for the pair (q, a),
+   * then records that pair in state->last_q / state->last_noise.  The noise
+   * samples are drawn with rand() from a table shaped like a gaussian.
+   */
+  char char_dist[300];
+  double sigma;
+  int filled = 0;
+  int value;
+  int k;
+
+  vp9_clear_system_state();
+
+  sigma = a + .5 + .6 * (63 - q) / 63.0;
+
+  /* set up a lookup table of 256 entries that matches
+   * a gaussian distribution with sigma determined by q.
+   */
+  for (value = -32; value < 32; value++) {
+    /* number of table slots this value receives, rounded to nearest */
+    const int count = (int)(.5 + 256 * vp9_gaussian(sigma, 0, value));
+
+    for (k = 0; k < count; k++) {
+      char_dist[filled + k] = (char) value;
+    }
+
+    filled += k;
+  }
+
+  /* any of the first 256 slots still unfilled get zero noise */
+  while (filled < 256) {
+    char_dist[filled++] = 0;
+  }
+
+  for (k = 0; k < 3072; k++) {
+    state->noise[k] = char_dist[rand() & 0xff];
+  }
+
+  /* char_dist[0] is the lowest value that received any slots; the clamps
+   * are built from its negation */
+  for (k = 0; k < 16; k++) {
+    state->blackclamp[k] = -char_dist[0];
+    state->whiteclamp[k] = -char_dist[0];
+    state->bothclamp[k] = -2 * char_dist[0];
+  }
+
+  state->last_q = q;
+  state->last_noise = a;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : vp9_plane_add_noise_c
+ *
+ * INPUTS : unsigned char *Start   starting address of buffer to
+ *                                 add gaussian noise to
+ *          char *noise            noise samples, read from a rand() offset
+ *          char blackclamp[16]    element 0 is the lowest pixel value kept
+ *          char whiteclamp[16]    255 minus element 0 is the highest kept
+ *          char bothclamp[16]     not used by this C version
+ *          unsigned int Width     width of plane
+ *          unsigned int Height    height of plane
+ *          int Pitch              distance between subsequent lines of frame
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void.
+ *
+ * FUNCTION : clamps each pixel, then adds a noise sample to it
+ *
+ ****************************************************************************/
+void vp9_plane_add_noise_c(unsigned char *Start, char *noise,
+                           char blackclamp[16],
+                           char whiteclamp[16],
+                           char bothclamp[16],
+                           unsigned int Width, unsigned int Height, int Pitch) {
+  unsigned int i, j;
+
+  for (i = 0; i < Height; i++) {
+    unsigned char *Pos = Start + i * Pitch;
+    char *Ref = (char *)(noise + (rand() & 0xff));
+
+    for (j = 0; j < Width; j++) {
+      if (Pos[j] < blackclamp[0])
+        Pos[j] = blackclamp[0];
+      /* whiteclamp holds a positive margin, so the ceiling is 255 minus it */
+      if (Pos[j] > 255 - whiteclamp[0])
+        Pos[j] = 255 - whiteclamp[0];
+
+      Pos[j] += Ref[j];
+    }
+  }
+}
+
+/* Blend the interior of a macro block with a solid color, skipping a
+ * 2-pixel luma (1-pixel chroma) border so that neighbouring blocks of the
+ * same color stay visually distinct.
+ */
+void vp9_blend_mb_inner_c(unsigned char *y, unsigned char *u, unsigned char *v,
+                          int y1, int u1, int v1, int alpha, int stride) {
+  /* alpha is out of 1 << 16: at 1 << 16 the existing pixel is unchanged */
+  const int inv_alpha = (1 << 16) - alpha;
+  const int y_term = y1 * inv_alpha;
+  const int u_term = u1 * inv_alpha;
+  const int v_term = v1 * inv_alpha;
+  const int uv_stride = stride >> 1;
+  int row, col;
+
+  /* luma: a 12x12 region starting 2 rows down and 2 pixels in */
+  for (row = 2; row < 14; row++) {
+    unsigned char *const y_row = y + row * stride + 2;
+
+    for (col = 0; col < 12; col++)
+      y_row[col] = (y_row[col] * alpha + y_term) >> 16;
+  }
+
+  /* chroma: a 6x6 region starting 1 row down and 1 pixel in */
+  for (row = 1; row < 7; row++) {
+    unsigned char *const u_row = u + row * uv_stride + 1;
+    unsigned char *const v_row = v + row * uv_stride + 1;
+
+    for (col = 0; col < 6; col++) {
+      u_row[col] = (u_row[col] * alpha + u_term) >> 16;
+      v_row[col] = (v_row[col] * alpha + v_term) >> 16;
+    }
+  }
+}
+
+/* Blend a 2-pixel luma / 1-pixel chroma frame around the macro block with a
+ * solid color, leaving the centre untouched so other overlays can use it.
+ *
+ *  y, u, v     top-left pixel of the block in each plane
+ *  y1, u1, v1  color to blend towards
+ *  alpha       weight kept from the existing pixel, out of 1 << 16
+ *  stride      luma row stride; the chroma planes use stride >> 1
+ */
+void vp9_blend_mb_outer_c(unsigned char *y, unsigned char *u, unsigned char *v,
+                          int y1, int u1, int v1, int alpha, int stride) {
+  /* weight of the solid color is whatever alpha leaves over */
+  const int inv_alpha = (1 << 16) - alpha;
+  const int y_term = y1 * inv_alpha;
+  const int u_term = u1 * inv_alpha;
+  const int v_term = v1 * inv_alpha;
+  const int uv_stride = stride >> 1;
+  int row, col;
+
+  /* luma: 16 rows of 16 pixels */
+  for (row = 0; row < 16; row++) {
+    unsigned char *const y_row = y + row * stride;
+
+    if (row < 2 || row >= 14) {
+      /* top and bottom bands: the whole 16-pixel row */
+      for (col = 0; col < 16; col++)
+        y_row[col] = (y_row[col] * alpha + y_term) >> 16;
+    } else {
+      /* middle rows: only the two leftmost and two rightmost pixels */
+      y_row[0] = (y_row[0] * alpha + y_term) >> 16;
+      y_row[1] = (y_row[1] * alpha + y_term) >> 16;
+
+      y_row[14] = (y_row[14] * alpha + y_term) >> 16;
+      y_row[15] = (y_row[15] * alpha + y_term) >> 16;
+    }
+  }
+
+  /* chroma: 8 rows of 8 pixels in each of u and v */
+  for (row = 0; row < 8; row++) {
+    unsigned char *const u_row = u + row * uv_stride;
+    unsigned char *const v_row = v + row * uv_stride;
+
+    if (row == 0 || row == 7) {
+      /* top and bottom chroma rows: all 8 pixels */
+      for (col = 0; col < 8; col++) {
+        u_row[col] = (u_row[col] * alpha + u_term) >> 16;
+        v_row[col] = (v_row[col] * alpha + v_term) >> 16;
+      }
+    } else {
+      /* middle chroma rows: first and last pixel only */
+      u_row[0] = (u_row[0] * alpha + u_term) >> 16;
+      v_row[0] = (v_row[0] * alpha + v_term) >> 16;
+
+      u_row[7] = (u_row[7] * alpha + u_term) >> 16;
+      v_row[7] = (v_row[7] * alpha + v_term) >> 16;
+    }
+  }
+}
+
+void vp9_blend_b_c(unsigned char *y, unsigned char *u, unsigned char *v,
+                   int y1, int u1, int v1, int alpha, int stride) {
+  /* Blend one 4x4 luma block and its 2x2 chroma blocks towards a solid
+   * color; alpha (out of 1 << 16) is the weight kept from the old pixel. */
+  const int inv_alpha = (1 << 16) - alpha;
+  const int y_term = y1 * inv_alpha;
+  const int u_term = u1 * inv_alpha;
+  const int v_term = v1 * inv_alpha;
+  const int uv_stride = stride >> 1;
+  int row, col;
+
+  for (row = 0; row < 4; row++) {
+    unsigned char *const y_row = y + row * stride;
+    for (col = 0; col < 4; col++)
+      y_row[col] = (y_row[col] * alpha + y_term) >> 16;
+  }
+
+  for (row = 0; row < 2; row++) {
+    for (col = 0; col < 2; col++) {
+      const int at = row * uv_stride + col;
+      u[at] = (u[at] * alpha + u_term) >> 16;
+      v[at] = (v[at] * alpha + v_term) >> 16;
+    }
+  }
+}
+
+static void constrain_line(int x0, int *x1, int y0, int *y1,
+                           int width, int height) {
+  /* Pull the end point (*x1, *y1) back along the line from (x0, y0) until it
+   * lies within [0, width] x [0, height].  The four bounds are applied in
+   * turn, so a later check sees the result of an earlier one. */
+  int ex = *x1;
+  int ey = *y1;
+
+  /* horizontal bounds: snap x, then move y to stay on the line */
+  if (ex > width) {
+    const int dx = ex - x0, dy = ey - y0;
+    ex = width;
+    if (dx)
+      ey = ((width - x0) * dy) / dx + y0;
+  }
+  if (ex < 0) {
+    const int dx = ex - x0, dy = ey - y0;
+    ex = 0;
+    if (dx)
+      ey = ((0 - x0) * dy) / dx + y0;
+  }
+  /* vertical bounds: snap y, then move x to stay on the line */
+  if (ey > height) {
+    const int dx = ex - x0, dy = ey - y0;
+    ey = height;
+    if (dy)
+      ex = ((height - y0) * dx) / dy + x0;
+  }
+  if (ey < 0) {
+    const int dx = ex - x0, dy = ey - y0;
+    ey = 0;
+    if (dy)
+      ex = ((0 - y0) * dx) / dy + x0;
+  }
+
+  *x1 = ex;
+  *y1 = ey;
+}
+
+int vp9_post_proc_frame(VP9_COMMON *oci, YV12_BUFFER_CONFIG *dest,
+ vp9_ppflags_t *ppflags) {
+ int q = oci->filter_level * 10 / 6;
+ int flags = ppflags->post_proc_flag;
+ int deblock_level = ppflags->deblocking_level;
+ int noise_level = ppflags->noise_level;
+
+ if (!oci->frame_to_show)
+ return -1;
+
+ if (q > 63)
+ q = 63;
+
+ if (!flags) {
+ *dest = *oci->frame_to_show;
+
+ /* handle problem with extending borders */
+ dest->y_width = oci->Width;
+ dest->y_height = oci->Height;
+ dest->uv_height = dest->y_height / 2;
+ return 0;
+
+ }
+
+#if ARCH_X86||ARCH_X86_64
+ vpx_reset_mmx_state();
+#endif
+
+ if (flags & VP9D_DEMACROBLOCK) {
+ deblock_and_de_macro_block(oci->frame_to_show, &oci->post_proc_buffer,
+ q + (deblock_level - 5) * 10, 1, 0);
+ } else if (flags & VP9D_DEBLOCK) {
+ vp9_deblock(oci->frame_to_show, &oci->post_proc_buffer, q, 1, 0);
+ } else {
+ vp8_yv12_copy_frame(oci->frame_to_show, &oci->post_proc_buffer);
+ }
+
+ if (flags & VP9D_ADDNOISE) {
+ if (oci->postproc_state.last_q != q
+ || oci->postproc_state.last_noise != noise_level) {
+ fillrd(&oci->postproc_state, 63 - q, noise_level);
+ }
+
+ vp9_plane_add_noise(oci->post_proc_buffer.y_buffer,
+ oci->postproc_state.noise,
+ oci->postproc_state.blackclamp,
+ oci->postproc_state.whiteclamp,
+ oci->postproc_state.bothclamp,
+ oci->post_proc_buffer.y_width,
+ oci->post_proc_buffer.y_height,
+ oci->post_proc_buffer.y_stride);
+ }
+
+#if 0 && CONFIG_POSTPROC_VISUALIZER
+ if (flags & VP9D_DEBUG_TXT_FRAME_INFO) {
+ char message[512];
+ sprintf(message, "F%1dG%1dQ%3dF%3dP%d_s%dx%d",
+ (oci->frame_type == KEY_FRAME),
+ oci->refresh_golden_frame,
+ oci->base_qindex,
+ oci->filter_level,
+ flags,
+ oci->mb_cols, oci->mb_rows);
+ vp9_blit_text(message, oci->post_proc_buffer.y_buffer,
+ oci->post_proc_buffer.y_stride);
+ }
+
+ if (flags & VP9D_DEBUG_TXT_MBLK_MODES) {
+ int i, j;
+ unsigned char *y_ptr;
+ YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
+ int mb_rows = post->y_height >> 4;
+ int mb_cols = post->y_width >> 4;
+ int mb_index = 0;
+ MODE_INFO *mi = oci->mi;
+
+ y_ptr = post->y_buffer + 4 * post->y_stride + 4;
+
+ /* vp9_filter each macro block */
+ for (i = 0; i < mb_rows; i++) {
+ for (j = 0; j < mb_cols; j++) {
+ char zz[4];
+
+ sprintf(zz, "%c", mi[mb_index].mbmi.mode + 'a');
+
+ vp9_blit_text(zz, y_ptr, post->y_stride);
+ mb_index++;
+ y_ptr += 16;
+ }
+
+ mb_index++; /* border */
+ y_ptr += post->y_stride * 16 - post->y_width;
+
+ }
+ }
+
+ if (flags & VP9D_DEBUG_TXT_DC_DIFF) {
+ int i, j;
+ unsigned char *y_ptr;
+ YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
+ int mb_rows = post->y_height >> 4;
+ int mb_cols = post->y_width >> 4;
+ int mb_index = 0;
+ MODE_INFO *mi = oci->mi;
+
+ y_ptr = post->y_buffer + 4 * post->y_stride + 4;
+
+ /* vp9_filter each macro block */
+ for (i = 0; i < mb_rows; i++) {
+ for (j = 0; j < mb_cols; j++) {
+ char zz[4];
+ int dc_diff = !(mi[mb_index].mbmi.mode != B_PRED &&
+ mi[mb_index].mbmi.mode != SPLITMV &&
+ mi[mb_index].mbmi.mb_skip_coeff);
+
+ if (oci->frame_type == KEY_FRAME)
+ sprintf(zz, "a");
+ else
+ sprintf(zz, "%c", dc_diff + '0');
+
+ vp9_blit_text(zz, y_ptr, post->y_stride);
+ mb_index++;
+ y_ptr += 16;
+ }
+
+ mb_index++; /* border */
+ y_ptr += post->y_stride * 16 - post->y_width;
+
+ }
+ }
+
+ if (flags & VP9D_DEBUG_TXT_RATE_INFO) {
+ char message[512];
+ snprintf(message, sizeof(message),
+ "Bitrate: %10.2f frame_rate: %10.2f ",
+ oci->bitrate, oci->framerate);
+ vp9_blit_text(message, oci->post_proc_buffer.y_buffer,
+ oci->post_proc_buffer.y_stride);
+ }
+
+ /* Draw motion vectors */
+ if ((flags & VP9D_DEBUG_DRAW_MV) && ppflags->display_mv_flag) {
+ YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
+ int width = post->y_width;
+ int height = post->y_height;
+ unsigned char *y_buffer = oci->post_proc_buffer.y_buffer;
+ int y_stride = oci->post_proc_buffer.y_stride;
+ MODE_INFO *mi = oci->mi;
+ int x0, y0;
+
+ for (y0 = 0; y0 < height; y0 += 16) {
+ for (x0 = 0; x0 < width; x0 += 16) {
+ int x1, y1;
+
+ if (!(ppflags->display_mv_flag & (1 << mi->mbmi.mode))) {
+ mi++;
+ continue;
+ }
+
+ if (mi->mbmi.mode == SPLITMV) {
+ switch (mi->mbmi.partitioning) {
+ case PARTITIONING_16X8 : { /* mv_top_bottom */
+ union b_mode_info *bmi = &mi->bmi[0];
+ MV *mv = &bmi->mv.as_mv;
+
+ x1 = x0 + 8 + (mv->col >> 3);
+ y1 = y0 + 4 + (mv->row >> 3);
+
+ constrain_line(x0 + 8, &x1, y0 + 4, &y1, width, height);
+ vp9_blit_line(x0 + 8, x1, y0 + 4, y1, y_buffer, y_stride);
+
+ bmi = &mi->bmi[8];
+
+ x1 = x0 + 8 + (mv->col >> 3);
+ y1 = y0 + 12 + (mv->row >> 3);
+
+ constrain_line(x0 + 8, &x1, y0 + 12, &y1, width, height);
+ vp9_blit_line(x0 + 8, x1, y0 + 12, y1, y_buffer, y_stride);
+
+ break;
+ }
+ case PARTITIONING_8X16 : { /* mv_left_right */
+ union b_mode_info *bmi = &mi->bmi[0];
+ MV *mv = &bmi->mv.as_mv;
+
+ x1 = x0 + 4 + (mv->col >> 3);
+ y1 = y0 + 8 + (mv->row >> 3);
+
+ constrain_line(x0 + 4, &x1, y0 + 8, &y1, width, height);
+ vp9_blit_line(x0 + 4, x1, y0 + 8, y1, y_buffer, y_stride);
+
+ bmi = &mi->bmi[2];
+
+ x1 = x0 + 12 + (mv->col >> 3);
+ y1 = y0 + 8 + (mv->row >> 3);
+
+ constrain_line(x0 + 12, &x1, y0 + 8, &y1, width, height);
+ vp9_blit_line(x0 + 12, x1, y0 + 8, y1, y_buffer, y_stride);
+
+ break;
+ }
+ case PARTITIONING_8X8 : { /* mv_quarters */
+ union b_mode_info *bmi = &mi->bmi[0];
+ MV *mv = &bmi->mv.as_mv;
+
+ x1 = x0 + 4 + (mv->col >> 3);
+ y1 = y0 + 4 + (mv->row >> 3);
+
+ constrain_line(x0 + 4, &x1, y0 + 4, &y1, width, height);
+ vp9_blit_line(x0 + 4, x1, y0 + 4, y1, y_buffer, y_stride);
+
+ bmi = &mi->bmi[2];
+
+ x1 = x0 + 12 + (mv->col >> 3);
+ y1 = y0 + 4 + (mv->row >> 3);
+
+ constrain_line(x0 + 12, &x1, y0 + 4, &y1, width, height);
+ vp9_blit_line(x0 + 12, x1, y0 + 4, y1, y_buffer, y_stride);
+
+ bmi = &mi->bmi[8];
+
+ x1 = x0 + 4 + (mv->col >> 3);
+ y1 = y0 + 12 + (mv->row >> 3);
+
+ constrain_line(x0 + 4, &x1, y0 + 12, &y1, width, height);
+ vp9_blit_line(x0 + 4, x1, y0 + 12, y1, y_buffer, y_stride);
+
+ bmi = &mi->bmi[10];
+
+ x1 = x0 + 12 + (mv->col >> 3);
+ y1 = y0 + 12 + (mv->row >> 3);
+
+ constrain_line(x0 + 12, &x1, y0 + 12, &y1, width, height);
+ vp9_blit_line(x0 + 12, x1, y0 + 12, y1, y_buffer, y_stride);
+ break;
+ }
+ case PARTITIONING_4X4:
+ default : {
+ union b_mode_info *bmi = mi->bmi;
+ int bx0, by0;
+
+ for (by0 = y0; by0 < (y0 + 16); by0 += 4) {
+ for (bx0 = x0; bx0 < (x0 + 16); bx0 += 4) {
+ MV *mv = &bmi->mv.as_mv;
+
+ x1 = bx0 + 2 + (mv->col >> 3);
+ y1 = by0 + 2 + (mv->row >> 3);
+
+ constrain_line(bx0 + 2, &x1, by0 + 2, &y1, width, height);
+ vp9_blit_line(bx0 + 2, x1, by0 + 2, y1, y_buffer, y_stride);
+
+ bmi++;
+ }
+ }
+ }
+ }
+ } else if (mi->mbmi.mode >= NEARESTMV) {
+ MV *mv = &mi->mbmi.mv.as_mv;
+ const int lx0 = x0 + 8;
+ const int ly0 = y0 + 8;
+
+ x1 = lx0 + (mv->col >> 3);
+ y1 = ly0 + (mv->row >> 3);
+
+ if (x1 != lx0 && y1 != ly0) {
+ constrain_line(lx0, &x1, ly0 - 1, &y1, width, height);
+ vp9_blit_line(lx0, x1, ly0 - 1, y1, y_buffer, y_stride);
+
+ constrain_line(lx0, &x1, ly0 + 1, &y1, width, height);
+ vp9_blit_line(lx0, x1, ly0 + 1, y1, y_buffer, y_stride);
+ } else
+ vp9_blit_line(lx0, x1, ly0, y1, y_buffer, y_stride);
+ }
+
+ mi++;
+ }
+ mi++;
+ }
+ }
+
+ /* Color in block modes */
+ if ((flags & VP9D_DEBUG_CLR_BLK_MODES)
+ && (ppflags->display_mb_modes_flag || ppflags->display_b_modes_flag)) {
+ int y, x;
+ YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
+ int width = post->y_width;
+ int height = post->y_height;
+ unsigned char *y_ptr = oci->post_proc_buffer.y_buffer;
+ unsigned char *u_ptr = oci->post_proc_buffer.u_buffer;
+ unsigned char *v_ptr = oci->post_proc_buffer.v_buffer;
+ int y_stride = oci->post_proc_buffer.y_stride;
+ MODE_INFO *mi = oci->mi;
+
+ for (y = 0; y < height; y += 16) {
+ for (x = 0; x < width; x += 16) {
+ int Y = 0, U = 0, V = 0;
+
+ if (mi->mbmi.mode == B_PRED &&
+ ((ppflags->display_mb_modes_flag & B_PRED) ||
+ ppflags->display_b_modes_flag)) {
+ int by, bx;
+ unsigned char *yl, *ul, *vl;
+ union b_mode_info *bmi = mi->bmi;
+
+ yl = y_ptr + x;
+ ul = u_ptr + (x >> 1);
+ vl = v_ptr + (x >> 1);
+
+ for (by = 0; by < 16; by += 4) {
+ for (bx = 0; bx < 16; bx += 4) {
+ if ((ppflags->display_b_modes_flag & (1 << mi->mbmi.mode))
+ || (ppflags->display_mb_modes_flag & B_PRED)) {
+ Y = B_PREDICTION_MODE_colors[bmi->as_mode.first][0];
+ U = B_PREDICTION_MODE_colors[bmi->as_mode.first][1];
+ V = B_PREDICTION_MODE_colors[bmi->as_mode.first][2];
+
+ vp9_blend_b(yl + bx, ul + (bx >> 1), vl + (bx >> 1), Y, U, V,
+ 0xc000, y_stride);
+ }
+ bmi++;
+ }
+
+ yl += y_stride * 4;
+ ul += y_stride * 1;
+ vl += y_stride * 1;
+ }
+ } else if (ppflags->display_mb_modes_flag & (1 << mi->mbmi.mode)) {
+ Y = MB_PREDICTION_MODE_colors[mi->mbmi.mode][0];
+ U = MB_PREDICTION_MODE_colors[mi->mbmi.mode][1];
+ V = MB_PREDICTION_MODE_colors[mi->mbmi.mode][2];
+
+ vp9_blend_mb_inner(y_ptr + x, u_ptr + (x >> 1), v_ptr + (x >> 1),
+ Y, U, V, 0xc000, y_stride);
+ }
+
+ mi++;
+ }
+ y_ptr += y_stride * 16;
+ u_ptr += y_stride * 4;
+ v_ptr += y_stride * 4;
+
+ mi++;
+ }
+ }
+
+ /* Color in frame reference blocks */
+ if ((flags & VP9D_DEBUG_CLR_FRM_REF_BLKS) &&
+ ppflags->display_ref_frame_flag) {
+ int y, x;
+ YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
+ int width = post->y_width;
+ int height = post->y_height;
+ unsigned char *y_ptr = oci->post_proc_buffer.y_buffer;
+ unsigned char *u_ptr = oci->post_proc_buffer.u_buffer;
+ unsigned char *v_ptr = oci->post_proc_buffer.v_buffer;
+ int y_stride = oci->post_proc_buffer.y_stride;
+ MODE_INFO *mi = oci->mi;
+
+ for (y = 0; y < height; y += 16) {
+ for (x = 0; x < width; x += 16) {
+ int Y = 0, U = 0, V = 0;
+
+ if (ppflags->display_ref_frame_flag & (1 << mi->mbmi.ref_frame)) {
+ Y = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][0];
+ U = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][1];
+ V = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][2];
+
+ vp9_blend_mb_outer(y_ptr + x, u_ptr + (x >> 1), v_ptr + (x >> 1),
+ Y, U, V, 0xc000, y_stride);
+ }
+
+ mi++;
+ }
+ y_ptr += y_stride * 16;
+ u_ptr += y_stride * 4;
+ v_ptr += y_stride * 4;
+
+ mi++;
+ }
+ }
+#endif
+
+ *dest = oci->post_proc_buffer;
+
+ /* handle problem with extending borders */
+ dest->y_width = oci->Width;
+ dest->y_height = oci->Height;
+ dest->uv_height = dest->y_height / 2;
+
+ return 0;
+}
diff --git a/vp9/common/vp9_postproc.h b/vp9/common/vp9_postproc.h
new file mode 100644
index 0000000..0628d84
--- /dev/null
+++ b/vp9/common/vp9_postproc.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_COMMON_VP9_POSTPROC_H_
+#define VP9_COMMON_VP9_POSTPROC_H_
+
+#include "vpx_ports/mem.h"
+struct postproc_state {  // Post-processing state; the code using it is outside this header.
+ int last_q;  // presumably the q value the cached data was built for -- TODO confirm
+ int last_noise;  // presumably the noise level last applied -- TODO confirm
+ char noise[3072];
+ DECLARE_ALIGNED(16, char, blackclamp[16]);  // three 16-char arrays, each declared
+ DECLARE_ALIGNED(16, char, whiteclamp[16]);  // through the DECLARE_ALIGNED(16, ...) macro
+ DECLARE_ALIGNED(16, char, bothclamp[16]);
+};
+#include "vp9/common/vp9_onyxc_int.h"
+#include "vp9/common/vp9_ppflags.h"
+int vp9_post_proc_frame(struct VP9Common *oci, YV12_BUFFER_CONFIG *dest,
+ vp9_ppflags_t *flags);
+
+
+void vp9_de_noise(YV12_BUFFER_CONFIG *source,
+ YV12_BUFFER_CONFIG *post,
+ int q,
+ int low_var_thresh,
+ int flag);
+
+void vp9_deblock(YV12_BUFFER_CONFIG *source,
+ YV12_BUFFER_CONFIG *post,
+ int q,
+ int low_var_thresh,
+ int flag);
+#endif
diff --git a/vp9/common/vp9_ppflags.h b/vp9/common/vp9_ppflags.h
new file mode 100644
index 0000000..293d8d5
--- /dev/null
+++ b/vp9/common/vp9_ppflags.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_COMMON_VP9_PPFLAGS_H_
+#define VP9_COMMON_VP9_PPFLAGS_H_
+enum {  // Post-processing flags: every non-zero value is a distinct single bit.
+ VP9D_NOFILTERING = 0,
+ VP9D_DEBLOCK = 1 << 0,
+ VP9D_DEMACROBLOCK = 1 << 1,
+ VP9D_ADDNOISE = 1 << 2,
+ VP9D_DEBUG_TXT_FRAME_INFO = 1 << 3,
+ VP9D_DEBUG_TXT_MBLK_MODES = 1 << 4,
+ VP9D_DEBUG_TXT_DC_DIFF = 1 << 5,
+ VP9D_DEBUG_TXT_RATE_INFO = 1 << 6,  // overlay "Bitrate: ... frame_rate: ..." text
+ VP9D_DEBUG_DRAW_MV = 1 << 7,  // draw motion vectors (also needs display_mv_flag)
+ VP9D_DEBUG_CLR_BLK_MODES = 1 << 8,  // colour blocks by prediction mode
+ VP9D_DEBUG_CLR_FRM_REF_BLKS = 1 << 9  // colour blocks by reference frame
+};
+
+typedef struct {  // Options handed to vp9_post_proc_frame().
+ int post_proc_flag;  // presumably an OR of the VP9D_* bits -- TODO confirm
+ int deblocking_level;
+ int noise_level;
+ int display_ref_frame_flag;  // bit (1 << ref_frame) selects reference frames to colour
+ int display_mb_modes_flag;  // bit (1 << mode) selects macroblock modes to colour
+ int display_b_modes_flag;  // non-zero enables colouring of B_PRED sub-blocks
+ int display_mv_flag;  // bit (1 << mode) selects modes whose vectors are drawn
+} vp9_ppflags_t;
+
+#endif
diff --git a/vp9/common/vp9_pragmas.h b/vp9/common/vp9_pragmas.h
new file mode 100644
index 0000000..99fee5a
--- /dev/null
+++ b/vp9/common/vp9_pragmas.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:997 1011 170)
+#endif
+#ifdef _MSC_VER
+#pragma warning(disable:4799)
+#endif
diff --git a/vp9/common/vp9_pred_common.c b/vp9/common/vp9_pred_common.c
new file mode 100644
index 0000000..2abae34
--- /dev/null
+++ b/vp9/common/vp9_pred_common.c
@@ -0,0 +1,465 @@
+
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vp9/common/vp9_pred_common.h"
+#include "vp9/common/vp9_seg_common.h"
+
+// TBD prediction functions for various bitstream signals
+
+// Returns the context number for signal pred_id at the current macroblock.
+unsigned char vp9_get_pred_context(const VP9_COMMON *const cm,
+ const MACROBLOCKD *const xd,
+ PRED_ID pred_id) {
+ int pred_context;
+ MODE_INFO *m = xd->mode_info_context;  // entry of the current macroblock
+ // Left neighbour is (m - 1); above neighbour is (m - cm->mode_info_stride).
+ // Note:
+ // The mode info data structure has a one element border above and to the
+ // left of the entries corresponding to real macroblocks.
+ // The prediction flags in these dummy entries are initialised to 0.
+ switch (pred_id) {
+ case PRED_SEG_ID:
+ pred_context = (m - 1)->mbmi.seg_id_predicted +
+ (m - cm->mode_info_stride)->mbmi.seg_id_predicted;
+ break;
+
+ // Sum of the left and above ref_predicted flags.
+ case PRED_REF:
+ pred_context = (m - 1)->mbmi.ref_predicted +
+ (m - cm->mode_info_stride)->mbmi.ref_predicted;
+ break;
+
+ case PRED_COMP:
+ // Context based on use of comp pred flag by neighbours
+ // pred_context =
+ // ((m - 1)->mbmi.second_ref_frame > INTRA_FRAME) +
+ // ((m - cm->mode_info_stride)->mbmi.second_ref_frame > INTRA_FRAME);
+
+ // Context based on mode and reference frame
+ // if ( m->mbmi.ref_frame == LAST_FRAME )
+ // pred_context = 0 + (m->mbmi.mode != ZEROMV);
+ // else if ( m->mbmi.ref_frame == GOLDEN_FRAME )
+ // pred_context = 2 + (m->mbmi.mode != ZEROMV);
+ // else
+ // pred_context = 4 + (m->mbmi.mode != ZEROMV);
+ // Active rule: only this MB's own reference frame decides the context.
+ if (m->mbmi.ref_frame == LAST_FRAME)
+ pred_context = 0;
+ else
+ pred_context = 1;
+
+ break;
+
+ case PRED_MBSKIP:
+ pred_context = (m - 1)->mbmi.mb_skip_coeff +
+ (m - cm->mode_info_stride)->mbmi.mb_skip_coeff;
+ break;
+
+ case PRED_SWITCHABLE_INTERP:
+ {
+ int left_in_image = (m - 1)->mbmi.mb_in_image;
+ int above_in_image = (m - cm->mode_info_stride)->mbmi.mb_in_image;
+ int left_mode = (m - 1)->mbmi.mode;
+ int above_mode = (m - cm->mode_info_stride)->mbmi.mode;
+ int left_interp, above_interp;  // VP9_SWITCHABLE_FILTERS marks "not usable"
+ if (left_in_image && left_mode >= NEARESTMV && left_mode <= SPLITMV)
+ left_interp = vp9_switchable_interp_map[(m - 1)->mbmi.interp_filter];
+ else
+ left_interp = VP9_SWITCHABLE_FILTERS;
+ assert(left_interp != -1);
+ if (above_in_image && above_mode >= NEARESTMV && above_mode <= SPLITMV)
+ above_interp = vp9_switchable_interp_map[
+ (m - cm->mode_info_stride)->mbmi.interp_filter];
+ else
+ above_interp = VP9_SWITCHABLE_FILTERS;
+ assert(above_interp != -1);
+ // Agreeing neighbours win; else the single usable one; else the marker.
+ if (left_interp == above_interp)
+ pred_context = left_interp;
+ else if (left_interp == VP9_SWITCHABLE_FILTERS &&
+ above_interp != VP9_SWITCHABLE_FILTERS)
+ pred_context = above_interp;
+ else if (left_interp != VP9_SWITCHABLE_FILTERS &&
+ above_interp == VP9_SWITCHABLE_FILTERS)
+ pred_context = left_interp;
+ else
+ pred_context = VP9_SWITCHABLE_FILTERS;
+ }
+ break;
+
+ default:
+ // TODO *** add error trap code. Unknown ids currently yield context 0.
+ pred_context = 0;
+ break;
+ }
+
+ return pred_context;
+}
+
+// Returns the probability used to code prediction signal pred_id for the
+// current macroblock, selected by vp9_get_pred_context().
+vp9_prob vp9_get_pred_prob(const VP9_COMMON *const cm,
+ const MACROBLOCKD *const xd,
+ PRED_ID pred_id) {
+ vp9_prob pred_probability;
+ int pred_context;
+
+ // Get the appropriate prediction context
+ pred_context = vp9_get_pred_context(cm, xd, pred_id);
+ // The context indexes the per-signal probability table held in cm.
+ switch (pred_id) {
+ case PRED_SEG_ID:
+ pred_probability = cm->segment_pred_probs[pred_context];
+ break;
+
+ case PRED_REF:
+ pred_probability = cm->ref_pred_probs[pred_context];
+ break;
+
+ case PRED_COMP:
+ // In keeping with convention elsewhere the probability returned is
+ // the probability of a "0" outcome which in this case means the
+ // probability of comp pred off.
+ pred_probability = cm->prob_comppred[pred_context];
+ break;
+
+ case PRED_MBSKIP:
+ pred_probability = cm->mbskip_pred_probs[pred_context];
+ break;
+
+ default:
+ // TODO *** add error trap code. PRED_SWITCHABLE_INTERP also lands here.
+ pred_probability = 128;
+ break;
+ }
+
+ return pred_probability;
+}
+
+// Returns a pointer to the probability (or first probability of a row) used
+// to code prediction signal pred_id; NULL for an unrecognised pred_id.
+const vp9_prob *vp9_get_pred_probs(const VP9_COMMON *const cm,
+ const MACROBLOCKD *const xd,
+ PRED_ID pred_id) {
+ const vp9_prob *pred_probability;
+ int pred_context;
+
+ // Get the appropriate prediction context
+ pred_context = vp9_get_pred_context(cm, xd, pred_id);
+ // The context indexes the per-signal probability table held in cm.
+ switch (pred_id) {
+ case PRED_SEG_ID:
+ pred_probability = &cm->segment_pred_probs[pred_context];
+ break;
+
+ case PRED_REF:
+ pred_probability = &cm->ref_pred_probs[pred_context];
+ break;
+
+ case PRED_COMP:
+ // In keeping with convention elsewhere the probability returned is
+ // the probability of a "0" outcome which in this case means the
+ // probability of comp pred off.
+ pred_probability = &cm->prob_comppred[pred_context];
+ break;
+
+ case PRED_MBSKIP:
+ pred_probability = &cm->mbskip_pred_probs[pred_context];
+ break;
+
+ case PRED_SWITCHABLE_INTERP:  // first entry of the row for this context
+ pred_probability = &cm->fc.switchable_interp_prob[pred_context][0];
+ break;
+
+ default:
+ // TODO *** add error trap code. Callers receive NULL here.
+ pred_probability = NULL;
+ break;
+ }
+
+ return pred_probability;
+}
+
+// Returns the stored prediction flag of the current macroblock for pred_id,
+// i.e. whether the predicted value for that signal was correct.
+unsigned char vp9_get_pred_flag(const MACROBLOCKD *const xd,
+ PRED_ID pred_id) {
+ unsigned char pred_flag = 0;
+ // Only PRED_SEG_ID, PRED_REF and PRED_MBSKIP have a stored flag.
+ switch (pred_id) {
+ case PRED_SEG_ID:
+ pred_flag = xd->mode_info_context->mbmi.seg_id_predicted;
+ break;
+
+ case PRED_REF:
+ pred_flag = xd->mode_info_context->mbmi.ref_predicted;
+ break;
+
+ case PRED_MBSKIP:
+ pred_flag = xd->mode_info_context->mbmi.mb_skip_coeff;
+ break;
+
+ default:
+ // TODO *** add error trap code. Every other pred_id reports 0.
+ pred_flag = 0;
+ break;
+ }
+
+ return pred_flag;
+}
+
+// Stores pred_flag as the prediction flag of the current macroblock for
+// pred_id; with CONFIG_SUPERBLOCKS it is also copied to superblock members.
+void vp9_set_pred_flag(MACROBLOCKD *const xd,
+ PRED_ID pred_id,
+ unsigned char pred_flag) {
+#if CONFIG_SUPERBLOCKS
+ const int mis = xd->mode_info_stride;  // offset to the entry one row below
+#endif
+ // Each case writes [0]; for encoded_as_sb also [1], [mis] and [mis + 1].
+ switch (pred_id) {
+ case PRED_SEG_ID:
+ xd->mode_info_context->mbmi.seg_id_predicted = pred_flag;
+#if CONFIG_SUPERBLOCKS
+ if (xd->mode_info_context->mbmi.encoded_as_sb) {
+ if (xd->mb_to_right_edge >= 0)  // presumably: right neighbour is in frame
+ xd->mode_info_context[1].mbmi.seg_id_predicted = pred_flag;
+ if (xd->mb_to_bottom_edge >= 0) {  // presumably: row below is in frame
+ xd->mode_info_context[mis].mbmi.seg_id_predicted = pred_flag;
+ if (xd->mb_to_right_edge >= 0)
+ xd->mode_info_context[mis + 1].mbmi.seg_id_predicted = pred_flag;
+ }
+ }
+#endif
+ break;
+
+ case PRED_REF:
+ xd->mode_info_context->mbmi.ref_predicted = pred_flag;
+#if CONFIG_SUPERBLOCKS
+ if (xd->mode_info_context->mbmi.encoded_as_sb) {
+ if (xd->mb_to_right_edge >= 0)
+ xd->mode_info_context[1].mbmi.ref_predicted = pred_flag;
+ if (xd->mb_to_bottom_edge >= 0) {
+ xd->mode_info_context[mis].mbmi.ref_predicted = pred_flag;
+ if (xd->mb_to_right_edge >= 0)
+ xd->mode_info_context[mis + 1].mbmi.ref_predicted = pred_flag;
+ }
+ }
+#endif
+ break;
+
+ case PRED_MBSKIP:
+ xd->mode_info_context->mbmi.mb_skip_coeff = pred_flag;
+#if CONFIG_SUPERBLOCKS
+ if (xd->mode_info_context->mbmi.encoded_as_sb) {
+ if (xd->mb_to_right_edge >= 0)
+ xd->mode_info_context[1].mbmi.mb_skip_coeff = pred_flag;
+ if (xd->mb_to_bottom_edge >= 0) {
+ xd->mode_info_context[mis].mbmi.mb_skip_coeff = pred_flag;
+ if (xd->mb_to_right_edge >= 0)
+ xd->mode_info_context[mis + 1].mbmi.mb_skip_coeff = pred_flag;
+ }
+ }
+#endif
+ break;
+
+ default:
+ // TODO *** add error trap code. Other pred_ids are silently ignored.
+ break;
+ }
+}
+
+
+// The following contain the guts of the prediction code used to
+// predict various bitstream signals.
+
+// Predicts the segment id of macroblock MbIndex from cm->last_frame_seg_map.
+unsigned char vp9_get_pred_mb_segid(const VP9_COMMON *const cm,
+ const MACROBLOCKD *const xd, int MbIndex) {
+ // Currently the prediction for the macroblock segment ID is
+ // the value stored for this macroblock in the previous frame.
+#if CONFIG_SUPERBLOCKS
+ if (!xd->mode_info_context->mbmi.encoded_as_sb) {
+#endif
+ return cm->last_frame_seg_map[MbIndex];
+#if CONFIG_SUPERBLOCKS
+ } else {
+ int seg_id = cm->last_frame_seg_map[MbIndex];
+ int mb_col = MbIndex % cm->mb_cols;  // MbIndex is row-major over mb_cols
+ int mb_row = MbIndex / cm->mb_cols;
+ if (mb_col + 1 < cm->mb_cols)  // right neighbour exists
+ seg_id = seg_id && cm->last_frame_seg_map[MbIndex + 1];
+ if (mb_row + 1 < cm->mb_rows) {  // row below exists
+ seg_id = seg_id && cm->last_frame_seg_map[MbIndex + cm->mb_cols];
+ if (mb_col + 1 < cm->mb_cols)
+ seg_id = seg_id && cm->last_frame_seg_map[MbIndex + cm->mb_cols + 1];
+ }
+ return seg_id;  // NOTE(review): '&&' reduces the id to 0 or 1 -- confirm intent
+ }
+#endif
+}
+
+MV_REFERENCE_FRAME vp9_get_pred_ref(const VP9_COMMON *const cm,
+ const MACROBLOCKD *const xd) {
+ MODE_INFO *m = xd->mode_info_context;
+ // Returns the highest-scoring reference frame as the prediction for this MB.
+ MV_REFERENCE_FRAME left;
+ MV_REFERENCE_FRAME above;
+ MV_REFERENCE_FRAME above_left;
+ MV_REFERENCE_FRAME pred_ref = LAST_FRAME;  // result when every score is 0
+
+ int segment_id = xd->mode_info_context->mbmi.segment_id;
+ int seg_ref_active;
+ int i;
+ // NOTE(review): the initialiser below lists 4 values; confirm MAX_REF_FRAMES.
+ unsigned char frame_allowed[MAX_REF_FRAMES] = {1, 1, 1, 1};
+ unsigned char ref_score[MAX_REF_FRAMES];
+ unsigned char best_score = 0;
+ unsigned char left_in_image;
+ unsigned char above_in_image;
+ unsigned char above_left_in_image;
+
+ // Is segment coding enabled
+ seg_ref_active = vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME);
+
+ // Special case treatment if segment coding is enabled.
+ // Don't allow prediction of a reference frame that the segment
+ // does not allow
+ if (seg_ref_active) {
+ for (i = 0; i < MAX_REF_FRAMES; i++) {
+ frame_allowed[i] =
+ vp9_check_segref(xd, segment_id, i);
+
+ // Score set to 0 if ref frame not allowed
+ ref_score[i] = cm->ref_scores[i] * frame_allowed[i];
+ }
+ } else
+ vpx_memcpy(ref_score, cm->ref_scores, sizeof(ref_score));
+
+ // Reference frames used by neighbours
+ left = (m - 1)->mbmi.ref_frame;
+ above = (m - cm->mode_info_stride)->mbmi.ref_frame;
+ above_left = (m - 1 - cm->mode_info_stride)->mbmi.ref_frame;
+
+ // Are neighbours in image
+ left_in_image = (m - 1)->mbmi.mb_in_image;
+ above_in_image = (m - cm->mode_info_stride)->mbmi.mb_in_image;
+ above_left_in_image = (m - 1 - cm->mode_info_stride)->mbmi.mb_in_image;
+
+ // Adjust scores for candidate reference frames based on neighbours
+ if (frame_allowed[left] && left_in_image) {
+ ref_score[left] += 16;
+ if (above_left_in_image && (left == above_left))
+ ref_score[left] += 4;
+ }
+ if (frame_allowed[above] && above_in_image) {
+ ref_score[above] += 16;
+ if (above_left_in_image && (above == above_left))
+ ref_score[above] += 4;
+ }
+
+ // Now choose the candidate with the highest score (lowest index wins ties)
+ for (i = 0; i < MAX_REF_FRAMES; i++) {
+ if (ref_score[i] > best_score) {
+ pred_ref = i;
+ best_score = ref_score[i];
+ }
+ }
+
+ return pred_ref;
+}
+
+// Computes a set of modified reference frame probabilities from count[0..3],
+// to use when the prediction of the reference frame value fails
+void vp9_calc_ref_probs(int *count, vp9_prob *probs) {
+ int tot_count;
+ // probs[k] = rounded count[k] * 255 / (sum of count[k..3]); 128 if sum is 0.
+ tot_count = count[0] + count[1] + count[2] + count[3];
+ if (tot_count) {
+ probs[0] = (vp9_prob)((count[0] * 255 + (tot_count >> 1)) / tot_count);
+ probs[0] += !probs[0];  // a result of 0 is bumped to 1
+ } else
+ probs[0] = 128;
+ // Drop count[0] so probs[1] is conditional on the first branch not taken.
+ tot_count -= count[0];
+ if (tot_count) {
+ probs[1] = (vp9_prob)((count[1] * 255 + (tot_count >> 1)) / tot_count);
+ probs[1] += !probs[1];
+ } else
+ probs[1] = 128;
+ // Likewise drop count[1] before computing probs[2].
+ tot_count -= count[1];
+ if (tot_count) {
+ probs[2] = (vp9_prob)((count[2] * 255 + (tot_count >> 1)) / tot_count);
+ probs[2] += !probs[2];
+ } else
+ probs[2] = 128;
+
+}
+
+// Computes a set of modified conditional probabilities for the reference frame
+// Values will be set to 0 for reference frame options that are not possible
+// because either they were predicted and prediction has failed or because
+// they are not allowed for a given segment.
+void vp9_compute_mod_refprobs(VP9_COMMON *const cm) {
+ int norm_cnt[MAX_REF_FRAMES];
+ int intra_count;
+ int inter_count;
+ int last_count;
+ int gfarf_count;
+ int gf_count;
+ int arf_count;
+ // Split a total of 255 into per-frame counts using the three probabilities.
+ intra_count = cm->prob_intra_coded;
+ inter_count = (255 - intra_count);
+ last_count = (inter_count * cm->prob_last_coded) / 255;
+ gfarf_count = inter_count - last_count;
+ gf_count = (gfarf_count * cm->prob_gf_coded) / 255;
+ arf_count = gfarf_count - gf_count;
+
+ // Work out modified reference frame probabilities to use where prediction
+ // of the reference frame fails
+ norm_cnt[0] = 0;
+ norm_cnt[1] = last_count;
+ norm_cnt[2] = gf_count;
+ norm_cnt[3] = arf_count;
+ vp9_calc_ref_probs(norm_cnt, cm->mod_refprobs[INTRA_FRAME]);
+ cm->mod_refprobs[INTRA_FRAME][0] = 0; // This branch implicit
+
+ norm_cnt[0] = intra_count;
+ norm_cnt[1] = 0;
+ norm_cnt[2] = gf_count;
+ norm_cnt[3] = arf_count;
+ vp9_calc_ref_probs(norm_cnt, cm->mod_refprobs[LAST_FRAME]);
+ cm->mod_refprobs[LAST_FRAME][1] = 0; // This branch implicit
+
+ norm_cnt[0] = intra_count;
+ norm_cnt[1] = last_count;
+ norm_cnt[2] = 0;
+ norm_cnt[3] = arf_count;
+ vp9_calc_ref_probs(norm_cnt, cm->mod_refprobs[GOLDEN_FRAME]);
+ cm->mod_refprobs[GOLDEN_FRAME][2] = 0; // This branch implicit
+ // NOTE(review): index 2 is zeroed for ALTREF as for GOLDEN -- confirm intent.
+ norm_cnt[0] = intra_count;
+ norm_cnt[1] = last_count;
+ norm_cnt[2] = gf_count;
+ norm_cnt[3] = 0;
+ vp9_calc_ref_probs(norm_cnt, cm->mod_refprobs[ALTREF_FRAME]);
+ cm->mod_refprobs[ALTREF_FRAME][2] = 0; // This branch implicit
+
+ // Score the reference frames based on overall frequency.
+ // These scores contribute to the prediction choices.
+ // Max score 17 min 1
+ cm->ref_scores[INTRA_FRAME] = 1 + (intra_count * 16 / 255);
+ cm->ref_scores[LAST_FRAME] = 1 + (last_count * 16 / 255);
+ cm->ref_scores[GOLDEN_FRAME] = 1 + (gf_count * 16 / 255);
+ cm->ref_scores[ALTREF_FRAME] = 1 + (arf_count * 16 / 255);
+}
diff --git a/vp9/common/vp9_pred_common.h b/vp9/common/vp9_pred_common.h
new file mode 100644
index 0000000..af6ad7b
--- /dev/null
+++ b/vp9/common/vp9_pred_common.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vp9/common/vp9_type_aliases.h"
+#include "vp9/common/vp9_onyxc_int.h"
+#include "vp9/common/vp9_blockd.h"
+
+#ifndef VP9_COMMON_VP9_PRED_COMMON_H_
+#define VP9_COMMON_VP9_PRED_COMMON_H_
+
+
+// Predicted items: selects which signal the vp9_get_pred_* helpers work on
+typedef enum {
+ PRED_SEG_ID = 0, // Segment identifier
+ PRED_REF = 1,  // Reference frame
+ PRED_COMP = 2,  // Compound prediction on/off
+ PRED_MBSKIP = 3,  // Macroblock skip flag (mb_skip_coeff)
+ PRED_SWITCHABLE_INTERP = 4  // Switchable interpolation filter
+} PRED_ID;
+
+extern unsigned char vp9_get_pred_context(const VP9_COMMON *const cm,
+ const MACROBLOCKD *const xd,
+ PRED_ID pred_id);
+
+extern vp9_prob vp9_get_pred_prob(const VP9_COMMON *const cm,
+ const MACROBLOCKD *const xd,
+ PRED_ID pred_id);
+
+extern const vp9_prob *vp9_get_pred_probs(const VP9_COMMON *const cm,
+ const MACROBLOCKD *const xd,
+ PRED_ID pred_id);
+
+extern unsigned char vp9_get_pred_flag(const MACROBLOCKD *const xd,
+ PRED_ID pred_id);
+
+extern void vp9_set_pred_flag(MACROBLOCKD *const xd,
+ PRED_ID pred_id,
+ unsigned char pred_flag);
+
+
+extern unsigned char vp9_get_pred_mb_segid(const VP9_COMMON *const cm,
+ const MACROBLOCKD *const xd,
+ int MbIndex);
+
+extern MV_REFERENCE_FRAME vp9_get_pred_ref(const VP9_COMMON *const cm,
+ const MACROBLOCKD *const xd);
+extern void vp9_compute_mod_refprobs(VP9_COMMON *const cm);
+
+#endif /* VP9_COMMON_VP9_PRED_COMMON_H_ */
diff --git a/vp9/common/vp9_quant_common.c b/vp9/common/vp9_quant_common.c
new file mode 100644
index 0000000..1190381
--- /dev/null
+++ b/vp9/common/vp9_quant_common.c
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vp9/common/vp9_quant_common.h"
+
+static int dc_qlookup[QINDEX_RANGE];
+static int ac_qlookup[QINDEX_RANGE];
+
+#define ACDC_MIN 4
+
+void vp9_init_quant_tables() {  // Fills ac_qlookup[] and dc_qlookup[].
+ int i;
+ int current_val = 4;
+ int last_val = 4;
+ int ac_val;
+ // AC entries start at 4 and grow by 2% (truncated), by at least 1 per step.
+ for (i = 0; i < QINDEX_RANGE; i++) {
+ ac_qlookup[i] = current_val;
+ current_val = (int)((double)current_val * 1.02);
+ if (current_val == last_val)
+ current_val++;
+ last_val = current_val;
+ // DC entry: cubic polynomial of the AC entry, rounded, floored at ACDC_MIN.
+ ac_val = ac_qlookup[i];
+ dc_qlookup[i] = (int)((0.000000305 * ac_val * ac_val * ac_val) +
+ (-0.00065 * ac_val * ac_val) +
+ (0.9 * ac_val) + 0.5);
+ if (dc_qlookup[i] < ACDC_MIN)
+ dc_qlookup[i] = ACDC_MIN;
+ }
+}
+
+int vp9_dc_quant(int QIndex, int Delta) {
+ int retval;
+ // Returns dc_qlookup[QIndex + Delta] with the index clamped to [0, MAXQ].
+ QIndex = QIndex + Delta;
+
+ if (QIndex > MAXQ)
+ QIndex = MAXQ;
+ else if (QIndex < 0)
+ QIndex = 0;
+
+ retval = dc_qlookup[ QIndex ];
+ return retval;
+}
+
+int vp9_dc2quant(int QIndex, int Delta) {
+ int retval;
+ // Returns dc_qlookup[QIndex + Delta] with the index clamped to [0, MAXQ].
+ QIndex = QIndex + Delta;
+
+ if (QIndex > MAXQ)
+ QIndex = MAXQ;
+ else if (QIndex < 0)
+ QIndex = 0;
+ // Same table and computation as vp9_dc_quant() in this file.
+ retval = dc_qlookup[ QIndex ];
+
+ return retval;
+
+}
+int vp9_dc_uv_quant(int QIndex, int Delta) {
+ int retval;
+ // Returns dc_qlookup[QIndex + Delta] with the index clamped to [0, MAXQ].
+ QIndex = QIndex + Delta;
+
+ if (QIndex > MAXQ)
+ QIndex = MAXQ;
+ else if (QIndex < 0)
+ QIndex = 0;
+ // Same table and computation as vp9_dc_quant() in this file.
+ retval = dc_qlookup[ QIndex ];
+
+ return retval;
+}
+
+int vp9_ac_yquant(int QIndex) {
+ int retval;
+ // Returns ac_qlookup[QIndex] with the index clamped to [0, MAXQ].
+ if (QIndex > MAXQ)
+ QIndex = MAXQ;
+ else if (QIndex < 0)
+ QIndex = 0;
+
+ retval = ac_qlookup[ QIndex ];
+ return retval;
+}
+
+int vp9_ac2quant(int QIndex, int Delta) {
+ int retval;
+ // Returns 775/1000 of ac_qlookup[QIndex + Delta], never less than 4.
+ QIndex = QIndex + Delta;
+ // Clamp the index to [0, MAXQ] before the lookup.
+ if (QIndex > MAXQ)
+ QIndex = MAXQ;
+ else if (QIndex < 0)
+ QIndex = 0;
+ // Integer arithmetic: the scaled value is truncated.
+ retval = (ac_qlookup[ QIndex ] * 775) / 1000;
+ if (retval < 4)
+ retval = 4;
+
+ return retval;
+}
+int vp9_ac_uv_quant(int QIndex, int Delta) {
+ int retval;
+ // Returns ac_qlookup[QIndex + Delta] with the index clamped to [0, MAXQ].
+ QIndex = QIndex + Delta;
+
+ if (QIndex > MAXQ)
+ QIndex = MAXQ;
+ else if (QIndex < 0)
+ QIndex = 0;
+
+ retval = ac_qlookup[ QIndex ];
+ return retval;
+}
diff --git a/vp9/common/vp9_quant_common.h b/vp9/common/vp9_quant_common.h
new file mode 100644
index 0000000..6e0555e
--- /dev/null
+++ b/vp9/common/vp9_quant_common.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "string.h"
+#include "vp9/common/vp9_blockd.h"
+#include "vp9/common/vp9_onyxc_int.h"
+
+extern void vp9_init_quant_tables();
+extern int vp9_ac_yquant(int QIndex);
+extern int vp9_dc_quant(int QIndex, int Delta);
+extern int vp9_dc2quant(int QIndex, int Delta);
+extern int vp9_ac2quant(int QIndex, int Delta);
+extern int vp9_dc_uv_quant(int QIndex, int Delta);
+extern int vp9_ac_uv_quant(int QIndex, int Delta);
diff --git a/vp9/common/vp9_recon.c b/vp9/common/vp9_recon.c
new file mode 100644
index 0000000..c60d0aa
--- /dev/null
+++ b/vp9/common/vp9_recon.c
@@ -0,0 +1,197 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "./vpx_config.h"
+#include "vp9_rtcd.h"
+#include "vp9/common/vp9_blockd.h"
+
+void vp9_recon_b_c
+(
+ unsigned char *pred_ptr,  // predictor samples, 16 entries per row
+ short *diff_ptr,  // residual samples, 16 entries per row
+ unsigned char *dst_ptr,  // output samples
+ int stride  // distance between output rows
+) {
+ int r, c;
+ // Reconstructs a 4x4 block: dst = clamp(pred + diff) to [0, 255].
+ for (r = 0; r < 4; r++) {
+ for (c = 0; c < 4; c++) {
+ int a = diff_ptr[c] + pred_ptr[c];
+
+ if (a < 0)
+ a = 0;
+
+ if (a > 255)
+ a = 255;
+
+ dst_ptr[c] = (unsigned char) a;
+ }
+ // Advance all three pointers to the next row.
+ dst_ptr += stride;
+ diff_ptr += 16;
+ pred_ptr += 16;
+ }
+}
+
+void vp9_recon_uv_b_c
+(
+ unsigned char *pred_ptr,  // predictor samples, 8 entries per row
+ short *diff_ptr,  // residual samples, 8 entries per row
+ unsigned char *dst_ptr,  // output samples
+ int stride  // distance between output rows
+) {
+ int r, c;
+ // Reconstructs a 4x4 block: dst = clamp(pred + diff) to [0, 255].
+ for (r = 0; r < 4; r++) {
+ for (c = 0; c < 4; c++) {
+ int a = diff_ptr[c] + pred_ptr[c];
+
+ if (a < 0)
+ a = 0;
+
+ if (a > 255)
+ a = 255;
+
+ dst_ptr[c] = (unsigned char) a;
+ }
+ // Advance all three pointers to the next row.
+ dst_ptr += stride;
+ diff_ptr += 8;
+ pred_ptr += 8;
+ }
+}
+void vp9_recon4b_c
+(
+ unsigned char *pred_ptr,  // predictor samples, 16 entries per row
+ short *diff_ptr,  // residual samples, 16 entries per row
+ unsigned char *dst_ptr,  // output samples
+ int stride  // distance between output rows
+) {
+ int r, c;
+ // Reconstructs 4 rows of 16 samples: dst = clamp(pred + diff) to [0, 255].
+ for (r = 0; r < 4; r++) {
+ for (c = 0; c < 16; c++) {
+ int a = diff_ptr[c] + pred_ptr[c];
+
+ if (a < 0)
+ a = 0;
+
+ if (a > 255)
+ a = 255;
+
+ dst_ptr[c] = (unsigned char) a;
+ }
+ // Advance all three pointers to the next row.
+ dst_ptr += stride;
+ diff_ptr += 16;
+ pred_ptr += 16;
+ }
+}
+
+void vp9_recon2b_c
+(
+ unsigned char *pred_ptr,  // predictor samples, 8 entries per row
+ short *diff_ptr,  // residual samples, 8 entries per row
+ unsigned char *dst_ptr,  // output samples
+ int stride  // distance between output rows
+) {
+ int r, c;
+ // Reconstructs 4 rows of 8 samples: dst = clamp(pred + diff) to [0, 255].
+ for (r = 0; r < 4; r++) {
+ for (c = 0; c < 8; c++) {
+ int a = diff_ptr[c] + pred_ptr[c];
+
+ if (a < 0)
+ a = 0;
+
+ if (a > 255)
+ a = 255;
+
+ dst_ptr[c] = (unsigned char) a;
+ }
+ // Advance all three pointers to the next row.
+ dst_ptr += stride;
+ diff_ptr += 8;
+ pred_ptr += 8;
+ }
+}
+
+#if CONFIG_SUPERBLOCKS
+void vp9_recon_mby_s_c(MACROBLOCKD *xd, uint8_t *dst) {
+ int x, y;
+ BLOCKD *b = &xd->block[0];  // stride and residual come from block 0
+ int stride = b->dst_stride;
+ short *diff = b->diff;
+ // In place over 16x16 samples: dst = clamp(dst + diff) to [0, 255].
+ for (y = 0; y < 16; y++) {
+ for (x = 0; x < 16; x++) {
+ int a = dst[x] + diff[x];
+ if (a < 0)
+ a = 0;
+ else if (a > 255)
+ a = 255;
+ dst[x] = a;
+ }
+ dst += stride;
+ diff += 16;  // residual rows are 16 entries apart
+ }
+}
+
+void vp9_recon_mbuv_s_c(MACROBLOCKD *xd, uint8_t *udst, uint8_t *vdst) {
+ int x, y, i;
+ uint8_t *dst = udst;
+ // Pass 0 updates udst using block 16; pass 1 updates vdst using block 20.
+ for (i = 0; i < 2; i++, dst = vdst) {
+ BLOCKD *b = &xd->block[16 + 4 * i];
+ int stride = b->dst_stride;
+ short *diff = b->diff;
+ // In place over 8x8 samples: dst = clamp(dst + diff) to [0, 255].
+ for (y = 0; y < 8; y++) {
+ for (x = 0; x < 8; x++) {
+ int a = dst[x] + diff[x];
+ if (a < 0)
+ a = 0;
+ else if (a > 255)
+ a = 255;
+ dst[x] = a;
+ }
+ dst += stride;
+ diff += 8;  // residual rows are 8 entries apart
+ }
+ }
+}
+#endif
+
+void vp9_recon_mby_c(MACROBLOCKD *xd) {
+ int i;
+ // Runs vp9_recon4b on blocks 0, 4, 8 and 12, one call per step of four.
+ for (i = 0; i < 16; i += 4) {
+ BLOCKD *b = &xd->block[i];
+ // Destination is *(b->base_dst) + b->dst with row stride b->dst_stride.
+ vp9_recon4b(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+ }
+}
+
+void vp9_recon_mb_c(MACROBLOCKD *xd) {
+ int i;
+ // Blocks 0, 4, 8 and 12 go through vp9_recon4b (same loop as vp9_recon_mby_c).
+ for (i = 0; i < 16; i += 4) {
+ BLOCKD *b = &xd->block[i];
+
+ vp9_recon4b(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+ }
+ // Blocks 16, 18, 20 and 22 go through vp9_recon2b.
+ for (i = 16; i < 24; i += 2) {
+ BLOCKD *b = &xd->block[i];
+
+ vp9_recon2b(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+ }
+}
diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c
new file mode 100644
index 0000000..23d0ae9
--- /dev/null
+++ b/vp9/common/vp9_reconinter.c
@@ -0,0 +1,1144 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "./vpx_config.h"
+#include "vpx/vpx_integer.h"
+#include "vp9/common/vp9_blockd.h"
+#include "vp9/common/vp9_reconinter.h"
+#include "vp9/common/vp9_reconintra.h"
+
+void vp9_setup_interp_filters(MACROBLOCKD *xd,  /* receives the function pointers */
+ INTERPOLATIONFILTERTYPE mcomp_filter_type,  /* selects which set is installed */
+ VP9_COMMON *cm) {  /* not referenced in the body */
+ if (mcomp_filter_type == SIXTAP) {  /* every branch assigns the same 7 xd fields */
+ xd->subpixel_predict = vp9_sixtap_predict;
+ xd->subpixel_predict8x4 = vp9_sixtap_predict8x4;
+ xd->subpixel_predict8x8 = vp9_sixtap_predict8x8;
+ xd->subpixel_predict16x16 = vp9_sixtap_predict16x16;
+ xd->subpixel_predict_avg = vp9_sixtap_predict_avg;
+ xd->subpixel_predict_avg8x8 = vp9_sixtap_predict_avg8x8;
+ xd->subpixel_predict_avg16x16 = vp9_sixtap_predict_avg16x16;
+ } else if (mcomp_filter_type == EIGHTTAP || mcomp_filter_type == SWITCHABLE) {
+ xd->subpixel_predict = vp9_eighttap_predict;  /* SWITCHABLE installs the plain eight-tap set */
+ xd->subpixel_predict8x4 = vp9_eighttap_predict8x4;
+ xd->subpixel_predict8x8 = vp9_eighttap_predict8x8;
+ xd->subpixel_predict16x16 = vp9_eighttap_predict16x16;
+ xd->subpixel_predict_avg = vp9_eighttap_predict_avg4x4;
+ xd->subpixel_predict_avg8x8 = vp9_eighttap_predict_avg8x8;
+ xd->subpixel_predict_avg16x16 = vp9_eighttap_predict_avg16x16;
+ } else if (mcomp_filter_type == EIGHTTAP_SHARP) {
+ xd->subpixel_predict = vp9_eighttap_predict_sharp;
+ xd->subpixel_predict8x4 = vp9_eighttap_predict8x4_sharp;
+ xd->subpixel_predict8x8 = vp9_eighttap_predict8x8_sharp;
+ xd->subpixel_predict16x16 = vp9_eighttap_predict16x16_sharp;
+ xd->subpixel_predict_avg = vp9_eighttap_predict_avg4x4_sharp;
+ xd->subpixel_predict_avg8x8 = vp9_eighttap_predict_avg8x8_sharp;
+ xd->subpixel_predict_avg16x16 = vp9_eighttap_predict_avg16x16_sharp_c;  /* NOTE(review): only entry with a _c suffix - confirm intended */
+ }
+ else {  /* any other filter type: bilinear set */
+ xd->subpixel_predict = vp9_bilinear_predict4x4;
+ xd->subpixel_predict8x4 = vp9_bilinear_predict8x4;
+ xd->subpixel_predict8x8 = vp9_bilinear_predict8x8;
+ xd->subpixel_predict16x16 = vp9_bilinear_predict16x16;
+ xd->subpixel_predict_avg = vp9_bilinear_predict_avg4x4;
+ xd->subpixel_predict_avg8x8 = vp9_bilinear_predict_avg8x8;
+ xd->subpixel_predict_avg16x16 = vp9_bilinear_predict_avg16x16;
+ }
+}
+
+/*
+ * Copies a 16x16 block of bytes from src to dst.
+ *
+ * src         top-left source pixel; rows are src_stride bytes apart
+ * dst         top-left destination pixel; rows are dst_stride bytes apart
+ *
+ * When CONFIG_FAST_UNALIGNED is set each row is moved as four 32-bit words
+ * through casted pointers; otherwise it is moved one byte at a time.
+ */
+void vp9_copy_mem16x16_c(unsigned char *src,
+                         int src_stride,
+                         unsigned char *dst,
+                         int dst_stride) {
+  int row;
+
+  for (row = 0; row < 16; ++row) {
+#if CONFIG_FAST_UNALIGNED
+    const uint32_t *const src32 = (const uint32_t *)src;
+    uint32_t *const dst32 = (uint32_t *)dst;
+
+    dst32[0] = src32[0];
+    dst32[1] = src32[1];
+    dst32[2] = src32[2];
+    dst32[3] = src32[3];
+#else
+    int col;
+
+    for (col = 0; col < 16; ++col) {
+      dst[col] = src[col];
+    }
+#endif
+
+    src += src_stride;
+    dst += dst_stride;
+  }
+}
+
+/* Rounded average, in place: every pixel of the 16x16 block at dst becomes
+ * (dst + src + 1) >> 1. Rows are src_stride / dst_stride bytes apart. */
+void vp9_avg_mem16x16_c(unsigned char *src,
+                        int src_stride,
+                        unsigned char *dst,
+                        int dst_stride) {
+  int rows_left = 16;
+
+  while (rows_left-- > 0) {
+    int col;
+    for (col = 0; col < 16; ++col)
+      dst[col] = (unsigned char)((dst[col] + src[col] + 1) >> 1);
+
+    src += src_stride;
+    dst += dst_stride;
+  }
+}
+
+/* Copies an 8x8 block of bytes from src to dst, row by row. With
+ * CONFIG_FAST_UNALIGNED a row is two 32-bit stores, else eight byte stores. */
+void vp9_copy_mem8x8_c(unsigned char *src,
+                       int src_stride,
+                       unsigned char *dst,
+                       int dst_stride) {
+  int row;
+
+  for (row = 0; row < 8; ++row) {
+#if CONFIG_FAST_UNALIGNED
+    const uint32_t *const src32 = (const uint32_t *)src;
+    uint32_t *const dst32 = (uint32_t *)dst;
+
+    dst32[0] = src32[0];
+    dst32[1] = src32[1];
+#else
+    int col;
+    for (col = 0; col < 8; ++col)
+      dst[col] = src[col];
+#endif
+    src += src_stride;
+    dst += dst_stride;
+  }
+}
+
+/* Rounded average, in place: every pixel of the 8x8 block at dst becomes
+ * (dst + src + 1) >> 1. Rows are src_stride / dst_stride bytes apart. */
+void vp9_avg_mem8x8_c(unsigned char *src,
+                      int src_stride,
+                      unsigned char *dst,
+                      int dst_stride) {
+  int rows_left = 8;
+
+  while (rows_left-- > 0) {
+    int col;
+    for (col = 0; col < 8; ++col)
+      dst[col] = (unsigned char)((dst[col] + src[col] + 1) >> 1);
+
+    src += src_stride;
+    dst += dst_stride;
+  }
+}
+
+/* Copies a block 8 bytes wide and 4 rows high from src to dst. With
+ * CONFIG_FAST_UNALIGNED a row is two 32-bit stores, else eight byte stores. */
+void vp9_copy_mem8x4_c(unsigned char *src,
+                       int src_stride,
+                       unsigned char *dst,
+                       int dst_stride) {
+  int row;
+
+  for (row = 0; row < 4; ++row) {
+#if CONFIG_FAST_UNALIGNED
+    const uint32_t *const src32 = (const uint32_t *)src;
+    uint32_t *const dst32 = (uint32_t *)dst;
+
+    dst32[0] = src32[0];
+    dst32[1] = src32[1];
+#else
+    int col;
+    for (col = 0; col < 8; ++col)
+      dst[col] = src[col];
+#endif
+    src += src_stride;
+    dst += dst_stride;
+  }
+}
+
+/*
+ * Builds the 4x4 prediction for block d from its first reference and first
+ * motion vector into d->predictor (row pitch `pitch`). A vector with a
+ * fractional part goes through sppf; otherwise the pixels are copied.
+ */
+void vp9_build_inter_predictors_b(BLOCKD *d, int pitch, vp9_subpix_fn_t sppf) {
+  unsigned char *out = d->predictor;
+  unsigned char *src;
+  int_mv mv;
+  int row;
+
+  mv.as_int = d->bmi.as_mv.first.as_int;
+  src = *(d->base_pre) + d->pre + (mv.as_mv.row >> 3) * d->pre_stride +
+        (mv.as_mv.col >> 3);
+
+  if ((mv.as_mv.row & 7) || (mv.as_mv.col & 7)) {
+    sppf(src, d->pre_stride, (mv.as_mv.col & 7) << 1, (mv.as_mv.row & 7) << 1,
+         out, pitch);
+    return;
+  }
+
+  for (row = 0; row < 4; ++row) {
+#if CONFIG_FAST_UNALIGNED
+    *(uint32_t *)out = *(uint32_t *)src;
+#else
+    out[0] = src[0];
+    out[1] = src[1];
+    out[2] = src[2];
+    out[3] = src[3];
+#endif
+    out += pitch;
+    src += d->pre_stride;
+  }
+}
+
+/*
+ * Second-reference counterpart of vp9_build_inter_predictors_b(): instead of
+ * overwriting d->predictor, the prediction from the second reference frame /
+ * motion vector is averaged into what an earlier call to
+ * vp9_build_inter_predictors_b() left there.
+ *
+ * d      block whose predictor buffer is updated in place
+ * pitch  row pitch of d->predictor in bytes
+ * sppf   called for fractional vectors; this function does no averaging on
+ *        that path, so sppf is expected to average into its output
+ */
+void vp9_build_2nd_inter_predictors_b(BLOCKD *d, int pitch,
+                                      vp9_subpix_fn_t sppf) {
+  unsigned char *out = d->predictor;
+  unsigned char *src;
+  int_mv mv;
+  int row, col;
+
+  mv.as_int = d->bmi.as_mv.second.as_int;
+  src = *(d->base_second_pre) + d->pre + (mv.as_mv.row >> 3) * d->pre_stride +
+        (mv.as_mv.col >> 3);
+
+  if ((mv.as_mv.row & 7) || (mv.as_mv.col & 7)) {
+    sppf(src, d->pre_stride, (mv.as_mv.col & 7) << 1, (mv.as_mv.row & 7) << 1,
+         out, pitch);
+    return;
+  }
+
+  /* Whole-pixel vector: rounded average of the two predictions. */
+  for (row = 0; row < 4; ++row) {
+    for (col = 0; col < 4; ++col) {
+      out[col] = (out[col] + src[col] + 1) >> 1;
+    }
+    out += pitch;
+    src += d->pre_stride;
+  }
+}
+
+/* Builds an 8x8 prediction at d->predictor from the first reference frame:
+ * sub-pixel filtered when the first vector is fractional, else copied. */
+void vp9_build_inter_predictors4b(MACROBLOCKD *xd, BLOCKD *d, int pitch) {
+  int_mv mv;
+  unsigned char *src;
+  int frac_x, frac_y;
+
+  mv.as_int = d->bmi.as_mv.first.as_int;
+  frac_x = mv.as_mv.col & 7;
+  frac_y = mv.as_mv.row & 7;
+  src = *(d->base_pre) + d->pre + (mv.as_mv.row >> 3) * d->pre_stride +
+        (mv.as_mv.col >> 3);
+  if (frac_x || frac_y)
+    xd->subpixel_predict8x8(src, d->pre_stride, frac_x << 1, frac_y << 1,
+                            d->predictor, pitch);
+  else
+    vp9_copy_mem8x8(src, d->pre_stride, d->predictor, pitch);
+}
+
+/*
+ * Second-reference counterpart of vp9_build_inter_predictors4b(): the 8x8
+ * prediction from the second reference frame / motion vector is averaged
+ * into d->predictor, which is expected to hold the result of an earlier
+ * vp9_build_inter_predictors4b() call.
+ */
+void vp9_build_2nd_inter_predictors4b(MACROBLOCKD *xd,
+                                      BLOCKD *d, int pitch) {
+  int_mv mv;
+  unsigned char *src;
+  int frac_x, frac_y;
+
+  mv.as_int = d->bmi.as_mv.second.as_int;
+  frac_x = mv.as_mv.col & 7;
+  frac_y = mv.as_mv.row & 7;
+  src = *(d->base_second_pre) + d->pre + (mv.as_mv.row >> 3) * d->pre_stride +
+        (mv.as_mv.col >> 3);
+
+  if (frac_x || frac_y) {
+    xd->subpixel_predict_avg8x8(src, d->pre_stride, frac_x << 1, frac_y << 1,
+                                d->predictor, pitch);
+  } else {
+    vp9_avg_mem8x8(src, d->pre_stride, d->predictor, pitch);
+  }
+}
+
+/* Builds an 8x4 prediction at d->predictor from the first reference frame:
+ * sub-pixel filtered when the first vector is fractional, else copied. */
+static void build_inter_predictors2b(MACROBLOCKD *xd, BLOCKD *d, int pitch) {
+  int_mv mv;
+  unsigned char *src;
+  int frac_x, frac_y;
+
+  mv.as_int = d->bmi.as_mv.first.as_int;
+  frac_x = mv.as_mv.col & 7;
+  frac_y = mv.as_mv.row & 7;
+  src = *(d->base_pre) + d->pre + (mv.as_mv.row >> 3) * d->pre_stride +
+        (mv.as_mv.col >> 3);
+  if (frac_x || frac_y)
+    xd->subpixel_predict8x4(src, d->pre_stride, frac_x << 1, frac_y << 1,
+                            d->predictor, pitch);
+  else
+    vp9_copy_mem8x4(src, d->pre_stride, d->predictor, pitch);
+}
+
+
+/*encoder only*/
+#if CONFIG_PRED_FILTER
+
+// Select the thresholded (non-zero) or non-thresholded (0) filter_mb variant
+#define USE_THRESH_FILTER 0
+// Number of taps in pred_filter
+#define PRED_FILT_LEN 5
+// The taps below sum to 16, i.e. 1 << filt_shift
+static const int filt_shift = 4;
+static const int pred_filter[PRED_FILT_LEN] = {1, 2, 10, 2, 1};
+// Alternative filter {1, 1, 4, 1, 1}
+// filter_mb: separable 5-tap smoothing of a width x height block, src -> dst
+#if !USE_THRESH_FILTER
+void filter_mb(unsigned char *src, int src_stride,
+ unsigned char *dst, int dst_stride,
+ int width, int height) {
+ int i, j, k;
+ unsigned int Temp[32 * 32];  // horizontal-pass sums, `width` entries per row
+ unsigned int *pTmp = Temp;
+ unsigned char *pSrc = src - (1 + src_stride) * (PRED_FILT_LEN / 2);  // 2 rows up, 2 columns left
+ // Temp receives (height + PRED_FILT_LEN - 1) * width entries; no bound check is made here
+ // Horizontal
+ for (i = 0; i < height + PRED_FILT_LEN - 1; i++) {
+ for (j = 0; j < width; j++) {
+ int sum = 0;
+ for (k = 0; k < PRED_FILT_LEN; k++)
+ sum += pSrc[j + k] * pred_filter[k];
+ pTmp[j] = sum;  // kept unscaled; both passes are normalised together below
+ }
+
+ pSrc += src_stride;
+ pTmp += width;
+ }
+
+ // Vertical
+ pTmp = Temp;
+ for (i = 0; i < width; i++) {  // one output column per iteration
+ unsigned char *pDst = dst + i;
+ for (j = 0; j < height; j++) {
+ int sum = 0;
+ for (k = 0; k < PRED_FILT_LEN; k++)
+ sum += pTmp[(j + k) * width] * pred_filter[k];
+ // Round, then shift by 2 * filt_shift to undo the gain of both passes
+ sum = (sum + ((1 << (filt_shift << 1)) >> 1)) >> (filt_shift << 1);
+ pDst[j * dst_stride] = (sum < 0 ? 0 : sum > 255 ? 255 : sum);  // saturate to 8 bits
+ }
+ ++pTmp;
+ }
+}
+#else
+// Thresholded variant (compiled only if USE_THRESH_FILTER != 0). Based on vp9_post_proc_down_and_across_c (vp9_postproc.c)
+void filter_mb(unsigned char *src, int src_stride,
+ unsigned char *dst, int dst_stride,
+ int width, int height) {
+ unsigned char *pSrc, *pDst;
+ int row;
+ int col;
+ int i;
+ int v;
+ unsigned char d[8];  /* ring buffer of across-pass results, indexed by col & 7 */
+
+ /* TODO flimit should be linked to the quantizer value */
+ int flimit = 7;  /* a tap differing from the centre by more than this disables filtering */
+ /* NOTE(review): abs() is used below; <stdlib.h> is not among this file's visible includes */
+ for (row = 0; row < height; row++) {
+ /* post_proc_down for one row */
+ pSrc = src;
+ pDst = dst;
+
+ for (col = 0; col < width; col++) {
+ int kernel = (1 << (filt_shift - 1));  /* rounding term */
+ int v = pSrc[col];  /* shadows the function-scope v */
+
+ for (i = -2; i <= 2; i++) {  /* taps from two rows above to two rows below */
+ if (abs(v - pSrc[col + i * src_stride]) > flimit)
+ goto down_skip_convolve;  /* keep the unfiltered centre pixel */
+
+ kernel += pred_filter[2 + i] * pSrc[col + i * src_stride];
+ }
+
+ v = (kernel >> filt_shift);
+ down_skip_convolve:
+ pDst[col] = v;
+ }
+
+ /* now post_proc_across */
+ pSrc = dst;  /* horizontal pass runs in place on the row just written */
+ pDst = dst;
+
+ for (i = 0; i < 8; i++)
+ d[i] = pSrc[i];
+
+ for (col = 0; col < width; col++) {
+ int kernel = (1 << (filt_shift - 1));
+ v = pSrc[col];
+
+ d[col & 7] = v;  /* default: unfiltered value */
+
+ for (i = -2; i <= 2; i++) {  /* reads columns col - 2 .. col + 2 of dst */
+ if (abs(v - pSrc[col + i]) > flimit)
+ goto across_skip_convolve;
+
+ kernel += pred_filter[2 + i] * pSrc[col + i];
+ }
+
+ d[col & 7] = (kernel >> filt_shift);
+ across_skip_convolve:
+ /* write-back lags two columns so later taps still read unfiltered pixels */
+ if (col >= 2)
+ pDst[col - 2] = d[(col - 2) & 7];
+ }
+
+ /* handle the last two pixels */
+ pDst[col - 2] = d[(col - 2) & 7];
+ pDst[col - 1] = d[(col - 1) & 7];
+
+ /* next row */
+ src += src_stride;
+ dst += dst_stride;
+ }
+}
+#endif // !USE_THRESH_FILTER
+
+#endif // CONFIG_PRED_FILTER
+
+/* encoder only (original note): derives chroma MVs from blockd's luma MVs, then predicts U and V */
+void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd) {
+ int i, j;
+ BLOCKD *blockd = xd->block;
+
+ /* build uv mvs */
+ for (i = 0; i < 2; i++) {
+ for (j = 0; j < 2; j++) {
+ int yoffset = i * 8 + j * 2;  /* top-left of the four luma blocks summed below */
+ int uoffset = 16 + i * 2 + j;  /* U block index */
+ int voffset = 20 + i * 2 + j;  /* V block index */
+ int temp;
+
+ temp = blockd[yoffset ].bmi.as_mv.first.as_mv.row
+ + blockd[yoffset + 1].bmi.as_mv.first.as_mv.row
+ + blockd[yoffset + 4].bmi.as_mv.first.as_mv.row
+ + blockd[yoffset + 5].bmi.as_mv.first.as_mv.row;
+ /* sum of four vectors / 8, rounded half away from zero */
+ if (temp < 0) temp -= 4;
+ else temp += 4;
+
+ xd->block[uoffset].bmi.as_mv.first.as_mv.row = (temp / 8) &
+ xd->fullpixel_mask;
+
+ temp = blockd[yoffset ].bmi.as_mv.first.as_mv.col
+ + blockd[yoffset + 1].bmi.as_mv.first.as_mv.col
+ + blockd[yoffset + 4].bmi.as_mv.first.as_mv.col
+ + blockd[yoffset + 5].bmi.as_mv.first.as_mv.col;
+
+ if (temp < 0) temp -= 4;
+ else temp += 4;
+
+ blockd[uoffset].bmi.as_mv.first.as_mv.col = (temp / 8) &
+ xd->fullpixel_mask;
+ /* V uses the same vector as U */
+ blockd[voffset].bmi.as_mv.first.as_mv.row =
+ blockd[uoffset].bmi.as_mv.first.as_mv.row;
+ blockd[voffset].bmi.as_mv.first.as_mv.col =
+ blockd[uoffset].bmi.as_mv.first.as_mv.col;
+
+ if (xd->mode_info_context->mbmi.second_ref_frame > 0) {  /* same derivation for the second vectors */
+ temp = blockd[yoffset ].bmi.as_mv.second.as_mv.row
+ + blockd[yoffset + 1].bmi.as_mv.second.as_mv.row
+ + blockd[yoffset + 4].bmi.as_mv.second.as_mv.row
+ + blockd[yoffset + 5].bmi.as_mv.second.as_mv.row;
+
+ if (temp < 0) {
+ temp -= 4;
+ } else {
+ temp += 4;
+ }
+
+ blockd[uoffset].bmi.as_mv.second.as_mv.row = (temp / 8) &
+ xd->fullpixel_mask;
+
+ temp = blockd[yoffset ].bmi.as_mv.second.as_mv.col
+ + blockd[yoffset + 1].bmi.as_mv.second.as_mv.col
+ + blockd[yoffset + 4].bmi.as_mv.second.as_mv.col
+ + blockd[yoffset + 5].bmi.as_mv.second.as_mv.col;
+
+ if (temp < 0) {
+ temp -= 4;
+ } else {
+ temp += 4;
+ }
+
+ blockd[uoffset].bmi.as_mv.second.as_mv.col = (temp / 8) &
+ xd->fullpixel_mask;
+
+ blockd[voffset].bmi.as_mv.second.as_mv.row =
+ blockd[uoffset].bmi.as_mv.second.as_mv.row;
+ blockd[voffset].bmi.as_mv.second.as_mv.col =
+ blockd[uoffset].bmi.as_mv.second.as_mv.col;
+ }
+ }
+ }
+ /* predict chroma blocks 16..23 in pairs, predictor pitch 8 */
+ for (i = 16; i < 24; i += 2) {
+ BLOCKD *d0 = &blockd[i];
+ BLOCKD *d1 = &blockd[i + 1];
+
+ if (d0->bmi.as_mv.first.as_int == d1->bmi.as_mv.first.as_int)
+ build_inter_predictors2b(xd, d0, 8);  /* shared vector: one 8x4 call covers both */
+ else {
+ vp9_build_inter_predictors_b(d0, 8, xd->subpixel_predict);
+ vp9_build_inter_predictors_b(d1, 8, xd->subpixel_predict);
+ }
+
+ if (xd->mode_info_context->mbmi.second_ref_frame > 0) {  /* average in the second reference */
+ vp9_build_2nd_inter_predictors_b(d0, 8, xd->subpixel_predict_avg);
+ vp9_build_2nd_inter_predictors_b(d1, 8, xd->subpixel_predict_avg);
+ }
+ }
+}
+
+/* Clamps an MV that points so far into the UMV border that no visible
+ * pixels are used for reconstruction. A component more than
+ * (16 + VP9_INTERP_EXTEND) pixels beyond the top/left edge, or more than
+ * (15 + VP9_INTERP_EXTEND) pixels beyond the bottom/right edge, is replaced
+ * by the position 16 pixels outside that edge (pixel counts are << 3). */
+static void clamp_mv_to_umv_border(MV *mv, const MACROBLOCKD *xd) {
+  const int far_lo = (16 + VP9_INTERP_EXTEND) << 3;
+  const int far_hi = (15 + VP9_INTERP_EXTEND) << 3;
+  const int snap = 16 << 3;
+
+  if (mv->col < xd->mb_to_left_edge - far_lo)
+    mv->col = xd->mb_to_left_edge - snap;
+  else if (mv->col > xd->mb_to_right_edge + far_hi)
+    mv->col = xd->mb_to_right_edge + snap;
+
+  if (mv->row < xd->mb_to_top_edge - far_lo)
+    mv->row = xd->mb_to_top_edge - snap;
+  else if (mv->row > xd->mb_to_bottom_edge + far_hi)
+    mv->row = xd->mb_to_bottom_edge + snap;
+}
+
+/* Chroma version of clamp_mv_to_umv_border(): the chroma MV is doubled for
+ * the comparison and the clamped value is halved with >> 1. */
+static void clamp_uvmv_to_umv_border(MV *mv, const MACROBLOCKD *xd) {
+  const int far_lo = (16 + VP9_INTERP_EXTEND) << 3;
+  const int far_hi = (15 + VP9_INTERP_EXTEND) << 3;
+  if (2 * mv->col < xd->mb_to_left_edge - far_lo)
+    mv->col = (xd->mb_to_left_edge - (16 << 3)) >> 1;
+  if (2 * mv->col > xd->mb_to_right_edge + far_hi)
+    mv->col = (xd->mb_to_right_edge + (16 << 3)) >> 1;
+  if (2 * mv->row < xd->mb_to_top_edge - far_lo)
+    mv->row = (xd->mb_to_top_edge - (16 << 3)) >> 1;
+  if (2 * mv->row > xd->mb_to_bottom_edge + far_hi)
+    mv->row = (xd->mb_to_bottom_edge + (16 << 3)) >> 1;
+}
+
+/* encoder only (original note): first-reference 16x16 luma prediction into dst_y */
+void vp9_build_1st_inter16x16_predictors_mby(MACROBLOCKD *xd,
+ unsigned char *dst_y,
+ int dst_ystride,
+ int clamp_mvs) {  /* non-zero: clamp the vector to the UMV border first */
+ unsigned char *ptr_base = xd->pre.y_buffer;
+ unsigned char *ptr;
+ int pre_stride = xd->block[0].pre_stride;
+ int_mv ymv;
+
+ ymv.as_int = xd->mode_info_context->mbmi.mv[0].as_int;  /* local copy; mbmi is not modified */
+
+ if (clamp_mvs)
+ clamp_mv_to_umv_border(&ymv.as_mv, xd);
+
+ ptr = ptr_base + (ymv.as_mv.row >> 3) * pre_stride + (ymv.as_mv.col >> 3);  /* whole-pixel part of the vector */
+
+#if CONFIG_PRED_FILTER
+ if (xd->mode_info_context->mbmi.pred_filter_enabled) {
+ if ((ymv.as_mv.row | ymv.as_mv.col) & 7) {
+ // Sub-pel filter needs extended input
+ int len = 15 + (VP9_INTERP_EXTEND << 1);
+ unsigned char Temp[32 * 32]; // Data required by sub-pel filter
+ unsigned char *pTemp = Temp + (VP9_INTERP_EXTEND - 1) * (len + 1);  // skip the extra leading rows/columns
+
+ // Copy extended MB into Temp array, applying the spatial filter
+ filter_mb(ptr - (VP9_INTERP_EXTEND - 1) * (pre_stride + 1), pre_stride,
+ Temp, len, len, len);
+
+ // Sub-pel interpolation
+ xd->subpixel_predict16x16(pTemp, len,
+ (ymv.as_mv.col & 7) << 1,
+ (ymv.as_mv.row & 7) << 1,
+ dst_y, dst_ystride);
+ } else {
+ // Apply spatial filter to create the prediction directly
+ filter_mb(ptr, pre_stride, dst_y, dst_ystride, 16, 16);
+ }
+ } else
+#endif
+ if ((ymv.as_mv.row | ymv.as_mv.col) & 7) {  /* fractional vector: sub-pixel filter */
+ xd->subpixel_predict16x16(ptr, pre_stride,
+ (ymv.as_mv.col & 7) << 1,
+ (ymv.as_mv.row & 7) << 1,
+ dst_y, dst_ystride);
+ } else {  /* whole-pixel vector: plain copy */
+ vp9_copy_mem16x16(ptr, pre_stride, dst_y, dst_ystride);
+ }
+}
+
+void vp9_build_1st_inter16x16_predictors_mbuv(MACROBLOCKD *xd,  /* first-reference 8x8 U and V prediction */
+ unsigned char *dst_u,
+ unsigned char *dst_v,
+ int dst_uvstride) {  /* row pitch shared by dst_u and dst_v */
+ int offset;
+ unsigned char *uptr, *vptr;
+ int pre_stride = xd->block[0].pre_stride;
+ int_mv _o16x16mv;  /* luma vector after clamping, before chroma scaling */
+ int_mv _16x16mv;
+
+ _16x16mv.as_int = xd->mode_info_context->mbmi.mv[0].as_int;
+
+ if (xd->mode_info_context->mbmi.need_to_clamp_mvs)
+ clamp_mv_to_umv_border(&_16x16mv.as_mv, xd);
+
+ _o16x16mv = _16x16mv;
+ /* calc uv motion vectors */
+ if (_16x16mv.as_mv.row < 0)
+ _16x16mv.as_mv.row -= 1;
+ else
+ _16x16mv.as_mv.row += 1;
+
+ if (_16x16mv.as_mv.col < 0)
+ _16x16mv.as_mv.col -= 1;
+ else
+ _16x16mv.as_mv.col += 1;
+ /* halve the adjusted vector (C division truncates toward zero) */
+ _16x16mv.as_mv.row /= 2;
+ _16x16mv.as_mv.col /= 2;
+
+ _16x16mv.as_mv.row &= xd->fullpixel_mask;
+ _16x16mv.as_mv.col &= xd->fullpixel_mask;
+
+ pre_stride >>= 1;  /* chroma stride is taken as half of block[0].pre_stride */
+ offset = (_16x16mv.as_mv.row >> 3) * pre_stride + (_16x16mv.as_mv.col >> 3);
+ uptr = xd->pre.u_buffer + offset;
+ vptr = xd->pre.v_buffer + offset;
+
+#if CONFIG_PRED_FILTER
+ if (xd->mode_info_context->mbmi.pred_filter_enabled) {
+ int i;
+ unsigned char *pSrc = uptr;
+ unsigned char *pDst = dst_u;
+ int len = 7 + (VP9_INTERP_EXTEND << 1);
+ unsigned char Temp[32 * 32]; // Data required by the sub-pel filter
+ unsigned char *pTemp = Temp + (VP9_INTERP_EXTEND - 1) * (len + 1);
+
+ // U & V
+ for (i = 0; i < 2; i++) {
+ if (_o16x16mv.as_int & 0x000f000f) {  // low 4 bits of either half of the luma vector
+ // Copy extended MB into Temp array, applying the spatial filter
+ filter_mb(pSrc - (VP9_INTERP_EXTEND - 1) * (pre_stride + 1), pre_stride,
+ Temp, len, len, len);
+
+ // Sub-pel filter
+ xd->subpixel_predict8x8(pTemp, len,
+ _o16x16mv.as_mv.col & 15,
+ _o16x16mv.as_mv.row & 15,
+ pDst, dst_uvstride);
+ } else {
+ filter_mb(pSrc, pre_stride, pDst, dst_uvstride, 8, 8);
+ }
+
+ // V
+ pSrc = vptr;
+ pDst = dst_v;
+ }
+ } else
+#endif
+ if (_o16x16mv.as_int & 0x000f000f) {  /* sub-pixel offsets are the luma vector's low 4 bits */
+ xd->subpixel_predict8x8(uptr, pre_stride, _o16x16mv.as_mv.col & 15,
+ _o16x16mv.as_mv.row & 15, dst_u, dst_uvstride);
+ xd->subpixel_predict8x8(vptr, pre_stride, _o16x16mv.as_mv.col & 15,
+ _o16x16mv.as_mv.row & 15, dst_v, dst_uvstride);
+ } else {  /* no fractional part: plain 8x8 copies */
+ vp9_copy_mem8x8(uptr, pre_stride, dst_u, dst_uvstride);
+ vp9_copy_mem8x8(vptr, pre_stride, dst_v, dst_uvstride);
+ }
+}
+
+
+/* Builds the first-reference 16x16 prediction for all three planes, passing
+ * mbmi.need_to_clamp_mvs as the luma clamp flag. */
+void vp9_build_1st_inter16x16_predictors_mb(MACROBLOCKD *xd,
+    unsigned char *dst_y, unsigned char *dst_u, unsigned char *dst_v,
+    int dst_ystride, int dst_uvstride) {
+  const int clamp = xd->mode_info_context->mbmi.need_to_clamp_mvs;
+  vp9_build_1st_inter16x16_predictors_mby(xd, dst_y, dst_ystride, clamp);
+  vp9_build_1st_inter16x16_predictors_mbuv(xd, dst_u, dst_v, dst_uvstride);
+}
+
+#if CONFIG_SUPERBLOCKS
+void vp9_build_inter32x32_predictors_sb(MACROBLOCKD *x,  /* 32x32 prediction as four 16x16 quadrants */
+ unsigned char *dst_y,
+ unsigned char *dst_u,
+ unsigned char *dst_v,
+ int dst_ystride,
+ int dst_uvstride) {
+ uint8_t *y1 = x->pre.y_buffer, *u1 = x->pre.u_buffer, *v1 = x->pre.v_buffer;
+ uint8_t *y2 = x->second_pre.y_buffer, *u2 = x->second_pre.u_buffer,
+ *v2 = x->second_pre.v_buffer;
+ int edge[4], n;
+ /* save the edge distances; they are overwritten per quadrant and restored below */
+ edge[0] = x->mb_to_top_edge;
+ edge[1] = x->mb_to_bottom_edge;
+ edge[2] = x->mb_to_left_edge;
+ edge[3] = x->mb_to_right_edge;
+
+ for (n = 0; n < 4; n++) {
+ const int x_idx = n & 1, y_idx = n >> 1;  /* quadrant column / row */
+
+ x->mb_to_top_edge = edge[0] - ((y_idx * 16) << 3);
+ x->mb_to_bottom_edge = edge[1] + (((1 - y_idx) * 16) << 3);
+ x->mb_to_left_edge = edge[2] - ((x_idx * 16) << 3);
+ x->mb_to_right_edge = edge[3] + (((1 - x_idx) * 16) << 3);
+ /* point the reference buffers at this quadrant */
+ x->pre.y_buffer = y1 + y_idx * 16 * x->pre.y_stride + x_idx * 16;
+ x->pre.u_buffer = u1 + y_idx * 8 * x->pre.uv_stride + x_idx * 8;
+ x->pre.v_buffer = v1 + y_idx * 8 * x->pre.uv_stride + x_idx * 8;
+
+ vp9_build_1st_inter16x16_predictors_mb(x,
+ dst_y + y_idx * 16 * dst_ystride + x_idx * 16,
+ dst_u + y_idx * 8 * dst_uvstride + x_idx * 8,
+ dst_v + y_idx * 8 * dst_uvstride + x_idx * 8,
+ dst_ystride, dst_uvstride);
+ if (x->mode_info_context->mbmi.second_ref_frame > 0) {
+ x->second_pre.y_buffer = y2 + y_idx * 16 * x->pre.y_stride + x_idx * 16;  /* NOTE(review): uses x->pre strides for second_pre - confirm they always match */
+ x->second_pre.u_buffer = u2 + y_idx * 8 * x->pre.uv_stride + x_idx * 8;
+ x->second_pre.v_buffer = v2 + y_idx * 8 * x->pre.uv_stride + x_idx * 8;
+
+ vp9_build_2nd_inter16x16_predictors_mb(x,
+ dst_y + y_idx * 16 * dst_ystride + x_idx * 16,
+ dst_u + y_idx * 8 * dst_uvstride + x_idx * 8,
+ dst_v + y_idx * 8 * dst_uvstride + x_idx * 8,
+ dst_ystride, dst_uvstride);
+ }
+ }
+ /* restore everything that was modified on x */
+ x->mb_to_top_edge = edge[0];
+ x->mb_to_bottom_edge = edge[1];
+ x->mb_to_left_edge = edge[2];
+ x->mb_to_right_edge = edge[3];
+
+ x->pre.y_buffer = y1;
+ x->pre.u_buffer = u1;
+ x->pre.v_buffer = v1;
+
+ if (x->mode_info_context->mbmi.second_ref_frame > 0) {
+ x->second_pre.y_buffer = y2;
+ x->second_pre.u_buffer = u2;
+ x->second_pre.v_buffer = v2;
+ }
+
+#if CONFIG_COMP_INTERINTRA_PRED
+ if (x->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) {  /* combined inter-intra on the full 32x32 output */
+ vp9_build_interintra_32x32_predictors_sb(
+ x, dst_y, dst_u, dst_v, dst_ystride, dst_uvstride);
+ }
+#endif
+}
+#endif
+
+/*
+ * The following functions should be called after an initial
+ * call to vp9_build_1st_inter16x16_predictors_mb() or _mby()/_mbuv().
+ * They run the averaging sub-pixel filter installed in xd on a
+ * (different) ref frame and average the result with the output
+ * of the first prediction. The second reference frame is stored
+ * in xd->second_pre (the reference frame index is in
+ * xd->mode_info_context->mbmi.second_ref_frame). The second
+ * motion vector is xd->mode_info_context->mbmi.mv[1].
+ *
+ * This allows blending prediction from two reference frames
+ * which sometimes leads to better prediction than from a
+ * single reference frame.
+ */
+void vp9_build_2nd_inter16x16_predictors_mby(MACROBLOCKD *xd,
+ unsigned char *dst_y,
+ int dst_ystride) {
+ unsigned char *ptr;
+
+ int_mv _16x16mv;
+ int mv_row;
+ int mv_col;
+
+ unsigned char *ptr_base = xd->second_pre.y_buffer;
+ int pre_stride = xd->block[0].pre_stride;  /* block[0].pre_stride is also used for the second reference */
+
+ _16x16mv.as_int = xd->mode_info_context->mbmi.mv[1].as_int;
+
+ if (xd->mode_info_context->mbmi.need_to_clamp_secondmv)
+ clamp_mv_to_umv_border(&_16x16mv.as_mv, xd);
+
+ mv_row = _16x16mv.as_mv.row;
+ mv_col = _16x16mv.as_mv.col;
+
+ ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3);
+
+#if CONFIG_PRED_FILTER
+ if (xd->mode_info_context->mbmi.pred_filter_enabled) {
+ if ((mv_row | mv_col) & 7) {
+ // Sub-pel filter needs extended input
+ int len = 15 + (VP9_INTERP_EXTEND << 1);
+ unsigned char Temp[32 * 32]; // Data required by sub-pel filter
+ unsigned char *pTemp = Temp + (VP9_INTERP_EXTEND - 1) * (len + 1);
+
+ // Copy extended MB into Temp array, applying the spatial filter
+ filter_mb(ptr - (VP9_INTERP_EXTEND - 1) * (pre_stride + 1), pre_stride,
+ Temp, len, len, len);
+
+ // Sub-pel filter
+ xd->subpixel_predict_avg16x16(pTemp, len, (mv_col & 7) << 1,
+ (mv_row & 7) << 1, dst_y, dst_ystride);
+ } else {
+ // TODO Needs to AVERAGE with the dst_y
+ // For now, do not apply the prediction filter in these cases!
+ vp9_avg_mem16x16(ptr, pre_stride, dst_y, dst_ystride);
+ }
+ } else
+#endif // CONFIG_PRED_FILTER
+ {
+ if ((mv_row | mv_col) & 7) {  /* fractional vector: averaging sub-pixel filter */
+ xd->subpixel_predict_avg16x16(ptr, pre_stride, (mv_col & 7) << 1,
+ (mv_row & 7) << 1, dst_y, dst_ystride);
+ } else {  /* whole-pixel vector: rounded average with dst_y */
+ vp9_avg_mem16x16(ptr, pre_stride, dst_y, dst_ystride);
+ }
+ }
+}
+
+void vp9_build_2nd_inter16x16_predictors_mbuv(MACROBLOCKD *xd,  /* averages the 2nd-reference chroma into dst_u / dst_v */
+ unsigned char *dst_u,
+ unsigned char *dst_v,
+ int dst_uvstride) {
+ int offset;
+ unsigned char *uptr, *vptr;
+
+ int_mv _16x16mv;
+ int mv_row;
+ int mv_col;
+ int omv_row, omv_col;  /* luma vector components before chroma scaling */
+
+ int pre_stride = xd->block[0].pre_stride;
+
+ _16x16mv.as_int = xd->mode_info_context->mbmi.mv[1].as_int;
+
+ if (xd->mode_info_context->mbmi.need_to_clamp_secondmv)
+ clamp_mv_to_umv_border(&_16x16mv.as_mv, xd);
+
+ mv_row = _16x16mv.as_mv.row;
+ mv_col = _16x16mv.as_mv.col;
+
+ /* calc uv motion vectors */
+ omv_row = mv_row;
+ omv_col = mv_col;
+ mv_row = (mv_row + (mv_row > 0)) >> 1;  /* halve; adds 1 first when positive */
+ mv_col = (mv_col + (mv_col > 0)) >> 1;
+
+ mv_row &= xd->fullpixel_mask;
+ mv_col &= xd->fullpixel_mask;
+
+ pre_stride >>= 1;  /* chroma stride is taken as half of block[0].pre_stride */
+ offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
+ uptr = xd->second_pre.u_buffer + offset;
+ vptr = xd->second_pre.v_buffer + offset;
+
+#if CONFIG_PRED_FILTER
+ if (xd->mode_info_context->mbmi.pred_filter_enabled) {
+ int i;
+ int len = 7 + (VP9_INTERP_EXTEND << 1);
+ unsigned char Temp[32 * 32]; // Data required by sub-pel filter
+ unsigned char *pTemp = Temp + (VP9_INTERP_EXTEND - 1) * (len + 1);
+ unsigned char *pSrc = uptr;
+ unsigned char *pDst = dst_u;
+
+ // U & V
+ for (i = 0; i < 2; i++) {
+ if ((omv_row | omv_col) & 15) {
+ // Copy extended MB into Temp array, applying the spatial filter
+ filter_mb(pSrc - (VP9_INTERP_EXTEND - 1) * (pre_stride + 1), pre_stride,
+ Temp, len, len, len);
+
+ // Sub-pel filter
+ xd->subpixel_predict_avg8x8(pTemp, len, omv_col & 15,
+ omv_row & 15, pDst, dst_uvstride);
+ } else {
+ // TODO Needs to AVERAGE with the dst_[u|v]
+ // For now, do not apply the prediction filter here!
+ vp9_avg_mem8x8(pSrc, pre_stride, pDst, dst_uvstride);
+ }
+
+ // V
+ pSrc = vptr;
+ pDst = dst_v;
+ }
+ } else
+#endif // CONFIG_PRED_FILTER
+ if ((omv_row | omv_col) & 15) {  /* sub-pixel offsets are the luma vector's low 4 bits */
+ xd->subpixel_predict_avg8x8(uptr, pre_stride, omv_col & 15,
+ omv_row & 15, dst_u, dst_uvstride);
+ xd->subpixel_predict_avg8x8(vptr, pre_stride, omv_col & 15,
+ omv_row & 15, dst_v, dst_uvstride);
+ } else {  /* no fractional part: rounded average with the existing output */
+ vp9_avg_mem8x8(uptr, pre_stride, dst_u, dst_uvstride);
+ vp9_avg_mem8x8(vptr, pre_stride, dst_v, dst_uvstride);
+ }
+}
+
+/* Averages the second-reference 16x16 prediction into dst_y, dst_u and
+ * dst_v, which are expected to hold the first-reference prediction:
+ * luma first, then both chroma planes. */
+void vp9_build_2nd_inter16x16_predictors_mb(MACROBLOCKD *xd,
+    unsigned char *dst_y, unsigned char *dst_u, unsigned char *dst_v,
+    int dst_ystride, int dst_uvstride) {
+  vp9_build_2nd_inter16x16_predictors_mby(xd, dst_y, dst_ystride);
+  vp9_build_2nd_inter16x16_predictors_mbuv(xd, dst_u, dst_v, dst_uvstride);
+}
+
+static void build_inter4x4_predictors_mb(MACROBLOCKD *xd) {  /* split-mode prediction into the blocks' predictor buffers */
+ int i;
+ MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi;
+ BLOCKD *blockd = xd->block;
+
+ if (xd->mode_info_context->mbmi.partitioning != PARTITIONING_4X4) {  /* handled as four 8x8 luma predictions */
+ blockd[ 0].bmi = xd->mode_info_context->bmi[ 0];
+ blockd[ 2].bmi = xd->mode_info_context->bmi[ 2];
+ blockd[ 8].bmi = xd->mode_info_context->bmi[ 8];
+ blockd[10].bmi = xd->mode_info_context->bmi[10];
+
+ if (mbmi->need_to_clamp_mvs) {  /* clamps the copies in blockd, not mode_info_context */
+ clamp_mv_to_umv_border(&blockd[ 0].bmi.as_mv.first.as_mv, xd);
+ clamp_mv_to_umv_border(&blockd[ 2].bmi.as_mv.first.as_mv, xd);
+ clamp_mv_to_umv_border(&blockd[ 8].bmi.as_mv.first.as_mv, xd);
+ clamp_mv_to_umv_border(&blockd[10].bmi.as_mv.first.as_mv, xd);
+ if (mbmi->second_ref_frame > 0) {
+ clamp_mv_to_umv_border(&blockd[ 0].bmi.as_mv.second.as_mv, xd);
+ clamp_mv_to_umv_border(&blockd[ 2].bmi.as_mv.second.as_mv, xd);
+ clamp_mv_to_umv_border(&blockd[ 8].bmi.as_mv.second.as_mv, xd);
+ clamp_mv_to_umv_border(&blockd[10].bmi.as_mv.second.as_mv, xd);
+ }
+ }
+
+ /* first-reference 8x8 predictions, luma predictor pitch 16 */
+ vp9_build_inter_predictors4b(xd, &blockd[ 0], 16);
+ vp9_build_inter_predictors4b(xd, &blockd[ 2], 16);
+ vp9_build_inter_predictors4b(xd, &blockd[ 8], 16);
+ vp9_build_inter_predictors4b(xd, &blockd[10], 16);
+
+ if (mbmi->second_ref_frame > 0) {  /* average in the second reference */
+ vp9_build_2nd_inter_predictors4b(xd, &blockd[ 0], 16);
+ vp9_build_2nd_inter_predictors4b(xd, &blockd[ 2], 16);
+ vp9_build_2nd_inter_predictors4b(xd, &blockd[ 8], 16);
+ vp9_build_2nd_inter_predictors4b(xd, &blockd[10], 16);
+ }
+ } else {  /* PARTITIONING_4X4: luma blocks are processed in pairs */
+ for (i = 0; i < 16; i += 2) {
+ BLOCKD *d0 = &blockd[i];
+ BLOCKD *d1 = &blockd[i + 1];
+
+ blockd[i + 0].bmi = xd->mode_info_context->bmi[i + 0];
+ blockd[i + 1].bmi = xd->mode_info_context->bmi[i + 1];
+
+ if (mbmi->need_to_clamp_mvs) {
+ clamp_mv_to_umv_border(&blockd[i + 0].bmi.as_mv.first.as_mv, xd);
+ clamp_mv_to_umv_border(&blockd[i + 1].bmi.as_mv.first.as_mv, xd);
+ if (mbmi->second_ref_frame > 0) {
+ clamp_mv_to_umv_border(&blockd[i + 0].bmi.as_mv.second.as_mv, xd);
+ clamp_mv_to_umv_border(&blockd[i + 1].bmi.as_mv.second.as_mv, xd);
+ }
+ }
+
+ if (d0->bmi.as_mv.first.as_int == d1->bmi.as_mv.first.as_int)
+ build_inter_predictors2b(xd, d0, 16);  /* shared vector: one 8x4 call covers both */
+ else {
+ vp9_build_inter_predictors_b(d0, 16, xd->subpixel_predict);
+ vp9_build_inter_predictors_b(d1, 16, xd->subpixel_predict);
+ }
+
+ if (mbmi->second_ref_frame > 0) {
+ vp9_build_2nd_inter_predictors_b(d0, 16, xd->subpixel_predict_avg);
+ vp9_build_2nd_inter_predictors_b(d1, 16, xd->subpixel_predict_avg);
+ }
+ }
+ }
+ /* chroma blocks 16..23 in pairs, predictor pitch 8; their bmi is not filled in by this function */
+ for (i = 16; i < 24; i += 2) {
+ BLOCKD *d0 = &blockd[i];
+ BLOCKD *d1 = &blockd[i + 1];
+
+ if (d0->bmi.as_mv.first.as_int == d1->bmi.as_mv.first.as_int)
+ build_inter_predictors2b(xd, d0, 8);
+ else {
+ vp9_build_inter_predictors_b(d0, 8, xd->subpixel_predict);
+ vp9_build_inter_predictors_b(d1, 8, xd->subpixel_predict);
+ }
+
+ if (mbmi->second_ref_frame > 0) {
+ vp9_build_2nd_inter_predictors_b(d0, 8, xd->subpixel_predict_avg);
+ vp9_build_2nd_inter_predictors_b(d1, 8, xd->subpixel_predict_avg);
+ }
+ }
+}
+
+/*
+ * Derives the chroma motion vectors of a split macroblock from the luma
+ * vectors in xd->mode_info_context->bmi[].
+ *
+ * For each of the four U blocks (16..19) the vector is the sum of the four
+ * luma vectors at yoffset, +1, +4 and +5 divided by 8, rounded half away
+ * from zero, masked with xd->fullpixel_mask and then clamped to the UMV
+ * border. The matching V block (20..23) receives a copy. Second vectors get
+ * the same treatment when mbmi.second_ref_frame > 0.
+ */
+static void build_4x4uvmvs(MACROBLOCKD *xd) {
+  int i, j;
+  BLOCKD *blockd = xd->block;
+
+  for (i = 0; i < 2; i++) {
+    for (j = 0; j < 2; j++) {
+      int yoffset = i * 8 + j * 2;
+      int uoffset = 16 + i * 2 + j;
+      int voffset = 20 + i * 2 + j;
+      int temp;
+
+      temp = xd->mode_info_context->bmi[yoffset + 0].as_mv.first.as_mv.row
+             + xd->mode_info_context->bmi[yoffset + 1].as_mv.first.as_mv.row
+             + xd->mode_info_context->bmi[yoffset + 4].as_mv.first.as_mv.row
+             + xd->mode_info_context->bmi[yoffset + 5].as_mv.first.as_mv.row;
+
+      if (temp < 0) temp -= 4;
+      else temp += 4;
+
+      blockd[uoffset].bmi.as_mv.first.as_mv.row = (temp / 8) &
+                                                  xd->fullpixel_mask;
+
+      temp = xd->mode_info_context->bmi[yoffset + 0].as_mv.first.as_mv.col
+             + xd->mode_info_context->bmi[yoffset + 1].as_mv.first.as_mv.col
+             + xd->mode_info_context->bmi[yoffset + 4].as_mv.first.as_mv.col
+             + xd->mode_info_context->bmi[yoffset + 5].as_mv.first.as_mv.col;
+
+      if (temp < 0) temp -= 4;
+      else temp += 4;
+
+      blockd[uoffset].bmi.as_mv.first.as_mv.col = (temp / 8) &
+                                                  xd->fullpixel_mask;
+
+      /* Clamped unconditionally (no need_to_clamp_mvs test), exactly once. */
+      clamp_uvmv_to_umv_border(&blockd[uoffset].bmi.as_mv.first.as_mv, xd);
+
+      blockd[voffset].bmi.as_mv.first.as_mv.row =
+        blockd[uoffset].bmi.as_mv.first.as_mv.row;
+      blockd[voffset].bmi.as_mv.first.as_mv.col =
+        blockd[uoffset].bmi.as_mv.first.as_mv.col;
+
+      if (xd->mode_info_context->mbmi.second_ref_frame > 0) {
+        temp = xd->mode_info_context->bmi[yoffset + 0].as_mv.second.as_mv.row
+               + xd->mode_info_context->bmi[yoffset + 1].as_mv.second.as_mv.row
+               + xd->mode_info_context->bmi[yoffset + 4].as_mv.second.as_mv.row
+               + xd->mode_info_context->bmi[yoffset + 5].as_mv.second.as_mv.row;
+
+        if (temp < 0) {
+          temp -= 4;
+        } else {
+          temp += 4;
+        }
+
+        blockd[uoffset].bmi.as_mv.second.as_mv.row = (temp / 8) &
+                                                     xd->fullpixel_mask;
+
+        temp = xd->mode_info_context->bmi[yoffset + 0].as_mv.second.as_mv.col
+               + xd->mode_info_context->bmi[yoffset + 1].as_mv.second.as_mv.col
+               + xd->mode_info_context->bmi[yoffset + 4].as_mv.second.as_mv.col
+               + xd->mode_info_context->bmi[yoffset + 5].as_mv.second.as_mv.col;
+
+        if (temp < 0) {
+          temp -= 4;
+        } else {
+          temp += 4;
+        }
+
+        blockd[uoffset].bmi.as_mv.second.as_mv.col = (temp / 8) &
+                                                     xd->fullpixel_mask;
+
+        /* Clamped unconditionally, exactly once, like the first vector. */
+        clamp_uvmv_to_umv_border(
+          &blockd[uoffset].bmi.as_mv.second.as_mv, xd);
+
+        blockd[voffset].bmi.as_mv.second.as_mv.row =
+          blockd[uoffset].bmi.as_mv.second.as_mv.row;
+        blockd[voffset].bmi.as_mv.second.as_mv.col =
+          blockd[uoffset].bmi.as_mv.second.as_mv.col;
+      }
+    }
+  }
+}
+
+void vp9_build_inter_predictors_mb(MACROBLOCKD *xd) {  /* inter prediction for one macroblock */
+ if (xd->mode_info_context->mbmi.mode != SPLITMV) {  /* single 16x16 vector: predict into xd->predictor */
+ vp9_build_1st_inter16x16_predictors_mb(xd, xd->predictor,
+ &xd->predictor[256],
+ &xd->predictor[320], 16, 8);
+
+ if (xd->mode_info_context->mbmi.second_ref_frame > 0) {
+ /* 256 = offset of U plane in Y+U+V buffer;
+ * 320 = offset of V plane in Y+U+V buffer.
+ * (256=16x16, 320=16x16+8x8). */
+ vp9_build_2nd_inter16x16_predictors_mb(xd, xd->predictor,
+ &xd->predictor[256],
+ &xd->predictor[320], 16, 8);
+ }
+#if CONFIG_COMP_INTERINTRA_PRED
+ else if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) {  /* combined inter-intra prediction */
+ vp9_build_interintra_16x16_predictors_mb(xd, xd->predictor,
+ &xd->predictor[256],
+ &xd->predictor[320], 16, 8);
+ }
+#endif
+ } else {  /* SPLITMV: derive the chroma vectors, then predict block by block */
+ build_4x4uvmvs(xd);
+ build_inter4x4_predictors_mb(xd);
+ }
+}
diff --git a/vp9/common/vp9_reconinter.h b/vp9/common/vp9_reconinter.h
new file mode 100644
index 0000000..f3292f1
--- /dev/null
+++ b/vp9/common/vp9_reconinter.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_COMMON_VP9_RECONINTER_H_
+#define VP9_COMMON_VP9_RECONINTER_H_
+
+#include "vp9/common/vp9_onyxc_int.h"
+
+/* Prototypes of the VP9 inter-prediction builders. The "1st"/"2nd" variants
+ * take the destination plane pointers and strides explicitly. */
+
+extern void vp9_build_1st_inter16x16_predictors_mby(MACROBLOCKD *xd,
+ unsigned char *dst_y,
+ int dst_ystride,
+ int clamp_mvs);
+
+extern void vp9_build_1st_inter16x16_predictors_mbuv(MACROBLOCKD *xd,
+ unsigned char *dst_u,
+ unsigned char *dst_v,
+ int dst_uvstride);
+
+extern void vp9_build_1st_inter16x16_predictors_mb(MACROBLOCKD *xd,
+ unsigned char *dst_y,
+ unsigned char *dst_u,
+ unsigned char *dst_v,
+ int dst_ystride,
+ int dst_uvstride);
+
+extern void vp9_build_2nd_inter16x16_predictors_mby(MACROBLOCKD *xd,
+ unsigned char *dst_y,
+ int dst_ystride);
+
+extern void vp9_build_2nd_inter16x16_predictors_mbuv(MACROBLOCKD *xd,
+ unsigned char *dst_u,
+ unsigned char *dst_v,
+ int dst_uvstride);
+
+extern void vp9_build_2nd_inter16x16_predictors_mb(MACROBLOCKD *xd,
+ unsigned char *dst_y,
+ unsigned char *dst_u,
+ unsigned char *dst_v,
+ int dst_ystride,
+ int dst_uvstride);
+
+#if CONFIG_SUPERBLOCKS
+extern void vp9_build_inter32x32_predictors_sb(MACROBLOCKD *x,
+ unsigned char *dst_y,
+ unsigned char *dst_u,
+ unsigned char *dst_v,
+ int dst_ystride,
+ int dst_uvstride);
+#endif  // CONFIG_SUPERBLOCKS
+
+/* Predicts a whole macroblock into xd->predictor (Y at 0, U at 256, V at
+ * 320). */
+extern void vp9_build_inter_predictors_mb(MACROBLOCKD *xd);
+
+extern void vp9_build_inter_predictors_b(BLOCKD *d, int pitch,
+ vp9_subpix_fn_t sppf);
+
+extern void vp9_build_2nd_inter_predictors_b(BLOCKD *d, int pitch,
+ vp9_subpix_fn_t sppf);
+
+extern void vp9_build_inter_predictors4b(MACROBLOCKD *xd, BLOCKD *d,
+ int pitch);
+
+extern void vp9_build_2nd_inter_predictors4b(MACROBLOCKD *xd,
+ BLOCKD *d, int pitch);
+
+extern void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd);
+
+extern void vp9_setup_interp_filters(MACROBLOCKD *xd,
+ INTERPOLATIONFILTERTYPE filter,
+ VP9_COMMON *cm);
+
+#endif  // VP9_COMMON_VP9_RECONINTER_H_
diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c
new file mode 100644
index 0000000..73e29ce
--- /dev/null
+++ b/vp9/common/vp9_reconintra.c
@@ -0,0 +1,863 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdio.h>
+#include "./vpx_config.h"
+#include "vp9_rtcd.h"
+#include "vp9/common/vp9_reconintra.h"
+#include "vpx_mem/vpx_mem.h"
+
+/* For skip_recon_mb(), add vp9_build_intra_predictors_mby_s(MACROBLOCKD *xd)
+ * and vp9_build_intra_predictors_mbuv_s(MACROBLOCKD *xd).
+ */
+
+/* Directional intra predictor used for D27_PRED. Fills the n x n block at
+ * ypred_ptr (row pitch y_stride) from the row above (yabove_row) and the
+ * column to the left (yleft_col). The block is filled in several passes and
+ * later passes read pixels written by earlier ones, so statement order matters.
+ * NOTE(review): the name suggests a ~27 degree prediction direction -- confirm
+ * against the codec design notes. */
+static void d27_predictor(uint8_t *ypred_ptr, int y_stride, int n,
+ uint8_t *yabove_row, uint8_t *yleft_col) {
+ int r, c, h, w, v;
+ int a, b;
+ /* Row 0, columns 0 .. n-3: blend a left-column value (a) with the above-row
+ * sample two columns to the right (b); the weight of b grows with c. */
+ r = 0;
+ for (c = 0; c < n - 2; c++) {
+ if (c & 1)
+ a = yleft_col[r + 1];
+ else
+ a = (yleft_col[r] + yleft_col[r + 1] + 1) >> 1;
+ b = yabove_row[c + 2];
+ ypred_ptr[c] = (2 * a + (c + 1) * b + (c + 3) / 2) / (c + 3);
+ }
+ /* Rows 1 .. n/2-2: same blend, but b is the already predicted pixel one row
+ * up and two columns to the right. Each row covers two columns fewer. */
+ for (r = 1; r < n / 2 - 1; r++) {
+ for (c = 0; c < n - 2 - 2 * r; c++) {
+ if (c & 1)
+ a = yleft_col[r + 1];
+ else
+ a = (yleft_col[r] + yleft_col[r + 1] + 1) >> 1;
+ b = ypred_ptr[(r - 1) * y_stride + c + 2];
+ ypred_ptr[r * y_stride + c] = (2 * a + (c + 1) * b + (c + 3) / 2) / (c + 3);
+ }
+ }
+ /* Remaining left samples (r continues from the loop above): copy the
+ * left-derived value along a line that rises one row every two columns. */
+ for (; r < n - 1; ++r) {
+ for (c = 0; c < n; c++) {
+ v = (c & 1 ? yleft_col[r + 1] : (yleft_col[r] + yleft_col[r + 1] + 1) >> 1);
+ h = r - c / 2;
+ ypred_ptr[h * y_stride + c] = v;
+ }
+ }
+ /* Bottom-left pixel: rounded average of the pixel above it and the last
+ * left sample. */
+ c = 0;
+ r = n - 1;
+ ypred_ptr[r * y_stride] = (ypred_ptr[(r - 1) * y_stride] +
+ yleft_col[r] + 1) >> 1;
+ /* Lower part of the block: every pixel written below is the rounded average
+ * of the predicted pixel above it and the predicted pixel to its left. */
+ for (r = n - 2; r >= n / 2; --r) {
+ w = c + (n - 1 - r) * 2;
+ ypred_ptr[r * y_stride + w] = (ypred_ptr[(r - 1) * y_stride + w] +
+ ypred_ptr[r * y_stride + w - 1] + 1) >> 1;
+ }
+ for (c = 1; c < n; c++) {
+ for (r = n - 1; r >= n / 2 + c / 2; --r) {
+ w = c + (n - 1 - r) * 2;
+ ypred_ptr[r * y_stride + w] = (ypred_ptr[(r - 1) * y_stride + w] +
+ ypred_ptr[r * y_stride + w - 1] + 1) >> 1;
+ }
+ }
+}
+
+/* Directional intra predictor used for D63_PRED. Same construction as
+ * d27_predictor with the roles of rows/columns and of the above row/left
+ * column exchanged. Fills the n x n block at ypred_ptr (row pitch y_stride);
+ * later passes read pixels written by earlier ones, so statement order matters.
+ * NOTE(review): the name suggests a ~63 degree prediction direction -- confirm
+ * against the codec design notes. */
+static void d63_predictor(uint8_t *ypred_ptr, int y_stride, int n,
+ uint8_t *yabove_row, uint8_t *yleft_col) {
+ int r, c, h, w, v;
+ int a, b;
+ /* Column 0, rows 0 .. n-3: blend an above-row value (a) with the left sample
+ * two rows further down (b); the weight of b grows with r. */
+ c = 0;
+ for (r = 0; r < n - 2; r++) {
+ if (r & 1)
+ a = yabove_row[c + 1];
+ else
+ a = (yabove_row[c] + yabove_row[c + 1] + 1) >> 1;
+ b = yleft_col[r + 2];
+ ypred_ptr[r * y_stride] = (2 * a + (r + 1) * b + (r + 3) / 2) / (r + 3);
+ }
+ /* Columns 1 .. n/2-2: b is the already predicted pixel two rows down and one
+ * column to the left.
+ * NOTE(review): the weights below are indexed by the column c, whereas the
+ * first loop here and the matching loop in d27_predictor use the inner loop
+ * index -- confirm this asymmetry is intended before touching it, since the
+ * predictor output is part of the coded format. */
+ for (c = 1; c < n / 2 - 1; c++) {
+ for (r = 0; r < n - 2 - 2 * c; r++) {
+ if (r & 1)
+ a = yabove_row[c + 1];
+ else
+ a = (yabove_row[c] + yabove_row[c + 1] + 1) >> 1;
+ b = ypred_ptr[(r + 2) * y_stride + c - 1];
+ ypred_ptr[r * y_stride + c] = (2 * a + (c + 1) * b + (c + 3) / 2) / (c + 3);
+ }
+ }
+ /* Remaining above samples (c continues from the loop above): copy the
+ * above-derived value along a line that moves one column left every two
+ * rows. */
+ for (; c < n - 1; ++c) {
+ for (r = 0; r < n; r++) {
+ v = (r & 1 ? yabove_row[c + 1] : (yabove_row[c] + yabove_row[c + 1] + 1) >> 1);
+ w = c - r / 2;
+ ypred_ptr[r * y_stride + w] = v;
+ }
+ }
+ /* Top-right pixel: rounded average of the pixel to its left and the last
+ * above sample. */
+ r = 0;
+ c = n - 1;
+ ypred_ptr[c] = (ypred_ptr[(c - 1)] + yabove_row[c] + 1) >> 1;
+ /* Right part of the block: every pixel written below is the rounded average
+ * of the predicted pixel to its left and the predicted pixel above it. */
+ for (c = n - 2; c >= n / 2; --c) {
+ h = r + (n - 1 - c) * 2;
+ ypred_ptr[h * y_stride + c] = (ypred_ptr[h * y_stride + c - 1] +
+ ypred_ptr[(h - 1) * y_stride + c] + 1) >> 1;
+ }
+ for (r = 1; r < n; r++) {
+ for (c = n - 1; c >= n / 2 + r / 2; --c) {
+ h = r + (n - 1 - c) * 2;
+ ypred_ptr[h * y_stride + c] = (ypred_ptr[h * y_stride + c - 1] +
+ ypred_ptr[(h - 1) * y_stride + c] + 1) >> 1;
+ }
+ }
+}
+
+/* Directional intra predictor used for D45_PRED. Fills the n x n block at
+ * ypred_ptr (row pitch y_stride).
+ *
+ * The upper-left triangle is filled one anti-diagonal at a time: every pixel
+ * on anti-diagonal d is a position-weighted blend of one above-row sample and
+ * one left-column sample. The lower-right triangle is then filled with the
+ * rounded average of the pixel above and the pixel to the left. */
+static void d45_predictor(uint8_t *ypred_ptr, int y_stride, int n,
+                          uint8_t *yabove_row, uint8_t *yleft_col) {
+  int diag, row, col;
+
+  /* Anti-diagonals 0 .. n-2 blend the samples at index diag + 1. */
+  for (diag = 0; diag < n - 1; ++diag) {
+    const int above = yabove_row[diag + 1];
+    const int left = yleft_col[diag + 1];
+    for (col = 0; col <= diag; ++col) {
+      ypred_ptr[(diag - col) * y_stride + col] =
+          (above * (col + 1) + left * (diag - col + 1) + diag / 2 + 1) /
+          (diag + 2);
+    }
+  }
+
+  /* Main anti-diagonal (diag is n - 1 here): there is no sample at index
+   * diag + 1, so the last available sample is reused without extrapolation.
+   * The samples are 8-bit, so they need no clamping to 0..255. */
+  {
+    const int above = yabove_row[diag];
+    const int left = yleft_col[diag];
+    for (col = 0; col <= diag; ++col) {
+      ypred_ptr[(diag - col) * y_stride + col] =
+          (above * (col + 1) + left * (diag - col + 1) + diag / 2 + 1) /
+          (diag + 2);
+    }
+  }
+
+  /* Lower-right triangle, in raster order so both neighbours are ready. */
+  for (row = 1; row < n; ++row) {
+    uint8_t *const cur = ypred_ptr + row * y_stride;
+    const uint8_t *const prev = cur - y_stride;
+    for (col = n - row; col < n; ++col)
+      cur[col] = (prev[col] + cur[col - 1] + 1) >> 1;
+  }
+}
+
+/* Directional intra predictor used for D117_PRED. Fills the n x n block at
+ * ypred_ptr (row pitch y_stride).
+ *
+ * Row 0 holds rounded averages of horizontally adjacent above samples
+ * (starting with the top-left sample yabove_row[-1]), row 1 holds the above
+ * row shifted right by one, and every later row starts with a left-column
+ * sample and repeats the row two lines up shifted right by one. */
+static void d117_predictor(uint8_t *ypred_ptr, int y_stride, int n,
+                           uint8_t *yabove_row, uint8_t *yleft_col) {
+  uint8_t *const row0 = ypred_ptr;
+  uint8_t *const row1 = ypred_ptr + y_stride;
+  int row, col;
+
+  for (col = 0; col < n; ++col)
+    row0[col] = (yabove_row[col - 1] + yabove_row[col] + 1) >> 1;
+
+  for (col = 0; col < n; ++col)
+    row1[col] = yabove_row[col - 1];
+
+  for (row = 2; row < n; ++row) {
+    uint8_t *const dst = ypred_ptr + row * y_stride;
+    const uint8_t *const two_up = dst - 2 * y_stride;
+
+    dst[0] = yleft_col[row - 2];
+    for (col = 1; col < n; ++col)
+      dst[col] = two_up[col - 1];
+  }
+}
+
+/* Directional intra predictor used for D135_PRED. Fills the n x n block at
+ * ypred_ptr (row pitch y_stride).
+ *
+ * Row 0 is the above row shifted right by one (so it starts with the
+ * top-left sample yabove_row[-1]); column 0 below it is the left column
+ * shifted down by one; every other pixel copies its up-left neighbour. */
+static void d135_predictor(uint8_t *ypred_ptr, int y_stride, int n,
+                           uint8_t *yabove_row, uint8_t *yleft_col) {
+  int row, col;
+
+  ypred_ptr[0] = yabove_row[-1];
+  for (col = 1; col < n; ++col)
+    ypred_ptr[col] = yabove_row[col - 1];
+
+  for (row = 1; row < n; ++row) {
+    uint8_t *const dst = ypred_ptr + row * y_stride;
+    const uint8_t *const up = dst - y_stride;
+
+    dst[0] = yleft_col[row - 1];
+    for (col = 1; col < n; ++col)
+      dst[col] = up[col - 1];
+  }
+}
+
+/* Directional intra predictor used for D153_PRED. Fills the n x n block at
+ * ypred_ptr (row pitch y_stride).
+ *
+ * Column 0 holds rounded averages of vertically adjacent left samples
+ * (starting with the top-left sample yabove_row[-1]), column 1 holds the
+ * left column shifted down by one, row 0 from column 2 on holds the above
+ * row, and every remaining pixel copies the pixel one row up and two
+ * columns to the left. */
+static void d153_predictor(uint8_t *ypred_ptr, int y_stride, int n,
+                           uint8_t *yabove_row, uint8_t *yleft_col) {
+  int row, col;
+
+  /* Columns 0 and 1. */
+  ypred_ptr[0] = (yabove_row[-1] + yleft_col[0] + 1) >> 1;
+  ypred_ptr[1] = yabove_row[-1];
+  for (row = 1; row < n; ++row) {
+    uint8_t *const dst = ypred_ptr + row * y_stride;
+    dst[0] = (yleft_col[row - 1] + yleft_col[row] + 1) >> 1;
+    dst[1] = yleft_col[row - 1];
+  }
+
+  /* Row 0, columns 2 .. n-1. */
+  for (col = 2; col < n; ++col)
+    ypred_ptr[col] = yabove_row[col - 2];
+
+  /* Everything else, in raster order so the source pixel is ready. */
+  for (row = 1; row < n; ++row) {
+    uint8_t *const dst = ypred_ptr + row * y_stride;
+    const uint8_t *const up = dst - y_stride;
+    for (col = 2; col < n; ++col)
+      dst[col] = up[col - 2];
+  }
+}
+
+/* Two-level "corner" predictor. Fills the n x n block at ypred_ptr (row
+ * pitch y_stride) with one of two flat values.
+ *
+ * The above row and the left column are each split where the two-sample
+ * gradient is largest (the first maximum wins). The samples before the
+ * splits, together with the top-left sample, are averaged into the corner
+ * value; the remaining samples are averaged into the outer value. A pixel
+ * gets the corner value when its row index is <= the above-row split and
+ * its column index is <= the left-column split. */
+static void corner_predictor(unsigned char *ypred_ptr, int y_stride, int n,
+                             unsigned char *yabove_row,
+                             unsigned char *yleft_col) {
+  const int top_left = yabove_row[-1];
+  int split_h = 0;
+  int split_v = 0;
+  int best_grad_h = yabove_row[1] - top_left;
+  int best_grad_v = yleft_col[1] - top_left;
+  int corner_count, outer_count;
+  int corner_value, outer_value;
+  int k, row, col;
+
+  /* Locate the strongest gradient along each edge. */
+  for (k = 2; k < n; ++k) {
+    const int grad_h = yabove_row[k] - yabove_row[k - 2];
+    const int grad_v = yleft_col[k] - yleft_col[k - 2];
+    if (grad_h > best_grad_h) {
+      best_grad_h = grad_h;
+      split_h = k - 1;
+    }
+    if (grad_v > best_grad_v) {
+      best_grad_v = grad_v;
+      split_v = k - 1;
+    }
+  }
+  corner_count = split_h + split_v + 3;
+  outer_count = 2 * n + 1 - corner_count;
+
+  /* Rounded mean of the samples on the corner side of the splits. */
+  corner_value = top_left;
+  for (k = 0; k <= split_h; ++k) corner_value += yabove_row[k];
+  for (k = 0; k <= split_v; ++k) corner_value += yleft_col[k];
+  corner_value = (corner_value + (corner_count >> 1)) / corner_count;
+
+  /* Rounded mean of the remaining samples. */
+  outer_value = 0;
+  for (k = split_h + 1; k < n; ++k) outer_value += yabove_row[k];
+  for (k = split_v + 1; k < n; ++k) outer_value += yleft_col[k];
+  outer_value = (outer_value + (outer_count >> 1)) / outer_count;
+
+  for (row = 0; row < n; ++row) {
+    for (col = 0; col < n; ++col) {
+      if (row <= split_h && col <= split_v)
+        ypred_ptr[col] = corner_value;
+      else
+        ypred_ptr[col] = outer_value;
+    }
+    ypred_ptr += y_stride;
+  }
+}
+
+/* Reconstructs blocks 16 .. 23 of the macroblock by calling vp9_recon2b on
+ * every second block (16, 18, 20, 22) with that block's predictor, residual
+ * and destination. NOTE(review): presumably these are the chroma blocks and
+ * vp9_recon2b covers a pair of them per call -- confirm. */
+void vp9_recon_intra_mbuv(MACROBLOCKD *xd) {
+  int pair;
+
+  for (pair = 0; pair < 4; ++pair) {
+    BLOCKD *const blk = &xd->block[16 + 2 * pair];
+    unsigned char *const dst = *(blk->base_dst) + blk->dst;
+
+    vp9_recon2b(blk->predictor, blk->diff, dst, blk->dst_stride);
+  }
+}
+
+/* Builds a bsize x bsize intra predictor from the neighbours of a block.
+ *
+ * src             top-left pixel of the block inside its frame buffer; the
+ *                 row above (src - src_stride), the column to the left
+ *                 (src[k * src_stride - 1]) and the top-left corner
+ *                 (src[-src_stride - 1]) are read
+ * src_stride      row pitch of the buffer holding src
+ * ypred_ptr       destination of the predictor
+ * y_stride        row pitch of ypred_ptr
+ * mode            prediction mode; I8X8_PRED, B_PRED, the inter modes and
+ *                 any unknown value leave ypred_ptr untouched
+ * bsize           block width/height: 4, 8, 16 or 32 (at most 32 left
+ *                 neighbours are buffered)
+ * up_available,
+ * left_available  0/1 flags, consulted by DC_PRED only; all other modes,
+ *                 and the neighbour reads at the top of this function, use
+ *                 the neighbours unconditionally
+ */
+void vp9_build_intra_predictors_internal(unsigned char *src, int src_stride,
+                                         unsigned char *ypred_ptr,
+                                         int y_stride, int mode, int bsize,
+                                         int up_available, int left_available) {
+  unsigned char *yabove_row = src - src_stride;
+  unsigned char yleft_col[32];
+  unsigned char ytop_left = yabove_row[-1];
+  int r, c, i;
+
+  /* Gather the left neighbour column into a contiguous array. */
+  for (i = 0; i < bsize; i++) {
+    yleft_col[i] = src[i * src_stride - 1];
+  }
+
+  switch (mode) {
+    case DC_PRED: {
+      int expected_dc;
+      int shift;
+      int average = 0;
+      int log2_bsize_minus_1;
+
+      assert(bsize == 4 || bsize == 8 || bsize == 16 || bsize == 32);
+      if (bsize == 4) {
+        log2_bsize_minus_1 = 1;
+      } else if (bsize == 8) {
+        log2_bsize_minus_1 = 2;
+      } else if (bsize == 16) {
+        log2_bsize_minus_1 = 3;
+      } else /* bsize == 32 */ {
+        log2_bsize_minus_1 = 4;
+      }
+
+      if (up_available || left_available) {
+        /* Sum whichever edges exist; the loops reuse the function-level
+         * index i instead of shadowing it with a second declaration. */
+        if (up_available) {
+          for (i = 0; i < bsize; i++) {
+            average += yabove_row[i];
+          }
+        }
+
+        if (left_available) {
+          for (i = 0; i < bsize; i++) {
+            average += yleft_col[i];
+          }
+        }
+        /* One edge contributes bsize samples, two edges 2 * bsize, so the
+         * divisor is 2^(log2(bsize) - 1 + number of edges). */
+        shift = log2_bsize_minus_1 + up_available + left_available;
+        expected_dc = (average + (1 << (shift - 1))) >> shift;
+      } else {
+        /* No neighbours at all: fall back to mid-grey. */
+        expected_dc = 128;
+      }
+
+      for (r = 0; r < bsize; r++) {
+        vpx_memset(ypred_ptr, expected_dc, bsize);
+        ypred_ptr += y_stride;
+      }
+    }
+    break;
+    case V_PRED: {
+      /* Every row repeats the row above the block. */
+      for (r = 0; r < bsize; r++) {
+        vpx_memcpy(ypred_ptr, yabove_row, bsize);
+        ypred_ptr += y_stride;
+      }
+    }
+    break;
+    case H_PRED: {
+      /* Every row is filled with its left neighbour. */
+      for (r = 0; r < bsize; r++) {
+        vpx_memset(ypred_ptr, yleft_col[r], bsize);
+        ypred_ptr += y_stride;
+      }
+    }
+    break;
+    case TM_PRED: {
+      /* left + above - top_left, clamped to 0..255. */
+      for (r = 0; r < bsize; r++) {
+        for (c = 0; c < bsize; c++) {
+          int pred = yleft_col[r] + yabove_row[c] - ytop_left;
+
+          if (pred < 0)
+            pred = 0;
+
+          if (pred > 255)
+            pred = 255;
+
+          ypred_ptr[c] = pred;
+        }
+
+        ypred_ptr += y_stride;
+      }
+    }
+    break;
+    case D45_PRED: {
+      d45_predictor(ypred_ptr, y_stride, bsize, yabove_row, yleft_col);
+    }
+    break;
+    case D135_PRED: {
+      d135_predictor(ypred_ptr, y_stride, bsize, yabove_row, yleft_col);
+    }
+    break;
+    case D117_PRED: {
+      d117_predictor(ypred_ptr, y_stride, bsize, yabove_row, yleft_col);
+    }
+    break;
+    case D153_PRED: {
+      d153_predictor(ypred_ptr, y_stride, bsize, yabove_row, yleft_col);
+    }
+    break;
+    case D27_PRED: {
+      d27_predictor(ypred_ptr, y_stride, bsize, yabove_row, yleft_col);
+    }
+    break;
+    case D63_PRED: {
+      d63_predictor(ypred_ptr, y_stride, bsize, yabove_row, yleft_col);
+    }
+    break;
+    /* Not whole-block intra modes: nothing is predicted here. */
+    case I8X8_PRED:
+    case B_PRED:
+    case NEARESTMV:
+    case NEARMV:
+    case ZEROMV:
+    case NEWMV:
+    case SPLITMV:
+    case MB_MODE_COUNT:
+    default:
+      break;
+  }
+}
+
+#if CONFIG_COMP_INTERINTRA_PRED
+/* Blends an intra predictor into an inter predictor, in place.
+ *
+ * For the weighted modes every pixel of the size x size block becomes
+ *   ((scale_max - scale) * inter + scale * intra + scale_round) >> scale_bits
+ * where scale is the intra weight (out of 256) looked up from the tables
+ * below according to the pixel position and the intra mode. All other modes
+ * use a plain truncating average of the two predictors.
+ *
+ * mode         intra prediction mode that produced intrapred
+ * interpred    inter predictor; overwritten with the blended result
+ * interstride  row pitch of interpred
+ * intrapred    intra predictor (read only)
+ * intrastride  row pitch of intrapred
+ * size         block width/height; 32, 16 and 8 are mapped explicitly, any
+ *              other value is handled like 4 (table step of 8)
+ */
+static void combine_interintra(MB_PREDICTION_MODE mode,
+ unsigned char *interpred,
+ int interstride,
+ unsigned char *intrapred,
+ int intrastride,
+ int size) {
+ // TODO(debargha): Explore different ways of combining predictors
+ // or designing the tables below
+ static const int scale_bits = 8;
+ static const int scale_max = 256; // 1 << scale_bits;
+ // NOTE(review): 127 is (1 << (scale_bits - 1)) - 1, i.e. one less than the
+ // usual half-LSB rounding term of 128 -- confirm this is intended.
+ static const int scale_round = 127; // (1 << (scale_bits - 1)) - 1
+ // This table is a function A + B*exp(-kx), where x is hor. index
+ static const int weights1d[32] = {
+ 128, 122, 116, 111, 107, 103, 99, 96,
+ 93, 90, 88, 85, 83, 81, 80, 78,
+ 77, 76, 75, 74, 73, 72, 71, 70,
+ 70, 69, 69, 68, 68, 68, 67, 67,
+ };
+ // This table is a function A + B*exp(-k.sqrt(xy)), where x, y are
+ // hor. and vert. indices
+ // (32 x 32 entries stored row by row; indexed below as [row * 32 + column])
+ static const int weights2d[1024] = {
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 122, 120, 118, 116, 115, 114, 113,
+ 112, 111, 111, 110, 109, 109, 108, 107,
+ 107, 106, 106, 105, 105, 104, 104, 104,
+ 103, 103, 102, 102, 102, 101, 101, 101,
+ 128, 120, 116, 114, 112, 111, 109, 108,
+ 107, 106, 105, 104, 103, 102, 102, 101,
+ 100, 100, 99, 99, 98, 97, 97, 96,
+ 96, 96, 95, 95, 94, 94, 93, 93,
+ 128, 118, 114, 111, 109, 107, 106, 104,
+ 103, 102, 101, 100, 99, 98, 97, 97,
+ 96, 95, 95, 94, 93, 93, 92, 92,
+ 91, 91, 90, 90, 90, 89, 89, 88,
+ 128, 116, 112, 109, 107, 105, 103, 102,
+ 100, 99, 98, 97, 96, 95, 94, 93,
+ 93, 92, 91, 91, 90, 90, 89, 89,
+ 88, 88, 87, 87, 86, 86, 85, 85,
+ 128, 115, 111, 107, 105, 103, 101, 99,
+ 98, 97, 96, 94, 93, 93, 92, 91,
+ 90, 89, 89, 88, 88, 87, 86, 86,
+ 85, 85, 84, 84, 84, 83, 83, 82,
+ 128, 114, 109, 106, 103, 101, 99, 97,
+ 96, 95, 93, 92, 91, 90, 90, 89,
+ 88, 87, 87, 86, 85, 85, 84, 84,
+ 83, 83, 82, 82, 82, 81, 81, 80,
+ 128, 113, 108, 104, 102, 99, 97, 96,
+ 94, 93, 92, 91, 90, 89, 88, 87,
+ 86, 85, 85, 84, 84, 83, 83, 82,
+ 82, 81, 81, 80, 80, 79, 79, 79,
+ 128, 112, 107, 103, 100, 98, 96, 94,
+ 93, 91, 90, 89, 88, 87, 86, 85,
+ 85, 84, 83, 83, 82, 82, 81, 80,
+ 80, 80, 79, 79, 78, 78, 78, 77,
+ 128, 111, 106, 102, 99, 97, 95, 93,
+ 91, 90, 89, 88, 87, 86, 85, 84,
+ 83, 83, 82, 81, 81, 80, 80, 79,
+ 79, 78, 78, 77, 77, 77, 76, 76,
+ 128, 111, 105, 101, 98, 96, 93, 92,
+ 90, 89, 88, 86, 85, 84, 84, 83,
+ 82, 81, 81, 80, 80, 79, 79, 78,
+ 78, 77, 77, 76, 76, 76, 75, 75,
+ 128, 110, 104, 100, 97, 94, 92, 91,
+ 89, 88, 86, 85, 84, 83, 83, 82,
+ 81, 80, 80, 79, 79, 78, 78, 77,
+ 77, 76, 76, 75, 75, 75, 74, 74,
+ 128, 109, 103, 99, 96, 93, 91, 90,
+ 88, 87, 85, 84, 83, 82, 82, 81,
+ 80, 79, 79, 78, 78, 77, 77, 76,
+ 76, 75, 75, 75, 74, 74, 74, 73,
+ 128, 109, 102, 98, 95, 93, 90, 89,
+ 87, 86, 84, 83, 82, 81, 81, 80,
+ 79, 78, 78, 77, 77, 76, 76, 75,
+ 75, 75, 74, 74, 73, 73, 73, 73,
+ 128, 108, 102, 97, 94, 92, 90, 88,
+ 86, 85, 84, 83, 82, 81, 80, 79,
+ 78, 78, 77, 77, 76, 76, 75, 75,
+ 74, 74, 73, 73, 73, 73, 72, 72,
+ 128, 107, 101, 97, 93, 91, 89, 87,
+ 85, 84, 83, 82, 81, 80, 79, 78,
+ 78, 77, 76, 76, 75, 75, 74, 74,
+ 74, 73, 73, 73, 72, 72, 72, 71,
+ 128, 107, 100, 96, 93, 90, 88, 86,
+ 85, 83, 82, 81, 80, 79, 78, 78,
+ 77, 76, 76, 75, 75, 74, 74, 73,
+ 73, 73, 72, 72, 72, 71, 71, 71,
+ 128, 106, 100, 95, 92, 89, 87, 85,
+ 84, 83, 81, 80, 79, 78, 78, 77,
+ 76, 76, 75, 75, 74, 74, 73, 73,
+ 72, 72, 72, 72, 71, 71, 71, 70,
+ 128, 106, 99, 95, 91, 89, 87, 85,
+ 83, 82, 81, 80, 79, 78, 77, 76,
+ 76, 75, 75, 74, 74, 73, 73, 72,
+ 72, 72, 71, 71, 71, 71, 70, 70,
+ 128, 105, 99, 94, 91, 88, 86, 84,
+ 83, 81, 80, 79, 78, 77, 77, 76,
+ 75, 75, 74, 74, 73, 73, 72, 72,
+ 72, 71, 71, 71, 70, 70, 70, 70,
+ 128, 105, 98, 93, 90, 88, 85, 84,
+ 82, 81, 80, 79, 78, 77, 76, 75,
+ 75, 74, 74, 73, 73, 72, 72, 71,
+ 71, 71, 71, 70, 70, 70, 70, 69,
+ 128, 104, 97, 93, 90, 87, 85, 83,
+ 82, 80, 79, 78, 77, 76, 76, 75,
+ 74, 74, 73, 73, 72, 72, 71, 71,
+ 71, 70, 70, 70, 70, 69, 69, 69,
+ 128, 104, 97, 92, 89, 86, 84, 83,
+ 81, 80, 79, 78, 77, 76, 75, 74,
+ 74, 73, 73, 72, 72, 71, 71, 71,
+ 70, 70, 70, 70, 69, 69, 69, 69,
+ 128, 104, 96, 92, 89, 86, 84, 82,
+ 80, 79, 78, 77, 76, 75, 75, 74,
+ 73, 73, 72, 72, 71, 71, 71, 70,
+ 70, 70, 70, 69, 69, 69, 69, 68,
+ 128, 103, 96, 91, 88, 85, 83, 82,
+ 80, 79, 78, 77, 76, 75, 74, 74,
+ 73, 72, 72, 72, 71, 71, 70, 70,
+ 70, 70, 69, 69, 69, 69, 68, 68,
+ 128, 103, 96, 91, 88, 85, 83, 81,
+ 80, 78, 77, 76, 75, 75, 74, 73,
+ 73, 72, 72, 71, 71, 70, 70, 70,
+ 70, 69, 69, 69, 69, 68, 68, 68,
+ 128, 102, 95, 90, 87, 84, 82, 81,
+ 79, 78, 77, 76, 75, 74, 73, 73,
+ 72, 72, 71, 71, 71, 70, 70, 70,
+ 69, 69, 69, 69, 68, 68, 68, 68,
+ 128, 102, 95, 90, 87, 84, 82, 80,
+ 79, 77, 76, 75, 75, 74, 73, 73,
+ 72, 72, 71, 71, 70, 70, 70, 69,
+ 69, 69, 69, 68, 68, 68, 68, 68,
+ 128, 102, 94, 90, 86, 84, 82, 80,
+ 78, 77, 76, 75, 74, 73, 73, 72,
+ 72, 71, 71, 70, 70, 70, 69, 69,
+ 69, 69, 68, 68, 68, 68, 68, 67,
+ 128, 101, 94, 89, 86, 83, 81, 79,
+ 78, 77, 76, 75, 74, 73, 73, 72,
+ 71, 71, 71, 70, 70, 69, 69, 69,
+ 69, 68, 68, 68, 68, 68, 67, 67,
+ 128, 101, 93, 89, 85, 83, 81, 79,
+ 78, 76, 75, 74, 74, 73, 72, 72,
+ 71, 71, 70, 70, 70, 69, 69, 69,
+ 68, 68, 68, 68, 68, 67, 67, 67,
+ 128, 101, 93, 88, 85, 82, 80, 79,
+ 77, 76, 75, 74, 73, 73, 72, 71,
+ 71, 70, 70, 70, 69, 69, 69, 68,
+ 68, 68, 68, 68, 67, 67, 67, 67,
+ };
+ // Step used to index the 32-entry tables, so that a smaller block still
+ // walks across the whole table range.
+ int size_scale = (size == 32 ? 1 :
+ size == 16 ? 2 :
+ size == 8 ? 4 : 8);
+ int i, j;
+ switch (mode) {
+ case V_PRED:
+ // Intra weight depends on the row index only.
+ for (i = 0; i < size; ++i) {
+ for (j = 0; j < size; ++j) {
+ int k = i * interstride + j;
+ int scale = weights1d[i * size_scale];
+ interpred[k] =
+ ((scale_max - scale) * interpred[k] +
+ scale * intrapred[i * intrastride + j] + scale_round)
+ >> scale_bits;
+ }
+ }
+ break;
+
+ case H_PRED:
+ // Intra weight depends on the column index only.
+ for (i = 0; i < size; ++i) {
+ for (j = 0; j < size; ++j) {
+ int k = i * interstride + j;
+ int scale = weights1d[j * size_scale];
+ interpred[k] =
+ ((scale_max - scale) * interpred[k] +
+ scale * intrapred[i * intrastride + j] + scale_round)
+ >> scale_bits;
+ }
+ }
+ break;
+
+ case D63_PRED:
+ case D117_PRED:
+ // Mean of the 2-D weight and the row weight.
+ for (i = 0; i < size; ++i) {
+ for (j = 0; j < size; ++j) {
+ int k = i * interstride + j;
+ int scale = (weights2d[i * size_scale * 32 + j * size_scale] +
+ weights1d[i * size_scale]) >> 1;
+ interpred[k] =
+ ((scale_max - scale) * interpred[k] +
+ scale * intrapred[i * intrastride + j] + scale_round)
+ >> scale_bits;
+ }
+ }
+ break;
+
+ case D27_PRED:
+ case D153_PRED:
+ // Mean of the 2-D weight and the column weight.
+ for (i = 0; i < size; ++i) {
+ for (j = 0; j < size; ++j) {
+ int k = i * interstride + j;
+ int scale = (weights2d[i * size_scale * 32 + j * size_scale] +
+ weights1d[j * size_scale]) >> 1;
+ interpred[k] =
+ ((scale_max - scale) * interpred[k] +
+ scale * intrapred[i * intrastride + j] + scale_round)
+ >> scale_bits;
+ }
+ }
+ break;
+
+ case D135_PRED:
+ // 2-D weight only.
+ for (i = 0; i < size; ++i) {
+ for (j = 0; j < size; ++j) {
+ int k = i * interstride + j;
+ int scale = weights2d[i * size_scale * 32 + j * size_scale];
+ interpred[k] =
+ ((scale_max - scale) * interpred[k] +
+ scale * intrapred[i * intrastride + j] + scale_round)
+ >> scale_bits;
+ }
+ }
+ break;
+
+ case D45_PRED:
+ case DC_PRED:
+ case TM_PRED:
+ default:
+ // simple average (truncating: no rounding term is added)
+ for (i = 0; i < size; ++i) {
+ for (j = 0; j < size; ++j) {
+ int k = i * interstride + j;
+ interpred[k] = (interpred[k] + intrapred[i * intrastride + j]) >> 1;
+ }
+ }
+ break;
+ }
+}
+
+/* Inter-intra blending for a whole 16x16 macroblock: the luma predictor is
+ * processed first, then the two chroma predictors. The buffers passed in
+ * hold the inter predictors and are updated in place. */
+void vp9_build_interintra_16x16_predictors_mb(MACROBLOCKD *xd,
+                                              unsigned char *ypred,
+                                              unsigned char *upred,
+                                              unsigned char *vpred,
+                                              int ystride, int uvstride) {
+  /* Luma plane. */
+  vp9_build_interintra_16x16_predictors_mby(xd, ypred, ystride);
+
+  /* Both chroma planes share one stride. */
+  vp9_build_interintra_16x16_predictors_mbuv(xd, upred, vpred, uvstride);
+}
+
+/* Inter-intra blending for the 16x16 luma plane of a macroblock. The block
+ * is intra predicted from xd->dst with mbmi.interintra_mode into a local
+ * buffer, which is then blended into ypred (row pitch ystride) in place. */
+void vp9_build_interintra_16x16_predictors_mby(MACROBLOCKD *xd,
+                                               unsigned char *ypred,
+                                               int ystride) {
+  const MB_PREDICTION_MODE intra_mode =
+      xd->mode_info_context->mbmi.interintra_mode;
+  unsigned char intra_buf[16 * 16];
+
+  vp9_build_intra_predictors_internal(xd->dst.y_buffer, xd->dst.y_stride,
+                                      intra_buf, 16, intra_mode, 16,
+                                      xd->up_available, xd->left_available);
+  combine_interintra(intra_mode, ypred, ystride, intra_buf, 16, 16);
+}
+
+/* Inter-intra blending for the two 8x8 chroma planes of a macroblock. Both
+ * planes are intra predicted from xd->dst with mbmi.interintra_uv_mode into
+ * local buffers, which are then blended into upred / vpred (row pitch
+ * uvstride) in place. */
+void vp9_build_interintra_16x16_predictors_mbuv(MACROBLOCKD *xd,
+                                                unsigned char *upred,
+                                                unsigned char *vpred,
+                                                int uvstride) {
+  const MB_PREDICTION_MODE uv_mode =
+      xd->mode_info_context->mbmi.interintra_uv_mode;
+  unsigned char u_intra[8 * 8];
+  unsigned char v_intra[8 * 8];
+
+  vp9_build_intra_predictors_internal(xd->dst.u_buffer, xd->dst.uv_stride,
+                                      u_intra, 8, uv_mode, 8,
+                                      xd->up_available, xd->left_available);
+  vp9_build_intra_predictors_internal(xd->dst.v_buffer, xd->dst.uv_stride,
+                                      v_intra, 8, uv_mode, 8,
+                                      xd->up_available, xd->left_available);
+
+  combine_interintra(uv_mode, upred, uvstride, u_intra, 8, 8);
+  combine_interintra(uv_mode, vpred, uvstride, v_intra, 8, 8);
+}
+
+#if CONFIG_SUPERBLOCKS
+/* Inter-intra blending for the 32x32 luma plane of a superblock. The block
+ * is intra predicted from xd->dst with mbmi.interintra_mode into a local
+ * buffer, which is then blended into ypred (row pitch ystride) in place. */
+void vp9_build_interintra_32x32_predictors_sby(MACROBLOCKD *xd,
+                                               unsigned char *ypred,
+                                               int ystride) {
+  const MB_PREDICTION_MODE intra_mode =
+      xd->mode_info_context->mbmi.interintra_mode;
+  unsigned char intra_buf[32 * 32];
+
+  vp9_build_intra_predictors_internal(xd->dst.y_buffer, xd->dst.y_stride,
+                                      intra_buf, 32, intra_mode, 32,
+                                      xd->up_available, xd->left_available);
+  combine_interintra(intra_mode, ypred, ystride, intra_buf, 32, 32);
+}
+
+/* Inter-intra blending for the two 16x16 chroma planes of a superblock.
+ * Both planes are intra predicted from xd->dst with
+ * mbmi.interintra_uv_mode into local buffers, which are then blended into
+ * upred / vpred (row pitch uvstride) in place. */
+void vp9_build_interintra_32x32_predictors_sbuv(MACROBLOCKD *xd,
+                                                unsigned char *upred,
+                                                unsigned char *vpred,
+                                                int uvstride) {
+  const MB_PREDICTION_MODE uv_mode =
+      xd->mode_info_context->mbmi.interintra_uv_mode;
+  unsigned char u_intra[16 * 16];
+  unsigned char v_intra[16 * 16];
+
+  vp9_build_intra_predictors_internal(xd->dst.u_buffer, xd->dst.uv_stride,
+                                      u_intra, 16, uv_mode, 16,
+                                      xd->up_available, xd->left_available);
+  vp9_build_intra_predictors_internal(xd->dst.v_buffer, xd->dst.uv_stride,
+                                      v_intra, 16, uv_mode, 16,
+                                      xd->up_available, xd->left_available);
+
+  combine_interintra(uv_mode, upred, uvstride, u_intra, 16, 16);
+  combine_interintra(uv_mode, vpred, uvstride, v_intra, 16, 16);
+}
+
+/* Inter-intra blending for a whole 32x32 superblock: the luma predictor is
+ * processed first, then the two chroma predictors. The buffers passed in
+ * hold the inter predictors and are updated in place. */
+void vp9_build_interintra_32x32_predictors_sb(MACROBLOCKD *xd,
+                                              unsigned char *ypred,
+                                              unsigned char *upred,
+                                              unsigned char *vpred,
+                                              int ystride,
+                                              int uvstride) {
+  /* Luma plane. */
+  vp9_build_interintra_32x32_predictors_sby(xd, ypred, ystride);
+
+  /* Both chroma planes share one stride. */
+  vp9_build_interintra_32x32_predictors_sbuv(xd, upred, vpred, uvstride);
+}
+#endif
+#endif
+
+/* Intra predicts the 16x16 luma block from its neighbours in xd->dst and
+ * stores the result at the start of xd->predictor with a row pitch of 16. */
+void vp9_build_intra_predictors_mby(MACROBLOCKD *xd) {
+  unsigned char *const src = xd->dst.y_buffer;
+  const int src_stride = xd->dst.y_stride;
+
+  vp9_build_intra_predictors_internal(src, src_stride, xd->predictor, 16,
+                                      xd->mode_info_context->mbmi.mode, 16,
+                                      xd->up_available, xd->left_available);
+}
+
+/* Intra predicts the 16x16 luma block in place: the neighbours are read from
+ * xd->dst and the predictor is written straight back into xd->dst. */
+void vp9_build_intra_predictors_mby_s(MACROBLOCKD *xd) {
+  unsigned char *const frame = xd->dst.y_buffer;
+  const int frame_stride = xd->dst.y_stride;
+
+  vp9_build_intra_predictors_internal(frame, frame_stride,
+                                      frame, frame_stride,
+                                      xd->mode_info_context->mbmi.mode, 16,
+                                      xd->up_available, xd->left_available);
+}
+
+#if CONFIG_SUPERBLOCKS
+/* Intra predicts the 32x32 luma block of a superblock in place: the
+ * neighbours are read from xd->dst and the predictor is written straight
+ * back into xd->dst. */
+void vp9_build_intra_predictors_sby_s(MACROBLOCKD *xd) {
+  unsigned char *const frame = xd->dst.y_buffer;
+  const int frame_stride = xd->dst.y_stride;
+
+  vp9_build_intra_predictors_internal(frame, frame_stride,
+                                      frame, frame_stride,
+                                      xd->mode_info_context->mbmi.mode, 32,
+                                      xd->up_available, xd->left_available);
+}
+#endif
+
+#if CONFIG_COMP_INTRA_PRED
+/* Compound intra prediction for the 16x16 luma block: the block is
+ * predicted once with mbmi.mode and once with mbmi.second_mode, and the
+ * rounded average of the two results is stored in xd->predictor[0..255]. */
+void vp9_build_comp_intra_predictors_mby(MACROBLOCKD *xd) {
+  unsigned char first[256];
+  unsigned char second[256];
+  unsigned char *const src = xd->dst.y_buffer;
+  const int src_stride = xd->dst.y_stride;
+  int k;
+
+  vp9_build_intra_predictors_internal(src, src_stride, first, 16,
+                                      xd->mode_info_context->mbmi.mode,
+                                      16, xd->up_available,
+                                      xd->left_available);
+  vp9_build_intra_predictors_internal(src, src_stride, second, 16,
+                                      xd->mode_info_context->mbmi.second_mode,
+                                      16, xd->up_available,
+                                      xd->left_available);
+
+  for (k = 0; k < 256; ++k) {
+    xd->predictor[k] = (first[k] + second[k] + 1) >> 1;
+  }
+}
+#endif
+
+/* Intra predicts both chroma planes of a block with the same mode.
+ *
+ * The neighbours are read from xd->dst.u_buffer / xd->dst.v_buffer (row
+ * pitch xd->dst.uv_stride); the bsize x bsize predictors are written to
+ * upred_ptr / vpred_ptr with a row pitch of uv_stride. */
+void vp9_build_intra_predictors_mbuv_internal(MACROBLOCKD *xd,
+                                              unsigned char *upred_ptr,
+                                              unsigned char *vpred_ptr,
+                                              int uv_stride,
+                                              int mode, int bsize) {
+  const int src_stride = xd->dst.uv_stride;
+  const int have_up = xd->up_available;
+  const int have_left = xd->left_available;
+
+  /* U plane. */
+  vp9_build_intra_predictors_internal(xd->dst.u_buffer, src_stride,
+                                      upred_ptr, uv_stride, mode, bsize,
+                                      have_up, have_left);
+  /* V plane. */
+  vp9_build_intra_predictors_internal(xd->dst.v_buffer, src_stride,
+                                      vpred_ptr, uv_stride, mode, bsize,
+                                      have_up, have_left);
+}
+
+/* Intra predicts the two 8x8 chroma blocks of a macroblock with
+ * mbmi.uv_mode into xd->predictor: U at offset 256, V at offset 320, both
+ * with a row pitch of 8. */
+void vp9_build_intra_predictors_mbuv(MACROBLOCKD *xd) {
+  unsigned char *const upred = &xd->predictor[256];
+  unsigned char *const vpred = &xd->predictor[320];
+
+  vp9_build_intra_predictors_mbuv_internal(xd, upred, vpred, 8,
+                                           xd->mode_info_context->mbmi.uv_mode,
+                                           8);
+}
+
+/* Intra predicts the two 8x8 chroma blocks of a macroblock in place: the
+ * predictors are written straight into xd->dst.u_buffer / v_buffer. */
+void vp9_build_intra_predictors_mbuv_s(MACROBLOCKD *xd) {
+  const int frame_stride = xd->dst.uv_stride;
+
+  vp9_build_intra_predictors_mbuv_internal(xd,
+                                           xd->dst.u_buffer,
+                                           xd->dst.v_buffer,
+                                           frame_stride,
+                                           xd->mode_info_context->mbmi.uv_mode,
+                                           8);
+}
+
+#if CONFIG_SUPERBLOCKS
+/* Intra predicts the two 16x16 chroma blocks of a superblock in place: the
+ * predictors are written straight into xd->dst.u_buffer / v_buffer. */
+void vp9_build_intra_predictors_sbuv_s(MACROBLOCKD *xd) {
+  const int frame_stride = xd->dst.uv_stride;
+
+  vp9_build_intra_predictors_mbuv_internal(xd,
+                                           xd->dst.u_buffer,
+                                           xd->dst.v_buffer,
+                                           frame_stride,
+                                           xd->mode_info_context->mbmi.uv_mode,
+                                           16);
+}
+#endif
+
+#if CONFIG_COMP_INTRA_PRED
+/* Compound intra prediction for the two 8x8 chroma blocks: each plane is
+ * predicted once with mbmi.uv_mode and once with mbmi.second_uv_mode, and
+ * the rounded averages are stored in xd->predictor (U at offset 256, V at
+ * offset 320). */
+void vp9_build_comp_intra_predictors_mbuv(MACROBLOCKD *xd) {
+  unsigned char u_first[64], u_second[64];
+  unsigned char v_first[64], v_second[64];
+  int k;
+
+  vp9_build_intra_predictors_mbuv_internal(
+      xd, u_first, v_first, 8,
+      xd->mode_info_context->mbmi.uv_mode, 8);
+  vp9_build_intra_predictors_mbuv_internal(
+      xd, u_second, v_second, 8,
+      xd->mode_info_context->mbmi.second_uv_mode, 8);
+
+  for (k = 0; k < 64; ++k) {
+    xd->predictor[256 + k] = (u_first[k] + u_second[k] + 1) >> 1;
+    xd->predictor[256 + 64 + k] = (v_first[k] + v_second[k] + 1) >> 1;
+  }
+}
+#endif
+
+/* Intra predicts one 8x8 block. The neighbours are read around the block's
+ * destination (*(xd->base_dst) + xd->dst); the result is written to
+ * predictor with a row pitch of 16. Both the above and the left neighbours
+ * are always treated as available. */
+void vp9_intra8x8_predict(BLOCKD *xd,
+                          int mode,
+                          unsigned char *predictor) {
+  unsigned char *const src = *(xd->base_dst) + xd->dst;
+
+  vp9_build_intra_predictors_internal(src, xd->dst_stride,
+                                      predictor, 16,
+                                      mode, 8, 1, 1);
+}
+
+#if CONFIG_COMP_INTRA_PRED
+/* Compound 8x8 intra prediction: the block is predicted with both modes and
+ * the rounded average of the two results is stored. As with
+ * vp9_intra8x8_predict the output has a row pitch of 16; only the first 8
+ * bytes of each of the 8 rows are written. */
+void vp9_comp_intra8x8_predict(BLOCKD *xd,
+                               int mode, int second_mode,
+                               unsigned char *out_predictor) {
+  unsigned char first[8 * 16];
+  unsigned char second[8 * 16];
+  int row, col;
+
+  vp9_intra8x8_predict(xd, mode, first);
+  vp9_intra8x8_predict(xd, second_mode, second);
+
+  for (row = 0; row < 8; ++row) {
+    const int base = row * 16;
+    for (col = 0; col < 8; ++col) {
+      const int k = base + col;
+      out_predictor[k] = (first[k] + second[k] + 1) >> 1;
+    }
+  }
+}
+#endif
+
+/* Intra predicts one 4x4 chroma block. The neighbours are read around the
+ * block's destination (*(xd->base_dst) + xd->dst); the result is written to
+ * predictor with a row pitch of 8. Both the above and the left neighbours
+ * are always treated as available. */
+void vp9_intra_uv4x4_predict(BLOCKD *xd,
+                             int mode,
+                             unsigned char *predictor) {
+  unsigned char *const src = *(xd->base_dst) + xd->dst;
+
+  vp9_build_intra_predictors_internal(src, xd->dst_stride,
+                                      predictor, 8,
+                                      mode, 4, 1, 1);
+}
+
+#if CONFIG_COMP_INTRA_PRED
+/* Compound 4x4 chroma intra prediction: the block is predicted with both
+ * modes and the rounded average of the two results is stored. As with
+ * vp9_intra_uv4x4_predict the output has a row pitch of 8; only the first 4
+ * bytes of each of the 4 rows are written. */
+void vp9_comp_intra_uv4x4_predict(BLOCKD *xd,
+                                  int mode, int mode2,
+                                  unsigned char *out_predictor) {
+  unsigned char first[8 * 4];
+  unsigned char second[8 * 4];
+  int row, col;
+
+  vp9_intra_uv4x4_predict(xd, mode, first);
+  vp9_intra_uv4x4_predict(xd, mode2, second);
+
+  for (row = 0; row < 4; ++row) {
+    const int base = row * 8;
+    for (col = 0; col < 4; ++col) {
+      const int k = base + col;
+      out_predictor[k] = (first[k] + second[k] + 1) >> 1;
+    }
+  }
+}
+#endif
+
+/* TODO: try different ways of using the Y-UV mode correlation.
+   The current code assumes that a uv 4x4 block uses the same mode
+   as the corresponding Y 8x8 area.
+ */
diff --git a/vp9/common/vp9_reconintra.h b/vp9/common/vp9_reconintra.h
new file mode 100644
index 0000000..92882d3
--- /dev/null
+++ b/vp9/common/vp9_reconintra.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_COMMON_VP9_RECONINTRA_H_
+#define VP9_COMMON_VP9_RECONINTRA_H_
+
+#include "vp9/common/vp9_blockd.h"
+
+extern void vp9_recon_intra_mbuv(MACROBLOCKD *xd);
+/* NOTE(review): vp9_find_dominant_direction is defined in
+ * vp9_reconintra4x4.c under #if CONFIG_NEWBINTRAMODES, while it is declared
+ * unconditionally here; vp9_find_bpred_context presumably lives next to it --
+ * confirm. */
+extern B_PREDICTION_MODE vp9_find_dominant_direction(unsigned char *ptr,
+ int stride, int n);
+extern B_PREDICTION_MODE vp9_find_bpred_context(BLOCKD *x);
+#if CONFIG_COMP_INTERINTRA_PRED
+/* Inter-intra blending: the ypred/upred/vpred buffers hold inter predictors
+ * and are updated in place. */
+extern void vp9_build_interintra_16x16_predictors_mb(MACROBLOCKD *xd,
+ unsigned char *ypred,
+ unsigned char *upred,
+ unsigned char *vpred,
+ int ystride,
+ int uvstride);
+extern void vp9_build_interintra_16x16_predictors_mby(MACROBLOCKD *xd,
+ unsigned char *ypred,
+ int ystride);
+extern void vp9_build_interintra_16x16_predictors_mbuv(MACROBLOCKD *xd,
+ unsigned char *upred,
+ unsigned char *vpred,
+ int uvstride);
+#if CONFIG_SUPERBLOCKS
+extern void vp9_build_interintra_32x32_predictors_sb(MACROBLOCKD *xd,
+ unsigned char *ypred,
+ unsigned char *upred,
+ unsigned char *vpred,
+ int ystride,
+ int uvstride);
+#endif  // CONFIG_SUPERBLOCKS
+#endif  // CONFIG_COMP_INTERINTRA_PRED
+
+#endif  // VP9_COMMON_VP9_RECONINTRA_H_
diff --git a/vp9/common/vp9_reconintra4x4.c b/vp9/common/vp9_reconintra4x4.c
new file mode 100644
index 0000000..a730fd0
--- /dev/null
+++ b/vp9/common/vp9_reconintra4x4.c
@@ -0,0 +1,472 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "./vpx_config.h"
+#include "vpx_mem/vpx_mem.h"
+#include "vp9/common/vp9_reconintra.h"
+#include "vp9_rtcd.h"
+
+#if CONFIG_NEWBINTRAMODES
+static int find_grad_measure(unsigned char *x, int stride, int n, int t,
+                             int dx, int dy) {
+  /* Mean squared difference, scaled by 256, of pixel pairs (dx, dy) apart. */
+  int row, col, diff, samples = 0, sq_sum = 0, denom;
+  /* TODO: Make this code more efficient by breaking up into two loops */
+  for (row = -t; row < n; ++row) {
+    for (col = -t; col < n; ++col) {
+      const int row2 = row + dy, col2 = col + dx;
+      if ((row >= 0 && col >= 0) || (row2 >= 0 && col2 >= 0)) continue;
+      if (row2 < -t || row2 >= n || col2 < -t || col2 >= n) continue;
+      diff = abs(x[row2 * stride + col2] - x[row * stride + col]);
+      sq_sum += diff * diff;
+      ++samples;
+    }
+  }
+  denom = (dx * dx + dy * dy) * samples;
+  return ((sq_sum << 8) + (denom >> 1)) / denom;
+}
+
+#if CONTEXT_PRED_REPLACEMENTS == 6
+/* Picks the directional mode with the smallest gradient measure around the
+ * n x n block at ptr. Six directions are evaluated; g[0] and g[4] stay unset.
+ */
+B_PREDICTION_MODE vp9_find_dominant_direction(
+    unsigned char *ptr, int stride, int n) {
+  int g[8], i, imin;
+  g[1] = find_grad_measure(ptr, stride, n, 4, 2, 1);
+  g[2] = find_grad_measure(ptr, stride, n, 4, 1, 1);
+  g[3] = find_grad_measure(ptr, stride, n, 4, 1, 2);
+  g[5] = find_grad_measure(ptr, stride, n, 4, -1, 2);
+  g[6] = find_grad_measure(ptr, stride, n, 4, -1, 1);
+  g[7] = find_grad_measure(ptr, stride, n, 4, -2, 1);
+  imin = 1;
+  /* Visits 2, 3, 5, 6, 7: the extra step taken at i == 3 skips unset g[4]. */
+  for (i = 2; i < 8; i += 1 + (i == 3))
+    imin = (g[i] < g[imin] ? i : imin);
+  switch (imin) {
+    case 1:
+      return B_HD_PRED;
+    case 2:
+      return B_RD_PRED;
+    case 3:
+      return B_VR_PRED;
+    case 5:
+      return B_VL_PRED;
+    case 6:
+      return B_LD_PRED;
+    case 7:
+      return B_HU_PRED;
+    default:
+      assert(0);
+  }
+  /* Not reached: keeps the function from ending without a return value when
+   * assert() is compiled out. */
+  return B_DC_PRED;
+}
+#elif CONTEXT_PRED_REPLACEMENTS == 4
+/* Picks the directional mode with the smallest gradient measure around the
+ * n x n block at ptr. Four directions are evaluated (odd slots of g[] only).
+ */
+B_PREDICTION_MODE vp9_find_dominant_direction(
+    unsigned char *ptr, int stride, int n) {
+  int g[8], i, imin;
+  g[1] = find_grad_measure(ptr, stride, n, 4, 2, 1);
+  g[3] = find_grad_measure(ptr, stride, n, 4, 1, 2);
+  g[5] = find_grad_measure(ptr, stride, n, 4, -1, 2);
+  g[7] = find_grad_measure(ptr, stride, n, 4, -2, 1);
+  imin = 1;
+  /* Ties keep the lowest index because the comparison is strict. */
+  for (i = 3; i < 8; i += 2)
+    imin = (g[i] < g[imin] ? i : imin);
+  switch (imin) {
+    case 1:
+      return B_HD_PRED;
+    case 3:
+      return B_VR_PRED;
+    case 5:
+      return B_VL_PRED;
+    case 7:
+      return B_HU_PRED;
+    default:
+      assert(0);
+  }
+  /* Not reached: keeps the function from ending without a return value when
+   * assert() is compiled out. */
+  return B_DC_PRED;
+}
+#elif CONTEXT_PRED_REPLACEMENTS == 0
+/* Picks, out of eight directions, the mode with the smallest gradient
+ * measure around the n x n block at ptr (ties keep the lowest index). */
+B_PREDICTION_MODE vp9_find_dominant_direction(
+    unsigned char *ptr, int stride, int n) {
+  int g[8], i, imin;
+  g[0] = find_grad_measure(ptr, stride, n, 4, 1, 0);
+  g[1] = find_grad_measure(ptr, stride, n, 4, 2, 1);
+  g[2] = find_grad_measure(ptr, stride, n, 4, 1, 1);
+  g[3] = find_grad_measure(ptr, stride, n, 4, 1, 2);
+  g[4] = find_grad_measure(ptr, stride, n, 4, 0, 1);
+  g[5] = find_grad_measure(ptr, stride, n, 4, -1, 2);
+  g[6] = find_grad_measure(ptr, stride, n, 4, -1, 1);
+  g[7] = find_grad_measure(ptr, stride, n, 4, -2, 1);
+  imin = 0;
+  for (i = 1; i < 8; i++)
+    imin = (g[i] < g[imin] ? i : imin);
+  switch (imin) {
+    case 0:
+      return B_HE_PRED;
+    case 1:
+      return B_HD_PRED;
+    case 2:
+      return B_RD_PRED;
+    case 3:
+      return B_VR_PRED;
+    case 4:
+      return B_VE_PRED;
+    case 5:
+      return B_VL_PRED;
+    case 6:
+      return B_LD_PRED;
+    case 7:
+      return B_HU_PRED;
+    default:
+      assert(0);
+  }
+  /* Not reached: avoids ending without a return when assert() is disabled. */
+  return B_DC_PRED;
+}
+#endif
+
+B_PREDICTION_MODE vp9_find_bpred_context(BLOCKD *x) {
+  /* Dominant direction for the 4x4 block at *(x->base_dst) + x->dst. */
+  unsigned char *const dst = *(x->base_dst) + x->dst;
+  return vp9_find_dominant_direction(dst, x->dst_stride, 4);
+}
+#endif
+
+void vp9_intra4x4_predict(BLOCKD *x,
+ int b_mode,
+ unsigned char *predictor) {
+ int i, r, c;
+ /* Writes the 4x4 prediction for b_mode to predictor (16 bytes per row). */
+ unsigned char *Above = *(x->base_dst) + x->dst - x->dst_stride;
+ unsigned char Left[4];
+ unsigned char top_left = Above[-1];
+ /* Left[r] is the pixel one column to the left of row r of the block. */
+ Left[0] = (*(x->base_dst))[x->dst - 1];
+ Left[1] = (*(x->base_dst))[x->dst - 1 + x->dst_stride];
+ Left[2] = (*(x->base_dst))[x->dst - 1 + 2 * x->dst_stride];
+ Left[3] = (*(x->base_dst))[x->dst - 1 + 3 * x->dst_stride];
+
+#if CONFIG_NEWBINTRAMODES
+ if (b_mode == B_CONTEXT_PRED)
+ b_mode = x->bmi.as_mode.context;
+#endif
+
+ switch (b_mode) {
+ case B_DC_PRED: {
+ int expected_dc = 0;
+ /* Sum the 4 pixels above and the 4 pixels left of the block. */
+ for (i = 0; i < 4; i++) {
+ expected_dc += Above[i];
+ expected_dc += Left[i];
+ }
+ /* Rounded average of those 8 pixels fills the whole block. */
+ expected_dc = (expected_dc + 4) >> 3;
+
+ for (r = 0; r < 4; r++) {
+ for (c = 0; c < 4; c++) {
+ predictor[c] = expected_dc;
+ }
+
+ predictor += 16;
+ }
+ }
+ break;
+ case B_TM_PRED: {
+ /* prediction similar to true_motion prediction */
+ for (r = 0; r < 4; r++) {
+ for (c = 0; c < 4; c++) {
+ int pred = Above[c] - top_left + Left[r];
+
+ if (pred < 0)
+ pred = 0;
+
+ if (pred > 255)
+ pred = 255;
+
+ predictor[c] = pred;
+ }
+
+ predictor += 16;
+ }
+ }
+ break;
+
+ case B_VE_PRED: {
+ /* Every row is a copy of the 4 pixels above the block. */
+ unsigned int ap[4];
+ ap[0] = Above[0];
+ ap[1] = Above[1];
+ ap[2] = Above[2];
+ ap[3] = Above[3];
+
+ for (r = 0; r < 4; r++) {
+ for (c = 0; c < 4; c++) {
+
+ predictor[c] = ap[c];
+ }
+
+ predictor += 16;
+ }
+
+ }
+ break;
+
+
+ case B_HE_PRED: {
+ /* Every pixel of row r is a copy of Left[r]. */
+ unsigned int lp[4];
+ lp[0] = Left[0];
+ lp[1] = Left[1];
+ lp[2] = Left[2];
+ lp[3] = Left[3];
+
+ for (r = 0; r < 4; r++) {
+ for (c = 0; c < 4; c++) {
+ predictor[c] = lp[r];
+ }
+
+ predictor += 16;
+ }
+ }
+ break;
+ case B_LD_PRED: {
+ unsigned char *ptr = Above;
+ predictor[0 * 16 + 0] = (ptr[0] + ptr[1] * 2 + ptr[2] + 2) >> 2;
+ predictor[0 * 16 + 1] =
+ predictor[1 * 16 + 0] = (ptr[1] + ptr[2] * 2 + ptr[3] + 2) >> 2;
+ predictor[0 * 16 + 2] =
+ predictor[1 * 16 + 1] =
+ predictor[2 * 16 + 0] = (ptr[2] + ptr[3] * 2 + ptr[4] + 2) >> 2;
+ predictor[0 * 16 + 3] =
+ predictor[1 * 16 + 2] =
+ predictor[2 * 16 + 1] =
+ predictor[3 * 16 + 0] = (ptr[3] + ptr[4] * 2 + ptr[5] + 2) >> 2;
+ predictor[1 * 16 + 3] =
+ predictor[2 * 16 + 2] =
+ predictor[3 * 16 + 1] = (ptr[4] + ptr[5] * 2 + ptr[6] + 2) >> 2;
+ predictor[2 * 16 + 3] =
+ predictor[3 * 16 + 2] = (ptr[5] + ptr[6] * 2 + ptr[7] + 2) >> 2;
+ predictor[3 * 16 + 3] = (ptr[6] + ptr[7] * 2 + ptr[7] + 2) >> 2;
+
+ }
+ break;
+ case B_RD_PRED: {
+ /* pp[0..8] = Left[3]..Left[0], top_left, Above[0]..Above[3]. */
+ unsigned char pp[9];
+
+ pp[0] = Left[3];
+ pp[1] = Left[2];
+ pp[2] = Left[1];
+ pp[3] = Left[0];
+ pp[4] = top_left;
+ pp[5] = Above[0];
+ pp[6] = Above[1];
+ pp[7] = Above[2];
+ pp[8] = Above[3];
+
+ predictor[3 * 16 + 0] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2;
+ predictor[3 * 16 + 1] =
+ predictor[2 * 16 + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
+ predictor[3 * 16 + 2] =
+ predictor[2 * 16 + 1] =
+ predictor[1 * 16 + 0] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2;
+ predictor[3 * 16 + 3] =
+ predictor[2 * 16 + 2] =
+ predictor[1 * 16 + 1] =
+ predictor[0 * 16 + 0] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2;
+ predictor[2 * 16 + 3] =
+ predictor[1 * 16 + 2] =
+ predictor[0 * 16 + 1] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2;
+ predictor[1 * 16 + 3] =
+ predictor[0 * 16 + 2] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2;
+ predictor[0 * 16 + 3] = (pp[6] + pp[7] * 2 + pp[8] + 2) >> 2;
+
+ }
+ break;
+ case B_VR_PRED: {
+ /* pp[0..8] = Left[3]..Left[0], top_left, Above[0]..Above[3]. */
+ unsigned char pp[9];
+
+ pp[0] = Left[3];
+ pp[1] = Left[2];
+ pp[2] = Left[1];
+ pp[3] = Left[0];
+ pp[4] = top_left;
+ pp[5] = Above[0];
+ pp[6] = Above[1];
+ pp[7] = Above[2];
+ pp[8] = Above[3];
+
+
+ predictor[3 * 16 + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
+ predictor[2 * 16 + 0] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2;
+ predictor[3 * 16 + 1] =
+ predictor[1 * 16 + 0] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2;
+ predictor[2 * 16 + 1] =
+ predictor[0 * 16 + 0] = (pp[4] + pp[5] + 1) >> 1;
+ predictor[3 * 16 + 2] =
+ predictor[1 * 16 + 1] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2;
+ predictor[2 * 16 + 2] =
+ predictor[0 * 16 + 1] = (pp[5] + pp[6] + 1) >> 1;
+ predictor[3 * 16 + 3] =
+ predictor[1 * 16 + 2] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2;
+ predictor[2 * 16 + 3] =
+ predictor[0 * 16 + 2] = (pp[6] + pp[7] + 1) >> 1;
+ predictor[1 * 16 + 3] = (pp[6] + pp[7] * 2 + pp[8] + 2) >> 2;
+ predictor[0 * 16 + 3] = (pp[7] + pp[8] + 1) >> 1;
+
+ }
+ break;
+ case B_VL_PRED: {
+ /* Reads Above[0..7], i.e. 4 pixels beyond the block's own 4 columns. */
+ unsigned char *pp = Above;
+
+ predictor[0 * 16 + 0] = (pp[0] + pp[1] + 1) >> 1;
+ predictor[1 * 16 + 0] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2;
+ predictor[2 * 16 + 0] =
+ predictor[0 * 16 + 1] = (pp[1] + pp[2] + 1) >> 1;
+ predictor[1 * 16 + 1] =
+ predictor[3 * 16 + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
+ predictor[2 * 16 + 1] =
+ predictor[0 * 16 + 2] = (pp[2] + pp[3] + 1) >> 1;
+ predictor[3 * 16 + 1] =
+ predictor[1 * 16 + 2] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2;
+ predictor[0 * 16 + 3] =
+ predictor[2 * 16 + 2] = (pp[3] + pp[4] + 1) >> 1;
+ predictor[1 * 16 + 3] =
+ predictor[3 * 16 + 2] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2;
+ predictor[2 * 16 + 3] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2;
+ predictor[3 * 16 + 3] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2;
+ }
+ break;
+
+ case B_HD_PRED: {
+ unsigned char pp[9];
+ pp[0] = Left[3];
+ pp[1] = Left[2];
+ pp[2] = Left[1];
+ pp[3] = Left[0];
+ pp[4] = top_left;
+ pp[5] = Above[0];
+ pp[6] = Above[1];
+ pp[7] = Above[2];
+ pp[8] = Above[3];
+ /* pp[8] is filled for symmetry with the cases above but not read here. */
+
+ predictor[3 * 16 + 0] = (pp[0] + pp[1] + 1) >> 1;
+ predictor[3 * 16 + 1] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2;
+ predictor[2 * 16 + 0] =
+ predictor[3 * 16 + 2] = (pp[1] + pp[2] + 1) >> 1;
+ predictor[2 * 16 + 1] =
+ predictor[3 * 16 + 3] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
+ predictor[2 * 16 + 2] =
+ predictor[1 * 16 + 0] = (pp[2] + pp[3] + 1) >> 1;
+ predictor[2 * 16 + 3] =
+ predictor[1 * 16 + 1] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2;
+ predictor[1 * 16 + 2] =
+ predictor[0 * 16 + 0] = (pp[3] + pp[4] + 1) >> 1;
+ predictor[1 * 16 + 3] =
+ predictor[0 * 16 + 1] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2;
+ predictor[0 * 16 + 2] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2;
+ predictor[0 * 16 + 3] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2;
+ }
+ break;
+
+
+ case B_HU_PRED: {
+ unsigned char *pp = Left;
+ predictor[0 * 16 + 0] = (pp[0] + pp[1] + 1) >> 1;
+ predictor[0 * 16 + 1] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2;
+ predictor[0 * 16 + 2] =
+ predictor[1 * 16 + 0] = (pp[1] + pp[2] + 1) >> 1;
+ predictor[0 * 16 + 3] =
+ predictor[1 * 16 + 1] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
+ predictor[1 * 16 + 2] =
+ predictor[2 * 16 + 0] = (pp[2] + pp[3] + 1) >> 1;
+ predictor[1 * 16 + 3] =
+ predictor[2 * 16 + 1] = (pp[2] + pp[3] * 2 + pp[3] + 2) >> 2;
+ predictor[2 * 16 + 2] =
+ predictor[2 * 16 + 3] =
+ predictor[3 * 16 + 0] =
+ predictor[3 * 16 + 1] =
+ predictor[3 * 16 + 2] =
+ predictor[3 * 16 + 3] = pp[3];
+ }
+ break;
+ /* B_CONTEXT_PRED was swapped for the stored context mode before the switch; if it still arrives here nothing is written. */
+#if CONFIG_NEWBINTRAMODES
+ case B_CONTEXT_PRED:
+ break;
+ /*
+ case B_CORNER_PRED:
+ corner_predictor(predictor, 16, 4, Above, Left);
+ break;
+ */
+#endif
+ }
+}
+
+#if CONFIG_COMP_INTRA_PRED
+void vp9_comp_intra4x4_predict_c(BLOCKD *x,
+                                 int b_mode, int b_mode2,
+                                 unsigned char *out_predictor) {
+  /* Compound prediction: rounded average of the two single-mode results. */
+  unsigned char first[4 * 16], second[4 * 16];
+  int row, col;
+  vp9_intra4x4_predict(x, b_mode, first);
+  vp9_intra4x4_predict(x, b_mode2, second);
+  for (row = 0; row < 4; ++row) {
+    for (col = 0; col < 4; ++col) {
+      const int k = row * 16 + col;
+      out_predictor[k] = (first[k] + second[k] + 1) >> 1;
+    }
+  }
+}
+#endif
+
+/* Copy the 4 bytes that lie above and to the right of the macroblock down
+ * the same 4 columns, so that the 4x4 prediction modes which read pixels
+ * above and to the right of a block find them filled in. The copies land at
+ * above_right itself and 4, 8 and 12 rows below it.
+ */
+void vp9_intra_prediction_down_copy(MACROBLOCKD *xd) {
+  const int stride = xd->block[0].dst_stride;
+  const int extend_edge = (xd->mb_to_right_edge == 0 && xd->mb_index < 2);
+  unsigned char *const above_right =
+      *(xd->block[0].base_dst) + xd->block[0].dst - stride + 16;
+
+  /* For mb_index 3 the source word is read from 16 rows further up. */
+  unsigned int *const src_ptr = (unsigned int *)
+      (above_right - (xd->mb_index == 3 ? 16 * stride : 0));
+  int k;
+
+  if (extend_edge) {
+    /* Replace the source word with four copies of the byte just before it. */
+    *src_ptr = ((uint8_t *) src_ptr)[-1] * 0x01010101U;
+  }
+
+  for (k = 0; k < 4; ++k) {
+    unsigned int *const dst_ptr =
+        (unsigned int *)(above_right + 4 * k * stride);
+    *dst_ptr = *src_ptr;
+  }
+}
diff --git a/vp9/common/vp9_reconintra4x4.h b/vp9/common/vp9_reconintra4x4.h
new file mode 100644
index 0000000..8e806bc
--- /dev/null
+++ b/vp9/common/vp9_reconintra4x4.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_COMMON_VP9_RECONINTRA4X4_H_
+#define VP9_COMMON_VP9_RECONINTRA4X4_H_
+#include "vp9/common/vp9_blockd.h"  /* for MACROBLOCKD */
+
+extern void vp9_intra_prediction_down_copy(MACROBLOCKD *xd);
+#endif  // VP9_COMMON_VP9_RECONINTRA4X4_H_
diff --git a/vp9/common/vp9_rtcd.c b/vp9/common/vp9_rtcd.c
new file mode 100644
index 0000000..277d5b2
--- /dev/null
+++ b/vp9/common/vp9_rtcd.c
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "vpx_config.h"
+#define RTCD_C
+#include "vp9_rtcd.h"
+#include "vpx_ports/vpx_once.h"
+
+extern void vpx_scale_rtcd(void);
+
+/* RTCD entry point: calls vpx_scale_rtcd(), then runs setup_rtcd_internal()
+ * through once(). */
+void vp9_rtcd(void) {
+  vpx_scale_rtcd();
+  once(setup_rtcd_internal); }
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
new file mode 100644
index 0000000..6af7b3b
--- /dev/null
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -0,0 +1,681 @@
+# Prints the C forward declarations; registered below through forward_decls.
+vp9_common_forward_decls() {
+cat <<EOF
+/*
+ * VP9
+ */
+
+struct loop_filter_info;
+struct blockd;
+struct macroblockd;
+
+/* Encoder forward decls */
+struct block;
+struct macroblock;
+struct variance_vtable;
+
+#define DEC_MVCOSTS int *mvjcost, int *mvcost[2]
+union int_mv;
+struct yv12_buffer_config;
+EOF
+}
+forward_decls vp9_common_forward_decls
+
+prototype void vp9_filter_block2d_4x4_8 "const unsigned char *src_ptr, const unsigned int src_stride, const short *HFilter_aligned16, const short *VFilter_aligned16, unsigned char *dst_ptr, unsigned int dst_stride"
+prototype void vp9_filter_block2d_8x4_8 "const unsigned char *src_ptr, const unsigned int src_stride, const short *HFilter_aligned16, const short *VFilter_aligned16, unsigned char *dst_ptr, unsigned int dst_stride"
+prototype void vp9_filter_block2d_8x8_8 "const unsigned char *src_ptr, const unsigned int src_stride, const short *HFilter_aligned16, const short *VFilter_aligned16, unsigned char *dst_ptr, unsigned int dst_stride"
+prototype void vp9_filter_block2d_16x16_8 "const unsigned char *src_ptr, const unsigned int src_stride, const short *HFilter_aligned16, const short *VFilter_aligned16, unsigned char *dst_ptr, unsigned int dst_stride"
+
+# At the very least, MSVC 2008 has a compiler bug exhibited by this code; the
+# code compiles warning-free but a disassembly of the generated code shows
+# bugs. To be on the safe side, only enabled when compiled with 'gcc'.
+if [ "$CONFIG_GCC" = "yes" ]; then
+ specialize vp9_filter_block2d_4x4_8 sse4_1 sse2
+fi
+ specialize vp9_filter_block2d_8x4_8 ssse3 #sse4_1 sse2
+ specialize vp9_filter_block2d_8x8_8 ssse3 #sse4_1 sse2
+ specialize vp9_filter_block2d_16x16_8 ssse3 #sse4_1 sse2
+
+#
+# Dequant
+#
+prototype void vp9_dequantize_b "struct blockd *x"
+specialize vp9_dequantize_b
+
+prototype void vp9_dequantize_b_2x2 "struct blockd *x"
+specialize vp9_dequantize_b_2x2
+
+prototype void vp9_dequant_dc_idct_add_y_block_8x8 "short *q, const short *dq, unsigned char *pre, unsigned char *dst, int stride, unsigned short *eobs, const short *dc, struct macroblockd *xd"
+specialize vp9_dequant_dc_idct_add_y_block_8x8
+
+prototype void vp9_dequant_idct_add_y_block_8x8 "short *q, const short *dq, unsigned char *pre, unsigned char *dst, int stride, unsigned short *eobs, struct macroblockd *xd"
+specialize vp9_dequant_idct_add_y_block_8x8
+
+prototype void vp9_dequant_idct_add_uv_block_8x8 "short *q, const short *dq, unsigned char *pre, unsigned char *dstu, unsigned char *dstv, int stride, unsigned short *eobs, struct macroblockd *xd"
+specialize vp9_dequant_idct_add_uv_block_8x8
+
+prototype void vp9_dequant_idct_add_16x16 "short *input, const short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride, unsigned short eobs"
+specialize vp9_dequant_idct_add_16x16
+
+prototype void vp9_dequant_idct_add_8x8 "short *input, const short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride, int dc, unsigned short eobs"
+specialize vp9_dequant_idct_add_8x8
+
+prototype void vp9_dequant_idct_add "short *input, const short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride"
+specialize vp9_dequant_idct_add
+
+prototype void vp9_dequant_dc_idct_add "short *input, const short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride, int Dc"
+specialize vp9_dequant_dc_idct_add
+
+prototype void vp9_dequant_dc_idct_add_y_block "short *q, const short *dq, unsigned char *pre, unsigned char *dst, int stride, unsigned short *eobs, const short *dc"
+specialize vp9_dequant_dc_idct_add_y_block
+
+prototype void vp9_dequant_idct_add_y_block "short *q, const short *dq, unsigned char *pre, unsigned char *dst, int stride, unsigned short *eobs"
+specialize vp9_dequant_idct_add_y_block
+
+prototype void vp9_dequant_idct_add_uv_block "short *q, const short *dq, unsigned char *pre, unsigned char *dstu, unsigned char *dstv, int stride, unsigned short *eobs"
+specialize vp9_dequant_idct_add_uv_block
+
+#
+# RECON
+#
+prototype void vp9_copy_mem16x16 "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch"
+specialize vp9_copy_mem16x16 mmx sse2 dspr2
+vp9_copy_mem16x16_dspr2=vp9_copy_mem16x16_dspr2
+
+prototype void vp9_copy_mem8x8 "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch"
+specialize vp9_copy_mem8x8 mmx dspr2
+vp9_copy_mem8x8_dspr2=vp9_copy_mem8x8_dspr2
+
+prototype void vp9_copy_mem8x4 "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch"
+specialize vp9_copy_mem8x4 mmx
+
+prototype void vp9_avg_mem16x16 "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch"
+specialize vp9_avg_mem16x16
+
+prototype void vp9_avg_mem8x8 "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch"
+specialize vp9_avg_mem8x8
+
+prototype void vp9_copy_mem8x4 "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch"
+specialize vp9_copy_mem8x4 mmx dspr2
+vp9_copy_mem8x4_dspr2=vp9_copy_mem8x4_dspr2
+
+prototype void vp9_recon_b "unsigned char *pred_ptr, short *diff_ptr, unsigned char *dst_ptr, int stride"
+specialize vp9_recon_b
+
+prototype void vp9_recon_uv_b "unsigned char *pred_ptr, short *diff_ptr, unsigned char *dst_ptr, int stride"
+specialize vp9_recon_uv_b
+
+prototype void vp9_recon2b "unsigned char *pred_ptr, short *diff_ptr, unsigned char *dst_ptr, int stride"
+specialize vp9_recon2b sse2
+
+prototype void vp9_recon4b "unsigned char *pred_ptr, short *diff_ptr, unsigned char *dst_ptr, int stride"
+specialize vp9_recon4b sse2
+
+prototype void vp9_recon_mb "struct macroblockd *x"
+specialize vp9_recon_mb
+
+prototype void vp9_recon_mby "struct macroblockd *x"
+specialize vp9_recon_mby
+
+prototype void vp9_recon_mby_s "struct macroblockd *x, unsigned char *dst"
+specialize vp9_recon_mby_s
+
+prototype void vp9_recon_mbuv_s "struct macroblockd *x, unsigned char *udst, unsigned char *vdst"
+specialize vp9_recon_mbuv_s
+
+prototype void vp9_build_intra_predictors_mby_s "struct macroblockd *x"
+specialize vp9_build_intra_predictors_mby_s
+
+prototype void vp9_build_intra_predictors_sby_s "struct macroblockd *x"
+specialize vp9_build_intra_predictors_sby_s;
+
+prototype void vp9_build_intra_predictors_sbuv_s "struct macroblockd *x"
+specialize vp9_build_intra_predictors_sbuv_s;
+
+prototype void vp9_build_intra_predictors_mby "struct macroblockd *x"
+specialize vp9_build_intra_predictors_mby;
+
+prototype void vp9_build_comp_intra_predictors_mby "struct macroblockd *x"
+specialize vp9_build_comp_intra_predictors_mby;
+
+prototype void vp9_build_intra_predictors_mby_s "struct macroblockd *x"
+specialize vp9_build_intra_predictors_mby_s;
+
+prototype void vp9_build_intra_predictors_mbuv "struct macroblockd *x"
+specialize vp9_build_intra_predictors_mbuv;
+
+prototype void vp9_build_intra_predictors_mbuv_s "struct macroblockd *x"
+specialize vp9_build_intra_predictors_mbuv_s;
+
+prototype void vp9_build_comp_intra_predictors_mbuv "struct macroblockd *x"
+specialize vp9_build_comp_intra_predictors_mbuv;
+
+prototype void vp9_intra4x4_predict "struct blockd *x, int b_mode, unsigned char *predictor"
+specialize vp9_intra4x4_predict;
+
+prototype void vp9_comp_intra4x4_predict "struct blockd *x, int b_mode, int second_mode, unsigned char *predictor"
+specialize vp9_comp_intra4x4_predict;
+
+prototype void vp9_intra8x8_predict "struct blockd *x, int b_mode, unsigned char *predictor"
+specialize vp9_intra8x8_predict;
+
+prototype void vp9_comp_intra8x8_predict "struct blockd *x, int b_mode, int second_mode, unsigned char *predictor"
+specialize vp9_comp_intra8x8_predict;
+
+prototype void vp9_intra_uv4x4_predict "struct blockd *x, int b_mode, unsigned char *predictor"
+specialize vp9_intra_uv4x4_predict;
+
+prototype void vp9_comp_intra_uv4x4_predict "struct blockd *x, int b_mode, int second_mode, unsigned char *predictor"
+specialize vp9_comp_intra_uv4x4_predict;
+
+#
+# Loopfilter
+#
+prototype void vp9_loop_filter_mbv "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"
+specialize vp9_loop_filter_mbv sse2
+
+prototype void vp9_loop_filter_bv "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"
+specialize vp9_loop_filter_bv sse2
+
+prototype void vp9_loop_filter_bv8x8 "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"
+specialize vp9_loop_filter_bv8x8 sse2
+
+prototype void vp9_loop_filter_mbh "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"
+specialize vp9_loop_filter_mbh sse2
+
+prototype void vp9_loop_filter_bh "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"
+specialize vp9_loop_filter_bh sse2
+
+prototype void vp9_loop_filter_bh8x8 "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"
+specialize vp9_loop_filter_bh8x8 sse2
+
+prototype void vp9_loop_filter_simple_mbv "unsigned char *y, int ystride, const unsigned char *blimit"
+specialize vp9_loop_filter_simple_mbv mmx sse2
+vp9_loop_filter_simple_mbv_c=vp9_loop_filter_simple_vertical_edge_c
+vp9_loop_filter_simple_mbv_mmx=vp9_loop_filter_simple_vertical_edge_mmx
+vp9_loop_filter_simple_mbv_sse2=vp9_loop_filter_simple_vertical_edge_sse2
+
+prototype void vp9_loop_filter_simple_mbh "unsigned char *y, int ystride, const unsigned char *blimit"
+specialize vp9_loop_filter_simple_mbh mmx sse2
+vp9_loop_filter_simple_mbh_c=vp9_loop_filter_simple_horizontal_edge_c
+vp9_loop_filter_simple_mbh_mmx=vp9_loop_filter_simple_horizontal_edge_mmx
+vp9_loop_filter_simple_mbh_sse2=vp9_loop_filter_simple_horizontal_edge_sse2
+
+prototype void vp9_loop_filter_simple_bv "unsigned char *y, int ystride, const unsigned char *blimit"
+specialize vp9_loop_filter_simple_bv mmx sse2
+vp9_loop_filter_simple_bv_c=vp9_loop_filter_bvs_c
+vp9_loop_filter_simple_bv_mmx=vp9_loop_filter_bvs_mmx
+vp9_loop_filter_simple_bv_sse2=vp9_loop_filter_bvs_sse2
+
+prototype void vp9_loop_filter_simple_bh "unsigned char *y, int ystride, const unsigned char *blimit"
+specialize vp9_loop_filter_simple_bh mmx sse2
+vp9_loop_filter_simple_bh_c=vp9_loop_filter_bhs_c
+vp9_loop_filter_simple_bh_mmx=vp9_loop_filter_bhs_mmx
+vp9_loop_filter_simple_bh_sse2=vp9_loop_filter_bhs_sse2
+
+#
+# post proc
+#
+if [ "$CONFIG_POSTPROC" = "yes" ]; then
+prototype void vp9_mbpost_proc_down "unsigned char *dst, int pitch, int rows, int cols, int flimit"
+specialize vp9_mbpost_proc_down mmx sse2
+vp9_mbpost_proc_down_sse2=vp9_mbpost_proc_down_xmm
+
+prototype void vp9_mbpost_proc_across_ip "unsigned char *src, int pitch, int rows, int cols, int flimit"
+specialize vp9_mbpost_proc_across_ip sse2
+vp9_mbpost_proc_across_ip_sse2=vp9_mbpost_proc_across_ip_xmm
+
+prototype void vp9_post_proc_down_and_across "unsigned char *src_ptr, unsigned char *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit"
+specialize vp9_post_proc_down_and_across mmx sse2
+vp9_post_proc_down_and_across_sse2=vp9_post_proc_down_and_across_xmm
+
+prototype void vp9_plane_add_noise "unsigned char *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch"
+specialize vp9_plane_add_noise mmx sse2
+vp9_plane_add_noise_sse2=vp9_plane_add_noise_wmt
+fi
+
+prototype void vp9_blend_mb_inner "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride"
+specialize vp9_blend_mb_inner
+
+prototype void vp9_blend_mb_outer "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride"
+specialize vp9_blend_mb_outer
+
+prototype void vp9_blend_b "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride"
+specialize vp9_blend_b
+
+#
+# sad 16x3, 3x16
+#
+prototype unsigned int vp9_sad16x3 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride"
+specialize vp9_sad16x3 sse2
+
+prototype unsigned int vp9_sad3x16 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride"
+specialize vp9_sad3x16 sse2
+
+#
+# Sub Pixel Filters
+#
+prototype void vp9_eighttap_predict16x16 "unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch"
+specialize vp9_eighttap_predict16x16
+
+prototype void vp9_eighttap_predict8x8 "unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch"
+specialize vp9_eighttap_predict8x8
+
+prototype void vp9_eighttap_predict_avg16x16 "unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch"
+specialize vp9_eighttap_predict_avg16x16
+
+prototype void vp9_eighttap_predict_avg8x8 "unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch"
+specialize vp9_eighttap_predict_avg8x8
+
+prototype void vp9_eighttap_predict_avg4x4 "unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch"
+specialize vp9_eighttap_predict_avg4x4
+
+prototype void vp9_eighttap_predict8x4 "unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch"
+specialize vp9_eighttap_predict8x4
+
+prototype void vp9_eighttap_predict "unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch"
+specialize vp9_eighttap_predict
+
+prototype void vp9_eighttap_predict16x16_sharp "unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch"
+specialize vp9_eighttap_predict16x16_sharp
+
+prototype void vp9_eighttap_predict8x8_sharp "unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch"
+specialize vp9_eighttap_predict8x8_sharp
+
+prototype void vp9_eighttap_predict_avg16x16_sharp "unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch"
+specialize vp9_eighttap_predict_avg16x16_sharp
+
+prototype void vp9_eighttap_predict_avg8x8_sharp "unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch"
+specialize vp9_eighttap_predict_avg8x8_sharp
+
+prototype void vp9_eighttap_predict_avg4x4_sharp "unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch"
+specialize vp9_eighttap_predict_avg4x4_sharp
+
+prototype void vp9_eighttap_predict8x4_sharp "unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch"
+specialize vp9_eighttap_predict8x4_sharp
+
+prototype void vp9_eighttap_predict_sharp "unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch"
+specialize vp9_eighttap_predict_sharp
+
+prototype void vp9_sixtap_predict16x16 "unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch"
+specialize vp9_sixtap_predict16x16
+
+prototype void vp9_sixtap_predict8x8 "unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch"
+specialize vp9_sixtap_predict8x8
+
+prototype void vp9_sixtap_predict_avg16x16 "unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch"
+specialize vp9_sixtap_predict_avg16x16
+
+prototype void vp9_sixtap_predict_avg8x8 "unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch"
+specialize vp9_sixtap_predict_avg8x8
+
+prototype void vp9_sixtap_predict8x4 "unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch"
+specialize vp9_sixtap_predict8x4
+
+prototype void vp9_sixtap_predict "unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch"
+specialize vp9_sixtap_predict
+
+prototype void vp9_sixtap_predict_avg "unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch"
+specialize vp9_sixtap_predict_avg
+
+prototype void vp9_bilinear_predict16x16 "unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch"
+specialize vp9_bilinear_predict16x16 mmx sse2
+
+prototype void vp9_bilinear_predict8x8 "unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch"
+specialize vp9_bilinear_predict8x8 mmx sse2
+
+prototype void vp9_bilinear_predict_avg16x16 "unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch"
+specialize vp9_bilinear_predict_avg16x16
+
+prototype void vp9_bilinear_predict_avg8x8 "unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch"
+specialize vp9_bilinear_predict_avg8x8
+
+prototype void vp9_bilinear_predict8x4 "unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch"
+specialize vp9_bilinear_predict8x4 mmx
+
+prototype void vp9_bilinear_predict4x4 "unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch"
+specialize vp9_bilinear_predict4x4 mmx
+
+prototype void vp9_bilinear_predict_avg4x4 "unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch"
+specialize vp9_bilinear_predict_avg4x4
+
+#
+# dct
+#
+prototype void vp9_short_idct4x4llm_1 "short *input, short *output, int pitch"
+specialize vp9_short_idct4x4llm_1
+
+prototype void vp9_short_idct4x4llm "short *input, short *output, int pitch"
+specialize vp9_short_idct4x4llm
+
+prototype void vp9_short_idct8x8 "short *input, short *output, int pitch"
+specialize vp9_short_idct8x8
+
+prototype void vp9_short_idct10_8x8 "short *input, short *output, int pitch"
+specialize vp9_short_idct10_8x8
+
+prototype void vp9_short_ihaar2x2 "short *input, short *output, int pitch"
+specialize vp9_short_ihaar2x2
+
+prototype void vp9_short_idct16x16 "short *input, short *output, int pitch"
+specialize vp9_short_idct16x16
+
+prototype void vp9_short_idct10_16x16 "short *input, short *output, int pitch"
+specialize vp9_short_idct10_16x16
+
+prototype void vp9_ihtllm "const short *input, short *output, int pitch, int tx_type, int tx_dim, short eobs"
+specialize vp9_ihtllm
+
+#
+# 2nd order
+#
+prototype void vp9_short_inv_walsh4x4_1 "short *in, short *out"
+specialize vp9_short_inv_walsh4x4_1
+
+prototype void vp9_short_inv_walsh4x4 "short *in, short *out"
+specialize vp9_short_inv_walsh4x4  # name now matches the prototype; no optimized variants listed
+
+
+# dct and add
+prototype void vp9_dc_only_idct_add_8x8 "short input_dc, unsigned char *pred_ptr, unsigned char *dst_ptr, int pitch, int stride"
+specialize vp9_dc_only_idct_add_8x8
+
+prototype void vp9_dc_only_idct_add "short input_dc, unsigned char *pred_ptr, unsigned char *dst_ptr, int pitch, int stride"
+specialize vp9_dc_only_idct_add
+
+if [ "$CONFIG_LOSSLESS" = "yes" ]; then
+prototype void vp9_short_inv_walsh4x4_1_x8 "short *input, short *output, int pitch"
+prototype void vp9_short_inv_walsh4x4_x8 "short *input, short *output, int pitch"
+prototype void vp9_dc_only_inv_walsh_add "short input_dc, unsigned char *pred_ptr, unsigned char *dst_ptr, int pitch, int stride"
+prototype void vp9_short_inv_walsh4x4_1_lossless "short *in, short *out"
+prototype void vp9_short_inv_walsh4x4_lossless "short *in, short *out"
+fi
+
+
+
+if [ "$CONFIG_SUPERBLOCKS" = "yes" ]; then
+
+prototype unsigned int vp9_sad32x3 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, int max_sad"
+specialize vp9_sad32x3
+
+prototype unsigned int vp9_sad3x32 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, int max_sad"
+specialize vp9_sad3x32
+
+fi
+
+#
+# Encoder functions below this point.
+#
+if [ "$CONFIG_VP9_ENCODER" = "yes" ]; then
+
+
+# variance
+[ $arch = "x86_64" ] && mmx_x86_64=mmx && sse2_x86_64=sse2
+
+prototype unsigned int vp9_variance32x32 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_variance32x32
+
+prototype unsigned int vp9_variance16x16 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_variance16x16 mmx sse2
+vp9_variance16x16_sse2=vp9_variance16x16_wmt
+vp9_variance16x16_mmx=vp9_variance16x16_mmx
+
+prototype unsigned int vp9_variance16x8 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_variance16x8 mmx sse2
+vp9_variance16x8_sse2=vp9_variance16x8_wmt
+vp9_variance16x8_mmx=vp9_variance16x8_mmx
+
+prototype unsigned int vp9_variance8x16 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_variance8x16 mmx sse2
+vp9_variance8x16_sse2=vp9_variance8x16_wmt
+vp9_variance8x16_mmx=vp9_variance8x16_mmx
+
+prototype unsigned int vp9_variance8x8 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_variance8x8 mmx sse2
+vp9_variance8x8_sse2=vp9_variance8x8_wmt
+vp9_variance8x8_mmx=vp9_variance8x8_mmx
+
+prototype unsigned int vp9_variance4x4 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_variance4x4 mmx sse2
+vp9_variance4x4_sse2=vp9_variance4x4_wmt
+vp9_variance4x4_mmx=vp9_variance4x4_mmx
+
+prototype unsigned int vp9_sub_pixel_variance32x32 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"
+specialize vp9_sub_pixel_variance32x32
+
+prototype unsigned int vp9_sub_pixel_variance16x16 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"
+specialize vp9_sub_pixel_variance16x16 sse2 mmx ssse3
+vp9_sub_pixel_variance16x16_sse2=vp9_sub_pixel_variance16x16_wmt
+
+prototype unsigned int vp9_sub_pixel_variance8x16 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"
+specialize vp9_sub_pixel_variance8x16 sse2 mmx
+vp9_sub_pixel_variance8x16_sse2=vp9_sub_pixel_variance8x16_wmt
+
+prototype unsigned int vp9_sub_pixel_variance16x8 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"
+specialize vp9_sub_pixel_variance16x8 sse2 mmx ssse3
+# Only the sse2 entry is renamed (to the _wmt symbol); mmx and ssse3 stay as specialized above.
+vp9_sub_pixel_variance16x8_sse2=vp9_sub_pixel_variance16x8_wmt
+
+prototype unsigned int vp9_sub_pixel_variance8x8 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"
+specialize vp9_sub_pixel_variance8x8 sse2 mmx
+vp9_sub_pixel_variance8x8_sse2=vp9_sub_pixel_variance8x8_wmt
+
+prototype unsigned int vp9_sub_pixel_variance4x4 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"
+specialize vp9_sub_pixel_variance4x4 sse2 mmx
+vp9_sub_pixel_variance4x4_sse2=vp9_sub_pixel_variance4x4_wmt
+
+prototype unsigned int vp9_sad32x32 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
+specialize vp9_sad32x32
+
+prototype unsigned int vp9_sad16x16 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
+specialize vp9_sad16x16 mmx sse2 sse3
+vp9_sad16x16_sse2=vp9_sad16x16_wmt
+
+prototype unsigned int vp9_sad16x8 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
+specialize vp9_sad16x8 mmx sse2
+vp9_sad16x8_sse2=vp9_sad16x8_wmt
+
+prototype unsigned int vp9_sad8x16 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
+specialize vp9_sad8x16 mmx sse2
+vp9_sad8x16_sse2=vp9_sad8x16_wmt
+
+prototype unsigned int vp9_sad8x8 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
+specialize vp9_sad8x8 mmx sse2
+vp9_sad8x8_sse2=vp9_sad8x8_wmt
+
+prototype unsigned int vp9_sad4x4 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
+specialize vp9_sad4x4 mmx sse2
+vp9_sad4x4_sse2=vp9_sad4x4_wmt
+
+prototype unsigned int vp9_variance_halfpixvar16x16_h "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_variance_halfpixvar16x16_h mmx sse2
+vp9_variance_halfpixvar16x16_h_sse2=vp9_variance_halfpixvar16x16_h_wmt
+
+prototype unsigned int vp9_variance_halfpixvar16x16_v "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_variance_halfpixvar16x16_v mmx sse2
+vp9_variance_halfpixvar16x16_v_sse2=vp9_variance_halfpixvar16x16_v_wmt
+
+prototype unsigned int vp9_variance_halfpixvar16x16_hv "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_variance_halfpixvar16x16_hv mmx sse2
+vp9_variance_halfpixvar16x16_hv_sse2=vp9_variance_halfpixvar16x16_hv_wmt
+
+prototype unsigned int vp9_variance_halfpixvar32x32_h "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_variance_halfpixvar32x32_h
+
+prototype unsigned int vp9_variance_halfpixvar32x32_v "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_variance_halfpixvar32x32_v
+
+prototype unsigned int vp9_variance_halfpixvar32x32_hv "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_variance_halfpixvar32x32_hv
+
+prototype void vp9_sad32x32x3 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"
+specialize vp9_sad32x32x3
+
+prototype void vp9_sad16x16x3 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"
+specialize vp9_sad16x16x3 sse3 ssse3
+
+prototype void vp9_sad16x8x3 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"
+specialize vp9_sad16x8x3 sse3 ssse3
+
+prototype void vp9_sad8x16x3 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"
+specialize vp9_sad8x16x3 sse3
+
+prototype void vp9_sad8x8x3 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"
+specialize vp9_sad8x8x3 sse3
+
+prototype void vp9_sad4x4x3 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"
+specialize vp9_sad4x4x3 sse3
+
+prototype void vp9_sad32x32x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"
+specialize vp9_sad32x32x8
+
+prototype void vp9_sad16x16x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"
+specialize vp9_sad16x16x8 sse4
+
+prototype void vp9_sad16x8x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"
+specialize vp9_sad16x8x8 sse4
+
+prototype void vp9_sad8x16x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"
+specialize vp9_sad8x16x8 sse4
+
+prototype void vp9_sad8x8x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"
+specialize vp9_sad8x8x8 sse4
+
+prototype void vp9_sad4x4x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"
+specialize vp9_sad4x4x8 sse4
+
+prototype void vp9_sad32x32x4d "const unsigned char *src_ptr, int src_stride, const unsigned char **ref_ptr, int ref_stride, unsigned int *sad_array"
+specialize vp9_sad32x32x4d
+
+prototype void vp9_sad16x16x4d "const unsigned char *src_ptr, int src_stride, const unsigned char **ref_ptr, int ref_stride, unsigned int *sad_array"
+specialize vp9_sad16x16x4d sse3
+
+prototype void vp9_sad16x8x4d "const unsigned char *src_ptr, int src_stride, const unsigned char **ref_ptr, int ref_stride, unsigned int *sad_array"
+specialize vp9_sad16x8x4d sse3
+
+prototype void vp9_sad8x16x4d "const unsigned char *src_ptr, int src_stride, const unsigned char **ref_ptr, int ref_stride, unsigned int *sad_array"
+specialize vp9_sad8x16x4d sse3
+
+prototype void vp9_sad8x8x4d "const unsigned char *src_ptr, int src_stride, const unsigned char **ref_ptr, int ref_stride, unsigned int *sad_array"
+specialize vp9_sad8x8x4d sse3
+
+prototype void vp9_sad4x4x4d "const unsigned char *src_ptr, int src_stride, const unsigned char **ref_ptr, int ref_stride, unsigned int *sad_array"
+specialize vp9_sad4x4x4d sse3
+
+#
+# Block copy
+#
+case $arch in
+ x86*)
+ prototype void vp9_copy32xn "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n"
+ specialize vp9_copy32xn sse2 sse3
+ ;;
+esac
+
+prototype unsigned int vp9_sub_pixel_mse16x16 "const unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, const unsigned char *dst_ptr, int dst_pixels_per_line, unsigned int *sse"
+specialize vp9_sub_pixel_mse16x16 sse2 mmx
+vp9_sub_pixel_mse16x16_sse2=vp9_sub_pixel_mse16x16_wmt
+
+prototype unsigned int vp9_mse16x16 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int recon_stride, unsigned int *sse"
+specialize vp9_mse16x16 mmx sse2
+vp9_mse16x16_sse2=vp9_mse16x16_wmt
+
+prototype unsigned int vp9_sub_pixel_mse32x32 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"
+specialize vp9_sub_pixel_mse32x32
+
+prototype unsigned int vp9_get_mb_ss "const short *"
+specialize vp9_get_mb_ss mmx sse2
+# ENCODEMB INVOKE
+prototype int vp9_mbblock_error "struct macroblock *mb, int dc"
+specialize vp9_mbblock_error mmx sse2
+vp9_mbblock_error_sse2=vp9_mbblock_error_xmm
+
+prototype int vp9_block_error "short *coeff, short *dqcoeff, int block_size"
+specialize vp9_block_error mmx sse2
+vp9_block_error_sse2=vp9_block_error_xmm
+
+prototype void vp9_subtract_b "struct block *be, struct blockd *bd, int pitch"
+specialize vp9_subtract_b mmx sse2
+
+prototype int vp9_mbuverror "struct macroblock *mb"
+specialize vp9_mbuverror mmx sse2
+vp9_mbuverror_sse2=vp9_mbuverror_xmm
+
+# vp9_subtract_b is declared once, just before vp9_mbuverror; the identical
+# prototype/specialize pair is intentionally not repeated here.
+
+prototype void vp9_subtract_mby "short *diff, unsigned char *src, unsigned char *pred, int stride"
+specialize vp9_subtract_mby mmx sse2
+
+prototype void vp9_subtract_mbuv "short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride"
+specialize vp9_subtract_mbuv mmx sse2
+
+#
+# Structured Similarity (SSIM)
+#
+if [ "$CONFIG_INTERNAL_STATS" = "yes" ]; then
+ [ $arch = "x86_64" ] && sse2_on_x86_64=sse2
+
+ prototype void vp9_ssim_parms_8x8 "unsigned char *s, int sp, unsigned char *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr"
+ specialize vp9_ssim_parms_8x8 $sse2_on_x86_64
+
+ prototype void vp9_ssim_parms_16x16 "unsigned char *s, int sp, unsigned char *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr"
+ specialize vp9_ssim_parms_16x16 $sse2_on_x86_64
+fi
+
+# fdct functions
+prototype void vp9_fht "const short *input, int pitch, short *output, int tx_type, int tx_dim"
+specialize vp9_fht
+
+prototype void vp9_short_fdct8x8 "short *InputData, short *OutputData, int pitch"
+specialize vp9_short_fdct8x8
+
+prototype void vp9_short_fhaar2x2 "short *InputData, short *OutputData, int pitch"
+specialize vp9_short_fhaar2x2
+
+prototype void vp9_short_fdct4x4 "short *InputData, short *OutputData, int pitch"
+specialize vp9_short_fdct4x4
+
+prototype void vp9_short_fdct8x4 "short *InputData, short *OutputData, int pitch"
+specialize vp9_short_fdct8x4
+
+prototype void vp9_short_walsh4x4 "short *InputData, short *OutputData, int pitch"
+specialize vp9_short_walsh4x4
+
+prototype void vp9_short_fdct16x16 "short *InputData, short *OutputData, int pitch"
+specialize vp9_short_fdct16x16
+
+prototype void vp9_short_walsh4x4_lossless "short *InputData, short *OutputData, int pitch"
+specialize vp9_short_walsh4x4_lossless
+
+prototype void vp9_short_walsh4x4_x8 "short *InputData, short *OutputData, int pitch"
+specialize vp9_short_walsh4x4_x8
+
+prototype void vp9_short_walsh8x4_x8 "short *InputData, short *OutputData, int pitch"
+specialize vp9_short_walsh8x4_x8
+
+#
+# Motion search
+#
+prototype int vp9_full_search_sad "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, DEC_MVCOSTS, union int_mv *center_mv"
+specialize vp9_full_search_sad sse3 sse4_1
+vp9_full_search_sad_sse3=vp9_full_search_sadx3
+vp9_full_search_sad_sse4_1=vp9_full_search_sadx8
+
+prototype int vp9_refining_search_sad "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, DEC_MVCOSTS, union int_mv *center_mv"
+specialize vp9_refining_search_sad sse3
+vp9_refining_search_sad_sse3=vp9_refining_search_sadx4
+
+prototype int vp9_diamond_search_sad "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, union int_mv *best_mv, int search_param, int sad_per_bit, int *num00, struct variance_vtable *fn_ptr, DEC_MVCOSTS, union int_mv *center_mv"
+specialize vp9_diamond_search_sad sse3
+vp9_diamond_search_sad_sse3=vp9_diamond_search_sadx4
+
+prototype void vp9_temporal_filter_apply "unsigned char *frame1, unsigned int stride, unsigned char *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, unsigned short *count"
+specialize vp9_temporal_filter_apply sse2
+
+prototype void vp9_yv12_copy_partial_frame "struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc, int fraction"
+specialize vp9_yv12_copy_partial_frame
+
+
+fi
+# end encoder functions
diff --git a/vp9/common/vp9_sadmxn.h b/vp9/common/vp9_sadmxn.h
new file mode 100644
index 0000000..bed257f
--- /dev/null
+++ b/vp9/common/vp9_sadmxn.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_COMMON_VP9_SADMXN_H_
+#define VP9_COMMON_VP9_SADMXN_H_
+#include <stdlib.h>  /* abs() */
+
+static __inline unsigned int sad_mx_n_c(  /* sum of absolute differences over an m x n block */
+ const unsigned char *src_ptr,  /* top-left pixel of the source block */
+ int src_stride,  /* distance between successive source rows */
+ const unsigned char *ref_ptr,  /* top-left pixel of the reference block */
+ int ref_stride,  /* distance between successive reference rows */
+ int m,  /* block width: pixels compared per row */
+ int n) {  /* block height: number of rows compared */
+ int r, c;
+ unsigned int sad = 0;
+
+ for (r = 0; r < n; r++) {
+ for (c = 0; c < m; c++) {
+ sad += abs(src_ptr[c] - ref_ptr[c]);  /* operands promote to int, so the difference may be negative */
+ }
+
+ src_ptr += src_stride;  /* advance both pointers to the next row */
+ ref_ptr += ref_stride;
+ }
+
+ return sad;  /* 0 when m <= 0 or n <= 0 */
+}
+
+#endif
diff --git a/vp9/common/vp9_seg_common.c b/vp9/common/vp9_seg_common.c
new file mode 100644
index 0000000..46a6ee4
--- /dev/null
+++ b/vp9/common/vp9_seg_common.c
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include "vp9/common/vp9_blockd.h"
+#include "vp9/common/vp9_seg_common.h"
+
+static const int segfeaturedata_signed[SEG_LVL_MAX] = { 1, 1, 0, 0, 0, 0 };  // per feature: non-zero if vp9_set_segdata accepts negative data
+static const int seg_feature_data_max[SEG_LVL_MAX] =  // per feature: largest magnitude vp9_set_segdata accepts
+ { MAXQ, 63, 0xf, MB_MODE_COUNT - 1, 255, TX_SIZE_MAX - 1};
+
+// These functions provide access to new segment level features.
+// Eventually these functions may be "optimized out" but for the moment,
+// the coding mechanism is still subject to change so these provide a
+// convenient single point of change.
+
+int vp9_segfeature_active(const MACROBLOCKD *xd,  // returns 1 if feature_id is enabled for segment_id, else 0
+ int segment_id,
+ SEG_LVL_FEATURES feature_id) {
+ // Return true only if segmentation is enabled AND the feature's mask bit is set.
+ return (xd->segmentation_enabled &&
+ (xd->segment_feature_mask[segment_id] &
+ (0x01 << feature_id)));  // bit number feature_id of this segment's mask
+}
+
+void vp9_clearall_segfeatures(MACROBLOCKD *xd) {  // zero all of segment_feature_data and segment_feature_mask
+ vpx_memset(xd->segment_feature_data, 0, sizeof(xd->segment_feature_data));  // feature data
+ vpx_memset(xd->segment_feature_mask, 0, sizeof(xd->segment_feature_mask));  // feature enable bits
+}
+
+void vp9_enable_segfeature(MACROBLOCKD *xd,  // set the enable bit of feature_id for segment_id
+ int segment_id,
+ SEG_LVL_FEATURES feature_id) {
+ xd->segment_feature_mask[segment_id] |= (0x01 << feature_id);  // other features' bits are left untouched
+}
+
+void vp9_disable_segfeature(MACROBLOCKD *xd,  // clear the enable bit of feature_id for segment_id
+ int segment_id,
+ SEG_LVL_FEATURES feature_id) {
+ xd->segment_feature_mask[segment_id] &= ~(1 << feature_id);  // the stored feature data is not modified
+}
+
+int vp9_seg_feature_data_max(SEG_LVL_FEATURES feature_id) {  // upper bound on the data magnitude for feature_id
+ return seg_feature_data_max[feature_id];  // plain table lookup; feature_id is not range-checked
+}
+
+int vp9_is_segfeature_signed(SEG_LVL_FEATURES feature_id) {  // non-zero if feature_id may carry negative data
+ return (segfeaturedata_signed[feature_id]);  // plain table lookup; feature_id is not range-checked
+}
+
+void vp9_clear_segdata(MACROBLOCKD *xd,  // reset the data of feature_id for segment_id to 0
+ int segment_id,
+ SEG_LVL_FEATURES feature_id) {
+ xd->segment_feature_data[segment_id][feature_id] = 0;  // the feature's enable bit is not changed
+}
+
+void vp9_set_segdata(MACROBLOCKD *xd,  // store seg_data for (segment_id, feature_id); range is checked by assert only
+ int segment_id,
+ SEG_LVL_FEATURES feature_id,
+ int seg_data) {
+ assert(seg_data <= seg_feature_data_max[feature_id]);  // upper bound for this feature
+ if (seg_data < 0) {
+ assert(segfeaturedata_signed[feature_id]);  // negative values only for signed features
+ assert(-seg_data <= seg_feature_data_max[feature_id]);  // same bound applies to the magnitude
+ }
+
+ xd->segment_feature_data[segment_id][feature_id] = seg_data;  // stored unconditionally (asserts vanish under NDEBUG)
+}
+
+int vp9_get_segdata(const MACROBLOCKD *xd,  // read the stored data of feature_id for segment_id
+ int segment_id,
+ SEG_LVL_FEATURES feature_id) {
+ return xd->segment_feature_data[segment_id][feature_id];  // returned whether or not the feature is enabled
+}
+
+void vp9_clear_segref(MACROBLOCKD *xd, int segment_id) {  // clear every reference-frame bit of segment_id
+ xd->segment_feature_data[segment_id][SEG_LVL_REF_FRAME] = 0;  // the REF_FRAME data is used as a bit set
+}
+
+void vp9_set_segref(MACROBLOCKD *xd,  // add ref_frame to the reference-frame bit set of segment_id
+ int segment_id,
+ MV_REFERENCE_FRAME ref_frame) {
+ xd->segment_feature_data[segment_id][SEG_LVL_REF_FRAME] |=
+ (1 << ref_frame);  // one bit per reference frame; existing bits are kept
+}
+
+int vp9_check_segref(const MACROBLOCKD *xd,  // returns 1 if ref_frame's bit is set for segment_id, else 0
+ int segment_id,
+ MV_REFERENCE_FRAME ref_frame) {
+ return (xd->segment_feature_data[segment_id][SEG_LVL_REF_FRAME] &
+ (1 << ref_frame)) ? 1 : 0;  // normalise the masked bit to 0/1
+}
+
+int vp9_check_segref_inter(MACROBLOCKD *xd, int segment_id) {  // returns 1 if any bit other than INTRA_FRAME's is set
+ return (xd->segment_feature_data[segment_id][SEG_LVL_REF_FRAME] &
+ ~(1 << INTRA_FRAME)) ? 1 : 0;  // mask out the INTRA_FRAME bit, then test what remains
+}
+
+int vp9_get_seg_tx_type(MACROBLOCKD *xd, int segment_id) {  // SEG_LVL_TRANSFORM data of segment_id, or TX_4X4 if not active
+ if (vp9_segfeature_active(xd, segment_id, SEG_LVL_TRANSFORM))
+ return vp9_get_segdata(xd, segment_id, SEG_LVL_TRANSFORM);  // feature enabled: use the stored value
+ else
+ return TX_4X4;  // fallback when the feature (or segmentation) is off
+}
+// TBD? Functions to read and write segment data with range / validity checking
diff --git a/vp9/common/vp9_seg_common.h b/vp9/common/vp9_seg_common.h
new file mode 100644
index 0000000..a9f9b93
--- /dev/null
+++ b/vp9/common/vp9_seg_common.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_COMMON_VP9_SEG_COMMON_H_  /* the guard now covers the includes below as well */
+#define VP9_COMMON_VP9_SEG_COMMON_H_
+
+#include "vp9/common/vp9_type_aliases.h"
+#include "vp9/common/vp9_onyxc_int.h"
+#include "vp9/common/vp9_blockd.h"
+
+int vp9_segfeature_active(const MACROBLOCKD *xd,
+ int segment_id,
+ SEG_LVL_FEATURES feature_id);
+
+void vp9_clearall_segfeatures(MACROBLOCKD *xd);
+
+void vp9_enable_segfeature(MACROBLOCKD *xd,
+ int segment_id,
+ SEG_LVL_FEATURES feature_id);
+
+void vp9_disable_segfeature(MACROBLOCKD *xd,
+ int segment_id,
+ SEG_LVL_FEATURES feature_id);
+
+int vp9_seg_feature_data_max(SEG_LVL_FEATURES feature_id);
+
+int vp9_is_segfeature_signed(SEG_LVL_FEATURES feature_id);
+
+void vp9_clear_segdata(MACROBLOCKD *xd,
+ int segment_id,
+ SEG_LVL_FEATURES feature_id);
+
+void vp9_set_segdata(MACROBLOCKD *xd,
+ int segment_id,
+ SEG_LVL_FEATURES feature_id,
+ int seg_data);
+
+int vp9_get_segdata(const MACROBLOCKD *xd,
+ int segment_id,
+ SEG_LVL_FEATURES feature_id);
+
+void vp9_clear_segref(MACROBLOCKD *xd, int segment_id);
+
+void vp9_set_segref(MACROBLOCKD *xd,
+ int segment_id,
+ MV_REFERENCE_FRAME ref_frame);
+
+int vp9_check_segref(const MACROBLOCKD *xd,
+ int segment_id,
+ MV_REFERENCE_FRAME ref_frame);
+
+int vp9_check_segref_inter(MACROBLOCKD *xd, int segment_id);
+
+int vp9_get_seg_tx_type(MACROBLOCKD *xd, int segment_id);
+
+#endif /* VP9_COMMON_VP9_SEG_COMMON_H_ */
+
diff --git a/vp9/common/vp9_setupintrarecon.c b/vp9/common/vp9_setupintrarecon.c
new file mode 100644
index 0000000..2c84cd0
--- /dev/null
+++ b/vp9/common/vp9_setupintrarecon.c
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vp9/common/vp9_setupintrarecon.h"
+#include "vpx_mem/vpx_mem.h"
+
+void vp9_setup_intra_recon(YV12_BUFFER_CONFIG *ybf) {  /* fill the row above and the column left of each plane with fixed values */
+ int i;
+
+ /* set up the border of a new frame for intra coded blocks */
+ vpx_memset(ybf->y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5);  /* row above Y, starting one pixel to the left: y_width + 5 bytes of 127 */
+ for (i = 0; i < ybf->y_height; i++)
+ ybf->y_buffer[ybf->y_stride * i - 1] = (unsigned char) 129;  /* pixel left of each Y row: 129 */
+
+ vpx_memset(ybf->u_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);  /* same layout for the U plane */
+ for (i = 0; i < ybf->uv_height; i++)
+ ybf->u_buffer[ybf->uv_stride * i - 1] = (unsigned char) 129;  /* pixel left of each U row: 129 */
+
+ vpx_memset(ybf->v_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);  /* same layout for the V plane */
+ for (i = 0; i < ybf->uv_height; i++)
+ ybf->v_buffer[ybf->uv_stride * i - 1] = (unsigned char) 129;  /* pixel left of each V row: 129 */
+
+}
diff --git a/vp9/common/vp9_setupintrarecon.h b/vp9/common/vp9_setupintrarecon.h
new file mode 100644
index 0000000..1a55d0a
--- /dev/null
+++ b/vp9/common/vp9_setupintrarecon.h
@@ -0,0 +1,13 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_scale/yv12config.h"
+extern void vp9_setup_intra_recon(YV12_BUFFER_CONFIG *ybf);
diff --git a/vp9/common/vp9_subpelvar.h b/vp9/common/vp9_subpelvar.h
new file mode 100644
index 0000000..b3c3fcd
--- /dev/null
+++ b/vp9/common/vp9_subpelvar.h
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vp9/common/vp9_filter.h"
+
+
+
+static void variance(const unsigned char *src_ptr,  /* accumulate sum and sum of squares of (src - ref) over a w x h block */
+ int source_stride,  /* distance between successive source rows */
+ const unsigned char *ref_ptr,
+ int recon_stride,  /* distance between successive reference rows */
+ int w,  /* pixels compared per row */
+ int h,  /* number of rows compared */
+ unsigned int *sse,  /* out: sum of squared differences */
+ int *sum) {  /* out: signed sum of differences */
+ int i, j;
+ int diff;
+
+ *sum = 0;  /* both outputs are reset before accumulating */
+ *sse = 0;
+
+ for (i = 0; i < h; i++) {
+ for (j = 0; j < w; j++) {
+ diff = src_ptr[j] - ref_ptr[j];  /* operands promote to int, so diff may be negative */
+ *sum += diff;
+ *sse += diff * diff;
+ }
+
+ src_ptr += source_stride;  /* advance both pointers to the next row */
+ ref_ptr += recon_stride;
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : var_filter_block2d_bil_first_pass
+ *
+ * INPUTS : UINT8 *src_ptr : Pointer to source block.
+ * UINT32 src_pixels_per_line : Stride of input block.
+ * INT32 pixel_step : Offset between filter input samples (see notes).
+ * UINT32 output_height : Number of output rows to produce.
+ * UINT32 output_width : Number of output samples per row.
+ * INT16 *vp9_filter : Array of 2 bi-linear filter taps.
+ *
+ * OUTPUTS : UINT16 *output_ptr : Pointer to filtered block.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in
+ * either horizontal or vertical direction to produce the
+ * filtered output block. Used to implement first-pass
+ * of 2-D separable filter.
+ *
+ * SPECIAL NOTES : Produces UINT16 output, packed output_width samples per row.
+ * Two filter taps should sum to VP9_FILTER_WEIGHT.
+ * pixel_step defines whether the filter is applied
+ * horizontally (pixel_step=1) or vertically (pixel_step=stride).
+ * It defines the offset required to move from one input
+ * to the next.
+ *
+ ****************************************************************************/
+static void var_filter_block2d_bil_first_pass(const unsigned char *src_ptr,
+ unsigned short *output_ptr,
+ unsigned int src_pixels_per_line,
+ int pixel_step,
+ unsigned int output_height,
+ unsigned int output_width,
+ const short *vp9_filter) {
+ unsigned int i, j;
+
+ for (i = 0; i < output_height; i++) {
+ for (j = 0; j < output_width; j++) {
+ // Apply bilinear filter: weighted sum of two taps, rounded, then shifted down
+ output_ptr[j] = (((int)src_ptr[0] * vp9_filter[0]) +
+ ((int)src_ptr[pixel_step] * vp9_filter[1]) +
+ (VP9_FILTER_WEIGHT / 2)) >> VP9_FILTER_SHIFT;
+ src_ptr++;
+ }
+
+ // Next row...
+ src_ptr += src_pixels_per_line - output_width;  // undo the per-sample increments, then step one source row
+ output_ptr += output_width;  // output rows are stored back to back
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : var_filter_block2d_bil_second_pass
+ *
+ * INPUTS : UINT16 *src_ptr : Pointer to source block.
+ * UINT32 src_pixels_per_line : Stride of input block.
+ * UINT32 pixel_step : Offset between filter input samples (see notes).
+ * UINT32 output_height : Number of output rows to produce.
+ * UINT32 output_width : Number of output samples per row.
+ * INT16 *vp9_filter : Array of 2 bi-linear filter taps.
+ *
+ * OUTPUTS : UINT8 *output_ptr : Pointer to filtered block.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in
+ * either horizontal or vertical direction to produce the
+ * filtered output block. Used to implement second-pass
+ * of 2-D separable filter.
+ *
+ * SPECIAL NOTES : Requires 16-bit input as produced by var_filter_block2d_bil_first_pass.
+ * Two filter taps should sum to VP9_FILTER_WEIGHT.
+ * pixel_step defines whether the filter is applied
+ * horizontally (pixel_step=1) or vertically (pixel_step=stride).
+ * It defines the offset required to move from one input
+ * to the next.
+ *
+ ****************************************************************************/
+static void var_filter_block2d_bil_second_pass(const unsigned short *src_ptr,
+ unsigned char *output_ptr,
+ unsigned int src_pixels_per_line,
+ unsigned int pixel_step,
+ unsigned int output_height,
+ unsigned int output_width,
+ const short *vp9_filter) {
+ unsigned int i, j;
+ int Temp;
+
+ for (i = 0; i < output_height; i++) {
+ for (j = 0; j < output_width; j++) {
+ // Apply filter: weighted sum of two taps plus the rounding term
+ Temp = ((int)src_ptr[0] * vp9_filter[0]) +
+ ((int)src_ptr[pixel_step] * vp9_filter[1]) +
+ (VP9_FILTER_WEIGHT / 2);
+ output_ptr[j] = (unsigned int)(Temp >> VP9_FILTER_SHIFT);  // narrowed to unsigned char on store
+ src_ptr++;
+ }
+
+ // Next row...
+ src_ptr += src_pixels_per_line - output_width;  // undo the per-sample increments, then step one source row
+ output_ptr += output_width;  // output rows are stored back to back
+ }
+}
+
diff --git a/vp9/common/vp9_subpixel.h b/vp9/common/vp9_subpixel.h
new file mode 100644
index 0000000..5824e1a
--- /dev/null
+++ b/vp9/common/vp9_subpixel.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/* Include guard for the sub-pixel prediction function-type declarations. */
+#ifndef VP9_COMMON_VP9_SUBPIXEL_H_
+#define VP9_COMMON_VP9_SUBPIXEL_H_
+
+/*
+ * Expands to the declaration of a sub-pixel predictor named `sym`: a void
+ * function taking a source pointer and pitch, an x and a y offset, and a
+ * destination pointer and pitch.
+ */
+#define prototype_subpixel_predict(sym) \
+ void sym(unsigned char *src, int src_pitch, int xofst, int yofst, \
+ unsigned char *dst, int dst_pitch)
+
+/* Pointer-to-function type with the signature declared by the macro above. */
+typedef prototype_subpixel_predict((*vp9_subpix_fn_t));
+
+#endif  /* VP9_COMMON_VP9_SUBPIXEL_H_ */
diff --git a/vp9/common/vp9_swapyv12buffer.c b/vp9/common/vp9_swapyv12buffer.c
new file mode 100644
index 0000000..b014625
--- /dev/null
+++ b/vp9/common/vp9_swapyv12buffer.c
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vp9/common/vp9_swapyv12buffer.h"
+
+/*
+ * Exchanges the buffer_alloc, y_buffer, u_buffer and v_buffer pointers of
+ * the two frame descriptors. No other field of either descriptor is touched.
+ */
+void vp9_swap_yv12_buffer(YV12_BUFFER_CONFIG *new_frame,
+                          YV12_BUFFER_CONFIG *last_frame) {
+  /* Remember the four pointers currently held by new_frame ... */
+  unsigned char *const saved_alloc = new_frame->buffer_alloc;
+  unsigned char *const saved_y = new_frame->y_buffer;
+  unsigned char *const saved_u = new_frame->u_buffer;
+  unsigned char *const saved_v = new_frame->v_buffer;
+
+  /* ... hand last_frame's pointers to new_frame ... */
+  new_frame->buffer_alloc = last_frame->buffer_alloc;
+  new_frame->y_buffer = last_frame->y_buffer;
+  new_frame->u_buffer = last_frame->u_buffer;
+  new_frame->v_buffer = last_frame->v_buffer;
+
+  /* ... and give the remembered ones to last_frame. */
+  last_frame->buffer_alloc = saved_alloc;
+  last_frame->y_buffer = saved_y;
+  last_frame->u_buffer = saved_u;
+  last_frame->v_buffer = saved_v;
+}
diff --git a/vp9/common/vp9_swapyv12buffer.h b/vp9/common/vp9_swapyv12buffer.h
new file mode 100644
index 0000000..4300176
--- /dev/null
+++ b/vp9/common/vp9_swapyv12buffer.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_COMMON_VP9_SWAPYV12BUFFER_H_
+#define VP9_COMMON_VP9_SWAPYV12BUFFER_H_
+
+#include "vpx_scale/yv12config.h"
+
+/*
+ * Exchanges the buffer_alloc, y_buffer, u_buffer and v_buffer pointers of
+ * the two frame descriptors.
+ */
+void vp9_swap_yv12_buffer(YV12_BUFFER_CONFIG *new_frame,
+ YV12_BUFFER_CONFIG *last_frame);
+
+#endif // VP9_COMMON_VP9_SWAPYV12BUFFER_H_
diff --git a/vp9/common/vp9_systemdependent.h b/vp9/common/vp9_systemdependent.h
new file mode 100644
index 0000000..6f08e69
--- /dev/null
+++ b/vp9/common/vp9_systemdependent.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef VP9_COMMON_VP9_SYSTEMDEPENDENT_H_
+#define VP9_COMMON_VP9_SYSTEMDEPENDENT_H_
+
+#include "./vpx_config.h"
+/*
+ * vp9_clear_system_state(): on x86 / x86-64 builds this expands to a call to
+ * vpx_reset_mmx_state(); on every other architecture it expands to nothing.
+ */
+#if ARCH_X86 || ARCH_X86_64
+void vpx_reset_mmx_state(void);
+#define vp9_clear_system_state() vpx_reset_mmx_state()
+#else
+#define vp9_clear_system_state()
+#endif
+
+/* Forward declaration: only a pointer to the struct is needed here. */
+struct VP9Common;
+/* Defined elsewhere; NOTE(review): presumably installs per-platform function
+ * pointers into the common state - confirm against the implementation. */
+void vp9_machine_specific_config(struct VP9Common *);
+#endif  /* VP9_COMMON_VP9_SYSTEMDEPENDENT_H_ */
diff --git a/vp9/common/vp9_tapify.py b/vp9/common/vp9_tapify.py
new file mode 100644
index 0000000..99529cf
--- /dev/null
+++ b/vp9/common/vp9_tapify.py
@@ -0,0 +1,106 @@
+"""
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+"""
+#!/usr/bin/env python
+import sys,string,os,re,math,numpy
+scale = 2**16
+def dist(p1,p2):
+    """Return the inverse of the Euclidean distance between two points.
+
+    Each point is a two-element sequence. Coincident points return 1.0
+    instead of dividing by zero.
+    """
+    (x1, y1), (x2, y2) = p1, p2
+    if x1 == x2 and y1 == y2:
+        return 1.0
+    dx = x1 - x2
+    dy = y1 - y2
+    return 1 / math.sqrt(dx*dx + dy*dy)
+
+def gettaps(p):
+    """Return four [corner, weight] pairs for the (row, col) position p.
+
+    The corners are the floor/ceil combinations of p's coordinates, in the
+    order upper-left, upper-right, lower-left, lower-right. The first three
+    weights are the corner's dist() share of the total, scaled by `scale`
+    and rounded; the fourth is whatever remains so that all four weights
+    add up to `scale` exactly.
+    """
+    def weight(corner, total):
+        return int((scale*dist(corner,p)+total/2)/total)
+
+    r,c = p
+    r_lo, r_hi = int(math.floor(r)), int(math.ceil(r))
+    c_lo, c_hi = int(math.floor(c)), int(math.ceil(c))
+    ul = [r_lo, c_lo]
+    ur = [r_lo, c_hi]
+    ll = [r_hi, c_lo]
+    lr = [r_hi, c_hi]
+
+    total = dist(ul,p)+dist(ur,p)+dist(ll,p)+dist(lr,p)
+    w_ul = weight(ul, total)
+    w_ur = weight(ur, total)
+    w_ll = weight(ll, total)
+    w_lr = scale - w_ul - w_ur - w_ll
+    return [[ul,w_ul],[ur,w_ur],
+            [ll,w_ll],[lr,w_lr]]
+
+
+def print_mb_taps(angle,blocksize):
+    """Print the tap table for a blocksize x blocksize block rotated by angle.
+
+    angle is in degrees; dividing by 57.2957795 (about 180/pi) converts it
+    to radians. Every (y, x) position is rotated about the block centre,
+    and the four [row, col, weight] taps returned by gettaps() for the
+    rotated position are printed on one line, followed by a "// y,x"
+    comment naming the position.
+    """
+    theta = angle / 57.2957795
+    cos_t = math.cos(theta)
+    sin_t = math.sin(theta)
+    affine = [[cos_t,-sin_t],
+              [sin_t,cos_t]]
+    radius = (float(blocksize)-1)/2
+    print " // angle of",angle,"degrees"
+    for y in range(blocksize) :
+        for x in range(blocksize) :
+            r,c = numpy.dot(affine,[y-radius, x-radius])
+            for (tr,tc),weight in gettaps([r+radius,c+radius]) :
+                # Trailing comma keeps all four taps on the same output line.
+                print " %2d, %2d, %5d, " % (tr,tc,weight,),
+            print " // %2d,%2d " % (y,x)
+
+
+# Command line: <first angle> <last angle> <angle step> <block size>.
+# One tap table is printed for every angle from the first to the last
+# (inclusive), advancing by the step.
+# The block size is converted to an int because print_mb_taps() passes it
+# to range(), which rejects float arguments; going through float() first
+# still accepts inputs such as "16.0".
+i=float(sys.argv[1])
+while i <= float(sys.argv[2]) :
+    print_mb_taps(i,int(float(sys.argv[4])))
+    i=i+float(sys.argv[3])
+"""
+
+taps = []
+pt=dict()
+ptr=dict()
+for y in range(16) :
+ for x in range(16) :
+ r,c = numpy.dot(affine,[y-7.5, x-7.5])
+ tps = gettaps([r+7.5,c+7.5])
+ j=0
+ for tp in tps :
+ p,i = tp
+ r,c = p
+ pt[y,x,j]= [p,i]
+ try:
+ ptr[r,j,c].append([y,x])
+ except:
+ ptr[r,j,c]=[[y,x]]
+ j = j+1
+
+for key in sorted(pt.keys()) :
+ print key,pt[key]
+
+lr = -99
+lj = -99
+lc = 0
+
+shuf=""
+mask=""
+for r,j,c in sorted(ptr.keys()) :
+ for y,x in ptr[r,j,c] :
+ if lr != r or lj != j :
+ print "shuf_"+str(lr)+"_"+str(lj)+"_"+shuf.ljust(16,"0"), lc
+ shuf=""
+ lc = 0
+ for i in range(lc,c-1) :
+ shuf = shuf +"0"
+ shuf = shuf + hex(x)[2]
+ lc =c
+ break
+ lr = r
+ lj = j
+# print r,j,c,ptr[r,j,c]
+# print
+
+for r,j,c in sorted(ptr.keys()) :
+ for y,x in ptr[r,j,c] :
+ print r,j,c,y,x
+ break
+"""
diff --git a/vp9/common/vp9_textblit.c b/vp9/common/vp9_textblit.c
new file mode 100644
index 0000000..52c6b87
--- /dev/null
+++ b/vp9/common/vp9_textblit.c
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdlib.h>
+
+#include "vp9/common/vp9_textblit.h"
+
+/*
+ * Renders the NUL-terminated string msg into the 8-bit image at address.
+ *
+ * Every character occupies a cell 7 pixels wide and 5 pixels tall; cells are
+ * laid out left to right with no extra gap. A glyph is packed into one int:
+ * column c lives in bits 5*c .. 5*c+4, and bit r of that group is row r.
+ * Set bits are written as 255, clear bits as 0, so the whole cell is
+ * overwritten. Characters ' '..'Z' have their own glyph, 'a'..'z' reuse the
+ * upper-case glyphs, and anything else is drawn as glyph 0 (blank).
+ *
+ * pitch is the distance in bytes between vertically adjacent pixels.
+ */
+void vp9_blit_text(const char *msg, unsigned char *address, const int pitch) {
+  static const int font[] = {
+    0x0, 0x5C00, 0x8020, 0xAFABEA, 0xD7EC0, 0x1111111, 0x1855740, 0x18000,
+    0x45C0, 0x74400, 0x51140, 0x23880, 0xC4000, 0x21080, 0x80000, 0x111110,
+    0xE9D72E, 0x87E40, 0x12AD732, 0xAAD62A, 0x4F94C4, 0x4D6B7, 0x456AA,
+    0x3E8423, 0xAAD6AA, 0xAAD6A2, 0x2800, 0x2A00, 0x8A880, 0x52940, 0x22A20,
+    0x15422, 0x6AD62E, 0x1E4A53E, 0xAAD6BF, 0x8C62E, 0xE8C63F, 0x118D6BF,
+    0x1094BF, 0xCAC62E, 0x1F2109F, 0x118FE31, 0xF8C628, 0x8A89F, 0x108421F,
+    0x1F1105F, 0x1F4105F, 0xE8C62E, 0x2294BF, 0x164C62E, 0x12694BF, 0x8AD6A2,
+    0x10FC21, 0x1F8421F, 0x744107, 0xF8220F, 0x1151151, 0x117041, 0x119D731,
+    0x47E0, 0x1041041, 0xFC400, 0x10440, 0x1084210, 0x820
+  };
+  unsigned char *cell = address;
+  const char *cursor;
+
+  for (cursor = msg; *cursor != 0; ++cursor, cell += 7) {
+    const char letter = *cursor;
+    int glyph_index = 0;  /* anything unrecognised is drawn blank */
+    int glyph, row, col;
+
+    if (letter >= ' ' && letter <= 'Z')
+      glyph_index = letter - ' ';
+    else if (letter >= 'a' && letter <= 'z')
+      glyph_index = letter - 'a' + 'A' - ' ';
+
+    glyph = font[glyph_index];
+
+    for (row = 0; row < 5; ++row) {
+      for (col = 0; col < 7; ++col) {
+        const int lit = (glyph >> (col * 5)) & (1 << row);
+        cell[row * pitch + col] = lit ? 255 : 0;
+      }
+    }
+  }
+}
+
+/* Inverts (XORs with 255) the pixel at column x, row y of an 8-bit image
+ * whose rows are pitch bytes apart. */
+static void plot(const int x, const int y, unsigned char *image, const int pitch) {
+  const int offset = x + y * pitch;
+
+  image[offset] = image[offset] ^ 255;
+}
+
+/*
+ * Bresenham line algorithm.
+ *
+ * Draws the line from (x0, y0) to (x1, y1), end points included, by calling
+ * plot() on every pixel of the line. Note the parameter order: both x
+ * coordinates come before both y coordinates.
+ *
+ * The line is always walked along its longer axis. For steep lines the x and
+ * y roles are exchanged up front and exchanged back when plotting.
+ */
+void vp9_blit_line(int x0, int x1, int y0, int y1, unsigned char *image, const int pitch) {
+  const int steep = abs(y1 - y0) > abs(x1 - x0);
+  int delta_major, delta_minor;
+  int err, minor_step, major, minor;
+  int swap;
+
+  if (steep) {
+    /* Make "x" the fast-moving axis. */
+    swap = x0; x0 = y0; y0 = swap;
+    swap = x1; x1 = y1; y1 = swap;
+  }
+
+  if (x0 > x1) {
+    /* Always walk towards increasing x. */
+    swap = x0; x0 = x1; x1 = swap;
+    swap = y0; y0 = y1; y1 = swap;
+  }
+
+  delta_major = x1 - x0;
+  delta_minor = abs(y1 - y0);
+  err = delta_major / 2;
+  minor_step = (y0 < y1) ? 1 : -1;
+  minor = y0;
+
+  for (major = x0; major <= x1; ++major) {
+    if (steep)
+      plot(minor, major, image, pitch);
+    else
+      plot(major, minor, image, pitch);
+
+    err -= delta_minor;
+    if (err < 0) {
+      minor += minor_step;
+      err += delta_major;
+    }
+  }
+}
diff --git a/vp9/common/vp9_textblit.h b/vp9/common/vp9_textblit.h
new file mode 100644
index 0000000..81bfa253
--- /dev/null
+++ b/vp9/common/vp9_textblit.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_COMMON_VP9_TEXTBLIT_H_
+#define VP9_COMMON_VP9_TEXTBLIT_H_
+
+extern void vp9_blit_text(const char *msg, unsigned char *address,
+ const int pitch);
+extern void vp9_blit_line(int x0, int x1, int y0, int y1,
+ unsigned char *image, const int pitch);
+
+#endif // __INC_TEXTBLIT_H
diff --git a/vp9/common/vp9_treecoder.c b/vp9/common/vp9_treecoder.c
new file mode 100644
index 0000000..64018a1
--- /dev/null
+++ b/vp9/common/vp9_treecoder.c
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_config.h"
+
+#if defined(CONFIG_DEBUG) && CONFIG_DEBUG
+#include <assert.h>
+#endif
+#include <stdio.h>
+
+#include "vp9/common/vp9_treecoder.h"
+
+/*
+ * Recursively assigns a code to every leaf below the node pair at tree
+ * index i.
+ *
+ * v and L are the code bits and code length accumulated on the way down to
+ * this node. Taking the first entry of the pair appends a 0 bit and taking
+ * the second appends a 1 bit. An entry <= 0 is a leaf whose token is stored
+ * at p[-entry]; a positive entry is the index of the next node pair.
+ */
+static void tree2tok(
+  struct vp9_token_struct *const p,
+  vp9_tree t,
+  int i,
+  int v,
+  int L
+) {
+  const int depth = L + 1;
+  int bit;
+
+  for (bit = 0; bit < 2; ++bit) {
+    const vp9_tree_index child = t[i + bit];
+    const int code = v + v + bit;
+
+    if (child > 0) {
+      tree2tok(p, t, child, code, depth);
+    } else {
+      p[-child].value = code;
+      p[-child].Len = depth;
+    }
+  }
+}
+
+/* Fills p[] with the code value and code length of every leaf of tree t,
+ * walking the tree from its root at index 0. The token for leaf value k is
+ * written to p[k]. */
+void vp9_tokens_from_tree(struct vp9_token_struct *p, vp9_tree t) {
+ tree2tok(p, t, 0, 0, 0);
+}
+
+/* Same as vp9_tokens_from_tree(), except that the token array is rebased by
+ * offset: the token for leaf value k is written to p[k - offset]. */
+void vp9_tokens_from_tree_offset(struct vp9_token_struct *p, vp9_tree t,
+ int offset) {
+ tree2tok(p - offset, t, 0, 0, 0);
+}
+
+/*
+ * Converts per-token event counts into per-node branch counts.
+ *
+ * For each of the n tokens the tree is walked from the root along the
+ * token's code bits, and num_events[token] is added to
+ * branch_ct[node][bit] for every branch taken on the way.
+ *
+ * Both outer loops are do/while loops, so their bodies run at least once;
+ * the debug-only assert documents that tree_len (n - 1) must be non-zero.
+ */
+static void branch_counts(
+ int n, /* n = size of alphabet */
+ vp9_token tok [ /* n */ ],
+ vp9_tree tree,
+ unsigned int branch_ct [ /* n-1 */ ] [2],
+ const unsigned int num_events[ /* n */ ]
+) {
+ const int tree_len = n - 1;
+ int t = 0;
+
+#if CONFIG_DEBUG
+ assert(tree_len);
+#endif
+
+ /* Clear both counters of every internal node. */
+ do {
+ branch_ct[t][0] = branch_ct[t][1] = 0;
+ } while (++t < tree_len);
+
+ t = 0;
+
+ /* Accumulate the event count of each token along its path. */
+ do {
+ int L = tok[t].Len;
+ const int enc = tok[t].value;
+ const unsigned int ct = num_events[t];
+
+ vp9_tree_index i = 0;
+
+ do {
+ /* Code bits are consumed most-significant first. */
+ const int b = (enc >> --L) & 1;
+ /* Tree entries come in pairs, so the node number is half the index. */
+ const int j = i >> 1;
+#if CONFIG_DEBUG
+ assert(j < tree_len && 0 <= L);
+#endif
+
+ branch_ct [j] [b] += ct;
+ i = tree[ i + b];
+ } while (i > 0); /* an entry <= 0 is a leaf */
+
+#if CONFIG_DEBUG
+ /* Every bit of the code must have been used when the leaf is reached. */
+ assert(!L);
+#endif
+ } while (++t < n);
+
+}
+
+
+/*
+ * Derives one probability per internal tree node from token event counts.
+ *
+ * branch_counts() first fills branch_ct[] from num_events[]. For each node,
+ * the probability of taking branch 0 is then count0 * Pfac / total, rounded
+ * to nearest when rd is non-zero and truncated otherwise, and clamped to the
+ * range 1..255. A node that was never visited gets vp9_prob_half.
+ *
+ * The do/while loop runs at least once, so n is expected to be at least 2.
+ */
+void vp9_tree_probs_from_distribution(
+  int n, /* n = size of alphabet */
+  vp9_token tok [ /* n */ ],
+  vp9_tree tree,
+  vp9_prob probs [ /* n-1 */ ],
+  unsigned int branch_ct [ /* n-1 */ ] [2],
+  const unsigned int num_events[ /* n */ ],
+  unsigned int Pfac,
+  int rd
+) {
+  const int tree_len = n - 1;
+  int node = 0;
+
+  branch_counts(n, tok, tree, branch_ct, num_events);
+
+  do {
+    const unsigned int zeros = branch_ct[node][0];
+    const unsigned int total = zeros + branch_ct[node][1];
+
+#if CONFIG_DEBUG
+    assert(total < (1 << 24)); /* no overflow below */
+#endif
+
+    if (!total) {
+      probs[node] = vp9_prob_half;
+    } else {
+      unsigned int p = ((zeros * Pfac) + (rd ? total >> 1 : 0)) / total;
+
+      /* agree w/old version for now */
+      if (p >= 256)
+        p = 255;
+      else if (p == 0)
+        p = 1;
+
+      probs[node] = p;
+    }
+  } while (++node < tree_len);
+}
+
+/*
+ * Returns the probability (out of 255) of outcome 0 given the two outcome
+ * counts, rounded to nearest and never less than 1. With no observations at
+ * all the result is 128.
+ *
+ * counts[0] * 255 is evaluated in unsigned int arithmetic.
+ */
+vp9_prob vp9_bin_prob_from_distribution(const unsigned int counts[2]) {
+  const int total = counts[0] + counts[1];
+  vp9_prob prob = 128;
+
+  if (total) {
+    prob = (counts[0] * 255 + (total >> 1)) / total;
+    if (prob == 0)
+      prob = 1;
+  }
+
+  return prob;
+}
diff --git a/vp9/common/vp9_treecoder.h b/vp9/common/vp9_treecoder.h
new file mode 100644
index 0000000..bbf7e74
--- /dev/null
+++ b/vp9/common/vp9_treecoder.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_COMMON_VP9_TREECODER_H_
+#define VP9_COMMON_VP9_TREECODER_H_
+
+/* A probability is stored in a single unsigned byte. */
+typedef unsigned char vp9_prob;
+
+/* The value 128 as a vp9_prob. */
+#define vp9_prob_half ( (vp9_prob) 128)
+
+/* One entry of a coding tree; signed, because entries <= 0 denote leaves. */
+typedef signed char vp9_tree_index;
+struct bool_coder_spec;
+
+/* Short names for the bool-coder structs; only bool_coder_spec is
+ * forward-declared in this header. */
+typedef struct bool_coder_spec bool_coder_spec;
+typedef struct bool_writer bool_writer;
+typedef struct bool_reader bool_reader;
+
+/* const-qualified variants of the three types above. */
+typedef const bool_coder_spec c_bool_coder_spec;
+typedef const bool_writer c_bool_writer;
+typedef const bool_reader c_bool_reader;
+
+
+
+/* 255 minus x. The argument is parenthesized so that compound expressions
+ * such as vp9_complement(a + b) expand correctly. */
+#define vp9_complement(x) (255 - (x))
+
+
+/* We build coding trees compactly in arrays.
+ Each node of the tree is a pair of vp9_tree_indices.
+ Array index often references a corresponding probability table.
+ Index <= 0 means done encoding/decoding and value = -Index,
+ Index > 0 means need another bit, specification at index.
+ Nonnegative indices are always even; processing begins at node 0. */
+
+/* vp9_tree is an array of const tree entries; vp9_tree_p points to one. */
+typedef const vp9_tree_index vp9_tree[], *vp9_tree_p;
+
+
+/* The code assigned to one token: its bit pattern (value) and its length in
+ * bits (Len). The vp9_token typedef is const-qualified, which is why the
+ * functions that fill tokens in take `struct vp9_token_struct *` instead. */
+typedef const struct vp9_token_struct {
+ int value;
+ int Len;
+} vp9_token;
+
+/* Construct encoding array from tree. */
+
+void vp9_tokens_from_tree(struct vp9_token_struct *, vp9_tree);
+void vp9_tokens_from_tree_offset(struct vp9_token_struct *, vp9_tree,
+ int offset);
+
+
+/* Convert array of token occurrence counts into a table of probabilities
+ for the associated binary encoding tree. Also writes count of branches
+ taken for each node on the tree; this facilitates decisions as to
+ probability updates. */
+
+void vp9_tree_probs_from_distribution(
+ int n, /* n = size of alphabet */
+ vp9_token tok [ /* n */ ],
+ vp9_tree tree,
+ vp9_prob probs [ /* n-1 */ ],
+ unsigned int branch_ct [ /* n-1 */ ] [2],
+ const unsigned int num_events[ /* n */ ],
+ unsigned int Pfactor,
+ int Round
+);
+
+/* Clamps p to the range 1..255. */
+static __inline int clip_prob(int p) {
+  if (p < 1)
+    return 1;
+  return p > 255 ? 255 : p;
+}
+
+/* Probability (out of 255, never 0) of outcome 0 given the two outcome
+ * counts; 128 when both counts are zero. */
+vp9_prob vp9_bin_prob_from_distribution(const unsigned int counts[2]);
+
+#endif  /* VP9_COMMON_VP9_TREECODER_H_ */
diff --git a/vp9/common/vp9_type_aliases.h b/vp9/common/vp9_type_aliases.h
new file mode 100644
index 0000000..47be747
--- /dev/null
+++ b/vp9/common/vp9_type_aliases.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/****************************************************************************
+*
+* Module Title : vp9_type_aliases.h
+*
+* Description : Standard type aliases
+*
+****************************************************************************/
+#ifndef VP9_COMMON_VP9_TYPE_ALIASES_H_
+#define VP9_COMMON_VP9_TYPE_ALIASES_H_
+
+/****************************************************************************
+* Macros
+****************************************************************************/
+/* EXPORT expands to nothing. */
+#define EXPORT
+#define IMPORT extern /* Used to declare imported data & routines */
+#define PRIVATE static /* Used to declare & define module-local data */
+#define LOCAL static /* Used to define all persistent routine-local data */
+#define STD_IN_PATH 0 /* Standard input path */
+#define STD_OUT_PATH 1 /* Standard output path */
+#define STD_ERR_PATH 2 /* Standard error path */
+#define STD_IN_FILE stdin /* Standard input file pointer */
+#define STD_OUT_FILE stdout /* Standard output file pointer */
+#define STD_ERR_FILE stderr /* Standard error file pointer */
+#define max_int 0x7FFFFFFF
+
+/* Both spellings are defined away to nothing. */
+#define __export
+#define _export
+
+/* Calling-convention placeholder; expands to nothing. */
+#define CCONV
+
+/* Fallback definitions, used only when nothing has defined the name yet. */
+#ifndef NULL
+#ifdef __cplusplus
+#define NULL 0
+#else
+#define NULL ((void *)0)
+#endif
+#endif
+
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+#ifndef TRUE
+#define TRUE 1
+#endif
+
+/****************************************************************************
+* Typedefs
+*
+* Each typedef is skipped when the matching TYPE_xxx macro is already
+* defined. Only TYPE_INT8 is defined by this header itself; for the other
+* types the #define is commented out, so the guard only takes effect when
+* something else defines the macro first.
+****************************************************************************/
+#ifndef TYPE_INT8
+#define TYPE_INT8
+typedef signed char INT8;
+#endif
+
+#ifndef TYPE_INT16
+/*#define TYPE_INT16*/
+typedef signed short INT16;
+#endif
+
+#ifndef TYPE_INT32
+/*#define TYPE_INT32*/
+typedef signed int INT32;
+#endif
+
+#ifndef TYPE_UINT8
+/*#define TYPE_UINT8*/
+typedef unsigned char UINT8;
+#endif
+
+#ifndef TYPE_UINT32
+/*#define TYPE_UINT32*/
+typedef unsigned int UINT32;
+#endif
+
+#ifndef TYPE_UINT16
+/*#define TYPE_UINT16*/
+typedef unsigned short UINT16;
+#endif
+
+#ifndef TYPE_BOOL
+/*#define TYPE_BOOL*/
+typedef int BOOL;
+#endif
+
+/* Unlike BOOL (an int), BOOLEAN is an unsigned char. */
+typedef unsigned char BOOLEAN;
+
+/* INT64: __int64 under MSVC; elsewhere long long, or long on _TMS320C6X. */
+#ifdef _MSC_VER
+typedef __int64 INT64;
+/* For _MSC_VER below 1600, supply INT64_MAX unless it is already defined. */
+#if _MSC_VER < 1600
+#ifndef INT64_MAX
+#define INT64_MAX LLONG_MAX
+#endif
+#endif
+#else
+
+#ifndef TYPE_INT64
+#ifdef _TMS320C6X
+/* for now we only have 40bits */
+typedef long INT64;
+#else
+typedef long long INT64;
+#endif
+#endif
+
+#endif
+
+/* Floating point */
+typedef double FLOAT64;
+typedef float FLOAT32;
+
+#endif  /* VP9_COMMON_VP9_TYPE_ALIASES_H_ */
diff --git a/vp9/common/x86/vp9_asm_stubs.c b/vp9/common/x86/vp9_asm_stubs.c
new file mode 100644
index 0000000..de1f0fa
--- /dev/null
+++ b/vp9/common/x86/vp9_asm_stubs.c
@@ -0,0 +1,625 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "./vpx_config.h"
+#include "vpx_ports/mem.h"
+#include "vp9/common/vp9_subpixel.h"
+
+/* Filter tables and filter kernels defined outside this file.
+ * NOTE(review): presumably implemented in assembly - confirm. */
+
+/* Six-tap filter table; the wrappers in this file index it by x/y offset. */
+extern const short vp9_six_tap_mmx[8][6 * 8];
+
+extern const short vp9_bilinear_filters_8x_mmx[8][2 * 8];
+
+/* MMX horizontal pass: 8-bit source in, 16-bit intermediate out. */
+extern void vp9_filter_block1d_h6_mmx(unsigned char *src_ptr,
+ unsigned short *output_ptr,
+ unsigned int src_pixels_per_line,
+ unsigned int pixel_step,
+ unsigned int output_height,
+ unsigned int output_width,
+ const short *vp9_filter);
+
+/* MMX vertical pass: 16-bit intermediate in, 8-bit destination out. */
+extern void vp9_filter_block1dc_v6_mmx(unsigned short *src_ptr,
+ unsigned char *output_ptr,
+ int output_pitch,
+ unsigned int pixels_per_line,
+ unsigned int pixel_step,
+ unsigned int output_height,
+ unsigned int output_width,
+ const short *vp9_filter);
+
+/* SSE2 horizontal passes (8-bit in, 16-bit out). */
+extern void vp9_filter_block1d8_h6_sse2(unsigned char *src_ptr,
+ unsigned short *output_ptr,
+ unsigned int src_pixels_per_line,
+ unsigned int pixel_step,
+ unsigned int output_height,
+ unsigned int output_width,
+ const short *vp9_filter);
+
+extern void vp9_filter_block1d16_h6_sse2(unsigned char *src_ptr,
+ unsigned short *output_ptr,
+ unsigned int src_pixels_per_line,
+ unsigned int pixel_step,
+ unsigned int output_height,
+ unsigned int output_width,
+ const short *vp9_filter);
+
+/* SSE2 vertical passes (16-bit in, 8-bit out). */
+extern void vp9_filter_block1d8_v6_sse2(unsigned short *src_ptr,
+ unsigned char *output_ptr,
+ int dst_ptich,
+ unsigned int pixels_per_line,
+ unsigned int pixel_step,
+ unsigned int output_height,
+ unsigned int output_width,
+ const short *vp9_filter);
+
+extern void vp9_filter_block1d16_v6_sse2(unsigned short *src_ptr,
+ unsigned char *output_ptr,
+ int dst_ptich,
+ unsigned int pixels_per_line,
+ unsigned int pixel_step,
+ unsigned int output_height,
+ unsigned int output_width,
+ const short *vp9_filter);
+
+/* Takes no filter argument; used by the 16x16 SSE2 wrapper to fill the
+ * 16-bit intermediate buffer when only the vertical pass is needed. */
+extern void vp9_unpack_block1d16_h6_sse2(unsigned char *src_ptr,
+ unsigned short *output_ptr,
+ unsigned int src_pixels_per_line,
+ unsigned int output_height,
+ unsigned int output_width);
+
+/* Single-pass SSE2 variants: 8-bit source straight to 8-bit destination. */
+extern void vp9_filter_block1d8_h6_only_sse2(unsigned char *src_ptr,
+ unsigned int src_pixels_per_line,
+ unsigned char *output_ptr,
+ int dst_pitch,
+ unsigned int output_height,
+ const short *vp9_filter);
+
+extern void vp9_filter_block1d16_h6_only_sse2(unsigned char *src_ptr,
+ unsigned int src_pixels_per_lin,
+ unsigned char *output_ptr,
+ int dst_pitch,
+ unsigned int output_height,
+ const short *vp9_filter);
+
+extern void vp9_filter_block1d8_v6_only_sse2(unsigned char *src_ptr,
+ unsigned int src_pixels_per_line,
+ unsigned char *output_ptr,
+ int dst_pitch,
+ unsigned int output_height,
+ const short *vp9_filter);
+
+/* 8x8 bilinear predictor, declared with the shared predictor signature. */
+extern prototype_subpixel_predict(vp9_bilinear_predict8x8_mmx);
+
+///////////////////////////////////////////////////////////////////////////
+// the mmx function that does the bilinear filtering and var calculation //
+// in one pass //
+///////////////////////////////////////////////////////////////////////////
+// NOTE(review): the banner above describes a function, but what follows is
+// a filter table - confirm which routine consumes it.
+// Row k holds the first tap (128 - 8 * k) repeated four times, followed by
+// the second tap (8 * k) repeated four times; every pair of taps sums to 128.
+DECLARE_ALIGNED(16, const short, vp9_bilinear_filters_mmx[16][8]) = {
+ { 128, 128, 128, 128, 0, 0, 0, 0 },
+ { 120, 120, 120, 120, 8, 8, 8, 8 },
+ { 112, 112, 112, 112, 16, 16, 16, 16 },
+ { 104, 104, 104, 104, 24, 24, 24, 24 },
+ { 96, 96, 96, 96, 32, 32, 32, 32 },
+ { 88, 88, 88, 88, 40, 40, 40, 40 },
+ { 80, 80, 80, 80, 48, 48, 48, 48 },
+ { 72, 72, 72, 72, 56, 56, 56, 56 },
+ { 64, 64, 64, 64, 64, 64, 64, 64 },
+ { 56, 56, 56, 56, 72, 72, 72, 72 },
+ { 48, 48, 48, 48, 80, 80, 80, 80 },
+ { 40, 40, 40, 40, 88, 88, 88, 88 },
+ { 32, 32, 32, 32, 96, 96, 96, 96 },
+ { 24, 24, 24, 24, 104, 104, 104, 104 },
+ { 16, 16, 16, 16, 112, 112, 112, 112 },
+ { 8, 8, 8, 8, 120, 120, 120, 120 }
+};
+
+#if HAVE_MMX
+/*
+ * 4x4 six-tap prediction (MMX): a horizontal pass into a 16-bit scratch
+ * buffer followed by a vertical pass into dst_ptr.
+ *
+ * The horizontal pass starts two rows above src_ptr and produces 9 rows.
+ * NOTE(review): presumably the 4 output rows plus the 5 extra rows a 6-tap
+ * vertical filter needs, with widths/offsets passed to the kernels in the
+ * units they expect - confirm against the assembly.
+ */
+void vp9_sixtap_predict4x4_mmx(unsigned char *src_ptr,
+                               int src_pixels_per_line,
+                               int xoffset,
+                               int yoffset,
+                               unsigned char *dst_ptr,
+                               int dst_pitch) {
+  /* Temp data buffer used in filtering */
+  DECLARE_ALIGNED_ARRAY(16, unsigned short, fdata2, 16 * 16);
+  const short *const hfilter = vp9_six_tap_mmx[xoffset];
+  const short *const vfilter = vp9_six_tap_mmx[yoffset];
+
+#ifdef ANNOUNCE_FUNCTION
+  printf("vp9_sixtap_predict4x4_mmx\n");
+#endif
+
+  /* First pass: horizontal filter into the scratch buffer. */
+  vp9_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), fdata2,
+                            src_pixels_per_line, 1, 9, 8, hfilter);
+
+  /* Second pass: vertical filter from the scratch buffer to the output. */
+  vp9_filter_block1dc_v6_mmx(fdata2 + 8, dst_ptr, dst_pitch,
+                             8, 4, 4, 4, vfilter);
+}
+
+/*
+ * 16x16 six-tap prediction (MMX).
+ *
+ * The block is processed as four 4-pixel-wide column strips: all four
+ * horizontal passes run first (starting two rows above src_ptr, 21 rows
+ * each), then all four vertical passes write 16 rows each to dst_ptr.
+ * NOTE(review): the 32 passed as width/stride is presumably in the units the
+ * assembly kernels expect - confirm there.
+ */
+void vp9_sixtap_predict16x16_mmx(unsigned char *src_ptr,
+                                 int src_pixels_per_line,
+                                 int xoffset,
+                                 int yoffset,
+                                 unsigned char *dst_ptr,
+                                 int dst_pitch) {
+  /* Temp data buffer used in filtering */
+  DECLARE_ALIGNED_ARRAY(16, unsigned short, fdata2, 24 * 24);
+  const short *const hfilter = vp9_six_tap_mmx[xoffset];
+  const short *const vfilter = vp9_six_tap_mmx[yoffset];
+  int strip;
+
+#ifdef ANNOUNCE_FUNCTION
+  printf("vp9_sixtap_predict16x16_mmx\n");
+#endif
+
+  /* First pass: horizontal filter, one 4-wide strip at a time. */
+  for (strip = 0; strip < 16; strip += 4) {
+    vp9_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + strip,
+                              fdata2 + strip, src_pixels_per_line, 1, 21, 32,
+                              hfilter);
+  }
+
+  /* Second pass: vertical filter, one 4-wide strip at a time. */
+  for (strip = 0; strip < 16; strip += 4) {
+    vp9_filter_block1dc_v6_mmx(fdata2 + 32 + strip, dst_ptr + strip, dst_pitch,
+                               32, 16, 16, 16, vfilter);
+  }
+}
+
+/*
+ * 8x8 six-tap prediction (MMX).
+ *
+ * The block is processed as two 4-pixel-wide column strips: both horizontal
+ * passes run first (starting two rows above src_ptr, 13 rows each), then
+ * both vertical passes write 8 rows each to dst_ptr.
+ */
+void vp9_sixtap_predict8x8_mmx(unsigned char *src_ptr,
+                               int src_pixels_per_line,
+                               int xoffset,
+                               int yoffset,
+                               unsigned char *dst_ptr,
+                               int dst_pitch) {
+  /* Temp data buffer used in filtering */
+  DECLARE_ALIGNED_ARRAY(16, unsigned short, fdata2, 256);
+  const short *const hfilter = vp9_six_tap_mmx[xoffset];
+  const short *const vfilter = vp9_six_tap_mmx[yoffset];
+  int strip;
+
+#ifdef ANNOUNCE_FUNCTION
+  printf("vp9_sixtap_predict8x8_mmx\n");
+#endif
+
+  /* First pass: horizontal filter, one 4-wide strip at a time. */
+  for (strip = 0; strip < 8; strip += 4) {
+    vp9_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + strip,
+                              fdata2 + strip, src_pixels_per_line, 1, 13, 16,
+                              hfilter);
+  }
+
+  /* Second pass: vertical filter, one 4-wide strip at a time. */
+  for (strip = 0; strip < 8; strip += 4) {
+    vp9_filter_block1dc_v6_mmx(fdata2 + 16 + strip, dst_ptr + strip, dst_pitch,
+                               16, 8, 8, 8, vfilter);
+  }
+}
+
+/*
+ * 8x4 six-tap prediction (MMX).
+ *
+ * Same structure as the 8x8 variant, with 9 intermediate rows and 4 output
+ * rows: two 4-pixel-wide horizontal passes, then two vertical passes.
+ */
+void vp9_sixtap_predict8x4_mmx(unsigned char *src_ptr,
+                               int src_pixels_per_line,
+                               int xoffset,
+                               int yoffset,
+                               unsigned char *dst_ptr,
+                               int dst_pitch) {
+  /* Temp data buffer used in filtering */
+  DECLARE_ALIGNED_ARRAY(16, unsigned short, fdata2, 256);
+  const short *const hfilter = vp9_six_tap_mmx[xoffset];
+  const short *const vfilter = vp9_six_tap_mmx[yoffset];
+  int strip;
+
+#ifdef ANNOUNCE_FUNCTION
+  printf("vp9_sixtap_predict8x4_mmx\n");
+#endif
+
+  /* First pass: horizontal filter, one 4-wide strip at a time. */
+  for (strip = 0; strip < 8; strip += 4) {
+    vp9_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + strip,
+                              fdata2 + strip, src_pixels_per_line, 1, 9, 16,
+                              hfilter);
+  }
+
+  /* Second pass: vertical filter, one 4-wide strip at a time. */
+  for (strip = 0; strip < 8; strip += 4) {
+    vp9_filter_block1dc_v6_mmx(fdata2 + 16 + strip, dst_ptr + strip, dst_pitch,
+                               16, 8, 4, 8, vfilter);
+  }
+}
+
+/*
+ * 16x16 bilinear prediction built from four 8x8 predictions, issued in the
+ * order top-left, top-right, bottom-left, bottom-right. All four calls use
+ * the same x and y offsets.
+ */
+void vp9_bilinear_predict16x16_mmx(unsigned char *src_ptr,
+                                   int src_pixels_per_line,
+                                   int xoffset,
+                                   int yoffset,
+                                   unsigned char *dst_ptr,
+                                   int dst_pitch) {
+  int row, col;
+
+  for (row = 0; row < 16; row += 8) {
+    for (col = 0; col < 16; col += 8) {
+      vp9_bilinear_predict8x8_mmx(src_ptr + row * src_pixels_per_line + col,
+                                  src_pixels_per_line, xoffset, yoffset,
+                                  dst_ptr + dst_pitch * row + col, dst_pitch);
+    }
+  }
+}
+#endif
+
+#if HAVE_SSE2
+/*
+ * 16x16 six-tap prediction (SSE2).
+ *
+ *  - xoffset == 0: the source is unpacked into the 16-bit scratch buffer and
+ *    only the vertical filter runs.
+ *  - yoffset == 0 (xoffset != 0): only the horizontal filter runs, writing
+ *    straight to dst_ptr.
+ *  - otherwise: horizontal filter into the scratch buffer, then vertical
+ *    filter into dst_ptr.
+ */
+void vp9_sixtap_predict16x16_sse2(unsigned char *src_ptr,
+                                  int src_pixels_per_line,
+                                  int xoffset,
+                                  int yoffset,
+                                  unsigned char *dst_ptr,
+                                  int dst_pitch) {
+  /* Temp data buffer used in filtering */
+  DECLARE_ALIGNED_ARRAY(16, unsigned short, fdata2, 24 * 24);
+#ifdef ANNOUNCE_FUNCTION
+  printf("vp9_sixtap_predict16x16_sse2\n");
+#endif
+
+  if (!xoffset) {
+    /* Second-pass only */
+    const short *const vfilter = vp9_six_tap_mmx[yoffset];
+
+    vp9_unpack_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), fdata2,
+                                 src_pixels_per_line, 21, 32);
+    vp9_filter_block1d16_v6_sse2(fdata2 + 32, dst_ptr, dst_pitch,
+                                 32, 16, 16, dst_pitch, vfilter);
+    return;
+  }
+
+  if (!yoffset) {
+    /* First-pass only */
+    const short *const hfilter = vp9_six_tap_mmx[xoffset];
+
+    vp9_filter_block1d16_h6_only_sse2(src_ptr, src_pixels_per_line,
+                                      dst_ptr, dst_pitch, 16, hfilter);
+    return;
+  }
+
+  /* Both passes */
+  {
+    const short *const hfilter = vp9_six_tap_mmx[xoffset];
+    const short *const vfilter = vp9_six_tap_mmx[yoffset];
+
+    vp9_filter_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), fdata2,
+                                 src_pixels_per_line, 1, 21, 32, hfilter);
+    vp9_filter_block1d16_v6_sse2(fdata2 + 32, dst_ptr, dst_pitch,
+                                 32, 16, 16, dst_pitch, vfilter);
+  }
+}
+
+/*
+ * 8x8 six-tap prediction (SSE2).
+ *
+ *  - xoffset == 0: only the vertical filter runs, directly from the source
+ *    (starting two rows above src_ptr) to dst_ptr.
+ *  - yoffset == 0 (xoffset != 0): only the horizontal filter runs, writing
+ *    straight to dst_ptr.
+ *  - otherwise: horizontal filter into the 16-bit scratch buffer, then
+ *    vertical filter into dst_ptr.
+ */
+void vp9_sixtap_predict8x8_sse2(unsigned char *src_ptr,
+                                int src_pixels_per_line,
+                                int xoffset,
+                                int yoffset,
+                                unsigned char *dst_ptr,
+                                int dst_pitch) {
+  /* Temp data buffer used in filtering */
+  DECLARE_ALIGNED_ARRAY(16, unsigned short, fdata2, 256);
+#ifdef ANNOUNCE_FUNCTION
+  printf("vp9_sixtap_predict8x8_sse2\n");
+#endif
+
+  if (!xoffset) {
+    /* Second-pass only */
+    const short *const vfilter = vp9_six_tap_mmx[yoffset];
+
+    vp9_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line),
+                                     src_pixels_per_line,
+                                     dst_ptr, dst_pitch, 8, vfilter);
+    return;
+  }
+
+  if (!yoffset) {
+    /* First-pass only */
+    const short *const hfilter = vp9_six_tap_mmx[xoffset];
+
+    vp9_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line,
+                                     dst_ptr, dst_pitch, 8, hfilter);
+    return;
+  }
+
+  /* Both passes */
+  {
+    const short *const hfilter = vp9_six_tap_mmx[xoffset];
+    const short *const vfilter = vp9_six_tap_mmx[yoffset];
+
+    vp9_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), fdata2,
+                                src_pixels_per_line, 1, 13, 16, hfilter);
+    vp9_filter_block1d8_v6_sse2(fdata2 + 16, dst_ptr, dst_pitch,
+                                16, 8, 8, dst_pitch, vfilter);
+  }
+}
+
+/*
+ * 8x4 six-tap prediction (SSE2).
+ *
+ * Same dispatch as the 8x8 variant, with 4 output rows (9 intermediate rows
+ * when both passes run):
+ *  - xoffset == 0: vertical filter only, source to dst_ptr.
+ *  - yoffset == 0 (xoffset != 0): horizontal filter only, source to dst_ptr.
+ *  - otherwise: horizontal into the scratch buffer, then vertical.
+ */
+void vp9_sixtap_predict8x4_sse2(unsigned char *src_ptr,
+                                int src_pixels_per_line,
+                                int xoffset,
+                                int yoffset,
+                                unsigned char *dst_ptr,
+                                int dst_pitch) {
+  /* Temp data buffer used in filtering */
+  DECLARE_ALIGNED_ARRAY(16, unsigned short, fdata2, 256);
+#ifdef ANNOUNCE_FUNCTION
+  printf("vp9_sixtap_predict8x4_sse2\n");
+#endif
+
+  if (!xoffset) {
+    /* Second-pass only */
+    const short *const vfilter = vp9_six_tap_mmx[yoffset];
+
+    vp9_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line),
+                                     src_pixels_per_line,
+                                     dst_ptr, dst_pitch, 4, vfilter);
+    return;
+  }
+
+  if (!yoffset) {
+    /* First-pass only */
+    const short *const hfilter = vp9_six_tap_mmx[xoffset];
+
+    vp9_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line,
+                                     dst_ptr, dst_pitch, 4, hfilter);
+    return;
+  }
+
+  /* Both passes */
+  {
+    const short *const hfilter = vp9_six_tap_mmx[xoffset];
+    const short *const vfilter = vp9_six_tap_mmx[yoffset];
+
+    vp9_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), fdata2,
+                                src_pixels_per_line, 1, 9, 16, hfilter);
+    vp9_filter_block1d8_v6_sse2(fdata2 + 16, dst_ptr, dst_pitch,
+                                16, 8, 4, dst_pitch, vfilter);
+  }
+}
+#endif
+
+#if HAVE_SSSE3
+/* SSSE3 filter kernels defined outside this file.
+ * NOTE(review): presumably implemented in assembly - confirm.
+ * Unlike the MMX/SSE2 kernels declared earlier, these take a filter index
+ * rather than a pointer to filter taps, and both their input and their
+ * output are 8-bit. */
+extern void vp9_filter_block1d8_h6_ssse3(unsigned char *src_ptr,
+ unsigned int src_pixels_per_line,
+ unsigned char *output_ptr,
+ unsigned int output_pitch,
+ unsigned int output_height,
+ unsigned int vp9_filter_index);
+
+extern void vp9_filter_block1d16_h6_ssse3(unsigned char *src_ptr,
+ unsigned int src_pixels_per_line,
+ unsigned char *output_ptr,
+ unsigned int output_pitch,
+ unsigned int output_height,
+ unsigned int vp9_filter_index);
+
+extern void vp9_filter_block1d16_v6_ssse3(unsigned char *src_ptr,
+ unsigned int src_pitch,
+ unsigned char *output_ptr,
+ unsigned int out_pitch,
+ unsigned int output_height,
+ unsigned int vp9_filter_index);
+
+extern void vp9_filter_block1d8_v6_ssse3(unsigned char *src_ptr,
+ unsigned int src_pitch,
+ unsigned char *output_ptr,
+ unsigned int out_pitch,
+ unsigned int output_height,
+ unsigned int vp9_filter_index);
+
+extern void vp9_filter_block1d4_h6_ssse3(unsigned char *src_ptr,
+ unsigned int src_pixels_per_line,
+ unsigned char *output_ptr,
+ unsigned int output_pitch,
+ unsigned int output_height,
+ unsigned int vp9_filter_index);
+
+extern void vp9_filter_block1d4_v6_ssse3(unsigned char *src_ptr,
+ unsigned int src_pitch,
+ unsigned char *output_ptr,
+ unsigned int out_pitch,
+ unsigned int output_height,
+ unsigned int vp9_filter_index);
+
+void vp9_sixtap_predict16x16_ssse3(unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset, /* horizontal filter index; 0 = no horizontal pass */
+ int yoffset, /* vertical filter index */
+ unsigned char *dst_ptr,
+ int dst_pitch) { /* 16x16 prediction: optional H pass, then V pass */
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 24 * 24); /* pass-1 output */
+#ifdef ANNOUNCE_FUNCTION
+ printf("vp9_sixtap_predict16x16_ssse3\n");
+#endif
+
+ if (xoffset) {
+ if (yoffset) { /* both passes: 21 (16 + 5) filtered rows go via fdata2 */
+ vp9_filter_block1d16_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
+ src_pixels_per_line,
+ fdata2, 16, 21, xoffset);
+ vp9_filter_block1d16_v6_ssse3(fdata2, 16, dst_ptr, dst_pitch,
+ 16, yoffset);
+ } else {
+ /* First-pass only: horizontal filter written straight to dst_ptr */
+ vp9_filter_block1d16_h6_ssse3(src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pitch, 16, xoffset);
+ }
+ } else {
+ /* Second-pass only (also taken when both offsets are 0) */
+ vp9_filter_block1d16_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
+ src_pixels_per_line,
+ dst_ptr, dst_pitch, 16, yoffset);
+ }
+}
+
+void vp9_sixtap_predict8x8_ssse3(unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset, /* horizontal filter index; 0 = no horizontal pass */
+ int yoffset, /* vertical filter index */
+ unsigned char *dst_ptr,
+ int dst_pitch) { /* 8x8 prediction: optional H pass, then V pass */
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 256); /* pass-1 output */
+#ifdef ANNOUNCE_FUNCTION
+ printf("vp9_sixtap_predict8x8_ssse3\n");
+#endif
+
+ if (xoffset) {
+ if (yoffset) { /* both passes: 13 (8 + 5) filtered rows go via fdata2 */
+ vp9_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
+ src_pixels_per_line, fdata2, 8, 13, xoffset);
+ vp9_filter_block1d8_v6_ssse3(fdata2, 8, dst_ptr, dst_pitch, 8, yoffset);
+ } else { /* First-pass only: horizontal filter straight to dst_ptr */
+ vp9_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pitch, 8, xoffset);
+ }
+ } else {
+ /* Second-pass only (also taken when both offsets are 0) */
+ vp9_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
+ src_pixels_per_line,
+ dst_ptr, dst_pitch, 8, yoffset);
+ }
+}
+
+void vp9_sixtap_predict8x4_ssse3(unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset, /* horizontal filter index; 0 = no horizontal pass */
+ int yoffset, /* vertical filter index */
+ unsigned char *dst_ptr,
+ int dst_pitch) { /* 8 wide x 4 high: optional H pass, then V pass */
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 256); /* pass-1 output */
+#ifdef ANNOUNCE_FUNCTION
+ printf("vp9_sixtap_predict8x4_ssse3\n");
+#endif
+
+ if (xoffset) {
+ if (yoffset) { /* both passes: 9 (4 + 5) filtered rows go via fdata2 */
+ vp9_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
+ src_pixels_per_line, fdata2, 8, 9, xoffset);
+ vp9_filter_block1d8_v6_ssse3(fdata2, 8, dst_ptr, dst_pitch, 4, yoffset);
+ } else {
+ /* First-pass only: horizontal filter written straight to dst_ptr */
+ vp9_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pitch, 4, xoffset);
+ }
+ } else {
+ /* Second-pass only (also taken when both offsets are 0) */
+ vp9_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
+ src_pixels_per_line,
+ dst_ptr, dst_pitch, 4, yoffset);
+ }
+}
+
+void vp9_sixtap_predict4x4_ssse3(unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset, /* horizontal filter index; 0 = no horizontal pass */
+ int yoffset, /* vertical filter index */
+ unsigned char *dst_ptr,
+ int dst_pitch) { /* 4x4 prediction: optional H pass, then V pass */
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 4 * 9); /* pass-1 output */
+#ifdef ANNOUNCE_FUNCTION
+ printf("vp9_sixtap_predict4x4_ssse3\n");
+#endif
+
+ if (xoffset) {
+ if (yoffset) { /* both passes: 9 (4 + 5) filtered rows go via fdata2 */
+ vp9_filter_block1d4_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
+ src_pixels_per_line, fdata2, 4, 9, xoffset);
+ vp9_filter_block1d4_v6_ssse3(fdata2, 4, dst_ptr, dst_pitch, 4, yoffset);
+ } else { /* First-pass only: horizontal filter straight to dst_ptr */
+ vp9_filter_block1d4_h6_ssse3(src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pitch, 4, xoffset);
+ }
+ } else { /* Second-pass only (also taken when both offsets are 0) */
+ vp9_filter_block1d4_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
+ src_pixels_per_line,
+ dst_ptr, dst_pitch, 4, yoffset);
+ }
+}
+
+void vp9_filter_block1d16_v8_ssse3(const unsigned char *src_ptr,
+ const unsigned int src_pitch,
+ unsigned char *output_ptr,
+ unsigned int out_pitch,
+ unsigned int output_height,
+ const short *filter);
+
+void vp9_filter_block1d16_h8_ssse3(const unsigned char *src_ptr,
+ const unsigned int src_pitch,
+ unsigned char *output_ptr,
+ unsigned int out_pitch,
+ unsigned int output_height,
+ const short *filter);
+
+void vp9_filter_block2d_16x16_8_ssse3(const unsigned char *src_ptr,
+ const unsigned int src_stride,
+ const short *hfilter_aligned16,
+ const short *vfilter_aligned16,
+ unsigned char *dst_ptr,
+ unsigned int dst_stride) { /* 16x16 block, 8-tap H and/or V filtering */
+ if (hfilter_aligned16[3] != 128 && vfilter_aligned16[3] != 128) { /* H + V */
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 23 * 16); /* 23 rows x 16 */
+
+ vp9_filter_block1d16_h8_ssse3(src_ptr - (3 * src_stride), src_stride,
+ fdata2, 16, 23, hfilter_aligned16);
+ vp9_filter_block1d16_v8_ssse3(fdata2, 16, dst_ptr, dst_stride, 16,
+ vfilter_aligned16);
+ } else { /* one pass only: horizontal if its tap [3] != 128, else vertical */
+ if (hfilter_aligned16[3] != 128) {
+ vp9_filter_block1d16_h8_ssse3(src_ptr, src_stride, dst_ptr, dst_stride,
+ 16, hfilter_aligned16);
+ } else { /* NOTE(review): tap [3] == 128 presumably means identity - confirm */
+ vp9_filter_block1d16_v8_ssse3(src_ptr - (3 * src_stride), src_stride,
+ dst_ptr, dst_stride, 16, vfilter_aligned16);
+ }
+ }
+}
+
+void vp9_filter_block1d8_v8_ssse3(const unsigned char *src_ptr,
+ const unsigned int src_pitch,
+ unsigned char *output_ptr,
+ unsigned int out_pitch,
+ unsigned int output_height,
+ const short *filter);
+
+void vp9_filter_block1d8_h8_ssse3(const unsigned char *src_ptr,
+ const unsigned int src_pitch,
+ unsigned char *output_ptr,
+ unsigned int out_pitch,
+ unsigned int output_height,
+ const short *filter);
+
+void vp9_filter_block2d_8x8_8_ssse3(const unsigned char *src_ptr,
+ const unsigned int src_stride,
+ const short *hfilter_aligned16,
+ const short *vfilter_aligned16,
+ unsigned char *dst_ptr,
+ unsigned int dst_stride) { /* 8x8 block, 8-tap H and/or V filtering */
+ if (hfilter_aligned16[3] != 128 && vfilter_aligned16[3] != 128) { /* H + V */
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 23 * 16); /* pitch 16 used */
+
+ vp9_filter_block1d8_h8_ssse3(src_ptr - (3 * src_stride), src_stride,
+ fdata2, 16, 15, hfilter_aligned16);
+ vp9_filter_block1d8_v8_ssse3(fdata2, 16, dst_ptr, dst_stride, 8,
+ vfilter_aligned16);
+ } else { /* one pass only: horizontal if its tap [3] != 128, else vertical */
+ if (hfilter_aligned16[3] != 128) {
+ vp9_filter_block1d8_h8_ssse3(src_ptr, src_stride, dst_ptr, dst_stride, 8,
+ hfilter_aligned16);
+ } else { /* NOTE(review): tap [3] == 128 presumably means identity - confirm */
+ vp9_filter_block1d8_v8_ssse3(src_ptr - (3 * src_stride), src_stride,
+ dst_ptr, dst_stride, 8, vfilter_aligned16);
+ }
+ }
+}
+
+void vp9_filter_block2d_8x4_8_ssse3(const unsigned char *src_ptr,
+ const unsigned int src_stride,
+ const short *hfilter_aligned16,
+ const short *vfilter_aligned16,
+ unsigned char *dst_ptr,
+ unsigned int dst_stride) { /* 8 wide x 4 high, 8-tap H and/or V filtering */
+ if (hfilter_aligned16[3] !=128 && vfilter_aligned16[3] != 128) { /* H + V */
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 23 * 16); /* pitch 16 used */
+
+ vp9_filter_block1d8_h8_ssse3(src_ptr - (3 * src_stride), src_stride,
+ fdata2, 16, 11, hfilter_aligned16);
+ vp9_filter_block1d8_v8_ssse3(fdata2, 16, dst_ptr, dst_stride, 4,
+ vfilter_aligned16);
+ } else { /* one pass only: horizontal if its tap [3] != 128, else vertical */
+ if (hfilter_aligned16[3] != 128) {
+ vp9_filter_block1d8_h8_ssse3(src_ptr, src_stride, dst_ptr, dst_stride, 4,
+ hfilter_aligned16);
+ } else { /* NOTE(review): tap [3] == 128 presumably means identity - confirm */
+ vp9_filter_block1d8_v8_ssse3(src_ptr - (3 * src_stride), src_stride,
+ dst_ptr, dst_stride, 4, vfilter_aligned16);
+ }
+ }
+}
+#endif
diff --git a/vp9/common/x86/vp9_filter_sse2.c b/vp9/common/x86/vp9_filter_sse2.c
new file mode 100644
index 0000000..8e02ac1
--- /dev/null
+++ b/vp9/common/x86/vp9_filter_sse2.c
@@ -0,0 +1,290 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h> // for alignment checks
+#include <emmintrin.h> // SSE2
+#include "vp9/common/vp9_filter.h"
+#include "vpx_ports/emmintrin_compat.h"
+#include "vpx_ports/mem.h" // for DECLARE_ALIGNED
+#include "vp9_rtcd.h"
+
+// TODO(cd): After cleanup, commit faster versions for non 4x4 size. This is
+// just a quick partial snapshot so that others can already use some
+// speedup.
+// TODO(cd): Use vectorized 8 tap filtering code as speedup to pure C 6 tap
+// filtering.
+// TODO(cd): Add some comments, better variable naming.
+// TODO(cd): Maybe use _mm_maddubs_epi16 if smaller filter coefficients (no sum
+// of positive above 128), or have higher precision filter
+// coefficients.
+
+DECLARE_ALIGNED(16, static const unsigned int, rounding_c[4]) = { // one per lane
+ VP9_FILTER_WEIGHT >> 1, // half the filter weight: added to each 32-bit sum
+ VP9_FILTER_WEIGHT >> 1, // before it is shifted right by VP9_FILTER_SHIFT
+ VP9_FILTER_WEIGHT >> 1,
+ VP9_FILTER_WEIGHT >> 1,
+};
+
+// Creating a macro to do more than four pixels at once to hide instruction
+// latency is actually slower :-(
+#define DO_FOUR_PIXELS(result, src_ptr, offset) \
+ { /* result receives four filtered pixels as 32-bit lanes */ \
+ /* Do shifted loads to achieve the required shuffles through unpacking */ \
+ const __m128i src0 = _mm_loadu_si128((const __m128i *)(src_ptr + offset + 0)); \
+ const __m128i src1 = _mm_loadu_si128((const __m128i *)(src_ptr + offset + 1)); \
+ const __m128i src2 = _mm_loadu_si128((const __m128i *)(src_ptr + offset + 2)); \
+ const __m128i src3 = _mm_loadu_si128((const __m128i *)(src_ptr + offset + 3)); \
+ const __m128i src01 = _mm_unpacklo_epi8(src0, src1); \
+ const __m128i src01_16 = _mm_unpacklo_epi8(src01, zero); \
+ const __m128i src23 = _mm_unpacklo_epi8(src2, src3); \
+ const __m128i src23_16 = _mm_unpacklo_epi8(src23, zero); \
+ /* Shift by 4 bytes through shuffle to get additional shifted loads */ \
+ const __m128i src4 = _mm_shuffle_epi32(src0, _MM_SHUFFLE(3, 3, 2, 1)); \
+ const __m128i src5 = _mm_shuffle_epi32(src1, _MM_SHUFFLE(3, 3, 2, 1)); \
+ const __m128i src6 = _mm_shuffle_epi32(src2, _MM_SHUFFLE(3, 3, 2, 1)); \
+ const __m128i src7 = _mm_shuffle_epi32(src3, _MM_SHUFFLE(3, 3, 2, 1)); \
+ const __m128i src45 = _mm_unpacklo_epi8(src4, src5); \
+ const __m128i src45_16 = _mm_unpacklo_epi8(src45, zero); \
+ const __m128i src67 = _mm_unpacklo_epi8(src6, src7); \
+ const __m128i src67_16 = _mm_unpacklo_epi8(src67, zero); \
+ /* multiply accumulate them; zero, fil01..fil67 and rounding are taken */ \
+ const __m128i mad01 = _mm_madd_epi16(src01_16, fil01); /* from the */ \
+ const __m128i mad23 = _mm_madd_epi16(src23_16, fil23); /* scope that */ \
+ const __m128i mad45 = _mm_madd_epi16(src45_16, fil45); /* expands the */ \
+ const __m128i mad67 = _mm_madd_epi16(src67_16, fil67); /* macro */ \
+ const __m128i mad0123 = _mm_add_epi32(mad01, mad23); \
+ const __m128i mad4567 = _mm_add_epi32(mad45, mad67); \
+ __m128i mad_all = _mm_add_epi32(mad0123, mad4567); \
+ mad_all = _mm_add_epi32(mad_all, rounding); /* round ... */ \
+ result = _mm_srai_epi32(mad_all, VP9_FILTER_SHIFT); /* ... then scale */ \
+ }
+
+void vp9_filter_block2d_4x4_8_sse2 // 4x4 block: 8-tap H pass, then 8-tap V pass
+(
+ const unsigned char *src_ptr, const unsigned int src_stride,
+ const short *HFilter_aligned16, const short *VFilter_aligned16,
+ unsigned char *dst_ptr, unsigned int dst_stride
+) {
+ __m128i intermediateA, intermediateB, intermediateC; // H-pass rows 0-3, 4-7, 8-10
+
+ const int kInterp_Extend = 4; // the window starts kInterp_Extend - 1 earlier
+
+ const __m128i zero = _mm_set1_epi16(0);
+ const __m128i rounding = _mm_load_si128((const __m128i *)rounding_c);
+
+ // check alignment: both filters are fetched with _mm_load_si128 below
+ assert(0 == ((long)HFilter_aligned16)%16);
+ assert(0 == ((long)VFilter_aligned16)%16);
+
+ {
+ __m128i transpose3_0;
+ __m128i transpose3_1;
+ __m128i transpose3_2;
+ __m128i transpose3_3;
+
+ // Horizontal pass (src -> intermediate).
+ {
+ const __m128i HFilter = _mm_load_si128((const __m128i *)HFilter_aligned16);
+ // broadcast each pair of 16-bit taps (0-1, 2-3, 4-5, 6-7) to every lane
+ __m128i fil01 = _mm_shuffle_epi32(HFilter, _MM_SHUFFLE(0, 0, 0, 0));
+ __m128i fil23 = _mm_shuffle_epi32(HFilter, _MM_SHUFFLE(1, 1, 1, 1));
+ __m128i fil45 = _mm_shuffle_epi32(HFilter, _MM_SHUFFLE(2, 2, 2, 2));
+ __m128i fil67 = _mm_shuffle_epi32(HFilter, _MM_SHUFFLE(3, 3, 3, 3));
+ src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1); // 3 up, 3 left
+
+ {
+ __m128i mad_all0;
+ __m128i mad_all1;
+ __m128i mad_all2;
+ __m128i mad_all3;
+ DO_FOUR_PIXELS(mad_all0, src_ptr, 0*src_stride)
+ DO_FOUR_PIXELS(mad_all1, src_ptr, 1*src_stride)
+ DO_FOUR_PIXELS(mad_all2, src_ptr, 2*src_stride)
+ DO_FOUR_PIXELS(mad_all3, src_ptr, 3*src_stride)
+ mad_all0 = _mm_packs_epi32(mad_all0, mad_all1);
+ mad_all2 = _mm_packs_epi32(mad_all2, mad_all3);
+ intermediateA = _mm_packus_epi16(mad_all0, mad_all2);
+ // -- window rows 0..3 are done
+ src_ptr += src_stride*4;
+ // -- window rows 4..7
+ DO_FOUR_PIXELS(mad_all0, src_ptr, 0*src_stride)
+ DO_FOUR_PIXELS(mad_all1, src_ptr, 1*src_stride)
+ DO_FOUR_PIXELS(mad_all2, src_ptr, 2*src_stride)
+ DO_FOUR_PIXELS(mad_all3, src_ptr, 3*src_stride)
+ mad_all0 = _mm_packs_epi32(mad_all0, mad_all1);
+ mad_all2 = _mm_packs_epi32(mad_all2, mad_all3);
+ intermediateB = _mm_packus_epi16(mad_all0, mad_all2);
+ // -- window rows 4..7 are done
+ src_ptr += src_stride*4;
+ // -- window rows 8..10 (only three more rows are filtered)
+ DO_FOUR_PIXELS(mad_all0, src_ptr, 0*src_stride)
+ DO_FOUR_PIXELS(mad_all1, src_ptr, 1*src_stride)
+ DO_FOUR_PIXELS(mad_all2, src_ptr, 2*src_stride)
+ mad_all0 = _mm_packs_epi32(mad_all0, mad_all1);
+ mad_all2 = _mm_packs_epi32(mad_all2, mad_all2); // no 4th row: pad with a copy
+ intermediateC = _mm_packus_epi16(mad_all0, mad_all2);
+ }
+ }
+
+ // Transpose result (intermediate -> transpose3_x)
+ {
+ // 00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33
+ // 40 41 42 43 50 51 52 53 60 61 62 63 70 71 72 73
+ // 80 81 82 83 90 91 92 93 A0 A1 A2 A3 xx xx xx xx
+ const __m128i transpose0_0 = _mm_unpacklo_epi8(intermediateA, intermediateB);
+ const __m128i transpose0_1 = _mm_unpackhi_epi8(intermediateA, intermediateB);
+ const __m128i transpose0_2 = _mm_unpacklo_epi8(intermediateC, intermediateC);
+ const __m128i transpose0_3 = _mm_unpackhi_epi8(intermediateC, intermediateC);
+ // 00 40 01 41 02 42 03 43 10 50 11 51 12 52 13 53
+ // 20 60 21 61 22 62 23 63 30 70 31 71 32 72 33 73
+ // 80 xx 81 xx 82 xx 83 xx 90 xx 91 xx 92 xx 93 xx
+ // A0 xx A1 xx A2 xx A3 xx xx xx xx xx xx xx xx xx
+ const __m128i transpose1_0 = _mm_unpacklo_epi8(transpose0_0, transpose0_1);
+ const __m128i transpose1_1 = _mm_unpackhi_epi8(transpose0_0, transpose0_1);
+ const __m128i transpose1_2 = _mm_unpacklo_epi8(transpose0_2, transpose0_3);
+ const __m128i transpose1_3 = _mm_unpackhi_epi8(transpose0_2, transpose0_3);
+ // 00 20 40 60 01 21 41 61 02 22 42 62 03 23 43 63
+ // 10 30 50 70 11 31 51 71 12 32 52 72 13 33 53 73
+ // 80 A0 xx xx 81 A1 xx xx 82 A2 xx xx 83 A3 xx xx
+ // 90 xx xx xx 91 xx xx xx 92 xx xx xx 93 xx xx xx
+ const __m128i transpose2_0 = _mm_unpacklo_epi8(transpose1_0, transpose1_1);
+ const __m128i transpose2_1 = _mm_unpackhi_epi8(transpose1_0, transpose1_1);
+ const __m128i transpose2_2 = _mm_unpacklo_epi8(transpose1_2, transpose1_3);
+ const __m128i transpose2_3 = _mm_unpackhi_epi8(transpose1_2, transpose1_3);
+ // 00 10 20 30 40 50 60 70 01 11 21 31 41 51 61 71
+ // 02 12 22 32 42 52 62 72 03 13 23 33 43 53 63 73
+ // 80 90 A0 xx xx xx xx xx 81 91 A1 xx xx xx xx xx
+ // 82 92 A2 xx xx xx xx xx 83 93 A3 xx xx xx xx xx
+ transpose3_0 = _mm_castps_si128(
+ _mm_shuffle_ps(_mm_castsi128_ps(transpose2_0),
+ _mm_castsi128_ps(transpose2_2),
+ _MM_SHUFFLE(1, 0, 1, 0)));
+ transpose3_1 = _mm_castps_si128(
+ _mm_shuffle_ps(_mm_castsi128_ps(transpose2_0),
+ _mm_castsi128_ps(transpose2_2),
+ _MM_SHUFFLE(3, 2, 3, 2)));
+ transpose3_2 = _mm_castps_si128(
+ _mm_shuffle_ps(_mm_castsi128_ps(transpose2_1),
+ _mm_castsi128_ps(transpose2_3),
+ _MM_SHUFFLE(1, 0, 1, 0)));
+ transpose3_3 = _mm_castps_si128(
+ _mm_shuffle_ps(_mm_castsi128_ps(transpose2_1),
+ _mm_castsi128_ps(transpose2_3),
+ _MM_SHUFFLE(3, 2, 3, 2)));
+ // 00 10 20 30 40 50 60 70 80 90 A0 xx xx xx xx xx
+ // 01 11 21 31 41 51 61 71 81 91 A1 xx xx xx xx xx
+ // 02 12 22 32 42 52 62 72 82 92 A2 xx xx xx xx xx
+ // 03 13 23 33 43 53 63 73 83 93 A3 xx xx xx xx xx
+ }
+
+ // Vertical pass (transpose3_x -> dst).
+ {
+ const __m128i VFilter = _mm_load_si128((const __m128i *)VFilter_aligned16);
+ // broadcast each pair of 16-bit taps (0-1, 2-3, 4-5, 6-7) to every lane
+ __m128i fil01 = _mm_shuffle_epi32(VFilter, _MM_SHUFFLE(0, 0, 0, 0));
+ __m128i fil23 = _mm_shuffle_epi32(VFilter, _MM_SHUFFLE(1, 1, 1, 1));
+ __m128i fil45 = _mm_shuffle_epi32(VFilter, _MM_SHUFFLE(2, 2, 2, 2));
+ __m128i fil67 = _mm_shuffle_epi32(VFilter, _MM_SHUFFLE(3, 3, 3, 3));
+ __m128i col0, col1, col2, col3;
+ DECLARE_ALIGNED(16, unsigned char, temp[32]); // macro reloads at offsets 0..3
+ {
+ _mm_store_si128((__m128i *)temp, transpose3_0);
+ DO_FOUR_PIXELS(col0, temp, 0);
+ }
+ {
+ _mm_store_si128((__m128i *)temp, transpose3_1);
+ DO_FOUR_PIXELS(col1, temp, 0);
+ }
+ {
+ _mm_store_si128((__m128i *)temp, transpose3_2);
+ DO_FOUR_PIXELS(col2, temp, 0);
+ }
+ {
+ _mm_store_si128((__m128i *)temp, transpose3_3);
+ DO_FOUR_PIXELS(col3, temp, 0);
+ }
+ // transpose the 4x4 of 32-bit results: colN becomes output row N
+ {
+ __m128i T0 = _mm_unpacklo_epi32(col0, col1);
+ __m128i T1 = _mm_unpacklo_epi32(col2, col3);
+ __m128i T2 = _mm_unpackhi_epi32(col0, col1);
+ __m128i T3 = _mm_unpackhi_epi32(col2, col3);
+ col0 = _mm_unpacklo_epi64(T0, T1);
+ col1 = _mm_unpackhi_epi64(T0, T1);
+ col2 = _mm_unpacklo_epi64(T2, T3);
+ col3 = _mm_unpackhi_epi64(T2, T3);
+ }
+ // saturate to 8 bit (32 -> 16 signed, then 16 -> 8 unsigned)
+ {
+ col0 = _mm_packs_epi32(col0, col0);
+ col0 = _mm_packus_epi16(col0, col0);
+ col1 = _mm_packs_epi32(col1, col1);
+ col1 = _mm_packus_epi16(col1, col1);
+ col2 = _mm_packs_epi32 (col2, col2);
+ col2 = _mm_packus_epi16(col2, col2);
+ col3 = _mm_packs_epi32 (col3, col3);
+ col3 = _mm_packus_epi16(col3, col3);
+ }
+ // store: the low 4 bytes of each register form one destination row
+ {
+ *((unsigned int *)&dst_ptr[dst_stride * 0]) = _mm_cvtsi128_si32(col0);
+ *((unsigned int *)&dst_ptr[dst_stride * 1]) = _mm_cvtsi128_si32(col1);
+ *((unsigned int *)&dst_ptr[dst_stride * 2]) = _mm_cvtsi128_si32(col2);
+ *((unsigned int *)&dst_ptr[dst_stride * 3]) = _mm_cvtsi128_si32(col3);
+ }
+ }
+ }
+}
+
+void vp9_filter_block2d_8x4_8_sse2 // 8 wide x 4 high, built from two 4x4 tiles
+(
+ const unsigned char *src_ptr, const unsigned int src_stride,
+ const short *HFilter_aligned16, const short *VFilter_aligned16,
+ unsigned char *dst_ptr, unsigned int dst_stride
+) {
+ int j; // column offset of the current 4x4 tile
+ for (j=0; j<8; j+=4) {
+ vp9_filter_block2d_4x4_8_sse2(src_ptr + j, src_stride,
+ HFilter_aligned16, VFilter_aligned16,
+ dst_ptr + j, dst_stride);
+ }
+}
+
+void vp9_filter_block2d_8x8_8_sse2 // 8x8 block, built from a 2x2 grid of 4x4 tiles
+(
+ const unsigned char *src_ptr, const unsigned int src_stride,
+ const short *HFilter_aligned16, const short *VFilter_aligned16,
+ unsigned char *dst_ptr, unsigned int dst_stride
+) {
+ int i, j; // i: row offset, j: column offset of the current 4x4 tile
+ for (i=0; i<8; i+=4) {
+ for (j=0; j<8; j+=4) {
+ vp9_filter_block2d_4x4_8_sse2(src_ptr + j + i*src_stride, src_stride,
+ HFilter_aligned16, VFilter_aligned16,
+ dst_ptr + j + i*dst_stride, dst_stride);
+ }
+ }
+}
+
+void vp9_filter_block2d_16x16_8_sse2 // 16x16 block, built from a 4x4 grid of 4x4 tiles
+(
+ const unsigned char *src_ptr, const unsigned int src_stride,
+ const short *HFilter_aligned16, const short *VFilter_aligned16,
+ unsigned char *dst_ptr, unsigned int dst_stride
+) {
+ int i, j; // i: row offset, j: column offset of the current 4x4 tile
+ for (i=0; i<16; i+=4) {
+ for (j=0; j<16; j+=4) {
+ vp9_filter_block2d_4x4_8_sse2(src_ptr + j + i*src_stride, src_stride,
+ HFilter_aligned16, VFilter_aligned16,
+ dst_ptr + j + i*dst_stride, dst_stride);
+ }
+ }
+}
diff --git a/vp9/common/x86/vp9_filter_sse4.c b/vp9/common/x86/vp9_filter_sse4.c
new file mode 100644
index 0000000..52c35b2
--- /dev/null
+++ b/vp9/common/x86/vp9_filter_sse4.c
@@ -0,0 +1,362 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h> // for alignment checks
+#include <smmintrin.h> // SSE4.1
+#include "vp9/common/vp9_filter.h"
+#include "vpx_ports/mem.h" // for DECLARE_ALIGNED
+#include "vp9_rtcd.h"
+
+// TODO(cd): After cleanup, commit faster versions for non 4x4 size. This is
+// just a quick partial snapshot so that others can already use some
+// speedup.
+// TODO(cd): Use vectorized 8 tap filtering code as speedup to pure C 6 tap
+// filtering.
+// TODO(cd): Reduce source size by using macros instead of current code
+// duplication.
+// TODO(cd): Add some comments, better variable naming.
+// TODO(cd): Maybe use _mm_maddubs_epi16 if smaller filter coefficients (no sum
+// of positive above 128), or have higher precision filter
+// coefficients.
+
+DECLARE_ALIGNED(16, static const unsigned char, mask0123_c[16]) = { // pshufb indices
+ 0x00, 0x01, // output pixel 0, inputs for taps 0-1
+ 0x01, 0x02, // output pixel 1, inputs for taps 0-1
+ 0x02, 0x03, // output pixel 2, inputs for taps 0-1
+ 0x03, 0x04, // output pixel 3, inputs for taps 0-1
+ 0x02, 0x03, // output pixel 0, inputs for taps 2-3
+ 0x03, 0x04, // output pixel 1, inputs for taps 2-3
+ 0x04, 0x05, // output pixel 2, inputs for taps 2-3
+ 0x05, 0x06, // output pixel 3, inputs for taps 2-3
+};
+DECLARE_ALIGNED(16, static const unsigned char, mask4567_c[16]) = { // pshufb indices
+ 0x04, 0x05, // output pixel 0, inputs for taps 4-5
+ 0x05, 0x06, // output pixel 1, inputs for taps 4-5
+ 0x06, 0x07, // output pixel 2, inputs for taps 4-5
+ 0x07, 0x08, // output pixel 3, inputs for taps 4-5
+ 0x06, 0x07, // output pixel 0, inputs for taps 6-7
+ 0x07, 0x08, // output pixel 1, inputs for taps 6-7
+ 0x08, 0x09, // output pixel 2, inputs for taps 6-7
+ 0x09, 0x0A, // output pixel 3, inputs for taps 6-7
+};
+DECLARE_ALIGNED(16, static const unsigned int, rounding_c[4]) = { // one per lane
+ VP9_FILTER_WEIGHT >> 1, // half the filter weight: added to each 32-bit sum
+ VP9_FILTER_WEIGHT >> 1, // before it is shifted right by VP9_FILTER_SHIFT
+ VP9_FILTER_WEIGHT >> 1,
+ VP9_FILTER_WEIGHT >> 1,
+};
+DECLARE_ALIGNED(16, static const unsigned char, transpose_c[16]) = { // pshufb indices
+ 0, 4, 8, 12, // byte 0 of each 4-byte group
+ 1, 5, 9, 13, // byte 1 of each 4-byte group
+ 2, 6, 10, 14, // byte 2 of each 4-byte group
+ 3, 7, 11, 15 // byte 3 of each 4-byte group
+};
+
+// Creating a macro to do more than four pixels at once to hide instruction
+// latency is actually slower :-(
+#define DO_FOUR_PIXELS(result, offset) \
+ { /* result receives four filtered pixels as 32-bit lanes */ \
+ /* load 16 pixels at src_ptr + offset (src_ptr: expanding scope) */ \
+ __m128i src = _mm_loadu_si128((const __m128i *)(src_ptr + offset)); \
+ /* gather the byte pairs for taps 0-3 and 4-7, then widen to 16 bit */ \
+ __m128i src0123 = _mm_shuffle_epi8(src, mask0123); \
+ __m128i src4567 = _mm_shuffle_epi8(src, mask4567); \
+ __m128i src01_16 = _mm_unpacklo_epi8(src0123, zero); \
+ __m128i src23_16 = _mm_unpackhi_epi8(src0123, zero); \
+ __m128i src45_16 = _mm_unpacklo_epi8(src4567, zero); \
+ __m128i src67_16 = _mm_unpackhi_epi8(src4567, zero); \
+ /* multiply accumulate them (fil01..fil67 come from the expanding scope) */ \
+ __m128i mad01 = _mm_madd_epi16(src01_16, fil01); \
+ __m128i mad23 = _mm_madd_epi16(src23_16, fil23); \
+ __m128i mad45 = _mm_madd_epi16(src45_16, fil45); \
+ __m128i mad67 = _mm_madd_epi16(src67_16, fil67); \
+ __m128i mad0123 = _mm_add_epi32(mad01, mad23); \
+ __m128i mad4567 = _mm_add_epi32(mad45, mad67); \
+ __m128i mad_all = _mm_add_epi32(mad0123, mad4567); \
+ mad_all = _mm_add_epi32(mad_all, rounding); /* round ... */ \
+ result = _mm_srai_epi32(mad_all, VP9_FILTER_SHIFT); /* ... then scale */ \
+ }
+
+void vp9_filter_block2d_4x4_8_sse4_1 // 4x4 block: 8-tap H pass, then 8-tap V pass
+(
+ const unsigned char *src_ptr, const unsigned int src_stride,
+ const short *HFilter_aligned16, const short *VFilter_aligned16,
+ unsigned char *dst_ptr, unsigned int dst_stride
+) {
+ __m128i intermediateA, intermediateB, intermediateC; // H-pass rows 0-3, 4-7, 8-10
+
+ const int kInterp_Extend = 4; // the window starts kInterp_Extend - 1 earlier
+
+ const __m128i zero = _mm_set1_epi16(0);
+ const __m128i mask0123 = _mm_load_si128((const __m128i *)mask0123_c);
+ const __m128i mask4567 = _mm_load_si128((const __m128i *)mask4567_c);
+ const __m128i rounding = _mm_load_si128((const __m128i *)rounding_c);
+ const __m128i transpose = _mm_load_si128((const __m128i *)transpose_c);
+
+ // check alignment: both filters are fetched with _mm_load_si128 below
+ assert(0 == ((long)HFilter_aligned16)%16);
+ assert(0 == ((long)VFilter_aligned16)%16);
+
+ {
+ __m128i transpose3_0;
+ __m128i transpose3_1;
+ __m128i transpose3_2;
+ __m128i transpose3_3;
+
+ // Horizontal pass (src -> intermediate).
+ {
+ const __m128i HFilter = _mm_load_si128((const __m128i *)HFilter_aligned16);
+ // broadcast each pair of 16-bit taps (0-1, 2-3, 4-5, 6-7) to every lane
+ __m128i fil01 = _mm_shuffle_epi32(HFilter, _MM_SHUFFLE(0, 0, 0, 0));
+ __m128i fil23 = _mm_shuffle_epi32(HFilter, _MM_SHUFFLE(1, 1, 1, 1));
+ __m128i fil45 = _mm_shuffle_epi32(HFilter, _MM_SHUFFLE(2, 2, 2, 2));
+ __m128i fil67 = _mm_shuffle_epi32(HFilter, _MM_SHUFFLE(3, 3, 3, 3));
+ src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1); // 3 up, 3 left
+
+ {
+ __m128i mad_all0;
+ __m128i mad_all1;
+ __m128i mad_all2;
+ __m128i mad_all3;
+ DO_FOUR_PIXELS(mad_all0, 0*src_stride)
+ DO_FOUR_PIXELS(mad_all1, 1*src_stride)
+ DO_FOUR_PIXELS(mad_all2, 2*src_stride)
+ DO_FOUR_PIXELS(mad_all3, 3*src_stride)
+ mad_all0 = _mm_packs_epi32(mad_all0, mad_all1);
+ mad_all2 = _mm_packs_epi32(mad_all2, mad_all3);
+ intermediateA = _mm_packus_epi16(mad_all0, mad_all2);
+ // -- window rows 0..3 are done
+ src_ptr += src_stride*4;
+ // -- window rows 4..7
+ DO_FOUR_PIXELS(mad_all0, 0*src_stride)
+ DO_FOUR_PIXELS(mad_all1, 1*src_stride)
+ DO_FOUR_PIXELS(mad_all2, 2*src_stride)
+ DO_FOUR_PIXELS(mad_all3, 3*src_stride)
+ mad_all0 = _mm_packs_epi32(mad_all0, mad_all1);
+ mad_all2 = _mm_packs_epi32(mad_all2, mad_all3);
+ intermediateB = _mm_packus_epi16(mad_all0, mad_all2);
+ // -- window rows 4..7 are done
+ src_ptr += src_stride*4;
+ // -- window rows 8..10 (only three more rows are filtered)
+ DO_FOUR_PIXELS(mad_all0, 0*src_stride)
+ DO_FOUR_PIXELS(mad_all1, 1*src_stride)
+ DO_FOUR_PIXELS(mad_all2, 2*src_stride)
+ mad_all0 = _mm_packs_epi32(mad_all0, mad_all1);
+ mad_all2 = _mm_packs_epi32(mad_all2, mad_all2); // no 4th row: pad with a copy
+ intermediateC = _mm_packus_epi16(mad_all0, mad_all2);
+ }
+ }
+
+ // Transpose result (intermediate -> transpose3_x)
+ {
+ // 00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33
+ // 40 41 42 43 50 51 52 53 60 61 62 63 70 71 72 73
+ // 80 81 82 83 90 91 92 93 A0 A1 A2 A3 xx xx xx xx
+ const __m128i transpose1_0 = _mm_shuffle_epi8(intermediateA, transpose);
+ const __m128i transpose1_1 = _mm_shuffle_epi8(intermediateB, transpose);
+ const __m128i transpose1_2 = _mm_shuffle_epi8(intermediateC, transpose);
+ // 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33
+ // 40 50 60 70 41 51 61 71 42 52 62 72 43 53 63 73
+ // 80 90 A0 xx 81 91 A1 xx 82 92 A2 xx 83 93 A3 xx
+ const __m128i transpose2_0 = _mm_unpacklo_epi32(transpose1_0, transpose1_1);
+ const __m128i transpose2_1 = _mm_unpackhi_epi32(transpose1_0, transpose1_1);
+ // 00 10 20 30 40 50 60 70 01 11 21 31 41 51 61 71
+ // 02 12 22 32 42 52 62 72 03 13 23 33 43 53 63 73
+ transpose3_0 = _mm_castps_si128(
+ _mm_shuffle_ps(_mm_castsi128_ps(transpose2_0),
+ _mm_castsi128_ps(transpose1_2),
+ _MM_SHUFFLE(0, 0, 1, 0)));
+ transpose3_1 = _mm_castps_si128(
+ _mm_shuffle_ps(_mm_castsi128_ps(transpose2_0),
+ _mm_castsi128_ps(transpose1_2),
+ _MM_SHUFFLE(1, 1, 3, 2)));
+ transpose3_2 = _mm_castps_si128(
+ _mm_shuffle_ps(_mm_castsi128_ps(transpose2_1),
+ _mm_castsi128_ps(transpose1_2),
+ _MM_SHUFFLE(2, 2, 1, 0)));
+ transpose3_3 = _mm_castps_si128(
+ _mm_shuffle_ps(_mm_castsi128_ps(transpose2_1),
+ _mm_castsi128_ps(transpose1_2),
+ _MM_SHUFFLE(3, 3, 3, 2)));
+ // 00 10 20 30 40 50 60 70 80 90 A0 xx xx xx xx xx
+ // 01 11 21 31 41 51 61 71 81 91 A1 xx xx xx xx xx
+ // 02 12 22 32 42 52 62 72 82 92 A2 xx xx xx xx xx
+ // 03 13 23 33 43 53 63 73 83 93 A3 xx xx xx xx xx
+ }
+
+ // Vertical pass (transpose3_x -> dst).
+ {
+ const __m128i VFilter = _mm_load_si128((const __m128i *)VFilter_aligned16);
+ // broadcast each pair of 16-bit taps (0-1, 2-3, 4-5, 6-7) to every lane
+ __m128i fil01 = _mm_shuffle_epi32(VFilter, _MM_SHUFFLE(0, 0, 0, 0));
+ __m128i fil23 = _mm_shuffle_epi32(VFilter, _MM_SHUFFLE(1, 1, 1, 1));
+ __m128i fil45 = _mm_shuffle_epi32(VFilter, _MM_SHUFFLE(2, 2, 2, 2));
+ __m128i fil67 = _mm_shuffle_epi32(VFilter, _MM_SHUFFLE(3, 3, 3, 3));
+ __m128i col0, col1, col2, col3;
+ {
+ // column 0 of the intermediate (already in a register, no load needed)
+ __m128i src = transpose3_0;
+ // gather the byte pairs for taps 0-3 and 4-7 of the first column
+ __m128i src0123 = _mm_shuffle_epi8(src, mask0123);
+ __m128i src4567 = _mm_shuffle_epi8(src, mask4567);
+ __m128i src01_16 = _mm_unpacklo_epi8(src0123, zero);
+ __m128i src23_16 = _mm_unpackhi_epi8(src0123, zero);
+ __m128i src45_16 = _mm_unpacklo_epi8(src4567, zero);
+ __m128i src67_16 = _mm_unpackhi_epi8(src4567, zero);
+ // multiply accumulate them
+ __m128i mad01 = _mm_madd_epi16(src01_16, fil01);
+ __m128i mad23 = _mm_madd_epi16(src23_16, fil23);
+ __m128i mad45 = _mm_madd_epi16(src45_16, fil45);
+ __m128i mad67 = _mm_madd_epi16(src67_16, fil67);
+ __m128i mad0123 = _mm_add_epi32(mad01, mad23);
+ __m128i mad4567 = _mm_add_epi32(mad45, mad67);
+ __m128i mad_all = _mm_add_epi32(mad0123, mad4567);
+ mad_all = _mm_add_epi32(mad_all, rounding);
+ mad_all = _mm_srai_epi32(mad_all, VP9_FILTER_SHIFT);
+ mad_all = _mm_packs_epi32(mad_all, mad_all);
+ col0 = _mm_packus_epi16(mad_all, mad_all);
+ }
+ {
+ // column 1 of the intermediate
+ __m128i src = transpose3_1;
+ // gather the byte pairs for taps 0-3 and 4-7 of the second column
+ __m128i src0123 = _mm_shuffle_epi8(src, mask0123);
+ __m128i src4567 = _mm_shuffle_epi8(src, mask4567);
+ __m128i src01_16 = _mm_unpacklo_epi8(src0123, zero);
+ __m128i src23_16 = _mm_unpackhi_epi8(src0123, zero);
+ __m128i src45_16 = _mm_unpacklo_epi8(src4567, zero);
+ __m128i src67_16 = _mm_unpackhi_epi8(src4567, zero);
+ // multiply accumulate them
+ __m128i mad01 = _mm_madd_epi16(src01_16, fil01);
+ __m128i mad23 = _mm_madd_epi16(src23_16, fil23);
+ __m128i mad45 = _mm_madd_epi16(src45_16, fil45);
+ __m128i mad67 = _mm_madd_epi16(src67_16, fil67);
+ __m128i mad0123 = _mm_add_epi32(mad01, mad23);
+ __m128i mad4567 = _mm_add_epi32(mad45, mad67);
+ __m128i mad_all = _mm_add_epi32(mad0123, mad4567);
+ mad_all = _mm_add_epi32(mad_all, rounding);
+ mad_all = _mm_srai_epi32(mad_all, VP9_FILTER_SHIFT);
+ mad_all = _mm_packs_epi32(mad_all, mad_all);
+ col1 = _mm_packus_epi16(mad_all, mad_all);
+ }
+ {
+ // column 2 of the intermediate
+ __m128i src = transpose3_2;
+ // gather the byte pairs for taps 0-3 and 4-7 of the third column
+ __m128i src0123 = _mm_shuffle_epi8(src, mask0123);
+ __m128i src4567 = _mm_shuffle_epi8(src, mask4567);
+ __m128i src01_16 = _mm_unpacklo_epi8(src0123, zero);
+ __m128i src23_16 = _mm_unpackhi_epi8(src0123, zero);
+ __m128i src45_16 = _mm_unpacklo_epi8(src4567, zero);
+ __m128i src67_16 = _mm_unpackhi_epi8(src4567, zero);
+ // multiply accumulate them
+ __m128i mad01 = _mm_madd_epi16(src01_16, fil01);
+ __m128i mad23 = _mm_madd_epi16(src23_16, fil23);
+ __m128i mad45 = _mm_madd_epi16(src45_16, fil45);
+ __m128i mad67 = _mm_madd_epi16(src67_16, fil67);
+ __m128i mad0123 = _mm_add_epi32(mad01, mad23);
+ __m128i mad4567 = _mm_add_epi32(mad45, mad67);
+ __m128i mad_all = _mm_add_epi32(mad0123, mad4567);
+ mad_all = _mm_add_epi32(mad_all, rounding);
+ mad_all = _mm_srai_epi32(mad_all, VP9_FILTER_SHIFT);
+ mad_all = _mm_packs_epi32(mad_all, mad_all);
+ col2 = _mm_packus_epi16(mad_all, mad_all);
+ }
+ {
+ // column 3 of the intermediate
+ __m128i src = transpose3_3;
+ // gather the byte pairs for taps 0-3 and 4-7 of the fourth column
+ __m128i src0123 = _mm_shuffle_epi8(src, mask0123);
+ __m128i src4567 = _mm_shuffle_epi8(src, mask4567);
+ __m128i src01_16 = _mm_unpacklo_epi8(src0123, zero);
+ __m128i src23_16 = _mm_unpackhi_epi8(src0123, zero);
+ __m128i src45_16 = _mm_unpacklo_epi8(src4567, zero);
+ __m128i src67_16 = _mm_unpackhi_epi8(src4567, zero);
+ // multiply accumulate them
+ __m128i mad01 = _mm_madd_epi16(src01_16, fil01);
+ __m128i mad23 = _mm_madd_epi16(src23_16, fil23);
+ __m128i mad45 = _mm_madd_epi16(src45_16, fil45);
+ __m128i mad67 = _mm_madd_epi16(src67_16, fil67);
+ __m128i mad0123 = _mm_add_epi32(mad01, mad23);
+ __m128i mad4567 = _mm_add_epi32(mad45, mad67);
+ __m128i mad_all = _mm_add_epi32(mad0123, mad4567);
+ mad_all = _mm_add_epi32(mad_all, rounding);
+ mad_all = _mm_srai_epi32(mad_all, VP9_FILTER_SHIFT);
+ mad_all = _mm_packs_epi32(mad_all, mad_all);
+ col3 = _mm_packus_epi16(mad_all, mad_all);
+ }
+ {
+ __m128i col01 = _mm_unpacklo_epi8(col0, col1);
+ __m128i col23 = _mm_unpacklo_epi8(col2, col3);
+ __m128i col0123 = _mm_unpacklo_epi16(col01, col23); // dword N = output row N
+ //TODO(cd): look into Ronald's comment:
+ // Future suggestion: I believe here, too, you can merge the
+ // packs_epi32() and packus_epi16() for the 4 cols above, so that
+ // you get the data in a single register, and then use pshufb
+ // (shuffle_epi8()) instead of the unpacks here. Should be
+ // 2+3+2 instructions faster.
+ *((unsigned int *)&dst_ptr[dst_stride * 0]) =
+ _mm_extract_epi32(col0123, 0);
+ *((unsigned int *)&dst_ptr[dst_stride * 1]) =
+ _mm_extract_epi32(col0123, 1);
+ *((unsigned int *)&dst_ptr[dst_stride * 2]) =
+ _mm_extract_epi32(col0123, 2);
+ *((unsigned int *)&dst_ptr[dst_stride * 3]) =
+ _mm_extract_epi32(col0123, 3);
+ }
+ }
+ }
+}
+
+void vp9_filter_block2d_8x4_8_sse4_1 // 8 wide x 4 high, built from two 4x4 tiles
+(
+ const unsigned char *src_ptr, const unsigned int src_stride,
+ const short *HFilter_aligned16, const short *VFilter_aligned16,
+ unsigned char *dst_ptr, unsigned int dst_stride
+) {
+ int j; // column offset of the current 4x4 tile
+ for (j=0; j<8; j+=4) {
+ vp9_filter_block2d_4x4_8_sse4_1(src_ptr + j, src_stride,
+ HFilter_aligned16, VFilter_aligned16,
+ dst_ptr + j, dst_stride);
+ }
+}
+
+void vp9_filter_block2d_8x8_8_sse4_1 // 8x8 block, built from a 2x2 grid of 4x4 tiles
+(
+ const unsigned char *src_ptr, const unsigned int src_stride,
+ const short *HFilter_aligned16, const short *VFilter_aligned16,
+ unsigned char *dst_ptr, unsigned int dst_stride
+) {
+ int i, j; // i: row offset, j: column offset of the current 4x4 tile
+ for (i=0; i<8; i+=4) {
+ for (j=0; j<8; j+=4) {
+ vp9_filter_block2d_4x4_8_sse4_1(src_ptr + j + i*src_stride, src_stride,
+ HFilter_aligned16, VFilter_aligned16,
+ dst_ptr + j + i*dst_stride, dst_stride);
+ }
+ }
+}
+
+void vp9_filter_block2d_16x16_8_sse4_1 // 16x16 block, built from a 4x4 grid of 4x4 tiles
+(
+ const unsigned char *src_ptr, const unsigned int src_stride,
+ const short *HFilter_aligned16, const short *VFilter_aligned16,
+ unsigned char *dst_ptr, unsigned int dst_stride
+) {
+ int i, j; // i: row offset, j: column offset of the current 4x4 tile
+ for (i=0; i<16; i+=4) {
+ for (j=0; j<16; j+=4) {
+ vp9_filter_block2d_4x4_8_sse4_1(src_ptr + j + i*src_stride, src_stride,
+ HFilter_aligned16, VFilter_aligned16,
+ dst_ptr + j + i*dst_stride, dst_stride);
+ }
+ }
+}
diff --git a/vp9/common/x86/vp9_idct_x86.h b/vp9/common/x86/vp9_idct_x86.h
new file mode 100644
index 0000000..8320cf8
--- /dev/null
+++ b/vp9/common/x86/vp9_idct_x86.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_COMMON_X86_VP9_IDCT_X86_H_
+#define VP9_COMMON_X86_VP9_IDCT_X86_H_
+
+/* Note:
+ *
+ * This platform is commonly built for runtime CPU detection. If you modify
+ * any of the function mappings present in this file, be sure to also update
+ * them in the function pointer initialization code.
+ */
+
+#if HAVE_MMX  /* declarations and static bindings for the MMX routines */
+extern prototype_idct(vp9_short_idct4x4llm_1_mmx);
+extern prototype_idct(vp9_short_idct4x4llm_mmx);
+extern prototype_idct_scalar_add(vp9_dc_only_idct_add_mmx);
+
+extern prototype_second_order(vp9_short_inv_walsh4x4_mmx);
+extern prototype_second_order(vp9_short_inv_walsh4x4_1_mmx);
+
+#if !CONFIG_RUNTIME_CPU_DETECT  /* bind names at compile time only when runtime detection is off */
+#undef vp9_idct_idct1
+#define vp9_idct_idct1 vp9_short_idct4x4llm_1_mmx
+
+#undef vp9_idct_idct16
+#define vp9_idct_idct16 vp9_short_idct4x4llm_mmx
+
+#undef vp9_idct_idct1_scalar_add
+#define vp9_idct_idct1_scalar_add vp9_dc_only_idct_add_mmx
+
+#undef vp9_idct_iwalsh16
+#define vp9_idct_iwalsh16 vp9_short_inv_walsh4x4_mmx
+
+#undef vp9_idct_iwalsh1
+#define vp9_idct_iwalsh1 vp9_short_inv_walsh4x4_1_mmx
+
+#endif  /* !CONFIG_RUNTIME_CPU_DETECT */
+#endif  /* HAVE_MMX */
+
+#if HAVE_SSE2  /* SSE2 provides only the full inverse Walsh here */
+
+extern prototype_second_order(vp9_short_inv_walsh4x4_sse2);
+
+#if !CONFIG_RUNTIME_CPU_DETECT
+
+#undef vp9_idct_iwalsh16  /* drops the MMX binding made above, if any */
+#define vp9_idct_iwalsh16 vp9_short_inv_walsh4x4_sse2
+
+#endif  /* !CONFIG_RUNTIME_CPU_DETECT */
+
+#endif  /* HAVE_SSE2 */
+
+
+
+#endif  /* VP9_COMMON_X86_VP9_IDCT_X86_H_ */
diff --git a/vp9/common/x86/vp9_idctllm_mmx.asm b/vp9/common/x86/vp9_idctllm_mmx.asm
new file mode 100644
index 0000000..15e81ad
--- /dev/null
+++ b/vp9/common/x86/vp9_idctllm_mmx.asm
@@ -0,0 +1,241 @@
+;
+; Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+%include "third_party/x86inc/x86inc.asm"
+
+SECTION_RODATA
+align 16
+x_s1sqr2: times 4 dw 0x8A8C ; 35468 = sqrt(2) * sin(pi/8) in 16-bit fixed point (negative as a signed word)
+align 16
+x_c1sqr2less1: times 4 dw 0x4E7B ; 20091 = (sqrt(2) * cos(pi/8) - 1) in 16-bit fixed point
+align 16
+pw_16: times 4 dw 16 ; rounding term added before the arithmetic shift right by 5
+
+SECTION .text
+
+
+; /****************************************************************************
+; * Notes:
+; *
+; * This implementation makes use of 16 bit fixed point version of two multiply
+; * constants:
+; * 1. sqrt(2) * cos (pi/8)
+; * 2. sqrt(2) * sin (pi/8)
+; * Because the first constant is bigger than 1, to maintain the same 16 bit
+; * fixed point precision as the second one, we use a trick of
+; * x * a = x + x*(a-1)
+; * so
+; * x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1).
+; *
+; * For the second constant, the 16-bit version is 35468, which is bigger
+; * than 32768, so in a signed 16-bit multiply it becomes a negative
+; * number. Adding x back afterwards compensates for that:
+; * (x * (unsigned)35468) >> 16 = ((x * (signed)35468) >> 16) + x
+; *
+; **************************************************************************/
+
+INIT_MMX
+
+;void short_idct4x4llm_mmx(short *input, short *output, int pitch) -- 4x4 inverse DCT done as two 1-D passes; pitch is the byte step between output rows
+cglobal short_idct4x4llm_mmx, 3,3,0, inp, out, pit
+ mova m0, [inpq +0] ; input words 0..3
+ mova m1, [inpq +8] ; input words 4..7
+
+ mova m2, [inpq+16] ; input words 8..11
+ mova m3, [inpq+24] ; input words 12..15
+
+ psubw m0, m2 ; b1= 0-2
+ paddw m2, m2 ; 2 * (row 2), so the add below yields 0+2
+
+ mova m5, m1
+ paddw m2, m0 ; a1 =0+2
+
+ pmulhw m5, [x_s1sqr2] ; signed high-word multiply; the paddw below adds x back
+ paddw m5, m1 ; ip1 * sin(pi/8) * sqrt(2)
+
+ mova m7, m3 ;
+ pmulhw m7, [x_c1sqr2less1] ; x * (a - 1); the paddw below adds x to get x * a
+
+ paddw m7, m3 ; ip3 * cos(pi/8) * sqrt(2)
+ psubw m7, m5 ; c1
+
+ mova m5, m1
+ mova m4, m3
+
+ pmulhw m5, [x_c1sqr2less1]
+ paddw m5, m1
+
+ pmulhw m3, [x_s1sqr2]
+ paddw m3, m4
+
+ paddw m3, m5 ; d1
+ mova m6, m2 ; a1
+
+ mova m4, m0 ; b1
+ paddw m2, m3 ;0
+
+ paddw m4, m7 ;1
+ psubw m0, m7 ;2
+
+ psubw m6, m3 ;3
+; transpose the 4x4 word matrix before the second pass
+ mova m1, m2 ; 03 02 01 00
+ mova m3, m4 ; 23 22 21 20
+
+ punpcklwd m1, m0 ; 11 01 10 00
+ punpckhwd m2, m0 ; 13 03 12 02
+
+ punpcklwd m3, m6 ; 31 21 30 20
+ punpckhwd m4, m6 ; 33 23 32 22
+
+ mova m0, m1 ; 11 01 10 00
+ mova m5, m2 ; 13 03 12 02
+
+ punpckldq m0, m3 ; 30 20 10 00
+ punpckhdq m1, m3 ; 31 21 11 01
+
+ punpckldq m2, m4 ; 32 22 12 02
+ punpckhdq m5, m4 ; 33 23 13 03
+
+ mova m3, m5 ; 33 23 13 03
+; second pass: same butterfly, now with rounding (+16) and >> 5
+ psubw m0, m2 ; b1= 0-2
+ paddw m2, m2 ; 2 * (row 2), so the add below yields 0+2
+
+ mova m5, m1
+ paddw m2, m0 ; a1 =0+2
+
+ pmulhw m5, [x_s1sqr2] ;
+ paddw m5, m1 ; ip1 * sin(pi/8) * sqrt(2)
+
+ mova m7, m3 ;
+ pmulhw m7, [x_c1sqr2less1] ;
+
+ paddw m7, m3 ; ip3 * cos(pi/8) * sqrt(2)
+ psubw m7, m5 ; c1
+
+ mova m5, m1
+ mova m4, m3
+
+ pmulhw m5, [x_c1sqr2less1]
+ paddw m5, m1
+
+ pmulhw m3, [x_s1sqr2]
+ paddw m3, m4
+
+ paddw m3, m5 ; d1
+ paddw m0, [pw_16] ; fold the rounding term into b1 ...
+
+ paddw m2, [pw_16] ; ... and into a1, so all four outputs carry it
+ mova m6, m2 ; a1
+
+ mova m4, m0 ; b1
+ paddw m2, m3 ;0
+
+ paddw m4, m7 ;1
+ psubw m0, m7 ;2
+
+ psubw m6, m3 ;3
+ psraw m2, 5
+
+ psraw m0, 5
+ psraw m4, 5
+
+ psraw m6, 5
+; transpose back so each register holds one output row
+ mova m1, m2 ; 03 02 01 00
+ mova m3, m4 ; 23 22 21 20
+
+ punpcklwd m1, m0 ; 11 01 10 00
+ punpckhwd m2, m0 ; 13 03 12 02
+
+ punpcklwd m3, m6 ; 31 21 30 20
+ punpckhwd m4, m6 ; 33 23 32 22
+
+ mova m0, m1 ; 11 01 10 00
+ mova m5, m2 ; 13 03 12 02
+
+ punpckldq m0, m3 ; 30 20 10 00
+ punpckhdq m1, m3 ; 31 21 11 01
+
+ punpckldq m2, m4 ; 32 22 12 02
+ punpckhdq m5, m4 ; 33 23 13 03
+
+ mova [outq], m0 ; output row 0
+; NOTE(review): pitq is used as a full-width register with no sign extension of the int pitch -- confirm for x86-64
+ mova [outq+pitq], m1 ; output row 1
+ mova [outq+pitq*2], m2 ; output row 2
+
+ add outq, pitq ; step one row so pitq*2 reaches row 3
+ mova [outq+pitq*2], m5 ; output row 3
+ RET
+
+;void short_idct4x4llm_1_mmx(short *input, short *output, int pitch) -- DC-only case: fills four output rows with (input[0] + 16) >> 5
+cglobal short_idct4x4llm_1_mmx,3,3,0,inp,out,pit
+ movh m0, [inpq] ; low word is input[0]
+ paddw m0, [pw_16] ; rounding term
+ psraw m0, 5 ; (input[0] + 16) >> 5 in the low word
+ punpcklwd m0, m0 ; duplicate the low word into the low dword
+ punpckldq m0, m0 ; duplicate the low dword: value in all four words
+
+ mova [outq], m0 ; output row 0
+ mova [outq+pitq], m0 ; output row 1
+
+ mova [outq+pitq*2], m0 ; output row 2
+ add outq, pitq ; step one row (named aliases of r1/r2, as in the sibling routine)
+
+ mova [outq+pitq*2], m0 ; output row 3
+ RET
+
+
+;void dc_only_idct_add_mmx(short input_dc, unsigned char *pred_ptr, unsigned char *dst_ptr, int pitch, int stride)
+cglobal dc_only_idct_add_mmx, 4,5,0,in_dc,pred,dst,pit,stride
+%if ARCH_X86_64
+ movsxd strideq, dword stridem ; fifth argument: sign-extend the int to register width
+%else
+ mov strideq, stridem
+%endif
+ pxor m0, m0 ; zero register used for byte<->word unpack/pack
+; NOTE(review): pit is used below without the sign extension applied to stride -- confirm for x86-64
+ movh m5, in_dcq ; dc
+ paddw m5, [pw_16] ; rounding term
+
+ psraw m5, 5 ; (dc + 16) >> 5
+
+ punpcklwd m5, m5 ; broadcast the low word ...
+ punpckldq m5, m5 ; ... to all four words
+
+ movh m1, [predq] ; predictor row 0 (4 bytes)
+ punpcklbw m1, m0 ; bytes -> words
+ paddsw m1, m5
+ packuswb m1, m0 ; pack and unpack to saturate
+ movh [dstq], m1
+
+ movh m2, [predq+pitq] ; predictor row 1
+ punpcklbw m2, m0
+ paddsw m2, m5
+ packuswb m2, m0 ; pack and unpack to saturate
+ movh [dstq+strideq], m2
+
+ movh m3, [predq+2*pitq] ; predictor row 2
+ punpcklbw m3, m0
+ paddsw m3, m5
+ packuswb m3, m0 ; pack and unpack to saturate
+ movh [dstq+2*strideq], m3
+
+ add dstq, strideq ; advance both pointers one row so the
+ add predq, pitq ; 2*pitch / 2*stride offsets reach row 3
+ movh m4, [predq+2*pitq] ; predictor row 3
+ punpcklbw m4, m0
+ paddsw m4, m5
+ packuswb m4, m0 ; pack and unpack to saturate
+ movh [dstq+2*strideq], m4
+ RET
+
diff --git a/vp9/common/x86/vp9_idctllm_sse2.asm b/vp9/common/x86/vp9_idctllm_sse2.asm
new file mode 100644
index 0000000..8f3c6df
--- /dev/null
+++ b/vp9/common/x86/vp9_idctllm_sse2.asm
@@ -0,0 +1,712 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+;void vp9_idct_dequant_0_2x_sse2
+; (
+; short *qcoeff - 0
+; short *dequant - 1
+; unsigned char *pre - 2
+; unsigned char *dst - 3
+; int dst_stride - 4
+; int blk_stride - 5
+; )
+; DC-only path for two blocks: dequantizes each block's first coefficient, rounds it with (x + 4) >> 3 and adds it to four 8-byte predictor rows.
+global sym(vp9_idct_dequant_0_2x_sse2) PRIVATE
+sym(vp9_idct_dequant_0_2x_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ GET_GOT rbx
+ ; end prolog
+
+ mov rdx, arg(1) ; dequant
+ mov rax, arg(0) ; qcoeff
+
+ movd xmm4, [rax] ; first block: low word is its first coefficient
+ movd xmm5, [rdx] ; low word is the first dequant factor
+
+ pinsrw xmm4, [rax+32], 4 ; second block's first coefficient (32 bytes on) into word 4
+ pinsrw xmm5, [rdx], 4 ; same dequant factor for the second block
+
+ pmullw xmm4, xmm5 ; dequantized values in words 0 and 4
+
+ ; Zero out xmm5, for use unpacking
+ pxor xmm5, xmm5
+
+ ; clear coeffs
+ movd [rax], xmm5
+ movd [rax+32], xmm5
+;pshufb
+ pshuflw xmm4, xmm4, 00000000b ; broadcast word 0 across the low four words
+ pshufhw xmm4, xmm4, 00000000b ; broadcast word 4 across the high four words
+
+ mov rax, arg(2) ; pre
+ paddw xmm4, [GLOBAL(fours)] ; rounding term
+
+ movsxd rcx, dword ptr arg(5) ; blk_stride
+ psraw xmm4, 3 ; (x + 4) >> 3
+
+ movq xmm0, [rax] ; predictor rows 0..3, 8 bytes each
+ movq xmm1, [rax+rcx]
+ movq xmm2, [rax+2*rcx]
+ lea rcx, [3*rcx]
+ movq xmm3, [rax+rcx]
+
+ punpcklbw xmm0, xmm5 ; bytes -> words
+ punpcklbw xmm1, xmm5
+ punpcklbw xmm2, xmm5
+ punpcklbw xmm3, xmm5
+
+ mov rax, arg(3) ; dst
+ movsxd rdx, dword ptr arg(4) ; dst_stride
+
+ ; Add to predict buffer
+ paddw xmm0, xmm4
+ paddw xmm1, xmm4
+ paddw xmm2, xmm4
+ paddw xmm3, xmm4
+
+ ; pack up before storing
+ packuswb xmm0, xmm5
+ packuswb xmm1, xmm5
+ packuswb xmm2, xmm5
+ packuswb xmm3, xmm5
+
+ ; store blocks back out
+ movq [rax], xmm0
+ movq [rax + rdx], xmm1
+
+ lea rax, [rax + 2*rdx]
+
+ movq [rax], xmm2
+ movq [rax + rdx], xmm3
+
+ ; begin epilog
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+global sym(vp9_idct_dequant_full_2x_sse2) PRIVATE
+sym(vp9_idct_dequant_full_2x_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 7
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ ; full path for two blocks: qcoeff is loaded, dequantized and cleared
+ ; below, run through two butterfly passes and added to the predictor
+ mov rax, arg(0) ; qcoeff
+ mov rsi, arg(2) ; pre
+ mov rdi, arg(3) ; dst
+ movsxd rcx, dword ptr arg(5) ; blk_stride
+
+ ; Zero out xmm7, for use unpacking
+ pxor xmm7, xmm7
+
+ mov rdx, arg(1) ; dequant
+
+ ; note the transpose of xmm1 and xmm2, necessary for shuffle
+ ; to spit out sensible data
+ movdqa xmm0, [rax]
+ movdqa xmm2, [rax+16]
+ movdqa xmm1, [rax+32]
+ movdqa xmm3, [rax+48]
+
+ ; Clear out coeffs
+ movdqa [rax], xmm7
+ movdqa [rax+16], xmm7
+ movdqa [rax+32], xmm7
+ movdqa [rax+48], xmm7
+
+ ; dequantize qcoeff buffer
+ pmullw xmm0, [rdx]
+ pmullw xmm2, [rdx+16]
+ pmullw xmm1, [rdx]
+ pmullw xmm3, [rdx+16]
+
+ ; repack so block 0 row x and block 1 row x are together
+ movdqa xmm4, xmm0
+ punpckldq xmm0, xmm1
+ punpckhdq xmm4, xmm1
+
+ pshufd xmm0, xmm0, 11011000b
+ pshufd xmm1, xmm4, 11011000b
+
+ movdqa xmm4, xmm2
+ punpckldq xmm2, xmm3
+ punpckhdq xmm4, xmm3
+
+ pshufd xmm2, xmm2, 11011000b
+ pshufd xmm3, xmm4, 11011000b
+
+ ; first pass
+ psubw xmm0, xmm2 ; b1 = 0-2
+ paddw xmm2, xmm2 ; 2 * (row 2), so the add below yields 0+2
+
+ movdqa xmm5, xmm1
+ paddw xmm2, xmm0 ; a1 = 0+2
+
+ pmulhw xmm5, [GLOBAL(x_s1sqr2)]
+ paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2)
+
+ movdqa xmm7, xmm3
+ pmulhw xmm7, [GLOBAL(x_c1sqr2less1)]
+
+ paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2)
+ psubw xmm7, xmm5 ; c1
+
+ movdqa xmm5, xmm1
+ movdqa xmm4, xmm3
+
+ pmulhw xmm5, [GLOBAL(x_c1sqr2less1)]
+ paddw xmm5, xmm1
+
+ pmulhw xmm3, [GLOBAL(x_s1sqr2)]
+ paddw xmm3, xmm4
+
+ paddw xmm3, xmm5 ; d1
+ movdqa xmm6, xmm2 ; a1
+
+ movdqa xmm4, xmm0 ; b1
+ paddw xmm2, xmm3 ;0
+
+ paddw xmm4, xmm7 ;1
+ psubw xmm0, xmm7 ;2
+
+ psubw xmm6, xmm3 ;3
+
+ ; transpose for the second pass
+ movdqa xmm7, xmm2 ; 103 102 101 100 003 002 001 000
+ punpcklwd xmm2, xmm0 ; 007 003 006 002 005 001 004 000
+ punpckhwd xmm7, xmm0 ; 107 103 106 102 105 101 104 100
+
+ movdqa xmm5, xmm4 ; 111 110 109 108 011 010 009 008
+ punpcklwd xmm4, xmm6 ; 015 011 014 010 013 009 012 008
+ punpckhwd xmm5, xmm6 ; 115 111 114 110 113 109 112 108
+
+
+ movdqa xmm1, xmm2 ; 007 003 006 002 005 001 004 000
+ punpckldq xmm2, xmm4 ; 013 009 005 001 012 008 004 000
+ punpckhdq xmm1, xmm4 ; 015 011 007 003 014 010 006 002
+
+ movdqa xmm6, xmm7 ; 107 103 106 102 105 101 104 100
+ punpckldq xmm7, xmm5 ; 113 109 105 101 112 108 104 100
+ punpckhdq xmm6, xmm5 ; 115 111 107 103 114 110 106 102
+
+
+ movdqa xmm5, xmm2 ; 013 009 005 001 012 008 004 000
+ punpckldq xmm2, xmm7 ; 112 108 012 008 104 100 004 000
+ punpckhdq xmm5, xmm7 ; 113 109 013 009 105 101 005 001
+
+ movdqa xmm7, xmm1 ; 015 011 007 003 014 010 006 002
+ punpckldq xmm1, xmm6 ; 114 110 014 010 106 102 006 002
+ punpckhdq xmm7, xmm6 ; 115 111 015 011 107 103 007 003
+
+ pshufd xmm0, xmm2, 11011000b
+ pshufd xmm2, xmm1, 11011000b
+
+ pshufd xmm1, xmm5, 11011000b
+ pshufd xmm3, xmm7, 11011000b
+
+ ; second pass
+ psubw xmm0, xmm2 ; b1 = 0-2
+ paddw xmm2, xmm2
+
+ movdqa xmm5, xmm1
+ paddw xmm2, xmm0 ; a1 = 0+2
+
+ pmulhw xmm5, [GLOBAL(x_s1sqr2)]
+ paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2)
+
+ movdqa xmm7, xmm3
+ pmulhw xmm7, [GLOBAL(x_c1sqr2less1)]
+
+ paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2)
+ psubw xmm7, xmm5 ; c1
+
+ movdqa xmm5, xmm1
+ movdqa xmm4, xmm3
+
+ pmulhw xmm5, [GLOBAL(x_c1sqr2less1)]
+ paddw xmm5, xmm1
+
+ pmulhw xmm3, [GLOBAL(x_s1sqr2)]
+ paddw xmm3, xmm4
+
+ paddw xmm3, xmm5 ; d1
+ paddw xmm0, [GLOBAL(fours)] ; fold the rounding term into b1 ...
+
+ paddw xmm2, [GLOBAL(fours)] ; ... and into a1, so all four outputs carry it
+ movdqa xmm6, xmm2 ; a1
+
+ movdqa xmm4, xmm0 ; b1
+ paddw xmm2, xmm3 ;0
+
+ paddw xmm4, xmm7 ;1
+ psubw xmm0, xmm7 ;2
+
+ psubw xmm6, xmm3 ;3
+ psraw xmm2, 3
+
+ psraw xmm0, 3
+ psraw xmm4, 3
+
+ psraw xmm6, 3
+
+ ; transpose to save
+ movdqa xmm7, xmm2 ; 103 102 101 100 003 002 001 000
+ punpcklwd xmm2, xmm0 ; 007 003 006 002 005 001 004 000
+ punpckhwd xmm7, xmm0 ; 107 103 106 102 105 101 104 100
+
+ movdqa xmm5, xmm4 ; 111 110 109 108 011 010 009 008
+ punpcklwd xmm4, xmm6 ; 015 011 014 010 013 009 012 008
+ punpckhwd xmm5, xmm6 ; 115 111 114 110 113 109 112 108
+
+
+ movdqa xmm1, xmm2 ; 007 003 006 002 005 001 004 000
+ punpckldq xmm2, xmm4 ; 013 009 005 001 012 008 004 000
+ punpckhdq xmm1, xmm4 ; 015 011 007 003 014 010 006 002
+
+ movdqa xmm6, xmm7 ; 107 103 106 102 105 101 104 100
+ punpckldq xmm7, xmm5 ; 113 109 105 101 112 108 104 100
+ punpckhdq xmm6, xmm5 ; 115 111 107 103 114 110 106 102
+
+
+ movdqa xmm5, xmm2 ; 013 009 005 001 012 008 004 000
+ punpckldq xmm2, xmm7 ; 112 108 012 008 104 100 004 000
+ punpckhdq xmm5, xmm7 ; 113 109 013 009 105 101 005 001
+
+ movdqa xmm7, xmm1 ; 015 011 007 003 014 010 006 002
+ punpckldq xmm1, xmm6 ; 114 110 014 010 106 102 006 002
+ punpckhdq xmm7, xmm6 ; 115 111 015 011 107 103 007 003
+
+ pshufd xmm0, xmm2, 11011000b
+ pshufd xmm2, xmm1, 11011000b
+
+ pshufd xmm1, xmm5, 11011000b
+ pshufd xmm3, xmm7, 11011000b
+
+ pxor xmm7, xmm7 ; xmm7 was used as scratch above; re-zero it for unpack/pack
+
+ ; Load up predict blocks
+ movq xmm4, [rsi]
+ movq xmm5, [rsi+rcx]
+
+ punpcklbw xmm4, xmm7
+ punpcklbw xmm5, xmm7
+
+ paddw xmm0, xmm4
+ paddw xmm1, xmm5
+
+ movq xmm4, [rsi+2*rcx]
+ lea rcx, [3*rcx]
+ movq xmm5, [rsi+rcx]
+
+ punpcklbw xmm4, xmm7
+ punpcklbw xmm5, xmm7
+
+ paddw xmm2, xmm4
+ paddw xmm3, xmm5
+
+.finish: ; reached by fall-through only; no jump in this function targets it
+
+ ; pack up before storing
+ packuswb xmm0, xmm7
+ packuswb xmm1, xmm7
+ packuswb xmm2, xmm7
+ packuswb xmm3, xmm7
+
+ ; Load destination stride before writing out,
+ ; doesn't need to persist
+ movsxd rdx, dword ptr arg(4) ; dst_stride
+
+ ; store blocks back out
+ movq [rdi], xmm0
+ movq [rdi + rdx], xmm1
+
+ lea rdi, [rdi + 2*rdx]
+
+ movq [rdi], xmm2
+ movq [rdi + rdx], xmm3
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void vp9_idct_dequant_dc_0_2x_sse2
+; (
+; short *qcoeff - 0
+; short *dequant - 1
+; unsigned char *pre - 2
+; unsigned char *dst - 3
+; int dst_stride - 4
+; short *dc - 5
+; )
+global sym(vp9_idct_dequant_dc_0_2x_sse2) PRIVATE
+sym(vp9_idct_dequant_dc_0_2x_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ ; special case when 2 blocks have 0 or 1 coeffs
+ ; dc is set as first coeff, so no need to load qcoeff
+ mov rax, arg(0) ; qcoeff (loaded, but rax is not referenced again in this function)
+ mov rsi, arg(2) ; pre
+ mov rdi, arg(3) ; dst
+ mov rdx, arg(5) ; dc
+
+ ; Zero out xmm5, for use unpacking
+ pxor xmm5, xmm5
+
+ ; load up 2 dc words here == 2*16 = doubleword
+ movd xmm4, [rdx]
+
+ ; Load up predict blocks
+ movq xmm0, [rsi] ; predictor rows are read at a fixed 16-byte pitch
+ movq xmm1, [rsi+16]
+ movq xmm2, [rsi+32]
+ movq xmm3, [rsi+48]
+
+ ; Duplicate and expand dc across
+ punpcklwd xmm4, xmm4 ; words: dc0 dc0 dc1 dc1
+ punpckldq xmm4, xmm4 ; low four words dc0, high four words dc1
+
+ ; Rounding to dequant and downshift
+ paddw xmm4, [GLOBAL(fours)]
+ psraw xmm4, 3 ; (dc + 4) >> 3
+
+ ; Predict buffer needs to be expanded from bytes to words
+ punpcklbw xmm0, xmm5
+ punpcklbw xmm1, xmm5
+ punpcklbw xmm2, xmm5
+ punpcklbw xmm3, xmm5
+
+ ; Add to predict buffer
+ paddw xmm0, xmm4
+ paddw xmm1, xmm4
+ paddw xmm2, xmm4
+ paddw xmm3, xmm4
+
+ ; pack up before storing
+ packuswb xmm0, xmm5
+ packuswb xmm1, xmm5
+ packuswb xmm2, xmm5
+ packuswb xmm3, xmm5
+
+ ; Load destination stride before writing out,
+ ; doesn't need to persist
+ movsxd rdx, dword ptr arg(4) ; dst_stride
+
+ ; store blocks back out
+ movq [rdi], xmm0
+ movq [rdi + rdx], xmm1
+
+ lea rdi, [rdi + 2*rdx]
+
+ movq [rdi], xmm2
+ movq [rdi + rdx], xmm3
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+global sym(vp9_idct_dequant_dc_full_2x_sse2) PRIVATE
+sym(vp9_idct_dequant_dc_full_2x_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 7
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ ; full path for two blocks: qcoeff is loaded, dequantized and cleared
+ ; below; the two DC words are then taken from arg(5) and inserted
+ mov rax, arg(0) ; qcoeff
+ mov rsi, arg(2) ; pre
+ mov rdi, arg(3) ; dst
+
+ ; Zero out xmm7, for use unpacking
+ pxor xmm7, xmm7
+
+ mov rdx, arg(1) ; dequant
+
+ ; note the transpose of xmm1 and xmm2, necessary for shuffle
+ ; to spit out sensible data
+ movdqa xmm0, [rax]
+ movdqa xmm2, [rax+16]
+ movdqa xmm1, [rax+32]
+ movdqa xmm3, [rax+48]
+
+ ; Clear out coeffs
+ movdqa [rax], xmm7
+ movdqa [rax+16], xmm7
+ movdqa [rax+32], xmm7
+ movdqa [rax+48], xmm7
+
+ ; dequantize qcoeff buffer
+ pmullw xmm0, [rdx]
+ pmullw xmm2, [rdx+16]
+ pmullw xmm1, [rdx]
+ pmullw xmm3, [rdx+16]
+
+ ; DC component
+ mov rdx, arg(5)
+
+ ; repack so block 0 row x and block 1 row x are together
+ movdqa xmm4, xmm0
+ punpckldq xmm0, xmm1
+ punpckhdq xmm4, xmm1
+
+ pshufd xmm0, xmm0, 11011000b
+ pshufd xmm1, xmm4, 11011000b
+
+ movdqa xmm4, xmm2
+ punpckldq xmm2, xmm3
+ punpckhdq xmm4, xmm3
+
+ pshufd xmm2, xmm2, 11011000b
+ pshufd xmm3, xmm4, 11011000b
+
+ ; insert DC component
+ pinsrw xmm0, [rdx], 0 ; first DC word replaces word 0
+ pinsrw xmm0, [rdx+2], 4 ; second DC word replaces word 4
+
+ ; first pass
+ psubw xmm0, xmm2 ; b1 = 0-2
+ paddw xmm2, xmm2 ; 2 * (row 2), so the add below yields 0+2
+
+ movdqa xmm5, xmm1
+ paddw xmm2, xmm0 ; a1 = 0+2
+
+ pmulhw xmm5, [GLOBAL(x_s1sqr2)]
+ paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2)
+
+ movdqa xmm7, xmm3
+ pmulhw xmm7, [GLOBAL(x_c1sqr2less1)]
+
+ paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2)
+ psubw xmm7, xmm5 ; c1
+
+ movdqa xmm5, xmm1
+ movdqa xmm4, xmm3
+
+ pmulhw xmm5, [GLOBAL(x_c1sqr2less1)]
+ paddw xmm5, xmm1
+
+ pmulhw xmm3, [GLOBAL(x_s1sqr2)]
+ paddw xmm3, xmm4
+
+ paddw xmm3, xmm5 ; d1
+ movdqa xmm6, xmm2 ; a1
+
+ movdqa xmm4, xmm0 ; b1
+ paddw xmm2, xmm3 ;0
+
+ paddw xmm4, xmm7 ;1
+ psubw xmm0, xmm7 ;2
+
+ psubw xmm6, xmm3 ;3
+
+ ; transpose for the second pass
+ movdqa xmm7, xmm2 ; 103 102 101 100 003 002 001 000
+ punpcklwd xmm2, xmm0 ; 007 003 006 002 005 001 004 000
+ punpckhwd xmm7, xmm0 ; 107 103 106 102 105 101 104 100
+
+ movdqa xmm5, xmm4 ; 111 110 109 108 011 010 009 008
+ punpcklwd xmm4, xmm6 ; 015 011 014 010 013 009 012 008
+ punpckhwd xmm5, xmm6 ; 115 111 114 110 113 109 112 108
+
+
+ movdqa xmm1, xmm2 ; 007 003 006 002 005 001 004 000
+ punpckldq xmm2, xmm4 ; 013 009 005 001 012 008 004 000
+ punpckhdq xmm1, xmm4 ; 015 011 007 003 014 010 006 002
+
+ movdqa xmm6, xmm7 ; 107 103 106 102 105 101 104 100
+ punpckldq xmm7, xmm5 ; 113 109 105 101 112 108 104 100
+ punpckhdq xmm6, xmm5 ; 115 111 107 103 114 110 106 102
+
+
+ movdqa xmm5, xmm2 ; 013 009 005 001 012 008 004 000
+ punpckldq xmm2, xmm7 ; 112 108 012 008 104 100 004 000
+ punpckhdq xmm5, xmm7 ; 113 109 013 009 105 101 005 001
+
+ movdqa xmm7, xmm1 ; 015 011 007 003 014 010 006 002
+ punpckldq xmm1, xmm6 ; 114 110 014 010 106 102 006 002
+ punpckhdq xmm7, xmm6 ; 115 111 015 011 107 103 007 003
+
+ pshufd xmm0, xmm2, 11011000b
+ pshufd xmm2, xmm1, 11011000b
+
+ pshufd xmm1, xmm5, 11011000b
+ pshufd xmm3, xmm7, 11011000b
+
+ ; second pass
+ psubw xmm0, xmm2 ; b1 = 0-2
+ paddw xmm2, xmm2
+
+ movdqa xmm5, xmm1
+ paddw xmm2, xmm0 ; a1 = 0+2
+
+ pmulhw xmm5, [GLOBAL(x_s1sqr2)]
+ paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2)
+
+ movdqa xmm7, xmm3
+ pmulhw xmm7, [GLOBAL(x_c1sqr2less1)]
+
+ paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2)
+ psubw xmm7, xmm5 ; c1
+
+ movdqa xmm5, xmm1
+ movdqa xmm4, xmm3
+
+ pmulhw xmm5, [GLOBAL(x_c1sqr2less1)]
+ paddw xmm5, xmm1
+
+ pmulhw xmm3, [GLOBAL(x_s1sqr2)]
+ paddw xmm3, xmm4
+
+ paddw xmm3, xmm5 ; d1
+ paddw xmm0, [GLOBAL(fours)] ; fold the rounding term into b1 ...
+
+ paddw xmm2, [GLOBAL(fours)] ; ... and into a1, so all four outputs carry it
+ movdqa xmm6, xmm2 ; a1
+
+ movdqa xmm4, xmm0 ; b1
+ paddw xmm2, xmm3 ;0
+
+ paddw xmm4, xmm7 ;1
+ psubw xmm0, xmm7 ;2
+
+ psubw xmm6, xmm3 ;3
+ psraw xmm2, 3
+
+ psraw xmm0, 3
+ psraw xmm4, 3
+
+ psraw xmm6, 3
+
+ ; transpose to save
+ movdqa xmm7, xmm2 ; 103 102 101 100 003 002 001 000
+ punpcklwd xmm2, xmm0 ; 007 003 006 002 005 001 004 000
+ punpckhwd xmm7, xmm0 ; 107 103 106 102 105 101 104 100
+
+ movdqa xmm5, xmm4 ; 111 110 109 108 011 010 009 008
+ punpcklwd xmm4, xmm6 ; 015 011 014 010 013 009 012 008
+ punpckhwd xmm5, xmm6 ; 115 111 114 110 113 109 112 108
+
+
+ movdqa xmm1, xmm2 ; 007 003 006 002 005 001 004 000
+ punpckldq xmm2, xmm4 ; 013 009 005 001 012 008 004 000
+ punpckhdq xmm1, xmm4 ; 015 011 007 003 014 010 006 002
+
+ movdqa xmm6, xmm7 ; 107 103 106 102 105 101 104 100
+ punpckldq xmm7, xmm5 ; 113 109 105 101 112 108 104 100
+ punpckhdq xmm6, xmm5 ; 115 111 107 103 114 110 106 102
+
+
+ movdqa xmm5, xmm2 ; 013 009 005 001 012 008 004 000
+ punpckldq xmm2, xmm7 ; 112 108 012 008 104 100 004 000
+ punpckhdq xmm5, xmm7 ; 113 109 013 009 105 101 005 001
+
+ movdqa xmm7, xmm1 ; 015 011 007 003 014 010 006 002
+ punpckldq xmm1, xmm6 ; 114 110 014 010 106 102 006 002
+ punpckhdq xmm7, xmm6 ; 115 111 015 011 107 103 007 003
+
+ pshufd xmm0, xmm2, 11011000b
+ pshufd xmm2, xmm1, 11011000b
+
+ pshufd xmm1, xmm5, 11011000b
+ pshufd xmm3, xmm7, 11011000b
+
+ pxor xmm7, xmm7 ; xmm7 was used as scratch above; re-zero it for unpack/pack
+
+ ; Load up predict blocks
+ movq xmm4, [rsi] ; predictor rows are read at a fixed 16-byte pitch
+ movq xmm5, [rsi+16]
+
+ punpcklbw xmm4, xmm7
+ punpcklbw xmm5, xmm7
+
+ paddw xmm0, xmm4
+ paddw xmm1, xmm5
+
+ movq xmm4, [rsi+32]
+ movq xmm5, [rsi+48]
+
+ punpcklbw xmm4, xmm7
+ punpcklbw xmm5, xmm7
+
+ paddw xmm2, xmm4
+ paddw xmm3, xmm5
+
+.finish: ; reached by fall-through only; no jump in this function targets it
+
+ ; pack up before storing
+ packuswb xmm0, xmm7
+ packuswb xmm1, xmm7
+ packuswb xmm2, xmm7
+ packuswb xmm3, xmm7
+
+ ; Load destination stride before writing out,
+ ; doesn't need to persist
+ movsxd rdx, dword ptr arg(4) ; dst_stride
+
+ ; store blocks back out
+ movq [rdi], xmm0
+ movq [rdi + rdx], xmm1
+
+ lea rdi, [rdi + 2*rdx]
+
+ movq [rdi], xmm2
+ movq [rdi + rdx], xmm3
+
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+SECTION_RODATA
+align 16
+fours: ; rounding term added before the arithmetic shift right by 3
+ times 8 dw 0x0004
+align 16
+x_s1sqr2: ; 0x8A8C = 35468; negative when read as a signed word, so users add x back after pmulhw
+ times 8 dw 0x8A8C
+align 16
+x_c1sqr2less1: ; 0x4E7B = 20091; the multiplier minus one, so users add x back after pmulhw
+ times 8 dw 0x4E7B
diff --git a/vp9/common/x86/vp9_iwalsh_mmx.asm b/vp9/common/x86/vp9_iwalsh_mmx.asm
new file mode 100644
index 0000000..1af2521
--- /dev/null
+++ b/vp9/common/x86/vp9_iwalsh_mmx.asm
@@ -0,0 +1,173 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+;void vp9_short_inv_walsh4x4_1_mmx(short *input, short *output)
+global sym(vp9_short_inv_walsh4x4_1_mmx) PRIVATE
+sym(vp9_short_inv_walsh4x4_1_mmx):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 2
+ push rsi
+ push rdi
+ ; end prolog
+; DC-only case: writes (input[0] + 3) >> 3 to all 16 output words
+ mov rsi, arg(0)
+ mov rax, 3
+
+ mov rdi, arg(1)
+ add rax, [rsi] ;input[0] + 3 (register-wide load; only the low word is kept below)
+
+ movd mm0, eax
+
+ punpcklwd mm0, mm0 ;x x val val
+
+ punpckldq mm0, mm0 ;val val val val
+
+ psraw mm0, 3 ;(input[0] + 3) >> 3
+
+ movq [rdi + 0], mm0 ; four stores of four words each
+ movq [rdi + 8], mm0
+ movq [rdi + 16], mm0
+ movq [rdi + 24], mm0
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void vp9_short_inv_walsh4x4_mmx(short *input, short *output)
+global sym(vp9_short_inv_walsh4x4_mmx) PRIVATE
+sym(vp9_short_inv_walsh4x4_mmx):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 2
+ push rsi
+ push rdi
+ ; end prolog
+; two add/sub butterfly passes with a transpose after each, then (x + 3) >> 3
+ mov rax, 3
+ mov rsi, arg(0)
+ mov rdi, arg(1)
+ shl rax, 16
+
+ movq mm0, [rsi + 0] ;ip[0]
+ movq mm1, [rsi + 8] ;ip[4]
+ or rax, 3 ;00030003h
+
+ movq mm2, [rsi + 16] ;ip[8]
+ movq mm3, [rsi + 24] ;ip[12]
+
+ movq mm7, rax
+ movq mm4, mm0
+
+ punpcklwd mm7, mm7 ;0003000300030003h
+ movq mm5, mm1
+
+ paddw mm4, mm3 ;ip[0] + ip[12] aka a1
+ paddw mm5, mm2 ;ip[4] + ip[8] aka b1
+
+ movq mm6, mm4 ;temp a1
+
+ paddw mm4, mm5 ;a1 + b1
+ psubw mm6, mm5 ;a1 - b1
+
+ psubw mm0, mm3 ;ip[0] - ip[12] aka d1
+ psubw mm1, mm2 ;ip[4] - ip[8] aka c1
+
+ movq mm5, mm0 ;temp d1
+
+ paddw mm0, mm1 ;d1 + c1
+ psubw mm5, mm1 ;d1 - c1
+
+ ; 03 02 01 00
+ ; 13 12 11 10
+ ; 23 22 21 20
+ ; 33 32 31 30
+
+ movq mm3, mm4 ; 03 02 01 00
+ punpcklwd mm4, mm0 ; 11 01 10 00
+ punpckhwd mm3, mm0 ; 13 03 12 02
+
+ movq mm1, mm6 ; 23 22 21 20
+ punpcklwd mm6, mm5 ; 31 21 30 20
+ punpckhwd mm1, mm5 ; 33 23 32 22
+
+ movq mm0, mm4 ; 11 01 10 00
+ movq mm2, mm3 ; 13 03 12 02
+
+ punpckldq mm0, mm6 ; 30 20 10 00 aka ip[0]
+ punpckhdq mm4, mm6 ; 31 21 11 01 aka ip[4]
+
+ punpckldq mm2, mm1 ; 32 22 12 02 aka ip[8]
+ punpckhdq mm3, mm1 ; 33 23 13 03 aka ip[12]
+;~~~~~~~~~~~~~~~~~~~~~
+ movq mm1, mm0
+ movq mm5, mm4
+
+ paddw mm1, mm3 ;ip[0] + ip[12] aka a1
+ paddw mm5, mm2 ;ip[4] + ip[8] aka b1
+
+ movq mm6, mm1 ;temp a1
+
+ paddw mm1, mm5 ;a1 + b1
+ psubw mm6, mm5 ;a1 - b1
+
+ psubw mm0, mm3 ;ip[0] - ip[12] aka d1
+ psubw mm4, mm2 ;ip[4] - ip[8] aka c1
+
+ movq mm5, mm0 ;temp d1
+
+ paddw mm0, mm4 ;d1 + c1
+ psubw mm5, mm4 ;d1 - c1
+;~~~~~~~~~~~~~~~~~~~~~
+ movq mm3, mm1 ; 03 02 01 00
+ punpcklwd mm1, mm0 ; 11 01 10 00
+ punpckhwd mm3, mm0 ; 13 03 12 02
+
+ movq mm4, mm6 ; 23 22 21 20
+ punpcklwd mm6, mm5 ; 31 21 30 20
+ punpckhwd mm4, mm5 ; 33 23 32 22
+
+ movq mm0, mm1 ; 11 01 10 00
+ movq mm2, mm3 ; 13 03 12 02
+
+ punpckldq mm0, mm6 ; 30 20 10 00 aka ip[0]
+ punpckhdq mm1, mm6 ; 31 21 11 01 aka ip[4]
+
+ punpckldq mm2, mm4 ; 32 22 12 02 aka ip[8]
+ punpckhdq mm3, mm4 ; 33 23 13 03 aka ip[12]
+
+ paddw mm0, mm7 ; + 3 in every word (rounding term built in mm7 above)
+ paddw mm1, mm7
+ paddw mm2, mm7
+ paddw mm3, mm7
+
+ psraw mm0, 3 ; (x + 3) >> 3
+ psraw mm1, 3
+ psraw mm2, 3
+ psraw mm3, 3
+
+ movq [rdi + 0], mm0
+ movq [rdi + 8], mm1
+ movq [rdi + 16], mm2
+ movq [rdi + 24], mm3
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
diff --git a/vp9/common/x86/vp9_iwalsh_sse2.asm b/vp9/common/x86/vp9_iwalsh_sse2.asm
new file mode 100644
index 0000000..84fa2fe
--- /dev/null
+++ b/vp9/common/x86/vp9_iwalsh_sse2.asm
@@ -0,0 +1,119 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+;void vp9_short_inv_walsh4x4_sse2(short *input, short *output)
+global sym(vp9_short_inv_walsh4x4_sse2) PRIVATE
+sym(vp9_short_inv_walsh4x4_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 2
+ SAVE_XMM 6
+ push rsi
+ push rdi
+ ; end prolog
+; two add/sub butterfly passes with a transpose after each, then (x + 3) >> 3
+ mov rsi, arg(0)
+ mov rdi, arg(1)
+ mov rax, 3
+
+ movdqa xmm0, [rsi + 0] ;ip[4] ip[0]
+ movdqa xmm1, [rsi + 16] ;ip[12] ip[8]
+
+ shl rax, 16
+ or rax, 3 ;00030003h
+
+ pshufd xmm2, xmm1, 4eh ;ip[8] ip[12]
+ movdqa xmm3, xmm0 ;ip[4] ip[0]
+
+ paddw xmm0, xmm2 ;ip[4]+ip[8] ip[0]+ip[12] aka b1 a1
+ psubw xmm3, xmm2 ;ip[4]-ip[8] ip[0]-ip[12] aka c1 d1
+
+ movdqa xmm4, xmm0
+ punpcklqdq xmm0, xmm3 ;d1 a1
+ punpckhqdq xmm4, xmm3 ;c1 b1
+ movd xmm6, eax ; rounding constant; broadcast to all words later by pshufd
+
+ movdqa xmm1, xmm4 ;c1 b1
+ paddw xmm4, xmm0 ;d1+c1 a1+b1 aka op[4] op[0]
+ psubw xmm0, xmm1 ;d1-c1 a1-b1 aka op[12] op[8]
+
+;;;temp output
+;; movdqu [rdi + 0], xmm4
+;; movdqu [rdi + 16], xmm3
+
+;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ ; 13 12 11 10 03 02 01 00
+ ;
+ ; 33 32 31 30 23 22 21 20
+ ;
+ movdqa xmm3, xmm4 ; 13 12 11 10 03 02 01 00
+ punpcklwd xmm4, xmm0 ; 23 03 22 02 21 01 20 00
+ punpckhwd xmm3, xmm0 ; 33 13 32 12 31 11 30 10
+ movdqa xmm1, xmm4 ; 23 03 22 02 21 01 20 00
+ punpcklwd xmm4, xmm3 ; 31 21 11 01 30 20 10 00
+ punpckhwd xmm1, xmm3 ; 33 23 13 03 32 22 12 02
+ ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ pshufd xmm2, xmm1, 4eh ;ip[8] ip[12]
+ movdqa xmm3, xmm4 ;ip[4] ip[0]
+
+ pshufd xmm6, xmm6, 0 ;03 03 03 03 03 03 03 03
+
+ paddw xmm4, xmm2 ;ip[4]+ip[8] ip[0]+ip[12] aka b1 a1
+ psubw xmm3, xmm2 ;ip[4]-ip[8] ip[0]-ip[12] aka c1 d1
+
+ movdqa xmm5, xmm4
+ punpcklqdq xmm4, xmm3 ;d1 a1
+ punpckhqdq xmm5, xmm3 ;c1 b1
+
+ movdqa xmm1, xmm5 ;c1 b1
+ paddw xmm5, xmm4 ;d1+c1 a1+b1 aka op[4] op[0]
+ psubw xmm4, xmm1 ;d1-c1 a1-b1 aka op[12] op[8]
+;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ ; 13 12 11 10 03 02 01 00
+ ;
+ ; 33 32 31 30 23 22 21 20
+ ;
+ movdqa xmm0, xmm5 ; 13 12 11 10 03 02 01 00
+ punpcklwd xmm5, xmm4 ; 23 03 22 02 21 01 20 00
+ punpckhwd xmm0, xmm4 ; 33 13 32 12 31 11 30 10
+ movdqa xmm1, xmm5 ; 23 03 22 02 21 01 20 00
+ punpcklwd xmm5, xmm0 ; 31 21 11 01 30 20 10 00
+ punpckhwd xmm1, xmm0 ; 33 23 13 03 32 22 12 02
+;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ paddw xmm5, xmm6 ; + 3 in every word
+ paddw xmm1, xmm6
+
+ psraw xmm5, 3 ; (x + 3) >> 3
+ psraw xmm1, 3
+
+ movdqa [rdi + 0], xmm5
+ movdqa [rdi + 16], xmm1
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+SECTION_RODATA
+align 16
+x_s1sqr2: ; NOTE(review): none of the three tables here is referenced by the code in this file -- confirm before removing
+ times 4 dw 0x8A8C
+align 16
+x_c1sqr2less1:
+ times 4 dw 0x4E7B
+align 16
+fours:
+ times 4 dw 0x0004
diff --git a/vp9/common/x86/vp9_loopfilter_mmx.asm b/vp9/common/x86/vp9_loopfilter_mmx.asm
new file mode 100644
index 0000000..ceffdf5
--- /dev/null
+++ b/vp9/common/x86/vp9_loopfilter_mmx.asm
@@ -0,0 +1,969 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+; Normal loop filter across a horizontal edge (MMX): each loop pass handles 8 adjacent columns, reading rows p3..p0 above src_ptr and q0..q3 from src_ptr downwards.
+; A per-byte mask (limit/blimit tests) gates the filter and a high-edge-variance (hev) flag selects the taps; only p1, p0, q0 and q1 are stored back.
+;void vp9_loop_filter_horizontal_edge_mmx
+;(
+; unsigned char *src_ptr, -- row q0 (first row at/below the edge); advanced by 8 bytes per pass
+; int src_pixel_step, -- byte offset between consecutive rows
+; const char *blimit, -- 8 bytes are read; bound for abs(p0-q0)*2 + abs(p1-q1)/2
+; const char *limit, -- 8 bytes are read; bound for each neighbouring-row difference
+; const char *thresh, -- 8 bytes are read; hev threshold for abs(p1-p0) and abs(q1-q0)
+; int count -- number of 8-column passes
+;)
+global sym(vp9_loop_filter_horizontal_edge_mmx) PRIVATE
+sym(vp9_loop_filter_horizontal_edge_mmx):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ ALIGN_STACK 16, rax
+ sub rsp, 32 ; reserve 32 bytes
+ %define t0 [rsp + 0] ;__declspec(align(16)) char t0[8];
+ %define t1 [rsp + 16] ;__declspec(align(16)) char t1[8];
+
+ mov rsi, arg(0) ;src_ptr
+ movsxd rax, dword ptr arg(1) ;src_pixel_step (row pitch, sign-extended)
+
+ movsxd rcx, dword ptr arg(5) ;count
+.next8_h:
+ mov rdx, arg(3) ;limit
+ movq mm7, [rdx]
+ mov rdi, rsi ; rdi points to row +1 for indirect addressing
+ add rdi, rax
+
+ ; calculate breakout conditions
+ movq mm2, [rdi+2*rax] ; q3
+ movq mm1, [rsi+2*rax] ; q2
+ movq mm6, mm1 ; q2
+ psubusb mm1, mm2 ; q2-=q3
+ psubusb mm2, mm6 ; q3-=q2
+ por mm1, mm2 ; abs(q3-q2)
+ psubusb mm1, mm7 ; nonzero where abs(q3-q2) > limit; mm1 accumulates every failed test
+
+
+ movq mm4, [rsi+rax] ; q1
+ movq mm3, mm4 ; q1
+ psubusb mm4, mm6 ; q1-=q2
+ psubusb mm6, mm3 ; q2-=q1
+ por mm4, mm6 ; abs(q2-q1)
+
+ psubusb mm4, mm7
+ por mm1, mm4
+
+ movq mm4, [rsi] ; q0
+ movq mm0, mm4 ; q0
+ psubusb mm4, mm3 ; q0-=q1
+ psubusb mm3, mm0 ; q1-=q0
+ por mm4, mm3 ; abs(q0-q1)
+ movq t0, mm4 ; save to t0 (reused by the hev test below)
+ psubusb mm4, mm7
+ por mm1, mm4
+
+
+ neg rax ; negate pitch to deal with above border
+
+ movq mm2, [rsi+4*rax] ; p3
+ movq mm4, [rdi+4*rax] ; p2
+ movq mm5, mm4 ; p2
+ psubusb mm4, mm2 ; p2-=p3
+ psubusb mm2, mm5 ; p3-=p2
+ por mm4, mm2 ; abs(p3 - p2)
+ psubusb mm4, mm7
+ por mm1, mm4
+
+
+ movq mm4, [rsi+2*rax] ; p1
+ movq mm3, mm4 ; p1
+ psubusb mm4, mm5 ; p1-=p2
+ psubusb mm5, mm3 ; p2-=p1
+ por mm4, mm5 ; abs(p2 - p1)
+ psubusb mm4, mm7
+ por mm1, mm4
+
+ movq mm2, mm3 ; p1
+
+ movq mm4, [rsi+rax] ; p0
+ movq mm5, mm4 ; p0
+ psubusb mm4, mm3 ; p0-=p1
+ psubusb mm3, mm5 ; p1-=p0
+ por mm4, mm3 ; abs(p1 - p0)
+ movq t1, mm4 ; save to t1 (reused by the hev test below)
+ psubusb mm4, mm7
+ por mm1, mm4
+
+ movq mm3, [rdi] ; q1
+ movq mm4, mm3 ; q1
+ psubusb mm3, mm2 ; q1-=p1
+ psubusb mm2, mm4 ; p1-=q1
+ por mm2, mm3 ; abs(p1-q1)
+ pand mm2, [GLOBAL(tfe)] ; set lsb of each byte to zero
+ psrlw mm2, 1 ; abs(p1-q1)/2
+
+ movq mm6, mm5 ; p0
+ movq mm3, [rsi] ; q0
+ psubusb mm5, mm3 ; p0-=q0
+ psubusb mm3, mm6 ; q0-=p0
+ por mm5, mm3 ; abs(p0 - q0)
+ paddusb mm5, mm5 ; abs(p0-q0)*2
+ paddusb mm5, mm2 ; abs (p0 - q0) *2 + abs(p1-q1)/2
+
+ mov rdx, arg(2) ;blimit ; get blimit
+ movq mm7, [rdx] ; blimit
+
+ psubusb mm5, mm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
+ por mm1, mm5
+ pxor mm5, mm5
+ pcmpeqb mm1, mm5 ; mask mm1: 0xff in bytes where every test above passed
+
+ ; calculate high edge variance
+ mov rdx, arg(4) ;thresh ; get thresh
+ movq mm7, [rdx] ;
+ movq mm4, t0 ; get abs (q1 - q0)
+ psubusb mm4, mm7
+ movq mm3, t1 ; get abs (p1 - p0)
+ psubusb mm3, mm7
+ paddb mm4, mm3 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh (wrapping byte add; the vertical routine uses por here)
+
+ pcmpeqb mm4, mm5 ; 0xff where the sum is zero (mm5 is still zero)
+
+ pcmpeqb mm5, mm5 ; mm5 = all ones
+ pxor mm4, mm5 ; invert: mm4 = hev mask
+
+
+ ; start work on filters
+ movq mm2, [rsi+2*rax] ; p1
+ movq mm7, [rdi] ; q1
+ pxor mm2, [GLOBAL(t80)] ; p1 offset to convert to signed values
+ pxor mm7, [GLOBAL(t80)] ; q1 offset to convert to signed values
+ psubsb mm2, mm7 ; p1 - q1
+ pand mm2, mm4 ; high var mask (hvm)(p1 - q1)
+ pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values (mm6 = p0)
+ pxor mm0, [GLOBAL(t80)] ; offset to convert to signed values (mm0 = q0)
+ movq mm3, mm0 ; q0
+ psubsb mm0, mm6 ; q0 - p0
+ paddsb mm2, mm0 ; 1 * (q0 - p0) + hvm(p1 - q1)
+ paddsb mm2, mm0 ; 2 * (q0 - p0) + hvm(p1 - q1)
+ paddsb mm2, mm0 ; 3 * (q0 - p0) + hvm(p1 - q1)
+ pand mm1, mm2 ; mask filter values we don't care about
+ movq mm2, mm1
+ paddsb mm1, [GLOBAL(t4)] ; 3* (q0 - p0) + hvm(p1 - q1) + 4
+ paddsb mm2, [GLOBAL(t3)] ; 3* (q0 - p0) + hvm(p1 - q1) + 3
+
+ pxor mm0, mm0 ;
+ pxor mm5, mm5
+ punpcklbw mm0, mm2 ; low 4 bytes, each placed in the high half of a word
+ punpckhbw mm5, mm2 ; high 4 bytes, each placed in the high half of a word
+ psraw mm0, 11 ; 8 + 3: signed byte >> 3, sign-extended to a word
+ psraw mm5, 11
+ packsswb mm0, mm5
+ movq mm2, mm0 ; (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3;
+
+ pxor mm0, mm0 ; 0
+ movq mm5, mm1 ; abcdefgh
+ punpcklbw mm0, mm1 ; e0f0g0h0
+ psraw mm0, 11 ; sign extended shift right by 3
+ pxor mm1, mm1 ; 0
+ punpckhbw mm1, mm5 ; a0b0c0d0
+ psraw mm1, 11 ; sign extended shift right by 3
+ movq mm5, mm0 ; save results
+
+ packsswb mm0, mm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3
+ paddsw mm5, [GLOBAL(ones)]
+ paddsw mm1, [GLOBAL(ones)]
+ psraw mm5, 1 ; partial shifted one more time for 2nd tap
+ psraw mm1, 1 ; partial shifted one more time for 2nd tap
+ packsswb mm5, mm1 ; (((3* (q0 - p0) + hvm(p1 - q1) + 4) >> 3) + 1) >> 1
+ pandn mm4, mm5 ; high edge variance additive (kept only where hev is clear)
+
+ paddsb mm6, mm2 ; p0+= p0 add
+ pxor mm6, [GLOBAL(t80)] ; unoffset
+ movq [rsi+rax], mm6 ; write back
+
+ movq mm6, [rsi+2*rax] ; p1
+ pxor mm6, [GLOBAL(t80)] ; reoffset
+ paddsb mm6, mm4 ; p1+= p1 add
+ pxor mm6, [GLOBAL(t80)] ; unoffset
+ movq [rsi+2*rax], mm6 ; write back
+
+ psubsb mm3, mm0 ; q0-= q0 add
+ pxor mm3, [GLOBAL(t80)] ; unoffset
+ movq [rsi], mm3 ; write back
+
+ psubsb mm7, mm4 ; q1-= q1 add
+ pxor mm7, [GLOBAL(t80)] ; unoffset
+ movq [rdi], mm7 ; write back
+
+ add rsi,8 ; next 8 columns
+ neg rax ; pitch back to positive for the next pass
+ dec rcx
+ jnz .next8_h
+
+ add rsp, 32
+ pop rsp ; presumably restores the rsp saved by ALIGN_STACK -- macro is defined outside this file
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+; Normal loop filter across a vertical edge (MMX): each pass loads an 8x8 byte tile (8 rows, columns src_ptr-4 .. src_ptr+3), transposes it so p3..q3 each fill one register,
+; applies the mask / hev / filter arithmetic, then transposes p1 p0 q0 q1 back and stores 4 bytes per row at byte offset +2 of the tile.
+;void vp9_loop_filter_vertical_edge_mmx
+;(
+; unsigned char *src_ptr, -- first column right of the edge (q0) in the top row
+; int src_pixel_step, -- byte offset between consecutive rows
+; const char *blimit, -- 8 bytes are read; bound for abs(p0-q0)*2 + abs(p1-q1)/2
+; const char *limit, -- 8 bytes are read; bound for each neighbouring-column difference
+; const char *thresh, -- 8 bytes are read; hev threshold for abs(p1-p0) and abs(q1-q0)
+; int count -- number of 8-row passes
+;)
+global sym(vp9_loop_filter_vertical_edge_mmx) PRIVATE
+sym(vp9_loop_filter_vertical_edge_mmx):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ ALIGN_STACK 16, rax
+ sub rsp, 64 ; reserve 64 bytes
+ %define t0 [rsp + 0] ;__declspec(align(16)) char t0[8];
+ %define t1 [rsp + 16] ;__declspec(align(16)) char t1[8];
+ %define srct [rsp + 32] ;__declspec(align(16)) char srct[32];
+
+ mov rsi, arg(0) ;src_ptr
+ movsxd rax, dword ptr arg(1) ;src_pixel_step (row pitch, sign-extended)
+
+ lea rsi, [rsi + rax*4 - 4] ; rsi = row 4 of the tile, 4 bytes left of the edge
+
+ movsxd rcx, dword ptr arg(5) ;count
+.next8_v:
+ mov rdi, rsi ; rdi points to row +1 for indirect addressing
+ add rdi, rax
+
+
+ ;transpose
+ movq mm6, [rsi+2*rax] ; 67 66 65 64 63 62 61 60
+ movq mm7, mm6 ; 67 66 65 64 63 62 61 60
+
+ punpckhbw mm7, [rdi+2*rax] ; 77 67 76 66 75 65 74 64
+ punpcklbw mm6, [rdi+2*rax] ; 73 63 72 62 71 61 70 60
+
+ movq mm4, [rsi] ; 47 46 45 44 43 42 41 40
+ movq mm5, mm4 ; 47 46 45 44 43 42 41 40
+
+ punpckhbw mm5, [rsi+rax] ; 57 47 56 46 55 45 54 44
+ punpcklbw mm4, [rsi+rax] ; 53 43 52 42 51 41 50 40
+
+ movq mm3, mm5 ; 57 47 56 46 55 45 54 44
+ punpckhwd mm5, mm7 ; 77 67 57 47 76 66 56 46
+
+ punpcklwd mm3, mm7 ; 75 65 55 45 74 64 54 44
+ movq mm2, mm4 ; 53 43 52 42 51 41 50 40
+
+ punpckhwd mm4, mm6 ; 73 63 53 43 72 62 52 42
+ punpcklwd mm2, mm6 ; 71 61 51 41 70 60 50 40
+
+ neg rax ; pitch negative: address rows 0..3 above rsi
+ movq mm6, [rsi+rax*2] ; 27 26 25 24 23 22 21 20
+
+ movq mm1, mm6 ; 27 26 25 24 23 22 21 20
+ punpckhbw mm6, [rsi+rax] ; 37 27 36 26 35 25 34 24
+
+ punpcklbw mm1, [rsi+rax] ; 33 23 32 22 31 21 30 20
+ movq mm7, [rsi+rax*4]; ; 07 06 05 04 03 02 01 00
+
+ punpckhbw mm7, [rdi+rax*4] ; 17 07 16 06 15 05 14 04
+ movq mm0, mm7 ; 17 07 16 06 15 05 14 04
+
+ punpckhwd mm7, mm6 ; 37 27 17 07 36 26 16 06
+ punpcklwd mm0, mm6 ; 35 25 15 05 34 24 14 04
+
+ movq mm6, mm7 ; 37 27 17 07 36 26 16 06
+ punpckhdq mm7, mm5 ; 77 67 57 47 37 27 17 07 = q3
+
+ punpckldq mm6, mm5 ; 76 66 56 46 36 26 16 06 = q2
+
+ movq mm5, mm6 ; 76 66 56 46 36 26 16 06
+ psubusb mm5, mm7 ; q2-q3
+
+ psubusb mm7, mm6 ; q3-q2
+ por mm7, mm5; ; mm7=abs (q3-q2)
+
+ movq mm5, mm0 ; 35 25 15 05 34 24 14 04
+ punpckhdq mm5, mm3 ; 75 65 55 45 35 25 15 05 = q1
+
+ punpckldq mm0, mm3 ; 74 64 54 44 34 24 14 04 = q0
+ movq mm3, mm5 ; 75 65 55 45 35 25 15 05 = q1
+
+ psubusb mm3, mm6 ; q1-q2
+ psubusb mm6, mm5 ; q2-q1
+
+ por mm6, mm3 ; mm6=abs(q2-q1)
+ lea rdx, srct ; rdx -> scratch: [0]=p1 [8]=p0 [16]=q0 [24]=q1
+
+ movq [rdx+24], mm5 ; save q1
+ movq [rdx+16], mm0 ; save q0
+
+ movq mm3, [rsi+rax*4] ; 07 06 05 04 03 02 01 00
+ punpcklbw mm3, [rdi+rax*4] ; 13 03 12 02 11 01 10 00
+
+ movq mm0, mm3 ; 13 03 12 02 11 01 10 00
+ punpcklwd mm0, mm1 ; 31 21 11 01 30 20 10 00
+
+ punpckhwd mm3, mm1 ; 33 23 13 03 32 22 12 02
+ movq mm1, mm0 ; 31 21 11 01 30 20 10 00
+
+ punpckldq mm0, mm2 ; 70 60 50 40 30 20 10 00 =p3
+ punpckhdq mm1, mm2 ; 71 61 51 41 31 21 11 01 =p2
+
+ movq mm2, mm1 ; 71 61 51 41 31 21 11 01 =p2
+ psubusb mm2, mm0 ; p2-p3
+
+ psubusb mm0, mm1 ; p3-p2
+ por mm0, mm2 ; mm0=abs(p3-p2)
+
+ movq mm2, mm3 ; 33 23 13 03 32 22 12 02
+ punpckldq mm2, mm4 ; 72 62 52 42 32 22 12 02 = p1
+
+ punpckhdq mm3, mm4 ; 73 63 53 43 33 23 13 03 = p0
+ movq [rdx+8], mm3 ; save p0
+
+ movq [rdx], mm2 ; save p1
+ movq mm5, mm2 ; mm5 = p1
+
+ psubusb mm2, mm1 ; p1-p2
+ psubusb mm1, mm5 ; p2-p1
+
+ por mm1, mm2 ; mm1=abs(p2-p1)
+ mov rdx, arg(3) ;limit
+
+ movq mm4, [rdx] ; mm4 = limit
+ psubusb mm7, mm4
+
+ psubusb mm0, mm4
+ psubusb mm1, mm4
+
+ psubusb mm6, mm4
+ por mm7, mm6
+
+ por mm0, mm1
+ por mm0, mm7 ; abs(q3-q2) > limit || abs(p3-p2) > limit ||abs(p2-p1) > limit || abs(q2-q1) > limit
+
+ movq mm1, mm5 ; p1
+
+ movq mm7, mm3 ; mm3=mm7=p0
+ psubusb mm7, mm5 ; p0 - p1
+
+ psubusb mm5, mm3 ; p1 - p0
+ por mm5, mm7 ; abs(p1-p0)
+
+ movq t0, mm5 ; save abs(p1-p0)
+ lea rdx, srct
+
+ psubusb mm5, mm4
+ por mm0, mm5 ; mm0=mask
+
+ movq mm5, [rdx+16] ; mm5=q0
+ movq mm7, [rdx+24] ; mm7=q1
+
+ movq mm6, mm5 ; mm6=q0
+ movq mm2, mm7 ; q1
+ psubusb mm5, mm7 ; q0-q1
+
+ psubusb mm7, mm6 ; q1-q0
+ por mm7, mm5 ; abs(q1-q0)
+
+ movq t1, mm7 ; save abs(q1-q0)
+ psubusb mm7, mm4
+
+ por mm0, mm7 ; mask
+
+ movq mm5, mm2 ; q1
+ psubusb mm5, mm1 ; q1-=p1
+ psubusb mm1, mm2 ; p1-=q1
+ por mm5, mm1 ; abs(p1-q1)
+ pand mm5, [GLOBAL(tfe)] ; set lsb of each byte to zero
+ psrlw mm5, 1 ; abs(p1-q1)/2
+
+ mov rdx, arg(2) ;blimit ;
+
+ movq mm4, [rdx] ;blimit
+ movq mm1, mm3 ; mm1=mm3=p0
+
+ movq mm7, mm6 ; mm7=mm6=q0
+ psubusb mm1, mm7 ; p0-q0
+
+ psubusb mm7, mm3 ; q0-p0
+ por mm1, mm7 ; abs(q0-p0)
+ paddusb mm1, mm1 ; abs(q0-p0)*2
+ paddusb mm1, mm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2
+
+ psubusb mm1, mm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
+ por mm1, mm0; ; mask
+
+ pxor mm0, mm0
+ pcmpeqb mm1, mm0 ; mm1 = 0xff in bytes where every test above passed
+
+ ; calculate high edge variance
+ mov rdx, arg(4) ;thresh ; get thresh
+ movq mm7, [rdx]
+ ; in this routine t0 holds abs(p1 - p0) and t1 holds abs(q1 - q0)
+ movq mm4, t0 ; get abs (p1 - p0)
+ psubusb mm4, mm7
+
+ movq mm3, t1 ; get abs (q1 - q0)
+ psubusb mm3, mm7
+
+ por mm4, mm3 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh
+ pcmpeqb mm4, mm0 ; 0xff where neither exceeded thresh
+
+ pcmpeqb mm0, mm0 ; mm0 = all ones
+ pxor mm4, mm0 ; invert: mm4 = hev mask
+
+
+
+ ; start work on filters
+ lea rdx, srct
+
+ movq mm2, [rdx] ; p1
+ movq mm7, [rdx+24] ; q1
+
+ movq mm6, [rdx+8] ; p0
+ movq mm0, [rdx+16] ; q0
+
+ pxor mm2, [GLOBAL(t80)] ; p1 offset to convert to signed values
+ pxor mm7, [GLOBAL(t80)] ; q1 offset to convert to signed values
+
+ psubsb mm2, mm7 ; p1 - q1
+ pand mm2, mm4 ; high var mask (hvm)(p1 - q1)
+
+ pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values
+ pxor mm0, [GLOBAL(t80)] ; offset to convert to signed values
+
+ movq mm3, mm0 ; q0
+ psubsb mm0, mm6 ; q0 - p0
+
+ paddsb mm2, mm0 ; 1 * (q0 - p0) + hvm(p1 - q1)
+ paddsb mm2, mm0 ; 2 * (q0 - p0) + hvm(p1 - q1)
+
+ paddsb mm2, mm0 ; 3 * (q0 - p0) + hvm(p1 - q1)
+ pand mm1, mm2 ; mask filter values we don't care about
+
+ movq mm2, mm1
+ paddsb mm1, [GLOBAL(t4)] ; 3* (q0 - p0) + hvm(p1 - q1) + 4
+
+ paddsb mm2, [GLOBAL(t3)] ; 3* (q0 - p0) + hvm(p1 - q1) + 3
+ pxor mm0, mm0 ;
+
+ pxor mm5, mm5
+ punpcklbw mm0, mm2 ; low 4 bytes, each placed in the high half of a word
+
+ punpckhbw mm5, mm2 ; high 4 bytes, each placed in the high half of a word
+ psraw mm0, 11 ; 8 + 3: signed byte >> 3, sign-extended to a word
+
+ psraw mm5, 11
+ packsswb mm0, mm5
+
+ movq mm2, mm0 ; (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3;
+
+ pxor mm0, mm0 ; 0
+ movq mm5, mm1 ; abcdefgh
+
+ punpcklbw mm0, mm1 ; e0f0g0h0
+ psraw mm0, 11 ; sign extended shift right by 3
+
+ pxor mm1, mm1 ; 0
+ punpckhbw mm1, mm5 ; a0b0c0d0
+
+ psraw mm1, 11 ; sign extended shift right by 3
+ movq mm5, mm0 ; save results
+
+ packsswb mm0, mm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3
+ paddsw mm5, [GLOBAL(ones)]
+
+ paddsw mm1, [GLOBAL(ones)]
+ psraw mm5, 1 ; partial shifted one more time for 2nd tap
+
+ psraw mm1, 1 ; partial shifted one more time for 2nd tap
+ packsswb mm5, mm1 ; (((3* (q0 - p0) + hvm(p1 - q1) + 4) >> 3) + 1) >> 1
+
+ pandn mm4, mm5 ; high edge variance additive (kept only where hev is clear)
+
+ paddsb mm6, mm2 ; p0+= p0 add
+ pxor mm6, [GLOBAL(t80)] ; unoffset
+
+ ; mm6=p0 ;
+ movq mm1, [rdx] ; p1
+ pxor mm1, [GLOBAL(t80)] ; reoffset
+
+ paddsb mm1, mm4 ; p1+= p1 add
+ pxor mm1, [GLOBAL(t80)] ; unoffset
+ ; mm6 = p0 mm1 = p1
+
+ psubsb mm3, mm0 ; q0-= q0 add
+ pxor mm3, [GLOBAL(t80)] ; unoffset
+
+ ; mm3 = q0
+ psubsb mm7, mm4 ; q1-= q1 add
+ pxor mm7, [GLOBAL(t80)] ; unoffset
+ ; mm7 = q1
+
+ ; transpose and write back
+ ; mm1 = 72 62 52 42 32 22 12 02
+ ; mm6 = 73 63 53 43 33 23 13 03
+ ; mm3 = 74 64 54 44 34 24 14 04
+ ; mm7 = 75 65 55 45 35 25 15 05
+
+ movq mm2, mm1 ; 72 62 52 42 32 22 12 02
+ punpcklbw mm2, mm6 ; 33 32 23 22 13 12 03 02
+
+ movq mm4, mm3 ; 74 64 54 44 34 24 14 04
+ punpckhbw mm1, mm6 ; 73 72 63 62 53 52 43 42
+
+ punpcklbw mm4, mm7 ; 35 34 25 24 15 14 05 04
+ punpckhbw mm3, mm7 ; 75 74 65 64 55 54 45 44
+
+ movq mm6, mm2 ; 33 32 23 22 13 12 03 02
+ punpcklwd mm2, mm4 ; 15 14 13 12 05 04 03 02
+
+ punpckhwd mm6, mm4 ; 35 34 33 32 25 24 23 22
+ movq mm5, mm1 ; 73 72 63 62 53 52 43 42
+
+ punpcklwd mm1, mm3 ; 55 54 53 52 45 44 43 42
+ punpckhwd mm5, mm3 ; 75 74 73 72 65 64 63 62
+
+
+ ; mm2 = 15 14 13 12 05 04 03 02
+ ; mm6 = 35 34 33 32 25 24 23 22
+ ; mm1 = 55 54 53 52 45 44 43 42
+ ; mm5 = 75 74 73 72 65 64 63 62
+
+
+ ; each movd below stores 4 bytes (columns 2..5) of one row; rax is still negative here
+ movd [rsi+rax*4+2], mm2 ; row 0
+ psrlq mm2, 32
+
+ movd [rdi+rax*4+2], mm2 ; row 1
+ movd [rsi+rax*2+2], mm6 ; row 2
+
+ psrlq mm6, 32
+ movd [rsi+rax+2],mm6 ; row 3
+
+ movd [rsi+2], mm1 ; row 4
+ psrlq mm1, 32
+
+ movd [rdi+2], mm1 ; row 5
+ neg rax ; pitch back to positive
+
+ movd [rdi+rax+2],mm5 ; row 6
+ psrlq mm5, 32
+
+ movd [rdi+rax*2+2], mm5 ; row 7
+
+ lea rsi, [rsi+rax*8] ; next 8 rows
+ dec rcx
+ jnz .next8_v
+
+ add rsp, 64
+ pop rsp ; presumably restores the rsp saved by ALIGN_STACK -- macro is defined outside this file
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+; Simple loop filter across a horizontal edge (MMX): two 8-column passes (rcx = 2), so 16 columns in total.
+; The mask uses blimit only (abs(p0-q0)*2 + abs(p1-q1)/2); rows p1 and q1 are read but only p0 and q0 are stored back.
+;void vp9_loop_filter_simple_horizontal_edge_mmx
+;(
+; unsigned char *src_ptr, -- row q0 (first row at/below the edge)
+; int src_pixel_step, -- byte offset between consecutive rows
+; const char *blimit -- 8 bytes are read on every pass
+;)
+global sym(vp9_loop_filter_simple_horizontal_edge_mmx) PRIVATE
+sym(vp9_loop_filter_simple_horizontal_edge_mmx):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 3
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rsi, arg(0) ;src_ptr
+ movsxd rax, dword ptr arg(1) ;src_pixel_step (row pitch, sign-extended)
+
+ mov rcx, 2 ; count
+.nexts8_h:
+ mov rdx, arg(2) ;blimit ; get blimit
+ movq mm3, [rdx] ;
+
+ mov rdi, rsi ; rdi points to row +1 for indirect addressing
+ add rdi, rax
+ neg rax ; pitch negative: address the rows above the edge
+
+ ; calculate mask
+ movq mm1, [rsi+2*rax] ; p1
+ movq mm0, [rdi] ; q1
+ movq mm2, mm1
+ movq mm7, mm0
+ movq mm4, mm0
+ psubusb mm0, mm1 ; q1-=p1
+ psubusb mm1, mm4 ; p1-=q1
+ por mm1, mm0 ; abs(p1-q1)
+ pand mm1, [GLOBAL(tfe)] ; set lsb of each byte to zero
+ psrlw mm1, 1 ; abs(p1-q1)/2
+
+ movq mm5, [rsi+rax] ; p0
+ movq mm4, [rsi] ; q0
+ movq mm0, mm4 ; q0
+ movq mm6, mm5 ; p0
+ psubusb mm5, mm4 ; p0-=q0
+ psubusb mm4, mm6 ; q0-=p0
+ por mm5, mm4 ; abs(p0 - q0)
+ paddusb mm5, mm5 ; abs(p0-q0)*2
+ paddusb mm5, mm1 ; abs (p0 - q0) *2 + abs(p1-q1)/2
+
+ psubusb mm5, mm3 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit
+ pxor mm3, mm3
+ pcmpeqb mm5, mm3 ; mm5 = 0xff in bytes where the blimit test passed
+
+ ; start work on filters
+ pxor mm2, [GLOBAL(t80)] ; p1 offset to convert to signed values
+ pxor mm7, [GLOBAL(t80)] ; q1 offset to convert to signed values
+ psubsb mm2, mm7 ; p1 - q1
+
+ pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values
+ pxor mm0, [GLOBAL(t80)] ; offset to convert to signed values
+ movq mm3, mm0 ; q0
+ psubsb mm0, mm6 ; q0 - p0
+ paddsb mm2, mm0 ; p1 - q1 + 1 * (q0 - p0)
+ paddsb mm2, mm0 ; p1 - q1 + 2 * (q0 - p0)
+ paddsb mm2, mm0 ; p1 - q1 + 3 * (q0 - p0)
+ pand mm5, mm2 ; mask filter values we don't care about
+
+ ; do + 4 side
+ paddsb mm5, [GLOBAL(t4)] ; 3* (q0 - p0) + (p1 - q1) + 4
+
+ movq mm0, mm5 ; get a copy of filters
+ psllw mm0, 8 ; shift left 8
+ psraw mm0, 3 ; arithmetic shift right 3 (low byte currently sits in the high half)
+ psrlw mm0, 8 ; move it back down: low byte = signed low byte >> 3
+ movq mm1, mm5 ; get a copy of filters
+ psraw mm1, 11 ; arithmetic shift right 11
+ psllw mm1, 8 ; shift left 8 to put it back
+
+ por mm0, mm1 ; put the two together to get result
+
+ psubsb mm3, mm0 ; q0-= q0 add
+ pxor mm3, [GLOBAL(t80)] ; unoffset
+ movq [rsi], mm3 ; write back
+
+
+ ; now do +3 side
+ psubsb mm5, [GLOBAL(t1s)] ; +3 instead of +4
+
+ movq mm0, mm5 ; get a copy of filters
+ psllw mm0, 8 ; shift left 8
+ psraw mm0, 3 ; arithmetic shift right 3 (low byte currently sits in the high half)
+ psrlw mm0, 8 ; move it back down: low byte = signed low byte >> 3
+ psraw mm5, 11 ; arithmetic shift right 11
+ psllw mm5, 8 ; shift left 8 to put it back
+ por mm0, mm5 ; put the two together to get result
+
+
+ paddsb mm6, mm0 ; p0+= p0 add
+ pxor mm6, [GLOBAL(t80)] ; unoffset
+ movq [rsi+rax], mm6 ; write back
+
+ add rsi,8 ; next 8 columns
+ neg rax ; pitch back to positive for the next pass
+ dec rcx
+ jnz .nexts8_h
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+; Simple loop filter across a vertical edge (MMX): two passes of 8 rows (rcx = 2); each pass loads 4 bytes per row (columns src_ptr-2 .. src_ptr+1 = p1 p0 q0 q1) and transposes them.
+; p0 and q0 are filtered under a blimit-only mask; all 4 columns are then transposed back and stored, with p1 and q1 restored unchanged from t0/t1.
+;void vp9_loop_filter_simple_vertical_edge_mmx
+;(
+; unsigned char *src_ptr, -- first column right of the edge (q0) in the top row
+; int src_pixel_step, -- byte offset between consecutive rows
+; const char *blimit -- 8 bytes are read on every pass
+;)
+global sym(vp9_loop_filter_simple_vertical_edge_mmx) PRIVATE
+sym(vp9_loop_filter_simple_vertical_edge_mmx):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 3
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ ALIGN_STACK 16, rax
+ sub rsp, 32 ; reserve 32 bytes
+ %define t0 [rsp + 0] ;__declspec(align(16)) char t0[8];
+ %define t1 [rsp + 16] ;__declspec(align(16)) char t1[8];
+
+ mov rsi, arg(0) ;src_ptr
+ movsxd rax, dword ptr arg(1) ;src_pixel_step (row pitch, sign-extended)
+
+ lea rsi, [rsi + rax*4- 2]; ; rsi = row 4 of the tile, 2 bytes left of the edge
+ mov rcx, 2 ; count
+.nexts8_v:
+
+ lea rdi, [rsi + rax];
+ movd mm0, [rdi + rax * 2] ; xx xx xx xx 73 72 71 70
+
+ movd mm6, [rsi + rax * 2] ; xx xx xx xx 63 62 61 60
+ punpcklbw mm6, mm0 ; 73 63 72 62 71 61 70 60
+
+ movd mm0, [rsi + rax] ; xx xx xx xx 53 52 51 50
+ movd mm4, [rsi] ; xx xx xx xx 43 42 41 40
+
+ punpcklbw mm4, mm0 ; 53 43 52 42 51 41 50 40
+ movq mm5, mm4 ; 53 43 52 42 51 41 50 40
+
+ punpcklwd mm4, mm6 ; 71 61 51 41 70 60 50 40
+ punpckhwd mm5, mm6 ; 73 63 53 43 72 62 52 42
+
+ neg rax ; pitch negative: address rows 0..3 above rsi
+
+ movd mm7, [rsi + rax] ; xx xx xx xx 33 32 31 30
+ movd mm6, [rsi + rax * 2] ; xx xx xx xx 23 22 21 20
+
+ punpcklbw mm6, mm7 ; 33 23 32 22 31 21 30 20
+ movd mm1, [rdi + rax * 4] ; xx xx xx xx 13 12 11 10
+
+ movd mm0, [rsi + rax * 4] ; xx xx xx xx 03 02 01 00
+ punpcklbw mm0, mm1 ; 13 03 12 02 11 01 10 00
+
+ movq mm2, mm0 ; 13 03 12 02 11 01 10 00
+ punpcklwd mm0, mm6 ; 31 21 11 01 30 20 10 00
+
+ punpckhwd mm2, mm6 ; 33 23 13 03 32 22 12 02
+ movq mm1, mm0 ; 31 21 11 01 30 20 10 00
+
+ punpckldq mm0, mm4 ; 70 60 50 40 30 20 10 00 = p1
+ movq mm3, mm2 ; 33 23 13 03 32 22 12 02
+
+ punpckhdq mm1, mm4 ; 71 61 51 41 31 21 11 01 = p0
+ punpckldq mm2, mm5 ; 72 62 52 42 32 22 12 02 = q0
+
+ punpckhdq mm3, mm5 ; 73 63 53 43 33 23 13 03 = q1
+
+
+ ; calculate mask
+ movq mm6, mm0 ; p1
+ movq mm7, mm3 ; q1
+ psubusb mm7, mm6 ; q1-=p1
+ psubusb mm6, mm3 ; p1-=q1
+ por mm6, mm7 ; abs(p1-q1)
+ pand mm6, [GLOBAL(tfe)] ; set lsb of each byte to zero
+ psrlw mm6, 1 ; abs(p1-q1)/2
+
+ movq mm5, mm1 ; p0
+ movq mm4, mm2 ; q0
+
+ psubusb mm5, mm2 ; p0-=q0
+ psubusb mm4, mm1 ; q0-=p0
+
+ por mm5, mm4 ; abs(p0 - q0)
+ paddusb mm5, mm5 ; abs(p0-q0)*2
+ paddusb mm5, mm6 ; abs (p0 - q0) *2 + abs(p1-q1)/2
+
+ mov rdx, arg(2) ;blimit ; get blimit
+ movq mm7, [rdx]
+
+ psubusb mm5, mm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit
+ pxor mm7, mm7
+ pcmpeqb mm5, mm7 ; mm5 = mask
+
+ ; start work on filters
+ movq t0, mm0 ; keep the unmodified p1 column for the write back
+ movq t1, mm3 ; keep the unmodified q1 column for the write back
+
+ pxor mm0, [GLOBAL(t80)] ; p1 offset to convert to signed values
+ pxor mm3, [GLOBAL(t80)] ; q1 offset to convert to signed values
+
+ psubsb mm0, mm3 ; p1 - q1
+ movq mm6, mm1 ; p0
+
+ movq mm7, mm2 ; q0
+ pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values
+
+ pxor mm7, [GLOBAL(t80)] ; offset to convert to signed values
+ movq mm3, mm7 ; offseted ; q0
+
+ psubsb mm7, mm6 ; q0 - p0
+ paddsb mm0, mm7 ; p1 - q1 + 1 * (q0 - p0)
+
+ paddsb mm0, mm7 ; p1 - q1 + 2 * (q0 - p0)
+ paddsb mm0, mm7 ; p1 - q1 + 3 * (q0 - p0)
+
+ pand mm5, mm0 ; mask filter values we don't care about
+
+ paddsb mm5, [GLOBAL(t4)] ; 3* (q0 - p0) + (p1 - q1) + 4
+
+ movq mm0, mm5 ; get a copy of filters
+ psllw mm0, 8 ; shift left 8
+ psraw mm0, 3 ; arithmetic shift right 3 (low byte currently sits in the high half)
+ psrlw mm0, 8 ; move it back down: low byte = signed low byte >> 3
+
+ movq mm7, mm5 ; get a copy of filters
+ psraw mm7, 11 ; arithmetic shift right 11
+ psllw mm7, 8 ; shift left 8 to put it back
+
+ por mm0, mm7 ; put the two together to get result
+
+ psubsb mm3, mm0 ; q0-= q0 add
+ pxor mm3, [GLOBAL(t80)] ; unoffset
+
+ ; now do +3 side
+ psubsb mm5, [GLOBAL(t1s)] ; +3 instead of +4
+
+ movq mm0, mm5 ; get a copy of filters
+ psllw mm0, 8 ; shift left 8
+ psraw mm0, 3 ; arithmetic shift right 3 (low byte currently sits in the high half)
+ psrlw mm0, 8 ; move it back down: low byte = signed low byte >> 3
+
+ psraw mm5, 11 ; arithmetic shift right 11
+ psllw mm5, 8 ; shift left 8 to put it back
+ por mm0, mm5 ; put the two together to get result
+
+ paddsb mm6, mm0 ; p0+= p0 add
+ pxor mm6, [GLOBAL(t80)] ; unoffset
+
+
+ movq mm0, t0 ; original p1 column
+ movq mm4, t1 ; original q1 column
+
+ ; mm0 = 70 60 50 40 30 20 10 00
+ ; mm6 = 71 61 51 41 31 21 11 01
+ ; mm3 = 72 62 52 42 32 22 12 02
+ ; mm4 = 73 63 53 43 33 23 13 03
+ ; transpose back to write out
+
+ movq mm1, mm0 ;
+ punpcklbw mm0, mm6 ; 31 30 21 20 11 10 01 00
+
+ punpckhbw mm1, mm6 ; 71 70 61 60 51 50 41 40
+ movq mm2, mm3 ;
+
+ punpcklbw mm2, mm4 ; 33 32 23 22 13 12 03 02
+ movq mm5, mm1 ; 71 70 61 60 51 50 41 40
+
+ punpckhbw mm3, mm4 ; 73 72 63 62 53 52 43 42
+ movq mm6, mm0 ; 31 30 21 20 11 10 01 00
+
+ punpcklwd mm0, mm2 ; 13 12 11 10 03 02 01 00
+ punpckhwd mm6, mm2 ; 33 32 31 30 23 22 21 20
+
+ movd [rsi+rax*4], mm0 ; write 03 02 01 00
+ punpcklwd mm1, mm3 ; 53 52 51 50 43 42 41 40
+
+ psrlq mm0, 32 ; xx xx xx xx 13 12 11 10
+ punpckhwd mm5, mm3 ; 73 72 71 70 63 62 61 60
+
+ movd [rdi+rax*4], mm0 ; write 13 12 11 10
+ movd [rsi+rax*2], mm6 ; write 23 22 21 20
+
+ psrlq mm6, 32 ; 33 32 31 30
+ movd [rsi], mm1 ; write 43 42 41 40
+
+ movd [rsi + rax], mm6 ; write 33 32 31 30
+ neg rax ; pitch back to positive
+
+ movd [rsi + rax*2], mm5 ; write 63 62 61 60
+ psrlq mm1, 32 ; 53 52 51 50
+
+ movd [rdi], mm1 ; write out 53 52 51 50
+ psrlq mm5, 32 ; 73 72 71 70
+
+ movd [rdi + rax*2], mm5 ; write 73 72 71 70
+
+ lea rsi, [rsi+rax*8] ; next 8
+
+ dec rcx
+ jnz .nexts8_v
+
+ add rsp, 32
+ pop rsp ; presumably restores the rsp saved by ALIGN_STACK -- macro is defined outside this file
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+
+;void fast_loop_filter_vertical_edges_mmx(unsigned char *y_ptr,
+; int y_stride,
+; loop_filter_info *lfi)
+;{
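+; NOTE(review): stale sketch -- vp9_loop_filter_simple_vertical_edge_mmx as defined in this file takes only
+; (src_ptr, src_pixel_step, blimit), not the six arguments passed in the calls below.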
+; vp9_loop_filter_simple_vertical_edge_mmx(y_ptr+4, y_stride, lfi->flim,lfi->lim,lfi->thr,2);
+; vp9_loop_filter_simple_vertical_edge_mmx(y_ptr+8, y_stride, lfi->flim,lfi->lim,lfi->thr,2);
+; vp9_loop_filter_simple_vertical_edge_mmx(y_ptr+12, y_stride, lfi->flim,lfi->lim,lfi->thr,2);
+;}
+
+SECTION_RODATA
+align 16
+tfe:
+ times 8 db 0xfe ; per-byte mask that clears bit 0, applied before the word-wise psrlw 1 that halves abs(p1-q1)
+align 16
+t80:
+ times 8 db 0x80 ; per-byte sign-bit toggle: xor converts unsigned pixels to signed and back
+align 16
+t1s:
+ times 8 db 0x01 ; per-byte 1, subtracted in the simple filters to turn the +4 value into +3
+align 16
+t3:
+ times 8 db 0x03 ; per-byte +3 added to the filter value that adjusts p0
+align 16
+t4:
+ times 8 db 0x04 ; per-byte +4 added to the filter value that adjusts q0
+align 16
+ones:
+ times 4 dw 0x0001 ; per-word 1, added before the extra >>1 that forms the p1/q1 adjustment
+align 16
+s27:
+ times 4 dw 0x1b00 ; 27 << 8 per word; not referenced by any routine in this file
+align 16
+s18:
+ times 4 dw 0x1200 ; 18 << 8 per word; not referenced by any routine in this file
+align 16
+s9:
+ times 4 dw 0x0900 ; 9 << 8 per word; not referenced by any routine in this file
+align 16
+s63:
+ times 4 dw 0x003f ; 63 per word; not referenced by any routine in this file
diff --git a/vp9/common/x86/vp9_loopfilter_sse2.asm b/vp9/common/x86/vp9_loopfilter_sse2.asm
new file mode 100644
index 0000000..ae4c60f
--- /dev/null
+++ b/vp9/common/x86/vp9_loopfilter_sse2.asm
@@ -0,0 +1,1238 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+; Use of pmaxub instead of psubusb to compute filter mask was seen
+; in ffvp8
+; LFH_FILTER_AND_HEV_MASK %1: builds the filter mask in xmm1 and the hev flag in xmm4. Expects xmm7 = limit on entry; leaves xmm0 = q0, xmm6 = p0, xmm7 = 0. Reads blimit via arg(2) and thresh via arg(4); uses the caller's t0/t1 (and q1 when %1 = 0) stack slots.
+%macro LFH_FILTER_AND_HEV_MASK 1
+%if %1
+ movdqa xmm2, [rdi+2*rax] ; q3
+ movdqa xmm1, [rsi+2*rax] ; q2
+ movdqa xmm4, [rsi+rax] ; q1
+ movdqa xmm5, [rsi] ; q0
+ neg rax ; negate pitch to deal with above border
+%else
+ movlps xmm2, [rsi + rcx*2] ; q3 (low 8 bytes come from the rsi plane)
+ movlps xmm1, [rsi + rcx] ; q2
+ movlps xmm4, [rsi] ; q1
+ movlps xmm5, [rsi + rax] ; q0
+
+ movhps xmm2, [rdi + rcx*2] ; high 8 bytes come from the rdi plane
+ movhps xmm1, [rdi + rcx]
+ movhps xmm4, [rdi]
+ movhps xmm5, [rdi + rax]
+
+ lea rsi, [rsi + rax*4] ; move both plane pointers 4 rows along rax (the caller passes rax = -pitch)
+ lea rdi, [rdi + rax*4]
+
+ movdqa XMMWORD PTR [rsp], xmm1 ; store q2
+ movdqa XMMWORD PTR [rsp + 16], xmm4 ; store q1
+%endif
+
+ movdqa xmm6, xmm1 ; q2
+ movdqa xmm3, xmm4 ; q1
+
+ psubusb xmm1, xmm2 ; q2-=q3
+ psubusb xmm2, xmm6 ; q3-=q2
+
+ psubusb xmm4, xmm6 ; q1-=q2
+ psubusb xmm6, xmm3 ; q2-=q1
+
+ por xmm4, xmm6 ; abs(q2-q1)
+ por xmm1, xmm2 ; abs(q3-q2)
+
+ movdqa xmm0, xmm5 ; q0
+ pmaxub xmm1, xmm4 ; xmm1 tracks the running max of all abs differences
+
+ psubusb xmm5, xmm3 ; q0-=q1
+ psubusb xmm3, xmm0 ; q1-=q0
+
+ por xmm5, xmm3 ; abs(q0-q1)
+ movdqa t0, xmm5 ; save to t0
+
+ pmaxub xmm1, xmm5
+
+%if %1
+ movdqa xmm2, [rsi+4*rax] ; p3
+ movdqa xmm4, [rdi+4*rax] ; p2
+ movdqa xmm6, [rsi+2*rax] ; p1
+%else
+ movlps xmm2, [rsi + rax] ; p3
+ movlps xmm4, [rsi] ; p2
+ movlps xmm6, [rsi + rcx] ; p1
+
+ movhps xmm2, [rdi + rax]
+ movhps xmm4, [rdi]
+ movhps xmm6, [rdi + rcx]
+
+ movdqa XMMWORD PTR [rsp + 32], xmm4 ; store p2
+ movdqa XMMWORD PTR [rsp + 48], xmm6 ; store p1
+%endif
+
+ movdqa xmm5, xmm4 ; p2
+ movdqa xmm3, xmm6 ; p1
+
+ psubusb xmm4, xmm2 ; p2-=p3
+ psubusb xmm2, xmm5 ; p3-=p2
+
+ psubusb xmm3, xmm5 ; p1-=p2
+ pmaxub xmm1, xmm4 ; abs(p3 - p2), first saturated half
+
+ psubusb xmm5, xmm6 ; p2-=p1
+ pmaxub xmm1, xmm2 ; abs(p3 - p2), second saturated half
+
+ pmaxub xmm1, xmm5 ; abs(p2 - p1), first saturated half
+ movdqa xmm2, xmm6 ; p1
+
+ pmaxub xmm1, xmm3 ; abs(p2 - p1), second saturated half
+%if %1
+ movdqa xmm4, [rsi+rax] ; p0
+ movdqa xmm3, [rdi] ; q1
+%else
+ movlps xmm4, [rsi + rcx*2] ; p0
+ movhps xmm4, [rdi + rcx*2]
+ movdqa xmm3, q1 ; q1
+%endif
+
+ movdqa xmm5, xmm4 ; p0
+ psubusb xmm4, xmm6 ; p0-=p1
+
+ psubusb xmm6, xmm5 ; p1-=p0
+
+ por xmm6, xmm4 ; abs(p1 - p0)
+ mov rdx, arg(2) ; get blimit
+
+ movdqa t1, xmm6 ; save to t1
+
+ movdqa xmm4, xmm3 ; q1
+ pmaxub xmm1, xmm6
+
+ psubusb xmm3, xmm2 ; q1-=p1
+ psubusb xmm2, xmm4 ; p1-=q1
+
+ psubusb xmm1, xmm7 ; max abs difference minus limit (xmm7 = limit from the caller): nonzero where a limit test fails
+ por xmm2, xmm3 ; abs(p1-q1)
+
+ movdqa xmm7, XMMWORD PTR [rdx] ; blimit
+
+ movdqa xmm3, xmm0 ; q0
+ pand xmm2, [GLOBAL(tfe)] ; set lsb of each byte to zero
+
+ mov rdx, arg(4) ; hev get thresh
+
+ movdqa xmm6, xmm5 ; p0
+ psrlw xmm2, 1 ; abs(p1-q1)/2
+
+ psubusb xmm5, xmm3 ; p0-=q0
+
+ psubusb xmm3, xmm6 ; q0-=p0
+ por xmm5, xmm3 ; abs(p0 - q0)
+
+ paddusb xmm5, xmm5 ; abs(p0-q0)*2
+
+ movdqa xmm4, t0 ; hev get abs (q1 - q0)
+
+ movdqa xmm3, t1 ; get abs (p1 - p0)
+
+ paddusb xmm5, xmm2 ; abs (p0 - q0) *2 + abs(p1-q1)/2
+
+ movdqa xmm2, XMMWORD PTR [rdx] ; hev
+
+ psubusb xmm5, xmm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
+ psubusb xmm4, xmm2 ; hev
+
+ psubusb xmm3, xmm2 ; hev
+ por xmm1, xmm5
+
+ pxor xmm7, xmm7
+ paddb xmm4, xmm3 ; hev abs(q1 - q0) > thresh || abs(p1 - p0) > thresh
+
+ pcmpeqb xmm4, xmm5 ; hev -- NOTE(review): compares against xmm5 (the blimit excess), not the zeroed xmm7; the two agree only in bytes where xmm5 is 0 -- confirm this is intended
+ pcmpeqb xmm3, xmm3 ; hev (xmm3 = all ones)
+
+ pcmpeqb xmm1, xmm7 ; mask xmm1
+ pxor xmm4, xmm3 ; hev (invert the comparison result)
+%endmacro
+; B_FILTER %1: applies the filter given xmm1 = mask and xmm4 = hev. %1 = 0: p1/q1 from the p1/q1 stack slots, split 8-byte stores through rsi/rdi; %1 = 1: rows addressed via rsi/rdi/rax, 16-byte stores; %1 = 2: p1 p0 q0 q1 loaded from srct, nothing stored here. For %1 = 0 or 1, xmm6 = p0 and xmm0 = q0 must already be loaded. Results: xmm1 = p1, xmm6 = p0, xmm3 = q0, xmm7 = q1.
+%macro B_FILTER 1
+%if %1 == 0
+ movdqa xmm2, p1 ; p1
+ movdqa xmm7, q1 ; q1
+%elif %1 == 1
+ movdqa xmm2, [rsi+2*rax] ; p1
+ movdqa xmm7, [rdi] ; q1
+%elif %1 == 2
+ lea rdx, srct
+
+ movdqa xmm2, [rdx] ; p1
+ movdqa xmm7, [rdx+48] ; q1
+ movdqa xmm6, [rdx+16] ; p0
+ movdqa xmm0, [rdx+32] ; q0
+%endif
+
+ pxor xmm2, [GLOBAL(t80)] ; p1 offset to convert to signed values
+ pxor xmm7, [GLOBAL(t80)] ; q1 offset to convert to signed values
+
+ psubsb xmm2, xmm7 ; p1 - q1
+ pxor xmm6, [GLOBAL(t80)] ; offset to convert to signed values
+
+ pand xmm2, xmm4 ; high var mask (hvm)(p1 - q1)
+ pxor xmm0, [GLOBAL(t80)] ; offset to convert to signed values
+
+ movdqa xmm3, xmm0 ; q0
+ psubsb xmm0, xmm6 ; q0 - p0
+
+ paddsb xmm2, xmm0 ; 1 * (q0 - p0) + hvm(p1 - q1)
+
+ paddsb xmm2, xmm0 ; 2 * (q0 - p0) + hvm(p1 - q1)
+
+ paddsb xmm2, xmm0 ; 3 * (q0 - p0) + hvm(p1 - q1)
+
+ pand xmm1, xmm2 ; mask filter values we don't care about
+
+ movdqa xmm2, xmm1
+
+ paddsb xmm1, [GLOBAL(t4)] ; 3* (q0 - p0) + hvm(p1 - q1) + 4
+ paddsb xmm2, [GLOBAL(t3)] ; 3* (q0 - p0) + hvm(p1 - q1) + 3
+
+ punpckhbw xmm5, xmm2 ; axbxcxdx (x = leftover low bytes, discarded by the >>11 below)
+ punpcklbw xmm2, xmm2 ; exfxgxhx
+
+ punpcklbw xmm0, xmm1 ; exfxgxhx
+ psraw xmm5, 11 ; sign extended shift right by 3
+
+ punpckhbw xmm1, xmm1 ; axbxcxdx
+ psraw xmm2, 11 ; sign extended shift right by 3
+
+ packsswb xmm2, xmm5 ; (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3;
+ psraw xmm0, 11 ; sign extended shift right by 3
+
+ psraw xmm1, 11 ; sign extended shift right by 3
+ movdqa xmm5, xmm0 ; save results
+
+ packsswb xmm0, xmm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3
+ paddsw xmm5, [GLOBAL(ones)]
+
+ paddsw xmm1, [GLOBAL(ones)]
+ psraw xmm5, 1 ; partial shifted one more time for 2nd tap
+
+ psraw xmm1, 1 ; partial shifted one more time for 2nd tap
+
+ paddsb xmm6, xmm2 ; p0+= p0 add
+ packsswb xmm5, xmm1 ; (((3* (q0 - p0) + hvm(p1 - q1) + 4) >> 3) + 1) >> 1
+
+%if %1 == 0
+ movdqa xmm1, p1 ; p1
+%elif %1 == 1
+ movdqa xmm1, [rsi+2*rax] ; p1
+%elif %1 == 2
+ movdqa xmm1, [rdx] ; p1
+%endif
+ pandn xmm4, xmm5 ; high edge variance additive (kept only where hev is clear)
+ pxor xmm6, [GLOBAL(t80)] ; unoffset
+
+ pxor xmm1, [GLOBAL(t80)] ; reoffset
+ psubsb xmm3, xmm0 ; q0-= q0 add
+
+ paddsb xmm1, xmm4 ; p1+= p1 add
+ pxor xmm3, [GLOBAL(t80)] ; unoffset
+
+ pxor xmm1, [GLOBAL(t80)] ; unoffset
+ psubsb xmm7, xmm4 ; q1-= q1 add
+
+ pxor xmm7, [GLOBAL(t80)] ; unoffset
+%if %1 == 0
+ lea rsi, [rsi + rcx*2] ; step both plane pointers 2 rows along rcx to the p0 row
+ lea rdi, [rdi + rcx*2]
+ movq MMWORD PTR [rsi], xmm6 ; p0
+ movhps MMWORD PTR [rdi], xmm6
+ movq MMWORD PTR [rsi + rax], xmm1 ; p1
+ movhps MMWORD PTR [rdi + rax], xmm1
+ movq MMWORD PTR [rsi + rcx], xmm3 ; q0
+ movhps MMWORD PTR [rdi + rcx], xmm3
+ movq MMWORD PTR [rsi + rcx*2],xmm7 ; q1
+ movhps MMWORD PTR [rdi + rcx*2],xmm7
+%elif %1 == 1
+ movdqa [rsi+rax], xmm6 ; write back p0
+ movdqa [rsi+2*rax], xmm1 ; write back p1
+ movdqa [rsi], xmm3 ; write back q0
+ movdqa [rdi], xmm7 ; write back q1
+%endif
+
+%endmacro
+; Normal loop filter across a horizontal edge (SSE2): a single pass over 16 adjacent columns using 16-byte movdqa row loads and stores.
+; The count argument appears in the prototype below, but arg(5) is never read by this routine or by the two macros it expands.
+;void vp9_loop_filter_horizontal_edge_sse2
+;(
+; unsigned char *src_ptr, -- row q0 (first row at/below the edge)
+; int src_pixel_step, -- byte offset between consecutive rows
+; const char *blimit, -- 16 bytes are read (inside LFH_FILTER_AND_HEV_MASK)
+; const char *limit, -- 16 bytes are read into xmm7 before the macro runs
+; const char *thresh, -- 16 bytes are read (inside LFH_FILTER_AND_HEV_MASK)
+; int count -- not read
+;)
+global sym(vp9_loop_filter_horizontal_edge_sse2) PRIVATE
+sym(vp9_loop_filter_horizontal_edge_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ ALIGN_STACK 16, rax
+ sub rsp, 32 ; reserve 32 bytes
+ %define t0 [rsp + 0] ;__declspec(align(16)) char t0[16];
+ %define t1 [rsp + 16] ;__declspec(align(16)) char t1[16];
+
+ mov rsi, arg(0) ;src_ptr
+ movsxd rax, dword ptr arg(1) ;src_pixel_step
+
+ mov rdx, arg(3) ;limit
+ movdqa xmm7, XMMWORD PTR [rdx] ; xmm7 = limit, as the mask macro expects
+
+ lea rdi, [rsi+rax] ; rdi points to row +1 for indirect addressing
+
+ ; calculate breakout conditions and high edge variance
+ LFH_FILTER_AND_HEV_MASK 1
+ ; filter and write back the result
+ B_FILTER 1
+
+ add rsp, 32
+ pop rsp ; presumably restores the rsp saved by ALIGN_STACK -- macro is defined outside this file
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+; Normal loop filter across a horizontal edge for two 8-column planes at once (SSE2): arg(0) (commented "u") supplies the low 8 bytes of every row and arg(5) (commented "v") the high 8 bytes.
+; NOTE(review): the prototype below lists the sixth parameter as "int count", but the code loads arg(5) into rdi and dereferences it as the second plane pointer.
+;void vp9_loop_filter_horizontal_edge_uv_sse2
+;(
+; unsigned char *src_ptr, -- first plane ("u"), row q0
+; int src_pixel_step, -- byte offset between consecutive rows (shared by both planes)
+; const char *blimit, -- 16 bytes are read (inside LFH_FILTER_AND_HEV_MASK)
+; const char *limit, -- 16 bytes are read into xmm7 before the macro runs
+; const char *thresh, -- 16 bytes are read (inside LFH_FILTER_AND_HEV_MASK)
+; int count -- used by the code as the second plane pointer ("v"); see the note above
+;)
+global sym(vp9_loop_filter_horizontal_edge_uv_sse2) PRIVATE
+sym(vp9_loop_filter_horizontal_edge_uv_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ ALIGN_STACK 16, rax
+ sub rsp, 96 ; reserve 96 bytes
+ %define q2 [rsp + 0] ;__declspec(align(16)) char q2[16];
+ %define q1 [rsp + 16] ;__declspec(align(16)) char q1[16];
+ %define p2 [rsp + 32] ;__declspec(align(16)) char p2[16];
+ %define p1 [rsp + 48] ;__declspec(align(16)) char p1[16];
+ %define t0 [rsp + 64] ;__declspec(align(16)) char t0[16];
+ %define t1 [rsp + 80] ;__declspec(align(16)) char t1[16];
+
+ mov rsi, arg(0) ; u
+ mov rdi, arg(5) ; v
+ movsxd rax, dword ptr arg(1) ; src_pixel_step
+ mov rcx, rax ; rcx = +pitch
+ neg rax ; negate pitch to deal with above border
+
+ mov rdx, arg(3) ;limit
+ movdqa xmm7, XMMWORD PTR [rdx] ; xmm7 = limit, as the mask macro expects
+
+ lea rsi, [rsi + rcx] ; both pointers start one row below q0 (the q1 row)
+ lea rdi, [rdi + rcx]
+
+ ; calculate breakout conditions and high edge variance
+ LFH_FILTER_AND_HEV_MASK 0
+ ; filter and write back the result
+ B_FILTER 0
+
+ add rsp, 96
+ pop rsp ; presumably restores the rsp saved by ALIGN_STACK -- macro is defined outside this file
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+%macro TRANSPOSE_16X8 2
+ movq xmm4, QWORD PTR [rsi] ; xx xx xx xx xx xx xx xx 07 06 05 04 03 02 01 00
+ movq xmm1, QWORD PTR [rdi] ; xx xx xx xx xx xx xx xx 17 16 15 14 13 12 11 10
+ movq xmm0, QWORD PTR [rsi+2*rax] ; xx xx xx xx xx xx xx xx 27 26 25 24 23 22 21 20
+ movq xmm7, QWORD PTR [rdi+2*rax] ; xx xx xx xx xx xx xx xx 37 36 35 34 33 32 31 30
+ movq xmm5, QWORD PTR [rsi+4*rax] ; xx xx xx xx xx xx xx xx 47 46 45 44 43 42 41 40
+ movq xmm2, QWORD PTR [rdi+4*rax] ; xx xx xx xx xx xx xx xx 57 56 55 54 53 52 51 50
+
+ punpcklbw xmm4, xmm1 ; 17 07 16 06 15 05 14 04 13 03 12 02 11 01 10 00
+
+ movq xmm1, QWORD PTR [rdi+2*rcx] ; xx xx xx xx xx xx xx xx 77 76 75 74 73 72 71 70
+
+ movdqa xmm3, xmm4 ; 17 07 16 06 15 05 14 04 13 03 12 02 11 01 10 00
+ punpcklbw xmm0, xmm7 ; 37 27 36 26 35 25 34 24 33 23 32 22 31 21 30 20
+
+ movq xmm7, QWORD PTR [rsi+2*rcx] ; xx xx xx xx xx xx xx xx 67 66 65 64 63 62 61 60
+
+ punpcklbw xmm5, xmm2 ; 57 47 56 46 55 45 54 44 53 43 52 42 51 41 50 40
+%if %1
+ lea rsi, [rsi+rax*8] ; %1 != 0: rows 8-f follow 8 rows further down the same buffer
+%else
+ mov rsi, arg(5) ; v_ptr (%1 == 0: rows 8-f are read from the arg(5) buffer)
+%endif
+
+ movdqa xmm6, xmm5 ; 57 47 56 46 55 45 54 44 53 43 52 42 51 41 50 40
+ punpcklbw xmm7, xmm1 ; 77 67 76 66 75 65 74 64 73 63 72 62 71 61 70 60
+
+ punpcklwd xmm5, xmm7 ; 73 63 53 43 72 62 52 42 71 61 51 41 70 60 50 40
+
+ punpckhwd xmm6, xmm7 ; 77 67 57 47 76 66 56 46 75 65 55 45 74 64 54 44
+%if %1
+ lea rdi, [rdi+rax*8] ; rdi advances 8 rows as well
+%else
+ lea rsi, [rsi - 4] ; start 4 bytes to the left of v_ptr
+%endif
+
+ punpcklwd xmm3, xmm0 ; 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00
+%if %1
+ lea rdx, srct ; rdx = address of the srct scratch buffer
+%else
+ lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing
+%endif
+
+ movdqa xmm2, xmm3 ; 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00
+ punpckhwd xmm4, xmm0 ; 37 27 17 07 36 26 16 06 35 25 15 05 34 24 14 04
+
+ movdqa xmm7, xmm4 ; 37 27 17 07 36 26 16 06 35 25 15 05 34 24 14 04
+ punpckhdq xmm3, xmm5 ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
+
+ punpckhdq xmm7, xmm6 ; 77 67 57 47 37 27 17 07 76 66 56 46 36 26 16 06
+
+ punpckldq xmm4, xmm6 ; 75 65 55 45 35 25 15 05 74 64 54 44 34 24 14 04
+
+ punpckldq xmm2, xmm5 ; 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00
+
+ movdqa t0, xmm2 ; save to free XMM2
+ movq xmm2, QWORD PTR [rsi] ; xx xx xx xx xx xx xx xx 87 86 85 84 83 82 81 80
+ movq xmm6, QWORD PTR [rdi] ; xx xx xx xx xx xx xx xx 97 96 95 94 93 92 91 90
+ movq xmm0, QWORD PTR [rsi+2*rax] ; xx xx xx xx xx xx xx xx a7 a6 a5 a4 a3 a2 a1 a0
+ movq xmm5, QWORD PTR [rdi+2*rax] ; xx xx xx xx xx xx xx xx b7 b6 b5 b4 b3 b2 b1 b0
+ movq xmm1, QWORD PTR [rsi+4*rax] ; xx xx xx xx xx xx xx xx c7 c6 c5 c4 c3 c2 c1 c0
+
+ punpcklbw xmm2, xmm6 ; 97 87 96 86 95 85 94 84 93 83 92 82 91 81 90 80
+
+ movq xmm6, QWORD PTR [rdi+4*rax] ; xx xx xx xx xx xx xx xx d7 d6 d5 d4 d3 d2 d1 d0
+
+ punpcklbw xmm0, xmm5 ; b7 a7 b6 a6 b5 a5 b4 a4 b3 a3 b2 a2 b1 a1 b0 a0
+
+ movq xmm5, QWORD PTR [rsi+2*rcx] ; xx xx xx xx xx xx xx xx e7 e6 e5 e4 e3 e2 e1 e0
+
+ punpcklbw xmm1, xmm6 ; d7 c7 d6 c6 d5 c5 d4 c4 d3 c3 d2 c2 d1 c1 d0 c0
+
+ movq xmm6, QWORD PTR [rdi+2*rcx] ; xx xx xx xx xx xx xx xx f7 f6 f5 f4 f3 f2 f1 f0
+
+ punpcklbw xmm5, xmm6 ; f7 e7 f6 e6 f5 e5 f4 e4 f3 e3 f2 e2 f1 e1 f0 e0
+
+ movdqa xmm6, xmm1 ; d7 c7 d6 c6 d5 c5 d4 c4 d3 c3 d2 c2 d1 c1 d0 c0
+ punpckhwd xmm6, xmm5 ; f7 e7 d7 c7 f6 e6 d6 c6 f5 e5 d5 c5 f4 e4 d4 c4
+
+ punpcklwd xmm1, xmm5 ; f3 e3 d3 c3 f2 e2 d2 c2 f1 e1 d1 c1 f0 e0 d0 c0
+ movdqa xmm5, xmm2 ; 97 87 96 86 95 85 94 84 93 83 92 82 91 81 90 80
+
+ punpcklwd xmm5, xmm0 ; b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80
+
+ punpckhwd xmm2, xmm0 ; b7 a7 97 87 b6 a6 96 86 b5 a5 95 85 b4 a4 94 84
+
+ movdqa xmm0, xmm5 ; b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80
+ punpckldq xmm0, xmm1 ; f1 e1 d1 c1 b1 a1 91 81 f0 e0 d0 c0 b0 a0 90 80
+
+ punpckhdq xmm5, xmm1 ; f3 e3 d3 c3 b3 a3 93 83 f2 e2 d2 c2 b2 a2 92 82
+ movdqa xmm1, xmm2 ; b7 a7 97 87 b6 a6 96 86 b5 a5 95 85 b4 a4 94 84
+
+ punpckldq xmm1, xmm6 ; f5 e5 d5 c5 b5 a5 95 85 f4 e4 d4 c4 b4 a4 94 84
+
+ punpckhdq xmm2, xmm6 ; f7 e7 d7 c7 b7 a7 97 87 f6 e6 d6 c6 b6 a6 96 86
+ movdqa xmm6, xmm7 ; 77 67 57 47 37 27 17 07 76 66 56 46 36 26 16 06
+
+ punpcklqdq xmm6, xmm2 ; f6 e6 d6 c6 b6 a6 96 86 76 66 56 46 36 26 16 06
+
+ punpckhqdq xmm7, xmm2 ; f7 e7 d7 c7 b7 a7 97 87 77 67 57 47 37 27 17 07
+%if %2
+ movdqa xmm2, xmm3 ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
+ punpcklqdq xmm2, xmm5 ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
+
+ punpckhqdq xmm3, xmm5 ; f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03
+
+ movdqa [rdx], xmm2 ; save 2
+
+ movdqa xmm5, xmm4 ; 75 65 55 45 35 25 15 05 74 64 54 44 34 24 14 04
+ punpcklqdq xmm4, xmm1 ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
+
+ movdqa [rdx+16], xmm3 ; save 3
+
+ punpckhqdq xmm5, xmm1 ; f5 e5 d5 c5 b5 a5 95 85 75 65 55 45 35 25 15 05
+
+ movdqa [rdx+32], xmm4 ; save 4
+ movdqa [rdx+48], xmm5 ; save 5
+ movdqa xmm1, t0 ; reload the column 0/1 halves saved in t0
+
+ movdqa xmm2, xmm1 ; 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00
+ punpckhqdq xmm1, xmm0 ; f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01
+
+ punpcklqdq xmm2, xmm0 ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00
+%else
+ movdqa [rdx+112], xmm7 ; save 7
+
+ movdqa [rdx+96], xmm6 ; save 6
+
+ movdqa xmm2, xmm3 ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
+ punpckhqdq xmm3, xmm5 ; f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03
+
+ punpcklqdq xmm2, xmm5 ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
+
+ movdqa [rdx+32], xmm2 ; save 2
+
+ movdqa xmm5, xmm4 ; 75 65 55 45 35 25 15 05 74 64 54 44 34 24 14 04
+ punpcklqdq xmm4, xmm1 ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
+
+ movdqa [rdx+48], xmm3 ; save 3
+
+ punpckhqdq xmm5, xmm1 ; f5 e5 d5 c5 b5 a5 95 85 75 65 55 45 35 25 15 05
+
+ movdqa [rdx+64], xmm4 ; save 4
+ movdqa [rdx+80], xmm5 ; save 5
+ movdqa xmm1, t0 ; reload the column 0/1 halves saved in t0
+
+ movdqa xmm2, xmm1 ; 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00
+ punpckhqdq xmm1, xmm0 ; f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01
+
+ punpcklqdq xmm2, xmm0 ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00
+
+ movdqa [rdx+16], xmm1 ; save 1
+
+ movdqa [rdx], xmm2 ; save 0
+%endif
+%endmacro
+
+%macro LFV_FILTER_MASK_HEV_MASK 1
+ movdqa xmm0, xmm6 ; q2
+ psubusb xmm0, xmm7 ; q2-q3
+
+ psubusb xmm7, xmm6 ; q3-q2
+ movdqa xmm4, xmm5 ; q1
+
+ por xmm7, xmm0 ; abs (q3-q2)
+ psubusb xmm4, xmm6 ; q1-q2
+
+ movdqa xmm0, xmm1 ; p2
+ psubusb xmm6, xmm5 ; q2-q1
+
+ por xmm6, xmm4 ; abs (q2-q1)
+ psubusb xmm0, xmm2 ; p2 - p3;
+
+ psubusb xmm2, xmm1 ; p3 - p2;
+ por xmm0, xmm2 ; abs(p2-p3)
+%if %1
+ movdqa xmm2, [rdx] ; p1
+%else
+ movdqa xmm2, [rdx+32] ; p1
+%endif
+ movdqa xmm5, xmm2 ; p1
+ pmaxub xmm0, xmm7 ; running max of the abs differences
+
+ psubusb xmm5, xmm1 ; p1-p2
+ psubusb xmm1, xmm2 ; p2-p1
+
+ movdqa xmm7, xmm3 ; p0
+ psubusb xmm7, xmm2 ; p0-p1
+
+ por xmm1, xmm5 ; abs(p2-p1)
+ pmaxub xmm0, xmm6 ; running max of the abs differences
+
+ pmaxub xmm0, xmm1 ; running max of the abs differences
+ movdqa xmm1, xmm2 ; p1
+
+ psubusb xmm2, xmm3 ; p1-p0
+ lea rdx, srct ; rdx = address of the srct scratch buffer
+
+ por xmm2, xmm7 ; abs(p1-p0)
+
+ movdqa t0, xmm2 ; save abs(p1-p0)
+
+ pmaxub xmm0, xmm2 ; running max of the abs differences
+
+%if %1
+ movdqa xmm5, [rdx+32] ; q0
+ movdqa xmm7, [rdx+48] ; q1
+%else
+ movdqa xmm5, [rdx+64] ; q0
+ movdqa xmm7, [rdx+80] ; q1
+%endif
+ mov rdx, arg(3) ; limit
+
+ movdqa xmm6, xmm5 ; q0
+ movdqa xmm2, xmm7 ; q1
+
+ psubusb xmm5, xmm7 ; q0-q1
+ psubusb xmm7, xmm6 ; q1-q0
+
+ por xmm7, xmm5 ; abs(q1-q0)
+
+ movdqa t1, xmm7 ; save abs(q1-q0)
+
+ movdqa xmm4, XMMWORD PTR [rdx]; limit
+
+ pmaxub xmm0, xmm7 ; running max of the abs differences
+ mov rdx, arg(2) ; blimit
+
+ psubusb xmm0, xmm4 ; max abs difference - limit (saturating): nonzero where limit is exceeded
+ movdqa xmm5, xmm2 ; q1
+
+ psubusb xmm5, xmm1 ; q1-=p1
+ psubusb xmm1, xmm2 ; p1-=q1
+
+ por xmm5, xmm1 ; abs(p1-q1)
+ movdqa xmm1, xmm3 ; p0
+
+ pand xmm5, [GLOBAL(tfe)] ; set lsb of each byte to zero
+ psubusb xmm1, xmm6 ; p0-q0
+
+ psrlw xmm5, 1 ; abs(p1-q1)/2
+ psubusb xmm6, xmm3 ; q0-p0
+
+ movdqa xmm4, XMMWORD PTR [rdx]; blimit
+
+ mov rdx, arg(4) ; get thresh
+
+ por xmm1, xmm6 ; abs(q0-p0)
+
+ movdqa xmm6, t0 ; get abs (p1 - p0)
+
+ paddusb xmm1, xmm1 ; abs(q0-p0)*2
+
+ movdqa xmm3, t1 ; get abs (q1 - q0)
+
+ movdqa xmm7, XMMWORD PTR [rdx] ; thresh
+
+ paddusb xmm1, xmm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2
+ psubusb xmm6, xmm7 ; abs(p1 - p0) > thresh
+
+ psubusb xmm3, xmm7 ; abs(q1 - q0) > thresh
+
+ psubusb xmm1, xmm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
+ por xmm6, xmm3 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh
+
+ por xmm1, xmm0 ; mask
+ pcmpeqb xmm6, xmm0 ; NOTE(review): xmm0 still holds the limit residue here (it is zeroed by the next instruction) -- confirm intended
+
+ pxor xmm0, xmm0 ; xmm0 = 0
+ pcmpeqb xmm4, xmm4 ; xmm4 = all ones
+
+ pcmpeqb xmm1, xmm0 ; xmm1 = 0xff in bytes where the mask value is zero
+ pxor xmm4, xmm6 ; xmm4 = bitwise NOT of xmm6
+%endmacro
+
+%macro BV_TRANSPOSE 0
+ ; in: xmm1 = f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
+ ; in: xmm6 = f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03
+ ; in: xmm3 = f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
+ ; in: xmm7 = f5 e5 d5 c5 b5 a5 95 85 75 65 55 45 35 25 15 05
+ movdqa xmm2, xmm1 ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
+ punpcklbw xmm2, xmm6 ; 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02
+
+ movdqa xmm4, xmm3 ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
+ punpckhbw xmm1, xmm6 ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82
+
+ punpcklbw xmm4, xmm7 ; 75 74 65 64 55 54 45 44 35 34 25 24 15 14 05 04
+
+ punpckhbw xmm3, xmm7 ; f5 f4 e5 e4 d5 d4 c5 c4 b5 b4 a5 a4 95 94 85 84
+
+ movdqa xmm6, xmm2 ; 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02
+ punpcklwd xmm2, xmm4 ; 35 34 33 32 25 24 23 22 15 14 13 12 05 04 03 02
+
+ punpckhwd xmm6, xmm4 ; 75 74 73 72 65 64 63 62 55 54 53 52 45 44 43 42
+ movdqa xmm5, xmm1 ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82
+
+ punpcklwd xmm1, xmm3 ; b5 b4 b3 b2 a5 a4 a3 a2 95 94 93 92 85 84 83 82
+
+ punpckhwd xmm5, xmm3 ; f5 f4 f3 f2 e5 e4 e3 e2 d5 d4 d3 d2 c5 c4 c3 c2
+ ; out: xmm2 = 35 34 33 32 25 24 23 22 15 14 13 12 05 04 03 02
+ ; out: xmm6 = 75 74 73 72 65 64 63 62 55 54 53 52 45 44 43 42
+ ; out: xmm1 = b5 b4 b3 b2 a5 a4 a3 a2 95 94 93 92 85 84 83 82
+ ; out: xmm5 = f5 f4 f3 f2 e5 e4 e3 e2 d5 d4 d3 d2 c5 c4 c3 c2
+%endmacro
+
+%macro BV_WRITEBACK 2
+ movd [rsi+2], %1 ; store the low dword of %1 at byte offset +2 of the rsi row
+ psrldq %1, 4 ; shift the next dword down into the low lane
+
+ movd [rdi+2], %1 ; rdi row
+ psrldq %1, 4 ; shift the next dword down into the low lane
+
+ movd [rsi+2*rax+2], %1 ; rsi + 2 rows
+ psrldq %1, 4 ; shift the last dword of %1 down into the low lane
+
+ movd [rdi+2*rax+2], %1 ; rdi + 2 rows
+
+ movd [rsi+4*rax+2], %2 ; rsi + 4 rows, low dword of %2
+ psrldq %2, 4 ; shift the next dword down into the low lane
+
+ movd [rdi+4*rax+2], %2 ; rdi + 4 rows
+ psrldq %2, 4 ; shift the next dword down into the low lane
+
+ movd [rsi+2*rcx+2], %2 ; rsi + 2*rcx (callers in this file set rcx = 3*rax)
+ psrldq %2, 4 ; shift the last dword of %2 down into the low lane
+
+ movd [rdi+2*rcx+2], %2 ; rdi + 2*rcx
+%endmacro
+
+
+;void vp9_loop_filter_vertical_edge_sse2
+;(
+; unsigned char *src_ptr,
+; int src_pixel_step,
+; const char *blimit,
+; const char *limit,
+; const char *thresh,
+; int count
+;)
+global sym(vp9_loop_filter_vertical_edge_sse2) PRIVATE
+sym(vp9_loop_filter_vertical_edge_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ ALIGN_STACK 16, rax
+ sub rsp, 96 ; reserve 96 bytes
+ %define t0 [rsp + 0] ;__declspec(align(16)) char t0[16];
+ %define t1 [rsp + 16] ;__declspec(align(16)) char t1[16];
+ %define srct [rsp + 32] ;__declspec(align(16)) char srct[64];
+
+ mov rsi, arg(0) ; src_ptr
+ movsxd rax, dword ptr arg(1) ; src_pixel_step
+
+ lea rsi, [rsi - 4] ; start 4 bytes to the left of src_ptr
+ lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing
+ lea rcx, [rax*2+rax] ; rcx = 3 * src_pixel_step
+
+ ;transpose 16x8 to 8x16, and store the 8-line result on stack.
+ TRANSPOSE_16X8 1, 1
+
+ ; calculate filter mask and high edge variance
+ LFV_FILTER_MASK_HEV_MASK 1
+
+ ; start work on filters
+ B_FILTER 2
+
+ ; transpose and write back - only work on q1, q0, p0, p1
+ BV_TRANSPOSE
+ ; store 16-line result
+
+ lea rdx, [rax] ; rdx = src_pixel_step
+ neg rdx ; rdx = -src_pixel_step
+
+ BV_WRITEBACK xmm1, xmm5
+
+ lea rsi, [rsi+rdx*8] ; step rsi back 8 rows
+ lea rdi, [rdi+rdx*8] ; step rdi back 8 rows
+ BV_WRITEBACK xmm2, xmm6
+
+ add rsp, 96 ; release the 96-byte scratch area
+ pop rsp ; presumably restores the rsp saved by ALIGN_STACK -- confirm against the macro
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;void vp9_loop_filter_vertical_edge_uv_sse2
+;(
+; unsigned char *u,
+; int src_pixel_step,
+; const char *blimit,
+; const char *limit,
+; const char *thresh,
+; unsigned char *v
+;)
+global sym(vp9_loop_filter_vertical_edge_uv_sse2) PRIVATE
+sym(vp9_loop_filter_vertical_edge_uv_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ ALIGN_STACK 16, rax
+ sub rsp, 96 ; reserve 96 bytes
+ %define t0 [rsp + 0] ;__declspec(align(16)) char t0[16];
+ %define t1 [rsp + 16] ;__declspec(align(16)) char t1[16];
+ %define srct [rsp + 32] ;__declspec(align(16)) char srct[64];
+
+ mov rsi, arg(0) ; u_ptr
+ movsxd rax, dword ptr arg(1) ; src_pixel_step
+
+ lea rsi, [rsi - 4] ; start 4 bytes to the left of u_ptr
+ lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing
+ lea rcx, [rax+2*rax] ; rcx = 3 * src_pixel_step
+
+ lea rdx, srct ; rdx = address of the srct scratch buffer
+
+ ;transpose 16x8 to 8x16, and store the 8-line result on stack.
+ TRANSPOSE_16X8 0, 1
+
+ ; calculate filter mask and high edge variance
+ LFV_FILTER_MASK_HEV_MASK 1
+
+ ; start work on filters
+ B_FILTER 2
+
+ ; transpose and write back - only work on q1, q0, p0, p1
+ BV_TRANSPOSE
+
+ lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing
+
+ ; store 16-line result
+ BV_WRITEBACK xmm1, xmm5
+
+ mov rsi, arg(0) ; u_ptr
+ lea rsi, [rsi - 4] ; start 4 bytes to the left of u_ptr again
+ lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing
+ BV_WRITEBACK xmm2, xmm6
+
+ add rsp, 96 ; release the 96-byte scratch area
+ pop rsp ; presumably restores the rsp saved by ALIGN_STACK -- confirm against the macro
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void vp9_loop_filter_simple_horizontal_edge_sse2
+;(
+; unsigned char *src_ptr,
+; int src_pixel_step,
+; const char *blimit
+;)
+global sym(vp9_loop_filter_simple_horizontal_edge_sse2) PRIVATE
+sym(vp9_loop_filter_simple_horizontal_edge_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 3
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rsi, arg(0) ;src_ptr
+ movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch?
+ mov rdx, arg(2) ;blimit
+ movdqa xmm3, XMMWORD PTR [rdx] ; xmm3 = blimit
+
+ mov rdi, rsi ; rdi points to row +1 for indirect addressing
+ add rdi, rax
+ neg rax ; rax = -pitch, used below to address the rows above the edge
+
+ ; calculate mask
+ movdqa xmm1, [rsi+2*rax] ; p1
+ movdqa xmm0, [rdi] ; q1
+ movdqa xmm2, xmm1 ; keep a copy of p1 for the filter
+ movdqa xmm7, xmm0 ; keep a copy of q1 for the filter
+ movdqa xmm4, xmm0 ; q1
+ psubusb xmm0, xmm1 ; q1-=p1
+ psubusb xmm1, xmm4 ; p1-=q1
+ por xmm1, xmm0 ; abs(p1-q1)
+ pand xmm1, [GLOBAL(tfe)] ; set lsb of each byte to zero
+ psrlw xmm1, 1 ; abs(p1-q1)/2
+
+ movdqa xmm5, [rsi+rax] ; p0
+ movdqa xmm4, [rsi] ; q0
+ movdqa xmm0, xmm4 ; q0
+ movdqa xmm6, xmm5 ; p0
+ psubusb xmm5, xmm4 ; p0-=q0
+ psubusb xmm4, xmm6 ; q0-=p0
+ por xmm5, xmm4 ; abs(p0 - q0)
+ paddusb xmm5, xmm5 ; abs(p0-q0)*2
+ paddusb xmm5, xmm1 ; abs (p0 - q0) *2 + abs(p1-q1)/2
+
+ psubusb xmm5, xmm3 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit
+ pxor xmm3, xmm3 ; xmm3 = 0
+ pcmpeqb xmm5, xmm3 ; xmm5 = 0xff in bytes that did not exceed blimit
+
+ ; start work on filters
+ pxor xmm2, [GLOBAL(t80)] ; p1 offset to convert to signed values
+ pxor xmm7, [GLOBAL(t80)] ; q1 offset to convert to signed values
+ psubsb xmm2, xmm7 ; p1 - q1
+
+ pxor xmm6, [GLOBAL(t80)] ; offset to convert to signed values
+ pxor xmm0, [GLOBAL(t80)] ; offset to convert to signed values
+ movdqa xmm3, xmm0 ; q0
+ psubsb xmm0, xmm6 ; q0 - p0
+ paddsb xmm2, xmm0 ; p1 - q1 + 1 * (q0 - p0)
+ paddsb xmm2, xmm0 ; p1 - q1 + 2 * (q0 - p0)
+ paddsb xmm2, xmm0 ; p1 - q1 + 3 * (q0 - p0)
+ pand xmm5, xmm2 ; mask filter values we don't care about
+
+ ; do + 4 side
+ paddsb xmm5, [GLOBAL(t4)] ; 3* (q0 - p0) + (p1 - q1) + 4
+
+ movdqa xmm0, xmm5 ; get a copy of filters
+ psllw xmm0, 8 ; shift left 8
+ psraw xmm0, 3 ; arithmetic shift right 3 (low byte now sits in the high half of each word)
+ psrlw xmm0, 8 ; move the shifted low byte back down
+ movdqa xmm1, xmm5 ; get a copy of filters
+ psraw xmm1, 11 ; arithmetic shift right 11
+ psllw xmm1, 8 ; shift left 8 to put it back
+
+ por xmm0, xmm1 ; put the two together to get result
+
+ psubsb xmm3, xmm0 ; q0-= q0 add
+ pxor xmm3, [GLOBAL(t80)] ; unoffset
+ movdqa [rsi], xmm3 ; write back
+
+ ; now do +3 side
+ psubsb xmm5, [GLOBAL(t1s)] ; +3 instead of +4
+
+ movdqa xmm0, xmm5 ; get a copy of filters
+ psllw xmm0, 8 ; shift left 8
+ psraw xmm0, 3 ; arithmetic shift right 3 (low byte now sits in the high half of each word)
+ psrlw xmm0, 8 ; move the shifted low byte back down
+ psraw xmm5, 11 ; arithmetic shift right 11
+ psllw xmm5, 8 ; shift left 8 to put it back
+ por xmm0, xmm5 ; put the two together to get result
+
+
+ paddsb xmm6, xmm0 ; p0+= p0 add
+ pxor xmm6, [GLOBAL(t80)] ; unoffset
+ movdqa [rsi+rax], xmm6 ; write back
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;void vp9_loop_filter_simple_vertical_edge_sse2
+;(
+; unsigned char *src_ptr,
+; int src_pixel_step,
+; const char *blimit
+;)
+global sym(vp9_loop_filter_simple_vertical_edge_sse2) PRIVATE
+sym(vp9_loop_filter_simple_vertical_edge_sse2):
+ push rbp ; save old base pointer value.
+ mov rbp, rsp ; set new base pointer value.
+ SHADOW_ARGS_TO_STACK 3
+ SAVE_XMM 7
+ GET_GOT rbx ; save callee-saved reg
+ push rsi
+ push rdi
+ ; end prolog
+
+ ALIGN_STACK 16, rax
+ sub rsp, 32 ; reserve 32 bytes
+ %define t0 [rsp + 0] ;__declspec(align(16)) char t0[16];
+ %define t1 [rsp + 16] ;__declspec(align(16)) char t1[16];
+
+ mov rsi, arg(0) ;src_ptr
+ movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch?
+
+ lea rsi, [rsi - 2 ] ; start 2 bytes to the left of src_ptr
+ lea rdi, [rsi + rax] ; rdi = row 1
+ lea rdx, [rsi + rax*4] ; rdx = row 4
+ lea rcx, [rdx + rax] ; rcx = row 5
+
+ movd xmm0, [rsi] ; (high 96 bits unused) 03 02 01 00
+ movd xmm1, [rdx] ; (high 96 bits unused) 43 42 41 40
+ movd xmm2, [rdi] ; 13 12 11 10
+ movd xmm3, [rcx] ; 53 52 51 50
+ punpckldq xmm0, xmm1 ; (high 64 bits unused) 43 42 41 40 03 02 01 00
+ punpckldq xmm2, xmm3 ; 53 52 51 50 13 12 11 10
+
+ movd xmm4, [rsi + rax*2] ; 23 22 21 20
+ movd xmm5, [rdx + rax*2] ; 63 62 61 60
+ movd xmm6, [rdi + rax*2] ; 33 32 31 30
+ movd xmm7, [rcx + rax*2] ; 73 72 71 70
+ punpckldq xmm4, xmm5 ; 63 62 61 60 23 22 21 20
+ punpckldq xmm6, xmm7 ; 73 72 71 70 33 32 31 30
+
+ punpcklbw xmm0, xmm2 ; 53 43 52 42 51 41 50 40 13 03 12 02 11 01 10 00
+ punpcklbw xmm4, xmm6 ; 73 63 72 62 71 61 70 60 33 23 32 22 31 21 30 20
+
+ movdqa xmm1, xmm0 ; copy so both the low and high word interleaves can be formed
+ punpcklwd xmm0, xmm4 ; 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00
+ punpckhwd xmm1, xmm4 ; 73 63 53 43 72 62 52 42 71 61 51 41 70 60 50 40
+
+ movdqa xmm2, xmm0 ; copy so both the low and high dword interleaves can be formed
+ punpckldq xmm0, xmm1 ; 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00
+ punpckhdq xmm2, xmm1 ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
+
+ movdqa t0, xmm0 ; save to t0
+ movdqa t1, xmm2 ; save to t1
+
+ lea rsi, [rsi + rax*8] ; advance to row 8 (second 8-line group)
+ lea rdi, [rsi + rax] ; rdi = row 9
+ lea rdx, [rsi + rax*4] ; rdx = row c
+ lea rcx, [rdx + rax] ; rcx = row d
+
+ movd xmm4, [rsi] ; 83 82 81 80
+ movd xmm1, [rdx] ; c3 c2 c1 c0
+ movd xmm6, [rdi] ; 93 92 91 90
+ movd xmm3, [rcx] ; d3 d2 d1 d0
+ punpckldq xmm4, xmm1 ; c3 c2 c1 c0 83 82 81 80
+ punpckldq xmm6, xmm3 ; d3 d2 d1 d0 93 92 91 90
+
+ movd xmm0, [rsi + rax*2] ; a3 a2 a1 a0
+ movd xmm5, [rdx + rax*2] ; e3 e2 e1 e0
+ movd xmm2, [rdi + rax*2] ; b3 b2 b1 b0
+ movd xmm7, [rcx + rax*2] ; f3 f2 f1 f0
+ punpckldq xmm0, xmm5 ; e3 e2 e1 e0 a3 a2 a1 a0
+ punpckldq xmm2, xmm7 ; f3 f2 f1 f0 b3 b2 b1 b0
+
+ punpcklbw xmm4, xmm6 ; d3 c3 d2 c2 d1 c1 d0 c0 93 83 92 82 91 81 90 80
+ punpcklbw xmm0, xmm2 ; f3 e3 f2 e2 f1 e1 f0 e0 b3 a3 b2 a2 b1 a1 b0 a0
+
+ movdqa xmm1, xmm4 ; copy so both the low and high word interleaves can be formed
+ punpcklwd xmm4, xmm0 ; b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80
+ punpckhwd xmm1, xmm0 ; f3 e3 d3 c3 f2 e2 d2 c2 f1 e1 d1 c1 f0 e0 d0 c0
+
+ movdqa xmm6, xmm4 ; copy so both the low and high dword interleaves can be formed
+ punpckldq xmm4, xmm1 ; f1 e1 d1 c1 b1 a1 91 81 f0 e0 d0 c0 b0 a0 90 80
+ punpckhdq xmm6, xmm1 ; f3 e3 d3 c3 b3 a3 93 83 f2 e2 d2 c2 b2 a2 92 82
+
+ movdqa xmm0, t0 ; 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00
+ movdqa xmm2, t1 ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
+ movdqa xmm1, xmm0 ; second copy for the high-qword merge
+ movdqa xmm3, xmm2 ; second copy for the high-qword merge
+
+ punpcklqdq xmm0, xmm4 ; p1 f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00
+ punpckhqdq xmm1, xmm4 ; p0 f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01
+ punpcklqdq xmm2, xmm6 ; q0 f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
+ punpckhqdq xmm3, xmm6 ; q1 f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03
+
+ ; calculate mask
+ movdqa xmm6, xmm0 ; p1
+ movdqa xmm7, xmm3 ; q1
+ psubusb xmm7, xmm0 ; q1-=p1
+ psubusb xmm6, xmm3 ; p1-=q1
+ por xmm6, xmm7 ; abs(p1-q1)
+ pand xmm6, [GLOBAL(tfe)] ; set lsb of each byte to zero
+ psrlw xmm6, 1 ; abs(p1-q1)/2
+
+ movdqa xmm5, xmm1 ; p0
+ movdqa xmm4, xmm2 ; q0
+ psubusb xmm5, xmm2 ; p0-=q0
+ psubusb xmm4, xmm1 ; q0-=p0
+ por xmm5, xmm4 ; abs(p0 - q0)
+ paddusb xmm5, xmm5 ; abs(p0-q0)*2
+ paddusb xmm5, xmm6 ; abs (p0 - q0) *2 + abs(p1-q1)/2
+
+ mov rdx, arg(2) ;blimit
+ movdqa xmm7, XMMWORD PTR [rdx] ; xmm7 = blimit
+
+ psubusb xmm5, xmm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit
+ pxor xmm7, xmm7 ; xmm7 = 0
+ pcmpeqb xmm5, xmm7 ; xmm5 = mask
+
+ ; start work on filters
+ movdqa t0, xmm0 ; keep unmodified p1 for the write-back transpose
+ movdqa t1, xmm3 ; keep unmodified q1 for the write-back transpose
+
+ pxor xmm0, [GLOBAL(t80)] ; p1 offset to convert to signed values
+ pxor xmm3, [GLOBAL(t80)] ; q1 offset to convert to signed values
+
+ psubsb xmm0, xmm3 ; p1 - q1
+ movdqa xmm6, xmm1 ; p0
+
+ movdqa xmm7, xmm2 ; q0
+ pxor xmm6, [GLOBAL(t80)] ; offset to convert to signed values
+
+ pxor xmm7, [GLOBAL(t80)] ; offset to convert to signed values
+ movdqa xmm3, xmm7 ; offseted ; q0
+
+ psubsb xmm7, xmm6 ; q0 - p0
+ paddsb xmm0, xmm7 ; p1 - q1 + 1 * (q0 - p0)
+
+ paddsb xmm0, xmm7 ; p1 - q1 + 2 * (q0 - p0)
+ paddsb xmm0, xmm7 ; p1 - q1 + 3 * (q0 - p0)
+
+ pand xmm5, xmm0 ; mask filter values we don't care about
+
+
+ paddsb xmm5, [GLOBAL(t4)] ; 3* (q0 - p0) + (p1 - q1) + 4
+
+ movdqa xmm0, xmm5 ; get a copy of filters
+ psllw xmm0, 8 ; shift left 8
+
+ psraw xmm0, 3 ; arithmetic shift right 3 (low byte now sits in the high half of each word)
+ psrlw xmm0, 8 ; move the shifted low byte back down
+
+ movdqa xmm7, xmm5 ; get a copy of filters
+ psraw xmm7, 11 ; arithmetic shift right 11
+
+ psllw xmm7, 8 ; shift left 8 to put it back
+ por xmm0, xmm7 ; put the two together to get result
+
+ psubsb xmm3, xmm0 ; q0-= q0sz add
+ pxor xmm3, [GLOBAL(t80)] ; unoffset q0
+
+ ; now do +3 side
+ psubsb xmm5, [GLOBAL(t1s)] ; +3 instead of +4
+ movdqa xmm0, xmm5 ; get a copy of filters
+
+ psllw xmm0, 8 ; shift left 8
+ psraw xmm0, 3 ; arithmetic shift right 3 (low byte now sits in the high half of each word)
+
+ psrlw xmm0, 8 ; move the shifted low byte back down
+ psraw xmm5, 11 ; arithmetic shift right 11
+
+ psllw xmm5, 8 ; shift left 8 to put it back
+ por xmm0, xmm5 ; put the two together to get result
+
+ paddsb xmm6, xmm0 ; p0+= p0 add
+ pxor xmm6, [GLOBAL(t80)] ; unoffset p0
+
+ movdqa xmm0, t0 ; p1
+ movdqa xmm4, t1 ; q1
+
+ ; transpose back to write out
+ ; p1 f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00
+ ; p0 f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01
+ ; q0 f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
+ ; q1 f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03
+ movdqa xmm1, xmm0 ; copy of p1 for the high-half interleave
+ punpcklbw xmm0, xmm6 ; 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00
+ punpckhbw xmm1, xmm6 ; f1 f0 e1 e0 d1 d0 c1 c0 b1 b0 a1 a0 91 90 81 80
+
+ movdqa xmm5, xmm3 ; copy of q0 for the high-half interleave
+ punpcklbw xmm3, xmm4 ; 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02
+ punpckhbw xmm5, xmm4 ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82
+
+ movdqa xmm2, xmm0 ; copy so both the low and high word interleaves can be formed
+ punpcklwd xmm0, xmm3 ; 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00
+ punpckhwd xmm2, xmm3 ; 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40
+
+ movdqa xmm3, xmm1 ; copy so both the low and high word interleaves can be formed
+ punpcklwd xmm1, xmm5 ; b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80
+ punpckhwd xmm3, xmm5 ; f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0
+
+ ; row order of the results: xmm0 xmm2 xmm1 xmm3 (rows 0-3, 4-7, 8-b, c-f); rows 8-f are stored first
+ lea rdx, [rsi + rax*4] ; rdx was reused for blimit above; recompute row +4 of this group
+
+ movd [rsi], xmm1 ; write the second 8-line result
+ psrldq xmm1, 4
+ movd [rdi], xmm1
+ psrldq xmm1, 4
+ movd [rsi + rax*2], xmm1
+ psrldq xmm1, 4
+ movd [rdi + rax*2], xmm1
+
+ movd [rdx], xmm3
+ psrldq xmm3, 4
+ movd [rcx], xmm3
+ psrldq xmm3, 4
+ movd [rdx + rax*2], xmm3
+ psrldq xmm3, 4
+ movd [rcx + rax*2], xmm3
+
+ neg rax ; temporarily negate the pitch
+ lea rsi, [rsi + rax*8] ; step back 8 rows to the first 8-line group
+ neg rax ; restore the positive pitch
+ lea rdi, [rsi + rax] ; rdi = row 1
+ lea rdx, [rsi + rax*4] ; rdx = row 4
+ lea rcx, [rdx + rax] ; rcx = row 5
+
+ movd [rsi], xmm0 ; write the first 8-line result
+ psrldq xmm0, 4
+ movd [rdi], xmm0
+ psrldq xmm0, 4
+ movd [rsi + rax*2], xmm0
+ psrldq xmm0, 4
+ movd [rdi + rax*2], xmm0
+
+ movd [rdx], xmm2
+ psrldq xmm2, 4
+ movd [rcx], xmm2
+ psrldq xmm2, 4
+ movd [rdx + rax*2], xmm2
+ psrldq xmm2, 4
+ movd [rcx + rax*2], xmm2
+
+ add rsp, 32 ; release the t0/t1 scratch area
+ pop rsp ; presumably restores the rsp saved by ALIGN_STACK -- confirm against the macro
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+SECTION_RODATA
+align 16
+tfe: ; 0xfe in every byte: and-mask that clears each byte's lsb before the word shift
+ times 16 db 0xfe
+align 16
+t80: ; 0x80 in every byte: xor offset between unsigned and signed byte values
+ times 16 db 0x80
+align 16
+t1s: ; 0x01 in every byte: subtracted to turn the +4 filter term into +3
+ times 16 db 0x01
+align 16
+t3: ; 0x03 in every byte
+ times 16 db 0x03
+align 16
+t4: ; 0x04 in every byte: the +4 filter term
+ times 16 db 0x04
+align 16
+ones: ; 0x0001 in every word
+ times 8 dw 0x0001
+align 16
+s9: ; 0x0900 in every word
+ times 8 dw 0x0900
+align 16
+s63: ; 0x003f in every word
+ times 8 dw 0x003f
diff --git a/vp9/common/x86/vp9_loopfilter_x86.c b/vp9/common/x86/vp9_loopfilter_x86.c
new file mode 100644
index 0000000..61b1c77
--- /dev/null
+++ b/vp9/common/x86/vp9_loopfilter_x86.c
@@ -0,0 +1,687 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <emmintrin.h> // SSE2
+#include "vpx_config.h"
+#include "vp9/common/vp9_loopfilter.h"
+#include "vpx_ports/emmintrin_compat.h"
+
+prototype_loopfilter(vp9_loop_filter_vertical_edge_mmx);
+prototype_loopfilter(vp9_loop_filter_horizontal_edge_mmx);
+
+prototype_loopfilter(vp9_loop_filter_vertical_edge_sse2);
+prototype_loopfilter(vp9_loop_filter_horizontal_edge_sse2);
+
+extern loop_filter_uvfunction vp9_loop_filter_horizontal_edge_uv_sse2;
+extern loop_filter_uvfunction vp9_loop_filter_vertical_edge_uv_sse2;
+
+#if HAVE_MMX
+/* Horizontal MB filtering (MMX): the body is empty, so a call does nothing. */
+void vp9_loop_filter_mbh_mmx(unsigned char *y_ptr,
+ unsigned char *u_ptr, unsigned char *v_ptr,
+ int y_stride, int uv_stride,
+ struct loop_filter_info *lfi) {
+}
+
+/* Vertical MB Filtering (MMX): the body is empty, so a call does nothing. */
+void vp9_loop_filter_mbv_mmx(unsigned char *y_ptr,
+ unsigned char *u_ptr, unsigned char *v_ptr,
+ int y_stride, int uv_stride,
+ struct loop_filter_info *lfi) {
+}
+
+/* Horizontal B Filtering (MMX): the body is empty, so a call does nothing. */
+void vp9_loop_filter_bh_mmx(unsigned char *y_ptr,
+ unsigned char *u_ptr, unsigned char *v_ptr,
+ int y_stride, int uv_stride,
+ struct loop_filter_info *lfi) {
+
+}
+
+void vp9_loop_filter_bhs_mmx(unsigned char *y_ptr, int y_stride,
+                             const unsigned char *blimit) {
+  /* Simple filter on the horizontal edges at rows 4, 8 and 12, in order. */
+  int row;
+  for (row = 4; row <= 12; row += 4) {
+    vp9_loop_filter_simple_horizontal_edge_mmx(y_ptr + row * y_stride,
+                                               y_stride, blimit);
+  }
+}
+
+/* Vertical B Filtering (MMX): interior vertical edges of the block. */
+void vp9_loop_filter_bv_mmx(unsigned char *y_ptr,
+                            unsigned char *u_ptr, unsigned char *v_ptr,
+                            int y_stride, int uv_stride,
+                            struct loop_filter_info *lfi) {
+  int col;
+  /* Luma edges at columns 4, 8 and 12; the last argument is 2 for each. */
+  for (col = 4; col <= 12; col += 4) {
+    vp9_loop_filter_vertical_edge_mmx(y_ptr + col, y_stride,
+                                      lfi->blim, lfi->lim, lfi->hev_thr, 2);
+  }
+  if (u_ptr) {
+    vp9_loop_filter_vertical_edge_mmx(u_ptr + 4, uv_stride,
+                                      lfi->blim, lfi->lim, lfi->hev_thr, 1);
+  }
+  if (v_ptr) {
+    vp9_loop_filter_vertical_edge_mmx(v_ptr + 4, uv_stride,
+                                      lfi->blim, lfi->lim, lfi->hev_thr, 1);
+  }
+}
+
+void vp9_loop_filter_bvs_mmx(unsigned char *y_ptr, int y_stride,
+                             const unsigned char *blimit) {
+  int col;  /* simple filter on the vertical edges at columns 4, 8, 12 */
+  for (col = 4; col <= 12; col += 4)
+    vp9_loop_filter_simple_vertical_edge_mmx(y_ptr + col, y_stride, blimit);
+}
+#endif
+
+#if HAVE_SSE2
+void vp9_mbloop_filter_horizontal_edge_sse2(unsigned char *s,  /* row loaded as q0; 16 bytes are processed per row */
+ int p,  /* byte offset between consecutive rows */
+ const unsigned char *_blimit,  /* only _blimit[0] is read */
+ const unsigned char *_limit,  /* only _limit[0] is read */
+ const unsigned char *_thresh) {  /* only _thresh[0] is read */
+ DECLARE_ALIGNED(16, unsigned char, flat_op2[16]);  /* flat_o*: averaged rows, selected where flat is set */
+ DECLARE_ALIGNED(16, unsigned char, flat_op1[16]);
+ DECLARE_ALIGNED(16, unsigned char, flat_op0[16]);
+ DECLARE_ALIGNED(16, unsigned char, flat_oq2[16]);
+ DECLARE_ALIGNED(16, unsigned char, flat_oq1[16]);
+ DECLARE_ALIGNED(16, unsigned char, flat_oq0[16]);
+ __m128i mask, hev, flat;
+ const __m128i zero = _mm_set1_epi16(0);
+ __m128i p4, p3, p2, p1, p0, q0, q1, q2, q3, q4;
+ const unsigned int extended_thresh = _thresh[0] * 0x01010101u;  /* byte replicated into all 4 bytes */
+ const unsigned int extended_limit = _limit[0] * 0x01010101u;
+ const unsigned int extended_blimit = _blimit[0] * 0x01010101u;
+ const __m128i thresh =
+ _mm_shuffle_epi32(_mm_cvtsi32_si128((int)extended_thresh), 0);  /* broadcast to all 16 bytes */
+ const __m128i limit =
+ _mm_shuffle_epi32(_mm_cvtsi32_si128((int)extended_limit), 0);
+ const __m128i blimit =
+ _mm_shuffle_epi32(_mm_cvtsi32_si128((int)extended_blimit), 0);
+
+ p4 = _mm_loadu_si128((__m128i *)(s - 5 * p));  /* ten rows: p4..p0 before s, q0..q4 from s on */
+ p3 = _mm_loadu_si128((__m128i *)(s - 4 * p));
+ p2 = _mm_loadu_si128((__m128i *)(s - 3 * p));
+ p1 = _mm_loadu_si128((__m128i *)(s - 2 * p));
+ p0 = _mm_loadu_si128((__m128i *)(s - 1 * p));
+ q0 = _mm_loadu_si128((__m128i *)(s - 0 * p));
+ q1 = _mm_loadu_si128((__m128i *)(s + 1 * p));
+ q2 = _mm_loadu_si128((__m128i *)(s + 2 * p));
+ q3 = _mm_loadu_si128((__m128i *)(s + 3 * p));
+ q4 = _mm_loadu_si128((__m128i *)(s + 4 * p));
+ {  /* stage 1: build the hev, mask and flat byte masks */
+ const __m128i abs_p1p0 = _mm_or_si128(_mm_subs_epu8(p1, p0),  /* |a - b| as OR of both saturating differences */
+ _mm_subs_epu8(p0, p1));
+ const __m128i abs_q1q0 = _mm_or_si128(_mm_subs_epu8(q1, q0),
+ _mm_subs_epu8(q0, q1));
+ const __m128i one = _mm_set1_epi8(1);
+ const __m128i fe = _mm_set1_epi8(0xfe);
+ const __m128i ff = _mm_cmpeq_epi8(abs_p1p0, abs_p1p0);  /* all bits set */
+ __m128i abs_p0q0 = _mm_or_si128(_mm_subs_epu8(p0, q0),
+ _mm_subs_epu8(q0, p0));
+ __m128i abs_p1q1 = _mm_or_si128(_mm_subs_epu8(p1, q1),
+ _mm_subs_epu8(q1, p1));
+ __m128i work;
+ flat = _mm_max_epu8(abs_p1p0, abs_q1q0);
+ hev = _mm_subs_epu8(flat, thresh);
+ hev = _mm_xor_si128(_mm_cmpeq_epi8(hev, zero), ff);  /* 0xff where max(|p1-p0|, |q1-q0|) > thresh */
+
+ abs_p0q0 =_mm_adds_epu8(abs_p0q0, abs_p0q0);  /* abs(p0 - q0) * 2, saturating */
+ abs_p1q1 = _mm_srli_epi16(_mm_and_si128(abs_p1q1, fe), 1);  /* per-byte / 2: low bit cleared before the 16-bit shift */
+ mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), blimit);
+ mask = _mm_xor_si128(_mm_cmpeq_epi8(mask, zero), ff);
+ // ^ mask is 0xff where abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit
+ mask = _mm_max_epu8(flat, mask);
+ // ^ folds in max(abs(p1 - p0), abs(q1 - q0)); the running maximum
+ // collected in mask is compared against limit further down
+ work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p2, p1),
+ _mm_subs_epu8(p1, p2)),
+ _mm_or_si128(_mm_subs_epu8(p3, p2),
+ _mm_subs_epu8(p2, p3)));
+ mask = _mm_max_epu8(work, mask);
+ work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(q2, q1),
+ _mm_subs_epu8(q1, q2)),
+ _mm_or_si128(_mm_subs_epu8(q3, q2),
+ _mm_subs_epu8(q2, q3)));
+ mask = _mm_max_epu8(work, mask);
+ mask = _mm_subs_epu8(mask, limit);
+ mask = _mm_cmpeq_epi8(mask, zero);  /* 0xff where the running maximum does not exceed limit */
+
+ work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p2, p0),
+ _mm_subs_epu8(p0, p2)),
+ _mm_or_si128(_mm_subs_epu8(q2, q0),
+ _mm_subs_epu8(q0, q2)));
+ flat = _mm_max_epu8(work, flat);
+ work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p3, p0),
+ _mm_subs_epu8(p0, p3)),
+ _mm_or_si128(_mm_subs_epu8(q3, q0),
+ _mm_subs_epu8(q0, q3)));
+ flat = _mm_max_epu8(work, flat);
+ work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p4, p0),
+ _mm_subs_epu8(p0, p4)),
+ _mm_or_si128(_mm_subs_epu8(q4, q0),
+ _mm_subs_epu8(q0, q4)));
+ flat = _mm_max_epu8(work, flat);
+ flat = _mm_subs_epu8(flat, one);
+ flat = _mm_cmpeq_epi8(flat, zero);  /* 0xff where every collected difference is <= 1 */
+ flat = _mm_and_si128(flat, mask);  /* flat can only be set where mask is set */
+ }
+ {  /* stage 2: averaged outputs for all 16 columns, 8 columns per pass */
+ const __m128i four = _mm_set1_epi16(4);  /* rounding term for the >> 3 below */
+ unsigned char *src = s;
+ int i = 0;
+ do {
+ __m128i workp_a, workp_b, workp_shft;
+ p4 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 5 * p)), zero);  /* widen 8 bytes to 16-bit lanes */
+ p3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 4 * p)), zero);
+ p2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 3 * p)), zero);
+ p1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 2 * p)), zero);
+ p0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 1 * p)), zero);
+ q0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 0 * p)), zero);
+ q1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 1 * p)), zero);
+ q2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 2 * p)), zero);
+ q3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 3 * p)), zero);
+ q4 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 4 * p)), zero);
+
+ workp_a = _mm_add_epi16(_mm_add_epi16(p4, p3), _mm_add_epi16(p2, p1));
+ workp_a = _mm_add_epi16(_mm_add_epi16(workp_a, four), p0);
+ workp_b = _mm_add_epi16(_mm_add_epi16(q0, p2), p4);
+ workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3);  /* (2*p4 + p3 + 2*p2 + p1 + p0 + q0 + 4) >> 3 */
+ _mm_storel_epi64((__m128i *)&flat_op2[i*8],
+ _mm_packus_epi16(workp_shft, workp_shft));
+
+ workp_b = _mm_add_epi16(_mm_add_epi16(q0, q1), p1);
+ workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3);  /* (p4 + p3 + p2 + 2*p1 + p0 + q0 + q1 + 4) >> 3 */
+ _mm_storel_epi64((__m128i *)&flat_op1[i*8],
+ _mm_packus_epi16(workp_shft, workp_shft));
+
+ workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p4), q2);  /* the sums slide one tap per output from here on */
+ workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p1), p0);
+ workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3);  /* (p3 + p2 + p1 + 2*p0 + q0 + q1 + q2 + 4) >> 3 */
+ _mm_storel_epi64((__m128i *)&flat_op0[i*8],
+ _mm_packus_epi16(workp_shft, workp_shft));
+
+ workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p3), q3);
+ workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p0), q0);
+ workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3);  /* (p2 + p1 + p0 + 2*q0 + q1 + q2 + q3 + 4) >> 3 */
+ _mm_storel_epi64((__m128i *)&flat_oq0[i*8],
+ _mm_packus_epi16(workp_shft, workp_shft));
+
+ workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p2), q4);
+ workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q0), q1);
+ workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3);  /* (p1 + p0 + q0 + 2*q1 + q2 + q3 + q4 + 4) >> 3 */
+ _mm_storel_epi64((__m128i *)&flat_oq1[i*8],
+ _mm_packus_epi16(workp_shft, workp_shft));
+
+ workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p1), q4);
+ workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q1), q2);
+ workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3);  /* (p0 + q0 + q1 + 2*q2 + q3 + 2*q4 + 4) >> 3 */
+ _mm_storel_epi64((__m128i *)&flat_oq2[i*8],
+ _mm_packus_epi16(workp_shft, workp_shft));
+
+ src += 8;  /* next 8 columns */
+ } while (++i < 2);
+ }
+ // stage 3: signed-domain filter, then per-byte choice between it and the flat_o* rows
+ {
+ const __m128i t4 = _mm_set1_epi8(4);
+ const __m128i t3 = _mm_set1_epi8(3);
+ const __m128i t80 = _mm_set1_epi8(0x80);
+ const __m128i te0 = _mm_set1_epi8(0xe0);
+ const __m128i t1f = _mm_set1_epi8(0x1f);
+ const __m128i t1 = _mm_set1_epi8(0x1);
+ const __m128i t7f = _mm_set1_epi8(0x7f);
+
+ const __m128i ps1 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s - 2 * p)),  /* XOR 0x80 maps unsigned bytes to signed */
+ t80);
+ const __m128i ps0 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s - 1 * p)),
+ t80);
+ const __m128i qs0 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s + 0 * p)),
+ t80);
+ const __m128i qs1 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s + 1 * p)),
+ t80);
+ __m128i filt;
+ __m128i work_a;
+ __m128i filter1, filter2;
+
+ filt = _mm_and_si128(_mm_subs_epi8(ps1, qs1), hev);  /* (ps1 - qs1) kept only where hev is set */
+ work_a = _mm_subs_epi8(qs0, ps0);
+ filt = _mm_adds_epi8(filt, work_a);  /* three saturating adds give + 3 * (qs0 - ps0) */
+ filt = _mm_adds_epi8(filt, work_a);
+ filt = _mm_adds_epi8(filt, work_a);
+ /* (filt + 3 * (qs0 - ps0)) & mask */
+ filt = _mm_and_si128(filt, mask);
+
+ filter1 = _mm_adds_epi8(filt, t4);
+ filter2 = _mm_adds_epi8(filt, t3);
+
+ /* Filter1 >> 3, per-byte arithmetic shift: logical shift + sign fill */
+ work_a = _mm_cmpgt_epi8(zero, filter1);
+ filter1 = _mm_srli_epi16(filter1, 3);
+ work_a = _mm_and_si128(work_a, te0);
+ filter1 = _mm_and_si128(filter1, t1f);
+ filter1 = _mm_or_si128(filter1, work_a);
+
+ /* Filter2 >> 3, same per-byte arithmetic shift */
+ work_a = _mm_cmpgt_epi8(zero, filter2);
+ filter2 = _mm_srli_epi16(filter2, 3);
+ work_a = _mm_and_si128(work_a, te0);
+ filter2 = _mm_and_si128(filter2, t1f);
+ filter2 = _mm_or_si128(filter2, work_a);
+
+ /* filt = (filter1 + 1) >> 1, per-byte arithmetic shift */
+ filt = _mm_adds_epi8(filter1, t1);
+ work_a = _mm_cmpgt_epi8(zero, filt);
+ filt = _mm_srli_epi16(filt, 1);
+ work_a = _mm_and_si128(work_a, t80);
+ filt = _mm_and_si128(filt, t7f);
+ filt = _mm_or_si128(filt, work_a);
+
+ filt = _mm_andnot_si128(hev, filt);  /* zeroed where hev is set */
+
+ work_a = _mm_xor_si128(_mm_subs_epi8(qs0, filter1), t80);  /* qs0 - filter1, back to unsigned */
+ q0 = _mm_load_si128((__m128i *)flat_oq0);
+ work_a = _mm_andnot_si128(flat, work_a);  /* select: flat ? flat_oq0 : filtered value */
+ q0 = _mm_and_si128(flat, q0);
+ q0 = _mm_or_si128(work_a, q0);
+
+ work_a = _mm_xor_si128(_mm_subs_epi8(qs1, filt), t80);
+ q1 = _mm_load_si128((__m128i *)flat_oq1);
+ work_a = _mm_andnot_si128(flat, work_a);
+ q1 = _mm_and_si128(flat, q1);
+ q1 = _mm_or_si128(work_a, q1);
+
+ work_a = _mm_loadu_si128((__m128i *)(s + 2 * p));  /* q2: flat ? flat_oq2 : unmodified source row */
+ q2 = _mm_load_si128((__m128i *)flat_oq2);
+ work_a = _mm_andnot_si128(flat, work_a);
+ q2 = _mm_and_si128(flat, q2);
+ q2 = _mm_or_si128(work_a, q2);
+
+ work_a = _mm_xor_si128(_mm_adds_epi8(ps0, filter2), t80);  /* ps0 + filter2, back to unsigned */
+ p0 = _mm_load_si128((__m128i *)flat_op0);
+ work_a = _mm_andnot_si128(flat, work_a);
+ p0 = _mm_and_si128(flat, p0);
+ p0 = _mm_or_si128(work_a, p0);
+
+ work_a = _mm_xor_si128(_mm_adds_epi8(ps1, filt), t80);
+ p1 = _mm_load_si128((__m128i *)flat_op1);
+ work_a = _mm_andnot_si128(flat, work_a);
+ p1 = _mm_and_si128(flat, p1);
+ p1 = _mm_or_si128(work_a, p1);
+
+ work_a = _mm_loadu_si128((__m128i *)(s - 3 * p));  /* p2: flat ? flat_op2 : unmodified source row */
+ p2 = _mm_load_si128((__m128i *)flat_op2);
+ work_a = _mm_andnot_si128(flat, work_a);
+ p2 = _mm_and_si128(flat, p2);
+ p2 = _mm_or_si128(work_a, p2);
+
+ _mm_storeu_si128((__m128i *)(s - 3 * p), p2);  /* six rows are written: p2..p0 and q0..q2 */
+ _mm_storeu_si128((__m128i *)(s - 2 * p), p1);
+ _mm_storeu_si128((__m128i *)(s - 1 * p), p0);
+ _mm_storeu_si128((__m128i *)(s + 0 * p), q0);
+ _mm_storeu_si128((__m128i *)(s + 1 * p), q1);
+ _mm_storeu_si128((__m128i *)(s + 2 * p), q2);
+ }
+}
+
+void vp9_mbloop_filter_horizontal_edge_uv_sse2(unsigned char *u,  /* u row loaded as q0; 8 bytes per row are used */
+ int p,  /* byte offset between rows, applied to both u and v */
+ const unsigned char *_blimit,
+ const unsigned char *_limit,
+ const unsigned char *_thresh,
+ unsigned char *v) {  /* v row loaded as q0; 8 bytes per row are used */
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, src, 160);  /* 10 rows x 16 bytes of packed u|v */
+
+ /* Read source: each register holds 8 u bytes (low half) and 8 v bytes (high half) */
+ const __m128i p4 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(u - 5 * p)),
+ _mm_loadl_epi64((__m128i *)(v - 5 * p)));
+ const __m128i p3 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(u - 4 * p)),
+ _mm_loadl_epi64((__m128i *)(v - 4 * p)));
+ const __m128i p2 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(u - 3 * p)),
+ _mm_loadl_epi64((__m128i *)(v - 3 * p)));
+ const __m128i p1 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(u - 2 * p)),
+ _mm_loadl_epi64((__m128i *)(v - 2 * p)));
+ const __m128i p0 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(u - 1 * p)),
+ _mm_loadl_epi64((__m128i *)(v - 1 * p)));
+ const __m128i q0 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(u)),
+ _mm_loadl_epi64((__m128i *)(v)));
+ const __m128i q1 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(u + 1 * p)),
+ _mm_loadl_epi64((__m128i *)(v + 1 * p)));
+ const __m128i q2 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(u + 2 * p)),
+ _mm_loadl_epi64((__m128i *)(v + 2 * p)));
+ const __m128i q3 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(u + 3 * p)),
+ _mm_loadl_epi64((__m128i *)(v + 3 * p)));
+ const __m128i q4 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(u + 4 * p)),
+ _mm_loadl_epi64((__m128i *)(v + 4 * p)));
+
+ _mm_store_si128((__m128i *)(src), p4);  /* rows are laid out p4..q4 at a pitch of 16 bytes */
+ _mm_store_si128((__m128i *)(src + 16), p3);
+ _mm_store_si128((__m128i *)(src + 32), p2);
+ _mm_store_si128((__m128i *)(src + 48), p1);
+ _mm_store_si128((__m128i *)(src + 64), p0);
+ _mm_store_si128((__m128i *)(src + 80), q0);
+ _mm_store_si128((__m128i *)(src + 96), q1);
+ _mm_store_si128((__m128i *)(src + 112), q2);
+ _mm_store_si128((__m128i *)(src + 128), q3);
+ _mm_store_si128((__m128i *)(src + 144), q4);
+
+ /* Loop filtering on the packed copy: src + 80 is the q0 row, row pitch is 16 */
+ vp9_mbloop_filter_horizontal_edge_sse2(src + 80, 16, _blimit, _limit,
+ _thresh);
+
+ /* Store result: rows p2..q2 only; the low 8 bytes of each row go back to u */
+ _mm_storel_epi64((__m128i *)(u - 3 * p),
+ _mm_loadl_epi64((__m128i *)(src + 32)));
+ _mm_storel_epi64((__m128i *)(u - 2 * p),
+ _mm_loadl_epi64((__m128i *)(src + 48)));
+ _mm_storel_epi64((__m128i *)(u - p),
+ _mm_loadl_epi64((__m128i *)(src + 64)));
+ _mm_storel_epi64((__m128i *)u,
+ _mm_loadl_epi64((__m128i *)(src + 80)));
+ _mm_storel_epi64((__m128i *)(u + p),
+ _mm_loadl_epi64((__m128i *)(src + 96)));
+ _mm_storel_epi64((__m128i *)(u + 2 * p),
+ _mm_loadl_epi64((__m128i *)(src + 112)));
+
+ _mm_storel_epi64((__m128i *)(v - 3 * p),  /* the high 8 bytes (row offset + 8) go back to v */
+ _mm_loadl_epi64((__m128i *)(src + 40)));
+ _mm_storel_epi64((__m128i *)(v - 2 * p),
+ _mm_loadl_epi64((__m128i *)(src + 56)));
+ _mm_storel_epi64((__m128i *)(v - p),
+ _mm_loadl_epi64((__m128i *)(src + 72)));
+ _mm_storel_epi64((__m128i *)v,
+ _mm_loadl_epi64((__m128i *)(src + 88)));
+ _mm_storel_epi64((__m128i *)(v + p),
+ _mm_loadl_epi64((__m128i *)(src + 104)));
+ _mm_storel_epi64((__m128i *)(v + 2 * p),
+ _mm_loadl_epi64((__m128i *)(src + 120)));
+}
+
+static __inline void transpose8x16(unsigned char *in0, unsigned char *in1,  /* two 8-row x 8-byte inputs */
+ int in_p, unsigned char *out, int out_p) {  /* in_p / out_p: row pitch of inputs / output */
+ __m128i x0, x1, x2, x3, x4, x5, x6, x7;
+ __m128i x8, x9, x10, x11, x12, x13, x14, x15;
+
+ /* Read in 16 lines: x0..x7 from in0, x8..x15 from in1, 8 bytes each */
+ x0 = _mm_loadl_epi64((__m128i *)in0);
+ x8 = _mm_loadl_epi64((__m128i *)in1);
+ x1 = _mm_loadl_epi64((__m128i *)(in0 + in_p));
+ x9 = _mm_loadl_epi64((__m128i *)(in1 + in_p));
+ x2 = _mm_loadl_epi64((__m128i *)(in0 + 2 * in_p));
+ x10 = _mm_loadl_epi64((__m128i *)(in1 + 2 * in_p));
+ x3 = _mm_loadl_epi64((__m128i *)(in0 + 3*in_p));
+ x11 = _mm_loadl_epi64((__m128i *)(in1 + 3*in_p));
+ x4 = _mm_loadl_epi64((__m128i *)(in0 + 4*in_p));
+ x12 = _mm_loadl_epi64((__m128i *)(in1 + 4*in_p));
+ x5 = _mm_loadl_epi64((__m128i *)(in0 + 5*in_p));
+ x13 = _mm_loadl_epi64((__m128i *)(in1 + 5*in_p));
+ x6 = _mm_loadl_epi64((__m128i *)(in0 + 6*in_p));
+ x14 = _mm_loadl_epi64((__m128i *)(in1 + 6*in_p));
+ x7 = _mm_loadl_epi64((__m128i *)(in0 + 7*in_p));
+ x15 = _mm_loadl_epi64((__m128i *)(in1 + 7*in_p));
+
+ x0 = _mm_unpacklo_epi8(x0, x1);  /* interleave bytes of adjacent row pairs (in0 half) */
+ x1 = _mm_unpacklo_epi8(x2, x3);
+ x2 = _mm_unpacklo_epi8(x4, x5);
+ x3 = _mm_unpacklo_epi8(x6, x7);
+
+ x8 = _mm_unpacklo_epi8(x8, x9);  /* same for the in1 half */
+ x9 = _mm_unpacklo_epi8(x10, x11);
+ x10 = _mm_unpacklo_epi8(x12, x13);
+ x11 = _mm_unpacklo_epi8(x14, x15);
+
+ x4 = _mm_unpacklo_epi16(x0, x1);  /* low halves: input columns 0..3 */
+ x5 = _mm_unpacklo_epi16(x2, x3);
+ x12 = _mm_unpacklo_epi16(x8, x9);
+ x13 = _mm_unpacklo_epi16(x10, x11);
+
+ x6 = _mm_unpacklo_epi32(x4, x5);
+ x7 = _mm_unpackhi_epi32(x4, x5);
+ x14 = _mm_unpacklo_epi32(x12, x13);
+ x15 = _mm_unpackhi_epi32(x12, x13);
+
+ /* Store first 4-line result: each output row is a column of in0 followed by the same column of in1 */
+ _mm_storeu_si128((__m128i *)out, _mm_unpacklo_epi64(x6, x14));
+ _mm_storeu_si128((__m128i *)(out + out_p), _mm_unpackhi_epi64(x6, x14));
+ _mm_storeu_si128((__m128i *)(out + 2 * out_p), _mm_unpacklo_epi64(x7, x15));
+ _mm_storeu_si128((__m128i *)(out + 3 * out_p), _mm_unpackhi_epi64(x7, x15));
+
+ x4 = _mm_unpackhi_epi16(x0, x1);  /* high halves: input columns 4..7 */
+ x5 = _mm_unpackhi_epi16(x2, x3);
+ x12 = _mm_unpackhi_epi16(x8, x9);
+ x13 = _mm_unpackhi_epi16(x10, x11);
+
+ x6 = _mm_unpacklo_epi32(x4, x5);
+ x7 = _mm_unpackhi_epi32(x4, x5);
+ x14 = _mm_unpacklo_epi32(x12, x13);
+ x15 = _mm_unpackhi_epi32(x12, x13);
+
+ /* Store second 4-line result (output rows 4..7) */
+ _mm_storeu_si128((__m128i *)(out + 4 * out_p), _mm_unpacklo_epi64(x6, x14));
+ _mm_storeu_si128((__m128i *)(out + 5 * out_p), _mm_unpackhi_epi64(x6, x14));
+ _mm_storeu_si128((__m128i *)(out + 6 * out_p), _mm_unpacklo_epi64(x7, x15));
+ _mm_storeu_si128((__m128i *)(out + 7 * out_p), _mm_unpackhi_epi64(x7, x15));
+}
+
+static __inline void transpose(unsigned char *src[], int in_p,  /* src[i]: 8x8 byte block read at pitch in_p */
+ unsigned char *dst[], int out_p,  /* dst[i]: receives the transposed block at pitch out_p */
+ int num_8x8_to_transpose) {  /* number of src/dst pairs; the do-while runs at least once */
+ int idx8x8 = 0;
+ __m128i x0, x1, x2, x3, x4, x5, x6, x7;
+ do {
+ unsigned char *in = src[idx8x8];
+ unsigned char *out = dst[idx8x8];
+
+ x0 = _mm_loadl_epi64((__m128i *)(in + 0*in_p)); // 00 01 02 03 04 05 06 07
+ x1 = _mm_loadl_epi64((__m128i *)(in + 1*in_p)); // 10 11 12 13 14 15 16 17
+ x2 = _mm_loadl_epi64((__m128i *)(in + 2*in_p)); // 20 21 22 23 24 25 26 27
+ x3 = _mm_loadl_epi64((__m128i *)(in + 3*in_p)); // 30 31 32 33 34 35 36 37
+ x4 = _mm_loadl_epi64((__m128i *)(in + 4*in_p)); // 40 41 42 43 44 45 46 47
+ x5 = _mm_loadl_epi64((__m128i *)(in + 5*in_p)); // 50 51 52 53 54 55 56 57
+ x6 = _mm_loadl_epi64((__m128i *)(in + 6*in_p)); // 60 61 62 63 64 65 66 67
+ x7 = _mm_loadl_epi64((__m128i *)(in + 7*in_p)); // 70 71 72 73 74 75 76 77
+ // 00 10 01 11 02 12 03 13 04 14 05 15 06 16 07 17
+ x0 = _mm_unpacklo_epi8(x0, x1);
+ // 20 30 21 31 22 32 23 33 24 34 25 35 26 36 27 37
+ x1 = _mm_unpacklo_epi8(x2, x3);
+ // 40 50 41 51 42 52 43 53 44 54 45 55 46 56 47 57
+ x2 = _mm_unpacklo_epi8(x4, x5);
+ // 60 70 61 71 62 72 63 73 64 74 65 75 66 76 67 77
+ x3 = _mm_unpacklo_epi8(x6, x7);
+ // 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33
+ x4 = _mm_unpacklo_epi16(x0, x1);
+ // 40 50 60 70 41 51 61 71 42 52 62 72 43 53 63 73
+ x5 = _mm_unpacklo_epi16(x2, x3);
+ // 00 10 20 30 40 50 60 70 01 11 21 31 41 51 61 71
+ x6 = _mm_unpacklo_epi32(x4, x5);
+ // 02 12 22 32 42 52 62 72 03 13 23 33 43 53 63 73
+ x7 = _mm_unpackhi_epi32(x4, x5);
+
+ _mm_storel_pd((double *)(out + 0*out_p),  // storel/storeh write the low/high 8 bytes of the register
+ _mm_castsi128_pd(x6)); // 00 10 20 30 40 50 60 70
+ _mm_storeh_pd((double *)(out + 1*out_p),
+ _mm_castsi128_pd(x6)); // 01 11 21 31 41 51 61 71
+ _mm_storel_pd((double *)(out + 2*out_p),
+ _mm_castsi128_pd(x7)); // 02 12 22 32 42 52 62 72
+ _mm_storeh_pd((double *)(out + 3*out_p),
+ _mm_castsi128_pd(x7)); // 03 13 23 33 43 53 63 73
+
+ // 04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37
+ x4 = _mm_unpackhi_epi16(x0, x1);
+ // 44 54 64 74 45 55 65 75 46 56 66 76 47 57 67 77
+ x5 = _mm_unpackhi_epi16(x2, x3);
+ // 04 14 24 34 44 54 64 74 05 15 25 35 45 55 65 75
+ x6 = _mm_unpacklo_epi32(x4, x5);
+ // 06 16 26 36 46 56 66 76 07 17 27 37 47 57 67 77
+ x7 = _mm_unpackhi_epi32(x4, x5);
+
+ _mm_storel_pd((double *)(out + 4*out_p),
+ _mm_castsi128_pd(x6)); // 04 14 24 34 44 54 64 74
+ _mm_storeh_pd((double *)(out + 5*out_p),
+ _mm_castsi128_pd(x6)); // 05 15 25 35 45 55 65 75
+ _mm_storel_pd((double *)(out + 6*out_p),
+ _mm_castsi128_pd(x7)); // 06 16 26 36 46 56 66 76
+ _mm_storeh_pd((double *)(out + 7*out_p),
+ _mm_castsi128_pd(x7)); // 07 17 27 37 47 57 67 77
+ } while (++idx8x8 < num_8x8_to_transpose);
+}
+
+void vp9_mbloop_filter_vertical_edge_sse2(unsigned char *s,  /* column s is the first one right of the edge; 16 rows are processed */
+ int p,  /* byte offset between consecutive rows */
+ const unsigned char *blimit,
+ const unsigned char *limit,
+ const unsigned char *thresh) {
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, t_dst, 256);  /* 16 x 16 transposed scratch copy */
+ unsigned char *src[2];
+ unsigned char *dst[2];
+
+ /* Transpose 16x16: columns s-8..s-1 become t_dst rows 0..7, columns s..s+7 become rows 8..15 */
+ transpose8x16(s - 8, s - 8 + p * 8, p, t_dst, 16);
+ transpose8x16(s, s + p * 8, p, t_dst + 16 * 8, 16);
+
+ /* Loop filtering: reuse the horizontal-edge filter on the transposed copy (edge at row 8) */
+ vp9_mbloop_filter_horizontal_edge_sse2(t_dst + 8 * 16, 16, blimit, limit,
+ thresh);
+ src[0] = t_dst + 3 * 16;  /* scratch rows 3..10, first 8 bytes of each row */
+ src[1] = t_dst + 3 * 16 + 8;  /* scratch rows 3..10, last 8 bytes of each row */
+
+ dst[0] = s - 5;  /* image rows 0..7, columns s-5..s+2 */
+ dst[1] = s - 5 + p * 8;  /* image rows 8..15, columns s-5..s+2 */
+
+ /* Transpose 16x8: write scratch rows 3..10 back as image columns s-5..s+2 */
+ transpose(src, 16, dst, p, 2);
+}
+
+void vp9_mbloop_filter_vertical_edge_uv_sse2(unsigned char *u,  /* u plane: column u is the first one right of the edge; 8 rows */
+ int p,  /* byte offset between rows, applied to both u and v */
+ const unsigned char *blimit,
+ const unsigned char *limit,
+ const unsigned char *thresh,
+ unsigned char *v) {  /* v plane, handled like u */
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, t_dst, 256);  /* 16 x 16 transposed scratch copy */
+ unsigned char *src[2];
+ unsigned char *dst[2];
+
+ /* Transpose 16x16: each scratch row holds one column, 8 u bytes then 8 v bytes */
+ transpose8x16(u - 8, v - 8, p, t_dst, 16);
+ transpose8x16(u, v, p, t_dst + 16 * 8, 16);
+
+ /* Loop filtering: reuse the horizontal-edge filter on the transposed copy (edge at row 8) */
+ vp9_mbloop_filter_horizontal_edge_sse2(t_dst + 8 * 16, 16, blimit, limit,
+ thresh);
+
+ src[0] = t_dst + 3 * 16;  /* scratch rows 3..10, u half */
+ src[1] = t_dst + 3 * 16 + 8;  /* scratch rows 3..10, v half */
+
+ dst[0] = u - 5;  /* columns u-5..u+2 of the u plane */
+ dst[1] = v - 5;  /* columns v-5..v+2 of the v plane */
+
+ /* Transpose 16x8: write scratch rows 3..10 back into both planes */
+ transpose(src, 16, dst, p, 2);
+}
+
+/* Horizontal MB filtering (SSE2): luma edge first, then both chroma planes. */
+void vp9_loop_filter_mbh_sse2(unsigned char *y_ptr,
+                              unsigned char *u_ptr, unsigned char *v_ptr,
+                              int y_stride, int uv_stride,
+                              struct loop_filter_info *lfi) {
+  vp9_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride,
+                                         lfi->mblim, lfi->lim, lfi->hev_thr);
+  /* Chroma is optional; v_ptr is used whenever u_ptr is non-null. */
+  if (!u_ptr)
+    return;
+  vp9_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim,
+                                            lfi->lim, lfi->hev_thr, v_ptr);
+}
+
+void vp9_loop_filter_bh8x8_sse2(unsigned char *y_ptr, unsigned char *u_ptr,  /* u_ptr, v_ptr and uv_stride are not used */
+ unsigned char *v_ptr, int y_stride, int uv_stride,
+ struct loop_filter_info *lfi) {
+ vp9_mbloop_filter_horizontal_edge_sse2(  /* one luma edge at row 8, with lfi->blim as the blimit */
+ y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr);
+}
+
+/* Vertical MB Filtering (SSE2): luma edge first, then both chroma planes. */
+void vp9_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr,
+                              unsigned char *v_ptr, int y_stride, int uv_stride,
+                              struct loop_filter_info *lfi) {
+  vp9_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride,
+                                       lfi->mblim, lfi->lim, lfi->hev_thr);
+  /* Chroma is optional; v_ptr is used whenever u_ptr is non-null. */
+  if (!u_ptr)
+    return;
+  vp9_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim,
+                                          lfi->lim, lfi->hev_thr, v_ptr);
+}
+
+void vp9_loop_filter_bv8x8_sse2(unsigned char *y_ptr, unsigned char *u_ptr,  /* u_ptr, v_ptr and uv_stride are not used */
+ unsigned char *v_ptr, int y_stride, int uv_stride,
+ struct loop_filter_info *lfi) {
+ vp9_mbloop_filter_vertical_edge_sse2(  /* one luma edge at column 8, with lfi->blim as the blimit */
+ y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr);
+}
+
+/* Horizontal B Filtering (SSE2): interior horizontal edges of the block. */
+void vp9_loop_filter_bh_sse2(unsigned char *y_ptr,
+                             unsigned char *u_ptr, unsigned char *v_ptr,
+                             int y_stride, int uv_stride,
+                             struct loop_filter_info *lfi) {
+  int row;
+  /* Luma edges at rows 4, 8 and 12; the last argument is 2 for each. */
+  for (row = 4; row <= 12; row += 4)
+    vp9_loop_filter_horizontal_edge_sse2(y_ptr + row * y_stride, y_stride,
+                                         lfi->blim, lfi->lim, lfi->hev_thr, 2);
+  /* Chroma edge at row 4; v_ptr is used whenever u_ptr is non-null. */
+  if (!u_ptr)
+    return;
+  vp9_loop_filter_horizontal_edge_uv_sse2(u_ptr + 4 * uv_stride, uv_stride,
+                                          lfi->blim, lfi->lim, lfi->hev_thr,
+                                          v_ptr + 4 * uv_stride);
+}
+
+void vp9_loop_filter_bhs_sse2(unsigned char *y_ptr, int y_stride,
+                              const unsigned char *blimit) {
+  /* Simple filter on the horizontal edges at rows 4, 8 and 12, in order. */
+  int row;
+  for (row = 4; row <= 12; row += 4) {
+    vp9_loop_filter_simple_horizontal_edge_sse2(y_ptr + row * y_stride,
+                                                y_stride, blimit);
+  }
+}
+
+/* Vertical B Filtering (SSE2): interior vertical edges of the block. */
+void vp9_loop_filter_bv_sse2(unsigned char *y_ptr,
+                             unsigned char *u_ptr, unsigned char *v_ptr,
+                             int y_stride, int uv_stride,
+                             struct loop_filter_info *lfi) {
+  int col;
+  /* Luma edges at columns 4, 8 and 12; the last argument is 2 for each. */
+  for (col = 4; col <= 12; col += 4)
+    vp9_loop_filter_vertical_edge_sse2(y_ptr + col, y_stride,
+                                       lfi->blim, lfi->lim, lfi->hev_thr, 2);
+  /* Chroma edge at column 4; v_ptr is used whenever u_ptr is non-null. */
+  if (!u_ptr)
+    return;
+  vp9_loop_filter_vertical_edge_uv_sse2(u_ptr + 4, uv_stride,
+                                        lfi->blim, lfi->lim, lfi->hev_thr,
+                                        v_ptr + 4);
+}
+
+void vp9_loop_filter_bvs_sse2(unsigned char *y_ptr, int y_stride,
+                              const unsigned char *blimit) {
+  int col;  /* simple filter on the vertical edges at columns 4, 8, 12 */
+  for (col = 4; col <= 12; col += 4)
+    vp9_loop_filter_simple_vertical_edge_sse2(y_ptr + col, y_stride, blimit);
+}
+
+#endif
diff --git a/vp9/common/x86/vp9_loopfilter_x86.h b/vp9/common/x86/vp9_loopfilter_x86.h
new file mode 100644
index 0000000..46a6202
--- /dev/null
+++ b/vp9/common/x86/vp9_loopfilter_x86.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_COMMON_X86_VP9_LOOPFILTER_X86_H_
+#define VP9_COMMON_X86_VP9_LOOPFILTER_X86_H_
+
+/* Note:
+ *
+ * This platform is commonly built for runtime CPU detection. If you modify
+ * any of the function mappings present in this file, be sure to also update
+ * them in the function pointer initialization code
+ */
+
+#if HAVE_MMX
+extern prototype_loopfilter_block(vp9_loop_filter_mbv_mmx);
+extern prototype_loopfilter_block(vp9_loop_filter_bv_mmx);
+extern prototype_loopfilter_block(vp9_loop_filter_mbh_mmx);
+extern prototype_loopfilter_block(vp9_loop_filter_bh_mmx);
+extern prototype_simple_loopfilter(vp9_loop_filter_simple_vertical_edge_mmx);
+extern prototype_simple_loopfilter(vp9_loop_filter_bvs_mmx);
+extern prototype_simple_loopfilter(vp9_loop_filter_simple_horizontal_edge_mmx);
+extern prototype_simple_loopfilter(vp9_loop_filter_bhs_mmx);
+#endif
+
+#if HAVE_SSE2
+extern prototype_loopfilter_block(vp9_loop_filter_mbv_sse2);
+extern prototype_loopfilter_block(vp9_loop_filter_bv_sse2);
+extern prototype_loopfilter_block(vp9_loop_filter_mbh_sse2);
+extern prototype_loopfilter_block(vp9_loop_filter_bh_sse2);
+extern prototype_simple_loopfilter(vp9_loop_filter_simple_vertical_edge_sse2);
+extern prototype_simple_loopfilter(vp9_loop_filter_bvs_sse2);
+extern prototype_simple_loopfilter(vp9_loop_filter_simple_horizontal_edge_sse2);
+extern prototype_simple_loopfilter(vp9_loop_filter_bhs_sse2);
+#endif
+
+#endif  // VP9_COMMON_X86_VP9_LOOPFILTER_X86_H_
diff --git a/vp9/common/x86/vp9_mask_sse3.asm b/vp9/common/x86/vp9_mask_sse3.asm
new file mode 100644
index 0000000..fe46823
--- /dev/null
+++ b/vp9/common/x86/vp9_mask_sse3.asm
@@ -0,0 +1,484 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+;void vp8_makemask_sse3( ; header used to read "void int"; no result is placed in rax
+; unsigned char *y,
+; unsigned char *u,
+; unsigned char *v,
+; unsigned char *ym,
+; unsigned char *uvm, ; NOTE(review): never read below, arg(4) is loaded as yp - confirm the C prototype
+; int yp,
+; int uvp,
+; int ys,
+; int us,
+; int vs,
+; int yt,
+; int ut,
+; int vt)
+global sym(vp8_makemask_sse3) PRIVATE
+sym(vp8_makemask_sse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 14
+ push rsi
+ push rdi
+ ; end prolog (NOTE(review): xmm6-xmm11 are used below without a SAVE_XMM)
+
+ mov rsi, arg(0) ;y
+ mov rdi, arg(1) ;u
+ mov rcx, arg(2) ;v
+ mov rax, arg(3) ;ym
+ movsxd r10, dword arg(4) ;yp (held in volatile r10 so callee-saved rbx stays untouched)
+ movsxd rdx, dword arg(5) ;uvp
+
+ pxor xmm0,xmm0 ; all-zero pshufb control: broadcasts byte 0 to all 16 lanes
+
+ ;make 16 copies of the center y value
+ movd xmm1, arg(6)
+ pshufb xmm1, xmm0
+
+ ; make 16 copies of the center u value
+ movd xmm2, arg(7)
+ pshufb xmm2, xmm0
+
+ ; make 16 copies of the center v value
+ movd xmm3, arg(8)
+ pshufb xmm3, xmm0
+ unpcklpd xmm2, xmm3 ; xmm2: low qword = u center x8, high qword = v center x8
+
+ ;make 16 copies of the y tolerance
+ movd xmm3, arg(9)
+ pshufb xmm3, xmm0
+
+ ;make 16 copies of the u tolerance
+ movd xmm4, arg(10)
+ pshufb xmm4, xmm0
+
+ ;make 16 copies of the v tolerance
+ movd xmm5, arg(11)
+ pshufb xmm5, xmm0
+ unpckhpd xmm4, xmm5 ; xmm4: low qword = u tolerance x8, high qword = v tolerance x8
+
+ mov r8,8 ; 8 iterations, each handles two y rows and one u/v row
+
+NextPairOfRows:
+
+ ;grab the y source values
+ movdqu xmm0, [rsi]
+
+ ;compute abs difference between source and y target
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm0
+ psubusb xmm0, xmm1
+ psubusb xmm6, xmm7
+ por xmm0, xmm6
+
+ ;row 0 mask: 0xff where y tolerance > abs difference (signed byte compare)
+ movdqa xmm6, xmm3
+ pcmpgtb xmm6, xmm0
+
+ ;grab the y source values of the second row of the pair
+ add rsi, r10
+ movdqu xmm0, [rsi]
+
+ ;compute abs difference between source and y target
+ movdqa xmm11, xmm1
+ movdqa xmm7, xmm0
+ psubusb xmm0, xmm1
+ psubusb xmm11, xmm7
+ por xmm0, xmm11
+
+ ;row 1 mask: 0xff where y tolerance > abs difference (signed byte compare)
+ movdqa xmm11, xmm3
+ pcmpgtb xmm11, xmm0
+
+
+ ;grab the u and v source values (low 8 bytes of each load are kept)
+ movdqu xmm7, [rdi]
+ movdqu xmm8, [rcx]
+ unpcklpd xmm7, xmm8
+
+ ;compute abs difference between source and uv targets
+ movdqa xmm9, xmm2
+ movdqa xmm10, xmm7
+ psubusb xmm7, xmm2
+ psubusb xmm9, xmm10
+ por xmm7, xmm9
+
+ ;check whether the number is < tolerance
+ movdqa xmm0, xmm4
+ pcmpgtb xmm0, xmm7
+
+ ;double u and v masks (each chroma byte is repeated for two luma columns)
+ movdqa xmm8, xmm0
+ punpckhbw xmm0, xmm0 ; xmm0 = v mask, 16 bytes
+ punpcklbw xmm8, xmm8 ; xmm8 = u mask, 16 bytes
+
+ ;mask row 0 and output (movdqa: ym must be 16-byte aligned)
+ pand xmm6, xmm8
+ pand xmm6, xmm0
+ movdqa [rax],xmm6
+
+ ;mask row 1 and output
+ pand xmm11, xmm8
+ pand xmm11, xmm0
+ movdqa [rax+16],xmm11
+
+
+ ; to the next row or set of rows
+ add rsi, r10
+ add rdi, rdx
+ add rcx, rdx
+ add rax,32 ; output rows are written 16 bytes apart
+ dec r8
+ jnz NextPairOfRows
+
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;GROW_HORIZ (register for result, source register or mem local)
+; copies source into result, then ORs in the source shifted one byte left
+; and one byte right (whole-register byte shifts); clobbers xmm14 and xmm15
+%macro GROW_HORIZ 2
+ movdqa %1, %2
+ movdqa xmm14, %1
+ movdqa xmm15, %1
+ pslldq xmm14, 1
+ psrldq xmm15, 1
+ por %1,xmm14
+ por %1,xmm15
+%endmacro
+;GROW_VERT (result, center row, above row, below row): result = center | above | below
+%macro GROW_VERT 4
+ movdqa %1,%2
+ por %1,%3
+ por %1,%4
+%endmacro
+
+;GROW_NEXTLINE (new line to grow, new source, line to write): always combines xmm0, xmm1, xmm2 through xmm3
+%macro GROW_NEXTLINE 3
+ GROW_HORIZ %1, %2
+ GROW_VERT xmm3, xmm0, xmm1, xmm2
+ movdqa %3,xmm3
+%endmacro
+
+
+;void vp8_growmaskmb_sse3( ; header used to read "void int"; no result is placed in rax
+; unsigned char *om, ; source mask: 16 rows of 16 bytes, read with movdqa (16-byte aligned)
+; unsigned char *nm) ; result mask: each byte = OR of the source bytes in its 3x3 neighbourhood
+global sym(vp8_growmaskmb_sse3) PRIVATE
+sym(vp8_growmaskmb_sse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 2
+ push rsi
+ push rdi
+ ; end prolog (NOTE(review): the GROW_HORIZ macro uses xmm14/xmm15; no SAVE_XMM here)
+
+ mov rsi, arg(0) ;src
+ mov rdi, arg(1) ;rst
+
+ GROW_HORIZ xmm0, [rsi]
+ GROW_HORIZ xmm1, [rsi+16]
+ GROW_HORIZ xmm2, [rsi+32]
+
+ GROW_VERT xmm3, xmm0, xmm1, xmm2
+ por xmm0,xmm1 ; top row has no row above: rows 0 and 1 only
+ movdqa [rdi], xmm0
+ movdqa [rdi+16],xmm3
+
+ GROW_NEXTLINE xmm0,[rsi+48],[rdi+32]
+ GROW_NEXTLINE xmm1,[rsi+64],[rdi+48]
+ GROW_NEXTLINE xmm2,[rsi+80],[rdi+64]
+ GROW_NEXTLINE xmm0,[rsi+96],[rdi+80]
+ GROW_NEXTLINE xmm1,[rsi+112],[rdi+96]
+ GROW_NEXTLINE xmm2,[rsi+128],[rdi+112]
+ GROW_NEXTLINE xmm0,[rsi+144],[rdi+128]
+ GROW_NEXTLINE xmm1,[rsi+160],[rdi+144]
+ GROW_NEXTLINE xmm2,[rsi+176],[rdi+160]
+ GROW_NEXTLINE xmm0,[rsi+192],[rdi+176]
+ GROW_NEXTLINE xmm1,[rsi+208],[rdi+192]
+ GROW_NEXTLINE xmm2,[rsi+224],[rdi+208]
+ GROW_NEXTLINE xmm0,[rsi+240],[rdi+224]
+
+ por xmm0,xmm2 ; bottom row has no row below: rows 15 and 14 only
+ movdqa [rdi+240], xmm0
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+
+;unsigned int vp8_sad16x16_masked_wmt( ; SAD of (src & mask) against (ref & mask), 16 rows x 16 bytes
+; unsigned char *src_ptr,
+; int src_stride,
+; unsigned char *ref_ptr,
+; int ref_stride,
+; unsigned char *mask) ; 16 bytes are read per row, rows 16 bytes apart
+global sym(vp8_sad16x16_masked_wmt) PRIVATE
+sym(vp8_sad16x16_masked_wmt):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 5
+ push rsi
+ push rdi
+ ; end prolog
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;ref_ptr
+
+ mov r10, arg(4) ;mask (held in volatile r10 so callee-saved rbx stays untouched)
+ movsxd rax, dword ptr arg(1) ;src_stride
+ movsxd rdx, dword ptr arg(3) ;ref_stride
+
+ mov rcx, 16 ; row counter
+
+ pxor xmm3, xmm3 ; xmm3 = running SAD, one partial sum per qword
+
+NextSadRow:
+ movdqu xmm0, [rsi]
+ movdqu xmm1, [rdi]
+ movdqu xmm2, [r10]
+ pand xmm0, xmm2 ; bytes whose mask is 0 become 0 in both operands
+ pand xmm1, xmm2
+
+ psadbw xmm0, xmm1
+ paddw xmm3, xmm0
+
+ add rsi, rax
+ add rdi, rdx
+ add r10, 16
+
+ dec rcx
+ jnz NextSadRow
+
+ movdqa xmm4 , xmm3
+ psrldq xmm4, 8 ; bring the high-qword partial sum down
+ paddw xmm3, xmm4
+ movq rax, xmm3 ; return value = low qword of the combined sum
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;unsigned int vp8_sad16x16_unmasked_wmt( ; SAD of (src | mask) against (ref | mask), 16 rows x 16 bytes
+; unsigned char *src_ptr,
+; int src_stride,
+; unsigned char *ref_ptr,
+; int ref_stride,
+; unsigned char *mask) ; 16 bytes are read per row, rows 16 bytes apart
+global sym(vp8_sad16x16_unmasked_wmt) PRIVATE
+sym(vp8_sad16x16_unmasked_wmt):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 5
+ push rsi
+ push rdi
+ ; end prolog
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;ref_ptr
+
+ mov r10, arg(4) ;mask (held in volatile r10 so callee-saved rbx stays untouched)
+ movsxd rax, dword ptr arg(1) ;src_stride
+ movsxd rdx, dword ptr arg(3) ;ref_stride
+
+ mov rcx, 16 ; row counter
+
+ pxor xmm3, xmm3 ; xmm3 = running SAD, one partial sum per qword
+
+next_vp8_sad16x16_unmasked_wmt:
+ movdqu xmm0, [rsi]
+ movdqu xmm1, [rdi]
+ movdqu xmm2, [r10]
+ por xmm0, xmm2 ; bytes whose mask is 0xff become 0xff in both operands
+ por xmm1, xmm2
+
+ psadbw xmm0, xmm1
+ paddw xmm3, xmm0
+
+ add rsi, rax
+ add rdi, rdx
+ add r10, 16
+
+ dec rcx
+ jnz next_vp8_sad16x16_unmasked_wmt
+
+ movdqa xmm4 , xmm3
+ psrldq xmm4, 8 ; bring the high-qword partial sum down
+ paddw xmm3, xmm4
+ movq rax, xmm3 ; return value = low qword of the combined sum
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;unsigned int vp8_masked_predictor_wmt( ; dst = (masked & mask) | (unmasked & ~mask); rax is not set to a result
+; unsigned char *masked,
+; unsigned char *unmasked,
+; int src_stride,
+; unsigned char *dst_ptr,
+; int dst_stride,
+; unsigned char *mask) ; 16 bytes are read per row, rows 16 bytes apart
+global sym(vp8_masked_predictor_wmt) PRIVATE
+sym(vp8_masked_predictor_wmt):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ push rsi
+ push rdi
+ ; end prolog
+ mov rsi, arg(0) ;masked
+ mov rdi, arg(1) ;unmasked
+
+ mov r10, arg(5) ;mask (held in volatile r10 so callee-saved rbx stays untouched)
+ movsxd rax, dword ptr arg(2) ;src_stride
+ mov r11, arg(3) ; destination
+ movsxd rdx, dword ptr arg(4) ;dst_stride
+
+ mov rcx, 16 ; row counter
+
+ pxor xmm3, xmm3 ; xmm3 is not read again in this routine
+
+next_vp8_masked_predictor_wmt:
+ movdqu xmm0, [rsi]
+ movdqu xmm1, [rdi]
+ movdqu xmm2, [r10]
+
+ pand xmm0, xmm2 ; masked & mask
+ pandn xmm2, xmm1 ; unmasked & ~mask
+ por xmm0, xmm2
+ movdqu [r11], xmm0
+
+ add r11, rdx
+ add rsi, rax
+ add rdi, rdx ; NOTE(review): steps by dst_stride; vp8_masked_predictor_uv_wmt steps this pointer by src_stride - confirm
+ add r10, 16
+
+ dec rcx
+ jnz next_vp8_masked_predictor_wmt
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;unsigned int vp8_masked_predictor_uv_wmt( ; dst = (masked & mask) | (unmasked & ~mask); rax is not set to a result
+; unsigned char *masked,
+; unsigned char *unmasked,
+; int src_stride,
+; unsigned char *dst_ptr,
+; int dst_stride,
+; unsigned char *mask) ; 8 bytes are read per row, rows 8 bytes apart
+global sym(vp8_masked_predictor_uv_wmt) PRIVATE
+sym(vp8_masked_predictor_uv_wmt):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ push rsi
+ push rdi
+ ; end prolog
+ mov rsi, arg(0) ;masked
+ mov rdi, arg(1) ;unmasked
+
+ mov r10, arg(5) ;mask (held in volatile r10 so callee-saved rbx stays untouched)
+ movsxd rax, dword ptr arg(2) ;src_stride
+ mov r11, arg(3) ; destination
+ movsxd rdx, dword ptr arg(4) ;dst_stride
+
+ mov rcx, 8 ; row counter
+
+ pxor xmm3, xmm3 ; xmm3 is not read again in this routine
+
+next_vp8_masked_predictor_uv_wmt:
+ movq xmm0, [rsi]
+ movq xmm1, [rdi]
+ movq xmm2, [r10]
+
+ pand xmm0, xmm2 ; masked & mask
+ pandn xmm2, xmm1 ; unmasked & ~mask
+ por xmm0, xmm2
+ movq [r11], xmm0
+
+ add r11, rdx
+ add rsi, rax
+ add rdi, rax
+ add r10, 8
+
+ dec rcx
+ jnz next_vp8_masked_predictor_uv_wmt
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;unsigned int vp8_uv_from_y_mask( ; writes 8 rows of 8 bytes; rax is not set to a result
+; unsigned char *ymask, ; every second 16-byte row is read (rsi advances 32 per row)
+; unsigned char *uvmask) ; receives the even-indexed bytes of each row that is read
+global sym(vp8_uv_from_y_mask) PRIVATE
+sym(vp8_uv_from_y_mask):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ push rsi
+ push rdi
+ ; end prolog (NOTE(review): 6 arguments are shadowed above but only arg(0) and arg(1) are read)
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(1) ;dst_ptr
+
+
+ mov rcx, 8 ; row counter
+
+ pxor xmm3, xmm3 ; xmm3 is not read again in this routine
+
+next_p8_uv_from_y_mask:
+ movdqu xmm0, [rsi]
+ pshufb xmm0, [shuf1b] ;[GLOBAL(shuf1b)] ; NOTE(review): direct address, not the GLOBAL() form - confirm for PIC builds
+ movq [rdi],xmm0 ; only the low 8 gathered bytes are stored
+ add rdi, 8
+ add rsi,32
+
+ dec rcx
+ jnz next_p8_uv_from_y_mask
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+SECTION_RODATA
+align 16
+shuf1b: ; pshufb control for vp8_uv_from_y_mask: gathers bytes 0,2,..,14 into the low qword
+ db 0, 2, 4, 6, 8, 10, 12, 14, 0, 0, 0, 0, 0, 0, 0, 0
+
diff --git a/vp9/common/x86/vp9_postproc_mmx.asm b/vp9/common/x86/vp9_postproc_mmx.asm
new file mode 100644
index 0000000..5f06f0e
--- /dev/null
+++ b/vp9/common/x86/vp9_postproc_mmx.asm
@@ -0,0 +1,534 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+%define VP9_FILTER_WEIGHT 128
+%define VP9_FILTER_SHIFT 7
+
+;void vp9_post_proc_down_and_across_mmx
+;(
+; unsigned char *src_ptr,
+; unsigned char *dst_ptr,
+; int src_pixels_per_line,
+; int dst_pixels_per_line,
+; int rows,
+; int cols,
+; int flimit
+;)
+global sym(vp9_post_proc_down_and_across_mmx) PRIVATE
+sym(vp9_post_proc_down_and_across_mmx):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+%if ABI_IS_32BIT=1 && CONFIG_PIC=1
+ ; move the global rd onto the stack, since we don't have enough registers
+ ; to do PIC addressing
+ movq mm0, [GLOBAL(rd)]
+ sub rsp, 8
+ movq [rsp], mm0
+%define RD [rsp]
+%else
+%define RD [GLOBAL(rd)]
+%endif
+ ; NOTE(review): rsp moves again at the pushes below and no add rsp, 8 is visible - confirm the [rsp] form of RD
+ push rbx
+ lea rbx, [GLOBAL(Blur)]
+ movd mm2, dword ptr arg(6) ;flimit
+ punpcklwd mm2, mm2
+ punpckldq mm2, mm2 ; mm2 = flimit in all four words
+
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(1) ;dst_ptr
+
+ movsxd rcx, DWORD PTR arg(4) ;rows
+ movsxd rax, DWORD PTR arg(2) ;src_pixels_per_line (source pitch)
+ pxor mm0, mm0 ; mm0 = 00000000
+
+.nextrow:
+
+ xor rdx, rdx ; clear out rdx for use as loop counter
+.nextcol:
+
+ pxor mm7, mm7 ; mm7 = 00000000
+ movq mm6, [rbx + 32 ] ; mm6 = kernel 2 taps
+ movq mm3, [rsi] ; mm3 = r0 p0..p7
+ punpcklbw mm3, mm0 ; mm3 = p0..p3
+ movq mm1, mm3 ; mm1 = p0..p3
+ pmullw mm3, mm6 ; mm3 *= kernel 2 modifiers
+
+ movq mm6, [rbx + 48] ; mm6 = kernel 3 taps
+ movq mm5, [rsi + rax] ; mm5 = r1 p0..p7
+ punpcklbw mm5, mm0 ; mm5 = r1 p0..p3
+ pmullw mm6, mm5 ; mm6 = r1 p0..p3 * kernel 3 modifiers
+ paddusw mm3, mm6 ; mm3 += mm6
+
+ ; thresholding
+ movq mm7, mm1 ; mm7 = r0 p0..p3
+ psubusw mm7, mm5 ; mm7 = r0 p0..p3 - r1 p0..p3
+ psubusw mm5, mm1 ; mm5 = r1 p0..p3 - r0 p0..p3
+ paddusw mm7, mm5 ; mm7 = abs(r0 p0..p3 - r1 p0..p3)
+ pcmpgtw mm7, mm2 ; mm7 = all ones where the difference exceeds flimit
+
+ movq mm6, [rbx + 64 ] ; mm6 = kernel 4 modifiers
+ movq mm5, [rsi + 2*rax] ; mm5 = r2 p0..p7
+ punpcklbw mm5, mm0 ; mm5 = r2 p0..p3
+ pmullw mm6, mm5 ; mm6 = r2 p0..p3 * kernel 4 modifiers
+ paddusw mm3, mm6 ; mm3 += mm6
+
+ ; thresholding
+ movq mm6, mm1 ; mm6 = r0 p0..p3
+ psubusw mm6, mm5 ; mm6 = r0 p0..p3 - r2 p0..p3
+ psubusw mm5, mm1 ; mm5 = r2 p0..p3 - r0 p0..p3
+ paddusw mm6, mm5 ; mm6 = abs(r0 p0..p3 - r2 p0..p3)
+ pcmpgtw mm6, mm2
+ por mm7, mm6 ; accumulate thresholds
+
+
+ neg rax ; rax = -pitch, to reach the rows above
+ movq mm6, [rbx ] ; kernel 0 taps
+ movq mm5, [rsi+2*rax] ; mm5 = r-2 p0..p7
+ punpcklbw mm5, mm0 ; mm5 = r-2 p0..p3
+ pmullw mm6, mm5 ; mm6 = r-2 p0..p3 * kernel 0 modifiers
+ paddusw mm3, mm6 ; mm3 += mm6
+
+ ; thresholding
+ movq mm6, mm1 ; mm6 = r0 p0..p3
+ psubusw mm6, mm5 ; mm6 = p0..p3 - r-2 p0..p3
+ psubusw mm5, mm1 ; mm5 = r-2 p0..p3 - p0..p3
+ paddusw mm6, mm5 ; mm6 = abs(r0 p0..p3 - r-2 p0..p3)
+ pcmpgtw mm6, mm2
+ por mm7, mm6 ; accumulate thresholds
+
+ movq mm6, [rbx + 16] ; kernel 1 taps
+ movq mm4, [rsi+rax] ; mm4 = r-1 p0..p7
+ punpcklbw mm4, mm0 ; mm4 = r-1 p0..p3
+ pmullw mm6, mm4 ; mm6 = r-1 p0..p3 * kernel 1 modifiers
+ paddusw mm3, mm6 ; mm3 += mm6
+
+ ; thresholding
+ movq mm6, mm1 ; mm6 = r0 p0..p3
+ psubusw mm6, mm4 ; mm6 = p0..p3 - r-1 p0..p3
+ psubusw mm4, mm1 ; mm4 = r-1 p0..p3 - p0..p3
+ paddusw mm6, mm4 ; mm6 = abs(r0 p0..p3 - r-1 p0..p3)
+ pcmpgtw mm6, mm2
+ por mm7, mm6 ; accumulate thresholds
+
+
+ paddusw mm3, RD ; mm3 += round value
+ psraw mm3, VP9_FILTER_SHIFT ; mm3 /= 128
+
+ pand mm1, mm7 ; mm1 select vals > thresh from source
+ pandn mm7, mm3 ; mm7 select vals < thresh from blurred result
+ paddusw mm1, mm7 ; combination
+
+ packuswb mm1, mm0 ; pack to bytes
+
+ movd [rdi], mm1 ; store 4 filtered pixels
+ neg rax ; pitch is positive
+
+
+ add rsi, 4
+ add rdi, 4
+ add rdx, 4
+
+ cmp edx, dword ptr arg(5) ;cols
+ jl .nextcol
+ ; done with the all cols, start the across filtering in place
+ sub rsi, rdx
+ sub rdi, rdx
+
+
+ push rax
+ xor rdx, rdx
+ mov rax, [rdi-4]; eax carries the 4 bytes whose store is delayed by one iteration
+
+.acrossnextcol:
+ pxor mm7, mm7 ; mm7 = 00000000
+ movq mm6, [rbx + 32 ] ; kernel 2 taps
+ movq mm4, [rdi+rdx] ; mm4 = p0..p7
+ movq mm3, mm4 ; mm3 = p0..p7
+ punpcklbw mm3, mm0 ; mm3 = p0..p3
+ movq mm1, mm3 ; mm1 = p0..p3
+ pmullw mm3, mm6 ; mm3 *= kernel 2 modifiers
+
+ movq mm6, [rbx + 48]
+ psrlq mm4, 8 ; mm4 = p1..p7
+ movq mm5, mm4 ; mm5 = p1..p7
+ punpcklbw mm5, mm0 ; mm5 = p1..p4
+ pmullw mm6, mm5 ; mm6 = p1..p4 * kernel 3 modifiers
+ paddusw mm3, mm6 ; mm3 += mm6
+
+ ; thresholding
+ movq mm7, mm1 ; mm7 = p0..p3
+ psubusw mm7, mm5 ; mm7 = p0..p3 - p1..p4
+ psubusw mm5, mm1 ; mm5 = p1..p4 - p0..p3
+ paddusw mm7, mm5 ; mm7 = abs(p0..p3 - p1..p4)
+ pcmpgtw mm7, mm2
+
+ movq mm6, [rbx + 64 ]
+ psrlq mm4, 8 ; mm4 = p2..p7
+ movq mm5, mm4 ; mm5 = p2..p7
+ punpcklbw mm5, mm0 ; mm5 = p2..p5
+ pmullw mm6, mm5 ; mm6 = p2..p5 * kernel 4 modifiers
+ paddusw mm3, mm6 ; mm3 += mm6
+
+ ; thresholding
+ movq mm6, mm1 ; mm6 = p0..p3
+ psubusw mm6, mm5 ; mm6 = p0..p3 - p2..p5
+ psubusw mm5, mm1 ; mm5 = p2..p5 - p0..p3
+ paddusw mm6, mm5 ; mm6 = abs(p0..p3 - p2..p5)
+ pcmpgtw mm6, mm2
+ por mm7, mm6 ; accumulate thresholds
+
+
+ movq mm6, [rbx ]
+ movq mm4, [rdi+rdx-2] ; mm4 = p-2..p5
+ movq mm5, mm4 ; mm5 = p-2..p5
+ punpcklbw mm5, mm0 ; mm5 = p-2..p1
+ pmullw mm6, mm5 ; mm6 = p-2..p1 * kernel 0 modifiers
+ paddusw mm3, mm6 ; mm3 += mm6
+
+ ; thresholding
+ movq mm6, mm1 ; mm6 = p0..p3
+ psubusw mm6, mm5 ; mm6 = p0..p3 - p-2..p1
+ psubusw mm5, mm1 ; mm5 = p-2..p1 - p0..p3
+ paddusw mm6, mm5 ; mm6 = abs(p0..p3 - p-2..p1)
+ pcmpgtw mm6, mm2
+ por mm7, mm6 ; accumulate thresholds
+
+ movq mm6, [rbx + 16]
+ psrlq mm4, 8 ; mm4 = p-1..p5
+ punpcklbw mm4, mm0 ; mm4 = p-1..p2
+ pmullw mm6, mm4 ; mm6 = p-1..p2 * kernel 1 modifiers
+ paddusw mm3, mm6 ; mm3 += mm6
+
+ ; thresholding
+ movq mm6, mm1 ; mm6 = p0..p3
+ psubusw mm6, mm4 ; mm6 = p0..p3 - p-1..p2
+ psubusw mm4, mm1 ; mm4 = p-1..p2 - p0..p3
+ paddusw mm6, mm4 ; mm6 = abs(p0..p3 - p-1..p2)
+ pcmpgtw mm6, mm2
+ por mm7, mm6 ; accumulate thresholds
+
+ paddusw mm3, RD ; mm3 += round value
+ psraw mm3, VP9_FILTER_SHIFT ; mm3 /= 128
+
+ pand mm1, mm7 ; mm1 select vals > thresh from source
+ pandn mm7, mm3 ; mm7 select vals < thresh from blurred result
+ paddusw mm1, mm7 ; combination
+
+ packuswb mm1, mm0 ; pack to bytes
+ mov DWORD PTR [rdi+rdx-4], eax ; store previous four bytes
+ movd eax, mm1 ; hold this result until the next iteration has read its left neighbours
+
+ add rdx, 4
+ cmp edx, dword ptr arg(5) ;cols
+ jl .acrossnextcol;
+
+ mov DWORD PTR [rdi+rdx-4], eax ; flush the last delayed four bytes
+ pop rax
+
+ ; done with this row
+ add rsi,rax ; next line
+ movsxd rax, dword ptr arg(3) ;dst_pixels_per_line (destination pitch)
+ add rdi,rax ; next destination
+ movsxd rax, dword ptr arg(2) ;src_pixels_per_line (source pitch, restored for the next row)
+
+ dec rcx ; decrement count
+ jnz .nextrow ; next row
+ pop rbx
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+%undef RD
+
+
+;void vp9_mbpost_proc_down_mmx(unsigned char *dst,
+; int pitch, int rows, int cols,int flimit)
+extern sym(vp9_rv)
+global sym(vp9_mbpost_proc_down_mmx) PRIVATE
+sym(vp9_mbpost_proc_down_mmx):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 5
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ ALIGN_STACK 16, rax
+ sub rsp, 136
+
+ ; unsigned char d[16][8] at [rsp] (this MMX version indexes it as 16 entries of 4 bytes)
+ ; create flimit2 at [rsp+128]
+ mov eax, dword ptr arg(4) ;flimit
+ mov [rsp+128], eax
+ mov [rsp+128+4], eax
+%define flimit2 [rsp+128]
+
+%if ABI_IS_32BIT=0
+ lea r8, [GLOBAL(sym(vp9_rv))]
+%endif
+
+ ;rows +=8; (results are written back 8 rows late, so 8 extra iterations flush them)
+ add dword ptr arg(2), 8
+
+ ;for(c=0; c<cols; c+=4)
+.loop_col:
+ mov rsi, arg(0) ;s
+ pxor mm0, mm0 ; mm0 = 0, used for unpacking
+
+ movsxd rax, dword ptr arg(1) ;pitch ;
+ neg rax ; rax = -pitch
+
+ lea rsi, [rsi + rax*8]; ; rsi = s - 8*pitch
+ neg rax
+
+
+ pxor mm5, mm5 ; mm5 = running sum, one word per column
+ pxor mm6, mm6 ; mm6 = running sum of squares, columns 0-1 (dwords)
+
+ pxor mm7, mm7 ; mm7 = running sum of squares, columns 2-3 (dwords)
+ mov rdi, rsi
+
+ mov rcx, 15 ; prime the window with 15 rows
+
+.loop_initvar:
+ movd mm1, DWORD PTR [rdi];
+ punpcklbw mm1, mm0 ; 4 pixels as words
+
+ paddw mm5, mm1 ; sum += pixels
+ pmullw mm1, mm1 ; pixels squared
+
+ movq mm2, mm1 ;
+ punpcklwd mm1, mm0 ; squares of columns 0-1 as dwords
+
+ punpckhwd mm2, mm0 ; squares of columns 2-3 as dwords
+ paddd mm6, mm1 ;
+
+ paddd mm7, mm2 ;
+ lea rdi, [rdi+rax] ; next row
+
+ dec rcx
+ jne .loop_initvar
+ ;save the var and sum
+ xor rdx, rdx
+.loop_row:
+ movd mm1, DWORD PTR [rsi] ; [s-pitch*8]
+ movd mm2, DWORD PTR [rdi] ; [s+pitch*7]
+
+ punpcklbw mm1, mm0
+ punpcklbw mm2, mm0
+
+ paddw mm5, mm2 ; sum += row entering the window
+ psubw mm5, mm1 ; sum -= row leaving the window
+
+ pmullw mm2, mm2
+ movq mm4, mm2
+
+ punpcklwd mm2, mm0
+ punpckhwd mm4, mm0
+
+ paddd mm6, mm2 ; sumsq += entering row squared
+ paddd mm7, mm4
+
+ pmullw mm1, mm1
+ movq mm2, mm1
+
+ punpcklwd mm1, mm0
+ psubd mm6, mm1 ; sumsq -= leaving row squared
+
+ punpckhwd mm2, mm0
+ psubd mm7, mm2
+
+
+ movq mm3, mm6
+ pslld mm3, 4
+
+ psubd mm3, mm6 ; mm3 = 15 * sumsq (columns 0-1)
+ movq mm1, mm5
+
+ movq mm4, mm5
+ pmullw mm1, mm1 ; low 16 bits of sum*sum
+
+ pmulhw mm4, mm4 ; high 16 bits of sum*sum
+ movq mm2, mm1
+
+ punpcklwd mm1, mm4 ; sum*sum as dwords, columns 0-1
+ punpckhwd mm2, mm4 ; sum*sum as dwords, columns 2-3
+
+ movq mm4, mm7
+ pslld mm4, 4
+
+ psubd mm4, mm7 ; mm4 = 15 * sumsq (columns 2-3)
+
+ psubd mm3, mm1
+ psubd mm4, mm2
+
+ psubd mm3, flimit2
+ psubd mm4, flimit2
+
+ psrad mm3, 31 ; all ones where 15*sumsq - sum*sum - flimit is negative
+ psrad mm4, 31
+
+ packssdw mm3, mm4
+ packsswb mm3, mm0 ; per-pixel byte mask
+
+ movd mm1, DWORD PTR [rsi+rax*8] ; pixels 8 rows below rsi
+
+ movq mm2, mm1 ; keep the unfiltered pixels
+ punpcklbw mm1, mm0
+
+ paddw mm1, mm5 ; pixel + window sum
+ mov rcx, rdx
+
+ and rcx, 127
+%if ABI_IS_32BIT=1 && CONFIG_PIC=1
+ push rax
+ lea rax, [GLOBAL(sym(vp9_rv))]
+ movq mm4, [rax + rcx*2] ;vp9_rv[rcx*2]
+ pop rax
+%elif ABI_IS_32BIT=0
+ movq mm4, [r8 + rcx*2] ;vp9_rv[rcx*2]
+%else
+ movq mm4, [sym(vp9_rv) + rcx*2]
+%endif
+ paddw mm1, mm4 ; add the 4 words read from vp9_rv at byte offset 2*(row & 127)
+ ;paddw xmm1, eight8s
+ psraw mm1, 4
+
+ packuswb mm1, mm0
+ pand mm1, mm3 ; filtered value where the mask is set
+
+ pandn mm3, mm2 ; original pixel elsewhere
+ por mm1, mm3
+
+ and rcx, 15
+ movd DWORD PTR [rsp+rcx*4], mm1 ;d[rcx*4]
+
+ mov rcx, rdx
+ sub rcx, 8
+
+ and rcx, 15
+ movd mm1, DWORD PTR [rsp+rcx*4] ;d[rcx*4]
+
+ movd [rsi], mm1 ; write back the entry buffered at index (row - 8) & 15
+ lea rsi, [rsi+rax]
+
+ lea rdi, [rdi+rax]
+ add rdx, 1
+
+ cmp edx, dword arg(2) ;rows
+ jl .loop_row
+
+
+ add dword arg(0), 4 ; s += 4 (NOTE(review): 32-bit add on the pointer slot - confirm for 64-bit builds)
+ sub dword arg(3), 4 ; cols -= 4
+ cmp dword arg(3), 0
+ jg .loop_col
+
+ add rsp, 136
+ pop rsp
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+%undef flimit2
+
+
+;void vp9_plane_add_noise_mmx (unsigned char *Start, unsigned char *noise,
+; unsigned char blackclamp[16],
+; unsigned char whiteclamp[16],
+; unsigned char bothclamp[16],
+; unsigned int Width, unsigned int Height, int Pitch)
+extern sym(rand)
+global sym(vp9_plane_add_noise_mmx) PRIVATE
+sym(vp9_plane_add_noise_mmx):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 8
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+.addnoise_loop:
+ call sym(rand) WRT_PLT
+ mov rcx, arg(1) ;noise
+ and rax, 0xff ; per-row offset into the noise buffer, 0..255
+ add rcx, rax
+
+ ; we rely on the fact that the clamping vectors are stored contiguously
+ ; in black/white/both order. Note that we have to reload this here because
+ ; rdx could be trashed by rand()
+ mov rdx, arg(2) ; blackclamp
+
+
+ mov rdi, rcx
+ movsxd rcx, dword arg(5) ;[Width]
+ mov rsi, arg(0) ;Pos
+ xor rax,rax
+
+.addnoise_nextset:
+ movq mm1,[rsi+rax] ; get the source
+
+ psubusb mm1, [rdx] ;blackclamp ; clamp both sides so we don't outrange adding noise
+ paddusb mm1, [rdx+32] ;bothclamp
+ psubusb mm1, [rdx+16] ;whiteclamp
+
+ movq mm2,[rdi+rax] ; get the noise for this line
+ paddb mm1,mm2 ; add it in
+ movq [rsi+rax],mm1 ; store the result
+
+ add rax,8 ; move to the next 8 pixels of this line
+
+ cmp rax, rcx
+ jl .addnoise_nextset ; whole 8-byte groups are processed until rax >= Width
+
+ movsxd rax, dword arg(7) ; Pitch
+ add arg(0), rax ; Start += Pitch
+ sub dword arg(6), 1 ; Height -= 1
+ jg .addnoise_loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+SECTION_RODATA
+align 16
+Blur:
+ times 16 dw 16 ; kernel 0 and kernel 1 taps, read at Blur+0 and Blur+16
+ times 8 dw 64 ; kernel 2 (centre) taps, read at Blur+32
+ times 16 dw 16 ; kernel 3 and kernel 4 taps, read at Blur+48 and Blur+64
+ times 8 dw 0 ; NOTE(review): no read of Blur+80 is visible in this file
+
+rd:
+ times 4 dw 0x40 ; rounding term, half of VP9_FILTER_WEIGHT
diff --git a/vp9/common/x86/vp9_postproc_sse2.asm b/vp9/common/x86/vp9_postproc_sse2.asm
new file mode 100644
index 0000000..8bbb379
--- /dev/null
+++ b/vp9/common/x86/vp9_postproc_sse2.asm
@@ -0,0 +1,695 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+;void vp9_post_proc_down_and_across_xmm
+;(
+; unsigned char *src_ptr,
+; unsigned char *dst_ptr,
+; int src_pixels_per_line,
+; int dst_pixels_per_line,
+; int rows,
+; int cols,
+; int flimit
+;)
+global sym(vp9_post_proc_down_and_across_xmm) PRIVATE
+sym(vp9_post_proc_down_and_across_xmm):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 7
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+%if ABI_IS_32BIT=1 && CONFIG_PIC=1
+ ALIGN_STACK 16, rax
+ ; move the global rd onto the stack, since we don't have enough registers
+ ; to do PIC addressing
+ movdqa xmm0, [GLOBAL(rd42)]
+ sub rsp, 16
+ movdqa [rsp], xmm0
+%define RD42 [rsp]
+%else
+%define RD42 [GLOBAL(rd42)]
+%endif
+
+
+ movd xmm2, dword ptr arg(6) ;flimit
+ punpcklwd xmm2, xmm2
+ punpckldq xmm2, xmm2
+ punpcklqdq xmm2, xmm2 ; xmm2 = flimit in all eight words
+
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(1) ;dst_ptr
+
+ movsxd rcx, DWORD PTR arg(4) ;rows
+ movsxd rax, DWORD PTR arg(2) ;src_pixels_per_line (source pitch)
+ pxor xmm0, xmm0 ; xmm0 = 0
+
+.nextrow:
+
+ xor rdx, rdx ; clear out rdx for use as loop counter
+.nextcol:
+ movq xmm3, QWORD PTR [rsi] ; xmm3 = r0 p0..p7 (bytes)
+ punpcklbw xmm3, xmm0 ; xmm3 = r0 p0..p7 (words)
+ movdqa xmm1, xmm3 ; xmm1 = r0 p0..p7
+ psllw xmm3, 2 ; xmm3 = 4 * r0
+
+ movq xmm5, QWORD PTR [rsi + rax] ; xmm5 = r1 p0..p7
+ punpcklbw xmm5, xmm0 ; xmm5 = r1 p0..p7 (words)
+ paddusw xmm3, xmm5 ; xmm3 += r1
+
+ ; thresholding
+ movdqa xmm7, xmm1 ; xmm7 = r0
+ psubusw xmm7, xmm5 ; xmm7 = r0 - r1
+ psubusw xmm5, xmm1 ; xmm5 = r1 - r0
+ paddusw xmm7, xmm5 ; xmm7 = abs(r0 - r1)
+ pcmpgtw xmm7, xmm2 ; all ones where the difference exceeds flimit
+
+ movq xmm5, QWORD PTR [rsi + 2*rax] ; xmm5 = r2 p0..p7
+ punpcklbw xmm5, xmm0 ; xmm5 = r2 p0..p7 (words)
+ paddusw xmm3, xmm5 ; xmm3 += r2
+
+ ; thresholding
+ movdqa xmm6, xmm1 ; xmm6 = r0
+ psubusw xmm6, xmm5 ; xmm6 = r0 - r2
+ psubusw xmm5, xmm1 ; xmm5 = r2 - r0
+ paddusw xmm6, xmm5 ; xmm6 = abs(r0 - r2)
+ pcmpgtw xmm6, xmm2
+ por xmm7, xmm6 ; accumulate thresholds
+
+
+ neg rax ; rax = -pitch, to reach the rows above
+ movq xmm5, QWORD PTR [rsi+2*rax] ; xmm5 = r-2 p0..p7
+ punpcklbw xmm5, xmm0 ; xmm5 = r-2 p0..p7 (words)
+ paddusw xmm3, xmm5 ; xmm3 += r-2
+
+ ; thresholding
+ movdqa xmm6, xmm1 ; xmm6 = r0
+ psubusw xmm6, xmm5 ; xmm6 = r0 - r-2
+ psubusw xmm5, xmm1 ; xmm5 = r-2 - r0
+ paddusw xmm6, xmm5 ; xmm6 = abs(r0 - r-2)
+ pcmpgtw xmm6, xmm2
+ por xmm7, xmm6 ; accumulate thresholds
+
+ movq xmm4, QWORD PTR [rsi+rax] ; xmm4 = r-1 p0..p7
+ punpcklbw xmm4, xmm0 ; xmm4 = r-1 p0..p7 (words)
+ paddusw xmm3, xmm4 ; xmm3 += r-1
+
+ ; thresholding
+ movdqa xmm6, xmm1 ; xmm6 = r0
+ psubusw xmm6, xmm4 ; xmm6 = r0 - r-1
+ psubusw xmm4, xmm1 ; xmm4 = r-1 - r0
+ paddusw xmm6, xmm4 ; xmm6 = abs(r0 - r-1)
+ pcmpgtw xmm6, xmm2
+ por xmm7, xmm6 ; accumulate thresholds
+
+
+ paddusw xmm3, RD42 ; xmm3 += round value
+ psraw xmm3, 3 ; xmm3 /= 8
+
+ pand xmm1, xmm7 ; xmm1 select vals > thresh from source
+ pandn xmm7, xmm3 ; xmm7 select vals < thresh from blurred result
+ paddusw xmm1, xmm7 ; combination
+
+ packuswb xmm1, xmm0 ; pack to bytes
+ movq QWORD PTR [rdi], xmm1 ; store 8 filtered pixels
+
+ neg rax ; pitch is positive
+ add rsi, 8
+ add rdi, 8
+
+ add rdx, 8
+ cmp edx, dword arg(5) ;cols
+
+ jl .nextcol
+
+ ; done with the all cols, start the across filtering in place
+ sub rsi, rdx
+ sub rdi, rdx
+
+ xor rdx, rdx
+ movq mm0, QWORD PTR [rdi-8]; mm0 carries the 8 bytes whose store is delayed by one iteration
+
+.acrossnextcol:
+ movq xmm7, QWORD PTR [rdi +rdx -2]
+ movd xmm4, DWORD PTR [rdi +rdx +6]
+
+ pslldq xmm4, 8
+ por xmm4, xmm7 ; xmm4 = p-2..p9 (12 bytes)
+
+ movdqa xmm3, xmm4
+ psrldq xmm3, 2
+ punpcklbw xmm3, xmm0 ; xmm3 = p0..p7
+ movdqa xmm1, xmm3 ; xmm1 = p0..p7
+ psllw xmm3, 2 ; xmm3 = 4 * p0..p7
+
+
+ movdqa xmm5, xmm4
+ psrldq xmm5, 3
+ punpcklbw xmm5, xmm0 ; xmm5 = p1..p8
+ paddusw xmm3, xmm5 ; xmm3 += p1..p8
+
+ ; thresholding
+ movdqa xmm7, xmm1 ; xmm7 = p0..p7
+ psubusw xmm7, xmm5 ; xmm7 = p0..p7 - p1..p8
+ psubusw xmm5, xmm1 ; xmm5 = p1..p8 - p0..p7
+ paddusw xmm7, xmm5 ; xmm7 = abs(p0..p7 - p1..p8)
+ pcmpgtw xmm7, xmm2
+
+ movdqa xmm5, xmm4
+ psrldq xmm5, 4
+ punpcklbw xmm5, xmm0 ; xmm5 = p2..p9
+ paddusw xmm3, xmm5 ; xmm3 += p2..p9
+
+ ; thresholding
+ movdqa xmm6, xmm1 ; xmm6 = p0..p7
+ psubusw xmm6, xmm5 ; xmm6 = p0..p7 - p2..p9
+ psubusw xmm5, xmm1 ; xmm5 = p2..p9 - p0..p7
+ paddusw xmm6, xmm5 ; xmm6 = abs(p0..p7 - p2..p9)
+ pcmpgtw xmm6, xmm2
+ por xmm7, xmm6 ; accumulate thresholds
+
+
+ movdqa xmm5, xmm4 ; xmm5 = p-2..p9
+ punpcklbw xmm5, xmm0 ; xmm5 = p-2..p5
+ paddusw xmm3, xmm5 ; xmm3 += p-2..p5
+
+ ; thresholding
+ movdqa xmm6, xmm1 ; xmm6 = p0..p7
+ psubusw xmm6, xmm5 ; xmm6 = p0..p7 - p-2..p5
+ psubusw xmm5, xmm1 ; xmm5 = p-2..p5 - p0..p7
+ paddusw xmm6, xmm5 ; xmm6 = abs(p0..p7 - p-2..p5)
+ pcmpgtw xmm6, xmm2
+ por xmm7, xmm6 ; accumulate thresholds
+
+ psrldq xmm4, 1 ; xmm4 = p-1..p9
+ punpcklbw xmm4, xmm0 ; xmm4 = p-1..p6
+ paddusw xmm3, xmm4 ; xmm3 += p-1..p6
+
+ ; thresholding
+ movdqa xmm6, xmm1 ; xmm6 = p0..p7
+ psubusw xmm6, xmm4 ; xmm6 = p0..p7 - p-1..p6
+ psubusw xmm4, xmm1 ; xmm4 = p-1..p6 - p0..p7
+ paddusw xmm6, xmm4 ; xmm6 = abs(p0..p7 - p-1..p6)
+ pcmpgtw xmm6, xmm2
+ por xmm7, xmm6 ; accumulate thresholds
+
+ paddusw xmm3, RD42 ; xmm3 += round value
+ psraw xmm3, 3 ; xmm3 /= 8
+
+ pand xmm1, xmm7 ; xmm1 select vals > thresh from source
+ pandn xmm7, xmm3 ; xmm7 select vals < thresh from blurred result
+ paddusw xmm1, xmm7 ; combination
+
+ packuswb xmm1, xmm0 ; pack to bytes
+ movq QWORD PTR [rdi+rdx-8], mm0 ; store previous eight bytes
+ movdq2q mm0, xmm1 ; hold this result until the next iteration has read its left neighbours
+
+ add rdx, 8
+ cmp edx, dword arg(5) ;cols
+ jl .acrossnextcol;
+
+ ; last 8 pixels
+ movq QWORD PTR [rdi+rdx-8], mm0
+
+ ; done with this row
+ add rsi,rax ; next line
+ mov eax, dword arg(3) ;dst_pixels_per_line (destination pitch; plain mov, so not sign-extended)
+ add rdi,rax ; next destination
+ mov eax, dword arg(2) ;src_pixels_per_line (source pitch; plain mov, so not sign-extended)
+
+ dec rcx ; decrement count
+ jnz .nextrow ; next row
+
+%if ABI_IS_32BIT=1 && CONFIG_PIC=1
+ add rsp,16
+ pop rsp
+%endif
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+%undef RD42
+
+
+;void vp9_mbpost_proc_down_xmm(unsigned char *dst,
+; int pitch, int rows, int cols,int flimit)
+extern sym(vp9_rv)
+global sym(vp9_mbpost_proc_down_xmm) PRIVATE
+sym(vp9_mbpost_proc_down_xmm):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 5
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ ALIGN_STACK 16, rax
+ sub rsp, 128+16
+
+ ; unsigned char d[16][8] at [rsp]
+ ; create flimit4 at [rsp+128]
+ mov eax, dword ptr arg(4) ;flimit
+ mov [rsp+128], eax
+ mov [rsp+128+4], eax
+ mov [rsp+128+8], eax
+ mov [rsp+128+12], eax
+%define flimit4 [rsp+128]
+
+%if ABI_IS_32BIT=0
+ lea r8, [GLOBAL(sym(vp9_rv))]
+%endif
+
+ ;rows +=8; (results are written back 8 rows late, so 8 extra iterations flush them)
+ add dword arg(2), 8
+
+ ;for(c=0; c<cols; c+=8)
+.loop_col:
+ mov rsi, arg(0) ; s
+ pxor xmm0, xmm0 ; xmm0 = 0, used for unpacking
+
+ movsxd rax, dword ptr arg(1) ;pitch ;
+ neg rax ; rax = -pitch
+
+ lea rsi, [rsi + rax*8]; ; rsi = s - 8*pitch
+ neg rax
+
+
+ pxor xmm5, xmm5 ; xmm5 = running sum, one word per column
+ pxor xmm6, xmm6 ; xmm6 = running sum of squares, columns 0-3 (dwords)
+
+ pxor xmm7, xmm7 ; xmm7 = running sum of squares, columns 4-7 (dwords)
+ mov rdi, rsi
+
+ mov rcx, 15 ; prime the window with 15 rows
+
+.loop_initvar:
+ movq xmm1, QWORD PTR [rdi];
+ punpcklbw xmm1, xmm0 ; 8 pixels as words
+
+ paddw xmm5, xmm1 ; sum += pixels
+ pmullw xmm1, xmm1 ; pixels squared
+
+ movdqa xmm2, xmm1 ;
+ punpcklwd xmm1, xmm0 ; squares of columns 0-3 as dwords
+
+ punpckhwd xmm2, xmm0 ; squares of columns 4-7 as dwords
+ paddd xmm6, xmm1 ;
+
+ paddd xmm7, xmm2 ;
+ lea rdi, [rdi+rax] ; next row
+
+ dec rcx
+ jne .loop_initvar
+ ;save the var and sum
+ xor rdx, rdx
+.loop_row:
+ movq xmm1, QWORD PTR [rsi] ; [s-pitch*8]
+ movq xmm2, QWORD PTR [rdi] ; [s+pitch*7]
+
+ punpcklbw xmm1, xmm0
+ punpcklbw xmm2, xmm0
+
+ paddw xmm5, xmm2 ; sum += row entering the window
+ psubw xmm5, xmm1 ; sum -= row leaving the window
+
+ pmullw xmm2, xmm2
+ movdqa xmm4, xmm2
+
+ punpcklwd xmm2, xmm0
+ punpckhwd xmm4, xmm0
+
+ paddd xmm6, xmm2 ; sumsq += entering row squared
+ paddd xmm7, xmm4
+
+ pmullw xmm1, xmm1
+ movdqa xmm2, xmm1
+
+ punpcklwd xmm1, xmm0
+ psubd xmm6, xmm1 ; sumsq -= leaving row squared
+
+ punpckhwd xmm2, xmm0
+ psubd xmm7, xmm2
+
+
+ movdqa xmm3, xmm6
+ pslld xmm3, 4
+
+ psubd xmm3, xmm6 ; xmm3 = 15 * sumsq (columns 0-3)
+ movdqa xmm1, xmm5
+
+ movdqa xmm4, xmm5
+ pmullw xmm1, xmm1 ; low 16 bits of sum*sum
+
+ pmulhw xmm4, xmm4 ; high 16 bits of sum*sum
+ movdqa xmm2, xmm1
+
+ punpcklwd xmm1, xmm4 ; sum*sum as dwords, columns 0-3
+ punpckhwd xmm2, xmm4 ; sum*sum as dwords, columns 4-7
+
+ movdqa xmm4, xmm7
+ pslld xmm4, 4
+
+ psubd xmm4, xmm7 ; xmm4 = 15 * sumsq (columns 4-7)
+
+ psubd xmm3, xmm1
+ psubd xmm4, xmm2
+
+ psubd xmm3, flimit4
+ psubd xmm4, flimit4
+
+ psrad xmm3, 31 ; all ones where 15*sumsq - sum*sum - flimit is negative
+ psrad xmm4, 31
+
+ packssdw xmm3, xmm4
+ packsswb xmm3, xmm0 ; per-pixel byte mask
+
+ movq xmm1, QWORD PTR [rsi+rax*8] ; pixels 8 rows below rsi
+
+ movq xmm2, xmm1 ; keep the unfiltered pixels
+ punpcklbw xmm1, xmm0
+
+ paddw xmm1, xmm5 ; pixel + window sum
+ mov rcx, rdx
+
+ and rcx, 127
+%if ABI_IS_32BIT=1 && CONFIG_PIC=1
+ push rax
+ lea rax, [GLOBAL(sym(vp9_rv))]
+ movdqu xmm4, [rax + rcx*2] ;vp9_rv[rcx*2]
+ pop rax
+%elif ABI_IS_32BIT=0
+ movdqu xmm4, [r8 + rcx*2] ;vp9_rv[rcx*2]
+%else
+ movdqu xmm4, [sym(vp9_rv) + rcx*2]
+%endif
+
+ paddw xmm1, xmm4 ; add the 8 words read from vp9_rv at byte offset 2*(row & 127)
+ ;paddw xmm1, eight8s
+ psraw xmm1, 4
+
+ packuswb xmm1, xmm0
+ pand xmm1, xmm3 ; filtered value where the mask is set
+
+ pandn xmm3, xmm2 ; original pixel elsewhere
+ por xmm1, xmm3
+
+ and rcx, 15
+ movq QWORD PTR [rsp + rcx*8], xmm1 ;d[rcx*8]
+
+ mov rcx, rdx
+ sub rcx, 8
+
+ and rcx, 15
+ movq mm0, [rsp + rcx*8] ;d[rcx*8]
+
+ movq [rsi], mm0 ; write back the entry buffered at index (row - 8) & 15
+ lea rsi, [rsi+rax]
+
+ lea rdi, [rdi+rax]
+ add rdx, 1
+
+ cmp edx, dword arg(2) ;rows
+ jl .loop_row
+
+ add dword arg(0), 8 ; s += 8 (NOTE(review): 32-bit add on the pointer slot - confirm for 64-bit builds)
+ sub dword arg(3), 8 ; cols -= 8
+ cmp dword arg(3), 0
+ jg .loop_col
+
+ add rsp, 128+16
+ pop rsp
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+%undef flimit4
+
+
+;void vp9_mbpost_proc_across_ip_xmm(unsigned char *src,
+; int pitch, int rows, int cols,int flimit)
+global sym(vp9_mbpost_proc_across_ip_xmm) PRIVATE
+sym(vp9_mbpost_proc_across_ip_xmm):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 5
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ ALIGN_STACK 16, rax
+ sub rsp, 16
+
+ ; create flimit4 at [rsp]
+ mov eax, dword ptr arg(4) ;flimit
+ mov [rsp], eax
+ mov [rsp+4], eax
+ mov [rsp+8], eax
+ mov [rsp+12], eax
+%define flimit4 [rsp]
+
+
+ ;for(r=0;r<rows;r++)
+.ip_row_loop:
+
+ xor rdx, rdx ;sumsq=0;
+ xor rcx, rcx ;sum=0;
+ mov rsi, arg(0); s
+ mov rdi, -8
+.ip_var_loop:
+ ;for(i=-8;i<=6;i++)
+ ;{
+ ; sumsq += s[i]*s[i];
+ ; sum += s[i];
+ ;}
+ movzx eax, byte [rsi+rdi]
+ add ecx, eax
+ mul al ; ax = s[i]*s[i]
+ add edx, eax
+ add rdi, 1
+ cmp rdi, 6
+ jle .ip_var_loop
+
+
+ ;mov rax, sumsq
+ ;movd xmm7, rax
+ movd xmm7, edx ; xmm7 (low dword) = sumsq
+
+ ;mov rax, sum
+ ;movd xmm6, rax
+ movd xmm6, ecx ; xmm6 (low dword) = sum
+
+ mov rsi, arg(0) ;s
+ xor rcx, rcx ; rcx = column index, stepped by 4
+
+ movsxd rdx, dword arg(3) ;cols
+ add rdx, 8 ; loop bound is cols + 8 because stores lag 8 columns behind
+ pxor mm0, mm0 ; mm0/mm1 form the two-stage delay for the stores below
+ pxor mm1, mm1
+
+ pxor xmm0, xmm0
+.nextcol4:
+
+ movd xmm1, DWORD PTR [rsi+rcx-8] ; -8 -7 -6 -5
+ movd xmm2, DWORD PTR [rsi+rcx+7] ; +7 +8 +9 +10
+
+ punpcklbw xmm1, xmm0 ; expanding
+ punpcklbw xmm2, xmm0 ; expanding
+
+ punpcklwd xmm1, xmm0 ; expanding to dwords
+ punpcklwd xmm2, xmm0 ; expanding to dwords
+
+ psubd xmm2, xmm1 ; 7--8 8--7 9--6 10--5
+ paddd xmm1, xmm1 ; -8*2 -7*2 -6*2 -5*2
+
+ paddd xmm1, xmm2 ; 7+-8 8+-7 9+-6 10+-5
+ pmaddwd xmm1, xmm2 ; (a+b)*(a-b): difference of the squares, per column
+
+ paddd xmm6, xmm2
+ paddd xmm7, xmm1
+
+ pshufd xmm6, xmm6, 0 ; duplicate the last ones
+ pshufd xmm7, xmm7, 0 ; duplicate the last ones
+
+ psrldq xmm1, 4 ; 8--7 9--6 10--5 0000
+ psrldq xmm2, 4 ; 8--7 9--6 10--5 0000
+
+ pshufd xmm3, xmm1, 3 ; 0000 8--7 8--7 8--7 squared
+ pshufd xmm4, xmm2, 3 ; 0000 8--7 8--7 8--7 squared
+
+ paddd xmm6, xmm4
+ paddd xmm7, xmm3
+
+ pshufd xmm3, xmm1, 01011111b ; 0000 0000 9--6 9--6 squared
+ pshufd xmm4, xmm2, 01011111b ; 0000 0000 9--6 9--6 squared
+
+ paddd xmm7, xmm3
+ paddd xmm6, xmm4
+
+ pshufd xmm3, xmm1, 10111111b ; 0000 0000 8--7 8--7 squared
+ pshufd xmm4, xmm2, 10111111b ; 0000 0000 8--7 8--7 squared
+
+ paddd xmm7, xmm3
+ paddd xmm6, xmm4
+
+ movdqa xmm3, xmm6
+ pmaddwd xmm3, xmm3
+
+ movdqa xmm5, xmm7
+ pslld xmm5, 4
+
+ psubd xmm5, xmm7 ; xmm5 = 15 * sumsq
+ psubd xmm5, xmm3
+
+ psubd xmm5, flimit4
+ psrad xmm5, 31 ; all ones where the value left in xmm5 is negative
+
+ packssdw xmm5, xmm0
+ packsswb xmm5, xmm0
+
+ movd xmm1, DWORD PTR [rsi+rcx]
+ movq xmm2, xmm1 ; keep the unfiltered pixels
+
+ punpcklbw xmm1, xmm0
+ punpcklwd xmm1, xmm0
+
+ paddd xmm1, xmm6
+ paddd xmm1, [GLOBAL(four8s)]
+
+ psrad xmm1, 4
+ packssdw xmm1, xmm0
+
+ packuswb xmm1, xmm0
+ pand xmm1, xmm5 ; filtered value where the mask is set
+
+ pandn xmm5, xmm2 ; original pixel elsewhere
+ por xmm5, xmm1
+
+ movd [rsi+rcx-8], mm0 ; store the result from two iterations ago (zeros on the first two passes)
+ movq mm0, mm1
+
+ movdq2q mm1, xmm5
+ psrldq xmm7, 12
+
+ psrldq xmm6, 12
+ add rcx, 4
+
+ cmp rcx, rdx
+ jl .nextcol4
+
+ ;s+=pitch;
+ movsxd rax, dword arg(1)
+ add arg(0), rax
+
+ sub dword arg(2), 1 ;rows-=1
+ cmp dword arg(2), 0
+ jg .ip_row_loop
+
+ add rsp, 16
+ pop rsp
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+%undef flimit4
+
+
+;void vp9_plane_add_noise_wmt (unsigned char *Start, unsigned char *noise,
+; unsigned char blackclamp[16],
+; unsigned char whiteclamp[16],
+; unsigned char bothclamp[16],
+; unsigned int Width, unsigned int Height, int Pitch)
+extern sym(rand)
+global sym(vp9_plane_add_noise_wmt) PRIVATE
+sym(vp9_plane_add_noise_wmt):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 8
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+.addnoise_loop:
+ call sym(rand) WRT_PLT
+ mov rcx, arg(1) ;noise
+ and rax, 0xff ; per-row offset into the noise buffer, 0..255
+ add rcx, rax
+
+ ; we rely on the fact that the clamping vectors are stored contiguously
+ ; in black/white/both order. Note that we have to reload this here because
+ ; rdx could be trashed by rand()
+ mov rdx, arg(2) ; blackclamp
+
+
+ mov rdi, rcx
+ movsxd rcx, dword arg(5) ;[Width]
+ mov rsi, arg(0) ;Pos
+ xor rax,rax
+
+.addnoise_nextset:
+ movdqu xmm1,[rsi+rax] ; get the source
+
+ psubusb xmm1, [rdx] ;blackclamp ; clamp both sides so we don't outrange adding noise
+ paddusb xmm1, [rdx+32] ;bothclamp
+ psubusb xmm1, [rdx+16] ;whiteclamp
+
+ movdqu xmm2,[rdi+rax] ; get the noise for this line
+ paddb xmm1,xmm2 ; add it in
+ movdqu [rsi+rax],xmm1 ; store the result
+
+ add rax,16 ; move to the next 16 pixels of this line
+
+ cmp rax, rcx
+ jl .addnoise_nextset ; whole 16-byte groups are processed until rax >= Width
+
+ movsxd rax, dword arg(7) ; Pitch
+ add arg(0), rax ; Start += Pitch
+ sub dword arg(6), 1 ; Height -= 1
+ jg .addnoise_loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+SECTION_RODATA
+align 16
+rd42:
+ times 8 dw 0x04 ; rounding term added before the >> 3 in vp9_post_proc_down_and_across_xmm
+four8s:
+ times 4 dd 8 ; rounding term added before the >> 4 in vp9_mbpost_proc_across_ip_xmm
diff --git a/vp9/common/x86/vp9_postproc_x86.h b/vp9/common/x86/vp9_postproc_x86.h
new file mode 100644
index 0000000..b0e8b18
--- /dev/null
+++ b/vp9/common/x86/vp9_postproc_x86.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_COMMON_X86_VP9_POSTPROC_X86_H_
+#define VP9_COMMON_X86_VP9_POSTPROC_X86_H_
+
+/* Note:
+ *
+ * This platform is commonly built for runtime CPU detection. If you modify
+ * any of the function mappings present in this file, be sure to also update
+ * them in the function pointer initialization code.
+ */
+/* MMX implementations of the post-processing entry points. */
+#if HAVE_MMX
+extern prototype_postproc_inplace(vp9_mbpost_proc_down_mmx);
+extern prototype_postproc(vp9_post_proc_down_and_across_mmx);
+extern prototype_postproc_addnoise(vp9_plane_add_noise_mmx);
+/* Without runtime CPU detection the generic names are bound at compile time. */
+#if !CONFIG_RUNTIME_CPU_DETECT
+#undef vp9_postproc_down
+#define vp9_postproc_down vp9_mbpost_proc_down_mmx
+
+#undef vp9_postproc_downacross
+#define vp9_postproc_downacross vp9_post_proc_down_and_across_mmx
+
+#undef vp9_postproc_addnoise
+#define vp9_postproc_addnoise vp9_plane_add_noise_mmx
+
+#endif /* !CONFIG_RUNTIME_CPU_DETECT */
+#endif /* HAVE_MMX */
+
+/* SSE2 implementations; these come later, so they replace the MMX mappings. */
+#if HAVE_SSE2
+extern prototype_postproc_inplace(vp9_mbpost_proc_down_xmm);
+extern prototype_postproc_inplace(vp9_mbpost_proc_across_ip_xmm);
+extern prototype_postproc(vp9_post_proc_down_and_across_xmm);
+extern prototype_postproc_addnoise(vp9_plane_add_noise_wmt);
+
+#if !CONFIG_RUNTIME_CPU_DETECT
+#undef vp9_postproc_down
+#define vp9_postproc_down vp9_mbpost_proc_down_xmm
+
+#undef vp9_postproc_across
+#define vp9_postproc_across vp9_mbpost_proc_across_ip_xmm
+
+#undef vp9_postproc_downacross
+#define vp9_postproc_downacross vp9_post_proc_down_and_across_xmm
+
+#undef vp9_postproc_addnoise
+#define vp9_postproc_addnoise vp9_plane_add_noise_wmt
+
+
+#endif /* !CONFIG_RUNTIME_CPU_DETECT */
+#endif /* HAVE_SSE2 */
+
+#endif /* VP9_COMMON_X86_VP9_POSTPROC_X86_H_ */
diff --git a/vp9/common/x86/vp9_recon_mmx.asm b/vp9/common/x86/vp9_recon_mmx.asm
new file mode 100644
index 0000000..fc03d3f
--- /dev/null
+++ b/vp9/common/x86/vp9_recon_mmx.asm
@@ -0,0 +1,321 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+;void vp9_recon_b_mmx(unsigned char *s, short *q, unsigned char *d, int stride)
+global sym(vp9_recon_b_mmx) PRIVATE
+sym(vp9_recon_b_mmx):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ push rsi
+ push rdi
+ ; end prolog
+ ; Four rows of 4 pixels: d = saturate_u8(s + q); s rows are 16 bytes apart, q rows 32 bytes apart.
+ mov rsi, arg(0) ;s
+ mov rdi, arg(2) ;d
+ mov rdx, arg(1) ;q
+ movsxd rax, dword ptr arg(3) ;stride
+ pxor mm0, mm0 ; mm0 = 0, used to widen bytes to words
+
+ movd mm1, [rsi] ; row 0: 4 bytes of s
+ punpcklbw mm1, mm0 ; zero-extend them to 4 words
+ paddsw mm1, [rdx] ; add 4 words of q
+ packuswb mm1, mm0 ; pack back to bytes with unsigned saturation
+ movd [rdi], mm1
+
+ movd mm2, [rsi+16] ; row 1
+ punpcklbw mm2, mm0
+ paddsw mm2, [rdx+32]
+ packuswb mm2, mm0 ; pack back to bytes with unsigned saturation
+ movd [rdi+rax], mm2
+
+ movd mm3, [rsi+32] ; row 2
+ punpcklbw mm3, mm0
+ paddsw mm3, [rdx+64]
+ packuswb mm3, mm0 ; pack back to bytes with unsigned saturation
+ movd [rdi+2*rax], mm3
+
+ add rdi, rax ; rdi = d + stride, so [rdi+2*rax] below is row 3
+ movd mm4, [rsi+48] ; row 3
+ punpcklbw mm4, mm0
+ paddsw mm4, [rdx+96]
+ packuswb mm4, mm0 ; pack back to bytes with unsigned saturation
+ movd [rdi+2*rax], mm4
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;void vp9_copy_mem8x8_mmx(
+; unsigned char *src,
+; int src_stride,
+; unsigned char *dst,
+; int dst_stride
+; )
+global sym(vp9_copy_mem8x8_mmx) PRIVATE
+sym(vp9_copy_mem8x8_mmx):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ push rsi
+ push rdi
+ ; end prolog
+ ; Copies 8 rows of 8 bytes from src to dst; loads and stores are interleaved.
+ mov rsi, arg(0) ;src;
+ movq mm0, [rsi] ; row 0
+
+ movsxd rax, dword ptr arg(1) ;src_stride;
+ mov rdi, arg(2) ;dst;
+
+ movq mm1, [rsi+rax] ; row 1
+ movq mm2, [rsi+rax*2] ; row 2
+
+ movsxd rcx, dword ptr arg(3) ;dst_stride
+ lea rsi, [rsi+rax*2]
+
+ movq [rdi], mm0
+ add rsi, rax ; rsi = src + 3*src_stride
+
+ movq [rdi+rcx], mm1
+ movq [rdi+rcx*2], mm2
+
+
+ lea rdi, [rdi+rcx*2]
+ movq mm3, [rsi] ; row 3
+
+ add rdi, rcx ; rdi = dst + 3*dst_stride
+ movq mm4, [rsi+rax] ; row 4
+
+ movq mm5, [rsi+rax*2] ; row 5
+ movq [rdi], mm3
+
+ lea rsi, [rsi+rax*2] ; rsi = src + 5*src_stride
+ movq [rdi+rcx], mm4
+
+ movq [rdi+rcx*2], mm5
+ lea rdi, [rdi+rcx*2] ; rdi = dst + 5*dst_stride
+
+ movq mm0, [rsi+rax] ; row 6
+ movq mm1, [rsi+rax*2] ; row 7
+
+ movq [rdi+rcx], mm0
+ movq [rdi+rcx*2],mm1
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;void vp9_copy_mem8x4_mmx(
+; unsigned char *src,
+; int src_stride,
+; unsigned char *dst,
+; int dst_stride
+; )
+global sym(vp9_copy_mem8x4_mmx) PRIVATE
+sym(vp9_copy_mem8x4_mmx):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ push rsi
+ push rdi
+ ; end prolog
+ ; Copies 4 rows of 8 bytes from src to dst.
+ mov rsi, arg(0) ;src;
+ movq mm0, [rsi] ; row 0
+
+ movsxd rax, dword ptr arg(1) ;src_stride;
+ mov rdi, arg(2) ;dst;
+
+ movq mm1, [rsi+rax] ; row 1
+ movq mm2, [rsi+rax*2] ; row 2
+
+ movsxd rcx, dword ptr arg(3) ;dst_stride
+ lea rsi, [rsi+rax*2] ; rsi = src + 2*src_stride
+
+ movq [rdi], mm0
+ movq [rdi+rcx], mm1
+
+ movq [rdi+rcx*2], mm2
+ lea rdi, [rdi+rcx*2] ; rdi = dst + 2*dst_stride
+
+ movq mm3, [rsi+rax] ; row 3
+ movq [rdi+rcx], mm3
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;void vp9_copy_mem16x16_mmx(
+; unsigned char *src,
+; int src_stride,
+; unsigned char *dst,
+; int dst_stride
+; )
+global sym(vp9_copy_mem16x16_mmx) PRIVATE
+sym(vp9_copy_mem16x16_mmx):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ push rsi
+ push rdi
+ ; end prolog
+ ; Copies 16 rows of 16 bytes: five groups of three rows, then one final row.
+ mov rsi, arg(0) ;src;
+ movsxd rax, dword ptr arg(1) ;src_stride;
+
+ mov rdi, arg(2) ;dst;
+ movsxd rcx, dword ptr arg(3) ;dst_stride
+
+ movq mm0, [rsi] ; rows 0-2: left 8 bytes in mm0-mm2, right 8 bytes in mm3-mm5
+ movq mm3, [rsi+8];
+
+ movq mm1, [rsi+rax]
+ movq mm4, [rsi+rax+8]
+
+ movq mm2, [rsi+rax*2]
+ movq mm5, [rsi+rax*2+8]
+
+ lea rsi, [rsi+rax*2]
+ add rsi, rax ; src advanced by three rows
+
+ movq [rdi], mm0
+ movq [rdi+8], mm3
+
+ movq [rdi+rcx], mm1
+ movq [rdi+rcx+8], mm4
+
+ movq [rdi+rcx*2], mm2
+ movq [rdi+rcx*2+8], mm5
+
+ lea rdi, [rdi+rcx*2]
+ add rdi, rcx ; dst advanced by three rows
+
+ movq mm0, [rsi] ; rows 3-5
+ movq mm3, [rsi+8];
+
+ movq mm1, [rsi+rax]
+ movq mm4, [rsi+rax+8]
+
+ movq mm2, [rsi+rax*2]
+ movq mm5, [rsi+rax*2+8]
+
+ lea rsi, [rsi+rax*2]
+ add rsi, rax
+
+ movq [rdi], mm0
+ movq [rdi+8], mm3
+
+ movq [rdi+rcx], mm1
+ movq [rdi+rcx+8], mm4
+
+ movq [rdi+rcx*2], mm2
+ movq [rdi+rcx*2+8], mm5
+
+ lea rdi, [rdi+rcx*2]
+ add rdi, rcx
+
+ movq mm0, [rsi] ; rows 6-8
+ movq mm3, [rsi+8];
+
+ movq mm1, [rsi+rax]
+ movq mm4, [rsi+rax+8]
+
+ movq mm2, [rsi+rax*2]
+ movq mm5, [rsi+rax*2+8]
+
+ lea rsi, [rsi+rax*2]
+ add rsi, rax
+
+ movq [rdi], mm0
+ movq [rdi+8], mm3
+
+ movq [rdi+rcx], mm1
+ movq [rdi+rcx+8], mm4
+
+ movq [rdi+rcx*2], mm2
+ movq [rdi+rcx*2+8], mm5
+
+ lea rdi, [rdi+rcx*2]
+ add rdi, rcx
+
+ movq mm0, [rsi] ; rows 9-11
+ movq mm3, [rsi+8];
+
+ movq mm1, [rsi+rax]
+ movq mm4, [rsi+rax+8]
+
+ movq mm2, [rsi+rax*2]
+ movq mm5, [rsi+rax*2+8]
+
+ lea rsi, [rsi+rax*2]
+ add rsi, rax
+
+ movq [rdi], mm0
+ movq [rdi+8], mm3
+
+ movq [rdi+rcx], mm1
+ movq [rdi+rcx+8], mm4
+
+ movq [rdi+rcx*2], mm2
+ movq [rdi+rcx*2+8], mm5
+
+ lea rdi, [rdi+rcx*2]
+ add rdi, rcx
+
+ movq mm0, [rsi] ; rows 12-14
+ movq mm3, [rsi+8];
+
+ movq mm1, [rsi+rax]
+ movq mm4, [rsi+rax+8]
+
+ movq mm2, [rsi+rax*2]
+ movq mm5, [rsi+rax*2+8]
+
+ lea rsi, [rsi+rax*2]
+ add rsi, rax
+
+ movq [rdi], mm0
+ movq [rdi+8], mm3
+
+ movq [rdi+rcx], mm1
+ movq [rdi+rcx+8], mm4
+
+ movq [rdi+rcx*2], mm2
+ movq [rdi+rcx*2+8], mm5
+
+ lea rdi, [rdi+rcx*2]
+ add rdi, rcx
+
+ movq mm0, [rsi] ; row 15
+ movq mm3, [rsi+8];
+
+ movq [rdi], mm0
+ movq [rdi+8], mm3
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
diff --git a/vp9/common/x86/vp9_recon_sse2.asm b/vp9/common/x86/vp9_recon_sse2.asm
new file mode 100644
index 0000000..154442d
--- /dev/null
+++ b/vp9/common/x86/vp9_recon_sse2.asm
@@ -0,0 +1,688 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+;void vp9_recon2b_sse2(unsigned char *s, short *q, unsigned char *d, int stride)
+global sym(vp9_recon2b_sse2) PRIVATE
+sym(vp9_recon2b_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ push rsi
+ push rdi
+ ; end prolog
+ ; Four rows of 8 pixels: d = saturate_u8(s + q); s rows are 8 bytes apart, q rows 16 bytes apart.
+ mov rsi, arg(0) ;s
+ mov rdi, arg(2) ;d
+ mov rdx, arg(1) ;q
+ movsxd rax, dword ptr arg(3) ;stride
+ pxor xmm0, xmm0 ; xmm0 = 0, used to widen bytes to words
+
+ movq xmm1, MMWORD PTR [rsi] ; row 0: 8 bytes of s
+ punpcklbw xmm1, xmm0 ; zero-extend them to 8 words
+ paddsw xmm1, XMMWORD PTR [rdx] ; add 8 words of q
+ packuswb xmm1, xmm0 ; pack back to bytes with unsigned saturation
+ movq MMWORD PTR [rdi], xmm1
+
+
+ movq xmm2, MMWORD PTR [rsi+8] ; row 1
+ punpcklbw xmm2, xmm0
+ paddsw xmm2, XMMWORD PTR [rdx+16]
+ packuswb xmm2, xmm0 ; pack back to bytes with unsigned saturation
+ movq MMWORD PTR [rdi+rax], xmm2
+
+
+ movq xmm3, MMWORD PTR [rsi+16] ; row 2
+ punpcklbw xmm3, xmm0
+ paddsw xmm3, XMMWORD PTR [rdx+32]
+ packuswb xmm3, xmm0 ; pack back to bytes with unsigned saturation
+ movq MMWORD PTR [rdi+rax*2], xmm3
+
+ add rdi, rax ; rdi = d + stride, so [rdi+rax*2] below is row 3
+ movq xmm4, MMWORD PTR [rsi+24] ; row 3
+ punpcklbw xmm4, xmm0
+ paddsw xmm4, XMMWORD PTR [rdx+48]
+ packuswb xmm4, xmm0 ; pack back to bytes with unsigned saturation
+ movq MMWORD PTR [rdi+rax*2], xmm4
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;void vp9_recon4b_sse2(unsigned char *s, short *q, unsigned char *d, int stride)
+global sym(vp9_recon4b_sse2) PRIVATE
+sym(vp9_recon4b_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ SAVE_XMM 7
+ push rsi
+ push rdi
+ ; end prolog
+ ; Four rows of 16 pixels: d = saturate_u8(s + q); s rows are 16 bytes apart, q rows 32 bytes apart.
+ mov rsi, arg(0) ;s
+ mov rdi, arg(2) ;d
+ mov rdx, arg(1) ;q
+ movsxd rax, dword ptr arg(3) ;stride
+ pxor xmm0, xmm0 ; xmm0 = 0, used to widen bytes to words
+
+ movdqa xmm1, XMMWORD PTR [rsi] ; row 0: 16 bytes of s (aligned load)
+ movdqa xmm5, xmm1
+ punpcklbw xmm1, xmm0 ; low 8 pixels as words
+ punpckhbw xmm5, xmm0 ; high 8 pixels as words
+ paddsw xmm1, XMMWORD PTR [rdx]
+ paddsw xmm5, XMMWORD PTR [rdx+16]
+ packuswb xmm1, xmm5 ; pack both halves back to 16 bytes with unsigned saturation
+ movdqa XMMWORD PTR [rdi], xmm1
+
+
+ movdqa xmm2, XMMWORD PTR [rsi+16] ; row 1
+ movdqa xmm6, xmm2
+ punpcklbw xmm2, xmm0
+ punpckhbw xmm6, xmm0
+ paddsw xmm2, XMMWORD PTR [rdx+32]
+ paddsw xmm6, XMMWORD PTR [rdx+48]
+ packuswb xmm2, xmm6 ; pack both halves back to 16 bytes with unsigned saturation
+ movdqa XMMWORD PTR [rdi+rax], xmm2
+
+
+ movdqa xmm3, XMMWORD PTR [rsi+32] ; row 2
+ movdqa xmm7, xmm3
+ punpcklbw xmm3, xmm0
+ punpckhbw xmm7, xmm0
+ paddsw xmm3, XMMWORD PTR [rdx+64]
+ paddsw xmm7, XMMWORD PTR [rdx+80]
+ packuswb xmm3, xmm7 ; pack both halves back to 16 bytes with unsigned saturation
+ movdqa XMMWORD PTR [rdi+rax*2], xmm3
+
+ add rdi, rax ; rdi = d + stride, so [rdi+rax*2] below is row 3
+ movdqa xmm4, XMMWORD PTR [rsi+48] ; row 3
+ movdqa xmm5, xmm4
+ punpcklbw xmm4, xmm0
+ punpckhbw xmm5, xmm0
+ paddsw xmm4, XMMWORD PTR [rdx+96]
+ paddsw xmm5, XMMWORD PTR [rdx+112]
+ packuswb xmm4, xmm5 ; pack both halves back to 16 bytes with unsigned saturation
+ movdqa XMMWORD PTR [rdi+rax*2], xmm4
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;void vp9_copy_mem16x16_sse2(
+; unsigned char *src,
+; int src_stride,
+; unsigned char *dst,
+; int dst_stride
+; )
+global sym(vp9_copy_mem16x16_sse2) PRIVATE
+sym(vp9_copy_mem16x16_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ push rsi
+ push rdi
+ ; end prolog
+ ; Copies 16 rows of 16 bytes; loads are unaligned (movdqu), stores are aligned (movdqa).
+ mov rsi, arg(0) ;src;
+ movdqu xmm0, [rsi] ; row 0
+
+ movsxd rax, dword ptr arg(1) ;src_stride;
+ mov rdi, arg(2) ;dst;
+
+ movdqu xmm1, [rsi+rax] ; row 1
+ movdqu xmm2, [rsi+rax*2] ; row 2
+
+ movsxd rcx, dword ptr arg(3) ;dst_stride
+ lea rsi, [rsi+rax*2]
+
+ movdqa [rdi], xmm0
+ add rsi, rax ; rsi = src + 3*src_stride
+
+ movdqa [rdi+rcx], xmm1
+ movdqa [rdi+rcx*2],xmm2
+
+ lea rdi, [rdi+rcx*2]
+ movdqu xmm3, [rsi] ; row 3
+
+ add rdi, rcx ; rdi = dst + 3*dst_stride
+ movdqu xmm4, [rsi+rax] ; row 4
+
+ movdqu xmm5, [rsi+rax*2] ; row 5
+ lea rsi, [rsi+rax*2]
+
+ movdqa [rdi], xmm3
+ add rsi, rax ; rsi = src + 6*src_stride
+
+ movdqa [rdi+rcx], xmm4
+ movdqa [rdi+rcx*2],xmm5
+
+ lea rdi, [rdi+rcx*2]
+ movdqu xmm0, [rsi] ; row 6
+
+ add rdi, rcx ; rdi = dst + 6*dst_stride
+ movdqu xmm1, [rsi+rax] ; row 7
+
+ movdqu xmm2, [rsi+rax*2] ; row 8
+ lea rsi, [rsi+rax*2]
+
+ movdqa [rdi], xmm0
+ add rsi, rax ; rsi = src + 9*src_stride
+
+ movdqa [rdi+rcx], xmm1
+
+ movdqa [rdi+rcx*2], xmm2
+ movdqu xmm3, [rsi] ; row 9
+
+ movdqu xmm4, [rsi+rax] ; row 10
+ lea rdi, [rdi+rcx*2]
+
+ add rdi, rcx ; rdi = dst + 9*dst_stride
+ movdqu xmm5, [rsi+rax*2] ; row 11
+
+ lea rsi, [rsi+rax*2]
+ movdqa [rdi], xmm3
+
+ add rsi, rax ; rsi = src + 12*src_stride
+ movdqa [rdi+rcx], xmm4
+
+ movdqa [rdi+rcx*2],xmm5
+ movdqu xmm0, [rsi] ; row 12
+
+ lea rdi, [rdi+rcx*2]
+ movdqu xmm1, [rsi+rax] ; row 13
+
+ add rdi, rcx ; rdi = dst + 12*dst_stride
+ movdqu xmm2, [rsi+rax*2] ; row 14
+
+ lea rsi, [rsi+rax*2] ; rsi = src + 14*src_stride
+ movdqa [rdi], xmm0
+
+ movdqa [rdi+rcx], xmm1
+ movdqa [rdi+rcx*2],xmm2
+
+ movdqu xmm3, [rsi+rax] ; row 15
+ lea rdi, [rdi+rcx*2] ; rdi = dst + 14*dst_stride
+
+ movdqa [rdi+rcx], xmm3
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;void vp9_intra_pred_uv_dc_mmx2(
+; unsigned char *dst,
+; int dst_stride,
+; unsigned char *src,
+; int src_stride
+; )
+global sym(vp9_intra_pred_uv_dc_mmx2) PRIVATE
+sym(vp9_intra_pred_uv_dc_mmx2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ push rsi
+ push rdi
+ ; end prolog
+ ; Fills 8 rows of 8 bytes at dst with (sum of the 8 bytes above src + sum of the 8 bytes left of src + 8) >> 4.
+ ; from top
+ mov rsi, arg(2) ;src;
+ movsxd rax, dword ptr arg(3) ;src_stride;
+ sub rsi, rax ; rsi = row above src
+ pxor mm0, mm0
+ movq mm1, [rsi]
+ psadbw mm1, mm0 ; SAD against zero = sum of the 8 top bytes
+
+ ; from left
+ dec rsi ; rsi = src - src_stride - 1
+ lea rdi, [rax*3]
+ movzx ecx, byte [rsi+rax] ; left of row 0
+ movzx edx, byte [rsi+rax*2] ; left of row 1
+ add ecx, edx
+ movzx edx, byte [rsi+rdi] ; left of row 2
+ add ecx, edx
+ lea rsi, [rsi+rax*4] ; rsi = left of row 3
+ movzx edx, byte [rsi]
+ add ecx, edx
+ movzx edx, byte [rsi+rax] ; left of row 4
+ add ecx, edx
+ movzx edx, byte [rsi+rax*2] ; left of row 5
+ add ecx, edx
+ movzx edx, byte [rsi+rdi] ; left of row 6
+ add ecx, edx
+ movzx edx, byte [rsi+rax*4] ; left of row 7
+ add ecx, edx
+
+ ; add up
+ pextrw edx, mm1, 0x0 ; top sum
+ lea edx, [edx+ecx+8] ; top + left + rounding
+ sar edx, 4 ; divide by 16
+ movd mm1, edx
+ pshufw mm1, mm1, 0x0 ; broadcast the low word to all four words
+ packuswb mm1, mm1 ; 8 identical bytes
+
+ ; write out
+ mov rdi, arg(0) ;dst;
+ movsxd rcx, dword ptr arg(1) ;dst_stride
+ lea rax, [rcx*3]
+
+ movq [rdi ], mm1
+ movq [rdi+rcx ], mm1
+ movq [rdi+rcx*2], mm1
+ movq [rdi+rax ], mm1
+ lea rdi, [rdi+rcx*4]
+ movq [rdi ], mm1
+ movq [rdi+rcx ], mm1
+ movq [rdi+rcx*2], mm1
+ movq [rdi+rax ], mm1
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void vp9_intra_pred_uv_dctop_mmx2(
+; unsigned char *dst,
+; int dst_stride,
+; unsigned char *src,
+; int src_stride
+; )
+global sym(vp9_intra_pred_uv_dctop_mmx2) PRIVATE
+sym(vp9_intra_pred_uv_dctop_mmx2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+ ; Fills 8 rows of 8 bytes at dst with (sum of the 8 bytes above src + 4) >> 3.
+ ; from top
+ mov rsi, arg(2) ;src;
+ movsxd rax, dword ptr arg(3) ;src_stride;
+ sub rsi, rax ; rsi = row above src
+ pxor mm0, mm0
+ movq mm1, [rsi]
+ psadbw mm1, mm0 ; SAD against zero = sum of the 8 top bytes
+
+ ; add up
+ paddw mm1, [GLOBAL(dc_4)] ; rounding
+ psraw mm1, 3 ; divide by 8
+ pshufw mm1, mm1, 0x0 ; broadcast the low word to all four words
+ packuswb mm1, mm1 ; 8 identical bytes
+
+ ; write out
+ mov rdi, arg(0) ;dst;
+ movsxd rcx, dword ptr arg(1) ;dst_stride
+ lea rax, [rcx*3]
+
+ movq [rdi ], mm1
+ movq [rdi+rcx ], mm1
+ movq [rdi+rcx*2], mm1
+ movq [rdi+rax ], mm1
+ lea rdi, [rdi+rcx*4]
+ movq [rdi ], mm1
+ movq [rdi+rcx ], mm1
+ movq [rdi+rcx*2], mm1
+ movq [rdi+rax ], mm1
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void vp9_intra_pred_uv_dcleft_mmx2(
+; unsigned char *dst,
+; int dst_stride,
+; unsigned char *src,
+; int src_stride
+; )
+global sym(vp9_intra_pred_uv_dcleft_mmx2) PRIVATE
+sym(vp9_intra_pred_uv_dcleft_mmx2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ push rsi
+ push rdi
+ ; end prolog
+ ; Fills 8 rows of 8 bytes at dst with (sum of the 8 bytes left of src + 4) >> 3.
+ ; from left
+ mov rsi, arg(2) ;src;
+ movsxd rax, dword ptr arg(3) ;src_stride;
+ dec rsi ; rsi = src - 1, the column left of the block
+ lea rdi, [rax*3]
+ movzx ecx, byte [rsi] ; left of row 0
+ movzx edx, byte [rsi+rax] ; left of row 1
+ add ecx, edx
+ movzx edx, byte [rsi+rax*2] ; left of row 2
+ add ecx, edx
+ movzx edx, byte [rsi+rdi] ; left of row 3
+ add ecx, edx
+ lea rsi, [rsi+rax*4] ; rsi = left of row 4
+ movzx edx, byte [rsi]
+ add ecx, edx
+ movzx edx, byte [rsi+rax] ; left of row 5
+ add ecx, edx
+ movzx edx, byte [rsi+rax*2] ; left of row 6
+ add ecx, edx
+ movzx edx, byte [rsi+rdi] ; left of row 7
+ lea edx, [ecx+edx+4] ; total + rounding
+
+ ; add up
+ shr edx, 3 ; divide by 8
+ movd mm1, edx
+ pshufw mm1, mm1, 0x0 ; broadcast the low word to all four words
+ packuswb mm1, mm1 ; 8 identical bytes
+
+ ; write out
+ mov rdi, arg(0) ;dst;
+ movsxd rcx, dword ptr arg(1) ;dst_stride
+ lea rax, [rcx*3]
+
+ movq [rdi ], mm1
+ movq [rdi+rcx ], mm1
+ movq [rdi+rcx*2], mm1
+ movq [rdi+rax ], mm1
+ lea rdi, [rdi+rcx*4]
+ movq [rdi ], mm1
+ movq [rdi+rcx ], mm1
+ movq [rdi+rcx*2], mm1
+ movq [rdi+rax ], mm1
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void vp9_intra_pred_uv_dc128_mmx(
+; unsigned char *dst,
+; int dst_stride,
+; unsigned char *src,
+; int src_stride
+; )
+global sym(vp9_intra_pred_uv_dc128_mmx) PRIVATE
+sym(vp9_intra_pred_uv_dc128_mmx):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ GET_GOT rbx
+ ; end prolog
+ ; Fills 8 rows of 8 bytes at dst with the constant 128; src and src_stride are not read.
+ ; write out
+ movq mm1, [GLOBAL(dc_128)]
+ mov rax, arg(0) ;dst;
+ movsxd rdx, dword ptr arg(1) ;dst_stride
+ lea rcx, [rdx*3]
+
+ movq [rax ], mm1
+ movq [rax+rdx ], mm1
+ movq [rax+rdx*2], mm1
+ movq [rax+rcx ], mm1
+ lea rax, [rax+rdx*4]
+ movq [rax ], mm1
+ movq [rax+rdx ], mm1
+ movq [rax+rdx*2], mm1
+ movq [rax+rcx ], mm1
+
+ ; begin epilog
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void vp9_intra_pred_uv_tm_sse2(
+; unsigned char *dst,
+; int dst_stride,
+; unsigned char *src,
+; int src_stride
+; )
+%macro vp9_intra_pred_uv_tm 1
+global sym(vp9_intra_pred_uv_tm_%1) PRIVATE
+sym(vp9_intra_pred_uv_tm_%1):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+ ; 8x8 block: dst[row][col] = saturate_u8(left[row] + top[col] - topleft), two rows per loop pass.
+ ; read top row
+ mov edx, 4 ; loop counter: 4 passes of 2 rows
+ mov rsi, arg(2) ;src;
+ movsxd rax, dword ptr arg(3) ;src_stride;
+ sub rsi, rax ; rsi = row above src
+ pxor xmm0, xmm0
+%ifidn %1, ssse3
+ movdqa xmm2, [GLOBAL(dc_1024)]
+%endif
+ movq xmm1, [rsi]
+ punpcklbw xmm1, xmm0 ; top row as 8 words
+
+ ; set up left ptrs and subtract topleft
+ movd xmm3, [rsi-1] ; lowest byte is the top-left pixel
+ lea rsi, [rsi+rax-1] ; rsi = src - 1, left neighbour of row 0
+%ifidn %1, sse2
+ punpcklbw xmm3, xmm0
+ pshuflw xmm3, xmm3, 0x0
+ punpcklqdq xmm3, xmm3 ; top-left pixel replicated into all 8 words
+%else
+ pshufb xmm3, xmm2 ; top-left pixel replicated into all 8 words
+%endif
+ psubw xmm1, xmm3 ; xmm1 = top - topleft, reused for every row
+
+ ; set up dest ptrs
+ mov rdi, arg(0) ;dst;
+ movsxd rcx, dword ptr arg(1) ;dst_stride
+
+.vp9_intra_pred_uv_tm_%1_loop:
+ movd xmm3, [rsi] ; left pixel of the first row of the pair
+ movd xmm5, [rsi+rax] ; left pixel of the second row of the pair
+%ifidn %1, sse2
+ punpcklbw xmm3, xmm0
+ punpcklbw xmm5, xmm0
+ pshuflw xmm3, xmm3, 0x0
+ pshuflw xmm5, xmm5, 0x0
+ punpcklqdq xmm3, xmm3
+ punpcklqdq xmm5, xmm5
+%else
+ pshufb xmm3, xmm2
+ pshufb xmm5, xmm2
+%endif
+ paddw xmm3, xmm1
+ paddw xmm5, xmm1
+ packuswb xmm3, xmm5 ; saturate to bytes: first row in the low half, second in the high half
+ movq [rdi ], xmm3
+ movhps[rdi+rcx], xmm3
+ lea rsi, [rsi+rax*2]
+ lea rdi, [rdi+rcx*2]
+ dec edx
+ jnz .vp9_intra_pred_uv_tm_%1_loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+%endmacro
+
+vp9_intra_pred_uv_tm sse2
+vp9_intra_pred_uv_tm ssse3
+
+;void vp9_intra_pred_uv_ve_mmx(
+; unsigned char *dst,
+; int dst_stride,
+; unsigned char *src,
+; int src_stride
+; )
+global sym(vp9_intra_pred_uv_ve_mmx) PRIVATE
+sym(vp9_intra_pred_uv_ve_mmx):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ ; end prolog
+ ; Copies the 8 bytes directly above src into each of the 8 rows at dst.
+ ; read from top
+ mov rax, arg(2) ;src;
+ movsxd rdx, dword ptr arg(3) ;src_stride;
+ sub rax, rdx ; rax = row above src
+ movq mm1, [rax]
+
+ ; write out
+ mov rax, arg(0) ;dst;
+ movsxd rdx, dword ptr arg(1) ;dst_stride
+ lea rcx, [rdx*3]
+
+ movq [rax ], mm1
+ movq [rax+rdx ], mm1
+ movq [rax+rdx*2], mm1
+ movq [rax+rcx ], mm1
+ lea rax, [rax+rdx*4]
+ movq [rax ], mm1
+ movq [rax+rdx ], mm1
+ movq [rax+rdx*2], mm1
+ movq [rax+rcx ], mm1
+
+ ; begin epilog
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void vp9_intra_pred_uv_ho_mmx2(
+; unsigned char *dst,
+; int dst_stride,
+; unsigned char *src,
+; int src_stride
+; )
+%macro vp9_intra_pred_uv_ho 1
+global sym(vp9_intra_pred_uv_ho_%1) PRIVATE
+sym(vp9_intra_pred_uv_ho_%1):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ push rsi
+ push rdi
+%ifidn %1, ssse3
+%ifndef GET_GOT_SAVE_ARG
+ push rbx
+%endif
+ GET_GOT rbx
+%endif
+ ; end prolog
+ ; Fills each of the 8 rows at dst with 8 copies of the byte immediately left of src in that row.
+ ; read from left and write out
+%ifidn %1, mmx2
+ mov edx, 4
+%endif
+ mov rsi, arg(2) ;src;
+ movsxd rax, dword ptr arg(3) ;src_stride;
+ mov rdi, arg(0) ;dst;
+ movsxd rcx, dword ptr arg(1) ;dst_stride
+%ifidn %1, ssse3
+ lea rdx, [rcx*3]
+ movdqa xmm2, [GLOBAL(dc_00001111)]
+ lea rbx, [rax*3] ; rbx now holds 3*src_stride; the GLOBAL() load above is already done
+%endif
+ dec rsi ; rsi = src - 1, the column left of the block
+%ifidn %1, mmx2
+.vp9_intra_pred_uv_ho_%1_loop:
+ movd mm0, [rsi]
+ movd mm1, [rsi+rax]
+ punpcklbw mm0, mm0 ; duplicate each byte, so the low word is two copies of the left pixel
+ punpcklbw mm1, mm1
+ pshufw mm0, mm0, 0x0 ; broadcast that word: 8 copies of the left pixel
+ pshufw mm1, mm1, 0x0
+ movq [rdi ], mm0
+ movq [rdi+rcx], mm1
+ lea rsi, [rsi+rax*2]
+ lea rdi, [rdi+rcx*2]
+ dec edx
+ jnz .vp9_intra_pred_uv_ho_%1_loop
+%else
+ movd xmm0, [rsi] ; rows 0-3
+ movd xmm3, [rsi+rax]
+ movd xmm1, [rsi+rax*2]
+ movd xmm4, [rsi+rbx]
+ punpcklbw xmm0, xmm3 ; byte 0 = left of row 0, byte 1 = left of row 1
+ punpcklbw xmm1, xmm4 ; byte 0 = left of row 2, byte 1 = left of row 3
+ pshufb xmm0, xmm2 ; low 8 bytes = byte 0, high 8 bytes = byte 1
+ pshufb xmm1, xmm2
+ movq [rdi ], xmm0
+ movhps [rdi+rcx], xmm0
+ movq [rdi+rcx*2], xmm1
+ movhps [rdi+rdx], xmm1
+ lea rsi, [rsi+rax*4]
+ lea rdi, [rdi+rcx*4]
+ movd xmm0, [rsi] ; rows 4-7
+ movd xmm3, [rsi+rax]
+ movd xmm1, [rsi+rax*2]
+ movd xmm4, [rsi+rbx]
+ punpcklbw xmm0, xmm3
+ punpcklbw xmm1, xmm4
+ pshufb xmm0, xmm2
+ pshufb xmm1, xmm2
+ movq [rdi ], xmm0
+ movhps [rdi+rcx], xmm0
+ movq [rdi+rcx*2], xmm1
+ movhps [rdi+rdx], xmm1
+%endif
+
+ ; begin epilog
+%ifidn %1, ssse3
+ RESTORE_GOT
+%ifndef GET_GOT_SAVE_ARG
+ pop rbx
+%endif
+%endif
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+%endmacro
+
+vp9_intra_pred_uv_ho mmx2
+vp9_intra_pred_uv_ho ssse3
+
+SECTION_RODATA
+dc_128:
+ times 8 db 128 ; fill value stored by vp9_intra_pred_uv_dc128_mmx
+dc_4:
+ times 4 dw 4 ; rounding term added before the >> 3 in vp9_intra_pred_uv_dctop_mmx2
+align 16
+dc_1024:
+ times 8 dw 0x400 ; pshufb control bytes 00,04 repeated (the ssse3 tm predictor relies on source byte 4 being zero)
+align 16
+dc_00001111:
+ times 8 db 0 ; pshufb control: low 8 result bytes take source byte 0
+ times 8 db 1 ; high 8 result bytes take source byte 1
diff --git a/vp9/common/x86/vp9_recon_wrapper_sse2.c b/vp9/common/x86/vp9_recon_wrapper_sse2.c
new file mode 100644
index 0000000..bb7baf8
--- /dev/null
+++ b/vp9/common/x86/vp9_recon_wrapper_sse2.c
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vpx_config.h"
+#include "vpx_mem/vpx_mem.h"
+#include "vp9/common/vp9_blockd.h"
+
+#define build_intra_predictors_mbuv_prototype(sym) \
+ void sym(unsigned char *dst, int dst_stride, \
+ const unsigned char *src, int src_stride)
+typedef build_intra_predictors_mbuv_prototype((*build_intra_pred_mbuv_fn_t));
+/* Chroma intra predictors from vp9_recon_sse2.asm; all share the prototype above. */
+extern build_intra_predictors_mbuv_prototype(vp9_intra_pred_uv_dc_mmx2);
+extern build_intra_predictors_mbuv_prototype(vp9_intra_pred_uv_dctop_mmx2);
+extern build_intra_predictors_mbuv_prototype(vp9_intra_pred_uv_dcleft_mmx2);
+extern build_intra_predictors_mbuv_prototype(vp9_intra_pred_uv_dc128_mmx);
+extern build_intra_predictors_mbuv_prototype(vp9_intra_pred_uv_ho_mmx2);
+extern build_intra_predictors_mbuv_prototype(vp9_intra_pred_uv_ho_ssse3);
+extern build_intra_predictors_mbuv_prototype(vp9_intra_pred_uv_ve_mmx);
+extern build_intra_predictors_mbuv_prototype(vp9_intra_pred_uv_tm_sse2);
+extern build_intra_predictors_mbuv_prototype(vp9_intra_pred_uv_tm_ssse3);
+
+static void build_intra_predictors_mbuv_x86(MACROBLOCKD *xd,
+                                            unsigned char *dst_u,
+                                            unsigned char *dst_v,
+                                            int dst_stride,
+                                            build_intra_pred_mbuv_fn_t tm_fn,
+                                            build_intra_pred_mbuv_fn_t ho_fn) {
+  /* Choose one chroma predictor from the macroblock's uv_mode, then run it
+   * once for the U plane and once for the V plane.  tm_fn and ho_fn supply
+   * the TM_PRED and H_PRED implementations; any other unhandled mode makes
+   * the function return without writing anything. */
+  const int uv_mode = xd->mode_info_context->mbmi.uv_mode;
+  const int uv_stride = xd->dst.uv_stride;
+  build_intra_pred_mbuv_fn_t predict;
+
+  if (uv_mode == V_PRED) {
+    predict = vp9_intra_pred_uv_ve_mmx;
+  } else if (uv_mode == H_PRED) {
+    predict = ho_fn;
+  } else if (uv_mode == TM_PRED) {
+    predict = tm_fn;
+  } else if (uv_mode == DC_PRED) {
+    /* The DC variant depends on which neighbouring edges are available. */
+    const int have_top = xd->up_available != 0;
+    const int have_left = xd->left_available != 0;
+
+    if (have_top && have_left) {
+      predict = vp9_intra_pred_uv_dc_mmx2;
+    } else if (have_top) {
+      predict = vp9_intra_pred_uv_dctop_mmx2;
+    } else if (have_left) {
+      predict = vp9_intra_pred_uv_dcleft_mmx2;
+    } else {
+      predict = vp9_intra_pred_uv_dc128_mmx;
+    }
+  } else {
+    /* Unhandled chroma mode: leave both destinations untouched. */
+    return;
+  }
+
+  /* xd->dst.u_buffer / v_buffer are the source pointers handed to the
+   * predictor; both planes use the same source stride. */
+  predict(dst_u, dst_stride, xd->dst.u_buffer, uv_stride);
+  predict(dst_v, dst_stride, xd->dst.v_buffer, uv_stride);
+}
+
+void vp9_build_intra_predictors_mbuv_sse2(MACROBLOCKD *xd) {
+  unsigned char *const pred_u = &xd->predictor[256];  /* passed as dst_u */
+  unsigned char *const pred_v = &xd->predictor[320];  /* passed as dst_v */
+  build_intra_predictors_mbuv_x86(xd, pred_u, pred_v, 8 /* dst_stride */,
+      vp9_intra_pred_uv_tm_sse2, vp9_intra_pred_uv_ho_mmx2);
+}
+
+void vp9_build_intra_predictors_mbuv_ssse3(MACROBLOCKD *xd) {
+  unsigned char *const pred_u = &xd->predictor[256];  /* passed as dst_u */
+  unsigned char *const pred_v = &xd->predictor[320];  /* passed as dst_v */
+  build_intra_predictors_mbuv_x86(xd, pred_u, pred_v, 8 /* dst_stride */,
+      vp9_intra_pred_uv_tm_ssse3, vp9_intra_pred_uv_ho_ssse3);
+}
+
+void vp9_build_intra_predictors_mbuv_s_sse2(MACROBLOCKD *xd) {
+  /* Writes the prediction straight into xd->dst.u_buffer / v_buffer. */
+  const int uv_stride = xd->dst.uv_stride;
+  build_intra_predictors_mbuv_x86(xd, xd->dst.u_buffer, xd->dst.v_buffer,
+      uv_stride, vp9_intra_pred_uv_tm_sse2, vp9_intra_pred_uv_ho_mmx2);
+}
+
+void vp9_build_intra_predictors_mbuv_s_ssse3(MACROBLOCKD *xd) {
+  /* Writes the prediction straight into xd->dst.u_buffer / v_buffer. */
+  const int uv_stride = xd->dst.uv_stride;
+  build_intra_predictors_mbuv_x86(xd, xd->dst.u_buffer, xd->dst.v_buffer,
+      uv_stride, vp9_intra_pred_uv_tm_ssse3, vp9_intra_pred_uv_ho_ssse3);
+}
diff --git a/vp9/common/x86/vp9_sadmxn_x86.c b/vp9/common/x86/vp9_sadmxn_x86.c
new file mode 100644
index 0000000..3072d6d
--- /dev/null
+++ b/vp9/common/x86/vp9_sadmxn_x86.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <emmintrin.h> // SSE2
+#include "./vpx_config.h"
+#include "./vp9_rtcd.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_ports/emmintrin_compat.h"
+
+#if HAVE_SSE2
+unsigned int vp9_sad16x3_sse2(
+    const unsigned char *src_ptr,
+    int src_stride,
+    const unsigned char *ref_ptr,
+    int ref_stride) {
+  /* Sum of absolute differences over a block 16 pixels wide and 3 rows tall.
+   * Both blocks are read with unaligned 16-byte loads. */
+  __m128i total = _mm_setzero_si128();
+  int row;
+
+  for (row = 0; row < 3; ++row) {
+    const __m128i src_row =
+        _mm_loadu_si128((const __m128i *)(src_ptr + row * src_stride));
+    const __m128i ref_row =
+        _mm_loadu_si128((const __m128i *)(ref_ptr + row * ref_stride));
+
+    /* psadbw leaves one partial sum in each 64-bit half of the register. */
+    total = _mm_add_epi16(total, _mm_sad_epu8(src_row, ref_row));
+  }
+
+  /* Fold the upper half's partial sum onto the lower one and return it. */
+  total = _mm_add_epi16(total, _mm_srli_si128(total, 8));
+  return _mm_cvtsi128_si32(total);
+}
+
+unsigned int vp9_sad3x16_sse2(
+    const unsigned char *src_ptr,
+    int src_stride,
+    const unsigned char *ref_ptr,
+    int ref_stride) {
+  /* SAD of a block 3 pixels wide and 16 rows tall.  Each row is fetched with
+   * a 4-byte load, so one byte per row is surplus.  If src_ptr sits one past
+   * a 4-byte boundary its loads start one byte earlier and the surplus byte
+   * comes first; otherwise, and always for ref_ptr, it comes last. */
+  const int src_shifted_back = ((uintptr_t)src_ptr & 3) == 1;
+  const __m128i zero = _mm_setzero_si128();
+  /* 0xffffffffffff0000 in each 64-bit half: clears the two lowest bytes. */
+  const __m128i drop_low2 = _mm_slli_epi64(_mm_cmpeq_epi32(zero, zero), 16);
+  __m128i total = zero;
+  int group;
+
+  if (src_shifted_back) src_ptr -= 1;
+
+  for (group = 0; group < 4; ++group) {
+    __m128i src_lo, src_hi, ref_lo, ref_hi, src4, ref4;
+
+    src_lo = _mm_unpacklo_epi8(
+        _mm_cvtsi32_si128(*(const int *)(src_ptr + 0 * src_stride)),
+        _mm_cvtsi32_si128(*(const int *)(src_ptr + 1 * src_stride)));
+    src_hi = _mm_unpacklo_epi8(
+        _mm_cvtsi32_si128(*(const int *)(src_ptr + 2 * src_stride)),
+        _mm_cvtsi32_si128(*(const int *)(src_ptr + 3 * src_stride)));
+    ref_lo = _mm_unpacklo_epi8(
+        _mm_cvtsi32_si128(*(const int *)(ref_ptr + 0 * ref_stride)),
+        _mm_cvtsi32_si128(*(const int *)(ref_ptr + 1 * ref_stride)));
+    ref_hi = _mm_unpacklo_epi8(
+        _mm_cvtsi32_si128(*(const int *)(ref_ptr + 2 * ref_stride)),
+        _mm_cvtsi32_si128(*(const int *)(ref_ptr + 3 * ref_stride)));
+
+    /* Rows 0 and 1 fill the low 64 bits, rows 2 and 3 the high 64 bits. */
+    src4 = _mm_unpacklo_epi64(src_lo, src_hi);
+    ref4 = _mm_unpacklo_epi64(ref_lo, ref_hi);
+
+    /* Discard the surplus bytes; both operands end up with their six
+     * useful bytes in the upper six bytes of each 64-bit half. */
+    if (src_shifted_back) {
+      src4 = _mm_and_si128(src4, drop_low2);
+    } else {
+      src4 = _mm_slli_epi64(src4, 16);
+    }
+    ref4 = _mm_slli_epi64(ref4, 16);
+
+    total = _mm_add_epi16(total, _mm_sad_epu8(src4, ref4));
+
+    src_ptr += src_stride * 4;
+    ref_ptr += ref_stride * 4;
+  }
+
+  total = _mm_add_epi16(total, _mm_srli_si128(total, 8));
+  return _mm_cvtsi128_si32(total);
+}
+
+#endif
diff --git a/vp9/common/x86/vp9_subpixel_8t_ssse3.asm b/vp9/common/x86/vp9_subpixel_8t_ssse3.asm
new file mode 100644
index 0000000..c6d65e9
--- /dev/null
+++ b/vp9/common/x86/vp9_subpixel_8t_ssse3.asm
@@ -0,0 +1,550 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+;/************************************************************************************
+; Notes: filter_block1d_h6 applies a 6 tap filter horizontally to the input pixels. The
+; input pixel array has output_height rows. This routine assumes that output_height is an
+; even number. This function handles 8 pixels in horizontal direction, calculating ONE
+; row each iteration to take advantage of the 128-bit operations.
+;
+; This is an implementation of some of the SSE optimizations first seen in ffvp8
+;
+;*************************************************************************************/
+
+;void vp9_filter_block1d8_v8_ssse3 - 8-tap vertical filter, 8 pixels per output row
+;(
+; unsigned char *src_ptr, first of the 8 source rows read for output row 0
+; unsigned int src_pitch, byte step between source rows
+; unsigned char *output_ptr, 8 bytes are written per row
+; unsigned int out_pitch, byte step between output rows
+; unsigned int output_height, row count; the dec/jnz loop needs it non-zero
+; short *filter 8 word taps, read with movdqa (16-byte aligned)
+;)
+global sym(vp9_filter_block1d8_v8_ssse3) PRIVATE
+sym(vp9_filter_block1d8_v8_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ push rsi
+ push rdi
+ push rbx
+ ; end prolog
+
+ ALIGN_STACK 16, rax
+ sub rsp, 16*5 ;five 16-byte scratch slots, named below
+ %define k0k1 [rsp + 16*0]
+ %define k2k3 [rsp + 16*1]
+ %define k4k5 [rsp + 16*2]
+ %define k6k7 [rsp + 16*3]
+ %define krd [rsp + 16*4]
+
+ mov rdx, arg(5) ;filter ptr
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;output_ptr
+ mov rcx, 0x0400040 ;rounding value 0x0040 in both low words
+
+ movdqa xmm4, [rdx] ;load filters
+ movd xmm5, rcx
+ packsswb xmm4, xmm4 ;8 word taps -> 8 signed bytes (saturating)
+ pshuflw xmm0, xmm4, 0b ;k0_k1
+ pshuflw xmm1, xmm4, 01010101b ;k2_k3
+ pshuflw xmm2, xmm4, 10101010b ;k4_k5
+ pshuflw xmm3, xmm4, 11111111b ;k6_k7
+
+ punpcklqdq xmm0, xmm0 ;copy low qword up: tap pair in all 8 words
+ punpcklqdq xmm1, xmm1
+ punpcklqdq xmm2, xmm2
+ punpcklqdq xmm3, xmm3
+
+ movdqa k0k1, xmm0
+ movdqa k2k3, xmm1
+ pshufd xmm5, xmm5, 0 ;broadcast: 8 words of 64
+ movdqa k4k5, xmm2
+ movdqa k6k7, xmm3
+ movdqa krd, xmm5
+
+ movsxd rdx, DWORD PTR arg(1) ;pixels_per_line
+
+%if ABI_IS_32BIT=0
+ movsxd r8, DWORD PTR arg(3) ;out_pitch
+%endif
+ mov rax, rsi
+ movsxd rcx, DWORD PTR arg(4) ;output_height
+ add rax, rdx ;rax = src_ptr + pitch
+
+ lea rbx, [rdx + rdx*4] ;pitch * 5
+ add rbx, rdx ;pitch * 6
+
+.vp9_filter_block1d8_v8_ssse3_loop:
+ movq xmm0, [rsi] ;A
+ movq xmm1, [rsi + rdx] ;B
+ movq xmm2, [rsi + rdx * 2] ;C
+ movq xmm3, [rax + rdx * 2] ;D
+ movq xmm4, [rsi + rdx * 4] ;E
+ movq xmm5, [rax + rdx * 4] ;F
+
+ punpcklbw xmm0, xmm1 ;A B
+ punpcklbw xmm2, xmm3 ;C D
+ punpcklbw xmm4, xmm5 ;E F
+
+ movq xmm6, [rsi + rbx] ;G
+ movq xmm7, [rax + rbx] ;H
+
+ pmaddubsw xmm0, k0k1 ;A*k0 + B*k1 per pixel
+ pmaddubsw xmm2, k2k3 ;C*k2 + D*k3
+ punpcklbw xmm6, xmm7 ;G H
+ pmaddubsw xmm4, k4k5 ;E*k4 + F*k5
+ pmaddubsw xmm6, k6k7 ;G*k6 + H*k7
+
+ paddsw xmm0, xmm2
+ paddsw xmm0, krd ;add rounding value
+ paddsw xmm4, xmm6
+ paddsw xmm0, xmm4
+
+ psraw xmm0, 7 ;>> 7
+ packuswb xmm0, xmm0 ;clamp to 0..255
+
+ add rsi, rdx ;next source row
+ add rax, rdx
+
+ movq [rdi], xmm0 ;store 8 output pixels
+
+%if ABI_IS_32BIT
+ add rdi, DWORD PTR arg(3) ;out_pitch
+%else
+ add rdi, r8
+%endif
+ dec rcx
+ jnz .vp9_filter_block1d8_v8_ssse3_loop
+
+ add rsp, 16*5 ;release the scratch slots
+ pop rsp ;presumably the rsp saved by ALIGN_STACK - confirm in x86_abi_support.asm
+ pop rbx
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void vp9_filter_block1d16_v8_ssse3 - 8-tap vertical filter, 16 pixels per output row
+;(
+; unsigned char *src_ptr, first of the 8 source rows read for output row 0
+; unsigned int src_pitch, byte step between source rows
+; unsigned char *output_ptr, 16 bytes are written per row (two 8-byte stores)
+; unsigned int out_pitch, byte step between output rows
+; unsigned int output_height, row count; the dec/jnz loop needs it non-zero
+; short *filter 8 word taps, read with movdqa (16-byte aligned)
+;)
+global sym(vp9_filter_block1d16_v8_ssse3) PRIVATE
+sym(vp9_filter_block1d16_v8_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ push rsi
+ push rdi
+ push rbx
+ ; end prolog
+
+ ALIGN_STACK 16, rax
+ sub rsp, 16*5 ;five 16-byte scratch slots, named below
+ %define k0k1 [rsp + 16*0]
+ %define k2k3 [rsp + 16*1]
+ %define k4k5 [rsp + 16*2]
+ %define k6k7 [rsp + 16*3]
+ %define krd [rsp + 16*4]
+
+ mov rdx, arg(5) ;filter ptr
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;output_ptr
+ mov rcx, 0x0400040 ;rounding value 0x0040 in both low words
+
+ movdqa xmm4, [rdx] ;load filters
+ movd xmm5, rcx
+ packsswb xmm4, xmm4 ;8 word taps -> 8 signed bytes (saturating)
+ pshuflw xmm0, xmm4, 0b ;k0_k1
+ pshuflw xmm1, xmm4, 01010101b ;k2_k3
+ pshuflw xmm2, xmm4, 10101010b ;k4_k5
+ pshuflw xmm3, xmm4, 11111111b ;k6_k7
+
+ punpcklqdq xmm0, xmm0 ;copy low qword up: tap pair in all 8 words
+ punpcklqdq xmm1, xmm1
+ punpcklqdq xmm2, xmm2
+ punpcklqdq xmm3, xmm3
+
+ movdqa k0k1, xmm0
+ movdqa k2k3, xmm1
+ pshufd xmm5, xmm5, 0 ;broadcast: 8 words of 64
+ movdqa k4k5, xmm2
+ movdqa k6k7, xmm3
+ movdqa krd, xmm5
+
+ movsxd rdx, DWORD PTR arg(1) ;pixels_per_line
+
+%if ABI_IS_32BIT=0
+ movsxd r8, DWORD PTR arg(3) ;out_pitch
+%endif
+ mov rax, rsi
+ movsxd rcx, DWORD PTR arg(4) ;output_height
+ add rax, rdx ;rax = src_ptr + pitch
+
+ lea rbx, [rdx + rdx*4] ;pitch * 5
+ add rbx, rdx ;pitch * 6
+
+.vp9_filter_block1d16_v8_ssse3_loop:
+ movq xmm0, [rsi] ;A
+ movq xmm1, [rsi + rdx] ;B
+ movq xmm2, [rsi + rdx * 2] ;C
+ movq xmm3, [rax + rdx * 2] ;D
+ movq xmm4, [rsi + rdx * 4] ;E
+ movq xmm5, [rax + rdx * 4] ;F
+
+ punpcklbw xmm0, xmm1 ;A B
+ punpcklbw xmm2, xmm3 ;C D
+ punpcklbw xmm4, xmm5 ;E F
+
+ movq xmm6, [rsi + rbx] ;G
+ movq xmm7, [rax + rbx] ;H
+
+ pmaddubsw xmm0, k0k1 ;A*k0 + B*k1 per pixel
+ pmaddubsw xmm2, k2k3 ;C*k2 + D*k3
+ punpcklbw xmm6, xmm7 ;G H
+ pmaddubsw xmm4, k4k5 ;E*k4 + F*k5
+ pmaddubsw xmm6, k6k7 ;G*k6 + H*k7
+
+ paddsw xmm0, xmm2
+ paddsw xmm0, krd ;add rounding value
+ paddsw xmm4, xmm6
+ paddsw xmm0, xmm4
+
+ psraw xmm0, 7 ;>> 7
+ packuswb xmm0, xmm0 ;clamp to 0..255
+
+ movq [rdi], xmm0 ;store output pixels 0..7
+
+ movq xmm0, [rsi + 8] ;A
+ movq xmm1, [rsi + rdx + 8] ;B
+ movq xmm2, [rsi + rdx * 2 + 8] ;C
+ movq xmm3, [rax + rdx * 2 + 8] ;D
+ movq xmm4, [rsi + rdx * 4 + 8] ;E
+ movq xmm5, [rax + rdx * 4 + 8] ;F
+
+ punpcklbw xmm0, xmm1 ;A B
+ punpcklbw xmm2, xmm3 ;C D
+ punpcklbw xmm4, xmm5 ;E F
+
+
+ movq xmm6, [rsi + rbx + 8] ;G
+ movq xmm7, [rax + rbx + 8] ;H
+ punpcklbw xmm6, xmm7 ;G H
+
+
+ pmaddubsw xmm0, k0k1 ;same arithmetic as the first half
+ pmaddubsw xmm2, k2k3
+ pmaddubsw xmm4, k4k5
+ pmaddubsw xmm6, k6k7
+
+ paddsw xmm0, xmm2
+ paddsw xmm4, xmm6
+ paddsw xmm0, krd ;add rounding value
+ paddsw xmm0, xmm4
+
+ psraw xmm0, 7 ;>> 7
+ packuswb xmm0, xmm0 ;clamp to 0..255
+
+ add rsi, rdx ;next source row
+ add rax, rdx
+
+ movq [rdi+8], xmm0 ;store output pixels 8..15
+
+%if ABI_IS_32BIT
+ add rdi, DWORD PTR arg(3) ;out_pitch
+%else
+ add rdi, r8
+%endif
+ dec rcx
+ jnz .vp9_filter_block1d16_v8_ssse3_loop
+
+ add rsp, 16*5 ;release the scratch slots
+ pop rsp ;presumably the rsp saved by ALIGN_STACK - confirm in x86_abi_support.asm
+ pop rbx
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void vp9_filter_block1d8_h8_ssse3 - 8-tap horizontal filter, 8 pixels per output row
+;(
+; unsigned char *src_ptr, pixel 0 of the row; bytes -3..12 are read
+; unsigned int src_pixels_per_line, byte step between source rows
+; unsigned char *output_ptr, 8 bytes are written per row
+; unsigned int output_pitch, byte step between output rows
+; unsigned int output_height, row count; the dec/jnz loop needs it non-zero
+; short *filter 8 word taps, read with movdqa (16-byte aligned)
+;)
+global sym(vp9_filter_block1d8_h8_ssse3) PRIVATE
+sym(vp9_filter_block1d8_h8_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ ALIGN_STACK 16, rax
+ sub rsp, 16*5 ;five 16-byte scratch slots, named below
+ %define k0k1 [rsp + 16*0]
+ %define k2k3 [rsp + 16*1]
+ %define k4k5 [rsp + 16*2]
+ %define k6k7 [rsp + 16*3]
+ %define krd [rsp + 16*4]
+
+ mov rdx, arg(5) ;filter ptr
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;output_ptr
+ mov rcx, 0x0400040 ;rounding value 0x0040 in both low words
+
+ movdqa xmm4, [rdx] ;load filters
+ movd xmm5, rcx
+ packsswb xmm4, xmm4 ;8 word taps -> 8 signed bytes (saturating)
+ pshuflw xmm0, xmm4, 0b ;k0_k1
+ pshuflw xmm1, xmm4, 01010101b ;k2_k3
+ pshuflw xmm2, xmm4, 10101010b ;k4_k5
+ pshuflw xmm3, xmm4, 11111111b ;k6_k7
+
+ punpcklqdq xmm0, xmm0 ;copy low qword up: tap pair in all 8 words
+ punpcklqdq xmm1, xmm1
+ punpcklqdq xmm2, xmm2
+ punpcklqdq xmm3, xmm3
+
+ movdqa k0k1, xmm0
+ movdqa k2k3, xmm1
+ pshufd xmm5, xmm5, 0 ;broadcast: 8 words of 64
+ movdqa k4k5, xmm2
+ movdqa k6k7, xmm3
+; krd is not written here: xmm5 is not overwritten in the loop and is added directly
+
+ movsxd rax, dword ptr arg(1) ;src_pixels_per_line
+ movsxd rdx, dword ptr arg(3) ;output_pitch
+ movsxd rcx, dword ptr arg(4) ;output_height
+
+.filter_block1d8_h8_rowloop_ssse3:
+ movq xmm0, [rsi - 3] ; -3 -2 -1 0 1 2 3 4
+
+; movq xmm3, [rsi + 4] ; 4 5 6 7 8 9 10 11
+ movq xmm3, [rsi + 5] ; 5 6 7 8 9 10 11 12
+;note: if we create a k0_k7 filter, we can save a pshufb
+; punpcklbw xmm0, xmm3 ; -3 4 -2 5 -1 6 0 7 1 8 2 9 3 10 4 11
+ punpcklqdq xmm0, xmm3 ;xmm0 = source bytes -3..12, contiguous
+
+ movdqa xmm1, xmm0
+ pshufb xmm0, [GLOBAL(shuf_t0t1)] ;pair up the bytes for taps 0 and 1
+ pmaddubsw xmm0, k0k1
+
+ movdqa xmm2, xmm1
+ pshufb xmm1, [GLOBAL(shuf_t2t3)] ;taps 2 and 3
+ pmaddubsw xmm1, k2k3
+
+ movdqa xmm4, xmm2
+ pshufb xmm2, [GLOBAL(shuf_t4t5)] ;taps 4 and 5
+ pmaddubsw xmm2, k4k5
+
+ pshufb xmm4, [GLOBAL(shuf_t6t7)] ;taps 6 and 7
+ pmaddubsw xmm4, k6k7
+
+ paddsw xmm0, xmm1
+ paddsw xmm0, xmm2
+ paddsw xmm0, xmm5 ;add rounding value (kept in xmm5)
+ paddsw xmm0, xmm4
+ psraw xmm0, 7 ;>> 7
+ packuswb xmm0, xmm0 ;clamp to 0..255
+
+ lea rsi, [rsi + rax] ;next source row
+ movq [rdi], xmm0 ;store 8 output pixels
+
+ lea rdi, [rdi + rdx] ;next output row
+ dec rcx
+ jnz .filter_block1d8_h8_rowloop_ssse3
+
+ add rsp, 16*5 ;release the scratch slots
+ pop rsp ;presumably the rsp saved by ALIGN_STACK - confirm in x86_abi_support.asm
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void vp9_filter_block1d16_h8_ssse3 - 8-tap horizontal filter, 16 pixels per output row
+;(
+; unsigned char *src_ptr, pixel 0 of the row; bytes -3..20 are read
+; unsigned int src_pixels_per_line, byte step between source rows
+; unsigned char *output_ptr, written with movdqa: must be 16-byte aligned
+; unsigned int output_pitch, byte step between output rows
+; unsigned int output_height, row count; the dec/jnz loop needs it non-zero
+; short *filter 8 word taps, read with movdqa (16-byte aligned)
+;)
+global sym(vp9_filter_block1d16_h8_ssse3) PRIVATE
+sym(vp9_filter_block1d16_h8_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ ALIGN_STACK 16, rax
+ sub rsp, 16*5 ;five 16-byte scratch slots, named below
+ %define k0k1 [rsp + 16*0]
+ %define k2k3 [rsp + 16*1]
+ %define k4k5 [rsp + 16*2]
+ %define k6k7 [rsp + 16*3]
+ %define krd [rsp + 16*4]
+
+ mov rdx, arg(5) ;filter ptr
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;output_ptr
+ mov rcx, 0x0400040 ;rounding value 0x0040 in both low words
+
+ movdqa xmm4, [rdx] ;load filters
+ movd xmm5, rcx
+ packsswb xmm4, xmm4 ;8 word taps -> 8 signed bytes (saturating)
+ pshuflw xmm0, xmm4, 0b ;k0_k1
+ pshuflw xmm1, xmm4, 01010101b ;k2_k3
+ pshuflw xmm2, xmm4, 10101010b ;k4_k5
+ pshuflw xmm3, xmm4, 11111111b ;k6_k7
+
+ punpcklqdq xmm0, xmm0 ;copy low qword up: tap pair in all 8 words
+ punpcklqdq xmm1, xmm1
+ punpcklqdq xmm2, xmm2
+ punpcklqdq xmm3, xmm3
+
+ movdqa k0k1, xmm0
+ movdqa k2k3, xmm1
+ pshufd xmm5, xmm5, 0 ;broadcast: 8 words of 64
+ movdqa k4k5, xmm2
+ movdqa k6k7, xmm3
+ movdqa krd, xmm5
+
+ movsxd rax, dword ptr arg(1) ;src_pixels_per_line
+ movsxd rdx, dword ptr arg(3) ;output_pitch
+ movsxd rcx, dword ptr arg(4) ;output_height
+
+.filter_block1d16_h8_rowloop_ssse3:
+ movq xmm0, [rsi - 3] ; -3 -2 -1 0 1 2 3 4
+
+; movq xmm3, [rsi + 4] ; 4 5 6 7 8 9 10 11
+ movq xmm3, [rsi + 5] ; 5 6 7 8 9 10 11 12
+;note: if we create a k0_k7 filter, we can save a pshufb
+; punpcklbw xmm0, xmm3 ; -3 4 -2 5 -1 6 0 7 1 8 2 9 3 10 4 11
+ punpcklqdq xmm0, xmm3 ;xmm0 = source bytes -3..12, contiguous
+
+ movdqa xmm1, xmm0
+ pshufb xmm0, [GLOBAL(shuf_t0t1)] ;pair up the bytes for taps 0 and 1
+ pmaddubsw xmm0, k0k1
+
+ movdqa xmm2, xmm1
+ pshufb xmm1, [GLOBAL(shuf_t2t3)] ;taps 2 and 3
+ pmaddubsw xmm1, k2k3
+
+ movdqa xmm4, xmm2
+ pshufb xmm2, [GLOBAL(shuf_t4t5)] ;taps 4 and 5
+ pmaddubsw xmm2, k4k5
+
+ pshufb xmm4, [GLOBAL(shuf_t6t7)] ;taps 6 and 7
+ pmaddubsw xmm4, k6k7
+
+ paddsw xmm0, xmm1
+ paddsw xmm0, xmm4
+ paddsw xmm0, xmm2
+ paddsw xmm0, krd ;add rounding value
+ psraw xmm0, 7 ;>> 7
+ packuswb xmm0, xmm0 ;output pixels 0..7, clamped to 0..255
+
+
+ movq xmm3, [rsi + 5] ; 5 6 7 8 9 10 11 12
+; movq xmm7, [rsi + 12]
+ movq xmm7, [rsi + 13] ; 13 14 15 16 17 18 19 20
+;note: same as above
+; punpcklbw xmm3, xmm7
+ punpcklqdq xmm3, xmm7 ;xmm3 = source bytes 5..20, contiguous
+
+ movdqa xmm1, xmm3
+ pshufb xmm3, [GLOBAL(shuf_t0t1)]
+ pmaddubsw xmm3, k0k1
+
+ movdqa xmm2, xmm1
+ pshufb xmm1, [GLOBAL(shuf_t2t3)]
+ pmaddubsw xmm1, k2k3
+
+ movdqa xmm4, xmm2
+ pshufb xmm2, [GLOBAL(shuf_t4t5)]
+ pmaddubsw xmm2, k4k5
+
+ pshufb xmm4, [GLOBAL(shuf_t6t7)]
+ pmaddubsw xmm4, k6k7
+
+ paddsw xmm3, xmm1
+ paddsw xmm3, xmm2
+ paddsw xmm3, krd ;add rounding value
+ paddsw xmm3, xmm4
+ psraw xmm3, 7 ;>> 7
+ packuswb xmm3, xmm3 ;output pixels 8..15, clamped to 0..255
+ punpcklqdq xmm0, xmm3 ;xmm0 = all 16 output pixels
+
+ lea rsi, [rsi + rax] ;next source row
+ movdqa [rdi], xmm0 ;aligned store: rdi must be 16-byte aligned on every row
+
+ lea rdi, [rdi + rdx] ;next output row
+ dec rcx
+ jnz .filter_block1d16_h8_rowloop_ssse3
+
+ add rsp, 16*5 ;release the scratch slots
+ pop rsp ;presumably the rsp saved by ALIGN_STACK - confirm in x86_abi_support.asm
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+SECTION_RODATA
+align 16
+shuf_t0t1: ;pshufb mask: byte pairs (i, i+1) for i = 0..7
+ db 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8
+align 16
+shuf_t2t3: ;pshufb mask: byte pairs (i+2, i+3) for i = 0..7
+ db 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10
+align 16
+shuf_t4t5: ;pshufb mask: byte pairs (i+4, i+5) for i = 0..7
+ db 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12
+align 16
+shuf_t6t7: ;pshufb mask: byte pairs (i+6, i+7) for i = 0..7
+ db 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14
diff --git a/vp9/common/x86/vp9_subpixel_mmx.asm b/vp9/common/x86/vp9_subpixel_mmx.asm
new file mode 100644
index 0000000..05e5cc8
--- /dev/null
+++ b/vp9/common/x86/vp9_subpixel_mmx.asm
@@ -0,0 +1,727 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+
+%define BLOCK_HEIGHT_WIDTH 4
+%define vp9_filter_weight 128
+%define VP9_FILTER_SHIFT 7
+
+
+;void vp9_filter_block1d_h6_mmx - 6-tap horizontal filter, 4 pixels per row, word output
+;(
+; unsigned char *src_ptr, pixel 0 of the row; bytes -2..6 are read
+; unsigned short *output_ptr, 4 words are written per row
+; unsigned int src_pixels_per_line, byte step between source rows
+; unsigned int pixel_step, (not read by this routine)
+; unsigned int output_height, row count; the dec/jnz loop needs it non-zero
+; unsigned int output_width, added to output_ptr after every row
+; short * vp9_filter 6 taps, 16 bytes apart; the first 4 words of each are used
+;)
+global sym(vp9_filter_block1d_h6_mmx) PRIVATE
+sym(vp9_filter_block1d_h6_mmx):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rdx, arg(6) ;vp9_filter
+
+ movq mm1, [rdx + 16] ; do both the negative taps first!!! (mm1 = kernel 1)
+ movq mm2, [rdx + 32] ; mm2 = kernel 2
+ movq mm6, [rdx + 48] ; mm6 = kernel 3
+ movq mm7, [rdx + 64] ; mm7 = kernel 4
+
+ mov rdi, arg(1) ;output_ptr
+ mov rsi, arg(0) ;src_ptr
+ movsxd rcx, dword ptr arg(4) ;output_height
+ movsxd rax, dword ptr arg(5) ;output_width ; used below as the output row step
+ pxor mm0, mm0 ; mm0 = 00000000
+
+.nextrow:
+ movq mm3, [rsi-2] ; mm3 = p-2..p5
+ movq mm4, mm3 ; mm4 = p-2..p5
+ psrlq mm3, 8 ; mm3 = p-1..p5
+ punpcklbw mm3, mm0 ; mm3 = p-1..p2
+ pmullw mm3, mm1 ; mm3 *= kernel 1 modifiers.
+
+ movq mm5, mm4 ; mm5 = p-2..p5
+ punpckhbw mm4, mm0 ; mm4 = p2..p5
+ pmullw mm4, mm7 ; mm4 *= kernel 4 modifiers
+ paddsw mm3, mm4 ; mm3 += mm4
+
+ movq mm4, mm5 ; mm4 = p-2..p5;
+ psrlq mm5, 16 ; mm5 = p0..p5;
+ punpcklbw mm5, mm0 ; mm5 = p0..p3
+ pmullw mm5, mm2 ; mm5 *= kernel 2 modifiers
+ paddsw mm3, mm5 ; mm3 += mm5
+
+ movq mm5, mm4 ; mm5 = p-2..p5
+ psrlq mm4, 24 ; mm4 = p1..p5
+ punpcklbw mm4, mm0 ; mm4 = p1..p4
+ pmullw mm4, mm6 ; mm4 *= kernel 3 modifiers
+ paddsw mm3, mm4 ; mm3 += mm4
+
+ ; do outer positive taps
+ movd mm4, [rsi+3] ; mm4 = p3..p6 (bytes)
+ punpcklbw mm4, mm0 ; mm4 = p3..p6
+ pmullw mm4, [rdx+80] ; mm4 *= kernel 5 modifiers
+ paddsw mm3, mm4 ; mm3 += mm4
+
+ punpcklbw mm5, mm0 ; mm5 = p-2..p1
+ pmullw mm5, [rdx] ; mm5 *= kernel 0 modifiers
+ paddsw mm3, mm5 ; mm3 += mm5
+
+ paddsw mm3, [GLOBAL(rd)] ; mm3 += round value
+ psraw mm3, VP9_FILTER_SHIFT ; mm3 /= 128
+ packuswb mm3, mm0 ; pack and unpack to saturate
+ punpcklbw mm3, mm0 ; back to 4 words, each now within 0..255
+
+ movq [rdi], mm3 ; store the results in the destination
+
+%if ABI_IS_32BIT
+ add rsi, dword ptr arg(2) ;src_pixels_per_line ; next line
+ add rdi, rax;
+%else
+ movsxd r8, dword ptr arg(2) ;src_pixels_per_line
+ add rdi, rax;
+
+ add rsi, r8 ; next line
+%endif
+
+ dec rcx ; decrement count
+ jnz .nextrow ; next row
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;void vp9_filter_block1dc_v6_mmx - 6-tap vertical filter, word input, 4 output bytes per row
+;(
+; short *src_ptr, row 0; rows -2..3 are read
+; unsigned char *output_ptr, 4 bytes are written per row
+; int output_pitch, byte step between output rows
+; unsigned int pixels_per_line, used as the byte step between source rows
+; unsigned int pixel_step, (not read by this routine)
+; unsigned int output_height, row count; the dec/jnz loop needs it non-zero
+; unsigned int output_width, (not read by this routine)
+; short * vp9_filter 6 taps, 16 bytes apart; the first 4 words of each are used
+;)
+global sym(vp9_filter_block1dc_v6_mmx) PRIVATE
+sym(vp9_filter_block1dc_v6_mmx):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 8
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ movq mm5, [GLOBAL(rd)] ; round value, loaded before rbx is reused below
+ push rbx ; rbx is repurposed as the filter pointer
+ mov rbx, arg(7) ;vp9_filter
+ movq mm1, [rbx + 16] ; do both the negative taps first!!! (mm1 = kernel 1)
+ movq mm2, [rbx + 32] ; mm2 = kernel 2
+ movq mm6, [rbx + 48] ; mm6 = kernel 3
+ movq mm7, [rbx + 64] ; mm7 = kernel 4
+
+ movsxd rdx, dword ptr arg(3) ;pixels_per_line
+ mov rdi, arg(1) ;output_ptr
+ mov rsi, arg(0) ;src_ptr
+ sub rsi, rdx
+ sub rsi, rdx ; rsi = row -2
+ movsxd rcx, DWORD PTR arg(5) ;output_height
+ movsxd rax, DWORD PTR arg(2) ;output_pitch ; destination pitch?
+ pxor mm0, mm0 ; mm0 = 00000000
+
+
+.nextrow_cv:
+ movq mm3, [rsi+rdx] ; mm3 = p0..p3 = row -1
+ pmullw mm3, mm1 ; mm3 *= kernel 1 modifiers.
+
+
+ movq mm4, [rsi + 4*rdx] ; mm4 = p0..p3 = row 2
+ pmullw mm4, mm7 ; mm4 *= kernel 4 modifiers.
+ paddsw mm3, mm4 ; mm3 += mm4
+
+ movq mm4, [rsi + 2*rdx] ; mm4 = p0..p3 = row 0
+ pmullw mm4, mm2 ; mm4 *= kernel 2 modifiers.
+ paddsw mm3, mm4 ; mm3 += mm4
+
+ movq mm4, [rsi] ; mm4 = p0..p3 = row -2
+ pmullw mm4, [rbx] ; mm4 *= kernel 0 modifiers.
+ paddsw mm3, mm4 ; mm3 += mm4
+
+
+ add rsi, rdx ; move source forward 1 line to avoid 3 * pitch
+ movq mm4, [rsi + 2*rdx] ; mm4 = p0..p3 = row 1
+ pmullw mm4, mm6 ; mm4 *= kernel 3 modifiers.
+ paddsw mm3, mm4 ; mm3 += mm4
+
+ movq mm4, [rsi + 4*rdx] ; mm4 = p0..p3 = row 3
+ pmullw mm4, [rbx +80] ; mm4 *= kernel 5 modifiers.
+ paddsw mm3, mm4 ; mm3 += mm4
+
+
+ paddsw mm3, mm5 ; mm3 += round value
+ psraw mm3, VP9_FILTER_SHIFT ; mm3 /= 128
+ packuswb mm3, mm0 ; pack and saturate
+
+ movd [rdi],mm3 ; store the results in the destination
+ ; the subsequent iterations repeat 3 out of 4 of these reads. Since the
+ ; recon block should be in cache this shouldn't cost much. Its obviously
+ ; avoidable!!!.
+ lea rdi, [rdi+rax] ; next output row
+ dec rcx ; decrement count
+ jnz .nextrow_cv ; next row
+
+ pop rbx
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;void vp9_bilinear_predict8x8_mmx - two-pass (horizontal, then vertical) 2-tap filter, 8x8 output
+;(
+; unsigned char *src_ptr,
+; int src_pixels_per_line,
+; int xoffset, index into vp9_bilinear_filters_8x_mmx (32 bytes per entry)
+; int yoffset, index into the same table
+; unsigned char *dst_ptr,
+; int dst_pitch
+;)
+global sym(vp9_bilinear_predict8x8_mmx) PRIVATE
+sym(vp9_bilinear_predict8x8_mmx):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ ;const short *HFilter = bilinear_filters_mmx[xoffset];
+ ;const short *VFilter = bilinear_filters_mmx[yoffset];
+
+ movsxd rax, dword ptr arg(2) ;xoffset
+ mov rdi, arg(4) ;dst_ptr ;
+
+ shl rax, 5 ; offset * 32
+ lea rcx, [GLOBAL(sym(vp9_bilinear_filters_8x_mmx))]
+
+ add rax, rcx ; HFilter
+ mov rsi, arg(0) ;src_ptr ;
+
+ movsxd rdx, dword ptr arg(5) ;dst_pitch
+ movq mm1, [rax] ; mm1 = HFilter tap 0
+
+ movq mm2, [rax+16] ; mm2 = HFilter tap 1
+ movsxd rax, dword ptr arg(3) ;yoffset
+
+ pxor mm0, mm0 ; mm0 = 0, used to unpack bytes to words
+
+ shl rax, 5 ; offset*32
+ add rax, rcx ; VFilter
+
+ lea rcx, [rdi+rdx*8] ; rcx = dst_ptr + 8 * dst_pitch, the loop's end marker
+ movsxd rdx, dword ptr arg(1) ;src_pixels_per_line ;
+
+
+
+ ; get the first horizontal line done ;
+ movq mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
+ movq mm4, mm3 ; make a copy of current line
+
+ punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06
+ punpckhbw mm4, mm0 ;
+
+ pmullw mm3, mm1 ; pixels * HFilter tap 0
+ pmullw mm4, mm1 ;
+
+ movq mm5, [rsi+1] ; same row, one pixel to the right
+ movq mm6, mm5 ;
+
+ punpcklbw mm5, mm0 ;
+ punpckhbw mm6, mm0 ;
+
+ pmullw mm5, mm2 ; pixels * HFilter tap 1
+ pmullw mm6, mm2 ;
+
+ paddw mm3, mm5 ;
+ paddw mm4, mm6 ;
+
+ paddw mm3, [GLOBAL(rd)] ; mm3 += round value
+ psraw mm3, VP9_FILTER_SHIFT ; mm3 /= 128
+
+ paddw mm4, [GLOBAL(rd)] ;
+ psraw mm4, VP9_FILTER_SHIFT ;
+
+ movq mm7, mm3 ;
+ packuswb mm7, mm4 ; mm7 = first row's horizontal result as 8 bytes
+
+ add rsi, rdx ; next line
+.next_row_8x8:
+ movq mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
+ movq mm4, mm3 ; make a copy of current line
+
+ punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06
+ punpckhbw mm4, mm0 ;
+
+ pmullw mm3, mm1 ; pixels * HFilter tap 0
+ pmullw mm4, mm1 ;
+
+ movq mm5, [rsi+1] ; same row, one pixel to the right
+ movq mm6, mm5 ;
+
+ punpcklbw mm5, mm0 ;
+ punpckhbw mm6, mm0 ;
+
+ pmullw mm5, mm2 ; pixels * HFilter tap 1
+ pmullw mm6, mm2 ;
+
+ paddw mm3, mm5 ;
+ paddw mm4, mm6 ;
+
+ movq mm5, mm7 ; mm5/mm6 = previous row's horizontal result
+ movq mm6, mm7 ;
+
+ punpcklbw mm5, mm0 ;
+ punpckhbw mm6, mm0
+
+ pmullw mm5, [rax] ; previous row * VFilter tap 0
+ pmullw mm6, [rax] ;
+
+ paddw mm3, [GLOBAL(rd)] ; mm3 += round value
+ psraw mm3, VP9_FILTER_SHIFT ; mm3 /= 128
+
+ paddw mm4, [GLOBAL(rd)] ;
+ psraw mm4, VP9_FILTER_SHIFT ;
+
+ movq mm7, mm3 ;
+ packuswb mm7, mm4 ; mm7 = this row's horizontal result, kept for the next iteration
+
+
+ pmullw mm3, [rax+16] ; current row * VFilter tap 1
+ pmullw mm4, [rax+16] ;
+
+ paddw mm3, mm5 ;
+ paddw mm4, mm6 ;
+
+
+ paddw mm3, [GLOBAL(rd)] ; mm3 += round value
+ psraw mm3, VP9_FILTER_SHIFT ; mm3 /= 128
+
+ paddw mm4, [GLOBAL(rd)] ;
+ psraw mm4, VP9_FILTER_SHIFT ;
+
+ packuswb mm3, mm4
+
+ movq [rdi], mm3 ; store the results in the destination
+
+%if ABI_IS_32BIT
+ add rsi, rdx ; next line
+ add rdi, dword ptr arg(5) ;dst_pitch ;
+%else
+ movsxd r8, dword ptr arg(5) ;dst_pitch
+ add rsi, rdx ; next line
+ add rdi, r8 ;dst_pitch
+%endif
+ cmp rdi, rcx ; stop once rdi reaches the end marker
+ jne .next_row_8x8
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;void vp9_bilinear_predict8x4_mmx - two-pass (horizontal, then vertical) 2-tap filter, 8 wide, 4 rows
+;(
+; unsigned char *src_ptr,
+; int src_pixels_per_line,
+; int xoffset, index into vp9_bilinear_filters_8x_mmx (32 bytes per entry)
+; int yoffset, index into the same table
+; unsigned char *dst_ptr,
+; int dst_pitch
+;)
+global sym(vp9_bilinear_predict8x4_mmx) PRIVATE
+sym(vp9_bilinear_predict8x4_mmx):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ ;const short *HFilter = bilinear_filters_mmx[xoffset];
+ ;const short *VFilter = bilinear_filters_mmx[yoffset];
+
+ movsxd rax, dword ptr arg(2) ;xoffset
+ mov rdi, arg(4) ;dst_ptr ;
+
+ lea rcx, [GLOBAL(sym(vp9_bilinear_filters_8x_mmx))]
+ shl rax, 5 ; offset * 32
+
+ mov rsi, arg(0) ;src_ptr ;
+ add rax, rcx ; HFilter
+
+ movsxd rdx, dword ptr arg(5) ;dst_pitch
+ movq mm1, [rax] ; mm1 = HFilter tap 0
+
+ movq mm2, [rax+16] ; mm2 = HFilter tap 1
+ movsxd rax, dword ptr arg(3) ;yoffset
+
+ pxor mm0, mm0 ; mm0 = 0, used to unpack bytes to words
+ shl rax, 5 ; offset * 32
+
+ add rax, rcx ; VFilter
+ lea rcx, [rdi+rdx*4] ; rcx = dst_ptr + 4 * dst_pitch, the loop's end marker
+
+ movsxd rdx, dword ptr arg(1) ;src_pixels_per_line ;
+
+ ; get the first horizontal line done ;
+ movq mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
+ movq mm4, mm3 ; make a copy of current line
+
+ punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06
+ punpckhbw mm4, mm0 ;
+
+ pmullw mm3, mm1 ; pixels * HFilter tap 0
+ pmullw mm4, mm1 ;
+
+ movq mm5, [rsi+1] ; same row, one pixel to the right
+ movq mm6, mm5 ;
+
+ punpcklbw mm5, mm0 ;
+ punpckhbw mm6, mm0 ;
+
+ pmullw mm5, mm2 ; pixels * HFilter tap 1
+ pmullw mm6, mm2 ;
+
+ paddw mm3, mm5 ;
+ paddw mm4, mm6 ;
+
+ paddw mm3, [GLOBAL(rd)] ; mm3 += round value
+ psraw mm3, VP9_FILTER_SHIFT ; mm3 /= 128
+
+ paddw mm4, [GLOBAL(rd)] ;
+ psraw mm4, VP9_FILTER_SHIFT ;
+
+ movq mm7, mm3 ;
+ packuswb mm7, mm4 ; mm7 = first row's horizontal result as 8 bytes
+
+ add rsi, rdx ; next line
+.next_row_8x4:
+ movq mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
+ movq mm4, mm3 ; make a copy of current line
+
+ punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06
+ punpckhbw mm4, mm0 ;
+
+ pmullw mm3, mm1 ; pixels * HFilter tap 0
+ pmullw mm4, mm1 ;
+
+ movq mm5, [rsi+1] ; same row, one pixel to the right
+ movq mm6, mm5 ;
+
+ punpcklbw mm5, mm0 ;
+ punpckhbw mm6, mm0 ;
+
+ pmullw mm5, mm2 ; pixels * HFilter tap 1
+ pmullw mm6, mm2 ;
+
+ paddw mm3, mm5 ;
+ paddw mm4, mm6 ;
+
+ movq mm5, mm7 ; mm5/mm6 = previous row's horizontal result
+ movq mm6, mm7 ;
+
+ punpcklbw mm5, mm0 ;
+ punpckhbw mm6, mm0
+
+ pmullw mm5, [rax] ; previous row * VFilter tap 0
+ pmullw mm6, [rax] ;
+
+ paddw mm3, [GLOBAL(rd)] ; mm3 += round value
+ psraw mm3, VP9_FILTER_SHIFT ; mm3 /= 128
+
+ paddw mm4, [GLOBAL(rd)] ;
+ psraw mm4, VP9_FILTER_SHIFT ;
+
+ movq mm7, mm3 ;
+ packuswb mm7, mm4 ; mm7 = this row's horizontal result, kept for the next iteration
+
+
+ pmullw mm3, [rax+16] ; current row * VFilter tap 1
+ pmullw mm4, [rax+16] ;
+
+ paddw mm3, mm5 ;
+ paddw mm4, mm6 ;
+
+
+ paddw mm3, [GLOBAL(rd)] ; mm3 += round value
+ psraw mm3, VP9_FILTER_SHIFT ; mm3 /= 128
+
+ paddw mm4, [GLOBAL(rd)] ;
+ psraw mm4, VP9_FILTER_SHIFT ;
+
+ packuswb mm3, mm4
+
+ movq [rdi], mm3 ; store the results in the destination
+
+%if ABI_IS_32BIT
+ add rsi, rdx ; next line
+ add rdi, dword ptr arg(5) ;dst_pitch ;
+%else
+ movsxd r8, dword ptr arg(5) ;dst_pitch
+ add rsi, rdx ; next line
+ add rdi, r8
+%endif
+ cmp rdi, rcx ; stop once rdi reaches the end marker
+ jne .next_row_8x4
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;void vp9_bilinear_predict4x4_mmx - two-pass (horizontal, then vertical) 2-tap filter, 4x4 output
+;(
+; unsigned char *src_ptr,
+; int src_pixels_per_line,
+; int xoffset, index into vp9_bilinear_filters_8x_mmx (32 bytes per entry)
+; int yoffset, index into the same table
+; unsigned char *dst_ptr,
+; int dst_pitch
+;)
+global sym(vp9_bilinear_predict4x4_mmx) PRIVATE
+sym(vp9_bilinear_predict4x4_mmx):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ ;const short *HFilter = bilinear_filters_mmx[xoffset];
+ ;const short *VFilter = bilinear_filters_mmx[yoffset];
+
+ movsxd rax, dword ptr arg(2) ;xoffset
+ mov rdi, arg(4) ;dst_ptr ;
+
+ lea rcx, [GLOBAL(sym(vp9_bilinear_filters_8x_mmx))]
+ shl rax, 5 ; offset * 32
+
+ add rax, rcx ; HFilter
+ mov rsi, arg(0) ;src_ptr ;
+
+ movsxd rdx, dword ptr arg(5) ;dst_pitch
+ movq mm1, [rax] ; mm1 = HFilter tap 0
+
+ movq mm2, [rax+16] ; mm2 = HFilter tap 1
+ movsxd rax, dword ptr arg(3) ;yoffset
+
+ pxor mm0, mm0 ; mm0 = 0, used to unpack bytes to words
+ shl rax, 5 ; offset * 32
+
+ add rax, rcx ; VFilter
+ lea rcx, [rdi+rdx*4] ; rcx = dst_ptr + 4 * dst_pitch, the loop's end marker
+
+ movsxd rdx, dword ptr arg(1) ;src_pixels_per_line ;
+
+ ; get the first horizontal line done ;
+ movd mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
+ punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06
+
+ pmullw mm3, mm1 ; pixels * HFilter tap 0
+ movd mm5, [rsi+1] ; same row, one pixel to the right
+
+ punpcklbw mm5, mm0 ;
+ pmullw mm5, mm2 ; pixels * HFilter tap 1
+
+ paddw mm3, mm5 ;
+ paddw mm3, [GLOBAL(rd)] ; mm3 += round value
+
+ psraw mm3, VP9_FILTER_SHIFT ; mm3 /= 128
+
+ movq mm7, mm3 ;
+ packuswb mm7, mm0 ; mm7 = first row's horizontal result as bytes
+
+ add rsi, rdx ; next line
+.next_row_4x4:
+ movd mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
+ punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06
+
+ pmullw mm3, mm1 ; pixels * HFilter tap 0
+ movd mm5, [rsi+1] ; same row, one pixel to the right
+
+ punpcklbw mm5, mm0 ;
+ pmullw mm5, mm2 ; pixels * HFilter tap 1
+
+ paddw mm3, mm5 ;
+
+ movq mm5, mm7 ; mm5 = previous row's horizontal result
+ punpcklbw mm5, mm0 ;
+
+ pmullw mm5, [rax] ; previous row * VFilter tap 0
+ paddw mm3, [GLOBAL(rd)] ; mm3 += round value
+
+ psraw mm3, VP9_FILTER_SHIFT ; mm3 /= 128
+ movq mm7, mm3 ;
+
+ packuswb mm7, mm0 ; mm7 = this row's horizontal result, kept for the next iteration
+
+ pmullw mm3, [rax+16] ; current row * VFilter tap 1
+ paddw mm3, mm5 ;
+
+
+ paddw mm3, [GLOBAL(rd)] ; mm3 += round value
+ psraw mm3, VP9_FILTER_SHIFT ; mm3 /= 128
+
+ packuswb mm3, mm0
+ movd [rdi], mm3 ; store the results in the destination
+
+%if ABI_IS_32BIT
+ add rsi, rdx ; next line
+ add rdi, dword ptr arg(5) ;dst_pitch ;
+%else
+ movsxd r8, dword ptr arg(5) ;dst_pitch ;
+ add rsi, rdx ; next line
+ add rdi, r8
+%endif
+
+ cmp rdi, rcx ; stop once rdi reaches the end marker
+ jne .next_row_4x4
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+
+SECTION_RODATA
+align 16
+rd: ;rounding value 0x40 = 1 << (VP9_FILTER_SHIFT - 1), in four words
+ times 4 dw 0x40
+
+align 16
+global HIDDEN_DATA(sym(vp9_six_tap_mmx))
+sym(vp9_six_tap_mmx): ;8 filters x 6 taps; every tap is replicated into 8 words (16 bytes)
+ times 8 dw 0
+ times 8 dw 0
+ times 8 dw 128
+ times 8 dw 0
+ times 8 dw 0
+ times 8 dw 0
+
+ times 8 dw 0
+ times 8 dw -6
+ times 8 dw 123
+ times 8 dw 12
+ times 8 dw -1
+ times 8 dw 0
+
+ times 8 dw 2
+ times 8 dw -11
+ times 8 dw 108
+ times 8 dw 36
+ times 8 dw -8
+ times 8 dw 1
+
+ times 8 dw 0
+ times 8 dw -9
+ times 8 dw 93
+ times 8 dw 50
+ times 8 dw -6
+ times 8 dw 0
+
+ times 8 dw 3
+ times 8 dw -16
+ times 8 dw 77
+ times 8 dw 77
+ times 8 dw -16
+ times 8 dw 3
+
+ times 8 dw 0
+ times 8 dw -6
+ times 8 dw 50
+ times 8 dw 93
+ times 8 dw -9
+ times 8 dw 0
+
+ times 8 dw 1
+ times 8 dw -8
+ times 8 dw 36
+ times 8 dw 108
+ times 8 dw -11
+ times 8 dw 2
+
+ times 8 dw 0
+ times 8 dw -1
+ times 8 dw 12
+ times 8 dw 123
+ times 8 dw -6
+ times 8 dw 0
+
+
+align 16
+global HIDDEN_DATA(sym(vp9_bilinear_filters_8x_mmx))
+sym(vp9_bilinear_filters_8x_mmx): ;8 filters x 2 taps (each pair sums to 128); 32 bytes per filter
+ times 8 dw 128
+ times 8 dw 0
+
+ times 8 dw 112
+ times 8 dw 16
+
+ times 8 dw 96
+ times 8 dw 32
+
+ times 8 dw 80
+ times 8 dw 48
+
+ times 8 dw 64
+ times 8 dw 64
+
+ times 8 dw 48
+ times 8 dw 80
+
+ times 8 dw 32
+ times 8 dw 96
+
+ times 8 dw 16
+ times 8 dw 112
diff --git a/vp9/common/x86/vp9_subpixel_sse2.asm b/vp9/common/x86/vp9_subpixel_sse2.asm
new file mode 100644
index 0000000..b0c4f12
--- /dev/null
+++ b/vp9/common/x86/vp9_subpixel_sse2.asm
@@ -0,0 +1,1372 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+%define BLOCK_HEIGHT_WIDTH 4
+%define VP9_FILTER_WEIGHT 128
+%define VP9_FILTER_SHIFT 7
+
+
+;/************************************************************************************
+; Notes: filter_block1d_h6 applies a 6 tap filter horizontally to the input pixels. The
+; input pixel array has output_height rows. This routine assumes that output_height is an
+; even number. This function handles 8 pixels in horizontal direction, calculating ONE
+; row each iteration to take advantage of the 128 bits operations.
+;*************************************************************************************/
+;void vp9_filter_block1d8_h6_sse2
+;(
+; unsigned char *src_ptr,
+; unsigned short *output_ptr,
+; unsigned int src_pixels_per_line,
+; unsigned int pixel_step,
+; unsigned int output_height,
+; unsigned int output_width,
+; short *vp9_filter
+;)
+global sym(vp9_filter_block1d8_h6_sse2) PRIVATE
+sym(vp9_filter_block1d8_h6_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 7
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rdx, arg(6) ;vp9_filter
+ mov rsi, arg(0) ;src_ptr
+
+ mov rdi, arg(1) ;output_ptr
+
+ movsxd rcx, dword ptr arg(4) ;output_height
+ movsxd rax, dword ptr arg(2) ;src_pixels_per_line ; Pitch for Source
+%if ABI_IS_32BIT=0
+ movsxd r8, dword ptr arg(5) ;output_width ; added to rdi after every row
+%endif
+ pxor xmm0, xmm0 ; clear xmm0 for unpack
+
+.filter_block1d8_h6_rowloop:
+ movq xmm3, MMWORD PTR [rsi - 2] ; source bytes -2..5
+ movq xmm1, MMWORD PTR [rsi + 6] ; source bytes 6..13
+
+ prefetcht2 [rsi+rax-2] ; prefetch the next source row
+
+ pslldq xmm1, 8
+ por xmm1, xmm3 ; xmm1 = source bytes -2..13
+
+ movdqa xmm4, xmm1
+ movdqa xmm5, xmm1
+
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm1
+
+ punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx00 xx-1 xx-2
+ psrldq xmm4, 1 ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1
+
+ pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1
+ punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1
+
+ psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
+ pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2
+
+
+ punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00
+ psrldq xmm6, 3 ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01
+
+ pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3
+
+ punpcklbw xmm6, xmm0 ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01
+ psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02
+
+ pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4
+
+ punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02
+ psrldq xmm1, 5 ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03
+
+
+ pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5
+
+ punpcklbw xmm1, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03
+ pmullw xmm1, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6
+
+ ; sum the six products with signed saturation, then add the round value
+ paddsw xmm4, xmm7
+ paddsw xmm4, xmm5
+
+ paddsw xmm4, xmm3
+ paddsw xmm4, xmm6
+
+ paddsw xmm4, xmm1
+ paddsw xmm4, [GLOBAL(rd)]
+
+ psraw xmm4, 7
+
+ packuswb xmm4, xmm0 ; saturate each result to an unsigned byte
+ punpcklbw xmm4, xmm0 ; widen back to words: the output row is 8 words
+
+ movdqa XMMWORD Ptr [rdi], xmm4 ; aligned 16-byte store
+ lea rsi, [rsi + rax]
+
+%if ABI_IS_32BIT
+ add rdi, DWORD Ptr arg(5) ;[output_width]
+%else
+ add rdi, r8
+%endif
+ dec rcx
+
+ jnz .filter_block1d8_h6_rowloop ; next row
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;void vp9_filter_block1d16_h6_sse2
+;(
+; unsigned char *src_ptr,
+; unsigned short *output_ptr,
+; unsigned int src_pixels_per_line,
+; unsigned int pixel_step,
+; unsigned int output_height,
+; unsigned int output_width,
+; short *vp9_filter
+;)
+;/************************************************************************************
+; Notes: filter_block1d_h6 applies a 6 tap filter horizontally to the input pixels. The
+; input pixel array has output_height rows. This routine assumes that output_height is an
+; even number. This function handles 16 pixels (two groups of 8) horizontally, calculating ONE
+; row each iteration to take advantage of the 128 bits operations.
+;*************************************************************************************/
+global sym(vp9_filter_block1d16_h6_sse2) PRIVATE
+sym(vp9_filter_block1d16_h6_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 7
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rdx, arg(6) ;vp9_filter
+ mov rsi, arg(0) ;src_ptr
+
+ mov rdi, arg(1) ;output_ptr
+
+ movsxd rcx, dword ptr arg(4) ;output_height
+ movsxd rax, dword ptr arg(2) ;src_pixels_per_line ; Pitch for Source
+%if ABI_IS_32BIT=0
+ movsxd r8, dword ptr arg(5) ;output_width ; added to rdi after every row
+%endif
+
+ pxor xmm0, xmm0 ; clear xmm0 for unpack
+
+.filter_block1d16_h6_sse2_rowloop:
+ movq xmm3, MMWORD PTR [rsi - 2] ; source bytes -2..5
+ movq xmm1, MMWORD PTR [rsi + 6] ; source bytes 6..13
+
+ movq xmm2, MMWORD PTR [rsi +14] ; source bytes 14..21
+ pslldq xmm2, 8
+
+ por xmm2, xmm1 ; xmm2 = source bytes 6..21, input for the second group
+ prefetcht2 [rsi+rax-2] ; prefetch the next source row
+
+ pslldq xmm1, 8
+ por xmm1, xmm3 ; xmm1 = source bytes -2..13, input for the first group
+
+ movdqa xmm4, xmm1
+ movdqa xmm5, xmm1
+
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm1
+
+ punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx00 xx-1 xx-2
+ psrldq xmm4, 1 ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1
+
+ pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1
+ punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1
+
+ psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
+ pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2
+
+
+ punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00
+ psrldq xmm6, 3 ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01
+
+ pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3
+
+ punpcklbw xmm6, xmm0 ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01
+ psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02
+
+ pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4
+
+ punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02
+ psrldq xmm1, 5 ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03
+
+
+ pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5
+
+ punpcklbw xmm1, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03
+ pmullw xmm1, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6
+
+ paddsw xmm4, xmm7
+ paddsw xmm4, xmm5
+
+ paddsw xmm4, xmm3
+ paddsw xmm4, xmm6
+
+ paddsw xmm4, xmm1
+ paddsw xmm4, [GLOBAL(rd)]
+
+ psraw xmm4, 7
+
+ packuswb xmm4, xmm0 ; saturate each result to an unsigned byte
+ punpcklbw xmm4, xmm0 ; widen back to words
+
+ movdqa XMMWORD Ptr [rdi], xmm4 ; first 8 output words
+ ; second group of 8: xmm2 holds source bytes 6..21, so the index comments below are relative to src+8
+ movdqa xmm3, xmm2
+ movdqa xmm4, xmm2
+
+ movdqa xmm5, xmm2
+ movdqa xmm6, xmm2
+
+ movdqa xmm7, xmm2
+
+ punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx00 xx-1 xx-2
+ psrldq xmm4, 1 ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1
+
+ pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1
+ punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1
+
+ psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
+ pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2
+
+
+ punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00
+ psrldq xmm6, 3 ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01
+
+ pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3
+
+ punpcklbw xmm6, xmm0 ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01
+ psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02
+
+ pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4
+
+ punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02
+ psrldq xmm2, 5 ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03
+
+ pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5
+
+ punpcklbw xmm2, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03
+ pmullw xmm2, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6
+
+
+ paddsw xmm4, xmm7
+ paddsw xmm4, xmm5
+
+ paddsw xmm4, xmm3
+ paddsw xmm4, xmm6
+
+ paddsw xmm4, xmm2
+ paddsw xmm4, [GLOBAL(rd)]
+
+ psraw xmm4, 7
+
+ packuswb xmm4, xmm0 ; saturate each result to an unsigned byte
+ punpcklbw xmm4, xmm0 ; widen back to words
+
+ movdqa XMMWORD Ptr [rdi+16], xmm4 ; second 8 output words
+
+ lea rsi, [rsi + rax]
+%if ABI_IS_32BIT
+ add rdi, DWORD Ptr arg(5) ;[output_width]
+%else
+ add rdi, r8
+%endif
+
+ dec rcx
+ jnz .filter_block1d16_h6_sse2_rowloop ; next row
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;void vp9_filter_block1d8_v6_sse2
+;(
+; short *src_ptr,
+; unsigned char *output_ptr,
+; int dst_pitch,
+; unsigned int pixels_per_line,
+; unsigned int pixel_step,
+; unsigned int output_height,
+; unsigned int output_width,
+; short * vp9_filter
+;)
+;/************************************************************************************
+; Notes: filter_block1d8_v6 applies a 6 tap filter vertically to the input pixels. The
+; input pixel array has output_height rows.
+;*************************************************************************************/
+global sym(vp9_filter_block1d8_v6_sse2) PRIVATE
+sym(vp9_filter_block1d8_v6_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 8
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rax, arg(7) ;vp9_filter
+ movsxd rdx, dword ptr arg(3) ;pixels_per_line ; used below as the byte offset between rows
+
+ mov rdi, arg(1) ;output_ptr
+ mov rsi, arg(0) ;src_ptr
+
+ sub rsi, rdx ; step back two rows so that [rsi] is the first of the six taps
+ sub rsi, rdx
+
+ movsxd rcx, DWORD PTR arg(5) ;[output_height]
+ pxor xmm0, xmm0 ; clear xmm0
+
+ movdqa xmm7, XMMWORD PTR [GLOBAL(rd)] ; round value, kept in xmm7 for the whole loop
+%if ABI_IS_32BIT=0
+ movsxd r8, dword ptr arg(2) ; dst_pitch
+%endif
+
+.vp9_filter_block1d8_v6_sse2_loop:
+ movdqa xmm1, XMMWORD PTR [rsi] ; row 1 (aligned load of 8 words)
+ pmullw xmm1, [rax]
+
+ movdqa xmm2, XMMWORD PTR [rsi + rdx] ; row 2
+ pmullw xmm2, [rax + 16]
+
+ movdqa xmm3, XMMWORD PTR [rsi + rdx * 2] ; row 3
+ pmullw xmm3, [rax + 32]
+
+ movdqa xmm5, XMMWORD PTR [rsi + rdx * 4] ; row 5
+ pmullw xmm5, [rax + 64]
+
+ add rsi, rdx ; advance one row; this is also the per-iteration source step
+ movdqa xmm4, XMMWORD PTR [rsi + rdx * 2] ; row 4
+
+ pmullw xmm4, [rax + 48]
+ movdqa xmm6, XMMWORD PTR [rsi + rdx * 4] ; row 6
+
+ pmullw xmm6, [rax + 80]
+
+ paddsw xmm2, xmm5
+ paddsw xmm2, xmm3
+
+ paddsw xmm2, xmm1
+ paddsw xmm2, xmm4
+
+ paddsw xmm2, xmm6
+ paddsw xmm2, xmm7
+
+ psraw xmm2, 7
+ packuswb xmm2, xmm0 ; pack and saturate
+
+ movq QWORD PTR [rdi], xmm2 ; store the results in the destination
+%if ABI_IS_32BIT
+ add rdi, DWORD PTR arg(2) ;[dst_pitch]
+%else
+ add rdi, r8
+%endif
+ dec rcx ; decrement count
+ jnz .vp9_filter_block1d8_v6_sse2_loop ; next row
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;void vp9_filter_block1d16_v6_sse2
+;(
+; unsigned short *src_ptr,
+; unsigned char *output_ptr,
+; int dst_pitch,
+; unsigned int pixels_per_line,
+; unsigned int pixel_step,
+; unsigned int output_height,
+; unsigned int output_width,
+; const short *vp9_filter
+;)
+;/************************************************************************************
+; Notes: filter_block1d16_v6 applies a 6 tap filter vertically to the input pixels. The
+; input pixel array has output_height rows.
+;*************************************************************************************/
+global sym(vp9_filter_block1d16_v6_sse2) PRIVATE
+sym(vp9_filter_block1d16_v6_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 8
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rax, arg(7) ;vp9_filter
+ movsxd rdx, dword ptr arg(3) ;pixels_per_line ; used below as the byte offset between rows
+
+ mov rdi, arg(1) ;output_ptr
+ mov rsi, arg(0) ;src_ptr
+
+ sub rsi, rdx ; step back two rows so that [rsi] is line 1 of the six taps
+ sub rsi, rdx
+
+ movsxd rcx, DWORD PTR arg(5) ;[output_height]
+%if ABI_IS_32BIT=0
+ movsxd r8, dword ptr arg(2) ; dst_pitch
+%endif
+
+.vp9_filter_block1d16_v6_sse2_loop:
+; The order for adding 6-tap is 2 5 3 1 4 6. Read in data in that order.
+ movdqa xmm1, XMMWORD PTR [rsi + rdx] ; line 2
+ movdqa xmm2, XMMWORD PTR [rsi + rdx + 16]
+ pmullw xmm1, [rax + 16]
+ pmullw xmm2, [rax + 16]
+
+ movdqa xmm3, XMMWORD PTR [rsi + rdx * 4] ; line 5
+ movdqa xmm4, XMMWORD PTR [rsi + rdx * 4 + 16]
+ pmullw xmm3, [rax + 64]
+ pmullw xmm4, [rax + 64]
+
+ movdqa xmm5, XMMWORD PTR [rsi + rdx * 2] ; line 3
+ movdqa xmm6, XMMWORD PTR [rsi + rdx * 2 + 16]
+ pmullw xmm5, [rax + 32]
+ pmullw xmm6, [rax + 32]
+
+ movdqa xmm7, XMMWORD PTR [rsi] ; line 1
+ movdqa xmm0, XMMWORD PTR [rsi + 16]
+ pmullw xmm7, [rax]
+ pmullw xmm0, [rax]
+
+ paddsw xmm1, xmm3 ; xmm1 accumulates the low 8 columns, xmm2 the high 8
+ paddsw xmm2, xmm4
+ paddsw xmm1, xmm5
+ paddsw xmm2, xmm6
+ paddsw xmm1, xmm7
+ paddsw xmm2, xmm0
+
+ add rsi, rdx ; advance one row; this is also the per-iteration source step
+
+ movdqa xmm3, XMMWORD PTR [rsi + rdx * 2] ; line 4
+ movdqa xmm4, XMMWORD PTR [rsi + rdx * 2 + 16]
+ pmullw xmm3, [rax + 48]
+ pmullw xmm4, [rax + 48]
+
+ movdqa xmm5, XMMWORD PTR [rsi + rdx * 4] ; line 6
+ movdqa xmm6, XMMWORD PTR [rsi + rdx * 4 + 16]
+ pmullw xmm5, [rax + 80]
+ pmullw xmm6, [rax + 80]
+
+ movdqa xmm7, XMMWORD PTR [GLOBAL(rd)] ; round value, reloaded because xmm7 held line 1 above
+ pxor xmm0, xmm0 ; clear xmm0 ; NOTE(review): xmm0 is reloaded at the loop top before any read - looks redundant, confirm
+
+ paddsw xmm1, xmm3
+ paddsw xmm2, xmm4
+ paddsw xmm1, xmm5
+ paddsw xmm2, xmm6
+
+ paddsw xmm1, xmm7
+ paddsw xmm2, xmm7
+
+ psraw xmm1, 7
+ psraw xmm2, 7
+
+ packuswb xmm1, xmm2 ; pack and saturate
+ movdqa XMMWORD PTR [rdi], xmm1 ; store the results in the destination
+%if ABI_IS_32BIT
+ add rdi, DWORD PTR arg(2) ;[dst_pitch]
+%else
+ add rdi, r8
+%endif
+ dec rcx ; decrement count
+ jnz .vp9_filter_block1d16_v6_sse2_loop ; next row
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;void vp9_filter_block1d8_h6_only_sse2
+;(
+; unsigned char *src_ptr,
+; unsigned int src_pixels_per_line,
+; unsigned char *output_ptr,
+; int dst_pitch,
+; unsigned int output_height,
+; const short *vp9_filter
+;)
+; First-pass filter only when yoffset==0
+global sym(vp9_filter_block1d8_h6_only_sse2) PRIVATE
+sym(vp9_filter_block1d8_h6_only_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rdx, arg(5) ;vp9_filter
+ mov rsi, arg(0) ;src_ptr
+
+ mov rdi, arg(2) ;output_ptr
+
+ movsxd rcx, dword ptr arg(4) ;output_height
+ movsxd rax, dword ptr arg(1) ;src_pixels_per_line ; Pitch for Source
+%if ABI_IS_32BIT=0
+ movsxd r8, dword ptr arg(3) ;dst_pitch
+%endif
+ pxor xmm0, xmm0 ; clear xmm0 for unpack
+
+.filter_block1d8_h6_only_rowloop:
+ movq xmm3, MMWORD PTR [rsi - 2] ; source bytes -2..5
+ movq xmm1, MMWORD PTR [rsi + 6] ; source bytes 6..13
+
+ prefetcht2 [rsi+rax-2] ; prefetch the next source row
+
+ pslldq xmm1, 8
+ por xmm1, xmm3 ; xmm1 = source bytes -2..13
+
+ movdqa xmm4, xmm1
+ movdqa xmm5, xmm1
+
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm1
+
+ punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx00 xx-1 xx-2
+ psrldq xmm4, 1 ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1
+
+ pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1
+ punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1
+
+ psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
+ pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2
+
+
+ punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00
+ psrldq xmm6, 3 ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01
+
+ pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3
+
+ punpcklbw xmm6, xmm0 ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01
+ psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02
+
+ pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4
+
+ punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02
+ psrldq xmm1, 5 ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03
+
+
+ pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5
+
+ punpcklbw xmm1, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03
+ pmullw xmm1, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6
+
+ ; sum the six products with signed saturation, then add the round value
+ paddsw xmm4, xmm7
+ paddsw xmm4, xmm5
+
+ paddsw xmm4, xmm3
+ paddsw xmm4, xmm6
+
+ paddsw xmm4, xmm1
+ paddsw xmm4, [GLOBAL(rd)]
+
+ psraw xmm4, 7
+
+ packuswb xmm4, xmm0 ; saturate to unsigned bytes; the 8 results are in the low qword
+
+ movq QWORD PTR [rdi], xmm4 ; store the results in the destination
+ lea rsi, [rsi + rax]
+
+%if ABI_IS_32BIT
+ add rdi, DWORD Ptr arg(3) ;dst_pitch
+%else
+ add rdi, r8
+%endif
+ dec rcx
+
+ jnz .filter_block1d8_h6_only_rowloop ; next row
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;void vp9_filter_block1d16_h6_only_sse2
+;(
+; unsigned char *src_ptr,
+; unsigned int src_pixels_per_line,
+; unsigned char *output_ptr,
+; int dst_pitch,
+; unsigned int output_height,
+; const short *vp9_filter
+;)
+; First-pass filter only when yoffset==0
+global sym(vp9_filter_block1d16_h6_only_sse2) PRIVATE
+sym(vp9_filter_block1d16_h6_only_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rdx, arg(5) ;vp9_filter
+ mov rsi, arg(0) ;src_ptr
+
+ mov rdi, arg(2) ;output_ptr
+
+ movsxd rcx, dword ptr arg(4) ;output_height
+ movsxd rax, dword ptr arg(1) ;src_pixels_per_line ; Pitch for Source
+%if ABI_IS_32BIT=0
+ movsxd r8, dword ptr arg(3) ;dst_pitch
+%endif
+
+ pxor xmm0, xmm0 ; clear xmm0 for unpack
+
+.filter_block1d16_h6_only_sse2_rowloop:
+ movq xmm3, MMWORD PTR [rsi - 2] ; source bytes -2..5
+ movq xmm1, MMWORD PTR [rsi + 6] ; source bytes 6..13
+
+ movq xmm2, MMWORD PTR [rsi +14] ; source bytes 14..21
+ pslldq xmm2, 8
+
+ por xmm2, xmm1 ; xmm2 = source bytes 6..21, input for the second group
+ prefetcht2 [rsi+rax-2] ; prefetch the next source row
+
+ pslldq xmm1, 8
+ por xmm1, xmm3 ; xmm1 = source bytes -2..13, input for the first group
+
+ movdqa xmm4, xmm1
+ movdqa xmm5, xmm1
+
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm1
+
+ punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx00 xx-1 xx-2
+ psrldq xmm4, 1 ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1
+
+ pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1
+ punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1
+
+ psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
+ pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2
+
+ punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00
+ psrldq xmm6, 3 ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01
+
+ pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3
+
+ punpcklbw xmm6, xmm0 ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01
+ psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02
+
+ pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4
+
+ punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02
+ psrldq xmm1, 5 ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03
+
+ pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5
+
+ punpcklbw xmm1, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03
+ pmullw xmm1, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6
+
+ paddsw xmm4, xmm7
+ paddsw xmm4, xmm5
+
+ paddsw xmm4, xmm3
+ paddsw xmm4, xmm6
+
+ paddsw xmm4, xmm1
+ paddsw xmm4, [GLOBAL(rd)]
+
+ psraw xmm4, 7
+
+ packuswb xmm4, xmm0 ; lower 8 bytes
+
+ movq QWORD Ptr [rdi], xmm4 ; store the results in the destination
+ ; second group of 8: xmm2 holds source bytes 6..21, so the index comments below are relative to src+8
+ movdqa xmm3, xmm2
+ movdqa xmm4, xmm2
+
+ movdqa xmm5, xmm2
+ movdqa xmm6, xmm2
+
+ movdqa xmm7, xmm2
+
+ punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx00 xx-1 xx-2
+ psrldq xmm4, 1 ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1
+
+ pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1
+ punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1
+
+ psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
+ pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2
+
+ punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00
+ psrldq xmm6, 3 ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01
+
+ pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3
+
+ punpcklbw xmm6, xmm0 ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01
+ psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02
+
+ pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4
+
+ punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02
+ psrldq xmm2, 5 ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03
+
+ pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5
+
+ punpcklbw xmm2, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03
+ pmullw xmm2, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6
+
+ paddsw xmm4, xmm7
+ paddsw xmm4, xmm5
+
+ paddsw xmm4, xmm3
+ paddsw xmm4, xmm6
+
+ paddsw xmm4, xmm2
+ paddsw xmm4, [GLOBAL(rd)]
+
+ psraw xmm4, 7
+
+ packuswb xmm4, xmm0 ; higher 8 bytes
+
+ movq QWORD Ptr [rdi+8], xmm4 ; store the results in the destination
+
+ lea rsi, [rsi + rax]
+%if ABI_IS_32BIT
+ add rdi, DWORD Ptr arg(3) ;dst_pitch
+%else
+ add rdi, r8
+%endif
+
+ dec rcx
+ jnz .filter_block1d16_h6_only_sse2_rowloop ; next row
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;void vp9_filter_block1d8_v6_only_sse2
+;(
+; unsigned char *src_ptr,
+; unsigned int src_pixels_per_line,
+; unsigned char *output_ptr,
+; int dst_pitch,
+; unsigned int output_height,
+; const short *vp9_filter
+;)
+; Second-pass filter only when xoffset==0
+global sym(vp9_filter_block1d8_v6_only_sse2) PRIVATE
+sym(vp9_filter_block1d8_v6_only_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rsi, arg(0) ;src_ptr ; used as-is: [rsi] is the first of the six rows read
+ mov rdi, arg(2) ;output_ptr
+
+ movsxd rcx, dword ptr arg(4) ;output_height
+ movsxd rdx, dword ptr arg(1) ;src_pixels_per_line
+
+ mov rax, arg(5) ;vp9_filter
+
+ pxor xmm0, xmm0 ; clear xmm0
+
+ movdqa xmm7, XMMWORD PTR [GLOBAL(rd)] ; round value, kept in xmm7 for the whole loop
+%if ABI_IS_32BIT=0
+ movsxd r8, dword ptr arg(3) ; dst_pitch
+%endif
+
+.vp9_filter_block1d8_v6_only_sse2_loop:
+ movq xmm1, MMWORD PTR [rsi] ; row 1 (8 source bytes)
+ movq xmm2, MMWORD PTR [rsi + rdx] ; row 2
+ movq xmm3, MMWORD PTR [rsi + rdx * 2] ; row 3
+ movq xmm5, MMWORD PTR [rsi + rdx * 4] ; row 5
+ add rsi, rdx ; advance one row; this is also the per-iteration source step
+ movq xmm4, MMWORD PTR [rsi + rdx * 2] ; row 4
+ movq xmm6, MMWORD PTR [rsi + rdx * 4] ; row 6
+
+ punpcklbw xmm1, xmm0 ; widen bytes to words before each multiply
+ pmullw xmm1, [rax]
+
+ punpcklbw xmm2, xmm0
+ pmullw xmm2, [rax + 16]
+
+ punpcklbw xmm3, xmm0
+ pmullw xmm3, [rax + 32]
+
+ punpcklbw xmm5, xmm0
+ pmullw xmm5, [rax + 64]
+
+ punpcklbw xmm4, xmm0
+ pmullw xmm4, [rax + 48]
+
+ punpcklbw xmm6, xmm0
+ pmullw xmm6, [rax + 80]
+
+ paddsw xmm2, xmm5
+ paddsw xmm2, xmm3
+
+ paddsw xmm2, xmm1
+ paddsw xmm2, xmm4
+
+ paddsw xmm2, xmm6
+ paddsw xmm2, xmm7
+
+ psraw xmm2, 7
+ packuswb xmm2, xmm0 ; pack and saturate
+
+ movq QWORD PTR [rdi], xmm2 ; store the results in the destination
+%if ABI_IS_32BIT
+ add rdi, DWORD PTR arg(3) ;[dst_pitch]
+%else
+ add rdi, r8
+%endif
+ dec rcx ; decrement count
+ jnz .vp9_filter_block1d8_v6_only_sse2_loop ; next row
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;void vp9_unpack_block1d16_h6_sse2
+;(
+; unsigned char *src_ptr,
+; unsigned short *output_ptr,
+; unsigned int src_pixels_per_line,
+; unsigned int output_height,
+; unsigned int output_width
+;)
+global sym(vp9_unpack_block1d16_h6_sse2) PRIVATE
+sym(vp9_unpack_block1d16_h6_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 5
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(1) ;output_ptr
+
+ movsxd rcx, dword ptr arg(3) ;output_height
+ movsxd rax, dword ptr arg(2) ;src_pixels_per_line ; Pitch for Source
+
+ pxor xmm0, xmm0 ; clear xmm0 for unpack
+%if ABI_IS_32BIT=0
+ movsxd r8, dword ptr arg(4) ;output_width ; added to rdi after every row
+%endif
+
+.unpack_block1d16_h6_sse2_rowloop:
+ movq xmm1, MMWORD PTR [rsi] ; source bytes 0..7
+ movq xmm3, MMWORD PTR [rsi+8] ; source bytes 8..15
+
+ punpcklbw xmm3, xmm0 ; widen bytes 8..15 to words (no filtering is applied)
+ punpcklbw xmm1, xmm0 ; widen bytes 0..7 to words
+
+ movdqa XMMWORD Ptr [rdi], xmm1 ; first 8 output words
+ movdqa XMMWORD Ptr [rdi + 16], xmm3 ; second 8 output words
+
+ lea rsi, [rsi + rax]
+%if ABI_IS_32BIT
+ add rdi, DWORD Ptr arg(4) ;[output_width]
+%else
+ add rdi, r8
+%endif
+ dec rcx
+ jnz .unpack_block1d16_h6_sse2_rowloop ; next row
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;void vp9_bilinear_predict16x16_sse2
+;(
+; unsigned char *src_ptr,
+; int src_pixels_per_line,
+; int xoffset,
+; int yoffset,
+; unsigned char *dst_ptr,
+; int dst_pitch
+;)
+extern sym(vp9_bilinear_filters_mmx)
+global sym(vp9_bilinear_predict16x16_sse2) PRIVATE
+sym(vp9_bilinear_predict16x16_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ ;const short *HFilter = bilinear_filters_mmx[xoffset]
+ ;const short *VFilter = bilinear_filters_mmx[yoffset]
+
+ lea rcx, [GLOBAL(sym(vp9_bilinear_filters_mmx))]
+ movsxd rax, dword ptr arg(2) ;xoffset
+
+ cmp rax, 0 ;skip first_pass filter if xoffset=0
+ je .b16x16_sp_only
+
+ shl rax, 5 ; 32 bytes per table entry: two 16-byte weights
+ add rax, rcx ;HFilter
+
+ mov rdi, arg(4) ;dst_ptr
+ mov rsi, arg(0) ;src_ptr
+ movsxd rdx, dword ptr arg(5) ;dst_pitch
+
+ movdqa xmm1, [rax] ; horizontal weight applied to pixel x
+ movdqa xmm2, [rax+16] ; horizontal weight applied to pixel x+1
+
+ movsxd rax, dword ptr arg(3) ;yoffset
+
+ cmp rax, 0 ;skip second_pass filter if yoffset=0
+ je .b16x16_fp_only
+
+ shl rax, 5
+ add rax, rcx ;VFilter
+
+ lea rcx, [rdi+rdx*8]
+ lea rcx, [rcx+rdx*8] ; rcx = dst_ptr + 16*dst_pitch, the loop end pointer
+ movsxd rdx, dword ptr arg(1) ;src_pixels_per_line
+
+ pxor xmm0, xmm0
+
+%if ABI_IS_32BIT=0
+ movsxd r8, dword ptr arg(5) ;dst_pitch
+%endif
+ ; get the first horizontal line done
+ movdqu xmm3, [rsi] ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
+ movdqa xmm4, xmm3 ; make a copy of current line
+
+ punpcklbw xmm3, xmm0 ; 00 01 02 03 04 05 06 07
+ punpckhbw xmm4, xmm0 ; 08 09 10 11 12 13 14 15
+
+ pmullw xmm3, xmm1
+ pmullw xmm4, xmm1
+
+ movdqu xmm5, [rsi+1] ; same row shifted one pixel right
+ movdqa xmm6, xmm5
+
+ punpcklbw xmm5, xmm0
+ punpckhbw xmm6, xmm0
+
+ pmullw xmm5, xmm2
+ pmullw xmm6, xmm2
+
+ paddw xmm3, xmm5
+ paddw xmm4, xmm6
+
+ paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value
+ psraw xmm3, VP9_FILTER_SHIFT ; xmm3 /= 128
+
+ paddw xmm4, [GLOBAL(rd)]
+ psraw xmm4, VP9_FILTER_SHIFT
+
+ movdqa xmm7, xmm3
+ packuswb xmm7, xmm4 ; xmm7 = previous first-pass row, packed to 16 bytes
+
+ add rsi, rdx ; next line
+.next_row:
+ movdqu xmm3, [rsi] ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
+ movdqa xmm4, xmm3 ; make a copy of current line
+
+ punpcklbw xmm3, xmm0 ; 00 01 02 03 04 05 06 07
+ punpckhbw xmm4, xmm0 ; 08 09 10 11 12 13 14 15
+
+ pmullw xmm3, xmm1
+ pmullw xmm4, xmm1
+
+ movdqu xmm5, [rsi+1] ; same row shifted one pixel right
+ movdqa xmm6, xmm5
+
+ punpcklbw xmm5, xmm0
+ punpckhbw xmm6, xmm0
+
+ pmullw xmm5, xmm2
+ pmullw xmm6, xmm2
+
+ paddw xmm3, xmm5
+ paddw xmm4, xmm6
+
+ movdqa xmm5, xmm7 ; unpack the previous first-pass row for the vertical pass
+ movdqa xmm6, xmm7
+
+ punpcklbw xmm5, xmm0
+ punpckhbw xmm6, xmm0
+
+ pmullw xmm5, [rax] ; previous row * first vertical weight
+ pmullw xmm6, [rax]
+
+ paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value
+ psraw xmm3, VP9_FILTER_SHIFT ; xmm3 /= 128
+
+ paddw xmm4, [GLOBAL(rd)]
+ psraw xmm4, VP9_FILTER_SHIFT
+
+ movdqa xmm7, xmm3
+ packuswb xmm7, xmm4 ; keep this first-pass row for the next iteration
+
+ pmullw xmm3, [rax+16] ; current row * second vertical weight
+ pmullw xmm4, [rax+16]
+
+ paddw xmm3, xmm5
+ paddw xmm4, xmm6
+
+ paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value
+ psraw xmm3, VP9_FILTER_SHIFT ; xmm3 /= 128
+
+ paddw xmm4, [GLOBAL(rd)]
+ psraw xmm4, VP9_FILTER_SHIFT
+
+ packuswb xmm3, xmm4
+ movdqa [rdi], xmm3 ; store the results in the destination
+
+ add rsi, rdx ; next line
+%if ABI_IS_32BIT
+ add rdi, DWORD PTR arg(5) ;dst_pitch
+%else
+ add rdi, r8
+%endif
+
+ cmp rdi, rcx
+ jne .next_row
+
+ jmp .done
+ ; xoffset == 0: vertical filter only
+.b16x16_sp_only:
+ movsxd rax, dword ptr arg(3) ;yoffset
+ shl rax, 5
+ add rax, rcx ;VFilter
+
+ mov rdi, arg(4) ;dst_ptr
+ mov rsi, arg(0) ;src_ptr
+ movsxd rdx, dword ptr arg(5) ;dst_pitch
+
+ movdqa xmm1, [rax] ; vertical weight applied to the previous row
+ movdqa xmm2, [rax+16] ; vertical weight applied to the current row
+
+ lea rcx, [rdi+rdx*8]
+ lea rcx, [rcx+rdx*8] ; rcx = dst_ptr + 16*dst_pitch, the loop end pointer
+ movsxd rax, dword ptr arg(1) ;src_pixels_per_line ; rax is free, both weights are already in xmm1/xmm2
+
+ pxor xmm0, xmm0
+
+ ; get the first horizontal line done
+ movdqu xmm7, [rsi] ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
+
+ add rsi, rax ; next line
+.next_row_spo:
+ movdqu xmm3, [rsi] ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
+
+ movdqa xmm5, xmm7 ; previous row
+ movdqa xmm6, xmm7
+
+ movdqa xmm4, xmm3 ; make a copy of current line
+ movdqa xmm7, xmm3 ; current row becomes the previous row of the next iteration
+
+ punpcklbw xmm5, xmm0
+ punpckhbw xmm6, xmm0
+ punpcklbw xmm3, xmm0 ; 00 01 02 03 04 05 06 07
+ punpckhbw xmm4, xmm0 ; 08 09 10 11 12 13 14 15
+
+ pmullw xmm5, xmm1
+ pmullw xmm6, xmm1
+ pmullw xmm3, xmm2
+ pmullw xmm4, xmm2
+
+ paddw xmm3, xmm5
+ paddw xmm4, xmm6
+
+ paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value
+ psraw xmm3, VP9_FILTER_SHIFT ; xmm3 /= 128
+
+ paddw xmm4, [GLOBAL(rd)]
+ psraw xmm4, VP9_FILTER_SHIFT
+
+ packuswb xmm3, xmm4
+ movdqa [rdi], xmm3 ; store the results in the destination
+
+ add rsi, rax ; next line
+ add rdi, rdx ;dst_pitch
+ cmp rdi, rcx
+ jne .next_row_spo
+
+ jmp .done
+ ; yoffset == 0: horizontal filter only; rdi, rsi, rdx, xmm1 and xmm2 were set before the jump here
+.b16x16_fp_only:
+ lea rcx, [rdi+rdx*8]
+ lea rcx, [rcx+rdx*8] ; rcx = dst_ptr + 16*dst_pitch, the loop end pointer
+ movsxd rax, dword ptr arg(1) ;src_pixels_per_line
+ pxor xmm0, xmm0
+
+.next_row_fpo:
+ movdqu xmm3, [rsi] ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
+ movdqa xmm4, xmm3 ; make a copy of current line
+
+ punpcklbw xmm3, xmm0 ; 00 01 02 03 04 05 06 07
+ punpckhbw xmm4, xmm0 ; 08 09 10 11 12 13 14 15
+
+ pmullw xmm3, xmm1
+ pmullw xmm4, xmm1
+
+ movdqu xmm5, [rsi+1] ; same row shifted one pixel right
+ movdqa xmm6, xmm5
+
+ punpcklbw xmm5, xmm0
+ punpckhbw xmm6, xmm0
+
+ pmullw xmm5, xmm2
+ pmullw xmm6, xmm2
+
+ paddw xmm3, xmm5
+ paddw xmm4, xmm6
+
+ paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value
+ psraw xmm3, VP9_FILTER_SHIFT ; xmm3 /= 128
+
+ paddw xmm4, [GLOBAL(rd)]
+ psraw xmm4, VP9_FILTER_SHIFT
+
+ packuswb xmm3, xmm4
+ movdqa [rdi], xmm3 ; store the results in the destination
+
+ add rsi, rax ; next line
+ add rdi, rdx ; dst_pitch
+ cmp rdi, rcx
+ jne .next_row_fpo
+
+.done:
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;void vp9_bilinear_predict8x8_sse2
+;(
+; unsigned char *src_ptr,
+; int src_pixels_per_line,
+; int xoffset,
+; int yoffset,
+; unsigned char *dst_ptr,
+; int dst_pitch
+;)
+extern sym(vp9_bilinear_filters_mmx)
+global sym(vp9_bilinear_predict8x8_sse2) PRIVATE
+sym(vp9_bilinear_predict8x8_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ ALIGN_STACK 16, rax
+ sub rsp, 144 ; reserve 144 bytes (9 rows of 16 bytes)
+
+ ;const short *HFilter = bilinear_filters_mmx[xoffset]
+ ;const short *VFilter = bilinear_filters_mmx[yoffset]
+ lea rcx, [GLOBAL(sym(vp9_bilinear_filters_mmx))]
+
+ mov rsi, arg(0) ;src_ptr
+ movsxd rdx, dword ptr arg(1) ;src_pixels_per_line
+
+ ;Read 9-line unaligned data in and put them on stack. This gives a big
+ ;performance boost.
+ movdqu xmm0, [rsi] ; row 0 (16 bytes are loaded from every row)
+ lea rax, [rdx + rdx*2] ; rax = 3 * src_pixels_per_line
+ movdqu xmm1, [rsi+rdx]
+ movdqu xmm2, [rsi+rdx*2]
+ add rsi, rax
+ movdqu xmm3, [rsi] ; row 3
+ movdqu xmm4, [rsi+rdx]
+ movdqu xmm5, [rsi+rdx*2]
+ add rsi, rax
+ movdqu xmm6, [rsi] ; row 6
+ movdqu xmm7, [rsi+rdx]
+
+ movdqa XMMWORD PTR [rsp], xmm0 ; spill row 0 so xmm0 can be reused for row 8
+
+ movdqu xmm0, [rsi+rdx*2] ; row 8
+
+ movdqa XMMWORD PTR [rsp+16], xmm1
+ movdqa XMMWORD PTR [rsp+32], xmm2
+ movdqa XMMWORD PTR [rsp+48], xmm3
+ movdqa XMMWORD PTR [rsp+64], xmm4
+ movdqa XMMWORD PTR [rsp+80], xmm5
+ movdqa XMMWORD PTR [rsp+96], xmm6
+ movdqa XMMWORD PTR [rsp+112], xmm7
+ movdqa XMMWORD PTR [rsp+128], xmm0
+
+ movsxd rax, dword ptr arg(2) ;xoffset
+ shl rax, 5 ; 32 bytes per table entry: two 16-byte weights
+ add rax, rcx ;HFilter
+
+ mov rdi, arg(4) ;dst_ptr
+ movsxd rdx, dword ptr arg(5) ;dst_pitch
+
+ movdqa xmm1, [rax] ; horizontal weight applied to pixel x
+ movdqa xmm2, [rax+16] ; horizontal weight applied to pixel x+1
+
+ movsxd rax, dword ptr arg(3) ;yoffset
+ shl rax, 5
+ add rax, rcx ;VFilter
+
+ lea rcx, [rdi+rdx*8] ; rcx = dst_ptr + 8*dst_pitch, the loop end pointer
+
+ movdqa xmm5, [rax] ; vertical weight applied to the previous row
+ movdqa xmm6, [rax+16] ; vertical weight applied to the current row
+
+ pxor xmm0, xmm0
+
+ ; get the first horizontal line done
+ movdqa xmm3, XMMWORD PTR [rsp]
+ movdqa xmm4, xmm3 ; make a copy of current line
+ psrldq xmm4, 1
+
+ punpcklbw xmm3, xmm0 ; 00 01 02 03 04 05 06 07
+ punpcklbw xmm4, xmm0 ; 01 02 03 04 05 06 07 08
+
+ pmullw xmm3, xmm1
+ pmullw xmm4, xmm2
+
+ paddw xmm3, xmm4
+
+ paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value
+ psraw xmm3, VP9_FILTER_SHIFT ; xmm3 /= 128
+
+ movdqa xmm7, xmm3 ; xmm7 = previous first-pass row, kept as words
+ add rsp, 16 ; next line ; rsp itself is the cursor over the spilled rows
+.next_row8x8:
+ movdqa xmm3, XMMWORD PTR [rsp] ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
+ movdqa xmm4, xmm3 ; make a copy of current line
+ psrldq xmm4, 1
+
+ punpcklbw xmm3, xmm0 ; 00 01 02 03 04 05 06 07
+ punpcklbw xmm4, xmm0 ; 01 02 03 04 05 06 07 08
+
+ pmullw xmm3, xmm1
+ pmullw xmm4, xmm2
+
+ paddw xmm3, xmm4
+ pmullw xmm7, xmm5 ; previous row * first vertical weight
+
+ paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value
+ psraw xmm3, VP9_FILTER_SHIFT ; xmm3 /= 128
+
+ movdqa xmm4, xmm3 ; save the current first-pass row
+
+ pmullw xmm3, xmm6 ; current row * second vertical weight
+ paddw xmm3, xmm7
+
+ movdqa xmm7, xmm4 ; current row becomes the previous row of the next iteration
+
+ paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value
+ psraw xmm3, VP9_FILTER_SHIFT ; xmm3 /= 128
+
+ packuswb xmm3, xmm0
+ movq [rdi], xmm3 ; store the results in the destination
+
+ add rsp, 16 ; next line
+ add rdi, rdx
+
+ cmp rdi, rcx
+ jne .next_row8x8
+ ; nine "add rsp, 16" steps (one before the loop, eight inside) have undone the 144-byte reservation
+ ;add rsp, 144
+ pop rsp ; presumably reloads the rsp value pushed by ALIGN_STACK - confirm in x86_abi_support.asm
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+SECTION_RODATA
+align 16
+rd:
+ times 8 dw 0x40 ; eight words of 64 = half of VP9_FILTER_WEIGHT, added before the shift by VP9_FILTER_SHIFT
diff --git a/vp9/common/x86/vp9_subpixel_ssse3.asm b/vp9/common/x86/vp9_subpixel_ssse3.asm
new file mode 100644
index 0000000..b260480
--- /dev/null
+++ b/vp9/common/x86/vp9_subpixel_ssse3.asm
@@ -0,0 +1,1515 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+; VP9_FILTER_SHIFT is the psraw count used by the bilinear routines in this file; the six-tap routines hard-code the same 7.
+%define BLOCK_HEIGHT_WIDTH 4
+%define VP9_FILTER_WEIGHT 128
+%define VP9_FILTER_SHIFT 7
+
+
+;/************************************************************************************
+; Notes: filter_block1d_h6 applies a 6-tap filter horizontally to the input pixels. The
+; input pixel array has output_height rows. This routine assumes that output_height is an
+; even number. This function handles 8 pixels in the horizontal direction, calculating ONE
+; row each iteration to take advantage of the 128-bit operations.
+;
+; This is an implementation of some of the SSE optimizations first seen in ffvp8
+;
+;*************************************************************************************/
+;void vp9_filter_block1d8_h6_ssse3
+;(
+; unsigned char *src_ptr, ; each row is read from src_ptr - 2 through src_ptr + 10
+; unsigned int src_pixels_per_line, ; added to the source pointer after each row
+; unsigned char *output_ptr, ; 8 bytes are stored per row
+; unsigned int output_pitch, ; distance between stored rows
+; unsigned int output_height, ; row counter; only tested after a row has been written
+; unsigned int vp9_filter_index ; picks one 16-byte row in each of k0_k5, k1_k3, k2_k4
+;)
+global sym(vp9_filter_block1d8_h6_ssse3) PRIVATE
+sym(vp9_filter_block1d8_h6_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ movsxd rdx, DWORD PTR arg(5) ;table index
+ xor rsi, rsi ; esi = 0 for the zero test on the k0_k5 row below
+ shl rdx, 4 ; 16 bytes per table row
+
+ movdqa xmm7, [GLOBAL(rd)] ; rounding term, kept in xmm7 for both tap paths
+
+ lea rax, [GLOBAL(k0_k5)]
+ add rax, rdx
+ mov rdi, arg(2) ;output_ptr
+ ; if the first four bytes of the selected k0_k5 row are zero, take the 4-tap path
+ cmp esi, DWORD PTR [rax]
+ je vp9_filter_block1d8_h4_ssse3
+
+ movdqa xmm4, XMMWORD PTR [rax] ;k0_k5
+ movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4
+ movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3
+
+ mov rsi, arg(0) ;src_ptr
+ movsxd rax, dword ptr arg(1) ;src_pixels_per_line
+ movsxd rcx, dword ptr arg(4) ;output_height
+
+ movsxd rdx, dword ptr arg(3) ;output_pitch
+
+ sub rdi, rdx ; pre-bias: the loop adds the pitch back before its store
+;xmm3 free
+.filter_block1d8_h6_rowloop_ssse3:
+ movq xmm0, MMWORD PTR [rsi - 2] ; -2 -1 0 1 2 3 4 5
+
+ movq xmm2, MMWORD PTR [rsi + 3] ; 3 4 5 6 7 8 9 10
+
+ punpcklbw xmm0, xmm2 ; -2 3 -1 4 0 5 1 6 2 7 3 8 4 9 5 10
+
+ movdqa xmm1, xmm0
+ pmaddubsw xmm0, xmm4 ; k0_k5 products, one word per output pixel
+
+ movdqa xmm2, xmm1
+ pshufb xmm1, [GLOBAL(shuf2bfrom1)] ; regroup the interleaved bytes for the k2_k4 multiply
+
+ pshufb xmm2, [GLOBAL(shuf3bfrom1)] ; regroup the interleaved bytes for the k1_k3 multiply
+ pmaddubsw xmm1, xmm5
+
+ lea rdi, [rdi + rdx]
+ pmaddubsw xmm2, xmm6
+
+ lea rsi, [rsi + rax]
+ dec rcx ; sets ZF for the jnz below; no instruction in between writes flags
+
+ paddsw xmm0, xmm1
+ paddsw xmm2, xmm7 ; add the rounding term
+
+ paddsw xmm0, xmm2
+
+ psraw xmm0, 7
+
+ packuswb xmm0, xmm0 ; saturate the words to unsigned bytes
+
+ movq MMWORD Ptr [rdi], xmm0
+ jnz .filter_block1d8_h6_rowloop_ssse3
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+vp9_filter_block1d8_h4_ssse3: ; 4-tap path, entered by the je above; k0_k5 is not loaded
+ movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4
+ movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3
+
+ movdqa xmm3, XMMWORD PTR [GLOBAL(shuf2bfrom1)]
+ movdqa xmm4, XMMWORD PTR [GLOBAL(shuf3bfrom1)]
+
+ mov rsi, arg(0) ;src_ptr
+
+ movsxd rax, dword ptr arg(1) ;src_pixels_per_line
+ movsxd rcx, dword ptr arg(4) ;output_height
+
+ movsxd rdx, dword ptr arg(3) ;output_pitch
+
+ sub rdi, rdx ; pre-bias, as in the 6-tap path; rdi was loaded before the je
+
+.filter_block1d8_h4_rowloop_ssse3:
+ movq xmm0, MMWORD PTR [rsi - 2] ; -2 -1 0 1 2 3 4 5
+
+ movq xmm1, MMWORD PTR [rsi + 3] ; 3 4 5 6 7 8 9 10
+
+ punpcklbw xmm0, xmm1 ; -2 3 -1 4 0 5 1 6 2 7 3 8 4 9 5 10
+
+ movdqa xmm2, xmm0
+ pshufb xmm0, xmm3 ; shuf2bfrom1, result multiplied by k2_k4
+
+ pshufb xmm2, xmm4 ; shuf3bfrom1, result multiplied by k1_k3
+ pmaddubsw xmm0, xmm5
+
+ lea rdi, [rdi + rdx]
+ pmaddubsw xmm2, xmm6
+
+ lea rsi, [rsi + rax]
+ dec rcx ; sets ZF for the jnz below; no instruction in between writes flags
+
+ paddsw xmm0, xmm7 ; add the rounding term loaded before the je
+
+ paddsw xmm0, xmm2
+
+ psraw xmm0, 7
+
+ packuswb xmm0, xmm0
+
+ movq MMWORD Ptr [rdi], xmm0
+
+ jnz .filter_block1d8_h4_rowloop_ssse3
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+;void vp9_filter_block1d16_h6_ssse3
+;(
+; unsigned char *src_ptr, ; each row is read from src_ptr - 2 through src_ptr + 18
+; unsigned int src_pixels_per_line, ; added to the source pointer after each row
+; unsigned char *output_ptr, ; 16 bytes per row, stored with movdqa
+; unsigned int output_pitch, ; added to the output pointer after each row
+; unsigned int output_height, ; row counter; only tested after a row has been written
+; unsigned int vp9_filter_index ; picks one 16-byte row in each of k0_k5, k1_k3, k2_k4
+;)
+global sym(vp9_filter_block1d16_h6_ssse3) PRIVATE
+sym(vp9_filter_block1d16_h6_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ movsxd rdx, DWORD PTR arg(5) ;table index
+ xor rsi, rsi ; NOTE(review): rsi is reloaded with src_ptr below before any read, so this looks redundant
+ shl rdx, 4 ; 16 bytes per table row
+
+ lea rax, [GLOBAL(k0_k5)]
+ add rax, rdx
+ ; unlike the 8- and 4-wide horizontal routines there is no 4-tap shortcut: all three rows are always applied
+ mov rdi, arg(2) ;output_ptr
+
+ mov rsi, arg(0) ;src_ptr
+
+ movdqa xmm4, XMMWORD PTR [rax] ;k0_k5
+ movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4
+ movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3
+
+ movsxd rax, dword ptr arg(1) ;src_pixels_per_line
+ movsxd rcx, dword ptr arg(4) ;output_height
+ movsxd rdx, dword ptr arg(3) ;output_pitch
+
+.filter_block1d16_h6_rowloop_ssse3:
+ movq xmm0, MMWORD PTR [rsi - 2] ; -2 -1 0 1 2 3 4 5
+
+ movq xmm3, MMWORD PTR [rsi + 3] ; 3 4 5 6 7 8 9 10
+
+ punpcklbw xmm0, xmm3 ; -2 3 -1 4 0 5 1 6 2 7 3 8 4 9 5 10
+
+ movdqa xmm1, xmm0
+ pmaddubsw xmm0, xmm4
+
+ movdqa xmm2, xmm1
+ pshufb xmm1, [GLOBAL(shuf2bfrom1)]
+
+ pshufb xmm2, [GLOBAL(shuf3bfrom1)]
+ movq xmm3, MMWORD PTR [rsi + 6] ; start of the second 8 pixels: bytes 6 .. 13
+
+ pmaddubsw xmm1, xmm5
+ movq xmm7, MMWORD PTR [rsi + 11] ; bytes 11 .. 18; xmm7 is scratch here, so rd is read from memory below
+
+ pmaddubsw xmm2, xmm6
+ punpcklbw xmm3, xmm7 ; same interleave as above, shifted by 8 pixels
+
+ paddsw xmm0, xmm1
+ movdqa xmm1, xmm3
+
+ pmaddubsw xmm3, xmm4
+ paddsw xmm0, xmm2
+
+ movdqa xmm2, xmm1
+ paddsw xmm0, [GLOBAL(rd)]
+
+ pshufb xmm1, [GLOBAL(shuf2bfrom1)]
+ pshufb xmm2, [GLOBAL(shuf3bfrom1)]
+
+ psraw xmm0, 7
+ pmaddubsw xmm1, xmm5
+
+ pmaddubsw xmm2, xmm6
+ packuswb xmm0, xmm0 ; first 8 output bytes
+
+ lea rsi, [rsi + rax]
+ paddsw xmm3, xmm1
+
+ paddsw xmm3, xmm2
+
+ paddsw xmm3, [GLOBAL(rd)]
+
+ psraw xmm3, 7
+
+ packuswb xmm3, xmm3 ; second 8 output bytes
+
+ punpcklqdq xmm0, xmm3 ; join both halves into one 16-byte row
+
+ movdqa XMMWORD Ptr [rdi], xmm0
+
+ lea rdi, [rdi + rdx]
+ dec rcx
+ jnz .filter_block1d16_h6_rowloop_ssse3
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void vp9_filter_block1d4_h6_ssse3
+;(
+; unsigned char *src_ptr, ; each row is read with one 16-byte movdqu starting at src_ptr - 2
+; unsigned int src_pixels_per_line, ; added to the source pointer after each row
+; unsigned char *output_ptr, ; 4 bytes are stored per row
+; unsigned int output_pitch, ; added to the output pointer after each row
+; unsigned int output_height, ; row counter; only tested after a row has been written
+; unsigned int vp9_filter_index ; picks one 16-byte row in each of k0_k5, k1_k3, k2_k4
+;)
+global sym(vp9_filter_block1d4_h6_ssse3) PRIVATE
+sym(vp9_filter_block1d4_h6_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ movsxd rdx, DWORD PTR arg(5) ;table index
+ xor rsi, rsi ; esi = 0 for the zero test on the k0_k5 row below
+ shl rdx, 4 ; 16 bytes per table row
+
+ lea rax, [GLOBAL(k0_k5)]
+ add rax, rdx
+ movdqa xmm7, [GLOBAL(rd)] ; rounding term, kept in xmm7 for both tap paths
+ ; if the first four bytes of the selected k0_k5 row are zero, take the 4-tap path
+ cmp esi, DWORD PTR [rax]
+ je .vp9_filter_block1d4_h4_ssse3
+
+ movdqa xmm4, XMMWORD PTR [rax] ;k0_k5
+ movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4
+ movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3
+
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;output_ptr
+ movsxd rax, dword ptr arg(1) ;src_pixels_per_line
+ movsxd rcx, dword ptr arg(4) ;output_height
+
+ movsxd rdx, dword ptr arg(3) ;output_pitch
+
+;xmm3 free
+.filter_block1d4_h6_rowloop_ssse3:
+ movdqu xmm0, XMMWORD PTR [rsi - 2]
+
+ movdqa xmm1, xmm0
+ pshufb xmm0, [GLOBAL(shuf1b)] ; byte pairs for the k0_k5 multiply
+
+ movdqa xmm2, xmm1
+ pshufb xmm1, [GLOBAL(shuf2b)] ; byte pairs for the k2_k4 multiply
+ pmaddubsw xmm0, xmm4
+ pshufb xmm2, [GLOBAL(shuf3b)] ; byte pairs for the k1_k3 multiply
+ pmaddubsw xmm1, xmm5
+
+;--
+ pmaddubsw xmm2, xmm6
+
+ lea rsi, [rsi + rax]
+ paddsw xmm0, xmm1
+ paddsw xmm0, xmm7
+ pxor xmm1, xmm1
+ paddsw xmm0, xmm2
+ psraw xmm0, 7
+ packuswb xmm0, xmm0
+
+ movd DWORD PTR [rdi], xmm0
+
+ add rdi, rdx
+ dec rcx
+ jnz .filter_block1d4_h6_rowloop_ssse3
+
+ ; begin epilog (RESTORE_XMM pairs with SAVE_XMM 7 in the prolog; this path writes xmm6 and xmm7)
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+.vp9_filter_block1d4_h4_ssse3:
+ movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4
+ movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3
+ movdqa xmm0, XMMWORD PTR [GLOBAL(shuf2b)]
+ movdqa xmm3, XMMWORD PTR [GLOBAL(shuf3b)]
+
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;output_ptr
+ movsxd rax, dword ptr arg(1) ;src_pixels_per_line
+ movsxd rcx, dword ptr arg(4) ;output_height
+
+ movsxd rdx, dword ptr arg(3) ;output_pitch
+
+.filter_block1d4_h4_rowloop_ssse3:
+ movdqu xmm1, XMMWORD PTR [rsi - 2]
+
+ movdqa xmm2, xmm1
+ pshufb xmm1, xmm0 ;;[GLOBAL(shuf2b)]
+ pshufb xmm2, xmm3 ;;[GLOBAL(shuf3b)]
+ pmaddubsw xmm1, xmm5
+
+;--
+ pmaddubsw xmm2, xmm6
+
+ lea rsi, [rsi + rax]
+;--
+ paddsw xmm1, xmm7
+ paddsw xmm1, xmm2
+ psraw xmm1, 7
+ packuswb xmm1, xmm1
+
+ movd DWORD PTR [rdi], xmm1
+
+ add rdi, rdx
+ dec rcx
+ jnz .filter_block1d4_h4_rowloop_ssse3
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+
+;void vp9_filter_block1d16_v6_ssse3
+;(
+; unsigned char *src_ptr, ; row A, the first of the six source rows combined into output row 0
+; unsigned int src_pitch, ; distance between source rows
+; unsigned char *output_ptr, ; 16 bytes per row; the 4-tap path stores them with one movdqa
+; unsigned int out_pitch, ; added to the output pointer after each row
+; unsigned int output_height, ; row counter; only tested after a row has been written
+; unsigned int vp9_filter_index ; picks one 16-byte row in each of k0_k5, k1_k3, k2_k4
+;)
+global sym(vp9_filter_block1d16_v6_ssse3) PRIVATE
+sym(vp9_filter_block1d16_v6_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ movsxd rdx, DWORD PTR arg(5) ;table index
+ xor rsi, rsi ; esi = 0 for the zero test on the k0_k5 row below
+ shl rdx, 4 ; 16 bytes per table row
+
+ lea rax, [GLOBAL(k0_k5)]
+ add rax, rdx
+ ; if the first four bytes of the selected k0_k5 row are zero, take the 4-tap path
+ cmp esi, DWORD PTR [rax]
+ je .vp9_filter_block1d16_v4_ssse3
+
+ movdqa xmm5, XMMWORD PTR [rax] ;k0_k5
+ movdqa xmm6, XMMWORD PTR [rax+256] ;k2_k4
+ movdqa xmm7, XMMWORD PTR [rax+128] ;k1_k3
+
+ mov rsi, arg(0) ;src_ptr
+ movsxd rdx, DWORD PTR arg(1) ;pixels_per_line
+ mov rdi, arg(2) ;output_ptr
+
+%if ABI_IS_32BIT=0
+ movsxd r8, DWORD PTR arg(3) ;out_pitch
+%endif
+ mov rax, rsi
+ movsxd rcx, DWORD PTR arg(4) ;output_height
+ add rax, rdx ; rax = rsi + pitch, so [rax + rdx*2] and [rax + rdx*4] reach rows 3 and 5
+
+
+.vp9_filter_block1d16_v6_ssse3_loop:
+ movq xmm1, MMWORD PTR [rsi] ;A
+ movq xmm2, MMWORD PTR [rsi + rdx] ;B
+ movq xmm3, MMWORD PTR [rsi + rdx * 2] ;C
+ movq xmm4, MMWORD PTR [rax + rdx * 2] ;D
+ movq xmm0, MMWORD PTR [rsi + rdx * 4] ;E
+
+ punpcklbw xmm2, xmm4 ;B D
+ punpcklbw xmm3, xmm0 ;C E
+
+ movq xmm0, MMWORD PTR [rax + rdx * 4] ;F
+
+ pmaddubsw xmm3, xmm6 ; C E with k2_k4
+ punpcklbw xmm1, xmm0 ;A F
+ pmaddubsw xmm2, xmm7 ; B D with k1_k3
+ pmaddubsw xmm1, xmm5 ; A F with k0_k5
+
+ paddsw xmm2, xmm3
+ paddsw xmm2, xmm1
+ paddsw xmm2, [GLOBAL(rd)]
+ psraw xmm2, 7
+ packuswb xmm2, xmm2
+
+ movq MMWORD PTR [rdi], xmm2 ;store the results
+ ; second half of the row: same computation on columns 8 .. 15
+ movq xmm1, MMWORD PTR [rsi + 8] ;A
+ movq xmm2, MMWORD PTR [rsi + rdx + 8] ;B
+ movq xmm3, MMWORD PTR [rsi + rdx * 2 + 8] ;C
+ movq xmm4, MMWORD PTR [rax + rdx * 2 + 8] ;D
+ movq xmm0, MMWORD PTR [rsi + rdx * 4 + 8] ;E
+
+ punpcklbw xmm2, xmm4 ;B D
+ punpcklbw xmm3, xmm0 ;C E
+
+ movq xmm0, MMWORD PTR [rax + rdx * 4 + 8] ;F
+ pmaddubsw xmm3, xmm6
+ punpcklbw xmm1, xmm0 ;A F
+ pmaddubsw xmm2, xmm7
+ pmaddubsw xmm1, xmm5
+
+ add rsi, rdx
+ add rax, rdx
+;--
+;--
+ paddsw xmm2, xmm3
+ paddsw xmm2, xmm1
+ paddsw xmm2, [GLOBAL(rd)]
+ psraw xmm2, 7
+ packuswb xmm2, xmm2
+
+ movq MMWORD PTR [rdi+8], xmm2
+
+%if ABI_IS_32BIT
+ add rdi, DWORD PTR arg(3) ;out_pitch
+%else
+ add rdi, r8
+%endif
+ dec rcx
+ jnz .vp9_filter_block1d16_v6_ssse3_loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+.vp9_filter_block1d16_v4_ssse3:
+ movdqa xmm6, XMMWORD PTR [rax+256] ;k2_k4
+ movdqa xmm7, XMMWORD PTR [rax+128] ;k1_k3
+
+ mov rsi, arg(0) ;src_ptr
+ movsxd rdx, DWORD PTR arg(1) ;pixels_per_line
+ mov rdi, arg(2) ;output_ptr
+
+%if ABI_IS_32BIT=0
+ movsxd r8, DWORD PTR arg(3) ;out_pitch
+%endif
+ mov rax, rsi
+ movsxd rcx, DWORD PTR arg(4) ;output_height
+ add rax, rdx
+
+.vp9_filter_block1d16_v4_ssse3_loop:
+ movq xmm2, MMWORD PTR [rsi + rdx] ;B
+ movq xmm3, MMWORD PTR [rsi + rdx * 2] ;C
+ movq xmm4, MMWORD PTR [rax + rdx * 2] ;D
+ movq xmm0, MMWORD PTR [rsi + rdx * 4] ;E
+
+ punpcklbw xmm2, xmm4 ;B D
+ punpcklbw xmm3, xmm0 ;C E
+
+ pmaddubsw xmm3, xmm6
+ pmaddubsw xmm2, xmm7
+ movq xmm5, MMWORD PTR [rsi + rdx + 8] ;B
+ movq xmm1, MMWORD PTR [rsi + rdx * 2 + 8] ;C
+ movq xmm4, MMWORD PTR [rax + rdx * 2 + 8] ;D
+ movq xmm0, MMWORD PTR [rsi + rdx * 4 + 8] ;E
+
+ paddsw xmm2, [GLOBAL(rd)]
+ paddsw xmm2, xmm3
+ psraw xmm2, 7
+ packuswb xmm2, xmm2 ; output bytes 0 .. 7
+
+ punpcklbw xmm5, xmm4 ;B D
+ punpcklbw xmm1, xmm0 ;C E
+
+ pmaddubsw xmm1, xmm6
+ pmaddubsw xmm5, xmm7
+
+ movdqa xmm4, [GLOBAL(rd)]
+ add rsi, rdx
+ add rax, rdx
+;--
+;--
+ paddsw xmm5, xmm1
+ paddsw xmm5, xmm4
+ psraw xmm5, 7
+ packuswb xmm5, xmm5 ; output bytes 8 .. 15
+
+ punpcklqdq xmm2, xmm5 ; join both halves into one 16-byte row
+
+ movdqa XMMWORD PTR [rdi], xmm2
+
+%if ABI_IS_32BIT
+ add rdi, DWORD PTR arg(3) ;out_pitch
+%else
+ add rdi, r8
+%endif
+ dec rcx
+ jnz .vp9_filter_block1d16_v4_ssse3_loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void vp9_filter_block1d8_v6_ssse3
+;(
+; unsigned char *src_ptr, ; row A, the first of the six source rows combined into output row 0
+; unsigned int src_pitch, ; distance between source rows
+; unsigned char *output_ptr, ; 8 bytes are stored per row
+; unsigned int out_pitch, ; added to the output pointer after each row
+; unsigned int output_height, ; row counter; only tested after a row has been written
+; unsigned int vp9_filter_index ; picks one 16-byte row in each of k0_k5, k1_k3, k2_k4
+;)
+global sym(vp9_filter_block1d8_v6_ssse3) PRIVATE
+sym(vp9_filter_block1d8_v6_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ movsxd rdx, DWORD PTR arg(5) ;table index
+ xor rsi, rsi ; esi = 0 for the zero test on the k0_k5 row below
+ shl rdx, 4 ; 16 bytes per table row
+
+ lea rax, [GLOBAL(k0_k5)]
+ add rax, rdx
+ ; rdx, rdi, r8 and rcx are set up before the branch so that both tap paths share them
+ movsxd rdx, DWORD PTR arg(1) ;pixels_per_line
+ mov rdi, arg(2) ;output_ptr
+%if ABI_IS_32BIT=0
+ movsxd r8, DWORD PTR arg(3) ; out_pitch
+%endif
+ movsxd rcx, DWORD PTR arg(4) ;[output_height]
+ ; if the first four bytes of the selected k0_k5 row are zero, take the 4-tap path
+ cmp esi, DWORD PTR [rax]
+ je .vp9_filter_block1d8_v4_ssse3
+
+ movdqa xmm5, XMMWORD PTR [rax] ;k0_k5
+ movdqa xmm6, XMMWORD PTR [rax+256] ;k2_k4
+ movdqa xmm7, XMMWORD PTR [rax+128] ;k1_k3
+
+ mov rsi, arg(0) ;src_ptr
+
+ mov rax, rsi
+ add rax, rdx ; rax = rsi + pitch, so [rax + rdx*2] and [rax + rdx*4] reach rows 3 and 5
+
+.vp9_filter_block1d8_v6_ssse3_loop:
+ movq xmm1, MMWORD PTR [rsi] ;A
+ movq xmm2, MMWORD PTR [rsi + rdx] ;B
+ movq xmm3, MMWORD PTR [rsi + rdx * 2] ;C
+ movq xmm4, MMWORD PTR [rax + rdx * 2] ;D
+ movq xmm0, MMWORD PTR [rsi + rdx * 4] ;E
+
+ punpcklbw xmm2, xmm4 ;B D
+ punpcklbw xmm3, xmm0 ;C E
+
+ movq xmm0, MMWORD PTR [rax + rdx * 4] ;F
+ movdqa xmm4, [GLOBAL(rd)] ; xmm4 is free again after the B D interleave
+
+ pmaddubsw xmm3, xmm6 ; C E with k2_k4
+ punpcklbw xmm1, xmm0 ;A F
+ pmaddubsw xmm2, xmm7 ; B D with k1_k3
+ pmaddubsw xmm1, xmm5 ; A F with k0_k5
+ add rsi, rdx
+ add rax, rdx
+;--
+;--
+ paddsw xmm2, xmm3
+ paddsw xmm2, xmm1
+ paddsw xmm2, xmm4
+ psraw xmm2, 7
+ packuswb xmm2, xmm2
+
+ movq MMWORD PTR [rdi], xmm2
+
+%if ABI_IS_32BIT
+ add rdi, DWORD PTR arg(3) ;[out_pitch]
+%else
+ add rdi, r8
+%endif
+ dec rcx
+ jnz .vp9_filter_block1d8_v6_ssse3_loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+.vp9_filter_block1d8_v4_ssse3:
+ movdqa xmm6, XMMWORD PTR [rax+256] ;k2_k4
+ movdqa xmm7, XMMWORD PTR [rax+128] ;k1_k3
+ movdqa xmm5, [GLOBAL(rd)] ; xmm5 holds the rounding term here because k0_k5 is not used
+
+ mov rsi, arg(0) ;src_ptr
+
+ mov rax, rsi
+ add rax, rdx
+
+.vp9_filter_block1d8_v4_ssse3_loop:
+ movq xmm2, MMWORD PTR [rsi + rdx] ;B
+ movq xmm3, MMWORD PTR [rsi + rdx * 2] ;C
+ movq xmm4, MMWORD PTR [rax + rdx * 2] ;D
+ movq xmm0, MMWORD PTR [rsi + rdx * 4] ;E
+
+ punpcklbw xmm2, xmm4 ;B D
+ punpcklbw xmm3, xmm0 ;C E
+
+ pmaddubsw xmm3, xmm6
+ pmaddubsw xmm2, xmm7
+ add rsi, rdx
+ add rax, rdx
+;--
+;--
+ paddsw xmm2, xmm3
+ paddsw xmm2, xmm5
+ psraw xmm2, 7
+ packuswb xmm2, xmm2
+
+ movq MMWORD PTR [rdi], xmm2
+
+%if ABI_IS_32BIT
+ add rdi, DWORD PTR arg(3) ;[out_pitch]
+%else
+ add rdi, r8
+%endif
+ dec rcx
+ jnz .vp9_filter_block1d8_v4_ssse3_loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+;void vp9_filter_block1d4_v6_ssse3
+;(
+; unsigned char *src_ptr, ; row A, the first of the six source rows combined into output row 0
+; unsigned int src_pitch, ; distance between source rows
+; unsigned char *output_ptr, ; 4 bytes are stored per row
+; unsigned int out_pitch, ; added to the output pointer after each row
+; unsigned int output_height, ; row counter; only tested after a row has been written
+; unsigned int vp9_filter_index ; picks one table row; only its first 8 bytes are loaded here
+;)
+global sym(vp9_filter_block1d4_v6_ssse3) PRIVATE
+sym(vp9_filter_block1d4_v6_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog (no SAVE_XMM: this routine only touches the 64-bit mm registers)
+ ; NOTE(review): no emms is issued on either exit; presumably the caller resets MMX state - confirm
+ movsxd rdx, DWORD PTR arg(5) ;table index
+ xor rsi, rsi ; esi = 0 for the zero test on the k0_k5 row below
+ shl rdx, 4 ; 16 bytes per table row
+
+ lea rax, [GLOBAL(k0_k5)]
+ add rax, rdx
+
+ movsxd rdx, DWORD PTR arg(1) ;pixels_per_line
+ mov rdi, arg(2) ;output_ptr
+%if ABI_IS_32BIT=0
+ movsxd r8, DWORD PTR arg(3) ; out_pitch
+%endif
+ movsxd rcx, DWORD PTR arg(4) ;[output_height]
+ ; if the first four bytes of the selected k0_k5 row are zero, take the 4-tap path
+ cmp esi, DWORD PTR [rax]
+ je .vp9_filter_block1d4_v4_ssse3
+
+ movq mm5, MMWORD PTR [rax] ;k0_k5
+ movq mm6, MMWORD PTR [rax+256] ;k2_k4
+ movq mm7, MMWORD PTR [rax+128] ;k1_k3
+
+ mov rsi, arg(0) ;src_ptr
+
+ mov rax, rsi
+ add rax, rdx ; rax = rsi + pitch, so [rax + rdx*2] and [rax + rdx*4] reach rows 3 and 5
+
+.vp9_filter_block1d4_v6_ssse3_loop:
+ movd mm1, DWORD PTR [rsi] ;A
+ movd mm2, DWORD PTR [rsi + rdx] ;B
+ movd mm3, DWORD PTR [rsi + rdx * 2] ;C
+ movd mm4, DWORD PTR [rax + rdx * 2] ;D
+ movd mm0, DWORD PTR [rsi + rdx * 4] ;E
+
+ punpcklbw mm2, mm4 ;B D
+ punpcklbw mm3, mm0 ;C E
+
+ movd mm0, DWORD PTR [rax + rdx * 4] ;F
+
+ movq mm4, [GLOBAL(rd)] ; first 8 bytes of rd: four rounding words
+
+ pmaddubsw mm3, mm6 ; C E with k2_k4
+ punpcklbw mm1, mm0 ;A F
+ pmaddubsw mm2, mm7 ; B D with k1_k3
+ pmaddubsw mm1, mm5 ; A F with k0_k5
+ add rsi, rdx
+ add rax, rdx
+;--
+;--
+ paddsw mm2, mm3
+ paddsw mm2, mm1
+ paddsw mm2, mm4
+ psraw mm2, 7
+ packuswb mm2, mm2
+
+ movd DWORD PTR [rdi], mm2
+
+%if ABI_IS_32BIT
+ add rdi, DWORD PTR arg(3) ;[out_pitch]
+%else
+ add rdi, r8
+%endif
+ dec rcx
+ jnz .vp9_filter_block1d4_v6_ssse3_loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+.vp9_filter_block1d4_v4_ssse3:
+ movq mm6, MMWORD PTR [rax+256] ;k2_k4
+ movq mm7, MMWORD PTR [rax+128] ;k1_k3
+ movq mm5, MMWORD PTR [GLOBAL(rd)] ; mm5 holds the rounding term here because k0_k5 is not used
+
+ mov rsi, arg(0) ;src_ptr
+
+ mov rax, rsi
+ add rax, rdx
+
+.vp9_filter_block1d4_v4_ssse3_loop:
+ movd mm2, DWORD PTR [rsi + rdx] ;B
+ movd mm3, DWORD PTR [rsi + rdx * 2] ;C
+ movd mm4, DWORD PTR [rax + rdx * 2] ;D
+ movd mm0, DWORD PTR [rsi + rdx * 4] ;E
+
+ punpcklbw mm2, mm4 ;B D
+ punpcklbw mm3, mm0 ;C E
+
+ pmaddubsw mm3, mm6
+ pmaddubsw mm2, mm7
+ add rsi, rdx
+ add rax, rdx
+;--
+;--
+ paddsw mm2, mm3
+ paddsw mm2, mm5
+ psraw mm2, 7
+ packuswb mm2, mm2
+
+ movd DWORD PTR [rdi], mm2
+
+%if ABI_IS_32BIT
+ add rdi, DWORD PTR arg(3) ;[out_pitch]
+%else
+ add rdi, r8
+%endif
+ dec rcx
+ jnz .vp9_filter_block1d4_v4_ssse3_loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void vp9_bilinear_predict16x16_ssse3
+;(
+; unsigned char *src_ptr, ; rows are read from src_ptr through src_ptr + 16 when the horizontal pass runs
+; int src_pixels_per_line, ; distance between source rows
+; int xoffset, ; row index into bilinear_filters_ssse3 for the horizontal pass; 0 skips that pass
+; int yoffset, ; row index for the vertical pass; compared with 0 only when xoffset is non-zero
+; unsigned char *dst_ptr, ; 16 rows of 16 bytes, each stored with movdqa
+; int dst_pitch ; distance between destination rows
+;)
+global sym(vp9_bilinear_predict16x16_ssse3) PRIVATE
+sym(vp9_bilinear_predict16x16_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ lea rcx, [GLOBAL(bilinear_filters_ssse3)]
+ movsxd rax, dword ptr arg(2) ; xoffset
+
+ cmp rax, 0 ; skip first_pass filter if xoffset=0
+ je .b16x16_sp_only
+
+ shl rax, 4 ; 16 bytes per filter row
+ lea rax, [rax + rcx] ; HFilter
+
+ mov rdi, arg(4) ; dst_ptr
+ mov rsi, arg(0) ; src_ptr
+ movsxd rdx, dword ptr arg(5) ; dst_pitch
+
+ movdqa xmm1, [rax]
+
+ movsxd rax, dword ptr arg(3) ; yoffset
+
+ cmp rax, 0 ; skip second_pass filter if yoffset=0
+ je .b16x16_fp_only
+
+ shl rax, 4
+ lea rax, [rax + rcx] ; VFilter
+
+ lea rcx, [rdi+rdx*8]
+ lea rcx, [rcx+rdx*8] ; rcx = dst_ptr + 16 * dst_pitch, the loop end marker
+ movsxd rdx, dword ptr arg(1) ; src_pixels_per_line
+
+ movdqa xmm2, [rax]
+
+%if ABI_IS_32BIT=0
+ movsxd r8, dword ptr arg(5) ; dst_pitch
+%endif
+ movq xmm3, [rsi] ; 00 01 02 03 04 05 06 07
+ movq xmm5, [rsi+1] ; 01 02 03 04 05 06 07 08
+
+ punpcklbw xmm3, xmm5 ; 00 01 01 02 02 03 03 04 04 05 05 06 06 07 07 08
+ movq xmm4, [rsi+8] ; 08 09 10 11 12 13 14 15
+
+ movq xmm5, [rsi+9] ; 09 10 11 12 13 14 15 16
+
+ lea rsi, [rsi + rdx] ; next line
+
+ pmaddubsw xmm3, xmm1 ; one word per byte pair: filtered pixels 00 .. 07
+
+ punpcklbw xmm4, xmm5 ; 08 09 09 10 10 11 11 12 12 13 13 14 14 15 15 16
+ pmaddubsw xmm4, xmm1 ; one word per byte pair: filtered pixels 08 .. 15
+
+ paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value
+ psraw xmm3, VP9_FILTER_SHIFT ; xmm3 /= 128
+
+ paddw xmm4, [GLOBAL(rd)] ; xmm4 += round value
+ psraw xmm4, VP9_FILTER_SHIFT ; xmm4 /= 128
+
+ movdqa xmm7, xmm3
+ packuswb xmm7, xmm4 ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
+
+.next_row:
+ movq xmm6, [rsi] ; 00 01 02 03 04 05 06 07
+ movq xmm5, [rsi+1] ; 01 02 03 04 05 06 07 08
+
+ punpcklbw xmm6, xmm5
+ movq xmm4, [rsi+8] ; 08 09 10 11 12 13 14 15
+
+ movq xmm5, [rsi+9] ; 09 10 11 12 13 14 15 16
+ lea rsi, [rsi + rdx] ; next line
+
+ pmaddubsw xmm6, xmm1
+
+ punpcklbw xmm4, xmm5
+ pmaddubsw xmm4, xmm1
+
+ paddw xmm6, [GLOBAL(rd)] ; xmm6 += round value
+ psraw xmm6, VP9_FILTER_SHIFT ; xmm6 /= 128
+
+ paddw xmm4, [GLOBAL(rd)] ; xmm4 += round value
+ psraw xmm4, VP9_FILTER_SHIFT ; xmm4 /= 128
+
+ packuswb xmm6, xmm4 ; horizontally filtered current row, 16 bytes
+ movdqa xmm5, xmm7
+
+ punpcklbw xmm5, xmm6 ; previous/current row byte pairs, columns 0 .. 7
+ pmaddubsw xmm5, xmm2
+
+ punpckhbw xmm7, xmm6 ; previous/current row byte pairs, columns 8 .. 15
+ pmaddubsw xmm7, xmm2
+
+ paddw xmm5, [GLOBAL(rd)] ; xmm5 += round value
+ psraw xmm5, VP9_FILTER_SHIFT ; xmm5 /= 128
+
+ paddw xmm7, [GLOBAL(rd)] ; xmm7 += round value
+ psraw xmm7, VP9_FILTER_SHIFT ; xmm7 /= 128
+
+ packuswb xmm5, xmm7
+ movdqa xmm7, xmm6 ; the current row becomes the previous row of the next iteration
+
+ movdqa [rdi], xmm5 ; store the results in the destination
+%if ABI_IS_32BIT
+ add rdi, DWORD PTR arg(5) ; dst_pitch
+%else
+ add rdi, r8
+%endif
+
+ cmp rdi, rcx
+ jne .next_row
+
+ jmp .done
+ ; NOTE(review): reached for xoffset == 0 without testing yoffset; filter row 0 holds the byte 128 - confirm callers never pass 0/0
+.b16x16_sp_only:
+ movsxd rax, dword ptr arg(3) ; yoffset
+ shl rax, 4
+ lea rax, [rax + rcx] ; VFilter
+
+ mov rdi, arg(4) ; dst_ptr
+ mov rsi, arg(0) ; src_ptr
+ movsxd rdx, dword ptr arg(5) ; dst_pitch
+
+ movdqa xmm1, [rax] ; VFilter
+
+ lea rcx, [rdi+rdx*8]
+ lea rcx, [rcx+rdx*8] ; rcx = dst_ptr + 16 * dst_pitch, the loop end marker
+ movsxd rax, dword ptr arg(1) ; src_pixels_per_line
+
+ ; preload row 0 unfiltered (this path has no horizontal pass)
+ movq xmm4, [rsi] ; load row 0
+ movq xmm2, [rsi + 8] ; load row 0
+
+ lea rsi, [rsi + rax] ; next line
+.next_row_sp:
+ movq xmm3, [rsi] ; load row + 1
+ movq xmm5, [rsi + 8] ; load row + 1
+
+ punpcklbw xmm4, xmm3
+ punpcklbw xmm2, xmm5
+
+ pmaddubsw xmm4, xmm1
+ movq xmm7, [rsi + rax] ; load row + 2
+
+ pmaddubsw xmm2, xmm1
+ movq xmm6, [rsi + rax + 8] ; load row + 2
+
+ punpcklbw xmm3, xmm7
+ punpcklbw xmm5, xmm6
+
+ pmaddubsw xmm3, xmm1
+ paddw xmm4, [GLOBAL(rd)]
+
+ pmaddubsw xmm5, xmm1
+ paddw xmm2, [GLOBAL(rd)]
+
+ psraw xmm4, VP9_FILTER_SHIFT
+ psraw xmm2, VP9_FILTER_SHIFT
+
+ packuswb xmm4, xmm2
+ paddw xmm3, [GLOBAL(rd)]
+
+ movdqa [rdi], xmm4 ; store row 0
+ paddw xmm5, [GLOBAL(rd)]
+
+ psraw xmm3, VP9_FILTER_SHIFT
+ psraw xmm5, VP9_FILTER_SHIFT
+
+ packuswb xmm3, xmm5
+ movdqa xmm4, xmm7 ; row + 2 (low half) becomes row 0 of the next iteration
+
+ movdqa [rdi + rdx],xmm3 ; store row 1
+ lea rsi, [rsi + 2*rax]
+
+ movdqa xmm2, xmm6 ; row + 2 (high half) becomes row 0 of the next iteration
+ lea rdi, [rdi + 2*rdx]
+
+ cmp rdi, rcx
+ jne .next_row_sp
+
+ jmp .done
+
+.b16x16_fp_only:
+ lea rcx, [rdi+rdx*8]
+ lea rcx, [rcx+rdx*8] ; rcx = dst_ptr + 16 * dst_pitch, the loop end marker
+ movsxd rax, dword ptr arg(1) ; src_pixels_per_line
+ ; two rows are filtered horizontally and stored per iteration
+.next_row_fp:
+ movq xmm2, [rsi] ; 00 01 02 03 04 05 06 07
+ movq xmm4, [rsi+1] ; 01 02 03 04 05 06 07 08
+
+ punpcklbw xmm2, xmm4
+ movq xmm3, [rsi+8] ; 08 09 10 11 12 13 14 15
+
+ pmaddubsw xmm2, xmm1
+ movq xmm4, [rsi+9] ; 09 10 11 12 13 14 15 16
+
+ lea rsi, [rsi + rax] ; next line
+ punpcklbw xmm3, xmm4
+
+ pmaddubsw xmm3, xmm1
+ movq xmm5, [rsi]
+
+ paddw xmm2, [GLOBAL(rd)]
+ movq xmm7, [rsi+1]
+
+ movq xmm6, [rsi+8]
+ psraw xmm2, VP9_FILTER_SHIFT
+
+ punpcklbw xmm5, xmm7
+ movq xmm7, [rsi+9]
+
+ paddw xmm3, [GLOBAL(rd)]
+ pmaddubsw xmm5, xmm1
+
+ psraw xmm3, VP9_FILTER_SHIFT
+ punpcklbw xmm6, xmm7
+
+ packuswb xmm2, xmm3
+ pmaddubsw xmm6, xmm1
+
+ movdqa [rdi], xmm2 ; store the results in the destination
+ paddw xmm5, [GLOBAL(rd)]
+
+ lea rdi, [rdi + rdx] ; dst_pitch
+ psraw xmm5, VP9_FILTER_SHIFT
+
+ paddw xmm6, [GLOBAL(rd)]
+ psraw xmm6, VP9_FILTER_SHIFT
+
+ packuswb xmm5, xmm6
+ lea rsi, [rsi + rax] ; next line
+
+ movdqa [rdi], xmm5 ; store the results in the destination
+ lea rdi, [rdi + rdx] ; dst_pitch
+
+ cmp rdi, rcx
+
+ jne .next_row_fp
+
+.done:
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void vp9_bilinear_predict8x8_ssse3
+;(
+; unsigned char *src_ptr, ; nine rows are read, 16 bytes each (movdqu)
+; int src_pixels_per_line, ; distance between source rows
+; int xoffset, ; row index into bilinear_filters_ssse3 for the horizontal pass; 0 skips that pass
+; int yoffset, ; row index for the vertical pass; compared with 0 only when xoffset is non-zero
+; unsigned char *dst_ptr, ; 8 rows of 8 bytes, each stored with movq
+; int dst_pitch ; distance between destination rows
+;)
+global sym(vp9_bilinear_predict8x8_ssse3) PRIVATE
+sym(vp9_bilinear_predict8x8_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ ALIGN_STACK 16, rax
+ sub rsp, 144 ; reserve 144 bytes: nine 16-byte rows
+
+ lea rcx, [GLOBAL(bilinear_filters_ssse3)]
+
+ mov rsi, arg(0) ;src_ptr
+ movsxd rdx, dword ptr arg(1) ;src_pixels_per_line
+
+ ;Read 9-line unaligned data in and put them on stack. This gives a big
+ ;performance boost.
+ movdqu xmm0, [rsi]
+ lea rax, [rdx + rdx*2] ; rax = 3 * pitch, the step between groups of three rows
+ movdqu xmm1, [rsi+rdx]
+ movdqu xmm2, [rsi+rdx*2]
+ add rsi, rax
+ movdqu xmm3, [rsi]
+ movdqu xmm4, [rsi+rdx]
+ movdqu xmm5, [rsi+rdx*2]
+ add rsi, rax
+ movdqu xmm6, [rsi]
+ movdqu xmm7, [rsi+rdx]
+
+ movdqa XMMWORD PTR [rsp], xmm0
+
+ movdqu xmm0, [rsi+rdx*2] ; ninth row, loaded once xmm0 has been spilled
+
+ movdqa XMMWORD PTR [rsp+16], xmm1
+ movdqa XMMWORD PTR [rsp+32], xmm2
+ movdqa XMMWORD PTR [rsp+48], xmm3
+ movdqa XMMWORD PTR [rsp+64], xmm4
+ movdqa XMMWORD PTR [rsp+80], xmm5
+ movdqa XMMWORD PTR [rsp+96], xmm6
+ movdqa XMMWORD PTR [rsp+112], xmm7
+ movdqa XMMWORD PTR [rsp+128], xmm0
+ ; from here on rsp is also the read cursor over the saved rows; every path leaves it 144 higher before .done8x8
+ movsxd rax, dword ptr arg(2) ; xoffset
+ cmp rax, 0 ; skip first_pass filter if xoffset=0
+ je .b8x8_sp_only
+
+ shl rax, 4 ; 16 bytes per filter row
+ add rax, rcx ; HFilter
+
+ mov rdi, arg(4) ; dst_ptr
+ movsxd rdx, dword ptr arg(5) ; dst_pitch
+
+ movdqa xmm0, [rax]
+
+ movsxd rax, dword ptr arg(3) ; yoffset
+ cmp rax, 0 ; skip second_pass filter if yoffset=0
+ je .b8x8_fp_only
+
+ shl rax, 4
+ lea rax, [rax + rcx] ; VFilter
+
+ lea rcx, [rdi+rdx*8] ; rcx = dst_ptr + 8 * dst_pitch, the loop end marker
+
+ movdqa xmm1, [rax]
+
+ ; get the first horizontal line done
+ movdqa xmm3, [rsp] ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
+ movdqa xmm5, xmm3 ; 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 xx
+
+ psrldq xmm5, 1
+ lea rsp, [rsp + 16] ; next line
+
+ punpcklbw xmm3, xmm5 ; 00 01 01 02 02 03 03 04 04 05 05 06 06 07 07 08
+ pmaddubsw xmm3, xmm0 ; one word per byte pair: filtered pixels 00 .. 07
+
+ paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value
+ psraw xmm3, VP9_FILTER_SHIFT ; xmm3 /= 128
+
+ movdqa xmm7, xmm3
+ packuswb xmm7, xmm7 ; filtered pixels 00 .. 07 as bytes, duplicated in both halves
+
+.next_row:
+ movdqa xmm6, [rsp] ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
+ lea rsp, [rsp + 16] ; next line
+
+ movdqa xmm5, xmm6
+
+ psrldq xmm5, 1
+
+ punpcklbw xmm6, xmm5
+ pmaddubsw xmm6, xmm0
+
+ paddw xmm6, [GLOBAL(rd)] ; xmm6 += round value
+ psraw xmm6, VP9_FILTER_SHIFT ; xmm6 /= 128
+
+ packuswb xmm6, xmm6
+
+ punpcklbw xmm7, xmm6 ; previous/current row byte pairs
+ pmaddubsw xmm7, xmm1
+
+ paddw xmm7, [GLOBAL(rd)] ; xmm7 += round value
+ psraw xmm7, VP9_FILTER_SHIFT ; xmm7 /= 128
+
+ packuswb xmm7, xmm7
+
+ movq [rdi], xmm7 ; store the results in the destination
+ lea rdi, [rdi + rdx]
+
+ movdqa xmm7, xmm6 ; the current row becomes the previous row of the next iteration
+
+ cmp rdi, rcx
+ jne .next_row
+
+ jmp .done8x8
+ ; NOTE(review): reached for xoffset == 0 without testing yoffset; filter row 0 holds the byte 128 - confirm callers never pass 0/0
+.b8x8_sp_only:
+ movsxd rax, dword ptr arg(3) ; yoffset
+ shl rax, 4
+ lea rax, [rax + rcx] ; VFilter
+
+ mov rdi, arg(4) ;dst_ptr
+ movsxd rdx, dword ptr arg(5) ; dst_pitch
+
+ movdqa xmm0, [rax] ; VFilter
+ ; fully unrolled: all eight output rows are computed without a loop
+ movq xmm1, XMMWORD PTR [rsp]
+ movq xmm2, XMMWORD PTR [rsp+16]
+
+ movq xmm3, XMMWORD PTR [rsp+32]
+ punpcklbw xmm1, xmm2
+
+ movq xmm4, XMMWORD PTR [rsp+48]
+ punpcklbw xmm2, xmm3
+
+ movq xmm5, XMMWORD PTR [rsp+64]
+ punpcklbw xmm3, xmm4
+
+ movq xmm6, XMMWORD PTR [rsp+80]
+ punpcklbw xmm4, xmm5
+
+ movq xmm7, XMMWORD PTR [rsp+96]
+ punpcklbw xmm5, xmm6
+
+ pmaddubsw xmm1, xmm0
+ pmaddubsw xmm2, xmm0
+
+ pmaddubsw xmm3, xmm0
+ pmaddubsw xmm4, xmm0
+
+ pmaddubsw xmm5, xmm0
+ punpcklbw xmm6, xmm7
+
+ pmaddubsw xmm6, xmm0
+ paddw xmm1, [GLOBAL(rd)]
+
+ paddw xmm2, [GLOBAL(rd)]
+ psraw xmm1, VP9_FILTER_SHIFT
+
+ paddw xmm3, [GLOBAL(rd)]
+ psraw xmm2, VP9_FILTER_SHIFT
+
+ paddw xmm4, [GLOBAL(rd)]
+ psraw xmm3, VP9_FILTER_SHIFT
+
+ paddw xmm5, [GLOBAL(rd)]
+ psraw xmm4, VP9_FILTER_SHIFT
+
+ paddw xmm6, [GLOBAL(rd)]
+ psraw xmm5, VP9_FILTER_SHIFT
+
+ psraw xmm6, VP9_FILTER_SHIFT
+ packuswb xmm1, xmm1
+
+ packuswb xmm2, xmm2
+ movq [rdi], xmm1
+
+ packuswb xmm3, xmm3
+ movq [rdi+rdx], xmm2
+
+ packuswb xmm4, xmm4
+ movq xmm1, XMMWORD PTR [rsp+112]
+
+ lea rdi, [rdi + 2*rdx]
+ movq xmm2, XMMWORD PTR [rsp+128]
+
+ packuswb xmm5, xmm5
+ movq [rdi], xmm3
+
+ packuswb xmm6, xmm6
+ movq [rdi+rdx], xmm4
+
+ lea rdi, [rdi + 2*rdx]
+ punpcklbw xmm7, xmm1
+
+ movq [rdi], xmm5
+ pmaddubsw xmm7, xmm0
+
+ movq [rdi+rdx], xmm6
+ punpcklbw xmm1, xmm2
+
+ pmaddubsw xmm1, xmm0
+ paddw xmm7, [GLOBAL(rd)]
+
+ psraw xmm7, VP9_FILTER_SHIFT
+ paddw xmm1, [GLOBAL(rd)]
+
+ psraw xmm1, VP9_FILTER_SHIFT
+ packuswb xmm7, xmm7
+
+ packuswb xmm1, xmm1
+ lea rdi, [rdi + 2*rdx]
+
+ movq [rdi], xmm7
+
+ movq [rdi+rdx], xmm1
+ lea rsp, [rsp + 144] ; step over the whole 144-byte row buffer in one go
+
+ jmp .done8x8
+
+.b8x8_fp_only:
+ lea rcx, [rdi+rdx*8] ; rcx = dst_ptr + 8 * dst_pitch, the loop end marker
+ ; four rows are filtered horizontally and stored per iteration
+.next_row_fp:
+ movdqa xmm1, XMMWORD PTR [rsp]
+ movdqa xmm3, XMMWORD PTR [rsp+16]
+
+ movdqa xmm2, xmm1
+ movdqa xmm5, XMMWORD PTR [rsp+32]
+
+ psrldq xmm2, 1
+ movdqa xmm7, XMMWORD PTR [rsp+48]
+
+ movdqa xmm4, xmm3
+ psrldq xmm4, 1
+
+ movdqa xmm6, xmm5
+ psrldq xmm6, 1
+
+ punpcklbw xmm1, xmm2
+ pmaddubsw xmm1, xmm0
+
+ punpcklbw xmm3, xmm4
+ pmaddubsw xmm3, xmm0
+
+ punpcklbw xmm5, xmm6
+ pmaddubsw xmm5, xmm0
+
+ movdqa xmm2, xmm7
+ psrldq xmm2, 1
+
+ punpcklbw xmm7, xmm2
+ pmaddubsw xmm7, xmm0
+
+ paddw xmm1, [GLOBAL(rd)]
+ psraw xmm1, VP9_FILTER_SHIFT
+
+ paddw xmm3, [GLOBAL(rd)]
+ psraw xmm3, VP9_FILTER_SHIFT
+
+ paddw xmm5, [GLOBAL(rd)]
+ psraw xmm5, VP9_FILTER_SHIFT
+
+ paddw xmm7, [GLOBAL(rd)]
+ psraw xmm7, VP9_FILTER_SHIFT
+
+ packuswb xmm1, xmm1
+ packuswb xmm3, xmm3
+
+ packuswb xmm5, xmm5
+ movq [rdi], xmm1
+
+ packuswb xmm7, xmm7
+ movq [rdi+rdx], xmm3
+
+ lea rdi, [rdi + 2*rdx]
+ movq [rdi], xmm5
+
+ lea rsp, [rsp + 4*16] ; four saved rows consumed
+ movq [rdi+rdx], xmm7
+
+ lea rdi, [rdi + 2*rdx]
+ cmp rdi, rcx
+
+ jne .next_row_fp
+
+ lea rsp, [rsp + 16] ; skip the unused ninth row so rsp ends 144 above the buffer start
+
+.done8x8:
+ ;add rsp, 144
+ pop rsp ; NOTE(review): presumably reloads the rsp value saved by ALIGN_STACK (macro not shown here) - confirm
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+SECTION_RODATA
+align 16
+shuf1b: ; pshufb mask used by the 4-wide horizontal filter; its result is multiplied by k0_k5
+ db 0, 5, 1, 6, 2, 7, 3, 8, 4, 9, 5, 10, 6, 11, 7, 12
+shuf2b: ; as shuf1b, for the k2_k4 multiply
+ db 2, 4, 3, 5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 9, 11
+shuf3b: ; as shuf1b, for the k1_k3 multiply
+ db 1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10
+
+align 16
+shuf2bfrom1: ; pshufb mask applied to the punpcklbw-interleaved row in the 8/16-wide horizontal filters, for k2_k4
+ db 4, 8, 6, 1, 8, 3, 1, 5, 3, 7, 5, 9, 7,11, 9,13
+align 16
+shuf3bfrom1: ; as shuf2bfrom1, for the k1_k3 multiply
+ db 2, 6, 4, 8, 6, 1, 8, 3, 1, 5, 3, 7, 5, 9, 7,11
+
+align 16
+rd: ; rounding term added before every shift right by 7 in this file
+ times 8 dw 0x40
+
+align 16
+k0_k5: ; eight 16-byte rows indexed by vp9_filter_index; an all-zero row makes the code take its 4-tap path
+ times 8 db 0, 0 ;placeholder
+ times 8 db 0, 0
+ times 8 db 2, 1
+ times 8 db 0, 0
+ times 8 db 3, 3
+ times 8 db 0, 0
+ times 8 db 1, 2
+ times 8 db 0, 0
+k1_k3: ; must stay exactly 128 bytes after k0_k5: the code addresses it as [k0_k5 row + 128]
+ times 8 db 0, 0 ;placeholder
+ times 8 db -6, 12
+ times 8 db -11, 36
+ times 8 db -9, 50
+ times 8 db -16, 77
+ times 8 db -6, 93
+ times 8 db -8, 108
+ times 8 db -1, 123
+k2_k4: ; must stay exactly 256 bytes after k0_k5: the code addresses it as [k0_k5 row + 256]
+ times 8 db 128, 0 ;placeholder
+ times 8 db 123, -1
+ times 8 db 108, -8
+ times 8 db 93, -6
+ times 8 db 77, -16
+ times 8 db 50, -9
+ times 8 db 36, -11
+ times 8 db 12, -6
+align 16
+bilinear_filters_ssse3: ; sixteen 16-byte rows indexed by xoffset / yoffset; each byte pair sums to 128
+ times 8 db 128, 0
+ times 8 db 120, 8
+ times 8 db 112, 16
+ times 8 db 104, 24
+ times 8 db 96, 32
+ times 8 db 88, 40
+ times 8 db 80, 48
+ times 8 db 72, 56
+ times 8 db 64, 64
+ times 8 db 56, 72
+ times 8 db 48, 80
+ times 8 db 40, 88
+ times 8 db 32, 96
+ times 8 db 24, 104
+ times 8 db 16, 112
+ times 8 db 8, 120
+
diff --git a/vp9/common/x86/vp9_subpixel_x86.h b/vp9/common/x86/vp9_subpixel_x86.h
new file mode 100644
index 0000000..86b72f3
--- /dev/null
+++ b/vp9/common/x86/vp9_subpixel_x86.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_COMMON_X86_VP9_SUBPIXEL_X86_H_
+#define VP9_COMMON_X86_VP9_SUBPIXEL_X86_H_
+
+/* Note:
+ *
+ * This platform is commonly built for runtime CPU detection. If you modify
+ * any of the function mappings present in this file, be sure to also update
+ * them in the function pointer initialization code
+ */
+
+#if HAVE_MMX /* MMX kernels: sixtap and bilinear predictors for 16x16, 8x8, 8x4 and 4x4 */
+extern prototype_subpixel_predict(vp9_sixtap_predict16x16_mmx);
+extern prototype_subpixel_predict(vp9_sixtap_predict8x8_mmx);
+extern prototype_subpixel_predict(vp9_sixtap_predict8x4_mmx);
+extern prototype_subpixel_predict(vp9_sixtap_predict4x4_mmx);
+extern prototype_subpixel_predict(vp9_bilinear_predict16x16_mmx);
+extern prototype_subpixel_predict(vp9_bilinear_predict8x8_mmx);
+extern prototype_subpixel_predict(vp9_bilinear_predict8x4_mmx);
+extern prototype_subpixel_predict(vp9_bilinear_predict4x4_mmx);
+
+/* Without runtime CPU detection the generic vp9_subpix_* names are bound to the MMX kernels at compile time. */
+#if !CONFIG_RUNTIME_CPU_DETECT
+#undef vp9_subpix_sixtap16x16
+#define vp9_subpix_sixtap16x16 vp9_sixtap_predict16x16_mmx
+
+#undef vp9_subpix_sixtap8x8
+#define vp9_subpix_sixtap8x8 vp9_sixtap_predict8x8_mmx
+
+#undef vp9_subpix_sixtap8x4
+#define vp9_subpix_sixtap8x4 vp9_sixtap_predict8x4_mmx
+
+#undef vp9_subpix_sixtap4x4
+#define vp9_subpix_sixtap4x4 vp9_sixtap_predict4x4_mmx
+
+#undef vp9_subpix_bilinear16x16
+#define vp9_subpix_bilinear16x16 vp9_bilinear_predict16x16_mmx
+
+#undef vp9_subpix_bilinear8x8
+#define vp9_subpix_bilinear8x8 vp9_bilinear_predict8x8_mmx
+
+#undef vp9_subpix_bilinear8x4
+#define vp9_subpix_bilinear8x4 vp9_bilinear_predict8x4_mmx
+
+#undef vp9_subpix_bilinear4x4
+#define vp9_subpix_bilinear4x4 vp9_bilinear_predict4x4_mmx
+
+#endif /* !CONFIG_RUNTIME_CPU_DETECT */
+#endif /* HAVE_MMX */
+
+
+#if HAVE_SSE2 /* SSE2 kernels: no sixtap 4x4 and no bilinear 8x4 / 4x4 variants are declared here */
+extern prototype_subpixel_predict(vp9_sixtap_predict16x16_sse2);
+extern prototype_subpixel_predict(vp9_sixtap_predict8x8_sse2);
+extern prototype_subpixel_predict(vp9_sixtap_predict8x4_sse2);
+extern prototype_subpixel_predict(vp9_bilinear_predict16x16_sse2);
+extern prototype_subpixel_predict(vp9_bilinear_predict8x8_sse2);
+
+/* Static binding: only the five names below are re-pointed; any other name keeps its earlier mapping. */
+#if !CONFIG_RUNTIME_CPU_DETECT
+#undef vp9_subpix_sixtap16x16
+#define vp9_subpix_sixtap16x16 vp9_sixtap_predict16x16_sse2
+
+#undef vp9_subpix_sixtap8x8
+#define vp9_subpix_sixtap8x8 vp9_sixtap_predict8x8_sse2
+
+#undef vp9_subpix_sixtap8x4
+#define vp9_subpix_sixtap8x4 vp9_sixtap_predict8x4_sse2
+
+#undef vp9_subpix_bilinear16x16
+#define vp9_subpix_bilinear16x16 vp9_bilinear_predict16x16_sse2
+
+#undef vp9_subpix_bilinear8x8
+#define vp9_subpix_bilinear8x8 vp9_bilinear_predict8x8_sse2
+
+#endif /* !CONFIG_RUNTIME_CPU_DETECT */
+#endif /* HAVE_SSE2 */
+
+#if HAVE_SSSE3 /* SSSE3 kernels: all four sixtap sizes, bilinear only for 16x16 and 8x8 */
+extern prototype_subpixel_predict(vp9_sixtap_predict16x16_ssse3);
+extern prototype_subpixel_predict(vp9_sixtap_predict8x8_ssse3);
+extern prototype_subpixel_predict(vp9_sixtap_predict8x4_ssse3);
+extern prototype_subpixel_predict(vp9_sixtap_predict4x4_ssse3);
+extern prototype_subpixel_predict(vp9_bilinear_predict16x16_ssse3);
+extern prototype_subpixel_predict(vp9_bilinear_predict8x8_ssse3);
+
+#if !CONFIG_RUNTIME_CPU_DETECT /* last section in the header, so these bindings override earlier ones */
+#undef vp9_subpix_sixtap16x16
+#define vp9_subpix_sixtap16x16 vp9_sixtap_predict16x16_ssse3
+
+#undef vp9_subpix_sixtap8x8
+#define vp9_subpix_sixtap8x8 vp9_sixtap_predict8x8_ssse3
+
+#undef vp9_subpix_sixtap8x4
+#define vp9_subpix_sixtap8x4 vp9_sixtap_predict8x4_ssse3
+
+#undef vp9_subpix_sixtap4x4
+#define vp9_subpix_sixtap4x4 vp9_sixtap_predict4x4_ssse3
+
+
+#undef vp9_subpix_bilinear16x16
+#define vp9_subpix_bilinear16x16 vp9_bilinear_predict16x16_ssse3
+
+#undef vp9_subpix_bilinear8x8
+#define vp9_subpix_bilinear8x8 vp9_bilinear_predict8x8_ssse3
+
+#endif /* !CONFIG_RUNTIME_CPU_DETECT */
+#endif /* HAVE_SSSE3 */
+
+
+
+#endif /* VP9_COMMON_X86_VP9_SUBPIXEL_X86_H_ */
diff --git a/vp9/decoder/vp9_asm_dec_offsets.c b/vp9/decoder/vp9_asm_dec_offsets.c
new file mode 100644
index 0000000..e4b9c97
--- /dev/null
+++ b/vp9/decoder/vp9_asm_dec_offsets.c
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_ports/asm_offsets.h"
+#include "vp9/decoder/vp9_onyxd_int.h"
+
+BEGIN
+/* No entries: nothing is listed between BEGIN and END (both presumably come from vpx_ports/asm_offsets.h). */
+END
+
+/* add asserts for any offset that is not supported by assembly code */
+/* add asserts for any size that is not supported by assembly code */
diff --git a/vp9/decoder/vp9_dboolhuff.c b/vp9/decoder/vp9_dboolhuff.c
new file mode 100644
index 0000000..5f1ef04
--- /dev/null
+++ b/vp9/decoder/vp9_dboolhuff.c
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vp9/decoder/vp9_dboolhuff.h"
+#include "vpx_ports/mem.h"
+#include "vpx_mem/vpx_mem.h"
+
+int vp9_start_decode(BOOL_DECODER *br, /* Reset br to read source[0 .. source_sz); returns 0 on success, 1 on failure. */
+ const unsigned char *source,
+ unsigned int source_sz) {
+ br->user_buffer_end = source + source_sz; /* note: the fields are written before the NULL check below */
+ br->user_buffer = source;
+ br->value = 0;
+ br->count = -8; /* count holds "bits buffered minus 8", so -8 means nothing is loaded yet */
+ br->range = 255;
+
+ if (source_sz && !source) /* a non-zero size with a NULL buffer is the only rejected input */
+ return 1;
+
+ /* Populate the buffer */
+ vp9_bool_decoder_fill(br);
+
+ return 0;
+}
+
+
+void vp9_bool_decoder_fill(BOOL_DECODER *br) { /* Top up br->value from the input bytes via VP9DX_BOOL_DECODER_FILL. */
+ const unsigned char *bufptr;
+ const unsigned char *bufend;
+ VP9_BD_VALUE value;
+ int count;
+ bufend = br->user_buffer_end;
+ bufptr = br->user_buffer;
+ value = br->value;
+ count = br->count; /* local copies: the macro updates count, value and bufptr in place */
+
+ VP9DX_BOOL_DECODER_FILL(count, value, bufptr, bufend);
+
+ br->user_buffer = bufptr; /* write the updated state back; user_buffer_end is never changed */
+ br->value = value;
+ br->count = count;
+}
+
+
+static int get_unsigned_bits(unsigned num_values) { /* Bit length of (num_values - 1); 0 when num_values <= 1. */
+ int cat = 0;
+ if ((num_values--) <= 1) return 0; /* the post-decrement leaves num_values - 1 for the loop below */
+ while (num_values > 0) { /* count shifts until the value is exhausted */
+ cat++;
+ num_values >>= 1;
+ }
+ return cat;
+}
+
+int vp9_inv_recenter_nonneg(int v, int m) { /* Map a recentered code v back around centre m (3 cases below). */
+ if (v > (m << 1)) return v; /* beyond 2*m the code is the value itself */
+ else if ((v & 1) == 0) return (v >> 1) + m; /* even codes land at or above m: m, m+1, ... */
+ else return m - ((v + 1) >> 1); /* odd codes land below m: m-1, m-2, ... */
+}
+
+int vp9_decode_uniform(BOOL_DECODER *br, int n) { /* Read a symbol for an alphabet of n values using l-1 or l bits. */
+ int v;
+ int l = get_unsigned_bits(n); /* 0 when n <= 1 */
+ int m = (1 << l) - n; /* number of codes that need only l-1 bits */
+ if (!l) return 0; /* n <= 1: nothing is read from the stream */
+ v = decode_value(br, l - 1);
+ if (v < m)
+ return v;
+ else
+ return (v << 1) - m + decode_value(br, 1); /* one extra bit extends the longer codes */
+}
+
+int vp9_decode_term_subexp(BOOL_DECODER *br, int k, int num_syms) { /* Read a value coded in growing buckets starting at 2^k. */
+ int i = 0, mk = 0, word; /* i: bucket index, mk: first value of the current bucket */
+ while (1) {
+ int b = (i ? k + i - 1 : k); /* bucket width in bits: k, k, k+1, k+2, ... */
+ int a = (1 << b);
+ if (num_syms <= mk + 3 * a) { /* few symbols left: finish with the uniform code over num_syms - mk */
+ word = vp9_decode_uniform(br, num_syms - mk) + mk;
+ break;
+ } else {
+ if (decode_value(br, 1)) { /* 1: skip this bucket and move to the next one */
+ i++;
+ mk += a;
+ } else { /* 0: the value lies in this bucket, b raw bits follow */
+ word = decode_value(br, b) + mk;
+ break;
+ }
+ }
+ }
+ return word;
+}
+
+int vp9_decode_unsigned_max(BOOL_DECODER *br, int max) { /* Read as many bits as max has, then clamp the result to max. */
+ int data = 0, bit = 0, lmax = max;
+
+ while (lmax) { /* one iteration per significant bit of max */
+ data |= decode_bool(br, 128) << bit++; /* bits arrive least-significant first */
+ lmax >>= 1;
+ }
+ if (data > max)
+ return max;
+ return data;
+}
diff --git a/vp9/decoder/vp9_dboolhuff.h b/vp9/decoder/vp9_dboolhuff.h
new file mode 100644
index 0000000..635bd5b
--- /dev/null
+++ b/vp9/decoder/vp9_dboolhuff.h
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_DECODER_VP9_DBOOLHUFF_H_
+#define VP9_DECODER_VP9_DBOOLHUFF_H_
+#include <stddef.h>
+#include <limits.h>
+#include "./vpx_config.h"
+#include "vpx_ports/mem.h"
+#include "vpx/vpx_integer.h"
+
+typedef size_t VP9_BD_VALUE; /* type of the decoder's bit window (BOOL_DECODER.value) */
+
+# define VP9_BD_VALUE_SIZE ((int)sizeof(VP9_BD_VALUE)*CHAR_BIT) /* width of VP9_BD_VALUE in bits */
+/*This is meant to be a large, positive constant that can still be efficiently
+ loaded as an immediate (on platforms like ARM, for example).
+ Even relatively modest values like 100 would work fine.*/
+# define VP9_LOTS_OF_BITS (0x40000000) /* added to count once the input runs out, see bool_error() */
+
+typedef struct { /* State of the boolean decoder; initialised by vp9_start_decode(). */
+ const unsigned char *user_buffer_end; /* source + source_sz, i.e. one past the last input byte */
+ const unsigned char *user_buffer; /* next input byte to load; advanced by the fill macro */
+ VP9_BD_VALUE value; /* bit window; new bytes are OR-ed in below the bits already held */
+ int count; /* bits held in value minus 8; negative triggers a refill in decode_bool() */
+ unsigned int range; /* current range, starts at 255 and is renormalised after each bool */
+} BOOL_DECODER;
+
+DECLARE_ALIGNED(16, extern const unsigned char, vp9_norm[256]); /* indexed by range in decode_bool() to get the shift */
+
+int vp9_start_decode(BOOL_DECODER *br, /* returns 0 on success, 1 if source is NULL while source_sz != 0 */
+ const unsigned char *source,
+ unsigned int source_sz);
+
+void vp9_bool_decoder_fill(BOOL_DECODER *br); /* called by decode_bool() whenever br->count < 0 */
+/* The three helpers below are defined in vp9_dboolhuff.c. */
+int vp9_decode_uniform(BOOL_DECODER *br, int n);
+int vp9_decode_term_subexp(BOOL_DECODER *br, int k, int num_syms);
+int vp9_inv_recenter_nonneg(int v, int m);
+
+/*The refill loop is used in several places, so define it in a macro to make
+ sure they're all consistent. _count, _value and _bufptr must be lvalues: they
+ are updated in place; _bufend is only read.
+ An inline function would be cleaner, but has a significant penalty, because
+ multiple BOOL_DECODER fields must be modified, and the compiler is not smart
+ enough to eliminate the stores and the subsequent reloads when inlining.*/
+#define VP9DX_BOOL_DECODER_FILL(_count,_value,_bufptr,_bufend) \
+ do \
+ { \
+ int shift = VP9_BD_VALUE_SIZE - 8 - ((_count) + 8); /* bit position for the next byte */ \
+ int loop_end, x; \
+ int bits_left = (int)(((_bufend)-(_bufptr))*CHAR_BIT); \
+ \
+ x = shift + CHAR_BIT - bits_left; /* >= 0 when the input cannot fill the whole window */ \
+ loop_end = 0; \
+ if(x >= 0) \
+ { \
+ (_count) += VP9_LOTS_OF_BITS; /* marks that the end of the input was reached */ \
+ loop_end = x; /* stop once the remaining bytes are consumed */ \
+ if(!bits_left) break; /* nothing left to load at all */ \
+ } \
+ while(shift >= loop_end) \
+ { \
+ (_count) += CHAR_BIT; \
+ (_value) |= (VP9_BD_VALUE)*(_bufptr)++ << shift; \
+ shift -= CHAR_BIT; \
+ } \
+ } \
+ while(0) \
+
+
+static int decode_bool(BOOL_DECODER *br, int probability) { /* Decode one bool; probability is the weight of a 0, out of 256. */
+ unsigned int bit = 0;
+ VP9_BD_VALUE value;
+ unsigned int split;
+ VP9_BD_VALUE bigsplit;
+ int count;
+ unsigned int range;
+
+ split = 1 + (((br->range - 1) * probability) >> 8); /* part of the range assigned to the 0 outcome */
+
+ if (br->count < 0) /* refill before reading when the window has run low */
+ vp9_bool_decoder_fill(br);
+
+ value = br->value;
+ count = br->count;
+
+ bigsplit = (VP9_BD_VALUE)split << (VP9_BD_VALUE_SIZE - 8); /* split aligned to the top byte of the window */
+
+ range = split; /* assume a 0 until the comparison below says otherwise */
+
+ if (value >= bigsplit) { /* the value falls in the upper part: the bit is 1 */
+ range = br->range - split;
+ value = value - bigsplit;
+ bit = 1;
+ }
+
+ {
+ register unsigned int shift = vp9_norm[range]; /* renormalisation shift looked up from the new range */
+ range <<= shift;
+ value <<= shift;
+ count -= shift; /* the shifted-out bits are consumed */
+ }
+ br->value = value;
+ br->count = count;
+ br->range = range;
+
+ return bit;
+}
+
+static int decode_value(BOOL_DECODER *br, int bits) { /* Read a `bits`-wide literal, most significant bit first. */
+ int z = 0;
+ int bit;
+
+ for (bit = bits - 1; bit >= 0; bit--) { /* bits <= 0 reads nothing and returns 0 */
+ z |= (decode_bool(br, 0x80) << bit); /* 0x80: both outcomes equally likely */
+ }
+
+ return z;
+}
+
+static int bool_error(BOOL_DECODER *br) { /* Returns 1 if bits were consumed past the end of the input, else 0. */
+ /* Check if we have reached the end of the buffer.
+ *
+ * Variable 'count' stores the number of bits in the 'value' buffer, minus
+ * 8. The top byte is part of the algorithm, and the remainder is buffered
+ * to be shifted into it. So if count == 8, the top 16 bits of 'value' are
+ * occupied, 8 for the algorithm and 8 in the buffer.
+ *
+ * When reading a byte from the user's buffer, count is filled with 8 and
+ * one byte is filled into the value buffer. When we reach the end of the
+ * data, count is additionally filled with VP9_LOTS_OF_BITS. So when
+ * count == VP9_LOTS_OF_BITS - 1, the user's data has been exhausted.
+ */
+ if ((br->count > VP9_BD_VALUE_SIZE) && (br->count < VP9_LOTS_OF_BITS)) { /* end marker was added and then eaten into */
+ /* We have tried to decode bits after the end of
+ * stream was encountered.
+ */
+ return 1;
+ }
+
+ /* No error. */
+ return 0;
+}
+
+extern int vp9_decode_unsigned_max(BOOL_DECODER *br, int max); /* defined in vp9_dboolhuff.c; the result never exceeds max */
+
+#endif
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
new file mode 100644
index 0000000..7e53884
--- /dev/null
+++ b/vp9/decoder/vp9_decodemv.c
@@ -0,0 +1,1262 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vp9/decoder/vp9_treereader.h"
+#include "vp9/common/vp9_entropymv.h"
+#include "vp9/common/vp9_entropymode.h"
+#include "vp9/decoder/vp9_onyxd_int.h"
+#include "vp9/common/vp9_findnearmv.h"
+
+#include "vp9/common/vp9_seg_common.h"
+#include "vp9/common/vp9_pred_common.h"
+#include "vp9/common/vp9_entropy.h"
+#include "vp9/decoder/vp9_decodemv.h"
+#include "vp9/common/vp9_mvref_common.h"
+#if CONFIG_DEBUG
+#include <assert.h>
+#endif
+
+// #define DEBUG_DEC_MV  (uncomment to define the dec_mvcount debug counter below)
+#ifdef DEBUG_DEC_MV
+int dec_mvcount = 0;
+#endif  // DEBUG_DEC_MV
+// #define DEC_DEBUG  (uncomment to pull in dec_debug, which is defined in another file)
+#ifdef DEC_DEBUG
+extern int dec_debug;
+#endif  // DEC_DEBUG
+
+static int read_bmode(vp9_reader *bc, const vp9_prob *p) {  // Read a 4x4 block mode with vp9_bmode_tree and probabilities p.
+ B_PREDICTION_MODE m = treed_read(bc, vp9_bmode_tree, p);
+#if CONFIG_NEWBINTRAMODES
+ if (m == B_CONTEXT_PRED - CONTEXT_PRED_REPLACEMENTS)  // remap the coded index to B_CONTEXT_PRED
+ m = B_CONTEXT_PRED;
+ assert(m < B_CONTEXT_PRED - CONTEXT_PRED_REPLACEMENTS || m == B_CONTEXT_PRED);  // NOTE(review): <assert.h> is included here only under CONFIG_DEBUG -- confirm another header provides assert
+#endif
+ return m;
+}
+
+static int read_kf_bmode(vp9_reader *bc, const vp9_prob *p) {  // Key-frame 4x4 block mode: vp9_kf_bmode_tree with probabilities p.
+ return treed_read(bc, vp9_kf_bmode_tree, p);
+}
+
+static int read_ymode(vp9_reader *bc, const vp9_prob *p) {  // Luma mode: vp9_ymode_tree with probabilities p.
+ return treed_read(bc, vp9_ymode_tree, p);
+}
+
+#if CONFIG_SUPERBLOCKS
+static int read_sb_ymode(vp9_reader *bc, const vp9_prob *p) {  // Superblock luma mode: vp9_sb_ymode_tree.
+ return treed_read(bc, vp9_sb_ymode_tree, p);
+}
+
+static int read_kf_sb_ymode(vp9_reader *bc, const vp9_prob *p) {  // Key-frame superblock luma mode.
+ return treed_read(bc, vp9_uv_mode_tree, p);  // NOTE(review): parsed with the uv-mode tree -- confirm the encoder writes it the same way
+}
+#endif  // CONFIG_SUPERBLOCKS
+
+static int read_kf_mb_ymode(vp9_reader *bc, const vp9_prob *p) {  // Key-frame macroblock luma mode: vp9_kf_ymode_tree.
+ return treed_read(bc, vp9_kf_ymode_tree, p);
+}
+
+static int read_i8x8_mode(vp9_reader *bc, const vp9_prob *p) {  // Mode of one 8x8 intra block: vp9_i8x8_mode_tree.
+ return treed_read(bc, vp9_i8x8_mode_tree, p);
+}
+
+static int read_uv_mode(vp9_reader *bc, const vp9_prob *p) {  // Chroma mode: vp9_uv_mode_tree with probabilities p.
+ return treed_read(bc, vp9_uv_mode_tree, p);
+}
+
+// This function reads the current macroblock's segment id from the bitstream.
+// It should only be called if a segment map update is indicated.
+static void read_mb_segid(vp9_reader *r, MB_MODE_INFO *mi,
+ MACROBLOCKD *xd) {
+ /* Is segmentation enabled */
+ if (xd->segmentation_enabled && xd->update_mb_segmentation_map) {  // otherwise mi->segment_id is left untouched
+ /* If so then read the segment id. */
+ if (vp9_read(r, xd->mb_segment_tree_probs[0]))  // first bit selects the upper pair {2, 3}
+ mi->segment_id =
+ (unsigned char)(2 + vp9_read(r, xd->mb_segment_tree_probs[2]));
+ else  // lower pair {0, 1}
+ mi->segment_id =
+ (unsigned char)(vp9_read(r, xd->mb_segment_tree_probs[1]));
+ }
+}
+
+#if CONFIG_NEW_MVREF
+int vp9_read_mv_ref_id(vp9_reader *r,  // Read a unary-coded index in 0..3, one probability per position.
+ vp9_prob * ref_id_probs) {
+ int ref_index = 0;
+
+ if (vp9_read(r, ref_id_probs[0])) {  // each 1 bit bumps the index; the first 0 bit stops
+ ref_index++;
+ if (vp9_read(r, ref_id_probs[1])) {
+ ref_index++;
+ if (vp9_read(r, ref_id_probs[2]))  // third bit: no further read after it
+ ref_index++;
+ }
+ }
+ return ref_index;
+}
+#endif  // CONFIG_NEW_MVREF
+
+extern const int vp9_i8x8_block[4];  // first 4x4 block index of each 8x8 block, as used by the I8X8_PRED loop below
+static void kfread_modes(VP9D_COMP *pbi,  // Read the key-frame mode info of one macroblock into *m.
+ MODE_INFO *m,
+ int mb_row,
+ int mb_col,
+ BOOL_DECODER* const bc) {
+ VP9_COMMON *const cm = &pbi->common;
+ const int mis = pbi->common.mode_info_stride;
+ int map_index = mb_row * pbi->common.mb_cols + mb_col;  // raster index into last_frame_seg_map
+ MB_PREDICTION_MODE y_mode;
+
+ // Read the Macroblock segmentation map if it is being updated explicitly
+ // this frame (reset to 0 by default).
+ m->mbmi.segment_id = 0;
+ if (pbi->mb.update_mb_segmentation_map) {
+ read_mb_segid(bc, &m->mbmi, &pbi->mb);
+ pbi->common.last_frame_seg_map[map_index] = m->mbmi.segment_id;
+ }
+
+ m->mbmi.mb_skip_coeff = 0;
+ if (pbi->common.mb_no_coeff_skip &&  // skip flag is coded unless the segment forces EOB == 0
+ (!vp9_segfeature_active(&pbi->mb,
+ m->mbmi.segment_id, SEG_LVL_EOB) ||
+ (vp9_get_segdata(&pbi->mb,
+ m->mbmi.segment_id, SEG_LVL_EOB) != 0))) {
+ MACROBLOCKD *const xd = &pbi->mb;
+ m->mbmi.mb_skip_coeff =
+ vp9_read(bc, vp9_get_pred_prob(cm, xd, PRED_MBSKIP));
+ } else {
+ if (vp9_segfeature_active(&pbi->mb,  // segment-level EOB of 0 implies a skipped macroblock
+ m->mbmi.segment_id, SEG_LVL_EOB) &&
+ (vp9_get_segdata(&pbi->mb,
+ m->mbmi.segment_id, SEG_LVL_EOB) == 0)) {
+ m->mbmi.mb_skip_coeff = 1;
+ } else
+ m->mbmi.mb_skip_coeff = 0;
+ }
+
+#if CONFIG_SUPERBLOCKS
+ if (m->mbmi.encoded_as_sb) {  // superblocks use their own probability table
+ y_mode = (MB_PREDICTION_MODE) read_kf_sb_ymode(bc,
+ pbi->common.sb_kf_ymode_prob[pbi->common.kf_ymode_probs_index]);
+ } else  // the statement after the #endif is this else's body
+#endif
+ y_mode = (MB_PREDICTION_MODE) read_kf_mb_ymode(bc,
+ pbi->common.kf_ymode_prob[pbi->common.kf_ymode_probs_index]);
+#if CONFIG_COMP_INTRA_PRED
+ m->mbmi.second_mode = (MB_PREDICTION_MODE)(DC_PRED - 1);  // DC_PRED - 1 presumably means "no second mode" -- TODO confirm
+#endif
+
+ m->mbmi.ref_frame = INTRA_FRAME;
+
+ if ((m->mbmi.mode = y_mode) == B_PRED) {  // per-4x4 modes follow, 16 of them
+ int i = 0;
+#if CONFIG_COMP_INTRA_PRED
+ int use_comp_pred = vp9_read(bc, DEFAULT_COMP_INTRA_PROB);
+#endif
+ do {
+ const B_PREDICTION_MODE A = above_block_mode(m, i, mis);  // context: modes of the above and left blocks
+ const B_PREDICTION_MODE L = left_block_mode(m, i);
+
+ m->bmi[i].as_mode.first =
+ (B_PREDICTION_MODE) read_kf_bmode(
+ bc, pbi->common.kf_bmode_prob [A] [L]);
+#if CONFIG_COMP_INTRA_PRED
+ if (use_comp_pred) {
+ m->bmi[i].as_mode.second =
+ (B_PREDICTION_MODE) read_kf_bmode(
+ bc, pbi->common.kf_bmode_prob [A] [L]);
+ } else {
+ m->bmi[i].as_mode.second = (B_PREDICTION_MODE)(B_DC_PRED - 1);
+ }
+#endif
+ } while (++i < 16);
+ }
+ if ((m->mbmi.mode = y_mode) == I8X8_PRED) {  // re-assigns the same mode; one mode per 8x8 block follows
+ int i;
+ int mode8x8;
+ for (i = 0; i < 4; i++) {
+ int ib = vp9_i8x8_block[i];
+ mode8x8 = read_i8x8_mode(bc, pbi->common.fc.i8x8_mode_prob);
+ m->bmi[ib + 0].as_mode.first = mode8x8;  // copy the mode to the four 4x4 blocks at ib, ib+1, ib+4, ib+5
+ m->bmi[ib + 1].as_mode.first = mode8x8;
+ m->bmi[ib + 4].as_mode.first = mode8x8;
+ m->bmi[ib + 5].as_mode.first = mode8x8;
+#if CONFIG_COMP_INTRA_PRED
+ m->bmi[ib + 0].as_mode.second = (MB_PREDICTION_MODE)(DC_PRED - 1);
+ m->bmi[ib + 1].as_mode.second = (MB_PREDICTION_MODE)(DC_PRED - 1);
+ m->bmi[ib + 4].as_mode.second = (MB_PREDICTION_MODE)(DC_PRED - 1);
+ m->bmi[ib + 5].as_mode.second = (MB_PREDICTION_MODE)(DC_PRED - 1);
+#endif
+ }
+ } else  // every mode except I8X8_PRED (B_PRED included) reads a chroma mode; uv_mode is not set here for I8X8_PRED
+ m->mbmi.uv_mode = (MB_PREDICTION_MODE)read_uv_mode(bc,
+ pbi->common.kf_uv_mode_prob[m->mbmi.mode]);
+#if CONFIG_COMP_INTRA_PRED
+ m->mbmi.second_uv_mode = (MB_PREDICTION_MODE)(DC_PRED - 1);  // not part of the else above: runs for every mode
+#endif
+
+ if (cm->txfm_mode == TX_MODE_SELECT && m->mbmi.mb_skip_coeff == 0 &&  // transform size is coded per macroblock
+ m->mbmi.mode <= I8X8_PRED) {
+ // FIXME(rbultje) code ternary symbol once all experiments are merged
+ m->mbmi.txfm_size = vp9_read(bc, cm->prob_tx[0]);
+ if (m->mbmi.txfm_size != TX_4X4 && m->mbmi.mode != I8X8_PRED)  // a second bit may raise the size by one step
+ m->mbmi.txfm_size += vp9_read(bc, cm->prob_tx[1]);
+ } else if (cm->txfm_mode >= ALLOW_16X16 && m->mbmi.mode <= TM_PRED) {  // otherwise derive the size from the frame mode
+ m->mbmi.txfm_size = TX_16X16;
+ } else if (cm->txfm_mode >= ALLOW_8X8 && m->mbmi.mode != B_PRED) {
+ m->mbmi.txfm_size = TX_8X8;
+ } else {
+ m->mbmi.txfm_size = TX_4X4;
+ }
+}
+
+static int read_nmv_component(vp9_reader *r,  // Read sign, class and integer offset of one MV component.
+ int rv,  // not used in this function
+ const nmv_component *mvcomp) {
+ int v, s, z, c, o, d;
+ s = vp9_read(r, mvcomp->sign);  // 1 = negative
+ c = treed_read(r, vp9_mv_class_tree, mvcomp->classes);
+ if (c == MV_CLASS_0) {
+ d = treed_read(r, vp9_mv_class0_tree, mvcomp->class0);
+ } else {
+ int i, b;
+ d = 0;
+ b = c + CLASS0_BITS - 1; /* number of bits */
+ for (i = 0; i < b; ++i)  // offset bits arrive least-significant first, each with its own probability
+ d |= (vp9_read(r, mvcomp->bits[i]) << i);
+ }
+ o = d << 3;  // low three bits stay zero; read_nmv_component_fp() fills them in later
+
+ z = vp9_get_mv_mag(c, o);
+ v = (s ? -(z + 8) : (z + 8));  // magnitude is offset by 8 before the sign is applied
+ return v;
+}
+
+static int read_nmv_component_fp(vp9_reader *r,  // Refine v (from read_nmv_component) with the fractional bits.
+ int v,
+ int rv,  // not used in this function
+ const nmv_component *mvcomp,
+ int usehp) {
+ int s, z, c, o, d, e, f;
+ s = v < 0;
+ z = (s ? -v : v) - 1; /* magnitude - 1 */
+ z &= ~7;  // drop the low three bits before re-deriving class and offset
+
+ c = vp9_get_mv_class(z, &o);
+ d = o >> 3;
+
+ if (c == MV_CLASS_0) {  // class 0 has one fractional probability set per integer offset d
+ f = treed_read(r, vp9_mv_fp_tree, mvcomp->class0_fp[d]);
+ } else {
+ f = treed_read(r, vp9_mv_fp_tree, mvcomp->fp);
+ }
+ o += (f << 1);  // f occupies bits 1..2 of the offset
+
+ if (usehp) {
+ if (c == MV_CLASS_0) {
+ e = vp9_read(r, mvcomp->class0_hp);
+ } else {
+ e = vp9_read(r, mvcomp->hp);
+ }
+ o += e;  // e is bit 0 of the offset
+ } else {
+ ++o; /* Note if hp is not used, the default value of the hp bit is 1 */
+ }
+ z = vp9_get_mv_mag(c, o);
+ v = (s ? -(z + 1) : (z + 1));  // undo the "magnitude - 1" taken above and restore the sign
+ return v;
+}
+
+static void read_nmv(vp9_reader *r, MV *mv, const MV *ref,  // Read the joint type and the integer part of *mv.
+ const nmv_context *mvctx) {
+ MV_JOINT_TYPE j = treed_read(r, vp9_mv_joint_tree, mvctx->joints);
+ mv->row = mv-> col = 0;  // components that are not coded stay zero
+ if (j == MV_JOINT_HZVNZ || j == MV_JOINT_HNZVNZ) {  // row component is coded (uses comps[0])
+ mv->row = read_nmv_component(r, ref->row, &mvctx->comps[0]);
+ }
+ if (j == MV_JOINT_HNZVZ || j == MV_JOINT_HNZVNZ) {  // column component is coded (uses comps[1])
+ mv->col = read_nmv_component(r, ref->col, &mvctx->comps[1]);
+ }
+}
+
+static void read_nmv_fp(vp9_reader *r, MV *mv, const MV *ref,  // Add the fractional part to each non-zero component of *mv.
+ const nmv_context *mvctx, int usehp) {
+ MV_JOINT_TYPE j = vp9_get_mv_joint(*mv);  // joint type is re-derived from the values already in *mv
+ usehp = usehp && vp9_use_nmv_hp(ref);  // high precision needs both the caller's flag and the reference check
+ if (j == MV_JOINT_HZVNZ || j == MV_JOINT_HNZVNZ) {
+ mv->row = read_nmv_component_fp(r, mv->row, ref->row, &mvctx->comps[0],
+ usehp);
+ }
+ if (j == MV_JOINT_HNZVZ || j == MV_JOINT_HNZVNZ) {
+ mv->col = read_nmv_component_fp(r, mv->col, ref->col, &mvctx->comps[1],
+ usehp);
+ }
+ //printf(" %d: %d %d ref: %d %d\n", usehp, mv->row, mv-> col, ref->row, ref->col);
+}
+
+static void update_nmv(vp9_reader *bc, vp9_prob *const p,  // Optionally replace *p; the update flag is read with probability upd_p.
+ const vp9_prob upd_p) {
+ if (vp9_read(bc, upd_p)) {  // flag clear: *p keeps its current value
+#ifdef LOW_PRECISION_MV_UPDATE
+ *p = (vp9_read_literal(bc, 7) << 1) | 1;  // 7 coded bits, result is always odd
+#else
+ *p = (vp9_read_literal(bc, 8));  // full 8-bit value
+#endif
+ }
+}
+
+static void read_nmvprobs(vp9_reader *bc, nmv_context *mvctx,  // Read per-frame updates for every MV probability in *mvctx.
+ int usehp) {
+ int i, j, k;
+#ifdef MV_GROUP_UPDATE
+ if (!vp9_read_bit(bc)) return;  // one bit can skip the whole group of updates
+#endif
+ for (j = 0; j < MV_JOINTS - 1; ++j) {  // joint-type probabilities
+ update_nmv(bc, &mvctx->joints[j],
+ VP9_NMV_UPDATE_PROB);
+ }
+ for (i = 0; i < 2; ++i) {  // per component: sign, classes, class0 and offset-bit probabilities
+ update_nmv(bc, &mvctx->comps[i].sign,
+ VP9_NMV_UPDATE_PROB);
+ for (j = 0; j < MV_CLASSES - 1; ++j) {
+ update_nmv(bc, &mvctx->comps[i].classes[j],
+ VP9_NMV_UPDATE_PROB);
+ }
+ for (j = 0; j < CLASS0_SIZE - 1; ++j) {
+ update_nmv(bc, &mvctx->comps[i].class0[j],
+ VP9_NMV_UPDATE_PROB);
+ }
+ for (j = 0; j < MV_OFFSET_BITS; ++j) {
+ update_nmv(bc, &mvctx->comps[i].bits[j],
+ VP9_NMV_UPDATE_PROB);
+ }
+ }
+
+ for (i = 0; i < 2; ++i) {  // second pass over both components: fractional probabilities
+ for (j = 0; j < CLASS0_SIZE; ++j) {
+ for (k = 0; k < 3; ++k)
+ update_nmv(bc, &mvctx->comps[i].class0_fp[j][k],
+ VP9_NMV_UPDATE_PROB);
+ }
+ for (j = 0; j < 3; ++j) {
+ update_nmv(bc, &mvctx->comps[i].fp[j],
+ VP9_NMV_UPDATE_PROB);
+ }
+ }
+
+ if (usehp) {  // high-precision probabilities are coded only when usehp is set
+ for (i = 0; i < 2; ++i) {
+ update_nmv(bc, &mvctx->comps[i].class0_hp,
+ VP9_NMV_UPDATE_PROB);
+ update_nmv(bc, &mvctx->comps[i].hp,
+ VP9_NMV_UPDATE_PROB);
+ }
+ }
+}
+
+// Read the reference frame of the current macroblock, which belongs to segment_id.
+static MV_REFERENCE_FRAME read_ref_frame(VP9D_COMP *pbi,
+ vp9_reader *const bc,
+ unsigned char segment_id) {
+ MV_REFERENCE_FRAME ref_frame;
+ int seg_ref_active;
+ int seg_ref_count = 0;
+
+ VP9_COMMON *const cm = &pbi->common;
+ MACROBLOCKD *const xd = &pbi->mb;
+
+ seg_ref_active = vp9_segfeature_active(xd,
+ segment_id,
+ SEG_LVL_REF_FRAME);
+
+ // If segment coding enabled does the segment allow for more than one
+ // possible reference frame
+ if (seg_ref_active) {  // sums the four per-frame checks (presumably 0 or 1 each)
+ seg_ref_count = vp9_check_segref(xd, segment_id, INTRA_FRAME) +
+ vp9_check_segref(xd, segment_id, LAST_FRAME) +
+ vp9_check_segref(xd, segment_id, GOLDEN_FRAME) +
+ vp9_check_segref(xd, segment_id, ALTREF_FRAME);
+ }
+
+ // Segment reference frame features not available or allows for
+ // multiple reference frame options
+ if (!seg_ref_active || (seg_ref_count > 1)) {
+ // Values used in prediction model coding
+ unsigned char prediction_flag;
+ vp9_prob pred_prob;
+ MV_REFERENCE_FRAME pred_ref;
+
+ // Get the context probability the prediction flag
+ pred_prob = vp9_get_pred_prob(cm, xd, PRED_REF);
+
+ // Read the prediction status flag
+ prediction_flag = (unsigned char)vp9_read(bc, pred_prob);
+
+ // Store the prediction flag.
+ vp9_set_pred_flag(xd, PRED_REF, prediction_flag);
+
+ // Get the predicted reference frame.
+ pred_ref = vp9_get_pred_ref(cm, xd);
+
+ // If correctly predicted then use the predicted value
+ if (prediction_flag) {
+ ref_frame = pred_ref;
+ }
+ // else decode the explicitly coded value
+ else {
+ vp9_prob mod_refprobs[PREDICTION_PROBS];  // local copy, so the zeroing below does not touch cm
+ vpx_memcpy(mod_refprobs,
+ cm->mod_refprobs[pred_ref], sizeof(mod_refprobs));
+
+ // If segment coding enabled blank out options that can't occur by
+ // setting the branch probability to 0.
+ if (seg_ref_active) {
+ mod_refprobs[INTRA_FRAME] *=
+ vp9_check_segref(xd, segment_id, INTRA_FRAME);
+ mod_refprobs[LAST_FRAME] *=
+ vp9_check_segref(xd, segment_id, LAST_FRAME);
+ mod_refprobs[GOLDEN_FRAME] *=  // the GF/ARF branch stays coded only if both frames are allowed
+ (vp9_check_segref(xd, segment_id, GOLDEN_FRAME) *
+ vp9_check_segref(xd, segment_id, ALTREF_FRAME));
+ }
+
+ // Default to INTRA_FRAME (value 0)
+ ref_frame = INTRA_FRAME;
+
+ // Do we need to decode the Intra/Inter branch
+ if (mod_refprobs[0])
+ ref_frame = (MV_REFERENCE_FRAME) vp9_read(bc, mod_refprobs[0]);
+ else  // probability 0: nothing is read and the inter side is taken
+ ref_frame++;
+
+ if (ref_frame) {
+ // Do we need to decode the Last/Gf_Arf branch
+ if (mod_refprobs[1])
+ ref_frame += vp9_read(bc, mod_refprobs[1]);
+ else  // probability 0: nothing is read and the GF/ARF side is taken
+ ref_frame++;
+
+ if (ref_frame > 1) {
+ // Do we need to decode the GF/Arf branch
+ if (mod_refprobs[2])
+ ref_frame += vp9_read(bc, mod_refprobs[2]);
+ else {  // branch not coded: infer the frame without reading a bit
+ if (seg_ref_active) {
+ if ((pred_ref == GOLDEN_FRAME) ||
+ !vp9_check_segref(xd, segment_id, GOLDEN_FRAME)) {
+ ref_frame = ALTREF_FRAME;
+ } else
+ ref_frame = GOLDEN_FRAME;
+ } else  // the prediction was wrong, so pick the frame that is not pred_ref
+ ref_frame = (pred_ref == GOLDEN_FRAME)
+ ? ALTREF_FRAME : GOLDEN_FRAME;
+ }
+ }
+ }
+ }
+ }
+
+ // Segment reference frame features are enabled
+ else {
+ // The reference frame for the mb is considered as correctly predicted
+ // if it is signaled at the segment level for the purposes of the
+ // common prediction model
+ vp9_set_pred_flag(xd, PRED_REF, 1);
+ ref_frame = vp9_get_pred_ref(cm, xd);  // no bits are read on this path
+ }
+
+ return (MV_REFERENCE_FRAME)ref_frame;
+}
+
+#if CONFIG_SUPERBLOCKS
+static MB_PREDICTION_MODE read_sb_mv_ref(vp9_reader *bc, const vp9_prob *p) {  // Superblock inter mode: vp9_sb_mv_ref_tree.
+ return (MB_PREDICTION_MODE) treed_read(bc, vp9_sb_mv_ref_tree, p);
+}
+#endif  // CONFIG_SUPERBLOCKS
+
+static MB_PREDICTION_MODE read_mv_ref(vp9_reader *bc, const vp9_prob *p) {  // Macroblock inter mode: vp9_mv_ref_tree.
+ return (MB_PREDICTION_MODE) treed_read(bc, vp9_mv_ref_tree, p);
+}
+
+static B_PREDICTION_MODE sub_mv_ref(vp9_reader *bc, const vp9_prob *p) {  // Sub-block inter mode: vp9_sub_mv_ref_tree.
+ return (B_PREDICTION_MODE) treed_read(bc, vp9_sub_mv_ref_tree, p);
+}
+
+#ifdef VPX_MODE_COUNT
+unsigned int vp9_mv_cont_count[5][4] = {  // 5 x 4 counters, all zero at start; updated outside this chunk
+ { 0, 0, 0, 0 },
+ { 0, 0, 0, 0 },
+ { 0, 0, 0, 0 },
+ { 0, 0, 0, 0 },
+ { 0, 0, 0, 0 }
+};
+#endif  // VPX_MODE_COUNT
+
+static const unsigned char mbsplit_fill_count[4] = {8, 8, 4, 1};  // presumably 4x4 blocks per partition for each split type -- TODO confirm
+static const unsigned char mbsplit_fill_offset[4][16] = {  // one row per split type: the 16 block indices 0..15 in fill order
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
+ { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15},
+ { 0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15},
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}
+};
+
+static void read_switchable_interp_probs(VP9D_COMP* const pbi,  // Read every switchable-filter probability as an 8-bit literal.
+ BOOL_DECODER* const bc) {
+ VP9_COMMON *const cm = &pbi->common;
+ int i, j;
+ for (j = 0; j <= VP9_SWITCHABLE_FILTERS; ++j) {  // inclusive bound: VP9_SWITCHABLE_FILTERS + 1 rows are read
+ for (i = 0; i < VP9_SWITCHABLE_FILTERS - 1; ++i) {
+ cm->fc.switchable_interp_prob[j][i] = vp9_read_literal(bc, 8);
+ }
+ }
+ //printf("DECODER: %d %d\n", cm->fc.switchable_interp_prob[0],
+ //cm->fc.switchable_interp_prob[1]);
+}
+
+static void mb_mode_mv_init(VP9D_COMP *pbi, vp9_reader *bc) {  // Read the frame-level mode and MV probabilities.
+ VP9_COMMON *const cm = &pbi->common;
+ nmv_context *const nmvc = &pbi->common.fc.nmvc;
+ MACROBLOCKD *const xd = &pbi->mb;
+
+ if (cm->frame_type == KEY_FRAME) {  // key frames carry at most the luma-mode probability index
+ if (!cm->kf_ymode_probs_update)
+ cm->kf_ymode_probs_index = vp9_read_literal(bc, 3);
+ } else {
+#if CONFIG_PRED_FILTER
+ cm->pred_filter_mode = (vp9_prob)vp9_read_literal(bc, 2);
+
+ if (cm->pred_filter_mode == 2)  // only mode 2 carries an explicit probability
+ cm->prob_pred_filter_off = (vp9_prob)vp9_read_literal(bc, 8);
+#endif
+ if (cm->mcomp_filter_type == SWITCHABLE)
+ read_switchable_interp_probs(pbi, bc);
+#if CONFIG_COMP_INTERINTRA_PRED
+ if (cm->use_interintra) {
+ if (vp9_read(bc, VP9_UPD_INTERINTRA_PROB))  // update flag, then the new 8-bit probability
+ cm->fc.interintra_prob = (vp9_prob)vp9_read_literal(bc, 8);
+ }
+#endif
+ // Decode the baseline probabilities for decoding reference frame
+ cm->prob_intra_coded = (vp9_prob)vp9_read_literal(bc, 8);
+ cm->prob_last_coded = (vp9_prob)vp9_read_literal(bc, 8);
+ cm->prob_gf_coded = (vp9_prob)vp9_read_literal(bc, 8);
+
+ // Computes a modified set of probabilities for use when reference
+ // frame prediction fails.
+ vp9_compute_mod_refprobs(cm);
+
+ pbi->common.comp_pred_mode = vp9_read(bc, 128);  // same object as cm->comp_pred_mode; first bit gives 0 or 1
+ if (cm->comp_pred_mode)
+ cm->comp_pred_mode += vp9_read(bc, 128);  // second bit can raise it to 2
+ if (cm->comp_pred_mode == HYBRID_PREDICTION) {
+ int i;
+ for (i = 0; i < COMP_PRED_CONTEXTS; i++)
+ cm->prob_comppred[i] = (vp9_prob)vp9_read_literal(bc, 8);
+ }
+
+ if (vp9_read_bit(bc)) {  // flag: replace the luma-mode probabilities
+ int i = 0;
+
+ do {
+ cm->fc.ymode_prob[i] = (vp9_prob) vp9_read_literal(bc, 8);
+ } while (++i < VP9_YMODES - 1);
+ }
+
+#if CONFIG_SUPERBLOCKS
+ if (vp9_read_bit(bc)) {  // flag: replace the superblock luma-mode probabilities
+ int i = 0;
+
+ do {
+ cm->fc.sb_ymode_prob[i] = (vp9_prob) vp9_read_literal(bc, 8);
+ } while (++i < VP9_I32X32_MODES - 1);
+ }
+#endif
+
+#if CONFIG_NEW_MVREF
+ // Temp default probabilities for encoding the MV ref id signal
+ vpx_memset(xd->mb_mv_ref_id_probs, 192, sizeof(xd->mb_mv_ref_id_probs));
+#endif
+
+ read_nmvprobs(bc, nmvc, xd->allow_high_precision_mv);
+ }
+}
+
+// This function either reads the segment id for the current macroblock from
+// the bitstream or, if the value is temporally predicted, applies the
+// predicted value. The result is stored in xd->mode_info_context->mbmi.
+static void read_mb_segment_id(VP9D_COMP *pbi,
+ int mb_row, int mb_col,
+ BOOL_DECODER* const bc) {
+ VP9_COMMON *const cm = &pbi->common;
+ MACROBLOCKD *const xd = &pbi->mb;
+ MODE_INFO *mi = xd->mode_info_context;
+ MB_MODE_INFO *mbmi = &mi->mbmi;
+ int index = mb_row * pbi->common.mb_cols + mb_col;  // raster index into last_frame_seg_map
+
+ if (xd->segmentation_enabled) {
+ if (xd->update_mb_segmentation_map) {
+ // Is temporal coding of the segment id for this mb enabled.
+ if (cm->temporal_update) {
+ // Get the context based probability for reading the
+ // prediction status flag
+ vp9_prob pred_prob =
+ vp9_get_pred_prob(cm, xd, PRED_SEG_ID);
+
+ // Read the prediction status flag
+ unsigned char seg_pred_flag =
+ (unsigned char)vp9_read(bc, pred_prob);
+
+ // Store the prediction flag.
+ vp9_set_pred_flag(xd, PRED_SEG_ID, seg_pred_flag);
+
+ // If the value is flagged as correctly predicted
+ // then use the predicted value
+ if (seg_pred_flag) {
+ mbmi->segment_id = vp9_get_pred_mb_segid(cm, xd, index);
+ }
+ // Else .... decode it explicitly
+ else {
+ read_mb_segid(bc, mbmi, xd);
+ }
+ }
+ // Normal unpredicted coding mode
+ else {
+ read_mb_segid(bc, mbmi, xd);
+ }
+#if CONFIG_SUPERBLOCKS
+ if (mbmi->encoded_as_sb) {  // a superblock writes its id to up to four map entries (2x2 MBs inside the frame)
+ cm->last_frame_seg_map[index] = mbmi->segment_id;
+ if (mb_col + 1 < cm->mb_cols)
+ cm->last_frame_seg_map[index + 1] = mbmi->segment_id;
+ if (mb_row + 1 < cm->mb_rows) {
+ cm->last_frame_seg_map[index + cm->mb_cols] = mbmi->segment_id;
+ if (mb_col + 1 < cm->mb_cols)
+ cm->last_frame_seg_map[index + cm->mb_cols + 1] = mbmi->segment_id;
+ }
+ } else
+#endif
+ {
+ cm->last_frame_seg_map[index] = mbmi->segment_id;
+ }
+ } else {  // map not updated this frame: reuse the stored ids
+#if CONFIG_SUPERBLOCKS
+ if (mbmi->encoded_as_sb) {
+ mbmi->segment_id = cm->last_frame_seg_map[index];
+ if (mb_col < cm->mb_cols - 1)  // NOTE(review): && is a logical AND, so the id collapses to 0 or 1 -- confirm the encoder does the same
+ mbmi->segment_id = mbmi->segment_id &&
+ cm->last_frame_seg_map[index + 1];
+ if (mb_row < cm->mb_rows - 1) {
+ mbmi->segment_id = mbmi->segment_id &&
+ cm->last_frame_seg_map[index + cm->mb_cols];
+ if (mb_col < cm->mb_cols - 1)
+ mbmi->segment_id = mbmi->segment_id &&
+ cm->last_frame_seg_map[index + cm->mb_cols + 1];
+ }
+ } else
+#endif
+ {
+ mbmi->segment_id = cm->last_frame_seg_map[index];
+ }
+ }
+ } else {
+ // The encoder explicitly sets the segment_id to 0
+ // when segmentation is disabled
+ mbmi->segment_id = 0;
+ }
+}
+/* Reads one inter-frame macroblock's segment id, skip flag, reference frame(s), mode, MVs and transform size. */
+static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
+ MODE_INFO *prev_mi,
+ int mb_row, int mb_col,
+ BOOL_DECODER* const bc) {
+ VP9_COMMON *const cm = &pbi->common;
+ nmv_context *const nmvc = &pbi->common.fc.nmvc;
+ const int mis = pbi->common.mode_info_stride;
+ MACROBLOCKD *const xd = &pbi->mb;
+
+ int_mv *const mv = &mbmi->mv[0];
+ int mb_to_left_edge;
+ int mb_to_right_edge;
+ int mb_to_top_edge;
+ int mb_to_bottom_edge;
+
+ mb_to_top_edge = xd->mb_to_top_edge;
+ mb_to_bottom_edge = xd->mb_to_bottom_edge;
+ mb_to_top_edge -= LEFT_TOP_MARGIN;
+ mb_to_bottom_edge += RIGHT_BOTTOM_MARGIN;
+ mbmi->need_to_clamp_mvs = 0;
+ mbmi->need_to_clamp_secondmv = 0;
+ mbmi->second_ref_frame = NONE;
+ /* Distance of Mb to the various image edges.
+ * These are specified in 1/8th pel because they are always compared to MV values that are in 1/8th pel units
+ */
+ xd->mb_to_left_edge =
+ mb_to_left_edge = -((mb_col * 16) << 3);
+ mb_to_left_edge -= LEFT_TOP_MARGIN;
+
+#if CONFIG_SUPERBLOCKS
+ if (mi->mbmi.encoded_as_sb) {  // right edge is one MB column closer than for a plain MB
+ xd->mb_to_right_edge =
+ mb_to_right_edge = ((pbi->common.mb_cols - 2 - mb_col) * 16) << 3;
+ } else {
+#endif
+ xd->mb_to_right_edge =
+ mb_to_right_edge = ((pbi->common.mb_cols - 1 - mb_col) * 16) << 3;
+#if CONFIG_SUPERBLOCKS
+ }
+#endif
+ mb_to_right_edge += RIGHT_BOTTOM_MARGIN;
+
+ // Make sure the MACROBLOCKD mode info pointer is pointed at the
+ // correct entry for the current macroblock.
+ xd->mode_info_context = mi;
+ xd->prev_mode_info_context = prev_mi;
+
+ // Read the macroblock segment id.
+ read_mb_segment_id(pbi, mb_row, mb_col, bc);
+
+ if (pbi->common.mb_no_coeff_skip &&
+ (!vp9_segfeature_active(xd,
+ mbmi->segment_id, SEG_LVL_EOB) ||
+ (vp9_get_segdata(xd, mbmi->segment_id, SEG_LVL_EOB) != 0))) {
+ // Read the macroblock coeff skip flag if this feature is in use,
+ // else default to 0
+ mbmi->mb_skip_coeff = vp9_read(bc, vp9_get_pred_prob(cm, xd, PRED_MBSKIP));
+ } else {
+ if (vp9_segfeature_active(xd,
+ mbmi->segment_id, SEG_LVL_EOB) &&
+ (vp9_get_segdata(xd, mbmi->segment_id, SEG_LVL_EOB) == 0)) {
+ mbmi->mb_skip_coeff = 1;
+ } else
+ mbmi->mb_skip_coeff = 0;
+ }
+
+ // Read the reference frame (forced to INTRA_FRAME when the segment's mode is below NEARESTMV)
+ if (vp9_segfeature_active(xd, mbmi->segment_id, SEG_LVL_MODE)
+ && vp9_get_segdata(xd, mbmi->segment_id, SEG_LVL_MODE) < NEARESTMV)
+ mbmi->ref_frame = INTRA_FRAME;
+ else
+ mbmi->ref_frame = read_ref_frame(pbi, bc, mbmi->segment_id);
+
+ // If reference frame is an Inter frame
+ if (mbmi->ref_frame) {
+ int_mv nearest, nearby, best_mv;
+ int_mv nearest_second, nearby_second, best_mv_second;
+ vp9_prob mv_ref_p [VP9_MVREFS - 1];
+
+ int recon_y_stride, recon_yoffset;
+ int recon_uv_stride, recon_uvoffset;
+
+ {
+ int ref_fb_idx;
+ MV_REFERENCE_FRAME ref_frame = mbmi->ref_frame;
+
+ /* Select the appropriate reference frame for this MB */
+ if (ref_frame == LAST_FRAME)
+ ref_fb_idx = cm->lst_fb_idx;
+ else if (ref_frame == GOLDEN_FRAME)
+ ref_fb_idx = cm->gld_fb_idx;
+ else
+ ref_fb_idx = cm->alt_fb_idx;
+
+ recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride ;
+ recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
+
+ recon_yoffset = (mb_row * recon_y_stride * 16) + (mb_col * 16);
+ recon_uvoffset = (mb_row * recon_uv_stride * 8) + (mb_col * 8);
+
+ xd->pre.y_buffer = cm->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
+ xd->pre.u_buffer = cm->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
+ xd->pre.v_buffer = cm->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
+
+#ifdef DEC_DEBUG
+ if (dec_debug)
+ printf("%d %d\n", xd->mode_info_context->mbmi.mv[0].as_mv.row,
+ xd->mode_info_context->mbmi.mv[0].as_mv.col);
+#endif
+ vp9_find_mv_refs(xd, mi, prev_mi,
+ ref_frame, mbmi->ref_mvs[ref_frame],
+ cm->ref_frame_sign_bias);
+
+ vp9_find_best_ref_mvs(xd,
+ xd->pre.y_buffer,
+ recon_y_stride,
+ mbmi->ref_mvs[ref_frame],
+ &best_mv, &nearest, &nearby);
+
+ vp9_mv_ref_probs(&pbi->common, mv_ref_p,
+ mbmi->mb_mode_context[ref_frame]);
+#ifdef DEC_DEBUG
+ if (dec_debug)
+ printf("[D %d %d] %d %d %d %d\n", ref_frame,
+ mbmi->mb_mode_context[ref_frame],
+ mv_ref_p[0], mv_ref_p[1], mv_ref_p[2], mv_ref_p[3]);
+#endif
+ }
+
+ // Is the segment level mode feature enabled for this segment
+ if (vp9_segfeature_active(xd, mbmi->segment_id, SEG_LVL_MODE)) {
+ mbmi->mode =
+ vp9_get_segdata(xd, mbmi->segment_id, SEG_LVL_MODE);
+ } else {
+#if CONFIG_SUPERBLOCKS
+ if (mbmi->encoded_as_sb) {
+ mbmi->mode = read_sb_mv_ref(bc, mv_ref_p);
+ } else
+#endif
+ mbmi->mode = read_mv_ref(bc, mv_ref_p);
+
+ vp9_accum_mv_refs(&pbi->common, mbmi->mode,
+ mbmi->mb_mode_context[mbmi->ref_frame]);
+ }
+
+#if CONFIG_PRED_FILTER
+ if (mbmi->mode >= NEARESTMV && mbmi->mode < SPLITMV) {
+ // Is the prediction filter enabled
+ if (cm->pred_filter_mode == 2)
+ mbmi->pred_filter_enabled =
+ vp9_read(bc, cm->prob_pred_filter_off);
+ else
+ mbmi->pred_filter_enabled = cm->pred_filter_mode;
+ }
+#endif
+ if (mbmi->mode >= NEARESTMV && mbmi->mode <= SPLITMV)
+ {
+ if (cm->mcomp_filter_type == SWITCHABLE) {
+ mbmi->interp_filter = vp9_switchable_interp[
+ treed_read(bc, vp9_switchable_interp_tree,
+ vp9_get_pred_probs(cm, xd, PRED_SWITCHABLE_INTERP))];
+ } else {
+ mbmi->interp_filter = cm->mcomp_filter_type;
+ }
+ }
+
+ if (cm->comp_pred_mode == COMP_PREDICTION_ONLY ||
+ (cm->comp_pred_mode == HYBRID_PREDICTION &&
+ vp9_read(bc, vp9_get_pred_prob(cm, xd, PRED_COMP)))) {
+ /* Since we have 3 reference frames, we can only have 3 unique
+ * combinations of 2 different reference frames
+ * (A-G, G-L or A-L). In the bitstream, we use this to simply
+ * derive the second reference frame from the first reference
+ * frame, by saying it's the next one in the enumerator, and
+ * if that's > n_refs, then the second reference frame is the
+ * first one in the enumerator. */
+ mbmi->second_ref_frame = mbmi->ref_frame + 1;
+ if (mbmi->second_ref_frame == 4)
+ mbmi->second_ref_frame = 1;
+ if (mbmi->second_ref_frame > 0) {
+ int second_ref_fb_idx;
+ /* Select the appropriate reference frame for this MB */
+ if (mbmi->second_ref_frame == LAST_FRAME)
+ second_ref_fb_idx = cm->lst_fb_idx;
+ else if (mbmi->second_ref_frame ==
+ GOLDEN_FRAME)
+ second_ref_fb_idx = cm->gld_fb_idx;
+ else
+ second_ref_fb_idx = cm->alt_fb_idx;
+
+ xd->second_pre.y_buffer =
+ cm->yv12_fb[second_ref_fb_idx].y_buffer + recon_yoffset;
+ xd->second_pre.u_buffer =
+ cm->yv12_fb[second_ref_fb_idx].u_buffer + recon_uvoffset;
+ xd->second_pre.v_buffer =
+ cm->yv12_fb[second_ref_fb_idx].v_buffer + recon_uvoffset;
+
+ vp9_find_mv_refs(xd, mi, prev_mi,
+ mbmi->second_ref_frame,
+ mbmi->ref_mvs[mbmi->second_ref_frame],
+ cm->ref_frame_sign_bias);
+
+ vp9_find_best_ref_mvs(xd,
+ xd->second_pre.y_buffer,
+ recon_y_stride,
+ mbmi->ref_mvs[mbmi->second_ref_frame],
+ &best_mv_second,
+ &nearest_second,
+ &nearby_second);
+ }
+
+ } else {
+#if CONFIG_COMP_INTERINTRA_PRED
+ if (pbi->common.use_interintra &&
+ mbmi->mode >= NEARESTMV && mbmi->mode < SPLITMV &&
+ mbmi->second_ref_frame == NONE) {
+ mbmi->second_ref_frame = (vp9_read(bc, pbi->common.fc.interintra_prob) ?
+ INTRA_FRAME : NONE);
+ // printf("-- %d (%d)\n", mbmi->second_ref_frame == INTRA_FRAME,
+ // pbi->common.fc.interintra_prob);
+ pbi->common.fc.interintra_counts[
+ mbmi->second_ref_frame == INTRA_FRAME]++;
+ if (mbmi->second_ref_frame == INTRA_FRAME) {
+ mbmi->interintra_mode = (MB_PREDICTION_MODE)read_ymode(
+ bc, pbi->common.fc.ymode_prob);
+ pbi->common.fc.ymode_counts[mbmi->interintra_mode]++;
+#if SEPARATE_INTERINTRA_UV
+ mbmi->interintra_uv_mode = (MB_PREDICTION_MODE)read_uv_mode(
+ bc, pbi->common.fc.uv_mode_prob[mbmi->interintra_mode]);
+ pbi->common.fc.uv_mode_counts[mbmi->interintra_mode]
+ [mbmi->interintra_uv_mode]++;
+#else
+ mbmi->interintra_uv_mode = mbmi->interintra_mode;
+#endif
+ // printf("** %d %d\n",
+ // mbmi->interintra_mode, mbmi->interintra_uv_mode);
+ }
+ }
+#endif
+ }
+
+ mbmi->uv_mode = DC_PRED;
+ switch (mbmi->mode) {
+ case SPLITMV: {
+ const int s = mbmi->partitioning =
+ treed_read(bc, vp9_mbsplit_tree, cm->fc.mbsplit_prob);
+ const int num_p = vp9_mbsplit_count [s];
+ int j = 0;
+ cm->fc.mbsplit_counts[s]++;
+
+ mbmi->need_to_clamp_mvs = 0;
+ do { /* for each subset j */
+ int_mv leftmv, abovemv, second_leftmv, second_abovemv;
+ int_mv blockmv, secondmv;
+ int k; /* first block in subset j */
+ int mv_contz;
+ int blockmode;
+
+ k = vp9_mbsplit_offset[s][j];
+
+ leftmv.as_int = left_block_mv(mi, k);
+ abovemv.as_int = above_block_mv(mi, k, mis);
+ second_leftmv.as_int = 0;
+ second_abovemv.as_int = 0;
+ if (mbmi->second_ref_frame > 0) {
+ second_leftmv.as_int = left_block_second_mv(mi, k);
+ second_abovemv.as_int = above_block_second_mv(mi, k, mis);
+ }
+ mv_contz = vp9_mv_cont(&leftmv, &abovemv);
+ blockmode = sub_mv_ref(bc, cm->fc.sub_mv_ref_prob [mv_contz]);
+ cm->fc.sub_mv_ref_counts[mv_contz][blockmode - LEFT4X4]++;
+
+ switch (blockmode) {
+ case NEW4X4:
+ read_nmv(bc, &blockmv.as_mv, &best_mv.as_mv, nmvc);
+ read_nmv_fp(bc, &blockmv.as_mv, &best_mv.as_mv, nmvc,
+ xd->allow_high_precision_mv);
+ vp9_increment_nmv(&blockmv.as_mv, &best_mv.as_mv,
+ &cm->fc.NMVcount, xd->allow_high_precision_mv);
+ blockmv.as_mv.row += best_mv.as_mv.row;
+ blockmv.as_mv.col += best_mv.as_mv.col;
+
+ if (mbmi->second_ref_frame > 0) {
+ read_nmv(bc, &secondmv.as_mv, &best_mv_second.as_mv, nmvc);
+ read_nmv_fp(bc, &secondmv.as_mv, &best_mv_second.as_mv, nmvc,
+ xd->allow_high_precision_mv);
+ vp9_increment_nmv(&secondmv.as_mv, &best_mv_second.as_mv,
+ &cm->fc.NMVcount, xd->allow_high_precision_mv);
+ secondmv.as_mv.row += best_mv_second.as_mv.row;
+ secondmv.as_mv.col += best_mv_second.as_mv.col;
+ }
+#ifdef VPX_MODE_COUNT
+ vp9_mv_cont_count[mv_contz][3]++;
+#endif
+ break;
+ case LEFT4X4:
+ blockmv.as_int = leftmv.as_int;
+ if (mbmi->second_ref_frame > 0)
+ secondmv.as_int = second_leftmv.as_int;
+#ifdef VPX_MODE_COUNT
+ vp9_mv_cont_count[mv_contz][0]++;
+#endif
+ break;
+ case ABOVE4X4:
+ blockmv.as_int = abovemv.as_int;
+ if (mbmi->second_ref_frame > 0)
+ secondmv.as_int = second_abovemv.as_int;
+#ifdef VPX_MODE_COUNT
+ vp9_mv_cont_count[mv_contz][1]++;
+#endif
+ break;
+ case ZERO4X4:
+ blockmv.as_int = 0;
+ if (mbmi->second_ref_frame > 0)
+ secondmv.as_int = 0;
+#ifdef VPX_MODE_COUNT
+ vp9_mv_cont_count[mv_contz][2]++;
+#endif
+ break;
+ default:
+ break;
+ }
+
+ /* Commenting this section out, not sure why this was needed, and
+ * there are mismatches with this section in rare cases since it is
+ * not done in the encoder at all.
+ mbmi->need_to_clamp_mvs |= check_mv_bounds(&blockmv,
+ mb_to_left_edge,
+ mb_to_right_edge,
+ mb_to_top_edge,
+ mb_to_bottom_edge);
+ if (mbmi->second_ref_frame > 0) {
+ mbmi->need_to_clamp_mvs |= check_mv_bounds(&secondmv,
+ mb_to_left_edge,
+ mb_to_right_edge,
+ mb_to_top_edge,
+ mb_to_bottom_edge);
+ }
+ */
+
+ {
+ /* Fill (uniform) modes, mvs of jth subset.
+ Must do it here because ensuing subsets can
+ refer back to us via "left" or "above". */
+ const unsigned char *fill_offset;
+ unsigned int fill_count = mbsplit_fill_count[s];
+
+ fill_offset = &mbsplit_fill_offset[s][(unsigned char)j * mbsplit_fill_count[s]];
+
+ do {
+ mi->bmi[ *fill_offset].as_mv.first.as_int = blockmv.as_int;
+ if (mbmi->second_ref_frame > 0)
+ mi->bmi[ *fill_offset].as_mv.second.as_int = secondmv.as_int;
+ fill_offset++;
+ } while (--fill_count);
+ }
+
+ } while (++j < num_p);
+ }
+
+ mv->as_int = mi->bmi[15].as_mv.first.as_int;  /* MB-level MVs are those of 4x4 block 15 */
+ mbmi->mv[1].as_int = mi->bmi[15].as_mv.second.as_int;
+
+ break; /* done with SPLITMV */
+
+ case NEARMV:
+ mv->as_int = nearby.as_int;
+ /* Clip "nearby" so that it does not extend too far out of image */
+ clamp_mv(mv, mb_to_left_edge, mb_to_right_edge,
+ mb_to_top_edge, mb_to_bottom_edge);
+ if (mbmi->second_ref_frame > 0) {
+ mbmi->mv[1].as_int = nearby_second.as_int;
+ clamp_mv(&mbmi->mv[1], mb_to_left_edge, mb_to_right_edge,
+ mb_to_top_edge, mb_to_bottom_edge);
+ }
+ break;
+
+ case NEARESTMV:
+ mv->as_int = nearest.as_int;
+ /* Clip "nearest" so that it does not extend too far out of image */
+ clamp_mv(mv, mb_to_left_edge, mb_to_right_edge,
+ mb_to_top_edge, mb_to_bottom_edge);
+ if (mbmi->second_ref_frame > 0) {
+ mbmi->mv[1].as_int = nearest_second.as_int;
+ clamp_mv(&mbmi->mv[1], mb_to_left_edge, mb_to_right_edge,
+ mb_to_top_edge, mb_to_bottom_edge);
+ }
+ break;
+
+ case ZEROMV:
+ mv->as_int = 0;
+ if (mbmi->second_ref_frame > 0)
+ mbmi->mv[1].as_int = 0;
+ break;
+
+ case NEWMV:
+
+#if CONFIG_NEW_MVREF
+ {
+ int best_index;
+ MV_REFERENCE_FRAME ref_frame = mbmi->ref_frame;
+
+ // Read the index of the choice.
+ best_index =
+ vp9_read_mv_ref_id(bc, xd->mb_mv_ref_id_probs[ref_frame]);
+
+ best_mv.as_int = mbmi->ref_mvs[ref_frame][best_index].as_int;
+ }
+#endif
+
+ read_nmv(bc, &mv->as_mv, &best_mv.as_mv, nmvc);
+ read_nmv_fp(bc, &mv->as_mv, &best_mv.as_mv, nmvc,
+ xd->allow_high_precision_mv);
+ vp9_increment_nmv(&mv->as_mv, &best_mv.as_mv, &cm->fc.NMVcount,
+ xd->allow_high_precision_mv);
+
+ mv->as_mv.row += best_mv.as_mv.row;
+ mv->as_mv.col += best_mv.as_mv.col;
+
+ /* Don't need to check this on NEARMV and NEARESTMV modes
+ * since those modes clamp the MV. The NEWMV mode does not,
+ * so signal to the prediction stage whether special
+ * handling may be required.
+ */
+ mbmi->need_to_clamp_mvs = check_mv_bounds(mv,
+ mb_to_left_edge,
+ mb_to_right_edge,
+ mb_to_top_edge,
+ mb_to_bottom_edge);
+
+ if (mbmi->second_ref_frame > 0) {
+#if CONFIG_NEW_MVREF
+ {
+ int best_index;
+ MV_REFERENCE_FRAME ref_frame = mbmi->second_ref_frame;
+
+ // Read the index of the choice.
+ best_index =
+ vp9_read_mv_ref_id(bc, xd->mb_mv_ref_id_probs[ref_frame]);
+ best_mv_second.as_int = mbmi->ref_mvs[ref_frame][best_index].as_int;
+ }
+#endif
+
+ read_nmv(bc, &mbmi->mv[1].as_mv, &best_mv_second.as_mv, nmvc);
+ read_nmv_fp(bc, &mbmi->mv[1].as_mv, &best_mv_second.as_mv, nmvc,
+ xd->allow_high_precision_mv);
+ vp9_increment_nmv(&mbmi->mv[1].as_mv, &best_mv_second.as_mv,
+ &cm->fc.NMVcount, xd->allow_high_precision_mv);
+ mbmi->mv[1].as_mv.row += best_mv_second.as_mv.row;
+ mbmi->mv[1].as_mv.col += best_mv_second.as_mv.col;
+ mbmi->need_to_clamp_secondmv |=
+ check_mv_bounds(&mbmi->mv[1],
+ mb_to_left_edge, mb_to_right_edge,
+ mb_to_top_edge, mb_to_bottom_edge);
+ }
+ break;
+ default:
+;
+#if CONFIG_DEBUG
+ assert(0);
+#endif
+ }
+ } else {
+ /* required for left and above block mv */
+ mbmi->mv[0].as_int = 0;
+
+ if (vp9_segfeature_active(xd, mbmi->segment_id, SEG_LVL_MODE)) {
+ mbmi->mode = (MB_PREDICTION_MODE)
+ vp9_get_segdata(xd, mbmi->segment_id, SEG_LVL_MODE);
+#if CONFIG_SUPERBLOCKS
+ } else if (mbmi->encoded_as_sb) {
+ mbmi->mode = (MB_PREDICTION_MODE)
+ read_sb_ymode(bc, pbi->common.fc.sb_ymode_prob);
+ pbi->common.fc.sb_ymode_counts[mbmi->mode]++;
+#endif
+ } else {
+ mbmi->mode = (MB_PREDICTION_MODE)
+ read_ymode(bc, pbi->common.fc.ymode_prob);
+ pbi->common.fc.ymode_counts[mbmi->mode]++;
+ }
+#if CONFIG_COMP_INTRA_PRED
+ mbmi->second_mode = (MB_PREDICTION_MODE)(DC_PRED - 1);  // presumably "no second mode", like B_DC_PRED - 1 below
+#endif
+
+ // If MB mode is BPRED read the block modes
+ if (mbmi->mode == B_PRED) {
+ int j = 0;
+#if CONFIG_COMP_INTRA_PRED
+ int use_comp_pred = vp9_read(bc, DEFAULT_COMP_INTRA_PROB);
+#endif
+ do {
+ int m;
+ m = mi->bmi[j].as_mode.first = (B_PREDICTION_MODE)
+ read_bmode(bc, pbi->common.fc.bmode_prob);
+#if CONFIG_NEWBINTRAMODES
+ if (m == B_CONTEXT_PRED) m -= CONTEXT_PRED_REPLACEMENTS;
+#endif
+ pbi->common.fc.bmode_counts[m]++;
+#if CONFIG_COMP_INTRA_PRED
+ if (use_comp_pred) {
+ mi->bmi[j].as_mode.second = (B_PREDICTION_MODE)read_bmode(bc, pbi->common.fc.bmode_prob);
+ } else {
+ mi->bmi[j].as_mode.second = (B_PREDICTION_MODE)(B_DC_PRED - 1);
+ }
+#endif
+ } while (++j < 16);
+ }
+
+ if (mbmi->mode == I8X8_PRED) {
+ int i;
+ int mode8x8;
+ for (i = 0; i < 4; i++) {
+ int ib = vp9_i8x8_block[i];
+ mode8x8 = read_i8x8_mode(bc, pbi->common.fc.i8x8_mode_prob);
+ mi->bmi[ib + 0].as_mode.first = mode8x8;
+ mi->bmi[ib + 1].as_mode.first = mode8x8;
+ mi->bmi[ib + 4].as_mode.first = mode8x8;
+ mi->bmi[ib + 5].as_mode.first = mode8x8;
+ pbi->common.fc.i8x8_mode_counts[mode8x8]++;
+#if CONFIG_COMP_INTRA_PRED
+ mi->bmi[ib + 0].as_mode.second = (MB_PREDICTION_MODE)(DC_PRED - 1);
+ mi->bmi[ib + 1].as_mode.second = (MB_PREDICTION_MODE)(DC_PRED - 1);
+ mi->bmi[ib + 4].as_mode.second = (MB_PREDICTION_MODE)(DC_PRED - 1);
+ mi->bmi[ib + 5].as_mode.second = (MB_PREDICTION_MODE)(DC_PRED - 1);
+#endif
+ }
+ } else {
+ mbmi->uv_mode = (MB_PREDICTION_MODE)read_uv_mode(
+ bc, pbi->common.fc.uv_mode_prob[mbmi->mode]);
+ pbi->common.fc.uv_mode_counts[mbmi->mode][mbmi->uv_mode]++;
+ }
+
+#if CONFIG_COMP_INTRA_PRED
+ mbmi->second_uv_mode = (MB_PREDICTION_MODE)(DC_PRED - 1);
+#endif
+ }
+ // Transform size: read from the bitstream under TX_MODE_SELECT, otherwise implied by txfm_mode and the mode.
+ if (cm->txfm_mode == TX_MODE_SELECT && mbmi->mb_skip_coeff == 0 &&
+ ((mbmi->ref_frame == INTRA_FRAME && mbmi->mode <= I8X8_PRED) ||
+ (mbmi->ref_frame != INTRA_FRAME && !(mbmi->mode == SPLITMV &&
+ mbmi->partitioning == PARTITIONING_4X4)))) {
+ // FIXME(rbultje) code ternary symbol once all experiments are merged
+ mbmi->txfm_size = vp9_read(bc, cm->prob_tx[0]);
+ if (mbmi->txfm_size != TX_4X4 && mbmi->mode != I8X8_PRED &&
+ mbmi->mode != SPLITMV)
+ mbmi->txfm_size += vp9_read(bc, cm->prob_tx[1]);
+ } else if (cm->txfm_mode >= ALLOW_16X16 &&
+ ((mbmi->ref_frame == INTRA_FRAME && mbmi->mode <= TM_PRED) ||
+ (mbmi->ref_frame != INTRA_FRAME && mbmi->mode != SPLITMV))) {
+ mbmi->txfm_size = TX_16X16;
+ } else if (cm->txfm_mode >= ALLOW_8X8 &&
+ (!(mbmi->ref_frame == INTRA_FRAME && mbmi->mode == B_PRED) &&
+ !(mbmi->ref_frame != INTRA_FRAME && mbmi->mode == SPLITMV &&
+ mbmi->partitioning == PARTITIONING_4X4))) {
+ mbmi->txfm_size = TX_8X8;
+ } else {
+ mbmi->txfm_size = TX_4X4;
+ }
+}
+
+/* Reads the per-context macroblock-skip probabilities (left at zero when
+ * coefficient skipping is off), then runs mb_mode_mv_init(). */
+void vp9_decode_mode_mvs_init(VP9D_COMP* const pbi, BOOL_DECODER* const bc) {
+  VP9_COMMON *const common = &pbi->common;
+  int ctx;
+  vpx_memset(common->mbskip_pred_probs, 0, sizeof(common->mbskip_pred_probs));
+  if (common->mb_no_coeff_skip) {
+    for (ctx = 0; ctx < MBSKIP_CONTEXTS; ++ctx)
+      common->mbskip_pred_probs[ctx] = (vp9_prob)vp9_read_literal(bc, 8);
+  }
+  mb_mode_mv_init(pbi, bc);
+}
+/* Decodes one macroblock's mode info into xd->mode_info_context:
+ * kfread_modes() on key frames, read_mb_modes_mv() on every other frame. */
+void vp9_decode_mb_mode_mv(VP9D_COMP* const pbi,
+                           MACROBLOCKD* const xd,
+                           int mb_row, int mb_col,
+                           BOOL_DECODER* const bc) {
+  MODE_INFO *const cur = xd->mode_info_context;
+  if (pbi->common.frame_type != KEY_FRAME)
+    read_mb_modes_mv(pbi, cur, &cur->mbmi, xd->prev_mode_info_context,
+                     mb_row, mb_col, bc);
+  else
+    kfread_modes(pbi, cur, mb_row, mb_col, bc);
+}
diff --git a/vp9/decoder/vp9_decodemv.h b/vp9/decoder/vp9_decodemv.h
new file mode 100644
index 0000000..a5c4467
--- /dev/null
+++ b/vp9/decoder/vp9_decodemv.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef VP9_DECODER_VP9_DECODEMV_H_
+#define VP9_DECODER_VP9_DECODEMV_H_
+#include "vp9/decoder/vp9_onyxd_int.h"
+
+/* Decodes the mode/MV info of macroblock (mb_row, mb_col) into xd->mode_info_context. */
+void vp9_decode_mb_mode_mv(VP9D_COMP* const pbi, MACROBLOCKD* const xd,
+                           int mb_row, int mb_col, BOOL_DECODER* const bc);
+/* Reads the skip probabilities (when in use), then runs the rest of the mode/MV setup. */
+void vp9_decode_mode_mvs_init(VP9D_COMP* const pbi, BOOL_DECODER* const bc);
+#endif  /* VP9_DECODER_VP9_DECODEMV_H_ */
diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c
new file mode 100644
index 0000000..12feca6
--- /dev/null
+++ b/vp9/decoder/vp9_decodframe.c
@@ -0,0 +1,1728 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vp9/decoder/vp9_onyxd_int.h"
+#include "vp9/common/vp9_header.h"
+#include "vp9/common/vp9_reconintra.h"
+#include "vp9/common/vp9_reconintra4x4.h"
+#include "vp9/common/vp9_reconinter.h"
+#include "vp9/decoder/vp9_decodframe.h"
+#include "vp9/decoder/vp9_detokenize.h"
+#include "vp9/common/vp9_invtrans.h"
+#include "vp9/common/vp9_alloccommon.h"
+#include "vp9/common/vp9_entropymode.h"
+#include "vp9/common/vp9_quant_common.h"
+#include "vpx_scale/vpxscale.h"
+#include "vp9/common/vp9_setupintrarecon.h"
+
+#include "vp9/decoder/vp9_decodemv.h"
+#include "vp9/common/vp9_extend.h"
+#include "vp9/common/vp9_modecont.h"
+#include "vpx_mem/vpx_mem.h"
+#include "vp9/decoder/vp9_dboolhuff.h"
+
+#include "vp9/common/vp9_seg_common.h"
+#include "vp9/common/vp9_entropy.h"
+#include "vp9_rtcd.h"
+
+#include <assert.h>
+#include <stdio.h>
+
+#define COEFCOUNT_TESTING
+
+//#define DEC_DEBUG
+#ifdef DEC_DEBUG
+int dec_debug = 0;
+#endif
+
+/* Maps a coded index v back to a position: the first max1 codes are the
+ * positions congruent to modulus/2 (mod modulus), the rest fill the gaps. */
+static int merge_index(int v, int n, int modulus) {
+  const int half = modulus / 2;
+  const int max1 = (n - 1 - half) / modulus + 1;
+  int rank, pos;
+  if (v < max1) return v * modulus + half;
+  rank = v - max1;  /* index among the positions not congruent to half */
+  pos = rank + (rank + modulus - half) / modulus;
+  while (pos % modulus == half ||
+         rank != pos - (pos + modulus - half) / modulus) ++pos;
+  return pos;
+}
+
+/* Turns the coded delta v into an updated value, given the old value m.
+ * v goes through merge_index(), then through vp9_inv_recenter_nonneg()
+ * around m when 2 * m <= 256, or mirrored around 255 - m otherwise. */
+static int inv_remap_prob(int v, int m) {
+  const int n = 256;
+  const int merged = merge_index(v, n - 1, MODULUS_PARAM);
+
+  if ((m << 1) > n)
+    return n - 1 - vp9_inv_recenter_nonneg(merged + 1, n - 1 - m);
+
+  return vp9_inv_recenter_nonneg(merged + 1, m);
+}
+
+static vp9_prob read_prob_diff_update(vp9_reader *const bc, int oldp) {
+  /* Coded delta from the bitstream, mapped through inv_remap_prob() with oldp. */
+  return (vp9_prob)inv_remap_prob(vp9_decode_term_subexp(bc, SUBEXP_PARAM, 255), oldp);
+}
+
+/* Builds the Y1/Y2/UV dequantiser tables for every q index: entry 0 is the
+ * DC value, entries vp9_default_zig_zag1d[1..15] hold the AC value. */
+void vp9_init_de_quantizer(VP9D_COMP *pbi) {
+  VP9_COMMON *const pc = &pbi->common;
+  int q, pos;
+  for (q = 0; q < QINDEX_RANGE; q++) {
+    short *const y1 = pc->Y1dequant[q];
+    short *const y2 = pc->Y2dequant[q];
+    short *const uv = pc->UVdequant[q];
+    y1[0] = (short)vp9_dc_quant(q, pc->y1dc_delta_q);
+    y2[0] = (short)vp9_dc2quant(q, pc->y2dc_delta_q);
+    uv[0] = (short)vp9_dc_uv_quant(q, pc->uvdc_delta_q);
+    for (pos = 1; pos < 16; pos++) {
+      const int rc = vp9_default_zig_zag1d[pos];
+      y1[rc] = (short)vp9_ac_yquant(q);
+      y2[rc] = (short)vp9_ac2quant(q, pc->y2ac_delta_q);
+      uv[rc] = (short)vp9_ac_uv_quant(q, pc->uvac_delta_q);
+    }
+  }
+}
+
+/* Sets up dequantisation for the current macroblock.
+ *
+ * The q index is pc->base_qindex unless SEG_LVL_ALT_Q is active for the
+ * macroblock's segment; the segment data then either replaces it
+ * (SEGMENT_ABSDATA) or is added to it, the sum being clamped to [0, MAXQ].
+ * The index is stored in xd->q_index and selects the table rows that
+ * blocks 0-15 (Y1), 16-23 (UV) and 24 (Y2) point at.  The inverse transform
+ * and dequant/idct function pointers of pbi are (re)assigned as well.
+ */
+static void mb_init_dequantizer(VP9D_COMP *pbi, MACROBLOCKD *xd) {
+  VP9_COMMON *const pc = &pbi->common;
+  const int segment_id = xd->mode_info_context->mbmi.segment_id;
+  int qindex = pc->base_qindex;
+  int blk;
+
+  if (vp9_segfeature_active(xd, segment_id, SEG_LVL_ALT_Q)) {
+    const int seg_q = vp9_get_segdata(xd, segment_id, SEG_LVL_ALT_Q);
+
+    if (xd->mb_segment_abs_delta == SEGMENT_ABSDATA) {
+      qindex = seg_q;   /* absolute value, taken as is */
+    } else {
+      qindex += seg_q;  /* delta on the base q index, clamped below */
+      if (qindex < 0) {
+        qindex = 0;
+      } else if (qindex > MAXQ) {
+        qindex = MAXQ;
+      }
+    }
+  }
+  xd->q_index = qindex;
+
+  /* Point every block at the table row for this q index. */
+  for (blk = 0; blk < 16; blk++) {
+    xd->block[blk].dequant = pc->Y1dequant[qindex];
+  }
+  for (blk = 16; blk < 24; blk++) {
+    xd->block[blk].dequant = pc->UVdequant[qindex];
+  }
+  xd->block[24].dequant = pc->Y2dequant[qindex];
+
+  /* Pick the inverse-transform / dequant function set. */
+#if CONFIG_LOSSLESS
+  if (qindex == 0) {
+    /* q index 0 switches to the Walsh-based lossless function set. */
+    pbi->mb.inv_xform4x4_1_x8 = vp9_short_inv_walsh4x4_1_x8;
+    pbi->mb.inv_xform4x4_x8 = vp9_short_inv_walsh4x4_x8;
+    pbi->mb.inv_walsh4x4_1 = vp9_short_inv_walsh4x4_1_lossless;
+    pbi->mb.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4_lossless;
+    pbi->idct_add = vp9_dequant_idct_add_lossless_c;
+    pbi->dc_idct_add = vp9_dequant_dc_idct_add_lossless_c;
+    pbi->dc_idct_add_y_block = vp9_dequant_dc_idct_add_y_block_lossless_c;
+    pbi->idct_add_y_block = vp9_dequant_idct_add_y_block_lossless_c;
+    pbi->idct_add_uv_block = vp9_dequant_idct_add_uv_block_lossless_c;
+    return;
+  }
+#endif
+
+  /* Regular function set. */
+  pbi->mb.inv_xform4x4_1_x8 = vp9_short_idct4x4llm_1;
+  pbi->mb.inv_xform4x4_x8 = vp9_short_idct4x4llm;
+  pbi->mb.inv_walsh4x4_1 = vp9_short_inv_walsh4x4_1;
+  pbi->mb.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4;
+  pbi->idct_add = vp9_dequant_idct_add;
+  pbi->dc_idct_add = vp9_dequant_dc_idct_add;
+  pbi->dc_idct_add_y_block = vp9_dequant_dc_idct_add_y_block;
+  pbi->idct_add_y_block = vp9_dequant_idct_add_y_block;
+  pbi->idct_add_uv_block = vp9_dequant_idct_add_uv_block;
+}
+
+/* Builds the prediction of the current macroblock directly in the
+ * destination buffers (xd->dst) instead of writing it to the predictor
+ * buffer and copying it afterwards, which saves the copy.
+ *
+ * Intra macroblocks use the "_s" intra predictor builders (superblock or
+ * macroblock variant).  Inter macroblocks use the 32x32 superblock builder,
+ * or the 16x16 first-reference builder followed, when applicable, by the
+ * second-reference or inter-intra builder.  pbi is not used.
+ */
+static void skip_recon_mb(VP9D_COMP *pbi, MACROBLOCKD *xd) {
+  const MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
+
+  if (mbmi->ref_frame == INTRA_FRAME) {
+#if CONFIG_SUPERBLOCKS
+    if (mbmi->encoded_as_sb) {
+      vp9_build_intra_predictors_sbuv_s(xd);
+      vp9_build_intra_predictors_sby_s(xd);
+      return;
+    }
+#endif
+    /* Plain macroblock. */
+    vp9_build_intra_predictors_mbuv_s(xd);
+    vp9_build_intra_predictors_mby_s(xd);
+    return;
+  }
+
+  /* Inter prediction from here on. */
+#if CONFIG_SUPERBLOCKS
+  if (mbmi->encoded_as_sb) {
+    vp9_build_inter32x32_predictors_sb(xd, xd->dst.y_buffer, xd->dst.u_buffer,
+                                       xd->dst.v_buffer, xd->dst.y_stride,
+                                       xd->dst.uv_stride);
+    return;
+  }
+#endif
+
+  /* First reference, then (if any) the second one into the same buffers. */
+  vp9_build_1st_inter16x16_predictors_mb(xd, xd->dst.y_buffer,
+                                         xd->dst.u_buffer, xd->dst.v_buffer,
+                                         xd->dst.y_stride, xd->dst.uv_stride);
+  if (mbmi->second_ref_frame > 0) {
+    vp9_build_2nd_inter16x16_predictors_mb(xd, xd->dst.y_buffer,
+                                           xd->dst.u_buffer, xd->dst.v_buffer,
+                                           xd->dst.y_stride,
+                                           xd->dst.uv_stride);
+  }
+#if CONFIG_COMP_INTERINTRA_PRED
+  else if (mbmi->second_ref_frame == INTRA_FRAME) {
+    /* Combined inter-intra prediction. */
+    vp9_build_interintra_16x16_predictors_mb(xd, xd->dst.y_buffer,
+                                             xd->dst.u_buffer,
+                                             xd->dst.v_buffer,
+                                             xd->dst.y_stride,
+                                             xd->dst.uv_stride);
+  }
+#endif
+}
+
+/* Adds the residual of a macroblock coded with a 16x16 luma transform: one
+ * 16x16 inverse transform for luma (the hybrid version when the transform
+ * type is not DCT_DCT), then the 8x8 chroma path.  pbi and bc are unused. */
+static void decode_16x16(VP9D_COMP *pbi, MACROBLOCKD *xd,
+                         BOOL_DECODER* const bc) {
+  const TX_TYPE tx_type = get_tx_type_16x16(xd, &xd->block[0]);
+  assert(get_2nd_order_usage(xd) == 0);
+#ifdef DEC_DEBUG
+  if (dec_debug) {
+    int i;
+    printf("\nqcoeff 16x16\n");
+    for (i = 0; i < 400; i++) {
+      printf("%3d ", xd->qcoeff[i]);
+      if (i % 16 == 15) printf("\n");
+    }
+    printf("\npredictor\n");
+    for (i = 0; i < 384; i++) {  /* 256 luma + 128 chroma, as in decode_8x8() */
+      printf("%3d ", xd->predictor[i]);
+      if (i % 16 == 15) printf("\n");
+    }
+  }
+#endif
+  if (tx_type != DCT_DCT) {
+    vp9_ht_dequant_idct_add_16x16_c(tx_type, xd->qcoeff,
+                                    xd->block[0].dequant, xd->predictor,
+                                    xd->dst.y_buffer, 16, xd->dst.y_stride,
+                                    xd->eobs[0]);
+  } else {
+    vp9_dequant_idct_add_16x16(xd->qcoeff, xd->block[0].dequant,
+                               xd->predictor, xd->dst.y_buffer,
+                               16, xd->dst.y_stride, xd->eobs[0]);
+  }
+  vp9_dequant_idct_add_uv_block_8x8(
+      xd->qcoeff + 16 * 16, xd->block[16].dequant,
+      xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer,
+      xd->dst.uv_stride, xd->eobs + 16, xd);
+}
+
+/* Adds the residual of a macroblock coded with the 8x8 transform.
+ *
+ * Luma takes one of three paths: per-8x8-block transforms (I8X8_PRED, or a
+ * non-DCT transform type on block 0), the plain 8x8 luma block function for
+ * SPLITMV, or - for everything else - the 2x2 second-order block (block 24)
+ * followed by the DC-aware 8x8 luma function.  Chroma follows, again chosen
+ * by mode.  bc is not used.
+ */
+static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd,
+                       BOOL_DECODER* const bc) {
+  const MB_PREDICTION_MODE mode = xd->mode_info_context->mbmi.mode;
+  // First do Y
+  // if the first one is DCT_DCT assume all the rest are as well
+  TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[0]);
+#ifdef DEC_DEBUG
+  if (dec_debug) {
+    int i;
+    printf("\n");
+    printf("qcoeff 8x8\n");
+    for (i = 0; i < 400; i++) {
+      printf("%3d ", xd->qcoeff[i]);
+      if (i % 16 == 15) printf("\n");
+    }
+  }
+#endif
+  if (tx_type != DCT_DCT || mode == I8X8_PRED) {
+    int i;
+    assert(get_2nd_order_usage(xd) == 0);
+    for (i = 0; i < 4; i++) {
+      int ib = vp9_i8x8_block[i];
+      int idx = (ib & 0x02) ? (ib + 2) : ib;
+      short *q = xd->block[idx].qcoeff;
+      short *dq = xd->block[0].dequant;
+      unsigned char *pre = xd->block[ib].predictor;
+      unsigned char *dst = *(xd->block[ib].base_dst) + xd->block[ib].dst;
+      int stride = xd->dst.y_stride;
+      BLOCKD *b = &xd->block[ib];
+      if (mode == I8X8_PRED) {
+        int i8x8mode = b->bmi.as_mode.first;
+        vp9_intra8x8_predict(b, i8x8mode, b->predictor);
+      }
+      tx_type = get_tx_type_8x8(xd, &xd->block[ib]);
+      if (tx_type != DCT_DCT) {
+        vp9_ht_dequant_idct_add_8x8_c(tx_type, q, dq, pre, dst, 16, stride,
+                                      xd->eobs[idx]);
+      } else {
+        vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride,
+                                   0, xd->eobs[idx]);
+      }
+    }
+  } else if (mode == SPLITMV) {
+    assert(get_2nd_order_usage(xd) == 0);
+    vp9_dequant_idct_add_y_block_8x8(xd->qcoeff, xd->block[0].dequant,
+                                     xd->predictor, xd->dst.y_buffer,
+                                     xd->dst.y_stride, xd->eobs, xd);
+  } else {
+    BLOCKD *b = &xd->block[24];
+    assert(get_2nd_order_usage(xd) == 1);
+    vp9_dequantize_b_2x2(b);
+    vp9_short_ihaar2x2(&b->dqcoeff[0], b->diff, 8);
+    // 2nd order block coefficients (16 shorts) are set to 0 after idct
+    vpx_memset(b->qcoeff, 0, 16 * sizeof(b->qcoeff[0]));
+    vp9_dequant_dc_idct_add_y_block_8x8(xd->qcoeff,
+                                        xd->block[0].dequant,
+                                        xd->predictor,
+                                        xd->dst.y_buffer,
+                                        xd->dst.y_stride,
+                                        xd->eobs,
+                                        xd->block[24].diff,
+                                        xd);
+  }
+
+  // Now do UV
+  if (mode == I8X8_PRED) {
+    int i;
+    for (i = 0; i < 4; i++) {
+      int ib = vp9_i8x8_block[i];
+      BLOCKD *b = &xd->block[ib];
+      int i8x8mode = b->bmi.as_mode.first;
+      b = &xd->block[16 + i];
+      vp9_intra_uv4x4_predict(&xd->block[16 + i], i8x8mode, b->predictor);
+      pbi->idct_add(b->qcoeff, b->dequant, b->predictor,
+                    *(b->base_dst) + b->dst, 8, b->dst_stride);
+      b = &xd->block[20 + i];
+      vp9_intra_uv4x4_predict(&xd->block[20 + i], i8x8mode, b->predictor);
+      pbi->idct_add(b->qcoeff, b->dequant, b->predictor,
+                    *(b->base_dst) + b->dst, 8, b->dst_stride);
+    }
+  } else if (mode == SPLITMV) {
+    pbi->idct_add_uv_block(xd->qcoeff + 16 * 16, xd->block[16].dequant,
+         xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer,
+         xd->dst.uv_stride, xd->eobs + 16);
+  } else {
+    vp9_dequant_idct_add_uv_block_8x8
+        (xd->qcoeff + 16 * 16, xd->block[16].dequant,
+         xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer,
+         xd->dst.uv_stride, xd->eobs + 16, xd);
+  }
+#ifdef DEC_DEBUG
+  if (dec_debug) {
+    int i;
+    printf("\n");
+    printf("predictor\n");
+    for (i = 0; i < 384; i++) {
+      printf("%3d ", xd->predictor[i]);
+      if (i % 16 == 15) printf("\n");
+    }
+  }
+#endif
+}
+
+/* Reconstructs one macroblock whose residual uses the 4x4 transform.
+ * Dispatches on the macroblock prediction mode (I8X8_PRED, B_PRED, SPLITMV,
+ * or any other mode) and adds the dequantized, inverse-transformed residual
+ * to the predictor, writing to the destination pointers (b->base_dst + b->dst
+ * per block, or xd->dst for the whole-plane helpers).
+ *
+ * pbi - decoder instance; supplies the idct_add* function pointers
+ * xd  - macroblock state (blocks, qcoeff, predictor, dst buffers, eobs)
+ * bc  - bool decoder; only read in the B_PRED branch, where coefficient
+ *       tokens are decoded here instead of up front
+ */
+static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
+ BOOL_DECODER* const bc) {
+ TX_TYPE tx_type;
+ /* eobtotal is only accumulated in the B_PRED branch and is not read
+ * anywhere else in this function. */
+ int i, eobtotal = 0;
+ MB_PREDICTION_MODE mode = xd->mode_info_context->mbmi.mode;
+ if (mode == I8X8_PRED) {
+ assert(get_2nd_order_usage(xd) == 0);
+ /* For each of the four 8x8 luma blocks: one 8x8 intra prediction, then
+ * four 4x4 transforms (block offsets 0, 1, 4, 5 from the 8x8 origin),
+ * then the chroma blocks 16 + i and 20 + i with the same intra mode. */
+ for (i = 0; i < 4; i++) {
+ int ib = vp9_i8x8_block[i];
+ const int iblock[4] = {0, 1, 4, 5};
+ int j;
+ int i8x8mode;
+ BLOCKD *b;
+ b = &xd->block[ib];
+ i8x8mode = b->bmi.as_mode.first;
+ vp9_intra8x8_predict(b, i8x8mode, b->predictor);
+ for (j = 0; j < 4; j++) {
+ b = &xd->block[ib + iblock[j]];
+ tx_type = get_tx_type_4x4(xd, b);
+ if (tx_type != DCT_DCT) {
+ vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff,
+ b->dequant, b->predictor,
+ *(b->base_dst) + b->dst, 16,
+ b->dst_stride, b->eob);
+ } else {
+ vp9_dequant_idct_add(b->qcoeff, b->dequant, b->predictor,
+ *(b->base_dst) + b->dst, 16, b->dst_stride);
+ }
+ }
+ b = &xd->block[16 + i];
+ vp9_intra_uv4x4_predict(b, i8x8mode, b->predictor);
+ pbi->idct_add(b->qcoeff, b->dequant, b->predictor,
+ *(b->base_dst) + b->dst, 8, b->dst_stride);
+ b = &xd->block[20 + i];
+ vp9_intra_uv4x4_predict(b, i8x8mode, b->predictor);
+ pbi->idct_add(b->qcoeff, b->dequant, b->predictor,
+ *(b->base_dst) + b->dst, 8, b->dst_stride);
+ }
+ } else if (mode == B_PRED) {
+ assert(get_2nd_order_usage(xd) == 0);
+ /* Per-block pipeline for the 16 luma blocks: decode this block's tokens
+ * (unless the macroblock is flagged as skipped), predict, then add the
+ * residual before moving on to the next block. */
+ for (i = 0; i < 16; i++) {
+ int b_mode;
+#if CONFIG_COMP_INTRA_PRED
+ int b_mode2;
+#endif
+ BLOCKD *b = &xd->block[i];
+ b_mode = xd->mode_info_context->bmi[i].as_mode.first;
+#if CONFIG_NEWBINTRAMODES
+ xd->mode_info_context->bmi[i].as_mode.context = b->bmi.as_mode.context =
+ vp9_find_bpred_context(b);
+#endif
+ if (!xd->mode_info_context->mbmi.mb_skip_coeff)
+ eobtotal += vp9_decode_coefs_4x4(pbi, xd, bc, PLANE_TYPE_Y_WITH_DC, i);
+#if CONFIG_COMP_INTRA_PRED
+ b_mode2 = xd->mode_info_context->bmi[i].as_mode.second;
+
+ if (b_mode2 == (B_PREDICTION_MODE)(B_DC_PRED - 1)) {
+#endif
+ vp9_intra4x4_predict(b, b_mode, b->predictor);
+#if CONFIG_COMP_INTRA_PRED
+ } else {
+ vp9_comp_intra4x4_predict(b, b_mode, b_mode2, b->predictor);
+ }
+#endif
+ tx_type = get_tx_type_4x4(xd, b);
+ if (tx_type != DCT_DCT) {
+ vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff,
+ b->dequant, b->predictor,
+ *(b->base_dst) + b->dst, 16, b->dst_stride,
+ b->eob);
+ } else {
+ vp9_dequant_idct_add(b->qcoeff, b->dequant, b->predictor,
+ *(b->base_dst) + b->dst, 16, b->dst_stride);
+ }
+ xd->above_context->y2 = 1;
+ xd->left_context->y2 = 1;
+ }
+ /* Chroma tokens are decoded after all luma blocks have been handled. */
+ if (!xd->mode_info_context->mbmi.mb_skip_coeff) {
+ vp9_decode_mb_tokens_4x4_uv(pbi, xd, bc);
+ }
+ vp9_build_intra_predictors_mbuv(xd);
+ pbi->idct_add_uv_block(xd->qcoeff + 16 * 16,
+ xd->block[16].dequant,
+ xd->predictor + 16 * 16,
+ xd->dst.u_buffer,
+ xd->dst.v_buffer,
+ xd->dst.uv_stride,
+ xd->eobs + 16);
+ } else if (mode == SPLITMV) {
+ assert(get_2nd_order_usage(xd) == 0);
+ pbi->idct_add_y_block(xd->qcoeff,
+ xd->block[0].dequant,
+ xd->predictor,
+ xd->dst.y_buffer,
+ xd->dst.y_stride,
+ xd->eobs);
+ pbi->idct_add_uv_block(xd->qcoeff + 16 * 16,
+ xd->block[16].dequant,
+ xd->predictor + 16 * 16,
+ xd->dst.u_buffer,
+ xd->dst.v_buffer,
+ xd->dst.uv_stride,
+ xd->eobs + 16);
+ } else {
+#ifdef DEC_DEBUG
+ /* NOTE(review): both dumps below walk 400 entries, while the predictor
+ * dump in the debug code of the preceding function stops at 384 -
+ * confirm the size of xd->predictor before enabling DEC_DEBUG. */
+ if (dec_debug) {
+ int i;
+ printf("\n");
+ printf("qcoeff 4x4\n");
+ for (i = 0; i < 400; i++) {
+ printf("%3d ", xd->qcoeff[i]);
+ if (i % 16 == 15) printf("\n");
+ }
+ printf("\n");
+ printf("predictor\n");
+ for (i = 0; i < 400; i++) {
+ printf("%3d ", xd->predictor[i]);
+ if (i % 16 == 15) printf("\n");
+ }
+ }
+#endif
+ /* The transform type of block 0 selects between the per-block path
+ * (no second-order block) and the second-order (block 24) path. */
+ tx_type = get_tx_type_4x4(xd, &xd->block[0]);
+ if (tx_type != DCT_DCT) {
+ assert(get_2nd_order_usage(xd) == 0);
+ for (i = 0; i < 16; i++) {
+ BLOCKD *b = &xd->block[i];
+ tx_type = get_tx_type_4x4(xd, b);
+ if (tx_type != DCT_DCT) {
+ vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff,
+ b->dequant, b->predictor,
+ *(b->base_dst) + b->dst, 16,
+ b->dst_stride, b->eob);
+ } else {
+ vp9_dequant_idct_add(b->qcoeff, b->dequant, b->predictor,
+ *(b->base_dst) + b->dst, 16, b->dst_stride);
+ }
+ }
+ } else {
+ BLOCKD *b = &xd->block[24];
+ assert(get_2nd_order_usage(xd) == 1);
+ vp9_dequantize_b(b);
+ /* Inverse Walsh transform of block 24 into b->diff, then clear the
+ * coefficients that were consumed: eight ints in the general case,
+ * one int when at most one coefficient is present. */
+ if (xd->eobs[24] > 1) {
+ vp9_short_inv_walsh4x4(&b->dqcoeff[0], b->diff);
+ ((int *)b->qcoeff)[0] = 0;
+ ((int *)b->qcoeff)[1] = 0;
+ ((int *)b->qcoeff)[2] = 0;
+ ((int *)b->qcoeff)[3] = 0;
+ ((int *)b->qcoeff)[4] = 0;
+ ((int *)b->qcoeff)[5] = 0;
+ ((int *)b->qcoeff)[6] = 0;
+ ((int *)b->qcoeff)[7] = 0;
+ } else {
+ xd->inv_walsh4x4_1(&b->dqcoeff[0], b->diff);
+ ((int *)b->qcoeff)[0] = 0;
+ }
+ /* NOTE(review): block 24 was already dequantized above and its qcoeff
+ * has just been cleared - confirm whether this second call is needed. */
+ vp9_dequantize_b(b);
+ pbi->dc_idct_add_y_block(xd->qcoeff,
+ xd->block[0].dequant,
+ xd->predictor,
+ xd->dst.y_buffer,
+ xd->dst.y_stride,
+ xd->eobs,
+ xd->block[24].diff);
+ }
+ pbi->idct_add_uv_block(xd->qcoeff + 16 * 16,
+ xd->block[16].dequant,
+ xd->predictor + 16 * 16,
+ xd->dst.u_buffer,
+ xd->dst.v_buffer,
+ xd->dst.uv_stride,
+ xd->eobs + 16);
+ }
+}
+
+#if CONFIG_SUPERBLOCKS
+/* Adds the residual of one macroblock of a superblock that uses the 16x16
+ * luma transform. The same destination pointer is passed as both predictor
+ * and output of the transform helpers, so the residual is added in place on
+ * top of whatever xd->dst already holds.
+ *
+ * pbi, bc - not used in this function
+ * xd      - macroblock state (qcoeff, dequant tables, dst buffers, eobs)
+ * n       - index of the macroblock inside the superblock (the caller in
+ *           this file passes 0..3); bit 0 selects the column, bit 1 the row
+ */
+static void decode_16x16_sb(VP9D_COMP *pbi, MACROBLOCKD *xd,
+                            BOOL_DECODER* const bc, int n) {
+  int x_idx = n & 1, y_idx = n >> 1;
+  TX_TYPE tx_type = get_tx_type_16x16(xd, &xd->block[0]);
+
+  /* Luma: a single 16x16 transform, hybrid or plain DCT. */
+  if (tx_type != DCT_DCT) {
+    vp9_ht_dequant_idct_add_16x16_c(
+        tx_type, xd->qcoeff, xd->block[0].dequant,
+        xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
+        xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
+        xd->dst.y_stride, xd->dst.y_stride, xd->block[0].eob);
+  } else {
+    vp9_dequant_idct_add_16x16(
+        xd->qcoeff, xd->block[0].dequant,
+        xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
+        xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
+        xd->dst.y_stride, xd->dst.y_stride, xd->eobs[0]);
+  }
+
+  /* Chroma: 8x8 transforms, added in place at this macroblock's offset. */
+  vp9_dequant_idct_add_uv_block_8x8_inplace_c(
+      xd->qcoeff + 16 * 16,
+      xd->block[16].dequant,
+      xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8,
+      xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8,
+      xd->dst.uv_stride, xd->eobs + 16, xd);
+}
+
+/* Adds the residual of one macroblock of a superblock that uses the 8x8
+ * luma transform. Destination pointers double as predictor pointers, so the
+ * residual is added in place on top of whatever xd->dst already holds.
+ *
+ * When block 0 has a non-DCT transform type, the four 8x8 luma blocks are
+ * handled one by one; otherwise the second-order block (block 24) is
+ * inverse-transformed first and its output feeds the luma DC terms.
+ *
+ * pbi, bc - not used in this function
+ * xd      - macroblock state (qcoeff, dequant tables, dst buffers, eobs)
+ * n       - index of the macroblock inside the superblock (the caller in
+ *           this file passes 0..3); bit 0 selects the column, bit 1 the row
+ */
+static void decode_8x8_sb(VP9D_COMP *pbi, MACROBLOCKD *xd,
+                          BOOL_DECODER* const bc, int n) {
+  BLOCKD *b = &xd->block[24];
+  int x_idx = n & 1, y_idx = n >> 1;
+  TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[0]);
+  if (tx_type != DCT_DCT) {
+    int i;
+    for (i = 0; i < 4; i++) {
+      int ib = vp9_i8x8_block[i];
+      int idx = (ib & 0x02) ? (ib + 2) : ib;
+      short *q = xd->block[idx].qcoeff;
+      short *dq = xd->block[0].dequant;
+      int stride = xd->dst.y_stride;
+      /* Named differently from the function-level `b` (block 24) so the
+       * two are not confused. */
+      BLOCKD *yb = &xd->block[ib];
+      tx_type = get_tx_type_8x8(xd, &xd->block[ib]);
+      if (tx_type != DCT_DCT) {
+        vp9_ht_dequant_idct_add_8x8_c(
+            tx_type, q, dq,
+            xd->dst.y_buffer + (y_idx * 16 + (i / 2) * 8) * xd->dst.y_stride
+            + x_idx * 16 + (i & 1) * 8,
+            xd->dst.y_buffer + (y_idx * 16 + (i / 2) * 8) * xd->dst.y_stride
+            + x_idx * 16 + (i & 1) * 8,
+            stride, stride, yb->eob);
+      } else {
+        vp9_dequant_idct_add_8x8_c(
+            q, dq,
+            xd->dst.y_buffer + (y_idx * 16 + (i / 2) * 8) * xd->dst.y_stride
+            + x_idx * 16 + (i & 1) * 8,
+            xd->dst.y_buffer + (y_idx * 16 + (i / 2) * 8) * xd->dst.y_stride
+            + x_idx * 16 + (i & 1) * 8,
+            stride, stride, 0, yb->eob);
+      }
+      /* NOTE(review): this chroma call sits inside the luma loop and so
+       * runs four times per macroblock; the other branch and the sibling
+       * *_sb helpers call it once. Left as is because hoisting it is only
+       * safe if the helper clears the coefficients it consumes - confirm. */
+      vp9_dequant_idct_add_uv_block_8x8_inplace_c(
+          xd->qcoeff + 16 * 16, xd->block[16].dequant,
+          xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8,
+          xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8,
+          xd->dst.uv_stride, xd->eobs + 16, xd);
+    }
+  } else {
+    vp9_dequantize_b_2x2(b);
+    vp9_short_ihaar2x2(&b->dqcoeff[0], b->diff, 8);
+    ((int *)b->qcoeff)[0] = 0;  // 2nd order block are set to 0 after idct
+    ((int *)b->qcoeff)[1] = 0;
+    ((int *)b->qcoeff)[2] = 0;
+    ((int *)b->qcoeff)[3] = 0;
+    ((int *)b->qcoeff)[4] = 0;
+    ((int *)b->qcoeff)[5] = 0;
+    ((int *)b->qcoeff)[6] = 0;
+    ((int *)b->qcoeff)[7] = 0;
+    vp9_dequant_dc_idct_add_y_block_8x8_inplace_c(
+        xd->qcoeff, xd->block[0].dequant,
+        xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
+        xd->dst.y_stride, xd->eobs, xd->block[24].diff, xd);
+    vp9_dequant_idct_add_uv_block_8x8_inplace_c(
+        xd->qcoeff + 16 * 16, xd->block[16].dequant,
+        xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8,
+        xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8,
+        xd->dst.uv_stride, xd->eobs + 16, xd);
+  }
+}
+
+/* Adds the residual of one macroblock of a superblock that uses the 4x4
+ * luma transform. Destination pointers double as predictor pointers, so the
+ * residual is added in place on top of whatever xd->dst already holds.
+ *
+ * When block 0 has a non-DCT transform type, the sixteen 4x4 luma blocks are
+ * handled one by one; otherwise the second-order block (block 24) goes
+ * through the inverse Walsh transform first and its output feeds the luma
+ * DC terms. Chroma is handled once, after either luma path.
+ *
+ * pbi, bc - not used in this function
+ * xd      - macroblock state (qcoeff, dequant tables, dst buffers, eobs)
+ * n       - index of the macroblock inside the superblock (the caller in
+ *           this file passes 0..3); bit 0 selects the column, bit 1 the row
+ */
+static void decode_4x4_sb(VP9D_COMP *pbi, MACROBLOCKD *xd,
+                          BOOL_DECODER* const bc, int n) {
+  BLOCKD *b = &xd->block[24];
+  int x_idx = n & 1, y_idx = n >> 1;
+  TX_TYPE tx_type = get_tx_type_4x4(xd, &xd->block[0]);
+  if (tx_type != DCT_DCT) {
+    int i;
+    for (i = 0; i < 16; i++) {
+      /* Named differently from the function-level `b` (block 24) so the
+       * two are not confused. */
+      BLOCKD *yb = &xd->block[i];
+      tx_type = get_tx_type_4x4(xd, yb);
+      if (tx_type != DCT_DCT) {
+        vp9_ht_dequant_idct_add_c(
+            tx_type, yb->qcoeff, yb->dequant,
+            xd->dst.y_buffer + (y_idx * 16 + (i / 4) * 4) * xd->dst.y_stride
+            + x_idx * 16 + (i & 3) * 4,
+            xd->dst.y_buffer + (y_idx * 16 + (i / 4) * 4) * xd->dst.y_stride
+            + x_idx * 16 + (i & 3) * 4,
+            xd->dst.y_stride, xd->dst.y_stride, yb->eob);
+      } else {
+        vp9_dequant_idct_add_c(
+            yb->qcoeff, yb->dequant,
+            xd->dst.y_buffer + (y_idx * 16 + (i / 4) * 4) * xd->dst.y_stride
+            + x_idx * 16 + (i & 3) * 4,
+            xd->dst.y_buffer + (y_idx * 16 + (i / 4) * 4) * xd->dst.y_stride
+            + x_idx * 16 + (i & 3) * 4,
+            xd->dst.y_stride, xd->dst.y_stride);
+      }
+    }
+  } else {
+    vp9_dequantize_b(b);
+    /* Clear the second-order coefficients that were consumed: eight ints in
+     * the general case, one int when at most one coefficient is present. */
+    if (xd->eobs[24] > 1) {
+      vp9_short_inv_walsh4x4(&b->dqcoeff[0], b->diff);
+      ((int *)b->qcoeff)[0] = 0;
+      ((int *)b->qcoeff)[1] = 0;
+      ((int *)b->qcoeff)[2] = 0;
+      ((int *)b->qcoeff)[3] = 0;
+      ((int *)b->qcoeff)[4] = 0;
+      ((int *)b->qcoeff)[5] = 0;
+      ((int *)b->qcoeff)[6] = 0;
+      ((int *)b->qcoeff)[7] = 0;
+    } else {
+      xd->inv_walsh4x4_1(&b->dqcoeff[0], b->diff);
+      ((int *)b->qcoeff)[0] = 0;
+    }
+    vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(
+        xd->qcoeff, xd->block[0].dequant,
+        xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
+        xd->dst.y_stride, xd->eobs, xd->block[24].diff, xd);
+  }
+  vp9_dequant_idct_add_uv_block_4x4_inplace_c(
+      xd->qcoeff + 16 * 16, xd->block[16].dequant,
+      xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8,
+      xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8,
+      xd->dst.uv_stride, xd->eobs + 16, xd);
+}
+
+/* Decodes one superblock (a 2x2 group of macroblocks sharing one mode info).
+ * Builds the prediction for the whole superblock first, then decodes tokens
+ * and adds the residual for each of the up to four macroblocks that lie
+ * inside the frame.
+ *
+ * pbi    - decoder instance
+ * xd     - macroblock state; its context and mode-info pointers are moved
+ *          per macroblock and restored before returning
+ * mb_row - macroblock row of the superblock's top-left macroblock
+ * mb_col - macroblock column of the superblock's top-left macroblock
+ * bc     - bool decoder the coefficient tokens are read from
+ */
+static void decode_superblock(VP9D_COMP *pbi, MACROBLOCKD *xd,
+ int mb_row, unsigned int mb_col,
+ BOOL_DECODER* const bc) {
+ int i, n, eobtotal;
+ TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size;
+ VP9_COMMON *const pc = &pbi->common;
+ MODE_INFO *orig_mi = xd->mode_info_context;
+
+ assert(xd->mode_info_context->mbmi.encoded_as_sb);
+
+ if (pbi->common.frame_type != KEY_FRAME)
+ vp9_setup_interp_filters(xd, xd->mode_info_context->mbmi.interp_filter, pc);
+
+ // re-initialize macroblock dequantizer before detokenization
+ if (xd->segmentation_enabled)
+ mb_init_dequantizer(pbi, xd);
+
+ if (xd->mode_info_context->mbmi.mb_skip_coeff) {
+ /* Reset the token contexts at the current position, then step the
+ * above/left context pointers by one (each only when not at the last
+ * column/row of the frame), reset again, and step back. */
+ vp9_reset_mb_tokens_context(xd);
+ if (mb_col < pc->mb_cols - 1)
+ xd->above_context++;
+ if (mb_row < pc->mb_rows - 1)
+ xd->left_context++;
+ vp9_reset_mb_tokens_context(xd);
+ if (mb_col < pc->mb_cols - 1)
+ xd->above_context--;
+ if (mb_row < pc->mb_rows - 1)
+ xd->left_context--;
+
+ /* mb_skip_coeff is set, so no tokens are decoded for this superblock;
+ * hand over to skip_recon_mb() and return.
+ */
+ skip_recon_mb(pbi, xd);
+ return;
+ }
+
+ /* do prediction */
+ if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
+ vp9_build_intra_predictors_sby_s(xd);
+ vp9_build_intra_predictors_sbuv_s(xd);
+ } else {
+ vp9_build_inter32x32_predictors_sb(xd, xd->dst.y_buffer,
+ xd->dst.u_buffer, xd->dst.v_buffer,
+ xd->dst.y_stride, xd->dst.uv_stride);
+ }
+
+ /* dequantization and idct */
+ for (n = 0; n < 4; n++) {
+ int x_idx = n & 1, y_idx = n >> 1;
+
+ /* Macroblocks of the superblock that fall outside the frame are not
+ * decoded. */
+ if (mb_col + x_idx >= pc->mb_cols || mb_row + y_idx >= pc->mb_rows)
+ continue;
+
+
+ /* Point the contexts and mode info at macroblock n of the superblock. */
+ xd->above_context = pc->above_context + mb_col + x_idx;
+ xd->left_context = pc->left_context + y_idx;
+ xd->mode_info_context = orig_mi + x_idx + y_idx * pc->mode_info_stride;
+ for (i = 0; i < 25; i++) {
+ xd->block[i].eob = 0;
+ xd->eobs[i] = 0;
+ }
+
+ eobtotal = vp9_decode_mb_tokens(pbi, xd, bc);
+ if (eobtotal == 0) { // skip loopfilter
+ xd->mode_info_context->mbmi.mb_skip_coeff = 1;
+ continue;
+ }
+
+ /* tx_size was read once from the superblock's mode info, so all four
+ * macroblocks use the same transform size. */
+ if (tx_size == TX_16X16) {
+ decode_16x16_sb(pbi, xd, bc, n);
+ } else if (tx_size == TX_8X8) {
+ decode_8x8_sb(pbi, xd, bc, n);
+ } else {
+ decode_4x4_sb(pbi, xd, bc, n);
+ }
+ }
+
+ /* Restore the pointers to the superblock's top-left macroblock. */
+ xd->above_context = pc->above_context + mb_col;
+ xd->left_context = pc->left_context;
+ xd->mode_info_context = orig_mi;
+}
+#endif
+
+/* Decodes a single macroblock that is not coded as part of a superblock:
+ * token decoding (except for B_PRED, which decode_4x4() handles block by
+ * block), prediction, then residual reconstruction dispatched on the
+ * transform size.
+ *
+ * pbi            - decoder instance
+ * xd             - macroblock state for the macroblock being decoded
+ * mb_row, mb_col - not referenced in this function
+ * bc             - bool decoder the coefficient tokens are read from
+ */
+static void decode_macroblock(VP9D_COMP *pbi, MACROBLOCKD *xd,
+ int mb_row, unsigned int mb_col,
+ BOOL_DECODER* const bc) {
+ int eobtotal = 0;
+ MB_PREDICTION_MODE mode;
+ int i;
+ int tx_size;
+
+#if CONFIG_SUPERBLOCKS
+ assert(!xd->mode_info_context->mbmi.encoded_as_sb);
+#endif
+
+ // re-initialize macroblock dequantizer before detokenization
+ if (xd->segmentation_enabled)
+ mb_init_dequantizer(pbi, xd);
+
+ tx_size = xd->mode_info_context->mbmi.txfm_size;
+ mode = xd->mode_info_context->mbmi.mode;
+
+ /* Skipped macroblocks only reset their token contexts. Otherwise, unless
+ * the bool decoder has flagged an error, clear the end-of-block counters
+ * and decode the tokens (B_PRED tokens are decoded later, per block). */
+ if (xd->mode_info_context->mbmi.mb_skip_coeff) {
+ vp9_reset_mb_tokens_context(xd);
+ } else if (!bool_error(bc)) {
+ for (i = 0; i < 25; i++) {
+ xd->block[i].eob = 0;
+ xd->eobs[i] = 0;
+ }
+ if (mode != B_PRED) {
+ eobtotal = vp9_decode_mb_tokens(pbi, xd, bc);
+ }
+ }
+
+ //mode = xd->mode_info_context->mbmi.mode;
+ if (pbi->common.frame_type != KEY_FRAME)
+ vp9_setup_interp_filters(xd, xd->mode_info_context->mbmi.interp_filter,
+ &pbi->common);
+
+ /* eobtotal is still 0 here when the macroblock was flagged as skipped or
+ * when token decoding produced no coefficients. */
+ if (eobtotal == 0 && mode != B_PRED && mode != SPLITMV
+ && mode != I8X8_PRED
+ && !bool_error(bc)) {
+ /* Special case: Force the loopfilter to skip when eobtotal and
+ * mb_skip_coeff are zero.
+ * */
+ xd->mode_info_context->mbmi.mb_skip_coeff = 1;
+ skip_recon_mb(pbi, xd);
+ return;
+ }
+#ifdef DEC_DEBUG
+ if (dec_debug)
+ printf("Decoding mb: %d %d\n", xd->mode_info_context->mbmi.mode, tx_size);
+#endif
+
+ // moved to be performed before detokenization
+// if (xd->segmentation_enabled)
+// mb_init_dequantizer(pbi, xd);
+
+ /* do prediction */
+ /* Intra: I8X8_PRED builds no predictor here and B_PRED builds only the
+ * chroma predictor; decode_4x4() runs the remaining intra predictions for
+ * those modes. */
+ if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
+ if (mode != I8X8_PRED) {
+ vp9_build_intra_predictors_mbuv(xd);
+ if (mode != B_PRED) {
+ vp9_build_intra_predictors_mby(xd);
+ }
+ }
+ } else {
+#ifdef DEC_DEBUG
+ if (dec_debug)
+ printf("Decoding mb: %d %d interp %d\n",
+ xd->mode_info_context->mbmi.mode, tx_size,
+ xd->mode_info_context->mbmi.interp_filter);
+#endif
+ vp9_build_inter_predictors_mb(xd);
+ }
+
+ /* Residual reconstruction, selected by transform size. */
+ if (tx_size == TX_16X16) {
+ decode_16x16(pbi, xd, bc);
+ } else if (tx_size == TX_8X8) {
+ decode_8x8(pbi, xd, bc);
+ } else {
+ decode_4x4(pbi, xd, bc);
+ }
+#ifdef DEC_DEBUG
+ /* Dump the reconstructed 16x16 luma and 8x8 chroma planes. */
+ if (dec_debug) {
+ int i, j;
+ printf("\n");
+ printf("final y\n");
+ for (i = 0; i < 16; i++) {
+ for (j = 0; j < 16; j++)
+ printf("%3d ", xd->dst.y_buffer[i * xd->dst.y_stride + j]);
+ printf("\n");
+ }
+ printf("\n");
+ printf("final u\n");
+ for (i = 0; i < 8; i++) {
+ for (j = 0; j < 8; j++)
+ printf("%3d ", xd->dst.u_buffer[i * xd->dst.uv_stride + j]);
+ printf("\n");
+ }
+ printf("\n");
+ printf("final v\n");
+ for (i = 0; i < 8; i++) {
+ for (j = 0; j < 8; j++)
+ printf("%3d ", xd->dst.v_buffer[i * xd->dst.uv_stride + j]);
+ printf("\n");
+ }
+ fflush(stdout);
+ }
+#endif
+}
+
+
+/* Reads one optional quantizer delta from the bitstream.
+ *
+ * A flag bit says whether a delta is present; if so, a 4-bit magnitude and a
+ * sign bit follow. Without the flag the delta is 0.
+ *
+ * bc       - reader to pull the bits from
+ * prev     - delta currently in effect
+ * q_update - set to 1 when the decoded delta differs from prev; left
+ *            untouched otherwise
+ *
+ * Returns the decoded delta.
+ */
+static int get_delta_q(vp9_reader *bc, int prev, int *q_update) {
+  int delta = 0;
+
+  if (vp9_read_bit(bc)) {
+    const int magnitude = vp9_read_literal(bc, 4);
+
+    delta = vp9_read_bit(bc) ? -magnitude : magnitude;
+  }
+
+  /* A changed delta means the dequantizer tables need rebuilding. */
+  if (prev != delta) {
+    *q_update = 1;
+  }
+
+  return delta;
+}
+
+#ifdef PACKET_TESTING
+#include <stdio.h>
+FILE *vpxlog = 0;
+#endif
+
+/* Decode a row of Superblocks (2x2 region of MBs).
+ *
+ * Walks the superblocks of the row from left to right. Inside each one the
+ * four macroblocks are visited in the order top-left, top-right,
+ * bottom-left, bottom-right; row_delta/col_delta give the step taken after
+ * each macroblock, and the step after the fourth lands on the top-left
+ * macroblock of the next superblock. Macroblocks outside the frame are
+ * stepped over. With CONFIG_SUPERBLOCKS, a superblock flagged encoded_as_sb
+ * is decoded in one go through decode_superblock().
+ *
+ * pbi   - decoder instance
+ * pc    - common codec state (frame buffers, contexts, dimensions)
+ * mbrow - macroblock row that the walk starts from (top row of the
+ *         superblock row)
+ * xd    - macroblock state; its mode-info pointers are advanced as the row
+ *         is decoded and left pointing past the row on return
+ * bc    - bool decoder the modes and tokens are read from
+ */
+static void
+decode_sb_row(VP9D_COMP *pbi, VP9_COMMON *pc, int mbrow, MACROBLOCKD *xd,
+ BOOL_DECODER* const bc) {
+ int i;
+ int sb_col;
+ int mb_row, mb_col;
+ int recon_yoffset, recon_uvoffset;
+ int ref_fb_idx = pc->lst_fb_idx;
+ int dst_fb_idx = pc->new_fb_idx;
+ int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride;
+ int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;
+ int row_delta[4] = { 0, +1, 0, -1};
+ int col_delta[4] = { +1, -1, +1, +1};
+ int sb_cols = (pc->mb_cols + 1) >> 1;
+
+ // For a SB there are 2 left contexts, each pertaining to a MB row within
+ vpx_memset(pc->left_context, 0, sizeof(pc->left_context));
+
+ mb_row = mbrow;
+ mb_col = 0;
+
+ for (sb_col = 0; sb_col < sb_cols; sb_col++) {
+#if CONFIG_SUPERBLOCKS
+ MODE_INFO *mi = xd->mode_info_context;
+
+ mi->mbmi.encoded_as_sb = vp9_read(bc, pc->sb_coded);
+#endif
+
+ // Process the 4 MBs within the SB in the order:
+ // top-left, top-right, bottom-left, bottom-right
+ for (i = 0; i < 4; i++) {
+ int dy = row_delta[i];
+ int dx = col_delta[i];
+ int offset_extended = dy * xd->mode_info_stride + dx;
+
+ xd->mb_index = i;
+
+#if CONFIG_SUPERBLOCKS
+ mi = xd->mode_info_context;
+#endif
+ if ((mb_row >= pc->mb_rows) || (mb_col >= pc->mb_cols)) {
+ // MB lies outside frame, skip on to next
+ mb_row += dy;
+ mb_col += dx;
+ xd->mode_info_context += offset_extended;
+ xd->prev_mode_info_context += offset_extended;
+ continue;
+ }
+#if CONFIG_SUPERBLOCKS
+ /* Only the first macroblock of a superblock may carry the flag. */
+ if (i)
+ mi->mbmi.encoded_as_sb = 0;
+#endif
+
+ // Set above context pointer
+ xd->above_context = pc->above_context + mb_col;
+ xd->left_context = pc->left_context + (i >> 1);
+
+ /* Distance of Mb to the various image edges.
+ * These are specified to 8th pel as they are always compared to
+ * values that are in 1/8th pel units
+ */
+ /* Shift first, negate afterwards: left-shifting a negative value is
+ * undefined behaviour in C. */
+ xd->mb_to_top_edge = -((mb_row * 16) << 3);
+ xd->mb_to_left_edge = -((mb_col * 16) << 3);
+#if CONFIG_SUPERBLOCKS
+ if (mi->mbmi.encoded_as_sb) {
+ xd->mb_to_bottom_edge = ((pc->mb_rows - 2 - mb_row) * 16) << 3;
+ xd->mb_to_right_edge = ((pc->mb_cols - 2 - mb_col) * 16) << 3;
+ } else {
+#endif
+ xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
+ xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
+#if CONFIG_SUPERBLOCKS
+ }
+#endif
+#ifdef DEC_DEBUG
+ dec_debug = (pbi->common.current_video_frame == 1 &&
+ mb_row == 2 && mb_col == 8);
+ if (dec_debug)
+#if CONFIG_SUPERBLOCKS
+ printf("Enter Debug %d %d sb %d\n", mb_row, mb_col,
+ mi->mbmi.encoded_as_sb);
+#else
+ printf("Enter Debug %d %d\n", mb_row, mb_col);
+#endif
+#endif
+ xd->up_available = (mb_row != 0);
+ xd->left_available = (mb_col != 0);
+
+
+ recon_yoffset = (mb_row * recon_y_stride * 16) + (mb_col * 16);
+ recon_uvoffset = (mb_row * recon_uv_stride * 8) + (mb_col * 8);
+
+ xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
+ xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
+ xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
+
+ vp9_decode_mb_mode_mv(pbi, xd, mb_row, mb_col, bc);
+
+ update_blockd_bmi(xd);
+#ifdef DEC_DEBUG
+ if (dec_debug)
+ printf("Hello\n");
+#endif
+
+ /* Select the appropriate reference frame for this MB */
+ if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
+ ref_fb_idx = pc->lst_fb_idx;
+ else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
+ ref_fb_idx = pc->gld_fb_idx;
+ else
+ ref_fb_idx = pc->alt_fb_idx;
+
+ xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
+ xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
+ xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
+
+ if (xd->mode_info_context->mbmi.second_ref_frame > 0) {
+ int second_ref_fb_idx;
+
+ /* Select the appropriate reference frame for this MB */
+ if (xd->mode_info_context->mbmi.second_ref_frame == LAST_FRAME)
+ second_ref_fb_idx = pc->lst_fb_idx;
+ else if (xd->mode_info_context->mbmi.second_ref_frame ==
+ GOLDEN_FRAME)
+ second_ref_fb_idx = pc->gld_fb_idx;
+ else
+ second_ref_fb_idx = pc->alt_fb_idx;
+
+ xd->second_pre.y_buffer =
+ pc->yv12_fb[second_ref_fb_idx].y_buffer + recon_yoffset;
+ xd->second_pre.u_buffer =
+ pc->yv12_fb[second_ref_fb_idx].u_buffer + recon_uvoffset;
+ xd->second_pre.v_buffer =
+ pc->yv12_fb[second_ref_fb_idx].v_buffer + recon_uvoffset;
+ }
+
+ if (xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME) {
+ /* propagate errors from reference frames */
+ xd->corrupted |= pc->yv12_fb[ref_fb_idx].corrupted;
+ }
+
+#if CONFIG_SUPERBLOCKS
+ /* Copy the superblock's mode info to the other macroblocks it covers
+ * that lie inside the frame. */
+ if (xd->mode_info_context->mbmi.encoded_as_sb) {
+ if (mb_col < pc->mb_cols - 1)
+ mi[1] = mi[0];
+ if (mb_row < pc->mb_rows - 1) {
+ mi[pc->mode_info_stride] = mi[0];
+ if (mb_col < pc->mb_cols - 1)
+ mi[pc->mode_info_stride + 1] = mi[0];
+ }
+ }
+ if (xd->mode_info_context->mbmi.encoded_as_sb) {
+ decode_superblock(pbi, xd, mb_row, mb_col, bc);
+ } else {
+#endif
+ vp9_intra_prediction_down_copy(xd);
+ decode_macroblock(pbi, xd, mb_row, mb_col, bc);
+#if CONFIG_SUPERBLOCKS
+ }
+#endif
+
+ /* check if the boolean decoder has suffered an error */
+ xd->corrupted |= bool_error(bc);
+
+#if CONFIG_SUPERBLOCKS
+ /* A superblock covers all four macroblocks: jump straight to the
+ * top-left macroblock of the next superblock. */
+ if (mi->mbmi.encoded_as_sb) {
+ assert(!i);
+ mb_col += 2;
+ xd->mode_info_context += 2;
+ xd->prev_mode_info_context += 2;
+ break;
+ }
+#endif
+
+ // skip to next MB
+ xd->mode_info_context += offset_extended;
+ xd->prev_mode_info_context += offset_extended;
+ mb_row += dy;
+ mb_col += dx;
+ }
+ }
+
+ /* skip prediction column */
+ xd->mode_info_context += 1 - (pc->mb_cols & 0x1) + xd->mode_info_stride;
+ xd->prev_mode_info_context += 1 - (pc->mb_cols & 0x1) + xd->mode_info_stride;
+}
+
+/* Assembles a 24-bit little-endian value from the three bytes at cx_size:
+ * byte 0 is the least significant, byte 2 the most significant.
+ */
+static unsigned int read_partition_size(const unsigned char *cx_size) {
+  /* The three bytes occupy disjoint bit ranges, so OR-ing them gives the
+   * same value as adding them. */
+  return (unsigned int)(cx_size[0] | (cx_size[1] << 8) | (cx_size[2] << 16));
+}
+
+/* Returns non-zero when the len bytes starting at start form a non-empty
+ * range that lies entirely inside [start, end).
+ *
+ * start - first byte of the range to read
+ * len   - number of bytes to read; 0 is reported as invalid
+ * end   - one past the last readable byte of the buffer
+ *
+ * The check compares sizes and never forms start + len, because computing a
+ * pointer beyond one-past-the-end of the buffer is undefined behaviour and
+ * cannot be relied on to wrap around.
+ */
+static int read_is_valid(const unsigned char *start,
+                         size_t len,
+                         const unsigned char *end) {
+  return len != 0 && start <= end && len <= (size_t)(end - start);
+}
+
+
+/* Starts the given bool decoder on the token data: everything from cx_data
+ * up to the end of the source buffer (pbi->Source + pbi->source_sz) is
+ * treated as one partition.
+ *
+ * Reports VPX_CODEC_CORRUPT_FRAME through vpx_internal_error() when
+ * read_is_valid() rejects that range, and VPX_CODEC_MEM_ERROR when
+ * vp9_start_decode() returns non-zero.
+ */
+static void setup_token_decoder(VP9D_COMP *pbi,
+                                const unsigned char *cx_data,
+                                BOOL_DECODER* const bool_decoder) {
+  VP9_COMMON *pc = &pbi->common;
+  const unsigned char *const data_end = pbi->Source + pbi->source_sz;
+  /* The partition length is simply the number of bytes left after cx_data. */
+  const ptrdiff_t remaining = data_end - cx_data;
+
+  /* Validate the partition length. If the buffer described by the partition
+   * can't be fully read, report a corrupt frame.
+   */
+  if (!read_is_valid(cx_data, remaining, data_end)) {
+    vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
+                       "Truncated packet or corrupt partition "
+                       "%d length", 1);
+  }
+
+  if (vp9_start_decode(bool_decoder, cx_data, (unsigned int)remaining)) {
+    vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR,
+                       "Failed to allocate bool decoder %d", 1);
+  }
+}
+
+/* Per-frame initialisation run before the frame header is parsed.
+ *
+ * On key frames: resets the probability tables, segment features, loop
+ * filter deltas, reference refresh/copy flags, sign biases, saved entropy
+ * contexts and both mode-info arrays to their defaults.
+ * On other frames: picks the motion-compensation filter from
+ * use_bilinear_mc_filter and installs it.
+ * In both cases the macroblock state in pbi->mb is pointed back at the start
+ * of the mode-info arrays and its corruption flag is cleared.
+ */
+static void init_frame(VP9D_COMP *pbi) {
+ VP9_COMMON *const pc = &pbi->common;
+ MACROBLOCKD *const xd = &pbi->mb;
+
+ if (pc->frame_type == KEY_FRAME) {
+
+ if (pc->last_frame_seg_map)
+ vpx_memset(pc->last_frame_seg_map, 0, (pc->mb_rows * pc->mb_cols));
+
+ vp9_init_mv_probs(pc);
+
+ vp9_init_mbmode_probs(pc);
+ vp9_default_bmode_probs(pc->fc.bmode_prob);
+
+ vp9_default_coef_probs(pc);
+ vp9_kf_default_bmode_probs(pc->kf_bmode_prob);
+
+ // Reset the segment feature data to the default stats:
+ // Features disabled, 0, with delta coding (Default state).
+ vp9_clearall_segfeatures(xd);
+
+ xd->mb_segment_abs_delta = SEGMENT_DELTADATA;
+
+ /* reset the mode and ref deltas for the loop filter */
+ vpx_memset(xd->ref_lf_deltas, 0, sizeof(xd->ref_lf_deltas));
+ vpx_memset(xd->mode_lf_deltas, 0, sizeof(xd->mode_lf_deltas));
+
+ /* All buffers are implicitly updated on key frames. */
+ pc->refresh_golden_frame = 1;
+ pc->refresh_alt_ref_frame = 1;
+ pc->copy_buffer_to_gf = 0;
+ pc->copy_buffer_to_arf = 0;
+
+ /* Note that Golden and Altref modes cannot be used on a key frame so
+ * ref_frame_sign_bias[] is undefined and meaningless
+ */
+ pc->ref_frame_sign_bias[GOLDEN_FRAME] = 0;
+ pc->ref_frame_sign_bias[ALTREF_FRAME] = 0;
+
+ vp9_init_mode_contexts(&pbi->common);
+ /* Both saved entropy contexts start out as copies of the fresh one. */
+ vpx_memcpy(&pc->lfc, &pc->fc, sizeof(pc->fc));
+ vpx_memcpy(&pc->lfc_a, &pc->fc, sizeof(pc->fc));
+
+ /* Clear the current and previous mode-info arrays; each is
+ * (mb_cols + 1) * (mb_rows + 1) entries long. */
+ vpx_memset(pc->prev_mip, 0,
+ (pc->mb_cols + 1) * (pc->mb_rows + 1)* sizeof(MODE_INFO));
+ vpx_memset(pc->mip, 0,
+ (pc->mb_cols + 1) * (pc->mb_rows + 1)* sizeof(MODE_INFO));
+
+ vp9_update_mode_info_border(pc, pc->mip);
+ vp9_update_mode_info_in_image(pc, pc->mi);
+
+
+ } else {
+
+ if (!pc->use_bilinear_mc_filter)
+ pc->mcomp_filter_type = EIGHTTAP;
+ else
+ pc->mcomp_filter_type = BILINEAR;
+
+ /* To enable choice of different interpolation filters */
+ vp9_setup_interp_filters(xd, pc->mcomp_filter_type, pc);
+ }
+
+ xd->mode_info_context = pc->mi;
+ xd->prev_mode_info_context = pc->prev_mi;
+ xd->frame_type = pc->frame_type;
+ xd->mode_info_context->mbmi.mode = DC_PRED;
+ xd->mode_info_stride = pc->mode_info_stride;
+ xd->corrupted = 0; /* init without corruption */
+
+ /* With full_pixel set, the mask clears the low three bits (the 1/8 pel
+ * fraction); otherwise it leaves every bit in place. */
+ xd->fullpixel_mask = 0xffffffff;
+ if (pc->full_pixel)
+ xd->fullpixel_mask = 0xfffffff8;
+
+}
+
+/* Reads the optional updates for one coefficient probability table.
+ *
+ * A leading bit says whether the table carries any updates at all. If it
+ * does, every probability that is in use is preceded by a flag coded with
+ * COEF_UPDATE_PROB; when the flag is set the probability is replaced by the
+ * result of read_prob_diff_update().
+ *
+ * bc         - bool decoder to read from
+ * coef_probs - table updated in place
+ */
+static void read_coef_probs_common(
+    BOOL_DECODER* const bc,
+    vp9_prob coef_probs[BLOCK_TYPES][COEF_BANDS]
+                       [PREV_COEF_CONTEXTS][ENTROPY_NODES]) {
+  int type, band, ctx, node;
+
+  if (!vp9_read_bit(bc)) {
+    return;  /* no updates for this table */
+  }
+
+  for (type = 0; type < BLOCK_TYPES; type++) {
+    /* Block type 0 has no band 0: its bands start at 1. */
+    const int first_band = (type == 0) ? 1 : 0;
+
+    for (band = first_band; band < COEF_BANDS; band++) {
+      for (ctx = 0; ctx < PREV_COEF_CONTEXTS; ctx++) {
+        /* The first band of each block type only has contexts 0..2. */
+        if (band == first_band && ctx >= 3) {
+          continue;
+        }
+
+        for (node = 0; node < ENTROPY_NODES; node++) {
+          vp9_prob *const prob = &coef_probs[type][band][ctx][node];
+
+          if (vp9_read(bc, COEF_UPDATE_PROB)) {
+            *prob = read_prob_diff_update(bc, *prob);
+          }
+        }
+      }
+    }
+  }
+}
+
+/* Reads the coefficient probability updates for every transform size the
+ * frame's transform mode enables: the 4x4 tables always, the 8x8 tables
+ * unless the mode is ONLY_4X4, and the 16x16 tables when the mode is above
+ * ALLOW_8X8. Each size has a regular and a hybrid table, read in that order.
+ */
+static void read_coef_probs(VP9D_COMP *pbi, BOOL_DECODER* const bc) {
+  VP9_COMMON *const pc = &pbi->common;
+  const int has_8x8 = pc->txfm_mode != ONLY_4X4;
+  const int has_16x16 = pc->txfm_mode > ALLOW_8X8;
+
+  read_coef_probs_common(bc, pc->fc.coef_probs);
+  read_coef_probs_common(bc, pc->fc.hybrid_coef_probs);
+
+  if (has_8x8) {
+    read_coef_probs_common(bc, pc->fc.coef_probs_8x8);
+    read_coef_probs_common(bc, pc->fc.hybrid_coef_probs_8x8);
+  }
+
+  if (has_16x16) {
+    read_coef_probs_common(bc, pc->fc.coef_probs_16x16);
+    read_coef_probs_common(bc, pc->fc.hybrid_coef_probs_16x16);
+  }
+}
+
+int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) {
+ BOOL_DECODER header_bc, residual_bc;
+ VP9_COMMON *const pc = &pbi->common;
+ MACROBLOCKD *const xd = &pbi->mb;
+ const unsigned char *data = (const unsigned char *)pbi->Source;
+ const unsigned char *data_end = data + pbi->source_sz;
+ ptrdiff_t first_partition_length_in_bytes = 0;
+
+ int mb_row;
+ int i, j;
+ int corrupt_tokens = 0;
+
+ /* start with no corruption of current frame */
+ xd->corrupted = 0;
+ pc->yv12_fb[pc->new_fb_idx].corrupted = 0;
+
+ if (data_end - data < 3) {
+ vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
+ "Truncated packet");
+ } else {
+ pc->last_frame_type = pc->frame_type;
+ pc->frame_type = (FRAME_TYPE)(data[0] & 1);
+ pc->version = (data[0] >> 1) & 7;
+ pc->show_frame = (data[0] >> 4) & 1;
+ first_partition_length_in_bytes =
+ (data[0] | (data[1] << 8) | (data[2] << 16)) >> 5;
+
+ if ((data + first_partition_length_in_bytes > data_end
+ || data + first_partition_length_in_bytes < data))
+ vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
+ "Truncated packet or corrupt partition 0 length");
+
+ data += 3;
+
+ vp9_setup_version(pc);
+
+ if (pc->frame_type == KEY_FRAME) {
+ const int Width = pc->Width;
+ const int Height = pc->Height;
+
+ /* vet via sync code */
+ /* When error concealment is enabled we should only check the sync
+ * code if we have enough bits available
+ */
+ if (data + 3 < data_end) {
+ if (data[0] != 0x9d || data[1] != 0x01 || data[2] != 0x2a)
+ vpx_internal_error(&pc->error, VPX_CODEC_UNSUP_BITSTREAM,
+ "Invalid frame sync code");
+ }
+
+ /* If error concealment is enabled we should only parse the new size
+ * if we have enough data. Otherwise we will end up with the wrong
+ * size.
+ */
+ if (data + 6 < data_end) {
+ pc->Width = (data[3] | (data[4] << 8)) & 0x3fff;
+ pc->horiz_scale = data[4] >> 6;
+ pc->Height = (data[5] | (data[6] << 8)) & 0x3fff;
+ pc->vert_scale = data[6] >> 6;
+ }
+ data += 7;
+
+ if (Width != pc->Width || Height != pc->Height) {
+ if (pc->Width <= 0) {
+ pc->Width = Width;
+ vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
+ "Invalid frame width");
+ }
+
+ if (pc->Height <= 0) {
+ pc->Height = Height;
+ vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
+ "Invalid frame height");
+ }
+
+ if (vp9_alloc_frame_buffers(pc, pc->Width, pc->Height))
+ vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR,
+ "Failed to allocate frame buffers");
+ }
+ }
+ }
+#ifdef DEC_DEBUG
+ printf("Decode frame %d\n", pc->current_video_frame);
+#endif
+
+ if ((!pbi->decoded_key_frame && pc->frame_type != KEY_FRAME) ||
+ pc->Width == 0 || pc->Height == 0) {
+ return -1;
+ }
+
+ init_frame(pbi);
+
+ if (vp9_start_decode(&header_bc, data,
+ (unsigned int)first_partition_length_in_bytes))
+ vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR,
+ "Failed to allocate bool decoder 0");
+ if (pc->frame_type == KEY_FRAME) {
+ pc->clr_type = (YUV_TYPE)vp9_read_bit(&header_bc);
+ pc->clamp_type = (CLAMP_TYPE)vp9_read_bit(&header_bc);
+ }
+
+ /* Is segmentation enabled */
+ xd->segmentation_enabled = (unsigned char)vp9_read_bit(&header_bc);
+
+ if (xd->segmentation_enabled) {
+ // Read whether or not the segmentation map is being explicitly
+ // updated this frame.
+ xd->update_mb_segmentation_map = (unsigned char)vp9_read_bit(&header_bc);
+
+ // If so what method will be used.
+ if (xd->update_mb_segmentation_map) {
+ // Which macro block level features are enabled
+
+ // Read the probs used to decode the segment id for each macro
+ // block.
+ for (i = 0; i < MB_FEATURE_TREE_PROBS; i++) {
+ xd->mb_segment_tree_probs[i] = vp9_read_bit(&header_bc) ?
+ (vp9_prob)vp9_read_literal(&header_bc, 8) : 255;
+ }
+
+ // Read the prediction probs needed to decode the segment id
+ pc->temporal_update = (unsigned char)vp9_read_bit(&header_bc);
+ for (i = 0; i < PREDICTION_PROBS; i++) {
+ if (pc->temporal_update) {
+ pc->segment_pred_probs[i] = vp9_read_bit(&header_bc) ?
+ (vp9_prob)vp9_read_literal(&header_bc, 8) : 255;
+ } else {
+ pc->segment_pred_probs[i] = 255;
+ }
+ }
+ }
+ // Is the segment data being updated
+ xd->update_mb_segmentation_data = (unsigned char)vp9_read_bit(&header_bc);
+
+ if (xd->update_mb_segmentation_data) {
+ int data;
+
+ xd->mb_segment_abs_delta = (unsigned char)vp9_read_bit(&header_bc);
+
+ vp9_clearall_segfeatures(xd);
+
+ // For each segmentation...
+ for (i = 0; i < MAX_MB_SEGMENTS; i++) {
+ // For each of the segments features...
+ for (j = 0; j < SEG_LVL_MAX; j++) {
+ // Is the feature enabled
+ if (vp9_read_bit(&header_bc)) {
+ // Update the feature data and mask
+ vp9_enable_segfeature(xd, i, j);
+
+ data = vp9_decode_unsigned_max(&header_bc,
+ vp9_seg_feature_data_max(j));
+
+ // Is the segment data signed..
+ if (vp9_is_segfeature_signed(j)) {
+ if (vp9_read_bit(&header_bc))
+ data = -data;
+ }
+ } else
+ data = 0;
+
+ vp9_set_segdata(xd, i, j, data);
+ }
+ }
+ }
+ }
+
+ // Read common prediction model status flag probability updates for the
+ // reference frame
+ if (pc->frame_type == KEY_FRAME) {
+ // Set the prediction probabilities to defaults
+ pc->ref_pred_probs[0] = 120;
+ pc->ref_pred_probs[1] = 80;
+ pc->ref_pred_probs[2] = 40;
+
+ } else {
+ for (i = 0; i < PREDICTION_PROBS; i++) {
+ if (vp9_read_bit(&header_bc))
+ pc->ref_pred_probs[i] = (vp9_prob)vp9_read_literal(&header_bc, 8);
+ }
+ }
+
+#if CONFIG_SUPERBLOCKS
+ pc->sb_coded = vp9_read_literal(&header_bc, 8);
+#endif
+
+ /* Read the loop filter level and type */
+ pc->txfm_mode = vp9_read_literal(&header_bc, 2);
+ if (pc->txfm_mode == TX_MODE_SELECT) {
+ pc->prob_tx[0] = vp9_read_literal(&header_bc, 8);
+ pc->prob_tx[1] = vp9_read_literal(&header_bc, 8);
+ }
+
+ pc->filter_type = (LOOPFILTERTYPE) vp9_read_bit(&header_bc);
+ pc->filter_level = vp9_read_literal(&header_bc, 6);
+ pc->sharpness_level = vp9_read_literal(&header_bc, 3);
+
+ /* Read in loop filter deltas applied at the MB level based on mode or ref frame. */
+ xd->mode_ref_lf_delta_update = 0;
+ xd->mode_ref_lf_delta_enabled = (unsigned char)vp9_read_bit(&header_bc);
+
+ if (xd->mode_ref_lf_delta_enabled) {
+ /* Do the deltas need to be updated */
+ xd->mode_ref_lf_delta_update = (unsigned char)vp9_read_bit(&header_bc);
+
+ if (xd->mode_ref_lf_delta_update) {
+ /* Send update */
+ for (i = 0; i < MAX_REF_LF_DELTAS; i++) {
+ if (vp9_read_bit(&header_bc)) {
+ /*sign = vp9_read_bit( &header_bc );*/
+ xd->ref_lf_deltas[i] = (signed char)vp9_read_literal(&header_bc, 6);
+
+ if (vp9_read_bit(&header_bc)) /* Apply sign */
+ xd->ref_lf_deltas[i] = xd->ref_lf_deltas[i] * -1;
+ }
+ }
+
+ /* Send update */
+ for (i = 0; i < MAX_MODE_LF_DELTAS; i++) {
+ if (vp9_read_bit(&header_bc)) {
+ /*sign = vp9_read_bit( &header_bc );*/
+ xd->mode_lf_deltas[i] = (signed char)vp9_read_literal(&header_bc, 6);
+
+ if (vp9_read_bit(&header_bc)) /* Apply sign */
+ xd->mode_lf_deltas[i] = xd->mode_lf_deltas[i] * -1;
+ }
+ }
+ }
+ }
+
+ // Dummy read for now
+ vp9_read_literal(&header_bc, 2);
+
+ setup_token_decoder(pbi, data + first_partition_length_in_bytes,
+ &residual_bc);
+
+ /* Read the default quantizers. */
+ {
+ int Q, q_update;
+
+ Q = vp9_read_literal(&header_bc, QINDEX_BITS);
+ pc->base_qindex = Q;
+ q_update = 0;
+ /* AC 1st order Q = default */
+ pc->y1dc_delta_q = get_delta_q(&header_bc, pc->y1dc_delta_q, &q_update);
+ pc->y2dc_delta_q = get_delta_q(&header_bc, pc->y2dc_delta_q, &q_update);
+ pc->y2ac_delta_q = get_delta_q(&header_bc, pc->y2ac_delta_q, &q_update);
+ pc->uvdc_delta_q = get_delta_q(&header_bc, pc->uvdc_delta_q, &q_update);
+ pc->uvac_delta_q = get_delta_q(&header_bc, pc->uvac_delta_q, &q_update);
+
+ if (q_update)
+ vp9_init_de_quantizer(pbi);
+
+ /* MB level dequantizer setup */
+ mb_init_dequantizer(pbi, &pbi->mb);
+ }
+
+ /* Determine if the golden frame or ARF buffer should be updated and how.
+ * For all non key frames the GF and ARF refresh flags and sign bias
+ * flags must be set explicitly.
+ */
+ if (pc->frame_type != KEY_FRAME) {
+ /* Should the GF or ARF be updated from the current frame */
+ pc->refresh_golden_frame = vp9_read_bit(&header_bc);
+ pc->refresh_alt_ref_frame = vp9_read_bit(&header_bc);
+
+ if (pc->refresh_alt_ref_frame) {
+ vpx_memcpy(&pc->fc, &pc->lfc_a, sizeof(pc->fc));
+ } else {
+ vpx_memcpy(&pc->fc, &pc->lfc, sizeof(pc->fc));
+ }
+
+ /* Buffer to buffer copy flags. */
+ pc->copy_buffer_to_gf = 0;
+
+ if (!pc->refresh_golden_frame)
+ pc->copy_buffer_to_gf = vp9_read_literal(&header_bc, 2);
+
+ pc->copy_buffer_to_arf = 0;
+
+ if (!pc->refresh_alt_ref_frame)
+ pc->copy_buffer_to_arf = vp9_read_literal(&header_bc, 2);
+
+ pc->ref_frame_sign_bias[GOLDEN_FRAME] = vp9_read_bit(&header_bc);
+ pc->ref_frame_sign_bias[ALTREF_FRAME] = vp9_read_bit(&header_bc);
+
+ /* Is high precision mv allowed */
+ xd->allow_high_precision_mv = (unsigned char)vp9_read_bit(&header_bc);
+ // Read the type of subpel filter to use
+ if (vp9_read_bit(&header_bc)) {
+ pc->mcomp_filter_type = SWITCHABLE;
+ } else {
+ pc->mcomp_filter_type = vp9_read_literal(&header_bc, 2);
+ }
+#if CONFIG_COMP_INTERINTRA_PRED
+ pc->use_interintra = vp9_read_bit(&header_bc);
+#endif
+ /* To enable choice of different interpolation filters */
+ vp9_setup_interp_filters(xd, pc->mcomp_filter_type, pc);
+ }
+
+ pc->refresh_entropy_probs = vp9_read_bit(&header_bc);
+ if (pc->refresh_entropy_probs == 0) {
+ vpx_memcpy(&pc->lfc, &pc->fc, sizeof(pc->fc));
+ }
+
+ pc->refresh_last_frame = (pc->frame_type == KEY_FRAME)
+ || vp9_read_bit(&header_bc);
+
+ // Read inter mode probability context updates
+ if (pc->frame_type != KEY_FRAME) {
+ int i, j;
+ for (i = 0; i < INTER_MODE_CONTEXTS; i++) {
+ for (j = 0; j < 4; j++) {
+ if (vp9_read(&header_bc, 252)) {
+ pc->fc.vp9_mode_contexts[i][j] =
+ (vp9_prob)vp9_read_literal(&header_bc, 8);
+ }
+ }
+ }
+ }
+
+ if (0) {
+ FILE *z = fopen("decodestats.stt", "a");
+ fprintf(z, "%6d F:%d,G:%d,A:%d,L:%d,Q:%d\n",
+ pc->current_video_frame,
+ pc->frame_type,
+ pc->refresh_golden_frame,
+ pc->refresh_alt_ref_frame,
+ pc->refresh_last_frame,
+ pc->base_qindex);
+ fclose(z);
+ }
+
+ vp9_copy(pbi->common.fc.pre_coef_probs,
+ pbi->common.fc.coef_probs);
+ vp9_copy(pbi->common.fc.pre_hybrid_coef_probs,
+ pbi->common.fc.hybrid_coef_probs);
+ vp9_copy(pbi->common.fc.pre_coef_probs_8x8,
+ pbi->common.fc.coef_probs_8x8);
+ vp9_copy(pbi->common.fc.pre_hybrid_coef_probs_8x8,
+ pbi->common.fc.hybrid_coef_probs_8x8);
+ vp9_copy(pbi->common.fc.pre_coef_probs_16x16,
+ pbi->common.fc.coef_probs_16x16);
+ vp9_copy(pbi->common.fc.pre_hybrid_coef_probs_16x16,
+ pbi->common.fc.hybrid_coef_probs_16x16);
+ vp9_copy(pbi->common.fc.pre_ymode_prob, pbi->common.fc.ymode_prob);
+#if CONFIG_SUPERBLOCKS
+ vp9_copy(pbi->common.fc.pre_sb_ymode_prob, pbi->common.fc.sb_ymode_prob);
+#endif
+ vp9_copy(pbi->common.fc.pre_uv_mode_prob, pbi->common.fc.uv_mode_prob);
+ vp9_copy(pbi->common.fc.pre_bmode_prob, pbi->common.fc.bmode_prob);
+ vp9_copy(pbi->common.fc.pre_i8x8_mode_prob, pbi->common.fc.i8x8_mode_prob);
+ vp9_copy(pbi->common.fc.pre_sub_mv_ref_prob, pbi->common.fc.sub_mv_ref_prob);
+ vp9_copy(pbi->common.fc.pre_mbsplit_prob, pbi->common.fc.mbsplit_prob);
+#if CONFIG_COMP_INTERINTRA_PRED
+ pbi->common.fc.pre_interintra_prob = pbi->common.fc.interintra_prob;
+#endif
+ pbi->common.fc.pre_nmvc = pbi->common.fc.nmvc;
+ vp9_zero(pbi->common.fc.coef_counts);
+ vp9_zero(pbi->common.fc.hybrid_coef_counts);
+ vp9_zero(pbi->common.fc.coef_counts_8x8);
+ vp9_zero(pbi->common.fc.hybrid_coef_counts_8x8);
+ vp9_zero(pbi->common.fc.coef_counts_16x16);
+ vp9_zero(pbi->common.fc.hybrid_coef_counts_16x16);
+ vp9_zero(pbi->common.fc.ymode_counts);
+#if CONFIG_SUPERBLOCKS
+ vp9_zero(pbi->common.fc.sb_ymode_counts);
+#endif
+ vp9_zero(pbi->common.fc.uv_mode_counts);
+ vp9_zero(pbi->common.fc.bmode_counts);
+ vp9_zero(pbi->common.fc.i8x8_mode_counts);
+ vp9_zero(pbi->common.fc.sub_mv_ref_counts);
+ vp9_zero(pbi->common.fc.mbsplit_counts);
+ vp9_zero(pbi->common.fc.NMVcount);
+ vp9_zero(pbi->common.fc.mv_ref_ct);
+#if CONFIG_COMP_INTERINTRA_PRED
+ vp9_zero(pbi->common.fc.interintra_counts);
+#endif
+
+ read_coef_probs(pbi, &header_bc);
+
+ vpx_memcpy(&xd->pre, &pc->yv12_fb[pc->lst_fb_idx], sizeof(YV12_BUFFER_CONFIG));
+ vpx_memcpy(&xd->dst, &pc->yv12_fb[pc->new_fb_idx], sizeof(YV12_BUFFER_CONFIG));
+
+ // Create the segmentation map structure and set to 0
+ if (!pc->last_frame_seg_map)
+ CHECK_MEM_ERROR(pc->last_frame_seg_map,
+ vpx_calloc((pc->mb_rows * pc->mb_cols), 1));
+
+ /* set up the new frame for intra coded blocks */
+ vp9_setup_intra_recon(&pc->yv12_fb[pc->new_fb_idx]);
+
+ vp9_setup_block_dptrs(xd);
+
+ vp9_build_block_doffsets(xd);
+
+ /* clear out the coeff buffer */
+ vpx_memset(xd->qcoeff, 0, sizeof(xd->qcoeff));
+
+ /* Read the mb_no_coeff_skip flag */
+ pc->mb_no_coeff_skip = (int)vp9_read_bit(&header_bc);
+
+ vp9_decode_mode_mvs_init(pbi, &header_bc);
+
+ vpx_memset(pc->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * pc->mb_cols);
+
+ // Reset the macroblock mode info context to the start of the list
+ xd->mode_info_context = pc->mi;
+ xd->prev_mode_info_context = pc->prev_mi;
+
+ /* Decode a row of superblocks */
+ for (mb_row = 0; mb_row < pc->mb_rows; mb_row += 2) {
+ decode_sb_row(pbi, pc, mb_row, xd, &residual_bc);
+ }
+ corrupt_tokens |= xd->corrupted;
+
+ /* Collect information about decoder corruption. */
+ /* 1. Check first boolean decoder for errors. */
+ pc->yv12_fb[pc->new_fb_idx].corrupted = bool_error(&header_bc);
+ /* 2. Check the macroblock information */
+ pc->yv12_fb[pc->new_fb_idx].corrupted |= corrupt_tokens;
+
+ if (!pbi->decoded_key_frame) {
+ if (pc->frame_type == KEY_FRAME &&
+ !pc->yv12_fb[pc->new_fb_idx].corrupted)
+ pbi->decoded_key_frame = 1;
+ else
+ vpx_internal_error(&pbi->common.error, VPX_CODEC_CORRUPT_FRAME,
+ "A stream must start with a complete key frame");
+ }
+
+ vp9_adapt_coef_probs(pc);
+ if (pc->frame_type != KEY_FRAME) {
+ vp9_adapt_mode_probs(pc);
+ vp9_adapt_nmv_probs(pc, xd->allow_high_precision_mv);
+ vp9_update_mode_context(&pbi->common);
+ }
+
+ /* If this was a kf or Gf note the Q used */
+ if ((pc->frame_type == KEY_FRAME) ||
+ pc->refresh_golden_frame || pc->refresh_alt_ref_frame) {
+ pc->last_kf_gf_q = pc->base_qindex;
+ }
+ if (pc->refresh_entropy_probs) {
+ if (pc->refresh_alt_ref_frame)
+ vpx_memcpy(&pc->lfc_a, &pc->fc, sizeof(pc->fc));
+ else
+ vpx_memcpy(&pc->lfc, &pc->fc, sizeof(pc->fc));
+ }
+
+#ifdef PACKET_TESTING
+ {
+ FILE *f = fopen("decompressor.VP8", "ab");
+ unsigned int size = residual_bc.pos + header_bc.pos + 8;
+ fwrite((void *) &size, 4, 1, f);
+ fwrite((void *) pbi->Source, size, 1, f);
+ fclose(f);
+ }
+#endif
+ // printf("Frame %d Done\n", frame_count++);
+
+ /* Find the end of the coded buffer */
+ while (residual_bc.count > CHAR_BIT
+ && residual_bc.count < VP9_BD_VALUE_SIZE) {
+ residual_bc.count -= CHAR_BIT;
+ residual_bc.user_buffer--;
+ }
+ *p_data_end = residual_bc.user_buffer;
+ return 0;
+}
diff --git a/vp9/decoder/vp9_decodframe.h b/vp9/decoder/vp9_decodframe.h
new file mode 100644
index 0000000..2a6547e
--- /dev/null
+++ b/vp9/decoder/vp9_decodframe.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_DECODER_VP9_DECODFRAME_H_
+#define VP9_DECODER_VP9_DECODFRAME_H_
+
+struct VP9Decompressor;
+
+extern void vp9_init_de_quantizer(struct VP9Decompressor *pbi);
+
+#endif // VP9_DECODER_VP9_DECODFRAME_H_
diff --git a/vp9/decoder/vp9_dequantize.c b/vp9/decoder/vp9_dequantize.c
new file mode 100644
index 0000000..39a2de1
--- /dev/null
+++ b/vp9/decoder/vp9_dequantize.c
@@ -0,0 +1,362 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vp9_rtcd.h"
+#include "vp9/decoder/vp9_dequantize.h"
+#include "vpx_mem/vpx_mem.h"
+#include "vp9/decoder/vp9_onyxd_int.h"
+/* Adds a width x height block of residuals (diff) to the prediction (pred)
+ * and stores the sums, clamped to [0, 255], in dest.  Rows of diff are width
+ * apart, rows of pred are pitch apart and rows of dest are stride apart. */
+static void add_residual(const int16_t *diff, const uint8_t *pred, int pitch,
+                         uint8_t *dest, int stride, int width, int height) {
+  int row;
+
+  for (row = 0; row < height; ++row) {
+    int col;
+    for (col = 0; col < width; ++col) {
+      const int sum = diff[col] + pred[col];
+
+      /* Saturate to the 8-bit pixel range. */
+      dest[col] = (uint8_t)(sum < 0 ? 0 : (sum > 255 ? 255 : sum));
+    }
+
+    diff += width;
+    pred += pitch;
+    dest += stride;
+  }
+}
+
+/* Adds the single residual value diff to every pixel of a width x height
+ * prediction block and stores the sums, clamped to [0, 255], in dest.  Rows
+ * of pred are pitch apart and rows of dest are stride apart. */
+static void add_constant_residual(const int16_t diff, const uint8_t *pred,
+                                  int pitch, uint8_t *dest, int stride,
+                                  int width, int height) {
+  int row;
+
+  for (row = 0; row < height; ++row) {
+    int col;
+    for (col = 0; col < width; ++col) {
+      const int sum = diff + pred[col];
+
+      /* Saturate to the 8-bit pixel range. */
+      dest[col] = (uint8_t)(sum < 0 ? 0 : (sum > 255 ? 255 : sum));
+    }
+
+    pred += pitch;
+    dest += stride;
+  }
+}
+
+/* Dequantizes the 16 coefficients of block d:
+ * dqcoeff[i] = qcoeff[i] * dequant[i]. */
+void vp9_dequantize_b_c(BLOCKD *d) {
+  const int16_t *const quantized = d->qcoeff;
+  const int16_t *const factors = d->dequant;
+  int16_t *const out = d->dqcoeff;
+  int idx;
+
+  for (idx = 0; idx < 16; ++idx)
+    out[idx] = quantized[idx] * factors[idx];
+}
+
+
+/* 4x4 hybrid-transform reconstruction: dequantizes input in place, runs
+ * vp9_ihtllm() with tx_type, clears input, and adds the residual to pred,
+ * writing the clamped pixels to dest. */
+void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, int16_t *input,
+                               const int16_t *dq,
+                               uint8_t *pred, uint8_t *dest,
+                               int pitch, int stride, uint16_t eobs) {
+  int16_t residual[16];
+  int k;
+
+  for (k = 0; k < 16; ++k)
+    input[k] = dq[k] * input[k];
+
+  vp9_ihtllm(input, residual, 4 << 1, tx_type, 4, eobs);
+  vpx_memset(input, 0, 32);  /* 16 coefficients * 2 bytes each */
+
+  add_residual(residual, pred, pitch, dest, stride, 4, 4);
+}
+
+/* 8x8 hybrid-transform reconstruction.  With no coded coefficients the
+ * prediction is copied straight to dest; otherwise input is dequantized in
+ * place (dq[0] for the first coefficient, dq[1] for the rest), inverse
+ * transformed with vp9_ihtllm(), cleared, and the residual added to pred. */
+void vp9_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, int16_t *input,
+                                   const int16_t *dq,
+                                   uint8_t *pred, uint8_t *dest,
+                                   int pitch, int stride, uint16_t eobs) {
+  int16_t residual[64];
+  int k;
+
+  if (eobs == 0) {
+    vp9_copy_mem8x8(pred, pitch, dest, stride);
+    return;
+  }
+
+  input[0] = dq[0] * input[0];
+  for (k = 1; k < 64; ++k)
+    input[k] = dq[1] * input[k];
+  vp9_ihtllm(input, residual, 16, tx_type, 8, eobs);
+  vpx_memset(input, 0, 128);  /* 64 coefficients * 2 bytes each */
+  add_residual(residual, pred, pitch, dest, stride, 8, 8);
+}
+
+/* 4x4 DCT reconstruction: dequantizes input in place, inverse transforms it,
+ * clears input, and adds the residual to pred, writing the clamped pixels to
+ * dest. */
+void vp9_dequant_idct_add_c(int16_t *input, const int16_t *dq, uint8_t *pred,
+                            uint8_t *dest, int pitch, int stride) {
+  int16_t residual[16];
+  int k;
+
+  for (k = 0; k < 16; ++k)
+    input[k] = dq[k] * input[k];
+
+  /* The idct halves (>> 1) the pitch it is given, hence 4 << 1. */
+  vp9_short_idct4x4llm_c(input, residual, 4 << 1);
+  vpx_memset(input, 0, 32);  /* 16 coefficients * 2 bytes each */
+
+  add_residual(residual, pred, pitch, dest, stride, 4, 4);
+}
+
+/* 4x4 DCT reconstruction with a caller-supplied DC: input[0] is overwritten
+ * with Dc (used as is, not multiplied by dq[0]) and only coefficients 1..15
+ * are dequantized before the inverse transform. */
+void vp9_dequant_dc_idct_add_c(int16_t *input, const int16_t *dq, uint8_t *pred,
+                               uint8_t *dest, int pitch, int stride, int Dc) {
+  int16_t residual[16];
+  int k;
+
+  input[0] = (int16_t)Dc;
+
+  for (k = 1; k < 16; ++k)
+    input[k] = dq[k] * input[k];
+
+  /* The idct halves (>> 1) the pitch it is given, hence 4 << 1. */
+  vp9_short_idct4x4llm_c(input, residual, 4 << 1);
+  vpx_memset(input, 0, 32);  /* 16 coefficients * 2 bytes each */
+
+  add_residual(residual, pred, pitch, dest, stride, 4, 4);
+}
+
+#if CONFIG_LOSSLESS
+/* Lossless-mode 4x4 reconstruction: dequantizes input in place, applies
+ * vp9_short_inv_walsh4x4_x8_c(), clears input, and adds the residual to pred,
+ * writing the clamped pixels to dest. */
+void vp9_dequant_idct_add_lossless_c(int16_t *input, const int16_t *dq,
+                                     uint8_t *pred, uint8_t *dest,
+                                     int pitch, int stride) {
+  int16_t residual[16];
+  int k;
+
+  for (k = 0; k < 16; ++k)
+    input[k] = dq[k] * input[k];
+
+  vp9_short_inv_walsh4x4_x8_c(input, residual, 4 << 1);
+  vpx_memset(input, 0, 32);  /* 16 coefficients * 2 bytes each */
+
+  add_residual(residual, pred, pitch, dest, stride, 4, 4);
+}
+
+/* Lossless-mode 4x4 reconstruction with a caller-supplied DC: input[0] is
+ * overwritten with dc (not multiplied by dq[0]) and coefficients 1..15 are
+ * dequantized before vp9_short_inv_walsh4x4_x8_c() is applied. */
+void vp9_dequant_dc_idct_add_lossless_c(int16_t *input, const int16_t *dq,
+                                        uint8_t *pred,
+                                        uint8_t *dest,
+                                        int pitch, int stride, int dc) {
+  int16_t residual[16];
+  int k;
+
+  input[0] = (int16_t)dc;
+  for (k = 1; k < 16; ++k)
+    input[k] = dq[k] * input[k];
+
+  vp9_short_inv_walsh4x4_x8_c(input, residual, 4 << 1);
+  vpx_memset(input, 0, 32);  /* 16 coefficients * 2 bytes each */
+
+  add_residual(residual, pred, pitch, dest, stride, 4, 4);
+}
+#endif
+
+/* Dequantizes 16 coefficients of d: dqcoeff[i] = qcoeff[i] * dequant[i]. */
+void vp9_dequantize_b_2x2_c(BLOCKD *d) {
+  const int16_t *const quantized = d->qcoeff;
+  const int16_t *const factors = d->dequant;
+  int16_t *const out = d->dqcoeff;
+  int idx;
+
+  for (idx = 0; idx < 16; ++idx)
+    out[idx] = (int16_t)(quantized[idx] * factors[idx]);
+}
+
+void vp9_dequant_idct_add_8x8_c(int16_t *input, const int16_t *dq,
+ uint8_t *pred, uint8_t *dest, int pitch,
+ int stride, int dc, uint16_t eobs) {
+ int16_t output[64];
+ int16_t *diff_ptr = output;
+ int i;
+ /* 8x8 reconstruction: dequantize input, inverse transform, add to pred. */
+ /* If dc is 1, then input[0] is the reconstructed value, do not need
+ * dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1.
+ */
+ if (!dc)
+ input[0] *= dq[0];
+
+ /* The calculation can be simplified if there are not many non-zero dct
+ * coefficients. Use eobs to decide what to do.
+ * TODO(yunqingwang): "eobs = 1" case is also handled in vp9_short_idct8x8_c.
+ * Combine that with code here.
+ */
+ if (eobs == 0) {
+ /* No coded coefficients: the output is the prediction itself. */
+ vp9_copy_mem8x8(pred, pitch, dest, stride);
+ } else if (eobs == 1) {
+ /* DC only DCT coefficient. */
+ int16_t out;
+
+ /* Note: the idct1 will need to be modified accordingly whenever
+ * vp9_short_idct8x8_c() is modified. */
+ out = (input[0] + 1 + (input[0] < 0)) >> 2;
+ out = out << 3;
+ out = (out + 32) >> 7;
+
+ input[0] = 0; /* clear the one coefficient that was used */
+
+ add_constant_residual(out, pred, pitch, dest, stride, 8, 8);
+ } else if (eobs <= 10) { /* only the positions below are dequantized */
+ input[1] = input[1] * dq[1];
+ input[2] = input[2] * dq[1];
+ input[3] = input[3] * dq[1];
+ input[8] = input[8] * dq[1];
+ input[9] = input[9] * dq[1];
+ input[10] = input[10] * dq[1];
+ input[16] = input[16] * dq[1];
+ input[17] = input[17] * dq[1];
+ input[24] = input[24] * dq[1];
+
+ vp9_short_idct10_8x8_c(input, output, 16);
+ /* Clear just the ten coefficients handled in this branch. */
+ input[0] = input[1] = input[2] = input[3] = 0;
+ input[8] = input[9] = input[10] = 0;
+ input[16] = input[17] = 0;
+ input[24] = 0;
+
+ add_residual(diff_ptr, pred, pitch, dest, stride, 8, 8);
+ } else {
+ // dequantize coefficients 1..63 with dq[1] (input[0] was handled above)
+ for (i = 1; i < 64; i++) {
+ input[i] = input[i] * dq[1];
+ }
+ // the idct halves ( >> 1) the pitch
+ vp9_short_idct8x8_c(input, output, 16);
+
+ vpx_memset(input, 0, 128); /* 64 coefficients * 2 bytes each */
+
+ add_residual(diff_ptr, pred, pitch, dest, stride, 8, 8);
+
+ }
+}
+
+/* 16x16 hybrid-transform reconstruction.  With no coded coefficients the
+ * prediction is copied straight to dest; otherwise input is dequantized in
+ * place (dq[0] for the first coefficient, dq[1] for the rest), inverse
+ * transformed with vp9_ihtllm(), cleared, and the residual added to pred. */
+void vp9_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, int16_t *input,
+                                     const int16_t *dq, uint8_t *pred,
+                                     uint8_t *dest, int pitch, int stride,
+                                     uint16_t eobs) {
+  int16_t residual[256];
+  int k;
+
+  if (eobs == 0) {
+    /* Nothing was coded: the output is just the prediction. */
+    vp9_copy_mem16x16(pred, pitch, dest, stride);
+    return;
+  }
+
+  /* Undo quantization in place. */
+  input[0] = input[0] * dq[0];
+  for (k = 1; k < 256; ++k)
+    input[k] = input[k] * dq[1];
+
+  /* Inverse hybrid transform. */
+  vp9_ihtllm(input, residual, 32, tx_type, 16, eobs);
+  vpx_memset(input, 0, 512);  /* 256 coefficients * 2 bytes each */
+
+  add_residual(residual, pred, pitch, dest, stride, 16, 16);
+}
+
+void vp9_dequant_idct_add_16x16_c(int16_t *input, const int16_t *dq,
+ uint8_t *pred, uint8_t *dest, int pitch,
+ int stride, uint16_t eobs) {
+ int16_t output[256];
+ int16_t *diff_ptr = output;
+ int i;
+ /* 16x16 reconstruction: dequantize input, inverse transform, add to pred. */
+ /* The calculation can be simplified if there are not many non-zero dct
+ * coefficients. Use eobs to separate different cases. */
+ if (eobs == 0) {
+ /* No coded coefficients: the output is the prediction itself. */
+ vp9_copy_mem16x16(pred, pitch, dest, stride);
+ } else if (eobs == 1) {
+ /* DC only DCT coefficient. */
+ int16_t out;
+
+ /* Note: the idct1 will need to be modified accordingly whenever
+ * vp9_short_idct16x16_c() is modified. */
+ out = (input[0] * dq[0] + 2) >> 2;
+ out = (out + 2) >> 2;
+ out = (out + 4) >> 3;
+
+ input[0] = 0; /* clear the one coefficient that was used */
+
+ add_constant_residual(out, pred, pitch, dest, stride, 16, 16);
+ } else if (eobs <= 10) { /* only the positions below are dequantized */
+ input[0]= input[0] * dq[0];
+ input[1] = input[1] * dq[1];
+ input[2] = input[2] * dq[1];
+ input[3] = input[3] * dq[1];
+ input[16] = input[16] * dq[1];
+ input[17] = input[17] * dq[1];
+ input[18] = input[18] * dq[1];
+ input[32] = input[32] * dq[1];
+ input[33] = input[33] * dq[1];
+ input[48] = input[48] * dq[1];
+
+ // the idct halves ( >> 1) the pitch
+ vp9_short_idct10_16x16_c(input, output, 32);
+ /* Clear just the ten coefficients handled in this branch. */
+ input[0] = input[1] = input[2] = input[3] = 0;
+ input[16] = input[17] = input[18] = 0;
+ input[32] = input[33] = 0;
+ input[48] = 0;
+
+ add_residual(diff_ptr, pred, pitch, dest, stride, 16, 16);
+ } else {
+ input[0]= input[0] * dq[0];
+
+ // dequantize coefficients 1..255 with dq[1]
+ for (i = 1; i < 256; i++)
+ input[i] = input[i] * dq[1];
+
+ // the idct halves ( >> 1) the pitch
+ vp9_short_idct16x16_c(input, output, 32);
+
+ vpx_memset(input, 0, 512); /* 256 coefficients * 2 bytes each */
+
+ add_residual(diff_ptr, pred, pitch, dest, stride, 16, 16);
+ }
+}
diff --git a/vp9/decoder/vp9_dequantize.h b/vp9/decoder/vp9_dequantize.h
new file mode 100644
index 0000000..f348b21
--- /dev/null
+++ b/vp9/decoder/vp9_dequantize.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_DECODER_VP9_DEQUANTIZE_H_
+#define VP9_DECODER_VP9_DEQUANTIZE_H_
+#include "vp9/common/vp9_blockd.h"
+
+#if CONFIG_LOSSLESS
+extern void vp9_dequant_idct_add_lossless_c(short *input, const short *dq,
+ unsigned char *pred,
+ unsigned char *output,
+ int pitch, int stride);
+extern void vp9_dequant_dc_idct_add_lossless_c(short *input, const short *dq,
+ unsigned char *pred,
+ unsigned char *output,
+ int pitch, int stride, int dc);
+extern void vp9_dequant_dc_idct_add_y_block_lossless_c(short *q,
+ const short *dq,
+ unsigned char *pre,
+ unsigned char *dst,
+ int stride,
+ unsigned short *eobs,
+ const short *dc);
+extern void vp9_dequant_idct_add_y_block_lossless_c(short *q, const short *dq,
+ unsigned char *pre,
+ unsigned char *dst,
+ int stride,
+ unsigned short *eobs);
+extern void vp9_dequant_idct_add_uv_block_lossless_c(short *q, const short *dq,
+ unsigned char *pre,
+ unsigned char *dst_u,
+ unsigned char *dst_v,
+ int stride,
+ unsigned short *eobs);
+#endif
+
+typedef void (*vp9_dequant_idct_add_fn_t)(short *input, const short *dq,
+ unsigned char *pred, unsigned char *output, int pitch, int stride);
+typedef void(*vp9_dequant_dc_idct_add_fn_t)(short *input, const short *dq,
+ unsigned char *pred, unsigned char *output, int pitch, int stride, int dc);
+
+typedef void(*vp9_dequant_dc_idct_add_y_block_fn_t)(short *q, const short *dq,
+ unsigned char *pre, unsigned char *dst, int stride, unsigned short *eobs,
+ const short *dc);
+typedef void(*vp9_dequant_idct_add_y_block_fn_t)(short *q, const short *dq,
+ unsigned char *pre, unsigned char *dst, int stride, unsigned short *eobs);
+typedef void(*vp9_dequant_idct_add_uv_block_fn_t)(short *q, const short *dq,
+ unsigned char *pre, unsigned char *dst_u, unsigned char *dst_v, int stride,
+ unsigned short *eobs);
+
+void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, short *input, const short *dq,
+ unsigned char *pred, unsigned char *dest,
+ int pitch, int stride, uint16_t eobs);
+
+void vp9_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, short *input,
+ const short *dq, unsigned char *pred,
+ unsigned char *dest, int pitch, int stride,
+ uint16_t eobs);
+
+void vp9_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, short *input,
+ const short *dq, unsigned char *pred,
+ unsigned char *dest,
+ int pitch, int stride, uint16_t eobs);
+
+#if CONFIG_SUPERBLOCKS
+void vp9_dequant_dc_idct_add_y_block_8x8_inplace_c(short *q, const short *dq,
+ unsigned char *dst,
+ int stride,
+ unsigned short *eobs,
+ const short *dc,
+ MACROBLOCKD *xd);
+
+void vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(short *q, const short *dq,
+ unsigned char *dst,
+ int stride,
+ unsigned short *eobs,
+ const short *dc,
+ MACROBLOCKD *xd);
+
+void vp9_dequant_idct_add_uv_block_8x8_inplace_c(short *q, const short *dq,
+ unsigned char *dstu,
+ unsigned char *dstv,
+ int stride,
+ unsigned short *eobs,
+ MACROBLOCKD *xd);
+
+void vp9_dequant_idct_add_uv_block_4x4_inplace_c(short *q, const short *dq,
+ unsigned char *dstu,
+ unsigned char *dstv,
+ int stride,
+ unsigned short *eobs,
+ MACROBLOCKD *xd);
+#endif
+
+#endif
diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c
new file mode 100644
index 0000000..897ad52
--- /dev/null
+++ b/vp9/decoder/vp9_detokenize.c
@@ -0,0 +1,463 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vp9/common/vp9_type_aliases.h"
+#include "vp9/common/vp9_blockd.h"
+#include "vp9/decoder/vp9_onyxd_int.h"
+#include "vpx_mem/vpx_mem.h"
+#include "vpx_ports/mem.h"
+#include "vp9/decoder/vp9_detokenize.h"
+
+#include "vp9/common/vp9_seg_common.h"
+
+#define EOB_CONTEXT_NODE 0
+#define ZERO_CONTEXT_NODE 1
+#define ONE_CONTEXT_NODE 2
+#define LOW_VAL_CONTEXT_NODE 3
+#define TWO_CONTEXT_NODE 4
+#define THREE_CONTEXT_NODE 5
+#define HIGH_LOW_CONTEXT_NODE 6
+#define CAT_ONE_CONTEXT_NODE 7
+#define CAT_THREEFOUR_CONTEXT_NODE 8
+#define CAT_THREE_CONTEXT_NODE 9
+#define CAT_FIVE_CONTEXT_NODE 10
+
+#define CAT1_MIN_VAL 5
+#define CAT2_MIN_VAL 7
+#define CAT3_MIN_VAL 11
+#define CAT4_MIN_VAL 19
+#define CAT5_MIN_VAL 35
+#define CAT6_MIN_VAL 67
+#define CAT1_PROB0 159
+#define CAT2_PROB0 145
+#define CAT2_PROB1 165
+
+#define CAT3_PROB0 140
+#define CAT3_PROB1 148
+#define CAT3_PROB2 173
+
+#define CAT4_PROB0 135
+#define CAT4_PROB1 140
+#define CAT4_PROB2 155
+#define CAT4_PROB3 176
+
+#define CAT5_PROB0 130
+#define CAT5_PROB1 134
+#define CAT5_PROB2 141
+#define CAT5_PROB3 157
+#define CAT5_PROB4 180
+
+static const unsigned char cat6_prob[14] =
+{ 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0 };
+
+/* Clears the above/left entropy contexts of the current macroblock.  For
+ * B_PRED, I8X8_PRED and SPLITMV blocks not using the 16x16 transform, the
+ * last byte of each context is not cleared and y2 is set to 1 instead. */
+void vp9_reset_mb_tokens_context(MACROBLOCKD* const xd) {
+  const int skip_y2 = xd->mode_info_context->mbmi.txfm_size != TX_16X16 &&
+                      (xd->mode_info_context->mbmi.mode == B_PRED ||
+                       xd->mode_info_context->mbmi.mode == I8X8_PRED ||
+                       xd->mode_info_context->mbmi.mode == SPLITMV);
+  vpx_memset(xd->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) - skip_y2);
+  vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) - skip_y2);
+  if (skip_y2) {
+    xd->above_context->y2 = 1;
+    xd->left_context->y2 = 1;
+  }
+}
+
+DECLARE_ALIGNED(16, extern const unsigned char, vp9_norm[256]);
+
+/* Decodes one bit from br with the range split in half and returns
+ * value_to_sign, negated when br->value is at or above the split point. */
+static int get_signed(BOOL_DECODER *br, int value_to_sign) {
+  const int split = (br->range + 1) >> 1;
+  const VP9_BD_VALUE bigsplit = (VP9_BD_VALUE)split << (VP9_BD_VALUE_SIZE - 8);
+  int negative;
+
+  if (br->count < 0)
+    vp9_bool_decoder_fill(br);
+  negative = !(br->value < bigsplit);
+  if (negative) {
+    br->range = br->range - split;
+    br->value = br->value - bigsplit;
+  } else {
+    br->range = split;
+  }
+  /* Shift out the consumed bit. */
+  br->range += br->range;
+  br->value += br->value;
+  --br->count;
+  return negative ? -value_to_sign : value_to_sign;
+}
+
+#define INCREMENT_COUNT(token) /* uses coef_counts, coef_bands, c, pt */ \
+ do { \
+ coef_counts[coef_bands[c]][pt][token]++; /* tally token for band/context */ \
+ pt = vp9_prev_token_class[token]; /* context for the next token */ \
+ } while (0)
+
+#define WRITE_COEF_CONTINUE(val, token) \
+ { /* plain braces, not do/while(0): the continue must hit the caller's loop */ \
+ qcoeff_ptr[scan[c]] = (INT16) get_signed(br, val); /* store signed value */ \
+ INCREMENT_COUNT(token); \
+ c++; \
+ continue; \
+ }
+
+/* If a bit read with probability prob is set, adds 1 << bits_count to val.
+ * Expands to one statement without a trailing ';' -- the caller supplies it. */
+#define ADJUST_COEF(prob, bits_count) \
+  do { if (vp9_read(br, (prob))) val += (UINT16)(1 << (bits_count)); } \
+  while (0)
+
+/* Decodes the coefficient tokens of one transform block from br into
+ * qcoeff_ptr (at the positions given by scan), updating the token counts in
+ * dx->common.fc along the way.  Decoding stops at an end-of-block token or
+ * once seg_eob positions have been consumed.  Stores (c > !type) in *a and
+ * *l and returns c, the end-of-block position.  xd is currently unused. */
+static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd,
+                        BOOL_DECODER* const br,
+                        ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
+                        PLANE_TYPE type, TX_TYPE tx_type,
+                        int seg_eob, INT16 *qcoeff_ptr, const int *const scan,
+                        TX_SIZE txfm_size, const int *coef_bands) {
+  FRAME_CONTEXT *const fc = &dx->common.fc;
+  int pt, c = (type == PLANE_TYPE_Y_NO_DC);
+  vp9_prob (*coef_probs)[PREV_COEF_CONTEXTS][ENTROPY_NODES], *prob;
+  unsigned int (*coef_counts)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS];
+
+  switch (txfm_size) {
+    default:
+    case TX_4X4:
+      if (tx_type == DCT_DCT) {
+        coef_probs = fc->coef_probs[type];
+        coef_counts = fc->coef_counts[type];
+      } else {
+        coef_probs = fc->hybrid_coef_probs[type];
+        coef_counts = fc->hybrid_coef_counts[type];
+      }
+      break;
+    case TX_8X8:
+      if (tx_type == DCT_DCT) {
+        coef_probs = fc->coef_probs_8x8[type];
+        coef_counts = fc->coef_counts_8x8[type];
+      } else {
+        coef_probs = fc->hybrid_coef_probs_8x8[type];
+        coef_counts = fc->hybrid_coef_counts_8x8[type];
+      }
+      break;
+    case TX_16X16:
+      if (tx_type == DCT_DCT) {
+        coef_probs = fc->coef_probs_16x16[type];
+        coef_counts = fc->coef_counts_16x16[type];
+      } else {
+        coef_probs = fc->hybrid_coef_probs_16x16[type];
+        coef_counts = fc->hybrid_coef_counts_16x16[type];
+      }
+      break;
+  }
+
+  VP9_COMBINEENTROPYCONTEXTS(pt, *a, *l);
+  while (1) {
+    int val;
+    const uint8_t *cat6 = cat6_prob;
+    if (c >= seg_eob) break;
+    prob = coef_probs[coef_bands[c]][pt];
+    if (!vp9_read(br, prob[EOB_CONTEXT_NODE])) break;
+SKIP_START:
+    if (!vp9_read(br, prob[ZERO_CONTEXT_NODE])) {
+      INCREMENT_COUNT(ZERO_TOKEN);
+      /* Stop here so coef_bands[] is never indexed with c == seg_eob. */
+      if (++c >= seg_eob) break;
+      prob = coef_probs[coef_bands[c]][pt];
+      goto SKIP_START;
+    }
+    // ONE_CONTEXT_NODE_0_
+    if (!vp9_read(br, prob[ONE_CONTEXT_NODE])) {
+      WRITE_COEF_CONTINUE(1, ONE_TOKEN);
+    }
+    // LOW_VAL_CONTEXT_NODE_0_
+    if (!vp9_read(br, prob[LOW_VAL_CONTEXT_NODE])) {
+      if (!vp9_read(br, prob[TWO_CONTEXT_NODE])) {
+        WRITE_COEF_CONTINUE(2, TWO_TOKEN);
+      }
+      if (!vp9_read(br, prob[THREE_CONTEXT_NODE])) {
+        WRITE_COEF_CONTINUE(3, THREE_TOKEN);
+      }
+      WRITE_COEF_CONTINUE(4, FOUR_TOKEN);
+    }
+    // HIGH_LOW_CONTEXT_NODE_0_
+    if (!vp9_read(br, prob[HIGH_LOW_CONTEXT_NODE])) {
+      if (!vp9_read(br, prob[CAT_ONE_CONTEXT_NODE])) {
+        val = CAT1_MIN_VAL;
+        ADJUST_COEF(CAT1_PROB0, 0);
+        WRITE_COEF_CONTINUE(val, DCT_VAL_CATEGORY1);
+      }
+      val = CAT2_MIN_VAL;
+      ADJUST_COEF(CAT2_PROB1, 1);
+      ADJUST_COEF(CAT2_PROB0, 0);
+      WRITE_COEF_CONTINUE(val, DCT_VAL_CATEGORY2);
+    }
+    // CAT_THREEFOUR_CONTEXT_NODE_0_
+    if (!vp9_read(br, prob[CAT_THREEFOUR_CONTEXT_NODE])) {
+      if (!vp9_read(br, prob[CAT_THREE_CONTEXT_NODE])) {
+        val = CAT3_MIN_VAL;
+        ADJUST_COEF(CAT3_PROB2, 2);
+        ADJUST_COEF(CAT3_PROB1, 1);
+        ADJUST_COEF(CAT3_PROB0, 0);
+        WRITE_COEF_CONTINUE(val, DCT_VAL_CATEGORY3);
+      }
+      val = CAT4_MIN_VAL;
+      ADJUST_COEF(CAT4_PROB3, 3);
+      ADJUST_COEF(CAT4_PROB2, 2);
+      ADJUST_COEF(CAT4_PROB1, 1);
+      ADJUST_COEF(CAT4_PROB0, 0);
+      WRITE_COEF_CONTINUE(val, DCT_VAL_CATEGORY4);
+    }
+    // CAT_FIVE_CONTEXT_NODE_0_:
+    if (!vp9_read(br, prob[CAT_FIVE_CONTEXT_NODE])) {
+      val = CAT5_MIN_VAL;
+      ADJUST_COEF(CAT5_PROB4, 4);
+      ADJUST_COEF(CAT5_PROB3, 3);
+      ADJUST_COEF(CAT5_PROB2, 2);
+      ADJUST_COEF(CAT5_PROB1, 1);
+      ADJUST_COEF(CAT5_PROB0, 0);
+      WRITE_COEF_CONTINUE(val, DCT_VAL_CATEGORY5);
+    }
+    val = 0;
+    while (*cat6) {
+      val = (val << 1) | vp9_read(br, *cat6++);
+    }
+    val += CAT6_MIN_VAL;
+    WRITE_COEF_CONTINUE(val, DCT_VAL_CATEGORY6);
+  }
+
+  if (c < seg_eob)
+    coef_counts[coef_bands[c]][pt][DCT_EOB_TOKEN]++;
+  a[0] = l[0] = (c > !type);
+  return c;
+}
+
+/* Returns the segment's SEG_LVL_EOB value when that feature is active and
+ * does not exceed eob_max; otherwise returns eob_max. */
+static int get_eob(MACROBLOCKD* const xd, int segment_id, int eob_max) {
+  const int active = vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB);
+  const int seg_eob = vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
+
+  return (active && seg_eob <= eob_max) ? seg_eob : eob_max;
+}
+
+
+static int vp9_decode_mb_tokens_16x16(VP9D_COMP* const pbi,
+ MACROBLOCKD* const xd,
+ BOOL_DECODER* const bc) {
+ ENTROPY_CONTEXT* const A = (ENTROPY_CONTEXT *)xd->above_context;
+ ENTROPY_CONTEXT* const L = (ENTROPY_CONTEXT *)xd->left_context;
+ unsigned short* const eobs = xd->eobs;
+ const int segment_id = xd->mode_info_context->mbmi.segment_id;
+ int c, i, eobtotal = 0, seg_eob;
+ // Decodes one 16x16 luma block and two 8x8 chroma blocks; returns the eob sum.
+ // Luma block
+ eobs[0] = c = decode_coefs(pbi, xd, bc, A, L, PLANE_TYPE_Y_WITH_DC,
+ get_tx_type(xd, &xd->block[0]),
+ get_eob(xd, segment_id, 256),
+ xd->qcoeff, vp9_default_zig_zag1d_16x16,
+ TX_16X16, vp9_coef_bands_16x16);
+ A[1] = A[2] = A[3] = A[0]; // copy the block's context to the next 3 entries
+ L[1] = L[2] = L[3] = L[0];
+ eobtotal += c;
+
+ // 8x8 chroma blocks
+ seg_eob = get_eob(xd, segment_id, 64);
+ for (i = 16; i < 24; i += 4) {
+ ENTROPY_CONTEXT* const a = A + vp9_block2above_8x8[i];
+ ENTROPY_CONTEXT* const l = L + vp9_block2left_8x8[i];
+
+ eobs[i] = c = decode_coefs(pbi, xd, bc, a, l, PLANE_TYPE_UV,
+ DCT_DCT, seg_eob, xd->block[i].qcoeff,
+ vp9_default_zig_zag1d_8x8,
+ TX_8X8, vp9_coef_bands_8x8);
+ a[1] = a[0]; // an 8x8 block covers two context entries
+ l[1] = l[0];
+ eobtotal += c;
+ }
+ A[8] = 0; // NOTE(review): entry 8 is presumably the y2 context -- confirm
+ L[8] = 0;
+ return eobtotal;
+}
+
+static int vp9_decode_mb_tokens_8x8(VP9D_COMP* const pbi,
+ MACROBLOCKD* const xd,
+ BOOL_DECODER* const bc) {
+ ENTROPY_CONTEXT *const A = (ENTROPY_CONTEXT *)xd->above_context;
+ ENTROPY_CONTEXT *const L = (ENTROPY_CONTEXT *)xd->left_context;
+ unsigned short *const eobs = xd->eobs;
+ PLANE_TYPE type;
+ int c, i, eobtotal = 0, seg_eob;
+ const int segment_id = xd->mode_info_context->mbmi.segment_id;
+ // Decodes the optional Y2 block, four 8x8 luma blocks, then chroma.
+ int has_2nd_order = get_2nd_order_usage(xd);
+ // 2nd order DC block
+ if (has_2nd_order) {
+ ENTROPY_CONTEXT *const a = A + vp9_block2above_8x8[24];
+ ENTROPY_CONTEXT *const l = L + vp9_block2left_8x8[24];
+
+ eobs[24] = c = decode_coefs(pbi, xd, bc, a, l, PLANE_TYPE_Y2,
+ DCT_DCT, get_eob(xd, segment_id, 4),
+ xd->block[24].qcoeff,
+ vp9_default_zig_zag1d, TX_8X8, vp9_coef_bands);
+ eobtotal += c - 4; // NOTE(review): reason for the -4 offset not visible here
+ type = PLANE_TYPE_Y_NO_DC;
+ } else {
+ xd->above_context->y2 = 1;
+ xd->left_context->y2 = 1;
+ eobs[24] = 0;
+ type = PLANE_TYPE_Y_WITH_DC;
+ }
+
+ // luma blocks
+ seg_eob = get_eob(xd, segment_id, 64);
+ for (i = 0; i < 16; i += 4) {
+ ENTROPY_CONTEXT *const a = A + vp9_block2above_8x8[i];
+ ENTROPY_CONTEXT *const l = L + vp9_block2left_8x8[i];
+
+ eobs[i] = c = decode_coefs(pbi, xd, bc, a, l, type,
+ type == PLANE_TYPE_Y_WITH_DC ?
+ get_tx_type(xd, xd->block + i) : DCT_DCT,
+ seg_eob, xd->block[i].qcoeff,
+ vp9_default_zig_zag1d_8x8,
+ TX_8X8, vp9_coef_bands_8x8);
+ a[1] = a[0]; // an 8x8 block covers two context entries
+ l[1] = l[0];
+ eobtotal += c;
+ }
+
+ // chroma blocks
+ if (xd->mode_info_context->mbmi.mode == I8X8_PRED ||
+ xd->mode_info_context->mbmi.mode == SPLITMV) {
+ // use 4x4 transform for U, V components in I8X8/splitmv prediction mode
+ seg_eob = get_eob(xd, segment_id, 16);
+ for (i = 16; i < 24; i++) {
+ ENTROPY_CONTEXT *const a = A + vp9_block2above[i];
+ ENTROPY_CONTEXT *const l = L + vp9_block2left[i];
+
+ eobs[i] = c = decode_coefs(pbi, xd, bc, a, l, PLANE_TYPE_UV,
+ DCT_DCT, seg_eob, xd->block[i].qcoeff,
+ vp9_default_zig_zag1d, TX_4X4, vp9_coef_bands);
+ eobtotal += c;
+ }
+ } else { // seg_eob still holds the 64-coefficient limit from the luma pass
+ for (i = 16; i < 24; i += 4) {
+ ENTROPY_CONTEXT *const a = A + vp9_block2above_8x8[i];
+ ENTROPY_CONTEXT *const l = L + vp9_block2left_8x8[i];
+
+ eobs[i] = c = decode_coefs(pbi, xd, bc, a, l, PLANE_TYPE_UV,
+ DCT_DCT, seg_eob, xd->block[i].qcoeff,
+ vp9_default_zig_zag1d_8x8,
+ TX_8X8, vp9_coef_bands_8x8);
+ a[1] = a[0];
+ l[1] = l[0];
+ eobtotal += c;
+ }
+ }
+
+ return eobtotal;
+}
+
+/* Decodes the coefficient tokens of 4x4 block i of the current macroblock into
+ * xd->qcoeff + i * 16, records the end-of-block position in xd->eobs[i] and
+ * returns it.  For PLANE_TYPE_Y_WITH_DC blocks the transform type picks the
+ * scan order (row scan for ADST_DCT, column scan for DCT_ADST); everything
+ * else uses the default zig-zag. */
+int vp9_decode_coefs_4x4(VP9D_COMP *dx, MACROBLOCKD *xd,
+                         BOOL_DECODER* const bc,
+                         PLANE_TYPE type, int i) {
+  ENTROPY_CONTEXT *const above =
+      (ENTROPY_CONTEXT *)xd->above_context + vp9_block2above[i];
+  ENTROPY_CONTEXT *const left =
+      (ENTROPY_CONTEXT *)xd->left_context + vp9_block2left[i];
+  const int seg_eob =
+      get_eob(xd, xd->mode_info_context->mbmi.segment_id, 16);
+  TX_TYPE tx_type = DCT_DCT;
+  const int *scan;
+  int eob;
+
+  if (type == PLANE_TYPE_Y_WITH_DC)
+    tx_type = get_tx_type_4x4(xd, &xd->block[i]);
+
+  if (tx_type == ADST_DCT)
+    scan = vp9_row_scan;
+  else if (tx_type == DCT_ADST)
+    scan = vp9_col_scan;
+  else
+    scan = vp9_default_zig_zag1d;
+
+  eob = decode_coefs(dx, xd, bc, above, left, type,
+                     tx_type, seg_eob, &xd->qcoeff[0] + i * 16,
+                     scan, TX_4X4, vp9_coef_bands);
+  xd->eobs[i] = eob;
+  return eob;
+}
+
+/* Decodes the 4x4 chroma blocks (indices 16..23) and returns their eob sum. */
+int vp9_decode_mb_tokens_4x4_uv(VP9D_COMP* const dx,
+                                MACROBLOCKD* const xd,
+                                BOOL_DECODER* const bc) {
+  int total = 0;
+  int blk = 16;
+  while (blk < 24)
+    total += vp9_decode_coefs_4x4(dx, xd, bc, PLANE_TYPE_UV, blk++);
+  return total;
+}
+
+/* Decodes every block of a macroblock coded with the 4x4 transform and returns
+ * the eob total.  With a second-order (Y2) block, that block is decoded first
+ * (its eob minus 16 is added) and luma uses PLANE_TYPE_Y_NO_DC; without one
+ * the y2 contexts are set to 1 and eobs[24] to 0. */
+static int vp9_decode_mb_tokens_4x4(VP9D_COMP* const dx,
+                                    MACROBLOCKD* const xd,
+                                    BOOL_DECODER* const bc) {
+  PLANE_TYPE luma_type = PLANE_TYPE_Y_WITH_DC;
+  int total = 0;
+  int blk;
+
+  if (get_2nd_order_usage(xd)) {
+    total = vp9_decode_coefs_4x4(dx, xd, bc, PLANE_TYPE_Y2, 24) - 16;
+    luma_type = PLANE_TYPE_Y_NO_DC;
+  } else {
+    xd->above_context->y2 = 1;
+    xd->left_context->y2 = 1;
+    xd->eobs[24] = 0;
+  }
+  for (blk = 0; blk < 16; ++blk)
+    total += vp9_decode_coefs_4x4(dx, xd, bc, luma_type, blk);
+  total += vp9_decode_mb_tokens_4x4_uv(dx, xd, bc);
+  return total;
+}
+
+/* Decodes the tokens of the current macroblock with the routine matching its
+ * transform size and returns the resulting eob total. */
+int vp9_decode_mb_tokens(VP9D_COMP* const dx,
+                         MACROBLOCKD* const xd,
+                         BOOL_DECODER* const bc) {
+  const TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size;
+
+  switch (tx_size) {
+    case TX_16X16:
+      return vp9_decode_mb_tokens_16x16(dx, xd, bc);
+    case TX_8X8:
+      return vp9_decode_mb_tokens_8x8(dx, xd, bc);
+    default:
+      assert(tx_size == TX_4X4);
+      return vp9_decode_mb_tokens_4x4(dx, xd, bc);
+  }
+}
diff --git a/vp9/decoder/vp9_detokenize.h b/vp9/decoder/vp9_detokenize.h
new file mode 100644
index 0000000..9b319d4
--- /dev/null
+++ b/vp9/decoder/vp9_detokenize.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_DECODER_VP9_DETOKENIZE_H_
+#define VP9_DECODER_VP9_DETOKENIZE_H_
+
+#include "vp9/decoder/vp9_onyxd_int.h"
+
+void vp9_reset_mb_tokens_context(MACROBLOCKD* const);
+
+int vp9_decode_coefs_4x4(VP9D_COMP *dx, MACROBLOCKD *xd,
+ BOOL_DECODER* const bc,
+ PLANE_TYPE type, int i);
+
+int vp9_decode_mb_tokens(VP9D_COMP* const, MACROBLOCKD* const,
+ BOOL_DECODER* const);
+
+int vp9_decode_mb_tokens_4x4_uv(VP9D_COMP* const dx, MACROBLOCKD* const xd,
+ BOOL_DECODER* const bc);
+
+#endif /* VP9_DECODER_VP9_DETOKENIZE_H_ */
diff --git a/vp9/decoder/vp9_idct_blk.c b/vp9/decoder/vp9_idct_blk.c
new file mode 100644
index 0000000..72ef52b
--- /dev/null
+++ b/vp9/decoder/vp9_idct_blk.c
@@ -0,0 +1,373 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vp9_rtcd.h"
+#include "vp9/common/vp9_blockd.h"
+#if CONFIG_LOSSLESS
+#include "vp9/decoder/vp9_dequantize.h"
+#endif
+
+void vp9_dequant_dc_idct_add_y_block_c(short *q, const short *dq,
+ unsigned char *pre,
+ unsigned char *dst,
+ int stride, unsigned short *eobs,
+ const short *dc) {
+ int i, j;
+ /* 4x4 grid of 4x4 blocks; each block has its own q[16], eobs and dc entry. */
+ for (i = 0; i < 4; i++) {
+ for (j = 0; j < 4; j++) {
+ if (*eobs++ > 1)  /* eob > 1: full dequant + IDCT, else the DC-only path */
+ vp9_dequant_dc_idct_add_c(q, dq, pre, dst, 16, stride, dc[0]);
+ else
+ vp9_dc_only_idct_add_c(dc[0], pre, dst, 16, stride);
+ /* Next block in this row: 16 coefficients on, 4 pixels to the right. */
+ q += 16;
+ pre += 4;
+ dst += 4;
+ dc++;
+ }
+ /* Undo the 16 pixels walked and move down 4 rows (pre uses a pitch of 16). */
+ pre += 64 - 16;
+ dst += 4 * stride - 16;
+ }
+}
+
+#if CONFIG_SUPERBLOCKS
+void vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(short *q, const short *dq,
+ unsigned char *dst,
+ int stride,
+ unsigned short *eobs,
+ const short *dc,
+ MACROBLOCKD *xd) {
+ int i, j;
+ /* In-place variant: dst is passed as both prediction and destination. */
+ for (i = 0; i < 4; i++) {
+ for (j = 0; j < 4; j++) {
+ if (*eobs++ > 1)
+ vp9_dequant_dc_idct_add_c(q, dq, dst, dst, stride, stride, dc[0]);
+ else
+ vp9_dc_only_idct_add_c(dc[0], dst, dst, stride, stride);
+ /* Next block in this row. NOTE(review): xd is not used in this function. */
+ q += 16;
+ dst += 4;
+ dc++;
+ }
+ /* Undo the 16 pixels walked and move down 4 rows. */
+ dst += 4 * stride - 16;
+ }
+}
+#endif
+
+void vp9_dequant_idct_add_y_block_c(short *q, const short *dq,
+ unsigned char *pre,
+ unsigned char *dst,
+ int stride, unsigned short *eobs) {
+ int i, j;
+ /* 4x4 grid of 4x4 blocks; the DC-only path uses q[0] * dq[0] as its input. */
+ for (i = 0; i < 4; i++) {
+ for (j = 0; j < 4; j++) {
+ if (*eobs++ > 1)
+ vp9_dequant_idct_add_c(q, dq, pre, dst, 16, stride);
+ else {
+ vp9_dc_only_idct_add_c(q[0]*dq[0], pre, dst, 16, stride);
+ ((int *)q)[0] = 0;  /* one int-sized zero store at q; NOTE(review): accesses short data through int * */
+ }
+ /* Next block in this row: 16 coefficients on, 4 pixels to the right. */
+ q += 16;
+ pre += 4;
+ dst += 4;
+ }
+ /* Undo the 16 pixels walked and move down 4 rows (pre uses a pitch of 16). */
+ pre += 64 - 16;
+ dst += 4 * stride - 16;
+ }
+}
+
+void vp9_dequant_idct_add_uv_block_c(short *q, const short *dq,
+ unsigned char *pre, unsigned char *dstu,
+ unsigned char *dstv, int stride,
+ unsigned short *eobs) {
+ int i, j;
+ /* First loop nest: 2x2 grid of 4x4 blocks written to dstu. */
+ for (i = 0; i < 2; i++) {
+ for (j = 0; j < 2; j++) {
+ if (*eobs++ > 1)
+ vp9_dequant_idct_add_c(q, dq, pre, dstu, 8, stride);
+ else {
+ vp9_dc_only_idct_add_c(q[0]*dq[0], pre, dstu, 8, stride);
+ ((int *)q)[0] = 0;  /* one int-sized zero store at q */
+ }
+ /* Next block in this row. */
+ q += 16;
+ pre += 4;
+ dstu += 4;
+ }
+ /* Undo the 8 pixels walked and move down 4 rows (pre uses a pitch of 8). */
+ pre += 32 - 8;
+ dstu += 4 * stride - 8;
+ }
+ /* Second loop nest: same walk into dstv; q, pre and eobs simply continue. */
+ for (i = 0; i < 2; i++) {
+ for (j = 0; j < 2; j++) {
+ if (*eobs++ > 1)
+ vp9_dequant_idct_add_c(q, dq, pre, dstv, 8, stride);
+ else {
+ vp9_dc_only_idct_add_c(q[0]*dq[0], pre, dstv, 8, stride);
+ ((int *)q)[0] = 0;
+ }
+
+ q += 16;
+ pre += 4;
+ dstv += 4;
+ }
+
+ pre += 32 - 8;
+ dstv += 4 * stride - 8;
+ }
+}
+
+#if CONFIG_SUPERBLOCKS
+void vp9_dequant_idct_add_uv_block_4x4_inplace_c(short *q, const short *dq,
+ unsigned char *dstu,
+ unsigned char *dstv,
+ int stride,
+ unsigned short *eobs,
+ MACROBLOCKD *xd) {
+ int i, j;
+ /* In-place variant: each dst pointer is also passed as the prediction. */
+ for (i = 0; i < 2; i++) {
+ for (j = 0; j < 2; j++) {
+ if (*eobs++ > 1) {
+ vp9_dequant_idct_add_c(q, dq, dstu, dstu, stride, stride);
+ } else {
+ vp9_dc_only_idct_add_c(q[0]*dq[0], dstu, dstu, stride, stride);
+ ((int *)q)[0] = 0;  /* one int-sized zero store at q */
+ }
+ /* Next block in this row. NOTE(review): xd is not used in this function. */
+ q += 16;
+ dstu += 4;
+ }
+ /* Undo the 8 pixels walked and move down 4 rows. */
+ dstu += 4 * stride - 8;
+ }
+ /* Same walk into dstv; q and eobs continue from the first loop nest. */
+ for (i = 0; i < 2; i++) {
+ for (j = 0; j < 2; j++) {
+ if (*eobs++ > 1) {
+ vp9_dequant_idct_add_c(q, dq, dstv, dstv, stride, stride);
+ } else {
+ vp9_dc_only_idct_add_c(q[0]*dq[0], dstv, dstv, stride, stride);
+ ((int *)q)[0] = 0;
+ }
+
+ q += 16;
+ dstv += 4;
+ }
+
+ dstv += 4 * stride - 8;
+ }
+}
+#endif
+
+void vp9_dequant_dc_idct_add_y_block_8x8_c(short *q, const short *dq,
+ unsigned char *pre,
+ unsigned char *dst,
+ int stride, unsigned short *eobs,
+ const short *dc,
+ MACROBLOCKD *xd) {
+ q[0] = dc[0];  /* coefficient 0 of each 64-entry block is replaced by a dc[] entry */
+ vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride, 1, xd->eobs[0]);
+ /* Second block: coefficients at q[64], 8 pixels to the right. */
+ q[64] = dc[1];
+ vp9_dequant_idct_add_8x8_c(&q[64], dq, pre + 8, dst + 8, 16, stride, 1,
+ xd->eobs[4]);
+ /* Third block: coefficients at q[128], 8 rows down (pre uses a pitch of 16). */
+ q[128] = dc[4];
+ vp9_dequant_idct_add_8x8_c(&q[128], dq, pre + 8 * 16,
+ dst + 8 * stride, 16, stride, 1, xd->eobs[8]);
+ /* Fourth block. NOTE(review): eobs is unused; xd->eobs[] is read instead. */
+ q[192] = dc[8];
+ vp9_dequant_idct_add_8x8_c(&q[192], dq, pre + 8 * 16 + 8,
+ dst + 8 * stride + 8, 16, stride, 1,
+ xd->eobs[12]);
+}
+
+#if CONFIG_SUPERBLOCKS
+void vp9_dequant_dc_idct_add_y_block_8x8_inplace_c(short *q, const short *dq,
+ unsigned char *dst,
+ int stride,
+ unsigned short *eobs,
+ const short *dc,
+ MACROBLOCKD *xd) {
+ q[0] = dc[0];  /* in-place variant: dst doubles as prediction, pitch == stride */
+ vp9_dequant_idct_add_8x8_c(q, dq, dst, dst, stride, stride, 1, xd->eobs[0]);
+ /* Second block: coefficients at q[64], 8 pixels to the right. */
+ q[64] = dc[1];
+ vp9_dequant_idct_add_8x8_c(&q[64], dq, dst + 8,
+ dst + 8, stride, stride, 1, xd->eobs[4]);
+ /* Third block: coefficients at q[128], 8 rows down. */
+ q[128] = dc[4];
+ vp9_dequant_idct_add_8x8_c(&q[128], dq, dst + 8 * stride,
+ dst + 8 * stride, stride, stride, 1,
+ xd->eobs[8]);
+ /* Fourth block. NOTE(review): eobs is unused; xd->eobs[] is read instead. */
+ q[192] = dc[8];
+ vp9_dequant_idct_add_8x8_c(&q[192], dq, dst + 8 * stride + 8,
+ dst + 8 * stride + 8, stride, stride, 1,
+ xd->eobs[12]);
+}
+#endif
+
+void vp9_dequant_idct_add_y_block_8x8_c(short *q, const short *dq,
+ unsigned char *pre,
+ unsigned char *dst,
+ int stride, unsigned short *eobs,
+ MACROBLOCKD *xd) {
+ unsigned char *origdest = dst;
+ unsigned char *origpred = pre;
+ /* Four 8x8 blocks, 64 coefficients each; eob values come from xd->eobs[]. */
+ vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride, 0, xd->eobs[0]);
+ vp9_dequant_idct_add_8x8_c(&q[64], dq, origpred + 8,
+ origdest + 8, 16, stride, 0, xd->eobs[4]);
+ vp9_dequant_idct_add_8x8_c(&q[128], dq, origpred + 8 * 16,
+ origdest + 8 * stride, 16, stride, 0, xd->eobs[8]);
+ vp9_dequant_idct_add_8x8_c(&q[192], dq, origpred + 8 * 16 + 8,
+ origdest + 8 * stride + 8, 16, stride, 0,
+ xd->eobs[12]);  /* NOTE(review): the eobs parameter itself is never read */
+}
+
+void vp9_dequant_idct_add_uv_block_8x8_c(short *q, const short *dq,
+ unsigned char *pre,
+ unsigned char *dstu,
+ unsigned char *dstv,
+ int stride, unsigned short *eobs,
+ MACROBLOCKD *xd) {
+ vp9_dequant_idct_add_8x8_c(q, dq, pre, dstu, 8, stride, 0, xd->eobs[16]);
+ /* The dstv block uses the next 64 coefficients and the next 64 pre bytes. */
+ q += 64;
+ pre += 64;
+ /* NOTE(review): the eobs parameter is unused; xd->eobs[16] and [20] are read. */
+ vp9_dequant_idct_add_8x8_c(q, dq, pre, dstv, 8, stride, 0, xd->eobs[20]);
+}
+
+#if CONFIG_SUPERBLOCKS
+void vp9_dequant_idct_add_uv_block_8x8_inplace_c(short *q, const short *dq,
+ unsigned char *dstu,
+ unsigned char *dstv,
+ int stride,
+ unsigned short *eobs,
+ MACROBLOCKD *xd) {
+ vp9_dequant_idct_add_8x8_c(q, dq, dstu, dstu, stride, stride, 0,
+ xd->eobs[16]);
+ /* In-place: each dst is also the prediction; dstv uses the next 64 coefficients. */
+ q += 64;
+ vp9_dequant_idct_add_8x8_c(q, dq, dstv, dstv, stride, stride, 0,
+ xd->eobs[20]);  /* NOTE(review): the eobs parameter itself is never read */
+}
+#endif
+
+#if CONFIG_LOSSLESS
+void vp9_dequant_dc_idct_add_y_block_lossless_c(short *q, const short *dq,
+ unsigned char *pre,
+ unsigned char *dst,
+ int stride,
+ unsigned short *eobs,
+ const short *dc) {
+ int i, j;
+ /* Same 4x4 block walk as the non-lossless version, with the lossless helpers. */
+ for (i = 0; i < 4; i++) {
+ for (j = 0; j < 4; j++) {
+ if (*eobs++ > 1)
+ vp9_dequant_dc_idct_add_lossless_c(q, dq, pre, dst, 16, stride, dc[0]);
+ else
+ vp9_dc_only_inv_walsh_add_c(dc[0], pre, dst, 16, stride);
+ /* Next block in this row: 16 coefficients on, 4 pixels to the right. */
+ q += 16;
+ pre += 4;
+ dst += 4;
+ dc++;
+ }
+ /* Undo the 16 pixels walked and move down 4 rows (pre uses a pitch of 16). */
+ pre += 64 - 16;
+ dst += 4 * stride - 16;
+ }
+}
+
+void vp9_dequant_idct_add_y_block_lossless_c(short *q, const short *dq,
+ unsigned char *pre,
+ unsigned char *dst,
+ int stride, unsigned short *eobs) {
+ int i, j;
+ /* 4x4 grid of 4x4 blocks; the DC-only path uses q[0] * dq[0] as its input. */
+ for (i = 0; i < 4; i++) {
+ for (j = 0; j < 4; j++) {
+ if (*eobs++ > 1)
+ vp9_dequant_idct_add_lossless_c(q, dq, pre, dst, 16, stride);
+ else {
+ vp9_dc_only_inv_walsh_add_c(q[0]*dq[0], pre, dst, 16, stride);
+ ((int *)q)[0] = 0;  /* one int-sized zero store at q */
+ }
+ /* Next block in this row: 16 coefficients on, 4 pixels to the right. */
+ q += 16;
+ pre += 4;
+ dst += 4;
+ }
+ /* Undo the 16 pixels walked and move down 4 rows (pre uses a pitch of 16). */
+ pre += 64 - 16;
+ dst += 4 * stride - 16;
+ }
+}
+
+void vp9_dequant_idct_add_uv_block_lossless_c(short *q, const short *dq,
+ unsigned char *pre,
+ unsigned char *dstu,
+ unsigned char *dstv,
+ int stride,
+ unsigned short *eobs) {
+ int i, j;
+ /* First loop nest: 2x2 grid of 4x4 blocks written to dstu. */
+ for (i = 0; i < 2; i++) {
+ for (j = 0; j < 2; j++) {
+ if (*eobs++ > 1)
+ vp9_dequant_idct_add_lossless_c(q, dq, pre, dstu, 8, stride);
+ else {
+ vp9_dc_only_inv_walsh_add_c(q[0]*dq[0], pre, dstu, 8, stride);
+ ((int *)q)[0] = 0;  /* one int-sized zero store at q */
+ }
+ /* Next block in this row. */
+ q += 16;
+ pre += 4;
+ dstu += 4;
+ }
+ /* Undo the 8 pixels walked and move down 4 rows (pre uses a pitch of 8). */
+ pre += 32 - 8;
+ dstu += 4 * stride - 8;
+ }
+ /* Second loop nest: same walk into dstv; q, pre and eobs simply continue. */
+ for (i = 0; i < 2; i++) {
+ for (j = 0; j < 2; j++) {
+ if (*eobs++ > 1)
+ vp9_dequant_idct_add_lossless_c(q, dq, pre, dstv, 8, stride);
+ else {
+ vp9_dc_only_inv_walsh_add_c(q[0]*dq[0], pre, dstv, 8, stride);
+ ((int *)q)[0] = 0;
+ }
+
+ q += 16;
+ pre += 4;
+ dstv += 4;
+ }
+
+ pre += 32 - 8;
+ dstv += 4 * stride - 8;
+ }
+}
+#endif
+
diff --git a/vp9/decoder/vp9_onyxd_if.c b/vp9/decoder/vp9_onyxd_if.c
new file mode 100644
index 0000000..bad43ca
--- /dev/null
+++ b/vp9/decoder/vp9_onyxd_if.c
@@ -0,0 +1,454 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vp9/common/vp9_onyxc_int.h"
+#if CONFIG_POSTPROC
+#include "vp9/common/vp9_postproc.h"
+#endif
+#include "vp9/common/vp9_onyxd.h"
+#include "vp9/decoder/vp9_onyxd_int.h"
+#include "vpx_mem/vpx_mem.h"
+#include "vp9/common/vp9_alloccommon.h"
+#include "vp9/common/vp9_loopfilter.h"
+#include "vp9/common/vp9_swapyv12buffer.h"
+#include <stdio.h>
+#include <assert.h>
+
+#include "vp9/common/vp9_quant_common.h"
+#include "vpx_scale/vpxscale.h"
+#include "vp9/common/vp9_systemdependent.h"
+#include "vpx_ports/vpx_timer.h"
+#include "vp9/decoder/vp9_decodframe.h"
+#include "vp9/decoder/vp9_detokenize.h"
+#include "./vpx_scale_rtcd.h"
+
+static int get_free_fb(VP9_COMMON *cm);
+static void ref_cnt_fb(int *buf, int *idx, int new_idx);
+
+#define WRITE_RECON_BUFFER 0
+#if WRITE_RECON_BUFFER == 1
+static void recon_write_yuv_frame(char *name, YV12_BUFFER_CONFIG *s) {
+ FILE *yuv_file = fopen((char *)name, "ab");  /* NOTE(review): result is not checked */
+ unsigned char *src = s->y_buffer;
+ int h = s->y_height;
+ /* Append the Y plane row by row; do/while assumes at least one row. */
+ do {
+ fwrite(src, s->y_width, 1, yuv_file);
+ src += s->y_stride;
+ } while (--h);
+ /* Then the U plane. */
+ src = s->u_buffer;
+ h = s->uv_height;
+
+ do {
+ fwrite(src, s->uv_width, 1, yuv_file);
+ src += s->uv_stride;
+ } while (--h);
+ /* Then the V plane, using the same uv dimensions. */
+ src = s->v_buffer;
+ h = s->uv_height;
+
+ do {
+ fwrite(src, s->uv_width, 1, yuv_file);
+ src += s->uv_stride;
+ } while (--h);
+
+ fclose(yuv_file);
+}
+#endif
+#if WRITE_RECON_BUFFER == 2
+void write_dx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame) {
+ /* Dumps each plane to its own file: dx\yNNNN.raw, dx\uNNNN.raw, dx\vNNNN.raw. */
+ // write the frame
+ FILE *yframe;
+ int i;
+ char filename[255];
+ /* NOTE(review): none of the fopen() results below are checked. */
+ sprintf(filename, "dx\\y%04d.raw", this_frame);
+ yframe = fopen(filename, "wb");
+ /* Y plane: y_width bytes from each of y_height rows. */
+ for (i = 0; i < frame->y_height; i++)
+ fwrite(frame->y_buffer + i * frame->y_stride,
+ frame->y_width, 1, yframe);
+
+ fclose(yframe);
+ sprintf(filename, "dx\\u%04d.raw", this_frame);
+ yframe = fopen(filename, "wb");
+ /* U plane: uv_width bytes from each of uv_height rows. */
+ for (i = 0; i < frame->uv_height; i++)
+ fwrite(frame->u_buffer + i * frame->uv_stride,
+ frame->uv_width, 1, yframe);
+
+ fclose(yframe);
+ sprintf(filename, "dx\\v%04d.raw", this_frame);
+ yframe = fopen(filename, "wb");
+ /* V plane: same dimensions as U. */
+ for (i = 0; i < frame->uv_height; i++)
+ fwrite(frame->v_buffer + i * frame->uv_stride,
+ frame->uv_width, 1, yframe);
+
+ fclose(yframe);
+}
+#endif
+
+void vp9_initialize_dec(void) {
+ static int init_done = 0;
+ /* Runs the two initialisers once; init_done is a plain, unguarded static. */
+ if (!init_done) {
+ vp9_initialize_common();
+ vp9_init_quant_tables();
+ init_done = 1;
+ }
+}
+
+VP9D_PTR vp9_create_decompressor(VP9D_CONFIG *oxcf) {
+ VP9D_COMP *pbi = vpx_memalign(32, sizeof(VP9D_COMP));
+ /* Returns a zero-filled, initialised decoder instance, or NULL/0 on failure. */
+ if (!pbi)
+ return NULL;
+ /* NOTE(review): oxcf is not read anywhere in this function. */
+ vpx_memset(pbi, 0, sizeof(VP9D_COMP));
+ /* Taken when setjmp() returns non-zero, i.e. after a longjmp to error.jmp. */
+ if (setjmp(pbi->common.error.jmp)) {
+ pbi->common.error.setjmp = 0;
+ vp9_remove_decompressor(pbi);
+ return 0;
+ }
+ /* The setjmp flag stays set while the initialisers below run. */
+ pbi->common.error.setjmp = 1;
+ vp9_initialize_dec();
+
+ vp9_create_common(&pbi->common);
+
+ pbi->common.current_video_frame = 0;
+ pbi->ready_for_new_data = 1;
+
+ /* vp9_init_de_quantizer() is first called here. Add check in
+ * frame_init_dequantizer() to avoid unnecessary calling of
+ * vp9_init_de_quantizer() for every frame.
+ */
+ vp9_init_de_quantizer(pbi);
+
+ vp9_loop_filter_init(&pbi->common);
+ /* Setup finished: clear the setjmp flag again. */
+ pbi->common.error.setjmp = 0;
+
+ pbi->decoded_key_frame = 0;
+
+ return (VP9D_PTR) pbi;
+}
+
+void vp9_remove_decompressor(VP9D_PTR ptr) {
+ VP9D_COMP *pbi = (VP9D_COMP *) ptr;
+ /* Frees everything owned by the instance; a null ptr is a no-op. */
+ if (!pbi)
+ return;
+
+ // Delete segmentation map
+ if (pbi->common.last_frame_seg_map != 0)
+ vpx_free(pbi->common.last_frame_seg_map);
+ /* Release the common state, then mbc, then the instance itself. */
+ vp9_remove_common(&pbi->common);
+ vpx_free(pbi->mbc);
+ vpx_free(pbi);
+}
+
+
+vpx_codec_err_t vp9_get_reference_dec(VP9D_PTR ptr, VP9_REFFRAME ref_frame_flag,
+ YV12_BUFFER_CONFIG *sd) {
+ VP9D_COMP *pbi = (VP9D_COMP *) ptr;
+ VP9_COMMON *cm = &pbi->common;
+ int ref_fb_idx;
+ /* Copies the selected reference frame into sd; returns common.error.error_code. */
+ if (ref_frame_flag == VP9_LAST_FLAG)
+ ref_fb_idx = cm->lst_fb_idx;
+ else if (ref_frame_flag == VP9_GOLD_FLAG)
+ ref_fb_idx = cm->gld_fb_idx;
+ else if (ref_frame_flag == VP9_ALT_FLAG)
+ ref_fb_idx = cm->alt_fb_idx;
+ else {
+ vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR,
+ "Invalid reference frame");
+ return pbi->common.error.error_code;
+ }
+ /* sd must match the reference in all four plane dimensions before copying. */
+ if (cm->yv12_fb[ref_fb_idx].y_height != sd->y_height ||
+ cm->yv12_fb[ref_fb_idx].y_width != sd->y_width ||
+ cm->yv12_fb[ref_fb_idx].uv_height != sd->uv_height ||
+ cm->yv12_fb[ref_fb_idx].uv_width != sd->uv_width) {
+ vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR,
+ "Incorrect buffer dimensions");
+ } else
+ vp8_yv12_copy_frame(&cm->yv12_fb[ref_fb_idx], sd);
+ /* NOTE(review): error_code is not reset on entry, so an older value may be returned. */
+ return pbi->common.error.error_code;
+}
+
+
+vpx_codec_err_t vp9_set_reference_dec(VP9D_PTR ptr, VP9_REFFRAME ref_frame_flag,
+ YV12_BUFFER_CONFIG *sd) {
+ VP9D_COMP *pbi = (VP9D_COMP *) ptr;
+ VP9_COMMON *cm = &pbi->common;
+ int *ref_fb_ptr = NULL;
+ int free_fb;
+ /* Replaces the selected reference with a copy of sd held in a free buffer. */
+ if (ref_frame_flag == VP9_LAST_FLAG)
+ ref_fb_ptr = &cm->lst_fb_idx;
+ else if (ref_frame_flag == VP9_GOLD_FLAG)
+ ref_fb_ptr = &cm->gld_fb_idx;
+ else if (ref_frame_flag == VP9_ALT_FLAG)
+ ref_fb_ptr = &cm->alt_fb_idx;
+ else {
+ vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR,
+ "Invalid reference frame");
+ return pbi->common.error.error_code;
+ }
+ /* sd must match the current reference in all four plane dimensions. */
+ if (cm->yv12_fb[*ref_fb_ptr].y_height != sd->y_height ||
+ cm->yv12_fb[*ref_fb_ptr].y_width != sd->y_width ||
+ cm->yv12_fb[*ref_fb_ptr].uv_height != sd->uv_height ||
+ cm->yv12_fb[*ref_fb_ptr].uv_width != sd->uv_width) {
+ vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR,
+ "Incorrect buffer dimensions");
+ } else {
+ /* Find an empty frame buffer. */
+ free_fb = get_free_fb(cm);
+ /* Decrease fb_idx_ref_cnt since it will be increased again in
+ * ref_cnt_fb() below. */
+ cm->fb_idx_ref_cnt[free_fb]--;
+
+ /* Manage the reference counters and copy image. */
+ ref_cnt_fb(cm->fb_idx_ref_cnt, ref_fb_ptr, free_fb);
+ vp8_yv12_copy_frame(sd, &cm->yv12_fb[*ref_fb_ptr]);
+ }
+ /* Returns whatever error code is currently stored in common.error. */
+ return pbi->common.error.error_code;
+}
+
+
+static int get_free_fb(VP9_COMMON *cm) {
+ int i;
+ for (i = 0; i < NUM_YV12_BUFFERS; i++)  /* first buffer whose ref count is 0 */
+ if (cm->fb_idx_ref_cnt[i] == 0)
+ break;
+ /* NOTE(review): with NDEBUG, no free buffer leaves i == NUM_YV12_BUFFERS. */
+ assert(i < NUM_YV12_BUFFERS);
+ cm->fb_idx_ref_cnt[i] = 1;  /* claim it for the caller */
+ return i;
+}
+
+static void ref_cnt_fb(int *buf, int *idx, int new_idx) {
+ if (buf[*idx] > 0)  /* drop one reference from the old buffer, never below 0 */
+ buf[*idx]--;
+ /* Point the slot at the new buffer... */
+ *idx = new_idx;
+ /* ...and count that new reference. */
+ buf[new_idx]++;
+}
+
+/* If any buffer copy / swapping is signalled it should be done here. */
+static int swap_frame_buffers(VP9_COMMON *cm) {
+ int err = 0;
+ /* Returns 0, or -1 if a copy flag holds a value other than 0, 1 or 2. */
+ /* The alternate reference frame or golden frame can be updated
+ * using the new, last, or golden/alt ref frame. If it
+ * is updated using the newly decoded frame it is a refresh.
+ * An update using the last or golden/alt ref frame is a copy.
+ */
+ if (cm->copy_buffer_to_arf) {
+ int new_fb = 0;
+ /* 1 = copy from last, 2 = copy from golden. */
+ if (cm->copy_buffer_to_arf == 1)
+ new_fb = cm->lst_fb_idx;
+ else if (cm->copy_buffer_to_arf == 2)
+ new_fb = cm->gld_fb_idx;
+ else
+ err = -1;
+ /* NOTE(review): on the error branch this still runs with new_fb == 0. */
+ ref_cnt_fb(cm->fb_idx_ref_cnt, &cm->alt_fb_idx, new_fb);
+ }
+
+ if (cm->copy_buffer_to_gf) {
+ int new_fb = 0;
+ /* 1 = copy from last, 2 = copy from alt ref. */
+ if (cm->copy_buffer_to_gf == 1)
+ new_fb = cm->lst_fb_idx;
+ else if (cm->copy_buffer_to_gf == 2)
+ new_fb = cm->alt_fb_idx;
+ else
+ err = -1;
+ /* NOTE(review): on the error branch this still runs with new_fb == 0. */
+ ref_cnt_fb(cm->fb_idx_ref_cnt, &cm->gld_fb_idx, new_fb);
+ }
+ /* Refreshes point the reference slot at the newly decoded buffer. */
+ if (cm->refresh_golden_frame)
+ ref_cnt_fb(cm->fb_idx_ref_cnt, &cm->gld_fb_idx, cm->new_fb_idx);
+
+ if (cm->refresh_alt_ref_frame)
+ ref_cnt_fb(cm->fb_idx_ref_cnt, &cm->alt_fb_idx, cm->new_fb_idx);
+
+ if (cm->refresh_last_frame) {
+ ref_cnt_fb(cm->fb_idx_ref_cnt, &cm->lst_fb_idx, cm->new_fb_idx);
+
+ cm->frame_to_show = &cm->yv12_fb[cm->lst_fb_idx];
+ } else
+ cm->frame_to_show = &cm->yv12_fb[cm->new_fb_idx];
+ /* Drop the count of 1 that get_free_fb() assigned to new_fb_idx. */
+ cm->fb_idx_ref_cnt[cm->new_fb_idx]--;
+
+ return err;
+}
+
+int vp9_receive_compressed_data(VP9D_PTR ptr, unsigned long size,
+ const unsigned char **psource,
+ int64_t time_stamp) {
+ VP9D_COMP *pbi = (VP9D_COMP *) ptr;
+ VP9_COMMON *cm = &pbi->common;
+ const unsigned char *source = *psource;
+ int retcode = 0;
+ /* Decodes one frame from *psource; returns a negative value on failure. */
+ /*if(pbi->ready_for_new_data == 0)
+ return -1;*/
+
+ if (ptr == 0) {
+ return -1;
+ }
+
+ pbi->common.error.error_code = VPX_CODEC_OK;
+ /* Record the input buffer; source_sz is reset to 0 before a normal return. */
+ pbi->Source = source;
+ pbi->source_sz = size;
+
+ if (pbi->source_sz == 0) {
+ /* This is used to signal that we are missing frames.
+ * We do not know if the missing frame(s) was supposed to update
+ * any of the reference buffers, but we act conservative and
+ * mark only the last buffer as corrupted.
+ */
+ cm->yv12_fb[cm->lst_fb_idx].corrupted = 1;
+ }
+ /* Reserve a frame buffer (its ref count becomes 1) to decode into. */
+ cm->new_fb_idx = get_free_fb(cm);
+ /* Taken when setjmp() returns non-zero, i.e. after a longjmp to error.jmp. */
+ if (setjmp(pbi->common.error.jmp)) {
+ pbi->common.error.setjmp = 0;
+
+ /* We do not know if the missing frame(s) was supposed to update
+ * any of the reference buffers, but we act conservative and
+ * mark only the last buffer as corrupted.
+ */
+ cm->yv12_fb[cm->lst_fb_idx].corrupted = 1;
+ /* Give back the buffer reserved above. */
+ if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0)
+ cm->fb_idx_ref_cnt[cm->new_fb_idx]--;
+ return -1;
+ }
+
+ pbi->common.error.setjmp = 1;
+
+ retcode = vp9_decode_frame(pbi, psource);
+ /* Negative return without a longjmp: same release, but pass retcode on. */
+ if (retcode < 0) {
+ pbi->common.error.error_code = VPX_CODEC_ERROR;
+ pbi->common.error.setjmp = 0;
+ if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0)
+ cm->fb_idx_ref_cnt[cm->new_fb_idx]--;
+ return retcode;
+ }
+ /* Success: update the reference slots, then loop-filter and extend borders. */
+ {
+ if (swap_frame_buffers(cm)) {
+ pbi->common.error.error_code = VPX_CODEC_ERROR;
+ pbi->common.error.setjmp = 0;
+ return -1;
+ }
+
+#if WRITE_RECON_BUFFER == 2
+ if (cm->show_frame)
+ write_dx_frame_to_file(cm->frame_to_show,
+ cm->current_video_frame);
+ else
+ write_dx_frame_to_file(cm->frame_to_show,
+ cm->current_video_frame + 1000);
+#endif
+
+ if (cm->filter_level) {
+ /* Apply the loop filter if appropriate. */
+ vp9_loop_filter_frame(cm, &pbi->mb);
+ }
+ vp8_yv12_extend_frame_borders(cm->frame_to_show);
+ }
+
+#if WRITE_RECON_BUFFER == 1
+ if (cm->show_frame)
+ recon_write_yuv_frame("recon.yuv", cm->frame_to_show);
+#endif
+
+ vp9_clear_system_state();
+ /* prev_mip gets a copy of mip for shown frames and is zeroed otherwise. */
+ if (cm->show_frame) {
+ vpx_memcpy(cm->prev_mip, cm->mip,
+ (cm->mb_cols + 1) * (cm->mb_rows + 1)* sizeof(MODE_INFO));
+ } else {
+ vpx_memset(cm->prev_mip, 0,
+ (cm->mb_cols + 1) * (cm->mb_rows + 1)* sizeof(MODE_INFO));
+ }
+
+ /*vp9_print_modes_and_motion_vectors(cm->mi, cm->mb_rows,cm->mb_cols,
+ cm->current_video_frame);*/
+
+ if (cm->show_frame)
+ cm->current_video_frame++;
+ /* Clearing ready_for_new_data lets vp9_get_raw_frame() hand out this frame. */
+ pbi->ready_for_new_data = 0;
+ pbi->last_time_stamp = time_stamp;
+ pbi->source_sz = 0;
+
+ pbi->common.error.setjmp = 0;
+ return retcode;
+}
+
+int vp9_get_raw_frame(VP9D_PTR ptr, YV12_BUFFER_CONFIG *sd,
+ int64_t *time_stamp, int64_t *time_end_stamp,
+ vp9_ppflags_t *flags) {
+ int ret = -1;
+ VP9D_COMP *pbi = (VP9D_COMP *) ptr;
+ /* Returns -1 when no new frame is pending; NOTE(review): ptr is not null-checked. */
+ if (pbi->ready_for_new_data == 1)
+ return ret;
+
+ /* ie no raw frame to show!!! */
+ if (pbi->common.show_frame == 0)
+ return ret;
+ /* Mark the frame as consumed and report its timestamp; the end stamp is always 0. */
+ pbi->ready_for_new_data = 1;
+ *time_stamp = pbi->last_time_stamp;
+ *time_end_stamp = 0;
+
+ sd->clrtype = pbi->common.clr_type;
+#if CONFIG_POSTPROC
+ ret = vp9_post_proc_frame(&pbi->common, sd, flags);
+#else
+ /* Without postproc: copy the buffer descriptor, then override three dimensions. */
+ if (pbi->common.frame_to_show) {
+ *sd = *pbi->common.frame_to_show;
+ sd->y_width = pbi->common.Width;
+ sd->y_height = pbi->common.Height;
+ sd->uv_height = pbi->common.Height / 2;  /* NOTE(review): uv_width keeps the copied value */
+ ret = 0;
+ } else {
+ ret = -1;
+ }
+
+#endif /*!CONFIG_POSTPROC*/
+ vp9_clear_system_state();
+ return ret;
+}
diff --git a/vp9/decoder/vp9_onyxd_int.h b/vp9/decoder/vp9_onyxd_int.h
new file mode 100644
index 0000000..49e13f7
--- /dev/null
+++ b/vp9/decoder/vp9_onyxd_int.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_DECODER_VP9_ONYXD_INT_H_
+#define VP9_DECODER_VP9_ONYXD_INT_H_
+#include "./vpx_config.h"
+#include "vp9/common/vp9_onyxd.h"
+#include "vp9/decoder/vp9_treereader.h"
+#include "vp9/common/vp9_onyxc_int.h"
+#include "vp9/decoder/vp9_dequantize.h"
+
+// #define DEC_DEBUG
+
+typedef struct {
+ int ithread;
+ void *ptr1;
+ void *ptr2;
+} DECODETHREAD_DATA;
+
+typedef struct {
+ MACROBLOCKD mbd;
+ int mb_row;
+ int current_mb_col;
+ short *coef_ptr;
+} MB_ROW_DEC;
+
+typedef struct {
+ int const *scan;
+ int const *scan_8x8;
+ UINT8 const *ptr_block2leftabove;
+ vp9_tree_index const *vp9_coef_tree_ptr;
+ unsigned char *norm_ptr;
+ UINT8 *ptr_coef_bands_x;
+ UINT8 *ptr_coef_bands_x_8x8;
+
+ ENTROPY_CONTEXT_PLANES *A;
+ ENTROPY_CONTEXT_PLANES *L;
+
+ INT16 *qcoeff_start_ptr;
+
+ vp9_prob const *coef_probs[BLOCK_TYPES];
+ vp9_prob const *coef_probs_8x8[BLOCK_TYPES_8X8];
+ vp9_prob const *coef_probs_16X16[BLOCK_TYPES_16X16];
+
+ UINT8 eob[25];
+
+} DETOK;
+
+typedef struct VP9Decompressor {
+ DECLARE_ALIGNED(16, MACROBLOCKD, mb);
+
+ DECLARE_ALIGNED(16, VP9_COMMON, common);
+
+ VP9D_CONFIG oxcf;
+
+ /* Input buffer and size, stored by vp9_receive_compressed_data(). */
+ const unsigned char *Source;
+ unsigned int source_sz;  /* reset to 0 before that function returns normally */
+
+ vp9_reader *mbc;  /* released with vpx_free() in vp9_remove_decompressor() */
+ int64_t last_time_stamp;  /* time_stamp of the last successfully received frame */
+ int ready_for_new_data;  /* 1 after create / vp9_get_raw_frame(), 0 after a receive */
+
+ DETOK detoken;
+ /* Function pointers for the dequantize + inverse-transform + add stages. */
+ vp9_dequant_idct_add_fn_t idct_add;
+ vp9_dequant_dc_idct_add_fn_t dc_idct_add;
+ vp9_dequant_dc_idct_add_y_block_fn_t dc_idct_add_y_block;
+ vp9_dequant_idct_add_y_block_fn_t idct_add_y_block;
+ vp9_dequant_idct_add_uv_block_fn_t idct_add_uv_block;
+
+ vp9_prob prob_skip_false;
+
+ int decoded_key_frame;  /* set to 0 in vp9_create_decompressor() */
+
+} VP9D_COMP;
+
+int vp9_decode_frame(VP9D_COMP *cpi, const unsigned char **p_data_end);
+
+
+#if CONFIG_DEBUG  /* CHECK_MEM_ERROR: lval = expr; raise VPX_CODEC_MEM_ERROR when lval is 0 */
+#define CHECK_MEM_ERROR(lval,expr) do {\
+ lval = (expr); \
+ if(!lval) \
+ vpx_internal_error(&pbi->common.error, VPX_CODEC_MEM_ERROR,\
+ "Failed to allocate "#lval" at %s:%d", \
+ __FILE__,__LINE__);\
+ } while(0)
+#else  /* same check without the file/line text; both forms need a `pbi` in scope */
+#define CHECK_MEM_ERROR(lval,expr) do {\
+ lval = (expr); \
+ if(!lval) \
+ vpx_internal_error(&pbi->common.error, VPX_CODEC_MEM_ERROR,\
+ "Failed to allocate "#lval);\
+ } while(0)
+#endif
+
+#endif // VP9_DECODER_VP9_ONYXD_INT_H_
diff --git a/vp9/decoder/vp9_reconintra_mt.h b/vp9/decoder/vp9_reconintra_mt.h
new file mode 100644
index 0000000..af96349
--- /dev/null
+++ b/vp9/decoder/vp9_reconintra_mt.h
@@ -0,0 +1,15 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_DECODER_VP9_RECONINTRA_MT_H_
+#define VP9_DECODER_VP9_RECONINTRA_MT_H_
+
+#endif
diff --git a/vp9/decoder/vp9_treereader.h b/vp9/decoder/vp9_treereader.h
new file mode 100644
index 0000000..aa31dc5
--- /dev/null
+++ b/vp9/decoder/vp9_treereader.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_DECODER_VP9_TREEREADER_H_
+#define VP9_DECODER_VP9_TREEREADER_H_
+
+#include "vp9/common/vp9_treecoder.h"
+
+#include "vp9/decoder/vp9_dboolhuff.h"
+
+typedef BOOL_DECODER vp9_reader;
+
+#define vp9_read decode_bool
+#define vp9_read_literal decode_value
+#define vp9_read_bit(R) vp9_read(R, vp9_prob_half)
+
+/* Intent of tree data structure is to make decoding trivial. */
+
+static int treed_read(vp9_reader *const r, /* vp9_read() on r must yield 0 or 1 */
+ vp9_tree t,
+ const vp9_prob *const p) {
+ register vp9_tree_index i = 0;
+ /* Each read bit selects t[i] or t[i + 1]; a value <= 0 ends the walk. */
+ while ((i = t[ i + vp9_read(r, p[i >> 1])]) > 0);
+ /* The terminating entry is negated to form the return value. */
+ return -i;
+}
+
+#endif /* VP9_DECODER_VP9_TREEREADER_H_ */
diff --git a/vp9/decoder/x86/vp9_dequantize_mmx.asm b/vp9/decoder/x86/vp9_dequantize_mmx.asm
new file mode 100644
index 0000000..23080bf
--- /dev/null
+++ b/vp9/decoder/x86/vp9_dequantize_mmx.asm
@@ -0,0 +1,406 @@
+;
+; Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+%include "third_party/x86inc/x86inc.asm"
+
+SECTION_RODATA
+align 16
+x_s1sqr2: times 4 dw 0x8A8C
+align 16
+x_c1sqr2less1: times 4 dw 0x4E7B
+align 16
+pw_16: times 4 dw 16
+
+SECTION .text
+
+INIT_MMX
+
+
+;void dequantize_b_impl_mmx(short *sq, short *dq, short *q)
+cglobal dequantize_b_impl_mmx, 3,3,0,sq,dq,arg3
+ mova m1, [sqq]
+ pmullw m1, [arg3q+0] ; m1 = sq words at byte offset 0 * arg3 words (low 16 bits)
+ mova [dqq+ 0], m1
+ ; The same load / multiply / store repeats for byte offsets 8, 16 and 24.
+ mova m1, [sqq+8]
+ pmullw m1, [arg3q+8] ; byte offset 8
+ mova [dqq+ 8], m1
+
+ mova m1, [sqq+16]
+ pmullw m1, [arg3q+16] ; byte offset 16
+ mova [dqq+16], m1
+
+ mova m1, [sqq+24]
+ pmullw m1, [arg3q+24] ; byte offset 24
+ mova [dqq+24], m1
+ RET
+
+
+;void dequant_idct_add_mmx(short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride)
+cglobal dequant_idct_add_mmx, 4,6,0,inp,dq,pred,dest,pit,stride
+ ; stride and pitch are loaded by hand (sign-extended on x86-64).
+%if ARCH_X86_64
+ movsxd strideq, dword stridem
+ movsxd pitq, dword pitm
+%else
+ mov strideq, stridem
+ mov pitq, pitm
+%endif
+ ; Dequantize: m0..m3 = the four 8-byte groups of input words times dq words.
+ mova m0, [inpq+ 0]
+ pmullw m0, [dqq]
+
+ mova m1, [inpq+ 8]
+ pmullw m1, [dqq+ 8]
+
+ mova m2, [inpq+16]
+ pmullw m2, [dqq+16]
+
+ mova m3, [inpq+24]
+ pmullw m3, [dqq+24]
+ ; Zero the 32 bytes of input coefficients just consumed.
+ pxor m7, m7
+ mova [inpq], m7
+ mova [inpq+8], m7
+ mova [inpq+16], m7
+ mova [inpq+24], m7
+ ; First 1-D pass.
+
+ psubw m0, m2 ; b1= 0-2
+ paddw m2, m2 ;
+
+ mova m5, m1
+ paddw m2, m0 ; a1 =0+2
+
+ pmulhw m5, [x_s1sqr2];
+ paddw m5, m1 ; ip1 * sin(pi/8) * sqrt(2)
+
+ mova m7, m3 ;
+ pmulhw m7, [x_c1sqr2less1];
+
+ paddw m7, m3 ; ip3 * cos(pi/8) * sqrt(2)
+ psubw m7, m5 ; c1
+
+ mova m5, m1
+ mova m4, m3
+
+ pmulhw m5, [x_c1sqr2less1]
+ paddw m5, m1
+
+ pmulhw m3, [x_s1sqr2]
+ paddw m3, m4
+
+ paddw m3, m5 ; d1
+ mova m6, m2 ; a1
+
+ mova m4, m0 ; b1
+ paddw m2, m3 ;0
+
+ paddw m4, m7 ;1
+ psubw m0, m7 ;2
+
+ psubw m6, m3 ;3
+ ; Transpose the 4x4 block of words.
+ mova m1, m2 ; 03 02 01 00
+ mova m3, m4 ; 23 22 21 20
+
+ punpcklwd m1, m0 ; 11 01 10 00
+ punpckhwd m2, m0 ; 13 03 12 02
+
+ punpcklwd m3, m6 ; 31 21 30 20
+ punpckhwd m4, m6 ; 33 23 32 22
+
+ mova m0, m1 ; 11 01 10 00
+ mova m5, m2 ; 13 03 12 02
+
+ punpckldq m0, m3 ; 30 20 10 00
+ punpckhdq m1, m3 ; 31 21 11 01
+
+ punpckldq m2, m4 ; 32 22 12 02
+ punpckhdq m5, m4 ; 33 23 13 03
+
+ mova m3, m5 ; 33 23 13 03
+ ; Second 1-D pass; pw_16 is added before the arithmetic shift right by 5.
+ psubw m0, m2 ; b1= 0-2
+ paddw m2, m2 ;
+
+ mova m5, m1
+ paddw m2, m0 ; a1 =0+2
+
+ pmulhw m5, [x_s1sqr2];
+ paddw m5, m1 ; ip1 * sin(pi/8) * sqrt(2)
+
+ mova m7, m3 ;
+ pmulhw m7, [x_c1sqr2less1];
+
+ paddw m7, m3 ; ip3 * cos(pi/8) * sqrt(2)
+ psubw m7, m5 ; c1
+
+ mova m5, m1
+ mova m4, m3
+
+ pmulhw m5, [x_c1sqr2less1]
+ paddw m5, m1
+
+ pmulhw m3, [x_s1sqr2]
+ paddw m3, m4
+
+ paddw m3, m5 ; d1
+ paddw m0, [pw_16]
+
+ paddw m2, [pw_16]
+ mova m6, m2 ; a1
+
+ mova m4, m0 ; b1
+ paddw m2, m3 ;0
+
+ paddw m4, m7 ;1
+ psubw m0, m7 ;2
+
+ psubw m6, m3 ;3
+ psraw m2, 5
+
+ psraw m0, 5
+ psraw m4, 5
+
+ psraw m6, 5
+ ; Transpose again; m0, m1, m2, m5 then hold the four output rows.
+ mova m1, m2 ; 03 02 01 00
+ mova m3, m4 ; 23 22 21 20
+
+ punpcklwd m1, m0 ; 11 01 10 00
+ punpckhwd m2, m0 ; 13 03 12 02
+
+ punpcklwd m3, m6 ; 31 21 30 20
+ punpckhwd m4, m6 ; 33 23 32 22
+
+ mova m0, m1 ; 11 01 10 00
+ mova m5, m2 ; 13 03 12 02
+
+ punpckldq m0, m3 ; 30 20 10 00
+ punpckhdq m1, m3 ; 31 21 11 01
+
+ punpckldq m2, m4 ; 32 22 12 02
+ punpckhdq m5, m4 ; 33 23 13 03
+ ; Per row: widen pred bytes to words, add, pack with unsigned saturation, store.
+ pxor m7, m7
+
+ movh m4, [predq]
+ punpcklbw m4, m7
+ paddsw m0, m4
+ packuswb m0, m7
+ movh [destq], m0
+
+ movh m4, [predq+pitq]
+ punpcklbw m4, m7
+ paddsw m1, m4
+ packuswb m1, m7
+ movh [destq+strideq], m1
+
+ movh m4, [predq+2*pitq]
+ punpcklbw m4, m7
+ paddsw m2, m4
+ packuswb m2, m7
+ movh [destq+strideq*2], m2
+ ; Step both pointers one row so the 2x offsets below address the fourth row.
+ add destq, strideq
+ add predq, pitq
+
+ movh m4, [predq+2*pitq]
+ punpcklbw m4, m7
+ paddsw m5, m4
+ packuswb m5, m7
+ movh [destq+strideq*2], m5
+ RET
+
+
+;void dequant_dc_idct_add_mmx(short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride, int Dc)
+cglobal dequant_dc_idct_add_mmx, 4,7,0,inp,dq,pred,dest,pit,stride,Dc
+ ; stride, pitch and Dc are loaded by hand (the first two sign-extended on x86-64).
+%if ARCH_X86_64
+ movsxd strideq, dword stridem
+ movsxd pitq, dword pitm
+%else
+ mov strideq, stridem
+ mov pitq, pitm
+%endif
+ ; Dequantize: m0..m3 = the four 8-byte groups of input words times dq words.
+ mov Dcq, Dcm
+ mova m0, [inpq+ 0]
+ pmullw m0, [dqq+ 0]
+
+ mova m1, [inpq+ 8]
+ pmullw m1, [dqq+ 8]
+
+ mova m2, [inpq+16]
+ pmullw m2, [dqq+16]
+
+ mova m3, [inpq+24]
+ pmullw m3, [dqq+24]
+ ; Zero the 32 bytes of input coefficients just consumed.
+ pxor m7, m7
+ mova [inpq+ 0], m7
+ mova [inpq+ 8], m7
+ mova [inpq+16], m7
+ mova [inpq+24], m7
+ ; The dequantized low word of m0 is discarded and replaced by Dc.
+ ; move lower word of Dc to lower word of m0
+ psrlq m0, 16
+ psllq m0, 16
+ and Dcq, 0xFFFF ; If Dc < 0, we don't want the full dword precision.
+ movh m7, Dcq
+ por m0, m7
+ psubw m0, m2 ; b1= 0-2
+ paddw m2, m2 ;
+
+ mova m5, m1
+ paddw m2, m0 ; a1 =0+2
+
+ pmulhw m5, [x_s1sqr2];
+ paddw m5, m1 ; ip1 * sin(pi/8) * sqrt(2)
+
+ mova m7, m3 ;
+ pmulhw m7, [x_c1sqr2less1];
+
+ paddw m7, m3 ; ip3 * cos(pi/8) * sqrt(2)
+ psubw m7, m5 ; c1
+
+ mova m5, m1
+ mova m4, m3
+
+ pmulhw m5, [x_c1sqr2less1]
+ paddw m5, m1
+
+ pmulhw m3, [x_s1sqr2]
+ paddw m3, m4
+
+ paddw m3, m5 ; d1
+ mova m6, m2 ; a1
+
+ mova m4, m0 ; b1
+ paddw m2, m3 ;0
+
+ paddw m4, m7 ;1
+ psubw m0, m7 ;2
+
+ psubw m6, m3 ;3
+ ; Transpose the 4x4 block of words.
+ mova m1, m2 ; 03 02 01 00
+ mova m3, m4 ; 23 22 21 20
+
+ punpcklwd m1, m0 ; 11 01 10 00
+ punpckhwd m2, m0 ; 13 03 12 02
+
+ punpcklwd m3, m6 ; 31 21 30 20
+ punpckhwd m4, m6 ; 33 23 32 22
+
+ mova m0, m1 ; 11 01 10 00
+ mova m5, m2 ; 13 03 12 02
+
+ punpckldq m0, m3 ; 30 20 10 00
+ punpckhdq m1, m3 ; 31 21 11 01
+
+ punpckldq m2, m4 ; 32 22 12 02
+ punpckhdq m5, m4 ; 33 23 13 03
+
+ mova m3, m5 ; 33 23 13 03
+ ; Second 1-D pass; pw_16 is added before the arithmetic shift right by 5.
+ psubw m0, m2 ; b1= 0-2
+ paddw m2, m2 ;
+
+ mova m5, m1
+ paddw m2, m0 ; a1 =0+2
+
+ pmulhw m5, [x_s1sqr2];
+ paddw m5, m1 ; ip1 * sin(pi/8) * sqrt(2)
+
+ mova m7, m3 ;
+ pmulhw m7, [x_c1sqr2less1];
+
+ paddw m7, m3 ; ip3 * cos(pi/8) * sqrt(2)
+ psubw m7, m5 ; c1
+
+ mova m5, m1
+ mova m4, m3
+
+ pmulhw m5, [x_c1sqr2less1]
+ paddw m5, m1
+
+ pmulhw m3, [x_s1sqr2]
+ paddw m3, m4
+
+ paddw m3, m5 ; d1
+ paddw m0, [pw_16]
+
+ paddw m2, [pw_16]
+ mova m6, m2 ; a1
+
+ mova m4, m0 ; b1
+ paddw m2, m3 ;0
+
+ paddw m4, m7 ;1
+ psubw m0, m7 ;2
+
+ psubw m6, m3 ;3
+ psraw m2, 5
+
+ psraw m0, 5
+ psraw m4, 5
+
+ psraw m6, 5
+ ; Transpose again; m0, m1, m2, m5 then hold the four output rows.
+ mova m1, m2 ; 03 02 01 00
+ mova m3, m4 ; 23 22 21 20
+
+ punpcklwd m1, m0 ; 11 01 10 00
+ punpckhwd m2, m0 ; 13 03 12 02
+
+ punpcklwd m3, m6 ; 31 21 30 20
+ punpckhwd m4, m6 ; 33 23 32 22
+
+ mova m0, m1 ; 11 01 10 00
+ mova m5, m2 ; 13 03 12 02
+
+ punpckldq m0, m3 ; 30 20 10 00
+ punpckhdq m1, m3 ; 31 21 11 01
+
+ punpckldq m2, m4 ; 32 22 12 02
+ punpckhdq m5, m4 ; 33 23 13 03
+ ; Per row: widen pred bytes to words, add, pack with unsigned saturation, store.
+ pxor m7, m7
+
+ movh m4, [predq]
+ punpcklbw m4, m7
+ paddsw m0, m4
+ packuswb m0, m7
+ movh [destq], m0
+
+ movh m4, [predq+pitq]
+ punpcklbw m4, m7
+ paddsw m1, m4
+ packuswb m1, m7
+ movh [destq+strideq], m1
+
+ movh m4, [predq+2*pitq]
+ punpcklbw m4, m7
+ paddsw m2, m4
+ packuswb m2, m7
+ movh [destq+strideq*2], m2
+ ; Step both pointers one row so the 2x offsets below address the fourth row.
+ add destq, strideq
+ add predq, pitq
+
+ movh m4, [predq+2*pitq]
+ punpcklbw m4, m7
+ paddsw m5, m4
+ packuswb m5, m7
+ movh [destq+strideq*2], m5
+ RET
+
diff --git a/vp9/decoder/x86/vp9_idct_blk_mmx.c b/vp9/decoder/x86/vp9_idct_blk_mmx.c
new file mode 100644
index 0000000..8279eaa
--- /dev/null
+++ b/vp9/decoder/x86/vp9_idct_blk_mmx.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vpx_config.h"
+#include "vp9/common/vp9_blockd.h"
+#include "vp9/decoder/vp9_dequantize.h"
+#include "vp9/decoder/x86/vp9_idct_mmx.h"
+
+void vp9_dequant_dc_idct_add_y_block_mmx(short *q, const short *dq,
+                                         unsigned char *pre,
+                                         unsigned char *dst,
+                                         int stride, unsigned short *eobs,
+                                         const short *dc) {
+  /*
+   * Adds the residual of sixteen 4x4 blocks, visited as four rows of four
+   * blocks, on top of the prediction in pre and writes the result to dst.
+   *
+   *   q     coefficients, 16 per block, consecutive in visiting order
+   *   dq    dequantization factors, shared by every block
+   *   pre   prediction; 16 is passed to the kernels as its pitch
+   *   dst   output; stride is passed to the kernels as its stride
+   *   eobs  one entry per block; a value above 1 selects the full kernel
+   *   dc    one value per block, handed to the kernel as its DC argument
+   */
+  int row, col;
+
+  for (row = 0; row < 4; row++) {
+    for (col = 0; col < 4; col++) {
+      unsigned char *const pred_blk = pre + 4 * col;
+      unsigned char *const dest_blk = dst + 4 * col;
+
+      if (eobs[col] > 1) {
+        vp9_dequant_dc_idct_add_mmx(q + 16 * col, dq, pred_blk, dest_blk,
+                                    16, stride, dc[col]);
+      } else {
+        vp9_dc_only_idct_add_mmx(dc[col], pred_blk, dest_blk, 16, stride);
+      }
+    }
+
+    q += 64;
+    dc += 4;
+    pre += 64;
+    dst += 4 * stride;
+    eobs += 4;
+  }
+}
+
+void vp9_dequant_idct_add_y_block_mmx(short *q, const short *dq,
+                                      unsigned char *pre,
+                                      unsigned char *dst,
+                                      int stride, unsigned short *eobs) {
+  /*
+   * Adds the residual of sixteen 4x4 blocks, visited as four rows of four
+   * blocks, on top of the prediction.
+   *
+   *   q     coefficients, 16 per block, consecutive in visiting order
+   *   dq    dequantization factors, shared by every block
+   *   pre   prediction; 16 is passed to the kernels as its pitch
+   *   dst   output; stride is passed to the kernels as its stride
+   *   eobs  one entry per block; a value above 1 selects the full kernel
+   *
+   * On the DC-only path the block's q[0] and q[1] are zeroed afterwards,
+   * using plain short stores rather than an int-typed store.
+   */
+  int row, col;
+
+  for (row = 0; row < 4; row++) {
+    for (col = 0; col < 4; col++) {
+      short *const coeffs = q + 16 * col;
+      unsigned char *const pred_blk = pre + 4 * col;
+      unsigned char *const dest_blk = dst + 4 * col;
+
+      if (eobs[col] > 1) {
+        vp9_dequant_idct_add_mmx(coeffs, dq, pred_blk, dest_blk, 16, stride);
+      } else {
+        vp9_dc_only_idct_add_mmx(coeffs[0] * dq[0], pred_blk, dest_blk,
+                                 16, stride);
+        coeffs[0] = 0;
+        coeffs[1] = 0;
+      }
+    }
+
+    q += 64;
+    pre += 64;
+    dst += 4 * stride;
+    eobs += 4;
+  }
+}
+
+void vp9_dequant_idct_add_uv_block_mmx(short *q, const short *dq,
+                                       unsigned char *pre,
+                                       unsigned char *dstu,
+                                       unsigned char *dstv,
+                                       int stride, unsigned short *eobs) {
+  /*
+   * Chroma counterpart of the Y-block routine: four 4x4 blocks (two rows of
+   * two) are written to dstu, then four more to dstv.
+   *
+   *   q, pre, eobs  advance continuously through the U blocks and on into
+   *                 the V blocks
+   *   dq            dequantization factors, shared by every block
+   *   8             passed to the kernels as the prediction pitch
+   *
+   * On the DC-only path the block's q[0] and q[1] are zeroed afterwards,
+   * using plain short stores rather than an int-typed store.
+   */
+  unsigned char *plane_dst[2];
+  int plane, row, col;
+
+  plane_dst[0] = dstu;
+  plane_dst[1] = dstv;
+
+  for (plane = 0; plane < 2; plane++) {
+    unsigned char *dst = plane_dst[plane];
+
+    for (row = 0; row < 2; row++) {
+      for (col = 0; col < 2; col++) {
+        short *const coeffs = q + 16 * col;
+        unsigned char *const pred_blk = pre + 4 * col;
+        unsigned char *const dest_blk = dst + 4 * col;
+
+        if (eobs[col] > 1) {
+          vp9_dequant_idct_add_mmx(coeffs, dq, pred_blk, dest_blk, 8, stride);
+        } else {
+          vp9_dc_only_idct_add_mmx(coeffs[0] * dq[0], pred_blk, dest_blk,
+                                   8, stride);
+          coeffs[0] = 0;
+          coeffs[1] = 0;
+        }
+      }
+
+      q += 32;
+      pre += 32;
+      dst += 4 * stride;
+      eobs += 2;
+    }
+  }
+}
diff --git a/vp9/decoder/x86/vp9_idct_blk_sse2.c b/vp9/decoder/x86/vp9_idct_blk_sse2.c
new file mode 100644
index 0000000..badd97f
--- /dev/null
+++ b/vp9/decoder/x86/vp9_idct_blk_sse2.c
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vpx_config.h"
+#include "vp9/common/vp9_blockd.h"
+#include "vp9/decoder/vp9_dequantize.h"
+/* Kernels declared here, not defined in this file; each is called per pair of blocks. */
+void vp9_idct_dequant_dc_0_2x_sse2(short *q, const short *dq,
+ unsigned char *pre, unsigned char *dst,
+ int dst_stride, const short *dc);
+
+void vp9_idct_dequant_dc_full_2x_sse2(short *q, const short *dq,
+ unsigned char *pre, unsigned char *dst,
+ int dst_stride, const short *dc);
+
+void vp9_idct_dequant_0_2x_sse2(short *q, const short *dq,
+ unsigned char *pre, unsigned char *dst,
+ int dst_stride, int blk_stride);
+
+void vp9_idct_dequant_full_2x_sse2(short *q, const short *dq,
+ unsigned char *pre, unsigned char *dst,
+ int dst_stride, int blk_stride);
+
+void vp9_dequant_dc_idct_add_y_block_sse2(short *q, const short *dq,
+                                          unsigned char *pre,
+                                          unsigned char *dst,
+                                          int stride, unsigned short *eobs,
+                                          const short *dc) {
+  /* Y blocks in four rows; each kernel call covers two adjacent 4x4 blocks
+   * and receives the matching two entries of dc[]. eobs[] holds one 16-bit
+   * entry per block, so a pair takes the full path when either of its two
+   * entries is above 1. */
+  int row, pair;
+
+  for (row = 0; row < 4; row++) {
+    for (pair = 0; pair < 4; pair += 2) {
+      if ((eobs[pair] | eobs[pair + 1]) > 1)
+        vp9_idct_dequant_dc_full_2x_sse2(q + 16 * pair, dq, pre + 4 * pair,
+                                         dst + 4 * pair, stride, dc + pair);
+      else
+        vp9_idct_dequant_dc_0_2x_sse2(q + 16 * pair, dq, pre + 4 * pair,
+                                      dst + 4 * pair, stride, dc + pair);
+    }
+    q += 64;
+    dc += 4;
+    pre += 64;
+    dst += stride * 4;
+    eobs += 4;
+  }
+}
+
+void vp9_dequant_idct_add_y_block_sse2(short *q, const short *dq,
+                                       unsigned char *pre, unsigned char *dst,
+                                       int stride, unsigned short *eobs) {
+  /* Y blocks in four rows, two adjacent 4x4 blocks per kernel call; a pair
+   * takes the full path when either of its eobs[] entries is above 1. */
+  int row, pair;
+
+  for (row = 0; row < 4; row++) {
+    for (pair = 0; pair < 4; pair += 2) {
+      if ((eobs[pair] | eobs[pair + 1]) > 1)
+        vp9_idct_dequant_full_2x_sse2(q + 16 * pair, dq, pre + 4 * pair,
+                                      dst + 4 * pair, stride, 16);
+      else
+        vp9_idct_dequant_0_2x_sse2(q + 16 * pair, dq, pre + 4 * pair,
+                                   dst + 4 * pair, stride, 16);
+    }
+    q += 64;
+    pre += 64;
+    dst += stride * 4;
+    eobs += 4;
+  }
+}
+
+void vp9_dequant_idct_add_uv_block_sse2(short *q, const short *dq,
+                                        unsigned char *pre,
+                                        unsigned char *dstu,
+                                        unsigned char *dstv,
+                                        int stride, unsigned short *eobs) {
+  /*
+   * Two kernel calls for dstu followed by two for dstv; each call covers a
+   * pair of adjacent 4x4 blocks, and q, pre and eobs advance continuously
+   * from the U blocks into the V blocks.
+   *
+   * eobs[] holds one 16-bit entry per block, so a pair takes the full path
+   * when either of its two entries is above 1. The value 8 is passed as
+   * the kernels' last argument (declared as blk_stride).
+   */
+  unsigned char *plane_dst[2];
+  int plane, half;
+
+  plane_dst[0] = dstu;
+  plane_dst[1] = dstv;
+
+  for (plane = 0; plane < 2; plane++) {
+    unsigned char *dst = plane_dst[plane];
+
+    for (half = 0; half < 2; half++) {
+      if ((eobs[0] | eobs[1]) > 1)
+        vp9_idct_dequant_full_2x_sse2(q, dq, pre, dst, stride, 8);
+      else
+        vp9_idct_dequant_0_2x_sse2(q, dq, pre, dst, stride, 8);
+
+      q += 32;
+      pre += 32;
+      dst += stride * 4;
+      eobs += 2;
+    }
+  }
+}
diff --git a/vp9/decoder/x86/vp9_idct_mmx.h b/vp9/decoder/x86/vp9_idct_mmx.h
new file mode 100644
index 0000000..c0e9bfd
--- /dev/null
+++ b/vp9/decoder/x86/vp9_idct_mmx.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_DECODER_X86_VP9_IDCT_MMX_H_
+#define VP9_DECODER_X86_VP9_IDCT_MMX_H_
+
+/* MMX kernels; the callers in vp9_idct_blk_mmx.c pass 16 (Y) or 8 (UV) as pitch. */
+void vp9_dequant_dc_idct_add_mmx(short *input, const short *dq,
+ unsigned char *pred, unsigned char *dest,
+ int pitch, int stride, int Dc);
+
+void vp9_dc_only_idct_add_mmx(short input_dc, const unsigned char *pred_ptr,
+ unsigned char *dst_ptr, int pitch, int stride);
+
+void vp9_dequant_idct_add_mmx(short *input, const short *dq, unsigned char *pred,
+ unsigned char *dest, int pitch, int stride);
+
+#endif /* VP9_DECODER_X86_VP9_IDCT_MMX_H_ */
diff --git a/vp9/decoder/x86/vp9_x86_dsystemdependent.c b/vp9/decoder/x86/vp9_x86_dsystemdependent.c
new file mode 100644
index 0000000..51ee8ec
--- /dev/null
+++ b/vp9/decoder/x86/vp9_x86_dsystemdependent.c
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vpx_config.h"
+#include "vpx_ports/x86.h"
+#include "vp9/decoder/vp9_onyxd_int.h"
+
+#if HAVE_MMX
+void vp9_dequantize_b_impl_mmx(short *sq, short *dq, short *q);
+
+/* Hands the three coefficient buffers of block d to the MMX kernel. */
+void vp9_dequantize_b_mmx(BLOCKD *d) {
+  vp9_dequantize_b_impl_mmx((short *)d->qcoeff,    /* sq */
+                            (short *)d->dqcoeff,   /* dq */
+                            (short *)d->dequant);  /* q */
+}
+#endif
+
+
diff --git a/vp9/encoder/ppc/vp9_csystemdependent.c b/vp9/encoder/ppc/vp9_csystemdependent.c
new file mode 100644
index 0000000..cc67625
--- /dev/null
+++ b/vp9/encoder/ppc/vp9_csystemdependent.c
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vp9/encoder/vp9_variance.h"
+#include "vp9/encoder/vp9_onyx_int.h"
+/* Encoder dispatch pointers; each one is assigned in vp9_cmachine_specific_config(). */
+SADFunction *vp9_sad16x16;
+SADFunction *vp9_sad16x8;
+SADFunction *vp9_sad8x16;
+SADFunction *vp9_sad8x8;
+SADFunction *vp9_sad4x4;
+
+variance_function *vp9_variance4x4;
+variance_function *vp9_variance8x8;
+variance_function *vp9_variance8x16;
+variance_function *vp9_variance16x8;
+variance_function *vp9_variance16x16;
+
+variance_function *vp9_mse16x16;
+
+sub_pixel_variance_function *vp9_sub_pixel_variance4x4;
+sub_pixel_variance_function *vp9_sub_pixel_variance8x8;
+sub_pixel_variance_function *vp9_sub_pixel_variance8x16;
+sub_pixel_variance_function *vp9_sub_pixel_variance16x8;
+sub_pixel_variance_function *vp9_sub_pixel_variance16x16;
+
+int (*vp9_block_error)(short *coeff, short *dqcoeff);
+int (*vp9_mbblock_error)(MACROBLOCK *mb, int dc);
+
+int (*vp9_mbuverror)(MACROBLOCK *mb);
+unsigned int (*vp9_get_mb_ss)(short *);
+void (*vp9_short_fdct4x4)(short *input, short *output, int pitch);
+void (*vp9_short_fdct8x4)(short *input, short *output, int pitch);
+void (*vp8_fast_fdct4x4)(short *input, short *output, int pitch); /* set to the same routine as vp9_short_fdct4x4 */
+void (*vp8_fast_fdct8x4)(short *input, short *output, int pitch); /* set to the same routine as vp9_short_fdct8x4 */
+void (*short_walsh4x4)(short *input, short *output, int pitch);
+
+void (*vp9_subtract_b)(BLOCK *be, BLOCKD *bd, int pitch);
+void (*vp9_subtract_mby)(short *diff, unsigned char *src, unsigned char *pred, int stride);
+void (*vp9_subtract_mbuv)(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride);
+void (*vp8_fast_quantize_b)(BLOCK *b, BLOCKD *d);
+
+// Portable C implementations, declared here and defined in other files.
+extern int block_error_c(short *coeff, short *dqcoeff);
+extern int vp9_mbblock_error_c(MACROBLOCK *mb, int dc);
+
+extern int vp9_mbuverror_c(MACROBLOCK *mb);
+extern unsigned int vp8_get8x8var_c(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum);
+extern void short_fdct4x4_c(short *input, short *output, int pitch);
+extern void short_fdct8x4_c(short *input, short *output, int pitch);
+extern void vp9_short_walsh4x4_c(short *input, short *output, int pitch);
+
+extern void vp9_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch);
+extern void subtract_mby_c(short *diff, unsigned char *src, unsigned char *pred, int stride);
+extern void subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride);
+extern void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d);
+
+extern SADFunction sad16x16_c;
+extern SADFunction sad16x8_c;
+extern SADFunction sad8x16_c;
+extern SADFunction sad8x8_c;
+extern SADFunction sad4x4_c;
+
+extern variance_function variance16x16_c;
+extern variance_function variance8x16_c;
+extern variance_function variance16x8_c;
+extern variance_function variance8x8_c;
+extern variance_function variance4x4_c;
+extern variance_function mse16x16_c;
+
+extern sub_pixel_variance_function sub_pixel_variance4x4_c;
+extern sub_pixel_variance_function sub_pixel_variance8x8_c;
+extern sub_pixel_variance_function sub_pixel_variance8x16_c;
+extern sub_pixel_variance_function sub_pixel_variance16x8_c;
+extern sub_pixel_variance_function sub_pixel_variance16x16_c;
+
+extern unsigned int vp9_get_mb_ss_c(short *);
+
+// PPC routines. NOTE(review): the *_altivec.asm files here export several as vp8_*_ppc - confirm names.
+extern int vp9_block_error_ppc(short *coeff, short *dqcoeff);
+
+extern void vp9_short_fdct4x4_ppc(short *input, short *output, int pitch);
+extern void vp9_short_fdct8x4_ppc(short *input, short *output, int pitch);
+
+extern void vp9_subtract_mby_ppc(short *diff, unsigned char *src, unsigned char *pred, int stride);
+extern void vp9_subtract_mbuv_ppc(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride);
+
+extern SADFunction vp9_sad16x16_ppc;
+extern SADFunction vp9_sad16x8_ppc;
+extern SADFunction vp9_sad8x16_ppc;
+extern SADFunction vp9_sad8x8_ppc;
+extern SADFunction vp9_sad4x4_ppc;
+
+extern variance_function vp9_variance16x16_ppc;
+extern variance_function vp9_variance8x16_ppc;
+extern variance_function vp9_variance16x8_ppc;
+extern variance_function vp9_variance8x8_ppc;
+extern variance_function vp9_variance4x4_ppc;
+extern variance_function vp9_mse16x16_ppc;
+
+extern sub_pixel_variance_function vp9_sub_pixel_variance4x4_ppc;
+extern sub_pixel_variance_function vp9_sub_pixel_variance8x8_ppc;
+extern sub_pixel_variance_function vp9_sub_pixel_variance8x16_ppc;
+extern sub_pixel_variance_function vp9_sub_pixel_variance16x8_ppc;
+extern sub_pixel_variance_function vp9_sub_pixel_variance16x16_ppc;
+
+extern unsigned int vp8_get8x8var_ppc(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum);
+extern unsigned int vp8_get16x16var_ppc(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum);
+
+void vp9_cmachine_specific_config(void) {
+  // Entries that keep the portable C implementation.
+  vp9_mbuverror = vp9_mbuverror_c;
+  vp8_fast_quantize_b = vp8_fast_quantize_b_c;
+  short_walsh4x4 = vp9_short_walsh4x4_c;
+  vp9_get_mb_ss = vp9_get_mb_ss_c;
+  vp9_mbblock_error = vp9_mbblock_error_c;
+  vp9_subtract_b = vp9_subtract_b_c;
+
+  // PPC: transforms (both "fast" pointers reuse them), error, subtraction.
+  vp9_short_fdct4x4 = vp9_short_fdct4x4_ppc;
+  vp9_short_fdct8x4 = vp9_short_fdct8x4_ppc;
+  vp8_fast_fdct4x4 = vp9_short_fdct4x4_ppc;
+  vp8_fast_fdct8x4 = vp9_short_fdct8x4_ppc;
+  vp9_block_error = vp9_block_error_ppc;
+  vp9_subtract_mby = vp9_subtract_mby_ppc;
+  vp9_subtract_mbuv = vp9_subtract_mbuv_ppc;
+
+  // PPC: SAD, then variance/MSE, then sub-pixel variance.
+  vp9_sad16x16 = vp9_sad16x16_ppc;
+  vp9_sad16x8 = vp9_sad16x8_ppc;
+  vp9_sad8x16 = vp9_sad8x16_ppc;
+  vp9_sad8x8 = vp9_sad8x8_ppc;
+  vp9_sad4x4 = vp9_sad4x4_ppc;
+
+  vp9_variance4x4 = vp9_variance4x4_ppc;
+  vp9_variance8x8 = vp9_variance8x8_ppc;
+  vp9_variance8x16 = vp9_variance8x16_ppc;
+  vp9_variance16x8 = vp9_variance16x8_ppc;
+  vp9_variance16x16 = vp9_variance16x16_ppc;
+  vp9_mse16x16 = vp9_mse16x16_ppc;
+
+  vp9_sub_pixel_variance4x4 = vp9_sub_pixel_variance4x4_ppc;
+  vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_ppc;
+  vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_ppc;
+  vp9_sub_pixel_variance16x8 = vp9_sub_pixel_variance16x8_ppc;
+  vp9_sub_pixel_variance16x16 = vp9_sub_pixel_variance16x16_ppc;
+}
diff --git a/vp9/encoder/ppc/vp9_encodemb_altivec.asm b/vp9/encoder/ppc/vp9_encodemb_altivec.asm
new file mode 100644
index 0000000..6e0099d
--- /dev/null
+++ b/vp9/encoder/ppc/vp9_encodemb_altivec.asm
@@ -0,0 +1,153 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+ .globl vp8_subtract_mbuv_ppc
+ .globl vp8_subtract_mby_ppc
+;# diff = src - pred as 16-bit values: 8 rows from usrc, then 8 rows from vsrc.
+;# r3 short *diff (offset by 512 bytes before the first store)
+;# r4 unsigned char *usrc
+;# r5 unsigned char *vsrc
+;# r6 unsigned char *pred (offset by 256 bytes before the first load)
+;# r7 int stride (applied to usrc and vsrc only)
+vp8_subtract_mbuv_ppc:
+ mfspr r11, 256 ;# get old VRSAVE
+ oris r12, r11, 0xfc00 ;# mark v0-v5, all of which are used below
+ mtspr 256, r12 ;# set VRSAVE
+
+ li r9, 256
+ add r3, r3, r9 ;# diff += 256 bytes
+ add r3, r3, r9 ;# diff += 256 bytes again (512 in total)
+ add r6, r6, r9 ;# pred += 256 bytes
+
+ li r10, 16 ;# byte offset of the second store in each iteration
+ li r9, 4
+ mtctr r9 ;# 4 iterations, two src rows each
+
+ vspltisw v0, 0 ;# v0 = 0, supplies the high byte of every short
+
+mbu_loop:
+ lvsl v5, 0, r4 ;# permute value for alignment
+ lvx v1, 0, r4 ;# src
+ lvx v2, 0, r6 ;# pred (16 bytes, used for both src rows)
+
+ add r4, r4, r7
+ addi r6, r6, 16
+
+ vperm v1, v1, v0, v5 ;# move the src bytes to the front, zero filled
+
+ vmrghb v3, v0, v1 ;# unpack high src to short
+ vmrghb v4, v0, v2 ;# unpack high pred to short
+
+ lvsl v5, 0, r4 ;# permute value for alignment
+ lvx v1, 0, r4 ;# src
+
+ add r4, r4, r7
+
+ vsubshs v3, v3, v4
+
+ stvx v3, 0, r3 ;# store out diff
+
+ vperm v1, v1, v0, v5
+
+ vmrghb v3, v0, v1 ;# unpack high src to short
+ vmrglb v4, v0, v2 ;# unpack low pred to short
+
+ vsubshs v3, v3, v4
+
+ stvx v3, r10, r3 ;# store out diff
+
+ addi r3, r3, 32
+
+ bdnz mbu_loop
+
+ mtctr r9 ;# r9 is still 4: same trip count for the V rows
+
+mbv_loop:
+ lvsl v5, 0, r5 ;# permute value for alignment
+ lvx v1, 0, r5 ;# src
+ lvx v2, 0, r6 ;# pred (16 bytes, used for both src rows)
+
+ add r5, r5, r7
+ addi r6, r6, 16
+
+ vperm v1, v1, v0, v5 ;# move the src bytes to the front, zero filled
+
+ vmrghb v3, v0, v1 ;# unpack high src to short
+ vmrghb v4, v0, v2 ;# unpack high pred to short
+
+ lvsl v5, 0, r5 ;# permute value for alignment
+ lvx v1, 0, r5 ;# src
+
+ add r5, r5, r7
+
+ vsubshs v3, v3, v4
+
+ stvx v3, 0, r3 ;# store out diff
+
+ vperm v1, v1, v0, v5
+
+ vmrghb v3, v0, v1 ;# unpack high src to short
+ vmrglb v4, v0, v2 ;# unpack low pred to short
+
+ vsubshs v3, v3, v4
+
+ stvx v3, r10, r3 ;# store out diff
+
+ addi r3, r3, 32
+
+ bdnz mbv_loop
+
+ mtspr 256, r11 ;# reset old VRSAVE
+
+ blr
+
+;# r3 short *diff (16 rows of 16 shorts are written)
+;# r4 unsigned char *src (stepped by stride per row)
+;# r5 unsigned char *pred (stepped by 16 bytes per row)
+;# r6 int stride
+vp8_subtract_mby_ppc:
+ mfspr r11, 256 ;# get old VRSAVE
+ oris r12, r11, 0xf800 ;# mark v0-v4
+ mtspr 256, r12 ;# set VRSAVE
+
+ li r10, 16 ;# used as the row count and as the second store offset
+ mtctr r10
+
+ vspltisw v0, 0 ;# v0 = 0, supplies the high byte of every short
+
+mby_loop:
+ lvx v1, 0, r4 ;# src (no alignment fix-up; presumably 16-byte aligned - confirm)
+ lvx v2, 0, r5 ;# pred
+
+ add r4, r4, r6
+ addi r5, r5, 16
+
+ vmrghb v3, v0, v1 ;# unpack high src to short
+ vmrghb v4, v0, v2 ;# unpack high pred to short
+
+ vsubshs v3, v3, v4 ;# src - pred, first 8 pixels
+
+ stvx v3, 0, r3 ;# store out diff
+
+ vmrglb v3, v0, v1 ;# unpack low src to short
+ vmrglb v4, v0, v2 ;# unpack low pred to short
+
+ vsubshs v3, v3, v4 ;# src - pred, last 8 pixels
+
+ stvx v3, r10, r3 ;# store out diff
+
+ addi r3, r3, 32
+
+ bdnz mby_loop
+
+ mtspr 256, r11 ;# reset old VRSAVE
+
+ blr
diff --git a/vp9/encoder/ppc/vp9_fdct_altivec.asm b/vp9/encoder/ppc/vp9_fdct_altivec.asm
new file mode 100644
index 0000000..935d0cb
--- /dev/null
+++ b/vp9/encoder/ppc/vp9_fdct_altivec.asm
@@ -0,0 +1,205 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+ .globl vp8_short_fdct4x4_ppc
+ .globl vp8_short_fdct8x4_ppc
+;# load_c: V = 16-byte vector at LABEL + OFF; R1 is left holding the address of LABEL.
+.macro load_c V, LABEL, OFF, R0, R1
+ lis \R0, \LABEL@ha ;# high-adjusted half of the address
+ la \R1, \LABEL@l(\R0) ;# R1 = full address of LABEL
+ lvx \V, \OFF, \R1
+.endm
+
+;# Forward and inverse DCTs are nearly identical; only differences are
+;# in normalization (fwd is twice unitary, inv is half unitary)
+;# and that they are of course transposes of each other.
+;#
+;# The following three accomplish most of implementation and
+;# are used only by ppc_idct.c and ppc_fdct.c.
+.macro prologue
+ mfspr r11, 256 ;# get old VRSAVE
+ oris r12, r11, 0xfffc ;# mark v0-v13
+ mtspr 256, r12 ;# set VRSAVE
+
+ stwu r1,-32(r1) ;# create space on the stack
+
+ li r6, 16 ;# r6 = 16, reused as a vector offset by the callers
+
+ load_c v0, dct_tab, 0, r9, r10 ;# v0..v3 = the four rows of dct_tab
+ lvx v1, r6, r10
+ addi r10, r10, 32
+ lvx v2, 0, r10
+ lvx v3, r6, r10
+
+ load_c v4, ppc_dctperm_tab, 0, r9, r10 ;# v4 = first permute row
+ load_c v5, ppc_dctperm_tab, r6, r9, r10 ;# v5 = second permute row
+
+ load_c v6, round_tab, 0, r10, r9 ;# v6 = first row; r9 = &round_tab
+.endm
+
+.macro epilogue
+ addi r1, r1, 32 ;# release the 32 bytes taken by the prologue
+
+ mtspr 256, r11 ;# restore the VRSAVE value saved in r11
+.endm
+
+;# Do horiz xf on two rows of coeffs v8 = a0 a1 a2 a3 b0 b1 b2 b3.
+;# a/A are the even rows 0,2 b/B are the odd rows 1,3
+;# For fwd transform, indices are horizontal positions, then frequencies.
+;# For inverse transform, frequencies then positions.
+;# The two resulting A0..A3 B0..B3 are later combined
+;# and vertically transformed.
+;# Inputs: v8 (rows), v0-v3 (coefficients), v4/v5 (permutes), v6 (round), v7 (shift).
+.macro two_rows_horiz Dst
+ vperm v9, v8, v8, v4 ;# v9 = a2 a3 a0 a1 b2 b3 b0 b1
+
+ vmsumshm v10, v0, v8, v6 ;# pairwise products of v0 and v8, plus v6
+ vmsumshm v10, v1, v9, v10 ;# add pairwise products of v1 and v9
+ vsraw v10, v10, v7 ;# v10 = A0 A1 B0 B1
+
+ vmsumshm v11, v2, v8, v6 ;# pairwise products of v2 and v8, plus v6
+ vmsumshm v11, v3, v9, v11 ;# add pairwise products of v3 and v9
+ vsraw v11, v11, v7 ;# v11 = A2 A3 B2 B3
+
+ vpkuwum v10, v10, v11 ;# v10 = A0 A1 B0 B1 A2 A3 B2 B3
+ vperm \Dst, v10, v10, v5 ;# Dest = A0 B0 A1 B1 A2 B2 A3 B3
+.endm
+
+;# Vertical xf on two rows. DCT values in comments are for inverse transform;
+;# forward transform uses transpose.
+;# Inputs: v12/v13 (horizontal results), v6 (round), v7 (shift). Result in v8.
+.macro two_rows_vert Ceven, Codd
+ vspltw v8, \Ceven, 0 ;# v8 = c00 c10 or c02 c12 four times
+ vspltw v9, \Codd, 0 ;# v9 = c20 c30 or c22 c32 ""
+ vmsumshm v8, v8, v12, v6 ;# pairwise products with v12, plus v6
+ vmsumshm v8, v9, v13, v8 ;# add pairwise products with v13
+ vsraw v10, v8, v7
+
+ vspltw v8, \Codd, 1 ;# v8 = c01 c11 or c03 c13
+ vspltw v9, \Ceven, 1 ;# v9 = c21 c31 or c23 c33
+ vmsumshm v8, v8, v12, v6 ;# pairwise products with v12, plus v6
+ vmsumshm v8, v9, v13, v8 ;# add pairwise products with v13
+ vsraw v8, v8, v7
+
+ vpkuwum v8, v10, v8 ;# v8 = rows 0,1 or 2,3
+.endm
+;# two_rows_h: copy two 8-byte input rows to the buffer at r8, then two_rows_horiz.
+.macro two_rows_h Dest
+ stw r0, 0(r8) ;# r0 was loaded by the caller with the row's first word
+ lwz r0, 4(r3)
+ stw r0, 4(r8)
+ lwzux r0, r3,r5 ;# r3 += r5 (pitch), load the next row's first word
+ stw r0, 8(r8)
+ lwz r0, 4(r3)
+ stw r0, 12(r8)
+ lvx v8, 0,r8 ;# v8 = the two rows just gathered
+ two_rows_horiz \Dest
+.endm
+
+ .align 2
+;# r3 short *input (rows are r5 bytes apart)
+;# r4 short *output (two 16-byte vectors are stored)
+;# r5 int pitch
+vp8_short_fdct4x4_ppc:
+
+ prologue
+
+ vspltisw v7, 14 ;# == 14, fits in 5 signed bits
+ addi r8, r1, 0 ;# r8 = gather buffer at the stack pointer
+
+
+ lwz r0, 0(r3) ;# first word of row 0
+ two_rows_h v12 ;# v12 = H00 H10 H01 H11 H02 H12 H03 H13
+
+ lwzux r0, r3, r5 ;# step to row 2, load its first word
+ two_rows_h v13 ;# v13 = H20 H30 H21 H31 H22 H32 H23 H33
+
+ lvx v6, r6, r9 ;# v6 = Vround
+ vspltisw v7, -16 ;# == 16 == -16, only low 5 bits matter
+
+ two_rows_vert v0, v1
+ stvx v8, 0, r4
+ two_rows_vert v2, v3
+ stvx v8, r6, r4
+
+ epilogue
+
+ blr
+
+ .align 2
+;# r3 short *input (second block starts 8 bytes after the first)
+;# r4 short *output (second block is stored 32 bytes after the first)
+;# r5 int pitch
+vp8_short_fdct8x4_ppc:
+ prologue
+
+ vspltisw v7, 14 ;# == 14, fits in 5 signed bits
+ addi r8, r1, 0 ;# r8 = gather buffer at the stack pointer
+ addi r10, r3, 0 ;# keep the original input pointer for the second block
+
+ lwz r0, 0(r3) ;# first word of row 0
+ two_rows_h v12 ;# v12 = H00 H10 H01 H11 H02 H12 H03 H13
+
+ lwzux r0, r3, r5 ;# step to row 2, load its first word
+ two_rows_h v13 ;# v13 = H20 H30 H21 H31 H22 H32 H23 H33
+
+ lvx v6, r6, r9 ;# v6 = Vround
+ vspltisw v7, -16 ;# == 16 == -16, only low 5 bits matter
+
+ two_rows_vert v0, v1
+ stvx v8, 0, r4
+ two_rows_vert v2, v3
+ stvx v8, r6, r4
+
+ ;# Next block
+ addi r3, r10, 8
+ addi r4, r4, 32
+ lvx v6, 0, r9 ;# v6 = Hround
+
+ vspltisw v7, 14 ;# == 14, fits in 5 signed bits
+ addi r8, r1, 0
+
+ lwz r0, 0(r3) ;# first word of row 0
+ two_rows_h v12 ;# v12 = H00 H10 H01 H11 H02 H12 H03 H13
+
+ lwzux r0, r3, r5 ;# step to row 2, load its first word
+ two_rows_h v13 ;# v13 = H20 H30 H21 H31 H22 H32 H23 H33
+
+ lvx v6, r6, r9 ;# v6 = Vround
+ vspltisw v7, -16 ;# == 16 == -16, only low 5 bits matter
+
+ two_rows_vert v0, v1
+ stvx v8, 0, r4
+ two_rows_vert v2, v3
+ stvx v8, r6, r4
+
+ epilogue
+
+ blr
+
+ .data
+ .align 4
+ppc_dctperm_tab:
+ .byte 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 ;# loaded into v4: swaps the words of each 8-byte half
+ .byte 0,1,4,5, 2,3,6,7, 8,9,12,13, 10,11,14,15 ;# loaded into v5: swaps the middle halfwords of each half
+
+ .align 4
+dct_tab:
+ .short 23170, 23170,-12540,-30274, 23170, 23170,-12540,-30274 ;# loaded into v0
+ .short 23170, 23170, 30274, 12540, 23170, 23170, 30274, 12540 ;# loaded into v1
+
+ .short 23170,-23170, 30274,-12540, 23170,-23170, 30274,-12540 ;# loaded into v2
+ .short -23170, 23170, 12540,-30274,-23170, 23170, 12540,-30274 ;# loaded into v3
+
+ .align 4
+round_tab:
+ .long (1 << (14-1)), (1 << (14-1)), (1 << (14-1)), (1 << (14-1)) ;# Hround, used with the shift of 14
+ .long (1 << (16-1)), (1 << (16-1)), (1 << (16-1)), (1 << (16-1)) ;# Vround, used with the shift of 16
diff --git a/vp9/encoder/ppc/vp9_rdopt_altivec.asm b/vp9/encoder/ppc/vp9_rdopt_altivec.asm
new file mode 100644
index 0000000..ba48230
--- /dev/null
+++ b/vp9/encoder/ppc/vp9_rdopt_altivec.asm
@@ -0,0 +1,51 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+ .globl vp8_block_error_ppc
+
+ .align 2
+;# r3 short *Coeff
+;# r4 short *dqcoeff
+vp8_block_error_ppc:
+ mfspr r11, 256 ;# get old VRSAVE
+ oris r12, r11, 0xf800 ;# mark v0-v4
+ mtspr 256, r12 ;# set VRSAVE
+
+ stwu r1,-32(r1) ;# create space on the stack
+
+ stw r5, 12(r1) ;# NOTE(review): r5 is not a listed argument and the stvx below overwrites this slot
+
+ lvx v0, 0, r3 ;# Coeff
+ lvx v1, 0, r4 ;# dqcoeff
+
+ li r10, 16 ;# offset of the second group of 8 shorts
+
+ vspltisw v3, 0 ;# v3 = 0, the initial accumulator
+
+ vsubshs v0, v0, v1
+
+ vmsumshm v2, v0, v0, v3 ;# sum of squared differences, first 8 shorts
+
+ lvx v0, r10, r3 ;# Coeff
+ lvx v1, r10, r4 ;# dqcoeff
+
+ vsubshs v0, v0, v1
+
+ vmsumshm v1, v0, v0, v2 ;# add squared differences of the last 8 shorts
+ vsumsws v1, v1, v3 ;# sum up
+
+ stvx v1, 0, r1 ;# spill so the total can be read into a GPR
+ lwz r3, 12(r1) ;# return value
+
+ addi r1, r1, 32 ;# recover stack
+ mtspr 256, r11 ;# reset old VRSAVE
+
+ blr
diff --git a/vp9/encoder/ppc/vp9_sad_altivec.asm b/vp9/encoder/ppc/vp9_sad_altivec.asm
new file mode 100644
index 0000000..e5f2638
--- /dev/null
+++ b/vp9/encoder/ppc/vp9_sad_altivec.asm
@@ -0,0 +1,277 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+ .globl vp8_sad16x16_ppc
+ .globl vp8_sad16x8_ppc
+ .globl vp8_sad8x16_ppc
+ .globl vp8_sad8x8_ppc
+ .globl vp8_sad4x4_ppc
+;# load_aligned_16: V = 16 bytes starting at address R (any alignment); uses v1-v3.
+.macro load_aligned_16 V R O
+ lvsl v3, 0, \R ;# permute value for alignment
+
+ lvx v1, 0, \R
+ lvx v2, \O, \R ;# O is the offset of the following quadword
+
+ vperm \V, v1, v2, v3
+.endm
+
+.macro prologue
+ mfspr r11, 256 ;# get old VRSAVE
+ oris r12, r11, 0xffc0 ;# mark v0-v9
+ mtspr 256, r12 ;# set VRSAVE
+
+ stwu r1, -32(r1) ;# create space on the stack
+
+ li r10, 16 ;# offset of the second quadword in unaligned loads
+
+ vspltisw v8, 0 ;# zero out total to start
+.endm
+
+.macro epilogue
+ addi r1, r1, 32 ;# release the 32 bytes taken by the prologue
+
+ mtspr 256, r11 ;# restore the VRSAVE value saved in r11
+.endm
+
+.macro SAD_16
+ ;# v6 = abs (v4 - v5), built from two saturating unsigned subtractions
+ vsububs v6, v4, v5
+ vsububs v7, v5, v4
+ vor v6, v6, v7
+
+ ;# v8 += abs (v4 - v5), summed in groups of four bytes per word
+ vsum4ubs v8, v6, v8
+.endm
+
+.macro sad_16_loop loop_label
+ lvsl v3, 0, r5 ;# only needs to be done once per block
+
+ ;# preload a line of data before getting into the loop
+ lvx v4, 0, r3 ;# src, loaded without an alignment fix-up
+ lvx v1, 0, r5
+ lvx v2, r10, r5
+
+ add r5, r5, r6
+ add r3, r3, r4
+
+ vperm v5, v1, v2, v3
+
+ .align 4
+\loop_label:
+ ;# compute difference on first row
+ vsububs v6, v4, v5
+ vsububs v7, v5, v4
+
+ ;# load up next set of data
+ lvx v9, 0, r3
+ lvx v1, 0, r5
+ lvx v2, r10, r5
+
+ ;# perform abs() of difference
+ vor v6, v6, v7
+ add r3, r3, r4
+
+ ;# add to the running tally
+ vsum4ubs v8, v6, v8
+
+ ;# now onto the next line
+ vperm v5, v1, v2, v3
+ add r5, r5, r6
+ lvx v4, 0, r3 ;# NOTE(review): on the last pass this loads one row past the block
+
+ ;# compute difference on second row
+ vsububs v6, v9, v5
+ lvx v1, 0, r5 ;# same one-row look-ahead for the ref buffer
+ vsububs v7, v5, v9
+ lvx v2, r10, r5
+ vor v6, v6, v7
+ add r3, r3, r4
+ vsum4ubs v8, v6, v8
+ vperm v5, v1, v2, v3
+ add r5, r5, r6
+
+ bdnz \loop_label
+
+ vspltisw v7, 0
+
+ vsumsws v8, v8, v7 ;# fold the four partial sums into the last word
+
+ stvx v8, 0, r1 ;# spill so the total can be read into a GPR
+ lwz r3, 12(r1) ;# r3 = total
+.endm
+
+.macro sad_8_loop loop_label
+ .align 4
+\loop_label:
+ ;# only one of the inputs should need to be aligned.
+ load_aligned_16 v4, r3, r10
+ load_aligned_16 v5, r5, r10
+
+ ;# move onto the next line
+ add r3, r3, r4
+ add r5, r5, r6
+
+ ;# only one of the inputs should need to be aligned.
+ load_aligned_16 v6, r3, r10
+ load_aligned_16 v7, r5, r10
+
+ ;# move onto the next line
+ add r3, r3, r4
+ add r5, r5, r6
+
+ vmrghb v4, v4, v6 ;# first 8 src bytes of both rows, interleaved
+ vmrghb v5, v5, v7 ;# first 8 ref bytes of both rows, same interleave
+
+ SAD_16
+
+ bdnz \loop_label
+
+ vspltisw v7, 0
+
+ vsumsws v8, v8, v7 ;# fold the four partial sums into the last word
+
+ stvx v8, 0, r1 ;# spill so the total can be read into a GPR
+ lwz r3, 12(r1) ;# r3 = total
+.endm
+
+ .align 2
+;# r3 unsigned char *src_ptr
+;# r4 int src_stride
+;# r5 unsigned char *ref_ptr
+;# r6 int ref_stride
+;#
+;# r3 return value
+vp8_sad16x16_ppc:
+
+ prologue
+
+ li r9, 8
+ mtctr r9 ;# 8 passes of sad_16_loop, two rows each
+
+ sad_16_loop sad16x16_loop
+
+ epilogue
+
+ blr
+
+ .align 2
+;# r3 unsigned char *src_ptr
+;# r4 int src_stride
+;# r5 unsigned char *ref_ptr
+;# r6 int ref_stride
+;#
+;# r3 return value
+vp8_sad16x8_ppc:
+
+ prologue
+
+ li r9, 4
+ mtctr r9 ;# 4 passes of sad_16_loop, two rows each
+
+ sad_16_loop sad16x8_loop
+
+ epilogue
+
+ blr
+
+ .align 2
+;# r3 unsigned char *src_ptr
+;# r4 int src_stride
+;# r5 unsigned char *ref_ptr
+;# r6 int ref_stride
+;#
+;# r3 return value
+vp8_sad8x16_ppc:
+
+ prologue
+
+ li r9, 8
+ mtctr r9 ;# 8 passes of sad_8_loop, two rows each
+
+ sad_8_loop sad8x16_loop
+
+ epilogue
+
+ blr
+
+ .align 2
+;# r3 unsigned char *src_ptr
+;# r4 int src_stride
+;# r5 unsigned char *ref_ptr
+;# r6 int ref_stride
+;#
+;# r3 return value
+vp8_sad8x8_ppc:
+
+ prologue
+
+ li r9, 4
+ mtctr r9 ;# 4 passes of sad_8_loop, two rows each
+
+ sad_8_loop sad8x8_loop
+
+ epilogue
+
+ blr
+
+.macro transfer_4x4 I P
+ lwz r0, 0(\I) ;# one word from each of four rows, P bytes apart
+ add \I, \I, \P
+
+ lwz r7, 0(\I)
+ add \I, \I, \P
+
+ lwz r8, 0(\I)
+ add \I, \I, \P
+
+ lwz r9, 0(\I)
+
+ stw r0, 0(r1) ;# pack the four words into 16 bytes at the stack pointer
+ stw r7, 4(r1)
+ stw r8, 8(r1)
+ stw r9, 12(r1)
+.endm
+
+ .align 2
+;# r3 unsigned char *src_ptr
+;# r4 int src_stride
+;# r5 unsigned char *ref_ptr
+;# r6 int ref_stride
+;#
+;# r3 return value
+vp8_sad4x4_ppc:
+
+ prologue
+
+ transfer_4x4 r3, r4
+ lvx v4, 0, r1 ;# v4 = the 16 src bytes
+
+ transfer_4x4 r5, r6
+ lvx v5, 0, r1 ;# v5 = the 16 ref bytes
+
+ vspltisw v8, 0 ;# zero out total to start
+
+ ;# v6 = abs (v4 - v5)
+ vsububs v6, v4, v5
+ vsububs v7, v5, v4
+ vor v6, v6, v7
+
+ ;# v7 = per-word sums of abs (v4 - v5), then the total in its last word
+ vsum4ubs v7, v6, v8
+ vsumsws v7, v7, v8
+
+ stvx v7, 0, r1 ;# spill so the total can be read into a GPR
+ lwz r3, 12(r1) ;# r3 = total
+
+ epilogue
+
+ blr
diff --git a/vp9/encoder/ppc/vp9_variance_altivec.asm b/vp9/encoder/ppc/vp9_variance_altivec.asm
new file mode 100644
index 0000000..ad26641
--- /dev/null
+++ b/vp9/encoder/ppc/vp9_variance_altivec.asm
@@ -0,0 +1,375 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+ .globl vp8_get8x8var_ppc
+ .globl vp8_get16x16var_ppc
+ .globl vp8_mse16x16_ppc
+ .globl vp9_variance16x16_ppc
+ .globl vp9_variance16x8_ppc
+ .globl vp9_variance8x16_ppc
+ .globl vp9_variance8x8_ppc
+ .globl vp9_variance4x4_ppc
+
+.macro load_aligned_16 V R O
+ lvsl v3, 0, \R ;# permute control derived from the low address bits
+
+ lvx v1, 0, \R ;# aligned vector containing the first byte
+ lvx v2, \O, \R ;# aligned vector at the given offset past it
+
+ vperm \V, v1, v2, v3 ;# select the 16 bytes starting at the address
+.endm
+
+.macro prologue
+ mfspr r11, 256 ;# get old VRSAVE
+ oris r12, r11, 0xffc0 ;# add v0-v9 to the in-use mask
+ mtspr 256, r12 ;# set VRSAVE
+
+ stwu r1, -32(r1) ;# create space on the stack
+
+ li r10, 16 ;# load offset and loop counter
+
+ vspltisw v7, 0 ;# zero for merging
+ vspltisw v8, 0 ;# zero out total to start
+ vspltisw v9, 0 ;# zero out total for dif^2
+.endm
+
+.macro epilogue
+ addi r1, r1, 32 ;# recover stack
+
+ mtspr 256, r11 ;# reset old VRSAVE (saved in r11 by prologue)
+.endm
+
+.macro compute_sum_sse
+ ;# Compute sum first. Unpack bytes to halfwords so a signed
+ ;# subtract can be used; only a halfword signed subtract
+ ;# is available. Do high, then low.
+ vmrghb v2, v7, v4
+ vmrghb v3, v7, v5
+ vsubshs v2, v2, v3
+ vsum4shs v8, v2, v8
+
+ vmrglb v2, v7, v4
+ vmrglb v3, v7, v5
+ vsubshs v2, v2, v3
+ vsum4shs v8, v2, v8
+
+ ;# Now compute sse.
+ vsububs v2, v4, v5
+ vsububs v3, v5, v4
+ vor v2, v2, v3 ;# v2 = abs (v4 - v5)
+
+ vmsumubm v9, v2, v2, v9 ;# v9 += v2 * v2, summed within each word
+.endm
+
+.macro variance_16 DS loop_label store_sum
+\loop_label:
+ ;# only one of the inputs should need to be aligned.
+ load_aligned_16 v4, r3, r10
+ load_aligned_16 v5, r5, r10
+
+ ;# move onto the next line
+ add r3, r3, r4
+ add r5, r5, r6
+
+ compute_sum_sse
+
+ bdnz \loop_label
+
+ vsumsws v8, v8, v7 ;# reduce the four word sums into the last word
+ vsumsws v9, v9, v7
+
+ stvx v8, 0, r1
+ lwz r3, 12(r1) ;# r3 = sum
+
+ stvx v9, 0, r1
+ lwz r4, 12(r1) ;# r4 = sse
+
+.if \store_sum
+ stw r3, 0(r8) ;# sum
+.endif
+ stw r4, 0(r7) ;# sse
+
+ mullw r3, r3, r3 ;# sum*sum
+ srwi r3, r3, \DS ;# (sum*sum) >> DS, logical: 16x16 sum*sum can exceed 2^31
+ subf r3, r3, r4 ;# sse - ((sum*sum) >> DS)
+.endm
+
+.macro variance_8 DS loop_label store_sum
+\loop_label:
+ ;# only one of the inputs should need to be aligned.
+ load_aligned_16 v4, r3, r10
+ load_aligned_16 v5, r5, r10
+
+ ;# move onto the next line
+ add r3, r3, r4
+ add r5, r5, r6
+
+ ;# only one of the inputs should need to be aligned.
+ load_aligned_16 v6, r3, r10
+ load_aligned_16 v0, r5, r10
+
+ ;# move onto the next line
+ add r3, r3, r4
+ add r5, r5, r6
+
+ vmrghb v4, v4, v6 ;# interleave the first 8 bytes of the two source rows
+ vmrghb v5, v5, v0 ;# same interleave for the two reference rows
+
+ compute_sum_sse
+
+ bdnz \loop_label
+
+ vsumsws v8, v8, v7 ;# reduce the four word sums into the last word
+ vsumsws v9, v9, v7
+
+ stvx v8, 0, r1
+ lwz r3, 12(r1) ;# r3 = sum
+
+ stvx v9, 0, r1
+ lwz r4, 12(r1) ;# r4 = sse
+
+.if \store_sum
+ stw r3, 0(r8) ;# sum
+.endif
+ stw r4, 0(r7) ;# sse
+
+ mullw r3, r3, r3 ;# sum*sum
+ srawi r3, r3, \DS ;# (sum*sum) >> DS
+ subf r3, r3, r4 ;# sse - ((sum*sum) >> DS)
+.endm
+
+ .align 2
+;# r3 unsigned char *src_ptr
+;# r4 int source_stride
+;# r5 unsigned char *ref_ptr
+;# r6 int recon_stride
+;# r7 unsigned int *SSE
+;# r8 int *Sum
+;#
+;# r3 return value
+vp8_get8x8var_ppc:
+
+ prologue
+
+ li r9, 4
+ mtctr r9 ;# 4 iterations; variance_8 consumes two rows per iteration
+
+ variance_8 6, get8x8var_loop, 1
+
+ epilogue
+
+ blr ;# r3 = sse - ((sum*sum) >> 6)
+
+ .align 2
+;# r3 unsigned char *src_ptr
+;# r4 int source_stride
+;# r5 unsigned char *ref_ptr
+;# r6 int recon_stride
+;# r7 unsigned int *SSE
+;# r8 int *Sum
+;#
+;# r3 return value
+vp8_get16x16var_ppc:
+
+ prologue
+
+ mtctr r10 ;# 16 iterations (prologue set r10 to 16), one row each
+
+ variance_16 8, get16x16var_loop, 1
+
+ epilogue
+
+ blr ;# r3 = sse - ((sum*sum) >> 8)
+
+ .align 2
+;# r3 unsigned char *src_ptr
+;# r4 int source_stride
+;# r5 unsigned char *ref_ptr
+;# r6 int recon_stride
+;# r7 unsigned int *sse
+;#
+;# r3 return value
+vp8_mse16x16_ppc:
+ prologue
+
+ mtctr r10 ;# 16 iterations (prologue set r10 to 16), one row each
+
+mse16x16_loop:
+ ;# only one of the inputs should need to be aligned.
+ load_aligned_16 v4, r3, r10
+ load_aligned_16 v5, r5, r10
+
+ ;# move onto the next line
+ add r3, r3, r4
+ add r5, r5, r6
+
+ ;# Now compute sse.
+ vsububs v2, v4, v5
+ vsububs v3, v5, v4
+ vor v2, v2, v3 ;# v2 = abs (v4 - v5)
+
+ vmsumubm v9, v2, v2, v9 ;# v9 += v2 * v2, summed within each word
+
+ bdnz mse16x16_loop
+
+ vsumsws v9, v9, v7 ;# reduce the four word sums into the last word
+
+ stvx v9, 0, r1
+ lwz r3, 12(r1)
+
+ stvx v9, 0, r1 ;# NOTE(review): repeats the store/load pair above;
+ lwz r3, 12(r1) ;# redundant but harmless
+
+ stw r3, 0(r7) ;# sse
+
+ epilogue
+
+ blr
+
+ .align 2
+;# r3 unsigned char *src_ptr
+;# r4 int source_stride
+;# r5 unsigned char *ref_ptr
+;# r6 int recon_stride
+;# r7 unsigned int *sse
+;#
+;# r3 return value
+vp9_variance16x16_ppc:
+
+ prologue
+
+ mtctr r10 ;# 16 iterations (prologue set r10 to 16), one row each
+
+ variance_16 8, variance16x16_loop, 0
+
+ epilogue
+
+ blr ;# r3 = variance; sse was stored through r7
+
+ .align 2
+;# r3 unsigned char *src_ptr
+;# r4 int source_stride
+;# r5 unsigned char *ref_ptr
+;# r6 int recon_stride
+;# r7 unsigned int *sse
+;#
+;# r3 return value
+vp9_variance16x8_ppc:
+
+ prologue
+
+ li r9, 8
+ mtctr r9 ;# 8 iterations, one 16-byte row each
+
+ variance_16 7, variance16x8_loop, 0
+
+ epilogue
+
+ blr ;# r3 = variance; sse was stored through r7
+
+ .align 2
+;# r3 unsigned char *src_ptr
+;# r4 int source_stride
+;# r5 unsigned char *ref_ptr
+;# r6 int recon_stride
+;# r7 unsigned int *sse
+;#
+;# r3 return value
+vp9_variance8x16_ppc:
+
+ prologue
+
+ li r9, 8
+ mtctr r9 ;# 8 iterations; variance_8 consumes two rows per iteration
+
+ variance_8 7, variance8x16_loop, 0
+
+ epilogue
+
+ blr ;# r3 = variance; sse was stored through r7
+
+ .align 2
+;# r3 unsigned char *src_ptr
+;# r4 int source_stride
+;# r5 unsigned char *ref_ptr
+;# r6 int recon_stride
+;# r7 unsigned int *sse
+;#
+;# r3 return value
+vp9_variance8x8_ppc:
+
+ prologue
+
+ li r9, 4
+ mtctr r9 ;# 4 iterations; variance_8 consumes two rows per iteration
+
+ variance_8 6, variance8x8_loop, 0
+
+ epilogue
+
+ blr ;# r3 = variance; sse was stored through r7
+
+.macro transfer_4x4 I P
+ lwz r0, 0(\I) ;# row 0: one 32-bit word (4 pixels)
+ add \I, \I, \P ;# step the pointer by the stride
+
+ lwz r10,0(\I) ;# row 1 (r10, not r7: the 4x4 caller keeps the sse pointer in r7)
+ add \I, \I, \P
+
+ lwz r8, 0(\I) ;# row 2
+ add \I, \I, \P
+
+ lwz r9, 0(\I) ;# row 3 (pointer is left on this row)
+
+ stw r0, 0(r1) ;# pack the four words into 16 bytes at 0(r1)
+ stw r10, 4(r1)
+ stw r8, 8(r1)
+ stw r9, 12(r1) ;# clobbers r0, r8, r9, r10
+.endm
+
+ .align 2
+;# r3 unsigned char *src_ptr
+;# r4 int source_stride
+;# r5 unsigned char *ref_ptr
+;# r6 int recon_stride
+;# r7 unsigned int *sse
+;#
+;# r3 return value
+vp9_variance4x4_ppc:
+
+ prologue
+
+ transfer_4x4 r3, r4
+ lvx v4, 0, r1 ;# v4 = the 16 source bytes packed at 0(r1)
+
+ transfer_4x4 r5, r6
+ lvx v5, 0, r1 ;# v5 = the 16 reference bytes packed at 0(r1)
+
+ compute_sum_sse
+
+ vsumsws v8, v8, v7 ;# reduce the four word sums into the last word
+ vsumsws v9, v9, v7
+
+ stvx v8, 0, r1
+ lwz r3, 12(r1) ;# r3 = sum
+
+ stvx v9, 0, r1
+ lwz r4, 12(r1) ;# r4 = sse
+
+ stw r4, 0(r7) ;# sse
+
+ mullw r3, r3, r3 ;# sum*sum
+ srawi r3, r3, 4 ;# (sum*sum) >> 4
+ subf r3, r3, r4 ;# sse - ((sum*sum) >> 4)
+
+ epilogue
+
+ blr
diff --git a/vp9/encoder/ppc/vp9_variance_subpixel_altivec.asm b/vp9/encoder/ppc/vp9_variance_subpixel_altivec.asm
new file mode 100644
index 0000000..26cc76f
--- /dev/null
+++ b/vp9/encoder/ppc/vp9_variance_subpixel_altivec.asm
@@ -0,0 +1,865 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+ .globl vp9_sub_pixel_variance4x4_ppc
+ .globl vp9_sub_pixel_variance8x8_ppc
+ .globl vp9_sub_pixel_variance8x16_ppc
+ .globl vp9_sub_pixel_variance16x8_ppc
+ .globl vp9_sub_pixel_variance16x16_ppc
+
+.macro load_c V, LABEL, OFF, R0, R1
+ lis \R0, \LABEL@ha ;# high-adjusted half of the label address
+ la \R1, \LABEL@l(\R0) ;# second scratch register = full label address
+ lvx \V, \OFF, \R1 ;# load the vector at label + offset
+.endm
+
+.macro load_vfilter V0, V1
+ load_c \V0, vfilter_b, r6, r12, r10
+
+ addi r6, r6, 16 ;# step to the second vector of the tap pair
+ lvx \V1, r6, r10 ;# r10 holds the vfilter_b address left by load_c
+.endm
+
+.macro HProlog jump_label
+ ;# load up horizontal filter
+ slwi. r5, r5, 4 ;# index into horizontal filter array
+
+ ;# index to the next set of vectors in the row.
+ li r10, 16
+
+ ;# downshift by 7 ( divide by 128 ) at the end
+ vspltish v19, 7
+
+ ;# If there isn't any filtering to be done for the horizontal, then
+ ;# just skip to the second pass.
+ beq \jump_label
+
+ load_c v20, hfilter_b, r5, r12, r0
+
+ ;# setup constants
+ ;# v28 permutation that reorders the packed filter output
+ load_c v28, b_hperm_b, 0, r12, r0
+
+ ;# index to the vector after the next one in the row.
+ li r12, 32
+
+ ;# rounding added in on the multiply
+ vspltisw v21, 8
+ vspltisw v18, 3
+ vslw v18, v21, v18 ;# 0x00000040000000400000004000000040
+
+ slwi. r6, r6, 5 ;# index into vertical filter array; sets CR0 for the caller's beq
+.endm
+
+;# Filters a horizontal line
+;# expects:
+;# r3 src_ptr
+;# r4 pitch
+;# r10 16
+;# r12 32
+;# v17 perm input
+;# v18 rounding
+;# v19 shift
+;# v20 filter taps
+;# v21 tmp
+;# v22 tmp
+;# v23 tmp
+;# v24 tmp
+;# v25 tmp
+;# v26 tmp
+;# v27 tmp
+;# v28 perm output
+;#
+
+.macro hfilter_8 V, hp, lp, increment_counter
+ lvsl v17, 0, r3 ;# permute control for alignment
+
+ ;# input to filter is 9 bytes wide, output is 8 bytes.
+ lvx v21, 0, r3
+ lvx v22, r10, r3
+
+.if \increment_counter
+ add r3, r3, r4
+.endif
+ vperm v21, v21, v22, v17
+
+ vperm v24, v21, v21, \hp ;# v24 = 0123 1234 2345 3456
+ vperm v25, v21, v21, \lp ;# v25 = 4567 5678 6789 789A
+
+ vmsummbm v24, v20, v24, v18
+ vmsummbm v25, v20, v25, v18
+
+ vpkswus v24, v24, v25 ;# v24 = 0 1 2 3 4 5 6 7 (16-bit)
+
+ vsrh v24, v24, v19 ;# divide by 128
+
+ vpkuhus \V, v24, v24 ;# \V = 8-bit result, repeated in both halves
+.endm
+
+.macro vfilter_16 P0 P1
+ vmuleub v22, \P0, v20 ;# 64 + 4 positive taps
+ vadduhm v22, v18, v22
+ vmuloub v23, \P0, v20
+ vadduhm v23, v18, v23
+
+ vmuleub v24, \P1, v21
+ vadduhm v22, v22, v24 ;# Re = evens, saturation unnecessary
+ vmuloub v25, \P1, v21
+ vadduhm v23, v23, v25 ;# Ro = odds
+
+ vsrh v22, v22, v19 ;# divide by 128
+ vsrh v23, v23, v19 ;# v22 v23 = evens, odds
+ vmrghh \P0, v22, v23 ;# P0 v23 = 16-bit result in order
+ vmrglh v23, v22, v23
+ vpkuhus \P0, \P0, v23 ;# P0 = 8-bit result
+.endm
+
+.macro compute_sum_sse src, ref, sum, sse, t1, t2, z0
+ ;# Compute sum first. Unpack bytes to halfwords so a signed
+ ;# subtract can be used; only a halfword signed subtract
+ ;# is available. Do high, then low.
+ vmrghb \t1, \z0, \src
+ vmrghb \t2, \z0, \ref
+ vsubshs \t1, \t1, \t2
+ vsum4shs \sum, \t1, \sum
+
+ vmrglb \t1, \z0, \src
+ vmrglb \t2, \z0, \ref
+ vsubshs \t1, \t1, \t2
+ vsum4shs \sum, \t1, \sum
+
+ ;# Now compute sse.
+ vsububs \t1, \src, \ref
+ vsububs \t2, \ref, \src
+ vor \t1, \t1, \t2 ;# t1 = abs (src - ref)
+
+ vmsumubm \sse, \t1, \t1, \sse
+.endm
+
+.macro variance_final sum, sse, z0, DS
+ vsumsws \sum, \sum, \z0 ;# reduce the four word sums into the last word
+ vsumsws \sse, \sse, \z0
+
+ stvx \sum, 0, r1
+ lwz r3, 12(r1) ;# r3 = sum
+
+ stvx \sse, 0, r1
+ lwz r4, 12(r1) ;# r4 = sse
+
+ stw r4, 0(r9) ;# sse
+
+ mullw r3, r3, r3 ;# sum*sum
+ srwi r3, r3, \DS ;# (sum*sum) >> DS, logical: 16x16 sum*sum can exceed 2^31
+ subf r3, r3, r4 ;# sse - ((sum*sum) >> DS)
+.endm
+
+.macro compute_sum_sse_16 V, increment_counter
+ load_and_align_16 v16, r7, r8, \increment_counter
+ compute_sum_sse \V, v16, v18, v19, v20, v21, v23
+.endm
+
+.macro load_and_align_16 V, R, P, increment_counter
+ lvsl v17, 0, \R ;# permute control for alignment
+
+ ;# fetch the two aligned vectors that cover the 16 bytes
+ ;# starting at the (possibly unaligned) address; r10 must be 16.
+ lvx v21, 0, \R
+ lvx v22, r10, \R
+
+.if \increment_counter
+ add \R, \R, \P
+.endif
+
+ vperm \V, v21, v22, v17
+.endm
+
+ .align 2
+;# r3 unsigned char *src_ptr
+;# r4 int src_pixels_per_line
+;# r5 int xoffset
+;# r6 int yoffset
+;# r7 unsigned char *dst_ptr
+;# r8 int dst_pixels_per_line
+;# r9 unsigned int *sse
+;#
+;# r3 return value
+vp9_sub_pixel_variance4x4_ppc:
+ mfspr r11, 256 ;# get old VRSAVE
+ oris r12, r11, 0xff30 ;# v0-v7, v10, v11 (v5-v7 hold reference rows below)
+ ori r12, r12, 0xfff8 ;# v16-v28
+ mtspr 256, r12 ;# set VRSAVE
+
+ stwu r1,-32(r1) ;# create space on the stack
+
+ HProlog second_pass_4x4_pre_copy_b
+
+ ;# Load up permutation constants
+ load_c v10, b_0123_b, 0, r12, r0
+ load_c v11, b_4567_b, 0, r12, r0
+
+ hfilter_8 v0, v10, v11, 1
+ hfilter_8 v1, v10, v11, 1
+ hfilter_8 v2, v10, v11, 1
+ hfilter_8 v3, v10, v11, 1
+
+ ;# Finished filtering main horizontal block. If there is no
+ ;# vertical filtering, jump to storing the data. Otherwise
+ ;# load up and filter the additional line that is needed
+ ;# for the vertical filter.
+ beq compute_sum_sse_4x4_b ;# CR0 comes from the slwi. on r6 in HProlog
+
+ hfilter_8 v4, v10, v11, 0
+
+ b second_pass_4x4_b
+
+second_pass_4x4_pre_copy_b:
+ slwi r6, r6, 5 ;# index into vertical filter array
+
+ load_and_align_16 v0, r3, r4, 1
+ load_and_align_16 v1, r3, r4, 1
+ load_and_align_16 v2, r3, r4, 1
+ load_and_align_16 v3, r3, r4, 1
+ load_and_align_16 v4, r3, r4, 0
+
+second_pass_4x4_b:
+ vspltish v20, 8
+ vspltish v18, 3
+ vslh v18, v20, v18 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040
+
+ load_vfilter v20, v21
+
+ vfilter_16 v0, v1
+ vfilter_16 v1, v2
+ vfilter_16 v2, v3
+ vfilter_16 v3, v4
+
+compute_sum_sse_4x4_b:
+ vspltish v18, 0 ;# sum
+ vspltish v19, 0 ;# sse
+ vspltish v23, 0 ;# unpack
+ li r10, 16 ;# restore r10 (load_vfilter overwrites it)
+
+ load_and_align_16 v4, r7, r8, 1
+ load_and_align_16 v5, r7, r8, 1
+ load_and_align_16 v6, r7, r8, 1
+ load_and_align_16 v7, r7, r8, 1
+
+ vmrghb v0, v0, v1 ;# interleave filtered rows 0/1
+ vmrghb v1, v2, v3 ;# interleave filtered rows 2/3
+
+ vmrghb v2, v4, v5 ;# interleave reference rows 0/1
+ vmrghb v3, v6, v7 ;# interleave reference rows 2/3
+
+ load_c v10, b_hilo_b, 0, r12, r0
+
+ vperm v0, v0, v1, v10 ;# first 8 bytes of each pair = 4x4 filtered pixels
+ vperm v1, v2, v3, v10 ;# first 8 bytes of each pair = 4x4 reference pixels
+
+ compute_sum_sse v0, v1, v18, v19, v20, v21, v23
+
+ variance_final v18, v19, v23, 4
+
+ addi r1, r1, 32 ;# recover stack
+ mtspr 256, r11 ;# reset old VRSAVE
+
+ blr
+
+ .align 2
+;# r3 unsigned char *src_ptr
+;# r4 int src_pixels_per_line
+;# r5 int xoffset
+;# r6 int yoffset
+;# r7 unsigned char *dst_ptr
+;# r8 int dst_pixels_per_line
+;# r9 unsigned int *sse
+;#
+;# r3 return value
+vp9_sub_pixel_variance8x8_ppc:
+ mfspr r11, 256 ;# get old VRSAVE
+ oris r12, r11, 0xfff0 ;# v0-v11
+ ori r12, r12, 0xffff ;# v16-v31
+ mtspr 256, r12 ;# set VRSAVE
+
+ stwu r1,-32(r1) ;# create space on the stack
+
+ HProlog second_pass_8x8_pre_copy_b
+
+ ;# Load up permutation constants
+ load_c v10, b_0123_b, 0, r12, r0
+ load_c v11, b_4567_b, 0, r12, r0
+
+ hfilter_8 v0, v10, v11, 1
+ hfilter_8 v1, v10, v11, 1
+ hfilter_8 v2, v10, v11, 1
+ hfilter_8 v3, v10, v11, 1
+ hfilter_8 v4, v10, v11, 1
+ hfilter_8 v5, v10, v11, 1
+ hfilter_8 v6, v10, v11, 1
+ hfilter_8 v7, v10, v11, 1
+
+ ;# Finished filtering main horizontal block. If there is no
+ ;# vertical filtering, jump to storing the data. Otherwise
+ ;# load up and filter the additional line that is needed
+ ;# for the vertical filter.
+ beq compute_sum_sse_8x8_b ;# CR0 comes from the slwi. on r6 in HProlog
+
+ hfilter_8 v8, v10, v11, 0
+
+ b second_pass_8x8_b
+
+second_pass_8x8_pre_copy_b:
+ slwi. r6, r6, 5 ;# index into vertical filter array
+
+ load_and_align_16 v0, r3, r4, 1
+ load_and_align_16 v1, r3, r4, 1
+ load_and_align_16 v2, r3, r4, 1
+ load_and_align_16 v3, r3, r4, 1
+ load_and_align_16 v4, r3, r4, 1
+ load_and_align_16 v5, r3, r4, 1
+ load_and_align_16 v6, r3, r4, 1
+ load_and_align_16 v7, r3, r4, 1
+ load_and_align_16 v8, r3, r4, 0
+
+ beq compute_sum_sse_8x8_b ;# yoffset == 0: no vertical filtering either
+
+second_pass_8x8_b:
+ vspltish v20, 8
+ vspltish v18, 3
+ vslh v18, v20, v18 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040
+
+ load_vfilter v20, v21
+
+ vfilter_16 v0, v1
+ vfilter_16 v1, v2
+ vfilter_16 v2, v3
+ vfilter_16 v3, v4
+ vfilter_16 v4, v5
+ vfilter_16 v5, v6
+ vfilter_16 v6, v7
+ vfilter_16 v7, v8
+
+compute_sum_sse_8x8_b:
+ vspltish v18, 0 ;# sum
+ vspltish v19, 0 ;# sse
+ vspltish v23, 0 ;# unpack
+ li r10, 16 ;# restore r10 (load_vfilter overwrites it)
+
+ vmrghb v0, v0, v1 ;# interleave the first 8 bytes of each filtered row pair
+ vmrghb v1, v2, v3
+ vmrghb v2, v4, v5
+ vmrghb v3, v6, v7
+
+ load_and_align_16 v4, r7, r8, 1
+ load_and_align_16 v5, r7, r8, 1
+ load_and_align_16 v6, r7, r8, 1
+ load_and_align_16 v7, r7, r8, 1
+ load_and_align_16 v8, r7, r8, 1
+ load_and_align_16 v9, r7, r8, 1
+ load_and_align_16 v10, r7, r8, 1
+ load_and_align_16 v11, r7, r8, 0
+
+ vmrghb v4, v4, v5 ;# same interleave for the reference row pairs
+ vmrghb v5, v6, v7
+ vmrghb v6, v8, v9
+ vmrghb v7, v10, v11
+
+ compute_sum_sse v0, v4, v18, v19, v20, v21, v23
+ compute_sum_sse v1, v5, v18, v19, v20, v21, v23
+ compute_sum_sse v2, v6, v18, v19, v20, v21, v23
+ compute_sum_sse v3, v7, v18, v19, v20, v21, v23
+
+ variance_final v18, v19, v23, 6
+
+ addi r1, r1, 32 ;# recover stack
+ mtspr 256, r11 ;# reset old VRSAVE
+ blr
+
+ .align 2
+;# r3 unsigned char *src_ptr
+;# r4 int src_pixels_per_line
+;# r5 int xoffset
+;# r6 int yoffset
+;# r7 unsigned char *dst_ptr
+;# r8 int dst_pixels_per_line
+;# r9 unsigned int *sse
+;#
+;# r3 return value
+vp9_sub_pixel_variance8x16_ppc:
+ mfspr r11, 256 ;# get old VRSAVE
+ oris r12, r11, 0xffff ;# v0-v15
+ ori r12, r12, 0xfffe ;# v16-v30 (v29/v30 hold the permute constants)
+ mtspr 256, r12 ;# set VRSAVE
+
+ stwu r1,-32(r1) ;# create space on the stack
+
+ HProlog second_pass_8x16_pre_copy_b
+
+ ;# Load up permutation constants
+ load_c v29, b_0123_b, 0, r12, r0
+ load_c v30, b_4567_b, 0, r12, r0
+
+ hfilter_8 v0, v29, v30, 1
+ hfilter_8 v1, v29, v30, 1
+ hfilter_8 v2, v29, v30, 1
+ hfilter_8 v3, v29, v30, 1
+ hfilter_8 v4, v29, v30, 1
+ hfilter_8 v5, v29, v30, 1
+ hfilter_8 v6, v29, v30, 1
+ hfilter_8 v7, v29, v30, 1
+ hfilter_8 v8, v29, v30, 1
+ hfilter_8 v9, v29, v30, 1
+ hfilter_8 v10, v29, v30, 1
+ hfilter_8 v11, v29, v30, 1
+ hfilter_8 v12, v29, v30, 1
+ hfilter_8 v13, v29, v30, 1
+ hfilter_8 v14, v29, v30, 1
+ hfilter_8 v15, v29, v30, 1
+
+ ;# Finished filtering main horizontal block. If there is no
+ ;# vertical filtering, jump to storing the data. Otherwise
+ ;# load up and filter the additional line that is needed
+ ;# for the vertical filter.
+ beq compute_sum_sse_8x16_b ;# CR0 comes from the slwi. on r6 in HProlog
+
+ hfilter_8 v16, v29, v30, 0
+
+ b second_pass_8x16_b
+
+second_pass_8x16_pre_copy_b:
+ slwi. r6, r6, 5 ;# index into vertical filter array
+
+ load_and_align_16 v0, r3, r4, 1
+ load_and_align_16 v1, r3, r4, 1
+ load_and_align_16 v2, r3, r4, 1
+ load_and_align_16 v3, r3, r4, 1
+ load_and_align_16 v4, r3, r4, 1
+ load_and_align_16 v5, r3, r4, 1
+ load_and_align_16 v6, r3, r4, 1
+ load_and_align_16 v7, r3, r4, 1
+ load_and_align_16 v8, r3, r4, 1
+ load_and_align_16 v9, r3, r4, 1
+ load_and_align_16 v10, r3, r4, 1
+ load_and_align_16 v11, r3, r4, 1
+ load_and_align_16 v12, r3, r4, 1
+ load_and_align_16 v13, r3, r4, 1
+ load_and_align_16 v14, r3, r4, 1
+ load_and_align_16 v15, r3, r4, 1
+ load_and_align_16 v16, r3, r4, 0
+
+ beq compute_sum_sse_8x16_b ;# yoffset == 0: no vertical filtering either
+
+second_pass_8x16_b:
+ vspltish v20, 8
+ vspltish v18, 3
+ vslh v18, v20, v18 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040
+
+ load_vfilter v20, v21
+
+ vfilter_16 v0, v1
+ vfilter_16 v1, v2
+ vfilter_16 v2, v3
+ vfilter_16 v3, v4
+ vfilter_16 v4, v5
+ vfilter_16 v5, v6
+ vfilter_16 v6, v7
+ vfilter_16 v7, v8
+ vfilter_16 v8, v9
+ vfilter_16 v9, v10
+ vfilter_16 v10, v11
+ vfilter_16 v11, v12
+ vfilter_16 v12, v13
+ vfilter_16 v13, v14
+ vfilter_16 v14, v15
+ vfilter_16 v15, v16
+
+compute_sum_sse_8x16_b:
+ vspltish v18, 0 ;# sum
+ vspltish v19, 0 ;# sse
+ vspltish v23, 0 ;# unpack
+ li r10, 16 ;# restore r10 (load_vfilter overwrites it)
+
+ vmrghb v0, v0, v1 ;# interleave the first 8 bytes of each filtered row pair
+ vmrghb v1, v2, v3
+ vmrghb v2, v4, v5
+ vmrghb v3, v6, v7
+ vmrghb v4, v8, v9
+ vmrghb v5, v10, v11
+ vmrghb v6, v12, v13
+ vmrghb v7, v14, v15
+
+ load_and_align_16 v8, r7, r8, 1
+ load_and_align_16 v9, r7, r8, 1
+ load_and_align_16 v10, r7, r8, 1
+ load_and_align_16 v11, r7, r8, 1
+ load_and_align_16 v12, r7, r8, 1
+ load_and_align_16 v13, r7, r8, 1
+ load_and_align_16 v14, r7, r8, 1
+ load_and_align_16 v15, r7, r8, 1
+
+ vmrghb v8, v8, v9 ;# same interleave for reference rows 0-7
+ vmrghb v9, v10, v11
+ vmrghb v10, v12, v13
+ vmrghb v11, v14, v15
+
+ compute_sum_sse v0, v8, v18, v19, v20, v21, v23
+ compute_sum_sse v1, v9, v18, v19, v20, v21, v23
+ compute_sum_sse v2, v10, v18, v19, v20, v21, v23
+ compute_sum_sse v3, v11, v18, v19, v20, v21, v23
+
+ load_and_align_16 v8, r7, r8, 1
+ load_and_align_16 v9, r7, r8, 1
+ load_and_align_16 v10, r7, r8, 1
+ load_and_align_16 v11, r7, r8, 1
+ load_and_align_16 v12, r7, r8, 1
+ load_and_align_16 v13, r7, r8, 1
+ load_and_align_16 v14, r7, r8, 1
+ load_and_align_16 v15, r7, r8, 0
+
+ vmrghb v8, v8, v9 ;# same interleave for reference rows 8-15
+ vmrghb v9, v10, v11
+ vmrghb v10, v12, v13
+ vmrghb v11, v14, v15
+
+ compute_sum_sse v4, v8, v18, v19, v20, v21, v23
+ compute_sum_sse v5, v9, v18, v19, v20, v21, v23
+ compute_sum_sse v6, v10, v18, v19, v20, v21, v23
+ compute_sum_sse v7, v11, v18, v19, v20, v21, v23
+
+ variance_final v18, v19, v23, 7
+
+ addi r1, r1, 32 ;# recover stack
+ mtspr 256, r11 ;# reset old VRSAVE
+ blr
+
+;# Filters a horizontal line
+;# expects:
+;# r3 src_ptr
+;# r4 pitch
+;# r10 16
+;# r12 32
+;# v17 perm input
+;# v18 rounding
+;# v19 shift
+;# v20 filter taps
+;# v21 tmp
+;# v22 tmp
+;# v23 tmp
+;# v24 tmp
+;# v25 tmp
+;# v26 tmp
+;# v27 tmp
+;# v28 perm output
+;#
+.macro hfilter_16 V, increment_counter
+
+ lvsl v17, 0, r3 ;# permute control for alignment
+
+ ;# input to filter is 21 bytes wide, output is 16 bytes.
+ ;# input can span three vectors if not aligned correctly.
+ lvx v21, 0, r3
+ lvx v22, r10, r3
+ lvx v23, r12, r3
+
+.if \increment_counter
+ add r3, r3, r4
+.endif
+ vperm v21, v21, v22, v17
+ vperm v22, v22, v23, v17 ;# v21 v22 = input pixels left-justified
+
+ ;# set 0
+ vmsummbm v24, v20, v21, v18 ;# taps times elements
+
+ ;# set 1
+ vsldoi v23, v21, v22, 1
+ vmsummbm v25, v20, v23, v18
+
+ ;# set 2
+ vsldoi v23, v21, v22, 2
+ vmsummbm v26, v20, v23, v18
+
+ ;# set 3
+ vsldoi v23, v21, v22, 3
+ vmsummbm v27, v20, v23, v18
+
+ vpkswus v24, v24, v25 ;# v24 = 0 4 8 C 1 5 9 D (16-bit)
+ vpkswus v25, v26, v27 ;# v25 = 2 6 A E 3 7 B F
+
+ vsrh v24, v24, v19 ;# divide v24, v25 by 128
+ vsrh v25, v25, v19
+
+ vpkuhus \V, v24, v25 ;# \V = scrambled 8-bit result
+ vperm \V, \V, v0, v28 ;# \V = correctly-ordered result (b_hperm_b indices are all < 16, so v0 is never selected)
+.endm
+
+ .align 2
+;# r3 unsigned char *src_ptr
+;# r4 int src_pixels_per_line
+;# r5 int xoffset
+;# r6 int yoffset
+;# r7 unsigned char *dst_ptr
+;# r8 int dst_pixels_per_line
+;# r9 unsigned int *sse
+;#
+;# r3 return value
+vp9_sub_pixel_variance16x8_ppc:
+ mfspr r11, 256 ;# get old VRSAVE
+ oris r12, r11, 0xffff ;# v0-v15
+ ori r12, r12, 0xfff8 ;# v16-v28
+ mtspr 256, r12 ;# set VRSAVE
+
+ stwu r1, -32(r1) ;# create space on the stack
+
+ HProlog second_pass_16x8_pre_copy_b
+
+ hfilter_16 v0, 1
+ hfilter_16 v1, 1
+ hfilter_16 v2, 1
+ hfilter_16 v3, 1
+ hfilter_16 v4, 1
+ hfilter_16 v5, 1
+ hfilter_16 v6, 1
+ hfilter_16 v7, 1
+
+ ;# Finished filtering main horizontal block. If there is no
+ ;# vertical filtering, jump to storing the data. Otherwise
+ ;# load up and filter the additional line that is needed
+ ;# for the vertical filter.
+ beq compute_sum_sse_16x8_b ;# CR0 comes from the slwi. on r6 in HProlog
+
+ hfilter_16 v8, 0
+
+ b second_pass_16x8_b
+
+second_pass_16x8_pre_copy_b:
+ slwi. r6, r6, 5 ;# index into vertical filter array
+
+ load_and_align_16 v0, r3, r4, 1
+ load_and_align_16 v1, r3, r4, 1
+ load_and_align_16 v2, r3, r4, 1
+ load_and_align_16 v3, r3, r4, 1
+ load_and_align_16 v4, r3, r4, 1
+ load_and_align_16 v5, r3, r4, 1
+ load_and_align_16 v6, r3, r4, 1
+ load_and_align_16 v7, r3, r4, 1
+ load_and_align_16 v8, r3, r4, 1
+
+ beq compute_sum_sse_16x8_b ;# yoffset == 0: no vertical filtering either
+
+second_pass_16x8_b:
+ vspltish v20, 8
+ vspltish v18, 3
+ vslh v18, v20, v18 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040
+
+ load_vfilter v20, v21
+
+ vfilter_16 v0, v1
+ vfilter_16 v1, v2
+ vfilter_16 v2, v3
+ vfilter_16 v3, v4
+ vfilter_16 v4, v5
+ vfilter_16 v5, v6
+ vfilter_16 v6, v7
+ vfilter_16 v7, v8
+
+compute_sum_sse_16x8_b:
+ vspltish v18, 0 ;# sum
+ vspltish v19, 0 ;# sse
+ vspltish v23, 0 ;# unpack
+ li r10, 16 ;# restore r10 (load_vfilter overwrites it)
+
+ compute_sum_sse_16 v0, 1
+ compute_sum_sse_16 v1, 1
+ compute_sum_sse_16 v2, 1
+ compute_sum_sse_16 v3, 1
+ compute_sum_sse_16 v4, 1
+ compute_sum_sse_16 v5, 1
+ compute_sum_sse_16 v6, 1
+ compute_sum_sse_16 v7, 0
+
+ variance_final v18, v19, v23, 7
+
+ addi r1, r1, 32 ;# recover stack
+
+ mtspr 256, r11 ;# reset old VRSAVE
+
+ blr
+
+ .align 2
+;# r3 unsigned char *src_ptr
+;# r4 int src_pixels_per_line
+;# r5 int xoffset
+;# r6 int yoffset
+;# r7 unsigned char *dst_ptr
+;# r8 int dst_pixels_per_line
+;# r9 unsigned int *sse
+;#
+;# r3 return value
+vp9_sub_pixel_variance16x16_ppc:
+ mfspr r11, 256 ;# get old VRSAVE
+ oris r12, r11, 0xffff ;# v0-v15
+ ori r12, r12, 0xfff8 ;# v16-v28
+ mtspr 256, r12 ;# set VRSAVE
+
+ stwu r1, -32(r1) ;# create space on the stack
+
+ HProlog second_pass_16x16_pre_copy_b
+
+ hfilter_16 v0, 1
+ hfilter_16 v1, 1
+ hfilter_16 v2, 1
+ hfilter_16 v3, 1
+ hfilter_16 v4, 1
+ hfilter_16 v5, 1
+ hfilter_16 v6, 1
+ hfilter_16 v7, 1
+ hfilter_16 v8, 1
+ hfilter_16 v9, 1
+ hfilter_16 v10, 1
+ hfilter_16 v11, 1
+ hfilter_16 v12, 1
+ hfilter_16 v13, 1
+ hfilter_16 v14, 1
+ hfilter_16 v15, 1
+
+ ;# Finished filtering main horizontal block. If there is no
+ ;# vertical filtering, jump to storing the data. Otherwise
+ ;# load up and filter the additional line that is needed
+ ;# for the vertical filter.
+ beq compute_sum_sse_16x16_b ;# CR0 comes from the slwi. on r6 in HProlog
+
+ hfilter_16 v16, 0
+
+ b second_pass_16x16_b
+
+second_pass_16x16_pre_copy_b:
+ slwi. r6, r6, 5 ;# index into vertical filter array
+
+ load_and_align_16 v0, r3, r4, 1
+ load_and_align_16 v1, r3, r4, 1
+ load_and_align_16 v2, r3, r4, 1
+ load_and_align_16 v3, r3, r4, 1
+ load_and_align_16 v4, r3, r4, 1
+ load_and_align_16 v5, r3, r4, 1
+ load_and_align_16 v6, r3, r4, 1
+ load_and_align_16 v7, r3, r4, 1
+ load_and_align_16 v8, r3, r4, 1
+ load_and_align_16 v9, r3, r4, 1
+ load_and_align_16 v10, r3, r4, 1
+ load_and_align_16 v11, r3, r4, 1
+ load_and_align_16 v12, r3, r4, 1
+ load_and_align_16 v13, r3, r4, 1
+ load_and_align_16 v14, r3, r4, 1
+ load_and_align_16 v15, r3, r4, 1
+ load_and_align_16 v16, r3, r4, 0
+
+ beq compute_sum_sse_16x16_b ;# yoffset == 0: no vertical filtering either
+
+second_pass_16x16_b:
+ vspltish v20, 8
+ vspltish v18, 3
+ vslh v18, v20, v18 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040
+
+ load_vfilter v20, v21
+
+ vfilter_16 v0, v1
+ vfilter_16 v1, v2
+ vfilter_16 v2, v3
+ vfilter_16 v3, v4
+ vfilter_16 v4, v5
+ vfilter_16 v5, v6
+ vfilter_16 v6, v7
+ vfilter_16 v7, v8
+ vfilter_16 v8, v9
+ vfilter_16 v9, v10
+ vfilter_16 v10, v11
+ vfilter_16 v11, v12
+ vfilter_16 v12, v13
+ vfilter_16 v13, v14
+ vfilter_16 v14, v15
+ vfilter_16 v15, v16
+
+compute_sum_sse_16x16_b:
+ vspltish v18, 0 ;# sum
+ vspltish v19, 0 ;# sse
+ vspltish v23, 0 ;# unpack
+ li r10, 16 ;# restore r10 (load_vfilter overwrites it)
+
+ compute_sum_sse_16 v0, 1
+ compute_sum_sse_16 v1, 1
+ compute_sum_sse_16 v2, 1
+ compute_sum_sse_16 v3, 1
+ compute_sum_sse_16 v4, 1
+ compute_sum_sse_16 v5, 1
+ compute_sum_sse_16 v6, 1
+ compute_sum_sse_16 v7, 1
+ compute_sum_sse_16 v8, 1
+ compute_sum_sse_16 v9, 1
+ compute_sum_sse_16 v10, 1
+ compute_sum_sse_16 v11, 1
+ compute_sum_sse_16 v12, 1
+ compute_sum_sse_16 v13, 1
+ compute_sum_sse_16 v14, 1
+ compute_sum_sse_16 v15, 0
+
+ variance_final v18, v19, v23, 8
+
+ addi r1, r1, 32 ;# recover stack
+
+ mtspr 256, r11 ;# reset old VRSAVE
+
+ blr
+
+ .data
+
+ .align 4
+hfilter_b:
+ .byte 128, 0, 0, 0,128, 0, 0, 0,128, 0, 0, 0,128, 0, 0, 0
+ .byte 112, 16, 0, 0,112, 16, 0, 0,112, 16, 0, 0,112, 16, 0, 0
+ .byte 96, 32, 0, 0, 96, 32, 0, 0, 96, 32, 0, 0, 96, 32, 0, 0
+ .byte 80, 48, 0, 0, 80, 48, 0, 0, 80, 48, 0, 0, 80, 48, 0, 0
+ .byte 64, 64, 0, 0, 64, 64, 0, 0, 64, 64, 0, 0, 64, 64, 0, 0
+ .byte 48, 80, 0, 0, 48, 80, 0, 0, 48, 80, 0, 0, 48, 80, 0, 0
+ .byte 32, 96, 0, 0, 32, 96, 0, 0, 32, 96, 0, 0, 32, 96, 0, 0
+ .byte 16,112, 0, 0, 16,112, 0, 0, 16,112, 0, 0, 16,112, 0, 0
+
+ .align 4
+vfilter_b:
+ .byte 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ .byte 112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112
+ .byte 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
+ .byte 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96
+ .byte 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
+ .byte 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80
+ .byte 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48
+ .byte 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
+ .byte 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
+ .byte 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48
+ .byte 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80
+ .byte 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
+ .byte 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96
+ .byte 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
+ .byte 112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112
+
+ .align 4
+b_hperm_b:
+ .byte 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15
+
+ .align 4
+b_0123_b:
+ .byte 0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6
+
+ .align 4
+b_4567_b:
+ .byte 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10
+ .align 4 ;# loaded with lvx, so it must start on a 16-byte boundary
+b_hilo_b:
+ .byte 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23
diff --git a/vp9/encoder/vp9_asm_enc_offsets.c b/vp9/encoder/vp9_asm_enc_offsets.c
new file mode 100644
index 0000000..71fad2e
--- /dev/null
+++ b/vp9/encoder/vp9_asm_enc_offsets.c
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_ports/asm_offsets.h"
+#include "vpx_config.h"
+#include "vp9/encoder/vp9_block.h"
+#include "vp9/common/vp9_blockd.h"
+#include "vp9/encoder/vp9_onyx_int.h"
+#include "vp9/encoder/vp9_treewriter.h"
+#include "vp9/encoder/vp9_tokenize.h"
+
+BEGIN
+
+/* regular quantize: byte offsets of BLOCK members (presumably consumed by assembly; BEGIN/DEFINE/END come from asm_offsets.h) */
+DEFINE(vp9_block_coeff, offsetof(BLOCK, coeff));
+DEFINE(vp9_block_zbin, offsetof(BLOCK, zbin));
+DEFINE(vp9_block_round, offsetof(BLOCK, round));
+DEFINE(vp9_block_quant, offsetof(BLOCK, quant));
+DEFINE(vp9_block_quant_fast, offsetof(BLOCK, quant_fast));
+DEFINE(vp9_block_zbin_extra, offsetof(BLOCK, zbin_extra));
+DEFINE(vp9_block_zrun_zbin_boost, offsetof(BLOCK, zrun_zbin_boost));
+DEFINE(vp9_block_quant_shift, offsetof(BLOCK, quant_shift));
+
+DEFINE(vp9_blockd_qcoeff, offsetof(BLOCKD, qcoeff));
+DEFINE(vp9_blockd_dequant, offsetof(BLOCKD, dequant));
+DEFINE(vp9_blockd_dqcoeff, offsetof(BLOCKD, dqcoeff));
+DEFINE(vp9_blockd_eob, offsetof(BLOCKD, eob));
+
+END
+
+/* add asserts for any offset that is not supported by assembly code
+ * add asserts for any size that is not supported by assembly code
+ */
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
new file mode 100644
index 0000000..5e56d2c
--- /dev/null
+++ b/vp9/encoder/vp9_bitstream.c
@@ -0,0 +1,2363 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vp9/common/vp9_header.h"
+#include "vp9/encoder/vp9_encodemv.h"
+#include "vp9/common/vp9_entropymode.h"
+#include "vp9/common/vp9_findnearmv.h"
+#include "vp9/encoder/vp9_mcomp.h"
+#include "vp9/common/vp9_systemdependent.h"
+#include <assert.h>
+#include <stdio.h>
+#include <limits.h>
+#include "vp9/common/vp9_pragmas.h"
+#include "vpx/vpx_encoder.h"
+#include "vpx_mem/vpx_mem.h"
+#include "vp9/encoder/vp9_bitstream.h"
+#include "vp9/encoder/vp9_segmentation.h"
+
+#include "vp9/common/vp9_seg_common.h"
+#include "vp9/common/vp9_pred_common.h"
+#include "vp9/common/vp9_entropy.h"
+#include "vp9/encoder/vp9_encodemv.h"
+#include "vp9/common/vp9_entropymv.h"
+#include "vp9/common/vp9_mvref_common.h"
+
+// Only compiled when SECTIONBITS_OUTPUT is defined. Uses the compiler-specific
+// __int64 type.
+#if defined(SECTIONBITS_OUTPUT)
+unsigned __int64 Sectionbits[500];
+#endif
+
+// Statistics tables that exist only in ENTROPY_STATS builds. Nothing in the
+// visible part of this file writes to the *_hist tables.
+#ifdef ENTROPY_STATS
+int intra_mode_stats[VP9_KF_BINTRAMODES]
+ [VP9_KF_BINTRAMODES]
+ [VP9_KF_BINTRAMODES];
+unsigned int tree_update_hist [BLOCK_TYPES]
+ [COEF_BANDS]
+ [PREV_COEF_CONTEXTS]
+ [ENTROPY_NODES][2];
+unsigned int hybrid_tree_update_hist [BLOCK_TYPES]
+ [COEF_BANDS]
+ [PREV_COEF_CONTEXTS]
+ [ENTROPY_NODES][2];
+unsigned int tree_update_hist_8x8 [BLOCK_TYPES_8X8]
+ [COEF_BANDS]
+ [PREV_COEF_CONTEXTS]
+ [ENTROPY_NODES] [2];
+unsigned int hybrid_tree_update_hist_8x8 [BLOCK_TYPES_8X8]
+ [COEF_BANDS]
+ [PREV_COEF_CONTEXTS]
+ [ENTROPY_NODES] [2];
+unsigned int tree_update_hist_16x16 [BLOCK_TYPES_16X16]
+ [COEF_BANDS]
+ [PREV_COEF_CONTEXTS]
+ [ENTROPY_NODES] [2];
+unsigned int hybrid_tree_update_hist_16x16 [BLOCK_TYPES_16X16]
+ [COEF_BANDS]
+ [PREV_COEF_CONTEXTS]
+ [ENTROPY_NODES] [2];
+
+// Defined in another translation unit; pack_inter_mode_mvs() assigns it a
+// different constant before each group of symbols it writes.
+extern unsigned int active_section;
+#endif
+
+// MODE_STATS builds only: pack_inter_mode_mvs() increments the entry indexed
+// by mi->partitioning for every SPLITMV macroblock it writes.
+#ifdef MODE_STATS
+int count_mb_seg[4] = { 0, 0, 0, 0 };
+#endif
+
+// NOTE: both macros expand to an expression that reads an identifier named
+// `upd`, which is not a macro parameter. They can only be used inside a
+// function that has a variable or parameter called `upd` in scope.
+// vp9_cost_upd256 is vp9_cost_one(upd) - vp9_cost_zero(upd) cast to int;
+// vp9_cost_upd is that same int shifted right by 8.
+#define vp9_cost_upd ((int)(vp9_cost_one(upd) - vp9_cost_zero(upd)) >> 8)
+#define vp9_cost_upd256 ((int)(vp9_cost_one(upd) - vp9_cost_zero(upd)))
+
+// Not tested by any code in the visible part of this file.
+#define SEARCH_NEWP
+// Filled by compute_update_table() and read by prob_diff_update_cost(),
+// which indexes it with the value returned by remap_prob().
+static int update_bits[255];
+
+// Populates update_bits[]: entry i receives
+// vp9_count_term_subexp(i, SUBEXP_PARAM, 255).
+// prob_diff_update_cost() reads the table, so this has to run before any cost
+// query is made.
+static void compute_update_table(void) {
+  // Derive the bound from the array itself so the loop cannot drift from the
+  // declaration of update_bits[].
+  const int num_entries = (int)(sizeof(update_bits) / sizeof(update_bits[0]));
+  int i;
+
+  for (i = 0; i < num_entries; i++)
+    update_bits[i] = vp9_count_term_subexp(i, SUBEXP_PARAM, 255);
+}
+
+// Reorders index i within a range of n values.
+// Indices with i % modulus == modulus / 2 are packed at the front (the k-th
+// such index maps to k); every other index follows after that group and keeps
+// its relative order.
+static int split_index(int i, int n, int modulus) {
+  const int half = modulus / 2;
+  // Number of indices in [0, n - 1] that fall into the front group.
+  const int front_count = (n - 1 - half) / modulus + 1;
+
+  if (i % modulus == half)
+    return i / modulus;
+
+  // (i + modulus - half) / modulus counts the front-group indices below i.
+  return front_count + i - (i + modulus - half) / modulus;
+}
+
+// Maps probability v, coded relative to reference probability m, to the index
+// that write_prob_diff_update() transmits and prob_diff_update_cost() prices.
+// The value is recentred around m with vp9_recenter_nonneg() (mirrored to
+// 255 - v / 255 - m when 2 * m exceeds 256), reduced by one, and finally
+// reordered by split_index() over 255 entries with MODULUS_PARAM.
+static int remap_prob(int v, int m) {
+  const int n = 256;
+  const int recentred = ((m << 1) <= n)
+      ? vp9_recenter_nonneg(v, m) - 1
+      : vp9_recenter_nonneg(n - 1 - v, n - 1 - m) - 1;
+
+  return split_index(recentred, n - 1, MODULUS_PARAM);
+}
+
+// Writes the change from oldp to newp to bc: the pair is converted to an index
+// by remap_prob() and passed to vp9_encode_term_subexp() with SUBEXP_PARAM
+// and an upper limit of 255.
+static void write_prob_diff_update(vp9_writer *const bc,
+                                   vp9_prob newp, vp9_prob oldp) {
+  vp9_encode_term_subexp(bc, remap_prob(newp, oldp), SUBEXP_PARAM, 255);
+}
+
+// Returns the update_bits[] entry for the index remap_prob(newp, oldp),
+// multiplied by 256. update_bits[] must already have been filled by
+// compute_update_table().
+static int prob_diff_update_cost(vp9_prob newp, vp9_prob oldp) {
+  const int index = remap_prob(newp, oldp);
+
+  return 256 * update_bits[index];
+}
+
+// Decides whether to replace the n - 1 tree probabilities in Pcur and signals
+// the decision in bc.
+//
+// Pnew and bct are filled by vp9_tree_probs_from_distribution() from
+// num_events. If coding the observed branches with Pnew costs less than with
+// Pcur by more than (n - 1) << 8, a 1 bit is written followed by each new
+// probability as an 8-bit literal (a zero probability is sent and stored as
+// 1), and Pcur is overwritten. Otherwise a single 0 bit is written and Pcur
+// is left alone.
+static void update_mode(
+  vp9_writer *const bc,
+  int n,
+  vp9_token tok [/* n */],
+  vp9_tree tree,
+  vp9_prob Pnew [/* n-1 */],
+  vp9_prob Pcur [/* n-1 */],
+  unsigned int bct [/* n-1 */] [2],
+  const unsigned int num_events[/* n */]
+) {
+  const int num_probs = n - 1;
+  unsigned int cost_new = 0;
+  unsigned int cost_cur = 0;
+  int k;
+
+  vp9_tree_probs_from_distribution(n, tok, tree,
+                                   Pnew, bct, num_events,
+                                   256, 1);
+
+  // The loops below run at least once, exactly like the original do/while.
+  k = 0;
+  do {
+    cost_new += cost_branch(bct[k], Pnew[k]);
+    cost_cur += cost_branch(bct[k], Pcur[k]);
+  } while (++k < num_probs);
+
+  if (cost_new + (num_probs << 8) >= cost_cur) {
+    vp9_write_bit(bc, 0);
+    return;
+  }
+
+  vp9_write_bit(bc, 1);
+
+  k = 0;
+  do {
+    const vp9_prob replacement = Pnew[k] ? Pnew[k] : 1;
+
+    Pcur[k] = replacement;
+    vp9_write_literal(bc, replacement, 8);
+  } while (++k < num_probs);
+}
+
+// Runs update_mode() on the Y-mode probabilities (cm->fc.ymode_prob, counts
+// from cpi->ymode_count) and, when CONFIG_SUPERBLOCKS is set, on the
+// superblock Y-mode probabilities as well.
+// The scratch arrays are sized for VP9_YMODES and reused for the superblock
+// call (assumes VP9_I32X32_MODES <= VP9_YMODES -- TODO confirm).
+static void update_mbintra_mode_probs(VP9_COMP* const cpi,
+                                      vp9_writer* const bc) {
+  VP9_COMMON *const cm = &cpi->common;
+  vp9_prob new_probs[VP9_YMODES - 1];
+  unsigned int branch_counts[VP9_YMODES - 1][2];
+
+  update_mode(bc, VP9_YMODES, vp9_ymode_encodings, vp9_ymode_tree,
+              new_probs, cm->fc.ymode_prob, branch_counts,
+              (unsigned int *)cpi->ymode_count);
+#if CONFIG_SUPERBLOCKS
+  update_mode(bc, VP9_I32X32_MODES, vp9_sb_ymode_encodings,
+              vp9_sb_ymode_tree, new_probs, cm->fc.sb_ymode_prob,
+              branch_counts, (unsigned int *)cpi->sb_ymode_count);
+#endif
+}
+
+// Returns num / den scaled to 255 and rounded to nearest, passed through
+// clip_prob(). A non-positive denominator yields 128.
+static int get_prob(int num, int den) {
+  if (den > 0) {
+    const int scaled = (num * 255 + (den >> 1)) / den;
+
+    return clip_prob(scaled);
+  }
+
+  return 128;
+}
+
+// Probability of the first of two outcomes given their counts n0 and n1:
+// get_prob(n0, n0 + n1).
+static int get_binary_prob(int n0, int n1) {
+  const int total = n0 + n1;
+
+  return get_prob(n0, total);
+}
+
+// Recomputes cpi->common.mbskip_pred_probs[] for every one of the
+// MBSKIP_CONTEXTS contexts from cpi->skip_false_count[] and
+// cpi->skip_true_count[].
+void vp9_update_skip_probs(VP9_COMP *cpi) {
+  int ctx = 0;
+
+  while (ctx < MBSKIP_CONTEXTS) {
+    cpi->common.mbskip_pred_probs[ctx] =
+        get_binary_prob(cpi->skip_false_count[ctx],
+                        cpi->skip_true_count[ctx]);
+    ++ctx;
+  }
+}
+
+// Rebuilds the switchable interpolation-filter probabilities from
+// cpi->switchable_interp_count[] and writes them to bc unconditionally.
+// For each of the VP9_SWITCHABLE_FILTERS + 1 contexts the
+// VP9_SWITCHABLE_FILTERS - 1 probabilities are raised to at least 1 and
+// written as 8-bit literals.
+static void update_switchable_interp_probs(VP9_COMP *cpi,
+                                           vp9_writer* const bc) {
+  VP9_COMMON *const cm = &cpi->common;
+  unsigned int branch_counts[32][2];
+  int ctx;
+
+  // NOTE: the bound is inclusive, so VP9_SWITCHABLE_FILTERS + 1 contexts are
+  // processed.
+  for (ctx = 0; ctx <= VP9_SWITCHABLE_FILTERS; ++ctx) {
+    int node;
+
+    vp9_tree_probs_from_distribution(
+        VP9_SWITCHABLE_FILTERS,
+        vp9_switchable_interp_encodings, vp9_switchable_interp_tree,
+        cm->fc.switchable_interp_prob[ctx], branch_counts,
+        cpi->switchable_interp_count[ctx], 256, 1);
+
+    for (node = 0; node < VP9_SWITCHABLE_FILTERS - 1; ++node) {
+      if (cm->fc.switchable_interp_prob[ctx][node] < 1)
+        cm->fc.switchable_interp_prob[ctx][node] = 1;
+      vp9_write_literal(bc, cm->fc.switchable_interp_prob[ctx][node], 8);
+    }
+  }
+}
+
+// Refreshes the reference-frame prediction probabilities in
+// cm->ref_pred_probs[] and records in cpi->ref_pred_probs_update[] which
+// entries changed.
+//
+// Key frames: the three probabilities are reset to 120, 80 and 40 and all
+// update flags are cleared.
+// Other frames: for each context a candidate probability is derived from
+// cpi->ref_pred_count[]; it replaces the current one only when it lowers the
+// cost of coding the counted events by at least 2048 (8 bits, costs being in
+// 1/256 bit units).
+static void update_refpred_stats(VP9_COMP *cpi) {
+  VP9_COMMON *const cm = &cpi->common;
+  int ctx;
+
+  if (cm->frame_type == KEY_FRAME) {
+    cm->ref_pred_probs[0] = 120;
+    cm->ref_pred_probs[1] = 80;
+    cm->ref_pred_probs[2] = 40;
+
+    vpx_memset(cpi->ref_pred_probs_update, 0,
+               sizeof(cpi->ref_pred_probs_update));
+    return;
+  }
+
+  for (ctx = 0; ctx < PREDICTION_PROBS; ctx++) {
+    const vp9_prob candidate = get_binary_prob(cpi->ref_pred_count[ctx][0],
+                                               cpi->ref_pred_count[ctx][1]);
+    // Cost of the counted events under the current probability...
+    const int cost_current =
+        (cpi->ref_pred_count[ctx][0] * vp9_cost_zero(cm->ref_pred_probs[ctx])) +
+        (cpi->ref_pred_count[ctx][1] * vp9_cost_one(cm->ref_pred_probs[ctx]));
+    // ...and under the candidate.
+    const int cost_candidate =
+        (cpi->ref_pred_count[ctx][0] * vp9_cost_zero(candidate)) +
+        (cpi->ref_pred_count[ctx][1] * vp9_cost_one(candidate));
+    const int saving = cost_current - cost_candidate;
+
+    if (saving >= 2048) {
+      cpi->ref_pred_probs_update[ctx] = 1;
+      cm->ref_pred_probs[ctx] = candidate;
+    } else {
+      cpi->ref_pred_probs_update[ctx] = 0;
+    }
+  }
+}
+
+// Builds the inter-mode probability contexts into mode_context.
+//
+// mode_context starts as a copy of cm->fc.vp9_mode_contexts. Each entry is
+// then compared with the probability implied by the branch counts in
+// cm->fc.mv_ref_ct and replaced when that saves at least 14 bits. The
+// original comment states that the counts must already have been gathered
+// (rd stage or dummy pack) and that the 14-bit threshold approximates the net
+// cost of an update, given that "no update" is much the more common case.
+static void update_mode_probs(VP9_COMMON *cm,
+                              int mode_context[INTER_MODE_CONTEXTS][4]) {
+  unsigned int (*const counts)[4][2] = cm->fc.mv_ref_ct;
+  int ctx, node;
+
+  vpx_memcpy(mode_context, cm->fc.vp9_mode_contexts,
+             sizeof(cm->fc.vp9_mode_contexts));
+
+  for (ctx = 0; ctx < INTER_MODE_CONTEXTS; ctx++) {
+    for (node = 0; node < 4; node++) {
+      const unsigned int *const ct = counts[ctx][node];
+      const int cost_current = cost_branch256(ct, mode_context[ctx][node]);
+      const int total = ct[0] + ct[1];
+      // Probability implied by the counts; 128 when nothing was counted,
+      // never lower than 1.
+      int best_prob = total > 0 ? (255 * ct[0]) / total : 128;
+      int cost_best;
+
+      if (best_prob <= 0)
+        best_prob = 1;
+      cost_best = cost_branch256(ct, best_prob);
+
+      // Costs are in 1/256 bit units, so 14 << 8 is 14 bits.
+      if (cost_best <= (cost_current - (14 << 8)))
+        mode_context[ctx][node] = best_prob;
+    }
+  }
+}
+// The functions below are thin wrappers around write_token(): each writes
+// symbol m (or x) to bc using one fixed tree, the node probabilities p, and
+// the matching entry of that tree's encodings table.
+
+// Y mode m coded with vp9_ymode_tree.
+static void write_ymode(vp9_writer *bc, int m, const vp9_prob *p) {
+ write_token(bc, vp9_ymode_tree, p, vp9_ymode_encodings + m);
+}
+
+// Y mode m coded with vp9_kf_ymode_tree.
+static void kfwrite_ymode(vp9_writer *bc, int m, const vp9_prob *p) {
+ write_token(bc, vp9_kf_ymode_tree, p, vp9_kf_ymode_encodings + m);
+}
+
+#if CONFIG_SUPERBLOCKS
+// Superblock Y mode m coded with vp9_sb_ymode_tree.
+static void write_sb_ymode(vp9_writer *bc, int m, const vp9_prob *p) {
+ write_token(bc, vp9_sb_ymode_tree, p, vp9_sb_ymode_encodings + m);
+}
+
+// Superblock Y mode m taken from vp9_sb_kf_ymode_encodings.
+// NOTE(review): the tree passed here is vp9_uv_mode_tree, not an sb/kf Y-mode
+// tree like the sibling wrappers use. Confirm that the encodings table was
+// built from a tree of identical shape and that the decoder reads it the
+// same way before changing anything.
+static void sb_kfwrite_ymode(vp9_writer *bc, int m, const vp9_prob *p) {
+ write_token(bc, vp9_uv_mode_tree, p, vp9_sb_kf_ymode_encodings + m);
+}
+#endif
+
+// 8x8 intra mode m coded with vp9_i8x8_mode_tree.
+static void write_i8x8_mode(vp9_writer *bc, int m, const vp9_prob *p) {
+ write_token(bc, vp9_i8x8_mode_tree, p, vp9_i8x8_mode_encodings + m);
+}
+
+// Chroma mode m coded with vp9_uv_mode_tree.
+static void write_uv_mode(vp9_writer *bc, int m, const vp9_prob *p) {
+ write_token(bc, vp9_uv_mode_tree, p, vp9_uv_mode_encodings + m);
+}
+
+
+// Block mode m coded with vp9_bmode_tree.
+// With CONFIG_NEWBINTRAMODES, B_CONTEXT_PRED is shifted down by
+// CONTEXT_PRED_REPLACEMENTS before the table lookup; the assert rejects the
+// values in between.
+static void write_bmode(vp9_writer *bc, int m, const vp9_prob *p) {
+#if CONFIG_NEWBINTRAMODES
+ assert(m < B_CONTEXT_PRED - CONTEXT_PRED_REPLACEMENTS || m == B_CONTEXT_PRED);
+ if (m == B_CONTEXT_PRED) m -= CONTEXT_PRED_REPLACEMENTS;
+#endif
+ write_token(bc, vp9_bmode_tree, p, vp9_bmode_encodings + m);
+}
+
+// Block mode m coded with vp9_kf_bmode_tree.
+static void write_kf_bmode(vp9_writer *bc, int m, const vp9_prob *p) {
+ write_token(bc, vp9_kf_bmode_tree, p, vp9_kf_bmode_encodings + m);
+}
+
+// Macroblock split (partitioning) index x coded with vp9_mbsplit_tree.
+static void write_split(vp9_writer *bc, int x, const vp9_prob *p) {
+ write_token(
+ bc, vp9_mbsplit_tree, p, vp9_mbsplit_encodings + x);
+}
+
+// Net saving, in 1/256 bit units, of replacing oldp by newp for the branch
+// counts ct when the new value is sent as an 8-bit literal (2048 units) plus
+// the extra cost of signalling "update" rather than "no update" with
+// probability upd. A positive result means the update pays for itself.
+// `upd` must keep that exact name: the vp9_cost_upd256 macro reads it.
+static int prob_update_savings(const unsigned int *ct,
+                               const vp9_prob oldp, const vp9_prob newp,
+                               const vp9_prob upd) {
+  const int cost_before = cost_branch256(ct, oldp);
+  const int cost_after = cost_branch256(ct, newp);
+  const int signalling = 2048 + vp9_cost_upd256;
+
+  return cost_before - cost_after - signalling;
+}
+
+// Same as prob_update_savings(), but the new probability is priced as a
+// differential update via prob_diff_update_cost(). When newp equals oldp no
+// signalling cost is charged.
+// `upd` must keep that exact name: the vp9_cost_upd256 macro reads it.
+static int prob_diff_update_savings(const unsigned int *ct,
+                                    const vp9_prob oldp, const vp9_prob newp,
+                                    const vp9_prob upd) {
+  const int cost_before = cost_branch256(ct, oldp);
+  const int cost_after = cost_branch256(ct, newp);
+  int signalling = 0;
+
+  if (newp != oldp)
+    signalling = prob_diff_update_cost(newp, oldp) + vp9_cost_upd256;
+
+  return cost_before - cost_after - signalling;
+}
+
+// Searches for the probability that gives the largest net saving under
+// differential-update pricing.
+//
+// Every value from *bestp towards oldp (oldp itself excluded) is tried in
+// steps of one. On return *bestp holds the best value found, or oldp when no
+// candidate yields a positive saving, and the saving (>= 0) is returned.
+// `upd` must keep that exact name: the vp9_cost_upd256 macro reads it.
+static int prob_diff_update_savings_search(const unsigned int *ct,
+                                           const vp9_prob oldp, vp9_prob *bestp,
+                                           const vp9_prob upd) {
+  const int cost_before = cost_branch256(ct, oldp);
+  const int step = (*bestp > oldp ? -1 : 1);
+  int top_saving = 0;
+  vp9_prob top_prob = oldp;
+  vp9_prob candidate = *bestp;
+
+  while (candidate != oldp) {
+    const int cost_after = cost_branch256(ct, candidate);
+    const int signalling =
+        prob_diff_update_cost(candidate, oldp) + vp9_cost_upd256;
+    const int saving = cost_before - cost_after - signalling;
+
+    if (saving > top_saving) {
+      top_saving = saving;
+      top_prob = candidate;
+    }
+    candidate += step;
+  }
+
+  *bestp = top_prob;
+  return top_saving;
+}
+
+// Conditionally updates *oldp from the two event counts in ct.
+// A flag is always written to bc with probability upd. When
+// prob_update_savings() reports a positive saving the flag is 1, the new
+// probability follows as an 8-bit literal and *oldp is overwritten; otherwise
+// the flag is 0 and *oldp is unchanged.
+static void vp9_cond_prob_update(vp9_writer *bc, vp9_prob *oldp, vp9_prob upd,
+                                 unsigned int *ct) {
+  const vp9_prob candidate = get_binary_prob(ct[0], ct[1]);
+
+  if (prob_update_savings(ct, *oldp, candidate, upd) <= 0) {
+    vp9_write(bc, 0, upd);
+    return;
+  }
+
+  vp9_write(bc, 1, upd);
+  vp9_write_literal(bc, candidate, 8);
+  *oldp = candidate;
+}
+
+// Writes the tokens starting at *tp to bc, stopping when `stop` is reached or
+// immediately after an EOSB_TOKEN has been consumed. On return *tp points at
+// the first token that was not consumed.
+//
+// The writer's count, range and lowvalue fields are copied into locals while
+// the loop runs and stored back at the end; output bytes are appended
+// directly to bc->buffer at bc->pos. The bit-writing sequence is spelled out
+// inline three times (token bits, extra bits, final extra bit).
+static void pack_mb_tokens(vp9_writer* const bc,
+ TOKENEXTRA **tp,
+ const TOKENEXTRA *const stop) {
+ unsigned int split;
+ unsigned int shift;
+ int count = bc->count;
+ unsigned int range = bc->range;
+ unsigned int lowvalue = bc->lowvalue;
+ TOKENEXTRA *p = *tp;
+
+ while (p < stop) {
+ const int t = p->Token;
+ // NOTE(review): the encodings entry for t is read (a->value, a->Len) before
+ // the EOSB_TOKEN test below -- confirm vp9_coef_encodings has an entry at
+ // that index.
+ vp9_token *const a = vp9_coef_encodings + t;
+ const vp9_extra_bit_struct *const b = vp9_extra_bits + t;
+ int i = 0;
+ const unsigned char *pp = p->context_tree;
+ int v = a->value;
+ int n = a->Len;
+
+ // End-of-superblock marker: consume it and stop without writing anything.
+ if (t == EOSB_TOKEN)
+ {
+ ++p;
+ break;
+ }
+
+ /* skip one or two nodes */
+ if (p->skip_eob_node) {
+ n -= p->skip_eob_node;
+ i = 2 * p->skip_eob_node;
+ }
+
+ // Write the remaining n bits of v, most significant first, walking
+ // vp9_coef_tree; pp[i >> 1] is the probability used at tree position i.
+ do {
+ const int bb = (v >> --n) & 1;
+ split = 1 + (((range - 1) * pp[i >> 1]) >> 8);
+ i = vp9_coef_tree[i + bb];
+
+ if (bb) {
+ lowvalue += split;
+ range = range - split;
+ } else {
+ range = split;
+ }
+
+ // Renormalise: vp9_norm[range] gives the shift applied to range.
+ shift = vp9_norm[range];
+ range <<= shift;
+ count += shift;
+
+ // count >= 0: a byte is emitted from lowvalue.
+ if (count >= 0) {
+ int offset = shift - count;
+
+ // Carry: the bit about to leave the top of lowvalue is set, so add one
+ // to the bytes already written, turning trailing 0xff bytes into 0.
+ if ((lowvalue << (offset - 1)) & 0x80000000) {
+ int x = bc->pos - 1;
+
+ while (x >= 0 && bc->buffer[x] == 0xff) {
+ bc->buffer[x] = (unsigned char)0;
+ x--;
+ }
+
+ // NOTE(review): if the loop above ran down to x == -1 this writes
+ // before the buffer; presumably that cannot happen for a valid
+ // stream -- confirm.
+ bc->buffer[x] += 1;
+ }
+
+ bc->buffer[bc->pos++] = (lowvalue >> (24 - offset));
+ lowvalue <<= offset;
+ shift = count;
+ lowvalue &= 0xffffff;
+ count -= 8;
+ }
+
+ lowvalue <<= shift;
+ } while (n);
+
+
+ // Tokens whose extra-bit entry has a non-zero base_val carry extra bits.
+ if (b->base_val) {
+ const int e = p->Extra, L = b->Len;
+
+ // e >> 1 is written as L bits using the entry's own tree (b->tree) and
+ // probabilities (b->prob); same write sequence as above.
+ if (L) {
+ const unsigned char *pp = b->prob;
+ int v = e >> 1;
+ int n = L; /* number of bits in v, assumed nonzero */
+ int i = 0;
+
+ do {
+ const int bb = (v >> --n) & 1;
+ split = 1 + (((range - 1) * pp[i >> 1]) >> 8);
+ i = b->tree[i + bb];
+
+ if (bb) {
+ lowvalue += split;
+ range = range - split;
+ } else {
+ range = split;
+ }
+
+ shift = vp9_norm[range];
+ range <<= shift;
+ count += shift;
+
+ if (count >= 0) {
+ int offset = shift - count;
+
+ if ((lowvalue << (offset - 1)) & 0x80000000) {
+ int x = bc->pos - 1;
+
+ while (x >= 0 && bc->buffer[x] == 0xff) {
+ bc->buffer[x] = (unsigned char)0;
+ x--;
+ }
+
+ bc->buffer[x] += 1;
+ }
+
+ bc->buffer[bc->pos++] = (lowvalue >> (24 - offset));
+ lowvalue <<= offset;
+ shift = count;
+ lowvalue &= 0xffffff;
+ count -= 8;
+ }
+
+ lowvalue <<= shift;
+ } while (n);
+ }
+
+
+ // The low bit of e is written with the range split in half (presumably
+ // this is the sign bit -- TODO confirm). The shift is always exactly one
+ // here, so vp9_norm is not consulted.
+ {
+
+ split = (range + 1) >> 1;
+
+ if (e & 1) {
+ lowvalue += split;
+ range = range - split;
+ } else {
+ range = split;
+ }
+
+ range <<= 1;
+
+ if ((lowvalue & 0x80000000)) {
+ int x = bc->pos - 1;
+
+ while (x >= 0 && bc->buffer[x] == 0xff) {
+ bc->buffer[x] = (unsigned char)0;
+ x--;
+ }
+
+ bc->buffer[x] += 1;
+
+ }
+
+ lowvalue <<= 1;
+
+ if (!++count) {
+ count = -8;
+ bc->buffer[bc->pos++] = (lowvalue >> 24);
+ lowvalue &= 0xffffff;
+ }
+ }
+
+ }
+ ++p;
+ }
+
+ // Store the coder state back and report how far the token list was consumed.
+ bc->count = count;
+ bc->lowvalue = lowvalue;
+ bc->range = range;
+ *tp = p;
+}
+
+// Stores the low 24 bits of `size` in cx_data[0..2], least significant byte
+// first. Bits above the 24th are discarded; cx_data must have room for three
+// bytes.
+static void write_partition_size(unsigned char *cx_data, int size) {
+  // Each byte is written directly as unsigned char. Passing values above 127
+  // through a signed char temporary is an implementation-defined conversion.
+  cx_data[0] = (unsigned char)(size & 0xff);
+  cx_data[1] = (unsigned char)((size >> 8) & 0xff);
+  cx_data[2] = (unsigned char)((size >> 16) & 0xff);
+}
+
+// Writes inter prediction mode m to bc with vp9_mv_ref_tree and the node
+// probabilities p. m must lie in [NEARESTMV, SPLITMV] (asserted when
+// CONFIG_DEBUG is set).
+static void write_mv_ref
+(
+  vp9_writer *bc, MB_PREDICTION_MODE m, const vp9_prob *p
+) {
+#if CONFIG_DEBUG
+  assert(NEARESTMV <= m && m <= SPLITMV);
+#endif
+  // Form the index first: "array - NEARESTMV + m" would create a pointer in
+  // front of the array before stepping back inside it.
+  write_token(bc, vp9_mv_ref_tree, p,
+              vp9_mv_ref_encoding_array + (m - NEARESTMV));
+}
+
+#if CONFIG_SUPERBLOCKS
+// Superblock variant of the inter-mode writer: codes mode m with
+// vp9_sb_mv_ref_tree. m must lie in [NEARESTMV, SPLITMV), i.e. SPLITMV is
+// excluded (asserted when CONFIG_DEBUG is set).
+static void write_sb_mv_ref(vp9_writer *bc, MB_PREDICTION_MODE m,
+                            const vp9_prob *p) {
+#if CONFIG_DEBUG
+  assert(NEARESTMV <= m && m < SPLITMV);
+#endif
+  // Form the index first so that no pointer in front of the array is created.
+  write_token(bc, vp9_sb_mv_ref_tree, p,
+              vp9_sb_mv_ref_encoding_array + (m - NEARESTMV));
+}
+#endif
+
+// Writes sub-block motion-vector mode m to bc with vp9_sub_mv_ref_tree and the
+// node probabilities p. m must lie in [LEFT4X4, NEW4X4] (asserted when
+// CONFIG_DEBUG is set).
+static void write_sub_mv_ref
+(
+  vp9_writer *bc, B_PREDICTION_MODE m, const vp9_prob *p
+) {
+#if CONFIG_DEBUG
+  assert(LEFT4X4 <= m && m <= NEW4X4);
+#endif
+  // Form the index first so that no pointer in front of the array is created.
+  write_token(bc, vp9_sub_mv_ref_tree, p,
+              vp9_sub_mv_ref_encoding_array + (m - LEFT4X4));
+}
+
+// Writes motion vector *mv to bc as its difference from the reference vector
+// ref->as_mv: the difference is passed first to vp9_encode_nmv() and then to
+// vp9_encode_nmv_fp(), the latter also receiving usehp.
+static void write_nmv(vp9_writer *bc, const MV *mv, const int_mv *ref,
+                      const nmv_context *nmvc, int usehp) {
+  const MV *const base = &ref->as_mv;
+  MV diff;
+
+  diff.row = mv->row - base->row;
+  diff.col = mv->col - base->col;
+
+  vp9_encode_nmv(bc, &diff, base, nmvc);
+  vp9_encode_nmv_fp(bc, &diff, base, nmvc, usehp);
+}
+
+#if CONFIG_NEW_MVREF
+// Writes the index (0..3) of the chosen motion-vector reference to w:
+// mv_ref_id one-bits, then a closing zero bit unless the index is 3. The k-th
+// bit uses ref_id_probs[k]. Any other index trips assert(0) and writes
+// nothing.
+static void vp9_write_mv_ref_id(vp9_writer *w,
+                                vp9_prob * ref_id_probs,
+                                int mv_ref_id) {
+  int k;
+
+  if (mv_ref_id < 0 || mv_ref_id > 3) {
+    // TRAP.. This should not happen
+    assert(0);
+    return;
+  }
+
+  for (k = 0; k < mv_ref_id; ++k)
+    vp9_write(w, 1, ref_id_probs[k]);
+
+  // The largest index needs no terminator.
+  if (mv_ref_id < 3)
+    vp9_write(w, 0, ref_id_probs[mv_ref_id]);
+}
+#endif
+
+// Writes the current macroblock's segment id to the bitstream.
+// Nothing is written unless both xd->segmentation_enabled and
+// xd->update_mb_segmentation_map are set.
+//
+// The id is sent as two bits: the high bit with mb_segment_tree_probs[0], the
+// low bit with mb_segment_tree_probs[1] (high bit 0) or [2] (high bit 1).
+// Ids outside 0..3 are written as 0.
+static void write_mb_segid(vp9_writer *bc,
+                           const MB_MODE_INFO *mi, const MACROBLOCKD *xd) {
+  int seg_id = mi->segment_id;
+  int high_bit, low_bit;
+
+#if CONFIG_SUPERBLOCKS
+  if (mi->encoded_as_sb) {
+    const int has_right = xd->mb_to_right_edge >= 0;
+    const int has_below = xd->mb_to_bottom_edge >= 0;
+
+    // NOTE(review): these are logical ANDs, so the merged id is always 0 or 1
+    // regardless of the actual segment numbers. Confirm this matches the
+    // decoder before touching it.
+    if (has_right)
+      seg_id = seg_id && xd->mode_info_context[1].mbmi.segment_id;
+    if (has_below) {
+      seg_id = seg_id &&
+               xd->mode_info_context[xd->mode_info_stride].mbmi.segment_id;
+      if (has_right)
+        seg_id = seg_id &&
+                 xd->mode_info_context[xd->mode_info_stride + 1].mbmi.segment_id;
+    }
+  }
+#endif
+
+  if (!xd->segmentation_enabled || !xd->update_mb_segmentation_map)
+    return;
+
+  // TRAP.. an out-of-range id should not happen; it is coded as segment 0.
+  if (seg_id < 0 || seg_id > 3)
+    seg_id = 0;
+
+  high_bit = seg_id >> 1;
+  low_bit = seg_id & 1;
+
+  vp9_write(bc, high_bit, xd->mb_segment_tree_probs[0]);
+  vp9_write(bc, low_bit, xd->mb_segment_tree_probs[1 + high_bit]);
+}
+
+// Writes the reference frame rf of the current macroblock to bc.
+//
+// Nothing is written when the segment's SEG_LVL_REF_FRAME feature is active
+// and allows at most one reference frame: the segment then fully determines
+// the value. Otherwise a "prediction was right" flag is written (and stored
+// with vp9_set_pred_flag); if the prediction was wrong, rf is coded explicitly
+// with up to three binary decisions (intra?, last?, golden?) using the
+// modified probabilities cm->mod_refprobs[pred_rf]. A decision whose
+// probability is zero is not written at all.
+static void encode_ref_frame(vp9_writer *const bc,
+                             VP9_COMMON *const cm,
+                             MACROBLOCKD *xd,
+                             int segment_id,
+                             MV_REFERENCE_FRAME rf) {
+  const int seg_ref_active = vp9_segfeature_active(xd,
+                                                   segment_id,
+                                                   SEG_LVL_REF_FRAME);
+  int seg_ref_count = 0;
+  // Values used in prediction model coding
+  unsigned char prediction_flag;
+  vp9_prob pred_prob;
+  MV_REFERENCE_FRAME pred_rf;
+  vp9_prob branch_probs[PREDICTION_PROBS];
+
+  if (seg_ref_active) {
+    seg_ref_count = vp9_check_segref(xd, segment_id, INTRA_FRAME) +
+                    vp9_check_segref(xd, segment_id, LAST_FRAME) +
+                    vp9_check_segref(xd, segment_id, GOLDEN_FRAME) +
+                    vp9_check_segref(xd, segment_id, ALTREF_FRAME);
+  }
+
+  // The segment leaves no choice: the reference frame is implied.
+  if (seg_ref_active && seg_ref_count <= 1)
+    return;
+
+  // Context probability of the prediction flag and the predicted value.
+  pred_prob = vp9_get_pred_prob(cm, xd, PRED_REF);
+  pred_rf = vp9_get_pred_ref(cm, xd);
+
+  // Did the chosen reference frame match its predicted value?
+  prediction_flag = (xd->mode_info_context->mbmi.ref_frame == pred_rf);
+
+  vp9_set_pred_flag(xd, PRED_REF, prediction_flag);
+  vp9_write(bc, prediction_flag, pred_prob);
+
+  if (prediction_flag)
+    return;
+
+  // Prediction failed: code the value explicitly.
+  vpx_memcpy(branch_probs,
+             cm->mod_refprobs[pred_rf], sizeof(branch_probs));
+
+  // With segment-level coding, blank out options that cannot occur by
+  // setting the branch probability to 0.
+  if (seg_ref_active) {
+    branch_probs[INTRA_FRAME] *=
+        vp9_check_segref(xd, segment_id, INTRA_FRAME);
+    branch_probs[LAST_FRAME] *=
+        vp9_check_segref(xd, segment_id, LAST_FRAME);
+    branch_probs[GOLDEN_FRAME] *=
+        (vp9_check_segref(xd, segment_id, GOLDEN_FRAME) *
+         vp9_check_segref(xd, segment_id, ALTREF_FRAME));
+  }
+
+  if (branch_probs[0])
+    vp9_write(bc, (rf != INTRA_FRAME), branch_probs[0]);
+
+  if (rf == INTRA_FRAME)
+    return;
+
+  // Inter coded
+  if (branch_probs[1])
+    vp9_write(bc, (rf != LAST_FRAME), branch_probs[1]);
+
+  if (rf == LAST_FRAME)
+    return;
+
+  if (branch_probs[2])
+    vp9_write(bc, (rf != GOLDEN_FRAME), branch_probs[2]);
+}
+
+// Recomputes the probabilities used to code reference frames from the usage
+// counts in cpi->count_mb_ref_frame_usage:
+//   prob_intra_coded  intra versus all inter references,
+//   prob_last_coded   LAST_FRAME among the inter references,
+//   prob_gf_coded     GOLDEN_FRAME versus ALTREF_FRAME.
+// vp9_compute_mod_refprobs() is then called to derive the modified set used
+// when prediction of the reference frame fails.
+static void update_ref_probs(VP9_COMP *const cpi) {
+  VP9_COMMON *const cm = &cpi->common;
+  const int *const usage = cpi->count_mb_ref_frame_usage;
+
+  const int n_intra = usage[INTRA_FRAME];
+  const int n_last = usage[LAST_FRAME];
+  const int n_golden = usage[GOLDEN_FRAME];
+  const int n_altref = usage[ALTREF_FRAME];
+  const int n_inter = n_last + n_golden + n_altref;
+
+  cm->prob_intra_coded = get_binary_prob(n_intra, n_inter);
+  cm->prob_last_coded = get_prob(n_last, n_inter);
+  cm->prob_gf_coded = get_binary_prob(n_golden, n_altref);
+
+  vp9_compute_mod_refprobs(cm);
+}
+
+// Writes the mode information and the coefficient tokens of every macroblock
+// to bc, for frames whose macroblocks may be inter coded.
+//
+// The frame is walked in 2x2 groups of macroblocks. Within a group the order
+// is top-left, top-right, bottom-left, bottom-right; positions outside the
+// frame are skipped. For each macroblock the following are written in order:
+// segment id (when the segment map is being updated), skip flag, reference
+// frame, prediction mode with its mode-specific data (intra sub-modes, or
+// interpolation filter, compound flag and motion vectors), transform size
+// (when pc->txfm_mode is TX_MODE_SELECT), and finally the tokens through
+// pack_mb_tokens(). With CONFIG_SUPERBLOCKS a group coded as one superblock
+// is written once, from its top-left macroblock.
+//
+// Side effects: advances cpi->mb.partition_info, updates several fields of
+// cpi->mb.e_mbd (edge distances, mode info pointers) and increments the
+// counters cpi->mbsplit_count / cpi->sub_mv_ref_count (plus
+// cpi->best_ref_index_counts when CONFIG_NEW_MVREF is set).
+static void pack_inter_mode_mvs(VP9_COMP *const cpi, vp9_writer *const bc) {
+  VP9_COMMON *const pc = &cpi->common;
+  const nmv_context *nmvc = &pc->fc.nmvc;
+  MACROBLOCKD *xd = &cpi->mb.e_mbd;
+  MODE_INFO *m;
+  MODE_INFO *prev_m;
+  TOKENEXTRA *tok = cpi->tok;
+  TOKENEXTRA *tok_end = tok + cpi->tok_count;
+
+  const int mis = pc->mode_info_stride;
+  int mb_row, mb_col;
+  int row, col;
+
+  // Values used in prediction model coding
+  vp9_prob pred_prob;
+  unsigned char prediction_flag;
+
+  // Step taken after each of the four macroblocks of a group. The rows sum
+  // to 0 and the columns to +2, so a full group ends on the top-left
+  // position of the next group.
+  int row_delta[4] = { 0, +1, 0, -1};
+  int col_delta[4] = { +1, -1, +1, +1};
+
+  cpi->mb.partition_info = cpi->mb.pi;
+
+  mb_row = 0;
+  for (row = 0; row < pc->mb_rows; row += 2) {
+    m = pc->mi + row * mis;
+    prev_m = pc->prev_mi + row * mis;
+
+    mb_col = 0;
+    for (col = 0; col < pc->mb_cols; col += 2) {
+      int i;
+
+      // Process the 4 MBs in the order:
+      // top-left, top-right, bottom-left, bottom-right
+#if CONFIG_SUPERBLOCKS
+      vp9_write(bc, m->mbmi.encoded_as_sb, pc->sb_coded);
+#endif
+      for (i = 0; i < 4; i++) {
+        MB_MODE_INFO *mi;
+        MV_REFERENCE_FRAME rf;
+        MB_PREDICTION_MODE mode;
+        int segment_id, skip_coeff;
+
+        int dy = row_delta[i];
+        int dx = col_delta[i];
+        int offset_extended = dy * mis + dx;
+
+        if ((mb_row >= pc->mb_rows) || (mb_col >= pc->mb_cols)) {
+          // MB lies outside frame, move on
+          mb_row += dy;
+          mb_col += dx;
+          m += offset_extended;
+          prev_m += offset_extended;
+          cpi->mb.partition_info += offset_extended;
+          continue;
+        }
+
+        mi = &m->mbmi;
+        rf = mi->ref_frame;
+        mode = mi->mode;
+        segment_id = mi->segment_id;
+
+        // Distance of Mb to the various image edges.
+        // These specified to 8th pel as they are always compared to MV
+        // values that are in 1/8th pel units
+        xd->mb_to_left_edge = -((mb_col * 16) << 3);
+        // Shift first, then negate: left-shifting a negative value is
+        // undefined, and this now matches the left-edge computation.
+        xd->mb_to_top_edge = -((mb_row * 16) << 3);
+
+#if CONFIG_SUPERBLOCKS
+        if (mi->encoded_as_sb) {
+          xd->mb_to_right_edge = ((pc->mb_cols - 2 - mb_col) * 16) << 3;
+          xd->mb_to_bottom_edge = ((pc->mb_rows - 2 - mb_row) * 16) << 3;
+        } else {
+#endif
+          xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
+          xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
+#if CONFIG_SUPERBLOCKS
+        }
+#endif
+
+        // Make sure the MacroBlockD mode info pointer is set correctly
+        xd->mode_info_context = m;
+        xd->prev_mode_info_context = prev_m;
+
+#ifdef ENTROPY_STATS
+        active_section = 9;
+#endif
+        if (cpi->mb.e_mbd.update_mb_segmentation_map) {
+          // Is temporal coding of the segment map enabled
+          if (pc->temporal_update) {
+            prediction_flag = vp9_get_pred_flag(xd, PRED_SEG_ID);
+            pred_prob = vp9_get_pred_prob(pc, xd, PRED_SEG_ID);
+
+            // Code the segment id prediction flag for this mb
+            vp9_write(bc, prediction_flag, pred_prob);
+
+            // If the mb segment id wasn't predicted code explicitly
+            if (!prediction_flag)
+              write_mb_segid(bc, mi, &cpi->mb.e_mbd);
+          } else {
+            // Normal unpredicted coding
+            write_mb_segid(bc, mi, &cpi->mb.e_mbd);
+          }
+        }
+
+        // The skip flag is only written when skipping is enabled for the
+        // frame and the segment does not force an EOB of zero. skip_coeff
+        // stays 1 otherwise.
+        skip_coeff = 1;
+        if (pc->mb_no_coeff_skip &&
+            (!vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) ||
+             (vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) != 0))) {
+          skip_coeff = mi->mb_skip_coeff;
+#if CONFIG_SUPERBLOCKS
+          // A superblock is skipped only if all four of its MBs are.
+          if (mi->encoded_as_sb) {
+            skip_coeff &= m[1].mbmi.mb_skip_coeff;
+            skip_coeff &= m[mis].mbmi.mb_skip_coeff;
+            skip_coeff &= m[mis + 1].mbmi.mb_skip_coeff;
+          }
+#endif
+          vp9_write(bc, skip_coeff,
+                    vp9_get_pred_prob(pc, xd, PRED_MBSKIP));
+        }
+
+        // Encode the reference frame.
+        if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE)
+            || vp9_get_segdata(xd, segment_id, SEG_LVL_MODE) >= NEARESTMV) {
+          encode_ref_frame(bc, pc, xd, segment_id, rf);
+        } else {
+          assert(rf == INTRA_FRAME);
+        }
+
+        if (rf == INTRA_FRAME) {
+#ifdef ENTROPY_STATS
+          active_section = 6;
+#endif
+
+          // The mode itself is omitted when the segment dictates it.
+          if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE)) {
+#if CONFIG_SUPERBLOCKS
+            if (m->mbmi.encoded_as_sb)
+              write_sb_ymode(bc, mode, pc->fc.sb_ymode_prob);
+            else
+#endif
+              write_ymode(bc, mode, pc->fc.ymode_prob);
+          }
+          if (mode == B_PRED) {
+            // One block mode for each of the 16 blocks.
+            int j = 0;
+#if CONFIG_COMP_INTRA_PRED
+            int uses_second =
+              m->bmi[0].as_mode.second !=
+              (B_PREDICTION_MODE)(B_DC_PRED - 1);
+            vp9_write(bc, uses_second, DEFAULT_COMP_INTRA_PROB);
+#endif
+            do {
+#if CONFIG_COMP_INTRA_PRED
+              B_PREDICTION_MODE mode2 = m->bmi[j].as_mode.second;
+#endif
+              write_bmode(bc, m->bmi[j].as_mode.first,
+                          pc->fc.bmode_prob);
+#if CONFIG_COMP_INTRA_PRED
+              if (uses_second) {
+                write_bmode(bc, mode2, pc->fc.bmode_prob);
+              }
+#endif
+            } while (++j < 16);
+          }
+          if (mode == I8X8_PRED) {
+            // Four 8x8 modes, read from blocks 0, 2, 8 and 10; no separate
+            // chroma mode is written in this case.
+            write_i8x8_mode(bc, m->bmi[0].as_mode.first,
+                            pc->fc.i8x8_mode_prob);
+            write_i8x8_mode(bc, m->bmi[2].as_mode.first,
+                            pc->fc.i8x8_mode_prob);
+            write_i8x8_mode(bc, m->bmi[8].as_mode.first,
+                            pc->fc.i8x8_mode_prob);
+            write_i8x8_mode(bc, m->bmi[10].as_mode.first,
+                            pc->fc.i8x8_mode_prob);
+          } else {
+            write_uv_mode(bc, mi->uv_mode,
+                          pc->fc.uv_mode_prob[mode]);
+          }
+        } else {
+          int_mv best_mv, best_second_mv;
+
+          vp9_prob mv_ref_p [VP9_MVREFS - 1];
+
+          {
+            best_mv.as_int = mi->ref_mvs[rf][0].as_int;
+
+            vp9_mv_ref_probs(&cpi->common, mv_ref_p, mi->mb_mode_context[rf]);
+
+#ifdef ENTROPY_STATS
+            accum_mv_refs(mode, ct);
+#endif
+          }
+
+#ifdef ENTROPY_STATS
+          active_section = 3;
+#endif
+
+          // Is the segment coding of mode enabled
+          if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE)) {
+#if CONFIG_SUPERBLOCKS
+            if (mi->encoded_as_sb) {
+              write_sb_mv_ref(bc, mode, mv_ref_p);
+            } else
+#endif
+            {
+              write_mv_ref(bc, mode, mv_ref_p);
+            }
+            vp9_accum_mv_refs(&cpi->common, mode, mi->mb_mode_context[rf]);
+          }
+
+#if CONFIG_PRED_FILTER
+          // Is the prediction filter enabled
+          if (mode >= NEARESTMV && mode < SPLITMV) {
+            if (cpi->common.pred_filter_mode == 2)
+              vp9_write(bc, mi->pred_filter_enabled,
+                        pc->prob_pred_filter_off);
+            else
+              assert(mi->pred_filter_enabled ==
+                     cpi->common.pred_filter_mode);
+          }
+#endif
+          // The interpolation filter is written only when the frame lets
+          // each macroblock choose (SWITCHABLE).
+          if (mode >= NEARESTMV && mode <= SPLITMV)
+          {
+            if (cpi->common.mcomp_filter_type == SWITCHABLE) {
+              write_token(bc, vp9_switchable_interp_tree,
+                          vp9_get_pred_probs(&cpi->common, xd,
+                                             PRED_SWITCHABLE_INTERP),
+                          vp9_switchable_interp_encodings +
+                              vp9_switchable_interp_map[mi->interp_filter]);
+            } else {
+              assert (mi->interp_filter ==
+                      cpi->common.mcomp_filter_type);
+            }
+          }
+
+          // best_second_mv is only set (and only read below) for NEWMV and
+          // SPLITMV macroblocks that have a second reference frame.
+          if (mi->second_ref_frame > 0 &&
+              (mode == NEWMV || mode == SPLITMV)) {
+
+            best_second_mv.as_int =
+              mi->ref_mvs[mi->second_ref_frame][0].as_int;
+          }
+
+          // does the feature use compound prediction or not
+          // (if not specified at the frame/segment level)
+          if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
+            vp9_write(bc, mi->second_ref_frame > INTRA_FRAME,
+                      vp9_get_pred_prob(pc, xd, PRED_COMP));
+          }
+#if CONFIG_COMP_INTERINTRA_PRED
+          if (cpi->common.use_interintra &&
+              mode >= NEARESTMV && mode < SPLITMV &&
+              mi->second_ref_frame <= INTRA_FRAME) {
+            vp9_write(bc, mi->second_ref_frame == INTRA_FRAME,
+                      pc->fc.interintra_prob);
+            if (mi->second_ref_frame == INTRA_FRAME) {
+              write_ymode(bc, mi->interintra_mode, pc->fc.ymode_prob);
+#if SEPARATE_INTERINTRA_UV
+              write_uv_mode(bc, mi->interintra_uv_mode,
+                            pc->fc.uv_mode_prob[mi->interintra_mode]);
+#endif
+            }
+          }
+#endif
+
+          {
+            switch (mode) { /* new, split require MVs */
+              case NEWMV:
+#ifdef ENTROPY_STATS
+                active_section = 5;
+#endif
+
+#if CONFIG_NEW_MVREF
+                {
+                  unsigned int best_index;
+
+                  // The reference choice was made earlier and stored in mi;
+                  // it is not re-derived here.
+                  best_index = mi->best_index;
+                  best_mv.as_int = mi->best_mv.as_int;
+
+                  // Encode the index of the choice.
+                  vp9_write_mv_ref_id(bc,
+                                      xd->mb_mv_ref_id_probs[rf], best_index);
+
+                  cpi->best_ref_index_counts[rf][best_index]++;
+
+                }
+#endif
+
+                write_nmv(bc, &mi->mv[0].as_mv, &best_mv,
+                          (const nmv_context*) nmvc,
+                          xd->allow_high_precision_mv);
+
+                if (mi->second_ref_frame > 0) {
+#if CONFIG_NEW_MVREF
+                  unsigned int best_index;
+                  MV_REFERENCE_FRAME sec_ref_frame = mi->second_ref_frame;
+
+                  // As above: use the choice stored in mi.
+                  best_index = mi->best_second_index;
+                  best_second_mv.as_int = mi->best_second_mv.as_int;
+
+                  // Encode the index of the choice.
+                  vp9_write_mv_ref_id(bc,
+                                      xd->mb_mv_ref_id_probs[sec_ref_frame],
+                                      best_index);
+
+                  cpi->best_ref_index_counts[sec_ref_frame][best_index]++;
+#endif
+                  write_nmv(bc, &mi->mv[1].as_mv, &best_second_mv,
+                            (const nmv_context*) nmvc,
+                            xd->allow_high_precision_mv);
+                }
+                break;
+              case SPLITMV: {
+                int j = 0;
+
+#ifdef MODE_STATS
+                ++count_mb_seg [mi->partitioning];
+#endif
+
+                write_split(bc, mi->partitioning, cpi->common.fc.mbsplit_prob);
+                cpi->mbsplit_count[mi->partitioning]++;
+
+                // One sub-mv mode (and possibly vectors) per partition.
+                do {
+                  B_PREDICTION_MODE blockmode;
+                  int_mv blockmv;
+                  const int *const L =
+                    vp9_mbsplits [mi->partitioning];
+                  int k = -1; /* first block in subset j */
+                  int mv_contz;
+                  int_mv leftmv, abovemv;
+
+                  blockmode = cpi->mb.partition_info->bmi[j].mode;
+                  blockmv = cpi->mb.partition_info->bmi[j].mv;
+                  // Find the first block k that belongs to partition j.
+#if CONFIG_DEBUG
+                  while (j != L[++k])
+                    if (k >= 16)
+                      assert(0);
+#else
+                  while (j != L[++k]);
+#endif
+                  leftmv.as_int = left_block_mv(m, k);
+                  abovemv.as_int = above_block_mv(m, k, mis);
+                  mv_contz = vp9_mv_cont(&leftmv, &abovemv);
+
+                  write_sub_mv_ref(bc, blockmode,
+                                   cpi->common.fc.sub_mv_ref_prob [mv_contz]);
+                  cpi->sub_mv_ref_count[mv_contz][blockmode - LEFT4X4]++;
+                  if (blockmode == NEW4X4) {
+#ifdef ENTROPY_STATS
+                    active_section = 11;
+#endif
+                    write_nmv(bc, &blockmv.as_mv, &best_mv,
+                              (const nmv_context*) nmvc,
+                              xd->allow_high_precision_mv);
+
+                    if (mi->second_ref_frame > 0) {
+                      write_nmv(bc,
+                                &cpi->mb.partition_info->bmi[j].second_mv.as_mv,
+                                &best_second_mv,
+                                (const nmv_context*) nmvc,
+                                xd->allow_high_precision_mv);
+                    }
+                  }
+                } while (++j < cpi->mb.partition_info->count);
+              }
+              break;
+              default:
+                break;
+            }
+          }
+          /* The motion vector counts are not updated here. According to the
+           * original note this is not required if the counts in cpi are
+           * consistent with the final packing pass. */
+        }
+
+        // Transform size: written only in TX_MODE_SELECT, for modes that can
+        // use a larger transform, and only when the macroblock actually
+        // carries coefficients.
+        if (((rf == INTRA_FRAME && mode <= I8X8_PRED) ||
+             (rf != INTRA_FRAME && !(mode == SPLITMV &&
+                                     mi->partitioning == PARTITIONING_4X4))) &&
+            pc->txfm_mode == TX_MODE_SELECT &&
+            !((pc->mb_no_coeff_skip && skip_coeff) ||
+              (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) &&
+               vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0))) {
+          TX_SIZE sz = mi->txfm_size;
+          // FIXME(rbultje) code ternary symbol once all experiments are merged
+          vp9_write(bc, sz != TX_4X4, pc->prob_tx[0]);
+          if (sz != TX_4X4 && mode != I8X8_PRED && mode != SPLITMV)
+            vp9_write(bc, sz != TX_8X8, pc->prob_tx[1]);
+        }
+
+#ifdef ENTROPY_STATS
+        active_section = 1;
+#endif
+        assert(tok < tok_end);
+        pack_mb_tokens(bc, &tok, tok_end);
+
+#if CONFIG_SUPERBLOCKS
+        // A superblock covers the whole group: step straight to the next one.
+        if (m->mbmi.encoded_as_sb) {
+          assert(!i);
+          mb_col += 2;
+          m += 2;
+          cpi->mb.partition_info += 2;
+          prev_m += 2;
+          break;
+        }
+#endif
+
+        // Next MB
+        mb_row += dy;
+        mb_col += dx;
+        m += offset_extended;
+        prev_m += offset_extended;
+        cpi->mb.partition_info += offset_extended;
+#if CONFIG_DEBUG
+        assert((prev_m - cpi->common.prev_mip) == (m - cpi->common.mip));
+        assert((prev_m - cpi->common.prev_mi) == (m - cpi->common.mi));
+#endif
+      }
+    }
+
+    // Next SB
+    mb_row += 2;
+    m += mis + (1 - (pc->mb_cols & 0x1));
+    prev_m += mis + (1 - (pc->mb_cols & 0x1));
+    cpi->mb.partition_info += mis + (1 - (pc->mb_cols & 0x1));
+  }
+}
+
+
+/* Writes the key-frame mode info of one macroblock |m| to |bc|, in this
+ * order: segment id (if the map is being updated), skip flag (if enabled),
+ * Y mode, then either the 16 B_PRED block modes, the four I8X8 modes, or the
+ * chroma mode, and finally the transform size when it is selected per MB.
+ * The order of the writes defines the bitstream and must not change. */
+static void write_mb_modes_kf(const VP9_COMMON *c,
+                              const MACROBLOCKD *xd,
+                              const MODE_INFO *m,
+                              int mode_info_stride,
+                              vp9_writer *const bc) {
+  const int y_mode = m->mbmi.mode;
+  const int seg = m->mbmi.segment_id;
+
+  if (xd->update_mb_segmentation_map) {
+    write_mb_segid(bc, &m->mbmi, xd);
+  }
+
+  /* No skip flag when SEG_LVL_EOB is active with data 0 for this segment. */
+  if (c->mb_no_coeff_skip &&
+      (!vp9_segfeature_active(xd, seg, SEG_LVL_EOB) ||
+       vp9_get_segdata(xd, seg, SEG_LVL_EOB) != 0)) {
+    int skip = m->mbmi.mb_skip_coeff;
+#if CONFIG_SUPERBLOCKS
+    const int stride = mode_info_stride;
+    /* For a superblock the flag is the AND over its four macroblocks. */
+    if (m->mbmi.encoded_as_sb) {
+      skip &= m[1].mbmi.mb_skip_coeff;
+      skip &= m[stride].mbmi.mb_skip_coeff;
+      skip &= m[stride + 1].mbmi.mb_skip_coeff;
+    }
+#endif
+    vp9_write(bc, skip, vp9_get_pred_prob(c, xd, PRED_MBSKIP));
+  }
+
+#if CONFIG_SUPERBLOCKS
+  if (m->mbmi.encoded_as_sb) {
+    sb_kfwrite_ymode(bc, y_mode,
+                     c->sb_kf_ymode_prob[c->kf_ymode_probs_index]);
+  } else
+#endif
+  {
+    kfwrite_ymode(bc, y_mode, c->kf_ymode_prob[c->kf_ymode_probs_index]);
+  }
+
+  if (y_mode == B_PRED) {
+    const int stride = c->mode_info_stride;
+    int b;
+#if CONFIG_COMP_INTRA_PRED
+    const int has_second = m->bmi[0].as_mode.second !=
+                           (B_PREDICTION_MODE)(B_DC_PRED - 1);
+    vp9_write(bc, has_second, DEFAULT_COMP_INTRA_PROB);
+#endif
+    for (b = 0; b < 16; ++b) {
+      const B_PREDICTION_MODE above = above_block_mode(m, b, stride);
+      const B_PREDICTION_MODE left = left_block_mode(m, b);
+      const int mode = m->bmi[b].as_mode.first;
+#if CONFIG_COMP_INTRA_PRED
+      const int mode2 = m->bmi[b].as_mode.second;
+#endif
+#ifdef ENTROPY_STATS
+      ++intra_mode_stats[above][left][mode];
+#endif
+
+      write_kf_bmode(bc, mode, c->kf_bmode_prob[above][left]);
+#if CONFIG_COMP_INTRA_PRED
+      if (has_second) {
+        write_kf_bmode(bc, mode2, c->kf_bmode_prob[above][left]);
+      }
+#endif
+    }
+  }
+
+  if (y_mode == I8X8_PRED) {
+    /* Blocks 0, 2, 8 and 10 carry the four I8X8 modes (presumably the
+     * top-left 4x4 block of each 8x8 quadrant). */
+    static const int i8x8_block[4] = { 0, 2, 8, 10 };
+    int q;
+    for (q = 0; q < 4; ++q) {
+      write_i8x8_mode(bc, m->bmi[i8x8_block[q]].as_mode.first,
+                      c->fc.i8x8_mode_prob);
+    }
+  } else {
+    write_uv_mode(bc, m->mbmi.uv_mode, c->kf_uv_mode_prob[y_mode]);
+  }
+
+  /* The transform size is coded only for Y modes up to I8X8_PRED, when the
+   * frame uses TX_MODE_SELECT and the MB's coefficients are not skipped. */
+  if (y_mode <= I8X8_PRED && c->txfm_mode == TX_MODE_SELECT &&
+      !(c->mb_no_coeff_skip && m->mbmi.mb_skip_coeff) &&
+      !(vp9_segfeature_active(xd, seg, SEG_LVL_EOB) &&
+        vp9_get_segdata(xd, seg, SEG_LVL_EOB) == 0)) {
+    const TX_SIZE tx = m->mbmi.txfm_size;
+    // FIXME(rbultje) code ternary symbol once all experiments are merged
+    vp9_write(bc, tx != TX_4X4, c->prob_tx[0]);
+    if (tx != TX_4X4 && y_mode <= TM_PRED)
+      vp9_write(bc, tx != TX_8X8, c->prob_tx[1]);
+  }
+}
+
+/* Writes the mode info and tokens of every macroblock of a key frame to
+ * |bc|, walking the frame in 2x2 macroblock groups. Tokens are consumed
+ * from cpi->tok in the same order. */
+static void write_kfmodes(VP9_COMP* const cpi, vp9_writer* const bc) {
+  /* Per-step moves that visit a 2x2 group as top-left, top-right,
+   * bottom-left, bottom-right and then land on the next group's top-left. */
+  static const int row_step[4] = { 0, +1, 0, -1 };
+  static const int col_step[4] = { +1, -1, +1, +1 };
+
+  VP9_COMMON *const cm = &cpi->common;
+  MACROBLOCKD *const xd = &cpi->mb.e_mbd;
+  const int stride = cm->mode_info_stride;
+  TOKENEXTRA *tok = cpi->tok;
+  TOKENEXTRA *const tok_end = tok + cpi->tok_count;
+  MODE_INFO *mi;
+  int sb_row, sb_col, n;
+  int mb_row = 0, mb_col;
+
+  for (sb_row = 0; sb_row < cm->mb_rows; sb_row += 2) {
+    mi = cm->mi + sb_row * stride;
+    mb_col = 0;
+
+    for (sb_col = 0; sb_col < cm->mb_cols; sb_col += 2) {
+#if CONFIG_SUPERBLOCKS
+      vp9_write(bc, mi->mbmi.encoded_as_sb, cm->sb_coded);
+#endif
+      for (n = 0; n < 4; ++n) {
+        const int dy = row_step[n];
+        const int dx = col_step[n];
+        const int mi_step = dy * stride + dx;
+
+        /* Macroblocks outside the frame are stepped over, not coded. */
+        if (mb_row < cm->mb_rows && mb_col < cm->mb_cols) {
+          // Make sure the MacroBlockD mode info pointer is set correctly
+          xd->mode_info_context = mi;
+
+          write_mb_modes_kf(cm, xd, mi, stride, bc);
+#ifdef ENTROPY_STATS
+          active_section = 8;
+#endif
+          assert(tok < tok_end);
+          pack_mb_tokens(bc, &tok, tok_end);
+
+#if CONFIG_SUPERBLOCKS
+          /* A superblock is coded once, through its top-left MB. */
+          if (mi->mbmi.encoded_as_sb) {
+            assert(!n);
+            mb_col += 2;
+            mi += 2;
+            break;
+          }
+#endif
+        }
+
+        // Next MB
+        mb_row += dy;
+        mb_col += dx;
+        mi += mi_step;
+      }
+    }
+    /* Each group nets zero rows and two columns, so advance the row here. */
+    mb_row += 2;
+  }
+}
+
+
+/* Debug helper for probability trees: appends |coef_probs| to the file
+ * "enc_tree_probs.txt" as nested braces. Does nothing if it cannot open it. */
+static void print_prob_tree(vp9_prob
+    coef_probs[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]) {
+  int i, j, k, l;
+  FILE *f = fopen("enc_tree_probs.txt", "a");
+  if (f == NULL) return;  /* fprintf() on a null stream is undefined */
+  fprintf(f, "{\n");
+  for (i = 0; i < BLOCK_TYPES; i++) {
+    fprintf(f, " {\n");
+    for (j = 0; j < COEF_BANDS; j++) {
+      fprintf(f, " {\n");
+      for (k = 0; k < PREV_COEF_CONTEXTS; k++) {
+        fprintf(f, " {");
+        for (l = 0; l < ENTROPY_NODES; l++) {
+          fprintf(f, "%3u, ", (unsigned int)(coef_probs[i][j][k][l]));
+        }
+        fprintf(f, " }\n");
+      }
+      fprintf(f, " }\n");
+    }
+    fprintf(f, " }\n");
+  }
+  fprintf(f, "}\n");
+  fclose(f);
+}
+
+/* Returns non-zero for the (block type, band, context) triples that every
+ * loop in build_coeff_contexts() leaves untouched. */
+static int coef_ctx_is_skipped(int type, int band, int ctx) {
+  return ctx >= 3 && ((type == 0 && band == 1) || (type > 0 && band == 0));
+}
+
+/* Turns the coefficient token counts held in |cpi| into per-frame
+ * probabilities and branch counts, one table per transform size and
+ * regular/hybrid variant. With ENTROPY_STATS the counts of non-dummy packing
+ * passes are also added to the file's running context counters. */
+static void build_coeff_contexts(VP9_COMP *cpi) {
+  int type = 0, band, ctx;
+#ifdef ENTROPY_STATS
+  int tok = 0;
+#endif
+
+  /* Unsuffixed tables: always built. */
+  for (type = 0; type < BLOCK_TYPES; ++type) {
+    for (band = 0; band < COEF_BANDS; ++band) {
+      for (ctx = 0; ctx < PREV_COEF_CONTEXTS; ++ctx) {
+        if (coef_ctx_is_skipped(type, band, ctx)) continue;
+        vp9_tree_probs_from_distribution(
+            MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree,
+            cpi->frame_coef_probs[type][band][ctx],
+            cpi->frame_branch_ct[type][band][ctx],
+            cpi->coef_counts[type][band][ctx], 256, 1);
+#ifdef ENTROPY_STATS
+        if (!cpi->dummy_packing)
+          for (tok = 0; tok < MAX_ENTROPY_TOKENS; ++tok)
+            context_counters[type][band][ctx][tok] +=
+                cpi->coef_counts[type][band][ctx][tok];
+#endif
+      }
+    }
+  }
+  for (type = 0; type < BLOCK_TYPES; ++type) {
+    for (band = 0; band < COEF_BANDS; ++band) {
+      for (ctx = 0; ctx < PREV_COEF_CONTEXTS; ++ctx) {
+        if (coef_ctx_is_skipped(type, band, ctx)) continue;
+        vp9_tree_probs_from_distribution(
+            MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree,
+            cpi->frame_hybrid_coef_probs[type][band][ctx],
+            cpi->frame_hybrid_branch_ct[type][band][ctx],
+            cpi->hybrid_coef_counts[type][band][ctx], 256, 1);
+#ifdef ENTROPY_STATS
+        if (!cpi->dummy_packing)
+          for (tok = 0; tok < MAX_ENTROPY_TOKENS; ++tok)
+            hybrid_context_counters[type][band][ctx][tok] +=
+                cpi->hybrid_coef_counts[type][band][ctx][tok];
+#endif
+      }
+    }
+  }
+
+  /* 8x8 tables: only when the frame is not restricted to ONLY_4X4. */
+  if (cpi->common.txfm_mode != ONLY_4X4) {
+    for (type = 0; type < BLOCK_TYPES_8X8; ++type) {
+      for (band = 0; band < COEF_BANDS; ++band) {
+        for (ctx = 0; ctx < PREV_COEF_CONTEXTS; ++ctx) {
+          if (coef_ctx_is_skipped(type, band, ctx)) continue;
+          vp9_tree_probs_from_distribution(
+              MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree,
+              cpi->frame_coef_probs_8x8[type][band][ctx],
+              cpi->frame_branch_ct_8x8[type][band][ctx],
+              cpi->coef_counts_8x8[type][band][ctx], 256, 1);
+#ifdef ENTROPY_STATS
+          if (!cpi->dummy_packing)
+            for (tok = 0; tok < MAX_ENTROPY_TOKENS; ++tok)
+              context_counters_8x8[type][band][ctx][tok] +=
+                  cpi->coef_counts_8x8[type][band][ctx][tok];
+#endif
+        }
+      }
+    }
+    for (type = 0; type < BLOCK_TYPES_8X8; ++type) {
+      for (band = 0; band < COEF_BANDS; ++band) {
+        for (ctx = 0; ctx < PREV_COEF_CONTEXTS; ++ctx) {
+          if (coef_ctx_is_skipped(type, band, ctx)) continue;
+          vp9_tree_probs_from_distribution(
+              MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree,
+              cpi->frame_hybrid_coef_probs_8x8[type][band][ctx],
+              cpi->frame_hybrid_branch_ct_8x8[type][band][ctx],
+              cpi->hybrid_coef_counts_8x8[type][band][ctx], 256, 1);
+#ifdef ENTROPY_STATS
+          if (!cpi->dummy_packing)
+            for (tok = 0; tok < MAX_ENTROPY_TOKENS; ++tok)
+              hybrid_context_counters_8x8[type][band][ctx][tok] +=
+                  cpi->hybrid_coef_counts_8x8[type][band][ctx][tok];
+#endif
+        }
+      }
+    }
+  }
+
+  /* 16x16 tables. */
+  if (cpi->common.txfm_mode > ALLOW_8X8) {
+    for (type = 0; type < BLOCK_TYPES_16X16; ++type) {
+      for (band = 0; band < COEF_BANDS; ++band) {
+        for (ctx = 0; ctx < PREV_COEF_CONTEXTS; ++ctx) {
+          if (coef_ctx_is_skipped(type, band, ctx)) continue;
+          vp9_tree_probs_from_distribution(
+              MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree,
+              cpi->frame_coef_probs_16x16[type][band][ctx],
+              cpi->frame_branch_ct_16x16[type][band][ctx],
+              cpi->coef_counts_16x16[type][band][ctx], 256, 1);
+#ifdef ENTROPY_STATS
+          if (!cpi->dummy_packing)
+            for (tok = 0; tok < MAX_ENTROPY_TOKENS; ++tok)
+              context_counters_16x16[type][band][ctx][tok] +=
+                  cpi->coef_counts_16x16[type][band][ctx][tok];
+#endif
+        }
+      }
+    }
+  }
+  /* NOTE(review): unlike the 8x8 pair, the hybrid 16x16 pass below sits
+   * outside the txfm_mode guard and runs for every frame — confirm intent. */
+  for (type = 0; type < BLOCK_TYPES_16X16; ++type) {
+    for (band = 0; band < COEF_BANDS; ++band) {
+      for (ctx = 0; ctx < PREV_COEF_CONTEXTS; ++ctx) {
+        if (coef_ctx_is_skipped(type, band, ctx)) continue;
+        vp9_tree_probs_from_distribution(
+            MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree,
+            cpi->frame_hybrid_coef_probs_16x16[type][band][ctx],
+            cpi->frame_hybrid_branch_ct_16x16[type][band][ctx],
+            cpi->hybrid_coef_counts_16x16[type][band][ctx], 256, 1);
+#ifdef ENTROPY_STATS
+        if (!cpi->dummy_packing)
+          for (tok = 0; tok < MAX_ENTROPY_TOKENS; ++tok)
+            hybrid_context_counters_16x16[type][band][ctx][tok] +=
+                cpi->hybrid_coef_counts_16x16[type][band][ctx][tok];
+#endif
+      }
+    }
+  }
+}
+
+/* Writes the coefficient-probability updates for one table to |bc|.
+ *
+ * A dry run totals the savings of updating each node. If no node would be
+ * updated or the total is negative, a single 0 bit is written. Otherwise a
+ * 1 bit is followed by one flag per node (coded with COEF_UPDATE_PROB) and,
+ * for flagged nodes, the new probability, which also replaces the entry in
+ * |old_frame_coef_probs|. */
+static void update_coef_probs_common(
+    vp9_writer* const bc,
+    vp9_prob new_frame_coef_probs[BLOCK_TYPES][COEF_BANDS]
+                                 [PREV_COEF_CONTEXTS][ENTROPY_NODES],
+    vp9_prob old_frame_coef_probs[BLOCK_TYPES][COEF_BANDS]
+                                 [PREV_COEF_CONTEXTS][ENTROPY_NODES],
+    unsigned int frame_branch_ct[BLOCK_TYPES][COEF_BANDS]
+                                [PREV_COEF_CONTEXTS][ENTROPY_NODES][2]) {
+  int i, j, k, t;
+  int update[2] = {0, 0};
+  int savings = 0;
+
+  /* dry run to see if there is any update at all needed */
+  for (i = 0; i < BLOCK_TYPES; ++i) {
+    for (j = !i; j < COEF_BANDS; ++j) {
+      for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
+        /* The skip test does not depend on t, so do it once per context. */
+        if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0)))
+          continue;
+        for (t = 0; t < ENTROPY_NODES; ++t) {
+          vp9_prob newp = new_frame_coef_probs[i][j][k][t];
+          const vp9_prob oldp = old_frame_coef_probs[i][j][k][t];
+          const vp9_prob upd = COEF_UPDATE_PROB;
+          int s;
+          int u = 0;
+#if defined(SEARCH_NEWP)
+          s = prob_diff_update_savings_search(
+                  frame_branch_ct[i][j][k][t], oldp, &newp, upd);
+          if (s > 0 && newp != oldp)
+            u = 1;
+          if (u)
+            savings += s - (int)(vp9_cost_zero(upd));
+          else
+            savings -= (int)(vp9_cost_zero(upd));
+#else
+          s = prob_update_savings(frame_branch_ct[i][j][k][t],
+                                  oldp, newp, upd);
+          if (s > 0)
+            u = 1;
+          if (u)
+            savings += s;
+#endif
+
+          update[u]++;
+        }
+      }
+    }
+  }
+
+  /* Is coef updated at all */
+  if (update[1] == 0 || savings < 0) {
+    vp9_write_bit(bc, 0);
+  } else {
+    vp9_write_bit(bc, 1);
+    /* Second pass: repeat the per-node decision and write it out. */
+    for (i = 0; i < BLOCK_TYPES; ++i) {
+      for (j = !i; j < COEF_BANDS; ++j) {
+        for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
+          if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0)))
+            continue;
+          for (t = 0; t < ENTROPY_NODES; ++t) {
+            vp9_prob newp = new_frame_coef_probs[i][j][k][t];
+            vp9_prob *oldp = old_frame_coef_probs[i][j][k] + t;
+            const vp9_prob upd = COEF_UPDATE_PROB;
+            int s;
+            int u = 0;
+#if defined(SEARCH_NEWP)
+            s = prob_diff_update_savings_search(
+                    frame_branch_ct[i][j][k][t], *oldp, &newp, upd);
+            if (s > 0 && newp != *oldp)
+              u = 1;
+#else
+            s = prob_update_savings(frame_branch_ct[i][j][k][t],
+                                    *oldp, newp, upd);
+            if (s > 0)
+              u = 1;
+#endif
+            vp9_write(bc, u, upd);
+#ifdef ENTROPY_STATS
+            /* NOTE(review): `cpi` is not a parameter or local here, so this
+             * only builds if a file-scope `cpi` exists — confirm. */
+            if (!cpi->dummy_packing)
+              ++tree_update_hist[i][j][k][t][u];
+#endif
+            if (u) {
+              /* send/use new probability */
+              write_prob_diff_update(bc, newp, *oldp);
+              *oldp = newp;
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+/* Derives this frame's coefficient probabilities from the encode-loop counts
+ * and writes the probability updates for each enabled transform size to |bc|.
+ * For every size the regular table is written before the hybrid one; the
+ * order of the calls defines the bitstream. */
+static void update_coef_probs(VP9_COMP* const cpi, vp9_writer* const bc) {
+  VP9_COMMON *const cm = &cpi->common;
+
+  vp9_clear_system_state();
+
+  // Build the coefficient contexts based on counts collected in encode loop
+  build_coeff_contexts(cpi);
+
+  /* The unsuffixed tables are always sent. */
+  update_coef_probs_common(bc, cpi->frame_coef_probs, cm->fc.coef_probs,
+                           cpi->frame_branch_ct);
+  update_coef_probs_common(bc, cpi->frame_hybrid_coef_probs,
+                           cm->fc.hybrid_coef_probs,
+                           cpi->frame_hybrid_branch_ct);
+
+  /* do not do this if not even allowed */
+  if (cm->txfm_mode != ONLY_4X4) {
+    update_coef_probs_common(bc, cpi->frame_coef_probs_8x8,
+                             cm->fc.coef_probs_8x8,
+                             cpi->frame_branch_ct_8x8);
+    update_coef_probs_common(bc, cpi->frame_hybrid_coef_probs_8x8,
+                             cm->fc.hybrid_coef_probs_8x8,
+                             cpi->frame_hybrid_branch_ct_8x8);
+  }
+
+  /* The 16x16 tables follow only for modes above ALLOW_8X8. */
+  if (cm->txfm_mode <= ALLOW_8X8)
+    return;
+
+  update_coef_probs_common(bc, cpi->frame_coef_probs_16x16,
+                           cm->fc.coef_probs_16x16,
+                           cpi->frame_branch_ct_16x16);
+  update_coef_probs_common(bc, cpi->frame_hybrid_coef_probs_16x16,
+                           cm->fc.hybrid_coef_probs_16x16,
+                           cpi->frame_hybrid_branch_ct_16x16);
+}
+
+#ifdef PACKET_TESTING
+FILE *vpxlogc = 0;  // only defined for PACKET_TESTING builds; presumably a packet log stream — confirm
+#endif
+
+/* Writes one quantizer delta to |bc|: a presence bit, then for a non-zero
+ * delta its magnitude as a 4-bit literal and a sign bit (1 = negative). */
+static void put_delta_q(vp9_writer *bc, int delta_q) {
+  if (delta_q == 0) {
+    vp9_write_bit(bc, 0);
+    return;
+  }
+
+  vp9_write_bit(bc, 1);
+  vp9_write_literal(bc, abs(delta_q), 4);
+  vp9_write_bit(bc, (delta_q < 0));
+}
+
+/* Picks, among the 8 key-frame Y-mode probability sets, the one that codes
+ * this frame's mode counts at the lowest total cost and stores its index in
+ * cpi->common.kf_ymode_probs_index. */
+static void decide_kf_ymode_entropy(VP9_COMP *cpi) {
+  int costs[MB_MODE_COUNT];
+  int best_cost = INT_MAX;
+  int best_set = 0;
+  int set, mode;
+
+  for (set = 0; set < 8; ++set) {
+    int total = 0;
+
+    vp9_cost_tokens(costs, cpi->common.kf_ymode_prob[set], vp9_kf_ymode_tree);
+    for (mode = 0; mode < VP9_YMODES; ++mode)
+      total += costs[mode] * cpi->ymode_count[mode];
+#if CONFIG_SUPERBLOCKS
+    vp9_cost_tokens(costs, cpi->common.sb_kf_ymode_prob[set],
+                    vp9_sb_ymode_tree);
+    for (mode = 0; mode < VP9_I32X32_MODES; ++mode)
+      total += costs[mode] * cpi->sb_ymode_count[mode];
+#endif
+    /* Strict '<' keeps the lowest index among equally cheap sets. */
+    if (total < best_cost) {
+      best_cost = total;
+      best_set = set;
+    }
+  }
+  cpi->common.kf_ymode_probs_index = best_set;
+}
+/* Enables SEG_LVL_REF_FRAME for every segment and sets its data to a bit
+ * mask (1 << ref_frame) of the reference frames used by that segment's
+ * macroblocks. Mode-info rows are walked as mb_cols + 1 entries apart. */
+static void segment_reference_frames(VP9_COMP *cpi) {
+  VP9_COMMON *const cm = &cpi->common;
+  MACROBLOCKD *const xd = &cpi->mb.e_mbd;
+  const MODE_INFO *const mi_base = cm->mi;
+  int ref_mask[MAX_MB_SEGMENTS] = {0};
+  int row, col, seg;
+  for (row = 0; row < cm->mb_rows; ++row) {
+    const MODE_INFO *const mi = mi_base + row * (cm->mb_cols + 1);
+    for (col = 0; col < cm->mb_cols; ++col)
+      ref_mask[mi[col].mbmi.segment_id] |= (1 << mi[col].mbmi.ref_frame);
+  }
+  for (seg = 0; seg < MAX_MB_SEGMENTS; ++seg) {
+    vp9_enable_segfeature(xd, seg, SEG_LVL_REF_FRAME);
+    vp9_set_segdata(xd, seg, SEG_LVL_REF_FRAME, ref_mask[seg]);
+  }
+}
+/* Packs one frame from |cpi| into |dest| (frame tag, header partition, then modes and tokens); *size receives the byte count. */
+void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest,
+ unsigned long *size) {
+ int i, j;
+ VP9_HEADER oh;
+ VP9_COMMON *const pc = &cpi->common;
+ vp9_writer header_bc, residual_bc;
+ MACROBLOCKD *const xd = &cpi->mb.e_mbd;
+ int extra_bytes_packed = 0;
+
+ unsigned char *cx_data = dest;
+
+ oh.show_frame = (int) pc->show_frame;
+ oh.type = (int)pc->frame_type;
+ oh.version = pc->version;
+ oh.first_partition_length_in_bytes = 0;
+
+ cx_data += 3;  // leave room for the 3-byte frame tag written to dest[0..2] below
+
+#if defined(SECTIONBITS_OUTPUT)
+ Sectionbits[active_section = 1] += sizeof(VP9_HEADER) * 8 * 256;
+#endif
+
+ compute_update_table();
+
+ /* vp9_kf_default_bmode_probs() is called in vp9_setup_key_frame() once
+ * for each K frame before encode frame. pc->kf_bmode_prob doesn't get
+ * changed anywhere else. No need to call it again here. --yw
+ * vp9_kf_default_bmode_probs( pc->kf_bmode_prob);
+ */
+
+ /* every keyframe send startcode, width, height, scale factor, clamp
+ * and color type.
+ */
+ if (oh.type == KEY_FRAME) {
+ int v;
+
+ // Start / synch code
+ cx_data[0] = 0x9D;
+ cx_data[1] = 0x01;
+ cx_data[2] = 0x2a;
+
+ v = (pc->horiz_scale << 14) | pc->Width;
+ cx_data[3] = v;
+ cx_data[4] = v >> 8;
+
+ v = (pc->vert_scale << 14) | pc->Height;
+ cx_data[5] = v;
+ cx_data[6] = v >> 8;
+
+ extra_bytes_packed = 7;  // 3 sync bytes + 2 bytes each for width and height
+ cx_data += extra_bytes_packed;
+
+ vp9_start_encode(&header_bc, cx_data);
+
+ // signal clr type
+ vp9_write_bit(&header_bc, pc->clr_type);
+ vp9_write_bit(&header_bc, pc->clamp_type);
+
+ } else {
+ vp9_start_encode(&header_bc, cx_data);
+ }
+
+ // Signal whether or not Segmentation is enabled
+ vp9_write_bit(&header_bc, (xd->segmentation_enabled) ? 1 : 0);
+
+ // Indicate which features are enabled
+ if (xd->segmentation_enabled) {
+ // Indicate whether or not the segmentation map is being updated.
+ vp9_write_bit(&header_bc, (xd->update_mb_segmentation_map) ? 1 : 0);
+
+ // If it is, then indicate the method that will be used.
+ if (xd->update_mb_segmentation_map) {
+ // Select the coding strategy (temporal or spatial)
+ vp9_choose_segmap_coding_method(cpi);
+ // Send the tree probabilities used to decode unpredicted
+ // macro-block segments
+ for (i = 0; i < MB_FEATURE_TREE_PROBS; i++) {
+ int data = xd->mb_segment_tree_probs[i];
+
+ if (data != 255) {
+ vp9_write_bit(&header_bc, 1);
+ vp9_write_literal(&header_bc, data, 8);
+ } else {
+ vp9_write_bit(&header_bc, 0);
+ }
+ }
+
+ // Write out the chosen coding method.
+ vp9_write_bit(&header_bc, (pc->temporal_update) ? 1 : 0);
+ if (pc->temporal_update) {
+ for (i = 0; i < PREDICTION_PROBS; i++) {
+ int data = pc->segment_pred_probs[i];
+
+ if (data != 255) {
+ vp9_write_bit(&header_bc, 1);
+ vp9_write_literal(&header_bc, data, 8);
+ } else {
+ vp9_write_bit(&header_bc, 0);
+ }
+ }
+ }
+ }
+
+ vp9_write_bit(&header_bc, (xd->update_mb_segmentation_data) ? 1 : 0);
+
+ // segment_reference_frames(cpi);
+
+ if (xd->update_mb_segmentation_data) {
+ signed char Data;
+
+ vp9_write_bit(&header_bc, (xd->mb_segment_abs_delta) ? 1 : 0);
+
+ // For each segments id...
+ for (i = 0; i < MAX_MB_SEGMENTS; i++) {
+ // For each segmentation codable feature...
+ for (j = 0; j < SEG_LVL_MAX; j++) {
+ Data = vp9_get_segdata(xd, i, j);
+
+ // If the feature is enabled...
+ if (vp9_segfeature_active(xd, i, j)) {
+ vp9_write_bit(&header_bc, 1);
+
+ // Is the segment data signed..
+ if (vp9_is_segfeature_signed(j)) {
+ // Encode the relevant feature data
+ if (Data < 0) {
+ Data = - Data;
+ vp9_encode_unsigned_max(&header_bc, Data,
+ vp9_seg_feature_data_max(j));
+ vp9_write_bit(&header_bc, 1);
+ } else {
+ vp9_encode_unsigned_max(&header_bc, Data,
+ vp9_seg_feature_data_max(j));
+ vp9_write_bit(&header_bc, 0);
+ }
+ }
+ // Unsigned data element so no sign bit needed
+ else
+ vp9_encode_unsigned_max(&header_bc, Data,
+ vp9_seg_feature_data_max(j));
+ } else
+ vp9_write_bit(&header_bc, 0);
+ }
+ }
+ }
+ }
+
+ // Encode the common prediction model status flag probability updates for
+ // the reference frame
+ update_refpred_stats(cpi);
+ if (pc->frame_type != KEY_FRAME) {
+ for (i = 0; i < PREDICTION_PROBS; i++) {
+ if (cpi->ref_pred_probs_update[i]) {
+ vp9_write_bit(&header_bc, 1);
+ vp9_write_literal(&header_bc, pc->ref_pred_probs[i], 8);
+ } else {
+ vp9_write_bit(&header_bc, 0);
+ }
+ }
+ }
+
+#if CONFIG_SUPERBLOCKS
+ {
+ /* sb mode probability */
+ const int sb_max = (((pc->mb_rows + 1) >> 1) * ((pc->mb_cols + 1) >> 1));
+
+ pc->sb_coded = get_prob(sb_max - cpi->sb_count, sb_max);
+ vp9_write_literal(&header_bc, pc->sb_coded, 8);
+ }
+#endif
+
+ {
+ if (pc->txfm_mode == TX_MODE_SELECT) {
+ pc->prob_tx[0] = get_prob(cpi->txfm_count[0] + cpi->txfm_count_8x8p[0],
+ cpi->txfm_count[0] + cpi->txfm_count[1] + cpi->txfm_count[2] +
+ cpi->txfm_count_8x8p[0] + cpi->txfm_count_8x8p[1]);
+ pc->prob_tx[1] = get_prob(cpi->txfm_count[1], cpi->txfm_count[1] + cpi->txfm_count[2]);
+ } else {
+ pc->prob_tx[0] = 128;
+ pc->prob_tx[1] = 128;
+ }
+ vp9_write_literal(&header_bc, pc->txfm_mode, 2);
+ if (pc->txfm_mode == TX_MODE_SELECT) {
+ vp9_write_literal(&header_bc, pc->prob_tx[0], 8);
+ vp9_write_literal(&header_bc, pc->prob_tx[1], 8);
+ }
+ }
+
+ // Encode the loop filter level and type
+ vp9_write_bit(&header_bc, pc->filter_type);
+ vp9_write_literal(&header_bc, pc->filter_level, 6);
+ vp9_write_literal(&header_bc, pc->sharpness_level, 3);
+
+ // Write out loop filter deltas applied at the MB level based on mode or ref frame (if they are enabled).
+ vp9_write_bit(&header_bc, (xd->mode_ref_lf_delta_enabled) ? 1 : 0);
+
+ if (xd->mode_ref_lf_delta_enabled) {
+ // Do the deltas need to be updated
+ int send_update = xd->mode_ref_lf_delta_update;
+
+ vp9_write_bit(&header_bc, send_update);
+ if (send_update) {
+ int Data;
+
+ // Send update
+ for (i = 0; i < MAX_REF_LF_DELTAS; i++) {
+ Data = xd->ref_lf_deltas[i];
+
+ // Frame level data
+ if (xd->ref_lf_deltas[i] != xd->last_ref_lf_deltas[i]) {
+ xd->last_ref_lf_deltas[i] = xd->ref_lf_deltas[i];
+ vp9_write_bit(&header_bc, 1);
+
+ if (Data > 0) {  // NOTE: Data == 0 takes the else branch and is sent with sign bit 1
+ vp9_write_literal(&header_bc, (Data & 0x3F), 6);
+ vp9_write_bit(&header_bc, 0); // sign
+ } else {
+ Data = -Data;
+ vp9_write_literal(&header_bc, (Data & 0x3F), 6);
+ vp9_write_bit(&header_bc, 1); // sign
+ }
+ } else {
+ vp9_write_bit(&header_bc, 0);
+ }
+ }
+
+ // Send update
+ for (i = 0; i < MAX_MODE_LF_DELTAS; i++) {
+ Data = xd->mode_lf_deltas[i];
+
+ if (xd->mode_lf_deltas[i] != xd->last_mode_lf_deltas[i]) {
+ xd->last_mode_lf_deltas[i] = xd->mode_lf_deltas[i];
+ vp9_write_bit(&header_bc, 1);
+
+ if (Data > 0) {  // NOTE: Data == 0 takes the else branch and is sent with sign bit 1
+ vp9_write_literal(&header_bc, (Data & 0x3F), 6);
+ vp9_write_bit(&header_bc, 0); // sign
+ } else {
+ Data = -Data;
+ vp9_write_literal(&header_bc, (Data & 0x3F), 6);
+ vp9_write_bit(&header_bc, 1); // sign
+ }
+ } else {
+ vp9_write_bit(&header_bc, 0);
+ }
+ }
+ }
+ }
+
+ // signal here if multi token partition is enabled
+ // vp9_write_literal(&header_bc, pc->multi_token_partition, 2);
+ vp9_write_literal(&header_bc, 0, 2);  // the value is hard-coded to 0
+
+ // Frame Q baseline quantizer index
+ vp9_write_literal(&header_bc, pc->base_qindex, QINDEX_BITS);
+
+ // Transmit Dc, Second order and Uv quantizer delta information
+ put_delta_q(&header_bc, pc->y1dc_delta_q);
+ put_delta_q(&header_bc, pc->y2dc_delta_q);
+ put_delta_q(&header_bc, pc->y2ac_delta_q);
+ put_delta_q(&header_bc, pc->uvdc_delta_q);
+ put_delta_q(&header_bc, pc->uvac_delta_q);
+
+ // When there is a key frame all reference buffers are updated using the new key frame
+ if (pc->frame_type != KEY_FRAME) {
+ // Should the GF or ARF be updated using the transmitted frame or buffer
+ vp9_write_bit(&header_bc, pc->refresh_golden_frame);
+ vp9_write_bit(&header_bc, pc->refresh_alt_ref_frame);
+
+ // For inter frames the current default behavior is that when
+ // cm->refresh_golden_frame is set we copy the old GF over to
+ // the ARF buffer. This is purely an encoder decision at present.
+ if (pc->refresh_golden_frame)
+ pc->copy_buffer_to_arf = 2;
+
+ // If not being updated from current frame should either GF or ARF be updated from another buffer
+ if (!pc->refresh_golden_frame)
+ vp9_write_literal(&header_bc, pc->copy_buffer_to_gf, 2);
+
+ if (!pc->refresh_alt_ref_frame)
+ vp9_write_literal(&header_bc, pc->copy_buffer_to_arf, 2);
+
+ // Indicate reference frame sign bias for Golden and ARF frames (always 0 for last frame buffer)
+ vp9_write_bit(&header_bc, pc->ref_frame_sign_bias[GOLDEN_FRAME]);
+ vp9_write_bit(&header_bc, pc->ref_frame_sign_bias[ALTREF_FRAME]);
+
+ // Signal whether to allow high MV precision
+ vp9_write_bit(&header_bc, (xd->allow_high_precision_mv) ? 1 : 0);
+ if (pc->mcomp_filter_type == SWITCHABLE) {
+ /* Check to see if only one of the filters is actually used */
+ int count[VP9_SWITCHABLE_FILTERS];
+ int i, j, c = 0;
+ for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) {
+ count[i] = 0;
+ for (j = 0; j <= VP9_SWITCHABLE_FILTERS; ++j) {  // NOTE(review): '<=' assumes VP9_SWITCHABLE_FILTERS + 1 rows — confirm
+ count[i] += cpi->switchable_interp_count[j][i];
+ }
+ c += (count[i] > 0);
+ }
+ if (c == 1) {
+ /* Only one filter is used. So set the filter at frame level */
+ for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) {
+ if (count[i]) {
+ pc->mcomp_filter_type = vp9_switchable_interp[i];
+ break;
+ }
+ }
+ }
+ }
+ // Signal the type of subpel filter to use
+ vp9_write_bit(&header_bc, (pc->mcomp_filter_type == SWITCHABLE));
+ if (pc->mcomp_filter_type != SWITCHABLE)
+ vp9_write_literal(&header_bc, (pc->mcomp_filter_type), 2);
+#if CONFIG_COMP_INTERINTRA_PRED
+ // printf("Counts: %d %d\n", cpi->interintra_count[0],
+ // cpi->interintra_count[1]);
+ if (!cpi->dummy_packing && pc->use_interintra)
+ pc->use_interintra = (cpi->interintra_count[1] > 0);
+ vp9_write_bit(&header_bc, pc->use_interintra);
+ if (!pc->use_interintra)
+ vp9_zero(cpi->interintra_count);
+#endif
+ }
+
+ vp9_write_bit(&header_bc, pc->refresh_entropy_probs);
+
+ if (pc->frame_type != KEY_FRAME)
+ vp9_write_bit(&header_bc, pc->refresh_last_frame);
+
+#ifdef ENTROPY_STATS
+ if (pc->frame_type == INTER_FRAME)
+ active_section = 0;
+ else
+ active_section = 7;
+#endif
+
+ // If appropriate update the inter mode probability context and code the
+ // changes in the bitstream.
+ if ((pc->frame_type != KEY_FRAME)) {
+ int i, j;
+ int new_context[INTER_MODE_CONTEXTS][4];
+ update_mode_probs(pc, new_context);
+
+ for (i = 0; i < INTER_MODE_CONTEXTS; i++) {
+ for (j = 0; j < 4; j++) {
+ if (new_context[i][j] != pc->fc.vp9_mode_contexts[i][j]) {
+ vp9_write(&header_bc, 1, 252);
+ vp9_write_literal(&header_bc, new_context[i][j], 8);
+
+ // Only update the persistent copy if this is the "real pack"
+ if (!cpi->dummy_packing) {
+ pc->fc.vp9_mode_contexts[i][j] = new_context[i][j];
+ }
+ } else {
+ vp9_write(&header_bc, 0, 252);
+ }
+ }
+ }
+ }
+
+ vp9_clear_system_state(); // __asm emms;
+
+ vp9_copy(cpi->common.fc.pre_coef_probs, cpi->common.fc.coef_probs);
+ vp9_copy(cpi->common.fc.pre_hybrid_coef_probs, cpi->common.fc.hybrid_coef_probs);
+ vp9_copy(cpi->common.fc.pre_coef_probs_8x8, cpi->common.fc.coef_probs_8x8);
+ vp9_copy(cpi->common.fc.pre_hybrid_coef_probs_8x8, cpi->common.fc.hybrid_coef_probs_8x8);
+ vp9_copy(cpi->common.fc.pre_coef_probs_16x16, cpi->common.fc.coef_probs_16x16);
+ vp9_copy(cpi->common.fc.pre_hybrid_coef_probs_16x16, cpi->common.fc.hybrid_coef_probs_16x16);
+#if CONFIG_SUPERBLOCKS
+ vp9_copy(cpi->common.fc.pre_sb_ymode_prob, cpi->common.fc.sb_ymode_prob);
+#endif
+ vp9_copy(cpi->common.fc.pre_ymode_prob, cpi->common.fc.ymode_prob);
+ vp9_copy(cpi->common.fc.pre_uv_mode_prob, cpi->common.fc.uv_mode_prob);
+ vp9_copy(cpi->common.fc.pre_bmode_prob, cpi->common.fc.bmode_prob);
+ vp9_copy(cpi->common.fc.pre_sub_mv_ref_prob, cpi->common.fc.sub_mv_ref_prob);
+ vp9_copy(cpi->common.fc.pre_mbsplit_prob, cpi->common.fc.mbsplit_prob);
+ vp9_copy(cpi->common.fc.pre_i8x8_mode_prob, cpi->common.fc.i8x8_mode_prob);
+ cpi->common.fc.pre_nmvc = cpi->common.fc.nmvc;
+#if CONFIG_COMP_INTERINTRA_PRED
+ cpi->common.fc.pre_interintra_prob = cpi->common.fc.interintra_prob;
+#endif
+ vp9_zero(cpi->sub_mv_ref_count);
+ vp9_zero(cpi->mbsplit_count);
+ vp9_zero(cpi->common.fc.mv_ref_ct)  // NOTE(review): no ';' — presumably vp9_zero() expands to a full statement; confirm
+
+ update_coef_probs(cpi, &header_bc);
+
+#ifdef ENTROPY_STATS
+ active_section = 2;
+#endif
+
+ // Write out the mb_no_coeff_skip flag
+ vp9_write_bit(&header_bc, pc->mb_no_coeff_skip);
+ if (pc->mb_no_coeff_skip) {
+ int k;
+
+ vp9_update_skip_probs(cpi);
+ for (k = 0; k < MBSKIP_CONTEXTS; ++k)
+ vp9_write_literal(&header_bc, pc->mbskip_pred_probs[k], 8);
+ }
+
+ if (pc->frame_type == KEY_FRAME) {
+ if (!pc->kf_ymode_probs_update) {
+ vp9_write_literal(&header_bc, pc->kf_ymode_probs_index, 3);
+ }
+ } else {
+ // Update the probabilities used to encode reference frame data
+ update_ref_probs(cpi);
+
+#ifdef ENTROPY_STATS
+ active_section = 1;
+#endif
+
+#if CONFIG_PRED_FILTER
+ // Write the prediction filter mode used for this frame
+ vp9_write_literal(&header_bc, pc->pred_filter_mode, 2);
+
+ // Write prediction filter on/off probability if signaling at MB level
+ if (pc->pred_filter_mode == 2)
+ vp9_write_literal(&header_bc, pc->prob_pred_filter_off, 8);
+
+#endif
+ if (pc->mcomp_filter_type == SWITCHABLE)
+ update_switchable_interp_probs(cpi, &header_bc);
+
+ #if CONFIG_COMP_INTERINTRA_PRED
+ if (pc->use_interintra) {
+ vp9_cond_prob_update(&header_bc,
+ &pc->fc.interintra_prob,
+ VP9_UPD_INTERINTRA_PROB,
+ cpi->interintra_count);
+ }
+#endif
+
+ vp9_write_literal(&header_bc, pc->prob_intra_coded, 8);
+ vp9_write_literal(&header_bc, pc->prob_last_coded, 8);
+ vp9_write_literal(&header_bc, pc->prob_gf_coded, 8);
+
+ {
+ const int comp_pred_mode = cpi->common.comp_pred_mode;
+ const int use_compound_pred = (comp_pred_mode != SINGLE_PREDICTION_ONLY);
+ const int use_hybrid_pred = (comp_pred_mode == HYBRID_PREDICTION);
+
+ vp9_write(&header_bc, use_compound_pred, 128);
+ if (use_compound_pred) {
+ vp9_write(&header_bc, use_hybrid_pred, 128);
+ if (use_hybrid_pred) {
+ for (i = 0; i < COMP_PRED_CONTEXTS; i++) {
+ pc->prob_comppred[i] = get_binary_prob(cpi->single_pred_count[i],
+ cpi->comp_pred_count[i]);
+ vp9_write_literal(&header_bc, pc->prob_comppred[i], 8);
+ }
+ }
+ }
+ }
+ update_mbintra_mode_probs(cpi, &header_bc);
+
+ vp9_write_nmv_probs(cpi, xd->allow_high_precision_mv, &header_bc);
+ }
+
+ vp9_stop_encode(&header_bc);
+
+ oh.first_partition_length_in_bytes = header_bc.pos;
+
+ /* update frame tag: type | version << 1 | show_frame << 4 | header length << 5 */
+ {
+ int v = (oh.first_partition_length_in_bytes << 5) |
+ (oh.show_frame << 4) |
+ (oh.version << 1) |
+ oh.type;
+
+ dest[0] = v;
+ dest[1] = v >> 8;
+ dest[2] = v >> 16;
+ }
+
+ *size = VP9_HEADER_SIZE + extra_bytes_packed + header_bc.pos;
+ vp9_start_encode(&residual_bc, cx_data + header_bc.pos);  // second writer starts right after the header partition
+
+ if (pc->frame_type == KEY_FRAME) {
+ decide_kf_ymode_entropy(cpi);
+ write_kfmodes(cpi, &residual_bc);
+ } else {
+ /* This is not required if the counts in cpi are consistent with the
+ * final packing pass */
+ // if (!cpi->dummy_packing) vp9_zero(cpi->NMVcount);
+ pack_inter_mode_mvs(cpi, &residual_bc);
+
+ vp9_update_mode_context(&cpi->common);
+ }
+
+ vp9_stop_encode(&residual_bc);
+
+ *size += residual_bc.pos;
+}
+
+#ifdef ENTROPY_STATS
+/* Debug helper (ENTROPY_STATS): writes the update probabilities derived from
+ * the tree_update_hist* tables to "coefupdprob.h" as C initializers, then
+ * dumps the raw tables to "treeupdate.bin". Returns early if a file cannot
+ * be opened. */
+void print_tree_update_probs() {
+  int i, j, k, l;
+  FILE *f = fopen("coefupdprob.h", "w");
+  if (f == NULL) return;
+  fprintf(f, "\n/* Update probabilities for token entropy tree. */\n\n");
+  fprintf(f, "const vp9_prob\n" "vp9_coef_update_probs[BLOCK_TYPES]\n"
+          " [COEF_BANDS]\n" " [PREV_COEF_CONTEXTS]\n"
+          " [ENTROPY_NODES] = {\n");
+  for (i = 0; i < BLOCK_TYPES; i++) {
+    fprintf(f, " { \n");
+    for (j = 0; j < COEF_BANDS; j++) {
+      fprintf(f, " {\n");
+      for (k = 0; k < PREV_COEF_CONTEXTS; k++) {
+        fprintf(f, " {");
+        for (l = 0; l < ENTROPY_NODES; l++) {
+          fprintf(f, "%3d, ",  /* cast keeps the argument matching %d */
+                  (int)get_binary_prob(tree_update_hist[i][j][k][l][0],
+                                       tree_update_hist[i][j][k][l][1]));
+        }
+        fprintf(f, "},\n");
+      }
+      fprintf(f, " },\n");
+    }
+    fprintf(f, " },\n");
+  }
+  fprintf(f, "};\n");
+
+  fprintf(f, "const vp9_prob\n" "vp9_coef_update_probs_8x8[BLOCK_TYPES_8X8]\n"
+          " [COEF_BANDS]\n" " [PREV_COEF_CONTEXTS]\n"
+          " [ENTROPY_NODES] = {\n");
+  for (i = 0; i < BLOCK_TYPES_8X8; i++) {
+    fprintf(f, " { \n");
+    for (j = 0; j < COEF_BANDS; j++) {
+      fprintf(f, " {\n");
+      for (k = 0; k < PREV_COEF_CONTEXTS; k++) {
+        fprintf(f, " {");
+        for (l = 0; l < MAX_ENTROPY_TOKENS - 1; l++) {
+          fprintf(f, "%3d, ",
+                  (int)get_binary_prob(tree_update_hist_8x8[i][j][k][l][0],
+                                       tree_update_hist_8x8[i][j][k][l][1]));
+        }
+        fprintf(f, "},\n");
+      }
+      fprintf(f, " },\n");
+    }
+    fprintf(f, " },\n");
+  }
+  fprintf(f, "};\n");  /* the 8x8 initializer used to be left open */
+
+  fprintf(f, "const vp9_prob\n"
+          "vp9_coef_update_probs_16x16[BLOCK_TYPES_16X16]\n"
+          " [COEF_BANDS]\n" " [PREV_COEF_CONTEXTS]\n" " [ENTROPY_NODES] = {\n");
+  for (i = 0; i < BLOCK_TYPES_16X16; i++) {
+    fprintf(f, " { \n");
+    for (j = 0; j < COEF_BANDS; j++) {
+      fprintf(f, " {\n");
+      for (k = 0; k < PREV_COEF_CONTEXTS; k++) {
+        fprintf(f, " {");
+        for (l = 0; l < MAX_ENTROPY_TOKENS - 1; l++) {
+          fprintf(f, "%3d, ",
+                  (int)get_binary_prob(tree_update_hist_16x16[i][j][k][l][0],
+                                       tree_update_hist_16x16[i][j][k][l][1]));
+        }
+        fprintf(f, "},\n");
+      }
+      fprintf(f, " },\n");
+    }
+    fprintf(f, " },\n");
+  }
+  fprintf(f, "};\n");  /* the 16x16 initializer used to be left open */
+  fclose(f);
+
+  f = fopen("treeupdate.bin", "wb");
+  if (f == NULL) return;
+  fwrite(tree_update_hist, sizeof(tree_update_hist), 1, f);
+  fwrite(tree_update_hist_8x8, sizeof(tree_update_hist_8x8), 1, f);
+  fwrite(tree_update_hist_16x16, sizeof(tree_update_hist_16x16), 1, f);
+  fclose(f);
+}
+#endif
diff --git a/vp9/encoder/vp9_bitstream.h b/vp9/encoder/vp9_bitstream.h
new file mode 100644
index 0000000..5a63d6e
--- /dev/null
+++ b/vp9/encoder/vp9_bitstream.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_ENCODER_VP9_BITSTREAM_H_
+#define VP9_ENCODER_VP9_BITSTREAM_H_
+
+void vp9_update_skip_probs(VP9_COMP *cpi);  // defined outside this header
+// NOTE(review): nothing is #included here, so VP9_COMP must be declared before this header is included.
+#endif
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
new file mode 100644
index 0000000..d5bf89c
--- /dev/null
+++ b/vp9/encoder/vp9_block.h
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_ENCODER_VP9_BLOCK_H_
+#define VP9_ENCODER_VP9_BLOCK_H_
+
+#include "vp9/common/vp9_onyx.h"
+#include "vp9/common/vp9_entropymv.h"
+#include "vp9/common/vp9_entropy.h"
+#include "vpx_ports/mem.h"
+#include "vp9/common/vp9_onyxc_int.h"
+
+// One candidate position (site) of a motion search.
+typedef struct {
+ MV mv;  // motion vector of this site (presumably relative to the search centre - confirm)
+ int offset;  // presumably the pixel-buffer offset that corresponds to mv - TODO confirm
+} search_site;
+
+typedef struct block {  // per-block encoder state; holds only pointers and scalars, no storage of its own
+ // 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries
+ short *src_diff;
+ short *coeff;
+// Quantizer tables; the _8x8 / _16x16 members are presumably the variants for those transform sizes - confirm.
+ // 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries
+ short *quant;
+ short *quant_fast; // fast quant deprecated for now
+ unsigned char *quant_shift;
+ short *zbin;
+ short *zbin_8x8;
+ short *zbin_16x16;
+ short *zrun_zbin_boost;
+ short *zrun_zbin_boost_8x8;
+ short *zrun_zbin_boost_16x16;
+ short *round;
+
+ // Zbin Over Quant value
+ short zbin_extra;
+// Source pixels: pointer-to-pointer bases plus an int offset and stride (presumably src indexes *base_src - confirm).
+ unsigned char **base_src;
+ unsigned char **base_second_src;
+ int src;
+ int src_stride;
+// NOTE(review): presumably end-of-block position limits, one per transform size - TODO confirm against users.
+ int eob_max_offset;
+ int eob_max_offset_8x8;
+ int eob_max_offset_16x16;
+} BLOCK;
+
+typedef struct {  // per-macroblock partition record: a count plus 16 mode/motion-vector entries
+ int count;  // presumably how many bmi[] entries are in use - TODO confirm
+ struct {
+ B_PREDICTION_MODE mode;  // prediction mode of this entry
+ int_mv mv;  // first motion vector
+ int_mv second_mv;  // second motion vector (presumably for compound prediction - confirm)
+ } bmi[16];
+} PARTITION_INFO;
+
+// Structure to hold a snapshot of the coding context during the mode picking process.
+// TODO: do we need all of these?
+typedef struct {
+ MODE_INFO mic;  // saved mode info
+ PARTITION_INFO partition_info;  // saved by value, not by pointer
+ int_mv best_ref_mv;
+ int_mv second_best_ref_mv;
+ int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REFS];  // indexed [reference frame][candidate]
+ int rate;
+ int distortion;
+ int64_t intra_error;
+ int best_mode_index;
+ int rddiv;  // same names as MACROBLOCK::rddiv / rdmult (presumably copies of them - confirm)
+ int rdmult;
+ int hybrid_pred_diff;
+ int comp_pred_diff;
+ int single_pred_diff;
+ int64_t txfm_rd_diff[NB_TXFM_MODES];  // one entry per transform mode
+} PICK_MODE_CONTEXT;
+
+typedef struct macroblock {  // encoder-side working state for one macroblock
+ DECLARE_ALIGNED(16, short, src_diff[400]); // 16x16 Y 8x8 U 8x8 V 4x4 2nd Y (256 + 64 + 64 + 16 = 400)
+ DECLARE_ALIGNED(16, short, coeff[400]); // 16x16 Y 8x8 U 8x8 V 4x4 2nd Y (256 + 64 + 64 + 16 = 400)
+#if !CONFIG_SUPERBLOCKS
+ DECLARE_ALIGNED(16, unsigned char, thismb[256]); // 16x16 Y
+// thismb and thismb_ptr exist only when CONFIG_SUPERBLOCKS is disabled.
+ unsigned char *thismb_ptr;
+#endif
+ // 16 Y blocks, 4 U blocks, 4 V blocks,
+ // 1 DC 2nd order block each with 16 entries
+ BLOCK block[25];
+
+ YV12_BUFFER_CONFIG src;
+
+ MACROBLOCKD e_mbd;
+ PARTITION_INFO *partition_info; /* work pointer */
+ PARTITION_INFO *pi; /* Corresponds to upper left visible macroblock */
+ PARTITION_INFO *pip; /* Base of allocated array */
+// Motion search pattern: ss points at search_site entries (presumably ss_count of them - confirm).
+ search_site *ss;
+ int ss_count;
+ int searches_per_step;
+// Rate/distortion and SAD weighting factors, followed by activity-based adjustments.
+ int errorperbit;
+ int sadperbit16;
+ int sadperbit4;
+ int rddiv;
+ int rdmult;
+ unsigned int *mb_activity_ptr;
+ int *mb_norm_activity_ptr;
+ signed int act_zbin_adj;
+// Motion vector cost tables; the pointer members presumably point into the arrays beside them - TODO confirm.
+ int nmvjointcost[MV_JOINTS];
+ int nmvcosts[2][MV_VALS];
+ int *nmvcost[2];
+ int nmvcosts_hp[2][MV_VALS];
+ int *nmvcost_hp[2];
+ int **mvcost;
+// Same set of tables again, under the "sad" names.
+ int nmvjointsadcost[MV_JOINTS];
+ int nmvsadcosts[2][MV_VALS];
+ int *nmvsadcost[2];
+ int nmvsadcosts_hp[2][MV_VALS];
+ int *nmvsadcost_hp[2];
+ int **mvsadcost;
+// Per-mode cost tables.
+ int mbmode_cost[2][MB_MODE_COUNT];
+ int intra_uv_mode_cost[2][MB_MODE_COUNT];
+ int bmode_costs[VP9_KF_BINTRAMODES][VP9_KF_BINTRAMODES][VP9_KF_BINTRAMODES];
+ int i8x8_mode_costs[MB_MODE_COUNT];
+ int inter_bmode_costs[B_MODE_COUNT];
+ int switchable_interp_costs[VP9_SWITCHABLE_FILTERS + 1]
+ [VP9_SWITCHABLE_FILTERS];
+
+ // These define limits to motion vector components to prevent them
+ // from extending outside the UMV borders
+ int mv_col_min;
+ int mv_col_max;
+ int mv_row_min;
+ int mv_row_max;
+
+ int skip;
+
+ int encode_breakout;
+
+ // char * gf_active_ptr;
+ signed char *gf_active_ptr;
+
+ unsigned char *active_ptr;
+// Token cost tables indexed [transform size][block type][coefficient band][previous context][token].
+ unsigned int token_costs[TX_SIZE_MAX][BLOCK_TYPES][COEF_BANDS]
+ [PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS];
+ unsigned int hybrid_token_costs[TX_SIZE_MAX][BLOCK_TYPES][COEF_BANDS]
+ [PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS];
+
+ int optimize;
+
+ // Structure to hold context for each of the 4 MBs within a SB:
+ // when encoded as 4 independent MBs:
+ PICK_MODE_CONTEXT mb_context[4];
+#if CONFIG_SUPERBLOCKS
+ // when 4 MBs share coding parameters:
+ PICK_MODE_CONTEXT sb_context[4];
+#endif
+// Function pointers for the forward transforms and quantizers used by the encoder.
+ void (*vp9_short_fdct4x4)(short *input, short *output, int pitch);
+ void (*vp9_short_fdct8x4)(short *input, short *output, int pitch);
+ void (*short_walsh4x4)(short *input, short *output, int pitch);
+ void (*quantize_b_4x4)(BLOCK *b, BLOCKD *d);
+ void (*quantize_b_4x4_pair)(BLOCK *b1, BLOCK *b2, BLOCKD *d0, BLOCKD *d1);
+ void (*vp9_short_fdct8x8)(short *input, short *output, int pitch);
+ void (*vp9_short_fdct16x16)(short *input, short *output, int pitch);
+ void (*short_fhaar2x2)(short *input, short *output, int pitch);
+ void (*quantize_b_16x16)(BLOCK *b, BLOCKD *d);
+ void (*quantize_b_8x8)(BLOCK *b, BLOCKD *d);
+ void (*quantize_b_2x2)(BLOCK *b, BLOCKD *d);
+
+} MACROBLOCK;
+
+
+#endif
diff --git a/vp9/encoder/vp9_boolhuff.c b/vp9/encoder/vp9_boolhuff.c
new file mode 100644
index 0000000..d1b1e0e
--- /dev/null
+++ b/vp9/encoder/vp9_boolhuff.c
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include "vp9/encoder/vp9_boolhuff.h"
+
+#if defined(SECTIONBITS_OUTPUT)
+unsigned __int64 Sectionbits[500];  // per-section cost totals accumulated by encode_bool
+// NOTE(review): `unsigned __int64` is a compiler-specific (Microsoft-style) type, not ISO C.
+#endif
+// Index into Sectionbits; encode_bool reads it only when both macros are defined.
+#ifdef ENTROPY_STATS
+unsigned int active_section = 0;
+#endif
+
+const unsigned int vp9_prob_cost[256] = {  // NOTE(review): looks like ~256 * -log2(i / 256) capped at 2047 ([128] = 255, [64] = 511) - confirm
+ 2047, 2047, 1791, 1641, 1535, 1452, 1385, 1328, 1279, 1235, 1196, 1161, 1129, 1099, 1072, 1046,
+ 1023, 1000, 979, 959, 940, 922, 905, 889, 873, 858, 843, 829, 816, 803, 790, 778,
+ 767, 755, 744, 733, 723, 713, 703, 693, 684, 675, 666, 657, 649, 641, 633, 625,
+ 617, 609, 602, 594, 587, 580, 573, 567, 560, 553, 547, 541, 534, 528, 522, 516,
+ 511, 505, 499, 494, 488, 483, 477, 472, 467, 462, 457, 452, 447, 442, 437, 433,
+ 428, 424, 419, 415, 410, 406, 401, 397, 393, 389, 385, 381, 377, 373, 369, 365,
+ 361, 357, 353, 349, 346, 342, 338, 335, 331, 328, 324, 321, 317, 314, 311, 307,
+ 304, 301, 297, 294, 291, 288, 285, 281, 278, 275, 272, 269, 266, 263, 260, 257,
+ 255, 252, 249, 246, 243, 240, 238, 235, 232, 229, 227, 224, 221, 219, 216, 214,
+ 211, 208, 206, 203, 201, 198, 196, 194, 191, 189, 186, 184, 181, 179, 177, 174,
+ 172, 170, 168, 165, 163, 161, 159, 156, 154, 152, 150, 148, 145, 143, 141, 139,
+ 137, 135, 133, 131, 129, 127, 125, 123, 121, 119, 117, 115, 113, 111, 109, 107,
+ 105, 103, 101, 99, 97, 95, 93, 92, 90, 88, 86, 84, 82, 81, 79, 77,
+ 75, 73, 72, 70, 68, 66, 65, 63, 61, 60, 58, 56, 55, 53, 51, 50,
+ 48, 46, 45, 43, 41, 40, 38, 37, 35, 33, 32, 30, 29, 27, 25, 24,
+ 22, 21, 19, 18, 16, 15, 13, 12, 10, 9, 7, 6, 4, 3, 1, 1
+};
+
+void vp9_start_encode(BOOL_CODER *br, unsigned char *source) {
+  // Attach the output buffer, then reset the coder state; count starts at -24
+  // and encode_bool only emits a byte once count has climbed back to >= 0.
+  br->buffer = source;
+  br->pos = 0;
+  br->count = -24;
+  br->range = 255;
+  br->lowvalue = br->value = 0;
+}
+
+void vp9_stop_encode(BOOL_CODER *br) {
+  // Pads the stream with 32 zero bools coded at probability 128; presumably
+  // this pushes out the bits still held in the coder state - confirm.
+  int remaining = 32;
+  while (remaining-- > 0) encode_bool(br, 0, 128);
+}
+
+
+void vp9_encode_value(BOOL_CODER *br, int data, int bits) {
+  // Write the low `bits` bits of data, most significant first, each coded as
+  // an even-odds bool (probability 0x80). Nothing is written when bits <= 0.
+  int remaining = bits;
+  while (--remaining >= 0) encode_bool(br, (data >> remaining) & 1, 0x80);
+}
+
+void vp9_encode_unsigned_max(BOOL_CODER *br, int data, int max) {
+  // Emits data least-significant bit first, one even-odds bool per iteration,
+  // for as long as max (shifted right each time) is non-zero.
+  assert(data <= max);
+  for (; max != 0; max >>= 1, data >>= 1) {
+    encode_bool(br, data & 1, 128);
+  }
+}
+
+int vp9_recenter_nonneg(int v, int m) {  // remaps v around the centre value m
+  const int twice_m = m << 1;
+  if (v > twice_m) return v;  // beyond 2 * m: returned unchanged
+  return (v >= m) ? ((v - m) << 1) : (((m - v) << 1) - 1);  // even at/above m, odd below
+}
+
+static int get_unsigned_bits(unsigned num_values) {
+  // Returns the bit length of num_values - 1, i.e. the bits needed to index
+  // num_values symbols; zero or one symbol needs no bits at all.
+  unsigned top;
+  int nbits;
+  if (num_values <= 1) return 0;
+  for (top = num_values - 1, nbits = 0; top != 0; top >>= 1) ++nbits;
+  return nbits;
+}
+
+void vp9_encode_uniform(BOOL_CODER *br, int v, int n) {
+  // With l = get_unsigned_bits(n) and m = (1 << l) - n, values below m are
+  // written in l - 1 bits and all other values in l bits.
+  const int nbits = get_unsigned_bits(n);
+  int short_codes, excess;
+  if (nbits == 0) return;  // n of 0 or 1: nothing to send
+  short_codes = (1 << nbits) - n;
+  if (v < short_codes) { vp9_encode_value(br, v, nbits - 1); return; }
+  excess = v - short_codes;
+  vp9_encode_value(br, short_codes + (excess >> 1), nbits - 1);
+  vp9_encode_value(br, excess & 1, 1);  // trailing bit picks one of the pair
+}
+
+int vp9_count_uniform(int v, int n) {
+  // Returns how many bits vp9_encode_uniform(br, v, n) writes: l - 1 for the
+  // values below m = (1 << l) - n and l for the others, where l is
+  // get_unsigned_bits(n); the result is 0 when l is 0.
+  const int nbits = get_unsigned_bits(n);
+  int short_codes;
+  if (nbits == 0) return 0;
+  short_codes = (1 << nbits) - n;
+  return (v < short_codes) ? nbits - 1 : nbits;
+}
+
+void vp9_encode_term_subexp(BOOL_CODER *br, int word, int k, int num_syms) {
+  // Walks buckets of width 1 << b (b is k for the first two buckets, then one
+  // more per step); base is the first value covered by the current bucket.
+  int step, base = 0;
+  for (step = 0;; ++step) {
+    const int width_bits = (step != 0) ? k + step - 1 : k;
+    const int width = 1 << width_bits;
+    int beyond;
+    if (num_syms <= base + 3 * width) {
+      // Few symbols remain: finish with vp9_encode_uniform over the rest.
+      vp9_encode_uniform(br, word - base, num_syms - base);
+      return;
+    }
+    beyond = (word >= base + width);
+    vp9_encode_value(br, beyond, 1);  // flag: 1 = word lies past this bucket
+    if (!beyond) {
+      vp9_encode_value(br, word - base, width_bits);  // offset in the bucket
+      return;
+    }
+    base += width;
+  }
+}
+
+int vp9_count_term_subexp(int word, int k, int num_syms) {
+  // Bit count of vp9_encode_term_subexp(br, word, k, num_syms): the same
+  // bucket walk is performed, but bits are tallied instead of written.
+  int total = 0;
+  int base = 0;
+  int step;
+  for (step = 0;; ++step) {
+    // Bucket width is 1 << k for the first two steps, then doubles each step.
+    const int width_bits = (step != 0) ? k + step - 1 : k;
+    const int width = 1 << width_bits;
+    if (num_syms <= base + 3 * width) {
+      // Tail case: the remaining symbols are counted via vp9_count_uniform.
+      total += vp9_count_uniform(word - base, num_syms - base);
+      break;
+    }
+    ++total;  // one-bit flag: is word beyond this bucket?
+    if (word < base + width) {
+      total += width_bits;  // offset inside the bucket ends the code
+      break;
+    }
+    base += width;
+  }
+  return total;
+}
diff --git a/vp9/encoder/vp9_boolhuff.h b/vp9/encoder/vp9_boolhuff.h
new file mode 100644
index 0000000..0d42ecf
--- /dev/null
+++ b/vp9/encoder/vp9_boolhuff.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/****************************************************************************
+*
+* Module Title : vp9_boolhuff.h
+*
+* Description : Bool Coder header file.
+*
+****************************************************************************/
+#ifndef VP9_ENCODER_VP9_BOOLHUFF_H_
+#define VP9_ENCODER_VP9_BOOLHUFF_H_
+
+#include "vpx_ports/mem.h"
+
+typedef struct {
+ unsigned int lowvalue;  // low end of the current coding interval (updated by encode_bool)
+ unsigned int range;  // current interval size; vp9_start_encode sets it to 255
+ unsigned int value;  // zeroed by vp9_start_encode; not read by encode_bool
+ int count;  // starts at -24; encode_bool emits a byte whenever it reaches >= 0
+ unsigned int pos;  // index of the next byte to write in buffer
+ unsigned char *buffer;  // output buffer passed to vp9_start_encode
+
+ // Variables used to track bit costs without outputting to the bitstream
+ unsigned int measure_cost;
+ unsigned long bit_counter;
+} BOOL_CODER;
+
+extern void vp9_start_encode(BOOL_CODER *bc, unsigned char *buffer);
+// Even-odds writers: vp9_encode_value emits MSB first, vp9_encode_unsigned_max emits LSB first.
+extern void vp9_encode_value(BOOL_CODER *br, int data, int bits);
+extern void vp9_encode_unsigned_max(BOOL_CODER *br, int data, int max);
+extern void vp9_stop_encode(BOOL_CODER *bc);
+extern const unsigned int vp9_prob_cost[256];
+// Each vp9_count_* function returns the number of bits the matching vp9_encode_* call writes.
+extern void vp9_encode_uniform(BOOL_CODER *bc, int v, int n);
+extern void vp9_encode_term_subexp(BOOL_CODER *bc, int v, int k, int n);
+extern int vp9_count_uniform(int v, int n);
+extern int vp9_count_term_subexp(int v, int k, int n);
+extern int vp9_recenter_nonneg(int v, int m);
+// vp9_norm supplies the renormalisation shift that encode_bool looks up by range.
+DECLARE_ALIGNED(16, extern const unsigned char, vp9_norm[256]);
+
+
+static void encode_bool(BOOL_CODER *br, int bit, int probability) {  // static in a header: one copy per including file
+ unsigned int split;
+ int count = br->count;
+ unsigned int range = br->range;
+ unsigned int lowvalue = br->lowvalue;
+ register unsigned int shift;
+// Encodes one bool into br; `probability` (out of 256) sizes the sub-range used when bit is 0.
+#ifdef ENTROPY_STATS
+#if defined(SECTIONBITS_OUTPUT)
+// Optional statistics: add this bool's cost from vp9_prob_cost to the active section's total.
+ if (bit)
+ Sectionbits[active_section] += vp9_prob_cost[255 - probability];
+ else
+ Sectionbits[active_section] += vp9_prob_cost[probability];
+
+#endif
+#endif
+// split is the size of the sub-range assigned to bit == 0; the leading "1 +" keeps it at least 1.
+ split = 1 + (((range - 1) * probability) >> 8);
+
+ range = split;
+
+ if (bit) {  // bit == 1 takes the remainder of the range above split
+ lowvalue += split;
+ range = br->range - split;
+ }
+
+ shift = vp9_norm[range];  // renormalisation shift looked up from vp9_norm (table defined elsewhere)
+
+ range <<= shift;
+ count += shift;
+
+ if (count >= 0) {  // enough bits have accumulated: emit one byte
+ int offset = shift - count;
+// If bit 31 is set after shifting by offset - 1, a carry is propagated into the bytes already written.
+ if ((lowvalue << (offset - 1)) & 0x80000000) {
+ int x = br->pos - 1;
+// Carry propagation: trailing 0xff bytes wrap to 0 and the byte before them is incremented.
+ while (x >= 0 && br->buffer[x] == 0xff) {
+ br->buffer[x] = (unsigned char)0;
+ x--;
+ }
+// NOTE(review): x is -1 here if no byte was written yet or all of them were 0xff - confirm unreachable.
+ br->buffer[x] += 1;
+ }
+// NOTE(review): pos advances with no bound check; this function has no knowledge of the buffer size.
+ br->buffer[br->pos++] = (lowvalue >> (24 - offset));
+ lowvalue <<= offset;
+ shift = count;
+ lowvalue &= 0xffffff;
+ count -= 8;
+ }
+
+ lowvalue <<= shift;
+ br->count = count;
+ br->lowvalue = lowvalue;
+ br->range = range;
+}
+
+#endif
diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c
new file mode 100644
index 0000000..38df239
--- /dev/null
+++ b/vp9/encoder/vp9_dct.c
@@ -0,0 +1,1332 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include <assert.h>
+#include <math.h>
+#include "./vpx_config.h"
+#include "vp9/common/vp9_systemdependent.h"
+
+#include "vp9/common/vp9_blockd.h"
+
+// TODO: these floating-point transform matrices could be replaced by integer
+// forms (see the *_i4, *_i8 and *_i16 tables below) to reduce the complexity.
+static const float dct_4[16] = {
+ 0.500000000000000, 0.500000000000000, 0.500000000000000, 0.500000000000000,
+ 0.653281482438188, 0.270598050073099, -0.270598050073099, -0.653281482438188,
+ 0.500000000000000, -0.500000000000000, -0.500000000000000, 0.500000000000000,
+ 0.270598050073099, -0.653281482438188, 0.653281482438188, -0.270598050073099
+};
+
+static const float adst_4[16] = {
+ 0.228013428883779, 0.428525073124360, 0.577350269189626, 0.656538502008139,
+ 0.577350269189626, 0.577350269189626, 0.000000000000000, -0.577350269189626,
+ 0.656538502008139, -0.228013428883779, -0.577350269189626, 0.428525073124359,
+ 0.428525073124360, -0.656538502008139, 0.577350269189626, -0.228013428883779
+};
+
+static const float dct_8[64] = {
+ 0.353553390593274, 0.353553390593274, 0.353553390593274, 0.353553390593274,
+ 0.353553390593274, 0.353553390593274, 0.353553390593274, 0.353553390593274,
+ 0.490392640201615, 0.415734806151273, 0.277785116509801, 0.097545161008064,
+ -0.097545161008064, -0.277785116509801, -0.415734806151273, -0.490392640201615,
+ 0.461939766255643, 0.191341716182545, -0.191341716182545, -0.461939766255643,
+ -0.461939766255643, -0.191341716182545, 0.191341716182545, 0.461939766255643,
+ 0.415734806151273, -0.097545161008064, -0.490392640201615, -0.277785116509801,
+ 0.277785116509801, 0.490392640201615, 0.097545161008064, -0.415734806151273,
+ 0.353553390593274, -0.353553390593274, -0.353553390593274, 0.353553390593274,
+ 0.353553390593274, -0.353553390593274, -0.353553390593274, 0.353553390593274,
+ 0.277785116509801, -0.490392640201615, 0.097545161008064, 0.415734806151273,
+ -0.415734806151273, -0.097545161008064, 0.490392640201615, -0.277785116509801,
+ 0.191341716182545, -0.461939766255643, 0.461939766255643, -0.191341716182545,
+ -0.191341716182545, 0.461939766255643, -0.461939766255643, 0.191341716182545,
+ 0.097545161008064, -0.277785116509801, 0.415734806151273, -0.490392640201615,
+ 0.490392640201615, -0.415734806151273, 0.277785116509801, -0.097545161008064
+};
+
+static const float adst_8[64] = {
+ 0.089131608307533, 0.175227946595735, 0.255357107325376, 0.326790388032145,
+ 0.387095214016349, 0.434217976756762, 0.466553967085785, 0.483002021635509,
+ 0.255357107325376, 0.434217976756762, 0.483002021635509, 0.387095214016349,
+ 0.175227946595735, -0.089131608307533, -0.326790388032145, -0.466553967085785,
+ 0.387095214016349, 0.466553967085785, 0.175227946595735, -0.255357107325376,
+ -0.483002021635509, -0.326790388032145, 0.089131608307533, 0.434217976756762,
+ 0.466553967085785, 0.255357107325376, -0.326790388032145, -0.434217976756762,
+ 0.089131608307533, 0.483002021635509, 0.175227946595735, -0.387095214016348,
+ 0.483002021635509, -0.089131608307533, -0.466553967085785, 0.175227946595735,
+ 0.434217976756762, -0.255357107325376, -0.387095214016348, 0.326790388032145,
+ 0.434217976756762, -0.387095214016348, -0.089131608307533, 0.466553967085786,
+ -0.326790388032145, -0.175227946595735, 0.483002021635509, -0.255357107325375,
+ 0.326790388032145, -0.483002021635509, 0.387095214016349, -0.089131608307534,
+ -0.255357107325377, 0.466553967085785, -0.434217976756762, 0.175227946595736,
+ 0.175227946595735, -0.326790388032145, 0.434217976756762, -0.483002021635509,
+ 0.466553967085785, -0.387095214016348, 0.255357107325376, -0.089131608307532
+};
+
+/* Integer versions of the 4- and 8-point matrices above, scaled by 2^15 (e.g. 0.5 -> 16384). */
+static const int16_t dct_i4[16] = {
+ 16384, 16384, 16384, 16384,
+ 21407, 8867, -8867, -21407,
+ 16384, -16384, -16384, 16384,
+ 8867, -21407, 21407, -8867
+};
+
+static const int16_t adst_i4[16] = {
+ 7472, 14042, 18919, 21513,
+ 18919, 18919, 0, -18919,
+ 21513, -7472, -18919, 14042,
+ 14042, -21513, 18919, -7472
+};
+
+static const int16_t dct_i8[64] = {
+ 11585, 11585, 11585, 11585,
+ 11585, 11585, 11585, 11585,
+ 16069, 13623, 9102, 3196,
+ -3196, -9102, -13623, -16069,
+ 15137, 6270, -6270, -15137,
+ -15137, -6270, 6270, 15137,
+ 13623, -3196, -16069, -9102,
+ 9102, 16069, 3196, -13623,
+ 11585, -11585, -11585, 11585,
+ 11585, -11585, -11585, 11585,
+ 9102, -16069, 3196, 13623,
+ -13623, -3196, 16069, -9102,
+ 6270, -15137, 15137, -6270,
+ -6270, 15137, -15137, 6270,
+ 3196, -9102, 13623, -16069,
+ 16069, -13623, 9102, -3196
+};
+
+static const int16_t adst_i8[64] = {
+ 2921, 5742, 8368, 10708,
+ 12684, 14228, 15288, 15827,
+ 8368, 14228, 15827, 12684,
+ 5742, -2921, -10708, -15288,
+ 12684, 15288, 5742, -8368,
+ -15827, -10708, 2921, 14228,
+ 15288, 8368, -10708, -14228,
+ 2921, 15827, 5742, -12684,
+ 15827, -2921, -15288, 5742,
+ 14228, -8368, -12684, 10708,
+ 14228, -12684, -2921, 15288,
+ -10708, -5742, 15827, -8368,
+ 10708, -15827, 12684, -2921,
+ -8368, 15288, -14228, 5742,
+ 5742, -10708, 14228, -15827,
+ 15288, -12684, 8368, -2921
+};
+
+static const float dct_16[256] = {
+ 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000,
+ 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000,
+ 0.351851, 0.338330, 0.311806, 0.273300, 0.224292, 0.166664, 0.102631, 0.034654,
+ -0.034654, -0.102631, -0.166664, -0.224292, -0.273300, -0.311806, -0.338330, -0.351851,
+ 0.346760, 0.293969, 0.196424, 0.068975, -0.068975, -0.196424, -0.293969, -0.346760,
+ -0.346760, -0.293969, -0.196424, -0.068975, 0.068975, 0.196424, 0.293969, 0.346760,
+ 0.338330, 0.224292, 0.034654, -0.166664, -0.311806, -0.351851, -0.273300, -0.102631,
+ 0.102631, 0.273300, 0.351851, 0.311806, 0.166664, -0.034654, -0.224292, -0.338330,
+ 0.326641, 0.135299, -0.135299, -0.326641, -0.326641, -0.135299, 0.135299, 0.326641,
+ 0.326641, 0.135299, -0.135299, -0.326641, -0.326641, -0.135299, 0.135299, 0.326641,
+ 0.311806, 0.034654, -0.273300, -0.338330, -0.102631, 0.224292, 0.351851, 0.166664,
+ -0.166664, -0.351851, -0.224292, 0.102631, 0.338330, 0.273300, -0.034654, -0.311806,
+ 0.293969, -0.068975, -0.346760, -0.196424, 0.196424, 0.346760, 0.068975, -0.293969,
+ -0.293969, 0.068975, 0.346760, 0.196424, -0.196424, -0.346760, -0.068975, 0.293969,
+ 0.273300, -0.166664, -0.338330, 0.034654, 0.351851, 0.102631, -0.311806, -0.224292,
+ 0.224292, 0.311806, -0.102631, -0.351851, -0.034654, 0.338330, 0.166664, -0.273300,
+ 0.250000, -0.250000, -0.250000, 0.250000, 0.250000, -0.250000, -0.250000, 0.250000,
+ 0.250000, -0.250000, -0.250000, 0.250000, 0.250000, -0.250000, -0.250000, 0.250000,
+ 0.224292, -0.311806, -0.102631, 0.351851, -0.034654, -0.338330, 0.166664, 0.273300,
+ -0.273300, -0.166664, 0.338330, 0.034654, -0.351851, 0.102631, 0.311806, -0.224292,
+ 0.196424, -0.346760, 0.068975, 0.293969, -0.293969, -0.068975, 0.346760, -0.196424,
+ -0.196424, 0.346760, -0.068975, -0.293969, 0.293969, 0.068975, -0.346760, 0.196424,
+ 0.166664, -0.351851, 0.224292, 0.102631, -0.338330, 0.273300, 0.034654, -0.311806,
+ 0.311806, -0.034654, -0.273300, 0.338330, -0.102631, -0.224292, 0.351851, -0.166664,
+ 0.135299, -0.326641, 0.326641, -0.135299, -0.135299, 0.326641, -0.326641, 0.135299,
+ 0.135299, -0.326641, 0.326641, -0.135299, -0.135299, 0.326641, -0.326641, 0.135299,
+ 0.102631, -0.273300, 0.351851, -0.311806, 0.166664, 0.034654, -0.224292, 0.338330,
+ -0.338330, 0.224292, -0.034654, -0.166664, 0.311806, -0.351851, 0.273300, -0.102631,
+ 0.068975, -0.196424, 0.293969, -0.346760, 0.346760, -0.293969, 0.196424, -0.068975,
+ -0.068975, 0.196424, -0.293969, 0.346760, -0.346760, 0.293969, -0.196424, 0.068975,
+ 0.034654, -0.102631, 0.166664, -0.224292, 0.273300, -0.311806, 0.338330, -0.351851,
+ 0.351851, -0.338330, 0.311806, -0.273300, 0.224292, -0.166664, 0.102631, -0.034654
+};
+
+static const float adst_16[256] = {
+ 0.033094, 0.065889, 0.098087, 0.129396, 0.159534, 0.188227, 0.215215, 0.240255,
+ 0.263118, 0.283599, 0.301511, 0.316693, 0.329007, 0.338341, 0.344612, 0.347761,
+ 0.098087, 0.188227, 0.263118, 0.316693, 0.344612, 0.344612, 0.316693, 0.263118,
+ 0.188227, 0.098087, 0.000000, -0.098087, -0.188227, -0.263118, -0.316693, -0.344612,
+ 0.159534, 0.283599, 0.344612, 0.329007, 0.240255, 0.098087, -0.065889, -0.215215,
+ -0.316693, -0.347761, -0.301511, -0.188227, -0.033094, 0.129396, 0.263118, 0.338341,
+ 0.215215, 0.338341, 0.316693, 0.159534, -0.065889, -0.263118, -0.347761, -0.283599,
+ -0.098087, 0.129396, 0.301511, 0.344612, 0.240255, 0.033094, -0.188227, -0.329007,
+ 0.263118, 0.344612, 0.188227, -0.098087, -0.316693, -0.316693, -0.098087, 0.188227,
+ 0.344612, 0.263118, 0.000000, -0.263118, -0.344612, -0.188227, 0.098087, 0.316693,
+ 0.301511, 0.301511, 0.000000, -0.301511, -0.301511, -0.000000, 0.301511, 0.301511,
+ 0.000000, -0.301511, -0.301511, -0.000000, 0.301511, 0.301511, 0.000000, -0.301511,
+ 0.329007, 0.215215, -0.188227, -0.338341, -0.033094, 0.316693, 0.240255, -0.159534,
+ -0.344612, -0.065889, 0.301511, 0.263118, -0.129396, -0.347761, -0.098087, 0.283599,
+ 0.344612, 0.098087, -0.316693, -0.188227, 0.263118, 0.263118, -0.188227, -0.316693,
+ 0.098087, 0.344612, 0.000000, -0.344612, -0.098087, 0.316693, 0.188227, -0.263118,
+ 0.347761, -0.033094, -0.344612, 0.065889, 0.338341, -0.098087, -0.329007, 0.129396,
+ 0.316693, -0.159534, -0.301511, 0.188227, 0.283599, -0.215215, -0.263118, 0.240255,
+ 0.338341, -0.159534, -0.263118, 0.283599, 0.129396, -0.344612, 0.033094, 0.329007,
+ -0.188227, -0.240255, 0.301511, 0.098087, -0.347761, 0.065889, 0.316693, -0.215215,
+ 0.316693, -0.263118, -0.098087, 0.344612, -0.188227, -0.188227, 0.344612, -0.098087,
+ -0.263118, 0.316693, 0.000000, -0.316693, 0.263118, 0.098087, -0.344612, 0.188227,
+ 0.283599, -0.329007, 0.098087, 0.215215, -0.347761, 0.188227, 0.129396, -0.338341,
+ 0.263118, 0.033094, -0.301511, 0.316693, -0.065889, -0.240255, 0.344612, -0.159534,
+ 0.240255, -0.347761, 0.263118, -0.033094, -0.215215, 0.344612, -0.283599, 0.065889,
+ 0.188227, -0.338341, 0.301511, -0.098087, -0.159534, 0.329007, -0.316693, 0.129396,
+ 0.188227, -0.316693, 0.344612, -0.263118, 0.098087, 0.098087, -0.263118, 0.344612,
+ -0.316693, 0.188227, 0.000000, -0.188227, 0.316693, -0.344612, 0.263118, -0.098087,
+ 0.129396, -0.240255, 0.316693, -0.347761, 0.329007, -0.263118, 0.159534, -0.033094,
+ -0.098087, 0.215215, -0.301511, 0.344612, -0.338341, 0.283599, -0.188227, 0.065889,
+ 0.065889, -0.129396, 0.188227, -0.240255, 0.283599, -0.316693, 0.338341, -0.347761,
+ 0.344612, -0.329007, 0.301511, -0.263118, 0.215215, -0.159534, 0.098087, -0.033094
+};
+
+/* Integer versions of the 16-point matrices above, scaled by 2^15 (e.g. 0.25 -> 8192). */
+static const int16_t dct_i16[256] = {
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+ 11529, 11086, 10217, 8955, 7350, 5461, 3363, 1136,
+ -1136, -3363, -5461, -7350, -8955, -10217, -11086, -11529,
+ 11363, 9633, 6436, 2260, -2260, -6436, -9633, -11363,
+ -11363, -9633, -6436, -2260, 2260, 6436, 9633, 11363,
+ 11086, 7350, 1136, -5461, -10217, -11529, -8955, -3363,
+ 3363, 8955, 11529, 10217, 5461, -1136, -7350, -11086,
+ 10703, 4433, -4433, -10703, -10703, -4433, 4433, 10703,
+ 10703, 4433, -4433, -10703, -10703, -4433, 4433, 10703,
+ 10217, 1136, -8955, -11086, -3363, 7350, 11529, 5461,
+ -5461, -11529, -7350, 3363, 11086, 8955, -1136, -10217,
+ 9633, -2260, -11363, -6436, 6436, 11363, 2260, -9633,
+ -9633, 2260, 11363, 6436, -6436, -11363, -2260, 9633,
+ 8955, -5461, -11086, 1136, 11529, 3363, -10217, -7350,
+ 7350, 10217, -3363, -11529, -1136, 11086, 5461, -8955,
+ 8192, -8192, -8192, 8192, 8192, -8192, -8192, 8192,
+ 8192, -8192, -8192, 8192, 8192, -8192, -8192, 8192,
+ 7350, -10217, -3363, 11529, -1136, -11086, 5461, 8955,
+ -8955, -5461, 11086, 1136, -11529, 3363, 10217, -7350,
+ 6436, -11363, 2260, 9633, -9633, -2260, 11363, -6436,
+ -6436, 11363, -2260, -9633, 9633, 2260, -11363, 6436,
+ 5461, -11529, 7350, 3363, -11086, 8955, 1136, -10217,
+ 10217, -1136, -8955, 11086, -3363, -7350, 11529, -5461,
+ 4433, -10703, 10703, -4433, -4433, 10703, -10703, 4433,
+ 4433, -10703, 10703, -4433, -4433, 10703, -10703, 4433,
+ 3363, -8955, 11529, -10217, 5461, 1136, -7350, 11086,
+ -11086, 7350, -1136, -5461, 10217, -11529, 8955, -3363,
+ 2260, -6436, 9633, -11363, 11363, -9633, 6436, -2260,
+ -2260, 6436, -9633, 11363, -11363, 9633, -6436, 2260,
+ 1136, -3363, 5461, -7350, 8955, -10217, 11086, -11529,
+ 11529, -11086, 10217, -8955, 7350, -5461, 3363, -1136
+};
+
+static const int16_t adst_i16[256] = {
+ 1084, 2159, 3214, 4240, 5228, 6168, 7052, 7873,
+ 8622, 9293, 9880, 10377, 10781, 11087, 11292, 11395,
+ 3214, 6168, 8622, 10377, 11292, 11292, 10377, 8622,
+ 6168, 3214, 0, -3214, -6168, -8622, -10377, -11292,
+ 5228, 9293, 11292, 10781, 7873, 3214, -2159, -7052,
+ -10377, -11395, -9880, -6168, -1084, 4240, 8622, 11087,
+ 7052, 11087, 10377, 5228, -2159, -8622, -11395, -9293,
+ -3214, 4240, 9880, 11292, 7873, 1084, -6168, -10781,
+ 8622, 11292, 6168, -3214, -10377, -10377, -3214, 6168,
+ 11292, 8622, 0, -8622, -11292, -6168, 3214, 10377,
+ 9880, 9880, 0, -9880, -9880, 0, 9880, 9880,
+ 0, -9880, -9880, 0, 9880, 9880, 0, -9880,
+ 10781, 7052, -6168, -11087, -1084, 10377, 7873, -5228,
+ -11292, -2159, 9880, 8622, -4240, -11395, -3214, 9293,
+ 11292, 3214, -10377, -6168, 8622, 8622, -6168, -10377,
+ 3214, 11292, 0, -11292, -3214, 10377, 6168, -8622,
+ 11395, -1084, -11292, 2159, 11087, -3214, -10781, 4240,
+ 10377, -5228, -9880, 6168, 9293, -7052, -8622, 7873,
+ 11087, -5228, -8622, 9293, 4240, -11292, 1084, 10781,
+ -6168, -7873, 9880, 3214, -11395, 2159, 10377, -7052,
+ 10377, -8622, -3214, 11292, -6168, -6168, 11292, -3214,
+ -8622, 10377, 0, -10377, 8622, 3214, -11292, 6168,
+ 9293, -10781, 3214, 7052, -11395, 6168, 4240, -11087,
+ 8622, 1084, -9880, 10377, -2159, -7873, 11292, -5228,
+ 7873, -11395, 8622, -1084, -7052, 11292, -9293, 2159,
+ 6168, -11087, 9880, -3214, -5228, 10781, -10377, 4240,
+ 6168, -10377, 11292, -8622, 3214, 3214, -8622, 11292,
+ -10377, 6168, 0, -6168, 10377, -11292, 8622, -3214,
+ 4240, -7873, 10377, -11395, 10781, -8622, 5228, -1084,
+ -3214, 7052, -9880, 11292, -11087, 9293, -6168, 2159,
+ 2159, -4240, 6168, -7873, 9293, -10377, 11087, -11395,
+ 11292, -10781, 9880, -8622, 7052, -5228, 3214, -1084
+};
+
+static const int xC1S7 = 16069;  // matches round(cos(1 * pi / 16) * 2^14)
+static const int xC2S6 = 15137;  // matches round(cos(2 * pi / 16) * 2^14)
+static const int xC3S5 = 13623;  // matches round(cos(3 * pi / 16) * 2^14)
+static const int xC4S4 = 11585;  // matches round(cos(4 * pi / 16) * 2^14)
+static const int xC5S3 = 9102;  // matches round(cos(5 * pi / 16) * 2^14)
+static const int xC6S2 = 6270;  // matches round(cos(6 * pi / 16) * 2^14)
+static const int xC7S1 = 3196;  // matches round(cos(7 * pi / 16) * 2^14)
+
+#define SHIFT_BITS 14  // right shift applied after each multiply by an xC*S* constant
+#define DOROUND(X) X += (1<<(SHIFT_BITS-1));
+// DOROUND adds half of 1 << SHIFT_BITS ahead of the >> SHIFT_BITS; its expansion already ends in ';'.
+#define FINAL_SHIFT 3  // right shift applied to every output of the column pass
+#define FINAL_ROUNDING (1<<(FINAL_SHIFT -1))  // half of 1 << FINAL_SHIFT, added before that shift
+#define IN_SHIFT (FINAL_SHIFT+1)  // left shift applied to the inputs in the row pass
+
+
+void vp9_short_fdct8x8_c(short *InputData, short *OutputData, int pitch) {
+ int loop;
+ int short_pitch = pitch >> 1;
+ int is07, is12, is34, is56;
+ int is0734, is1256;
+ int id07, id12, id34, id56;
+ int irot_input_x, irot_input_y;
+ int icommon_product1; // Re-used product (c4s4 * (s12 - s56))
+ int icommon_product2; // Re-used product (c4s4 * (d12 + d56))
+ int temp1, temp2; // intermediate variable for computation
+
+ int InterData[64];
+ int *ip = InterData;
+ short *op = OutputData;
+
+ for (loop = 0; loop < 8; loop++) {
+ // Pre calculate some common sums and differences.
+ is07 = (InputData[0] + InputData[7]) << IN_SHIFT;
+ is12 = (InputData[1] + InputData[2]) << IN_SHIFT;
+ is34 = (InputData[3] + InputData[4]) << IN_SHIFT;
+ is56 = (InputData[5] + InputData[6]) << IN_SHIFT;
+ id07 = (InputData[0] - InputData[7]) << IN_SHIFT;
+ id12 = (InputData[1] - InputData[2]) << IN_SHIFT;
+ id34 = (InputData[3] - InputData[4]) << IN_SHIFT;
+ id56 = (InputData[5] - InputData[6]) << IN_SHIFT;
+
+ is0734 = is07 + is34;
+ is1256 = is12 + is56;
+
+ // Pre-Calculate some common product terms.
+ icommon_product1 = xC4S4 * (is12 - is56);
+ DOROUND(icommon_product1)
+ icommon_product1 >>= SHIFT_BITS;
+
+ icommon_product2 = xC4S4 * (id12 + id56);
+ DOROUND(icommon_product2)
+ icommon_product2 >>= SHIFT_BITS;
+
+
+ ip[0] = (xC4S4 * (is0734 + is1256));
+ DOROUND(ip[0]);
+ ip[0] >>= SHIFT_BITS;
+
+ ip[4] = (xC4S4 * (is0734 - is1256));
+ DOROUND(ip[4]);
+ ip[4] >>= SHIFT_BITS;
+
+ // Define inputs to rotation for outputs 2 and 6
+ irot_input_x = id12 - id56;
+ irot_input_y = is07 - is34;
+
+ // Apply rotation for outputs 2 and 6.
+ temp1 = xC6S2 * irot_input_x;
+ DOROUND(temp1);
+ temp1 >>= SHIFT_BITS;
+ temp2 = xC2S6 * irot_input_y;
+ DOROUND(temp2);
+ temp2 >>= SHIFT_BITS;
+ ip[2] = temp1 + temp2;
+
+ temp1 = xC6S2 * irot_input_y;
+ DOROUND(temp1);
+ temp1 >>= SHIFT_BITS;
+ temp2 = xC2S6 * irot_input_x;
+ DOROUND(temp2);
+ temp2 >>= SHIFT_BITS;
+ ip[6] = temp1 - temp2;
+
+ // Define inputs to rotation for outputs 1 and 7
+ irot_input_x = icommon_product1 + id07;
+ irot_input_y = -(id34 + icommon_product2);
+
+ // Apply rotation for outputs 1 and 7.
+ temp1 = xC1S7 * irot_input_x;
+ DOROUND(temp1);
+ temp1 >>= SHIFT_BITS;
+ temp2 = xC7S1 * irot_input_y;
+ DOROUND(temp2);
+ temp2 >>= SHIFT_BITS;
+ ip[1] = temp1 - temp2;
+
+ temp1 = xC7S1 * irot_input_x;
+ DOROUND(temp1);
+ temp1 >>= SHIFT_BITS;
+ temp2 = xC1S7 * irot_input_y;
+ DOROUND(temp2);
+ temp2 >>= SHIFT_BITS;
+ ip[7] = temp1 + temp2;
+
+ // Define inputs to rotation for outputs 3 and 5
+ irot_input_x = id07 - icommon_product1;
+ irot_input_y = id34 - icommon_product2;
+
+ // Apply rotation for outputs 3 and 5.
+ temp1 = xC3S5 * irot_input_x;
+ DOROUND(temp1);
+ temp1 >>= SHIFT_BITS;
+ temp2 = xC5S3 * irot_input_y;
+ DOROUND(temp2);
+ temp2 >>= SHIFT_BITS;
+ ip[3] = temp1 - temp2;
+
+
+ temp1 = xC5S3 * irot_input_x;
+ DOROUND(temp1);
+ temp1 >>= SHIFT_BITS;
+ temp2 = xC3S5 * irot_input_y;
+ DOROUND(temp2);
+ temp2 >>= SHIFT_BITS;
+ ip[5] = temp1 + temp2;
+
+ // Increment data pointer for next row
+ InputData += short_pitch;
+ ip += 8;
+ }
+
+ // Performed DCT on rows, now transform the columns
+ ip = InterData;
+ for (loop = 0; loop < 8; loop++) {
+ // Pre calculate some common sums and differences.
+ is07 = ip[0 * 8] + ip[7 * 8];
+ is12 = ip[1 * 8] + ip[2 * 8];
+ is34 = ip[3 * 8] + ip[4 * 8];
+ is56 = ip[5 * 8] + ip[6 * 8];
+
+ id07 = ip[0 * 8] - ip[7 * 8];
+ id12 = ip[1 * 8] - ip[2 * 8];
+ id34 = ip[3 * 8] - ip[4 * 8];
+ id56 = ip[5 * 8] - ip[6 * 8];
+
+ is0734 = is07 + is34;
+ is1256 = is12 + is56;
+
+ // Pre-Calculate some common product terms
+ icommon_product1 = xC4S4 * (is12 - is56);
+ icommon_product2 = xC4S4 * (id12 + id56);
+ DOROUND(icommon_product1)
+ DOROUND(icommon_product2)
+ icommon_product1 >>= SHIFT_BITS;
+ icommon_product2 >>= SHIFT_BITS;
+
+
+ temp1 = xC4S4 * (is0734 + is1256);
+ temp2 = xC4S4 * (is0734 - is1256);
+ DOROUND(temp1);
+ DOROUND(temp2);
+ temp1 >>= SHIFT_BITS;
+
+ temp2 >>= SHIFT_BITS;
+ op[0 * 8] = (temp1 + FINAL_ROUNDING) >> FINAL_SHIFT;
+ op[4 * 8] = (temp2 + FINAL_ROUNDING) >> FINAL_SHIFT;
+
+ // Define inputs to rotation for outputs 2 and 6
+ irot_input_x = id12 - id56;
+ irot_input_y = is07 - is34;
+
+ // Apply rotation for outputs 2 and 6.
+ temp1 = xC6S2 * irot_input_x;
+ DOROUND(temp1);
+ temp1 >>= SHIFT_BITS;
+ temp2 = xC2S6 * irot_input_y;
+ DOROUND(temp2);
+ temp2 >>= SHIFT_BITS;
+ op[2 * 8] = (temp1 + temp2 + FINAL_ROUNDING) >> FINAL_SHIFT;
+
+ temp1 = xC6S2 * irot_input_y;
+ DOROUND(temp1);
+ temp1 >>= SHIFT_BITS;
+ temp2 = xC2S6 * irot_input_x;
+ DOROUND(temp2);
+ temp2 >>= SHIFT_BITS;
+ op[6 * 8] = (temp1 - temp2 + FINAL_ROUNDING) >> FINAL_SHIFT;
+
+ // Define inputs to rotation for outputs 1 and 7
+ irot_input_x = icommon_product1 + id07;
+ irot_input_y = -(id34 + icommon_product2);
+
+ // Apply rotation for outputs 1 and 7.
+ temp1 = xC1S7 * irot_input_x;
+ DOROUND(temp1);
+ temp1 >>= SHIFT_BITS;
+ temp2 = xC7S1 * irot_input_y;
+ DOROUND(temp2);
+ temp2 >>= SHIFT_BITS;
+ op[1 * 8] = (temp1 - temp2 + FINAL_ROUNDING) >> FINAL_SHIFT;
+
+ temp1 = xC7S1 * irot_input_x;
+ DOROUND(temp1);
+ temp1 >>= SHIFT_BITS;
+ temp2 = xC1S7 * irot_input_y;
+ DOROUND(temp2);
+ temp2 >>= SHIFT_BITS;
+ op[7 * 8] = (temp1 + temp2 + FINAL_ROUNDING) >> FINAL_SHIFT;
+
+ // Define inputs to rotation for outputs 3 and 5
+ irot_input_x = id07 - icommon_product1;
+ irot_input_y = id34 - icommon_product2;
+
+ // Apply rotation for outputs 3 and 5.
+ temp1 = xC3S5 * irot_input_x;
+ DOROUND(temp1);
+ temp1 >>= SHIFT_BITS;
+ temp2 = xC5S3 * irot_input_y;
+ DOROUND(temp2);
+ temp2 >>= SHIFT_BITS;
+ op[3 * 8] = (temp1 - temp2 + FINAL_ROUNDING) >> FINAL_SHIFT;
+
+
+ temp1 = xC5S3 * irot_input_x;
+ DOROUND(temp1);
+ temp1 >>= SHIFT_BITS;
+ temp2 = xC3S5 * irot_input_y;
+ DOROUND(temp2);
+ temp2 >>= SHIFT_BITS;
+ op[5 * 8] = (temp1 + temp2 + FINAL_ROUNDING) >> FINAL_SHIFT;
+
+ // Increment data pointer for next column.
+ ip++;
+ op++;
+ }
+}
+
+void vp9_short_fhaar2x2_c(short *input, short *output, int pitch) {
+  /* 2x2 Haar transform ([1 1; 1 -1] in both directions) of the four values
+   * held at positions 0, 1, 4 and 8 of the 16-entry block.  Every other
+   * output entry is written as zero.  The pitch argument is not used.
+   */
+  int n;
+
+  /* The output is cleared before any input is read, as before. */
+  for (n = 0; n < 16; ++n)
+    output[n] = 0;
+
+  /* Only the first term carries a rounding offset. */
+  output[0] = (input[0] + input[1] + input[4] + input[8] + 1) >> 1;
+  output[1] = (input[0] - input[1] + input[4] - input[8]) >> 1;
+  output[4] = (input[0] + input[1] - input[4] - input[8]) >> 1;
+  output[8] = (input[0] - input[1] - input[4] + input[8]) >> 1;
+}
+
+/* For test: TEST_INT selects which transform is built as vp9_fht_c. */
+#define TEST_INT 1
+#if TEST_INT
+#define vp9_fht_int_c vp9_fht_c
+#else
+#define vp9_fht_float_c vp9_fht_c
+#endif
+
+void vp9_fht_float_c(const int16_t *input, int pitch, int16_t *output,
+                     TX_TYPE tx_type, int tx_dim) {
+  vp9_clear_system_state();  // Make it simd safe : __asm emms;
+  {
+    // Floating-point version of the hybrid transform, kept for test
+    // purposes.  work_a first holds the residual and finally the result;
+    // work_b holds the vertically transformed data in between.
+    float work_a[256], work_b[256];
+    const float *col_basis;  // basis matrix for the vertical pass
+    const float *row_basis;  // basis matrix for the horizontal pass
+    const int src_stride = pitch / 2;  // input row step in int16_t units
+    int row, col, k;
+
+    assert(tx_type != DCT_DCT);
+
+    // Load the residual into a dense tx_dim x tx_dim float array.
+    for (row = 0; row < tx_dim; row++) {
+      for (col = 0; col < tx_dim; col++) {
+        work_a[row * tx_dim + col] = (float)input[row * src_stride + col];
+      }
+    }
+
+    // Vertical basis: ADST for ADST_ADST and ADST_DCT, DCT otherwise.
+    if (tx_type == ADST_ADST || tx_type == ADST_DCT) {
+      if (tx_dim == 4)
+        col_basis = &adst_4[0];
+      else if (tx_dim == 8)
+        col_basis = &adst_8[0];
+      else
+        col_basis = &adst_16[0];
+    } else {
+      if (tx_dim == 4)
+        col_basis = &dct_4[0];
+      else if (tx_dim == 8)
+        col_basis = &dct_8[0];
+      else
+        col_basis = &dct_16[0];
+    }
+
+    // Horizontal basis: ADST for ADST_ADST and DCT_ADST, DCT otherwise.
+    if (tx_type == ADST_ADST || tx_type == DCT_ADST) {
+      if (tx_dim == 4)
+        row_basis = &adst_4[0];
+      else if (tx_dim == 8)
+        row_basis = &adst_8[0];
+      else
+        row_basis = &adst_16[0];
+    } else {
+      if (tx_dim == 4)
+        row_basis = &dct_4[0];
+      else if (tx_dim == 8)
+        row_basis = &dct_8[0];
+      else
+        row_basis = &dct_16[0];
+    }
+
+    // Vertical transformation: work_b = col_basis * work_a.
+    for (row = 0; row < tx_dim; row++) {
+      const float *basis_row = col_basis + row * tx_dim;
+      float *dst = work_b + row * tx_dim;
+
+      for (col = 0; col < tx_dim; col++) {
+        dst[col] = 0;
+        for (k = 0; k < tx_dim; k++) {
+          dst[col] += basis_row[k] * work_a[k * tx_dim + col];
+        }
+      }
+    }
+
+    // Horizontal transformation: each row of work_b is projected onto every
+    // row of row_basis; the result goes back into work_a.
+    for (row = 0; row < tx_dim; row++) {
+      const float *src = work_b + row * tx_dim;
+      float *dst = work_a + row * tx_dim;
+
+      for (col = 0; col < tx_dim; col++) {
+        const float *basis_row = row_basis + col * tx_dim;
+
+        dst[col] = 0;
+        for (k = 0; k < tx_dim; k++) {
+          dst[col] += src[k] * basis_row[k];
+        }
+      }
+    }
+
+    // Scale by 8, round away from zero with a 0.49 offset, and store the
+    // coefficients densely (tx_dim per row) as short integers.
+    for (row = 0; row < tx_dim; row++) {
+      for (col = 0; col < tx_dim; col++) {
+        const float v = work_a[row * tx_dim + col];
+
+        output[row * tx_dim + col] = (v > 0) ? (int16_t)(8 * v + 0.49) :
+                                               -(int16_t)(-8 * v + 0.49);
+      }
+    }
+  }
+  vp9_clear_system_state();  // Make it simd safe : __asm emms;
+}
+
+/* Converted the transforms to integer form. */
+#define VERTICAL_SHIFT 11
+#define VERTICAL_ROUNDING ((1 << (VERTICAL_SHIFT - 1)) - 1)
+#define HORIZONTAL_SHIFT 16
+#define HORIZONTAL_ROUNDING ((1 << (HORIZONTAL_SHIFT - 1)) - 1)
+
+/* Integer forward hybrid transform: a vertical pass followed by a horizontal
+ * pass, each using either the DCT or the ADST integer basis.
+ *
+ * input    residual block; consecutive rows are (pitch >> 1) int16_t apart.
+ * pitch    input pitch; only pitch >> 1 is used.
+ * output   receives tx_dim * tx_dim coefficients, stored densely row by row.
+ * tx_type  selects the basis pair (ADST_DCT: ADST vertically, DCT
+ *          horizontally; DCT_ADST: the reverse).
+ * tx_dim   4 or 8 select the 4- and 8-point tables; any other value selects
+ *          the 16-point tables.
+ *
+ * An unrecognised tx_type asserts in debug builds; in builds where assert is
+ * compiled out the function returns without writing output instead of
+ * dereferencing a NULL basis pointer.
+ */
+void vp9_fht_int_c(const int16_t *input, int pitch, int16_t *output,
+                   TX_TYPE tx_type, int tx_dim) {
+  int i, j, k;
+  int16_t imbuf[256];
+  const int stride = pitch >> 1;
+
+  /* pointers to vertical and horizontal transforms. */
+  const int16_t *ptv = NULL, *pth = NULL;
+
+  switch (tx_type) {
+    case ADST_ADST :
+      ptv = pth = (tx_dim == 4) ? &adst_i4[0]
+                                : ((tx_dim == 8) ? &adst_i8[0]
+                                                 : &adst_i16[0]);
+      break;
+    case ADST_DCT :
+      ptv = (tx_dim == 4) ? &adst_i4[0]
+                          : ((tx_dim == 8) ? &adst_i8[0] : &adst_i16[0]);
+      pth = (tx_dim == 4) ? &dct_i4[0]
+                          : ((tx_dim == 8) ? &dct_i8[0] : &dct_i16[0]);
+      break;
+    case DCT_ADST :
+      ptv = (tx_dim == 4) ? &dct_i4[0]
+                          : ((tx_dim == 8) ? &dct_i8[0] : &dct_i16[0]);
+      pth = (tx_dim == 4) ? &adst_i4[0]
+                          : ((tx_dim == 8) ? &adst_i8[0] : &adst_i16[0]);
+      break;
+    case DCT_DCT :
+      ptv = pth = (tx_dim == 4) ? &dct_i4[0]
+                                : ((tx_dim == 8) ? &dct_i8[0] : &dct_i16[0]);
+      break;
+    default:
+      assert(0);
+      /* ptv/pth are still NULL here; do not fall into the loops below. */
+      return;
+  }
+
+  /* vertical transformation: row j of the intermediate block is the
+   * projection of every input column onto row j of the vertical basis. */
+  for (j = 0; j < tx_dim; j++) {
+    const int16_t *basis_row = ptv + j * tx_dim;
+    int16_t *im = &imbuf[j * tx_dim];
+
+    for (i = 0; i < tx_dim; i++) {
+      int temp = 0;
+
+      for (k = 0; k < tx_dim; k++) {
+        temp += basis_row[k] * input[i + k * stride];
+      }
+
+      im[i] = (int16_t)((temp + VERTICAL_ROUNDING) >> VERTICAL_SHIFT);
+    }
+  }
+
+  /* horizontal transformation: every intermediate row is projected onto
+   * each row of the horizontal basis. */
+  for (j = 0; j < tx_dim; j++) {
+    const int16_t *im = &imbuf[j * tx_dim];
+    int16_t *op = output + j * tx_dim;
+
+    for (i = 0; i < tx_dim; i++) {
+      const int16_t *pthc = pth + i * tx_dim;
+      int temp = 0;
+
+      for (k = 0; k < tx_dim; k++) {
+        temp += im[k] * pthc[k];
+      }
+
+      op[i] = (int16_t)((temp + HORIZONTAL_ROUNDING) >> HORIZONTAL_SHIFT);
+    }
+  }
+}
+
+/* Forward 4x4 DCT.  Input rows are pitch / 2 shorts apart; the 16 output
+ * coefficients are stored densely, four per row.  The first pass transforms
+ * rows into output, the second pass transforms the columns of output in
+ * place.
+ */
+void vp9_short_fdct4x4_c(short *input, short *output, int pitch) {
+  const int stride = pitch / 2;
+  int n;
+
+  /* Horizontal pass, with the inputs scaled up by 32. */
+  for (n = 0; n < 4; ++n) {
+    const short *src = input + n * stride;
+    short *dst = output + n * 4;
+    const int sum03 = (src[0] + src[3]) << 5;
+    const int sum12 = (src[1] + src[2]) << 5;
+    const int diff12 = (src[1] - src[2]) << 5;
+    const int diff03 = (src[0] - src[3]) << 5;
+
+    dst[0] = sum03 + sum12;
+    dst[2] = sum03 - sum12;
+    dst[1] = (diff12 * 2217 + diff03 * 5352 + 14500) >> 12;
+    dst[3] = (diff03 * 2217 - diff12 * 5352 + 7500) >> 12;
+  }
+
+  /* Vertical pass, in place on the first-pass result. */
+  for (n = 0; n < 4; ++n) {
+    short *colp = output + n;
+    const int sum03 = colp[0] + colp[12];
+    const int sum12 = colp[4] + colp[8];
+    const int diff12 = colp[4] - colp[8];
+    const int diff03 = colp[0] - colp[12];
+
+    colp[0] = (sum03 + sum12 + 7) >> 4;
+    colp[8] = (sum03 - sum12 + 7) >> 4;
+    colp[4] = ((diff12 * 2217 + diff03 * 5352 + 12000) >> 16) +
+              (diff03 != 0);
+    colp[12] = (diff03 * 2217 - diff12 * 5352 + 51000) >> 16;
+  }
+}
+
+/* Two side-by-side 4x4 forward DCTs: the left block goes to output[0..15],
+ * the block starting four shorts to the right goes to output[16..31]. */
+void vp9_short_fdct8x4_c(short *input, short *output, int pitch) {
+  short *const right_in = input + 4;
+  short *const right_out = output + 16;
+
+  vp9_short_fdct4x4_c(input, output, pitch);
+  vp9_short_fdct4x4_c(right_in, right_out, pitch);
+}
+
+/* Forward 4x4 Walsh-Hadamard transform.  Input rows are pitch >> 1 shorts
+ * apart; output is a dense 4x4 block.  Columns are transformed first, then
+ * the rows of the intermediate result are transformed in place.
+ */
+void vp9_short_walsh4x4_c(short *input, short *output, int pitch) {
+  const int stride = pitch >> 1;
+  int n;
+
+  /* Vertical pass: one input column per iteration. */
+  for (n = 0; n < 4; ++n) {
+    const short *src = input + n;
+    short *dst = output + n;
+    const int a1 = src[0 * stride] + src[3 * stride];
+    const int b1 = src[1 * stride] + src[2 * stride];
+    const int c1 = src[1 * stride] - src[2 * stride];
+    const int d1 = src[0 * stride] - src[3 * stride];
+
+    dst[0] = (a1 + b1 + 1) >> 1;
+    dst[4] = (c1 + d1) >> 1;
+    dst[8] = (a1 - b1) >> 1;
+    dst[12] = (d1 - c1) >> 1;
+  }
+
+  /* Horizontal pass: one intermediate row per iteration, in place. */
+  for (n = 0; n < 4; ++n) {
+    short *rowp = output + 4 * n;
+    const int a1 = rowp[0] + rowp[3];
+    const int b1 = rowp[1] + rowp[2];
+    const int c1 = rowp[1] - rowp[2];
+    const int d1 = rowp[0] - rowp[3];
+
+    rowp[0] = (a1 + b1 + 1) >> 1;
+    rowp[1] = (c1 + d1) >> 1;
+    rowp[2] = (a1 - b1) >> 1;
+    rowp[3] = (d1 - c1) >> 1;
+  }
+}
+
+#if CONFIG_LOSSLESS
+/* 4x4 Walsh-Hadamard variant for lossless mode: the column sums and
+ * differences are shifted down by Y2_WHT_UPSCALE_FACTOR before the first
+ * butterfly, and the final outputs are shifted back up by the same amount.
+ * Input rows are pitch >> 1 shorts apart; output is a dense 4x4 block.
+ */
+void vp9_short_walsh4x4_lossless_c(short *input, short *output, int pitch) {
+  const int stride = pitch >> 1;
+  int n;
+
+  /* Vertical pass on down-scaled sums and differences. */
+  for (n = 0; n < 4; ++n) {
+    const short *src = input + n;
+    short *dst = output + n;
+    const int a1 = (src[0 * stride] + src[3 * stride])
+                   >> Y2_WHT_UPSCALE_FACTOR;
+    const int b1 = (src[1 * stride] + src[2 * stride])
+                   >> Y2_WHT_UPSCALE_FACTOR;
+    const int c1 = (src[1 * stride] - src[2 * stride])
+                   >> Y2_WHT_UPSCALE_FACTOR;
+    const int d1 = (src[0 * stride] - src[3 * stride])
+                   >> Y2_WHT_UPSCALE_FACTOR;
+
+    dst[0] = (a1 + b1 + 1) >> 1;
+    dst[4] = (c1 + d1) >> 1;
+    dst[8] = (a1 - b1) >> 1;
+    dst[12] = (d1 - c1) >> 1;
+  }
+
+  /* Horizontal pass, in place, with the results scaled back up. */
+  for (n = 0; n < 4; ++n) {
+    short *rowp = output + 4 * n;
+    const int a1 = rowp[0] + rowp[3];
+    const int b1 = rowp[1] + rowp[2];
+    const int c1 = rowp[1] - rowp[2];
+    const int d1 = rowp[0] - rowp[3];
+
+    rowp[0] = ((a1 + b1 + 1) >> 1) << Y2_WHT_UPSCALE_FACTOR;
+    rowp[1] = ((c1 + d1) >> 1) << Y2_WHT_UPSCALE_FACTOR;
+    rowp[2] = ((a1 - b1) >> 1) << Y2_WHT_UPSCALE_FACTOR;
+    rowp[3] = ((d1 - c1) >> 1) << Y2_WHT_UPSCALE_FACTOR;
+  }
+}
+
+/* 4x4 Walsh-Hadamard transform whose final outputs are shifted left by
+ * WHT_UPSCALE_FACTOR.  Input rows are pitch >> 1 shorts apart; output is a
+ * dense 4x4 block.
+ */
+void vp9_short_walsh4x4_x8_c(short *input, short *output, int pitch) {
+  const int stride = pitch >> 1;
+  int n;
+
+  /* Vertical pass: one input column per iteration. */
+  for (n = 0; n < 4; ++n) {
+    const short *src = input + n;
+    short *dst = output + n;
+    const int a1 = src[0 * stride] + src[3 * stride];
+    const int b1 = src[1 * stride] + src[2 * stride];
+    const int c1 = src[1 * stride] - src[2 * stride];
+    const int d1 = src[0 * stride] - src[3 * stride];
+
+    dst[0] = (a1 + b1 + 1) >> 1;
+    dst[4] = (c1 + d1) >> 1;
+    dst[8] = (a1 - b1) >> 1;
+    dst[12] = (d1 - c1) >> 1;
+  }
+
+  /* Horizontal pass, in place, with the results up-scaled. */
+  for (n = 0; n < 4; ++n) {
+    short *rowp = output + 4 * n;
+    const int a1 = rowp[0] + rowp[3];
+    const int b1 = rowp[1] + rowp[2];
+    const int c1 = rowp[1] - rowp[2];
+    const int d1 = rowp[0] - rowp[3];
+
+    rowp[0] = ((a1 + b1 + 1) >> 1) << WHT_UPSCALE_FACTOR;
+    rowp[1] = ((c1 + d1) >> 1) << WHT_UPSCALE_FACTOR;
+    rowp[2] = ((a1 - b1) >> 1) << WHT_UPSCALE_FACTOR;
+    rowp[3] = ((d1 - c1) >> 1) << WHT_UPSCALE_FACTOR;
+  }
+}
+
+/* Two side-by-side up-scaled 4x4 Walsh transforms: the left block goes to
+ * output[0..15], the block four shorts to the right to output[16..31]. */
+void vp9_short_walsh8x4_x8_c(short *input, short *output, int pitch) {
+  short *const right_in = input + 4;
+  short *const right_out = output + 16;
+
+  vp9_short_walsh4x4_x8_c(input, output, pitch);
+  vp9_short_walsh4x4_x8_c(right_in, right_out, pitch);
+}
+#endif
+
+#define TEST_INT_16x16_DCT 1
+#if !TEST_INT_16x16_DCT
+// Floating-point cosine table: C<k> == cos(k * pi / 32) for k = 1..15.
+static const double C1 = 0.995184726672197;
+static const double C2 = 0.98078528040323;
+static const double C3 = 0.956940335732209;
+static const double C4 = 0.923879532511287;
+static const double C5 = 0.881921264348355;
+static const double C6 = 0.831469612302545;
+static const double C7 = 0.773010453362737;
+static const double C8 = 0.707106781186548;
+static const double C9 = 0.634393284163646;
+static const double C10 = 0.555570233019602;
+static const double C11 = 0.471396736825998;
+static const double C12 = 0.38268343236509;
+static const double C13 = 0.290284677254462;
+static const double C14 = 0.195090322016128;
+static const double C15 = 0.098017140329561;
+
+// Floating-point 16-point 1-D forward transform, compiled only when
+// TEST_INT_16x16_DCT is 0.  Reads input[0..15] and writes all sixteen
+// entries of output[]; output[] is also used as scratch between the
+// butterfly stages, so it must not alias input[].
+static void dct16x16_1d(double input[16], double output[16]) {
+ vp9_clear_system_state(); // Make it simd safe : __asm emms;
+ {
+ double step[16];
+ double intermediate[16];
+ double temp1, temp2;
+
+ // step 1
+ // Sums of mirrored input pairs go to step[0..7], differences to step[8..15].
+ step[ 0] = input[0] + input[15];
+ step[ 1] = input[1] + input[14];
+ step[ 2] = input[2] + input[13];
+ step[ 3] = input[3] + input[12];
+ step[ 4] = input[4] + input[11];
+ step[ 5] = input[5] + input[10];
+ step[ 6] = input[6] + input[ 9];
+ step[ 7] = input[7] + input[ 8];
+ step[ 8] = input[7] - input[ 8];
+ step[ 9] = input[6] - input[ 9];
+ step[10] = input[5] - input[10];
+ step[11] = input[4] - input[11];
+ step[12] = input[3] - input[12];
+ step[13] = input[2] - input[13];
+ step[14] = input[1] - input[14];
+ step[15] = input[0] - input[15];
+
+ // step 2
+ // Second butterfly on the sums; rotations by odd-indexed constants on the
+ // differences.
+ output[0] = step[0] + step[7];
+ output[1] = step[1] + step[6];
+ output[2] = step[2] + step[5];
+ output[3] = step[3] + step[4];
+ output[4] = step[3] - step[4];
+ output[5] = step[2] - step[5];
+ output[6] = step[1] - step[6];
+ output[7] = step[0] - step[7];
+
+ temp1 = step[ 8]*C7;
+ temp2 = step[15]*C9;
+ output[ 8] = temp1 + temp2;
+
+ temp1 = step[ 9]*C11;
+ temp2 = step[14]*C5;
+ output[ 9] = temp1 - temp2;
+
+ temp1 = step[10]*C3;
+ temp2 = step[13]*C13;
+ output[10] = temp1 + temp2;
+
+ temp1 = step[11]*C15;
+ temp2 = step[12]*C1;
+ output[11] = temp1 - temp2;
+
+ temp1 = step[11]*C1;
+ temp2 = step[12]*C15;
+ output[12] = temp2 + temp1;
+
+ temp1 = step[10]*C13;
+ temp2 = step[13]*C3;
+ output[13] = temp2 - temp1;
+
+ temp1 = step[ 9]*C5;
+ temp2 = step[14]*C11;
+ output[14] = temp2 + temp1;
+
+ temp1 = step[ 8]*C9;
+ temp2 = step[15]*C7;
+ output[15] = temp2 - temp1;
+
+ // step 3
+ step[ 0] = output[0] + output[3];
+ step[ 1] = output[1] + output[2];
+ step[ 2] = output[1] - output[2];
+ step[ 3] = output[0] - output[3];
+
+ temp1 = output[4]*C14;
+ temp2 = output[7]*C2;
+ step[ 4] = temp1 + temp2;
+
+ temp1 = output[5]*C10;
+ temp2 = output[6]*C6;
+ step[ 5] = temp1 + temp2;
+
+ temp1 = output[5]*C6;
+ temp2 = output[6]*C10;
+ step[ 6] = temp2 - temp1;
+
+ temp1 = output[4]*C2;
+ temp2 = output[7]*C14;
+ step[ 7] = temp2 - temp1;
+
+ step[ 8] = output[ 8] + output[11];
+ step[ 9] = output[ 9] + output[10];
+ step[10] = output[ 9] - output[10];
+ step[11] = output[ 8] - output[11];
+
+ step[12] = output[12] + output[15];
+ step[13] = output[13] + output[14];
+ step[14] = output[13] - output[14];
+ step[15] = output[12] - output[15];
+
+ // step 4
+ // Final stage: every output[] entry is assigned exactly once below.
+ output[ 0] = (step[ 0] + step[ 1]);
+ output[ 8] = (step[ 0] - step[ 1]);
+
+ temp1 = step[2]*C12;
+ temp2 = step[3]*C4;
+ temp1 = temp1 + temp2;
+ output[ 4] = 2*(temp1*C8);
+
+ temp1 = step[2]*C4;
+ temp2 = step[3]*C12;
+ temp1 = temp2 - temp1;
+ output[12] = 2*(temp1*C8);
+
+ output[ 2] = 2*((step[4] + step[ 5])*C8);
+ output[14] = 2*((step[7] - step[ 6])*C8);
+
+ temp1 = step[4] - step[5];
+ temp2 = step[6] + step[7];
+ output[ 6] = (temp1 + temp2);
+ output[10] = (temp1 - temp2);
+
+ intermediate[8] = step[8] + step[14];
+ intermediate[9] = step[9] + step[15];
+
+ temp1 = intermediate[8]*C12;
+ temp2 = intermediate[9]*C4;
+ temp1 = temp1 - temp2;
+ output[3] = 2*(temp1*C8);
+
+ temp1 = intermediate[8]*C4;
+ temp2 = intermediate[9]*C12;
+ temp1 = temp2 + temp1;
+ output[13] = 2*(temp1*C8);
+
+ output[ 9] = 2*((step[10] + step[11])*C8);
+
+ intermediate[11] = step[10] - step[11];
+ intermediate[12] = step[12] + step[13];
+ intermediate[13] = step[12] - step[13];
+ intermediate[14] = step[ 8] - step[14];
+ intermediate[15] = step[ 9] - step[15];
+
+ output[15] = (intermediate[11] + intermediate[12]);
+ output[ 1] = -(intermediate[11] - intermediate[12]);
+
+ output[ 7] = 2*(intermediate[13]*C8);
+
+ temp1 = intermediate[14]*C12;
+ temp2 = intermediate[15]*C4;
+ temp1 = temp1 - temp2;
+ output[11] = -2*(temp1*C8);
+
+ temp1 = intermediate[14]*C4;
+ temp2 = intermediate[15]*C12;
+ temp1 = temp2 + temp1;
+ output[ 5] = 2*(temp1*C8);
+ }
+ vp9_clear_system_state(); // Make it simd safe : __asm emms;
+}
+
+// Floating-point 16x16 forward DCT (built only when TEST_INT_16x16_DCT is 0).
+// Input rows are pitch >> 1 shorts apart; out receives 256 coefficients
+// stored densely, 16 per row.
+void vp9_short_fdct16x16_c(short *input, short *out, int pitch) {
+  vp9_clear_system_state();  // Make it simd safe : __asm emms;
+  {
+    const int stride = pitch >> 1;
+    double coeffs[256];
+    double line_in[16], line_out[16];
+    int r, c;
+
+    // Pass 1: run the 1-D transform down every column of the input.
+    for (c = 0; c < 16; c++) {
+      for (r = 0; r < 16; r++)
+        line_in[r] = input[r * stride + c];
+      dct16x16_1d(line_in, line_out);
+      for (r = 0; r < 16; r++)
+        coeffs[r * 16 + c] = line_out[r];
+    }
+
+    // Pass 2: run it along every row of the intermediate result.
+    for (r = 0; r < 16; ++r) {
+      for (c = 0; c < 16; ++c)
+        line_in[c] = coeffs[r * 16 + c];
+      dct16x16_1d(line_in, line_out);
+      for (c = 0; c < 16; ++c)
+        coeffs[r * 16 + c] = line_out[c];
+    }
+
+    // Halve each coefficient and round to the nearest short.
+    for (r = 0; r < 256; r++)
+      out[r] = (short)round(coeffs[r] / 2);
+  }
+  vp9_clear_system_state();  // Make it simd safe : __asm emms;
+}
+
+#else
+// Integer cosine table: C<k> is cos(k * pi / 32) scaled by 2^14 and rounded.
+static const int16_t C1 = 16305;
+static const int16_t C2 = 16069;
+static const int16_t C3 = 15679;
+static const int16_t C4 = 15137;
+static const int16_t C5 = 14449;
+static const int16_t C6 = 13623;
+static const int16_t C7 = 12665;
+static const int16_t C8 = 11585;
+static const int16_t C9 = 10394;
+static const int16_t C10 = 9102;
+static const int16_t C11 = 7723;
+static const int16_t C12 = 6270;
+static const int16_t C13 = 4756;
+static const int16_t C14 = 3196;
+static const int16_t C15 = 1606;
+
+#define RIGHT_SHIFT 14
+#define ROUNDING (1 << (RIGHT_SHIFT - 1))
+
+// Integer 16-point 1-D forward transform.
+//
+// input            sixteen samples to transform.
+// output           receives all sixteen results; it is also used as scratch
+//                  between butterfly stages, so it must not alias input.
+// last_shift_bits  extra right shift (with rounding) folded into the final
+//                  stage; the 2-D transform passes 0 for its first pass and
+//                  1 for its second.
+//
+// Products with the 2^14-scaled constants are rounded and shifted back by
+// RIGHT_SHIFT.  Outputs that need no multiplication in the final stage are
+// rounded and shifted by last_shift_bits alone.
+static void dct16x16_1d(int16_t input[16], int16_t output[16],
+                        int last_shift_bits) {
+  int16_t step[16];
+  int intermediate[16];
+  int temp1, temp2;
+  int final_shift = RIGHT_SHIFT;
+  int final_rounding = ROUNDING;
+  int output_shift = 0;
+  int output_rounding = 0;
+
+  final_shift += last_shift_bits;
+  if (final_shift > 0)
+    final_rounding = 1 << (final_shift - 1);
+
+  output_shift += last_shift_bits;
+  if (output_shift > 0)
+    output_rounding = 1 << (output_shift - 1);
+
+  // step 1
+  step[ 0] = input[0] + input[15];
+  step[ 1] = input[1] + input[14];
+  step[ 2] = input[2] + input[13];
+  step[ 3] = input[3] + input[12];
+  step[ 4] = input[4] + input[11];
+  step[ 5] = input[5] + input[10];
+  step[ 6] = input[6] + input[ 9];
+  step[ 7] = input[7] + input[ 8];
+  step[ 8] = input[7] - input[ 8];
+  step[ 9] = input[6] - input[ 9];
+  step[10] = input[5] - input[10];
+  step[11] = input[4] - input[11];
+  step[12] = input[3] - input[12];
+  step[13] = input[2] - input[13];
+  step[14] = input[1] - input[14];
+  step[15] = input[0] - input[15];
+
+  // step 2
+  output[0] = step[0] + step[7];
+  output[1] = step[1] + step[6];
+  output[2] = step[2] + step[5];
+  output[3] = step[3] + step[4];
+  output[4] = step[3] - step[4];
+  output[5] = step[2] - step[5];
+  output[6] = step[1] - step[6];
+  output[7] = step[0] - step[7];
+
+  temp1 = step[ 8] * C7;
+  temp2 = step[15] * C9;
+  output[ 8] = (temp1 + temp2 + ROUNDING) >> RIGHT_SHIFT;
+
+  temp1 = step[ 9] * C11;
+  temp2 = step[14] * C5;
+  output[ 9] = (temp1 - temp2 + ROUNDING) >> RIGHT_SHIFT;
+
+  temp1 = step[10] * C3;
+  temp2 = step[13] * C13;
+  output[10] = (temp1 + temp2 + ROUNDING) >> RIGHT_SHIFT;
+
+  temp1 = step[11] * C15;
+  temp2 = step[12] * C1;
+  output[11] = (temp1 - temp2 + ROUNDING) >> RIGHT_SHIFT;
+
+  temp1 = step[11] * C1;
+  temp2 = step[12] * C15;
+  output[12] = (temp2 + temp1 + ROUNDING) >> RIGHT_SHIFT;
+
+  temp1 = step[10] * C13;
+  temp2 = step[13] * C3;
+  output[13] = (temp2 - temp1 + ROUNDING) >> RIGHT_SHIFT;
+
+  temp1 = step[ 9] * C5;
+  temp2 = step[14] * C11;
+  output[14] = (temp2 + temp1 + ROUNDING) >> RIGHT_SHIFT;
+
+  temp1 = step[ 8] * C9;
+  temp2 = step[15] * C7;
+  output[15] = (temp2 - temp1 + ROUNDING) >> RIGHT_SHIFT;
+
+  // step 3
+  step[ 0] = output[0] + output[3];
+  step[ 1] = output[1] + output[2];
+  step[ 2] = output[1] - output[2];
+  step[ 3] = output[0] - output[3];
+
+  temp1 = output[4] * C14;
+  temp2 = output[7] * C2;
+  step[ 4] = (temp1 + temp2 + ROUNDING) >> RIGHT_SHIFT;
+
+  temp1 = output[5] * C10;
+  temp2 = output[6] * C6;
+  step[ 5] = (temp1 + temp2 + ROUNDING) >> RIGHT_SHIFT;
+
+  temp1 = output[5] * C6;
+  temp2 = output[6] * C10;
+  step[ 6] = (temp2 - temp1 + ROUNDING) >> RIGHT_SHIFT;
+
+  temp1 = output[4] * C2;
+  temp2 = output[7] * C14;
+  step[ 7] = (temp2 - temp1 + ROUNDING) >> RIGHT_SHIFT;
+
+  step[ 8] = output[ 8] + output[11];
+  step[ 9] = output[ 9] + output[10];
+  step[10] = output[ 9] - output[10];
+  step[11] = output[ 8] - output[11];
+
+  step[12] = output[12] + output[15];
+  step[13] = output[13] + output[14];
+  step[14] = output[13] - output[14];
+  step[15] = output[12] - output[15];
+
+  // step 4
+  output[ 0] = (step[ 0] + step[ 1] + output_rounding) >> output_shift;
+  output[ 8] = (step[ 0] - step[ 1] + output_rounding) >> output_shift;
+
+  temp1 = step[2] * C12;
+  temp2 = step[3] * C4;
+  temp1 = (temp1 + temp2 + final_rounding) >> final_shift;
+  output[ 4] = (2 * (temp1 * C8) + ROUNDING) >> RIGHT_SHIFT;
+
+  temp1 = step[2] * C4;
+  temp2 = step[3] * C12;
+  temp1 = (temp2 - temp1 + final_rounding) >> final_shift;
+  output[12] = (2 * (temp1 * C8) + ROUNDING) >> RIGHT_SHIFT;
+
+  output[ 2] = (2 * ((step[4] + step[ 5]) * C8) + final_rounding)
+               >> final_shift;
+  output[14] = (2 * ((step[7] - step[ 6]) * C8) + final_rounding)
+               >> final_shift;
+
+  temp1 = step[4] - step[5];
+  temp2 = step[6] + step[7];
+  output[ 6] = (temp1 + temp2 + output_rounding) >> output_shift;
+  output[10] = (temp1 - temp2 + output_rounding) >> output_shift;
+
+  intermediate[8] = step[8] + step[14];
+  intermediate[9] = step[9] + step[15];
+
+  temp1 = intermediate[8] * C12;
+  temp2 = intermediate[9] * C4;
+  temp1 = (temp1 - temp2 + final_rounding) >> final_shift;
+  output[3] = (2 * (temp1 * C8) + ROUNDING) >> RIGHT_SHIFT;
+
+  temp1 = intermediate[8] * C4;
+  temp2 = intermediate[9] * C12;
+  temp1 = (temp2 + temp1 + final_rounding) >> final_shift;
+  output[13] = (2 * (temp1 * C8) + ROUNDING) >> RIGHT_SHIFT;
+
+  output[ 9] = (2 * ((step[10] + step[11]) * C8) + final_rounding)
+               >> final_shift;
+
+  intermediate[11] = step[10] - step[11];
+  intermediate[12] = step[12] + step[13];
+  intermediate[13] = step[12] - step[13];
+  intermediate[14] = step[ 8] - step[14];
+  intermediate[15] = step[ 9] - step[15];
+
+  output[15] = (intermediate[11] + intermediate[12] + output_rounding)
+               >> output_shift;
+  // Negate the value first and add the rounding offset afterwards.  Negating
+  // the sum (value + rounding) turned the offset into a bias of the wrong
+  // sign, so a zero input produced -1 whenever last_shift_bits was 1.
+  output[ 1] = (intermediate[12] - intermediate[11] + output_rounding)
+               >> output_shift;
+
+  output[ 7] = (2 * (intermediate[13] * C8) + final_rounding) >> final_shift;
+
+  temp1 = intermediate[14] * C12;
+  temp2 = intermediate[15] * C4;
+  temp1 = (temp1 - temp2 + final_rounding) >> final_shift;
+  output[11] = (-2 * (temp1 * C8) + ROUNDING) >> RIGHT_SHIFT;
+
+  temp1 = intermediate[14] * C4;
+  temp2 = intermediate[15] * C12;
+  temp1 = (temp2 + temp1 + final_rounding) >> final_shift;
+  output[ 5] = (2 * (temp1 * C8) + ROUNDING) >> RIGHT_SHIFT;
+}
+
+// Integer 16x16 forward DCT.  Input rows are pitch >> 1 int16_t apart; out
+// receives 256 coefficients stored densely, 16 per row.
+void vp9_short_fdct16x16_c(int16_t *input, int16_t *out, int pitch) {
+  const int stride = pitch >> 1;
+  int16_t coeffs[256];
+  int16_t column[16];
+  int16_t column_out[16];
+  int r, c;
+
+  // Pass 1: transform each input column (no extra shift) and store the
+  // result as the matching column of the intermediate block.
+  for (c = 0; c < 16; c++) {
+    for (r = 0; r < 16; r++)
+      column[r] = input[r * stride + c];
+    dct16x16_1d(column, column_out, 0);
+    for (r = 0; r < 16; r++)
+      coeffs[r * 16 + c] = column_out[r];
+  }
+
+  // Pass 2: transform each intermediate row straight into the caller's
+  // buffer, with one extra bit of down-shift.
+  for (r = 0; r < 16; ++r)
+    dct16x16_1d(coeffs + r * 16, out + r * 16, 1);
+}
+#undef RIGHT_SHIFT
+#undef ROUNDING
+#endif
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
new file mode 100644
index 0000000..bd19662
--- /dev/null
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -0,0 +1,2467 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "./vpx_config.h"
+#include "vp9/encoder/vp9_encodeframe.h"
+#include "vp9/encoder/vp9_encodemb.h"
+#include "vp9/encoder/vp9_encodemv.h"
+#include "vp9/common/vp9_common.h"
+#include "vp9/encoder/vp9_onyx_int.h"
+#include "vp9/common/vp9_extend.h"
+#include "vp9/common/vp9_entropymode.h"
+#include "vp9/common/vp9_quant_common.h"
+#include "vp9/encoder/vp9_segmentation.h"
+#include "vp9/common/vp9_setupintrarecon.h"
+#include "vp9/common/vp9_reconintra4x4.h"
+#include "vp9/encoder/vp9_encodeintra.h"
+#include "vp9/common/vp9_reconinter.h"
+#include "vp9/common/vp9_invtrans.h"
+#include "vp9/encoder/vp9_rdopt.h"
+#include "vp9/common/vp9_findnearmv.h"
+#include "vp9/common/vp9_reconintra.h"
+#include "vp9/common/vp9_seg_common.h"
+#include "vp9/encoder/vp9_tokenize.h"
+#include "vp9_rtcd.h"
+#include <stdio.h>
+#include <math.h>
+#include <limits.h>
+#include "vpx_ports/vpx_timer.h"
+#include "vp9/common/vp9_pred_common.h"
+#include "vp9/common/vp9_mvref_common.h"
+
+#define DBG_PRNT_SEGMAP 0
+
+// #define ENC_DEBUG
+#ifdef ENC_DEBUG
+int enc_debug = 0;
+#endif
+
+static void encode_macroblock(VP9_COMP *cpi, MACROBLOCK *x,
+ TOKENEXTRA **t, int recon_yoffset,
+ int recon_uvoffset, int output_enabled,
+ int mb_col, int mb_row);
+
+static void encode_superblock(VP9_COMP *cpi, MACROBLOCK *x,
+ TOKENEXTRA **t, int recon_yoffset,
+ int recon_uvoffset, int mb_col, int mb_row);
+
+static void adjust_act_zbin(VP9_COMP *cpi, MACROBLOCK *x);
+
+#ifdef MODE_STATS
+unsigned int inter_y_modes[MB_MODE_COUNT];
+unsigned int inter_uv_modes[VP9_UV_MODES];
+unsigned int inter_b_modes[B_MODE_COUNT];
+unsigned int y_modes[VP9_YMODES];
+unsigned int i8x8_modes[VP9_I8X8_MODES];
+unsigned int uv_modes[VP9_UV_MODES];
+unsigned int uv_modes_y[VP9_YMODES][VP9_UV_MODES];
+unsigned int b_modes[B_MODE_COUNT];
+#endif
+
+
+/* activity_avg must be positive, or flat regions could get a zero weight
+ * (infinite lambda), which confounds analysis.
+ * This also avoids the need for divide by zero checks in
+ * vp9_activity_masking().
+ */
+#define VP9_ACTIVITY_AVG_MIN (64)
+
+/* This is used as a reference when computing the source variance for the
+ * purposes of activity masking.
+ * Eventually this should be replaced by custom no-reference routines,
+ * which will be faster.
+ */
+static const unsigned char VP9_VAR_OFFS[16] = {
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128
+};
+
+
+// Original activity measure from Tim T's code: 16x the 16x16 variance of the
+// source luma against a flat block of 128, capped for nearly flat regions.
+static unsigned int tt_activity_measure(VP9_COMP *cpi, MACROBLOCK *x) {
+  /* TODO: This could also be done over smaller areas (8x8), but that would
+   *  require extensive changes elsewhere, as lambda is assumed to be fixed
+   *  over an entire MB in most of the code.
+   * Another option is to compute four 8x8 variances, and pick a single
+   *  lambda using a non-linear combination (e.g., the smallest, or second
+   *  smallest, etc.).
+   */
+  const unsigned int flat_limit = 8 << 12;
+  const unsigned int flat_cap = 5 << 12;
+  unsigned int sse;
+  unsigned int activity = vp9_variance16x16(x->src.y_buffer, x->src.y_stride,
+                                            VP9_VAR_OFFS, 0, &sse);
+
+  activity <<= 4;
+
+  /* If the region is flat, lower the activity some more. */
+  if (activity < flat_limit && activity > flat_cap)
+    activity = flat_cap;
+
+  return activity;
+}
+
+// Stub for alternative experimental activity measures; currently the value
+// returned by vp9_encode_intra() is used as the activity.
+static unsigned int alt_activity_measure(VP9_COMP *cpi,
+                                         MACROBLOCK *x, int use_dc_pred) {
+  const unsigned int activity = vp9_encode_intra(cpi, x, use_dc_pred);
+
+  return activity;
+}
+
+
+// Measure the activity of the current macroblock.
+// What we measure here is TBD, so it is abstracted to this function.  The
+// result is never smaller than VP9_ACTIVITY_AVG_MIN.
+#define ALT_ACT_MEASURE 1
+static unsigned int mb_activity_measure(VP9_COMP *cpi, MACROBLOCK *x,
+                                        int mb_row, int mb_col) {
+  unsigned int activity;
+
+  if (ALT_ACT_MEASURE) {
+    // Use an alternative measure.  DC prediction is requested when exactly
+    // one of mb_row / mb_col is zero.
+    const int not_origin = (mb_col || mb_row);
+    const int on_edge = (!mb_col || !mb_row);
+
+    activity = alt_activity_measure(cpi, x, not_origin && on_edge);
+  } else {
+    // Original activity measure from Tim T's code.
+    activity = tt_activity_measure(cpi, x);
+  }
+
+  return (activity < VP9_ACTIVITY_AVG_MIN) ? VP9_ACTIVITY_AVG_MIN : activity;
+}
+
+// Calculate an "average" mb activity value for the frame and store it in
+// cpi->activity_avg.
+//
+// With ACT_MEDIAN set, the value is the median of cpi->mb_activity_map;
+// otherwise it is activity_sum divided by the number of macroblocks.  The
+// result is clamped to at least VP9_ACTIVITY_AVG_MIN and, while
+// ALT_ACT_MEASURE is enabled, finally overridden by a fixed value.
+#define ACT_MEDIAN 0
+static void calc_av_activity(VP9_COMP *cpi, int64_t activity_sum) {
+#if ACT_MEDIAN
+  // Find median: Simple n^2 algorithm for experimentation
+  {
+    unsigned int median;
+    unsigned int i, j;
+    unsigned int *sortlist;
+    unsigned int tmp;
+
+    // Create a list to sort to
+    CHECK_MEM_ERROR(sortlist,
+                    vpx_calloc(sizeof(unsigned int),
+                               cpi->common.MBs));
+
+    // Copy map to sort list
+    vpx_memcpy(sortlist, cpi->mb_activity_map,
+               sizeof(unsigned int) * cpi->common.MBs);
+
+    // Ripple each value down to its correct position
+    for (i = 1; i < cpi->common.MBs; i++) {
+      for (j = i; j > 0; j--) {
+        if (sortlist[j] < sortlist[j - 1]) {
+          // Swap values
+          tmp = sortlist[j - 1];
+          sortlist[j - 1] = sortlist[j];
+          sortlist[j] = tmp;
+        } else
+          break;
+      }
+    }
+
+    // Median of the sorted list: the middle entry for an odd count, the
+    // rounded mean of the two middle entries for an even count.  Indices
+    // (n - 1) / 2 and n / 2 always stay inside the list, unlike n / 2 and
+    // n / 2 + 1, which sat one entry too high and could read past the end.
+    median = (1 + sortlist[(cpi->common.MBs - 1) >> 1] +
+              sortlist[cpi->common.MBs >> 1]) >> 1;
+
+    cpi->activity_avg = median;
+
+    vpx_free(sortlist);
+  }
+#else
+  // Simple mean for now
+  cpi->activity_avg = (unsigned int)(activity_sum / cpi->common.MBs);
+#endif
+
+  if (cpi->activity_avg < VP9_ACTIVITY_AVG_MIN)
+    cpi->activity_avg = VP9_ACTIVITY_AVG_MIN;
+
+  // Experimental code: return fixed value normalized for several clips
+  if (ALT_ACT_MEASURE)
+    cpi->activity_avg = 100000;
+}
+
+#define USE_ACT_INDEX 0
+#define OUTPUT_NORM_ACT_STATS 0
+
+#if USE_ACT_INDEX
+// Calculate an activity index for each mb.
+// Walks cpi->mb_activity_map in raster order (mb_rows x mb_cols) and turns
+// each stored activity into a signed index relative to cpi->activity_avg.
+// Only compiled when USE_ACT_INDEX is non-zero.
+// NOTE(review): the index is written through x->activity_ptr, while the map
+// is read and advanced through x->mb_activity_ptr; activity_ptr is never
+// advanced in this function - confirm which pointer is intended.
+static void calc_activity_index(VP9_COMP *cpi, MACROBLOCK *x) {
+ VP9_COMMON *const cm = &cpi->common;
+ int mb_row, mb_col;
+
+ int64_t act;
+ int64_t a;
+ int64_t b;
+
+#if OUTPUT_NORM_ACT_STATS
+ // NOTE(review): the fopen() result is used without a NULL check.
+ FILE *f = fopen("norm_act.stt", "a");
+ fprintf(f, "\n%12d\n", cpi->activity_avg);
+#endif
+
+ // Reset pointers to start of activity map
+ x->mb_activity_ptr = cpi->mb_activity_map;
+
+ // Calculate normalized mb activity number.
+ for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) {
+ // for each macroblock col in image
+ for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
+ // Read activity from the map
+ act = *(x->mb_activity_ptr);
+
+ // Calculate a normalized activity number
+ a = act + 4 * cpi->activity_avg;
+ b = 4 * act + cpi->activity_avg;
+
+ // Rounded ratio of the larger term to the smaller one, minus one;
+ // negated when a is the larger term.
+ if (b >= a)
+ *(x->activity_ptr) = (int)((b + (a >> 1)) / a) - 1;
+ else
+ *(x->activity_ptr) = 1 - (int)((a + (b >> 1)) / b);
+
+#if OUTPUT_NORM_ACT_STATS
+ fprintf(f, " %6d", *(x->mb_activity_ptr));
+#endif
+ // Increment activity map pointers
+ x->mb_activity_ptr++;
+ }
+
+#if OUTPUT_NORM_ACT_STATS
+ fprintf(f, "\n");
+#endif
+
+ }
+
+#if OUTPUT_NORM_ACT_STATS
+ fclose(f);
+#endif
+
+}
+#endif
+
+// Loop through all MBs. Note activity of each, average activity and
+// calculate a normalized activity for each
+//
+// Each macroblock's activity is stored through x->mb_activity_ptr, which is
+// advanced once per macroblock, and the running sum is handed to
+// calc_av_activity().  x->src.y_buffer is stepped across the frame as a side
+// effect.
+// NOTE(review): neither x->mb_activity_ptr nor x->src.y_buffer is reset to
+// the start of the frame here - presumably the caller sets them up; confirm.
+static void build_activity_map(VP9_COMP *cpi) {
+ MACROBLOCK *const x = &cpi->mb;
+ MACROBLOCKD *xd = &x->e_mbd;
+ VP9_COMMON *const cm = &cpi->common;
+
+#if ALT_ACT_MEASURE
+ // The alternative measure uses the new frame buffer as its destination
+ // (see xd->dst.y_buffer below).
+ YV12_BUFFER_CONFIG *new_yv12 = &cm->yv12_fb[cm->new_fb_idx];
+ int recon_yoffset;
+ int recon_y_stride = new_yv12->y_stride;
+#endif
+
+ int mb_row, mb_col;
+ unsigned int mb_activity;
+ int64_t activity_sum = 0;
+
+ // for each macroblock row in image
+ for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) {
+#if ALT_ACT_MEASURE
+ // reset above block coeffs
+ xd->up_available = (mb_row != 0);
+ // Offset of the first luma sample of this macroblock row (16 lines per row)
+ recon_yoffset = (mb_row * recon_y_stride * 16);
+#endif
+ // for each macroblock col in image
+ for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
+#if ALT_ACT_MEASURE
+ xd->dst.y_buffer = new_yv12->y_buffer + recon_yoffset;
+ xd->left_available = (mb_col != 0);
+ recon_yoffset += 16;
+#endif
+#if !CONFIG_SUPERBLOCKS
+ // Copy current mb to a buffer
+ vp9_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16);
+#endif
+
+ // measure activity
+ mb_activity = mb_activity_measure(cpi, x, mb_row, mb_col);
+
+ // Keep frame sum
+ activity_sum += mb_activity;
+
+ // Store MB level activity details.
+ *x->mb_activity_ptr = mb_activity;
+
+ // Increment activity map pointer
+ x->mb_activity_ptr++;
+
+ // adjust to the next column of source macroblocks
+ x->src.y_buffer += 16;
+ }
+
+
+ // adjust to the next row of mbs
+ // (down 16 lines, and back by the 16 * mb_cols samples stepped above)
+ x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols;
+
+#if ALT_ACT_MEASURE
+ // extend the recon for intra prediction
+ vp9_extend_mb_row(new_yv12, xd->dst.y_buffer + 16,
+ xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);
+#endif
+
+ }
+
+ // Calculate an "average" MB activity
+ calc_av_activity(cpi, activity_sum);
+
+#if USE_ACT_INDEX
+ // Calculate an activity index number of each mb
+ calc_activity_index(cpi, x);
+#endif
+
+}
+
+// Macroblock activity masking: rescale rdmult by this MB's activity.
+void vp9_activity_masking(VP9_COMP *cpi, MACROBLOCK *x) {
+#if USE_ACT_INDEX
+  // The map entry is a precomputed index: step rdmult in quarter units.
+  x->rdmult += *(x->mb_activity_ptr) * (x->rdmult >> 2);
+#else
+  // The map entry is a raw activity: weight rdmult by the rounded ratio
+  // (2 * act + avg) / (act + 2 * avg).
+  const int64_t mb_act = *(x->mb_activity_ptr);
+  const int64_t denom = mb_act + (2 * cpi->activity_avg);
+  const int64_t numer = (2 * mb_act) + cpi->activity_avg;
+  x->rdmult = (unsigned int)(((int64_t)x->rdmult * numer + (denom >> 1)) /
+                             denom);
+#endif
+
+  // Derive errorperbit from the new multiplier; never let it be zero.
+  x->errorperbit = x->rdmult * 100 / (110 * x->rddiv);
+  if (x->errorperbit == 0)
+    x->errorperbit = 1;
+
+  // Activity based Zbin adjustment
+  adjust_act_zbin(cpi, x);
+}
+
+#if CONFIG_NEW_MVREF
+// Bit cost of coding mv_ref_id (0..3) as a truncated unary index.
+static int vp9_cost_mv_ref_id(vp9_prob * ref_id_probs, int mv_ref_id) {
+  int cost = 0;  // defined result even if the default case is reached
+  // Encode the index for the MV reference.
+  switch (mv_ref_id) {
+    case 0:
+      cost = vp9_cost_zero(ref_id_probs[0]);
+      break;
+    case 1:
+      cost = vp9_cost_one(ref_id_probs[0]);
+      cost += vp9_cost_zero(ref_id_probs[1]);
+      break;
+    case 2:
+      cost = vp9_cost_one(ref_id_probs[0]);
+      cost += vp9_cost_one(ref_id_probs[1]);
+      cost += vp9_cost_zero(ref_id_probs[2]);
+      break;
+    case 3:
+      cost = vp9_cost_one(ref_id_probs[0]);
+      cost += vp9_cost_one(ref_id_probs[1]);
+      cost += vp9_cost_one(ref_id_probs[2]);
+      break;
+
+    // TRAP.. This should not happen; assert() is a no-op under NDEBUG.
+    default:
+      assert(0);
+      break;
+  }
+  return cost;
+}
+
+// Find the cheapest reference for target_mv: sets *best_ref, returns its index.
+static unsigned int pick_best_mv_ref(MACROBLOCK *x,
+ MV_REFERENCE_FRAME ref_frame,
+ int_mv target_mv,
+ int_mv * mv_ref_list,
+ int_mv * best_ref) {
+ int i;
+ int best_index = 0;
+ int cost, cost2;
+ int zero_seen = (mv_ref_list[0].as_int) ? FALSE : TRUE;
+ MACROBLOCKD *xd = &x->e_mbd;
+ int max_mv = MV_MAX;
+ // Baseline: candidate 0 is costed without the out-of-range check below.
+ cost = vp9_cost_mv_ref_id(xd->mb_mv_ref_id_probs[ref_frame], 0) +
+ vp9_mv_bit_cost(&target_mv, &mv_ref_list[0], x->nmvjointcost,
+ x->mvcost, 96, xd->allow_high_precision_mv);
+
+ // Use 4 for now : for (i = 1; i < MAX_MV_REFS; ++i ) {
+ for (i = 1; i < 4; ++i) {
+ // If we see a 0,0 reference vector for a second time we have reached
+ // the end of the list of valid candidate vectors.
+ if (!mv_ref_list[i].as_int) {
+ if (zero_seen)
+ break;
+ else
+ zero_seen = TRUE;
+ }
+
+ // Check for cases where the reference choice would give rise to an
+ // uncodable/out of range residual for row or col.
+ if ((abs(target_mv.as_mv.row - mv_ref_list[i].as_mv.row) > max_mv) ||
+ (abs(target_mv.as_mv.col - mv_ref_list[i].as_mv.col) > max_mv)) {
+ continue;
+ }
+
+ cost2 = vp9_cost_mv_ref_id(xd->mb_mv_ref_id_probs[ref_frame], i) +
+ vp9_mv_bit_cost(&target_mv, &mv_ref_list[i], x->nmvjointcost,
+ x->mvcost, 96, xd->allow_high_precision_mv);
+ // Strict '<' keeps the lowest index when costs tie.
+ if (cost2 < cost) {
+ cost = cost2;
+ best_index = i;
+ }
+ }
+
+ best_ref->as_int = mv_ref_list[best_index].as_int;
+
+ return best_index;
+}
+#endif
+// Commit the mode decision saved in ctx to the current MB; update counters.
+static void update_state(VP9_COMP *cpi, MACROBLOCK *x,
+ PICK_MODE_CONTEXT *ctx) {
+ int i;
+ MACROBLOCKD *xd = &x->e_mbd;
+ MODE_INFO *mi = &ctx->mic;
+ MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi;
+ int mb_mode = mi->mbmi.mode;
+ int mb_mode_index = ctx->best_mode_index;
+
+#if CONFIG_DEBUG
+ assert(mb_mode < MB_MODE_COUNT);
+ assert(mb_mode_index < MAX_MODES);
+ assert(mi->mbmi.ref_frame < MAX_REF_FRAMES);
+#endif
+
+ // Restore the coding context of the MB to that which was in place
+ // when the mode was picked for it
+ vpx_memcpy(xd->mode_info_context, mi, sizeof(MODE_INFO));
+#if CONFIG_SUPERBLOCKS
+ if (mi->mbmi.encoded_as_sb) {  // copy to the other 3 MBs, edges permitting
+ const int mis = cpi->common.mode_info_stride;
+ if (xd->mb_to_right_edge >= 0)
+ vpx_memcpy(xd->mode_info_context + 1, mi, sizeof(MODE_INFO));
+ if (xd->mb_to_bottom_edge >= 0) {
+ vpx_memcpy(xd->mode_info_context + mis, mi, sizeof(MODE_INFO));
+ if (xd->mb_to_right_edge >= 0)
+ vpx_memcpy(xd->mode_info_context + mis + 1, mi, sizeof(MODE_INFO));
+ }
+ }
+#endif
+ // Mirror the per-block modes / partition data the chosen mode relies on.
+ if (mb_mode == B_PRED) {
+ for (i = 0; i < 16; i++) {
+ xd->block[i].bmi.as_mode = xd->mode_info_context->bmi[i].as_mode;
+ assert(xd->block[i].bmi.as_mode.first < B_MODE_COUNT);
+ }
+ } else if (mb_mode == I8X8_PRED) {
+ for (i = 0; i < 16; i++) {
+ xd->block[i].bmi = xd->mode_info_context->bmi[i];
+ }
+ } else if (mb_mode == SPLITMV) {
+ vpx_memcpy(x->partition_info, &ctx->partition_info,
+ sizeof(PARTITION_INFO));
+ // MB-level MVs take the values of the last sub-block (index 15).
+ mbmi->mv[0].as_int = x->partition_info->bmi[15].mv.as_int;
+ mbmi->mv[1].as_int = x->partition_info->bmi[15].second_mv.as_int;
+ }
+ // Skip the tx-size RD tally only when SEG_LVL_EOB is active with data 0.
+ {
+ int segment_id = mbmi->segment_id;
+ if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) ||
+ vp9_get_segdata(xd, segment_id, SEG_LVL_EOB)) {
+ for (i = 0; i < NB_TXFM_MODES; i++) {
+ cpi->rd_tx_select_diff[i] += ctx->txfm_rd_diff[i];
+ }
+ }
+ }
+
+ if (cpi->common.frame_type == KEY_FRAME) {
+ // Restore the coding modes to that held in the coding context
+ // if (mb_mode == B_PRED)
+ // for (i = 0; i < 16; i++)
+ // {
+ // xd->block[i].bmi.as_mode =
+ // xd->mode_info_context->bmi[i].as_mode;
+ // assert(xd->mode_info_context->bmi[i].as_mode < MB_MODE_COUNT);
+ // }
+#if CONFIG_INTERNAL_STATS
+ static const int kf_mode_index[] = {
+ THR_DC /*DC_PRED*/,
+ THR_V_PRED /*V_PRED*/,
+ THR_H_PRED /*H_PRED*/,
+ THR_D45_PRED /*D45_PRED*/,
+ THR_D135_PRED /*D135_PRED*/,
+ THR_D117_PRED /*D117_PRED*/,
+ THR_D153_PRED /*D153_PRED*/,
+ THR_D27_PRED /*D27_PRED*/,
+ THR_D63_PRED /*D63_PRED*/,
+ THR_TM /*TM_PRED*/,
+ THR_I8X8_PRED /*I8X8_PRED*/,
+ THR_B_PRED /*B_PRED*/,
+ };
+ cpi->mode_chosen_counts[kf_mode_index[mb_mode]]++;
+#endif
+ } else {
+ /*
+ // Reduce the activation RD thresholds for the best choice mode
+ if ((cpi->rd_baseline_thresh[mb_mode_index] > 0) &&
+ (cpi->rd_baseline_thresh[mb_mode_index] < (INT_MAX >> 2)))
+ {
+ int best_adjustment = (cpi->rd_thresh_mult[mb_mode_index] >> 2);
+
+ cpi->rd_thresh_mult[mb_mode_index] =
+ (cpi->rd_thresh_mult[mb_mode_index]
+ >= (MIN_THRESHMULT + best_adjustment)) ?
+ cpi->rd_thresh_mult[mb_mode_index] - best_adjustment :
+ MIN_THRESHMULT;
+ cpi->rd_threshes[mb_mode_index] =
+ (cpi->rd_baseline_thresh[mb_mode_index] >> 7)
+ * cpi->rd_thresh_mult[mb_mode_index];
+
+ }
+ */
+ // Note how often each mode chosen as best
+ cpi->mode_chosen_counts[mb_mode_index]++;
+ if (mbmi->mode == SPLITMV || mbmi->mode == NEWMV) {
+ int_mv best_mv, best_second_mv;
+ MV_REFERENCE_FRAME rf = mbmi->ref_frame;
+#if CONFIG_NEW_MVREF
+ unsigned int best_index;
+ MV_REFERENCE_FRAME sec_ref_frame = mbmi->second_ref_frame;
+#endif
+ best_mv.as_int = ctx->best_ref_mv.as_int;
+ best_second_mv.as_int = ctx->second_best_ref_mv.as_int;
+ if (mbmi->mode == NEWMV) {  // NOTE(review): ref_mvs[second_ref_frame] read without a > 0 check
+ best_mv.as_int = mbmi->ref_mvs[rf][0].as_int;
+ best_second_mv.as_int = mbmi->ref_mvs[mbmi->second_ref_frame][0].as_int;
+#if CONFIG_NEW_MVREF
+ best_index = pick_best_mv_ref(x, rf, mbmi->mv[0],
+ mbmi->ref_mvs[rf], &best_mv);
+ mbmi->best_index = best_index;
+
+ if (mbmi->second_ref_frame > 0) {
+ unsigned int best_index;
+ best_index =
+ pick_best_mv_ref(x, sec_ref_frame, mbmi->mv[1],
+ mbmi->ref_mvs[sec_ref_frame],
+ &best_second_mv);
+ mbmi->best_second_index = best_index;
+ }
+#endif
+ }
+ mbmi->best_mv.as_int = best_mv.as_int;
+ mbmi->best_second_mv.as_int = best_second_mv.as_int;
+ vp9_update_nmv_count(cpi, x, &best_mv, &best_second_mv);
+ }
+#if CONFIG_COMP_INTERINTRA_PRED
+ if (mbmi->mode >= NEARESTMV && mbmi->mode < SPLITMV &&
+ mbmi->second_ref_frame <= INTRA_FRAME) {
+ if (mbmi->second_ref_frame == INTRA_FRAME) {
+ ++cpi->interintra_count[1];
+ ++cpi->ymode_count[mbmi->interintra_mode];
+#if SEPARATE_INTERINTRA_UV
+ ++cpi->y_uv_mode_count[mbmi->interintra_mode][mbmi->interintra_uv_mode];
+#endif
+ } else {
+ ++cpi->interintra_count[0];
+ }
+ }
+ if (cpi->common.mcomp_filter_type == SWITCHABLE &&
+ mbmi->mode >= NEARESTMV &&
+ mbmi->mode <= SPLITMV) {
+ ++cpi->switchable_interp_count
+ [vp9_get_pred_context(&cpi->common, xd, PRED_SWITCHABLE_INTERP)]
+ [vp9_switchable_interp_map[mbmi->interp_filter]];
+ }
+#endif  // NOTE(review): this guard also covers the interp-filter counts above
+ // Frame-level error totals and prediction-type RD accumulators.
+ cpi->prediction_error += ctx->distortion;
+ cpi->intra_error += ctx->intra_error;
+
+ cpi->rd_comp_pred_diff[SINGLE_PREDICTION_ONLY] += ctx->single_pred_diff;
+ cpi->rd_comp_pred_diff[COMP_PREDICTION_ONLY] += ctx->comp_pred_diff;
+ cpi->rd_comp_pred_diff[HYBRID_PREDICTION] += ctx->hybrid_pred_diff;
+ }
+}
+// Pick modes for the (up to 4) MBs of one SB coded separately; sum rate/dist.
+static void pick_mb_modes(VP9_COMP *cpi,
+ VP9_COMMON *cm,
+ int mb_row,
+ int mb_col,
+ MACROBLOCK *x,
+ MACROBLOCKD *xd,
+ TOKENEXTRA **tp,
+ int *totalrate,
+ int *totaldist) {
+ int i;
+ int map_index;
+ int recon_yoffset, recon_uvoffset;
+ int ref_fb_idx = cm->lst_fb_idx;
+ int dst_fb_idx = cm->new_fb_idx;
+ int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
+ int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
+ ENTROPY_CONTEXT_PLANES left_context[2];
+ ENTROPY_CONTEXT_PLANES above_context[2];
+ ENTROPY_CONTEXT_PLANES *initial_above_context_ptr = cm->above_context
+ + mb_col;
+
+ // Offsets to move pointers from MB to MB within a SB in raster order
+ int row_delta[4] = { 0, +1, 0, -1};
+ int col_delta[4] = { +1, -1, +1, +1};
+ // Net effect of the four steps: same row, two columns right (next SB).
+ /* Function should not modify L & A contexts; save and restore on exit */
+ vpx_memcpy(left_context,
+ cm->left_context,
+ sizeof(left_context));
+ vpx_memcpy(above_context,
+ initial_above_context_ptr,
+ sizeof(above_context));
+
+ /* Encode MBs in raster order within the SB */
+ for (i = 0; i < 4; i++) {
+ int dy = row_delta[i];
+ int dx = col_delta[i];
+ int offset_unextended = dy * cm->mb_cols + dx;
+ int offset_extended = dy * xd->mode_info_stride + dx;
+ MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi;
+ // mbmi is taken afresh each pass: mode_info_context moves with the MB.
+ // TODO Many of the index items here can be computed more efficiently!
+
+ if ((mb_row >= cm->mb_rows) || (mb_col >= cm->mb_cols)) {
+ // MB lies outside frame, move on
+ mb_row += dy;
+ mb_col += dx;
+
+ // Update pointers
+ x->src.y_buffer += 16 * (dx + dy * x->src.y_stride);
+ x->src.u_buffer += 8 * (dx + dy * x->src.uv_stride);
+ x->src.v_buffer += 8 * (dx + dy * x->src.uv_stride);
+
+ x->gf_active_ptr += offset_unextended;
+ x->partition_info += offset_extended;
+ xd->mode_info_context += offset_extended;
+ xd->prev_mode_info_context += offset_extended;
+#if CONFIG_DEBUG
+ assert((xd->prev_mode_info_context - cpi->common.prev_mip) ==
+ (xd->mode_info_context - cpi->common.mip));
+#endif
+ continue;
+ }
+
+ // Index of the MB in the SB 0..3
+ xd->mb_index = i;
+
+ map_index = (mb_row * cpi->common.mb_cols) + mb_col;
+ x->mb_activity_ptr = &cpi->mb_activity_map[map_index];
+
+ // set above context pointer
+ xd->above_context = cm->above_context + mb_col;
+
+ // Restore the appropriate left context depending on which
+ // row in the SB the MB is situated
+ xd->left_context = cm->left_context + (i >> 1);
+
+ // Set up distance of MB to edge of frame in 1/8th pel units
+ xd->mb_to_top_edge = -((mb_row * 16) << 3);
+ xd->mb_to_left_edge = -((mb_col * 16) << 3);
+ xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3;
+ xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3;
+
+ // Set up limit values for MV components to prevent them from
+ // extending beyond the UMV borders assuming 16x16 block size
+ x->mv_row_min = -((mb_row * 16) + VP9BORDERINPIXELS - VP9_INTERP_EXTEND);
+ x->mv_col_min = -((mb_col * 16) + VP9BORDERINPIXELS - VP9_INTERP_EXTEND);
+ x->mv_row_max = ((cm->mb_rows - mb_row) * 16 +
+ (VP9BORDERINPIXELS - 16 - VP9_INTERP_EXTEND));
+ x->mv_col_max = ((cm->mb_cols - mb_col) * 16 +
+ (VP9BORDERINPIXELS - 16 - VP9_INTERP_EXTEND));
+
+ xd->up_available = (mb_row != 0);
+ xd->left_available = (mb_col != 0);
+
+ recon_yoffset = (mb_row * recon_y_stride * 16) + (mb_col * 16);
+ recon_uvoffset = (mb_row * recon_uv_stride * 8) + (mb_col * 8);
+
+ xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
+ xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
+ xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
+
+#if !CONFIG_SUPERBLOCKS
+ // Copy current MB to a work buffer
+ vp9_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16);
+#endif
+
+ x->rddiv = cpi->RDDIV;
+ x->rdmult = cpi->RDMULT;
+
+ if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
+ vp9_activity_masking(cpi, x);
+
+ // Is segmentation enabled
+ if (xd->segmentation_enabled) {
+ // Code to set segment id in xd->mbmi.segment_id
+ if (xd->update_mb_segmentation_map)
+ mbmi->segment_id = cpi->segmentation_map[map_index];
+ else
+ mbmi->segment_id = cm->last_frame_seg_map[map_index];
+ if (mbmi->segment_id > 3)
+ mbmi->segment_id = 0;
+
+ vp9_mb_init_quantizer(cpi, x);
+ } else
+ // Set to Segment 0 by default
+ mbmi->segment_id = 0;
+
+ x->active_ptr = cpi->active_map + map_index;
+
+#if CONFIG_SUPERBLOCKS
+ xd->mode_info_context->mbmi.encoded_as_sb = 0;
+#endif
+
+ cpi->update_context = 0; // TODO Do we need this now??
+
+ vp9_intra_prediction_down_copy(xd);
+
+#ifdef ENC_DEBUG
+ enc_debug = (cpi->common.current_video_frame == 46 &&
+ mb_row == 5 && mb_col == 2);
+#endif
+ // Find best coding mode & reconstruct the MB so it is available
+ // as a predictor for MBs that follow in the SB
+ if (cm->frame_type == KEY_FRAME) {
+ int r, d;
+#ifdef ENC_DEBUG
+ if (enc_debug)
+ printf("intra pick_mb_modes %d %d\n", mb_row, mb_col);
+#endif
+ vp9_rd_pick_intra_mode(cpi, x, &r, &d);
+ *totalrate += r;
+ *totaldist += d;
+
+ // Dummy encode, do not do the tokenization
+ encode_macroblock(cpi, x, tp,
+ recon_yoffset, recon_uvoffset, 0, mb_col, mb_row);
+ // Note the encoder may have changed the segment_id
+
+ // Save the coding context
+ vpx_memcpy(&x->mb_context[i].mic, xd->mode_info_context,
+ sizeof(MODE_INFO));
+ } else {
+ int seg_id, r, d;
+ // seg0_progress is a ratio scaled by 1 << 16 (see both branches).
+ if (xd->segmentation_enabled && cpi->seg0_cnt > 0 &&
+ !vp9_segfeature_active(xd, 0, SEG_LVL_REF_FRAME) &&
+ vp9_segfeature_active(xd, 1, SEG_LVL_REF_FRAME) &&
+ vp9_check_segref(xd, 1, INTRA_FRAME) +
+ vp9_check_segref(xd, 1, LAST_FRAME) +
+ vp9_check_segref(xd, 1, GOLDEN_FRAME) +
+ vp9_check_segref(xd, 1, ALTREF_FRAME) == 1) {
+ cpi->seg0_progress = (cpi->seg0_idx << 16) / cpi->seg0_cnt;
+ } else {
+ cpi->seg0_progress = (((mb_col & ~1) * 2 + (mb_row & ~1) * cm->mb_cols + i) << 16) / cm->MBs;
+ }
+
+#ifdef ENC_DEBUG
+ if (enc_debug)
+ printf("inter pick_mb_modes %d %d\n", mb_row, mb_col);
+#endif
+ vp9_pick_mode_inter_macroblock(cpi, x, recon_yoffset,
+ recon_uvoffset, &r, &d);
+ *totalrate += r;
+ *totaldist += d;
+
+ // Dummy encode, do not do the tokenization
+ encode_macroblock(cpi, x, tp,
+ recon_yoffset, recon_uvoffset, 0, mb_col, mb_row);
+
+ seg_id = mbmi->segment_id;
+ if (cpi->mb.e_mbd.segmentation_enabled && seg_id == 0) {
+ cpi->seg0_idx++;
+ }
+ if (!xd->segmentation_enabled ||
+ !vp9_segfeature_active(xd, seg_id, SEG_LVL_REF_FRAME) ||
+ vp9_check_segref(xd, seg_id, INTRA_FRAME) +
+ vp9_check_segref(xd, seg_id, LAST_FRAME) +
+ vp9_check_segref(xd, seg_id, GOLDEN_FRAME) +
+ vp9_check_segref(xd, seg_id, ALTREF_FRAME) > 1) {
+ // Get the prediction context and status
+ int pred_flag = vp9_get_pred_flag(xd, PRED_REF);
+ int pred_context = vp9_get_pred_context(cm, xd, PRED_REF);
+
+ // Count prediction success
+ cpi->ref_pred_count[pred_context][pred_flag]++;
+ }
+ }
+
+ // Next MB
+ mb_row += dy;
+ mb_col += dx;
+
+ x->src.y_buffer += 16 * (dx + dy * x->src.y_stride);
+ x->src.u_buffer += 8 * (dx + dy * x->src.uv_stride);
+ x->src.v_buffer += 8 * (dx + dy * x->src.uv_stride);
+
+ x->gf_active_ptr += offset_unextended;
+ x->partition_info += offset_extended;
+ xd->mode_info_context += offset_extended;
+ xd->prev_mode_info_context += offset_extended;
+
+#if CONFIG_DEBUG
+ assert((xd->prev_mode_info_context - cpi->common.prev_mip) ==
+ (xd->mode_info_context - cpi->common.mip));
+#endif
+ }
+
+ /* Restore L & A coding context to those in place on entry */
+ vpx_memcpy(cm->left_context,
+ left_context,
+ sizeof(left_context));
+ vpx_memcpy(initial_above_context_ptr,
+ above_context,
+ sizeof(above_context));
+}
+
+#if CONFIG_SUPERBLOCKS
+// Choose the best coding mode for the superblock whose top-left MB is at
+// (mb_row, mb_col), treating the 2x2 group of MBs as a single unit.
+// The rate and distortion of that choice are reported through *totalrate
+// and *totaldist by the vp9_rd_pick_*_mode_sb() call; tp is unused here.
+// The left/above entropy contexts are saved on entry and restored on exit.
+static void pick_sb_modes(VP9_COMP *cpi,
+                          VP9_COMMON *cm,
+                          int mb_row,
+                          int mb_col,
+                          MACROBLOCK *x,
+                          MACROBLOCKD *xd,
+                          TOKENEXTRA **tp,
+                          int *totalrate,
+                          int *totaldist) {
+  const int ref_fb_idx = cm->lst_fb_idx;
+  const int dst_fb_idx = cm->new_fb_idx;
+  const int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
+  const int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
+  // Offsets of this SB's top-left pixel within the luma / chroma planes.
+  const int recon_yoffset = (mb_row * recon_y_stride * 16) + (mb_col * 16);
+  const int recon_uvoffset = (mb_row * recon_uv_stride * 8) + (mb_col * 8);
+  // Raster index of the SB's top-left MB in the per-MB maps.
+  const int map_index = (mb_row * cpi->common.mb_cols) + mb_col;
+  ENTROPY_CONTEXT_PLANES *const above_start = cm->above_context + mb_col;
+  ENTROPY_CONTEXT_PLANES saved_left[2];
+  ENTROPY_CONTEXT_PLANES saved_above[2];
+
+  /* Function should not modify L & A contexts; save and restore on exit */
+  vpx_memcpy(saved_left, cm->left_context, sizeof(saved_left));
+  vpx_memcpy(saved_above, above_start, sizeof(saved_above));
+
+  x->mb_activity_ptr = &cpi->mb_activity_map[map_index];
+
+  /* set above context pointer */
+  xd->above_context = cm->above_context + mb_col;
+
+  /* The SB always starts from the first left-context entry */
+  xd->left_context = cm->left_context;
+
+  // Set up distance of SB to edge of frame in 1/8th pel units. The far
+  // edges use "- 2" because the block covers two MBs in each direction.
+  xd->mb_to_top_edge = -((mb_row * 16) << 3);
+  xd->mb_to_left_edge = -((mb_col * 16) << 3);
+  xd->mb_to_bottom_edge = ((cm->mb_rows - 2 - mb_row) * 16) << 3;
+  xd->mb_to_right_edge = ((cm->mb_cols - 2 - mb_col) * 16) << 3;
+
+  /* Set up limit values for MV components to prevent them from
+   * extending beyond the UMV borders; the max limits allow for a block
+   * that is 32 pixels wide and high */
+  x->mv_row_min = -((mb_row * 16) + VP9BORDERINPIXELS - VP9_INTERP_EXTEND);
+  x->mv_col_min = -((mb_col * 16) + VP9BORDERINPIXELS - VP9_INTERP_EXTEND);
+  x->mv_row_max = ((cm->mb_rows - mb_row) * 16 +
+                   (VP9BORDERINPIXELS - 32 - VP9_INTERP_EXTEND));
+  x->mv_col_max = ((cm->mb_cols - mb_col) * 16 +
+                   (VP9BORDERINPIXELS - 32 - VP9_INTERP_EXTEND));
+
+  xd->up_available = (mb_row != 0);
+  xd->left_available = (mb_col != 0);
+
+  xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
+  xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
+  xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
+
+#if 0 // FIXME
+  /* Copy current MB to a work buffer */
+  vp9_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16);
+#endif
+
+  // Start from the frame-level RD constants; SSIM tuning may rescale them.
+  x->rddiv = cpi->RDDIV;
+  x->rdmult = cpi->RDMULT;
+  if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
+    vp9_activity_masking(cpi, x);
+
+  if (!xd->segmentation_enabled) {
+    /* Set to Segment 0 by default */
+    xd->mode_info_context->mbmi.segment_id = 0;
+  } else {
+    MB_MODE_INFO *const sb_mbmi = &xd->mode_info_context->mbmi;
+
+    /* Code to set segment id. '&&' is a logical AND, so the id is 1 only
+     * when all four MBs of the SB have a non-zero map entry, else 0. */
+    if (xd->update_mb_segmentation_map) {
+      sb_mbmi->segment_id =
+          cpi->segmentation_map[map_index] &&
+          cpi->segmentation_map[map_index + 1] &&
+          cpi->segmentation_map[map_index + cm->mb_cols] &&
+          cpi->segmentation_map[map_index + cm->mb_cols + 1];
+    } else {
+      sb_mbmi->segment_id =
+          cm->last_frame_seg_map[map_index] &&
+          cm->last_frame_seg_map[map_index + 1] &&
+          cm->last_frame_seg_map[map_index + cm->mb_cols] &&
+          cm->last_frame_seg_map[map_index + cm->mb_cols + 1];
+    }
+    if (sb_mbmi->segment_id > 3)
+      sb_mbmi->segment_id = 0;
+
+    vp9_mb_init_quantizer(cpi, x);
+  }
+
+  x->active_ptr = cpi->active_map + map_index;
+
+  cpi->update_context = 0;    // TODO Do we need this now??
+
+  /* Find best coding mode & reconstruct the MB so it is available
+   * as a predictor for MBs that follow in the SB */
+  if (cm->frame_type == KEY_FRAME) {
+    vp9_rd_pick_intra_mode_sb(cpi, x, totalrate, totaldist);
+
+    /* Save the coding context */
+    vpx_memcpy(&x->sb_context[0].mic, xd->mode_info_context,
+               sizeof(MODE_INFO));
+  } else {
+    // seg0_progress is a ratio scaled by 1 << 16: seg0_idx / seg0_cnt in
+    // the first branch, this SB's raster position / cm->MBs in the second.
+    if (xd->segmentation_enabled && cpi->seg0_cnt > 0 &&
+        !vp9_segfeature_active(xd, 0, SEG_LVL_REF_FRAME) &&
+        vp9_segfeature_active(xd, 1, SEG_LVL_REF_FRAME) &&
+        (vp9_check_segref(xd, 1, INTRA_FRAME) +
+         vp9_check_segref(xd, 1, LAST_FRAME) +
+         vp9_check_segref(xd, 1, GOLDEN_FRAME) +
+         vp9_check_segref(xd, 1, ALTREF_FRAME)) == 1) {
+      cpi->seg0_progress = (cpi->seg0_idx << 16) / cpi->seg0_cnt;
+    } else {
+      cpi->seg0_progress =
+          (((mb_col & ~1) * 2 + (mb_row & ~1) * cm->mb_cols) << 16) / cm->MBs;
+    }
+
+    vp9_rd_pick_inter_mode_sb(cpi, x,
+                              recon_yoffset,
+                              recon_uvoffset,
+                              totalrate,
+                              totaldist);
+  }
+
+  /* Restore L & A coding context to those in place on entry */
+  vpx_memcpy(cm->left_context, saved_left, sizeof(saved_left));
+  vpx_memcpy(above_start, saved_above, sizeof(saved_above));
+}
+#endif
+
+// Final encode of one SB at (mbrow, mbcol) using the modes already picked.
+// Coded either as one superblock (when encoded_as_sb is set) or as up to
+// four MBs in raster order; MBs that fall outside the frame are skipped.
+static void encode_sb(VP9_COMP *cpi,
+                      VP9_COMMON *cm,
+                      int mbrow,
+                      int mbcol,
+                      MACROBLOCK *x,
+                      MACROBLOCKD *xd,
+                      TOKENEXTRA **tp) {
+  int i;
+  int map_index;
+  int mb_row, mb_col;
+  int recon_yoffset, recon_uvoffset;
+  int ref_fb_idx = cm->lst_fb_idx;
+  int dst_fb_idx = cm->new_fb_idx;
+  int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
+  int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
+  int row_delta[4] = { 0, +1,  0, -1};
+  int col_delta[4] = { +1, -1, +1, +1};
+
+  mb_row = mbrow;
+  mb_col = mbcol;
+
+  /* Encode MBs in raster order within the SB */
+  for (i = 0; i < 4; i++) {
+    int dy = row_delta[i];
+    int dx = col_delta[i];
+    int offset_extended = dy * xd->mode_info_stride + dx;
+    int offset_unextended = dy * cm->mb_cols + dx;
+    MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi;
+
+    if ((mb_row >= cm->mb_rows) || (mb_col >= cm->mb_cols)) {
+      // MB lies outside frame, move on
+      mb_row += dy;
+      mb_col += dx;
+
+      x->src.y_buffer += 16 * (dx + dy * x->src.y_stride);
+      x->src.u_buffer += 8 * (dx + dy * x->src.uv_stride);
+      x->src.v_buffer += 8 * (dx + dy * x->src.uv_stride);
+
+      x->gf_active_ptr += offset_unextended;
+      x->partition_info += offset_extended;
+      xd->mode_info_context += offset_extended;
+      xd->prev_mode_info_context += offset_extended;
+
+#if CONFIG_DEBUG
+      assert((xd->prev_mode_info_context - cpi->common.prev_mip) ==
+             (xd->mode_info_context - cpi->common.mip));
+#endif
+      continue;
+    }
+
+    xd->mb_index = i;
+
+    // Restore MB state to that when it was picked
+#if CONFIG_SUPERBLOCKS
+    if (xd->mode_info_context->mbmi.encoded_as_sb) {
+      update_state(cpi, x, &x->sb_context[i]);
+      cpi->sb_count++;
+    } else
+#endif
+      update_state(cpi, x, &x->mb_context[i]);
+
+    map_index = (mb_row * cpi->common.mb_cols) + mb_col;
+    x->mb_activity_ptr = &cpi->mb_activity_map[map_index];
+
+    // reset above block coeffs
+    xd->above_context = cm->above_context + mb_col;
+    xd->left_context = cm->left_context + (i >> 1);
+
+    // Set up limit values for MV components to prevent them from extending
+    // beyond the UMV borders, and the distance of the block to the frame
+    // edges in 1/8th pel units. Max limits / far edges depend on block size.
+    x->mv_row_min = -((mb_row * 16) + VP9BORDERINPIXELS - VP9_INTERP_EXTEND);
+    x->mv_col_min = -((mb_col * 16) + VP9BORDERINPIXELS - VP9_INTERP_EXTEND);
+
+    xd->mb_to_top_edge = -((mb_row * 16) << 3);
+    xd->mb_to_left_edge = -((mb_col * 16) << 3);
+
+#if CONFIG_SUPERBLOCKS
+    if (xd->mode_info_context->mbmi.encoded_as_sb) {
+      x->mv_row_max = ((cm->mb_rows - mb_row) * 16 +
+                       (VP9BORDERINPIXELS - 32 - VP9_INTERP_EXTEND));
+      x->mv_col_max = ((cm->mb_cols - mb_col) * 16 +
+                       (VP9BORDERINPIXELS - 32 - VP9_INTERP_EXTEND));
+
+      xd->mb_to_bottom_edge = ((cm->mb_rows - 2 - mb_row) * 16) << 3;
+      xd->mb_to_right_edge = ((cm->mb_cols - 2 - mb_col) * 16) << 3;
+    } else {
+#endif
+      x->mv_row_max = ((cm->mb_rows - mb_row) * 16 +
+                       (VP9BORDERINPIXELS - 16 - VP9_INTERP_EXTEND));
+      x->mv_col_max = ((cm->mb_cols - mb_col) * 16 +
+                       (VP9BORDERINPIXELS - 16 - VP9_INTERP_EXTEND));
+
+      xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3;
+      xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3;
+#if CONFIG_SUPERBLOCKS
+    }
+#endif
+
+    xd->up_available = (mb_row != 0);
+    xd->left_available = (mb_col != 0);
+
+    recon_yoffset = (mb_row * recon_y_stride * 16) + (mb_col * 16);
+    recon_uvoffset = (mb_row * recon_uv_stride * 8) + (mb_col * 8);
+
+    xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
+    xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
+    xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
+
+#if !CONFIG_SUPERBLOCKS
+    // Copy current MB to a work buffer
+    vp9_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16);
+#endif
+
+    if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
+      vp9_activity_masking(cpi, x);
+
+    // Is segmentation enabled
+    if (xd->segmentation_enabled) {
+      vp9_mb_init_quantizer(cpi, x);
+    }
+
+    x->active_ptr = cpi->active_map + map_index;
+
+    cpi->update_context = 0;
+
+#if CONFIG_SUPERBLOCKS
+    if (!xd->mode_info_context->mbmi.encoded_as_sb)
+#endif
+      vp9_intra_prediction_down_copy(xd);
+
+    if (cm->frame_type == KEY_FRAME) {
+#if CONFIG_SUPERBLOCKS
+      if (xd->mode_info_context->mbmi.encoded_as_sb)
+        encode_superblock(cpi, x, tp, recon_yoffset, recon_uvoffset,
+                          mb_col, mb_row);
+      else
+#endif
+        encode_macroblock(cpi, x, tp, recon_yoffset, recon_uvoffset, 1,
+                          mb_col, mb_row);
+      // Note the encoder may have changed the segment_id
+#ifdef MODE_STATS
+      y_modes[mbmi->mode]++;
+#endif
+    } else {
+      unsigned char *segment_id;
+      int seg_ref_active;
+
+      if (xd->mode_info_context->mbmi.ref_frame) {
+        unsigned char pred_context;
+
+        pred_context = vp9_get_pred_context(cm, xd, PRED_COMP);
+
+        if (xd->mode_info_context->mbmi.second_ref_frame <= INTRA_FRAME)
+          cpi->single_pred_count[pred_context]++;
+        else
+          cpi->comp_pred_count[pred_context]++;
+      }
+
+#if CONFIG_SUPERBLOCKS
+      if (xd->mode_info_context->mbmi.encoded_as_sb)
+        encode_superblock(cpi, x, tp, recon_yoffset, recon_uvoffset,
+                          mb_col, mb_row);
+      else
+#endif
+        encode_macroblock(cpi, x, tp, recon_yoffset, recon_uvoffset, 1,
+                          mb_col, mb_row);
+      // Note the encoder may have changed the segment_id
+
+#ifdef MODE_STATS
+      inter_y_modes[mbmi->mode]++;
+
+      if (mbmi->mode == SPLITMV) {
+        int b;
+
+        for (b = 0; b < x->partition_info->count; b++) {
+          inter_b_modes[x->partition_info->bmi[b].mode]++;
+        }
+      }
+#endif
+
+      // If we have just a single reference frame coded for a segment then
+      // exclude from the reference frame counts used to work out
+      // probabilities. NOTE: At the moment we dont support custom trees
+      // for the reference frame coding for each segment but this is a
+      // possible future action.
+      segment_id = &mbmi->segment_id;
+      seg_ref_active = vp9_segfeature_active(xd, *segment_id,
+                                             SEG_LVL_REF_FRAME);
+      if (!seg_ref_active ||
+          ((vp9_check_segref(xd, *segment_id, INTRA_FRAME) +
+            vp9_check_segref(xd, *segment_id, LAST_FRAME) +
+            vp9_check_segref(xd, *segment_id, GOLDEN_FRAME) +
+            vp9_check_segref(xd, *segment_id, ALTREF_FRAME)) > 1)) {
+        cpi->count_mb_ref_frame_usage[mbmi->ref_frame]++;
+      }
+
+      // Count of last ref frame 0,0 usage
+      if ((mbmi->mode == ZEROMV) && (mbmi->ref_frame == LAST_FRAME))
+        cpi->inter_zz_count++;
+    }
+
+#if CONFIG_SUPERBLOCKS
+    if (xd->mode_info_context->mbmi.encoded_as_sb) {
+      x->src.y_buffer += 32;
+      x->src.u_buffer += 16;
+      x->src.v_buffer += 16;
+
+      x->gf_active_ptr += 2;
+      x->partition_info += 2;
+      xd->mode_info_context += 2;
+      xd->prev_mode_info_context += 2;
+
+      (*tp)->Token = EOSB_TOKEN;
+      (*tp)++;
+      if (mb_row < cm->mb_rows) cpi->tplist[mb_row].stop = *tp;
+      break;
+    }
+#endif
+
+    // Next MB
+    mb_row += dy;
+    mb_col += dx;
+
+    x->src.y_buffer += 16 * (dx + dy * x->src.y_stride);
+    x->src.u_buffer += 8 * (dx + dy * x->src.uv_stride);
+    x->src.v_buffer += 8 * (dx + dy * x->src.uv_stride);
+
+    x->gf_active_ptr += offset_unextended;
+    x->partition_info += offset_extended;
+    xd->mode_info_context += offset_extended;
+    xd->prev_mode_info_context += offset_extended;
+
+#if CONFIG_DEBUG
+    assert((xd->prev_mode_info_context - cpi->common.prev_mip) ==
+           (xd->mode_info_context - cpi->common.mip));
+#endif
+    (*tp)->Token = EOSB_TOKEN;
+    (*tp)++;
+    if (mb_row < cm->mb_rows) cpi->tplist[mb_row].stop = *tp;
+  }
+
+  // debug output
+#if DBG_PRNT_SEGMAP
+  {
+    FILE *statsfile = fopen("segmap2.stt", "a");
+    if (statsfile) {  // best effort: skip the dump if the file cannot open
+      fprintf(statsfile, "\n");
+      fclose(statsfile);
+    }
+  }
+#endif
+}
+// Encode one row of SBs: per SB, choose SB vs. 4xMB coding, then encode it.
+static
+void encode_sb_row(VP9_COMP *cpi,
+ VP9_COMMON *cm,
+ int mb_row,
+ MACROBLOCK *x,
+ MACROBLOCKD *xd,
+ TOKENEXTRA **tp,
+ int *totalrate) {
+ int mb_col;
+ int mb_cols = cm->mb_cols;
+
+ // Initialize the left context for the new SB row
+ vpx_memset(cm->left_context, 0, sizeof(cm->left_context));
+
+ // Code each SB in the row
+ for (mb_col = 0; mb_col < mb_cols; mb_col += 2) {
+ int mb_rate = 0, mb_dist = 0;
+#if CONFIG_SUPERBLOCKS
+ int sb_rate = INT_MAX, sb_dist;
+#endif
+ // Snapshot the walking pointers so the asserts below can check them.
+#if CONFIG_DEBUG
+ MODE_INFO *mic = xd->mode_info_context;
+ PARTITION_INFO *pi = x->partition_info;
+ signed char *gfa = x->gf_active_ptr;
+ unsigned char *yb = x->src.y_buffer;
+ unsigned char *ub = x->src.u_buffer;
+ unsigned char *vb = x->src.v_buffer;
+#endif
+
+#if CONFIG_SUPERBLOCKS
+ // Pick modes assuming the SB is coded as 4 independent MBs
+ xd->mode_info_context->mbmi.encoded_as_sb = 0;
+#endif
+ pick_mb_modes(cpi, cm, mb_row, mb_col, x, xd, tp, &mb_rate, &mb_dist);
+#if CONFIG_SUPERBLOCKS
+ mb_rate += vp9_cost_bit(cm->sb_coded, 0);
+#endif
+ // pick_mb_modes leaves the pointers one SB ahead; rewind them.
+ x->src.y_buffer -= 32;
+ x->src.u_buffer -= 16;
+ x->src.v_buffer -= 16;
+
+ x->gf_active_ptr -= 2;
+ x->partition_info -= 2;
+ xd->mode_info_context -= 2;
+ xd->prev_mode_info_context -= 2;
+
+#if CONFIG_DEBUG
+ assert(x->gf_active_ptr == gfa);
+ assert(x->partition_info == pi);
+ assert(xd->mode_info_context == mic);
+ assert(x->src.y_buffer == yb);
+ assert(x->src.u_buffer == ub);
+ assert(x->src.v_buffer == vb);
+#endif
+ // Only try SB coding when the whole 2x2 block lies inside the frame.
+#if CONFIG_SUPERBLOCKS
+ if (!((( mb_cols & 1) && mb_col == mb_cols - 1) ||
+ ((cm->mb_rows & 1) && mb_row == cm->mb_rows - 1))) {
+ /* Pick a mode assuming that it applies to all 4 of the MBs in the SB */
+ xd->mode_info_context->mbmi.encoded_as_sb = 1;
+ pick_sb_modes(cpi, cm, mb_row, mb_col, x, xd, tp, &sb_rate, &sb_dist);
+ sb_rate += vp9_cost_bit(cm->sb_coded, 1);  // NOTE(review): overflows if still INT_MAX
+ }
+
+ /* Decide whether to encode as a SB or 4xMBs */
+ if (sb_rate < INT_MAX &&
+ RDCOST(x->rdmult, x->rddiv, sb_rate, sb_dist) <
+ RDCOST(x->rdmult, x->rddiv, mb_rate, mb_dist)) {
+ xd->mode_info_context->mbmi.encoded_as_sb = 1;
+ xd->mode_info_context[1].mbmi.encoded_as_sb = 1;
+ xd->mode_info_context[cm->mode_info_stride].mbmi.encoded_as_sb = 1;
+ xd->mode_info_context[1 + cm->mode_info_stride].mbmi.encoded_as_sb = 1;
+ *totalrate += sb_rate;
+ } else
+#endif
+ {
+#if CONFIG_SUPERBLOCKS
+ xd->mode_info_context->mbmi.encoded_as_sb = 0;
+ if (cm->mb_cols - 1 > mb_col)
+ xd->mode_info_context[1].mbmi.encoded_as_sb = 0;
+ if (cm->mb_rows - 1 > mb_row) {
+ xd->mode_info_context[cm->mode_info_stride].mbmi.encoded_as_sb = 0;
+ if (cm->mb_cols - 1 > mb_col)
+ xd->mode_info_context[1 + cm->mode_info_stride].mbmi.encoded_as_sb = 0;
+ }
+#endif
+ *totalrate += mb_rate;
+ }
+
+ /* Encode SB using best computed mode(s) */
+ encode_sb(cpi, cm, mb_row, mb_col, x, xd, tp);
+
+#if CONFIG_DEBUG
+ assert(x->gf_active_ptr == gfa + 2);
+ assert(x->partition_info == pi + 2);
+ assert(xd->mode_info_context == mic + 2);
+ assert(x->src.y_buffer == yb + 32);
+ assert(x->src.u_buffer == ub + 16);
+ assert(x->src.v_buffer == vb + 16);
+#endif
+ }
+
+ // this is to account for the border
+ x->gf_active_ptr += mb_cols - (mb_cols & 0x1);
+ x->partition_info += xd->mode_info_stride + 1 - (mb_cols & 0x1);
+ xd->mode_info_context += xd->mode_info_stride + 1 - (mb_cols & 0x1);
+ xd->prev_mode_info_context += xd->mode_info_stride + 1 - (mb_cols & 0x1);
+
+#if CONFIG_DEBUG
+ assert((xd->prev_mode_info_context - cpi->common.prev_mip) ==
+ (xd->mode_info_context - cpi->common.mip));
+#endif
+}
+
+static void init_encode_frame_mb_context(VP9_COMP *cpi) {
+ MACROBLOCK *const x = &cpi->mb;
+ VP9_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &x->e_mbd;
+
+ // GF active flags data structure
+ x->gf_active_ptr = (signed char *)cpi->gf_active_flags;
+
+ // Activity map pointer
+ x->mb_activity_ptr = cpi->mb_activity_map;
+
+ x->act_zbin_adj = 0;
+ cpi->seg0_idx = 0;
+ vpx_memset(cpi->ref_pred_count, 0, sizeof(cpi->ref_pred_count));
+
+ x->partition_info = x->pi;
+
+ xd->mode_info_context = cm->mi;
+ xd->mode_info_stride = cm->mode_info_stride;
+ xd->prev_mode_info_context = cm->prev_mi;
+
+ xd->frame_type = cm->frame_type;
+
+ xd->frames_since_golden = cm->frames_since_golden;
+ xd->frames_till_alt_ref_frame = cm->frames_till_alt_ref_frame;
+
+ // reset intra mode contexts
+ if (cm->frame_type == KEY_FRAME)
+ vp9_init_mbmode_probs(cm);
+
+ // Copy data over into macro block data structures.
+ x->src = * cpi->Source;
+ xd->pre = cm->yv12_fb[cm->lst_fb_idx];
+ xd->dst = cm->yv12_fb[cm->new_fb_idx];
+
+ // set up frame for intra coded blocks
+ vp9_setup_intra_recon(&cm->yv12_fb[cm->new_fb_idx]);
+
+ vp9_build_block_offsets(x);
+
+ vp9_setup_block_dptrs(&x->e_mbd);
+
+ vp9_setup_block_ptrs(x);
+
+ xd->mode_info_context->mbmi.mode = DC_PRED;
+ xd->mode_info_context->mbmi.uv_mode = DC_PRED;
+
+ vp9_zero(cpi->count_mb_ref_frame_usage)
+ vp9_zero(cpi->bmode_count)
+ vp9_zero(cpi->ymode_count)
+ vp9_zero(cpi->i8x8_mode_count)
+ vp9_zero(cpi->y_uv_mode_count)
+ vp9_zero(cpi->sub_mv_ref_count)
+ vp9_zero(cpi->mbsplit_count)
+ vp9_zero(cpi->common.fc.mv_ref_ct)
+#if CONFIG_SUPERBLOCKS
+ vp9_zero(cpi->sb_ymode_count)
+ cpi->sb_count = 0;
+#endif
+#if CONFIG_COMP_INTERINTRA_PRED
+ vp9_zero(cpi->interintra_count);
+ vp9_zero(cpi->interintra_select_count);
+#endif
+
+ vpx_memset(cm->above_context, 0,
+ sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols);
+
+ xd->fullpixel_mask = 0xffffffff;
+ if (cm->full_pixel)
+ xd->fullpixel_mask = 0xfffffff8;
+}
+
+static void encode_frame_internal(VP9_COMP *cpi) {
+ int mb_row;
+ MACROBLOCK *const x = &cpi->mb;
+ VP9_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &x->e_mbd;
+
+ TOKENEXTRA *tp = cpi->tok;
+ int totalrate;
+
+ // printf("encode_frame_internal frame %d (%d)\n",
+ // cpi->common.current_video_frame, cpi->common.show_frame);
+
+ // Compute a modified set of reference frame probabilities to use when
+ // prediction fails. These are based on the current general estimates for
+ // this frame which may be updated with each iteration of the recode loop.
+ vp9_compute_mod_refprobs(cm);
+
+#if CONFIG_NEW_MVREF
+ // temp stats reset
+ vp9_zero( cpi->best_ref_index_counts );
+#endif
+
+// debug output
+#if DBG_PRNT_SEGMAP
+ {
+ FILE *statsfile;
+ statsfile = fopen("segmap2.stt", "a");
+ fprintf(statsfile, "\n");
+ fclose(statsfile);
+ }
+#endif
+
+ totalrate = 0;
+
+ // Functions setup for all frame types so we can use MC in AltRef
+ vp9_setup_interp_filters(xd, cm->mcomp_filter_type, cm);
+
+ // Reset frame count of inter 0,0 motion vector usage.
+ cpi->inter_zz_count = 0;
+
+ cpi->prediction_error = 0;
+ cpi->intra_error = 0;
+ cpi->skip_true_count[0] = cpi->skip_true_count[1] = cpi->skip_true_count[2] = 0;
+ cpi->skip_false_count[0] = cpi->skip_false_count[1] = cpi->skip_false_count[2] = 0;
+
+#if CONFIG_PRED_FILTER
+ if (cm->current_video_frame == 0) {
+ // Initially assume that we'll signal the prediction filter
+ // state at the frame level and that it is off.
+ cpi->common.pred_filter_mode = 0;
+ cpi->common.prob_pred_filter_off = 128;
+ }
+ cpi->pred_filter_on_count = 0;
+ cpi->pred_filter_off_count = 0;
+#endif
+ vp9_zero(cpi->switchable_interp_count);
+
+ xd->mode_info_context = cm->mi;
+ xd->prev_mode_info_context = cm->prev_mi;
+
+ vp9_zero(cpi->NMVcount);
+ vp9_zero(cpi->coef_counts);
+ vp9_zero(cpi->hybrid_coef_counts);
+ vp9_zero(cpi->coef_counts_8x8);
+ vp9_zero(cpi->hybrid_coef_counts_8x8);
+ vp9_zero(cpi->coef_counts_16x16);
+ vp9_zero(cpi->hybrid_coef_counts_16x16);
+
+ vp9_frame_init_quantizer(cpi);
+
+ vp9_initialize_rd_consts(cpi, cm->base_qindex + cm->y1dc_delta_q);
+ vp9_initialize_me_consts(cpi, cm->base_qindex);
+
+ if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
+ // Initialize encode frame context.
+ init_encode_frame_mb_context(cpi);
+
+ // Build a frame level activity map
+ build_activity_map(cpi);
+ }
+
+ // Re-initialize the encode frame context.
+ init_encode_frame_mb_context(cpi);
+
+ vpx_memset(cpi->rd_comp_pred_diff, 0, sizeof(cpi->rd_comp_pred_diff));
+ vpx_memset(cpi->single_pred_count, 0, sizeof(cpi->single_pred_count));
+ vpx_memset(cpi->comp_pred_count, 0, sizeof(cpi->comp_pred_count));
+ vpx_memset(cpi->txfm_count, 0, sizeof(cpi->txfm_count));
+ vpx_memset(cpi->txfm_count_8x8p, 0, sizeof(cpi->txfm_count_8x8p));
+ vpx_memset(cpi->rd_tx_select_diff, 0, sizeof(cpi->rd_tx_select_diff));
+ {
+ struct vpx_usec_timer emr_timer;
+ vpx_usec_timer_start(&emr_timer);
+
+ {
+ // For each row of SBs in the frame
+ for (mb_row = 0; mb_row < cm->mb_rows; mb_row += 2) {
+ int offset = (cm->mb_cols + 1) & ~0x1;
+
+ encode_sb_row(cpi, cm, mb_row, x, xd, &tp, &totalrate);
+
+ // adjust to the next row of SBs
+ x->src.y_buffer += 32 * x->src.y_stride - 16 * offset;
+ x->src.u_buffer += 16 * x->src.uv_stride - 8 * offset;
+ x->src.v_buffer += 16 * x->src.uv_stride - 8 * offset;
+ }
+
+ cpi->tok_count = (unsigned int)(tp - cpi->tok);
+ }
+
+ vpx_usec_timer_mark(&emr_timer);
+ cpi->time_encode_mb_row += vpx_usec_timer_elapsed(&emr_timer);
+
+ }
+
+ // 256 rate units to the bit, so totalrate >> 8 is a count of bits.
+ // NOTE(review): this comment used to say BYTES; confirm the intended unit.
+ cpi->projected_frame_size = totalrate >> 8;
+
+
+#if 0
+ // Keep record of the total distortion this time around for future use
+ cpi->last_frame_distortion = cpi->frame_distortion;
+#endif
+
+}
+
+static int check_dual_ref_flags(VP9_COMP *cpi) {
+ MACROBLOCKD *xd = &cpi->mb.e_mbd;
+ int ref_flags = cpi->ref_frame_flags;
+
+ if (vp9_segfeature_active(xd, 1, SEG_LVL_REF_FRAME)) {
+ if ((ref_flags & (VP9_LAST_FLAG | VP9_GOLD_FLAG)) == (VP9_LAST_FLAG | VP9_GOLD_FLAG) &&
+ vp9_check_segref(xd, 1, LAST_FRAME))
+ return 1;
+ if ((ref_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) == (VP9_GOLD_FLAG | VP9_ALT_FLAG) &&
+ vp9_check_segref(xd, 1, GOLDEN_FRAME))
+ return 1;
+ if ((ref_flags & (VP9_ALT_FLAG | VP9_LAST_FLAG)) == (VP9_ALT_FLAG | VP9_LAST_FLAG) &&
+ vp9_check_segref(xd, 1, ALTREF_FRAME))
+ return 1;
+ return 0;
+ } else {
+ return (!!(ref_flags & VP9_GOLD_FLAG) +
+ !!(ref_flags & VP9_LAST_FLAG) +
+ !!(ref_flags & VP9_ALT_FLAG)) >= 2;
+ }
+}
+
+static void reset_skip_txfm_size(VP9_COMP *cpi, TX_SIZE txfm_max) {
+ VP9_COMMON *cm = &cpi->common;
+ int mb_row, mb_col, mis = cm->mode_info_stride, segment_id;
+ MODE_INFO *mi, *mi_ptr = cm->mi;
+#if CONFIG_SUPERBLOCKS
+ int skip;
+ MODE_INFO *sb_mi_ptr = cm->mi, *sb_mi;
+ MB_MODE_INFO *sb_mbmi;
+#endif
+ MB_MODE_INFO *mbmi;
+ MACROBLOCK *x = &cpi->mb;
+ MACROBLOCKD *xd = &x->e_mbd;
+
+ for (mb_row = 0; mb_row < cm->mb_rows; mb_row++, mi_ptr += mis) {
+ mi = mi_ptr;
+#if CONFIG_SUPERBLOCKS
+ sb_mi = sb_mi_ptr;
+#endif
+ for (mb_col = 0; mb_col < cm->mb_cols; mb_col++, mi++) {
+ mbmi = &mi->mbmi;
+#if CONFIG_SUPERBLOCKS
+ sb_mbmi = &sb_mi->mbmi;
+#endif
+ if (mbmi->txfm_size > txfm_max) {
+#if CONFIG_SUPERBLOCKS
+ if (sb_mbmi->encoded_as_sb) {
+ if (!((mb_col & 1) || (mb_row & 1))) {
+ segment_id = mbmi->segment_id;
+ skip = mbmi->mb_skip_coeff;
+ if (mb_col < cm->mb_cols - 1) {
+ segment_id = segment_id && mi[1].mbmi.segment_id;
+ skip = skip && mi[1].mbmi.mb_skip_coeff;
+ }
+ if (mb_row < cm->mb_rows - 1) {
+ segment_id = segment_id &&
+ mi[cm->mode_info_stride].mbmi.segment_id;
+ skip = skip && mi[cm->mode_info_stride].mbmi.mb_skip_coeff;
+ if (mb_col < cm->mb_cols - 1) {
+ segment_id = segment_id &&
+ mi[cm->mode_info_stride + 1].mbmi.segment_id;
+ skip = skip && mi[cm->mode_info_stride + 1].mbmi.mb_skip_coeff;
+ }
+ }
+ xd->mode_info_context = mi;
+ assert((vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) &&
+ vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0) ||
+ (cm->mb_no_coeff_skip && skip));
+ mbmi->txfm_size = txfm_max;
+ } else {
+ mbmi->txfm_size = sb_mbmi->txfm_size;
+ }
+ } else {
+#endif
+ segment_id = mbmi->segment_id;
+ xd->mode_info_context = mi;
+ assert((vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) &&
+ vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0) ||
+ (cm->mb_no_coeff_skip && mbmi->mb_skip_coeff));
+ mbmi->txfm_size = txfm_max;
+#if CONFIG_SUPERBLOCKS
+ }
+#endif
+ }
+#if CONFIG_SUPERBLOCKS
+ if (mb_col & 1)
+ sb_mi += 2;
+#endif
+ }
+#if CONFIG_SUPERBLOCKS
+ if (mb_row & 1)
+ sb_mi_ptr += 2 * mis;
+#endif
+ }
+}
+
+void vp9_encode_frame(VP9_COMP *cpi) {
+ if (cpi->sf.RD) {
+ int i, frame_type, pred_type;
+ TXFM_MODE txfm_type;
+
+ /*
+ * This code does a single RD pass over the whole frame assuming
+ * either compound, single or hybrid prediction as per whatever has
+ * worked best for that type of frame in the past.
+ * It also predicts whether another coding mode would have worked
+ * better than this coding mode. If that is the case, it remembers
+ * that for subsequent frames.
+ * It does the same analysis for transform size selection also.
+ */
+ if (cpi->common.frame_type == KEY_FRAME)
+ frame_type = 0;
+ else if (cpi->is_src_frame_alt_ref && cpi->common.refresh_golden_frame)
+ frame_type = 3;
+ else if (cpi->common.refresh_golden_frame || cpi->common.refresh_alt_ref_frame)
+ frame_type = 1;
+ else
+ frame_type = 2;
+
+ /* prediction (compound, single or hybrid) mode selection */
+ if (frame_type == 3)
+ pred_type = SINGLE_PREDICTION_ONLY;
+ else if (cpi->rd_prediction_type_threshes[frame_type][1] >
+ cpi->rd_prediction_type_threshes[frame_type][0] &&
+ cpi->rd_prediction_type_threshes[frame_type][1] >
+ cpi->rd_prediction_type_threshes[frame_type][2] &&
+ check_dual_ref_flags(cpi) && cpi->static_mb_pct == 100)
+ pred_type = COMP_PREDICTION_ONLY;
+ else if (cpi->rd_prediction_type_threshes[frame_type][0] >
+ cpi->rd_prediction_type_threshes[frame_type][2])
+ pred_type = SINGLE_PREDICTION_ONLY;
+ else
+ pred_type = HYBRID_PREDICTION;
+
+ /* transform size (4x4, 8x8, 16x16 or select-per-mb) selection */
+#if CONFIG_LOSSLESS
+ if (cpi->oxcf.lossless) {
+ txfm_type = ONLY_4X4;
+ } else
+#endif
+ /* FIXME (rbultje)
+ * this is a hack (no really), basically to work around the complete
+ * nonsense coefficient cost prediction for keyframes. The probabilities
+ * are reset to defaults, and thus we basically have no idea how expensive
+ * a 4x4 vs. 8x8 will really be. The result is that any estimate at which
+ * of the two is better is utterly bogus.
+ * I'd like to eventually remove this hack, but in order to do that, we
+ * need to move the frame reset code from the frame encode init to the
+ * bitstream write code, or alternatively keep a backup of the previous
+ * keyframe's probabilities as an estimate of what the current keyframe's
+ * coefficient cost distributions may look like. */
+ if (frame_type == 0) {
+ txfm_type = ALLOW_16X16;
+ } else
+#if 0
+ /* FIXME (rbultje)
+ * this code is disabled for a similar reason as the code above; the
+ * problem is that each time we "revert" to 4x4 only (or even 8x8 only),
+ * the coefficient probabilities for 16x16 (and 8x8) start lagging behind,
+ * thus leading to them lagging further behind and not being chosen for
+ * subsequent frames either. This is essentially a local minimum problem
+ * that we can probably fix by estimating real costs more closely within
+ * a frame, perhaps by re-calculating costs on-the-fly as frame encoding
+ * progresses. */
+ if (cpi->rd_tx_select_threshes[frame_type][TX_MODE_SELECT] >
+ cpi->rd_tx_select_threshes[frame_type][ONLY_4X4] &&
+ cpi->rd_tx_select_threshes[frame_type][TX_MODE_SELECT] >
+ cpi->rd_tx_select_threshes[frame_type][ALLOW_16X16] &&
+ cpi->rd_tx_select_threshes[frame_type][TX_MODE_SELECT] >
+ cpi->rd_tx_select_threshes[frame_type][ALLOW_8X8]) {
+ txfm_type = TX_MODE_SELECT;
+ } else if (cpi->rd_tx_select_threshes[frame_type][ONLY_4X4] >
+ cpi->rd_tx_select_threshes[frame_type][ALLOW_8X8]
+ && cpi->rd_tx_select_threshes[frame_type][ONLY_4X4] >
+ cpi->rd_tx_select_threshes[frame_type][ALLOW_16X16]
+ ) {
+ txfm_type = ONLY_4X4;
+ } else if (cpi->rd_tx_select_threshes[frame_type][ALLOW_16X16] >=
+ cpi->rd_tx_select_threshes[frame_type][ALLOW_8X8]) {
+ txfm_type = ALLOW_16X16;
+ } else
+ txfm_type = ALLOW_8X8;
+#else
+ txfm_type = cpi->rd_tx_select_threshes[frame_type][ALLOW_16X16] >=
+ cpi->rd_tx_select_threshes[frame_type][TX_MODE_SELECT] ?
+ ALLOW_16X16 : TX_MODE_SELECT;
+#endif
+ cpi->common.txfm_mode = txfm_type;
+ if (txfm_type != TX_MODE_SELECT) {
+ cpi->common.prob_tx[0] = 128;
+ cpi->common.prob_tx[1] = 128;
+ }
+ cpi->common.comp_pred_mode = pred_type;
+ encode_frame_internal(cpi);
+
+ for (i = 0; i < NB_PREDICTION_TYPES; ++i) {
+ const int diff = (int)(cpi->rd_comp_pred_diff[i] / cpi->common.MBs);
+ cpi->rd_prediction_type_threshes[frame_type][i] += diff;
+ cpi->rd_prediction_type_threshes[frame_type][i] >>= 1;
+ }
+
+ for (i = 0; i < NB_TXFM_MODES; ++i) {
+ int64_t pd = cpi->rd_tx_select_diff[i];
+ int diff;
+ if (i == TX_MODE_SELECT)
+ pd -= RDCOST(cpi->mb.rdmult, cpi->mb.rddiv, 2048 * (TX_SIZE_MAX - 1), 0);
+ diff = (int)(pd / cpi->common.MBs);
+ cpi->rd_tx_select_threshes[frame_type][i] += diff;
+ cpi->rd_tx_select_threshes[frame_type][i] /= 2;
+ }
+
+ if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
+ int single_count_zero = 0;
+ int comp_count_zero = 0;
+
+ for (i = 0; i < COMP_PRED_CONTEXTS; i++) {
+ single_count_zero += cpi->single_pred_count[i];
+ comp_count_zero += cpi->comp_pred_count[i];
+ }
+
+ if (comp_count_zero == 0) {
+ cpi->common.comp_pred_mode = SINGLE_PREDICTION_ONLY;
+ } else if (single_count_zero == 0) {
+ cpi->common.comp_pred_mode = COMP_PREDICTION_ONLY;
+ }
+ }
+
+ if (cpi->common.txfm_mode == TX_MODE_SELECT) {
+ const int count4x4 = cpi->txfm_count[TX_4X4] + cpi->txfm_count_8x8p[TX_4X4];
+ const int count8x8 = cpi->txfm_count[TX_8X8];
+ const int count8x8_8x8p = cpi->txfm_count_8x8p[TX_8X8];
+ const int count16x16 = cpi->txfm_count[TX_16X16];
+
+ if (count4x4 == 0 && count16x16 == 0) {
+ cpi->common.txfm_mode = ALLOW_8X8;
+ reset_skip_txfm_size(cpi, TX_8X8);
+ } else if (count8x8 == 0 && count16x16 == 0 && count8x8_8x8p == 0) {
+ cpi->common.txfm_mode = ONLY_4X4;
+ reset_skip_txfm_size(cpi, TX_4X4);
+ } else if (count8x8 == 0 && count4x4 == 0) {
+ cpi->common.txfm_mode = ALLOW_16X16;
+ }
+ }
+ } else {
+ encode_frame_internal(cpi);
+ }
+
+}
+
+void vp9_setup_block_ptrs(MACROBLOCK *x) {
+ int r, c;
+ int i;
+
+ for (r = 0; r < 4; r++) {
+ for (c = 0; c < 4; c++) {
+ x->block[r * 4 + c].src_diff = x->src_diff + r * 4 * 16 + c * 4;
+ }
+ }
+
+ for (r = 0; r < 2; r++) {
+ for (c = 0; c < 2; c++) {
+ x->block[16 + r * 2 + c].src_diff = x->src_diff + 256 + r * 4 * 8 + c * 4;
+ }
+ }
+
+
+ for (r = 0; r < 2; r++) {
+ for (c = 0; c < 2; c++) {
+ x->block[20 + r * 2 + c].src_diff = x->src_diff + 320 + r * 4 * 8 + c * 4;
+ }
+ }
+
+ x->block[24].src_diff = x->src_diff + 384;
+
+
+ for (i = 0; i < 25; i++) {
+ x->block[i].coeff = x->coeff + i * 16;
+ }
+}
+
+void vp9_build_block_offsets(MACROBLOCK *x) {
+ int block = 0;
+ int br, bc;
+
+ vp9_build_block_doffsets(&x->e_mbd);
+
+#if !CONFIG_SUPERBLOCKS
+ // y blocks
+ x->thismb_ptr = &x->thismb[0];
+ for (br = 0; br < 4; br++) {
+ for (bc = 0; bc < 4; bc++) {
+ BLOCK *this_block = &x->block[block];
+ // this_block->base_src = &x->src.y_buffer;
+ // this_block->src_stride = x->src.y_stride;
+ // this_block->src = 4 * br * this_block->src_stride + 4 * bc;
+ this_block->base_src = &x->thismb_ptr;
+ this_block->src_stride = 16;
+ this_block->src = 4 * br * 16 + 4 * bc;
+ ++block;
+ }
+ }
+#else
+ for (br = 0; br < 4; br++) {
+ for (bc = 0; bc < 4; bc++) {
+ BLOCK *this_block = &x->block[block];
+ // this_block->base_src = &x->src.y_buffer;
+ // this_block->src_stride = x->src.y_stride;
+ // this_block->src = 4 * br * this_block->src_stride + 4 * bc;
+ this_block->base_src = &x->src.y_buffer;
+ this_block->src_stride = x->src.y_stride;
+ this_block->src = 4 * br * this_block->src_stride + 4 * bc;
+ ++block;
+ }
+ }
+#endif
+
+ // u blocks
+ for (br = 0; br < 2; br++) {
+ for (bc = 0; bc < 2; bc++) {
+ BLOCK *this_block = &x->block[block];
+ this_block->base_src = &x->src.u_buffer;
+ this_block->src_stride = x->src.uv_stride;
+ this_block->src = 4 * br * this_block->src_stride + 4 * bc;
+ ++block;
+ }
+ }
+
+ // v blocks
+ for (br = 0; br < 2; br++) {
+ for (bc = 0; bc < 2; bc++) {
+ BLOCK *this_block = &x->block[block];
+ this_block->base_src = &x->src.v_buffer;
+ this_block->src_stride = x->src.uv_stride;
+ this_block->src = 4 * br * this_block->src_stride + 4 * bc;
+ ++block;
+ }
+ }
+}
+
+static void sum_intra_stats(VP9_COMP *cpi, MACROBLOCK *x) {
+ const MACROBLOCKD *xd = &x->e_mbd;
+ const MB_PREDICTION_MODE m = xd->mode_info_context->mbmi.mode;
+ const MB_PREDICTION_MODE uvm = xd->mode_info_context->mbmi.uv_mode;
+
+#ifdef MODE_STATS
+ const int is_key = cpi->common.frame_type == KEY_FRAME;
+
+ ++ (is_key ? uv_modes : inter_uv_modes)[uvm];
+ ++ uv_modes_y[m][uvm];
+
+ if (m == B_PRED) {
+ unsigned int *const bct = is_key ? b_modes : inter_b_modes;
+
+ int b = 0;
+
+ do {
+ ++ bct[xd->block[b].bmi.as_mode.first];
+ } while (++b < 16);
+ }
+
+ if (m == I8X8_PRED) {
+ i8x8_modes[xd->block[0].bmi.as_mode.first]++;
+ i8x8_modes[xd->block[2].bmi.as_mode.first]++;
+ i8x8_modes[xd->block[8].bmi.as_mode.first]++;
+ i8x8_modes[xd->block[10].bmi.as_mode.first]++;
+ }
+#endif
+
+#if CONFIG_SUPERBLOCKS
+ if (xd->mode_info_context->mbmi.encoded_as_sb) {
+ ++cpi->sb_ymode_count[m];
+ } else
+#endif
+ ++cpi->ymode_count[m];
+ if (m != I8X8_PRED)
+ ++cpi->y_uv_mode_count[m][uvm];
+ else {
+ cpi->i8x8_mode_count[xd->block[0].bmi.as_mode.first]++;
+ cpi->i8x8_mode_count[xd->block[2].bmi.as_mode.first]++;
+ cpi->i8x8_mode_count[xd->block[8].bmi.as_mode.first]++;
+ cpi->i8x8_mode_count[xd->block[10].bmi.as_mode.first]++;
+ }
+ if (m == B_PRED) {
+ int b = 0;
+ do {
+ int m = xd->block[b].bmi.as_mode.first;
+#if CONFIG_NEWBINTRAMODES
+ if (m == B_CONTEXT_PRED) m -= CONTEXT_PRED_REPLACEMENTS;
+#endif
+ ++cpi->bmode_count[m];
+ } while (++b < 16);
+ }
+}
+
+// Experimental stub function to create a per MB zbin adjustment based on
+// some previously calculated measure of MB activity.
+static void adjust_act_zbin(VP9_COMP *cpi, MACROBLOCK *x) {
+#if USE_ACT_INDEX
+ x->act_zbin_adj = *(x->mb_activity_ptr);
+#else
+ int64_t a;
+ int64_t b;
+ int64_t act = *(x->mb_activity_ptr);
+
+ // Weighted blends of act and activity_avg; their rounded ratio sets act_zbin_adj.
+ a = act + 4 * cpi->activity_avg;
+ b = 4 * act + cpi->activity_avg;
+
+ if (act > cpi->activity_avg)
+ x->act_zbin_adj = (int)(((int64_t)b + (a >> 1)) / a) - 1;
+ else
+ x->act_zbin_adj = 1 - (int)(((int64_t)a + (b >> 1)) / b);
+#endif
+}
+
+#if CONFIG_SUPERBLOCKS
+static void update_sb_skip_coeff_state(VP9_COMP *cpi,
+ MACROBLOCK *x,
+ ENTROPY_CONTEXT_PLANES ta[4],
+ ENTROPY_CONTEXT_PLANES tl[4],
+ TOKENEXTRA *t[4],
+ TOKENEXTRA **tp,
+ int skip[4])
+{
+ TOKENEXTRA tokens[4][16 * 24];
+ int n_tokens[4], n;
+
+ // if there were no skips, we don't need to do anything
+ if (!skip[0] && !skip[1] && !skip[2] && !skip[3])
+ return;
+
+ // if we don't do coeff skipping for this frame, we don't
+ // need to do anything here
+ if (!cpi->common.mb_no_coeff_skip)
+ return;
+
+ // if all 4 MBs skipped coeff coding, nothing to be done
+ if (skip[0] && skip[1] && skip[2] && skip[3])
+ return;
+
+ // so the situation now is that we want to skip coeffs
+ // for some MBs, but not all, and we didn't code EOB
+ // coefficients for them. However, the skip flag for this
+ // SB will be 0 overall, so we need to insert EOBs in the
+ // middle of the token tree. Do so here.
+ n_tokens[0] = t[1] - t[0];
+ n_tokens[1] = t[2] - t[1];
+ n_tokens[2] = t[3] - t[2];
+ n_tokens[3] = *tp - t[3];
+ if (n_tokens[0])
+ memcpy(tokens[0], t[0], n_tokens[0] * sizeof(*t[0]));
+ if (n_tokens[1])
+ memcpy(tokens[1], t[1], n_tokens[1] * sizeof(*t[0]));
+ if (n_tokens[2])
+ memcpy(tokens[2], t[2], n_tokens[2] * sizeof(*t[0]));
+ if (n_tokens[3])
+ memcpy(tokens[3], t[3], n_tokens[3] * sizeof(*t[0]));
+
+ // reset pointer, stuff EOBs where necessary
+ *tp = t[0];
+ for (n = 0; n < 4; n++) {
+ if (skip[n]) {
+ x->e_mbd.above_context = &ta[n];
+ x->e_mbd.left_context = &tl[n];
+ vp9_stuff_mb(cpi, &x->e_mbd, tp, 0);
+ } else {
+ if (n_tokens[n]) {
+ memcpy(*tp, tokens[n], sizeof(*t[0]) * n_tokens[n]);
+ }
+ (*tp) += n_tokens[n];
+ }
+ }
+}
+#endif /* CONFIG_SUPERBLOCKS */
+
+static void encode_macroblock(VP9_COMP *cpi, MACROBLOCK *x,
+ TOKENEXTRA **t, int recon_yoffset,
+ int recon_uvoffset, int output_enabled,
+ int mb_col, int mb_row) {
+ VP9_COMMON *cm = &cpi->common;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi;
+ unsigned char ref_pred_flag;
+
+ x->skip = 0;
+#if CONFIG_SUPERBLOCKS
+ assert(!xd->mode_info_context->mbmi.encoded_as_sb);
+#endif
+
+#ifdef ENC_DEBUG
+ enc_debug = (cpi->common.current_video_frame == 46 &&
+ mb_row == 5 && mb_col == 2);
+ if (enc_debug)
+ printf("Encode MB %d %d output %d\n", mb_row, mb_col, output_enabled);
+#endif
+ if (cm->frame_type == KEY_FRAME) {
+ if (cpi->oxcf.tuning == VP8_TUNE_SSIM && output_enabled) {
+ // Adjust the zbin based on this MB's activity.
+ adjust_act_zbin(cpi, x);
+ vp9_update_zbin_extra(cpi, x);
+ }
+ } else {
+ vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
+
+ if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
+ // Adjust the zbin based on this MB's activity.
+ adjust_act_zbin(cpi, x);
+ }
+
+ // Experimental code. Special case for gf and arf zeromv modes.
+ // Increase zbin size to suppress noise
+ cpi->zbin_mode_boost = 0;
+ if (cpi->zbin_mode_boost_enabled) {
+ if (mbmi->ref_frame != INTRA_FRAME) {
+ if (mbmi->mode == ZEROMV) {
+ if (mbmi->ref_frame != LAST_FRAME)
+ cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
+ else
+ cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
+ } else if (mbmi->mode == SPLITMV)
+ cpi->zbin_mode_boost = 0;
+ else
+ cpi->zbin_mode_boost = MV_ZBIN_BOOST;
+ }
+ }
+
+ vp9_update_zbin_extra(cpi, x);
+
+ // SET VARIOUS PREDICTION FLAGS
+
+ // Did the chosen reference frame match its predicted value.
+ ref_pred_flag = ((mbmi->ref_frame == vp9_get_pred_ref(cm, xd)));
+ vp9_set_pred_flag(xd, PRED_REF, ref_pred_flag);
+ }
+
+ if (mbmi->ref_frame == INTRA_FRAME) {
+#ifdef ENC_DEBUG
+ if (enc_debug) {
+ printf("Mode %d skip %d tx_size %d\n", mbmi->mode, x->skip,
+ mbmi->txfm_size);
+ }
+#endif
+ if (mbmi->mode == B_PRED) {
+ vp9_encode_intra16x16mbuv(x);
+ vp9_encode_intra4x4mby(x);
+ } else if (mbmi->mode == I8X8_PRED) {
+ vp9_encode_intra8x8mby(x);
+ vp9_encode_intra8x8mbuv(x);
+ } else {
+ vp9_encode_intra16x16mbuv(x);
+ vp9_encode_intra16x16mby(x);
+ }
+
+ if (output_enabled)
+ sum_intra_stats(cpi, x);
+ } else {
+ int ref_fb_idx;
+#ifdef ENC_DEBUG
+ if (enc_debug)
+ printf("Mode %d skip %d tx_size %d ref %d ref2 %d mv %d %d interp %d\n",
+ mbmi->mode, x->skip, mbmi->txfm_size,
+ mbmi->ref_frame, mbmi->second_ref_frame,
+ mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col,
+ mbmi->interp_filter);
+#endif
+
+ assert(cm->frame_type != KEY_FRAME);
+
+ if (mbmi->ref_frame == LAST_FRAME)
+ ref_fb_idx = cpi->common.lst_fb_idx;
+ else if (mbmi->ref_frame == GOLDEN_FRAME)
+ ref_fb_idx = cpi->common.gld_fb_idx;
+ else
+ ref_fb_idx = cpi->common.alt_fb_idx;
+
+ xd->pre.y_buffer = cpi->common.yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
+ xd->pre.u_buffer = cpi->common.yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
+ xd->pre.v_buffer = cpi->common.yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
+
+ if (mbmi->second_ref_frame > 0) {
+ int second_ref_fb_idx;
+
+ if (mbmi->second_ref_frame == LAST_FRAME)
+ second_ref_fb_idx = cpi->common.lst_fb_idx;
+ else if (mbmi->second_ref_frame == GOLDEN_FRAME)
+ second_ref_fb_idx = cpi->common.gld_fb_idx;
+ else
+ second_ref_fb_idx = cpi->common.alt_fb_idx;
+
+ xd->second_pre.y_buffer = cpi->common.yv12_fb[second_ref_fb_idx].y_buffer +
+ recon_yoffset;
+ xd->second_pre.u_buffer = cpi->common.yv12_fb[second_ref_fb_idx].u_buffer +
+ recon_uvoffset;
+ xd->second_pre.v_buffer = cpi->common.yv12_fb[second_ref_fb_idx].v_buffer +
+ recon_uvoffset;
+ }
+
+ if (!x->skip) {
+ vp9_encode_inter16x16(x);
+
+ // Clear mb_skip_coeff if mb_no_coeff_skip is not set
+ if (!cpi->common.mb_no_coeff_skip)
+ mbmi->mb_skip_coeff = 0;
+
+ } else {
+ vp9_build_1st_inter16x16_predictors_mb(xd,
+ xd->dst.y_buffer,
+ xd->dst.u_buffer,
+ xd->dst.v_buffer,
+ xd->dst.y_stride,
+ xd->dst.uv_stride);
+ if (xd->mode_info_context->mbmi.second_ref_frame > 0) {
+ vp9_build_2nd_inter16x16_predictors_mb(xd,
+ xd->dst.y_buffer,
+ xd->dst.u_buffer,
+ xd->dst.v_buffer,
+ xd->dst.y_stride,
+ xd->dst.uv_stride);
+ }
+#if CONFIG_COMP_INTERINTRA_PRED
+ else if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) {
+ vp9_build_interintra_16x16_predictors_mb(xd,
+ xd->dst.y_buffer,
+ xd->dst.u_buffer,
+ xd->dst.v_buffer,
+ xd->dst.y_stride,
+ xd->dst.uv_stride);
+ }
+#endif
+ }
+ }
+
+ if (!x->skip) {
+#ifdef ENC_DEBUG
+ if (enc_debug) {
+ int i, j;
+ printf("\n");
+ printf("qcoeff\n");
+ for (i = 0; i < 400; i++) {
+ printf("%3d ", xd->qcoeff[i]);
+ if (i % 16 == 15) printf("\n");
+ }
+ printf("\n");
+ printf("predictor\n");
+ for (i = 0; i < 384; i++) {
+ printf("%3d ", xd->predictor[i]);
+ if (i % 16 == 15) printf("\n");
+ }
+ printf("\n");
+ printf("src_diff\n");
+ for (i = 0; i < 384; i++) {
+ printf("%3d ", x->src_diff[i]);
+ if (i % 16 == 15) printf("\n");
+ }
+ printf("\n");
+ printf("diff\n");
+ for (i = 0; i < 384; i++) {
+ printf("%3d ", xd->block[0].diff[i]);
+ if (i % 16 == 15) printf("\n");
+ }
+ printf("\n");
+ printf("final y\n");
+ for (i = 0; i < 16; i++) {
+ for (j = 0; j < 16; j++)
+ printf("%3d ", xd->dst.y_buffer[i * xd->dst.y_stride + j]);
+ printf("\n");
+ }
+ printf("\n");
+ printf("final u\n");
+ for (i = 0; i < 8; i++) {
+ for (j = 0; j < 8; j++)
+ printf("%3d ", xd->dst.u_buffer[i * xd->dst.uv_stride + j]);
+ printf("\n");
+ }
+ printf("\n");
+ printf("final v\n");
+ for (i = 0; i < 8; i++) {
+ for (j = 0; j < 8; j++)
+ printf("%3d ", xd->dst.v_buffer[i * xd->dst.uv_stride + j]);
+ printf("\n");
+ }
+ fflush(stdout);
+ }
+#endif
+
+ vp9_tokenize_mb(cpi, xd, t, !output_enabled);
+
+ } else {
+ int mb_skip_context =
+ cpi->common.mb_no_coeff_skip ?
+ (x->e_mbd.mode_info_context - 1)->mbmi.mb_skip_coeff +
+ (x->e_mbd.mode_info_context - cpi->common.mode_info_stride)->mbmi.mb_skip_coeff :
+ 0;
+ if (cpi->common.mb_no_coeff_skip) {
+ mbmi->mb_skip_coeff = 1;
+ if (output_enabled)
+ cpi->skip_true_count[mb_skip_context]++;
+ vp9_fix_contexts(xd);
+ } else {
+ vp9_stuff_mb(cpi, xd, t, !output_enabled);
+ mbmi->mb_skip_coeff = 0;
+ if (output_enabled)
+ cpi->skip_false_count[mb_skip_context]++;
+ }
+ }
+
+ if (output_enabled) {
+ int segment_id = mbmi->segment_id;
+ if (cpi->common.txfm_mode == TX_MODE_SELECT &&
+ !((cpi->common.mb_no_coeff_skip && mbmi->mb_skip_coeff) ||
+ (vp9_segfeature_active(&x->e_mbd, segment_id, SEG_LVL_EOB) &&
+ vp9_get_segdata(&x->e_mbd, segment_id, SEG_LVL_EOB) == 0))) {
+ if (mbmi->mode != B_PRED && mbmi->mode != I8X8_PRED &&
+ mbmi->mode != SPLITMV) {
+ cpi->txfm_count[mbmi->txfm_size]++;
+ } else if (mbmi->mode == I8X8_PRED ||
+ (mbmi->mode == SPLITMV &&
+ mbmi->partitioning != PARTITIONING_4X4)) {
+ cpi->txfm_count_8x8p[mbmi->txfm_size]++;
+ }
+ } else if (mbmi->mode != B_PRED && mbmi->mode != I8X8_PRED &&
+ mbmi->mode != SPLITMV && cpi->common.txfm_mode >= ALLOW_16X16) {
+ mbmi->txfm_size = TX_16X16;
+ } else if (mbmi->mode != B_PRED &&
+ !(mbmi->mode == SPLITMV &&
+ mbmi->partitioning == PARTITIONING_4X4) &&
+ cpi->common.txfm_mode >= ALLOW_8X8) {
+ mbmi->txfm_size = TX_8X8;
+ } else {
+ mbmi->txfm_size = TX_4X4;
+ }
+ }
+}
+
+#if CONFIG_SUPERBLOCKS
+static void encode_superblock(VP9_COMP *cpi, MACROBLOCK *x,
+ TOKENEXTRA **t, int recon_yoffset,
+ int recon_uvoffset, int mb_col, int mb_row) {
+ VP9_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ const uint8_t *src = x->src.y_buffer;
+ uint8_t *dst = xd->dst.y_buffer;
+ const uint8_t *usrc = x->src.u_buffer;
+ uint8_t *udst = xd->dst.u_buffer;
+ const uint8_t *vsrc = x->src.v_buffer;
+ uint8_t *vdst = xd->dst.v_buffer;
+ int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride;
+ int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride;
+ int seg_ref_active;
+ unsigned char ref_pred_flag;
+ int n;
+ TOKENEXTRA *tp[4];
+ int skip[4];
+ MODE_INFO *mi = x->e_mbd.mode_info_context;
+ unsigned int segment_id = mi->mbmi.segment_id;
+ ENTROPY_CONTEXT_PLANES ta[4], tl[4];
+
+ x->skip = 0;
+
+ if (cm->frame_type == KEY_FRAME) {
+ if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
+ adjust_act_zbin(cpi, x);
+ vp9_update_zbin_extra(cpi, x);
+ }
+ } else {
+ vp9_setup_interp_filters(xd, xd->mode_info_context->mbmi.interp_filter, cm);
+
+ if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
+ // Adjust the zbin based on this MB's activity.
+ adjust_act_zbin(cpi, x);
+ }
+
+ // Experimental code. Special case for gf and arf zeromv modes.
+ // Increase zbin size to suppress noise
+ cpi->zbin_mode_boost = 0;
+ if (cpi->zbin_mode_boost_enabled) {
+ if (xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME) {
+ if (xd->mode_info_context->mbmi.mode == ZEROMV) {
+ if (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME)
+ cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
+ else
+ cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
+ } else if (xd->mode_info_context->mbmi.mode == SPLITMV)
+ cpi->zbin_mode_boost = 0;
+ else
+ cpi->zbin_mode_boost = MV_ZBIN_BOOST;
+ }
+ }
+
+ vp9_update_zbin_extra(cpi, x);
+
+ seg_ref_active = vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME);
+
+ // SET VARIOUS PREDICTION FLAGS
+
+ // Did the chosen reference frame match its predicted value.
+ ref_pred_flag = ((xd->mode_info_context->mbmi.ref_frame ==
+ vp9_get_pred_ref(cm, xd)));
+ vp9_set_pred_flag(xd, PRED_REF, ref_pred_flag);
+ }
+
+
+ if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
+ vp9_build_intra_predictors_sby_s(&x->e_mbd);
+ vp9_build_intra_predictors_sbuv_s(&x->e_mbd);
+ sum_intra_stats(cpi, x);
+ } else {
+ int ref_fb_idx;
+
+ assert(cm->frame_type != KEY_FRAME);
+
+ if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
+ ref_fb_idx = cpi->common.lst_fb_idx;
+ else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
+ ref_fb_idx = cpi->common.gld_fb_idx;
+ else
+ ref_fb_idx = cpi->common.alt_fb_idx;
+
+ xd->pre.y_buffer = cpi->common.yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
+ xd->pre.u_buffer = cpi->common.yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
+ xd->pre.v_buffer = cpi->common.yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
+
+ if (xd->mode_info_context->mbmi.second_ref_frame > 0) {
+ int second_ref_fb_idx;
+
+ if (xd->mode_info_context->mbmi.second_ref_frame == LAST_FRAME)
+ second_ref_fb_idx = cpi->common.lst_fb_idx;
+ else if (xd->mode_info_context->mbmi.second_ref_frame == GOLDEN_FRAME)
+ second_ref_fb_idx = cpi->common.gld_fb_idx;
+ else
+ second_ref_fb_idx = cpi->common.alt_fb_idx;
+
+ xd->second_pre.y_buffer = cpi->common.yv12_fb[second_ref_fb_idx].y_buffer +
+ recon_yoffset;
+ xd->second_pre.u_buffer = cpi->common.yv12_fb[second_ref_fb_idx].u_buffer +
+ recon_uvoffset;
+ xd->second_pre.v_buffer = cpi->common.yv12_fb[second_ref_fb_idx].v_buffer +
+ recon_uvoffset;
+ }
+
+ vp9_build_inter32x32_predictors_sb(xd, xd->dst.y_buffer,
+ xd->dst.u_buffer, xd->dst.v_buffer,
+ xd->dst.y_stride, xd->dst.uv_stride);
+ }
+
+ for (n = 0; n < 4; n++) {
+ int x_idx = n & 1, y_idx = n >> 1;
+
+ xd->left_context = cm->left_context + y_idx;
+ xd->above_context = cm->above_context + mb_col + x_idx;
+ memcpy(&ta[n], xd->above_context, sizeof(ta[n]));
+ memcpy(&tl[n], xd->left_context, sizeof(tl[n]));
+ tp[n] = *t;
+ xd->mode_info_context = mi + x_idx + y_idx * cm->mode_info_stride;
+
+ vp9_subtract_mby_s_c(x->src_diff,
+ src + x_idx * 16 + y_idx * 16 * src_y_stride,
+ src_y_stride,
+ dst + x_idx * 16 + y_idx * 16 * dst_y_stride,
+ dst_y_stride);
+ vp9_subtract_mbuv_s_c(x->src_diff,
+ usrc + x_idx * 8 + y_idx * 8 * src_uv_stride,
+ vsrc + x_idx * 8 + y_idx * 8 * src_uv_stride,
+ src_uv_stride,
+ udst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
+ vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
+ dst_uv_stride);
+ vp9_fidct_mb(x);
+ vp9_recon_mby_s_c(&x->e_mbd,
+ dst + x_idx * 16 + y_idx * 16 * dst_y_stride);
+ vp9_recon_mbuv_s_c(&x->e_mbd,
+ udst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
+ vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride);
+
+ if (!x->skip) {
+ vp9_tokenize_mb(cpi, &x->e_mbd, t, 0);
+ skip[n] = xd->mode_info_context->mbmi.mb_skip_coeff;
+ } else {
+ int mb_skip_context =
+ cpi->common.mb_no_coeff_skip ?
+ (x->e_mbd.mode_info_context - 1)->mbmi.mb_skip_coeff +
+ (x->e_mbd.mode_info_context - cpi->common.mode_info_stride)->mbmi.mb_skip_coeff :
+ 0;
+ xd->mode_info_context->mbmi.mb_skip_coeff = skip[n] = 1;
+ if (cpi->common.mb_no_coeff_skip) {
+ // TODO(rbultje) this should be done per-sb instead of per-mb?
+ cpi->skip_true_count[mb_skip_context]++;
+ vp9_fix_contexts(xd);
+ } else {
+ vp9_stuff_mb(cpi, xd, t, 0);
+ // TODO(rbultje) this should be done per-sb instead of per-mb?
+ cpi->skip_false_count[mb_skip_context]++;
+ }
+ }
+ }
+
+ xd->mode_info_context = mi;
+ update_sb_skip_coeff_state(cpi, x, ta, tl, tp, t, skip);
+ if (cm->txfm_mode == TX_MODE_SELECT &&
+ !((cm->mb_no_coeff_skip && skip[0] && skip[1] && skip[2] && skip[3]) ||
+ (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) &&
+ vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0))) {
+ cpi->txfm_count[mi->mbmi.txfm_size]++;
+ } else {
+ TX_SIZE sz = (cm->txfm_mode == TX_MODE_SELECT) ? TX_16X16 : cm->txfm_mode;
+ mi->mbmi.txfm_size = sz;
+ if (mb_col < cm->mb_cols - 1)
+ mi[1].mbmi.txfm_size = sz;
+ if (mb_row < cm->mb_rows - 1) {
+ mi[cm->mode_info_stride].mbmi.txfm_size = sz;
+ if (mb_col < cm->mb_cols - 1)
+ mi[cm->mode_info_stride + 1].mbmi.txfm_size = sz;
+ }
+ }
+}
+#endif
diff --git a/vp9/encoder/vp9_encodeframe.h b/vp9/encoder/vp9_encodeframe.h
new file mode 100644
index 0000000..8c1716f
--- /dev/null
+++ b/vp9/encoder/vp9_encodeframe.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_ENCODER_VP9_ENCODEFRAME_H_
+#define VP9_ENCODER_VP9_ENCODEFRAME_H_
+
+struct macroblock;
+
+extern void vp9_build_block_offsets(struct macroblock *x);
+
+extern void vp9_setup_block_ptrs(struct macroblock *x);
+
+#endif  // VP9_ENCODER_VP9_ENCODEFRAME_H_
diff --git a/vp9/encoder/vp9_encodeintra.c b/vp9/encoder/vp9_encodeintra.c
new file mode 100644
index 0000000..9b10626
--- /dev/null
+++ b/vp9/encoder/vp9_encodeintra.c
@@ -0,0 +1,277 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vpx_config.h"
+#include "vp9_rtcd.h"
+#include "vp9/encoder/vp9_quantize.h"
+#include "vp9/common/vp9_reconintra.h"
+#include "vp9/common/vp9_reconintra4x4.h"
+#include "vp9/encoder/vp9_encodemb.h"
+#include "vp9/common/vp9_invtrans.h"
+#include "vp9/encoder/vp9_encodeintra.h"
+
+/* Intra-codes one macroblock with DC prediction, as a single 16x16 luma pass
+ * or as sixteen 4x4 blocks, and returns vp9_get_mb_ss() of x->src_diff. */
+int vp9_encode_intra(VP9_COMP *cpi, MACROBLOCK *x, int use_16x16_pred) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  (void) cpi;
+
+  if (!use_16x16_pred) {
+    int blk;
+    for (blk = 0; blk < 16; ++blk) {
+      xd->block[blk].bmi.as_mode.first = B_DC_PRED;
+      vp9_encode_intra4x4block(x, blk);
+    }
+  } else {
+    MB_MODE_INFO *const mi = &xd->mode_info_context->mbmi;
+
+    mi->mode = DC_PRED;
+#if CONFIG_COMP_INTRA_PRED
+    mi->second_mode = (MB_PREDICTION_MODE)(DC_PRED - 1);
+#endif
+    mi->uv_mode = DC_PRED;
+    mi->ref_frame = INTRA_FRAME;
+    vp9_encode_intra16x16mby(x);
+  }
+
+  return vp9_get_mb_ss(x->src_diff);
+}
+
+void vp9_encode_intra4x4block(MACROBLOCK *x, int ib) {
+ BLOCKD *b = &x->e_mbd.block[ib];
+ BLOCK *be = &x->block[ib];
+ TX_TYPE tx_type;
+ /* Predicts, transforms, quantizes and reconstructs the 4x4 block `ib`. */
+#if CONFIG_NEWBINTRAMODES
+ b->bmi.as_mode.context = vp9_find_bpred_context(b);
+#endif
+ /* NOTE(review): (B_DC_PRED - 1) looks like the "no second mode" marker. */
+#if CONFIG_COMP_INTRA_PRED
+ if (b->bmi.as_mode.second == (B_PREDICTION_MODE)(B_DC_PRED - 1)) {
+#endif
+ vp9_intra4x4_predict(b, b->bmi.as_mode.first, b->predictor);
+#if CONFIG_COMP_INTRA_PRED
+ } else {
+ vp9_comp_intra4x4_predict(b, b->bmi.as_mode.first, b->bmi.as_mode.second,
+ b->predictor);
+ }
+#endif
+ /* Residual against the predictor just built (pitch 16). */
+ vp9_subtract_b(be, b, 16);
+ /* Non-DCT types use vp9_fht / vp9_ihtllm; DCT_DCT uses the fdct4x4 path. */
+ tx_type = get_tx_type_4x4(&x->e_mbd, b);
+ if (tx_type != DCT_DCT) {
+ vp9_fht(be->src_diff, 32, be->coeff, tx_type, 4);
+ vp9_ht_quantize_b_4x4(be, b, tx_type);
+ vp9_ihtllm(b->dqcoeff, b->diff, 32, tx_type, 4, b->eob);
+ } else {
+ x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
+ x->quantize_b_4x4(be, b) ;
+ vp9_inverse_transform_b_4x4(&x->e_mbd, ib, 32);
+ }
+ /* Reconstruct from predictor + decoded residual into the destination. */
+ vp9_recon_b(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+}
+
+/* Runs vp9_encode_intra4x4block() on blocks 0..15 in ascending order. */
+void vp9_encode_intra4x4mby(MACROBLOCK *mb) {
+  int blk = 0;
+
+  while (blk < 16)
+    vp9_encode_intra4x4block(mb, blk++);
+}
+
+void vp9_encode_intra16x16mby(MACROBLOCK *x) {
+ MACROBLOCKD *xd = &x->e_mbd;
+ BLOCK *b = &x->block[0];
+ TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size;
+ /* Luma: predict, subtract, transform at tx_size, quantize, reconstruct. */
+#if CONFIG_COMP_INTRA_PRED
+ if (xd->mode_info_context->mbmi.second_mode == (MB_PREDICTION_MODE)(DC_PRED - 1))
+#endif
+ vp9_build_intra_predictors_mby(xd);
+#if CONFIG_COMP_INTRA_PRED
+ else
+ vp9_build_comp_intra_predictors_mby(xd);
+#endif
+ /* Residual of the whole luma plane against xd->predictor. */
+ vp9_subtract_mby(x->src_diff, *(b->base_src), xd->predictor, b->src_stride);
+ /* Same four steps per size; the trellis pass runs only if x->optimize. */
+ if (tx_size == TX_16X16) {
+ vp9_transform_mby_16x16(x);
+ vp9_quantize_mby_16x16(x);
+ if (x->optimize)
+ vp9_optimize_mby_16x16(x);
+ vp9_inverse_transform_mby_16x16(xd);
+ } else if (tx_size == TX_8X8) {
+ vp9_transform_mby_8x8(x);
+ vp9_quantize_mby_8x8(x);
+ if (x->optimize)
+ vp9_optimize_mby_8x8(x);
+ vp9_inverse_transform_mby_8x8(xd);
+ } else {
+ vp9_transform_mby_4x4(x);
+ vp9_quantize_mby_4x4(x);
+ if (x->optimize)
+ vp9_optimize_mby_4x4(x);
+ vp9_inverse_transform_mby_4x4(xd);
+ }
+
+ vp9_recon_mby(xd);
+}
+
+void vp9_encode_intra16x16mbuv(MACROBLOCK *x) {
+ MACROBLOCKD *xd = &x->e_mbd;
+ TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size;
+ /* Chroma: predict, subtract, transform, quantize, then reconstruct. */
+#if CONFIG_COMP_INTRA_PRED
+ if (xd->mode_info_context->mbmi.second_uv_mode == (MB_PREDICTION_MODE)(DC_PRED - 1)) {
+#endif
+ vp9_build_intra_predictors_mbuv(xd);
+#if CONFIG_COMP_INTRA_PRED
+ } else {
+ vp9_build_comp_intra_predictors_mbuv(xd);
+ }
+#endif
+
+ vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
+ xd->predictor, x->src.uv_stride);
+ /* Only TX_4X4 uses 4x4 chroma transforms; larger sizes share the 8x8 one. */
+ if (tx_size == TX_4X4) {
+ vp9_transform_mbuv_4x4(x);
+ vp9_quantize_mbuv_4x4(x);
+ if (x->optimize)
+ vp9_optimize_mbuv_4x4(x);
+ vp9_inverse_transform_mbuv_4x4(xd);
+ } else /* 16x16 or 8x8 */ {
+ vp9_transform_mbuv_8x8(x);
+ vp9_quantize_mbuv_8x8(x);
+ if (x->optimize)
+ vp9_optimize_mbuv_8x8(x);
+ vp9_inverse_transform_mbuv_8x8(xd);
+ }
+
+ vp9_recon_intra_mbuv(xd);
+}
+
+void vp9_encode_intra8x8(MACROBLOCK *x, int ib) {
+ MACROBLOCKD *xd = &x->e_mbd;
+ BLOCKD *b = &xd->block[ib];
+ BLOCK *be = &x->block[ib];
+ const int iblock[4] = {0, 1, 4, 5}; /* offsets from ib of the four 4x4 blocks */
+ int i;
+ TX_TYPE tx_type;
+ /* Intra-codes the 8x8 luma partition whose first 4x4 block is `ib`. */
+#if CONFIG_COMP_INTRA_PRED
+ if (b->bmi.as_mode.second == (MB_PREDICTION_MODE)(DC_PRED - 1)) {
+#endif
+ vp9_intra8x8_predict(b, b->bmi.as_mode.first, b->predictor);
+#if CONFIG_COMP_INTRA_PRED
+ } else {
+ vp9_comp_intra8x8_predict(b, b->bmi.as_mode.first, b->bmi.as_mode.second,
+ b->predictor);
+ }
+#endif
+ // generate residual blocks
+ vp9_subtract_4b_c(be, b, 16);
+ /* One 8x8 transform when txfm_size is TX_8X8, else four 4x4 transforms. */
+ if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) {
+ int idx = (ib & 0x02) ? (ib + 2) : ib; /* coeff slot: ib + 2 when bit 1 of ib is set */
+
+ tx_type = get_tx_type_8x8(xd, &xd->block[ib]);
+ if (tx_type != DCT_DCT) {
+ vp9_fht(be->src_diff, 32, (x->block + idx)->coeff,
+ tx_type, 8);
+ x->quantize_b_8x8(x->block + idx, xd->block + idx);
+ vp9_ihtllm(xd->block[idx].dqcoeff, xd->block[ib].diff, 32,
+ tx_type, 8, xd->block[idx].eob);
+ } else {
+ x->vp9_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32);
+ x->quantize_b_8x8(x->block + idx, xd->block + idx);
+ vp9_short_idct8x8(xd->block[idx].dqcoeff, xd->block[ib].diff, 32);
+ }
+ } else {
+ for (i = 0; i < 4; i++) {
+ b = &xd->block[ib + iblock[i]];
+ be = &x->block[ib + iblock[i]];
+ tx_type = get_tx_type_4x4(xd, b);
+ if (tx_type != DCT_DCT) {
+ vp9_fht_c(be->src_diff, 32, be->coeff, tx_type, 4); /* NOTE(review): _c variant here, vp9_fht in the 8x8 branch */
+ vp9_ht_quantize_b_4x4(be, b, tx_type);
+ vp9_ihtllm(b->dqcoeff, b->diff, 32, tx_type, 4, b->eob);
+ } else {
+ x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
+ x->quantize_b_4x4(be, b);
+ vp9_inverse_transform_b_4x4(xd, ib + iblock[i], 32);
+ }
+ }
+ }
+
+ // reconstruct submacroblock
+ for (i = 0; i < 4; i++) {
+ b = &xd->block[ib + iblock[i]];
+ vp9_recon_b_c(b->predictor, b->diff, *(b->base_dst) + b->dst,
+ b->dst_stride);
+ }
+}
+
+/* Codes the four 8x8 luma partitions, taking each partition's first block
+ * index from vp9_i8x8_block[]. */
+void vp9_encode_intra8x8mby(MACROBLOCK *x) {
+  int part;
+
+  for (part = 0; part < 4; ++part)
+    vp9_encode_intra8x8(x, vp9_i8x8_block[part]);
+}
+
+void vp9_encode_intra_uv4x4(MACROBLOCK *x, int ib,
+ int mode, int second) {
+ BLOCKD *b = &x->e_mbd.block[ib];
+ BLOCK *be = &x->block[ib];
+ /* Codes chroma 4x4 block `ib`; second == -1 selects single-mode prediction. */
+#if CONFIG_COMP_INTRA_PRED
+ if (second == -1) {
+#endif
+ vp9_intra_uv4x4_predict(b, mode, b->predictor);
+#if CONFIG_COMP_INTRA_PRED
+ } else {
+ vp9_comp_intra_uv4x4_predict(b, mode, second, b->predictor);
+ }
+#endif
+ /* Chroma buffers use pitch 8 (subtract) and 16 (transform) here. */
+ vp9_subtract_b(be, b, 8);
+
+ x->vp9_short_fdct4x4(be->src_diff, be->coeff, 16);
+ x->quantize_b_4x4(be, b);
+ vp9_inverse_transform_b_4x4(&x->e_mbd, ib, 16);
+
+ vp9_recon_uv_b_c(b->predictor, b->diff, *(b->base_dst) + b->dst,
+ b->dst_stride);
+}
+
+/* Codes the chroma 4x4 blocks (16..19 for U, 20..23 for V). Block pair `part`
+ * reuses the prediction mode(s) stored on luma block vp9_i8x8_block[part];
+ * without CONFIG_COMP_INTRA_PRED the second mode is always -1. */
+void vp9_encode_intra8x8mbuv(MACROBLOCK *x) {
+  int part;
+
+  for (part = 0; part < 4; ++part) {
+    BLOCKD *const luma = &x->e_mbd.block[vp9_i8x8_block[part]];
+    const int first_mode = luma->bmi.as_mode.first;
+#if CONFIG_COMP_INTRA_PRED
+    const int second_mode = luma->bmi.as_mode.second;
+#else
+    const int second_mode = -1;
+#endif
+
+    vp9_encode_intra_uv4x4(x, part + 16, first_mode, second_mode);  /* U */
+    vp9_encode_intra_uv4x4(x, part + 20, first_mode, second_mode);  /* V */
+  }
+}
diff --git a/vp9/encoder/vp9_encodeintra.h b/vp9/encoder/vp9_encodeintra.h
new file mode 100644
index 0000000..91c410d
--- /dev/null
+++ b/vp9/encoder/vp9_encodeintra.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_ENCODER_VP9_ENCODEINTRA_H_
+#define VP9_ENCODER_VP9_ENCODEINTRA_H_
+
+#include "vp9/encoder/vp9_onyx_int.h"
+
+int vp9_encode_intra(VP9_COMP *cpi, MACROBLOCK *x, int use_16x16_pred);
+void vp9_encode_intra16x16mby(MACROBLOCK *x);
+void vp9_encode_intra16x16mbuv(MACROBLOCK *x);
+void vp9_encode_intra4x4mby(MACROBLOCK *mb);
+void vp9_encode_intra4x4block(MACROBLOCK *x, int ib);
+void vp9_encode_intra8x8mby(MACROBLOCK *x);
+void vp9_encode_intra8x8mbuv(MACROBLOCK *x);
+void vp9_encode_intra8x8(MACROBLOCK *x, int ib);
+
+#endif  // VP9_ENCODER_VP9_ENCODEINTRA_H_
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
new file mode 100644
index 0000000..1deb128
--- /dev/null
+++ b/vp9/encoder/vp9_encodemb.c
@@ -0,0 +1,989 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vpx_config.h"
+#include "vp9/encoder/vp9_encodemb.h"
+#include "vp9/common/vp9_reconinter.h"
+#include "vp9/encoder/vp9_quantize.h"
+#include "vp9/encoder/vp9_tokenize.h"
+#include "vp9/common/vp9_invtrans.h"
+#include "vp9/common/vp9_reconintra.h"
+#include "vpx_mem/vpx_mem.h"
+#include "vp9/encoder/vp9_rdopt.h"
+#include "vp9/common/vp9_systemdependent.h"
+#include "vp9_rtcd.h"
+
+/* 4x4 residual: be->src_diff = source - bd->predictor. `pitch` is the row
+ * step, in elements, of both the residual and the predictor buffers. */
+void vp9_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch) {
+  const unsigned char *src_row = *(be->base_src) + be->src;
+  const unsigned char *pred_row = bd->predictor;
+  short *diff_row = be->src_diff;
+  const int src_step = be->src_stride;
+  int row;
+
+  for (row = 0; row < 4; ++row) {
+    int col;
+    for (col = 0; col < 4; ++col)
+      diff_row[col] = src_row[col] - pred_row[col];
+    diff_row += pitch;
+    pred_row += pitch;
+    src_row += src_step;
+  }
+}
+
+/* 8x8 residual: be->src_diff = source - bd->predictor, row step `pitch`. */
+void vp9_subtract_4b_c(BLOCK *be, BLOCKD *bd, int pitch) {
+  const unsigned char *src_row = *(be->base_src) + be->src;
+  const unsigned char *pred_row = bd->predictor;
+  short *diff_row = be->src_diff;
+  const int src_step = be->src_stride;
+  int row, col;
+
+  for (row = 0; row < 8; ++row) {
+    for (col = 0; col < 8; ++col)
+      diff_row[col] = src_row[col] - pred_row[col];
+    diff_row += pitch;
+    pred_row += pitch;
+    src_row += src_step;
+  }
+}
+
+/* Writes the residual of one 8x8 plane: out[r * 8 + c] = src - pred for every
+ * row r and column c. Output rows are packed 8 apart; src and pred advance by
+ * their own strides. */
+static void subtract_chroma_8x8(short *out, const unsigned char *src,
+                                int src_stride, const unsigned char *pred,
+                                int pred_stride) {
+  int row, col;
+
+  for (row = 0; row < 8; ++row) {
+    for (col = 0; col < 8; ++col)
+      out[col] = src[col] - pred[col];
+    out += 8;
+    src += src_stride;
+    pred += pred_stride;
+  }
+}
+
+/* Chroma residual with separately addressed predictors: the U plane is
+ * written to diff[256..319] and the V plane to diff[320..383]. src_stride
+ * applies to both sources and dst_stride to both predictors. U is processed
+ * completely before V. */
+void vp9_subtract_mbuv_s_c(short *diff, const unsigned char *usrc,
+                           const unsigned char *vsrc, int src_stride,
+                           const unsigned char *upred,
+                           const unsigned char *vpred, int dst_stride) {
+  subtract_chroma_8x8(diff + 256, usrc, src_stride, upred, dst_stride);
+  subtract_chroma_8x8(diff + 320, vsrc, src_stride, vpred, dst_stride);
+}
+
+/* Chroma residual against the contiguous predictor buffer: the U predictor
+ * starts at pred + 256 and the V predictor at pred + 320, both with a row
+ * stride of 8. `stride` is the source stride. */
+void vp9_subtract_mbuv_c(short *diff, unsigned char *usrc,
+                         unsigned char *vsrc, unsigned char *pred, int stride) {
+  vp9_subtract_mbuv_s_c(diff, usrc, vsrc, stride, pred + 256, pred + 320, 8);
+}
+
+/* 16x16 luma residual: diff rows are packed 16 apart, while src and pred are
+ * addressed with src_stride and dst_stride respectively. */
+void vp9_subtract_mby_s_c(short *diff, const unsigned char *src, int src_stride,
+                          const unsigned char *pred, int dst_stride) {
+  int row, col;
+
+  for (row = 0; row < 16; ++row) {
+    const unsigned char *const s = src + row * src_stride;
+    const unsigned char *const p = pred + row * dst_stride;
+
+    for (col = 0; col < 16; ++col)
+      diff[row * 16 + col] = s[col] - p[col];
+  }
+}
+
+void vp9_subtract_mby_c(short *diff, unsigned char *src,
+ unsigned char *pred, int stride) {
+ vp9_subtract_mby_s_c(diff, src, stride, pred, 16); /* predictor rows are 16 apart */
+}
+
+static void subtract_mb(MACROBLOCK *x) {
+ BLOCK *b = &x->block[0];
+ /* Luma residual first, then chroma, both against x->e_mbd.predictor. */
+ vp9_subtract_mby(x->src_diff, *(b->base_src), x->e_mbd.predictor,
+ b->src_stride);
+ vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
+ x->e_mbd.predictor, x->src.uv_stride);
+}
+
+/* Moves the DC of luma blocks 0..15 into x->src_diff[384..399], zeroing it. */
+static void build_dcblock_4x4(MACROBLOCK *x) {
+  int blk;
+
+  for (blk = 0; blk < 16; ++blk) {
+    x->src_diff[384 + blk] = x->coeff[blk * 16];
+    x->coeff[blk * 16] = 0;
+  }
+}
+
+/* Forward-transforms the 16 luma 4x4 blocks (vp9_fht_c() when the block's
+ * tx type is not DCT_DCT), then fills block 24's coefficients with either
+ * the Walsh transform of the gathered DC values or zeros. */
+void vp9_transform_mby_4x4(MACROBLOCK *x) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  const int has_2nd_order = get_2nd_order_usage(xd);
+  BLOCK *const y2 = &x->block[24];
+  int i;
+
+  for (i = 0; i < 16; i++) {
+    BLOCK *const b = &x->block[i];
+    const TX_TYPE tx_type = get_tx_type_4x4(xd, &xd->block[i]);
+    if (tx_type == DCT_DCT) {
+      x->vp9_short_fdct4x4(&b->src_diff[0], &b->coeff[0], 32);
+    } else {
+      assert(has_2nd_order == 0);
+      vp9_fht_c(b->src_diff, 32, b->coeff, tx_type, 4);
+    }
+  }
+
+  if (!has_2nd_order) {
+    vpx_memset(y2->coeff, 0, 16 * sizeof(y2->coeff[0]));
+    return;
+  }
+  /* build dc block from 16 y dc values, then 2nd order transform on it */
+  build_dcblock_4x4(x);
+  x->short_walsh4x4(&y2->src_diff[0], &y2->coeff[0], 8);
+}
+
+/* Chroma 4x4 transform: one 8x4 forward DCT per pair of blocks in 16..23. */
+void vp9_transform_mbuv_4x4(MACROBLOCK *x) {
+  int pair;
+  for (pair = 0; pair < 4; ++pair) {
+    BLOCK *const b = &x->block[16 + 2 * pair];
+    x->vp9_short_fdct8x4(&b->src_diff[0], &b->coeff[0], 16);
+  }
+}
+
+static void transform_mb_4x4(MACROBLOCK *x) {
+ vp9_transform_mby_4x4(x); /* luma, plus block 24 */
+ vp9_transform_mbuv_4x4(x); /* chroma blocks 16..23 */
+}
+
+/* Builds the second-order input in block 24: zeroes its 16 src_diff entries,
+ * then moves the first coefficient of luma blocks 0, 4, 8 and 12 into slots
+ * 0, 1, 4 and 8, clearing each source coefficient. */
+static void build_dcblock_8x8(MACROBLOCK *x) {
+  static const int dst_slot[4] = {0, 1, 4, 8};
+  int16_t *const y2_diff = x->block[24].src_diff;
+  int k;
+
+  for (k = 0; k < 16; ++k)
+    y2_diff[k] = 0;
+
+  for (k = 0; k < 4; ++k) {
+    y2_diff[dst_slot[k]] = x->coeff[k * 4 * 16];
+    x->coeff[k * 4 * 16] = 0;
+  }
+}
+
+void vp9_transform_mby_8x8(MACROBLOCK *x) {
+ int i;
+ MACROBLOCKD *xd = &x->e_mbd;
+ TX_TYPE tx_type;
+ int has_2nd_order = get_2nd_order_usage(xd);
+ /* Blocks 0 and 8: coefficients are written at the block's own index. */
+ for (i = 0; i < 9; i += 8) {
+ BLOCK *b = &x->block[i];
+ tx_type = get_tx_type_8x8(xd, &xd->block[i]);
+ if (tx_type != DCT_DCT) {
+ assert(has_2nd_order == 0);
+ vp9_fht_c(b->src_diff, 32, b->coeff, tx_type, 8);
+ } else {
+ x->vp9_short_fdct8x8(&x->block[i].src_diff[0],
+ &x->block[i].coeff[0], 32);
+ }
+ }
+ for (i = 2; i < 11; i += 8) { /* blocks 2 and 10: output lands in block i + 2 */
+ BLOCK *b = &x->block[i];
+ tx_type = get_tx_type_8x8(xd, &xd->block[i]);
+ if (tx_type != DCT_DCT) {
+ assert(has_2nd_order == 0);
+ vp9_fht_c(b->src_diff, 32, (b + 2)->coeff, tx_type, 8);
+ } else {
+ x->vp9_short_fdct8x8(&x->block[i].src_diff[0],
+ &x->block[i + 2].coeff[0], 32);
+ }
+ }
+ /* Block 24 gets the 2x2 Haar of the four DCs, or zeros when unused. */
+ if (has_2nd_order) {
+ // build dc block from 2x2 y dc values
+ build_dcblock_8x8(x);
+
+ // do 2nd order transform on the dc block
+ x->short_fhaar2x2(&x->block[24].src_diff[0],
+ &x->block[24].coeff[0], 8);
+ } else {
+ vpx_memset(x->block[24].coeff, 0, 16 * sizeof(x->block[24].coeff[0]));
+ }
+}
+
+/* Chroma 8x8 transform: one 8x8 forward DCT each at blocks 16 and 20. */
+void vp9_transform_mbuv_8x8(MACROBLOCK *x) {
+  int k;
+  for (k = 0; k < 2; ++k) {
+    BLOCK *const b = &x->block[16 + 4 * k];
+    x->vp9_short_fdct8x8(&b->src_diff[0], &b->coeff[0], 16);
+  }
+}
+
+void vp9_transform_mb_8x8(MACROBLOCK *x) {
+ vp9_transform_mby_8x8(x); /* luma, plus block 24 */
+ vp9_transform_mbuv_8x8(x); /* chroma blocks 16 and 20 */
+}
+
+/* Single 16x16 forward transform of the luma plane held in block 0:
+ * vp9_fht_c() when the tx type is not DCT_DCT, the 16x16 DCT otherwise. */
+void vp9_transform_mby_16x16(MACROBLOCK *x) {
+  BLOCK *const y = &x->block[0];
+  const TX_TYPE tx_type = get_tx_type_16x16(&x->e_mbd, &x->e_mbd.block[0]);
+
+  vp9_clear_system_state();
+  if (tx_type == DCT_DCT)
+    x->vp9_short_fdct16x16(&y->src_diff[0], &y->coeff[0], 32);
+  else
+    vp9_fht_c(y->src_diff, 32, y->coeff, tx_type, 16);
+}
+
+void vp9_transform_mb_16x16(MACROBLOCK *x) {
+ vp9_transform_mby_16x16(x); /* luma: one 16x16 transform */
+ vp9_transform_mbuv_8x8(x); /* chroma uses the 8x8 transform */
+}
+
+#define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF ) /* DM and D are unused */
+#define RDTRUNC_8x8(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF )
+typedef struct vp9_token_state vp9_token_state;
+/* One trellis node: accumulated rate/error, index of the next node, token, value. */
+struct vp9_token_state {
+ int rate;
+ int error;
+ int next;
+ signed char token;
+ short qc;
+};
+
+// TODO: experiments to find optimal multiple numbers
+#define Y1_RD_MULT 4
+#define UV_RD_MULT 2
+#define Y2_RD_MULT 4
+/* Indexed by PLANE_TYPE. NOTE(review): entry order assumed to follow that enum. */
+static const int plane_rd_mult[4] = {
+ Y1_RD_MULT,
+ Y2_RD_MULT,
+ UV_RD_MULT,
+ Y1_RD_MULT
+};
+/* Uses the caller's rdmult, rddiv, rate0/1, error0/1; ties fall back to RDTRUNC. */
+#define UPDATE_RD_COST()\
+{\
+ rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);\
+ rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);\
+ if (rd_cost0 == rd_cost1) {\
+ rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);\
+ rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);\
+ }\
+}
+
+/* Trellis (Viterbi) search over rounding choices for block i's quantized
+ * coefficients. Each non-zero coefficient keeps its value or, when its
+ * dequantized magnitude overshoots the input by less than one quantizer step,
+ * may move one step toward zero. The cheaper rate-distortion path is written
+ * back to qcoeff/dqcoeff, d->eob is updated, and *a / *l receive the new
+ * context flag. tx_size picks scan and bands (TX_8X8, otherwise 4x4). */
+static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type,
+                       ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
+                       int tx_size) {
+  BLOCK *b;
+  BLOCKD *d;
+  vp9_token_state tokens[65][2];
+  uint64_t best_mask[2];  /* bit i: successor state chosen at position i */
+  const short *dequant_ptr;
+  const short *coeff_ptr;
+  short *qcoeff_ptr;
+  short *dqcoeff_ptr;
+  int eob, final_eob, default_eob;
+  int i0;
+  int rc;
+  int x;
+  int sz = 0;
+  int next;
+  int rdmult, rddiv;
+  int64_t rd_cost0, rd_cost1;
+  int rate0, rate1, error0, error1;
+  int t0, t1;
+  int best, band, pt;
+  int err_mult = plane_rd_mult[type];
+  int const *scan, *bands;
+
+  b = &mb->block[i];
+  d = &mb->e_mbd.block[i];
+  switch (tx_size) {
+    default:
+    case TX_4X4:
+      scan = vp9_default_zig_zag1d;
+      bands = vp9_coef_bands;
+      default_eob = 16;
+      // TODO: this isn't called (for intra4x4 modes), but will be left in
+      // since it could be used later
+      {
+        TX_TYPE tx_type = get_tx_type_4x4(&mb->e_mbd, d);
+        if (tx_type != DCT_DCT) {
+          switch (tx_type) {
+            case ADST_DCT:
+              scan = vp9_row_scan;
+              break;
+
+            case DCT_ADST:
+              scan = vp9_col_scan;
+              break;
+
+            default:
+              scan = vp9_default_zig_zag1d;
+              break;
+          }
+        } else {
+          scan = vp9_default_zig_zag1d;
+        }
+      }
+      break;
+    case TX_8X8:
+      scan = vp9_default_zig_zag1d_8x8;
+      bands = vp9_coef_bands_8x8;
+      default_eob = 64;
+      break;
+  }
+
+  dequant_ptr = d->dequant;
+  coeff_ptr = b->coeff;
+  qcoeff_ptr = d->qcoeff;
+  dqcoeff_ptr = d->dqcoeff;
+  i0 = (type == PLANE_TYPE_Y_NO_DC);
+  eob = d->eob;
+
+  /* Now set up a Viterbi trellis to evaluate alternative roundings. */
+  rdmult = mb->rdmult * err_mult;
+  if (mb->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME)
+    rdmult = (rdmult * 9) >> 4;
+  rddiv = mb->rddiv;
+  best_mask[0] = best_mask[1] = 0;
+  /* Initialize the sentinel node of the trellis. */
+  tokens[eob][0].rate = 0;
+  tokens[eob][0].error = 0;
+  tokens[eob][0].next = default_eob;
+  tokens[eob][0].token = DCT_EOB_TOKEN;
+  tokens[eob][0].qc = 0;
+  *(tokens[eob] + 1) = *(tokens[eob] + 0);
+  next = eob;
+  for (i = eob; i-- > i0;) {
+    int base_bits;
+    int d2;
+    int dx;
+
+    rc = scan[i];
+    x = qcoeff_ptr[rc];
+    /* Only add a trellis state for non-zero coefficients. */
+    if (x) {
+      int shortcut = 0;
+      error0 = tokens[next][0].error;
+      error1 = tokens[next][1].error;
+      /* Evaluate the first possibility for this state. */
+      rate0 = tokens[next][0].rate;
+      rate1 = tokens[next][1].rate;
+      t0 = (vp9_dct_value_tokens_ptr + x)->Token;
+      /* Consider both possible successor states. */
+      if (next < default_eob) {
+        band = bands[i + 1];
+        pt = vp9_prev_token_class[t0];
+        rate0 +=
+          mb->token_costs[tx_size][type][band][pt][tokens[next][0].token];
+        rate1 +=
+          mb->token_costs[tx_size][type][band][pt][tokens[next][1].token];
+      }
+      UPDATE_RD_COST();
+      /* And pick the best. */
+      best = rd_cost1 < rd_cost0;
+      base_bits = *(vp9_dct_value_cost_ptr + x);
+      dx = dqcoeff_ptr[rc] - coeff_ptr[rc];
+      d2 = dx * dx;
+      tokens[i][0].rate = base_bits + (best ? rate1 : rate0);
+      tokens[i][0].error = d2 + (best ? error1 : error0);
+      tokens[i][0].next = next;
+      tokens[i][0].token = t0;
+      tokens[i][0].qc = x;
+      best_mask[0] |= (uint64_t)best << i;  /* widen first: i can reach 63 */
+      /* Evaluate the second possibility for this state. */
+      rate0 = tokens[next][0].rate;
+      rate1 = tokens[next][1].rate;
+
+      if ((abs(x)*dequant_ptr[rc != 0] > abs(coeff_ptr[rc])) &&
+          (abs(x)*dequant_ptr[rc != 0] < abs(coeff_ptr[rc]) + dequant_ptr[rc != 0]))
+        shortcut = 1;
+      else
+        shortcut = 0;
+
+      if (shortcut) {
+        sz = -(x < 0);
+        x -= 2 * sz + 1;
+      }
+
+      /* Consider both possible successor states. */
+      if (!x) {
+        /* If we reduced this coefficient to zero, check to see if
+         *  we need to move the EOB back here.
+         */
+        t0 = tokens[next][0].token == DCT_EOB_TOKEN ?
+             DCT_EOB_TOKEN : ZERO_TOKEN;
+        t1 = tokens[next][1].token == DCT_EOB_TOKEN ?
+             DCT_EOB_TOKEN : ZERO_TOKEN;
+      } else {
+        t0 = t1 = (vp9_dct_value_tokens_ptr + x)->Token;
+      }
+      if (next < default_eob) {
+        band = bands[i + 1];
+        if (t0 != DCT_EOB_TOKEN) {
+          pt = vp9_prev_token_class[t0];
+          rate0 += mb->token_costs[tx_size][type][band][pt][
+                     tokens[next][0].token];
+        }
+        if (t1 != DCT_EOB_TOKEN) {
+          pt = vp9_prev_token_class[t1];
+          rate1 += mb->token_costs[tx_size][type][band][pt][
+                     tokens[next][1].token];
+        }
+      }
+
+      UPDATE_RD_COST();
+      /* And pick the best. */
+      best = rd_cost1 < rd_cost0;
+      base_bits = *(vp9_dct_value_cost_ptr + x);
+
+      if (shortcut) {
+        dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
+        d2 = dx * dx;
+      }
+      tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
+      tokens[i][1].error = d2 + (best ? error1 : error0);
+      tokens[i][1].next = next;
+      tokens[i][1].token = best ? t1 : t0;
+      tokens[i][1].qc = x;
+      best_mask[1] |= (uint64_t)best << i;  /* widen first: i can reach 63 */
+      /* Finally, make this the new head of the trellis. */
+      next = i;
+    }
+    /* There's no choice to make for a zero coefficient, so we don't
+     *  add a new trellis node, but we do need to update the costs.
+     */
+    else {
+      band = bands[i + 1];
+      t0 = tokens[next][0].token;
+      t1 = tokens[next][1].token;
+      /* Update the cost of each path if we're past the EOB token. */
+      if (t0 != DCT_EOB_TOKEN) {
+        tokens[next][0].rate += mb->token_costs[tx_size][type][band][0][t0];
+        tokens[next][0].token = ZERO_TOKEN;
+      }
+      if (t1 != DCT_EOB_TOKEN) {
+        tokens[next][1].rate += mb->token_costs[tx_size][type][band][0][t1];
+        tokens[next][1].token = ZERO_TOKEN;
+      }
+      /* Don't update next, because we didn't add a new node. */
+    }
+  }
+
+  /* Now pick the best path through the whole trellis. */
+  band = bands[i + 1];
+  VP9_COMBINEENTROPYCONTEXTS(pt, *a, *l);
+  rate0 = tokens[next][0].rate;
+  rate1 = tokens[next][1].rate;
+  error0 = tokens[next][0].error;
+  error1 = tokens[next][1].error;
+  t0 = tokens[next][0].token;
+  t1 = tokens[next][1].token;
+  rate0 += mb->token_costs[tx_size][type][band][pt][t0];
+  rate1 += mb->token_costs[tx_size][type][band][pt][t1];
+  UPDATE_RD_COST();
+  best = rd_cost1 < rd_cost0;
+  final_eob = i0 - 1;
+  for (i = next; i < eob; i = next) {
+    x = tokens[i][best].qc;
+    if (x)
+      final_eob = i;
+    rc = scan[i];
+    qcoeff_ptr[rc] = x;
+    dqcoeff_ptr[rc] = (x * dequant_ptr[rc != 0]);
+
+    next = tokens[i][best].next;
+    best = (best_mask[best] >> i) & 1;
+  }
+  final_eob++;
+
+  d->eob = final_eob;
+  *a = *l = (d->eob > !type);
+}
+
+/**************************************************************************
+our inverse hadamard transform effectively is weighted sum of all 16 inputs
+with weight either 1 or -1. It has a last stage scaling of (sum+1)>>2. And
+dc only idct is (dc+16)>>5. So if all the sums are between -65 and 63 the
+output after inverse wht and idct will be all zero. A sum of absolute value
+smaller than 65 guarantees all 16 different (+1/-1) weighted sums in wht
+fall between -65 and +65.
+**************************************************************************/
+#define SUM_2ND_COEFF_THRESH 65
+
+/* Clears block 24 (coefficients, eob, both contexts) when the absolute sum
+ * of its dequantized coefficients stays below SUM_2ND_COEFF_THRESH. */
+static void check_reset_2nd_coeffs(MACROBLOCKD *xd,
+                                   ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) {
+  BLOCKD *const y2 = &xd->block[24];
+  int abs_sum = 0, pos;
+
+  if (y2->dequant[0] >= SUM_2ND_COEFF_THRESH &&
+      y2->dequant[1] >= SUM_2ND_COEFF_THRESH)
+    return;
+
+  for (pos = 0; pos < y2->eob; ++pos) {
+    const int coef = y2->dqcoeff[vp9_default_zig_zag1d[pos]];
+    abs_sum += (coef < 0) ? -coef : coef;
+    if (abs_sum >= SUM_2ND_COEFF_THRESH)
+      return;
+  }
+  /* Reaching this point means abs_sum < SUM_2ND_COEFF_THRESH. */
+  for (pos = 0; pos < y2->eob; ++pos) {
+    const int rc = vp9_default_zig_zag1d[pos];
+    y2->qcoeff[rc] = 0;
+    y2->dqcoeff[rc] = 0;
+  }
+  y2->eob = 0;
+  *a = *l = 0;
+}
+
+#define SUM_2ND_COEFF_THRESH_8X8 32
+/* 8x8 variant: only positions 0, 1, 4 and 8 of block 24 are examined. When
+ * their absolute dequantized sum is below SUM_2ND_COEFF_THRESH_8X8, those
+ * four coefficients, the eob and both entropy contexts are cleared. */
+static void check_reset_8x8_2nd_coeffs(MACROBLOCKD *xd,
+                                       ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) {
+  static const int pos[4] = {0, 1, 4, 8};
+  BLOCKD *const y2 = &xd->block[24];
+  int abs_sum = 0;
+  int k;
+
+  for (k = 0; k < 4; ++k) {
+    const int coef = y2->dqcoeff[pos[k]];
+
+    abs_sum += (coef < 0) ? -coef : coef;
+  }
+
+  if (abs_sum >= SUM_2ND_COEFF_THRESH_8X8)
+    return;
+
+  /* Below the threshold: drop the second-order block entirely. */
+  for (k = 0; k < 4; ++k) {
+    y2->qcoeff[pos[k]] = 0;
+    y2->dqcoeff[pos[k]] = 0;
+  }
+
+  y2->eob = 0;
+  *a = *l = 0;
+}
+
+/* Trellis-optimizes the 16 luma 4x4 blocks and, when second-order coding is
+ * in use, block 24 as well. Works on local copies of the above/left entropy
+ * contexts; returns immediately if either context pointer is NULL. */
+void vp9_optimize_mby_4x4(MACROBLOCK *x) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  ENTROPY_CONTEXT_PLANES t_above, t_left;
+  ENTROPY_CONTEXT *const ta = (ENTROPY_CONTEXT *)&t_above;
+  ENTROPY_CONTEXT *const tl = (ENTROPY_CONTEXT *)&t_left;
+  PLANE_TYPE type;
+  int has_2nd_order;
+  int b;
+
+  if (!xd->above_context || !xd->left_context)
+    return;
+
+  vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
+  vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));
+
+  has_2nd_order = get_2nd_order_usage(xd);
+  type = has_2nd_order ? PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC;
+
+  for (b = 0; b < 16; b++) {
+    optimize_b(x, b, type,
+               ta + vp9_block2above[b], tl + vp9_block2left[b], TX_4X4);
+  }
+
+  if (!has_2nd_order)
+    return;
+
+  optimize_b(x, 24, PLANE_TYPE_Y2,
+             ta + vp9_block2above[24], tl + vp9_block2left[24], TX_4X4);
+  check_reset_2nd_coeffs(xd,
+                         ta + vp9_block2above[24], tl + vp9_block2left[24]);
+}
+
+/* Trellis-optimizes chroma blocks 16..23 at 4x4, using local copies of the
+ * above/left entropy contexts. A NULL above or left context makes this a
+ * no-op. */
+void vp9_optimize_mbuv_4x4(MACROBLOCK *x) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  ENTROPY_CONTEXT_PLANES t_above, t_left;
+  ENTROPY_CONTEXT *const ta = (ENTROPY_CONTEXT *)&t_above;
+  ENTROPY_CONTEXT *const tl = (ENTROPY_CONTEXT *)&t_left;
+  int b;
+
+  if (xd->above_context && xd->left_context) {
+    vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
+    vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));
+
+    for (b = 16; b < 24; b++) {
+      optimize_b(x, b, PLANE_TYPE_UV,
+                 ta + vp9_block2above[b], tl + vp9_block2left[b], TX_4X4);
+    }
+  }
+}
+
+static void optimize_mb_4x4(MACROBLOCK *x) {
+ vp9_optimize_mby_4x4(x); /* luma blocks, plus block 24 when used */
+ vp9_optimize_mbuv_4x4(x); /* chroma blocks 16..23 */
+}
+
+/* Trellis-optimizes the four luma 8x8 blocks (0, 4, 8, 12) on local context
+ * copies, mirroring each result into the adjacent context slot. With
+ * second-order coding, block 24 is only checked for reset, not optimized. */
+void vp9_optimize_mby_8x8(MACROBLOCK *x) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  const int has_2nd_order = get_2nd_order_usage(xd);
+  ENTROPY_CONTEXT_PLANES t_above, t_left;
+  ENTROPY_CONTEXT *const ta = (ENTROPY_CONTEXT *)&t_above;
+  ENTROPY_CONTEXT *const tl = (ENTROPY_CONTEXT *)&t_left;
+  PLANE_TYPE type;
+  int b;
+
+  if (!xd->above_context || !xd->left_context)
+    return;
+
+  vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
+  vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));
+
+  type = has_2nd_order ? PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC;
+  for (b = 0; b < 16; b += 4) {
+    ENTROPY_CONTEXT *const above = ta + vp9_block2above_8x8[b];
+    ENTROPY_CONTEXT *const left = tl + vp9_block2left_8x8[b];
+    optimize_b(x, b, type, above, left, TX_8X8);
+    above[1] = above[0];
+    left[1] = left[0];
+  }
+
+  if (has_2nd_order) {
+    check_reset_8x8_2nd_coeffs(xd, ta + vp9_block2above_8x8[24],
+                               tl + vp9_block2left_8x8[24]);
+  }
+}
+
+/* Trellis-optimizes the two chroma 8x8 blocks (16 and 20) on local copies of
+ * the entropy contexts, duplicating each result into the adjacent slot. */
+void vp9_optimize_mbuv_8x8(MACROBLOCK *x) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  ENTROPY_CONTEXT_PLANES t_above, t_left;
+  ENTROPY_CONTEXT *const ta = (ENTROPY_CONTEXT *)&t_above;
+  ENTROPY_CONTEXT *const tl = (ENTROPY_CONTEXT *)&t_left;
+  int b;
+
+  if (!xd->above_context || !xd->left_context)
+    return;
+
+  vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
+  vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));
+
+  for (b = 16; b < 24; b += 4) {
+    ENTROPY_CONTEXT *const above = ta + vp9_block2above_8x8[b];
+    ENTROPY_CONTEXT *const left = tl + vp9_block2left_8x8[b];
+    optimize_b(x, b, PLANE_TYPE_UV, above, left, TX_8X8);
+    above[1] = above[0];
+    left[1] = left[0];
+  }
+}
+
+static void optimize_mb_8x8(MACROBLOCK *x) {
+ vp9_optimize_mby_8x8(x); /* luma 8x8 blocks, plus the block 24 reset check */
+ vp9_optimize_mbuv_8x8(x); /* chroma 8x8 blocks 16 and 20 */
+}
+
+static void optimize_b_16x16(MACROBLOCK *mb, int i, PLANE_TYPE type,
+ ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) {
+ BLOCK *b = &mb->block[i];
+ BLOCKD *d = &mb->e_mbd.block[i];
+ vp9_token_state tokens[257][2];
+ unsigned best_index[257][2];
+ const short *dequant_ptr = d->dequant, *coeff_ptr = b->coeff;
+ short *qcoeff_ptr = qcoeff_ptr = d->qcoeff;
+ short *dqcoeff_ptr = dqcoeff_ptr = d->dqcoeff;
+ int eob = d->eob, final_eob, sz = 0;
+ int rc, x, next;
+ int64_t rdmult, rddiv, rd_cost0, rd_cost1;
+ int rate0, rate1, error0, error1, t0, t1;
+ int best, band, pt;
+ int err_mult = plane_rd_mult[type];
+
+ /* Now set up a Viterbi trellis to evaluate alternative roundings. */
+ rdmult = mb->rdmult * err_mult;
+ if (mb->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME)
+ rdmult = (rdmult * 9)>>4;
+ rddiv = mb->rddiv;
+ memset(best_index, 0, sizeof(best_index));
+ /* Initialize the sentinel node of the trellis. */
+ tokens[eob][0].rate = 0;
+ tokens[eob][0].error = 0;
+ tokens[eob][0].next = 256;
+ tokens[eob][0].token = DCT_EOB_TOKEN;
+ tokens[eob][0].qc = 0;
+ *(tokens[eob] + 1) = *(tokens[eob] + 0);
+ next = eob;
+ for (i = eob; i-- > 0;) {
+ int base_bits, d2, dx;
+
+ rc = vp9_default_zig_zag1d_16x16[i];
+ x = qcoeff_ptr[rc];
+ /* Only add a trellis state for non-zero coefficients. */
+ if (x) {
+ int shortcut = 0;
+ error0 = tokens[next][0].error;
+ error1 = tokens[next][1].error;
+ /* Evaluate the first possibility for this state. */
+ rate0 = tokens[next][0].rate;
+ rate1 = tokens[next][1].rate;
+ t0 = (vp9_dct_value_tokens_ptr + x)->Token;
+ /* Consider both possible successor states. */
+ if (next < 256) {
+ band = vp9_coef_bands_16x16[i + 1];
+ pt = vp9_prev_token_class[t0];
+ rate0 += mb->token_costs[TX_16X16][type][band][pt][tokens[next][0].token];
+ rate1 += mb->token_costs[TX_16X16][type][band][pt][tokens[next][1].token];
+ }
+ UPDATE_RD_COST();
+ /* And pick the best. */
+ best = rd_cost1 < rd_cost0;
+ base_bits = *(vp9_dct_value_cost_ptr + x);
+ dx = dqcoeff_ptr[rc] - coeff_ptr[rc];
+ d2 = dx*dx;
+ tokens[i][0].rate = base_bits + (best ? rate1 : rate0);
+ tokens[i][0].error = d2 + (best ? error1 : error0);
+ tokens[i][0].next = next;
+ tokens[i][0].token = t0;
+ tokens[i][0].qc = x;
+ best_index[i][0] = best;
+ /* Evaluate the second possibility for this state. */
+ rate0 = tokens[next][0].rate;
+ rate1 = tokens[next][1].rate;
+
+ if((abs(x)*dequant_ptr[rc!=0]>abs(coeff_ptr[rc])) &&
+ (abs(x)*dequant_ptr[rc!=0]<abs(coeff_ptr[rc])+dequant_ptr[rc!=0]))
+ shortcut = 1;
+ else
+ shortcut = 0;
+
+ if (shortcut) {
+ sz = -(x < 0);
+ x -= 2*sz + 1;
+ }
+
+ /* Consider both possible successor states. */
+ if (!x) {
+ /* If we reduced this coefficient to zero, check to see if
+ * we need to move the EOB back here.
+ */
+ t0 = tokens[next][0].token == DCT_EOB_TOKEN ?
+ DCT_EOB_TOKEN : ZERO_TOKEN;
+ t1 = tokens[next][1].token == DCT_EOB_TOKEN ?
+ DCT_EOB_TOKEN : ZERO_TOKEN;
+ }
+ else
+ t0=t1 = (vp9_dct_value_tokens_ptr + x)->Token;
+ if (next < 256) {
+ band = vp9_coef_bands_16x16[i + 1];
+ if (t0 != DCT_EOB_TOKEN) {
+ pt = vp9_prev_token_class[t0];
+ rate0 += mb->token_costs[TX_16X16][type][band][pt]
+ [tokens[next][0].token];
+ }
+ if (t1!=DCT_EOB_TOKEN) {
+ pt = vp9_prev_token_class[t1];
+ rate1 += mb->token_costs[TX_16X16][type][band][pt]
+ [tokens[next][1].token];
+ }
+ }
+ UPDATE_RD_COST();
+ /* And pick the best. */
+ best = rd_cost1 < rd_cost0;
+ base_bits = *(vp9_dct_value_cost_ptr + x);
+
+ if(shortcut) {
+ dx -= (dequant_ptr[rc!=0] + sz) ^ sz;
+ d2 = dx*dx;
+ }
+ tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
+ tokens[i][1].error = d2 + (best ? error1 : error0);
+ tokens[i][1].next = next;
+ tokens[i][1].token = best ? t1 : t0;
+ tokens[i][1].qc = x;
+ best_index[i][1] = best;
+ /* Finally, make this the new head of the trellis. */
+ next = i;
+ }
+ /* There's no choice to make for a zero coefficient, so we don't
+ * add a new trellis node, but we do need to update the costs.
+ */
+ else {
+ band = vp9_coef_bands_16x16[i + 1];
+ t0 = tokens[next][0].token;
+ t1 = tokens[next][1].token;
+ /* Update the cost of each path if we're past the EOB token. */
+ if (t0 != DCT_EOB_TOKEN) {
+ tokens[next][0].rate += mb->token_costs[TX_16X16][type][band][0][t0];
+ tokens[next][0].token = ZERO_TOKEN;
+ }
+ if (t1 != DCT_EOB_TOKEN) {
+ tokens[next][1].rate += mb->token_costs[TX_16X16][type][band][0][t1];
+ tokens[next][1].token = ZERO_TOKEN;
+ }
+ /* Don't update next, because we didn't add a new node. */
+ }
+ }
+
+ /* Now pick the best path through the whole trellis. */
+ band = vp9_coef_bands_16x16[i + 1];
+ VP9_COMBINEENTROPYCONTEXTS(pt, *a, *l);
+ rate0 = tokens[next][0].rate;
+ rate1 = tokens[next][1].rate;
+ error0 = tokens[next][0].error;
+ error1 = tokens[next][1].error;
+ t0 = tokens[next][0].token;
+ t1 = tokens[next][1].token;
+ rate0 += mb->token_costs[TX_16X16][type][band][pt][t0];
+ rate1 += mb->token_costs[TX_16X16][type][band][pt][t1];
+ UPDATE_RD_COST();
+ best = rd_cost1 < rd_cost0;
+ final_eob = -1;
+
+ for (i = next; i < eob; i = next) {
+ x = tokens[i][best].qc;
+ if (x)
+ final_eob = i;
+ rc = vp9_default_zig_zag1d_16x16[i];
+ qcoeff_ptr[rc] = x;
+ dqcoeff_ptr[rc] = (x * dequant_ptr[rc!=0]);
+
+ next = tokens[i][best].next;
+ best = best_index[i][best];
+ }
+ final_eob++;
+
+ d->eob = final_eob;
+ *a = *l = (d->eob > !type);
+}
+
+void vp9_optimize_mby_16x16(MACROBLOCK *x) {
+ ENTROPY_CONTEXT_PLANES t_above, t_left;
+ ENTROPY_CONTEXT *ta, *tl;
+ /* Runs optimize_b_16x16 for PLANE_TYPE_Y_WITH_DC; no-op if a context is NULL. */
+ if (!x->e_mbd.above_context || !x->e_mbd.left_context)
+ return;
+ /* Copy both contexts so optimize_b_16x16 only writes to these locals. */
+ vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
+ vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
+
+ ta = (ENTROPY_CONTEXT *)&t_above;
+ tl = (ENTROPY_CONTEXT *)&t_left;
+ optimize_b_16x16(x, 0, PLANE_TYPE_Y_WITH_DC, ta, tl);
+}
+
+static void optimize_mb_16x16(MACROBLOCK *x) {  /* optimize step used for TX_16X16 */
+ vp9_optimize_mby_16x16(x);  /* mby part with the 16x16 routine */
+ vp9_optimize_mbuv_8x8(x);  /* mbuv part reuses the 8x8 routine */
+}
+
+void vp9_fidct_mb(MACROBLOCK *x) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size;
+ /* Per transform size: transform, quantize, optimize if enabled, inverse. */
+ if (tx_size == TX_16X16) {
+ vp9_transform_mb_16x16(x);
+ vp9_quantize_mb_16x16(x);
+ if (x->optimize)
+ optimize_mb_16x16(x);
+ vp9_inverse_transform_mb_16x16(xd);
+ } else if (tx_size == TX_8X8) {
+ if (xd->mode_info_context->mbmi.mode == SPLITMV) {  /* 8x8 for mby, 4x4 for mbuv */
+ assert(xd->mode_info_context->mbmi.partitioning != PARTITIONING_4X4);
+ vp9_transform_mby_8x8(x);
+ vp9_transform_mbuv_4x4(x);
+ vp9_quantize_mby_8x8(x);
+ vp9_quantize_mbuv_4x4(x);
+ if (x->optimize) {
+ vp9_optimize_mby_8x8(x);
+ vp9_optimize_mbuv_4x4(x);
+ }
+ vp9_inverse_transform_mby_8x8(xd);
+ vp9_inverse_transform_mbuv_4x4(xd);
+ } else {  /* non-SPLITMV: 8x8 routines for the whole macroblock */
+ vp9_transform_mb_8x8(x);
+ vp9_quantize_mb_8x8(x);
+ if (x->optimize)
+ optimize_mb_8x8(x);
+ vp9_inverse_transform_mb_8x8(xd);
+ }
+ } else {  /* any other tx_size takes the 4x4 path */
+ transform_mb_4x4(x);
+ vp9_quantize_mb_4x4(x);
+ if (x->optimize)
+ optimize_mb_4x4(x);
+ vp9_inverse_transform_mb_4x4(xd);
+ }
+}
+
+void vp9_encode_inter16x16(MACROBLOCK *x) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ /* Build the inter predictor, subtract it, run vp9_fidct_mb, then reconstruct. */
+ vp9_build_inter_predictors_mb(xd);
+ subtract_mb(x);
+ vp9_fidct_mb(x);
+ vp9_recon_mb(xd);
+}
+
+/* Used by the first pass only: handles just the mby part, always via the 4x4 path. */
+void vp9_encode_inter16x16y(MACROBLOCK *x) {
+ MACROBLOCKD *xd = &x->e_mbd;
+ BLOCK *b = &x->block[0];
+
+#if CONFIG_PRED_FILTER
+ // Disable the prediction filter for firstpass
+ xd->mode_info_context->mbmi.pred_filter_enabled = 0;
+#endif
+ /* Predict into xd->predictor, then subtract it from the source into src_diff. */
+ vp9_build_1st_inter16x16_predictors_mby(xd, xd->predictor, 16, 0);
+
+ vp9_subtract_mby(x->src_diff, *(b->base_src), xd->predictor, b->src_stride);
+ /* No optimize step here, unlike vp9_fidct_mb. */
+ vp9_transform_mby_4x4(x);
+ vp9_quantize_mby_4x4(x);
+ vp9_inverse_transform_mby_4x4(xd);
+
+ vp9_recon_mby(xd);
+}
diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h
new file mode 100644
index 0000000..73cfd8d
--- /dev/null
+++ b/vp9/encoder/vp9_encodemb.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_ENCODER_VP9_ENCODEMB_H_
+#define VP9_ENCODER_VP9_ENCODEMB_H_
+
+#include "./vpx_config.h"
+#include "vp9/encoder/vp9_block.h"
+
+typedef struct {  /* NOTE(review): presumably one mode-search candidate; users not visible here */
+ MB_PREDICTION_MODE mode;  /* prediction mode of this entry */
+ MV_REFERENCE_FRAME ref_frame;  /* first reference frame */
+ MV_REFERENCE_FRAME second_ref_frame;  /* second reference frame */
+#if CONFIG_PRED_FILTER
+ int pred_filter_flag;  /* only present when CONFIG_PRED_FILTER is set */
+#endif
+} MODE_DEFINITION;
+
+
+#include "vp9/encoder/vp9_onyx_int.h"
+struct VP9_ENCODER_RTCD;
+void vp9_encode_inter16x16(MACROBLOCK *x);
+
+void vp9_transform_mbuv_4x4(MACROBLOCK *x);
+void vp9_transform_mby_4x4(MACROBLOCK *x);
+
+void vp9_optimize_mby_4x4(MACROBLOCK *x);
+void vp9_optimize_mbuv_4x4(MACROBLOCK *x);
+void vp9_encode_inter16x16y(MACROBLOCK *x);
+
+void vp9_transform_mb_8x8(MACROBLOCK *mb);
+void vp9_transform_mby_8x8(MACROBLOCK *x);
+void vp9_transform_mbuv_8x8(MACROBLOCK *x);
+void vp9_build_dcblock_8x8(MACROBLOCK *b);
+void vp9_optimize_mby_8x8(MACROBLOCK *x);
+void vp9_optimize_mbuv_8x8(MACROBLOCK *x);
+
+void vp9_transform_mb_16x16(MACROBLOCK *mb);
+void vp9_transform_mby_16x16(MACROBLOCK *x);
+void vp9_optimize_mby_16x16(MACROBLOCK *x);
+
+void vp9_fidct_mb(MACROBLOCK *x);
+
+void vp9_subtract_4b_c(BLOCK *be, BLOCKD *bd, int pitch);
+
+#if CONFIG_SUPERBLOCKS
+void vp9_subtract_mbuv_s_c(short *diff, const unsigned char *usrc,
+ const unsigned char *vsrc, int src_stride,
+ const unsigned char *upred,
+ const unsigned char *vpred, int dst_stride);
+void vp9_subtract_mby_s_c(short *diff, const unsigned char *src,
+ int src_stride, const unsigned char *pred,
+ int dst_stride);
+#endif
+
+#endif
diff --git a/vp9/encoder/vp9_encodemv.c b/vp9/encoder/vp9_encodemv.c
new file mode 100644
index 0000000..9431f07
--- /dev/null
+++ b/vp9/encoder/vp9_encodemv.c
@@ -0,0 +1,661 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vp9/common/vp9_common.h"
+#include "vp9/encoder/vp9_encodemv.h"
+#include "vp9/common/vp9_entropymode.h"
+#include "vp9/common/vp9_systemdependent.h"
+
+#include <math.h>
+
+#ifdef ENTROPY_STATS
+extern unsigned int active_section;
+#endif
+
+#ifdef NMV_STATS
+nmv_context_counts tnmvcounts;
+#endif
+
+static void encode_nmv_component(vp9_writer* const bc,
+ int v,  /* component value to code; must be nonzero */
+ int r,  /* not used by this function */
+ const nmv_component* const mvcomp) {
+ int s, z, c, o, d;
+ assert (v != 0); /* callers must only pass nonzero components */
+ s = v < 0;
+ vp9_write(bc, s, mvcomp->sign);
+ z = (s ? -v : v) - 1; /* magnitude - 1 */
+ /* vp9_get_mv_class returns the class and stores the remainder in o. */
+ c = vp9_get_mv_class(z, &o);
+
+ write_token(bc, vp9_mv_class_tree, mvcomp->classes,
+ vp9_mv_class_encodings + c);
+
+ d = (o >> 3); /* int mv data: o without its low 3 bits */
+ /* Class 0 codes d as one token; other classes send d bit by bit. */
+ if (c == MV_CLASS_0) {
+ write_token(bc, vp9_mv_class0_tree, mvcomp->class0,
+ vp9_mv_class0_encodings + d);
+ } else {
+ int i, b;
+ b = c + CLASS0_BITS - 1; /* number of bits */
+ for (i = 0; i < b; ++i)  /* least significant bit first */
+ vp9_write(bc, ((d >> i) & 1), mvcomp->bits[i]);
+ }
+}
+
+static void encode_nmv_component_fp(vp9_writer *bc,
+ int v,  /* component value; must be nonzero */
+ int r,  /* not used by this function */
+ const nmv_component* const mvcomp,
+ int usehp) {  /* nonzero: also write the high precision bit */
+ int s, z, c, o, d, f, e;
+ assert (v != 0); /* callers must only pass nonzero components */
+ s = v < 0;
+ z = (s ? -v : v) - 1; /* magnitude - 1 */
+ /* Only the class is needed to pick probabilities; the sign is not written here. */
+ c = vp9_get_mv_class(z, &o);
+
+ d = (o >> 3); /* int mv data */
+ f = (o >> 1) & 3; /* fractional pel mv data */
+ e = (o & 1); /* high precision mv data */
+
+ /* Code the fractional pel bits; class 0 uses a table selected by d */
+ if (c == MV_CLASS_0) {
+ write_token(bc, vp9_mv_fp_tree, mvcomp->class0_fp[d],
+ vp9_mv_fp_encodings + f);
+ } else {
+ write_token(bc, vp9_mv_fp_tree, mvcomp->fp,
+ vp9_mv_fp_encodings + f);
+ }
+ /* Code the high precision bit, again with a class-0-specific probability */
+ if (usehp) {
+ if (c == MV_CLASS_0) {
+ vp9_write(bc, e, mvcomp->class0_hp);
+ } else {
+ vp9_write(bc, e, mvcomp->hp);
+ }
+ }
+}
+
+static void build_nmv_component_cost_table(int *mvcost,
+ const nmv_component* const mvcomp,
+ int usehp) {
+ int i, v;
+ int sign_cost[2], class_cost[MV_CLASSES], class0_cost[CLASS0_SIZE];
+ int bits_cost[MV_OFFSET_BITS][2];
+ int class0_fp_cost[CLASS0_SIZE][4], fp_cost[4];
+ int class0_hp_cost[2], hp_cost[2];
+ /* Fills mvcost[-MV_MAX..MV_MAX]; mvcost must be valid at negative indices. */
+ sign_cost[0] = vp9_cost_zero(mvcomp->sign);
+ sign_cost[1] = vp9_cost_one(mvcomp->sign);
+ vp9_cost_tokens(class_cost, mvcomp->classes, vp9_mv_class_tree);
+ vp9_cost_tokens(class0_cost, mvcomp->class0, vp9_mv_class0_tree);
+ for (i = 0; i < MV_OFFSET_BITS; ++i) {
+ bits_cost[i][0] = vp9_cost_zero(mvcomp->bits[i]);
+ bits_cost[i][1] = vp9_cost_one(mvcomp->bits[i]);
+ }
+
+ for (i = 0; i < CLASS0_SIZE; ++i)
+ vp9_cost_tokens(class0_fp_cost[i], mvcomp->class0_fp[i], vp9_mv_fp_tree);
+ vp9_cost_tokens(fp_cost, mvcomp->fp, vp9_mv_fp_tree);
+ /* The hp tables are only filled, and only read below, when usehp is set. */
+ if (usehp) {
+ class0_hp_cost[0] = vp9_cost_zero(mvcomp->class0_hp);
+ class0_hp_cost[1] = vp9_cost_one(mvcomp->class0_hp);
+ hp_cost[0] = vp9_cost_zero(mvcomp->hp);
+ hp_cost[1] = vp9_cost_one(mvcomp->hp);
+ }
+ mvcost[0] = 0;
+ for (v = 1; v <= MV_MAX; ++v) {  /* one magnitude fills both +v and -v */
+ int z, c, o, d, e, f, cost = 0;
+ z = v - 1;
+ c = vp9_get_mv_class(z, &o);
+ cost += class_cost[c];
+ d = (o >> 3); /* int mv data */
+ f = (o >> 1) & 3; /* fractional pel mv data */
+ e = (o & 1); /* high precision mv data */
+ if (c == MV_CLASS_0) {
+ cost += class0_cost[d];
+ } else {
+ int i, b;  /* this i shadows the function-level i */
+ b = c + CLASS0_BITS - 1; /* number of bits */
+ for (i = 0; i < b; ++i)
+ cost += bits_cost[i][((d >> i) & 1)];
+ }
+ if (c == MV_CLASS_0) {
+ cost += class0_fp_cost[d][f];
+ } else {
+ cost += fp_cost[f];
+ }
+ if (usehp) {
+ if (c == MV_CLASS_0) {
+ cost += class0_hp_cost[e];
+ } else {
+ cost += hp_cost[e];
+ }
+ }
+ mvcost[v] = cost + sign_cost[0];  /* positive value: sign bit 0 */
+ mvcost[-v] = cost + sign_cost[1];  /* negative value: sign bit 1 */
+ }
+}
+
+static int update_nmv_savings(const unsigned int ct[2],
+ const vp9_prob cur_p,
+ const vp9_prob new_p,
+ const vp9_prob upd_p) {
+ /* Returns cur_b - mod_b - cost when positive, else -vp9_cost_zero(upd_p). */
+#ifdef LOW_PRECISION_MV_UPDATE
+ vp9_prob mod_p = new_p | 1;  /* force the low bit on */
+#else
+ vp9_prob mod_p = new_p;
+#endif
+ const int cur_b = cost_branch256(ct, cur_p);  /* cost of ct under the current prob */
+ const int mod_b = cost_branch256(ct, mod_p);  /* cost of ct under the candidate prob */
+ const int cost = 7 * 256 +
+#ifndef LOW_PRECISION_MV_UPDATE
+ 256 +
+#endif
+ (vp9_cost_one(upd_p) - vp9_cost_zero(upd_p));  /* NOTE(review): 7*256 (+256) presumably prices the 7/8-bit literal */
+ if (cur_b - mod_b - cost > 0) {
+ return cur_b - mod_b - cost;
+ } else {
+ return 0 - vp9_cost_zero(upd_p);  /* not updating still costs the "no update" flag */
+ }
+}
+
+static int update_nmv(
+ vp9_writer *const bc,
+ const unsigned int ct[2],
+ vp9_prob *const cur_p,
+ const vp9_prob new_p,
+ const vp9_prob upd_p) {
+ /* Writes an update flag to bc; returns 1 and stores the new prob if updated. */
+#ifdef LOW_PRECISION_MV_UPDATE
+ vp9_prob mod_p = new_p | 1;  /* low bit forced on because only mod_p >> 1 is sent */
+#else
+ vp9_prob mod_p = new_p;
+#endif
+
+ const int cur_b = cost_branch256(ct, *cur_p);
+ const int mod_b = cost_branch256(ct, mod_p);
+ const int cost = 7 * 256 +
+#ifndef LOW_PRECISION_MV_UPDATE
+ 256 +
+#endif
+ (vp9_cost_one(upd_p) - vp9_cost_zero(upd_p));
+ /* Same decision rule as update_nmv_savings: update only if it beats the overhead. */
+ if (cur_b - mod_b > cost) {
+ *cur_p = mod_p;
+ vp9_write(bc, 1, upd_p);
+#ifdef LOW_PRECISION_MV_UPDATE
+ vp9_write_literal(bc, mod_p >> 1, 7);
+#else
+ vp9_write_literal(bc, mod_p, 8);
+#endif
+ return 1;
+ } else {
+ vp9_write(bc, 0, upd_p);  /* flag only; *cur_p is left unchanged */
+ return 0;
+ }
+}
+
+void print_nmvcounts(nmv_context_counts tnmvcounts) {  /* dumps the counts with printf; struct is passed by value */
+ int i, j, k;
+ printf("\nCounts =\n { ");
+ for (j = 0; j < MV_JOINTS; ++j)
+ printf("%d, ", tnmvcounts.joints[j]);
+ printf("},\n");
+ for (i = 0; i < 2; ++i) {  /* one group per entry of comps[] */
+ printf(" {\n");
+ printf(" %d/%d,\n", tnmvcounts.comps[i].sign[0],
+ tnmvcounts.comps[i].sign[1]);
+ printf(" { ");
+ for (j = 0; j < MV_CLASSES; ++j)
+ printf("%d, ", tnmvcounts.comps[i].classes[j]);
+ printf("},\n");
+ printf(" { ");
+ for (j = 0; j < CLASS0_SIZE; ++j)
+ printf("%d, ", tnmvcounts.comps[i].class0[j]);
+ printf("},\n");
+ printf(" { ");
+ for (j = 0; j < MV_OFFSET_BITS; ++j)
+ printf("%d/%d, ", tnmvcounts.comps[i].bits[j][0],
+ tnmvcounts.comps[i].bits[j][1]);
+ printf("},\n");
+ /* NOTE(review): every count is printed with %d; confirm the fields are int. */
+ printf(" {");
+ for (j = 0; j < CLASS0_SIZE; ++j) {
+ printf("{");
+ for (k = 0; k < 4; ++k)
+ printf("%d, ", tnmvcounts.comps[i].class0_fp[j][k]);
+ printf("}, ");
+ }
+ printf("},\n");
+
+ printf(" { ");
+ for (j = 0; j < 4; ++j)
+ printf("%d, ", tnmvcounts.comps[i].fp[j]);
+ printf("},\n");
+
+ printf(" %d/%d,\n",
+ tnmvcounts.comps[i].class0_hp[0],
+ tnmvcounts.comps[i].class0_hp[1]);
+ printf(" %d/%d,\n",
+ tnmvcounts.comps[i].hp[0],
+ tnmvcounts.comps[i].hp[1]);
+ printf(" },\n");
+ }
+}
+
+#ifdef NMV_STATS
+void init_nmvstats() {  /* NMV_STATS builds only */
+ vp9_zero(tnmvcounts);  /* reset the file-level accumulated counts */
+}
+
+void print_nmvstats() {  /* prints the accumulated tnmvcounts and the probabilities derived from them */
+ nmv_context prob;
+ unsigned int branch_ct_joint[MV_JOINTS - 1][2];
+ unsigned int branch_ct_sign[2][2];
+ unsigned int branch_ct_classes[2][MV_CLASSES - 1][2];
+ unsigned int branch_ct_class0[2][CLASS0_SIZE - 1][2];
+ unsigned int branch_ct_bits[2][MV_OFFSET_BITS][2];
+ unsigned int branch_ct_class0_fp[2][CLASS0_SIZE][4 - 1][2];
+ unsigned int branch_ct_fp[2][4 - 1][2];
+ unsigned int branch_ct_class0_hp[2][2];
+ unsigned int branch_ct_hp[2][2];
+ int i, j, k;
+ vp9_counts_to_nmv_context(&tnmvcounts, &prob, 1,
+ branch_ct_joint, branch_ct_sign, branch_ct_classes,
+ branch_ct_class0, branch_ct_bits,
+ branch_ct_class0_fp, branch_ct_fp,
+ branch_ct_class0_hp, branch_ct_hp);
+ /* The branch_ct_* outputs are required by the call but not printed here. */
+ printf("\nCounts =\n { ");
+ for (j = 0; j < MV_JOINTS; ++j)
+ printf("%d, ", tnmvcounts.joints[j]);
+ printf("},\n");
+ for (i = 0; i < 2; ++i) {
+ printf(" {\n");
+ printf(" %d/%d,\n", tnmvcounts.comps[i].sign[0],
+ tnmvcounts.comps[i].sign[1]);
+ printf(" { ");
+ for (j = 0; j < MV_CLASSES; ++j)
+ printf("%d, ", tnmvcounts.comps[i].classes[j]);
+ printf("},\n");
+ printf(" { ");
+ for (j = 0; j < CLASS0_SIZE; ++j)
+ printf("%d, ", tnmvcounts.comps[i].class0[j]);
+ printf("},\n");
+ printf(" { ");
+ for (j = 0; j < MV_OFFSET_BITS; ++j)
+ printf("%d/%d, ", tnmvcounts.comps[i].bits[j][0],
+ tnmvcounts.comps[i].bits[j][1]);
+ printf("},\n");
+
+ printf(" {");
+ for (j = 0; j < CLASS0_SIZE; ++j) {
+ printf("{");
+ for (k = 0; k < 4; ++k)
+ printf("%d, ", tnmvcounts.comps[i].class0_fp[j][k]);
+ printf("}, ");
+ }
+ printf("},\n");
+
+ printf(" { ");
+ for (j = 0; j < 4; ++j)
+ printf("%d, ", tnmvcounts.comps[i].fp[j]);
+ printf("},\n");
+
+ printf(" %d/%d,\n",
+ tnmvcounts.comps[i].class0_hp[0],
+ tnmvcounts.comps[i].class0_hp[1]);
+ printf(" %d/%d,\n",
+ tnmvcounts.comps[i].hp[0],
+ tnmvcounts.comps[i].hp[1]);
+ printf(" },\n");
+ }
+ /* Second section: the probabilities, with one fewer entry per tree than the counts. */
+ printf("\nProbs =\n { ");
+ for (j = 0; j < MV_JOINTS - 1; ++j)
+ printf("%d, ", prob.joints[j]);
+ printf("},\n");
+ for (i=0; i< 2; ++i) {
+ printf(" {\n");
+ printf(" %d,\n", prob.comps[i].sign);
+ printf(" { ");
+ for (j = 0; j < MV_CLASSES - 1; ++j)
+ printf("%d, ", prob.comps[i].classes[j]);
+ printf("},\n");
+ printf(" { ");
+ for (j = 0; j < CLASS0_SIZE - 1; ++j)
+ printf("%d, ", prob.comps[i].class0[j]);
+ printf("},\n");
+ printf(" { ");
+ for (j = 0; j < MV_OFFSET_BITS; ++j)
+ printf("%d, ", prob.comps[i].bits[j]);
+ printf("},\n");
+ printf(" { ");
+ for (j = 0; j < CLASS0_SIZE; ++j) {
+ printf("{");
+ for (k = 0; k < 3; ++k)
+ printf("%d, ", prob.comps[i].class0_fp[j][k]);
+ printf("}, ");
+ }
+ printf("},\n");
+ printf(" { ");
+ for (j = 0; j < 3; ++j)
+ printf("%d, ", prob.comps[i].fp[j]);
+ printf("},\n");
+
+ printf(" %d,\n", prob.comps[i].class0_hp);
+ printf(" %d,\n", prob.comps[i].hp);
+ printf(" },\n");
+ }
+}
+
+static void add_nmvcount(nmv_context_counts* const dst,
+ const nmv_context_counts* const src) {  /* dst += src, field by field */
+ int i, j, k;
+ for (j = 0; j < MV_JOINTS; ++j) {
+ dst->joints[j] += src->joints[j];
+ }
+ for (i = 0; i < 2; ++i) {  /* sign, class, class0 and bit counts per component */
+ for (j = 0; j < MV_VALS; ++j) {
+ dst->comps[i].mvcount[j] += src->comps[i].mvcount[j];
+ }
+ dst->comps[i].sign[0] += src->comps[i].sign[0];
+ dst->comps[i].sign[1] += src->comps[i].sign[1];
+ for (j = 0; j < MV_CLASSES; ++j) {
+ dst->comps[i].classes[j] += src->comps[i].classes[j];
+ }
+ for (j = 0; j < CLASS0_SIZE; ++j) {
+ dst->comps[i].class0[j] += src->comps[i].class0[j];
+ }
+ for (j = 0; j < MV_OFFSET_BITS; ++j) {
+ dst->comps[i].bits[j][0] += src->comps[i].bits[j][0];
+ dst->comps[i].bits[j][1] += src->comps[i].bits[j][1];
+ }
+ }
+ for (i = 0; i < 2; ++i) {  /* fp and hp counts per component */
+ for (j = 0; j < CLASS0_SIZE; ++j) {
+ for (k = 0; k < 4; ++k) {
+ dst->comps[i].class0_fp[j][k] += src->comps[i].class0_fp[j][k];
+ }
+ }
+ for (j = 0; j < 4; ++j) {
+ dst->comps[i].fp[j] += src->comps[i].fp[j];
+ }
+ dst->comps[i].class0_hp[0] += src->comps[i].class0_hp[0];
+ dst->comps[i].class0_hp[1] += src->comps[i].class0_hp[1];
+ dst->comps[i].hp[0] += src->comps[i].hp[0];
+ dst->comps[i].hp[1] += src->comps[i].hp[1];
+ }
+}
+#endif
+
+void vp9_write_nmv_probs(VP9_COMP* const cpi, int usehp, vp9_writer* const bc) {
+ int i, j;
+ nmv_context prob;
+ unsigned int branch_ct_joint[MV_JOINTS - 1][2];
+ unsigned int branch_ct_sign[2][2];
+ unsigned int branch_ct_classes[2][MV_CLASSES - 1][2];
+ unsigned int branch_ct_class0[2][CLASS0_SIZE - 1][2];
+ unsigned int branch_ct_bits[2][MV_OFFSET_BITS][2];
+ unsigned int branch_ct_class0_fp[2][CLASS0_SIZE][4 - 1][2];
+ unsigned int branch_ct_fp[2][4 - 1][2];
+ unsigned int branch_ct_class0_hp[2][2];
+ unsigned int branch_ct_hp[2][2];
+#ifdef MV_GROUP_UPDATE
+ int savings = 0;
+#endif
+ /* Writes MV probability updates to bc and stores accepted ones in cpi->common.fc.nmvc. */
+#ifdef NMV_STATS
+ if (!cpi->dummy_packing)
+ add_nmvcount(&tnmvcounts, &cpi->NMVcount);
+#endif
+ vp9_counts_to_nmv_context(&cpi->NMVcount, &prob, usehp,
+ branch_ct_joint, branch_ct_sign, branch_ct_classes,
+ branch_ct_class0, branch_ct_bits,
+ branch_ct_class0_fp, branch_ct_fp,
+ branch_ct_class0_hp, branch_ct_hp);
+ /* write updates if they help; with MV_GROUP_UPDATE, total the savings first */
+#ifdef MV_GROUP_UPDATE
+ for (j = 0; j < MV_JOINTS - 1; ++j) {
+ savings += update_nmv_savings(branch_ct_joint[j],
+ cpi->common.fc.nmvc.joints[j],
+ prob.joints[j],
+ VP9_NMV_UPDATE_PROB);
+ }
+ for (i = 0; i < 2; ++i) {
+ savings += update_nmv_savings(branch_ct_sign[i],
+ cpi->common.fc.nmvc.comps[i].sign,
+ prob.comps[i].sign,
+ VP9_NMV_UPDATE_PROB);
+ for (j = 0; j < MV_CLASSES - 1; ++j) {
+ savings += update_nmv_savings(branch_ct_classes[i][j],
+ cpi->common.fc.nmvc.comps[i].classes[j],
+ prob.comps[i].classes[j],
+ VP9_NMV_UPDATE_PROB);
+ }
+ for (j = 0; j < CLASS0_SIZE - 1; ++j) {
+ savings += update_nmv_savings(branch_ct_class0[i][j],
+ cpi->common.fc.nmvc.comps[i].class0[j],
+ prob.comps[i].class0[j],
+ VP9_NMV_UPDATE_PROB);
+ }
+ for (j = 0; j < MV_OFFSET_BITS; ++j) {
+ savings += update_nmv_savings(branch_ct_bits[i][j],
+ cpi->common.fc.nmvc.comps[i].bits[j],
+ prob.comps[i].bits[j],
+ VP9_NMV_UPDATE_PROB);
+ }
+ }
+ for (i = 0; i < 2; ++i) {
+ for (j = 0; j < CLASS0_SIZE; ++j) {
+ int k;
+ for (k = 0; k < 3; ++k) {
+ savings += update_nmv_savings(branch_ct_class0_fp[i][j][k],
+ cpi->common.fc.nmvc.comps[i].class0_fp[j][k],
+ prob.comps[i].class0_fp[j][k],
+ VP9_NMV_UPDATE_PROB);
+ }
+ }
+ for (j = 0; j < 3; ++j) {
+ savings += update_nmv_savings(branch_ct_fp[i][j],
+ cpi->common.fc.nmvc.comps[i].fp[j],
+ prob.comps[i].fp[j],
+ VP9_NMV_UPDATE_PROB);
+ }
+ }
+ if (usehp) {
+ for (i = 0; i < 2; ++i) {
+ savings += update_nmv_savings(branch_ct_class0_hp[i],
+ cpi->common.fc.nmvc.comps[i].class0_hp,
+ prob.comps[i].class0_hp,
+ VP9_NMV_UPDATE_PROB);
+ savings += update_nmv_savings(branch_ct_hp[i],
+ cpi->common.fc.nmvc.comps[i].hp,
+ prob.comps[i].hp,
+ VP9_NMV_UPDATE_PROB);
+ }
+ }
+ if (savings <= 0) {  /* no net gain: write a single 0 bit and skip all updates */
+ vp9_write_bit(bc, 0);
+ return;
+ }
+ vp9_write_bit(bc, 1);
+#endif
+ /* Per-probability pass, in the same order as the savings pass above. */
+ for (j = 0; j < MV_JOINTS - 1; ++j) {
+ update_nmv(bc, branch_ct_joint[j],
+ &cpi->common.fc.nmvc.joints[j],
+ prob.joints[j],
+ VP9_NMV_UPDATE_PROB);
+ }
+ for (i = 0; i < 2; ++i) {
+ update_nmv(bc, branch_ct_sign[i],
+ &cpi->common.fc.nmvc.comps[i].sign,
+ prob.comps[i].sign,
+ VP9_NMV_UPDATE_PROB);
+ for (j = 0; j < MV_CLASSES - 1; ++j) {
+ update_nmv(bc, branch_ct_classes[i][j],
+ &cpi->common.fc.nmvc.comps[i].classes[j],
+ prob.comps[i].classes[j],
+ VP9_NMV_UPDATE_PROB);
+ }
+ for (j = 0; j < CLASS0_SIZE - 1; ++j) {
+ update_nmv(bc, branch_ct_class0[i][j],
+ &cpi->common.fc.nmvc.comps[i].class0[j],
+ prob.comps[i].class0[j],
+ VP9_NMV_UPDATE_PROB);
+ }
+ for (j = 0; j < MV_OFFSET_BITS; ++j) {
+ update_nmv(bc, branch_ct_bits[i][j],
+ &cpi->common.fc.nmvc.comps[i].bits[j],
+ prob.comps[i].bits[j],
+ VP9_NMV_UPDATE_PROB);
+ }
+ }
+ for (i = 0; i < 2; ++i) {
+ for (j = 0; j < CLASS0_SIZE; ++j) {
+ int k;
+ for (k = 0; k < 3; ++k) {
+ update_nmv(bc, branch_ct_class0_fp[i][j][k],
+ &cpi->common.fc.nmvc.comps[i].class0_fp[j][k],
+ prob.comps[i].class0_fp[j][k],
+ VP9_NMV_UPDATE_PROB);
+ }
+ }
+ for (j = 0; j < 3; ++j) {
+ update_nmv(bc, branch_ct_fp[i][j],
+ &cpi->common.fc.nmvc.comps[i].fp[j],
+ prob.comps[i].fp[j],
+ VP9_NMV_UPDATE_PROB);
+ }
+ }
+ if (usehp) {  /* hp probabilities are only considered when usehp is set */
+ for (i = 0; i < 2; ++i) {
+ update_nmv(bc, branch_ct_class0_hp[i],
+ &cpi->common.fc.nmvc.comps[i].class0_hp,
+ prob.comps[i].class0_hp,
+ VP9_NMV_UPDATE_PROB);
+ update_nmv(bc, branch_ct_hp[i],
+ &cpi->common.fc.nmvc.comps[i].hp,
+ prob.comps[i].hp,
+ VP9_NMV_UPDATE_PROB);
+ }
+ }
+}
+
+void vp9_encode_nmv(vp9_writer* const bc, const MV* const mv,
+ const MV* const ref, const nmv_context* const mvctx) {  /* writes the joint token, then each nonzero component */
+ MV_JOINT_TYPE j = vp9_get_mv_joint(*mv);
+ write_token(bc, vp9_mv_joint_tree, mvctx->joints,
+ vp9_mv_joint_encodings + j);
+ if (j == MV_JOINT_HZVNZ || j == MV_JOINT_HNZVNZ) {  /* row is coded with comps[0] */
+ encode_nmv_component(bc, mv->row, ref->row, &mvctx->comps[0]);  /* row pairs with the row reference */
+ }
+ if (j == MV_JOINT_HNZVZ || j == MV_JOINT_HNZVNZ) {  /* col is coded with comps[1] */
+ encode_nmv_component(bc, mv->col, ref->col, &mvctx->comps[1]);
+ }
+}
+
+void vp9_encode_nmv_fp(vp9_writer* const bc, const MV* const mv,
+ const MV* const ref, const nmv_context* const mvctx,
+ int usehp) {  /* writes the fractional (and optional hp) part of each nonzero component */
+ MV_JOINT_TYPE j = vp9_get_mv_joint(*mv);
+ usehp = usehp && vp9_use_nmv_hp(ref);  /* hp also requires vp9_use_nmv_hp(ref) to agree */
+ if (j == MV_JOINT_HZVNZ || j == MV_JOINT_HNZVNZ) {  /* row is coded with comps[0] */
+ encode_nmv_component_fp(bc, mv->row, ref->row, &mvctx->comps[0], usehp);
+ }
+ if (j == MV_JOINT_HNZVZ || j == MV_JOINT_HNZVNZ) {  /* col is coded with comps[1] */
+ encode_nmv_component_fp(bc, mv->col, ref->col, &mvctx->comps[1], usehp);
+ }
+}
+
+void vp9_build_nmv_cost_table(int *mvjoint,  /* receives the joint token costs */
+ int *mvcost[2],  /* [0] and [1] are filled from comps[0] and comps[1] */
+ const nmv_context* const mvctx,
+ int usehp,
+ int mvc_flag_v,  /* nonzero: rebuild mvcost[0] */
+ int mvc_flag_h) {  /* nonzero: rebuild mvcost[1] */
+ vp9_clear_system_state();
+ vp9_cost_tokens(mvjoint, mvctx->joints, vp9_mv_joint_tree);  /* joint costs are always rebuilt */
+ if (mvc_flag_v)
+ build_nmv_component_cost_table(mvcost[0], &mvctx->comps[0], usehp);
+ if (mvc_flag_h)
+ build_nmv_component_cost_table(mvcost[1], &mvctx->comps[1], usehp);
+}
+
+void vp9_update_nmv_count(VP9_COMP *cpi, MACROBLOCK *x,
+ int_mv *best_ref_mv, int_mv *second_best_ref_mv) {
+ MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
+ MV mv;
+ /* Adds (mv - reference mv) to cpi->NMVcount for NEW4X4 partitions or NEWMV blocks. */
+ if (mbmi->mode == SPLITMV) {
+ int i;
+
+ for (i = 0; i < x->partition_info->count; i++) {
+ if (x->partition_info->bmi[i].mode == NEW4X4) {  /* other partition modes are not counted */
+ if (x->e_mbd.allow_high_precision_mv) {  /* both branches differ only in the last vp9_increment_nmv argument */
+ mv.row = (x->partition_info->bmi[i].mv.as_mv.row
+ - best_ref_mv->as_mv.row);
+ mv.col = (x->partition_info->bmi[i].mv.as_mv.col
+ - best_ref_mv->as_mv.col);
+ vp9_increment_nmv(&mv, &best_ref_mv->as_mv, &cpi->NMVcount, 1);
+ if (x->e_mbd.mode_info_context->mbmi.second_ref_frame > 0) {  /* also count the second MV */
+ mv.row = (x->partition_info->bmi[i].second_mv.as_mv.row
+ - second_best_ref_mv->as_mv.row);
+ mv.col = (x->partition_info->bmi[i].second_mv.as_mv.col
+ - second_best_ref_mv->as_mv.col);
+ vp9_increment_nmv(&mv, &second_best_ref_mv->as_mv,
+ &cpi->NMVcount, 1);
+ }
+ } else {
+ mv.row = (x->partition_info->bmi[i].mv.as_mv.row
+ - best_ref_mv->as_mv.row);
+ mv.col = (x->partition_info->bmi[i].mv.as_mv.col
+ - best_ref_mv->as_mv.col);
+ vp9_increment_nmv(&mv, &best_ref_mv->as_mv, &cpi->NMVcount, 0);
+ if (x->e_mbd.mode_info_context->mbmi.second_ref_frame > 0) {
+ mv.row = (x->partition_info->bmi[i].second_mv.as_mv.row
+ - second_best_ref_mv->as_mv.row);
+ mv.col = (x->partition_info->bmi[i].second_mv.as_mv.col
+ - second_best_ref_mv->as_mv.col);
+ vp9_increment_nmv(&mv, &second_best_ref_mv->as_mv,
+ &cpi->NMVcount, 0);
+ }
+ }
+ }
+ }
+ } else if (mbmi->mode == NEWMV) {  /* whole-block MVs come from mbmi->mv[0] and mbmi->mv[1] */
+ if (x->e_mbd.allow_high_precision_mv) {
+ mv.row = (mbmi->mv[0].as_mv.row - best_ref_mv->as_mv.row);
+ mv.col = (mbmi->mv[0].as_mv.col - best_ref_mv->as_mv.col);
+ vp9_increment_nmv(&mv, &best_ref_mv->as_mv, &cpi->NMVcount, 1);
+ if (mbmi->second_ref_frame > 0) {
+ mv.row = (mbmi->mv[1].as_mv.row - second_best_ref_mv->as_mv.row);
+ mv.col = (mbmi->mv[1].as_mv.col - second_best_ref_mv->as_mv.col);
+ vp9_increment_nmv(&mv, &second_best_ref_mv->as_mv, &cpi->NMVcount, 1);
+ }
+ } else {
+ mv.row = (mbmi->mv[0].as_mv.row - best_ref_mv->as_mv.row);
+ mv.col = (mbmi->mv[0].as_mv.col - best_ref_mv->as_mv.col);
+ vp9_increment_nmv(&mv, &best_ref_mv->as_mv, &cpi->NMVcount, 0);
+ if (mbmi->second_ref_frame > 0) {
+ mv.row = (mbmi->mv[1].as_mv.row - second_best_ref_mv->as_mv.row);
+ mv.col = (mbmi->mv[1].as_mv.col - second_best_ref_mv->as_mv.col);
+ vp9_increment_nmv(&mv, &second_best_ref_mv->as_mv, &cpi->NMVcount, 0);
+ }
+ }
+ }
+}
diff --git a/vp9/encoder/vp9_encodemv.h b/vp9/encoder/vp9_encodemv.h
new file mode 100644
index 0000000..84cd6fb
--- /dev/null
+++ b/vp9/encoder/vp9_encodemv.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_ENCODER_VP9_ENCODEMV_H_
+#define VP9_ENCODER_VP9_ENCODEMV_H_
+
+#include "vp9/encoder/vp9_onyx_int.h"
+
+void vp9_write_nmv_probs(VP9_COMP* const, int usehp, vp9_writer* const);
+void vp9_encode_nmv(vp9_writer* const w, const MV* const mv,
+ const MV* const ref, const nmv_context* const mvctx);
+void vp9_encode_nmv_fp(vp9_writer* const w, const MV* const mv,
+ const MV* const ref, const nmv_context* const mvctx,
+ int usehp);
+void vp9_build_nmv_cost_table(int *mvjoint,
+ int *mvcost[2],
+ const nmv_context* const mvctx,
+ int usehp,
+ int mvc_flag_v,
+ int mvc_flag_h);
+void vp9_update_nmv_count(VP9_COMP *cpi, MACROBLOCK *x,
+ int_mv *best_ref_mv, int_mv *second_best_ref_mv);
+
+void print_nmvcounts(nmv_context_counts tnmvcounts);
+#endif
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
new file mode 100644
index 0000000..6eb022f
--- /dev/null
+++ b/vp9/encoder/vp9_firstpass.c
@@ -0,0 +1,2523 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "math.h"
+#include "limits.h"
+#include "vp9/encoder/vp9_block.h"
+#include "vp9/encoder/vp9_onyx_int.h"
+#include "vp9/encoder/vp9_variance.h"
+#include "vp9/encoder/vp9_encodeintra.h"
+#include "vp9/common/vp9_setupintrarecon.h"
+#include "vp9/encoder/vp9_mcomp.h"
+#include "vp9/encoder/vp9_firstpass.h"
+#include "vpx_scale/vpxscale.h"
+#include "vp9/encoder/vp9_encodeframe.h"
+#include "vp9/encoder/vp9_encodemb.h"
+#include "vp9/common/vp9_extend.h"
+#include "vp9/common/vp9_systemdependent.h"
+#include "vpx_mem/vpx_mem.h"
+#include "vp9/common/vp9_swapyv12buffer.h"
+#include <stdio.h>
+#include "vp9/encoder/vp9_quantize.h"
+#include "vp9/encoder/vp9_rdopt.h"
+#include "vp9/encoder/vp9_ratectrl.h"
+#include "vp9/common/vp9_quant_common.h"
+#include "vp9/common/vp9_entropymv.h"
+#include "vp9/encoder/vp9_encodemv.h"
+#include "./vpx_scale_rtcd.h"
+
+#define OUTPUT_FPF 0  // Non-zero: output_stats() also appends each record to "firstpass.stt" (debug).
+// NOTE(review): the uses of IIFACTOR..MIN_DECAY_FACTOR and *_MB_INTRA_MIN are outside this part of the file.
+#define IIFACTOR 12.5
+#define IIKFACTOR1 12.5
+#define IIKFACTOR2 15.0
+#define RMAX 128.0
+#define GF_RMAX 96.0
+#define ERR_DIVISOR 150.0  // Divisor applied to the per-MB error in estimate_max_q().
+#define MIN_DECAY_FACTOR 0.1
+
+#define KF_MB_INTRA_MIN 150
+#define GF_MB_INTRA_MIN 100
+
+#define DOUBLE_DIVIDE_CHECK(X) ((X) < 0 ? (X) - .000001 : (X) + .000001)  // Moves X 1e-6 away from zero.
+// VBR bias exponents; need a `cpi` in scope. Parenthesized so each expands as a single operand.
+#define POW1 ((double)cpi->oxcf.two_pass_vbrbias / 100.0)
+#define POW2 ((double)cpi->oxcf.two_pass_vbrbias / 100.0)
+
+static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame);
+
+// Returns the lowest q index whose real quantizer is at least
+// 0.5847 * q(qindex) + 1.0, or QINDEX_RANGE - 1 when no index qualifies.
+static int select_cq_level(int qindex) {
+  const double q_floor = (vp9_convert_qindex_to_q(qindex) * 0.5847) + 1.0;
+  int level = 0;
+
+  // Walk upwards until the quantizer reaches the floor or the range ends.
+  while (level < QINDEX_RANGE) {
+    if (vp9_convert_qindex_to_q(level) >= q_floor)
+      return level;
+    ++level;
+  }
+
+  return QINDEX_RANGE - 1;
+}
+
+
+// Moves the first-pass stats read pointer: stats_in is set directly to Position (no file I/O or relative seek).
+static void reset_fpf_position(VP9_COMP *cpi, FIRSTPASS_STATS *Position) {
+ cpi->twopass.stats_in = Position;
+}
+
+// Peeks at the stats record under the read pointer without advancing it.
+// Returns 1 and fills *next_frame, or EOF when the pointer is at/past the end.
+static int lookup_next_frame_stats(VP9_COMP *cpi, FIRSTPASS_STATS *next_frame) {
+  const int have_frame = cpi->twopass.stats_in < cpi->twopass.stats_in_end;
+  if (have_frame) *next_frame = *cpi->twopass.stats_in;
+  return have_frame ? 1 : EOF;
+}
+
+// Copies the stats record `offset` entries away from the current read
+// position into *frame_stats; the read position itself is not moved.
+// Returns 1 on success, or EOF when the target lies at/after stats_in_end
+// (offset >= 0) or before stats_in_start (offset < 0).
+static int read_frame_stats(VP9_COMP *cpi,
+                            FIRSTPASS_STATS *frame_stats,
+                            int offset) {
+  FIRSTPASS_STATS *const base = cpi->twopass.stats_in;
+  int out_of_range;
+  if (offset < 0)
+    out_of_range = (&base[offset] < cpi->twopass.stats_in_start);
+  else
+    out_of_range = (&base[offset] >= cpi->twopass.stats_in_end);
+
+  if (out_of_range) return EOF;
+  *frame_stats = base[offset];
+  return 1;
+}
+
+// Reads the stats record at the read pointer into *fps and advances the
+// pointer by one record. Returns 1, or EOF (nothing read) at end of data.
+static int input_stats(VP9_COMP *cpi, FIRSTPASS_STATS *fps) {
+  FIRSTPASS_STATS *const cur = cpi->twopass.stats_in;
+  if (cur >= cpi->twopass.stats_in_end) return EOF;
+  *fps = *cur;
+  cpi->twopass.stats_in = (void *)((char *)cur + sizeof(FIRSTPASS_STATS));
+  return 1;
+}
+
+// Queues *stats on pktlist as a VPX_CODEC_STATS_PKT. The packet stores the
+// `stats` pointer itself (buf = stats), not a copy of the record.
+// When OUTPUT_FPF is non-zero the record is also appended, as one text line,
+// to "firstpass.stt". cpi is not used.
+static void output_stats(const VP9_COMP *cpi,
+                         struct vpx_codec_pkt_list *pktlist,
+                         FIRSTPASS_STATS *stats) {
+  struct vpx_codec_cx_pkt pkt;
+  pkt.kind = VPX_CODEC_STATS_PKT;
+  pkt.data.twopass_stats.buf = stats;
+  pkt.data.twopass_stats.sz = sizeof(FIRSTPASS_STATS);
+  vpx_codec_pkt_list_add(pktlist, &pkt);
+
+// TEMP debug code
+#if OUTPUT_FPF
+
+  {
+    FILE *fpfile = fopen("firstpass.stt", "a");
+
+    // Best-effort dump: skip it when the file cannot be opened instead of
+    // passing NULL to fprintf(). The format has exactly one conversion per
+    // argument (19); it used to have 20, which is undefined behaviour, and
+    // lacked a separator where the string pieces are joined.
+    if (fpfile != NULL) {
+      fprintf(fpfile, "%12.0f %12.0f %12.0f %12.0f %12.0f %12.4f %12.4f "
+              "%12.4f %12.4f %12.4f %12.4f %12.4f %12.4f %12.4f "
+              "%12.4f %12.4f %12.0f %12.0f %12.4f\n",
+              stats->frame, stats->intra_error, stats->coded_error,
+              stats->sr_coded_error, stats->ssim_weighted_pred_err,
+              stats->pcnt_inter, stats->pcnt_motion,
+              stats->pcnt_second_ref, stats->pcnt_neutral,
+              stats->MVr, stats->mvr_abs,
+              stats->MVc, stats->mvc_abs,
+              stats->MVrv, stats->MVcv,
+              stats->mv_in_out_count,
+              stats->new_mv_count,
+              stats->count, stats->duration);
+      fclose(fpfile);
+    }
+  }
+#endif
+}
+
+// Resets every accumulator in *section to 0.0, except duration, which is
+// set to 1.0.
+static void zero_stats(FIRSTPASS_STATS *section) {
+  FIRSTPASS_STATS *const s = section;
+
+  // Frame index and error totals.
+  s->frame = 0.0;
+  s->intra_error = s->coded_error = s->sr_coded_error = 0.0;
+  s->ssim_weighted_pred_err = 0.0;
+  // pcnt_* fractions.
+  s->pcnt_inter = s->pcnt_motion = 0.0;
+  s->pcnt_second_ref = s->pcnt_neutral = 0.0;
+  // Motion vector statistics.
+  s->MVr = s->mvr_abs = 0.0;
+  s->MVc = s->mvc_abs = 0.0;
+  s->MVrv = s->MVcv = 0.0;
+  s->mv_in_out_count = s->new_mv_count = 0.0;
+  // Frame count and duration.
+  s->count = 0.0;
+  s->duration = 1.0;
+}
+
+static void accumulate_stats(FIRSTPASS_STATS *section, FIRSTPASS_STATS *frame) {
+ section->frame += frame->frame;
+ section->intra_error += frame->intra_error;
+ section->coded_error += frame->coded_error;
+ section->sr_coded_error += frame->sr_coded_error;
+ section->ssim_weighted_pred_err += frame->ssim_weighted_pred_err;
+ section->pcnt_inter += frame->pcnt_inter;
+ section->pcnt_motion += frame->pcnt_motion;
+ section->pcnt_second_ref += frame->pcnt_second_ref;
+ section->pcnt_neutral += frame->pcnt_neutral;
+ section->MVr += frame->MVr;
+ section->mvr_abs += frame->mvr_abs;
+ section->MVc += frame->MVc;
+ section->mvc_abs += frame->mvc_abs;
+ section->MVrv += frame->MVrv;
+ section->MVcv += frame->MVcv;
+ section->mv_in_out_count += frame->mv_in_out_count;
+ section->new_mv_count += frame->new_mv_count;
+ section->count += frame->count;
+ section->duration += frame->duration;
+}
+
+static void subtract_stats(FIRSTPASS_STATS *section, FIRSTPASS_STATS *frame) {
+ section->frame -= frame->frame;
+ section->intra_error -= frame->intra_error;
+ section->coded_error -= frame->coded_error;
+ section->sr_coded_error -= frame->sr_coded_error;
+ section->ssim_weighted_pred_err -= frame->ssim_weighted_pred_err;
+ section->pcnt_inter -= frame->pcnt_inter;
+ section->pcnt_motion -= frame->pcnt_motion;
+ section->pcnt_second_ref -= frame->pcnt_second_ref;
+ section->pcnt_neutral -= frame->pcnt_neutral;
+ section->MVr -= frame->MVr;
+ section->mvr_abs -= frame->mvr_abs;
+ section->MVc -= frame->MVc;
+ section->mvc_abs -= frame->mvc_abs;
+ section->MVrv -= frame->MVrv;
+ section->MVcv -= frame->MVcv;
+ section->mv_in_out_count -= frame->mv_in_out_count;
+ section->new_mv_count -= frame->new_mv_count;
+ section->count -= frame->count;
+ section->duration -= frame->duration;
+}
+
+static void avg_stats(FIRSTPASS_STATS *section) {
+ if (section->count < 1.0)
+ return;
+
+ section->intra_error /= section->count;
+ section->coded_error /= section->count;
+ section->sr_coded_error /= section->count;
+ section->ssim_weighted_pred_err /= section->count;
+ section->pcnt_inter /= section->count;
+ section->pcnt_second_ref /= section->count;
+ section->pcnt_neutral /= section->count;
+ section->pcnt_motion /= section->count;
+ section->MVr /= section->count;
+ section->mvr_abs /= section->count;
+ section->MVc /= section->count;
+ section->mvc_abs /= section->count;
+ section->MVrv /= section->count;
+ section->MVcv /= section->count;
+ section->mv_in_out_count /= section->count;
+ section->duration /= section->count;
+}
+
+// Calculate a modified Error used in distributing bits between easier and harder frames
+static double calculate_modified_err(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
+ double av_err = (cpi->twopass.total_stats->ssim_weighted_pred_err /
+ cpi->twopass.total_stats->count);
+ double this_err = this_frame->ssim_weighted_pred_err;
+ double modified_err;
+
+ if (this_err > av_err)
+ modified_err = av_err * pow((this_err / DOUBLE_DIVIDE_CHECK(av_err)), POW1);
+ else
+ modified_err = av_err * pow((this_err / DOUBLE_DIVIDE_CHECK(av_err)), POW2);
+
+ return modified_err;
+}
+
+static const double weight_table[256] = {  // Weight per 8-bit luma value; indexed by pixel value in simple_weight().
+ 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000,  // [0..32]: 0.02
+ 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000,
+ 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000,
+ 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000,
+ 0.020000, 0.031250, 0.062500, 0.093750, 0.125000, 0.156250, 0.187500, 0.218750,  // [33..63]: (v - 32) / 32
+ 0.250000, 0.281250, 0.312500, 0.343750, 0.375000, 0.406250, 0.437500, 0.468750,
+ 0.500000, 0.531250, 0.562500, 0.593750, 0.625000, 0.656250, 0.687500, 0.718750,
+ 0.750000, 0.781250, 0.812500, 0.843750, 0.875000, 0.906250, 0.937500, 0.968750,
+ 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,  // [64..255]: 1.0
+ 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
+ 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
+ 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
+ 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
+ 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
+ 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
+ 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
+ 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
+ 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
+ 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
+ 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
+ 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
+ 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
+ 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
+ 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
+ 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
+ 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
+ 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
+ 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
+ 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
+ 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
+ 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
+ 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000
+};
+
+// Returns the mean weight_table[] value over the source Y plane (y_width by
+// y_height pixels, rows y_stride bytes apart). Returns 0.0 for an empty plane:
+// the previous do/while loops read at least one pixel and then counted down
+// from zero when a dimension was 0.
+static double simple_weight(YV12_BUFFER_CONFIG *source) {
+  const int width = source->y_width;
+  const int height = source->y_height;
+  const unsigned char *row = source->y_buffer;
+  double sum_weights = 0.0;
+  int i, j;
+
+  if (width <= 0 || height <= 0)
+    return 0.0;
+
+  // Loop through the Y plane rows, adding up the weight of each luma level
+  for (i = 0; i < height; i++, row += source->y_stride) {
+    for (j = 0; j < width; j++)
+      sum_weights += weight_table[row[j]];
+  }
+
+  return sum_weights / ((double)height * (double)width);
+}
+
+
+// Returns the most bits a single frame may currently be given: the average
+// bits per remaining frame (bits_left over the total frame count minus the
+// frames already coded) scaled by the two_pass_vbrmax_section percentage.
+// A negative result (out of bits) is reported as 0.
+static int frame_max_bits(VP9_COMP *cpi) {
+  const double frames_left = cpi->twopass.total_stats->count -
+                             (double)cpi->common.current_video_frame;
+  const double avg_bits_per_frame =
+      (double)cpi->twopass.bits_left / frames_left;
+  const double section_scale =
+      (double)cpi->oxcf.two_pass_vbrmax_section / 100.0;
+  const int max_bits = (int)(avg_bits_per_frame * section_scale);
+  return (max_bits < 0) ? 0 : max_bits;
+}
+
+void vp9_init_first_pass(VP9_COMP *cpi) {
+ zero_stats(cpi->twopass.total_stats);  // Clear the running clip totals before the first frame is analysed.
+}
+
+void vp9_end_first_pass(VP9_COMP *cpi) {
+ output_stats(cpi, cpi->output_pkt_list, cpi->twopass.total_stats);  // Emit the accumulated totals as one more stats packet.
+}
+
+// Zero-motion test: has vp9_mse16x16 compare block 0's source pixels with its
+// reference pixels and write the score to *best_motion_err. cpi is unused.
+static void zz_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
+                             YV12_BUFFER_CONFIG *recon_buffer,
+                             int *best_motion_err, int recon_yoffset) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  BLOCK *const src_blk = &x->block[0];
+  BLOCKD *const ref_blk = &xd->block[0];
+  unsigned char *const src_ptr = *(src_blk->base_src) + src_blk->src;
+
+  // Set up pointers for this macro block recon buffer. Presumably base_pre
+  // refers to this buffer, so keep this ahead of the reference lookup.
+  xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset;
+
+  vp9_mse16x16(src_ptr, src_blk->src_stride,
+               (unsigned char *)(*(ref_blk->base_pre) + ref_blk->pre),
+               ref_blk->pre_stride, (unsigned int *)(best_motion_err));
+}
+
+// Diamond motion search for block 0 of x against recon_buffer, scored with
+// vp9_mse16x16 instead of the default 16x16 variance function. The search is
+// centred on ref_mv >> 3 and repeated with step_param + n for
+// n = 1..further_steps, skipping num00 steps after each search. Every result
+// has new_mv_mode_penalty added; *best_motion_err and *best_mv are only
+// overwritten by a result below the incoming *best_motion_err.
+static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
+                                     int_mv *ref_mv, MV *best_mv,
+                                     YV12_BUFFER_CONFIG *recon_buffer,
+                                     int *best_motion_err, int recon_yoffset) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  BLOCK *const b = &x->block[0];
+  BLOCKD *const d = &xd->block[0];
+  const int step_param = 3;
+  const int further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
+  const int new_mv_mode_penalty = 256;
+
+  vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[BLOCK_16X16];
+  int_mv cand_mv, ref_mv_full;
+  int num00, step, err;
+
+  // override the default variance function to use MSE
+  v_fn_ptr.vf = vp9_mse16x16;
+
+  // Set up pointers for this macro block recon buffer
+  xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset;
+
+  // Initial step/diamond search centred on the shifted reference mv
+  cand_mv.as_int = 0;
+  ref_mv_full.as_mv.col = ref_mv->as_mv.col >> 3;
+  ref_mv_full.as_mv.row = ref_mv->as_mv.row >> 3;
+  err = cpi->diamond_search_sad(x, b, d, &ref_mv_full, &cand_mv, step_param,
+                                x->sadperbit16, &num00, &v_fn_ptr,
+                                x->nmvjointcost, x->mvcost, ref_mv);
+
+  // Guard the penalty so that it cannot push err past INT_MAX.
+  if (err < INT_MAX - new_mv_mode_penalty)
+    err += new_mv_mode_penalty;
+
+  if (err < *best_motion_err) {
+    *best_motion_err = err;
+    best_mv->row = cand_mv.as_mv.row;
+    best_mv->col = cand_mv.as_mv.col;
+  }
+
+  // Further step/diamond searches as necessary. The first num00 steps are
+  // skipped outright; inside the loop num00 counts down the steps to skip
+  // after each later search.
+  for (step = num00 + 1, num00 = 0; step <= further_steps; step++) {
+    if (num00) {
+      num00--;
+      continue;
+    }
+
+    err = cpi->diamond_search_sad(x, b, d, &ref_mv_full, &cand_mv,
+                                  step_param + step, x->sadperbit16,
+                                  &num00, &v_fn_ptr,
+                                  x->nmvjointcost, x->mvcost, ref_mv);
+    if (err < INT_MAX - new_mv_mode_penalty)
+      err += new_mv_mode_penalty;
+
+    if (err < *best_motion_err) {
+      *best_motion_err = err;
+      best_mv->row = cand_mv.as_mv.row;
+      best_mv->col = cand_mv.as_mv.col;
+    }
+  }
+}
+
+void vp9_first_pass(VP9_COMP *cpi) {  // First-pass analysis of cpi->Source; one frame per call.
+ int mb_row, mb_col;
+ MACROBLOCK *const x = &cpi->mb;
+ VP9_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &x->e_mbd;
+
+ int recon_yoffset, recon_uvoffset;
+ YV12_BUFFER_CONFIG *lst_yv12 = &cm->yv12_fb[cm->lst_fb_idx];
+ YV12_BUFFER_CONFIG *new_yv12 = &cm->yv12_fb[cm->new_fb_idx];
+ YV12_BUFFER_CONFIG *gld_yv12 = &cm->yv12_fb[cm->gld_fb_idx];
+ int recon_y_stride = lst_yv12->y_stride;
+ int recon_uv_stride = lst_yv12->uv_stride;
+ int64_t intra_error = 0;
+ int64_t coded_error = 0;
+ int64_t sr_coded_error = 0;
+ // Motion vector and mode tallies; updated only for MBs where inter prediction wins.
+ int sum_mvr = 0, sum_mvc = 0;
+ int sum_mvr_abs = 0, sum_mvc_abs = 0;
+ int sum_mvrs = 0, sum_mvcs = 0;
+ int mvcount = 0;
+ int intercount = 0;
+ int second_ref_count = 0;
+ int intrapenalty = 256;
+ int neutral_count = 0;
+ int new_mv_count = 0;
+ int sum_in_vectors = 0;
+ uint32_t lastmv_as_int = 0;
+
+ int_mv zero_ref_mv;
+
+ zero_ref_mv.as_int = 0;
+
+ vp9_clear_system_state(); // __asm emms;
+
+ x->src = * cpi->Source;
+ xd->pre = *lst_yv12;
+ xd->dst = *new_yv12;
+
+ x->partition_info = x->pi;
+
+ xd->mode_info_context = cm->mi;
+
+ vp9_build_block_offsets(x);
+
+ vp9_setup_block_dptrs(&x->e_mbd);
+
+ vp9_setup_block_ptrs(x);
+
+ // set up the new frame for intra coded blocks
+ vp9_setup_intra_recon(new_yv12);
+ vp9_frame_init_quantizer(cpi);
+
+ // Initialise the MV cost table to the defaults
+ // if( cm->current_video_frame == 0)
+ // if ( 0 )
+ {
+ vp9_init_mv_probs(cm);
+ vp9_initialize_rd_consts(cpi, cm->base_qindex + cm->y1dc_delta_q);
+ }
+
+ // for each macroblock row in image
+ for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) {
+ int_mv best_ref_mv;
+
+ best_ref_mv.as_int = 0;
+
+ // Per-row setup: above-row availability and recon buffer offsets
+ xd->up_available = (mb_row != 0);
+ recon_yoffset = (mb_row * recon_y_stride * 16);
+ recon_uvoffset = (mb_row * recon_uv_stride * 8);
+
+ // Set up limit values for motion vectors to prevent them extending outside the UMV borders
+ x->mv_row_min = -((mb_row * 16) + (VP9BORDERINPIXELS - 16));
+ x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16)
+ + (VP9BORDERINPIXELS - 16);
+
+
+ // for each macroblock col in image
+ for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
+ int this_error;
+ int gf_motion_error = INT_MAX;
+ int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row);  // True only on the first row or first column, excluding MB (0,0).
+
+ xd->dst.y_buffer = new_yv12->y_buffer + recon_yoffset;
+ xd->dst.u_buffer = new_yv12->u_buffer + recon_uvoffset;
+ xd->dst.v_buffer = new_yv12->v_buffer + recon_uvoffset;
+ xd->left_available = (mb_col != 0);
+
+#if !CONFIG_SUPERBLOCKS
+ // Copy current mb to a buffer
+ vp9_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16);
+#endif
+
+ // do intra 16x16 prediction
+ this_error = vp9_encode_intra(cpi, x, use_dc_pred);
+
+ // "intrapenalty" below deals with situations where the intra and inter error scores are very low (eg a plain black frame)
+ // We do not have special cases in first pass for 0,0 and nearest etc so all inter modes carry an overhead cost estimate for the mv.
+ // When the error score is very low this causes us to pick all or lots of INTRA modes and throw lots of key frames.
+ // This penalty adds a cost matching that of a 0,0 mv to the intra case.
+ this_error += intrapenalty;
+
+ // Cumulative intra error total
+ intra_error += (int64_t)this_error;
+
+ // Set up limit values for motion vectors to prevent them extending outside the UMV borders
+ x->mv_col_min = -((mb_col * 16) + (VP9BORDERINPIXELS - 16));
+ x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16)
+ + (VP9BORDERINPIXELS - 16);
+
+ // Other than for the first frame do a motion search
+ if (cm->current_video_frame > 0) {
+ int tmp_err;
+ int motion_error = INT_MAX;
+ int_mv mv, tmp_mv;
+
+ // Simple 0,0 motion with no mv overhead
+ zz_motion_search(cpi, x, lst_yv12, &motion_error, recon_yoffset);
+ mv.as_int = tmp_mv.as_int = 0;
+
+ // Test last reference frame using the previous best mv as the
+ // starting point (best reference) for the search
+ first_pass_motion_search(cpi, x, &best_ref_mv,
+ &mv.as_mv, lst_yv12,
+ &motion_error, recon_yoffset);
+
+ // If the current best reference mv is not centred on 0,0 then do a 0,0 based search as well
+ if (best_ref_mv.as_int) {
+ tmp_err = INT_MAX;
+ first_pass_motion_search(cpi, x, &zero_ref_mv, &tmp_mv.as_mv,
+ lst_yv12, &tmp_err, recon_yoffset);
+
+ if (tmp_err < motion_error) {
+ motion_error = tmp_err;
+ mv.as_int = tmp_mv.as_int;
+ }
+ }
+
+ // Experimental search in an older reference frame
+ if (cm->current_video_frame > 1) {
+ // Simple 0,0 motion with no mv overhead
+ zz_motion_search(cpi, x, gld_yv12,
+ &gf_motion_error, recon_yoffset);
+
+ first_pass_motion_search(cpi, x, &zero_ref_mv,
+ &tmp_mv.as_mv, gld_yv12,
+ &gf_motion_error, recon_yoffset);
+
+ if ((gf_motion_error < motion_error) &&
+ (gf_motion_error < this_error)) {
+ second_ref_count++;
+ }
+
+ // Reset to last frame as reference buffer
+ xd->pre.y_buffer = lst_yv12->y_buffer + recon_yoffset;
+ xd->pre.u_buffer = lst_yv12->u_buffer + recon_uvoffset;
+ xd->pre.v_buffer = lst_yv12->v_buffer + recon_uvoffset;
+
+ // In accumulating a score for the older reference frame
+ // take the best of the motion predicted score and
+ // the intra coded error (just as will be done for
+ // accumulation of "coded_error" for the last frame).
+ if (gf_motion_error < this_error)
+ sr_coded_error += gf_motion_error;
+ else
+ sr_coded_error += this_error;
+ } else
+ sr_coded_error += motion_error;
+
+ /* Intra assumed best */
+ best_ref_mv.as_int = 0;
+
+ if (motion_error <= this_error) {
+ // Keep a count of cases where the inter and intra were
+ // very close and very low. This helps with scene cut
+ // detection for example in cropped clips with black bars
+ // at the sides or top and bottom.
+ if ((((this_error - intrapenalty) * 9) <=
+ (motion_error * 10)) &&
+ (this_error < (2 * intrapenalty))) {
+ neutral_count++;
+ }
+
+ mv.as_mv.row <<= 3;
+ mv.as_mv.col <<= 3;
+ this_error = motion_error;
+ vp9_set_mbmode_and_mvs(x, NEWMV, &mv);
+ xd->mode_info_context->mbmi.txfm_size = TX_4X4;
+ vp9_encode_inter16x16y(x);
+ sum_mvr += mv.as_mv.row;
+ sum_mvr_abs += abs(mv.as_mv.row);
+ sum_mvc += mv.as_mv.col;
+ sum_mvc_abs += abs(mv.as_mv.col);
+ sum_mvrs += mv.as_mv.row * mv.as_mv.row;
+ sum_mvcs += mv.as_mv.col * mv.as_mv.col;
+ intercount++;
+
+ best_ref_mv.as_int = mv.as_int;
+
+ // Was the vector non-zero
+ if (mv.as_int) {
+ mvcount++;
+
+ // Was it different from the last non zero vector
+ if (mv.as_int != lastmv_as_int)
+ new_mv_count++;
+ lastmv_as_int = mv.as_int;
+
+ // Does the Row vector point inwards or outwards
+ if (mb_row < cm->mb_rows / 2) {
+ if (mv.as_mv.row > 0)
+ sum_in_vectors--;
+ else if (mv.as_mv.row < 0)
+ sum_in_vectors++;
+ } else if (mb_row > cm->mb_rows / 2) {
+ if (mv.as_mv.row > 0)
+ sum_in_vectors++;
+ else if (mv.as_mv.row < 0)
+ sum_in_vectors--;
+ }
+
+ // Does the Col vector point inwards or outwards
+ if (mb_col < cm->mb_cols / 2) {
+ if (mv.as_mv.col > 0)
+ sum_in_vectors--;
+ else if (mv.as_mv.col < 0)
+ sum_in_vectors++;
+ } else if (mb_col > cm->mb_cols / 2) {
+ if (mv.as_mv.col > 0)
+ sum_in_vectors++;
+ else if (mv.as_mv.col < 0)
+ sum_in_vectors--;
+ }
+ }
+ }
+ } else
+ sr_coded_error += (int64_t)this_error;
+
+ coded_error += (int64_t)this_error;
+
+ // adjust to the next column of macroblocks
+ x->src.y_buffer += 16;
+ x->src.u_buffer += 8;
+ x->src.v_buffer += 8;
+
+ recon_yoffset += 16;
+ recon_uvoffset += 8;
+ }
+
+ // adjust to the next row of mbs
+ x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols;
+ x->src.u_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
+ x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
+
+ // extend the recon for intra prediction
+ vp9_extend_mb_row(new_yv12, xd->dst.y_buffer + 16,
+ xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);
+ vp9_clear_system_state(); // __asm emms;
+ }
+ // Build this frame's FIRSTPASS_STATS record, emit it and add it to the clip totals.
+ vp9_clear_system_state(); // __asm emms;
+ {
+ double weight = 0.0;
+
+ FIRSTPASS_STATS fps;
+
+ fps.frame = cm->current_video_frame;
+ fps.intra_error = (double)(intra_error >> 8);
+ fps.coded_error = (double)(coded_error >> 8);
+ fps.sr_coded_error = (double)(sr_coded_error >> 8);
+ weight = simple_weight(cpi->Source);
+
+
+ if (weight < 0.1)
+ weight = 0.1;
+
+ fps.ssim_weighted_pred_err = fps.coded_error * weight;
+
+ fps.pcnt_inter = 0.0;
+ fps.pcnt_motion = 0.0;
+ fps.MVr = 0.0;
+ fps.mvr_abs = 0.0;
+ fps.MVc = 0.0;
+ fps.mvc_abs = 0.0;
+ fps.MVrv = 0.0;
+ fps.MVcv = 0.0;
+ fps.mv_in_out_count = 0.0;
+ fps.new_mv_count = 0.0;
+ fps.count = 1.0;
+
+ fps.pcnt_inter = 1.0 * (double)intercount / cm->MBs;
+ fps.pcnt_second_ref = 1.0 * (double)second_ref_count / cm->MBs;
+ fps.pcnt_neutral = 1.0 * (double)neutral_count / cm->MBs;
+
+ if (mvcount > 0) {
+ fps.MVr = (double)sum_mvr / (double)mvcount;
+ fps.mvr_abs = (double)sum_mvr_abs / (double)mvcount;
+ fps.MVc = (double)sum_mvc / (double)mvcount;
+ fps.mvc_abs = (double)sum_mvc_abs / (double)mvcount;
+ fps.MVrv = ((double)sum_mvrs - (fps.MVr * fps.MVr / (double)mvcount)) / (double)mvcount;  // NOTE(review): not sum_sq/n - mean^2; confirm the intended variance formula.
+ fps.MVcv = ((double)sum_mvcs - (fps.MVc * fps.MVc / (double)mvcount)) / (double)mvcount;  // NOTE(review): same formula as MVrv above.
+ fps.mv_in_out_count = (double)sum_in_vectors / (double)(mvcount * 2);
+ fps.new_mv_count = new_mv_count;
+
+ fps.pcnt_motion = 1.0 * (double)mvcount / cpi->common.MBs;
+ }
+
+ // TODO: handle the case when duration is set to 0, or something less
+ // than the full time between subsequent cpi->source_time_stamp s .
+ fps.duration = (double)(cpi->source->ts_end
+ - cpi->source->ts_start);
+
+ // don't want to do output stats with a stack variable!
+ memcpy(cpi->twopass.this_frame_stats,
+ &fps,
+ sizeof(FIRSTPASS_STATS));
+ output_stats(cpi, cpi->output_pkt_list, cpi->twopass.this_frame_stats);
+ accumulate_stats(cpi->twopass.total_stats, &fps);
+ }
+
+ // Copy the previous Last Frame back into the golden (gf) buffer if
+ // the prediction is good enough... but also don't allow it to lag too far
+ if ((cpi->twopass.sr_update_lag > 3) ||
+ ((cm->current_video_frame > 0) &&
+ (cpi->twopass.this_frame_stats->pcnt_inter > 0.20) &&
+ ((cpi->twopass.this_frame_stats->intra_error /
+ cpi->twopass.this_frame_stats->coded_error) > 2.0))) {
+ vp8_yv12_copy_frame(lst_yv12, gld_yv12);
+ cpi->twopass.sr_update_lag = 1;
+ } else
+ cpi->twopass.sr_update_lag++;
+
+ // swap frame pointers so last frame refers to the frame we just compressed
+ vp9_swap_yv12_buffer(lst_yv12, new_yv12);
+ vp8_yv12_extend_frame_borders(lst_yv12);
+
+ // Special case for the first frame. Copy into the GF buffer as a second reference.
+ if (cm->current_video_frame == 0) {
+ vp8_yv12_copy_frame(lst_yv12, gld_yv12);
+ }
+
+
+ // use this to see what the first pass reconstruction looks like
+ if (0) {  // Debug only and never executed; note that the fopen() result is not checked.
+ char filename[512];
+ FILE *recon_file;
+ sprintf(filename, "enc%04d.yuv", (int) cm->current_video_frame);
+
+ if (cm->current_video_frame == 0)
+ recon_file = fopen(filename, "wb");
+ else
+ recon_file = fopen(filename, "ab");
+
+ (void)fwrite(lst_yv12->buffer_alloc, lst_yv12->frame_size, 1, recon_file);
+ fclose(recon_file);
+ }
+
+ cm->current_video_frame++;
+
+}
+
+// Estimate a cost per mb attributable to overheads such as the coding of
+// modes and motion vectors.
+// Currently simplistic in its assumptions for testing.
+//
+
+
+static double bitcost(double prob) {
+ return -(log(prob) / log(2.0));  // -log2(prob): the cost in bits of an event with probability prob.
+}
+
+static long long estimate_modemvcost(VP9_COMP *cpi,
+ FIRSTPASS_STATS *fpstats) {  // Both parameters are unused while the body below is compiled out.
+#if 0  // Disabled estimate, kept for reference (see the TODO at the end of the block).
+ int mv_cost;
+ int mode_cost;
+
+ double av_pct_inter = fpstats->pcnt_inter / fpstats->count;
+ double av_pct_motion = fpstats->pcnt_motion / fpstats->count;
+ double av_intra = (1.0 - av_pct_inter);
+
+ double zz_cost;
+ double motion_cost;
+ double intra_cost;
+
+ zz_cost = bitcost(av_pct_inter - av_pct_motion);
+ motion_cost = bitcost(av_pct_motion);
+ intra_cost = bitcost(av_intra);
+
+ // Estimate of extra bits per mv overhead for mbs
+ // << 9 is the normalization to the (bits * 512) used in vp9_bits_per_mb
+ mv_cost = ((int)(fpstats->new_mv_count / fpstats->count) * 8) << 9;
+
+ // Crude estimate of overhead cost from modes
+ // << 9 is the normalization to (bits * 512) used in vp9_bits_per_mb
+ mode_cost =
+ (int)((((av_pct_inter - av_pct_motion) * zz_cost) +
+ (av_pct_motion * motion_cost) +
+ (av_intra * intra_cost)) * cpi->common.MBs) << 9;
+
+ // return mv_cost + mode_cost;
+ // TODO PGW Fix overhead costs for extended Q range
+#endif
+ return 0;  // The overhead estimate is always 0 for now.
+}
+
+// Returns (err_per_mb / err_divisor) ^ p, clamped to [0.05, 2.0].
+// The exponent p is pt_low + 0.01 * q, capped at pt_high, where q is the
+// real quantizer value that vp9_convert_qindex_to_q() gives for index Q.
+// A NaN result is returned unchanged.
+static double calc_correction_factor(double err_per_mb,
+                                     double err_divisor,
+                                     double pt_low,
+                                     double pt_high,
+                                     int Q) {
+  const double error_term = err_per_mb / err_divisor;
+  double power_term = (vp9_convert_qindex_to_q(Q) * 0.01) + pt_low;
+  double correction_factor;
+
+  // The exponent grows with the quantizer but is capped at pt_high.
+  if (power_term > pt_high)
+    power_term = pt_high;
+
+  correction_factor = pow(error_term, power_term);
+
+  // Clip range
+  if (correction_factor < 0.05)
+    return 0.05;
+  if (correction_factor > 2.0)
+    return 2.0;
+  return correction_factor;
+}
+
+// Re-derives the allowed range for the max-Q estimate from the running
+// average quantizer cpi->avg_q:
+//   maxq_max_limit: first index in [best_quality, worst_quality] whose q is
+//                   >= 2.0 * avg_q (worst_quality if none),
+//   maxq_min_limit: last index in that range whose q is <= 0.5 * avg_q
+//                   (best_quality if none).
+// PGW TODO.. Uses the actual quantizer rather than QIndex; not yet tuned.
+static void adjust_maxq_qrange(VP9_COMP *cpi) {
+  const int best = cpi->best_quality;
+  const int worst = cpi->worst_quality;
+  const double q_hi = cpi->avg_q * 2.0;
+  const double q_lo = cpi->avg_q * 0.5;
+  int idx;
+
+  cpi->twopass.maxq_max_limit = worst;
+  for (idx = best; idx <= worst; idx++) {
+    cpi->twopass.maxq_max_limit = idx;
+    if (vp9_convert_qindex_to_q(idx) >= q_hi) break;
+  }
+
+  cpi->twopass.maxq_min_limit = best;
+  for (idx = worst; idx >= best; idx--) {
+    cpi->twopass.maxq_min_limit = idx;
+    if (vp9_convert_qindex_to_q(idx) <= q_lo) break;
+  }
+}
+
+static int estimate_max_q(VP9_COMP *cpi,
+ FIRSTPASS_STATS *fpstats,
+ int section_target_bandwitdh,
+ int overhead_bits) {
+ int Q;
+ int num_mbs = cpi->common.MBs;
+ int target_norm_bits_per_mb;
+
+ double section_err = (fpstats->coded_error / fpstats->count);
+ double sr_err_diff;
+ double sr_correction;
+ double err_per_mb = section_err / num_mbs;
+ double err_correction_factor;
+ double speed_correction = 1.0;
+ double overhead_bits_per_mb;
+
+ if (section_target_bandwitdh <= 0)
+ return cpi->twopass.maxq_max_limit; // Highest value allowed
+
+ target_norm_bits_per_mb =
+ (section_target_bandwitdh < (1 << 20))
+ ? (512 * section_target_bandwitdh) / num_mbs
+ : 512 * (section_target_bandwitdh / num_mbs);
+
+ // Look at the drop in prediction quality between the last frame
+ // and the GF buffer (which contained an older frame).
+ sr_err_diff =
+ (fpstats->sr_coded_error - fpstats->coded_error) /
+ (fpstats->count * cpi->common.MBs);
+ sr_correction = (sr_err_diff / 32.0);
+ sr_correction = pow(sr_correction, 0.25);
+ if (sr_correction < 0.75)
+ sr_correction = 0.75;
+ else if (sr_correction > 1.25)
+ sr_correction = 1.25;
+
+ // Calculate a corrective factor based on a rolling ratio of bits spent
+ // vs target bits
+ if ((cpi->rolling_target_bits > 0) &&
+ (cpi->active_worst_quality < cpi->worst_quality)) {
+ double rolling_ratio;
+
+ rolling_ratio = (double)cpi->rolling_actual_bits /
+ (double)cpi->rolling_target_bits;
+
+ if (rolling_ratio < 0.95)
+ cpi->twopass.est_max_qcorrection_factor -= 0.005;
+ else if (rolling_ratio > 1.05)
+ cpi->twopass.est_max_qcorrection_factor += 0.005;
+
+ cpi->twopass.est_max_qcorrection_factor =
+ (cpi->twopass.est_max_qcorrection_factor < 0.1)
+ ? 0.1
+ : (cpi->twopass.est_max_qcorrection_factor > 10.0)
+ ? 10.0 : cpi->twopass.est_max_qcorrection_factor;
+ }
+
+ // Corrections for higher compression speed settings
+ // (reduced compression expected)
+ if (cpi->compressor_speed == 1) {
+ if (cpi->oxcf.cpu_used <= 5)
+ speed_correction = 1.04 + (cpi->oxcf.cpu_used * 0.04);
+ else
+ speed_correction = 1.25;
+ }
+
+ // Estimate of overhead bits per mb
+ // Correction to overhead bits for min allowed Q.
+ // PGW TODO.. This code is broken for the extended Q range
+ // for now overhead set to 0.
+ overhead_bits_per_mb = overhead_bits / num_mbs;
+ overhead_bits_per_mb *= pow(0.98, (double)cpi->twopass.maxq_min_limit);
+
+ // Try and pick a max Q that will be high enough to encode the
+ // content at the given rate.
+ for (Q = cpi->twopass.maxq_min_limit; Q < cpi->twopass.maxq_max_limit; Q++) {
+ int bits_per_mb_at_this_q;
+
+ err_correction_factor =
+ calc_correction_factor(err_per_mb, ERR_DIVISOR, 0.4, 0.90, Q) *
+ sr_correction * speed_correction *
+ cpi->twopass.est_max_qcorrection_factor;
+
+ if (err_correction_factor < 0.05)
+ err_correction_factor = 0.05;
+ else if (err_correction_factor > 5.0)
+ err_correction_factor = 5.0;
+
+ bits_per_mb_at_this_q =
+ vp9_bits_per_mb(INTER_FRAME, Q) + (int)overhead_bits_per_mb;
+
+ bits_per_mb_at_this_q = (int)(.5 + err_correction_factor *
+ (double)bits_per_mb_at_this_q);
+
+ // Mode and motion overhead
+ // As Q rises in real encode loop rd code will force overhead down
+ // We make a crude adjustment for this here as *.98 per Q step.
+ // PGW TODO.. This code is broken for the extended Q range
+ // for now overhead set to 0.
+ // overhead_bits_per_mb = (int)((double)overhead_bits_per_mb * 0.98);
+
+ if (bits_per_mb_at_this_q <= target_norm_bits_per_mb)
+ break;
+ }
+
+ // Restriction on active max q for constrained quality mode.
+ if ((cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) &&
+ (Q < cpi->cq_target_quality)) {
+ Q = cpi->cq_target_quality;
+ }
+
+ // Adjust maxq_min_limit and maxq_max_limit limits based on
+ // average q observed in clip for non kf/gf/arf frames
+ // Give average a chance to settle though.
+ // PGW TODO.. This code is broken for the extended Q range
+ if ((cpi->ni_frames >
+ ((int)cpi->twopass.total_stats->count >> 8)) &&
+ (cpi->ni_frames > 150)) {
+ adjust_maxq_qrange(cpi);
+ }
+
+ return Q;
+}
+
+// For cq mode estimate a cq level that matches the observed complexity and data rate: the first Q in [0, MAXQ) whose corrected
+// bits-per-MB estimate fits the target (MAXQ if none does), mapped through select_cq_level() and clipped to the allowed quality range.
+static int estimate_cq(VP9_COMP *cpi,
+ FIRSTPASS_STATS *fpstats,
+ int section_target_bandwitdh,
+ int overhead_bits) {
+ int Q;
+ int num_mbs = cpi->common.MBs;
+ int target_norm_bits_per_mb;
+
+ double section_err = (fpstats->coded_error / fpstats->count);  // coded_error averaged over fpstats->count
+ double err_per_mb = section_err / num_mbs;
+ double err_correction_factor;
+ double sr_err_diff;
+ double sr_correction;
+ double speed_correction = 1.0;
+ double clip_iiratio;
+ double clip_iifactor;
+ double overhead_bits_per_mb;
+
+
+ target_norm_bits_per_mb = (section_target_bandwitdh < (1 << 20))  // multiply first only below 2^20 (presumably to keep 512 * bandwidth within int range)
+ ? (512 * section_target_bandwitdh) / num_mbs
+ : 512 * (section_target_bandwitdh / num_mbs);
+
+ // Estimate of overhead bits per mb (int / int, so truncated before being stored as a double)
+ overhead_bits_per_mb = overhead_bits / num_mbs;
+
+ // Corrections for higher compression speed settings
+ // (reduced compression expected)
+ if (cpi->compressor_speed == 1) {
+ if (cpi->oxcf.cpu_used <= 5)
+ speed_correction = 1.04 + (cpi->oxcf.cpu_used * 0.04);
+ else
+ speed_correction = 1.25;
+ }
+
+ // Look at the drop in prediction quality between the last frame
+ // and the GF buffer (which contained an older frame).
+ sr_err_diff =
+ (fpstats->sr_coded_error - fpstats->coded_error) /
+ (fpstats->count * cpi->common.MBs);
+ sr_correction = (sr_err_diff / 32.0);
+ sr_correction = pow(sr_correction, 0.25);  // NOTE(review): pow() of a negative base with a fractional exponent is NaN, which the clamps below do not catch - confirm sr_err_diff cannot be negative
+ if (sr_correction < 0.75)
+ sr_correction = 0.75;
+ else if (sr_correction > 1.25)
+ sr_correction = 1.25;
+
+ // II ratio correction factor for clip as a whole
+ clip_iiratio = cpi->twopass.total_stats->intra_error /
+ DOUBLE_DIVIDE_CHECK(cpi->twopass.total_stats->coded_error);
+ clip_iifactor = 1.0 - ((clip_iiratio - 10.0) * 0.025);
+ if (clip_iifactor < 0.80)
+ clip_iifactor = 0.80;
+
+ // Try and pick a Q that can encode the content at the given rate.
+ for (Q = 0; Q < MAXQ; Q++) {
+ int bits_per_mb_at_this_q;
+
+ // Error per MB based correction factor
+ err_correction_factor =
+ calc_correction_factor(err_per_mb, 100.0, 0.4, 0.90, Q) *
+ sr_correction * speed_correction * clip_iifactor;
+
+ if (err_correction_factor < 0.05)
+ err_correction_factor = 0.05;
+ else if (err_correction_factor > 5.0)
+ err_correction_factor = 5.0;
+
+ bits_per_mb_at_this_q =
+ vp9_bits_per_mb(INTER_FRAME, Q) + (int)overhead_bits_per_mb;
+
+ bits_per_mb_at_this_q = (int)(.5 + err_correction_factor *
+ (double)bits_per_mb_at_this_q);
+
+ // Mode and motion overhead
+ // As Q rises in real encode loop rd code will force overhead down
+ // We make a crude adjustment for this here as *.98 per Q step.
+ // PGW TODO.. This code is broken for the extended Q range.
+ // NOTE(review): the original note said "for now overhead set to 0", but the decay below is active in this function - confirm intent.
+ overhead_bits_per_mb = (int)((double)overhead_bits_per_mb * 0.98);
+
+ if (bits_per_mb_at_this_q <= target_norm_bits_per_mb)
+ break;
+ }
+
+ // Clip value to range "best allowed to (worst allowed - 1)"
+ Q = select_cq_level(Q);
+ if (Q >= cpi->worst_quality)
+ Q = cpi->worst_quality - 1;
+ if (Q < cpi->best_quality)
+ Q = cpi->best_quality;
+
+ return Q;
+}
+
+
+extern void vp9_new_frame_rate(VP9_COMP *cpi, double framerate);
+
+void vp9_init_second_pass(VP9_COMP *cpi) {  // Sets up cpi->twopass totals, bits_left, intra minimums, avg_iiratio and modified error totals from the first pass stats.
+ FIRSTPASS_STATS this_frame;
+ FIRSTPASS_STATS *start_pos;
+
+ double lower_bounds_min_rate = FRAME_OVERHEAD_BITS * cpi->oxcf.frame_rate;
+ double two_pass_min_rate = (double)(cpi->oxcf.target_bandwidth
+ * cpi->oxcf.two_pass_vbrmin_section / 100);
+
+ if (two_pass_min_rate < lower_bounds_min_rate)
+ two_pass_min_rate = lower_bounds_min_rate;
+
+ zero_stats(cpi->twopass.total_stats);
+ zero_stats(cpi->twopass.total_left_stats);
+
+ if (!cpi->twopass.stats_in_end)  // nothing to copy the totals from; return with the stats as left by zero_stats()
+ return;
+
+ *cpi->twopass.total_stats = *cpi->twopass.stats_in_end;
+ *cpi->twopass.total_left_stats = *cpi->twopass.total_stats;
+
+ // Each frame can have a different duration, as the frame rate in the source
+ // isn't guaranteed to be constant. The frame rate prior to the first frame
+ // encoded in the second pass is a guess. However the sum duration is not.
+ // It's calculated based on the actual durations of all frames from the first
+ // pass. (The 10000000.0 scale suggests duration is in 1e-7 second units - TODO confirm.)
+ vp9_new_frame_rate(cpi,
+ 10000000.0 * cpi->twopass.total_stats->count /
+ cpi->twopass.total_stats->duration);
+
+ cpi->output_frame_rate = cpi->oxcf.frame_rate;
+ cpi->twopass.bits_left = (int64_t)(cpi->twopass.total_stats->duration *
+ cpi->oxcf.target_bandwidth / 10000000.0);
+ cpi->twopass.bits_left -= (int64_t)(cpi->twopass.total_stats->duration *
+ two_pass_min_rate / 10000000.0);
+
+ // Calculate a minimum intra value to be used in determining the IIratio
+ // scores used in the second pass. We have this minimum to make sure
+ // that clips that are static but "low complexity" in the intra domain
+ // are still boosted appropriately for KF/GF/ARF
+ cpi->twopass.kf_intra_err_min = KF_MB_INTRA_MIN * cpi->common.MBs;
+ cpi->twopass.gf_intra_err_min = GF_MB_INTRA_MIN * cpi->common.MBs;
+
+ // This variable monitors how far behind the second ref update is lagging
+ cpi->twopass.sr_update_lag = 1;
+
+ // Scan the first pass file and calculate an average Intra / Inter error score ratio for the sequence (each frame's ratio is clamped to 1.0..20.0)
+ {
+ double sum_iiratio = 0.0;
+ double IIRatio;
+
+ start_pos = cpi->twopass.stats_in; // Note starting "file" position
+
+ while (input_stats(cpi, &this_frame) != EOF) {
+ IIRatio = this_frame.intra_error / DOUBLE_DIVIDE_CHECK(this_frame.coded_error);
+ IIRatio = (IIRatio < 1.0) ? 1.0 : (IIRatio > 20.0) ? 20.0 : IIRatio;
+ sum_iiratio += IIRatio;
+ }
+
+ cpi->twopass.avg_iiratio = sum_iiratio / DOUBLE_DIVIDE_CHECK((double)cpi->twopass.total_stats->count);
+
+ // Reset file position
+ reset_fpf_position(cpi, start_pos);
+ }
+
+ // Scan the first pass file and calculate a modified total error based upon the bias/power function
+ // used to allocate bits (the per-frame value comes from calculate_modified_err())
+ {
+ start_pos = cpi->twopass.stats_in; // Note starting "file" position
+
+ cpi->twopass.modified_error_total = 0.0;
+ cpi->twopass.modified_error_used = 0.0;
+
+ while (input_stats(cpi, &this_frame) != EOF) {
+ cpi->twopass.modified_error_total += calculate_modified_err(cpi, &this_frame);
+ }
+ cpi->twopass.modified_error_left = cpi->twopass.modified_error_total;
+
+ reset_fpf_position(cpi, start_pos); // Reset file position
+
+ }
+}
+
+void vp9_end_second_pass(VP9_COMP *cpi) {  // Empty body: no work is done here at the end of the second pass.
+}
+
+// This function gives an estimate of how badly we believe the prediction quality is decaying from frame to frame.
+// Returns the smaller of next_frame->pcnt_inter and a second-reference decay term clamped to 0.85..1.0.
+static double get_prediction_decay_rate(VP9_COMP *cpi,
+ FIRSTPASS_STATS *next_frame) {
+ double prediction_decay_rate;
+ double second_ref_decay;
+ double mb_sr_err_diff;
+
+ // Initial basis is the % mbs inter coded
+ prediction_decay_rate = next_frame->pcnt_inter;
+
+ // Look at the observed drop in prediction quality between the last frame
+ // and the GF buffer (which contains an older frame).
+ mb_sr_err_diff =
+ (next_frame->sr_coded_error - next_frame->coded_error) /
+ (cpi->common.MBs);
+ second_ref_decay = 1.0 - (mb_sr_err_diff / 512.0);
+ second_ref_decay = pow(second_ref_decay, 0.5);
+ if (second_ref_decay < 0.85)
+ second_ref_decay = 0.85;
+ else if (second_ref_decay > 1.0)
+ second_ref_decay = 1.0;
+
+ if (second_ref_decay < prediction_decay_rate)
+ prediction_decay_rate = second_ref_decay;
+
+ return prediction_decay_rate;
+}
+
+// Tests for a complex transition followed by a static section, for example a fade between slides in a slide show.
+// Returns TRUE only when the interval/decay-rate conditions hold and each of the next still_interval frames has
+// (pcnt_inter - pcnt_motion) >= 0.999; otherwise FALSE. This is to help with more optimal kf and gf positioning.
+static int detect_transition_to_still(
+ VP9_COMP *cpi,
+ int frame_interval,
+ int still_interval,
+ double loop_decay_rate,
+ double last_decay_rate) {
+ BOOL trans_to_still = FALSE;
+
+ // Break clause to detect very still sections after motion
+ // For example a static image after a fade or other transition
+ // instead of a clean scene cut.
+ if ((frame_interval > MIN_GF_INTERVAL) &&
+ (loop_decay_rate >= 0.999) &&
+ (last_decay_rate < 0.9)) {
+ int j;
+ FIRSTPASS_STATS *position = cpi->twopass.stats_in;  // saved so the look-ahead below can be undone
+ FIRSTPASS_STATS tmp_next_frame;
+ double zz_inter;
+
+ // Look ahead a few frames to see if static condition
+ // persists...
+ for (j = 0; j < still_interval; j++) {
+ if (EOF == input_stats(cpi, &tmp_next_frame))
+ break;
+
+ zz_inter =
+ (tmp_next_frame.pcnt_inter - tmp_next_frame.pcnt_motion);
+ if (zz_inter < 0.999)
+ break;
+ }
+ // Reset file position
+ reset_fpf_position(cpi, position);
+
+ // Only if it does do we signal a transition to still
+ if (j == still_interval)
+ trans_to_still = TRUE;
+ }
+
+ return trans_to_still;
+}
+
+// This function detects a flash through the high relative pcnt_second_ref
+// score in the frame following a flash frame. The offset passed in should
+// reflect this. Returns TRUE when a flash is detected, FALSE otherwise.
+static BOOL detect_flash(VP9_COMP *cpi, int offset) {
+ FIRSTPASS_STATS next_frame;
+
+ BOOL flash_detected = FALSE;
+
+ // Read the frame data.
+ // The return is FALSE (no flash detected) if not a valid frame
+ if (read_frame_stats(cpi, &next_frame, offset) != EOF) {
+ // What we are looking for here is a situation where there is a
+ // brief break in prediction (such as a flash) but subsequent frames
+ // are reasonably well predicted by an earlier (pre flash) frame.
+ // The recovery after a flash is indicated by a high pcnt_second_ref
+ // compared to pcnt_inter.
+ if ((next_frame.pcnt_second_ref > next_frame.pcnt_inter) &&
+ (next_frame.pcnt_second_ref >= 0.5)) {
+ flash_detected = TRUE;
+ }
+ }
+
+ return flash_detected;
+}
+
+// Update the motion related elements to the GF arf boost calculation: overwrites *this_frame_mv_in_out and adds to the three accumulators.
+static void accumulate_frame_motion_stats(
+ VP9_COMP *cpi,
+ FIRSTPASS_STATS *this_frame,
+ double *this_frame_mv_in_out,
+ double *mv_in_out_accumulator,
+ double *abs_mv_in_out_accumulator,
+ double *mv_ratio_accumulator) {
+ // (this_frame_mv_in_out is an output parameter, not a local.)
+ double this_frame_mvr_ratio;
+ double this_frame_mvc_ratio;
+ double motion_pct;
+
+ // Accumulate motion stats.
+ motion_pct = this_frame->pcnt_motion;
+
+ // Accumulate Motion In/Out of frame stats
+ *this_frame_mv_in_out = this_frame->mv_in_out_count * motion_pct;
+ *mv_in_out_accumulator += this_frame->mv_in_out_count * motion_pct;
+ *abs_mv_in_out_accumulator +=
+ fabs(this_frame->mv_in_out_count * motion_pct);
+
+ // Accumulate a measure of how uniform (or conversely how random)
+ // the motion field is. (A ratio of absmv / mv). Skipped when pcnt_motion is 0.05 or less.
+ if (motion_pct > 0.05) {
+ this_frame_mvr_ratio = fabs(this_frame->mvr_abs) /
+ DOUBLE_DIVIDE_CHECK(fabs(this_frame->MVr));
+
+ this_frame_mvc_ratio = fabs(this_frame->mvc_abs) /
+ DOUBLE_DIVIDE_CHECK(fabs(this_frame->MVc));
+
+ *mv_ratio_accumulator +=  // row term: the ratio is capped at mvr_abs before weighting by motion_pct
+ (this_frame_mvr_ratio < this_frame->mvr_abs)
+ ? (this_frame_mvr_ratio * motion_pct)
+ : this_frame->mvr_abs * motion_pct;
+
+ *mv_ratio_accumulator +=  // column term: the ratio is capped at mvc_abs before weighting by motion_pct
+ (this_frame_mvc_ratio < this_frame->mvc_abs)
+ ? (this_frame_mvc_ratio * motion_pct)
+ : this_frame->mvc_abs * motion_pct;
+
+ }
+}
+
+// Calculate a baseline boost number for the current frame: IIFACTOR * max(intra_error, gf_intra_err_min) / coded_error, adjusted for in/out motion and capped at GF_RMAX.
+static double calc_frame_boost(
+ VP9_COMP *cpi,
+ FIRSTPASS_STATS *this_frame,
+ double this_frame_mv_in_out) {
+ double frame_boost;
+
+ // Underlying boost factor is based on inter intra error ratio
+ if (this_frame->intra_error > cpi->twopass.gf_intra_err_min)
+ frame_boost = (IIFACTOR * this_frame->intra_error /
+ DOUBLE_DIVIDE_CHECK(this_frame->coded_error));
+ else
+ frame_boost = (IIFACTOR * cpi->twopass.gf_intra_err_min /
+ DOUBLE_DIVIDE_CHECK(this_frame->coded_error));
+
+ // Increase boost for frames where new data coming into frame
+ // (eg zoom out). Slightly reduce boost if there is a net balance
+ // of motion out of the frame (zoom in).
+ // The range for this_frame_mv_in_out is -1.0 to +1.0
+ if (this_frame_mv_in_out > 0.0)
+ frame_boost += frame_boost * (this_frame_mv_in_out * 2.0);
+ // In extreme case (this_frame_mv_in_out == -1.0) boost is halved
+ else
+ frame_boost += frame_boost * (this_frame_mv_in_out / 2.0);
+
+ // Clip to maximum
+ if (frame_boost > GF_RMAX)
+ frame_boost = GF_RMAX;
+
+ return frame_boost;
+}
+
+static int calc_arf_boost(  // Sums decayed frame boosts forward (up to f_frames) and backward (up to b_frames) from offset; writes *f_boost and *b_boost, returns their sum floored at (b_frames + f_frames) * 20.
+ VP9_COMP *cpi,
+ int offset,
+ int f_frames,
+ int b_frames,
+ int *f_boost,
+ int *b_boost) {
+ FIRSTPASS_STATS this_frame;
+
+ int i;
+ double boost_score = 0.0;
+ double mv_ratio_accumulator = 0.0;
+ double decay_accumulator = 1.0;
+ double this_frame_mv_in_out = 0.0;
+ double mv_in_out_accumulator = 0.0;
+ double abs_mv_in_out_accumulator = 0.0;
+ int arf_boost;
+ BOOL flash_detected = FALSE;
+
+ // Search forward from the proposed arf/next gf position
+ for (i = 0; i < f_frames; i++) {
+ if (read_frame_stats(cpi, &this_frame, (i + offset)) == EOF)
+ break;
+
+ // Update the motion related elements to the boost calculation
+ accumulate_frame_motion_stats(cpi, &this_frame,
+ &this_frame_mv_in_out, &mv_in_out_accumulator,
+ &abs_mv_in_out_accumulator, &mv_ratio_accumulator);
+
+ // We want to discount the flash frame itself and the recovery
+ // frame that follows as both will have poor scores.
+ flash_detected = detect_flash(cpi, (i + offset)) ||
+ detect_flash(cpi, (i + offset + 1));
+
+ // Cumulative effect of prediction quality decay (floored at MIN_DECAY_FACTOR)
+ if (!flash_detected) {
+ decay_accumulator =
+ decay_accumulator * get_prediction_decay_rate(cpi, &this_frame);
+ decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR
+ ? MIN_DECAY_FACTOR : decay_accumulator;
+ }
+
+ boost_score += (decay_accumulator *
+ calc_frame_boost(cpi, &this_frame, this_frame_mv_in_out));
+ }
+
+ *f_boost = (int)boost_score;
+
+ // Reset for backward looking loop
+ boost_score = 0.0;
+ mv_ratio_accumulator = 0.0;
+ decay_accumulator = 1.0;
+ this_frame_mv_in_out = 0.0;
+ mv_in_out_accumulator = 0.0;
+ abs_mv_in_out_accumulator = 0.0;
+
+ // Search backward towards last gf position
+ for (i = -1; i >= -b_frames; i--) {
+ if (read_frame_stats(cpi, &this_frame, (i + offset)) == EOF)
+ break;
+
+ // Update the motion related elements to the boost calculation
+ accumulate_frame_motion_stats(cpi, &this_frame,
+ &this_frame_mv_in_out, &mv_in_out_accumulator,
+ &abs_mv_in_out_accumulator, &mv_ratio_accumulator);
+
+ // We want to discount the flash frame itself and the recovery
+ // frame that follows as both will have poor scores.
+ flash_detected = detect_flash(cpi, (i + offset)) ||
+ detect_flash(cpi, (i + offset + 1));
+
+ // Cumulative effect of prediction quality decay (floored at MIN_DECAY_FACTOR)
+ if (!flash_detected) {
+ decay_accumulator =
+ decay_accumulator * get_prediction_decay_rate(cpi, &this_frame);
+ decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR
+ ? MIN_DECAY_FACTOR : decay_accumulator;
+ }
+
+ boost_score += (decay_accumulator *
+ calc_frame_boost(cpi, &this_frame, this_frame_mv_in_out));
+
+ }
+ *b_boost = (int)boost_score;
+
+ arf_boost = (*f_boost + *b_boost);
+ if (arf_boost < ((b_frames + f_frames) * 20))
+ arf_boost = ((b_frames + f_frames) * 20);
+
+ return arf_boost;
+}
+
+static void configure_arnr_filter(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {  // Sets cpi->active_arnr_frames from arnr_max_frames, arnr_type, half the GF interval and the frames left after the ARF.
+ int half_gf_int;
+ int frames_after_arf;
+ int frames_bwd = cpi->oxcf.arnr_max_frames - 1;
+ int frames_fwd = cpi->oxcf.arnr_max_frames - 1;
+
+ // Define the arnr filter width for this group of frames:
+ // We only filter frames that lie within a distance of half
+ // the GF interval from the ARF frame. We also have to trap
+ // cases where the filter extends beyond the end of clip.
+ // Note: this_frame->frame has been updated in the loop
+ // so it now points at the ARF frame.
+ half_gf_int = cpi->baseline_gf_interval >> 1;
+ frames_after_arf = (int)(cpi->twopass.total_stats->count -
+ this_frame->frame - 1);
+
+ switch (cpi->oxcf.arnr_type) {
+ case 1: // Backward filter
+ frames_fwd = 0;
+ if (frames_bwd > half_gf_int)
+ frames_bwd = half_gf_int;
+ break;
+
+ case 2: // Forward filter
+ if (frames_fwd > half_gf_int)
+ frames_fwd = half_gf_int;
+ if (frames_fwd > frames_after_arf)
+ frames_fwd = frames_after_arf;
+ frames_bwd = 0;
+ break;
+
+ case 3: // Centered filter
+ default:
+ frames_fwd >>= 1;
+ if (frames_fwd > frames_after_arf)
+ frames_fwd = frames_after_arf;
+ if (frames_fwd > half_gf_int)
+ frames_fwd = half_gf_int;
+
+ frames_bwd = frames_fwd;
+
+ // For even length filter there is one more frame backward
+ // than forward: e.g. len=6 ==> bbbAff, len=7 ==> bbbAfff.
+ if (frames_bwd < half_gf_int)
+ frames_bwd += (cpi->oxcf.arnr_max_frames + 1) & 0x1;
+ break;
+ }
+
+ cpi->active_arnr_frames = frames_bwd + 1 + frames_fwd;  // backward frames + the ARF itself + forward frames
+}
+
+// Analyse and define a gf/arf group: choose the group length (cpi->baseline_gf_interval), decide whether to use an ARF, and allocate bits from the kf group to it.
+static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
+ FIRSTPASS_STATS next_frame;
+ FIRSTPASS_STATS *start_pos;
+ int i;
+ double boost_score = 0.0;
+ double old_boost_score = 0.0;
+ double gf_group_err = 0.0;
+ double gf_first_frame_err = 0.0;
+ double mod_frame_err = 0.0;
+
+ double mv_ratio_accumulator = 0.0;
+ double decay_accumulator = 1.0;
+ double zero_motion_accumulator = 1.0;
+
+ double loop_decay_rate = 1.00; // Starting decay rate
+ double last_loop_decay_rate = 1.00;
+
+ double this_frame_mv_in_out = 0.0;
+ double mv_in_out_accumulator = 0.0;
+ double abs_mv_in_out_accumulator = 0.0;
+
+ int max_bits = frame_max_bits(cpi); // Max for a single frame
+
+ unsigned int allow_alt_ref =
+ cpi->oxcf.play_alternate && cpi->oxcf.lag_in_frames;
+
+ int f_boost = 0;
+ int b_boost = 0;
+ BOOL flash_detected;
+
+ cpi->twopass.gf_group_bits = 0;
+
+ vp9_clear_system_state(); // __asm emms;
+
+ start_pos = cpi->twopass.stats_in;
+
+ vpx_memset(&next_frame, 0, sizeof(next_frame)); // assure clean
+
+ // Load stats for the current frame.
+ mod_frame_err = calculate_modified_err(cpi, this_frame);
+
+ // Note the error of the frame at the start of the group (this will be
+ // the GF frame error if we code a normal gf)
+ gf_first_frame_err = mod_frame_err;
+
+ // Special treatment if the current frame is a key frame (which is also
+ // a gf). If it is then its error score (and hence bit allocation) need
+ // to be subtracted out from the calculation for the GF group
+ if (cpi->common.frame_type == KEY_FRAME)
+ gf_group_err -= gf_first_frame_err;
+
+ // Scan forward to try and work out how many frames the next gf group
+ // should contain and what level of boost is appropriate for the GF
+ // or ARF that will be coded with the group
+ i = 0;
+
+ while (((i < cpi->twopass.static_scene_max_gf_interval) ||
+ ((cpi->twopass.frames_to_key - i) < MIN_GF_INTERVAL)) &&
+ (i < cpi->twopass.frames_to_key)) {
+ i++; // Increment the loop counter
+
+ // Accumulate error score of frames in this gf group
+ mod_frame_err = calculate_modified_err(cpi, this_frame);
+ gf_group_err += mod_frame_err;
+
+ if (EOF == input_stats(cpi, &next_frame))
+ break;
+
+ // Test for the case where there is a brief flash but the prediction
+ // quality back to an earlier frame is then restored.
+ flash_detected = detect_flash(cpi, 0);
+
+ // Update the motion related elements to the boost calculation
+ accumulate_frame_motion_stats(cpi, &next_frame,
+ &this_frame_mv_in_out, &mv_in_out_accumulator,
+ &abs_mv_in_out_accumulator, &mv_ratio_accumulator);
+
+ // Cumulative effect of prediction quality decay
+ if (!flash_detected) {
+ last_loop_decay_rate = loop_decay_rate;
+ loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame);
+ decay_accumulator = decay_accumulator * loop_decay_rate;
+
+ // Monitor for static sections.
+ if ((next_frame.pcnt_inter - next_frame.pcnt_motion) <
+ zero_motion_accumulator) {
+ zero_motion_accumulator =
+ (next_frame.pcnt_inter - next_frame.pcnt_motion);
+ }
+
+ // Break clause to detect very still sections after motion
+ // (for example a static image after a fade or other transition).
+ if (detect_transition_to_still(cpi, i, 5, loop_decay_rate,
+ last_loop_decay_rate)) {
+ allow_alt_ref = FALSE;
+ break;
+ }
+ }
+
+ // Calculate a boost number for this frame
+ boost_score +=
+ (decay_accumulator *
+ calc_frame_boost(cpi, &next_frame, this_frame_mv_in_out));
+
+ // Break out conditions.
+ if (
+ // Break at cpi->max_gf_interval unless almost totally static
+ (i >= cpi->max_gf_interval && (zero_motion_accumulator < 0.995)) ||
+ (
+ // Don't break out with a very short interval
+ (i > MIN_GF_INTERVAL) &&
+ // Don't break out very close to a key frame
+ ((cpi->twopass.frames_to_key - i) >= MIN_GF_INTERVAL) &&
+ ((boost_score > 125.0) || (next_frame.pcnt_inter < 0.75)) &&
+ (!flash_detected) &&
+ ((mv_ratio_accumulator > 100.0) ||
+ (abs_mv_in_out_accumulator > 3.0) ||
+ (mv_in_out_accumulator < -2.0) ||
+ ((boost_score - old_boost_score) < IIFACTOR))
+ )) {
+ boost_score = old_boost_score;
+ break;
+ }
+
+ vpx_memcpy(this_frame, &next_frame, sizeof(*this_frame));
+
+ old_boost_score = boost_score;
+ }
+
+ // Don't allow a gf too near the next kf
+ if ((cpi->twopass.frames_to_key - i) < MIN_GF_INTERVAL) {
+ while (i < cpi->twopass.frames_to_key) {
+ i++;
+
+ if (EOF == input_stats(cpi, this_frame))
+ break;
+
+ if (i < cpi->twopass.frames_to_key) {
+ mod_frame_err = calculate_modified_err(cpi, this_frame);
+ gf_group_err += mod_frame_err;
+ }
+ }
+ }
+
+ // Set the interval till the next gf or arf.
+ cpi->baseline_gf_interval = i;
+
+ // Should we use the alternate reference frame
+ if (allow_alt_ref &&
+ (i < cpi->oxcf.lag_in_frames) &&
+ (i >= MIN_GF_INTERVAL) &&
+ // don't use ARF very near next kf
+ (i <= (cpi->twopass.frames_to_key - MIN_GF_INTERVAL)) &&
+ ((next_frame.pcnt_inter > 0.75) ||
+ (next_frame.pcnt_second_ref > 0.5)) &&
+ ((mv_in_out_accumulator / (double)i > -0.2) ||
+ (mv_in_out_accumulator > -2.0)) &&
+ (boost_score > 100)) {
+ // Alternative boost calculation for alt ref
+ cpi->gfu_boost = calc_arf_boost(cpi, 0, (i - 1), (i - 1), &f_boost, &b_boost);
+ cpi->source_alt_ref_pending = TRUE;
+
+ configure_arnr_filter(cpi, this_frame);
+ } else {
+ cpi->gfu_boost = (int)boost_score;
+ cpi->source_alt_ref_pending = FALSE;
+ }
+
+ // Now decide how many bits should be allocated to the GF group as a
+ // proportion of those remaining in the kf group.
+ // The final key frame group in the clip is treated as a special case
+ // where cpi->twopass.kf_group_bits is tied to cpi->twopass.bits_left.
+ // This is also important for short clips where there may only be one
+ // key frame.
+ if (cpi->twopass.frames_to_key >= (int)(cpi->twopass.total_stats->count -
+ cpi->common.current_video_frame)) {
+ cpi->twopass.kf_group_bits =
+ (cpi->twopass.bits_left > 0) ? cpi->twopass.bits_left : 0;
+ }
+
+ // Calculate the bits to be allocated to the group as a whole
+ if ((cpi->twopass.kf_group_bits > 0) &&
+ (cpi->twopass.kf_group_error_left > 0)) {
+ cpi->twopass.gf_group_bits =
+ (int)((double)cpi->twopass.kf_group_bits *
+ (gf_group_err / cpi->twopass.kf_group_error_left));
+ } else
+ cpi->twopass.gf_group_bits = 0;
+
+ cpi->twopass.gf_group_bits =
+ (cpi->twopass.gf_group_bits < 0)
+ ? 0
+ : (cpi->twopass.gf_group_bits > cpi->twopass.kf_group_bits)
+ ? cpi->twopass.kf_group_bits : cpi->twopass.gf_group_bits;
+
+ // Clip cpi->twopass.gf_group_bits based on user supplied data rate
+ // variability limit (cpi->oxcf.two_pass_vbrmax_section)
+ if (cpi->twopass.gf_group_bits > max_bits * cpi->baseline_gf_interval)
+ cpi->twopass.gf_group_bits = max_bits * cpi->baseline_gf_interval;
+
+ // Reset the file position
+ reset_fpf_position(cpi, start_pos);
+
+ // Update the record of error used so far (only done once per gf group)
+ cpi->twopass.modified_error_used += gf_group_err;
+
+ // Assign bits to the arf or gf. The loop runs twice (i = 0, 1) only when an ARF is pending and this is not a key frame.
+ for (i = 0; i <= (cpi->source_alt_ref_pending && cpi->common.frame_type != KEY_FRAME); i++) {
+ int boost;
+ int allocation_chunks;
+ int Q = (cpi->oxcf.fixed_q < 0) ? cpi->last_q[INTER_FRAME] : cpi->oxcf.fixed_q;
+ int gf_bits;
+
+ boost = (cpi->gfu_boost * vp9_gfboost_qadjust(Q)) / 100;
+
+ // Set max and minimum boost and hence minimum allocation
+ if (boost > ((cpi->baseline_gf_interval + 1) * 200))
+ boost = ((cpi->baseline_gf_interval + 1) * 200);
+ else if (boost < 125)
+ boost = 125;
+
+ if (cpi->source_alt_ref_pending && i == 0)
+ allocation_chunks =
+ ((cpi->baseline_gf_interval + 1) * 100) + boost;
+ else
+ allocation_chunks =
+ (cpi->baseline_gf_interval * 100) + (boost - 100);
+
+ // Prevent overflow
+ if (boost > 1028) {
+ int divisor = boost >> 10;
+ boost /= divisor;
+ allocation_chunks /= divisor;
+ }
+
+ // Calculate the number of bits to be spent on the gf or arf based on
+ // the boost number
+ gf_bits = (int)((double)boost *
+ (cpi->twopass.gf_group_bits /
+ (double)allocation_chunks));
+
+ // If the frame that is to be boosted is simpler than the average for
+ // the gf/arf group then use an alternative calculation
+ // based on the error score of the frame itself
+ if (mod_frame_err < gf_group_err / (double)cpi->baseline_gf_interval) {
+ double alt_gf_grp_bits;
+ int alt_gf_bits;
+
+ alt_gf_grp_bits =
+ (double)cpi->twopass.kf_group_bits *
+ (mod_frame_err * (double)cpi->baseline_gf_interval) /
+ DOUBLE_DIVIDE_CHECK(cpi->twopass.kf_group_error_left);
+
+ alt_gf_bits = (int)((double)boost * (alt_gf_grp_bits /
+ (double)allocation_chunks));
+
+ if (gf_bits > alt_gf_bits) {
+ gf_bits = alt_gf_bits;
+ }
+ }
+ // Else if it is harder than other frames in the group make sure it at
+ // least receives an allocation in keeping with its relative error
+ // score, otherwise it may be worse off than an "un-boosted" frame
+ else {
+ int alt_gf_bits =
+ (int)((double)cpi->twopass.kf_group_bits *
+ mod_frame_err /
+ DOUBLE_DIVIDE_CHECK(cpi->twopass.kf_group_error_left));
+
+ if (alt_gf_bits > gf_bits) {
+ gf_bits = alt_gf_bits;
+ }
+ }
+
+ // Don't allow a negative value for gf_bits
+ if (gf_bits < 0)
+ gf_bits = 0;
+
+ gf_bits += cpi->min_frame_bandwidth; // Add in minimum for a frame
+
+ if (i == 0) {
+ cpi->twopass.gf_bits = gf_bits;
+ }
+ if (i == 1 || (!cpi->source_alt_ref_pending && (cpi->common.frame_type != KEY_FRAME))) {
+ cpi->per_frame_bandwidth = gf_bits; // Per frame bit target for this frame
+ }
+ }
+
+ {
+ // Adjust KF group bits and error remaining
+ cpi->twopass.kf_group_error_left -= (int64_t)gf_group_err;
+ cpi->twopass.kf_group_bits -= cpi->twopass.gf_group_bits;
+
+ if (cpi->twopass.kf_group_bits < 0)
+ cpi->twopass.kf_group_bits = 0;
+
+ // Note the error score left in the remaining frames of the group.
+ // For normal GFs we want to remove the error score for the first frame
+ // of the group (except in Key frame case where this has already
+ // happened)
+ if (!cpi->source_alt_ref_pending && cpi->common.frame_type != KEY_FRAME)
+ cpi->twopass.gf_group_error_left = (int64_t)(gf_group_err
+ - gf_first_frame_err);
+ else
+ cpi->twopass.gf_group_error_left = (int64_t)gf_group_err;
+
+ cpi->twopass.gf_group_bits -= cpi->twopass.gf_bits - cpi->min_frame_bandwidth;
+
+ if (cpi->twopass.gf_group_bits < 0)
+ cpi->twopass.gf_group_bits = 0;
+
+ // This condition could fail if there are two kfs very close together
+ // despite (MIN_GF_INTERVAL) and would cause a divide by 0 in the
+ // calculation of cpi->twopass.alt_extra_bits.
+ if (cpi->baseline_gf_interval >= 3) {
+ int boost = (cpi->source_alt_ref_pending)
+ ? b_boost : cpi->gfu_boost;
+
+ if (boost >= 150) {
+ int pct_extra;
+
+ pct_extra = (boost - 100) / 50;
+ pct_extra = (pct_extra > 20) ? 20 : pct_extra;
+
+ cpi->twopass.alt_extra_bits = (int)
+ ((cpi->twopass.gf_group_bits * pct_extra) / 100);
+ cpi->twopass.gf_group_bits -= cpi->twopass.alt_extra_bits;
+ cpi->twopass.alt_extra_bits /=
+ ((cpi->baseline_gf_interval - 1) >> 1);
+ } else
+ cpi->twopass.alt_extra_bits = 0;
+ } else
+ cpi->twopass.alt_extra_bits = 0;
+ }
+
+ if (cpi->common.frame_type != KEY_FRAME) {  // for non key frames, derive section_intra_rating from the stats of the frames in this group
+ FIRSTPASS_STATS sectionstats;
+
+ zero_stats(&sectionstats);
+ reset_fpf_position(cpi, start_pos);
+
+ for (i = 0; i < cpi->baseline_gf_interval; i++) {
+ input_stats(cpi, &next_frame);
+ accumulate_stats(&sectionstats, &next_frame);
+ }
+
+ avg_stats(&sectionstats);
+
+ cpi->twopass.section_intra_rating = (int)
+ (sectionstats.intra_error /
+ DOUBLE_DIVIDE_CHECK(sectionstats.coded_error));
+
+ reset_fpf_position(cpi, start_pos);
+ }
+}
+
+// Allocate bits to a normal frame that is neither a gf, an arf nor a key frame; the result is stored in cpi->per_frame_bandwidth.
+static void assign_std_frame_bits(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
+ int target_frame_size; // Bits assigned to this frame out of cpi->twopass.gf_group_bits
+
+ double modified_err;
+ double err_fraction; // What portion of the remaining GF group error is used by this frame
+
+ int max_bits = frame_max_bits(cpi); // Max for a single frame
+
+ // Calculate modified prediction error used in bit allocation
+ modified_err = calculate_modified_err(cpi, this_frame);
+
+ if (cpi->twopass.gf_group_error_left > 0)
+ err_fraction = modified_err / cpi->twopass.gf_group_error_left; // What portion of the remaining GF group error is used by this frame
+ else
+ err_fraction = 0.0;
+
+ target_frame_size = (int)((double)cpi->twopass.gf_group_bits * err_fraction); // How many of those bits available for allocation should we give it?
+
+ // Clip target size to 0 - max_bits (or cpi->twopass.gf_group_bits) at the top end.
+ if (target_frame_size < 0)
+ target_frame_size = 0;
+ else {
+ if (target_frame_size > max_bits)
+ target_frame_size = max_bits;
+
+ if (target_frame_size > cpi->twopass.gf_group_bits)
+ target_frame_size = (int)cpi->twopass.gf_group_bits;
+ }
+
+ // Adjust error remaining
+ cpi->twopass.gf_group_error_left -= (int64_t)modified_err;
+ cpi->twopass.gf_group_bits -= target_frame_size; // Adjust bits remaining
+
+ if (cpi->twopass.gf_group_bits < 0)
+ cpi->twopass.gf_group_bits = 0;
+
+ target_frame_size += cpi->min_frame_bandwidth; // Add in the minimum number of bits that is set aside for every frame.
+
+
+ cpi->per_frame_bandwidth = target_frame_size; // Per frame bit target for this frame
+}
+
+// Make a damped adjustment to the active max q: target (7 * old + new) / 8 in converted-q terms and return the first index, stepping from old_maxqi towards new_maxqi, that reaches it (new_maxqi if none does).
+static int adjust_active_maxq(int old_maxqi, int new_maxqi) {
+ int i;
+ int ret_val = new_maxqi;
+ double old_q;
+ double new_q;
+ double target_q;
+
+ old_q = vp9_convert_qindex_to_q(old_maxqi);
+ new_q = vp9_convert_qindex_to_q(new_maxqi);
+
+ target_q = ((old_q * 7.0) + new_q) / 8.0;
+
+ if (target_q > old_q) {  // target is above the old value: search upwards through the indices
+ for (i = old_maxqi; i <= new_maxqi; i++) {
+ if (vp9_convert_qindex_to_q(i) >= target_q) {
+ ret_val = i;
+ break;
+ }
+ }
+ } else {  // target is at or below the old value: search downwards through the indices
+ for (i = old_maxqi; i >= new_maxqi; i--) {
+ if (vp9_convert_qindex_to_q(i) <= target_q) {
+ ret_val = i;
+ break;
+ }
+ }
+ }
+
+ return ret_val;
+}
+
+void vp9_second_pass(VP9_COMP *cpi) {
+ int tmp_q;
+ int frames_left = (int)(cpi->twopass.total_stats->count - cpi->common.current_video_frame);
+ // Per-frame second pass step: consumes one first pass stats record, sets the nominal target bandwidth and updates the active max q.
+ FIRSTPASS_STATS this_frame;
+ FIRSTPASS_STATS this_frame_copy;
+
+ double this_frame_intra_error;
+ double this_frame_coded_error;
+
+ int overhead_bits;
+ // Nothing to do when no first pass stats are loaded.
+ if (!cpi->twopass.stats_in) {
+ return;
+ }
+
+ vp9_clear_system_state();
+
+ vpx_memset(&this_frame, 0, sizeof(FIRSTPASS_STATS));
+ // Read this frame's stats record; give up when input_stats reports EOF.
+ if (EOF == input_stats(cpi, &this_frame))
+ return;
+
+ this_frame_intra_error = this_frame.intra_error;
+ this_frame_coded_error = this_frame.coded_error;
+
+ // Key frame and section processing: runs when the frames_to_key countdown has reached zero.
+ if (cpi->twopass.frames_to_key == 0) {
+ // Define next KF group and assign bits to it (works on a copy so this_frame stays intact for subtract_stats below)
+ vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
+ find_next_key_frame(cpi, &this_frame_copy);
+ }
+
+ // Is this a GF / ARF (Note that a KF is always also a GF)
+ if (cpi->frames_till_gf_update_due == 0) {
+ // Define next gf group and assign bits to it
+ vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
+ define_gf_group(cpi, &this_frame_copy);
+
+ // If we are going to code an altref frame at the end of the group and the current frame is not a key frame....
+ // If the previous group used an arf this frame has already benefited from that arf boost and it should not be given extra bits
+ // If the previous group was NOT coded using arf we may want to apply some boost to this GF as well
+ if (cpi->source_alt_ref_pending && (cpi->common.frame_type != KEY_FRAME)) {
+ // Assign a standard frames worth of bits from those allocated to the GF group
+ int bak = cpi->per_frame_bandwidth; // saved here and restored after the call below
+ vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
+ assign_std_frame_bits(cpi, &this_frame_copy);
+ cpi->per_frame_bandwidth = bak;
+ }
+ }
+
+ // Otherwise this is an ordinary frame
+ else {
+ // Assign bits from those allocated to the GF group
+ vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
+ assign_std_frame_bits(cpi, &this_frame_copy);
+ }
+
+ // Keep a globally available copy of this and the next frame's iiratio (intra error / coded error).
+ cpi->twopass.this_iiratio = (int)(this_frame_intra_error /
+ DOUBLE_DIVIDE_CHECK(this_frame_coded_error));
+ {
+ FIRSTPASS_STATS next_frame;
+ if (lookup_next_frame_stats(cpi, &next_frame) != EOF) {
+ cpi->twopass.next_iiratio = (int)(next_frame.intra_error /
+ DOUBLE_DIVIDE_CHECK(next_frame.coded_error));
+ }
+ }
+
+ // Set nominal per second bandwidth for this frame (clamped at zero)
+ cpi->target_bandwidth = (int)(cpi->per_frame_bandwidth
+ * cpi->output_frame_rate);
+ if (cpi->target_bandwidth < 0)
+ cpi->target_bandwidth = 0;
+
+
+ // Account for mv, mode and other overheads.
+ overhead_bits = (int)estimate_modemvcost(
+ cpi, cpi->twopass.total_left_stats);
+
+ // Special case code for first frame.
+ if (cpi->common.current_video_frame == 0) {
+ cpi->twopass.est_max_qcorrection_factor = 1.0;
+
+ // Set a cq_level in constrained quality mode.
+ if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) {
+ int est_cq;
+
+ est_cq =
+ estimate_cq(cpi,
+ cpi->twopass.total_left_stats,
+ (int)(cpi->twopass.bits_left / frames_left),
+ overhead_bits);
+ // Use the larger of the configured cq_level and the estimate.
+ cpi->cq_target_quality = cpi->oxcf.cq_level;
+ if (est_cq > cpi->cq_target_quality)
+ cpi->cq_target_quality = est_cq;
+ }
+
+ // Guess at the maxq needed in the 2nd pass
+ cpi->twopass.maxq_max_limit = cpi->worst_quality;
+ cpi->twopass.maxq_min_limit = cpi->best_quality;
+
+ tmp_q = estimate_max_q(
+ cpi,
+ cpi->twopass.total_left_stats,
+ (int)(cpi->twopass.bits_left / frames_left),
+ overhead_bits);
+
+ cpi->active_worst_quality = tmp_q;
+ cpi->ni_av_qi = tmp_q;
+ cpi->avg_q = vp9_convert_qindex_to_q(tmp_q);
+
+ // Limit the maxq value returned subsequently.
+ // This increases the risk of overspend or underspend if the initial
+ // estimate for the clip is bad, but helps prevent excessive
+ // variation in Q, especially near the end of a clip
+ // where for example a small overspend may cause Q to crash
+ adjust_maxq_qrange(cpi);
+ }
+
+ // The last few frames of a clip almost always have too few or too many
+ // bits and for the sake of over exact rate control we don't want to make
+ // radical adjustments to the allowed quantizer range just to use up a
+ // few surplus bits or get beneath the target rate.
+ else if ((cpi->common.current_video_frame <
+ (((unsigned int)cpi->twopass.total_stats->count * 255) >> 8)) &&
+ ((cpi->common.current_video_frame + cpi->baseline_gf_interval) <
+ (unsigned int)cpi->twopass.total_stats->count)) {
+ if (frames_left < 1)
+ frames_left = 1;
+
+ tmp_q = estimate_max_q(
+ cpi,
+ cpi->twopass.total_left_stats,
+ (int)(cpi->twopass.bits_left / frames_left),
+ overhead_bits);
+
+ // Make a damped adjustment to active max Q
+ cpi->active_worst_quality =
+ adjust_active_maxq(cpi->active_worst_quality, tmp_q);
+ }
+
+ cpi->twopass.frames_to_key--;
+
+ // Update the total stats remaining structure
+ subtract_stats(cpi->twopass.total_left_stats, &this_frame);
+}
+
+
+static BOOL test_candidate_kf(VP9_COMP *cpi, FIRSTPASS_STATS *last_frame, FIRSTPASS_STATS *this_frame, FIRSTPASS_STATS *next_frame) {
+ BOOL is_viable_kf = FALSE;
+ // Returns TRUE when this_frame passes the stats thresholds below and the frames that follow it are then predicted well enough.
+ // Does the frame satisfy the primary criteria of a key frame
+ // If so, then examine how well it predicts subsequent frames
+ if ((this_frame->pcnt_second_ref < 0.10) &&
+ (next_frame->pcnt_second_ref < 0.10) &&
+ ((this_frame->pcnt_inter < 0.05) ||
+ (
+ ((this_frame->pcnt_inter - this_frame->pcnt_neutral) < .35) &&
+ ((this_frame->intra_error / DOUBLE_DIVIDE_CHECK(this_frame->coded_error)) < 2.5) &&
+ ((fabs(last_frame->coded_error - this_frame->coded_error) / DOUBLE_DIVIDE_CHECK(this_frame->coded_error) > .40) ||
+ (fabs(last_frame->intra_error - this_frame->intra_error) / DOUBLE_DIVIDE_CHECK(this_frame->intra_error) > .40) ||
+ ((next_frame->intra_error / DOUBLE_DIVIDE_CHECK(next_frame->coded_error)) > 3.5)
+ )
+ )
+ )
+ ) {
+ int i;
+ FIRSTPASS_STATS *start_pos;
+
+ FIRSTPASS_STATS local_next_frame;
+
+ double boost_score = 0.0;
+ double old_boost_score = 0.0;
+ double decay_accumulator = 1.0;
+ double next_iiratio;
+ // Work on a local copy so the caller's next_frame record is not overwritten by the look-ahead reads.
+ vpx_memcpy(&local_next_frame, next_frame, sizeof(*next_frame));
+
+ // Note the starting file position so we can reset to it
+ start_pos = cpi->twopass.stats_in;
+
+ // Examine how well the key frame predicts subsequent frames (at most 16 of them)
+ for (i = 0; i < 16; i++) {
+ next_iiratio = (IIKFACTOR1 * local_next_frame.intra_error / DOUBLE_DIVIDE_CHECK(local_next_frame.coded_error));
+
+ if (next_iiratio > RMAX)
+ next_iiratio = RMAX;
+
+ // Cumulative effect of decay in prediction quality
+ if (local_next_frame.pcnt_inter > 0.85)
+ decay_accumulator = decay_accumulator * local_next_frame.pcnt_inter;
+ else
+ decay_accumulator = decay_accumulator * ((0.85 + local_next_frame.pcnt_inter) / 2.0);
+
+ // decay_accumulator = decay_accumulator * local_next_frame.pcnt_inter;
+
+ // Keep a running total
+ boost_score += (decay_accumulator * next_iiratio);
+
+ // Test various breakout clauses
+ if ((local_next_frame.pcnt_inter < 0.05) ||
+ (next_iiratio < 1.5) ||
+ (((local_next_frame.pcnt_inter -
+ local_next_frame.pcnt_neutral) < 0.20) &&
+ (next_iiratio < 3.0)) ||
+ ((boost_score - old_boost_score) < 3.0) ||
+ (local_next_frame.intra_error < 200)
+ ) {
+ break;
+ }
+
+ old_boost_score = boost_score;
+
+ // Get the next frame details
+ if (EOF == input_stats(cpi, &local_next_frame))
+ break;
+ }
+ // NOTE(review): the stats read position is rewound only on the reject path below - confirm callers rely on that.
+ // Accept the candidate only if the accumulated boost is high enough and the scan got past i > 3; otherwise discard this potential key frame and move on
+ if (boost_score > 30.0 && (i > 3))
+ is_viable_kf = TRUE;
+ else {
+ // Reset the file position
+ reset_fpf_position(cpi, start_pos);
+
+ is_viable_kf = FALSE;
+ }
+ }
+
+ return is_viable_kf;
+}
+static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
+ int i, j;
+ FIRSTPASS_STATS last_frame;
+ FIRSTPASS_STATS first_frame;
+ FIRSTPASS_STATS next_frame;
+ FIRSTPASS_STATS *start_position;
+ // Scans ahead in the first pass stats to decide where the next key frame falls, then sizes the kf group and the key frame itself.
+ double decay_accumulator = 1.0;
+ double zero_motion_accumulator = 1.0;
+ double boost_score = 0;
+ double old_boost_score = 0.0;
+ double loop_decay_rate;
+
+ double kf_mod_err = 0.0;
+ double kf_group_err = 0.0;
+ double kf_group_intra_err = 0.0;
+ double kf_group_coded_err = 0.0;
+ double recent_loop_decay[8] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
+
+ vpx_memset(&next_frame, 0, sizeof(next_frame)); // assure clean
+
+ vp9_clear_system_state(); // __asm emms;
+ start_position = cpi->twopass.stats_in;
+
+ cpi->common.frame_type = KEY_FRAME;
+
+ // is this a forced key frame by interval
+ cpi->this_key_frame_forced = cpi->next_key_frame_forced;
+
+ // Clear the alt ref active flag as this can never be active on a key frame
+ cpi->source_alt_ref_active = FALSE;
+
+ // Kf is always a gf so clear frames till next gf counter
+ cpi->frames_till_gf_update_due = 0;
+
+ cpi->twopass.frames_to_key = 1;
+
+ // Take a copy of the initial frame details
+ vpx_memcpy(&first_frame, this_frame, sizeof(*this_frame));
+
+ cpi->twopass.kf_group_bits = 0; // Total bits available to kf group
+ cpi->twopass.kf_group_error_left = 0; // Group modified error score.
+
+ kf_mod_err = calculate_modified_err(cpi, this_frame);
+
+ // find the next keyframe
+ i = 0;
+ while (cpi->twopass.stats_in < cpi->twopass.stats_in_end) {
+ // Accumulate kf group error
+ kf_group_err += calculate_modified_err(cpi, this_frame);
+
+ // These figures keep intra and coded error counts for all frames including key frames in the group.
+ // The effect of the key frame itself can be subtracted out using the first_frame data collected above
+ kf_group_intra_err += this_frame->intra_error;
+ kf_group_coded_err += this_frame->coded_error;
+
+ // Load the next frame's stats
+ vpx_memcpy(&last_frame, this_frame, sizeof(*this_frame));
+ input_stats(cpi, this_frame);
+
+ // Provided that we are not at the end of the file...
+ if (cpi->oxcf.auto_key
+ && lookup_next_frame_stats(cpi, &next_frame) != EOF) {
+ // Normal scene cut check
+ if (test_candidate_kf(cpi, &last_frame, this_frame, &next_frame)) {
+ break;
+ }
+
+ // How fast is prediction quality decaying
+ loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame);
+
+ // We want to know something about the recent past... rather than
+ // as used elsewhere where we are concerned with decay in prediction
+ // quality since the last GF or KF.
+ recent_loop_decay[i % 8] = loop_decay_rate;
+ decay_accumulator = 1.0;
+ for (j = 0; j < 8; j++) {
+ decay_accumulator = decay_accumulator * recent_loop_decay[j];
+ }
+
+ // Special check for a transition, or high motion, that is followed
+ // by a static scene.
+ if (detect_transition_to_still(cpi, i,
+ (cpi->key_frame_frequency - i),
+ loop_decay_rate,
+ decay_accumulator)) {
+ break;
+ }
+
+
+ // Step on to the next frame
+ cpi->twopass.frames_to_key++;
+
+ // If we don't have a real key frame within the next two
+ // forcekeyframeevery intervals then break out of the loop.
+ if (cpi->twopass.frames_to_key >= 2 * (int)cpi->key_frame_frequency)
+ break;
+ } else
+ cpi->twopass.frames_to_key++;
+
+ i++;
+ }
+
+ // If there is a max kf interval set by the user we must obey it.
+ // We already breakout of the loop above at 2x max.
+ // This code centers the extra kf if the actual natural
+ // interval is between 1x and 2x
+ if (cpi->oxcf.auto_key
+ && cpi->twopass.frames_to_key > (int)cpi->key_frame_frequency) {
+ FIRSTPASS_STATS *current_pos = cpi->twopass.stats_in;
+ FIRSTPASS_STATS tmp_frame;
+
+ cpi->twopass.frames_to_key /= 2;
+
+ // Copy first frame details
+ vpx_memcpy(&tmp_frame, &first_frame, sizeof(first_frame));
+
+ // Reset to the start of the group
+ reset_fpf_position(cpi, start_position);
+
+ kf_group_err = 0;
+ kf_group_intra_err = 0;
+ kf_group_coded_err = 0;
+
+ // Rescan to get the correct error data for the forced kf group
+ for (i = 0; i < cpi->twopass.frames_to_key; i++) {
+ // Accumulate kf group errors
+ kf_group_err += calculate_modified_err(cpi, &tmp_frame);
+ kf_group_intra_err += tmp_frame.intra_error;
+ kf_group_coded_err += tmp_frame.coded_error;
+
+ // Load the next frame's stats
+ input_stats(cpi, &tmp_frame);
+ }
+
+ // Restore the read position that was reached before the rescan
+ reset_fpf_position(cpi, current_pos);
+
+ cpi->next_key_frame_forced = TRUE;
+ } else
+ cpi->next_key_frame_forced = FALSE;
+
+ // Special case for the last frame of the file
+ if (cpi->twopass.stats_in >= cpi->twopass.stats_in_end) {
+ // Accumulate kf group error
+ kf_group_err += calculate_modified_err(cpi, this_frame);
+
+ // These figures keep intra and coded error counts for all frames including key frames in the group.
+ // The effect of the key frame itself can be subtracted out using the first_frame data collected above
+ kf_group_intra_err += this_frame->intra_error;
+ kf_group_coded_err += this_frame->coded_error;
+ }
+
+ // Calculate the number of bits that should be assigned to the kf group.
+ if ((cpi->twopass.bits_left > 0) && (cpi->twopass.modified_error_left > 0.0)) {
+ // Max for a single normal frame (not key frame)
+ int max_bits = frame_max_bits(cpi);
+
+ // Maximum bits for the kf group
+ int64_t max_grp_bits;
+
+ // Default allocation based on bits left and relative
+ // complexity of the section
+ cpi->twopass.kf_group_bits = (int64_t)(cpi->twopass.bits_left *
+ (kf_group_err /
+ cpi->twopass.modified_error_left));
+
+ // Clip based on maximum per frame rate defined by the user.
+ max_grp_bits = (int64_t)max_bits * (int64_t)cpi->twopass.frames_to_key;
+ if (cpi->twopass.kf_group_bits > max_grp_bits)
+ cpi->twopass.kf_group_bits = max_grp_bits;
+ } else
+ cpi->twopass.kf_group_bits = 0;
+
+ // Reset the first pass file position
+ reset_fpf_position(cpi, start_position);
+
+ // determine how big to make this keyframe based on how well the subsequent frames use inter blocks
+ decay_accumulator = 1.0;
+ boost_score = 0.0;
+ loop_decay_rate = 1.00; // Starting decay rate
+
+ for (i = 0; i < cpi->twopass.frames_to_key; i++) {
+ double r;
+
+ if (EOF == input_stats(cpi, &next_frame))
+ break;
+
+ if (next_frame.intra_error > cpi->twopass.kf_intra_err_min)
+ r = (IIKFACTOR2 * next_frame.intra_error /
+ DOUBLE_DIVIDE_CHECK(next_frame.coded_error));
+ else
+ r = (IIKFACTOR2 * cpi->twopass.kf_intra_err_min /
+ DOUBLE_DIVIDE_CHECK(next_frame.coded_error));
+
+ if (r > RMAX)
+ r = RMAX;
+
+ // Monitor for static sections.
+ if ((next_frame.pcnt_inter - next_frame.pcnt_motion) <
+ zero_motion_accumulator) {
+ zero_motion_accumulator =
+ (next_frame.pcnt_inter - next_frame.pcnt_motion);
+ }
+
+ // How fast is prediction quality decaying
+ if (!detect_flash(cpi, 0)) {
+ loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame);
+ decay_accumulator = decay_accumulator * loop_decay_rate;
+ decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR
+ ? MIN_DECAY_FACTOR : decay_accumulator;
+ }
+
+ boost_score += (decay_accumulator * r);
+
+ if ((i > MIN_GF_INTERVAL) &&
+ ((boost_score - old_boost_score) < 6.25)) {
+ break;
+ }
+
+ old_boost_score = boost_score;
+ }
+
+ {
+ FIRSTPASS_STATS sectionstats;
+
+ zero_stats(&sectionstats);
+ reset_fpf_position(cpi, start_position);
+
+ for (i = 0; i < cpi->twopass.frames_to_key; i++) {
+ input_stats(cpi, &next_frame);
+ accumulate_stats(&sectionstats, &next_frame);
+ }
+
+ avg_stats(&sectionstats);
+
+ cpi->twopass.section_intra_rating = (int)
+ (sectionstats.intra_error
+ / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error));
+ }
+
+ // Reset the first pass file position
+ reset_fpf_position(cpi, start_position);
+
+ // Work out how many bits to allocate for the key frame itself
+ if (1) {
+ int kf_boost = (int)boost_score;
+ int allocation_chunks;
+ int alt_kf_bits;
+
+ if (kf_boost < (cpi->twopass.frames_to_key * 5))
+ kf_boost = (cpi->twopass.frames_to_key * 5);
+
+ if (kf_boost < 300) // Min KF boost
+ kf_boost = 300;
+
+ // Make a note of baseline boost and the zero motion
+ // accumulator value for use elsewhere.
+ cpi->kf_boost = kf_boost;
+ cpi->kf_zeromotion_pct = (int)(zero_motion_accumulator * 100.0);
+
+ // We do three calculations for kf size.
+ // The first is based on the error score for the whole kf group.
+ // The second (optionally) on the key frame's own error if this is
+ // smaller than the average for the group.
+ // The final one ensures that the frame receives at least the
+ // allocation it would have received based on its own error score vs
+ // the error score remaining
+ // Special case if the sequence appears almost totally static
+ // In this case we want to spend almost all of the bits on the
+ // key frame.
+ // cpi->twopass.frames_to_key-1 because key frame itself is taken
+ // care of by kf_boost.
+ if (zero_motion_accumulator >= 0.99) {
+ allocation_chunks =
+ ((cpi->twopass.frames_to_key - 1) * 10) + kf_boost;
+ } else {
+ allocation_chunks =
+ ((cpi->twopass.frames_to_key - 1) * 100) + kf_boost;
+ }
+
+ // Prevent overflow
+ if (kf_boost > 1028) {
+ int divisor = kf_boost >> 10;
+ kf_boost /= divisor;
+ allocation_chunks /= divisor;
+ }
+
+ cpi->twopass.kf_group_bits = (cpi->twopass.kf_group_bits < 0) ? 0 : cpi->twopass.kf_group_bits;
+
+ // Calculate the number of bits to be spent on the key frame
+ cpi->twopass.kf_bits = (int)((double)kf_boost * ((double)cpi->twopass.kf_group_bits / (double)allocation_chunks));
+
+ // If the key frame is actually easier than the average for the
+ // kf group (which does sometimes happen... eg a blank intro frame)
+ // Then use an alternate calculation based on the kf error score
+ // which should give a smaller key frame.
+ if (kf_mod_err < kf_group_err / cpi->twopass.frames_to_key) {
+ double alt_kf_grp_bits =
+ ((double)cpi->twopass.bits_left *
+ (kf_mod_err * (double)cpi->twopass.frames_to_key) /
+ DOUBLE_DIVIDE_CHECK(cpi->twopass.modified_error_left));
+
+ alt_kf_bits = (int)((double)kf_boost *
+ (alt_kf_grp_bits / (double)allocation_chunks));
+
+ if (cpi->twopass.kf_bits > alt_kf_bits) {
+ cpi->twopass.kf_bits = alt_kf_bits;
+ }
+ }
+ // Else if it is much harder than other frames in the group make sure
+ // it at least receives an allocation in keeping with its relative
+ // error score
+ else {
+ alt_kf_bits =
+ (int)((double)cpi->twopass.bits_left *
+ (kf_mod_err /
+ DOUBLE_DIVIDE_CHECK(cpi->twopass.modified_error_left)));
+
+ if (alt_kf_bits > cpi->twopass.kf_bits) {
+ cpi->twopass.kf_bits = alt_kf_bits;
+ }
+ }
+
+ cpi->twopass.kf_group_bits -= cpi->twopass.kf_bits;
+ // Add in the minimum frame allowance
+ cpi->twopass.kf_bits += cpi->min_frame_bandwidth;
+
+ // Per frame bit target for this frame
+ cpi->per_frame_bandwidth = cpi->twopass.kf_bits;
+ // Convert to a per second bitrate
+ cpi->target_bandwidth = (int)(cpi->twopass.kf_bits *
+ cpi->output_frame_rate);
+ }
+
+ // Note the total error score of the kf group minus the key frame itself
+ cpi->twopass.kf_group_error_left = (int)(kf_group_err - kf_mod_err);
+
+ // Adjust the count of total modified error left.
+ // The count of bits left is adjusted elsewhere based on real coded frame sizes
+ cpi->twopass.modified_error_left -= kf_group_err;
+}
diff --git a/vp9/encoder/vp9_firstpass.h b/vp9/encoder/vp9_firstpass.h
new file mode 100644
index 0000000..52a48f2
--- /dev/null
+++ b/vp9/encoder/vp9_firstpass.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_ENCODER_VP9_FIRSTPASS_H_  /* must test the macro defined below */
+#define VP9_ENCODER_VP9_FIRSTPASS_H_
+/* First pass entry points. */
+extern void vp9_init_first_pass(VP9_COMP *cpi);
+extern void vp9_first_pass(VP9_COMP *cpi);
+extern void vp9_end_first_pass(VP9_COMP *cpi);
+/* Second pass entry points. */
+extern void vp9_init_second_pass(VP9_COMP *cpi);
+extern void vp9_second_pass(VP9_COMP *cpi);
+extern void vp9_end_second_pass(VP9_COMP *cpi);
+
+#endif  /* VP9_ENCODER_VP9_FIRSTPASS_H_ */
diff --git a/vp9/encoder/vp9_lookahead.c b/vp9/encoder/vp9_lookahead.c
new file mode 100644
index 0000000..a51c786
--- /dev/null
+++ b/vp9/encoder/vp9_lookahead.c
@@ -0,0 +1,191 @@
+/*
+ * Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include <assert.h>
+#include <stdlib.h>
+#include "vpx_config.h"
+#include "vp9/encoder/vp9_lookahead.h"
+#include "vp9/common/vp9_extend.h"
+
+#define MAX_LAG_BUFFERS 25
+
+struct lookahead_ctx {
+ unsigned int max_sz; /* Capacity of the queue: number of entries in buf */
+ unsigned int sz; /* Number of buffers currently in the queue */
+ unsigned int read_idx; /* Index of the next entry to be read */
+ unsigned int write_idx; /* Index of the next entry to be written */
+ struct lookahead_entry *buf; /* Array of max_sz entries, indexed with wraparound */
+};
+
+
+/* Fetch the entry at *idx, then step *idx forward one slot (wrapping at max_sz) */
+static struct lookahead_entry *
+pop(struct lookahead_ctx *ctx,
+    unsigned int *idx) {
+  const unsigned int slot = *idx;
+  unsigned int next = slot + 1;
+
+  assert(slot < ctx->max_sz);
+  if (next >= ctx->max_sz)
+    next -= ctx->max_sz;
+  *idx = next;
+  return &ctx->buf[slot];
+}
+
+
+void
+vp9_lookahead_destroy(struct lookahead_ctx *ctx) {
+  unsigned int slot;
+
+  if (!ctx)
+    return;
+
+  /* Release every frame buffer before the entry array that holds them. */
+  for (slot = 0; ctx->buf && slot < ctx->max_sz; slot++)
+    vp8_yv12_de_alloc_frame_buffer(&ctx->buf[slot].img);
+  free(ctx->buf);
+  free(ctx);
+}
+
+
+struct lookahead_ctx *
+vp9_lookahead_init(unsigned int width,
+                   unsigned int height,
+                   unsigned int depth) {
+  struct lookahead_ctx *ctx;
+  unsigned int slot;
+
+  /* Force the queue depth into [1, MAX_LAG_BUFFERS]. */
+  if (depth > MAX_LAG_BUFFERS)
+    depth = MAX_LAG_BUFFERS;
+  if (depth < 1)
+    depth = 1;
+
+  /* Round both dimensions up to the next multiple of 16. */
+  width = (width + 15) & ~15;
+  height = (height + 15) & ~15;
+
+  ctx = calloc(1, sizeof(*ctx));
+  if (!ctx)
+    return NULL;
+  /* Entry array first, then one bordered frame buffer per entry. */
+  ctx->max_sz = depth;
+  ctx->buf = calloc(depth, sizeof(*ctx->buf));
+  for (slot = 0; ctx->buf && slot < depth; slot++) {
+    if (vp8_yv12_alloc_frame_buffer(&ctx->buf[slot].img,
+                                    width, height, VP9BORDERINPIXELS))
+      break;
+  }
+  if (ctx->buf && slot == depth)
+    return ctx;
+  /* Partial construction: release whatever was allocated. */
+  vp9_lookahead_destroy(ctx);
+  return NULL;
+}
+
+
+int
+vp9_lookahead_push(struct lookahead_ctx *ctx,
+                   YV12_BUFFER_CONFIG *src,
+                   int64_t ts_start,
+                   int64_t ts_end,
+                   unsigned int flags,
+                   unsigned char *active_map) {
+  /* Copies src into the next free queue entry. Returns 0 on success and
+   * 1 when the queue is already full. */
+  /* Luma dimensions in 16-pixel units, rounded up. */
+  const int mb_rows = (src->y_height + 15) >> 4;
+  const int mb_cols = (src->y_width + 15) >> 4;
+  struct lookahead_entry *entry;
+  int row, first, past_last;
+
+  /* Refuse the frame when the queue has no free entry. */
+  if (ctx->sz + 1 > ctx->max_sz)
+    return 1;
+
+  /* Claim the next write slot. */
+  ctx->sz++;
+  entry = pop(ctx, &ctx->write_idx);
+
+  // The whole frame is copied unless all of the following hold:
+  //   1. the lookahead queue has a capacity of exactly one entry,
+  //   2. an active map was supplied,
+  //   3. no flags are set on this frame.
+  if (ctx->max_sz != 1 || !active_map || flags) {
+    vp9_copy_and_extend_frame(src, &entry->img);
+  } else {
+    // Partial copy: walk the map one macroblock row at a time and copy
+    // each run of consecutive active macroblocks as one rectangle.
+    // active_map is advanced by one map row after each frame row.
+    for (row = 0; row < mb_rows; ++row, active_map += mb_cols) {
+      for (first = 0; ; first = past_last) {
+        // Skip inactive macroblocks up to the start of the next run.
+        while (first < mb_cols && !active_map[first])
+          ++first;
+
+        // No more active macroblock in this row.
+        if (first == mb_cols)
+          break;
+
+        // Extend the run until an inactive macroblock or the row end.
+        past_last = first;
+        while (past_last < mb_cols && active_map[past_last])
+          ++past_last;
+
+        // Copy only the rectangle that covers this run.
+        vp9_copy_and_extend_frame_with_rect(src, &entry->img,
+                                            row << 4,
+                                            first << 4, 16,
+                                            (past_last - first) << 4);
+      }
+    }
+  }
+
+  // Record the frame's timestamps and flags next to the copied image.
+  entry->ts_start = ts_start;
+  entry->ts_end = ts_end;
+  entry->flags = flags;
+
+  return 0;
+}
+
+
+struct lookahead_entry *
+vp9_lookahead_pop(struct lookahead_ctx *ctx,
+                  int drain) {
+  /* Hand nothing out while the queue is empty, or while it is not yet
+   * full and the caller did not ask to drain it. */
+  if (!ctx->sz || (!drain && ctx->sz != ctx->max_sz))
+    return NULL;
+
+  ctx->sz--;
+  return pop(ctx, &ctx->read_idx);
+}
+
+
+struct lookahead_entry *
+vp9_lookahead_peek(struct lookahead_ctx *ctx,
+                   int index) {
+  /* Peek (no dequeue) `index` slots past read_idx; NULL if negative or not queued. */
+  unsigned int slot;
+  assert(index < (int)ctx->max_sz);  /* signed compare: no unsigned wrap */
+  if (index < 0 || index >= (int)ctx->sz)
+    return NULL;
+  /* Step from the read position, wrapping at the end of the entry array. */
+  slot = ctx->read_idx + (unsigned int)index;
+  if (slot >= ctx->max_sz)
+    slot -= ctx->max_sz;
+  return ctx->buf + slot;
+}
+
+
+unsigned int vp9_lookahead_depth(struct lookahead_ctx *ctx) {
+  /* sz counts the entries that are queued right now. */
+  return ctx->sz;
+}
diff --git a/vp9/encoder/vp9_lookahead.h b/vp9/encoder/vp9_lookahead.h
new file mode 100644
index 0000000..da2910c
--- /dev/null
+++ b/vp9/encoder/vp9_lookahead.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef VP9_ENCODER_VP9_LOOKAHEAD_H_
+#define VP9_ENCODER_VP9_LOOKAHEAD_H_
+#include "vpx_scale/yv12config.h"
+#include "vpx/vpx_integer.h"
+
+struct lookahead_entry {
+ YV12_BUFFER_CONFIG img; /* Frame buffer the source image is copied into */
+ int64_t ts_start; /* Timestamp for the start of this frame */
+ int64_t ts_end; /* Timestamp for the end of this frame */
+ unsigned int flags; /* Flags set on this frame */
+};
+
+
+struct lookahead_ctx;
+
+/**\brief Initializes the lookahead stage
+ *
+ * The lookahead stage is a queue of frame buffers on which some analysis
+ * may be done when buffers are enqueued. Returns NULL if allocation fails.
+ *
+ * \param[in] width, height Frame size; each is rounded up to a multiple of 16
+ * \param[in] depth Queue depth; clamped to the range 1..MAX_LAG_BUFFERS
+ */
+struct lookahead_ctx *vp9_lookahead_init(unsigned int width,
+ unsigned int height,
+ unsigned int depth
+ );
+
+
+/**\brief Destroys the lookahead stage
+ * Frees every frame buffer, the entry array and ctx itself; NULL is a no-op.
+ */
+void vp9_lookahead_destroy(struct lookahead_ctx *ctx);
+
+
+/**\brief Enqueue a source buffer
+ *
+ * This function will copy the source image into a new framebuffer with
+ * the expected stride/border.
+ *
+ * If the queue has a capacity of one entry, active_map is non-NULL and no
+ * flags are set, then only the active macroblocks are copied.
+ * \retval 0 on success, 1 if the queue is already full
+ * \param[in] ctx Pointer to the lookahead context
+ * \param[in] src Pointer to the image to enqueue
+ * \param[in] ts_start Timestamp for the start of this frame
+ * \param[in] ts_end Timestamp for the end of this frame
+ * \param[in] flags Flags set on this frame
+ * \param[in] active_map Map that specifies which macroblock is active
+ */
+int
+vp9_lookahead_push(struct lookahead_ctx *ctx,
+ YV12_BUFFER_CONFIG *src,
+ int64_t ts_start,
+ int64_t ts_end,
+ unsigned int flags,
+ unsigned char *active_map);
+
+
+/**\brief Get the next source buffer to encode
+ *
+ * Removes the entry at the read position from the queue and returns it.
+ * \param[in] ctx Pointer to the lookahead context
+ * \param[in] drain Flag indicating the buffer should be drained
+ * (return a buffer regardless of the current queue depth)
+ *
+ * \retval NULL, if drain set and queue is empty
+ * \retval NULL, if drain not set and queue not of the configured depth
+ *
+ */
+struct lookahead_entry *
+vp9_lookahead_pop(struct lookahead_ctx *ctx,
+ int drain);
+
+
+/**\brief Get a future source buffer to encode, without removing it from the queue
+ *
+ * \param[in] ctx Pointer to the lookahead context
+ * \param[in] index Index of the frame to be returned, 0 == next frame
+ *
+ * \retval NULL, if no buffer exists at the specified index
+ *
+ */
+struct lookahead_entry *
+vp9_lookahead_peek(struct lookahead_ctx *ctx,
+ int index);
+
+
+/**\brief Get the number of frames currently in the lookahead queue
+ * \return The context's current entry count
+ * \param[in] ctx Pointer to the lookahead context
+ */
+unsigned int
+vp9_lookahead_depth(struct lookahead_ctx *ctx);
+
+
+#endif
diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c
new file mode 100644
index 0000000..c319e07
--- /dev/null
+++ b/vp9/encoder/vp9_mbgraph.c
@@ -0,0 +1,477 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <limits.h>
+#include <vp9/encoder/vp9_encodeintra.h>
+#include <vp9/encoder/vp9_rdopt.h>
+#include <vp9/common/vp9_setupintrarecon.h>
+#include <vp9/common/vp9_blockd.h>
+#include <vp9/common/vp9_reconinter.h>
+#include <vp9/common/vp9_systemdependent.h>
+#include <vpx_mem/vpx_mem.h>
+#include <vp9/encoder/vp9_segmentation.h>
+
+static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi,
+ int_mv *ref_mv,
+ int_mv *dst_mv) {
+ MACROBLOCK *const x = &cpi->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ BLOCK *b = &x->block[0];
+ BLOCKD *d = &xd->block[0];
+ vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[BLOCK_16X16];
+ unsigned int best_err;
+ int step_param;
+ // Saved MV limits: vp9_clamp_mv_min_max() narrows them; restored before return
+ int tmp_col_min = x->mv_col_min;
+ int tmp_col_max = x->mv_col_max;
+ int tmp_row_min = x->mv_row_min;
+ int tmp_row_max = x->mv_row_max;
+ int_mv ref_full;
+
+ // step_param is sf.first_step, +1 for Speed 6-7 and +2 for Speed >= 8
+ if (cpi->Speed < 8) {
+ step_param = cpi->sf.first_step + ((cpi->Speed > 5) ? 1 : 0);
+ } else {
+ step_param = cpi->sf.first_step + 2;
+ }
+
+ vp9_clamp_mv_min_max(x, ref_mv);
+
+ ref_full.as_mv.col = ref_mv->as_mv.col >> 3;  // 1/8-pel units -> full pel
+ ref_full.as_mv.row = ref_mv->as_mv.row >> 3;
+
+ /* Full-pel stage: the hex search is always used here */
+ best_err = vp9_hex_search(
+ x, b, d,
+ &ref_full, dst_mv,
+ step_param,
+ x->errorperbit,
+ &v_fn_ptr,
+ NULL, NULL,
+ NULL, NULL,
+ ref_mv);
+
+ // Sub-pixel refinement of dst_mv. It always runs; the former guard
+ // "bestsme > error_thresh && bestsme < INT_MAX" is disabled.
+ {
+ int distortion;
+ unsigned int sse;
+ best_err = cpi->find_fractional_mv_step(
+ x, b, d,
+ dst_mv, ref_mv,
+ x->errorperbit, &v_fn_ptr,
+ NULL, NULL,
+ & distortion, &sse);
+ }
+
+#if CONFIG_PRED_FILTER
+ // Disable the prediction filter
+ xd->mode_info_context->mbmi.pred_filter_enabled = 0;
+#endif
+ // The value returned is the SAD computed below, not either search's result
+ vp9_set_mbmode_and_mvs(x, NEWMV, dst_mv);
+ vp9_build_1st_inter16x16_predictors_mby(xd, xd->predictor, 16, 0);
+ best_err = vp9_sad16x16(xd->dst.y_buffer, xd->dst.y_stride,
+ xd->predictor, 16, INT_MAX);
+
+ /* restore UMV window */
+ x->mv_col_min = tmp_col_min;
+ x->mv_col_max = tmp_col_max;
+ x->mv_row_min = tmp_row_min;
+ x->mv_row_max = tmp_row_max;
+
+ return best_err;
+}
+
+// Picks the best 16x16 luma match for the current macroblock in `ref`.
+// Three candidates are compared by SAD and the lowest wins: the zero MV, a
+// search seeded with *ref_mv and, when *ref_mv is non-zero, a search seeded
+// with (0,0). The winning MV is stored in *dst_mv and its error returned.
+// buf_mb_y_offset / mb_y_offset locate the macroblock's luma in buf / ref.
+static int do_16x16_motion_search(VP9_COMP *cpi, int_mv *ref_mv,
+                                  int_mv *dst_mv, YV12_BUFFER_CONFIG *buf,
+                                  int buf_mb_y_offset,
+                                  YV12_BUFFER_CONFIG *ref,
+                                  int mb_y_offset) {
+  MACROBLOCK *const x = &cpi->mb;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  unsigned int err, tmp_err;
+  int_mv tmp_mv;
+  int n;
+
+  for (n = 0; n < 16; n++) {
+    BLOCKD *d = &xd->block[n];
+    BLOCK *b = &x->block[n];
+
+    b->base_src = &buf->y_buffer;
+    b->src_stride = buf->y_stride;
+    b->src = buf->y_stride * (n & 12) + (n & 3) * 4 + buf_mb_y_offset;
+
+    d->base_pre = &ref->y_buffer;
+    d->pre_stride = ref->y_stride;
+    d->pre = ref->y_stride * (n & 12) + (n & 3) * 4 + mb_y_offset;
+  }
+
+  // Try zero MV first
+  // FIXME should really use something like near/nearest MV and/or MV prediction
+  xd->pre.y_buffer = ref->y_buffer + mb_y_offset;
+  xd->pre.y_stride = ref->y_stride;
+  err = vp9_sad16x16(ref->y_buffer + mb_y_offset, ref->y_stride,
+                     xd->dst.y_buffer, xd->dst.y_stride, INT_MAX);
+  dst_mv->as_int = 0;
+
+  // Test last reference frame using the previous best mv as the
+  // starting point (best reference) for the search
+  tmp_err = do_16x16_motion_iteration(cpi, ref_mv, &tmp_mv);
+  if (tmp_err < err) {
+    err = tmp_err;
+    dst_mv->as_int = tmp_mv.as_int;
+  }
+
+  // If the current best reference mv is not centred on 0,0 then do a 0,0 based search as well
+  if (ref_mv->as_int) {
+    // tmp_err and tmp_mv are the function-level scratch variables; no
+    // shadowing copies are declared in this scope.
+    int_mv zero_ref_mv;
+    zero_ref_mv.as_int = 0;
+    tmp_err = do_16x16_motion_iteration(cpi, &zero_ref_mv, &tmp_mv);
+    if (tmp_err < err) {
+      dst_mv->as_int = tmp_mv.as_int;
+      err = tmp_err;
+    }
+  }
+
+  return err;
+}
+
+static int do_16x16_zerozero_search
+(
+ VP9_COMP *cpi,
+ int_mv *dst_mv,
+ YV12_BUFFER_CONFIG *buf,
+ int buf_mb_y_offset,
+ YV12_BUFFER_CONFIG *ref,
+ int mb_y_offset
+) {
+ MACROBLOCK *const x = &cpi->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ unsigned int err;
+ int n;
+
+ for (n = 0; n < 16; n++) {
+ BLOCKD *d = &xd->block[n];
+ BLOCK *b = &x->block[n];
+
+ b->base_src = &buf->y_buffer;
+ b->src_stride = buf->y_stride;
+ b->src = buf->y_stride * (n & 12) + (n & 3) * 4 + buf_mb_y_offset;
+
+ d->base_pre = &ref->y_buffer;
+ d->pre_stride = ref->y_stride;
+ d->pre = ref->y_stride * (n & 12) + (n & 3) * 4 + mb_y_offset;
+ }
+
+ // Only the zero MV is evaluated: err is the 16x16 SAD between the ref block
+ // at mb_y_offset and the block at xd->dst.y_buffer; no search is performed.
+ xd->pre.y_buffer = ref->y_buffer + mb_y_offset;
+ xd->pre.y_stride = ref->y_stride;
+ err = vp9_sad16x16(ref->y_buffer + mb_y_offset, ref->y_stride,
+ xd->dst.y_buffer, xd->dst.y_stride, INT_MAX);
+
+ dst_mv->as_int = 0;
+
+ return err;
+}
+static int find_best_16x16_intra
+(
+ VP9_COMP *cpi,
+ YV12_BUFFER_CONFIG *buf,
+ int mb_y_offset,
+ MB_PREDICTION_MODE *pbest_mode
+) {
+ MACROBLOCK *const x = &cpi->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_PREDICTION_MODE best_mode = -1, mode;
+ unsigned int best_err = INT_MAX;
+
+ // calculate the SAD (vp9_sad16x16) for each intra mode DC_PRED..TM_PRED;
+ // we're intentionally not doing 4x4, we just want a rough estimate
+ for (mode = DC_PRED; mode <= TM_PRED; mode++) {
+ unsigned int err;
+
+ xd->mode_info_context->mbmi.mode = mode;
+ vp9_build_intra_predictors_mby(xd);
+ err = vp9_sad16x16(xd->predictor, 16, buf->y_buffer + mb_y_offset,
+ buf->y_stride, best_err);
+ // find best; best_mode stays -1 if no mode scores below INT_MAX
+ if (err < best_err) {
+ best_err = err;
+ best_mode = mode;
+ }
+ }
+
+ if (pbest_mode)
+ *pbest_mode = best_mode;
+
+ return best_err;
+}
+
+static void update_mbgraph_mb_stats
+(
+ VP9_COMP *cpi,
+ MBGRAPH_MB_STATS *stats,
+ YV12_BUFFER_CONFIG *buf,
+ int mb_y_offset,
+ YV12_BUFFER_CONFIG *golden_ref,
+ int_mv *prev_golden_ref_mv,
+ int gld_y_offset,
+ YV12_BUFFER_CONFIG *alt_ref,
+ int_mv *prev_alt_ref_mv,
+ int arf_y_offset
+) {
+ MACROBLOCK *const x = &cpi->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ int intra_error;
+
+ // FIXME in practice we're completely ignoring chroma here
+ xd->dst.y_buffer = buf->y_buffer + mb_y_offset;
+
+ // do intra 16x16 prediction; the stored error is clamped to a minimum of 1
+ intra_error = find_best_16x16_intra(cpi, buf, mb_y_offset, &stats->ref[INTRA_FRAME].m.mode);
+ if (intra_error <= 0)
+ intra_error = 1;
+ stats->ref[INTRA_FRAME].err = intra_error;
+
+ // Golden frame MV search, run only when a golden reference was supplied
+ if (golden_ref) {
+ int g_motion_error = do_16x16_motion_search(cpi, prev_golden_ref_mv,
+ &stats->ref[GOLDEN_FRAME].m.mv,
+ buf, mb_y_offset,
+ golden_ref, gld_y_offset);
+ stats->ref[GOLDEN_FRAME].err = g_motion_error;
+ } else {
+ stats->ref[GOLDEN_FRAME].err = INT_MAX;
+ stats->ref[GOLDEN_FRAME].m.mv.as_int = 0;
+ }
+
+ // Alt-ref frame: scored at the zero MV only, when an alt-ref was supplied
+ if (alt_ref) {
+ // NOTE: a full do_16x16_motion_search() seeded with prev_alt_ref_mv is
+ // disabled here, so prev_alt_ref_mv is currently unused by this
+ // function; only the (0,0) vector is evaluated for the alt-ref via
+ // do_16x16_zerozero_search().
+
+ int a_motion_error =
+ do_16x16_zerozero_search(cpi,
+ &stats->ref[ALTREF_FRAME].m.mv,
+ buf, mb_y_offset,
+ alt_ref, arf_y_offset);
+
+ stats->ref[ALTREF_FRAME].err = a_motion_error;
+ } else {
+ stats->ref[ALTREF_FRAME].err = INT_MAX;
+ stats->ref[ALTREF_FRAME].m.mv.as_int = 0;
+ }
+}
+
+// Fills stats->mb_stats (raster order, cm->mb_cols entries per row) for every
+// macroblock of buf via update_mbgraph_mb_stats(). golden_ref and alt_ref
+// may each be NULL, in which case that reference is not searched.
+static void update_mbgraph_frame_stats(VP9_COMP *cpi,
+                                       MBGRAPH_FRAME_STATS *stats,
+                                       YV12_BUFFER_CONFIG *buf,
+                                       YV12_BUFFER_CONFIG *golden_ref,
+                                       YV12_BUFFER_CONFIG *alt_ref) {
+  MACROBLOCK *const x = &cpi->mb;
+  VP9_COMMON *const cm = &cpi->common;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  int mb_col, mb_row, offset = 0;
+  int mb_y_offset = 0, arf_y_offset = 0, gld_y_offset = 0;
+  int_mv arf_top_mv, gld_top_mv;
+  MODE_INFO mi_local;
+
+  // Set up limit values for motion vectors to prevent them extending outside the UMV borders
+  arf_top_mv.as_int = 0;
+  gld_top_mv.as_int = 0;
+  x->mv_row_min = -(VP9BORDERINPIXELS - 16 - VP9_INTERP_EXTEND);
+  x->mv_row_max = (cm->mb_rows - 1) * 16 + VP9BORDERINPIXELS
+                  - 16 - VP9_INTERP_EXTEND;
+  xd->up_available = 0;
+  xd->dst.y_stride = buf->y_stride;
+  xd->pre.y_stride = buf->y_stride;
+  xd->dst.uv_stride = buf->uv_stride;
+  xd->mode_info_context = &mi_local;  // NOTE(review): dangles after return
+
+  for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) {
+    int_mv arf_left_mv, gld_left_mv;
+    int mb_y_in_offset = mb_y_offset;
+    int arf_y_in_offset = arf_y_offset;
+    int gld_y_in_offset = gld_y_offset;
+
+    // Set up limit values for motion vectors to prevent them extending outside the UMV borders
+    arf_left_mv.as_int = arf_top_mv.as_int;
+    gld_left_mv.as_int = gld_top_mv.as_int;
+    x->mv_col_min = -(VP9BORDERINPIXELS - 16 - VP9_INTERP_EXTEND);
+    x->mv_col_max = (cm->mb_cols - 1) * 16 + VP9BORDERINPIXELS
+                    - 16 - VP9_INTERP_EXTEND;
+    xd->left_available = 0;
+
+    for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
+      MBGRAPH_MB_STATS *mb_stats = &stats->mb_stats[offset + mb_col];
+
+      update_mbgraph_mb_stats(cpi, mb_stats, buf, mb_y_in_offset,
+                              golden_ref, &gld_left_mv, gld_y_in_offset,
+                              alt_ref, &arf_left_mv, arf_y_in_offset);
+      arf_left_mv.as_int = mb_stats->ref[ALTREF_FRAME].m.mv.as_int;
+      gld_left_mv.as_int = mb_stats->ref[GOLDEN_FRAME].m.mv.as_int;
+      if (mb_col == 0) {
+        arf_top_mv.as_int = arf_left_mv.as_int;
+        gld_top_mv.as_int = gld_left_mv.as_int;
+      }
+      xd->left_available = 1;
+      mb_y_in_offset += 16;
+      gld_y_in_offset += 16;
+      arf_y_in_offset += 16;
+      x->mv_col_min -= 16;
+      x->mv_col_max -= 16;
+    }
+    xd->up_available = 1;
+    mb_y_offset += buf->y_stride * 16;
+    // Only advance the offsets of references that exist; both may be NULL.
+    if (golden_ref) gld_y_offset += golden_ref->y_stride * 16;
+    if (alt_ref) arf_y_offset += alt_ref->y_stride * 16;
+    x->mv_row_min -= 16;
+    x->mv_row_max -= 16;
+    offset += cm->mb_cols;
+  }
+}
+
+// Builds cpi->segmentation_map: an MB is put in segment 1 only if, in every
+// analysed frame, its alt-ref error is <= 1000 and <= its intra/golden errors.
+static void separate_arf_mbs(VP9_COMP *cpi) {
+  VP9_COMMON *const cm = &cpi->common;
+  int mb_col, mb_row, offset, i;
+  int ncnt[4];
+  int n_frames = cpi->mbgraph_n_frames;
+  int *arf_not_zz;
+
+  CHECK_MEM_ERROR(arf_not_zz,
+                  vpx_calloc(cm->mb_rows * cm->mb_cols, sizeof(*arf_not_zz)));
+
+  // We are not interested in results beyond the alt ref itself.
+  if (n_frames > cpi->frames_till_gf_update_due)
+    n_frames = cpi->frames_till_gf_update_due;
+
+  // defer cost to reference frames
+  for (i = n_frames - 1; i >= 0; i--) {
+    MBGRAPH_FRAME_STATS *frame_stats = &cpi->mbgraph_stats[i];
+
+    for (offset = 0, mb_row = 0; mb_row < cm->mb_rows;
+         offset += cm->mb_cols, mb_row++) {
+      for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
+        MBGRAPH_MB_STATS *mb_stats =
+          &frame_stats->mb_stats[offset + mb_col];
+
+        int altref_err = mb_stats->ref[ALTREF_FRAME].err;
+        int intra_err = mb_stats->ref[INTRA_FRAME ].err;
+        int golden_err = mb_stats->ref[GOLDEN_FRAME].err;
+
+        // Test for altref vs intra and gf and that its mv was 0,0.
+        if ((altref_err > 1000) ||
+            (altref_err > intra_err) ||
+            (altref_err > golden_err)) {
+          arf_not_zz[offset + mb_col]++;
+        }
+      }
+    }
+  }
+
+  vpx_memset(ncnt, 0, sizeof(ncnt));
+  for (offset = 0, mb_row = 0; mb_row < cm->mb_rows;
+       offset += cm->mb_cols, mb_row++) {
+    for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
+      // If any of the blocks in the sequence failed then the MB
+      // goes in segment 0
+      if (arf_not_zz[offset + mb_col]) {
+        ncnt[0]++;
+        cpi->segmentation_map[offset + mb_col] = 0;
+      } else {
+        ncnt[1]++;
+        cpi->segmentation_map[offset + mb_col] = 1;
+      }
+    }
+  }
+
+  // Only bother with segmentation if over 10% of the MBs in static segment
+  // if ( ncnt[1] && (ncnt[0] / ncnt[1] < 10) )
+  if (1) {
+    // Note % of blocks that are marked as static
+    if (cm->MBs)
+      cpi->static_mb_pct = (ncnt[1] * 100) / cm->MBs;
+
+    // This error case should not be reachable as this function should
+    // never be called with the common data structure uninitialized.
+    else
+      cpi->static_mb_pct = 0;
+
+    cpi->seg0_cnt = ncnt[0];
+    vp9_enable_segmentation((VP9_PTR) cpi);
+  } else {
+    cpi->static_mb_pct = 0;
+    vp9_disable_segmentation((VP9_PTR) cpi);
+  }
+
+  // Free locally allocated storage
+  vpx_free(arf_not_zz);
+}
+
+// Runs the mbgraph analysis over the lookahead queue: each queued frame is
+// scored against the golden frame and cpi->Source, then separate_arf_mbs()
+// turns the collected stats into the segmentation map.
+void vp9_update_mbgraph_stats(VP9_COMP *cpi) {
+  VP9_COMMON *const cm = &cpi->common;
+  int idx, frame_count = vp9_lookahead_depth(cpi->lookahead);
+  YV12_BUFFER_CONFIG *golden_ref = &cm->yv12_fb[cm->gld_fb_idx];
+
+  // we need to look ahead beyond where the ARF transitions into
+  // being a GF - so exit if we don't look ahead beyond that
+  if (frame_count <= cpi->frames_till_gf_update_due)
+    return;
+  if (frame_count > (int)cm->frames_till_alt_ref_frame)
+    frame_count = cm->frames_till_alt_ref_frame;
+  if (frame_count > MAX_LAG_BUFFERS)
+    frame_count = MAX_LAG_BUFFERS;
+
+  cpi->mbgraph_n_frames = frame_count;
+  for (idx = 0; idx < frame_count; idx++) {
+    MBGRAPH_FRAME_STATS *fs = &cpi->mbgraph_stats[idx];
+    vpx_memset(fs->mb_stats, 0,
+               cm->mb_rows * cm->mb_cols * sizeof(*fs->mb_stats));
+  }
+
+  // do motion search to find contribution of each reference to data
+  // later on in this GF group
+  // FIXME really, the GF/last MC search should be done forward, and
+  // the ARF MC search backwards, to get optimal results for MV caching
+  for (idx = 0; idx < frame_count; idx++) {
+    MBGRAPH_FRAME_STATS *fs = &cpi->mbgraph_stats[idx];
+    struct lookahead_entry *q_cur =
+        vp9_lookahead_peek(cpi->lookahead, idx);
+
+    assert(q_cur != NULL);
+
+    update_mbgraph_frame_stats(cpi, fs, &q_cur->img,
+                               golden_ref, cpi->Source);
+  }
+
+  vp9_clear_system_state();  // __asm emms;
+
+  separate_arf_mbs(cpi);
+}
diff --git a/vp9/encoder/vp9_mbgraph.h b/vp9/encoder/vp9_mbgraph.h
new file mode 100644
index 0000000..01ab18f
--- /dev/null
+++ b/vp9/encoder/vp9_mbgraph.h
@@ -0,0 +1,16 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_ENCODER_VP9_MBGRAPH_H_
+#define VP9_ENCODER_VP9_MBGRAPH_H_
+
+extern void vp9_update_mbgraph_stats(VP9_COMP *cpi);
+
+#endif /* VP9_ENCODER_VP9_MBGRAPH_H_ */
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
new file mode 100644
index 0000000..b3e0415
--- /dev/null
+++ b/vp9/encoder/vp9_mcomp.c
@@ -0,0 +1,2216 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vp9/encoder/vp9_onyx_int.h"
+#include "vp9/encoder/vp9_mcomp.h"
+#include "vpx_mem/vpx_mem.h"
+#include "./vpx_config.h"
+#include <stdio.h>
+#include <limits.h>
+#include <math.h>
+#include "vp9/common/vp9_findnearmv.h"
+
+#ifdef ENTROPY_STATS
+static int mv_ref_ct [31] [4] [2];
+static int mv_mode_cts [4] [2];
+#endif
+
+void vp9_clamp_mv_min_max(MACROBLOCK *x, int_mv *ref_mv) {
+  // Full-pel bounds of the window within MAX_FULL_PEL_VAL of ref_mv (>> 3
+  // drops the sub-pel bits). The lower bounds move up by one when ref_mv has
+  // a fractional part.
+  const int col_frac = (ref_mv->as_mv.col & 7) ? 1 : 0;
+  const int row_frac = (ref_mv->as_mv.row & 7) ? 1 : 0;
+  const int col_min = (ref_mv->as_mv.col >> 3) - MAX_FULL_PEL_VAL + col_frac;
+  const int row_min = (ref_mv->as_mv.row >> 3) - MAX_FULL_PEL_VAL + row_frac;
+  const int col_max = (ref_mv->as_mv.col >> 3) + MAX_FULL_PEL_VAL;
+  const int row_max = (ref_mv->as_mv.row >> 3) + MAX_FULL_PEL_VAL;
+
+  /* Get intersection of UMV window and valid MV window to reduce # of checks
+   * in diamond search. */
+  if (x->mv_col_min < col_min) x->mv_col_min = col_min;
+  if (x->mv_col_max > col_max) x->mv_col_max = col_max;
+  if (x->mv_row_min < row_min) x->mv_row_min = row_min;
+  if (x->mv_row_max > row_max) x->mv_row_max = row_max;
+}
+
+int vp9_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvjcost, int *mvcost[2],
+ int Weight, int ishp) {  // NOTE: ishp is not used by this function
+ MV v;  // mv - ref: picks the joint class and indexes the component tables
+ v.row = (mv->as_mv.row - ref->as_mv.row);
+ v.col = (mv->as_mv.col - ref->as_mv.col);
+ return ((mvjcost[vp9_get_mv_joint(v)] +  // NOTE(review): tables are used
+ mvcost[0][v.row] + mvcost[1][v.col]) *  // without a NULL check here
+ Weight) >> 7;  // summed table costs scaled by Weight / 128
+}
+
+static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvjcost, int *mvcost[2],
+                       int error_per_bit, int ishp) {
+  // Rounded table cost of coding (mv - ref), weighted by error_per_bit / 256.
+  // Returns 0 when mvcost is NULL. ishp is not used.
+  MV diff;
+
+  if (!mvcost) return 0;
+  diff.row = mv->as_mv.row - ref->as_mv.row;
+  diff.col = mv->as_mv.col - ref->as_mv.col;
+  return ((mvjcost[vp9_get_mv_joint(diff)] + mvcost[0][diff.row] +
+           mvcost[1][diff.col]) * error_per_bit + 128) >> 8;
+}
+
+static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvjsadcost,
+                          int *mvsadcost[2], int error_per_bit) {
+  // Rounded cost of (mv - ref) looked up in the mvjsadcost / mvsadcost
+  // tables and weighted by error_per_bit / 256; 0 when mvsadcost is NULL.
+  MV diff;
+
+  if (!mvsadcost) return 0;
+
+  diff.row = mv->as_mv.row - ref->as_mv.row;
+  diff.col = mv->as_mv.col - ref->as_mv.col;
+  return ((mvjsadcost[vp9_get_mv_joint(diff)] + mvsadcost[0][diff.row] +
+           mvsadcost[1][diff.col]) * error_per_bit + 128) >> 8;
+}
+
+void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride) {
+ int Len;
+ int search_site_count = 0;
+
+ // Site 0 is the search centre: zero MV and zero buffer offset.
+ // Generate offsets for 4 search sites per step.
+ Len = MAX_FIRST_STEP;
+ x->ss[search_site_count].mv.col = 0;
+ x->ss[search_site_count].mv.row = 0;
+ x->ss[search_site_count].offset = 0;
+ search_site_count++;
+
+ while (Len > 0) {
+
+ // Site at row -Len, col 0 (offset is -Len rows of `stride`).
+ x->ss[search_site_count].mv.col = 0;
+ x->ss[search_site_count].mv.row = -Len;
+ x->ss[search_site_count].offset = -Len * stride;
+ search_site_count++;
+
+ // Site at row +Len, col 0 (offset is +Len rows of `stride`).
+ x->ss[search_site_count].mv.col = 0;
+ x->ss[search_site_count].mv.row = Len;
+ x->ss[search_site_count].offset = Len * stride;
+ search_site_count++;
+
+ // Site at row 0, col -Len.
+ x->ss[search_site_count].mv.col = -Len;
+ x->ss[search_site_count].mv.row = 0;
+ x->ss[search_site_count].offset = -Len;
+ search_site_count++;
+
+ // Site at row 0, col +Len.
+ x->ss[search_site_count].mv.col = Len;
+ x->ss[search_site_count].mv.row = 0;
+ x->ss[search_site_count].offset = Len;
+ search_site_count++;
+
+ // Contract: halve the step for the next set of sites.
+ Len /= 2;
+ }
+
+ x->ss_count = search_site_count;
+ x->searches_per_step = 4;
+}
+
+void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) {
+ int Len;
+ int search_site_count = 0;
+
+ // Generate offsets for 8 search sites per step (site 0 is the centre).
+ Len = MAX_FIRST_STEP;
+ x->ss[search_site_count].mv.col = 0;
+ x->ss[search_site_count].mv.row = 0;
+ x->ss[search_site_count].offset = 0;
+ search_site_count++;
+
+ while (Len > 0) {
+
+ // Site at row -Len, col 0.
+ x->ss[search_site_count].mv.col = 0;
+ x->ss[search_site_count].mv.row = -Len;
+ x->ss[search_site_count].offset = -Len * stride;
+ search_site_count++;
+
+ // Site at row +Len, col 0.
+ x->ss[search_site_count].mv.col = 0;
+ x->ss[search_site_count].mv.row = Len;
+ x->ss[search_site_count].offset = Len * stride;
+ search_site_count++;
+
+ // Site at row 0, col -Len.
+ x->ss[search_site_count].mv.col = -Len;
+ x->ss[search_site_count].mv.row = 0;
+ x->ss[search_site_count].offset = -Len;
+ search_site_count++;
+
+ // Site at row 0, col +Len.
+ x->ss[search_site_count].mv.col = Len;
+ x->ss[search_site_count].mv.row = 0;
+ x->ss[search_site_count].offset = Len;
+ search_site_count++;
+
+ // Diagonal site at row -Len, col -Len.
+ x->ss[search_site_count].mv.col = -Len;
+ x->ss[search_site_count].mv.row = -Len;
+ x->ss[search_site_count].offset = -Len * stride - Len;
+ search_site_count++;
+
+ // Diagonal site at row -Len, col +Len.
+ x->ss[search_site_count].mv.col = Len;
+ x->ss[search_site_count].mv.row = -Len;
+ x->ss[search_site_count].offset = -Len * stride + Len;
+ search_site_count++;
+
+ // Diagonal site at row +Len, col -Len.
+ x->ss[search_site_count].mv.col = -Len;
+ x->ss[search_site_count].mv.row = Len;
+ x->ss[search_site_count].offset = Len * stride - Len;
+ search_site_count++;
+
+ // Diagonal site at row +Len, col +Len.
+ x->ss[search_site_count].mv.col = Len;
+ x->ss[search_site_count].mv.row = Len;
+ x->ss[search_site_count].offset = Len * stride + Len;
+ search_site_count++;
+
+ // Contract: halve the step for the next set of sites.
+ Len /= 2;
+ }
+
+ x->ss_count = search_site_count;
+ x->searches_per_step = 8;
+}
+
+/*
+ * To avoid the penalty for crossing cache-line read, preload the reference
+ * area in a small buffer, which is aligned to make sure there won't be crossing
+ * cache-line read while reading from this buffer. This reduced the cpu
+ * cycles spent on reading ref data in sub-pixel filter functions.
+ * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x
+ * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we
+ * could reduce the area.
+ */
+
+/* estimated cost of a motion vector (r,c) relative to (rr,rc); 0 if !mvcost */
+#define MVC(r, c) \
+ (mvcost ? \
+ ((mvjcost[((r) != rr) * 2 + ((c) != rc)] + \
+ mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) * \
+ error_per_bit + 128) >> 8 : 0)
+
+#define SP(x) (((x) & 7) << 1) // convert motion vector component to offset
+ // for svf calc
+/* runs s when (r, c) lies inside [minr,maxr] x [minc,maxc], otherwise e */
+#define IFMVCV(r, c, s, e) \
+ if (c >= minc && c <= maxc && r >= minr && r <= maxr) \
+ s \
+ else \
+ e;
+
+/* pointer to predictor base of a motionvector */
+#define PRE(r, c) (y + (((r) >> 3) * y_stride + ((c) >> 3) -(offset)))
+
+/* sub-pixel variance (vfp->svf) of the predictor at (r, c); also writes sse */
+#define DIST(r, c) \
+ vfp->svf(PRE(r, c), y_stride, SP(c), SP(r), z, b->src_stride, &sse)
+
+/* checks if (r, c) has better score than previous best */
+#define CHECK_BETTER(v, r, c) \
+ IFMVCV(r, c, { \
+ thismse = (DIST(r, c)); \
+ if ((v = MVC(r, c) + thismse) < besterr) { \
+ besterr = v; \
+ br = r; \
+ bc = c; \
+ *distortion = thismse; \
+ *sse1 = sse; \
+ } \
+ }, \
+ v = INT_MAX;)
+
+#define MIN(x,y) (((x)<(y))?(x):(y))
+#define MAX(x,y) (((x)>(y))?(x):(y))
+
+int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
+ int_mv *bestmv, int_mv *ref_mv,
+ int error_per_bit,
+ const vp9_variance_fn_ptr_t *vfp,
+ int *mvjcost, int *mvcost[2],
+ int *distortion,
+ unsigned int *sse1) {
+ unsigned char *z = (*(b->base_src) + b->src);
+ MACROBLOCKD *xd = &x->e_mbd;
+ // Iteratively refines full-pel *bestmv at 1/2, 1/4 and (if enabled) 1/8 pel.
+ int rr, rc, br, bc, hstep;
+ int tr, tc;
+ unsigned int besterr = INT_MAX;
+ unsigned int left, right, up, down, diag;
+ unsigned int sse;
+ unsigned int whichdir;
+ unsigned int halfiters = 4;  // pre-decremented: at most 3 half-pel passes
+ unsigned int quarteriters = 4;  // at most 3 quarter-pel passes
+ unsigned int eighthiters = 4;  // at most 3 eighth-pel passes
+ int thismse;
+ int maxc, minc, maxr, minr;
+ int y_stride;
+ int offset;
+ int usehp = xd->allow_high_precision_mv;
+
+#if !CONFIG_SUPERBLOCKS && (ARCH_X86 || ARCH_X86_64)
+ unsigned char *y0 = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
+ unsigned char *y;
+ int buf_r1, buf_r2, buf_c1, buf_c2;
+
+ // Clamping to avoid out-of-range data access
+ buf_r1 = ((bestmv->as_mv.row - VP9_INTERP_EXTEND) < x->mv_row_min) ?
+ (bestmv->as_mv.row - x->mv_row_min) : VP9_INTERP_EXTEND - 1;
+ buf_r2 = ((bestmv->as_mv.row + VP9_INTERP_EXTEND) > x->mv_row_max) ?
+ (x->mv_row_max - bestmv->as_mv.row) : VP9_INTERP_EXTEND - 1;
+ buf_c1 = ((bestmv->as_mv.col - VP9_INTERP_EXTEND) < x->mv_col_min) ?
+ (bestmv->as_mv.col - x->mv_col_min) : VP9_INTERP_EXTEND - 1;
+ buf_c2 = ((bestmv->as_mv.col + VP9_INTERP_EXTEND) > x->mv_col_max) ?
+ (x->mv_col_max - bestmv->as_mv.col) : VP9_INTERP_EXTEND - 1;
+ y_stride = 32;
+
+ /* Copy to intermediate buffer before searching. */
+ vfp->copymem(y0 - buf_c1 - d->pre_stride * buf_r1, d->pre_stride, xd->y_buf, y_stride, 16 + buf_r1 + buf_r2);
+ y = xd->y_buf + y_stride * buf_r1 + buf_c1;
+#else
+ unsigned char *y = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
+ y_stride = d->pre_stride;
+#endif
+
+ rr = ref_mv->as_mv.row;
+ rc = ref_mv->as_mv.col;
+ br = bestmv->as_mv.row << 3;
+ bc = bestmv->as_mv.col << 3;
+ hstep = 4;
+ minc = MAX(x->mv_col_min << 3, (ref_mv->as_mv.col) - ((1 << MV_MAX_BITS) - 1));
+ maxc = MIN(x->mv_col_max << 3, (ref_mv->as_mv.col) + ((1 << MV_MAX_BITS) - 1));
+ minr = MAX(x->mv_row_min << 3, (ref_mv->as_mv.row) - ((1 << MV_MAX_BITS) - 1));
+ maxr = MIN(x->mv_row_max << 3, (ref_mv->as_mv.row) + ((1 << MV_MAX_BITS) - 1));
+
+ tr = br;
+ tc = bc;
+
+ // offset of the full-pel best MV in the predictor buffer; PRE() removes it
+ offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
+
+ // central mv: switch bestmv from full-pel to 1/8-pel units (<< 3)
+ bestmv->as_mv.row <<= 3;
+ bestmv->as_mv.col <<= 3;
+
+ // calculate central point error
+ besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1);
+ *distortion = besterr;
+ besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost,
+ error_per_bit, xd->allow_high_precision_mv);
+
+ // TODO: Each subsequent iteration re-checks at least one point in common
+ // with the previous iteration (possibly 2 if the diagonal was selected)
+ while (--halfiters) {
+ // 1/2 pel
+ CHECK_BETTER(left, tr, tc - hstep);
+ CHECK_BETTER(right, tr, tc + hstep);
+ CHECK_BETTER(up, tr - hstep, tc);
+ CHECK_BETTER(down, tr + hstep, tc);
+
+ whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
+
+ switch (whichdir) {
+ case 0:
+ CHECK_BETTER(diag, tr - hstep, tc - hstep);
+ break;
+ case 1:
+ CHECK_BETTER(diag, tr - hstep, tc + hstep);
+ break;
+ case 2:
+ CHECK_BETTER(diag, tr + hstep, tc - hstep);
+ break;
+ case 3:
+ CHECK_BETTER(diag, tr + hstep, tc + hstep);
+ break;
+ }
+
+ // no reason to check the same one again.
+ if (tr == br && tc == bc)
+ break;
+
+ tr = br;
+ tc = bc;
+ }
+
+ // 1/4 pel. TODO: each iteration re-checks at least one point in common with
+ // the previous iteration (possibly 2 if the diagonal was selected)
+ hstep >>= 1;
+ while (--quarteriters) {
+ CHECK_BETTER(left, tr, tc - hstep);
+ CHECK_BETTER(right, tr, tc + hstep);
+ CHECK_BETTER(up, tr - hstep, tc);
+ CHECK_BETTER(down, tr + hstep, tc);
+
+ whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
+
+ switch (whichdir) {
+ case 0:
+ CHECK_BETTER(diag, tr - hstep, tc - hstep);
+ break;
+ case 1:
+ CHECK_BETTER(diag, tr - hstep, tc + hstep);
+ break;
+ case 2:
+ CHECK_BETTER(diag, tr + hstep, tc - hstep);
+ break;
+ case 3:
+ CHECK_BETTER(diag, tr + hstep, tc + hstep);
+ break;
+ }
+
+ // no reason to check the same one again.
+ if (tr == br && tc == bc)
+ break;
+
+ tr = br;
+ tc = bc;
+ }
+
+ if (xd->allow_high_precision_mv) {
+ usehp = vp9_use_nmv_hp(&ref_mv->as_mv);
+ } else {
+ usehp = 0;
+ }
+
+ if (usehp) {
+ hstep >>= 1;
+ while (--eighthiters) {
+ CHECK_BETTER(left, tr, tc - hstep);
+ CHECK_BETTER(right, tr, tc + hstep);
+ CHECK_BETTER(up, tr - hstep, tc);
+ CHECK_BETTER(down, tr + hstep, tc);
+
+ whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
+
+ switch (whichdir) {
+ case 0:
+ CHECK_BETTER(diag, tr - hstep, tc - hstep);
+ break;
+ case 1:
+ CHECK_BETTER(diag, tr - hstep, tc + hstep);
+ break;
+ case 2:
+ CHECK_BETTER(diag, tr + hstep, tc - hstep);
+ break;
+ case 3:
+ CHECK_BETTER(diag, tr + hstep, tc + hstep);
+ break;
+ }
+
+ // no reason to check the same one again.
+ if (tr == br && tc == bc)
+ break;
+
+ tr = br;
+ tc = bc;
+ }
+ }
+ bestmv->as_mv.row = br;
+ bestmv->as_mv.col = bc;
+ // Reject results further than MAX_FULL_PEL_VAL full pels from ref_mv
+ if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) ||
+ (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3)))
+ return INT_MAX;
+
+ return besterr;
+}
+#undef MVC
+#undef PRE
+#undef DIST
+#undef IFMVCV
+#undef CHECK_BETTER
+#undef MIN
+#undef MAX
+
+int vp9_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
+ int_mv *bestmv, int_mv *ref_mv,
+ int error_per_bit,
+ const vp9_variance_fn_ptr_t *vfp,
+ int *mvjcost, int *mvcost[2], int *distortion,
+ unsigned int *sse1) {
+ int bestmse = INT_MAX;
+ int_mv startmv;
+ int_mv this_mv;
+ int_mv orig_mv;
+ int yrow_movedback = 0, ycol_movedback = 0;
+ unsigned char *z = (*(b->base_src) + b->src);
+ int left, right, up, down, diag;
+ unsigned int sse;
+ int whichdir;
+ int thismse;
+ int y_stride;
+ MACROBLOCKD *xd = &x->e_mbd;
+ int usehp = xd->allow_high_precision_mv;
+
+#if !CONFIG_SUPERBLOCKS && (ARCH_X86 || ARCH_X86_64)
+ unsigned char *y0 = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
+ unsigned char *y;
+
+ y_stride = 32;
+ /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
+ vfp->copymem(y0 - 1 - d->pre_stride, d->pre_stride, xd->y_buf, y_stride, 18);
+ y = xd->y_buf + y_stride + 1;
+#else
+ unsigned char *y = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
+ y_stride = d->pre_stride;
+#endif
+
+ // central mv
+ bestmv->as_mv.row <<= 3;
+ bestmv->as_mv.col <<= 3;
+ startmv = *bestmv;
+ orig_mv = *bestmv;
+
+ // calculate central point error
+ bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
+ *distortion = bestmse;
+ bestmse += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit,
+ xd->allow_high_precision_mv);
+
+ // go left then right and check error
+ this_mv.as_mv.row = startmv.as_mv.row;
+ this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
+ thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse);
+ left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
+ xd->allow_high_precision_mv);
+
+ if (left < bestmse) {
+ *bestmv = this_mv;
+ bestmse = left;
+ *distortion = thismse;
+ *sse1 = sse;
+ }
+
+ this_mv.as_mv.col += 8;
+ thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse);
+ right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
+ error_per_bit, xd->allow_high_precision_mv);
+
+ if (right < bestmse) {
+ *bestmv = this_mv;
+ bestmse = right;
+ *distortion = thismse;
+ *sse1 = sse;
+ }
+
+ // go up then down and check error
+ this_mv.as_mv.col = startmv.as_mv.col;
+ this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
+ thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse);
+ up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
+ xd->allow_high_precision_mv);
+
+ if (up < bestmse) {
+ *bestmv = this_mv;
+ bestmse = up;
+ *distortion = thismse;
+ *sse1 = sse;
+ }
+
+ this_mv.as_mv.row += 8;
+ thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse);
+ down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
+ xd->allow_high_precision_mv);
+
+ if (down < bestmse) {
+ *bestmv = this_mv;
+ bestmse = down;
+ *distortion = thismse;
+ *sse1 = sse;
+ }
+
+
+ // now check 1 more diagonal
+ whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
+ // for(whichdir =0;whichdir<4;whichdir++)
+ // {
+ this_mv = startmv;
+
+ switch (whichdir) {
+ case 0:
+ this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
+ this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
+ thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, b->src_stride, &sse);
+ break;
+ case 1:
+ this_mv.as_mv.col += 4;
+ this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
+ thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, b->src_stride, &sse);
+ break;
+ case 2:
+ this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
+ this_mv.as_mv.row += 4;
+ thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, b->src_stride, &sse);
+ break;
+ case 3:
+ default:
+ this_mv.as_mv.col += 4;
+ this_mv.as_mv.row += 4;
+ thismse = vfp->svf_halfpix_hv(y, y_stride, z, b->src_stride, &sse);
+ break;
+ }
+
+ diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
+ xd->allow_high_precision_mv);
+
+ if (diag < bestmse) {
+ *bestmv = this_mv;
+ bestmse = diag;
+ *distortion = thismse;
+ *sse1 = sse;
+ }
+
+// }
+
+
+ // time to check quarter pels.
+ if (bestmv->as_mv.row < startmv.as_mv.row) {
+ y -= y_stride;
+ yrow_movedback = 1;
+ }
+
+ if (bestmv->as_mv.col < startmv.as_mv.col) {
+ y--;
+ ycol_movedback = 1;
+ }
+
+ startmv = *bestmv;
+
+
+
+ // go left then right and check error
+ this_mv.as_mv.row = startmv.as_mv.row;
+
+ if (startmv.as_mv.col & 7) {
+ this_mv.as_mv.col = startmv.as_mv.col - 2;
+ thismse = vfp->svf(y, y_stride,
+ SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
+ z, b->src_stride, &sse);
+ } else {
+ this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
+ thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z,
+ b->src_stride, &sse);
+ }
+
+ left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
+ xd->allow_high_precision_mv);
+
+ if (left < bestmse) {
+ *bestmv = this_mv;
+ bestmse = left;
+ *distortion = thismse;
+ *sse1 = sse;
+ }
+
+ this_mv.as_mv.col += 4;
+ thismse = vfp->svf(y, y_stride,
+ SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
+ z, b->src_stride, &sse);
+ right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
+ error_per_bit, xd->allow_high_precision_mv);
+
+ if (right < bestmse) {
+ *bestmv = this_mv;
+ bestmse = right;
+ *distortion = thismse;
+ *sse1 = sse;
+ }
+
+ // go up then down and check error
+ this_mv.as_mv.col = startmv.as_mv.col;
+
+ if (startmv.as_mv.row & 7) {
+ this_mv.as_mv.row = startmv.as_mv.row - 2;
+ thismse = vfp->svf(y, y_stride,
+ SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
+ z, b->src_stride, &sse);
+ } else {
+ this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
+ thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(6),
+ z, b->src_stride, &sse);
+ }
+
+ up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
+ xd->allow_high_precision_mv);
+
+ if (up < bestmse) {
+ *bestmv = this_mv;
+ bestmse = up;
+ *distortion = thismse;
+ *sse1 = sse;
+ }
+
+ this_mv.as_mv.row += 4;
+ thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
+ z, b->src_stride, &sse);
+ down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
+ xd->allow_high_precision_mv);
+
+ if (down < bestmse) {
+ *bestmv = this_mv;
+ bestmse = down;
+ *distortion = thismse;
+ *sse1 = sse;
+ }
+
+
+ // now check 1 more diagonal
+ whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
+
+// for(whichdir=0;whichdir<4;whichdir++)
+// {
+ this_mv = startmv;
+
+ switch (whichdir) {
+ case 0:
+
+ if (startmv.as_mv.row & 7) {
+ this_mv.as_mv.row -= 2;
+
+ if (startmv.as_mv.col & 7) {
+ this_mv.as_mv.col -= 2;
+ thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
+ } else {
+ this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
+ thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z, b->src_stride, &sse);;
+ }
+ } else {
+ this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
+
+ if (startmv.as_mv.col & 7) {
+ this_mv.as_mv.col -= 2;
+ thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(6), z, b->src_stride, &sse);
+ } else {
+ this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
+ thismse = vfp->svf(y - y_stride - 1, y_stride, SP(6), SP(6), z, b->src_stride, &sse);
+ }
+ }
+
+ break;
+ case 1:
+ this_mv.as_mv.col += 2;
+
+ if (startmv.as_mv.row & 7) {
+ this_mv.as_mv.row -= 2;
+ thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
+ } else {
+ this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
+ thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(6), z, b->src_stride, &sse);
+ }
+
+ break;
+ case 2:
+ this_mv.as_mv.row += 2;
+
+ if (startmv.as_mv.col & 7) {
+ this_mv.as_mv.col -= 2;
+ thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
+ z, b->src_stride, &sse);
+ } else {
+ this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
+ thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z,
+ b->src_stride, &sse);
+ }
+
+ break;
+ case 3:
+ this_mv.as_mv.col += 2;
+ this_mv.as_mv.row += 2;
+ thismse = vfp->svf(y, y_stride,
+ SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
+ z, b->src_stride, &sse);
+ break;
+ }
+
+ diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
+ xd->allow_high_precision_mv);
+
+ if (diag < bestmse) {
+ *bestmv = this_mv;
+ bestmse = diag;
+ *distortion = thismse;
+ *sse1 = sse;
+ }
+
+ if (x->e_mbd.allow_high_precision_mv) {
+ usehp = vp9_use_nmv_hp(&ref_mv->as_mv);
+ } else {
+ usehp = 0;
+ }
+ if (!usehp)
+ return bestmse;
+
+ /* Now do 1/8th pixel */
+ if (bestmv->as_mv.row < orig_mv.as_mv.row && !yrow_movedback) {
+ y -= y_stride;
+ yrow_movedback = 1;
+ }
+
+ if (bestmv->as_mv.col < orig_mv.as_mv.col && !ycol_movedback) {
+ y--;
+ ycol_movedback = 1;
+ }
+
+ startmv = *bestmv;
+
+ // go left then right and check error
+ this_mv.as_mv.row = startmv.as_mv.row;
+
+ if (startmv.as_mv.col & 7) {
+ this_mv.as_mv.col = startmv.as_mv.col - 1;
+ thismse = vfp->svf(y, y_stride,
+ SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
+ z, b->src_stride, &sse);
+ } else {
+ this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
+ thismse = vfp->svf(y - 1, y_stride, SP(7), SP(this_mv.as_mv.row),
+ z, b->src_stride, &sse);
+ }
+
+ left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
+ xd->allow_high_precision_mv);
+
+ if (left < bestmse) {
+ *bestmv = this_mv;
+ bestmse = left;
+ *distortion = thismse;
+ *sse1 = sse;
+ }
+
+ this_mv.as_mv.col += 2;
+ thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
+ z, b->src_stride, &sse);
+ right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
+ error_per_bit, xd->allow_high_precision_mv);
+
+ if (right < bestmse) {
+ *bestmv = this_mv;
+ bestmse = right;
+ *distortion = thismse;
+ *sse1 = sse;
+ }
+
+ // go up then down and check error
+ this_mv.as_mv.col = startmv.as_mv.col;
+
+ if (startmv.as_mv.row & 7) {
+ this_mv.as_mv.row = startmv.as_mv.row - 1;
+ thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
+ } else {
+ this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7;
+ thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(7), z, b->src_stride, &sse);
+ }
+
+ up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
+ xd->allow_high_precision_mv);
+
+ if (up < bestmse) {
+ *bestmv = this_mv;
+ bestmse = up;
+ *distortion = thismse;
+ *sse1 = sse;
+ }
+
+ this_mv.as_mv.row += 2;
+ thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
+ down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
+ xd->allow_high_precision_mv);
+
+ if (down < bestmse) {
+ *bestmv = this_mv;
+ bestmse = down;
+ *distortion = thismse;
+ *sse1 = sse;
+ }
+
+ // now check 1 more diagonal
+ whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
+
+// for(whichdir=0;whichdir<4;whichdir++)
+// {
+ this_mv = startmv;
+
+ switch (whichdir) {
+ case 0:
+
+ if (startmv.as_mv.row & 7) {
+ this_mv.as_mv.row -= 1;
+
+ if (startmv.as_mv.col & 7) {
+ this_mv.as_mv.col -= 1;
+ thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
+ } else {
+ this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
+ thismse = vfp->svf(y - 1, y_stride, SP(7), SP(this_mv.as_mv.row), z, b->src_stride, &sse);;
+ }
+ } else {
+ this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7;
+
+ if (startmv.as_mv.col & 7) {
+ this_mv.as_mv.col -= 1;
+ thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(7), z, b->src_stride, &sse);
+ } else {
+ this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
+ thismse = vfp->svf(y - y_stride - 1, y_stride, SP(7), SP(7), z, b->src_stride, &sse);
+ }
+ }
+
+ break;
+ case 1:
+ this_mv.as_mv.col += 1;
+
+ if (startmv.as_mv.row & 7) {
+ this_mv.as_mv.row -= 1;
+ thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
+ } else {
+ this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7;
+ thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(7), z, b->src_stride, &sse);
+ }
+
+ break;
+ case 2:
+ this_mv.as_mv.row += 1;
+
+ if (startmv.as_mv.col & 7) {
+ this_mv.as_mv.col -= 1;
+ thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
+ } else {
+ this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
+ thismse = vfp->svf(y - 1, y_stride, SP(7), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
+ }
+
+ break;
+ case 3:
+ this_mv.as_mv.col += 1;
+ this_mv.as_mv.row += 1;
+ thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
+ break;
+ }
+
+ diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
+ xd->allow_high_precision_mv);
+
+ if (diag < bestmse) {
+ *bestmv = this_mv;
+ bestmse = diag;
+ *distortion = thismse;
+ *sse1 = sse;
+ }
+
+ return bestmse;
+}
+
+#undef SP
+/* Half-pel refinement around the full-pel *bestmv; returns best variance + MV cost. */
+int vp9_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
+ int_mv *bestmv, int_mv *ref_mv,
+ int error_per_bit,
+ const vp9_variance_fn_ptr_t *vfp,
+ int *mvjcost, int *mvcost[2],
+ int *distortion,
+ unsigned int *sse1) {
+ int bestmse = INT_MAX;
+ int_mv startmv;
+ int_mv this_mv;
+ unsigned char *z = (*(b->base_src) + b->src);
+ int left, right, up, down, diag;
+ unsigned int sse;
+ int whichdir;
+ int thismse;
+ int y_stride;
+ MACROBLOCKD *xd = &x->e_mbd;
+ // Outputs: *bestmv (scaled by 8), *distortion (variance without MV cost), *sse1.
+#if !CONFIG_SUPERBLOCKS && (ARCH_X86 || ARCH_X86_64)
+ unsigned char *y0 = *(d->base_pre) + d->pre +
+ (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
+ unsigned char *y;
+
+ y_stride = 32;
+ /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
+ vfp->copymem(y0 - 1 - d->pre_stride, d->pre_stride, xd->y_buf, y_stride, 18);
+ y = xd->y_buf + y_stride + 1;
+#else
+ unsigned char *y = *(d->base_pre) + d->pre +
+ (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
+ y_stride = d->pre_stride;
+#endif
+ // From here on MV components are scaled by 8 (<< 3), so +/-4 is a half-pel step.
+ // central mv
+ bestmv->as_mv.row <<= 3;
+ bestmv->as_mv.col <<= 3;
+ startmv = *bestmv;
+
+ // calculate central point error
+ bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
+ *distortion = bestmse;
+ bestmse += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit,
+ xd->allow_high_precision_mv);
+
+ // go left then right and check error
+ this_mv.as_mv.row = startmv.as_mv.row;
+ this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
+ thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse);
+ left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
+ xd->allow_high_precision_mv);
+ // A candidate is accepted only if variance + MV cost beats the best so far.
+ if (left < bestmse) {
+ *bestmv = this_mv;
+ bestmse = left;
+ *distortion = thismse;
+ *sse1 = sse;
+ }
+
+ this_mv.as_mv.col += 8;
+ thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse);
+ right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
+ error_per_bit, xd->allow_high_precision_mv);
+
+ if (right < bestmse) {
+ *bestmv = this_mv;
+ bestmse = right;
+ *distortion = thismse;
+ *sse1 = sse;
+ }
+
+ // go up then down and check error
+ this_mv.as_mv.col = startmv.as_mv.col;
+ this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
+ thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse);
+ up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
+ xd->allow_high_precision_mv);
+
+ if (up < bestmse) {
+ *bestmv = this_mv;
+ bestmse = up;
+ *distortion = thismse;
+ *sse1 = sse;
+ }
+
+ this_mv.as_mv.row += 8;
+ thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse);
+ down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
+ xd->allow_high_precision_mv);
+
+ if (down < bestmse) {
+ *bestmv = this_mv;
+ bestmse = down;
+ *distortion = thismse;
+ *sse1 = sse;
+ }
+ // whichdir: bit 0 is set when right <= left, bit 1 is set when down <= up.
+ // now check 1 more diagonal - the single one selected by whichdir
+ whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
+ this_mv = startmv;
+
+ switch (whichdir) {
+ case 0:
+ this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
+ this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
+ thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, b->src_stride, &sse);
+ break;
+ case 1:
+ this_mv.as_mv.col += 4;
+ this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
+ thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, b->src_stride, &sse);
+ break;
+ case 2:
+ this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
+ this_mv.as_mv.row += 4;
+ thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, b->src_stride, &sse);
+ break;
+ case 3:
+ default:
+ this_mv.as_mv.col += 4;
+ this_mv.as_mv.row += 4;
+ thismse = vfp->svf_halfpix_hv(y, y_stride, z, b->src_stride, &sse);
+ break;
+ }
+
+ diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
+ xd->allow_high_precision_mv);
+
+ if (diag < bestmse) {
+ *bestmv = this_mv;
+ bestmse = diag;
+ *distortion = thismse;
+ *sse1 = sse;
+ }
+
+ return bestmse;
+}
+/* CHECK_BOUNDS: all_in = 1 iff (br, bc) +/- range stays inside x's MV limits. */
+#define CHECK_BOUNDS(range) \
+ {\
+ all_in = 1;\
+ all_in &= ((br-range) >= x->mv_row_min);\
+ all_in &= ((br+range) <= x->mv_row_max);\
+ all_in &= ((bc-range) >= x->mv_col_min);\
+ all_in &= ((bc+range) <= x->mv_col_max);\
+ }
+/* CHECK_POINT: `continue` the enclosing loop if this_mv is outside x's MV limits. */
+#define CHECK_POINT \
+ {\
+ if (this_mv.as_mv.col < x->mv_col_min) continue;\
+ if (this_mv.as_mv.col > x->mv_col_max) continue;\
+ if (this_mv.as_mv.row < x->mv_row_min) continue;\
+ if (this_mv.as_mv.row > x->mv_row_max) continue;\
+ }
+/* CHECK_BETTER: add MV cost only if raw SAD beats bestsad; record site i if still better. */
+#define CHECK_BETTER \
+ {\
+ if (thissad < bestsad)\
+ {\
+ thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, \
+ sad_per_bit);\
+ if (thissad < bestsad)\
+ {\
+ bestsad = thissad;\
+ best_site = i;\
+ }\
+ }\
+ }
+/* Row k holds hex offsets k-1, k, k+1 (mod 6): the 3 points vp9_hex_search tests after a move in direction k. */
+static const MV next_chkpts[6][3] = {
+ {{ -2, 0}, { -1, -2}, {1, -2}},
+ {{ -1, -2}, {1, -2}, {2, 0}},
+ {{1, -2}, {2, 0}, {1, 2}},
+ {{2, 0}, {1, 2}, { -1, 2}},
+ {{1, 2}, { -1, 2}, { -2, 0}},
+ {{ -1, 2}, { -2, 0}, { -1, -2}}
+};
+/* Hexagon SAD search from ref_mv (clamped in place); writes *best_mv, returns best SAD + MV cost. */
+int vp9_hex_search
+(
+ MACROBLOCK *x,
+ BLOCK *b,
+ BLOCKD *d,
+ int_mv *ref_mv,
+ int_mv *best_mv,
+ int search_param,
+ int sad_per_bit,
+ const vp9_variance_fn_ptr_t *vfp,
+ int *mvjsadcost, int *mvsadcost[2],
+ int *mvjcost, int *mvcost[2],
+ int_mv *center_mv
+) {
+ MV hex[6] = { { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0} };
+ MV neighbors[4] = {{0, -1}, { -1, 0}, {1, 0}, {0, 1}};
+ int i, j;
+ // NOTE: search_param, mvjcost and mvcost are not read anywhere in this function.
+ unsigned char *what = (*(b->base_src) + b->src);
+ int what_stride = b->src_stride;
+ int in_what_stride = d->pre_stride;
+ int br, bc;
+ int_mv this_mv;
+ unsigned int bestsad = 0x7fffffff;
+ unsigned int thissad;
+ unsigned char *base_offset;
+ unsigned char *this_offset;
+ int k = -1;
+ int all_in;
+ int best_site = -1;
+ // center_mv is scaled down by 8 (>> 3) before being used to cost candidate MVs.
+ int_mv fcenter_mv;
+ fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
+ fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
+
+ // adjust ref_mv to make sure it is within MV range
+ clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
+ br = ref_mv->as_mv.row;
+ bc = ref_mv->as_mv.col;
+
+ // Work out the start point for the search
+ base_offset = (unsigned char *)(*(d->base_pre) + d->pre);
+ this_offset = base_offset + (br * (d->pre_stride)) + bc;
+ this_mv.as_mv.row = br;
+ this_mv.as_mv.col = bc;
+ bestsad = vfp->sdf(what, what_stride, this_offset,
+ in_what_stride, 0x7fffffff)
+ + mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost,
+ sad_per_bit);
+
+ // hex search
+ // j=0
+ CHECK_BOUNDS(2)
+ // When all_in is set the per-point CHECK_POINT range test is skipped.
+ if (all_in) {
+ for (i = 0; i < 6; i++) {
+ this_mv.as_mv.row = br + hex[i].row;
+ this_mv.as_mv.col = bc + hex[i].col;
+ this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
+ thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
+ CHECK_BETTER
+ }
+ } else {
+ for (i = 0; i < 6; i++) {
+ this_mv.as_mv.row = br + hex[i].row;
+ this_mv.as_mv.col = bc + hex[i].col;
+ CHECK_POINT
+ this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
+ thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
+ CHECK_BETTER
+ }
+ }
+ // No hex point beat the centre: go straight to the 1-away neighbour refinement.
+ if (best_site == -1)
+ goto cal_neighbors;
+ else {
+ br += hex[best_site].row;
+ bc += hex[best_site].col;
+ k = best_site;
+ }
+ // Keep moving: each iteration tests only the 3 points listed in next_chkpts[k].
+ for (j = 1; j < 127; j++) {
+ best_site = -1;
+ CHECK_BOUNDS(2)
+
+ if (all_in) {
+ for (i = 0; i < 3; i++) {
+ this_mv.as_mv.row = br + next_chkpts[k][i].row;
+ this_mv.as_mv.col = bc + next_chkpts[k][i].col;
+ this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
+ thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
+ CHECK_BETTER
+ }
+ } else {
+ for (i = 0; i < 3; i++) {
+ this_mv.as_mv.row = br + next_chkpts[k][i].row;
+ this_mv.as_mv.col = bc + next_chkpts[k][i].col;
+ CHECK_POINT
+ this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
+ thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
+ CHECK_BETTER
+ }
+ }
+ // Stop when nothing improved; otherwise k becomes (k - 1 + best_site) mod 6.
+ if (best_site == -1)
+ break;
+ else {
+ br += next_chkpts[k][best_site].row;
+ bc += next_chkpts[k][best_site].col;
+ k += 5 + best_site;
+ if (k >= 12) k -= 12;
+ else if (k >= 6) k -= 6;
+ }
+ }
+
+ // check 4 1-away neighbors
+cal_neighbors:
+ for (j = 0; j < 32; j++) {
+ best_site = -1;
+ CHECK_BOUNDS(1)
+
+ if (all_in) {
+ for (i = 0; i < 4; i++) {
+ this_mv.as_mv.row = br + neighbors[i].row;
+ this_mv.as_mv.col = bc + neighbors[i].col;
+ this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
+ thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
+ CHECK_BETTER
+ }
+ } else {
+ for (i = 0; i < 4; i++) {
+ this_mv.as_mv.row = br + neighbors[i].row;
+ this_mv.as_mv.col = bc + neighbors[i].col;
+ CHECK_POINT
+ this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
+ thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
+ CHECK_BETTER
+ }
+ }
+
+ if (best_site == -1)
+ break;
+ else {
+ br += neighbors[best_site].row;
+ bc += neighbors[best_site].col;
+ }
+ }
+
+ best_mv->as_mv.row = br;
+ best_mv->as_mv.col = bc;
+
+ return bestsad;
+}
+#undef CHECK_BOUNDS
+#undef CHECK_POINT
+#undef CHECK_BETTER
+/* Diamond SAD search over the x->ss site table; writes *best_mv, returns variance + MV cost there (or INT_MAX). */
+int vp9_diamond_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
+ int_mv *ref_mv, int_mv *best_mv,
+ int search_param, int sad_per_bit, int *num00,
+ vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost,
+ int *mvcost[2], int_mv *center_mv) {
+ int i, j, step;
+
+ unsigned char *what = (*(b->base_src) + b->src);
+ int what_stride = b->src_stride;
+ unsigned char *in_what;
+ int in_what_stride = d->pre_stride;
+ unsigned char *best_address;
+
+ int tot_steps;
+ int_mv this_mv;
+
+ int bestsad = INT_MAX;
+ int best_site = 0;
+ int last_site = 0;
+
+ int ref_row, ref_col;
+ int this_row_offset, this_col_offset;
+ search_site *ss;
+
+ unsigned char *check_here;
+ int thissad;
+ MACROBLOCKD *xd = &x->e_mbd;
+ int_mv fcenter_mv;
+
+ int *mvjsadcost = x->nmvjointsadcost;
+ int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
+ // center_mv is scaled down by 8 (>> 3) before being used to cost candidate MVs.
+ fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
+ fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
+ // ref_mv is clamped in place and becomes the starting best_mv.
+ clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
+ ref_row = ref_mv->as_mv.row;
+ ref_col = ref_mv->as_mv.col;
+ *num00 = 0;
+ best_mv->as_mv.row = ref_row;
+ best_mv->as_mv.col = ref_col;
+
+ // Work out the start point for the search
+ in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_stride)) + ref_col);
+ best_address = in_what;
+
+ // Check the starting position
+ bestsad = fn_ptr->sdf(what, what_stride, in_what,
+ in_what_stride, 0x7fffffff)
+ + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost,
+ sad_per_bit);
+
+ // search_param determines the length of the initial step and hence the number of iterations
+ // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
+ ss = &x->ss[search_param * x->searches_per_step];
+ tot_steps = (x->ss_count / x->searches_per_step) - search_param;
+ // i indexes ss[] and is never reset between steps; it starts at 1, so ss[0] is not tested.
+ i = 1;
+
+ for (step = 0; step < tot_steps; step++) {
+ for (j = 0; j < x->searches_per_step; j++) {
+ // Trap illegal vectors
+ this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
+ this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
+ // Bounds are exclusive: a candidate exactly on a min/max limit is rejected.
+ if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
+ (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
+
+ {
+ check_here = ss[i].offset + best_address;
+ thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
+
+ if (thissad < bestsad) {
+ this_mv.as_mv.row = this_row_offset;
+ this_mv.as_mv.col = this_col_offset;
+ thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
+ mvjsadcost, mvsadcost, sad_per_bit);
+
+ if (thissad < bestsad) {
+ bestsad = thissad;
+ best_site = i;
+ }
+ }
+ }
+
+ i++;
+ }
+ // A new best site re-centres the search; else count steps still sitting on the start point.
+ if (best_site != last_site) {
+ best_mv->as_mv.row += ss[best_site].mv.row;
+ best_mv->as_mv.col += ss[best_site].mv.col;
+ best_address += ss[best_site].offset;
+ last_site = best_site;
+ } else if (best_address == in_what)
+ (*num00)++;
+ }
+ // best_mv is scaled by 8 (<< 3) into this_mv before costing it against center_mv.
+ this_mv.as_mv.row = best_mv->as_mv.row << 3;
+ this_mv.as_mv.col = best_mv->as_mv.col << 3;
+
+ if (bestsad == INT_MAX)
+ return INT_MAX;
+ // thissad only receives vf()'s last output argument here; that value is not used.
+ return
+ fn_ptr->vf(what, what_stride, best_address, in_what_stride,
+ (unsigned int *)(&thissad)) +
+ mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit,
+ xd->allow_high_precision_mv);
+}
+/* Diamond SAD search that scores 4 sites per fn_ptr->sdx4df call when a whole step is in range. */
+int vp9_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
+ int_mv *ref_mv, int_mv *best_mv, int search_param,
+ int sad_per_bit, int *num00,
+ vp9_variance_fn_ptr_t *fn_ptr,
+ int *mvjcost, int *mvcost[2], int_mv *center_mv) {
+ int i, j, step;
+
+ unsigned char *what = (*(b->base_src) + b->src);
+ int what_stride = b->src_stride;
+ unsigned char *in_what;
+ int in_what_stride = d->pre_stride;
+ unsigned char *best_address;
+
+ int tot_steps;
+ int_mv this_mv;
+
+ unsigned int bestsad = INT_MAX;
+ int best_site = 0;
+ int last_site = 0;
+
+ int ref_row;
+ int ref_col;
+ int this_row_offset;
+ int this_col_offset;
+ search_site *ss;
+
+ unsigned char *check_here;
+ unsigned int thissad;
+ MACROBLOCKD *xd = &x->e_mbd;
+ int_mv fcenter_mv;
+
+ int *mvjsadcost = x->nmvjointsadcost;
+ int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
+ // center_mv is scaled down by 8 (>> 3) before being used to cost candidate MVs.
+ fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
+ fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
+ // ref_mv is clamped in place and becomes the starting best_mv.
+ clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
+ ref_row = ref_mv->as_mv.row;
+ ref_col = ref_mv->as_mv.col;
+ *num00 = 0;
+ best_mv->as_mv.row = ref_row;
+ best_mv->as_mv.col = ref_col;
+
+ // Work out the start point for the search
+ in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_stride)) + ref_col);
+ best_address = in_what;
+
+ // Check the starting position
+ bestsad = fn_ptr->sdf(what, what_stride,
+ in_what, in_what_stride, 0x7fffffff)
+ + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost,
+ sad_per_bit);
+
+ // search_param determines the length of the initial step and hence the number of iterations
+ // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
+ ss = &x->ss[search_param * x->searches_per_step];
+ tot_steps = (x->ss_count / x->searches_per_step) - search_param;
+ // i indexes ss[] and is never reset between steps; it starts at 1, so ss[0] is not tested.
+ i = 1;
+
+ for (step = 0; step < tot_steps; step++) {
+ int all_in = 1, t;
+ // NOTE(review): assumes ss[i..i+3] are this step's up/down/left/right sites - confirm.
+ // To know if all neighbor points are within the bounds, 4 bounds checking are enough instead of
+ // checking 4 bounds for each points.
+ all_in &= ((best_mv->as_mv.row + ss[i].mv.row) > x->mv_row_min);
+ all_in &= ((best_mv->as_mv.row + ss[i + 1].mv.row) < x->mv_row_max);
+ all_in &= ((best_mv->as_mv.col + ss[i + 2].mv.col) > x->mv_col_min);
+ all_in &= ((best_mv->as_mv.col + ss[i + 3].mv.col) < x->mv_col_max);
+ // all_in: score sites 4 at a time with sdx4df; otherwise bounds-check each site.
+ if (all_in) {
+ unsigned int sad_array[4];
+
+ for (j = 0; j < x->searches_per_step; j += 4) {
+ unsigned char const *block_offset[4];
+
+ for (t = 0; t < 4; t++)
+ block_offset[t] = ss[i + t].offset + best_address;
+
+ fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
+ sad_array);
+ // i advances together with t, so ss[i] is the site that produced sad_array[t].
+ for (t = 0; t < 4; t++, i++) {
+ if (sad_array[t] < bestsad) {
+ this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row;
+ this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col;
+ sad_array[t] += mvsad_err_cost(&this_mv, &fcenter_mv,
+ mvjsadcost, mvsadcost, sad_per_bit);
+
+ if (sad_array[t] < bestsad) {
+ bestsad = sad_array[t];
+ best_site = i;
+ }
+ }
+ }
+ }
+ } else {
+ for (j = 0; j < x->searches_per_step; j++) {
+ // Trap illegal vectors
+ this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
+ this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
+ // Bounds are exclusive: a candidate exactly on a min/max limit is rejected.
+ if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
+ (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) {
+ check_here = ss[i].offset + best_address;
+ thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
+
+ if (thissad < bestsad) {
+ this_mv.as_mv.row = this_row_offset;
+ this_mv.as_mv.col = this_col_offset;
+ thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
+ mvjsadcost, mvsadcost, sad_per_bit);
+
+ if (thissad < bestsad) {
+ bestsad = thissad;
+ best_site = i;
+ }
+ }
+ }
+ i++;
+ }
+ }
+ // A new best site re-centres the search; else count steps still sitting on the start point.
+ if (best_site != last_site) {
+ best_mv->as_mv.row += ss[best_site].mv.row;
+ best_mv->as_mv.col += ss[best_site].mv.col;
+ best_address += ss[best_site].offset;
+ last_site = best_site;
+ } else if (best_address == in_what)
+ (*num00)++;
+ }
+ // best_mv is scaled by 8 (<< 3) into this_mv before costing it against center_mv.
+ this_mv.as_mv.row = best_mv->as_mv.row << 3;
+ this_mv.as_mv.col = best_mv->as_mv.col << 3;
+
+ if (bestsad == INT_MAX)
+ return INT_MAX;
+ // thissad only receives vf()'s last output argument here; that value is not used.
+ return
+ fn_ptr->vf(what, what_stride, best_address, in_what_stride,
+ (unsigned int *)(&thissad)) +
+ mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit,
+ xd->allow_high_precision_mv);
+}
+/* Runs cpi->diamond_search_sad at step_param, then at step_param + n for n up
+ to further_steps, keeping the best result in *dst_mv and returning its cost.
+ do_refine: if the last step (1-away) of the n-step search doesn't pick the
+ center point as the best match, do a final 1-away diamond refining search. */
+int vp9_full_pixel_diamond(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *b,
+ BLOCKD *d, int_mv *mvp_full, int step_param,
+ int sadpb, int further_steps,
+ int do_refine, vp9_variance_fn_ptr_t *fn_ptr,
+ int_mv *ref_mv, int_mv *dst_mv) {
+ int_mv temp_mv;
+ int thissme, n, num00;
+ int bestsme = cpi->diamond_search_sad(x, b, d, mvp_full, &temp_mv,
+ step_param, sadpb, &num00,
+ fn_ptr, x->nmvjointcost,
+ x->mvcost, ref_mv);
+ dst_mv->as_int = temp_mv.as_int;
+ // The num00 reported by a search is how many of the following step sizes get skipped.
+ n = num00;
+ num00 = 0;
+
+ /* If there won't be more n-step search, check to see if refining search is needed. */
+ if (n > further_steps)
+ do_refine = 0;
+
+ while (n < further_steps) {
+ n++;
+ // While num00 is non-zero this step size is skipped; it is only counted down.
+ if (num00)
+ num00--;
+ else {
+ thissme = cpi->diamond_search_sad(x, b, d, mvp_full, &temp_mv,
+ step_param + n, sadpb, &num00,
+ fn_ptr, x->nmvjointcost, x->mvcost,
+ ref_mv);
+
+ /* check to see if refining search is needed. */
+ if (num00 > (further_steps - n))
+ do_refine = 0;
+
+ if (thissme < bestsme) {
+ bestsme = thissme;
+ dst_mv->as_int = temp_mv.as_int;
+ }
+ }
+ }
+
+ /* final 1-away diamond refining search */
+ if (do_refine == 1) {
+ int search_range = 8;
+ int_mv best_mv;
+ best_mv.as_int = dst_mv->as_int;
+ thissme = cpi->refining_search_sad(x, b, d, &best_mv, sadpb, search_range,
+ fn_ptr, x->nmvjointcost, x->mvcost,
+ ref_mv);
+ // The refined MV replaces *dst_mv only when its cost is strictly lower.
+ if (thissme < bestsme) {
+ bestsme = thissme;
+ dst_mv->as_int = best_mv.as_int;
+ }
+ }
+ return bestsme;
+}
+/* Exhaustive SAD search of [ref - distance, ref + distance) clipped to x's MV limits; best MV goes to d->bmi.as_mv.first. */
+int vp9_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
+ int sad_per_bit, int distance,
+ vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost,
+ int *mvcost[2],
+ int_mv *center_mv) {
+ unsigned char *what = (*(b->base_src) + b->src);
+ int what_stride = b->src_stride;
+ unsigned char *in_what;
+ int in_what_stride = d->pre_stride;
+ int mv_stride = d->pre_stride;
+ unsigned char *bestaddress;
+ int_mv *best_mv = &d->bmi.as_mv.first;
+ int_mv this_mv;
+ int bestsad = INT_MAX;
+ int r, c;
+
+ unsigned char *check_here;
+ int thissad;
+ MACROBLOCKD *xd = &x->e_mbd;
+
+ int ref_row = ref_mv->as_mv.row;
+ int ref_col = ref_mv->as_mv.col;
+
+ int row_min = ref_row - distance;
+ int row_max = ref_row + distance;
+ int col_min = ref_col - distance;
+ int col_max = ref_col + distance;
+ int_mv fcenter_mv;
+
+ int *mvjsadcost = x->nmvjointsadcost;
+ int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
+ // center_mv is scaled down by 8 (>> 3) before being used to cost candidate MVs.
+ fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
+ fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
+
+ // Work out the mid point for the search
+ in_what = *(d->base_pre) + d->pre;
+ bestaddress = in_what + (ref_row * d->pre_stride) + ref_col;
+
+ best_mv->as_mv.row = ref_row;
+ best_mv->as_mv.col = ref_col;
+
+ // Baseline value at the centre
+ bestsad = fn_ptr->sdf(what, what_stride, bestaddress,
+ in_what_stride, 0x7fffffff)
+ + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost,
+ sad_per_bit);
+
+ // Apply further limits to prevent us from using vectors that stretch beyond the UMV border
+ if (col_min < x->mv_col_min)
+ col_min = x->mv_col_min;
+
+ if (col_max > x->mv_col_max)
+ col_max = x->mv_col_max;
+
+ if (row_min < x->mv_row_min)
+ row_min = x->mv_row_min;
+
+ if (row_max > x->mv_row_max)
+ row_max = x->mv_row_max;
+ // Upper bounds are exclusive: row_max and col_max themselves are never tested.
+ for (r = row_min; r < row_max; r++) {
+ this_mv.as_mv.row = r;
+ check_here = r * mv_stride + in_what + col_min;
+
+ for (c = col_min; c < col_max; c++) {
+ thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
+ // MV cost is added to every candidate here, with no raw-SAD early-out.
+ this_mv.as_mv.col = c;
+ thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
+ mvjsadcost, mvsadcost, sad_per_bit);
+
+ if (thissad < bestsad) {
+ bestsad = thissad;
+ best_mv->as_mv.row = r;
+ best_mv->as_mv.col = c;
+ bestaddress = check_here;
+ }
+
+ check_here++;
+ }
+ }
+ // best_mv is scaled by 8 (<< 3) into this_mv before costing it against center_mv.
+ this_mv.as_mv.row = best_mv->as_mv.row << 3;
+ this_mv.as_mv.col = best_mv->as_mv.col << 3;
+ // Returns variance + MV cost at the best position; thissad is only vf()'s scratch output.
+ if (bestsad < INT_MAX)
+ return
+ fn_ptr->vf(what, what_stride, bestaddress, in_what_stride,
+ (unsigned int *)(&thissad)) +
+ mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit,
+ xd->allow_high_precision_mv);
+ else
+ return INT_MAX;
+}
+/* Exhaustive SAD search like vp9_full_search_sad_c, scoring 3 consecutive columns per fn_ptr->sdx3f call. */
+int vp9_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
+ int sad_per_bit, int distance,
+ vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost,
+ int *mvcost[2], int_mv *center_mv) {
+ unsigned char *what = (*(b->base_src) + b->src);
+ int what_stride = b->src_stride;
+ unsigned char *in_what;
+ int in_what_stride = d->pre_stride;
+ int mv_stride = d->pre_stride;
+ unsigned char *bestaddress;
+ int_mv *best_mv = &d->bmi.as_mv.first;
+ int_mv this_mv;
+ unsigned int bestsad = INT_MAX;
+ int r, c;
+
+ unsigned char *check_here;
+ unsigned int thissad;
+ MACROBLOCKD *xd = &x->e_mbd;
+
+ int ref_row = ref_mv->as_mv.row;
+ int ref_col = ref_mv->as_mv.col;
+
+ int row_min = ref_row - distance;
+ int row_max = ref_row + distance;
+ int col_min = ref_col - distance;
+ int col_max = ref_col + distance;
+
+ unsigned int sad_array[3];
+ int_mv fcenter_mv;
+
+ int *mvjsadcost = x->nmvjointsadcost;
+ int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
+ // center_mv is scaled down by 8 (>> 3) before being used to cost candidate MVs.
+ fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
+ fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
+
+ // Work out the mid point for the search
+ in_what = *(d->base_pre) + d->pre;
+ bestaddress = in_what + (ref_row * d->pre_stride) + ref_col;
+
+ best_mv->as_mv.row = ref_row;
+ best_mv->as_mv.col = ref_col;
+
+ // Baseline value at the centre
+ bestsad = fn_ptr->sdf(what, what_stride,
+ bestaddress, in_what_stride, 0x7fffffff)
+ + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost,
+ sad_per_bit);
+
+ // Apply further limits to prevent us from using vectors that stretch beyond the UMV border
+ if (col_min < x->mv_col_min)
+ col_min = x->mv_col_min;
+
+ if (col_max > x->mv_col_max)
+ col_max = x->mv_col_max;
+
+ if (row_min < x->mv_row_min)
+ row_min = x->mv_row_min;
+
+ if (row_max > x->mv_row_max)
+ row_max = x->mv_row_max;
+ // Upper bounds are exclusive: row_max and col_max themselves are never tested.
+ for (r = row_min; r < row_max; r++) {
+ this_mv.as_mv.row = r;
+ check_here = r * mv_stride + in_what + col_min;
+ c = col_min;
+ // Batched path: runs while at least 3 columns remain in this row.
+ while ((c + 2) < col_max) {
+ int i;
+
+ fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
+ // sad_array[i] is treated as the SAD for column c + i of this batch.
+ for (i = 0; i < 3; i++) {
+ thissad = sad_array[i];
+
+ if (thissad < bestsad) {
+ this_mv.as_mv.col = c;
+ thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
+ mvjsadcost, mvsadcost, sad_per_bit);
+
+ if (thissad < bestsad) {
+ bestsad = thissad;
+ best_mv->as_mv.row = r;
+ best_mv->as_mv.col = c;
+ bestaddress = check_here;
+ }
+ }
+
+ check_here++;
+ c++;
+ }
+ }
+ // Remaining columns of the row (fewer than 3) are scored one at a time with sdf.
+ while (c < col_max) {
+ thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
+
+ if (thissad < bestsad) {
+ this_mv.as_mv.col = c;
+ thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
+ mvjsadcost, mvsadcost, sad_per_bit);
+
+ if (thissad < bestsad) {
+ bestsad = thissad;
+ best_mv->as_mv.row = r;
+ best_mv->as_mv.col = c;
+ bestaddress = check_here;
+ }
+ }
+
+ check_here++;
+ c++;
+ }
+
+ }
+ // best_mv is scaled by 8 (<< 3) into this_mv before costing it against center_mv.
+ this_mv.as_mv.row = best_mv->as_mv.row << 3;
+ this_mv.as_mv.col = best_mv->as_mv.col << 3;
+ // Returns variance + MV cost at the best position; thissad is only vf()'s scratch output.
+ if (bestsad < INT_MAX)
+ return
+ fn_ptr->vf(what, what_stride, bestaddress, in_what_stride,
+ (unsigned int *)(&thissad)) +
+ mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit,
+ xd->allow_high_precision_mv);
+ else
+ return INT_MAX;
+}
+// Full-pel search of ref_mv +/- distance (clamped to x->mv_*_min/max) using sdx8f, then sdx3f, then sdf in each row.
+int vp9_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
+ int sad_per_bit, int distance,
+ vp9_variance_fn_ptr_t *fn_ptr,
+ int *mvjcost, int *mvcost[2],
+ int_mv *center_mv) {
+ unsigned char *what = (*(b->base_src) + b->src);
+ int what_stride = b->src_stride;
+ unsigned char *in_what;
+ int in_what_stride = d->pre_stride;
+ int mv_stride = d->pre_stride;
+ unsigned char *bestaddress;
+ int_mv *best_mv = &d->bmi.as_mv.first;
+ int_mv this_mv;
+ unsigned int bestsad = INT_MAX;
+ int r, c;
+
+ unsigned char *check_here;
+ unsigned int thissad;
+ MACROBLOCKD *xd = &x->e_mbd;
+
+ int ref_row = ref_mv->as_mv.row;
+ int ref_col = ref_mv->as_mv.col;
+ // Unclamped search window: ref_mv +/- distance in each direction
+ int row_min = ref_row - distance;
+ int row_max = ref_row + distance;
+ int col_min = ref_col - distance;
+ int col_max = ref_col + distance;
+ // sad_array8 receives the sdx8f results, sad_array the sdx3f results
+ DECLARE_ALIGNED_ARRAY(16, unsigned short, sad_array8, 8);
+ unsigned int sad_array[3];
+ int_mv fcenter_mv;
+
+ int *mvjsadcost = x->nmvjointsadcost;
+ int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
+ // center_mv components shifted right by 3 for use with mvsad_err_cost()
+ fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
+ fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
+
+ // Work out the mid point for the search
+ in_what = *(d->base_pre) + d->pre;
+ bestaddress = in_what + (ref_row * d->pre_stride) + ref_col;
+ // The best vector is written through best_mv, i.e. into d->bmi.as_mv.first
+ best_mv->as_mv.row = ref_row;
+ best_mv->as_mv.col = ref_col;
+
+ // Baseline value at the centre
+ bestsad = fn_ptr->sdf(what, what_stride,
+ bestaddress, in_what_stride, 0x7fffffff)
+ + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost,
+ sad_per_bit);
+
+ // Apply further limits to prevent us looking using vectors that stretch beyond the UMV border
+ if (col_min < x->mv_col_min)
+ col_min = x->mv_col_min;
+
+ if (col_max > x->mv_col_max)
+ col_max = x->mv_col_max;
+
+ if (row_min < x->mv_row_min)
+ row_min = x->mv_row_min;
+
+ if (row_max > x->mv_row_max)
+ row_max = x->mv_row_max;
+ // Scan row by row; row_max and col_max themselves are not visited
+ for (r = row_min; r < row_max; r++) {
+ this_mv.as_mv.row = r;
+ check_here = r * mv_stride + in_what + col_min;
+ c = col_min;
+ // Eight columns per sdx8f call while at least eight remain in the row
+ while ((c + 7) < col_max) {
+ int i;
+
+ fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8);
+
+ for (i = 0; i < 8; i++) {
+ thissad = (unsigned int)sad_array8[i];
+ // The vector cost is only added when the raw SAD already beats bestsad
+ if (thissad < bestsad) {
+ this_mv.as_mv.col = c;
+ thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
+ mvjsadcost, mvsadcost, sad_per_bit);
+
+ if (thissad < bestsad) {
+ bestsad = thissad;
+ best_mv->as_mv.row = r;
+ best_mv->as_mv.col = c;
+ bestaddress = check_here;
+ }
+ }
+
+ check_here++;
+ c++;
+ }
+ }
+ // Then three columns per sdx3f call
+ while ((c + 2) < col_max) {
+ int i;
+
+ fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
+
+ for (i = 0; i < 3; i++) {
+ thissad = sad_array[i];
+
+ if (thissad < bestsad) {
+ this_mv.as_mv.col = c;
+ thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
+ mvjsadcost, mvsadcost, sad_per_bit);
+
+ if (thissad < bestsad) {
+ bestsad = thissad;
+ best_mv->as_mv.row = r;
+ best_mv->as_mv.col = c;
+ bestaddress = check_here;
+ }
+ }
+
+ check_here++;
+ c++;
+ }
+ }
+ // Remaining columns of the row, one sdf call each
+ while (c < col_max) {
+ thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
+
+ if (thissad < bestsad) {
+ this_mv.as_mv.col = c;
+ thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
+ mvjsadcost, mvsadcost, sad_per_bit);
+
+ if (thissad < bestsad) {
+ bestsad = thissad;
+ best_mv->as_mv.row = r;
+ best_mv->as_mv.col = c;
+ bestaddress = check_here;
+ }
+ }
+
+ check_here++;
+ c++;
+ }
+ }
+ // The best vector is shifted left by 3 before being costed against center_mv
+ this_mv.as_mv.row = best_mv->as_mv.row << 3;
+ this_mv.as_mv.col = best_mv->as_mv.col << 3;
+ // Returns the vf() result at the best position plus mv_err_cost(), or INT_MAX
+ if (bestsad < INT_MAX)
+ return
+ fn_ptr->vf(what, what_stride, bestaddress, in_what_stride,
+ (unsigned int *)(&thissad)) +
+ mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit,
+ xd->allow_high_precision_mv);
+ else
+ return INT_MAX;
+}
+int vp9_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
+ int_mv *ref_mv, int error_per_bit,
+ int search_range, vp9_variance_fn_ptr_t *fn_ptr,
+ int *mvjcost, int *mvcost[2], int_mv *center_mv) {
+ MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}};
+ int i, j;
+ short this_row_offset, this_col_offset;
+ // Refinement: up to search_range steps, each moving ref_mv to the best of its four neighbours by SAD plus mv cost
+ int what_stride = b->src_stride;
+ int in_what_stride = d->pre_stride;
+ unsigned char *what = (*(b->base_src) + b->src);
+ unsigned char *best_address = (unsigned char *)(*(d->base_pre) + d->pre +
+ (ref_mv->as_mv.row * (d->pre_stride)) + ref_mv->as_mv.col);
+ unsigned char *check_here;
+ unsigned int thissad;
+ int_mv this_mv;
+ unsigned int bestsad = INT_MAX;
+ MACROBLOCKD *xd = &x->e_mbd;
+ int_mv fcenter_mv;
+ // ref_mv is updated in place and holds the refined vector on return
+ int *mvjsadcost = x->nmvjointsadcost;
+ int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
+
+ fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
+ fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
+ // Cost of the starting position
+ bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7fffffff) +
+ mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit);
+
+ for (i = 0; i < search_range; i++) {
+ int best_site = -1;
+
+ for (j = 0; j < 4; j++) {
+ this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
+ this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
+ // Only candidates strictly inside the x->mv_*_min/max limits are evaluated
+ if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
+ (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) {
+ check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col + best_address;
+ thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
+
+ if (thissad < bestsad) {
+ this_mv.as_mv.row = this_row_offset;
+ this_mv.as_mv.col = this_col_offset;
+ thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost,
+ mvsadcost, error_per_bit);
+
+ if (thissad < bestsad) {
+ bestsad = thissad;
+ best_site = j;
+ }
+ }
+ }
+ }
+ // Stop when no neighbour improved on the current position; otherwise step to the best one
+ if (best_site == -1)
+ break;
+ else {
+ ref_mv->as_mv.row += neighbors[best_site].row;
+ ref_mv->as_mv.col += neighbors[best_site].col;
+ best_address += (neighbors[best_site].row) * in_what_stride + neighbors[best_site].col;
+ }
+ }
+ // The refined vector is shifted left by 3 before being costed against center_mv
+ this_mv.as_mv.row = ref_mv->as_mv.row << 3;
+ this_mv.as_mv.col = ref_mv->as_mv.col << 3;
+ // Returns the vf() result at the best position plus mv_err_cost(), or INT_MAX
+ if (bestsad < INT_MAX)
+ return
+ fn_ptr->vf(what, what_stride, best_address, in_what_stride,
+ (unsigned int *)(&thissad)) +
+ mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit,
+ xd->allow_high_precision_mv);
+ else
+ return INT_MAX;
+}
+// Four-neighbour refinement of ref_mv that uses one sdx4df call per step when all four neighbours are in range.
+int vp9_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
+ int_mv *ref_mv, int error_per_bit,
+ int search_range, vp9_variance_fn_ptr_t *fn_ptr,
+ int *mvjcost, int *mvcost[2], int_mv *center_mv) {
+ MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}};
+ int i, j;
+ short this_row_offset, this_col_offset;
+
+ int what_stride = b->src_stride;
+ int in_what_stride = d->pre_stride;
+ unsigned char *what = (*(b->base_src) + b->src);
+ unsigned char *best_address = (unsigned char *)(*(d->base_pre) + d->pre +
+ (ref_mv->as_mv.row * (d->pre_stride)) + ref_mv->as_mv.col);
+ unsigned char *check_here;
+ unsigned int thissad;
+ int_mv this_mv;
+ unsigned int bestsad = INT_MAX;
+ MACROBLOCKD *xd = &x->e_mbd;
+ int_mv fcenter_mv;
+ // ref_mv is updated in place and holds the refined vector on return
+ int *mvjsadcost = x->nmvjointsadcost;
+ int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
+
+ fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
+ fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
+ // Cost of the starting position
+ bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7fffffff) +
+ mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit);
+
+ for (i = 0; i < search_range; i++) {
+ int best_site = -1;
+ int all_in = 1;
+ // all_in stays 1 only if every neighbour lies strictly inside the x->mv_*_min/max limits
+ all_in &= ((ref_mv->as_mv.row - 1) > x->mv_row_min);
+ all_in &= ((ref_mv->as_mv.row + 1) < x->mv_row_max);
+ all_in &= ((ref_mv->as_mv.col - 1) > x->mv_col_min);
+ all_in &= ((ref_mv->as_mv.col + 1) < x->mv_col_max);
+
+ if (all_in) {
+ unsigned int sad_array[4];
+ unsigned char const *block_offset[4];  // one row up, one column left, one column right, one row down
+ block_offset[0] = best_address - in_what_stride;
+ block_offset[1] = best_address - 1;
+ block_offset[2] = best_address + 1;
+ block_offset[3] = best_address + in_what_stride;
+ // One sdx4df call fills sad_array[] for all four candidates
+ fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array);
+
+ for (j = 0; j < 4; j++) {
+ if (sad_array[j] < bestsad) {
+ this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[j].row;
+ this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[j].col;
+ sad_array[j] += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost,
+ mvsadcost, error_per_bit);
+
+ if (sad_array[j] < bestsad) {
+ bestsad = sad_array[j];
+ best_site = j;
+ }
+ }
+ }
+ } else {
+ for (j = 0; j < 4; j++) {
+ this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
+ this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
+ // Near the limits: test each neighbour with sdf, skipping the out-of-range ones
+ if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
+ (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) {
+ check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col + best_address;
+ thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
+
+ if (thissad < bestsad) {
+ this_mv.as_mv.row = this_row_offset;
+ this_mv.as_mv.col = this_col_offset;
+ thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost,
+ mvsadcost, error_per_bit);
+
+ if (thissad < bestsad) {
+ bestsad = thissad;
+ best_site = j;
+ }
+ }
+ }
+ }
+ }
+ // Stop when no neighbour improved on the current position; otherwise step to the best one
+ if (best_site == -1)
+ break;
+ else {
+ ref_mv->as_mv.row += neighbors[best_site].row;
+ ref_mv->as_mv.col += neighbors[best_site].col;
+ best_address += (neighbors[best_site].row) * in_what_stride + neighbors[best_site].col;
+ }
+ }
+ // The refined vector is shifted left by 3 before being costed against center_mv
+ this_mv.as_mv.row = ref_mv->as_mv.row << 3;
+ this_mv.as_mv.col = ref_mv->as_mv.col << 3;
+ // Returns the vf() result at the best position plus mv_err_cost(), or INT_MAX
+ if (bestsad < INT_MAX)
+ return
+ fn_ptr->vf(what, what_stride, best_address, in_what_stride,
+ (unsigned int *)(&thissad)) +
+ mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit,
+ xd->allow_high_precision_mv);
+ else
+ return INT_MAX;
+}
+
+
+
+#ifdef ENTROPY_STATS
+void print_mode_context(void) {  // Appends a vp9_mode_contexts[6][4] table built from mv_ref_ct to vp9_modecont.c
+ FILE *f = fopen("vp9_modecont.c", "a");
+ int i, j;
+ if (f == NULL) return;  // the file could not be opened: nothing can be written, so do not touch f
+ fprintf(f, "#include \"vp9_entropy.h\"\n");
+ fprintf(f, "const int vp9_mode_contexts[6][4] =");
+ fprintf(f, "{\n");
+ for (j = 0; j < 6; j++) {
+ fprintf(f, " {/* %d */ ", j);
+ fprintf(f, " ");
+ for (i = 0; i < 4; i++) {
+ int this_prob;
+ int count;
+
+ // context probs: share of mv_ref_ct[j][i][0] in the pair total, scaled by 256 (128 if the pair is empty)
+ count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1];
+ if (count)
+ this_prob = 256 * mv_ref_ct[j][i][0] / count;
+ else
+ this_prob = 128;
+ // Zero is bumped to 1. NOTE(review): 256 is possible when mv_ref_ct[j][i][1] is 0 - confirm that is acceptable
+ if (this_prob == 0)
+ this_prob = 1;
+ fprintf(f, "%5d, ", this_prob);
+ }
+ fprintf(f, " },\n");
+ }
+
+ fprintf(f, "};\n");
+ fclose(f);
+}
+
+/* MV ref count ENTROPY_STATS stats code */
+void init_mv_ref_counts() {  // zeroes both counter tables that accum_mv_refs() increments
+ vpx_memset(mv_ref_ct, 0, sizeof(mv_ref_ct));
+ vpx_memset(mv_mode_cts, 0, sizeof(mv_mode_cts));
+}
+// Updates mv_ref_ct and mv_mode_cts for mode m, testing ZEROMV, NEARESTMV, NEARMV and NEWMV in that order.
+void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4]) {
+ if (m == ZEROMV) {
+ ++mv_ref_ct [ct[0]] [0] [0];
+ ++mv_mode_cts[0][0];
+ } else {
+ ++mv_ref_ct [ct[0]] [0] [1];
+ ++mv_mode_cts[0][1];
+ // At level k, slot [0] counts a match and slot [1] a miss; ct[k] selects the mv_ref_ct row
+ if (m == NEARESTMV) {
+ ++mv_ref_ct [ct[1]] [1] [0];
+ ++mv_mode_cts[1][0];
+ } else {
+ ++mv_ref_ct [ct[1]] [1] [1];
+ ++mv_mode_cts[1][1];
+
+ if (m == NEARMV) {
+ ++mv_ref_ct [ct[2]] [2] [0];
+ ++mv_mode_cts[2][0];
+ } else {
+ ++mv_ref_ct [ct[2]] [2] [1];
+ ++mv_mode_cts[2][1];
+
+ if (m == NEWMV) {
+ ++mv_ref_ct [ct[3]] [3] [0];
+ ++mv_mode_cts[3][0];
+ } else {
+ ++mv_ref_ct [ct[3]] [3] [1];
+ ++mv_mode_cts[3][1];
+ }
+ }
+ }
+ }
+}
+
+#endif/* END MV ref count ENTROPY_STATS stats code */
diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h
new file mode 100644
index 0000000..c052e16
--- /dev/null
+++ b/vp9/encoder/vp9_mcomp.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_ENCODER_VP9_MCOMP_H_
+#define VP9_ENCODER_VP9_MCOMP_H_
+
+#include "vp9/encoder/vp9_block.h"
+#include "vp9/encoder/vp9_variance.h"
+
+#ifdef ENTROPY_STATS
+extern void init_mv_ref_counts();
+extern void accum_mv_refs(MB_PREDICTION_MODE, const int near_mv_ref_cts[4]);
+#endif
+
+
+#define MAX_MVSEARCH_STEPS 8 // The maximum number of steps in a step search given the largest allowed initial step
+#define MAX_FULL_PEL_VAL ((1 << (MAX_MVSEARCH_STEPS)) - 1) // Max full pel mv specified in 1 pel units
+#define MAX_FIRST_STEP (1 << (MAX_MVSEARCH_STEPS-1)) // Maximum size of the first step in full pel units
+
+extern void vp9_clamp_mv_min_max(MACROBLOCK *x, int_mv *ref_mv);
+extern int vp9_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvjcost,
+ int *mvcost[2], int Weight, int ishp);
+extern void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride);
+extern void vp9_init3smotion_compensation(MACROBLOCK *x, int stride);
+// Runs sequence of diamond searches in smaller steps for RD
+struct VP9_COMP;
+int vp9_full_pixel_diamond(struct VP9_COMP *cpi, MACROBLOCK *x, BLOCK *b,
+ BLOCKD *d, int_mv *mvp_full, int step_param,
+ int sadpb, int further_steps, int do_refine,
+ vp9_variance_fn_ptr_t *fn_ptr,
+ int_mv *ref_mv, int_mv *dst_mv);
+
+extern int vp9_hex_search
+(
+ MACROBLOCK *x,
+ BLOCK *b,
+ BLOCKD *d,
+ int_mv *ref_mv,
+ int_mv *best_mv,
+ int search_param,
+ int error_per_bit,
+ const vp9_variance_fn_ptr_t *vf,
+ int *mvjsadcost, int *mvsadcost[2],
+ int *mvjcost, int *mvcost[2],
+ int_mv *center_mv
+);
+
+typedef int (fractional_mv_step_fp) (MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv
+ *bestmv, int_mv *ref_mv, int error_per_bit, const vp9_variance_fn_ptr_t *vfp,
+ int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse);
+extern fractional_mv_step_fp vp9_find_best_sub_pixel_step_iteratively;
+extern fractional_mv_step_fp vp9_find_best_sub_pixel_step;
+extern fractional_mv_step_fp vp9_find_best_half_pixel_step;
+
+typedef int (*vp9_full_search_fn_t)(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
+ int_mv *ref_mv, int sad_per_bit,
+ int distance, vp9_variance_fn_ptr_t *fn_ptr,
+ int *mvjcost, int *mvcost[2],
+ int_mv *center_mv);
+
+typedef int (*vp9_refining_search_fn_t)(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
+ int_mv *ref_mv, int sad_per_bit,
+ int distance,
+ vp9_variance_fn_ptr_t *fn_ptr,
+ int *mvjcost, int *mvcost[2],
+ int_mv *center_mv);
+
+typedef int (*vp9_diamond_search_fn_t)(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
+ int_mv *ref_mv, int_mv *best_mv,
+ int search_param, int sad_per_bit,
+ int *num00,
+ vp9_variance_fn_ptr_t *fn_ptr,
+ int *mvjcost, int *mvcost[2],
+ int_mv *center_mv);
+
+
+#endif
diff --git a/vp9/encoder/vp9_modecosts.c b/vp9/encoder/vp9_modecosts.c
new file mode 100644
index 0000000..7d9462f
--- /dev/null
+++ b/vp9/encoder/vp9_modecosts.c
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vp9/common/vp9_blockd.h"
+#include "vp9/encoder/vp9_onyx_int.h"
+#include "vp9/encoder/vp9_treewriter.h"
+#include "vp9/common/vp9_entropymode.h"
+
+// Fills the mode cost tables in c->mb by calling vp9_cost_tokens() on the probabilities held in c->common.
+void vp9_init_mode_costs(VP9_COMP *c) {
+ VP9_COMMON *x = &c->common;
+ const vp9_tree_p T = vp9_bmode_tree;
+ const vp9_tree_p KT = vp9_kf_bmode_tree;
+ int i, j;
+ // Key-frame block mode costs for every [i][j] pair, from kf_bmode_prob and the key-frame tree
+ for (i = 0; i < VP9_KF_BINTRAMODES; i++) {
+ for (j = 0; j < VP9_KF_BINTRAMODES; j++) {
+ vp9_cost_tokens((int *)c->mb.bmode_costs[i][j],
+ x->kf_bmode_prob[i][j], KT);
+ }
+ }
+ // NOTE(review): both calls below write into inter_bmode_costs - presumably the two trees cover different entries; confirm
+ vp9_cost_tokens((int *)c->mb.inter_bmode_costs, x->fc.bmode_prob, T);
+ vp9_cost_tokens((int *)c->mb.inter_bmode_costs,
+ x->fc.sub_mv_ref_prob[0], vp9_sub_mv_ref_tree);
+
+ // TODO(rbultje) separate tables for superblock costing?
+ vp9_cost_tokens(c->mb.mbmode_cost[1], x->fc.ymode_prob, vp9_ymode_tree);
+ vp9_cost_tokens(c->mb.mbmode_cost[0],
+ x->kf_ymode_prob[c->common.kf_ymode_probs_index],
+ vp9_kf_ymode_tree);
+ vp9_cost_tokens(c->mb.intra_uv_mode_cost[1],
+ x->fc.uv_mode_prob[VP9_YMODES - 1], vp9_uv_mode_tree);
+ vp9_cost_tokens(c->mb.intra_uv_mode_cost[0],
+ x->kf_uv_mode_prob[VP9_YMODES - 1], vp9_uv_mode_tree);
+ vp9_cost_tokens(c->mb.i8x8_mode_costs,
+ x->fc.i8x8_mode_prob, vp9_i8x8_mode_tree);
+ // One cost table per switchable_interp_prob[] entry, indices 0..VP9_SWITCHABLE_FILTERS inclusive
+ for (i = 0; i <= VP9_SWITCHABLE_FILTERS; ++i)
+ vp9_cost_tokens((int *)c->mb.switchable_interp_costs[i],
+ x->fc.switchable_interp_prob[i],
+ vp9_switchable_interp_tree);
+}
diff --git a/vp9/encoder/vp9_modecosts.h b/vp9/encoder/vp9_modecosts.h
new file mode 100644
index 0000000..1f2cc56
--- /dev/null
+++ b/vp9/encoder/vp9_modecosts.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_ENCODER_VP9_MODECOSTS_H_
+#define VP9_ENCODER_VP9_MODECOSTS_H_
+
+void vp9_init_mode_costs(VP9_COMP *x);
+
+#endif
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
new file mode 100644
index 0000000..f5fb686
--- /dev/null
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -0,0 +1,4518 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_config.h"
+#include "vp9/common/vp9_onyxc_int.h"
+#include "vp9/encoder/vp9_onyx_int.h"
+#include "vp9/common/vp9_systemdependent.h"
+#include "vp9/encoder/vp9_quantize.h"
+#include "vp9/common/vp9_alloccommon.h"
+#include "vp9/encoder/vp9_mcomp.h"
+#include "vp9/encoder/vp9_firstpass.h"
+#include "vp9/encoder/vp9_psnr.h"
+#include "vpx_scale/vpxscale.h"
+#include "vp9/common/vp9_extend.h"
+#include "vp9/encoder/vp9_ratectrl.h"
+#include "vp9/common/vp9_quant_common.h"
+#include "vp9/encoder/vp9_segmentation.h"
+#include "./vp9_rtcd.h"
+#include "./vpx_scale_rtcd.h"
+#if CONFIG_POSTPROC
+#include "vp9/common/vp9_postproc.h"
+#endif
+#include "vpx_mem/vpx_mem.h"
+#include "vp9/common/vp9_swapyv12buffer.h"
+#include "vpx_ports/vpx_timer.h"
+
+#include "vp9/common/vp9_seg_common.h"
+#include "vp9/encoder/vp9_mbgraph.h"
+#include "vp9/common/vp9_pred_common.h"
+#include "vp9/encoder/vp9_rdopt.h"
+#include "vp9/encoder/vp9_bitstream.h"
+#include "vp9/encoder/vp9_picklpf.h"
+#include "vp9/common/vp9_mvref_common.h"
+#include "vp9/encoder/vp9_temporal_filter.h"
+
+#include <math.h>
+#include <stdio.h>
+#include <limits.h>
+
+extern void print_tree_update_probs();
+
+static void set_default_lf_deltas(VP9_COMP *cpi);
+
+#define DEFAULT_INTERP_FILTER EIGHTTAP /* SWITCHABLE for better performance */
+#define SEARCH_BEST_FILTER 0 /* to search exhaustively for
+ best filter */
+#define RESET_FOREACH_FILTER 0 /* whether to reset the encoder state
+ before trying each new filter */
+#define SHARP_FILTER_QTHRESH 0 /* Q threshold for 8-tap sharp filter */
+
+#define ALTREF_HIGH_PRECISION_MV 1 /* whether to use high precision mv
+ for altref computation */
+#define HIGH_PRECISION_MV_QTHRESH 200 /* Q threshold for use of high precision
+ mv. Choose a very high value for
+ now so that HIGH_PRECISION is always
+ chosen */
+
+#if CONFIG_INTERNAL_STATS
+#include "math.h"
+
+extern double vp9_calc_ssim(YV12_BUFFER_CONFIG *source,
+ YV12_BUFFER_CONFIG *dest, int lumamask,
+ double *weight);
+
+
+extern double vp9_calc_ssimg(YV12_BUFFER_CONFIG *source,
+ YV12_BUFFER_CONFIG *dest, double *ssim_y,
+ double *ssim_u, double *ssim_v);
+
+
+#endif
+
+// #define OUTPUT_YUV_REC
+
+#ifdef OUTPUT_YUV_SRC
+FILE *yuv_file;
+#endif
+#ifdef OUTPUT_YUV_REC
+FILE *yuv_rec_file;
+#endif
+
+#if 0
+FILE *framepsnr;
+FILE *kf_list;
+FILE *keyfile;
+#endif
+
+#if 0
+extern int skip_true_count;
+extern int skip_false_count;
+#endif
+
+
+#ifdef ENTROPY_STATS
+extern int intra_mode_stats[VP9_KF_BINTRAMODES]
+ [VP9_KF_BINTRAMODES]
+ [VP9_KF_BINTRAMODES];
+#endif
+
+#ifdef NMV_STATS
+extern void init_nmvstats();
+extern void print_nmvstats();
+#endif
+
+#ifdef SPEEDSTATS
+unsigned int frames_at_speed[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+#endif
+
+#if defined(SECTIONBITS_OUTPUT)
+extern unsigned __int64 Sectionbits[500];
+#endif
+#ifdef MODE_STATS
+extern INT64 Sectionbits[500];
+extern unsigned int y_modes[VP9_YMODES];
+extern unsigned int i8x8_modes[VP9_I8X8_MODES];
+extern unsigned int uv_modes[VP9_UV_MODES];
+extern unsigned int uv_modes_y[VP9_YMODES][VP9_UV_MODES];
+extern unsigned int b_modes[B_MODE_COUNT];
+extern unsigned int inter_y_modes[MB_MODE_COUNT];
+extern unsigned int inter_uv_modes[VP9_UV_MODES];
+extern unsigned int inter_b_modes[B_MODE_COUNT];
+#endif
+
+extern void vp9_init_quantizer(VP9_COMP *cpi);
+
+static int base_skip_false_prob[QINDEX_RANGE][3];
+
+// Tables relating active max Q to active min Q
+static int kf_low_motion_minq[QINDEX_RANGE];
+static int kf_high_motion_minq[QINDEX_RANGE];
+static int gf_low_motion_minq[QINDEX_RANGE];
+static int gf_high_motion_minq[QINDEX_RANGE];
+static int inter_minq[QINDEX_RANGE];
+
+// Functions to compute the active minq lookup table entries based on a
+// formulaic approach to facilitate easier adjustment of the Q tables.
+// The formulae were derived from computing a 3rd order polynomial best
+// fit to the original data (after plotting real maxq vs minq (not q index))
+static int calculate_minq_index(double maxq,
+ double x3, double x2, double x, double c) {
+ int i;
+ double minqtarget;
+ // Cubic in maxq: x3*maxq^3 + x2*maxq^2 + x*maxq + c
+ minqtarget = ((x3 * maxq * maxq * maxq) +
+ (x2 * maxq * maxq) +
+ (x * maxq) +
+ c);
+ // The target is never allowed to exceed maxq
+ if (minqtarget > maxq)
+ minqtarget = maxq;
+ // Return the first q index whose q value reaches the target, or the last index if none does
+ for (i = 0; i < QINDEX_RANGE; i++) {
+ if (minqtarget <= vp9_convert_qindex_to_q(i))
+ return i;
+ }
+ return QINDEX_RANGE - 1;
+}
+// Fills the five active-min-Q lookup tables, one entry per q index, using per-table cubic coefficients.
+static void init_minq_luts(void) {
+ int i;
+ double maxq;
+
+ for (i = 0; i < QINDEX_RANGE; i++) {
+ maxq = vp9_convert_qindex_to_q(i);
+
+ // Arguments after maxq are the x3, x2, x and c coefficients; c is 0.0 for every table
+ kf_low_motion_minq[i] = calculate_minq_index(maxq,
+ 0.0000003,
+ -0.000015,
+ 0.074,
+ 0.0);
+ kf_high_motion_minq[i] = calculate_minq_index(maxq,
+ 0.0000004,
+ -0.000125,
+ 0.14,
+ 0.0);
+ gf_low_motion_minq[i] = calculate_minq_index(maxq,
+ 0.0000015,
+ -0.0009,
+ 0.33,
+ 0.0);
+ gf_high_motion_minq[i] = calculate_minq_index(maxq,
+ 0.0000021,
+ -0.00125,
+ 0.45,
+ 0.0);
+ inter_minq[i] = calculate_minq_index(maxq,
+ 0.00000271,
+ -0.00113,
+ 0.697,
+ 0.0);
+
+ }
+}
+// Points mb->mvcost and mb->mvsadcost at the _hp tables when allow_high_precision_mv is set, else at the regular ones.
+static void set_mvcost(MACROBLOCK *mb) {
+ if (mb->e_mbd.allow_high_precision_mv) {
+ mb->mvcost = mb->nmvcost_hp;
+ mb->mvsadcost = mb->nmvsadcost_hp;
+
+ } else {
+ mb->mvcost = mb->nmvcost;
+ mb->mvsadcost = mb->nmvsadcost;
+ }
+}
+static void init_base_skip_probs(void) {
+ int i;
+ double q;
+ int skip_prob, t;
+ // Fills base_skip_false_prob[i][0..2] for every q index from an exponential decay in q
+ for (i = 0; i < QINDEX_RANGE; i++) {
+ q = vp9_convert_qindex_to_q(i);
+
+ // Exponential decay calculation of baseline skip prob with clamping
+ // Based on crude best fit of old table.
+ t = (int)(564.25 * pow(2.71828, (-0.012 * q)));
+ // Entry [1] uses t as is, entries [2] and [0] use t * 3 / 4 and t * 5 / 4; each is clamped to 1..255
+ skip_prob = t;
+ if (skip_prob < 1)
+ skip_prob = 1;
+ else if (skip_prob > 255)
+ skip_prob = 255;
+ base_skip_false_prob[i][1] = skip_prob;
+
+ skip_prob = t * 3 / 4;
+ if (skip_prob < 1)
+ skip_prob = 1;
+ else if (skip_prob > 255)
+ skip_prob = 255;
+ base_skip_false_prob[i][2] = skip_prob;
+
+ skip_prob = t * 5 / 4;
+ if (skip_prob < 1)
+ skip_prob = 1;
+ else if (skip_prob > 255)
+ skip_prob = 255;
+ base_skip_false_prob[i][0] = skip_prob;
+ }
+}
+// On non-key frames, calls vp9_update_skip_probs() and saves mbskip_pred_probs and base_qindex in a per-frame-kind slot.
+static void update_base_skip_probs(VP9_COMP *cpi) {
+ VP9_COMMON *cm = &cpi->common;
+
+ if (cm->frame_type != KEY_FRAME) {
+ vp9_update_skip_probs(cpi);
+ // Slot 2: refresh_alt_ref_frame set, slot 1: refresh_golden_frame set, slot 0: neither
+ if (cm->refresh_alt_ref_frame) {
+ int k;
+ for (k = 0; k < MBSKIP_CONTEXTS; ++k)
+ cpi->last_skip_false_probs[2][k] = cm->mbskip_pred_probs[k];
+ cpi->last_skip_probs_q[2] = cm->base_qindex;
+ } else if (cpi->common.refresh_golden_frame) {
+ int k;
+ for (k = 0; k < MBSKIP_CONTEXTS; ++k)
+ cpi->last_skip_false_probs[1][k] = cm->mbskip_pred_probs[k];
+ cpi->last_skip_probs_q[1] = cm->base_qindex;
+ } else {
+ int k;
+ for (k = 0; k < MBSKIP_CONTEXTS; ++k)
+ cpi->last_skip_false_probs[0][k] = cm->mbskip_pred_probs[k];
+ cpi->last_skip_probs_q[0] = cm->base_qindex;
+
+ // update the baseline table for the current q
+ for (k = 0; k < MBSKIP_CONTEXTS; ++k)
+ cpi->base_skip_false_prob[cm->base_qindex][k] =
+ cm->mbskip_pred_probs[k];
+ }
+ }
+
+}
+// Runs the table initialisation calls once; later calls return without doing anything.
+void vp9_initialize_enc() {
+ static int init_done = 0;
+ // init_done keeps its value between calls, so the block below executes only the first time
+ if (!init_done) {
+ vp9_initialize_common();
+ vp9_tokenize_initialize();
+ vp9_init_quant_tables();
+ vp9_init_me_luts();
+ init_minq_luts();
+ init_base_skip_probs();
+ init_done = 1;
+ }
+}
+#ifdef PACKET_TESTING
+extern FILE *vpxlogc;
+#endif
+// Resets the segmentation and loop filter delta state in cpi->mb.e_mbd, then applies set_default_lf_deltas().
+static void setup_features(VP9_COMP *cpi) {
+ MACROBLOCKD *xd = &cpi->mb.e_mbd;
+
+ // Set up default state for MB feature flags
+
+ xd->segmentation_enabled = 0; // Default segmentation disabled
+
+ xd->update_mb_segmentation_map = 0;
+ xd->update_mb_segmentation_data = 0;
+ vpx_memset(xd->mb_segment_tree_probs, 255, sizeof(xd->mb_segment_tree_probs));
+
+ vp9_clearall_segfeatures(xd);
+ // Loop filter deltas: flags cleared, then each delta buffer zeroed using its own sizeof (assumes arrays, not pointers)
+ xd->mode_ref_lf_delta_enabled = 0;
+ xd->mode_ref_lf_delta_update = 0;
+ vpx_memset(xd->ref_lf_deltas, 0, sizeof(xd->ref_lf_deltas));
+ vpx_memset(xd->mode_lf_deltas, 0, sizeof(xd->mode_lf_deltas));
+ vpx_memset(xd->last_ref_lf_deltas, 0, sizeof(xd->last_ref_lf_deltas));
+ vpx_memset(xd->last_mode_lf_deltas, 0, sizeof(xd->last_mode_lf_deltas));
+
+ set_default_lf_deltas(cpi);
+
+}
+
+// Releases the buffers held by cpi; every pointer passed to vpx_free() is reset afterwards.
+static void dealloc_compressor_data(VP9_COMP *cpi) {
+ vpx_free(cpi->tplist);
+ cpi->tplist = NULL;
+
+ // Delete last frame MV storage buffers
+ vpx_free(cpi->lfmv);
+ cpi->lfmv = 0;
+
+ vpx_free(cpi->lf_ref_frame_sign_bias);
+ cpi->lf_ref_frame_sign_bias = 0;
+
+ vpx_free(cpi->lf_ref_frame);
+ cpi->lf_ref_frame = 0;
+
+ // Delete segmentation maps
+ vpx_free(cpi->segmentation_map);
+ cpi->segmentation_map = 0;
+ vpx_free(cpi->common.last_frame_seg_map);
+ cpi->common.last_frame_seg_map = 0;
+ vpx_free(cpi->coding_context.last_frame_seg_map_copy);
+ cpi->coding_context.last_frame_seg_map_copy = 0;
+
+ vpx_free(cpi->active_map);
+ cpi->active_map = 0;
+ // Frame buffers are released through their own de-alloc helpers
+ vp9_de_alloc_frame_buffers(&cpi->common);
+
+ vp8_yv12_de_alloc_frame_buffer(&cpi->last_frame_uf);
+ vp8_yv12_de_alloc_frame_buffer(&cpi->scaled_source);
+#if VP9_TEMPORAL_ALT_REF
+ vp8_yv12_de_alloc_frame_buffer(&cpi->alt_ref_buffer);
+#endif
+ vp9_lookahead_destroy(cpi->lookahead);
+ // NOTE(review): cpi->lookahead is not reset here, unlike the vpx_free()'d pointers - confirm that is intended
+ vpx_free(cpi->tok);
+ cpi->tok = 0;
+
+ // Structure used to monitor GF usage
+ vpx_free(cpi->gf_active_flags);
+ cpi->gf_active_flags = 0;
+
+ // Activity mask based per mb zbin adjustments
+ vpx_free(cpi->mb_activity_map);
+ cpi->mb_activity_map = 0;
+ vpx_free(cpi->mb_norm_activity_map);
+ cpi->mb_norm_activity_map = 0;
+
+ vpx_free(cpi->mb.pip);
+ cpi->mb.pip = 0;
+ // Statistics buffers held in cpi->twopass
+ vpx_free(cpi->twopass.total_stats);
+ cpi->twopass.total_stats = 0;
+
+ vpx_free(cpi->twopass.total_left_stats);
+ cpi->twopass.total_left_stats = 0;
+
+ vpx_free(cpi->twopass.this_frame_stats);
+ cpi->twopass.this_frame_stats = 0;
+}
+
+// Computes a q delta (in "q index" terms) to get from a starting q value
+// (qstart) to a target q value (qtarget). Each value is mapped to an index in
+// [best_quality, worst_quality); the result is target index - start index.
+static int compute_qdelta(VP9_COMP *cpi, double qstart, double qtarget) {
+ int i;
+ int start_index = cpi->worst_quality;
+ int target_index = cpi->worst_quality;
+
+ // Convert the average q value to an index.
+ for (i = cpi->best_quality; i < cpi->worst_quality; i++) {
+ start_index = i;
+ if (vp9_convert_qindex_to_q(i) >= qstart)
+ break;
+ }
+
+ // Convert the q target to an index
+ for (i = cpi->best_quality; i < cpi->worst_quality; i++) {
+ target_index = i;
+ if (vp9_convert_qindex_to_q(i) >= qtarget)
+ break;
+ }
+ // If no index reaches a value, the last index scanned is used (worst_quality itself when the range is empty)
+ return target_index - start_index;
+}
+
+static void init_seg_features(VP9_COMP *cpi) {
+ VP9_COMMON *cm = &cpi->common;
+ MACROBLOCKD *xd = &cpi->mb.e_mbd;
+
+ int high_q = (int)(cpi->avg_q > 48.0);
+ int qi_delta;
+
+ // Disable and clear down for KF
+ if (cm->frame_type == KEY_FRAME) {
+ // Clear down the global segmentation map
+ vpx_memset(cpi->segmentation_map, 0, (cm->mb_rows * cm->mb_cols));
+ xd->update_mb_segmentation_map = 0;
+ xd->update_mb_segmentation_data = 0;
+ cpi->static_mb_pct = 0;
+
+ // Disable segmentation
+ vp9_disable_segmentation((VP9_PTR)cpi);
+
+ // Clear down the segment features.
+ vp9_clearall_segfeatures(xd);
+ }
+
+ // If this is an alt ref frame
+ else if (cm->refresh_alt_ref_frame) {
+ // Clear down the global segmentation map
+ vpx_memset(cpi->segmentation_map, 0, (cm->mb_rows * cm->mb_cols));
+ xd->update_mb_segmentation_map = 0;
+ xd->update_mb_segmentation_data = 0;
+ cpi->static_mb_pct = 0;
+
+ // Disable segmentation and individual segment features by default
+ vp9_disable_segmentation((VP9_PTR)cpi);
+ vp9_clearall_segfeatures(xd);
+
+ // Scan frames from current to arf frame.
+ // This function re-enables segmentation if appropriate.
+ vp9_update_mbgraph_stats(cpi);
+
+ // If segmentation was enabled set those features needed for the
+ // arf itself.
+ if (xd->segmentation_enabled) {
+ xd->update_mb_segmentation_map = 1;
+ xd->update_mb_segmentation_data = 1;
+
+ qi_delta = compute_qdelta(cpi, cpi->avg_q, (cpi->avg_q * 0.875));
+ vp9_set_segdata(xd, 1, SEG_LVL_ALT_Q, (qi_delta - 2));
+ vp9_set_segdata(xd, 1, SEG_LVL_ALT_LF, -2);
+
+ vp9_enable_segfeature(xd, 1, SEG_LVL_ALT_Q);
+ vp9_enable_segfeature(xd, 1, SEG_LVL_ALT_LF);
+
+ // Where relevant assume segment data is delta data
+ xd->mb_segment_abs_delta = SEGMENT_DELTADATA;
+
+ }
+ }
+ // All other frames if segmentation has been enabled
+ else if (xd->segmentation_enabled) {
+ // First normal frame in a valid gf or alt ref group
+ if (cpi->common.frames_since_golden == 0) {
+ // Set up segment features for normal frames in an af group
+ if (cpi->source_alt_ref_active) {
+ xd->update_mb_segmentation_map = 0;
+ xd->update_mb_segmentation_data = 1;
+ xd->mb_segment_abs_delta = SEGMENT_DELTADATA;
+
+ qi_delta = compute_qdelta(cpi, cpi->avg_q,
+ (cpi->avg_q * 1.125));
+ vp9_set_segdata(xd, 1, SEG_LVL_ALT_Q, (qi_delta + 2));
+ vp9_set_segdata(xd, 1, SEG_LVL_ALT_Q, 0);
+ vp9_enable_segfeature(xd, 1, SEG_LVL_ALT_Q);
+
+ vp9_set_segdata(xd, 1, SEG_LVL_ALT_LF, -2);
+ vp9_enable_segfeature(xd, 1, SEG_LVL_ALT_LF);
+
+ // Segment coding disabled for compred testing
+ if (high_q || (cpi->static_mb_pct == 100)) {
+ // set_segref(xd, 1, LAST_FRAME);
+ vp9_set_segref(xd, 1, ALTREF_FRAME);
+ vp9_enable_segfeature(xd, 1, SEG_LVL_REF_FRAME);
+
+ vp9_set_segdata(xd, 1, SEG_LVL_MODE, ZEROMV);
+ vp9_enable_segfeature(xd, 1, SEG_LVL_MODE);
+
+ // EOB segment coding not fixed for 8x8 yet
+ vp9_set_segdata(xd, 1, SEG_LVL_EOB, 0);
+ vp9_enable_segfeature(xd, 1, SEG_LVL_EOB);
+ }
+ }
+ // Disable segmentation and clear down features if alt ref
+ // is not active for this group
+ else {
+ vp9_disable_segmentation((VP9_PTR)cpi);
+
+ vpx_memset(cpi->segmentation_map, 0,
+ (cm->mb_rows * cm->mb_cols));
+
+ xd->update_mb_segmentation_map = 0;
+ xd->update_mb_segmentation_data = 0;
+
+ vp9_clearall_segfeatures(xd);
+ }
+ }
+
+ // Special case where we are coding over the top of a previous
+ // alt ref frame
+ // Segment coding disabled for compred testing
+ else if (cpi->is_src_frame_alt_ref) {
+ // Enable mode and ref frame features for segment 0 as well
+ vp9_enable_segfeature(xd, 0, SEG_LVL_REF_FRAME);
+ vp9_enable_segfeature(xd, 0, SEG_LVL_MODE);
+ vp9_enable_segfeature(xd, 1, SEG_LVL_REF_FRAME);
+ vp9_enable_segfeature(xd, 1, SEG_LVL_MODE);
+
+ // All mbs should use ALTREF_FRAME, ZEROMV exclusively
+ vp9_clear_segref(xd, 0);
+ vp9_set_segref(xd, 0, ALTREF_FRAME);
+ vp9_clear_segref(xd, 1);
+ vp9_set_segref(xd, 1, ALTREF_FRAME);
+ vp9_set_segdata(xd, 0, SEG_LVL_MODE, ZEROMV);
+ vp9_set_segdata(xd, 1, SEG_LVL_MODE, ZEROMV);
+
+ // Skip all MBs if high Q
+ if (high_q) {
+ vp9_enable_segfeature(xd, 0, SEG_LVL_EOB);
+ vp9_set_segdata(xd, 0, SEG_LVL_EOB, 0);
+ vp9_enable_segfeature(xd, 1, SEG_LVL_EOB);
+ vp9_set_segdata(xd, 1, SEG_LVL_EOB, 0);
+ }
+ // Enable data update
+ xd->update_mb_segmentation_data = 1;
+ }
+ // All other frames.
+ else {
+ // No updates.. leave things as they are.
+ xd->update_mb_segmentation_map = 0;
+ xd->update_mb_segmentation_data = 0;
+ }
+ }
+}
+
+// DEBUG: Append the segment id of each MB in the current frame to the text
+// file "segmap.stt".
+//
+// Output layout: the frame number (cm->current_video_frame) on its own line,
+// then one line per macroblock row with every segment id printed in a
+// 10-character column, then an empty line.  The ids are read sequentially
+// from cpi->segmentation_map, mb_cols entries per row.
+//
+// If the file cannot be opened the dump is skipped; this is a diagnostic aid
+// and must not bring the encoder down.
+static void print_seg_map(VP9_COMP *cpi) {
+  VP9_COMMON *cm = &cpi->common;
+  int row, col;
+  int map_index = 0;
+  FILE *statsfile;
+
+  statsfile = fopen("segmap.stt", "a");
+
+  // fopen() returns NULL on failure; passing that to fprintf()/fclose()
+  // is undefined behaviour.
+  if (!statsfile)
+    return;
+
+  fprintf(statsfile, "%10d\n",
+          cm->current_video_frame);
+
+  for (row = 0; row < cpi->common.mb_rows; row++) {
+    for (col = 0; col < cpi->common.mb_cols; col++) {
+      fprintf(statsfile, "%10d",
+              cpi->segmentation_map[map_index]);
+      map_index++;
+    }
+    fprintf(statsfile, "\n");
+  }
+  fprintf(statsfile, "\n");
+
+  fclose(statsfile);
+}
+
+// Copies the segment id stored in the mode info of every macroblock of the
+// current frame into cm->last_frame_seg_map.
+//
+// The frame is walked in 2x2 macroblock groups.  The destination map is
+// addressed with a row pitch of cm->mb_cols, the mode info array with a row
+// pitch of cm->mode_info_stride.  When mb_cols or mb_rows is odd, the last
+// group in that direction only has its left column / top row inside the
+// frame, so the right-hand and lower entries are written only when they
+// exist.
+//
+// With CONFIG_SUPERBLOCKS, a group flagged encoded_as_sb takes all of its ids
+// from the top-left macroblock's mode info.
+static void update_reference_segmentation_map(VP9_COMP *cpi) {
+  VP9_COMMON *cm = &cpi->common;
+  int row, col, sb_rows = (cm->mb_rows + 1) >> 1, sb_cols = (cm->mb_cols + 1) >> 1;
+  MODE_INFO *mi = cm->mi;
+  uint8_t *segcache = cm->last_frame_seg_map;
+
+  for (row = 0; row < sb_rows; row++) {
+    for (col = 0; col < sb_cols; col++) {
+      MODE_INFO *miptr = mi + col * 2;
+      uint8_t *cache = segcache + col * 2;
+#if CONFIG_SUPERBLOCKS
+      if (miptr->mbmi.encoded_as_sb) {
+        cache[0] = miptr->mbmi.segment_id;
+        if (!(cm->mb_cols & 1) || col < sb_cols - 1)
+          cache[1] = miptr->mbmi.segment_id;
+        if (!(cm->mb_rows & 1) || row < sb_rows - 1) {
+          cache[cm->mb_cols] = miptr->mbmi.segment_id;
+          if (!(cm->mb_cols & 1) || col < sb_cols - 1)
+            cache[cm->mb_cols + 1] = miptr->mbmi.segment_id;
+        }
+      } else
+#endif
+      {
+        cache[0] = miptr[0].mbmi.segment_id;
+        if (!(cm->mb_cols & 1) || col < sb_cols - 1)
+          cache[1] = miptr[1].mbmi.segment_id;
+        if (!(cm->mb_rows & 1) || row < sb_rows - 1) {
+          cache[cm->mb_cols] = miptr[cm->mode_info_stride].mbmi.segment_id;
+          // The lower-right entry only exists when the right-hand column
+          // does.  Writing it unconditionally stepped one element past the
+          // row pair for odd mb_cols (and past the end of the map on the
+          // last row pair).
+          if (!(cm->mb_cols & 1) || col < sb_cols - 1)
+            cache[cm->mb_cols + 1] = miptr[cm->mode_info_stride + 1].mbmi.segment_id;
+        }
+      }
+    }
+    // Advance both cursors by two macroblock rows.
+    segcache += 2 * cm->mb_cols;
+    mi += 2 * cm->mode_info_stride;
+  }
+}
+
+// Turns on loop-filter delta adjustment (and flags the deltas for update),
+// then installs the default per-reference-frame and per-mode delta values
+// in cpi->mb.e_mbd.
+static void set_default_lf_deltas(VP9_COMP *cpi) {
+  MACROBLOCKD *const xd = &cpi->mb.e_mbd;
+
+  xd->mode_ref_lf_delta_enabled = 1;
+  xd->mode_ref_lf_delta_update = 1;
+
+  // Start both tables from zero so entries not listed below stay 0.
+  vpx_memset(xd->ref_lf_deltas, 0, sizeof(xd->ref_lf_deltas));
+  vpx_memset(xd->mode_lf_deltas, 0, sizeof(xd->mode_lf_deltas));
+
+  // Deltas indexed by prediction mode class.
+  xd->mode_lf_deltas[0] = 4;   // BPRED
+  xd->mode_lf_deltas[1] = -2;  // Zero
+  xd->mode_lf_deltas[2] = 2;   // New mv
+  xd->mode_lf_deltas[3] = 4;   // Split mv
+
+  // Deltas indexed by reference frame (test values).
+  xd->ref_lf_deltas[INTRA_FRAME] = 2;
+  xd->ref_lf_deltas[LAST_FRAME] = 0;
+  xd->ref_lf_deltas[GOLDEN_FRAME] = -2;
+  xd->ref_lf_deltas[ALTREF_FRAME] = -2;
+}
+
+// Sets up the encoder's speed/quality trade-off state.
+//
+// Reads cpi->compressor_speed (Mode), cpi->Speed, cpi->ref_frame_flags and
+// cpi->pass, and writes:
+//  - cpi->sf: the search options and the per-mode thresh_mult[] table,
+//  - cpi->mode_check_freq[] and the per-mode hit/chosen counters (reset to 0),
+//  - the forward transform and quantizer function pointers in cpi->mb,
+//  - cpi->find_fractional_mv_step and cpi->mb.optimize.
+// It also calls the motion-search init routine matching sf.search_method and
+// vp9_init_quantizer().
+//
+// Statement order matters throughout: later assignments deliberately (or
+// accidentally) overwrite earlier ones for the same table entry.
+void vp9_set_speed_features(VP9_COMP *cpi) {
+ SPEED_FEATURES *sf = &cpi->sf;
+ int Mode = cpi->compressor_speed;
+ int Speed = cpi->Speed;
+ int i;
+ VP9_COMMON *cm = &cpi->common;
+
+ // Only modes 0 and 1 supported for now in experimental code base
+ if (Mode > 1)
+ Mode = 1;
+
+ // Initialise default mode frequency sampling variables
+ for (i = 0; i < MAX_MODES; i ++) {
+ cpi->mode_check_freq[i] = 0;
+ cpi->mode_test_hit_counts[i] = 0;
+ cpi->mode_chosen_counts[i] = 0;
+ }
+
+ // best quality defaults
+ sf->RD = 1;
+ sf->search_method = NSTEP;
+ sf->improved_dct = 1;
+ sf->auto_filter = 1;
+ sf->recode_loop = 1;
+ sf->quarter_pixel_search = 1;
+ sf->half_pixel_search = 1;
+ sf->iterative_sub_pixel = 1;
+#if CONFIG_LOSSLESS
+ sf->optimize_coefficients = 0;
+#else
+ sf->optimize_coefficients = 1;
+#endif
+ sf->no_skip_block4x4_search = 1;
+
+ sf->first_step = 0;
+ sf->max_step_search_steps = MAX_MVSEARCH_STEPS;
+ sf->improved_mv_pred = 1;
+
+ // default thresholds to 0
+ for (i = 0; i < MAX_MODES; i++)
+ sf->thresh_mult[i] = 0;
+
+ // Per-mode threshold multipliers. Any entry not assigned in the selected
+ // case keeps the 0 default set by the loop above.
+ switch (Mode) {
+ case 0: // best quality mode
+#if CONFIG_PRED_FILTER
+ sf->thresh_mult[THR_ZEROMV ] = 0;
+ sf->thresh_mult[THR_ZEROMV_FILT ] = 0;
+ sf->thresh_mult[THR_ZEROG ] = 0;
+ sf->thresh_mult[THR_ZEROG_FILT ] = 0;
+ sf->thresh_mult[THR_ZEROA ] = 0;
+ sf->thresh_mult[THR_ZEROA_FILT ] = 0;
+ sf->thresh_mult[THR_NEARESTMV ] = 0;
+ sf->thresh_mult[THR_NEARESTMV_FILT] = 0;
+ sf->thresh_mult[THR_NEARESTG ] = 0;
+ sf->thresh_mult[THR_NEARESTG_FILT ] = 0;
+ sf->thresh_mult[THR_NEARESTA ] = 0;
+ sf->thresh_mult[THR_NEARESTA_FILT ] = 0;
+ sf->thresh_mult[THR_NEARMV ] = 0;
+ sf->thresh_mult[THR_NEARMV_FILT ] = 0;
+ sf->thresh_mult[THR_NEARG ] = 0;
+ sf->thresh_mult[THR_NEARG_FILT ] = 0;
+ sf->thresh_mult[THR_NEARA ] = 0;
+ sf->thresh_mult[THR_NEARA_FILT ] = 0;
+
+ sf->thresh_mult[THR_DC ] = 0;
+
+ sf->thresh_mult[THR_V_PRED ] = 1000;
+ sf->thresh_mult[THR_H_PRED ] = 1000;
+ sf->thresh_mult[THR_D45_PRED ] = 1000;
+ sf->thresh_mult[THR_D135_PRED] = 1000;
+ sf->thresh_mult[THR_D117_PRED] = 1000;
+ sf->thresh_mult[THR_D153_PRED] = 1000;
+ sf->thresh_mult[THR_D27_PRED ] = 1000;
+ sf->thresh_mult[THR_D63_PRED ] = 1000;
+ sf->thresh_mult[THR_B_PRED ] = 2000;
+ sf->thresh_mult[THR_I8X8_PRED] = 2000;
+ sf->thresh_mult[THR_TM ] = 1000;
+
+ sf->thresh_mult[THR_NEWMV ] = 1000;
+ sf->thresh_mult[THR_NEWG ] = 1000;
+ sf->thresh_mult[THR_NEWA ] = 1000;
+ sf->thresh_mult[THR_NEWMV_FILT ] = 1000;
+ sf->thresh_mult[THR_NEWG_FILT ] = 1000;
+ sf->thresh_mult[THR_NEWA_FILT ] = 1000;
+#else
+ sf->thresh_mult[THR_ZEROMV ] = 0;
+ sf->thresh_mult[THR_ZEROG ] = 0;
+ sf->thresh_mult[THR_ZEROA ] = 0;
+ sf->thresh_mult[THR_NEARESTMV] = 0;
+ sf->thresh_mult[THR_NEARESTG ] = 0;
+ sf->thresh_mult[THR_NEARESTA ] = 0;
+ sf->thresh_mult[THR_NEARMV ] = 0;
+ sf->thresh_mult[THR_NEARG ] = 0;
+ sf->thresh_mult[THR_NEARA ] = 0;
+
+ sf->thresh_mult[THR_DC ] = 0;
+
+ sf->thresh_mult[THR_V_PRED ] = 1000;
+ sf->thresh_mult[THR_H_PRED ] = 1000;
+ sf->thresh_mult[THR_D45_PRED ] = 1000;
+ sf->thresh_mult[THR_D135_PRED] = 1000;
+ sf->thresh_mult[THR_D117_PRED] = 1000;
+ sf->thresh_mult[THR_D153_PRED] = 1000;
+ sf->thresh_mult[THR_D27_PRED ] = 1000;
+ sf->thresh_mult[THR_D63_PRED ] = 1000;
+ sf->thresh_mult[THR_B_PRED ] = 2000;
+ sf->thresh_mult[THR_I8X8_PRED] = 2000;
+ sf->thresh_mult[THR_TM ] = 1000;
+
+ sf->thresh_mult[THR_NEWMV ] = 1000;
+ sf->thresh_mult[THR_NEWG ] = 1000;
+ sf->thresh_mult[THR_NEWA ] = 1000;
+#endif
+ sf->thresh_mult[THR_SPLITMV ] = 2500;
+ sf->thresh_mult[THR_SPLITG ] = 5000;
+ sf->thresh_mult[THR_SPLITA ] = 5000;
+
+ sf->thresh_mult[THR_COMP_ZEROLG ] = 0;
+ sf->thresh_mult[THR_COMP_NEARESTLG] = 0;
+ sf->thresh_mult[THR_COMP_NEARLG ] = 0;
+ sf->thresh_mult[THR_COMP_ZEROLA ] = 0;
+ sf->thresh_mult[THR_COMP_NEARESTLA] = 0;
+ sf->thresh_mult[THR_COMP_NEARLA ] = 0;
+ sf->thresh_mult[THR_COMP_ZEROGA ] = 0;
+ sf->thresh_mult[THR_COMP_NEARESTGA] = 0;
+ sf->thresh_mult[THR_COMP_NEARGA ] = 0;
+
+ sf->thresh_mult[THR_COMP_NEWLG ] = 1000;
+ sf->thresh_mult[THR_COMP_NEWLA ] = 1000;
+ sf->thresh_mult[THR_COMP_NEWGA ] = 1000;
+
+ sf->thresh_mult[THR_COMP_SPLITLA ] = 2500;
+ sf->thresh_mult[THR_COMP_SPLITGA ] = 5000;
+ sf->thresh_mult[THR_COMP_SPLITLG ] = 5000;
+
+#if CONFIG_COMP_INTERINTRA_PRED
+ sf->thresh_mult[THR_COMP_INTERINTRA_ZEROL ] = 0;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTL] = 0;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARL ] = 0;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEWL ] = 0;
+ sf->thresh_mult[THR_COMP_INTERINTRA_ZEROG ] = 0;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTG] = 0;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARG ] = 0;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEWG ] = 0;
+ sf->thresh_mult[THR_COMP_INTERINTRA_ZEROA ] = 0;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTA] = 0;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARA ] = 0;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEWA ] = 0;
+#endif
+
+ sf->first_step = 0;
+ sf->max_step_search_steps = MAX_MVSEARCH_STEPS;
+ sf->search_best_filter = SEARCH_BEST_FILTER;
+ break;
+ case 1:
+ // Also reached for any Mode above 1, because of the clamp at the top of
+ // the function. Unlike case 0, the thresholds here are further adjusted
+ // by the Speed blocks at the end of the case.
+#if CONFIG_PRED_FILTER
+ sf->thresh_mult[THR_NEARESTMV] = 0;
+ sf->thresh_mult[THR_NEARESTMV_FILT] = 0;
+ sf->thresh_mult[THR_ZEROMV ] = 0;
+ sf->thresh_mult[THR_ZEROMV_FILT ] = 0;
+ sf->thresh_mult[THR_DC ] = 0;
+ sf->thresh_mult[THR_NEARMV ] = 0;
+ sf->thresh_mult[THR_NEARMV_FILT ] = 0;
+ sf->thresh_mult[THR_V_PRED ] = 1000;
+ sf->thresh_mult[THR_H_PRED ] = 1000;
+ sf->thresh_mult[THR_D45_PRED ] = 1000;
+ sf->thresh_mult[THR_D135_PRED] = 1000;
+ sf->thresh_mult[THR_D117_PRED] = 1000;
+ sf->thresh_mult[THR_D153_PRED] = 1000;
+ sf->thresh_mult[THR_D27_PRED ] = 1000;
+ sf->thresh_mult[THR_D63_PRED ] = 1000;
+ sf->thresh_mult[THR_B_PRED ] = 2500;
+ sf->thresh_mult[THR_I8X8_PRED] = 2500;
+ sf->thresh_mult[THR_TM ] = 1000;
+
+ sf->thresh_mult[THR_NEARESTG ] = 1000;
+ sf->thresh_mult[THR_NEARESTG_FILT ] = 1000;
+ sf->thresh_mult[THR_NEARESTA ] = 1000;
+ sf->thresh_mult[THR_NEARESTA_FILT ] = 1000;
+
+ sf->thresh_mult[THR_ZEROG ] = 1000;
+ sf->thresh_mult[THR_ZEROA ] = 1000;
+ sf->thresh_mult[THR_NEARG ] = 1000;
+ sf->thresh_mult[THR_NEARA ] = 1000;
+ sf->thresh_mult[THR_ZEROG_FILT ] = 1000;
+ sf->thresh_mult[THR_ZEROA_FILT ] = 1000;
+ sf->thresh_mult[THR_NEARG_FILT ] = 1000;
+ sf->thresh_mult[THR_NEARA_FILT ] = 1000;
+
+ // NOTE(review): the group below resets every ZERO/NEAREST/NEAR entry to
+ // 0, overriding the 1000 values assigned just above.
+ sf->thresh_mult[THR_ZEROMV ] = 0;
+ sf->thresh_mult[THR_ZEROG ] = 0;
+ sf->thresh_mult[THR_ZEROA ] = 0;
+ sf->thresh_mult[THR_NEARESTMV] = 0;
+ sf->thresh_mult[THR_NEARESTG ] = 0;
+ sf->thresh_mult[THR_NEARESTA ] = 0;
+ sf->thresh_mult[THR_NEARMV ] = 0;
+ sf->thresh_mult[THR_NEARG ] = 0;
+ sf->thresh_mult[THR_NEARA ] = 0;
+ sf->thresh_mult[THR_ZEROMV_FILT ] = 0;
+ sf->thresh_mult[THR_ZEROG_FILT ] = 0;
+ sf->thresh_mult[THR_ZEROA_FILT ] = 0;
+ sf->thresh_mult[THR_NEARESTMV_FILT] = 0;
+ sf->thresh_mult[THR_NEARESTG_FILT ] = 0;
+ sf->thresh_mult[THR_NEARESTA_FILT ] = 0;
+ sf->thresh_mult[THR_NEARMV_FILT ] = 0;
+ sf->thresh_mult[THR_NEARG_FILT ] = 0;
+ sf->thresh_mult[THR_NEARA_FILT ] = 0;
+
+ sf->thresh_mult[THR_NEWMV ] = 1000;
+ sf->thresh_mult[THR_NEWG ] = 1000;
+ sf->thresh_mult[THR_NEWA ] = 1000;
+ sf->thresh_mult[THR_NEWMV_FILT ] = 1000;
+ sf->thresh_mult[THR_NEWG_FILT ] = 1000;
+ sf->thresh_mult[THR_NEWA_FILT ] = 1000;
+#else
+ sf->thresh_mult[THR_NEARESTMV] = 0;
+ sf->thresh_mult[THR_ZEROMV ] = 0;
+ sf->thresh_mult[THR_DC ] = 0;
+ sf->thresh_mult[THR_NEARMV ] = 0;
+ sf->thresh_mult[THR_V_PRED ] = 1000;
+ sf->thresh_mult[THR_H_PRED ] = 1000;
+ sf->thresh_mult[THR_D45_PRED ] = 1000;
+ sf->thresh_mult[THR_D135_PRED] = 1000;
+ sf->thresh_mult[THR_D117_PRED] = 1000;
+ sf->thresh_mult[THR_D153_PRED] = 1000;
+ sf->thresh_mult[THR_D27_PRED ] = 1000;
+ sf->thresh_mult[THR_D63_PRED ] = 1000;
+ sf->thresh_mult[THR_B_PRED ] = 2500;
+ sf->thresh_mult[THR_I8X8_PRED] = 2500;
+ sf->thresh_mult[THR_TM ] = 1000;
+
+ sf->thresh_mult[THR_NEARESTG ] = 1000;
+ sf->thresh_mult[THR_NEARESTA ] = 1000;
+
+ sf->thresh_mult[THR_ZEROG ] = 1000;
+ sf->thresh_mult[THR_ZEROA ] = 1000;
+ sf->thresh_mult[THR_NEARG ] = 1000;
+ sf->thresh_mult[THR_NEARA ] = 1000;
+
+ // NOTE(review): the group below resets every ZERO/NEAREST/NEAR entry to
+ // 0, overriding the 1000 values assigned just above.
+ sf->thresh_mult[THR_ZEROMV ] = 0;
+ sf->thresh_mult[THR_ZEROG ] = 0;
+ sf->thresh_mult[THR_ZEROA ] = 0;
+ sf->thresh_mult[THR_NEARESTMV] = 0;
+ sf->thresh_mult[THR_NEARESTG ] = 0;
+ sf->thresh_mult[THR_NEARESTA ] = 0;
+ sf->thresh_mult[THR_NEARMV ] = 0;
+ sf->thresh_mult[THR_NEARG ] = 0;
+ sf->thresh_mult[THR_NEARA ] = 0;
+
+ sf->thresh_mult[THR_NEWMV ] = 1000;
+ sf->thresh_mult[THR_NEWG ] = 1000;
+ sf->thresh_mult[THR_NEWA ] = 1000;
+#endif
+ sf->thresh_mult[THR_SPLITMV ] = 1700;
+ sf->thresh_mult[THR_SPLITG ] = 4500;
+ sf->thresh_mult[THR_SPLITA ] = 4500;
+
+ sf->thresh_mult[THR_COMP_ZEROLG ] = 0;
+ sf->thresh_mult[THR_COMP_NEARESTLG] = 0;
+ sf->thresh_mult[THR_COMP_NEARLG ] = 0;
+ sf->thresh_mult[THR_COMP_ZEROLA ] = 0;
+ sf->thresh_mult[THR_COMP_NEARESTLA] = 0;
+ sf->thresh_mult[THR_COMP_NEARLA ] = 0;
+ sf->thresh_mult[THR_COMP_ZEROGA ] = 0;
+ sf->thresh_mult[THR_COMP_NEARESTGA] = 0;
+ sf->thresh_mult[THR_COMP_NEARGA ] = 0;
+
+ sf->thresh_mult[THR_COMP_NEWLG ] = 1000;
+ sf->thresh_mult[THR_COMP_NEWLA ] = 1000;
+ sf->thresh_mult[THR_COMP_NEWGA ] = 1000;
+
+ sf->thresh_mult[THR_COMP_SPLITLA ] = 1700;
+ sf->thresh_mult[THR_COMP_SPLITGA ] = 4500;
+ sf->thresh_mult[THR_COMP_SPLITLG ] = 4500;
+#if CONFIG_COMP_INTERINTRA_PRED
+ sf->thresh_mult[THR_COMP_INTERINTRA_ZEROL ] = 0;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTL] = 0;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARL ] = 0;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEWL ] = 0;
+ sf->thresh_mult[THR_COMP_INTERINTRA_ZEROG ] = 0;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTG] = 0;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARG ] = 0;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEWG ] = 0;
+ sf->thresh_mult[THR_COMP_INTERINTRA_ZEROA ] = 0;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTA] = 0;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARA ] = 0;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEWA ] = 0;
+#endif
+
+ // The Speed blocks below are cumulative, not exclusive: a higher Speed
+ // runs every lower block first and then overwrites the entries it sets
+ // again.
+ if (Speed > 0) {
+ /* Disable coefficient optimization above speed 0 */
+ sf->optimize_coefficients = 0;
+ sf->no_skip_block4x4_search = 0;
+
+ sf->first_step = 1;
+
+ cpi->mode_check_freq[THR_SPLITG] = 2;
+ cpi->mode_check_freq[THR_SPLITA] = 2;
+ cpi->mode_check_freq[THR_SPLITMV] = 0;
+
+ cpi->mode_check_freq[THR_COMP_SPLITGA] = 2;
+ cpi->mode_check_freq[THR_COMP_SPLITLG] = 2;
+ cpi->mode_check_freq[THR_COMP_SPLITLA] = 0;
+ }
+
+ if (Speed > 1) {
+ cpi->mode_check_freq[THR_SPLITG] = 4;
+ cpi->mode_check_freq[THR_SPLITA] = 4;
+ cpi->mode_check_freq[THR_SPLITMV] = 2;
+
+ cpi->mode_check_freq[THR_COMP_SPLITGA] = 4;
+ cpi->mode_check_freq[THR_COMP_SPLITLG] = 4;
+ cpi->mode_check_freq[THR_COMP_SPLITLA] = 2;
+
+ sf->thresh_mult[THR_TM ] = 1500;
+ sf->thresh_mult[THR_V_PRED ] = 1500;
+ sf->thresh_mult[THR_H_PRED ] = 1500;
+ sf->thresh_mult[THR_D45_PRED ] = 1500;
+ sf->thresh_mult[THR_D135_PRED] = 1500;
+ sf->thresh_mult[THR_D117_PRED] = 1500;
+ sf->thresh_mult[THR_D153_PRED] = 1500;
+ sf->thresh_mult[THR_D27_PRED ] = 1500;
+ sf->thresh_mult[THR_D63_PRED ] = 1500;
+ sf->thresh_mult[THR_B_PRED ] = 5000;
+ sf->thresh_mult[THR_I8X8_PRED] = 5000;
+
+ if (cpi->ref_frame_flags & VP9_LAST_FLAG) {
+ sf->thresh_mult[THR_NEWMV ] = 2000;
+#if CONFIG_PRED_FILTER
+ sf->thresh_mult[THR_NEWMV_FILT ] = 2000;
+#endif
+ sf->thresh_mult[THR_SPLITMV ] = 10000;
+ sf->thresh_mult[THR_COMP_SPLITLG ] = 20000;
+ }
+
+ if (cpi->ref_frame_flags & VP9_GOLD_FLAG) {
+ sf->thresh_mult[THR_NEARESTG ] = 1500;
+ sf->thresh_mult[THR_ZEROG ] = 1500;
+ sf->thresh_mult[THR_NEARG ] = 1500;
+ sf->thresh_mult[THR_NEWG ] = 2000;
+#if CONFIG_PRED_FILTER
+ sf->thresh_mult[THR_NEARESTG_FILT ] = 1500;
+ sf->thresh_mult[THR_ZEROG_FILT ] = 1500;
+ sf->thresh_mult[THR_NEARG_FILT ] = 1500;
+ sf->thresh_mult[THR_NEWG_FILT ] = 2000;
+#endif
+ sf->thresh_mult[THR_SPLITG ] = 20000;
+ sf->thresh_mult[THR_COMP_SPLITGA ] = 20000;
+ }
+
+ if (cpi->ref_frame_flags & VP9_ALT_FLAG) {
+ sf->thresh_mult[THR_NEARESTA ] = 1500;
+ sf->thresh_mult[THR_ZEROA ] = 1500;
+ sf->thresh_mult[THR_NEARA ] = 1500;
+ sf->thresh_mult[THR_NEWA ] = 2000;
+#if CONFIG_PRED_FILTER
+ sf->thresh_mult[THR_NEARESTA_FILT ] = 1500;
+ sf->thresh_mult[THR_ZEROA_FILT ] = 1500;
+ sf->thresh_mult[THR_NEARA_FILT ] = 1500;
+ sf->thresh_mult[THR_NEWA_FILT ] = 2000;
+#endif
+ sf->thresh_mult[THR_SPLITA ] = 20000;
+ sf->thresh_mult[THR_COMP_SPLITLA ] = 10000;
+ }
+
+ sf->thresh_mult[THR_COMP_ZEROLG ] = 1500;
+ sf->thresh_mult[THR_COMP_NEARESTLG] = 1500;
+ sf->thresh_mult[THR_COMP_NEARLG ] = 1500;
+ sf->thresh_mult[THR_COMP_ZEROLA ] = 1500;
+ sf->thresh_mult[THR_COMP_NEARESTLA] = 1500;
+ sf->thresh_mult[THR_COMP_NEARLA ] = 1500;
+ sf->thresh_mult[THR_COMP_ZEROGA ] = 1500;
+ sf->thresh_mult[THR_COMP_NEARESTGA] = 1500;
+ sf->thresh_mult[THR_COMP_NEARGA ] = 1500;
+
+ sf->thresh_mult[THR_COMP_NEWLG ] = 2000;
+ sf->thresh_mult[THR_COMP_NEWLA ] = 2000;
+ sf->thresh_mult[THR_COMP_NEWGA ] = 2000;
+#if CONFIG_COMP_INTERINTRA_PRED
+ sf->thresh_mult[THR_COMP_INTERINTRA_ZEROL ] = 0;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTL] = 0;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARL ] = 0;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEWL ] = 0;
+ sf->thresh_mult[THR_COMP_INTERINTRA_ZEROG ] = 0;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTG] = 0;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARG ] = 0;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEWG ] = 0;
+ sf->thresh_mult[THR_COMP_INTERINTRA_ZEROA ] = 0;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTA] = 0;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARA ] = 0;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEWA ] = 0;
+#endif
+ }
+
+ if (Speed > 2) {
+ cpi->mode_check_freq[THR_SPLITG] = 15;
+ cpi->mode_check_freq[THR_SPLITA] = 15;
+ cpi->mode_check_freq[THR_SPLITMV] = 7;
+
+ cpi->mode_check_freq[THR_COMP_SPLITGA] = 15;
+ cpi->mode_check_freq[THR_COMP_SPLITLG] = 15;
+ cpi->mode_check_freq[THR_COMP_SPLITLA] = 7;
+
+ sf->thresh_mult[THR_TM ] = 2000;
+ sf->thresh_mult[THR_V_PRED ] = 2000;
+ sf->thresh_mult[THR_H_PRED ] = 2000;
+ sf->thresh_mult[THR_D45_PRED ] = 2000;
+ sf->thresh_mult[THR_D135_PRED] = 2000;
+ sf->thresh_mult[THR_D117_PRED] = 2000;
+ sf->thresh_mult[THR_D153_PRED] = 2000;
+ sf->thresh_mult[THR_D27_PRED ] = 2000;
+ sf->thresh_mult[THR_D63_PRED ] = 2000;
+ sf->thresh_mult[THR_B_PRED ] = 7500;
+ sf->thresh_mult[THR_I8X8_PRED] = 7500;
+
+ if (cpi->ref_frame_flags & VP9_LAST_FLAG) {
+ sf->thresh_mult[THR_NEWMV ] = 2000;
+#if CONFIG_PRED_FILTER
+ sf->thresh_mult[THR_NEWMV_FILT ] = 2000;
+#endif
+ sf->thresh_mult[THR_SPLITMV ] = 25000;
+ sf->thresh_mult[THR_COMP_SPLITLG ] = 50000;
+ }
+
+ if (cpi->ref_frame_flags & VP9_GOLD_FLAG) {
+ sf->thresh_mult[THR_NEARESTG ] = 2000;
+ sf->thresh_mult[THR_ZEROG ] = 2000;
+ sf->thresh_mult[THR_NEARG ] = 2000;
+ sf->thresh_mult[THR_NEWG ] = 2500;
+#if CONFIG_PRED_FILTER
+ sf->thresh_mult[THR_NEARESTG_FILT ] = 2000;
+ sf->thresh_mult[THR_ZEROG_FILT ] = 2000;
+ sf->thresh_mult[THR_NEARG_FILT ] = 2000;
+ sf->thresh_mult[THR_NEWG_FILT ] = 2500;
+#endif
+ sf->thresh_mult[THR_SPLITG ] = 50000;
+ sf->thresh_mult[THR_COMP_SPLITGA ] = 50000;
+ }
+
+ if (cpi->ref_frame_flags & VP9_ALT_FLAG) {
+ sf->thresh_mult[THR_NEARESTA ] = 2000;
+ sf->thresh_mult[THR_ZEROA ] = 2000;
+ sf->thresh_mult[THR_NEARA ] = 2000;
+ sf->thresh_mult[THR_NEWA ] = 2500;
+#if CONFIG_PRED_FILTER
+ sf->thresh_mult[THR_NEARESTA_FILT ] = 2000;
+ sf->thresh_mult[THR_ZEROA_FILT ] = 2000;
+ sf->thresh_mult[THR_NEARA_FILT ] = 2000;
+ sf->thresh_mult[THR_NEWA_FILT ] = 2500;
+#endif
+ sf->thresh_mult[THR_SPLITA ] = 50000;
+ sf->thresh_mult[THR_COMP_SPLITLA ] = 25000;
+ }
+
+ sf->thresh_mult[THR_COMP_ZEROLG ] = 2000;
+ sf->thresh_mult[THR_COMP_NEARESTLG] = 2000;
+ sf->thresh_mult[THR_COMP_NEARLG ] = 2000;
+ sf->thresh_mult[THR_COMP_ZEROLA ] = 2000;
+ sf->thresh_mult[THR_COMP_NEARESTLA] = 2000;
+ sf->thresh_mult[THR_COMP_NEARLA ] = 2000;
+ sf->thresh_mult[THR_COMP_ZEROGA ] = 2000;
+ sf->thresh_mult[THR_COMP_NEARESTGA] = 2000;
+ sf->thresh_mult[THR_COMP_NEARGA ] = 2000;
+
+ sf->thresh_mult[THR_COMP_NEWLG ] = 2500;
+ sf->thresh_mult[THR_COMP_NEWLA ] = 2500;
+ sf->thresh_mult[THR_COMP_NEWGA ] = 2500;
+#if CONFIG_COMP_INTERINTRA_PRED
+ sf->thresh_mult[THR_COMP_INTERINTRA_ZEROL ] = 0;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTL] = 0;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARL ] = 0;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEWL ] = 0;
+ sf->thresh_mult[THR_COMP_INTERINTRA_ZEROG ] = 0;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTG] = 0;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARG ] = 0;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEWG ] = 0;
+ sf->thresh_mult[THR_COMP_INTERINTRA_ZEROA ] = 0;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTA] = 0;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARA ] = 0;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEWA ] = 0;
+#endif
+
+ sf->improved_dct = 0;
+
+ // Only do recode loop on key frames, golden frames and
+ // alt ref frames
+ sf->recode_loop = 2;
+
+ }
+
+ break;
+
+ }; /* switch */
+
+ // From here on the thresholds of modes that need an unavailable reference
+ // frame are forced to INT_MAX, overriding whatever the switch assigned.
+ /* disable frame modes if flags not set */
+ if (!(cpi->ref_frame_flags & VP9_LAST_FLAG)) {
+ sf->thresh_mult[THR_NEWMV ] = INT_MAX;
+ sf->thresh_mult[THR_NEARESTMV] = INT_MAX;
+ sf->thresh_mult[THR_ZEROMV ] = INT_MAX;
+ sf->thresh_mult[THR_NEARMV ] = INT_MAX;
+#if CONFIG_PRED_FILTER
+ sf->thresh_mult[THR_NEWMV_FILT ] = INT_MAX;
+ sf->thresh_mult[THR_NEARESTMV_FILT] = INT_MAX;
+ sf->thresh_mult[THR_ZEROMV_FILT ] = INT_MAX;
+ sf->thresh_mult[THR_NEARMV_FILT ] = INT_MAX;
+#endif
+ sf->thresh_mult[THR_SPLITMV ] = INT_MAX;
+ }
+
+ if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG)) {
+ sf->thresh_mult[THR_NEARESTG ] = INT_MAX;
+ sf->thresh_mult[THR_ZEROG ] = INT_MAX;
+ sf->thresh_mult[THR_NEARG ] = INT_MAX;
+ sf->thresh_mult[THR_NEWG ] = INT_MAX;
+#if CONFIG_PRED_FILTER
+ sf->thresh_mult[THR_NEARESTG_FILT ] = INT_MAX;
+ sf->thresh_mult[THR_ZEROG_FILT ] = INT_MAX;
+ sf->thresh_mult[THR_NEARG_FILT ] = INT_MAX;
+ sf->thresh_mult[THR_NEWG_FILT ] = INT_MAX;
+#endif
+#if CONFIG_COMP_INTERINTRA_PRED
+ sf->thresh_mult[THR_COMP_INTERINTRA_ZEROG ] = INT_MAX;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTG] = INT_MAX;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARG ] = INT_MAX;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEWG ] = INT_MAX;
+#endif
+ sf->thresh_mult[THR_SPLITG ] = INT_MAX;
+ }
+
+ if (!(cpi->ref_frame_flags & VP9_ALT_FLAG)) {
+ sf->thresh_mult[THR_NEARESTA ] = INT_MAX;
+ sf->thresh_mult[THR_ZEROA ] = INT_MAX;
+ sf->thresh_mult[THR_NEARA ] = INT_MAX;
+ sf->thresh_mult[THR_NEWA ] = INT_MAX;
+#if CONFIG_PRED_FILTER
+ sf->thresh_mult[THR_NEARESTA_FILT ] = INT_MAX;
+ sf->thresh_mult[THR_ZEROA_FILT ] = INT_MAX;
+ sf->thresh_mult[THR_NEARA_FILT ] = INT_MAX;
+ sf->thresh_mult[THR_NEWA_FILT ] = INT_MAX;
+#endif
+#if CONFIG_COMP_INTERINTRA_PRED
+ sf->thresh_mult[THR_COMP_INTERINTRA_ZEROA ] = INT_MAX;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTA] = INT_MAX;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARA ] = INT_MAX;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEWA ] = INT_MAX;
+#endif
+ sf->thresh_mult[THR_SPLITA ] = INT_MAX;
+ }
+
+ // Compound (two-reference) modes need both of their reference frames.
+ if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_GOLD_FLAG)) != (VP9_LAST_FLAG | VP9_GOLD_FLAG)) {
+ sf->thresh_mult[THR_COMP_ZEROLG ] = INT_MAX;
+ sf->thresh_mult[THR_COMP_NEARESTLG] = INT_MAX;
+ sf->thresh_mult[THR_COMP_NEARLG ] = INT_MAX;
+ sf->thresh_mult[THR_COMP_NEWLG ] = INT_MAX;
+ sf->thresh_mult[THR_COMP_SPLITLG ] = INT_MAX;
+ }
+
+ if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) != (VP9_LAST_FLAG | VP9_ALT_FLAG)) {
+ sf->thresh_mult[THR_COMP_ZEROLA ] = INT_MAX;
+ sf->thresh_mult[THR_COMP_NEARESTLA] = INT_MAX;
+ sf->thresh_mult[THR_COMP_NEARLA ] = INT_MAX;
+ sf->thresh_mult[THR_COMP_NEWLA ] = INT_MAX;
+ sf->thresh_mult[THR_COMP_SPLITLA ] = INT_MAX;
+ }
+
+ if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) != (VP9_GOLD_FLAG | VP9_ALT_FLAG)) {
+ sf->thresh_mult[THR_COMP_ZEROGA ] = INT_MAX;
+ sf->thresh_mult[THR_COMP_NEARESTGA] = INT_MAX;
+ sf->thresh_mult[THR_COMP_NEARGA ] = INT_MAX;
+ sf->thresh_mult[THR_COMP_NEWGA ] = INT_MAX;
+ sf->thresh_mult[THR_COMP_SPLITGA ] = INT_MAX;
+ }
+#if CONFIG_COMP_INTERINTRA_PRED
+ if ((cpi->ref_frame_flags & VP9_LAST_FLAG) != VP9_LAST_FLAG) {
+ sf->thresh_mult[THR_COMP_INTERINTRA_ZEROL ] = INT_MAX;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTL] = INT_MAX;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARL ] = INT_MAX;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEWL ] = INT_MAX;
+ }
+#endif
+
+ // Slow quant, dct and trellis not worthwhile for first pass
+ // so make sure they are always turned off.
+ if (cpi->pass == 1) {
+ sf->optimize_coefficients = 0;
+ sf->improved_dct = 0;
+ }
+
+ // Motion search setup is keyed on the search method and uses the luma
+ // stride of the frame buffer at cm->lst_fb_idx.
+ if (cpi->sf.search_method == NSTEP) {
+ vp9_init3smotion_compensation(&cpi->mb,
+ cm->yv12_fb[cm->lst_fb_idx].y_stride);
+ } else if (cpi->sf.search_method == DIAMOND) {
+ vp9_init_dsmotion_compensation(&cpi->mb,
+ cm->yv12_fb[cm->lst_fb_idx].y_stride);
+ }
+
+ // Default forward transform function pointers.
+ cpi->mb.vp9_short_fdct16x16 = vp9_short_fdct16x16;
+ cpi->mb.vp9_short_fdct8x8 = vp9_short_fdct8x8;
+ cpi->mb.vp9_short_fdct8x4 = vp9_short_fdct8x4;
+ cpi->mb.vp9_short_fdct4x4 = vp9_short_fdct4x4;
+ cpi->mb.short_walsh4x4 = vp9_short_walsh4x4;
+ cpi->mb.short_fhaar2x2 = vp9_short_fhaar2x2;
+
+#if CONFIG_LOSSLESS
+ // Lossless coding swaps the 8x4 and 4x4 transforms for Walsh variants.
+ // NOTE(review): short_walsh4x4 is assigned twice in this block; the last
+ // assignment (vp9_short_walsh4x4_lossless) is the one that takes effect.
+ if (cpi->oxcf.lossless) {
+ cpi->mb.vp9_short_fdct8x4 = vp9_short_walsh8x4_x8;
+ cpi->mb.vp9_short_fdct4x4 = vp9_short_walsh4x4_x8;
+ cpi->mb.short_walsh4x4 = vp9_short_walsh4x4;
+ cpi->mb.short_fhaar2x2 = vp9_short_fhaar2x2;
+ cpi->mb.short_walsh4x4 = vp9_short_walsh4x4_lossless;
+ }
+#endif
+
+ cpi->mb.quantize_b_4x4 = vp9_regular_quantize_b_4x4;
+ cpi->mb.quantize_b_4x4_pair = vp9_regular_quantize_b_4x4_pair;
+ cpi->mb.quantize_b_8x8 = vp9_regular_quantize_b_8x8;
+ cpi->mb.quantize_b_16x16 = vp9_regular_quantize_b_16x16;
+ cpi->mb.quantize_b_2x2 = vp9_regular_quantize_b_2x2;
+
+ vp9_init_quantizer(cpi);
+
+ // Pick the sub-pixel motion refinement routine. If none of the three
+ // flags is set, find_fractional_mv_step is left as it was.
+ if (cpi->sf.iterative_sub_pixel == 1) {
+ cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_step_iteratively;
+ } else if (cpi->sf.quarter_pixel_search) {
+ cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_step;
+ } else if (cpi->sf.half_pixel_search) {
+ cpi->find_fractional_mv_step = vp9_find_best_half_pixel_step;
+ }
+
+ // Coefficient optimization is never used in the first pass.
+ if (cpi->sf.optimize_coefficients == 1 && cpi->pass != 1)
+ cpi->mb.optimize = 1;
+ else
+ cpi->mb.optimize = 0;
+
+#ifdef SPEEDSTATS
+ frames_at_speed[cpi->Speed]++;
+#endif
+}
+// Allocates the buffers that hold incoming source frames: the lookahead
+// queue (sized from the configured width, height and lag) and, when
+// VP9_TEMPORAL_ALT_REF is enabled, the alt-ref buffer whose dimensions are
+// the configured ones rounded up to a multiple of 16.  Failures are reported
+// through vpx_internal_error() with VPX_CODEC_MEM_ERROR.
+static void alloc_raw_frame_buffers(VP9_COMP *cpi) {
+  // Rounded-up dimensions; only consumed by the alt-ref allocation below.
+  int aligned_w = (cpi->oxcf.Width + 15) & ~15;
+  int aligned_h = (cpi->oxcf.Height + 15) & ~15;
+
+  cpi->lookahead = vp9_lookahead_init(cpi->oxcf.Width, cpi->oxcf.Height,
+                                      cpi->oxcf.lag_in_frames);
+  if (!cpi->lookahead) {
+    vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
+                       "Failed to allocate lag buffers");
+  }
+
+#if VP9_TEMPORAL_ALT_REF
+  if (vp8_yv12_alloc_frame_buffer(&cpi->alt_ref_buffer,
+                                  aligned_w, aligned_h, VP9BORDERINPIXELS)) {
+    vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
+                       "Failed to allocate altref buffer");
+  }
+#endif
+}
+
+// Replaces cpi->mb.pip with a zeroed array of (mb_cols + 1) * (mb_rows + 1)
+// PARTITION_INFO entries and points cpi->mb.pi at the entry one row
+// (mode_info_stride) plus one column into it.
+//
+// Returns 0 on success, 1 if the allocation failed.
+static int alloc_partition_data(VP9_COMP *cpi) {
+  VP9_COMMON *const cm = &cpi->common;
+
+  // Drop any previous array before allocating the new one.
+  vpx_free(cpi->mb.pip);
+  cpi->mb.pip = vpx_calloc((cm->mb_cols + 1) * (cm->mb_rows + 1),
+                           sizeof(PARTITION_INFO));
+
+  if (cpi->mb.pip) {
+    cpi->mb.pi = cpi->mb.pip + cm->mode_info_stride + 1;
+    return 0;
+  }
+
+  return 1;
+}
+
+// (Re)allocates the encoder's frame-size dependent working memory.
+//
+// In order: the common frame buffers, the partition info array, the
+// last_frame_uf and scaled_source buffers (dimensions rounded up to a
+// multiple of 16), the token buffer, the golden-frame activity flags, the
+// two macroblock activity maps, the three first-pass stats records and the
+// per-macroblock-row token list.  Existing allocations are freed first.
+// Any failure is reported through vpx_internal_error() or CHECK_MEM_ERROR.
+void vp9_alloc_compressor_data(VP9_COMP *cpi) {
+  VP9_COMMON *cm = &cpi->common;
+
+  // Sample the frame size before the common allocator runs.
+  const int frame_w = cm->Width;
+  const int frame_h = cm->Height;
+
+  // Same size rounded up to the next multiple of 16.
+  const int aligned_w = (frame_w + 15) & ~15;
+  const int aligned_h = (frame_h + 15) & ~15;
+
+  if (vp9_alloc_frame_buffers(cm, frame_w, frame_h)) {
+    vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
+                       "Failed to allocate frame buffers");
+  }
+
+  if (alloc_partition_data(cpi)) {
+    vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
+                       "Failed to allocate partition data");
+  }
+
+  if (vp8_yv12_alloc_frame_buffer(&cpi->last_frame_uf,
+                                  aligned_w, aligned_h, VP9BORDERINPIXELS)) {
+    vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
+                       "Failed to allocate last frame buffer");
+  }
+
+  if (vp8_yv12_alloc_frame_buffer(&cpi->scaled_source,
+                                  aligned_w, aligned_h, VP9BORDERINPIXELS)) {
+    vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
+                       "Failed to allocate scaled source buffer");
+  }
+
+  // Token buffer: 24 * 16 entries for every macroblock.
+  vpx_free(cpi->tok);
+  {
+    unsigned int tokens = cm->mb_rows * cm->mb_cols * 24 * 16;
+
+    CHECK_MEM_ERROR(cpi->tok, vpx_calloc(tokens, sizeof(*cpi->tok)));
+  }
+
+  // Data used for real time vc mode to see if gf needs refreshing
+  cpi->inter_zz_count = 0;
+  cpi->gf_bad_count = 0;
+  cpi->gf_update_recommended = 0;
+
+  // Structures used to monitor GF usage: one flag byte per macroblock.
+  vpx_free(cpi->gf_active_flags);
+  CHECK_MEM_ERROR(cpi->gf_active_flags,
+                  vpx_calloc(1, cm->mb_rows * cm->mb_cols));
+  cpi->gf_active_count = cm->mb_rows * cm->mb_cols;
+
+  // Per-macroblock activity maps (raw and normalised).
+  vpx_free(cpi->mb_activity_map);
+  CHECK_MEM_ERROR(cpi->mb_activity_map,
+                  vpx_calloc(sizeof(unsigned int),
+                             cm->mb_rows * cm->mb_cols));
+
+  vpx_free(cpi->mb_norm_activity_map);
+  CHECK_MEM_ERROR(cpi->mb_norm_activity_map,
+                  vpx_calloc(sizeof(unsigned int),
+                             cm->mb_rows * cm->mb_cols));
+
+  // First-pass statistics records; all three are allocated before any of
+  // them is checked.
+  vpx_free(cpi->twopass.total_stats);
+  cpi->twopass.total_stats = vpx_calloc(1, sizeof(FIRSTPASS_STATS));
+
+  vpx_free(cpi->twopass.total_left_stats);
+  cpi->twopass.total_left_stats = vpx_calloc(1, sizeof(FIRSTPASS_STATS));
+
+  vpx_free(cpi->twopass.this_frame_stats);
+  cpi->twopass.this_frame_stats = vpx_calloc(1, sizeof(FIRSTPASS_STATS));
+
+  if (!(cpi->twopass.total_stats &&
+        cpi->twopass.total_left_stats &&
+        cpi->twopass.this_frame_stats)) {
+    vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
+                       "Failed to allocate firstpass stats");
+  }
+
+  // One TOKENLIST per macroblock row.
+  vpx_free(cpi->tplist);
+  CHECK_MEM_ERROR(cpi->tplist,
+                  vpx_malloc(sizeof(TOKENLIST) * (cpi->common.mb_rows)));
+}
+
+
+// TODO: consider changing the number of steps exposed to the outside world
+// when setting the max and min limits. This will also likely want refining
+// for the extended Q range.
+//
+// Table that converts the 0-63 Q range values passed in from outside to the
+// Qindex range used internally.
+static const int q_trans[] = {
+  0,   4,   8,   12,  16,  20,  24,  28,
+  32,  36,  40,  44,  48,  52,  56,  60,
+  64,  68,  72,  76,  80,  84,  88,  92,
+  96,  100, 104, 108, 112, 116, 120, 124,
+  128, 132, 136, 140, 144, 148, 152, 156,
+  160, 164, 168, 172, 176, 180, 184, 188,
+  192, 196, 200, 204, 208, 212, 216, 220,
+  224, 228, 232, 236, 240, 244, 249, 255,
+};
+
+// Inverse of the q_trans lookup: returns the smallest table index whose
+// entry is >= x, or the last index when x is larger than every entry.
+// Values of x at or below 0 therefore map to index 0.
+int vp9_reverse_trans(int x) {
+  // Derive the bound from the table so the two cannot drift apart.
+  const int entries = (int)(sizeof(q_trans) / sizeof(q_trans[0]));
+  int i;
+
+  for (i = 0; i < entries; i++) {
+    if (q_trans[i] >= x)
+      return i;
+  }
+
+  return entries - 1;
+}
+// Applies a new frame rate and recomputes the values derived from it:
+// the per-frame bandwidth targets, the minimum frame bandwidth and the
+// maximum golden/alt-ref frame intervals.  A rate below 0.1 is replaced
+// by 30.
+void vp9_new_frame_rate(VP9_COMP *cpi, double framerate) {
+  cpi->oxcf.frame_rate = (framerate < .1) ? 30 : framerate;
+  cpi->output_frame_rate = cpi->oxcf.frame_rate;
+
+  // Bits available per frame at the target bandwidth.
+  cpi->per_frame_bandwidth =
+      (int)(cpi->oxcf.target_bandwidth / cpi->output_frame_rate);
+  cpi->av_per_frame_bandwidth =
+      (int)(cpi->oxcf.target_bandwidth / cpi->output_frame_rate);
+
+  // Minimum is a percentage of the average, floored at FRAME_OVERHEAD_BITS.
+  cpi->min_frame_bandwidth =
+      (int)(cpi->av_per_frame_bandwidth *
+            cpi->oxcf.two_pass_vbrmin_section / 100);
+  if (cpi->min_frame_bandwidth < FRAME_OVERHEAD_BITS) {
+    cpi->min_frame_bandwidth = FRAME_OVERHEAD_BITS;
+  }
+
+  // Maximum gf/arf interval: half the frame rate plus 2, at least 12.
+  cpi->max_gf_interval = ((int)(cpi->output_frame_rate / 2.0) + 2);
+  if (cpi->max_gf_interval < 12) {
+    cpi->max_gf_interval = 12;
+  }
+
+  // Extended interval for genuinely static scenes
+  cpi->twopass.static_scene_max_gf_interval = cpi->key_frame_frequency >> 1;
+
+  // With alt-ref frames in lagged compress mode, neither interval may
+  // exceed lag_in_frames - 1.
+  if (cpi->oxcf.play_alternate && cpi->oxcf.lag_in_frames) {
+    if (cpi->max_gf_interval > cpi->oxcf.lag_in_frames - 1) {
+      cpi->max_gf_interval = cpi->oxcf.lag_in_frames - 1;
+    }
+
+    if (cpi->twopass.static_scene_max_gf_interval >
+        cpi->oxcf.lag_in_frames - 1) {
+      cpi->twopass.static_scene_max_gf_interval =
+          cpi->oxcf.lag_in_frames - 1;
+    }
+  }
+
+  // The normal interval never exceeds the static-scene interval.
+  if (cpi->max_gf_interval > cpi->twopass.static_scene_max_gf_interval) {
+    cpi->max_gf_interval = cpi->twopass.static_scene_max_gf_interval;
+  }
+}
+
+
+// Returns val * num / denom, with the multiplication and division carried
+// out in 64 bits so the intermediate product cannot overflow an int.  The
+// quotient is truncated toward zero and converted back to int.
+static int rescale(int val, int num, int denom) {
+  const int64_t product = (int64_t)val * (int64_t)num;
+
+  return (int)(product / (int64_t)denom);
+}
+
+
+// Initial configuration of a compressor instance.
+//
+// Stores a copy of *oxcf, sets up the bitstream version, hands the
+// configuration to vp9_change_config() for everything shared with a later
+// reconfiguration, and then seeds the rate-control state (quality limits,
+// buffer levels, rolling bit counters).  With VP9_TEMPORAL_ALT_REF it also
+// fills the fixed_divide[] table.
+static void init_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
+  VP9_COMP *cpi = (VP9_COMP *)(ptr);
+  VP9_COMMON *cm = &cpi->common;
+
+  cpi->oxcf = *oxcf;
+  cpi->goldfreq = 7;
+
+  cm->version = oxcf->Version;
+  vp9_setup_version(cm);
+
+  // change includes all joint functionality
+  vp9_change_config(ptr, oxcf);
+
+  // Active and average q values start from the configured limits.
+  cpi->active_worst_quality = cpi->oxcf.worst_allowed_q;
+  cpi->active_best_quality = cpi->oxcf.best_allowed_q;
+  cpi->avg_frame_qindex = cpi->oxcf.worst_allowed_q;
+
+  // Both buffer trackers start at the configured starting level.
+  cpi->buffer_level = cpi->oxcf.starting_buffer_level;
+  cpi->bits_off_target = cpi->oxcf.starting_buffer_level;
+
+  // Rolling averages start at the average per-frame bandwidth.
+  cpi->rolling_target_bits = cpi->av_per_frame_bandwidth;
+  cpi->rolling_actual_bits = cpi->av_per_frame_bandwidth;
+  cpi->long_rolling_target_bits = cpi->av_per_frame_bandwidth;
+  cpi->long_rolling_actual_bits = cpi->av_per_frame_bandwidth;
+
+  cpi->total_actual_bits = 0;
+  cpi->total_target_vs_actual = 0;
+
+  cpi->static_mb_pct = 0;
+
+#if VP9_TEMPORAL_ALT_REF
+  {
+    int divisor = 1;
+
+    // Entry i holds 0x80000 / i; entry 0 is defined as 0.
+    cpi->fixed_divide[0] = 0;
+
+    while (divisor < 512) {
+      cpi->fixed_divide[divisor] = 0x80000 / divisor;
+      divisor++;
+    }
+  }
+#endif
+}
+
+
+void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
+ VP9_COMP *cpi = (VP9_COMP *)(ptr);
+ VP9_COMMON *cm = &cpi->common;
+ // Applies the settings in *oxcf to the compressor behind ptr; returns at once if either is NULL.
+ if (!cpi)
+ return;
+
+ if (!oxcf)
+ return;
+
+ if (cm->version != oxcf->Version) {
+ cm->version = oxcf->Version;
+ vp9_setup_version(cm);
+ }
+
+ cpi->oxcf = *oxcf;
+ // Pick the pass number and compressor speed for the requested mode.
+ switch (cpi->oxcf.Mode) {
+ // Real time and one pass deprecated in test code base
+ case MODE_FIRSTPASS:
+ cpi->pass = 1;
+ cpi->compressor_speed = 1;
+ break;
+
+ case MODE_SECONDPASS:
+ cpi->pass = 2;
+ cpi->compressor_speed = 1;
+ // Clamp cpu_used to the range [-5, 5].
+ if (cpi->oxcf.cpu_used < -5) {
+ cpi->oxcf.cpu_used = -5;
+ }
+
+ if (cpi->oxcf.cpu_used > 5)
+ cpi->oxcf.cpu_used = 5;
+
+ break;
+
+ case MODE_SECONDPASS_BEST:
+ cpi->pass = 2;
+ cpi->compressor_speed = 0;
+ break;
+ }
+ // Map the quantizer limits and cq_level through the q_trans table.
+ cpi->oxcf.worst_allowed_q = q_trans[oxcf->worst_allowed_q];
+ cpi->oxcf.best_allowed_q = q_trans[oxcf->best_allowed_q];
+ cpi->oxcf.cq_level = q_trans[cpi->oxcf.cq_level];
+ // Default inverse transform routines; replaced below when lossless is enabled.
+ cpi->mb.e_mbd.inv_xform4x4_1_x8 = vp9_short_idct4x4llm_1;
+ cpi->mb.e_mbd.inv_xform4x4_x8 = vp9_short_idct4x4llm;
+ cpi->mb.e_mbd.inv_walsh4x4_1 = vp9_short_inv_walsh4x4_1;
+ cpi->mb.e_mbd.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4;
+
+#if CONFIG_LOSSLESS
+ cpi->oxcf.lossless = oxcf->lossless;
+ if (cpi->oxcf.lossless) {
+ cpi->mb.e_mbd.inv_xform4x4_1_x8 = vp9_short_inv_walsh4x4_1_x8;
+ cpi->mb.e_mbd.inv_xform4x4_x8 = vp9_short_inv_walsh4x4_x8;
+ cpi->mb.e_mbd.inv_walsh4x4_1 = vp9_short_inv_walsh4x4_1_lossless;
+ cpi->mb.e_mbd.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4_lossless;
+ }
+#endif
+
+ cpi->baseline_gf_interval = DEFAULT_GF_INTERVAL;
+
+ cpi->ref_frame_flags = VP9_ALT_FLAG | VP9_GOLD_FLAG | VP9_LAST_FLAG;
+
+ // cpi->use_golden_frame_only = 0;
+ // cpi->use_last_frame_only = 0;
+ cm->refresh_golden_frame = 0;
+ cm->refresh_last_frame = 1;
+ cm->refresh_entropy_probs = 1;
+
+ setup_features(cpi);
+ cpi->mb.e_mbd.allow_high_precision_mv = 0; // Default mv precision adaptation
+ set_mvcost(&cpi->mb);
+
+ {
+ int i;
+ // Every segment starts with the configured encode_breakout value.
+ for (i = 0; i < MAX_MB_SEGMENTS; i++)
+ cpi->segment_encode_breakout[i] = cpi->oxcf.encode_breakout;
+ }
+
+ // At the moment the first order values may not be > MAXQ
+ if (cpi->oxcf.fixed_q > MAXQ)
+ cpi->oxcf.fixed_q = MAXQ;
+
+ // local file playback mode == really big buffer
+ if (cpi->oxcf.end_usage == USAGE_LOCAL_FILE_PLAYBACK) {
+ cpi->oxcf.starting_buffer_level = 60000;
+ cpi->oxcf.optimal_buffer_level = 60000;
+ cpi->oxcf.maximum_buffer_size = 240000;
+ }
+
+ // Convert target bandwidth from Kbit/s to Bit/s
+ cpi->oxcf.target_bandwidth *= 1000;
+ // Buffer levels are scaled by target_bandwidth / 1000 via rescale().
+ cpi->oxcf.starting_buffer_level =
+ rescale(cpi->oxcf.starting_buffer_level,
+ cpi->oxcf.target_bandwidth, 1000);
+
+ // Set or reset optimal and maximum buffer levels.
+ if (cpi->oxcf.optimal_buffer_level == 0)
+ cpi->oxcf.optimal_buffer_level = cpi->oxcf.target_bandwidth / 8;
+ else
+ cpi->oxcf.optimal_buffer_level =
+ rescale(cpi->oxcf.optimal_buffer_level,
+ cpi->oxcf.target_bandwidth, 1000);
+
+ if (cpi->oxcf.maximum_buffer_size == 0)
+ cpi->oxcf.maximum_buffer_size = cpi->oxcf.target_bandwidth / 8;
+ else
+ cpi->oxcf.maximum_buffer_size =
+ rescale(cpi->oxcf.maximum_buffer_size,
+ cpi->oxcf.target_bandwidth, 1000);
+
+ // Set up the frame rate and the rate control values that depend on it.
+ vp9_new_frame_rate(cpi, cpi->oxcf.frame_rate);
+
+ // Set absolute upper and lower quality limits
+ cpi->worst_quality = cpi->oxcf.worst_allowed_q;
+ cpi->best_quality = cpi->oxcf.best_allowed_q;
+
+ // active values should only be modified if out of new range
+ if (cpi->active_worst_quality > cpi->oxcf.worst_allowed_q) {
+ cpi->active_worst_quality = cpi->oxcf.worst_allowed_q;
+ }
+ // less likely
+ else if (cpi->active_worst_quality < cpi->oxcf.best_allowed_q) {
+ cpi->active_worst_quality = cpi->oxcf.best_allowed_q;
+ }
+ if (cpi->active_best_quality < cpi->oxcf.best_allowed_q) {
+ cpi->active_best_quality = cpi->oxcf.best_allowed_q;
+ }
+ // less likely
+ else if (cpi->active_best_quality > cpi->oxcf.worst_allowed_q) {
+ cpi->active_best_quality = cpi->oxcf.worst_allowed_q;
+ }
+
+ cpi->buffered_mode = (cpi->oxcf.optimal_buffer_level > 0) ? TRUE : FALSE;
+
+ cpi->cq_target_quality = cpi->oxcf.cq_level;
+
+ if (!cm->use_bilinear_mc_filter)
+ cm->mcomp_filter_type = DEFAULT_INTERP_FILTER;
+ else
+ cm->mcomp_filter_type = BILINEAR;
+
+ cpi->target_bandwidth = cpi->oxcf.target_bandwidth;
+
+ cm->Width = cpi->oxcf.Width;
+ cm->Height = cpi->oxcf.Height;
+
+ cm->horiz_scale = cpi->horiz_scale;
+ cm->vert_scale = cpi->vert_scale;
+
+ // VP8 sharpness level mapping 0-7 (vs 0-10 in general VPx dialogs)
+ if (cpi->oxcf.Sharpness > 7)
+ cpi->oxcf.Sharpness = 7;
+
+ cm->sharpness_level = cpi->oxcf.Sharpness;
+ // When scaling is active, derive the coded size from the configured size.
+ if (cm->horiz_scale != NORMAL || cm->vert_scale != NORMAL) {
+ int UNINITIALIZED_IS_SAFE(hr), UNINITIALIZED_IS_SAFE(hs);
+ int UNINITIALIZED_IS_SAFE(vr), UNINITIALIZED_IS_SAFE(vs);
+
+ Scale2Ratio(cm->horiz_scale, &hr, &hs);
+ Scale2Ratio(cm->vert_scale, &vr, &vs);
+
+ // always go to the next whole number
+ cm->Width = (hs - 1 + cpi->oxcf.Width * hr) / hs;
+ cm->Height = (vs - 1 + cpi->oxcf.Height * vr) / vs;
+ }
+ // (Re)allocate buffers when the 16-aligned frame size changed or nothing is allocated yet.
+ if (((cm->Width + 15) & 0xfffffff0) !=
+ cm->yv12_fb[cm->lst_fb_idx].y_width ||
+ ((cm->Height + 15) & 0xfffffff0) !=
+ cm->yv12_fb[cm->lst_fb_idx].y_height ||
+ cm->yv12_fb[cm->lst_fb_idx].y_width == 0) {
+ alloc_raw_frame_buffers(cpi);
+ vp9_alloc_compressor_data(cpi);
+ }
+ // A non-negative fixed_q also seeds the last-q history.
+ if (cpi->oxcf.fixed_q >= 0) {
+ cpi->last_q[0] = cpi->oxcf.fixed_q;
+ cpi->last_q[1] = cpi->oxcf.fixed_q;
+ cpi->last_boosted_qindex = cpi->oxcf.fixed_q;
+ }
+
+ cpi->Speed = cpi->oxcf.cpu_used;
+
+ // Force allow_lag to 0 if lag_in_frames is 0.
+ if (cpi->oxcf.lag_in_frames == 0) {
+ cpi->oxcf.allow_lag = 0;
+ }
+ // Limit on lag buffers as these are not currently dynamically allocated
+ else if (cpi->oxcf.lag_in_frames > MAX_LAG_BUFFERS)
+ cpi->oxcf.lag_in_frames = MAX_LAG_BUFFERS;
+
+ // YX Temp
+ cpi->alt_ref_source = NULL;
+ cpi->is_src_frame_alt_ref = 0;
+
+#if 0
+ // Experimental RD Code
+ cpi->frame_distortion = 0;
+ cpi->last_frame_distortion = 0;
+#endif
+
+}
+
+#define M_LOG2_E 0.693147180559945309417  // NOTE: despite the name, this value is ln(2), not log2(e)
+#define log2f(x) (log (x) / (float) M_LOG2_E)  // base-2 logarithm, computed as ln(x) / ln(2)
+
+// Joint SAD costs: 600 for index 0 and 300 for each of indices 1-3. The final
+// store used to hit index 0 again, discarding the 600 and never setting [3].
+static void cal_nmvjointsadcost(int *mvjointsadcost) {
+  mvjointsadcost[0] = 600;
+  mvjointsadcost[1] = mvjointsadcost[2] = mvjointsadcost[3] = 300;
+}
+
+// Fills both SAD cost tables symmetrically about index 0: entry 0 costs 0 and
+// entries +i and -i cost (int)(256 * 2 * (log2(8 * i) + .6)), 1 <= i <= MV_MAX.
+static void cal_nmvsadcosts(int *mvsadcost[2]) {
+  int mag = 1;
+  mvsadcost[0][0] = mvsadcost[1][0] = 0;
+  do {
+    const double z = 256 * (2 * (log2f(8 * mag) + .6));
+    const int cost = (int)z;
+    mvsadcost[0][mag] = cost;
+    mvsadcost[1][mag] = cost;
+    mvsadcost[0][-mag] = cost;
+    mvsadcost[1][-mag] = cost;
+  } while (++mag <= MV_MAX);
+}
+
+// Variant applied to the nmvsadcost_hp tables. The formula is the same as in
+// cal_nmvsadcosts(): entry 0 is 0, entries +/-i are (int)(256*2*(log2(8*i)+.6)).
+static void cal_nmvsadcosts_hp(int *mvsadcost[2]) {
+  int i;
+
+  mvsadcost[0][0] = 0;
+  mvsadcost[1][0] = 0;
+  for (i = 1;;) {
+    const double z = 256 * (2 * (log2f(8 * i) + .6));
+    mvsadcost[0][i] = mvsadcost[0][-i] = (int)z;
+    mvsadcost[1][i] = mvsadcost[1][-i] = (int)z;
+    if (++i > MV_MAX) break;
+  }
+}
+
+VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
+ int i;
+ volatile union {
+ VP9_COMP *cpi;
+ VP9_PTR ptr;
+ } ctx;
+ // Allocates, zeroes and initialises a compressor instance from *oxcf; returns 0 on failure.
+ VP9_COMP *cpi;
+ VP9_COMMON *cm;
+
+ cpi = ctx.cpi = vpx_memalign(32, sizeof(VP9_COMP));
+ // Check that the CPI instance is valid
+ if (!cpi)
+ return 0;
+
+ cm = &cpi->common;
+
+ vpx_memset(cpi, 0, sizeof(VP9_COMP));
+ // Error path: tear the instance down and return 0. NOTE(review): presumably entered via longjmp from CHECK_MEM_ERROR - confirm.
+ if (setjmp(cm->error.jmp)) {
+ VP9_PTR ptr = ctx.ptr;
+
+ ctx.cpi->common.error.setjmp = 0;
+ vp9_remove_compressor(&ptr);
+ return 0;
+ }
+
+ cpi->common.error.setjmp = 1;
+
+ CHECK_MEM_ERROR(cpi->mb.ss, vpx_calloc(sizeof(search_site), (MAX_MVSEARCH_STEPS * 8) + 1));
+
+ vp9_create_common(&cpi->common);
+
+ init_config((VP9_PTR)cpi, oxcf);
+
+ memcpy(cpi->base_skip_false_prob, base_skip_false_prob, sizeof(base_skip_false_prob));
+ cpi->common.current_video_frame = 0;
+ cpi->kf_overspend_bits = 0;
+ cpi->kf_bitrate_adjustment = 0;
+ cpi->frames_till_gf_update_due = 0;
+ cpi->gf_overspend_bits = 0;
+ cpi->non_gf_bitrate_adjustment = 0;
+ cm->prob_last_coded = 128;
+ cm->prob_gf_coded = 128;
+ cm->prob_intra_coded = 63;
+#if CONFIG_SUPERBLOCKS
+ cm->sb_coded = 200;
+#endif
+ for (i = 0; i < COMP_PRED_CONTEXTS; i++)
+ cm->prob_comppred[i] = 128;
+ for (i = 0; i < TX_SIZE_MAX - 1; i++)
+ cm->prob_tx[i] = 128;
+
+ // Prime the recent reference frame usage counters.
+ // Hereafter they will be maintained as a sort of moving average
+ cpi->recent_ref_frame_usage[INTRA_FRAME] = 1;
+ cpi->recent_ref_frame_usage[LAST_FRAME] = 1;
+ cpi->recent_ref_frame_usage[GOLDEN_FRAME] = 1;
+ cpi->recent_ref_frame_usage[ALTREF_FRAME] = 1;
+
+ // Set reference frame sign bias for ALTREF frame to 1 (for now)
+ cpi->common.ref_frame_sign_bias[ALTREF_FRAME] = 1;
+
+ cpi->baseline_gf_interval = DEFAULT_GF_INTERVAL;
+
+ cpi->gold_is_last = 0;
+ cpi->alt_is_last = 0;
+ cpi->gold_is_alt = 0;
+
+ // allocate memory for storing last frame's MVs for MV prediction.
+ CHECK_MEM_ERROR(cpi->lfmv, vpx_calloc((cpi->common.mb_rows + 2) * (cpi->common.mb_cols + 2), sizeof(int_mv)));
+ CHECK_MEM_ERROR(cpi->lf_ref_frame_sign_bias, vpx_calloc((cpi->common.mb_rows + 2) * (cpi->common.mb_cols + 2), sizeof(int)));
+ CHECK_MEM_ERROR(cpi->lf_ref_frame, vpx_calloc((cpi->common.mb_rows + 2) * (cpi->common.mb_cols + 2), sizeof(int)));
+
+ // Create the encoder segmentation map and set all entries to 0
+ CHECK_MEM_ERROR(cpi->segmentation_map, vpx_calloc((cpi->common.mb_rows * cpi->common.mb_cols), 1));
+
+ // And a copy in common for temporal coding
+ CHECK_MEM_ERROR(cm->last_frame_seg_map,
+ vpx_calloc((cpi->common.mb_rows * cpi->common.mb_cols), 1));
+
+ // And a placeholder copy in the coding context,
+ // for use if we want to save and restore it
+ CHECK_MEM_ERROR(cpi->coding_context.last_frame_seg_map_copy,
+ vpx_calloc((cpi->common.mb_rows * cpi->common.mb_cols), 1));
+ // The active map has one byte per macroblock, all set to 1; the map itself starts disabled.
+ CHECK_MEM_ERROR(cpi->active_map, vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1));
+ vpx_memset(cpi->active_map, 1, (cpi->common.mb_rows * cpi->common.mb_cols));
+ cpi->active_map_enabled = 0;
+
+ for (i = 0; i < (sizeof(cpi->mbgraph_stats) /
+ sizeof(cpi->mbgraph_stats[0])); i++) {
+ CHECK_MEM_ERROR(cpi->mbgraph_stats[i].mb_stats,
+ vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols *
+ sizeof(*cpi->mbgraph_stats[i].mb_stats),
+ 1));
+ }
+
+#ifdef ENTROPY_STATS
+ if (cpi->pass != 1)
+ init_context_counters();
+#endif
+#ifdef MODE_STATS
+ vp9_zero(y_modes);
+ vp9_zero(i8x8_modes);
+ vp9_zero(uv_modes);
+ vp9_zero(uv_modes_y);
+ vp9_zero(b_modes);
+ vp9_zero(inter_y_modes);
+ vp9_zero(inter_uv_modes);
+ vp9_zero(inter_b_modes);
+#endif
+#ifdef NMV_STATS
+ init_nmvstats();
+#endif
+
+ /* Initialize the feed-forward activity masking. */
+ cpi->activity_avg = 90 << 12;
+
+ cpi->frames_since_key = 8; // Give a sensible default for the first frame.
+ cpi->key_frame_frequency = cpi->oxcf.key_freq;
+ cpi->this_key_frame_forced = FALSE;
+ cpi->next_key_frame_forced = FALSE;
+
+ cpi->source_alt_ref_pending = FALSE;
+ cpi->source_alt_ref_active = FALSE;
+ cpi->common.refresh_alt_ref_frame = 0;
+
+ cpi->b_calculate_psnr = CONFIG_INTERNAL_STATS;
+#if CONFIG_INTERNAL_STATS
+ cpi->b_calculate_ssimg = 0;
+
+ cpi->count = 0;
+ cpi->bytes = 0;
+
+ if (cpi->b_calculate_psnr) {
+ cpi->total_sq_error = 0.0;
+ cpi->total_sq_error2 = 0.0;
+ cpi->total_y = 0.0;
+ cpi->total_u = 0.0;
+ cpi->total_v = 0.0;
+ cpi->total = 0.0;
+ cpi->totalp_y = 0.0;
+ cpi->totalp_u = 0.0;
+ cpi->totalp_v = 0.0;
+ cpi->totalp = 0.0;
+ cpi->tot_recode_hits = 0;
+ cpi->summed_quality = 0;
+ cpi->summed_weights = 0;
+ }
+
+ if (cpi->b_calculate_ssimg) {
+ cpi->total_ssimg_y = 0;
+ cpi->total_ssimg_u = 0;
+ cpi->total_ssimg_v = 0;
+ cpi->total_ssimg_all = 0;
+ }
+
+#endif
+
+#ifndef LLONG_MAX
+#define LLONG_MAX 9223372036854775807LL
+#endif
+ cpi->first_time_stamp_ever = LLONG_MAX;
+
+ cpi->frames_till_gf_update_due = 0;
+ cpi->key_frame_count = 1;
+
+ cpi->ni_av_qi = cpi->oxcf.worst_allowed_q;
+ cpi->ni_tot_qi = 0;
+ cpi->ni_frames = 0;
+ cpi->tot_q = 0.0;
+ cpi->avg_q = vp9_convert_qindex_to_q(cpi->oxcf.worst_allowed_q);
+ cpi->total_byte_count = 0;
+
+ cpi->rate_correction_factor = 1.0;
+ cpi->key_frame_rate_correction_factor = 1.0;
+ cpi->gf_rate_correction_factor = 1.0;
+ cpi->twopass.est_max_qcorrection_factor = 1.0;
+ // The cost table pointers address element MV_MAX of their backing arrays so they can be indexed from -MV_MAX to MV_MAX.
+ cal_nmvjointsadcost(cpi->mb.nmvjointsadcost);
+ cpi->mb.nmvcost[0] = &cpi->mb.nmvcosts[0][MV_MAX];
+ cpi->mb.nmvcost[1] = &cpi->mb.nmvcosts[1][MV_MAX];
+ cpi->mb.nmvsadcost[0] = &cpi->mb.nmvsadcosts[0][MV_MAX];
+ cpi->mb.nmvsadcost[1] = &cpi->mb.nmvsadcosts[1][MV_MAX];
+ cal_nmvsadcosts(cpi->mb.nmvsadcost);
+
+ cpi->mb.nmvcost_hp[0] = &cpi->mb.nmvcosts_hp[0][MV_MAX];
+ cpi->mb.nmvcost_hp[1] = &cpi->mb.nmvcosts_hp[1][MV_MAX];
+ cpi->mb.nmvsadcost_hp[0] = &cpi->mb.nmvsadcosts_hp[0][MV_MAX];
+ cpi->mb.nmvsadcost_hp[1] = &cpi->mb.nmvsadcosts_hp[1][MV_MAX];
+ cal_nmvsadcosts_hp(cpi->mb.nmvsadcost_hp);
+
+ for (i = 0; i < KEY_FRAME_CONTEXT; i++) {
+ cpi->prior_key_frame_distance[i] = (int)cpi->output_frame_rate;
+ }
+
+#ifdef OUTPUT_YUV_SRC
+ yuv_file = fopen("bd.yuv", "ab");
+#endif
+#ifdef OUTPUT_YUV_REC
+ yuv_rec_file = fopen("rec.yuv", "wb");
+#endif
+
+#if 0
+ framepsnr = fopen("framepsnr.stt", "a");
+ kf_list = fopen("kf_list.stt", "w");
+#endif
+
+ cpi->output_pkt_list = oxcf->output_pkt_list;
+ // Pass 1 initialises first-pass state; pass 2 points the stats reader at the caller's stats buffer.
+ if (cpi->pass == 1) {
+ vp9_init_first_pass(cpi);
+ } else if (cpi->pass == 2) {
+ size_t packet_sz = sizeof(FIRSTPASS_STATS);
+ int packets = (int)(oxcf->two_pass_stats_in.sz / packet_sz);
+
+ cpi->twopass.stats_in_start = oxcf->two_pass_stats_in.buf;
+ cpi->twopass.stats_in = cpi->twopass.stats_in_start;
+ cpi->twopass.stats_in_end = (void *)((char *)cpi->twopass.stats_in
+ + (packets - 1) * packet_sz);
+ vp9_init_second_pass(cpi);
+ }
+
+ vp9_set_speed_features(cpi);
+
+ // Set starting values of RD threshold multipliers (128 = *1)
+ for (i = 0; i < MAX_MODES; i++) {
+ cpi->rd_thresh_mult[i] = 128;
+ }
+
+#ifdef ENTROPY_STATS
+ init_mv_ref_counts();
+#endif
+ // BFP fills in the cpi->fn_ptr[] entry for one block size.
+#define BFP(BT, SDF, VF, SVF, SVFHH, SVFHV, SVFHHV, SDX3F, SDX8F, SDX4DF) \
+ cpi->fn_ptr[BT].sdf = SDF; \
+ cpi->fn_ptr[BT].vf = VF; \
+ cpi->fn_ptr[BT].svf = SVF; \
+ cpi->fn_ptr[BT].svf_halfpix_h = SVFHH; \
+ cpi->fn_ptr[BT].svf_halfpix_v = SVFHV; \
+ cpi->fn_ptr[BT].svf_halfpix_hv = SVFHHV; \
+ cpi->fn_ptr[BT].sdx3f = SDX3F; \
+ cpi->fn_ptr[BT].sdx8f = SDX8F; \
+ cpi->fn_ptr[BT].sdx4df = SDX4DF;
+
+
+#if CONFIG_SUPERBLOCKS
+ BFP(BLOCK_32X32, vp9_sad32x32, vp9_variance32x32, vp9_sub_pixel_variance32x32,
+ vp9_variance_halfpixvar32x32_h, vp9_variance_halfpixvar32x32_v,
+ vp9_variance_halfpixvar32x32_hv, vp9_sad32x32x3, vp9_sad32x32x8,
+ vp9_sad32x32x4d)
+#endif
+
+ BFP(BLOCK_16X16, vp9_sad16x16, vp9_variance16x16, vp9_sub_pixel_variance16x16,
+ vp9_variance_halfpixvar16x16_h, vp9_variance_halfpixvar16x16_v,
+ vp9_variance_halfpixvar16x16_hv, vp9_sad16x16x3, vp9_sad16x16x8,
+ vp9_sad16x16x4d)
+
+ BFP(BLOCK_16X8, vp9_sad16x8, vp9_variance16x8, vp9_sub_pixel_variance16x8,
+ NULL, NULL, NULL, vp9_sad16x8x3, vp9_sad16x8x8, vp9_sad16x8x4d)
+
+ BFP(BLOCK_8X16, vp9_sad8x16, vp9_variance8x16, vp9_sub_pixel_variance8x16,
+ NULL, NULL, NULL, vp9_sad8x16x3, vp9_sad8x16x8, vp9_sad8x16x4d)
+
+ BFP(BLOCK_8X8, vp9_sad8x8, vp9_variance8x8, vp9_sub_pixel_variance8x8,
+ NULL, NULL, NULL, vp9_sad8x8x3, vp9_sad8x8x8, vp9_sad8x8x4d)
+
+ BFP(BLOCK_4X4, vp9_sad4x4, vp9_variance4x4, vp9_sub_pixel_variance4x4,
+ NULL, NULL, NULL, vp9_sad4x4x3, vp9_sad4x4x8, vp9_sad4x4x4d)
+
+#if ARCH_X86 || ARCH_X86_64
+ cpi->fn_ptr[BLOCK_16X16].copymem = vp9_copy32xn;
+ cpi->fn_ptr[BLOCK_16X8].copymem = vp9_copy32xn;
+ cpi->fn_ptr[BLOCK_8X16].copymem = vp9_copy32xn;
+ cpi->fn_ptr[BLOCK_8X8].copymem = vp9_copy32xn;
+ cpi->fn_ptr[BLOCK_4X4].copymem = vp9_copy32xn;
+#endif
+
+ cpi->full_search_sad = vp9_full_search_sad;
+ cpi->diamond_search_sad = vp9_diamond_search_sad;
+ cpi->refining_search_sad = vp9_refining_search_sad;
+
+ // make sure frame 1 is okay
+ cpi->error_bins[0] = cpi->common.MBs;
+
+ /* vp9_init_quantizer() is first called here. Add check in
+ * vp9_frame_init_quantizer() so that vp9_init_quantizer is only
+ * called later when needed. This will avoid unnecessary calls of
+ * vp9_init_quantizer() for every frame.
+ */
+ vp9_init_quantizer(cpi);
+
+ vp9_loop_filter_init(cm);
+ // Initialisation succeeded: disarm the error handler flag set near the top.
+ cpi->common.error.setjmp = 0;
+
+ vp9_zero(cpi->y_uv_mode_count)
+
+ return (VP9_PTR) cpi;
+}
+
+void vp9_remove_compressor(VP9_PTR *ptr) {
+ VP9_COMP *cpi = (VP9_COMP *)(*ptr);
+ int i;
+ // Tears down the compressor at *ptr (dumping optional statistics first), frees it and sets *ptr to 0.
+ if (!cpi)
+ return;
+ // Statistics are only produced once at least one frame has been processed.
+ if (cpi->common.current_video_frame > 0) {
+ if (cpi->pass == 2) {
+ vp9_end_second_pass(cpi);
+ }
+
+#ifdef ENTROPY_STATS
+ if (cpi->pass != 1) {
+ print_context_counters();
+ print_tree_update_probs();
+ print_mode_context();
+ }
+#endif
+#ifdef NMV_STATS
+ if (cpi->pass != 1)
+ print_nmvstats();
+#endif
+
+#if CONFIG_INTERNAL_STATS
+
+ vp9_clear_system_state();
+
+ // printf("\n8x8-4x4:%d-%d\n", cpi->t8x8_count, cpi->t4x4_count);
+ if (cpi->pass != 1) {
+ FILE *f = fopen("opsnr.stt", "a");
+ double time_encoded = (cpi->last_end_time_stamp_seen
+ - cpi->first_time_stamp_ever) / 10000000.000;
+ double total_encode_time = (cpi->time_receive_data + cpi->time_compress_data) / 1000.000;
+ double dr = (double)cpi->bytes * (double) 8 / (double)1000 / time_encoded;
+#if defined(MODE_STATS)
+ print_mode_contexts(&cpi->common);
+#endif
+ if (f && cpi->b_calculate_psnr) {
+ YV12_BUFFER_CONFIG *lst_yv12 = &cpi->common.yv12_fb[cpi->common.lst_fb_idx];
+ double samples = 3.0 / 2 * cpi->count * lst_yv12->y_width * lst_yv12->y_height;
+ double total_psnr = vp9_mse2psnr(samples, 255.0, cpi->total_sq_error);
+ double total_psnr2 = vp9_mse2psnr(samples, 255.0, cpi->total_sq_error2);
+ double total_ssim = 100 * pow(cpi->summed_quality / cpi->summed_weights, 8.0);
+
+ fprintf(f, "Bitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\tGLPsnrP\tVPXSSIM\t Time(ms)\n");
+ fprintf(f, "%7.2f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t%8.0f\n",
+ dr, cpi->total / cpi->count, total_psnr, cpi->totalp / cpi->count, total_psnr2, total_ssim,
+ total_encode_time);
+// fprintf(f, "%7.3f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t%8.0f %10ld\n",
+// dr, cpi->total / cpi->count, total_psnr, cpi->totalp / cpi->count, total_psnr2, total_ssim,
+// total_encode_time, cpi->tot_recode_hits);
+ }
+
+ if (f && cpi->b_calculate_ssimg) {
+ fprintf(f, "BitRate\tSSIM_Y\tSSIM_U\tSSIM_V\tSSIM_A\t Time(ms)\n");
+ fprintf(f, "%7.2f\t%6.4f\t%6.4f\t%6.4f\t%6.4f\t%8.0f\n", dr,
+ cpi->total_ssimg_y / cpi->count, cpi->total_ssimg_u / cpi->count,
+ cpi->total_ssimg_v / cpi->count, cpi->total_ssimg_all / cpi->count, total_encode_time);
+// fprintf(f, "%7.3f\t%6.4f\t%6.4f\t%6.4f\t%6.4f\t%8.0f %10ld\n", dr,
+// cpi->total_ssimg_y / cpi->count, cpi->total_ssimg_u / cpi->count,
+// cpi->total_ssimg_v / cpi->count, cpi->total_ssimg_all / cpi->count, total_encode_time, cpi->tot_recode_hits);
+ }
+
+ if (f) fclose(f);
+ }
+
+#endif
+
+ // NOTE(review): the fopen() results in the MODE_STATS and ENTROPY_STATS sections below are used unchecked.
+#ifdef MODE_STATS
+ {
+ extern int count_mb_seg[4];
+ char modes_stats_file[250];
+ FILE *f;
+ double dr = (double)cpi->oxcf.frame_rate * (double)cpi->bytes * (double)8 / (double)cpi->count / (double)1000;
+ sprintf(modes_stats_file, "modes_q%03d.stt", cpi->common.base_qindex);
+ f = fopen(modes_stats_file, "w");
+ fprintf(f, "intra_mode in Intra Frames:\n");
+ {
+ int i;
+ fprintf(f, "Y: ");
+ for (i = 0; i < VP9_YMODES; i++) fprintf(f, " %8d,", y_modes[i]);
+ fprintf(f, "\n");
+ }
+ {
+ int i;
+ fprintf(f, "I8: ");
+ for (i = 0; i < VP9_I8X8_MODES; i++) fprintf(f, " %8d,", i8x8_modes[i]);
+ fprintf(f, "\n");
+ }
+ {
+ int i;
+ fprintf(f, "UV: ");
+ for (i = 0; i < VP9_UV_MODES; i++) fprintf(f, " %8d,", uv_modes[i]);
+ fprintf(f, "\n");
+ }
+ {
+ int i, j;
+ fprintf(f, "KeyFrame Y-UV:\n");
+ for (i = 0; i < VP9_YMODES; i++) {
+ fprintf(f, "%2d:", i);
+ for (j = 0; j < VP9_UV_MODES; j++) fprintf(f, "%8d, ", uv_modes_y[i][j]);
+ fprintf(f, "\n");
+ }
+ }
+ {
+ int i, j;
+ fprintf(f, "Inter Y-UV:\n");
+ for (i = 0; i < VP9_YMODES; i++) {
+ fprintf(f, "%2d:", i);
+ for (j = 0; j < VP9_UV_MODES; j++) fprintf(f, "%8d, ", cpi->y_uv_mode_count[i][j]);
+ fprintf(f, "\n");
+ }
+ }
+ {
+ int i;
+
+ fprintf(f, "B: ");
+ for (i = 0; i < VP9_NKF_BINTRAMODES; i++)
+ fprintf(f, "%8d, ", b_modes[i]);
+
+ fprintf(f, "\n");
+
+ }
+
+ fprintf(f, "Modes in Inter Frames:\n");
+ {
+ int i;
+ fprintf(f, "Y: ");
+ for (i = 0; i < MB_MODE_COUNT; i++) fprintf(f, " %8d,", inter_y_modes[i]);
+ fprintf(f, "\n");
+ }
+ {
+ int i;
+ fprintf(f, "UV: ");
+ for (i = 0; i < VP9_UV_MODES; i++) fprintf(f, " %8d,", inter_uv_modes[i]);
+ fprintf(f, "\n");
+ }
+ {
+ int i;
+ fprintf(f, "B: ");
+ for (i = 0; i < B_MODE_COUNT; i++) fprintf(f, "%8d, ", inter_b_modes[i]);
+ fprintf(f, "\n");
+ }
+ fprintf(f, "P:%8d, %8d, %8d, %8d\n", count_mb_seg[0], count_mb_seg[1], count_mb_seg[2], count_mb_seg[3]);
+ fprintf(f, "PB:%8d, %8d, %8d, %8d\n", inter_b_modes[LEFT4X4], inter_b_modes[ABOVE4X4], inter_b_modes[ZERO4X4], inter_b_modes[NEW4X4]);
+ fclose(f);
+ }
+#endif
+
+#ifdef ENTROPY_STATS
+ {
+ int i, j, k;
+ FILE *fmode = fopen("vp9_modecontext.c", "w");
+
+ fprintf(fmode, "\n#include \"vp9_entropymode.h\"\n\n");
+ fprintf(fmode, "const unsigned int vp9_kf_default_bmode_counts ");
+ fprintf(fmode, "[VP9_KF_BINTRAMODES][VP9_KF_BINTRAMODES]"
+ "[VP9_KF_BINTRAMODES] =\n{\n");
+ // All three dimensions use VP9_KF_BINTRAMODES, matching the declaration text emitted above.
+ for (i = 0; i < VP9_KF_BINTRAMODES; i++) {
+
+ fprintf(fmode, " { // Above Mode : %d\n", i);
+
+ for (j = 0; j < VP9_KF_BINTRAMODES; j++) {
+
+ fprintf(fmode, " {");
+
+ for (k = 0; k < VP9_KF_BINTRAMODES; k++) {
+ if (!intra_mode_stats[i][j][k])
+ fprintf(fmode, " %5d, ", 1);
+ else
+ fprintf(fmode, " %5d, ", intra_mode_stats[i][j][k]);
+ }
+
+ fprintf(fmode, "}, // left_mode %d\n", j);
+
+ }
+
+ fprintf(fmode, " },\n");
+
+ }
+
+ fprintf(fmode, "};\n");
+ fclose(fmode);
+ }
+#endif
+
+
+#if defined(SECTIONBITS_OUTPUT)
+
+ if (0) {
+ int i;
+ FILE *f = fopen("tokenbits.stt", "a");
+
+ for (i = 0; i < 28; i++)
+ fprintf(f, "%8d", (int)(Sectionbits[i] / 256));
+
+ fprintf(f, "\n");
+ fclose(f);
+ }
+
+#endif
+
+#if 0
+ {
+ printf("\n_pick_loop_filter_level:%d\n", cpi->time_pick_lpf / 1000);
+ printf("\n_frames recive_data encod_mb_row compress_frame Total\n");
+ printf("%6d %10ld %10ld %10ld %10ld\n", cpi->common.current_video_frame, cpi->time_receive_data / 1000, cpi->time_encode_mb_row / 1000, cpi->time_compress_data / 1000, (cpi->time_receive_data + cpi->time_compress_data) / 1000);
+ }
+#endif
+
+ }
+ // Release everything owned by the compressor, then the instance itself.
+ dealloc_compressor_data(cpi);
+ vpx_free(cpi->mb.ss);
+ vpx_free(cpi->tok);
+
+ for (i = 0; i < sizeof(cpi->mbgraph_stats) / sizeof(cpi->mbgraph_stats[0]); i++) {
+ vpx_free(cpi->mbgraph_stats[i].mb_stats);
+ }
+
+ vp9_remove_common(&cpi->common);
+ vpx_free(cpi);
+ *ptr = 0;
+
+#ifdef OUTPUT_YUV_SRC
+ fclose(yuv_file);
+#endif
+#ifdef OUTPUT_YUV_REC
+ fclose(yuv_rec_file);
+#endif
+
+#if 0
+
+ if (keyfile)
+ fclose(keyfile);
+
+ if (framepsnr)
+ fclose(framepsnr);
+
+ if (kf_list)
+ fclose(kf_list);
+
+#endif
+
+}
+
+
+// Sum of squared differences between two planes of cols x rows samples.
+// Whole 16x16 tiles go through vp9_mse16x16(); the right-hand strip and the
+// bottom rows left over when a dimension is not a multiple of 16 are
+// accumulated one sample at a time.
+static uint64_t calc_plane_error(unsigned char *orig, int orig_stride,
+                                 unsigned char *recon, int recon_stride,
+                                 unsigned int cols, unsigned int rows) {
+  uint64_t sum = 0;
+  unsigned int y = 0;
+  unsigned int x;
+  int d;
+
+  while (y + 16 <= rows) {
+    x = 0;
+    while (x + 16 <= cols) {
+      unsigned int tile_sse;
+      vp9_mse16x16(orig + x, orig_stride, recon + x, recon_stride, &tile_sse);
+      sum += tile_sse;
+      x += 16;
+    }
+
+    /* Right-hand strip narrower than 16 columns. */
+    if (x < cols) {
+      unsigned char *o = orig;
+      unsigned char *r = recon;
+      unsigned int by, bx;
+      for (by = 0; by < 16; by++, o += orig_stride, r += recon_stride) {
+        for (bx = x; bx < cols; bx++) {
+          d = o[bx] - r[bx];
+          sum += d * d;
+        }
+      }
+    }
+
+    orig += orig_stride * 16;
+    recon += recon_stride * 16;
+    y += 16;
+  }
+
+  /* Remaining rows when the height is not a multiple of 16. */
+  for (; y < rows; y++, orig += orig_stride, recon += recon_stride) {
+    for (x = 0; x < cols; x++) {
+      d = orig[x] - recon[x];
+      sum += d * d;
+    }
+  }
+
+  return sum;
+}
+
+
+// Builds a PSNR packet comparing the source frame with the frame to show and
+// queues it. Slot 0 covers all planes together; slots 1-3 are Y, U and V.
+static void generate_psnr_packet(VP9_COMP *cpi) {
+  YV12_BUFFER_CONFIG *src = cpi->Source;
+  YV12_BUFFER_CONFIG *rec = cpi->common.frame_to_show;
+  unsigned int w = cpi->common.Width;
+  unsigned int h = cpi->common.Height;
+  struct vpx_codec_cx_pkt pkt;
+  uint64_t plane_sse;
+  int idx;
+
+  pkt.kind = VPX_CODEC_PSNR_PKT;
+  // Luma plane, at full resolution.
+  plane_sse = calc_plane_error(src->y_buffer, src->y_stride,
+                               rec->y_buffer, rec->y_stride, w, h);
+  pkt.data.psnr.sse[0] = plane_sse;
+  pkt.data.psnr.sse[1] = plane_sse;
+  pkt.data.psnr.samples[0] = w * h;
+  pkt.data.psnr.samples[1] = w * h;
+
+  // Both chroma planes are half size in each direction, rounded up.
+  w = (w + 1) / 2;
+  h = (h + 1) / 2;
+  plane_sse = calc_plane_error(src->u_buffer, src->uv_stride,
+                               rec->u_buffer, rec->uv_stride, w, h);
+  pkt.data.psnr.sse[0] += plane_sse;
+  pkt.data.psnr.sse[2] = plane_sse;
+  pkt.data.psnr.samples[0] += w * h;
+  pkt.data.psnr.samples[2] = w * h;
+
+  plane_sse = calc_plane_error(src->v_buffer, src->uv_stride,
+                               rec->v_buffer, rec->uv_stride, w, h);
+  pkt.data.psnr.sse[0] += plane_sse;
+  pkt.data.psnr.sse[3] = plane_sse;
+  pkt.data.psnr.samples[0] += w * h;
+  pkt.data.psnr.samples[3] = w * h;
+
+  for (idx = 0; idx < 4; idx++)
+    pkt.data.psnr.psnr[idx] = vp9_mse2psnr(pkt.data.psnr.samples[idx], 255.0,
+                                           (double)pkt.data.psnr.sse[idx]);
+
+  vpx_codec_pkt_list_add(cpi->output_pkt_list, &pkt);
+}
+
+
+// Stores ref_frame_flags as the compressor's reference frame flag mask.
+// Returns 0, or -1 (storing nothing) when ref_frame_flags is greater than 7.
+int vp9_use_as_reference(VP9_PTR ptr, int ref_frame_flags) {
+  if (ref_frame_flags <= 7) {
+    ((VP9_COMP *)ptr)->ref_frame_flags = ref_frame_flags;
+    return 0;
+  }
+  return -1;
+}
+// Sets the last, golden and alt-ref refresh flags from the matching bits of
+// ref_frame_flags (VP9_LAST_FLAG, VP9_GOLD_FLAG, VP9_ALT_FLAG). Returns -1
+// without touching the flags when ref_frame_flags is greater than 7, else 0.
+int vp9_update_reference(VP9_PTR ptr, int ref_frame_flags) {
+  VP9_COMMON *cm;
+
+  if (ref_frame_flags > 7)
+    return -1;
+
+  cm = &((VP9_COMP *)ptr)->common;
+
+  // Each flag becomes 1 when its bit is set in the mask and 0 otherwise.
+  cm->refresh_last_frame =
+      (ref_frame_flags & VP9_LAST_FLAG) ? 1 : 0;
+  cm->refresh_golden_frame =
+      (ref_frame_flags & VP9_GOLD_FLAG) ? 1 : 0;
+  cm->refresh_alt_ref_frame =
+      (ref_frame_flags & VP9_ALT_FLAG) ? 1 : 0;
+
+  return 0;
+}
+
+// Copies the requested reference frame buffer (last, golden or alt-ref) into
+// sd. Returns 0 on success, or -1 if ref_frame_flag names none of the three.
+int vp9_get_reference_enc(VP9_PTR ptr, VP9_REFFRAME ref_frame_flag,
+                          YV12_BUFFER_CONFIG *sd) {
+  VP9_COMMON *cm = &((VP9_COMP *)ptr)->common;
+  int fb_idx;
+
+  if (ref_frame_flag != VP9_LAST_FLAG && ref_frame_flag != VP9_GOLD_FLAG &&
+      ref_frame_flag != VP9_ALT_FLAG)
+    return -1;
+
+  fb_idx = (ref_frame_flag == VP9_LAST_FLAG) ? cm->lst_fb_idx
+         : (ref_frame_flag == VP9_GOLD_FLAG) ? cm->gld_fb_idx
+                                             : cm->alt_fb_idx;
+
+  vp8_yv12_copy_frame(&cm->yv12_fb[fb_idx], sd);
+
+  return 0;
+}
+
+// Overwrites the requested reference frame buffer (last, golden or alt-ref)
+// with a copy of sd. Returns 0 on success, or -1 if ref_frame_flag is not
+// one of VP9_LAST_FLAG, VP9_GOLD_FLAG or VP9_ALT_FLAG.
+int vp9_set_reference_enc(VP9_PTR ptr, VP9_REFFRAME ref_frame_flag,
+                          YV12_BUFFER_CONFIG *sd) {
+  VP9_COMMON *cm = &((VP9_COMP *)ptr)->common;
+  int fb_idx;
+
+  if (ref_frame_flag != VP9_LAST_FLAG && ref_frame_flag != VP9_GOLD_FLAG &&
+      ref_frame_flag != VP9_ALT_FLAG)
+    return -1;
+
+  fb_idx = (ref_frame_flag == VP9_LAST_FLAG) ? cm->lst_fb_idx
+         : (ref_frame_flag == VP9_GOLD_FLAG) ? cm->gld_fb_idx
+                                             : cm->alt_fb_idx;
+
+  vp8_yv12_copy_frame(sd, &cm->yv12_fb[fb_idx]);
+
+  return 0;
+}
+// Stores update as the common refresh_entropy_probs flag; always returns 0.
+int vp9_update_entropy(VP9_PTR comp, int update) {
+  VP9_COMP *const encoder = (VP9_COMP *)comp;
+
+  encoder->common.refresh_entropy_probs = update;
+  return 0;
+}
+
+
+#ifdef OUTPUT_YUV_SRC
+// Writes the Y, then U, then V plane of s to yuv_file, one row per fwrite()
+// call; only the visible width of each row is written, not the full stride.
+void vp9_write_yuv_frame(YV12_BUFFER_CONFIG *s) {
+  unsigned char *row = s->y_buffer;
+  int rows_left = s->y_height;
+  for (;;) {
+    fwrite(row, s->y_width, 1, yuv_file);
+    row += s->y_stride;
+    if (--rows_left == 0) break;
+  }
+  row = s->u_buffer;
+  rows_left = s->uv_height;
+  for (;;) {
+    fwrite(row, s->uv_width, 1, yuv_file);
+    row += s->uv_stride;
+    if (--rows_left == 0) break;
+  }
+  row = s->v_buffer;
+  rows_left = s->uv_height;
+  for (;;) {
+    fwrite(row, s->uv_width, 1, yuv_file);
+    row += s->uv_stride;
+    if (--rows_left == 0) break;
+  }
+}
+#endif
+
+#ifdef OUTPUT_YUV_REC
+// Writes the frame about to be shown to yuv_rec_file: cm->Height luma rows,
+// then (cm->Height + 1) / 2 rows each of U and V, one fwrite() per row.
+void vp9_write_yuv_rec_frame(VP9_COMMON *cm) {
+  YV12_BUFFER_CONFIG *frame = cm->frame_to_show;
+  unsigned char *row = frame->y_buffer;
+  int rows_left = cm->Height;
+  for (;;) {
+    fwrite(row, frame->y_width, 1, yuv_rec_file);
+    row += frame->y_stride;
+    if (--rows_left == 0) break;
+  }
+  row = frame->u_buffer;
+  rows_left = (cm->Height + 1) / 2;
+  for (;;) {
+    fwrite(row, frame->uv_width, 1, yuv_rec_file);
+    row += frame->uv_stride;
+    if (--rows_left == 0) break;
+  }
+  row = frame->v_buffer;
+  rows_left = (cm->Height + 1) / 2;
+  for (;;) {
+    fwrite(row, frame->uv_width, 1, yuv_rec_file);
+    row += frame->uv_stride;
+    if (--rows_left == 0) break;
+  }
+}
+#endif
+
+// Bookkeeping after an alt-ref frame update: sets every GF-active flag,
+// restarts the frames-since-golden count and moves the alt-ref source from
+// "pending" to "active".
+static void update_alt_ref_frame_stats(VP9_COMP *cpi) {
+  VP9_COMMON *const common = &cpi->common;
+
+  // Update data structure that monitors level of reference to last GF.
+  vpx_memset(cpi->gf_active_flags, 1, (common->mb_rows * common->mb_cols));
+  cpi->gf_active_count = common->mb_rows * common->mb_cols;
+
+  // This frame refreshes, so following frames don't unless the user asks.
+  common->frames_since_golden = 0;
+
+  // Clear the alternate reference update pending flag.
+  cpi->source_alt_ref_pending = FALSE;
+  // Set the alternate reference frame active flag.
+  cpi->source_alt_ref_active = TRUE;
+}
+// Golden-frame bookkeeping after a frame has been coded.
+//  * Golden refresh: reset the GF activity map and the recent reference
+//    usage counters, clear the refresh request and, in fixed-Q test mode,
+//    schedule an alt-ref for the next group of frames.
+//  * No golden and no alt-ref refresh: advance the per-frame counters and
+//    accumulate this frame's reference usage.
+//  * Alt-ref refresh only: nothing is changed here.
+static void update_golden_frame_stats(VP9_COMP *cpi) {
+  VP9_COMMON *const common = &cpi->common;
+
+  if (!common->refresh_golden_frame) {
+    if (common->refresh_alt_ref_frame)
+      return;
+    // Decrement count down till next gf.
+    if (cpi->frames_till_gf_update_due > 0)
+      cpi->frames_till_gf_update_due--;
+
+    if (common->frames_till_alt_ref_frame)
+      common->frames_till_alt_ref_frame--;
+
+    common->frames_since_golden++;
+
+    // From the second frame after a golden refresh onwards, fold this
+    // frame's per-reference usage counts into the running totals.
+    if (common->frames_since_golden > 1) {
+      cpi->recent_ref_frame_usage[INTRA_FRAME] += cpi->count_mb_ref_frame_usage[INTRA_FRAME];
+      cpi->recent_ref_frame_usage[LAST_FRAME] += cpi->count_mb_ref_frame_usage[LAST_FRAME];
+      cpi->recent_ref_frame_usage[GOLDEN_FRAME] += cpi->count_mb_ref_frame_usage[GOLDEN_FRAME];
+      cpi->recent_ref_frame_usage[ALTREF_FRAME] += cpi->count_mb_ref_frame_usage[ALTREF_FRAME];
+    }
+    return;
+  }
+
+  // Update data structure that monitors level of reference to last GF.
+  vpx_memset(cpi->gf_active_flags, 1, (common->mb_rows * common->mb_cols));
+  cpi->gf_active_count = common->mb_rows * common->mb_cols;
+
+  // This frame refreshes, so the next frames don't unless the user says so.
+  common->refresh_golden_frame = 0;
+  common->frames_since_golden = 0;
+
+  // Restart the recent reference usage counters.
+  cpi->recent_ref_frame_usage[INTRA_FRAME] = 1;
+  cpi->recent_ref_frame_usage[LAST_FRAME] = 1;
+  cpi->recent_ref_frame_usage[GOLDEN_FRAME] = 1;
+  cpi->recent_ref_frame_usage[ALTREF_FRAME] = 1;
+
+  // ******** Fixed Q test code only ************
+  // Flag the ALT reference as pending if it will be used for the next group.
+  if (cpi->oxcf.fixed_q >= 0 && cpi->oxcf.play_alternate &&
+      !common->refresh_alt_ref_frame) {
+    cpi->source_alt_ref_pending = TRUE;
+    cpi->frames_till_gf_update_due = cpi->baseline_gf_interval;
+  }
+
+  if (!cpi->source_alt_ref_pending)
+    cpi->source_alt_ref_active = FALSE;
+
+  // Decrement count down till next gf.
+  if (cpi->frames_till_gf_update_due > 0)
+    cpi->frames_till_gf_update_due--;
+}
+
+// First-pass quantizer index: the lowest qindex whose converted Q value
+// (vp9_convert_qindex_to_q) is at least 30.0, or QINDEX_RANGE - 1 if no
+// index reaches it. Declared (void) so the prototype takes no arguments.
+static int find_fp_qindex(void) {
+  int qindex;
+
+  for (qindex = 0; qindex < QINDEX_RANGE; ++qindex) {
+    if (vp9_convert_qindex_to_q(qindex) >= 30.0)
+      return qindex;
+  }
+
+  // No index reached the threshold: fall back to the last valid one.
+  return QINDEX_RANGE - 1;
+}
+
+// First-pass entry point: sets the quantizer returned by find_fp_qindex()
+// and runs vp9_first_pass(); size, dest and frame_flags are unused here.
+static void Pass1Encode(VP9_COMP *cpi, unsigned long *size,
+                        unsigned char *dest, unsigned int *frame_flags) {
+  const int fp_qindex = find_fp_qindex();
+  (void)size, (void)dest, (void)frame_flags;
+  vp9_set_quantizer(cpi, fp_qindex);
+  vp9_first_pass(cpi);
+}
+
+#define WRITE_RECON_BUFFER 0
+#if WRITE_RECON_BUFFER
+// Debug helper: writes `rows` rows of `width` bytes of one image plane to
+// `filename`. Skips the dump when the file cannot be opened.
+static void write_cx_plane(const char *filename, const unsigned char *plane,
+                           int stride, int width, int rows) {
+  FILE *file = fopen(filename, "wb");
+  int row;
+
+  // fopen fails if e.g. the "cx" directory is missing; passing NULL on to
+  // fwrite / fclose would be undefined behaviour.
+  if (file == NULL)
+    return;
+
+  for (row = 0; row < rows; row++)
+    fwrite(plane + row * stride, width, 1, file);
+  fclose(file);
+}
+
+// Debug helper: dumps the Y, U and V planes of `frame` as raw files under
+// the "cx" directory, numbered by this_frame.
+void write_cx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame) {
+  char filename[255];
+
+  sprintf(filename, "cx\\y%04d.raw", this_frame);
+  write_cx_plane(filename, frame->y_buffer, frame->y_stride,
+                 frame->y_width, frame->y_height);
+  sprintf(filename, "cx\\u%04d.raw", this_frame);
+  write_cx_plane(filename, frame->u_buffer, frame->uv_stride,
+                 frame->uv_width, frame->uv_height);
+  sprintf(filename, "cx\\v%04d.raw", this_frame);
+  write_cx_plane(filename, frame->v_buffer, frame->uv_stride,
+                 frame->uv_width, frame->uv_height);
+}
+#endif
+
+// Fraction of interior luma pixels (the one-pixel frame border is skipped)
+// whose absolute Sobel gradient exceeds EDGE_THRESH in either direction.
+// Returns 0.0 for frames too small to have any interior pixel.
+static double compute_edge_pixel_proportion(YV12_BUFFER_CONFIG *frame) {
+#define EDGE_THRESH 128
+  const int stride = frame->y_stride;
+  const int num_pels = (frame->y_height - 2) * (frame->y_width - 2);
+  int num_edge_pels = 0;
+  int row, col;
+
+  // No interior pixels: avoid the 0 / 0 (NaN) division at the end.
+  if (frame->y_height < 3 || frame->y_width < 3) return 0.0;
+  for (row = 1; row < frame->y_height - 1; row++) {
+    const unsigned char *curr = frame->y_buffer + row * stride + 1;
+    const unsigned char *prev = curr - stride, *next = curr + stride;
+    for (col = 1; col < frame->y_width - 1; col++, prev++, curr++, next++) {
+      /* Sobel hor and ver gradients */
+      int v = 2 * (curr[1] - curr[-1]) + (prev[1] - prev[-1]) + (next[1] - next[-1]);
+      int h = 2 * (prev[0] - next[0]) + (prev[1] - next[1]) + (prev[-1] - next[-1]);
+      if (h < 0) h = -h;
+      if (v < 0) v = -v;
+      if (h > EDGE_THRESH || v > EDGE_THRESH) num_edge_pels++;
+    }
+  }
+  return (double)num_edge_pels / (double)num_pels;
+}
+
+// Decides whether the frame just encoded should be recoded at another Q.
+// high_limit / low_limit bound the acceptable projected frame size and
+// maxq / minq bound q. Returns TRUE to recode; in the severe undershoot
+// constrained quality case it also lowers active_best_quality to cq_level.
+static BOOL recode_loop_test(VP9_COMP *cpi,
+                             int high_limit, int low_limit,
+                             int q, int maxq, int minq) {
+  VP9_COMMON *cm = &cpi->common;
+  // Recode mode 1 allows any frame to be recoded; mode 2 only key frames,
+  // golden frames and alt-ref frames. Anything else is never recoded.
+  const int is_boosted_frame = (cm->frame_type == KEY_FRAME) ||
+      cm->refresh_golden_frame || cm->refresh_alt_ref_frame;
+
+  if (cpi->sf.recode_loop != 1 &&
+      !(cpi->sf.recode_loop == 2 && is_boosted_frame))
+    return FALSE;
+
+  // General over and under shoot tests
+  if ((cpi->projected_frame_size > high_limit) && (q < maxq))
+    return TRUE;
+  if ((cpi->projected_frame_size < low_limit) && (q > minq))
+    return TRUE;
+
+  // The remaining tests only apply to constrained quality mode.
+  if (cpi->oxcf.end_usage != USAGE_CONSTRAINED_QUALITY)
+    return FALSE;
+
+  // Undershoot while q is still above the auto cq level.
+  if ((q > cpi->cq_target_quality) &&
+      (cpi->projected_frame_size < ((cpi->this_frame_target * 7) >> 3)))
+    return TRUE;
+
+  // Severe undershoot while between the auto and user cq levels.
+  if ((q > cpi->oxcf.cq_level) &&
+      (cpi->projected_frame_size < cpi->min_frame_bandwidth) &&
+      (cpi->active_best_quality > cpi->oxcf.cq_level)) {
+    cpi->active_best_quality = cpi->oxcf.cq_level;
+    return TRUE;
+  }
+  return FALSE;
+}
+
+// Re-points the last / golden / alt-ref buffer indices and updates the
+// per-buffer VP9_*_FLAG bits after the new frame has been encoded. Buffer
+// "copies" are done by index swapping only; no pixel data is moved here.
+static void update_reference_frames(VP9_COMMON *cm) {
+  YV12_BUFFER_CONFIG *yv12_fb = cm->yv12_fb;
+
+  if (cm->frame_type == KEY_FRAME) {
+    yv12_fb[cm->new_fb_idx].flags |= VP9_GOLD_FLAG | VP9_ALT_FLAG;
+    // The buffers that held golden / alt-ref until now lose those roles.
+    yv12_fb[cm->gld_fb_idx].flags &= ~VP9_GOLD_FLAG;
+    yv12_fb[cm->alt_fb_idx].flags &= ~VP9_ALT_FLAG;
+
+    cm->alt_fb_idx = cm->gld_fb_idx = cm->new_fb_idx;
+  } else { /* For non key frames */
+    if (cm->refresh_alt_ref_frame) {
+      assert(!cm->copy_buffer_to_arf);
+
+      yv12_fb[cm->new_fb_idx].flags |= VP9_ALT_FLAG;
+      yv12_fb[cm->alt_fb_idx].flags &= ~VP9_ALT_FLAG;
+      cm->alt_fb_idx = cm->new_fb_idx;
+    } else if (cm->copy_buffer_to_arf) {
+      assert(!(cm->copy_buffer_to_arf & ~0x3));
+
+      if (cm->copy_buffer_to_arf == 1) {  // last frame buffer -> alt-ref
+        if (cm->alt_fb_idx != cm->lst_fb_idx) {
+          yv12_fb[cm->lst_fb_idx].flags |= VP9_ALT_FLAG;
+          yv12_fb[cm->alt_fb_idx].flags &= ~VP9_ALT_FLAG;
+          cm->alt_fb_idx = cm->lst_fb_idx;
+        }
+      } else { /* if (cm->copy_buffer_to_arf == 2) */
+        if (cm->alt_fb_idx != cm->gld_fb_idx) {  // golden buffer -> alt-ref
+          yv12_fb[cm->gld_fb_idx].flags |= VP9_ALT_FLAG;
+          yv12_fb[cm->alt_fb_idx].flags &= ~VP9_ALT_FLAG;
+          cm->alt_fb_idx = cm->gld_fb_idx;
+        }
+      }
+    }
+
+    if (cm->refresh_golden_frame) {
+      assert(!cm->copy_buffer_to_gf);
+
+      yv12_fb[cm->new_fb_idx].flags |= VP9_GOLD_FLAG;
+      yv12_fb[cm->gld_fb_idx].flags &= ~VP9_GOLD_FLAG;
+      cm->gld_fb_idx = cm->new_fb_idx;
+    } else if (cm->copy_buffer_to_gf) {
+      // Validate the golden copy flag (only the low two bits may be set).
+      assert(!(cm->copy_buffer_to_gf & ~0x3));
+      if (cm->copy_buffer_to_gf == 1) {  // last frame buffer -> golden
+        if (cm->gld_fb_idx != cm->lst_fb_idx) {
+          yv12_fb[cm->lst_fb_idx].flags |= VP9_GOLD_FLAG;
+          yv12_fb[cm->gld_fb_idx].flags &= ~VP9_GOLD_FLAG;
+          cm->gld_fb_idx = cm->lst_fb_idx;
+        }
+      } else { /* if (cm->copy_buffer_to_gf == 2) */
+        if (cm->alt_fb_idx != cm->gld_fb_idx) {  // alt-ref buffer -> golden
+          yv12_fb[cm->alt_fb_idx].flags |= VP9_GOLD_FLAG;
+          yv12_fb[cm->gld_fb_idx].flags &= ~VP9_GOLD_FLAG;
+          cm->gld_fb_idx = cm->alt_fb_idx;
+        }
+      }
+    }
+  }
+
+  if (cm->refresh_last_frame) {
+    yv12_fb[cm->new_fb_idx].flags |= VP9_LAST_FLAG;
+    yv12_fb[cm->lst_fb_idx].flags &= ~VP9_LAST_FLAG;
+    cm->lst_fb_idx = cm->new_fb_idx;
+  }
+}
+
+// Chooses the loop filter level for the frame just encoded, applies the
+// filter when the level is non-zero, then extends the frame borders.
+static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) {
+  int filter_disabled = (cm->no_lpf != 0);
+#if CONFIG_LOSSLESS
+  if (cpi->oxcf.lossless)
+    filter_disabled = 1;
+#endif
+
+  if (filter_disabled) {
+    cm->filter_level = 0;
+  } else {
+    // Time the filter level search and add it to the running total.
+    struct vpx_usec_timer lpf_timer;
+
+    vp9_clear_system_state();
+    vpx_usec_timer_start(&lpf_timer);
+    if (cpi->sf.auto_filter != 0)
+      vp9_pick_filter_level(cpi->Source, cpi);
+    else
+      vp9_pick_filter_level_fast(cpi->Source, cpi);
+    vpx_usec_timer_mark(&lpf_timer);
+    cpi->time_pick_lpf += vpx_usec_timer_elapsed(&lpf_timer);
+  }
+
+  if (cm->filter_level > 0) {
+    vp9_set_alt_lf_level(cpi, cm->filter_level);
+    vp9_loop_filter_frame(cm, &cpi->mb.e_mbd);
+  }
+
+  vp8_yv12_extend_frame_borders(cm->frame_to_show);
+}
+
+#if CONFIG_PRED_FILTER
+// Picks the frame-level prediction filter mode from the signalled
+// "filter off" probability: 0 = off, 1 = on, 2 = selectable per MB.
+void select_pred_filter_mode(VP9_COMP *cpi) {
+  const int prob_off = cpi->common.prob_pred_filter_off;
+  int mode = 2;  // Selectable at the MB level
+
+  // Force filter on/off if probability is extreme
+  if (prob_off >= 255 * 0.95)
+    mode = 0;  // Off at the frame level
+  else if (prob_off <= 255 * 0.05)
+    mode = 1;  // On at the frame level
+  cpi->common.pred_filter_mode = mode;
+}
+
+// Recomputes cm->prob_pred_filter_off from the prediction filter on / off
+// counts held in cpi: the share of "off" selections scaled to 256 and
+// clamped to [1, 255]. When both counts are zero the value 128 is stored
+// instead, which also avoids dividing by zero.
+void update_pred_filt_prob(VP9_COMP *cpi) {
+  VP9_COMMON *cm = &cpi->common;
+  int prob_off;
+
+  // No selections were counted: use the midpoint probability.
+  if (cpi->pred_filter_on_count + cpi->pred_filter_off_count == 0) {
+    cm->prob_pred_filter_off = 128;
+    return;
+  }
+
+  // Based on the selection counts, work out the signaling probability:
+  // the share of "filter off" choices among all choices, out of 256.
+  prob_off = cpi->pred_filter_off_count * 256 /
+             (cpi->pred_filter_on_count + cpi->pred_filter_off_count);
+
+  // Keep the stored value inside the 1..255 range.
+  if (prob_off < 1) {
+    prob_off = 1;
+  } else if (prob_off > 255) {
+    prob_off = 255;
+  }
+
+  cm->prob_pred_filter_off = prob_off;
+}
+#endif
+#if CONFIG_COMP_INTERINTRA_PRED
+// Updates cm->use_interintra from the share of selection counts in bin 1.
+static void select_interintra_mode(VP9_COMP *cpi) {
+  static const double threshold = 0.01;
+  // FIXME(debargha): Make this RD based
+  const int total =
+      cpi->interintra_select_count[1] + cpi->interintra_select_count[0];
+  if (total != 0) {  // With nothing counted the flag is left unchanged.
+    const double fraction = (double)cpi->interintra_select_count[1] / total;
+    cpi->common.use_interintra = (fraction > threshold);
+  }
+}
+#endif
+
+static void encode_frame_to_data_rate
+(
+ VP9_COMP *cpi,
+ unsigned long *size,
+ unsigned char *dest,
+ unsigned int *frame_flags
+) {
+ VP9_COMMON *cm = &cpi->common;
+ MACROBLOCKD *xd = &cpi->mb.e_mbd;
+
+ int Q;
+ int frame_over_shoot_limit;
+ int frame_under_shoot_limit;
+
+ int Loop = FALSE;
+ int loop_count;
+
+ int q_low;
+ int q_high;
+ int zbin_oq_high;
+ int zbin_oq_low = 0;
+
+ int top_index;
+ int bottom_index;
+ int active_worst_qchanged = FALSE;
+
+ int overshoot_seen = FALSE;
+ int undershoot_seen = FALSE;
+
+ SPEED_FEATURES *sf = &cpi->sf;
+#if RESET_FOREACH_FILTER
+ int q_low0;
+ int q_high0;
+ int zbin_oq_high0;
+ int zbin_oq_low0 = 0;
+ int Q0;
+ int last_zbin_oq;
+ int last_zbin_oq0;
+ int active_best_quality0;
+ int active_worst_quality0;
+ double rate_correction_factor0;
+ double gf_rate_correction_factor0;
+#endif
+
+ /* list of filters to search over */
+ int mcomp_filters_to_search[] = {
+ EIGHTTAP, EIGHTTAP_SHARP, SIXTAP, SWITCHABLE
+ };
+ int mcomp_filters = sizeof(mcomp_filters_to_search) /
+ sizeof(*mcomp_filters_to_search);
+ int mcomp_filter_index = 0;
+ INT64 mcomp_filter_cost[4];
+
+ // Clear down mmx registers to allow floating point in what follows
+ vp9_clear_system_state();
+
+
+ // For an alt ref frame in 2 pass we skip the call to the second
+ // pass function that sets the target bandwidth so must set it here
+ if (cpi->common.refresh_alt_ref_frame) {
+ cpi->per_frame_bandwidth = cpi->twopass.gf_bits; // Per frame bit target for the alt ref frame
+ // per second target bitrate
+ cpi->target_bandwidth = (int)(cpi->twopass.gf_bits *
+ cpi->output_frame_rate);
+ }
+
+ // Default turn off buffer to buffer copying
+ cm->copy_buffer_to_gf = 0;
+ cm->copy_buffer_to_arf = 0;
+
+ // Clear zbin over-quant value and mode boost values.
+ cpi->zbin_over_quant = 0;
+ cpi->zbin_mode_boost = 0;
+
+ // Enable or disable mode based tweaking of the zbin
+ // For 2 Pass Only used where GF/ARF prediction quality
+ // is above a threshold
+ cpi->zbin_mode_boost = 0;
+#if CONFIG_LOSSLESS
+ cpi->zbin_mode_boost_enabled = FALSE;
+#else
+ cpi->zbin_mode_boost_enabled = TRUE;
+#endif
+ if (cpi->gfu_boost <= 400) {
+ cpi->zbin_mode_boost_enabled = FALSE;
+ }
+
+ // Current default encoder behaviour for the altref sign bias
+ if (cpi->source_alt_ref_active)
+ cpi->common.ref_frame_sign_bias[ALTREF_FRAME] = 1;
+ else
+ cpi->common.ref_frame_sign_bias[ALTREF_FRAME] = 0;
+
+ // Check to see if a key frame is signalled
+ // For two pass with auto key frame enabled cm->frame_type may already be set, but not for one pass.
+ if ((cm->current_video_frame == 0) ||
+ (cm->frame_flags & FRAMEFLAGS_KEY) ||
+ (cpi->oxcf.auto_key && (cpi->frames_since_key % cpi->key_frame_frequency == 0))) {
+ // Key frame from VFW/auto-keyframe/first frame
+ cm->frame_type = KEY_FRAME;
+ }
+
+ // Set default state for segment based loop filter update flags
+ xd->mode_ref_lf_delta_update = 0;
+
+#if CONFIG_NEW_MVREF
+ // Temp defaults probabilities for ecnoding the MV ref id signal
+ vpx_memset(xd->mb_mv_ref_id_probs, 192,
+ sizeof(xd->mb_mv_ref_id_probs));
+#endif
+
+ // Set various flags etc to special state if it is a key frame
+ if (cm->frame_type == KEY_FRAME) {
+ int i;
+
+ // Reset the loop filter deltas and segmentation map
+ setup_features(cpi);
+
+ // If segmentation is enabled force a map update for key frames
+ if (xd->segmentation_enabled) {
+ xd->update_mb_segmentation_map = 1;
+ xd->update_mb_segmentation_data = 1;
+ }
+
+ // The alternate reference frame cannot be active for a key frame
+ cpi->source_alt_ref_active = FALSE;
+
+ // Reset the RD threshold multipliers to default of * 1 (128)
+ for (i = 0; i < MAX_MODES; i++) {
+ cpi->rd_thresh_mult[i] = 128;
+ }
+ }
+
+ // Test code for new segment features
+ init_seg_features(cpi);
+
+ // Decide how big to make the frame
+ vp9_pick_frame_size(cpi);
+
+ vp9_clear_system_state();
+
+ // Set an active best quality and if necessary active worst quality
+ Q = cpi->active_worst_quality;
+
+ if (cm->frame_type == KEY_FRAME) {
+ int high = 2000;
+ int low = 400;
+
+ if (cpi->kf_boost > high)
+ cpi->active_best_quality = kf_low_motion_minq[Q];
+ else if (cpi->kf_boost < low)
+ cpi->active_best_quality = kf_high_motion_minq[Q];
+ else {
+ int gap = high - low;
+ int offset = high - cpi->kf_boost;
+ int qdiff = kf_high_motion_minq[Q] - kf_low_motion_minq[Q];
+ int adjustment = ((offset * qdiff) + (gap >> 1)) / gap;
+
+ cpi->active_best_quality = kf_low_motion_minq[Q] + adjustment;
+ }
+
+ // Make an adjustment based on the %s static
+ // The main impact of this is at lower Q to prevent overly large key
+ // frames unless a lot of the image is static.
+ if (cpi->kf_zeromotion_pct < 64)
+ cpi->active_best_quality += 4 - (cpi->kf_zeromotion_pct >> 4);
+
+ // Special case for key frames forced because we have reached
+ // the maximum key frame interval. Here force the Q to a range
+ // based on the ambient Q to reduce the risk of popping
+ if (cpi->this_key_frame_forced) {
+ int delta_qindex;
+ int qindex = cpi->last_boosted_qindex;
+
+ delta_qindex = compute_qdelta(cpi, qindex,
+ (qindex * 0.75));
+
+ cpi->active_best_quality = qindex + delta_qindex;
+ if (cpi->active_best_quality < cpi->best_quality)
+ cpi->active_best_quality = cpi->best_quality;
+ }
+ }
+
+ else if (cm->refresh_golden_frame || cpi->common.refresh_alt_ref_frame) {
+ int high = 2000;
+ int low = 400;
+
+ // Use the lower of cpi->active_worst_quality and recent
+ // average Q as basis for GF/ARF Q limit unless last frame was
+ // a key frame.
+ if ((cpi->frames_since_key > 1) &&
+ (cpi->avg_frame_qindex < cpi->active_worst_quality)) {
+ Q = cpi->avg_frame_qindex;
+ }
+
+ // For constrained quality dont allow Q less than the cq level
+ if ((cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) &&
+ (Q < cpi->cq_target_quality)) {
+ Q = cpi->cq_target_quality;
+ }
+
+ if (cpi->gfu_boost > high)
+ cpi->active_best_quality = gf_low_motion_minq[Q];
+ else if (cpi->gfu_boost < low)
+ cpi->active_best_quality = gf_high_motion_minq[Q];
+ else {
+ int gap = high - low;
+ int offset = high - cpi->gfu_boost;
+ int qdiff = gf_high_motion_minq[Q] - gf_low_motion_minq[Q];
+ int adjustment = ((offset * qdiff) + (gap >> 1)) / gap;
+
+ cpi->active_best_quality = gf_low_motion_minq[Q] + adjustment;
+ }
+
+ // Constrained quality use slightly lower active best.
+ if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) {
+ cpi->active_best_quality =
+ cpi->active_best_quality * 15 / 16;
+ }
+ } else {
+ cpi->active_best_quality = inter_minq[Q];
+
+ // For the constant/constrained quality mode we dont want
+ // q to fall below the cq level.
+ if ((cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) &&
+ (cpi->active_best_quality < cpi->cq_target_quality)) {
+ // If we are strongly undershooting the target rate in the last
+ // frames then use the user passed in cq value not the auto
+ // cq value.
+ if (cpi->rolling_actual_bits < cpi->min_frame_bandwidth)
+ cpi->active_best_quality = cpi->oxcf.cq_level;
+ else
+ cpi->active_best_quality = cpi->cq_target_quality;
+ }
+ }
+
+ // Clip the active best and worst quality values to limits
+ if (cpi->active_worst_quality > cpi->worst_quality)
+ cpi->active_worst_quality = cpi->worst_quality;
+
+ if (cpi->active_best_quality < cpi->best_quality)
+ cpi->active_best_quality = cpi->best_quality;
+
+ if (cpi->active_best_quality > cpi->worst_quality)
+ cpi->active_best_quality = cpi->worst_quality;
+
+ if (cpi->active_worst_quality < cpi->active_best_quality)
+ cpi->active_worst_quality = cpi->active_best_quality;
+
+ // Specuial case code to try and match quality with forced key frames
+ if ((cm->frame_type == KEY_FRAME) && cpi->this_key_frame_forced) {
+ Q = cpi->last_boosted_qindex;
+ } else {
+ // Determine initial Q to try
+ Q = vp9_regulate_q(cpi, cpi->this_frame_target);
+ }
+#if RESET_FOREACH_FILTER
+ last_zbin_oq = cpi->zbin_over_quant;
+#endif
+
+ // Set highest allowed value for Zbin over quant
+ if (cm->frame_type == KEY_FRAME)
+ zbin_oq_high = 0; // ZBIN_OQ_MAX/16
+ else if (cm->refresh_alt_ref_frame || (cm->refresh_golden_frame && !cpi->source_alt_ref_active))
+ zbin_oq_high = 16;
+ else
+ zbin_oq_high = ZBIN_OQ_MAX;
+
+ vp9_compute_frame_size_bounds(cpi, &frame_under_shoot_limit,
+ &frame_over_shoot_limit);
+
+ // Limit Q range for the adaptive loop.
+ bottom_index = cpi->active_best_quality;
+ top_index = cpi->active_worst_quality;
+ q_low = cpi->active_best_quality;
+ q_high = cpi->active_worst_quality;
+
+ loop_count = 0;
+
+ if (cm->frame_type != KEY_FRAME) {
+ /* TODO: Decide this more intelligently */
+ if (sf->search_best_filter) {
+ cm->mcomp_filter_type = mcomp_filters_to_search[0];
+ mcomp_filter_index = 0;
+ } else {
+ cm->mcomp_filter_type = DEFAULT_INTERP_FILTER;
+ }
+ /* TODO: Decide this more intelligently */
+ xd->allow_high_precision_mv = (Q < HIGH_PRECISION_MV_QTHRESH);
+ set_mvcost(&cpi->mb);
+ }
+
+#if CONFIG_COMP_INTERINTRA_PRED
+ if (cm->current_video_frame == 0) {
+ cm->use_interintra = 1;
+ }
+#endif
+
+#if CONFIG_POSTPROC
+
+ if (cpi->oxcf.noise_sensitivity > 0) {
+ unsigned char *src;
+ int l = 0;
+
+ switch (cpi->oxcf.noise_sensitivity) {
+ case 1:
+ l = 20;
+ break;
+ case 2:
+ l = 40;
+ break;
+ case 3:
+ l = 60;
+ break;
+ case 4:
+
+ case 5:
+ l = 100;
+ break;
+ case 6:
+ l = 150;
+ break;
+ }
+
+
+ if (cm->frame_type == KEY_FRAME) {
+ vp9_de_noise(cpi->Source, cpi->Source, l, 1, 0);
+ } else {
+ vp9_de_noise(cpi->Source, cpi->Source, l, 1, 0);
+
+ src = cpi->Source->y_buffer;
+
+ if (cpi->Source->y_stride < 0) {
+ src += cpi->Source->y_stride * (cpi->Source->y_height - 1);
+ }
+ }
+ }
+
+#endif
+
+#ifdef OUTPUT_YUV_SRC
+ vp9_write_yuv_frame(cpi->Source);
+#endif
+
+#if RESET_FOREACH_FILTER
+ if (sf->search_best_filter) {
+ q_low0 = q_low;
+ q_high0 = q_high;
+ Q0 = Q;
+ zbin_oq_low0 = zbin_oq_low;
+ zbin_oq_high0 = zbin_oq_high;
+ last_zbin_oq0 = last_zbin_oq;
+ rate_correction_factor0 = cpi->rate_correction_factor;
+ gf_rate_correction_factor0 = cpi->gf_rate_correction_factor;
+ active_best_quality0 = cpi->active_best_quality;
+ active_worst_quality0 = cpi->active_worst_quality;
+ }
+#endif
+ do {
+ vp9_clear_system_state(); // __asm emms;
+
+ vp9_set_quantizer(cpi, Q);
+
+ if (loop_count == 0) {
+
+ // setup skip prob for costing in mode/mv decision
+ if (cpi->common.mb_no_coeff_skip) {
+ int k;
+ for (k = 0; k < MBSKIP_CONTEXTS; k++)
+ cm->mbskip_pred_probs[k] = cpi->base_skip_false_prob[Q][k];
+
+ if (cm->frame_type != KEY_FRAME) {
+ if (cpi->common.refresh_alt_ref_frame) {
+ for (k = 0; k < MBSKIP_CONTEXTS; k++) {
+ if (cpi->last_skip_false_probs[2][k] != 0)
+ cm->mbskip_pred_probs[k] = cpi->last_skip_false_probs[2][k];
+ }
+ } else if (cpi->common.refresh_golden_frame) {
+ for (k = 0; k < MBSKIP_CONTEXTS; k++) {
+ if (cpi->last_skip_false_probs[1][k] != 0)
+ cm->mbskip_pred_probs[k] = cpi->last_skip_false_probs[1][k];
+ }
+ } else {
+ int k;
+ for (k = 0; k < MBSKIP_CONTEXTS; k++) {
+ if (cpi->last_skip_false_probs[0][k] != 0)
+ cm->mbskip_pred_probs[k] = cpi->last_skip_false_probs[0][k];
+ }
+ }
+
+ // as this is for cost estimate, let's make sure it does not
+ // get extreme either way
+ {
+ int k;
+ for (k = 0; k < MBSKIP_CONTEXTS; ++k) {
+ if (cm->mbskip_pred_probs[k] < 5)
+ cm->mbskip_pred_probs[k] = 5;
+
+ if (cm->mbskip_pred_probs[k] > 250)
+ cm->mbskip_pred_probs[k] = 250;
+
+ if (cpi->is_src_frame_alt_ref)
+ cm->mbskip_pred_probs[k] = 1;
+ }
+ }
+ }
+ }
+
+ // Set up entropy depending on frame type.
+ if (cm->frame_type == KEY_FRAME)
+ vp9_setup_key_frame(cpi);
+ else
+ vp9_setup_inter_frame(cpi);
+ }
+
+ // transform / motion compensation build reconstruction frame
+
+ vp9_encode_frame(cpi);
+
+ // Update the skip mb flag probabilities based on the distribution
+ // seen in the last encoder iteration.
+ update_base_skip_probs(cpi);
+
+ vp9_clear_system_state(); // __asm emms;
+
+#if CONFIG_PRED_FILTER
+ // Update prediction filter on/off probability based on
+ // selection made for the current frame
+ if (cm->frame_type != KEY_FRAME)
+ update_pred_filt_prob(cpi);
+#endif
+
+ // Dummy pack of the bitstream using up to date stats to get an
+ // accurate estimate of output frame size to determine if we need
+ // to recode.
+ vp9_save_coding_context(cpi);
+ cpi->dummy_packing = 1;
+ vp9_pack_bitstream(cpi, dest, size);
+ cpi->projected_frame_size = (*size) << 3;
+ vp9_restore_coding_context(cpi);
+
+ if (frame_over_shoot_limit == 0)
+ frame_over_shoot_limit = 1;
+ active_worst_qchanged = FALSE;
+
+ // Special case handling for forced key frames
+ if ((cm->frame_type == KEY_FRAME) && cpi->this_key_frame_forced) {
+ int last_q = Q;
+ int kf_err = vp9_calc_ss_err(cpi->Source,
+ &cm->yv12_fb[cm->new_fb_idx]);
+
+ int high_err_target = cpi->ambient_err;
+ int low_err_target = (cpi->ambient_err >> 1);
+
+ // Prevent possible divide by zero error below for perfect KF
+ kf_err += (!kf_err);
+
+ // The key frame is not good enough or we can afford
+ // to make it better without undue risk of popping.
+ if (((kf_err > high_err_target) &&
+ (cpi->projected_frame_size <= frame_over_shoot_limit)) ||
+ ((kf_err > low_err_target) &&
+ (cpi->projected_frame_size <= frame_under_shoot_limit))) {
+ // Lower q_high
+ q_high = (Q > q_low) ? (Q - 1) : q_low;
+
+ // Adjust Q
+ Q = (Q * high_err_target) / kf_err;
+ if (Q < ((q_high + q_low) >> 1))
+ Q = (q_high + q_low) >> 1;
+ }
+ // The key frame is much better than the previous frame
+ else if ((kf_err < low_err_target) &&
+ (cpi->projected_frame_size >= frame_under_shoot_limit)) {
+ // Raise q_low
+ q_low = (Q < q_high) ? (Q + 1) : q_high;
+
+ // Adjust Q
+ Q = (Q * low_err_target) / kf_err;
+ if (Q > ((q_high + q_low + 1) >> 1))
+ Q = (q_high + q_low + 1) >> 1;
+ }
+
+ // Clamp Q to upper and lower limits:
+ if (Q > q_high)
+ Q = q_high;
+ else if (Q < q_low)
+ Q = q_low;
+
+ Loop = ((Q != last_q)) ? TRUE : FALSE;
+ }
+
+ // Is the projected frame size out of range and are we allowed to attempt to recode.
+ else if (recode_loop_test(cpi,
+ frame_over_shoot_limit, frame_under_shoot_limit,
+ Q, top_index, bottom_index)) {
+ int last_q = Q;
+ int Retries = 0;
+
+ // Frame size out of permitted range:
+ // Update correction factor & compute new Q to try...
+
+ // Frame is too large
+ if (cpi->projected_frame_size > cpi->this_frame_target) {
+ q_low = (Q < q_high) ? (Q + 1) : q_high; // Raise Qlow as to at least the current value
+
+ if (cpi->zbin_over_quant > 0) // If we are using over quant do the same for zbin_oq_low
+ zbin_oq_low = (cpi->zbin_over_quant < zbin_oq_high) ? (cpi->zbin_over_quant + 1) : zbin_oq_high;
+
+ if (undershoot_seen || (loop_count > 1)) {
+ // Update rate_correction_factor unless cpi->active_worst_quality has changed.
+ if (!active_worst_qchanged)
+ vp9_update_rate_correction_factors(cpi, 1);
+
+ Q = (q_high + q_low + 1) / 2;
+
+ // Adjust cpi->zbin_over_quant (only allowed when Q is max)
+ if (Q < MAXQ)
+ cpi->zbin_over_quant = 0;
+ else {
+ zbin_oq_low = (cpi->zbin_over_quant < zbin_oq_high) ? (cpi->zbin_over_quant + 1) : zbin_oq_high;
+ cpi->zbin_over_quant = (zbin_oq_high + zbin_oq_low) / 2;
+ }
+ } else {
+ // Update rate_correction_factor unless cpi->active_worst_quality has changed.
+ if (!active_worst_qchanged)
+ vp9_update_rate_correction_factors(cpi, 0);
+
+ Q = vp9_regulate_q(cpi, cpi->this_frame_target);
+
+ while (((Q < q_low) || (cpi->zbin_over_quant < zbin_oq_low)) && (Retries < 10)) {
+ vp9_update_rate_correction_factors(cpi, 0);
+ Q = vp9_regulate_q(cpi, cpi->this_frame_target);
+ Retries++;
+ }
+ }
+
+ overshoot_seen = TRUE;
+ }
+ // Frame is too small
+ else {
+ if (cpi->zbin_over_quant == 0)
+ q_high = (Q > q_low) ? (Q - 1) : q_low; // Lower q_high if not using over quant
+ else // else lower zbin_oq_high
+ zbin_oq_high = (cpi->zbin_over_quant > zbin_oq_low) ? (cpi->zbin_over_quant - 1) : zbin_oq_low;
+
+ if (overshoot_seen || (loop_count > 1)) {
+ // Update rate_correction_factor unless cpi->active_worst_quality has changed.
+ if (!active_worst_qchanged)
+ vp9_update_rate_correction_factors(cpi, 1);
+
+ Q = (q_high + q_low) / 2;
+
+ // Adjust cpi->zbin_over_quant (only allowed when Q is max)
+ if (Q < MAXQ)
+ cpi->zbin_over_quant = 0;
+ else
+ cpi->zbin_over_quant = (zbin_oq_high + zbin_oq_low) / 2;
+ } else {
+ // Update rate_correction_factor unless cpi->active_worst_quality has changed.
+ if (!active_worst_qchanged)
+ vp9_update_rate_correction_factors(cpi, 0);
+
+ Q = vp9_regulate_q(cpi, cpi->this_frame_target);
+
+ // Special case reset for qlow for constrained quality.
+ // This should only trigger where there is very substantial
+ // undershoot on a frame and the auto cq level is above
+ // the user passsed in value.
+ if ((cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) &&
+ (Q < q_low)) {
+ q_low = Q;
+ }
+
+ while (((Q > q_high) || (cpi->zbin_over_quant > zbin_oq_high)) && (Retries < 10)) {
+ vp9_update_rate_correction_factors(cpi, 0);
+ Q = vp9_regulate_q(cpi, cpi->this_frame_target);
+ Retries++;
+ }
+ }
+
+ undershoot_seen = TRUE;
+ }
+
+ // Clamp Q to upper and lower limits:
+ if (Q > q_high)
+ Q = q_high;
+ else if (Q < q_low)
+ Q = q_low;
+
+ // Clamp cpi->zbin_over_quant
+ cpi->zbin_over_quant = (cpi->zbin_over_quant < zbin_oq_low) ?
+ zbin_oq_low : (cpi->zbin_over_quant > zbin_oq_high) ?
+ zbin_oq_high : cpi->zbin_over_quant;
+
+ // Loop = ((Q != last_q) || (last_zbin_oq != cpi->zbin_over_quant)) ? TRUE : FALSE;
+ Loop = ((Q != last_q)) ? TRUE : FALSE;
+#if RESET_FOREACH_FILTER
+ last_zbin_oq = cpi->zbin_over_quant;
+#endif
+ } else
+ Loop = FALSE;
+
+ if (cpi->is_src_frame_alt_ref)
+ Loop = FALSE;
+
+ if (cm->frame_type != KEY_FRAME &&
+ !sf->search_best_filter &&
+ cm->mcomp_filter_type == SWITCHABLE) {
+ int interp_factor = Q / 3; /* denominator is 256 */
+ int count[VP9_SWITCHABLE_FILTERS];
+ int tot_count = 0, c = 0, thr;
+ int i, j;
+ for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) {
+ count[i] = 0;
+ for (j = 0; j <= VP9_SWITCHABLE_FILTERS; ++j) {
+ count[i] += cpi->switchable_interp_count[j][i];
+ }
+ tot_count += count[i];
+ }
+
+ thr = ((tot_count * interp_factor + 128) >> 8);
+ for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) {
+ c += (count[i] >= thr);
+ }
+ if (c == 1) {
+ /* Mostly one filter is used. So set the filter at frame level */
+ for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) {
+ if (count[i]) {
+ cm->mcomp_filter_type = vp9_switchable_interp[i];
+ Loop = TRUE; /* Make sure to loop since the filter changed */
+ break;
+ }
+ }
+ }
+ }
+
+ if (Loop == FALSE && cm->frame_type != KEY_FRAME && sf->search_best_filter) {
+ if (mcomp_filter_index < mcomp_filters) {
+ INT64 err = vp9_calc_ss_err(cpi->Source,
+ &cm->yv12_fb[cm->new_fb_idx]);
+ INT64 rate = cpi->projected_frame_size << 8;
+ mcomp_filter_cost[mcomp_filter_index] =
+ (RDCOST(cpi->RDMULT, cpi->RDDIV, rate, err));
+ mcomp_filter_index++;
+ if (mcomp_filter_index < mcomp_filters) {
+ cm->mcomp_filter_type = mcomp_filters_to_search[mcomp_filter_index];
+ loop_count = -1;
+ Loop = TRUE;
+ } else {
+ int f;
+ INT64 best_cost = mcomp_filter_cost[0];
+ int mcomp_best_filter = mcomp_filters_to_search[0];
+ for (f = 1; f < mcomp_filters; f++) {
+ if (mcomp_filter_cost[f] < best_cost) {
+ mcomp_best_filter = mcomp_filters_to_search[f];
+ best_cost = mcomp_filter_cost[f];
+ }
+ }
+ if (mcomp_best_filter != mcomp_filters_to_search[mcomp_filters - 1]) {
+ loop_count = -1;
+ Loop = TRUE;
+ cm->mcomp_filter_type = mcomp_best_filter;
+ }
+ /*
+ printf(" best filter = %d, ( ", mcomp_best_filter);
+ for (f=0;f<mcomp_filters; f++) printf("%d ", mcomp_filter_cost[f]);
+ printf(")\n");
+ */
+ }
+#if RESET_FOREACH_FILTER
+ if (Loop == TRUE) {
+ overshoot_seen = FALSE;
+ undershoot_seen = FALSE;
+ zbin_oq_low = zbin_oq_low0;
+ zbin_oq_high = zbin_oq_high0;
+ q_low = q_low0;
+ q_high = q_high0;
+ Q = Q0;
+ cpi->zbin_over_quant = last_zbin_oq = last_zbin_oq0;
+ cpi->rate_correction_factor = rate_correction_factor0;
+ cpi->gf_rate_correction_factor = gf_rate_correction_factor0;
+ cpi->active_best_quality = active_best_quality0;
+ cpi->active_worst_quality = active_worst_quality0;
+ }
+#endif
+ }
+ }
+
+ if (Loop == TRUE) {
+ loop_count++;
+#if CONFIG_INTERNAL_STATS
+ cpi->tot_recode_hits++;
+#endif
+ }
+ } while (Loop == TRUE);
+
+ // Special case code to reduce pulsing when key frames are forced at a
+ // fixed interval. Note the reconstruction error if it is the frame before
+ // the force key frame
+ if (cpi->next_key_frame_forced && (cpi->twopass.frames_to_key == 0)) {
+ cpi->ambient_err = vp9_calc_ss_err(cpi->Source,
+ &cm->yv12_fb[cm->new_fb_idx]);
+ }
+
+ // This frame's MVs are saved and will be used in next frame's MV
+ // prediction. Last frame has one more line(add to bottom) and one
+ // more column(add to right) than cm->mip. The edge elements are
+ // initialized to 0.
+ if (cm->show_frame) { // do not save for altref frame
+ int mb_row;
+ int mb_col;
+ MODE_INFO *tmp = cm->mip;
+
+ if (cm->frame_type != KEY_FRAME) {
+ for (mb_row = 0; mb_row < cm->mb_rows + 1; mb_row ++) {
+ for (mb_col = 0; mb_col < cm->mb_cols + 1; mb_col ++) {
+ if (tmp->mbmi.ref_frame != INTRA_FRAME)
+ cpi->lfmv[mb_col + mb_row * (cm->mode_info_stride + 1)].as_int = tmp->mbmi.mv[0].as_int;
+
+ cpi->lf_ref_frame_sign_bias[mb_col + mb_row * (cm->mode_info_stride + 1)] = cm->ref_frame_sign_bias[tmp->mbmi.ref_frame];
+ cpi->lf_ref_frame[mb_col + mb_row * (cm->mode_info_stride + 1)] = tmp->mbmi.ref_frame;
+ tmp++;
+ }
+ }
+ }
+ }
+
+ // Update the GF useage maps.
+ // This is done after completing the compression of a frame when all modes
+ // etc. are finalized but before loop filter
+ vp9_update_gf_useage_maps(cpi, cm, &cpi->mb);
+
+ if (cm->frame_type == KEY_FRAME)
+ cm->refresh_last_frame = 1;
+
+#if 0
+ {
+ FILE *f = fopen("gfactive.stt", "a");
+ fprintf(f, "%8d %8d %8d %8d %8d\n", cm->current_video_frame, (100 * cpi->gf_active_count) / (cpi->common.mb_rows * cpi->common.mb_cols), cpi->this_iiratio, cpi->next_iiratio, cm->refresh_golden_frame);
+ fclose(f);
+ }
+#endif
+
+ cm->frame_to_show = &cm->yv12_fb[cm->new_fb_idx];
+
+#if WRITE_RECON_BUFFER
+ if (cm->show_frame)
+ write_cx_frame_to_file(cm->frame_to_show,
+ cm->current_video_frame);
+ else
+ write_cx_frame_to_file(cm->frame_to_show,
+ cm->current_video_frame + 1000);
+#endif
+
+ // Pick the loop filter level for the frame.
+ loopfilter_frame(cpi, cm);
+
+ // build the bitstream
+ cpi->dummy_packing = 0;
+ vp9_pack_bitstream(cpi, dest, size);
+
+ if (cpi->mb.e_mbd.update_mb_segmentation_map) {
+ update_reference_segmentation_map(cpi);
+ }
+
+#if CONFIG_PRED_FILTER
+ // Select the prediction filtering mode to use for the
+ // next frame based on the current frame selections
+ if (cm->frame_type != KEY_FRAME)
+ select_pred_filter_mode(cpi);
+#endif
+
+ update_reference_frames(cm);
+ vp9_copy(cpi->common.fc.coef_counts, cpi->coef_counts);
+ vp9_copy(cpi->common.fc.hybrid_coef_counts, cpi->hybrid_coef_counts);
+ vp9_copy(cpi->common.fc.coef_counts_8x8, cpi->coef_counts_8x8);
+ vp9_copy(cpi->common.fc.hybrid_coef_counts_8x8, cpi->hybrid_coef_counts_8x8);
+ vp9_copy(cpi->common.fc.coef_counts_16x16, cpi->coef_counts_16x16);
+ vp9_copy(cpi->common.fc.hybrid_coef_counts_16x16,
+ cpi->hybrid_coef_counts_16x16);
+ vp9_adapt_coef_probs(&cpi->common);
+ if (cpi->common.frame_type != KEY_FRAME) {
+#if CONFIG_SUPERBLOCKS
+ vp9_copy(cpi->common.fc.sb_ymode_counts, cpi->sb_ymode_count);
+#endif
+ vp9_copy(cpi->common.fc.ymode_counts, cpi->ymode_count);
+ vp9_copy(cpi->common.fc.uv_mode_counts, cpi->y_uv_mode_count);
+ vp9_copy(cpi->common.fc.bmode_counts, cpi->bmode_count);
+ vp9_copy(cpi->common.fc.i8x8_mode_counts, cpi->i8x8_mode_count);
+ vp9_copy(cpi->common.fc.sub_mv_ref_counts, cpi->sub_mv_ref_count);
+ vp9_copy(cpi->common.fc.mbsplit_counts, cpi->mbsplit_count);
+#if CONFIG_COMP_INTERINTRA_PRED
+ vp9_copy(cpi->common.fc.interintra_counts, cpi->interintra_count);
+#endif
+ vp9_adapt_mode_probs(&cpi->common);
+
+ cpi->common.fc.NMVcount = cpi->NMVcount;
+ /*
+ printf("2: %d %d %d %d\n", cpi->NMVcount.joints[0], cpi->NMVcount.joints[1],
+ cpi->NMVcount.joints[2], cpi->NMVcount.joints[3]);
+ */
+ vp9_adapt_nmv_probs(&cpi->common, cpi->mb.e_mbd.allow_high_precision_mv);
+ }
+#if CONFIG_COMP_INTERINTRA_PRED
+ if (cm->frame_type != KEY_FRAME)
+ select_interintra_mode(cpi);
+#endif
+
+ /* Move storing frame_type out of the above loop since it is also
+ * needed in motion search besides loopfilter */
+ cm->last_frame_type = cm->frame_type;
+
+ // Update rate control heuristics
+ cpi->total_byte_count += (*size);
+ cpi->projected_frame_size = (*size) << 3;
+
+ if (!active_worst_qchanged)
+ vp9_update_rate_correction_factors(cpi, 2);
+
+ cpi->last_q[cm->frame_type] = cm->base_qindex;
+
+ // Keep record of last boosted (KF/KF/ARF) Q value.
+ // If the current frame is coded at a lower Q then we also update it.
+ // If all mbs in this group are skipped only update if the Q value is
+ // better than that already stored.
+ // This is used to help set quality in forced key frames to reduce popping
+ if ((cm->base_qindex < cpi->last_boosted_qindex) ||
+ ((cpi->static_mb_pct < 100) &&
+ ((cm->frame_type == KEY_FRAME) ||
+ cm->refresh_alt_ref_frame ||
+ (cm->refresh_golden_frame && !cpi->is_src_frame_alt_ref)))) {
+ cpi->last_boosted_qindex = cm->base_qindex;
+ }
+
+ if (cm->frame_type == KEY_FRAME) {
+ vp9_adjust_key_frame_context(cpi);
+ }
+
+ // Keep a record of ambient average Q.
+ if (cm->frame_type != KEY_FRAME)
+ cpi->avg_frame_qindex = (2 + 3 * cpi->avg_frame_qindex + cm->base_qindex) >> 2;
+
+ // Keep a record from which we can calculate the average Q excluding GF updates and key frames
+ if ((cm->frame_type != KEY_FRAME) && !cm->refresh_golden_frame && !cm->refresh_alt_ref_frame) {
+ cpi->ni_frames++;
+ cpi->tot_q += vp9_convert_qindex_to_q(Q);
+ cpi->avg_q = cpi->tot_q / (double)cpi->ni_frames;
+
+ // Calculate the average Q for normal inter frames (not key or GFU
+ // frames).
+ cpi->ni_tot_qi += Q;
+ cpi->ni_av_qi = (cpi->ni_tot_qi / cpi->ni_frames);
+ }
+
+ // Update the buffer level variable.
+ // Non-viewable frames are a special case and are treated as pure overhead.
+ if (!cm->show_frame)
+ cpi->bits_off_target -= cpi->projected_frame_size;
+ else
+ cpi->bits_off_target += cpi->av_per_frame_bandwidth - cpi->projected_frame_size;
+
+ // Clip the buffer level at the maximum buffer size
+ if (cpi->bits_off_target > cpi->oxcf.maximum_buffer_size)
+ cpi->bits_off_target = cpi->oxcf.maximum_buffer_size;
+
+ // Rolling monitors of whether we are over or underspending used to help regulate min and Max Q in two pass.
+ cpi->rolling_target_bits = ((cpi->rolling_target_bits * 3) + cpi->this_frame_target + 2) / 4;
+ cpi->rolling_actual_bits = ((cpi->rolling_actual_bits * 3) + cpi->projected_frame_size + 2) / 4;
+ cpi->long_rolling_target_bits = ((cpi->long_rolling_target_bits * 31) + cpi->this_frame_target + 16) / 32;
+ cpi->long_rolling_actual_bits = ((cpi->long_rolling_actual_bits * 31) + cpi->projected_frame_size + 16) / 32;
+
+ // Actual bits spent
+ cpi->total_actual_bits += cpi->projected_frame_size;
+
+ // Debug stats
+ cpi->total_target_vs_actual += (cpi->this_frame_target - cpi->projected_frame_size);
+
+ cpi->buffer_level = cpi->bits_off_target;
+
+ // Update bits left to the kf and gf groups to account for overshoot or undershoot on these frames
+ if (cm->frame_type == KEY_FRAME) {
+ cpi->twopass.kf_group_bits += cpi->this_frame_target - cpi->projected_frame_size;
+
+ if (cpi->twopass.kf_group_bits < 0)
+ cpi->twopass.kf_group_bits = 0;
+ } else if (cm->refresh_golden_frame || cm->refresh_alt_ref_frame) {
+ cpi->twopass.gf_group_bits += cpi->this_frame_target - cpi->projected_frame_size;
+
+ if (cpi->twopass.gf_group_bits < 0)
+ cpi->twopass.gf_group_bits = 0;
+ }
+
+ // Update the skip mb flag probabilities based on the distribution seen
+ // in this frame.
+ update_base_skip_probs(cpi);
+
+#if 0 //CONFIG_NEW_MVREF && CONFIG_INTERNAL_STATS
+ {
+ FILE *f = fopen("mv_ref_dist.stt", "a");
+ unsigned int i;
+ for (i = 0; i < MAX_MV_REFS; ++i) {
+ fprintf(f, "%10d", cpi->best_ref_index_counts[0][i]);
+ }
+ fprintf(f, "\n" );
+
+ fclose(f);
+ }
+#endif
+
+#if 0// 1 && CONFIG_INTERNAL_STATS
+ {
+ FILE *f = fopen("tmp.stt", "a");
+ int recon_err;
+
+ vp9_clear_system_state(); // __asm emms;
+
+ recon_err = vp9_calc_ss_err(cpi->Source,
+ &cm->yv12_fb[cm->new_fb_idx]);
+
+ if (cpi->twopass.total_left_stats->coded_error != 0.0)
+ fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d"
+ "%7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f"
+ "%6d %5d %5d %5d %8d %8.2f %10d %10.3f"
+ "%10.3f %8d %10d %10d %10d\n",
+ cpi->common.current_video_frame, cpi->this_frame_target,
+ cpi->projected_frame_size, 0, //loop_size_estimate,
+ (cpi->projected_frame_size - cpi->this_frame_target),
+ (int)cpi->total_target_vs_actual,
+ (cpi->oxcf.starting_buffer_level - cpi->bits_off_target),
+ (int)cpi->total_actual_bits,
+ vp9_convert_qindex_to_q(cm->base_qindex),
+ (double)vp9_dc_quant(cm->base_qindex, 0) / 4.0,
+ vp9_convert_qindex_to_q(cpi->active_best_quality),
+ vp9_convert_qindex_to_q(cpi->active_worst_quality),
+ cpi->avg_q,
+ vp9_convert_qindex_to_q(cpi->ni_av_qi),
+ vp9_convert_qindex_to_q(cpi->cq_target_quality),
+ cpi->zbin_over_quant,
+ // cpi->avg_frame_qindex, cpi->zbin_over_quant,
+ cm->refresh_golden_frame, cm->refresh_alt_ref_frame,
+ cm->frame_type, cpi->gfu_boost,
+ cpi->twopass.est_max_qcorrection_factor,
+ (int)cpi->twopass.bits_left,
+ cpi->twopass.total_left_stats->coded_error,
+ (double)cpi->twopass.bits_left /
+ cpi->twopass.total_left_stats->coded_error,
+ cpi->tot_recode_hits, recon_err, cpi->kf_boost,
+ cpi->kf_zeromotion_pct);
+ else
+ fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d"
+ "%7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f"
+ "%6d %5d %5d %5d %8d %8.2f %10d %10.3f"
+ "%8d %10d %10d %10d\n",
+ cpi->common.current_video_frame,
+ cpi->this_frame_target, cpi->projected_frame_size,
+ 0, //loop_size_estimate,
+ (cpi->projected_frame_size - cpi->this_frame_target),
+ (int)cpi->total_target_vs_actual,
+ (cpi->oxcf.starting_buffer_level - cpi->bits_off_target),
+ (int)cpi->total_actual_bits,
+ vp9_convert_qindex_to_q(cm->base_qindex),
+ (double)vp9_dc_quant(cm->base_qindex, 0) / 4.0,
+ vp9_convert_qindex_to_q(cpi->active_best_quality),
+ vp9_convert_qindex_to_q(cpi->active_worst_quality),
+ cpi->avg_q,
+ vp9_convert_qindex_to_q(cpi->ni_av_qi),
+ vp9_convert_qindex_to_q(cpi->cq_target_quality),
+ cpi->zbin_over_quant,
+ // cpi->avg_frame_qindex, cpi->zbin_over_quant,
+ cm->refresh_golden_frame, cm->refresh_alt_ref_frame,
+ cm->frame_type, cpi->gfu_boost,
+ cpi->twopass.est_max_qcorrection_factor,
+ (int)cpi->twopass.bits_left,
+ cpi->twopass.total_left_stats->coded_error,
+ cpi->tot_recode_hits, recon_err, cpi->kf_boost,
+ cpi->kf_zeromotion_pct);
+
+ fclose(f);
+
+ if (0) {
+ FILE *fmodes = fopen("Modes.stt", "a");
+ int i;
+
+ fprintf(fmodes, "%6d:%1d:%1d:%1d ",
+ cpi->common.current_video_frame,
+ cm->frame_type, cm->refresh_golden_frame,
+ cm->refresh_alt_ref_frame);
+
+ for (i = 0; i < MAX_MODES; i++)
+ fprintf(fmodes, "%5d ", cpi->mode_chosen_counts[i]);
+
+ fprintf(fmodes, "\n");
+
+ fclose(fmodes);
+ }
+ }
+
+#endif
+
+#if 0
+ // Debug stats for segment feature experiments.
+ print_seg_map(cpi);
+#endif
+
+ // If this was a kf or Gf note the Q
+ if ((cm->frame_type == KEY_FRAME) || cm->refresh_golden_frame || cm->refresh_alt_ref_frame)
+ cm->last_kf_gf_q = cm->base_qindex;
+
+ if (cm->refresh_golden_frame == 1)
+ cm->frame_flags = cm->frame_flags | FRAMEFLAGS_GOLDEN;
+ else
+ cm->frame_flags = cm->frame_flags&~FRAMEFLAGS_GOLDEN;
+
+ if (cm->refresh_alt_ref_frame == 1)
+ cm->frame_flags = cm->frame_flags | FRAMEFLAGS_ALTREF;
+ else
+ cm->frame_flags = cm->frame_flags&~FRAMEFLAGS_ALTREF;
+
+
+ if (cm->refresh_last_frame & cm->refresh_golden_frame) // both refreshed
+ cpi->gold_is_last = 1;
+ else if (cm->refresh_last_frame ^ cm->refresh_golden_frame) // 1 refreshed but not the other
+ cpi->gold_is_last = 0;
+
+ if (cm->refresh_last_frame & cm->refresh_alt_ref_frame) // both refreshed
+ cpi->alt_is_last = 1;
+ else if (cm->refresh_last_frame ^ cm->refresh_alt_ref_frame) // 1 refreshed but not the other
+ cpi->alt_is_last = 0;
+
+ if (cm->refresh_alt_ref_frame & cm->refresh_golden_frame) // both refreshed
+ cpi->gold_is_alt = 1;
+ else if (cm->refresh_alt_ref_frame ^ cm->refresh_golden_frame) // 1 refreshed but not the other
+ cpi->gold_is_alt = 0;
+
+ cpi->ref_frame_flags = VP9_ALT_FLAG | VP9_GOLD_FLAG | VP9_LAST_FLAG;
+
+ if (cpi->gold_is_last)
+ cpi->ref_frame_flags &= ~VP9_GOLD_FLAG;
+
+ if (cpi->alt_is_last)
+ cpi->ref_frame_flags &= ~VP9_ALT_FLAG;
+
+ if (cpi->gold_is_alt)
+ cpi->ref_frame_flags &= ~VP9_ALT_FLAG;
+
+ if (cpi->oxcf.play_alternate && cm->refresh_alt_ref_frame && (cm->frame_type != KEY_FRAME))
+ // Update the alternate reference frame stats as appropriate.
+ update_alt_ref_frame_stats(cpi);
+ else
+ // Update the Golden frame stats as appropriate.
+ update_golden_frame_stats(cpi);
+
+ if (cm->frame_type == KEY_FRAME) {
+ // Tell the caller that the frame was coded as a key frame
+ *frame_flags = cm->frame_flags | FRAMEFLAGS_KEY;
+
+ // As this frame is a key frame the next defaults to an inter frame.
+ cm->frame_type = INTER_FRAME;
+ } else {
+ *frame_flags = cm->frame_flags&~FRAMEFLAGS_KEY;
+ }
+
+ // Clear the one shot update flags for segmentation map and mode/ref loop filter deltas.
+ xd->update_mb_segmentation_map = 0;
+ xd->update_mb_segmentation_data = 0;
+ xd->mode_ref_lf_delta_update = 0;
+
+
+ // Dont increment frame counters if this was an altref buffer update not a real frame
+ if (cm->show_frame) {
+ cm->current_video_frame++;
+ cpi->frames_since_key++;
+ }
+
+ // reset to normal state now that we are done.
+
+
+
+#if 0
+ {
+ char filename[512];
+ FILE *recon_file;
+ sprintf(filename, "enc%04d.yuv", (int) cm->current_video_frame);
+ recon_file = fopen(filename, "wb");
+ fwrite(cm->yv12_fb[cm->lst_fb_idx].buffer_alloc,
+ cm->yv12_fb[cm->lst_fb_idx].frame_size, 1, recon_file);
+ fclose(recon_file);
+ }
+#endif
+#ifdef OUTPUT_YUV_REC
+ vp9_write_yuv_rec_frame(cm);
+#endif
+
+ if (cm->show_frame) {
+ vpx_memcpy(cm->prev_mip, cm->mip,
+ (cm->mb_cols + 1) * (cm->mb_rows + 1)* sizeof(MODE_INFO));
+ } else {
+ vpx_memset(cm->prev_mip, 0,
+ (cm->mb_cols + 1) * (cm->mb_rows + 1)* sizeof(MODE_INFO));
+ }
+}
+
+static void Pass2Encode(VP9_COMP *cpi, unsigned long *size,
+ unsigned char *dest, unsigned int *frame_flags) {
+ // Second-pass encode of one frame; vp9_second_pass() is skipped when the alt-ref buffer is being refreshed.
+ if (!cpi->common.refresh_alt_ref_frame)
+ vp9_second_pass(cpi);
+ // Encode, then charge the frame size (*size is in bytes, hence the factor 8) against the remaining bits.
+ encode_frame_to_data_rate(cpi, size, dest, frame_flags);
+ cpi->twopass.bits_left -= 8 * *size;
+ // For non-alt-ref frames, credit one frame's share of the minimum rate back to bits_left.
+ if (!cpi->common.refresh_alt_ref_frame) {
+ double lower_bounds_min_rate = FRAME_OVERHEAD_BITS * cpi->oxcf.frame_rate;
+ double two_pass_min_rate = (double)(cpi->oxcf.target_bandwidth
+ * cpi->oxcf.two_pass_vbrmin_section / 100);
+
+ if (two_pass_min_rate < lower_bounds_min_rate)
+ two_pass_min_rate = lower_bounds_min_rate;  // floor: FRAME_OVERHEAD_BITS per frame
+
+ cpi->twopass.bits_left += (int64_t)(two_pass_min_rate / cpi->oxcf.frame_rate);
+ }
+}
+
+
+int vp9_receive_raw_frame(VP9_PTR ptr, unsigned int frame_flags,
+ YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
+ int64_t end_time) {
+ VP9_COMP *cpi = (VP9_COMP *) ptr;
+ VP9_COMMON *cm = &cpi->common;
+ struct vpx_usec_timer timer;
+ int res = 0;
+ // Pushes the raw frame sd onto the lookahead queue; returns 0, or -1 when vp9_lookahead_push() returns non-zero.
+ vpx_usec_timer_start(&timer);
+ if (vp9_lookahead_push(cpi->lookahead, sd, time_stamp, end_time, frame_flags,
+ cpi->active_map_enabled ? cpi->active_map : NULL))  // active map is passed only while enabled
+ res = -1;
+ cm->clr_type = sd->clrtype;  // recorded even when the push failed
+ vpx_usec_timer_mark(&timer);
+ cpi->time_receive_data += vpx_usec_timer_elapsed(&timer);  // running total of time spent in this call
+
+ return res;
+}
+
+
+static int frame_is_reference(const VP9_COMP *cpi) {
+ const VP9_COMMON *cm = &cpi->common;
+ const MACROBLOCKD *xd = &cpi->mb.e_mbd;
+ // Non-zero for a key frame or when any refresh/copy/update flag below is set; the caller marks the frame droppable otherwise.
+ return cm->frame_type == KEY_FRAME || cm->refresh_last_frame
+ || cm->refresh_golden_frame || cm->refresh_alt_ref_frame
+ || cm->copy_buffer_to_gf || cm->copy_buffer_to_arf
+ || cm->refresh_entropy_probs
+ || xd->mode_ref_lf_delta_update
+ || xd->update_mb_segmentation_map || xd->update_mb_segmentation_data;
+}
+
+
+int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
+ unsigned long *size, unsigned char *dest,
+ int64_t *time_stamp, int64_t *time_end, int flush) {
+ VP9_COMP *cpi = (VP9_COMP *) ptr;
+ VP9_COMMON *cm;
+ struct vpx_usec_timer cmptimer;
+ YV12_BUFFER_CONFIG *force_src_buffer = NULL;
+ // Encodes the next lookahead frame into dest (*size bytes); returns 0, or -1 if ptr is NULL or no frame is available.
+ if (!cpi)
+ return -1;
+ cm = &cpi->common;  // taken only after the NULL check so the check cannot be optimised away
+ vpx_usec_timer_start(&cmptimer);
+
+ cpi->source = NULL;
+
+ cpi->mb.e_mbd.allow_high_precision_mv = ALTREF_HIGH_PRECISION_MV;
+ set_mvcost(&cpi->mb);
+
+ // Should we code an alternate reference frame
+ if (cpi->oxcf.play_alternate &&
+ cpi->source_alt_ref_pending) {
+ if ((cpi->source = vp9_lookahead_peek(cpi->lookahead,
+ cpi->frames_till_gf_update_due))) {
+ cpi->alt_ref_source = cpi->source;
+ if (cpi->oxcf.arnr_max_frames > 0) {
+ vp9_temporal_filter_prepare(cpi, cpi->frames_till_gf_update_due);
+ force_src_buffer = &cpi->alt_ref_buffer;
+ }
+ cm->frames_till_alt_ref_frame = cpi->frames_till_gf_update_due;
+ cm->refresh_alt_ref_frame = 1;
+ cm->refresh_golden_frame = 0;
+ cm->refresh_last_frame = 0;
+ cm->show_frame = 0;
+ cpi->source_alt_ref_pending = FALSE; // Clear Pending altf Ref flag.
+ cpi->is_src_frame_alt_ref = 0;
+ }
+ }
+ // No alt-ref was selected: pop the next frame from the lookahead; it will be shown.
+ if (!cpi->source) {
+ if ((cpi->source = vp9_lookahead_pop(cpi->lookahead, flush))) {
+ cm->show_frame = 1;
+
+ cpi->is_src_frame_alt_ref = cpi->alt_ref_source
+ && (cpi->source == cpi->alt_ref_source);
+
+ if (cpi->is_src_frame_alt_ref)
+ cpi->alt_ref_source = NULL;
+ }
+ }
+ // Report the source's timestamps and flags, or return -1 when nothing could be dequeued.
+ if (cpi->source) {
+ cpi->un_scaled_source =
+ cpi->Source = force_src_buffer ? force_src_buffer : &cpi->source->img;
+ *time_stamp = cpi->source->ts_start;
+ *time_end = cpi->source->ts_end;
+ *frame_flags = cpi->source->flags;
+ } else {
+ *size = 0;
+ if (flush && cpi->pass == 1 && !cpi->twopass.first_pass_done) {
+ vp9_end_first_pass(cpi); /* get last stats packet */
+ cpi->twopass.first_pass_done = 1;
+ }
+
+ return -1;
+ }
+ // Track the earliest start timestamp seen so far.
+ if (cpi->source->ts_start < cpi->first_time_stamp_ever) {
+ cpi->first_time_stamp_ever = cpi->source->ts_start;
+ cpi->last_end_time_stamp_seen = cpi->source->ts_start;
+ }
+
+ // adjust frame rates based on timestamps given
+ if (!cm->refresh_alt_ref_frame) {
+ int64_t this_duration;
+ int step = 0;
+
+ if (cpi->source->ts_start == cpi->first_time_stamp_ever) {
+ this_duration = cpi->source->ts_end - cpi->source->ts_start;
+ step = 1;
+ } else {
+ int64_t last_duration;
+
+ this_duration = cpi->source->ts_end - cpi->last_end_time_stamp_seen;
+ last_duration = cpi->last_end_time_stamp_seen
+ - cpi->last_time_stamp_seen;
+ // do a step update if the duration changes by 10%
+ if (last_duration)
+ step = (int)((this_duration - last_duration) * 10 / last_duration);
+ }
+
+ if (this_duration) {
+ if (step)
+ vp9_new_frame_rate(cpi, 10000000.0 / this_duration);  // presumably timestamps tick at 10,000,000 per second — confirm
+ else {
+ double avg_duration, interval;
+
+ /* Average this frame's rate into the last second's average
+ * frame rate. If we haven't seen 1 second yet, then average
+ * over the whole interval seen.
+ */
+ interval = (double)(cpi->source->ts_end
+ - cpi->first_time_stamp_ever);
+ if (interval > 10000000.0)
+ interval = 10000000;
+
+ avg_duration = 10000000.0 / cpi->oxcf.frame_rate;
+ avg_duration *= (interval - avg_duration + this_duration);
+ avg_duration /= interval;
+
+ vp9_new_frame_rate(cpi, 10000000.0 / avg_duration);
+ }
+ }
+
+ cpi->last_time_stamp_seen = cpi->source->ts_start;
+ cpi->last_end_time_stamp_seen = cpi->source->ts_end;
+ }
+
+ // start with a 0 size frame
+ *size = 0;
+
+ // Clear down mmx registers
+ vp9_clear_system_state(); // __asm emms;
+
+ cm->frame_type = INTER_FRAME;
+ cm->frame_flags = *frame_flags;
+
+#if 0
+
+ if (cm->refresh_alt_ref_frame) {
+ // cm->refresh_golden_frame = 1;
+ cm->refresh_golden_frame = 0;
+ cm->refresh_last_frame = 0;
+ } else {
+ cm->refresh_golden_frame = 0;
+ cm->refresh_last_frame = 1;
+ }
+
+#endif
+ /* find a free buffer for the new frame */
+ {
+ int i = 0;
+ for (; i < NUM_YV12_BUFFERS; i++) {
+ if (!cm->yv12_fb[i].flags) {
+ cm->new_fb_idx = i;
+ break;
+ }
+ }
+
+ assert(i < NUM_YV12_BUFFERS);
+ }
+ if (cpi->pass == 1) {
+ Pass1Encode(cpi, size, dest, frame_flags);
+ } else if (cpi->pass == 2) {
+ Pass2Encode(cpi, size, dest, frame_flags);
+ } else {
+ encode_frame_to_data_rate(cpi, size, dest, frame_flags);
+ }
+ // Save the frame's entropy context (cm->fc): into lfc_a for an alt-ref refresh, into lfc otherwise.
+ if (cm->refresh_entropy_probs) {
+ if (cm->refresh_alt_ref_frame)
+ vpx_memcpy(&cm->lfc_a, &cm->fc, sizeof(cm->fc));
+ else
+ vpx_memcpy(&cm->lfc, &cm->fc, sizeof(cm->fc));
+ }
+
+ // if its a dropped frame honor the requests on subsequent frames
+ if (*size > 0) {
+ cpi->droppable = !frame_is_reference(cpi);
+
+ // return to normal state
+ cm->refresh_entropy_probs = 1;
+ cm->refresh_alt_ref_frame = 0;
+ cm->refresh_golden_frame = 0;
+ cm->refresh_last_frame = 1;
+ cm->frame_type = INTER_FRAME;
+
+ }
+
+ vpx_usec_timer_mark(&cmptimer);
+ cpi->time_compress_data += vpx_usec_timer_elapsed(&cmptimer);
+
+ if (cpi->b_calculate_psnr && cpi->pass != 1 && cm->show_frame) {
+ generate_psnr_packet(cpi);
+ }
+
+#if CONFIG_INTERNAL_STATS
+
+ if (cpi->pass != 1) {
+ cpi->bytes += *size;
+
+ if (cm->show_frame) {
+
+ cpi->count++;
+
+ if (cpi->b_calculate_psnr) {
+ double ye, ue, ve;
+ double frame_psnr;
+ YV12_BUFFER_CONFIG *orig = cpi->Source;
+ YV12_BUFFER_CONFIG *recon = cpi->common.frame_to_show;
+ YV12_BUFFER_CONFIG *pp = &cm->post_proc_buffer;
+ int y_samples = orig->y_height * orig->y_width;
+ int uv_samples = orig->uv_height * orig->uv_width;
+ int t_samples = y_samples + 2 * uv_samples;
+ double sq_error;
+
+ ye = (double)calc_plane_error(orig->y_buffer, orig->y_stride,
+ recon->y_buffer, recon->y_stride, orig->y_width,
+ orig->y_height);
+
+ ue = (double)calc_plane_error(orig->u_buffer, orig->uv_stride,
+ recon->u_buffer, recon->uv_stride, orig->uv_width,
+ orig->uv_height);
+
+ ve = (double)calc_plane_error(orig->v_buffer, orig->uv_stride,
+ recon->v_buffer, recon->uv_stride, orig->uv_width,
+ orig->uv_height);
+
+ sq_error = ye + ue + ve;
+
+ frame_psnr = vp9_mse2psnr(t_samples, 255.0, sq_error);
+
+ cpi->total_y += vp9_mse2psnr(y_samples, 255.0, ye);
+ cpi->total_u += vp9_mse2psnr(uv_samples, 255.0, ue);
+ cpi->total_v += vp9_mse2psnr(uv_samples, 255.0, ve);
+ cpi->total_sq_error += sq_error;
+ cpi->total += frame_psnr;
+ {
+ double frame_psnr2, frame_ssim2 = 0;
+ double weight = 0;
+#if CONFIG_POSTPROC
+ vp9_deblock(cm->frame_to_show, &cm->post_proc_buffer,
+ cm->filter_level * 10 / 6, 1, 0);
+#endif
+ vp9_clear_system_state();
+
+ ye = (double)calc_plane_error(orig->y_buffer, orig->y_stride,
+ pp->y_buffer, pp->y_stride, orig->y_width,
+ orig->y_height);
+
+ ue = (double)calc_plane_error(orig->u_buffer, orig->uv_stride,
+ pp->u_buffer, pp->uv_stride, orig->uv_width,
+ orig->uv_height);
+
+ ve = (double)calc_plane_error(orig->v_buffer, orig->uv_stride,
+ pp->v_buffer, pp->uv_stride, orig->uv_width,
+ orig->uv_height);
+
+ sq_error = ye + ue + ve;
+
+ frame_psnr2 = vp9_mse2psnr(t_samples, 255.0, sq_error);
+
+ cpi->totalp_y += vp9_mse2psnr(y_samples, 255.0, ye);
+ cpi->totalp_u += vp9_mse2psnr(uv_samples, 255.0, ue);
+ cpi->totalp_v += vp9_mse2psnr(uv_samples, 255.0, ve);
+ cpi->total_sq_error2 += sq_error;
+ cpi->totalp += frame_psnr2;
+
+ frame_ssim2 = vp9_calc_ssim(cpi->Source,
+ &cm->post_proc_buffer, 1, &weight);
+
+ cpi->summed_quality += frame_ssim2 * weight;
+ cpi->summed_weights += weight;
+#if 0
+ {
+ FILE *f = fopen("q_used.stt", "a");
+ fprintf(f, "%5d : Y%f7.3:U%f7.3:V%f7.3:F%f7.3:S%7.3f\n",
+ cpi->common.current_video_frame, y2, u2, v2,
+ frame_psnr2, frame_ssim2);
+ fclose(f);
+ }
+#endif
+ }
+ }
+
+ if (cpi->b_calculate_ssimg) {
+ double y, u, v, frame_all;
+ frame_all = vp9_calc_ssimg(cpi->Source, cm->frame_to_show,
+ &y, &u, &v);
+ cpi->total_ssimg_y += y;
+ cpi->total_ssimg_u += u;
+ cpi->total_ssimg_v += v;
+ cpi->total_ssimg_all += frame_all;
+ }
+
+ }
+ }
+
+#endif
+
+ return 0;
+}
+
+int vp9_get_preview_raw_frame(VP9_PTR comp, YV12_BUFFER_CONFIG *dest,
+ vp9_ppflags_t *flags) {
+ VP9_COMP *cpi = (VP9_COMP *) comp;
+ // Fills *dest with a preview frame. Returns -1 while refresh_alt_ref_frame is set, else the result computed below.
+ if (cpi->common.refresh_alt_ref_frame)
+ return -1;
+ else {
+ int ret;
+#if CONFIG_POSTPROC
+ ret = vp9_post_proc_frame(&cpi->common, dest, flags);
+#else
+ // Without post-processing: shallow-copy frame_to_show and override its dimensions with Width/Height.
+ if (cpi->common.frame_to_show) {
+ *dest = *cpi->common.frame_to_show;
+ dest->y_width = cpi->common.Width;
+ dest->y_height = cpi->common.Height;
+ dest->uv_height = cpi->common.Height / 2;  // NOTE(review): uv_width keeps the copied value — confirm intended
+ ret = 0;
+ } else {
+ ret = -1;  // nothing has been encoded to show yet
+ }
+
+#endif // !CONFIG_POSTPROC
+ vp9_clear_system_state();
+ return ret;
+ }
+}
+
+int vp9_set_roimap(VP9_PTR comp, unsigned char *map, unsigned int rows,
+ unsigned int cols, int delta_q[4], int delta_lf[4],
+ unsigned int threshold[4]) {
+ VP9_COMP *cpi = (VP9_COMP *) comp;
+ signed char feature_data[SEG_LVL_MAX][MAX_MB_SEGMENTS] = {{0}};  // zeroed: only the ALT_Q and ALT_LF rows are filled below
+ MACROBLOCKD *xd = &cpi->mb.e_mbd;
+ int i;
+ // Installs a region-of-interest map; returns 0, or -1 when rows/cols differ from the frame's mb_rows/mb_cols.
+ if (cpi->common.mb_rows != rows || cpi->common.mb_cols != cols)
+ return -1;
+ // A NULL map disables segmentation instead of installing a new map.
+ if (!map) {
+ vp9_disable_segmentation((VP9_PTR)cpi);
+ return 0;
+ }
+
+ // Set the segmentation Map
+ vp9_set_segmentation_map((VP9_PTR)cpi, map);
+
+ // Activate segmentation.
+ vp9_enable_segmentation((VP9_PTR)cpi);
+
+ // Set up the quant segment data (one delta per segment)
+ feature_data[SEG_LVL_ALT_Q][0] = delta_q[0];
+ feature_data[SEG_LVL_ALT_Q][1] = delta_q[1];
+ feature_data[SEG_LVL_ALT_Q][2] = delta_q[2];
+ feature_data[SEG_LVL_ALT_Q][3] = delta_q[3];
+
+ // Set up the loop filter segment data (one delta per segment)
+ feature_data[SEG_LVL_ALT_LF][0] = delta_lf[0];
+ feature_data[SEG_LVL_ALT_LF][1] = delta_lf[1];
+ feature_data[SEG_LVL_ALT_LF][2] = delta_lf[2];
+ feature_data[SEG_LVL_ALT_LF][3] = delta_lf[3];
+
+ cpi->segment_encode_breakout[0] = threshold[0];
+ cpi->segment_encode_breakout[1] = threshold[1];
+ cpi->segment_encode_breakout[2] = threshold[2];
+ cpi->segment_encode_breakout[3] = threshold[3];
+
+ // Enable the loop and quant changes in the feature mask
+ for (i = 0; i < 4; i++) {
+ if (delta_q[i])
+ vp9_enable_segfeature(xd, i, SEG_LVL_ALT_Q);
+ else
+ vp9_disable_segfeature(xd, i, SEG_LVL_ALT_Q);
+
+ if (delta_lf[i])
+ vp9_enable_segfeature(xd, i, SEG_LVL_ALT_LF);
+ else
+ vp9_disable_segfeature(xd, i, SEG_LVL_ALT_LF);
+ }
+
+ // Initialise the feature data structure
+ // SEGMENT_DELTADATA 0, SEGMENT_ABSDATA 1
+ vp9_set_segment_data((VP9_PTR)cpi, &feature_data[0][0], SEGMENT_DELTADATA);
+
+ return 0;
+}
+
+int vp9_set_active_map(VP9_PTR comp, unsigned char *map,
+ unsigned int rows, unsigned int cols) {
+ VP9_COMP *cpi = (VP9_COMP *) comp;
+ // Installs (or, for a NULL map, disables) the active map; returns 0, or -1 if rows/cols do not match mb_rows/mb_cols.
+ if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols) {
+ if (map) {
+ vpx_memcpy(cpi->active_map, map, rows * cols);  // one byte per macroblock is copied
+ cpi->active_map_enabled = 1;
+ } else
+ cpi->active_map_enabled = 0;
+
+ return 0;
+ } else {
+ // cpi->active_map_enabled = 0;
+ return -1;  // the stored map and its enabled flag are left untouched
+ }
+}
+
+int vp9_set_internal_size(VP9_PTR comp,
+ VPX_SCALING horiz_mode, VPX_SCALING vert_mode) {
+ VP9_COMP *cpi = (VP9_COMP *) comp;
+
+ // Stores the horizontal and vertical scaling modes in cpi->common.
+ // Returns 0 on success, or -1 if either mode is greater than ONETWO.
+ //
+ // Both modes are validated before anything is stored, so a rejected call
+ // leaves horiz_scale and vert_scale unchanged.
+ if (horiz_mode > ONETWO || vert_mode > ONETWO)
+ return -1;
+
+ cpi->common.horiz_scale = horiz_mode;
+ cpi->common.vert_scale = vert_mode;
+ return 0;
+}
+
+
+
+int vp9_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest) {
+ int i, j;
+ int Total = 0;  // NOTE(review): plain int accumulator — could overflow for large frames; confirm range
+ // Returns the sum of vp9_mse16x16() results over the Y plane only (chroma planes are not compared).
+ unsigned char *src = source->y_buffer;
+ unsigned char *dst = dest->y_buffer;
+
+ // Loop through the Y plane raw and reconstruction data summing (square differences)
+ for (i = 0; i < source->y_height; i += 16) {  // steps by whole 16x16 blocks; assumes the buffers cover any partial edge block — TODO confirm
+ for (j = 0; j < source->y_width; j += 16) {
+ unsigned int sse;  // out-parameter required by vp9_mse16x16(); its value is not read here
+ Total += vp9_mse16x16(src + j, source->y_stride, dst + j, dest->y_stride,
+ &sse);
+ }
+
+ src += 16 * source->y_stride;  // advance both planes by one block row
+ dst += 16 * dest->y_stride;
+ }
+
+ return Total;
+}
+
+
+int vp9_get_quantizer(VP9_PTR c) {  // Returns the encoder's common.base_qindex.
+ VP9_COMP *cpi = (VP9_COMP *) c;
+ return cpi->common.base_qindex;
+}
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
new file mode 100644
index 0000000..0ccf308
--- /dev/null
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -0,0 +1,828 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_ENCODER_VP9_ONYX_INT_H_
+#define VP9_ENCODER_VP9_ONYX_INT_H_
+
+#include <stdio.h>
+#include "./vpx_config.h"
+#include "vp9/common/vp9_onyx.h"
+#include "vp9/encoder/vp9_treewriter.h"
+#include "vp9/encoder/vp9_tokenize.h"
+#include "vp9/common/vp9_onyxc_int.h"
+#include "vp9/encoder/vp9_variance.h"
+#include "vp9/encoder/vp9_encodemb.h"
+#include "vp9/encoder/vp9_quantize.h"
+#include "vp9/common/vp9_entropy.h"
+#include "vp9/common/vp9_entropymode.h"
+#include "vpx_ports/mem.h"
+#include "vpx/internal/vpx_codec_internal.h"
+#include "vp9/encoder/vp9_mcomp.h"
+#include "vp9/common/vp9_findnearmv.h"
+#include "vp9/encoder/vp9_lookahead.h"
+
+// #define SPEEDSTATS 1
+#define MIN_GF_INTERVAL 4
+#define DEFAULT_GF_INTERVAL 7
+
+#define KEY_FRAME_CONTEXT 5
+
+#define MAX_LAG_BUFFERS 25
+
+#define AF_THRESH 25
+#define AF_THRESH2 100
+#define ARF_DECAY_THRESH 12
+// MAX_MODES must equal the number of THR_MODES enumerators for the active configuration; it sizes SPEED_FEATURES.thresh_mult[].
+#if CONFIG_PRED_FILTER
+#if CONFIG_COMP_INTERINTRA_PRED
+#define MAX_MODES 66  // 54 modes (incl. *_FILT variants) + 12 inter-intra modes
+#else
+#define MAX_MODES 54  // 42 base modes + 12 *_FILT variants
+#endif
+#else // CONFIG_PRED_FILTER
+#if CONFIG_COMP_INTERINTRA_PRED
+#define MAX_MODES 54  // 42 base modes + 12 inter-intra modes
+#else
+#define MAX_MODES 42  // base mode list only
+#endif
+#endif // CONFIG_PRED_FILTER
+
+#define MIN_THRESHMULT 32
+#define MAX_THRESHMULT 512
+
+#define GF_ZEROMV_ZBIN_BOOST 12
+#define LF_ZEROMV_ZBIN_BOOST 6
+#define MV_ZBIN_BOOST 4
+#define ZBIN_OQ_MAX 192
+
+#define VP9_TEMPORAL_ALT_REF 1
+
+typedef struct {  // Bundle of entropy-coding state; presumably a save/restore snapshot of the encoder context — confirm against its users
+ nmv_context nmvc;
+ int nmvjointcost[MV_JOINTS];
+ int nmvcosts[2][MV_VALS];
+ int nmvcosts_hp[2][MV_VALS];
+
+#ifdef MODE_STATS
+ // Stats
+ int y_modes[VP9_YMODES];
+ int uv_modes[VP9_UV_MODES];
+ int i8x8_modes[VP9_I8X8_MODES];
+ int b_modes[B_MODE_COUNT];
+ int inter_y_modes[MB_MODE_COUNT];
+ int inter_uv_modes[VP9_UV_MODES];
+ int inter_b_modes[B_MODE_COUNT];
+#endif
+
+ vp9_prob segment_pred_probs[PREDICTION_PROBS];
+ unsigned char ref_pred_probs_update[PREDICTION_PROBS];
+ vp9_prob ref_pred_probs[PREDICTION_PROBS];
+ vp9_prob prob_comppred[COMP_PRED_CONTEXTS];
+
+ unsigned char *last_frame_seg_map_copy;  // pointer only; allocation size and ownership are not visible here
+
+ // 0 = Intra, Last, GF, ARF
+ signed char last_ref_lf_deltas[MAX_REF_LF_DELTAS];
+ // 0 = BPRED, ZERO_MV, MV, SPLIT
+ signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS];
+ // Coefficient probability tables: regular and "hybrid" variants for the 4x4, 8x8 and 16x16 block-type sets.
+ vp9_prob coef_probs[BLOCK_TYPES]
+ [COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES];
+ vp9_prob hybrid_coef_probs[BLOCK_TYPES]
+ [COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES];
+
+ vp9_prob coef_probs_8x8[BLOCK_TYPES_8X8]
+ [COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES];
+ vp9_prob hybrid_coef_probs_8x8[BLOCK_TYPES_8X8]
+ [COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES];
+
+ vp9_prob coef_probs_16x16[BLOCK_TYPES_16X16]
+ [COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES];
+ vp9_prob hybrid_coef_probs_16x16[BLOCK_TYPES_16X16]
+ [COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES];
+
+#if CONFIG_SUPERBLOCKS
+ vp9_prob sb_ymode_prob[VP9_I32X32_MODES - 1];
+#endif
+ vp9_prob ymode_prob[VP9_YMODES - 1]; /* interframe intra mode probs */
+ vp9_prob uv_mode_prob[VP9_YMODES][VP9_UV_MODES - 1];
+ vp9_prob bmode_prob[VP9_NKF_BINTRAMODES - 1];
+ vp9_prob i8x8_mode_prob[VP9_I8X8_MODES - 1];
+ vp9_prob sub_mv_ref_prob[SUBMVREF_COUNT][VP9_SUBMVREFS - 1];
+ vp9_prob mbsplit_prob[VP9_NUMMBSPLITS - 1];
+
+ vp9_prob switchable_interp_prob[VP9_SWITCHABLE_FILTERS + 1]
+ [VP9_SWITCHABLE_FILTERS - 1];
+#if CONFIG_COMP_INTERINTRA_PRED
+ vp9_prob interintra_prob;
+#endif
+
+ int mv_ref_ct[INTER_MODE_CONTEXTS][4][2];
+ int vp9_mode_contexts[INTER_MODE_CONTEXTS][4];
+
+} CODING_CONTEXT;
+
+typedef struct {  // One record of first-pass statistics; every field is stored as a double
+ double frame;
+ double intra_error;
+ double coded_error;
+ double sr_coded_error;
+ double ssim_weighted_pred_err;
+ double pcnt_inter;
+ double pcnt_motion;
+ double pcnt_second_ref;
+ double pcnt_neutral;
+ double MVr;
+ double mvr_abs;
+ double MVc;
+ double mvc_abs;
+ double MVrv;
+ double MVcv;
+ double mv_in_out_count;
+ double new_mv_count;
+ double duration;
+ double count;
+}
+FIRSTPASS_STATS;
+
+typedef struct {  // Frame statistics for one-pass operation (purpose inferred from the name — confirm against its users)
+ int frames_so_far;
+ double frame_intra_error;
+ double frame_coded_error;
+ double frame_pcnt_inter;
+ double frame_pcnt_motion;
+ double frame_mvr;
+ double frame_mvr_abs;
+ double frame_mvc;
+ double frame_mvc_abs;
+
+} ONEPASS_FRAMESTATS;
+
+typedef struct {  // Per-macroblock stats: one entry per reference frame slot (MAX_REF_FRAMES)
+ struct {
+ int err;
+ union {  // either a motion vector or a prediction mode; which member is valid is decided by the user of the entry
+ int_mv mv;
+ MB_PREDICTION_MODE mode;
+ } m;
+ } ref[MAX_REF_FRAMES];
+} MBGRAPH_MB_STATS;
+
+typedef struct {  // Frame-level handle onto MBGRAPH_MB_STATS records
+ MBGRAPH_MB_STATS *mb_stats;  // presumably an array with one entry per macroblock; length is not stored here — confirm
+} MBGRAPH_FRAME_STATS;
+
+#if CONFIG_PRED_FILTER
+typedef enum {  // Candidate-mode indices with *_FILT variants: 54 entries, 66 with CONFIG_COMP_INTERINTRA_PRED (must match MAX_MODES)
+ THR_ZEROMV,
+ THR_ZEROMV_FILT,
+ THR_DC,
+
+ THR_NEARESTMV,
+ THR_NEARESTMV_FILT,
+ THR_NEARMV,
+ THR_NEARMV_FILT,
+
+ THR_ZEROG,
+ THR_ZEROG_FILT,
+ THR_NEARESTG,
+ THR_NEARESTG_FILT,
+
+ THR_ZEROA,
+ THR_ZEROA_FILT,
+ THR_NEARESTA,
+ THR_NEARESTA_FILT,
+
+ THR_NEARG,
+ THR_NEARG_FILT,
+ THR_NEARA,
+ THR_NEARA_FILT,
+
+ THR_V_PRED,
+ THR_H_PRED,
+ THR_D45_PRED,
+ THR_D135_PRED,
+ THR_D117_PRED,
+ THR_D153_PRED,
+ THR_D27_PRED,
+ THR_D63_PRED,
+ THR_TM,
+
+ THR_NEWMV,
+ THR_NEWMV_FILT,
+ THR_NEWG,
+ THR_NEWG_FILT,
+ THR_NEWA,
+ THR_NEWA_FILT,
+
+ THR_SPLITMV,
+ THR_SPLITG,
+ THR_SPLITA,
+
+ THR_B_PRED,
+ THR_I8X8_PRED,
+
+ THR_COMP_ZEROLG,
+ THR_COMP_NEARESTLG,
+ THR_COMP_NEARLG,
+
+ THR_COMP_ZEROLA,
+ THR_COMP_NEARESTLA,
+ THR_COMP_NEARLA,
+
+ THR_COMP_ZEROGA,
+ THR_COMP_NEARESTGA,
+ THR_COMP_NEARGA,
+
+ THR_COMP_NEWLG,
+ THR_COMP_NEWLA,
+ THR_COMP_NEWGA,
+
+ THR_COMP_SPLITLG,
+ THR_COMP_SPLITLA,
+ THR_COMP_SPLITGA,
+#if CONFIG_COMP_INTERINTRA_PRED
+ THR_COMP_INTERINTRA_ZEROL,
+ THR_COMP_INTERINTRA_NEARESTL,
+ THR_COMP_INTERINTRA_NEARL,
+ THR_COMP_INTERINTRA_NEWL,
+
+ THR_COMP_INTERINTRA_ZEROG,
+ THR_COMP_INTERINTRA_NEARESTG,
+ THR_COMP_INTERINTRA_NEARG,
+ THR_COMP_INTERINTRA_NEWG,
+
+ THR_COMP_INTERINTRA_ZEROA,
+ THR_COMP_INTERINTRA_NEARESTA,
+ THR_COMP_INTERINTRA_NEARA,
+ THR_COMP_INTERINTRA_NEWA,
+#endif
+}
+THR_MODES;
+#else
+typedef enum {  // Same list without the *_FILT variants: 42 entries, 54 with CONFIG_COMP_INTERINTRA_PRED (must match MAX_MODES)
+ THR_ZEROMV,
+ THR_DC,
+
+ THR_NEARESTMV,
+ THR_NEARMV,
+
+ THR_ZEROG,
+ THR_NEARESTG,
+
+ THR_ZEROA,
+ THR_NEARESTA,
+
+ THR_NEARG,
+ THR_NEARA,
+
+ THR_V_PRED,
+ THR_H_PRED,
+ THR_D45_PRED,
+ THR_D135_PRED,
+ THR_D117_PRED,
+ THR_D153_PRED,
+ THR_D27_PRED,
+ THR_D63_PRED,
+ THR_TM,
+
+ THR_NEWMV,
+ THR_NEWG,
+ THR_NEWA,
+
+ THR_SPLITMV,
+ THR_SPLITG,
+ THR_SPLITA,
+
+ THR_B_PRED,
+ THR_I8X8_PRED,
+
+ THR_COMP_ZEROLG,
+ THR_COMP_NEARESTLG,
+ THR_COMP_NEARLG,
+
+ THR_COMP_ZEROLA,
+ THR_COMP_NEARESTLA,
+ THR_COMP_NEARLA,
+
+ THR_COMP_ZEROGA,
+ THR_COMP_NEARESTGA,
+ THR_COMP_NEARGA,
+
+ THR_COMP_NEWLG,
+ THR_COMP_NEWLA,
+ THR_COMP_NEWGA,
+
+ THR_COMP_SPLITLG,
+ THR_COMP_SPLITLA,
+ THR_COMP_SPLITGA,
+#if CONFIG_COMP_INTERINTRA_PRED
+ THR_COMP_INTERINTRA_ZEROL,
+ THR_COMP_INTERINTRA_NEARESTL,
+ THR_COMP_INTERINTRA_NEARL,
+ THR_COMP_INTERINTRA_NEWL,
+
+ THR_COMP_INTERINTRA_ZEROG,
+ THR_COMP_INTERINTRA_NEARESTG,
+ THR_COMP_INTERINTRA_NEARG,
+ THR_COMP_INTERINTRA_NEWG,
+
+ THR_COMP_INTERINTRA_ZEROA,
+ THR_COMP_INTERINTRA_NEARESTA,
+ THR_COMP_INTERINTRA_NEARA,
+ THR_COMP_INTERINTRA_NEWA,
+#endif
+}
+THR_MODES;
+#endif
+
+typedef enum { // Type of SPEED_FEATURES.search_method
+ DIAMOND = 0,
+ NSTEP = 1,
+ HEX = 2
+} SEARCH_METHODS;
+
+typedef struct { // Held in VP9_COMP as 'sf'; presumably filled in by vp9_set_speed_features() - confirm
+ int RD;
+ SEARCH_METHODS search_method;
+ int improved_dct;
+ int auto_filter;
+ int recode_loop;
+ int iterative_sub_pixel;
+ int half_pixel_search;
+ int quarter_pixel_search;
+ int thresh_mult[MAX_MODES]; // one entry per MAX_MODES index
+ int max_step_search_steps;
+ int first_step;
+ int optimize_coefficients;
+ int no_skip_block4x4_search;
+ int improved_mv_pred;
+ int search_best_filter;
+
+} SPEED_FEATURES;
+
+typedef struct { // A MACROBLOCK paired with a rate total; presumably per-row encoder state - confirm against users
+ MACROBLOCK mb;
+ int totalrate;
+} MB_ROW_COMP;
+
+typedef struct { // Pair of TOKENEXTRA pointers; VP9_COMP.tplist points at objects of this type
+ TOKENEXTRA *start;
+ TOKENEXTRA *stop; // NOTE(review): whether 'stop' is inclusive or one-past-the-end is not visible here
+} TOKENLIST;
+
+typedef struct { // Thread index plus two untyped pointers; the pointee types are not visible in this header
+ int ithread;
+ void *ptr1;
+ void *ptr2;
+} ENCODETHREAD_DATA;
+typedef struct { // Thread index plus one untyped pointer; the pointee type is not visible in this header
+ int ithread;
+ void *ptr1;
+} LPFTHREAD_DATA;
+
+enum BlockSize { // The first four values alias the PARTITIONING_* constants; the rest count on from BLOCK_4X4
+ BLOCK_16X8 = PARTITIONING_16X8,
+ BLOCK_8X16 = PARTITIONING_8X16,
+ BLOCK_8X8 = PARTITIONING_8X8,
+ BLOCK_4X4 = PARTITIONING_4X4,
+ BLOCK_16X16, // PARTITIONING_4X4 + 1
+ BLOCK_MAX_SEGMENTS,
+ BLOCK_32X32 = BLOCK_MAX_SEGMENTS, // deliberately shares its value with BLOCK_MAX_SEGMENTS
+ BLOCK_MAX_SB_SEGMENTS, // BLOCK_32X32 + 1; used as the length of VP9_COMP.fn_ptr[]
+};
+
+typedef struct VP9_COMP {
+
+ DECLARE_ALIGNED(16, short, Y1quant[QINDEX_RANGE][16]);
+ DECLARE_ALIGNED(16, unsigned char, Y1quant_shift[QINDEX_RANGE][16]);
+ DECLARE_ALIGNED(16, short, Y1zbin[QINDEX_RANGE][16]);
+ DECLARE_ALIGNED(16, short, Y1round[QINDEX_RANGE][16]);
+
+ DECLARE_ALIGNED(16, short, Y2quant[QINDEX_RANGE][16]);
+ DECLARE_ALIGNED(16, unsigned char, Y2quant_shift[QINDEX_RANGE][16]);
+ DECLARE_ALIGNED(16, short, Y2zbin[QINDEX_RANGE][16]);
+ DECLARE_ALIGNED(16, short, Y2round[QINDEX_RANGE][16]);
+
+ DECLARE_ALIGNED(16, short, UVquant[QINDEX_RANGE][16]);
+ DECLARE_ALIGNED(16, unsigned char, UVquant_shift[QINDEX_RANGE][16]);
+ DECLARE_ALIGNED(16, short, UVzbin[QINDEX_RANGE][16]);
+ DECLARE_ALIGNED(16, short, UVround[QINDEX_RANGE][16]);
+
+ DECLARE_ALIGNED(16, short, zrun_zbin_boost_y1[QINDEX_RANGE][16]);
+ DECLARE_ALIGNED(16, short, zrun_zbin_boost_y2[QINDEX_RANGE][16]);
+ DECLARE_ALIGNED(16, short, zrun_zbin_boost_uv[QINDEX_RANGE][16]);
+
+ DECLARE_ALIGNED(64, short, Y1zbin_8x8[QINDEX_RANGE][64]);
+ DECLARE_ALIGNED(64, short, Y2zbin_8x8[QINDEX_RANGE][64]);
+ DECLARE_ALIGNED(64, short, UVzbin_8x8[QINDEX_RANGE][64]);
+ DECLARE_ALIGNED(64, short, zrun_zbin_boost_y1_8x8[QINDEX_RANGE][64]);
+ DECLARE_ALIGNED(64, short, zrun_zbin_boost_y2_8x8[QINDEX_RANGE][64]);
+ DECLARE_ALIGNED(64, short, zrun_zbin_boost_uv_8x8[QINDEX_RANGE][64]);
+
+ DECLARE_ALIGNED(16, short, Y1zbin_16x16[QINDEX_RANGE][256]);
+ DECLARE_ALIGNED(16, short, Y2zbin_16x16[QINDEX_RANGE][256]);
+ DECLARE_ALIGNED(16, short, UVzbin_16x16[QINDEX_RANGE][256]);
+ DECLARE_ALIGNED(16, short, zrun_zbin_boost_y1_16x16[QINDEX_RANGE][256]);
+ DECLARE_ALIGNED(16, short, zrun_zbin_boost_y2_16x16[QINDEX_RANGE][256]);
+ DECLARE_ALIGNED(16, short, zrun_zbin_boost_uv_16x16[QINDEX_RANGE][256]);
+
+ MACROBLOCK mb;
+ VP9_COMMON common;
+ VP9_CONFIG oxcf;
+
+ struct lookahead_ctx *lookahead;
+ struct lookahead_entry *source;
+ struct lookahead_entry *alt_ref_source;
+
+ YV12_BUFFER_CONFIG *Source;
+ YV12_BUFFER_CONFIG *un_scaled_source;
+ YV12_BUFFER_CONFIG scaled_source;
+
+ int source_alt_ref_pending; // frame in src_buffers has been identified to be encoded as an alt ref
+ int source_alt_ref_active; // an alt ref frame has been encoded and is usable
+
+ int is_src_frame_alt_ref; // source of frame to encode is an exact copy of an alt ref frame
+
+ int gold_is_last; // golden frame same as last frame ( short circuit gold searches)
+ int alt_is_last; // Alt reference frame same as last ( short circuit altref search)
+ int gold_is_alt; // don't do both alt and gold search ( just do gold).
+
+ // int refresh_alt_ref_frame;
+ YV12_BUFFER_CONFIG last_frame_uf;
+
+ TOKENEXTRA *tok;
+ unsigned int tok_count;
+
+
+ unsigned int frames_since_key;
+ unsigned int key_frame_frequency;
+ unsigned int this_key_frame_forced;
+ unsigned int next_key_frame_forced;
+
+ // Ambient reconstruction err target for force key frames
+ int ambient_err;
+
+ unsigned int mode_check_freq[MAX_MODES];
+ unsigned int mode_test_hit_counts[MAX_MODES];
+ unsigned int mode_chosen_counts[MAX_MODES];
+
+ int rd_thresh_mult[MAX_MODES];
+ int rd_baseline_thresh[MAX_MODES];
+ int rd_threshes[MAX_MODES];
+ int64_t rd_comp_pred_diff[NB_PREDICTION_TYPES];
+ int rd_prediction_type_threshes[4][NB_PREDICTION_TYPES];
+ int comp_pred_count[COMP_PRED_CONTEXTS];
+ int single_pred_count[COMP_PRED_CONTEXTS];
+ // FIXME contextualize
+ int txfm_count[TX_SIZE_MAX];
+ int txfm_count_8x8p[TX_SIZE_MAX - 1];
+ int64_t rd_tx_select_diff[NB_TXFM_MODES];
+ int rd_tx_select_threshes[4][NB_TXFM_MODES];
+
+ int RDMULT;
+ int RDDIV;
+
+ CODING_CONTEXT coding_context;
+
+ // Rate targeting variables
+ int64_t prediction_error;
+ int64_t last_prediction_error;
+ int64_t intra_error;
+ int64_t last_intra_error;
+
+ int this_frame_target;
+ int projected_frame_size;
+ int last_q[2]; // Separate values for Intra/Inter
+ int last_boosted_qindex; // Last boosted GF/KF/ARF q
+
+ double rate_correction_factor;
+ double key_frame_rate_correction_factor;
+ double gf_rate_correction_factor;
+
+ int frames_till_gf_update_due; // Count down till next GF
+ int current_gf_interval; // GF interval chosen when we coded the last GF
+
+ int gf_overspend_bits; // Total bits overspent because of GF boost (cumulative)
+
+ int non_gf_bitrate_adjustment; // Used in the few frames following a GF to recover the extra bits spent in that GF
+
+ int kf_overspend_bits; // Extra bits spent on key frames that need to be recovered on inter frames
+ int kf_bitrate_adjustment; // Current number of bits to try and recover on each inter frame.
+ int max_gf_interval;
+ int baseline_gf_interval;
+ int active_arnr_frames; // <= cpi->oxcf.arnr_max_frames
+
+ int64_t key_frame_count;
+ int prior_key_frame_distance[KEY_FRAME_CONTEXT];
+ int per_frame_bandwidth; // Current section per frame bandwidth target
+ int av_per_frame_bandwidth; // Average frame size target for clip
+ int min_frame_bandwidth; // Minimum allocation that should be used for any frame
+ int inter_frame_target;
+ double output_frame_rate;
+ int64_t last_time_stamp_seen;
+ int64_t last_end_time_stamp_seen;
+ int64_t first_time_stamp_ever;
+
+ int ni_av_qi;
+ int ni_tot_qi;
+ int ni_frames;
+ int avg_frame_qindex;
+ double tot_q;
+ double avg_q;
+
+ int zbin_over_quant;
+ int zbin_mode_boost;
+ int zbin_mode_boost_enabled;
+
+ int64_t total_byte_count;
+
+ int buffered_mode;
+
+ int buffer_level;
+ int bits_off_target;
+
+ int rolling_target_bits;
+ int rolling_actual_bits;
+
+ int long_rolling_target_bits;
+ int long_rolling_actual_bits;
+
+ int64_t total_actual_bits;
+ int total_target_vs_actual; // debug stats
+
+ int worst_quality;
+ int active_worst_quality;
+ int best_quality;
+ int active_best_quality;
+
+ int cq_target_quality;
+
+#if CONFIG_SUPERBLOCKS
+ int sb_count;
+ int sb_ymode_count [VP9_I32X32_MODES];
+#endif
+ int ymode_count[VP9_YMODES]; /* intra MB type cts this frame */
+ int bmode_count[VP9_NKF_BINTRAMODES];
+ int i8x8_mode_count[VP9_I8X8_MODES];
+ int sub_mv_ref_count[SUBMVREF_COUNT][VP9_SUBMVREFS];
+ int mbsplit_count[VP9_NUMMBSPLITS];
+ int y_uv_mode_count[VP9_YMODES][VP9_UV_MODES];
+#if CONFIG_COMP_INTERINTRA_PRED
+ unsigned int interintra_count[2];
+ unsigned int interintra_select_count[2];
+#endif
+
+ nmv_context_counts NMVcount;
+
+ unsigned int coef_counts [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */
+ vp9_prob frame_coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
+ unsigned int frame_branch_ct [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2];
+ unsigned int hybrid_coef_counts [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */
+ vp9_prob frame_hybrid_coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
+ unsigned int frame_hybrid_branch_ct [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2];
+
+ unsigned int coef_counts_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */
+ vp9_prob frame_coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
+ unsigned int frame_branch_ct_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2];
+ unsigned int hybrid_coef_counts_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */
+ vp9_prob frame_hybrid_coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
+ unsigned int frame_hybrid_branch_ct_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2];
+
+ unsigned int coef_counts_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */
+ vp9_prob frame_coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
+ unsigned int frame_branch_ct_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2];
+ unsigned int hybrid_coef_counts_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */
+ vp9_prob frame_hybrid_coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
+ unsigned int frame_hybrid_branch_ct_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2];
+
+ int gfu_boost;
+ int last_boost;
+ int kf_boost;
+ int kf_zeromotion_pct;
+
+ int target_bandwidth;
+ struct vpx_codec_pkt_list *output_pkt_list;
+
+#if 0
+ // Experimental code for lagged and one pass
+ ONEPASS_FRAMESTATS one_pass_frame_stats[MAX_LAG_BUFFERS];
+ int one_pass_frame_index;
+#endif
+ MBGRAPH_FRAME_STATS mbgraph_stats[MAX_LAG_BUFFERS];
+ int mbgraph_n_frames; // number of frames filled in the above
+ int static_mb_pct; // % forced skip mbs by segmentation
+ int seg0_progress, seg0_idx, seg0_cnt;
+ int ref_pred_count[3][2];
+
+ int decimation_factor;
+ int decimation_count;
+
+ // for real time encoding
+ int avg_encode_time; // microsecond
+ int avg_pick_mode_time; // microsecond
+ int Speed;
+ unsigned int cpu_freq; // Mhz
+ int compressor_speed;
+
+ int interquantizer;
+ int goldfreq;
+ int auto_worst_q;
+ int cpu_used;
+ int horiz_scale;
+ int vert_scale;
+ int pass;
+
+ vp9_prob last_skip_false_probs[3][MBSKIP_CONTEXTS];
+ int last_skip_probs_q[3];
+
+ int recent_ref_frame_usage[MAX_REF_FRAMES];
+ int count_mb_ref_frame_usage[MAX_REF_FRAMES];
+ int ref_frame_flags;
+
+ unsigned char ref_pred_probs_update[PREDICTION_PROBS];
+
+ SPEED_FEATURES sf;
+ int error_bins[1024];
+
+ // Data used for real time conferencing mode to help determine if it would be good to update the gf
+ int inter_zz_count;
+ int gf_bad_count;
+ int gf_update_recommended;
+ int skip_true_count[3];
+ int skip_false_count[3];
+
+ unsigned char *segmentation_map;
+
+ // segment threshold for encode breakout
+ int segment_encode_breakout[MAX_MB_SEGMENTS];
+
+ unsigned char *active_map;
+ unsigned int active_map_enabled;
+
+ TOKENLIST *tplist;
+
+ fractional_mv_step_fp *find_fractional_mv_step;
+ vp9_full_search_fn_t full_search_sad;
+ vp9_refining_search_fn_t refining_search_sad;
+ vp9_diamond_search_fn_t diamond_search_sad;
+ vp9_variance_fn_ptr_t fn_ptr[BLOCK_MAX_SB_SEGMENTS];
+ uint64_t time_receive_data;
+ uint64_t time_compress_data;
+ uint64_t time_pick_lpf;
+ uint64_t time_encode_mb_row;
+
+ int base_skip_false_prob[QINDEX_RANGE][3];
+
+ struct twopass_rc {
+ unsigned int section_intra_rating;
+ unsigned int next_iiratio;
+ unsigned int this_iiratio;
+ FIRSTPASS_STATS *total_stats;
+ FIRSTPASS_STATS *this_frame_stats;
+ FIRSTPASS_STATS *stats_in, *stats_in_end, *stats_in_start;
+ FIRSTPASS_STATS *total_left_stats;
+ int first_pass_done;
+ int64_t bits_left;
+ int64_t clip_bits_total;
+ double avg_iiratio;
+ double modified_error_total;
+ double modified_error_used;
+ double modified_error_left;
+ double kf_intra_err_min;
+ double gf_intra_err_min;
+ int frames_to_key;
+ int maxq_max_limit;
+ int maxq_min_limit;
+ int static_scene_max_gf_interval;
+ int kf_bits;
+ // Remaining error from uncoded frames in a gf group. Two pass use only
+ int64_t gf_group_error_left;
+
+ // Projected total bits available for a key frame group of frames
+ int64_t kf_group_bits;
+
+ // Error score of frames still to be coded in kf group
+ int64_t kf_group_error_left;
+
+ // Projected Bits available for a group of frames including 1 GF or ARF
+ int64_t gf_group_bits;
+ // Bits for the golden frame or ARF - 2 pass only
+ int gf_bits;
+ int alt_extra_bits;
+
+ int sr_update_lag;
+ double est_max_qcorrection_factor;
+ } twopass;
+
+#if VP9_TEMPORAL_ALT_REF
+ YV12_BUFFER_CONFIG alt_ref_buffer;
+ YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS];
+ int fixed_divide[512];
+#endif
+
+#if CONFIG_INTERNAL_STATS
+ int count;
+ double total_y;
+ double total_u;
+ double total_v;
+ double total;
+ double total_sq_error;
+ double totalp_y;
+ double totalp_u;
+ double totalp_v;
+ double totalp;
+ double total_sq_error2;
+ int bytes;
+ double summed_quality;
+ double summed_weights;
+ unsigned int tot_recode_hits;
+
+
+ double total_ssimg_y;
+ double total_ssimg_u;
+ double total_ssimg_v;
+ double total_ssimg_all;
+
+ int b_calculate_ssimg;
+#endif
+ int b_calculate_psnr;
+
+ // Per MB activity measurement
+ unsigned int activity_avg;
+ unsigned int *mb_activity_map;
+ int *mb_norm_activity_map;
+
+ // Record of which MBs still refer to last golden frame either
+ // directly or through 0,0
+ unsigned char *gf_active_flags;
+ int gf_active_count;
+
+ int output_partition;
+
+ // Store last frame's MV info for next frame MV prediction
+ int_mv *lfmv;
+ int *lf_ref_frame_sign_bias;
+ int *lf_ref_frame;
+
+ /* force next frame to intra when kf_auto says so */
+ int force_next_frame_intra;
+
+ int droppable;
+
+ // TODO Do we still need this??
+ int update_context;
+
+ int dummy_packing; /* flag to indicate if packing is dummy */
+
+#if CONFIG_PRED_FILTER
+ int pred_filter_on_count;
+ int pred_filter_off_count;
+#endif
+ unsigned int switchable_interp_count[VP9_SWITCHABLE_FILTERS + 1]
+ [VP9_SWITCHABLE_FILTERS];
+#if CONFIG_NEW_MVREF
+ unsigned int best_ref_index_counts[MAX_REF_FRAMES][MAX_MV_REFS];
+#endif
+
+} VP9_COMP;
+
+void vp9_encode_frame(VP9_COMP *cpi);
+
+void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest,
+ unsigned long *size);
+
+void vp9_activity_masking(VP9_COMP *cpi, MACROBLOCK *x);
+
+void vp9_set_speed_features(VP9_COMP *cpi);
+
+extern int vp9_calc_ss_err(YV12_BUFFER_CONFIG *source,
+ YV12_BUFFER_CONFIG *dest);
+
+extern void vp9_alloc_compressor_data(VP9_COMP *cpi);
+
+#if CONFIG_DEBUG // debug variant: the error message also carries __FILE__ and __LINE__
+#define CHECK_MEM_ERROR(lval,expr) do {\
+ lval = (expr); \
+ if(!lval) \
+ vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,\
+ "Failed to allocate "#lval" at %s:%d", \
+ __FILE__,__LINE__);\
+ } while(0)
+#else // other builds: the error message names only the stringified lval
+#define CHECK_MEM_ERROR(lval,expr) do {\
+ lval = (expr); \
+ if(!lval) \
+ vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,\
+ "Failed to allocate "#lval);\
+ } while(0)
+#endif // CHECK_MEM_ERROR: assigns expr to lval and reports if it is null/zero; both variants need a `cpi` pointer in the expanding scope
+#endif // __INC_ONYX_INT_H
diff --git a/vp9/encoder/vp9_picklpf.c b/vp9/encoder/vp9_picklpf.c
new file mode 100644
index 0000000..4eb51df
--- /dev/null
+++ b/vp9/encoder/vp9_picklpf.c
@@ -0,0 +1,339 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vp9/common/vp9_onyxc_int.h"
+#include "vp9/encoder/vp9_onyx_int.h"
+#include "vp9/encoder/vp9_picklpf.h"
+#include "vp9/encoder/vp9_quantize.h"
+#include "vpx_mem/vpx_mem.h"
+#include "vpx_scale/vpxscale.h"
+#include "vp9/common/vp9_alloccommon.h"
+#include "vp9/common/vp9_loopfilter.h"
+#include "./vpx_scale_rtcd.h"
+
+void vp9_yv12_copy_partial_frame_c(YV12_BUFFER_CONFIG *src_ybc,
+                                   YV12_BUFFER_CONFIG *dst_ybc, int Fraction) {
+  // Copies one horizontal band of luma rows from src_ybc to dst_ybc.
+  //
+  // The band starts 8 rows above row (y_height >> 5) * 16 and is
+  // 16 * max(1, y_height >> (Fraction + 4)) + 16 rows tall. Height and
+  // stride are read from src_ybc only, and the same byte offset is applied
+  // to both buffers.
+  const int height = src_ybc->y_height;
+  const int stride = src_ybc->y_stride;
+
+  // Number of 16-row groups requested, never fewer than one.
+  const int groups = height >> (Fraction + 4);
+  const int band_rows = ((groups < 1) ? 1 : groups) << 4;
+
+  // Byte offset of the first copied row.
+  const int first = stride * ((height >> 5) * 16 - 8);
+
+  unsigned char *const from = src_ybc->y_buffer + first;
+  unsigned char *const to = dst_ybc->y_buffer + first;
+
+  // The extra 16 rows are always included on top of the band itself.
+  vpx_memcpy(to, from, stride * (band_rows + 16));
+}
+
+static int calc_partial_ssl_err(YV12_BUFFER_CONFIG *source,
+                                YV12_BUFFER_CONFIG *dest, int Fraction) {
+  // Sums vp9_mse16x16() over the 16x16 luma blocks of a horizontal band,
+  // comparing source against dest, and returns the total.
+  //
+  // The band is max(1, source->y_height >> (Fraction + 4)) block rows tall
+  // and begins at row (dest->y_height >> 5) * 16 in both buffers (the start
+  // row is derived from dest's height even for the source pointer).
+  const int src_stride = source->y_stride;
+  const int dst_stride = dest->y_stride;
+  const int first_row = (dest->y_height >> 5) * 16;
+  const int wanted = source->y_height >> (Fraction + 4);
+  const int block_rows = (wanted < 1) ? 1 : wanted;
+
+  unsigned char *src_row = source->y_buffer + src_stride * first_row;
+  unsigned char *dst_row = dest->y_buffer + dst_stride * first_row;
+
+  int total = 0;
+  int row, col;
+
+  for (row = 0; row < block_rows; ++row) {
+    // Walk the row left to right in 16-pixel steps across source->y_width.
+    for (col = 0; col < source->y_width; col += 16) {
+      unsigned int sse;  // passed by address to vp9_mse16x16(); unused here
+
+      total += vp9_mse16x16(src_row + col, src_stride,
+                            dst_row + col, dst_stride, &sse);
+    }
+
+    // Step both pointers down one row of 16x16 blocks.
+    src_row += 16 * src_stride;
+    dst_row += 16 * dst_stride;
+  }
+
+  return total;
+}
+
+// Lower bound for the loop filter level search.
+static int get_min_filter_level(VP9_COMP *cpi, int base_qindex) {
+  // Always 0 for now: the Q-dependent floor below is commented out, which
+  // also leaves both parameters unused.
+ /*int q = (int) vp9_convert_qindex_to_q(base_qindex);
+
+ if (cpi->source_alt_ref_active && cpi->common.refresh_golden_frame && !cpi->common.refresh_alt_ref_frame)
+ min_filter_level = 0;
+ else
+ {
+ if (q <= 10)
+ min_filter_level = 0;
+ else if (q <= 64)
+ min_filter_level = 1;
+ else
+ min_filter_level = (q >> 6);
+ }
+ */
+
+  return 0;
+}
+
+// Upper bound for the loop filter level search.
+static int get_max_filter_level(VP9_COMP *cpi, int base_qindex) {
+  // The full MAX_LOOP_FILTER range is allowed unless the two-pass
+  // section_intra_rating exceeds 8, in which case the ceiling drops to
+  // three quarters of MAX_LOOP_FILTER (integer arithmetic).
+  // base_qindex is not consulted.
+  (void)base_qindex;
+
+  if (cpi->twopass.section_intra_rating > 8) {
+    return MAX_LOOP_FILTER * 3 / 4;
+  }
+
+  return MAX_LOOP_FILTER;
+}
+
+void vp9_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi) {
+ VP9_COMMON *cm = &cpi->common;
+ // Chooses cm->filter_level by trial-filtering part of cm->frame_to_show and scoring each trial against sd with calc_partial_ssl_err().
+ int best_err = 0;
+ int filt_err = 0;
+ int min_filter_level = get_min_filter_level(cpi, cm->base_qindex);
+ int max_filter_level = get_max_filter_level(cpi, cm->base_qindex);
+ int filt_val;
+ int best_filt_val = cm->filter_level;
+
+ // Save the unfiltered reconstruction band in last_frame_uf so that every trial below can be undone
+ vp9_yv12_copy_partial_frame(cm->frame_to_show, &cpi->last_frame_uf, 3);
+
+ if (cm->frame_type == KEY_FRAME)
+ cm->sharpness_level = 0;
+ else
+ cm->sharpness_level = cpi->oxcf.Sharpness;
+
+ if (cm->sharpness_level != cm->last_sharpness_level) {
+ vp9_loop_filter_update_sharpness(&cm->lf_info, cm->sharpness_level);
+ cm->last_sharpness_level = cm->sharpness_level;
+ }
+
+ // Start the search at the previous frame filter level unless it is now out of range.
+ if (cm->filter_level < min_filter_level)
+ cm->filter_level = min_filter_level;
+ else if (cm->filter_level > max_filter_level)
+ cm->filter_level = max_filter_level;
+
+ filt_val = cm->filter_level;
+ best_filt_val = filt_val;
+
+ // Get the err using the previous frame's filter value.
+ vp9_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val);
+
+ best_err = calc_partial_ssl_err(sd, cm->frame_to_show, 3);
+
+ // Re-instate the unfiltered frame
+ vp9_yv12_copy_partial_frame(&cpi->last_frame_uf, cm->frame_to_show, 3);
+
+ filt_val -= (1 + ((filt_val > 10) ? 1 : 0)); // step is 2 above level 10, otherwise 1
+
+ // Search lower filter levels
+ while (filt_val >= min_filter_level) {
+ // Apply the loop filter
+ vp9_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val);
+
+ // Get the err for filtered frame
+ filt_err = calc_partial_ssl_err(sd, cm->frame_to_show, 3);
+
+ // Re-instate the unfiltered frame
+ vp9_yv12_copy_partial_frame(&cpi->last_frame_uf, cm->frame_to_show, 3);
+
+
+ // Update the best case record or exit loop.
+ if (filt_err < best_err) {
+ best_err = filt_err;
+ best_filt_val = filt_val;
+ } else
+ break;
+
+ // Adjust filter level
+ filt_val -= (1 + ((filt_val > 10) ? 1 : 0));
+ }
+
+ // Search up from cm->filter_level. NOTE(review): the step below is derived from filt_val as left by the downward search, not from cm->filter_level - confirm this is intended.
+ filt_val = cm->filter_level + (1 + ((filt_val > 10) ? 1 : 0));
+
+ if (best_filt_val == cm->filter_level) {
+ // Resist raising filter level for very small gains
+ best_err -= (best_err >> 10);
+
+ while (filt_val < max_filter_level) { // strict '<': max_filter_level itself is never trialled inside this loop
+ // Apply the loop filter
+ vp9_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val);
+
+ // Get the err for filtered frame
+ filt_err = calc_partial_ssl_err(sd, cm->frame_to_show, 3);
+
+ // Re-instate the unfiltered frame
+ vp9_yv12_copy_partial_frame(&cpi->last_frame_uf,
+ cm->frame_to_show, 3);
+
+ // Update the best case record or exit loop.
+ if (filt_err < best_err) {
+ // Do not raise filter level further if improvement is < 1 part in 1024 (>> 10)
+ best_err = filt_err - (filt_err >> 10);
+
+ best_filt_val = filt_val;
+ } else
+ break;
+
+ // Adjust filter level
+ filt_val += (1 + ((filt_val > 10) ? 1 : 0));
+ }
+ }
+
+ cm->filter_level = best_filt_val;
+
+ if (cm->filter_level < min_filter_level)
+ cm->filter_level = min_filter_level;
+
+ if (cm->filter_level > max_filter_level)
+ cm->filter_level = max_filter_level;
+}
+
+// Stub: alternate loop filter levels are not used for now, so this does nothing with cpi or filt_val.
+void vp9_set_alt_lf_level(VP9_COMP *cpi, int filt_val) {
+}
+
+void vp9_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi) {
+ VP9_COMMON *cm = &cpi->common;
+ // Chooses cm->filter_level with a step-halving search around the previous level, scoring each trial against sd with vp9_calc_ss_err().
+ int best_err = 0;
+ int filt_err = 0;
+ int min_filter_level = get_min_filter_level(cpi, cm->base_qindex);
+ int max_filter_level = get_max_filter_level(cpi, cm->base_qindex);
+
+ int filter_step;
+ int filt_high = 0;
+ int filt_mid = cm->filter_level; // Start search at previous frame filter level
+ int filt_low = 0;
+ int filt_best;
+ int filt_direction = 0; // -1: last move was down, 1: last move was up, 0: try both sides
+
+ int Bias = 0; // Bias against raising loop filter and in favour of lowering it
+
+ // Make a copy of the unfiltered / processed recon buffer
+ vp8_yv12_copy_frame(cm->frame_to_show, &cpi->last_frame_uf);
+
+ if (cm->frame_type == KEY_FRAME)
+ cm->sharpness_level = 0;
+ else
+ cm->sharpness_level = cpi->oxcf.Sharpness;
+
+ // Start the search at the previous frame filter level unless it is now out of range.
+ filt_mid = cm->filter_level;
+
+ if (filt_mid < min_filter_level)
+ filt_mid = min_filter_level;
+ else if (filt_mid > max_filter_level)
+ filt_mid = max_filter_level;
+
+ // Define the initial step size
+ filter_step = (filt_mid < 16) ? 4 : filt_mid / 4;
+
+ // Get baseline error score (vp9_set_alt_lf_level() is currently an empty stub)
+ vp9_set_alt_lf_level(cpi, filt_mid);
+ vp9_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_mid);
+
+ best_err = vp9_calc_ss_err(sd, cm->frame_to_show);
+ filt_best = filt_mid;
+
+ // Re-instate the unfiltered frame
+ vp8_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show);
+
+ while (filter_step > 0) {
+ Bias = (best_err >> (15 - (filt_mid / 8))) * filter_step; // PGW change 12/12/06 for small images
+
+ // jbb chg: 20100118 - in sections with lots of new material coming in don't bias as much to a low filter value
+ if (cpi->twopass.section_intra_rating < 20)
+ Bias = Bias * cpi->twopass.section_intra_rating / 20;
+
+ // yx, bias less for large block size
+ if (cpi->common.txfm_mode != ONLY_4X4)
+ Bias >>= 1;
+
+ filt_high = ((filt_mid + filter_step) > max_filter_level) ? max_filter_level : (filt_mid + filter_step);
+ filt_low = ((filt_mid - filter_step) < min_filter_level) ? min_filter_level : (filt_mid - filter_step);
+
+ if ((filt_direction <= 0) && (filt_low != filt_mid)) {
+ // Get Low filter error score
+ vp9_set_alt_lf_level(cpi, filt_low);
+ vp9_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_low);
+
+ filt_err = vp9_calc_ss_err(sd, cm->frame_to_show);
+
+ // Re-instate the unfiltered frame
+ vp8_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show);
+
+ // If value is close to the best so far then bias towards a lower loop filter value.
+ if ((filt_err - Bias) < best_err) {
+ // Was it actually better than the previous best?
+ if (filt_err < best_err)
+ best_err = filt_err;
+
+ filt_best = filt_low; // taken even when filt_err is only within Bias of best_err
+ }
+ }
+
+ // Now look at filt_high
+ if ((filt_direction >= 0) && (filt_high != filt_mid)) {
+ vp9_set_alt_lf_level(cpi, filt_high);
+ vp9_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_high);
+
+ filt_err = vp9_calc_ss_err(sd, cm->frame_to_show);
+
+ // Re-instate the unfiltered frame
+ vp8_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show);
+
+ // Was it better than the previous best by more than Bias?
+ if (filt_err < (best_err - Bias)) {
+ best_err = filt_err;
+ filt_best = filt_high;
+ }
+ }
+
+ // Halve the step distance if the best filter value was the same as last time
+ if (filt_best == filt_mid) {
+ filter_step = filter_step / 2;
+ filt_direction = 0;
+ } else {
+ filt_direction = (filt_best < filt_mid) ? -1 : 1;
+ filt_mid = filt_best;
+ }
+ }
+
+ cm->filter_level = filt_best;
+}
diff --git a/vp9/encoder/vp9_picklpf.h b/vp9/encoder/vp9_picklpf.h
new file mode 100644
index 0000000..b5c6bdd
--- /dev/null
+++ b/vp9/encoder/vp9_picklpf.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_ENCODER_VP9_PICKLPF_H_
+#define VP9_ENCODER_VP9_PICKLPF_H_
+
+struct yv12_buffer_config; // forward declarations only: this header includes nothing
+struct VP9_COMP;
+
+extern void vp9_pick_filter_level_fast(struct yv12_buffer_config *sd,
+ struct VP9_COMP *cpi);
+
+extern void vp9_set_alt_lf_level(struct VP9_COMP *cpi, int filt_val);
+
+extern void vp9_pick_filter_level(struct yv12_buffer_config *sd,
+ struct VP9_COMP *cpi);
+
+#endif // VP9_ENCODER_VP9_PICKLPF_H_
diff --git a/vp9/encoder/vp9_psnr.c b/vp9/encoder/vp9_psnr.c
new file mode 100644
index 0000000..eb00f41
--- /dev/null
+++ b/vp9/encoder/vp9_psnr.c
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_scale/yv12config.h"
+#include "math.h"
+#include "vp9/common/vp9_systemdependent.h" /* for vp9_clear_system_state() */
+
+#define MAX_PSNR 100
+
+double vp9_mse2psnr(double Samples, double Peak, double Mse) {
+  // Returns 10 * log10(Peak * Peak * Samples / Mse), capped at MAX_PSNR.
+  // When Mse is not greater than zero the cap is returned directly, which
+  // also avoids dividing by zero.
+  double db = MAX_PSNR;
+
+  if (Mse > 0.0) {
+    db = 10.0 * log10(Peak * Peak * Samples / Mse);
+    if (db > MAX_PSNR) db = MAX_PSNR;
+  }
+
+  return db;
+}
diff --git a/vp9/encoder/vp9_psnr.h b/vp9/encoder/vp9_psnr.h
new file mode 100644
index 0000000..7dd5048e
--- /dev/null
+++ b/vp9/encoder/vp9_psnr.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_ENCODER_VP9_PSNR_H_
+#define VP9_ENCODER_VP9_PSNR_H_
+// vp9_mse2psnr: 10 * log10(Peak * Peak * Samples / Mse), capped at 100; returns 100 when Mse is not > 0.
+extern double vp9_mse2psnr(double Samples, double Peak, double Mse);
+
+#endif // VP9_ENCODER_VP9_PSNR_H_
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
new file mode 100644
index 0000000..f160edb
--- /dev/null
+++ b/vp9/encoder/vp9_quantize.c
@@ -0,0 +1,741 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+#include "vpx_mem/vpx_mem.h"
+
+#include "vp9/encoder/vp9_onyx_int.h"
+#include "vp9/encoder/vp9_quantize.h"
+#include "vp9/common/vp9_quant_common.h"
+
+#include "vp9/common/vp9_seg_common.h"
+
+#ifdef ENC_DEBUG
+extern int enc_debug;
+#endif
+
+void vp9_ht_quantize_b_4x4(BLOCK *b, BLOCKD *d, TX_TYPE tx_type) {
+  // Quantizes up to b->eob_max_offset coefficients of b->coeff. Levels go
+  // to d->qcoeff, their dequantized values to d->dqcoeff, and d->eob gets
+  // one plus the scan index of the last non-zero level (0 if there is
+  // none). |tx_type| picks the scan order: vp9_row_scan for ADST_DCT,
+  // vp9_col_scan for DCT_ADST and vp9_default_zig_zag1d otherwise.
+
+  short *src = b->coeff;
+  short *zbin_tab = b->zbin;
+  short *round_tab = b->round;
+  short *quant_tab = b->quant;
+  unsigned char *shift_tab = b->quant_shift;
+  short *dequant_tab = d->dequant;
+  short *levels = d->qcoeff;
+  short *recon = d->dqcoeff;
+  short *boost = b->zrun_zbin_boost;  // advances once per scanned coeff
+  short extra = b->zbin_extra;
+
+  int const *scan;
+  int last_nz = -1;
+  int n;
+
+  // Choose the coefficient visiting order.
+  if (tx_type == ADST_DCT)
+    scan = vp9_row_scan;
+  else if (tx_type == DCT_ADST)
+    scan = vp9_col_scan;
+  else
+    scan = vp9_default_zig_zag1d;
+
+  // Both outputs start zeroed (32 bytes each).
+  vpx_memset(levels, 0, 32);
+  vpx_memset(recon, 0, 32);
+
+  for (n = 0; n < b->eob_max_offset; n++) {
+    const int pos = scan[n];
+    const int coeff = src[pos];
+    // NOTE: relies on >> of a negative int being an arithmetic shift.
+    const int sign = coeff >> 31;              // sign of coeff
+    const int mag = (coeff ^ sign) - sign;     // abs(coeff)
+    // Dead zone: base zbin + zero-run boost + per-block extra.
+    const int zbin = zbin_tab[pos] + *boost++ + extra;
+    int rounded, level, signed_level;
+
+    if (mag < zbin)
+      continue;
+
+    rounded = mag + round_tab[pos];
+    level = (((rounded * quant_tab[pos]) >> 16) + rounded) >> shift_tab[pos];
+    signed_level = (level ^ sign) - sign;      // put the sign back
+    levels[pos] = signed_level;
+    recon[pos] = signed_level * dequant_tab[pos];
+
+    // A non-zero level ends the current run of zeros.
+    if (level) {
+      last_nz = n;
+      boost = b->zrun_zbin_boost;              // zero run is over
+    }
+  }
+
+  // eob is a count: one past the last non-zero scan index.
+  d->eob = last_nz + 1;
+}
+
+void vp9_regular_quantize_b_4x4(BLOCK *b, BLOCKD *d) {
+  // Quantizes b->coeff in vp9_default_zig_zag1d order: levels go to
+  // d->qcoeff, dequantized values to d->dqcoeff, and d->eob is set to one
+  // plus the scan index of the last non-zero level (0 if there is none).
+
+  short *src = b->coeff;
+  short *zbin_tab = b->zbin;
+  short *round_tab = b->round;
+  short *quant_tab = b->quant;
+  unsigned char *shift_tab = b->quant_shift;
+  short *dequant_tab = d->dequant;
+  short *levels = d->qcoeff;
+  short *recon = d->dqcoeff;
+  short *boost = b->zrun_zbin_boost;  // advances once per scanned coeff
+  short extra = b->zbin_extra;
+  int last_nz = -1;
+  int n;
+
+  // Both outputs start zeroed (32 bytes each).
+  vpx_memset(levels, 0, 32);
+  vpx_memset(recon, 0, 32);
+
+  for (n = 0; n < b->eob_max_offset; n++) {
+    const int pos = vp9_default_zig_zag1d[n];
+    const int coeff = src[pos];
+    const int sign = coeff >> 31;              // sign of coeff
+    const int mag = (coeff ^ sign) - sign;     // abs(coeff)
+    // Dead zone: base zbin + zero-run boost + per-block extra.
+    const int zbin = zbin_tab[pos] + *boost++ + extra;
+    int rounded, level, signed_level;
+
+    if (mag < zbin)
+      continue;
+
+    rounded = mag + round_tab[pos];
+    level = (((rounded * quant_tab[pos]) >> 16) + rounded) >> shift_tab[pos];
+    signed_level = (level ^ sign) - sign;      // put the sign back
+    levels[pos] = signed_level;
+    recon[pos] = signed_level * dequant_tab[pos];
+
+    if (level) {
+      last_nz = n;
+      boost = b->zrun_zbin_boost;              // zero run is over
+    }
+  }
+
+  d->eob = last_nz + 1;
+}
+
+void vp9_quantize_mby_4x4_c(MACROBLOCK *x) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  BLOCKD *const y2 = &xd->block[24];
+  const int has_2nd_order = get_2nd_order_usage(xd);
+  int n;
+
+  for (n = 0; n < 16; n++) {
+    const TX_TYPE tx_type = get_tx_type_4x4(xd, &xd->block[n]);
+    if (tx_type == DCT_DCT) {
+      x->quantize_b_4x4(&x->block[n], &xd->block[n]);
+    } else {
+      assert(has_2nd_order == 0);
+      vp9_ht_quantize_b_4x4(&x->block[n], &xd->block[n], tx_type);
+    }
+  }
+  if (!has_2nd_order) {  // no 2nd-order usage: leave block 24 all-zero
+    vpx_memset(y2->qcoeff, 0, 16 * sizeof(y2->qcoeff[0]));
+    vpx_memset(y2->dqcoeff, 0, 16 * sizeof(y2->dqcoeff[0]));
+    y2->eob = 0;
+  } else {
+    x->quantize_b_4x4(&x->block[24], y2);
+  }
+}
+
+void vp9_quantize_mbuv_4x4_c(MACROBLOCK *x) {
+  int n = 16;  // blocks 16..23 are quantized here
+
+  for (; n < 24; ++n)
+    x->quantize_b_4x4(&x->block[n], &x->e_mbd.block[n]);
+}
+
+void vp9_quantize_mb_4x4_c(MACROBLOCK *x) {  // 4x4 path for a whole MB
+ vp9_quantize_mby_4x4_c(x);  // blocks 0..15 and block 24
+ vp9_quantize_mbuv_4x4_c(x);  // blocks 16..23
+}
+
+void vp9_regular_quantize_b_2x2(BLOCK *b, BLOCKD *d) {
+  // Quantizes b->coeff in vp9_default_zig_zag1d order for at most
+  // b->eob_max_offset_8x8 coefficients. The zero-run boost table is stepped
+  // four entries per coefficient. Results go to d->qcoeff, d->dqcoeff and
+  // d->eob (one plus the scan index of the last non-zero level).
+  // NOTE(review): nothing here bounds eob_max_offset_8x8 by the table
+  // sizes; the caller is presumably expected to keep it small - confirm.
+  short *src = b->coeff;
+  short *zbin_tab = b->zbin;
+  short *round_tab = b->round;
+  short *quant_tab = b->quant;
+  unsigned char *shift_tab = b->quant_shift;
+  short *dequant_tab = d->dequant;
+  short *levels = d->qcoeff;
+  short *recon = d->dqcoeff;
+  short extra = b->zbin_extra;
+  int boost_idx = 0;  // index into b->zrun_zbin_boost, +4 per coefficient
+  int last_nz = -1;
+  int n;
+
+  vpx_memset(levels, 0, 32);
+  vpx_memset(recon, 0, 32);
+
+  for (n = 0; n < b->eob_max_offset_8x8; n++) {
+    const int pos = vp9_default_zig_zag1d[n];
+    const int coeff = src[pos];
+    const int sign = coeff >> 31;              // sign of coeff
+    const int mag = (coeff ^ sign) - sign;     // abs(coeff)
+    const int zbin = zbin_tab[pos] + b->zrun_zbin_boost[boost_idx] + extra;
+    int rounded, level, signed_level;
+
+    boost_idx += 4;
+    if (mag < zbin)
+      continue;
+
+    rounded = mag + round_tab[pos];
+    level = (((rounded * quant_tab[pos]) >> 16) + rounded) >> shift_tab[pos];
+    signed_level = (level ^ sign) - sign;      // put the sign back
+    levels[pos] = signed_level;
+    recon[pos] = signed_level * dequant_tab[pos];
+
+    if (level) {
+      last_nz = n;
+      boost_idx = 0;                           // zero run is over
+    }
+  }
+
+  d->eob = last_nz + 1;
+}
+
+void vp9_regular_quantize_b_8x8(BLOCK *b, BLOCKD *d) {
+  // 8x8 quantizer. Each table is read at only two indices: [0] for
+  // coefficient position 0 and [1] for every other position. Outputs are
+  // d->qcoeff, d->dqcoeff and d->eob (count up to the last non-zero level).
+
+  short *src = b->coeff;
+  short *zbin_tab = b->zbin_8x8;
+  short *round_tab = b->round;
+  short *quant_tab = b->quant;
+  unsigned char *shift_tab = b->quant_shift;
+  short *dequant_tab = d->dequant;
+  short *levels = d->qcoeff;
+  short *recon = d->dqcoeff;
+  short *boost = b->zrun_zbin_boost_8x8;  // advances once per scanned coeff
+  short extra = b->zbin_extra;
+  int last_nz = -1;
+  int n;
+
+  vpx_memset(levels, 0, 64 * sizeof(short));
+  vpx_memset(recon, 0, 64 * sizeof(short));
+
+  for (n = 0; n < b->eob_max_offset_8x8; n++) {
+    const int pos = vp9_default_zig_zag1d_8x8[n];
+    const int idx = (pos != 0);                // table entry to use
+    const int coeff = src[pos];
+    const int sign = coeff >> 31;              // sign of coeff
+    const int mag = (coeff ^ sign) - sign;     // abs(coeff)
+    const int zbin = zbin_tab[idx] + *boost++ + extra;
+    int rounded, level, signed_level;
+
+    if (mag < zbin)
+      continue;
+
+    rounded = mag + round_tab[idx];
+    level = (((rounded * quant_tab[idx]) >> 16) + rounded) >> shift_tab[idx];
+    signed_level = (level ^ sign) - sign;      // put the sign back
+    levels[pos] = signed_level;
+    recon[pos] = signed_level * dequant_tab[idx];
+
+    if (level) {
+      last_nz = n;
+      boost = b->zrun_zbin_boost_8x8;          // zero run is over
+    }
+  }
+
+  d->eob = last_nz + 1;
+}
+
+void vp9_quantize_mby_8x8(MACROBLOCK *x) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  BLOCKD *const y2 = &xd->block[24];
+  const int has_2nd_order = get_2nd_order_usage(xd);
+  int i;
+
+  // Blocks 0..15 and block 24 start out with an empty eob.
+  y2->eob = 0;
+  for (i = 0; i < 16; i++)
+    xd->block[i].eob = 0;
+  for (i = 0; i < 16; i += 4) {
+    const int ib = (i & 8) + ((i & 4) >> 1);  // 0, 2, 8, 10
+    const TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[ib]);
+    if (tx_type != DCT_DCT)
+      assert(has_2nd_order == 0);
+    x->quantize_b_8x8(&x->block[i], &xd->block[i]);
+  }
+
+  if (!has_2nd_order) {
+    vpx_memset(y2->qcoeff, 0, 16 * sizeof(y2->qcoeff[0]));
+    vpx_memset(y2->dqcoeff, 0, 16 * sizeof(y2->dqcoeff[0]));
+    y2->eob = 0;
+  } else {
+    x->quantize_b_2x2(&x->block[24], y2);
+  }
+}
+
+void vp9_quantize_mbuv_8x8(MACROBLOCK *x) {
+  BLOCKD *const uv = &x->e_mbd.block[16];  // blocks 16..23
+  int n;
+  for (n = 0; n < 8; n++)
+    uv[n].eob = 0;
+  for (n = 0; n < 8; n += 4)
+    x->quantize_b_8x8(&x->block[16 + n], &uv[n]);
+}
+
+void vp9_quantize_mb_8x8(MACROBLOCK *x) {  // 8x8 path for a whole MB
+ vp9_quantize_mby_8x8(x);  // Y: blocks 0, 4, 8, 12, plus block 24
+ vp9_quantize_mbuv_8x8(x);  // UV: blocks 16 and 20
+}
+
+void vp9_quantize_mby_16x16(MACROBLOCK *x) {
+  BLOCKD *const blocks = x->e_mbd.block;
+  int n = 16;
+
+  blocks[24].eob = 0;
+  while (n-- > 0) blocks[n].eob = 0;  // clear eob of blocks 15..0
+  x->quantize_b_16x16(&x->block[0], &blocks[0]);
+}
+
+void vp9_quantize_mb_16x16(MACROBLOCK *x) {  // 16x16 Y, 8x8 UV
+ vp9_quantize_mby_16x16(x);  // Y: block 0 through quantize_b_16x16
+ vp9_quantize_mbuv_8x8(x);  // UV: blocks 16 and 20 through quantize_b_8x8
+}
+
+void vp9_regular_quantize_b_16x16(BLOCK *b, BLOCKD *d) {
+  // 16x16 quantizer. Each table is read at only two indices: [0] for
+  // coefficient position 0 and [1] for every other position. Outputs are
+  // d->qcoeff, d->dqcoeff and d->eob (count up to the last non-zero level).
+  short *src = b->coeff;
+  short *zbin_tab = b->zbin_16x16;
+  short *round_tab = b->round;
+  short *quant_tab = b->quant;
+  unsigned char *shift_tab = b->quant_shift;
+  short *dequant_tab = d->dequant;
+  short *levels = d->qcoeff;
+  short *recon = d->dqcoeff;
+  short *boost = b->zrun_zbin_boost_16x16;  // advances once per scanned coeff
+  short extra = b->zbin_extra;
+  int last_nz = -1;
+  int n;
+
+  vpx_memset(levels, 0, 256 * sizeof(short));
+  vpx_memset(recon, 0, 256 * sizeof(short));
+
+  for (n = 0; n < b->eob_max_offset_16x16; n++) {
+    const int pos = vp9_default_zig_zag1d_16x16[n];
+    const int idx = (pos != 0);                // table entry to use
+    const int coeff = src[pos];
+    const int sign = coeff >> 31;              // sign of coeff
+    const int mag = (coeff ^ sign) - sign;     // abs(coeff)
+    const int zbin = zbin_tab[idx] + *boost++ + extra;
+    int rounded, level, signed_level;
+
+    if (mag < zbin)
+      continue;
+
+    rounded = mag + round_tab[idx];
+    level = (((rounded * quant_tab[idx]) >> 16) + rounded) >> shift_tab[idx];
+    signed_level = (level ^ sign) - sign;      // put the sign back
+    levels[pos] = signed_level;
+    recon[pos] = signed_level * dequant_tab[idx];
+
+    if (level) {
+      last_nz = n;
+      boost = b->zrun_zbin_boost_16x16;        // zero run is over
+    }
+  }
+
+  d->eob = last_nz + 1;
+}
+
+/* Quantizes two 4x4 blocks back to back by running
+ * vp9_regular_quantize_b_4x4() on (b1, d1) and then on (b2, d2). Per the
+ * original note, this is the plain C fallback for MACROBLOCK's
+ * quantize_b_pair function pointer when no optimized routine is available. */
+void vp9_regular_quantize_b_4x4_pair(BLOCK *b1, BLOCK *b2,
+ BLOCKD *d1, BLOCKD *d2) {
+ vp9_regular_quantize_b_4x4(b1, d1);
+ vp9_regular_quantize_b_4x4(b2, d2);
+}
+
+// Fixed-point reciprocal of d (d >= 1): *shift = floor(log2(d)),
+// *quant = (short)(1 + (1 << (16 + *shift)) / d - (1 << 16)).
+static void invert_quant(short *quant, unsigned char *shift, short d) {
+  unsigned remaining = d;
+  int log2_d = 0;
+  for (; remaining > 1; remaining >>= 1)
+    ++log2_d;
+  *quant = (short)((unsigned)(1 + (1 << (16 + log2_d)) / d) - (1 << 16));
+  *shift = log2_d;
+}
+
+
+// Fills the per-Q-index quantizer tables in |cpi| (quant, quant_shift,
+// zbin, round and zero-run zbin boost) and the dequant tables in
+// cpi->common, for Y1, Y2 and UV, for every Q in [0, QINDEX_RANGE).
+void vp9_init_quantizer(VP9_COMP *cpi) {
+  int i, Q, quant_val;
+  static const int zbin_boost[16] = { 0, 0, 8, 10, 12, 14, 16, 20,
+                                      24, 28, 32, 36, 40, 44, 44, 44 };
+
+  static const int zbin_boost_8x8[64] = { 0, 0, 0, 8, 8, 8, 10, 12,
+                                          14, 16, 18, 20, 22, 24, 26, 28,
+                                          30, 32, 34, 36, 38, 40, 42, 44,
+                                          46, 48, 48, 48, 48, 48, 48, 48,
+                                          48, 48, 48, 48, 48, 48, 48, 48,
+                                          48, 48, 48, 48, 48, 48, 48, 48,
+                                          48, 48, 48, 48, 48, 48, 48, 48,
+                                          48, 48, 48, 48, 48, 48, 48, 48
+                                        };
+  static const int zbin_boost_16x16[256] = {
+    0, 0, 0, 8, 8, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28,
+    30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+  };
+
+  for (Q = 0; Q < QINDEX_RANGE; Q++) {
+    int qzbin_factor = (vp9_dc_quant(Q, 0) < 148) ? 84 : 80;
+    // Reset for every Q so that the lossless override below cannot leak
+    // into the tables built for the following Q indices.
+    int qrounding_factor = 48;
+
+#if CONFIG_LOSSLESS
+    if (cpi->oxcf.lossless) {
+      if (Q == 0) {
+        qzbin_factor = 64;
+        qrounding_factor = 64;
+      }
+    }
+#endif
+
+    // dc values
+    quant_val = vp9_dc_quant(Q, cpi->common.y1dc_delta_q);
+    invert_quant(cpi->Y1quant[Q] + 0,
+                 cpi->Y1quant_shift[Q] + 0, quant_val);
+    cpi->Y1zbin[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
+    cpi->Y1zbin_8x8[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
+    cpi->Y1zbin_16x16[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
+    cpi->Y1round[Q][0] = (qrounding_factor * quant_val) >> 7;
+    cpi->common.Y1dequant[Q][0] = quant_val;
+    cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;
+    cpi->zrun_zbin_boost_y1_8x8[Q][0] =
+      ((quant_val * zbin_boost_8x8[0]) + 64) >> 7;
+    cpi->zrun_zbin_boost_y1_16x16[Q][0] = ((quant_val * zbin_boost_16x16[0]) + 64) >> 7;
+
+    quant_val = vp9_dc2quant(Q, cpi->common.y2dc_delta_q);
+    invert_quant(cpi->Y2quant[Q] + 0,
+                 cpi->Y2quant_shift[Q] + 0, quant_val);
+    cpi->Y2zbin[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
+    cpi->Y2zbin_8x8[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
+    cpi->Y2zbin_16x16[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
+    cpi->Y2round[Q][0] = (qrounding_factor * quant_val) >> 7;
+    cpi->common.Y2dequant[Q][0] = quant_val;
+    cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;
+    cpi->zrun_zbin_boost_y2_8x8[Q][0] =
+      ((quant_val * zbin_boost_8x8[0]) + 64) >> 7;
+    cpi->zrun_zbin_boost_y2_16x16[Q][0] = ((quant_val * zbin_boost_16x16[0]) + 64) >> 7;
+
+    quant_val = vp9_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
+    invert_quant(cpi->UVquant[Q] + 0,
+                 cpi->UVquant_shift[Q] + 0, quant_val);
+    cpi->UVzbin[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
+    cpi->UVzbin_8x8[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
+    cpi->UVzbin_16x16[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
+    cpi->UVround[Q][0] = (qrounding_factor * quant_val) >> 7;
+    cpi->common.UVdequant[Q][0] = quant_val;
+    cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;
+    cpi->zrun_zbin_boost_uv_8x8[Q][0] =
+      ((quant_val * zbin_boost_8x8[0]) + 64) >> 7;
+    cpi->zrun_zbin_boost_uv_16x16[Q][0] = ((quant_val * zbin_boost_16x16[0]) + 64) >> 7;
+
+    // all the 4x4 ac values
+    for (i = 1; i < 16; i++) {
+      int rc = vp9_default_zig_zag1d[i];
+
+      quant_val = vp9_ac_yquant(Q);
+      invert_quant(cpi->Y1quant[Q] + rc,
+                   cpi->Y1quant_shift[Q] + rc, quant_val);
+      cpi->Y1zbin[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7;
+      cpi->Y1round[Q][rc] = (qrounding_factor * quant_val) >> 7;
+      cpi->common.Y1dequant[Q][rc] = quant_val;
+      cpi->zrun_zbin_boost_y1[Q][i] =
+        ((quant_val * zbin_boost[i]) + 64) >> 7;
+
+      quant_val = vp9_ac2quant(Q, cpi->common.y2ac_delta_q);
+      invert_quant(cpi->Y2quant[Q] + rc,
+                   cpi->Y2quant_shift[Q] + rc, quant_val);
+      cpi->Y2zbin[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7;
+      cpi->Y2round[Q][rc] = (qrounding_factor * quant_val) >> 7;
+      cpi->common.Y2dequant[Q][rc] = quant_val;
+      cpi->zrun_zbin_boost_y2[Q][i] =
+        ((quant_val * zbin_boost[i]) + 64) >> 7;
+
+      quant_val = vp9_ac_uv_quant(Q, cpi->common.uvac_delta_q);
+      invert_quant(cpi->UVquant[Q] + rc,
+                   cpi->UVquant_shift[Q] + rc, quant_val);
+      cpi->UVzbin[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7;
+      cpi->UVround[Q][rc] = (qrounding_factor * quant_val) >> 7;
+      cpi->common.UVdequant[Q][rc] = quant_val;
+      cpi->zrun_zbin_boost_uv[Q][i] =
+        ((quant_val * zbin_boost[i]) + 64) >> 7;
+    }
+
+    // 8x8 structures... only zbin separated out for now
+    // This needs cleaning up for 8x8 especially if we are to add
+    // support for non flat Q matrices
+    for (i = 1; i < 64; i++) {
+      int rc = vp9_default_zig_zag1d_8x8[i];
+
+      quant_val = vp9_ac_yquant(Q);
+      cpi->Y1zbin_8x8[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7;
+      cpi->zrun_zbin_boost_y1_8x8[Q][i] =
+        ((quant_val * zbin_boost_8x8[i]) + 64) >> 7;
+
+      quant_val = vp9_ac2quant(Q, cpi->common.y2ac_delta_q);
+      cpi->Y2zbin_8x8[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7;
+      cpi->zrun_zbin_boost_y2_8x8[Q][i] =
+        ((quant_val * zbin_boost_8x8[i]) + 64) >> 7;
+
+      quant_val = vp9_ac_uv_quant(Q, cpi->common.uvac_delta_q);
+      cpi->UVzbin_8x8[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7;
+      cpi->zrun_zbin_boost_uv_8x8[Q][i] =
+        ((quant_val * zbin_boost_8x8[i]) + 64) >> 7;
+    }
+
+    // 16x16 structures. Same comment above applies.
+    for (i = 1; i < 256; i++) {
+      int rc = vp9_default_zig_zag1d_16x16[i];
+
+      quant_val = vp9_ac_yquant(Q);
+      cpi->Y1zbin_16x16[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7;
+      cpi->zrun_zbin_boost_y1_16x16[Q][i] = ((quant_val * zbin_boost_16x16[i]) + 64) >> 7;
+
+      quant_val = vp9_ac2quant(Q, cpi->common.y2ac_delta_q);
+      cpi->Y2zbin_16x16[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7;
+      cpi->zrun_zbin_boost_y2_16x16[Q][i] = ((quant_val * zbin_boost_16x16[i]) + 64) >> 7;
+
+      quant_val = vp9_ac_uv_quant(Q, cpi->common.uvac_delta_q);
+      cpi->UVzbin_16x16[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7;
+      cpi->zrun_zbin_boost_uv_16x16[Q][i] = ((quant_val * zbin_boost_16x16[i]) + 64) >> 7;
+    }
+  }
+}
+
+void vp9_mb_init_quantizer(VP9_COMP *cpi, MACROBLOCK *x) {
+  // Binds every block of |x| to the quantizer tables of the Q index that
+  // applies to the current macroblock (a SEG_LVL_ALT_Q segment feature may
+  // override the frame-level base_qindex), sets zbin_extra and the eob
+  // limits per block, and records the Q index in x->e_mbd.q_index.
+  VP9_COMMON *const cm = &cpi->common;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  const int segment_id = xd->mode_info_context->mbmi.segment_id;
+  BLOCK *const y2 = &x->block[24];
+  int QIndex;
+  int zbin_extra;
+  int n;
+
+  // Select the baseline MB Q index allowing for any segment level change.
+  if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_ALT_Q)) {
+    QIndex = cm->base_qindex;
+  } else if (xd->mb_segment_abs_delta == SEGMENT_ABSDATA) {
+    // The segment data is the Q index itself.
+    QIndex = vp9_get_segdata(xd, segment_id, SEG_LVL_ALT_Q);
+  } else {
+    // The segment data is a delta on base_qindex; clamp to [0, MAXQ].
+    QIndex = cm->base_qindex +
+             vp9_get_segdata(xd, segment_id, SEG_LVL_ALT_Q);
+    if (QIndex < 0)
+      QIndex = 0;
+    else if (QIndex > MAXQ)
+      QIndex = MAXQ;
+  }
+
+  // Y: blocks 0..15
+  zbin_extra = (cm->Y1dequant[QIndex][1] *
+                (cpi->zbin_over_quant +
+                 cpi->zbin_mode_boost +
+                 x->act_zbin_adj)) >> 7;
+
+  for (n = 0; n < 16; n++) {
+    BLOCK *const blk = &x->block[n];
+    blk->quant = cpi->Y1quant[QIndex];
+    blk->quant_shift = cpi->Y1quant_shift[QIndex];
+    blk->zbin = cpi->Y1zbin[QIndex];
+    blk->zbin_8x8 = cpi->Y1zbin_8x8[QIndex];
+    blk->zbin_16x16 = cpi->Y1zbin_16x16[QIndex];
+    blk->round = cpi->Y1round[QIndex];
+    blk->zrun_zbin_boost = cpi->zrun_zbin_boost_y1[QIndex];
+    blk->zrun_zbin_boost_8x8 = cpi->zrun_zbin_boost_y1_8x8[QIndex];
+    blk->zrun_zbin_boost_16x16 = cpi->zrun_zbin_boost_y1_16x16[QIndex];
+    blk->zbin_extra = (short)zbin_extra;
+    xd->block[n].dequant = cm->Y1dequant[QIndex];
+
+    // Segment max eob offset feature.
+    if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB)) {
+      blk->eob_max_offset = 16;
+      blk->eob_max_offset_8x8 = 64;
+      blk->eob_max_offset_16x16 = 256;
+    } else {
+      blk->eob_max_offset = vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
+      blk->eob_max_offset_8x8 = vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
+      blk->eob_max_offset_16x16 =
+          vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
+    }
+  }
+
+  // UV: blocks 16..23 (no 16x16 limit is set for these)
+  zbin_extra = (cm->UVdequant[QIndex][1] *
+                (cpi->zbin_over_quant +
+                 cpi->zbin_mode_boost +
+                 x->act_zbin_adj)) >> 7;
+
+  for (n = 16; n < 24; n++) {
+    BLOCK *const blk = &x->block[n];
+    blk->quant = cpi->UVquant[QIndex];
+    blk->quant_shift = cpi->UVquant_shift[QIndex];
+    blk->zbin = cpi->UVzbin[QIndex];
+    blk->zbin_8x8 = cpi->UVzbin_8x8[QIndex];
+    blk->zbin_16x16 = cpi->UVzbin_16x16[QIndex];
+    blk->round = cpi->UVround[QIndex];
+    blk->zrun_zbin_boost = cpi->zrun_zbin_boost_uv[QIndex];
+    blk->zrun_zbin_boost_8x8 = cpi->zrun_zbin_boost_uv_8x8[QIndex];
+    blk->zrun_zbin_boost_16x16 = cpi->zrun_zbin_boost_uv_16x16[QIndex];
+    blk->zbin_extra = (short)zbin_extra;
+    xd->block[n].dequant = cm->UVdequant[QIndex];
+
+    // Segment max eob offset feature.
+    if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB)) {
+      blk->eob_max_offset = 16;
+      blk->eob_max_offset_8x8 = 64;
+    } else {
+      blk->eob_max_offset = vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
+      blk->eob_max_offset_8x8 = vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
+    }
+  }
+
+  // Y2: block 24 (only half of zbin_over_quant is applied)
+  zbin_extra = (cm->Y2dequant[QIndex][1] *
+                ((cpi->zbin_over_quant / 2) +
+                 cpi->zbin_mode_boost +
+                 x->act_zbin_adj)) >> 7;
+
+  y2->quant = cpi->Y2quant[QIndex];
+  y2->quant_shift = cpi->Y2quant_shift[QIndex];
+  y2->zbin = cpi->Y2zbin[QIndex];
+  y2->zbin_8x8 = cpi->Y2zbin_8x8[QIndex];
+  y2->zbin_16x16 = cpi->Y2zbin_16x16[QIndex];
+  y2->round = cpi->Y2round[QIndex];
+  y2->zrun_zbin_boost = cpi->zrun_zbin_boost_y2[QIndex];
+  y2->zrun_zbin_boost_8x8 = cpi->zrun_zbin_boost_y2_8x8[QIndex];
+  y2->zrun_zbin_boost_16x16 = cpi->zrun_zbin_boost_y2_16x16[QIndex];
+  y2->zbin_extra = (short)zbin_extra;
+  xd->block[24].dequant = cm->Y2dequant[QIndex];
+
+  // TBD perhaps not use for Y2
+  // Segment max eob offset feature.
+  if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB)) {
+    y2->eob_max_offset = 16;
+    y2->eob_max_offset_8x8 = 4;
+  } else {
+    y2->eob_max_offset = vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
+    y2->eob_max_offset_8x8 = vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
+  }
+
+  /* save this macroblock QIndex for vp9_update_zbin_extra() */
+  xd->q_index = QIndex;
+}
+
+void vp9_update_zbin_extra(VP9_COMP *cpi, MACROBLOCK *x) {
+  // Refreshes only the zbin_extra field of all 25 blocks of |x|, using the
+  // Q index read from x->e_mbd.q_index.
+
+  VP9_COMMON *const cm = &cpi->common;
+  const int QIndex = x->e_mbd.q_index;
+  int zbin_extra;
+  int n;
+
+  // Y: blocks 0..15
+  zbin_extra = (cm->Y1dequant[QIndex][1] *
+                (cpi->zbin_over_quant +
+                 cpi->zbin_mode_boost +
+                 x->act_zbin_adj)) >> 7;
+  for (n = 0; n < 16; n++)
+    x->block[n].zbin_extra = (short)zbin_extra;
+
+  // UV: blocks 16..23
+  zbin_extra = (cm->UVdequant[QIndex][1] *
+                (cpi->zbin_over_quant +
+                 cpi->zbin_mode_boost +
+                 x->act_zbin_adj)) >> 7;
+  for (n = 16; n < 24; n++)
+    x->block[n].zbin_extra = (short)zbin_extra;
+
+  // Y2: block 24, with zbin_over_quant halved
+  zbin_extra = (cm->Y2dequant[QIndex][1] *
+                ((cpi->zbin_over_quant / 2) +
+                 cpi->zbin_mode_boost +
+                 x->act_zbin_adj)) >> 7;
+  x->block[24].zbin_extra = (short)zbin_extra;
+}
+
+void vp9_frame_init_quantizer(VP9_COMP *cpi) {
+  MACROBLOCK *const mb = &cpi->mb;
+
+  // Default case: no zbin mode boost. Clear it before the MB-level setup.
+  cpi->zbin_mode_boost = 0;
+  vp9_mb_init_quantizer(cpi, mb);
+}
+
+void vp9_set_quantizer(struct VP9_COMP *cpi, int Q) {
+  // Stores |Q| as the frame base Q index and zeroes all five delta_q values.
+  VP9_COMMON *const cm = &cpi->common;
+
+  cm->base_qindex = Q;
+
+  // If any of the delta_q values were changing, an update flag would have
+  // to be set here.
+  cm->y1dc_delta_q = 0;
+  cm->y2dc_delta_q = 0;
+  cm->y2ac_delta_q = 0;
+  cm->uvdc_delta_q = 0;
+  cm->uvac_delta_q = 0;
+
+  // The quantizer tables would have to be rebuilt with
+  // vp9_init_quantizer(cpi) whenever a delta_q changes. None changes here,
+  // so no rebuild is triggered.
+}
diff --git a/vp9/encoder/vp9_quantize.h b/vp9/encoder/vp9_quantize.h
new file mode 100644
index 0000000..dd11e75
--- /dev/null
+++ b/vp9/encoder/vp9_quantize.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_ENCODER_VP9_QUANTIZE_H_
+#define VP9_ENCODER_VP9_QUANTIZE_H_
+
+#include "vp9/encoder/vp9_block.h"
+
+#define prototype_quantize_block(sym) \
+ void (sym)(BLOCK *b,BLOCKD *d)  /* per-block quantizer signature */
+/* Signature of a quantizer that handles two blocks per call. */
+#define prototype_quantize_block_pair(sym) \
+ void (sym)(BLOCK *b1, BLOCK *b2, BLOCKD *d1, BLOCKD *d2)
+/* Signature of a whole-macroblock quantizer. */
+#define prototype_quantize_mb(sym) \
+ void (sym)(MACROBLOCK *x)
+/* x86 builds pull in an arch-specific header before the defaults below. */
+#if ARCH_X86 || ARCH_X86_64
+#include "x86/vp9_quantize_x86.h"
+#endif
+/* Signature of a per-block quantizer that also takes the transform type. */
+#define prototype_quantize_block_type(sym) \
+ void (sym)(BLOCK *b, BLOCKD *d, TX_TYPE type)
+extern prototype_quantize_block_type(vp9_ht_quantize_b_4x4);
+/* Names not #defined before this point default to the C versions below. */
+#ifndef vp9_quantize_quantb_4x4
+#define vp9_quantize_quantb_4x4 vp9_regular_quantize_b_4x4
+#endif
+extern prototype_quantize_block(vp9_quantize_quantb_4x4);
+
+#ifndef vp9_quantize_quantb_4x4_pair
+#define vp9_quantize_quantb_4x4_pair vp9_regular_quantize_b_4x4_pair
+#endif
+extern prototype_quantize_block_pair(vp9_quantize_quantb_4x4_pair);
+
+#ifndef vp9_quantize_quantb_8x8
+#define vp9_quantize_quantb_8x8 vp9_regular_quantize_b_8x8
+#endif
+extern prototype_quantize_block(vp9_quantize_quantb_8x8);
+
+#ifndef vp9_quantize_quantb_16x16
+#define vp9_quantize_quantb_16x16 vp9_regular_quantize_b_16x16
+#endif
+extern prototype_quantize_block(vp9_quantize_quantb_16x16);
+
+#ifndef vp9_quantize_quantb_2x2
+#define vp9_quantize_quantb_2x2 vp9_regular_quantize_b_2x2
+#endif
+extern prototype_quantize_block(vp9_quantize_quantb_2x2);
+
+#ifndef vp9_quantize_mb_4x4
+#define vp9_quantize_mb_4x4 vp9_quantize_mb_4x4_c
+#endif
+extern prototype_quantize_mb(vp9_quantize_mb_4x4);
+void vp9_quantize_mb_8x8(MACROBLOCK *x);
+
+#ifndef vp9_quantize_mbuv_4x4
+#define vp9_quantize_mbuv_4x4 vp9_quantize_mbuv_4x4_c
+#endif
+extern prototype_quantize_mb(vp9_quantize_mbuv_4x4);
+
+#ifndef vp9_quantize_mby_4x4
+#define vp9_quantize_mby_4x4 vp9_quantize_mby_4x4_c
+#endif
+extern prototype_quantize_mb(vp9_quantize_mby_4x4);
+
+extern prototype_quantize_mb(vp9_quantize_mby_8x8);
+extern prototype_quantize_mb(vp9_quantize_mbuv_8x8);
+
+void vp9_quantize_mb_16x16(MACROBLOCK *x);
+extern prototype_quantize_block(vp9_quantize_quantb_16x16);  // repeats the declaration above
+extern prototype_quantize_mb(vp9_quantize_mby_16x16);
+
+struct VP9_COMP;
+
+extern void vp9_set_quantizer(struct VP9_COMP *cpi, int Q);
+
+extern void vp9_frame_init_quantizer(struct VP9_COMP *cpi);
+
+extern void vp9_update_zbin_extra(struct VP9_COMP *cpi, MACROBLOCK *x);
+
+extern void vp9_mb_init_quantizer(struct VP9_COMP *cpi, MACROBLOCK *x);
+
+extern void vp9_init_quantizer(struct VP9_COMP *cpi);
+
+#endif
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
new file mode 100644
index 0000000..5b5f38b
--- /dev/null
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -0,0 +1,702 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+#include <assert.h>
+
+#include "math.h"
+#include "vp9/common/vp9_alloccommon.h"
+#include "vp9/common/vp9_modecont.h"
+#include "vp9/common/vp9_common.h"
+#include "vp9/encoder/vp9_ratectrl.h"
+#include "vp9/common/vp9_entropymode.h"
+#include "vpx_mem/vpx_mem.h"
+#include "vp9/common/vp9_systemdependent.h"
+#include "vp9/encoder/vp9_encodemv.h"
+#include "vp9/common/vp9_quant_common.h"
+
+#define MIN_BPB_FACTOR 0.005  // lower clamp applied to rate correction factors
+#define MAX_BPB_FACTOR 50  // upper clamp applied to rate correction factors
+
+#ifdef MODE_STATS
+extern unsigned int y_modes[VP9_YMODES];
+extern unsigned int uv_modes[VP9_UV_MODES];
+extern unsigned int b_modes[B_MODE_COUNT];
+
+extern unsigned int inter_y_modes[MB_MODE_COUNT];
+extern unsigned int inter_uv_modes[VP9_UV_MODES];
+extern unsigned int inter_b_modes[B_MODE_COUNT];
+#endif
+
+// Bits per MB values are fixed point, scaled by 512 (1 << BPER_MB_NORMBITS)
+#define BPER_MB_NORMBITS 9
+
+// % adjustment to target kf size based on separation from previous key frame
+static const int kf_boost_seperation_adjustment[16] = {
+ 30, 40, 50, 55, 60, 65, 70, 75,
+ 80, 85, 90, 95, 100, 100, 100, 100,
+};
+
+static const int gf_adjust_table[101] = {  // rises from 100, flat at 400 from index 26
+ 100,
+ 115, 130, 145, 160, 175, 190, 200, 210, 220, 230,
+ 240, 260, 270, 280, 290, 300, 310, 320, 330, 340,
+ 350, 360, 370, 380, 390, 400, 400, 400, 400, 400,
+ 400, 400, 400, 400, 400, 400, 400, 400, 400, 400,
+ 400, 400, 400, 400, 400, 400, 400, 400, 400, 400,
+ 400, 400, 400, 400, 400, 400, 400, 400, 400, 400,
+ 400, 400, 400, 400, 400, 400, 400, 400, 400, 400,
+ 400, 400, 400, 400, 400, 400, 400, 400, 400, 400,
+ 400, 400, 400, 400, 400, 400, 400, 400, 400, 400,
+ 400, 400, 400, 400, 400, 400, 400, 400, 400, 400,
+};
+
+static const int gf_intra_usage_adjustment[20] = {  // falls from 125 to a floor of 50
+ 125, 120, 115, 110, 105, 100, 95, 85, 80, 75,
+ 70, 65, 60, 55, 50, 50, 50, 50, 50, 50,
+};
+
+static const int gf_interval_table[101] = {  // steps from 7 up to 11 across the index range
+ 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+};
+// Weights for averaging recent key frame distances; the last slot weighs most.
+static const unsigned int prior_key_frame_weight[KEY_FRAME_CONTEXT] = { 1, 2, 3, 4, 5 };
+
+// Quantizer helpers are formula based for now so the experimental quantizer
+// tables are easy to tune; they can become lookup tables once things settle.
+double vp9_convert_qindex_to_q(int qindex) {
+  // Real Q is vp9_ac_yquant(qindex) scaled down by 4 to match old Q values.
+  const double ac_quant = (double)vp9_ac_yquant(qindex);
+  return ac_quant / 4.0;
+}
+
+int vp9_gfboost_qadjust(int qindex) {
+  // Evaluates a fixed cubic polynomial in the real Q value for this index,
+  // in double precision, and truncates the sum to int on return.
+  const double q = vp9_convert_qindex_to_q(qindex);
+
+  return (int)((0.00000828 * q * q * q) +
+               (-0.0055 * q * q) +
+               (1.32 * q) +
+               79.3);
+}
+
+static int kfboost_qadjust(int qindex) {
+  // Same shape as vp9_gfboost_qadjust() with different coefficients.
+  // NOTE(review): no caller of this static helper was seen in this file.
+  const double q = vp9_convert_qindex_to_q(qindex);
+
+  return (int)((0.00000973 * q * q * q) +
+               (-0.00613 * q * q) +
+               (1.316 * q) +
+               121.2);
+}
+
+int vp9_bits_per_mb(FRAME_TYPE frame_type, int qindex) {
+  // Key frames use the larger numerator, i.e. more bits per MB at equal Q.
+  const int numerator = (frame_type == KEY_FRAME) ? 4500000 : 2850000;
+
+  return (int)(numerator / vp9_convert_qindex_to_q(qindex));
+}
+
+
+void vp9_save_coding_context(VP9_COMP *cpi) {
+ CODING_CONTEXT *const cc = &cpi->coding_context;
+ VP9_COMMON *cm = &cpi->common;
+ MACROBLOCKD *xd = &cpi->mb.e_mbd;
+
+ // Stores a snapshot of key state variables in cpi->coding_context so that
+ // vp9_restore_coding_context can put them back later. The pair is
+ // intended for use in a re-code loop in vp9_compress_frame where the
+ // quantizer value is adjusted between loop iterations.
+ // Motion vector context and cost tables (nmv*).
+ cc->nmvc = cm->fc.nmvc;
+ vp9_copy(cc->nmvjointcost, cpi->mb.nmvjointcost);
+ vp9_copy(cc->nmvcosts, cpi->mb.nmvcosts);
+ vp9_copy(cc->nmvcosts_hp, cpi->mb.nmvcosts_hp);
+
+ vp9_copy(cc->vp9_mode_contexts, cm->fc.vp9_mode_contexts);
+ // Mode probability tables held in the frame context (cm->fc).
+ vp9_copy(cc->ymode_prob, cm->fc.ymode_prob);
+#if CONFIG_SUPERBLOCKS
+ vp9_copy(cc->sb_ymode_prob, cm->fc.sb_ymode_prob);
+#endif
+ vp9_copy(cc->bmode_prob, cm->fc.bmode_prob);
+ vp9_copy(cc->uv_mode_prob, cm->fc.uv_mode_prob);
+ vp9_copy(cc->i8x8_mode_prob, cm->fc.i8x8_mode_prob);
+ vp9_copy(cc->sub_mv_ref_prob, cm->fc.sub_mv_ref_prob);
+ vp9_copy(cc->mbsplit_prob, cm->fc.mbsplit_prob);
+
+ // Stats (file-scope counters, only present in MODE_STATS builds)
+#ifdef MODE_STATS
+ vp9_copy(cc->y_modes, y_modes);
+ vp9_copy(cc->uv_modes, uv_modes);
+ vp9_copy(cc->b_modes, b_modes);
+ vp9_copy(cc->inter_y_modes, inter_y_modes);
+ vp9_copy(cc->inter_uv_modes, inter_uv_modes);
+ vp9_copy(cc->inter_b_modes, inter_b_modes);
+#endif
+
+ vp9_copy(cc->segment_pred_probs, cm->segment_pred_probs);
+ vp9_copy(cc->ref_pred_probs_update, cpi->ref_pred_probs_update);
+ vp9_copy(cc->ref_pred_probs, cm->ref_pred_probs);
+ vp9_copy(cc->prob_comppred, cm->prob_comppred);
+ // Copies mb_rows * mb_cols bytes. NOTE(review): no NULL check on the map here.
+ vpx_memcpy(cpi->coding_context.last_frame_seg_map_copy,
+ cm->last_frame_seg_map, (cm->mb_rows * cm->mb_cols));
+
+ vp9_copy(cc->last_ref_lf_deltas, xd->last_ref_lf_deltas);
+ vp9_copy(cc->last_mode_lf_deltas, xd->last_mode_lf_deltas);
+ // Coefficient probability tables for each transform size.
+ vp9_copy(cc->coef_probs, cm->fc.coef_probs);
+ vp9_copy(cc->hybrid_coef_probs, cm->fc.hybrid_coef_probs);
+ vp9_copy(cc->coef_probs_8x8, cm->fc.coef_probs_8x8);
+ vp9_copy(cc->hybrid_coef_probs_8x8, cm->fc.hybrid_coef_probs_8x8);
+ vp9_copy(cc->coef_probs_16x16, cm->fc.coef_probs_16x16);
+ vp9_copy(cc->hybrid_coef_probs_16x16, cm->fc.hybrid_coef_probs_16x16);
+ vp9_copy(cc->switchable_interp_prob, cm->fc.switchable_interp_prob);
+#if CONFIG_COMP_INTERINTRA_PRED
+ cc->interintra_prob = cm->fc.interintra_prob;
+#endif
+}
+
+void vp9_restore_coding_context(VP9_COMP *cpi) {
+ CODING_CONTEXT *const cc = &cpi->coding_context;
+ VP9_COMMON *cm = &cpi->common;
+ MACROBLOCKD *xd = &cpi->mb.e_mbd;
+
+ // Restore key state variables to the snapshot state stored in the
+ // previous call to vp9_save_coding_context (held in cpi->coding_context).
+ // Motion vector context and cost tables (nmv*).
+ cm->fc.nmvc = cc->nmvc;
+ vp9_copy(cpi->mb.nmvjointcost, cc->nmvjointcost);
+ vp9_copy(cpi->mb.nmvcosts, cc->nmvcosts);
+ vp9_copy(cpi->mb.nmvcosts_hp, cc->nmvcosts_hp);
+
+ vp9_copy(cm->fc.vp9_mode_contexts, cc->vp9_mode_contexts);
+ // Mode probability tables held in the frame context (cm->fc).
+ vp9_copy(cm->fc.ymode_prob, cc->ymode_prob);
+#if CONFIG_SUPERBLOCKS
+ vp9_copy(cm->fc.sb_ymode_prob, cc->sb_ymode_prob);
+#endif
+ vp9_copy(cm->fc.bmode_prob, cc->bmode_prob);
+ vp9_copy(cm->fc.i8x8_mode_prob, cc->i8x8_mode_prob);
+ vp9_copy(cm->fc.uv_mode_prob, cc->uv_mode_prob);
+ vp9_copy(cm->fc.sub_mv_ref_prob, cc->sub_mv_ref_prob);
+ vp9_copy(cm->fc.mbsplit_prob, cc->mbsplit_prob);
+
+ // Stats (file-scope counters, only present in MODE_STATS builds)
+#ifdef MODE_STATS
+ vp9_copy(y_modes, cc->y_modes);
+ vp9_copy(uv_modes, cc->uv_modes);
+ vp9_copy(b_modes, cc->b_modes);
+ vp9_copy(inter_y_modes, cc->inter_y_modes);
+ vp9_copy(inter_uv_modes, cc->inter_uv_modes);
+ vp9_copy(inter_b_modes, cc->inter_b_modes);
+#endif
+
+ vp9_copy(cm->segment_pred_probs, cc->segment_pred_probs);
+ vp9_copy(cpi->ref_pred_probs_update, cc->ref_pred_probs_update);
+ vp9_copy(cm->ref_pred_probs, cc->ref_pred_probs);
+ vp9_copy(cm->prob_comppred, cc->prob_comppred);
+ // Copies mb_rows * mb_cols bytes. NOTE(review): no NULL check on the map here.
+ vpx_memcpy(cm->last_frame_seg_map,
+ cpi->coding_context.last_frame_seg_map_copy,
+ (cm->mb_rows * cm->mb_cols));
+
+ vp9_copy(xd->last_ref_lf_deltas, cc->last_ref_lf_deltas);
+ vp9_copy(xd->last_mode_lf_deltas, cc->last_mode_lf_deltas);
+ // Coefficient probability tables for each transform size.
+ vp9_copy(cm->fc.coef_probs, cc->coef_probs);
+ vp9_copy(cm->fc.hybrid_coef_probs, cc->hybrid_coef_probs);
+ vp9_copy(cm->fc.coef_probs_8x8, cc->coef_probs_8x8);
+ vp9_copy(cm->fc.hybrid_coef_probs_8x8, cc->hybrid_coef_probs_8x8);
+ vp9_copy(cm->fc.coef_probs_16x16, cc->coef_probs_16x16);
+ vp9_copy(cm->fc.hybrid_coef_probs_16x16, cc->hybrid_coef_probs_16x16);
+ vp9_copy(cm->fc.switchable_interp_prob, cc->switchable_interp_prob);
+#if CONFIG_COMP_INTERINTRA_PRED
+ cm->fc.interintra_prob = cc->interintra_prob;
+#endif
+}
+
+
+void vp9_setup_key_frame(VP9_COMP *cpi) {
+  VP9_COMMON *const cm = &cpi->common;
+  size_t mode_info_bytes;
+
+  // Put every probability set back to its default for the key frame.
+  vp9_default_coef_probs(cm);
+  vp9_kf_default_bmode_probs(cm->kf_bmode_prob);
+  vp9_init_mbmode_probs(cm);
+  vp9_default_bmode_probs(cm->fc.bmode_prob);
+
+  // The segment map of the previous frame is cleared when one exists.
+  if (cm->last_frame_seg_map)
+    vpx_memset(cm->last_frame_seg_map, 0, (cm->mb_rows * cm->mb_cols));
+  vp9_init_mv_probs(cm);
+
+  // Seed the loop filter level from the frame's base quantizer index.
+  cm->filter_level = cm->base_qindex * 3 / 8;
+  // Restart the golden frame interval; refresh both golden and alt ref.
+  cpi->frames_till_gf_update_due = cpi->baseline_gf_interval;
+  cm->refresh_golden_frame = TRUE;
+  cm->refresh_alt_ref_frame = TRUE;
+
+  // Both saved frame contexts start out as copies of the fresh context.
+  vp9_init_mode_contexts(cm);
+  vpx_memcpy(&cm->lfc, &cm->fc, sizeof(cm->fc));
+  vpx_memcpy(&cm->lfc_a, &cm->fc, sizeof(cm->fc));
+
+  // Zero the previous and current mode info arrays, then rebuild them.
+  mode_info_bytes = (cm->mb_cols + 1) * (cm->mb_rows + 1) * sizeof(MODE_INFO);
+  vpx_memset(cm->prev_mip, 0, mode_info_bytes);
+  vpx_memset(cm->mip, 0, mode_info_bytes);
+  vp9_update_mode_info_border(cm, cm->mip);
+  vp9_update_mode_info_in_image(cm, cm->mi);
+}
+
+void vp9_setup_inter_frame(VP9_COMP *cpi) {
+  VP9_COMMON *const cm = &cpi->common;
+
+  // Alt ref refreshes resume from their own saved frame context (lfc_a);
+  // every other inter frame resumes from the regular saved context (lfc).
+  const void *const saved_context = cm->refresh_alt_ref_frame
+                                        ? (const void *)&cm->lfc_a
+                                        : (const void *)&cm->lfc;
+
+  vpx_memcpy(&cm->fc, saved_context, sizeof(cm->fc));
+}
+
+
+static int estimate_bits_at_q(int frame_kind, int Q, int MBs,
+                              double correction_factor) {
+  // Corrected bits per macroblock, rounded, still scaled by
+  // (1 << BPER_MB_NORMBITS).
+  const int scaled_bits_per_mb =
+      (int)(.5 + correction_factor * vp9_bits_per_mb(frame_kind, Q));
+  const int many_mbs = MBs > (1 << 11);
+
+  // With many macroblocks, drop the scaling before multiplying so the
+  // product stays within int; otherwise multiply first to keep precision.
+  return many_mbs ? (scaled_bits_per_mb >> BPER_MB_NORMBITS) * MBs
+                  : (scaled_bits_per_mb * MBs) >> BPER_MB_NORMBITS;
+}
+
+
+static void calc_iframe_target_size(VP9_COMP *cpi) {
+  int frame_budget;
+
+  // Reset MMX state (historically "__asm emms") before the rate maths.
+  vp9_clear_system_state();
+
+  // Key frames start from the per-frame bandwidth.
+  frame_budget = cpi->per_frame_bandwidth;
+
+  // A non-zero rc_max_intra_bitrate_pct caps the budget at that percentage
+  // of the per-frame bandwidth.
+  if (cpi->oxcf.rc_max_intra_bitrate_pct) {
+    const int intra_cap = cpi->per_frame_bandwidth *
+                          cpi->oxcf.rc_max_intra_bitrate_pct / 100;
+
+    if (frame_budget > intra_cap)
+      frame_budget = intra_cap;
+  }
+
+  cpi->this_frame_target = frame_budget;
+}
+
+
+// Do the best we can to define the parameters for the next GF based
+// on what information we have available.
+//
+// In this experimental code only two pass is supported
+// so we just use the interval determined in the two pass code.
+static void calc_gf_params(VP9_COMP *cpi) {
+ // Restart the countdown to the next GF update from the baseline interval
+ cpi->frames_till_gf_update_due = cpi->baseline_gf_interval;
+}
+
+
+static void calc_pframe_target_size(VP9_COMP *cpi) {
+  // Sets cpi->this_frame_target for a non key frame. Along the way it may
+  // update cpi->per_frame_bandwidth (alt ref updates), records
+  // cpi->inter_frame_target (all other frames) and, when a golden frame is
+  // due, raises the golden refresh flag and restarts the interval counters.
+
+  // Floor for the target: the configured minimum frame bandwidth, raised to
+  // 1/32 of the average per-frame bandwidth when that is larger.
+  int min_frame_target = cpi->min_frame_bandwidth;
+
+  if (min_frame_target < (cpi->av_per_frame_bandwidth >> 5))
+    min_frame_target = cpi->av_per_frame_bandwidth >> 5;
+
+  // An alt ref update first replaces the per-frame bandwidth with the two
+  // pass gf_bits allocation. After that every frame type starts from the
+  // per-frame bandwidth.
+  if (cpi->common.refresh_alt_ref_frame)
+    cpi->per_frame_bandwidth = cpi->twopass.gf_bits;
+
+  cpi->this_frame_target = cpi->per_frame_bandwidth;
+
+  // Sanity check: having allowed for KF and GF penalties, do not let the
+  // current target fall below the floor. If this clamp cannot recover all
+  // of the extra bits spent on the KF or GF, the remainder has to be
+  // recovered over a longer time span by other buffer / rate control
+  // mechanisms.
+  if (cpi->this_frame_target < min_frame_target)
+    cpi->this_frame_target = min_frame_target;
+
+  // Note the baseline target data rate for this inter frame (alt ref
+  // updates are excluded).
+  if (!cpi->common.refresh_alt_ref_frame)
+    cpi->inter_frame_target = cpi->this_frame_target;
+
+  // Everything below only applies when a golden frame update is due now.
+  if (cpi->frames_till_gf_update_due != 0)
+    return;
+
+  // Adjust target frame size for Golden Frames:
+  cpi->common.refresh_golden_frame = TRUE;
+
+  // Restart the golden frame countdown before choosing the target.
+  calc_gf_params(cpi);
+
+  if (cpi->source_alt_ref_active) {
+    // If there is an active ARF at this location use the minimum bits on
+    // this frame even if it is a constructed arf. The active maximum
+    // quantizer ensures that an appropriate number of bits will be spent
+    // if needed for constructed ARFs.
+    cpi->this_frame_target = 0;
+  } else if (cpi->oxcf.fixed_q < 0) {
+    // No fixed quantizer: the spend on the GF is defined in the two pass
+    // code for two pass encodes.
+    cpi->this_frame_target = cpi->per_frame_bandwidth;
+  } else {
+    // Fixed quantizer: estimate the size at that Q with a neutral
+    // correction factor and scale it by the last boost percentage.
+    // NOTE(review): frame kind 1 is presumably INTER_FRAME - confirm.
+    const int Q = cpi->oxcf.fixed_q;
+
+    cpi->this_frame_target =
+        (estimate_bits_at_q(1, Q, cpi->common.MBs, 1.0) *
+         cpi->last_boost) / 100;
+  }
+
+  // calc_gf_params() has just reloaded frames_till_gf_update_due, so this
+  // records the new interval.
+  cpi->current_gf_interval = cpi->frames_till_gf_update_due;
+}
+
+
+void vp9_update_rate_correction_factors(VP9_COMP *cpi, int damp_var) {
+  // Compares the coded frame size (cpi->projected_frame_size) with the size
+  // predicted for its Q and moves this frame type's rate correction factor
+  // part of the way: damp_var 0 -> 75%, 1 -> 37.5%, anything else -> 25%.
+  const int Q = cpi->common.base_qindex;
+  int correction_factor = 100;
+  double rate_correction_factor;
+  double adjustment_limit;
+  int projected_size_based_on_q = 0;
+
+  // Clear down mmx registers to allow floating point in what follows
+  vp9_clear_system_state();  // __asm emms;
+
+  // Pick the factor that belongs to this frame type.
+  if (cpi->common.frame_type == KEY_FRAME)
+    rate_correction_factor = cpi->key_frame_rate_correction_factor;
+  else if (cpi->common.refresh_alt_ref_frame || cpi->common.refresh_golden_frame)
+    rate_correction_factor = cpi->gf_rate_correction_factor;
+  else
+    rate_correction_factor = cpi->rate_correction_factor;
+
+  // Work out how big we would have expected the frame to be at this Q given
+  // the current correction factor. Stay in double to avoid int overflow when
+  // values are large.
+  projected_size_based_on_q =
+      (int)(((.5 + rate_correction_factor *
+              vp9_bits_per_mb(cpi->common.frame_type, Q)) *
+             cpi->common.MBs) / (1 << BPER_MB_NORMBITS));
+
+  // Make some allowance for cpi->zbin_over_quant: shrink the projection once
+  // per zbin step by a factor that creeps from 0.99 up to at most 0.999.
+  if (cpi->zbin_over_quant > 0) {
+    int Z = cpi->zbin_over_quant;
+    double Factor = 0.99;
+    const double factor_adjustment = 0.01 / 256.0;  // (double)ZBIN_OQ_MAX;
+
+    while (Z > 0) {
+      Z--;
+      projected_size_based_on_q = (int)(Factor * projected_size_based_on_q);
+      Factor += factor_adjustment;
+
+      if (Factor >= 0.999)
+        Factor = 0.999;
+    }
+  }
+
+  // Work out a size correction factor: actual size as a percentage of the
+  // projection. The ratio is formed in double because the int product
+  // 100 * projected_frame_size overflows for very large frames; the result
+  // is clamped so the conversion back to int is always defined.
+  if (projected_size_based_on_q > 0) {
+    const double size_pct = (100.0 * cpi->projected_frame_size) /
+                            projected_size_based_on_q;
+    correction_factor = (size_pct >= INT_MAX) ? INT_MAX : (int)size_pct;
+  }
+
+  // More heavily damped adjustment used if we have been oscillating
+  switch (damp_var) {
+    case 0:
+      adjustment_limit = 0.75;
+      break;
+    case 1:
+      adjustment_limit = 0.375;
+      break;
+    case 2:
+    default:
+      adjustment_limit = 0.25;
+      break;
+  }
+
+  // Only react outside the 99..102% dead band, and keep the updated factor
+  // within [MIN_BPB_FACTOR, MAX_BPB_FACTOR].
+  if (correction_factor > 102) {
+    correction_factor = (int)(100.5 + ((correction_factor - 100) * adjustment_limit));
+    rate_correction_factor = ((rate_correction_factor * correction_factor) / 100);
+
+    if (rate_correction_factor > MAX_BPB_FACTOR)
+      rate_correction_factor = MAX_BPB_FACTOR;
+  } else if (correction_factor < 99) {
+    correction_factor = (int)(100.5 - ((100 - correction_factor) * adjustment_limit));
+    rate_correction_factor = ((rate_correction_factor * correction_factor) / 100);
+
+    if (rate_correction_factor < MIN_BPB_FACTOR)
+      rate_correction_factor = MIN_BPB_FACTOR;
+  }
+
+  // Store the result back into the slot it was read from.
+  if (cpi->common.frame_type == KEY_FRAME)
+    cpi->key_frame_rate_correction_factor = rate_correction_factor;
+  else if (cpi->common.refresh_alt_ref_frame || cpi->common.refresh_golden_frame)
+    cpi->gf_rate_correction_factor = rate_correction_factor;
+  else
+    cpi->rate_correction_factor = rate_correction_factor;
+}
+
+
+int vp9_regulate_q(VP9_COMP *cpi, int target_bits_per_frame) {
+ int Q = cpi->active_worst_quality;  // result if no index in range meets the target
+ // Returns the Q index whose estimated bits per MB best matches the target.
+ int i;
+ int last_error = INT_MAX;
+ int target_bits_per_mb;
+ int bits_per_mb_at_this_q;
+ double correction_factor;
+
+ // Reset Zbin OQ value (it is raised again below only when Q reaches MAXQ)
+ cpi->zbin_over_quant = 0;
+
+ // Select the appropriate correction factor based upon type of frame.
+ if (cpi->common.frame_type == KEY_FRAME)
+ correction_factor = cpi->key_frame_rate_correction_factor;
+ else {
+ if (cpi->common.refresh_alt_ref_frame || cpi->common.refresh_golden_frame)
+ correction_factor = cpi->gf_rate_correction_factor;
+ else
+ correction_factor = cpi->rate_correction_factor;
+ }
+
+ // Convert the frame target to bits per MB, scaled by (1 << BPER_MB_NORMBITS); divide first when the shift would overflow
+ if (target_bits_per_frame >= (INT_MAX >> BPER_MB_NORMBITS))
+ target_bits_per_mb = (target_bits_per_frame / cpi->common.MBs) << BPER_MB_NORMBITS; // Case where we would overflow int
+ else
+ target_bits_per_mb = (target_bits_per_frame << BPER_MB_NORMBITS) / cpi->common.MBs;
+
+ i = cpi->active_best_quality;
+ // Walk from best to worst quality; stop at the first index at or below the target and keep it or its predecessor, whichever is closer
+ do {
+ bits_per_mb_at_this_q =
+ (int)(.5 + correction_factor *
+ vp9_bits_per_mb(cpi->common.frame_type, i));
+
+ if (bits_per_mb_at_this_q <= target_bits_per_mb) {
+ if ((target_bits_per_mb - bits_per_mb_at_this_q) <= last_error)
+ Q = i;
+ else
+ Q = i - 1;
+
+ break;
+ } else
+ last_error = bits_per_mb_at_this_q - target_bits_per_mb;
+ } while (++i <= cpi->active_worst_quality);
+
+
+ // If we are at MAXQ then enable Q over-run which seeks to claw back additional bits through things like
+ // the RD multiplier and zero bin size.
+ if (Q >= MAXQ) {
+ int zbin_oqmax;
+
+ double Factor = 0.99;
+ double factor_adjustment = 0.01 / 256.0; // (double)ZBIN_OQ_MAX;
+
+ if (cpi->common.frame_type == KEY_FRAME)
+ zbin_oqmax = 0; // ZBIN_OQ_MAX/16
+ else if (cpi->common.refresh_alt_ref_frame || (cpi->common.refresh_golden_frame && !cpi->source_alt_ref_active))
+ zbin_oqmax = 16;
+ else
+ zbin_oqmax = ZBIN_OQ_MAX;
+
+ // Each increment in the zbin is assumed to have a fixed effect on bitrate. This is not of course true.
+ // The effect will be highly clip dependent and may well have sudden steps.
+ // The idea here is to achieve higher effective quantizers than the normal maximum by expanding the zero
+ // bin and hence decreasing the number of low magnitude non zero coefficients.
+ while (cpi->zbin_over_quant < zbin_oqmax) {
+ cpi->zbin_over_quant++;
+
+ if (cpi->zbin_over_quant > zbin_oqmax)  // NOTE(review): cannot be true after the loop test above
+ cpi->zbin_over_quant = zbin_oqmax;
+
+ // Adjust bits_per_mb_at_this_q estimate
+ bits_per_mb_at_this_q = (int)(Factor * bits_per_mb_at_this_q);
+ Factor += factor_adjustment;
+
+ if (Factor >= 0.999)
+ Factor = 0.999;
+
+ if (bits_per_mb_at_this_q <= target_bits_per_mb) // Break out if we get down to the target rate
+ break;
+ }
+
+ }
+
+ return Q;
+}
+
+
+static int estimate_keyframe_frequency(VP9_COMP *cpi) {
+  // Returns the expected number of frames between key frames and updates
+  // cpi->prior_key_frame_distance[] (most recent interval in the last slot).
+  int i;
+  int av_key_frame_frequency = 0;
+
+  /* First key frame at start of sequence is a special case. We have no
+   * frequency data.
+   */
+  if (cpi->key_frame_count == 1) {
+    /* Assume a default of 1 kf every 2 seconds, or the max kf interval,
+     * whichever is smaller.
+     */
+    const int key_freq = cpi->oxcf.key_freq > 0 ? cpi->oxcf.key_freq : 1;
+    av_key_frame_frequency = (int)cpi->output_frame_rate * 2;
+
+    // Clamp with the sanitised key_freq, not the raw setting: a raw value
+    // of 0 (or less) would otherwise yield a non-positive frequency.
+    if (cpi->oxcf.auto_key && av_key_frame_frequency > key_freq)
+      av_key_frame_frequency = key_freq;
+
+    cpi->prior_key_frame_distance[KEY_FRAME_CONTEXT - 1]
+      = av_key_frame_frequency;
+  } else {
+    unsigned int total_weight = 0;
+    const int last_kf_interval =
+        (cpi->frames_since_key > 0) ? cpi->frames_since_key : 1;
+
+    /* Shift the history down one slot, append the newest interval and
+     * calculate the weighted average of the last KEY_FRAME_CONTEXT keyframes
+     */
+    for (i = 0; i < KEY_FRAME_CONTEXT; i++) {
+      if (i < KEY_FRAME_CONTEXT - 1)
+        cpi->prior_key_frame_distance[i] = cpi->prior_key_frame_distance[i + 1];
+      else
+        cpi->prior_key_frame_distance[i] = last_kf_interval;
+
+      av_key_frame_frequency += prior_key_frame_weight[i]
+                                * cpi->prior_key_frame_distance[i];
+      total_weight += prior_key_frame_weight[i];
+    }
+
+    av_key_frame_frequency /= total_weight;
+  }
+  return av_key_frame_frequency;
+}
+
+
+void vp9_adjust_key_frame_context(VP9_COMP *cpi) {
+  // Reset MMX state so that floating point code can run afterwards.
+  vp9_clear_system_state();
+
+  // Count this key frame and restart the frames-since-key counter.
+  cpi->key_frame_count++;
+  cpi->frames_since_key = 0;
+}
+
+
+void vp9_compute_frame_size_bounds(VP9_COMP *cpi, int *frame_under_shoot_limit,
+                                   int *frame_over_shoot_limit) {
+  // Numerators (over a denominator of 8) applied to cpi->this_frame_target.
+  int over_eighths, under_eighths;
+
+  // Fixed Q scenario: there is no target, so any frame size is acceptable.
+  if (cpi->oxcf.fixed_q >= 0) {
+    *frame_under_shoot_limit = 0;
+    *frame_over_shoot_limit = INT_MAX;
+    return;
+  }
+
+  if (cpi->common.frame_type == KEY_FRAME ||
+      cpi->common.refresh_alt_ref_frame ||
+      cpi->common.refresh_golden_frame) {
+    // Key, golden and alt ref frames: stay within +/- 1/8 of the target.
+    over_eighths = 9;
+    under_eighths = 7;
+  } else if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) {
+    // Constrained quality: same overshoot bound, much looser undershoot.
+    over_eighths = 11;
+    under_eighths = 2;
+  } else {
+    over_eighths = 11;
+    under_eighths = 5;
+  }
+
+  // For very small rate targets the fractional bounds may be tiny, so widen
+  // the range by a fixed 200 and keep the lower bound non-negative.
+  *frame_over_shoot_limit = cpi->this_frame_target * over_eighths / 8;
+  *frame_under_shoot_limit = cpi->this_frame_target * under_eighths / 8;
+  *frame_over_shoot_limit += 200;
+  *frame_under_shoot_limit -= 200;
+  if (*frame_under_shoot_limit < 0)
+    *frame_under_shoot_limit = 0;
+}
+
+
+// Sets the frame target for the current frame type. A return of 0 would mean
+// "drop the frame"; this implementation always returns 1.
+int vp9_pick_frame_size(VP9_COMP *cpi) {
+  const int is_key_frame = (cpi->common.frame_type == KEY_FRAME);
+
+  if (is_key_frame)
+    calc_iframe_target_size(cpi);
+  else
+    calc_pframe_target_size(cpi);
+  return 1;
+}
diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h
new file mode 100644
index 0000000..a257211
--- /dev/null
+++ b/vp9/encoder/vp9_ratectrl.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_ENCODER_VP9_RATECTRL_H_
+#define VP9_ENCODER_VP9_RATECTRL_H_
+
+#include "vp9/encoder/vp9_onyx_int.h"
+
+#define FRAME_OVERHEAD_BITS 200
+// Snapshot / restore of coding state, intended for use around a re-code loop.
+extern void vp9_save_coding_context(VP9_COMP *cpi);
+extern void vp9_restore_coding_context(VP9_COMP *cpi);
+// Per-frame setup and rate control entry points.
+extern void vp9_setup_key_frame(VP9_COMP *cpi);
+extern void vp9_update_rate_correction_factors(VP9_COMP *cpi, int damp_var);
+extern int vp9_regulate_q(VP9_COMP *cpi, int target_bits_per_frame);
+extern void vp9_adjust_key_frame_context(VP9_COMP *cpi);
+extern void vp9_compute_frame_size_bounds(VP9_COMP *cpi,
+ int *frame_under_shoot_limit,
+ int *frame_over_shoot_limit);
+
+// return of 0 means drop frame (the current implementation always returns 1)
+extern int vp9_pick_frame_size(VP9_COMP *cpi);
+// Helpers that take a quantizer index.
+extern double vp9_convert_qindex_to_q(int qindex);
+extern int vp9_gfboost_qadjust(int qindex);
+extern int vp9_bits_per_mb(FRAME_TYPE frame_type, int qindex);
+void vp9_setup_inter_frame(VP9_COMP *cpi);
+
+#endif  // VP9_ENCODER_VP9_RATECTRL_H_
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
new file mode 100644
index 0000000..27decb9
--- /dev/null
+++ b/vp9/encoder/vp9_rdopt.c
@@ -0,0 +1,4854 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include <stdio.h>
+#include <math.h>
+#include <limits.h>
+#include <assert.h>
+#include "vp9/common/vp9_pragmas.h"
+
+#include "vp9/encoder/vp9_tokenize.h"
+#include "vp9/encoder/vp9_treewriter.h"
+#include "vp9/encoder/vp9_onyx_int.h"
+#include "vp9/encoder/vp9_modecosts.h"
+#include "vp9/encoder/vp9_encodeintra.h"
+#include "vp9/common/vp9_entropymode.h"
+#include "vp9/common/vp9_reconinter.h"
+#include "vp9/common/vp9_reconintra.h"
+#include "vp9/common/vp9_reconintra4x4.h"
+#include "vp9/common/vp9_findnearmv.h"
+#include "vp9/common/vp9_quant_common.h"
+#include "vp9/encoder/vp9_encodemb.h"
+#include "vp9/encoder/vp9_quantize.h"
+#include "vp9/encoder/vp9_variance.h"
+#include "vp9/encoder/vp9_mcomp.h"
+#include "vp9/encoder/vp9_rdopt.h"
+#include "vp9/encoder/vp9_ratectrl.h"
+#include "vpx_mem/vpx_mem.h"
+#include "vp9/common/vp9_systemdependent.h"
+#include "vp9/encoder/vp9_encodemv.h"
+
+#include "vp9/common/vp9_seg_common.h"
+#include "vp9/common/vp9_pred_common.h"
+#include "vp9/common/vp9_entropy.h"
+#include "vp9_rtcd.h"
+#include "vp9/common/vp9_mvref_common.h"
+
+#define MAXF(a,b) (((a) > (b)) ? (a) : (b))  // the larger argument is evaluated twice
+// NOTE(review): presumably a sentinel for "no valid motion vector" - confirm.
+#define INVALID_MV 0x80008000
+
+/* Factor to weigh the rate for switchable interp filters */
+#define SWITCHABLE_INTERP_RATE_FACTOR 1
+
+static const int auto_speed_thresh[17] = {  // presumably indexed by speed setting - TODO confirm
+ 1000,
+ 200,
+ 150,
+ 130,
+ 150,
+ 125,
+ 120,
+ 115,
+ 115,
+ 115,
+ 115,
+ 115,
+ 115,
+ 115,
+ 115,
+ 115,
+ 105
+};
+
+#if CONFIG_PRED_FILTER  // this variant adds a 4th field (0 or 1) to each entry
+const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
+ {ZEROMV, LAST_FRAME, NONE, 0},
+ {ZEROMV, LAST_FRAME, NONE, 1},
+ {DC_PRED, INTRA_FRAME, NONE, 0},
+
+ {NEARESTMV, LAST_FRAME, NONE, 0},
+ {NEARESTMV, LAST_FRAME, NONE, 1},
+ {NEARMV, LAST_FRAME, NONE, 0},
+ {NEARMV, LAST_FRAME, NONE, 1},
+
+ {ZEROMV, GOLDEN_FRAME, NONE, 0},
+ {ZEROMV, GOLDEN_FRAME, NONE, 1},
+ {NEARESTMV, GOLDEN_FRAME, NONE, 0},
+ {NEARESTMV, GOLDEN_FRAME, NONE, 1},
+
+ {ZEROMV, ALTREF_FRAME, NONE, 0},
+ {ZEROMV, ALTREF_FRAME, NONE, 1},
+ {NEARESTMV, ALTREF_FRAME, NONE, 0},
+ {NEARESTMV, ALTREF_FRAME, NONE, 1},
+
+ {NEARMV, GOLDEN_FRAME, NONE, 0},
+ {NEARMV, GOLDEN_FRAME, NONE, 1},
+ {NEARMV, ALTREF_FRAME, NONE, 0},
+ {NEARMV, ALTREF_FRAME, NONE, 1},
+
+ {V_PRED, INTRA_FRAME, NONE, 0},
+ {H_PRED, INTRA_FRAME, NONE, 0},
+ {D45_PRED, INTRA_FRAME, NONE, 0},
+ {D135_PRED, INTRA_FRAME, NONE, 0},
+ {D117_PRED, INTRA_FRAME, NONE, 0},
+ {D153_PRED, INTRA_FRAME, NONE, 0},
+ {D27_PRED, INTRA_FRAME, NONE, 0},
+ {D63_PRED, INTRA_FRAME, NONE, 0},
+
+ {TM_PRED, INTRA_FRAME, NONE, 0},
+
+ {NEWMV, LAST_FRAME, NONE, 0},
+ {NEWMV, LAST_FRAME, NONE, 1},
+ {NEWMV, GOLDEN_FRAME, NONE, 0},
+ {NEWMV, GOLDEN_FRAME, NONE, 1},
+ {NEWMV, ALTREF_FRAME, NONE, 0},
+ {NEWMV, ALTREF_FRAME, NONE, 1},
+
+ {SPLITMV, LAST_FRAME, NONE, 0},
+ {SPLITMV, GOLDEN_FRAME, NONE, 0},
+ {SPLITMV, ALTREF_FRAME, NONE, 0},
+
+ {B_PRED, INTRA_FRAME, NONE, 0},
+ {I8X8_PRED, INTRA_FRAME, NONE, 0},
+
+ /* compound prediction modes (two reference frames per entry) */
+ {ZEROMV, LAST_FRAME, GOLDEN_FRAME, 0},
+ {NEARESTMV, LAST_FRAME, GOLDEN_FRAME, 0},
+ {NEARMV, LAST_FRAME, GOLDEN_FRAME, 0},
+
+ {ZEROMV, ALTREF_FRAME, LAST_FRAME, 0},
+ {NEARESTMV, ALTREF_FRAME, LAST_FRAME, 0},
+ {NEARMV, ALTREF_FRAME, LAST_FRAME, 0},
+
+ {ZEROMV, GOLDEN_FRAME, ALTREF_FRAME, 0},
+ {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME, 0},
+ {NEARMV, GOLDEN_FRAME, ALTREF_FRAME, 0},
+
+ {NEWMV, LAST_FRAME, GOLDEN_FRAME, 0},
+ {NEWMV, ALTREF_FRAME, LAST_FRAME, 0},
+ {NEWMV, GOLDEN_FRAME, ALTREF_FRAME, 0},
+
+ {SPLITMV, LAST_FRAME, GOLDEN_FRAME, 0},
+ {SPLITMV, ALTREF_FRAME, LAST_FRAME, 0},
+ {SPLITMV, GOLDEN_FRAME, ALTREF_FRAME, 0},
+
+#if CONFIG_COMP_INTERINTRA_PRED
+ /* compound inter-intra prediction (second reference is INTRA_FRAME) */
+ {ZEROMV, LAST_FRAME, INTRA_FRAME, 0},
+ {NEARESTMV, LAST_FRAME, INTRA_FRAME, 0},
+ {NEARMV, LAST_FRAME, INTRA_FRAME, 0},
+ {NEWMV, LAST_FRAME, INTRA_FRAME, 0},
+
+ {ZEROMV, GOLDEN_FRAME, INTRA_FRAME, 0},
+ {NEARESTMV, GOLDEN_FRAME, INTRA_FRAME, 0},
+ {NEARMV, GOLDEN_FRAME, INTRA_FRAME, 0},
+ {NEWMV, GOLDEN_FRAME, INTRA_FRAME, 0},
+
+ {ZEROMV, ALTREF_FRAME, INTRA_FRAME, 0},
+ {NEARESTMV, ALTREF_FRAME, INTRA_FRAME, 0},
+ {NEARMV, ALTREF_FRAME, INTRA_FRAME, 0},
+ {NEWMV, ALTREF_FRAME, INTRA_FRAME, 0},
+#endif
+};
+#else  // three-field entries when CONFIG_PRED_FILTER is off
+const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
+ {ZEROMV, LAST_FRAME, NONE},
+ {DC_PRED, INTRA_FRAME, NONE},
+
+ {NEARESTMV, LAST_FRAME, NONE},
+ {NEARMV, LAST_FRAME, NONE},
+
+ {ZEROMV, GOLDEN_FRAME, NONE},
+ {NEARESTMV, GOLDEN_FRAME, NONE},
+
+ {ZEROMV, ALTREF_FRAME, NONE},
+ {NEARESTMV, ALTREF_FRAME, NONE},
+
+ {NEARMV, GOLDEN_FRAME, NONE},
+ {NEARMV, ALTREF_FRAME, NONE},
+
+ {V_PRED, INTRA_FRAME, NONE},
+ {H_PRED, INTRA_FRAME, NONE},
+ {D45_PRED, INTRA_FRAME, NONE},
+ {D135_PRED, INTRA_FRAME, NONE},
+ {D117_PRED, INTRA_FRAME, NONE},
+ {D153_PRED, INTRA_FRAME, NONE},
+ {D27_PRED, INTRA_FRAME, NONE},
+ {D63_PRED, INTRA_FRAME, NONE},
+
+ {TM_PRED, INTRA_FRAME, NONE},
+
+ {NEWMV, LAST_FRAME, NONE},
+ {NEWMV, GOLDEN_FRAME, NONE},
+ {NEWMV, ALTREF_FRAME, NONE},
+
+ {SPLITMV, LAST_FRAME, NONE},
+ {SPLITMV, GOLDEN_FRAME, NONE},
+ {SPLITMV, ALTREF_FRAME, NONE},
+
+ {B_PRED, INTRA_FRAME, NONE},
+ {I8X8_PRED, INTRA_FRAME, NONE},
+
+ /* compound prediction modes (two reference frames per entry) */
+ {ZEROMV, LAST_FRAME, GOLDEN_FRAME},
+ {NEARESTMV, LAST_FRAME, GOLDEN_FRAME},
+ {NEARMV, LAST_FRAME, GOLDEN_FRAME},
+
+ {ZEROMV, ALTREF_FRAME, LAST_FRAME},
+ {NEARESTMV, ALTREF_FRAME, LAST_FRAME},
+ {NEARMV, ALTREF_FRAME, LAST_FRAME},
+
+ {ZEROMV, GOLDEN_FRAME, ALTREF_FRAME},
+ {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},
+ {NEARMV, GOLDEN_FRAME, ALTREF_FRAME},
+
+ {NEWMV, LAST_FRAME, GOLDEN_FRAME},
+ {NEWMV, ALTREF_FRAME, LAST_FRAME },
+ {NEWMV, GOLDEN_FRAME, ALTREF_FRAME},
+
+ {SPLITMV, LAST_FRAME, GOLDEN_FRAME},
+ {SPLITMV, ALTREF_FRAME, LAST_FRAME },
+ {SPLITMV, GOLDEN_FRAME, ALTREF_FRAME},
+
+#if CONFIG_COMP_INTERINTRA_PRED
+ /* compound inter-intra prediction (second reference is INTRA_FRAME) */
+ {ZEROMV, LAST_FRAME, INTRA_FRAME},
+ {NEARESTMV, LAST_FRAME, INTRA_FRAME},
+ {NEARMV, LAST_FRAME, INTRA_FRAME},
+ {NEWMV, LAST_FRAME, INTRA_FRAME},
+
+ {ZEROMV, GOLDEN_FRAME, INTRA_FRAME},
+ {NEARESTMV, GOLDEN_FRAME, INTRA_FRAME},
+ {NEARMV, GOLDEN_FRAME, INTRA_FRAME},
+ {NEWMV, GOLDEN_FRAME, INTRA_FRAME},
+
+ {ZEROMV, ALTREF_FRAME, INTRA_FRAME},
+ {NEARESTMV, ALTREF_FRAME, INTRA_FRAME},
+ {NEARMV, ALTREF_FRAME, INTRA_FRAME},
+ {NEWMV, ALTREF_FRAME, INTRA_FRAME},
+#endif
+};
+#endif
+
+static void fill_token_costs(
+    unsigned int (*c)[COEF_BANDS][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS],
+    const vp9_prob(*p)[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES],
+    int block_type_counts) {
+  int type, band, ctx;
+
+  for (type = 0; type < block_type_counts; type++) {
+    // Context 0 uses the "skip" costing from band 2 (type 0) or band 1 on.
+    const int first_skip_band = (type == 0) ? 2 : 1;
+    for (band = 0; band < COEF_BANDS; band++)
+      for (ctx = 0; ctx < PREV_COEF_CONTEXTS; ctx++) {
+        int *const costs = (int *)(c[type][band][ctx]);
+        if (ctx == 0 && band >= first_skip_band)
+          vp9_cost_tokens_skip(costs, p[type][band][ctx], vp9_coef_tree);
+        else
+          vp9_cost_tokens(costs, p[type][band][ctx], vp9_coef_tree);
+      }
+  }
+}
+
+/* RDMULT boost in 1/16ths, indexed by twopass.next_iiratio (clamped to 31). */
+static int rd_iifactor[32] = { 4, 4, 3, 2, 1, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, };
+
+// 3* dc_qlookup[Q]*dc_qlookup[Q];
+
+/* SAD-per-bit tables indexed by qindex; filled in by vp9_init_me_luts(). */
+static int sad_per_bit16lut[QINDEX_RANGE];
+static int sad_per_bit4lut[QINDEX_RANGE];
+/* Fills sad_per_bit16lut[] and sad_per_bit4lut[] for every qindex. */
+void vp9_init_me_luts() {
+  int qindex;
+  // Both tables come from a linear formula on vp9_convert_qindex_to_q()
+  // for now, which makes it easier to resolve the impact of experimental
+  // changes to the quantizer tables.
+  for (qindex = 0; qindex < QINDEX_RANGE; ++qindex) {
+    sad_per_bit4lut[qindex] =
+        (int)((0.063 * vp9_convert_qindex_to_q(qindex)) + 2.742);
+    sad_per_bit16lut[qindex] =
+        (int)((0.0418 * vp9_convert_qindex_to_q(qindex)) + 2.4107);
+  }
+}
+/* Base RD multiplier for a quantizer index: (11 * q * q) >> 6, where q is
+ * vp9_dc_quant(qindex, 0). */
+static int compute_rd_mult(int qindex) {
+  const int dc_q = vp9_dc_quant(qindex, 0);
+
+  return (11 * dc_q * dc_q) >> 6;
+}
+/* Loads the SAD-per-bit table entries for QIndex into cpi->mb. */
+void vp9_initialize_me_consts(VP9_COMP *cpi, int QIndex) {
+  cpi->mb.sadperbit4 = sad_per_bit4lut[QIndex];
+  cpi->mb.sadperbit16 = sad_per_bit16lut[QIndex];
+}
+
+/* Derive cpi's RD multipliers, mode thresholds and cost tables from QIndex. */
+void vp9_initialize_rd_consts(VP9_COMP *cpi, int QIndex) {
+ int q, i;
+
+ vp9_clear_system_state(); // __asm emms;
+
+ // Further tests required to see if optimum is different
+ // for key frames, golden frames and arf frames.
+ // if (cpi->common.refresh_golden_frame ||
+ // cpi->common.refresh_alt_ref_frame)
+ QIndex = (QIndex < 0) ? 0 : ((QIndex > MAXQ) ? MAXQ : QIndex);
+
+ cpi->RDMULT = compute_rd_mult(QIndex);
+
+ // Extend rate multiplier along side quantizer zbin increases
+ if (cpi->zbin_over_quant > 0) {
+ double oq_factor;
+
+ // Experimental code using the same basic equation as used for Q above
+ // The units of cpi->zbin_over_quant are 1/128 of Q bin size
+ oq_factor = 1.0 + ((double)0.0015625 * cpi->zbin_over_quant);
+ cpi->RDMULT = (int)((double)cpi->RDMULT * oq_factor * oq_factor);
+ }
+ // In pass 2, non-key frames get RDMULT raised by rd_iifactor[...] / 16.
+ if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
+ if (cpi->twopass.next_iiratio > 31)
+ cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
+ else
+ cpi->RDMULT +=
+ (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
+ }
+
+ if (cpi->RDMULT < 7)
+ cpi->RDMULT = 7;
+ // errorperbit is RDMULT / 110, but never less than 1.
+ cpi->mb.errorperbit = (cpi->RDMULT / 110);
+ cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);
+
+ vp9_set_speed_features(cpi);
+ // q scales the mode thresholds: 4 * (int)((dc_quant >> 2)^1.25), minimum 8.
+ q = (int)pow(vp9_dc_quant(QIndex, 0) >> 2, 1.25);
+ q = q << 2;
+ cpi->RDMULT = cpi->RDMULT << 4;
+
+ if (q < 8)
+ q = 8;
+ // Multipliers above 1000 are divided by 100 with RDDIV = 1, else RDDIV = 100.
+ if (cpi->RDMULT > 1000) {
+ cpi->RDDIV = 1;
+ cpi->RDMULT /= 100;
+ // NOTE(review): thresh_mult[i] * q is not guarded against overflow here.
+ for (i = 0; i < MAX_MODES; i++) {
+ if (cpi->sf.thresh_mult[i] < INT_MAX) {
+ cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q / 100;
+ } else {
+ cpi->rd_threshes[i] = INT_MAX;
+ }
+
+ cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
+ }
+ } else {
+ cpi->RDDIV = 100;
+
+ for (i = 0; i < MAX_MODES; i++) {
+ if (cpi->sf.thresh_mult[i] < (INT_MAX / q)) {
+ cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q;
+ } else {
+ cpi->rd_threshes[i] = INT_MAX;
+ }
+
+ cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
+ }
+ }
+ // Token cost tables (regular and hybrid) for each transform size.
+ fill_token_costs(
+ cpi->mb.token_costs[TX_4X4],
+ (const vp9_prob( *)[8][PREV_COEF_CONTEXTS][11]) cpi->common.fc.coef_probs,
+ BLOCK_TYPES);
+ fill_token_costs(
+ cpi->mb.hybrid_token_costs[TX_4X4],
+ (const vp9_prob( *)[8][PREV_COEF_CONTEXTS][11])
+ cpi->common.fc.hybrid_coef_probs,
+ BLOCK_TYPES);
+ // NOTE(review): casts hard-code 8 and 11, presumably COEF_BANDS/ENTROPY_NODES.
+ fill_token_costs(
+ cpi->mb.token_costs[TX_8X8],
+ (const vp9_prob( *)[8][PREV_COEF_CONTEXTS][11]) cpi->common.fc.coef_probs_8x8,
+ BLOCK_TYPES_8X8);
+ fill_token_costs(
+ cpi->mb.hybrid_token_costs[TX_8X8],
+ (const vp9_prob( *)[8][PREV_COEF_CONTEXTS][11])
+ cpi->common.fc.hybrid_coef_probs_8x8,
+ BLOCK_TYPES_8X8);
+
+ fill_token_costs(
+ cpi->mb.token_costs[TX_16X16],
+ (const vp9_prob(*)[8][PREV_COEF_CONTEXTS][11]) cpi->common.fc.coef_probs_16x16,
+ BLOCK_TYPES_16X16);
+ fill_token_costs(
+ cpi->mb.hybrid_token_costs[TX_16X16],
+ (const vp9_prob(*)[8][PREV_COEF_CONTEXTS][11])
+ cpi->common.fc.hybrid_coef_probs_16x16,
+ BLOCK_TYPES_16X16);
+
+ /*rough estimate for costing*/
+ cpi->common.kf_ymode_probs_index = cpi->common.base_qindex >> 4;
+ vp9_init_mode_costs(cpi);
+ // The motion vector cost table is only built for non-key frames.
+ if (cpi->common.frame_type != KEY_FRAME)
+ {
+ vp9_build_nmv_cost_table(
+ cpi->mb.nmvjointcost,
+ cpi->mb.e_mbd.allow_high_precision_mv ?
+ cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
+ &cpi->common.fc.nmvc,
+ cpi->mb.e_mbd.allow_high_precision_mv, 1, 1);
+ }
+}
+/* Returns the sum of squared differences of the first block_size entries. */
+int vp9_block_error_c(short *coeff, short *dqcoeff, int block_size) {
+  int sse = 0;
+  int idx = 0;
+  while (idx < block_size) {
+    const int delta = coeff[idx] - dqcoeff[idx];
+    sse += delta * delta;
+    ++idx;
+  }
+  return sse;
+}
+/* Sums squared (coeff - dqcoeff) over blocks 0, 4, 8 and 12 of mb. */
+int vp9_mbblock_error_8x8_c(MACROBLOCK *mb, int dc) {
+  int blk, total = 0;
+
+  // Each visited block contributes coefficients dc..63.
+  for (blk = 0; blk < 16; blk += 4) {
+    const BLOCK *const src = &mb->block[blk];
+    const BLOCKD *const rec = &mb->e_mbd.block[blk];
+    int pos, block_sse = 0;
+
+    for (pos = dc; pos < 64; pos++) {
+      const int delta = src->coeff[pos] - rec->dqcoeff[pos];
+      block_sse += delta * delta;
+    }
+    total += block_sse;
+  }
+
+  return total;
+}
+/* Sums squared (coeff - dqcoeff) over blocks 0..15 of mb. */
+int vp9_mbblock_error_c(MACROBLOCK *mb, int dc) {
+  int blk, total = 0;
+
+  // Each block contributes coefficients dc..15.
+  for (blk = 0; blk < 16; blk++) {
+    const BLOCK *const src = &mb->block[blk];
+    const BLOCKD *const rec = &mb->e_mbd.block[blk];
+    int pos, block_sse = 0;
+
+    for (pos = dc; pos < 16; pos++) {
+      const int delta = src->coeff[pos] - rec->dqcoeff[pos];
+      block_sse += delta * delta;
+    }
+    total += block_sse;
+  }
+
+  return total;
+}
+/*
+ * Sums vp9_block_error_c() over blocks 16..23 of the macroblock, 16
+ * coefficients per block, and returns the total. (The name suggests these
+ * are the chroma blocks.)
+ */
+int vp9_mbuverror_c(MACROBLOCK *mb) {
+  int blk;
+  int total = 0;
+
+  for (blk = 16; blk < 24; blk++) {
+    total += vp9_block_error_c(mb->block[blk].coeff,
+                               mb->e_mbd.block[blk].dqcoeff, 16);
+  }
+
+  return total;
+}
+/* Returns the summed SSE of the 8x8 U and V blocks against xd->pre at the MV. */
+int vp9_uvsse(MACROBLOCK *x) {
+ unsigned char *uptr, *vptr;
+ unsigned char *upred_ptr = (*(x->block[16].base_src) + x->block[16].src);
+ unsigned char *vpred_ptr = (*(x->block[20].base_src) + x->block[20].src);
+ int uv_stride = x->block[16].src_stride;
+ // Note: upred_ptr/vpred_ptr address the source planes (base_src + src).
+ unsigned int sse1 = 0;
+ unsigned int sse2 = 0;
+ int mv_row = x->e_mbd.mode_info_context->mbmi.mv[0].as_mv.row;
+ int mv_col = x->e_mbd.mode_info_context->mbmi.mv[0].as_mv.col;
+ int offset;
+ int pre_stride = x->e_mbd.block[16].pre_stride;
+ // Move each MV component one step away from zero, then halve it.
+ if (mv_row < 0)
+ mv_row -= 1;
+ else
+ mv_row += 1;
+
+ if (mv_col < 0)
+ mv_col -= 1;
+ else
+ mv_col += 1;
+
+ mv_row /= 2;
+ mv_col /= 2;
+ // The MV >> 3 gives the buffer offset; the low 3 bits pick the subpel path.
+ offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
+ uptr = x->e_mbd.pre.u_buffer + offset;
+ vptr = x->e_mbd.pre.v_buffer + offset;
+
+ if ((mv_row | mv_col) & 7) {
+ vp9_sub_pixel_variance8x8(uptr, pre_stride, (mv_col & 7) << 1,
+ (mv_row & 7) << 1, upred_ptr, uv_stride, &sse2);
+ vp9_sub_pixel_variance8x8(vptr, pre_stride, (mv_col & 7) << 1,
+ (mv_row & 7) << 1, vpred_ptr, uv_stride, &sse1);
+ sse2 += sse1;
+ } else {
+ vp9_variance8x8(uptr, pre_stride, upred_ptr, uv_stride, &sse2);
+ vp9_variance8x8(vptr, pre_stride, vpred_ptr, uv_stride, &sse1);
+ sse2 += sse1;
+ }
+ return sse2;
+
+}
+/* Token cost of a block of at most 4 coefficients, using the TX_8X8 tables. */
+static int cost_coeffs_2x2(MACROBLOCK *mb,
+ BLOCKD *b, PLANE_TYPE type,
+ ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) {
+ int c = (type == PLANE_TYPE_Y_NO_DC); /* start at coef 0, unless Y with Y2 */
+ int eob = b->eob;
+ int pt; /* surrounding block/prev coef predictor */
+ int cost = 0;
+ short *qcoeff_ptr = b->qcoeff;
+
+ VP9_COMBINEENTROPYCONTEXTS(pt, *a, *l);
+ assert(eob <= 4);
+ // Add each coefficient's token cost and value cost, in zig-zag order.
+ for (; c < eob; c++) {
+ int v = qcoeff_ptr[vp9_default_zig_zag1d[c]];
+ int t = vp9_dct_value_tokens_ptr[v].Token;
+ cost += mb->token_costs[TX_8X8][type][vp9_coef_bands[c]][pt][t];
+ cost += vp9_dct_value_cost_ptr[v];
+ pt = vp9_prev_token_class[t];
+ }
+ // Add the end-of-block token unless all 4 positions were coded.
+ if (c < 4)
+ cost += mb->token_costs[TX_8X8][type][vp9_coef_bands[c]]
+ [pt] [DCT_EOB_TOKEN];
+ // Both *a and *l are set to 1 if c ended above !type, else to 0.
+ pt = (c > !type);
+ *a = *l = pt;
+ return cost;
+}
+/* Token cost of block b's quantized coefficients; also updates *a and *l. */
+static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, PLANE_TYPE type,
+ ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
+ int tx_size) {
+ const int eob = b->eob;
+ int c = (type == PLANE_TYPE_Y_NO_DC); /* start at coef 0, unless Y with Y2 */
+ int cost = 0, default_eob, seg_eob;
+ int pt; /* surrounding block/prev coef predictor */
+ int const *scan, *band;
+ short *qcoeff_ptr = b->qcoeff;
+ MACROBLOCKD *xd = &mb->e_mbd;
+ MB_MODE_INFO *mbmi = &mb->e_mbd.mode_info_context->mbmi;
+ TX_TYPE tx_type = DCT_DCT;
+ int segment_id = mbmi->segment_id;
+ scan = vp9_default_zig_zag1d;
+ band = vp9_coef_bands;
+ default_eob = 16;
+ // Pick scan order, band table and coefficient count for the transform size.
+ switch (tx_size) {
+ case TX_4X4:
+ if (type == PLANE_TYPE_Y_WITH_DC) {
+ tx_type = get_tx_type_4x4(xd, b);
+ if (tx_type != DCT_DCT) {
+ switch (tx_type) {
+ case ADST_DCT:
+ scan = vp9_row_scan;
+ break;
+
+ case DCT_ADST:
+ scan = vp9_col_scan;
+ break;
+
+ default:
+ scan = vp9_default_zig_zag1d;
+ break;
+ }
+ }
+ }
+
+ break;
+ case TX_8X8:
+ scan = vp9_default_zig_zag1d_8x8;
+ band = vp9_coef_bands_8x8;
+ default_eob = 64;
+ if (type == PLANE_TYPE_Y_WITH_DC) {
+ BLOCKD *bb;
+ int ib = (int)(b - xd->block);
+ if (ib < 16) {
+ ib = (ib & 8) + ((ib & 4) >> 1); // e.g. 0, 4, 8, 12 -> 0, 2, 8, 10
+ bb = xd->block + ib;
+ tx_type = get_tx_type_8x8(xd, bb);
+ }
+ }
+ break;
+ case TX_16X16:
+ scan = vp9_default_zig_zag1d_16x16;
+ band = vp9_coef_bands_16x16;
+ default_eob = 256;
+ if (type == PLANE_TYPE_Y_WITH_DC) {
+ tx_type = get_tx_type_16x16(xd, b);
+ }
+ break;
+ default:
+ break;
+ }
+ if (vp9_segfeature_active(&mb->e_mbd, segment_id, SEG_LVL_EOB))
+ seg_eob = vp9_get_segdata(&mb->e_mbd, segment_id, SEG_LVL_EOB);
+ else
+ seg_eob = default_eob;
+ // The starting context pt is combined from the above and left contexts.
+ VP9_COMBINEENTROPYCONTEXTS(pt, *a, *l);
+ // Non-DCT transforms use the hybrid cost tables, DCT the regular ones.
+ if (tx_type != DCT_DCT) {
+ for (; c < eob; c++) {
+ int v = qcoeff_ptr[scan[c]];
+ int t = vp9_dct_value_tokens_ptr[v].Token;
+ cost += mb->hybrid_token_costs[tx_size][type][band[c]][pt][t];
+ cost += vp9_dct_value_cost_ptr[v];
+ pt = vp9_prev_token_class[t];
+ }
+ if (c < seg_eob)
+ cost += mb->hybrid_token_costs[tx_size][type][band[c]]
+ [pt][DCT_EOB_TOKEN];
+ } else {
+ for (; c < eob; c++) {
+ int v = qcoeff_ptr[scan[c]];
+ int t = vp9_dct_value_tokens_ptr[v].Token;
+ cost += mb->token_costs[tx_size][type][band[c]][pt][t];
+ cost += vp9_dct_value_cost_ptr[v];
+ pt = vp9_prev_token_class[t];
+ }
+ if (c < seg_eob)
+ cost += mb->token_costs[tx_size][type][band[c]]
+ [pt][DCT_EOB_TOKEN];
+ }
+
+ // Both *a and *l are set to 1 if c ended above !type, else to 0.
+ pt = (c > !type);
+ *a = *l = pt;
+ return cost;
+}
+/* Token cost of the 16 luma 4x4 blocks, plus block 24 when has_2nd_order.
+ * If backup is nonzero the entropy contexts are updated on local copies. */
+static int rdcost_mby_4x4(MACROBLOCK *mb, int has_2nd_order, int backup) {
+  MACROBLOCKD *xd = &mb->e_mbd;
+  const PLANE_TYPE y_type =
+      has_2nd_order ? PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC;
+  ENTROPY_CONTEXT_PLANES above_copy, left_copy;
+  ENTROPY_CONTEXT *above = (ENTROPY_CONTEXT *)xd->above_context;
+  ENTROPY_CONTEXT *left = (ENTROPY_CONTEXT *)xd->left_context;
+  int total = 0;
+  int blk;
+
+  if (backup) {
+    // Work on scratch copies so the caller's contexts are left unchanged.
+    vpx_memcpy(&above_copy, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
+    vpx_memcpy(&left_copy, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));
+    above = (ENTROPY_CONTEXT *)&above_copy;
+    left = (ENTROPY_CONTEXT *)&left_copy;
+  }
+
+  for (blk = 0; blk < 16; blk++) {
+    total += cost_coeffs(mb, xd->block + blk, y_type,
+                         above + vp9_block2above[blk],
+                         left + vp9_block2left[blk], TX_4X4);
+  }
+
+  if (has_2nd_order) {
+    total += cost_coeffs(mb, xd->block + 24, PLANE_TYPE_Y2,
+                         above + vp9_block2above[24],
+                         left + vp9_block2left[24], TX_4X4);
+  }
+
+  return total;
+}
+/* Luma rate, distortion and skippable flag of mb with 4x4 transforms. */
+static void macro_block_yrd_4x4(MACROBLOCK *mb,
+ int *Rate,
+ int *Distortion,
+ int *skippable, int backup) {
+ MACROBLOCKD *const xd = &mb->e_mbd;
+ BLOCK *const mb_y2 = mb->block + 24;
+ BLOCKD *const x_y2 = xd->block + 24;
+ int d, has_2nd_order;
+ // Set the transform size first; the 2nd order query below reads xd.
+ xd->mode_info_context->mbmi.txfm_size = TX_4X4;
+ has_2nd_order = get_2nd_order_usage(xd);
+ // Fdct and building the 2nd order block
+ vp9_transform_mby_4x4(mb);
+ vp9_quantize_mby_4x4(mb);
+ d = vp9_mbblock_error(mb, has_2nd_order);
+ if (has_2nd_order)
+ d += vp9_block_error(mb_y2->coeff, x_y2->dqcoeff, 16);
+ // Distortion is the summed squared coefficient error divided by 4.
+ *Distortion = (d >> 2);
+ // rate
+ *Rate = rdcost_mby_4x4(mb, has_2nd_order, backup);
+ *skippable = vp9_mby_is_skippable_4x4(&mb->e_mbd, has_2nd_order);
+}
+/* Token cost of the four luma 8x8 blocks, plus block 24 when has_2nd_order.
+ * If backup is nonzero the entropy contexts are updated on local copies. */
+static int rdcost_mby_8x8(MACROBLOCK *mb, int has_2nd_order, int backup) {
+  MACROBLOCKD *xd = &mb->e_mbd;
+  const PLANE_TYPE y_type =
+      has_2nd_order ? PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC;
+  ENTROPY_CONTEXT_PLANES above_copy, left_copy;
+  ENTROPY_CONTEXT *above = (ENTROPY_CONTEXT *)xd->above_context;
+  ENTROPY_CONTEXT *left = (ENTROPY_CONTEXT *)xd->left_context;
+  int total = 0;
+  int blk;
+
+  if (backup) {
+    // Work on scratch copies so the caller's contexts are left unchanged.
+    vpx_memcpy(&above_copy, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
+    vpx_memcpy(&left_copy, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));
+    above = (ENTROPY_CONTEXT *)&above_copy;
+    left = (ENTROPY_CONTEXT *)&left_copy;
+  }
+  for (blk = 0; blk < 16; blk += 4) {
+    total += cost_coeffs(mb, xd->block + blk, y_type,
+                         above + vp9_block2above_8x8[blk],
+                         left + vp9_block2left_8x8[blk], TX_8X8);
+  }
+
+  if (has_2nd_order) {
+    total += cost_coeffs_2x2(mb, xd->block + 24, PLANE_TYPE_Y2,
+                             above + vp9_block2above[24],
+                             left + vp9_block2left[24]);
+  }
+  return total;
+}
+/* Luma rate, distortion and skippable flag of mb with 8x8 transforms. */
+static void macro_block_yrd_8x8(MACROBLOCK *mb,
+ int *Rate,
+ int *Distortion,
+ int *skippable, int backup) {
+ MACROBLOCKD *const xd = &mb->e_mbd;
+ BLOCK *const mb_y2 = mb->block + 24;
+ BLOCKD *const x_y2 = xd->block + 24;
+ int d, has_2nd_order;
+
+ xd->mode_info_context->mbmi.txfm_size = TX_8X8;
+ // NOTE(review): calls the _c error function directly, unlike the 4x4 path.
+ vp9_transform_mby_8x8(mb);
+ vp9_quantize_mby_8x8(mb);
+ has_2nd_order = get_2nd_order_usage(xd);
+ d = vp9_mbblock_error_8x8_c(mb, has_2nd_order);
+ if (has_2nd_order)
+ d += vp9_block_error(mb_y2->coeff, x_y2->dqcoeff, 16);
+ // Distortion is the summed squared coefficient error divided by 4.
+ *Distortion = (d >> 2);
+ // rate
+ *Rate = rdcost_mby_8x8(mb, has_2nd_order, backup);
+ *skippable = vp9_mby_is_skippable_8x8(&mb->e_mbd, has_2nd_order);
+}
+/* Token cost of the 16x16 luma transform block held in xd->block[0].
+ * If backup is nonzero the entropy contexts are updated on local copies;
+ * otherwise xd's above/left contexts are updated in place. */
+static int rdcost_mby_16x16(MACROBLOCK *mb, int backup) {
+  MACROBLOCKD *xd = &mb->e_mbd;
+  ENTROPY_CONTEXT_PLANES above_copy, left_copy;
+  ENTROPY_CONTEXT *above = (ENTROPY_CONTEXT *)xd->above_context;
+  ENTROPY_CONTEXT *left = (ENTROPY_CONTEXT *)xd->left_context;
+
+  if (backup) {
+    // Work on scratch copies so the caller's contexts are left unchanged.
+    vpx_memcpy(&above_copy, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
+    vpx_memcpy(&left_copy, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));
+    above = (ENTROPY_CONTEXT *)&above_copy;
+    left = (ENTROPY_CONTEXT *)&left_copy;
+  }
+
+  // A single cost_coeffs() call covers the whole block.
+  return cost_coeffs(mb, xd->block, PLANE_TYPE_Y_WITH_DC, above, left,
+                     TX_16X16);
+}
+/* Luma rate, distortion and skippable flag of mb with a 16x16 transform. */
+static void macro_block_yrd_16x16(MACROBLOCK *mb, int *Rate, int *Distortion,
+ int *skippable, int backup) {
+ int d;
+ MACROBLOCKD *xd = &mb->e_mbd;
+
+ xd->mode_info_context->mbmi.txfm_size = TX_16X16;
+ vp9_transform_mby_16x16(mb);
+ vp9_quantize_mby_16x16(mb);
+ // TODO(jingning) is it possible to quickly determine whether to force
+ // trailing coefficients to be zero, instead of running trellis
+ // optimization in the rate-distortion optimization loop?
+ if (mb->e_mbd.mode_info_context->mbmi.mode < I8X8_PRED)
+ vp9_optimize_mby_16x16(mb);
+ // NOTE(review): presumably the 16 x 16-coefficient sum spans the whole block.
+ d = vp9_mbblock_error(mb, 0);
+
+ *Distortion = (d >> 2);
+ // rate
+ *Rate = rdcost_mby_16x16(mb, backup);
+ *skippable = vp9_mby_is_skippable_16x16(&mb->e_mbd);
+}
+/* Picks mbmi->txfm_size from per-size rate/distortion/skip; fills txfm_cache. */
+static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
+ int r[2][TX_SIZE_MAX], int *rate,
+ int d[TX_SIZE_MAX], int *distortion,
+ int s[TX_SIZE_MAX], int *skip,
+ int64_t txfm_cache[NB_TXFM_MODES]) {
+ VP9_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
+ vp9_prob skip_prob = cm->mb_no_coeff_skip ?
+ vp9_get_pred_prob(cm, xd, PRED_MBSKIP) : 128;
+ int64_t rd[2][TX_SIZE_MAX];
+ int n;
+ // r[1][] is r[0][] plus the cm->prob_tx bit costs of coding that size.
+ r[1][TX_16X16] = r[0][TX_16X16] + vp9_cost_one(cm->prob_tx[0]) +
+ vp9_cost_one(cm->prob_tx[1]);
+ r[1][TX_8X8] = r[0][TX_8X8] + vp9_cost_one(cm->prob_tx[0]) +
+ vp9_cost_zero(cm->prob_tx[1]);
+ r[1][TX_4X4] = r[0][TX_4X4] + vp9_cost_zero(cm->prob_tx[0]);
+ // RD cost per size; a skippable size is charged only the skip bit (s1).
+ if (cm->mb_no_coeff_skip) {
+ int s0, s1;
+
+ assert(skip_prob > 0);
+ s0 = vp9_cost_bit(skip_prob, 0);
+ s1 = vp9_cost_bit(skip_prob, 1);
+
+ for (n = TX_4X4; n <= TX_16X16; n++) {
+ if (s[n]) {
+ rd[0][n] = rd[1][n] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
+ } else {
+ rd[0][n] = RDCOST(x->rdmult, x->rddiv, r[0][n] + s0, d[n]);
+ rd[1][n] = RDCOST(x->rdmult, x->rddiv, r[1][n] + s0, d[n]);
+ }
+ }
+ } else {
+ for (n = TX_4X4; n <= TX_16X16; n++) {
+ rd[0][n] = RDCOST(x->rdmult, x->rddiv, r[0][n], d[n]);
+ rd[1][n] = RDCOST(x->rdmult, x->rddiv, r[1][n], d[n]);
+ }
+ }
+ // Fixed modes dictate the size; TX_MODE_SELECT takes the lowest rd[1][].
+ if ( cm->txfm_mode == ALLOW_16X16 ||
+ (cm->txfm_mode == TX_MODE_SELECT &&
+ rd[1][TX_16X16] < rd[1][TX_8X8] && rd[1][TX_16X16] < rd[1][TX_4X4])) {
+ mbmi->txfm_size = TX_16X16;
+ } else if (cm->txfm_mode == ALLOW_8X8 ||
+ (cm->txfm_mode == TX_MODE_SELECT && rd[1][TX_8X8] < rd[1][TX_4X4])) {
+ mbmi->txfm_size = TX_8X8;
+ } else {
+ assert(cm->txfm_mode == ONLY_4X4 ||
+ (cm->txfm_mode == TX_MODE_SELECT && rd[1][TX_4X4] <= rd[1][TX_8X8]));
+ mbmi->txfm_size = TX_4X4;
+ }
+
+ *distortion = d[mbmi->txfm_size];
+ *rate = r[cm->txfm_mode == TX_MODE_SELECT][mbmi->txfm_size];
+ *skip = s[mbmi->txfm_size];
+ // Record the RD cost this block would have under each transform mode.
+ txfm_cache[ONLY_4X4] = rd[0][TX_4X4];
+ txfm_cache[ALLOW_8X8] = rd[0][TX_8X8];
+ txfm_cache[ALLOW_16X16] = rd[0][TX_16X16];
+ if (rd[1][TX_16X16] < rd[1][TX_8X8] && rd[1][TX_16X16] < rd[1][TX_4X4])
+ txfm_cache[TX_MODE_SELECT] = rd[1][TX_16X16];
+ else
+ txfm_cache[TX_MODE_SELECT] = rd[1][TX_4X4] < rd[1][TX_8X8] ?
+ rd[1][TX_4X4] : rd[1][TX_8X8];
+}
+/* Luma RD of x for all three transform sizes, then picks one of them. */
+static void macro_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
+ int *distortion, int *skippable,
+ int64_t txfm_cache[NB_TXFM_MODES]) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ int r[2][TX_SIZE_MAX], d[TX_SIZE_MAX], s[TX_SIZE_MAX];
+
+ vp9_subtract_mby(x->src_diff, *(x->block[0].base_src), xd->predictor,
+ x->block[0].src_stride);
+ // Each size is evaluated with backup = 1, i.e. on copied entropy contexts.
+ macro_block_yrd_16x16(x, &r[0][TX_16X16], &d[TX_16X16],
+ &s[TX_16X16], 1);
+ macro_block_yrd_8x8(x, &r[0][TX_8X8], &d[TX_8X8], &s[TX_8X8], 1);
+ macro_block_yrd_4x4(x, &r[0][TX_4X4], &d[TX_4X4], &s[TX_4X4], 1);
+
+ choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skippable,
+ txfm_cache);
+}
+/* Copies the left 4x4 pixels of a stride-16 block from predictor to dst. */
+static void copy_predictor(unsigned char *dst, const unsigned char *predictor) {
+  int row, col;
+  // Copied bytewise rather than through unsigned int pointers, which would
+  // assume 4-byte ints and aligned buffers and break strict aliasing.
+  for (row = 0; row < 4; row++)
+    for (col = 0; col < 4; col++)
+      dst[row * 16 + col] = predictor[row * 16 + col];
+}
+/* Luma RD summed over four 16x16 quadrants, for each transform size. */
+#if CONFIG_SUPERBLOCKS
+static void super_block_yrd(VP9_COMP *cpi,
+ MACROBLOCK *x, int *rate, int *distortion,
+ int *skip,
+ int64_t txfm_cache[NB_TXFM_MODES]) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ int r[2][TX_SIZE_MAX], d[TX_SIZE_MAX], s[TX_SIZE_MAX], n;
+ const uint8_t *src = x->src.y_buffer, *dst = xd->dst.y_buffer;
+ int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride;
+ ENTROPY_CONTEXT_PLANES t_above[3][2], *orig_above = xd->above_context;
+ ENTROPY_CONTEXT_PLANES t_left[3][2], *orig_left = xd->left_context;
+ // One private context copy per transform size; reset the accumulators.
+ for (n = TX_4X4; n <= TX_16X16; n++) {
+ vpx_memcpy(t_above[n], xd->above_context, sizeof(t_above[n]));
+ vpx_memcpy(t_left[n], xd->left_context, sizeof(t_left[n]));
+ r[0][n] = 0;
+ d[n] = 0;
+ s[n] = 1;
+ }
+ // Visit the four 16x16 quadrants at (x_idx, y_idx) = (n & 1, n >> 1).
+ for (n = 0; n < 4; n++) {
+ int x_idx = n & 1, y_idx = n >> 1;
+ int r_tmp, d_tmp, s_tmp;
+
+ vp9_subtract_mby_s_c(x->src_diff,
+ src + x_idx * 16 + y_idx * 16 * src_y_stride,
+ src_y_stride,
+ dst + x_idx * 16 + y_idx * 16 * dst_y_stride,
+ dst_y_stride);
+
+ xd->above_context = &t_above[TX_16X16][x_idx];
+ xd->left_context = &t_left[TX_16X16][y_idx];
+ macro_block_yrd_16x16(x, &r_tmp, &d_tmp, &s_tmp, 0);
+ d[TX_16X16] += d_tmp;
+ r[0][TX_16X16] += r_tmp;
+ s[TX_16X16] = s[TX_16X16] && s_tmp;
+
+ xd->above_context = &t_above[TX_4X4][x_idx];
+ xd->left_context = &t_left[TX_4X4][y_idx];
+ macro_block_yrd_4x4(x, &r_tmp, &d_tmp, &s_tmp, 0);
+ d[TX_4X4] += d_tmp;
+ r[0][TX_4X4] += r_tmp;
+ s[TX_4X4] = s[TX_4X4] && s_tmp;
+
+ xd->above_context = &t_above[TX_8X8][x_idx];
+ xd->left_context = &t_left[TX_8X8][y_idx];
+ macro_block_yrd_8x8(x, &r_tmp, &d_tmp, &s_tmp, 0);
+ d[TX_8X8] += d_tmp;
+ r[0][TX_8X8] += r_tmp;
+ s[TX_8X8] = s[TX_8X8] && s_tmp;
+ }
+
+ choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache);
+ // Restore the caller's context pointers.
+ xd->above_context = orig_above;
+ xd->left_context = orig_left;
+}
+#endif
+/*
+ * Copies the left 8x8 pixels of a stride-16 block from predictor to dst.
+ *
+ * dst:       destination buffer, written at rows 0..7, columns 0..7
+ * predictor: source buffer with the same stride-16 layout
+ *
+ * Only bytes [16 * row, 16 * row + 8) of each of the eight rows are
+ * touched; the remaining columns of dst are left as they were.
+ *
+ * The copy is done bytewise rather than through unsigned int pointers,
+ * which would assume 4-byte ints and suitably aligned buffers and would
+ * violate the strict aliasing rule.
+ */
+static void copy_predictor_8x8(unsigned char *dst,
+                               const unsigned char *predictor) {
+  int row, col;
+
+  for (row = 0; row < 8; row++)
+    for (col = 0; col < 8; col++)
+      dst[row * 16 + col] = predictor[row * 16 + col];
+}
+/* Tries each 4x4 intra mode for block b; returns the best RD cost found. */
+static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be,
+ BLOCKD *b, B_PREDICTION_MODE *best_mode,
+#if CONFIG_COMP_INTRA_PRED
+ B_PREDICTION_MODE *best_second_mode,
+ int allow_comp,
+#endif
+ int *bmode_costs,
+ ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
+ int *bestrate, int *bestratey,
+ int *bestdistortion) {
+ B_PREDICTION_MODE mode;
+ MACROBLOCKD *xd = &x->e_mbd;
+
+#if CONFIG_COMP_INTRA_PRED
+ B_PREDICTION_MODE mode2;
+#endif
+ int64_t best_rd = INT64_MAX;
+ int rate = 0;
+ int distortion;
+
+ ENTROPY_CONTEXT ta = *a, tempa = *a;
+ ENTROPY_CONTEXT tl = *l, templ = *l;
+ TX_TYPE tx_type = DCT_DCT;
+ TX_TYPE best_tx_type = DCT_DCT;
+ /*
+ * The predictor buffer is a 2d buffer with a stride of 16. Create
+ * a temp buffer that meets the stride requirements, but we are only
+ * interested in the left 4x4 block
+ * */
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, best_predictor, 16 * 4);
+ DECLARE_ALIGNED_ARRAY(16, short, best_dqcoeff, 16);
+
+#if CONFIG_NEWBINTRAMODES
+ b->bmi.as_mode.context = vp9_find_bpred_context(b);
+#endif
+ for (mode = B_DC_PRED; mode < LEFT4X4; mode++) {
+#if CONFIG_COMP_INTRA_PRED
+ for (mode2 = (allow_comp ? 0 : (B_DC_PRED - 1));
+ mode2 != (allow_comp ? (mode + 1) : 0); mode2++) {
+#endif
+ int64_t this_rd;
+ int ratey;
+
+#if CONFIG_NEWBINTRAMODES
+ if (xd->frame_type == KEY_FRAME) {
+ if (mode == B_CONTEXT_PRED) continue;
+#if CONFIG_COMP_INTRA_PRED
+ if (mode2 == B_CONTEXT_PRED) continue;
+#endif
+ } else {
+ if (mode >= B_CONTEXT_PRED - CONTEXT_PRED_REPLACEMENTS &&
+ mode < B_CONTEXT_PRED)
+ continue;
+#if CONFIG_COMP_INTRA_PRED
+ if (mode2 >= B_CONTEXT_PRED - CONTEXT_PRED_REPLACEMENTS &&
+ mode2 < B_CONTEXT_PRED)
+ continue;
+#endif
+ }
+#endif
+
+ b->bmi.as_mode.first = mode;
+#if CONFIG_NEWBINTRAMODES
+ rate = bmode_costs[
+ mode == B_CONTEXT_PRED ? mode - CONTEXT_PRED_REPLACEMENTS : mode];
+#else
+ rate = bmode_costs[mode];
+#endif
+
+#if CONFIG_COMP_INTRA_PRED
+ if (mode2 == (B_PREDICTION_MODE)(B_DC_PRED - 1)) {
+#endif
+ vp9_intra4x4_predict(b, mode, b->predictor);
+#if CONFIG_COMP_INTRA_PRED
+ } else {
+ vp9_comp_intra4x4_predict(b, mode, mode2, b->predictor);
+#if CONFIG_NEWBINTRAMODES
+ rate += bmode_costs[
+ mode2 == B_CONTEXT_PRED ?
+ mode2 - CONTEXT_PRED_REPLACEMENTS : mode2];
+#else
+ rate += bmode_costs[mode2];
+#endif
+ }
+#endif
+ vp9_subtract_b(be, b, 16);
+ // Forward transform (hybrid or DCT, per tx_type) and quantize the residual.
+ b->bmi.as_mode.first = mode;
+ tx_type = get_tx_type_4x4(xd, b);
+ if (tx_type != DCT_DCT) {
+ vp9_fht(be->src_diff, 32, be->coeff, tx_type, 4);
+ vp9_ht_quantize_b_4x4(be, b, tx_type);
+ } else {
+ x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
+ x->quantize_b_4x4(be, b);
+ }
+ // Cost against copies so *a and *l change only when a mode becomes best.
+ tempa = ta;
+ templ = tl;
+
+ ratey = cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC, &tempa, &templ, TX_4X4);
+ rate += ratey;
+ distortion = vp9_block_error(be->coeff, b->dqcoeff, 16) >> 2;
+
+ this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
+ // New best: record costs, contexts, predictor and dequantized coefficients.
+ if (this_rd < best_rd) {
+ *bestrate = rate;
+ *bestratey = ratey;
+ *bestdistortion = distortion;
+ best_rd = this_rd;
+ *best_mode = mode;
+ best_tx_type = tx_type;
+
+#if CONFIG_COMP_INTRA_PRED
+ *best_second_mode = mode2;
+#endif
+ *a = tempa;
+ *l = templ;
+ copy_predictor(best_predictor, b->predictor);
+ vpx_memcpy(best_dqcoeff, b->dqcoeff, 32);
+ }
+#if CONFIG_COMP_INTRA_PRED
+ }
+#endif
+ }
+ b->bmi.as_mode.first = (B_PREDICTION_MODE)(*best_mode);
+#if CONFIG_COMP_INTRA_PRED
+ b->bmi.as_mode.second = (B_PREDICTION_MODE)(*best_second_mode);
+#endif
+ // NOTE(review): b->eob presumably reflects the last mode tried - confirm.
+ // inverse transform
+ if (best_tx_type != DCT_DCT)
+ vp9_ihtllm(best_dqcoeff, b->diff, 32, best_tx_type, 4, b->eob);
+ else
+ xd->inv_xform4x4_x8(best_dqcoeff, b->diff, 32);
+ // Reconstruct into the destination from the saved best predictor and b->diff.
+ vp9_recon_b(best_predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+
+ return best_rd;
+}
+/* Picks a 4x4 intra mode for each of the 16 blocks; INT64_MAX if >= best_rd. */
+static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, int *Rate,
+ int *rate_y, int *Distortion, int64_t best_rd,
+#if CONFIG_COMP_INTRA_PRED
+ int allow_comp,
+#endif
+ int update_contexts) {
+ int i;
+ MACROBLOCKD *const xd = &mb->e_mbd;
+ int cost = mb->mbmode_cost [xd->frame_type] [B_PRED];
+ int distortion = 0;
+ int tot_rate_y = 0;
+ int64_t total_rd = 0;
+ ENTROPY_CONTEXT_PLANES t_above, t_left;
+ ENTROPY_CONTEXT *ta, *tl;
+ int *bmode_costs;
+ // update_contexts selects xd's own entropy contexts or local copies.
+ if (update_contexts) {
+ ta = (ENTROPY_CONTEXT *)xd->above_context;
+ tl = (ENTROPY_CONTEXT *)xd->left_context;
+ } else {
+ vpx_memcpy(&t_above, xd->above_context,
+ sizeof(ENTROPY_CONTEXT_PLANES));
+ vpx_memcpy(&t_left, xd->left_context,
+ sizeof(ENTROPY_CONTEXT_PLANES));
+
+ ta = (ENTROPY_CONTEXT *)&t_above;
+ tl = (ENTROPY_CONTEXT *)&t_left;
+ }
+
+ xd->mode_info_context->mbmi.mode = B_PRED;
+ bmode_costs = mb->inter_bmode_costs;
+
+ for (i = 0; i < 16; i++) {
+ MODE_INFO *const mic = xd->mode_info_context;
+ const int mis = xd->mode_info_stride;
+ B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode);
+#if CONFIG_COMP_INTRA_PRED
+ B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_second_mode);
+#endif
+ int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry), UNINITIALIZED_IS_SAFE(d);
+ // On key frames the cost table is chosen by the above and left block modes.
+ if (xd->frame_type == KEY_FRAME) {
+ const B_PREDICTION_MODE A = above_block_mode(mic, i, mis);
+ const B_PREDICTION_MODE L = left_block_mode(mic, i);
+
+ bmode_costs = mb->bmode_costs[A][L];
+ }
+#if CONFIG_NEWBINTRAMODES
+ mic->bmi[i].as_mode.context = vp9_find_bpred_context(xd->block + i);
+#endif
+
+ total_rd += rd_pick_intra4x4block(
+ cpi, mb, mb->block + i, xd->block + i, &best_mode,
+#if CONFIG_COMP_INTRA_PRED
+ & best_second_mode, allow_comp,
+#endif
+ bmode_costs, ta + vp9_block2above[i],
+ tl + vp9_block2left[i], &r, &ry, &d);
+
+ cost += r;
+ distortion += d;
+ tot_rate_y += ry;
+
+ mic->bmi[i].as_mode.first = best_mode;
+#if CONFIG_COMP_INTRA_PRED
+ mic->bmi[i].as_mode.second = best_second_mode;
+#endif
+
+#if 0 // CONFIG_NEWBINTRAMODES
+ printf("%d %d\n", mic->bmi[i].as_mode.first, mic->bmi[i].as_mode.context);
+#endif
+ // Stop early once the running RD total reaches best_rd.
+ if (total_rd >= best_rd)
+ break;
+ }
+
+ if (total_rd >= best_rd)
+ return INT64_MAX;
+
+#if CONFIG_COMP_INTRA_PRED
+ cost += vp9_cost_bit(128, allow_comp);
+#endif
+ *Rate = cost;
+ *rate_y = tot_rate_y;
+ *Distortion = distortion;
+
+ return RDCOST(mb->rdmult, mb->rddiv, cost, distortion);
+}
+/* Picks the best luma intra mode (DC_PRED..TM_PRED) via super_block_yrd(). */
+#if CONFIG_SUPERBLOCKS
+static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi,
+ MACROBLOCK *x,
+ int *rate,
+ int *rate_tokenonly,
+ int *distortion,
+ int *skippable,
+ int64_t txfm_cache[NB_TXFM_MODES]) {
+ MB_PREDICTION_MODE mode;
+ MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
+ int this_rate, this_rate_tokenonly;
+ int this_distortion, s;
+ int64_t best_rd = INT64_MAX, this_rd;
+
+ /* Y Search for 32x32 intra prediction mode */
+ for (mode = DC_PRED; mode <= TM_PRED; mode++) {
+ x->e_mbd.mode_info_context->mbmi.mode = mode;
+ vp9_build_intra_predictors_sby_s(&x->e_mbd);
+ // Note: txfm_cache is overwritten on every iteration, not only the best.
+ super_block_yrd(cpi, x, &this_rate_tokenonly,
+ &this_distortion, &s, txfm_cache);
+ this_rate = this_rate_tokenonly +
+ x->mbmode_cost[x->e_mbd.frame_type]
+ [x->e_mbd.mode_info_context->mbmi.mode];
+ this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+ // Keep the outputs of the lowest-RD mode seen so far.
+ if (this_rd < best_rd) {
+ mode_selected = mode;
+ best_rd = this_rd;
+ *rate = this_rate;
+ *rate_tokenonly = this_rate_tokenonly;
+ *distortion = this_distortion;
+ *skippable = s;
+ }
+ }
+ // Leave the winning mode in the mode info.
+ x->e_mbd.mode_info_context->mbmi.mode = mode_selected;
+
+ return best_rd;
+}
+#endif
+/* Picks the best 16x16 luma intra mode (DC_PRED..TM_PRED); returns its RD. */
+static int64_t rd_pick_intra16x16mby_mode(VP9_COMP *cpi,
+ MACROBLOCK *x,
+ int *Rate,
+ int *rate_y,
+ int *Distortion,
+ int *skippable,
+ int64_t txfm_cache[NB_TXFM_MODES]) {
+ MB_PREDICTION_MODE mode;
+ TX_SIZE txfm_size = 0;
+ MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
+#if CONFIG_COMP_INTRA_PRED
+ MB_PREDICTION_MODE mode2;
+ MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode2_selected);
+#endif
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
+ int rate, ratey;
+ int distortion, skip;
+ int64_t best_rd = INT64_MAX;
+ int64_t this_rd;
+ // txfm_cache[] starts at INT64_MAX and is lowered by the search below.
+ int i;
+ for (i = 0; i < NB_TXFM_MODES; i++)
+ txfm_cache[i] = INT64_MAX;
+
+ // Y Search for 16x16 intra prediction mode
+ for (mode = DC_PRED; mode <= TM_PRED; mode++) {
+ int64_t local_txfm_cache[NB_TXFM_MODES];
+
+ mbmi->mode = mode;
+
+#if CONFIG_COMP_INTRA_PRED
+ for (mode2 = DC_PRED - 1; mode2 != TM_PRED + 1; mode2++) {
+ mbmi->second_mode = mode2;
+ if (mode2 == (MB_PREDICTION_MODE)(DC_PRED - 1)) {
+#endif
+ vp9_build_intra_predictors_mby(xd);
+#if CONFIG_COMP_INTRA_PRED
+ } else {
+ continue; // i.e. disable for now
+ vp9_build_comp_intra_predictors_mby(xd);
+ }
+#endif
+
+ macro_block_yrd(cpi, x, &ratey, &distortion, &skip, local_txfm_cache);
+
+ // FIXME add compoundmode cost
+ // FIXME add rate for mode2
+ rate = ratey + x->mbmode_cost[xd->frame_type][mbmi->mode];
+
+ this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
+
+ if (this_rd < best_rd) {
+ mode_selected = mode;
+ txfm_size = mbmi->txfm_size;
+#if CONFIG_COMP_INTRA_PRED
+ mode2_selected = mode2;
+#endif
+ best_rd = this_rd;
+ *Rate = rate;
+ *rate_y = ratey;
+ *Distortion = distortion;
+ *skippable = skip;
+ }
+ // Per transform mode, keep the lowest this_rd adjusted to that mode.
+ for (i = 0; i < NB_TXFM_MODES; i++) {
+ int64_t adj_rd = this_rd + local_txfm_cache[i] -
+ local_txfm_cache[cpi->common.txfm_mode];
+ if (adj_rd < txfm_cache[i]) {
+ txfm_cache[i] = adj_rd;
+ }
+ }
+
+#if CONFIG_COMP_INTRA_PRED
+ }
+#endif
+ }
+ // Leave the winning mode and its transform size in the mode info.
+ mbmi->txfm_size = txfm_size;
+ mbmi->mode = mode_selected;
+
+#if CONFIG_COMP_INTRA_PRED
+ mbmi->second_mode = mode2_selected;
+#endif
+ return best_rd;
+}
+
+
+static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
+ B_PREDICTION_MODE *best_mode,
+#if CONFIG_COMP_INTRA_PRED
+ B_PREDICTION_MODE *best_second_mode,
+#endif
+ int *mode_costs,
+ ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
+ int *bestrate, int *bestratey,
+ int *bestdistortion) {
+ MB_PREDICTION_MODE mode;
+#if CONFIG_COMP_INTRA_PRED
+ MB_PREDICTION_MODE mode2;
+#endif
+ MACROBLOCKD *xd = &x->e_mbd;
+ int64_t best_rd = INT64_MAX;
+ int distortion = 0, rate = 0;
+ BLOCK *be = x->block + ib;
+ BLOCKD *b = xd->block + ib;
+ ENTROPY_CONTEXT ta0, ta1, besta0 = 0, besta1 = 0;
+ ENTROPY_CONTEXT tl0, tl1, bestl0 = 0, bestl1 = 0;
+
+ /*
+ * The predictor buffer is a 2d buffer with a stride of 16. Create
+ * a temp buffer that meets the stride requirements, but we are only
+ * interested in the left 8x8 block
+ * */
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, best_predictor, 16 * 8);
+ DECLARE_ALIGNED_ARRAY(16, short, best_dqcoeff, 16 * 4);
+
+ // perform transformation of dimension 8x8
+ // note the input and output index mapping
+ int idx = (ib & 0x02) ? (ib + 2) : ib;
+
+ for (mode = DC_PRED; mode <= TM_PRED; mode++) {
+#if CONFIG_COMP_INTRA_PRED
+ for (mode2 = DC_PRED - 1; mode2 != TM_PRED + 1; mode2++) {
+#endif
+ int64_t this_rd;
+ int rate_t = 0;
+
+ // FIXME rate for compound mode and second intrapred mode
+ rate = mode_costs[mode];
+ b->bmi.as_mode.first = mode;
+
+#if CONFIG_COMP_INTRA_PRED
+ if (mode2 == (MB_PREDICTION_MODE)(DC_PRED - 1)) {
+#endif
+ vp9_intra8x8_predict(b, mode, b->predictor);
+#if CONFIG_COMP_INTRA_PRED
+ } else {
+ continue; // i.e. disable for now
+ vp9_comp_intra8x8_predict(b, mode, mode2, b->predictor);
+ }
+#endif
+
+ vp9_subtract_4b_c(be, b, 16);
+
+ assert(get_2nd_order_usage(xd) == 0);
+ if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) {
+ TX_TYPE tx_type = get_tx_type_8x8(xd, b);
+ if (tx_type != DCT_DCT)
+ vp9_fht(be->src_diff, 32, (x->block + idx)->coeff, tx_type, 8);
+ else
+ x->vp9_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32);
+ x->quantize_b_8x8(x->block + idx, xd->block + idx);
+
+ // compute quantization mse of 8x8 block
+ distortion = vp9_block_error_c((x->block + idx)->coeff,
+ (xd->block + idx)->dqcoeff, 64);
+ ta0 = a[vp9_block2above_8x8[idx]];
+ tl0 = l[vp9_block2left_8x8[idx]];
+
+ rate_t = cost_coeffs(x, xd->block + idx, PLANE_TYPE_Y_WITH_DC,
+ &ta0, &tl0, TX_8X8);
+
+ rate += rate_t;
+ ta1 = ta0;
+ tl1 = tl0;
+ } else {
+ static const int iblock[4] = {0, 1, 4, 5};
+ TX_TYPE tx_type;
+ int i;
+ ta0 = a[vp9_block2above[ib]];
+ ta1 = a[vp9_block2above[ib + 1]];
+ tl0 = l[vp9_block2left[ib]];
+ tl1 = l[vp9_block2left[ib + 4]];
+ distortion = 0;
+ rate_t = 0;
+ for (i = 0; i < 4; ++i) {
+ b = &xd->block[ib + iblock[i]];
+ be = &x->block[ib + iblock[i]];
+ tx_type = get_tx_type_4x4(xd, b);
+ if (tx_type != DCT_DCT) {
+ vp9_fht_c(be->src_diff, 32, be->coeff, tx_type, 4);
+ vp9_ht_quantize_b_4x4(be, b, tx_type);
+ } else {
+ x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
+ x->quantize_b_4x4(be, b);
+ }
+ distortion += vp9_block_error_c(be->coeff, b->dqcoeff, 16);
+ rate_t += cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC,
+ // i&1 ? &ta1 : &ta0, i&2 ? &tl1 : &tl0,
+ &ta0, &tl0,
+ TX_4X4);
+ }
+ rate += rate_t;
+ }
+
+ distortion >>= 2;
+ this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
+ if (this_rd < best_rd) {
+ *bestrate = rate;
+ *bestratey = rate_t;
+ *bestdistortion = distortion;
+ besta0 = ta0;
+ besta1 = ta1;
+ bestl0 = tl0;
+ bestl1 = tl1;
+ best_rd = this_rd;
+ *best_mode = mode;
+#if CONFIG_COMP_INTRA_PRED
+ *best_second_mode = mode2;
+#endif
+ copy_predictor_8x8(best_predictor, b->predictor);
+ vpx_memcpy(best_dqcoeff, b->dqcoeff, 64);
+ vpx_memcpy(best_dqcoeff + 32, b->dqcoeff + 64, 64);
+#if CONFIG_COMP_INTRA_PRED
+ }
+#endif
+ }
+ }
+ b->bmi.as_mode.first = (*best_mode);
+#if CONFIG_COMP_INTRA_PRED
+ b->bmi.as_mode.second = (*best_second_mode);
+#endif
+ vp9_encode_intra8x8(x, ib);
+
+ if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) {
+ a[vp9_block2above_8x8[idx]] = besta0;
+ a[vp9_block2above_8x8[idx] + 1] = besta1;
+ l[vp9_block2left_8x8[idx]] = bestl0;
+ l[vp9_block2left_8x8[idx] + 1] = bestl1;
+ } else {
+ a[vp9_block2above[ib]] = besta0;
+ a[vp9_block2above[ib + 1]] = besta1;
+ l[vp9_block2left[ib]] = bestl0;
+ l[vp9_block2left[ib + 4]] = bestl1;
+ }
+
+ return best_rd;
+}
+// Picks an intra mode for each of the four 8x8 blocks named by vp9_i8x8_block; returns RDCOST of the summed rate and distortion.
+static int64_t rd_pick_intra8x8mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
+ int *Rate, int *rate_y,  // out: I8X8_PRED mode cost plus per-block rates; out: sum of per-block ry
+ int *Distortion, int64_t best_rd) {  // out: summed distortion; best_rd is not read in this function
+ MACROBLOCKD *const xd = &mb->e_mbd;
+ int i, ib;
+ int cost = mb->mbmode_cost [xd->frame_type] [I8X8_PRED];
+ int distortion = 0;
+ int tot_rate_y = 0;
+ long long total_rd = 0;  // accumulated in the loop below but never read
+ ENTROPY_CONTEXT_PLANES t_above, t_left;
+ ENTROPY_CONTEXT *ta, *tl;
+ int *i8x8mode_costs;
+ // The per-block search is handed pointers into local copies of the contexts, not xd's own.
+ vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
+ vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));
+
+ ta = (ENTROPY_CONTEXT *)&t_above;
+ tl = (ENTROPY_CONTEXT *)&t_left;
+
+ xd->mode_info_context->mbmi.mode = I8X8_PRED;
+ i8x8mode_costs = mb->i8x8_mode_costs;
+
+ for (i = 0; i < 4; i++) {
+ MODE_INFO *const mic = xd->mode_info_context;
+ B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode);
+#if CONFIG_COMP_INTRA_PRED
+ B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_second_mode);
+#endif
+ int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry), UNINITIALIZED_IS_SAFE(d);
+
+ ib = vp9_i8x8_block[i];
+ total_rd += rd_pick_intra8x8block(
+ cpi, mb, ib, &best_mode,
+#if CONFIG_COMP_INTRA_PRED
+ & best_second_mode,
+#endif
+ i8x8mode_costs, ta, tl, &r, &ry, &d);
+ cost += r;
+ distortion += d;
+ tot_rate_y += ry;
+ mic->bmi[ib].as_mode.first = best_mode;  // record the chosen mode for this 8x8 block
+#if CONFIG_COMP_INTRA_PRED
+ mic->bmi[ib].as_mode.second = best_second_mode;
+#endif
+ }
+
+ *Rate = cost;
+ *rate_y = tot_rate_y;
+ *Distortion = distortion;
+ return RDCOST(mb->rdmult, mb->rddiv, cost, distortion);
+}
+// Sums cost_coeffs (PLANE_TYPE_UV, TX_4X4) over blocks 16..23; a nonzero `backup` makes it work on local context copies.
+static int rd_cost_mbuv_4x4(MACROBLOCK *mb, int backup) {
+ int b;
+ int cost = 0;
+ MACROBLOCKD *xd = &mb->e_mbd;
+ ENTROPY_CONTEXT_PLANES t_above, t_left;
+ ENTROPY_CONTEXT *ta, *tl;
+
+ if (backup) {
+ vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
+ vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));
+
+ ta = (ENTROPY_CONTEXT *)&t_above;
+ tl = (ENTROPY_CONTEXT *)&t_left;
+ } else {
+ ta = (ENTROPY_CONTEXT *)xd->above_context;  // cost_coeffs receives pointers into xd's own contexts
+ tl = (ENTROPY_CONTEXT *)xd->left_context;
+ }
+
+ for (b = 16; b < 24; b++)
+ cost += cost_coeffs(mb, xd->block + b, PLANE_TYPE_UV,
+ ta + vp9_block2above[b], tl + vp9_block2left[b],
+ TX_4X4);
+
+ return cost;
+}
+
+// Runs the 4x4 UV transform and quantizer on x, fills *rate, *distortion and *skip, and returns their RDCOST.
+static int64_t rd_inter16x16_uv_4x4(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
+ int *distortion, int fullpixel, int *skip,  // cpi and fullpixel are not used in this body
+ int do_ctx_backup) {  // forwarded to rd_cost_mbuv_4x4 as its `backup` flag
+ vp9_transform_mbuv_4x4(x);
+ vp9_quantize_mbuv_4x4(x);
+
+ *rate = rd_cost_mbuv_4x4(x, do_ctx_backup);
+ *distortion = vp9_mbuverror(x) / 4;
+ *skip = vp9_mbuv_is_skippable_4x4(&x->e_mbd);
+
+ return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
+}
+// Sums cost_coeffs (PLANE_TYPE_UV, TX_8X8) for blocks 16 and 20; a nonzero `backup` makes it work on local context copies.
+static int rd_cost_mbuv_8x8(MACROBLOCK *mb, int backup) {
+ int b;
+ int cost = 0;
+ MACROBLOCKD *xd = &mb->e_mbd;
+ ENTROPY_CONTEXT_PLANES t_above, t_left;
+ ENTROPY_CONTEXT *ta, *tl;
+
+ if (backup) {
+ vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
+ vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));
+
+ ta = (ENTROPY_CONTEXT *)&t_above;
+ tl = (ENTROPY_CONTEXT *)&t_left;
+ } else {
+ ta = (ENTROPY_CONTEXT *)mb->e_mbd.above_context;  // same object as xd->above_context (xd == &mb->e_mbd)
+ tl = (ENTROPY_CONTEXT *)mb->e_mbd.left_context;
+ }
+
+ for (b = 16; b < 24; b += 4)  // step of 4: only the first block index of each group of four is costed
+ cost += cost_coeffs(mb, xd->block + b, PLANE_TYPE_UV,
+ ta + vp9_block2above_8x8[b],
+ tl + vp9_block2left_8x8[b], TX_8X8);
+
+ return cost;
+}
+// Runs the 8x8 UV transform and quantizer on x, fills *rate, *distortion and *skip, and returns their RDCOST.
+static int64_t rd_inter16x16_uv_8x8(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
+ int *distortion, int fullpixel, int *skip,  // cpi and fullpixel are not used in this body
+ int do_ctx_backup) {  // forwarded to rd_cost_mbuv_8x8 as its `backup` flag
+ vp9_transform_mbuv_8x8(x);
+ vp9_quantize_mbuv_8x8(x);
+
+ *rate = rd_cost_mbuv_8x8(x, do_ctx_backup);
+ *distortion = vp9_mbuverror(x) / 4;
+ *skip = vp9_mbuv_is_skippable_8x8(&x->e_mbd);
+
+ return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
+}
+// rd_inter32x32_uv: accumulates UV rate, distortion and skip over four (x_idx, y_idx) positions, then restores xd's contexts.
+#if CONFIG_SUPERBLOCKS
+static int64_t rd_inter32x32_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
+ int *distortion, int fullpixel, int *skip) {
+ MACROBLOCKD *xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
+ int n, r = 0, d = 0;
+ const uint8_t *usrc = x->src.u_buffer, *udst = xd->dst.u_buffer;
+ const uint8_t *vsrc = x->src.v_buffer, *vdst = xd->dst.v_buffer;
+ int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride;
+ int skippable = 1;
+ ENTROPY_CONTEXT_PLANES t_above[2], t_left[2];
+ ENTROPY_CONTEXT_PLANES *ta = xd->above_context;
+ ENTROPY_CONTEXT_PLANES *tl = xd->left_context;
+ // Save two above and two left context entries; they are written back before returning.
+ memcpy(t_above, xd->above_context, sizeof(t_above));
+ memcpy(t_left, xd->left_context, sizeof(t_left));
+
+ for (n = 0; n < 4; n++) {
+ int x_idx = n & 1, y_idx = n >> 1;
+ int d_tmp, s_tmp, r_tmp;
+ // Point xd at the context entries for this column/row; the helpers below run with do_ctx_backup == 0.
+ xd->above_context = ta + x_idx;
+ xd->left_context = tl + y_idx;
+ vp9_subtract_mbuv_s_c(x->src_diff,
+ usrc + x_idx * 8 + y_idx * 8 * src_uv_stride,
+ vsrc + x_idx * 8 + y_idx * 8 * src_uv_stride,
+ src_uv_stride,
+ udst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
+ vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
+ dst_uv_stride);
+
+ if (mbmi->txfm_size == TX_4X4) {
+ rd_inter16x16_uv_4x4(cpi, x, &r_tmp, &d_tmp, fullpixel, &s_tmp, 0);
+ } else {
+ rd_inter16x16_uv_8x8(cpi, x, &r_tmp, &d_tmp, fullpixel, &s_tmp, 0);
+ }
+
+ r += r_tmp;
+ d += d_tmp;
+ skippable = skippable && s_tmp;  // stays 1 only if every position reported skippable
+ }
+
+ *rate = r;
+ *distortion = d;
+ *skip = skippable;
+ xd->left_context = tl;  // restore the original context pointers ...
+ xd->above_context = ta;
+ memcpy(xd->above_context, t_above, sizeof(t_above));  // ... and the saved context contents
+ memcpy(xd->left_context, t_left, sizeof(t_left));
+
+ return RDCOST(x->rdmult, x->rddiv, r, d);
+}
+#endif
+// Builds the inter 4x4 UV predictors, computes the UV residual, and returns the 4x4 UV RD cost (with context backup).
+static int64_t rd_inter4x4_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
+ int *distortion, int *skip, int fullpixel) {  // rate, distortion and skip are outputs filled by the callee
+ vp9_build_inter4x4_predictors_mbuv(&x->e_mbd);
+ vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
+ x->e_mbd.predictor, x->src.uv_stride);
+ return rd_inter16x16_uv_4x4(cpi, x, rate, distortion, fullpixel, skip, 1);  // note the argument order: fullpixel before skip
+}
+// Tries every UV intra mode DC_PRED..TM_PRED with the 4x4 transform path and stores the lowest-RD one in mbmi->uv_mode.
+static void rd_pick_intra_mbuv_mode(VP9_COMP *cpi,
+ MACROBLOCK *x,
+ int *rate,  // out: token rate plus uv-mode cost of the selected mode
+ int *rate_tokenonly,  // out: token rate alone (rd_cost_mbuv_4x4) of the selected mode
+ int *distortion,  // out: vp9_mbuverror(x) / 4 of the selected mode
+ int *skippable) {  // out: vp9_mbuv_is_skippable_4x4 result of the selected mode
+ MB_PREDICTION_MODE mode;
+ MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
+#if CONFIG_COMP_INTRA_PRED
+ MB_PREDICTION_MODE mode2;
+ MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode2_selected);
+#endif
+ MACROBLOCKD *xd = &x->e_mbd;
+ MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
+ int64_t best_rd = INT64_MAX;
+ int UNINITIALIZED_IS_SAFE(d), UNINITIALIZED_IS_SAFE(r);
+ int rate_to, UNINITIALIZED_IS_SAFE(skip);
+ // With CONFIG_COMP_INTRA_PRED the loop body is nested in a second loop over mode2; the braces interleave with the #if blocks.
+ for (mode = DC_PRED; mode <= TM_PRED; mode++) {
+#if CONFIG_COMP_INTRA_PRED
+ for (mode2 = DC_PRED - 1; mode2 != TM_PRED + 1; mode2++) {
+#endif
+ int rate;  // shadows the `rate` out-parameter inside the loop body
+ int distortion;  // shadows the `distortion` out-parameter inside the loop body
+ int64_t this_rd;
+
+ mbmi->uv_mode = mode;
+#if CONFIG_COMP_INTRA_PRED
+ mbmi->second_uv_mode = mode2;
+ if (mode2 == (MB_PREDICTION_MODE)(DC_PRED - 1)) {
+#endif
+ vp9_build_intra_predictors_mbuv(&x->e_mbd);
+#if CONFIG_COMP_INTRA_PRED
+ } else {
+ continue;  // every mode2 other than DC_PRED - 1 is skipped, so the call below is unreachable
+ vp9_build_comp_intra_predictors_mbuv(&x->e_mbd);
+ }
+#endif
+
+ vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
+ x->e_mbd.predictor, x->src.uv_stride);
+ vp9_transform_mbuv_4x4(x);
+ vp9_quantize_mbuv_4x4(x);
+
+ rate_to = rd_cost_mbuv_4x4(x, 1);
+ rate = rate_to
+ + x->intra_uv_mode_cost[x->e_mbd.frame_type][mbmi->uv_mode];
+
+ distortion = vp9_mbuverror(x) / 4;
+
+ this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
+
+ if (this_rd < best_rd) {
+ skip = vp9_mbuv_is_skippable_4x4(xd);
+ best_rd = this_rd;
+ d = distortion;
+ r = rate;
+ *rate_tokenonly = rate_to;
+ mode_selected = mode;
+#if CONFIG_COMP_INTRA_PRED
+ mode2_selected = mode2;
+ }
+#endif
+ }
+ }
+
+ *rate = r;
+ *distortion = d;
+ *skippable = skip;
+
+ mbmi->uv_mode = mode_selected;
+#if CONFIG_COMP_INTRA_PRED
+ mbmi->second_uv_mode = mode2_selected;
+#endif
+}
+// Tries every UV intra mode DC_PRED..TM_PRED with the 8x8 transform path and stores the lowest-RD one in mbmi->uv_mode.
+static void rd_pick_intra_mbuv_mode_8x8(VP9_COMP *cpi,
+ MACROBLOCK *x,
+ int *rate,  // out: token rate plus uv-mode cost of the selected mode
+ int *rate_tokenonly,  // out: token rate alone (rd_cost_mbuv_8x8) of the selected mode
+ int *distortion,  // out: vp9_mbuverror(x) / 4 of the selected mode
+ int *skippable) {  // out: vp9_mbuv_is_skippable_8x8 result of the selected mode
+ MACROBLOCKD *xd = &x->e_mbd;
+ MB_PREDICTION_MODE mode;
+ MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
+ MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
+ int64_t best_rd = INT64_MAX;
+ int UNINITIALIZED_IS_SAFE(d), UNINITIALIZED_IS_SAFE(r);
+ int rate_to, UNINITIALIZED_IS_SAFE(skip);
+
+ for (mode = DC_PRED; mode <= TM_PRED; mode++) {
+ int rate;  // shadows the `rate` out-parameter inside the loop body
+ int distortion;  // shadows the `distortion` out-parameter inside the loop body
+ int64_t this_rd;
+
+ mbmi->uv_mode = mode;
+ vp9_build_intra_predictors_mbuv(&x->e_mbd);
+ vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
+ x->e_mbd.predictor, x->src.uv_stride);
+ vp9_transform_mbuv_8x8(x);
+
+ vp9_quantize_mbuv_8x8(x);
+
+ rate_to = rd_cost_mbuv_8x8(x, 1);  // 1: cost against local context copies
+ rate = rate_to + x->intra_uv_mode_cost[x->e_mbd.frame_type][mbmi->uv_mode];
+
+ distortion = vp9_mbuverror(x) / 4;
+ this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
+
+ if (this_rd < best_rd) {
+ skip = vp9_mbuv_is_skippable_8x8(xd);
+ best_rd = this_rd;
+ d = distortion;
+ r = rate;
+ *rate_tokenonly = rate_to;
+ mode_selected = mode;
+ }
+ }
+ *rate = r;
+ *distortion = d;
+ *skippable = skip;
+ mbmi->uv_mode = mode_selected;
+}
+// super_block_uvrd_8x8: sums 8x8-transform UV rate, distortion and skip over four (x_idx, y_idx) positions; restores xd's contexts.
+#if CONFIG_SUPERBLOCKS
+static void super_block_uvrd_8x8(MACROBLOCK *x,
+ int *rate,
+ int *distortion,
+ int *skippable) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ int d = 0, r = 0, n, s = 1;
+ const uint8_t *usrc = x->src.u_buffer, *udst = xd->dst.u_buffer;
+ const uint8_t *vsrc = x->src.v_buffer, *vdst = xd->dst.v_buffer;
+ int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride;
+ ENTROPY_CONTEXT_PLANES t_above[2], t_left[2];
+ ENTROPY_CONTEXT_PLANES *ta = xd->above_context;
+ ENTROPY_CONTEXT_PLANES *tl = xd->left_context;
+ // Save two above and two left context entries; they are written back before returning.
+ memcpy(t_above, xd->above_context, sizeof(t_above));
+ memcpy(t_left, xd->left_context, sizeof(t_left));
+
+ for (n = 0; n < 4; n++) {
+ int x_idx = n & 1, y_idx = n >> 1;
+
+ vp9_subtract_mbuv_s_c(x->src_diff,
+ usrc + x_idx * 8 + y_idx * 8 * src_uv_stride,
+ vsrc + x_idx * 8 + y_idx * 8 * src_uv_stride,
+ src_uv_stride,
+ udst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
+ vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
+ dst_uv_stride);
+ vp9_transform_mbuv_8x8(x);
+ vp9_quantize_mbuv_8x8(x);
+ s &= vp9_mbuv_is_skippable_8x8(xd);
+
+ d += vp9_mbuverror(x) >> 2;
+ xd->above_context = ta + x_idx;  // cost against the context entries for this column/row
+ xd->left_context = tl + y_idx;
+ r += rd_cost_mbuv_8x8(x, 0);  // 0: no backup, the callee works on xd's contexts directly
+ }
+
+ xd->above_context = ta;
+ xd->left_context = tl;
+ *distortion = d;
+ *rate = r;
+ *skippable = s;
+
+ xd->left_context = tl;  // redundant: both pointers were already restored a few lines above
+ xd->above_context = ta;
+ memcpy(xd->above_context, t_above, sizeof(t_above));
+ memcpy(xd->left_context, t_left, sizeof(t_left));
+}
+// Tries every UV intra mode DC_PRED..TM_PRED via super_block_uvrd_8x8, keeps the lowest-RD mode, and returns that RD.
+static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi,
+ MACROBLOCK *x,
+ int *rate,  // out: token rate plus uv-mode cost of the best mode
+ int *rate_tokenonly,  // out: token rate alone of the best mode
+ int *distortion,  // out: distortion of the best mode
+ int *skippable) {  // out: skip flag of the best mode
+ MB_PREDICTION_MODE mode;
+ MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
+ int64_t best_rd = INT64_MAX, this_rd;
+ int this_rate_tokenonly, this_rate;
+ int this_distortion, s;
+
+ for (mode = DC_PRED; mode <= TM_PRED; mode++) {
+ x->e_mbd.mode_info_context->mbmi.uv_mode = mode;
+ vp9_build_intra_predictors_sbuv_s(&x->e_mbd);
+
+ super_block_uvrd_8x8(x, &this_rate_tokenonly,
+ &this_distortion, &s);
+ this_rate = this_rate_tokenonly +
+ x->intra_uv_mode_cost[x->e_mbd.frame_type][mode];
+ this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+
+ if (this_rd < best_rd) {
+ mode_selected = mode;
+ best_rd = this_rd;
+ *rate = this_rate;
+ *rate_tokenonly = this_rate_tokenonly;
+ *distortion = this_distortion;
+ *skippable = s;
+ }
+ }
+ // Leave the winning mode in the mode info (the loop above overwrote uv_mode on every iteration).
+ x->e_mbd.mode_info_context->mbmi.uv_mode = mode_selected;
+
+ return best_rd;
+}
+#endif
+// Returns the token cost of inter mode `m` for `mode_context`, or 0 when SEG_LVL_MODE is active for the current segment.
+int vp9_cost_mv_ref(VP9_COMP *cpi,
+ MB_PREDICTION_MODE m,  // asserted to lie in NEARESTMV..SPLITMV when the cost is computed
+ const int mode_context) {  // passed to vp9_mv_ref_probs to derive the probabilities
+ MACROBLOCKD *xd = &cpi->mb.e_mbd;
+ int segment_id = xd->mode_info_context->mbmi.segment_id;
+
+ // If the mode coding is done entirely at the segment level
+ // we should not account for it at the per mb level in rd code.
+ // Note that if the segment level coding is expanded from single mode
+ // to multiple mode masks as per reference frame coding we will need
+ // to do something different here.
+ if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE)) {
+ VP9_COMMON *pc = &cpi->common;
+
+ vp9_prob p [VP9_MVREFS - 1];
+ assert(NEARESTMV <= m && m <= SPLITMV);
+ vp9_mv_ref_probs(pc, p, mode_context);
+ return cost_token(vp9_mv_ref_tree, p,
+ vp9_mv_ref_encoding_array - NEARESTMV + m);  // i.e. the encoding entry at index (m - NEARESTMV)
+ } else
+ return 0;
+}
+// Stores mode `mb` and motion vector `*mv` into the current mode info's mbmi (mode and mv[0]).
+void vp9_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv) {
+ x->e_mbd.mode_info_context->mbmi.mode = mb;
+ x->e_mbd.mode_info_context->mbmi.mv[0].as_int = mv->as_int;  // only mv[0] is written here
+}
+// Assigns a mode and mv(s) to every block whose label equals which_label; returns the mode cost plus any NEW4X4 mv cost.
+static int labels2mode(
+ MACROBLOCK *x,
+ int const *labelings, int which_label,  // labelings: 16 per-block labels, indexed by block number
+ B_PREDICTION_MODE this_mode,  // mode applied to the first block of the label
+ int_mv *this_mv, int_mv *this_second_mv,  // in/out: overwritten for LEFT4X4, ABOVE4X4, ZERO4X4 and compound NEW4X4
+ int_mv seg_mvs[MAX_REF_FRAMES - 1],  // per-reference mvs, read for NEW4X4 when a second reference is in use
+ int_mv *best_ref_mv,
+ int_mv *second_best_ref_mv,
+ int *mvjcost, int *mvcost[2]) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MODE_INFO *const mic = xd->mode_info_context;
+ MB_MODE_INFO * mbmi = &mic->mbmi;
+ const int mis = xd->mode_info_stride;
+
+ int i, cost = 0, thismvcost = 0;
+
+ /* We have to be careful retrieving previously-encoded motion vectors.
+ Ones from this macroblock have to be pulled from the BLOCKD array
+ as they have not yet made it to the bmi array in our MB_MODE_INFO. */
+ for (i = 0; i < 16; ++i) {
+ BLOCKD *const d = xd->block + i;
+ const int row = i >> 2, col = i & 3;
+
+ B_PREDICTION_MODE m;
+
+ if (labelings[i] != which_label)
+ continue;
+ // A block sharing its label with the left/above neighbour in this MB is tagged LEFT4X4/ABOVE4X4 and adds no cost.
+ if (col && labelings[i] == labelings[i - 1])
+ m = LEFT4X4;
+ else if (row && labelings[i] == labelings[i - 4])
+ m = ABOVE4X4;
+ else {
+ // the only time we should do costing for new motion vector or mode
+ // is when we are on a new label (jbb May 08, 2007)
+ switch (m = this_mode) {
+ case NEW4X4 :
+ if (mbmi->second_ref_frame > 0) {
+ this_mv->as_int = seg_mvs[mbmi->ref_frame - 1].as_int;
+ this_second_mv->as_int =
+ seg_mvs[mbmi->second_ref_frame - 1].as_int;
+ }
+
+ thismvcost = vp9_mv_bit_cost(this_mv, best_ref_mv, mvjcost, mvcost,
+ 102, xd->allow_high_precision_mv);
+ if (mbmi->second_ref_frame > 0) {
+ thismvcost += vp9_mv_bit_cost(this_second_mv, second_best_ref_mv,
+ mvjcost, mvcost, 102,
+ xd->allow_high_precision_mv);
+ }
+ break;
+ case LEFT4X4:
+ this_mv->as_int = col ? d[-1].bmi.as_mv.first.as_int : left_block_mv(mic, i);
+ if (mbmi->second_ref_frame > 0)
+ this_second_mv->as_int = col ? d[-1].bmi.as_mv.second.as_int : left_block_second_mv(mic, i);
+ break;
+ case ABOVE4X4:
+ this_mv->as_int = row ? d[-4].bmi.as_mv.first.as_int : above_block_mv(mic, i, mis);
+ if (mbmi->second_ref_frame > 0)
+ this_second_mv->as_int = row ? d[-4].bmi.as_mv.second.as_int : above_block_second_mv(mic, i, mis);
+ break;
+ case ZERO4X4:
+ this_mv->as_int = 0;
+ if (mbmi->second_ref_frame > 0)
+ this_second_mv->as_int = 0;
+ break;
+ default:
+ break;
+ }
+
+ if (m == ABOVE4X4) { // replace above with left if same
+ int_mv left_mv, left_second_mv;
+
+ left_second_mv.as_int = 0;
+ left_mv.as_int = col ? d[-1].bmi.as_mv.first.as_int :
+ left_block_mv(mic, i);
+ if (mbmi->second_ref_frame > 0)
+ left_second_mv.as_int = col ? d[-1].bmi.as_mv.second.as_int :
+ left_block_second_mv(mic, i);
+
+ if (left_mv.as_int == this_mv->as_int &&
+ (mbmi->second_ref_frame <= 0 ||
+ left_second_mv.as_int == this_second_mv->as_int))
+ m = LEFT4X4;
+ }
+
+#if CONFIG_NEWBINTRAMODES
+ cost = x->inter_bmode_costs[
+ m == B_CONTEXT_PRED ? m - CONTEXT_PRED_REPLACEMENTS : m];
+#else
+ cost = x->inter_bmode_costs[m];
+#endif
+ }
+ // Record the mv(s) in the BLOCKD array and the mode/mv(s) in partition_info for every block of the label.
+ d->bmi.as_mv.first.as_int = this_mv->as_int;
+ if (mbmi->second_ref_frame > 0)
+ d->bmi.as_mv.second.as_int = this_second_mv->as_int;
+
+ x->partition_info->bmi[i].mode = m;
+ x->partition_info->bmi[i].mv.as_int = this_mv->as_int;
+ if (mbmi->second_ref_frame > 0)
+ x->partition_info->bmi[i].second_mv.as_int = this_second_mv->as_int;
+ }
+
+ cost += thismvcost;  // thismvcost is nonzero only if the NEW4X4 case ran
+ return cost;
+}
+// Predicts, transforms (4x4) and quantizes every block of label which_label; returns RDCOST of the label's rate and distortion.
+static int64_t encode_inter_mb_segment(MACROBLOCK *x,
+ int const *labels,  // 16 per-block labels
+ int which_label,
+ int *labelyrate,  // out: summed cost_coeffs of the label's blocks
+ int *distortion,  // out: summed vp9_block_error of the label's blocks, shifted right by 2
+ ENTROPY_CONTEXT *ta,  // above context array; cost_coeffs gets a pointer into it
+ ENTROPY_CONTEXT *tl) {  // left context array; cost_coeffs gets a pointer into it
+ int i;
+ MACROBLOCKD *xd = &x->e_mbd;
+
+ *labelyrate = 0;
+ *distortion = 0;
+ for (i = 0; i < 16; i++) {
+ if (labels[i] == which_label) {
+ BLOCKD *bd = &x->e_mbd.block[i];
+ BLOCK *be = &x->block[i];
+ int thisdistortion;
+ // Build the prediction (plus the second predictor when a second reference is in use), then code the residual.
+ vp9_build_inter_predictors_b(bd, 16, xd->subpixel_predict);
+ if (xd->mode_info_context->mbmi.second_ref_frame > 0)
+ vp9_build_2nd_inter_predictors_b(bd, 16, xd->subpixel_predict_avg);
+ vp9_subtract_b(be, bd, 16);
+ x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
+ x->quantize_b_4x4(be, bd);
+ thisdistortion = vp9_block_error(be->coeff, bd->dqcoeff, 16);
+ *distortion += thisdistortion;
+ *labelyrate += cost_coeffs(x, bd, PLANE_TYPE_Y_WITH_DC,
+ ta + vp9_block2above[i],
+ tl + vp9_block2left[i], TX_4X4);
+ }
+ }
+ *distortion >>= 2;
+ return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
+}
+// Codes every 8x8 block of label which_label with mbmi.txfm_size; optionally also rates the other transform size into *otherrd.
+static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x,
+ int const *labels,  // 16 per-block labels; only the entries at vp9_i8x8_block[] are tested
+ int which_label,
+ int *labelyrate,  // out: summed coefficient cost for the active transform size
+ int *distortion,  // out: summed block error for the active transform size, shifted right by 2
+ int64_t *otherrd,  // optional out: RDCOST obtained with the other transform size; may be NULL
+ ENTROPY_CONTEXT *ta,
+ ENTROPY_CONTEXT *tl) {
+ int i, j;
+ MACROBLOCKD *xd = &x->e_mbd;
+ const int iblock[4] = { 0, 1, 4, 5 };
+ int othercost = 0, otherdist = 0;
+ ENTROPY_CONTEXT_PLANES tac, tlc;
+ ENTROPY_CONTEXT *tacp = (ENTROPY_CONTEXT *) &tac,
+ *tlcp = (ENTROPY_CONTEXT *) &tlc;
+ // The alternative transform size is costed against private copies of the contexts (tac/tlc).
+ if (otherrd) {
+ memcpy(&tac, ta, sizeof(ENTROPY_CONTEXT_PLANES));
+ memcpy(&tlc, tl, sizeof(ENTROPY_CONTEXT_PLANES));
+ }
+
+ *distortion = 0;
+ *labelyrate = 0;
+ for (i = 0; i < 4; i++) {
+ int ib = vp9_i8x8_block[i];
+
+ if (labels[ib] == which_label) {
+ int idx = (ib & 8) + ((ib & 2) << 1);  // e.g. ib = 2 -> 4, ib = 10 -> 12; used for the 8x8 coeff/dqcoeff block
+ BLOCKD *bd = &xd->block[ib], *bd2 = &xd->block[idx];
+ BLOCK *be = &x->block[ib], *be2 = &x->block[idx];
+ int thisdistortion;
+
+ vp9_build_inter_predictors4b(xd, bd, 16);
+ if (xd->mode_info_context->mbmi.second_ref_frame > 0)
+ vp9_build_2nd_inter_predictors4b(xd, bd, 16);
+ vp9_subtract_4b_c(be, bd, 16);
+
+ if (xd->mode_info_context->mbmi.txfm_size == TX_4X4) {
+ if (otherrd) {  // rate the 8x8 alternative first; the 4x4 loop below reassigns be/bd
+ x->vp9_short_fdct8x8(be->src_diff, be2->coeff, 32);
+ x->quantize_b_8x8(be2, bd2);
+ thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64);
+ otherdist += thisdistortion;
+ othercost += cost_coeffs(x, bd2, PLANE_TYPE_Y_WITH_DC,
+ tacp + vp9_block2above_8x8[idx],
+ tlcp + vp9_block2left_8x8[idx], TX_8X8);
+ }
+ for (j = 0; j < 4; j += 2) {  // j = 0, 2: each pass handles a horizontal pair of 4x4 blocks
+ bd = &xd->block[ib + iblock[j]];
+ be = &x->block[ib + iblock[j]];
+ x->vp9_short_fdct8x4(be->src_diff, be->coeff, 32);
+ x->quantize_b_4x4_pair(be, be + 1, bd, bd + 1);
+ thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32);
+ *distortion += thisdistortion;
+ *labelyrate += cost_coeffs(x, bd, PLANE_TYPE_Y_WITH_DC,
+ ta + vp9_block2above[ib + iblock[j]],
+ tl + vp9_block2left[ib + iblock[j]],
+ TX_4X4);
+ *labelyrate += cost_coeffs(x, bd + 1, PLANE_TYPE_Y_WITH_DC,
+ ta + vp9_block2above[ib + iblock[j] + 1],
+ tl + vp9_block2left[ib + iblock[j]],
+ TX_4X4);
+ }
+ } else /* 8x8 */ {
+ if (otherrd) {
+ for (j = 0; j < 4; j += 2) {
+ BLOCKD *bd = &xd->block[ib + iblock[j]];  // shadows the outer bd, which therefore keeps pointing at block ib
+ BLOCK *be = &x->block[ib + iblock[j]];  // shadows the outer be, which is used again after this loop
+ x->vp9_short_fdct8x4(be->src_diff, be->coeff, 32);
+ x->quantize_b_4x4_pair(be, be + 1, bd, bd + 1);
+ thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32);
+ otherdist += thisdistortion;
+ othercost += cost_coeffs(x, bd, PLANE_TYPE_Y_WITH_DC,
+ tacp + vp9_block2above[ib + iblock[j]],
+ tlcp + vp9_block2left[ib + iblock[j]],
+ TX_4X4);
+ othercost += cost_coeffs(x, bd + 1, PLANE_TYPE_Y_WITH_DC,
+ tacp + vp9_block2above[ib + iblock[j] + 1],
+ tlcp + vp9_block2left[ib + iblock[j]],
+ TX_4X4);
+ }
+ }
+ x->vp9_short_fdct8x8(be->src_diff, be2->coeff, 32);
+ x->quantize_b_8x8(be2, bd2);
+ thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64);
+ *distortion += thisdistortion;
+ *labelyrate += cost_coeffs(x, bd2, PLANE_TYPE_Y_WITH_DC,
+ ta + vp9_block2above_8x8[idx],
+ tl + vp9_block2left_8x8[idx], TX_8X8);
+ }
+ }
+ }
+ *distortion >>= 2;
+ if (otherrd) {
+ otherdist >>= 2;
+ *otherrd = RDCOST(x->rdmult, x->rddiv, othercost, otherdist);
+ }
+ return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
+}
+// Per-partitioning right shift applied to the diamond-search error before the full-search threshold test in rd_check_segment_txsize.
+static const unsigned int segmentation_to_sseshift[4] = {3, 3, 2, 0};
+
+// Best-so-far state of the split-mv partition search, read and updated by rd_check_segment_txsize.
+typedef struct {
+ int_mv *ref_mv, *second_ref_mv;  // reference mvs handed to the motion search and to labels2mode
+ int_mv mvp;  // motion vector predictor used as the search start point
+
+ int64_t segment_rd;  // lowest total RD found so far; a candidate must beat it to be stored
+ SPLITMV_PARTITIONING_TYPE segment_num;  // partitioning that produced segment_rd
+ TX_SIZE txfm_size;  // transform size that produced segment_rd
+ int r;  // rate of the best candidate
+ int d;  // distortion of the best candidate
+ int segment_yrate;  // summed per-label coefficient rate of the best candidate
+ B_PREDICTION_MODE modes[16];  // per-block modes of the best candidate
+ int_mv mvs[16], second_mvs[16];  // per-block mvs of the best candidate (second_mvs only written for compound prediction)
+ int eobs[16];  // per-block eob values of the best candidate
+
+ int mvthresh;  // divided by the label count to form the per-label new-mv search threshold
+ int *mdcounts;
+
+ int_mv sv_mvp[4]; // save 4 mvp from 8x8
+ int sv_istep[2]; // save 2 initial step_param for 16x8/8x16
+
+} BEST_SEG_INFO;
+// Returns nonzero when mv's row or col, shifted right by 3, falls outside x's mv_row/mv_col min..max limits.
+static __inline
+int mv_check_bounds(MACROBLOCK *x, int_mv *mv) {
+ int r = 0;
+ r |= (mv->as_mv.row >> 3) < x->mv_row_min;  // each comparison yields 0 or 1, OR-ed into r
+ r |= (mv->as_mv.row >> 3) > x->mv_row_max;
+ r |= (mv->as_mv.col >> 3) < x->mv_col_min;
+ r |= (mv->as_mv.col >> 3) > x->mv_col_max;
+ return r;
+}
+// Evaluates one SPLITMV partitioning with one transform size; updates *bsi when its total RD beats bsi->segment_rd.
+static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
+ BEST_SEG_INFO *bsi,
+ SPLITMV_PARTITIONING_TYPE segmentation,
+ TX_SIZE tx_size, int64_t *otherrds,  // otherrds (optional): running RD per label using the other-transform estimate
+ int64_t *rds, int *completed,  // rds (optional): running RD per label; completed (optional): number of labels processed
+ /* 16 = n_blocks */
+ int_mv seg_mvs[16 /* n_blocks */]
+ [MAX_REF_FRAMES - 1]) {
+ int i, j;
+ int const *labels;
+ int br = 0, bd = 0;
+ B_PREDICTION_MODE this_mode;
+ MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
+
+ int label_count;
+ int64_t this_segment_rd = 0, other_segment_rd;
+ int label_mv_thresh;
+ int rate = 0;
+ int sbr = 0, sbd = 0;
+ int segmentyrate = 0;
+ int best_eobs[16] = { 0 };
+
+ vp9_variance_fn_ptr_t *v_fn_ptr;
+
+ ENTROPY_CONTEXT_PLANES t_above, t_left;
+ ENTROPY_CONTEXT *ta, *tl;
+ ENTROPY_CONTEXT_PLANES t_above_b, t_left_b;
+ ENTROPY_CONTEXT *ta_b, *tl_b;
+ // t_above/t_left: contexts after the labels accepted so far; t_above_b/t_left_b: contexts of the best mode of the current label.
+ vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
+ vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
+
+ ta = (ENTROPY_CONTEXT *)&t_above;
+ tl = (ENTROPY_CONTEXT *)&t_left;
+ ta_b = (ENTROPY_CONTEXT *)&t_above_b;
+ tl_b = (ENTROPY_CONTEXT *)&t_left_b;
+
+ v_fn_ptr = &cpi->fn_ptr[segmentation];
+ labels = vp9_mbsplits[segmentation];
+ label_count = vp9_mbsplit_count[segmentation];
+
+ // The multiplier below scales the per-label new-mv threshold. A large
+ // multiplier (e.g. 64) would make the threshold so big that mvs are very
+ // rarely searched on segments; the multiplier of 1 used here makes the mv
+ // thresh roughly equal to what it is for macroblocks
+ label_mv_thresh = 1 * bsi->mvthresh / label_count;
+
+ // Segmentation method overheads
+ rate = cost_token(vp9_mbsplit_tree, vp9_mbsplit_probs,
+ vp9_mbsplit_encodings + segmentation);
+ rate += vp9_cost_mv_ref(cpi, SPLITMV,
+ mbmi->mb_mode_context[mbmi->ref_frame]);
+ this_segment_rd += RDCOST(x->rdmult, x->rddiv, rate, 0);
+ br += rate;
+ other_segment_rd = this_segment_rd;
+
+ mbmi->txfm_size = tx_size;
+ for (i = 0; i < label_count && this_segment_rd < bsi->segment_rd; i++) {  // stops early once the running RD can no longer win
+ int_mv mode_mv[B_MODE_COUNT], second_mode_mv[B_MODE_COUNT];
+ int64_t best_label_rd = INT64_MAX, best_other_rd = INT64_MAX;
+ B_PREDICTION_MODE mode_selected = ZERO4X4;
+ int bestlabelyrate = 0;
+
+ // search for the best motion vector on this segment
+ for (this_mode = LEFT4X4; this_mode <= NEW4X4; this_mode ++) {
+ int64_t this_rd, other_rd;
+ int distortion;
+ int labelyrate;
+ ENTROPY_CONTEXT_PLANES t_above_s, t_left_s;
+ ENTROPY_CONTEXT *ta_s;
+ ENTROPY_CONTEXT *tl_s;
+ // Every candidate mode starts from the contexts left by the previously accepted labels.
+ vpx_memcpy(&t_above_s, &t_above, sizeof(ENTROPY_CONTEXT_PLANES));
+ vpx_memcpy(&t_left_s, &t_left, sizeof(ENTROPY_CONTEXT_PLANES));
+
+ ta_s = (ENTROPY_CONTEXT *)&t_above_s;
+ tl_s = (ENTROPY_CONTEXT *)&t_left_s;
+
+ // motion search for newmv (single predictor case only)
+ if (mbmi->second_ref_frame <= 0 && this_mode == NEW4X4) {
+ int sseshift, n;
+ int step_param = 0;
+ int further_steps;
+ int thissme, bestsme = INT_MAX;
+ BLOCK *c;
+ BLOCKD *e;
+
+ /* Is the best so far sufficiently good that we can't justify doing
+ * a new motion search? */
+ if (best_label_rd < label_mv_thresh)
+ break;
+
+ if (cpi->compressor_speed) {
+ if (segmentation == PARTITIONING_8X16 ||
+ segmentation == PARTITIONING_16X8) {
+ bsi->mvp.as_int = bsi->sv_mvp[i].as_int;
+ if (i == 1 && segmentation == PARTITIONING_16X8)
+ bsi->mvp.as_int = bsi->sv_mvp[2].as_int;
+
+ step_param = bsi->sv_istep[i];
+ }
+
+ // use previous block's result as next block's MV predictor.
+ if (segmentation == PARTITIONING_4X4 && i > 0) {
+ bsi->mvp.as_int = x->e_mbd.block[i - 1].bmi.as_mv.first.as_int;
+ if (i == 4 || i == 8 || i == 12)
+ bsi->mvp.as_int = x->e_mbd.block[i - 4].bmi.as_mv.first.as_int;
+ step_param = 2;
+ }
+ }
+
+ further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
+
+ {
+ int sadpb = x->sadperbit4;
+ int_mv mvp_full;
+ // The predictor is shifted right by 3 to form the full-pixel search start.
+ mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3;
+ mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3;
+
+ // find first label
+ n = vp9_mbsplit_offset[segmentation][i];
+
+ c = &x->block[n];
+ e = &x->e_mbd.block[n];
+
+ bestsme = vp9_full_pixel_diamond(cpi, x, c, e, &mvp_full, step_param,
+ sadpb, further_steps, 0, v_fn_ptr,
+ bsi->ref_mv, &mode_mv[NEW4X4]);
+
+ sseshift = segmentation_to_sseshift[segmentation];
+
+ // Should we do a full search (best quality only)
+ if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000) {
+ /* Check if mvp_full is within the range. */
+ clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max,
+ x->mv_row_min, x->mv_row_max);
+
+ thissme = cpi->full_search_sad(x, c, e, &mvp_full,
+ sadpb, 16, v_fn_ptr,
+ x->nmvjointcost, x->mvcost,
+ bsi->ref_mv);
+
+ if (thissme < bestsme) {
+ bestsme = thissme;
+ mode_mv[NEW4X4].as_int = e->bmi.as_mv.first.as_int;
+ } else {
+ /* The full search result is actually worse so re-instate the
+ * previous best vector */
+ e->bmi.as_mv.first.as_int = mode_mv[NEW4X4].as_int;
+ }
+ }
+ }
+
+ if (bestsme < INT_MAX) {
+ int distortion;
+ unsigned int sse;
+ cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4],
+ bsi->ref_mv, x->errorperbit, v_fn_ptr,
+ x->nmvjointcost, x->mvcost,
+ &distortion, &sse);
+
+ // save motion search result for use in compound prediction
+ seg_mvs[i][mbmi->ref_frame - 1].as_int = mode_mv[NEW4X4].as_int;
+ }
+ } else if (mbmi->second_ref_frame > 0 && this_mode == NEW4X4) {
+ /* NEW4X4 */
+ /* motion search not completed? Then skip newmv for this block with
+ * comppred */
+ if (seg_mvs[i][mbmi->second_ref_frame - 1].as_int == INVALID_MV ||
+ seg_mvs[i][mbmi->ref_frame - 1].as_int == INVALID_MV) {
+ continue;
+ }
+ }
+
+ rate = labels2mode(x, labels, i, this_mode, &mode_mv[this_mode],
+ &second_mode_mv[this_mode], seg_mvs[i],
+ bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost,
+ x->mvcost);
+
+ // Trap vectors that reach beyond the UMV borders
+ if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) ||
+ ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) ||
+ ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) ||
+ ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max)) {
+ continue;
+ }
+ if (mbmi->second_ref_frame > 0 &&
+ mv_check_bounds(x, &second_mode_mv[this_mode]))
+ continue;
+
+ if (segmentation == PARTITIONING_4X4) {
+ this_rd = encode_inter_mb_segment(x, labels, i, &labelyrate,
+ &distortion, ta_s, tl_s);
+ other_rd = this_rd;
+ } else {
+ this_rd = encode_inter_mb_segment_8x8(x, labels, i, &labelyrate,
+ &distortion, &other_rd,
+ ta_s, tl_s);
+ }
+ this_rd += RDCOST(x->rdmult, x->rddiv, rate, 0);
+ rate += labelyrate;
+
+ if (this_rd < best_label_rd) {
+ sbr = rate;
+ sbd = distortion;
+ bestlabelyrate = labelyrate;
+ mode_selected = this_mode;
+ best_label_rd = this_rd;
+ if (x->e_mbd.mode_info_context->mbmi.txfm_size == TX_4X4) {
+ for (j = 0; j < 16; j++)
+ if (labels[j] == i)
+ best_eobs[j] = x->e_mbd.block[j].eob;
+ } else {
+ for (j = 0; j < 4; j++) {
+ int ib = vp9_i8x8_block[j], idx = j * 4;
+
+ if (labels[ib] == i)
+ best_eobs[idx] = x->e_mbd.block[idx].eob;
+ }
+ }
+ if (other_rd < best_other_rd)
+ best_other_rd = other_rd;
+
+ vpx_memcpy(ta_b, ta_s, sizeof(ENTROPY_CONTEXT_PLANES));
+ vpx_memcpy(tl_b, tl_s, sizeof(ENTROPY_CONTEXT_PLANES));
+
+ }
+ } /*for each 4x4 mode*/
+ // Carry the contexts of the winning mode forward to the next label.
+ vpx_memcpy(ta, ta_b, sizeof(ENTROPY_CONTEXT_PLANES));
+ vpx_memcpy(tl, tl_b, sizeof(ENTROPY_CONTEXT_PLANES));
+ // Re-apply the winning mode so the block and partition_info mvs/modes reflect it (return value unused).
+ labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected],
+ &second_mode_mv[mode_selected], seg_mvs[i],
+ bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost, x->mvcost);
+
+ br += sbr;
+ bd += sbd;
+ segmentyrate += bestlabelyrate;
+ this_segment_rd += best_label_rd;
+ other_segment_rd += best_other_rd;
+ if (rds)
+ rds[i] = this_segment_rd;
+ if (otherrds)
+ otherrds[i] = other_segment_rd;
+ } /* for each label */
+
+ if (this_segment_rd < bsi->segment_rd) {
+ bsi->r = br;
+ bsi->d = bd;
+ bsi->segment_yrate = segmentyrate;
+ bsi->segment_rd = this_segment_rd;
+ bsi->segment_num = segmentation;
+ bsi->txfm_size = mbmi->txfm_size;
+
+ // store everything needed to come back to this!!
+ for (j = 0; j < 16; j++) {  // uses j so that i keeps the label count reported through *completed
+ bsi->mvs[j].as_mv = x->partition_info->bmi[j].mv.as_mv;
+ if (mbmi->second_ref_frame > 0)
+ bsi->second_mvs[j].as_mv = x->partition_info->bmi[j].second_mv.as_mv;
+ bsi->modes[j] = x->partition_info->bmi[j].mode;
+ bsi->eobs[j] = best_eobs[j];
+ }
+ }
+ // i is the index at which the label loop stopped, i.e. the number of labels processed.
+ if (completed) {
+ *completed = i;
+ }
+}
+// Evaluates one partitioning under the allowed transform size(s), updating *bsi and the per-transform-mode RD cache.
+static void rd_check_segment(VP9_COMP *cpi, MACROBLOCK *x,
+ BEST_SEG_INFO *bsi,
+ unsigned int segmentation,
+ /* 16 = n_blocks */
+ int_mv seg_mvs[16][MAX_REF_FRAMES - 1],
+ int64_t txfm_cache[NB_TXFM_MODES]) {  // in/out: entries are only ever lowered here
+ int i, n, c = vp9_mbsplit_count[segmentation];  // n: value stored via `completed`; c: label count of this partitioning
+ // txfm_cache is only touched when n == c.
+ if (segmentation == PARTITIONING_4X4) {
+ int64_t rd[16];
+
+ rd_check_segment_txsize(cpi, x, bsi, segmentation, TX_4X4, NULL,
+ rd, &n, seg_mvs);
+ if (n == c) {
+ for (i = 0; i < NB_TXFM_MODES; i++) {  // only TX_4X4 was evaluated, so one value feeds every cache entry
+ if (rd[c - 1] < txfm_cache[i])
+ txfm_cache[i] = rd[c - 1];
+ }
+ }
+ } else {
+ int64_t diff, base_rd;  // assigned only on paths where n == c, and read only under that same test
+ int cost4x4 = vp9_cost_bit(cpi->common.prob_tx[0], 0);
+ int cost8x8 = vp9_cost_bit(cpi->common.prob_tx[0], 1);
+
+ if (cpi->common.txfm_mode == TX_MODE_SELECT) {
+ int64_t rd4x4[4], rd8x8[4];
+ int n4x4, n8x8, nmin;
+ BEST_SEG_INFO bsi4x4, bsi8x8;
+
+ /* factor in cost of cost4x4/8x8 in decision */
+ vpx_memcpy(&bsi4x4, bsi, sizeof(*bsi));
+ vpx_memcpy(&bsi8x8, bsi, sizeof(*bsi));
+ rd_check_segment_txsize(cpi, x, &bsi4x4, segmentation,
+ TX_4X4, NULL, rd4x4, &n4x4, seg_mvs);
+ rd_check_segment_txsize(cpi, x, &bsi8x8, segmentation,
+ TX_8X8, NULL, rd8x8, &n8x8, seg_mvs);
+ if (bsi4x4.segment_num == segmentation) {
+ bsi4x4.segment_rd += RDCOST(x->rdmult, x->rddiv, cost4x4, 0);
+ if (bsi4x4.segment_rd < bsi->segment_rd)
+ vpx_memcpy(bsi, &bsi4x4, sizeof(*bsi));
+ }
+ if (bsi8x8.segment_num == segmentation) {
+ bsi8x8.segment_rd += RDCOST(x->rdmult, x->rddiv, cost8x8, 0);
+ if (bsi8x8.segment_rd < bsi->segment_rd)
+ vpx_memcpy(bsi, &bsi8x8, sizeof(*bsi));
+ }
+ n = n4x4 > n8x8 ? n4x4 : n8x8;
+ if (n == c) {
+ nmin = n4x4 < n8x8 ? n4x4 : n8x8;
+ diff = rd8x8[nmin - 1] - rd4x4[nmin - 1];  // 8x8-minus-4x4 RD at the last label both runs reached
+ if (n == n4x4) {
+ base_rd = rd4x4[c - 1];
+ } else {
+ base_rd = rd8x8[c - 1] - diff;
+ }
+ }
+ } else {
+ int64_t rd[4], otherrd[4];
+
+ if (cpi->common.txfm_mode == ONLY_4X4) {
+ rd_check_segment_txsize(cpi, x, bsi, segmentation, TX_4X4, otherrd,
+ rd, &n, seg_mvs);
+ if (n == c) {
+ base_rd = rd[c - 1];
+ diff = otherrd[c - 1] - rd[c - 1];
+ }
+ } else /* use 8x8 transform */ {
+ rd_check_segment_txsize(cpi, x, bsi, segmentation, TX_8X8, otherrd,
+ rd, &n, seg_mvs);
+ if (n == c) {
+ diff = rd[c - 1] - otherrd[c - 1];
+ base_rd = otherrd[c - 1];
+ }
+ }
+ }
+ // From here on base_rd is the 4x4 RD and base_rd + diff the 8x8 RD, per the assignments above.
+ if (n == c) {
+ if (base_rd < txfm_cache[ONLY_4X4]) {
+ txfm_cache[ONLY_4X4] = base_rd;
+ }
+ if (base_rd + diff < txfm_cache[1]) {  // NOTE(review): literal index 1 is presumably ALLOW_8X8 (assigned on the next line) - confirm
+ txfm_cache[ALLOW_8X8] = txfm_cache[ALLOW_16X16] = base_rd + diff;
+ }
+ if (diff < 0) {  // 8x8 is cheaper: switch to its RD and add the cost of signalling 8x8
+ base_rd += diff + RDCOST(x->rdmult, x->rddiv, cost8x8, 0);
+ } else {
+ base_rd += RDCOST(x->rdmult, x->rddiv, cost4x4, 0);
+ }
+ if (base_rd < txfm_cache[TX_MODE_SELECT]) {
+ txfm_cache[TX_MODE_SELECT] = base_rd;
+ }
+ }
+ }
+}
+
+static __inline void cal_step_param(int sr, int *sp) {  // Converts search range sr into a step parameter written to *sp
+ int step = 0;
+
+ if (sr > MAX_FIRST_STEP) sr = MAX_FIRST_STEP;  // clamp sr to [1, MAX_FIRST_STEP]
+ else if (sr < 1) sr = 1;
+
+ while (sr >>= 1)  // step = number of halvings until sr reaches 0, i.e. floor(log2(sr))
+ step++;
+
+ *sp = MAX_MVSEARCH_STEPS - 1 - step;  // a larger range yields a smaller step parameter
+}
+
+static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,  // Tries the split partitionings, stores the best; returns its RD cost
+ int_mv *best_ref_mv,
+ int_mv *second_best_ref_mv,
+ int64_t best_rd,
+ int *mdcounts,
+ int *returntotrate,
+ int *returnyrate,
+ int *returndistortion,
+ int *skippable, int mvthresh,
+ int_mv seg_mvs[NB_PARTITIONINGS]
+ [16 /* n_blocks */]
+ [MAX_REF_FRAMES - 1],
+ int64_t txfm_cache[NB_TXFM_MODES]) {
+ int i;
+ BEST_SEG_INFO bsi;
+ MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
+
+ vpx_memset(&bsi, 0, sizeof(bsi));
+ for (i = 0; i < NB_TXFM_MODES; i++)  // reset the per-transform-mode bests before any search
+ txfm_cache[i] = INT64_MAX;
+
+ bsi.segment_rd = best_rd;  // seed: rd_check_segment_txsize only records a result that is below this value
+ bsi.ref_mv = best_ref_mv;
+ bsi.second_ref_mv = second_best_ref_mv;
+ bsi.mvp.as_int = best_ref_mv->as_int;
+ bsi.mvthresh = mvthresh;
+ bsi.mdcounts = mdcounts;
+ bsi.txfm_size = TX_4X4;
+
+ for (i = 0; i < 16; i++)
+ bsi.modes[i] = ZERO4X4;
+
+ if (cpi->compressor_speed == 0) {
+ /* for now, we will keep the original segmentation order
+ when in best quality mode */
+ rd_check_segment(cpi, x, &bsi, PARTITIONING_16X8,
+ seg_mvs[PARTITIONING_16X8], txfm_cache);
+ rd_check_segment(cpi, x, &bsi, PARTITIONING_8X16,
+ seg_mvs[PARTITIONING_8X16], txfm_cache);
+ rd_check_segment(cpi, x, &bsi, PARTITIONING_8X8,
+ seg_mvs[PARTITIONING_8X8], txfm_cache);
+ rd_check_segment(cpi, x, &bsi, PARTITIONING_4X4,
+ seg_mvs[PARTITIONING_4X4], txfm_cache);
+ } else {
+ int sr;
+
+ rd_check_segment(cpi, x, &bsi, PARTITIONING_8X8,  // 8x8 is tried first; the other partitionings run only if it beat best_rd
+ seg_mvs[PARTITIONING_8X8], txfm_cache);
+
+ if (bsi.segment_rd < best_rd) {
+ int tmp_col_min = x->mv_col_min;  // saved so the MV limits can be restored after the searches below
+ int tmp_col_max = x->mv_col_max;
+ int tmp_row_min = x->mv_row_min;
+ int tmp_row_max = x->mv_row_max;
+
+ vp9_clamp_mv_min_max(x, best_ref_mv);
+
+ /* Get 8x8 result: blocks 0, 2, 8 and 10 of the stored mvs */
+ bsi.sv_mvp[0].as_int = bsi.mvs[0].as_int;
+ bsi.sv_mvp[1].as_int = bsi.mvs[2].as_int;
+ bsi.sv_mvp[2].as_int = bsi.mvs[8].as_int;
+ bsi.sv_mvp[3].as_int = bsi.mvs[10].as_int;
+
+ /* Use 8x8 result as 16x8/8x16's predictor MV. Adjust search range
+ * according to the closeness of 2 MV. */
+ /* block 8X16 */
+ sr = MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[2].as_mv.row)) >> 3,
+ (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[2].as_mv.col)) >> 3);
+ cal_step_param(sr, &bsi.sv_istep[0]);
+
+ sr = MAXF((abs(bsi.sv_mvp[1].as_mv.row - bsi.sv_mvp[3].as_mv.row)) >> 3,
+ (abs(bsi.sv_mvp[1].as_mv.col - bsi.sv_mvp[3].as_mv.col)) >> 3);
+ cal_step_param(sr, &bsi.sv_istep[1]);
+
+ rd_check_segment(cpi, x, &bsi, PARTITIONING_8X16,
+ seg_mvs[PARTITIONING_8X16], txfm_cache);
+
+ /* block 16X8 */
+ sr = MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[1].as_mv.row)) >> 3,
+ (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[1].as_mv.col)) >> 3);
+ cal_step_param(sr, &bsi.sv_istep[0]);
+
+ sr = MAXF((abs(bsi.sv_mvp[2].as_mv.row - bsi.sv_mvp[3].as_mv.row)) >> 3,
+ (abs(bsi.sv_mvp[2].as_mv.col - bsi.sv_mvp[3].as_mv.col)) >> 3);
+ cal_step_param(sr, &bsi.sv_istep[1]);
+
+ rd_check_segment(cpi, x, &bsi, PARTITIONING_16X8,
+ seg_mvs[PARTITIONING_16X8], txfm_cache);
+
+ /* If 8x8 is better than 16x8/8x16, then do 4x4 search */
+ /* The 4x4 search is also forced when sf.no_skip_block4x4_search is set */
+ if (cpi->sf.no_skip_block4x4_search ||
+ bsi.segment_num == PARTITIONING_8X8) {
+ /* || (sv_segment_rd8x8-bsi.segment_rd) < sv_segment_rd8x8>>5) */
+ bsi.mvp.as_int = bsi.sv_mvp[0].as_int;
+ rd_check_segment(cpi, x, &bsi, PARTITIONING_4X4,
+ seg_mvs[PARTITIONING_4X4], txfm_cache);
+ }
+
+ /* restore UMV window */
+ x->mv_col_min = tmp_col_min;
+ x->mv_col_max = tmp_col_max;
+ x->mv_row_min = tmp_row_min;
+ x->mv_row_max = tmp_row_max;
+ }
+ }
+
+ /* set it to the best: copy the winning per-block mvs and eobs into the block descriptors */
+ for (i = 0; i < 16; i++) {
+ BLOCKD *bd = &x->e_mbd.block[i];
+
+ bd->bmi.as_mv.first.as_int = bsi.mvs[i].as_int;
+ if (mbmi->second_ref_frame > 0)
+ bd->bmi.as_mv.second.as_int = bsi.second_mvs[i].as_int;
+ bd->eob = bsi.eobs[i];
+ }
+
+ *returntotrate = bsi.r;
+ *returndistortion = bsi.d;
+ *returnyrate = bsi.segment_yrate;
+ *skippable = bsi.txfm_size == TX_4X4 ?  // skippability check matches the winning transform size
+ vp9_mby_is_skippable_4x4(&x->e_mbd, 0) :
+ vp9_mby_is_skippable_8x8(&x->e_mbd, 0);
+
+ /* save partitions */
+ mbmi->txfm_size = bsi.txfm_size;
+ mbmi->partitioning = bsi.segment_num;
+ x->partition_info->count = vp9_mbsplit_count[bsi.segment_num];
+
+ for (i = 0; i < x->partition_info->count; i++) {
+ int j;
+
+ j = vp9_mbsplit_offset[bsi.segment_num][i];  // block index used to read partition i's mode and mvs
+
+ x->partition_info->bmi[i].mode = bsi.modes[j];
+ x->partition_info->bmi[i].mv.as_mv = bsi.mvs[j].as_mv;
+ if (mbmi->second_ref_frame > 0)
+ x->partition_info->bmi[i].second_mv.as_mv = bsi.second_mvs[j].as_mv;
+ }
+ /*
+ * used to set mbmi->mv.as_int
+ */
+ x->partition_info->bmi[15].mv.as_int = bsi.mvs[15].as_int;
+ if (mbmi->second_ref_frame > 0)
+ x->partition_info->bmi[15].second_mv.as_int = bsi.second_mvs[15].as_int;
+
+ return (int)(bsi.segment_rd);  // NOTE(review): segment_rd is seeded from the int64_t best_rd, so this cast may narrow -- confirm
+}
+
+static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x,  // Picks the candidate in mbmi->ref_mvs[ref_frame] with the lowest SAD and writes it to *mvp
+ unsigned char *ref_y_buffer, int ref_y_stride,
+ int_mv *mvp, int ref_frame, enum BlockSize block_size ) {
+ MACROBLOCKD *xd = &x->e_mbd;
+ MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
+ int_mv this_mv;
+ int i;
+ int zero_seen = FALSE;
+ int best_index = 0;  // index 0 is used if no candidate beats INT_MAX
+ int best_sad = INT_MAX;
+ int this_sad = INT_MAX;
+
+ BLOCK *b = &x->block[0];
+ unsigned char *src_y_ptr = *(b->base_src);
+ unsigned char *ref_y_ptr;
+ int row_offset, col_offset;
+
+ // Get the sad for each candidate reference mv (at most 4 entries are examined)
+ for (i = 0; i < 4; i++) {
+ this_mv.as_int = mbmi->ref_mvs[ref_frame][i].as_int;
+
+ // The list is at an end if we see 0 for a second time.
+ if (!this_mv.as_int && zero_seen)
+ break;
+ zero_seen = zero_seen || !this_mv.as_int;
+
+ row_offset = this_mv.as_mv.row >> 3;  // mv components are shifted right by 3 to form the buffer offset
+ col_offset = this_mv.as_mv.col >> 3;
+ ref_y_ptr = ref_y_buffer + (ref_y_stride * row_offset) + col_offset;
+
+ // Find sad for current vector.
+ this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, b->src_stride,
+ ref_y_ptr, ref_y_stride,
+ 0x7fffffff);
+
+ // Note if it is the best so far.
+ if (this_sad < best_sad) {
+ best_sad = this_sad;
+ best_index = i;
+ }
+ }
+
+ // Return the mv that had the best sad for use in the motion search.
+ mvp->as_int = mbmi->ref_mvs[ref_frame][best_index].as_int;
+ clamp_mv2(mvp, xd);
+}
+
+static void set_i8x8_block_modes(MACROBLOCK *x, int modes[2][4]) {  // Writes each of the 4 modes to 4 bmi entries, then mirrors bmi into xd->block
+ int i;
+ MACROBLOCKD *xd = &x->e_mbd;
+ for (i = 0; i < 4; i++) {
+ int ib = vp9_i8x8_block[i];  // base bmi index for entry i; offsets +0, +1, +4, +5 are written below
+ xd->mode_info_context->bmi[ib + 0].as_mode.first = modes[0][i];
+ xd->mode_info_context->bmi[ib + 1].as_mode.first = modes[0][i];
+ xd->mode_info_context->bmi[ib + 4].as_mode.first = modes[0][i];
+ xd->mode_info_context->bmi[ib + 5].as_mode.first = modes[0][i];
+#if CONFIG_COMP_INTRA_PRED
+ xd->mode_info_context->bmi[ib + 0].as_mode.second = modes[1][i];
+ xd->mode_info_context->bmi[ib + 1].as_mode.second = modes[1][i];
+ xd->mode_info_context->bmi[ib + 4].as_mode.second = modes[1][i];
+ xd->mode_info_context->bmi[ib + 5].as_mode.second = modes[1][i];
+#endif
+ // Disabled debug print of all eight mode values:
+ // printf("%d,%d,%d,%d %d,%d,%d,%d\n",
+ // modes[0][0], modes[0][1], modes[0][2], modes[0][3],
+ // modes[1][0], modes[1][1], modes[1][2], modes[1][3]);
+ }
+
+ for (i = 0; i < 16; i++) {  // copy all 16 mode-info bmi entries into the per-block descriptors
+ xd->block[i].bmi = xd->mode_info_context->bmi[i];
+ }
+}
+
+extern void vp9_calc_ref_probs(int *count, vp9_prob *probs);
+static void estimate_curframe_refprobs(VP9_COMP *cpi, vp9_prob mod_refprobs[3], int pred_ref) {  // Fills mod_refprobs from this frame's ref usage counts, with pred_ref's count zeroed
+ int norm_cnt[MAX_REF_FRAMES];
+ const int *const rfct = cpi->count_mb_ref_frame_usage;
+ int intra_count = rfct[INTRA_FRAME];
+ int last_count = rfct[LAST_FRAME];
+ int gf_count = rfct[GOLDEN_FRAME];
+ int arf_count = rfct[ALTREF_FRAME];
+
+ // Work out modified reference frame probabilities to use where prediction
+ // of the reference frame fails
+ if (pred_ref == INTRA_FRAME) {
+ norm_cnt[0] = 0;
+ norm_cnt[1] = last_count;
+ norm_cnt[2] = gf_count;
+ norm_cnt[3] = arf_count;
+ vp9_calc_ref_probs(norm_cnt, mod_refprobs);
+ mod_refprobs[0] = 0; // This branch implicit
+ } else if (pred_ref == LAST_FRAME) {
+ norm_cnt[0] = intra_count;
+ norm_cnt[1] = 0;
+ norm_cnt[2] = gf_count;
+ norm_cnt[3] = arf_count;
+ vp9_calc_ref_probs(norm_cnt, mod_refprobs);
+ mod_refprobs[1] = 0; // This branch implicit
+ } else if (pred_ref == GOLDEN_FRAME) {
+ norm_cnt[0] = intra_count;
+ norm_cnt[1] = last_count;
+ norm_cnt[2] = 0;
+ norm_cnt[3] = arf_count;
+ vp9_calc_ref_probs(norm_cnt, mod_refprobs);
+ mod_refprobs[2] = 0; // This branch implicit
+ } else {  // any other pred_ref value (presumably ALTREF_FRAME)
+ norm_cnt[0] = intra_count;
+ norm_cnt[1] = last_count;
+ norm_cnt[2] = gf_count;
+ norm_cnt[3] = 0;
+ vp9_calc_ref_probs(norm_cnt, mod_refprobs);
+ mod_refprobs[2] = 0; // This branch implicit. NOTE(review): same index as the GOLDEN_FRAME case -- confirm intended
+ }
+}
+
+static __inline unsigned weighted_cost(vp9_prob *tab0, vp9_prob *tab1, int idx, int val, int weight) {  // Blends the bit costs from two probability tables by weight
+ unsigned cost0 = tab0[idx] ? vp9_cost_bit(tab0[idx], val) : 0;  // a zero probability entry contributes zero cost
+ unsigned cost1 = tab1[idx] ? vp9_cost_bit(tab1[idx], val) : 0;
+ // weight is 16-bit fixed point, so this basically calculates:
+ // 0.5 + weight * cost1 + (1.0 - weight) * cost0
+ return (0x8000 + weight * cost1 + (0x10000 - weight) * cost0) >> 16;
+}
+
+static void estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id, unsigned int *ref_costs) {  // Writes a cost estimate into ref_costs[i] for each of the MAX_REF_FRAMES reference frames
+ VP9_COMMON *cm = &cpi->common;
+ MACROBLOCKD *xd = &cpi->mb.e_mbd;
+ vp9_prob *mod_refprobs;
+
+ unsigned int cost;
+ int pred_ref;
+ int pred_flag;
+ int pred_ctx;
+ int i;
+ int tot_count;
+
+ vp9_prob pred_prob, new_pred_prob;
+ int seg_ref_active;
+ int seg_ref_count = 0;
+ seg_ref_active = vp9_segfeature_active(xd,
+ segment_id,
+ SEG_LVL_REF_FRAME);
+
+ if (seg_ref_active) {  // sum the vp9_check_segref results over the four reference frames
+ seg_ref_count = vp9_check_segref(xd, segment_id, INTRA_FRAME) +
+ vp9_check_segref(xd, segment_id, LAST_FRAME) +
+ vp9_check_segref(xd, segment_id, GOLDEN_FRAME) +
+ vp9_check_segref(xd, segment_id, ALTREF_FRAME);
+ }
+
+ // Get the predicted reference for this mb
+ pred_ref = vp9_get_pred_ref(cm, xd);
+
+ // Get the context probability for the prediction flag (based on last frame)
+ pred_prob = vp9_get_pred_prob(cm, xd, PRED_REF);
+
+ // Predict probability for current frame based on stats so far
+ pred_ctx = vp9_get_pred_context(cm, xd, PRED_REF);
+ tot_count = cpi->ref_pred_count[pred_ctx][0] + cpi->ref_pred_count[pred_ctx][1];
+ if (tot_count) {
+ new_pred_prob =
+ (cpi->ref_pred_count[pred_ctx][0] * 255 + (tot_count >> 1)) / tot_count;  // rounded ratio scaled to 255
+ new_pred_prob += !new_pred_prob;  // bump a zero result up to 1
+ } else
+ new_pred_prob = 128;  // no counts yet: fall back to 128
+
+ // Get the set of probabilities to use if prediction fails
+ mod_refprobs = cm->mod_refprobs[pred_ref];
+
+ // For each possible selected reference frame work out a cost.
+ for (i = 0; i < MAX_REF_FRAMES; i++) {
+ if (seg_ref_active && seg_ref_count == 1) {  // feature active and the check count is exactly 1: cost is 0
+ cost = 0;
+ } else {
+ pred_flag = (i == pred_ref);
+
+ // Get the prediction for the current mb
+ cost = weighted_cost(&pred_prob, &new_pred_prob, 0,
+ pred_flag, cpi->seg0_progress);
+ if (cost > 1024) cost = 768; // i.e. account for 4 bits max. NOTE(review): threshold is 1024 but the clamp value is 768 -- confirm intended
+
+ // for incorrectly predicted cases
+ if (! pred_flag) {
+ vp9_prob curframe_mod_refprobs[3];
+
+ if (cpi->seg0_progress) {
+ estimate_curframe_refprobs(cpi, curframe_mod_refprobs, pred_ref);
+ } else {
+ vpx_memset(curframe_mod_refprobs, 0, sizeof(curframe_mod_refprobs));
+ }
+
+ cost += weighted_cost(mod_refprobs, curframe_mod_refprobs, 0,  // one cost term per decision actually taken: intra?, last?, golden?
+ (i != INTRA_FRAME), cpi->seg0_progress);
+ if (i != INTRA_FRAME) {
+ cost += weighted_cost(mod_refprobs, curframe_mod_refprobs, 1,
+ (i != LAST_FRAME), cpi->seg0_progress);
+ if (i != LAST_FRAME) {
+ cost += weighted_cost(mod_refprobs, curframe_mod_refprobs, 2,
+ (i != GOLDEN_FRAME), cpi->seg0_progress);
+ }
+ }
+ }
+ }
+
+ ref_costs[i] = cost;
+ }
+}
+
+static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,  // Copies the current mode info, ref mvs and RD diffs into *ctx
+ int mode_index,
+ PARTITION_INFO *partition,
+ int_mv *ref_mv,
+ int_mv *second_ref_mv,
+ int64_t comp_pred_diff[NB_PREDICTION_TYPES],
+ int64_t txfm_size_diff[NB_TXFM_MODES]) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+
+ // Take a snapshot of the coding context so it can be
+ // restored if we decide to encode this way
+ ctx->best_mode_index = mode_index;
+ vpx_memcpy(&ctx->mic, xd->mode_info_context,
+ sizeof(MODE_INFO));
+ if (partition)  // partition may be NULL, in which case ctx->partition_info is left untouched
+ vpx_memcpy(&ctx->partition_info, partition,
+ sizeof(PARTITION_INFO));
+ ctx->best_ref_mv.as_int = ref_mv->as_int;
+ ctx->second_best_ref_mv.as_int = second_ref_mv->as_int;
+
+ // Disabled: saving rddiv / rdmult per context
+ // ctx[mb_index].rddiv = x->rddiv;
+ // ctx[mb_index].rdmult = x->rdmult;
+
+ ctx->single_pred_diff = comp_pred_diff[SINGLE_PREDICTION_ONLY];
+ ctx->comp_pred_diff = comp_pred_diff[COMP_PREDICTION_ONLY];
+ ctx->hybrid_pred_diff = comp_pred_diff[HYBRID_PREDICTION];
+
+ memcpy(ctx->txfm_rd_diff, txfm_size_diff, sizeof(ctx->txfm_rd_diff));  // NOTE(review): plain memcpy here, vpx_memcpy above -- confirm intended
+}
+
+static void inter_mode_cost(VP9_COMP *cpi, MACROBLOCK *x,  // Adds the Y and UV rate/distortion of the current mode to *rate2 / *distortion2
+ int *rate2, int *distortion2, int *rate_y,
+ int *distortion, int* rate_uv, int *distortion_uv,
+ int *skippable, int64_t txfm_cache[NB_TXFM_MODES]) {
+ int y_skippable, uv_skippable;
+
+ // Y cost and distortion
+ macro_block_yrd(cpi, x, rate_y, distortion, &y_skippable, txfm_cache);
+
+ *rate2 += *rate_y;
+ *distortion2 += *distortion;
+
+ // UV cost and distortion: the costing routine follows the macroblock's txfm_size
+ vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
+ x->e_mbd.predictor, x->src.uv_stride);
+ if (x->e_mbd.mode_info_context->mbmi.txfm_size != TX_4X4)
+ rd_inter16x16_uv_8x8(cpi, x, rate_uv, distortion_uv,
+ cpi->common.full_pixel, &uv_skippable, 1);
+ else
+ rd_inter16x16_uv_4x4(cpi, x, rate_uv, distortion_uv,
+ cpi->common.full_pixel, &uv_skippable, 1);
+
+ *rate2 += *rate_uv;
+ *distortion2 += *distortion_uv;
+ *skippable = y_skippable && uv_skippable;  // skippable only when both Y and UV are
+}
+
+#define MIN(x,y) (((x)<(y))?(x):(y))
+#define MAX(x,y) (((x)>(y))?(x):(y))
+static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,  // Sets up plane pointers and candidate mvs for one reference frame (frame_type)
+ int idx, MV_REFERENCE_FRAME frame_type,
+ int block_size,
+ int recon_yoffset, int recon_uvoffset,
+ int_mv frame_nearest_mv[MAX_REF_FRAMES],
+ int_mv frame_near_mv[MAX_REF_FRAMES],
+ int_mv frame_best_ref_mv[MAX_REF_FRAMES],
+ int_mv mv_search_ref[MAX_REF_FRAMES],
+ int frame_mdcounts[4][4],
+ unsigned char *y_buffer[4],
+ unsigned char *u_buffer[4],
+ unsigned char *v_buffer[4]) {
+ YV12_BUFFER_CONFIG *yv12 = &cpi->common.yv12_fb[idx];  // NOTE(review): frame_mdcounts is not referenced in this body -- confirm
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
+
+ y_buffer[frame_type] = yv12->y_buffer + recon_yoffset;  // plane pointers offset to this macroblock's position
+ u_buffer[frame_type] = yv12->u_buffer + recon_uvoffset;
+ v_buffer[frame_type] = yv12->v_buffer + recon_uvoffset;
+
+ // Gets an initial list of candidate vectors from neighbours and orders them
+ vp9_find_mv_refs(xd, xd->mode_info_context,
+ xd->prev_mode_info_context,
+ frame_type,
+ mbmi->ref_mvs[frame_type],
+ cpi->common.ref_frame_sign_bias);
+
+ // Candidate refinement carried out at encoder and decoder
+ vp9_find_best_ref_mvs(xd, y_buffer[frame_type],
+ yv12->y_stride,
+ mbmi->ref_mvs[frame_type],
+ &frame_best_ref_mv[frame_type],
+ &frame_nearest_mv[frame_type],
+ &frame_near_mv[frame_type]);
+
+
+ // Further refinement that is encode side only to test the top few candidates
+ // in full and choose the best as the centre point for subsequent searches.
+ mv_pred(cpi, x, y_buffer[frame_type], yv12->y_stride,
+ &mv_search_ref[frame_type], frame_type, block_size);
+
+#if CONFIG_NEW_MVREF
+ // TODO(paulwilkins): Final choice of which of the best 4 candidates from
+ // above gives lowest error score when used in isolation. This stage encoder
+ // and sets the reference MV
+#endif
+}
+
+static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,  // Costs the inter mode in mbmi->mode; returns INT64_MAX to reject, else this_rd
+ enum BlockSize block_size,
+ int *saddone, int near_sadidx[],
+ int mdcounts[4], int64_t txfm_cache[],
+ int *rate2, int *distortion, int *skippable,
+ int *compmode_cost,
+#if CONFIG_COMP_INTERINTRA_PRED
+ int *compmode_interintra_cost,
+#endif
+ int *rate_y, int *distortion_y,
+ int *rate_uv, int *distortion_uv,
+ int *mode_excluded, int *disable_skip,
+ int recon_yoffset, int mode_index,
+ int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
+ int_mv frame_best_ref_mv[MAX_REF_FRAMES],
+ int_mv mv_search_ref[MAX_REF_FRAMES]) {
+ VP9_COMMON *cm = &cpi->common;
+ MACROBLOCKD *xd = &x->e_mbd;
+ MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
+ BLOCK *b = &x->block[0];
+ BLOCKD *d = &xd->block[0];
+ const int is_comp_pred = (mbmi->second_ref_frame > 0);
+#if CONFIG_COMP_INTERINTRA_PRED
+ const int is_comp_interintra_pred = (mbmi->second_ref_frame == INTRA_FRAME);
+#endif
+ const int num_refs = is_comp_pred ? 2 : 1;
+ const int this_mode = mbmi->mode;
+ int i;
+ int refs[2] = { mbmi->ref_frame,
+ (mbmi->second_ref_frame < 0 ? 0 : mbmi->second_ref_frame) };  // a negative second ref is mapped to index 0
+ int_mv cur_mv[2];
+ int64_t this_rd = 0;  // stays 0 unless the encode_breakout path below sets it
+
+ switch (this_mode) {
+ case NEWMV:
+ if (is_comp_pred) {  // compound NEWMV: reuse the stored NEWMV vectors, no new search
+ if (frame_mv[NEWMV][refs[0]].as_int == INVALID_MV ||
+ frame_mv[NEWMV][refs[1]].as_int == INVALID_MV)
+ return INT64_MAX;
+ *rate2 += vp9_mv_bit_cost(&frame_mv[NEWMV][refs[0]],
+ &frame_best_ref_mv[refs[0]],
+ x->nmvjointcost, x->mvcost, 96,
+ x->e_mbd.allow_high_precision_mv);
+ *rate2 += vp9_mv_bit_cost(&frame_mv[NEWMV][refs[1]],
+ &frame_best_ref_mv[refs[1]],
+ x->nmvjointcost, x->mvcost, 96,
+ x->e_mbd.allow_high_precision_mv);
+ } else {
+ int bestsme = INT_MAX;
+ int further_steps, step_param = cpi->sf.first_step;
+ int sadpb = x->sadperbit16;
+ int_mv mvp_full, tmp_mv;
+ int sr = 0;  // never changed from 0 in this function
+
+ int tmp_col_min = x->mv_col_min;  // saved so the MV limits can be restored after the search
+ int tmp_col_max = x->mv_col_max;
+ int tmp_row_min = x->mv_row_min;
+ int tmp_row_max = x->mv_row_max;
+
+ vp9_clamp_mv_min_max(x, &frame_best_ref_mv[refs[0]]);
+
+ mvp_full.as_mv.col = mv_search_ref[mbmi->ref_frame].as_mv.col >> 3;  // search start point: mv_search_ref shifted right by 3
+ mvp_full.as_mv.row = mv_search_ref[mbmi->ref_frame].as_mv.row >> 3;
+
+ // adjust search range according to sr from mv prediction
+ step_param = MAX(step_param, sr);
+
+ // Further step/diamond searches as necessary
+ further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
+
+ bestsme = vp9_full_pixel_diamond(cpi, x, b, d, &mvp_full, step_param,
+ sadpb, further_steps, 1,
+ &cpi->fn_ptr[block_size],
+ &frame_best_ref_mv[refs[0]], &tmp_mv);
+
+ x->mv_col_min = tmp_col_min;
+ x->mv_col_max = tmp_col_max;
+ x->mv_row_min = tmp_row_min;
+ x->mv_row_max = tmp_row_max;
+
+ if (bestsme < INT_MAX) {  // refine tmp_mv with the fractional step search
+ int dis; /* TODO: use dis in distortion calculation later. */
+ unsigned int sse;
+ cpi->find_fractional_mv_step(x, b, d, &tmp_mv,
+ &frame_best_ref_mv[refs[0]],
+ x->errorperbit,
+ &cpi->fn_ptr[block_size],
+ x->nmvjointcost, x->mvcost,
+ &dis, &sse);
+ }
+ d->bmi.as_mv.first.as_int = tmp_mv.as_int;
+ frame_mv[NEWMV][refs[0]].as_int = d->bmi.as_mv.first.as_int;  // store the searched mv in frame_mv[NEWMV]
+
+ // Add the new motion vector cost to our rolling cost variable
+ *rate2 += vp9_mv_bit_cost(&tmp_mv, &frame_best_ref_mv[refs[0]],
+ x->nmvjointcost, x->mvcost,
+ 96, xd->allow_high_precision_mv);
+ }
+ break;
+ case NEARESTMV:
+ case NEARMV:
+ // Do not bother proceeding if the vector (from newmv, nearest or
+ // near) is 0,0 as this should then be coded using the zeromv mode.
+ for (i = 0; i < num_refs; ++i)
+ if (frame_mv[this_mode][refs[i]].as_int == 0)
+ return INT64_MAX;  // otherwise falls through to the break below
+ case ZEROMV:
+ default:
+ break;
+ }
+ for (i = 0; i < num_refs; ++i) {
+ cur_mv[i] = frame_mv[this_mode][refs[i]];
+ // Clip "next_nearest" so that it does not extend too far out of image
+ clamp_mv2(&cur_mv[i], xd);
+ if (mv_check_bounds(x, &cur_mv[i]))
+ return INT64_MAX;
+ mbmi->mv[i].as_int = cur_mv[i].as_int;
+ }
+
+#if CONFIG_PRED_FILTER
+ // Filtered prediction:
+ mbmi->pred_filter_enabled = vp9_mode_order[mode_index].pred_filter_flag;
+ *rate2 += vp9_cost_bit(cpi->common.prob_pred_filter_off,
+ mbmi->pred_filter_enabled);
+#endif
+ if (cpi->common.mcomp_filter_type == SWITCHABLE) {  // add the cost of signalling the interpolation filter
+ const int c = vp9_get_pred_context(cm, xd, PRED_SWITCHABLE_INTERP);
+ const int m = vp9_switchable_interp_map[mbmi->interp_filter];
+ *rate2 += SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs[c][m];
+ }
+
+ /* We don't include the cost of the second reference here, because there
+ * are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other
+ * words if you present them in that order, the second one is always known
+ * if the first is known */
+ *compmode_cost = vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_COMP),
+ is_comp_pred);
+ *rate2 += vp9_cost_mv_ref(cpi, this_mode,
+ mbmi->mb_mode_context[mbmi->ref_frame]);
+#if CONFIG_COMP_INTERINTRA_PRED
+ if (!is_comp_pred) {
+ *compmode_interintra_cost = vp9_cost_bit(cm->fc.interintra_prob,
+ is_comp_interintra_pred);
+ if (is_comp_interintra_pred) {
+ *compmode_interintra_cost +=
+ x->mbmode_cost[xd->frame_type][mbmi->interintra_mode];
+#if SEPARATE_INTERINTRA_UV
+ *compmode_interintra_cost +=
+ x->intra_uv_mode_cost[xd->frame_type][mbmi->interintra_uv_mode];
+#endif
+ }
+ }
+#endif
+
+ if (block_size == BLOCK_16X16) {  // build the luma prediction; 16x16 goes to xd->predictor, otherwise to xd->dst
+ vp9_build_1st_inter16x16_predictors_mby(xd, xd->predictor, 16, 0);
+ if (is_comp_pred)
+ vp9_build_2nd_inter16x16_predictors_mby(xd, xd->predictor, 16);
+#if CONFIG_COMP_INTERINTRA_PRED
+ if (is_comp_interintra_pred) {
+ vp9_build_interintra_16x16_predictors_mby(xd, xd->predictor, 16);
+ }
+#endif
+ } else {
+#if CONFIG_SUPERBLOCKS
+ vp9_build_inter32x32_predictors_sb(xd,
+ xd->dst.y_buffer,
+ xd->dst.u_buffer,
+ xd->dst.v_buffer,
+ xd->dst.y_stride,
+ xd->dst.uv_stride);
+#endif
+ }
+
+ if (cpi->active_map_enabled && x->active_ptr[0] == 0)
+ x->skip = 1;
+ else if (x->encode_breakout) {  // early-skip test driven by x->encode_breakout
+ unsigned int sse, var;
+ int threshold = (xd->block[0].dequant[1]
+ * xd->block[0].dequant[1] >> 4);
+
+ if (threshold < x->encode_breakout)  // threshold is the larger of dequant[1]^2 >> 4 and encode_breakout
+ threshold = x->encode_breakout;
+
+ if (block_size == BLOCK_16X16) {
+ var = vp9_variance16x16(*(b->base_src), b->src_stride,
+ xd->predictor, 16, &sse);
+ } else {  // NOTE(review): with CONFIG_SUPERBLOCKS off, sse and var are not set on this path -- confirm unreachable
+#if CONFIG_SUPERBLOCKS
+ var = vp9_variance32x32(*(b->base_src), b->src_stride,
+ xd->dst.y_buffer, xd->dst.y_stride, &sse);
+#endif
+ }
+
+ if ((int)sse < threshold) {
+ unsigned int q2dc = xd->block[24].dequant[0];
+ /* If there is no codeable 2nd order dc
+ or a very small uniform pixel change */
+ if ((sse - var < q2dc * q2dc >> 4) ||
+ (sse / 2 > var && sse - var < 64)) {
+ // Check u and v to make sure skip is ok
+ int sse2;
+
+ if (block_size == BLOCK_16X16) {
+ sse2 = vp9_uvsse(x);
+ } else {
+ unsigned int sse2u, sse2v;
+ var = vp9_variance16x16(x->src.u_buffer, x->src.uv_stride,
+ xd->dst.u_buffer, xd->dst.uv_stride, &sse2u);
+ var = vp9_variance16x16(x->src.v_buffer, x->src.uv_stride,
+ xd->dst.v_buffer, xd->dst.uv_stride, &sse2v);
+ sse2 = sse2u + sse2v;
+ }
+
+ if (sse2 * 2 < threshold) {  // chroma error is small too: mark the block as skipped
+ x->skip = 1;
+ *distortion = sse + sse2;
+ *rate2 = 500;
+
+ /* for best_yrd calculation */
+ *rate_uv = 0;
+ *distortion_uv = sse2;
+
+ *disable_skip = 1;
+ this_rd = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
+ }
+ }
+ }
+ }
+
+ if (is_comp_pred) {  // flag the mode as excluded when it conflicts with the frame's comp_pred_mode
+ *mode_excluded = (cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY);
+ } else {
+ *mode_excluded = (cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY);
+ }
+#if CONFIG_COMP_INTERINTRA_PRED
+ if (is_comp_interintra_pred && !cm->use_interintra) *mode_excluded = 1;
+#endif
+
+ if (!x->skip) {  // not skipped: build chroma predictors (16x16 only) and accumulate rate/distortion
+ if (block_size == BLOCK_16X16) {
+ vp9_build_1st_inter16x16_predictors_mbuv(xd, &xd->predictor[256],
+ &xd->predictor[320], 8);
+ if (is_comp_pred)
+ vp9_build_2nd_inter16x16_predictors_mbuv(xd, &xd->predictor[256],
+ &xd->predictor[320], 8);
+#if CONFIG_COMP_INTERINTRA_PRED
+ if (is_comp_interintra_pred) {
+ vp9_build_interintra_16x16_predictors_mbuv(xd, &xd->predictor[256],
+ &xd->predictor[320], 8);
+ }
+#endif
+ inter_mode_cost(cpi, x, rate2, distortion,
+ rate_y, distortion_y, rate_uv, distortion_uv,
+ skippable, txfm_cache);
+ } else {
+#if CONFIG_SUPERBLOCKS
+ int skippable_y, skippable_uv;
+
+ // Y cost and distortion
+ super_block_yrd(cpi, x, rate_y, distortion_y,
+ &skippable_y, txfm_cache);
+ *rate2 += *rate_y;
+ *distortion += *distortion_y;
+
+ rd_inter32x32_uv(cpi, x, rate_uv, distortion_uv,
+ cm->full_pixel, &skippable_uv);
+
+ *rate2 += *rate_uv;
+ *distortion += *distortion_uv;
+ *skippable = skippable_y && skippable_uv;
+#endif
+ }
+ }
+ return this_rd; // if 0, this will be re-calculated by caller
+}
+
+static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
+ int recon_yoffset, int recon_uvoffset,
+ int *returnrate, int *returndistortion,
+ int64_t *returnintra) {
+ VP9_COMMON *cm = &cpi->common;
+ MACROBLOCKD *xd = &x->e_mbd;
+ union b_mode_info best_bmodes[16];
+ MB_MODE_INFO best_mbmode;
+ PARTITION_INFO best_partition;
+ int_mv best_ref_mv, second_best_ref_mv;
+ MB_PREDICTION_MODE this_mode;
+ MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi;
+ int i, best_mode_index = 0;
+ int mode8x8[2][4];
+ unsigned char segment_id = mbmi->segment_id;
+
+ int mode_index;
+ int mdcounts[4];
+ int rate, distortion;
+ int rate2, distortion2;
+ int64_t best_txfm_rd[NB_TXFM_MODES];
+ int64_t best_txfm_diff[NB_TXFM_MODES];
+ int64_t best_pred_diff[NB_PREDICTION_TYPES];
+ int64_t best_pred_rd[NB_PREDICTION_TYPES];
+ int64_t best_rd = INT64_MAX, best_intra_rd = INT64_MAX;
+#if CONFIG_COMP_INTERINTRA_PRED
+ int is_best_interintra = 0;
+ int64_t best_intra16_rd = INT64_MAX;
+ int best_intra16_mode = DC_PRED, best_intra16_uv_mode = DC_PRED;
+#endif
+ int64_t best_overall_rd = INT64_MAX;
+ int uv_intra_rate, uv_intra_distortion, uv_intra_rate_tokenonly;
+ int uv_intra_skippable = 0;
+ int uv_intra_rate_8x8 = 0, uv_intra_distortion_8x8 = 0, uv_intra_rate_tokenonly_8x8 = 0;
+ int uv_intra_skippable_8x8 = 0;
+ int rate_y, UNINITIALIZED_IS_SAFE(rate_uv);
+ int distortion_uv = INT_MAX;
+ int64_t best_yrd = INT64_MAX;
+#if CONFIG_PRED_FILTER
+ int best_filter_state = 0;
+#endif
+ int switchable_filter_index = 0;
+
+ MB_PREDICTION_MODE uv_intra_mode;
+ MB_PREDICTION_MODE uv_intra_mode_8x8 = 0;
+
+ int near_sadidx[8] = {0, 1, 2, 3, 4, 5, 6, 7};
+ int saddone = 0;
+
+ int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
+ int_mv frame_best_ref_mv[MAX_REF_FRAMES];
+ int_mv mv_search_ref[MAX_REF_FRAMES];
+ int frame_mdcounts[4][4];
+ unsigned char *y_buffer[4], *u_buffer[4], *v_buffer[4];
+
+ unsigned int ref_costs[MAX_REF_FRAMES];
+ int_mv seg_mvs[NB_PARTITIONINGS][16 /* n_blocks */][MAX_REF_FRAMES - 1];
+
+ int intra_cost_penalty = 20 * vp9_dc_quant(cpi->common.base_qindex,
+ cpi->common.y1dc_delta_q);
+
+ vpx_memset(mode8x8, 0, sizeof(mode8x8));
+ vpx_memset(&frame_mv, 0, sizeof(frame_mv));
+ vpx_memset(&best_mbmode, 0, sizeof(best_mbmode));
+ vpx_memset(&best_bmodes, 0, sizeof(best_bmodes));
+ vpx_memset(&x->mb_context[xd->mb_index], 0, sizeof(PICK_MODE_CONTEXT));
+
+ for (i = 0; i < MAX_REF_FRAMES; i++)
+ frame_mv[NEWMV][i].as_int = INVALID_MV;
+ for (i = 0; i < NB_PREDICTION_TYPES; ++i)
+ best_pred_rd[i] = INT64_MAX;
+ for (i = 0; i < NB_TXFM_MODES; i++)
+ best_txfm_rd[i] = INT64_MAX;
+
+ for (i = 0; i < NB_PARTITIONINGS; i++) {
+ int j, k;
+
+ for (j = 0; j < 16; j++)
+ for (k = 0; k < MAX_REF_FRAMES - 1; k++)
+ seg_mvs[i][j][k].as_int = INVALID_MV;
+ }
+
+ if (cpi->ref_frame_flags & VP9_LAST_FLAG) {
+ setup_buffer_inter(cpi, x, cpi->common.lst_fb_idx, LAST_FRAME,
+ BLOCK_16X16, recon_yoffset, recon_uvoffset,
+ frame_mv[NEARESTMV], frame_mv[NEARMV], frame_best_ref_mv,
+ mv_search_ref, frame_mdcounts,
+ y_buffer, u_buffer, v_buffer);
+ }
+
+ if (cpi->ref_frame_flags & VP9_GOLD_FLAG) {
+ setup_buffer_inter(cpi, x, cpi->common.gld_fb_idx, GOLDEN_FRAME,
+ BLOCK_16X16, recon_yoffset, recon_uvoffset,
+ frame_mv[NEARESTMV], frame_mv[NEARMV], frame_best_ref_mv,
+ mv_search_ref, frame_mdcounts,
+ y_buffer, u_buffer, v_buffer);
+ }
+
+ if (cpi->ref_frame_flags & VP9_ALT_FLAG) {
+ setup_buffer_inter(cpi, x, cpi->common.alt_fb_idx, ALTREF_FRAME,
+ BLOCK_16X16, recon_yoffset, recon_uvoffset,
+ frame_mv[NEARESTMV], frame_mv[NEARMV], frame_best_ref_mv,
+ mv_search_ref, frame_mdcounts,
+ y_buffer, u_buffer, v_buffer);
+ }
+
+ *returnintra = INT64_MAX;
+
+ x->skip = 0;
+
+ mbmi->ref_frame = INTRA_FRAME;
+
+ /* Initialize zbin mode boost for uv costing */
+ cpi->zbin_mode_boost = 0;
+ vp9_update_zbin_extra(cpi, x);
+
+ rd_pick_intra_mbuv_mode(cpi, x, &uv_intra_rate,
+ &uv_intra_rate_tokenonly, &uv_intra_distortion,
+ &uv_intra_skippable);
+ uv_intra_mode = mbmi->uv_mode;
+
+ /* rough estimate for now */
+ if (cpi->common.txfm_mode != ONLY_4X4) {
+ rd_pick_intra_mbuv_mode_8x8(cpi, x, &uv_intra_rate_8x8,
+ &uv_intra_rate_tokenonly_8x8,
+ &uv_intra_distortion_8x8,
+ &uv_intra_skippable_8x8);
+ uv_intra_mode_8x8 = mbmi->uv_mode;
+ }
+
+ // Get estimates of reference frame costs for each reference frame
+ // that depend on the current prediction etc.
+ estimate_ref_frame_costs(cpi, segment_id, ref_costs);
+
+ for (mode_index = 0; mode_index < MAX_MODES;
+ mode_index += (!switchable_filter_index)) {
+ int64_t this_rd = INT64_MAX;
+ int disable_skip = 0, skippable = 0;
+ int other_cost = 0;
+ int compmode_cost = 0;
+#if CONFIG_COMP_INTERINTRA_PRED
+ int compmode_interintra_cost = 0;
+#endif
+ int mode_excluded = 0;
+ int64_t txfm_cache[NB_TXFM_MODES] = { 0 };
+
+ // These variables hold are rolling total cost and distortion for this mode
+ rate2 = 0;
+ distortion2 = 0;
+ rate_y = 0;
+ rate_uv = 0;
+
+ this_mode = vp9_mode_order[mode_index].mode;
+ mbmi->mode = this_mode;
+ mbmi->uv_mode = DC_PRED;
+ mbmi->ref_frame = vp9_mode_order[mode_index].ref_frame;
+ mbmi->second_ref_frame = vp9_mode_order[mode_index].second_ref_frame;
+#if CONFIG_PRED_FILTER
+ mbmi->pred_filter_enabled = 0;
+#endif
+ if (cpi->common.mcomp_filter_type == SWITCHABLE &&
+ this_mode >= NEARESTMV && this_mode <= SPLITMV) {
+ mbmi->interp_filter =
+ vp9_switchable_interp[switchable_filter_index++];
+ if (switchable_filter_index == VP9_SWITCHABLE_FILTERS)
+ switchable_filter_index = 0;
+ } else {
+ mbmi->interp_filter = cpi->common.mcomp_filter_type;
+ }
+ vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
+
+ // Test best rd so far against threshold for trying this mode.
+ if (best_rd <= cpi->rd_threshes[mode_index])
+ continue;
+
+ // current coding mode under rate-distortion optimization test loop
+#if CONFIG_COMP_INTRA_PRED
+ mbmi->second_mode = (MB_PREDICTION_MODE)(DC_PRED - 1);
+ mbmi->second_uv_mode = (MB_PREDICTION_MODE)(DC_PRED - 1);
+#endif
+#if CONFIG_COMP_INTERINTRA_PRED
+ mbmi->interintra_mode = (MB_PREDICTION_MODE)(DC_PRED - 1);
+ mbmi->interintra_uv_mode = (MB_PREDICTION_MODE)(DC_PRED - 1);
+#endif
+
+ // If the segment reference frame feature is enabled....
+ // then do nothing if the current ref frame is not allowed..
+ if (vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) &&
+ !vp9_check_segref(xd, segment_id, mbmi->ref_frame)) {
+ continue;
+ // If the segment mode feature is enabled....
+ // then do nothing if the current mode is not allowed..
+ } else if (vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE) &&
+ (this_mode !=
+ vp9_get_segdata(xd, segment_id, SEG_LVL_MODE))) {
+ continue;
+ // Disable this drop out case if either the mode or ref frame
+ // segment level feature is enabled for this segment. This is to
+ // prevent the possibility that the we end up unable to pick any mode.
+ } else if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) &&
+ !vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE)) {
+ // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
+ // unless ARNR filtering is enabled in which case we want
+ // an unfiltered alternative
+ if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
+ if (this_mode != ZEROMV ||
+ mbmi->ref_frame != ALTREF_FRAME) {
+ continue;
+ }
+ }
+ }
+
+ /* everything but intra */
+ if (mbmi->ref_frame) {
+ int ref = mbmi->ref_frame;
+
+ xd->pre.y_buffer = y_buffer[ref];
+ xd->pre.u_buffer = u_buffer[ref];
+ xd->pre.v_buffer = v_buffer[ref];
+ best_ref_mv = frame_best_ref_mv[ref];
+ vpx_memcpy(mdcounts, frame_mdcounts[ref], sizeof(mdcounts));
+ }
+
+ if (mbmi->second_ref_frame > 0) {
+ int ref = mbmi->second_ref_frame;
+
+ xd->second_pre.y_buffer = y_buffer[ref];
+ xd->second_pre.u_buffer = u_buffer[ref];
+ xd->second_pre.v_buffer = v_buffer[ref];
+ second_best_ref_mv = frame_best_ref_mv[ref];
+ }
+
+ // Experimental code. Special case for gf and arf zeromv modes.
+ // Increase zbin size to suppress noise
+ if (cpi->zbin_mode_boost_enabled) {
+ if (vp9_mode_order[mode_index].ref_frame == INTRA_FRAME)
+ cpi->zbin_mode_boost = 0;
+ else {
+ if (vp9_mode_order[mode_index].mode == ZEROMV) {
+ if (vp9_mode_order[mode_index].ref_frame != LAST_FRAME)
+ cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
+ else
+ cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
+ } else if (vp9_mode_order[mode_index].mode == SPLITMV)
+ cpi->zbin_mode_boost = 0;
+ else
+ cpi->zbin_mode_boost = MV_ZBIN_BOOST;
+ }
+
+ vp9_update_zbin_extra(cpi, x);
+ }
+
+ // Intra
+ if (!mbmi->ref_frame) {
+ switch (this_mode) {
+ default:
+ case V_PRED:
+ case H_PRED:
+ case D45_PRED:
+ case D135_PRED:
+ case D117_PRED:
+ case D153_PRED:
+ case D27_PRED:
+ case D63_PRED:
+ rate2 += intra_cost_penalty;
+ case DC_PRED:
+ case TM_PRED:
+ mbmi->ref_frame = INTRA_FRAME;
+ // FIXME compound intra prediction
+ vp9_build_intra_predictors_mby(&x->e_mbd);
+ macro_block_yrd(cpi, x, &rate_y, &distortion, &skippable, txfm_cache);
+ rate2 += rate_y;
+ distortion2 += distortion;
+ rate2 += x->mbmode_cost[xd->frame_type][mbmi->mode];
+ if (mbmi->txfm_size != TX_4X4) {
+ rate2 += uv_intra_rate_8x8;
+ rate_uv = uv_intra_rate_tokenonly_8x8;
+ distortion2 += uv_intra_distortion_8x8;
+ distortion_uv = uv_intra_distortion_8x8;
+ skippable = skippable && uv_intra_skippable_8x8;
+ } else {
+ rate2 += uv_intra_rate;
+ rate_uv = uv_intra_rate_tokenonly;
+ distortion2 += uv_intra_distortion;
+ distortion_uv = uv_intra_distortion;
+ skippable = skippable && uv_intra_skippable;
+ }
+ break;
+ case B_PRED: {
+ int64_t tmp_rd;
+
+ // Note the rate value returned here includes the cost of coding
+ // the BPRED mode : x->mbmode_cost[xd->frame_type][BPRED];
+ mbmi->txfm_size = TX_4X4;
+ tmp_rd = rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y, &distortion, best_yrd,
+#if CONFIG_COMP_INTRA_PRED
+ 0,
+#endif
+ 0);
+ rate2 += rate;
+ rate2 += intra_cost_penalty;
+ distortion2 += distortion;
+
+ if (tmp_rd < best_yrd) {
+ rate2 += uv_intra_rate;
+ rate_uv = uv_intra_rate_tokenonly;
+ distortion2 += uv_intra_distortion;
+ distortion_uv = uv_intra_distortion;
+ } else {
+ this_rd = INT64_MAX;
+ disable_skip = 1;
+ }
+ }
+ break;
+ case I8X8_PRED: {
+ int cost0 = vp9_cost_bit(cm->prob_tx[0], 0);
+ int cost1 = vp9_cost_bit(cm->prob_tx[0], 1);
+ int64_t tmp_rd_4x4s, tmp_rd_8x8s;
+ int64_t tmp_rd_4x4, tmp_rd_8x8, tmp_rd;
+ int r4x4, tok4x4, d4x4, r8x8, tok8x8, d8x8;
+ mbmi->txfm_size = TX_4X4;
+ tmp_rd_4x4 = rd_pick_intra8x8mby_modes(cpi, x, &r4x4, &tok4x4,
+ &d4x4, best_yrd);
+ mode8x8[0][0] = xd->mode_info_context->bmi[0].as_mode.first;
+ mode8x8[0][1] = xd->mode_info_context->bmi[2].as_mode.first;
+ mode8x8[0][2] = xd->mode_info_context->bmi[8].as_mode.first;
+ mode8x8[0][3] = xd->mode_info_context->bmi[10].as_mode.first;
+#if CONFIG_COMP_INTRA_PRED
+ mode8x8[1][0] = xd->mode_info_context->bmi[0].as_mode.second;
+ mode8x8[1][1] = xd->mode_info_context->bmi[2].as_mode.second;
+ mode8x8[1][2] = xd->mode_info_context->bmi[8].as_mode.second;
+ mode8x8[1][3] = xd->mode_info_context->bmi[10].as_mode.second;
+#endif
+ mbmi->txfm_size = TX_8X8;
+ tmp_rd_8x8 = rd_pick_intra8x8mby_modes(cpi, x, &r8x8, &tok8x8,
+ &d8x8, best_yrd);
+ txfm_cache[ONLY_4X4] = tmp_rd_4x4;
+ txfm_cache[ALLOW_8X8] = tmp_rd_8x8;
+ txfm_cache[ALLOW_16X16] = tmp_rd_8x8;
+ tmp_rd_4x4s = tmp_rd_4x4 + RDCOST(x->rdmult, x->rddiv, cost0, 0);
+ tmp_rd_8x8s = tmp_rd_8x8 + RDCOST(x->rdmult, x->rddiv, cost1, 0);
+ txfm_cache[TX_MODE_SELECT] = tmp_rd_4x4s < tmp_rd_8x8s ? tmp_rd_4x4s : tmp_rd_8x8s;
+ if (cm->txfm_mode == TX_MODE_SELECT) {
+ if (tmp_rd_4x4s < tmp_rd_8x8s) {
+ rate = r4x4 + cost0;
+ rate_y = tok4x4 + cost0;
+ distortion = d4x4;
+ mbmi->txfm_size = TX_4X4;
+ tmp_rd = tmp_rd_4x4s;
+ } else {
+ rate = r8x8 + cost1;
+ rate_y = tok8x8 + cost1;
+ distortion = d8x8;
+ mbmi->txfm_size = TX_8X8;
+ tmp_rd = tmp_rd_8x8s;
+
+ mode8x8[0][0] = xd->mode_info_context->bmi[0].as_mode.first;
+ mode8x8[0][1] = xd->mode_info_context->bmi[2].as_mode.first;
+ mode8x8[0][2] = xd->mode_info_context->bmi[8].as_mode.first;
+ mode8x8[0][3] = xd->mode_info_context->bmi[10].as_mode.first;
+#if CONFIG_COMP_INTRA_PRED
+ mode8x8[1][0] = xd->mode_info_context->bmi[0].as_mode.second;
+ mode8x8[1][1] = xd->mode_info_context->bmi[2].as_mode.second;
+ mode8x8[1][2] = xd->mode_info_context->bmi[8].as_mode.second;
+ mode8x8[1][3] = xd->mode_info_context->bmi[10].as_mode.second;
+#endif
+ }
+ } else if (cm->txfm_mode == ONLY_4X4) {
+ rate = r4x4;
+ rate_y = tok4x4;
+ distortion = d4x4;
+ mbmi->txfm_size = TX_4X4;
+ tmp_rd = tmp_rd_4x4;
+ } else {
+ rate = r8x8;
+ rate_y = tok8x8;
+ distortion = d8x8;
+ mbmi->txfm_size = TX_8X8;
+ tmp_rd = tmp_rd_8x8;
+
+ mode8x8[0][0] = xd->mode_info_context->bmi[0].as_mode.first;
+ mode8x8[0][1] = xd->mode_info_context->bmi[2].as_mode.first;
+ mode8x8[0][2] = xd->mode_info_context->bmi[8].as_mode.first;
+ mode8x8[0][3] = xd->mode_info_context->bmi[10].as_mode.first;
+#if CONFIG_COMP_INTRA_PRED
+ mode8x8[1][0] = xd->mode_info_context->bmi[0].as_mode.second;
+ mode8x8[1][1] = xd->mode_info_context->bmi[2].as_mode.second;
+ mode8x8[1][2] = xd->mode_info_context->bmi[8].as_mode.second;
+ mode8x8[1][3] = xd->mode_info_context->bmi[10].as_mode.second;
+#endif
+ }
+
+ rate2 += rate;
+ rate2 += intra_cost_penalty;
+ distortion2 += distortion;
+
+ /* TODO: uv rate maybe over-estimated here since there is UV intra
+ mode coded in I8X8_PRED prediction */
+ if (tmp_rd < best_yrd) {
+ rate2 += uv_intra_rate;
+ rate_uv = uv_intra_rate_tokenonly;
+ distortion2 += uv_intra_distortion;
+ distortion_uv = uv_intra_distortion;
+ } else {
+ this_rd = INT64_MAX;
+ disable_skip = 1;
+ }
+ }
+ break;
+ }
+ }
+ // Split MV. The code is very different from the other inter modes so
+ // special case it.
+ else if (this_mode == SPLITMV) {
+ const int is_comp_pred = mbmi->second_ref_frame > 0;
+ int64_t tmp_rd, this_rd_thresh;
+ int_mv *second_ref = is_comp_pred ? &second_best_ref_mv : NULL;
+
+ this_rd_thresh =
+ (mbmi->ref_frame == LAST_FRAME) ?
+ cpi->rd_threshes[THR_NEWMV] : cpi->rd_threshes[THR_NEWA];
+ this_rd_thresh =
+ (mbmi->ref_frame == GOLDEN_FRAME) ?
+ cpi->rd_threshes[THR_NEWG] : this_rd_thresh;
+
+ tmp_rd = rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv,
+ second_ref, best_yrd, mdcounts,
+ &rate, &rate_y, &distortion,
+ &skippable,
+ (int)this_rd_thresh, seg_mvs,
+ txfm_cache);
+ rate2 += rate;
+ distortion2 += distortion;
+
+ if (cpi->common.mcomp_filter_type == SWITCHABLE)
+ rate2 += SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs
+ [vp9_get_pred_context(&cpi->common, xd, PRED_SWITCHABLE_INTERP)]
+ [vp9_switchable_interp_map[mbmi->interp_filter]];
+ // If even the 'Y' rd value of split is higher than best so far
+ // then dont bother looking at UV
+ if (tmp_rd < best_yrd) {
+ int uv_skippable;
+
+ rd_inter4x4_uv(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
+ cpi->common.full_pixel);
+ rate2 += rate_uv;
+ distortion2 += distortion_uv;
+ skippable = skippable && uv_skippable;
+ } else {
+ this_rd = INT64_MAX;
+ disable_skip = 1;
+ }
+
+ if (is_comp_pred)
+ mode_excluded = cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY;
+ else
+ mode_excluded = cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY;
+
+ compmode_cost =
+ vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_COMP), is_comp_pred);
+ mbmi->mode = this_mode;
+ }
+ else {
+#if CONFIG_COMP_INTERINTRA_PRED
+ if (mbmi->second_ref_frame == INTRA_FRAME) {
+ if (best_intra16_mode == DC_PRED - 1) continue;
+ mbmi->interintra_mode = best_intra16_mode;
+#if SEPARATE_INTERINTRA_UV
+ mbmi->interintra_uv_mode = best_intra16_uv_mode;
+#else
+ mbmi->interintra_uv_mode = best_intra16_mode;
+#endif
+ }
+#endif
+ this_rd = handle_inter_mode(cpi, x, BLOCK_16X16,
+ &saddone, near_sadidx, mdcounts, txfm_cache,
+ &rate2, &distortion2, &skippable,
+ &compmode_cost,
+#if CONFIG_COMP_INTERINTRA_PRED
+ &compmode_interintra_cost,
+#endif
+ &rate_y, &distortion,
+ &rate_uv, &distortion_uv,
+ &mode_excluded, &disable_skip, recon_yoffset,
+ mode_index, frame_mv, frame_best_ref_mv,
+ mv_search_ref);
+ if (this_rd == INT64_MAX)
+ continue;
+ }
+
+#if CONFIG_COMP_INTERINTRA_PRED
+ if (cpi->common.use_interintra)
+ rate2 += compmode_interintra_cost;
+#endif
+
+ if (cpi->common.comp_pred_mode == HYBRID_PREDICTION)
+ rate2 += compmode_cost;
+
+ // Estimate the reference frame signaling cost and add it
+ // to the rolling cost variable.
+ rate2 += ref_costs[mbmi->ref_frame];
+
+ if (!disable_skip) {
+ // Test for the condition where skip block will be activated
+ // because there are no non zero coefficients and make any
+ // necessary adjustment for rate. Ignore if skip is coded at
+ // segment level as the cost wont have been added in.
+ if (cpi->common.mb_no_coeff_skip) {
+ int mb_skip_allowed;
+
+ // Is Mb level skip allowed for this mb.
+ mb_skip_allowed =
+ !vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) ||
+ vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
+
+ if (skippable) {
+ mbmi->mb_skip_coeff = 1;
+
+ // Back out the coefficient coding costs
+ rate2 -= (rate_y + rate_uv);
+ // for best_yrd calculation
+ rate_uv = 0;
+
+ if (mb_skip_allowed) {
+ int prob_skip_cost;
+
+ // Cost the skip mb case
+ vp9_prob skip_prob =
+ vp9_get_pred_prob(cm, &x->e_mbd, PRED_MBSKIP);
+
+ if (skip_prob) {
+ prob_skip_cost = vp9_cost_bit(skip_prob, 1);
+ rate2 += prob_skip_cost;
+ other_cost += prob_skip_cost;
+ }
+ }
+ }
+ // Add in the cost of the no skip flag.
+ else {
+ mbmi->mb_skip_coeff = 0;
+ if (mb_skip_allowed) {
+ int prob_skip_cost = vp9_cost_bit(
+ vp9_get_pred_prob(cm, &x->e_mbd, PRED_MBSKIP), 0);
+ rate2 += prob_skip_cost;
+ other_cost += prob_skip_cost;
+ }
+ }
+ }
+
+ // Calculate the final RD estimate for this mode.
+ this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
+ }
+
+ // Keep record of best intra distortion
+ if ((mbmi->ref_frame == INTRA_FRAME) &&
+ (this_rd < best_intra_rd)) {
+ best_intra_rd = this_rd;
+ *returnintra = distortion2;
+ }
+#if CONFIG_COMP_INTERINTRA_PRED
+ if ((mbmi->ref_frame == INTRA_FRAME) &&
+ (this_mode <= TM_PRED) &&
+ (this_rd < best_intra16_rd)) {
+ best_intra16_rd = this_rd;
+ best_intra16_mode = this_mode;
+ best_intra16_uv_mode = (mbmi->txfm_size != TX_4X4 ?
+ uv_intra_mode_8x8 : uv_intra_mode);
+ }
+#endif
+
+
+ if (!disable_skip && mbmi->ref_frame == INTRA_FRAME)
+ for (i = 0; i < NB_PREDICTION_TYPES; ++i)
+ best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
+
+ if (this_rd < best_overall_rd) {
+ best_overall_rd = this_rd;
+#if CONFIG_PRED_FILTER
+ best_filter_state = mbmi->pred_filter_enabled;
+#endif
+#if CONFIG_COMP_INTERINTRA_PRED
+ is_best_interintra = (mbmi->second_ref_frame == INTRA_FRAME);
+#endif
+ }
+
+#if CONFIG_PRED_FILTER
+ // Ignore modes where the prediction filter state doesn't
+ // match the state signaled at the frame level
+ if ((cm->pred_filter_mode == 2) ||
+ (cm->pred_filter_mode ==
+ mbmi->pred_filter_enabled)) {
+#endif
+ // Did this mode help.. i.e. is it the new best mode
+ if (this_rd < best_rd || x->skip) {
+ if (!mode_excluded) {
+ /*
+ if (mbmi->second_ref_frame == INTRA_FRAME) {
+ printf("rd %d best %d bestintra16 %d\n", this_rd, best_rd, best_intra16_rd);
+ }
+ */
+ // Note index of best mode so far
+ best_mode_index = mode_index;
+
+ if (this_mode <= B_PRED) {
+ if (mbmi->txfm_size != TX_4X4
+ && this_mode != B_PRED
+ && this_mode != I8X8_PRED)
+ mbmi->uv_mode = uv_intra_mode_8x8;
+ else
+ mbmi->uv_mode = uv_intra_mode;
+ /* required for left and above block mv */
+ mbmi->mv[0].as_int = 0;
+ }
+
+ other_cost += ref_costs[mbmi->ref_frame];
+
+ /* Calculate the final y RD estimate for this mode */
+ best_yrd = RDCOST(x->rdmult, x->rddiv, (rate2 - rate_uv - other_cost),
+ (distortion2 - distortion_uv));
+
+ *returnrate = rate2;
+ *returndistortion = distortion2;
+ best_rd = this_rd;
+ vpx_memcpy(&best_mbmode, mbmi, sizeof(MB_MODE_INFO));
+ vpx_memcpy(&best_partition, x->partition_info, sizeof(PARTITION_INFO));
+
+ if ((this_mode == B_PRED)
+ || (this_mode == I8X8_PRED)
+ || (this_mode == SPLITMV))
+ for (i = 0; i < 16; i++) {
+ best_bmodes[i] = xd->block[i].bmi;
+ }
+ }
+
+ // Testing this mode gave rise to an improvement in best error score.
+ // Lower threshold a bit for next time
+ cpi->rd_thresh_mult[mode_index] =
+ (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ?
+ cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT;
+ cpi->rd_threshes[mode_index] =
+ (cpi->rd_baseline_thresh[mode_index] >> 7) *
+ cpi->rd_thresh_mult[mode_index];
+ }
+ // If the mode did not help improve the best error case then raise the
+ // threshold for testing that mode next time around.
+ else {
+ cpi->rd_thresh_mult[mode_index] += 4;
+
+ if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT)
+ cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
+
+ cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index];
+ }
+
+ /* keep record of best compound/single-only prediction */
+ if (!disable_skip && mbmi->ref_frame != INTRA_FRAME) {
+ int64_t single_rd, hybrid_rd;
+ int single_rate, hybrid_rate;
+
+ if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
+ single_rate = rate2 - compmode_cost;
+ hybrid_rate = rate2;
+ } else {
+ single_rate = rate2;
+ hybrid_rate = rate2 + compmode_cost;
+ }
+
+ single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
+ hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
+
+ if (mbmi->second_ref_frame <= INTRA_FRAME &&
+ single_rd < best_pred_rd[SINGLE_PREDICTION_ONLY]) {
+ best_pred_rd[SINGLE_PREDICTION_ONLY] = single_rd;
+ } else if (mbmi->second_ref_frame > INTRA_FRAME &&
+ single_rd < best_pred_rd[COMP_PREDICTION_ONLY]) {
+ best_pred_rd[COMP_PREDICTION_ONLY] = single_rd;
+ }
+ if (hybrid_rd < best_pred_rd[HYBRID_PREDICTION])
+ best_pred_rd[HYBRID_PREDICTION] = hybrid_rd;
+ }
+
+ /* keep record of best txfm size */
+ if (!mode_excluded && this_rd != INT64_MAX) {
+ for (i = 0; i < NB_TXFM_MODES; i++) {
+ int64_t adj_rd;
+ if (this_mode != B_PRED) {
+ const int64_t txfm_mode_diff =
+ txfm_cache[i] - txfm_cache[cm->txfm_mode];
+ adj_rd = this_rd + txfm_mode_diff;
+ } else {
+ adj_rd = this_rd;
+ }
+ if (adj_rd < best_txfm_rd[i])
+ best_txfm_rd[i] = adj_rd;
+ }
+ }
+#if CONFIG_PRED_FILTER
+ }
+#endif
+
+ if (x->skip && !mode_excluded)
+ break;
+ }
+
+#if CONFIG_PRED_FILTER
+ // Update counts for prediction filter usage
+ if (best_filter_state != 0)
+ ++cpi->pred_filter_on_count;
+ else
+ ++cpi->pred_filter_off_count;
+#endif
+#if CONFIG_COMP_INTERINTRA_PRED
+ ++cpi->interintra_select_count[is_best_interintra];
+#endif
+
+ // Reduce the activation RD thresholds for the best choice mode
+ if ((cpi->rd_baseline_thresh[best_mode_index] > 0) &&
+ (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2))) {
+ int best_adjustment = (cpi->rd_thresh_mult[best_mode_index] >> 2);
+
+ cpi->rd_thresh_mult[best_mode_index] =
+ (cpi->rd_thresh_mult[best_mode_index] >=
+ (MIN_THRESHMULT + best_adjustment)) ?
+ cpi->rd_thresh_mult[best_mode_index] - best_adjustment : MIN_THRESHMULT;
+ cpi->rd_threshes[best_mode_index] =
+ (cpi->rd_baseline_thresh[best_mode_index] >> 7) *
+ cpi->rd_thresh_mult[best_mode_index];
+ }
+
+ // This code force Altref,0,0 and skip for the frame that overlays a
+ // an alrtef unless Altref is filtered. However, this is unsafe if
+ // segment level coding of ref frame or mode is enabled for this
+ // segment.
+ if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) &&
+ !vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE) &&
+ cpi->is_src_frame_alt_ref &&
+ (cpi->oxcf.arnr_max_frames == 0) &&
+ (best_mbmode.mode != ZEROMV || best_mbmode.ref_frame != ALTREF_FRAME)) {
+ mbmi->mode = ZEROMV;
+ if (cm->txfm_mode != TX_MODE_SELECT)
+ mbmi->txfm_size = cm->txfm_mode;
+ else
+ mbmi->txfm_size = TX_16X16;
+ mbmi->ref_frame = ALTREF_FRAME;
+ mbmi->mv[0].as_int = 0;
+ mbmi->uv_mode = DC_PRED;
+ mbmi->mb_skip_coeff =
+ (cpi->common.mb_no_coeff_skip) ? 1 : 0;
+ mbmi->partitioning = 0;
+
+ vpx_memset(best_pred_diff, 0, sizeof(best_pred_diff));
+ vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff));
+ goto end;
+ }
+
+ // macroblock modes
+ vpx_memcpy(mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
+ if (best_mbmode.mode == B_PRED) {
+ for (i = 0; i < 16; i++) {
+ xd->mode_info_context->bmi[i].as_mode = best_bmodes[i].as_mode;
+ xd->block[i].bmi.as_mode = xd->mode_info_context->bmi[i].as_mode;
+ }
+ }
+
+ if (best_mbmode.mode == I8X8_PRED)
+ set_i8x8_block_modes(x, mode8x8);
+
+ if (best_mbmode.mode == SPLITMV) {
+ for (i = 0; i < 16; i++)
+ xd->mode_info_context->bmi[i].as_mv.first.as_int = best_bmodes[i].as_mv.first.as_int;
+ if (mbmi->second_ref_frame > 0)
+ for (i = 0; i < 16; i++)
+ xd->mode_info_context->bmi[i].as_mv.second.as_int = best_bmodes[i].as_mv.second.as_int;
+
+ vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO));
+
+ mbmi->mv[0].as_int = x->partition_info->bmi[15].mv.as_int;
+ mbmi->mv[1].as_int = x->partition_info->bmi[15].second_mv.as_int;
+ }
+
+ for (i = 0; i < NB_PREDICTION_TYPES; ++i) {
+ if (best_pred_rd[i] == INT64_MAX)
+ best_pred_diff[i] = INT_MIN;
+ else
+ best_pred_diff[i] = best_rd - best_pred_rd[i];
+ }
+
+ if (!x->skip) {
+ for (i = 0; i < NB_TXFM_MODES; i++) {
+ if (best_txfm_rd[i] == INT64_MAX)
+ best_txfm_diff[i] = INT_MIN;
+ else
+ best_txfm_diff[i] = best_rd - best_txfm_rd[i];
+ }
+ } else {
+ vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff));
+ }
+
+end:
+ store_coding_context(
+ x, &x->mb_context[xd->mb_index], best_mode_index, &best_partition,
+ &frame_best_ref_mv[xd->mode_info_context->mbmi.ref_frame],
+ &frame_best_ref_mv[xd->mode_info_context->mbmi.second_ref_frame < 0 ?
+ 0 : xd->mode_info_context->mbmi.second_ref_frame],
+ best_pred_diff, best_txfm_diff);
+}
+
+#if CONFIG_SUPERBLOCKS
+/* Intra mode decision for a superblock.  Forces TX_8X8, calls
+ * rd_pick_intra_sby_mode and rd_pick_intra_sbuv_mode, and writes the combined
+ * rate to *returnrate and dist_y + (dist_uv >> 2) to *returndist. */
+void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
+                               int *returnrate, int *returndist) {
+  VP9_COMMON *cm = &cpi->common;
+  MACROBLOCKD *xd = &x->e_mbd;
+  int rate_y, rate_uv, rate_y_tokenonly, rate_uv_tokenonly;
+  int dist_y, dist_uv, y_skip, uv_skip, total_rate;
+  int64_t txfm_cache[NB_TXFM_MODES];
+
+  xd->mode_info_context->mbmi.txfm_size = TX_8X8;
+
+  // Only the out-parameters are consumed; the returned scores are unused.
+  rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
+                         &dist_y, &y_skip, txfm_cache);
+  rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
+                          &dist_uv, &uv_skip);
+
+  total_rate = rate_y + rate_uv;
+  if (cm->mb_no_coeff_skip) {
+    const int skip = y_skip && uv_skip;
+    // A fully skippable block drops both token-only rates.
+    if (skip)
+      total_rate = total_rate - rate_y_tokenonly - rate_uv_tokenonly;
+    total_rate += vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), skip);
+  }
+  *returnrate = total_rate;
+  *returndist = dist_y + (dist_uv >> 2);
+}
+#endif
+
+/* Intra-only mode decision for a single macroblock.
+ * Runs the chroma search(es), then the whole-block, 8x8 and 4x4 luma
+ * searches.  If skipping is enabled and the whole-block luma and 8x8 chroma
+ * results are both flagged skippable, the block is coded as skipped;
+ * otherwise the luma candidate with the smallest returned error is kept
+ * (B_PRED only on a strict win, I8X8_PRED over the whole-block mode on a
+ * tie).  The choice is written to the block's MB_MODE_INFO, and
+ * x->mb_context[xd->mb_index].txfm_rd_diff is zeroed unless the whole-block
+ * candidate wins without skip, in which case it gets error16x16 minus each
+ * txfm_cache entry.
+ *   cpi        - encoder instance.
+ *   x          - macroblock being coded.
+ *   returnrate - out: rate of the chosen configuration.
+ *   returndist - out: luma distortion plus (chroma distortion >> 2).
+ */
+void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x,
+                            int *returnrate, int *returndist) {
+  // bmi[] entries sampled for the four I8X8 sub-block modes.
+  static const int i8x8_blocks[4] = { 0, 2, 8, 10 };
+  VP9_COMMON *cm = &cpi->common;
+  MACROBLOCKD *xd = &x->e_mbd;
+  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
+  int64_t error4x4, error8x8, error16x16;
+  int64_t txfm_cache[NB_TXFM_MODES];
+#if CONFIG_COMP_INTRA_PRED
+  int64_t error4x4d;
+  int rate4x4d, dist4x4d;
+#endif
+  int rate4x4, rate4x4_tokenonly = 0, dist4x4 = 0;
+  int rate8x8, rate8x8_tokenonly = 0, dist8x8;
+  int rate16x16 = 0, rate16x16_tokenonly = 0, dist16x16 = 0;
+  int rateuv, rateuv_tokenonly = 0, distuv = 0;
+  int rateuv8x8, rateuv8x8_tokenonly = 0, distuv8x8 = 0;
+  int rate, dist, mode16x16, modeuv;
+  int mode8x8[2][4];
+  int uv_intra_skippable, uv_intra_skippable_8x8;
+  int y_intra16x16_skippable = 0;
+  TX_SIZE txfm_size_16x16;
+  int i;
+
+  mbmi->ref_frame = INTRA_FRAME;
+
+  // Chroma: the first search always runs.  The 8x8 search runs only when the
+  // frame transform mode is not ONLY_4X4; otherwise its outputs are copied
+  // from the first search.
+  rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv,
+                          &uv_intra_skippable);
+  modeuv = mbmi->uv_mode;
+  if (cm->txfm_mode == ONLY_4X4) {
+    rateuv8x8 = rateuv;
+    rateuv8x8_tokenonly = rateuv_tokenonly;
+    distuv8x8 = distuv;
+    uv_intra_skippable_8x8 = uv_intra_skippable;
+  } else {
+    rd_pick_intra_mbuv_mode_8x8(cpi, x, &rateuv8x8, &rateuv8x8_tokenonly,
+                                &distuv8x8, &uv_intra_skippable_8x8);
+  }
+
+  // Whole-block luma candidate.  Its mode and transform size are saved so
+  // they can be restored if this candidate is the one selected.
+  error16x16 = rd_pick_intra16x16mby_mode(cpi, x, &rate16x16,
+                                          &rate16x16_tokenonly, &dist16x16,
+                                          &y_intra16x16_skippable, txfm_cache);
+  mode16x16 = mbmi->mode;
+  txfm_size_16x16 = mbmi->txfm_size;
+
+  // FIXME(rbultje) support transform-size selection
+  mbmi->txfm_size = (cm->txfm_mode == ONLY_4X4) ? TX_4X4 : TX_8X8;
+  error8x8 = rd_pick_intra8x8mby_modes(cpi, x, &rate8x8, &rate8x8_tokenonly,
+                                       &dist8x8, error16x16);
+  for (i = 0; i < 4; i++) {
+    const int b = i8x8_blocks[i];
+    mode8x8[0][i] = xd->mode_info_context->bmi[b].as_mode.first;
+#if CONFIG_COMP_INTRA_PRED
+    mode8x8[1][i] = xd->mode_info_context->bmi[b].as_mode.second;
+#endif
+  }
+
+  // 4x4 luma candidate(s).
+  error4x4 = rd_pick_intra4x4mby_modes(cpi, x, &rate4x4, &rate4x4_tokenonly,
+                                       &dist4x4, error16x16,
+#if CONFIG_COMP_INTRA_PRED
+                                       0,
+#endif
+                                       0);
+#if CONFIG_COMP_INTRA_PRED
+  error4x4d = rd_pick_intra4x4mby_modes(cpi, x, &rate4x4d, &rate4x4_tokenonly,
+                                        &dist4x4d, error16x16, 1, 0);
+#endif
+
+  mbmi->mb_skip_coeff = 0;
+  if (cm->mb_no_coeff_skip &&
+      y_intra16x16_skippable && uv_intra_skippable_8x8) {
+    // Skipped block: token costs are removed and the skip flag is costed.
+    // NOTE(review): modeuv comes from the first chroma search while the rate
+    // and distortion below use the *8x8 chroma values; confirm the pairing.
+    mbmi->mb_skip_coeff = 1;
+    mbmi->mode = mode16x16;
+    mbmi->uv_mode = modeuv;
+    rate = rateuv8x8 + rate16x16 - rateuv8x8_tokenonly - rate16x16_tokenonly +
+           vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 1);
+    dist = dist16x16 + (distuv8x8 >> 2);
+    mbmi->txfm_size = txfm_size_16x16;
+    memset(x->mb_context[xd->mb_index].txfm_rd_diff, 0,
+           sizeof(x->mb_context[xd->mb_index].txfm_rd_diff));
+  } else {
+    // B_PRED has to strictly beat the better of the other two candidates.
+    const int whole_block_leads = error8x8 > error16x16;
+    const int64_t error_to_beat = whole_block_leads ? error16x16 : error8x8;
+    if (error4x4 < error_to_beat) {
+      rate = rateuv;
+#if CONFIG_COMP_INTRA_PRED
+      rate += (error4x4d < error4x4) ? rate4x4d : rate4x4;
+      if (error4x4d >= error4x4)  // FIXME save original modes etc.
+        error4x4 = rd_pick_intra4x4mby_modes(cpi, x, &rate4x4,
+                                             &rate4x4_tokenonly,
+                                             &dist4x4, error16x16, 0,
+                                             cpi->update_context);
+#else
+      rate += rate4x4;
+#endif
+      mbmi->mode = B_PRED;
+      mbmi->txfm_size = TX_4X4;
+      dist = dist4x4 + (distuv >> 2);
+      memset(x->mb_context[xd->mb_index].txfm_rd_diff, 0,
+             sizeof(x->mb_context[xd->mb_index].txfm_rd_diff));
+    } else if (whole_block_leads) {
+      mbmi->txfm_size = txfm_size_16x16;
+      mbmi->mode = mode16x16;
+      rate = rate16x16 + rateuv8x8;
+      dist = dist16x16 + (distuv8x8 >> 2);
+      for (i = 0; i < NB_TXFM_MODES; i++) {
+        x->mb_context[xd->mb_index].txfm_rd_diff[i] =
+            error16x16 - txfm_cache[i];
+      }
+    } else {
+      // FIXME(rbultje) support transform-size selection
+      mbmi->mode = I8X8_PRED;
+      mbmi->txfm_size = (cm->txfm_mode == ONLY_4X4) ? TX_4X4 : TX_8X8;
+      set_i8x8_block_modes(x, mode8x8);
+      rate = rate8x8 + rateuv;
+      dist = dist8x8 + (distuv >> 2);
+      memset(x->mb_context[xd->mb_index].txfm_rd_diff, 0,
+             sizeof(x->mb_context[xd->mb_index].txfm_rd_diff));
+    }
+    // The explicit "not skipped" flag is costed when skipping is enabled.
+    if (cm->mb_no_coeff_skip)
+      rate += vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0);
+  }
+
+  *returnrate = rate;
+  *returndist = dist;
+}
+
+#if CONFIG_SUPERBLOCKS
+int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
+ int recon_yoffset, int recon_uvoffset,
+ int *returnrate, int *returndistortion) {
+ VP9_COMMON *cm = &cpi->common;
+ MACROBLOCKD *xd = &x->e_mbd;
+ MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
+ MB_PREDICTION_MODE this_mode;
+ MV_REFERENCE_FRAME ref_frame;
+ unsigned char segment_id = xd->mode_info_context->mbmi.segment_id;
+ int comp_pred, i;
+ int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
+ int_mv frame_best_ref_mv[MAX_REF_FRAMES];
+ int_mv mv_search_ref[MAX_REF_FRAMES];
+ int frame_mdcounts[4][4];
+ unsigned char *y_buffer[4];
+ unsigned char *u_buffer[4];
+ unsigned char *v_buffer[4];
+ static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
+ VP9_ALT_FLAG };
+ int idx_list[4] = { 0, cpi->common.lst_fb_idx, cpi->common.gld_fb_idx,
+ cpi->common.alt_fb_idx };
+ int mdcounts[4];
+ int near_sadidx[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+ int saddone = 0;
+ int64_t best_rd = INT64_MAX;
+ int64_t best_yrd = INT64_MAX;
+ int64_t best_txfm_rd[NB_TXFM_MODES];
+ int64_t best_txfm_diff[NB_TXFM_MODES];
+ int64_t best_pred_diff[NB_PREDICTION_TYPES];
+ int64_t best_pred_rd[NB_PREDICTION_TYPES];
+ MB_MODE_INFO best_mbmode;
+ int mode_index, best_mode_index = 0;
+ unsigned int ref_costs[MAX_REF_FRAMES];
+#if CONFIG_COMP_INTERINTRA_PRED
+ int is_best_interintra = 0;
+ int64_t best_intra16_rd = INT64_MAX;
+ int best_intra16_mode = DC_PRED, best_intra16_uv_mode = DC_PRED;
+#endif
+ int64_t best_overall_rd = INT64_MAX;
+ int rate_uv_4x4 = 0, rate_uv_8x8 = 0, rate_uv_tokenonly_4x4 = 0,
+ rate_uv_tokenonly_8x8 = 0;
+ int dist_uv_4x4 = 0, dist_uv_8x8 = 0, uv_skip_4x4 = 0, uv_skip_8x8 = 0;
+ MB_PREDICTION_MODE mode_uv_4x4 = NEARESTMV, mode_uv_8x8 = NEARESTMV;
+ int switchable_filter_index = 0;
+
+ x->skip = 0;
+ xd->mode_info_context->mbmi.segment_id = segment_id;
+ estimate_ref_frame_costs(cpi, segment_id, ref_costs);
+ vpx_memset(&best_mbmode, 0, sizeof(best_mbmode));
+
+ for (i = 0; i < NB_PREDICTION_TYPES; ++i)
+ best_pred_rd[i] = INT64_MAX;
+ for (i = 0; i < NB_TXFM_MODES; i++)
+ best_txfm_rd[i] = INT64_MAX;
+
+ for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
+ if (cpi->ref_frame_flags & flag_list[ref_frame]) {
+ setup_buffer_inter(cpi, x, idx_list[ref_frame], ref_frame, BLOCK_32X32,
+ recon_yoffset, recon_uvoffset, frame_mv[NEARESTMV],
+ frame_mv[NEARMV], frame_best_ref_mv, mv_search_ref,
+ frame_mdcounts, y_buffer, u_buffer, v_buffer);
+ }
+ frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
+ frame_mv[ZEROMV][ref_frame].as_int = 0;
+ }
+
+ mbmi->mode = DC_PRED;
+ if (cm->txfm_mode == ONLY_4X4 || cm->txfm_mode == TX_MODE_SELECT) {
+ mbmi->txfm_size = TX_4X4;
+ rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_4x4, &rate_uv_tokenonly_4x4,
+ &dist_uv_4x4, &uv_skip_4x4);
+ mode_uv_4x4 = mbmi->uv_mode;
+ }
+ if (cm->txfm_mode != ONLY_4X4) {
+ mbmi->txfm_size = TX_8X8;
+ rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_8x8, &rate_uv_tokenonly_8x8,
+ &dist_uv_8x8, &uv_skip_8x8);
+ mode_uv_8x8 = mbmi->uv_mode;
+ }
+
+ for (mode_index = 0; mode_index < MAX_MODES;
+ mode_index += (!switchable_filter_index)) {
+ int mode_excluded = 0;
+ int64_t this_rd = INT64_MAX;
+ int disable_skip = 0;
+ int other_cost = 0;
+ int compmode_cost = 0;
+ int rate2 = 0, rate_y = 0, rate_uv = 0;
+ int distortion2 = 0, distortion_y = 0, distortion_uv = 0;
+ int skippable;
+ int64_t txfm_cache[NB_TXFM_MODES];
+#if CONFIG_COMP_INTERINTRA_PRED
+ int compmode_interintra_cost = 0;
+#endif
+
+ // Test best rd so far against threshold for trying this mode.
+ if (best_rd <= cpi->rd_threshes[mode_index] ||
+ cpi->rd_threshes[mode_index] == INT_MAX) {
+ continue;
+ }
+
+ this_mode = vp9_mode_order[mode_index].mode;
+ ref_frame = vp9_mode_order[mode_index].ref_frame;
+ if (!(ref_frame == INTRA_FRAME ||
+ (cpi->ref_frame_flags & flag_list[ref_frame]))) {
+ continue;
+ }
+ mbmi->ref_frame = ref_frame;
+ mbmi->second_ref_frame = vp9_mode_order[mode_index].second_ref_frame;
+ comp_pred = mbmi->second_ref_frame > INTRA_FRAME;
+ mbmi->mode = this_mode;
+ mbmi->uv_mode = DC_PRED;
+#if CONFIG_COMP_INTRA_PRED
+ mbmi->second_mode = (MB_PREDICTION_MODE)(DC_PRED - 1);
+ mbmi->second_uv_mode = (MB_PREDICTION_MODE)(DC_PRED - 1);
+#endif
+#if CONFIG_COMP_INTERINTRA_PRED
+ mbmi->interintra_mode = (MB_PREDICTION_MODE)(DC_PRED - 1);
+ mbmi->interintra_uv_mode = (MB_PREDICTION_MODE)(DC_PRED - 1);
+#endif
+ if (cpi->common.mcomp_filter_type == SWITCHABLE &&
+ this_mode >= NEARESTMV && this_mode <= SPLITMV) {
+ mbmi->interp_filter =
+ vp9_switchable_interp[switchable_filter_index++];
+ if (switchable_filter_index == VP9_SWITCHABLE_FILTERS)
+ switchable_filter_index = 0;
+ } else {
+ mbmi->interp_filter = cpi->common.mcomp_filter_type;
+ }
+ vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
+
+ // if (!(cpi->ref_frame_flags & flag_list[ref_frame]))
+ // continue;
+
+ if (this_mode == I8X8_PRED || this_mode == B_PRED || this_mode == SPLITMV)
+ continue;
+ // if (vp9_mode_order[mode_index].second_ref_frame == INTRA_FRAME)
+ // continue;
+
+ if (comp_pred) {
+ int second_ref;
+
+ if (ref_frame == ALTREF_FRAME) {
+ second_ref = LAST_FRAME;
+ } else {
+ second_ref = ref_frame + 1;
+ }
+ if (!(cpi->ref_frame_flags & flag_list[second_ref]))
+ continue;
+ mbmi->second_ref_frame = second_ref;
+
+ xd->second_pre.y_buffer = y_buffer[second_ref];
+ xd->second_pre.u_buffer = u_buffer[second_ref];
+ xd->second_pre.v_buffer = v_buffer[second_ref];
+ mode_excluded = cm->comp_pred_mode == SINGLE_PREDICTION_ONLY;
+ } else {
+ // mbmi->second_ref_frame = vp9_mode_order[mode_index].second_ref_frame;
+ if (ref_frame != INTRA_FRAME) {
+ if (mbmi->second_ref_frame != INTRA_FRAME)
+ mode_excluded = cm->comp_pred_mode == COMP_PREDICTION_ONLY;
+#if CONFIG_COMP_INTERINTRA_PRED
+ else
+ mode_excluded = !cm->use_interintra;
+#endif
+ }
+ }
+
+ xd->pre.y_buffer = y_buffer[ref_frame];
+ xd->pre.u_buffer = u_buffer[ref_frame];
+ xd->pre.v_buffer = v_buffer[ref_frame];
+ vpx_memcpy(mdcounts, frame_mdcounts[ref_frame], sizeof(mdcounts));
+
+ // If the segment reference frame feature is enabled....
+ // then do nothing if the current ref frame is not allowed..
+ if (vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) &&
+ !vp9_check_segref(xd, segment_id, ref_frame)) {
+ continue;
+ // If the segment mode feature is enabled....
+ // then do nothing if the current mode is not allowed..
+ } else if (vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE) &&
+ (this_mode != vp9_get_segdata(xd, segment_id, SEG_LVL_MODE))) {
+ continue;
+ // Disable this drop out case if either the mode or ref frame
+ // segment level feature is enabled for this segment. This is to
+ // prevent the possibility that we end up unable to pick any mode.
+ } else if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) &&
+ !vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE)) {
+ // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
+ // unless ARNR filtering is enabled in which case we want
+ // an unfiltered alternative
+ if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
+ if (this_mode != ZEROMV || ref_frame != ALTREF_FRAME) {
+ continue;
+ }
+ }
+ }
+
+ if (ref_frame == INTRA_FRAME) {
+ vp9_build_intra_predictors_sby_s(xd);
+ super_block_yrd(cpi, x, &rate_y, &distortion_y,
+ &skippable, txfm_cache);
+ if (mbmi->txfm_size == TX_4X4) {
+ rate_uv = rate_uv_4x4;
+ distortion_uv = dist_uv_4x4;
+ skippable = skippable && uv_skip_4x4;
+ mbmi->uv_mode = mode_uv_4x4;
+ } else {
+ rate_uv = rate_uv_8x8;
+ distortion_uv = dist_uv_8x8;
+ skippable = skippable && uv_skip_8x8;
+ mbmi->uv_mode = mode_uv_8x8;
+ }
+
+ rate2 = rate_y + x->mbmode_cost[cm->frame_type][mbmi->mode] + rate_uv;
+ distortion2 = distortion_y + distortion_uv;
+ } else {
+#if CONFIG_COMP_INTERINTRA_PRED
+ if (mbmi->second_ref_frame == INTRA_FRAME) {
+ if (best_intra16_mode == DC_PRED - 1) continue;
+ mbmi->interintra_mode = best_intra16_mode;
+#if SEPARATE_INTERINTRA_UV
+ mbmi->interintra_uv_mode = best_intra16_uv_mode;
+#else
+ mbmi->interintra_uv_mode = best_intra16_mode;
+#endif
+ }
+#endif
+ this_rd = handle_inter_mode(cpi, x, BLOCK_32X32,
+ &saddone, near_sadidx, mdcounts, txfm_cache,
+ &rate2, &distortion2, &skippable,
+ &compmode_cost,
+#if CONFIG_COMP_INTERINTRA_PRED
+ &compmode_interintra_cost,
+#endif
+ &rate_y, &distortion_y,
+ &rate_uv, &distortion_uv,
+ &mode_excluded, &disable_skip, recon_yoffset,
+ mode_index, frame_mv, frame_best_ref_mv,
+ mv_search_ref);
+ if (this_rd == INT64_MAX)
+ continue;
+ }
+
+#if CONFIG_COMP_INTERINTRA_PRED
+ if (cpi->common.use_interintra) {
+ rate2 += compmode_interintra_cost;
+ }
+#endif
+ if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
+ rate2 += compmode_cost;
+ }
+
+ // Estimate the reference frame signaling cost and add it
+ // to the rolling cost variable.
+ rate2 += ref_costs[xd->mode_info_context->mbmi.ref_frame];
+
+ if (!disable_skip) {
+ // Test for the condition where skip block will be activated
+ // because there are no non zero coefficients and make any
+ // necessary adjustment for rate. Ignore if skip is coded at
+ // segment level as the cost won't have been added in.
+ if (cpi->common.mb_no_coeff_skip) {
+ int mb_skip_allowed;
+
+ // Is Mb level skip allowed for this mb.
+ mb_skip_allowed =
+ !vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) ||
+ vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
+
+ if (skippable) {
+ // Back out the coefficient coding costs
+ rate2 -= (rate_y + rate_uv);
+ // for best_yrd calculation
+ rate_uv = 0;
+
+ if (mb_skip_allowed) {
+ int prob_skip_cost;
+
+ // Cost the skip mb case
+ vp9_prob skip_prob =
+ vp9_get_pred_prob(cm, xd, PRED_MBSKIP);
+
+ if (skip_prob) {
+ prob_skip_cost = vp9_cost_bit(skip_prob, 1);
+ rate2 += prob_skip_cost;
+ other_cost += prob_skip_cost;
+ }
+ }
+ }
+ // Add in the cost of the no skip flag.
+ else if (mb_skip_allowed) {
+ int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob(cm, xd,
+ PRED_MBSKIP), 0);
+ rate2 += prob_skip_cost;
+ other_cost += prob_skip_cost;
+ }
+ }
+
+ // Calculate the final RD estimate for this mode.
+ this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
+ }
+
+#if 0
+ // Keep record of best intra distortion
+ if ((xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) &&
+ (this_rd < best_intra_rd)) {
+ best_intra_rd = this_rd;
+ *returnintra = distortion2;
+ }
+#endif
+#if CONFIG_COMP_INTERINTRA_PRED
+ if ((mbmi->ref_frame == INTRA_FRAME) &&
+ (this_mode <= TM_PRED) &&
+ (this_rd < best_intra16_rd)) {
+ best_intra16_rd = this_rd;
+ best_intra16_mode = this_mode;
+ best_intra16_uv_mode = (mbmi->txfm_size != TX_4X4 ?
+ mode_uv_8x8 : mode_uv_4x4);
+ }
+#endif
+
+ if (!disable_skip && mbmi->ref_frame == INTRA_FRAME)
+ for (i = 0; i < NB_PREDICTION_TYPES; ++i)
+ best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
+
+ if (this_rd < best_overall_rd) {
+ best_overall_rd = this_rd;
+#if CONFIG_COMP_INTERINTRA_PRED
+ is_best_interintra = (mbmi->second_ref_frame == INTRA_FRAME);
+#endif
+ }
+
+ // Did this mode help.. i.e. is it the new best mode
+ if (this_rd < best_rd || x->skip) {
+ if (!mode_excluded) {
+ // Note index of best mode so far
+ best_mode_index = mode_index;
+
+ if (this_mode <= B_PRED) {
+ /* required for left and above block mv */
+ mbmi->mv[0].as_int = 0;
+ }
+
+ other_cost += ref_costs[xd->mode_info_context->mbmi.ref_frame];
+
+ /* Calculate the final y RD estimate for this mode */
+ best_yrd = RDCOST(x->rdmult, x->rddiv, (rate2 - rate_uv - other_cost),
+ (distortion2 - distortion_uv));
+
+ *returnrate = rate2;
+ *returndistortion = distortion2;
+ best_rd = this_rd;
+ vpx_memcpy(&best_mbmode, mbmi, sizeof(MB_MODE_INFO));
+ }
+#if 0
+ // Testing this mode gave rise to an improvement in best error score. Lower threshold a bit for next time
+ cpi->rd_thresh_mult[mode_index] = (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ? cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT;
+ cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index];
+#endif
+ }
+ // If the mode did not help improve the best error case then raise the threshold for testing that mode next time around.
+ else {
+#if 0
+ cpi->rd_thresh_mult[mode_index] += 4;
+
+ if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT)
+ cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
+
+ cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index];
+#endif
+ }
+
+ /* keep record of best compound/single-only prediction */
+ if (!disable_skip && mbmi->ref_frame != INTRA_FRAME) {
+ int single_rd, hybrid_rd, single_rate, hybrid_rate;
+
+ if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
+ single_rate = rate2 - compmode_cost;
+ hybrid_rate = rate2;
+ } else {
+ single_rate = rate2;
+ hybrid_rate = rate2 + compmode_cost;
+ }
+
+ single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
+ hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
+
+ if (mbmi->second_ref_frame <= INTRA_FRAME &&
+ single_rd < best_pred_rd[SINGLE_PREDICTION_ONLY]) {
+ best_pred_rd[SINGLE_PREDICTION_ONLY] = single_rd;
+ } else if (mbmi->second_ref_frame > INTRA_FRAME &&
+ single_rd < best_pred_rd[COMP_PREDICTION_ONLY]) {
+ best_pred_rd[COMP_PREDICTION_ONLY] = single_rd;
+ }
+ if (hybrid_rd < best_pred_rd[HYBRID_PREDICTION])
+ best_pred_rd[HYBRID_PREDICTION] = hybrid_rd;
+ }
+
+ /* keep record of best txfm size */
+ if (!mode_excluded && this_rd != INT64_MAX) {
+ for (i = 0; i < NB_TXFM_MODES; i++) {
+ int64_t adj_rd;
+ if (this_mode != B_PRED) {
+ adj_rd = this_rd + txfm_cache[i] - txfm_cache[cm->txfm_mode];
+ } else {
+ adj_rd = this_rd;
+ }
+ if (adj_rd < best_txfm_rd[i])
+ best_txfm_rd[i] = adj_rd;
+ }
+ }
+
+ if (x->skip && !mode_excluded)
+ break;
+ }
+
+#if CONFIG_COMP_INTERINTRA_PRED
+ ++cpi->interintra_select_count[is_best_interintra];
+ // if (is_best_interintra) printf("best_interintra\n");
+#endif
+
+ // TODO(rbultje) integrate with RD thresholding
+#if 0
+ // Reduce the activation RD thresholds for the best choice mode
+ if ((cpi->rd_baseline_thresh[best_mode_index] > 0) &&
+ (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2))) {
+ int best_adjustment = (cpi->rd_thresh_mult[best_mode_index] >> 2);
+
+ cpi->rd_thresh_mult[best_mode_index] =
+ (cpi->rd_thresh_mult[best_mode_index] >= (MIN_THRESHMULT + best_adjustment)) ?
+ cpi->rd_thresh_mult[best_mode_index] - best_adjustment : MIN_THRESHMULT;
+ cpi->rd_threshes[best_mode_index] =
+ (cpi->rd_baseline_thresh[best_mode_index] >> 7) * cpi->rd_thresh_mult[best_mode_index];
+ }
+#endif
+
+ // This code forces Altref,0,0 and skip for the frame that overlays
+ // an altref unless Altref is filtered. However, this is unsafe if
+ // segment level coding of ref frame or mode is enabled for this
+ // segment.
+ if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) &&
+ !vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE) &&
+ cpi->is_src_frame_alt_ref &&
+ (cpi->oxcf.arnr_max_frames == 0) &&
+ (best_mbmode.mode != ZEROMV || best_mbmode.ref_frame != ALTREF_FRAME)) {
+ mbmi->mode = ZEROMV;
+ mbmi->ref_frame = ALTREF_FRAME;
+ mbmi->second_ref_frame = INTRA_FRAME;
+ mbmi->mv[0].as_int = 0;
+ mbmi->uv_mode = DC_PRED;
+ mbmi->mb_skip_coeff = (cpi->common.mb_no_coeff_skip) ? 1 : 0;
+ mbmi->partitioning = 0;
+ mbmi->txfm_size = cm->txfm_mode == TX_MODE_SELECT ?
+ TX_16X16 : cm->txfm_mode;
+
+ vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff));
+ vpx_memset(best_pred_diff, 0, sizeof(best_pred_diff));
+ goto end;
+ }
+
+ // macroblock modes
+ vpx_memcpy(mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
+
+ for (i = 0; i < NB_PREDICTION_TYPES; ++i) {
+ if (best_pred_rd[i] == INT64_MAX)
+ best_pred_diff[i] = INT_MIN;
+ else
+ best_pred_diff[i] = best_rd - best_pred_rd[i];
+ }
+
+ if (!x->skip) {
+ for (i = 0; i < NB_TXFM_MODES; i++) {
+ if (best_txfm_rd[i] == INT64_MAX)
+ best_txfm_diff[i] = INT_MIN;
+ else
+ best_txfm_diff[i] = best_rd - best_txfm_rd[i];
+ }
+ } else {
+ vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff));
+ }
+
+ end:
+ store_coding_context(x, &x->sb_context[0], best_mode_index, NULL,
+ &frame_best_ref_mv[mbmi->ref_frame],
+ &frame_best_ref_mv[mbmi->second_ref_frame < 0 ?
+ 0 : mbmi->second_ref_frame],
+ best_pred_diff, best_txfm_diff);
+
+ return best_rd;
+}
+#endif
+
+// Runs rd_pick_inter_mode for the macroblock described by x.
+//   recon_yoffset, recon_uvoffset: forwarded unchanged to rd_pick_inter_mode.
+//   totalrate, totaldist: receive the rate and distortion that
+//                         rd_pick_inter_mode reported.
+// The distortion and intra error are also recorded in x->mb_context.
+void vp9_pick_mode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x,
+                                    int recon_yoffset,
+                                    int recon_uvoffset,
+                                    int *totalrate, int *totaldist) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  const MB_MODE_INFO *const mode_info = &xd->mode_info_context->mbmi;
+  // Captured up front so the flag can be put back after the mode search.
+  const int saved_zbin_mode_boost = cpi->zbin_mode_boost_enabled;
+  int64_t intra_error = 0;
+  int rate, distortion;
+
+  // Breakout threshold: per-segment table entry when segmentation is
+  // enabled, otherwise the value from the encoder configuration.
+  if (!xd->segmentation_enabled)
+    x->encode_breakout = cpi->oxcf.encode_breakout;
+  else
+    x->encode_breakout = cpi->segment_encode_breakout[mode_info->segment_id];
+
+  // Only the rate-distortion path exists in this code base; the non-RD
+  // picker (vp9_pick_inter_mode) was removed to simplify development.
+  rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate,
+                     &distortion, &intra_error);
+
+  // Put back cpi->zbin_mode_boost_enabled in case the search changed it.
+  cpi->zbin_mode_boost_enabled = saved_zbin_mode_boost;
+
+  // Store metrics so they can be added in to totals if this mode is picked.
+  x->mb_context[xd->mb_index].distortion = distortion;
+  x->mb_context[xd->mb_index].intra_error = intra_error;
+
+  *totalrate = rate;
+  *totaldist = distortion;
+}
diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h
new file mode 100644
index 0000000..4e41714
--- /dev/null
+++ b/vp9/encoder/vp9_rdopt.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_ENCODER_VP9_RDOPT_H_
+#define VP9_ENCODER_VP9_RDOPT_H_
+/* RD cost = ((128 + R * RM) >> 8) + DM * D, with R and DM widened to int64_t. */
+#define RDCOST(RM,DM,R,D) ( ((128+((int64_t)(R))*(RM)) >> 8) + ((int64_t)(DM))*(D) )
+#define RDCOST_8x8(RM,DM,R,D) ( ((128+((int64_t)(R))*(RM)) >> 8) + ((int64_t)(DM))*(D) )
+
+extern void vp9_initialize_rd_consts(VP9_COMP *cpi, int Qvalue);
+
+extern void vp9_initialize_me_consts(VP9_COMP *cpi, int QIndex);
+
+extern void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x,
+                                   int *r, int *d);
+
+extern void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
+                                      int *r, int *d);
+
+extern void vp9_pick_mode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x,
+                                           int recon_yoffset,
+                                           int recon_uvoffset, int *r, int *d);
+
+extern int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
+                                         int recon_yoffset, int recon_uvoffset,
+                                         int *returnrate, int *returndist);
+
+extern void vp9_init_me_luts(void);  /* (void): prototype with checked args */
+
+extern void vp9_set_mbmode_and_mvs(MACROBLOCK *x,
+                                   MB_PREDICTION_MODE mb, int_mv *mv);
+
+#endif
diff --git a/vp9/encoder/vp9_sad_c.c b/vp9/encoder/vp9_sad_c.c
new file mode 100644
index 0000000..4650442
--- /dev/null
+++ b/vp9/encoder/vp9_sad_c.c
@@ -0,0 +1,480 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include <stdlib.h>
+#include "vp9/common/vp9_sadmxn.h"
+#include "./vpx_config.h"
+#include "vpx/vpx_integer.h"
+
+/* Thin wrapper: sad_mx_n_c with dimensions 32, 32.  max_sad is ignored. */
+unsigned int vp9_sad32x32_c(const unsigned char *src_ptr, int src_stride,
+                            const unsigned char *ref_ptr, int ref_stride,
+                            int max_sad) {
+  (void)max_sad;
+  return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 32, 32);
+}
+
+/* Thin wrapper: sad_mx_n_c with dimensions 16, 16.  max_sad is ignored. */
+unsigned int vp9_sad16x16_c(const unsigned char *src_ptr, int src_stride,
+                            const unsigned char *ref_ptr, int ref_stride,
+                            int max_sad) {
+  (void)max_sad;
+  return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 16, 16);
+}
+
+/* Thin wrapper: sad_mx_n_c with dimensions 8, 8.  max_sad is ignored. */
+unsigned int vp9_sad8x8_c(const unsigned char *src_ptr, int src_stride,
+                          const unsigned char *ref_ptr, int ref_stride,
+                          int max_sad) {
+  (void)max_sad;
+  return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 8, 8);
+}
+
+
+/* Thin wrapper: sad_mx_n_c with dimensions 16, 8.  max_sad is ignored. */
+unsigned int vp9_sad16x8_c(const unsigned char *src_ptr, int src_stride,
+                           const unsigned char *ref_ptr, int ref_stride,
+                           int max_sad) {
+  (void)max_sad;
+  return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 16, 8);
+}
+
+/* Thin wrapper: sad_mx_n_c with dimensions 8, 16.  max_sad is ignored. */
+unsigned int vp9_sad8x16_c(const unsigned char *src_ptr, int src_stride,
+                           const unsigned char *ref_ptr, int ref_stride,
+                           int max_sad) {
+  (void)max_sad;
+  return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 8, 16);
+}
+
+
+/* Thin wrapper: sad_mx_n_c with dimensions 4, 4.  max_sad is ignored. */
+unsigned int vp9_sad4x4_c(const unsigned char *src_ptr, int src_stride,
+                          const unsigned char *ref_ptr, int ref_stride,
+                          int max_sad) {
+  (void)max_sad;
+  return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 4, 4);
+}
+
+/* Fills sad_array[0..2] with vp9_sad32x32_c results for the references
+ * starting at ref_ptr, ref_ptr + 1 and ref_ptr + 2.  0x7fffffff is passed
+ * as the max_sad argument of every call. */
+void vp9_sad32x32x3_c(const unsigned char *src_ptr, int src_stride,
+                      const unsigned char *ref_ptr, int ref_stride,
+                      unsigned int *sad_array) {
+  int i;
+
+  for (i = 0; i < 3; i++) {
+    sad_array[i] = vp9_sad32x32_c(src_ptr, src_stride, ref_ptr + i,
+                                  ref_stride, 0x7fffffff);
+  }
+}
+/* Entry k (0..7) is vp9_sad32x32_c at ref_ptr + k, cast to unsigned short. */
+void vp9_sad32x32x8_c(const unsigned char *src_ptr,
+ int src_stride,
+ const unsigned char *ref_ptr,
+ int ref_stride,
+ unsigned short *sad_array  /* NOTE(review): if this is a plain SAD of */
+ ) {  /* 8-bit pixels it can reach 32*32*255 = 261120; the casts would wrap. */
+ sad_array[0] = (unsigned short)vp9_sad32x32_c(src_ptr, src_stride,
+ ref_ptr, ref_stride,
+ 0x7fffffff);
+ sad_array[1] = (unsigned short)vp9_sad32x32_c(src_ptr, src_stride,
+ ref_ptr + 1, ref_stride,
+ 0x7fffffff);
+ sad_array[2] = (unsigned short)vp9_sad32x32_c(src_ptr, src_stride,
+ ref_ptr + 2, ref_stride,
+ 0x7fffffff);
+ sad_array[3] = (unsigned short)vp9_sad32x32_c(src_ptr, src_stride,
+ ref_ptr + 3, ref_stride,
+ 0x7fffffff);
+ sad_array[4] = (unsigned short)vp9_sad32x32_c(src_ptr, src_stride,
+ ref_ptr + 4, ref_stride,
+ 0x7fffffff);
+ sad_array[5] = (unsigned short)vp9_sad32x32_c(src_ptr, src_stride,
+ ref_ptr + 5, ref_stride,
+ 0x7fffffff);
+ sad_array[6] = (unsigned short)vp9_sad32x32_c(src_ptr, src_stride,
+ ref_ptr + 6, ref_stride,
+ 0x7fffffff);
+ sad_array[7] = (unsigned short)vp9_sad32x32_c(src_ptr, src_stride,
+ ref_ptr + 7, ref_stride,
+ 0x7fffffff);
+}
+
+/* Fills sad_array[0..2] with vp9_sad16x16_c results for the references
+ * starting at ref_ptr, ref_ptr + 1 and ref_ptr + 2. */
+void vp9_sad16x16x3_c(const unsigned char *src_ptr, int src_stride,
+                      const unsigned char *ref_ptr, int ref_stride,
+                      unsigned int *sad_array) {
+  int i;
+
+  for (i = 0; i < 3; i++) {
+    sad_array[i] = vp9_sad16x16_c(src_ptr, src_stride, ref_ptr + i,
+                                  ref_stride, 0x7fffffff);
+  }
+}
+/* Entry k (0..7) is vp9_sad16x16_c at ref_ptr + k, cast to unsigned short. */
+void vp9_sad16x16x8_c(const unsigned char *src_ptr,
+ int src_stride,
+ const unsigned char *ref_ptr,
+ int ref_stride,
+ unsigned short *sad_array) {
+ sad_array[0] = (unsigned short)vp9_sad16x16_c(src_ptr, src_stride,
+ ref_ptr, ref_stride,
+ 0x7fffffff);
+ sad_array[1] = (unsigned short)vp9_sad16x16_c(src_ptr, src_stride,
+ ref_ptr + 1, ref_stride,
+ 0x7fffffff);
+ sad_array[2] = (unsigned short)vp9_sad16x16_c(src_ptr, src_stride,
+ ref_ptr + 2, ref_stride,
+ 0x7fffffff);
+ sad_array[3] = (unsigned short)vp9_sad16x16_c(src_ptr, src_stride,
+ ref_ptr + 3, ref_stride,
+ 0x7fffffff);
+ sad_array[4] = (unsigned short)vp9_sad16x16_c(src_ptr, src_stride,
+ ref_ptr + 4, ref_stride,
+ 0x7fffffff);
+ sad_array[5] = (unsigned short)vp9_sad16x16_c(src_ptr, src_stride,
+ ref_ptr + 5, ref_stride,
+ 0x7fffffff);
+ sad_array[6] = (unsigned short)vp9_sad16x16_c(src_ptr, src_stride,
+ ref_ptr + 6, ref_stride,
+ 0x7fffffff);
+ sad_array[7] = (unsigned short)vp9_sad16x16_c(src_ptr, src_stride,
+ ref_ptr + 7, ref_stride,
+ 0x7fffffff);
+}
+
+/* Fills sad_array[0..2] with vp9_sad16x8_c results for the references
+ * starting at ref_ptr, ref_ptr + 1 and ref_ptr + 2. */
+void vp9_sad16x8x3_c(const unsigned char *src_ptr, int src_stride,
+                     const unsigned char *ref_ptr, int ref_stride,
+                     unsigned int *sad_array) {
+  int i;
+
+  for (i = 0; i < 3; i++) {
+    sad_array[i] = vp9_sad16x8_c(src_ptr, src_stride, ref_ptr + i,
+                                 ref_stride, 0x7fffffff);
+  }
+}
+/* Entry k (0..7) is vp9_sad16x8_c at ref_ptr + k, cast to unsigned short. */
+void vp9_sad16x8x8_c(const unsigned char *src_ptr,
+ int src_stride,
+ const unsigned char *ref_ptr,
+ int ref_stride,
+ unsigned short *sad_array) {
+ sad_array[0] = (unsigned short)vp9_sad16x8_c(src_ptr, src_stride,
+ ref_ptr, ref_stride,
+ 0x7fffffff);
+ sad_array[1] = (unsigned short)vp9_sad16x8_c(src_ptr, src_stride,
+ ref_ptr + 1, ref_stride,
+ 0x7fffffff);
+ sad_array[2] = (unsigned short)vp9_sad16x8_c(src_ptr, src_stride,
+ ref_ptr + 2, ref_stride,
+ 0x7fffffff);
+ sad_array[3] = (unsigned short)vp9_sad16x8_c(src_ptr, src_stride,
+ ref_ptr + 3, ref_stride,
+ 0x7fffffff);
+ sad_array[4] = (unsigned short)vp9_sad16x8_c(src_ptr, src_stride,
+ ref_ptr + 4, ref_stride,
+ 0x7fffffff);
+ sad_array[5] = (unsigned short)vp9_sad16x8_c(src_ptr, src_stride,
+ ref_ptr + 5, ref_stride,
+ 0x7fffffff);
+ sad_array[6] = (unsigned short)vp9_sad16x8_c(src_ptr, src_stride,
+ ref_ptr + 6, ref_stride,
+ 0x7fffffff);
+ sad_array[7] = (unsigned short)vp9_sad16x8_c(src_ptr, src_stride,
+ ref_ptr + 7, ref_stride,
+ 0x7fffffff);
+}
+
+/* Fills sad_array[0..2] with vp9_sad8x8_c results for the references
+ * starting at ref_ptr, ref_ptr + 1 and ref_ptr + 2. */
+void vp9_sad8x8x3_c(const unsigned char *src_ptr, int src_stride,
+                    const unsigned char *ref_ptr, int ref_stride,
+                    unsigned int *sad_array) {
+  int i;
+
+  for (i = 0; i < 3; i++) {
+    sad_array[i] = vp9_sad8x8_c(src_ptr, src_stride, ref_ptr + i,
+                                ref_stride, 0x7fffffff);
+  }
+}
+/* Entry k (0..7) is vp9_sad8x8_c at ref_ptr + k, cast to unsigned short. */
+void vp9_sad8x8x8_c(const unsigned char *src_ptr,
+ int src_stride,
+ const unsigned char *ref_ptr,
+ int ref_stride,
+ unsigned short *sad_array) {
+ sad_array[0] = (unsigned short)vp9_sad8x8_c(src_ptr, src_stride,
+ ref_ptr, ref_stride,
+ 0x7fffffff);
+ sad_array[1] = (unsigned short)vp9_sad8x8_c(src_ptr, src_stride,
+ ref_ptr + 1, ref_stride,
+ 0x7fffffff);
+ sad_array[2] = (unsigned short)vp9_sad8x8_c(src_ptr, src_stride,
+ ref_ptr + 2, ref_stride,
+ 0x7fffffff);
+ sad_array[3] = (unsigned short)vp9_sad8x8_c(src_ptr, src_stride,
+ ref_ptr + 3, ref_stride,
+ 0x7fffffff);
+ sad_array[4] = (unsigned short)vp9_sad8x8_c(src_ptr, src_stride,
+ ref_ptr + 4, ref_stride,
+ 0x7fffffff);
+ sad_array[5] = (unsigned short)vp9_sad8x8_c(src_ptr, src_stride,
+ ref_ptr + 5, ref_stride,
+ 0x7fffffff);
+ sad_array[6] = (unsigned short)vp9_sad8x8_c(src_ptr, src_stride,
+ ref_ptr + 6, ref_stride,
+ 0x7fffffff);
+ sad_array[7] = (unsigned short)vp9_sad8x8_c(src_ptr, src_stride,
+ ref_ptr + 7, ref_stride,
+ 0x7fffffff);
+}
+
+/* Fills sad_array[0..2] with vp9_sad8x16_c results for the references
+ * starting at ref_ptr, ref_ptr + 1 and ref_ptr + 2. */
+void vp9_sad8x16x3_c(const unsigned char *src_ptr, int src_stride,
+                     const unsigned char *ref_ptr, int ref_stride,
+                     unsigned int *sad_array) {
+  int i;
+
+  for (i = 0; i < 3; i++) {
+    sad_array[i] = vp9_sad8x16_c(src_ptr, src_stride, ref_ptr + i,
+                                 ref_stride, 0x7fffffff);
+  }
+}
+/* Entry k (0..7) is vp9_sad8x16_c at ref_ptr + k, cast to unsigned short. */
+void vp9_sad8x16x8_c(const unsigned char *src_ptr,
+ int src_stride,
+ const unsigned char *ref_ptr,
+ int ref_stride,
+ unsigned short *sad_array) {
+ sad_array[0] = (unsigned short)vp9_sad8x16_c(src_ptr, src_stride,
+ ref_ptr, ref_stride,
+ 0x7fffffff);
+ sad_array[1] = (unsigned short)vp9_sad8x16_c(src_ptr, src_stride,
+ ref_ptr + 1, ref_stride,
+ 0x7fffffff);
+ sad_array[2] = (unsigned short)vp9_sad8x16_c(src_ptr, src_stride,
+ ref_ptr + 2, ref_stride,
+ 0x7fffffff);
+ sad_array[3] = (unsigned short)vp9_sad8x16_c(src_ptr, src_stride,
+ ref_ptr + 3, ref_stride,
+ 0x7fffffff);
+ sad_array[4] = (unsigned short)vp9_sad8x16_c(src_ptr, src_stride,
+ ref_ptr + 4, ref_stride,
+ 0x7fffffff);
+ sad_array[5] = (unsigned short)vp9_sad8x16_c(src_ptr, src_stride,
+ ref_ptr + 5, ref_stride,
+ 0x7fffffff);
+ sad_array[6] = (unsigned short)vp9_sad8x16_c(src_ptr, src_stride,
+ ref_ptr + 6, ref_stride,
+ 0x7fffffff);
+ sad_array[7] = (unsigned short)vp9_sad8x16_c(src_ptr, src_stride,
+ ref_ptr + 7, ref_stride,
+ 0x7fffffff);
+}
+
+/* Fills sad_array[0..2] with vp9_sad4x4_c results for the references
+ * starting at ref_ptr, ref_ptr + 1 and ref_ptr + 2. */
+void vp9_sad4x4x3_c(const unsigned char *src_ptr, int src_stride,
+                    const unsigned char *ref_ptr, int ref_stride,
+                    unsigned int *sad_array) {
+  int i;
+
+  for (i = 0; i < 3; i++) {
+    sad_array[i] = vp9_sad4x4_c(src_ptr, src_stride, ref_ptr + i,
+                                ref_stride, 0x7fffffff);
+  }
+}
+/* Entry k (0..7) is vp9_sad4x4_c at ref_ptr + k, cast to unsigned short. */
+void vp9_sad4x4x8_c(const unsigned char *src_ptr,
+ int src_stride,
+ const unsigned char *ref_ptr,
+ int ref_stride,
+ unsigned short *sad_array) {
+ sad_array[0] = (unsigned short)vp9_sad4x4_c(src_ptr, src_stride,
+ ref_ptr, ref_stride,
+ 0x7fffffff);
+ sad_array[1] = (unsigned short)vp9_sad4x4_c(src_ptr, src_stride,
+ ref_ptr + 1, ref_stride,
+ 0x7fffffff);
+ sad_array[2] = (unsigned short)vp9_sad4x4_c(src_ptr, src_stride,
+ ref_ptr + 2, ref_stride,
+ 0x7fffffff);
+ sad_array[3] = (unsigned short)vp9_sad4x4_c(src_ptr, src_stride,
+ ref_ptr + 3, ref_stride,
+ 0x7fffffff);
+ sad_array[4] = (unsigned short)vp9_sad4x4_c(src_ptr, src_stride,
+ ref_ptr + 4, ref_stride,
+ 0x7fffffff);
+ sad_array[5] = (unsigned short)vp9_sad4x4_c(src_ptr, src_stride,
+ ref_ptr + 5, ref_stride,
+ 0x7fffffff);
+ sad_array[6] = (unsigned short)vp9_sad4x4_c(src_ptr, src_stride,
+ ref_ptr + 6, ref_stride,
+ 0x7fffffff);
+ sad_array[7] = (unsigned short)vp9_sad4x4_c(src_ptr, src_stride,
+ ref_ptr + 7, ref_stride,
+ 0x7fffffff);
+}
+
+/* Produces four results for one source block: entry i of sad_array is
+ * vp9_sad32x32_c() evaluated against ref_ptr[i], for i = 0..3.
+ *
+ * Every reference uses the same ref_stride, and 0x7fffffff is passed as
+ * the max_sad argument of each call. */
+void vp9_sad32x32x4d_c(const unsigned char *src_ptr, int src_stride,
+                       unsigned char *ref_ptr[], int ref_stride,
+                       unsigned int *sad_array) {
+  int i;
+
+  for (i = 0; i < 4; i++) {
+    sad_array[i] = vp9_sad32x32_c(src_ptr, src_stride, ref_ptr[i],
+                                  ref_stride, 0x7fffffff);
+  }
+}
+
+/* Produces four results for one source block: entry i of sad_array is
+ * vp9_sad16x16_c() evaluated against ref_ptr[i], for i = 0..3.  Every
+ * reference uses the same ref_stride, and 0x7fffffff is passed as the
+ * max_sad argument of each call. */
+void vp9_sad16x16x4d_c(const unsigned char *src_ptr, int src_stride,
+                       unsigned char *ref_ptr[], int ref_stride,
+                       unsigned int *sad_array) {
+  int i;
+
+  for (i = 0; i < 4; i++) {
+    sad_array[i] = vp9_sad16x16_c(src_ptr, src_stride, ref_ptr[i],
+                                  ref_stride, 0x7fffffff);
+  }
+}
+
+/* Produces four results for one source block: entry i of sad_array is
+ * vp9_sad16x8_c() evaluated against ref_ptr[i], for i = 0..3.  Every
+ * reference uses the same ref_stride, and 0x7fffffff is passed as the
+ * max_sad argument of each call. */
+void vp9_sad16x8x4d_c(const unsigned char *src_ptr, int src_stride,
+                      unsigned char *ref_ptr[], int ref_stride,
+                      unsigned int *sad_array) {
+  int i;
+
+  for (i = 0; i < 4; i++) {
+    sad_array[i] = vp9_sad16x8_c(src_ptr, src_stride, ref_ptr[i],
+                                 ref_stride, 0x7fffffff);
+  }
+}
+
+/* Produces four results for one source block: entry i of sad_array is
+ * vp9_sad8x8_c() evaluated against ref_ptr[i], for i = 0..3.  Every
+ * reference uses the same ref_stride, and 0x7fffffff is passed as the
+ * max_sad argument of each call. */
+void vp9_sad8x8x4d_c(const unsigned char *src_ptr, int src_stride,
+                     unsigned char *ref_ptr[], int ref_stride,
+                     unsigned int *sad_array) {
+  int i;
+
+  for (i = 0; i < 4; i++) {
+    sad_array[i] = vp9_sad8x8_c(src_ptr, src_stride, ref_ptr[i],
+                                ref_stride, 0x7fffffff);
+  }
+}
+
+/* Produces four results for one source block: entry i of sad_array is
+ * vp9_sad8x16_c() evaluated against ref_ptr[i], for i = 0..3.  Every
+ * reference uses the same ref_stride, and 0x7fffffff is passed as the
+ * max_sad argument of each call. */
+void vp9_sad8x16x4d_c(const unsigned char *src_ptr, int src_stride,
+                      unsigned char *ref_ptr[], int ref_stride,
+                      unsigned int *sad_array) {
+  int i;
+
+  for (i = 0; i < 4; i++) {
+    sad_array[i] = vp9_sad8x16_c(src_ptr, src_stride, ref_ptr[i],
+                                 ref_stride, 0x7fffffff);
+  }
+}
+
+/* Produces four results for one source block: entry i of sad_array is
+ * vp9_sad4x4_c() evaluated against ref_ptr[i], for i = 0..3.  Every
+ * reference uses the same ref_stride, and 0x7fffffff is passed as the
+ * max_sad argument of each call. */
+void vp9_sad4x4x4d_c(const unsigned char *src_ptr, int src_stride,
+                     unsigned char *ref_ptr[], int ref_stride,
+                     unsigned int *sad_array) {
+  int i;
+
+  for (i = 0; i < 4; i++) {
+    sad_array[i] = vp9_sad4x4_c(src_ptr, src_stride, ref_ptr[i],
+                                ref_stride, 0x7fffffff);
+  }
+}
+
+/* Copy `height` rows of 32 bytes each (2 macroblocks wide) from src to dst. */
+void vp9_copy32xn_c(unsigned char *src_ptr,
+ int src_stride,
+ unsigned char *dst_ptr,
+ int dst_stride,
+ int height) {
+ int r;
+ /* NOTE(review): the uint32_t path type-puns and may be unaligned - confirm. */
+ for (r = 0; r < height; r++) {
+#if !(CONFIG_FAST_UNALIGNED)  /* byte-by-byte copy of the 32-byte row */
+ dst_ptr[0] = src_ptr[0];
+ dst_ptr[1] = src_ptr[1];
+ dst_ptr[2] = src_ptr[2];
+ dst_ptr[3] = src_ptr[3];
+ dst_ptr[4] = src_ptr[4];
+ dst_ptr[5] = src_ptr[5];
+ dst_ptr[6] = src_ptr[6];
+ dst_ptr[7] = src_ptr[7];
+ dst_ptr[8] = src_ptr[8];
+ dst_ptr[9] = src_ptr[9];
+ dst_ptr[10] = src_ptr[10];
+ dst_ptr[11] = src_ptr[11];
+ dst_ptr[12] = src_ptr[12];
+ dst_ptr[13] = src_ptr[13];
+ dst_ptr[14] = src_ptr[14];
+ dst_ptr[15] = src_ptr[15];
+ dst_ptr[16] = src_ptr[16];
+ dst_ptr[17] = src_ptr[17];
+ dst_ptr[18] = src_ptr[18];
+ dst_ptr[19] = src_ptr[19];
+ dst_ptr[20] = src_ptr[20];
+ dst_ptr[21] = src_ptr[21];
+ dst_ptr[22] = src_ptr[22];
+ dst_ptr[23] = src_ptr[23];
+ dst_ptr[24] = src_ptr[24];
+ dst_ptr[25] = src_ptr[25];
+ dst_ptr[26] = src_ptr[26];
+ dst_ptr[27] = src_ptr[27];
+ dst_ptr[28] = src_ptr[28];
+ dst_ptr[29] = src_ptr[29];
+ dst_ptr[30] = src_ptr[30];
+ dst_ptr[31] = src_ptr[31];
+#else  /* CONFIG_FAST_UNALIGNED: the same row as eight uint32_t copies */
+ ((uint32_t *)dst_ptr)[0] = ((uint32_t *)src_ptr)[0];
+ ((uint32_t *)dst_ptr)[1] = ((uint32_t *)src_ptr)[1];
+ ((uint32_t *)dst_ptr)[2] = ((uint32_t *)src_ptr)[2];
+ ((uint32_t *)dst_ptr)[3] = ((uint32_t *)src_ptr)[3];
+ ((uint32_t *)dst_ptr)[4] = ((uint32_t *)src_ptr)[4];
+ ((uint32_t *)dst_ptr)[5] = ((uint32_t *)src_ptr)[5];
+ ((uint32_t *)dst_ptr)[6] = ((uint32_t *)src_ptr)[6];
+ ((uint32_t *)dst_ptr)[7] = ((uint32_t *)src_ptr)[7];
+#endif
+ src_ptr += src_stride;
+ dst_ptr += dst_stride;
+
+ }
+}
diff --git a/vp9/encoder/vp9_satd_c.c b/vp9/encoder/vp9_satd_c.c
new file mode 100644
index 0000000..63944f0
--- /dev/null
+++ b/vp9/encoder/vp9_satd_c.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdlib.h>
+#include "vpx_ports/mem.h"
+#include "./vp9_rtcd.h"
+/* Sums |x| over the vp9_short_walsh4x4_c output of every 4x4 tile of the
+ * 16x16 src - ref residual; also stored through psatd when it is non-NULL. */
+unsigned int vp9_satd16x16_c(const unsigned char *src_ptr,
+                             int src_stride,
+                             const unsigned char *ref_ptr,
+                             int ref_stride,
+                             unsigned int *psatd) {
+  int row, col, k;
+  unsigned int total = 0;
+  DECLARE_ALIGNED(16, short, diff_in[256]);
+  DECLARE_ALIGNED(16, short, diff_out[16]);
+
+  /* diff_in holds src - ref, one row of 16 shorts per block row. */
+  for (row = 0; row < 16; row++) {
+    short *const diff_row = diff_in + row * 16;
+    for (col = 0; col < 16; col++)
+      diff_row[col] = src_ptr[col] - ref_ptr[col];
+    src_ptr += src_stride;
+    ref_ptr += ref_stride;
+  }
+
+  /* Visit the sixteen 4x4 tiles; 32 is the pitch handed to the transform. */
+  for (row = 0; row < 16; row += 4) {
+    for (col = 0; col < 16; col += 4) {
+      vp9_short_walsh4x4_c(diff_in + row * 16 + col, diff_out, 32);
+      for (k = 0; k < 16; k++)
+        total += abs(diff_out[k]);
+    }
+  }
+
+  if (psatd != NULL)
+    *psatd = total;
+  return total;
+}
diff --git a/vp9/encoder/vp9_segmentation.c b/vp9/encoder/vp9_segmentation.c
new file mode 100644
index 0000000..5e8d94c
--- /dev/null
+++ b/vp9/encoder/vp9_segmentation.c
@@ -0,0 +1,335 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "limits.h"
+#include "vpx_mem/vpx_mem.h"
+#include "vp9/encoder/vp9_segmentation.h"
+#include "vp9/common/vp9_pred_common.h"
+
+void vp9_update_gf_useage_maps(VP9_COMP *cpi, VP9_COMMON *cm, MACROBLOCK *x) {
+ int mb_row, mb_col;
+
+ MODE_INFO *this_mb_mode_info = cm->mi;
+
+ x->gf_active_ptr = (signed char *)cpi->gf_active_flags;
+
+ if ((cm->frame_type == KEY_FRAME) || (cm->refresh_golden_frame)) {
+ // Reset Gf useage monitors
+ vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
+ cpi->gf_active_count = cm->mb_rows * cm->mb_cols;
+ } else {
+ // for each macroblock row in image
+ for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) {
+ // for each macroblock col in image
+ for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
+
+ // If using golden then set GF active flag if not already set.
+ // If using last frame 0,0 mode then leave flag as it is
+ // else if using non 0,0 motion or intra modes then clear
+ // flag if it is currently set
+ if ((this_mb_mode_info->mbmi.ref_frame == GOLDEN_FRAME) ||
+ (this_mb_mode_info->mbmi.ref_frame == ALTREF_FRAME)) {
+ if (*(x->gf_active_ptr) == 0) {
+ *(x->gf_active_ptr) = 1;
+ cpi->gf_active_count++;
+ }
+ } else if ((this_mb_mode_info->mbmi.mode != ZEROMV) &&
+ *(x->gf_active_ptr)) {
+ *(x->gf_active_ptr) = 0;
+ cpi->gf_active_count--;
+ }
+
+ x->gf_active_ptr++; // Step onto next entry
+ this_mb_mode_info++; // skip to next mb
+
+ }
+
+ // this is to account for the border
+ this_mb_mode_info++;
+ }
+ }
+}
+
+void vp9_enable_segmentation(VP9_PTR ptr) {
+ VP9_COMP *cpi = (VP9_COMP *)(ptr);
+
+ // Set the appropriate feature bit
+ cpi->mb.e_mbd.segmentation_enabled = 1;
+ cpi->mb.e_mbd.update_mb_segmentation_map = 1;
+ cpi->mb.e_mbd.update_mb_segmentation_data = 1;
+}
+
+void vp9_disable_segmentation(VP9_PTR ptr) {
+ VP9_COMP *cpi = (VP9_COMP *)(ptr);
+
+ // Clear the appropriate feature bit
+ cpi->mb.e_mbd.segmentation_enabled = 0;
+}
+
+void vp9_set_segmentation_map(VP9_PTR ptr,
+ unsigned char *segmentation_map) {
+ VP9_COMP *cpi = (VP9_COMP *)(ptr);
+
+ // Copy in the new segmentation map
+ vpx_memcpy(cpi->segmentation_map, segmentation_map,
+ (cpi->common.mb_rows * cpi->common.mb_cols));
+
+ // Signal that the map should be updated.
+ cpi->mb.e_mbd.update_mb_segmentation_map = 1;
+ cpi->mb.e_mbd.update_mb_segmentation_data = 1;
+}
+
+void vp9_set_segment_data(VP9_PTR ptr,
+ signed char *feature_data,
+ unsigned char abs_delta) {
+ VP9_COMP *cpi = (VP9_COMP *)(ptr);
+
+ cpi->mb.e_mbd.mb_segment_abs_delta = abs_delta;
+
+ vpx_memcpy(cpi->mb.e_mbd.segment_feature_data, feature_data,
+ sizeof(cpi->mb.e_mbd.segment_feature_data));
+
+ // TBD ?? Set the feature mask
+ // vpx_memcpy(cpi->mb.e_mbd.segment_feature_mask, 0,
+ // sizeof(cpi->mb.e_mbd.segment_feature_mask));
+}
+
+// Derive the segment tree probabilities (scaled to 0..255) from segcounts[0..3]
+static void calc_segtree_probs(MACROBLOCKD *xd,  // not referenced in this function
+ int *segcounts,
+ vp9_prob *segment_tree_probs) {
+ int count1, count2;
+ int tot_count;
+ int i;
+
+ // Zero the output first; entries left at 0 are raised to 1 by the clamp below
+ vpx_memset(segment_tree_probs, 0,
+ MB_FEATURE_TREE_PROBS * sizeof(*segment_tree_probs));
+
+ // Counts for the two halves of the tree: segments {0,1} and {2,3}
+ count1 = segcounts[0] + segcounts[1];
+ count2 = segcounts[2] + segcounts[3];
+ tot_count = count1 + count2;
+
+ // Node 0: {0,1} vs {2,3}; node 1: segment 0 vs 1; node 2: segment 2 vs 3
+ if (tot_count)
+ segment_tree_probs[0] = (count1 * 255) / tot_count;
+ if (count1 > 0)
+ segment_tree_probs[1] = (segcounts[0] * 255) / count1;
+ if (count2 > 0)
+ segment_tree_probs[2] = (segcounts[2] * 255) / count2;
+
+ // Clamp: a probability of 0 is replaced by 1
+ for (i = 0; i < MB_FEATURE_TREE_PROBS; i++) {
+ if (segment_tree_probs[i] == 0)
+ segment_tree_probs[i] = 1;
+ }
+}
+
+// Based on set of segment counts and probabilities calculate a cost estimate
+static int cost_segmap(MACROBLOCKD *xd,
+ int *segcounts,
+ vp9_prob *probs) {
+ int cost;
+ int count1, count2;
+
+ // Cost the top node of the tree
+ count1 = segcounts[0] + segcounts[1];
+ count2 = segcounts[2] + segcounts[3];
+ cost = count1 * vp9_cost_zero(probs[0]) +
+ count2 * vp9_cost_one(probs[0]);
+
+ // Now add the cost of each individual segment branch
+ if (count1 > 0)
+ cost += segcounts[0] * vp9_cost_zero(probs[1]) +
+ segcounts[1] * vp9_cost_one(probs[1]);
+
+ if (count2 > 0)
+ cost += segcounts[2] * vp9_cost_zero(probs[2]) +
+ segcounts[3] * vp9_cost_one(probs[2]);
+
+ return cost;
+
+}
+
+void vp9_choose_segmap_coding_method(VP9_COMP *cpi) {
+ VP9_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &cpi->mb.e_mbd;
+
+ int i;
+ int tot_count;
+ int no_pred_cost;
+ int t_pred_cost = INT_MAX;
+ int pred_context;
+
+ int mb_row, mb_col;
+ int segmap_index = 0;
+ unsigned char segment_id;
+
+ int temporal_predictor_count[PREDICTION_PROBS][2];
+ int no_pred_segcounts[MAX_MB_SEGMENTS];
+ int t_unpred_seg_counts[MAX_MB_SEGMENTS];
+
+ vp9_prob no_pred_tree[MB_FEATURE_TREE_PROBS];
+ vp9_prob t_pred_tree[MB_FEATURE_TREE_PROBS];
+ vp9_prob t_nopred_prob[PREDICTION_PROBS];
+
+#if CONFIG_SUPERBLOCKS
+ const int mis = cm->mode_info_stride;
+#endif
+
+ // Set default state for the segment tree probabilities and the
+ // temporal coding probabilities
+ vpx_memset(xd->mb_segment_tree_probs, 255,
+ sizeof(xd->mb_segment_tree_probs));
+ vpx_memset(cm->segment_pred_probs, 255,
+ sizeof(cm->segment_pred_probs));
+
+ vpx_memset(no_pred_segcounts, 0, sizeof(no_pred_segcounts));
+ vpx_memset(t_unpred_seg_counts, 0, sizeof(t_unpred_seg_counts));
+ vpx_memset(temporal_predictor_count, 0, sizeof(temporal_predictor_count));
+
+ // First of all generate stats regarding how well the last segment map
+ // predicts this one
+
+ // Initialize macroblock decoder mode info context for the first mb
+ // in the frame
+ xd->mode_info_context = cm->mi;
+
+ for (mb_row = 0; mb_row < cm->mb_rows; mb_row += 2) {
+ for (mb_col = 0; mb_col < cm->mb_cols; mb_col += 2) {
+ for (i = 0; i < 4; i++) {
+ static const int dx[4] = { +1, -1, +1, +1 };
+ static const int dy[4] = { 0, +1, 0, -1 };
+ int x_idx = i & 1, y_idx = i >> 1;
+
+ if (mb_col + x_idx >= cm->mb_cols ||
+ mb_row + y_idx >= cm->mb_rows) {
+ goto end;
+ }
+
+ xd->mb_to_top_edge = -((mb_row * 16) << 3);
+ xd->mb_to_left_edge = -((mb_col * 16) << 3);
+
+ segmap_index = (mb_row + y_idx) * cm->mb_cols + mb_col + x_idx;
+ segment_id = xd->mode_info_context->mbmi.segment_id;
+#if CONFIG_SUPERBLOCKS
+ if (xd->mode_info_context->mbmi.encoded_as_sb) {
+ if (mb_col + 1 < cm->mb_cols)
+ segment_id = segment_id &&
+ xd->mode_info_context[1].mbmi.segment_id;
+ if (mb_row + 1 < cm->mb_rows) {
+ segment_id = segment_id &&
+ xd->mode_info_context[mis].mbmi.segment_id;
+ if (mb_col + 1 < cm->mb_cols)
+ segment_id = segment_id &&
+ xd->mode_info_context[mis + 1].mbmi.segment_id;
+ }
+ xd->mb_to_bottom_edge = ((cm->mb_rows - 2 - mb_row) * 16) << 3;
+ xd->mb_to_right_edge = ((cm->mb_cols - 2 - mb_col) * 16) << 3;
+ } else {
+#endif
+ xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3;
+ xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3;
+#if CONFIG_SUPERBLOCKS
+ }
+#endif
+
+ // Count the number of hits on each segment with no prediction
+ no_pred_segcounts[segment_id]++;
+
+ // Temporal prediction not allowed on key frames
+ if (cm->frame_type != KEY_FRAME) {
+ // Test to see if the segment id matches the predicted value.
+ int seg_predicted =
+ (segment_id == vp9_get_pred_mb_segid(cm, xd, segmap_index));
+
+ // Get the segment id prediction context
+ pred_context =
+ vp9_get_pred_context(cm, xd, PRED_SEG_ID);
+
+ // Store the prediction status for this mb and update counts
+ // as appropriate
+ vp9_set_pred_flag(xd, PRED_SEG_ID, seg_predicted);
+ temporal_predictor_count[pred_context][seg_predicted]++;
+
+ if (!seg_predicted)
+ // Update the "unpredicted" segment count
+ t_unpred_seg_counts[segment_id]++;
+ }
+
+#if CONFIG_SUPERBLOCKS
+ if (xd->mode_info_context->mbmi.encoded_as_sb) {
+ assert(!i);
+ xd->mode_info_context += 2;
+ break;
+ }
+#endif
+ end:
+ xd->mode_info_context += dx[i] + dy[i] * cm->mode_info_stride;
+ }
+ }
+
+ // this is to account for the border in mode_info_context
+ xd->mode_info_context -= mb_col;
+ xd->mode_info_context += cm->mode_info_stride * 2;
+ }
+
+ // Work out probability tree for coding segments without prediction
+ // and the cost.
+ calc_segtree_probs(xd, no_pred_segcounts, no_pred_tree);
+ no_pred_cost = cost_segmap(xd, no_pred_segcounts, no_pred_tree);
+
+ // Key frames cannot use temporal prediction
+ if (cm->frame_type != KEY_FRAME) {
+ // Work out probability tree for coding those segments not
+ // predicted using the temporal method and the cost.
+ calc_segtree_probs(xd, t_unpred_seg_counts, t_pred_tree);
+ t_pred_cost = cost_segmap(xd, t_unpred_seg_counts, t_pred_tree);
+
+ // Add in the cost of the signalling for each prediction context
+ for (i = 0; i < PREDICTION_PROBS; i++) {
+ tot_count = temporal_predictor_count[i][0] +
+ temporal_predictor_count[i][1];
+
+ // Work out the context probabilities for the segment
+ // prediction flag
+ if (tot_count) {
+ t_nopred_prob[i] = (temporal_predictor_count[i][0] * 255) /
+ tot_count;
+
+ // Clamp to minimum allowed value
+ if (t_nopred_prob[i] < 1)
+ t_nopred_prob[i] = 1;
+ } else
+ t_nopred_prob[i] = 1;
+
+ // Add in the predictor signaling cost
+ t_pred_cost += (temporal_predictor_count[i][0] *
+ vp9_cost_zero(t_nopred_prob[i])) +
+ (temporal_predictor_count[i][1] *
+ vp9_cost_one(t_nopred_prob[i]));
+ }
+ }
+
+ // Now choose which coding method to use.
+ if (t_pred_cost < no_pred_cost) {
+ cm->temporal_update = 1;
+ vpx_memcpy(xd->mb_segment_tree_probs,
+ t_pred_tree, sizeof(t_pred_tree));
+ vpx_memcpy(&cm->segment_pred_probs,
+ t_nopred_prob, sizeof(t_nopred_prob));
+ } else {
+ cm->temporal_update = 0;
+ vpx_memcpy(xd->mb_segment_tree_probs,
+ no_pred_tree, sizeof(no_pred_tree));
+ }
+}
diff --git a/vp9/encoder/vp9_segmentation.h b/vp9/encoder/vp9_segmentation.h
new file mode 100644
index 0000000..493a767
--- /dev/null
+++ b/vp9/encoder/vp9_segmentation.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "string.h"
+#include "vp9/common/vp9_blockd.h"
+#include "vp9/encoder/vp9_onyx_int.h"
+
+#ifndef VP9_ENCODER_VP9_SEGMENTATION_H_
+#define VP9_ENCODER_VP9_SEGMENTATION_H_
+
+extern void vp9_update_gf_useage_maps(VP9_COMP *cpi, VP9_COMMON *cm,
+ MACROBLOCK *x);
+
+extern void vp9_enable_segmentation(VP9_PTR ptr);
+extern void vp9_disable_segmentation(VP9_PTR ptr);
+
+// Valid values for a segment are 0 to 3
+// Segmentation map is arrange as [Rows][Columns]
+extern void vp9_set_segmentation_map(VP9_PTR ptr,
+ unsigned char *segmentation_map);
+
+// The values given for each segment can be either deltas (from the default
+// value chosen for the frame) or absolute values.
+//
+// Valid range for abs values is (0-127 for MB_LVL_ALT_Q), (0-63 for
+// SEGMENT_ALT_LF)
+// Valid range for delta values are (+/-127 for MB_LVL_ALT_Q), (+/-63 for
+// SEGMENT_ALT_LF)
+//
+// abs_delta = SEGMENT_DELTADATA (deltas) abs_delta = SEGMENT_ABSDATA (use
+// the absolute values given).
+//
+extern void vp9_set_segment_data(VP9_PTR ptr, signed char *feature_data,
+ unsigned char abs_delta);
+
+extern void vp9_choose_segmap_coding_method(VP9_COMP *cpi);
+
+#endif /* VP9_ENCODER_VP9_SEGMENTATION_H_ */
diff --git a/vp9/encoder/vp9_ssim.c b/vp9/encoder/vp9_ssim.c
new file mode 100644
index 0000000..4cbb914
--- /dev/null
+++ b/vp9/encoder/vp9_ssim.c
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vp9/encoder/vp9_onyx_int.h"
+
+void vp9_ssim_parms_16x16_c(unsigned char *s, int sp, unsigned char *r,
+ int rp, unsigned long *sum_s, unsigned long *sum_r,
+ unsigned long *sum_sq_s, unsigned long *sum_sq_r,
+ unsigned long *sum_sxr) {
+ int i, j;
+ for (i = 0; i < 16; i++, s += sp, r += rp) {
+ for (j = 0; j < 16; j++) {
+ *sum_s += s[j];
+ *sum_r += r[j];
+ *sum_sq_s += s[j] * s[j];
+ *sum_sq_r += r[j] * r[j];
+ *sum_sxr += s[j] * r[j];
+ }
+ }
+}
+void vp9_ssim_parms_8x8_c(unsigned char *s, int sp, unsigned char *r, int rp,
+ unsigned long *sum_s, unsigned long *sum_r,
+ unsigned long *sum_sq_s, unsigned long *sum_sq_r,
+ unsigned long *sum_sxr) {
+ int i, j;
+ for (i = 0; i < 8; i++, s += sp, r += rp) {
+ for (j = 0; j < 8; j++) {
+ *sum_s += s[j];
+ *sum_r += r[j];
+ *sum_sq_s += s[j] * s[j];
+ *sum_sq_r += r[j] * r[j];
+ *sum_sxr += s[j] * r[j];
+ }
+ }
+}
+
+static const int64_t cc1 = 26634;   // 64^2 * (.01 * 255)^2
+static const int64_t cc2 = 239708;  // 64^2 * (.03 * 255)^2
+
+static double similarity(unsigned long sum_s, unsigned long sum_r,
+                         unsigned long sum_sq_s, unsigned long sum_sq_r,
+                         unsigned long sum_sxr, int count) {
+  // Returns ssim_n / ssim_d computed from the window sums over `count` pixels.
+  // Everything is widened to int64_t first: unsigned long may be only 32 bits,
+  // and 2 * sum_s * sum_r then wraps for a 16x16 window (sums reach 65280).
+  const int64_t s = sum_s, r = sum_r, n = count;
+  const int64_t sq_s = sum_sq_s, sq_r = sum_sq_r, sxr = sum_sxr;
+  // scale the constants by number of pixels
+  const int64_t c1 = (cc1 * n * n) >> 12;
+  const int64_t c2 = (cc2 * n * n) >> 12;
+  int64_t ssim_n, ssim_d;
+
+  ssim_n = (2 * s * r + c1) * (2 * n * sxr - 2 * s * r + c2);
+
+  ssim_d = (s * s + r * r + c1) * (n * sq_s - s * s + n * sq_r - r * r + c2);
+
+  return ssim_n * 1.0 / ssim_d;
+}
+
+static double ssim_16x16(unsigned char *s, int sp, unsigned char *r, int rp) {
+ unsigned long sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0;
+ vp9_ssim_parms_16x16(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r,
+ &sum_sxr);
+ return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 256);
+}
+static double ssim_8x8(unsigned char *s, int sp, unsigned char *r, int rp) {
+ unsigned long sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0;
+ vp9_ssim_parms_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r,
+ &sum_sxr);
+ return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 64);
+}
+
+// We are using a 8x8 moving window with starting location of each 8x8 window
+// on the 4x4 pixel grid. Such arrangement allows the windows to overlap
+// block boundaries to penalize blocking artifacts.
+double vp9_ssim2(unsigned char *img1, unsigned char *img2, int stride_img1,
+                 int stride_img2, int width, int height) {
+  int i, j;
+  int samples = 0;
+  double ssim_total = 0;
+
+  // Window origins lie on a 4-pixel grid; "<=" keeps the last window that fits.
+  for (i = 0; i <= height - 8;
+       i += 4, img1 += stride_img1 * 4, img2 += stride_img2 * 4) {
+    for (j = 0; j <= width - 8; j += 4) {
+      ssim_total += ssim_8x8(img1 + j, stride_img1, img2 + j, stride_img2);
+      samples++;
+    }
+  }
+  // A plane narrower or shorter than 8 pixels has no window: return 0, not 0/0.
+  return samples ? ssim_total / samples : 0;
+}
+double vp9_calc_ssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
+ int lumamask, double *weight) {
+ double a, b, c;
+ double ssimv;
+
+ a = vp9_ssim2(source->y_buffer, dest->y_buffer,
+ source->y_stride, dest->y_stride, source->y_width,
+ source->y_height);
+
+ b = vp9_ssim2(source->u_buffer, dest->u_buffer,
+ source->uv_stride, dest->uv_stride, source->uv_width,
+ source->uv_height);
+
+ c = vp9_ssim2(source->v_buffer, dest->v_buffer,
+ source->uv_stride, dest->uv_stride, source->uv_width,
+ source->uv_height);
+
+ ssimv = a * .8 + .1 * (b + c);
+
+ *weight = 1;
+
+ return ssimv;
+}
+
+double vp9_calc_ssimg(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
+ double *ssim_y, double *ssim_u, double *ssim_v) {
+ double ssim_all = 0;
+ double a, b, c;
+
+ a = vp9_ssim2(source->y_buffer, dest->y_buffer,
+ source->y_stride, dest->y_stride, source->y_width,
+ source->y_height);
+
+ b = vp9_ssim2(source->u_buffer, dest->u_buffer,
+ source->uv_stride, dest->uv_stride, source->uv_width,
+ source->uv_height);
+
+ c = vp9_ssim2(source->v_buffer, dest->v_buffer,
+ source->uv_stride, dest->uv_stride, source->uv_width,
+ source->uv_height);
+ *ssim_y = a;
+ *ssim_u = b;
+ *ssim_v = c;
+ ssim_all = (a * 4 + b + c) / 6;
+
+ return ssim_all;
+}
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
new file mode 100644
index 0000000..57253bd
--- /dev/null
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -0,0 +1,494 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vp9/common/vp9_onyxc_int.h"
+#include "vp9/encoder/vp9_onyx_int.h"
+#include "vp9/common/vp9_systemdependent.h"
+#include "vp9/encoder/vp9_quantize.h"
+#include "vp9/common/vp9_alloccommon.h"
+#include "vp9/encoder/vp9_mcomp.h"
+#include "vp9/encoder/vp9_firstpass.h"
+#include "vp9/encoder/vp9_psnr.h"
+#include "vpx_scale/vpxscale.h"
+#include "vp9/common/vp9_extend.h"
+#include "vp9/encoder/vp9_ratectrl.h"
+#include "vp9/common/vp9_quant_common.h"
+#include "vp9/encoder/vp9_segmentation.h"
+#include "vpx_mem/vpx_mem.h"
+#include "vp9/common/vp9_swapyv12buffer.h"
+#include "vpx_ports/vpx_timer.h"
+
+#include <math.h>
+#include <limits.h>
+
+#define ALT_REF_MC_ENABLED 1 // dis/enable MC in AltRef filtering
+#define ALT_REF_SUBPEL_ENABLED 1 // dis/enable subpel in MC AltRef filtering
+
+#if VP9_TEMPORAL_ALT_REF
+
+
+static void temporal_filter_predictors_mb_c
+(
+ MACROBLOCKD *xd,
+ unsigned char *y_mb_ptr,
+ unsigned char *u_mb_ptr,
+ unsigned char *v_mb_ptr,
+ int stride,
+ int mv_row,
+ int mv_col,
+ unsigned char *pred
+) {
+ int offset;
+ unsigned char *yptr, *uptr, *vptr;
+ int omv_row, omv_col;
+
+ // Y
+ yptr = y_mb_ptr + (mv_row >> 3) * stride + (mv_col >> 3);
+
+ if ((mv_row | mv_col) & 7) {
+ xd->subpixel_predict16x16(yptr, stride,
+ (mv_col & 7) << 1, (mv_row & 7) << 1, &pred[0], 16);
+ } else {
+ vp9_copy_mem16x16(yptr, stride, &pred[0], 16);
+ }
+
+ // U & V
+ omv_row = mv_row;
+ omv_col = mv_col;
+ mv_row >>= 1;
+ mv_col >>= 1;
+ stride = (stride + 1) >> 1;
+ offset = (mv_row >> 3) * stride + (mv_col >> 3);
+ uptr = u_mb_ptr + offset;
+ vptr = v_mb_ptr + offset;
+
+ if ((omv_row | omv_col) & 15) {
+ xd->subpixel_predict8x8(uptr, stride,
+ (omv_col & 15), (omv_row & 15), &pred[256], 8);
+ xd->subpixel_predict8x8(vptr, stride,
+ (omv_col & 15), (omv_row & 15), &pred[320], 8);
+ }
+ else {
+ vp9_copy_mem8x8(uptr, stride, &pred[256], 8);
+ vp9_copy_mem8x8(vptr, stride, &pred[320], 8);
+ }
+}
+void vp9_temporal_filter_apply_c
+(
+  unsigned char *frame1,
+  unsigned int stride,
+  unsigned char *frame2,
+  unsigned int block_size,
+  int strength,
+  int filter_weight,
+  unsigned int *accumulator,
+  unsigned short *count
+) {
+  // Rounding term for the ">> strength" below. strength == 0 needs no rounding,
+  // and must not evaluate 1 << -1, which is undefined behaviour.
+  const int rounding = strength > 0 ? 1 << (strength - 1) : 0;
+  unsigned int i, j, k;
+  int modifier;
+  int byte = 0;
+
+  for (i = 0, k = 0; i < block_size; i++) {
+    for (j = 0; j < block_size; j++, k++) {
+      int src_byte = frame1[byte];
+      int pixel_value = *frame2++;
+      modifier = src_byte - pixel_value;
+      // This is an integer approximation of:
+      // float coeff = (3.0 * modifier * modifier) / pow(2, strength);
+      // modifier = (int)roundf(coeff > 16 ? 0 : 16-coeff);
+      modifier *= modifier;
+      modifier *= 3;
+      modifier += rounding;
+      modifier >>= strength;
+
+      if (modifier > 16)
+        modifier = 16;
+      // Per-pixel weight: 16 for an exact match, falling to 0; then scaled.
+      modifier = 16 - modifier;
+      modifier *= filter_weight;
+
+      count[k] += modifier;
+      accumulator[k] += modifier * pixel_value;
+      byte++;
+    }
+
+    byte += stride - block_size;
+  }
+}
+
+#if ALT_REF_MC_ENABLED
+
+static int temporal_filter_find_matching_mb_c
+(
+ VP9_COMP *cpi,
+ YV12_BUFFER_CONFIG *arf_frame,
+ YV12_BUFFER_CONFIG *frame_ptr,
+ int mb_offset,
+ int error_thresh
+) {
+ MACROBLOCK *x = &cpi->mb;
+ int step_param;
+ int sadpb = x->sadperbit16;
+ int bestsme = INT_MAX;
+
+ BLOCK *b = &x->block[0];
+ BLOCKD *d = &x->e_mbd.block[0];
+ int_mv best_ref_mv1;
+ int_mv best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */
+
+ // Save input state
+ unsigned char **base_src = b->base_src;
+ int src = b->src;
+ int src_stride = b->src_stride;
+ unsigned char **base_pre = d->base_pre;
+ int pre = d->pre;
+ int pre_stride = d->pre_stride;
+
+ best_ref_mv1.as_int = 0;
+ best_ref_mv1_full.as_mv.col = best_ref_mv1.as_mv.col >> 3;
+ best_ref_mv1_full.as_mv.row = best_ref_mv1.as_mv.row >> 3;
+
+ // Setup frame pointers
+ b->base_src = &arf_frame->y_buffer;
+ b->src_stride = arf_frame->y_stride;
+ b->src = mb_offset;
+
+ d->base_pre = &frame_ptr->y_buffer;
+ d->pre_stride = frame_ptr->y_stride;
+ d->pre = mb_offset;
+
+ // Further step/diamond searches as necessary
+ if (cpi->Speed < 8) {
+ step_param = cpi->sf.first_step +
+ ((cpi->Speed > 5) ? 1 : 0);
+ } else {
+ step_param = cpi->sf.first_step + 2;
+ }
+
+ /*cpi->sf.search_method == HEX*/
+ // TODO Check that the 16x16 vf & sdf are selected here
+ // Ignore mv costing by sending NULL pointer instead of cost arrays
+ bestsme = vp9_hex_search(x, b, d, &best_ref_mv1_full, &d->bmi.as_mv.first,
+ step_param, sadpb, &cpi->fn_ptr[BLOCK_16X16],
+ NULL, NULL, NULL, NULL,
+ &best_ref_mv1);
+
+#if ALT_REF_SUBPEL_ENABLED
+ // Try sub-pixel MC?
+ // if (bestsme > error_thresh && bestsme < INT_MAX)
+ {
+ int distortion;
+ unsigned int sse;
+ // Ignore mv costing by sending NULL pointer instead of cost array
+ bestsme = cpi->find_fractional_mv_step(x, b, d, &d->bmi.as_mv.first,
+ &best_ref_mv1,
+ x->errorperbit,
+ &cpi->fn_ptr[BLOCK_16X16],
+ NULL, NULL,
+ &distortion, &sse);
+ }
+#endif
+
+ // Restore input state
+ b->base_src = base_src;
+ b->src = src;
+ b->src_stride = src_stride;
+ d->base_pre = base_pre;
+ d->pre = pre;
+ d->pre_stride = pre_stride;
+
+ return bestsme;
+}
+#endif
+
+static void temporal_filter_iterate_c
+(
+ VP9_COMP *cpi,
+ int frame_count,
+ int alt_ref_index,
+ int strength
+) {
+ int byte;
+ int frame;
+ int mb_col, mb_row;
+ unsigned int filter_weight;
+ int mb_cols = cpi->common.mb_cols;
+ int mb_rows = cpi->common.mb_rows;
+ int mb_y_offset = 0;
+ int mb_uv_offset = 0;
+ DECLARE_ALIGNED_ARRAY(16, unsigned int, accumulator, 16 * 16 + 8 * 8 + 8 * 8);
+ DECLARE_ALIGNED_ARRAY(16, unsigned short, count, 16 * 16 + 8 * 8 + 8 * 8);
+ MACROBLOCKD *mbd = &cpi->mb.e_mbd;
+ YV12_BUFFER_CONFIG *f = cpi->frames[alt_ref_index];
+ unsigned char *dst1, *dst2;
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, predictor, 16 * 16 + 8 * 8 + 8 * 8);
+
+ // Save input state
+ unsigned char *y_buffer = mbd->pre.y_buffer;
+ unsigned char *u_buffer = mbd->pre.u_buffer;
+ unsigned char *v_buffer = mbd->pre.v_buffer;
+
+ for (mb_row = 0; mb_row < mb_rows; mb_row++) {
+#if ALT_REF_MC_ENABLED
+ // Source frames are extended to 16 pixels. This is different than
+ // L/A/G reference frames that have a border of 32 (VP9BORDERINPIXELS)
+ // A 6/8 tap filter is used for motion search. This requires 2 pixels
+ // before and 3 pixels after. So the largest Y mv on a border would
+ // then be 16 - VP9_INTERP_EXTEND. The UV blocks are half the size of the
+ // Y and therefore only extended by 8. The largest mv that a UV block
+ // can support is 8 - VP9_INTERP_EXTEND. A UV mv is half of a Y mv.
+ // (16 - VP9_INTERP_EXTEND) >> 1 which is greater than
+ // 8 - VP9_INTERP_EXTEND.
+ // To keep the mv in play for both Y and UV planes the max that it
+ // can be on a border is therefore 16 - (2*VP9_INTERP_EXTEND+1).
+ cpi->mb.mv_row_min = -((mb_row * 16) + (17 - 2 * VP9_INTERP_EXTEND));
+ cpi->mb.mv_row_max = ((cpi->common.mb_rows - 1 - mb_row) * 16)
+ + (17 - 2 * VP9_INTERP_EXTEND);
+#endif
+
+ for (mb_col = 0; mb_col < mb_cols; mb_col++) {
+ int i, j, k;
+ int stride;
+
+ vpx_memset(accumulator, 0, 384 * sizeof(unsigned int));
+ vpx_memset(count, 0, 384 * sizeof(unsigned short));
+
+#if ALT_REF_MC_ENABLED
+ cpi->mb.mv_col_min = -((mb_col * 16) + (17 - 2 * VP9_INTERP_EXTEND));
+ cpi->mb.mv_col_max = ((cpi->common.mb_cols - 1 - mb_col) * 16)
+ + (17 - 2 * VP9_INTERP_EXTEND);
+#endif
+
+ for (frame = 0; frame < frame_count; frame++) {
+ if (cpi->frames[frame] == NULL)
+ continue;
+
+ mbd->block[0].bmi.as_mv.first.as_mv.row = 0;
+ mbd->block[0].bmi.as_mv.first.as_mv.col = 0;
+
+ if (frame == alt_ref_index) {
+ filter_weight = 2;
+ } else {
+ int err = 0;
+#if ALT_REF_MC_ENABLED
+#define THRESH_LOW 10000
+#define THRESH_HIGH 20000
+
+ // Find best match in this frame by MC
+ err = temporal_filter_find_matching_mb_c
+ (cpi,
+ cpi->frames[alt_ref_index],
+ cpi->frames[frame],
+ mb_y_offset,
+ THRESH_LOW);
+#endif
+ // Assign higher weight to the matching MB if its error
+ // score is lower. If not applying MC, the default behavior
+ // is to weight all MBs equally.
+ filter_weight = err < THRESH_LOW
+ ? 2 : err < THRESH_HIGH ? 1 : 0;
+ }
+
+ if (filter_weight != 0) {
+ // Construct the predictors
+ temporal_filter_predictors_mb_c
+ (mbd,
+ cpi->frames[frame]->y_buffer + mb_y_offset,
+ cpi->frames[frame]->u_buffer + mb_uv_offset,
+ cpi->frames[frame]->v_buffer + mb_uv_offset,
+ cpi->frames[frame]->y_stride,
+ mbd->block[0].bmi.as_mv.first.as_mv.row,
+ mbd->block[0].bmi.as_mv.first.as_mv.col,
+ predictor);
+
+ // Apply the filter (YUV)
+ vp9_temporal_filter_apply(f->y_buffer + mb_y_offset, f->y_stride,
+ predictor, 16, strength, filter_weight,
+ accumulator, count);
+
+ vp9_temporal_filter_apply(f->u_buffer + mb_uv_offset, f->uv_stride,
+ predictor + 256, 8, strength, filter_weight,
+ accumulator + 256, count + 256);
+
+ vp9_temporal_filter_apply(f->v_buffer + mb_uv_offset, f->uv_stride,
+ predictor + 320, 8, strength, filter_weight,
+ accumulator + 320, count + 320);
+ }
+ }
+
+ // Normalize filter output to produce AltRef frame
+ dst1 = cpi->alt_ref_buffer.y_buffer;
+ stride = cpi->alt_ref_buffer.y_stride;
+ byte = mb_y_offset;
+ for (i = 0, k = 0; i < 16; i++) {
+ for (j = 0; j < 16; j++, k++) {
+ unsigned int pval = accumulator[k] + (count[k] >> 1);
+ pval *= cpi->fixed_divide[count[k]];
+ pval >>= 19;
+
+ dst1[byte] = (unsigned char)pval;
+
+ // move to next pixel
+ byte++;
+ }
+
+ byte += stride - 16;
+ }
+
+ dst1 = cpi->alt_ref_buffer.u_buffer;
+ dst2 = cpi->alt_ref_buffer.v_buffer;
+ stride = cpi->alt_ref_buffer.uv_stride;
+ byte = mb_uv_offset;
+ for (i = 0, k = 256; i < 8; i++) {
+ for (j = 0; j < 8; j++, k++) {
+ int m = k + 64;
+
+ // U
+ unsigned int pval = accumulator[k] + (count[k] >> 1);
+ pval *= cpi->fixed_divide[count[k]];
+ pval >>= 19;
+ dst1[byte] = (unsigned char)pval;
+
+ // V
+ pval = accumulator[m] + (count[m] >> 1);
+ pval *= cpi->fixed_divide[count[m]];
+ pval >>= 19;
+ dst2[byte] = (unsigned char)pval;
+
+ // move to next pixel
+ byte++;
+ }
+
+ byte += stride - 8;
+ }
+
+ mb_y_offset += 16;
+ mb_uv_offset += 8;
+ }
+
+ mb_y_offset += 16 * (f->y_stride - mb_cols);
+ mb_uv_offset += 8 * (f->uv_stride - mb_cols);
+ }
+
+ // Restore input state
+ mbd->pre.y_buffer = y_buffer;
+ mbd->pre.u_buffer = u_buffer;
+ mbd->pre.v_buffer = v_buffer;
+}
+
+void vp9_temporal_filter_prepare
+(
+ VP9_COMP *cpi,
+ int distance
+) {
+ int frame = 0;
+
+ int num_frames_backward = 0;
+ int num_frames_forward = 0;
+ int frames_to_blur_backward = 0;
+ int frames_to_blur_forward = 0;
+ int frames_to_blur = 0;
+ int start_frame = 0;
+
+ int strength = cpi->oxcf.arnr_strength;
+
+ int blur_type = cpi->oxcf.arnr_type;
+
+ int max_frames = cpi->active_arnr_frames;
+
+ num_frames_backward = distance;
+ num_frames_forward = vp9_lookahead_depth(cpi->lookahead)
+ - (num_frames_backward + 1);
+
+ switch (blur_type) {
+ case 1:
+ /////////////////////////////////////////
+ // Backward Blur
+
+ frames_to_blur_backward = num_frames_backward;
+
+ if (frames_to_blur_backward >= max_frames)
+ frames_to_blur_backward = max_frames - 1;
+
+ frames_to_blur = frames_to_blur_backward + 1;
+ break;
+
+ case 2:
+ /////////////////////////////////////////
+ // Forward Blur
+
+ frames_to_blur_forward = num_frames_forward;
+
+ if (frames_to_blur_forward >= max_frames)
+ frames_to_blur_forward = max_frames - 1;
+
+ frames_to_blur = frames_to_blur_forward + 1;
+ break;
+
+ case 3:
+ default:
+ /////////////////////////////////////////
+ // Center Blur
+ frames_to_blur_forward = num_frames_forward;
+ frames_to_blur_backward = num_frames_backward;
+
+ if (frames_to_blur_forward > frames_to_blur_backward)
+ frames_to_blur_forward = frames_to_blur_backward;
+
+ if (frames_to_blur_backward > frames_to_blur_forward)
+ frames_to_blur_backward = frames_to_blur_forward;
+
+ // When max_frames is even we have 1 more frame backward than forward
+ if (frames_to_blur_forward > (max_frames - 1) / 2)
+ frames_to_blur_forward = ((max_frames - 1) / 2);
+
+ if (frames_to_blur_backward > (max_frames / 2))
+ frames_to_blur_backward = (max_frames / 2);
+
+ frames_to_blur = frames_to_blur_backward + frames_to_blur_forward + 1;
+ break;
+ }
+
+ start_frame = distance + frames_to_blur_forward;
+
+#ifdef DEBUGFWG
+ // DEBUG FWG
+ printf("max:%d FBCK:%d FFWD:%d ftb:%d ftbbck:%d ftbfwd:%d sei:%d lasei:%d start:%d"
+, max_frames
+, num_frames_backward
+, num_frames_forward
+, frames_to_blur
+, frames_to_blur_backward
+, frames_to_blur_forward
+, cpi->source_encode_index
+, cpi->last_alt_ref_sei
+, start_frame);
+#endif
+
+ // Setup frame pointers, NULL indicates frame not included in filter
+ vpx_memset(cpi->frames, 0, max_frames * sizeof(YV12_BUFFER_CONFIG *));
+ for (frame = 0; frame < frames_to_blur; frame++) {
+ int which_buffer = start_frame - frame;
+ struct lookahead_entry *buf = vp9_lookahead_peek(cpi->lookahead,
+ which_buffer);
+ cpi->frames[frames_to_blur - 1 - frame] = &buf->img;
+ }
+
+ temporal_filter_iterate_c(
+ cpi,
+ frames_to_blur,
+ frames_to_blur_backward,
+ strength);
+}
+#endif
diff --git a/vp9/encoder/vp9_temporal_filter.h b/vp9/encoder/vp9_temporal_filter.h
new file mode 100644
index 0000000..abcb219
--- /dev/null
+++ b/vp9/encoder/vp9_temporal_filter.h
@@ -0,0 +1,16 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_ENCODER_VP9_TEMPORAL_FILTER_H_
+#define VP9_ENCODER_VP9_TEMPORAL_FILTER_H_
+
+extern void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance);
+
+#endif
diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c
new file mode 100644
index 0000000..44963b2
--- /dev/null
+++ b/vp9/encoder/vp9_tokenize.c
@@ -0,0 +1,887 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include <math.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include "vp9/encoder/vp9_onyx_int.h"
+#include "vp9/encoder/vp9_tokenize.h"
+#include "vpx_mem/vpx_mem.h"
+
+#include "vp9/common/vp9_pred_common.h"
+#include "vp9/common/vp9_seg_common.h"
+#include "vp9/common/vp9_entropy.h"
+
+/* Global event counters used for accumulating statistics across several
+ compressions, then generating vp9_context.c = initial stats. */
+
+#ifdef ENTROPY_STATS
+INT64 context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
+INT64 hybrid_context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
+
+INT64 context_counters_8x8[BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
+INT64 hybrid_context_counters_8x8[BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
+
+INT64 context_counters_16x16[BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
+INT64 hybrid_context_counters_16x16[BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
+
+extern unsigned int tree_update_hist[BLOCK_TYPES][COEF_BANDS]
+ [PREV_COEF_CONTEXTS][ENTROPY_NODES][2];
+extern unsigned int hybrid_tree_update_hist[BLOCK_TYPES][COEF_BANDS]
+ [PREV_COEF_CONTEXTS][ENTROPY_NODES][2];
+extern unsigned int tree_update_hist_8x8[BLOCK_TYPES_8X8][COEF_BANDS]
+ [PREV_COEF_CONTEXTS][ENTROPY_NODES] [2];
+extern unsigned int hybrid_tree_update_hist_8x8[BLOCK_TYPES_8X8][COEF_BANDS]
+ [PREV_COEF_CONTEXTS][ENTROPY_NODES] [2];
+extern unsigned int tree_update_hist_16x16[BLOCK_TYPES_16X16][COEF_BANDS]
+ [PREV_COEF_CONTEXTS][ENTROPY_NODES] [2];
+extern unsigned int hybrid_tree_update_hist_16x16[BLOCK_TYPES_16X16][COEF_BANDS]
+ [PREV_COEF_CONTEXTS][ENTROPY_NODES] [2];
+#endif /* ENTROPY_STATS */
+
+static TOKENVALUE dct_value_tokens[DCT_MAX_VALUE * 2];
+const TOKENVALUE *vp9_dct_value_tokens_ptr;
+static int dct_value_cost[DCT_MAX_VALUE * 2];
+const int *vp9_dct_value_cost_ptr;
+
+static void fill_value_tokens() {
+ /* Fills dct_value_tokens[] and dct_value_cost[] for every coefficient value. */
+ TOKENVALUE *const t = dct_value_tokens + DCT_MAX_VALUE; /* lets t[i] take negative i */
+ vp9_extra_bit_struct *const e = vp9_extra_bits;
+ /* i walks -DCT_MAX_VALUE .. DCT_MAX_VALUE - 1; sign stays 1 while i < 0. */
+ int i = -DCT_MAX_VALUE;
+ int sign = 1;
+
+ do {
+ if (!i)
+ sign = 0;
+ /* Token and extra bits: bit 0 of Extra is the sign, the rest the offset. */
+ {
+ const int a = sign ? -i : i; /* magnitude of i */
+ int eb = sign;
+
+ if (a > 4) {
+ int j = 4;
+ /* Stop at the first entry (index < 11) whose base_val exceeds a. */
+ while (++j < 11 && e[j].base_val <= a) {}
+ /* Step back one entry; Extra stores a's offset from its base_val. */
+ t[i].Token = --j;
+ eb |= (a - e[j].base_val) << 1;
+ } else
+ t[i].Token = a; /* magnitudes 0..4 are used directly as the token */
+
+ t[i].Extra = eb;
+ }
+
+ // Initialize the cost of the extra bits for every possible coefficient value.
+ {
+ int cost = 0;
+ vp9_extra_bit_struct *p = vp9_extra_bits + t[i].Token;
+ /* Tokens whose base_val is 0 are skipped; their cost slot is not written. */
+ if (p->base_val) {
+ const int extra = t[i].Extra;
+ const int Length = p->Len;
+
+ if (Length)
+ cost += treed_cost(p->tree, p->prob, extra >> 1, Length);
+
+ cost += vp9_cost_bit(vp9_prob_half, extra & 1); /* sign */
+ dct_value_cost[i + DCT_MAX_VALUE] = cost;
+ }
+
+ }
+
+ } while (++i < DCT_MAX_VALUE);
+ /* Publish pointers to the value-0 entries so the tables can be indexed by value. */
+ vp9_dct_value_tokens_ptr = dct_value_tokens + DCT_MAX_VALUE;
+ vp9_dct_value_cost_ptr = dct_value_cost + DCT_MAX_VALUE;
+}
+
+static void tokenize_b(VP9_COMP *cpi, /* Tokenize one block's quantized coefficients. */
+ MACROBLOCKD *xd,
+ const BLOCKD * const b,
+ TOKENEXTRA **tp, /* in/out: next free token slot */
+ PLANE_TYPE type,
+ ENTROPY_CONTEXT *a, /* above context; overwritten on return */
+ ENTROPY_CONTEXT *l, /* left context; overwritten on return */
+ TX_SIZE tx_size,
+ int dry_run) { /* nonzero: do not touch the count tables */
+ int pt; /* near block/prev token context index */
+ int c = (type == PLANE_TYPE_Y_NO_DC) ? 1 : 0; /* first coded position */
+ const int eob = b->eob; /* one beyond last nonzero coeff */
+ TOKENEXTRA *t = *tp; /* store tokens starting here */
+ const short *qcoeff_ptr = b->qcoeff;
+ int seg_eob;
+ int segment_id = xd->mode_info_context->mbmi.segment_id;
+ const int *bands, *scan;
+ unsigned int (*counts)[COEF_BANDS][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS];
+ vp9_prob (*probs)[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES];
+ const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
+ get_tx_type(xd, b) : DCT_DCT;
+ /* pt starts from the above/left contexts, then follows the previous token. */
+ VP9_COMBINEENTROPYCONTEXTS(pt, *a, *l);
+ switch (tx_size) {
+ default:
+ case TX_4X4:
+ seg_eob = 16;
+ bands = vp9_coef_bands;
+ scan = vp9_default_zig_zag1d;
+ if (tx_type != DCT_DCT) {
+ counts = cpi->hybrid_coef_counts;
+ probs = cpi->common.fc.hybrid_coef_probs;
+ if (tx_type == ADST_DCT) {
+ scan = vp9_row_scan;
+ } else if (tx_type == DCT_ADST) {
+ scan = vp9_col_scan;
+ }
+ } else {
+ counts = cpi->coef_counts;
+ probs = cpi->common.fc.coef_probs;
+ }
+ break;
+ case TX_8X8:
+ if (type == PLANE_TYPE_Y2) { /* Y2 keeps the 4x4 band/scan tables, 4 positions */
+ seg_eob = 4;
+ bands = vp9_coef_bands;
+ scan = vp9_default_zig_zag1d;
+ } else {
+ seg_eob = 64;
+ bands = vp9_coef_bands_8x8;
+ scan = vp9_default_zig_zag1d_8x8;
+ }
+ if (tx_type != DCT_DCT) {
+ counts = cpi->hybrid_coef_counts_8x8;
+ probs = cpi->common.fc.hybrid_coef_probs_8x8;
+ } else {
+ counts = cpi->coef_counts_8x8;
+ probs = cpi->common.fc.coef_probs_8x8;
+ }
+ break;
+ case TX_16X16:
+ seg_eob = 256;
+ bands = vp9_coef_bands_16x16;
+ scan = vp9_default_zig_zag1d_16x16;
+ if (tx_type != DCT_DCT) {
+ counts = cpi->hybrid_coef_counts_16x16;
+ probs = cpi->common.fc.hybrid_coef_probs_16x16;
+ } else {
+ counts = cpi->coef_counts_16x16;
+ probs = cpi->common.fc.coef_probs_16x16;
+ }
+ break;
+ }
+ /* An active SEG_LVL_EOB feature replaces the transform-size limit. */
+ if (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB))
+ seg_eob = vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
+
+ do {
+ const int band = bands[c];
+ int token;
+
+ if (c < eob) {
+ const int rc = scan[c]; /* coefficient index in scan order */
+ const int v = qcoeff_ptr[rc];
+
+ assert(-DCT_MAX_VALUE <= v && v < DCT_MAX_VALUE);
+
+ t->Extra = vp9_dct_value_tokens_ptr[v].Extra;
+ token = vp9_dct_value_tokens_ptr[v].Token;
+ } else {
+ token = DCT_EOB_TOKEN; /* past the last nonzero coefficient */
+ }
+
+ t->Token = token;
+ t->context_tree = probs[type][band][pt];
+ t->skip_eob_node = (pt == 0) && ((band > 0 && type != PLANE_TYPE_Y_NO_DC) ||
+ (band > 1 && type == PLANE_TYPE_Y_NO_DC));
+ assert(vp9_coef_encodings[t->Token].Len - t->skip_eob_node > 0);
+ if (!dry_run) {
+ ++counts[type][band][pt][token];
+ }
+ pt = vp9_prev_token_class[token];
+ ++t;
+ } while (c < eob && ++c < seg_eob); /* ends after EOB, or without one at seg_eob */
+
+ *tp = t;
+ *a = *l = (c > !type); /* 0 <-> all coeff data is zero */
+}
+
+int vp9_mby_is_skippable_4x4(MACROBLOCKD *xd, int has_2nd_order) {
+  /* 1 when no 4x4 luma block (indices 0..15) has data to code.  With a
+   * 2nd-order block each luma eob must be < 2 and block 24's eob must be 0;
+   * otherwise every luma eob must be 0. */
+  int blk;
+
+  for (blk = 0; blk < 16; ++blk) {
+    const int eob = xd->block[blk].eob;
+    if (has_2nd_order ? eob >= 2 : eob != 0)
+      return 0;
+  }
+
+  return !(has_2nd_order && xd->block[24].eob);
+}
+
+int vp9_mbuv_is_skippable_4x4(MACROBLOCKD *xd) {
+  /* 1 when all eight 4x4 chroma blocks (indices 16..23) have eob == 0. */
+  int blk = 16;
+
+  while (blk < 24 && !xd->block[blk].eob)
+    ++blk;
+  return blk == 24;
+}
+
+static int mb_is_skippable_4x4(MACROBLOCKD *xd, int has_2nd_order) {
+  const int luma_skip = vp9_mby_is_skippable_4x4(xd, has_2nd_order);
+  return luma_skip & vp9_mbuv_is_skippable_4x4(xd);  /* 4x4 luma and chroma */
+}
+
+int vp9_mby_is_skippable_8x8(MACROBLOCKD *xd, int has_2nd_order) {
+  /* 1 when no 8x8 luma block (indices 0, 4, 8, 12) has data to code.  With a
+   * 2nd-order block each such eob must be < 2 and block 24's eob must be 0;
+   * otherwise each eob must be 0. */
+  int blk;
+
+  for (blk = 0; blk < 16; blk += 4) {
+    const int eob = xd->block[blk].eob;
+    if (has_2nd_order ? eob >= 2 : eob != 0)
+      return 0;
+  }
+
+  return !(has_2nd_order && xd->block[24].eob);
+}
+
+int vp9_mbuv_is_skippable_8x8(MACROBLOCKD *xd) {
+  return !(xd->block[16].eob || xd->block[20].eob);  /* blocks 16 and 20 empty */
+}
+
+static int mb_is_skippable_8x8(MACROBLOCKD *xd, int has_2nd_order) {
+  const int luma_skip = vp9_mby_is_skippable_8x8(xd, has_2nd_order);
+  return luma_skip & vp9_mbuv_is_skippable_8x8(xd);  /* 8x8 luma and chroma */
+}
+
+static int mb_is_skippable_8x8_4x4uv(MACROBLOCKD *xd, int has_2nd_order) {
+  const int luma_skip = vp9_mby_is_skippable_8x8(xd, has_2nd_order);
+  return luma_skip & vp9_mbuv_is_skippable_4x4(xd);  /* 8x8 luma, 4x4 chroma */
+}
+
+int vp9_mby_is_skippable_16x16(MACROBLOCKD *xd) {
+  /* Only block 0's eob is examined: 1 when it is zero, 0 otherwise. */
+
+  return xd->block[0].eob == 0;
+}
+
+static int mb_is_skippable_16x16(MACROBLOCKD *xd) {  /* 16x16 luma, 8x8 chroma */
+  return vp9_mbuv_is_skippable_8x8(xd) & vp9_mby_is_skippable_16x16(xd);
+}
+
+void vp9_tokenize_mb(VP9_COMP *cpi, /* Tokenize every block of one macroblock. */
+ MACROBLOCKD *xd,
+ TOKENEXTRA **t, /* in/out: next free token slot */
+ int dry_run) { /* nonzero: no counter updates, *t restored on return */
+ PLANE_TYPE plane_type;
+ int has_2nd_order;
+ int b;
+ int tx_size = xd->mode_info_context->mbmi.txfm_size;
+ int mb_skip_context = vp9_get_pred_context(&cpi->common, xd, PRED_MBSKIP);
+ TOKENEXTRA *t_backup = *t;
+ ENTROPY_CONTEXT * A = (ENTROPY_CONTEXT *) xd->above_context;
+ ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *) xd->left_context;
+
+ // If the MB is going to be skipped because of a segment level flag
+ // exclude this from the skip count stats used to calculate the
+ // transmitted skip probability;
+ int skip_inc;
+ int segment_id = xd->mode_info_context->mbmi.segment_id;
+ /* skip_inc is 0 only when SEG_LVL_EOB is active with a value of 0. */
+ if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) ||
+ (vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) != 0)) {
+ skip_inc = 1;
+ } else
+ skip_inc = 0;
+
+ has_2nd_order = get_2nd_order_usage(xd);
+ /* Decide skippability from the block eobs, per transform size. */
+ switch (tx_size) {
+ case TX_16X16:
+ xd->mode_info_context->mbmi.mb_skip_coeff = mb_is_skippable_16x16(xd);
+ break;
+ case TX_8X8:
+ if (xd->mode_info_context->mbmi.mode == I8X8_PRED ||
+ xd->mode_info_context->mbmi.mode == SPLITMV)
+ xd->mode_info_context->mbmi.mb_skip_coeff =
+ mb_is_skippable_8x8_4x4uv(xd, 0);
+ else
+ xd->mode_info_context->mbmi.mb_skip_coeff =
+ mb_is_skippable_8x8(xd, has_2nd_order);
+ break;
+
+ default:
+ xd->mode_info_context->mbmi.mb_skip_coeff =
+ mb_is_skippable_4x4(xd, has_2nd_order);
+ break;
+ }
+ /* Skipped MB: update the skip count, then stuff EOBs or reset the contexts. */
+ if (xd->mode_info_context->mbmi.mb_skip_coeff) {
+ if (!dry_run)
+ cpi->skip_true_count[mb_skip_context] += skip_inc;
+ if (!cpi->common.mb_no_coeff_skip) {
+ vp9_stuff_mb(cpi, xd, t, dry_run);
+ } else {
+ vp9_fix_contexts(xd);
+ }
+ if (dry_run)
+ *t = t_backup;
+ return;
+ }
+
+ if (!dry_run)
+ cpi->skip_false_count[mb_skip_context] += skip_inc;
+ /* The second-order block (24) is tokenized first when it is in use. */
+ if (has_2nd_order) {
+ if (tx_size == TX_8X8) {
+ tokenize_b(cpi, xd, xd->block + 24, t, PLANE_TYPE_Y2,
+ A + vp9_block2above_8x8[24], L + vp9_block2left_8x8[24],
+ TX_8X8, dry_run);
+ } else {
+ tokenize_b(cpi, xd, xd->block + 24, t, PLANE_TYPE_Y2,
+ A + vp9_block2above[24], L + vp9_block2left[24],
+ TX_4X4, dry_run);
+ }
+
+ plane_type = PLANE_TYPE_Y_NO_DC;
+ } else {
+ xd->above_context->y2 = 1;
+ xd->left_context->y2 = 1;
+ plane_type = PLANE_TYPE_Y_WITH_DC;
+ }
+ /* Luma blocks, then chroma blocks, for the chosen transform size. */
+ if (tx_size == TX_16X16) {
+ tokenize_b(cpi, xd, xd->block, t, PLANE_TYPE_Y_WITH_DC,
+ A, L, TX_16X16, dry_run);
+ A[1] = A[2] = A[3] = A[0]; /* copy the result to the other three luma entries */
+ L[1] = L[2] = L[3] = L[0];
+
+ for (b = 16; b < 24; b += 4) {
+ tokenize_b(cpi, xd, xd->block + b, t, PLANE_TYPE_UV,
+ A + vp9_block2above_8x8[b], L + vp9_block2left_8x8[b],
+ TX_8X8, dry_run);
+ A[vp9_block2above_8x8[b] + 1] = A[vp9_block2above_8x8[b]]; /* copy to next entry */
+ L[vp9_block2left_8x8[b] + 1] = L[vp9_block2left_8x8[b]];
+ }
+ A[8] = 0;
+ L[8] = 0;
+ } else if (tx_size == TX_8X8) {
+ for (b = 0; b < 16; b += 4) {
+ tokenize_b(cpi, xd, xd->block + b, t, plane_type,
+ A + vp9_block2above_8x8[b], L + vp9_block2left_8x8[b],
+ TX_8X8, dry_run);
+ A[vp9_block2above_8x8[b] + 1] = A[vp9_block2above_8x8[b]];
+ L[vp9_block2left_8x8[b] + 1] = L[vp9_block2left_8x8[b]];
+ }
+ if (xd->mode_info_context->mbmi.mode == I8X8_PRED ||
+ xd->mode_info_context->mbmi.mode == SPLITMV) { /* these modes use 4x4 chroma */
+ for (b = 16; b < 24; b++) {
+ tokenize_b(cpi, xd, xd->block + b, t, PLANE_TYPE_UV,
+ A + vp9_block2above[b], L + vp9_block2left[b],
+ TX_4X4, dry_run);
+ }
+ } else {
+ for (b = 16; b < 24; b += 4) {
+ tokenize_b(cpi, xd, xd->block + b, t, PLANE_TYPE_UV,
+ A + vp9_block2above_8x8[b], L + vp9_block2left_8x8[b],
+ TX_8X8, dry_run);
+ A[vp9_block2above_8x8[b] + 1] = A[vp9_block2above_8x8[b]];
+ L[vp9_block2left_8x8[b] + 1] = L[vp9_block2left_8x8[b]];
+ }
+ }
+ } else {
+ for (b = 0; b < 16; b++) {
+ tokenize_b(cpi, xd, xd->block + b, t, plane_type,
+ A + vp9_block2above[b], L + vp9_block2left[b],
+ TX_4X4, dry_run);
+ }
+
+ for (b = 16; b < 24; b++) {
+ tokenize_b(cpi, xd, xd->block + b, t, PLANE_TYPE_UV,
+ A + vp9_block2above[b], L + vp9_block2left[b],
+ TX_4X4, dry_run);
+ }
+ }
+ if (dry_run) /* dry run: give the caller back its original token pointer */
+ *t = t_backup;
+}
+
+
+#ifdef ENTROPY_STATS
+void init_context_counters(void) {
+  /* Reload saved stats; a missing or short file leaves that group zeroed. */
+  FILE *f = fopen("context.bin", "rb");
+  int ok = f != NULL &&
+      fread(context_counters, sizeof(context_counters), 1, f) == 1 &&
+      fread(context_counters_8x8, sizeof(context_counters_8x8), 1, f) == 1 &&
+      fread(context_counters_16x16, sizeof(context_counters_16x16), 1, f) == 1;
+  if (f) fclose(f);
+  if (!ok) {  /* never keep a partially loaded set of counters */
+    vpx_memset(context_counters, 0, sizeof(context_counters));
+    vpx_memset(context_counters_8x8, 0, sizeof(context_counters_8x8));
+    vpx_memset(context_counters_16x16, 0, sizeof(context_counters_16x16));
+  }
+  f = fopen("treeupdate.bin", "rb");
+  ok = f != NULL &&
+      fread(tree_update_hist, sizeof(tree_update_hist), 1, f) == 1 &&
+      fread(tree_update_hist_8x8, sizeof(tree_update_hist_8x8), 1, f) == 1 &&
+      fread(tree_update_hist_16x16, sizeof(tree_update_hist_16x16), 1, f) == 1;
+  if (f) fclose(f);
+  if (!ok) {  /* same rule for the tree-update histograms */
+    vpx_memset(tree_update_hist, 0, sizeof(tree_update_hist));
+    vpx_memset(tree_update_hist_8x8, 0, sizeof(tree_update_hist_8x8));
+    vpx_memset(tree_update_hist_16x16, 0, sizeof(tree_update_hist_16x16));
+  }
+}
+
+void print_context_counters() {
+  /* Write the gathered counts and the probabilities derived from them to
+   * vp9_context.c, then save the raw counts to context.bin. */
+  int type, band, pt, t;
+  FILE *f = fopen("vp9_context.c", "w");
+  if (!f) return;  /* output file could not be created: nothing to write */
+  fprintf(f, "#include \"vp9_entropy.h\"\n");
+  fprintf(f, "\n/* *** GENERATED FILE: DO NOT EDIT *** */\n\n");
+  fprintf(f, "static const unsigned int\n"
+          "vp9_default_coef_counts[BLOCK_TYPES]\n"
+          " [COEF_BANDS]\n"
+          " [PREV_COEF_CONTEXTS]\n"
+          " [MAX_ENTROPY_TOKENS]={\n");
+# define Comma( X) (X? ",":"")
+  type = 0;
+  do {
+    fprintf(f, "%s\n { /* block Type %d */", Comma(type), type);
+    band = 0;
+    do {
+      fprintf(f, "%s\n { /* Coeff Band %d */", Comma(band), band);
+      pt = 0;
+      do {
+        fprintf(f, "%s\n {", Comma(pt));
+        t = 0;
+        do {
+          const INT64 x = context_counters [type] [band] [pt] [t];
+          const int y = (int) x;
+          assert(x == (INT64) y); /* no overflow handling yet */
+          fprintf(f, "%s %d", Comma(t), y);
+        } while (++t < MAX_ENTROPY_TOKENS);
+        fprintf(f, "}");
+      } while (++pt < PREV_COEF_CONTEXTS);
+      fprintf(f, "\n }");
+    } while (++band < COEF_BANDS);
+    fprintf(f, "\n }");
+  } while (++type < BLOCK_TYPES);
+  fprintf(f, "\n};\n");
+  /* Raw counts, 8x8 transform. */
+  fprintf(f, "static const unsigned int\nvp9_default_coef_counts_8x8"
+          "[BLOCK_TYPES_8X8] [COEF_BANDS]"
+          "[PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS] = {");
+  type = 0;
+  do {
+    fprintf(f, "%s\n { /* block Type %d */", Comma(type), type);
+    band = 0;
+    do {
+      fprintf(f, "%s\n { /* Coeff Band %d */", Comma(band), band);
+      pt = 0;
+      do {
+        fprintf(f, "%s\n {", Comma(pt));
+        t = 0;
+        do {
+          const INT64 x = context_counters_8x8 [type] [band] [pt] [t];
+          const int y = (int) x;
+
+          assert(x == (INT64) y); /* no overflow handling yet */
+          fprintf(f, "%s %d", Comma(t), y);
+
+        } while (++t < MAX_ENTROPY_TOKENS);
+
+        fprintf(f, "}");
+      } while (++pt < PREV_COEF_CONTEXTS);
+
+      fprintf(f, "\n }");
+
+    } while (++band < COEF_BANDS);
+
+    fprintf(f, "\n }");
+  } while (++type < BLOCK_TYPES_8X8);
+  fprintf(f, "\n};\n");
+  /* Raw counts, 16x16 transform. */
+  fprintf(f, "static const unsigned int\nvp9_default_coef_counts_16x16"
+          "[BLOCK_TYPES_16X16] [COEF_BANDS]"
+          "[PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS] = {");
+  type = 0;
+  do {
+    fprintf(f, "%s\n { /* block Type %d */", Comma(type), type);
+    band = 0;
+    do {
+      fprintf(f, "%s\n { /* Coeff Band %d */", Comma(band), band);
+      pt = 0;
+      do {
+        fprintf(f, "%s\n {", Comma(pt));
+        t = 0;
+        do {
+          const INT64 x = context_counters_16x16 [type] [band] [pt] [t];
+          const int y = (int) x;
+
+          assert(x == (INT64) y); /* no overflow handling yet */
+          fprintf(f, "%s %d", Comma(t), y);
+
+        } while (++t < MAX_ENTROPY_TOKENS);
+
+        fprintf(f, "}");
+      } while (++pt < PREV_COEF_CONTEXTS);
+
+      fprintf(f, "\n }");
+
+    } while (++band < COEF_BANDS);
+
+    fprintf(f, "\n }");
+  } while (++type < BLOCK_TYPES_16X16);
+  fprintf(f, "\n};\n");
+  /* Probabilities derived from the 4x4 counts. */
+  fprintf(f, "static const vp9_prob\n"
+          "vp9_default_coef_probs[BLOCK_TYPES] [COEF_BANDS] \n"
+          "[PREV_COEF_CONTEXTS] [ENTROPY_NODES] = {");
+  type = 0;
+  do {
+    fprintf(f, "%s\n { /* block Type %d */", Comma(type), type);
+    band = 0;
+    do {
+      fprintf(f, "%s\n { /* Coeff Band %d */", Comma(band), band);
+      pt = 0;
+      do {
+        unsigned int branch_ct [ENTROPY_NODES] [2];
+        unsigned int coef_counts[MAX_ENTROPY_TOKENS];
+        vp9_prob coef_probs[ENTROPY_NODES];
+        for (t = 0; t < MAX_ENTROPY_TOKENS; ++t)
+          coef_counts[t] = context_counters [type] [band] [pt] [t];
+        vp9_tree_probs_from_distribution(
+          MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree,
+          coef_probs, branch_ct, coef_counts, 256, 1);
+        fprintf(f, "%s\n {", Comma(pt));
+
+        t = 0;
+        do {
+          fprintf(f, "%s %d", Comma(t), coef_probs[t]);
+
+        } while (++t < ENTROPY_NODES);
+
+        fprintf(f, "}");
+      } while (++pt < PREV_COEF_CONTEXTS);
+      fprintf(f, "\n }");
+    } while (++band < COEF_BANDS);
+    fprintf(f, "\n }");
+  } while (++type < BLOCK_TYPES);
+  fprintf(f, "\n};\n");
+  /* Probabilities derived from the 8x8 counts. */
+  fprintf(f, "static const vp9_prob\n"
+          "vp9_default_coef_probs_8x8[BLOCK_TYPES_8X8] [COEF_BANDS]\n"
+          "[PREV_COEF_CONTEXTS] [ENTROPY_NODES] = {");
+  type = 0;
+  do {
+    fprintf(f, "%s\n { /* block Type %d */", Comma(type), type);
+    band = 0;
+    do {
+      fprintf(f, "%s\n { /* Coeff Band %d */", Comma(band), band);
+      pt = 0;
+      do {
+        unsigned int branch_ct [ENTROPY_NODES] [2];
+        unsigned int coef_counts[MAX_ENTROPY_TOKENS];
+        vp9_prob coef_probs[ENTROPY_NODES];
+        for (t = 0; t < MAX_ENTROPY_TOKENS; ++t)
+          coef_counts[t] = context_counters_8x8[type] [band] [pt] [t];
+        vp9_tree_probs_from_distribution(
+          MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree,
+          coef_probs, branch_ct, coef_counts, 256, 1);
+        fprintf(f, "%s\n {", Comma(pt));
+
+        t = 0;
+        do {
+          fprintf(f, "%s %d", Comma(t), coef_probs[t]);
+        } while (++t < ENTROPY_NODES);
+        fprintf(f, "}");
+      } while (++pt < PREV_COEF_CONTEXTS);
+      fprintf(f, "\n }");
+    } while (++band < COEF_BANDS);
+    fprintf(f, "\n }");
+  } while (++type < BLOCK_TYPES_8X8);
+  fprintf(f, "\n};\n");
+  /* Probabilities derived from the 16x16 counts. */
+  fprintf(f, "static const vp9_prob\n"
+          "vp9_default_coef_probs_16x16[BLOCK_TYPES_16X16] [COEF_BANDS]\n"
+          "[PREV_COEF_CONTEXTS] [ENTROPY_NODES] = {");
+  type = 0;
+  do {
+    fprintf(f, "%s\n { /* block Type %d */", Comma(type), type);
+    band = 0;
+    do {
+      fprintf(f, "%s\n { /* Coeff Band %d */", Comma(band), band);
+      pt = 0;
+      do {
+        unsigned int branch_ct [ENTROPY_NODES] [2];
+        unsigned int coef_counts[MAX_ENTROPY_TOKENS];
+        vp9_prob coef_probs[ENTROPY_NODES];
+        for (t = 0; t < MAX_ENTROPY_TOKENS; ++t)
+          coef_counts[t] = context_counters_16x16[type] [band] [pt] [t];
+        vp9_tree_probs_from_distribution(
+          MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree,
+          coef_probs, branch_ct, coef_counts, 256, 1);
+        fprintf(f, "%s\n {", Comma(pt));
+
+        t = 0;
+        do {
+          fprintf(f, "%s %d", Comma(t), coef_probs[t]);
+        } while (++t < ENTROPY_NODES);
+        fprintf(f, "}");
+      } while (++pt < PREV_COEF_CONTEXTS);
+      fprintf(f, "\n }");
+    } while (++band < COEF_BANDS);
+    fprintf(f, "\n }");
+  } while (++type < BLOCK_TYPES_16X16);
+  fprintf(f, "\n};\n");
+
+  fclose(f);
+  f = fopen("context.bin", "wb");
+  if (!f) return;  /* raw counts cannot be saved; the generated .c is complete */
+  fwrite(context_counters, sizeof(context_counters), 1, f);
+  fwrite(context_counters_8x8, sizeof(context_counters_8x8), 1, f);
+  fwrite(context_counters_16x16, sizeof(context_counters_16x16), 1, f);
+  fclose(f);
+}
+#endif
+
+void vp9_tokenize_initialize() { /* Builds the DCT value token and cost tables. */
+ fill_value_tokens();
+}
+
+static __inline void stuff_b(VP9_COMP *cpi, /* Emit a lone EOB token for one block. */
+ MACROBLOCKD *xd,
+ const BLOCKD * const b,
+ TOKENEXTRA **tp, /* in/out: next free token slot */
+ PLANE_TYPE type,
+ ENTROPY_CONTEXT *a, /* above context; set to 0 */
+ ENTROPY_CONTEXT *l, /* left context; set to 0 */
+ TX_SIZE tx_size,
+ int dry_run) { /* nonzero: do not touch the count tables */
+ const int *bands;
+ unsigned int (*counts)[COEF_BANDS][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS];
+ vp9_prob (*probs)[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES];
+ int pt, band;
+ TOKENEXTRA *t = *tp;
+ const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
+ get_tx_type(xd, b) : DCT_DCT;
+ VP9_COMBINEENTROPYCONTEXTS(pt, *a, *l);
+ /* Pick the band table and the count/probability tables for this size. */
+ switch (tx_size) {
+ default:
+ case TX_4X4:
+ bands = vp9_coef_bands;
+ if (tx_type != DCT_DCT) {
+ counts = cpi->hybrid_coef_counts;
+ probs = cpi->common.fc.hybrid_coef_probs;
+ } else {
+ counts = cpi->coef_counts;
+ probs = cpi->common.fc.coef_probs;
+ }
+ break;
+ case TX_8X8:
+ bands = vp9_coef_bands_8x8;
+ if (tx_type != DCT_DCT) {
+ counts = cpi->hybrid_coef_counts_8x8;
+ probs = cpi->common.fc.hybrid_coef_probs_8x8;
+ } else {
+ counts = cpi->coef_counts_8x8;
+ probs = cpi->common.fc.coef_probs_8x8;
+ }
+ break;
+ case TX_16X16:
+ bands = vp9_coef_bands_16x16;
+ if (tx_type != DCT_DCT) {
+ counts = cpi->hybrid_coef_counts_16x16;
+ probs = cpi->common.fc.hybrid_coef_probs_16x16;
+ } else {
+ counts = cpi->coef_counts_16x16;
+ probs = cpi->common.fc.coef_probs_16x16;
+ }
+ break;
+ }
+ band = bands[(type == PLANE_TYPE_Y_NO_DC) ? 1 : 0]; /* band of the first coded position */
+ t->Token = DCT_EOB_TOKEN;
+ t->context_tree = probs[type][band][pt];
+ t->skip_eob_node = 0;
+ ++t;
+ *tp = t;
+ *a = *l = 0; /* the block carries no coefficients */
+ if (!dry_run) {
+ ++counts[type][band][pt][DCT_EOB_TOKEN];
+ }
+}
+
+static void stuff_mb_8x8(VP9_COMP *cpi, MACROBLOCKD *xd,
+ TOKENEXTRA **t, int dry_run) {
+ ENTROPY_CONTEXT *A = (ENTROPY_CONTEXT *)xd->above_context;
+ ENTROPY_CONTEXT *L = (ENTROPY_CONTEXT *)xd->left_context;
+ PLANE_TYPE plane_type;
+ int b;
+ int has_2nd_order = get_2nd_order_usage(xd);
+ /* Stuffs EOB tokens for a macroblock using 8x8 luma and 8x8 chroma blocks. */
+ if (has_2nd_order) {
+ stuff_b(cpi, xd, xd->block + 24, t, PLANE_TYPE_Y2,
+ A + vp9_block2above_8x8[24], L + vp9_block2left_8x8[24],
+ TX_8X8, dry_run);
+ plane_type = PLANE_TYPE_Y_NO_DC;
+ } else {
+ xd->above_context->y2 = 1;
+ xd->left_context->y2 = 1;
+ plane_type = PLANE_TYPE_Y_WITH_DC;
+ }
+ /* Luma blocks 0, 4, 8, 12; each result is copied to the next context entry. */
+ for (b = 0; b < 16; b += 4) {
+ stuff_b(cpi, xd, xd->block + b, t, plane_type, A + vp9_block2above_8x8[b],
+ L + vp9_block2left_8x8[b], TX_8X8, dry_run);
+ A[vp9_block2above_8x8[b] + 1] = A[vp9_block2above_8x8[b]];
+ L[vp9_block2left_8x8[b] + 1] = L[vp9_block2left_8x8[b]];
+ }
+ /* Chroma blocks 16 and 20, handled the same way. */
+ for (b = 16; b < 24; b += 4) {
+ stuff_b(cpi, xd, xd->block + b, t, PLANE_TYPE_UV,
+ A + vp9_block2above_8x8[b], L + vp9_block2left_8x8[b],
+ TX_8X8, dry_run);
+ A[vp9_block2above_8x8[b] + 1] = A[vp9_block2above_8x8[b]];
+ L[vp9_block2left_8x8[b] + 1] = L[vp9_block2left_8x8[b]];
+ }
+}
+
+static void stuff_mb_16x16(VP9_COMP *cpi, MACROBLOCKD *xd,
+                           TOKENEXTRA **t, int dry_run) {
+  /* Stuff EOB tokens for a TX_16X16 MB; chroma uses the 8x8 context tables. */
+  ENTROPY_CONTEXT * A = (ENTROPY_CONTEXT *)xd->above_context;
+  ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *)xd->left_context;
+  int b;
+  stuff_b(cpi, xd, xd->block, t, PLANE_TYPE_Y_WITH_DC, A, L, TX_16X16, dry_run);
+  A[1] = A[2] = A[3] = A[0];
+  L[1] = L[2] = L[3] = L[0];
+  for (b = 16; b < 24; b += 4) {
+    stuff_b(cpi, xd, xd->block + b, t, PLANE_TYPE_UV, A + vp9_block2above_8x8[b],
+            L + vp9_block2left_8x8[b], TX_8X8, dry_run);
+    A[vp9_block2above_8x8[b] + 1] = A[vp9_block2above_8x8[b]];
+    L[vp9_block2left_8x8[b] + 1] = L[vp9_block2left_8x8[b]];
+  }
+  vpx_memset(&A[8], 0, sizeof(A[8]));
+  vpx_memset(&L[8], 0, sizeof(L[8]));
+}
+
+static void stuff_mb_4x4(VP9_COMP *cpi, MACROBLOCKD *xd,
+ TOKENEXTRA **t, int dry_run) { /* Stuffs EOB tokens for a 4x4-transform MB. */
+ ENTROPY_CONTEXT *A = (ENTROPY_CONTEXT *)xd->above_context;
+ ENTROPY_CONTEXT *L = (ENTROPY_CONTEXT *)xd->left_context;
+ int b;
+ PLANE_TYPE plane_type;
+ int has_2nd_order = (xd->mode_info_context->mbmi.mode != B_PRED &&
+ xd->mode_info_context->mbmi.mode != I8X8_PRED &&
+ xd->mode_info_context->mbmi.mode != SPLITMV);
+ if (has_2nd_order && get_tx_type(xd, &xd->block[0]) != DCT_DCT)
+ has_2nd_order = 0; /* a non-DCT transform on block 0 disables the 2nd-order block */
+
+ if (has_2nd_order) {
+ stuff_b(cpi, xd, xd->block + 24, t, PLANE_TYPE_Y2, A + vp9_block2above[24],
+ L + vp9_block2left[24], TX_4X4, dry_run);
+ plane_type = PLANE_TYPE_Y_NO_DC;
+ } else {
+ xd->above_context->y2 = 1;
+ xd->left_context->y2 = 1;
+ plane_type = PLANE_TYPE_Y_WITH_DC;
+ }
+ /* Sixteen luma blocks, then the eight chroma blocks. */
+ for (b = 0; b < 16; b++)
+ stuff_b(cpi, xd, xd->block + b, t, plane_type, A + vp9_block2above[b],
+ L + vp9_block2left[b], TX_4X4, dry_run);
+
+ for (b = 16; b < 24; b++)
+ stuff_b(cpi, xd, xd->block + b, t, PLANE_TYPE_UV, A + vp9_block2above[b],
+ L + vp9_block2left[b], TX_4X4, dry_run);
+}
+
+static void stuff_mb_8x8_4x4uv(VP9_COMP *cpi, MACROBLOCKD *xd,
+ TOKENEXTRA **t, int dry_run) {
+ ENTROPY_CONTEXT *A = (ENTROPY_CONTEXT *)xd->above_context;
+ ENTROPY_CONTEXT *L = (ENTROPY_CONTEXT *)xd->left_context;
+ PLANE_TYPE plane_type;
+ int b;
+ /* Stuffs EOB tokens for a macroblock using 8x8 luma and 4x4 chroma blocks. */
+ int has_2nd_order = get_2nd_order_usage(xd);
+ if (has_2nd_order) {
+ stuff_b(cpi, xd, xd->block + 24, t, PLANE_TYPE_Y2,
+ A + vp9_block2above_8x8[24], L + vp9_block2left_8x8[24],
+ TX_8X8, dry_run);
+ plane_type = PLANE_TYPE_Y_NO_DC;
+ } else {
+ plane_type = PLANE_TYPE_Y_WITH_DC;
+ }
+ /* Luma blocks 0, 4, 8, 12; each result is copied to the next context entry. */
+ for (b = 0; b < 16; b += 4) {
+ stuff_b(cpi, xd, xd->block + b, t, plane_type,
+ A + vp9_block2above_8x8[b], L + vp9_block2left_8x8[b],
+ TX_8X8, dry_run);
+ A[vp9_block2above_8x8[b] + 1] = A[vp9_block2above_8x8[b]];
+ L[vp9_block2left_8x8[b] + 1] = L[vp9_block2left_8x8[b]];
+ }
+ /* All eight chroma blocks are stuffed individually as 4x4. */
+ for (b = 16; b < 24; b++)
+ stuff_b(cpi, xd, xd->block + b, t, PLANE_TYPE_UV, A + vp9_block2above[b],
+ L + vp9_block2left[b], TX_4X4, dry_run);
+ xd->above_context->y2 = 1; /* set to 1 whether or not has_2nd_order was true */
+ xd->left_context->y2 = 1;
+}
+
+void vp9_stuff_mb(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) {
+  /* Stuff EOB-only tokens for the macroblock using the routine that matches
+   * its transform size; a dry run restores the caller's token pointer. */
+  TOKENEXTRA *const saved_pos = *t;
+  const int mode = xd->mode_info_context->mbmi.mode;
+  switch (xd->mode_info_context->mbmi.txfm_size) {
+    case TX_16X16:
+      stuff_mb_16x16(cpi, xd, t, dry_run);
+      break;
+    case TX_8X8:
+      if (mode == I8X8_PRED || mode == SPLITMV)
+        stuff_mb_8x8_4x4uv(cpi, xd, t, dry_run);
+      else
+        stuff_mb_8x8(cpi, xd, t, dry_run);
+      break;
+    default:
+      stuff_mb_4x4(cpi, xd, t, dry_run);
+      break;
+  }
+  if (dry_run) *t = saved_pos;
+}
+
+void vp9_fix_contexts(MACROBLOCKD *xd) {
+  /* Clear the above/left entropy contexts.  For B_PRED, I8X8_PRED and SPLITMV
+   * (unless the transform is 16x16) the last byte is left out of the clear
+   * and the y2 context is set to 1; otherwise everything is zeroed. */
+  const int mode = xd->mode_info_context->mbmi.mode;
+  const int partial = (mode == B_PRED || mode == I8X8_PRED || mode == SPLITMV) &&
+                      xd->mode_info_context->mbmi.txfm_size != TX_16X16;
+  const size_t bytes = sizeof(ENTROPY_CONTEXT_PLANES) - (partial ? 1 : 0);
+
+  vpx_memset(xd->above_context, 0, bytes);
+  vpx_memset(xd->left_context, 0, bytes);
+  if (partial) {
+    xd->above_context->y2 = 1;
+    xd->left_context->y2 = 1;
+  }
+}
diff --git a/vp9/encoder/vp9_tokenize.h b/vp9/encoder/vp9_tokenize.h
new file mode 100644
index 0000000..868909b
--- /dev/null
+++ b/vp9/encoder/vp9_tokenize.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_ENCODER_VP9_TOKENIZE_H_
+#define VP9_ENCODER_VP9_TOKENIZE_H_
+
+#include "vp9/common/vp9_entropy.h"
+#include "vp9/encoder/vp9_block.h"
+
+void vp9_tokenize_initialize();
+
+typedef struct {
+ short Token;
+ short Extra;
+} TOKENVALUE;
+
+typedef struct {
+ const vp9_prob *context_tree;
+ short Extra;
+ unsigned char Token;
+ unsigned char skip_eob_node;
+} TOKENEXTRA;
+
+extern int vp9_mby_is_skippable_4x4(MACROBLOCKD *xd, int has_y2_block);
+extern int vp9_mbuv_is_skippable_4x4(MACROBLOCKD *xd);
+extern int vp9_mby_is_skippable_8x8(MACROBLOCKD *xd, int has_y2_block);
+extern int vp9_mbuv_is_skippable_8x8(MACROBLOCKD *xd);
+extern int vp9_mby_is_skippable_16x16(MACROBLOCKD *xd);
+
+struct VP9_COMP;
+
+extern void vp9_tokenize_mb(struct VP9_COMP *cpi, MACROBLOCKD *xd,
+ TOKENEXTRA **t, int dry_run);
+
+extern void vp9_stuff_mb(struct VP9_COMP *cpi, MACROBLOCKD *xd,
+ TOKENEXTRA **t, int dry_run);
+
+extern void vp9_fix_contexts(MACROBLOCKD *xd);
+
+#ifdef ENTROPY_STATS
+void init_context_counters();
+void print_context_counters();
+
+extern INT64 context_counters[BLOCK_TYPES][COEF_BANDS]
+ [PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS];
+extern INT64 context_counters_8x8[BLOCK_TYPES_8X8][COEF_BANDS]
+ [PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS];
+extern INT64 context_counters_16x16[BLOCK_TYPES_16X16][COEF_BANDS]
+ [PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS];
+#endif
+
+extern const int *vp9_dct_value_cost_ptr;
+/* TODO: The Token field should be broken out into a separate char array to
+ * improve cache locality, since it's needed for costing when the rest of the
+ * fields are not.
+ */
+extern const TOKENVALUE *vp9_dct_value_tokens_ptr;
+
+#endif /* VP9_ENCODER_VP9_TOKENIZE_H_ */
diff --git a/vp9/encoder/vp9_treewriter.c b/vp9/encoder/vp9_treewriter.c
new file mode 100644
index 0000000..8e25281
--- /dev/null
+++ b/vp9/encoder/vp9_treewriter.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vp9/encoder/vp9_treewriter.h"
+
+static void cost( /* Fill C[] with the cost of each leaf reachable from tree index i. */
+ int *const C, /* out: one cost per leaf, written at C[-T[k]] */
+ vp9_tree T,
+ const vp9_prob *const P,
+ int i, /* tree index to start from */
+ int c /* cost accumulated on the way to this node */
+) {
+ const vp9_prob p = P [i >> 1]; /* indices i and i + 1 share one probability */
+ /* `++i & 1` loops while the incremented index is odd: an even i visits i, i + 1. */
+ do {
+ const vp9_tree_index j = T[i];
+ const int d = c + vp9_cost_bit(p, i & 1); /* branch bit is the index parity */
+
+ if (j <= 0) /* leaf: -j is the slot in C */
+ C[-j] = d;
+ else
+ cost(C, T, P, j, d);
+ } while (++i & 1);
+}
+void vp9_cost_tokens(int *c, const vp9_prob *p, vp9_tree t) {
+ cost(c, t, p, 0, 0); /* walk the tree from index 0 with zero starting cost */
+}
+
+void vp9_cost_tokens_skip(int *c, const vp9_prob *p, vp9_tree t) {
+ cost(c, t, p, 2, 0); /* start at tree index 2; only leaves below it are written */
+}
diff --git a/vp9/encoder/vp9_treewriter.h b/vp9/encoder/vp9_treewriter.h
new file mode 100644
index 0000000..5da4a17
--- /dev/null
+++ b/vp9/encoder/vp9_treewriter.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_ENCODER_VP9_TREEWRITER_H_
+#define VP9_ENCODER_VP9_TREEWRITER_H_
+
+/* Trees map alphabets into huffman-like codes suitable for an arithmetic
+ bit coder. Timothy S Murphy 11 October 2004 */
+
+#include "vp9/common/vp9_treecoder.h"
+
+#include "vp9/encoder/vp9_boolhuff.h" /* for now */
+
+typedef BOOL_CODER vp9_writer;
+
+#define vp9_write encode_bool
+#define vp9_write_literal vp9_encode_value
+#define vp9_write_bit(W, V) vp9_write(W, V, vp9_prob_half)
+
+/* Approximate length of an encoded bool in 256ths of a bit at given prob */
+
+#define vp9_cost_zero(x) (vp9_prob_cost[x])
+#define vp9_cost_one(x) vp9_cost_zero(vp9_complement(x))
+
+#define vp9_cost_bit(x, b) vp9_cost_zero((b) ? vp9_complement(x) : (x))
+
+/* VP8BC version is scaled by 2^20 rather than 2^8; see bool_coder.h */
+
+
+/* cost_branch returns whole bits; cost_branch256 returns 256ths of a bit. */
+
+static __inline unsigned int cost_branch(const unsigned int ct[2],
+ vp9_prob p) {
+ /* Cost of ct[0] zeros plus ct[1] ones at probability p, shifted right by 8. */
+ return ((ct[0] * vp9_cost_zero(p))
+ + (ct[1] * vp9_cost_one(p))) >> 8;
+}
+
+static __inline unsigned int cost_branch256(const unsigned int ct[2],
+ vp9_prob p) {
+ /* Cost of ct[0] zeros plus ct[1] ones at probability p, with no final shift. */
+ return ((ct[0] * vp9_cost_zero(p))
+ + (ct[1] * vp9_cost_one(p)));
+}
+
+/* Small functions to write explicit values and tokens, as well as
+ estimate their lengths. */
+
+static __inline void treed_write(vp9_writer *const w,
+                                 vp9_tree t,
+                                 const vp9_prob *const p,
+                                 int v,
+                                 /* number of bits in v, assumed nonzero */
+                                 int n) {
+  vp9_tree_index node = 0;  /* current position in t, starting at index 0 */
+  for (;;) {  /* write the n low bits of v, most significant first */
+    const int bit = (v >> --n) & 1;
+    vp9_write(w, bit, p[node >> 1]);
+    node = t[node + bit];
+    if (!n) return;
+  }
+}
+
+static __inline void write_token(vp9_writer *const w,
+ vp9_tree t,
+ const vp9_prob *const p,
+ vp9_token *const x) {
+ treed_write(w, t, p, x->value, x->Len); /* write x's Len-bit value through tree t */
+}
+
+static __inline int treed_cost(vp9_tree t,
+                               const vp9_prob *const p,
+                               int v,
+                               /* number of bits in v, assumed nonzero */
+                               int n) {
+  /* Sum of the per-bit costs of coding the n low bits of v through tree t. */
+  vp9_tree_index node = 0;
+  int total = 0;
+
+  for (;;) {
+    const int bit = (v >> --n) & 1;
+    total += vp9_cost_bit(p[node >> 1], bit);
+    node = t[node + bit];
+    if (!n) return total;
+  }
+}
+
+static __inline int cost_token(vp9_tree t,
+ const vp9_prob *const p,
+ vp9_token *const x) {
+ return treed_cost(t, p, x->value, x->Len); /* cost of x's Len-bit value in tree t */
+}
+
+/* Fill array of costs for all possible token values. */
+
+void vp9_cost_tokens(int *Costs, const vp9_prob *, vp9_tree);
+
+void vp9_cost_tokens_skip(int *c, const vp9_prob *p, vp9_tree t);
+
+#endif
diff --git a/vp9/encoder/vp9_variance.h b/vp9/encoder/vp9_variance.h
new file mode 100644
index 0000000..87036c1
--- /dev/null
+++ b/vp9/encoder/vp9_variance.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_ENCODER_VP9_VARIANCE_H_
+#define VP9_ENCODER_VP9_VARIANCE_H_
+/* Single-reference compare returning unsigned int; takes a max_sad bound. */
+typedef unsigned int(*vp9_sad_fn_t)(const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int ref_stride,
+ unsigned int max_sad);
+/* Void routine over src/ref with a count n. NOTE(review): ref_ptr is const. */
+typedef void (*vp9_copy32xn_fn_t)(const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int ref_stride,
+ int n);
+/* Multi-result compare: presumably fills the unsigned int sad_array. */
+typedef void (*vp9_sad_multi_fn_t)(const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int ref_stride,
+ unsigned int *sad_array);
+/* Same shape as vp9_sad_multi_fn_t, but sad_array is unsigned short. */
+typedef void (*vp9_sad_multi1_fn_t)(const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int ref_stride,
+ unsigned short *sad_array);
+/* Variant whose ref_ptr is an array of reference pointers. */
+typedef void (*vp9_sad_multi_d_fn_t)(const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char ** ref_ptr,
+ int ref_stride, unsigned int *sad_array);
+/* Returns unsigned int and takes an extra sse pointer (presumably output). */
+typedef unsigned int (*vp9_variance_fn_t)(const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int ref_stride,
+ unsigned int *sse);
+/* Like vp9_variance_fn_t with an additional (xoffset, yoffset) pair. */
+typedef unsigned int (*vp9_subpixvariance_fn_t)(const unsigned char *src_ptr,
+ int source_stride,
+ int xoffset,
+ int yoffset,
+ const unsigned char *ref_ptr,
+ int Refstride,
+ unsigned int *sse);
+/* Takes five unsigned long pointers (presumably output accumulators). */
+typedef void (*vp9_ssimpf_fn_t)(unsigned char *s, int sp, unsigned char *r,
+ int rp, unsigned long *sum_s,
+ unsigned long *sum_r, unsigned long *sum_sq_s,
+ unsigned long *sum_sq_r,
+ unsigned long *sum_sxr);
+/* Takes a single const short pointer; returns unsigned int. */
+typedef unsigned int (*vp9_getmbss_fn_t)(const short *);
+/* Source/reference pair with strides and no extra output parameter. */
+typedef unsigned int (*vp9_get16x16prederror_fn_t)(const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int ref_stride);
+/* Bundle of function pointers of the callback types declared in this header. */
+typedef struct variance_vtable {
+ vp9_sad_fn_t sdf; /* single-reference compare with max_sad */
+ vp9_variance_fn_t vf; /* variance callback with an sse pointer */
+ vp9_subpixvariance_fn_t svf; /* variance at an (xoffset, yoffset) */
+ vp9_variance_fn_t svf_halfpix_h; /* presumably half-pel horizontal - confirm */
+ vp9_variance_fn_t svf_halfpix_v; /* presumably half-pel vertical - confirm */
+ vp9_variance_fn_t svf_halfpix_hv; /* presumably half-pel in both - confirm */
+ vp9_sad_multi_fn_t sdx3f; /* unsigned int result array */
+ vp9_sad_multi1_fn_t sdx8f; /* unsigned short result array */
+ vp9_sad_multi_d_fn_t sdx4df; /* array of reference pointers */
+ vp9_copy32xn_fn_t copymem; /* routine taking a count n */
+} vp9_variance_fn_ptr_t;
+
+#endif
diff --git a/vp9/encoder/vp9_variance_c.c b/vp9/encoder/vp9_variance_c.c
new file mode 100644
index 0000000..a91cb12
--- /dev/null
+++ b/vp9/encoder/vp9_variance_c.c
@@ -0,0 +1,340 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vp9/encoder/vp9_variance.h"
+#include "vp9/common/vp9_filter.h"
+#include "vp9/common/vp9_subpelvar.h"
+#include "vpx/vpx_integer.h"
+
+unsigned int vp9_get_mb_ss_c(const short *src_ptr) {
+ /* Sum of squares of the 256 values starting at src_ptr. */
+ const short *const end = src_ptr + 256;
+ unsigned int total = 0;
+ for (; src_ptr != end; ++src_ptr) {
+ total += (*src_ptr) * (*src_ptr);
+ }
+ return total;
+}
+
+
+
+#if CONFIG_SUPERBLOCKS
+unsigned int vp9_variance32x32_c(const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ /* sq is reported as *sse; sum * sum is widened to 64 bits before >> 10. */
+ unsigned int sq;
+ int sum;
+ variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 32, &sq, &sum);
+ *sse = sq;
+ return sq - (((int64_t)sum * sum) >> 10);
+}
+#endif
+
+unsigned int vp9_variance16x16_c(const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ /* sq is reported as *sse; >> 8 presumably divides by the 16 * 16 pixels. */
+ unsigned int sq;
+ int sum;
+ variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &sq, &sum);
+ *sse = sq;
+ return sq - (((unsigned int)sum * sum) >> 8);
+}
+
+unsigned int vp9_variance8x16_c(const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ /* sq is reported as *sse; >> 7 presumably divides by the 8 * 16 pixels. */
+ unsigned int sq;
+ int sum;
+ variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &sq, &sum);
+ *sse = sq;
+ return sq - (((unsigned int)sum * sum) >> 7);
+}
+
+unsigned int vp9_variance16x8_c(const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ /* sq is reported as *sse; >> 7 presumably divides by the 16 * 8 pixels. */
+ unsigned int sq;
+ int sum;
+ variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &sq, &sum);
+ *sse = sq;
+ return sq - (((unsigned int)sum * sum) >> 7);
+}
+
+
+unsigned int vp9_variance8x8_c(const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ /* sq is reported as *sse; >> 6 presumably divides by the 8 * 8 pixels. */
+ unsigned int sq;
+ int sum;
+ variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &sq, &sum);
+ *sse = sq;
+ return sq - (((unsigned int)sum * sum) >> 6);
+}
+
+unsigned int vp9_variance4x4_c(const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ /* sq is reported as *sse; >> 4 presumably divides by the 4 * 4 pixels. */
+ unsigned int sq;
+ int sum;
+ variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, &sq, &sum);
+ *sse = sq;
+ return sq - (((unsigned int)sum * sum) >> 4);
+}
+
+
+unsigned int vp9_mse16x16_c(const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ /* Only the first variance() output is used; nothing is subtracted. */
+ unsigned int sq;
+ int sum;
+ variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &sq, &sum);
+ *sse = sq;
+ return sq;
+}
+
+
+unsigned int vp9_sub_pixel_variance4x4_c(const unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ const unsigned char *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse) {
+ /* Filter taps are looked up by the offset in each direction. */
+ const short *const horiz_taps = vp9_bilinear_filters[xoffset];
+ const short *const vert_taps = vp9_bilinear_filters[yoffset];
+ unsigned short first_pass[5 * 4]; /* output of the horizontal pass */
+ unsigned char filtered[20 * 16]; /* output of the vertical pass */
+
+ /* Horizontal pass first, then the vertical pass over its output; the
+ filtered block is then compared against dst_ptr as a plain 4x4 block
+ with stride 4. */
+ var_filter_block2d_bil_first_pass(src_ptr, first_pass, src_pixels_per_line,
+ 1, 5, 4, horiz_taps);
+ var_filter_block2d_bil_second_pass(first_pass, filtered, 4, 4, 4, 4,
+ vert_taps);
+ return vp9_variance4x4_c(filtered, 4, dst_ptr, dst_pixels_per_line, sse);
+}
+
+
+unsigned int vp9_sub_pixel_variance8x8_c(const unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ const unsigned char *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse) {
+ /* Horizontal then vertical filter pass, taps chosen by the offsets. */
+ const short *const horiz_taps = vp9_bilinear_filters[xoffset];
+ const short *const vert_taps = vp9_bilinear_filters[yoffset];
+ unsigned short first_pass[9 * 8]; /* output of the horizontal pass */
+ unsigned char filtered[20 * 16]; /* output of the vertical pass */
+
+ var_filter_block2d_bil_first_pass(src_ptr, first_pass, src_pixels_per_line,
+ 1, 9, 8, horiz_taps);
+ var_filter_block2d_bil_second_pass(first_pass, filtered, 8, 8, 8, 8,
+ vert_taps);
+ return vp9_variance8x8_c(filtered, 8, dst_ptr, dst_pixels_per_line, sse);
+}
+
+unsigned int vp9_sub_pixel_variance16x16_c(const unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ const unsigned char *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse) {
+ /* Horizontal then vertical filter pass, taps chosen by the offsets. */
+ const short *const horiz_taps = vp9_bilinear_filters[xoffset];
+ const short *const vert_taps = vp9_bilinear_filters[yoffset];
+ unsigned short first_pass[17 * 16]; /* output of the horizontal pass */
+ unsigned char filtered[20 * 16]; /* output of the vertical pass */
+
+ var_filter_block2d_bil_first_pass(src_ptr, first_pass, src_pixels_per_line,
+ 1, 17, 16, horiz_taps);
+ var_filter_block2d_bil_second_pass(first_pass, filtered, 16, 16, 16, 16,
+ vert_taps);
+ return vp9_variance16x16_c(filtered, 16, dst_ptr, dst_pixels_per_line, sse);
+}
+
+#if CONFIG_SUPERBLOCKS
+unsigned int vp9_sub_pixel_variance32x32_c(const unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ const unsigned char *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse) {
+ /* Horizontal then vertical filter pass, taps chosen by the offsets. */
+ const short *const horiz_taps = vp9_bilinear_filters[xoffset];
+ const short *const vert_taps = vp9_bilinear_filters[yoffset];
+ unsigned short first_pass[33 * 32]; /* output of the horizontal pass */
+ unsigned char filtered[36 * 32]; /* output of the vertical pass */
+
+ var_filter_block2d_bil_first_pass(src_ptr, first_pass, src_pixels_per_line,
+ 1, 33, 32, horiz_taps);
+ var_filter_block2d_bil_second_pass(first_pass, filtered, 32, 32, 32, 32,
+ vert_taps);
+ return vp9_variance32x32_c(filtered, 32, dst_ptr, dst_pixels_per_line, sse);
+}
+#endif
+
+unsigned int vp9_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 8, 0,
+ ref_ptr, recon_stride, sse); /* xoffset = 8, yoffset = 0 */
+}
+
+#if CONFIG_SUPERBLOCKS
+unsigned int vp9_variance_halfpixvar32x32_h_c(const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 0,
+ ref_ptr, recon_stride, sse); /* xoffset = 8, yoffset = 0 */
+}
+#endif
+
+
+unsigned int vp9_variance_halfpixvar16x16_v_c(const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 0, 8,
+ ref_ptr, recon_stride, sse); /* xoffset = 0, yoffset = 8 */
+}
+
+#if CONFIG_SUPERBLOCKS
+unsigned int vp9_variance_halfpixvar32x32_v_c(const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 0, 8,
+ ref_ptr, recon_stride, sse); /* xoffset = 0, yoffset = 8 */
+}
+#endif
+
+unsigned int vp9_variance_halfpixvar16x16_hv_c(const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 8, 8,
+ ref_ptr, recon_stride, sse); /* xoffset = 8, yoffset = 8 */
+}
+
+#if CONFIG_SUPERBLOCKS
+unsigned int vp9_variance_halfpixvar32x32_hv_c(const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 8,
+ ref_ptr, recon_stride, sse); /* xoffset = 8, yoffset = 8 */
+}
+#endif
+
+unsigned int vp9_sub_pixel_mse16x16_c(const unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ const unsigned char *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse) {
+ vp9_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line,
+ xoffset, yoffset, dst_ptr,
+ dst_pixels_per_line, sse); /* return value ignored; only *sse is used */
+ return *sse;
+}
+
+#if CONFIG_SUPERBLOCKS
+unsigned int vp9_sub_pixel_mse32x32_c(const unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ const unsigned char *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse) {
+ vp9_sub_pixel_variance32x32_c(src_ptr, src_pixels_per_line,
+ xoffset, yoffset, dst_ptr,
+ dst_pixels_per_line, sse); /* return value ignored; only *sse is used */
+ return *sse;
+}
+#endif
+
+unsigned int vp9_sub_pixel_variance16x8_c(const unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ const unsigned char *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse) {
+ /* Horizontal then vertical filter pass, taps chosen by the offsets. */
+ const short *const horiz_taps = vp9_bilinear_filters[xoffset];
+ const short *const vert_taps = vp9_bilinear_filters[yoffset];
+ unsigned short first_pass[16 * 9]; /* output of the horizontal pass */
+ unsigned char filtered[20 * 16]; /* output of the vertical pass */
+
+ var_filter_block2d_bil_first_pass(src_ptr, first_pass, src_pixels_per_line,
+ 1, 9, 16, horiz_taps);
+ var_filter_block2d_bil_second_pass(first_pass, filtered, 16, 16, 8, 16,
+ vert_taps);
+ return vp9_variance16x8_c(filtered, 16, dst_ptr, dst_pixels_per_line, sse);
+}
+
+unsigned int vp9_sub_pixel_variance8x16_c(const unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ const unsigned char *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse) {
+ /* NOTE(review): the first pass is asked for 17 and 8 below, yet the
+ scratch buffer is sized 9 * 16 (144 entries) - confirm it is enough. */
+ const short *const horiz_taps = vp9_bilinear_filters[xoffset];
+ const short *const vert_taps = vp9_bilinear_filters[yoffset];
+ unsigned short first_pass[9 * 16]; /* output of the horizontal pass */
+ unsigned char filtered[20 * 16]; /* output of the vertical pass */
+
+ var_filter_block2d_bil_first_pass(src_ptr, first_pass, src_pixels_per_line,
+ 1, 17, 8, horiz_taps);
+ var_filter_block2d_bil_second_pass(first_pass, filtered, 8, 8, 16, 8,
+ vert_taps);
+ return vp9_variance8x16_c(filtered, 8, dst_ptr, dst_pixels_per_line, sse);
+}
+
diff --git a/vp9/encoder/x86/vp9_dct_mmx.asm b/vp9/encoder/x86/vp9_dct_mmx.asm
new file mode 100644
index 0000000..54766d8
--- /dev/null
+++ b/vp9/encoder/x86/vp9_dct_mmx.asm
@@ -0,0 +1,241 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+; MMX 4x4 forward DCT: two passes, each preceded by a 4x4 word transpose.
+;void vp9_short_fdct4x4_mmx(short *input, short *output, int pitch)
+global sym(vp9_short_fdct4x4_mmx) PRIVATE
+sym(vp9_short_fdct4x4_mmx):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 3
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rsi, arg(0) ; input
+ mov rdi, arg(1) ; output
+
+ movsxd rax, dword ptr arg(2) ;pitch (used as a byte offset between rows)
+
+ lea rcx, [rsi + rax*2] ; rcx = address of row 2
+ ; read the input data
+ movq mm0, [rsi]
+ movq mm1, [rsi + rax]
+
+ movq mm2, [rcx]
+ movq mm4, [rcx + rax]
+
+ ; transpose for the first stage
+ movq mm3, mm0 ; 00 01 02 03
+ movq mm5, mm2 ; 20 21 22 23
+
+ punpcklwd mm0, mm1 ; 00 10 01 11
+ punpckhwd mm3, mm1 ; 02 12 03 13
+
+ punpcklwd mm2, mm4 ; 20 30 21 31
+ punpckhwd mm5, mm4 ; 22 32 23 33
+
+ movq mm1, mm0 ; 00 10 01 11
+ punpckldq mm0, mm2 ; 00 10 20 30
+
+ punpckhdq mm1, mm2 ; 01 11 21 31
+
+ movq mm2, mm3 ; 02 12 03 13
+ punpckldq mm2, mm5 ; 02 12 22 32
+
+ punpckhdq mm3, mm5 ; 03 13 23 33
+
+ ; mm0 0
+ ; mm1 1
+ ; mm2 2
+ ; mm3 3
+
+ ; first stage
+ movq mm5, mm0
+ movq mm4, mm1
+
+ paddw mm0, mm3 ; a1 = 0 + 3
+ paddw mm1, mm2 ; b1 = 1 + 2
+
+ psubw mm4, mm2 ; c1 = 1 - 2
+ psubw mm5, mm3 ; d1 = 0 - 3
+
+ psllw mm5, 3
+ psllw mm4, 3
+
+ psllw mm0, 3
+ psllw mm1, 3
+
+ ; output 0 and 2
+ movq mm2, mm0 ; a1
+
+ paddw mm0, mm1 ; op[0] = a1 + b1
+ psubw mm2, mm1 ; op[2] = a1 - b1
+
+ ; output 1 and 3
+ ; interleave c1, d1
+ movq mm1, mm5 ; d1
+ punpcklwd mm1, mm4 ; c1 d1
+ punpckhwd mm5, mm4 ; c1 d1
+
+ movq mm3, mm1
+ movq mm4, mm5
+
+ pmaddwd mm1, MMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352
+ pmaddwd mm4, MMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352
+
+ pmaddwd mm3, MMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352
+ pmaddwd mm5, MMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352
+
+ paddd mm1, MMWORD PTR[GLOBAL(_14500)]
+ paddd mm4, MMWORD PTR[GLOBAL(_14500)]
+ paddd mm3, MMWORD PTR[GLOBAL(_7500)]
+ paddd mm5, MMWORD PTR[GLOBAL(_7500)]
+
+ psrad mm1, 12 ; (c1 * 2217 + d1 * 5352 + 14500)>>12
+ psrad mm4, 12 ; (c1 * 2217 + d1 * 5352 + 14500)>>12
+ psrad mm3, 12 ; (d1 * 2217 - c1 * 5352 + 7500)>>12
+ psrad mm5, 12 ; (d1 * 2217 - c1 * 5352 + 7500)>>12
+
+ packssdw mm1, mm4 ; op[1]
+ packssdw mm3, mm5 ; op[3]
+
+ ; done with vertical
+ ; transpose for the second stage
+ movq mm4, mm0 ; 00 10 20 30
+ movq mm5, mm2 ; 02 12 22 32
+
+ punpcklwd mm0, mm1 ; 00 01 10 11
+ punpckhwd mm4, mm1 ; 20 21 30 31
+
+ punpcklwd mm2, mm3 ; 02 03 12 13
+ punpckhwd mm5, mm3 ; 22 23 32 33
+
+ movq mm1, mm0 ; 00 01 10 11
+ punpckldq mm0, mm2 ; 00 01 02 03
+
+ punpckhdq mm1, mm2 ; 10 11 12 13
+
+ movq mm2, mm4 ; 20 21 30 31
+ punpckldq mm2, mm5 ; 20 21 22 23
+
+ punpckhdq mm4, mm5 ; 30 31 32 33
+
+ ; mm0 0
+ ; mm1 1
+ ; mm2 2
+ ; mm4 3
+
+ movq mm5, mm0
+ movq mm3, mm1
+
+ paddw mm0, mm4 ; a1 = 0 + 3
+ paddw mm1, mm2 ; b1 = 1 + 2
+
+ psubw mm3, mm2 ; c1 = 1 - 2
+ psubw mm5, mm4 ; d1 = 0 - 3
+
+ pxor mm6, mm6 ; zero out for compare
+
+ pcmpeqw mm6, mm5 ; all-ones in each word where d1 == 0
+
+ pandn mm6, MMWORD PTR[GLOBAL(_cmp_mask)] ; invert and mask:
+ ; 1 in each word where d1 != 0
+
+ ; output 0 and 2
+ movq mm2, mm0 ; a1
+
+ paddw mm0, mm1 ; a1 + b1
+ psubw mm2, mm1 ; a1 - b1
+
+ paddw mm0, MMWORD PTR[GLOBAL(_7w)]
+ paddw mm2, MMWORD PTR[GLOBAL(_7w)]
+
+ psraw mm0, 4 ; op[0] = (a1 + b1 + 7)>>4
+ psraw mm2, 4 ; op[8] = (a1 - b1 + 7)>>4
+
+ movq MMWORD PTR[rdi + 0 ], mm0
+ movq MMWORD PTR[rdi + 16], mm2
+
+ ; output 1 and 3
+ ; interleave c1, d1
+ movq mm1, mm5 ; d1
+ punpcklwd mm1, mm3 ; c1 d1
+ punpckhwd mm5, mm3 ; c1 d1
+
+ movq mm3, mm1
+ movq mm4, mm5
+
+ pmaddwd mm1, MMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352
+ pmaddwd mm4, MMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352
+
+ pmaddwd mm3, MMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352
+ pmaddwd mm5, MMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352
+
+ paddd mm1, MMWORD PTR[GLOBAL(_12000)]
+ paddd mm4, MMWORD PTR[GLOBAL(_12000)]
+ paddd mm3, MMWORD PTR[GLOBAL(_51000)]
+ paddd mm5, MMWORD PTR[GLOBAL(_51000)]
+
+ psrad mm1, 16 ; (c1 * 2217 + d1 * 5352 + 12000)>>16
+ psrad mm4, 16 ; (c1 * 2217 + d1 * 5352 + 12000)>>16
+ psrad mm3, 16 ; (d1 * 2217 - c1 * 5352 + 51000)>>16
+ psrad mm5, 16 ; (d1 * 2217 - c1 * 5352 + 51000)>>16
+
+ packssdw mm1, mm4 ; op[4]
+ packssdw mm3, mm5 ; op[12]
+
+ paddw mm1, mm6 ; op[4] += (d1!=0)
+
+ movq MMWORD PTR[rdi + 8 ], mm1
+ movq MMWORD PTR[rdi + 24], mm3
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+SECTION_RODATA
+align 8
+_5352_2217: ; (5352, 2217) word pairs for pmaddwd
+ dw 5352
+ dw 2217
+ dw 5352
+ dw 2217
+align 8
+_2217_neg5352: ; (2217, -5352) word pairs for pmaddwd
+ dw 2217
+ dw -5352
+ dw 2217
+ dw -5352
+align 8
+_cmp_mask: ; 1 in every word; used with pandn for (d1 != 0)
+ times 4 dw 1
+align 8
+_7w: ; word rounding term added before psraw 4
+ times 4 dw 7
+align 8
+_14500: ; first-pass rounding term (before psrad 12)
+ times 2 dd 14500
+align 8
+_7500: ; first-pass rounding term (before psrad 12)
+ times 2 dd 7500
+align 8
+_12000: ; second-pass rounding term (before psrad 16)
+ times 2 dd 12000
+align 8
+_51000: ; second-pass rounding term (before psrad 16)
+ times 2 dd 51000
diff --git a/vp9/encoder/x86/vp9_dct_mmx.h b/vp9/encoder/x86/vp9_dct_mmx.h
new file mode 100644
index 0000000..3bac7c8
--- /dev/null
+++ b/vp9/encoder/x86/vp9_dct_mmx.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_ENCODER_X86_VP9_DCT_MMX_H_
+#define VP9_ENCODER_X86_VP9_DCT_MMX_H_
+/* Assembly routine; the symbol is defined in vp9_dct_mmx.asm. */
+extern void vp9_short_fdct4x4_mmx(short *input, short *output, int pitch);
+
+
+#endif /* VP9_ENCODER_X86_VP9_DCT_MMX_H_ */
diff --git a/vp9/encoder/x86/vp9_dct_sse2.asm b/vp9/encoder/x86/vp9_dct_sse2.asm
new file mode 100644
index 0000000..57b81a5
--- /dev/null
+++ b/vp9/encoder/x86/vp9_dct_sse2.asm
@@ -0,0 +1,432 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+; Prologue: binds input/output/pitch to the argument registers of each ABI.
+%macro STACK_FRAME_CREATE 0
+%if ABI_IS_32BIT
+ %define input rsi
+ %define output rdi
+ %define pitch rax
+ push rbp
+ mov rbp, rsp
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rsi, arg(0)
+ mov rdi, arg(1)
+
+ movsxd rax, dword ptr arg(2)
+ lea rcx, [rsi + rax*2] ; NOTE(review): rcx is not read by the functions in this file
+%else
+ %ifidn __OUTPUT_FORMAT__,x64
+ %define input rcx
+ %define output rdx
+ %define pitch r8
+ SAVE_XMM 7, u
+ %else
+ %define input rdi
+ %define output rsi
+ %define pitch rdx
+ %endif
+%endif
+%endmacro
+; Epilogue: clears the aliases, undoes STACK_FRAME_CREATE and returns.
+%macro STACK_FRAME_DESTROY 0
+ %define input
+ %define output
+ %define pitch
+
+%if ABI_IS_32BIT
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ pop rbp
+%else
+ %ifidn __OUTPUT_FORMAT__,x64
+ RESTORE_XMM
+ %endif
+%endif
+ ret
+%endmacro
+; SSE2 4x4 forward DCT: two passes; writes 16 shorts (32 bytes) at output.
+;void vp9_short_fdct4x4_sse2(short *input, short *output, int pitch)
+global sym(vp9_short_fdct4x4_sse2) PRIVATE
+sym(vp9_short_fdct4x4_sse2):
+
+ STACK_FRAME_CREATE
+
+ movq xmm0, MMWORD PTR[input ] ;03 02 01 00
+ movq xmm2, MMWORD PTR[input+ pitch] ;13 12 11 10
+ lea input, [input+2*pitch]
+ movq xmm1, MMWORD PTR[input ] ;23 22 21 20
+ movq xmm3, MMWORD PTR[input+ pitch] ;33 32 31 30
+
+ punpcklqdq xmm0, xmm2 ;13 12 11 10 03 02 01 00
+ punpcklqdq xmm1, xmm3 ;33 32 31 30 23 22 21 20
+
+ movdqa xmm2, xmm0
+ punpckldq xmm0, xmm1 ;23 22 03 02 21 20 01 00
+ punpckhdq xmm2, xmm1 ;33 32 13 12 31 30 11 10
+ movdqa xmm1, xmm0
+ punpckldq xmm0, xmm2 ;31 30 21 20 11 10 01 00
+ pshufhw xmm1, xmm1, 0b1h ;22 23 02 03 xx xx xx xx
+ pshufhw xmm2, xmm2, 0b1h ;32 33 12 13 xx xx xx xx
+
+ punpckhdq xmm1, xmm2 ;32 33 22 23 12 13 02 03
+ movdqa xmm3, xmm0
+ paddw xmm0, xmm1 ;b1 a1 b1 a1 b1 a1 b1 a1
+ psubw xmm3, xmm1 ;c1 d1 c1 d1 c1 d1 c1 d1
+ psllw xmm0, 3 ;b1 <<= 3 a1 <<= 3
+ psllw xmm3, 3 ;c1 <<= 3 d1 <<= 3
+
+ movdqa xmm1, xmm0
+ pmaddwd xmm0, XMMWORD PTR[GLOBAL(_mult_add)] ;a1 + b1
+ pmaddwd xmm1, XMMWORD PTR[GLOBAL(_mult_sub)] ;a1 - b1
+ movdqa xmm4, xmm3
+ pmaddwd xmm3, XMMWORD PTR[GLOBAL(_5352_2217)] ;c1*2217 + d1*5352
+ pmaddwd xmm4, XMMWORD PTR[GLOBAL(_2217_neg5352)];d1*2217 - c1*5352
+
+ paddd xmm3, XMMWORD PTR[GLOBAL(_14500)]
+ paddd xmm4, XMMWORD PTR[GLOBAL(_7500)]
+ psrad xmm3, 12 ;(c1 * 2217 + d1 * 5352 + 14500)>>12
+ psrad xmm4, 12 ;(d1 * 2217 - c1 * 5352 + 7500)>>12
+
+ packssdw xmm0, xmm1 ;op[2] op[0]
+ packssdw xmm3, xmm4 ;op[3] op[1]
+ ; 23 22 21 20 03 02 01 00
+ ;
+ ; 33 32 31 30 13 12 11 10
+ ;
+ movdqa xmm2, xmm0
+ punpcklqdq xmm0, xmm3 ;13 12 11 10 03 02 01 00
+ punpckhqdq xmm2, xmm3 ;23 22 21 20 33 32 31 30
+
+ movdqa xmm3, xmm0
+ punpcklwd xmm0, xmm2 ;32 30 22 20 12 10 02 00
+ punpckhwd xmm3, xmm2 ;33 31 23 21 13 11 03 01
+ movdqa xmm2, xmm0
+ punpcklwd xmm0, xmm3 ;13 12 11 10 03 02 01 00
+ punpckhwd xmm2, xmm3 ;33 32 31 30 23 22 21 20
+
+ movdqa xmm5, XMMWORD PTR[GLOBAL(_7)]
+ pshufd xmm2, xmm2, 04eh
+ movdqa xmm3, xmm0
+ paddw xmm0, xmm2 ;b1 b1 b1 b1 a1 a1 a1 a1
+ psubw xmm3, xmm2 ;c1 c1 c1 c1 d1 d1 d1 d1
+
+ pshufd xmm0, xmm0, 0d8h ;b1 b1 a1 a1 b1 b1 a1 a1
+ movdqa xmm2, xmm3 ;save d1 for compare
+ pshufd xmm3, xmm3, 0d8h ;c1 c1 d1 d1 c1 c1 d1 d1
+ pshuflw xmm0, xmm0, 0d8h ;b1 b1 a1 a1 b1 a1 b1 a1
+ pshuflw xmm3, xmm3, 0d8h ;c1 c1 d1 d1 c1 d1 c1 d1
+ pshufhw xmm0, xmm0, 0d8h ;b1 a1 b1 a1 b1 a1 b1 a1
+ pshufhw xmm3, xmm3, 0d8h ;c1 d1 c1 d1 c1 d1 c1 d1
+ movdqa xmm1, xmm0
+ pmaddwd xmm0, XMMWORD PTR[GLOBAL(_mult_add)] ;a1 + b1
+ pmaddwd xmm1, XMMWORD PTR[GLOBAL(_mult_sub)] ;a1 - b1
+
+ pxor xmm4, xmm4 ;zero out for compare
+ paddd xmm0, xmm5
+ paddd xmm1, xmm5
+ pcmpeqw xmm2, xmm4 ;all-ones in each word that is zero
+ psrad xmm0, 4 ;(a1 + b1 + 7)>>4
+ psrad xmm1, 4 ;(a1 - b1 + 7)>>4
+ pandn xmm2, XMMWORD PTR[GLOBAL(_cmp_mask)] ;clear upper,
+ ;and keep bit 0 of lower
+
+ movdqa xmm4, xmm3
+ pmaddwd xmm3, XMMWORD PTR[GLOBAL(_5352_2217)] ;c1*2217 + d1*5352
+ pmaddwd xmm4, XMMWORD PTR[GLOBAL(_2217_neg5352)] ;d1*2217 - c1*5352
+ paddd xmm3, XMMWORD PTR[GLOBAL(_12000)]
+ paddd xmm4, XMMWORD PTR[GLOBAL(_51000)]
+ packssdw xmm0, xmm1 ;op[8] op[0]
+ psrad xmm3, 16 ;(c1 * 2217 + d1 * 5352 + 12000)>>16
+ psrad xmm4, 16 ;(d1 * 2217 - c1 * 5352 + 51000)>>16
+
+ packssdw xmm3, xmm4 ;op[12] op[4]
+ movdqa xmm1, xmm0
+ paddw xmm3, xmm2 ;op[4] += (d1!=0)
+ punpcklqdq xmm0, xmm3 ;op[4] op[0]
+ punpckhqdq xmm1, xmm3 ;op[12] op[8]
+
+ movdqa XMMWORD PTR[output + 0], xmm0 ; movdqa: output must be 16-byte aligned
+ movdqa XMMWORD PTR[output + 16], xmm1
+
+ STACK_FRAME_DESTROY
+; SSE2 forward DCT over 8 columns x 4 rows; writes 32 shorts (64 bytes).
+;void vp9_short_fdct8x4_sse2(short *input, short *output, int pitch)
+global sym(vp9_short_fdct8x4_sse2) PRIVATE
+sym(vp9_short_fdct8x4_sse2):
+
+ STACK_FRAME_CREATE
+
+ ; read the input data
+ movdqa xmm0, [input ] ; movdqa: each row must be 16-byte aligned
+ movdqa xmm2, [input+ pitch]
+ lea input, [input+2*pitch]
+ movdqa xmm4, [input ]
+ movdqa xmm3, [input+ pitch]
+
+ ; transpose for the first stage
+ movdqa xmm1, xmm0 ; 00 01 02 03 04 05 06 07
+ movdqa xmm5, xmm4 ; 20 21 22 23 24 25 26 27
+
+ punpcklwd xmm0, xmm2 ; 00 10 01 11 02 12 03 13
+ punpckhwd xmm1, xmm2 ; 04 14 05 15 06 16 07 17
+
+ punpcklwd xmm4, xmm3 ; 20 30 21 31 22 32 23 33
+ punpckhwd xmm5, xmm3 ; 24 34 25 35 26 36 27 37
+
+ movdqa xmm2, xmm0 ; 00 10 01 11 02 12 03 13
+ punpckldq xmm0, xmm4 ; 00 10 20 30 01 11 21 31
+
+ punpckhdq xmm2, xmm4 ; 02 12 22 32 03 13 23 33
+
+ movdqa xmm4, xmm1 ; 04 14 05 15 06 16 07 17
+ punpckldq xmm4, xmm5 ; 04 14 24 34 05 15 25 35
+
+ punpckhdq xmm1, xmm5 ; 06 16 26 36 07 17 27 37
+ movdqa xmm3, xmm2 ; 02 12 22 32 03 13 23 33
+
+ punpckhqdq xmm3, xmm1 ; 03 13 23 33 07 17 27 37
+ punpcklqdq xmm2, xmm1 ; 02 12 22 32 06 16 26 36
+
+ movdqa xmm1, xmm0 ; 00 10 20 30 01 11 21 31
+ punpcklqdq xmm0, xmm4 ; 00 10 20 30 04 14 24 34
+
+ punpckhqdq xmm1, xmm4 ; 01 11 21 31 05 15 25 35
+
+ ; xmm0 0
+ ; xmm1 1
+ ; xmm2 2
+ ; xmm3 3
+
+ ; first stage
+ movdqa xmm5, xmm0
+ movdqa xmm4, xmm1
+
+ paddw xmm0, xmm3 ; a1 = 0 + 3
+ paddw xmm1, xmm2 ; b1 = 1 + 2
+
+ psubw xmm4, xmm2 ; c1 = 1 - 2
+ psubw xmm5, xmm3 ; d1 = 0 - 3
+
+ psllw xmm5, 3
+ psllw xmm4, 3
+
+ psllw xmm0, 3
+ psllw xmm1, 3
+
+ ; output 0 and 2
+ movdqa xmm2, xmm0 ; a1
+
+ paddw xmm0, xmm1 ; op[0] = a1 + b1
+ psubw xmm2, xmm1 ; op[2] = a1 - b1
+
+ ; output 1 and 3
+ ; interleave c1, d1
+ movdqa xmm1, xmm5 ; d1
+ punpcklwd xmm1, xmm4 ; c1 d1
+ punpckhwd xmm5, xmm4 ; c1 d1
+
+ movdqa xmm3, xmm1
+ movdqa xmm4, xmm5
+
+ pmaddwd xmm1, XMMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352
+ pmaddwd xmm4, XMMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352
+
+ pmaddwd xmm3, XMMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352
+ pmaddwd xmm5, XMMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352
+
+ paddd xmm1, XMMWORD PTR[GLOBAL(_14500)]
+ paddd xmm4, XMMWORD PTR[GLOBAL(_14500)]
+ paddd xmm3, XMMWORD PTR[GLOBAL(_7500)]
+ paddd xmm5, XMMWORD PTR[GLOBAL(_7500)]
+
+ psrad xmm1, 12 ; (c1 * 2217 + d1 * 5352 + 14500)>>12
+ psrad xmm4, 12 ; (c1 * 2217 + d1 * 5352 + 14500)>>12
+ psrad xmm3, 12 ; (d1 * 2217 - c1 * 5352 + 7500)>>12
+ psrad xmm5, 12 ; (d1 * 2217 - c1 * 5352 + 7500)>>12
+
+ packssdw xmm1, xmm4 ; op[1]
+ packssdw xmm3, xmm5 ; op[3]
+
+ ; done with vertical
+ ; transpose for the second stage
+ movdqa xmm4, xmm0 ; 00 10 20 30 04 14 24 34
+ movdqa xmm5, xmm2 ; 02 12 22 32 06 16 26 36
+
+ punpcklwd xmm0, xmm1 ; 00 01 10 11 20 21 30 31
+ punpckhwd xmm4, xmm1 ; 04 05 14 15 24 25 34 35
+
+ punpcklwd xmm2, xmm3 ; 02 03 12 13 22 23 32 33
+ punpckhwd xmm5, xmm3 ; 06 07 16 17 26 27 36 37
+
+ movdqa xmm1, xmm0 ; 00 01 10 11 20 21 30 31
+ punpckldq xmm0, xmm2 ; 00 01 02 03 10 11 12 13
+
+ punpckhdq xmm1, xmm2 ; 20 21 22 23 30 31 32 33
+
+ movdqa xmm2, xmm4 ; 04 05 14 15 24 25 34 35
+ punpckldq xmm2, xmm5 ; 04 05 06 07 14 15 16 17
+
+ punpckhdq xmm4, xmm5 ; 24 25 26 27 34 35 36 37
+ movdqa xmm3, xmm1 ; 20 21 22 23 30 31 32 33
+
+ punpckhqdq xmm3, xmm4 ; 30 31 32 33 34 35 36 37
+ punpcklqdq xmm1, xmm4 ; 20 21 22 23 24 25 26 27
+
+ movdqa xmm4, xmm0 ; 00 01 02 03 10 11 12 13
+ punpcklqdq xmm0, xmm2 ; 00 01 02 03 04 05 06 07
+
+ punpckhqdq xmm4, xmm2 ; 10 11 12 13 14 15 16 17
+
+ ; xmm0 0
+ ; xmm4 1
+ ; xmm1 2
+ ; xmm3 3
+
+ movdqa xmm5, xmm0
+ movdqa xmm2, xmm1
+
+ paddw xmm0, xmm3 ; a1 = 0 + 3
+ paddw xmm1, xmm4 ; b1 = 1 + 2
+
+ psubw xmm4, xmm2 ; c1 = 1 - 2
+ psubw xmm5, xmm3 ; d1 = 0 - 3
+
+ pxor xmm6, xmm6 ; zero out for compare
+
+ pcmpeqw xmm6, xmm5 ; all-ones in each word where d1 == 0
+
+ pandn xmm6, XMMWORD PTR[GLOBAL(_cmp_mask8x4)] ; invert and mask:
+ ; 1 in each word where d1 != 0
+
+ ; output 0 and 2
+ movdqa xmm2, xmm0 ; a1
+
+ paddw xmm0, xmm1 ; a1 + b1
+ psubw xmm2, xmm1 ; a1 - b1
+
+ paddw xmm0, XMMWORD PTR[GLOBAL(_7w)]
+ paddw xmm2, XMMWORD PTR[GLOBAL(_7w)]
+
+ psraw xmm0, 4 ; op[0] = (a1 + b1 + 7)>>4
+ psraw xmm2, 4 ; op[8] = (a1 - b1 + 7)>>4
+
+ ; output 1 and 3
+ ; interleave c1, d1
+ movdqa xmm1, xmm5 ; d1
+ punpcklwd xmm1, xmm4 ; c1 d1
+ punpckhwd xmm5, xmm4 ; c1 d1
+
+ movdqa xmm3, xmm1
+ movdqa xmm4, xmm5
+
+ pmaddwd xmm1, XMMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352
+ pmaddwd xmm4, XMMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352
+
+ pmaddwd xmm3, XMMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352
+ pmaddwd xmm5, XMMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352
+
+ paddd xmm1, XMMWORD PTR[GLOBAL(_12000)]
+ paddd xmm4, XMMWORD PTR[GLOBAL(_12000)]
+ paddd xmm3, XMMWORD PTR[GLOBAL(_51000)]
+ paddd xmm5, XMMWORD PTR[GLOBAL(_51000)]
+
+ psrad xmm1, 16 ; (c1 * 2217 + d1 * 5352 + 12000)>>16
+ psrad xmm4, 16 ; (c1 * 2217 + d1 * 5352 + 12000)>>16
+ psrad xmm3, 16 ; (d1 * 2217 - c1 * 5352 + 51000)>>16
+ psrad xmm5, 16 ; (d1 * 2217 - c1 * 5352 + 51000)>>16
+
+ packssdw xmm1, xmm4 ; op[4]
+ packssdw xmm3, xmm5 ; op[12]
+
+ paddw xmm1, xmm6 ; op[4] += (d1!=0)
+
+ movdqa xmm4, xmm0
+ movdqa xmm5, xmm2
+
+ punpcklqdq xmm0, xmm1
+ punpckhqdq xmm4, xmm1
+
+ punpcklqdq xmm2, xmm3
+ punpckhqdq xmm5, xmm3
+
+ movdqa XMMWORD PTR[output + 0 ], xmm0
+ movdqa XMMWORD PTR[output + 16], xmm2
+ movdqa XMMWORD PTR[output + 32], xmm4
+ movdqa XMMWORD PTR[output + 48], xmm5
+
+ STACK_FRAME_DESTROY
+
+SECTION_RODATA
+align 16
+_5352_2217: ; (5352, 2217) word pairs for pmaddwd
+ dw 5352
+ dw 2217
+ dw 5352
+ dw 2217
+ dw 5352
+ dw 2217
+ dw 5352
+ dw 2217
+align 16
+_2217_neg5352: ; (2217, -5352) word pairs for pmaddwd
+ dw 2217
+ dw -5352
+ dw 2217
+ dw -5352
+ dw 2217
+ dw -5352
+ dw 2217
+ dw -5352
+align 16
+_mult_add: ; all ones: pmaddwd adds each word pair
+ times 8 dw 1
+align 16
+_cmp_mask: ; 1 in the low four words only (used by fdct4x4)
+ times 4 dw 1
+ times 4 dw 0
+align 16
+_cmp_mask8x4: ; 1 in all eight words (used by fdct8x4)
+ times 8 dw 1
+align 16
+_mult_sub: ; (1, -1) pairs: pmaddwd subtracts each word pair
+ dw 1
+ dw -1
+ dw 1
+ dw -1
+ dw 1
+ dw -1
+ dw 1
+ dw -1
+align 16
+_7: ; dword rounding term added before psrad 4
+ times 4 dd 7
+align 16
+_7w: ; word rounding term added before psraw 4
+ times 8 dw 7
+align 16
+_14500: ; first-pass rounding term (before psrad 12)
+ times 4 dd 14500
+align 16
+_7500: ; first-pass rounding term (before psrad 12)
+ times 4 dd 7500
+align 16
+_12000: ; second-pass rounding term (before psrad 16)
+ times 4 dd 12000
+align 16
+_51000: ; second-pass rounding term (before psrad 16)
+ times 4 dd 51000
diff --git a/vp9/encoder/x86/vp9_encodeopt.asm b/vp9/encoder/x86/vp9_encodeopt.asm
new file mode 100644
index 0000000..5d9f776
--- /dev/null
+++ b/vp9/encoder/x86/vp9_encodeopt.asm
@@ -0,0 +1,386 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+;int vp9_block_error_xmm(short *coeff_ptr, short *dcoef_ptr)
+global sym(vp9_block_error_xmm) PRIVATE
+sym(vp9_block_error_xmm): ; returns the sum of (coeff[i] - dcoef[i])^2 over 16 shorts
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 2
+ push rsi
+ push rdi
+ ; end prologue
+
+ mov rsi, arg(0) ;coeff_ptr
+ mov rdi, arg(1) ;dcoef_ptr
+
+ movdqa xmm0, [rsi] ; coeff[0..7] (movdqa: address must be 16-byte aligned)
+ movdqa xmm1, [rdi] ; dcoef[0..7]
+
+ movdqa xmm2, [rsi+16] ; coeff[8..15]
+ movdqa xmm3, [rdi+16] ; dcoef[8..15]
+
+ psubw xmm0, xmm1 ; 16-bit differences
+ psubw xmm2, xmm3
+
+ pmaddwd xmm0, xmm0 ; square and add adjacent pairs -> 4 dword sums
+ pmaddwd xmm2, xmm2
+
+ paddd xmm0, xmm2 ; 4 partial sums
+
+ pxor xmm5, xmm5
+ movdqa xmm1, xmm0
+
+ punpckldq xmm0, xmm5 ; partial sums 0,1 zero-extended into qword lanes
+ punpckhdq xmm1, xmm5 ; partial sums 2,3 zero-extended into qword lanes
+
+ paddd xmm0, xmm1
+ movdqa xmm1, xmm0
+
+ psrldq xmm0, 8 ; bring the high qword down
+ paddd xmm0, xmm1 ; low dword = total of the 4 partial sums
+
+ movq rax, xmm0 ; return value
+
+ pop rdi
+ pop rsi
+ ; begin epilog
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;int vp9_block_error_mmx(short *coeff_ptr, short *dcoef_ptr)
+global sym(vp9_block_error_mmx) PRIVATE
+sym(vp9_block_error_mmx): ; sum of (coeff[i] - dcoef[i])^2 over 16 shorts; no emms is executed here
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 2
+ push rsi
+ push rdi
+ ; end prolog
+
+
+ mov rsi, arg(0) ;coeff_ptr
+ pxor mm7, mm7 ; mm7 = 0
+
+ mov rdi, arg(1) ;dcoef_ptr
+ movq mm3, [rsi] ; coeff[0..3]
+
+ movq mm4, [rdi] ; dcoef[0..3]
+ movq mm5, [rsi+8] ; coeff[4..7]
+
+ movq mm6, [rdi+8] ; dcoef[4..7]
+ pxor mm1, mm1 ; from movd mm1, dc ; dc =0
+
+ movq mm2, mm7
+ psubw mm5, mm6
+
+ por mm1, mm2 ; still 0
+ pmaddwd mm5, mm5
+
+ pcmpeqw mm1, mm7 ; 0 == 0 in every lane -> mask is all ones
+ psubw mm3, mm4
+
+ pand mm1, mm3 ; mask is all ones, so all four differences are kept
+ pmaddwd mm1, mm1
+
+ paddd mm1, mm5 ; accumulate in mm1 (2 dword partial sums)
+ movq mm3, [rsi+16] ; coeff[8..11]
+
+ movq mm4, [rdi+16] ; dcoef[8..11]
+ movq mm5, [rsi+24] ; coeff[12..15]
+
+ movq mm6, [rdi+24] ; dcoef[12..15]
+ psubw mm5, mm6
+
+ pmaddwd mm5, mm5
+ psubw mm3, mm4
+
+ pmaddwd mm3, mm3
+ paddd mm3, mm5
+
+ paddd mm1, mm3
+ movq mm0, mm1
+
+ psrlq mm1, 32 ; high partial sum moved to the low dword
+ paddd mm0, mm1 ; low dword = total; high dword still holds the high partial sum
+
+ movq rax, mm0 ; the int result is the low dword
+
+ pop rdi
+ pop rsi
+ ; begin epilog
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;int vp9_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
+global sym(vp9_mbblock_error_mmx_impl) PRIVATE
+sym(vp9_mbblock_error_mmx_impl): ; squared-error sum over 16 groups of 16 shorts; no emms is executed here
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 3
+ push rsi
+ push rdi
+ ; end prolog
+
+
+ mov rsi, arg(0) ;coeff_ptr
+ pxor mm7, mm7 ; mm7 = 0
+
+ mov rdi, arg(1) ;dcoef_ptr
+ pxor mm2, mm2 ; mm2 = 0; it becomes the accumulator inside the loop
+
+ movd mm1, dword ptr arg(2) ;dc
+ por mm1, mm2
+
+ pcmpeqw mm1, mm7 ; per-word mask: 0xffff where that 16-bit piece of dc is 0 (words 2,3 always)
+ mov rcx, 16 ; 16 iterations of 32 bytes each
+
+.mberror_loop_mmx:
+ movq mm3, [rsi] ; coeff[0..3] of this group
+ movq mm4, [rdi]
+
+ movq mm5, [rsi+8] ; coeff[4..7]
+ movq mm6, [rdi+8]
+
+
+ psubw mm5, mm6
+ pmaddwd mm5, mm5
+
+ psubw mm3, mm4
+ pand mm3, mm1 ; apply the dc mask to the first four differences of the group
+
+ pmaddwd mm3, mm3
+ paddd mm2, mm5
+
+ paddd mm2, mm3
+ movq mm3, [rsi+16] ; coeff[8..11]
+
+ movq mm4, [rdi+16]
+ movq mm5, [rsi+24] ; coeff[12..15]
+
+ movq mm6, [rdi+24]
+ psubw mm5, mm6
+
+ pmaddwd mm5, mm5
+ psubw mm3, mm4
+
+ pmaddwd mm3, mm3
+ paddd mm2, mm5
+
+ paddd mm2, mm3
+ add rsi, 32 ; advance both pointers by 16 shorts
+
+ add rdi, 32
+ sub rcx, 1
+
+ jnz .mberror_loop_mmx
+
+ movq mm0, mm2
+ psrlq mm2, 32 ; high partial sum moved to the low dword
+
+ paddd mm0, mm2 ; low dword = total
+ movq rax, mm0 ; the int result is the low dword
+
+ pop rdi
+ pop rsi
+ ; begin epilog
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;int vp9_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
+global sym(vp9_mbblock_error_xmm_impl) PRIVATE
+sym(vp9_mbblock_error_xmm_impl): ; squared-error sum over 16 groups of 16 shorts
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 3
+ SAVE_XMM 6
+ push rsi
+ push rdi
+ ; end prolog
+
+
+ mov rsi, arg(0) ;coeff_ptr
+ pxor xmm6, xmm6 ; xmm6 = 0
+
+ mov rdi, arg(1) ;dcoef_ptr
+ pxor xmm4, xmm4 ; xmm4 = 0; it becomes the accumulator inside the loop
+
+ movd xmm5, dword ptr arg(2) ;dc
+ por xmm5, xmm4
+
+ pcmpeqw xmm5, xmm6 ; per-word mask: 0xffff where that 16-bit piece of dc is 0 (words 2..7 always)
+ mov rcx, 16 ; 16 iterations of 32 bytes each
+
+.mberror_loop:
+ movdqa xmm0, [rsi] ; coeff[0..7] of this group (16-byte aligned loads)
+ movdqa xmm1, [rdi]
+
+ movdqa xmm2, [rsi+16] ; coeff[8..15]
+ movdqa xmm3, [rdi+16]
+
+
+ psubw xmm2, xmm3
+ pmaddwd xmm2, xmm2
+
+ psubw xmm0, xmm1
+ pand xmm0, xmm5 ; apply the dc mask to the first eight differences of the group
+
+ pmaddwd xmm0, xmm0
+ add rsi, 32
+
+ add rdi, 32
+
+ sub rcx, 1 ; sets the flags tested by jnz below
+ paddd xmm4, xmm2 ; SSE adds do not modify the flags
+
+ paddd xmm4, xmm0
+ jnz .mberror_loop
+
+ movdqa xmm0, xmm4
+ punpckldq xmm0, xmm6 ; partial sums 0,1 zero-extended into qword lanes
+
+ punpckhdq xmm4, xmm6 ; partial sums 2,3 zero-extended into qword lanes
+ paddd xmm0, xmm4
+
+ movdqa xmm1, xmm0
+ psrldq xmm0, 8 ; bring the high qword down
+
+ paddd xmm0, xmm1 ; low dword = total
+ movq rax, xmm0
+
+ pop rdi
+ pop rsi
+ ; begin epilog
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;int vp9_mbuverror_mmx_impl(short *s_ptr, short *d_ptr);
+global sym(vp9_mbuverror_mmx_impl) PRIVATE
+sym(vp9_mbuverror_mmx_impl): ; sum of (s[i] - d[i])^2 over 128 shorts; no emms is executed here
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 2
+ push rsi
+ push rdi
+ ; end prolog
+
+
+ mov rsi, arg(0) ;s_ptr
+ mov rdi, arg(1) ;d_ptr
+
+ mov rcx, 16 ; 16 iterations of 16 bytes (8 shorts) each
+ pxor mm7, mm7 ; accumulator (2 dword partial sums)
+
+.mbuverror_loop_mmx:
+
+ movq mm1, [rsi] ; s[0..3] of this step
+ movq mm2, [rdi]
+
+ psubw mm1, mm2
+ pmaddwd mm1, mm1 ; square and add adjacent pairs
+
+
+ movq mm3, [rsi+8] ; s[4..7] of this step
+ movq mm4, [rdi+8]
+
+ psubw mm3, mm4
+ pmaddwd mm3, mm3
+
+
+ paddd mm7, mm1
+ paddd mm7, mm3
+
+
+ add rsi, 16
+ add rdi, 16
+
+ dec rcx
+ jnz .mbuverror_loop_mmx
+
+ movq mm0, mm7
+ psrlq mm7, 32 ; high partial sum moved to the low dword
+
+ paddd mm0, mm7 ; low dword = total
+ movq rax, mm0 ; the int result is the low dword
+
+ pop rdi
+ pop rsi
+ ; begin epilog
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;int vp9_mbuverror_xmm_impl(short *s_ptr, short *d_ptr);
+global sym(vp9_mbuverror_xmm_impl) PRIVATE
+sym(vp9_mbuverror_xmm_impl): ; sum of (s[i] - d[i])^2 over 128 shorts
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 2
+ push rsi
+ push rdi
+ ; end prolog
+
+
+ mov rsi, arg(0) ;s_ptr
+ mov rdi, arg(1) ;d_ptr
+
+ mov rcx, 16 ; 16 iterations of 16 bytes (8 shorts) each
+ pxor xmm3, xmm3 ; accumulator (4 dword partial sums)
+
+.mbuverror_loop:
+
+ movdqa xmm1, [rsi] ; 16-byte aligned loads
+ movdqa xmm2, [rdi]
+
+ psubw xmm1, xmm2
+ pmaddwd xmm1, xmm1 ; square and add adjacent pairs
+
+ paddd xmm3, xmm1
+
+ add rsi, 16
+ add rdi, 16
+
+ dec rcx
+ jnz .mbuverror_loop
+
+ pxor xmm0, xmm0
+ movdqa xmm1, xmm3
+
+ movdqa xmm2, xmm1
+ punpckldq xmm1, xmm0 ; partial sums 0,1 zero-extended into qword lanes
+
+ punpckhdq xmm2, xmm0 ; partial sums 2,3 zero-extended into qword lanes
+ paddd xmm1, xmm2
+
+ movdqa xmm2, xmm1
+
+ psrldq xmm1, 8 ; bring the high qword down
+ paddd xmm1, xmm2 ; low dword = total
+
+ movq rax, xmm1
+
+ pop rdi
+ pop rsi
+ ; begin epilog
+ UNSHADOW_ARGS
+ pop rbp
+ ret
diff --git a/vp9/encoder/x86/vp9_fwalsh_sse2.asm b/vp9/encoder/x86/vp9_fwalsh_sse2.asm
new file mode 100644
index 0000000..7bee9ef
--- /dev/null
+++ b/vp9/encoder/x86/vp9_fwalsh_sse2.asm
@@ -0,0 +1,164 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+;void vp9_short_walsh4x4_sse2(short *input, short *output, int pitch)
+global sym(vp9_short_walsh4x4_sse2) PRIVATE
+sym(vp9_short_walsh4x4_sse2): ; reads 4 rows of 4 shorts (rows pitch bytes apart), writes 16 shorts to output
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 3
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rsi, arg(0) ; input
+ mov rdi, arg(1) ; output
+ movsxd rdx, dword ptr arg(2) ; pitch
+
+ ; first for loop
+ movq xmm0, MMWORD PTR [rsi] ; load input
+ movq xmm1, MMWORD PTR [rsi + rdx]
+ lea rsi, [rsi + rdx*2]
+ movq xmm2, MMWORD PTR [rsi]
+ movq xmm3, MMWORD PTR [rsi + rdx]
+
+ punpcklwd xmm0, xmm1 ; interleave rows 0 and 1
+ punpcklwd xmm2, xmm3 ; interleave rows 2 and 3
+
+ movdqa xmm1, xmm0
+ punpckldq xmm0, xmm2 ; ip[1] ip[0]
+ punpckhdq xmm1, xmm2 ; ip[3] ip[2]
+
+ movdqa xmm2, xmm0
+ paddw xmm0, xmm1
+ psubw xmm2, xmm1
+
+ psllw xmm0, 2 ; d1 a1
+ psllw xmm2, 2 ; c1 b1
+
+ movdqa xmm1, xmm0
+ punpcklqdq xmm0, xmm2 ; b1 a1
+ punpckhqdq xmm1, xmm2 ; c1 d1
+
+ pxor xmm6, xmm6 ; NOTE(review): redundant, the movq below zeroes the upper qword itself
+ movq xmm6, xmm0 ; a1 in the low qword, zeros above
+ pxor xmm7, xmm7
+ pcmpeqw xmm7, xmm6 ; 0xffff where the word is 0
+ paddw xmm7, [GLOBAL(c1)] ; +1 -> 0 where the word is 0, 1 where it is not
+
+ movdqa xmm2, xmm0
+ paddw xmm0, xmm1 ; b1+c1 a1+d1
+ psubw xmm2, xmm1 ; b1-c1 a1-d1
+ paddw xmm0, xmm7 ; b1+c1 a1+d1+(a1!=0)
+
+ ; second for loop
+ ; input: 13 9 5 1 12 8 4 0 (xmm0)
+ ; 14 10 6 2 15 11 7 3 (xmm2)
+ ; after shuffle:
+ ; 13 5 9 1 12 4 8 0 (xmm0)
+ ; 14 6 10 2 15 7 11 3 (xmm1)
+ pshuflw xmm3, xmm0, 0xd8
+ pshufhw xmm0, xmm3, 0xd8
+ pshuflw xmm3, xmm2, 0xd8
+ pshufhw xmm1, xmm3, 0xd8
+
+ movdqa xmm2, xmm0
+ pmaddwd xmm0, [GLOBAL(c1)] ; d11 a11 d10 a10
+ pmaddwd xmm2, [GLOBAL(cn1)] ; c11 b11 c10 b10
+ movdqa xmm3, xmm1
+ pmaddwd xmm1, [GLOBAL(c1)] ; d12 a12 d13 a13
+ pmaddwd xmm3, [GLOBAL(cn1)] ; c12 b12 c13 b13
+
+ pshufd xmm4, xmm0, 0xd8 ; d11 d10 a11 a10
+ pshufd xmm5, xmm2, 0xd8 ; c11 c10 b11 b10
+ pshufd xmm6, xmm1, 0x72 ; d13 d12 a13 a12
+ pshufd xmm7, xmm3, 0x72 ; c13 c12 b13 b12
+
+ movdqa xmm0, xmm4
+ punpcklqdq xmm0, xmm5 ; b11 b10 a11 a10
+ punpckhqdq xmm4, xmm5 ; c11 c10 d11 d10
+ movdqa xmm1, xmm6
+ punpcklqdq xmm1, xmm7 ; b13 b12 a13 a12
+ punpckhqdq xmm6, xmm7 ; c13 c12 d13 d12
+
+ movdqa xmm2, xmm0
+ paddd xmm0, xmm4 ; b21 b20 a21 a20
+ psubd xmm2, xmm4 ; c21 c20 d21 d20
+ movdqa xmm3, xmm1
+ paddd xmm1, xmm6 ; b23 b22 a23 a22
+ psubd xmm3, xmm6 ; c23 c22 d23 d22
+
+ pxor xmm4, xmm4
+ movdqa xmm5, xmm4
+ pcmpgtd xmm4, xmm0 ; all ones where the value is negative
+ pcmpgtd xmm5, xmm2
+ pand xmm4, [GLOBAL(cd1)] ; 1 where negative, else 0
+ pand xmm5, [GLOBAL(cd1)]
+
+ pxor xmm6, xmm6
+ movdqa xmm7, xmm6
+ pcmpgtd xmm6, xmm1 ; all ones where the value is negative
+ pcmpgtd xmm7, xmm3
+ pand xmm6, [GLOBAL(cd1)] ; 1 where negative, else 0
+ pand xmm7, [GLOBAL(cd1)]
+
+ paddd xmm0, xmm4 ; x += (x < 0)
+ paddd xmm2, xmm5
+ paddd xmm0, [GLOBAL(cd3)] ; x += 3
+ paddd xmm2, [GLOBAL(cd3)]
+ paddd xmm1, xmm6 ; x += (x < 0)
+ paddd xmm3, xmm7
+ paddd xmm1, [GLOBAL(cd3)] ; x += 3
+ paddd xmm3, [GLOBAL(cd3)]
+
+ psrad xmm0, 3 ; (x + (x < 0) + 3) >> 3
+ psrad xmm1, 3
+ psrad xmm2, 3
+ psrad xmm3, 3
+ movdqa xmm4, xmm0
+ punpcklqdq xmm0, xmm1 ; a23 a22 a21 a20
+ punpckhqdq xmm4, xmm1 ; b23 b22 b21 b20
+ movdqa xmm5, xmm2
+ punpckhqdq xmm2, xmm3 ; c23 c22 c21 c20
+ punpcklqdq xmm5, xmm3 ; d23 d22 d21 d20
+
+ packssdw xmm0, xmm4 ; b23 b22 b21 b20 a23 a22 a21 a20
+ packssdw xmm2, xmm5 ; d23 d22 d21 d20 c23 c22 c21 c20
+
+ movdqa XMMWORD PTR [rdi], xmm0 ; output[0..7] (16-byte aligned store)
+ movdqa XMMWORD PTR [rdi + 16], xmm2 ; output[8..15]
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+SECTION_RODATA
+align 16
+c1: ; eight words of 1: pmaddwd by this adds each adjacent word pair; also the +1 word bias
+ dw 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001
+align 16
+cn1: ; (1, -1) word pairs: pmaddwd by this gives even word minus odd word of each pair
+ dw 0x0001, 0xffff, 0x0001, 0xffff, 0x0001, 0xffff, 0x0001, 0xffff
+align 16
+cd1: ; four dwords of 1: ANDed with a sign mask to get (x < 0)
+ dd 0x00000001, 0x00000001, 0x00000001, 0x00000001
+align 16
+cd3: ; four dwords of 3: added before the arithmetic shift right by 3
+ dd 0x00000003, 0x00000003, 0x00000003, 0x00000003
diff --git a/vp9/encoder/x86/vp9_mcomp_x86.h b/vp9/encoder/x86/vp9_mcomp_x86.h
new file mode 100644
index 0000000..ca80b8b
--- /dev/null
+++ b/vp9/encoder/x86/vp9_mcomp_x86.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_ENCODER_X86_VP9_MCOMP_X86_H_
+#define VP9_ENCODER_X86_VP9_MCOMP_X86_H_
+
+#if HAVE_SSE3
+#if !CONFIG_RUNTIME_CPU_DETECT /* without runtime detection the search names are bound here */
+
+#undef vp9_search_full_search
+#define vp9_search_full_search vp9_full_search_sadx3
+
+#undef vp9_search_refining_search
+#define vp9_search_refining_search vp9_refining_search_sadx4
+
+#undef vp9_search_diamond_search
+#define vp9_search_diamond_search vp9_diamond_search_sadx4
+
+#endif /* !CONFIG_RUNTIME_CPU_DETECT */
+#endif /* HAVE_SSE3 */
+
+#if HAVE_SSE4_1 /* comes after the SSE3 block, so this full-search mapping wins when both are set */
+#if !CONFIG_RUNTIME_CPU_DETECT
+
+#undef vp9_search_full_search
+#define vp9_search_full_search vp9_full_search_sadx8
+
+#endif /* !CONFIG_RUNTIME_CPU_DETECT */
+#endif /* HAVE_SSE4_1 */
+
+#endif /* VP9_ENCODER_X86_VP9_MCOMP_X86_H_ */
+
diff --git a/vp9/encoder/x86/vp9_quantize_mmx.asm b/vp9/encoder/x86/vp9_quantize_mmx.asm
new file mode 100644
index 0000000..22e2356
--- /dev/null
+++ b/vp9/encoder/x86/vp9_quantize_mmx.asm
@@ -0,0 +1,286 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+;int vp9_fast_quantize_b_impl_mmx(short *coeff_ptr, short *zbin_ptr,
+; short *qcoeff_ptr,short *dequant_ptr,
+; short *scan_mask, short *round_ptr,
+; short *quant_ptr, short *dqcoeff_ptr);
+global sym(vp9_fast_quantize_b_impl_mmx) PRIVATE
+sym(vp9_fast_quantize_b_impl_mmx): ; quantizes 16 shorts, 4 at a time; returns the eob value computed at the end
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 8
+ push rsi
+ push rdi
+ ; end prolog
+
+
+ mov rsi, arg(0) ;coeff_ptr
+ movq mm0, [rsi] ; coefficients 0..3
+
+ mov rax, arg(1) ;zbin_ptr
+ movq mm1, [rax]
+
+ movq mm3, mm0
+ psraw mm0, 15 ; sign mask: 0xffff for negative words
+
+ pxor mm3, mm0
+ psubw mm3, mm0 ; abs
+
+ movq mm2, mm3
+ pcmpgtw mm1, mm2 ; 0xffff where zbin > abs
+
+ pandn mm1, mm2 ; keep abs where abs >= zbin, else 0
+ movq mm3, mm1
+
+ mov rdx, arg(6) ;quant_ptr
+ movq mm1, [rdx]
+
+ mov rcx, arg(5) ;round_ptr
+ movq mm2, [rcx]
+
+ paddw mm3, mm2 ; + round
+ pmulhuw mm3, mm1 ; unsigned (x * quant) >> 16
+
+ pxor mm3, mm0
+ psubw mm3, mm0 ;gain the sign back
+
+ mov rdi, arg(2) ;qcoeff_ptr
+ movq mm0, mm3 ; NOTE(review): mm0 is reloaded before it is read again
+
+ movq [rdi], mm3
+
+ mov rax, arg(3) ;dequant_ptr
+ movq mm2, [rax]
+
+ pmullw mm3, mm2 ; qcoeff * dequant (low 16 bits)
+ mov rax, arg(7) ;dqcoeff_ptr
+
+ movq [rax], mm3
+
+ ; next 8 bytes: coefficients 4..7
+ movq mm4, [rsi+8]
+
+ mov rax, arg(1) ;zbin_ptr
+ movq mm5, [rax+8]
+
+ movq mm7, mm4
+ psraw mm4, 15 ; sign mask
+
+ pxor mm7, mm4
+ psubw mm7, mm4 ; abs
+
+ movq mm6, mm7
+ pcmpgtw mm5, mm6 ; 0xffff where zbin > abs
+
+ pandn mm5, mm6 ; keep abs where abs >= zbin, else 0
+ movq mm7, mm5
+
+ movq mm5, [rdx+8] ; quant
+ movq mm6, [rcx+8] ; round
+
+ paddw mm7, mm6
+ pmulhuw mm7, mm5
+
+ pxor mm7, mm4
+ psubw mm7, mm4;gain the sign back
+
+ mov rdi, arg(2) ;qcoeff_ptr
+
+ movq mm1, mm7 ; NOTE(review): mm1 is overwritten before it is read again
+ movq [rdi+8], mm7
+
+ mov rax, arg(3) ;dequant_ptr
+ movq mm6, [rax+8]
+
+ pmullw mm7, mm6
+ mov rax, arg(7) ;dqcoeff_ptr
+
+ movq [rax+8], mm7
+
+
+ ; next 8 bytes: coefficients 8..11
+ movq mm4, [rsi+16]
+
+ mov rax, arg(1) ;zbin_ptr
+ movq mm5, [rax+16]
+
+ movq mm7, mm4
+ psraw mm4, 15 ; sign mask
+
+ pxor mm7, mm4
+ psubw mm7, mm4 ; abs
+
+ movq mm6, mm7
+ pcmpgtw mm5, mm6 ; 0xffff where zbin > abs
+
+ pandn mm5, mm6 ; keep abs where abs >= zbin, else 0
+ movq mm7, mm5
+
+ movq mm5, [rdx+16] ; quant
+ movq mm6, [rcx+16] ; round
+
+ paddw mm7, mm6
+ pmulhuw mm7, mm5
+
+ pxor mm7, mm4
+ psubw mm7, mm4;gain the sign back
+
+ mov rdi, arg(2) ;qcoeff_ptr
+
+ movq mm1, mm7
+ movq [rdi+16], mm7
+
+ mov rax, arg(3) ;dequant_ptr
+ movq mm6, [rax+16]
+
+ pmullw mm7, mm6
+ mov rax, arg(7) ;dqcoeff_ptr
+
+ movq [rax+16], mm7
+
+
+ ; next 8 bytes: coefficients 12..15
+ movq mm4, [rsi+24]
+
+ mov rax, arg(1) ;zbin_ptr
+ movq mm5, [rax+24]
+
+ movq mm7, mm4
+ psraw mm4, 15 ; sign mask
+
+ pxor mm7, mm4
+ psubw mm7, mm4 ; abs
+
+ movq mm6, mm7
+ pcmpgtw mm5, mm6 ; 0xffff where zbin > abs
+
+ pandn mm5, mm6 ; keep abs where abs >= zbin, else 0
+ movq mm7, mm5
+
+ movq mm5, [rdx+24] ; quant
+ movq mm6, [rcx+24] ; round
+
+ paddw mm7, mm6
+ pmulhuw mm7, mm5
+
+ pxor mm7, mm4
+ psubw mm7, mm4;gain the sign back
+
+ mov rdi, arg(2) ;qcoeff_ptr
+
+ movq mm1, mm7
+ movq [rdi+24], mm7
+
+ mov rax, arg(3) ;dequant_ptr
+ movq mm6, [rax+24]
+
+ pmullw mm7, mm6
+ mov rax, arg(7) ;dqcoeff_ptr
+
+ movq [rax+24], mm7
+
+
+
+ mov rdi, arg(4) ;scan_mask
+ mov rsi, arg(2) ;qcoeff_ptr
+
+ pxor mm5, mm5
+ pxor mm7, mm7 ; mm7 = 0
+
+ movq mm0, [rsi] ; qcoeff[0..3] as just stored
+ movq mm1, [rsi+8] ; qcoeff[4..7]
+
+ movq mm2, [rdi] ; scan_mask[0..3]
+ movq mm3, [rdi+8]; scan_mask[4..7]
+
+ pcmpeqw mm0, mm7 ; 0xffff where qcoeff == 0
+ pcmpeqw mm1, mm7
+
+ pcmpeqw mm6, mm6 ; all ones
+ pxor mm0, mm6 ; invert: 0xffff where qcoeff != 0
+
+ pxor mm1, mm6
+ psrlw mm0, 15 ; 1 where qcoeff != 0, else 0
+
+ psrlw mm1, 15
+ pmaddwd mm0, mm2 ; sum of scan_mask entries of the nonzero coefficients
+
+ pmaddwd mm1, mm3
+ movq mm5, mm0
+
+ paddd mm5, mm1
+
+ movq mm0, [rsi+16] ; qcoeff[8..11]
+ movq mm1, [rsi+24] ; qcoeff[12..15]
+
+ movq mm2, [rdi+16] ; scan_mask[8..11]
+ movq mm3, [rdi+24]; scan_mask[12..15]
+
+ pcmpeqw mm0, mm7
+ pcmpeqw mm1, mm7
+
+ pcmpeqw mm6, mm6
+ pxor mm0, mm6
+
+ pxor mm1, mm6
+ psrlw mm0, 15
+
+ psrlw mm1, 15
+ pmaddwd mm0, mm2
+
+ pmaddwd mm1, mm3
+ paddd mm5, mm0
+
+ paddd mm5, mm1
+ movq mm0, mm5
+
+ psrlq mm5, 32
+ paddd mm0, mm5 ; low dword = sum over all 16 coefficients
+
+ ; eob adjustment begins here
+ movq rcx, mm0
+ and rcx, 0xffff ; keep the low 16 bits of the sum
+
+ xor rdx, rdx
+ sub rdx, rcx ; rdx=-rcx
+
+ bsr rax, rcx ; index of the highest set bit (undefined when rcx is 0, masked off below)
+ inc rax
+
+ sar rdx, 31 ; all ones when rcx != 0, zero when rcx == 0
+ and rax, rdx ; result: highest bit index + 1, or 0 when no bit is set
+ ; Substitute the sse assembly for the old mmx mixed assembly/C. The
+ ; following is kept as reference
+ ; movq rcx, mm0
+ ; bsr rax, rcx
+ ;
+ ; mov eob, rax
+ ; mov eee, rcx
+ ;
+ ;if(eee==0)
+ ;{
+ ; eob=-1;
+ ;}
+ ;else if(eee<0)
+ ;{
+ ; eob=15;
+ ;}
+ ;d->eob = eob+1;
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
diff --git a/vp9/encoder/x86/vp9_quantize_sse2.asm b/vp9/encoder/x86/vp9_quantize_sse2.asm
new file mode 100644
index 0000000..060acc2
--- /dev/null
+++ b/vp9/encoder/x86/vp9_quantize_sse2.asm
@@ -0,0 +1,380 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license and patent
+; grant that can be found in the LICENSE file in the root of the source
+; tree. All contributing project authors may be found in the AUTHORS
+; file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+%include "vp9_asm_enc_offsets.asm"
+
+
+; void vp9_regular_quantize_b_sse2 | arg
+; (BLOCK *b, | 0
+; BLOCKD *d) | 1
+
+global sym(vp9_regular_quantize_b_sse2) PRIVATE
+sym(vp9_regular_quantize_b_sse2): ; quantizes 16 coefficients; writes qcoeff, dqcoeff and eob through d
+ push rbp
+ mov rbp, rsp
+ SAVE_XMM 7
+ GET_GOT rbx
+
+%if ABI_IS_32BIT
+ push rdi
+ push rsi
+%else
+ %ifidn __OUTPUT_FORMAT__,x64
+ push rdi
+ push rsi
+ %endif
+%endif
+
+ ALIGN_STACK 16, rax
+ %define zrun_zbin_boost 0 ; 8
+ %define abs_minus_zbin 8 ; 32
+ %define temp_qcoeff 40 ; 32
+ %define qcoeff 72 ; 32
+ %define stack_size 104
+ sub rsp, stack_size ; NOTE(review): offsets 8/40/72 are used with movdqa, so rsp+8 is presumably 16-byte aligned here - confirm against ALIGN_STACK
+ ; end prolog
+
+%if ABI_IS_32BIT
+ mov rdi, arg(0) ; BLOCK *b
+ mov rsi, arg(1) ; BLOCKD *d
+%else
+ %ifidn __OUTPUT_FORMAT__,x64
+ mov rdi, rcx ; BLOCK *b
+ mov rsi, rdx ; BLOCKD *d
+ %else
+ ;mov rdi, rdi ; BLOCK *b
+ ;mov rsi, rsi ; BLOCKD *d
+ %endif
+%endif
+
+ mov rdx, [rdi + vp9_block_coeff] ; coeff_ptr
+ mov rcx, [rdi + vp9_block_zbin] ; zbin_ptr
+ movd xmm7, [rdi + vp9_block_zbin_extra] ; zbin_oq_value
+
+ ; z
+ movdqa xmm0, [rdx]
+ movdqa xmm4, [rdx + 16]
+ mov rdx, [rdi + vp9_block_round] ; round_ptr
+
+ pshuflw xmm7, xmm7, 0
+ punpcklwd xmm7, xmm7 ; duplicated zbin_oq_value
+
+ movdqa xmm1, xmm0
+ movdqa xmm5, xmm4
+
+ ; sz
+ psraw xmm0, 15
+ psraw xmm4, 15
+
+ ; (z ^ sz)
+ pxor xmm1, xmm0
+ pxor xmm5, xmm4
+
+ ; x = abs(z)
+ psubw xmm1, xmm0
+ psubw xmm5, xmm4
+
+ movdqa xmm2, [rcx]
+ movdqa xmm3, [rcx + 16]
+ mov rcx, [rdi + vp9_block_quant] ; quant_ptr
+
+ ; *zbin_ptr + zbin_oq_value
+ paddw xmm2, xmm7
+ paddw xmm3, xmm7
+
+ ; x - (*zbin_ptr + zbin_oq_value)
+ psubw xmm1, xmm2
+ psubw xmm5, xmm3
+ movdqa [rsp + abs_minus_zbin], xmm1
+ movdqa [rsp + abs_minus_zbin + 16], xmm5
+
+ ; add (zbin_ptr + zbin_oq_value) back
+ paddw xmm1, xmm2
+ paddw xmm5, xmm3
+
+ movdqa xmm2, [rdx]
+ movdqa xmm6, [rdx + 16]
+
+ movdqa xmm3, [rcx]
+ movdqa xmm7, [rcx + 16]
+
+ ; x + round
+ paddw xmm1, xmm2
+ paddw xmm5, xmm6
+
+ ; y = x * quant_ptr >> 16
+ pmulhw xmm3, xmm1
+ pmulhw xmm7, xmm5
+
+ ; y += x
+ paddw xmm1, xmm3
+ paddw xmm5, xmm7
+
+ movdqa [rsp + temp_qcoeff], xmm1
+ movdqa [rsp + temp_qcoeff + 16], xmm5
+
+ pxor xmm6, xmm6
+ ; zero qcoeff
+ movdqa [rsp + qcoeff], xmm6
+ movdqa [rsp + qcoeff + 16], xmm6
+
+ mov rdx, [rdi + vp9_block_zrun_zbin_boost] ; zbin_boost_ptr
+ mov rax, [rdi + vp9_block_quant_shift] ; quant_shift_ptr
+ mov [rsp + zrun_zbin_boost], rdx ; saved so the walk can be reset after a nonzero coefficient
+
+%macro ZIGZAG_LOOP 1
+ ; x - (zbin + zbin_oq_value) for coefficient %1, computed above
+ movsx ecx, WORD PTR[rsp + abs_minus_zbin + %1 * 2]
+
+ ; skip the coefficient unless that value is >= the current zero-run boost
+ sub cx, WORD PTR[rdx] ; subtract *zbin_boost_ptr
+ lea rdx, [rdx + 2] ; zbin_boost_ptr++
+ jl .rq_zigzag_loop_%1 ; below the boosted threshold (lea leaves the flags from sub intact)
+
+ movsx edi, WORD PTR[rsp + temp_qcoeff + %1 * 2]
+
+ ; downshift by quant_shift[rc]
+ movsx cx, BYTE PTR[rax + %1] ; quant_shift_ptr[rc], read as one byte per coefficient
+ sar edi, cl ; also sets Z bit
+ je .rq_zigzag_loop_%1 ; !y
+ mov WORD PTR[rsp + qcoeff + %1 * 2], di ;qcoeff[rc] = temp_qcoeff[rc] >> quant_shift[rc]
+ mov rdx, [rsp + zrun_zbin_boost] ; reset to b->zrun_zbin_boost
+.rq_zigzag_loop_%1:
+%endmacro
+; in vp9_default_zig_zag1d order: see vp9/common/vp9_entropy.c
+ZIGZAG_LOOP 0
+ZIGZAG_LOOP 1
+ZIGZAG_LOOP 4
+ZIGZAG_LOOP 8
+ZIGZAG_LOOP 5
+ZIGZAG_LOOP 2
+ZIGZAG_LOOP 3
+ZIGZAG_LOOP 6
+ZIGZAG_LOOP 9
+ZIGZAG_LOOP 12
+ZIGZAG_LOOP 13
+ZIGZAG_LOOP 10
+ZIGZAG_LOOP 7
+ZIGZAG_LOOP 11
+ZIGZAG_LOOP 14
+ZIGZAG_LOOP 15
+
+ movdqa xmm2, [rsp + qcoeff]
+ movdqa xmm3, [rsp + qcoeff + 16]
+
+ mov rcx, [rsi + vp9_blockd_dequant] ; dequant_ptr
+ mov rdi, [rsi + vp9_blockd_dqcoeff] ; dqcoeff_ptr
+
+ ; y ^ sz
+ pxor xmm2, xmm0
+ pxor xmm3, xmm4
+ ; x = (y ^ sz) - sz
+ psubw xmm2, xmm0
+ psubw xmm3, xmm4
+
+ ; dequant
+ movdqa xmm0, [rcx]
+ movdqa xmm1, [rcx + 16]
+
+ mov rcx, [rsi + vp9_blockd_qcoeff] ; qcoeff_ptr
+
+ pmullw xmm0, xmm2
+ pmullw xmm1, xmm3
+
+ movdqa [rcx], xmm2 ; store qcoeff
+ movdqa [rcx + 16], xmm3
+ movdqa [rdi], xmm0 ; store dqcoeff
+ movdqa [rdi + 16], xmm1
+
+ ; select the last value (in zig_zag order) for EOB
+ pcmpeqw xmm2, xmm6 ; xmm6 is still 0: 0xffff where qcoeff == 0
+ pcmpeqw xmm3, xmm6
+ ; !
+ pcmpeqw xmm6, xmm6
+ pxor xmm2, xmm6
+ pxor xmm3, xmm6
+ ; mask inv_zig_zag
+ pand xmm2, [GLOBAL(inv_zig_zag)]
+ pand xmm3, [GLOBAL(inv_zig_zag + 16)]
+ ; select the max value
+ pmaxsw xmm2, xmm3
+ pshufd xmm3, xmm2, 00001110b
+ pmaxsw xmm2, xmm3
+ pshuflw xmm3, xmm2, 00001110b
+ pmaxsw xmm2, xmm3
+ pshuflw xmm3, xmm2, 00000001b
+ pmaxsw xmm2, xmm3
+ movd eax, xmm2
+ and eax, 0xff ; the maximum is in the low word; table values fit in one byte
+ mov [rsi + vp9_blockd_eob], eax
+
+ ; begin epilog
+ add rsp, stack_size
+ pop rsp ; presumably the pre-alignment rsp pushed by ALIGN_STACK - confirm against its definition
+%if ABI_IS_32BIT
+ pop rsi
+ pop rdi
+%else
+ %ifidn __OUTPUT_FORMAT__,x64
+ pop rsi
+ pop rdi
+ %endif
+%endif
+ RESTORE_GOT
+ RESTORE_XMM
+ pop rbp
+ ret
+
+; void vp9_fast_quantize_b_sse2 | arg
+; (BLOCK *b, | 0
+; BLOCKD *d) | 1
+
+global sym(vp9_fast_quantize_b_sse2) PRIVATE
+sym(vp9_fast_quantize_b_sse2): ; quantizes 16 coefficients with no zbin test; writes qcoeff, dqcoeff and eob through d
+ push rbp
+ mov rbp, rsp
+ GET_GOT rbx
+
+%if ABI_IS_32BIT
+ push rdi
+ push rsi
+%else
+ %ifidn __OUTPUT_FORMAT__,x64
+ push rdi
+ push rsi
+ %else
+ ; these registers are used for passing arguments
+ %endif
+%endif
+
+ ; end prolog
+
+%if ABI_IS_32BIT
+ mov rdi, arg(0) ; BLOCK *b
+ mov rsi, arg(1) ; BLOCKD *d
+%else
+ %ifidn __OUTPUT_FORMAT__,x64
+ mov rdi, rcx ; BLOCK *b
+ mov rsi, rdx ; BLOCKD *d
+ %else
+ ;mov rdi, rdi ; BLOCK *b
+ ;mov rsi, rsi ; BLOCKD *d
+ %endif
+%endif
+
+ mov rax, [rdi + vp9_block_coeff]
+ mov rcx, [rdi + vp9_block_round]
+ mov rdx, [rdi + vp9_block_quant_fast]
+
+ ; z = coeff
+ movdqa xmm0, [rax]
+ movdqa xmm4, [rax + 16]
+
+ ; dup z so we can save sz
+ movdqa xmm1, xmm0
+ movdqa xmm5, xmm4
+
+ ; sz = z >> 15
+ psraw xmm0, 15
+ psraw xmm4, 15
+
+ ; x = abs(z) = (z ^ sz) - sz
+ pxor xmm1, xmm0
+ pxor xmm5, xmm4
+ psubw xmm1, xmm0
+ psubw xmm5, xmm4
+
+ ; x += round
+ paddw xmm1, [rcx]
+ paddw xmm5, [rcx + 16]
+
+ mov rax, [rsi + vp9_blockd_qcoeff]
+ mov rcx, [rsi + vp9_blockd_dequant]
+ mov rdi, [rsi + vp9_blockd_dqcoeff] ; rdi (b) is not needed past this point
+
+ ; y = x * quant >> 16
+ pmulhw xmm1, [rdx]
+ pmulhw xmm5, [rdx + 16]
+
+ ; x = (y ^ sz) - sz
+ pxor xmm1, xmm0
+ pxor xmm5, xmm4
+ psubw xmm1, xmm0
+ psubw xmm5, xmm4
+
+ ; qcoeff = x
+ movdqa [rax], xmm1
+ movdqa [rax + 16], xmm5
+
+ ; x * dequant
+ movdqa xmm2, xmm1
+ movdqa xmm3, xmm5
+ pmullw xmm2, [rcx]
+ pmullw xmm3, [rcx + 16]
+
+ ; dqcoeff = x * dequant
+ movdqa [rdi], xmm2
+ movdqa [rdi + 16], xmm3
+
+ pxor xmm4, xmm4 ;clear all bits
+ pcmpeqw xmm1, xmm4 ; 0xffff where qcoeff == 0
+ pcmpeqw xmm5, xmm4
+
+ pcmpeqw xmm4, xmm4 ;set all bits
+ pxor xmm1, xmm4 ; invert: 0xffff where qcoeff != 0
+ pxor xmm5, xmm4
+
+ pand xmm1, [GLOBAL(inv_zig_zag)] ; keep the table entry of each nonzero coefficient
+ pand xmm5, [GLOBAL(inv_zig_zag + 16)]
+
+ pmaxsw xmm1, xmm5 ; horizontal max of the 16 words starts here
+
+ ; now down to 8
+ pshufd xmm5, xmm1, 00001110b
+
+ pmaxsw xmm1, xmm5
+
+ ; only 4 left
+ pshuflw xmm5, xmm1, 00001110b
+
+ pmaxsw xmm1, xmm5
+
+ ; okay, just 2!
+ pshuflw xmm5, xmm1, 00000001b
+
+ pmaxsw xmm1, xmm5
+
+ movd eax, xmm1
+ and eax, 0xff ; the maximum is in the low word; table values fit in one byte
+ mov [rsi + vp9_blockd_eob], eax
+
+ ; begin epilog
+%if ABI_IS_32BIT
+ pop rsi
+ pop rdi
+%else
+ %ifidn __OUTPUT_FORMAT__,x64
+ pop rsi
+ pop rdi
+ %endif
+%endif
+
+ RESTORE_GOT
+ pop rbp
+ ret
+
+SECTION_RODATA
+align 16
+inv_zig_zag: ; entry i = 1-based position of coefficient i in the ZIGZAG_LOOP visiting order
+ dw 0x0001, 0x0002, 0x0006, 0x0007
+ dw 0x0003, 0x0005, 0x0008, 0x000d
+ dw 0x0004, 0x0009, 0x000c, 0x000e
+ dw 0x000a, 0x000b, 0x000f, 0x0010
diff --git a/vp9/encoder/x86/vp9_quantize_sse4.asm b/vp9/encoder/x86/vp9_quantize_sse4.asm
new file mode 100644
index 0000000..1d43ce9
--- /dev/null
+++ b/vp9/encoder/x86/vp9_quantize_sse4.asm
@@ -0,0 +1,254 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license and patent
+; grant that can be found in the LICENSE file in the root of the source
+; tree. All contributing project authors may be found in the AUTHORS
+; file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+%include "vp9_asm_enc_offsets.asm"
+
+
+; void vp9_regular_quantize_b_sse4 | arg
+; (BLOCK *b, | 0
+; BLOCKD *d) | 1
+
+global sym(vp9_regular_quantize_b_sse4) PRIVATE
+sym(vp9_regular_quantize_b_sse4): ; quantizes 16 coefficients; writes qcoeff, dqcoeff and eob through d
+
+%if ABI_IS_32BIT
+ push rbp
+ mov rbp, rsp
+ GET_GOT rbx
+ push rdi
+ push rsi
+
+ ALIGN_STACK 16, rax
+ %define qcoeff 0 ; 32
+ %define stack_size 32
+ sub rsp, stack_size
+%else
+ %ifidn __OUTPUT_FORMAT__,x64
+ SAVE_XMM 8, u
+ push rdi
+ push rsi
+ %endif
+%endif
+ ; end prolog
+
+%if ABI_IS_32BIT
+ mov rdi, arg(0) ; BLOCK *b
+ mov rsi, arg(1) ; BLOCKD *d
+%else
+ %ifidn __OUTPUT_FORMAT__,x64
+ mov rdi, rcx ; BLOCK *b
+ mov rsi, rdx ; BLOCKD *d
+ %else
+ ;mov rdi, rdi ; BLOCK *b
+ ;mov rsi, rsi ; BLOCKD *d
+ %endif
+%endif
+
+ mov rax, [rdi + vp9_block_coeff]
+ mov rcx, [rdi + vp9_block_zbin]
+ mov rdx, [rdi + vp9_block_round]
+ movd xmm7, [rdi + vp9_block_zbin_extra]
+
+ ; z
+ movdqa xmm0, [rax]
+ movdqa xmm1, [rax + 16]
+
+ ; duplicate zbin_oq_value
+ pshuflw xmm7, xmm7, 0
+ punpcklwd xmm7, xmm7
+
+ movdqa xmm2, xmm0
+ movdqa xmm3, xmm1
+
+ ; sz
+ psraw xmm0, 15
+ psraw xmm1, 15
+
+ ; (z ^ sz)
+ pxor xmm2, xmm0
+ pxor xmm3, xmm1
+
+ ; x = abs(z)
+ psubw xmm2, xmm0
+ psubw xmm3, xmm1
+
+ ; zbin
+ movdqa xmm4, [rcx]
+ movdqa xmm5, [rcx + 16]
+
+ ; *zbin_ptr + zbin_oq_value
+ paddw xmm4, xmm7
+ paddw xmm5, xmm7
+
+ movdqa xmm6, xmm2
+ movdqa xmm7, xmm3
+
+ ; x - (*zbin_ptr + zbin_oq_value)
+ psubw xmm6, xmm4
+ psubw xmm7, xmm5
+
+ ; round
+ movdqa xmm4, [rdx]
+ movdqa xmm5, [rdx + 16]
+
+ mov rax, [rdi + vp9_block_quant_shift]
+ mov rcx, [rdi + vp9_block_quant]
+ mov rdx, [rdi + vp9_block_zrun_zbin_boost]
+
+ ; x + round
+ paddw xmm2, xmm4
+ paddw xmm3, xmm5
+
+ ; quant
+ movdqa xmm4, [rcx]
+ movdqa xmm5, [rcx + 16]
+
+ ; y = x * quant_ptr >> 16
+ pmulhw xmm4, xmm2
+ pmulhw xmm5, xmm3
+
+ ; y += x
+ paddw xmm2, xmm4
+ paddw xmm3, xmm5
+
+ pxor xmm4, xmm4 ; qcoeff[0..7] accumulator on 64-bit builds
+%if ABI_IS_32BIT
+ movdqa [rsp + qcoeff], xmm4
+ movdqa [rsp + qcoeff + 16], xmm4
+%else
+ pxor xmm8, xmm8 ; qcoeff[8..15] accumulator on 64-bit builds
+%endif
+
+ ; quant_shift
+ movdqa xmm5, [rax] ; 16 bytes, one per coefficient (read with pextrb below)
+
+ ; zrun_zbin_boost
+ mov rax, rdx ; start of the boost table, used to reset rdx
+
+%macro ZIGZAG_LOOP 5
+ ; %1 = coefficient index, %2 = word lane, %3 = y reg, %4 = (x - zbin) reg, %5 = qcoeff reg
+ pextrw ecx, %4, %2
+
+ ; skip the coefficient unless that value is >= the current zero-run boost
+ sub cx, WORD PTR[rdx] ; subtract *zbin_boost_ptr
+ lea rdx, [rdx + 2] ; zbin_boost_ptr++
+ jl .rq_zigzag_loop_%1 ; below the boosted threshold (lea leaves the flags from sub intact)
+
+ pextrw edi, %3, %2 ; y
+
+ ; downshift by quant_shift[rc]
+ pextrb ecx, xmm5, %1 ; quant_shift[rc]
+ sar edi, cl ; also sets Z bit
+ je .rq_zigzag_loop_%1 ; !y
+%if ABI_IS_32BIT
+ mov WORD PTR[rsp + qcoeff + %1 *2], di
+%else
+ pinsrw %5, edi, %2 ; qcoeff[rc]
+%endif
+ mov rdx, rax ; reset to b->zrun_zbin_boost
+.rq_zigzag_loop_%1:
+%endmacro
+; in vp9_default_zig_zag1d order: see vp9/common/vp9_entropy.c
+ZIGZAG_LOOP 0, 0, xmm2, xmm6, xmm4
+ZIGZAG_LOOP 1, 1, xmm2, xmm6, xmm4
+ZIGZAG_LOOP 4, 4, xmm2, xmm6, xmm4
+ZIGZAG_LOOP 8, 0, xmm3, xmm7, xmm8
+ZIGZAG_LOOP 5, 5, xmm2, xmm6, xmm4
+ZIGZAG_LOOP 2, 2, xmm2, xmm6, xmm4
+ZIGZAG_LOOP 3, 3, xmm2, xmm6, xmm4
+ZIGZAG_LOOP 6, 6, xmm2, xmm6, xmm4
+ZIGZAG_LOOP 9, 1, xmm3, xmm7, xmm8
+ZIGZAG_LOOP 12, 4, xmm3, xmm7, xmm8
+ZIGZAG_LOOP 13, 5, xmm3, xmm7, xmm8
+ZIGZAG_LOOP 10, 2, xmm3, xmm7, xmm8
+ZIGZAG_LOOP 7, 7, xmm2, xmm6, xmm4
+ZIGZAG_LOOP 11, 3, xmm3, xmm7, xmm8
+ZIGZAG_LOOP 14, 6, xmm3, xmm7, xmm8
+ZIGZAG_LOOP 15, 7, xmm3, xmm7, xmm8
+
+ mov rcx, [rsi + vp9_blockd_dequant]
+ mov rdi, [rsi + vp9_blockd_dqcoeff]
+
+%if ABI_IS_32BIT
+ movdqa xmm4, [rsp + qcoeff]
+ movdqa xmm5, [rsp + qcoeff + 16]
+%else
+ %define xmm5 xmm8 ; from here to the %undef below, "xmm5" assembles as xmm8
+%endif
+
+ ; y ^ sz
+ pxor xmm4, xmm0
+ pxor xmm5, xmm1
+ ; x = (y ^ sz) - sz
+ psubw xmm4, xmm0
+ psubw xmm5, xmm1
+
+ ; dequant
+ movdqa xmm0, [rcx]
+ movdqa xmm1, [rcx + 16]
+
+ mov rcx, [rsi + vp9_blockd_qcoeff]
+
+ pmullw xmm0, xmm4
+ pmullw xmm1, xmm5
+
+ ; store qcoeff
+ movdqa [rcx], xmm4
+ movdqa [rcx + 16], xmm5
+
+ ; store dqcoeff
+ movdqa [rdi], xmm0
+ movdqa [rdi + 16], xmm1
+
+ ; select the last value (in zig_zag order) for EOB
+ pxor xmm6, xmm6
+ pcmpeqw xmm4, xmm6 ; 0xffff where qcoeff == 0
+ pcmpeqw xmm5, xmm6
+
+ packsswb xmm4, xmm5 ; one 0xff/0x00 byte per coefficient
+ pshufb xmm4, [GLOBAL(zig_zag1d)] ; reorder the bytes into zig-zag scan order
+ pmovmskb edx, xmm4 ; bit i set = i-th coefficient in scan order is zero
+ xor rdi, rdi
+ mov eax, -1
+ xor dx, ax ; flip the low 16 bits: bit i set = coefficient is nonzero
+ bsr eax, edx ; index of the last nonzero coefficient (undefined when edx is 0, masked off below)
+ sub edi, edx ; edi = -edx
+ sar edi, 31 ; all ones when edx != 0, zero otherwise
+ add eax, 1
+ and eax, edi ; eob = last index + 1, or 0 when every coefficient is zero
+
+ mov [rsi + vp9_blockd_eob], eax
+
+ ; begin epilog
+%if ABI_IS_32BIT
+ add rsp, stack_size
+ pop rsp
+
+ pop rsi
+ pop rdi
+ RESTORE_GOT
+ pop rbp
+%else
+ %undef xmm5
+ %ifidn __OUTPUT_FORMAT__,x64
+ pop rsi
+ pop rdi
+ RESTORE_XMM
+ %endif
+%endif
+
+ ret
+
+SECTION_RODATA
+align 16
+; vp9/common/vp9_entropy.c: vp9_default_zig_zag1d
+zig_zag1d: ; pshufb control: output byte i is taken from input byte zig_zag1d[i]
+ db 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
diff --git a/vp9/encoder/x86/vp9_quantize_ssse3.asm b/vp9/encoder/x86/vp9_quantize_ssse3.asm
new file mode 100644
index 0000000..41edbc1
--- /dev/null
+++ b/vp9/encoder/x86/vp9_quantize_ssse3.asm
@@ -0,0 +1,138 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license and patent
+; grant that can be found in the LICENSE file in the root of the source
+; tree. All contributing project authors may be found in the AUTHORS
+; file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+%include "vp9_asm_enc_offsets.asm"
+
+
+; void vp9_fast_quantize_b_ssse3 | arg
+; (BLOCK *b, | 0
+; BLOCKD *d) | 1
+;
+
+global sym(vp9_fast_quantize_b_ssse3) PRIVATE
+sym(vp9_fast_quantize_b_ssse3): ; quantizes 16 coefficients with no zbin test; writes qcoeff, dqcoeff and eob through d
+ push rbp
+ mov rbp, rsp
+ GET_GOT rbx
+
+%if ABI_IS_32BIT
+ push rdi
+ push rsi
+%else
+ %ifidn __OUTPUT_FORMAT__,x64
+ push rdi
+ push rsi
+ %endif
+%endif
+ ; end prolog
+
+%if ABI_IS_32BIT
+ mov rdi, arg(0) ; BLOCK *b
+ mov rsi, arg(1) ; BLOCKD *d
+%else
+ %ifidn __OUTPUT_FORMAT__,x64
+ mov rdi, rcx ; BLOCK *b
+ mov rsi, rdx ; BLOCKD *d
+ %else
+ ;mov rdi, rdi ; BLOCK *b
+ ;mov rsi, rsi ; BLOCKD *d
+ %endif
+%endif
+
+ mov rax, [rdi + vp9_block_coeff]
+ mov rcx, [rdi + vp9_block_round]
+ mov rdx, [rdi + vp9_block_quant_fast]
+
+ ; coeff
+ movdqa xmm0, [rax]
+ movdqa xmm4, [rax + 16]
+
+ ; round
+ movdqa xmm2, [rcx]
+ movdqa xmm3, [rcx + 16]
+
+ movdqa xmm1, xmm0
+ movdqa xmm5, xmm4
+
+ ; sz = z >> 15
+ psraw xmm0, 15
+ psraw xmm4, 15
+
+ pabsw xmm1, xmm1 ; x = abs(z)
+ pabsw xmm5, xmm5
+
+ paddw xmm1, xmm2 ; x += round
+ paddw xmm5, xmm3
+
+ ; quant_fast
+ pmulhw xmm1, [rdx] ; y = x * quant_fast >> 16
+ pmulhw xmm5, [rdx + 16]
+
+ mov rax, [rsi + vp9_blockd_qcoeff]
+ mov rdi, [rsi + vp9_blockd_dequant] ; rdi (b) is not needed past this point
+ mov rcx, [rsi + vp9_blockd_dqcoeff]
+
+ pxor xmm1, xmm0 ; (y ^ sz) - sz restores the sign
+ pxor xmm5, xmm4
+ psubw xmm1, xmm0
+ psubw xmm5, xmm4
+
+ movdqa [rax], xmm1 ; store qcoeff
+ movdqa [rax + 16], xmm5
+
+ movdqa xmm2, [rdi] ; dequant
+ movdqa xmm3, [rdi + 16]
+
+ pxor xmm4, xmm4
+ pmullw xmm2, xmm1 ; dqcoeff = qcoeff * dequant
+ pmullw xmm3, xmm5
+
+ pcmpeqw xmm1, xmm4 ;zero mask: 0xffff where qcoeff == 0 (inverted below)
+ pcmpeqw xmm5, xmm4 ;zero mask: 0xffff where qcoeff == 0 (inverted below)
+ packsswb xmm1, xmm5 ; one 0xff/0x00 byte per coefficient
+ pshufb xmm1, [GLOBAL(zz_shuf)] ; reorder the bytes with the zz_shuf table
+
+ pmovmskb edx, xmm1 ; bit i set = i-th shuffled coefficient is zero
+
+ xor rdi, rdi
+ mov eax, -1
+ xor dx, ax ;flip the bits for bsr
+ bsr eax, edx ; index of the highest set bit (undefined when edx is 0, masked off below)
+
+ movdqa [rcx], xmm2 ;store dqcoeff
+ movdqa [rcx + 16], xmm3 ;store dqcoeff
+
+ sub edi, edx ;check for all zeros in bit mask
+ sar edi, 31 ;0 or -1
+ add eax, 1
+ and eax, edi ;if the bit mask was all zero,
+ ;then eob = 0
+ mov [rsi + vp9_blockd_eob], eax
+
+ ; begin epilog
+%if ABI_IS_32BIT
+ pop rsi
+ pop rdi
+%else
+ %ifidn __OUTPUT_FORMAT__,x64
+ pop rsi
+ pop rdi
+ %endif
+%endif
+
+ RESTORE_GOT
+ pop rbp
+ ret
+
+SECTION_RODATA
+align 16
+zz_shuf: ; pshufb control: output byte i is taken from input byte zz_shuf[i]
+ db 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
diff --git a/vp9/encoder/x86/vp9_quantize_x86.h b/vp9/encoder/x86/vp9_quantize_x86.h
new file mode 100644
index 0000000..d1db173
--- /dev/null
+++ b/vp9/encoder/x86/vp9_quantize_x86.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license and patent
+ * grant that can be found in the LICENSE file in the root of the source
+ * tree. All contributing project authors may be found in the AUTHORS
+ * file in the root of the source tree.
+ */
+
+#ifndef VP9_ENCODER_X86_VP9_QUANTIZE_X86_H_
+#define VP9_ENCODER_X86_VP9_QUANTIZE_X86_H_
+
+/* x86 declarations and compile-time mappings for the block quantizer. */
+/* Note:
+ *
+ * This platform is commonly built for runtime CPU detection. If you modify
+ * any of the function mappings present in this file, be sure to also update
+ * them in the function pointer initialization code.
+ */
+#if HAVE_MMX
+/* No MMX quantizer is declared or mapped in this header. */
+#endif /* HAVE_MMX */
+
+
+#if HAVE_SSE2
+extern prototype_quantize_block(vp9_regular_quantize_b_sse2);
+#if !CONFIG_RUNTIME_CPU_DETECT
+/* Without runtime detection, bind vp9_quantize_quantb to the SSE2 routine. */
+#undef vp9_quantize_quantb
+#define vp9_quantize_quantb vp9_regular_quantize_b_sse2
+#endif /* !CONFIG_RUNTIME_CPU_DETECT */
+
+#endif /* HAVE_SSE2 */
+
+
+#if HAVE_SSE4_1
+extern prototype_quantize_block(vp9_regular_quantize_b_sse4);
+
+#if !CONFIG_RUNTIME_CPU_DETECT
+/* Evaluated after the SSE2 section, so this mapping replaces the SSE2 one. */
+#undef vp9_quantize_quantb
+#define vp9_quantize_quantb vp9_regular_quantize_b_sse4
+
+#endif /* !CONFIG_RUNTIME_CPU_DETECT */
+
+#endif /* HAVE_SSE4_1 */
+
+#endif /* VP9_ENCODER_X86_VP9_QUANTIZE_X86_H_ */
diff --git a/vp9/encoder/x86/vp9_sad_mmx.asm b/vp9/encoder/x86/vp9_sad_mmx.asm
new file mode 100644
index 0000000..32fdd23
--- /dev/null
+++ b/vp9/encoder/x86/vp9_sad_mmx.asm
@@ -0,0 +1,427 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+global sym(vp9_sad16x16_mmx) PRIVATE
+global sym(vp9_sad8x16_mmx) PRIVATE
+global sym(vp9_sad8x8_mmx) PRIVATE
+global sym(vp9_sad4x4_mmx) PRIVATE
+global sym(vp9_sad16x8_mmx) PRIVATE
+
+;unsigned int vp9_sad16x16_mmx(unsigned char *src_ptr, int src_stride,
+; unsigned char *ref_ptr, int ref_stride)
+; Sum of absolute differences over 16 rows of 16 bytes. |a-b| is built per
+; byte from two saturating subtracts OR-ed together, widened to 16-bit words
+; and accumulated in mm7; the total is returned in the low 32 bits of rax.
+sym(vp9_sad16x16_mmx):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;ref_ptr
+
+ movsxd rax, dword ptr arg(1) ;src_stride
+ movsxd rdx, dword ptr arg(3) ;ref_stride
+
+ lea rcx, [rsi+rax*8]
+ ; rcx = src_ptr + 16*src_stride, the loop's end sentinel
+ lea rcx, [rcx+rax*8]
+ pxor mm7, mm7 ; four 16-bit running sums
+
+ pxor mm6, mm6 ; constant zero used for unpacking
+
+.x16x16sad_mmx_loop:
+ ; one 16-byte row per iteration, handled as two 8-byte halves
+ movq mm0, QWORD PTR [rsi]
+ movq mm2, QWORD PTR [rsi+8]
+
+ movq mm1, QWORD PTR [rdi]
+ movq mm3, QWORD PTR [rdi+8]
+
+ movq mm4, mm0 ; keep copies of the source bytes
+ movq mm5, mm2
+
+ psubusb mm0, mm1 ; max(src-ref, 0)
+ psubusb mm1, mm4 ; max(ref-src, 0)
+
+ psubusb mm2, mm3
+ psubusb mm3, mm5
+
+ por mm0, mm1 ; |src-ref| (one of the two terms is always 0)
+ por mm2, mm3
+
+ movq mm1, mm0
+ movq mm3, mm2
+ ; zero-extend the byte differences to words
+ punpcklbw mm0, mm6
+ punpcklbw mm2, mm6
+
+ punpckhbw mm1, mm6
+ punpckhbw mm3, mm6
+
+ paddw mm0, mm2
+ paddw mm1, mm3
+
+ ; advance both pointers to the next row
+ lea rsi, [rsi+rax]
+ add rdi, rdx
+
+ paddw mm7, mm0
+ paddw mm7, mm1
+
+ cmp rsi, rcx
+ jne .x16x16sad_mmx_loop
+
+ ; horizontal add of the four word sums in mm7
+ movq mm0, mm7
+
+ punpcklwd mm0, mm6
+ punpckhwd mm7, mm6
+
+ paddw mm0, mm7
+ movq mm7, mm0
+
+
+ psrlq mm0, 32
+ paddw mm7, mm0 ; low word of mm7 now holds the total
+
+ movq rax, mm7 ; bits above the low dword are not part of the result
+ ; NOTE(review): no emms is issued here - presumably the caller clears MMX state; confirm
+ pop rdi
+ pop rsi
+ mov rsp, rbp
+ ; begin epilog
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;unsigned int vp9_sad8x16_mmx(unsigned char *src_ptr, int src_stride,
+; unsigned char *ref_ptr, int ref_stride)
+; Sum of absolute differences over 16 rows of 8 bytes, using the same
+; saturating-subtract/OR technique and word accumulation in mm7.
+; The total is returned in the low 32 bits of rax.
+sym(vp9_sad8x16_mmx):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;ref_ptr
+
+ movsxd rax, dword ptr arg(1) ;src_stride
+ movsxd rdx, dword ptr arg(3) ;ref_stride
+
+ lea rcx, [rsi+rax*8]
+ ; rcx = src_ptr + 16*src_stride, the loop's end sentinel
+ lea rcx, [rcx+rax*8]
+ pxor mm7, mm7 ; four 16-bit running sums
+
+ pxor mm6, mm6 ; constant zero used for unpacking
+
+.x8x16sad_mmx_loop:
+ ; one 8-byte row per iteration
+ movq mm0, QWORD PTR [rsi]
+ movq mm1, QWORD PTR [rdi]
+
+ movq mm2, mm0
+ psubusb mm0, mm1 ; max(src-ref, 0)
+
+ psubusb mm1, mm2 ; max(ref-src, 0)
+ por mm0, mm1 ; |src-ref|
+
+ movq mm2, mm0
+ punpcklbw mm0, mm6 ; low four differences as words
+
+ punpckhbw mm2, mm6 ; high four differences as words
+ lea rsi, [rsi+rax]
+
+ add rdi, rdx
+ paddw mm7, mm0
+
+ paddw mm7, mm2
+ cmp rsi, rcx
+
+ jne .x8x16sad_mmx_loop
+ ; horizontal add of the four word sums in mm7
+ movq mm0, mm7
+ punpcklwd mm0, mm6
+
+ punpckhwd mm7, mm6
+ paddw mm0, mm7
+
+ movq mm7, mm0
+ psrlq mm0, 32
+
+ paddw mm7, mm0 ; low word of mm7 now holds the total
+ movq rax, mm7
+
+ pop rdi
+ pop rsi
+ mov rsp, rbp
+ ; begin epilog
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;unsigned int vp9_sad8x8_mmx(unsigned char *src_ptr, int src_stride,
+; unsigned char *ref_ptr, int ref_stride)
+; Sum of absolute differences over 8 rows of 8 bytes, using saturating
+; subtracts OR-ed together and word accumulation in mm7.
+; The total is returned in the low 32 bits of rax.
+sym(vp9_sad8x8_mmx):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;ref_ptr
+
+ movsxd rax, dword ptr arg(1) ;src_stride
+ movsxd rdx, dword ptr arg(3) ;ref_stride
+
+ lea rcx, [rsi+rax*8] ; end sentinel: src_ptr + 8*src_stride
+ pxor mm7, mm7 ; four 16-bit running sums
+
+ pxor mm6, mm6 ; constant zero used for unpacking
+
+.x8x8sad_mmx_loop:
+ ; one 8-byte row per iteration
+ movq mm0, QWORD PTR [rsi]
+ movq mm1, QWORD PTR [rdi]
+
+ movq mm2, mm0
+ psubusb mm0, mm1 ; max(src-ref, 0)
+
+ psubusb mm1, mm2 ; max(ref-src, 0)
+ por mm0, mm1 ; |src-ref|
+
+ movq mm2, mm0
+ punpcklbw mm0, mm6
+
+ punpckhbw mm2, mm6
+ paddw mm0, mm2 ; fold the row into four word sums
+
+ lea rsi, [rsi+rax]
+ add rdi, rdx
+
+ paddw mm7, mm0
+ cmp rsi, rcx
+
+ jne .x8x8sad_mmx_loop
+ ; horizontal add of the four word sums in mm7
+ movq mm0, mm7
+ punpcklwd mm0, mm6
+
+ punpckhwd mm7, mm6
+ paddw mm0, mm7
+
+ movq mm7, mm0
+ psrlq mm0, 32
+
+ paddw mm7, mm0 ; low word of mm7 now holds the total
+ movq rax, mm7
+
+ pop rdi
+ pop rsi
+ mov rsp, rbp
+ ; begin epilog
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;unsigned int vp9_sad4x4_mmx(unsigned char *src_ptr, int src_stride,
+; unsigned char *ref_ptr, int ref_stride)
+; Sum of absolute differences over 4 rows of 4 bytes, fully unrolled.
+; Rows are processed in pairs: two 4-byte rows are interleaved into one
+; 8-byte register. The total is returned in the low 32 bits of rax.
+sym(vp9_sad4x4_mmx):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;ref_ptr
+
+ movsxd rax, dword ptr arg(1) ;src_stride
+ movsxd rdx, dword ptr arg(3) ;ref_stride
+ ; rows 0 and 1
+ movd mm0, DWORD PTR [rsi]
+ movd mm1, DWORD PTR [rdi]
+
+ movd mm2, DWORD PTR [rsi+rax]
+ movd mm3, DWORD PTR [rdi+rdx]
+ ; same interleave on both operands, so bytes still pair up
+ punpcklbw mm0, mm2
+ punpcklbw mm1, mm3
+
+ movq mm2, mm0
+ psubusb mm0, mm1 ; max(src-ref, 0)
+
+ psubusb mm1, mm2 ; max(ref-src, 0)
+ por mm0, mm1 ; |src-ref|
+
+ movq mm2, mm0
+ pxor mm3, mm3 ; constant zero used for unpacking from here on
+
+ punpcklbw mm0, mm3
+ punpckhbw mm2, mm3
+
+ paddw mm0, mm2 ; word sums for rows 0-1
+
+ lea rsi, [rsi+rax*2]
+ lea rdi, [rdi+rdx*2]
+ ; rows 2 and 3
+ movd mm4, DWORD PTR [rsi]
+ movd mm5, DWORD PTR [rdi]
+
+ movd mm6, DWORD PTR [rsi+rax]
+ movd mm7, DWORD PTR [rdi+rdx]
+
+ punpcklbw mm4, mm6
+ punpcklbw mm5, mm7
+
+ movq mm6, mm4
+ psubusb mm4, mm5
+
+ psubusb mm5, mm6
+ por mm4, mm5 ; |src-ref|
+
+ movq mm5, mm4
+ punpcklbw mm4, mm3
+
+ punpckhbw mm5, mm3
+ paddw mm4, mm5 ; word sums for rows 2-3
+
+ paddw mm0, mm4
+ movq mm1, mm0
+ ; horizontal add of the four word sums
+ punpcklwd mm0, mm3
+ punpckhwd mm1, mm3
+
+ paddw mm0, mm1
+ movq mm1, mm0
+
+ psrlq mm0, 32
+ paddw mm0, mm1 ; low word of mm0 now holds the total
+
+ movq rax, mm0
+
+ pop rdi
+ pop rsi
+ mov rsp, rbp
+ ; begin epilog
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;unsigned int vp9_sad16x8_mmx(unsigned char *src_ptr, int src_stride,
+; unsigned char *ref_ptr, int ref_stride)
+; Sum of absolute differences over 8 rows of 16 bytes, using saturating
+; subtracts OR-ed together and word accumulation in mm7.
+; The total is returned in the low 32 bits of rax.
+sym(vp9_sad16x8_mmx):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;ref_ptr
+
+ movsxd rax, dword ptr arg(1) ;src_stride
+ movsxd rdx, dword ptr arg(3) ;ref_stride
+
+ lea rcx, [rsi+rax*8] ; end sentinel: src_ptr + 8*src_stride
+ pxor mm7, mm7 ; four 16-bit running sums
+
+ pxor mm6, mm6 ; constant zero used for unpacking
+
+.x16x8sad_mmx_loop:
+ ; one 16-byte row per iteration, handled as two 8-byte halves
+ movq mm0, [rsi]
+ movq mm1, [rdi]
+
+ movq mm2, [rsi+8]
+ movq mm3, [rdi+8]
+
+ movq mm4, mm0 ; keep copies of the source bytes
+ movq mm5, mm2
+
+ psubusb mm0, mm1 ; max(src-ref, 0)
+ psubusb mm1, mm4 ; max(ref-src, 0)
+
+ psubusb mm2, mm3
+ psubusb mm3, mm5
+
+ por mm0, mm1 ; |src-ref|
+ por mm2, mm3
+
+ movq mm1, mm0
+ movq mm3, mm2
+ ; zero-extend the byte differences to words
+ punpcklbw mm0, mm6
+ punpckhbw mm1, mm6
+
+ punpcklbw mm2, mm6
+ punpckhbw mm3, mm6
+
+
+ paddw mm0, mm2
+ paddw mm1, mm3
+
+ paddw mm0, mm1 ; fold the row into four word sums
+ lea rsi, [rsi+rax]
+
+ add rdi, rdx
+ paddw mm7, mm0
+
+ cmp rsi, rcx
+ jne .x16x8sad_mmx_loop
+ ; horizontal add of the four word sums in mm7
+ movq mm0, mm7
+ punpcklwd mm0, mm6
+
+ punpckhwd mm7, mm6
+ paddw mm0, mm7
+
+ movq mm7, mm0
+ psrlq mm0, 32
+
+ paddw mm7, mm0 ; low word of mm7 now holds the total
+ movq rax, mm7
+
+ pop rdi
+ pop rsi
+ mov rsp, rbp
+ ; begin epilog
+ UNSHADOW_ARGS
+ pop rbp
+ ret
diff --git a/vp9/encoder/x86/vp9_sad_sse2.asm b/vp9/encoder/x86/vp9_sad_sse2.asm
new file mode 100644
index 0000000..3327163
--- /dev/null
+++ b/vp9/encoder/x86/vp9_sad_sse2.asm
@@ -0,0 +1,410 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+;unsigned int vp9_sad16x16_wmt(unsigned char *src_ptr, int src_stride,
+; unsigned char *ref_ptr, int ref_stride)
+; Sum of absolute differences over 16 rows of 16 bytes using psadbw on
+; xmm registers. Rows are read as two 8-byte halves (movq), two rows per
+; loop iteration; partial sums accumulate in xmm6.
+global sym(vp9_sad16x16_wmt) PRIVATE
+sym(vp9_sad16x16_wmt):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ SAVE_XMM 6
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;ref_ptr
+
+ movsxd rax, dword ptr arg(1) ;src_stride
+ movsxd rdx, dword ptr arg(3) ;ref_stride
+
+ lea rcx, [rsi+rax*8]
+ ; rcx = src_ptr + 16*src_stride, the loop's end sentinel
+ lea rcx, [rcx+rax*8]
+ pxor xmm6, xmm6 ; accumulator: one partial sum per 64-bit half
+
+.x16x16sad_wmt_loop:
+ ; first row of the pair: low and high 8 bytes loaded separately
+ movq xmm0, QWORD PTR [rsi]
+ movq xmm2, QWORD PTR [rsi+8]
+
+ movq xmm1, QWORD PTR [rdi]
+ movq xmm3, QWORD PTR [rdi+8]
+
+ movq xmm4, QWORD PTR [rsi+rax]
+ movq xmm5, QWORD PTR [rdi+rdx]
+
+ ; merge the two halves; src and ref use the same interleave, so bytes still pair up
+ punpcklbw xmm0, xmm2
+ punpcklbw xmm1, xmm3
+
+ psadbw xmm0, xmm1 ; SAD of the first row
+ movq xmm2, QWORD PTR [rsi+rax+8]
+
+ movq xmm3, QWORD PTR [rdi+rdx+8]
+ lea rsi, [rsi+rax*2]
+
+ lea rdi, [rdi+rdx*2]
+ punpcklbw xmm4, xmm2
+
+ punpcklbw xmm5, xmm3
+ psadbw xmm4, xmm5 ; SAD of the second row
+
+ paddw xmm6, xmm0
+ paddw xmm6, xmm4
+
+ cmp rsi, rcx
+ jne .x16x16sad_wmt_loop
+ ; add the two 64-bit partial sums and return the total
+ movq xmm0, xmm6
+ psrldq xmm6, 8
+
+ paddw xmm0, xmm6
+ movq rax, xmm0
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;unsigned int vp9_sad8x16_wmt(
+; unsigned char *src_ptr,
+; int src_stride,
+; unsigned char *ref_ptr,
+; int ref_stride,
+; int max_err) - returns the running sum early once it exceeds max_err
+global sym(vp9_sad8x16_wmt) PRIVATE
+sym(vp9_sad8x16_wmt):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 5
+ push rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;ref_ptr
+
+ movsxd rbx, dword ptr arg(1) ;src_stride
+ movsxd rdx, dword ptr arg(3) ;ref_stride
+
+ lea rcx, [rsi+rbx*8]
+ ; rcx = src_ptr + 16*src_stride, the loop's end sentinel
+ lea rcx, [rcx+rbx*8]
+ pxor mm7, mm7 ; running SAD
+
+.x8x16sad_wmt_loop:
+ ; early-out test, performed before every pair of rows
+ movq rax, mm7
+ cmp eax, arg(4)
+ jg .x8x16sad_wmt_early_exit
+
+ movq mm0, QWORD PTR [rsi]
+ movq mm1, QWORD PTR [rdi]
+
+ movq mm2, QWORD PTR [rsi+rbx]
+ movq mm3, QWORD PTR [rdi+rdx]
+
+ psadbw mm0, mm1 ; SAD of the first row of the pair
+ psadbw mm2, mm3 ; SAD of the second row of the pair
+
+ lea rsi, [rsi+rbx*2]
+ lea rdi, [rdi+rdx*2]
+
+ paddw mm7, mm0
+ paddw mm7, mm2
+
+ cmp rsi, rcx
+ jne .x8x16sad_wmt_loop
+
+ movq rax, mm7
+
+.x8x16sad_wmt_early_exit:
+ ; rax holds either the full SAD or the partial sum that exceeded max_err
+ ; begin epilog
+ pop rdi
+ pop rsi
+ pop rbx
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;unsigned int vp9_sad8x8_wmt(
+; unsigned char *src_ptr,
+; int src_stride,
+; unsigned char *ref_ptr,
+; int ref_stride, int max_err) - fifth argument is read as arg(4) below
+global sym(vp9_sad8x8_wmt) PRIVATE
+sym(vp9_sad8x8_wmt):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 5
+ push rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;ref_ptr
+
+ movsxd rbx, dword ptr arg(1) ;src_stride
+ movsxd rdx, dword ptr arg(3) ;ref_stride
+
+ lea rcx, [rsi+rbx*8] ; end sentinel: src_ptr + 8*src_stride
+ pxor mm7, mm7 ; running SAD
+
+.x8x8sad_wmt_loop:
+ ; early-out test, performed before every row
+ movq rax, mm7
+ cmp eax, arg(4)
+ jg .x8x8sad_wmt_early_exit
+
+ movq mm0, QWORD PTR [rsi]
+ movq mm1, QWORD PTR [rdi]
+
+ psadbw mm0, mm1 ; SAD of this 8-byte row
+ lea rsi, [rsi+rbx]
+
+ add rdi, rdx
+ paddw mm7, mm0
+
+ cmp rsi, rcx
+ jne .x8x8sad_wmt_loop
+
+ movq rax, mm7
+.x8x8sad_wmt_early_exit:
+ ; rax holds either the full SAD or the partial sum that exceeded max_err
+ ; begin epilog
+ pop rdi
+ pop rsi
+ pop rbx
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;unsigned int vp9_sad4x4_wmt(unsigned char *src_ptr, int src_stride,
+; unsigned char *ref_ptr, int ref_stride)
+; Sum of absolute differences over 4 rows of 4 bytes, fully unrolled.
+; Two 4-byte rows are interleaved into one 8-byte register so a single
+; psadbw covers a row pair.
+global sym(vp9_sad4x4_wmt) PRIVATE
+sym(vp9_sad4x4_wmt):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;ref_ptr
+
+ movsxd rax, dword ptr arg(1) ;src_stride
+ movsxd rdx, dword ptr arg(3) ;ref_stride
+ ; rows 0 and 1
+ movd mm0, DWORD PTR [rsi]
+ movd mm1, DWORD PTR [rdi]
+
+ movd mm2, DWORD PTR [rsi+rax]
+ movd mm3, DWORD PTR [rdi+rdx]
+ ; same interleave on both operands, so bytes still pair up
+ punpcklbw mm0, mm2
+ punpcklbw mm1, mm3
+
+ psadbw mm0, mm1 ; SAD of rows 0-1
+ lea rsi, [rsi+rax*2]
+
+ lea rdi, [rdi+rdx*2]
+ movd mm4, DWORD PTR [rsi]
+ ; rows 2 and 3
+ movd mm5, DWORD PTR [rdi]
+ movd mm6, DWORD PTR [rsi+rax]
+
+ movd mm7, DWORD PTR [rdi+rdx]
+ punpcklbw mm4, mm6
+
+ punpcklbw mm5, mm7
+ psadbw mm4, mm5 ; SAD of rows 2-3
+
+ paddw mm0, mm4
+ movq rax, mm0 ; total
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;unsigned int vp9_sad16x8_wmt(
+; unsigned char *src_ptr,
+; int src_stride,
+; unsigned char *ref_ptr,
+; int ref_stride, int max_err) - fifth argument is read as arg(4) below
+global sym(vp9_sad16x8_wmt) PRIVATE
+sym(vp9_sad16x8_wmt):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 5
+ push rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;ref_ptr
+
+ movsxd rbx, dword ptr arg(1) ;src_stride
+ movsxd rdx, dword ptr arg(3) ;ref_stride
+
+ lea rcx, [rsi+rbx*8] ; end sentinel: src_ptr + 8*src_stride
+ pxor mm7, mm7 ; running SAD
+
+.x16x8sad_wmt_loop:
+ ; early-out test, performed before every pair of rows
+ movq rax, mm7
+ cmp eax, arg(4)
+ jg .x16x8sad_wmt_early_exit
+ ; first row of the pair, as two 8-byte halves
+ movq mm0, QWORD PTR [rsi]
+ movq mm2, QWORD PTR [rsi+8]
+
+ movq mm1, QWORD PTR [rdi]
+ movq mm3, QWORD PTR [rdi+8]
+ ; second row of the pair, low halves
+ movq mm4, QWORD PTR [rsi+rbx]
+ movq mm5, QWORD PTR [rdi+rdx]
+
+ psadbw mm0, mm1
+ psadbw mm2, mm3
+ ; second row of the pair, high halves
+ movq mm1, QWORD PTR [rsi+rbx+8]
+ movq mm3, QWORD PTR [rdi+rdx+8]
+
+ psadbw mm4, mm5
+ psadbw mm1, mm3
+
+ lea rsi, [rsi+rbx*2]
+ lea rdi, [rdi+rdx*2]
+
+ paddw mm0, mm2
+ paddw mm4, mm1
+
+ paddw mm7, mm0
+ paddw mm7, mm4
+
+ cmp rsi, rcx
+ jne .x16x8sad_wmt_loop
+
+ movq rax, mm7
+
+.x16x8sad_wmt_early_exit:
+ ; rax holds either the full SAD or the partial sum that exceeded max_err
+ ; begin epilog
+ pop rdi
+ pop rsi
+ pop rbx
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void vp9_copy32xn_sse2(
+; unsigned char *src_ptr,
+; int src_stride,
+; unsigned char *dst_ptr,
+; int dst_stride,
+; int height); copies 32 bytes per row; loads are unaligned, stores use movdqa
+global sym(vp9_copy32xn_sse2) PRIVATE
+sym(vp9_copy32xn_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 5
+ SAVE_XMM 7
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;dst_ptr
+
+ movsxd rax, dword ptr arg(1) ;src_stride
+ movsxd rdx, dword ptr arg(3) ;dst_stride
+ movsxd rcx, dword ptr arg(4) ;height
+ ; NOTE(review): the 4-row loop runs once before height is tested - presumably callers pass height >= 4; confirm
+.block_copy_sse2_loopx4:
+ movdqu xmm0, XMMWORD PTR [rsi]
+ movdqu xmm1, XMMWORD PTR [rsi + 16]
+ movdqu xmm2, XMMWORD PTR [rsi + rax]
+ movdqu xmm3, XMMWORD PTR [rsi + rax + 16]
+
+ lea rsi, [rsi+rax*2]
+
+ movdqu xmm4, XMMWORD PTR [rsi]
+ movdqu xmm5, XMMWORD PTR [rsi + 16]
+ movdqu xmm6, XMMWORD PTR [rsi + rax]
+ movdqu xmm7, XMMWORD PTR [rsi + rax + 16]
+
+ lea rsi, [rsi+rax*2]
+ ; aligned stores: each destination address must be 16-byte aligned
+ movdqa XMMWORD PTR [rdi], xmm0
+ movdqa XMMWORD PTR [rdi + 16], xmm1
+ movdqa XMMWORD PTR [rdi + rdx], xmm2
+ movdqa XMMWORD PTR [rdi + rdx + 16], xmm3
+
+ lea rdi, [rdi+rdx*2]
+
+ movdqa XMMWORD PTR [rdi], xmm4
+ movdqa XMMWORD PTR [rdi + 16], xmm5
+ movdqa XMMWORD PTR [rdi + rdx], xmm6
+ movdqa XMMWORD PTR [rdi + rdx + 16], xmm7
+
+ lea rdi, [rdi+rdx*2]
+
+ sub rcx, 4
+ cmp rcx, 4
+ jge .block_copy_sse2_loopx4 ; keep going while at least four rows remain
+
+ cmp rcx, 0
+ je .copy_is_done
+
+.block_copy_sse2_loop:
+ movdqu xmm0, XMMWORD PTR [rsi] ; remaining rows, one at a time
+ movdqu xmm1, XMMWORD PTR [rsi + 16]
+ lea rsi, [rsi+rax]
+
+ movdqa XMMWORD PTR [rdi], xmm0
+ movdqa XMMWORD PTR [rdi + 16], xmm1
+ lea rdi, [rdi+rdx]
+
+ sub rcx, 1
+ jne .block_copy_sse2_loop
+
+.copy_is_done:
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
diff --git a/vp9/encoder/x86/vp9_sad_sse3.asm b/vp9/encoder/x86/vp9_sad_sse3.asm
new file mode 100644
index 0000000..2c409cb
--- /dev/null
+++ b/vp9/encoder/x86/vp9_sad_sse3.asm
@@ -0,0 +1,960 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+%include "vpx_ports/x86_abi_support.asm"
+; STACK_FRAME_CREATE_X3: shared prologue; names where each ABI keeps the five arguments (result_ptr, max_err and height all alias the fifth).
+%macro STACK_FRAME_CREATE_X3 0
+%if ABI_IS_32BIT
+ %define src_ptr rsi
+ %define src_stride rax
+ %define ref_ptr rdi
+ %define ref_stride rdx
+ %define end_ptr rcx
+ %define ret_var rbx
+ %define result_ptr arg(4)
+ %define max_err arg(4)
+ %define height dword ptr arg(4)
+ push rbp
+ mov rbp, rsp
+ push rsi
+ push rdi
+ push rbx
+ ; 32-bit: build a frame, then load the first four arguments into registers
+ mov rsi, arg(0) ; src_ptr
+ mov rdi, arg(2) ; ref_ptr
+
+ movsxd rax, dword ptr arg(1) ; src_stride
+ movsxd rdx, dword ptr arg(3) ; ref_stride
+%else
+ %ifidn __OUTPUT_FORMAT__,x64
+ SAVE_XMM 7, u
+ %define src_ptr rcx
+ %define src_stride rdx
+ %define ref_ptr r8
+ %define ref_stride r9
+ %define end_ptr r10
+ %define ret_var r11
+ %define result_ptr [rsp+xmm_stack_space+8+4*8]
+ %define max_err [rsp+xmm_stack_space+8+4*8]
+ %define height dword ptr [rsp+xmm_stack_space+8+4*8]
+ %else
+ %define src_ptr rdi
+ %define src_stride rsi
+ %define ref_ptr rdx
+ %define ref_stride rcx
+ %define end_ptr r9
+ %define ret_var r10
+ %define result_ptr r8
+ %define max_err r8
+ %define height r8
+ %endif
+%endif
+ ; NOTE(review): only the 32-bit path sign-extends the int strides (movsxd); the 64-bit paths use the registers as passed - confirm callers
+%endmacro
+; STACK_FRAME_DESTROY_X3: epilogue matching STACK_FRAME_CREATE_X3; clears the argument aliases, restores saved state and returns.
+%macro STACK_FRAME_DESTROY_X3 0
+ %define src_ptr
+ %define src_stride
+ %define ref_ptr
+ %define ref_stride
+ %define end_ptr
+ %define ret_var
+ %define result_ptr
+ %define max_err
+ %define height
+ ; undo, in reverse order, what the prologue pushed or saved for this ABI
+%if ABI_IS_32BIT
+ pop rbx
+ pop rdi
+ pop rsi
+ pop rbp
+%else
+ %ifidn __OUTPUT_FORMAT__,x64
+ RESTORE_XMM
+ %endif
+%endif
+ ret
+%endmacro
+; STACK_FRAME_CREATE_X4: prologue for the x4d routines; loads four reference pointers from the array passed as the third argument into r0_ptr..r3_ptr.
+%macro STACK_FRAME_CREATE_X4 0
+%if ABI_IS_32BIT
+ %define src_ptr rsi
+ %define src_stride rax
+ %define r0_ptr rcx
+ %define r1_ptr rdx
+ %define r2_ptr rbx
+ %define r3_ptr rdi
+ %define ref_stride rbp
+ %define result_ptr arg(4)
+ push rbp
+ mov rbp, rsp
+ push rsi
+ push rdi
+ push rbx
+ ; rbp is reused as ref_stride below, so save the frame pointer a second time
+ push rbp
+ mov rdi, arg(2) ; ref_ptr_base
+ ; third pointer lands in rax for now; the xchg below moves it to rbx
+ LOAD_X4_ADDRESSES rdi, rcx, rdx, rax, rdi
+
+ mov rsi, arg(0) ; src_ptr
+
+ movsxd rbx, dword ptr arg(1) ; src_stride
+ movsxd rbp, dword ptr arg(3) ; ref_stride
+ ; after this point arg() is unusable until the caller pops rbp again
+ xchg rbx, rax
+%else
+ %ifidn __OUTPUT_FORMAT__,x64
+ SAVE_XMM 7, u
+ %define src_ptr rcx
+ %define src_stride rdx
+ %define r0_ptr rsi
+ %define r1_ptr r10
+ %define r2_ptr r11
+ %define r3_ptr r8
+ %define ref_stride r9
+ %define result_ptr [rsp+xmm_stack_space+16+4*8]
+ push rsi
+
+ LOAD_X4_ADDRESSES r8, r0_ptr, r1_ptr, r2_ptr, r3_ptr
+ %else
+ %define src_ptr rdi
+ %define src_stride rsi
+ %define r0_ptr r9
+ %define r1_ptr r10
+ %define r2_ptr r11
+ %define r3_ptr rdx
+ %define ref_stride rcx
+ %define result_ptr r8
+
+ LOAD_X4_ADDRESSES rdx, r0_ptr, r1_ptr, r2_ptr, r3_ptr
+
+ %endif
+%endif
+%endmacro
+; STACK_FRAME_DESTROY_X4: epilogue matching STACK_FRAME_CREATE_X4; on 32-bit the caller must already have popped the extra saved rbp.
+%macro STACK_FRAME_DESTROY_X4 0
+ %define src_ptr
+ %define src_stride
+ %define r0_ptr
+ %define r1_ptr
+ %define r2_ptr
+ %define r3_ptr
+ %define ref_stride
+ %define result_ptr
+ ; undo, in reverse order, what the prologue pushed or saved for this ABI
+%if ABI_IS_32BIT
+ pop rbx
+ pop rdi
+ pop rsi
+ pop rbp
+%else
+ %ifidn __OUTPUT_FORMAT__,x64
+ pop rsi
+ RESTORE_XMM
+ %endif
+%endif
+ ret
+%endmacro
+; PROCESS_16X2X3 mode, src, ref, src_stride, ref_stride: two 16-byte rows of SAD at ref+0/+1/+2 into xmm5/xmm6/xmm7. mode 0 initialises the sums, 0 and 1 advance both pointers by two rows, 2 does not advance.
+%macro PROCESS_16X2X3 5
+%if %1==0
+ movdqa xmm0, XMMWORD PTR [%2]
+ lddqu xmm5, XMMWORD PTR [%3]
+ lddqu xmm6, XMMWORD PTR [%3+1]
+ lddqu xmm7, XMMWORD PTR [%3+2]
+ ; first call: the SADs become the initial accumulator values
+ psadbw xmm5, xmm0
+ psadbw xmm6, xmm0
+ psadbw xmm7, xmm0
+%else
+ movdqa xmm0, XMMWORD PTR [%2]
+ lddqu xmm1, XMMWORD PTR [%3]
+ lddqu xmm2, XMMWORD PTR [%3+1]
+ lddqu xmm3, XMMWORD PTR [%3+2]
+
+ psadbw xmm1, xmm0
+ psadbw xmm2, xmm0
+ psadbw xmm3, xmm0
+
+ paddw xmm5, xmm1
+ paddw xmm6, xmm2
+ paddw xmm7, xmm3
+%endif
+ movdqa xmm0, XMMWORD PTR [%2+%4]
+ lddqu xmm1, XMMWORD PTR [%3+%5]
+ lddqu xmm2, XMMWORD PTR [%3+%5+1]
+ lddqu xmm3, XMMWORD PTR [%3+%5+2]
+
+%if %1==0 || %1==1
+ lea %2, [%2+%4*2]
+ lea %3, [%3+%5*2]
+%endif
+ ; second row of the pair (already loaded above)
+ psadbw xmm1, xmm0
+ psadbw xmm2, xmm0
+ psadbw xmm3, xmm0
+
+ paddw xmm5, xmm1
+ paddw xmm6, xmm2
+ paddw xmm7, xmm3
+%endmacro
+; PROCESS_8X2X3 mode, src, ref, src_stride, ref_stride: two 8-byte rows of SAD at ref+0/+1/+2 into mm5/mm6/mm7. mode 0 initialises the sums, 0 and 1 advance both pointers by two rows, 2 does not advance.
+%macro PROCESS_8X2X3 5
+%if %1==0
+ movq mm0, QWORD PTR [%2]
+ movq mm5, QWORD PTR [%3]
+ movq mm6, QWORD PTR [%3+1]
+ movq mm7, QWORD PTR [%3+2]
+ ; first call: the SADs become the initial accumulator values
+ psadbw mm5, mm0
+ psadbw mm6, mm0
+ psadbw mm7, mm0
+%else
+ movq mm0, QWORD PTR [%2]
+ movq mm1, QWORD PTR [%3]
+ movq mm2, QWORD PTR [%3+1]
+ movq mm3, QWORD PTR [%3+2]
+
+ psadbw mm1, mm0
+ psadbw mm2, mm0
+ psadbw mm3, mm0
+
+ paddw mm5, mm1
+ paddw mm6, mm2
+ paddw mm7, mm3
+%endif
+ movq mm0, QWORD PTR [%2+%4]
+ movq mm1, QWORD PTR [%3+%5]
+ movq mm2, QWORD PTR [%3+%5+1]
+ movq mm3, QWORD PTR [%3+%5+2]
+
+%if %1==0 || %1==1
+ lea %2, [%2+%4*2]
+ lea %3, [%3+%5*2]
+%endif
+ ; second row of the pair (already loaded above)
+ psadbw mm1, mm0
+ psadbw mm2, mm0
+ psadbw mm3, mm0
+
+ paddw mm5, mm1
+ paddw mm6, mm2
+ paddw mm7, mm3
+%endmacro
+; LOAD_X4_ADDRESSES base, d0, d1, d2, d3: loads four consecutive pointer-sized entries starting at [base] into d0..d3.
+%macro LOAD_X4_ADDRESSES 5
+ mov %2, [%1+REG_SZ_BYTES*0]
+ mov %3, [%1+REG_SZ_BYTES*1]
+ ; the last destination is written last, so it may be the same register as the base
+ mov %4, [%1+REG_SZ_BYTES*2]
+ mov %5, [%1+REG_SZ_BYTES*3]
+%endmacro
+; PROCESS_16X2X4 mode, src, ref0, ref1, ref2, ref3, src_stride, ref_stride: two 16-byte rows of SAD against four references into xmm4..xmm7. mode 0 initialises the sums, 0 and 1 advance all pointers by two rows, 2 does not advance.
+%macro PROCESS_16X2X4 8
+%if %1==0
+ movdqa xmm0, XMMWORD PTR [%2]
+ lddqu xmm4, XMMWORD PTR [%3]
+ lddqu xmm5, XMMWORD PTR [%4]
+ lddqu xmm6, XMMWORD PTR [%5]
+ lddqu xmm7, XMMWORD PTR [%6]
+ ; first call: the SADs become the initial accumulator values
+ psadbw xmm4, xmm0
+ psadbw xmm5, xmm0
+ psadbw xmm6, xmm0
+ psadbw xmm7, xmm0
+%else
+ movdqa xmm0, XMMWORD PTR [%2]
+ lddqu xmm1, XMMWORD PTR [%3]
+ lddqu xmm2, XMMWORD PTR [%4]
+ lddqu xmm3, XMMWORD PTR [%5]
+
+ psadbw xmm1, xmm0
+ psadbw xmm2, xmm0
+ psadbw xmm3, xmm0
+
+ paddw xmm4, xmm1
+ lddqu xmm1, XMMWORD PTR [%6]
+ paddw xmm5, xmm2
+ paddw xmm6, xmm3
+
+ psadbw xmm1, xmm0
+ paddw xmm7, xmm1
+%endif
+ movdqa xmm0, XMMWORD PTR [%2+%7]
+ lddqu xmm1, XMMWORD PTR [%3+%8]
+ lddqu xmm2, XMMWORD PTR [%4+%8]
+ lddqu xmm3, XMMWORD PTR [%5+%8]
+ ; second row of the pair
+ psadbw xmm1, xmm0
+ psadbw xmm2, xmm0
+ psadbw xmm3, xmm0
+
+ paddw xmm4, xmm1
+ lddqu xmm1, XMMWORD PTR [%6+%8]
+ paddw xmm5, xmm2
+ paddw xmm6, xmm3
+
+%if %1==0 || %1==1
+ lea %2, [%2+%7*2]
+ lea %3, [%3+%8*2]
+
+ lea %4, [%4+%8*2]
+ lea %5, [%5+%8*2]
+
+ lea %6, [%6+%8*2]
+%endif
+ psadbw xmm1, xmm0
+ paddw xmm7, xmm1
+
+%endmacro
+; PROCESS_8X2X4 mode, src, ref0, ref1, ref2, ref3, src_stride, ref_stride: two 8-byte rows of SAD against four references into mm4..mm7. mode 0 initialises the sums, 0 and 1 advance all pointers by two rows, 2 does not advance.
+%macro PROCESS_8X2X4 8
+%if %1==0
+ movq mm0, QWORD PTR [%2]
+ movq mm4, QWORD PTR [%3]
+ movq mm5, QWORD PTR [%4]
+ movq mm6, QWORD PTR [%5]
+ movq mm7, QWORD PTR [%6]
+ ; first call: the SADs become the initial accumulator values
+ psadbw mm4, mm0
+ psadbw mm5, mm0
+ psadbw mm6, mm0
+ psadbw mm7, mm0
+%else
+ movq mm0, QWORD PTR [%2]
+ movq mm1, QWORD PTR [%3]
+ movq mm2, QWORD PTR [%4]
+ movq mm3, QWORD PTR [%5]
+
+ psadbw mm1, mm0
+ psadbw mm2, mm0
+ psadbw mm3, mm0
+
+ paddw mm4, mm1
+ movq mm1, QWORD PTR [%6]
+ paddw mm5, mm2
+ paddw mm6, mm3
+
+ psadbw mm1, mm0
+ paddw mm7, mm1
+%endif
+ movq mm0, QWORD PTR [%2+%7]
+ movq mm1, QWORD PTR [%3+%8]
+ movq mm2, QWORD PTR [%4+%8]
+ movq mm3, QWORD PTR [%5+%8]
+ ; second row of the pair
+ psadbw mm1, mm0
+ psadbw mm2, mm0
+ psadbw mm3, mm0
+
+ paddw mm4, mm1
+ movq mm1, QWORD PTR [%6+%8]
+ paddw mm5, mm2
+ paddw mm6, mm3
+
+%if %1==0 || %1==1
+ lea %2, [%2+%7*2]
+ lea %3, [%3+%8*2]
+
+ lea %4, [%4+%8*2]
+ lea %5, [%5+%8*2]
+
+ lea %6, [%6+%8*2]
+%endif
+ psadbw mm1, mm0
+ paddw mm7, mm1
+
+%endmacro
+
+;void vp9_sad16x16x3_sse3(
+; unsigned char *src_ptr,
+; int src_stride,
+; unsigned char *ref_ptr,
+; int ref_stride,
+; int *results) - receives three SADs, for ref_ptr+0, +1 and +2
+global sym(vp9_sad16x16x3_sse3) PRIVATE
+sym(vp9_sad16x16x3_sse3):
+
+ STACK_FRAME_CREATE_X3
+ ; eight macro calls, two rows each = 16 rows
+ PROCESS_16X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
+ PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+ PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+ PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+ PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+ PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+ PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+ PROCESS_16X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride
+
+ mov rcx, result_ptr
+ ; results[0]: add the two 64-bit partial sums of xmm5
+ movq xmm0, xmm5
+ psrldq xmm5, 8
+
+ paddw xmm0, xmm5
+ movd [rcx], xmm0
+; results[1]: same reduction for xmm6
+ movq xmm0, xmm6
+ psrldq xmm6, 8
+
+ paddw xmm0, xmm6
+ movd [rcx+4], xmm0
+; results[2]: same reduction for xmm7
+ movq xmm0, xmm7
+ psrldq xmm7, 8
+
+ paddw xmm0, xmm7
+ movd [rcx+8], xmm0
+
+ STACK_FRAME_DESTROY_X3
+
+;void vp9_sad16x8x3_sse3(
+; unsigned char *src_ptr,
+; int src_stride,
+; unsigned char *ref_ptr,
+; int ref_stride,
+; int *results) - receives three SADs, for ref_ptr+0, +1 and +2
+global sym(vp9_sad16x8x3_sse3) PRIVATE
+sym(vp9_sad16x8x3_sse3):
+
+ STACK_FRAME_CREATE_X3
+ ; four macro calls, two rows each = 8 rows
+ PROCESS_16X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
+ PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+ PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+ PROCESS_16X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride
+
+ mov rcx, result_ptr
+ ; results[0]: add the two 64-bit partial sums of xmm5
+ movq xmm0, xmm5
+ psrldq xmm5, 8
+
+ paddw xmm0, xmm5
+ movd [rcx], xmm0
+; results[1]: same reduction for xmm6
+ movq xmm0, xmm6
+ psrldq xmm6, 8
+
+ paddw xmm0, xmm6
+ movd [rcx+4], xmm0
+; results[2]: same reduction for xmm7
+ movq xmm0, xmm7
+ psrldq xmm7, 8
+
+ paddw xmm0, xmm7
+ movd [rcx+8], xmm0
+
+ STACK_FRAME_DESTROY_X3
+
+;void vp9_sad8x16x3_sse3(
+; unsigned char *src_ptr,
+; int src_stride,
+; unsigned char *ref_ptr,
+; int ref_stride,
+; int *results) - receives three SADs, for ref_ptr+0, +1 and +2
+global sym(vp9_sad8x16x3_sse3) PRIVATE
+sym(vp9_sad8x16x3_sse3):
+
+ STACK_FRAME_CREATE_X3
+ ; eight macro calls, two rows each = 16 rows
+ PROCESS_8X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
+ PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+ PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+ PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+ PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+ PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+ PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+ PROCESS_8X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride
+
+ mov rcx, result_ptr
+ ; pack the first two sums into one qword so they are stored together
+ punpckldq mm5, mm6
+
+ movq [rcx], mm5
+ movd [rcx+8], mm7
+
+ STACK_FRAME_DESTROY_X3
+
+;void vp9_sad8x8x3_sse3(
+; unsigned char *src_ptr,
+; int src_stride,
+; unsigned char *ref_ptr,
+; int ref_stride,
+; int *results) - receives three SADs, for ref_ptr+0, +1 and +2
+global sym(vp9_sad8x8x3_sse3) PRIVATE
+sym(vp9_sad8x8x3_sse3):
+
+ STACK_FRAME_CREATE_X3
+ ; four macro calls, two rows each = 8 rows
+ PROCESS_8X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
+ PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+ PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+ PROCESS_8X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride
+
+ mov rcx, result_ptr
+ ; pack the first two sums into one qword so they are stored together
+ punpckldq mm5, mm6
+
+ movq [rcx], mm5
+ movd [rcx+8], mm7
+
+ STACK_FRAME_DESTROY_X3
+
+;void vp9_sad4x4x3_sse3(
+; unsigned char *src_ptr,
+; int src_stride,
+; unsigned char *ref_ptr,
+; int ref_stride,
+; int *results) - receives three SADs, for ref_ptr+0, +1 and +2
+global sym(vp9_sad4x4x3_sse3) PRIVATE
+sym(vp9_sad4x4x3_sse3):
+
+ STACK_FRAME_CREATE_X3
+ ; rows 0-1: two 4-byte rows are interleaved into one 8-byte register
+ movd mm0, DWORD PTR [src_ptr]
+ movd mm1, DWORD PTR [ref_ptr]
+
+ movd mm2, DWORD PTR [src_ptr+src_stride]
+ movd mm3, DWORD PTR [ref_ptr+ref_stride]
+
+ punpcklbw mm0, mm2
+ punpcklbw mm1, mm3
+ ; the same two reference rows at byte offsets +1 and +2
+ movd mm4, DWORD PTR [ref_ptr+1]
+ movd mm5, DWORD PTR [ref_ptr+2]
+
+ movd mm2, DWORD PTR [ref_ptr+ref_stride+1]
+ movd mm3, DWORD PTR [ref_ptr+ref_stride+2]
+
+ psadbw mm1, mm0
+
+ punpcklbw mm4, mm2
+ punpcklbw mm5, mm3
+
+ psadbw mm4, mm0
+ psadbw mm5, mm0
+
+ lea src_ptr, [src_ptr+src_stride*2]
+ lea ref_ptr, [ref_ptr+ref_stride*2]
+ ; rows 2-3
+ movd mm0, DWORD PTR [src_ptr]
+ movd mm2, DWORD PTR [ref_ptr]
+
+ movd mm3, DWORD PTR [src_ptr+src_stride]
+ movd mm6, DWORD PTR [ref_ptr+ref_stride]
+
+ punpcklbw mm0, mm3
+ punpcklbw mm2, mm6
+
+ movd mm3, DWORD PTR [ref_ptr+1]
+ movd mm7, DWORD PTR [ref_ptr+2]
+
+ psadbw mm2, mm0
+
+ paddw mm1, mm2 ; offset +0 total
+
+ movd mm2, DWORD PTR [ref_ptr+ref_stride+1]
+ movd mm6, DWORD PTR [ref_ptr+ref_stride+2]
+
+ punpcklbw mm3, mm2
+ punpcklbw mm7, mm6
+
+ psadbw mm3, mm0
+ psadbw mm7, mm0
+
+ paddw mm3, mm4 ; offset +1 total
+ paddw mm7, mm5 ; offset +2 total
+
+ mov rcx, result_ptr
+ ; pack the first two sums into one qword so they are stored together
+ punpckldq mm1, mm3
+
+ movq [rcx], mm1
+ movd [rcx+8], mm7
+
+ STACK_FRAME_DESTROY_X3
+
+;unsigned int vp9_sad16x16_sse3(
+; unsigned char *src_ptr,
+; int src_stride,
+; unsigned char *ref_ptr,
+; int ref_stride,
+; int max_err) - declared but never read by this routine
+;%define lddqu movdqu
+global sym(vp9_sad16x16_sse3) PRIVATE
+sym(vp9_sad16x16_sse3):
+
+ STACK_FRAME_CREATE_X3
+ ; end_ptr is used here as a plain iteration counter: 4 iterations of 4 rows
+ mov end_ptr, 4
+ pxor xmm7, xmm7
+
+.vp9_sad16x16_sse3_loop:
+ movdqa xmm0, XMMWORD PTR [src_ptr] ; source rows use aligned loads, reference rows unaligned
+ movdqu xmm1, XMMWORD PTR [ref_ptr]
+ movdqa xmm2, XMMWORD PTR [src_ptr+src_stride]
+ movdqu xmm3, XMMWORD PTR [ref_ptr+ref_stride]
+
+ lea src_ptr, [src_ptr+src_stride*2]
+ lea ref_ptr, [ref_ptr+ref_stride*2]
+
+ movdqa xmm4, XMMWORD PTR [src_ptr]
+ movdqu xmm5, XMMWORD PTR [ref_ptr]
+ movdqa xmm6, XMMWORD PTR [src_ptr+src_stride]
+
+ psadbw xmm0, xmm1
+ ; xmm1 is free again once the first psadbw has consumed it
+ movdqu xmm1, XMMWORD PTR [ref_ptr+ref_stride]
+
+ psadbw xmm2, xmm3
+ psadbw xmm4, xmm5
+ psadbw xmm6, xmm1
+
+ lea src_ptr, [src_ptr+src_stride*2]
+ lea ref_ptr, [ref_ptr+ref_stride*2]
+
+ paddw xmm7, xmm0
+ paddw xmm7, xmm2
+ paddw xmm7, xmm4
+ paddw xmm7, xmm6
+
+ sub end_ptr, 1
+ jne .vp9_sad16x16_sse3_loop
+ ; add the two 64-bit partial sums and return the total
+ movq xmm0, xmm7
+ psrldq xmm7, 8
+ paddw xmm0, xmm7
+ movq rax, xmm0
+
+ STACK_FRAME_DESTROY_X3
+
+;void vp9_copy32xn_sse3(
+; unsigned char *src_ptr,
+; int src_stride,
+; unsigned char *dst_ptr,
+; int dst_stride,
+; int height); copies 32 bytes per row; ref_ptr/ref_stride below name the destination
+global sym(vp9_copy32xn_sse3) PRIVATE
+sym(vp9_copy32xn_sse3):
+
+ STACK_FRAME_CREATE_X3
+ ; NOTE(review): the 4-row loop runs once before height is tested - presumably callers pass height >= 4; confirm
+.block_copy_sse3_loopx4:
+ lea end_ptr, [src_ptr+src_stride*2] ; scratch pointer to rows 2-3 of this group
+
+ movdqu xmm0, XMMWORD PTR [src_ptr]
+ movdqu xmm1, XMMWORD PTR [src_ptr + 16]
+ movdqu xmm2, XMMWORD PTR [src_ptr + src_stride]
+ movdqu xmm3, XMMWORD PTR [src_ptr + src_stride + 16]
+ movdqu xmm4, XMMWORD PTR [end_ptr]
+ movdqu xmm5, XMMWORD PTR [end_ptr + 16]
+ movdqu xmm6, XMMWORD PTR [end_ptr + src_stride]
+ movdqu xmm7, XMMWORD PTR [end_ptr + src_stride + 16]
+
+ lea src_ptr, [src_ptr+src_stride*4]
+ ; same scratch register, now pointing at destination rows 2-3
+ lea end_ptr, [ref_ptr+ref_stride*2]
+ ; aligned stores: each destination address must be 16-byte aligned
+ movdqa XMMWORD PTR [ref_ptr], xmm0
+ movdqa XMMWORD PTR [ref_ptr + 16], xmm1
+ movdqa XMMWORD PTR [ref_ptr + ref_stride], xmm2
+ movdqa XMMWORD PTR [ref_ptr + ref_stride + 16], xmm3
+ movdqa XMMWORD PTR [end_ptr], xmm4
+ movdqa XMMWORD PTR [end_ptr + 16], xmm5
+ movdqa XMMWORD PTR [end_ptr + ref_stride], xmm6
+ movdqa XMMWORD PTR [end_ptr + ref_stride + 16], xmm7
+
+ lea ref_ptr, [ref_ptr+ref_stride*4]
+ ; NOTE(review): on the non-x64 64-bit path height is the full r8 register although the argument is an int - confirm the upper bits are defined
+ sub height, 4
+ cmp height, 4
+ jge .block_copy_sse3_loopx4
+
+ ; Check whether any rows remain to be copied.
+ cmp height, 0
+ je .copy_is_done
+
+.block_copy_sse3_loop:
+ movdqu xmm0, XMMWORD PTR [src_ptr] ; remaining rows, one at a time
+ movdqu xmm1, XMMWORD PTR [src_ptr + 16]
+ lea src_ptr, [src_ptr+src_stride]
+
+ movdqa XMMWORD PTR [ref_ptr], xmm0
+ movdqa XMMWORD PTR [ref_ptr + 16], xmm1
+ lea ref_ptr, [ref_ptr+ref_stride]
+
+ sub height, 1
+ jne .block_copy_sse3_loop
+
+.copy_is_done:
+ STACK_FRAME_DESTROY_X3
+
+;void vp9_sad16x16x4d_sse3(
+; unsigned char *src_ptr,
+; int src_stride,
+; unsigned char *ref_ptr_base, - read by the prologue as an array of four block pointers
+; int ref_stride,
+; int *results) - receives four SADs, one per reference pointer
+global sym(vp9_sad16x16x4d_sse3) PRIVATE
+sym(vp9_sad16x16x4d_sse3):
+
+ STACK_FRAME_CREATE_X4
+ ; eight macro calls, two rows each = 16 rows
+ PROCESS_16X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+ PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+ PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+ PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+ PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+ PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+ PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+ PROCESS_16X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+ ; 32-bit only: rbp held ref_stride; restore the frame pointer so result_ptr (arg(4)) resolves
+%if ABI_IS_32BIT
+ pop rbp
+%endif
+ mov rcx, result_ptr
+ ; results[0]: add the two 64-bit partial sums of xmm4
+ movq xmm0, xmm4
+ psrldq xmm4, 8
+
+ paddw xmm0, xmm4
+ movd [rcx], xmm0
+; results[1]: same reduction for xmm5
+ movq xmm0, xmm5
+ psrldq xmm5, 8
+
+ paddw xmm0, xmm5
+ movd [rcx+4], xmm0
+; results[2]: same reduction for xmm6
+ movq xmm0, xmm6
+ psrldq xmm6, 8
+
+ paddw xmm0, xmm6
+ movd [rcx+8], xmm0
+; results[3]: same reduction for xmm7
+ movq xmm0, xmm7
+ psrldq xmm7, 8
+
+ paddw xmm0, xmm7
+ movd [rcx+12], xmm0
+
+ STACK_FRAME_DESTROY_X4
+
+;void vp9_sad16x8x4d_sse3(
+; unsigned char *src_ptr,
+; int src_stride,
+; unsigned char *ref_ptr_base, - read by the prologue as an array of four block pointers
+; int ref_stride,
+; int *results) - receives four SADs, one per reference pointer
+global sym(vp9_sad16x8x4d_sse3) PRIVATE
+sym(vp9_sad16x8x4d_sse3):
+
+ STACK_FRAME_CREATE_X4
+ ; four macro calls, two rows each = 8 rows
+ PROCESS_16X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+ PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+ PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+ PROCESS_16X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+ ; 32-bit only: rbp held ref_stride; restore the frame pointer so result_ptr (arg(4)) resolves
+%if ABI_IS_32BIT
+ pop rbp
+%endif
+ mov rcx, result_ptr
+ ; results[0]: add the two 64-bit partial sums of xmm4
+ movq xmm0, xmm4
+ psrldq xmm4, 8
+
+ paddw xmm0, xmm4
+ movd [rcx], xmm0
+; results[1]: same reduction for xmm5
+ movq xmm0, xmm5
+ psrldq xmm5, 8
+
+ paddw xmm0, xmm5
+ movd [rcx+4], xmm0
+; results[2]: same reduction for xmm6
+ movq xmm0, xmm6
+ psrldq xmm6, 8
+
+ paddw xmm0, xmm6
+ movd [rcx+8], xmm0
+; results[3]: same reduction for xmm7
+ movq xmm0, xmm7
+ psrldq xmm7, 8
+
+ paddw xmm0, xmm7
+ movd [rcx+12], xmm0
+
+ STACK_FRAME_DESTROY_X4
+
+;void vp9_sad8x16x4d_sse3(
+; unsigned char *src_ptr,
+; int src_stride,
+; unsigned char *ref_ptr,
+; int ref_stride,
+; int *results)
+global sym(vp9_sad8x16x4d_sse3) PRIVATE
+sym(vp9_sad8x16x4d_sse3):
+; Writes the low dwords of mm4..mm7 to results[0..3].
+ STACK_FRAME_CREATE_X4 ; defined outside this excerpt; presumably binds src_ptr, r0_ptr..r3_ptr and the strides - TODO confirm
+
+ PROCESS_8X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride ; macro defined outside this excerpt; name suggests 2 rows per call (8 calls = 16 rows) - TODO confirm
+ PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+ PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+ PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+ PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+ PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+ PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+ PROCESS_8X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+
+%if ABI_IS_32BIT
+ pop rbp
+%endif
+ mov rcx, result_ptr ; rcx = destination of the four results
+
+ punpckldq mm4, mm5 ; mm4 = { low dword of mm4, low dword of mm5 }
+ punpckldq mm6, mm7 ; mm6 = { low dword of mm6, low dword of mm7 }
+
+ movq [rcx], mm4 ; results[0], results[1]
+ movq [rcx+8], mm6 ; results[2], results[3]
+
+ STACK_FRAME_DESTROY_X4
+
+;void vp9_sad8x8x4d_sse3(
+; unsigned char *src_ptr,
+; int src_stride,
+; unsigned char *ref_ptr,
+; int ref_stride,
+; int *results)
+global sym(vp9_sad8x8x4d_sse3) PRIVATE
+sym(vp9_sad8x8x4d_sse3):
+; Writes the low dwords of mm4..mm7 to results[0..3].
+ STACK_FRAME_CREATE_X4 ; defined outside this excerpt; presumably binds src_ptr, r0_ptr..r3_ptr and the strides - TODO confirm
+
+ PROCESS_8X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride ; macro defined outside this excerpt; name suggests 2 rows per call (4 calls = 8 rows) - TODO confirm
+ PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+ PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+ PROCESS_8X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+
+%if ABI_IS_32BIT
+ pop rbp
+%endif
+ mov rcx, result_ptr ; rcx = destination of the four results
+
+ punpckldq mm4, mm5 ; mm4 = { low dword of mm4, low dword of mm5 }
+ punpckldq mm6, mm7 ; mm6 = { low dword of mm6, low dword of mm7 }
+
+ movq [rcx], mm4 ; results[0], results[1]
+ movq [rcx+8], mm6 ; results[2], results[3]
+
+ STACK_FRAME_DESTROY_X4
+
+;void vp9_sad4x4x4d_sse3(
+; unsigned char *src_ptr,
+; int src_stride,
+; unsigned char *ref_ptr,
+; int ref_stride,
+; int *results)
+global sym(vp9_sad4x4x4d_sse3) PRIVATE
+sym(vp9_sad4x4x4d_sse3):
+; 4x4 SAD of one source block against four references; rows are handled two at a time.
+ STACK_FRAME_CREATE_X4 ; defined outside this excerpt; presumably binds src_ptr, r0_ptr..r3_ptr and the strides - TODO confirm
+
+ movd mm0, DWORD PTR [src_ptr] ; source row 0 (4 bytes)
+ movd mm1, DWORD PTR [r0_ptr] ; reference 0, row 0
+
+ movd mm2, DWORD PTR [src_ptr+src_stride] ; source row 1
+ movd mm3, DWORD PTR [r0_ptr+ref_stride] ; reference 0, row 1
+
+ punpcklbw mm0, mm2 ; mm0 = source rows 0 and 1 interleaved (8 bytes)
+ punpcklbw mm1, mm3 ; reference 0 interleaved the same way, so bytes still pair up
+
+ movd mm4, DWORD PTR [r1_ptr] ; reference 1, row 0
+ movd mm5, DWORD PTR [r2_ptr] ; reference 2, row 0
+
+ movd mm6, DWORD PTR [r3_ptr] ; reference 3, row 0
+ movd mm2, DWORD PTR [r1_ptr+ref_stride] ; reference 1, row 1
+
+ movd mm3, DWORD PTR [r2_ptr+ref_stride] ; reference 2, row 1
+ movd mm7, DWORD PTR [r3_ptr+ref_stride] ; reference 3, row 1
+
+ psadbw mm1, mm0 ; mm1 = SAD(reference 0, source) over rows 0-1
+
+ punpcklbw mm4, mm2
+ punpcklbw mm5, mm3
+
+ punpcklbw mm6, mm7
+ psadbw mm4, mm0 ; mm4 = SAD for reference 1, rows 0-1
+
+ psadbw mm5, mm0 ; mm5 = SAD for reference 2, rows 0-1
+ psadbw mm6, mm0 ; mm6 = SAD for reference 3, rows 0-1
+
+
+; advance all five pointers by two rows
+ lea src_ptr, [src_ptr+src_stride*2]
+ lea r0_ptr, [r0_ptr+ref_stride*2]
+
+ lea r1_ptr, [r1_ptr+ref_stride*2]
+ lea r2_ptr, [r2_ptr+ref_stride*2]
+
+ lea r3_ptr, [r3_ptr+ref_stride*2]
+
+ movd mm0, DWORD PTR [src_ptr] ; source row 2
+ movd mm2, DWORD PTR [r0_ptr] ; reference 0, row 2
+
+ movd mm3, DWORD PTR [src_ptr+src_stride] ; source row 3
+ movd mm7, DWORD PTR [r0_ptr+ref_stride] ; reference 0, row 3
+
+ punpcklbw mm0, mm3 ; mm0 = source rows 2 and 3 interleaved
+ punpcklbw mm2, mm7
+
+ movd mm3, DWORD PTR [r1_ptr] ; reference 1, row 2
+ movd mm7, DWORD PTR [r2_ptr] ; reference 2, row 2
+
+ psadbw mm2, mm0 ; mm2 = SAD for reference 0, rows 2-3
+%if ABI_IS_32BIT
+ mov rax, rbp ; keep the value held in rbp (used as ref_stride from here on) before rbp is restored
+
+ pop rbp
+%define ref_stride rax
+%endif
+ mov rsi, result_ptr ; rsi = destination of the four results
+
+ paddw mm1, mm2 ; reference 0: rows 0-1 + rows 2-3
+ movd [rsi], mm1 ; results[0]
+
+ movd mm2, DWORD PTR [r1_ptr+ref_stride] ; reference 1, row 3
+ movd mm1, DWORD PTR [r2_ptr+ref_stride] ; reference 2, row 3
+
+ punpcklbw mm3, mm2
+ punpcklbw mm7, mm1
+
+ psadbw mm3, mm0 ; reference 1, rows 2-3
+ psadbw mm7, mm0 ; reference 2, rows 2-3
+
+ movd mm2, DWORD PTR [r3_ptr] ; reference 3, row 2
+ movd mm1, DWORD PTR [r3_ptr+ref_stride] ; reference 3, row 3
+
+ paddw mm3, mm4 ; reference 1 total
+ paddw mm7, mm5 ; reference 2 total
+
+ movd [rsi+4], mm3 ; results[1]
+ punpcklbw mm2, mm1
+
+ movd [rsi+8], mm7 ; results[2]
+ psadbw mm2, mm0 ; reference 3, rows 2-3
+
+ paddw mm2, mm6 ; reference 3 total
+ movd [rsi+12], mm2 ; results[3]
+
+
+ STACK_FRAME_DESTROY_X4
+
diff --git a/vp9/encoder/x86/vp9_sad_sse4.asm b/vp9/encoder/x86/vp9_sad_sse4.asm
new file mode 100644
index 0000000..b42982a
--- /dev/null
+++ b/vp9/encoder/x86/vp9_sad_sse4.asm
@@ -0,0 +1,353 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+; PROCESS_16X2X8 first: two 16-byte rows of src [rsi] vs ref [rdi] at 8 consecutive byte offsets; eight word sums accumulate in xmm1 (first != 0 starts a new sum). rax/rdx = src/ref stride.
+%macro PROCESS_16X2X8 1
+%if %1
+ movdqa xmm0, XMMWORD PTR [rsi] ; 16 source bytes (aligned load)
+ movq xmm1, MMWORD PTR [rdi] ; ref bytes 0-7
+ movq xmm3, MMWORD PTR [rdi+8] ; ref bytes 8-15
+ movq xmm2, MMWORD PTR [rdi+16] ; ref bytes 16-23
+ punpcklqdq xmm1, xmm3 ; xmm1 = ref bytes 0-15
+ punpcklqdq xmm3, xmm2 ; xmm3 = ref bytes 8-23
+
+ movdqa xmm2, xmm1
+ mpsadbw xmm1, xmm0, 0x0 ; src bytes 0-3 against ref windows starting at offset 0
+ mpsadbw xmm2, xmm0, 0x5 ; src bytes 4-7 against ref windows starting at offset 4
+
+ psrldq xmm0, 8 ; bring src bytes 8-15 into the low half
+
+ movdqa xmm4, xmm3
+ mpsadbw xmm3, xmm0, 0x0 ; src bytes 8-11 against ref bytes 8..
+ mpsadbw xmm4, xmm0, 0x5 ; src bytes 12-15 against ref bytes 12..
+
+ paddw xmm1, xmm2 ; xmm1 = first-row sums; this branch initialises the accumulator
+ paddw xmm1, xmm3
+ paddw xmm1, xmm4
+%else
+ movdqa xmm0, XMMWORD PTR [rsi]
+ movq xmm5, MMWORD PTR [rdi]
+ movq xmm3, MMWORD PTR [rdi+8]
+ movq xmm2, MMWORD PTR [rdi+16]
+ punpcklqdq xmm5, xmm3
+ punpcklqdq xmm3, xmm2
+
+ movdqa xmm2, xmm5
+ mpsadbw xmm5, xmm0, 0x0
+ mpsadbw xmm2, xmm0, 0x5
+
+ psrldq xmm0, 8
+
+ movdqa xmm4, xmm3
+ mpsadbw xmm3, xmm0, 0x0
+ mpsadbw xmm4, xmm0, 0x5
+
+ paddw xmm5, xmm2
+ paddw xmm5, xmm3
+ paddw xmm5, xmm4
+
+ paddw xmm1, xmm5 ; add this row to the running sums
+%endif
+ movdqa xmm0, XMMWORD PTR [rsi + rax] ; second row of the pair
+ movq xmm5, MMWORD PTR [rdi+ rdx]
+ movq xmm3, MMWORD PTR [rdi+ rdx+8]
+ movq xmm2, MMWORD PTR [rdi+ rdx+16]
+ punpcklqdq xmm5, xmm3
+ punpcklqdq xmm3, xmm2
+
+ lea rsi, [rsi+rax*2] ; advance src by two rows
+ lea rdi, [rdi+rdx*2] ; advance ref by two rows
+
+ movdqa xmm2, xmm5
+ mpsadbw xmm5, xmm0, 0x0
+ mpsadbw xmm2, xmm0, 0x5
+
+ psrldq xmm0, 8
+ movdqa xmm4, xmm3
+ mpsadbw xmm3, xmm0, 0x0
+ mpsadbw xmm4, xmm0, 0x5
+
+ paddw xmm5, xmm2
+ paddw xmm5, xmm3
+ paddw xmm5, xmm4
+
+ paddw xmm1, xmm5 ; add the second row to the running sums
+%endmacro
+; PROCESS_8X2X8 first: two 8-byte rows of src [rsi] vs ref [rdi] at 8 consecutive byte offsets; eight word sums accumulate in xmm1 (first != 0 starts a new sum). rax/rdx = src/ref stride.
+%macro PROCESS_8X2X8 1
+%if %1
+ movq xmm0, MMWORD PTR [rsi] ; 8 source bytes
+ movq xmm1, MMWORD PTR [rdi] ; ref bytes 0-7
+ movq xmm3, MMWORD PTR [rdi+8] ; ref bytes 8-15
+ punpcklqdq xmm1, xmm3 ; xmm1 = ref bytes 0-15
+
+ movdqa xmm2, xmm1
+ mpsadbw xmm1, xmm0, 0x0 ; src bytes 0-3 against ref windows starting at offset 0
+ mpsadbw xmm2, xmm0, 0x5 ; src bytes 4-7 against ref windows starting at offset 4
+ paddw xmm1, xmm2 ; this branch initialises the accumulator
+%else
+ movq xmm0, MMWORD PTR [rsi]
+ movq xmm5, MMWORD PTR [rdi]
+ movq xmm3, MMWORD PTR [rdi+8]
+ punpcklqdq xmm5, xmm3
+
+ movdqa xmm2, xmm5
+ mpsadbw xmm5, xmm0, 0x0
+ mpsadbw xmm2, xmm0, 0x5
+ paddw xmm5, xmm2
+
+ paddw xmm1, xmm5 ; add this row to the running sums
+%endif
+ movq xmm0, MMWORD PTR [rsi + rax] ; second row of the pair
+ movq xmm5, MMWORD PTR [rdi+ rdx]
+ movq xmm3, MMWORD PTR [rdi+ rdx+8]
+ punpcklqdq xmm5, xmm3
+
+ lea rsi, [rsi+rax*2] ; advance src by two rows
+ lea rdi, [rdi+rdx*2] ; advance ref by two rows
+
+ movdqa xmm2, xmm5
+ mpsadbw xmm5, xmm0, 0x0
+ mpsadbw xmm2, xmm0, 0x5
+ paddw xmm5, xmm2
+
+ paddw xmm1, xmm5 ; add the second row to the running sums
+%endmacro
+; PROCESS_4X2X8 first: two 4-byte rows of src [rsi] vs ref [rdi] at 8 consecutive byte offsets; eight word sums accumulate in xmm1 (first != 0 starts a new sum). rax/rdx = src/ref stride.
+%macro PROCESS_4X2X8 1
+%if %1
+ movd xmm0, [rsi] ; 4 source bytes
+ movq xmm1, MMWORD PTR [rdi] ; ref bytes 0-7
+ movq xmm3, MMWORD PTR [rdi+8] ; ref bytes 8-15
+ punpcklqdq xmm1, xmm3 ; xmm1 = ref bytes 0-15
+
+ mpsadbw xmm1, xmm0, 0x0 ; this branch initialises the accumulator
+%else
+ movd xmm0, [rsi]
+ movq xmm5, MMWORD PTR [rdi]
+ movq xmm3, MMWORD PTR [rdi+8]
+ punpcklqdq xmm5, xmm3
+
+ mpsadbw xmm5, xmm0, 0x0
+
+ paddw xmm1, xmm5 ; add this row to the running sums
+%endif
+ movd xmm0, [rsi + rax] ; second row of the pair
+ movq xmm5, MMWORD PTR [rdi+ rdx]
+ movq xmm3, MMWORD PTR [rdi+ rdx+8]
+ punpcklqdq xmm5, xmm3
+
+ lea rsi, [rsi+rax*2] ; advance src by two rows
+ lea rdi, [rdi+rdx*2] ; advance ref by two rows
+
+ mpsadbw xmm5, xmm0, 0x0
+
+ paddw xmm1, xmm5 ; add the second row to the running sums
+%endmacro
+
+
+;void vp9_sad16x16x8_sse4(
+; const unsigned char *src_ptr,
+; int src_stride,
+; const unsigned char *ref_ptr,
+; int ref_stride,
+; unsigned short *sad_array);
+global sym(vp9_sad16x16x8_sse4) PRIVATE
+sym(vp9_sad16x16x8_sse4):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 5
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;ref_ptr
+
+ movsxd rax, dword ptr arg(1) ;src_stride
+ movsxd rdx, dword ptr arg(3) ;ref_stride
+
+ PROCESS_16X2X8 1 ; rows 0-1, starts the sums in xmm1
+ PROCESS_16X2X8 0 ; each further call adds two more rows (8 calls = 16 rows)
+ PROCESS_16X2X8 0
+ PROCESS_16X2X8 0
+ PROCESS_16X2X8 0
+ PROCESS_16X2X8 0
+ PROCESS_16X2X8 0
+ PROCESS_16X2X8 0
+
+ mov rdi, arg(4) ;Results
+ movdqa XMMWORD PTR [rdi], xmm1 ; eight 16-bit sums; movdqa needs a 16-byte aligned destination
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;void vp9_sad16x8x8_sse4(
+; const unsigned char *src_ptr,
+; int src_stride,
+; const unsigned char *ref_ptr,
+; int ref_stride,
+; unsigned short *sad_array
+;);
+global sym(vp9_sad16x8x8_sse4) PRIVATE
+sym(vp9_sad16x8x8_sse4):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 5
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;ref_ptr
+
+ movsxd rax, dword ptr arg(1) ;src_stride
+ movsxd rdx, dword ptr arg(3) ;ref_stride
+
+ PROCESS_16X2X8 1 ; rows 0-1, starts the sums in xmm1
+ PROCESS_16X2X8 0 ; each further call adds two more rows (4 calls = 8 rows)
+ PROCESS_16X2X8 0
+ PROCESS_16X2X8 0
+
+ mov rdi, arg(4) ;Results
+ movdqa XMMWORD PTR [rdi], xmm1 ; eight 16-bit sums; movdqa needs a 16-byte aligned destination
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;void vp9_sad8x8x8_sse4(
+; const unsigned char *src_ptr,
+; int src_stride,
+; const unsigned char *ref_ptr,
+; int ref_stride,
+; unsigned short *sad_array
+;);
+global sym(vp9_sad8x8x8_sse4) PRIVATE
+sym(vp9_sad8x8x8_sse4):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 5
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;ref_ptr
+
+ movsxd rax, dword ptr arg(1) ;src_stride
+ movsxd rdx, dword ptr arg(3) ;ref_stride
+
+ PROCESS_8X2X8 1 ; rows 0-1, starts the sums in xmm1
+ PROCESS_8X2X8 0 ; each further call adds two more rows (4 calls = 8 rows)
+ PROCESS_8X2X8 0
+ PROCESS_8X2X8 0
+
+ mov rdi, arg(4) ;Results
+ movdqa XMMWORD PTR [rdi], xmm1 ; eight 16-bit sums; movdqa needs a 16-byte aligned destination
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;void vp9_sad8x16x8_sse4(
+; const unsigned char *src_ptr,
+; int src_stride,
+; const unsigned char *ref_ptr,
+; int ref_stride,
+; unsigned short *sad_array
+;);
+global sym(vp9_sad8x16x8_sse4) PRIVATE
+sym(vp9_sad8x16x8_sse4):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 5
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;ref_ptr
+
+ movsxd rax, dword ptr arg(1) ;src_stride
+ movsxd rdx, dword ptr arg(3) ;ref_stride
+
+ PROCESS_8X2X8 1 ; rows 0-1, starts the sums in xmm1
+ PROCESS_8X2X8 0 ; each further call adds two more rows (8 calls = 16 rows)
+ PROCESS_8X2X8 0
+ PROCESS_8X2X8 0
+ PROCESS_8X2X8 0
+ PROCESS_8X2X8 0
+ PROCESS_8X2X8 0
+ PROCESS_8X2X8 0
+ mov rdi, arg(4) ;Results
+ movdqa XMMWORD PTR [rdi], xmm1 ; eight 16-bit sums; movdqa needs a 16-byte aligned destination
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;void vp9_sad4x4x8_sse4(
+; const unsigned char *src_ptr,
+; int src_stride,
+; const unsigned char *ref_ptr,
+; int ref_stride,
+; unsigned short *sad_array
+;);
+global sym(vp9_sad4x4x8_sse4) PRIVATE
+sym(vp9_sad4x4x8_sse4):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 5
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;ref_ptr
+
+ movsxd rax, dword ptr arg(1) ;src_stride
+ movsxd rdx, dword ptr arg(3) ;ref_stride
+
+ PROCESS_4X2X8 1 ; rows 0-1, starts the sums in xmm1
+ PROCESS_4X2X8 0 ; rows 2-3
+
+ mov rdi, arg(4) ;Results
+ movdqa XMMWORD PTR [rdi], xmm1 ; eight 16-bit sums; movdqa needs a 16-byte aligned destination
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+
+
diff --git a/vp9/encoder/x86/vp9_sad_ssse3.asm b/vp9/encoder/x86/vp9_sad_ssse3.asm
new file mode 100644
index 0000000..0cb3542
--- /dev/null
+++ b/vp9/encoder/x86/vp9_sad_ssse3.asm
@@ -0,0 +1,370 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+; PROCESS_16X2X3 first: two 16-byte rows of src [rsi] vs ref at [rdi], [rdi+1], [rdi+2] (unaligned loads); sums accumulate in xmm5/xmm6/xmm7 (first != 0 starts new sums). rax/rdx = src/ref stride.
+%macro PROCESS_16X2X3 1
+%if %1
+ movdqa xmm0, XMMWORD PTR [rsi] ; 16 source bytes (aligned load)
+ lddqu xmm5, XMMWORD PTR [rdi] ; ref at byte offset 0
+ lddqu xmm6, XMMWORD PTR [rdi+1] ; ref at byte offset 1
+ lddqu xmm7, XMMWORD PTR [rdi+2] ; ref at byte offset 2
+
+ psadbw xmm5, xmm0 ; this branch initialises the three accumulators
+ psadbw xmm6, xmm0
+ psadbw xmm7, xmm0
+%else
+ movdqa xmm0, XMMWORD PTR [rsi]
+ lddqu xmm1, XMMWORD PTR [rdi]
+ lddqu xmm2, XMMWORD PTR [rdi+1]
+ lddqu xmm3, XMMWORD PTR [rdi+2]
+
+ psadbw xmm1, xmm0
+ psadbw xmm2, xmm0
+ psadbw xmm3, xmm0
+
+ paddw xmm5, xmm1 ; add this row to the running sums
+ paddw xmm6, xmm2
+ paddw xmm7, xmm3
+%endif
+ movdqa xmm0, XMMWORD PTR [rsi+rax] ; second row of the pair
+ lddqu xmm1, XMMWORD PTR [rdi+rdx]
+ lddqu xmm2, XMMWORD PTR [rdi+rdx+1]
+ lddqu xmm3, XMMWORD PTR [rdi+rdx+2]
+
+ lea rsi, [rsi+rax*2] ; advance src by two rows
+ lea rdi, [rdi+rdx*2] ; advance ref by two rows
+
+ psadbw xmm1, xmm0
+ psadbw xmm2, xmm0
+ psadbw xmm3, xmm0
+
+ paddw xmm5, xmm1 ; add the second row to the running sums
+ paddw xmm6, xmm2
+ paddw xmm7, xmm3
+%endmacro
+; PROCESS_16X2X3_OFFSET first, off: like PROCESS_16X2X3, but reads ref with two aligned loads at [rdi], [rdi+16] and uses palignr to extract the windows at byte offsets off, off+1, off+2.
+%macro PROCESS_16X2X3_OFFSET 2
+%if %1
+ movdqa xmm0, XMMWORD PTR [rsi] ; 16 source bytes
+ movdqa xmm4, XMMWORD PTR [rdi] ; ref bytes 0-15 (aligned load)
+ movdqa xmm7, XMMWORD PTR [rdi+16] ; ref bytes 16-31 (aligned load)
+
+ movdqa xmm5, xmm7
+ palignr xmm5, xmm4, %2 ; 16 ref bytes starting at offset %2
+
+ movdqa xmm6, xmm7
+ palignr xmm6, xmm4, (%2+1) ; 16 ref bytes starting at offset %2+1
+
+ palignr xmm7, xmm4, (%2+2) ; 16 ref bytes starting at offset %2+2
+
+ psadbw xmm5, xmm0 ; this branch initialises the three accumulators
+ psadbw xmm6, xmm0
+ psadbw xmm7, xmm0
+%else
+ movdqa xmm0, XMMWORD PTR [rsi]
+ movdqa xmm4, XMMWORD PTR [rdi]
+ movdqa xmm3, XMMWORD PTR [rdi+16]
+
+ movdqa xmm1, xmm3
+ palignr xmm1, xmm4, %2
+
+ movdqa xmm2, xmm3
+ palignr xmm2, xmm4, (%2+1)
+
+ palignr xmm3, xmm4, (%2+2)
+
+ psadbw xmm1, xmm0
+ psadbw xmm2, xmm0
+ psadbw xmm3, xmm0
+
+ paddw xmm5, xmm1 ; add this row to the running sums
+ paddw xmm6, xmm2
+ paddw xmm7, xmm3
+%endif
+ movdqa xmm0, XMMWORD PTR [rsi+rax] ; second row of the pair
+ movdqa xmm4, XMMWORD PTR [rdi+rdx]
+ movdqa xmm3, XMMWORD PTR [rdi+rdx+16]
+
+ movdqa xmm1, xmm3
+ palignr xmm1, xmm4, %2
+
+ movdqa xmm2, xmm3
+ palignr xmm2, xmm4, (%2+1)
+
+ palignr xmm3, xmm4, (%2+2)
+
+ lea rsi, [rsi+rax*2] ; advance src by two rows
+ lea rdi, [rdi+rdx*2] ; advance ref by two rows
+
+ psadbw xmm1, xmm0
+ psadbw xmm2, xmm0
+ psadbw xmm3, xmm0
+
+ paddw xmm5, xmm1 ; add the second row to the running sums
+ paddw xmm6, xmm2
+ paddw xmm7, xmm3
+%endmacro
+; PROCESS_16X16X3_OFFSET off, prefix: emits the jump-table target prefix_aligned_by_off: 16 rows via PROCESS_16X2X3_OFFSET, then a jump to prefix_store_off.
+%macro PROCESS_16X16X3_OFFSET 2
+%2_aligned_by_%1:
+
+ sub rdi, %1 ; step ref back by the offset so the macro's aligned loads start %1 bytes before the original address
+
+ PROCESS_16X2X3_OFFSET 1, %1
+ PROCESS_16X2X3_OFFSET 0, %1
+ PROCESS_16X2X3_OFFSET 0, %1
+ PROCESS_16X2X3_OFFSET 0, %1
+ PROCESS_16X2X3_OFFSET 0, %1
+ PROCESS_16X2X3_OFFSET 0, %1
+ PROCESS_16X2X3_OFFSET 0, %1
+ PROCESS_16X2X3_OFFSET 0, %1
+
+ jmp %2_store_off
+
+%endmacro
+; PROCESS_16X8X3_OFFSET off, prefix: emits the jump-table target prefix_aligned_by_off: 8 rows via PROCESS_16X2X3_OFFSET, then a jump to prefix_store_off.
+%macro PROCESS_16X8X3_OFFSET 2
+%2_aligned_by_%1:
+
+ sub rdi, %1 ; step ref back by the offset so the macro's aligned loads start %1 bytes before the original address
+
+ PROCESS_16X2X3_OFFSET 1, %1
+ PROCESS_16X2X3_OFFSET 0, %1
+ PROCESS_16X2X3_OFFSET 0, %1
+ PROCESS_16X2X3_OFFSET 0, %1
+
+ jmp %2_store_off
+
+%endmacro
+
+;void vp9_sad16x16x3_ssse3(
+; unsigned char *src_ptr,
+; int src_stride,
+; unsigned char *ref_ptr,
+; int ref_stride,
+; int *results)
+global sym(vp9_sad16x16x3_ssse3) PRIVATE
+sym(vp9_sad16x16x3_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 5
+ SAVE_XMM 7
+ push rsi
+ push rdi
+ push rcx
+ ; end prolog
+
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;ref_ptr
+
+ mov rdx, 0xf
+ and rdx, rdi ; rdx = ref_ptr & 15, used as the jump-table index
+
+ jmp .vp9_sad16x16x3_ssse3_skiptable ; step over the table data embedded in the code
+.vp9_sad16x16x3_ssse3_jumptable: ; 32-bit offsets of each handler relative to do_jump
+ dd .vp9_sad16x16x3_ssse3_aligned_by_0 - .vp9_sad16x16x3_ssse3_do_jump
+ dd .vp9_sad16x16x3_ssse3_aligned_by_1 - .vp9_sad16x16x3_ssse3_do_jump
+ dd .vp9_sad16x16x3_ssse3_aligned_by_2 - .vp9_sad16x16x3_ssse3_do_jump
+ dd .vp9_sad16x16x3_ssse3_aligned_by_3 - .vp9_sad16x16x3_ssse3_do_jump
+ dd .vp9_sad16x16x3_ssse3_aligned_by_4 - .vp9_sad16x16x3_ssse3_do_jump
+ dd .vp9_sad16x16x3_ssse3_aligned_by_5 - .vp9_sad16x16x3_ssse3_do_jump
+ dd .vp9_sad16x16x3_ssse3_aligned_by_6 - .vp9_sad16x16x3_ssse3_do_jump
+ dd .vp9_sad16x16x3_ssse3_aligned_by_7 - .vp9_sad16x16x3_ssse3_do_jump
+ dd .vp9_sad16x16x3_ssse3_aligned_by_8 - .vp9_sad16x16x3_ssse3_do_jump
+ dd .vp9_sad16x16x3_ssse3_aligned_by_9 - .vp9_sad16x16x3_ssse3_do_jump
+ dd .vp9_sad16x16x3_ssse3_aligned_by_10 - .vp9_sad16x16x3_ssse3_do_jump
+ dd .vp9_sad16x16x3_ssse3_aligned_by_11 - .vp9_sad16x16x3_ssse3_do_jump
+ dd .vp9_sad16x16x3_ssse3_aligned_by_12 - .vp9_sad16x16x3_ssse3_do_jump
+ dd .vp9_sad16x16x3_ssse3_aligned_by_13 - .vp9_sad16x16x3_ssse3_do_jump
+ dd .vp9_sad16x16x3_ssse3_aligned_by_14 - .vp9_sad16x16x3_ssse3_do_jump
+ dd .vp9_sad16x16x3_ssse3_aligned_by_15 - .vp9_sad16x16x3_ssse3_do_jump
+.vp9_sad16x16x3_ssse3_skiptable:
+
+ call .vp9_sad16x16x3_ssse3_do_jump ; call pushes the address of do_jump, giving a position-independent base
+.vp9_sad16x16x3_ssse3_do_jump:
+ pop rcx ; get the address of do_jump
+ mov rax, .vp9_sad16x16x3_ssse3_jumptable - .vp9_sad16x16x3_ssse3_do_jump
+ add rax, rcx ; get the absolute address of vp9_sad16x16x3_ssse3_jumptable
+
+ movsxd rax, dword [rax + 4*rdx] ; get the 32 bit offset from the jumptable
+ add rcx, rax ; rcx = absolute address of the selected handler
+
+ movsxd rax, dword ptr arg(1) ;src_stride
+ movsxd rdx, dword ptr arg(3) ;ref_stride
+
+ jmp rcx
+
+ PROCESS_16X16X3_OFFSET 0, .vp9_sad16x16x3_ssse3
+ PROCESS_16X16X3_OFFSET 1, .vp9_sad16x16x3_ssse3
+ PROCESS_16X16X3_OFFSET 2, .vp9_sad16x16x3_ssse3
+ PROCESS_16X16X3_OFFSET 3, .vp9_sad16x16x3_ssse3
+ PROCESS_16X16X3_OFFSET 4, .vp9_sad16x16x3_ssse3
+ PROCESS_16X16X3_OFFSET 5, .vp9_sad16x16x3_ssse3
+ PROCESS_16X16X3_OFFSET 6, .vp9_sad16x16x3_ssse3
+ PROCESS_16X16X3_OFFSET 7, .vp9_sad16x16x3_ssse3
+ PROCESS_16X16X3_OFFSET 8, .vp9_sad16x16x3_ssse3
+ PROCESS_16X16X3_OFFSET 9, .vp9_sad16x16x3_ssse3
+ PROCESS_16X16X3_OFFSET 10, .vp9_sad16x16x3_ssse3
+ PROCESS_16X16X3_OFFSET 11, .vp9_sad16x16x3_ssse3
+ PROCESS_16X16X3_OFFSET 12, .vp9_sad16x16x3_ssse3
+ PROCESS_16X16X3_OFFSET 13, .vp9_sad16x16x3_ssse3
+ PROCESS_16X16X3_OFFSET 14, .vp9_sad16x16x3_ssse3
+
+.vp9_sad16x16x3_ssse3_aligned_by_15: ; offset 15 uses the unaligned-load macro and falls through to store_off
+ PROCESS_16X2X3 1
+ PROCESS_16X2X3 0
+ PROCESS_16X2X3 0
+ PROCESS_16X2X3 0
+ PROCESS_16X2X3 0
+ PROCESS_16X2X3 0
+ PROCESS_16X2X3 0
+ PROCESS_16X2X3 0
+
+.vp9_sad16x16x3_ssse3_store_off:
+ mov rdi, arg(4) ;Results
+
+ movq xmm0, xmm5 ; xmm0 = low qword of xmm5
+ psrldq xmm5, 8 ; move the high qword of xmm5 down
+
+ paddw xmm0, xmm5 ; low half + high half
+ movd [rdi], xmm0 ; results[0]
+;- same reduction for xmm6 -> results[1]
+ movq xmm0, xmm6
+ psrldq xmm6, 8
+
+ paddw xmm0, xmm6
+ movd [rdi+4], xmm0
+;- same reduction for xmm7 -> results[2]
+ movq xmm0, xmm7
+ psrldq xmm7, 8
+
+ paddw xmm0, xmm7
+ movd [rdi+8], xmm0
+
+ ; begin epilog
+ pop rcx
+ pop rdi
+ pop rsi
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void vp9_sad16x8x3_ssse3(
+; unsigned char *src_ptr,
+; int src_stride,
+; unsigned char *ref_ptr,
+; int ref_stride,
+; int *results)
+global sym(vp9_sad16x8x3_ssse3) PRIVATE
+sym(vp9_sad16x8x3_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 5
+ SAVE_XMM 7
+ push rsi
+ push rdi
+ push rcx
+ ; end prolog
+
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;ref_ptr
+
+ mov rdx, 0xf
+ and rdx, rdi ; rdx = ref_ptr & 15, used as the jump-table index
+
+ jmp .vp9_sad16x8x3_ssse3_skiptable ; step over the table data embedded in the code
+.vp9_sad16x8x3_ssse3_jumptable: ; 32-bit offsets of each handler relative to do_jump
+ dd .vp9_sad16x8x3_ssse3_aligned_by_0 - .vp9_sad16x8x3_ssse3_do_jump
+ dd .vp9_sad16x8x3_ssse3_aligned_by_1 - .vp9_sad16x8x3_ssse3_do_jump
+ dd .vp9_sad16x8x3_ssse3_aligned_by_2 - .vp9_sad16x8x3_ssse3_do_jump
+ dd .vp9_sad16x8x3_ssse3_aligned_by_3 - .vp9_sad16x8x3_ssse3_do_jump
+ dd .vp9_sad16x8x3_ssse3_aligned_by_4 - .vp9_sad16x8x3_ssse3_do_jump
+ dd .vp9_sad16x8x3_ssse3_aligned_by_5 - .vp9_sad16x8x3_ssse3_do_jump
+ dd .vp9_sad16x8x3_ssse3_aligned_by_6 - .vp9_sad16x8x3_ssse3_do_jump
+ dd .vp9_sad16x8x3_ssse3_aligned_by_7 - .vp9_sad16x8x3_ssse3_do_jump
+ dd .vp9_sad16x8x3_ssse3_aligned_by_8 - .vp9_sad16x8x3_ssse3_do_jump
+ dd .vp9_sad16x8x3_ssse3_aligned_by_9 - .vp9_sad16x8x3_ssse3_do_jump
+ dd .vp9_sad16x8x3_ssse3_aligned_by_10 - .vp9_sad16x8x3_ssse3_do_jump
+ dd .vp9_sad16x8x3_ssse3_aligned_by_11 - .vp9_sad16x8x3_ssse3_do_jump
+ dd .vp9_sad16x8x3_ssse3_aligned_by_12 - .vp9_sad16x8x3_ssse3_do_jump
+ dd .vp9_sad16x8x3_ssse3_aligned_by_13 - .vp9_sad16x8x3_ssse3_do_jump
+ dd .vp9_sad16x8x3_ssse3_aligned_by_14 - .vp9_sad16x8x3_ssse3_do_jump
+ dd .vp9_sad16x8x3_ssse3_aligned_by_15 - .vp9_sad16x8x3_ssse3_do_jump
+.vp9_sad16x8x3_ssse3_skiptable:
+
+ call .vp9_sad16x8x3_ssse3_do_jump ; call pushes the address of do_jump, giving a position-independent base
+.vp9_sad16x8x3_ssse3_do_jump:
+ pop rcx ; get the address of do_jump
+ mov rax, .vp9_sad16x8x3_ssse3_jumptable - .vp9_sad16x8x3_ssse3_do_jump
+ add rax, rcx ; get the absolute address of vp9_sad16x8x3_ssse3_jumptable
+
+ movsxd rax, dword [rax + 4*rdx] ; get the 32 bit offset from the jumptable
+ add rcx, rax ; rcx = absolute address of the selected handler
+
+ movsxd rax, dword ptr arg(1) ;src_stride
+ movsxd rdx, dword ptr arg(3) ;ref_stride
+
+ jmp rcx
+
+ PROCESS_16X8X3_OFFSET 0, .vp9_sad16x8x3_ssse3
+ PROCESS_16X8X3_OFFSET 1, .vp9_sad16x8x3_ssse3
+ PROCESS_16X8X3_OFFSET 2, .vp9_sad16x8x3_ssse3
+ PROCESS_16X8X3_OFFSET 3, .vp9_sad16x8x3_ssse3
+ PROCESS_16X8X3_OFFSET 4, .vp9_sad16x8x3_ssse3
+ PROCESS_16X8X3_OFFSET 5, .vp9_sad16x8x3_ssse3
+ PROCESS_16X8X3_OFFSET 6, .vp9_sad16x8x3_ssse3
+ PROCESS_16X8X3_OFFSET 7, .vp9_sad16x8x3_ssse3
+ PROCESS_16X8X3_OFFSET 8, .vp9_sad16x8x3_ssse3
+ PROCESS_16X8X3_OFFSET 9, .vp9_sad16x8x3_ssse3
+ PROCESS_16X8X3_OFFSET 10, .vp9_sad16x8x3_ssse3
+ PROCESS_16X8X3_OFFSET 11, .vp9_sad16x8x3_ssse3
+ PROCESS_16X8X3_OFFSET 12, .vp9_sad16x8x3_ssse3
+ PROCESS_16X8X3_OFFSET 13, .vp9_sad16x8x3_ssse3
+ PROCESS_16X8X3_OFFSET 14, .vp9_sad16x8x3_ssse3
+
+.vp9_sad16x8x3_ssse3_aligned_by_15: ; offset 15 uses the unaligned-load macro and falls through to store_off
+
+ PROCESS_16X2X3 1
+ PROCESS_16X2X3 0
+ PROCESS_16X2X3 0
+ PROCESS_16X2X3 0
+
+.vp9_sad16x8x3_ssse3_store_off:
+ mov rdi, arg(4) ;Results
+
+ movq xmm0, xmm5 ; xmm0 = low qword of xmm5
+ psrldq xmm5, 8 ; move the high qword of xmm5 down
+
+ paddw xmm0, xmm5 ; low half + high half
+ movd [rdi], xmm0 ; results[0]
+;- same reduction for xmm6 -> results[1]
+ movq xmm0, xmm6
+ psrldq xmm6, 8
+
+ paddw xmm0, xmm6
+ movd [rdi+4], xmm0
+;- same reduction for xmm7 -> results[2]
+ movq xmm0, xmm7
+ psrldq xmm7, 8
+
+ paddw xmm0, xmm7
+ movd [rdi+8], xmm0
+
+ ; begin epilog
+ pop rcx
+ pop rdi
+ pop rsi
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
diff --git a/vp9/encoder/x86/vp9_ssim_opt.asm b/vp9/encoder/x86/vp9_ssim_opt.asm
new file mode 100644
index 0000000..455d10d
--- /dev/null
+++ b/vp9/encoder/x86/vp9_ssim_opt.asm
@@ -0,0 +1,216 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+%include "vpx_ports/x86_abi_support.asm"
+
+; TABULATE_SSIM - adds the words in xmm3 (s) and xmm4 (r) into sum_s, sum_r, sum_sq_s, sum_sq_r and sum_sxr (xmm15..xmm11); clobbers xmm1-xmm3
+%macro TABULATE_SSIM 0
+ paddusw xmm15, xmm3 ; sum_s
+ paddusw xmm14, xmm4 ; sum_r
+ movdqa xmm1, xmm3
+ pmaddwd xmm1, xmm1 ; s*s, adjacent products summed into dwords
+ paddd xmm13, xmm1 ; sum_sq_s
+ movdqa xmm2, xmm4
+ pmaddwd xmm2, xmm2 ; r*r, adjacent products summed into dwords
+ paddd xmm12, xmm2 ; sum_sq_r
+ pmaddwd xmm3, xmm4 ; s*r, adjacent products summed into dwords
+ paddd xmm11, xmm3 ; sum_sxr
+%endmacro
+
+; Sum the four dwords of register %1 into its low qword; expects xmm0 == 0, clobbers xmm2
+%macro SUM_ACROSS_Q 1
+ movdqa xmm2,%1
+ punpckldq %1,xmm0 ; low two dwords widened to qwords
+ punpckhdq xmm2,xmm0 ; high two dwords widened to qwords
+ paddq %1,xmm2
+ movdqa xmm2,%1
+ punpcklqdq %1,xmm0 ; keep the low qword, zero the high one
+ punpckhqdq xmm2,xmm0 ; high qword moved to the low position
+ paddq %1,xmm2
+%endmacro
+
+; Sum the eight words of register %1 into its low qword; expects xmm0 == 0, clobbers xmm1 and xmm2
+%macro SUM_ACROSS_W 1
+ movdqa xmm1, %1
+ punpcklwd %1,xmm0 ; low four words widened to dwords
+ punpckhwd xmm1,xmm0 ; high four words widened to dwords
+ paddd %1, xmm1
+ SUM_ACROSS_Q %1
+%endmacro
+;void vp9_ssim_parms_16x16_sse2(
+; unsigned char *s,
+; int sp,
+; unsigned char *r,
+; int rp,
+; unsigned long *sum_s,
+; unsigned long *sum_r,
+; unsigned long *sum_sq_s,
+; unsigned long *sum_sq_r,
+; unsigned long *sum_sxr);
+;
+; TODO: Use parm passing through structure, probably don't need the pxors
+; ( calling app will initialize to 0 ) could easily fit everything in sse2
+; without too much hassle, and can probably do better estimates with psadbw
+; or pavgb. At this point this is just meant to be first pass for calculating
+; all the parms needed for 16x16 ssim so we can play with dssim as distortion
+; in mode selection code.
+global sym(vp9_ssim_parms_16x16_sse2) PRIVATE
+sym(vp9_ssim_parms_16x16_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 9
+ SAVE_XMM 15
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rsi, arg(0) ;s
+ movsxd rcx, dword ptr arg(1) ;sp is an int: sign-extend its low 32 bits instead of trusting the whole 64-bit slot
+ mov rdi, arg(2) ;r
+ movsxd rax, dword ptr arg(3) ;rp is an int: sign-extend its low 32 bits instead of trusting the whole 64-bit slot
+
+ pxor xmm0, xmm0 ; zero register used for byte->word unpacking and by SUM_ACROSS_*
+ pxor xmm15,xmm15 ;sum_s
+ pxor xmm14,xmm14 ;sum_r
+ pxor xmm13,xmm13 ;sum_sq_s
+ pxor xmm12,xmm12 ;sum_sq_r
+ pxor xmm11,xmm11 ;sum_sxr
+
+ mov rdx, 16 ;row counter
+.NextRow:
+
+ ;grab source and reference pixels
+ movdqu xmm5, [rsi]
+ movdqu xmm6, [rdi]
+ movdqa xmm3, xmm5
+ movdqa xmm4, xmm6
+ punpckhbw xmm3, xmm0 ; high_s
+ punpckhbw xmm4, xmm0 ; high_r
+
+ TABULATE_SSIM
+
+ movdqa xmm3, xmm5
+ movdqa xmm4, xmm6
+ punpcklbw xmm3, xmm0 ; low_s
+ punpcklbw xmm4, xmm0 ; low_r
+
+ TABULATE_SSIM
+
+ add rsi, rcx ; next s row
+ add rdi, rax ; next r row
+
+ dec rdx ; counter
+ jnz .NextRow
+
+ SUM_ACROSS_W xmm15
+ SUM_ACROSS_W xmm14
+ SUM_ACROSS_Q xmm13
+ SUM_ACROSS_Q xmm12
+ SUM_ACROSS_Q xmm11
+
+ mov rdi,arg(4)
+ movd [rdi], xmm15; *sum_s - NOTE(review): movd writes only 32 bits; confirm against the width of unsigned long
+ mov rdi,arg(5)
+ movd [rdi], xmm14; *sum_r
+ mov rdi,arg(6)
+ movd [rdi], xmm13; *sum_sq_s
+ mov rdi,arg(7)
+ movd [rdi], xmm12; *sum_sq_r
+ mov rdi,arg(8)
+ movd [rdi], xmm11; *sum_sxr
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void vp9_ssim_parms_8x8_sse2(
+; unsigned char *s,
+; int sp,
+; unsigned char *r,
+; int rp,
+; unsigned long *sum_s,
+; unsigned long *sum_r,
+; unsigned long *sum_sq_s,
+; unsigned long *sum_sq_r,
+; unsigned long *sum_sxr);
+;
+; TODO: Use parm passing through structure, probably don't need the pxors
+; ( calling app will initialize to 0 ) could easily fit everything in sse2
+; without too much hassle, and can probably do better estimates with psadbw
+; or pavgb. At this point this is just meant to be first pass for calculating
+; all the parms needed for 8x8 ssim so we can play with dssim as distortion
+; in mode selection code.
+global sym(vp9_ssim_parms_8x8_sse2) PRIVATE
+sym(vp9_ssim_parms_8x8_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 9
+ SAVE_XMM 15
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rsi, arg(0) ;s
+ movsxd rcx, dword ptr arg(1) ;sp is an int: sign-extend its low 32 bits instead of trusting the whole 64-bit slot
+ mov rdi, arg(2) ;r
+ movsxd rax, dword ptr arg(3) ;rp is an int: sign-extend its low 32 bits instead of trusting the whole 64-bit slot
+
+ pxor xmm0, xmm0 ; zero register used for byte->word unpacking and by SUM_ACROSS_*
+ pxor xmm15,xmm15 ;sum_s
+ pxor xmm14,xmm14 ;sum_r
+ pxor xmm13,xmm13 ;sum_sq_s
+ pxor xmm12,xmm12 ;sum_sq_r
+ pxor xmm11,xmm11 ;sum_sxr
+
+ mov rdx, 8 ;row counter
+.NextRow:
+
+ ;grab source and reference pixels
+ movq xmm3, [rsi]
+ movq xmm4, [rdi]
+ punpcklbw xmm3, xmm0 ; low_s
+ punpcklbw xmm4, xmm0 ; low_r
+
+ TABULATE_SSIM
+
+ add rsi, rcx ; next s row
+ add rdi, rax ; next r row
+
+ dec rdx ; counter
+ jnz .NextRow
+
+ SUM_ACROSS_W xmm15
+ SUM_ACROSS_W xmm14
+ SUM_ACROSS_Q xmm13
+ SUM_ACROSS_Q xmm12
+ SUM_ACROSS_Q xmm11
+
+ mov rdi,arg(4)
+ movd [rdi], xmm15; *sum_s - NOTE(review): movd writes only 32 bits; confirm against the width of unsigned long
+ mov rdi,arg(5)
+ movd [rdi], xmm14; *sum_r
+ mov rdi,arg(6)
+ movd [rdi], xmm13; *sum_sq_s
+ mov rdi,arg(7)
+ movd [rdi], xmm12; *sum_sq_r
+ mov rdi,arg(8)
+ movd [rdi], xmm11; *sum_sxr
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
diff --git a/vp9/encoder/x86/vp9_subtract_mmx.asm b/vp9/encoder/x86/vp9_subtract_mmx.asm
new file mode 100644
index 0000000..e9eda4f
--- /dev/null
+++ b/vp9/encoder/x86/vp9_subtract_mmx.asm
@@ -0,0 +1,432 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+;void vp9_subtract_b_mmx_impl(unsigned char *z, int src_stride,
+; short *diff, unsigned char *Predictor,
+; int pitch);
+global sym(vp9_subtract_b_mmx_impl) PRIVATE
+sym(vp9_subtract_b_mmx_impl):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 5
+ push rsi
+ push rdi
+ ; end prolog
+
+; 4x4 block: diff = z - Predictor, four 16-bit results per row
+ mov rdi, arg(2) ;diff
+ mov rax, arg(3) ;Predictor
+ mov rsi, arg(0) ;z
+ movsxd rdx, dword ptr arg(1);src_stride;
+ movsxd rcx, dword ptr arg(4);pitch
+ pxor mm7, mm7 ; zero, used to widen bytes to words
+
+ movd mm0, [rsi] ; row 0: 4 source bytes
+ movd mm1, [rax] ; row 0: 4 predictor bytes
+ punpcklbw mm0, mm7
+ punpcklbw mm1, mm7
+ psubw mm0, mm1
+ movq [rdi], mm0
+
+; row 1: diff rows are pitch shorts apart, hence rcx*2 bytes
+ movd mm0, [rsi+rdx]
+ movd mm1, [rax+rcx]
+ punpcklbw mm0, mm7
+ punpcklbw mm1, mm7
+ psubw mm0, mm1
+ movq [rdi+rcx*2],mm0
+
+; row 2
+ movd mm0, [rsi+rdx*2]
+ movd mm1, [rax+rcx*2]
+ punpcklbw mm0, mm7
+ punpcklbw mm1, mm7
+ psubw mm0, mm1
+ movq [rdi+rcx*4], mm0
+
+ lea rsi, [rsi+rdx*2] ; rsi = z + 2*src_stride
+ lea rcx, [rcx+rcx*2] ; rcx = 3*pitch
+
+
+; row 3: source at z + 3*src_stride, predictor at +3*pitch, diff at +6*pitch bytes
+ movd mm0, [rsi+rdx]
+ movd mm1, [rax+rcx]
+ punpcklbw mm0, mm7
+ punpcklbw mm1, mm7
+ psubw mm0, mm1
+ movq [rdi+rcx*2], mm0
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void vp9_subtract_mby_mmx(short *diff, unsigned char *src, unsigned char *pred, int stride)
+global sym(vp9_subtract_mby_mmx) PRIVATE
+sym(vp9_subtract_mby_mmx):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ push rsi
+ push rdi
+ ; end prolog
+
+; 16 rows of 16 pixels: diff = src - pred, stored as 16-bit words
+ mov rsi, arg(1) ;src
+ mov rdi, arg(0) ;diff
+
+ mov rax, arg(2) ;pred
+ movsxd rdx, dword ptr arg(3) ;stride
+
+ mov rcx, 16 ; row counter
+ pxor mm0, mm0 ; zero, used to widen bytes to words
+
+.submby_loop:
+
+ movq mm1, [rsi] ; source bytes 0-7
+ movq mm3, [rax] ; predictor bytes 0-7
+
+ movq mm2, mm1
+ movq mm4, mm3
+
+ punpcklbw mm1, mm0
+ punpcklbw mm3, mm0
+
+ punpckhbw mm2, mm0
+ punpckhbw mm4, mm0
+
+ psubw mm1, mm3
+ psubw mm2, mm4
+
+ movq [rdi], mm1
+ movq [rdi+8], mm2
+
+
+ movq mm1, [rsi+8] ; source bytes 8-15
+ movq mm3, [rax+8] ; predictor bytes 8-15
+
+ movq mm2, mm1
+ movq mm4, mm3
+
+ punpcklbw mm1, mm0
+ punpcklbw mm3, mm0
+
+ punpckhbw mm2, mm0
+ punpckhbw mm4, mm0
+
+ psubw mm1, mm3
+ psubw mm2, mm4
+
+ movq [rdi+16], mm1
+ movq [rdi+24], mm2
+
+
+ add rdi, 32 ; diff advances 16 shorts per row
+ add rax, 16 ; pred advances 16 bytes per row
+
+ lea rsi, [rsi+rdx] ; src advances by stride
+
+ sub rcx, 1
+ jnz .submby_loop
+ ; begin epilog
+ pop rdi
+ pop rsi
+
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;void vp9_subtract_mbuv_mmx(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride)
+global sym(vp9_subtract_mbuv_mmx) PRIVATE
+sym(vp9_subtract_mbuv_mmx):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 5
+ push rsi
+ push rdi
+ ; end prolog
+
+ ;short *udiff = diff + 256;
+ ;short *vdiff = diff + 320;
+ ;unsigned char *upred = pred + 256;
+ ;unsigned char *vpred = pred + 320;
+
+ ;unsigned char *z = usrc;
+ ;unsigned short *diff = udiff;
+ ;unsigned char *Predictor= upred;
+
+ mov rdi, arg(0) ;diff
+ mov rax, arg(3) ;pred
+ mov rsi, arg(1) ;z = usrc
+ add rdi, 256*2 ;diff = diff + 256 (shorts)
+ add rax, 256 ;Predictor = pred + 256
+ movsxd rdx, dword ptr arg(4) ;stride;
+ pxor mm7, mm7 ; zero, used to widen bytes to words
+
+ movq mm0, [rsi] ; U row 0: 8 source bytes
+ movq mm1, [rax] ; U row 0: 8 predictor bytes
+ movq mm3, mm0
+ movq mm4, mm1
+ punpcklbw mm0, mm7
+ punpcklbw mm1, mm7
+ punpckhbw mm3, mm7
+ punpckhbw mm4, mm7
+ psubw mm0, mm1
+ psubw mm3, mm4
+ movq [rdi], mm0
+ movq [rdi+8], mm3
+
+; U row 1 (predictor rows are 8 bytes apart, diff rows 16 bytes apart)
+ movq mm0, [rsi+rdx]
+ movq mm1, [rax+8]
+ movq mm3, mm0
+ movq mm4, mm1
+ punpcklbw mm0, mm7
+ punpcklbw mm1, mm7
+ punpckhbw mm3, mm7
+ punpckhbw mm4, mm7
+ psubw mm0, mm1
+ psubw mm3, mm4
+ movq [rdi+16], mm0
+ movq [rdi+24], mm3
+; U row 2
+ movq mm0, [rsi+rdx*2]
+ movq mm1, [rax+16]
+ movq mm3, mm0
+ movq mm4, mm1
+ punpcklbw mm0, mm7
+ punpcklbw mm1, mm7
+ punpckhbw mm3, mm7
+ punpckhbw mm4, mm7
+ psubw mm0, mm1
+ psubw mm3, mm4
+ movq [rdi+32], mm0
+ movq [rdi+40], mm3
+ lea rsi, [rsi+rdx*2] ; rsi now points at row 2
+
+; U row 3
+ movq mm0, [rsi+rdx]
+ movq mm1, [rax+24]
+ movq mm3, mm0
+ movq mm4, mm1
+ punpcklbw mm0, mm7
+ punpcklbw mm1, mm7
+ punpckhbw mm3, mm7
+ punpckhbw mm4, mm7
+ psubw mm0, mm1
+ psubw mm3, mm4
+
+ movq [rdi+48], mm0
+ movq [rdi+56], mm3
+
+; advance to U rows 4-7
+ add rdi, 64
+ add rax, 32
+ lea rsi, [rsi+rdx*2] ; rsi now points at row 4
+
+; U row 4
+ movq mm0, [rsi]
+ movq mm1, [rax]
+ movq mm3, mm0
+ movq mm4, mm1
+ punpcklbw mm0, mm7
+ punpcklbw mm1, mm7
+ punpckhbw mm3, mm7
+ punpckhbw mm4, mm7
+ psubw mm0, mm1
+ psubw mm3, mm4
+ movq [rdi], mm0
+ movq [rdi+8], mm3
+
+; U row 5
+ movq mm0, [rsi+rdx]
+ movq mm1, [rax+8]
+ movq mm3, mm0
+ movq mm4, mm1
+ punpcklbw mm0, mm7
+ punpcklbw mm1, mm7
+ punpckhbw mm3, mm7
+ punpckhbw mm4, mm7
+ psubw mm0, mm1
+ psubw mm3, mm4
+ movq [rdi+16], mm0
+ movq [rdi+24], mm3
+; U row 6
+ movq mm0, [rsi+rdx*2]
+ movq mm1, [rax+16]
+ movq mm3, mm0
+ movq mm4, mm1
+ punpcklbw mm0, mm7
+ punpcklbw mm1, mm7
+ punpckhbw mm3, mm7
+ punpckhbw mm4, mm7
+ psubw mm0, mm1
+ psubw mm3, mm4
+ movq [rdi+32], mm0
+ movq [rdi+40], mm3
+ lea rsi, [rsi+rdx*2] ; rsi now points at row 6
+
+; U row 7
+ movq mm0, [rsi+rdx]
+ movq mm1, [rax+24]
+ movq mm3, mm0
+ movq mm4, mm1
+ punpcklbw mm0, mm7
+ punpcklbw mm1, mm7
+ punpckhbw mm3, mm7
+ punpckhbw mm4, mm7
+ psubw mm0, mm1
+ psubw mm3, mm4
+
+ movq [rdi+48], mm0
+ movq [rdi+56], mm3
+
+ ;unsigned char *z = vsrc;
+ ;unsigned short *diff = vdiff;
+ ;unsigned char *Predictor= vpred;
+
+ mov rdi, arg(0) ;diff
+ mov rax, arg(3) ;pred
+ mov rsi, arg(2) ;z = vsrc
+ add rdi, 320*2 ;diff = diff + 320 (shorts)
+ add rax, 320 ;Predictor = pred + 320
+ movsxd rdx, dword ptr arg(4) ;stride;
+ pxor mm7, mm7
+
+ movq mm0, [rsi] ; V row 0: 8 source bytes
+ movq mm1, [rax] ; V row 0: 8 predictor bytes
+ movq mm3, mm0
+ movq mm4, mm1
+ punpcklbw mm0, mm7
+ punpcklbw mm1, mm7
+ punpckhbw mm3, mm7
+ punpckhbw mm4, mm7
+ psubw mm0, mm1
+ psubw mm3, mm4
+ movq [rdi], mm0
+ movq [rdi+8], mm3
+
+; V row 1
+ movq mm0, [rsi+rdx]
+ movq mm1, [rax+8]
+ movq mm3, mm0
+ movq mm4, mm1
+ punpcklbw mm0, mm7
+ punpcklbw mm1, mm7
+ punpckhbw mm3, mm7
+ punpckhbw mm4, mm7
+ psubw mm0, mm1
+ psubw mm3, mm4
+ movq [rdi+16], mm0
+ movq [rdi+24], mm3
+; V row 2
+ movq mm0, [rsi+rdx*2]
+ movq mm1, [rax+16]
+ movq mm3, mm0
+ movq mm4, mm1
+ punpcklbw mm0, mm7
+ punpcklbw mm1, mm7
+ punpckhbw mm3, mm7
+ punpckhbw mm4, mm7
+ psubw mm0, mm1
+ psubw mm3, mm4
+ movq [rdi+32], mm0
+ movq [rdi+40], mm3
+ lea rsi, [rsi+rdx*2] ; rsi now points at row 2
+
+; V row 3
+ movq mm0, [rsi+rdx]
+ movq mm1, [rax+24]
+ movq mm3, mm0
+ movq mm4, mm1
+ punpcklbw mm0, mm7
+ punpcklbw mm1, mm7
+ punpckhbw mm3, mm7
+ punpckhbw mm4, mm7
+ psubw mm0, mm1
+ psubw mm3, mm4
+
+ movq [rdi+48], mm0
+ movq [rdi+56], mm3
+
+; advance to V rows 4-7
+ add rdi, 64
+ add rax, 32
+ lea rsi, [rsi+rdx*2] ; rsi now points at row 4
+
+; V row 4
+ movq mm0, [rsi]
+ movq mm1, [rax]
+ movq mm3, mm0
+ movq mm4, mm1
+ punpcklbw mm0, mm7
+ punpcklbw mm1, mm7
+ punpckhbw mm3, mm7
+ punpckhbw mm4, mm7
+ psubw mm0, mm1
+ psubw mm3, mm4
+ movq [rdi], mm0
+ movq [rdi+8], mm3
+
+; V row 5
+ movq mm0, [rsi+rdx]
+ movq mm1, [rax+8]
+ movq mm3, mm0
+ movq mm4, mm1
+ punpcklbw mm0, mm7
+ punpcklbw mm1, mm7
+ punpckhbw mm3, mm7
+ punpckhbw mm4, mm7
+ psubw mm0, mm1
+ psubw mm3, mm4
+ movq [rdi+16], mm0
+ movq [rdi+24], mm3
+; V row 6
+ movq mm0, [rsi+rdx*2]
+ movq mm1, [rax+16]
+ movq mm3, mm0
+ movq mm4, mm1
+ punpcklbw mm0, mm7
+ punpcklbw mm1, mm7
+ punpckhbw mm3, mm7
+ punpckhbw mm4, mm7
+ psubw mm0, mm1
+ psubw mm3, mm4
+ movq [rdi+32], mm0
+ movq [rdi+40], mm3
+ lea rsi, [rsi+rdx*2] ; rsi now points at row 6
+
+; V row 7
+ movq mm0, [rsi+rdx]
+ movq mm1, [rax+24]
+ movq mm3, mm0
+ movq mm4, mm1
+ punpcklbw mm0, mm7
+ punpcklbw mm1, mm7
+ punpckhbw mm3, mm7
+ punpckhbw mm4, mm7
+ psubw mm0, mm1
+ psubw mm3, mm4
+
+ movq [rdi+48], mm0
+ movq [rdi+56], mm3
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
diff --git a/vp9/encoder/x86/vp9_subtract_sse2.asm b/vp9/encoder/x86/vp9_subtract_sse2.asm
new file mode 100644
index 0000000..739d948
--- /dev/null
+++ b/vp9/encoder/x86/vp9_subtract_sse2.asm
@@ -0,0 +1,356 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+;void vp9_subtract_b_sse2_impl(unsigned char *z, int src_stride,
+; short *diff, unsigned char *Predictor,
+; int pitch);
+;
+; Computes diff = z - Predictor for 4 rows of 4 pixels, widening every
+; 8-bit pixel to a 16-bit word before the subtraction.
+;   z, src_stride : source pixels and the byte distance between source rows
+;   Predictor     : predicted pixels; consecutive rows are pitch bytes apart
+;   diff          : output words; consecutive rows are pitch words
+;                   (pitch*2 bytes) apart
+; Only 64-bit MMX registers are used here, and no emms is executed before
+; returning.
+global sym(vp9_subtract_b_sse2_impl) PRIVATE
+sym(vp9_subtract_b_sse2_impl):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 5
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rdi, arg(2) ;diff
+ mov rax, arg(3) ;Predictor
+ mov rsi, arg(0) ;z
+ movsxd rdx, dword ptr arg(1);src_stride;
+ movsxd rcx, dword ptr arg(4);pitch
+ pxor mm7, mm7 ; zero, used to widen bytes to words
+
+ ; row 0
+ movd mm0, [rsi]
+ movd mm1, [rax]
+ punpcklbw mm0, mm7
+ punpcklbw mm1, mm7
+ psubw mm0, mm1
+ movq MMWORD PTR [rdi], mm0
+
+ ; row 1
+ movd mm0, [rsi+rdx]
+ movd mm1, [rax+rcx]
+ punpcklbw mm0, mm7
+ punpcklbw mm1, mm7
+ psubw mm0, mm1
+ movq MMWORD PTR [rdi+rcx*2], mm0
+
+ ; row 2
+ movd mm0, [rsi+rdx*2]
+ movd mm1, [rax+rcx*2]
+ punpcklbw mm0, mm7
+ punpcklbw mm1, mm7
+ psubw mm0, mm1
+ movq MMWORD PTR [rdi+rcx*4], mm0
+
+ lea rsi, [rsi+rdx*2] ; z += 2 * src_stride
+ lea rcx, [rcx+rcx*2] ; rcx = 3 * pitch
+
+ ; row 3
+ movd mm0, [rsi+rdx]
+ movd mm1, [rax+rcx]
+ punpcklbw mm0, mm7
+ punpcklbw mm1, mm7
+ psubw mm0, mm1
+ movq MMWORD PTR [rdi+rcx*2], mm0
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;void vp9_subtract_mby_sse2(short *diff, unsigned char *src, unsigned char *pred, int stride)
+;
+; Computes diff = src - pred for 16 rows of 16 pixels, producing one signed
+; 16-bit word per pixel.
+;   src  : rows are stride bytes apart
+;   pred : rows are 16 bytes apart (packed)
+;   diff : rows are 32 bytes (16 words) apart (packed)
+; All loads and stores use movdqa, so src (every row), pred and diff must be
+; 16-byte aligned.
+;
+; Widening trick: psubb yields the low 8 bits of each difference. Comparing
+; the operands after XOR with 0x80 (an unsigned compare done with the signed
+; pcmpgtb) gives 0xFF where pred > src, i.e. where the difference is
+; negative. Interleaving difference bytes with that mask sign-extends each
+; difference to 16 bits.
+global sym(vp9_subtract_mby_sse2) PRIVATE
+sym(vp9_subtract_mby_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rsi, arg(1) ;src
+ mov rdi, arg(0) ;diff
+
+ mov rax, arg(2) ;pred
+ movsxd rdx, dword ptr arg(3) ;stride
+
+ mov rcx, 8 ; 8 iterations, two rows per iteration
+
+.submby_loop:
+ ; first row of the pair
+ movdqa xmm0, XMMWORD PTR [rsi] ; src
+ movdqa xmm1, XMMWORD PTR [rax] ; pred
+
+ movdqa xmm2, xmm0
+ psubb xmm0, xmm1 ; low 8 bits of src - pred
+
+ pxor xmm1, [GLOBAL(t80)] ;convert to signed values
+ pxor xmm2, [GLOBAL(t80)]
+ pcmpgtb xmm1, xmm2 ; 0xFF where pred > src (negative difference)
+
+ movdqa xmm2, xmm0
+ movdqa xmm3, xmm1
+ punpcklbw xmm0, xmm1 ; pixels 0-7: difference byte + sign byte
+ punpckhbw xmm2, xmm3 ; pixels 8-15: difference byte + sign byte
+
+ movdqa XMMWORD PTR [rdi], xmm0
+ movdqa XMMWORD PTR [rdi +16], xmm2
+
+ ; second row of the pair
+ movdqa xmm4, XMMWORD PTR [rsi + rdx]
+ movdqa xmm5, XMMWORD PTR [rax + 16]
+
+ movdqa xmm6, xmm4
+ psubb xmm4, xmm5 ; low 8 bits of src - pred
+
+ pxor xmm5, [GLOBAL(t80)] ;convert to signed values
+ pxor xmm6, [GLOBAL(t80)]
+ pcmpgtb xmm5, xmm6 ; 0xFF where pred > src (negative difference)
+
+ movdqa xmm6, xmm4
+ movdqa xmm7, xmm5
+ punpcklbw xmm4, xmm5 ; pixels 0-7: difference byte + sign byte
+ punpckhbw xmm6, xmm7 ; pixels 8-15: difference byte + sign byte
+
+ movdqa XMMWORD PTR [rdi +32], xmm4
+ movdqa XMMWORD PTR [rdi +48], xmm6
+
+ add rdi, 64 ; diff += 2 rows * 16 words
+ add rax, 32 ; pred += 2 rows * 16 bytes
+ lea rsi, [rsi+rdx*2] ; src += 2 * stride
+
+ sub rcx, 1
+ jnz .submby_loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;void vp9_subtract_mbuv_sse2(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride)
+;
+; Computes diff = src - pred for two planes of 8 rows x 8 pixels each,
+; first from usrc and then from vsrc, one signed 16-bit word per pixel.
+;   usrc, vsrc : source planes; rows are stride bytes apart
+;   pred       : the U predictor is read from pred + 256 and the V predictor
+;                from pred + 320; rows are 8 bytes apart (packed)
+;   diff       : U output starts at diff + 256 words and V output at
+;                diff + 320 words; rows are 8 words apart (packed)
+; Two source rows are combined into one XMM register per step. The
+; predictor loads and diff stores use movdqa and therefore need 16-byte
+; alignment; source rows are read with 8-byte movq loads.
+;
+; Widening trick: psubb yields the low 8 bits of each difference, and the
+; pxor/pcmpgtb pair produces 0xFF where pred > src. Interleaving the two
+; sign-extends every difference to 16 bits.
+global sym(vp9_subtract_mbuv_sse2) PRIVATE
+sym(vp9_subtract_mbuv_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 5
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rdi, arg(0) ;diff
+ mov rax, arg(3) ;pred
+ mov rsi, arg(1) ;z = usrc
+ add rdi, 256*2 ;diff = diff + 256 (shorts)
+ add rax, 256 ;Predictor = pred + 256
+ movsxd rdx, dword ptr arg(4) ;stride;
+ lea rcx, [rdx + rdx*2] ; rcx = 3 * stride
+
+ ;u
+ ;line 0 1
+ movq xmm0, MMWORD PTR [rsi] ; src
+ movq xmm2, MMWORD PTR [rsi+rdx]
+ movdqa xmm1, XMMWORD PTR [rax] ; pred
+ punpcklqdq xmm0, xmm2 ; two source rows in one register
+
+ movdqa xmm2, xmm0
+ psubb xmm0, xmm1 ; subtraction with sign missed
+
+ pxor xmm1, [GLOBAL(t80)] ;convert to signed values
+ pxor xmm2, [GLOBAL(t80)]
+ pcmpgtb xmm1, xmm2 ; obtain sign information
+
+ movdqa xmm2, xmm0
+ movdqa xmm3, xmm1
+ punpcklbw xmm0, xmm1 ; put sign back to subtraction
+ punpckhbw xmm2, xmm3 ; put sign back to subtraction
+
+ movdqa XMMWORD PTR [rdi], xmm0
+ movdqa XMMWORD PTR [rdi +16], xmm2
+
+ ;line 2 3
+ movq xmm0, MMWORD PTR [rsi+rdx*2] ; src
+ movq xmm2, MMWORD PTR [rsi+rcx]
+ movdqa xmm1, XMMWORD PTR [rax+16] ; pred
+ punpcklqdq xmm0, xmm2
+
+ movdqa xmm2, xmm0
+ psubb xmm0, xmm1 ; subtraction with sign missed
+
+ pxor xmm1, [GLOBAL(t80)] ;convert to signed values
+ pxor xmm2, [GLOBAL(t80)]
+ pcmpgtb xmm1, xmm2 ; obtain sign information
+
+ movdqa xmm2, xmm0
+ movdqa xmm3, xmm1
+ punpcklbw xmm0, xmm1 ; put sign back to subtraction
+ punpckhbw xmm2, xmm3 ; put sign back to subtraction
+
+ movdqa XMMWORD PTR [rdi + 32], xmm0
+ movdqa XMMWORD PTR [rdi + 48], xmm2
+
+ ;line 4 5
+ lea rsi, [rsi + rdx*4] ; src += 4 * stride
+
+ movq xmm0, MMWORD PTR [rsi] ; src
+ movq xmm2, MMWORD PTR [rsi+rdx]
+ movdqa xmm1, XMMWORD PTR [rax + 32] ; pred
+ punpcklqdq xmm0, xmm2
+
+ movdqa xmm2, xmm0
+ psubb xmm0, xmm1 ; subtraction with sign missed
+
+ pxor xmm1, [GLOBAL(t80)] ;convert to signed values
+ pxor xmm2, [GLOBAL(t80)]
+ pcmpgtb xmm1, xmm2 ; obtain sign information
+
+ movdqa xmm2, xmm0
+ movdqa xmm3, xmm1
+ punpcklbw xmm0, xmm1 ; put sign back to subtraction
+ punpckhbw xmm2, xmm3 ; put sign back to subtraction
+
+ movdqa XMMWORD PTR [rdi + 64], xmm0
+ movdqa XMMWORD PTR [rdi + 80], xmm2
+
+ ;line 6 7
+ movq xmm0, MMWORD PTR [rsi+rdx*2] ; src
+ movq xmm2, MMWORD PTR [rsi+rcx]
+ movdqa xmm1, XMMWORD PTR [rax+ 48] ; pred
+ punpcklqdq xmm0, xmm2
+
+ movdqa xmm2, xmm0
+ psubb xmm0, xmm1 ; subtraction with sign missed
+
+ pxor xmm1, [GLOBAL(t80)] ;convert to signed values
+ pxor xmm2, [GLOBAL(t80)]
+ pcmpgtb xmm1, xmm2 ; obtain sign information
+
+ movdqa xmm2, xmm0
+ movdqa xmm3, xmm1
+ punpcklbw xmm0, xmm1 ; put sign back to subtraction
+ punpckhbw xmm2, xmm3 ; put sign back to subtraction
+
+ movdqa XMMWORD PTR [rdi + 96], xmm0
+ movdqa XMMWORD PTR [rdi + 112], xmm2
+
+ ;v
+ mov rsi, arg(2) ;z = vsrc
+ add rdi, 64*2 ;diff = diff + 320 (shorts)
+ add rax, 64 ;Predictor = pred + 320
+
+ ;line 0 1
+ movq xmm0, MMWORD PTR [rsi] ; src
+ movq xmm2, MMWORD PTR [rsi+rdx]
+ movdqa xmm1, XMMWORD PTR [rax] ; pred
+ punpcklqdq xmm0, xmm2
+
+ movdqa xmm2, xmm0
+ psubb xmm0, xmm1 ; subtraction with sign missed
+
+ pxor xmm1, [GLOBAL(t80)] ;convert to signed values
+ pxor xmm2, [GLOBAL(t80)]
+ pcmpgtb xmm1, xmm2 ; obtain sign information
+
+ movdqa xmm2, xmm0
+ movdqa xmm3, xmm1
+ punpcklbw xmm0, xmm1 ; put sign back to subtraction
+ punpckhbw xmm2, xmm3 ; put sign back to subtraction
+
+ movdqa XMMWORD PTR [rdi], xmm0
+ movdqa XMMWORD PTR [rdi +16], xmm2
+
+ ;line 2 3
+ movq xmm0, MMWORD PTR [rsi+rdx*2] ; src
+ movq xmm2, MMWORD PTR [rsi+rcx]
+ movdqa xmm1, XMMWORD PTR [rax+16] ; pred
+ punpcklqdq xmm0, xmm2
+
+ movdqa xmm2, xmm0
+ psubb xmm0, xmm1 ; subtraction with sign missed
+
+ pxor xmm1, [GLOBAL(t80)] ;convert to signed values
+ pxor xmm2, [GLOBAL(t80)]
+ pcmpgtb xmm1, xmm2 ; obtain sign information
+
+ movdqa xmm2, xmm0
+ movdqa xmm3, xmm1
+ punpcklbw xmm0, xmm1 ; put sign back to subtraction
+ punpckhbw xmm2, xmm3 ; put sign back to subtraction
+
+ movdqa XMMWORD PTR [rdi + 32], xmm0
+ movdqa XMMWORD PTR [rdi + 48], xmm2
+
+ ;line 4 5
+ lea rsi, [rsi + rdx*4] ; src += 4 * stride
+
+ movq xmm0, MMWORD PTR [rsi] ; src
+ movq xmm2, MMWORD PTR [rsi+rdx]
+ movdqa xmm1, XMMWORD PTR [rax + 32] ; pred
+ punpcklqdq xmm0, xmm2
+
+ movdqa xmm2, xmm0
+ psubb xmm0, xmm1 ; subtraction with sign missed
+
+ pxor xmm1, [GLOBAL(t80)] ;convert to signed values
+ pxor xmm2, [GLOBAL(t80)]
+ pcmpgtb xmm1, xmm2 ; obtain sign information
+
+ movdqa xmm2, xmm0
+ movdqa xmm3, xmm1
+ punpcklbw xmm0, xmm1 ; put sign back to subtraction
+ punpckhbw xmm2, xmm3 ; put sign back to subtraction
+
+ movdqa XMMWORD PTR [rdi + 64], xmm0
+ movdqa XMMWORD PTR [rdi + 80], xmm2
+
+ ;line 6 7
+ movq xmm0, MMWORD PTR [rsi+rdx*2] ; src
+ movq xmm2, MMWORD PTR [rsi+rcx]
+ movdqa xmm1, XMMWORD PTR [rax+ 48] ; pred
+ punpcklqdq xmm0, xmm2
+
+ movdqa xmm2, xmm0
+ psubb xmm0, xmm1 ; subtraction with sign missed
+
+ pxor xmm1, [GLOBAL(t80)] ;convert to signed values
+ pxor xmm2, [GLOBAL(t80)]
+ pcmpgtb xmm1, xmm2 ; obtain sign information
+
+ movdqa xmm2, xmm0
+ movdqa xmm3, xmm1
+ punpcklbw xmm0, xmm1 ; put sign back to subtraction
+ punpckhbw xmm2, xmm3 ; put sign back to subtraction
+
+ movdqa XMMWORD PTR [rdi + 96], xmm0
+ movdqa XMMWORD PTR [rdi + 112], xmm2
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+SECTION_RODATA
+; 16 bytes of 0x80. XORing a register with this flips the top bit of every
+; byte, so that the signed byte compare pcmpgtb orders the original
+; unsigned values correctly.
+align 16
+t80:
+ times 16 db 0x80
diff --git a/vp9/encoder/x86/vp9_temporal_filter_apply_sse2.asm b/vp9/encoder/x86/vp9_temporal_filter_apply_sse2.asm
new file mode 100644
index 0000000..a559d5d
--- /dev/null
+++ b/vp9/encoder/x86/vp9_temporal_filter_apply_sse2.asm
@@ -0,0 +1,207 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+; void vp9_temporal_filter_apply_sse2 | arg
+; (unsigned char *frame1, | 0
+; unsigned int stride, | 1
+; unsigned char *frame2, | 2
+; unsigned int block_size, | 3
+; int strength, | 4
+; int filter_weight, | 5
+; unsigned int *accumulator, | 6
+; unsigned short *count) | 7
+;
+; For every pixel of a block_size x block_size block this computes
+;   modifier = (3 * (frame1 - frame2)^2 + rounding) >> strength
+;   modifier = max(16 - modifier, 0) * filter_weight
+; and then does count[i] += modifier and
+; accumulator[i] += modifier * frame2[i].
+; frame1 rows are stride bytes apart. frame2, count and accumulator are
+; walked linearly, 16 elements per loop iteration. block_size 8 takes the
+; two-rows-of-8 load path; any other value is handled as 16.
+; frame2, count and accumulator are accessed with movdqa and must be
+; 16-byte aligned, as must frame1 rows on the 16-wide path.
+; NOTE(review): arg(1), arg(3) and arg(4) are int-sized but are read at
+; full register width below; confirm that the upper half is guaranteed
+; clean on 64-bit targets.
+global sym(vp9_temporal_filter_apply_sse2) PRIVATE
+sym(vp9_temporal_filter_apply_sse2):
+
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 8
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ALIGN_STACK 16, rax
+ ; offsets of the local slots on the aligned stack
+ %define block_size 0
+ %define strength 16
+ %define filter_weight 32
+ %define rounding_bit 48
+ %define rbp_backup 64
+ %define stack_size 80
+ sub rsp, stack_size
+ mov [rsp + rbp_backup], rbp ; rbp is reused for the stride below
+ ; end prolog
+
+ mov rdx, arg(3)
+ mov [rsp + block_size], rdx
+ movd xmm6, arg(4)
+ movdqa [rsp + strength], xmm6 ; where strength is used, all 16 bytes are read
+
+ ; calculate the rounding bit outside the loop
+ ; 0x8000 >> (16 - strength)
+ mov rdx, 16
+ sub rdx, arg(4) ; 16 - strength
+ movd xmm4, rdx ; can't use rdx w/ shift
+ movdqa xmm5, [GLOBAL(_const_top_bit)]
+ psrlw xmm5, xmm4
+ movdqa [rsp + rounding_bit], xmm5
+
+ mov rsi, arg(0) ; src/frame1
+ mov rdx, arg(2) ; predictor frame
+ mov rdi, arg(6) ; accumulator
+ mov rax, arg(7) ; count
+
+ ; dup the filter weight and store for later
+ movd xmm0, arg(5) ; filter_weight
+ pshuflw xmm0, xmm0, 0
+ punpcklwd xmm0, xmm0
+ movdqa [rsp + filter_weight], xmm0
+
+ mov rbp, arg(1) ; stride
+ pxor xmm7, xmm7 ; zero for extraction
+
+ ; rcx = address one past the last predictor byte; it ends the loop
+ lea rcx, [rdx + 16*16*1]
+ cmp dword ptr [rsp + block_size], 8
+ jne .temporal_filter_apply_load_16
+ lea rcx, [rdx + 8*8*1]
+
+.temporal_filter_apply_load_8:
+ ; 8-wide block: gather two source rows to make 16 pixels
+ movq xmm0, [rsi] ; first row
+ lea rsi, [rsi + rbp] ; += stride
+ punpcklbw xmm0, xmm7 ; src[ 0- 7]
+ movq xmm1, [rsi] ; second row
+ lea rsi, [rsi + rbp] ; += stride
+ punpcklbw xmm1, xmm7 ; src[ 8-15]
+ jmp .temporal_filter_apply_load_finished
+
+.temporal_filter_apply_load_16:
+ ; 16-wide block: one source row gives 16 pixels
+ movdqa xmm0, [rsi] ; src (frame1)
+ lea rsi, [rsi + rbp] ; += stride
+ movdqa xmm1, xmm0
+ punpcklbw xmm0, xmm7 ; src[ 0- 7]
+ punpckhbw xmm1, xmm7 ; src[ 8-15]
+
+.temporal_filter_apply_load_finished:
+ movdqa xmm2, [rdx] ; predictor (frame2)
+ movdqa xmm3, xmm2
+ punpcklbw xmm2, xmm7 ; pred[ 0- 7]
+ punpckhbw xmm3, xmm7 ; pred[ 8-15]
+
+ ; modifier = src_byte - pixel_value
+ psubw xmm0, xmm2 ; src - pred[ 0- 7]
+ psubw xmm1, xmm3 ; src - pred[ 8-15]
+
+ ; modifier *= modifier
+ pmullw xmm0, xmm0 ; modifier[ 0- 7]^2
+ pmullw xmm1, xmm1 ; modifier[ 8-15]^2
+
+ ; modifier *= 3
+ pmullw xmm0, [GLOBAL(_const_3w)]
+ pmullw xmm1, [GLOBAL(_const_3w)]
+
+ ; modifier += 0x8000 >> (16 - strength)
+ paddw xmm0, [rsp + rounding_bit]
+ paddw xmm1, [rsp + rounding_bit]
+
+ ; modifier >>= strength
+ psrlw xmm0, [rsp + strength]
+ psrlw xmm1, [rsp + strength]
+
+ ; modifier = 16 - modifier
+ ; saturation takes care of modifier > 16
+ movdqa xmm3, [GLOBAL(_const_16w)]
+ movdqa xmm2, [GLOBAL(_const_16w)]
+ psubusw xmm3, xmm1
+ psubusw xmm2, xmm0
+
+ ; modifier *= filter_weight
+ pmullw xmm2, [rsp + filter_weight]
+ pmullw xmm3, [rsp + filter_weight]
+
+ ; count
+ movdqa xmm4, [rax]
+ movdqa xmm5, [rax+16]
+ ; += modifier
+ paddw xmm4, xmm2
+ paddw xmm5, xmm3
+ ; write back
+ movdqa [rax], xmm4
+ movdqa [rax+16], xmm5
+ lea rax, [rax + 16*2] ; count += 16*(sizeof(short))
+
+ ; load and extract the predictor up to shorts
+ pxor xmm7, xmm7
+ movdqa xmm0, [rdx]
+ lea rdx, [rdx + 16*1] ; pred += 16*(sizeof(char))
+ movdqa xmm1, xmm0
+ punpcklbw xmm0, xmm7 ; pred[ 0- 7]
+ punpckhbw xmm1, xmm7 ; pred[ 8-15]
+
+ ; modifier *= pixel_value
+ ; NOTE(review): pmullw keeps only the low 16 bits of each product, so
+ ; this presumably relies on modifier * pixel fitting in 16 bits - confirm
+ ; the allowed filter_weight range.
+ pmullw xmm0, xmm2
+ pmullw xmm1, xmm3
+
+ ; expand to double words
+ movdqa xmm2, xmm0
+ punpcklwd xmm0, xmm7 ; [ 0- 3]
+ punpckhwd xmm2, xmm7 ; [ 4- 7]
+ movdqa xmm3, xmm1
+ punpcklwd xmm1, xmm7 ; [ 8-11]
+ punpckhwd xmm3, xmm7 ; [12-15]
+
+ ; accumulator (xmm7 is overwritten here and re-zeroed before looping)
+ movdqa xmm4, [rdi]
+ movdqa xmm5, [rdi+16]
+ movdqa xmm6, [rdi+32]
+ movdqa xmm7, [rdi+48]
+ ; += modifier
+ paddd xmm4, xmm0
+ paddd xmm5, xmm2
+ paddd xmm6, xmm1
+ paddd xmm7, xmm3
+ ; write back
+ movdqa [rdi], xmm4
+ movdqa [rdi+16], xmm5
+ movdqa [rdi+32], xmm6
+ movdqa [rdi+48], xmm7
+ lea rdi, [rdi + 16*4] ; accumulator += 16*(sizeof(int))
+
+ cmp rdx, rcx ; reached the end of the predictor block?
+ je .temporal_filter_apply_epilog
+ pxor xmm7, xmm7 ; zero for extraction
+ cmp dword ptr [rsp + block_size], 16
+ je .temporal_filter_apply_load_16
+ jmp .temporal_filter_apply_load_8
+
+.temporal_filter_apply_epilog:
+ ; begin epilog
+ mov rbp, [rsp + rbp_backup] ; restore the frame pointer used by arg()
+ add rsp, stack_size
+ pop rsp ; undo ALIGN_STACK
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+SECTION_RODATA
+; Word constants for vp9_temporal_filter_apply_sse2, each replicated across
+; all 8 lanes of an XMM register.
+align 16
+_const_3w:
+ times 8 dw 3 ; multiplier applied to the squared difference
+align 16
+_const_top_bit:
+ times 8 dw 1<<15 ; shifted right by (16 - strength) to form the rounding bit
+align 16
+_const_16w:
+ times 8 dw 16 ; modifier = 16 - scaled squared difference
diff --git a/vp9/encoder/x86/vp9_variance_impl_mmx.asm b/vp9/encoder/x86/vp9_variance_impl_mmx.asm
new file mode 100644
index 0000000..9f140c9
--- /dev/null
+++ b/vp9/encoder/x86/vp9_variance_impl_mmx.asm
@@ -0,0 +1,851 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+;unsigned int vp9_get_mb_ss_mmx( short *src_ptr )
+;
+; Returns the sum of squares of the 256 signed 16-bit values starting at
+; src_ptr (16 loop iterations of 16 values each).
+; Only MMX registers are used and no emms is executed before returning.
+; NOTE(review): SHADOW_ARGS_TO_STACK is given 7 although the prototype
+; takes a single argument - confirm whether 1 was intended.
+global sym(vp9_get_mb_ss_mmx) PRIVATE
+sym(vp9_get_mb_ss_mmx):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ sub rsp, 8 ; scratch qword for the final reduction
+ ; end prolog
+
+ mov rax, arg(0) ;src_ptr
+ mov rcx, 16 ; 16 iterations of 16 values
+ pxor mm4, mm4 ; two 32-bit partial sums
+
+.NEXTROW:
+ movq mm0, [rax]
+ movq mm1, [rax+8]
+ movq mm2, [rax+16]
+ movq mm3, [rax+24]
+ pmaddwd mm0, mm0 ; square each word, add adjacent pairs
+ pmaddwd mm1, mm1
+ pmaddwd mm2, mm2
+ pmaddwd mm3, mm3
+
+ paddd mm4, mm0
+ paddd mm4, mm1
+ paddd mm4, mm2
+ paddd mm4, mm3
+
+ add rax, 32
+ dec rcx
+ ; dec leaves CF untouched, so branch on ZF alone instead of using ja,
+ ; which would also test the carry left over from the add above.
+ jnz .NEXTROW
+ movq QWORD PTR [rsp], mm4
+
+ ;return sum[0]+sum[1];
+ movsxd rax, dword ptr [rsp]
+ movsxd rcx, dword ptr [rsp+4]
+ add rax, rcx
+
+
+ ; begin epilog
+ add rsp, 8
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;unsigned int vp9_get8x8var_mmx
+;(
+; unsigned char *src_ptr,
+; int source_stride,
+; unsigned char *ref_ptr,
+; int recon_stride,
+; unsigned int *SSE,
+; int *Sum
+;)
+;
+; For 8 rows of 8 pixels, computes the per-pixel difference src - ref and
+; stores the sum of the differences to *Sum and the sum of the squared
+; differences to *SSE. The function itself always returns 0.
+; Register roles: mm5 = four 16-bit partial sums of differences,
+; mm6 = zero (for widening bytes to words), mm7 = two 32-bit partial sums
+; of squares. The reference row for the next iteration is loaded into mm1
+; at the end of the previous one.
+; Only MMX registers are used and no emms is executed before returning.
+global sym(vp9_get8x8var_mmx) PRIVATE
+sym(vp9_get8x8var_mmx):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ push rsi
+ push rdi
+ push rbx
+ sub rsp, 16 ; scratch: [rsp] squares, [rsp+8] sums
+ ; end prolog
+
+
+ pxor mm5, mm5 ; clear mm5 (sum of differences)
+ pxor mm6, mm6 ; clear mm6 (zero for unpacking)
+ pxor mm7, mm7 ; clear mm7 (sum of squares)
+
+ mov rax, arg(0) ;[src_ptr] ; Load base addresses
+ mov rbx, arg(2) ;[ref_ptr]
+ movsxd rcx, dword ptr arg(1) ;[source_stride]
+ movsxd rdx, dword ptr arg(3) ;[recon_stride]
+
+ ; Row 1
+ movq mm0, [rax] ; Copy eight bytes to mm0
+ movq mm1, [rbx] ; Copy eight bytes to mm1
+ movq mm2, mm0 ; Take copies
+ movq mm3, mm1 ; Take copies
+
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ punpcklbw mm1, mm6
+ punpckhbw mm2, mm6 ; unpack to higher precision
+ punpckhbw mm3, mm6
+ psubsw mm0, mm1 ; A-B (low order) to MM0
+ psubsw mm2, mm3 ; A-B (high order) to MM2
+
+ paddw mm5, mm0 ; accumulate differences in mm5
+ paddw mm5, mm2 ; accumulate differences in mm5
+
+ pmaddwd mm0, mm0 ; square and accumulate
+ pmaddwd mm2, mm2 ; square and accumulate
+ add rbx,rdx ; Inc pointer into ref data
+ add rax,rcx ; Inc pointer into the new data
+ movq mm1, [rbx] ; Copy eight bytes to mm1 (ref row 2)
+ paddd mm7, mm0 ; accumulate in mm7
+ paddd mm7, mm2 ; accumulate in mm7
+
+
+ ; Row 2
+ movq mm0, [rax] ; Copy eight bytes to mm0
+ movq mm2, mm0 ; Take copies
+ movq mm3, mm1 ; Take copies
+
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ punpcklbw mm1, mm6
+ punpckhbw mm2, mm6 ; unpack to higher precision
+ punpckhbw mm3, mm6
+ psubsw mm0, mm1 ; A-B (low order) to MM0
+ psubsw mm2, mm3 ; A-B (high order) to MM2
+
+ paddw mm5, mm0 ; accumulate differences in mm5
+ paddw mm5, mm2 ; accumulate differences in mm5
+
+ pmaddwd mm0, mm0 ; square and accumulate
+ pmaddwd mm2, mm2 ; square and accumulate
+ add rbx,rdx ; Inc pointer into ref data
+ add rax,rcx ; Inc pointer into the new data
+ movq mm1, [rbx] ; Copy eight bytes to mm1 (ref row 3)
+ paddd mm7, mm0 ; accumulate in mm7
+ paddd mm7, mm2 ; accumulate in mm7
+
+ ; Row 3
+ movq mm0, [rax] ; Copy eight bytes to mm0
+ movq mm2, mm0 ; Take copies
+ movq mm3, mm1 ; Take copies
+
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ punpcklbw mm1, mm6
+ punpckhbw mm2, mm6 ; unpack to higher precision
+ punpckhbw mm3, mm6
+ psubsw mm0, mm1 ; A-B (low order) to MM0
+ psubsw mm2, mm3 ; A-B (high order) to MM2
+
+ paddw mm5, mm0 ; accumulate differences in mm5
+ paddw mm5, mm2 ; accumulate differences in mm5
+
+ pmaddwd mm0, mm0 ; square and accumulate
+ pmaddwd mm2, mm2 ; square and accumulate
+ add rbx,rdx ; Inc pointer into ref data
+ add rax,rcx ; Inc pointer into the new data
+ movq mm1, [rbx] ; Copy eight bytes to mm1 (ref row 4)
+ paddd mm7, mm0 ; accumulate in mm7
+ paddd mm7, mm2 ; accumulate in mm7
+
+ ; Row 4
+ movq mm0, [rax] ; Copy eight bytes to mm0
+ movq mm2, mm0 ; Take copies
+ movq mm3, mm1 ; Take copies
+
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ punpcklbw mm1, mm6
+ punpckhbw mm2, mm6 ; unpack to higher precision
+ punpckhbw mm3, mm6
+ psubsw mm0, mm1 ; A-B (low order) to MM0
+ psubsw mm2, mm3 ; A-B (high order) to MM2
+
+ paddw mm5, mm0 ; accumulate differences in mm5
+ paddw mm5, mm2 ; accumulate differences in mm5
+
+ pmaddwd mm0, mm0 ; square and accumulate
+ pmaddwd mm2, mm2 ; square and accumulate
+ add rbx,rdx ; Inc pointer into ref data
+ add rax,rcx ; Inc pointer into the new data
+ movq mm1, [rbx] ; Copy eight bytes to mm1 (ref row 5)
+ paddd mm7, mm0 ; accumulate in mm7
+ paddd mm7, mm2 ; accumulate in mm7
+
+ ; Row 5
+ movq mm0, [rax] ; Copy eight bytes to mm0
+ movq mm2, mm0 ; Take copies
+ movq mm3, mm1 ; Take copies
+
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ punpcklbw mm1, mm6
+ punpckhbw mm2, mm6 ; unpack to higher precision
+ punpckhbw mm3, mm6
+ psubsw mm0, mm1 ; A-B (low order) to MM0
+ psubsw mm2, mm3 ; A-B (high order) to MM2
+
+ paddw mm5, mm0 ; accumulate differences in mm5
+ paddw mm5, mm2 ; accumulate differences in mm5
+
+ pmaddwd mm0, mm0 ; square and accumulate
+ pmaddwd mm2, mm2 ; square and accumulate
+ add rbx,rdx ; Inc pointer into ref data
+ add rax,rcx ; Inc pointer into the new data
+ movq mm1, [rbx] ; Copy eight bytes to mm1 (ref row 6)
+ paddd mm7, mm0 ; accumulate in mm7
+ paddd mm7, mm2 ; accumulate in mm7
+
+ ; Row 6
+ movq mm0, [rax] ; Copy eight bytes to mm0
+ movq mm2, mm0 ; Take copies
+ movq mm3, mm1 ; Take copies
+
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ punpcklbw mm1, mm6
+ punpckhbw mm2, mm6 ; unpack to higher precision
+ punpckhbw mm3, mm6
+ psubsw mm0, mm1 ; A-B (low order) to MM0
+ psubsw mm2, mm3 ; A-B (high order) to MM2
+
+ paddw mm5, mm0 ; accumulate differences in mm5
+ paddw mm5, mm2 ; accumulate differences in mm5
+
+ pmaddwd mm0, mm0 ; square and accumulate
+ pmaddwd mm2, mm2 ; square and accumulate
+ add rbx,rdx ; Inc pointer into ref data
+ add rax,rcx ; Inc pointer into the new data
+ movq mm1, [rbx] ; Copy eight bytes to mm1 (ref row 7)
+ paddd mm7, mm0 ; accumulate in mm7
+ paddd mm7, mm2 ; accumulate in mm7
+
+ ; Row 7
+ movq mm0, [rax] ; Copy eight bytes to mm0
+ movq mm2, mm0 ; Take copies
+ movq mm3, mm1 ; Take copies
+
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ punpcklbw mm1, mm6
+ punpckhbw mm2, mm6 ; unpack to higher precision
+ punpckhbw mm3, mm6
+ psubsw mm0, mm1 ; A-B (low order) to MM0
+ psubsw mm2, mm3 ; A-B (high order) to MM2
+
+ paddw mm5, mm0 ; accumulate differences in mm5
+ paddw mm5, mm2 ; accumulate differences in mm5
+
+ pmaddwd mm0, mm0 ; square and accumulate
+ pmaddwd mm2, mm2 ; square and accumulate
+ add rbx,rdx ; Inc pointer into ref data
+ add rax,rcx ; Inc pointer into the new data
+ movq mm1, [rbx] ; Copy eight bytes to mm1 (ref row 8)
+ paddd mm7, mm0 ; accumulate in mm7
+ paddd mm7, mm2 ; accumulate in mm7
+
+ ; Row 8
+ movq mm0, [rax] ; Copy eight bytes to mm0
+ movq mm2, mm0 ; Take copies
+ movq mm3, mm1 ; Take copies
+
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ punpcklbw mm1, mm6
+ punpckhbw mm2, mm6 ; unpack to higher precision
+ punpckhbw mm3, mm6
+ psubsw mm0, mm1 ; A-B (low order) to MM0
+ psubsw mm2, mm3 ; A-B (high order) to MM2
+
+ paddw mm5, mm0 ; accumulate differences in mm5
+ paddw mm5, mm2 ; accumulate differences in mm5
+
+ pmaddwd mm0, mm0 ; square and accumulate
+ pmaddwd mm2, mm2 ; square and accumulate
+ add rbx,rdx ; Inc pointer into ref data (not dereferenced again)
+ add rax,rcx ; Inc pointer into the new data (not dereferenced again)
+ paddd mm7, mm0 ; accumulate in mm7
+ paddd mm7, mm2 ; accumulate in mm7
+
+ ; Now accumulate the final results.
+ movq QWORD PTR [rsp+8], mm5 ; copy back accumulated results into normal memory
+ movq QWORD PTR [rsp], mm7 ; copy back accumulated results into normal memory
+ ; add the four sign-extended 16-bit partial sums
+ movsx rdx, WORD PTR [rsp+8]
+ movsx rcx, WORD PTR [rsp+10]
+ movsx rbx, WORD PTR [rsp+12]
+ movsx rax, WORD PTR [rsp+14]
+ add rdx, rcx
+ add rbx, rax
+ add rdx, rbx ;XSum
+ ; add the two 32-bit partial sums of squares
+ movsxd rax, DWORD PTR [rsp]
+ movsxd rcx, DWORD PTR [rsp+4]
+ add rax, rcx ;XXSum
+ mov rsi, arg(4) ;SSE
+ mov rdi, arg(5) ;Sum
+ mov dword ptr [rsi], eax
+ mov dword ptr [rdi], edx
+ xor rax, rax ; return 0
+
+
+ ; begin epilog
+ add rsp, 16
+ pop rbx
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+
+;unsigned int
+;vp9_get4x4var_mmx
+;(
+; unsigned char *src_ptr,
+; int source_stride,
+; unsigned char *ref_ptr,
+; int recon_stride,
+; unsigned int *SSE,
+; int *Sum
+;)
+;
+; For 4 rows of 4 pixels, computes the per-pixel difference src - ref and
+; stores the sum of the differences to *Sum and the sum of the squared
+; differences to *SSE. The function itself always returns 0.
+; Register roles: mm5 = four 16-bit partial sums of differences,
+; mm6 = zero (for widening bytes to words), mm7 = two 32-bit partial sums
+; of squares. The reference row for the next iteration is loaded into mm1
+; at the end of the previous one.
+; Rows are loaded with movd (4 bytes): only the low four bytes take part in
+; punpcklbw, so an 8-byte load would read 4 bytes beyond each row for
+; nothing.
+; Only MMX registers are used and no emms is executed before returning.
+global sym(vp9_get4x4var_mmx) PRIVATE
+sym(vp9_get4x4var_mmx):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ push rsi
+ push rdi
+ push rbx
+ sub rsp, 16 ; scratch: [rsp] squares, [rsp+8] sums
+ ; end prolog
+
+
+ pxor mm5, mm5 ; clear mm5 (sum of differences)
+ pxor mm6, mm6 ; clear mm6 (zero for unpacking)
+ pxor mm7, mm7 ; clear mm7 (sum of squares)
+
+ mov rax, arg(0) ;[src_ptr] ; Load base addresses
+ mov rbx, arg(2) ;[ref_ptr]
+ movsxd rcx, dword ptr arg(1) ;[source_stride]
+ movsxd rdx, dword ptr arg(3) ;[recon_stride]
+
+ ; Row 1
+ movd mm0, [rax] ; Copy four bytes to mm0
+ movd mm1, [rbx] ; Copy four bytes to mm1
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ punpcklbw mm1, mm6
+ psubsw mm0, mm1 ; A-B (low order) to MM0
+ paddw mm5, mm0 ; accumulate differences in mm5
+ pmaddwd mm0, mm0 ; square and accumulate
+ add rbx,rdx ; Inc pointer into ref data
+ add rax,rcx ; Inc pointer into the new data
+ movd mm1, [rbx] ; Copy four bytes to mm1 (ref row 2)
+ paddd mm7, mm0 ; accumulate in mm7
+
+
+ ; Row 2
+ movd mm0, [rax] ; Copy four bytes to mm0
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ punpcklbw mm1, mm6
+ psubsw mm0, mm1 ; A-B (low order) to MM0
+ paddw mm5, mm0 ; accumulate differences in mm5
+
+ pmaddwd mm0, mm0 ; square and accumulate
+ add rbx,rdx ; Inc pointer into ref data
+ add rax,rcx ; Inc pointer into the new data
+ movd mm1, [rbx] ; Copy four bytes to mm1 (ref row 3)
+ paddd mm7, mm0 ; accumulate in mm7
+
+ ; Row 3
+ movd mm0, [rax] ; Copy four bytes to mm0
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ punpcklbw mm1, mm6
+ psubsw mm0, mm1 ; A-B (low order) to MM0
+ paddw mm5, mm0 ; accumulate differences in mm5
+
+ pmaddwd mm0, mm0 ; square and accumulate
+ add rbx,rdx ; Inc pointer into ref data
+ add rax,rcx ; Inc pointer into the new data
+ movd mm1, [rbx] ; Copy four bytes to mm1 (ref row 4)
+ paddd mm7, mm0 ; accumulate in mm7
+
+ ; Row 4
+ movd mm0, [rax] ; Copy four bytes to mm0
+
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ punpcklbw mm1, mm6
+ psubsw mm0, mm1 ; A-B (low order) to MM0
+
+ paddw mm5, mm0 ; accumulate differences in mm5
+
+ pmaddwd mm0, mm0 ; square and accumulate
+ paddd mm7, mm0 ; accumulate in mm7
+
+
+ ; Now accumulate the final results.
+ movq QWORD PTR [rsp+8], mm5 ; copy back accumulated results into normal memory
+ movq QWORD PTR [rsp], mm7 ; copy back accumulated results into normal memory
+ ; add the four sign-extended 16-bit partial sums
+ movsx rdx, WORD PTR [rsp+8]
+ movsx rcx, WORD PTR [rsp+10]
+ movsx rbx, WORD PTR [rsp+12]
+ movsx rax, WORD PTR [rsp+14]
+ add rdx, rcx
+ add rbx, rax
+ add rdx, rbx ;XSum
+ ; add the two 32-bit partial sums of squares
+ movsxd rax, DWORD PTR [rsp]
+ movsxd rcx, DWORD PTR [rsp+4]
+ add rax, rcx ;XXSum
+ mov rsi, arg(4) ;SSE
+ mov rdi, arg(5) ;Sum
+ mov dword ptr [rsi], eax
+ mov dword ptr [rdi], edx
+ xor rax, rax ; return 0
+
+
+ ; begin epilog
+ add rsp, 16
+ pop rbx
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+
+;unsigned int
+;vp9_get4x4sse_cs_mmx
+;(
+; unsigned char *src_ptr,
+; int source_stride,
+; unsigned char *ref_ptr,
+; int recon_stride
+;)
+;
+; Returns the sum of squared differences between src and ref over 4 rows of
+; 4 pixels. The total is left in the low 32 bits of rax.
+; mm6 is zero (for widening bytes to words) and mm7 holds two 32-bit
+; partial sums. The reference row for the next iteration is loaded into mm1
+; at the end of the previous one.
+; Only MMX registers are used and no emms is executed before returning.
+global sym(vp9_get4x4sse_cs_mmx) PRIVATE
+sym(vp9_get4x4sse_cs_mmx):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ push rsi
+ push rdi
+ push rbx
+ ; end prolog
+
+
+ pxor mm6, mm6 ; clear mm6 (zero for unpacking)
+ pxor mm7, mm7 ; clear mm7 (sum of squares)
+
+ mov rax, arg(0) ;[src_ptr] ; Load base addresses
+ mov rbx, arg(2) ;[ref_ptr]
+ movsxd rcx, dword ptr arg(1) ;[source_stride]
+ movsxd rdx, dword ptr arg(3) ;[recon_stride]
+ ; Row 1
+ movd mm0, [rax] ; Copy four bytes to mm0
+ movd mm1, [rbx] ; Copy four bytes to mm1
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ punpcklbw mm1, mm6
+ psubsw mm0, mm1 ; A-B (low order) to MM0
+ pmaddwd mm0, mm0 ; square and accumulate
+ add rbx,rdx ; Inc pointer into ref data
+ add rax,rcx ; Inc pointer into the new data
+ movd mm1, [rbx] ; Copy four bytes to mm1 (ref row 2)
+ paddd mm7, mm0 ; accumulate in mm7
+
+ ; Row 2
+ movd mm0, [rax] ; Copy four bytes to mm0
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ punpcklbw mm1, mm6
+ psubsw mm0, mm1 ; A-B (low order) to MM0
+ pmaddwd mm0, mm0 ; square and accumulate
+ add rbx,rdx ; Inc pointer into ref data
+ add rax,rcx ; Inc pointer into the new data
+ movd mm1, [rbx] ; Copy four bytes to mm1 (ref row 3)
+ paddd mm7, mm0 ; accumulate in mm7
+
+ ; Row 3
+ movd mm0, [rax] ; Copy four bytes to mm0
+ punpcklbw mm1, mm6
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ psubsw mm0, mm1 ; A-B (low order) to MM0
+
+ pmaddwd mm0, mm0 ; square and accumulate
+ add rbx,rdx ; Inc pointer into ref data
+ add rax,rcx ; Inc pointer into the new data
+ movd mm1, [rbx] ; Copy four bytes to mm1 (ref row 4)
+ paddd mm7, mm0 ; accumulate in mm7
+
+ ; Row 4
+ movd mm0, [rax] ; Copy four bytes to mm0
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ punpcklbw mm1, mm6
+ psubsw mm0, mm1 ; A-B (low order) to MM0
+ pmaddwd mm0, mm0 ; square and accumulate
+ paddd mm7, mm0 ; accumulate in mm7
+
+ ; fold the two 32-bit partial sums into the low dword
+ movq mm0, mm7 ;
+ psrlq mm7, 32
+
+ paddd mm0, mm7
+ movq rax, mm0 ; return value
+
+
+ ; begin epilog
+ pop rbx
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+; Right-shift applied after each bilinear filter pass below; the rounding
+; constant mmx_bi_rd (64) added before the shift equals 1 << (7 - 1).
+%define mmx_filter_shift 7
+
+;void vp9_filter_block2d_bil4x4_var_mmx
+;(
+; unsigned char *ref_ptr,
+; int ref_pixels_per_line,
+; unsigned char *src_ptr,
+; int src_pixels_per_line,
+; unsigned short *HFilter,
+; unsigned short *VFilter,
+; int *sum,
+; unsigned int *sumsquared
+;)
+;
+; Applies a two-tap horizontal and then a two-tap vertical filter to a
+; block of ref_ptr that is 4 pixels wide (5 rows are read to produce 4
+; output rows), subtracts the corresponding src_ptr pixels, and stores the
+; sum of the differences to *sum and the sum of their squares to
+; *sumsquared.
+; HFilter and VFilter are each read as two groups of four 16-bit words:
+; the first tap at offset 0 and the second tap at offset 8.
+; Register roles: mm0 = zero, mm5 = horizontally filtered previous row,
+; mm6 = four 16-bit partial sums, mm7 = two 32-bit partial sums of squares.
+; Only MMX registers are used and no emms is executed before returning.
+global sym(vp9_filter_block2d_bil4x4_var_mmx) PRIVATE
+sym(vp9_filter_block2d_bil4x4_var_mmx):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 8
+ GET_GOT rbx
+ push rsi
+ push rdi
+ sub rsp, 16 ; reserved scratch; not referenced in this function
+ ; end prolog
+
+
+ pxor mm6, mm6 ; sum of differences
+ pxor mm7, mm7 ; sum of squared differences
+
+ mov rax, arg(4) ;HFilter ;
+ mov rdx, arg(5) ;VFilter ;
+
+ mov rsi, arg(0) ;ref_ptr ;
+ mov rdi, arg(2) ;src_ptr ;
+
+ mov rcx, 4 ; four output rows
+ pxor mm0, mm0 ; zero for unpacking
+
+ ; horizontal pass on the first reference row
+ movd mm1, [rsi] ;
+ movd mm3, [rsi+1] ;
+
+ punpcklbw mm1, mm0 ;
+ pmullw mm1, [rax] ;
+
+ punpcklbw mm3, mm0 ;
+ pmullw mm3, [rax+8] ;
+
+ paddw mm1, mm3 ;
+ paddw mm1, [GLOBAL(mmx_bi_rd)] ; round
+
+ psraw mm1, mmx_filter_shift ;
+ movq mm5, mm1 ; keep as "previous row"
+
+%if ABI_IS_32BIT
+ add rsi, dword ptr arg(1) ;ref_pixels_per_line ;
+%else
+ movsxd r8, dword ptr arg(1) ;ref_pixels_per_line ;
+ add rsi, r8
+%endif
+
+.filter_block2d_bil4x4_var_mmx_loop:
+
+ ; horizontal pass on the current reference row
+ movd mm1, [rsi] ;
+ movd mm3, [rsi+1] ;
+
+ punpcklbw mm1, mm0 ;
+ pmullw mm1, [rax] ;
+
+ punpcklbw mm3, mm0 ;
+ pmullw mm3, [rax+8] ;
+
+ paddw mm1, mm3 ;
+ paddw mm1, [GLOBAL(mmx_bi_rd)] ;
+
+ psraw mm1, mmx_filter_shift ;
+ movq mm3, mm5 ; previous row
+
+ ; vertical pass: previous row * tap 0 + current row * tap 1
+ movq mm5, mm1 ; current row becomes the next "previous"
+ pmullw mm3, [rdx] ;
+
+ pmullw mm1, [rdx+8] ;
+ paddw mm1, mm3 ;
+
+
+ paddw mm1, [GLOBAL(mmx_bi_rd)] ;
+ psraw mm1, mmx_filter_shift ;
+
+ ; difference against the source row
+ movd mm3, [rdi] ;
+ punpcklbw mm3, mm0 ;
+
+ psubw mm1, mm3 ; filtered ref - src
+ paddw mm6, mm1 ; accumulate sum
+
+ pmaddwd mm1, mm1 ; square
+ paddd mm7, mm1 ; accumulate sum of squares
+
+%if ABI_IS_32BIT
+ add rsi, dword ptr arg(1) ;ref_pixels_per_line ;
+ add rdi, dword ptr arg(3) ;src_pixels_per_line ;
+%else
+ movsxd r8, dword ptr arg(1) ;ref_pixels_per_line
+ movsxd r9, dword ptr arg(3) ;src_pixels_per_line
+ add rsi, r8
+ add rdi, r9
+%endif
+ sub rcx, 1 ;
+ jnz .filter_block2d_bil4x4_var_mmx_loop ;
+
+
+ ; Reduce the four 16-bit sums: place each word in the upper half of a
+ ; dword, add the dwords, then shift right arithmetically by 16 to get
+ ; the sign-extended total.
+ pxor mm3, mm3 ;
+ pxor mm2, mm2 ;
+
+ punpcklwd mm2, mm6 ;
+ punpckhwd mm3, mm6 ;
+
+ paddd mm2, mm3 ;
+ movq mm6, mm2 ;
+
+ psrlq mm6, 32 ;
+ paddd mm2, mm6 ;
+
+ psrad mm2, 16 ;
+ movq mm4, mm7 ;
+
+ ; fold the two 32-bit sums of squares into the low dword
+ psrlq mm4, 32 ;
+ paddd mm4, mm7 ;
+
+ mov rdi, arg(6) ;sum
+ mov rsi, arg(7) ;sumsquared
+
+ movd dword ptr [rdi], mm2 ;
+ movd dword ptr [rsi], mm4 ;
+
+
+
+ ; begin epilog
+ add rsp, 16
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+
+
+;void vp9_filter_block2d_bil_var_mmx
+;(
+; unsigned char *ref_ptr,
+; int ref_pixels_per_line,
+; unsigned char *src_ptr,
+; int src_pixels_per_line,
+; unsigned int Height,
+; unsigned short *HFilter,
+; unsigned short *VFilter,
+; int *sum,
+; unsigned int *sumsquared
+;)
+;
+; Applies a two-tap horizontal and then a two-tap vertical filter to a
+; block of ref_ptr that is 8 pixels wide (Height + 1 rows are read to
+; produce Height output rows), subtracts the corresponding src_ptr pixels,
+; and stores the sum of the differences to *sum and the sum of their
+; squares to *sumsquared.
+; HFilter and VFilter are each read as two groups of four 16-bit words:
+; the first tap at offset 0 and the second tap at offset 8.
+; Register roles: mm0 = zero, mm5 = horizontally filtered previous row
+; packed back to 8 bytes, mm6 = four 16-bit partial sums, mm7 = two 32-bit
+; partial sums of squares.
+; Only MMX registers are used and no emms is executed before returning.
+global sym(vp9_filter_block2d_bil_var_mmx) PRIVATE
+sym(vp9_filter_block2d_bil_var_mmx):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 9
+ GET_GOT rbx
+ push rsi
+ push rdi
+ sub rsp, 16 ; reserved scratch; not referenced in this function
+ ; end prolog
+
+ pxor mm6, mm6 ; sum of differences
+ pxor mm7, mm7 ; sum of squared differences
+ mov rax, arg(5) ;HFilter ;
+
+ mov rdx, arg(6) ;VFilter ;
+ mov rsi, arg(0) ;ref_ptr ;
+
+ mov rdi, arg(2) ;src_ptr ;
+ movsxd rcx, dword ptr arg(4) ;Height ;
+
+ ; horizontal pass on the first reference row (low and high 4 pixels)
+ pxor mm0, mm0 ; zero for unpacking
+ movq mm1, [rsi] ;
+
+ movq mm3, [rsi+1] ;
+ movq mm2, mm1 ;
+
+ movq mm4, mm3 ;
+ punpcklbw mm1, mm0 ;
+
+ punpckhbw mm2, mm0 ;
+ pmullw mm1, [rax] ;
+
+ pmullw mm2, [rax] ;
+ punpcklbw mm3, mm0 ;
+
+ punpckhbw mm4, mm0 ;
+ pmullw mm3, [rax+8] ;
+
+ pmullw mm4, [rax+8] ;
+ paddw mm1, mm3 ;
+
+ paddw mm2, mm4 ;
+ paddw mm1, [GLOBAL(mmx_bi_rd)] ; round
+
+ psraw mm1, mmx_filter_shift ;
+ paddw mm2, [GLOBAL(mmx_bi_rd)] ;
+
+ psraw mm2, mmx_filter_shift ;
+ movq mm5, mm1
+
+ packuswb mm5, mm2 ; keep the filtered row as 8 bytes
+%if ABI_IS_32BIT
+ add rsi, dword ptr arg(1) ;ref_pixels_per_line
+%else
+ movsxd r8, dword ptr arg(1) ;ref_pixels_per_line
+ add rsi, r8
+%endif
+
+.filter_block2d_bil_var_mmx_loop:
+
+ ; horizontal pass on the current reference row
+ movq mm1, [rsi] ;
+ movq mm3, [rsi+1] ;
+
+ movq mm2, mm1 ;
+ movq mm4, mm3 ;
+
+ punpcklbw mm1, mm0 ;
+ punpckhbw mm2, mm0 ;
+
+ pmullw mm1, [rax] ;
+ pmullw mm2, [rax] ;
+
+ punpcklbw mm3, mm0 ;
+ punpckhbw mm4, mm0 ;
+
+ pmullw mm3, [rax+8] ;
+ pmullw mm4, [rax+8] ;
+
+ paddw mm1, mm3 ;
+ paddw mm2, mm4 ;
+
+ paddw mm1, [GLOBAL(mmx_bi_rd)] ;
+ psraw mm1, mmx_filter_shift ;
+
+ paddw mm2, [GLOBAL(mmx_bi_rd)] ;
+ psraw mm2, mmx_filter_shift ;
+
+ ; unpack the previous row again
+ movq mm3, mm5 ;
+ movq mm4, mm5 ;
+
+ punpcklbw mm3, mm0 ;
+ punpckhbw mm4, mm0 ;
+
+ ; current row becomes the next "previous"
+ movq mm5, mm1 ;
+ packuswb mm5, mm2 ;
+
+ ; vertical pass: previous row * tap 0 + current row * tap 1
+ pmullw mm3, [rdx] ;
+ pmullw mm4, [rdx] ;
+
+ pmullw mm1, [rdx+8] ;
+ pmullw mm2, [rdx+8] ;
+
+ paddw mm1, mm3 ;
+ paddw mm2, mm4 ;
+
+ paddw mm1, [GLOBAL(mmx_bi_rd)] ;
+ paddw mm2, [GLOBAL(mmx_bi_rd)] ;
+
+ psraw mm1, mmx_filter_shift ;
+ psraw mm2, mmx_filter_shift ;
+
+ ; difference against the source row
+ movq mm3, [rdi] ;
+ movq mm4, mm3 ;
+
+ punpcklbw mm3, mm0 ;
+ punpckhbw mm4, mm0 ;
+
+ psubw mm1, mm3 ; filtered ref - src
+ psubw mm2, mm4 ;
+
+ paddw mm6, mm1 ; accumulate sum
+ pmaddwd mm1, mm1 ; square
+
+ paddw mm6, mm2 ;
+ pmaddwd mm2, mm2 ;
+
+ paddd mm7, mm1 ; accumulate sum of squares
+ paddd mm7, mm2 ;
+
+%if ABI_IS_32BIT
+ add rsi, dword ptr arg(1) ;ref_pixels_per_line ;
+ add rdi, dword ptr arg(3) ;src_pixels_per_line ;
+%else
+ movsxd r8, dword ptr arg(1) ;ref_pixels_per_line ;
+ movsxd r9, dword ptr arg(3) ;src_pixels_per_line ;
+ add rsi, r8
+ add rdi, r9
+%endif
+ sub rcx, 1 ;
+ jnz .filter_block2d_bil_var_mmx_loop ;
+
+
+ ; Reduce the four 16-bit sums: place each word in the upper half of a
+ ; dword, add the dwords, then shift right arithmetically by 16 to get
+ ; the sign-extended total.
+ pxor mm3, mm3 ;
+ pxor mm2, mm2 ;
+
+ punpcklwd mm2, mm6 ;
+ punpckhwd mm3, mm6 ;
+
+ paddd mm2, mm3 ;
+ movq mm6, mm2 ;
+
+ psrlq mm6, 32 ;
+ paddd mm2, mm6 ;
+
+ psrad mm2, 16 ;
+ movq mm4, mm7 ;
+
+ ; fold the two 32-bit sums of squares into the low dword
+ psrlq mm4, 32 ;
+ paddd mm4, mm7 ;
+
+ mov rdi, arg(7) ;sum
+ mov rsi, arg(8) ;sumsquared
+
+ movd dword ptr [rdi], mm2 ;
+ movd dword ptr [rsi], mm4 ;
+
+ ; begin epilog
+ add rsp, 16
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+SECTION_RODATA
+;short mmx_bi_rd[4] = { 64, 64, 64, 64};
+; Rounding term added to each 16-bit lane before the arithmetic right shift
+; by mmx_filter_shift (7) in the bilinear filter routines above.
+align 16
+mmx_bi_rd:
+ times 4 dw 64
diff --git a/vp9/encoder/x86/vp9_variance_impl_sse2.asm b/vp9/encoder/x86/vp9_variance_impl_sse2.asm
new file mode 100644
index 0000000..3999269
--- /dev/null
+++ b/vp9/encoder/x86/vp9_variance_impl_sse2.asm
@@ -0,0 +1,1367 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+%define xmm_filter_shift 7
+
+;unsigned int vp9_get_mb_ss_sse2 -- returns the sum of squares of 256 signed 16-bit values
+;(
+; short *src_ptr ; read with movdqa, so it must be 16-byte aligned; 8 x 64 bytes are consumed
+;)
+global sym(vp9_get_mb_ss_sse2) PRIVATE
+sym(vp9_get_mb_ss_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 1
+ GET_GOT rbx
+ push rsi
+ push rdi
+ sub rsp, 16
+ ; end prolog
+
+
+ mov rax, arg(0) ;[src_ptr]
+ mov rcx, 8 ; 8 iterations, 64 bytes (32 shorts) each
+ pxor xmm4, xmm4 ; xmm4 = four dword partial sums of squares
+
+.NEXTROW:
+ movdqa xmm0, [rax]
+ movdqa xmm1, [rax+16]
+ movdqa xmm2, [rax+32]
+ movdqa xmm3, [rax+48]
+ pmaddwd xmm0, xmm0 ; square each word and add adjacent pairs into dwords
+ pmaddwd xmm1, xmm1
+ pmaddwd xmm2, xmm2
+ pmaddwd xmm3, xmm3
+
+ paddd xmm0, xmm1
+ paddd xmm2, xmm3
+ paddd xmm4, xmm0
+ paddd xmm4, xmm2
+
+ add rax, 0x40
+ dec rcx
+ jnz .NEXTROW ; dec updates ZF but never CF, so test ZF only
+
+ movdqa xmm3,xmm4 ; horizontal add of the four dwords:
+ psrldq xmm4,8 ; fold the upper 64 bits onto the lower,
+ paddd xmm4,xmm3
+ movdqa xmm3,xmm4
+ psrldq xmm4,4 ; then fold dword 1 onto dword 0
+ paddd xmm4,xmm3
+ movq rax,xmm4 ; total is in the low dword of rax
+
+
+ ; begin epilog
+ add rsp, 16
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;unsigned int vp9_get16x16var_sse2 -- sum and sum of squares of (src - ref) over 16 rows of 16 bytes
+;(
+; unsigned char * src_ptr,
+; int source_stride,
+; unsigned char * ref_ptr,
+; int recon_stride,
+; unsigned int * SSE, ; out: sum of squared differences
+; int * Sum ; out: signed sum of differences
+;)
+global sym(vp9_get16x16var_sse2) PRIVATE
+sym(vp9_get16x16var_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ push rbx ; rbx is used as a scratch pointer for the prefetches below
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rsi, arg(0) ;[src_ptr]
+ mov rdi, arg(2) ;[ref_ptr]
+
+ movsxd rax, DWORD PTR arg(1) ;[source_stride]
+ movsxd rdx, DWORD PTR arg(3) ;[recon_stride]
+
+ ; Prefetch data: rows 0..7 of src, then rows 0..7 of ref
+ lea rcx, [rax+rax*2] ; rcx = 3 * source_stride
+ prefetcht0 [rsi]
+ prefetcht0 [rsi+rax]
+ prefetcht0 [rsi+rax*2]
+ prefetcht0 [rsi+rcx]
+ lea rbx, [rsi+rax*4] ; rbx = src row 4
+ prefetcht0 [rbx]
+ prefetcht0 [rbx+rax]
+ prefetcht0 [rbx+rax*2]
+ prefetcht0 [rbx+rcx]
+
+ lea rcx, [rdx+rdx*2] ; rcx = 3 * recon_stride
+ prefetcht0 [rdi]
+ prefetcht0 [rdi+rdx]
+ prefetcht0 [rdi+rdx*2]
+ prefetcht0 [rdi+rcx]
+ lea rbx, [rdi+rdx*4] ; rbx = ref row 4
+ prefetcht0 [rbx]
+ prefetcht0 [rbx+rdx]
+ prefetcht0 [rbx+rdx*2]
+ prefetcht0 [rbx+rcx]
+
+ pxor xmm0, xmm0 ; clear xmm0 for unpack
+ pxor xmm7, xmm7 ; clear xmm7 for accumulating diffs
+
+ pxor xmm6, xmm6 ; clear xmm6 for accumulating sse
+ mov rcx, 16 ; row counter
+
+.var16loop:
+ movdqu xmm1, XMMWORD PTR [rsi] ; 16 src bytes
+ movdqu xmm2, XMMWORD PTR [rdi] ; 16 ref bytes
+
+ prefetcht0 [rsi+rax*8] ; prefetch 8 rows ahead of the current row
+ prefetcht0 [rdi+rdx*8]
+
+ movdqa xmm3, xmm1
+ movdqa xmm4, xmm2
+
+
+ punpcklbw xmm1, xmm0 ; src bytes 0..7 as words
+ punpckhbw xmm3, xmm0 ; src bytes 8..15 as words
+
+ punpcklbw xmm2, xmm0 ; ref bytes 0..7 as words
+ punpckhbw xmm4, xmm0 ; ref bytes 8..15 as words
+
+
+ psubw xmm1, xmm2 ; differences, low half
+ psubw xmm3, xmm4 ; differences, high half
+
+ paddw xmm7, xmm1 ; accumulate differences as 8 signed words
+ pmaddwd xmm1, xmm1 ; squared differences, pairs summed into dwords
+
+ paddw xmm7, xmm3
+ pmaddwd xmm3, xmm3
+
+ paddd xmm6, xmm1 ; accumulate squares as 4 dwords
+ paddd xmm6, xmm3
+
+ add rsi, rax
+ add rdi, rdx
+
+ sub rcx, 1
+ jnz .var16loop
+
+
+ movdqa xmm1, xmm6 ; xmm1 = sse partials; xmm6 is reused for the sum
+ pxor xmm6, xmm6
+
+ pxor xmm5, xmm5
+ punpcklwd xmm6, xmm7 ; word sums 0..3 placed in the high half of each dword
+
+ punpckhwd xmm5, xmm7 ; word sums 4..7 placed in the high half of each dword
+ psrad xmm5, 16 ; arithmetic shift = sign-extend words to dwords
+
+ psrad xmm6, 16
+ paddd xmm6, xmm5 ; xmm6 = 4 dword partial sums
+
+ movdqa xmm2, xmm1
+ punpckldq xmm1, xmm0 ; spread sse dwords into qword lanes (xmm0 is zero)
+
+ punpckhdq xmm2, xmm0
+ movdqa xmm7, xmm6
+
+ paddd xmm1, xmm2
+ punpckldq xmm6, xmm0 ; same spreading for the sum dwords
+
+ punpckhdq xmm7, xmm0
+ paddd xmm6, xmm7
+
+ movdqa xmm2, xmm1
+ movdqa xmm7, xmm6
+
+ psrldq xmm1, 8 ; fold upper 64 bits onto lower
+ psrldq xmm6, 8
+
+ paddd xmm7, xmm6 ; low dword of xmm7 = total sum
+ paddd xmm1, xmm2 ; low dword of xmm1 = total sse
+
+ mov rax, arg(5) ;[Sum]
+ mov rdi, arg(4) ;[SSE]
+
+ movd DWORD PTR [rax], xmm7
+ movd DWORD PTR [rdi], xmm1
+
+
+ ; begin epilog (no instruction visible here loads a return value; rax still holds the Sum pointer)
+ pop rdi
+ pop rsi
+ pop rbx
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+
+
+;unsigned int vp9_get8x8var_sse2 -- sum and sum of squares of (src - ref) over 8 rows of 8 bytes, fully unrolled
+;(
+; unsigned char * src_ptr,
+; int source_stride,
+; unsigned char * ref_ptr,
+; int recon_stride,
+; unsigned int * SSE, ; out: sum of squared differences
+; int * Sum ; out: signed sum of differences
+;)
+global sym(vp9_get8x8var_sse2) PRIVATE
+sym(vp9_get8x8var_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ sub rsp, 16
+ ; end prolog
+
+ mov rsi, arg(0) ;[src_ptr]
+ mov rdi, arg(2) ;[ref_ptr]
+
+ movsxd rax, DWORD PTR arg(1) ;[source_stride]
+ movsxd rdx, DWORD PTR arg(3) ;[recon_stride]
+
+ pxor xmm0, xmm0 ; clear xmm0 for unpack
+ pxor xmm7, xmm7 ; clear xmm7 for accumulating diffs
+
+ movq xmm1, QWORD PTR [rsi] ; row 0
+ movq xmm2, QWORD PTR [rdi]
+
+ punpcklbw xmm1, xmm0
+ punpcklbw xmm2, xmm0
+
+ psubsw xmm1, xmm2
+ paddw xmm7, xmm1 ; xmm7 = 8 word sums of differences
+
+ pmaddwd xmm1, xmm1 ; xmm1 = 4 dword sums of squares (sse accumulator from here on)
+
+ movq xmm2, QWORD PTR[rsi + rax] ; row 1
+ movq xmm3, QWORD PTR[rdi + rdx]
+
+ punpcklbw xmm2, xmm0
+ punpcklbw xmm3, xmm0
+
+ psubsw xmm2, xmm3
+ paddw xmm7, xmm2
+
+ pmaddwd xmm2, xmm2
+ paddd xmm1, xmm2
+
+
+ movq xmm2, QWORD PTR[rsi + rax * 2] ; row 2
+ movq xmm3, QWORD PTR[rdi + rdx * 2]
+
+ punpcklbw xmm2, xmm0
+ punpcklbw xmm3, xmm0
+
+ psubsw xmm2, xmm3
+ paddw xmm7, xmm2
+
+ pmaddwd xmm2, xmm2
+ paddd xmm1, xmm2
+
+
+ lea rsi, [rsi + rax * 2] ; advance both pointers to row 2
+ lea rdi, [rdi + rdx * 2]
+ movq xmm2, QWORD PTR[rsi + rax] ; row 3
+ movq xmm3, QWORD PTR[rdi + rdx]
+
+ punpcklbw xmm2, xmm0
+ punpcklbw xmm3, xmm0
+
+ psubsw xmm2, xmm3
+ paddw xmm7, xmm2
+
+ pmaddwd xmm2, xmm2
+ paddd xmm1, xmm2
+
+ movq xmm2, QWORD PTR[rsi + rax *2] ; row 4
+ movq xmm3, QWORD PTR[rdi + rdx *2]
+
+ punpcklbw xmm2, xmm0
+ punpcklbw xmm3, xmm0
+
+ psubsw xmm2, xmm3
+ paddw xmm7, xmm2
+
+ pmaddwd xmm2, xmm2
+ paddd xmm1, xmm2
+
+
+ lea rsi, [rsi + rax * 2] ; advance both pointers to row 4
+ lea rdi, [rdi + rdx * 2]
+
+
+ movq xmm2, QWORD PTR[rsi + rax] ; row 5
+ movq xmm3, QWORD PTR[rdi + rdx]
+
+ punpcklbw xmm2, xmm0
+ punpcklbw xmm3, xmm0
+
+ psubsw xmm2, xmm3
+ paddw xmm7, xmm2
+
+ pmaddwd xmm2, xmm2
+ paddd xmm1, xmm2
+
+ movq xmm2, QWORD PTR[rsi + rax *2] ; row 6
+ movq xmm3, QWORD PTR[rdi + rdx *2]
+
+ punpcklbw xmm2, xmm0
+ punpcklbw xmm3, xmm0
+
+ psubsw xmm2, xmm3
+ paddw xmm7, xmm2
+
+ pmaddwd xmm2, xmm2
+ paddd xmm1, xmm2
+
+
+ lea rsi, [rsi + rax * 2] ; advance both pointers to row 6
+ lea rdi, [rdi + rdx * 2]
+
+ movq xmm2, QWORD PTR[rsi + rax] ; row 7
+ movq xmm3, QWORD PTR[rdi + rdx]
+
+ punpcklbw xmm2, xmm0
+ punpcklbw xmm3, xmm0
+
+ psubsw xmm2, xmm3
+ paddw xmm7, xmm2
+
+ pmaddwd xmm2, xmm2
+ paddd xmm1, xmm2
+
+
+ movdqa xmm6, xmm7 ; reduce the 8 word sums; all adds below are 16-bit (paddw)
+ punpcklwd xmm6, xmm0 ; words 0..3 interleaved with zero words
+
+ punpckhwd xmm7, xmm0 ; words 4..7 interleaved with zero words
+ movdqa xmm2, xmm1
+
+ paddw xmm6, xmm7
+ punpckldq xmm1, xmm0 ; spread sse dwords into qword lanes
+
+ punpckhdq xmm2, xmm0
+ movdqa xmm7, xmm6
+
+ paddd xmm1, xmm2
+ punpckldq xmm6, xmm0
+
+ punpckhdq xmm7, xmm0
+ paddw xmm6, xmm7
+
+ movdqa xmm2, xmm1
+ movdqa xmm7, xmm6
+
+ psrldq xmm1, 8
+ psrldq xmm6, 8
+
+ paddw xmm7, xmm6 ; low word of xmm7 = total sum (16-bit)
+ paddd xmm1, xmm2 ; low dword of xmm1 = total sse
+
+ mov rax, arg(5) ;[Sum]
+ mov rdi, arg(4) ;[SSE]
+
+ movq rdx, xmm7
+ movsx rcx, dx ; sign-extend the 16-bit total before storing it as an int
+
+ mov dword ptr [rax], ecx
+ movd DWORD PTR [rdi], xmm1
+
+ ; begin epilog
+ add rsp, 16
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void vp9_filter_block2d_bil_var_sse2 -- bilinear-filter an 8-byte-wide ref block, then sum/sum-of-squares of (filtered - src)
+;(
+; unsigned char *ref_ptr,
+; int ref_pixels_per_line,
+; unsigned char *src_ptr,
+; int src_pixels_per_line,
+; unsigned int Height,
+; int xoffset,
+; int yoffset,
+; int *sum,
+; unsigned int *sumsquared
+; xoffset/yoffset index 32-byte rows of bilinear_filters_sse2; an offset of 0 skips that filter pass
+;)
+global sym(vp9_filter_block2d_bil_var_sse2) PRIVATE
+sym(vp9_filter_block2d_bil_var_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 9
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ push rbx ; rbx is reused below as a stride register
+ ; end prolog
+
+ pxor xmm6, xmm6 ; xmm6 = word accumulator of differences
+ pxor xmm7, xmm7 ; xmm7 = dword accumulator of squared differences
+
+ lea rsi, [GLOBAL(xmm_bi_rd)] ; rounding
+ movdqa xmm4, XMMWORD PTR [rsi] ; xmm4 = rounding constant for every path below
+
+ lea rcx, [GLOBAL(bilinear_filters_sse2)]
+ movsxd rax, dword ptr arg(5) ; xoffset
+
+ cmp rax, 0 ; skip first_pass filter if xoffset=0
+ je filter_block2d_bil_var_sse2_sp_only
+
+ shl rax, 5 ; point to filter coeff with xoffset
+ lea rax, [rax + rcx] ; HFilter
+
+ movsxd rdx, dword ptr arg(6) ; yoffset
+
+ cmp rdx, 0 ; skip second_pass filter if yoffset=0
+ je filter_block2d_bil_var_sse2_fp_only
+
+ shl rdx, 5
+ lea rdx, [rdx + rcx] ; VFilter
+
+ mov rsi, arg(0) ;ref_ptr
+ mov rdi, arg(2) ;src_ptr
+ movsxd rcx, dword ptr arg(4) ;Height
+
+ pxor xmm0, xmm0 ; zero, for byte->word unpack
+ movq xmm1, QWORD PTR [rsi] ; first ref row: pixels 0..7
+ movq xmm3, QWORD PTR [rsi+1] ; first ref row: pixels 1..8
+
+ punpcklbw xmm1, xmm0 ;
+ pmullw xmm1, [rax] ; pixel * first horizontal tap
+ punpcklbw xmm3, xmm0
+ pmullw xmm3, [rax+16] ; right neighbour * second horizontal tap
+
+ paddw xmm1, xmm3 ;
+ paddw xmm1, xmm4 ; add rounding
+ psraw xmm1, xmm_filter_shift ;
+ movdqa xmm5, xmm1 ; xmm5 = previous horizontally filtered row
+
+ movsxd rbx, dword ptr arg(1) ;ref_pixels_per_line
+ lea rsi, [rsi + rbx]
+%if ABI_IS_32BIT=0
+ movsxd r9, dword ptr arg(3) ;src_pixels_per_line
+%endif
+
+filter_block2d_bil_var_sse2_loop:
+ movq xmm1, QWORD PTR [rsi] ; next ref row: pixels 0..7
+ movq xmm3, QWORD PTR [rsi+1] ; next ref row: pixels 1..8
+
+ punpcklbw xmm1, xmm0 ;
+ pmullw xmm1, [rax] ;
+ punpcklbw xmm3, xmm0 ;
+ pmullw xmm3, [rax+16] ;
+
+ paddw xmm1, xmm3 ;
+ paddw xmm1, xmm4 ;
+ psraw xmm1, xmm_filter_shift ; xmm1 = current horizontally filtered row
+
+ movdqa xmm3, xmm5 ; xmm3 = previous filtered row
+ movdqa xmm5, xmm1 ; keep the current row for the next iteration
+
+ pmullw xmm3, [rdx] ; previous row * first vertical tap
+ pmullw xmm1, [rdx+16] ; current row * second vertical tap
+ paddw xmm1, xmm3 ;
+ paddw xmm1, xmm4 ;
+ psraw xmm1, xmm_filter_shift ; xmm1 = fully filtered row
+
+ movq xmm3, QWORD PTR [rdi] ; 8 src bytes
+ punpcklbw xmm3, xmm0 ;
+
+ psubw xmm1, xmm3 ; difference
+ paddw xmm6, xmm1 ;
+
+ pmaddwd xmm1, xmm1 ;
+ paddd xmm7, xmm1 ;
+
+ lea rsi, [rsi + rbx] ;ref_pixels_per_line
+%if ABI_IS_32BIT
+ add rdi, dword ptr arg(3) ;src_pixels_per_line
+%else
+ lea rdi, [rdi + r9]
+%endif
+
+ sub rcx, 1 ;
+ jnz filter_block2d_bil_var_sse2_loop ;
+
+ jmp filter_block2d_bil_variance
+
+filter_block2d_bil_var_sse2_sp_only:
+ movsxd rdx, dword ptr arg(6) ; yoffset
+
+ cmp rdx, 0 ; skip all if both xoffset=0 and yoffset=0
+ je filter_block2d_bil_var_sse2_full_pixel
+
+ shl rdx, 5
+ lea rdx, [rdx + rcx] ; VFilter
+
+ mov rsi, arg(0) ;ref_ptr
+ mov rdi, arg(2) ;src_ptr
+ movsxd rcx, dword ptr arg(4) ;Height
+ movsxd rax, dword ptr arg(1) ;ref_pixels_per_line
+
+ pxor xmm0, xmm0 ;
+ movq xmm1, QWORD PTR [rsi] ; first ref row, unfiltered
+ punpcklbw xmm1, xmm0 ;
+
+ movsxd rbx, dword ptr arg(3) ;src_pixels_per_line
+ lea rsi, [rsi + rax]
+
+filter_block2d_bil_sp_only_loop:
+ movq xmm3, QWORD PTR [rsi] ; next ref row
+ punpcklbw xmm3, xmm0 ;
+ movdqa xmm5, xmm3 ; saved; becomes the "previous row" of the next iteration
+
+ pmullw xmm1, [rdx] ; previous row * first vertical tap
+ pmullw xmm3, [rdx+16] ; next row * second vertical tap
+ paddw xmm1, xmm3 ;
+ paddw xmm1, xmm4 ;
+ psraw xmm1, xmm_filter_shift ;
+
+ movq xmm3, QWORD PTR [rdi] ;
+ punpcklbw xmm3, xmm0 ;
+
+ psubw xmm1, xmm3 ;
+ paddw xmm6, xmm1 ;
+
+ pmaddwd xmm1, xmm1 ;
+ paddd xmm7, xmm1 ;
+
+ movdqa xmm1, xmm5 ;
+ lea rsi, [rsi + rax] ;ref_pixels_per_line
+ lea rdi, [rdi + rbx] ;src_pixels_per_line
+
+ sub rcx, 1 ;
+ jnz filter_block2d_bil_sp_only_loop ;
+
+ jmp filter_block2d_bil_variance
+
+filter_block2d_bil_var_sse2_full_pixel:
+ mov rsi, arg(0) ;ref_ptr
+ mov rdi, arg(2) ;src_ptr
+ movsxd rcx, dword ptr arg(4) ;Height
+ movsxd rax, dword ptr arg(1) ;ref_pixels_per_line
+ movsxd rbx, dword ptr arg(3) ;src_pixels_per_line
+ pxor xmm0, xmm0 ;
+
+filter_block2d_bil_full_pixel_loop:
+ movq xmm1, QWORD PTR [rsi] ; ref row, no filtering on this path
+ punpcklbw xmm1, xmm0 ;
+
+ movq xmm2, QWORD PTR [rdi] ;
+ punpcklbw xmm2, xmm0 ;
+
+ psubw xmm1, xmm2 ;
+ paddw xmm6, xmm1 ;
+
+ pmaddwd xmm1, xmm1 ;
+ paddd xmm7, xmm1 ;
+
+ lea rsi, [rsi + rax] ;ref_pixels_per_line
+ lea rdi, [rdi + rbx] ;src_pixels_per_line
+
+ sub rcx, 1 ;
+ jnz filter_block2d_bil_full_pixel_loop ;
+
+ jmp filter_block2d_bil_variance
+
+filter_block2d_bil_var_sse2_fp_only:
+ mov rsi, arg(0) ;ref_ptr
+ mov rdi, arg(2) ;src_ptr
+ movsxd rcx, dword ptr arg(4) ;Height
+ movsxd rdx, dword ptr arg(1) ;ref_pixels_per_line
+
+ pxor xmm0, xmm0 ;
+ movsxd rbx, dword ptr arg(3) ;src_pixels_per_line
+
+filter_block2d_bil_fp_only_loop:
+ movq xmm1, QWORD PTR [rsi] ;
+ movq xmm3, QWORD PTR [rsi+1] ;
+
+ punpcklbw xmm1, xmm0 ;
+ pmullw xmm1, [rax] ; rax still holds HFilter from the entry code
+ punpcklbw xmm3, xmm0 ;
+ pmullw xmm3, [rax+16] ;
+
+ paddw xmm1, xmm3 ;
+ paddw xmm1, xmm4 ;
+ psraw xmm1, xmm_filter_shift ;
+
+ movq xmm3, QWORD PTR [rdi] ;
+ punpcklbw xmm3, xmm0 ;
+
+ psubw xmm1, xmm3 ;
+ paddw xmm6, xmm1 ;
+
+ pmaddwd xmm1, xmm1 ;
+ paddd xmm7, xmm1 ;
+ lea rsi, [rsi + rdx]
+ lea rdi, [rdi + rbx] ;src_pixels_per_line
+
+ sub rcx, 1 ;
+ jnz filter_block2d_bil_fp_only_loop ;
+
+ jmp filter_block2d_bil_variance
+
+filter_block2d_bil_variance:
+ movdq2q mm6, xmm6 ; the final reduction is done in MMX registers
+ movdq2q mm7, xmm7 ; NOTE(review): no emms is issued in this routine -- presumably the caller clears MMX state; confirm
+
+ psrldq xmm6, 8
+ psrldq xmm7, 8
+
+ movdq2q mm2, xmm6
+ movdq2q mm3, xmm7
+
+ paddw mm6, mm2 ; 4 word sums
+ paddd mm7, mm3 ; 2 dword sums of squares
+
+ pxor mm3, mm3 ;
+ pxor mm2, mm2 ;
+
+ punpcklwd mm2, mm6 ; word sums moved into the high half of each dword
+ punpckhwd mm3, mm6 ;
+
+ paddd mm2, mm3 ;
+ movq mm6, mm2 ;
+
+ psrlq mm6, 32 ;
+ paddd mm2, mm6 ;
+
+ psrad mm2, 16 ; arithmetic shift recovers the signed total, which therefore has 16-bit range
+ movq mm4, mm7 ;
+
+ psrlq mm4, 32 ;
+ paddd mm4, mm7 ; low dword of mm4 = total of squares
+
+ mov rsi, arg(7) ; sum
+ mov rdi, arg(8) ; sumsquared
+
+ movd [rsi], mm2 ; xsum
+ movd [rdi], mm4 ; xxsum
+
+ ; begin epilog
+ pop rbx
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;void vp9_half_horiz_vert_variance8x_h_sse2 -- 8-wide: average ref horizontally and vertically, then sum/sum-of-squares vs src
+;(
+; unsigned char *ref_ptr,
+; int ref_pixels_per_line,
+; unsigned char *src_ptr,
+; int src_pixels_per_line,
+; unsigned int Height,
+; int *sum,
+; unsigned int *sumsquared
+;)
+global sym(vp9_half_horiz_vert_variance8x_h_sse2) PRIVATE
+sym(vp9_half_horiz_vert_variance8x_h_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 7
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+%if ABI_IS_32BIT=0
+ movsxd r8, dword ptr arg(1) ;ref_pixels_per_line
+ movsxd r9, dword ptr arg(3) ;src_pixels_per_line
+%endif
+
+ pxor xmm6, xmm6 ; error accumulator
+ pxor xmm7, xmm7 ; sse accumulator
+ mov rsi, arg(0) ;ref_ptr ;
+
+ mov rdi, arg(2) ;src_ptr ;
+ movsxd rcx, dword ptr arg(4) ;Height ;
+ movsxd rax, dword ptr arg(1) ;ref_pixels_per_line (rax is not read again in this routine)
+
+ pxor xmm0, xmm0 ;
+
+ movq xmm5, QWORD PTR [rsi] ; xmm5 = s0,s1,s2..s7
+ movq xmm3, QWORD PTR [rsi+1] ; xmm3 = s1,s2,s3..s8
+ pavgb xmm5, xmm3 ; xmm5 = avg(xmm5,xmm3) horizontal line 1
+
+%if ABI_IS_32BIT
+ add rsi, dword ptr arg(1) ;ref_pixels_per_line ; next source
+%else
+ add rsi, r8
+%endif
+
+.half_horiz_vert_variance8x_h_1:
+
+ movq xmm1, QWORD PTR [rsi] ;
+ movq xmm2, QWORD PTR [rsi+1] ;
+ pavgb xmm1, xmm2 ; xmm1 = avg(xmm1,xmm2) horizontal line i+1
+
+ pavgb xmm5, xmm1 ; xmm5 = vertical average of lines i and i+1
+ punpcklbw xmm5, xmm0 ; xmm5 = words of above
+
+ movq xmm3, QWORD PTR [rdi] ; xmm3 = d0,d1,d2..d7
+ punpcklbw xmm3, xmm0 ; xmm3 = words of above
+
+ psubw xmm5, xmm3 ; xmm5 -= xmm3
+ paddw xmm6, xmm5 ; xmm6 += accumulated column differences
+ pmaddwd xmm5, xmm5 ; xmm5 *= xmm5
+ paddd xmm7, xmm5 ; xmm7 += accumulated square column differences
+
+ movdqa xmm5, xmm1 ; save xmm1 for use on the next row
+
+%if ABI_IS_32BIT
+ add esi, dword ptr arg(1) ;ref_pixels_per_line ; next source
+ add edi, dword ptr arg(3) ;src_pixels_per_line ; next destination
+%else
+ add rsi, r8
+ add rdi, r9
+%endif
+
+ sub rcx, 1 ;
+ jnz .half_horiz_vert_variance8x_h_1 ;
+
+ movdq2q mm6, xmm6 ; the final reduction is done in MMX registers
+ movdq2q mm7, xmm7 ; NOTE(review): no emms is issued in this routine -- presumably the caller clears MMX state; confirm
+
+ psrldq xmm6, 8
+ psrldq xmm7, 8
+
+ movdq2q mm2, xmm6
+ movdq2q mm3, xmm7
+
+ paddw mm6, mm2 ; 4 word sums
+ paddd mm7, mm3 ; 2 dword sums of squares
+
+ pxor mm3, mm3 ;
+ pxor mm2, mm2 ;
+
+ punpcklwd mm2, mm6 ; word sums moved into the high half of each dword
+ punpckhwd mm3, mm6 ;
+
+ paddd mm2, mm3 ;
+ movq mm6, mm2 ;
+
+ psrlq mm6, 32 ;
+ paddd mm2, mm6 ;
+
+ psrad mm2, 16 ; arithmetic shift recovers the signed total
+ movq mm4, mm7 ;
+
+ psrlq mm4, 32 ;
+ paddd mm4, mm7 ; low dword of mm4 = total of squares
+
+ mov rsi, arg(5) ; sum
+ mov rdi, arg(6) ; sumsquared
+
+ movd [rsi], mm2 ;
+ movd [rdi], mm4 ;
+
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void vp9_half_horiz_vert_variance16x_h_sse2 -- 16-wide: average ref horizontally and vertically, then sum/sum-of-squares vs src
+;(
+; unsigned char *ref_ptr,
+; int ref_pixels_per_line,
+; unsigned char *src_ptr,
+; int src_pixels_per_line,
+; unsigned int Height,
+; int *sum,
+; unsigned int *sumsquared
+;)
+global sym(vp9_half_horiz_vert_variance16x_h_sse2) PRIVATE
+sym(vp9_half_horiz_vert_variance16x_h_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 7
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ pxor xmm6, xmm6 ; error accumulator
+ pxor xmm7, xmm7 ; sse accumulator
+ mov rsi, arg(0) ;ref_ptr ;
+
+ mov rdi, arg(2) ;src_ptr ;
+ movsxd rcx, dword ptr arg(4) ;Height ;
+ movsxd rax, dword ptr arg(1) ;ref_pixels_per_line
+ movsxd rdx, dword ptr arg(3) ;src_pixels_per_line
+
+ pxor xmm0, xmm0 ;
+
+ movdqu xmm5, XMMWORD PTR [rsi] ; ref row 0, bytes 0..15
+ movdqu xmm3, XMMWORD PTR [rsi+1] ; ref row 0, bytes 1..16
+ pavgb xmm5, xmm3 ; xmm5 = avg(xmm5,xmm3) horizontal line 1
+
+ lea rsi, [rsi + rax]
+
+.half_horiz_vert_variance16x_h_1:
+ movdqu xmm1, XMMWORD PTR [rsi] ;
+ movdqu xmm2, XMMWORD PTR [rsi+1] ;
+ pavgb xmm1, xmm2 ; xmm1 = avg(xmm1,xmm2) horizontal line i+1
+
+ pavgb xmm5, xmm1 ; xmm5 = vertical average of lines i and i+1
+
+ movdqa xmm4, xmm5
+ punpcklbw xmm5, xmm0 ; xmm5 = words of bytes 0..7
+ punpckhbw xmm4, xmm0 ; xmm4 = words of bytes 8..15
+
+ movq xmm3, QWORD PTR [rdi] ; xmm3 = d0,d1,d2..d7
+ punpcklbw xmm3, xmm0 ; xmm3 = words of above
+ psubw xmm5, xmm3 ; xmm5 -= xmm3
+
+ movq xmm3, QWORD PTR [rdi+8] ; xmm3 = d8..d15
+ punpcklbw xmm3, xmm0
+ psubw xmm4, xmm3
+
+ paddw xmm6, xmm5 ; xmm6 += accumulated column differences
+ paddw xmm6, xmm4
+ pmaddwd xmm5, xmm5 ; xmm5 *= xmm5
+ pmaddwd xmm4, xmm4
+ paddd xmm7, xmm5 ; xmm7 += accumulated square column differences
+ paddd xmm7, xmm4
+
+ movdqa xmm5, xmm1 ; save xmm1 for use on the next row
+
+ lea rsi, [rsi + rax]
+ lea rdi, [rdi + rdx]
+
+ sub rcx, 1 ;
+ jnz .half_horiz_vert_variance16x_h_1 ;
+
+ pxor xmm1, xmm1
+ pxor xmm5, xmm5
+
+ punpcklwd xmm0, xmm6 ; xmm0 is still zero: word sums 0..3 land in the high half of each dword
+ punpckhwd xmm1, xmm6 ; word sums 4..7 likewise
+ psrad xmm0, 16 ; arithmetic shift = sign-extend to dwords
+ psrad xmm1, 16
+ paddd xmm0, xmm1 ; xmm0 = 4 dword partial sums
+ movdqa xmm1, xmm0
+
+ movdqa xmm6, xmm7
+ punpckldq xmm6, xmm5 ; spread sse dwords into qword lanes (xmm5 is zero)
+ punpckhdq xmm7, xmm5
+ paddd xmm6, xmm7
+
+ punpckldq xmm0, xmm5 ; same spreading for the sum dwords
+ punpckhdq xmm1, xmm5
+ paddd xmm0, xmm1
+
+ movdqa xmm7, xmm6
+ movdqa xmm1, xmm0
+
+ psrldq xmm7, 8 ; fold upper 64 bits onto lower
+ psrldq xmm1, 8
+
+ paddd xmm6, xmm7 ; low dword of xmm6 = total sse
+ paddd xmm0, xmm1 ; low dword of xmm0 = total sum
+
+ mov rsi, arg(5) ;[Sum]
+ mov rdi, arg(6) ;[SSE]
+
+ movd [rsi], xmm0
+ movd [rdi], xmm6
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;void vp9_half_vert_variance8x_h_sse2 -- 8-wide: average each ref row with the row below, then sum/sum-of-squares vs src
+;(
+; unsigned char *ref_ptr,
+; int ref_pixels_per_line,
+; unsigned char *src_ptr,
+; int src_pixels_per_line,
+; unsigned int Height,
+; int *sum,
+; unsigned int *sumsquared
+;)
+global sym(vp9_half_vert_variance8x_h_sse2) PRIVATE
+sym(vp9_half_vert_variance8x_h_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 7
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+%if ABI_IS_32BIT=0
+ movsxd r8, dword ptr arg(1) ;ref_pixels_per_line
+ movsxd r9, dword ptr arg(3) ;src_pixels_per_line
+%endif
+
+ pxor xmm6, xmm6 ; error accumulator
+ pxor xmm7, xmm7 ; sse accumulator
+ mov rsi, arg(0) ;ref_ptr ;
+
+ mov rdi, arg(2) ;src_ptr ;
+ movsxd rcx, dword ptr arg(4) ;Height ;
+ movsxd rax, dword ptr arg(1) ;ref_pixels_per_line
+
+ pxor xmm0, xmm0 ;
+.half_vert_variance8x_h_1:
+ movq xmm5, QWORD PTR [rsi] ; xmm5 = row i: s0,s1,s2..s7
+ movq xmm3, QWORD PTR [rsi+rax] ; xmm3 = row i+1, same 8 columns
+
+ pavgb xmm5, xmm3 ; xmm5 = avg(xmm5,xmm3)
+ punpcklbw xmm5, xmm0 ; xmm5 = words of above
+
+ movq xmm3, QWORD PTR [rdi] ; xmm3 = d0,d1,d2..d7
+ punpcklbw xmm3, xmm0 ; xmm3 = words of above
+
+ psubw xmm5, xmm3 ; xmm5 -= xmm3
+ paddw xmm6, xmm5 ; xmm6 += accumulated column differences
+ pmaddwd xmm5, xmm5 ; xmm5 *= xmm5
+ paddd xmm7, xmm5 ; xmm7 += accumulated square column differences
+
+%if ABI_IS_32BIT
+ add esi, dword ptr arg(1) ;ref_pixels_per_line ; next source
+ add edi, dword ptr arg(3) ;src_pixels_per_line ; next destination
+%else
+ add rsi, r8
+ add rdi, r9
+%endif
+
+ sub rcx, 1 ;
+ jnz .half_vert_variance8x_h_1 ;
+
+ movdq2q mm6, xmm6 ; the final reduction is done in MMX registers
+ movdq2q mm7, xmm7 ; NOTE(review): no emms is issued in this routine -- presumably the caller clears MMX state; confirm
+
+ psrldq xmm6, 8
+ psrldq xmm7, 8
+
+ movdq2q mm2, xmm6
+ movdq2q mm3, xmm7
+
+ paddw mm6, mm2 ; 4 word sums
+ paddd mm7, mm3 ; 2 dword sums of squares
+
+ pxor mm3, mm3 ;
+ pxor mm2, mm2 ;
+
+ punpcklwd mm2, mm6 ; word sums moved into the high half of each dword
+ punpckhwd mm3, mm6 ;
+
+ paddd mm2, mm3 ;
+ movq mm6, mm2 ;
+
+ psrlq mm6, 32 ;
+ paddd mm2, mm6 ;
+
+ psrad mm2, 16 ; arithmetic shift recovers the signed total
+ movq mm4, mm7 ;
+
+ psrlq mm4, 32 ;
+ paddd mm4, mm7 ; low dword of mm4 = total of squares
+
+ mov rsi, arg(5) ; sum
+ mov rdi, arg(6) ; sumsquared
+
+ movd [rsi], mm2 ;
+ movd [rdi], mm4 ;
+
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void vp9_half_vert_variance16x_h_sse2 -- 16-wide: average each ref row with the row below, then sum/sum-of-squares vs src
+;(
+; unsigned char *ref_ptr,
+; int ref_pixels_per_line,
+; unsigned char *src_ptr,
+; int src_pixels_per_line,
+; unsigned int Height,
+; int *sum,
+; unsigned int *sumsquared
+;)
+global sym(vp9_half_vert_variance16x_h_sse2) PRIVATE
+sym(vp9_half_vert_variance16x_h_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 7
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ pxor xmm6, xmm6 ; error accumulator
+ pxor xmm7, xmm7 ; sse accumulator
+ mov rsi, arg(0) ;ref_ptr
+
+ mov rdi, arg(2) ;src_ptr
+ movsxd rcx, dword ptr arg(4) ;Height
+ movsxd rax, dword ptr arg(1) ;ref_pixels_per_line
+ movsxd rdx, dword ptr arg(3) ;src_pixels_per_line
+
+ movdqu xmm5, XMMWORD PTR [rsi] ; xmm5 = ref row 0
+ lea rsi, [rsi + rax ]
+ pxor xmm0, xmm0 ; zero, for byte->word unpack
+
+.half_vert_variance16x_h_1:
+ movdqu xmm3, XMMWORD PTR [rsi] ; xmm3 = ref row i+1
+
+ pavgb xmm5, xmm3 ; xmm5 = avg(xmm5,xmm3)
+ movdqa xmm4, xmm5
+ punpcklbw xmm5, xmm0 ; words of bytes 0..7
+ punpckhbw xmm4, xmm0 ; words of bytes 8..15
+
+ movq xmm2, QWORD PTR [rdi] ; src bytes 0..7
+ punpcklbw xmm2, xmm0
+ psubw xmm5, xmm2
+ movq xmm2, QWORD PTR [rdi+8] ; src bytes 8..15
+ punpcklbw xmm2, xmm0
+ psubw xmm4, xmm2
+
+ paddw xmm6, xmm5 ; xmm6 += accumulated column differences
+ paddw xmm6, xmm4
+ pmaddwd xmm5, xmm5 ; xmm5 *= xmm5
+ pmaddwd xmm4, xmm4
+ paddd xmm7, xmm5 ; xmm7 += accumulated square column differences
+ paddd xmm7, xmm4
+
+ movdqa xmm5, xmm3 ; row i+1 becomes the upper row of the next iteration
+
+ lea rsi, [rsi + rax]
+ lea rdi, [rdi + rdx]
+
+ sub rcx, 1
+ jnz .half_vert_variance16x_h_1
+
+ pxor xmm1, xmm1
+ pxor xmm5, xmm5
+
+ punpcklwd xmm0, xmm6 ; xmm0 is still zero: word sums 0..3 land in the high half of each dword
+ punpckhwd xmm1, xmm6 ; word sums 4..7 likewise
+ psrad xmm0, 16 ; arithmetic shift = sign-extend to dwords
+ psrad xmm1, 16
+ paddd xmm0, xmm1 ; xmm0 = 4 dword partial sums
+ movdqa xmm1, xmm0
+
+ movdqa xmm6, xmm7
+ punpckldq xmm6, xmm5 ; spread sse dwords into qword lanes (xmm5 is zero)
+ punpckhdq xmm7, xmm5
+ paddd xmm6, xmm7
+
+ punpckldq xmm0, xmm5 ; same spreading for the sum dwords
+ punpckhdq xmm1, xmm5
+ paddd xmm0, xmm1
+
+ movdqa xmm7, xmm6
+ movdqa xmm1, xmm0
+
+ psrldq xmm7, 8 ; fold upper 64 bits onto lower
+ psrldq xmm1, 8
+
+ paddd xmm6, xmm7 ; low dword of xmm6 = total sse
+ paddd xmm0, xmm1 ; low dword of xmm0 = total sum
+
+ mov rsi, arg(5) ;[Sum]
+ mov rdi, arg(6) ;[SSE]
+
+ movd [rsi], xmm0
+ movd [rdi], xmm6
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;void vp9_half_horiz_variance8x_h_sse2 -- 8-wide: average each ref pixel with its right neighbour, then sum/sum-of-squares vs src
+;(
+; unsigned char *ref_ptr,
+; int ref_pixels_per_line,
+; unsigned char *src_ptr,
+; int src_pixels_per_line,
+; unsigned int Height,
+; int *sum,
+; unsigned int *sumsquared
+;)
+global sym(vp9_half_horiz_variance8x_h_sse2) PRIVATE
+sym(vp9_half_horiz_variance8x_h_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 7
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+%if ABI_IS_32BIT=0
+ movsxd r8, dword ptr arg(1) ;ref_pixels_per_line
+ movsxd r9, dword ptr arg(3) ;src_pixels_per_line
+%endif
+
+ pxor xmm6, xmm6 ; error accumulator
+ pxor xmm7, xmm7 ; sse accumulator
+ mov rsi, arg(0) ;ref_ptr ;
+
+ mov rdi, arg(2) ;src_ptr ;
+ movsxd rcx, dword ptr arg(4) ;Height ;
+
+ pxor xmm0, xmm0 ;
+.half_horiz_variance8x_h_1:
+ movq xmm5, QWORD PTR [rsi] ; xmm5 = s0,s1,s2..s7
+ movq xmm3, QWORD PTR [rsi+1] ; xmm3 = s1,s2,s3..s8
+
+ pavgb xmm5, xmm3 ; xmm5 = avg(xmm5,xmm3)
+ punpcklbw xmm5, xmm0 ; xmm5 = words of above
+
+ movq xmm3, QWORD PTR [rdi] ; xmm3 = d0,d1,d2..d7
+ punpcklbw xmm3, xmm0 ; xmm3 = words of above
+
+ psubw xmm5, xmm3 ; xmm5 -= xmm3
+ paddw xmm6, xmm5 ; xmm6 += accumulated column differences
+ pmaddwd xmm5, xmm5 ; xmm5 *= xmm5
+ paddd xmm7, xmm5 ; xmm7 += accumulated square column differences
+
+%if ABI_IS_32BIT
+ add esi, dword ptr arg(1) ;ref_pixels_per_line ; next source
+ add edi, dword ptr arg(3) ;src_pixels_per_line ; next destination
+%else
+ add rsi, r8
+ add rdi, r9
+%endif
+ sub rcx, 1 ;
+ jnz .half_horiz_variance8x_h_1 ;
+
+ movdq2q mm6, xmm6 ; the final reduction is done in MMX registers
+ movdq2q mm7, xmm7 ; NOTE(review): no emms is issued in this routine -- presumably the caller clears MMX state; confirm
+
+ psrldq xmm6, 8
+ psrldq xmm7, 8
+
+ movdq2q mm2, xmm6
+ movdq2q mm3, xmm7
+
+ paddw mm6, mm2 ; 4 word sums
+ paddd mm7, mm3 ; 2 dword sums of squares
+
+ pxor mm3, mm3 ;
+ pxor mm2, mm2 ;
+
+ punpcklwd mm2, mm6 ; word sums moved into the high half of each dword
+ punpckhwd mm3, mm6 ;
+
+ paddd mm2, mm3 ;
+ movq mm6, mm2 ;
+
+ psrlq mm6, 32 ;
+ paddd mm2, mm6 ;
+
+ psrad mm2, 16 ; arithmetic shift recovers the signed total
+ movq mm4, mm7 ;
+
+ psrlq mm4, 32 ;
+ paddd mm4, mm7 ; low dword of mm4 = total of squares
+
+ mov rsi, arg(5) ; sum
+ mov rdi, arg(6) ; sumsquared
+
+ movd [rsi], mm2 ;
+ movd [rdi], mm4 ;
+
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void vp9_half_horiz_variance16x_h_sse2 -- 16-wide: average each ref pixel with its right neighbour, then sum/sum-of-squares vs src
+;(
+; unsigned char *ref_ptr,
+; int ref_pixels_per_line,
+; unsigned char *src_ptr,
+; int src_pixels_per_line,
+; unsigned int Height,
+; int *sum,
+; unsigned int *sumsquared
+;)
+global sym(vp9_half_horiz_variance16x_h_sse2) PRIVATE
+sym(vp9_half_horiz_variance16x_h_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 7
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ pxor xmm6, xmm6 ; error accumulator
+ pxor xmm7, xmm7 ; sse accumulator
+ mov rsi, arg(0) ;ref_ptr ;
+
+ mov rdi, arg(2) ;src_ptr ;
+ movsxd rcx, dword ptr arg(4) ;Height ;
+ movsxd rax, dword ptr arg(1) ;ref_pixels_per_line
+ movsxd rdx, dword ptr arg(3) ;src_pixels_per_line
+
+ pxor xmm0, xmm0 ;
+
+.half_horiz_variance16x_h_1:
+ movdqu xmm5, XMMWORD PTR [rsi] ; xmm5 = s0,s1,s2..s15
+ movdqu xmm3, XMMWORD PTR [rsi+1] ; xmm3 = s1,s2,s3..s16
+
+ pavgb xmm5, xmm3 ; xmm5 = avg(xmm5,xmm3)
+ movdqa xmm1, xmm5
+ punpcklbw xmm5, xmm0 ; xmm5 = words of bytes 0..7
+ punpckhbw xmm1, xmm0 ; xmm1 = words of bytes 8..15
+
+ movq xmm3, QWORD PTR [rdi] ; xmm3 = d0,d1,d2..d7
+ punpcklbw xmm3, xmm0 ; xmm3 = words of above
+ movq xmm2, QWORD PTR [rdi+8] ; xmm2 = d8..d15
+ punpcklbw xmm2, xmm0
+
+ psubw xmm5, xmm3 ; xmm5 -= xmm3
+ psubw xmm1, xmm2
+ paddw xmm6, xmm5 ; xmm6 += accumulated column differences
+ paddw xmm6, xmm1
+ pmaddwd xmm5, xmm5 ; xmm5 *= xmm5
+ pmaddwd xmm1, xmm1
+ paddd xmm7, xmm5 ; xmm7 += accumulated square column differences
+ paddd xmm7, xmm1
+
+ lea rsi, [rsi + rax]
+ lea rdi, [rdi + rdx]
+
+ sub rcx, 1 ;
+ jnz .half_horiz_variance16x_h_1 ;
+
+ pxor xmm1, xmm1
+ pxor xmm5, xmm5
+
+ punpcklwd xmm0, xmm6 ; xmm0 is still zero: word sums 0..3 land in the high half of each dword
+ punpckhwd xmm1, xmm6 ; word sums 4..7 likewise
+ psrad xmm0, 16 ; arithmetic shift = sign-extend to dwords
+ psrad xmm1, 16
+ paddd xmm0, xmm1 ; xmm0 = 4 dword partial sums
+ movdqa xmm1, xmm0
+
+ movdqa xmm6, xmm7
+ punpckldq xmm6, xmm5 ; spread sse dwords into qword lanes (xmm5 is zero)
+ punpckhdq xmm7, xmm5
+ paddd xmm6, xmm7
+
+ punpckldq xmm0, xmm5 ; same spreading for the sum dwords
+ punpckhdq xmm1, xmm5
+ paddd xmm0, xmm1
+
+ movdqa xmm7, xmm6
+ movdqa xmm1, xmm0
+
+ psrldq xmm7, 8 ; fold upper 64 bits onto lower
+ psrldq xmm1, 8
+
+ paddd xmm6, xmm7 ; low dword of xmm6 = total sse
+ paddd xmm0, xmm1 ; low dword of xmm0 = total sum
+
+ mov rsi, arg(5) ;[Sum]
+ mov rdi, arg(6) ;[SSE]
+
+ movd [rsi], xmm0
+ movd [rdi], xmm6
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+SECTION_RODATA
+; short xmm_bi_rd[8] = { 64, 64, 64, 64,64, 64, 64, 64}; rounding term, 64 = 1 << (xmm_filter_shift - 1)
+align 16
+xmm_bi_rd:
+ times 8 dw 64
+align 16
+bilinear_filters_sse2: ; 16 rows of 32 bytes (indexed by offset << 5): 8 words of the first tap, then 8 words of the second; taps sum to 128
+ dw 128, 128, 128, 128, 128, 128, 128, 128, 0, 0, 0, 0, 0, 0, 0, 0
+ dw 120, 120, 120, 120, 120, 120, 120, 120, 8, 8, 8, 8, 8, 8, 8, 8
+ dw 112, 112, 112, 112, 112, 112, 112, 112, 16, 16, 16, 16, 16, 16, 16, 16
+ dw 104, 104, 104, 104, 104, 104, 104, 104, 24, 24, 24, 24, 24, 24, 24, 24
+ dw 96, 96, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32
+ dw 88, 88, 88, 88, 88, 88, 88, 88, 40, 40, 40, 40, 40, 40, 40, 40
+ dw 80, 80, 80, 80, 80, 80, 80, 80, 48, 48, 48, 48, 48, 48, 48, 48
+ dw 72, 72, 72, 72, 72, 72, 72, 72, 56, 56, 56, 56, 56, 56, 56, 56
+ dw 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
+ dw 56, 56, 56, 56, 56, 56, 56, 56, 72, 72, 72, 72, 72, 72, 72, 72
+ dw 48, 48, 48, 48, 48, 48, 48, 48, 80, 80, 80, 80, 80, 80, 80, 80
+ dw 40, 40, 40, 40, 40, 40, 40, 40, 88, 88, 88, 88, 88, 88, 88, 88
+ dw 32, 32, 32, 32, 32, 32, 32, 32, 96, 96, 96, 96, 96, 96, 96, 96
+ dw 24, 24, 24, 24, 24, 24, 24, 24, 104, 104, 104, 104, 104, 104, 104, 104
+ dw 16, 16, 16, 16, 16, 16, 16, 16, 112, 112, 112, 112, 112, 112, 112, 112
+ dw 8, 8, 8, 8, 8, 8, 8, 8, 120, 120, 120, 120, 120, 120, 120, 120
diff --git a/vp9/encoder/x86/vp9_variance_impl_ssse3.asm b/vp9/encoder/x86/vp9_variance_impl_ssse3.asm
new file mode 100644
index 0000000..98a4a16
--- /dev/null
+++ b/vp9/encoder/x86/vp9_variance_impl_ssse3.asm
@@ -0,0 +1,372 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+%define xmm_filter_shift 7
+
+
+;void vp9_filter_block2d_bil_var_ssse3 -- bilinear-filter a 16-byte-wide ref block, then sum/sum-of-squares of (filtered - src)
+;(
+; unsigned char *ref_ptr,
+; int ref_pixels_per_line,
+; unsigned char *src_ptr,
+; int src_pixels_per_line,
+; unsigned int Height,
+; int xoffset,
+; int yoffset,
+; int *sum,
+; unsigned int *sumsquared
+; xoffset/yoffset index 16-byte rows of bilinear_filters_ssse3
+;)
+;Note: The filter coefficient at offset=0 is 128. Since the second register
+;for Pmaddubsw is signed bytes, we must calculate zero offset separately.
+global sym(vp9_filter_block2d_bil_var_ssse3) PRIVATE
+sym(vp9_filter_block2d_bil_var_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 9
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ pxor xmm6, xmm6 ; xmm6 = word accumulator of differences
+ pxor xmm7, xmm7 ; xmm7 = dword accumulator of squared differences
+
+ lea rcx, [GLOBAL(bilinear_filters_ssse3)]
+ movsxd rax, dword ptr arg(5) ; xoffset
+
+ cmp rax, 0 ; skip first_pass filter if xoffset=0
+ je .filter_block2d_bil_var_ssse3_sp_only
+
+ shl rax, 4 ; point to filter coeff with xoffset
+ lea rax, [rax + rcx] ; HFilter
+
+ movsxd rdx, dword ptr arg(6) ; yoffset
+
+ cmp rdx, 0 ; skip second_pass filter if yoffset=0
+ je .filter_block2d_bil_var_ssse3_fp_only
+
+ shl rdx, 4
+ lea rdx, [rdx + rcx] ; VFilter
+
+ mov rsi, arg(0) ;ref_ptr
+ mov rdi, arg(2) ;src_ptr
+ movsxd rcx, dword ptr arg(4) ;Height
+
+ movdqu xmm0, XMMWORD PTR [rsi] ; first ref row: pixels 0..15
+ movdqu xmm1, XMMWORD PTR [rsi+1] ; first ref row: pixels 1..16
+ movdqa xmm2, xmm0
+
+ punpcklbw xmm0, xmm1 ; interleave each pixel with its right neighbour
+ punpckhbw xmm2, xmm1
+ pmaddubsw xmm0, [rax] ; per byte pair: pixel*tap0 + neighbour*tap1, as words
+ pmaddubsw xmm2, [rax]
+
+ paddw xmm0, [GLOBAL(xmm_bi_rd)] ; add rounding
+ paddw xmm2, [GLOBAL(xmm_bi_rd)]
+ psraw xmm0, xmm_filter_shift
+ psraw xmm2, xmm_filter_shift
+
+ packuswb xmm0, xmm2 ; back to 16 bytes: xmm0 = previous horizontally filtered row
+
+%if ABI_IS_32BIT
+ add rsi, dword ptr arg(1) ;ref_pixels_per_line
+%else
+ movsxd r8, dword ptr arg(1) ;ref_pixels_per_line
+ movsxd r9, dword ptr arg(3) ;src_pixels_per_line
+ lea rsi, [rsi + r8]
+%endif
+
+.filter_block2d_bil_var_ssse3_loop:
+ movdqu xmm1, XMMWORD PTR [rsi]
+ movdqu xmm2, XMMWORD PTR [rsi+1]
+ movdqa xmm3, xmm1
+
+ punpcklbw xmm1, xmm2
+ punpckhbw xmm3, xmm2
+ pmaddubsw xmm1, [rax]
+ pmaddubsw xmm3, [rax]
+
+ paddw xmm1, [GLOBAL(xmm_bi_rd)]
+ paddw xmm3, [GLOBAL(xmm_bi_rd)]
+ psraw xmm1, xmm_filter_shift
+ psraw xmm3, xmm_filter_shift
+ packuswb xmm1, xmm3 ; xmm1 = current horizontally filtered row, as bytes
+
+ movdqa xmm2, xmm0 ; xmm2 = previous filtered row
+ movdqa xmm0, xmm1 ; keep the current row for the next iteration
+ movdqa xmm3, xmm2
+
+ punpcklbw xmm2, xmm1 ; interleave previous-row and current-row bytes
+ punpckhbw xmm3, xmm1
+ pmaddubsw xmm2, [rdx] ; vertical taps applied to each (previous, current) pair
+ pmaddubsw xmm3, [rdx]
+
+ paddw xmm2, [GLOBAL(xmm_bi_rd)]
+ paddw xmm3, [GLOBAL(xmm_bi_rd)]
+ psraw xmm2, xmm_filter_shift
+ psraw xmm3, xmm_filter_shift
+
+ movq xmm1, QWORD PTR [rdi] ; src bytes 0..7
+ pxor xmm4, xmm4
+ punpcklbw xmm1, xmm4
+ movq xmm5, QWORD PTR [rdi+8] ; src bytes 8..15
+ punpcklbw xmm5, xmm4
+
+ psubw xmm2, xmm1
+ psubw xmm3, xmm5
+ paddw xmm6, xmm2
+ paddw xmm6, xmm3
+ pmaddwd xmm2, xmm2
+ pmaddwd xmm3, xmm3
+ paddd xmm7, xmm2
+ paddd xmm7, xmm3
+
+%if ABI_IS_32BIT
+ add rsi, dword ptr arg(1) ;ref_pixels_per_line
+ add rdi, dword ptr arg(3) ;src_pixels_per_line
+%else
+ lea rsi, [rsi + r8]
+ lea rdi, [rdi + r9]
+%endif
+
+ sub rcx, 1
+ jnz .filter_block2d_bil_var_ssse3_loop
+
+ jmp .filter_block2d_bil_variance
+
+.filter_block2d_bil_var_ssse3_sp_only:
+ movsxd rdx, dword ptr arg(6) ; yoffset
+
+ cmp rdx, 0 ; Both xoffset =0 and yoffset=0
+ je .filter_block2d_bil_var_ssse3_full_pixel
+
+ shl rdx, 4
+ lea rdx, [rdx + rcx] ; VFilter
+
+ mov rsi, arg(0) ;ref_ptr
+ mov rdi, arg(2) ;src_ptr
+ movsxd rcx, dword ptr arg(4) ;Height
+ movsxd rax, dword ptr arg(1) ;ref_pixels_per_line
+
+ movdqu xmm1, XMMWORD PTR [rsi] ; first ref row, unfiltered
+ movdqa xmm0, xmm1 ; xmm0 is overwritten at the top of the loop before it is read
+
+%if ABI_IS_32BIT=0
+ movsxd r9, dword ptr arg(3) ;src_pixels_per_line
+%endif
+
+ lea rsi, [rsi + rax]
+
+.filter_block2d_bil_sp_only_loop:
+ movdqu xmm3, XMMWORD PTR [rsi] ; next ref row
+ movdqa xmm2, xmm1
+ movdqa xmm0, xmm3 ; saved; becomes the "previous row" of the next iteration
+
+ punpcklbw xmm1, xmm3 ; interleave previous-row and next-row bytes
+ punpckhbw xmm2, xmm3
+ pmaddubsw xmm1, [rdx]
+ pmaddubsw xmm2, [rdx]
+
+ paddw xmm1, [GLOBAL(xmm_bi_rd)]
+ paddw xmm2, [GLOBAL(xmm_bi_rd)]
+ psraw xmm1, xmm_filter_shift
+ psraw xmm2, xmm_filter_shift
+
+ movq xmm3, QWORD PTR [rdi]
+ pxor xmm4, xmm4
+ punpcklbw xmm3, xmm4
+ movq xmm5, QWORD PTR [rdi+8]
+ punpcklbw xmm5, xmm4
+
+ psubw xmm1, xmm3
+ psubw xmm2, xmm5
+ paddw xmm6, xmm1
+ paddw xmm6, xmm2
+ pmaddwd xmm1, xmm1
+ pmaddwd xmm2, xmm2
+ paddd xmm7, xmm1
+ paddd xmm7, xmm2
+
+ movdqa xmm1, xmm0
+ lea rsi, [rsi + rax] ;ref_pixels_per_line
+
+%if ABI_IS_32BIT
+ add rdi, dword ptr arg(3) ;src_pixels_per_line
+%else
+ lea rdi, [rdi + r9]
+%endif
+
+ sub rcx, 1
+ jnz .filter_block2d_bil_sp_only_loop
+
+ jmp .filter_block2d_bil_variance
+
+.filter_block2d_bil_var_ssse3_full_pixel:
+ mov rsi, arg(0) ;ref_ptr
+ mov rdi, arg(2) ;src_ptr
+ movsxd rcx, dword ptr arg(4) ;Height
+ movsxd rax, dword ptr arg(1) ;ref_pixels_per_line
+ movsxd rdx, dword ptr arg(3) ;src_pixels_per_line
+ pxor xmm0, xmm0
+
+.filter_block2d_bil_full_pixel_loop:
+ movq xmm1, QWORD PTR [rsi] ; ref bytes 0..7, no filtering on this path
+ punpcklbw xmm1, xmm0
+ movq xmm2, QWORD PTR [rsi+8] ; ref bytes 8..15
+ punpcklbw xmm2, xmm0
+
+ movq xmm3, QWORD PTR [rdi]
+ punpcklbw xmm3, xmm0
+ movq xmm4, QWORD PTR [rdi+8]
+ punpcklbw xmm4, xmm0
+
+ psubw xmm1, xmm3
+ psubw xmm2, xmm4
+ paddw xmm6, xmm1
+ paddw xmm6, xmm2
+ pmaddwd xmm1, xmm1
+ pmaddwd xmm2, xmm2
+ paddd xmm7, xmm1
+ paddd xmm7, xmm2
+
+ lea rsi, [rsi + rax] ;ref_pixels_per_line
+ lea rdi, [rdi + rdx] ;src_pixels_per_line
+ sub rcx, 1
+ jnz .filter_block2d_bil_full_pixel_loop
+
+ jmp .filter_block2d_bil_variance
+
+.filter_block2d_bil_var_ssse3_fp_only:
+ mov rsi, arg(0) ;ref_ptr
+ mov rdi, arg(2) ;src_ptr
+ movsxd rcx, dword ptr arg(4) ;Height
+ movsxd rdx, dword ptr arg(1) ;ref_pixels_per_line
+
+ pxor xmm0, xmm0
+
+%if ABI_IS_32BIT=0
+ movsxd r9, dword ptr arg(3) ;src_pixels_per_line
+%endif
+
+.filter_block2d_bil_fp_only_loop:
+ movdqu xmm1, XMMWORD PTR [rsi]
+ movdqu xmm2, XMMWORD PTR [rsi+1]
+ movdqa xmm3, xmm1
+
+ punpcklbw xmm1, xmm2
+ punpckhbw xmm3, xmm2
+ pmaddubsw xmm1, [rax] ; rax still holds HFilter from the entry code
+ pmaddubsw xmm3, [rax]
+
+ paddw xmm1, [GLOBAL(xmm_bi_rd)]
+ paddw xmm3, [GLOBAL(xmm_bi_rd)]
+ psraw xmm1, xmm_filter_shift
+ psraw xmm3, xmm_filter_shift
+
+ movq xmm2, XMMWORD PTR [rdi] ; NOTE(review): movq with an XMMWORD size keyword; every other movq here uses QWORD PTR -- confirm intent
+ pxor xmm4, xmm4
+ punpcklbw xmm2, xmm4
+ movq xmm5, QWORD PTR [rdi+8]
+ punpcklbw xmm5, xmm4
+
+ psubw xmm1, xmm2
+ psubw xmm3, xmm5
+ paddw xmm6, xmm1
+ paddw xmm6, xmm3
+ pmaddwd xmm1, xmm1
+ pmaddwd xmm3, xmm3
+ paddd xmm7, xmm1
+ paddd xmm7, xmm3
+
+ lea rsi, [rsi + rdx]
+%if ABI_IS_32BIT
+ add rdi, dword ptr arg(3) ;src_pixels_per_line
+%else
+ lea rdi, [rdi + r9]
+%endif
+
+ sub rcx, 1
+ jnz .filter_block2d_bil_fp_only_loop
+
+ jmp .filter_block2d_bil_variance
+
+.filter_block2d_bil_variance:
+ pxor xmm0, xmm0
+ pxor xmm1, xmm1
+ pxor xmm5, xmm5
+
+ punpcklwd xmm0, xmm6 ; word sums 0..3 land in the high half of each dword
+ punpckhwd xmm1, xmm6 ; word sums 4..7 likewise
+ psrad xmm0, 16 ; arithmetic shift = sign-extend to dwords
+ psrad xmm1, 16
+ paddd xmm0, xmm1 ; xmm0 = 4 dword partial sums
+ movdqa xmm1, xmm0
+
+ movdqa xmm6, xmm7
+ punpckldq xmm6, xmm5 ; spread sse dwords into qword lanes (xmm5 is zero)
+ punpckhdq xmm7, xmm5
+ paddd xmm6, xmm7
+
+ punpckldq xmm0, xmm5 ; same spreading for the sum dwords
+ punpckhdq xmm1, xmm5
+ paddd xmm0, xmm1
+
+ movdqa xmm7, xmm6
+ movdqa xmm1, xmm0
+
+ psrldq xmm7, 8 ; fold upper 64 bits onto lower
+ psrldq xmm1, 8
+
+ paddd xmm6, xmm7 ; low dword of xmm6 = total sse
+ paddd xmm0, xmm1 ; low dword of xmm0 = total sum
+
+ mov rsi, arg(7) ;[Sum]
+ mov rdi, arg(8) ;[SSE]
+
+ movd [rsi], xmm0
+ movd [rdi], xmm6
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+SECTION_RODATA
+align 16
+xmm_bi_rd: ; rounding term, 64 = 1 << (xmm_filter_shift - 1)
+ times 8 dw 64
+align 16
+bilinear_filters_ssse3: ; 16 rows of 16 bytes (indexed by offset << 4): 8 repeated (tap0, tap1) byte pairs for pmaddubsw
+ times 8 db 128, 0
+ times 8 db 120, 8
+ times 8 db 112, 16
+ times 8 db 104, 24
+ times 8 db 96, 32
+ times 8 db 88, 40
+ times 8 db 80, 48
+ times 8 db 72, 56
+ times 8 db 64, 64
+ times 8 db 56, 72
+ times 8 db 48, 80
+ times 8 db 40, 88
+ times 8 db 32, 96
+ times 8 db 24, 104
+ times 8 db 16, 112
+ times 8 db 8, 120
diff --git a/vp9/encoder/x86/vp9_variance_mmx.c b/vp9/encoder/x86/vp9_variance_mmx.c
new file mode 100644
index 0000000..bad1cfa
--- /dev/null
+++ b/vp9/encoder/x86/vp9_variance_mmx.c
@@ -0,0 +1,382 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vpx_config.h"
+#include "vp9/encoder/vp9_variance.h"
+#include "vp9/common/vp9_pragmas.h"
+#include "vpx_ports/mem.h"
+
+extern void filter_block1d_h6_mmx  /* NOTE(review): declared here but never called in this file */
+(
+ const unsigned char *src_ptr,
+ unsigned short *output_ptr,
+ unsigned int src_pixels_per_line,
+ unsigned int pixel_step,
+ unsigned int output_height,
+ unsigned int output_width,
+ short *vp7_filter
+);
+extern void filter_block1d_v6_mmx  /* NOTE(review): declared here but never called in this file */
+(
+ const short *src_ptr,
+ unsigned char *output_ptr,
+ unsigned int pixels_per_line,
+ unsigned int pixel_step,
+ unsigned int output_height,
+ unsigned int output_width,
+ short *vp7_filter
+);
+
+extern unsigned int vp9_get_mb_ss_mmx(const short *src_ptr);  /* NOTE(review): not called in this file */
+extern unsigned int vp9_get8x8var_mmx  /* callers in this file ignore the return value and read *SSE and *Sum */
+(
+ const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *SSE,
+ int *Sum
+);
+extern unsigned int vp9_get4x4var_mmx  /* callers in this file ignore the return value and read *SSE and *Sum */
+(
+ const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *SSE,
+ int *Sum
+);
+extern void vp9_filter_block2d_bil4x4_var_mmx  /* results come back through sum and sumsquared */
+(
+ const unsigned char *ref_ptr,
+ int ref_pixels_per_line,
+ const unsigned char *src_ptr,
+ int src_pixels_per_line,
+ const short *HFilter,  /* callers pass vp9_bilinear_filters_mmx[xoffset] */
+ const short *VFilter,  /* callers pass vp9_bilinear_filters_mmx[yoffset] */
+ int *sum,
+ unsigned int *sumsquared
+);
+extern void vp9_filter_block2d_bil_var_mmx  /* results come back through sum and sumsquared */
+(
+ const unsigned char *ref_ptr,
+ int ref_pixels_per_line,
+ const unsigned char *src_ptr,
+ int src_pixels_per_line,
+ unsigned int Height,  /* callers in this file pass 8 or 16 */
+ const short *HFilter,  /* callers pass vp9_bilinear_filters_mmx[xoffset] */
+ const short *VFilter,  /* callers pass vp9_bilinear_filters_mmx[yoffset] */
+ int *sum,
+ unsigned int *sumsquared
+);
+
+
+unsigned int vp9_variance4x4_mmx(  /* 4x4 variance; the helper's SSE is also stored in *sse */
+ const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ unsigned int var;
+ int avg;
+ /* Despite the names, var receives the helper's SSE output and avg its Sum output. */
+ vp9_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg);
+ *sse = var;
+ return (var - (((unsigned int)avg * avg) >> 4));  /* square formed in unsigned arithmetic; >> 4 divides by 16 pixels */
+
+}
+
+unsigned int vp9_variance8x8_mmx(  /* 8x8 variance; the helper's SSE is also stored in *sse */
+ const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ unsigned int var;
+ int avg;
+ /* Despite the names, var receives the helper's SSE output and avg its Sum output. */
+ vp9_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg);
+ *sse = var;
+
+ return (var - (((unsigned int)avg * avg) >> 6));  /* square formed in unsigned arithmetic; >> 6 divides by 64 pixels */
+
+}
+
+unsigned int vp9_mse16x16_mmx(  /* returns the summed SSE of a 16x16 block and stores the same value in *sse */
+ const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ unsigned int sse0, sse1, sse2, sse3, var;
+ int sum0, sum1, sum2, sum3;  /* filled in by the helper but never read in this function */
+
+ /* One 8x8 helper call per quadrant: top-left, top-right, bottom-left, bottom-right. */
+ vp9_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0);
+ vp9_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
+ vp9_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2);
+ vp9_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3);
+
+ var = sse0 + sse1 + sse2 + sse3;
+ *sse = var;
+ return var;
+}
+
+
+unsigned int vp9_variance16x16_mmx(  /* 16x16 variance built from four 8x8 helper calls; summed SSE stored in *sse */
+ const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ unsigned int sse0, sse1, sse2, sse3, var;
+ int sum0, sum1, sum2, sum3, avg;
+
+ /* One 8x8 helper call per quadrant: top-left, top-right, bottom-left, bottom-right. */
+ vp9_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0);
+ vp9_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
+ vp9_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2);
+ vp9_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3);
+
+ var = sse0 + sse1 + sse2 + sse3;  /* var holds the total SSE */
+ avg = sum0 + sum1 + sum2 + sum3;  /* avg holds the total of the helper's Sum outputs */
+ *sse = var;
+ return (var - (((unsigned int)avg * avg) >> 8));  /* square formed in unsigned arithmetic; >> 8 divides by 256 pixels */
+}
+
+unsigned int vp9_variance16x8_mmx(  /* 16x8 variance from two side-by-side 8x8 helper calls; summed SSE stored in *sse */
+ const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ unsigned int sse0, sse1, var;
+ int sum0, sum1, avg;
+ /* Left 8x8 block first, then the block 8 columns to its right. */
+ vp9_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0);
+ vp9_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
+
+ var = sse0 + sse1;
+ avg = sum0 + sum1;
+ *sse = var;
+ return (var - (((unsigned int)avg * avg) >> 7));  /* square formed in unsigned arithmetic; >> 7 divides by 128 pixels */
+
+}
+
+
+unsigned int vp9_variance8x16_mmx(  /* 8x16 variance from two stacked 8x8 helper calls; summed SSE stored in *sse */
+ const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ unsigned int sse0, sse1, var;
+ int sum0, sum1, avg;
+ /* Top 8x8 block first, then the block 8 rows below it. */
+ vp9_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0);
+ vp9_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1);
+
+ var = sse0 + sse1;
+ avg = sum0 + sum1;
+ *sse = var;
+
+ return (var - (((unsigned int)avg * avg) >> 7));  /* square formed in unsigned arithmetic; >> 7 divides by 128 pixels */
+
+}
+
+DECLARE_ALIGNED(16, extern const short, vp9_bilinear_filters_mmx[16][8]);  /* defined elsewhere; rows are picked by xoffset / yoffset below */
+
+unsigned int vp9_sub_pixel_variance4x4_mmx  /* 4x4 variance after bilinear filtering; helper's sumsquared stored in *sse */
+(
+ const unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,  /* row index into vp9_bilinear_filters_mmx; not range-checked here */
+ int yoffset,  /* row index into vp9_bilinear_filters_mmx; not range-checked here */
+ const unsigned char *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse)
+
+{
+ int xsum;
+ unsigned int xxsum;
+ vp9_filter_block2d_bil4x4_var_mmx(
+ src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pixels_per_line,
+ vp9_bilinear_filters_mmx[xoffset], vp9_bilinear_filters_mmx[yoffset],
+ &xsum, &xxsum
+ );
+ *sse = xxsum;
+ return (xxsum - (((unsigned int)xsum * xsum) >> 4));  /* square formed in unsigned arithmetic; >> 4 divides by 16 pixels */
+}
+
+
+unsigned int vp9_sub_pixel_variance8x8_mmx  /* 8x8 variance after bilinear filtering; helper's sumsquared stored in *sse */
+(
+ const unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,  /* row index into vp9_bilinear_filters_mmx; not range-checked here */
+ int yoffset,  /* row index into vp9_bilinear_filters_mmx; not range-checked here */
+ const unsigned char *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse
+) {
+ /* A single helper call with Height 8. */
+ int xsum;
+ unsigned int xxsum;
+ vp9_filter_block2d_bil_var_mmx(
+ src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pixels_per_line, 8,
+ vp9_bilinear_filters_mmx[xoffset], vp9_bilinear_filters_mmx[yoffset],
+ &xsum, &xxsum
+ );
+ *sse = xxsum;
+ return (xxsum - (((unsigned int)xsum * xsum) >> 6));  /* square formed in unsigned arithmetic; >> 6 divides by 64 pixels */
+}
+
+unsigned int vp9_sub_pixel_variance16x16_mmx  /* 16x16 variance after bilinear filtering; combined sumsquared stored in *sse */
+(
+ const unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,  /* row index into vp9_bilinear_filters_mmx; not range-checked here */
+ int yoffset,  /* row index into vp9_bilinear_filters_mmx; not range-checked here */
+ const unsigned char *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse
+) {
+
+ int xsum0, xsum1;
+ unsigned int xxsum0, xxsum1;
+ /* First call starts at column 0, second at column 8; both use Height 16. */
+ vp9_filter_block2d_bil_var_mmx(
+ src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pixels_per_line, 16,
+ vp9_bilinear_filters_mmx[xoffset], vp9_bilinear_filters_mmx[yoffset],
+ &xsum0, &xxsum0
+ );
+
+ vp9_filter_block2d_bil_var_mmx(
+ src_ptr + 8, src_pixels_per_line,
+ dst_ptr + 8, dst_pixels_per_line, 16,
+ vp9_bilinear_filters_mmx[xoffset], vp9_bilinear_filters_mmx[yoffset],
+ &xsum1, &xxsum1
+ );
+ /* Fold the second call's results into the first. */
+ xsum0 += xsum1;
+ xxsum0 += xxsum1;
+
+ *sse = xxsum0;
+ return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));  /* square formed in unsigned arithmetic; >> 8 divides by 256 pixels */
+
+
+}
+
+unsigned int vp9_sub_pixel_mse16x16_mmx(  /* returns the value that vp9_sub_pixel_variance16x16_mmx stores in *sse */
+ const unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ const unsigned char *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse
+) {
+ vp9_sub_pixel_variance16x16_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse);  /* called only to fill *sse; its return value is discarded */
+ return *sse;
+}
+
+unsigned int vp9_sub_pixel_variance16x8_mmx  /* 16x8 variance after bilinear filtering; combined sumsquared stored in *sse */
+(
+ const unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,  /* row index into vp9_bilinear_filters_mmx; not range-checked here */
+ int yoffset,  /* row index into vp9_bilinear_filters_mmx; not range-checked here */
+ const unsigned char *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse
+) {
+ int xsum0, xsum1;
+ unsigned int xxsum0, xxsum1;
+
+ /* First call starts at column 0, second at column 8; both use Height 8. */
+ vp9_filter_block2d_bil_var_mmx(
+ src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pixels_per_line, 8,
+ vp9_bilinear_filters_mmx[xoffset], vp9_bilinear_filters_mmx[yoffset],
+ &xsum0, &xxsum0
+ );
+
+
+ vp9_filter_block2d_bil_var_mmx(
+ src_ptr + 8, src_pixels_per_line,
+ dst_ptr + 8, dst_pixels_per_line, 8,
+ vp9_bilinear_filters_mmx[xoffset], vp9_bilinear_filters_mmx[yoffset],
+ &xsum1, &xxsum1
+ );
+ /* Fold the second call's results into the first. */
+ xsum0 += xsum1;
+ xxsum0 += xxsum1;
+
+ *sse = xxsum0;
+ return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 7));  /* square formed in unsigned arithmetic; >> 7 divides by 128 pixels */
+}
+
+unsigned int vp9_sub_pixel_variance8x16_mmx  /* 8x16 variance after bilinear filtering; helper's sumsquared stored in *sse */
+(
+ const unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,  /* row index into vp9_bilinear_filters_mmx; not range-checked here */
+ int yoffset,  /* row index into vp9_bilinear_filters_mmx; not range-checked here */
+ const unsigned char *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse
+) {
+ int xsum;
+ unsigned int xxsum;
+ vp9_filter_block2d_bil_var_mmx(  /* a single helper call with Height 16 */
+ src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pixels_per_line, 16,
+ vp9_bilinear_filters_mmx[xoffset], vp9_bilinear_filters_mmx[yoffset],
+ &xsum, &xxsum
+ );
+ *sse = xxsum;
+ return (xxsum - (((unsigned int)xsum * xsum) >> 7));  /* square formed in unsigned arithmetic; >> 7 divides by 128 pixels */
+}
+
+
+unsigned int vp9_variance_halfpixvar16x16_h_mmx(  /* wrapper: 16x16 sub-pixel variance with xoffset 8, yoffset 0 */
+ const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ return vp9_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 8, 0,  /* 8 is the middle row of the 16-row filter table */
+ ref_ptr, recon_stride, sse);
+}
+
+
+unsigned int vp9_variance_halfpixvar16x16_v_mmx(  /* wrapper: 16x16 sub-pixel variance with xoffset 0, yoffset 8 */
+ const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ return vp9_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 0, 8,  /* 8 is the middle row of the 16-row filter table */
+ ref_ptr, recon_stride, sse);
+}
+
+
+unsigned int vp9_variance_halfpixvar16x16_hv_mmx(  /* wrapper: 16x16 sub-pixel variance with xoffset 8, yoffset 8 */
+ const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ return vp9_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 8, 8,  /* 8 is the middle row of the 16-row filter table */
+ ref_ptr, recon_stride, sse);
+}
diff --git a/vp9/encoder/x86/vp9_variance_sse2.c b/vp9/encoder/x86/vp9_variance_sse2.c
new file mode 100644
index 0000000..36fae6e
--- /dev/null
+++ b/vp9/encoder/x86/vp9_variance_sse2.c
@@ -0,0 +1,517 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vpx_config.h"
+#include "vp9/encoder/vp9_variance.h"
+#include "vp9/common/vp9_pragmas.h"
+#include "vpx_ports/mem.h"
+
+#define HALFNDX 8  /* offset value that the sub-pixel functions below route to the dedicated half_* helpers */
+/* NOTE(review): none of the four 6-tap filter prototypes below is called in this file. */
+extern void filter_block1d_h6_mmx(const unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter);
+extern void filter_block1d_v6_mmx(const short *src_ptr, unsigned char *output_ptr, unsigned int pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter);
+extern void filter_block1d8_h6_sse2(const unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter);
+extern void filter_block1d8_v6_sse2(const short *src_ptr, unsigned char *output_ptr, unsigned int pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter);
+
+extern void vp9_filter_block2d_bil4x4_var_mmx  /* MMX helper reused by vp9_sub_pixel_variance4x4_wmt */
+(
+ const unsigned char *ref_ptr,
+ int ref_pixels_per_line,
+ const unsigned char *src_ptr,
+ int src_pixels_per_line,
+ const short *HFilter,  /* caller passes vp9_bilinear_filters_mmx[xoffset] */
+ const short *VFilter,  /* caller passes vp9_bilinear_filters_mmx[yoffset] */
+ int *sum,
+ unsigned int *sumsquared
+);
+
+extern unsigned int vp9_get4x4var_mmx  /* MMX helper reused by vp9_variance4x4_wmt; its return value is ignored there */
+(
+ const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *SSE,
+ int *Sum
+);
+
+unsigned int vp9_get_mb_ss_sse2  /* NOTE(review): not called in this file */
+(
+ const short *src_ptr
+);
+unsigned int vp9_get16x16var_sse2  /* callers in this file ignore the return value and read *SSE and *Sum */
+(
+ const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *SSE,
+ int *Sum
+);
+unsigned int vp9_get8x8var_sse2  /* callers in this file ignore the return value and read *SSE and *Sum */
+(
+ const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *SSE,
+ int *Sum
+);
+void vp9_filter_block2d_bil_var_sse2  /* general sub-pixel path; takes the raw offsets, not filter pointers */
+(
+ const unsigned char *ref_ptr,
+ int ref_pixels_per_line,
+ const unsigned char *src_ptr,
+ int src_pixels_per_line,
+ unsigned int Height,  /* callers in this file pass 8 or 16 */
+ int xoffset,
+ int yoffset,
+ int *sum,
+ unsigned int *sumsquared
+);
+void vp9_half_horiz_vert_variance8x_h_sse2  /* used when xoffset == HALFNDX and yoffset == HALFNDX */
+(
+ const unsigned char *ref_ptr,
+ int ref_pixels_per_line,
+ const unsigned char *src_ptr,
+ int src_pixels_per_line,
+ unsigned int Height,
+ int *sum,
+ unsigned int *sumsquared
+);
+void vp9_half_horiz_vert_variance16x_h_sse2  /* used when xoffset == HALFNDX and yoffset == HALFNDX */
+(
+ const unsigned char *ref_ptr,
+ int ref_pixels_per_line,
+ const unsigned char *src_ptr,
+ int src_pixels_per_line,
+ unsigned int Height,
+ int *sum,
+ unsigned int *sumsquared
+);
+void vp9_half_horiz_variance8x_h_sse2  /* used when xoffset == HALFNDX and yoffset == 0 */
+(
+ const unsigned char *ref_ptr,
+ int ref_pixels_per_line,
+ const unsigned char *src_ptr,
+ int src_pixels_per_line,
+ unsigned int Height,
+ int *sum,
+ unsigned int *sumsquared
+);
+void vp9_half_horiz_variance16x_h_sse2  /* used when xoffset == HALFNDX and yoffset == 0 */
+(
+ const unsigned char *ref_ptr,
+ int ref_pixels_per_line,
+ const unsigned char *src_ptr,
+ int src_pixels_per_line,
+ unsigned int Height,
+ int *sum,
+ unsigned int *sumsquared
+);
+void vp9_half_vert_variance8x_h_sse2  /* used when xoffset == 0 and yoffset == HALFNDX */
+(
+ const unsigned char *ref_ptr,
+ int ref_pixels_per_line,
+ const unsigned char *src_ptr,
+ int src_pixels_per_line,
+ unsigned int Height,
+ int *sum,
+ unsigned int *sumsquared
+);
+void vp9_half_vert_variance16x_h_sse2  /* used when xoffset == 0 and yoffset == HALFNDX */
+(
+ const unsigned char *ref_ptr,
+ int ref_pixels_per_line,
+ const unsigned char *src_ptr,
+ int src_pixels_per_line,
+ unsigned int Height,
+ int *sum,
+ unsigned int *sumsquared
+);
+
+DECLARE_ALIGNED(16, extern const short, vp9_bilinear_filters_mmx[16][8]);  /* defined elsewhere; only vp9_sub_pixel_variance4x4_wmt indexes it in this file */
+
+unsigned int vp9_variance4x4_wmt(  /* 4x4 variance; uses the MMX 4x4 helper and stores its SSE in *sse */
+ const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ unsigned int var;
+ int avg;
+ /* Despite the names, var receives the helper's SSE output and avg its Sum output. */
+ vp9_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg);
+ *sse = var;
+ return (var - (((unsigned int)avg * avg) >> 4));  /* square formed in unsigned arithmetic; >> 4 divides by 16 pixels */
+
+}
+
+unsigned int vp9_variance8x8_wmt  /* 8x8 variance; the helper's SSE is also stored in *sse */
+(
+ const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ unsigned int var;
+ int avg;
+ /* Despite the names, var receives the helper's SSE output and avg its Sum output. */
+ vp9_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg);
+ *sse = var;
+ return (var - (((unsigned int)avg * avg) >> 6));  /* square formed in unsigned arithmetic; >> 6 divides by 64 pixels */
+
+}
+
+
+unsigned int vp9_variance16x16_wmt  /* 16x16 variance from one 16x16 helper call; helper's SSE stored in *sse */
+(
+ const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ unsigned int sse0;
+ int sum0;
+
+ /* The helper's own return value is ignored; results arrive through sse0 and sum0. */
+ vp9_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0);
+ *sse = sse0;
+ return (sse0 - (((unsigned int)sum0 * sum0) >> 8));  /* square formed in unsigned arithmetic; >> 8 divides by 256 pixels */
+}
+unsigned int vp9_mse16x16_wmt(  /* returns the 16x16 helper's SSE and stores the same value in *sse */
+ const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+
+ unsigned int sse0;
+ int sum0;  /* filled in by the helper but never read in this function */
+ vp9_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0);
+ *sse = sse0;
+ return sse0;
+
+}
+
+
+unsigned int vp9_variance16x8_wmt  /* 16x8 variance from two side-by-side 8x8 helper calls; summed SSE stored in *sse */
+(
+ const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ unsigned int sse0, sse1, var;
+ int sum0, sum1, avg;
+ /* Left 8x8 block first, then the block 8 columns to its right. */
+ vp9_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0);
+ vp9_get8x8var_sse2(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
+
+ var = sse0 + sse1;
+ avg = sum0 + sum1;
+ *sse = var;
+ return (var - (((unsigned int)avg * avg) >> 7));  /* square formed in unsigned arithmetic; >> 7 divides by 128 pixels */
+
+}
+
+unsigned int vp9_variance8x16_wmt  /* 8x16 variance from two stacked 8x8 helper calls; summed SSE stored in *sse */
+(
+ const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ unsigned int sse0, sse1, var;
+ int sum0, sum1, avg;
+ /* Top 8x8 block first, then the block 8 rows below it. */
+ vp9_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0);
+ vp9_get8x8var_sse2(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1);
+
+ var = sse0 + sse1;
+ avg = sum0 + sum1;
+ *sse = var;
+ return (var - (((unsigned int)avg * avg) >> 7));  /* square formed in unsigned arithmetic; >> 7 divides by 128 pixels */
+
+}
+
+unsigned int vp9_sub_pixel_variance4x4_wmt  /* 4x4 sub-pixel variance; delegates to the MMX 4x4 helper */
+(
+ const unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,  /* row index into vp9_bilinear_filters_mmx; not range-checked here */
+ int yoffset,  /* row index into vp9_bilinear_filters_mmx; not range-checked here */
+ const unsigned char *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse
+) {
+ int xsum;
+ unsigned int xxsum;
+ vp9_filter_block2d_bil4x4_var_mmx(
+ src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pixels_per_line,
+ vp9_bilinear_filters_mmx[xoffset], vp9_bilinear_filters_mmx[yoffset],
+ &xsum, &xxsum
+ );
+ *sse = xxsum;
+ return (xxsum - (((unsigned int)xsum * xsum) >> 4));  /* square formed in unsigned arithmetic; >> 4 divides by 16 pixels */
+}
+
+
+unsigned int vp9_sub_pixel_variance8x8_wmt  /* 8x8 sub-pixel variance; helper's sumsquared stored in *sse */
+(
+ const unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ const unsigned char *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse
+) {
+ int xsum;
+ unsigned int xxsum;
+ /* The three offset pairs built from 0 and HALFNDX go to dedicated helpers; everything else, including (0, 0), takes the general path. */
+ if (xoffset == HALFNDX && yoffset == 0) {
+ vp9_half_horiz_variance8x_h_sse2(
+ src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pixels_per_line, 8,
+ &xsum, &xxsum);
+ } else if (xoffset == 0 && yoffset == HALFNDX) {
+ vp9_half_vert_variance8x_h_sse2(
+ src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pixels_per_line, 8,
+ &xsum, &xxsum);
+ } else if (xoffset == HALFNDX && yoffset == HALFNDX) {
+ vp9_half_horiz_vert_variance8x_h_sse2(
+ src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pixels_per_line, 8,
+ &xsum, &xxsum);
+ } else {
+ vp9_filter_block2d_bil_var_sse2(
+ src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pixels_per_line, 8,
+ xoffset, yoffset,
+ &xsum, &xxsum);
+ }
+
+ *sse = xxsum;
+ return (xxsum - (((unsigned int)xsum * xsum) >> 6));  /* square formed in unsigned arithmetic; >> 6 divides by 64 pixels */
+}
+
+unsigned int vp9_sub_pixel_variance16x16_wmt  /* 16x16 sub-pixel variance; combined sumsquared stored in *sse */
+(
+ const unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ const unsigned char *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse
+) {
+ int xsum0, xsum1;  /* xsum1 is written only on the general (final else) path */
+ unsigned int xxsum0, xxsum1;  /* xxsum1 is written only on the general (final else) path */
+
+ /* The three offset pairs built from 0 and HALFNDX each need one 16x helper call. */
+ // note we could avoid these if statements if the calling function
+ // just called the appropriate functions inside.
+ if (xoffset == HALFNDX && yoffset == 0) {
+ vp9_half_horiz_variance16x_h_sse2(
+ src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pixels_per_line, 16,
+ &xsum0, &xxsum0);
+ } else if (xoffset == 0 && yoffset == HALFNDX) {
+ vp9_half_vert_variance16x_h_sse2(
+ src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pixels_per_line, 16,
+ &xsum0, &xxsum0);
+ } else if (xoffset == HALFNDX && yoffset == HALFNDX) {
+ vp9_half_horiz_vert_variance16x_h_sse2(
+ src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pixels_per_line, 16,
+ &xsum0, &xxsum0);
+ } else {
+ vp9_filter_block2d_bil_var_sse2(
+ src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pixels_per_line, 16,
+ xoffset, yoffset,
+ &xsum0, &xxsum0
+ );
+ /* The general helper is called a second time starting 8 columns in, and the results are added. */
+ vp9_filter_block2d_bil_var_sse2(
+ src_ptr + 8, src_pixels_per_line,
+ dst_ptr + 8, dst_pixels_per_line, 16,
+ xoffset, yoffset,
+ &xsum1, &xxsum1
+ );
+ xsum0 += xsum1;
+ xxsum0 += xxsum1;
+ }
+
+ *sse = xxsum0;
+ return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));  /* square formed in unsigned arithmetic; >> 8 divides by 256 pixels */
+}
+
+unsigned int vp9_sub_pixel_mse16x16_wmt(  /* returns the value that vp9_sub_pixel_variance16x16_wmt stores in *sse */
+ const unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ const unsigned char *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse
+) {
+ vp9_sub_pixel_variance16x16_wmt(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse);  /* called only to fill *sse; its return value is discarded */
+ return *sse;
+}
+
+unsigned int vp9_sub_pixel_variance16x8_wmt  /* 16x8 sub-pixel variance; combined sumsquared stored in *sse */
+(
+ const unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ const unsigned char *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse
+
+) {
+ int xsum0, xsum1;  /* xsum1 is written only on the general (final else) path */
+ unsigned int xxsum0, xxsum1;  /* xxsum1 is written only on the general (final else) path */
+ /* The three offset pairs built from 0 and HALFNDX each need one 16x helper call with Height 8. */
+ if (xoffset == HALFNDX && yoffset == 0) {
+ vp9_half_horiz_variance16x_h_sse2(
+ src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pixels_per_line, 8,
+ &xsum0, &xxsum0);
+ } else if (xoffset == 0 && yoffset == HALFNDX) {
+ vp9_half_vert_variance16x_h_sse2(
+ src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pixels_per_line, 8,
+ &xsum0, &xxsum0);
+ } else if (xoffset == HALFNDX && yoffset == HALFNDX) {
+ vp9_half_horiz_vert_variance16x_h_sse2(
+ src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pixels_per_line, 8,
+ &xsum0, &xxsum0);
+ } else {
+ vp9_filter_block2d_bil_var_sse2(
+ src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pixels_per_line, 8,
+ xoffset, yoffset,
+ &xsum0, &xxsum0);
+ /* The general helper is called a second time starting 8 columns in, and the results are added. */
+ vp9_filter_block2d_bil_var_sse2(
+ src_ptr + 8, src_pixels_per_line,
+ dst_ptr + 8, dst_pixels_per_line, 8,
+ xoffset, yoffset,
+ &xsum1, &xxsum1);
+ xsum0 += xsum1;
+ xxsum0 += xxsum1;
+ }
+
+ *sse = xxsum0;
+ return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 7));  /* square formed in unsigned arithmetic; >> 7 divides by 128 pixels */
+}
+
+unsigned int vp9_sub_pixel_variance8x16_wmt  /* 8x16 sub-pixel variance; helper's sumsquared stored in *sse */
+(
+ const unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ const unsigned char *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse
+) {
+ int xsum;
+ unsigned int xxsum;
+ /* Same dispatch as the 8x8 variant, with Height 16 passed to every helper. */
+ if (xoffset == HALFNDX && yoffset == 0) {
+ vp9_half_horiz_variance8x_h_sse2(
+ src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pixels_per_line, 16,
+ &xsum, &xxsum);
+ } else if (xoffset == 0 && yoffset == HALFNDX) {
+ vp9_half_vert_variance8x_h_sse2(
+ src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pixels_per_line, 16,
+ &xsum, &xxsum);
+ } else if (xoffset == HALFNDX && yoffset == HALFNDX) {
+ vp9_half_horiz_vert_variance8x_h_sse2(
+ src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pixels_per_line, 16,
+ &xsum, &xxsum);
+ } else {
+ vp9_filter_block2d_bil_var_sse2(
+ src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pixels_per_line, 16,
+ xoffset, yoffset,
+ &xsum, &xxsum);
+ }
+
+ *sse = xxsum;
+ return (xxsum - (((unsigned int)xsum * xsum) >> 7));  /* square formed in unsigned arithmetic; >> 7 divides by 128 pixels */
+}
+
+
+unsigned int vp9_variance_halfpixvar16x16_h_wmt(  /* 16x16 variance via the half_horiz 16x helper; sumsquared stored in *sse */
+ const unsigned char *src_ptr,
+ int src_pixels_per_line,
+ const unsigned char *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse) {
+ int xsum0;
+ unsigned int xxsum0;
+ /* Calls the helper directly with Height 16; no offset dispatch is needed here. */
+ vp9_half_horiz_variance16x_h_sse2(
+ src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pixels_per_line, 16,
+ &xsum0, &xxsum0);
+
+ *sse = xxsum0;
+ return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));  /* square formed in unsigned arithmetic; >> 8 divides by 256 pixels */
+}
+
+
+unsigned int vp9_variance_halfpixvar16x16_v_wmt(  /* 16x16 variance via the half_vert 16x helper; sumsquared stored in *sse */
+ const unsigned char *src_ptr,
+ int src_pixels_per_line,
+ const unsigned char *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse) {
+ int xsum0;
+ unsigned int xxsum0;
+ vp9_half_vert_variance16x_h_sse2(  /* called directly with Height 16 */
+ src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pixels_per_line, 16,
+ &xsum0, &xxsum0);
+
+ *sse = xxsum0;
+ return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));  /* square formed in unsigned arithmetic; >> 8 divides by 256 pixels */
+}
+
+
+unsigned int vp9_variance_halfpixvar16x16_hv_wmt(  /* 16x16 variance via the half_horiz_vert 16x helper; sumsquared stored in *sse */
+ const unsigned char *src_ptr,
+ int src_pixels_per_line,
+ const unsigned char *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse) {
+ int xsum0;
+ unsigned int xxsum0;
+ /* Calls the helper directly with Height 16; no offset dispatch is needed here. */
+ vp9_half_horiz_vert_variance16x_h_sse2(
+ src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pixels_per_line, 16,
+ &xsum0, &xxsum0);
+
+ *sse = xxsum0;
+ return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));  /* square formed in unsigned arithmetic; >> 8 divides by 256 pixels */
+}
diff --git a/vp9/encoder/x86/vp9_variance_ssse3.c b/vp9/encoder/x86/vp9_variance_ssse3.c
new file mode 100644
index 0000000..f95a542
--- /dev/null
+++ b/vp9/encoder/x86/vp9_variance_ssse3.c
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vpx_config.h"
+#include "vp9/encoder/vp9_variance.h"
+#include "vp9/common/vp9_pragmas.h"
+#include "vpx_ports/mem.h"
+
+#define HALFNDX 8  /* offset value that the functions below route to the dedicated half_* helpers */
+
+extern unsigned int vp9_get16x16var_sse2  /* NOTE(review): not called in this file */
+(
+ const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *SSE,
+ int *Sum
+);
+extern void vp9_half_horiz_vert_variance16x_h_sse2  /* used when xoffset == HALFNDX and yoffset == HALFNDX */
+(
+ const unsigned char *ref_ptr,
+ int ref_pixels_per_line,
+ const unsigned char *src_ptr,
+ int src_pixels_per_line,
+ unsigned int Height,
+ int *sum,
+ unsigned int *sumsquared
+);
+extern void vp9_half_horiz_variance16x_h_sse2  /* used when xoffset == HALFNDX and yoffset == 0 */
+(
+ const unsigned char *ref_ptr,
+ int ref_pixels_per_line,
+ const unsigned char *src_ptr,
+ int src_pixels_per_line,
+ unsigned int Height,
+ int *sum,
+ unsigned int *sumsquared
+);
+extern void vp9_half_vert_variance16x_h_sse2  /* used when xoffset == 0 and yoffset == HALFNDX */
+(
+ const unsigned char *ref_ptr,
+ int ref_pixels_per_line,
+ const unsigned char *src_ptr,
+ int src_pixels_per_line,
+ unsigned int Height,
+ int *sum,
+ unsigned int *sumsquared
+);
+extern void vp9_filter_block2d_bil_var_ssse3  /* general sub-pixel path; takes the raw offsets */
+(
+ const unsigned char *ref_ptr,
+ int ref_pixels_per_line,
+ const unsigned char *src_ptr,
+ int src_pixels_per_line,
+ unsigned int Height,  /* callers in this file pass 8 or 16 */
+ int xoffset,
+ int yoffset,
+ int *sum,
+ unsigned int *sumsquared
+);
+
+unsigned int vp9_sub_pixel_variance16x16_ssse3  /* 16x16 sub-pixel variance; helper's sumsquared stored in *sse */
+(
+ const unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ const unsigned char *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse
+) {
+ int xsum0;
+ unsigned int xxsum0;
+ /* The three offset pairs built from 0 and HALFNDX reuse the SSE2 helpers; the rest makes one SSSE3 helper call. */
+ // note we could avoid these if statements if the calling function
+ // just called the appropriate functions inside.
+ if (xoffset == HALFNDX && yoffset == 0) {
+ vp9_half_horiz_variance16x_h_sse2(
+ src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pixels_per_line, 16,
+ &xsum0, &xxsum0);
+ } else if (xoffset == 0 && yoffset == HALFNDX) {
+ vp9_half_vert_variance16x_h_sse2(
+ src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pixels_per_line, 16,
+ &xsum0, &xxsum0);
+ } else if (xoffset == HALFNDX && yoffset == HALFNDX) {
+ vp9_half_horiz_vert_variance16x_h_sse2(
+ src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pixels_per_line, 16,
+ &xsum0, &xxsum0);
+ } else {
+ vp9_filter_block2d_bil_var_ssse3(
+ src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pixels_per_line, 16,
+ xoffset, yoffset,
+ &xsum0, &xxsum0);
+ }
+
+ *sse = xxsum0;
+ return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));  /* square formed in unsigned arithmetic; >> 8 divides by 256 pixels */
+}
+
+unsigned int vp9_sub_pixel_variance16x8_ssse3  /* 16x8 sub-pixel variance; helper's sumsquared stored in *sse */
+(
+ const unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ const unsigned char *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse
+
+) {
+ int xsum0;
+ unsigned int xxsum0;
+ /* Same dispatch as the 16x16 variant, with Height 8 passed to every helper. */
+ if (xoffset == HALFNDX && yoffset == 0) {
+ vp9_half_horiz_variance16x_h_sse2(
+ src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pixels_per_line, 8,
+ &xsum0, &xxsum0);
+ } else if (xoffset == 0 && yoffset == HALFNDX) {
+ vp9_half_vert_variance16x_h_sse2(
+ src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pixels_per_line, 8,
+ &xsum0, &xxsum0);
+ } else if (xoffset == HALFNDX && yoffset == HALFNDX) {
+ vp9_half_horiz_vert_variance16x_h_sse2(
+ src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pixels_per_line, 8,
+ &xsum0, &xxsum0);
+ } else {
+ vp9_filter_block2d_bil_var_ssse3(
+ src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pixels_per_line, 8,
+ xoffset, yoffset,
+ &xsum0, &xxsum0);
+ }
+
+ *sse = xxsum0;
+ return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 7));  /* square formed in unsigned arithmetic; >> 7 divides by 128 pixels */
+}
diff --git a/vp9/encoder/x86/vp9_x86_csystemdependent.c b/vp9/encoder/x86/vp9_x86_csystemdependent.c
new file mode 100644
index 0000000..3beef53
--- /dev/null
+++ b/vp9/encoder/x86/vp9_x86_csystemdependent.c
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "./vpx_config.h"
+#include "vpx_ports/x86.h"
+#include "vp9/encoder/vp9_variance.h"
+#include "vp9/encoder/vp9_onyx_int.h"
+#include "vp9/encoder/x86/vp9_dct_mmx.h"
+
+// TODO(jimbankoski) Consider rewriting the C code to take the same values rather
+// than going through these pointer conversions.
+#if HAVE_MMX
+void vp9_short_fdct8x4_mmx(short *input, short *output, int pitch) {  /* 8x4 forward DCT done as two 4x4 MMX transforms */
+ vp9_short_fdct4x4_mmx(input, output, pitch);  /* first 4x4 block */
+ vp9_short_fdct4x4_mmx(input + 4, output + 16, pitch);  /* second block: input 4 shorts further on, output 16 shorts further on */
+}
+
+int vp9_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr, int dc);  /* implemented outside this file */
+int vp9_mbblock_error_mmx(MACROBLOCK *mb, int dc) {  /* unpacks the two coefficient pointers and forwards to the _impl routine */
+ short *coeff_ptr = mb->block[0].coeff;  /* coeff pointer of the macroblock's first block */
+ short *dcoef_ptr = mb->e_mbd.block[0].dqcoeff;  /* dqcoeff pointer of the first block in e_mbd */
+ return vp9_mbblock_error_mmx_impl(coeff_ptr, dcoef_ptr, dc);
+}
+
+int vp9_mbuverror_mmx_impl(short *s_ptr, short *d_ptr);  /* implemented outside this file */
+int vp9_mbuverror_mmx(MACROBLOCK *mb) {  /* forwards the coefficient arrays from index 256 onward to the _impl routine */
+ short *s_ptr = &mb->coeff[256];  /* NOTE(review): presumably 256 skips the 16x16 luma coefficients — confirm */
+ short *d_ptr = &mb->e_mbd.dqcoeff[256];  /* same offset into the dequantized coefficients */
+ return vp9_mbuverror_mmx_impl(s_ptr, d_ptr);
+}
+
+void vp9_subtract_b_mmx_impl(unsigned char *z, int src_stride,  /* implemented outside this file */
+ short *diff, unsigned char *predictor,
+ int pitch);
+void vp9_subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch) {  /* unpacks BLOCK / BLOCKD fields and forwards to the _impl routine */
+ unsigned char *z = *(be->base_src) + be->src;  /* source pointer: base address plus this block's offset */
+ unsigned int src_stride = be->src_stride;  /* NOTE(review): unsigned here, but the _impl parameter is int */
+ short *diff = &be->src_diff[0];
+ unsigned char *predictor = &bd->predictor[0];
+ vp9_subtract_b_mmx_impl(z, src_stride, diff, predictor, pitch);
+}
+
+#endif
+
+#if HAVE_SSE2
+int vp9_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc);  /* implemented outside this file */
+int vp9_mbblock_error_xmm(MACROBLOCK *mb, int dc) {  /* unpacks the two coefficient pointers and forwards to the _impl routine */
+ short *coeff_ptr = mb->block[0].coeff;  /* coeff pointer of the macroblock's first block */
+ short *dcoef_ptr = mb->e_mbd.block[0].dqcoeff;  /* dqcoeff pointer of the first block in e_mbd */
+ return vp9_mbblock_error_xmm_impl(coeff_ptr, dcoef_ptr, dc);
+}
+
+int vp9_mbuverror_xmm_impl(short *s_ptr, short *d_ptr);  /* implemented outside this file */
+int vp9_mbuverror_xmm(MACROBLOCK *mb) {  /* forwards the coefficient arrays from index 256 onward to the _impl routine */
+ short *s_ptr = &mb->coeff[256];  /* NOTE(review): presumably 256 skips the 16x16 luma coefficients — confirm */
+ short *d_ptr = &mb->e_mbd.dqcoeff[256];  /* same offset into the dequantized coefficients */
+ return vp9_mbuverror_xmm_impl(s_ptr, d_ptr);
+}
+
+void vp9_subtract_b_sse2_impl(unsigned char *z, int src_stride,  /* implemented outside this file */
+ short *diff, unsigned char *predictor,
+ int pitch);
+void vp9_subtract_b_sse2(BLOCK *be, BLOCKD *bd, int pitch) {  /* unpacks BLOCK / BLOCKD fields and forwards to the _impl routine */
+ unsigned char *z = *(be->base_src) + be->src;  /* source pointer: base address plus this block's offset */
+ unsigned int src_stride = be->src_stride;  /* NOTE(review): unsigned here, but the _impl parameter is int */
+ short *diff = &be->src_diff[0];
+ unsigned char *predictor = &bd->predictor[0];
+ vp9_subtract_b_sse2_impl(z, src_stride, diff, predictor, pitch);
+}
+
+#endif
diff --git a/vp9/exports_dec b/vp9/exports_dec
new file mode 100644
index 0000000..0a61fde
--- /dev/null
+++ b/vp9/exports_dec
@@ -0,0 +1,2 @@
+data vpx_codec_vp9_dx_algo
+text vpx_codec_vp9_dx
diff --git a/vp9/exports_enc b/vp9/exports_enc
new file mode 100644
index 0000000..25156e8
--- /dev/null
+++ b/vp9/exports_enc
@@ -0,0 +1,4 @@
+data vpx_codec_vp9_cx_algo
+text vpx_codec_vp9_cx
+data vpx_codec_vp9x_cx_algo
+text vpx_codec_vp9x_cx
diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk
new file mode 100644
index 0000000..67d38ea
--- /dev/null
+++ b/vp9/vp9_common.mk
@@ -0,0 +1,135 @@
+##
+## Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+##
+## Use of this source code is governed by a BSD-style license
+## that can be found in the LICENSE file in the root of the source
+## tree. An additional intellectual property rights grant can be found
+## in the file PATENTS. All contributing project authors may
+## be found in the AUTHORS file in the root of the source tree.
+##
+
+VP9_COMMON_SRCS-yes += vp9_common.mk
+VP9_COMMON_SRCS-yes += common/vp9_type_aliases.h
+VP9_COMMON_SRCS-yes += common/vp9_pragmas.h
+VP9_COMMON_SRCS-yes += common/vp9_ppflags.h
+VP9_COMMON_SRCS-yes += common/vp9_onyx.h
+VP9_COMMON_SRCS-yes += common/vp9_onyxd.h
+VP9_COMMON_SRCS-yes += common/vp9_alloccommon.c
+VP9_COMMON_SRCS-yes += common/vp9_asm_com_offsets.c
+VP9_COMMON_SRCS-yes += common/vp9_blockd.c
+VP9_COMMON_SRCS-yes += common/vp9_coefupdateprobs.h
+VP9_COMMON_SRCS-yes += common/vp9_debugmodes.c
+VP9_COMMON_SRCS-yes += common/vp9_default_coef_probs.h
+VP9_COMMON_SRCS-yes += common/vp9_entropy.c
+VP9_COMMON_SRCS-yes += common/vp9_entropymode.c
+VP9_COMMON_SRCS-yes += common/vp9_entropymv.c
+VP9_COMMON_SRCS-yes += common/vp9_extend.c
+VP9_COMMON_SRCS-yes += common/vp9_filter.c
+VP9_COMMON_SRCS-yes += common/vp9_filter.h
+VP9_COMMON_SRCS-yes += common/vp9_findnearmv.c
+VP9_COMMON_SRCS-yes += common/generic/vp9_systemdependent.c
+VP9_COMMON_SRCS-yes += common/vp9_idctllm.c
+VP9_COMMON_SRCS-yes += common/vp9_alloccommon.h
+VP9_COMMON_SRCS-yes += common/vp9_blockd.h
+VP9_COMMON_SRCS-yes += common/vp9_common.h
+VP9_COMMON_SRCS-yes += common/vp9_common_types.h
+VP9_COMMON_SRCS-yes += common/vp9_entropy.h
+VP9_COMMON_SRCS-yes += common/vp9_entropymode.h
+VP9_COMMON_SRCS-yes += common/vp9_entropymv.h
+VP9_COMMON_SRCS-yes += common/vp9_extend.h
+VP9_COMMON_SRCS-yes += common/vp9_findnearmv.h
+VP9_COMMON_SRCS-yes += common/vp9_header.h
+VP9_COMMON_SRCS-yes += common/vp9_invtrans.h
+VP9_COMMON_SRCS-yes += common/vp9_loopfilter.h
+VP9_COMMON_SRCS-yes += common/vp9_modecont.h
+VP9_COMMON_SRCS-yes += common/vp9_mv.h
+VP9_COMMON_SRCS-yes += common/vp9_onyxc_int.h
+VP9_COMMON_SRCS-yes += common/vp9_pred_common.h
+VP9_COMMON_SRCS-yes += common/vp9_pred_common.c
+VP9_COMMON_SRCS-yes += common/vp9_quant_common.h
+VP9_COMMON_SRCS-yes += common/vp9_reconinter.h
+VP9_COMMON_SRCS-yes += common/vp9_reconintra.h
+VP9_COMMON_SRCS-yes += common/vp9_reconintra4x4.h
+VP9_COMMON_SRCS-yes += common/vp9_rtcd.c
+VP9_COMMON_SRCS-yes += common/vp9_rtcd_defs.sh
+VP9_COMMON_SRCS-yes += common/vp9_sadmxn.h
+VP9_COMMON_SRCS-yes += common/vp9_subpelvar.h
+VP9_COMMON_SRCS-yes += common/vp9_seg_common.h
+VP9_COMMON_SRCS-yes += common/vp9_seg_common.c
+VP9_COMMON_SRCS-yes += common/vp9_setupintrarecon.h
+VP9_COMMON_SRCS-yes += common/vp9_subpixel.h
+VP9_COMMON_SRCS-yes += common/vp9_swapyv12buffer.h
+VP9_COMMON_SRCS-yes += common/vp9_systemdependent.h
+VP9_COMMON_SRCS-yes += common/vp9_textblit.h
+VP9_COMMON_SRCS-yes += common/vp9_treecoder.h
+VP9_COMMON_SRCS-yes += common/vp9_invtrans.c
+VP9_COMMON_SRCS-yes += common/vp9_loopfilter.c
+VP9_COMMON_SRCS-yes += common/vp9_loopfilter_filters.c
+VP9_COMMON_SRCS-yes += common/vp9_mbpitch.c
+VP9_COMMON_SRCS-yes += common/vp9_modecont.c
+VP9_COMMON_SRCS-yes += common/vp9_modecontext.c
+VP9_COMMON_SRCS-yes += common/vp9_mvref_common.c
+VP9_COMMON_SRCS-yes += common/vp9_mvref_common.h
+VP9_COMMON_SRCS-yes += common/vp9_quant_common.c
+VP9_COMMON_SRCS-yes += common/vp9_recon.c
+VP9_COMMON_SRCS-yes += common/vp9_reconinter.c
+VP9_COMMON_SRCS-yes += common/vp9_reconintra.c
+VP9_COMMON_SRCS-yes += common/vp9_reconintra4x4.c
+VP9_COMMON_SRCS-yes += common/vp9_setupintrarecon.c
+VP9_COMMON_SRCS-yes += common/vp9_swapyv12buffer.c
+VP9_COMMON_SRCS-$(CONFIG_POSTPROC_VISUALIZER) += common/vp9_textblit.c
+VP9_COMMON_SRCS-yes += common/vp9_treecoder.c
+VP9_COMMON_SRCS-$(CONFIG_IMPLICIT_SEGMENTATION) += common/vp9_implicit_segmentation.c
+
+VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_idct_x86.h
+VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_subpixel_x86.h
+VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_loopfilter_x86.h
+VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_postproc_x86.h
+VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_asm_stubs.c
+VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_loopfilter_x86.c
+VP9_COMMON_SRCS-$(CONFIG_POSTPROC) += common/vp9_postproc.h
+VP9_COMMON_SRCS-$(CONFIG_POSTPROC) += common/vp9_postproc.c
+VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_iwalsh_mmx.asm
+VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_recon_mmx.asm
+VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_subpixel_mmx.asm
+VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_loopfilter_mmx.asm
+VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idctllm_sse2.asm
+VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_recon_sse2.asm
+VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_recon_wrapper_sse2.c
+VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_subpixel_sse2.asm
+VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_loopfilter_sse2.asm
+VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_iwalsh_sse2.asm
+VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_subpixel_8t_ssse3.asm
+VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_subpixel_ssse3.asm
+ifeq ($(CONFIG_POSTPROC),yes)
+VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_postproc_mmx.asm
+VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_postproc_sse2.asm
+endif
+
+# common (c)
+ifeq ($(CONFIG_CSM),yes)
+VP9_COMMON_SRCS-yes += common/vp9_maskingmv.c
+VP9_COMMON_SRCS-$(HAVE_SSE3) += common/x86/vp9_mask_sse3.asm
+endif
+
+VP9_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/vp9_filter_sse4.c
+ifeq ($(HAVE_SSE4_1),yes)
+vp9/common/x86/vp9_filter_sse4.c.o: CFLAGS += -msse4
+vp9/common/x86/vp9_filter_sse4.c.d: CFLAGS += -msse4
+endif
+
+VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_filter_sse2.c
+VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_sadmxn_x86.c
+ifeq ($(HAVE_SSE2),yes)
+vp9/common/x86/vp9_filter_sse2.c.o: CFLAGS += -msse2
+vp9/common/x86/vp9_loopfilter_x86.c.o: CFLAGS += -msse2
+vp9/common/x86/vp9_sadmxn_x86.c.o: CFLAGS += -msse2
+vp9/common/x86/vp9_filter_sse2.c.d: CFLAGS += -msse2
+vp9/common/x86/vp9_loopfilter_x86.c.d: CFLAGS += -msse2
+vp9/common/x86/vp9_sadmxn_x86.c.d: CFLAGS += -msse2
+endif
+
+$(eval $(call asm_offsets_template,\
+ vp9_asm_com_offsets.asm, $(VP9_PREFIX)common/vp9_asm_com_offsets.c))
+
+$(eval $(call rtcd_h_template,vp9_rtcd,vp9/common/vp9_rtcd_defs.sh))
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
new file mode 100644
index 0000000..1ef5ff1
--- /dev/null
+++ b/vp9/vp9_cx_iface.c
@@ -0,0 +1,1136 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx/vpx_codec.h"
+#include "vpx/internal/vpx_codec_internal.h"
+#include "vpx_version.h"
+#include "vp9/encoder/vp9_onyx_int.h"
+#include "vpx/vp8cx.h"
+#include "vp9/encoder/vp9_firstpass.h"
+#include "vp9/common/vp9_onyx.h"
+#include <stdlib.h>
+#include <string.h>
+
+struct vp8_extracfg {
+ struct vpx_codec_pkt_list *pkt_list; /* copied to oxcf->output_pkt_list */
+ int cpu_used; /* available cpu percentage in 1/16 */
+ unsigned int enable_auto_alt_ref; /* if encoder decides to use alternate reference frames */
+ unsigned int noise_sensitivity; /* validate_config accepts values up to 6 */
+ unsigned int Sharpness; /* validate_config accepts values up to 7 */
+ unsigned int static_thresh; /* copied to oxcf->encode_breakout */
+ unsigned int token_partitions;
+ unsigned int arnr_max_frames; /* alt_ref Noise Reduction Max Frame Count */
+ unsigned int arnr_strength; /* alt_ref Noise Reduction Strength */
+ unsigned int arnr_type; /* alt_ref filter type */
+ unsigned int experimental; /* nonzero sets bit 0x4 of oxcf->Version */
+ vp8e_tuning tuning;
+ unsigned int cq_level; /* constrained quality level */
+ unsigned int rc_max_intra_bitrate_pct;
+#if CONFIG_LOSSLESS
+ unsigned int lossless; /* boolean; when set, validate_config requires both quantizer limits to be 0 */
+#endif
+};
+
+struct extraconfig_map {
+ int usage;
+ struct vp8_extracfg cfg;
+};
+
+static const struct extraconfig_map extracfg_map[] = {
+ {
+ 0, /* usage */
+ {
+ NULL, /* pkt_list */
+ 0, /* cpu_used */
+ 0, /* enable_auto_alt_ref */
+ 0, /* noise_sensitivity */
+ 0, /* Sharpness */
+ 0, /* static_thresh */
+ VP8_ONE_TOKENPARTITION, /* token_partitions */
+ 0, /* arnr_max_frames */
+ 3, /* arnr_strength */
+ 3, /* arnr_type */
+ 0, /* experimental mode */
+ 0, /* tuning */
+ 10, /* cq_level */
+ 0, /* rc_max_intra_bitrate_pct */
+#if CONFIG_LOSSLESS
+ 0, /* lossless */
+#endif
+ }
+ }
+};
+
+struct vpx_codec_alg_priv {
+ vpx_codec_priv_t base;
+ vpx_codec_enc_cfg_t cfg;
+ struct vp8_extracfg vp8_cfg;
+ VP9_CONFIG oxcf; /* filled from cfg and vp8_cfg by set_vp8e_config */
+ VP9_PTR cpi;
+ unsigned char *cx_data; /* output buffer allocated in vp8e_common_init */
+ unsigned int cx_data_sz;
+ unsigned char *pending_cx_data; /* start of invisible-frame data held for the next visible frame */
+ unsigned int pending_cx_data_sz;
+ vpx_image_t preview_img;
+ unsigned int next_frame_flag;
+ vp8_postproc_cfg_t preview_ppcfg;
+ vpx_codec_pkt_list_decl(64) pkt_list; // changed to accommodate the maximum number of lagged frames allowed
+ unsigned int fixed_kf_cntr; /* frame counter used for fixed keyframe intervals */
+};
+
+
+static vpx_codec_err_t
+update_error_state(vpx_codec_alg_priv_t *ctx,
+ const struct vpx_internal_error_info *error) {
+ vpx_codec_err_t res;
+ /* The assignment inside the condition is intentional: err_detail is only touched for a nonzero error code. */
+ if ((res = error->error_code))
+ ctx->base.err_detail = error->has_detail
+ ? error->detail
+ : NULL;
+
+ return res;
+}
+
+
+#undef ERROR /* ERROR() below needs a local 'ctx' and returns from the calling function */
+#define ERROR(str) do {\
+ ctx->base.err_detail = str;\
+ return VPX_CODEC_INVALID_PARAM;\
+ } while(0)
+/* Inclusive [lo, hi] check; ">= lo" is spelled "== lo || > lo", presumably to avoid always-true warnings on unsigned members when lo is 0 (TODO confirm). */
+#define RANGE_CHECK(p,memb,lo,hi) do {\
+ if(!(((p)->memb == lo || (p)->memb > (lo)) && (p)->memb <= hi)) \
+ ERROR(#memb " out of range ["#lo".."#hi"]");\
+ } while(0)
+/* Upper bound only: fails when memb > hi. */
+#define RANGE_CHECK_HI(p,memb,hi) do {\
+ if(!((p)->memb <= (hi))) \
+ ERROR(#memb " out of range [.."#hi"]");\
+ } while(0)
+/* Lower bound only: fails when memb < lo. */
+#define RANGE_CHECK_LO(p,memb,lo) do {\
+ if(!((p)->memb >= (lo))) \
+ ERROR(#memb " out of range ["#lo"..]");\
+ } while(0)
+/* Fails unless memb equals its own !! normalization, i.e. is exactly 0 or 1. */
+#define RANGE_CHECK_BOOL(p,memb) do {\
+ if(!!((p)->memb) != (p)->memb) ERROR(#memb " expected boolean");\
+ } while(0)
+
+static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
+ const vpx_codec_enc_cfg_t *cfg,
+ const struct vp8_extracfg *vp8_cfg) {
+ RANGE_CHECK(cfg, g_w, 1, 16383); /* 14 bits available */
+ RANGE_CHECK(cfg, g_h, 1, 16383); /* 14 bits available */
+ RANGE_CHECK(cfg, g_timebase.den, 1, 1000000000);
+ RANGE_CHECK(cfg, g_timebase.num, 1, cfg->g_timebase.den);
+ RANGE_CHECK_HI(cfg, g_profile, 3);
+
+ RANGE_CHECK_HI(cfg, rc_max_quantizer, 63);
+ RANGE_CHECK_HI(cfg, rc_min_quantizer, cfg->rc_max_quantizer);
+#if CONFIG_LOSSLESS
+ RANGE_CHECK_BOOL(vp8_cfg, lossless);
+ if (vp8_cfg->lossless) {
+ RANGE_CHECK_HI(cfg, rc_max_quantizer, 0);
+ RANGE_CHECK_HI(cfg, rc_min_quantizer, 0);
+ }
+#endif
+
+ RANGE_CHECK_HI(cfg, g_threads, 64);
+ RANGE_CHECK_HI(cfg, g_lag_in_frames, MAX_LAG_BUFFERS);
+ RANGE_CHECK(cfg, rc_end_usage, VPX_VBR, VPX_CQ);
+ RANGE_CHECK_HI(cfg, rc_undershoot_pct, 1000);
+ RANGE_CHECK_HI(cfg, rc_overshoot_pct, 1000);
+ RANGE_CHECK_HI(cfg, rc_2pass_vbr_bias_pct, 100);
+ RANGE_CHECK(cfg, kf_mode, VPX_KF_DISABLED, VPX_KF_AUTO);
+ // RANGE_CHECK_BOOL(cfg, g_delete_firstpassfile);
+ RANGE_CHECK_BOOL(cfg, rc_resize_allowed);
+ RANGE_CHECK_HI(cfg, rc_dropframe_thresh, 100);
+ RANGE_CHECK_HI(cfg, rc_resize_up_thresh, 100);
+ RANGE_CHECK_HI(cfg, rc_resize_down_thresh, 100);
+ RANGE_CHECK(cfg, g_pass, VPX_RC_ONE_PASS, VPX_RC_LAST_PASS);
+
+ /* VP8 does not support a lower bound on the keyframe interval in
+ * automatic keyframe placement mode.
+ */
+ if (cfg->kf_mode != VPX_KF_DISABLED && cfg->kf_min_dist != cfg->kf_max_dist
+ && cfg->kf_min_dist > 0)
+ ERROR("kf_min_dist not supported in auto mode, use 0 "
+ "or kf_max_dist instead.");
+
+ RANGE_CHECK_BOOL(vp8_cfg, enable_auto_alt_ref);
+ RANGE_CHECK(vp8_cfg, cpu_used, -16, 16);
+
+ RANGE_CHECK_HI(vp8_cfg, noise_sensitivity, 6);
+
+ RANGE_CHECK(vp8_cfg, token_partitions, VP8_ONE_TOKENPARTITION, VP8_EIGHT_TOKENPARTITION);
+ RANGE_CHECK_HI(vp8_cfg, Sharpness, 7);
+ RANGE_CHECK(vp8_cfg, arnr_max_frames, 0, 15);
+ RANGE_CHECK_HI(vp8_cfg, arnr_strength, 6);
+ RANGE_CHECK(vp8_cfg, arnr_type, 1, 3);
+ RANGE_CHECK(vp8_cfg, cq_level, 0, 63);
+
+ if (cfg->g_pass == VPX_RC_LAST_PASS) {
+ size_t packet_sz = sizeof(FIRSTPASS_STATS);
+ int n_packets = (int)(cfg->rc_twopass_stats_in.sz / packet_sz);
+ FIRSTPASS_STATS *stats;
+
+ if (!cfg->rc_twopass_stats_in.buf)
+ ERROR("rc_twopass_stats_in.buf not set.");
+
+ if (cfg->rc_twopass_stats_in.sz % packet_sz)
+ ERROR("rc_twopass_stats_in.sz indicates truncated packet.");
+
+ if (cfg->rc_twopass_stats_in.sz < 2 * packet_sz)
+ ERROR("rc_twopass_stats_in requires at least two packets.");
+
+ stats = (void *)((char *)cfg->rc_twopass_stats_in.buf
+ + (n_packets - 1) * packet_sz);
+
+ if ((int)(stats->count + 0.5) != n_packets - 1)
+ ERROR("rc_twopass_stats_in missing EOS stats packet");
+ }
+
+ return VPX_CODEC_OK;
+}
+
+
+static vpx_codec_err_t validate_img(vpx_codec_alg_priv_t *ctx,
+ const vpx_image_t *img) {
+ switch (img->fmt) {
+ case VPX_IMG_FMT_YV12:
+ case VPX_IMG_FMT_I420:
+ case VPX_IMG_FMT_VPXI420:
+ case VPX_IMG_FMT_VPXYV12:
+ break;
+ default:
+ ERROR("Invalid image format. Only YV12 and I420 images are supported");
+ }
+ /* The display size must equal the size stored in the encoder configuration. */
+ if ((img->d_w != ctx->cfg.g_w) || (img->d_h != ctx->cfg.g_h))
+ ERROR("Image size must match encoder init configuration size");
+
+ return VPX_CODEC_OK;
+}
+
+
+static vpx_codec_err_t set_vp8e_config(VP9_CONFIG *oxcf,
+ vpx_codec_enc_cfg_t cfg,
+ struct vp8_extracfg vp8_cfg) {
+ oxcf->Version = cfg.g_profile;
+ oxcf->Version |= vp8_cfg.experimental ? 0x4 : 0;
+
+ oxcf->Width = cfg.g_w;
+ oxcf->Height = cfg.g_h;
+ /* guess a frame rate if out of whack, use 30 */
+ oxcf->frame_rate = (double)(cfg.g_timebase.den) / (double)(cfg.g_timebase.num);
+
+ if (oxcf->frame_rate > 180) {
+ oxcf->frame_rate = 30;
+ }
+
+ switch (cfg.g_pass) {
+ case VPX_RC_ONE_PASS:
+ oxcf->Mode = MODE_BESTQUALITY;
+ break;
+ case VPX_RC_FIRST_PASS:
+ oxcf->Mode = MODE_FIRSTPASS;
+ break;
+ case VPX_RC_LAST_PASS:
+ oxcf->Mode = MODE_SECONDPASS_BEST;
+ break;
+ }
+
+ if (cfg.g_pass == VPX_RC_FIRST_PASS) {
+ oxcf->allow_lag = 0;
+ oxcf->lag_in_frames = 0;
+ } else {
+ oxcf->allow_lag = (cfg.g_lag_in_frames) > 0;
+ oxcf->lag_in_frames = cfg.g_lag_in_frames;
+ }
+
+ // VBR only supported for now.
+ // CBR code has been deprecated for experimental phase.
+ // CQ mode not yet tested
+ oxcf->end_usage = USAGE_LOCAL_FILE_PLAYBACK;
+ /*if (cfg.rc_end_usage == VPX_CQ)
+ oxcf->end_usage = USAGE_CONSTRAINED_QUALITY;
+ else
+ oxcf->end_usage = USAGE_LOCAL_FILE_PLAYBACK;*/
+
+ oxcf->target_bandwidth = cfg.rc_target_bitrate;
+ oxcf->rc_max_intra_bitrate_pct = vp8_cfg.rc_max_intra_bitrate_pct;
+
+ oxcf->best_allowed_q = cfg.rc_min_quantizer;
+ oxcf->worst_allowed_q = cfg.rc_max_quantizer;
+ oxcf->cq_level = vp8_cfg.cq_level;
+ oxcf->fixed_q = -1;
+
+ oxcf->under_shoot_pct = cfg.rc_undershoot_pct;
+ oxcf->over_shoot_pct = cfg.rc_overshoot_pct;
+
+ oxcf->maximum_buffer_size = cfg.rc_buf_sz;
+ oxcf->starting_buffer_level = cfg.rc_buf_initial_sz;
+ oxcf->optimal_buffer_level = cfg.rc_buf_optimal_sz;
+
+ oxcf->two_pass_vbrbias = cfg.rc_2pass_vbr_bias_pct;
+ oxcf->two_pass_vbrmin_section = cfg.rc_2pass_vbr_minsection_pct;
+ oxcf->two_pass_vbrmax_section = cfg.rc_2pass_vbr_maxsection_pct;
+
+ oxcf->auto_key = cfg.kf_mode == VPX_KF_AUTO
+ && cfg.kf_min_dist != cfg.kf_max_dist;
+ // oxcf->kf_min_dist = cfg.kf_min_dis;
+ oxcf->key_freq = cfg.kf_max_dist;
+
+ // oxcf->delete_first_pass_file = cfg.g_delete_firstpassfile;
+ // strcpy(oxcf->first_pass_file, cfg.g_firstpass_file);
+
+ oxcf->cpu_used = vp8_cfg.cpu_used;
+ oxcf->encode_breakout = vp8_cfg.static_thresh;
+ oxcf->play_alternate = vp8_cfg.enable_auto_alt_ref;
+ oxcf->noise_sensitivity = vp8_cfg.noise_sensitivity;
+ oxcf->Sharpness = vp8_cfg.Sharpness;
+
+ oxcf->two_pass_stats_in = cfg.rc_twopass_stats_in;
+ oxcf->output_pkt_list = vp8_cfg.pkt_list;
+
+ oxcf->arnr_max_frames = vp8_cfg.arnr_max_frames;
+ oxcf->arnr_strength = vp8_cfg.arnr_strength;
+ oxcf->arnr_type = vp8_cfg.arnr_type;
+
+ oxcf->tuning = vp8_cfg.tuning;
+
+#if CONFIG_LOSSLESS
+ oxcf->lossless = vp8_cfg.lossless;
+#endif
+
+ /*
+ printf("Current VP8 Settings: \n");
+ printf("target_bandwidth: %d\n", oxcf->target_bandwidth);
+ printf("noise_sensitivity: %d\n", oxcf->noise_sensitivity);
+ printf("Sharpness: %d\n", oxcf->Sharpness);
+ printf("cpu_used: %d\n", oxcf->cpu_used);
+ printf("Mode: %d\n", oxcf->Mode);
+ printf("delete_first_pass_file: %d\n", oxcf->delete_first_pass_file);
+ printf("auto_key: %d\n", oxcf->auto_key);
+ printf("key_freq: %d\n", oxcf->key_freq);
+ printf("end_usage: %d\n", oxcf->end_usage);
+ printf("under_shoot_pct: %d\n", oxcf->under_shoot_pct);
+ printf("over_shoot_pct: %d\n", oxcf->over_shoot_pct);
+ printf("starting_buffer_level: %d\n", oxcf->starting_buffer_level);
+ printf("optimal_buffer_level: %d\n", oxcf->optimal_buffer_level);
+ printf("maximum_buffer_size: %d\n", oxcf->maximum_buffer_size);
+ printf("fixed_q: %d\n", oxcf->fixed_q);
+ printf("worst_allowed_q: %d\n", oxcf->worst_allowed_q);
+ printf("best_allowed_q: %d\n", oxcf->best_allowed_q);
+ printf("two_pass_vbrbias: %d\n", oxcf->two_pass_vbrbias);
+ printf("two_pass_vbrmin_section: %d\n", oxcf->two_pass_vbrmin_section);
+ printf("two_pass_vbrmax_section: %d\n", oxcf->two_pass_vbrmax_section);
+ printf("allow_lag: %d\n", oxcf->allow_lag);
+ printf("lag_in_frames: %d\n", oxcf->lag_in_frames);
+ printf("play_alternate: %d\n", oxcf->play_alternate);
+ printf("Version: %d\n", oxcf->Version);
+ printf("encode_breakout: %d\n", oxcf->encode_breakout);
+ */
+ return VPX_CODEC_OK;
+}
+
+static vpx_codec_err_t vp8e_set_config(vpx_codec_alg_priv_t *ctx,
+ const vpx_codec_enc_cfg_t *cfg) {
+ vpx_codec_err_t res;
+ /* Reject any change to the frame dimensions stored in ctx->cfg. */
+ if ((cfg->g_w != ctx->cfg.g_w) || (cfg->g_h != ctx->cfg.g_h))
+ ERROR("Cannot change width or height after initialization");
+
+ /* Prevent increasing lag_in_frames. This check is stricter than it needs
+ * to be -- the limit is not increasing past the first lag_in_frames
+ * value, but we don't track the initial config, only the last successful
+ * config.
+ */
+ if ((cfg->g_lag_in_frames > ctx->cfg.g_lag_in_frames))
+ ERROR("Cannot increase lag_in_frames");
+
+ res = validate_config(ctx, cfg, &ctx->vp8_cfg);
+ /* Store and apply the new configuration only when validation succeeded. */
+ if (!res) {
+ ctx->cfg = *cfg;
+ set_vp8e_config(&ctx->oxcf, ctx->cfg, ctx->vp8_cfg);
+ vp9_change_config(ctx->cpi, &ctx->oxcf);
+ }
+
+ return res;
+}
+
+
+int vp9_reverse_trans(int q);
+
+
+static vpx_codec_err_t get_param(vpx_codec_alg_priv_t *ctx,
+ int ctrl_id,
+ va_list args) {
+ void *arg = va_arg(args, void *);
+ /* MAP(id, var): for control id, store var through the caller-supplied pointer. */
+#define MAP(id, var) case id: *(RECAST(id, arg)) = var; break
+
+ if (!arg)
+ return VPX_CODEC_INVALID_PARAM;
+
+ switch (ctrl_id) {
+ MAP(VP8E_GET_LAST_QUANTIZER, vp9_get_quantizer(ctx->cpi));
+ MAP(VP8E_GET_LAST_QUANTIZER_64,
+ vp9_reverse_trans(vp9_get_quantizer(ctx->cpi)));
+ }
+ /* Control ids not listed in the switch write nothing and still return VPX_CODEC_OK. */
+ return VPX_CODEC_OK;
+#undef MAP
+}
+
+
+static vpx_codec_err_t set_param(vpx_codec_alg_priv_t *ctx,
+ int ctrl_id,
+ va_list args) {
+ vpx_codec_err_t res = VPX_CODEC_OK;
+ struct vp8_extracfg xcfg = ctx->vp8_cfg;
+ /* MAP(id, var): for control id, read its argument from args into a field of the local copy xcfg. */
+#define MAP(id, var) case id: var = CAST(id, args); break;
+
+ switch (ctrl_id) {
+ MAP(VP8E_SET_CPUUSED, xcfg.cpu_used);
+ MAP(VP8E_SET_ENABLEAUTOALTREF, xcfg.enable_auto_alt_ref);
+ MAP(VP8E_SET_NOISE_SENSITIVITY, xcfg.noise_sensitivity);
+ MAP(VP8E_SET_SHARPNESS, xcfg.Sharpness);
+ MAP(VP8E_SET_STATIC_THRESHOLD, xcfg.static_thresh);
+ MAP(VP8E_SET_TOKEN_PARTITIONS, xcfg.token_partitions);
+
+ MAP(VP8E_SET_ARNR_MAXFRAMES, xcfg.arnr_max_frames);
+ MAP(VP8E_SET_ARNR_STRENGTH, xcfg.arnr_strength);
+ MAP(VP8E_SET_ARNR_TYPE, xcfg.arnr_type);
+ MAP(VP8E_SET_TUNING, xcfg.tuning);
+ MAP(VP8E_SET_CQ_LEVEL, xcfg.cq_level);
+ MAP(VP8E_SET_MAX_INTRA_BITRATE_PCT, xcfg.rc_max_intra_bitrate_pct);
+#if CONFIG_LOSSLESS
+ MAP(VP9E_SET_LOSSLESS, xcfg.lossless);
+#endif
+ }
+ /* Validate the modified copy; ctx->vp8_cfg is overwritten only on success. */
+ res = validate_config(ctx, &ctx->cfg, &xcfg);
+
+ if (!res) {
+ ctx->vp8_cfg = xcfg;
+ set_vp8e_config(&ctx->oxcf, ctx->cfg, ctx->vp8_cfg);
+ vp9_change_config(ctx->cpi, &ctx->oxcf);
+ }
+
+ return res;
+#undef MAP
+}
+
+
+static vpx_codec_err_t vp8e_common_init(vpx_codec_ctx_t *ctx,
+ int experimental) {
+ vpx_codec_err_t res = VPX_CODEC_OK;
+ struct vpx_codec_alg_priv *priv;
+ vpx_codec_enc_cfg_t *cfg;
+ unsigned int i;
+
+ VP9_PTR optr;
+
+ if (!ctx->priv) {
+ priv = calloc(1, sizeof(struct vpx_codec_alg_priv));
+
+ if (!priv) {
+ return VPX_CODEC_MEM_ERROR;
+ }
+
+ ctx->priv = &priv->base;
+ ctx->priv->sz = sizeof(*ctx->priv);
+ ctx->priv->iface = ctx->iface;
+ ctx->priv->alg_priv = priv;
+ ctx->priv->init_flags = ctx->init_flags;
+ ctx->priv->enc.total_encoders = 1;
+
+ if (ctx->config.enc) {
+ /* Update the reference to the config structure to an
+ * internal copy.
+ */
+ ctx->priv->alg_priv->cfg = *ctx->config.enc;
+ ctx->config.enc = &ctx->priv->alg_priv->cfg;
+ }
+
+ cfg = &ctx->priv->alg_priv->cfg;
+
+ /* Select the extra vp8 configuration table based on the current
+ * usage value. If the current usage value isn't found, use the
+ * values for usage case 0.
+ */
+ for (i = 0;
+ extracfg_map[i].usage && extracfg_map[i].usage != cfg->g_usage;
+ i++);
+
+ priv->vp8_cfg = extracfg_map[i].cfg;
+ priv->vp8_cfg.pkt_list = &priv->pkt_list.head;
+ priv->vp8_cfg.experimental = experimental;
+
+ priv->cx_data_sz = priv->cfg.g_w * priv->cfg.g_h * 3 / 2 * 2;
+
+ if (priv->cx_data_sz < 4096) priv->cx_data_sz = 4096;
+
+ priv->cx_data = malloc(priv->cx_data_sz);
+
+ if (!priv->cx_data) {
+ return VPX_CODEC_MEM_ERROR;
+ }
+
+ vp9_initialize_enc();
+
+ res = validate_config(priv, &priv->cfg, &priv->vp8_cfg);
+
+ if (!res) {
+ set_vp8e_config(&ctx->priv->alg_priv->oxcf,
+ ctx->priv->alg_priv->cfg,
+ ctx->priv->alg_priv->vp8_cfg);
+ optr = vp9_create_compressor(&ctx->priv->alg_priv->oxcf);
+
+ if (!optr)
+ res = VPX_CODEC_MEM_ERROR;
+ else
+ ctx->priv->alg_priv->cpi = optr;
+ }
+ }
+
+ return res;
+}
+
+
+static vpx_codec_err_t vp8e_init(vpx_codec_ctx_t *ctx,
+ vpx_codec_priv_enc_mr_cfg_t *data) {
+ return vp8e_common_init(ctx, 0);
+}
+
+
+#if CONFIG_EXPERIMENTAL
+static vpx_codec_err_t vp8e_exp_init(vpx_codec_ctx_t *ctx,
+ vpx_codec_priv_enc_mr_cfg_t *data) {
+ return vp8e_common_init(ctx, 1);
+}
+#endif
+
+
+static vpx_codec_err_t vp8e_destroy(vpx_codec_alg_priv_t *ctx) {
+ /* Free the output buffer, remove the compressor instance, then free the private context itself. */
+ free(ctx->cx_data);
+ vp9_remove_compressor(&ctx->cpi);
+ free(ctx);
+ return VPX_CODEC_OK;
+}
+
+static vpx_codec_err_t image2yuvconfig(const vpx_image_t *img,
+ YV12_BUFFER_CONFIG *yv12) {
+ vpx_codec_err_t res = VPX_CODEC_OK;
+ yv12->y_buffer = img->planes[VPX_PLANE_Y];
+ yv12->u_buffer = img->planes[VPX_PLANE_U];
+ yv12->v_buffer = img->planes[VPX_PLANE_V];
+ /* Chroma dimensions are half the luma dimensions, rounded up. */
+ yv12->y_width = img->d_w;
+ yv12->y_height = img->d_h;
+ yv12->uv_width = (1 + yv12->y_width) / 2;
+ yv12->uv_height = (1 + yv12->y_height) / 2;
+ /* Both chroma planes take the U plane's stride. */
+ yv12->y_stride = img->stride[VPX_PLANE_Y];
+ yv12->uv_stride = img->stride[VPX_PLANE_U];
+ /* Border is half the difference between the luma stride and the image width. */
+ yv12->border = (img->stride[VPX_PLANE_Y] - img->w) / 2;
+ yv12->clrtype = (img->fmt == VPX_IMG_FMT_VPXI420 || img->fmt == VPX_IMG_FMT_VPXYV12); // REG_YUV = 0
+ return res;
+}
+
+static void pick_quickcompress_mode(vpx_codec_alg_priv_t *ctx,
+ unsigned long duration,
+ unsigned long deadline) {
+ unsigned int new_qc;
+
+ /* Use best quality mode if no deadline is given. */
+ if (deadline)
+ new_qc = MODE_GOODQUALITY;
+ else
+ new_qc = MODE_BESTQUALITY;
+ /* The configured pass then overrides the deadline-based choice above. */
+ if (ctx->cfg.g_pass == VPX_RC_FIRST_PASS)
+ new_qc = MODE_FIRSTPASS;
+ else if (ctx->cfg.g_pass == VPX_RC_LAST_PASS)
+ new_qc = (new_qc == MODE_BESTQUALITY)
+ ? MODE_SECONDPASS_BEST
+ : MODE_SECONDPASS;
+ /* Reconfigure the compressor only when the mode actually changes. */
+ if (ctx->oxcf.Mode != new_qc) {
+ ctx->oxcf.Mode = new_qc;
+ vp9_change_config(ctx->cpi, &ctx->oxcf);
+ }
+}
+
+
+static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx,
+ const vpx_image_t *img,
+ vpx_codec_pts_t pts,
+ unsigned long duration,
+ vpx_enc_frame_flags_t flags,
+ unsigned long deadline) {
+ vpx_codec_err_t res = VPX_CODEC_OK;
+
+ if (img)
+ res = validate_img(ctx, img);
+
+ pick_quickcompress_mode(ctx, duration, deadline);
+ vpx_codec_pkt_list_init(&ctx->pkt_list);
+
+ /* Handle Flags */
+ if (((flags & VP8_EFLAG_NO_UPD_GF) && (flags & VP8_EFLAG_FORCE_GF))
+ || ((flags & VP8_EFLAG_NO_UPD_ARF) && (flags & VP8_EFLAG_FORCE_ARF))) {
+ ctx->base.err_detail = "Conflicting flags.";
+ return VPX_CODEC_INVALID_PARAM;
+ }
+
+ if (flags & (VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_GF
+ | VP8_EFLAG_NO_REF_ARF)) {
+ int ref = 7;
+
+ if (flags & VP8_EFLAG_NO_REF_LAST)
+ ref ^= VP9_LAST_FLAG;
+
+ if (flags & VP8_EFLAG_NO_REF_GF)
+ ref ^= VP9_GOLD_FLAG;
+
+ if (flags & VP8_EFLAG_NO_REF_ARF)
+ ref ^= VP9_ALT_FLAG;
+
+ vp9_use_as_reference(ctx->cpi, ref);
+ }
+
+ if (flags & (VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF
+ | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_FORCE_GF
+ | VP8_EFLAG_FORCE_ARF)) {
+ int upd = 7;
+
+ if (flags & VP8_EFLAG_NO_UPD_LAST)
+ upd ^= VP9_LAST_FLAG;
+
+ if (flags & VP8_EFLAG_NO_UPD_GF)
+ upd ^= VP9_GOLD_FLAG;
+
+ if (flags & VP8_EFLAG_NO_UPD_ARF)
+ upd ^= VP9_ALT_FLAG;
+
+ vp9_update_reference(ctx->cpi, upd);
+ }
+
+ if (flags & VP8_EFLAG_NO_UPD_ENTROPY) {
+ vp9_update_entropy(ctx->cpi, 0);
+ }
+
+ /* Handle fixed keyframe intervals */
+ if (ctx->cfg.kf_mode == VPX_KF_AUTO
+ && ctx->cfg.kf_min_dist == ctx->cfg.kf_max_dist) {
+ if (++ctx->fixed_kf_cntr > ctx->cfg.kf_min_dist) {
+ flags |= VPX_EFLAG_FORCE_KF;
+ ctx->fixed_kf_cntr = 1;
+ }
+ }
+
+ /* Proceed only if validation passed and the compressor instance exists */
+ if (!res && ctx->cpi) {
+ unsigned int lib_flags;
+ YV12_BUFFER_CONFIG sd;
+ int64_t dst_time_stamp, dst_end_time_stamp;
+ unsigned long size, cx_data_sz;
+ unsigned char *cx_data;
+
+ /* Set up internal flags */
+ if (ctx->base.init_flags & VPX_CODEC_USE_PSNR)
+ ((VP9_COMP *)ctx->cpi)->b_calculate_psnr = 1;
+
+ // if (ctx->base.init_flags & VPX_CODEC_USE_OUTPUT_PARTITION)
+ // ((VP9_COMP *)ctx->cpi)->output_partition = 1;
+
+ /* Convert API flags to internal codec lib flags */
+ lib_flags = (flags & VPX_EFLAG_FORCE_KF) ? FRAMEFLAGS_KEY : 0;
+
+ /* vp8 uses 10,000,000 ticks/second for time stamps */
+ dst_time_stamp = pts * 10000000 * ctx->cfg.g_timebase.num / ctx->cfg.g_timebase.den;
+ dst_end_time_stamp = (pts + duration) * 10000000 * ctx->cfg.g_timebase.num / ctx->cfg.g_timebase.den;
+
+ if (img != NULL) {
+ res = image2yuvconfig(img, &sd);
+
+ if (vp9_receive_raw_frame(ctx->cpi, ctx->next_frame_flag | lib_flags,
+ &sd, dst_time_stamp, dst_end_time_stamp)) {
+ VP9_COMP *cpi = (VP9_COMP *)ctx->cpi;
+ res = update_error_state(ctx, &cpi->common.error);
+ }
+
+ /* reset for next frame */
+ ctx->next_frame_flag = 0;
+ }
+
+ cx_data = ctx->cx_data;
+ cx_data_sz = ctx->cx_data_sz;
+ lib_flags = 0;
+
+ /* Any pending invisible frames? */
+ if (ctx->pending_cx_data) {
+ memmove(cx_data, ctx->pending_cx_data, ctx->pending_cx_data_sz);
+ ctx->pending_cx_data = cx_data;
+ cx_data += ctx->pending_cx_data_sz;
+ cx_data_sz -= ctx->pending_cx_data_sz;
+
+ /* TODO: this is a minimal check, the underlying codec doesn't respect
+ * the buffer size anyway.
+ */
+ if (cx_data_sz < ctx->cx_data_sz / 2) {
+ ctx->base.err_detail = "Compressed data buffer too small";
+ return VPX_CODEC_ERROR;
+ }
+ }
+
+ while (cx_data_sz >= ctx->cx_data_sz / 2 &&
+ -1 != vp9_get_compressed_data(ctx->cpi, &lib_flags, &size,
+ cx_data, &dst_time_stamp,
+ &dst_end_time_stamp, !img)) {
+ if (size) {
+ vpx_codec_pts_t round, delta;
+ vpx_codec_cx_pkt_t pkt;
+ VP9_COMP *cpi = (VP9_COMP *)ctx->cpi;
+
+ /* Pack invisible frames with the next visible frame */
+ if (!cpi->common.show_frame) {
+ if (!ctx->pending_cx_data)
+ ctx->pending_cx_data = cx_data;
+ ctx->pending_cx_data_sz += size;
+ cx_data += size;
+ cx_data_sz -= size;
+ continue;
+ }
+
+ /* Add the frame packet to the list of returned packets. */
+ round = 1000000 * ctx->cfg.g_timebase.num / 2 - 1;
+ delta = (dst_end_time_stamp - dst_time_stamp);
+ pkt.kind = VPX_CODEC_CX_FRAME_PKT;
+ pkt.data.frame.pts =
+ (dst_time_stamp * ctx->cfg.g_timebase.den + round)
+ / ctx->cfg.g_timebase.num / 10000000;
+ pkt.data.frame.duration = (unsigned long)
+ ((delta * ctx->cfg.g_timebase.den + round)
+ / ctx->cfg.g_timebase.num / 10000000);
+ pkt.data.frame.flags = lib_flags << 16;
+
+ if (lib_flags & FRAMEFLAGS_KEY)
+ pkt.data.frame.flags |= VPX_FRAME_IS_KEY;
+
+ if (!cpi->common.show_frame) {
+ pkt.data.frame.flags |= VPX_FRAME_IS_INVISIBLE;
+
+ // This timestamp should be as close as possible to the
+ // prior PTS so that if a decoder uses pts to schedule when
+ // to do this, we start right after last frame was decoded.
+ // Invisible frames have no duration.
+ pkt.data.frame.pts = ((cpi->last_time_stamp_seen
+ * ctx->cfg.g_timebase.den + round)
+ / ctx->cfg.g_timebase.num / 10000000) + 1;
+ pkt.data.frame.duration = 0;
+ }
+
+ if (cpi->droppable)
+ pkt.data.frame.flags |= VPX_FRAME_IS_DROPPABLE;
+
+ /*if (cpi->output_partition)
+ {
+ int i;
+ const int num_partitions = 1;
+
+ pkt.data.frame.flags |= VPX_FRAME_IS_FRAGMENT;
+
+ for (i = 0; i < num_partitions; ++i)
+ {
+ pkt.data.frame.buf = cx_data;
+ pkt.data.frame.sz = cpi->partition_sz[i];
+ pkt.data.frame.partition_id = i;
+ // don't set the fragment bit for the last partition
+ if (i == (num_partitions - 1))
+ pkt.data.frame.flags &= ~VPX_FRAME_IS_FRAGMENT;
+ vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt);
+ cx_data += cpi->partition_sz[i];
+ cx_data_sz -= cpi->partition_sz[i];
+ }
+ }
+ else*/
+ {
+ if (ctx->pending_cx_data) {
+ pkt.data.frame.buf = ctx->pending_cx_data;
+ pkt.data.frame.sz = ctx->pending_cx_data_sz + size;
+ ctx->pending_cx_data = NULL;
+ ctx->pending_cx_data_sz = 0;
+ } else {
+ pkt.data.frame.buf = cx_data;
+ pkt.data.frame.sz = size;
+ }
+ pkt.data.frame.partition_id = -1;
+ vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt);
+ cx_data += size;
+ cx_data_sz -= size;
+ }
+
+ // printf("timestamp: %lld, duration: %d\n", pkt->data.frame.pts, pkt->data.frame.duration);
+ }
+ }
+ }
+
+ return res;
+}
+
+
+static const vpx_codec_cx_pkt_t *vp8e_get_cxdata(vpx_codec_alg_priv_t *ctx,
+ vpx_codec_iter_t *iter) {
+ return vpx_codec_pkt_list_get(&ctx->pkt_list.head, iter);
+}
+
+static vpx_codec_err_t vp8e_set_reference(vpx_codec_alg_priv_t *ctx,
+ int ctr_id,
+ va_list args) {
+ vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *);
+ /* A NULL argument is rejected; otherwise the image is wrapped and passed to vp9_set_reference_enc. */
+ if (data) {
+ vpx_ref_frame_t *frame = (vpx_ref_frame_t *)data;
+ YV12_BUFFER_CONFIG sd;
+
+ image2yuvconfig(&frame->img, &sd);
+ vp9_set_reference_enc(ctx->cpi, frame->frame_type, &sd);
+ return VPX_CODEC_OK;
+ } else
+ return VPX_CODEC_INVALID_PARAM;
+
+}
+
+static vpx_codec_err_t vp8e_get_reference(vpx_codec_alg_priv_t *ctx,
+ int ctr_id,
+ va_list args) {
+ /* A NULL argument is rejected; otherwise the caller's image is wrapped and passed to vp9_get_reference_enc. */
+ vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *);
+
+ if (data) {
+ vpx_ref_frame_t *frame = (vpx_ref_frame_t *)data;
+ YV12_BUFFER_CONFIG sd;
+
+ image2yuvconfig(&frame->img, &sd);
+ vp9_get_reference_enc(ctx->cpi, frame->frame_type, &sd);
+ return VPX_CODEC_OK;
+ } else
+ return VPX_CODEC_INVALID_PARAM;
+}
+
+static vpx_codec_err_t vp8e_set_previewpp(vpx_codec_alg_priv_t *ctx,
+ int ctr_id,
+ va_list args) {
+#if CONFIG_POSTPROC
+ vp8_postproc_cfg_t *data = va_arg(args, vp8_postproc_cfg_t *);
+ (void)ctr_id;
+
+ if (data) {
+ ctx->preview_ppcfg = *((vp8_postproc_cfg_t *)data);
+ return VPX_CODEC_OK;
+ } else
+ return VPX_CODEC_INVALID_PARAM;
+#else
+ (void)ctx;
+ (void)ctr_id;
+ (void)args;
+ return VPX_CODEC_INCAPABLE;
+#endif
+}
+
+
+/* Builds a vpx_image_t view of the encoder's preview frame.
+ *
+ * Post-processing settings stored in ctx->preview_ppcfg are forwarded to
+ * vp9_get_preview_raw_frame() only when a post_proc_flag is set; otherwise
+ * zeroed flags are passed.
+ *
+ * Returns &ctx->preview_img, filled in from the reported frame buffer, or
+ * NULL when vp9_get_preview_raw_frame() returns non-zero.
+ */
+static vpx_image_t *vp8e_get_preview(vpx_codec_alg_priv_t *ctx) {
+  YV12_BUFFER_CONFIG sd;
+  vp9_ppflags_t flags = {0};
+  vpx_image_t *const img = &ctx->preview_img;
+
+  if (ctx->preview_ppcfg.post_proc_flag) {
+    flags.post_proc_flag = ctx->preview_ppcfg.post_proc_flag;
+    flags.deblocking_level = ctx->preview_ppcfg.deblocking_level;
+    flags.noise_level = ctx->preview_ppcfg.noise_level;
+  }
+
+  if (vp9_get_preview_raw_frame(ctx->cpi, &sd, &flags) != 0)
+    return NULL;
+
+  /* The image is described field by field (rather than via vpx_img_wrap)
+   * so each plane keeps the pointer and stride reported in sd. */
+  img->fmt = (sd.clrtype == REG_YUV) ? VPX_IMG_FMT_I420
+                                     : VPX_IMG_FMT_VPXI420;
+  /* 4:2:0 layout: chroma halved in both directions, 12 bits per pixel. */
+  img->bps = 12;
+  img->x_chroma_shift = 1;
+  img->y_chroma_shift = 1;
+
+  /* Stored and displayed dimensions are both the luma frame size. */
+  img->w = sd.y_width;
+  img->h = sd.y_height;
+  img->d_w = sd.y_width;
+  img->d_h = sd.y_height;
+
+  img->planes[VPX_PLANE_Y] = sd.y_buffer;
+  img->planes[VPX_PLANE_U] = sd.u_buffer;
+  img->planes[VPX_PLANE_V] = sd.v_buffer;
+
+  /* Both chroma planes share a single stride. */
+  img->stride[VPX_PLANE_Y] = sd.y_stride;
+  img->stride[VPX_PLANE_U] = sd.uv_stride;
+  img->stride[VPX_PLANE_V] = sd.uv_stride;
+
+  return img;
+}
+
+/* VP8E_UPD_ENTROPY handler: forwards the int argument to vp9_update_entropy(). */
+static vpx_codec_err_t vp8e_update_entropy(vpx_codec_alg_priv_t *ctx,
+                                           int ctr_id, va_list args) {
+  const int enable = va_arg(args, int);
+
+  vp9_update_entropy(ctx->cpi, enable);
+  return VPX_CODEC_OK;
+}
+
+/* VP8E_UPD_REFERENCE handler: forwards the int to vp9_update_reference(). */
+static vpx_codec_err_t vp8e_update_reference(vpx_codec_alg_priv_t *ctx,
+                                             int ctr_id, va_list args) {
+  const int ref_flags = va_arg(args, int);
+  vp9_update_reference(ctx->cpi, ref_flags);
+  return VPX_CODEC_OK;
+}
+
+/* VP8E_USE_REFERENCE handler: forwards the int to vp9_use_as_reference(). */
+static vpx_codec_err_t vp8e_use_reference(vpx_codec_alg_priv_t *ctx,
+                                          int ctr_id, va_list args) {
+  const int ref_flags = va_arg(args, int);
+  vp9_use_as_reference(ctx->cpi, ref_flags);
+  return VPX_CODEC_OK;
+}
+
+/* VP8E_SET_ROI_MAP handler: passes the caller's region-of-interest map with
+ * its delta_q, delta_lf and static_threshold fields to vp9_set_roimap(). A
+ * NULL argument or a non-zero result gives VPX_CODEC_INVALID_PARAM. */
+static vpx_codec_err_t vp8e_set_roi_map(vpx_codec_alg_priv_t *ctx,
+                                        int ctr_id, va_list args) {
+  vpx_roi_map_t *const roi = va_arg(args, vpx_roi_map_t *);
+
+  if (roi == NULL)
+    return VPX_CODEC_INVALID_PARAM;
+
+  if (vp9_set_roimap(ctx->cpi, roi->roi_map, roi->rows, roi->cols,
+                     roi->delta_q, roi->delta_lf, roi->static_threshold))
+    return VPX_CODEC_INVALID_PARAM;
+
+  return VPX_CODEC_OK;
+}
+
+
+/* VP8E_SET_ACTIVEMAP handler: hands the caller's active map and its
+ * dimensions to vp9_set_active_map(). A NULL argument or a non-zero result
+ * from vp9_set_active_map() gives VPX_CODEC_INVALID_PARAM. */
+static vpx_codec_err_t vp8e_set_activemap(vpx_codec_alg_priv_t *ctx,
+                                          int ctr_id,
+                                          va_list args) {
+  vpx_active_map_t *const map = va_arg(args, vpx_active_map_t *);
+
+  if (map == NULL)
+    return VPX_CODEC_INVALID_PARAM;
+
+  if (vp9_set_active_map(ctx->cpi, map->active_map, map->rows, map->cols))
+    return VPX_CODEC_INVALID_PARAM;
+
+  return VPX_CODEC_OK;
+}
+
+/* VP8E_SET_SCALEMODE handler: applies the requested horizontal and vertical
+ * scaling modes via vp9_set_internal_size(). On success the next frame is
+ * flagged as a key frame; a NULL argument or a non-zero result from
+ * vp9_set_internal_size() gives VPX_CODEC_INVALID_PARAM. */
+static vpx_codec_err_t vp8e_set_scalemode(vpx_codec_alg_priv_t *ctx,
+                                          int ctr_id,
+                                          va_list args) {
+  vpx_scaling_mode_t *const mode = va_arg(args, vpx_scaling_mode_t *);
+
+  if (mode == NULL)
+    return VPX_CODEC_INVALID_PARAM;
+
+  if (vp9_set_internal_size(ctx->cpi, mode->h_scaling_mode,
+                            mode->v_scaling_mode))
+    return VPX_CODEC_INVALID_PARAM;
+
+  /* Force the next frame to be a key frame so the new scaling mode takes
+   * effect. */
+  ctx->next_frame_flag |= FRAMEFLAGS_KEY;
+  return VPX_CODEC_OK;
+}
+
+
+static vpx_codec_ctrl_fn_map_t vp8e_ctf_maps[] = { /* control id -> handler function */
+ {VP8_SET_REFERENCE, vp8e_set_reference},
+ {VP8_COPY_REFERENCE, vp8e_get_reference},
+ {VP8_SET_POSTPROC, vp8e_set_previewpp},
+ {VP8E_UPD_ENTROPY, vp8e_update_entropy},
+ {VP8E_UPD_REFERENCE, vp8e_update_reference},
+ {VP8E_USE_REFERENCE, vp8e_use_reference},
+ {VP8E_SET_ROI_MAP, vp8e_set_roi_map},
+ {VP8E_SET_ACTIVEMAP, vp8e_set_activemap},
+ {VP8E_SET_SCALEMODE, vp8e_set_scalemode},
+ {VP8E_SET_CPUUSED, set_param}, /* from here on the setters share set_param ... */
+ {VP8E_SET_NOISE_SENSITIVITY, set_param},
+ {VP8E_SET_ENABLEAUTOALTREF, set_param},
+ {VP8E_SET_SHARPNESS, set_param},
+ {VP8E_SET_STATIC_THRESHOLD, set_param},
+ {VP8E_SET_TOKEN_PARTITIONS, set_param},
+ {VP8E_GET_LAST_QUANTIZER, get_param}, /* ... and the getters share get_param */
+ {VP8E_GET_LAST_QUANTIZER_64, get_param},
+ {VP8E_SET_ARNR_MAXFRAMES, set_param},
+ {VP8E_SET_ARNR_STRENGTH, set_param},
+ {VP8E_SET_ARNR_TYPE, set_param},
+ {VP8E_SET_TUNING, set_param},
+ {VP8E_SET_CQ_LEVEL, set_param},
+ {VP8E_SET_MAX_INTRA_BITRATE_PCT, set_param},
+#if CONFIG_LOSSLESS
+ {VP9E_SET_LOSSLESS, set_param},
+#endif
+ { -1, NULL}, /* final entry; presumably the end-of-list marker -- confirm */
+};
+
+static vpx_codec_enc_cfg_map_t vp8e_usage_cfg_map[] = { /* presumably the default config per usage id -- confirm */
+ {
+ 0, /* first member of the map entry; presumably the usage id -- confirm */
+ {
+ 0, /* g_usage */
+ 0, /* g_threads */
+ 0, /* g_profile */
+
+ 320, /* g_width */
+ 240, /* g_height */
+ {1, 30}, /* g_timebase */
+
+ 0, /* g_error_resilient */
+
+ VPX_RC_ONE_PASS, /* g_pass */
+
+ 0, /* g_lag_in_frames */
+
+ 0, /* rc_dropframe_thresh */
+ 0, /* rc_resize_allowed */
+ 60, /* rc_resize_down_threshold */
+ 30, /* rc_resize_up_threshold */
+
+ VPX_VBR, /* rc_end_usage */
+#if VPX_ENCODER_ABI_VERSION > (1 + VPX_CODEC_ABI_VERSION)
+ {0}, /* rc_twopass_stats_in */
+#endif
+ 256, /* rc_target_bandwidth */
+ 4, /* rc_min_quantizer */
+ 63, /* rc_max_quantizer */
+ 100, /* rc_undershoot_pct */
+ 100, /* rc_overshoot_pct */
+
+ 6000, /* rc_max_buffer_size */
+ 4000, /* rc_buffer_initial_size; */
+ 5000, /* rc_buffer_optimal_size; */
+
+ 50, /* rc_two_pass_vbrbias */
+ 0, /* rc_two_pass_vbrmin_section */
+ 400, /* rc_two_pass_vbrmax_section */
+
+ /* keyframing settings (kf) */
+ VPX_KF_AUTO, /* g_kfmode*/
+ 0, /* kf_min_dist */
+ 9999, /* kf_max_dist */
+
+#if VPX_ENCODER_ABI_VERSION == (1 + VPX_CODEC_ABI_VERSION)
+ 1, /* g_delete_first_pass_file */
+ "vp8.fpf" /* first pass filename */
+#endif
+ }
+ },
+ { -1, {NOT_IMPLEMENTED}} /* final entry; presumably the end-of-list marker -- confirm */
+};
+
+
+#ifndef VERSION_STRING /* fall back to an empty suffix when none is defined */
+#define VERSION_STRING
+#endif
+CODEC_INTERFACE(vpx_codec_vp9_cx) = {
+ "WebM Project VP9 Encoder" VERSION_STRING,
+ VPX_CODEC_INTERNAL_ABI_VERSION,
+ VPX_CODEC_CAP_ENCODER | VPX_CODEC_CAP_PSNR |
+ VPX_CODEC_CAP_OUTPUT_PARTITION,
+ /* vpx_codec_caps_t caps; */
+ vp8e_init, /* vpx_codec_init_fn_t init; */
+ vp8e_destroy, /* vpx_codec_destroy_fn_t destroy; */
+ vp8e_ctf_maps, /* vpx_codec_ctrl_fn_map_t *ctrl_maps; */
+ NOT_IMPLEMENTED, /* vpx_codec_get_mmap_fn_t get_mmap; */
+ NOT_IMPLEMENTED, /* vpx_codec_set_mmap_fn_t set_mmap; */
+ { /* decoder functions: none are provided by the encoder */
+ NOT_IMPLEMENTED, /* vpx_codec_peek_si_fn_t peek_si; */
+ NOT_IMPLEMENTED, /* vpx_codec_get_si_fn_t get_si; */
+ NOT_IMPLEMENTED, /* vpx_codec_decode_fn_t decode; */
+ NOT_IMPLEMENTED, /* vpx_codec_frame_get_fn_t frame_get; */
+ },
+ {
+ vp8e_usage_cfg_map, /* vpx_codec_enc_cfg_map_t *cfg_maps; */
+ vp8e_encode, /* vpx_codec_encode_fn_t encode; */
+ vp8e_get_cxdata, /* vpx_codec_get_cx_data_fn_t get_cx_data; */
+ vp8e_set_config, /* vpx_codec_enc_config_set_fn_t cfg_set; */
+ NOT_IMPLEMENTED, /* vpx_codec_get_global_headers_fn_t get_glob_hdrs; */
+ vp8e_get_preview, /* vpx_codec_get_preview_frame_fn_t get_preview; */
+ } /* encoder functions */
+};
+
+
+#if CONFIG_EXPERIMENTAL
+/* Experimental encoder interface: its own name string, capability set and init hook. */
+CODEC_INTERFACE(vpx_codec_vp9x_cx) = {
+ "VP8 Experimental Encoder" VERSION_STRING,
+ VPX_CODEC_INTERNAL_ABI_VERSION,
+ VPX_CODEC_CAP_ENCODER | VPX_CODEC_CAP_PSNR,
+ /* vpx_codec_caps_t caps; */
+ vp8e_exp_init, /* vpx_codec_init_fn_t init; */
+ vp8e_destroy, /* vpx_codec_destroy_fn_t destroy; */
+ vp8e_ctf_maps, /* vpx_codec_ctrl_fn_map_t *ctrl_maps; */
+ NOT_IMPLEMENTED, /* vpx_codec_get_mmap_fn_t get_mmap; */
+ NOT_IMPLEMENTED, /* vpx_codec_set_mmap_fn_t set_mmap; */
+ { /* decoder functions: none are provided by the encoder */
+ NOT_IMPLEMENTED, /* vpx_codec_peek_si_fn_t peek_si; */
+ NOT_IMPLEMENTED, /* vpx_codec_get_si_fn_t get_si; */
+ NOT_IMPLEMENTED, /* vpx_codec_decode_fn_t decode; */
+ NOT_IMPLEMENTED, /* vpx_codec_frame_get_fn_t frame_get; */
+ },
+ {
+ vp8e_usage_cfg_map, /* vpx_codec_enc_cfg_map_t *cfg_maps; */
+ vp8e_encode, /* vpx_codec_encode_fn_t encode; */
+ vp8e_get_cxdata, /* vpx_codec_get_cx_data_fn_t get_cx_data; */
+ vp8e_set_config, /* vpx_codec_enc_config_set_fn_t cfg_set; */
+ NOT_IMPLEMENTED, /* vpx_codec_get_global_headers_fn_t get_glob_hdrs; */
+ vp8e_get_preview, /* vpx_codec_get_preview_frame_fn_t get_preview; */
+ } /* encoder functions */
+};
+#endif
diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c
new file mode 100644
index 0000000..c35ebed
--- /dev/null
+++ b/vp9/vp9_dx_iface.c
@@ -0,0 +1,714 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include <stdlib.h>
+#include <string.h>
+#include "vpx/vpx_decoder.h"
+#include "vpx/vp8dx.h"
+#include "vpx/internal/vpx_codec_internal.h"
+#include "vpx_version.h"
+#include "common/vp9_onyxd.h"
+#include "decoder/vp9_onyxd_int.h"
+
+#define VP8_CAP_POSTPROC (CONFIG_POSTPROC ? VPX_CODEC_CAP_POSTPROC : 0) /* capability bit only when CONFIG_POSTPROC is non-zero */
+typedef vpx_codec_stream_info_t vp8_stream_info_t;
+
+/* Structures for handling memory allocations */
+typedef enum {
+ VP8_SEG_ALG_PRIV = 256, /* segment that holds the private decoder context */
+ VP8_SEG_MAX /* id of the terminating entry in vp8_mem_req_segs */
+} mem_seg_id_t;
+#define NELEMENTS(x) ((int)(sizeof(x)/sizeof(x[0]))) /* element count as int; x must be a real array, not a pointer */
+
+static unsigned long vp8_priv_sz(const vpx_codec_dec_cfg_t *si, vpx_codec_flags_t);
+/* Describes one memory segment required by the decoder. */
+typedef struct {
+ unsigned int id;
+ unsigned long sz; /* fixed size; 0 means the size comes from calc_sz */
+ unsigned int align;
+ unsigned int flags;
+ unsigned long(*calc_sz)(const vpx_codec_dec_cfg_t *, vpx_codec_flags_t);
+} mem_req_t;
+
+static const mem_req_t vp8_mem_req_segs[] = {
+ {VP8_SEG_ALG_PRIV, 0, 8, VPX_CODEC_MEM_ZERO, vp8_priv_sz}, /* private context: align 8, zero-filled, size from vp8_priv_sz */
+ {VP8_SEG_MAX, 0, 0, 0, NULL} /* list terminator */
+};
+
+struct vpx_codec_alg_priv {
+ vpx_codec_priv_t base;
+ vpx_codec_mmap_t mmaps[NELEMENTS(vp8_mem_req_segs) - 1]; /* one per requested segment (terminator excluded) */
+ vpx_codec_dec_cfg_t cfg; /* internal copy of the caller's decoder config */
+ vp8_stream_info_t si; /* stream info filled in by peek_si on the first frame */
+ int defer_alloc; /* set at init; cleared once decode_one ran the deferred allocations */
+ int decoder_init; /* set after the first attempt to create the decompressor */
+ VP9D_PTR pbi; /* decoder instance returned by vp9_create_decompressor() */
+ int postproc_cfg_set; /* non-zero once VP8_SET_POSTPROC supplied postproc_cfg */
+ vp8_postproc_cfg_t postproc_cfg;
+#if CONFIG_POSTPROC_VISUALIZER
+ unsigned int dbg_postproc_flag;
+ int dbg_color_ref_frame_flag;
+ int dbg_color_mb_modes_flag;
+ int dbg_color_b_modes_flag;
+ int dbg_display_mv_flag;
+#endif
+ vpx_image_t img; /* description of the most recently decoded frame */
+ int img_setup;
+ int img_avail; /* non-zero while img holds a frame not yet invalidated by a new decode */
+};
+
+static unsigned long vp8_priv_sz(const vpx_codec_dec_cfg_t *si,
+ vpx_codec_flags_t flags) {
+ /* Returns the size of the private decoder context; both arguments are
+ * ignored. Although the value is constant, it can't be used directly in
+ * the requested segments list because that list is defined before the
+ * private type (so that the number of memory maps is known).
+ */
+ (void)si;
+ return sizeof(vpx_codec_alg_priv_t);
+}
+
+
+static void vp8_mmap_dtor(vpx_codec_mmap_t *mmap) { /* destructor installed by vp8_mmap_alloc() */
+ free(mmap->priv); /* priv is the raw allocation; base may point inside it */
+}
+
+/* Allocates backing storage for one memory segment.
+ *
+ * Over-allocates by (align - 1) bytes so base can be rounded up to the
+ * requested alignment (the mask is only exact for power-of-two alignments);
+ * priv keeps the raw pointer that vp8_mmap_dtor() frees.
+ * Returns VPX_CODEC_MEM_ERROR when the allocation fails. */
+static vpx_codec_err_t vp8_mmap_alloc(vpx_codec_mmap_t *mmap) {
+  const unsigned int pad = mmap->align ? mmap->align - 1 : 0;
+  const int zeroed = (mmap->flags & VPX_CODEC_MEM_ZERO) != 0;
+
+  mmap->priv = zeroed ? calloc(1, mmap->sz + pad) : malloc(mmap->sz + pad);
+  mmap->base = (void *)((((uintptr_t)mmap->priv) + pad) & ~(uintptr_t)pad);
+  mmap->dtor = vp8_mmap_dtor;
+
+  return mmap->priv ? VPX_CODEC_OK : VPX_CODEC_MEM_ERROR;
+}
+
+/* Checks that every required memory segment has been supplied and is large
+ * enough for the stream dimensions in si.
+ * Returns VPX_CODEC_MEM_ERROR at the first missing or undersized segment. */
+static vpx_codec_err_t vp8_validate_mmaps(const vp8_stream_info_t *si,
+                                          const vpx_codec_mmap_t *mmaps,
+                                          vpx_codec_flags_t init_flags) {
+  int seg;
+
+  for (seg = 0; seg < NELEMENTS(vp8_mem_req_segs) - 1; seg++) {
+    const mem_req_t *const req = &vp8_mem_req_segs[seg];
+
+    /* The segment must have been allocated. */
+    if (!mmaps[seg].base)
+      return VPX_CODEC_MEM_ERROR;
+
+    /* A variable-size segment must cover the size computed for si. */
+    if (req->calc_sz) {
+      vpx_codec_dec_cfg_t cfg;
+
+      cfg.w = si->w;
+      cfg.h = si->h;
+
+      if (mmaps[seg].sz < req->calc_sz(&cfg, init_flags))
+        return VPX_CODEC_MEM_ERROR;
+    }
+  }
+
+  return VPX_CODEC_OK;
+}
+
+/* Places the codec's private context in the segment described by mmap, links
+ * it into ctx and switches ctx->config.dec to an internally owned copy. */
+static void vp8_init_ctx(vpx_codec_ctx_t *ctx, const vpx_codec_mmap_t *mmap) {
+  vpx_codec_alg_priv_t *priv;
+  int seg;
+
+  ctx->priv = mmap->base;
+  ctx->priv->sz = sizeof(*ctx->priv);
+  ctx->priv->iface = ctx->iface;
+  ctx->priv->alg_priv = mmap->base;
+  ctx->priv->init_flags = ctx->init_flags;
+  priv = ctx->priv->alg_priv;
+  for (seg = 0; seg < NELEMENTS(priv->mmaps); seg++)
+    priv->mmaps[seg].id = vp8_mem_req_segs[seg].id;
+  priv->mmaps[0] = *mmap;
+  priv->si.sz = sizeof(priv->si);
+  if (ctx->config.dec) {
+    priv->cfg = *ctx->config.dec;
+    ctx->config.dec = &priv->cfg;
+  }
+}
+
+/* Returns the base address of the segment whose id matches, or NULL. */
+static void *mmap_lkup(vpx_codec_alg_priv_t *ctx, unsigned int id) {
+  int seg = 0;
+
+  while (seg < NELEMENTS(ctx->mmaps) && ctx->mmaps[seg].id != id)
+    seg++;
+
+  return seg < NELEMENTS(ctx->mmaps) ? ctx->mmaps[seg].base : NULL;
+}
+static void vp8_finalize_mmaps(vpx_codec_alg_priv_t *ctx) {
+ /* nothing to clean up; this is the hook invoked once all segments are in place */
+}
+
+/* Codec init hook: allocates and wires up the private decoder context.
+ *
+ * Only the vpx_codec_alg_priv_t structure itself is allocated here; further
+ * memory may be required once the stream information becomes known, so
+ * defer_alloc is set. Does nothing when ctx->priv already exists.
+ */
+static vpx_codec_err_t vp8_init(vpx_codec_ctx_t *ctx,
+                                vpx_codec_priv_enc_mr_cfg_t *data) {
+  vpx_codec_mmap_t mmap;
+  vpx_codec_err_t res;
+
+  if (ctx->priv)
+    return VPX_CODEC_OK;
+
+  mmap.id = vp8_mem_req_segs[0].id;
+  mmap.sz = sizeof(vpx_codec_alg_priv_t);
+  mmap.align = vp8_mem_req_segs[0].align;
+  mmap.flags = vp8_mem_req_segs[0].flags;
+
+  res = vp8_mmap_alloc(&mmap);
+  if (res)
+    return res;
+
+  vp8_init_ctx(ctx, &mmap);
+  /* The zeroed context leaves post-processing initialised to do nothing. */
+  ctx->priv->alg_priv->defer_alloc = 1;
+  return VPX_CODEC_OK;
+}
+
+/* Tears down the decoder and releases every memory segment through its
+ * destructor. Segments go last-to-first: segment 0 backs ctx itself. */
+static vpx_codec_err_t vp8_destroy(vpx_codec_alg_priv_t *ctx) {
+  int seg = NELEMENTS(ctx->mmaps);
+
+  vp9_remove_decompressor(ctx->pbi);
+  while (--seg >= 0) {
+    if (ctx->mmaps[seg].dtor)
+      ctx->mmaps[seg].dtor(&ctx->mmaps[seg]);
+  }
+  return VPX_CODEC_OK;
+}
+
+/* Parses the uncompressed start of a frame to fill in stream information.
+ *
+ * Key frame layout:
+ *   3 bytes: version, frame type and an offset
+ *   3 bytes: sync code (0x9d, 0x01, 0x2a)
+ *   4 bytes: width and height, lowest 14 bits of each 2-byte LE value
+ *
+ * Returns VPX_CODEC_INVALID_PARAM for a NULL, empty or address-wrapping
+ * buffer, and VPX_CODEC_UNSUP_BITSTREAM for anything that is not a key frame
+ * with a valid sync code and a non-zero width or height.
+ */
+static vpx_codec_err_t vp8_peek_si(const uint8_t *data,
+                                   unsigned int data_sz,
+                                   vpx_codec_stream_info_t *si) {
+  vpx_codec_err_t res = VPX_CODEC_OK;
+  const uint8_t *c;
+
+  /* Compare as integers: forming data + data_sz past the end of the address
+   * space is undefined for pointers, and NULL must never be dereferenced. */
+  if (!data || (uintptr_t)data + data_sz <= (uintptr_t)data)
+    return VPX_CODEC_INVALID_PARAM;
+
+  si->is_kf = 0;
+  if (data_sz < 10 || (data[0] & 0x01)) /* too short, or not an I-frame */
+    return VPX_CODEC_UNSUP_BITSTREAM;
+
+  c = data + 3;
+  si->is_kf = 1;
+  if (c[0] != 0x9d || c[1] != 0x01 || c[2] != 0x2a) /* vet via sync code */
+    res = VPX_CODEC_UNSUP_BITSTREAM;
+
+  si->w = (c[3] | (c[4] << 8)) & 0x3fff;
+  si->h = (c[5] | (c[6] << 8)) & 0x3fff;
+  if (!(si->h | si->w))
+    res = VPX_CODEC_UNSUP_BITSTREAM;
+  return res;
+}
+
+/* Copies the decoder's cached stream information into the caller's struct
+ * and records the number of bytes copied in si->sz. */
+static vpx_codec_err_t vp8_get_si(vpx_codec_alg_priv_t *ctx,
+                                  vpx_codec_stream_info_t *si) {
+  /* vp8_stream_info_t is a typedef of vpx_codec_stream_info_t, so both
+   * alternatives select the same size. */
+  const unsigned int sz = (si->sz >= sizeof(vp8_stream_info_t))
+                              ? sizeof(vp8_stream_info_t)
+                              : sizeof(vpx_codec_stream_info_t);
+
+  memcpy(si, &ctx->si, sz);
+  si->sz = sz;
+
+  return VPX_CODEC_OK;
+}
+
+
+/* Returns error->error_code; when it is non-zero, also publishes the
+ * matching detail string (or NULL if there is none) in ctx->base. */
+static vpx_codec_err_t
+update_error_state(vpx_codec_alg_priv_t *ctx,
+                   const struct vpx_internal_error_info *error) {
+  const vpx_codec_err_t res = error->error_code;
+
+  if (res)
+    ctx->base.err_detail = error->has_detail ? error->detail : NULL;
+
+  return res;
+}
+
+/* Describes a decoder frame buffer as a vpx_image_t without copying pixels.
+ * vpx_img_wrap() cannot express independent Y/U/V strides or the other
+ * alignment adjustments a YV12_BUFFER_CONFIG may carry, so every field is
+ * set by hand. The image is marked as not owning its data. */
+static void yuvconfig2image(vpx_image_t *img,
+                            const YV12_BUFFER_CONFIG *yv12,
+                            void *user_priv) {
+  img->fmt = yv12->clrtype == REG_YUV ? VPX_IMG_FMT_I420 : VPX_IMG_FMT_VPXI420;
+  img->bps = 12;
+  img->x_chroma_shift = 1;
+  img->y_chroma_shift = 1;
+  img->d_w = yv12->y_width;
+  img->d_h = yv12->y_height;
+  img->w = yv12->y_stride;
+  img->h = (yv12->y_height + 2 * VP9BORDERINPIXELS + 15) & ~15;
+
+  img->planes[VPX_PLANE_Y] = yv12->y_buffer;
+  img->stride[VPX_PLANE_Y] = yv12->y_stride;
+  img->planes[VPX_PLANE_U] = yv12->u_buffer;
+  img->stride[VPX_PLANE_U] = yv12->uv_stride;
+  img->planes[VPX_PLANE_V] = yv12->v_buffer;
+  img->stride[VPX_PLANE_V] = yv12->uv_stride;
+  img->planes[VPX_PLANE_ALPHA] = NULL;
+  img->stride[VPX_PLANE_ALPHA] = yv12->y_stride;
+  img->user_priv = user_priv;
+  img->img_data = yv12->buffer_alloc;
+  img->img_data_owner = 0;
+  img->self_allocd = 0;
+}
+
+static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx,
+ const uint8_t **data,
+ unsigned int data_sz,
+ void *user_priv,
+ long deadline) {
+ vpx_codec_err_t res = VPX_CODEC_OK;
+ /* Decodes one frame; memory and the decoder instance are set up lazily on first use. */
+ ctx->img_avail = 0; /* any previously returned image is no longer valid */
+
+ /* Determine the stream parameters. Note that we rely on peek_si to
+ * validate that we have a buffer that does not wrap around the top
+ * of the heap.
+ */
+ if (!ctx->si.h)
+ res = ctx->base.iface->dec.peek_si(*data, data_sz, &ctx->si);
+
+
+ /* Perform deferred allocations, if required */
+ if (!res && ctx->defer_alloc) {
+ int i;
+
+ for (i = 1; !res && i < NELEMENTS(ctx->mmaps); i++) { /* starts at 1: segment 0 was allocated in vp8_init */
+ vpx_codec_dec_cfg_t cfg;
+
+ cfg.w = ctx->si.w;
+ cfg.h = ctx->si.h;
+ ctx->mmaps[i].id = vp8_mem_req_segs[i].id;
+ ctx->mmaps[i].sz = vp8_mem_req_segs[i].sz;
+ ctx->mmaps[i].align = vp8_mem_req_segs[i].align;
+ ctx->mmaps[i].flags = vp8_mem_req_segs[i].flags;
+
+ if (!ctx->mmaps[i].sz)
+ ctx->mmaps[i].sz = vp8_mem_req_segs[i].calc_sz(&cfg,
+ ctx->base.init_flags);
+
+ res = vp8_mmap_alloc(&ctx->mmaps[i]);
+ }
+
+ if (!res)
+ vp8_finalize_mmaps(ctx);
+
+ ctx->defer_alloc = 0;
+ }
+
+ /* Initialize the decoder instance on the first frame*/
+ if (!res && !ctx->decoder_init) {
+ res = vp8_validate_mmaps(&ctx->si, ctx->mmaps, ctx->base.init_flags);
+
+ if (!res) {
+ VP9D_CONFIG oxcf;
+ VP9D_PTR optr;
+
+ vp9_initialize_dec();
+
+ oxcf.Width = ctx->si.w;
+ oxcf.Height = ctx->si.h;
+ oxcf.Version = 9;
+ oxcf.postprocess = 0;
+ oxcf.max_threads = ctx->cfg.threads;
+ optr = vp9_create_decompressor(&oxcf);
+
+ /* If postprocessing was enabled by the application and a
+ * configuration has not been provided, default it.
+ */
+ if (!ctx->postproc_cfg_set
+ && (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC)) {
+ ctx->postproc_cfg.post_proc_flag =
+ VP8_DEBLOCK | VP8_DEMACROBLOCK;
+ ctx->postproc_cfg.deblocking_level = 4;
+ ctx->postproc_cfg.noise_level = 0;
+ }
+
+ if (!optr)
+ res = VPX_CODEC_ERROR;
+ else
+ ctx->pbi = optr;
+ }
+
+ ctx->decoder_init = 1; /* NOTE(review): also set when creation failed, so later calls skip decoding and return OK -- confirm intended */
+ }
+
+ if (!res && ctx->pbi) {
+ YV12_BUFFER_CONFIG sd;
+ int64_t time_stamp = 0, time_end_stamp = 0;
+ vp9_ppflags_t flags = {0};
+ /* Post-processing flags stay zeroed unless the application enabled post-processing. */
+ if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC) {
+ flags.post_proc_flag = ctx->postproc_cfg.post_proc_flag
+#if CONFIG_POSTPROC_VISUALIZER
+
+ | ((ctx->dbg_color_ref_frame_flag != 0) ? VP9D_DEBUG_CLR_FRM_REF_BLKS : 0)
+ | ((ctx->dbg_color_mb_modes_flag != 0) ? VP9D_DEBUG_CLR_BLK_MODES : 0)
+ | ((ctx->dbg_color_b_modes_flag != 0) ? VP9D_DEBUG_CLR_BLK_MODES : 0)
+ | ((ctx->dbg_display_mv_flag != 0) ? VP9D_DEBUG_DRAW_MV : 0)
+#endif
+;
+ flags.deblocking_level = ctx->postproc_cfg.deblocking_level;
+ flags.noise_level = ctx->postproc_cfg.noise_level;
+#if CONFIG_POSTPROC_VISUALIZER
+ flags.display_ref_frame_flag = ctx->dbg_color_ref_frame_flag;
+ flags.display_mb_modes_flag = ctx->dbg_color_mb_modes_flag;
+ flags.display_b_modes_flag = ctx->dbg_color_b_modes_flag;
+ flags.display_mv_flag = ctx->dbg_display_mv_flag;
+#endif
+ }
+ /* Hand the compressed data to the decoder; on a non-zero result pick up its error state. */
+ if (vp9_receive_compressed_data(ctx->pbi, data_sz, data, deadline)) {
+ VP9D_COMP *pbi = (VP9D_COMP *)ctx->pbi;
+ res = update_error_state(ctx, &pbi->common.error);
+ }
+ /* If the decoder reports a frame (return value 0), expose it through ctx->img. */
+ if (!res && 0 == vp9_get_raw_frame(ctx->pbi, &sd, &time_stamp,
+ &time_end_stamp, &flags)) {
+ yuvconfig2image(&ctx->img, &sd, user_priv);
+ ctx->img_avail = 1;
+ }
+ }
+
+ return res;
+}
+
+/* Decode entry point: runs decode_one() over the buffer until it is fully
+ * consumed (data_start is presumably advanced by the decoder) or a frame fails. */
+static vpx_codec_err_t vp9_decode(vpx_codec_alg_priv_t *ctx,
+                                  const uint8_t *data,
+                                  unsigned int data_sz,
+                                  void *user_priv,
+                                  long deadline) {
+  const uint8_t *const data_end = data + data_sz;
+  const uint8_t *data_start = data;
+  vpx_codec_err_t res;
+
+  for (;;) {
+    res = decode_one(ctx, &data_start, data_sz, user_priv, deadline);
+    assert(data_start >= data);
+    assert(data_start <= data_end);
+    if (res)  /* early exit if there was a decode error */
+      break;
+    /* Account for suboptimal termination by the encoder: skip zero bytes. */
+    while (data_start < data_end && *data_start == 0)
+      data_start++;
+    if (data_start >= data_end)
+      break;
+    data_sz = data_end - data_start;
+  }
+  return res;
+}
+
+/* Frame getter for the decoder interface.
+ *
+ * Returns the decoded image on the first call after a successful decode and
+ * NULL afterwards: *iter acts as a flip-flop that is set once the image has
+ * been handed out. Also returns NULL when no image is available.
+ */
+static vpx_image_t *vp8_get_frame(vpx_codec_alg_priv_t *ctx,
+                                  vpx_codec_iter_t *iter) {
+  vpx_image_t *const img = &ctx->img;
+
+  if (!ctx->img_avail || *iter)
+    return NULL;
+
+  *iter = img;
+  return img;
+}
+
+
+static
+vpx_codec_err_t vp8_xma_get_mmap(const vpx_codec_ctx_t *ctx,
+ vpx_codec_mmap_t *mmap,
+ vpx_codec_iter_t *iter) {
+ vpx_codec_err_t res;
+ const mem_req_t *seg_iter = *iter;
+ /* *iter remembers the last request returned; NULL means start from the first one. */
+ /* Get address of next segment request */
+ do {
+ if (!seg_iter)
+ seg_iter = vp8_mem_req_segs;
+ else if (seg_iter->id != VP8_SEG_MAX)
+ seg_iter++;
+
+ *iter = (vpx_codec_iter_t)seg_iter;
+
+ if (seg_iter->id != VP8_SEG_MAX) {
+ mmap->id = seg_iter->id;
+ mmap->sz = seg_iter->sz;
+ mmap->align = seg_iter->align;
+ mmap->flags = seg_iter->flags;
+ /* A zero static size means the size is computed from the decoder config. */
+ if (!seg_iter->sz)
+ mmap->sz = seg_iter->calc_sz(ctx->config.dec, ctx->init_flags);
+
+ res = VPX_CODEC_OK;
+ } else
+ res = VPX_CODEC_LIST_END;
+ } while (!mmap->sz && res != VPX_CODEC_LIST_END); /* skip requests whose size is zero */
+
+ return res;
+}
+
+static vpx_codec_err_t vp8_xma_set_mmap(vpx_codec_ctx_t *ctx,
+ const vpx_codec_mmap_t *mmap) {
+ vpx_codec_err_t res = VPX_CODEC_MEM_ERROR;
+ int i, done;
+ /* The private context is created from the VP8_SEG_ALG_PRIV segment. */
+ if (!ctx->priv) {
+ if (mmap->id == VP8_SEG_ALG_PRIV) {
+ if (!ctx->priv) {
+ vp8_init_ctx(ctx, mmap);
+ res = VPX_CODEC_OK;
+ }
+ }
+ }
+ /* NOTE(review): when ctx->priv already existed, res is still VPX_CODEC_MEM_ERROR and the steps below are skipped -- confirm intended. */
+ done = 1;
+
+ if (!res && ctx->priv->alg_priv) {
+ for (i = 0; i < NELEMENTS(ctx->priv->alg_priv->mmaps); i++) {
+ if (ctx->priv->alg_priv->mmaps[i].id == mmap->id)
+ if (!ctx->priv->alg_priv->mmaps[i].base) {
+ ctx->priv->alg_priv->mmaps[i] = *mmap;
+ res = VPX_CODEC_OK;
+ }
+
+ done &= (ctx->priv->alg_priv->mmaps[i].base != NULL);
+ }
+ }
+ /* Once every segment has a base address, finish up and run the interface's init hook. */
+ if (done && !res) {
+ vp8_finalize_mmaps(ctx->priv->alg_priv);
+ res = ctx->iface->init(ctx, NULL);
+ }
+
+ return res;
+}
+
+/* Fills a YV12_BUFFER_CONFIG that refers to the planes of img (no copy).
+ * Chroma dimensions are the luma dimensions divided by two. Always returns
+ * VPX_CODEC_OK. */
+static vpx_codec_err_t image2yuvconfig(const vpx_image_t *img,
+                                       YV12_BUFFER_CONFIG *yv12) {
+  yv12->y_width = img->d_w;
+  yv12->y_height = img->d_h;
+  yv12->y_stride = img->stride[VPX_PLANE_Y];
+  yv12->y_buffer = img->planes[VPX_PLANE_Y];
+
+  yv12->uv_width = yv12->y_width / 2;
+  yv12->uv_height = yv12->y_height / 2;
+  yv12->uv_stride = img->stride[VPX_PLANE_U];
+  yv12->u_buffer = img->planes[VPX_PLANE_U];
+  yv12->v_buffer = img->planes[VPX_PLANE_V];
+
+  yv12->border = (img->stride[VPX_PLANE_Y] - img->d_w) / 2;
+  yv12->clrtype = (img->fmt == VPX_IMG_FMT_VPXI420 ||
+                   img->fmt == VPX_IMG_FMT_VPXYV12);
+  return VPX_CODEC_OK;
+}
+
+
+/* VP8_SET_REFERENCE handler for the decoder: wraps the caller's image in a
+ * YV12_BUFFER_CONFIG and returns the result of vp9_set_reference_dec() for
+ * the requested frame_type. A NULL argument gives
+ * VPX_CODEC_INVALID_PARAM. */
+static vpx_codec_err_t vp9_set_reference(vpx_codec_alg_priv_t *ctx,
+                                         int ctr_id,
+                                         va_list args) {
+  vpx_ref_frame_t *const ref = va_arg(args, vpx_ref_frame_t *);
+  YV12_BUFFER_CONFIG yv12;
+
+  if (ref == NULL)
+    return VPX_CODEC_INVALID_PARAM;
+
+  image2yuvconfig(&ref->img, &yv12);
+
+  return vp9_set_reference_dec(ctx->pbi, ref->frame_type, &yv12);
+}
+
+/* VP8_COPY_REFERENCE handler for the decoder: describes the caller's image
+ * as a YV12_BUFFER_CONFIG and returns the result of
+ * vp9_get_reference_dec() for the requested frame_type. A NULL argument
+ * gives VPX_CODEC_INVALID_PARAM. */
+static vpx_codec_err_t vp9_get_reference(vpx_codec_alg_priv_t *ctx,
+                                         int ctr_id,
+                                         va_list args) {
+  vpx_ref_frame_t *const ref = va_arg(args, vpx_ref_frame_t *);
+  YV12_BUFFER_CONFIG yv12;
+
+  if (ref == NULL)
+    return VPX_CODEC_INVALID_PARAM;
+
+  image2yuvconfig(&ref->img, &yv12);
+
+  return vp9_get_reference_dec(ctx->pbi, ref->frame_type, &yv12);
+}
+
+/* VP8_SET_POSTPROC handler: stores a copy of the caller's settings and marks
+ * them as set; reports VPX_CODEC_INCAPABLE without CONFIG_POSTPROC. */
+static vpx_codec_err_t vp8_set_postproc(vpx_codec_alg_priv_t *ctx,
+                                        int ctr_id,
+                                        va_list args) {
+#if CONFIG_POSTPROC
+  const vp8_postproc_cfg_t *const cfg = va_arg(args, vp8_postproc_cfg_t *);
+
+  if (cfg == NULL)
+    return VPX_CODEC_INVALID_PARAM;
+  ctx->postproc_cfg_set = 1;
+  ctx->postproc_cfg = *cfg;
+  return VPX_CODEC_OK;
+#else
+  return VPX_CODEC_INCAPABLE;
+#endif
+}
+
+static vpx_codec_err_t vp8_set_dbg_options(vpx_codec_alg_priv_t *ctx,
+ int ctrl_id,
+ va_list args) {
+#if CONFIG_POSTPROC_VISUALIZER && CONFIG_POSTPROC
+ int data = va_arg(args, int);
+ /* MAP expands to a case label that stores data in the given context field. */
+#define MAP(id, var) case id: var = data; break;
+
+ switch (ctrl_id) {
+ MAP(VP8_SET_DBG_COLOR_REF_FRAME, ctx->dbg_color_ref_frame_flag);
+ MAP(VP8_SET_DBG_COLOR_MB_MODES, ctx->dbg_color_mb_modes_flag);
+ MAP(VP8_SET_DBG_COLOR_B_MODES, ctx->dbg_color_b_modes_flag);
+ MAP(VP8_SET_DBG_DISPLAY_MV, ctx->dbg_display_mv_flag);
+ } /* any other control id is ignored and still reports success */
+
+ return VPX_CODEC_OK;
+#else
+ return VPX_CODEC_INCAPABLE; /* visualizer support is not compiled in */
+#endif
+}
+
+/* VP8D_GET_LAST_REF_UPDATES handler: writes a combination of VP8_LAST_FRAME,
+ * VP8_GOLD_FRAME and VP8_ALTR_FRAME, taken from the decoder's refresh_*
+ * flags, to the caller's int. Returns VPX_CODEC_INVALID_PARAM for a NULL
+ * argument and VPX_CODEC_ERROR when no decoder instance exists yet. */
+static vpx_codec_err_t vp8_get_last_ref_updates(vpx_codec_alg_priv_t *ctx,
+                                                int ctrl_id, va_list args) {
+  int *const update_info = va_arg(args, int *);
+  const VP9D_COMP *const pbi = (VP9D_COMP *)ctx->pbi;
+  if (!update_info) return VPX_CODEC_INVALID_PARAM;
+  if (!pbi) return VPX_CODEC_ERROR; /* pbi is only created on first decode */
+  *update_info = pbi->common.refresh_alt_ref_frame * (int) VP8_ALTR_FRAME
+                 + pbi->common.refresh_golden_frame * (int) VP8_GOLD_FRAME
+                 + pbi->common.refresh_last_frame * (int) VP8_LAST_FRAME;
+  return VPX_CODEC_OK;
+}
+
+
+/* VP8D_GET_FRAME_CORRUPTED handler: reports the corrupted flag of the
+ * decoder's frame_to_show. Returns VPX_CODEC_INVALID_PARAM for a NULL
+ * argument and VPX_CODEC_ERROR when there is no decoder or no frame yet. */
+static vpx_codec_err_t vp8_get_frame_corrupted(vpx_codec_alg_priv_t *ctx,
+                                               int ctrl_id, va_list args) {
+  int *const corrupted = va_arg(args, int *);
+  const VP9D_COMP *const pbi = (VP9D_COMP *)ctx->pbi;
+
+  if (!corrupted)
+    return VPX_CODEC_INVALID_PARAM;
+  if (!pbi || !pbi->common.frame_to_show) /* nothing decoded so far */
+    return VPX_CODEC_ERROR;
+  *corrupted = pbi->common.frame_to_show->corrupted;
+  return VPX_CODEC_OK;
+}
+
+static vpx_codec_ctrl_fn_map_t ctf_maps[] = { /* decoder control id -> handler function */
+ {VP8_SET_REFERENCE, vp9_set_reference},
+ {VP8_COPY_REFERENCE, vp9_get_reference},
+ {VP8_SET_POSTPROC, vp8_set_postproc},
+ {VP8_SET_DBG_COLOR_REF_FRAME, vp8_set_dbg_options}, /* the four debug ids share one handler that switches on the id */
+ {VP8_SET_DBG_COLOR_MB_MODES, vp8_set_dbg_options},
+ {VP8_SET_DBG_COLOR_B_MODES, vp8_set_dbg_options},
+ {VP8_SET_DBG_DISPLAY_MV, vp8_set_dbg_options},
+ {VP8D_GET_LAST_REF_UPDATES, vp8_get_last_ref_updates},
+ {VP8D_GET_FRAME_CORRUPTED, vp8_get_frame_corrupted},
+ { -1, NULL}, /* final entry; presumably the end-of-list marker -- confirm */
+};
+
+
+#ifndef VERSION_STRING /* fall back to an empty suffix when none is defined */
+#define VERSION_STRING
+#endif
+CODEC_INTERFACE(vpx_codec_vp9_dx) = {
+ "WebM Project VP9 Decoder" VERSION_STRING,
+ VPX_CODEC_INTERNAL_ABI_VERSION,
+ VPX_CODEC_CAP_DECODER | VP8_CAP_POSTPROC,
+ /* vpx_codec_caps_t caps; */
+ vp8_init, /* vpx_codec_init_fn_t init; */
+ vp8_destroy, /* vpx_codec_destroy_fn_t destroy; */
+ ctf_maps, /* vpx_codec_ctrl_fn_map_t *ctrl_maps; */
+ vp8_xma_get_mmap, /* vpx_codec_get_mmap_fn_t get_mmap; */
+ vp8_xma_set_mmap, /* vpx_codec_set_mmap_fn_t set_mmap; */
+ {
+ vp8_peek_si, /* vpx_codec_peek_si_fn_t peek_si; */
+ vp8_get_si, /* vpx_codec_get_si_fn_t get_si; */
+ vp9_decode, /* vpx_codec_decode_fn_t decode; */
+ vp8_get_frame, /* vpx_codec_frame_get_fn_t frame_get; */
+ },
+ {
+ /* encoder functions: none are provided by the decoder */
+ NOT_IMPLEMENTED,
+ NOT_IMPLEMENTED,
+ NOT_IMPLEMENTED,
+ NOT_IMPLEMENTED,
+ NOT_IMPLEMENTED,
+ NOT_IMPLEMENTED
+ }
+};
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
new file mode 100644
index 0000000..12d1ec4
--- /dev/null
+++ b/vp9/vp9cx.mk
@@ -0,0 +1,117 @@
+##
+## Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+##
+## Use of this source code is governed by a BSD-style license
+## that can be found in the LICENSE file in the root of the source
+## tree. An additional intellectual property rights grant can be found
+## in the file PATENTS. All contributing project authors may
+## be found in the AUTHORS file in the root of the source tree.
+##
+
+VP9_CX_EXPORTS += exports_enc
+# Start from the common source lists (vp9dx.mk pulls in the same ones).
+VP9_CX_SRCS-yes += $(VP9_COMMON_SRCS-yes)
+VP9_CX_SRCS-no += $(VP9_COMMON_SRCS-no)
+VP9_CX_SRCS_REMOVE-yes += $(VP9_COMMON_SRCS_REMOVE-yes)
+VP9_CX_SRCS_REMOVE-no += $(VP9_COMMON_SRCS_REMOVE-no)
+
+VP9_CX_SRCS-yes += vp9_cx_iface.c
+# Encoder sources and headers (the INCLUDES lines below are disabled).
+# encoder
+#INCLUDES += algo/vpx_common/vpx_mem/include
+#INCLUDES += common
+#INCLUDES += common
+#INCLUDES += common
+#INCLUDES += algo/vpx_ref/cpu_id/include
+#INCLUDES += common
+#INCLUDES += encoder
+
+VP9_CX_SRCS-yes += encoder/vp9_asm_enc_offsets.c
+VP9_CX_SRCS-yes += encoder/vp9_bitstream.c
+VP9_CX_SRCS-yes += encoder/vp9_boolhuff.c
+VP9_CX_SRCS-yes += encoder/vp9_dct.c
+VP9_CX_SRCS-yes += encoder/vp9_encodeframe.c
+VP9_CX_SRCS-yes += encoder/vp9_encodeframe.h
+VP9_CX_SRCS-yes += encoder/vp9_encodeintra.c
+VP9_CX_SRCS-yes += encoder/vp9_encodemb.c
+VP9_CX_SRCS-yes += encoder/vp9_encodemv.c
+VP9_CX_SRCS-yes += encoder/vp9_firstpass.c
+VP9_CX_SRCS-yes += encoder/vp9_block.h
+VP9_CX_SRCS-yes += encoder/vp9_boolhuff.h
+VP9_CX_SRCS-yes += encoder/vp9_bitstream.h
+VP9_CX_SRCS-yes += encoder/vp9_encodeintra.h
+VP9_CX_SRCS-yes += encoder/vp9_encodemb.h
+VP9_CX_SRCS-yes += encoder/vp9_encodemv.h
+VP9_CX_SRCS-yes += encoder/vp9_firstpass.h
+VP9_CX_SRCS-yes += encoder/vp9_lookahead.c
+VP9_CX_SRCS-yes += encoder/vp9_lookahead.h
+VP9_CX_SRCS-yes += encoder/vp9_mcomp.h
+VP9_CX_SRCS-yes += encoder/vp9_modecosts.h
+VP9_CX_SRCS-yes += encoder/vp9_onyx_int.h
+VP9_CX_SRCS-yes += encoder/vp9_psnr.h
+VP9_CX_SRCS-yes += encoder/vp9_quantize.h
+VP9_CX_SRCS-yes += encoder/vp9_ratectrl.h
+VP9_CX_SRCS-yes += encoder/vp9_rdopt.h
+VP9_CX_SRCS-yes += encoder/vp9_tokenize.h
+VP9_CX_SRCS-yes += encoder/vp9_treewriter.h
+VP9_CX_SRCS-yes += encoder/vp9_variance.h
+VP9_CX_SRCS-yes += encoder/vp9_mcomp.c
+VP9_CX_SRCS-yes += encoder/vp9_modecosts.c
+VP9_CX_SRCS-yes += encoder/vp9_onyx_if.c
+VP9_CX_SRCS-yes += encoder/vp9_picklpf.c
+VP9_CX_SRCS-yes += encoder/vp9_picklpf.h
+VP9_CX_SRCS-yes += encoder/vp9_psnr.c
+VP9_CX_SRCS-yes += encoder/vp9_quantize.c
+VP9_CX_SRCS-yes += encoder/vp9_ratectrl.c
+VP9_CX_SRCS-yes += encoder/vp9_rdopt.c
+VP9_CX_SRCS-yes += encoder/vp9_sad_c.c
+VP9_CX_SRCS-yes += encoder/vp9_satd_c.c
+VP9_CX_SRCS-yes += encoder/vp9_segmentation.c
+VP9_CX_SRCS-yes += encoder/vp9_segmentation.h
+VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_ssim.c
+VP9_CX_SRCS-yes += encoder/vp9_tokenize.c
+VP9_CX_SRCS-yes += encoder/vp9_treewriter.c
+VP9_CX_SRCS-yes += encoder/vp9_variance_c.c
+ifeq ($(CONFIG_POSTPROC),yes)
+VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/vp9_postproc.h
+VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/vp9_postproc.c
+endif
+VP9_CX_SRCS-yes += encoder/vp9_temporal_filter.c
+VP9_CX_SRCS-yes += encoder/vp9_temporal_filter.h
+VP9_CX_SRCS-yes += encoder/vp9_mbgraph.c
+VP9_CX_SRCS-yes += encoder/vp9_mbgraph.h
+
+# x86 / x86-64 sources, selected by the architecture and SIMD feature variables.
+VP9_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/vp9_mcomp_x86.h
+VP9_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/vp9_quantize_x86.h
+VP9_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/vp9_x86_csystemdependent.c
+VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_variance_mmx.c
+VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_variance_impl_mmx.asm
+VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_sad_mmx.asm
+VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_dct_mmx.asm
+VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_dct_mmx.h
+VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_subtract_mmx.asm
+VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2.asm
+VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_variance_sse2.c
+VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_variance_impl_sse2.asm
+VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad_sse2.asm
+VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_fwalsh_sse2.asm
+VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_quantize_sse2.asm
+VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subtract_sse2.asm
+VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_temporal_filter_apply_sse2.asm
+VP9_CX_SRCS-$(HAVE_SSE3) += encoder/x86/vp9_sad_sse3.asm
+VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_sad_ssse3.asm
+VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_variance_ssse3.c
+VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_variance_impl_ssse3.asm
+VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_quantize_ssse3.asm
+VP9_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/vp9_sad_sse4.asm
+VP9_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/vp9_quantize_sse4.asm
+VP9_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/vp9_quantize_mmx.asm
+VP9_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/vp9_encodeopt.asm
+VP9_CX_SRCS-$(ARCH_X86_64) += encoder/x86/vp9_ssim_opt.asm
+
+# Drop every source listed in VP9_CX_SRCS_REMOVE-yes from the final list.
+VP9_CX_SRCS-yes := $(filter-out $(VP9_CX_SRCS_REMOVE-yes),$(VP9_CX_SRCS-yes))
+# Instantiate asm_offsets_template for the encoder offsets source.
+$(eval $(call asm_offsets_template,\
+ vp9_asm_enc_offsets.asm, $(VP9_PREFIX)encoder/vp9_asm_enc_offsets.c))
diff --git a/vp9/vp9dx.mk b/vp9/vp9dx.mk
new file mode 100644
index 0000000..ddb36a9
--- /dev/null
+++ b/vp9/vp9dx.mk
@@ -0,0 +1,41 @@
+##
+## Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+##
+## Use of this source code is governed by a BSD-style license
+## that can be found in the LICENSE file in the root of the source
+## tree. An additional intellectual property rights grant can be found
+## in the file PATENTS. All contributing project authors may
+## be found in the AUTHORS file in the root of the source tree.
+##
+
+VP9_DX_EXPORTS += exports_dec
+
+VP9_DX_SRCS-yes += $(VP9_COMMON_SRCS-yes)
+VP9_DX_SRCS-no += $(VP9_COMMON_SRCS-no)
+VP9_DX_SRCS_REMOVE-yes += $(VP9_COMMON_SRCS_REMOVE-yes)
+VP9_DX_SRCS_REMOVE-no += $(VP9_COMMON_SRCS_REMOVE-no)
+
+VP9_DX_SRCS-yes += vp9_dx_iface.c
+
+VP9_DX_SRCS-yes += decoder/vp9_asm_dec_offsets.c
+VP9_DX_SRCS-yes += decoder/vp9_dboolhuff.c
+VP9_DX_SRCS-yes += decoder/vp9_decodemv.c
+VP9_DX_SRCS-yes += decoder/vp9_decodframe.c
+VP9_DX_SRCS-yes += decoder/vp9_decodframe.h
+VP9_DX_SRCS-yes += decoder/vp9_dequantize.c
+VP9_DX_SRCS-yes += decoder/vp9_detokenize.c
+VP9_DX_SRCS-yes += decoder/vp9_dboolhuff.h
+VP9_DX_SRCS-yes += decoder/vp9_decodemv.h
+VP9_DX_SRCS-yes += decoder/vp9_dequantize.h
+VP9_DX_SRCS-yes += decoder/vp9_detokenize.h
+VP9_DX_SRCS-yes += decoder/vp9_onyxd_int.h
+VP9_DX_SRCS-yes += decoder/vp9_treereader.h
+VP9_DX_SRCS-yes += decoder/vp9_onyxd_if.c
+VP9_DX_SRCS-yes += decoder/vp9_idct_blk.c
+
+VP9_DX_SRCS-yes := $(filter-out $(VP9_DX_SRCS_REMOVE-yes),$(VP9_DX_SRCS-yes))
+
+VP9_DX_SRCS-$(HAVE_SSE2) += decoder/x86/vp9_idct_blk_sse2.c
+
+$(eval $(call asm_offsets_template,\
+ vp9_asm_dec_offsets.asm, $(VP9_PREFIX)decoder/vp9_asm_dec_offsets.c))
diff --git a/vpx/internal/vpx_codec_internal.h b/vpx/internal/vpx_codec_internal.h
index 4474331..d7bcd46 100644
--- a/vpx/internal/vpx_codec_internal.h
+++ b/vpx/internal/vpx_codec_internal.h
@@ -75,7 +75,7 @@
* Memory operation failed.
*/
typedef vpx_codec_err_t (*vpx_codec_init_fn_t)(vpx_codec_ctx_t *ctx,
- vpx_codec_priv_enc_mr_cfg_t *data);
+ vpx_codec_priv_enc_mr_cfg_t *data);
/*!\brief destroy function pointer prototype
*
@@ -109,8 +109,8 @@
* Bitstream is parsable and stream information updated
*/
typedef vpx_codec_err_t (*vpx_codec_peek_si_fn_t)(const uint8_t *data,
- unsigned int data_sz,
- vpx_codec_stream_info_t *si);
+ unsigned int data_sz,
+ vpx_codec_stream_info_t *si);
/*!\brief Return information about the current stream.
*
@@ -126,7 +126,7 @@
* Bitstream is parsable and stream information updated
*/
typedef vpx_codec_err_t (*vpx_codec_get_si_fn_t)(vpx_codec_alg_priv_t *ctx,
- vpx_codec_stream_info_t *si);
+ vpx_codec_stream_info_t *si);
/*!\brief control function pointer prototype
*
@@ -151,8 +151,8 @@
* The internal state data was deserialized.
*/
typedef vpx_codec_err_t (*vpx_codec_control_fn_t)(vpx_codec_alg_priv_t *ctx,
- int ctrl_id,
- va_list ap);
+ int ctrl_id,
+ va_list ap);
/*!\brief control function pointer mapping
*
@@ -165,10 +165,9 @@
* mapping. This implies that ctrl_id values chosen by the algorithm
* \ref MUST be non-zero.
*/
-typedef const struct vpx_codec_ctrl_fn_map
-{
- int ctrl_id;
- vpx_codec_control_fn_t fn;
+typedef const struct vpx_codec_ctrl_fn_map {
+ int ctrl_id;
+ vpx_codec_control_fn_t fn;
} vpx_codec_ctrl_fn_map_t;
/*!\brief decode data function pointer prototype
@@ -192,10 +191,10 @@
* for recoverability capabilities.
*/
typedef vpx_codec_err_t (*vpx_codec_decode_fn_t)(vpx_codec_alg_priv_t *ctx,
- const uint8_t *data,
- unsigned int data_sz,
- void *user_priv,
- long deadline);
+ const uint8_t *data,
+ unsigned int data_sz,
+ void *user_priv,
+ long deadline);
/*!\brief Decoded frames iterator
*
@@ -212,8 +211,8 @@
* \return Returns a pointer to an image, if one is ready for display. Frames
* produced will always be in PTS (presentation time stamp) order.
*/
-typedef vpx_image_t*(*vpx_codec_get_frame_fn_t)(vpx_codec_alg_priv_t *ctx,
- vpx_codec_iter_t *iter);
+typedef vpx_image_t *(*vpx_codec_get_frame_fn_t)(vpx_codec_alg_priv_t *ctx,
+ vpx_codec_iter_t *iter);
/*\brief eXternal Memory Allocation memory map get iterator
@@ -228,8 +227,8 @@
* indicate end-of-list.
*/
typedef vpx_codec_err_t (*vpx_codec_get_mmap_fn_t)(const vpx_codec_ctx_t *ctx,
- vpx_codec_mmap_t *mmap,
- vpx_codec_iter_t *iter);
+ vpx_codec_mmap_t *mmap,
+ vpx_codec_iter_t *iter);
/*\brief eXternal Memory Allocation memory map set iterator
@@ -245,17 +244,17 @@
* The memory map was rejected.
*/
typedef vpx_codec_err_t (*vpx_codec_set_mmap_fn_t)(vpx_codec_ctx_t *ctx,
- const vpx_codec_mmap_t *mmap);
+ const vpx_codec_mmap_t *mmap);
typedef vpx_codec_err_t (*vpx_codec_encode_fn_t)(vpx_codec_alg_priv_t *ctx,
- const vpx_image_t *img,
- vpx_codec_pts_t pts,
- unsigned long duration,
- vpx_enc_frame_flags_t flags,
- unsigned long deadline);
-typedef const vpx_codec_cx_pkt_t*(*vpx_codec_get_cx_data_fn_t)(vpx_codec_alg_priv_t *ctx,
- vpx_codec_iter_t *iter);
+ const vpx_image_t *img,
+ vpx_codec_pts_t pts,
+ unsigned long duration,
+ vpx_enc_frame_flags_t flags,
+ unsigned long deadline);
+typedef const vpx_codec_cx_pkt_t *(*vpx_codec_get_cx_data_fn_t)(vpx_codec_alg_priv_t *ctx,
+ vpx_codec_iter_t *iter);
typedef vpx_codec_err_t
(*vpx_codec_enc_config_set_fn_t)(vpx_codec_alg_priv_t *ctx,
@@ -268,7 +267,7 @@
typedef vpx_codec_err_t
(*vpx_codec_enc_mr_get_mem_loc_fn_t)(const vpx_codec_enc_cfg_t *cfg,
- void **mem_loc);
+ void **mem_loc);
/*!\brief usage configuration mapping
*
@@ -280,10 +279,9 @@
* one mapping must be present, in addition to the end-of-list.
*
*/
-typedef const struct vpx_codec_enc_cfg_map
-{
- int usage;
- vpx_codec_enc_cfg_t cfg;
+typedef const struct vpx_codec_enc_cfg_map {
+ int usage;
+ vpx_codec_enc_cfg_t cfg;
} vpx_codec_enc_cfg_map_t;
#define NOT_IMPLEMENTED 0
@@ -292,44 +290,39 @@
*
* All decoders \ref MUST expose a variable of this type.
*/
-struct vpx_codec_iface
-{
- const char *name; /**< Identification String */
- int abi_version; /**< Implemented ABI version */
- vpx_codec_caps_t caps; /**< Decoder capabilities */
- vpx_codec_init_fn_t init; /**< \copydoc ::vpx_codec_init_fn_t */
- vpx_codec_destroy_fn_t destroy; /**< \copydoc ::vpx_codec_destroy_fn_t */
- vpx_codec_ctrl_fn_map_t *ctrl_maps; /**< \copydoc ::vpx_codec_ctrl_fn_map_t */
- vpx_codec_get_mmap_fn_t get_mmap; /**< \copydoc ::vpx_codec_get_mmap_fn_t */
- vpx_codec_set_mmap_fn_t set_mmap; /**< \copydoc ::vpx_codec_set_mmap_fn_t */
- struct vpx_codec_dec_iface
- {
- vpx_codec_peek_si_fn_t peek_si; /**< \copydoc ::vpx_codec_peek_si_fn_t */
- vpx_codec_get_si_fn_t get_si; /**< \copydoc ::vpx_codec_peek_si_fn_t */
- vpx_codec_decode_fn_t decode; /**< \copydoc ::vpx_codec_decode_fn_t */
- vpx_codec_get_frame_fn_t get_frame; /**< \copydoc ::vpx_codec_get_frame_fn_t */
- } dec;
- struct vpx_codec_enc_iface
- {
- vpx_codec_enc_cfg_map_t *cfg_maps; /**< \copydoc ::vpx_codec_enc_cfg_map_t */
- vpx_codec_encode_fn_t encode; /**< \copydoc ::vpx_codec_encode_fn_t */
- vpx_codec_get_cx_data_fn_t get_cx_data; /**< \copydoc ::vpx_codec_get_cx_data_fn_t */
- vpx_codec_enc_config_set_fn_t cfg_set; /**< \copydoc ::vpx_codec_enc_config_set_fn_t */
- vpx_codec_get_global_headers_fn_t get_glob_hdrs; /**< \copydoc ::vpx_codec_get_global_headers_fn_t */
- vpx_codec_get_preview_frame_fn_t get_preview; /**< \copydoc ::vpx_codec_get_preview_frame_fn_t */
- vpx_codec_enc_mr_get_mem_loc_fn_t mr_get_mem_loc; /**< \copydoc ::vpx_codec_enc_mr_get_mem_loc_fn_t */
- } enc;
+struct vpx_codec_iface {
+ const char *name; /**< Identification String */
+ int abi_version; /**< Implemented ABI version */
+ vpx_codec_caps_t caps; /**< Decoder capabilities */
+ vpx_codec_init_fn_t init; /**< \copydoc ::vpx_codec_init_fn_t */
+ vpx_codec_destroy_fn_t destroy; /**< \copydoc ::vpx_codec_destroy_fn_t */
+ vpx_codec_ctrl_fn_map_t *ctrl_maps; /**< \copydoc ::vpx_codec_ctrl_fn_map_t */
+ vpx_codec_get_mmap_fn_t get_mmap; /**< \copydoc ::vpx_codec_get_mmap_fn_t */
+ vpx_codec_set_mmap_fn_t set_mmap; /**< \copydoc ::vpx_codec_set_mmap_fn_t */
+ struct vpx_codec_dec_iface {
+ vpx_codec_peek_si_fn_t peek_si; /**< \copydoc ::vpx_codec_peek_si_fn_t */
+ vpx_codec_get_si_fn_t get_si; /**< \copydoc ::vpx_codec_get_si_fn_t */
+ vpx_codec_decode_fn_t decode; /**< \copydoc ::vpx_codec_decode_fn_t */
+ vpx_codec_get_frame_fn_t get_frame; /**< \copydoc ::vpx_codec_get_frame_fn_t */
+ } dec;
+ struct vpx_codec_enc_iface {
+ vpx_codec_enc_cfg_map_t *cfg_maps; /**< \copydoc ::vpx_codec_enc_cfg_map_t */
+ vpx_codec_encode_fn_t encode; /**< \copydoc ::vpx_codec_encode_fn_t */
+ vpx_codec_get_cx_data_fn_t get_cx_data; /**< \copydoc ::vpx_codec_get_cx_data_fn_t */
+ vpx_codec_enc_config_set_fn_t cfg_set; /**< \copydoc ::vpx_codec_enc_config_set_fn_t */
+ vpx_codec_get_global_headers_fn_t get_glob_hdrs; /**< \copydoc ::vpx_codec_get_global_headers_fn_t */
+ vpx_codec_get_preview_frame_fn_t get_preview; /**< \copydoc ::vpx_codec_get_preview_frame_fn_t */
+ vpx_codec_enc_mr_get_mem_loc_fn_t mr_get_mem_loc; /**< \copydoc ::vpx_codec_enc_mr_get_mem_loc_fn_t */
+ } enc;
};
/*!\brief Callback function pointer / user data pair storage */
-typedef struct vpx_codec_priv_cb_pair
-{
- union
- {
- vpx_codec_put_frame_cb_fn_t put_frame;
- vpx_codec_put_slice_cb_fn_t put_slice;
- } u;
- void *user_priv;
+typedef struct vpx_codec_priv_cb_pair {
+ union {
+ vpx_codec_put_frame_cb_fn_t put_frame;
+ vpx_codec_put_slice_cb_fn_t put_slice;
+ } u;
+ void *user_priv;
} vpx_codec_priv_cb_pair_t;
@@ -341,27 +334,24 @@
* structure can be made the first member of the algorithm specific structure,
* and the pointer cast to the proper type.
*/
-struct vpx_codec_priv
-{
- unsigned int sz;
- vpx_codec_iface_t *iface;
- struct vpx_codec_alg_priv *alg_priv;
- const char *err_detail;
- vpx_codec_flags_t init_flags;
- struct
- {
- vpx_codec_priv_cb_pair_t put_frame_cb;
- vpx_codec_priv_cb_pair_t put_slice_cb;
- } dec;
- struct
- {
- int tbd;
- struct vpx_fixed_buf cx_data_dst_buf;
- unsigned int cx_data_pad_before;
- unsigned int cx_data_pad_after;
- vpx_codec_cx_pkt_t cx_data_pkt;
- unsigned int total_encoders;
- } enc;
+struct vpx_codec_priv {
+ unsigned int sz;
+ vpx_codec_iface_t *iface;
+ struct vpx_codec_alg_priv *alg_priv;
+ const char *err_detail;
+ vpx_codec_flags_t init_flags;
+ struct {
+ vpx_codec_priv_cb_pair_t put_frame_cb;
+ vpx_codec_priv_cb_pair_t put_slice_cb;
+ } dec;
+ struct {
+ int tbd;
+ struct vpx_fixed_buf cx_data_dst_buf;
+ unsigned int cx_data_pad_before;
+ unsigned int cx_data_pad_after;
+ vpx_codec_cx_pkt_t cx_data_pkt;
+ unsigned int total_encoders;
+ } enc;
};
/*
@@ -377,32 +367,32 @@
#undef VPX_CTRL_USE_TYPE
#define VPX_CTRL_USE_TYPE(id, typ) \
- static typ id##__value(va_list args) {return va_arg(args, typ);} \
- static typ id##__convert(void *x)\
+ static typ id##__value(va_list args) {return va_arg(args, typ);} \
+ static typ id##__convert(void *x)\
+ {\
+ union\
{\
- union\
- {\
- void *x;\
- typ d;\
- } u;\
- u.x = x;\
- return u.d;\
- }
+ void *x;\
+ typ d;\
+ } u;\
+ u.x = x;\
+ return u.d;\
+ }
#undef VPX_CTRL_USE_TYPE_DEPRECATED
#define VPX_CTRL_USE_TYPE_DEPRECATED(id, typ) \
- static typ id##__value(va_list args) {return va_arg(args, typ);} \
- static typ id##__convert(void *x)\
+ static typ id##__value(va_list args) {return va_arg(args, typ);} \
+ static typ id##__convert(void *x)\
+ {\
+ union\
{\
- union\
- {\
- void *x;\
- typ d;\
- } u;\
- u.x = x;\
- return u.d;\
- }
+ void *x;\
+ typ d;\
+ } u;\
+ u.x = x;\
+ return u.d;\
+ }
#define CAST(id, arg) id##__value(arg)
#define RECAST(id, x) id##__convert(x)
@@ -418,8 +408,8 @@
* macro is provided to define this getter function automatically.
*/
#define CODEC_INTERFACE(id)\
-vpx_codec_iface_t* id(void) { return &id##_algo; }\
-vpx_codec_iface_t id##_algo
+ vpx_codec_iface_t* id(void) { return &id##_algo; }\
+ vpx_codec_iface_t id##_algo
/* Internal Utility Functions
@@ -427,64 +417,60 @@
* The following functions are intended to be used inside algorithms as
* utilities for manipulating vpx_codec_* data structures.
*/
-struct vpx_codec_pkt_list
-{
- unsigned int cnt;
- unsigned int max;
- struct vpx_codec_cx_pkt pkts[1];
+struct vpx_codec_pkt_list {
+ unsigned int cnt;
+ unsigned int max;
+ struct vpx_codec_cx_pkt pkts[1];
};
#define vpx_codec_pkt_list_decl(n)\
- union {struct vpx_codec_pkt_list head;\
- struct {struct vpx_codec_pkt_list head;\
- struct vpx_codec_cx_pkt pkts[n];} alloc;}
+ union {struct vpx_codec_pkt_list head;\
+ struct {struct vpx_codec_pkt_list head;\
+ struct vpx_codec_cx_pkt pkts[n];} alloc;}
#define vpx_codec_pkt_list_init(m)\
- (m)->alloc.head.cnt = 0,\
- (m)->alloc.head.max = sizeof((m)->alloc.pkts) / sizeof((m)->alloc.pkts[0])
+ (m)->alloc.head.cnt = 0,\
+ (m)->alloc.head.max = sizeof((m)->alloc.pkts) / sizeof((m)->alloc.pkts[0])
int
vpx_codec_pkt_list_add(struct vpx_codec_pkt_list *,
const struct vpx_codec_cx_pkt *);
-const vpx_codec_cx_pkt_t*
+const vpx_codec_cx_pkt_t *
vpx_codec_pkt_list_get(struct vpx_codec_pkt_list *list,
vpx_codec_iter_t *iter);
#include <stdio.h>
#include <setjmp.h>
-struct vpx_internal_error_info
-{
- vpx_codec_err_t error_code;
- int has_detail;
- char detail[80];
- int setjmp;
- jmp_buf jmp;
+struct vpx_internal_error_info {
+ vpx_codec_err_t error_code;
+ int has_detail;
+ char detail[80];
+ int setjmp;
+ jmp_buf jmp;
};
static void vpx_internal_error(struct vpx_internal_error_info *info,
vpx_codec_err_t error,
const char *fmt,
- ...)
-{
- va_list ap;
+ ...) {
+ va_list ap;
- info->error_code = error;
- info->has_detail = 0;
+ info->error_code = error;
+ info->has_detail = 0;
- if (fmt)
- {
- size_t sz = sizeof(info->detail);
+ if (fmt) {
+ size_t sz = sizeof(info->detail);
- info->has_detail = 1;
- va_start(ap, fmt);
- vsnprintf(info->detail, sz - 1, fmt, ap);
- va_end(ap);
- info->detail[sz-1] = '\0';
- }
+ info->has_detail = 1;
+ va_start(ap, fmt);
+ vsnprintf(info->detail, sz - 1, fmt, ap);
+ va_end(ap);
+ info->detail[sz - 1] = '\0';
+ }
- if (info->setjmp)
- longjmp(info->jmp, info->error_code);
+ if (info->setjmp)
+ longjmp(info->jmp, info->error_code);
}
#endif
diff --git a/vpx/src/vpx_codec.c b/vpx/src/vpx_codec.c
index f1a8b67..61d7f4c 100644
--- a/vpx/src/vpx_codec.c
+++ b/vpx/src/vpx_codec.c
@@ -20,131 +20,116 @@
#define SAVE_STATUS(ctx,var) (ctx?(ctx->err = var):var)
-int vpx_codec_version(void)
-{
- return VERSION_PACKED;
+int vpx_codec_version(void) {
+ return VERSION_PACKED;
}
-const char *vpx_codec_version_str(void)
-{
- return VERSION_STRING_NOSP;
+const char *vpx_codec_version_str(void) {
+ return VERSION_STRING_NOSP;
}
-const char *vpx_codec_version_extra_str(void)
-{
- return VERSION_EXTRA;
+const char *vpx_codec_version_extra_str(void) {
+ return VERSION_EXTRA;
}
-const char *vpx_codec_iface_name(vpx_codec_iface_t *iface)
-{
- return iface ? iface->name : "<invalid interface>";
+const char *vpx_codec_iface_name(vpx_codec_iface_t *iface) {
+ return iface ? iface->name : "<invalid interface>";
}
-const char *vpx_codec_err_to_string(vpx_codec_err_t err)
-{
- switch (err)
- {
+const char *vpx_codec_err_to_string(vpx_codec_err_t err) {
+ switch (err) {
case VPX_CODEC_OK:
- return "Success";
+ return "Success";
case VPX_CODEC_ERROR:
- return "Unspecified internal error";
+ return "Unspecified internal error";
case VPX_CODEC_MEM_ERROR:
- return "Memory allocation error";
+ return "Memory allocation error";
case VPX_CODEC_ABI_MISMATCH:
- return "ABI version mismatch";
+ return "ABI version mismatch";
case VPX_CODEC_INCAPABLE:
- return "Codec does not implement requested capability";
+ return "Codec does not implement requested capability";
case VPX_CODEC_UNSUP_BITSTREAM:
- return "Bitstream not supported by this decoder";
+ return "Bitstream not supported by this decoder";
case VPX_CODEC_UNSUP_FEATURE:
- return "Bitstream required feature not supported by this decoder";
+ return "Bitstream required feature not supported by this decoder";
case VPX_CODEC_CORRUPT_FRAME:
- return "Corrupt frame detected";
+ return "Corrupt frame detected";
case VPX_CODEC_INVALID_PARAM:
- return "Invalid parameter";
+ return "Invalid parameter";
case VPX_CODEC_LIST_END:
- return "End of iterated list";
- }
+ return "End of iterated list";
+ }
- return "Unrecognized error code";
+ return "Unrecognized error code";
}
-const char *vpx_codec_error(vpx_codec_ctx_t *ctx)
-{
- return (ctx) ? vpx_codec_err_to_string(ctx->err)
- : vpx_codec_err_to_string(VPX_CODEC_INVALID_PARAM);
+const char *vpx_codec_error(vpx_codec_ctx_t *ctx) {
+ return (ctx) ? vpx_codec_err_to_string(ctx->err)
+ : vpx_codec_err_to_string(VPX_CODEC_INVALID_PARAM);
}
-const char *vpx_codec_error_detail(vpx_codec_ctx_t *ctx)
-{
- if (ctx && ctx->err)
- return ctx->priv ? ctx->priv->err_detail : ctx->err_detail;
+const char *vpx_codec_error_detail(vpx_codec_ctx_t *ctx) {
+ if (ctx && ctx->err)
+ return ctx->priv ? ctx->priv->err_detail : ctx->err_detail;
- return NULL;
+ return NULL;
}
-vpx_codec_err_t vpx_codec_destroy(vpx_codec_ctx_t *ctx)
-{
- vpx_codec_err_t res;
+vpx_codec_err_t vpx_codec_destroy(vpx_codec_ctx_t *ctx) {
+ vpx_codec_err_t res;
- if (!ctx)
- res = VPX_CODEC_INVALID_PARAM;
- else if (!ctx->iface || !ctx->priv)
- res = VPX_CODEC_ERROR;
- else
- {
- if (ctx->priv->alg_priv)
- ctx->iface->destroy(ctx->priv->alg_priv);
+ if (!ctx)
+ res = VPX_CODEC_INVALID_PARAM;
+ else if (!ctx->iface || !ctx->priv)
+ res = VPX_CODEC_ERROR;
+ else {
+ if (ctx->priv->alg_priv)
+ ctx->iface->destroy(ctx->priv->alg_priv);
- ctx->iface = NULL;
- ctx->name = NULL;
- ctx->priv = NULL;
- res = VPX_CODEC_OK;
- }
+ ctx->iface = NULL;
+ ctx->name = NULL;
+ ctx->priv = NULL;
+ res = VPX_CODEC_OK;
+ }
- return SAVE_STATUS(ctx, res);
+ return SAVE_STATUS(ctx, res);
}
-vpx_codec_caps_t vpx_codec_get_caps(vpx_codec_iface_t *iface)
-{
- return (iface) ? iface->caps : 0;
+vpx_codec_caps_t vpx_codec_get_caps(vpx_codec_iface_t *iface) {
+ return (iface) ? iface->caps : 0;
}
vpx_codec_err_t vpx_codec_control_(vpx_codec_ctx_t *ctx,
int ctrl_id,
- ...)
-{
- vpx_codec_err_t res;
+ ...) {
+ vpx_codec_err_t res;
- if (!ctx || !ctrl_id)
- res = VPX_CODEC_INVALID_PARAM;
- else if (!ctx->iface || !ctx->priv || !ctx->iface->ctrl_maps)
- res = VPX_CODEC_ERROR;
- else
- {
- vpx_codec_ctrl_fn_map_t *entry;
+ if (!ctx || !ctrl_id)
+ res = VPX_CODEC_INVALID_PARAM;
+ else if (!ctx->iface || !ctx->priv || !ctx->iface->ctrl_maps)
+ res = VPX_CODEC_ERROR;
+ else {
+ vpx_codec_ctrl_fn_map_t *entry;
- res = VPX_CODEC_ERROR;
+ res = VPX_CODEC_ERROR;
- for (entry = ctx->iface->ctrl_maps; entry && entry->fn; entry++)
- {
- if (!entry->ctrl_id || entry->ctrl_id == ctrl_id)
- {
- va_list ap;
+ for (entry = ctx->iface->ctrl_maps; entry && entry->fn; entry++) {
+ if (!entry->ctrl_id || entry->ctrl_id == ctrl_id) {
+ va_list ap;
- va_start(ap, ctrl_id);
- res = entry->fn(ctx->priv->alg_priv, ctrl_id, ap);
- va_end(ap);
- break;
- }
- }
+ va_start(ap, ctrl_id);
+ res = entry->fn(ctx->priv->alg_priv, ctrl_id, ap);
+ va_end(ap);
+ break;
+ }
}
+ }
- return SAVE_STATUS(ctx, res);
+ return SAVE_STATUS(ctx, res);
}
diff --git a/vpx/src/vpx_decoder.c b/vpx/src/vpx_decoder.c
index 59a783d..1f575e0 100644
--- a/vpx/src/vpx_decoder.c
+++ b/vpx/src/vpx_decoder.c
@@ -22,99 +22,91 @@
vpx_codec_iface_t *iface,
vpx_codec_dec_cfg_t *cfg,
vpx_codec_flags_t flags,
- int ver)
-{
- vpx_codec_err_t res;
+ int ver) {
+ vpx_codec_err_t res;
- if (ver != VPX_DECODER_ABI_VERSION)
- res = VPX_CODEC_ABI_MISMATCH;
- else if (!ctx || !iface)
- res = VPX_CODEC_INVALID_PARAM;
- else if (iface->abi_version != VPX_CODEC_INTERNAL_ABI_VERSION)
- res = VPX_CODEC_ABI_MISMATCH;
- else if ((flags & VPX_CODEC_USE_XMA) && !(iface->caps & VPX_CODEC_CAP_XMA))
- res = VPX_CODEC_INCAPABLE;
- else if ((flags & VPX_CODEC_USE_POSTPROC) && !(iface->caps & VPX_CODEC_CAP_POSTPROC))
- res = VPX_CODEC_INCAPABLE;
- else if ((flags & VPX_CODEC_USE_ERROR_CONCEALMENT) &&
- !(iface->caps & VPX_CODEC_CAP_ERROR_CONCEALMENT))
- res = VPX_CODEC_INCAPABLE;
- else if ((flags & VPX_CODEC_USE_INPUT_FRAGMENTS) &&
- !(iface->caps & VPX_CODEC_CAP_INPUT_FRAGMENTS))
- res = VPX_CODEC_INCAPABLE;
- else if (!(iface->caps & VPX_CODEC_CAP_DECODER))
- res = VPX_CODEC_INCAPABLE;
- else
- {
- memset(ctx, 0, sizeof(*ctx));
- ctx->iface = iface;
- ctx->name = iface->name;
- ctx->priv = NULL;
- ctx->init_flags = flags;
- ctx->config.dec = cfg;
- res = VPX_CODEC_OK;
+ if (ver != VPX_DECODER_ABI_VERSION)
+ res = VPX_CODEC_ABI_MISMATCH;
+ else if (!ctx || !iface)
+ res = VPX_CODEC_INVALID_PARAM;
+ else if (iface->abi_version != VPX_CODEC_INTERNAL_ABI_VERSION)
+ res = VPX_CODEC_ABI_MISMATCH;
+ else if ((flags & VPX_CODEC_USE_XMA) && !(iface->caps & VPX_CODEC_CAP_XMA))
+ res = VPX_CODEC_INCAPABLE;
+ else if ((flags & VPX_CODEC_USE_POSTPROC) && !(iface->caps & VPX_CODEC_CAP_POSTPROC))
+ res = VPX_CODEC_INCAPABLE;
+ else if ((flags & VPX_CODEC_USE_ERROR_CONCEALMENT) &&
+ !(iface->caps & VPX_CODEC_CAP_ERROR_CONCEALMENT))
+ res = VPX_CODEC_INCAPABLE;
+ else if ((flags & VPX_CODEC_USE_INPUT_FRAGMENTS) &&
+ !(iface->caps & VPX_CODEC_CAP_INPUT_FRAGMENTS))
+ res = VPX_CODEC_INCAPABLE;
+ else if (!(iface->caps & VPX_CODEC_CAP_DECODER))
+ res = VPX_CODEC_INCAPABLE;
+ else {
+ memset(ctx, 0, sizeof(*ctx));
+ ctx->iface = iface;
+ ctx->name = iface->name;
+ ctx->priv = NULL;
+ ctx->init_flags = flags;
+ ctx->config.dec = cfg;
+ res = VPX_CODEC_OK;
- if (!(flags & VPX_CODEC_USE_XMA))
- {
- res = ctx->iface->init(ctx, NULL);
+ if (!(flags & VPX_CODEC_USE_XMA)) {
+ res = ctx->iface->init(ctx, NULL);
- if (res)
- {
- ctx->err_detail = ctx->priv ? ctx->priv->err_detail : NULL;
- vpx_codec_destroy(ctx);
- }
+ if (res) {
+ ctx->err_detail = ctx->priv ? ctx->priv->err_detail : NULL;
+ vpx_codec_destroy(ctx);
+ }
- if (ctx->priv)
- ctx->priv->iface = ctx->iface;
- }
+ if (ctx->priv)
+ ctx->priv->iface = ctx->iface;
}
+ }
- return SAVE_STATUS(ctx, res);
+ return SAVE_STATUS(ctx, res);
}
vpx_codec_err_t vpx_codec_peek_stream_info(vpx_codec_iface_t *iface,
- const uint8_t *data,
- unsigned int data_sz,
- vpx_codec_stream_info_t *si)
-{
- vpx_codec_err_t res;
+ const uint8_t *data,
+ unsigned int data_sz,
+ vpx_codec_stream_info_t *si) {
+ vpx_codec_err_t res;
- if (!iface || !data || !data_sz || !si
- || si->sz < sizeof(vpx_codec_stream_info_t))
- res = VPX_CODEC_INVALID_PARAM;
- else
- {
- /* Set default/unknown values */
- si->w = 0;
- si->h = 0;
+ if (!iface || !data || !data_sz || !si
+ || si->sz < sizeof(vpx_codec_stream_info_t))
+ res = VPX_CODEC_INVALID_PARAM;
+ else {
+ /* Set default/unknown values */
+ si->w = 0;
+ si->h = 0;
- res = iface->dec.peek_si(data, data_sz, si);
- }
+ res = iface->dec.peek_si(data, data_sz, si);
+ }
- return res;
+ return res;
}
vpx_codec_err_t vpx_codec_get_stream_info(vpx_codec_ctx_t *ctx,
- vpx_codec_stream_info_t *si)
-{
- vpx_codec_err_t res;
+ vpx_codec_stream_info_t *si) {
+ vpx_codec_err_t res;
- if (!ctx || !si || si->sz < sizeof(vpx_codec_stream_info_t))
- res = VPX_CODEC_INVALID_PARAM;
- else if (!ctx->iface || !ctx->priv)
- res = VPX_CODEC_ERROR;
- else
- {
- /* Set default/unknown values */
- si->w = 0;
- si->h = 0;
+ if (!ctx || !si || si->sz < sizeof(vpx_codec_stream_info_t))
+ res = VPX_CODEC_INVALID_PARAM;
+ else if (!ctx->iface || !ctx->priv)
+ res = VPX_CODEC_ERROR;
+ else {
+ /* Set default/unknown values */
+ si->w = 0;
+ si->h = 0;
- res = ctx->iface->dec.get_si(ctx->priv->alg_priv, si);
- }
+ res = ctx->iface->dec.get_si(ctx->priv->alg_priv, si);
+ }
- return SAVE_STATUS(ctx, res);
+ return SAVE_STATUS(ctx, res);
}
@@ -122,126 +114,115 @@
const uint8_t *data,
unsigned int data_sz,
void *user_priv,
- long deadline)
-{
- vpx_codec_err_t res;
+ long deadline) {
+ vpx_codec_err_t res;
- /* Sanity checks */
- /* NULL data ptr allowed if data_sz is 0 too */
- if (!ctx || (!data && data_sz))
- res = VPX_CODEC_INVALID_PARAM;
- else if (!ctx->iface || !ctx->priv)
- res = VPX_CODEC_ERROR;
- else
- {
- res = ctx->iface->dec.decode(ctx->priv->alg_priv, data, data_sz,
- user_priv, deadline);
- }
+ /* Sanity checks */
+ /* NULL data ptr allowed if data_sz is 0 too */
+ if (!ctx || (!data && data_sz))
+ res = VPX_CODEC_INVALID_PARAM;
+ else if (!ctx->iface || !ctx->priv)
+ res = VPX_CODEC_ERROR;
+ else {
+ res = ctx->iface->dec.decode(ctx->priv->alg_priv, data, data_sz,
+ user_priv, deadline);
+ }
- return SAVE_STATUS(ctx, res);
+ return SAVE_STATUS(ctx, res);
}
vpx_image_t *vpx_codec_get_frame(vpx_codec_ctx_t *ctx,
- vpx_codec_iter_t *iter)
-{
- vpx_image_t *img;
+ vpx_codec_iter_t *iter) {
+ vpx_image_t *img;
- if (!ctx || !iter || !ctx->iface || !ctx->priv)
- img = NULL;
- else
- img = ctx->iface->dec.get_frame(ctx->priv->alg_priv, iter);
+ if (!ctx || !iter || !ctx->iface || !ctx->priv)
+ img = NULL;
+ else
+ img = ctx->iface->dec.get_frame(ctx->priv->alg_priv, iter);
- return img;
+ return img;
}
vpx_codec_err_t vpx_codec_register_put_frame_cb(vpx_codec_ctx_t *ctx,
- vpx_codec_put_frame_cb_fn_t cb,
- void *user_priv)
-{
- vpx_codec_err_t res;
+ vpx_codec_put_frame_cb_fn_t cb,
+ void *user_priv) {
+ vpx_codec_err_t res;
- if (!ctx || !cb)
- res = VPX_CODEC_INVALID_PARAM;
- else if (!ctx->iface || !ctx->priv
- || !(ctx->iface->caps & VPX_CODEC_CAP_PUT_FRAME))
- res = VPX_CODEC_ERROR;
- else
- {
- ctx->priv->dec.put_frame_cb.u.put_frame = cb;
- ctx->priv->dec.put_frame_cb.user_priv = user_priv;
- res = VPX_CODEC_OK;
- }
+ if (!ctx || !cb)
+ res = VPX_CODEC_INVALID_PARAM;
+ else if (!ctx->iface || !ctx->priv
+ || !(ctx->iface->caps & VPX_CODEC_CAP_PUT_FRAME))
+ res = VPX_CODEC_ERROR;
+ else {
+ ctx->priv->dec.put_frame_cb.u.put_frame = cb;
+ ctx->priv->dec.put_frame_cb.user_priv = user_priv;
+ res = VPX_CODEC_OK;
+ }
- return SAVE_STATUS(ctx, res);
+ return SAVE_STATUS(ctx, res);
}
vpx_codec_err_t vpx_codec_register_put_slice_cb(vpx_codec_ctx_t *ctx,
- vpx_codec_put_slice_cb_fn_t cb,
- void *user_priv)
-{
- vpx_codec_err_t res;
+ vpx_codec_put_slice_cb_fn_t cb,
+ void *user_priv) {
+ vpx_codec_err_t res;
- if (!ctx || !cb)
- res = VPX_CODEC_INVALID_PARAM;
- else if (!ctx->iface || !ctx->priv
- || !(ctx->iface->caps & VPX_CODEC_CAP_PUT_FRAME))
- res = VPX_CODEC_ERROR;
- else
- {
- ctx->priv->dec.put_slice_cb.u.put_slice = cb;
- ctx->priv->dec.put_slice_cb.user_priv = user_priv;
- res = VPX_CODEC_OK;
- }
+ if (!ctx || !cb)
+ res = VPX_CODEC_INVALID_PARAM;
+ else if (!ctx->iface || !ctx->priv
+ || !(ctx->iface->caps & VPX_CODEC_CAP_PUT_FRAME))
+ res = VPX_CODEC_ERROR;
+ else {
+ ctx->priv->dec.put_slice_cb.u.put_slice = cb;
+ ctx->priv->dec.put_slice_cb.user_priv = user_priv;
+ res = VPX_CODEC_OK;
+ }
- return SAVE_STATUS(ctx, res);
+ return SAVE_STATUS(ctx, res);
}
vpx_codec_err_t vpx_codec_get_mem_map(vpx_codec_ctx_t *ctx,
vpx_codec_mmap_t *mmap,
- vpx_codec_iter_t *iter)
-{
- vpx_codec_err_t res = VPX_CODEC_OK;
+ vpx_codec_iter_t *iter) {
+ vpx_codec_err_t res = VPX_CODEC_OK;
- if (!ctx || !mmap || !iter || !ctx->iface)
- res = VPX_CODEC_INVALID_PARAM;
- else if (!(ctx->iface->caps & VPX_CODEC_CAP_XMA))
- res = VPX_CODEC_ERROR;
- else
- res = ctx->iface->get_mmap(ctx, mmap, iter);
+ if (!ctx || !mmap || !iter || !ctx->iface)
+ res = VPX_CODEC_INVALID_PARAM;
+ else if (!(ctx->iface->caps & VPX_CODEC_CAP_XMA))
+ res = VPX_CODEC_ERROR;
+ else
+ res = ctx->iface->get_mmap(ctx, mmap, iter);
- return SAVE_STATUS(ctx, res);
+ return SAVE_STATUS(ctx, res);
}
vpx_codec_err_t vpx_codec_set_mem_map(vpx_codec_ctx_t *ctx,
vpx_codec_mmap_t *mmap,
- unsigned int num_maps)
-{
- vpx_codec_err_t res = VPX_CODEC_MEM_ERROR;
+ unsigned int num_maps) {
+ vpx_codec_err_t res = VPX_CODEC_MEM_ERROR;
- if (!ctx || !mmap || !ctx->iface)
- res = VPX_CODEC_INVALID_PARAM;
- else if (!(ctx->iface->caps & VPX_CODEC_CAP_XMA))
- res = VPX_CODEC_ERROR;
- else
- {
- unsigned int i;
+ if (!ctx || !mmap || !ctx->iface)
+ res = VPX_CODEC_INVALID_PARAM;
+ else if (!(ctx->iface->caps & VPX_CODEC_CAP_XMA))
+ res = VPX_CODEC_ERROR;
+ else {
+ unsigned int i;
- for (i = 0; i < num_maps; i++, mmap++)
- {
- if (!mmap->base)
- break;
+ for (i = 0; i < num_maps; i++, mmap++) {
+ if (!mmap->base)
+ break;
- /* Everything look ok, set the mmap in the decoder */
- res = ctx->iface->set_mmap(ctx, mmap);
+ /* Everything looks ok, set the mmap in the decoder */
+ res = ctx->iface->set_mmap(ctx, mmap);
- if (res)
- break;
- }
+ if (res)
+ break;
}
+ }
- return SAVE_STATUS(ctx, res);
+ return SAVE_STATUS(ctx, res);
}
diff --git a/vpx/src/vpx_encoder.c b/vpx/src/vpx_encoder.c
index 73c1c66..3cec895 100644
--- a/vpx/src/vpx_encoder.c
+++ b/vpx/src/vpx_encoder.c
@@ -24,46 +24,43 @@
vpx_codec_iface_t *iface,
vpx_codec_enc_cfg_t *cfg,
vpx_codec_flags_t flags,
- int ver)
-{
- vpx_codec_err_t res;
+ int ver) {
+ vpx_codec_err_t res;
- if (ver != VPX_ENCODER_ABI_VERSION)
- res = VPX_CODEC_ABI_MISMATCH;
- else if (!ctx || !iface || !cfg)
- res = VPX_CODEC_INVALID_PARAM;
- else if (iface->abi_version != VPX_CODEC_INTERNAL_ABI_VERSION)
- res = VPX_CODEC_ABI_MISMATCH;
- else if (!(iface->caps & VPX_CODEC_CAP_ENCODER))
- res = VPX_CODEC_INCAPABLE;
- else if ((flags & VPX_CODEC_USE_XMA) && !(iface->caps & VPX_CODEC_CAP_XMA))
- res = VPX_CODEC_INCAPABLE;
- else if ((flags & VPX_CODEC_USE_PSNR)
- && !(iface->caps & VPX_CODEC_CAP_PSNR))
- res = VPX_CODEC_INCAPABLE;
- else if ((flags & VPX_CODEC_USE_OUTPUT_PARTITION)
- && !(iface->caps & VPX_CODEC_CAP_OUTPUT_PARTITION))
- res = VPX_CODEC_INCAPABLE;
- else
- {
- ctx->iface = iface;
- ctx->name = iface->name;
- ctx->priv = NULL;
- ctx->init_flags = flags;
- ctx->config.enc = cfg;
- res = ctx->iface->init(ctx, NULL);
+ if (ver != VPX_ENCODER_ABI_VERSION)
+ res = VPX_CODEC_ABI_MISMATCH;
+ else if (!ctx || !iface || !cfg)
+ res = VPX_CODEC_INVALID_PARAM;
+ else if (iface->abi_version != VPX_CODEC_INTERNAL_ABI_VERSION)
+ res = VPX_CODEC_ABI_MISMATCH;
+ else if (!(iface->caps & VPX_CODEC_CAP_ENCODER))
+ res = VPX_CODEC_INCAPABLE;
+ else if ((flags & VPX_CODEC_USE_XMA) && !(iface->caps & VPX_CODEC_CAP_XMA))
+ res = VPX_CODEC_INCAPABLE;
+ else if ((flags & VPX_CODEC_USE_PSNR)
+ && !(iface->caps & VPX_CODEC_CAP_PSNR))
+ res = VPX_CODEC_INCAPABLE;
+ else if ((flags & VPX_CODEC_USE_OUTPUT_PARTITION)
+ && !(iface->caps & VPX_CODEC_CAP_OUTPUT_PARTITION))
+ res = VPX_CODEC_INCAPABLE;
+ else {
+ ctx->iface = iface;
+ ctx->name = iface->name;
+ ctx->priv = NULL;
+ ctx->init_flags = flags;
+ ctx->config.enc = cfg;
+ res = ctx->iface->init(ctx, NULL);
- if (res)
- {
- ctx->err_detail = ctx->priv ? ctx->priv->err_detail : NULL;
- vpx_codec_destroy(ctx);
- }
-
- if (ctx->priv)
- ctx->priv->iface = ctx->iface;
+ if (res) {
+ ctx->err_detail = ctx->priv ? ctx->priv->err_detail : NULL;
+ vpx_codec_destroy(ctx);
}
- return SAVE_STATUS(ctx, res);
+ if (ctx->priv)
+ ctx->priv->iface = ctx->iface;
+ }
+
+ return SAVE_STATUS(ctx, res);
}
vpx_codec_err_t vpx_codec_enc_init_multi_ver(vpx_codec_ctx_t *ctx,
@@ -72,128 +69,117 @@
int num_enc,
vpx_codec_flags_t flags,
vpx_rational_t *dsf,
- int ver)
-{
- vpx_codec_err_t res = 0;
+ int ver) {
+ vpx_codec_err_t res = 0;
- if (ver != VPX_ENCODER_ABI_VERSION)
- res = VPX_CODEC_ABI_MISMATCH;
- else if (!ctx || !iface || !cfg || (num_enc > 16 || num_enc < 1))
- res = VPX_CODEC_INVALID_PARAM;
- else if (iface->abi_version != VPX_CODEC_INTERNAL_ABI_VERSION)
- res = VPX_CODEC_ABI_MISMATCH;
- else if (!(iface->caps & VPX_CODEC_CAP_ENCODER))
- res = VPX_CODEC_INCAPABLE;
- else if ((flags & VPX_CODEC_USE_XMA) && !(iface->caps & VPX_CODEC_CAP_XMA))
- res = VPX_CODEC_INCAPABLE;
- else if ((flags & VPX_CODEC_USE_PSNR)
- && !(iface->caps & VPX_CODEC_CAP_PSNR))
- res = VPX_CODEC_INCAPABLE;
- else if ((flags & VPX_CODEC_USE_OUTPUT_PARTITION)
- && !(iface->caps & VPX_CODEC_CAP_OUTPUT_PARTITION))
- res = VPX_CODEC_INCAPABLE;
- else
- {
- int i;
- void *mem_loc = NULL;
+ if (ver != VPX_ENCODER_ABI_VERSION)
+ res = VPX_CODEC_ABI_MISMATCH;
+ else if (!ctx || !iface || !cfg || (num_enc > 16 || num_enc < 1))
+ res = VPX_CODEC_INVALID_PARAM;
+ else if (iface->abi_version != VPX_CODEC_INTERNAL_ABI_VERSION)
+ res = VPX_CODEC_ABI_MISMATCH;
+ else if (!(iface->caps & VPX_CODEC_CAP_ENCODER))
+ res = VPX_CODEC_INCAPABLE;
+ else if ((flags & VPX_CODEC_USE_XMA) && !(iface->caps & VPX_CODEC_CAP_XMA))
+ res = VPX_CODEC_INCAPABLE;
+ else if ((flags & VPX_CODEC_USE_PSNR)
+ && !(iface->caps & VPX_CODEC_CAP_PSNR))
+ res = VPX_CODEC_INCAPABLE;
+ else if ((flags & VPX_CODEC_USE_OUTPUT_PARTITION)
+ && !(iface->caps & VPX_CODEC_CAP_OUTPUT_PARTITION))
+ res = VPX_CODEC_INCAPABLE;
+ else {
+ int i;
+ void *mem_loc = NULL;
- if(!(res = iface->enc.mr_get_mem_loc(cfg, &mem_loc)))
- {
- for (i = 0; i < num_enc; i++)
- {
- vpx_codec_priv_enc_mr_cfg_t mr_cfg;
+ if (!(res = iface->enc.mr_get_mem_loc(cfg, &mem_loc))) {
+ for (i = 0; i < num_enc; i++) {
+ vpx_codec_priv_enc_mr_cfg_t mr_cfg;
- /* Validate down-sampling factor. */
- if(dsf->num < 1 || dsf->num > 4096 || dsf->den < 1 ||
- dsf->den > dsf->num)
- {
- res = VPX_CODEC_INVALID_PARAM;
- break;
- }
-
- mr_cfg.mr_low_res_mode_info = mem_loc;
- mr_cfg.mr_total_resolutions = num_enc;
- mr_cfg.mr_encoder_id = num_enc-1-i;
- mr_cfg.mr_down_sampling_factor.num = dsf->num;
- mr_cfg.mr_down_sampling_factor.den = dsf->den;
-
- /* Force Key-frame synchronization. Namely, encoder at higher
- * resolution always use the same frame_type chosen by the
- * lowest-resolution encoder.
- */
- if(mr_cfg.mr_encoder_id)
- cfg->kf_mode = VPX_KF_DISABLED;
-
- ctx->iface = iface;
- ctx->name = iface->name;
- ctx->priv = NULL;
- ctx->init_flags = flags;
- ctx->config.enc = cfg;
- res = ctx->iface->init(ctx, &mr_cfg);
-
- if (res)
- {
- const char *error_detail =
- ctx->priv ? ctx->priv->err_detail : NULL;
- /* Destroy current ctx */
- ctx->err_detail = error_detail;
- vpx_codec_destroy(ctx);
-
- /* Destroy already allocated high-level ctx */
- while (i)
- {
- ctx--;
- ctx->err_detail = error_detail;
- vpx_codec_destroy(ctx);
- i--;
- }
- }
-
- if (ctx->priv)
- ctx->priv->iface = ctx->iface;
-
- if (res)
- break;
-
- ctx++;
- cfg++;
- dsf++;
- }
+ /* Validate down-sampling factor. */
+ if (dsf->num < 1 || dsf->num > 4096 || dsf->den < 1 ||
+ dsf->den > dsf->num) {
+ res = VPX_CODEC_INVALID_PARAM;
+ break;
}
- }
- return SAVE_STATUS(ctx, res);
+ mr_cfg.mr_low_res_mode_info = mem_loc;
+ mr_cfg.mr_total_resolutions = num_enc;
+ mr_cfg.mr_encoder_id = num_enc - 1 - i;
+ mr_cfg.mr_down_sampling_factor.num = dsf->num;
+ mr_cfg.mr_down_sampling_factor.den = dsf->den;
+
+ /* Force Key-frame synchronization. Namely, encoder at higher
+ * resolution always use the same frame_type chosen by the
+ * lowest-resolution encoder.
+ */
+ if (mr_cfg.mr_encoder_id)
+ cfg->kf_mode = VPX_KF_DISABLED;
+
+ ctx->iface = iface;
+ ctx->name = iface->name;
+ ctx->priv = NULL;
+ ctx->init_flags = flags;
+ ctx->config.enc = cfg;
+ res = ctx->iface->init(ctx, &mr_cfg);
+
+ if (res) {
+ const char *error_detail =
+ ctx->priv ? ctx->priv->err_detail : NULL;
+ /* Destroy current ctx */
+ ctx->err_detail = error_detail;
+ vpx_codec_destroy(ctx);
+
+ /* Destroy already allocated high-level ctx */
+ while (i) {
+ ctx--;
+ ctx->err_detail = error_detail;
+ vpx_codec_destroy(ctx);
+ i--;
+ }
+ }
+
+ if (ctx->priv)
+ ctx->priv->iface = ctx->iface;
+
+ if (res)
+ break;
+
+ ctx++;
+ cfg++;
+ dsf++;
+ }
+ }
+ }
+
+ return SAVE_STATUS(ctx, res);
}
vpx_codec_err_t vpx_codec_enc_config_default(vpx_codec_iface_t *iface,
- vpx_codec_enc_cfg_t *cfg,
- unsigned int usage)
-{
- vpx_codec_err_t res;
- vpx_codec_enc_cfg_map_t *map;
+ vpx_codec_enc_cfg_t *cfg,
+ unsigned int usage) {
+ vpx_codec_err_t res;
+ vpx_codec_enc_cfg_map_t *map;
- if (!iface || !cfg || usage > INT_MAX)
- res = VPX_CODEC_INVALID_PARAM;
- else if (!(iface->caps & VPX_CODEC_CAP_ENCODER))
- res = VPX_CODEC_INCAPABLE;
- else
- {
- res = VPX_CODEC_INVALID_PARAM;
+ if (!iface || !cfg || usage > INT_MAX)
+ res = VPX_CODEC_INVALID_PARAM;
+ else if (!(iface->caps & VPX_CODEC_CAP_ENCODER))
+ res = VPX_CODEC_INCAPABLE;
+ else {
+ res = VPX_CODEC_INVALID_PARAM;
- for (map = iface->enc.cfg_maps; map->usage >= 0; map++)
- {
- if (map->usage == (int)usage)
- {
- *cfg = map->cfg;
- cfg->g_usage = usage;
- res = VPX_CODEC_OK;
- break;
- }
- }
+ for (map = iface->enc.cfg_maps; map->usage >= 0; map++) {
+ if (map->usage == (int)usage) {
+ *cfg = map->cfg;
+ cfg->g_usage = usage;
+ res = VPX_CODEC_OK;
+ break;
+ }
}
+ }
- return res;
+ return res;
}
@@ -203,9 +189,9 @@
*/
#include "vpx_ports/x86.h"
#define FLOATING_POINT_INIT() do {\
- unsigned short x87_orig_mode = x87_set_double_precision();
+ unsigned short x87_orig_mode = x87_set_double_precision();
#define FLOATING_POINT_RESTORE() \
- x87_set_control_word(x87_orig_mode); }while(0)
+ x87_set_control_word(x87_orig_mode); }while(0)
#else
@@ -219,224 +205,202 @@
vpx_codec_pts_t pts,
unsigned long duration,
vpx_enc_frame_flags_t flags,
- unsigned long deadline)
-{
- vpx_codec_err_t res = 0;
+ unsigned long deadline) {
+ vpx_codec_err_t res = 0;
- if (!ctx || (img && !duration))
- res = VPX_CODEC_INVALID_PARAM;
- else if (!ctx->iface || !ctx->priv)
- res = VPX_CODEC_ERROR;
- else if (!(ctx->iface->caps & VPX_CODEC_CAP_ENCODER))
- res = VPX_CODEC_INCAPABLE;
- else
- {
- /* Execute in a normalized floating point environment, if the platform
- * requires it.
- */
- unsigned int num_enc =ctx->priv->enc.total_encoders;
+ if (!ctx || (img && !duration))
+ res = VPX_CODEC_INVALID_PARAM;
+ else if (!ctx->iface || !ctx->priv)
+ res = VPX_CODEC_ERROR;
+ else if (!(ctx->iface->caps & VPX_CODEC_CAP_ENCODER))
+ res = VPX_CODEC_INCAPABLE;
+ else {
+ /* Execute in a normalized floating point environment, if the platform
+ * requires it.
+ */
+ unsigned int num_enc = ctx->priv->enc.total_encoders;
- FLOATING_POINT_INIT();
+ FLOATING_POINT_INIT();
- if (num_enc == 1)
- res = ctx->iface->enc.encode(ctx->priv->alg_priv, img, pts,
- duration, flags, deadline);
- else
- {
- /* Multi-resolution encoding:
- * Encode multi-levels in reverse order. For example,
- * if mr_total_resolutions = 3, first encode level 2,
- * then encode level 1, and finally encode level 0.
- */
- int i;
+ if (num_enc == 1)
+ res = ctx->iface->enc.encode(ctx->priv->alg_priv, img, pts,
+ duration, flags, deadline);
+ else {
+ /* Multi-resolution encoding:
+ * Encode multi-levels in reverse order. For example,
+ * if mr_total_resolutions = 3, first encode level 2,
+ * then encode level 1, and finally encode level 0.
+ */
+ int i;
- ctx += num_enc - 1;
- if (img) img += num_enc - 1;
+ ctx += num_enc - 1;
+ if (img) img += num_enc - 1;
- for (i = num_enc-1; i >= 0; i--)
- {
- if ((res = ctx->iface->enc.encode(ctx->priv->alg_priv, img, pts,
- duration, flags, deadline)))
- break;
+ for (i = num_enc - 1; i >= 0; i--) {
+ if ((res = ctx->iface->enc.encode(ctx->priv->alg_priv, img, pts,
+ duration, flags, deadline)))
+ break;
- ctx--;
- if (img) img--;
- }
- ctx++;
- }
-
- FLOATING_POINT_RESTORE();
+ ctx--;
+ if (img) img--;
+ }
+ ctx++;
}
- return SAVE_STATUS(ctx, res);
+ FLOATING_POINT_RESTORE();
+ }
+
+ return SAVE_STATUS(ctx, res);
}
const vpx_codec_cx_pkt_t *vpx_codec_get_cx_data(vpx_codec_ctx_t *ctx,
- vpx_codec_iter_t *iter)
-{
- const vpx_codec_cx_pkt_t *pkt = NULL;
+ vpx_codec_iter_t *iter) {
+ const vpx_codec_cx_pkt_t *pkt = NULL;
- if (ctx)
- {
- if (!iter)
- ctx->err = VPX_CODEC_INVALID_PARAM;
- else if (!ctx->iface || !ctx->priv)
- ctx->err = VPX_CODEC_ERROR;
- else if (!(ctx->iface->caps & VPX_CODEC_CAP_ENCODER))
- ctx->err = VPX_CODEC_INCAPABLE;
- else
- pkt = ctx->iface->enc.get_cx_data(ctx->priv->alg_priv, iter);
+ if (ctx) {
+ if (!iter)
+ ctx->err = VPX_CODEC_INVALID_PARAM;
+ else if (!ctx->iface || !ctx->priv)
+ ctx->err = VPX_CODEC_ERROR;
+ else if (!(ctx->iface->caps & VPX_CODEC_CAP_ENCODER))
+ ctx->err = VPX_CODEC_INCAPABLE;
+ else
+ pkt = ctx->iface->enc.get_cx_data(ctx->priv->alg_priv, iter);
+ }
+
+ if (pkt && pkt->kind == VPX_CODEC_CX_FRAME_PKT) {
+ /* If the application has specified a destination area for the
+ * compressed data, and the codec has not placed the data there,
+ * and it fits, copy it.
+ */
+ char *dst_buf = ctx->priv->enc.cx_data_dst_buf.buf;
+
+ if (dst_buf
+ && pkt->data.raw.buf != dst_buf
+ && pkt->data.raw.sz
+ + ctx->priv->enc.cx_data_pad_before
+ + ctx->priv->enc.cx_data_pad_after
+ <= ctx->priv->enc.cx_data_dst_buf.sz) {
+ vpx_codec_cx_pkt_t *modified_pkt = &ctx->priv->enc.cx_data_pkt;
+
+ memcpy(dst_buf + ctx->priv->enc.cx_data_pad_before,
+ pkt->data.raw.buf, pkt->data.raw.sz);
+ *modified_pkt = *pkt;
+ modified_pkt->data.raw.buf = dst_buf;
+ modified_pkt->data.raw.sz += ctx->priv->enc.cx_data_pad_before
+ + ctx->priv->enc.cx_data_pad_after;
+ pkt = modified_pkt;
}
- if (pkt && pkt->kind == VPX_CODEC_CX_FRAME_PKT)
- {
- /* If the application has specified a destination area for the
- * compressed data, and the codec has not placed the data there,
- * and it fits, copy it.
- */
- char *dst_buf = ctx->priv->enc.cx_data_dst_buf.buf;
-
- if (dst_buf
- && pkt->data.raw.buf != dst_buf
- && pkt->data.raw.sz
- + ctx->priv->enc.cx_data_pad_before
- + ctx->priv->enc.cx_data_pad_after
- <= ctx->priv->enc.cx_data_dst_buf.sz)
- {
- vpx_codec_cx_pkt_t *modified_pkt = &ctx->priv->enc.cx_data_pkt;
-
- memcpy(dst_buf + ctx->priv->enc.cx_data_pad_before,
- pkt->data.raw.buf, pkt->data.raw.sz);
- *modified_pkt = *pkt;
- modified_pkt->data.raw.buf = dst_buf;
- modified_pkt->data.raw.sz += ctx->priv->enc.cx_data_pad_before
- + ctx->priv->enc.cx_data_pad_after;
- pkt = modified_pkt;
- }
-
- if (dst_buf == pkt->data.raw.buf)
- {
- ctx->priv->enc.cx_data_dst_buf.buf = dst_buf + pkt->data.raw.sz;
- ctx->priv->enc.cx_data_dst_buf.sz -= pkt->data.raw.sz;
- }
+ if (dst_buf == pkt->data.raw.buf) {
+ ctx->priv->enc.cx_data_dst_buf.buf = dst_buf + pkt->data.raw.sz;
+ ctx->priv->enc.cx_data_dst_buf.sz -= pkt->data.raw.sz;
}
+ }
- return pkt;
+ return pkt;
}
vpx_codec_err_t vpx_codec_set_cx_data_buf(vpx_codec_ctx_t *ctx,
- const vpx_fixed_buf_t *buf,
- unsigned int pad_before,
- unsigned int pad_after)
-{
- if (!ctx || !ctx->priv)
- return VPX_CODEC_INVALID_PARAM;
+ const vpx_fixed_buf_t *buf,
+ unsigned int pad_before,
+ unsigned int pad_after) {
+ if (!ctx || !ctx->priv)
+ return VPX_CODEC_INVALID_PARAM;
- if (buf)
- {
- ctx->priv->enc.cx_data_dst_buf = *buf;
- ctx->priv->enc.cx_data_pad_before = pad_before;
- ctx->priv->enc.cx_data_pad_after = pad_after;
- }
+ if (buf) {
+ ctx->priv->enc.cx_data_dst_buf = *buf;
+ ctx->priv->enc.cx_data_pad_before = pad_before;
+ ctx->priv->enc.cx_data_pad_after = pad_after;
+ } else {
+ ctx->priv->enc.cx_data_dst_buf.buf = NULL;
+ ctx->priv->enc.cx_data_dst_buf.sz = 0;
+ ctx->priv->enc.cx_data_pad_before = 0;
+ ctx->priv->enc.cx_data_pad_after = 0;
+ }
+
+ return VPX_CODEC_OK;
+}
+
+
+const vpx_image_t *vpx_codec_get_preview_frame(vpx_codec_ctx_t *ctx) {
+ vpx_image_t *img = NULL;
+
+ if (ctx) {
+ if (!ctx->iface || !ctx->priv)
+ ctx->err = VPX_CODEC_ERROR;
+ else if (!(ctx->iface->caps & VPX_CODEC_CAP_ENCODER))
+ ctx->err = VPX_CODEC_INCAPABLE;
+ else if (!ctx->iface->enc.get_preview)
+ ctx->err = VPX_CODEC_INCAPABLE;
else
- {
- ctx->priv->enc.cx_data_dst_buf.buf = NULL;
- ctx->priv->enc.cx_data_dst_buf.sz = 0;
- ctx->priv->enc.cx_data_pad_before = 0;
- ctx->priv->enc.cx_data_pad_after = 0;
- }
+ img = ctx->iface->enc.get_preview(ctx->priv->alg_priv);
+ }
- return VPX_CODEC_OK;
+ return img;
}
-const vpx_image_t *vpx_codec_get_preview_frame(vpx_codec_ctx_t *ctx)
-{
- vpx_image_t *img = NULL;
+vpx_fixed_buf_t *vpx_codec_get_global_headers(vpx_codec_ctx_t *ctx) {
+ vpx_fixed_buf_t *buf = NULL;
- if (ctx)
- {
- if (!ctx->iface || !ctx->priv)
- ctx->err = VPX_CODEC_ERROR;
- else if (!(ctx->iface->caps & VPX_CODEC_CAP_ENCODER))
- ctx->err = VPX_CODEC_INCAPABLE;
- else if (!ctx->iface->enc.get_preview)
- ctx->err = VPX_CODEC_INCAPABLE;
- else
- img = ctx->iface->enc.get_preview(ctx->priv->alg_priv);
- }
+ if (ctx) {
+ if (!ctx->iface || !ctx->priv)
+ ctx->err = VPX_CODEC_ERROR;
+ else if (!(ctx->iface->caps & VPX_CODEC_CAP_ENCODER))
+ ctx->err = VPX_CODEC_INCAPABLE;
+ else if (!ctx->iface->enc.get_glob_hdrs)
+ ctx->err = VPX_CODEC_INCAPABLE;
+ else
+ buf = ctx->iface->enc.get_glob_hdrs(ctx->priv->alg_priv);
+ }
- return img;
-}
-
-
-vpx_fixed_buf_t *vpx_codec_get_global_headers(vpx_codec_ctx_t *ctx)
-{
- vpx_fixed_buf_t *buf = NULL;
-
- if (ctx)
- {
- if (!ctx->iface || !ctx->priv)
- ctx->err = VPX_CODEC_ERROR;
- else if (!(ctx->iface->caps & VPX_CODEC_CAP_ENCODER))
- ctx->err = VPX_CODEC_INCAPABLE;
- else if (!ctx->iface->enc.get_glob_hdrs)
- ctx->err = VPX_CODEC_INCAPABLE;
- else
- buf = ctx->iface->enc.get_glob_hdrs(ctx->priv->alg_priv);
- }
-
- return buf;
+ return buf;
}
vpx_codec_err_t vpx_codec_enc_config_set(vpx_codec_ctx_t *ctx,
- const vpx_codec_enc_cfg_t *cfg)
-{
- vpx_codec_err_t res;
+ const vpx_codec_enc_cfg_t *cfg) {
+ vpx_codec_err_t res;
- if (!ctx || !ctx->iface || !ctx->priv || !cfg)
- res = VPX_CODEC_INVALID_PARAM;
- else if (!(ctx->iface->caps & VPX_CODEC_CAP_ENCODER))
- res = VPX_CODEC_INCAPABLE;
- else
- res = ctx->iface->enc.cfg_set(ctx->priv->alg_priv, cfg);
+ if (!ctx || !ctx->iface || !ctx->priv || !cfg)
+ res = VPX_CODEC_INVALID_PARAM;
+ else if (!(ctx->iface->caps & VPX_CODEC_CAP_ENCODER))
+ res = VPX_CODEC_INCAPABLE;
+ else
+ res = ctx->iface->enc.cfg_set(ctx->priv->alg_priv, cfg);
- return SAVE_STATUS(ctx, res);
+ return SAVE_STATUS(ctx, res);
}
int vpx_codec_pkt_list_add(struct vpx_codec_pkt_list *list,
- const struct vpx_codec_cx_pkt *pkt)
-{
- if (list->cnt < list->max)
- {
- list->pkts[list->cnt++] = *pkt;
- return 0;
- }
+ const struct vpx_codec_cx_pkt *pkt) {
+ if (list->cnt < list->max) {
+ list->pkts[list->cnt++] = *pkt;
+ return 0;
+ }
- return 1;
+ return 1;
}
const vpx_codec_cx_pkt_t *vpx_codec_pkt_list_get(struct vpx_codec_pkt_list *list,
- vpx_codec_iter_t *iter)
-{
- const vpx_codec_cx_pkt_t *pkt;
+ vpx_codec_iter_t *iter) {
+ const vpx_codec_cx_pkt_t *pkt;
- if (!(*iter))
- {
- *iter = list->pkts;
- }
+ if (!(*iter)) {
+ *iter = list->pkts;
+ }
- pkt = (const void *) * iter;
+ pkt = (const void *) * iter;
- if ((size_t)(pkt - list->pkts) < list->cnt)
- *iter = pkt + 1;
- else
- pkt = NULL;
+ if ((size_t)(pkt - list->pkts) < list->cnt)
+ *iter = pkt + 1;
+ else
+ pkt = NULL;
- return pkt;
+ return pkt;
}
diff --git a/vpx/src/vpx_image.c b/vpx/src/vpx_image.c
index 336b6e2..36eda95 100644
--- a/vpx/src/vpx_image.c
+++ b/vpx/src/vpx_image.c
@@ -18,30 +18,26 @@
#define align_addr(addr,align) (void*)(((size_t)(addr) + ((align) - 1)) & (size_t)-(align))
/* Memalign code is copied from vpx_mem.c */
-static void *img_buf_memalign(size_t align, size_t size)
-{
- void *addr,
- * x = NULL;
+static void *img_buf_memalign(size_t align, size_t size) {
+ void *addr,
+ * x = NULL;
- addr = malloc(size + align - 1 + ADDRESS_STORAGE_SIZE);
+ addr = malloc(size + align - 1 + ADDRESS_STORAGE_SIZE);
- if (addr)
- {
- x = align_addr((unsigned char *)addr + ADDRESS_STORAGE_SIZE, (int)align);
- /* save the actual malloc address */
- ((size_t *)x)[-1] = (size_t)addr;
- }
+ if (addr) {
+ x = align_addr((unsigned char *)addr + ADDRESS_STORAGE_SIZE, (int)align);
+ /* save the actual malloc address */
+ ((size_t *)x)[-1] = (size_t)addr;
+ }
- return x;
+ return x;
}
-static void img_buf_free(void *memblk)
-{
- if (memblk)
- {
- void *addr = (void *)(((size_t *)memblk)[-1]);
- free(addr);
- }
+static void img_buf_free(void *memblk) {
+ if (memblk) {
+ void *addr = (void *)(((size_t *)memblk)[-1]);
+ free(addr);
+ }
}
static vpx_image_t *img_alloc_helper(vpx_image_t *img,
@@ -50,41 +46,39 @@
unsigned int d_h,
unsigned int buf_align,
unsigned int stride_align,
- unsigned char *img_data)
-{
+ unsigned char *img_data) {
- unsigned int h, w, s, xcs, ycs, bps;
- int align;
+ unsigned int h, w, s, xcs, ycs, bps;
+ int align;
- /* Treat align==0 like align==1 */
- if (!buf_align)
- buf_align = 1;
+ /* Treat align==0 like align==1 */
+ if (!buf_align)
+ buf_align = 1;
- /* Validate alignment (must be power of 2) */
- if (buf_align & (buf_align - 1))
- goto fail;
+ /* Validate alignment (must be power of 2) */
+ if (buf_align & (buf_align - 1))
+ goto fail;
- /* Treat align==0 like align==1 */
- if (!stride_align)
- stride_align = 1;
+ /* Treat align==0 like align==1 */
+ if (!stride_align)
+ stride_align = 1;
- /* Validate alignment (must be power of 2) */
- if (stride_align & (stride_align - 1))
- goto fail;
+ /* Validate alignment (must be power of 2) */
+ if (stride_align & (stride_align - 1))
+ goto fail;
- /* Get sample size for this format */
- switch (fmt)
- {
+ /* Get sample size for this format */
+ switch (fmt) {
case VPX_IMG_FMT_RGB32:
case VPX_IMG_FMT_RGB32_LE:
case VPX_IMG_FMT_ARGB:
case VPX_IMG_FMT_ARGB_LE:
- bps = 32;
- break;
+ bps = 32;
+ break;
case VPX_IMG_FMT_RGB24:
case VPX_IMG_FMT_BGR24:
- bps = 24;
- break;
+ bps = 24;
+ break;
case VPX_IMG_FMT_RGB565:
case VPX_IMG_FMT_RGB565_LE:
case VPX_IMG_FMT_RGB555:
@@ -92,108 +86,101 @@
case VPX_IMG_FMT_UYVY:
case VPX_IMG_FMT_YUY2:
case VPX_IMG_FMT_YVYU:
- bps = 16;
- break;
+ bps = 16;
+ break;
case VPX_IMG_FMT_I420:
case VPX_IMG_FMT_YV12:
case VPX_IMG_FMT_VPXI420:
case VPX_IMG_FMT_VPXYV12:
- bps = 12;
- break;
+ bps = 12;
+ break;
default:
- bps = 16;
- break;
- }
+ bps = 16;
+ break;
+ }
- /* Get chroma shift values for this format */
- switch (fmt)
- {
+ /* Get chroma shift values for this format */
+ switch (fmt) {
case VPX_IMG_FMT_I420:
case VPX_IMG_FMT_YV12:
case VPX_IMG_FMT_VPXI420:
case VPX_IMG_FMT_VPXYV12:
- xcs = 1;
- break;
+ xcs = 1;
+ break;
default:
- xcs = 0;
- break;
- }
+ xcs = 0;
+ break;
+ }
- switch (fmt)
- {
+ switch (fmt) {
case VPX_IMG_FMT_I420:
case VPX_IMG_FMT_YV12:
case VPX_IMG_FMT_VPXI420:
case VPX_IMG_FMT_VPXYV12:
- ycs = 1;
- break;
+ ycs = 1;
+ break;
default:
- ycs = 0;
- break;
- }
+ ycs = 0;
+ break;
+ }
- /* Calculate storage sizes given the chroma subsampling */
- align = (1 << xcs) - 1;
- w = (d_w + align) & ~align;
- align = (1 << ycs) - 1;
- h = (d_h + align) & ~align;
- s = (fmt & VPX_IMG_FMT_PLANAR) ? w : bps * w / 8;
- s = (s + stride_align - 1) & ~(stride_align - 1);
+ /* Calculate storage sizes given the chroma subsampling */
+ align = (1 << xcs) - 1;
+ w = (d_w + align) & ~align;
+ align = (1 << ycs) - 1;
+ h = (d_h + align) & ~align;
+ s = (fmt & VPX_IMG_FMT_PLANAR) ? w : bps * w / 8;
+ s = (s + stride_align - 1) & ~(stride_align - 1);
- /* Allocate the new image */
+ /* Allocate the new image */
+ if (!img) {
+ img = (vpx_image_t *)calloc(1, sizeof(vpx_image_t));
+
if (!img)
- {
- img = (vpx_image_t *)calloc(1, sizeof(vpx_image_t));
+ goto fail;
- if (!img)
- goto fail;
+ img->self_allocd = 1;
+ } else {
+ memset(img, 0, sizeof(vpx_image_t));
+ }
- img->self_allocd = 1;
- }
- else
- {
- memset(img, 0, sizeof(vpx_image_t));
- }
+ img->img_data = img_data;
- img->img_data = img_data;
+ if (!img_data) {
+ img->img_data = img_buf_memalign(buf_align, ((fmt & VPX_IMG_FMT_PLANAR) ?
+ h * s * bps / 8 : h * s));
+ img->img_data_owner = 1;
+ }
- if (!img_data)
- {
- img->img_data = img_buf_memalign(buf_align, ((fmt & VPX_IMG_FMT_PLANAR)?
- h * s * bps / 8 : h * s));
- img->img_data_owner = 1;
- }
+ if (!img->img_data)
+ goto fail;
- if (!img->img_data)
- goto fail;
+ img->fmt = fmt;
+ img->w = w;
+ img->h = h;
+ img->x_chroma_shift = xcs;
+ img->y_chroma_shift = ycs;
+ img->bps = bps;
- img->fmt = fmt;
- img->w = w;
- img->h = h;
- img->x_chroma_shift = xcs;
- img->y_chroma_shift = ycs;
- img->bps = bps;
+ /* Calculate strides */
+ img->stride[VPX_PLANE_Y] = img->stride[VPX_PLANE_ALPHA] = s;
+ img->stride[VPX_PLANE_U] = img->stride[VPX_PLANE_V] = s >> xcs;
- /* Calculate strides */
- img->stride[VPX_PLANE_Y] = img->stride[VPX_PLANE_ALPHA] = s;
- img->stride[VPX_PLANE_U] = img->stride[VPX_PLANE_V] = s >> xcs;
-
- /* Default viewport to entire image */
- if (!vpx_img_set_rect(img, 0, 0, d_w, d_h))
- return img;
+ /* Default viewport to entire image */
+ if (!vpx_img_set_rect(img, 0, 0, d_w, d_h))
+ return img;
fail:
- vpx_img_free(img);
- return NULL;
+ vpx_img_free(img);
+ return NULL;
}
vpx_image_t *vpx_img_alloc(vpx_image_t *img,
vpx_img_fmt_t fmt,
unsigned int d_w,
unsigned int d_h,
- unsigned int align)
-{
- return img_alloc_helper(img, fmt, d_w, d_h, align, align, NULL);
+ unsigned int align) {
+ return img_alloc_helper(img, fmt, d_w, d_h, align, align, NULL);
}
vpx_image_t *vpx_img_wrap(vpx_image_t *img,
@@ -201,105 +188,92 @@
unsigned int d_w,
unsigned int d_h,
unsigned int stride_align,
- unsigned char *img_data)
-{
- /* By setting buf_align = 1, we don't change buffer alignment in this
- * function. */
- return img_alloc_helper(img, fmt, d_w, d_h, 1, stride_align, img_data);
+ unsigned char *img_data) {
+ /* By setting buf_align = 1, we don't change buffer alignment in this
+ * function. */
+ return img_alloc_helper(img, fmt, d_w, d_h, 1, stride_align, img_data);
}
int vpx_img_set_rect(vpx_image_t *img,
unsigned int x,
unsigned int y,
unsigned int w,
- unsigned int h)
-{
- unsigned char *data;
+ unsigned int h) {
+ unsigned char *data;
- if (x + w <= img->w && y + h <= img->h)
- {
- img->d_w = w;
- img->d_h = h;
+ if (x + w <= img->w && y + h <= img->h) {
+ img->d_w = w;
+ img->d_h = h;
- /* Calculate plane pointers */
- if (!(img->fmt & VPX_IMG_FMT_PLANAR))
- {
- img->planes[VPX_PLANE_PACKED] =
- img->img_data + x * img->bps / 8 + y * img->stride[VPX_PLANE_PACKED];
- }
- else
- {
- data = img->img_data;
+ /* Calculate plane pointers */
+ if (!(img->fmt & VPX_IMG_FMT_PLANAR)) {
+ img->planes[VPX_PLANE_PACKED] =
+ img->img_data + x * img->bps / 8 + y * img->stride[VPX_PLANE_PACKED];
+ } else {
+ data = img->img_data;
- if (img->fmt & VPX_IMG_FMT_HAS_ALPHA)
- {
- img->planes[VPX_PLANE_ALPHA] =
- data + x + y * img->stride[VPX_PLANE_ALPHA];
- data += img->h * img->stride[VPX_PLANE_ALPHA];
- }
+ if (img->fmt & VPX_IMG_FMT_HAS_ALPHA) {
+ img->planes[VPX_PLANE_ALPHA] =
+ data + x + y * img->stride[VPX_PLANE_ALPHA];
+ data += img->h * img->stride[VPX_PLANE_ALPHA];
+ }
- img->planes[VPX_PLANE_Y] = data + x + y * img->stride[VPX_PLANE_Y];
- data += img->h * img->stride[VPX_PLANE_Y];
+ img->planes[VPX_PLANE_Y] = data + x + y * img->stride[VPX_PLANE_Y];
+ data += img->h * img->stride[VPX_PLANE_Y];
- if (!(img->fmt & VPX_IMG_FMT_UV_FLIP))
- {
- img->planes[VPX_PLANE_U] = data
- + (x >> img->x_chroma_shift)
- + (y >> img->y_chroma_shift) * img->stride[VPX_PLANE_U];
- data += (img->h >> img->y_chroma_shift) * img->stride[VPX_PLANE_U];
- img->planes[VPX_PLANE_V] = data
- + (x >> img->x_chroma_shift)
- + (y >> img->y_chroma_shift) * img->stride[VPX_PLANE_V];
- }
- else
- {
- img->planes[VPX_PLANE_V] = data
- + (x >> img->x_chroma_shift)
- + (y >> img->y_chroma_shift) * img->stride[VPX_PLANE_V];
- data += (img->h >> img->y_chroma_shift) * img->stride[VPX_PLANE_V];
- img->planes[VPX_PLANE_U] = data
- + (x >> img->x_chroma_shift)
- + (y >> img->y_chroma_shift) * img->stride[VPX_PLANE_U];
- }
- }
-
- return 0;
+ if (!(img->fmt & VPX_IMG_FMT_UV_FLIP)) {
+ img->planes[VPX_PLANE_U] = data
+ + (x >> img->x_chroma_shift)
+ + (y >> img->y_chroma_shift) * img->stride[VPX_PLANE_U];
+ data += (img->h >> img->y_chroma_shift) * img->stride[VPX_PLANE_U];
+ img->planes[VPX_PLANE_V] = data
+ + (x >> img->x_chroma_shift)
+ + (y >> img->y_chroma_shift) * img->stride[VPX_PLANE_V];
+ } else {
+ img->planes[VPX_PLANE_V] = data
+ + (x >> img->x_chroma_shift)
+ + (y >> img->y_chroma_shift) * img->stride[VPX_PLANE_V];
+ data += (img->h >> img->y_chroma_shift) * img->stride[VPX_PLANE_V];
+ img->planes[VPX_PLANE_U] = data
+ + (x >> img->x_chroma_shift)
+ + (y >> img->y_chroma_shift) * img->stride[VPX_PLANE_U];
+ }
}
- return -1;
+ return 0;
+ }
+
+ return -1;
}
-void vpx_img_flip(vpx_image_t *img)
-{
- /* Note: In the calculation pointer adjustment calculation, we want the
- * rhs to be promoted to a signed type. Section 6.3.1.8 of the ISO C99
- * standard indicates that if the adjustment parameter is unsigned, the
- * stride parameter will be promoted to unsigned, causing errors when
- * the lhs is a larger type than the rhs.
- */
- img->planes[VPX_PLANE_Y] += (signed)(img->d_h - 1) * img->stride[VPX_PLANE_Y];
- img->stride[VPX_PLANE_Y] = -img->stride[VPX_PLANE_Y];
+void vpx_img_flip(vpx_image_t *img) {
+ /* Note: In the calculation pointer adjustment calculation, we want the
+ * rhs to be promoted to a signed type. Section 6.3.1.8 of the ISO C99
+ * standard indicates that if the adjustment parameter is unsigned, the
+ * stride parameter will be promoted to unsigned, causing errors when
+ * the lhs is a larger type than the rhs.
+ */
+ img->planes[VPX_PLANE_Y] += (signed)(img->d_h - 1) * img->stride[VPX_PLANE_Y];
+ img->stride[VPX_PLANE_Y] = -img->stride[VPX_PLANE_Y];
- img->planes[VPX_PLANE_U] += (signed)((img->d_h >> img->y_chroma_shift) - 1)
- * img->stride[VPX_PLANE_U];
- img->stride[VPX_PLANE_U] = -img->stride[VPX_PLANE_U];
+ img->planes[VPX_PLANE_U] += (signed)((img->d_h >> img->y_chroma_shift) - 1)
+ * img->stride[VPX_PLANE_U];
+ img->stride[VPX_PLANE_U] = -img->stride[VPX_PLANE_U];
- img->planes[VPX_PLANE_V] += (signed)((img->d_h >> img->y_chroma_shift) - 1)
- * img->stride[VPX_PLANE_V];
- img->stride[VPX_PLANE_V] = -img->stride[VPX_PLANE_V];
+ img->planes[VPX_PLANE_V] += (signed)((img->d_h >> img->y_chroma_shift) - 1)
+ * img->stride[VPX_PLANE_V];
+ img->stride[VPX_PLANE_V] = -img->stride[VPX_PLANE_V];
- img->planes[VPX_PLANE_ALPHA] += (signed)(img->d_h - 1) * img->stride[VPX_PLANE_ALPHA];
- img->stride[VPX_PLANE_ALPHA] = -img->stride[VPX_PLANE_ALPHA];
+ img->planes[VPX_PLANE_ALPHA] += (signed)(img->d_h - 1) * img->stride[VPX_PLANE_ALPHA];
+ img->stride[VPX_PLANE_ALPHA] = -img->stride[VPX_PLANE_ALPHA];
}
-void vpx_img_free(vpx_image_t *img)
-{
- if (img)
- {
- if (img->img_data && img->img_data_owner)
- img_buf_free(img->img_data);
+void vpx_img_free(vpx_image_t *img) {
+ if (img) {
+ if (img->img_data && img->img_data_owner)
+ img_buf_free(img->img_data);
- if (img->self_allocd)
- free(img);
- }
+ if (img->self_allocd)
+ free(img);
+ }
}
diff --git a/vpx/vp8.h b/vpx/vp8.h
index 2952203..3c31363 100644
--- a/vpx/vp8.h
+++ b/vpx/vp8.h
@@ -36,34 +36,32 @@
*
* The set of macros define the control functions of VP8 interface
*/
-enum vp8_com_control_id
-{
- VP8_SET_REFERENCE = 1, /**< pass in an external frame into decoder to be used as reference frame */
- VP8_COPY_REFERENCE = 2, /**< get a copy of reference frame from the decoder */
- VP8_SET_POSTPROC = 3, /**< set the decoder's post processing settings */
- VP8_SET_DBG_COLOR_REF_FRAME = 4, /**< set the reference frames to color for each macroblock */
- VP8_SET_DBG_COLOR_MB_MODES = 5, /**< set which macro block modes to color */
- VP8_SET_DBG_COLOR_B_MODES = 6, /**< set which blocks modes to color */
- VP8_SET_DBG_DISPLAY_MV = 7, /**< set which motion vector modes to draw */
- VP8_COMMON_CTRL_ID_MAX,
- VP8_DECODER_CTRL_ID_START = 256
+enum vp8_com_control_id {
+ VP8_SET_REFERENCE = 1, /**< pass in an external frame into decoder to be used as reference frame */
+ VP8_COPY_REFERENCE = 2, /**< get a copy of reference frame from the decoder */
+ VP8_SET_POSTPROC = 3, /**< set the decoder's post processing settings */
+ VP8_SET_DBG_COLOR_REF_FRAME = 4, /**< set the reference frames to color for each macroblock */
+ VP8_SET_DBG_COLOR_MB_MODES = 5, /**< set which macro block modes to color */
+ VP8_SET_DBG_COLOR_B_MODES = 6, /**< set which blocks modes to color */
+ VP8_SET_DBG_DISPLAY_MV = 7, /**< set which motion vector modes to draw */
+ VP8_COMMON_CTRL_ID_MAX,
+ VP8_DECODER_CTRL_ID_START = 256
};
/*!\brief post process flags
*
* The set of macros define VP8 decoder post processing flags
*/
-enum vp8_postproc_level
-{
- VP8_NOFILTERING = 0,
- VP8_DEBLOCK = 1<<0,
- VP8_DEMACROBLOCK = 1<<1,
- VP8_ADDNOISE = 1<<2,
- VP8_DEBUG_TXT_FRAME_INFO = 1<<3, /**< print frame information */
- VP8_DEBUG_TXT_MBLK_MODES = 1<<4, /**< print macro block modes over each macro block */
- VP8_DEBUG_TXT_DC_DIFF = 1<<5, /**< print dc diff for each macro block */
- VP8_DEBUG_TXT_RATE_INFO = 1<<6, /**< print video rate info (encoder only) */
- VP8_MFQE = 1<<10
+enum vp8_postproc_level {
+ VP8_NOFILTERING = 0,
+ VP8_DEBLOCK = 1 << 0,
+ VP8_DEMACROBLOCK = 1 << 1,
+ VP8_ADDNOISE = 1 << 2,
+ VP8_DEBUG_TXT_FRAME_INFO = 1 << 3, /**< print frame information */
+ VP8_DEBUG_TXT_MBLK_MODES = 1 << 4, /**< print macro block modes over each macro block */
+ VP8_DEBUG_TXT_DC_DIFF = 1 << 5, /**< print dc diff for each macro block */
+ VP8_DEBUG_TXT_RATE_INFO = 1 << 6, /**< print video rate info (encoder only) */
+ VP8_MFQE = 1 << 10
};
/*!\brief post process flags
@@ -73,22 +71,20 @@
* to VP8_DEBLOCK and deblocking_level to 1.
*/
-typedef struct vp8_postproc_cfg
-{
- int post_proc_flag; /**< the types of post processing to be done, should be combination of "vp8_postproc_level" */
- int deblocking_level; /**< the strength of deblocking, valid range [0, 16] */
- int noise_level; /**< the strength of additive noise, valid range [0, 16] */
+typedef struct vp8_postproc_cfg {
+ int post_proc_flag; /**< the types of post processing to be done, should be combination of "vp8_postproc_level" */
+ int deblocking_level; /**< the strength of deblocking, valid range [0, 16] */
+ int noise_level; /**< the strength of additive noise, valid range [0, 16] */
} vp8_postproc_cfg_t;
/*!\brief reference frame type
*
* The set of macros define the type of VP8 reference frames
*/
-typedef enum vpx_ref_frame_type
-{
- VP8_LAST_FRAME = 1,
- VP8_GOLD_FRAME = 2,
- VP8_ALTR_FRAME = 4
+typedef enum vpx_ref_frame_type {
+ VP8_LAST_FRAME = 1,
+ VP8_GOLD_FRAME = 2,
+ VP8_ALTR_FRAME = 4
} vpx_ref_frame_type_t;
/*!\brief reference frame data struct
@@ -96,10 +92,9 @@
* define the data struct to access vp8 reference frames
*/
-typedef struct vpx_ref_frame
-{
- vpx_ref_frame_type_t frame_type; /**< which reference frame */
- vpx_image_t img; /**< reference frame data in image format */
+typedef struct vpx_ref_frame {
+ vpx_ref_frame_type_t frame_type; /**< which reference frame */
+ vpx_image_t img; /**< reference frame data in image format */
} vpx_ref_frame_t;
diff --git a/vpx/vp8cx.h b/vpx/vp8cx.h
index a3c95d2..90b7169 100644
--- a/vpx/vp8cx.h
+++ b/vpx/vp8cx.h
@@ -31,7 +31,14 @@
* @{
*/
extern vpx_codec_iface_t vpx_codec_vp8_cx_algo;
-extern vpx_codec_iface_t* vpx_codec_vp8_cx(void);
+extern vpx_codec_iface_t *vpx_codec_vp8_cx(void);
+
+/* TODO(jkoleszar): These move to VP9 in a later patch set. */
+extern vpx_codec_iface_t vpx_codec_vp9_cx_algo;
+extern vpx_codec_iface_t *vpx_codec_vp9_cx(void);
+extern vpx_codec_iface_t vpx_codec_vp9x_cx_algo;
+extern vpx_codec_iface_t *vpx_codec_vp9x_cx(void);
+
/*!@} - end algorithm interface member group*/
@@ -121,75 +128,77 @@
*
* \sa #vpx_codec_control
*/
-enum vp8e_enc_control_id
-{
- VP8E_UPD_ENTROPY = 5, /**< control function to set mode of entropy update in encoder */
- VP8E_UPD_REFERENCE, /**< control function to set reference update mode in encoder */
- VP8E_USE_REFERENCE, /**< control function to set which reference frame encoder can use */
- VP8E_SET_ROI_MAP, /**< control function to pass an ROI map to encoder */
- VP8E_SET_ACTIVEMAP, /**< control function to pass an Active map to encoder */
- VP8E_SET_SCALEMODE = 11, /**< control function to set encoder scaling mode */
- /*!\brief control function to set vp8 encoder cpuused
- *
- * Changes in this value influences, among others, the encoder's selection
- * of motion estimation methods. Values greater than 0 will increase encoder
- * speed at the expense of quality.
- * The full set of adjustments can be found in
- * onyx_if.c:vp8_set_speed_features().
- * \todo List highlights of the changes at various levels.
- *
- * \note Valid range: -16..16
- */
- VP8E_SET_CPUUSED = 13,
- VP8E_SET_ENABLEAUTOALTREF, /**< control function to enable vp8 to automatic set and use altref frame */
- VP8E_SET_NOISE_SENSITIVITY, /**< control function to set noise sensitivity */
- VP8E_SET_SHARPNESS, /**< control function to set sharpness */
- VP8E_SET_STATIC_THRESHOLD, /**< control function to set the threshold for macroblocks treated static */
- VP8E_SET_TOKEN_PARTITIONS, /**< control function to set the number of token partitions */
- VP8E_GET_LAST_QUANTIZER, /**< return the quantizer chosen by the
+enum vp8e_enc_control_id {
+ VP8E_UPD_ENTROPY = 5, /**< control function to set mode of entropy update in encoder */
+ VP8E_UPD_REFERENCE, /**< control function to set reference update mode in encoder */
+ VP8E_USE_REFERENCE, /**< control function to set which reference frame encoder can use */
+ VP8E_SET_ROI_MAP, /**< control function to pass an ROI map to encoder */
+ VP8E_SET_ACTIVEMAP, /**< control function to pass an Active map to encoder */
+ VP8E_SET_SCALEMODE = 11, /**< control function to set encoder scaling mode */
+ /*!\brief control function to set vp8 encoder cpuused
+ *
+ * Changes in this value influence, among other things, the encoder's selection
+ * of motion estimation methods. Values greater than 0 will increase encoder
+ * speed at the expense of quality.
+ * The full set of adjustments can be found in
+ * onyx_if.c:vp8_set_speed_features().
+ * \todo List highlights of the changes at various levels.
+ *
+ * \note Valid range: -16..16
+ */
+ VP8E_SET_CPUUSED = 13,
+ VP8E_SET_ENABLEAUTOALTREF, /**< control function to enable vp8 to automatically set and use the altref frame */
+ VP8E_SET_NOISE_SENSITIVITY, /**< control function to set noise sensitivity */
+ VP8E_SET_SHARPNESS, /**< control function to set sharpness */
+ VP8E_SET_STATIC_THRESHOLD, /**< control function to set the threshold for macroblocks treated static */
+ VP8E_SET_TOKEN_PARTITIONS, /**< control function to set the number of token partitions */
+ VP8E_GET_LAST_QUANTIZER, /**< return the quantizer chosen by the
encoder for the last frame using the internal
scale */
- VP8E_GET_LAST_QUANTIZER_64, /**< return the quantizer chosen by the
+ VP8E_GET_LAST_QUANTIZER_64, /**< return the quantizer chosen by the
encoder for the last frame, using the 0..63
scale as used by the rc_*_quantizer config
parameters */
- VP8E_SET_ARNR_MAXFRAMES, /**< control function to set the max number of frames blurred creating arf*/
- VP8E_SET_ARNR_STRENGTH , /**< control function to set the filter strength for the arf */
- VP8E_SET_ARNR_TYPE , /**< control function to set the type of filter to use for the arf*/
- VP8E_SET_TUNING, /**< control function to set visual tuning */
- /*!\brief control function to set constrained quality level
- *
- * \attention For this value to be used vpx_codec_enc_cfg_t::g_usage must be
- * set to #VPX_CQ.
- * \note Valid range: 0..63
- */
- VP8E_SET_CQ_LEVEL,
+ VP8E_SET_ARNR_MAXFRAMES, /**< control function to set the max number of frames blurred creating arf*/
+ VP8E_SET_ARNR_STRENGTH, /**< control function to set the filter strength for the arf */
+ VP8E_SET_ARNR_TYPE, /**< control function to set the type of filter to use for the arf*/
+ VP8E_SET_TUNING, /**< control function to set visual tuning */
+ /*!\brief control function to set constrained quality level
+ *
+ * \attention For this value to be used vpx_codec_enc_cfg_t::g_usage must be
+ * set to #VPX_CQ.
+ * \note Valid range: 0..63
+ */
+ VP8E_SET_CQ_LEVEL,
- /*!\brief Max data rate for Intra frames
- *
- * This value controls additional clamping on the maximum size of a
- * keyframe. It is expressed as a percentage of the average
- * per-frame bitrate, with the special (and default) value 0 meaning
- * unlimited, or no additional clamping beyond the codec's built-in
- * algorithm.
- *
- * For example, to allocate no more than 4.5 frames worth of bitrate
- * to a keyframe, set this to 450.
- *
- */
- VP8E_SET_MAX_INTRA_BITRATE_PCT
+ /*!\brief Max data rate for Intra frames
+ *
+ * This value controls additional clamping on the maximum size of a
+ * keyframe. It is expressed as a percentage of the average
+ * per-frame bitrate, with the special (and default) value 0 meaning
+ * unlimited, or no additional clamping beyond the codec's built-in
+ * algorithm.
+ *
+ * For example, to allocate no more than 4.5 frames worth of bitrate
+ * to a keyframe, set this to 450.
+ *
+ */
+ VP8E_SET_MAX_INTRA_BITRATE_PCT,
+
+
+ /* TODO(jkoleszar): Move to vp9cx.h */
+ VP9E_SET_LOSSLESS
};
/*!\brief vpx 1-D scaling mode
*
* This set of constants define 1-D vpx scaling modes
*/
-typedef enum vpx_scaling_mode_1d
-{
- VP8E_NORMAL = 0,
- VP8E_FOURFIVE = 1,
- VP8E_THREEFIVE = 2,
- VP8E_ONETWO = 3
+typedef enum vpx_scaling_mode_1d {
+ VP8E_NORMAL = 0,
+ VP8E_FOURFIVE = 1,
+ VP8E_THREEFIVE = 2,
+ VP8E_ONETWO = 3
} VPX_SCALING_MODE;
@@ -199,14 +208,13 @@
*
*/
-typedef struct vpx_roi_map
-{
- unsigned char *roi_map; /**< specify an id between 0 and 3 for each 16x16 region within a frame */
- unsigned int rows; /**< number of rows */
- unsigned int cols; /**< number of cols */
- int delta_q[4]; /**< quantizer delta [-63, 63] off baseline for regions with id between 0 and 3*/
- int delta_lf[4]; /**< loop filter strength delta [-63, 63] for regions with id between 0 and 3 */
- unsigned int static_threshold[4];/**< threshold for region to be treated as static */
+typedef struct vpx_roi_map {
+ unsigned char *roi_map; /**< specify an id between 0 and 3 for each 16x16 region within a frame */
+ unsigned int rows; /**< number of rows */
+ unsigned int cols; /**< number of cols */
+ int delta_q[4]; /**< quantizer delta [-63, 63] off baseline for regions with id between 0 and 3*/
+ int delta_lf[4]; /**< loop filter strength delta [-63, 63] for regions with id between 0 and 3 */
+ unsigned int static_threshold[4];/**< threshold for region to be treated as static */
} vpx_roi_map_t;
/*!\brief vpx active region map
@@ -216,11 +224,10 @@
*/
-typedef struct vpx_active_map
-{
- unsigned char *active_map; /**< specify an on (1) or off (0) each 16x16 region within a frame */
- unsigned int rows; /**< number of rows */
- unsigned int cols; /**< number of cols */
+typedef struct vpx_active_map {
+ unsigned char *active_map; /**< specify on (1) or off (0) for each 16x16 region within a frame */
+ unsigned int rows; /**< number of rows */
+ unsigned int cols; /**< number of cols */
} vpx_active_map_t;
/*!\brief vpx image scaling mode
@@ -228,10 +235,9 @@
* This defines the data structure for image scaling mode
*
*/
-typedef struct vpx_scaling_mode
-{
- VPX_SCALING_MODE h_scaling_mode; /**< horizontal scaling mode */
- VPX_SCALING_MODE v_scaling_mode; /**< vertical scaling mode */
+typedef struct vpx_scaling_mode {
+ VPX_SCALING_MODE h_scaling_mode; /**< horizontal scaling mode */
+ VPX_SCALING_MODE v_scaling_mode; /**< vertical scaling mode */
} vpx_scaling_mode_t;
/*!\brief VP8 token partition mode
@@ -241,12 +247,11 @@
*
*/
-typedef enum
-{
- VP8_ONE_TOKENPARTITION = 0,
- VP8_TWO_TOKENPARTITION = 1,
- VP8_FOUR_TOKENPARTITION = 2,
- VP8_EIGHT_TOKENPARTITION = 3
+typedef enum {
+ VP8_ONE_TOKENPARTITION = 0,
+ VP8_TWO_TOKENPARTITION = 1,
+ VP8_FOUR_TOKENPARTITION = 2,
+ VP8_EIGHT_TOKENPARTITION = 3
} vp8e_token_partitions;
@@ -255,10 +260,9 @@
* Changes the encoder to tune for certain types of input material.
*
*/
-typedef enum
-{
- VP8_TUNE_PSNR,
- VP8_TUNE_SSIM
+typedef enum {
+ VP8_TUNE_PSNR,
+ VP8_TUNE_SSIM
} vp8e_tuning;
@@ -289,16 +293,17 @@
VPX_CTRL_USE_TYPE(VP8E_SET_TOKEN_PARTITIONS, int) /* vp8e_token_partitions */
VPX_CTRL_USE_TYPE(VP8E_SET_ARNR_MAXFRAMES, unsigned int)
-VPX_CTRL_USE_TYPE(VP8E_SET_ARNR_STRENGTH , unsigned int)
-VPX_CTRL_USE_TYPE(VP8E_SET_ARNR_TYPE , unsigned int)
+VPX_CTRL_USE_TYPE(VP8E_SET_ARNR_STRENGTH, unsigned int)
+VPX_CTRL_USE_TYPE(VP8E_SET_ARNR_TYPE, unsigned int)
VPX_CTRL_USE_TYPE(VP8E_SET_TUNING, int) /* vp8e_tuning */
-VPX_CTRL_USE_TYPE(VP8E_SET_CQ_LEVEL , unsigned int)
+VPX_CTRL_USE_TYPE(VP8E_SET_CQ_LEVEL, unsigned int)
VPX_CTRL_USE_TYPE(VP8E_GET_LAST_QUANTIZER, int *)
VPX_CTRL_USE_TYPE(VP8E_GET_LAST_QUANTIZER_64, int *)
VPX_CTRL_USE_TYPE(VP8E_SET_MAX_INTRA_BITRATE_PCT, unsigned int)
+VPX_CTRL_USE_TYPE(VP9E_SET_LOSSLESS, unsigned int)
/*! @} - end defgroup vp8_encoder */
#include "vpx_codec_impl_bottom.h"
diff --git a/vpx/vp8dx.h b/vpx/vp8dx.h
index 8661035..e2ec8b2 100644
--- a/vpx/vp8dx.h
+++ b/vpx/vp8dx.h
@@ -31,7 +31,11 @@
* @{
*/
extern vpx_codec_iface_t vpx_codec_vp8_dx_algo;
-extern vpx_codec_iface_t* vpx_codec_vp8_dx(void);
+extern vpx_codec_iface_t *vpx_codec_vp8_dx(void);
+
+/* TODO(jkoleszar): These move to VP9 in a later patch set. */
+extern vpx_codec_iface_t vpx_codec_vp9_dx_algo;
+extern vpx_codec_iface_t *vpx_codec_vp9_dx(void);
/*!@} - end algorithm interface member group*/
/* Include controls common to both the encoder and decoder */
@@ -45,23 +49,22 @@
*
* \sa #vpx_codec_control
*/
-enum vp8_dec_control_id
-{
- /** control function to get info on which reference frames were updated
- * by the last decode
- */
- VP8D_GET_LAST_REF_UPDATES = VP8_DECODER_CTRL_ID_START,
+enum vp8_dec_control_id {
+ /** control function to get info on which reference frames were updated
+ * by the last decode
+ */
+ VP8D_GET_LAST_REF_UPDATES = VP8_DECODER_CTRL_ID_START,
- /** check if the indicated frame is corrupted */
- VP8D_GET_FRAME_CORRUPTED,
+ /** check if the indicated frame is corrupted */
+ VP8D_GET_FRAME_CORRUPTED,
- /** control function to get info on which reference frames were used
- * by the last decode
- */
- VP8D_GET_LAST_REF_USED,
+ /** control function to get info on which reference frames were used
+ * by the last decode
+ */
+ VP8D_GET_LAST_REF_USED,
- VP8_DECODER_CTRL_ID_MAX
-} ;
+ VP8_DECODER_CTRL_ID_MAX
+};
/*!\brief VP8 decoder control function parameter type
diff --git a/vpx/vpx_codec.h b/vpx/vpx_codec.h
index 243b7a5..2e6f1e7 100644
--- a/vpx/vpx_codec.h
+++ b/vpx/vpx_codec.h
@@ -45,7 +45,7 @@
#include "vpx_integer.h"
#include "vpx_image.h"
- /*!\brief Decorator indicating a function is deprecated */
+ /*!\brief Decorator indicating a function is deprecated */
#ifndef DEPRECATED
#if defined(__GNUC__) && __GNUC__
#define DEPRECATED __attribute__ ((deprecated))
@@ -66,7 +66,7 @@
#endif
#endif /* DECLSPEC_DEPRECATED */
- /*!\brief Decorator indicating a function is potentially unused */
+ /*!\brief Decorator indicating a function is potentially unused */
#ifdef UNUSED
#elif __GNUC__
#define UNUSED __attribute__ ((unused))
@@ -74,312 +74,310 @@
#define UNUSED
#endif
- /*!\brief Current ABI version number
- *
- * \internal
- * If this file is altered in any way that changes the ABI, this value
- * must be bumped. Examples include, but are not limited to, changing
- * types, removing or reassigning enums, adding/removing/rearranging
- * fields to structures
- */
+ /*!\brief Current ABI version number
+ *
+ * \internal
+ * If this file is altered in any way that changes the ABI, this value
+ * must be bumped. Examples include, but are not limited to, changing
+ * types, removing or reassigning enums, adding/removing/rearranging
+ * fields to structures
+ */
#define VPX_CODEC_ABI_VERSION (2 + VPX_IMAGE_ABI_VERSION) /**<\hideinitializer*/
- /*!\brief Algorithm return codes */
- typedef enum {
- /*!\brief Operation completed without error */
- VPX_CODEC_OK,
+ /*!\brief Algorithm return codes */
+ typedef enum {
+ /*!\brief Operation completed without error */
+ VPX_CODEC_OK,
- /*!\brief Unspecified error */
- VPX_CODEC_ERROR,
+ /*!\brief Unspecified error */
+ VPX_CODEC_ERROR,
- /*!\brief Memory operation failed */
- VPX_CODEC_MEM_ERROR,
+ /*!\brief Memory operation failed */
+ VPX_CODEC_MEM_ERROR,
- /*!\brief ABI version mismatch */
- VPX_CODEC_ABI_MISMATCH,
+ /*!\brief ABI version mismatch */
+ VPX_CODEC_ABI_MISMATCH,
- /*!\brief Algorithm does not have required capability */
- VPX_CODEC_INCAPABLE,
+ /*!\brief Algorithm does not have required capability */
+ VPX_CODEC_INCAPABLE,
- /*!\brief The given bitstream is not supported.
- *
- * The bitstream was unable to be parsed at the highest level. The decoder
- * is unable to proceed. This error \ref SHOULD be treated as fatal to the
- * stream. */
- VPX_CODEC_UNSUP_BITSTREAM,
-
- /*!\brief Encoded bitstream uses an unsupported feature
- *
- * The decoder does not implement a feature required by the encoder. This
- * return code should only be used for features that prevent future
- * pictures from being properly decoded. This error \ref MAY be treated as
- * fatal to the stream or \ref MAY be treated as fatal to the current GOP.
- */
- VPX_CODEC_UNSUP_FEATURE,
-
- /*!\brief The coded data for this stream is corrupt or incomplete
- *
- * There was a problem decoding the current frame. This return code
- * should only be used for failures that prevent future pictures from
- * being properly decoded. This error \ref MAY be treated as fatal to the
- * stream or \ref MAY be treated as fatal to the current GOP. If decoding
- * is continued for the current GOP, artifacts may be present.
- */
- VPX_CODEC_CORRUPT_FRAME,
-
- /*!\brief An application-supplied parameter is not valid.
- *
- */
- VPX_CODEC_INVALID_PARAM,
-
- /*!\brief An iterator reached the end of list.
- *
- */
- VPX_CODEC_LIST_END
-
- }
- vpx_codec_err_t;
-
-
- /*! \brief Codec capabilities bitfield
+ /*!\brief The given bitstream is not supported.
*
- * Each codec advertises the capabilities it supports as part of its
- * ::vpx_codec_iface_t interface structure. Capabilities are extra interfaces
- * or functionality, and are not required to be supported.
+ * The bitstream was unable to be parsed at the highest level. The decoder
+ * is unable to proceed. This error \ref SHOULD be treated as fatal to the
+ * stream. */
+ VPX_CODEC_UNSUP_BITSTREAM,
+
+ /*!\brief Encoded bitstream uses an unsupported feature
*
- * The available flags are specified by VPX_CODEC_CAP_* defines.
+ * The decoder does not implement a feature required by the encoder. This
+ * return code should only be used for features that prevent future
+ * pictures from being properly decoded. This error \ref MAY be treated as
+ * fatal to the stream or \ref MAY be treated as fatal to the current GOP.
*/
- typedef long vpx_codec_caps_t;
+ VPX_CODEC_UNSUP_FEATURE,
+
+ /*!\brief The coded data for this stream is corrupt or incomplete
+ *
+ * There was a problem decoding the current frame. This return code
+ * should only be used for failures that prevent future pictures from
+ * being properly decoded. This error \ref MAY be treated as fatal to the
+ * stream or \ref MAY be treated as fatal to the current GOP. If decoding
+ * is continued for the current GOP, artifacts may be present.
+ */
+ VPX_CODEC_CORRUPT_FRAME,
+
+ /*!\brief An application-supplied parameter is not valid.
+ *
+ */
+ VPX_CODEC_INVALID_PARAM,
+
+ /*!\brief An iterator reached the end of list.
+ *
+ */
+ VPX_CODEC_LIST_END
+
+ }
+ vpx_codec_err_t;
+
+
+ /*! \brief Codec capabilities bitfield
+ *
+ * Each codec advertises the capabilities it supports as part of its
+ * ::vpx_codec_iface_t interface structure. Capabilities are extra interfaces
+ * or functionality, and are not required to be supported.
+ *
+ * The available flags are specified by VPX_CODEC_CAP_* defines.
+ */
+ typedef long vpx_codec_caps_t;
#define VPX_CODEC_CAP_DECODER 0x1 /**< Is a decoder */
#define VPX_CODEC_CAP_ENCODER 0x2 /**< Is an encoder */
#define VPX_CODEC_CAP_XMA 0x4 /**< Supports eXternal Memory Allocation */
- /*! \brief Initialization-time Feature Enabling
- *
- * Certain codec features must be known at initialization time, to allow for
- * proper memory allocation.
- *
- * The available flags are specified by VPX_CODEC_USE_* defines.
- */
- typedef long vpx_codec_flags_t;
+ /*! \brief Initialization-time Feature Enabling
+ *
+ * Certain codec features must be known at initialization time, to allow for
+ * proper memory allocation.
+ *
+ * The available flags are specified by VPX_CODEC_USE_* defines.
+ */
+ typedef long vpx_codec_flags_t;
#define VPX_CODEC_USE_XMA 0x00000001 /**< Use eXternal Memory Allocation mode */
- /*!\brief Codec interface structure.
- *
- * Contains function pointers and other data private to the codec
- * implementation. This structure is opaque to the application.
- */
- typedef const struct vpx_codec_iface vpx_codec_iface_t;
+ /*!\brief Codec interface structure.
+ *
+ * Contains function pointers and other data private to the codec
+ * implementation. This structure is opaque to the application.
+ */
+ typedef const struct vpx_codec_iface vpx_codec_iface_t;
- /*!\brief Codec private data structure.
- *
- * Contains data private to the codec implementation. This structure is opaque
- * to the application.
- */
- typedef struct vpx_codec_priv vpx_codec_priv_t;
+ /*!\brief Codec private data structure.
+ *
+ * Contains data private to the codec implementation. This structure is opaque
+ * to the application.
+ */
+ typedef struct vpx_codec_priv vpx_codec_priv_t;
- /*!\brief Iterator
- *
- * Opaque storage used for iterating over lists.
- */
- typedef const void *vpx_codec_iter_t;
+ /*!\brief Iterator
+ *
+ * Opaque storage used for iterating over lists.
+ */
+ typedef const void *vpx_codec_iter_t;
- /*!\brief Codec context structure
- *
- * All codecs \ref MUST support this context structure fully. In general,
- * this data should be considered private to the codec algorithm, and
- * not be manipulated or examined by the calling application. Applications
- * may reference the 'name' member to get a printable description of the
- * algorithm.
- */
- typedef struct vpx_codec_ctx
- {
- const char *name; /**< Printable interface name */
- vpx_codec_iface_t *iface; /**< Interface pointers */
- vpx_codec_err_t err; /**< Last returned error */
- const char *err_detail; /**< Detailed info, if available */
- vpx_codec_flags_t init_flags; /**< Flags passed at init time */
- union
- {
- struct vpx_codec_dec_cfg *dec; /**< Decoder Configuration Pointer */
- struct vpx_codec_enc_cfg *enc; /**< Encoder Configuration Pointer */
- void *raw;
- } config; /**< Configuration pointer aliasing union */
- vpx_codec_priv_t *priv; /**< Algorithm private storage */
- } vpx_codec_ctx_t;
+ /*!\brief Codec context structure
+ *
+ * All codecs \ref MUST support this context structure fully. In general,
+ * this data should be considered private to the codec algorithm, and
+ * not be manipulated or examined by the calling application. Applications
+ * may reference the 'name' member to get a printable description of the
+ * algorithm.
+ */
+ typedef struct vpx_codec_ctx {
+ const char *name; /**< Printable interface name */
+ vpx_codec_iface_t *iface; /**< Interface pointers */
+ vpx_codec_err_t err; /**< Last returned error */
+ const char *err_detail; /**< Detailed info, if available */
+ vpx_codec_flags_t init_flags; /**< Flags passed at init time */
+ union {
+ struct vpx_codec_dec_cfg *dec; /**< Decoder Configuration Pointer */
+ struct vpx_codec_enc_cfg *enc; /**< Encoder Configuration Pointer */
+ void *raw;
+ } config; /**< Configuration pointer aliasing union */
+ vpx_codec_priv_t *priv; /**< Algorithm private storage */
+ } vpx_codec_ctx_t;
- /*
- * Library Version Number Interface
- *
- * For example, see the following sample return values:
- * vpx_codec_version() (1<<16 | 2<<8 | 3)
- * vpx_codec_version_str() "v1.2.3-rc1-16-gec6a1ba"
- * vpx_codec_version_extra_str() "rc1-16-gec6a1ba"
- */
+ /*
+ * Library Version Number Interface
+ *
+ * For example, see the following sample return values:
+ * vpx_codec_version() (1<<16 | 2<<8 | 3)
+ * vpx_codec_version_str() "v1.2.3-rc1-16-gec6a1ba"
+ * vpx_codec_version_extra_str() "rc1-16-gec6a1ba"
+ */
- /*!\brief Return the version information (as an integer)
- *
- * Returns a packed encoding of the library version number. This will only include
- * the major.minor.patch component of the version number. Note that this encoded
- * value should be accessed through the macros provided, as the encoding may change
- * in the future.
- *
- */
- int vpx_codec_version(void);
+ /*!\brief Return the version information (as an integer)
+ *
+ * Returns a packed encoding of the library version number. This will only include
+ * the major.minor.patch component of the version number. Note that this encoded
+ * value should be accessed through the macros provided, as the encoding may change
+ * in the future.
+ *
+ */
+ int vpx_codec_version(void);
#define VPX_VERSION_MAJOR(v) ((v>>16)&0xff) /**< extract major from packed version */
#define VPX_VERSION_MINOR(v) ((v>>8)&0xff) /**< extract minor from packed version */
#define VPX_VERSION_PATCH(v) ((v>>0)&0xff) /**< extract patch from packed version */
- /*!\brief Return the version major number */
+ /*!\brief Return the version major number */
#define vpx_codec_version_major() ((vpx_codec_version()>>16)&0xff)
- /*!\brief Return the version minor number */
+ /*!\brief Return the version minor number */
#define vpx_codec_version_minor() ((vpx_codec_version()>>8)&0xff)
- /*!\brief Return the version patch number */
+ /*!\brief Return the version patch number */
#define vpx_codec_version_patch() ((vpx_codec_version()>>0)&0xff)
- /*!\brief Return the version information (as a string)
- *
- * Returns a printable string containing the full library version number. This may
- * contain additional text following the three digit version number, as to indicate
- * release candidates, prerelease versions, etc.
- *
- */
- const char *vpx_codec_version_str(void);
+ /*!\brief Return the version information (as a string)
+ *
+ * Returns a printable string containing the full library version number. This may
+ * contain additional text following the three digit version number, to indicate
+ * release candidates, prerelease versions, etc.
+ *
+ */
+ const char *vpx_codec_version_str(void);
- /*!\brief Return the version information (as a string)
- *
- * Returns a printable "extra string". This is the component of the string returned
- * by vpx_codec_version_str() following the three digit version number.
- *
- */
- const char *vpx_codec_version_extra_str(void);
+ /*!\brief Return the version information (as a string)
+ *
+ * Returns a printable "extra string". This is the component of the string returned
+ * by vpx_codec_version_str() following the three digit version number.
+ *
+ */
+ const char *vpx_codec_version_extra_str(void);
- /*!\brief Return the build configuration
- *
- * Returns a printable string containing an encoded version of the build
- * configuration. This may be useful to vpx support.
- *
- */
- const char *vpx_codec_build_config(void);
+ /*!\brief Return the build configuration
+ *
+ * Returns a printable string containing an encoded version of the build
+ * configuration. This may be useful to vpx support.
+ *
+ */
+ const char *vpx_codec_build_config(void);
- /*!\brief Return the name for a given interface
- *
- * Returns a human readable string for name of the given codec interface.
- *
- * \param[in] iface Interface pointer
- *
- */
- const char *vpx_codec_iface_name(vpx_codec_iface_t *iface);
+ /*!\brief Return the name for a given interface
+ *
+ * Returns a human readable string for name of the given codec interface.
+ *
+ * \param[in] iface Interface pointer
+ *
+ */
+ const char *vpx_codec_iface_name(vpx_codec_iface_t *iface);
- /*!\brief Convert error number to printable string
- *
- * Returns a human readable string for the last error returned by the
- * algorithm. The returned error will be one line and will not contain
- * any newline characters.
- *
- *
- * \param[in] err Error number.
- *
- */
- const char *vpx_codec_err_to_string(vpx_codec_err_t err);
+ /*!\brief Convert error number to printable string
+ *
+ * Returns a human readable string for the error number passed in
+ * \p err. The returned string will be one line and will not contain
+ * any newline characters.
+ *
+ *
+ * \param[in] err Error number.
+ *
+ */
+ const char *vpx_codec_err_to_string(vpx_codec_err_t err);
- /*!\brief Retrieve error synopsis for codec context
- *
- * Returns a human readable string for the last error returned by the
- * algorithm. The returned error will be one line and will not contain
- * any newline characters.
- *
- *
- * \param[in] ctx Pointer to this instance's context.
- *
- */
- const char *vpx_codec_error(vpx_codec_ctx_t *ctx);
+ /*!\brief Retrieve error synopsis for codec context
+ *
+ * Returns a human readable string for the last error returned by the
+ * algorithm. The returned error will be one line and will not contain
+ * any newline characters.
+ *
+ *
+ * \param[in] ctx Pointer to this instance's context.
+ *
+ */
+ const char *vpx_codec_error(vpx_codec_ctx_t *ctx);
- /*!\brief Retrieve detailed error information for codec context
- *
- * Returns a human readable string providing detailed information about
- * the last error.
- *
- * \param[in] ctx Pointer to this instance's context.
- *
- * \retval NULL
- * No detailed information is available.
- */
- const char *vpx_codec_error_detail(vpx_codec_ctx_t *ctx);
+ /*!\brief Retrieve detailed error information for codec context
+ *
+ * Returns a human readable string providing detailed information about
+ * the last error.
+ *
+ * \param[in] ctx Pointer to this instance's context.
+ *
+ * \retval NULL
+ * No detailed information is available.
+ */
+ const char *vpx_codec_error_detail(vpx_codec_ctx_t *ctx);
- /* REQUIRED FUNCTIONS
- *
- * The following functions are required to be implemented for all codecs.
- * They represent the base case functionality expected of all codecs.
- */
+ /* REQUIRED FUNCTIONS
+ *
+ * The following functions are required to be implemented for all codecs.
+ * They represent the base case functionality expected of all codecs.
+ */
- /*!\brief Destroy a codec instance
- *
- * Destroys a codec context, freeing any associated memory buffers.
- *
- * \param[in] ctx Pointer to this instance's context
- *
- * \retval #VPX_CODEC_OK
- * The codec algorithm initialized.
- * \retval #VPX_CODEC_MEM_ERROR
- * Memory allocation failed.
- */
- vpx_codec_err_t vpx_codec_destroy(vpx_codec_ctx_t *ctx);
+ /*!\brief Destroy a codec instance
+ *
+ * Destroys a codec context, freeing any associated memory buffers.
+ *
+ * \param[in] ctx Pointer to this instance's context
+ *
+ * \retval #VPX_CODEC_OK
+ * The codec instance was destroyed.
+ * \retval #VPX_CODEC_MEM_ERROR
+ * Memory allocation failed.
+ */
+ vpx_codec_err_t vpx_codec_destroy(vpx_codec_ctx_t *ctx);
- /*!\brief Get the capabilities of an algorithm.
- *
- * Retrieves the capabilities bitfield from the algorithm's interface.
- *
- * \param[in] iface Pointer to the algorithm interface
- *
- */
- vpx_codec_caps_t vpx_codec_get_caps(vpx_codec_iface_t *iface);
+ /*!\brief Get the capabilities of an algorithm.
+ *
+ * Retrieves the capabilities bitfield from the algorithm's interface.
+ *
+ * \param[in] iface Pointer to the algorithm interface
+ *
+ */
+ vpx_codec_caps_t vpx_codec_get_caps(vpx_codec_iface_t *iface);
- /*!\brief Control algorithm
- *
- * This function is used to exchange algorithm specific data with the codec
- * instance. This can be used to implement features specific to a particular
- * algorithm.
- *
- * This wrapper function dispatches the request to the helper function
- * associated with the given ctrl_id. It tries to call this function
- * transparently, but will return #VPX_CODEC_ERROR if the request could not
- * be dispatched.
- *
- * Note that this function should not be used directly. Call the
- * #vpx_codec_control wrapper macro instead.
- *
- * \param[in] ctx Pointer to this instance's context
- * \param[in] ctrl_id Algorithm specific control identifier
- *
- * \retval #VPX_CODEC_OK
- * The control request was processed.
- * \retval #VPX_CODEC_ERROR
- * The control request was not processed.
- * \retval #VPX_CODEC_INVALID_PARAM
- * The data was not valid.
- */
- vpx_codec_err_t vpx_codec_control_(vpx_codec_ctx_t *ctx,
- int ctrl_id,
- ...);
+ /*!\brief Control algorithm
+ *
+ * This function is used to exchange algorithm specific data with the codec
+ * instance. This can be used to implement features specific to a particular
+ * algorithm.
+ *
+ * This wrapper function dispatches the request to the helper function
+ * associated with the given ctrl_id. It tries to call this function
+ * transparently, but will return #VPX_CODEC_ERROR if the request could not
+ * be dispatched.
+ *
+ * Note that this function should not be used directly. Call the
+ * #vpx_codec_control wrapper macro instead.
+ *
+ * \param[in] ctx Pointer to this instance's context
+ * \param[in] ctrl_id Algorithm specific control identifier
+ *
+ * \retval #VPX_CODEC_OK
+ * The control request was processed.
+ * \retval #VPX_CODEC_ERROR
+ * The control request was not processed.
+ * \retval #VPX_CODEC_INVALID_PARAM
+ * The data was not valid.
+ */
+ vpx_codec_err_t vpx_codec_control_(vpx_codec_ctx_t *ctx,
+ int ctrl_id,
+ ...);
#if defined(VPX_DISABLE_CTRL_TYPECHECKS) && VPX_DISABLE_CTRL_TYPECHECKS
# define vpx_codec_control(ctx,id,data) vpx_codec_control_(ctx,id,data)
# define VPX_CTRL_USE_TYPE(id, typ)
@@ -387,172 +385,171 @@
# define VPX_CTRL_VOID(id, typ)
#else
- /*!\brief vpx_codec_control wrapper macro
- *
- * This macro allows for type safe conversions across the variadic parameter
- * to vpx_codec_control_().
- *
- * \internal
- * It works by dispatching the call to the control function through a wrapper
- * function named with the id parameter.
- */
+ /*!\brief vpx_codec_control wrapper macro
+ *
+ * This macro allows for type safe conversions across the variadic parameter
+ * to vpx_codec_control_().
+ *
+ * \internal
+ * It works by dispatching the call to the control function through a wrapper
+ * function named with the id parameter.
+ */
# define vpx_codec_control(ctx,id,data) vpx_codec_control_##id(ctx,id,data)\
- /**<\hideinitializer*/
+ /**<\hideinitializer*/
- /*!\brief vpx_codec_control type definition macro
- *
- * This macro allows for type safe conversions across the variadic parameter
- * to vpx_codec_control_(). It defines the type of the argument for a given
- * control identifier.
- *
- * \internal
- * It defines a static function with
- * the correctly typed arguments as a wrapper to the type-unsafe internal
- * function.
- */
+ /*!\brief vpx_codec_control type definition macro
+ *
+ * This macro allows for type safe conversions across the variadic parameter
+ * to vpx_codec_control_(). It defines the type of the argument for a given
+ * control identifier.
+ *
+ * \internal
+ * It defines a static function with
+ * the correctly typed arguments as a wrapper to the type-unsafe internal
+ * function.
+ */
# define VPX_CTRL_USE_TYPE(id, typ) \
- static vpx_codec_err_t \
- vpx_codec_control_##id(vpx_codec_ctx_t*, int, typ) UNUSED;\
- \
- static vpx_codec_err_t \
- vpx_codec_control_##id(vpx_codec_ctx_t *ctx, int ctrl_id, typ data) {\
- return vpx_codec_control_(ctx, ctrl_id, data);\
- } /**<\hideinitializer*/
+ static vpx_codec_err_t \
+ vpx_codec_control_##id(vpx_codec_ctx_t*, int, typ) UNUSED;\
+ \
+ static vpx_codec_err_t \
+ vpx_codec_control_##id(vpx_codec_ctx_t *ctx, int ctrl_id, typ data) {\
+ return vpx_codec_control_(ctx, ctrl_id, data);\
+ } /**<\hideinitializer*/
- /*!\brief vpx_codec_control deprecated type definition macro
- *
- * Like #VPX_CTRL_USE_TYPE, but indicates that the specified control is
- * deprecated and should not be used. Consult the documentation for your
- * codec for more information.
- *
- * \internal
- * It defines a static function with the correctly typed arguments as a
- * wrapper to the type-unsafe internal function.
- */
+ /*!\brief vpx_codec_control deprecated type definition macro
+ *
+ * Like #VPX_CTRL_USE_TYPE, but indicates that the specified control is
+ * deprecated and should not be used. Consult the documentation for your
+ * codec for more information.
+ *
+ * \internal
+ * It defines a static function with the correctly typed arguments as a
+ * wrapper to the type-unsafe internal function.
+ */
# define VPX_CTRL_USE_TYPE_DEPRECATED(id, typ) \
- DECLSPEC_DEPRECATED static vpx_codec_err_t \
- vpx_codec_control_##id(vpx_codec_ctx_t*, int, typ) DEPRECATED UNUSED;\
- \
- DECLSPEC_DEPRECATED static vpx_codec_err_t \
- vpx_codec_control_##id(vpx_codec_ctx_t *ctx, int ctrl_id, typ data) {\
- return vpx_codec_control_(ctx, ctrl_id, data);\
- } /**<\hideinitializer*/
+ DECLSPEC_DEPRECATED static vpx_codec_err_t \
+ vpx_codec_control_##id(vpx_codec_ctx_t*, int, typ) DEPRECATED UNUSED;\
+ \
+ DECLSPEC_DEPRECATED static vpx_codec_err_t \
+ vpx_codec_control_##id(vpx_codec_ctx_t *ctx, int ctrl_id, typ data) {\
+ return vpx_codec_control_(ctx, ctrl_id, data);\
+ } /**<\hideinitializer*/
- /*!\brief vpx_codec_control void type definition macro
- *
- * This macro allows for type safe conversions across the variadic parameter
- * to vpx_codec_control_(). It indicates that a given control identifier takes
- * no argument.
- *
- * \internal
- * It defines a static function without a data argument as a wrapper to the
- * type-unsafe internal function.
- */
+ /*!\brief vpx_codec_control void type definition macro
+ *
+ * This macro allows for type safe conversions across the variadic parameter
+ * to vpx_codec_control_(). It indicates that a given control identifier takes
+ * no argument.
+ *
+ * \internal
+ * It defines a static function without a data argument as a wrapper to the
+ * type-unsafe internal function.
+ */
# define VPX_CTRL_VOID(id) \
- static vpx_codec_err_t \
- vpx_codec_control_##id(vpx_codec_ctx_t*, int) UNUSED;\
- \
- static vpx_codec_err_t \
- vpx_codec_control_##id(vpx_codec_ctx_t *ctx, int ctrl_id) {\
- return vpx_codec_control_(ctx, ctrl_id);\
- } /**<\hideinitializer*/
+ static vpx_codec_err_t \
+ vpx_codec_control_##id(vpx_codec_ctx_t*, int) UNUSED;\
+ \
+ static vpx_codec_err_t \
+ vpx_codec_control_##id(vpx_codec_ctx_t *ctx, int ctrl_id) {\
+ return vpx_codec_control_(ctx, ctrl_id);\
+ } /**<\hideinitializer*/
#endif
- /*!\defgroup cap_xma External Memory Allocation Functions
- *
- * The following functions are required to be implemented for all codecs
- * that advertise the VPX_CODEC_CAP_XMA capability. Calling these functions
- * for codecs that don't advertise this capability will result in an error
- * code being returned, usually VPX_CODEC_INCAPABLE
- * @{
- */
+ /*!\defgroup cap_xma External Memory Allocation Functions
+ *
+ * The following functions are required to be implemented for all codecs
+ * that advertise the VPX_CODEC_CAP_XMA capability. Calling these functions
+ * for codecs that don't advertise this capability will result in an error
+ * code being returned, usually VPX_CODEC_INCAPABLE
+ * @{
+ */
- /*!\brief Memory Map Entry
- *
- * This structure is used to contain the properties of a memory segment. It
- * is populated by the codec in the request phase, and by the calling
- * application once the requested allocation has been performed.
+ /*!\brief Memory Map Entry
+ *
+ * This structure is used to contain the properties of a memory segment. It
+ * is populated by the codec in the request phase, and by the calling
+ * application once the requested allocation has been performed.
+ */
+ typedef struct vpx_codec_mmap {
+ /*
+ * The following members are set by the codec when requesting a segment
*/
- typedef struct vpx_codec_mmap
- {
- /*
- * The following members are set by the codec when requesting a segment
- */
- unsigned int id; /**< identifier for the segment's contents */
- unsigned long sz; /**< size of the segment, in bytes */
- unsigned int align; /**< required alignment of the segment, in bytes */
- unsigned int flags; /**< bitfield containing segment properties */
+ unsigned int id; /**< identifier for the segment's contents */
+ unsigned long sz; /**< size of the segment, in bytes */
+ unsigned int align; /**< required alignment of the segment, in bytes */
+ unsigned int flags; /**< bitfield containing segment properties */
#define VPX_CODEC_MEM_ZERO 0x1 /**< Segment must be zeroed by allocation */
#define VPX_CODEC_MEM_WRONLY 0x2 /**< Segment need not be readable */
#define VPX_CODEC_MEM_FAST 0x4 /**< Place in fast memory, if available */
- /* The following members are to be filled in by the allocation function */
- void *base; /**< pointer to the allocated segment */
- void (*dtor)(struct vpx_codec_mmap *map); /**< destructor to call */
- void *priv; /**< allocator private storage */
- } vpx_codec_mmap_t; /**< alias for struct vpx_codec_mmap */
+ /* The following members are to be filled in by the allocation function */
+ void *base; /**< pointer to the allocated segment */
+ void (*dtor)(struct vpx_codec_mmap *map); /**< destructor to call */
+ void *priv; /**< allocator private storage */
+ } vpx_codec_mmap_t; /**< alias for struct vpx_codec_mmap */
- /*!\brief Iterate over the list of segments to allocate.
- *
- * Iterates over a list of the segments to allocate. The iterator storage
- * should be initialized to NULL to start the iteration. Iteration is complete
- * when this function returns VPX_CODEC_LIST_END. The amount of memory needed to
- * allocate is dependent upon the size of the encoded stream. In cases where the
- * stream is not available at allocation time, a fixed size must be requested.
- * The codec will not be able to operate on streams larger than the size used at
- * allocation time.
- *
- * \param[in] ctx Pointer to this instance's context.
- * \param[out] mmap Pointer to the memory map entry to populate.
- * \param[in,out] iter Iterator storage, initialized to NULL
- *
- * \retval #VPX_CODEC_OK
- * The memory map entry was populated.
- * \retval #VPX_CODEC_ERROR
- * Codec does not support XMA mode.
- * \retval #VPX_CODEC_MEM_ERROR
- * Unable to determine segment size from stream info.
- */
- vpx_codec_err_t vpx_codec_get_mem_map(vpx_codec_ctx_t *ctx,
- vpx_codec_mmap_t *mmap,
- vpx_codec_iter_t *iter);
+ /*!\brief Iterate over the list of segments to allocate.
+ *
+ * Iterates over a list of the segments to allocate. The iterator storage
+ * should be initialized to NULL to start the iteration. Iteration is complete
+ * when this function returns VPX_CODEC_LIST_END. The amount of memory needed to
+ * allocate is dependent upon the size of the encoded stream. In cases where the
+ * stream is not available at allocation time, a fixed size must be requested.
+ * The codec will not be able to operate on streams larger than the size used at
+ * allocation time.
+ *
+ * \param[in] ctx Pointer to this instance's context.
+ * \param[out] mmap Pointer to the memory map entry to populate.
+ * \param[in,out] iter Iterator storage, initialized to NULL
+ *
+ * \retval #VPX_CODEC_OK
+ * The memory map entry was populated.
+ * \retval #VPX_CODEC_ERROR
+ * Codec does not support XMA mode.
+ * \retval #VPX_CODEC_MEM_ERROR
+ * Unable to determine segment size from stream info.
+ */
+ vpx_codec_err_t vpx_codec_get_mem_map(vpx_codec_ctx_t *ctx,
+ vpx_codec_mmap_t *mmap,
+ vpx_codec_iter_t *iter);
- /*!\brief Identify allocated segments to codec instance
- *
- * Stores a list of allocated segments in the codec. Segments \ref MUST be
- * passed in the order they are read from vpx_codec_get_mem_map(), but may be
- * passed in groups of any size. Segments \ref MUST be set only once. The
- * allocation function \ref MUST ensure that the vpx_codec_mmap_t::base member
- * is non-NULL. If the segment requires cleanup handling (e.g., calling free()
- * or close()) then the vpx_codec_mmap_t::dtor member \ref MUST be populated.
- *
- * \param[in] ctx Pointer to this instance's context.
- * \param[in] mmaps Pointer to the first memory map entry in the list.
- * \param[in] num_maps Number of entries being set at this time
- *
- * \retval #VPX_CODEC_OK
- * The segment was stored in the codec context.
- * \retval #VPX_CODEC_INCAPABLE
- * Codec does not support XMA mode.
- * \retval #VPX_CODEC_MEM_ERROR
- * Segment base address was not set, or segment was already stored.
+ /*!\brief Identify allocated segments to codec instance
+ *
+ * Stores a list of allocated segments in the codec. Segments \ref MUST be
+ * passed in the order they are read from vpx_codec_get_mem_map(), but may be
+ * passed in groups of any size. Segments \ref MUST be set only once. The
+ * allocation function \ref MUST ensure that the vpx_codec_mmap_t::base member
+ * is non-NULL. If the segment requires cleanup handling (e.g., calling free()
+ * or close()) then the vpx_codec_mmap_t::dtor member \ref MUST be populated.
+ *
+ * \param[in] ctx Pointer to this instance's context.
+ * \param[in] mmaps Pointer to the first memory map entry in the list.
+ * \param[in] num_maps Number of entries being set at this time
+ *
+ * \retval #VPX_CODEC_OK
+ * The segment was stored in the codec context.
+ * \retval #VPX_CODEC_INCAPABLE
+ * Codec does not support XMA mode.
+ * \retval #VPX_CODEC_MEM_ERROR
+ * Segment base address was not set, or segment was already stored.
- */
- vpx_codec_err_t vpx_codec_set_mem_map(vpx_codec_ctx_t *ctx,
- vpx_codec_mmap_t *mmaps,
- unsigned int num_maps);
+ */
+ vpx_codec_err_t vpx_codec_set_mem_map(vpx_codec_ctx_t *ctx,
+ vpx_codec_mmap_t *mmaps,
+ unsigned int num_maps);
- /*!@} - end defgroup cap_xma*/
- /*!@} - end defgroup codec*/
+ /*!@} - end defgroup cap_xma*/
+ /*!@} - end defgroup codec*/
#endif
diff --git a/vpx/vpx_decoder.h b/vpx/vpx_decoder.h
index 1ccf1c5..dbe6aaa 100644
--- a/vpx/vpx_decoder.h
+++ b/vpx/vpx_decoder.h
@@ -32,299 +32,297 @@
#define VPX_DECODER_H
#include "vpx_codec.h"
- /*!\brief Current ABI version number
- *
- * \internal
- * If this file is altered in any way that changes the ABI, this value
- * must be bumped. Examples include, but are not limited to, changing
- * types, removing or reassigning enums, adding/removing/rearranging
- * fields to structures
- */
+ /*!\brief Current ABI version number
+ *
+ * \internal
+ * If this file is altered in any way that changes the ABI, this value
+ * must be bumped. Examples include, but are not limited to, changing
+ * types, removing or reassigning enums, adding/removing/rearranging
+ * fields to structures
+ */
#define VPX_DECODER_ABI_VERSION (2 + VPX_CODEC_ABI_VERSION) /**<\hideinitializer*/
- /*! \brief Decoder capabilities bitfield
- *
- * Each decoder advertises the capabilities it supports as part of its
- * ::vpx_codec_iface_t interface structure. Capabilities are extra interfaces
- * or functionality, and are not required to be supported by a decoder.
- *
- * The available flags are specified by VPX_CODEC_CAP_* defines.
- */
+ /*! \brief Decoder capabilities bitfield
+ *
+ * Each decoder advertises the capabilities it supports as part of its
+ * ::vpx_codec_iface_t interface structure. Capabilities are extra interfaces
+ * or functionality, and are not required to be supported by a decoder.
+ *
+ * The available flags are specified by VPX_CODEC_CAP_* defines.
+ */
#define VPX_CODEC_CAP_PUT_SLICE 0x10000 /**< Will issue put_slice callbacks */
#define VPX_CODEC_CAP_PUT_FRAME 0x20000 /**< Will issue put_frame callbacks */
#define VPX_CODEC_CAP_POSTPROC 0x40000 /**< Can postprocess decoded frame */
#define VPX_CODEC_CAP_ERROR_CONCEALMENT 0x80000 /**< Can conceal errors due to
- packet loss */
+ packet loss */
#define VPX_CODEC_CAP_INPUT_FRAGMENTS 0x100000 /**< Can receive encoded frames
- one fragment at a time */
+ one fragment at a time */
- /*! \brief Initialization-time Feature Enabling
- *
- * Certain codec features must be known at initialization time, to allow for
- * proper memory allocation.
- *
- * The available flags are specified by VPX_CODEC_USE_* defines.
- */
+ /*! \brief Initialization-time Feature Enabling
+ *
+ * Certain codec features must be known at initialization time, to allow for
+ * proper memory allocation.
+ *
+ * The available flags are specified by VPX_CODEC_USE_* defines.
+ */
#define VPX_CODEC_USE_POSTPROC 0x10000 /**< Postprocess decoded frame */
#define VPX_CODEC_USE_ERROR_CONCEALMENT 0x20000 /**< Conceal errors in decoded
- frames */
+ frames */
#define VPX_CODEC_USE_INPUT_FRAGMENTS 0x40000 /**< The input frame should be
- passed to the decoder one
- fragment at a time */
+ passed to the decoder one
+ fragment at a time */
- /*!\brief Stream properties
- *
- * This structure is used to query or set properties of the decoded
- * stream. Algorithms may extend this structure with data specific
- * to their bitstream by setting the sz member appropriately.
- */
- typedef struct vpx_codec_stream_info
- {
- unsigned int sz; /**< Size of this structure */
- unsigned int w; /**< Width (or 0 for unknown/default) */
- unsigned int h; /**< Height (or 0 for unknown/default) */
- unsigned int is_kf; /**< Current frame is a keyframe */
- } vpx_codec_stream_info_t;
+ /*!\brief Stream properties
+ *
+ * This structure is used to query or set properties of the decoded
+ * stream. Algorithms may extend this structure with data specific
+ * to their bitstream by setting the sz member appropriately.
+ */
+ typedef struct vpx_codec_stream_info {
+ unsigned int sz; /**< Size of this structure */
+ unsigned int w; /**< Width (or 0 for unknown/default) */
+ unsigned int h; /**< Height (or 0 for unknown/default) */
+ unsigned int is_kf; /**< Current frame is a keyframe */
+ } vpx_codec_stream_info_t;
- /* REQUIRED FUNCTIONS
- *
- * The following functions are required to be implemented for all decoders.
- * They represent the base case functionality expected of all decoders.
- */
+ /* REQUIRED FUNCTIONS
+ *
+ * The following functions are required to be implemented for all decoders.
+ * They represent the base case functionality expected of all decoders.
+ */
- /*!\brief Initialization Configurations
- *
- * This structure is used to pass init time configuration options to the
- * decoder.
- */
- typedef struct vpx_codec_dec_cfg
- {
- unsigned int threads; /**< Maximum number of threads to use, default 1 */
- unsigned int w; /**< Width */
- unsigned int h; /**< Height */
- } vpx_codec_dec_cfg_t; /**< alias for struct vpx_codec_dec_cfg */
+ /*!\brief Initialization Configurations
+ *
+ * This structure is used to pass init time configuration options to the
+ * decoder.
+ */
+ typedef struct vpx_codec_dec_cfg {
+ unsigned int threads; /**< Maximum number of threads to use, default 1 */
+ unsigned int w; /**< Width */
+ unsigned int h; /**< Height */
+ } vpx_codec_dec_cfg_t; /**< alias for struct vpx_codec_dec_cfg */
- /*!\brief Initialize a decoder instance
- *
- * Initializes a decoder context using the given interface. Applications
- * should call the vpx_codec_dec_init convenience macro instead of this
- * function directly, to ensure that the ABI version number parameter
- * is properly initialized.
- *
- * If the library was configured with --disable-multithread, this call
- * is not thread safe and should be guarded with a lock if being used
- * in a multithreaded context.
- *
- * In XMA mode (activated by setting VPX_CODEC_USE_XMA in the flags
- * parameter), the storage pointed to by the cfg parameter must be
- * kept readable and stable until all memory maps have been set.
- *
- * \param[in] ctx Pointer to this instance's context.
- * \param[in] iface Pointer to the algorithm interface to use.
- * \param[in] cfg Configuration to use, if known. May be NULL.
- * \param[in] flags Bitfield of VPX_CODEC_USE_* flags
- * \param[in] ver ABI version number. Must be set to
- * VPX_DECODER_ABI_VERSION
- * \retval #VPX_CODEC_OK
- * The decoder algorithm initialized.
- * \retval #VPX_CODEC_MEM_ERROR
- * Memory allocation failed.
- */
- vpx_codec_err_t vpx_codec_dec_init_ver(vpx_codec_ctx_t *ctx,
- vpx_codec_iface_t *iface,
- vpx_codec_dec_cfg_t *cfg,
- vpx_codec_flags_t flags,
- int ver);
+ /*!\brief Initialize a decoder instance
+ *
+ * Initializes a decoder context using the given interface. Applications
+ * should call the vpx_codec_dec_init convenience macro instead of this
+ * function directly, to ensure that the ABI version number parameter
+ * is properly initialized.
+ *
+ * If the library was configured with --disable-multithread, this call
+ * is not thread safe and should be guarded with a lock if being used
+ * in a multithreaded context.
+ *
+ * In XMA mode (activated by setting VPX_CODEC_USE_XMA in the flags
+ * parameter), the storage pointed to by the cfg parameter must be
+ * kept readable and stable until all memory maps have been set.
+ *
+ * \param[in] ctx Pointer to this instance's context.
+ * \param[in] iface Pointer to the algorithm interface to use.
+ * \param[in] cfg Configuration to use, if known. May be NULL.
+ * \param[in] flags Bitfield of VPX_CODEC_USE_* flags
+ * \param[in] ver ABI version number. Must be set to
+ * VPX_DECODER_ABI_VERSION
+ * \retval #VPX_CODEC_OK
+ * The decoder algorithm initialized.
+ * \retval #VPX_CODEC_MEM_ERROR
+ * Memory allocation failed.
+ */
+ vpx_codec_err_t vpx_codec_dec_init_ver(vpx_codec_ctx_t *ctx,
+ vpx_codec_iface_t *iface,
+ vpx_codec_dec_cfg_t *cfg,
+ vpx_codec_flags_t flags,
+ int ver);
- /*!\brief Convenience macro for vpx_codec_dec_init_ver()
- *
- * Ensures the ABI version parameter is properly set.
- */
+ /*!\brief Convenience macro for vpx_codec_dec_init_ver()
+ *
+ * Ensures the ABI version parameter is properly set.
+ */
#define vpx_codec_dec_init(ctx, iface, cfg, flags) \
- vpx_codec_dec_init_ver(ctx, iface, cfg, flags, VPX_DECODER_ABI_VERSION)
+ vpx_codec_dec_init_ver(ctx, iface, cfg, flags, VPX_DECODER_ABI_VERSION)
- /*!\brief Parse stream info from a buffer
- *
- * Performs high level parsing of the bitstream. Construction of a decoder
- * context is not necessary. Can be used to determine if the bitstream is
- * of the proper format, and to extract information from the stream.
- *
- * \param[in] iface Pointer to the algorithm interface
- * \param[in] data Pointer to a block of data to parse
- * \param[in] data_sz Size of the data buffer
- * \param[in,out] si Pointer to stream info to update. The size member
- * \ref MUST be properly initialized, but \ref MAY be
- * clobbered by the algorithm. This parameter \ref MAY
- * be NULL.
- *
- * \retval #VPX_CODEC_OK
- * Bitstream is parsable and stream information updated
- */
- vpx_codec_err_t vpx_codec_peek_stream_info(vpx_codec_iface_t *iface,
- const uint8_t *data,
- unsigned int data_sz,
- vpx_codec_stream_info_t *si);
+ /*!\brief Parse stream info from a buffer
+ *
+ * Performs high level parsing of the bitstream. Construction of a decoder
+ * context is not necessary. Can be used to determine if the bitstream is
+ * of the proper format, and to extract information from the stream.
+ *
+ * \param[in] iface Pointer to the algorithm interface
+ * \param[in] data Pointer to a block of data to parse
+ * \param[in] data_sz Size of the data buffer
+ * \param[in,out] si Pointer to stream info to update. The size member
+ * \ref MUST be properly initialized, but \ref MAY be
+ * clobbered by the algorithm. This parameter \ref MAY
+ * be NULL.
+ *
+ * \retval #VPX_CODEC_OK
+ * Bitstream is parsable and stream information updated
+ */
+ vpx_codec_err_t vpx_codec_peek_stream_info(vpx_codec_iface_t *iface,
+ const uint8_t *data,
+ unsigned int data_sz,
+ vpx_codec_stream_info_t *si);
- /*!\brief Return information about the current stream.
- *
- * Returns information about the stream that has been parsed during decoding.
- *
- * \param[in] ctx Pointer to this instance's context
- * \param[in,out] si Pointer to stream info to update. The size member
- * \ref MUST be properly initialized, but \ref MAY be
- * clobbered by the algorithm. This parameter \ref MAY
- * be NULL.
- *
- * \retval #VPX_CODEC_OK
- * Bitstream is parsable and stream information updated
- */
- vpx_codec_err_t vpx_codec_get_stream_info(vpx_codec_ctx_t *ctx,
- vpx_codec_stream_info_t *si);
+ /*!\brief Return information about the current stream.
+ *
+ * Returns information about the stream that has been parsed during decoding.
+ *
+ * \param[in] ctx Pointer to this instance's context
+ * \param[in,out] si Pointer to stream info to update. The size member
+ * \ref MUST be properly initialized, but \ref MAY be
+ * clobbered by the algorithm. This parameter \ref MAY
+ * be NULL.
+ *
+ * \retval #VPX_CODEC_OK
+ * Bitstream is parsable and stream information updated
+ */
+ vpx_codec_err_t vpx_codec_get_stream_info(vpx_codec_ctx_t *ctx,
+ vpx_codec_stream_info_t *si);
- /*!\brief Decode data
- *
- * Processes a buffer of coded data. If the processing results in a new
- * decoded frame becoming available, PUT_SLICE and PUT_FRAME events may be
- * generated, as appropriate. Encoded data \ref MUST be passed in DTS (decode
- * time stamp) order. Frames produced will always be in PTS (presentation
- * time stamp) order.
- * If the decoder is configured with VPX_CODEC_USE_INPUT_FRAGMENTS enabled,
- * data and data_sz can contain a fragment of the encoded frame. Fragment
- * \#n must contain at least partition \#n, but can also contain subsequent
- * partitions (\#n+1 - \#n+i), and if so, fragments \#n+1, .., \#n+i must
- * be empty. When no more data is available, this function should be called
- * with NULL as data and 0 as data_sz. The memory passed to this function
- * must be available until the frame has been decoded.
- *
- * \param[in] ctx Pointer to this instance's context
- * \param[in] data Pointer to this block of new coded data. If
- * NULL, a VPX_CODEC_CB_PUT_FRAME event is posted
- * for the previously decoded frame.
- * \param[in] data_sz Size of the coded data, in bytes.
- * \param[in] user_priv Application specific data to associate with
- * this frame.
- * \param[in] deadline Soft deadline the decoder should attempt to meet,
- * in us. Set to zero for unlimited.
- *
- * \return Returns #VPX_CODEC_OK if the coded data was processed completely
- * and future pictures can be decoded without error. Otherwise,
- * see the descriptions of the other error codes in ::vpx_codec_err_t
- * for recoverability capabilities.
- */
- vpx_codec_err_t vpx_codec_decode(vpx_codec_ctx_t *ctx,
- const uint8_t *data,
- unsigned int data_sz,
- void *user_priv,
- long deadline);
+ /*!\brief Decode data
+ *
+ * Processes a buffer of coded data. If the processing results in a new
+ * decoded frame becoming available, PUT_SLICE and PUT_FRAME events may be
+ * generated, as appropriate. Encoded data \ref MUST be passed in DTS (decode
+ * time stamp) order. Frames produced will always be in PTS (presentation
+ * time stamp) order.
+ * If the decoder is configured with VPX_CODEC_USE_INPUT_FRAGMENTS enabled,
+ * data and data_sz can contain a fragment of the encoded frame. Fragment
+ * \#n must contain at least partition \#n, but can also contain subsequent
+ * partitions (\#n+1 - \#n+i), and if so, fragments \#n+1, .., \#n+i must
+ * be empty. When no more data is available, this function should be called
+ * with NULL as data and 0 as data_sz. The memory passed to this function
+ * must be available until the frame has been decoded.
+ *
+ * \param[in] ctx Pointer to this instance's context
+ * \param[in] data Pointer to this block of new coded data. If
+ * NULL, a VPX_CODEC_CB_PUT_FRAME event is posted
+ * for the previously decoded frame.
+ * \param[in] data_sz Size of the coded data, in bytes.
+ * \param[in] user_priv Application specific data to associate with
+ * this frame.
+ * \param[in] deadline Soft deadline the decoder should attempt to meet,
+ * in us. Set to zero for unlimited.
+ *
+ * \return Returns #VPX_CODEC_OK if the coded data was processed completely
+ * and future pictures can be decoded without error. Otherwise,
+ * see the descriptions of the other error codes in ::vpx_codec_err_t
+ * for recoverability capabilities.
+ */
+ vpx_codec_err_t vpx_codec_decode(vpx_codec_ctx_t *ctx,
+ const uint8_t *data,
+ unsigned int data_sz,
+ void *user_priv,
+ long deadline);
- /*!\brief Decoded frames iterator
- *
- * Iterates over a list of the frames available for display. The iterator
- * storage should be initialized to NULL to start the iteration. Iteration is
- * complete when this function returns NULL.
- *
- * The list of available frames becomes valid upon completion of the
- * vpx_codec_decode call, and remains valid until the next call to vpx_codec_decode.
- *
- * \param[in] ctx Pointer to this instance's context
- * \param[in,out] iter Iterator storage, initialized to NULL
- *
- * \return Returns a pointer to an image, if one is ready for display. Frames
- * produced will always be in PTS (presentation time stamp) order.
- */
- vpx_image_t *vpx_codec_get_frame(vpx_codec_ctx_t *ctx,
- vpx_codec_iter_t *iter);
+ /*!\brief Decoded frames iterator
+ *
+ * Iterates over a list of the frames available for display. The iterator
+ * storage should be initialized to NULL to start the iteration. Iteration is
+ * complete when this function returns NULL.
+ *
+ * The list of available frames becomes valid upon completion of the
+ * vpx_codec_decode call, and remains valid until the next call to vpx_codec_decode.
+ *
+ * \param[in] ctx Pointer to this instance's context
+ * \param[in,out] iter Iterator storage, initialized to NULL
+ *
+ * \return Returns a pointer to an image, if one is ready for display. Frames
+ * produced will always be in PTS (presentation time stamp) order.
+ */
+ vpx_image_t *vpx_codec_get_frame(vpx_codec_ctx_t *ctx,
+ vpx_codec_iter_t *iter);
- /*!\defgroup cap_put_frame Frame-Based Decoding Functions
- *
- * The following functions are required to be implemented for all decoders
- * that advertise the VPX_CODEC_CAP_PUT_FRAME capability. Calling these functions
- * for codecs that don't advertise this capability will result in an error
- * code being returned, usually VPX_CODEC_ERROR
- * @{
- */
+ /*!\defgroup cap_put_frame Frame-Based Decoding Functions
+ *
+ * The following functions are required to be implemented for all decoders
+ * that advertise the VPX_CODEC_CAP_PUT_FRAME capability. Calling these functions
+ * for codecs that don't advertise this capability will result in an error
+ * code being returned, usually VPX_CODEC_ERROR
+ * @{
+ */
- /*!\brief put frame callback prototype
- *
- * This callback is invoked by the decoder to notify the application of
- * the availability of decoded image data.
- */
- typedef void (*vpx_codec_put_frame_cb_fn_t)(void *user_priv,
- const vpx_image_t *img);
+ /*!\brief put frame callback prototype
+ *
+ * This callback is invoked by the decoder to notify the application of
+ * the availability of decoded image data.
+ */
+ typedef void (*vpx_codec_put_frame_cb_fn_t)(void *user_priv,
+ const vpx_image_t *img);
- /*!\brief Register for notification of frame completion.
- *
- * Registers a given function to be called when a decoded frame is
- * available.
- *
- * \param[in] ctx Pointer to this instance's context
- * \param[in] cb Pointer to the callback function
- * \param[in] user_priv User's private data
- *
- * \retval #VPX_CODEC_OK
- * Callback successfully registered.
- * \retval #VPX_CODEC_ERROR
- * Decoder context not initialized, or algorithm not capable of
- * posting slice completion.
- */
- vpx_codec_err_t vpx_codec_register_put_frame_cb(vpx_codec_ctx_t *ctx,
- vpx_codec_put_frame_cb_fn_t cb,
- void *user_priv);
+ /*!\brief Register for notification of frame completion.
+ *
+ * Registers a given function to be called when a decoded frame is
+ * available.
+ *
+ * \param[in] ctx Pointer to this instance's context
+ * \param[in] cb Pointer to the callback function
+ * \param[in] user_priv User's private data
+ *
+ * \retval #VPX_CODEC_OK
+ * Callback successfully registered.
+ * \retval #VPX_CODEC_ERROR
+ * Decoder context not initialized, or algorithm not capable of
+ * posting frame completion.
+ */
+ vpx_codec_err_t vpx_codec_register_put_frame_cb(vpx_codec_ctx_t *ctx,
+ vpx_codec_put_frame_cb_fn_t cb,
+ void *user_priv);
- /*!@} - end defgroup cap_put_frame */
+ /*!@} - end defgroup cap_put_frame */
- /*!\defgroup cap_put_slice Slice-Based Decoding Functions
- *
- * The following functions are required to be implemented for all decoders
- * that advertise the VPX_CODEC_CAP_PUT_SLICE capability. Calling these functions
- * for codecs that don't advertise this capability will result in an error
- * code being returned, usually VPX_CODEC_ERROR
- * @{
- */
+ /*!\defgroup cap_put_slice Slice-Based Decoding Functions
+ *
+ * The following functions are required to be implemented for all decoders
+ * that advertise the VPX_CODEC_CAP_PUT_SLICE capability. Calling these functions
+ * for codecs that don't advertise this capability will result in an error
+ * code being returned, usually VPX_CODEC_ERROR
+ * @{
+ */
- /*!\brief put slice callback prototype
- *
- * This callback is invoked by the decoder to notify the application of
- * the availability of partially decoded image data. The
- */
- typedef void (*vpx_codec_put_slice_cb_fn_t)(void *user_priv,
- const vpx_image_t *img,
- const vpx_image_rect_t *valid,
- const vpx_image_rect_t *update);
+ /*!\brief put slice callback prototype
+ *
+ * This callback is invoked by the decoder to notify the application of
+ * the availability of partially decoded image data.
+ */
+ typedef void (*vpx_codec_put_slice_cb_fn_t)(void *user_priv,
+ const vpx_image_t *img,
+ const vpx_image_rect_t *valid,
+ const vpx_image_rect_t *update);
- /*!\brief Register for notification of slice completion.
- *
- * Registers a given function to be called when a decoded slice is
- * available.
- *
- * \param[in] ctx Pointer to this instance's context
- * \param[in] cb Pointer to the callback function
- * \param[in] user_priv User's private data
- *
- * \retval #VPX_CODEC_OK
- * Callback successfully registered.
- * \retval #VPX_CODEC_ERROR
- * Decoder context not initialized, or algorithm not capable of
- * posting slice completion.
- */
- vpx_codec_err_t vpx_codec_register_put_slice_cb(vpx_codec_ctx_t *ctx,
- vpx_codec_put_slice_cb_fn_t cb,
- void *user_priv);
+ /*!\brief Register for notification of slice completion.
+ *
+ * Registers a given function to be called when a decoded slice is
+ * available.
+ *
+ * \param[in] ctx Pointer to this instance's context
+ * \param[in] cb Pointer to the callback function
+ * \param[in] user_priv User's private data
+ *
+ * \retval #VPX_CODEC_OK
+ * Callback successfully registered.
+ * \retval #VPX_CODEC_ERROR
+ * Decoder context not initialized, or algorithm not capable of
+ * posting slice completion.
+ */
+ vpx_codec_err_t vpx_codec_register_put_slice_cb(vpx_codec_ctx_t *ctx,
+ vpx_codec_put_slice_cb_fn_t cb,
+ void *user_priv);
- /*!@} - end defgroup cap_put_slice*/
+ /*!@} - end defgroup cap_put_slice*/
- /*!@} - end defgroup decoder*/
+ /*!@} - end defgroup decoder*/
#endif
diff --git a/vpx/vpx_encoder.h b/vpx/vpx_encoder.h
index 67d9033..ffdbc06 100644
--- a/vpx/vpx_encoder.h
+++ b/vpx/vpx_encoder.h
@@ -32,902 +32,891 @@
#define VPX_ENCODER_H
#include "vpx_codec.h"
-/*! Temporal Scalability: Maximum length of the sequence defining frame
- * layer membership
- */
+ /*! Temporal Scalability: Maximum length of the sequence defining frame
+ * layer membership
+ */
#define VPX_TS_MAX_PERIODICITY 16
-/*! Temporal Scalability: Maximum number of coding layers */
+ /*! Temporal Scalability: Maximum number of coding layers */
#define VPX_TS_MAX_LAYERS 5
-/*!\deprecated Use #VPX_TS_MAX_PERIODICITY instead. */
+ /*!\deprecated Use #VPX_TS_MAX_PERIODICITY instead. */
#define MAX_PERIODICITY VPX_TS_MAX_PERIODICITY
-/*!\deprecated Use #VPX_TS_MAX_LAYERS instead. */
+ /*!\deprecated Use #VPX_TS_MAX_LAYERS instead. */
#define MAX_LAYERS VPX_TS_MAX_LAYERS
- /*!\brief Current ABI version number
- *
- * \internal
- * If this file is altered in any way that changes the ABI, this value
- * must be bumped. Examples include, but are not limited to, changing
- * types, removing or reassigning enums, adding/removing/rearranging
- * fields to structures
- */
+ /*!\brief Current ABI version number
+ *
+ * \internal
+ * If this file is altered in any way that changes the ABI, this value
+ * must be bumped. Examples include, but are not limited to, changing
+ * types, removing or reassigning enums, adding/removing/rearranging
+ * fields to structures
+ */
#define VPX_ENCODER_ABI_VERSION (3 + VPX_CODEC_ABI_VERSION) /**<\hideinitializer*/
- /*! \brief Encoder capabilities bitfield
- *
- * Each encoder advertises the capabilities it supports as part of its
- * ::vpx_codec_iface_t interface structure. Capabilities are extra
- * interfaces or functionality, and are not required to be supported
- * by an encoder.
- *
- * The available flags are specified by VPX_CODEC_CAP_* defines.
- */
+ /*! \brief Encoder capabilities bitfield
+ *
+ * Each encoder advertises the capabilities it supports as part of its
+ * ::vpx_codec_iface_t interface structure. Capabilities are extra
+ * interfaces or functionality, and are not required to be supported
+ * by an encoder.
+ *
+ * The available flags are specified by VPX_CODEC_CAP_* defines.
+ */
#define VPX_CODEC_CAP_PSNR 0x10000 /**< Can issue PSNR packets */
- /*! Can output one partition at a time. Each partition is returned in its
- * own VPX_CODEC_CX_FRAME_PKT, with the FRAME_IS_FRAGMENT flag set for
- * every partition but the last. In this mode all frames are always
- * returned partition by partition.
- */
+ /*! Can output one partition at a time. Each partition is returned in its
+ * own VPX_CODEC_CX_FRAME_PKT, with the FRAME_IS_FRAGMENT flag set for
+ * every partition but the last. In this mode all frames are always
+ * returned partition by partition.
+ */
#define VPX_CODEC_CAP_OUTPUT_PARTITION 0x20000
- /*! \brief Initialization-time Feature Enabling
- *
- * Certain codec features must be known at initialization time, to allow
- * for proper memory allocation.
- *
- * The available flags are specified by VPX_CODEC_USE_* defines.
- */
+ /*! \brief Initialization-time Feature Enabling
+ *
+ * Certain codec features must be known at initialization time, to allow
+ * for proper memory allocation.
+ *
+ * The available flags are specified by VPX_CODEC_USE_* defines.
+ */
#define VPX_CODEC_USE_PSNR 0x10000 /**< Calculate PSNR on each frame */
#define VPX_CODEC_USE_OUTPUT_PARTITION 0x20000 /**< Make the encoder output one
- partition at a time. */
+ partition at a time. */
- /*!\brief Generic fixed size buffer structure
- *
- * This structure is able to hold a reference to any fixed size buffer.
- */
- typedef struct vpx_fixed_buf
- {
- void *buf; /**< Pointer to the data */
- size_t sz; /**< Length of the buffer, in chars */
- } vpx_fixed_buf_t; /**< alias for struct vpx_fixed_buf */
+ /*!\brief Generic fixed size buffer structure
+ *
+ * This structure is able to hold a reference to any fixed size buffer.
+ */
+ typedef struct vpx_fixed_buf {
+ void *buf; /**< Pointer to the data */
+ size_t sz; /**< Length of the buffer, in chars */
+ } vpx_fixed_buf_t; /**< alias for struct vpx_fixed_buf */
- /*!\brief Time Stamp Type
- *
- * An integer, which when multiplied by the stream's time base, provides
- * the absolute time of a sample.
- */
- typedef int64_t vpx_codec_pts_t;
+ /*!\brief Time Stamp Type
+ *
+ * An integer, which when multiplied by the stream's time base, provides
+ * the absolute time of a sample.
+ */
+ typedef int64_t vpx_codec_pts_t;
- /*!\brief Compressed Frame Flags
- *
- * This type represents a bitfield containing information about a compressed
- * frame that may be useful to an application. The most significant 16 bits
- * can be used by an algorithm to provide additional detail, for example to
- * support frame types that are codec specific (MPEG-1 D-frames for example)
- */
- typedef uint32_t vpx_codec_frame_flags_t;
+ /*!\brief Compressed Frame Flags
+ *
+ * This type represents a bitfield containing information about a compressed
+ * frame that may be useful to an application. The most significant 16 bits
+ * can be used by an algorithm to provide additional detail, for example to
+ * support frame types that are codec specific (MPEG-1 D-frames for example)
+ */
+ typedef uint32_t vpx_codec_frame_flags_t;
#define VPX_FRAME_IS_KEY 0x1 /**< frame is the start of a GOP */
#define VPX_FRAME_IS_DROPPABLE 0x2 /**< frame can be dropped without affecting
- the stream (no future frame depends on
- this one) */
+ the stream (no future frame depends on
+ this one) */
#define VPX_FRAME_IS_INVISIBLE 0x4 /**< frame should be decoded but will not
- be shown */
+ be shown */
#define VPX_FRAME_IS_FRAGMENT 0x8 /**< this is a fragment of the encoded
- frame */
+ frame */
- /*!\brief Error Resilient flags
- *
- * These flags define which error resilient features to enable in the
- * encoder. The flags are specified through the
- * vpx_codec_enc_cfg::g_error_resilient variable.
- */
- typedef uint32_t vpx_codec_er_flags_t;
+ /*!\brief Error Resilient flags
+ *
+ * These flags define which error resilient features to enable in the
+ * encoder. The flags are specified through the
+ * vpx_codec_enc_cfg::g_error_resilient variable.
+ */
+ typedef uint32_t vpx_codec_er_flags_t;
#define VPX_ERROR_RESILIENT_DEFAULT 0x1 /**< Improve resiliency against
- losses of whole frames */
+ losses of whole frames */
#define VPX_ERROR_RESILIENT_PARTITIONS 0x2 /**< The frame partitions are
- independently decodable by the
- bool decoder, meaning that
- partitions can be decoded even
- though earlier partitions have
- been lost. Note that intra
- predicition is still done over
- the partition boundary. */
+ independently decodable by the
+ bool decoder, meaning that
+ partitions can be decoded even
+ though earlier partitions have
+ been lost. Note that intra
+ prediction is still done over
+ the partition boundary. */
- /*!\brief Encoder output packet variants
- *
- * This enumeration lists the different kinds of data packets that can be
- * returned by calls to vpx_codec_get_cx_data(). Algorithms \ref MAY
- * extend this list to provide additional functionality.
- */
- enum vpx_codec_cx_pkt_kind
- {
- VPX_CODEC_CX_FRAME_PKT, /**< Compressed video frame */
- VPX_CODEC_STATS_PKT, /**< Two-pass statistics for this frame */
- VPX_CODEC_PSNR_PKT, /**< PSNR statistics for this frame */
- VPX_CODEC_CUSTOM_PKT = 256 /**< Algorithm extensions */
- };
+ /*!\brief Encoder output packet variants
+ *
+ * This enumeration lists the different kinds of data packets that can be
+ * returned by calls to vpx_codec_get_cx_data(). Algorithms \ref MAY
+ * extend this list to provide additional functionality.
+ */
+ enum vpx_codec_cx_pkt_kind {
+ VPX_CODEC_CX_FRAME_PKT, /**< Compressed video frame */
+ VPX_CODEC_STATS_PKT, /**< Two-pass statistics for this frame */
+ VPX_CODEC_PSNR_PKT, /**< PSNR statistics for this frame */
+ VPX_CODEC_CUSTOM_PKT = 256 /**< Algorithm extensions */
+ };
- /*!\brief Encoder output packet
- *
- * This structure contains the different kinds of output data the encoder
- * may produce while compressing a frame.
- */
- typedef struct vpx_codec_cx_pkt
- {
- enum vpx_codec_cx_pkt_kind kind; /**< packet variant */
- union
- {
- struct
- {
- void *buf; /**< compressed data buffer */
- size_t sz; /**< length of compressed data */
- vpx_codec_pts_t pts; /**< time stamp to show frame
+ /*!\brief Encoder output packet
+ *
+ * This structure contains the different kinds of output data the encoder
+ * may produce while compressing a frame.
+ */
+ typedef struct vpx_codec_cx_pkt {
+ enum vpx_codec_cx_pkt_kind kind; /**< packet variant */
+ union {
+ struct {
+ void *buf; /**< compressed data buffer */
+ size_t sz; /**< length of compressed data */
+ vpx_codec_pts_t pts; /**< time stamp to show frame
(in timebase units) */
- unsigned long duration; /**< duration to show frame
+ unsigned long duration; /**< duration to show frame
(in timebase units) */
- vpx_codec_frame_flags_t flags; /**< flags for this frame */
- int partition_id; /**< the partition id
+ vpx_codec_frame_flags_t flags; /**< flags for this frame */
+ int partition_id; /**< the partition id
defines the decoding order
of the partitions. Only
applicable when "output partition"
mode is enabled. First partition
has id 0.*/
- } frame; /**< data for compressed frame packet */
- struct vpx_fixed_buf twopass_stats; /**< data for two-pass packet */
- struct vpx_psnr_pkt
- {
- unsigned int samples[4]; /**< Number of samples, total/y/u/v */
- uint64_t sse[4]; /**< sum squared error, total/y/u/v */
- double psnr[4]; /**< PSNR, total/y/u/v */
- } psnr; /**< data for PSNR packet */
- struct vpx_fixed_buf raw; /**< data for arbitrary packets */
+ } frame; /**< data for compressed frame packet */
+ struct vpx_fixed_buf twopass_stats; /**< data for two-pass packet */
+ struct vpx_psnr_pkt {
+ unsigned int samples[4]; /**< Number of samples, total/y/u/v */
+ uint64_t sse[4]; /**< sum squared error, total/y/u/v */
+ double psnr[4]; /**< PSNR, total/y/u/v */
+ } psnr; /**< data for PSNR packet */
+ struct vpx_fixed_buf raw; /**< data for arbitrary packets */
- /* This packet size is fixed to allow codecs to extend this
- * interface without having to manage storage for raw packets,
- * i.e., if it's smaller than 128 bytes, you can store in the
- * packet list directly.
- */
- char pad[128 - sizeof(enum vpx_codec_cx_pkt_kind)]; /**< fixed sz */
- } data; /**< packet data */
- } vpx_codec_cx_pkt_t; /**< alias for struct vpx_codec_cx_pkt */
+ /* This packet size is fixed to allow codecs to extend this
+ * interface without having to manage storage for raw packets,
+ * i.e., if it's smaller than 128 bytes, you can store in the
+ * packet list directly.
+ */
+ char pad[128 - sizeof(enum vpx_codec_cx_pkt_kind)]; /**< fixed sz */
+ } data; /**< packet data */
+ } vpx_codec_cx_pkt_t; /**< alias for struct vpx_codec_cx_pkt */
- /*!\brief Rational Number
- *
- * This structure holds a fractional value.
- */
- typedef struct vpx_rational
- {
- int num; /**< fraction numerator */
- int den; /**< fraction denominator */
- } vpx_rational_t; /**< alias for struct vpx_rational */
+ /*!\brief Rational Number
+ *
+ * This structure holds a fractional value.
+ */
+ typedef struct vpx_rational {
+ int num; /**< fraction numerator */
+ int den; /**< fraction denominator */
+ } vpx_rational_t; /**< alias for struct vpx_rational */
- /*!\brief Multi-pass Encoding Pass */
- enum vpx_enc_pass
- {
- VPX_RC_ONE_PASS, /**< Single pass mode */
- VPX_RC_FIRST_PASS, /**< First pass of multi-pass mode */
- VPX_RC_LAST_PASS /**< Final pass of multi-pass mode */
- };
+ /*!\brief Multi-pass Encoding Pass */
+ enum vpx_enc_pass {
+ VPX_RC_ONE_PASS, /**< Single pass mode */
+ VPX_RC_FIRST_PASS, /**< First pass of multi-pass mode */
+ VPX_RC_LAST_PASS /**< Final pass of multi-pass mode */
+ };
- /*!\brief Rate control mode */
- enum vpx_rc_mode
- {
- VPX_VBR, /**< Variable Bit Rate (VBR) mode */
- VPX_CBR, /**< Constant Bit Rate (CBR) mode */
- VPX_CQ /**< Constant Quality (CQ) mode */
- };
+ /*!\brief Rate control mode */
+ enum vpx_rc_mode {
+ VPX_VBR, /**< Variable Bit Rate (VBR) mode */
+ VPX_CBR, /**< Constant Bit Rate (CBR) mode */
+ VPX_CQ /**< Constant Quality (CQ) mode */
+ };
- /*!\brief Keyframe placement mode.
- *
- * This enumeration determines whether keyframes are placed automatically by
- * the encoder or whether this behavior is disabled. Older releases of this
- * SDK were implemented such that VPX_KF_FIXED meant keyframes were disabled.
- * This name is confusing for this behavior, so the new symbols to be used
- * are VPX_KF_AUTO and VPX_KF_DISABLED.
- */
- enum vpx_kf_mode
- {
- VPX_KF_FIXED, /**< deprecated, implies VPX_KF_DISABLED */
- VPX_KF_AUTO, /**< Encoder determines optimal placement automatically */
- VPX_KF_DISABLED = 0 /**< Encoder does not place keyframes. */
- };
+ /*!\brief Keyframe placement mode.
+ *
+ * This enumeration determines whether keyframes are placed automatically by
+ * the encoder or whether this behavior is disabled. Older releases of this
+ * SDK were implemented such that VPX_KF_FIXED meant keyframes were disabled.
+ * This name is confusing for this behavior, so the new symbols to be used
+ * are VPX_KF_AUTO and VPX_KF_DISABLED.
+ */
+ enum vpx_kf_mode {
+ VPX_KF_FIXED, /**< deprecated, implies VPX_KF_DISABLED */
+ VPX_KF_AUTO, /**< Encoder determines optimal placement automatically */
+ VPX_KF_DISABLED = 0 /**< Encoder does not place keyframes. */
+ };
- /*!\brief Encoded Frame Flags
- *
- * This type indicates a bitfield to be passed to vpx_codec_encode(), defining
- * per-frame boolean values. By convention, bits common to all codecs will be
- * named VPX_EFLAG_*, and bits specific to an algorithm will be named
- * /algo/_eflag_*. The lower order 16 bits are reserved for common use.
- */
- typedef long vpx_enc_frame_flags_t;
+ /*!\brief Encoded Frame Flags
+ *
+ * This type indicates a bitfield to be passed to vpx_codec_encode(), defining
+ * per-frame boolean values. By convention, bits common to all codecs will be
+ * named VPX_EFLAG_*, and bits specific to an algorithm will be named
+ * /algo/_eflag_*. The lower order 16 bits are reserved for common use.
+ */
+ typedef long vpx_enc_frame_flags_t;
#define VPX_EFLAG_FORCE_KF (1<<0) /**< Force this frame to be a keyframe */
- /*!\brief Encoder configuration structure
- *
- * This structure contains the encoder settings that have common representations
- * across all codecs. This doesn't imply that all codecs support all features,
- * however.
+ /*!\brief Encoder configuration structure
+ *
+ * This structure contains the encoder settings that have common representations
+ * across all codecs. This doesn't imply that all codecs support all features,
+ * however.
+ */
+ typedef struct vpx_codec_enc_cfg {
+ /*
+ * generic settings (g)
*/
- typedef struct vpx_codec_enc_cfg
- {
- /*
- * generic settings (g)
- */
- /*!\brief Algorithm specific "usage" value
- *
- * Algorithms may define multiple values for usage, which may convey the
- * intent of how the application intends to use the stream. If this value
- * is non-zero, consult the documentation for the codec to determine its
- * meaning.
- */
- unsigned int g_usage;
-
-
- /*!\brief Maximum number of threads to use
- *
- * For multi-threaded implementations, use no more than this number of
- * threads. The codec may use fewer threads than allowed. The value
- * 0 is equivalent to the value 1.
- */
- unsigned int g_threads;
-
-
- /*!\brief Bitstream profile to use
- *
- * Some codecs support a notion of multiple bitstream profiles. Typically
- * this maps to a set of features that are turned on or off. Often the
- * profile to use is determined by the features of the intended decoder.
- * Consult the documentation for the codec to determine the valid values
- * for this parameter, or set to zero for a sane default.
- */
- unsigned int g_profile; /**< profile of bitstream to use */
-
-
-
- /*!\brief Width of the frame
- *
- * This value identifies the presentation resolution of the frame,
- * in pixels. Note that the frames passed as input to the encoder must
- * have this resolution. Frames will be presented by the decoder in this
- * resolution, independent of any spatial resampling the encoder may do.
- */
- unsigned int g_w;
-
-
- /*!\brief Height of the frame
- *
- * This value identifies the presentation resolution of the frame,
- * in pixels. Note that the frames passed as input to the encoder must
- * have this resolution. Frames will be presented by the decoder in this
- * resolution, independent of any spatial resampling the encoder may do.
- */
- unsigned int g_h;
-
-
- /*!\brief Stream timebase units
- *
- * Indicates the smallest interval of time, in seconds, used by the stream.
- * For fixed frame rate material, or variable frame rate material where
- * frames are timed at a multiple of a given clock (ex: video capture),
- * the \ref RECOMMENDED method is to set the timebase to the reciprocal
- * of the frame rate (ex: 1001/30000 for 29.970 Hz NTSC). This allows the
- * pts to correspond to the frame number, which can be handy. For
- * re-encoding video from containers with absolute time timestamps, the
- * \ref RECOMMENDED method is to set the timebase to that of the parent
- * container or multimedia framework (ex: 1/1000 for ms, as in FLV).
- */
- struct vpx_rational g_timebase;
-
-
- /*!\brief Enable error resilient modes.
- *
- * The error resilient bitfield indicates to the encoder which features
- * it should enable to take measures for streaming over lossy or noisy
- * links.
- */
- vpx_codec_er_flags_t g_error_resilient;
-
-
- /*!\brief Multi-pass Encoding Mode
- *
- * This value should be set to the current phase for multi-pass encoding.
- * For single pass, set to #VPX_RC_ONE_PASS.
- */
- enum vpx_enc_pass g_pass;
-
-
- /*!\brief Allow lagged encoding
- *
- * If set, this value allows the encoder to consume a number of input
- * frames before producing output frames. This allows the encoder to
- * base decisions for the current frame on future frames. This does
- * increase the latency of the encoding pipeline, so it is not appropriate
- * in all situations (ex: realtime encoding).
- *
- * Note that this is a maximum value -- the encoder may produce frames
- * sooner than the given limit. Set this value to 0 to disable this
- * feature.
- */
- unsigned int g_lag_in_frames;
-
-
- /*
- * rate control settings (rc)
- */
-
- /*!\brief Temporal resampling configuration, if supported by the codec.
- *
- * Temporal resampling allows the codec to "drop" frames as a strategy to
- * meet its target data rate. This can cause temporal discontinuities in
- * the encoded video, which may appear as stuttering during playback. This
- * trade-off is often acceptable, but for many applications is not. It can
- * be disabled in these cases.
- *
- * Note that not all codecs support this feature. All vpx VPx codecs do.
- * For other codecs, consult the documentation for that algorithm.
- *
- * This threshold is described as a percentage of the target data buffer.
- * When the data buffer falls below this percentage of fullness, a
- * dropped frame is indicated. Set the threshold to zero (0) to disable
- * this feature.
- */
- unsigned int rc_dropframe_thresh;
-
-
- /*!\brief Enable/disable spatial resampling, if supported by the codec.
- *
- * Spatial resampling allows the codec to compress a lower resolution
- * version of the frame, which is then upscaled by the encoder to the
- * correct presentation resolution. This increases visual quality at
- * low data rates, at the expense of CPU time on the encoder/decoder.
- */
- unsigned int rc_resize_allowed;
-
-
- /*!\brief Spatial resampling up watermark.
- *
- * This threshold is described as a percentage of the target data buffer.
- * When the data buffer rises above this percentage of fullness, the
- * encoder will step up to a higher resolution version of the frame.
- */
- unsigned int rc_resize_up_thresh;
-
-
- /*!\brief Spatial resampling down watermark.
- *
- * This threshold is described as a percentage of the target data buffer.
- * When the data buffer falls below this percentage of fullness, the
- * encoder will step down to a lower resolution version of the frame.
- */
- unsigned int rc_resize_down_thresh;
-
-
- /*!\brief Rate control algorithm to use.
- *
- * Indicates whether the end usage of this stream is to be streamed over
- * a bandwidth constrained link, indicating that Constant Bit Rate (CBR)
- * mode should be used, or whether it will be played back on a high
- * bandwidth link, as from a local disk, where higher variations in
- * bitrate are acceptable.
- */
- enum vpx_rc_mode rc_end_usage;
-
-
- /*!\brief Two-pass stats buffer.
- *
- * A buffer containing all of the stats packets produced in the first
- * pass, concatenated.
- */
- struct vpx_fixed_buf rc_twopass_stats_in;
-
-
- /*!\brief Target data rate
- *
- * Target bandwidth to use for this stream, in kilobits per second.
- */
- unsigned int rc_target_bitrate;
-
-
- /*
- * quantizer settings
- */
-
-
- /*!\brief Minimum (Best Quality) Quantizer
- *
- * The quantizer is the most direct control over the quality of the
- * encoded image. The range of valid values for the quantizer is codec
- * specific. Consult the documentation for the codec to determine the
- * values to use. To determine the range programmatically, call
- * vpx_codec_enc_config_default() with a usage value of 0.
- */
- unsigned int rc_min_quantizer;
-
-
- /*!\brief Maximum (Worst Quality) Quantizer
- *
- * The quantizer is the most direct control over the quality of the
- * encoded image. The range of valid values for the quantizer is codec
- * specific. Consult the documentation for the codec to determine the
- * values to use. To determine the range programmatically, call
- * vpx_codec_enc_config_default() with a usage value of 0.
- */
- unsigned int rc_max_quantizer;
-
-
- /*
- * bitrate tolerance
- */
-
-
- /*!\brief Rate control adaptation undershoot control
- *
- * This value, expressed as a percentage of the target bitrate,
- * controls the maximum allowed adaptation speed of the codec.
- * This factor controls the maximum amount of bits that can
- * be subtracted from the target bitrate in order to compensate
- * for prior overshoot.
- *
- * Valid values in the range 0-1000.
- */
- unsigned int rc_undershoot_pct;
-
-
- /*!\brief Rate control adaptation overshoot control
- *
- * This value, expressed as a percentage of the target bitrate,
- * controls the maximum allowed adaptation speed of the codec.
- * This factor controls the maximum amount of bits that can
- * be added to the target bitrate in order to compensate for
- * prior undershoot.
- *
- * Valid values in the range 0-1000.
- */
- unsigned int rc_overshoot_pct;
-
-
- /*
- * decoder buffer model parameters
- */
-
-
- /*!\brief Decoder Buffer Size
- *
- * This value indicates the amount of data that may be buffered by the
- * decoding application. Note that this value is expressed in units of
- * time (milliseconds). For example, a value of 5000 indicates that the
- * client will buffer (at least) 5000ms worth of encoded data. Use the
- * target bitrate (#rc_target_bitrate) to convert to bits/bytes, if
- * necessary.
- */
- unsigned int rc_buf_sz;
-
-
- /*!\brief Decoder Buffer Initial Size
- *
- * This value indicates the amount of data that will be buffered by the
- * decoding application prior to beginning playback. This value is
- * expressed in units of time (milliseconds). Use the target bitrate
- * (#rc_target_bitrate) to convert to bits/bytes, if necessary.
- */
- unsigned int rc_buf_initial_sz;
-
-
- /*!\brief Decoder Buffer Optimal Size
- *
- * This value indicates the amount of data that the encoder should try
- * to maintain in the decoder's buffer. This value is expressed in units
- * of time (milliseconds). Use the target bitrate (#rc_target_bitrate)
- * to convert to bits/bytes, if necessary.
- */
- unsigned int rc_buf_optimal_sz;
-
-
- /*
- * 2 pass rate control parameters
- */
-
-
- /*!\brief Two-pass mode CBR/VBR bias
- *
- * Bias, expressed on a scale of 0 to 100, for determining target size
- * for the current frame. The value 0 indicates the optimal CBR mode
- * value should be used. The value 100 indicates the optimal VBR mode
- * value should be used. Values in between indicate which way the
- * encoder should "lean."
- */
- unsigned int rc_2pass_vbr_bias_pct; /**< RC mode bias between CBR and VBR(0-100: 0->CBR, 100->VBR) */
-
-
- /*!\brief Two-pass mode per-GOP minimum bitrate
- *
- * This value, expressed as a percentage of the target bitrate, indicates
- * the minimum bitrate to be used for a single GOP (aka "section")
- */
- unsigned int rc_2pass_vbr_minsection_pct;
-
-
- /*!\brief Two-pass mode per-GOP maximum bitrate
- *
- * This value, expressed as a percentage of the target bitrate, indicates
- * the maximum bitrate to be used for a single GOP (aka "section")
- */
- unsigned int rc_2pass_vbr_maxsection_pct;
-
-
- /*
- * keyframing settings (kf)
- */
-
- /*!\brief Keyframe placement mode
- *
- * This value indicates whether the encoder should place keyframes at a
- * fixed interval, or determine the optimal placement automatically
- * (as governed by the #kf_min_dist and #kf_max_dist parameters)
- */
- enum vpx_kf_mode kf_mode;
-
-
- /*!\brief Keyframe minimum interval
- *
- * This value, expressed as a number of frames, prevents the encoder from
- * placing a keyframe nearer than kf_min_dist to the previous keyframe. At
- * least kf_min_dist frames non-keyframes will be coded before the next
- * keyframe. Set kf_min_dist equal to kf_max_dist for a fixed interval.
- */
- unsigned int kf_min_dist;
-
-
- /*!\brief Keyframe maximum interval
- *
- * This value, expressed as a number of frames, forces the encoder to code
- * a keyframe if one has not been coded in the last kf_max_dist frames.
- * A value of 0 implies all frames will be keyframes. Set kf_min_dist
- * equal to kf_max_dist for a fixed interval.
- */
- unsigned int kf_max_dist;
-
- /*
- * Temporal scalability settings (ts)
- */
-
- /*!\brief Number of coding layers
- *
- * This value specifies the number of coding layers to be used.
- */
- unsigned int ts_number_layers;
-
- /*!\brief Target bitrate for each layer
- *
- * These values specify the target coding bitrate for each coding layer.
- */
- unsigned int ts_target_bitrate[VPX_TS_MAX_LAYERS];
-
- /*!\brief Frame rate decimation factor for each layer
- *
- * These values specify the frame rate decimation factors to apply
- * to each layer.
- */
- unsigned int ts_rate_decimator[VPX_TS_MAX_LAYERS];
-
- /*!\brief Length of the sequence defining frame layer membership
- *
- * This value specifies the length of the sequence that defines the
- * membership of frames to layers. For example, if ts_periodicity=8 then
- * frames are assigned to coding layers with a repeated sequence of
- * length 8.
- */
- unsigned int ts_periodicity;
-
- /*!\brief Template defining the membership of frames to coding layers
- *
- * This array defines the membership of frames to coding layers. For a
- * 2-layer encoding that assigns even numbered frames to one layer (0)
- * and odd numbered frames to a second layer (1) with ts_periodicity=8,
- * then ts_layer_id = (0,1,0,1,0,1,0,1).
- */
- unsigned int ts_layer_id[VPX_TS_MAX_PERIODICITY];
- } vpx_codec_enc_cfg_t; /**< alias for struct vpx_codec_enc_cfg */
-
-
- /*!\brief Initialize an encoder instance
+ /*!\brief Algorithm specific "usage" value
*
- * Initializes a encoder context using the given interface. Applications
- * should call the vpx_codec_enc_init convenience macro instead of this
- * function directly, to ensure that the ABI version number parameter
- * is properly initialized.
- *
- * If the library was configured with --disable-multithread, this call
- * is not thread safe and should be guarded with a lock if being used
- * in a multithreaded context.
- *
- * In XMA mode (activated by setting VPX_CODEC_USE_XMA in the flags
- * parameter), the storage pointed to by the cfg parameter must be
- * kept readable and stable until all memory maps have been set.
- *
- * \param[in] ctx Pointer to this instance's context.
- * \param[in] iface Pointer to the algorithm interface to use.
- * \param[in] cfg Configuration to use, if known. May be NULL.
- * \param[in] flags Bitfield of VPX_CODEC_USE_* flags
- * \param[in] ver ABI version number. Must be set to
- * VPX_ENCODER_ABI_VERSION
- * \retval #VPX_CODEC_OK
- * The decoder algorithm initialized.
- * \retval #VPX_CODEC_MEM_ERROR
- * Memory allocation failed.
+ * Algorithms may define multiple values for usage, which may convey the
+ * intent of how the application intends to use the stream. If this value
+ * is non-zero, consult the documentation for the codec to determine its
+ * meaning.
*/
- vpx_codec_err_t vpx_codec_enc_init_ver(vpx_codec_ctx_t *ctx,
- vpx_codec_iface_t *iface,
- vpx_codec_enc_cfg_t *cfg,
- vpx_codec_flags_t flags,
- int ver);
+ unsigned int g_usage;
- /*!\brief Convenience macro for vpx_codec_enc_init_ver()
+ /*!\brief Maximum number of threads to use
*
- * Ensures the ABI version parameter is properly set.
+ * For multi-threaded implementations, use no more than this number of
+ * threads. The codec may use fewer threads than allowed. The value
+ * 0 is equivalent to the value 1.
*/
+ unsigned int g_threads;
+
+
+ /*!\brief Bitstream profile to use
+ *
+ * Some codecs support a notion of multiple bitstream profiles. Typically
+ * this maps to a set of features that are turned on or off. Often the
+ * profile to use is determined by the features of the intended decoder.
+ * Consult the documentation for the codec to determine the valid values
+ * for this parameter, or set to zero for a sane default.
+ */
+ unsigned int g_profile; /**< profile of bitstream to use */
+
+
+
+ /*!\brief Width of the frame
+ *
+ * This value identifies the presentation resolution of the frame,
+ * in pixels. Note that the frames passed as input to the encoder must
+ * have this resolution. Frames will be presented by the decoder in this
+ * resolution, independent of any spatial resampling the encoder may do.
+ */
+ unsigned int g_w;
+
+
+ /*!\brief Height of the frame
+ *
+ * This value identifies the presentation resolution of the frame,
+ * in pixels. Note that the frames passed as input to the encoder must
+ * have this resolution. Frames will be presented by the decoder in this
+ * resolution, independent of any spatial resampling the encoder may do.
+ */
+ unsigned int g_h;
+
+
+ /*!\brief Stream timebase units
+ *
+ * Indicates the smallest interval of time, in seconds, used by the stream.
+ * For fixed frame rate material, or variable frame rate material where
+ * frames are timed at a multiple of a given clock (ex: video capture),
+ * the \ref RECOMMENDED method is to set the timebase to the reciprocal
+ * of the frame rate (ex: 1001/30000 for 29.970 Hz NTSC). This allows the
+ * pts to correspond to the frame number, which can be handy. For
+ * re-encoding video from containers with absolute time timestamps, the
+ * \ref RECOMMENDED method is to set the timebase to that of the parent
+ * container or multimedia framework (ex: 1/1000 for ms, as in FLV).
+ */
+ struct vpx_rational g_timebase;
+
+
+ /*!\brief Enable error resilient modes.
+ *
+ * The error resilient bitfield indicates to the encoder which features
+ * it should enable to take measures for streaming over lossy or noisy
+ * links.
+ */
+ vpx_codec_er_flags_t g_error_resilient;
+
+
+ /*!\brief Multi-pass Encoding Mode
+ *
+ * This value should be set to the current phase for multi-pass encoding.
+ * For single pass, set to #VPX_RC_ONE_PASS.
+ */
+ enum vpx_enc_pass g_pass;
+
+
+ /*!\brief Allow lagged encoding
+ *
+ * If set, this value allows the encoder to consume a number of input
+ * frames before producing output frames. This allows the encoder to
+ * base decisions for the current frame on future frames. This does
+ * increase the latency of the encoding pipeline, so it is not appropriate
+ * in all situations (ex: realtime encoding).
+ *
+ * Note that this is a maximum value -- the encoder may produce frames
+ * sooner than the given limit. Set this value to 0 to disable this
+ * feature.
+ */
+ unsigned int g_lag_in_frames;
+
+
+ /*
+ * rate control settings (rc)
+ */
+
+ /*!\brief Temporal resampling configuration, if supported by the codec.
+ *
+ * Temporal resampling allows the codec to "drop" frames as a strategy to
+ * meet its target data rate. This can cause temporal discontinuities in
+ * the encoded video, which may appear as stuttering during playback. This
+ * trade-off is often acceptable, but for many applications is not. It can
+ * be disabled in these cases.
+ *
+ * Note that not all codecs support this feature. All VPx codecs do.
+ * For other codecs, consult the documentation for that algorithm.
+ *
+ * This threshold is described as a percentage of the target data buffer.
+ * When the data buffer falls below this percentage of fullness, a
+ * dropped frame is indicated. Set the threshold to zero (0) to disable
+ * this feature.
+ */
+ unsigned int rc_dropframe_thresh;
+
+
+ /*!\brief Enable/disable spatial resampling, if supported by the codec.
+ *
+ * Spatial resampling allows the codec to compress a lower resolution
+ * version of the frame, which is then upscaled by the encoder to the
+ * correct presentation resolution. This increases visual quality at
+ * low data rates, at the expense of CPU time on the encoder/decoder.
+ */
+ unsigned int rc_resize_allowed;
+
+
+ /*!\brief Spatial resampling up watermark.
+ *
+ * This threshold is described as a percentage of the target data buffer.
+ * When the data buffer rises above this percentage of fullness, the
+ * encoder will step up to a higher resolution version of the frame.
+ */
+ unsigned int rc_resize_up_thresh;
+
+
+ /*!\brief Spatial resampling down watermark.
+ *
+ * This threshold is described as a percentage of the target data buffer.
+ * When the data buffer falls below this percentage of fullness, the
+ * encoder will step down to a lower resolution version of the frame.
+ */
+ unsigned int rc_resize_down_thresh;
+
+
+ /*!\brief Rate control algorithm to use.
+ *
+ * Indicates whether the end usage of this stream is to be streamed over
+ * a bandwidth constrained link, indicating that Constant Bit Rate (CBR)
+ * mode should be used, or whether it will be played back on a high
+ * bandwidth link, as from a local disk, where higher variations in
+ * bitrate are acceptable.
+ */
+ enum vpx_rc_mode rc_end_usage;
+
+
+ /*!\brief Two-pass stats buffer.
+ *
+ * A buffer containing all of the stats packets produced in the first
+ * pass, concatenated.
+ */
+ struct vpx_fixed_buf rc_twopass_stats_in;
+
+
+ /*!\brief Target data rate
+ *
+ * Target bandwidth to use for this stream, in kilobits per second.
+ */
+ unsigned int rc_target_bitrate;
+
+
+ /*
+ * quantizer settings
+ */
+
+
+ /*!\brief Minimum (Best Quality) Quantizer
+ *
+ * The quantizer is the most direct control over the quality of the
+ * encoded image. The range of valid values for the quantizer is codec
+ * specific. Consult the documentation for the codec to determine the
+ * values to use. To determine the range programmatically, call
+ * vpx_codec_enc_config_default() with a usage value of 0.
+ */
+ unsigned int rc_min_quantizer;
+
+
+ /*!\brief Maximum (Worst Quality) Quantizer
+ *
+ * The quantizer is the most direct control over the quality of the
+ * encoded image. The range of valid values for the quantizer is codec
+ * specific. Consult the documentation for the codec to determine the
+ * values to use. To determine the range programmatically, call
+ * vpx_codec_enc_config_default() with a usage value of 0.
+ */
+ unsigned int rc_max_quantizer;
+
+
+ /*
+ * bitrate tolerance
+ */
+
+
+ /*!\brief Rate control adaptation undershoot control
+ *
+ * This value, expressed as a percentage of the target bitrate,
+ * controls the maximum allowed adaptation speed of the codec.
+ * This factor controls the maximum amount of bits that can
+ * be subtracted from the target bitrate in order to compensate
+ * for prior overshoot.
+ *
+ * Valid values in the range 0-1000.
+ */
+ unsigned int rc_undershoot_pct;
+
+
+ /*!\brief Rate control adaptation overshoot control
+ *
+ * This value, expressed as a percentage of the target bitrate,
+ * controls the maximum allowed adaptation speed of the codec.
+ * This factor controls the maximum amount of bits that can
+ * be added to the target bitrate in order to compensate for
+ * prior undershoot.
+ *
+ * Valid values in the range 0-1000.
+ */
+ unsigned int rc_overshoot_pct;
+
+
+ /*
+ * decoder buffer model parameters
+ */
+
+
+ /*!\brief Decoder Buffer Size
+ *
+ * This value indicates the amount of data that may be buffered by the
+ * decoding application. Note that this value is expressed in units of
+ * time (milliseconds). For example, a value of 5000 indicates that the
+ * client will buffer (at least) 5000ms worth of encoded data. Use the
+ * target bitrate (#rc_target_bitrate) to convert to bits/bytes, if
+ * necessary.
+ */
+ unsigned int rc_buf_sz;
+
+
+ /*!\brief Decoder Buffer Initial Size
+ *
+ * This value indicates the amount of data that will be buffered by the
+ * decoding application prior to beginning playback. This value is
+ * expressed in units of time (milliseconds). Use the target bitrate
+ * (#rc_target_bitrate) to convert to bits/bytes, if necessary.
+ */
+ unsigned int rc_buf_initial_sz;
+
+
+ /*!\brief Decoder Buffer Optimal Size
+ *
+ * This value indicates the amount of data that the encoder should try
+ * to maintain in the decoder's buffer. This value is expressed in units
+ * of time (milliseconds). Use the target bitrate (#rc_target_bitrate)
+ * to convert to bits/bytes, if necessary.
+ */
+ unsigned int rc_buf_optimal_sz;
+
+
+ /*
+ * 2 pass rate control parameters
+ */
+
+
+ /*!\brief Two-pass mode CBR/VBR bias
+ *
+ * Bias, expressed on a scale of 0 to 100, for determining target size
+ * for the current frame. The value 0 indicates the optimal CBR mode
+ * value should be used. The value 100 indicates the optimal VBR mode
+ * value should be used. Values in between indicate which way the
+ * encoder should "lean."
+ */
+ unsigned int rc_2pass_vbr_bias_pct; /**< RC mode bias between CBR and VBR(0-100: 0->CBR, 100->VBR) */
+
+
+ /*!\brief Two-pass mode per-GOP minimum bitrate
+ *
+ * This value, expressed as a percentage of the target bitrate, indicates
+ * the minimum bitrate to be used for a single GOP (aka "section")
+ */
+ unsigned int rc_2pass_vbr_minsection_pct;
+
+
+ /*!\brief Two-pass mode per-GOP maximum bitrate
+ *
+ * This value, expressed as a percentage of the target bitrate, indicates
+ * the maximum bitrate to be used for a single GOP (aka "section")
+ */
+ unsigned int rc_2pass_vbr_maxsection_pct;
+
+
+ /*
+ * keyframing settings (kf)
+ */
+
+ /*!\brief Keyframe placement mode
+ *
+ * This value indicates whether the encoder should place keyframes at a
+ * fixed interval, or determine the optimal placement automatically
+ * (as governed by the #kf_min_dist and #kf_max_dist parameters)
+ */
+ enum vpx_kf_mode kf_mode;
+
+
+ /*!\brief Keyframe minimum interval
+ *
+ * This value, expressed as a number of frames, prevents the encoder from
+ * placing a keyframe nearer than kf_min_dist to the previous keyframe. At
+ * least kf_min_dist non-keyframes will be coded before the next
+ * keyframe. Set kf_min_dist equal to kf_max_dist for a fixed interval.
+ */
+ unsigned int kf_min_dist;
+
+
+ /*!\brief Keyframe maximum interval
+ *
+ * This value, expressed as a number of frames, forces the encoder to code
+ * a keyframe if one has not been coded in the last kf_max_dist frames.
+ * A value of 0 implies all frames will be keyframes. Set kf_min_dist
+ * equal to kf_max_dist for a fixed interval.
+ */
+ unsigned int kf_max_dist;
+
+ /*
+ * Temporal scalability settings (ts)
+ */
+
+ /*!\brief Number of coding layers
+ *
+ * This value specifies the number of coding layers to be used.
+ */
+ unsigned int ts_number_layers;
+
+ /*!\brief Target bitrate for each layer
+ *
+ * These values specify the target coding bitrate for each coding layer.
+ */
+ unsigned int ts_target_bitrate[VPX_TS_MAX_LAYERS];
+
+ /*!\brief Frame rate decimation factor for each layer
+ *
+ * These values specify the frame rate decimation factors to apply
+ * to each layer.
+ */
+ unsigned int ts_rate_decimator[VPX_TS_MAX_LAYERS];
+
+ /*!\brief Length of the sequence defining frame layer membership
+ *
+ * This value specifies the length of the sequence that defines the
+ * membership of frames to layers. For example, if ts_periodicity=8 then
+ * frames are assigned to coding layers with a repeated sequence of
+ * length 8.
+ */
+ unsigned int ts_periodicity;
+
+ /*!\brief Template defining the membership of frames to coding layers
+ *
+ * This array defines the membership of frames to coding layers. For a
+ * 2-layer encoding that assigns even numbered frames to one layer (0)
+ * and odd numbered frames to a second layer (1) with ts_periodicity=8,
+ * then ts_layer_id = (0,1,0,1,0,1,0,1).
+ */
+ unsigned int ts_layer_id[VPX_TS_MAX_PERIODICITY];
+ } vpx_codec_enc_cfg_t; /**< alias for struct vpx_codec_enc_cfg */
+
+
+ /*!\brief Initialize an encoder instance
+ *
+ * Initializes an encoder context using the given interface. Applications
+ * should call the vpx_codec_enc_init convenience macro instead of this
+ * function directly, to ensure that the ABI version number parameter
+ * is properly initialized.
+ *
+ * If the library was configured with --disable-multithread, this call
+ * is not thread safe and should be guarded with a lock if being used
+ * in a multithreaded context.
+ *
+ * In XMA mode (activated by setting VPX_CODEC_USE_XMA in the flags
+ * parameter), the storage pointed to by the cfg parameter must be
+ * kept readable and stable until all memory maps have been set.
+ *
+ * \param[in] ctx Pointer to this instance's context.
+ * \param[in] iface Pointer to the algorithm interface to use.
+ * \param[in] cfg Configuration to use, if known. May be NULL.
+ * \param[in] flags Bitfield of VPX_CODEC_USE_* flags
+ * \param[in] ver ABI version number. Must be set to
+ * VPX_ENCODER_ABI_VERSION
+ * \retval #VPX_CODEC_OK
+ * The encoder algorithm initialized.
+ * \retval #VPX_CODEC_MEM_ERROR
+ * Memory allocation failed.
+ */
+ vpx_codec_err_t vpx_codec_enc_init_ver(vpx_codec_ctx_t *ctx,
+ vpx_codec_iface_t *iface,
+ vpx_codec_enc_cfg_t *cfg,
+ vpx_codec_flags_t flags,
+ int ver);
+
+
+ /*!\brief Convenience macro for vpx_codec_enc_init_ver()
+ *
+ * Ensures the ABI version parameter is properly set.
+ */
#define vpx_codec_enc_init(ctx, iface, cfg, flags) \
- vpx_codec_enc_init_ver(ctx, iface, cfg, flags, VPX_ENCODER_ABI_VERSION)
+ vpx_codec_enc_init_ver(ctx, iface, cfg, flags, VPX_ENCODER_ABI_VERSION)
- /*!\brief Initialize multi-encoder instance
- *
- * Initializes multi-encoder context using the given interface.
- * Applications should call the vpx_codec_enc_init_multi convenience macro
- * instead of this function directly, to ensure that the ABI version number
- * parameter is properly initialized.
- *
- * In XMA mode (activated by setting VPX_CODEC_USE_XMA in the flags
- * parameter), the storage pointed to by the cfg parameter must be
- * kept readable and stable until all memory maps have been set.
- *
- * \param[in] ctx Pointer to this instance's context.
- * \param[in] iface Pointer to the algorithm interface to use.
- * \param[in] cfg Configuration to use, if known. May be NULL.
- * \param[in] num_enc Total number of encoders.
- * \param[in] flags Bitfield of VPX_CODEC_USE_* flags
- * \param[in] dsf Pointer to down-sampling factors.
- * \param[in] ver ABI version number. Must be set to
- * VPX_ENCODER_ABI_VERSION
- * \retval #VPX_CODEC_OK
- * The decoder algorithm initialized.
- * \retval #VPX_CODEC_MEM_ERROR
- * Memory allocation failed.
- */
- vpx_codec_err_t vpx_codec_enc_init_multi_ver(vpx_codec_ctx_t *ctx,
- vpx_codec_iface_t *iface,
- vpx_codec_enc_cfg_t *cfg,
- int num_enc,
- vpx_codec_flags_t flags,
- vpx_rational_t *dsf,
- int ver);
+ /*!\brief Initialize multi-encoder instance
+ *
+ * Initializes multi-encoder context using the given interface.
+ * Applications should call the vpx_codec_enc_init_multi convenience macro
+ * instead of this function directly, to ensure that the ABI version number
+ * parameter is properly initialized.
+ *
+ * In XMA mode (activated by setting VPX_CODEC_USE_XMA in the flags
+ * parameter), the storage pointed to by the cfg parameter must be
+ * kept readable and stable until all memory maps have been set.
+ *
+ * \param[in] ctx Pointer to this instance's context.
+ * \param[in] iface Pointer to the algorithm interface to use.
+ * \param[in] cfg Configuration to use, if known. May be NULL.
+ * \param[in] num_enc Total number of encoders.
+ * \param[in] flags Bitfield of VPX_CODEC_USE_* flags
+ * \param[in] dsf Pointer to down-sampling factors.
+ * \param[in] ver ABI version number. Must be set to
+ * VPX_ENCODER_ABI_VERSION
+ * \retval #VPX_CODEC_OK
+ * The encoder algorithm initialized.
+ * \retval #VPX_CODEC_MEM_ERROR
+ * Memory allocation failed.
+ */
+ vpx_codec_err_t vpx_codec_enc_init_multi_ver(vpx_codec_ctx_t *ctx,
+ vpx_codec_iface_t *iface,
+ vpx_codec_enc_cfg_t *cfg,
+ int num_enc,
+ vpx_codec_flags_t flags,
+ vpx_rational_t *dsf,
+ int ver);
- /*!\brief Convenience macro for vpx_codec_enc_init_multi_ver()
- *
- * Ensures the ABI version parameter is properly set.
- */
+ /*!\brief Convenience macro for vpx_codec_enc_init_multi_ver()
+ *
+ * Ensures the ABI version parameter is properly set.
+ */
#define vpx_codec_enc_init_multi(ctx, iface, cfg, num_enc, flags, dsf) \
- vpx_codec_enc_init_multi_ver(ctx, iface, cfg, num_enc, flags, dsf, \
- VPX_ENCODER_ABI_VERSION)
+ vpx_codec_enc_init_multi_ver(ctx, iface, cfg, num_enc, flags, dsf, \
+ VPX_ENCODER_ABI_VERSION)
- /*!\brief Get a default configuration
- *
- * Initializes a encoder configuration structure with default values. Supports
- * the notion of "usages" so that an algorithm may offer different default
- * settings depending on the user's intended goal. This function \ref SHOULD
- * be called by all applications to initialize the configuration structure
- * before specializing the configuration with application specific values.
- *
- * \param[in] iface Pointer to the algorithm interface to use.
- * \param[out] cfg Configuration buffer to populate
- * \param[in] usage End usage. Set to 0 or use codec specific values.
- *
- * \retval #VPX_CODEC_OK
- * The configuration was populated.
- * \retval #VPX_CODEC_INCAPABLE
- * Interface is not an encoder interface.
- * \retval #VPX_CODEC_INVALID_PARAM
- * A parameter was NULL, or the usage value was not recognized.
- */
- vpx_codec_err_t vpx_codec_enc_config_default(vpx_codec_iface_t *iface,
- vpx_codec_enc_cfg_t *cfg,
- unsigned int usage);
+ /*!\brief Get a default configuration
+ *
+ * Initializes an encoder configuration structure with default values. Supports
+ * the notion of "usages" so that an algorithm may offer different default
+ * settings depending on the user's intended goal. This function \ref SHOULD
+ * be called by all applications to initialize the configuration structure
+ * before specializing the configuration with application specific values.
+ *
+ * \param[in] iface Pointer to the algorithm interface to use.
+ * \param[out] cfg Configuration buffer to populate
+ * \param[in] usage End usage. Set to 0 or use codec specific values.
+ *
+ * \retval #VPX_CODEC_OK
+ * The configuration was populated.
+ * \retval #VPX_CODEC_INCAPABLE
+ * Interface is not an encoder interface.
+ * \retval #VPX_CODEC_INVALID_PARAM
+ * A parameter was NULL, or the usage value was not recognized.
+ */
+ vpx_codec_err_t vpx_codec_enc_config_default(vpx_codec_iface_t *iface,
+ vpx_codec_enc_cfg_t *cfg,
+ unsigned int usage);
- /*!\brief Set or change configuration
- *
- * Reconfigures an encoder instance according to the given configuration.
- *
- * \param[in] ctx Pointer to this instance's context
- * \param[in] cfg Configuration buffer to use
- *
- * \retval #VPX_CODEC_OK
- * The configuration was populated.
- * \retval #VPX_CODEC_INCAPABLE
- * Interface is not an encoder interface.
- * \retval #VPX_CODEC_INVALID_PARAM
- * A parameter was NULL, or the usage value was not recognized.
- */
- vpx_codec_err_t vpx_codec_enc_config_set(vpx_codec_ctx_t *ctx,
- const vpx_codec_enc_cfg_t *cfg);
+ /*!\brief Set or change configuration
+ *
+ * Reconfigures an encoder instance according to the given configuration.
+ *
+ * \param[in] ctx Pointer to this instance's context
+ * \param[in] cfg Configuration buffer to use
+ *
+ * \retval #VPX_CODEC_OK
+ * The configuration was populated.
+ * \retval #VPX_CODEC_INCAPABLE
+ * Interface is not an encoder interface.
+ * \retval #VPX_CODEC_INVALID_PARAM
+ * A parameter was NULL, or the usage value was not recognized.
+ */
+ vpx_codec_err_t vpx_codec_enc_config_set(vpx_codec_ctx_t *ctx,
+ const vpx_codec_enc_cfg_t *cfg);
- /*!\brief Get global stream headers
- *
- * Retrieves a stream level global header packet, if supported by the codec.
- *
- * \param[in] ctx Pointer to this instance's context
- *
- * \retval NULL
- * Encoder does not support global header
- * \retval Non-NULL
- * Pointer to buffer containing global header packet
- */
- vpx_fixed_buf_t *vpx_codec_get_global_headers(vpx_codec_ctx_t *ctx);
+ /*!\brief Get global stream headers
+ *
+ * Retrieves a stream level global header packet, if supported by the codec.
+ *
+ * \param[in] ctx Pointer to this instance's context
+ *
+ * \retval NULL
+ * Encoder does not support global header
+ * \retval Non-NULL
+ * Pointer to buffer containing global header packet
+ */
+ vpx_fixed_buf_t *vpx_codec_get_global_headers(vpx_codec_ctx_t *ctx);
#define VPX_DL_REALTIME (1) /**< deadline parameter analogous to
- * VPx REALTIME mode. */
+ * VPx REALTIME mode. */
#define VPX_DL_GOOD_QUALITY (1000000) /**< deadline parameter analogous to
- * VPx GOOD QUALITY mode. */
+ * VPx GOOD QUALITY mode. */
#define VPX_DL_BEST_QUALITY (0) /**< deadline parameter analogous to
- * VPx BEST QUALITY mode. */
- /*!\brief Encode a frame
- *
- * Encodes a video frame at the given "presentation time." The presentation
- * time stamp (PTS) \ref MUST be strictly increasing.
- *
- * The encoder supports the notion of a soft real-time deadline. Given a
- * non-zero value to the deadline parameter, the encoder will make a "best
- * effort" guarantee to return before the given time slice expires. It is
- * implicit that limiting the available time to encode will degrade the
- * output quality. The encoder can be given an unlimited time to produce the
- * best possible frame by specifying a deadline of '0'. This deadline
- * supercedes the VPx notion of "best quality, good quality, realtime".
- * Applications that wish to map these former settings to the new deadline
- * based system can use the symbols #VPX_DL_REALTIME, #VPX_DL_GOOD_QUALITY,
- * and #VPX_DL_BEST_QUALITY.
- *
- * When the last frame has been passed to the encoder, this function should
- * continue to be called, with the img parameter set to NULL. This will
- * signal the end-of-stream condition to the encoder and allow it to encode
- * any held buffers. Encoding is complete when vpx_codec_encode() is called
- * and vpx_codec_get_cx_data() returns no data.
- *
- * \param[in] ctx Pointer to this instance's context
- * \param[in] img Image data to encode, NULL to flush.
- * \param[in] pts Presentation time stamp, in timebase units.
- * \param[in] duration Duration to show frame, in timebase units.
- * \param[in] flags Flags to use for encoding this frame.
- * \param[in] deadline Time to spend encoding, in microseconds. (0=infinite)
- *
- * \retval #VPX_CODEC_OK
- * The configuration was populated.
- * \retval #VPX_CODEC_INCAPABLE
- * Interface is not an encoder interface.
- * \retval #VPX_CODEC_INVALID_PARAM
- * A parameter was NULL, the image format is unsupported, etc.
- */
- vpx_codec_err_t vpx_codec_encode(vpx_codec_ctx_t *ctx,
- const vpx_image_t *img,
- vpx_codec_pts_t pts,
- unsigned long duration,
- vpx_enc_frame_flags_t flags,
- unsigned long deadline);
+ * VPx BEST QUALITY mode. */
+ /*!\brief Encode a frame
+ *
+ * Encodes a video frame at the given "presentation time." The presentation
+ * time stamp (PTS) \ref MUST be strictly increasing.
+ *
+ * The encoder supports the notion of a soft real-time deadline. Given a
+ * non-zero value to the deadline parameter, the encoder will make a "best
+ * effort" guarantee to return before the given time slice expires. It is
+ * implicit that limiting the available time to encode will degrade the
+ * output quality. The encoder can be given an unlimited time to produce the
+ * best possible frame by specifying a deadline of '0'. This deadline
+ * supersedes the VPx notion of "best quality, good quality, realtime".
+ * Applications that wish to map these former settings to the new deadline
+ * based system can use the symbols #VPX_DL_REALTIME, #VPX_DL_GOOD_QUALITY,
+ * and #VPX_DL_BEST_QUALITY.
+ *
+ * When the last frame has been passed to the encoder, this function should
+ * continue to be called, with the img parameter set to NULL. This will
+ * signal the end-of-stream condition to the encoder and allow it to encode
+ * any held buffers. Encoding is complete when vpx_codec_encode() is called
+ * and vpx_codec_get_cx_data() returns no data.
+ *
+ * \param[in] ctx Pointer to this instance's context
+ * \param[in] img Image data to encode, NULL to flush.
+ * \param[in] pts Presentation time stamp, in timebase units.
+ * \param[in] duration Duration to show frame, in timebase units.
+ * \param[in] flags Flags to use for encoding this frame.
+ * \param[in] deadline Time to spend encoding, in microseconds. (0=infinite)
+ *
+ * \retval #VPX_CODEC_OK
+ * The encode request completed successfully.
+ * \retval #VPX_CODEC_INCAPABLE
+ * Interface is not an encoder interface.
+ * \retval #VPX_CODEC_INVALID_PARAM
+ * A parameter was NULL, the image format is unsupported, etc.
+ */
+ vpx_codec_err_t vpx_codec_encode(vpx_codec_ctx_t *ctx,
+ const vpx_image_t *img,
+ vpx_codec_pts_t pts,
+ unsigned long duration,
+ vpx_enc_frame_flags_t flags,
+ unsigned long deadline);
- /*!\brief Set compressed data output buffer
- *
- * Sets the buffer that the codec should output the compressed data
- * into. This call effectively sets the buffer pointer returned in the
- * next VPX_CODEC_CX_FRAME_PKT packet. Subsequent packets will be
- * appended into this buffer. The buffer is preserved across frames,
- * so applications must periodically call this function after flushing
- * the accumulated compressed data to disk or to the network to reset
- * the pointer to the buffer's head.
- *
- * `pad_before` bytes will be skipped before writing the compressed
- * data, and `pad_after` bytes will be appended to the packet. The size
- * of the packet will be the sum of the size of the actual compressed
- * data, pad_before, and pad_after. The padding bytes will be preserved
- * (not overwritten).
- *
- * Note that calling this function does not guarantee that the returned
- * compressed data will be placed into the specified buffer. In the
- * event that the encoded data will not fit into the buffer provided,
- * the returned packet \ref MAY point to an internal buffer, as it would
- * if this call were never used. In this event, the output packet will
- * NOT have any padding, and the application must free space and copy it
- * to the proper place. This is of particular note in configurations
- * that may output multiple packets for a single encoded frame (e.g., lagged
- * encoding) or if the application does not reset the buffer periodically.
- *
- * Applications may restore the default behavior of the codec providing
- * the compressed data buffer by calling this function with a NULL
- * buffer.
- *
- * Applications \ref MUSTNOT call this function during iteration of
- * vpx_codec_get_cx_data().
- *
- * \param[in] ctx Pointer to this instance's context
- * \param[in] buf Buffer to store compressed data into
- * \param[in] pad_before Bytes to skip before writing compressed data
- * \param[in] pad_after Bytes to skip after writing compressed data
- *
- * \retval #VPX_CODEC_OK
- * The buffer was set successfully.
- * \retval #VPX_CODEC_INVALID_PARAM
- * A parameter was NULL, the image format is unsupported, etc.
- */
- vpx_codec_err_t vpx_codec_set_cx_data_buf(vpx_codec_ctx_t *ctx,
- const vpx_fixed_buf_t *buf,
- unsigned int pad_before,
- unsigned int pad_after);
+ /*!\brief Set compressed data output buffer
+ *
+ * Sets the buffer that the codec should output the compressed data
+ * into. This call effectively sets the buffer pointer returned in the
+ * next VPX_CODEC_CX_FRAME_PKT packet. Subsequent packets will be
+ * appended into this buffer. The buffer is preserved across frames,
+ * so applications must periodically call this function after flushing
+ * the accumulated compressed data to disk or to the network to reset
+ * the pointer to the buffer's head.
+ *
+ * `pad_before` bytes will be skipped before writing the compressed
+ * data, and `pad_after` bytes will be appended to the packet. The size
+ * of the packet will be the sum of the size of the actual compressed
+ * data, pad_before, and pad_after. The padding bytes will be preserved
+ * (not overwritten).
+ *
+ * Note that calling this function does not guarantee that the returned
+ * compressed data will be placed into the specified buffer. In the
+ * event that the encoded data will not fit into the buffer provided,
+ * the returned packet \ref MAY point to an internal buffer, as it would
+ * if this call were never used. In this event, the output packet will
+ * NOT have any padding, and the application must free space and copy it
+ * to the proper place. This is of particular note in configurations
+ * that may output multiple packets for a single encoded frame (e.g., lagged
+ * encoding) or if the application does not reset the buffer periodically.
+ *
+ * Applications may restore the default behavior of the codec providing
+ * the compressed data buffer by calling this function with a NULL
+ * buffer.
+ *
+ * Applications \ref MUSTNOT call this function during iteration of
+ * vpx_codec_get_cx_data().
+ *
+ * \param[in] ctx Pointer to this instance's context
+ * \param[in] buf Buffer to store compressed data into
+ * \param[in] pad_before Bytes to skip before writing compressed data
+ * \param[in] pad_after Bytes to skip after writing compressed data
+ *
+ * \retval #VPX_CODEC_OK
+ * The buffer was set successfully.
+ * \retval #VPX_CODEC_INVALID_PARAM
+ * A parameter was NULL, the image format is unsupported, etc.
+ */
+ vpx_codec_err_t vpx_codec_set_cx_data_buf(vpx_codec_ctx_t *ctx,
+ const vpx_fixed_buf_t *buf,
+ unsigned int pad_before,
+ unsigned int pad_after);
- /*!\brief Encoded data iterator
- *
- * Iterates over a list of data packets to be passed from the encoder to the
- * application. The different kinds of packets available are enumerated in
- * #vpx_codec_cx_pkt_kind.
- *
- * #VPX_CODEC_CX_FRAME_PKT packets should be passed to the application's
- * muxer. Multiple compressed frames may be in the list.
- * #VPX_CODEC_STATS_PKT packets should be appended to a global buffer.
- *
- * The application \ref MUST silently ignore any packet kinds that it does
- * not recognize or support.
- *
- * The data buffers returned from this function are only guaranteed to be
- * valid until the application makes another call to any vpx_codec_* function.
- *
- * \param[in] ctx Pointer to this instance's context
- * \param[in,out] iter Iterator storage, initialized to NULL
- *
- * \return Returns a pointer to an output data packet (compressed frame data,
- * two-pass statistics, etc.) or NULL to signal end-of-list.
- *
- */
- const vpx_codec_cx_pkt_t *vpx_codec_get_cx_data(vpx_codec_ctx_t *ctx,
- vpx_codec_iter_t *iter);
+ /*!\brief Encoded data iterator
+ *
+ * Iterates over a list of data packets to be passed from the encoder to the
+ * application. The different kinds of packets available are enumerated in
+ * #vpx_codec_cx_pkt_kind.
+ *
+ * #VPX_CODEC_CX_FRAME_PKT packets should be passed to the application's
+ * muxer. Multiple compressed frames may be in the list.
+ * #VPX_CODEC_STATS_PKT packets should be appended to a global buffer.
+ *
+ * The application \ref MUST silently ignore any packet kinds that it does
+ * not recognize or support.
+ *
+ * The data buffers returned from this function are only guaranteed to be
+ * valid until the application makes another call to any vpx_codec_* function.
+ *
+ * \param[in] ctx Pointer to this instance's context
+ * \param[in,out] iter Iterator storage, initialized to NULL
+ *
+ * \return Returns a pointer to an output data packet (compressed frame data,
+ * two-pass statistics, etc.) or NULL to signal end-of-list.
+ *
+ */
+ const vpx_codec_cx_pkt_t *vpx_codec_get_cx_data(vpx_codec_ctx_t *ctx,
+ vpx_codec_iter_t *iter);
- /*!\brief Get Preview Frame
- *
- * Returns an image that can be used as a preview. Shows the image as it would
- * exist at the decompressor. The application \ref MUST NOT write into this
- * image buffer.
- *
- * \param[in] ctx Pointer to this instance's context
- *
- * \return Returns a pointer to a preview image, or NULL if no image is
- * available.
- *
- */
- const vpx_image_t *vpx_codec_get_preview_frame(vpx_codec_ctx_t *ctx);
+ /*!\brief Get Preview Frame
+ *
+ * Returns an image that can be used as a preview. Shows the image as it would
+ * exist at the decompressor. The application \ref MUST NOT write into this
+ * image buffer.
+ *
+ * \param[in] ctx Pointer to this instance's context
+ *
+ * \return Returns a pointer to a preview image, or NULL if no image is
+ * available.
+ *
+ */
+ const vpx_image_t *vpx_codec_get_preview_frame(vpx_codec_ctx_t *ctx);
- /*!@} - end defgroup encoder*/
+ /*!@} - end defgroup encoder*/
#endif
#ifdef __cplusplus
diff --git a/vpx/vpx_image.h b/vpx/vpx_image.h
index 3e42447..809fa38 100644
--- a/vpx/vpx_image.h
+++ b/vpx/vpx_image.h
@@ -20,14 +20,14 @@
#ifndef VPX_IMAGE_H
#define VPX_IMAGE_H
- /*!\brief Current ABI version number
- *
- * \internal
- * If this file is altered in any way that changes the ABI, this value
- * must be bumped. Examples include, but are not limited to, changing
- * types, removing or reassigning enums, adding/removing/rearranging
- * fields to structures
- */
+ /*!\brief Current ABI version number
+ *
+ * \internal
+ * If this file is altered in any way that changes the ABI, this value
+ * must be bumped. Examples include, but are not limited to, changing
+ * types, removing or reassigning enums, adding/removing/rearranging
+ * fields to structures
+ */
#define VPX_IMAGE_ABI_VERSION (1) /**<\hideinitializer*/
@@ -36,41 +36,41 @@
#define VPX_IMG_FMT_HAS_ALPHA 0x400 /**< Image has an alpha channel component */
- /*!\brief List of supported image formats */
- typedef enum vpx_img_fmt {
- VPX_IMG_FMT_NONE,
- VPX_IMG_FMT_RGB24, /**< 24 bit per pixel packed RGB */
- VPX_IMG_FMT_RGB32, /**< 32 bit per pixel packed 0RGB */
- VPX_IMG_FMT_RGB565, /**< 16 bit per pixel, 565 */
- VPX_IMG_FMT_RGB555, /**< 16 bit per pixel, 555 */
- VPX_IMG_FMT_UYVY, /**< UYVY packed YUV */
- VPX_IMG_FMT_YUY2, /**< YUYV packed YUV */
- VPX_IMG_FMT_YVYU, /**< YVYU packed YUV */
- VPX_IMG_FMT_BGR24, /**< 24 bit per pixel packed BGR */
- VPX_IMG_FMT_RGB32_LE, /**< 32 bit packed BGR0 */
- VPX_IMG_FMT_ARGB, /**< 32 bit packed ARGB, alpha=255 */
- VPX_IMG_FMT_ARGB_LE, /**< 32 bit packed BGRA, alpha=255 */
- VPX_IMG_FMT_RGB565_LE, /**< 16 bit per pixel, gggbbbbb rrrrrggg */
- VPX_IMG_FMT_RGB555_LE, /**< 16 bit per pixel, gggbbbbb 0rrrrrgg */
- VPX_IMG_FMT_YV12 = VPX_IMG_FMT_PLANAR | VPX_IMG_FMT_UV_FLIP | 1, /**< planar YVU */
- VPX_IMG_FMT_I420 = VPX_IMG_FMT_PLANAR | 2,
- VPX_IMG_FMT_VPXYV12 = VPX_IMG_FMT_PLANAR | VPX_IMG_FMT_UV_FLIP | 3, /** < planar 4:2:0 format with vpx color space */
- VPX_IMG_FMT_VPXI420 = VPX_IMG_FMT_PLANAR | 4 /** < planar 4:2:0 format with vpx color space */
- }
- vpx_img_fmt_t; /**< alias for enum vpx_img_fmt */
+ /*!\brief List of supported image formats */
+ typedef enum vpx_img_fmt {
+ VPX_IMG_FMT_NONE,
+ VPX_IMG_FMT_RGB24, /**< 24 bit per pixel packed RGB */
+ VPX_IMG_FMT_RGB32, /**< 32 bit per pixel packed 0RGB */
+ VPX_IMG_FMT_RGB565, /**< 16 bit per pixel, 565 */
+ VPX_IMG_FMT_RGB555, /**< 16 bit per pixel, 555 */
+ VPX_IMG_FMT_UYVY, /**< UYVY packed YUV */
+ VPX_IMG_FMT_YUY2, /**< YUYV packed YUV */
+ VPX_IMG_FMT_YVYU, /**< YVYU packed YUV */
+ VPX_IMG_FMT_BGR24, /**< 24 bit per pixel packed BGR */
+ VPX_IMG_FMT_RGB32_LE, /**< 32 bit packed BGR0 */
+ VPX_IMG_FMT_ARGB, /**< 32 bit packed ARGB, alpha=255 */
+ VPX_IMG_FMT_ARGB_LE, /**< 32 bit packed BGRA, alpha=255 */
+ VPX_IMG_FMT_RGB565_LE, /**< 16 bit per pixel, gggbbbbb rrrrrggg */
+ VPX_IMG_FMT_RGB555_LE, /**< 16 bit per pixel, gggbbbbb 0rrrrrgg */
+ VPX_IMG_FMT_YV12 = VPX_IMG_FMT_PLANAR | VPX_IMG_FMT_UV_FLIP | 1, /**< planar YVU */
+ VPX_IMG_FMT_I420 = VPX_IMG_FMT_PLANAR | 2,
+ VPX_IMG_FMT_VPXYV12 = VPX_IMG_FMT_PLANAR | VPX_IMG_FMT_UV_FLIP | 3, /**< planar 4:2:0 format with vpx color space */
+ VPX_IMG_FMT_VPXI420 = VPX_IMG_FMT_PLANAR | 4 /**< planar 4:2:0 format with vpx color space */
+ }
+ vpx_img_fmt_t; /**< alias for enum vpx_img_fmt */
#if !defined(VPX_CODEC_DISABLE_COMPAT) || !VPX_CODEC_DISABLE_COMPAT
#define IMG_FMT_PLANAR VPX_IMG_FMT_PLANAR /**< \deprecated Use #VPX_IMG_FMT_PLANAR */
#define IMG_FMT_UV_FLIP VPX_IMG_FMT_UV_FLIP /**< \deprecated Use #VPX_IMG_FMT_UV_FLIP */
#define IMG_FMT_HAS_ALPHA VPX_IMG_FMT_HAS_ALPHA /**< \deprecated Use #VPX_IMG_FMT_HAS_ALPHA */
- /*!\brief Deprecated list of supported image formats
- * \deprecated New code should use #vpx_img_fmt
- */
+ /*!\brief Deprecated list of supported image formats
+ * \deprecated New code should use #vpx_img_fmt
+ */
#define img_fmt vpx_img_fmt
- /*!\brief alias for enum img_fmt.
- * \deprecated New code should use #vpx_img_fmt_t
- */
+ /*!\brief alias for enum img_fmt.
+ * \deprecated New code should use #vpx_img_fmt_t
+ */
#define img_fmt_t vpx_img_fmt_t
#define IMG_FMT_NONE VPX_IMG_FMT_NONE /**< \deprecated Use #VPX_IMG_FMT_NONE */
@@ -93,24 +93,23 @@
#define IMG_FMT_VPXI420 VPX_IMG_FMT_VPXI420 /**< \deprecated Use #VPX_IMG_FMT_VPXI420 */
#endif /* VPX_CODEC_DISABLE_COMPAT */
- /**\brief Image Descriptor */
- typedef struct vpx_image
- {
- vpx_img_fmt_t fmt; /**< Image Format */
+ /**\brief Image Descriptor */
+ typedef struct vpx_image {
+ vpx_img_fmt_t fmt; /**< Image Format */
- /* Image storage dimensions */
- unsigned int w; /**< Stored image width */
- unsigned int h; /**< Stored image height */
+ /* Image storage dimensions */
+ unsigned int w; /**< Stored image width */
+ unsigned int h; /**< Stored image height */
- /* Image display dimensions */
- unsigned int d_w; /**< Displayed image width */
- unsigned int d_h; /**< Displayed image height */
+ /* Image display dimensions */
+ unsigned int d_w; /**< Displayed image width */
+ unsigned int d_h; /**< Displayed image height */
- /* Chroma subsampling info */
- unsigned int x_chroma_shift; /**< subsampling order, X */
- unsigned int y_chroma_shift; /**< subsampling order, Y */
+ /* Chroma subsampling info */
+ unsigned int x_chroma_shift; /**< subsampling order, X */
+ unsigned int y_chroma_shift; /**< subsampling order, Y */
- /* Image data pointers. */
+ /* Image data pointers. */
#define VPX_PLANE_PACKED 0 /**< To be used for all packed formats */
#define VPX_PLANE_Y 0 /**< Y (Luminance) plane */
#define VPX_PLANE_U 1 /**< U (Chroma) plane */
@@ -123,119 +122,118 @@
#define PLANE_V VPX_PLANE_V
#define PLANE_ALPHA VPX_PLANE_ALPHA
#endif
- unsigned char *planes[4]; /**< pointer to the top left pixel for each plane */
- int stride[4]; /**< stride between rows for each plane */
+ unsigned char *planes[4]; /**< pointer to the top left pixel for each plane */
+ int stride[4]; /**< stride between rows for each plane */
- int bps; /**< bits per sample (for packed formats) */
+ int bps; /**< bits per sample (for packed formats) */
- /* The following member may be set by the application to associate data
- * with this image.
- */
- void *user_priv; /**< may be set by the application to associate data
+ /* The following member may be set by the application to associate data
+ * with this image.
+ */
+ void *user_priv; /**< may be set by the application to associate data
* with this image. */
- /* The following members should be treated as private. */
- unsigned char *img_data; /**< private */
- int img_data_owner; /**< private */
- int self_allocd; /**< private */
- } vpx_image_t; /**< alias for struct vpx_image */
+ /* The following members should be treated as private. */
+ unsigned char *img_data; /**< private */
+ int img_data_owner; /**< private */
+ int self_allocd; /**< private */
+ } vpx_image_t; /**< alias for struct vpx_image */
- /**\brief Representation of a rectangle on a surface */
- typedef struct vpx_image_rect
- {
- unsigned int x; /**< leftmost column */
- unsigned int y; /**< topmost row */
- unsigned int w; /**< width */
- unsigned int h; /**< height */
- } vpx_image_rect_t; /**< alias for struct vpx_image_rect */
+ /**\brief Representation of a rectangle on a surface */
+ typedef struct vpx_image_rect {
+ unsigned int x; /**< leftmost column */
+ unsigned int y; /**< topmost row */
+ unsigned int w; /**< width */
+ unsigned int h; /**< height */
+ } vpx_image_rect_t; /**< alias for struct vpx_image_rect */
- /*!\brief Open a descriptor, allocating storage for the underlying image
- *
- * Returns a descriptor for storing an image of the given format. The
- * storage for the descriptor is allocated on the heap.
- *
- * \param[in] img Pointer to storage for descriptor. If this parameter
- * is NULL, the storage for the descriptor will be
- * allocated on the heap.
- * \param[in] fmt Format for the image
- * \param[in] d_w Width of the image
- * \param[in] d_h Height of the image
- * \param[in] align Alignment, in bytes, of the image buffer and
- * each row in the image(stride).
- *
- * \return Returns a pointer to the initialized image descriptor. If the img
- * parameter is non-null, the value of the img parameter will be
- * returned.
- */
- vpx_image_t *vpx_img_alloc(vpx_image_t *img,
- vpx_img_fmt_t fmt,
- unsigned int d_w,
- unsigned int d_h,
- unsigned int align);
+ /*!\brief Open a descriptor, allocating storage for the underlying image
+ *
+ * Returns a descriptor for storing an image of the given format. The
+ * storage for the descriptor is allocated on the heap.
+ *
+ * \param[in] img Pointer to storage for descriptor. If this parameter
+ * is NULL, the storage for the descriptor will be
+ * allocated on the heap.
+ * \param[in] fmt Format for the image
+ * \param[in] d_w Width of the image
+ * \param[in] d_h Height of the image
+ * \param[in] align Alignment, in bytes, of the image buffer and
+ * each row in the image(stride).
+ *
+ * \return Returns a pointer to the initialized image descriptor. If the img
+ * parameter is non-null, the value of the img parameter will be
+ * returned.
+ */
+ vpx_image_t *vpx_img_alloc(vpx_image_t *img,
+ vpx_img_fmt_t fmt,
+ unsigned int d_w,
+ unsigned int d_h,
+ unsigned int align);
- /*!\brief Open a descriptor, using existing storage for the underlying image
- *
- * Returns a descriptor for storing an image of the given format. The
- * storage for descriptor has been allocated elsewhere, and a descriptor is
- * desired to "wrap" that storage.
- *
- * \param[in] img Pointer to storage for descriptor. If this parameter
- * is NULL, the storage for the descriptor will be
- * allocated on the heap.
- * \param[in] fmt Format for the image
- * \param[in] d_w Width of the image
- * \param[in] d_h Height of the image
- * \param[in] align Alignment, in bytes, of each row in the image.
- * \param[in] img_data Storage to use for the image
- *
- * \return Returns a pointer to the initialized image descriptor. If the img
- * parameter is non-null, the value of the img parameter will be
- * returned.
- */
- vpx_image_t *vpx_img_wrap(vpx_image_t *img,
- vpx_img_fmt_t fmt,
- unsigned int d_w,
- unsigned int d_h,
- unsigned int align,
- unsigned char *img_data);
+ /*!\brief Open a descriptor, using existing storage for the underlying image
+ *
+ * Returns a descriptor for storing an image of the given format. The
+ * storage for descriptor has been allocated elsewhere, and a descriptor is
+ * desired to "wrap" that storage.
+ *
+ * \param[in] img Pointer to storage for descriptor. If this parameter
+ * is NULL, the storage for the descriptor will be
+ * allocated on the heap.
+ * \param[in] fmt Format for the image
+ * \param[in] d_w Width of the image
+ * \param[in] d_h Height of the image
+ * \param[in] align Alignment, in bytes, of each row in the image.
+ * \param[in] img_data Storage to use for the image
+ *
+ * \return Returns a pointer to the initialized image descriptor. If the img
+ * parameter is non-null, the value of the img parameter will be
+ * returned.
+ */
+ vpx_image_t *vpx_img_wrap(vpx_image_t *img,
+ vpx_img_fmt_t fmt,
+ unsigned int d_w,
+ unsigned int d_h,
+ unsigned int align,
+ unsigned char *img_data);
- /*!\brief Set the rectangle identifying the displayed portion of the image
- *
- * Updates the displayed rectangle (aka viewport) on the image surface to
- * match the specified coordinates and size.
- *
- * \param[in] img Image descriptor
- * \param[in] x leftmost column
- * \param[in] y topmost row
- * \param[in] w width
- * \param[in] h height
- *
- * \return 0 if the requested rectangle is valid, nonzero otherwise.
- */
- int vpx_img_set_rect(vpx_image_t *img,
- unsigned int x,
- unsigned int y,
- unsigned int w,
- unsigned int h);
+ /*!\brief Set the rectangle identifying the displayed portion of the image
+ *
+ * Updates the displayed rectangle (aka viewport) on the image surface to
+ * match the specified coordinates and size.
+ *
+ * \param[in] img Image descriptor
+ * \param[in] x leftmost column
+ * \param[in] y topmost row
+ * \param[in] w width
+ * \param[in] h height
+ *
+ * \return 0 if the requested rectangle is valid, nonzero otherwise.
+ */
+ int vpx_img_set_rect(vpx_image_t *img,
+ unsigned int x,
+ unsigned int y,
+ unsigned int w,
+ unsigned int h);
- /*!\brief Flip the image vertically (top for bottom)
- *
- * Adjusts the image descriptor's pointers and strides to make the image
- * be referenced upside-down.
- *
- * \param[in] img Image descriptor
- */
- void vpx_img_flip(vpx_image_t *img);
+ /*!\brief Flip the image vertically (top for bottom)
+ *
+ * Adjusts the image descriptor's pointers and strides to make the image
+ * be referenced upside-down.
+ *
+ * \param[in] img Image descriptor
+ */
+ void vpx_img_flip(vpx_image_t *img);
- /*!\brief Close an image descriptor
- *
- * Frees all allocated storage associated with an image descriptor.
- *
- * \param[in] img Image descriptor
- */
- void vpx_img_free(vpx_image_t *img);
+ /*!\brief Close an image descriptor
+ *
+ * Frees all allocated storage associated with an image descriptor.
+ *
+ * \param[in] img Image descriptor
+ */
+ void vpx_img_free(vpx_image_t *img);
#endif
#ifdef __cplusplus
diff --git a/vpx_mem/include/vpx_mem_intrnl.h b/vpx_mem/include/vpx_mem_intrnl.h
index 63c6b77..60b5165 100644
--- a/vpx_mem/include/vpx_mem_intrnl.h
+++ b/vpx_mem/include/vpx_mem_intrnl.h
@@ -11,7 +11,7 @@
#ifndef __VPX_MEM_INTRNL_H__
#define __VPX_MEM_INTRNL_H__
-#include "vpx_config.h"
+#include "./vpx_config.h"
#ifndef CONFIG_MEM_MANAGER
# if defined(VXWORKS)
@@ -47,8 +47,8 @@
#ifndef DEFAULT_ALIGNMENT
# if defined(VXWORKS)
# define DEFAULT_ALIGNMENT 32 /*default addr alignment to use in
- calls to vpx_* functions other
- than vpx_memalign*/
+calls to vpx_* functions other
+than vpx_memalign*/
# else
# define DEFAULT_ALIGNMENT 1
# endif
@@ -60,9 +60,9 @@
#if CONFIG_MEM_TRACKER
# define TRY_BOUNDS_CHECK 1 /*when set to 1 pads each allocation,
- integrity can be checked using
- vpx_memory_tracker_check_integrity
- or on free by defining*/
+integrity can be checked using
+vpx_memory_tracker_check_integrity
+or on free by defining*/
/*TRY_BOUNDS_CHECK_ON_FREE*/
#else
# define TRY_BOUNDS_CHECK 0
@@ -70,13 +70,13 @@
#if TRY_BOUNDS_CHECK
# define TRY_BOUNDS_CHECK_ON_FREE 0 /*checks mem integrity on every
- free, very expensive*/
+free, very expensive*/
# define BOUNDS_CHECK_VALUE 0xdeadbeef /*value stored before/after ea.
- mem addr for bounds checking*/
+mem addr for bounds checking*/
# define BOUNDS_CHECK_PAD_SIZE 32 /*size of the padding before and
- after ea allocation to be filled
- with BOUNDS_CHECK_VALUE.
- this should be a multiple of 4*/
+after ea allocation to be filled
+with BOUNDS_CHECK_VALUE.
+this should be a multiple of 4*/
#else
# define BOUNDS_CHECK_VALUE 0
# define BOUNDS_CHECK_PAD_SIZE 0
diff --git a/vpx_mem/include/vpx_mem_tracker.h b/vpx_mem/include/vpx_mem_tracker.h
index ef2b29b..3be0d2d 100644
--- a/vpx_mem/include/vpx_mem_tracker.h
+++ b/vpx_mem/include/vpx_mem_tracker.h
@@ -23,158 +23,157 @@
#include <stdarg.h>
-struct mem_block
-{
- size_t addr;
- unsigned int size,
- line;
- char *file;
- struct mem_block *prev,
- * next;
+struct mem_block {
+ size_t addr;
+ unsigned int size,
+ line;
+ char *file;
+ struct mem_block *prev,
+ * next;
- int padded; // This mem_block has padding for integrity checks.
- // As of right now, this should only be 0 if
- // using vpx_mem_alloc to allocate cache memory.
- // 2005-01-11 tjf
+ int padded; // This mem_block has padding for integrity checks.
+ // As of right now, this should only be 0 if
+ // using vpx_mem_alloc to allocate cache memory.
+ // 2005-01-11 tjf
};
#if defined(__cplusplus)
extern "C" {
#endif
- /*
- vpx_memory_tracker_init(int padding_size, int pad_value)
- padding_size - the size of the padding before and after each mem addr.
- Values > 0 indicate that integrity checks can be performed
- by inspecting these areas.
- pad_value - the initial value within the padding area before and after
- each mem addr.
+ /*
+ vpx_memory_tracker_init(int padding_size, int pad_value)
+ padding_size - the size of the padding before and after each mem addr.
+ Values > 0 indicate that integrity checks can be performed
+ by inspecting these areas.
+ pad_value - the initial value within the padding area before and after
+ each mem addr.
- Initializes the memory tracker interface. Should be called before any
- other calls to the memory tracker.
- */
- int vpx_memory_tracker_init(int padding_size, int pad_value);
+ Initializes the memory tracker interface. Should be called before any
+ other calls to the memory tracker.
+ */
+ int vpx_memory_tracker_init(int padding_size, int pad_value);
- /*
- vpx_memory_tracker_destroy()
- Deinitializes the memory tracker interface
- */
- void vpx_memory_tracker_destroy();
+ /*
+ vpx_memory_tracker_destroy()
+ Deinitializes the memory tracker interface
+ */
+ void vpx_memory_tracker_destroy();
- /*
- vpx_memory_tracker_add(size_t addr, unsigned int size,
- char * file, unsigned int line)
- addr - memory address to be added to list
- size - size of addr
- file - the file addr was referenced from
- line - the line in file addr was referenced from
- Adds memory address addr, it's size, file and line it came from
- to the memory tracker allocation table
- */
- void vpx_memory_tracker_add(size_t addr, unsigned int size,
- char *file, unsigned int line,
- int padded);
+ /*
+ vpx_memory_tracker_add(size_t addr, unsigned int size,
+ char * file, unsigned int line)
+ addr - memory address to be added to list
+ size - size of addr
+ file - the file addr was referenced from
+ line - the line in file addr was referenced from
+ Adds memory address addr, its size, file and line it came from
+ to the memory tracker allocation table
+ */
+ void vpx_memory_tracker_add(size_t addr, unsigned int size,
+ char *file, unsigned int line,
+ int padded);
- /*
- vpx_memory_tracker_add(size_t addr, unsigned int size, char * file, unsigned int line)
- addr - memory address to be added to be removed
- padded - if 0, disables bounds checking on this memory block even if bounds
- checking is enabled. (for example, when allocating cache memory, we still want
- to check for memory leaks, but we do not waste cache space for bounds check padding)
- Removes the specified address from the memory tracker's allocation
- table
- Return:
- 0: on success
- -1: if memory allocation table's mutex could not be locked
- -2: if the addr was not found in the list
- */
- int vpx_memory_tracker_remove(size_t addr);
+ /*
+ vpx_memory_tracker_remove(size_t addr)
+ addr - memory address to be removed
+ padded - if 0, disables bounds checking on this memory block even if bounds
+ checking is enabled. (for example, when allocating cache memory, we still want
+ to check for memory leaks, but we do not waste cache space for bounds check padding)
+ Removes the specified address from the memory tracker's allocation
+ table
+ Return:
+ 0: on success
+ -1: if memory allocation table's mutex could not be locked
+ -2: if the addr was not found in the list
+ */
+ int vpx_memory_tracker_remove(size_t addr);
- /*
- vpx_memory_tracker_find(unsigned int addr)
- addr - address to be found in the memory tracker's
- allocation table
- Return:
- If found, pointer to the memory block that matches addr
- NULL otherwise
- */
- struct mem_block *vpx_memory_tracker_find(size_t addr);
+ /*
+ vpx_memory_tracker_find(size_t addr)
+ addr - address to be found in the memory tracker's
+ allocation table
+ Return:
+ If found, pointer to the memory block that matches addr
+ NULL otherwise
+ */
+ struct mem_block *vpx_memory_tracker_find(size_t addr);
- /*
- vpx_memory_tracker_dump()
- Dumps the current contents of the memory
- tracker allocation table
- */
- void vpx_memory_tracker_dump();
+ /*
+ vpx_memory_tracker_dump()
+ Dumps the current contents of the memory
+ tracker allocation table
+ */
+ void vpx_memory_tracker_dump();
- /*
- vpx_memory_tracker_check_integrity()
- If a padding_size was provided to vpx_memory_tracker_init()
- This function will verify that the region before and after each
- memory address contains the specified pad_value. Should the check
- fail, the filename and line of the check will be printed out.
- */
- void vpx_memory_tracker_check_integrity(char *file, unsigned int line);
+ /*
+ vpx_memory_tracker_check_integrity()
+ If a padding_size was provided to vpx_memory_tracker_init()
+ This function will verify that the region before and after each
+ memory address contains the specified pad_value. Should the check
+ fail, the filename and line of the check will be printed out.
+ */
+ void vpx_memory_tracker_check_integrity(char *file, unsigned int line);
- /*
- vpx_memory_tracker_set_log_type
- type - value representing the logging type to use
- option - type specific option. This will be interpreted differently
- based on the type.
- Sets the logging type for the memory tracker.
- Values currently supported:
- 0: if option is NULL, log to stderr, otherwise interpret option as a
- filename and attempt to open it.
- 1: Use output_debug_string (WIN32 only), option ignored
- Return:
- 0: on success
- -1: if the logging type could not be set, because the value was invalid
- or because a file could not be opened
- */
- int vpx_memory_tracker_set_log_type(int type, char *option);
+ /*
+ vpx_memory_tracker_set_log_type
+ type - value representing the logging type to use
+ option - type specific option. This will be interpreted differently
+ based on the type.
+ Sets the logging type for the memory tracker.
+ Values currently supported:
+ 0: if option is NULL, log to stderr, otherwise interpret option as a
+ filename and attempt to open it.
+ 1: Use output_debug_string (WIN32 only), option ignored
+ Return:
+ 0: on success
+ -1: if the logging type could not be set, because the value was invalid
+ or because a file could not be opened
+ */
+ int vpx_memory_tracker_set_log_type(int type, char *option);
- /*
- vpx_memory_tracker_set_log_func
- userdata - ptr to be passed to the supplied logfunc, can be NULL
- logfunc - the logging function to be used to output data from
- vpx_memory_track_dump/check_integrity
- Sets a logging function to be used by the memory tracker.
- Return:
- 0: on success
- -1: if the logging type could not be set because logfunc was NULL
- */
- int vpx_memory_tracker_set_log_func(void *userdata,
- void(*logfunc)(void *userdata,
- const char *fmt, va_list args));
+ /*
+ vpx_memory_tracker_set_log_func
+ userdata - ptr to be passed to the supplied logfunc, can be NULL
+ logfunc - the logging function to be used to output data from
+ vpx_memory_track_dump/check_integrity
+ Sets a logging function to be used by the memory tracker.
+ Return:
+ 0: on success
+ -1: if the logging type could not be set because logfunc was NULL
+ */
+ int vpx_memory_tracker_set_log_func(void *userdata,
+ void(*logfunc)(void *userdata,
+ const char *fmt, va_list args));
- /* Wrappers to standard library functions. */
- typedef void*(* mem_track_malloc_func)(size_t);
- typedef void*(* mem_track_calloc_func)(size_t, size_t);
- typedef void*(* mem_track_realloc_func)(void *, size_t);
- typedef void (* mem_track_free_func)(void *);
- typedef void*(* mem_track_memcpy_func)(void *, const void *, size_t);
- typedef void*(* mem_track_memset_func)(void *, int, size_t);
- typedef void*(* mem_track_memmove_func)(void *, const void *, size_t);
+ /* Wrappers to standard library functions. */
+ typedef void *(* mem_track_malloc_func)(size_t);
+ typedef void *(* mem_track_calloc_func)(size_t, size_t);
+ typedef void *(* mem_track_realloc_func)(void *, size_t);
+ typedef void (* mem_track_free_func)(void *);
+ typedef void *(* mem_track_memcpy_func)(void *, const void *, size_t);
+ typedef void *(* mem_track_memset_func)(void *, int, size_t);
+ typedef void *(* mem_track_memmove_func)(void *, const void *, size_t);
- /*
- vpx_memory_tracker_set_functions
+ /*
+ vpx_memory_tracker_set_functions
- Sets the function pointers for the standard library functions.
+ Sets the function pointers for the standard library functions.
- Return:
- 0: on success
- -1: if the use global function pointers is not set.
- */
- int vpx_memory_tracker_set_functions(mem_track_malloc_func g_malloc_l
- , mem_track_calloc_func g_calloc_l
- , mem_track_realloc_func g_realloc_l
- , mem_track_free_func g_free_l
- , mem_track_memcpy_func g_memcpy_l
- , mem_track_memset_func g_memset_l
- , mem_track_memmove_func g_memmove_l);
+ Return:
+ 0: on success
+ -1: if the use global function pointers is not set.
+ */
+ int vpx_memory_tracker_set_functions(mem_track_malloc_func g_malloc_l
+, mem_track_calloc_func g_calloc_l
+, mem_track_realloc_func g_realloc_l
+, mem_track_free_func g_free_l
+, mem_track_memcpy_func g_memcpy_l
+, mem_track_memset_func g_memset_l
+, mem_track_memmove_func g_memmove_l);
#if defined(__cplusplus)
}
#endif
-#endif //__VPX_MEM_TRACKER_H__
+#endif // __VPX_MEM_TRACKER_H__
diff --git a/vpx_mem/memory_manager/hmm_alloc.c b/vpx_mem/memory_manager/hmm_alloc.c
index 22c4a54..ab3562d 100644
--- a/vpx_mem/memory_manager/hmm_alloc.c
+++ b/vpx_mem/memory_manager/hmm_alloc.c
@@ -15,46 +15,44 @@
#include "hmm_intrnl.h"
-void *U(alloc)(U(descriptor) *desc, U(size_aau) n)
-{
+void *U(alloc)(U(descriptor) *desc, U(size_aau) n) {
#ifdef HMM_AUDIT_FAIL
- if (desc->avl_tree_root)
- AUDIT_BLOCK(PTR_REC_TO_HEAD(desc->avl_tree_root))
+ if (desc->avl_tree_root)
+ AUDIT_BLOCK(PTR_REC_TO_HEAD(desc->avl_tree_root))
#endif
- if (desc->last_freed)
- {
+ if (desc->last_freed) {
#ifdef HMM_AUDIT_FAIL
- AUDIT_BLOCK(desc->last_freed)
+ AUDIT_BLOCK(desc->last_freed)
#endif
- U(into_free_collection)(desc, (head_record *)(desc->last_freed));
+ U(into_free_collection)(desc, (head_record *)(desc->last_freed));
- desc->last_freed = 0;
- }
-
- /* Add space for block header. */
- n += HEAD_AAUS;
-
- /* Convert n from number of address alignment units to block alignment
- ** units. */
- n = DIV_ROUND_UP(n, HMM_BLOCK_ALIGN_UNIT);
-
- if (n < MIN_BLOCK_BAUS)
- n = MIN_BLOCK_BAUS;
-
- {
- /* Search for the first node of the bin containing the smallest
- ** block big enough to satisfy request. */
- ptr_record *ptr_rec_ptr =
- U(avl_search)(
- (U(avl_avl) *) & (desc->avl_tree_root), (U(size_bau)) n,
- AVL_GREATER_EQUAL);
-
- /* If an approprate bin is found, satisfy the allocation request,
- ** otherwise return null pointer. */
- return(ptr_rec_ptr ?
- U(alloc_from_bin)(desc, ptr_rec_ptr, (U(size_bau)) n) : 0);
+ desc->last_freed = 0;
}
+
+ /* Add space for block header. */
+ n += HEAD_AAUS;
+
+ /* Convert n from number of address alignment units to block alignment
+ ** units. */
+ n = DIV_ROUND_UP(n, HMM_BLOCK_ALIGN_UNIT);
+
+ if (n < MIN_BLOCK_BAUS)
+ n = MIN_BLOCK_BAUS;
+
+ {
+ /* Search for the first node of the bin containing the smallest
+ ** block big enough to satisfy request. */
+ ptr_record *ptr_rec_ptr =
+ U(avl_search)(
+ (U(avl_avl) *) & (desc->avl_tree_root), (U(size_bau)) n,
+ AVL_GREATER_EQUAL);
+
+ /* If an appropriate bin is found, satisfy the allocation request,
+ ** otherwise return null pointer. */
+ return(ptr_rec_ptr ?
+ U(alloc_from_bin)(desc, ptr_rec_ptr, (U(size_bau)) n) : 0);
+ }
}
diff --git a/vpx_mem/memory_manager/hmm_base.c b/vpx_mem/memory_manager/hmm_base.c
index ad1da03..0eff59d 100644
--- a/vpx_mem/memory_manager/hmm_base.c
+++ b/vpx_mem/memory_manager/hmm_base.c
@@ -15,58 +15,53 @@
#include "hmm_intrnl.h"
-void U(init)(U(descriptor) *desc)
-{
- desc->avl_tree_root = 0;
- desc->last_freed = 0;
+void U(init)(U(descriptor) *desc) {
+ desc->avl_tree_root = 0;
+ desc->last_freed = 0;
}
/* Remove a free block from a bin's doubly-linked list when it is not,
** the first block in the bin.
*/
void U(dll_remove)(
- /* Pointer to pointer record in the block to be removed. */
- ptr_record *to_remove)
-{
- to_remove->prev->next = to_remove->next;
+ /* Pointer to pointer record in the block to be removed. */
+ ptr_record *to_remove) {
+ to_remove->prev->next = to_remove->next;
- if (to_remove->next)
- to_remove->next->prev = to_remove->prev;
+ if (to_remove->next)
+ to_remove->next->prev = to_remove->prev;
}
/* Put a block into the free collection of a heap.
*/
void U(into_free_collection)(
- /* Pointer to heap descriptor. */
- U(descriptor) *desc,
- /* Pointer to head record of block. */
- head_record *head_ptr)
-{
- ptr_record *ptr_rec_ptr = HEAD_TO_PTR_REC(head_ptr);
+ /* Pointer to heap descriptor. */
+ U(descriptor) *desc,
+ /* Pointer to head record of block. */
+ head_record *head_ptr) {
+ ptr_record *ptr_rec_ptr = HEAD_TO_PTR_REC(head_ptr);
- ptr_record *bin_front_ptr =
- U(avl_insert)((U(avl_avl) *) & (desc->avl_tree_root), ptr_rec_ptr);
+ ptr_record *bin_front_ptr =
+ U(avl_insert)((U(avl_avl) *) & (desc->avl_tree_root), ptr_rec_ptr);
- if (bin_front_ptr != ptr_rec_ptr)
- {
- /* The block was not inserted into the AVL tree because there is
- ** already a bin for the size of the block. */
+ if (bin_front_ptr != ptr_rec_ptr) {
+ /* The block was not inserted into the AVL tree because there is
+ ** already a bin for the size of the block. */
- MARK_SUCCESSIVE_BLOCK_IN_FREE_BIN(head_ptr)
- ptr_rec_ptr->self = ptr_rec_ptr;
+ MARK_SUCCESSIVE_BLOCK_IN_FREE_BIN(head_ptr)
+ ptr_rec_ptr->self = ptr_rec_ptr;
- /* Make the block the new second block in the bin's doubly-linked
- ** list. */
- ptr_rec_ptr->prev = bin_front_ptr;
- ptr_rec_ptr->next = bin_front_ptr->next;
- bin_front_ptr->next = ptr_rec_ptr;
+ /* Make the block the new second block in the bin's doubly-linked
+ ** list. */
+ ptr_rec_ptr->prev = bin_front_ptr;
+ ptr_rec_ptr->next = bin_front_ptr->next;
+ bin_front_ptr->next = ptr_rec_ptr;
- if (ptr_rec_ptr->next)
- ptr_rec_ptr->next->prev = ptr_rec_ptr;
- }
- else
- /* Block is first block in new bin. */
- ptr_rec_ptr->next = 0;
+ if (ptr_rec_ptr->next)
+ ptr_rec_ptr->next->prev = ptr_rec_ptr;
+ } else
+ /* Block is first block in new bin. */
+ ptr_rec_ptr->next = 0;
}
/* Allocate a block from a given bin. Returns a pointer to the payload
@@ -74,268 +69,245 @@
** to calling this function.
*/
void *U(alloc_from_bin)(
- /* Pointer to heap descriptor. */
- U(descriptor) *desc,
- /* Pointer to pointer record of first block in bin. */
- ptr_record *bin_front_ptr,
- /* Number of BAUs needed in the allocated block. If the block taken
- ** from the bin is significantly larger than the number of BAUs needed,
- ** the "extra" BAUs are split off to form a new free block. */
- U(size_bau) n_baus)
-{
- head_record *head_ptr;
- U(size_bau) rem_baus;
+ /* Pointer to heap descriptor. */
+ U(descriptor) *desc,
+ /* Pointer to pointer record of first block in bin. */
+ ptr_record *bin_front_ptr,
+ /* Number of BAUs needed in the allocated block. If the block taken
+ ** from the bin is significantly larger than the number of BAUs needed,
+ ** the "extra" BAUs are split off to form a new free block. */
+ U(size_bau) n_baus) {
+ head_record *head_ptr;
+ U(size_bau) rem_baus;
- if (bin_front_ptr->next)
- {
- /* There are multiple blocks in this bin. Use the 2nd block in
- ** the bin to avoid needless change to the AVL tree.
- */
+ if (bin_front_ptr->next) {
+ /* There are multiple blocks in this bin. Use the 2nd block in
+ ** the bin to avoid needless change to the AVL tree.
+ */
- ptr_record *ptr_rec_ptr = bin_front_ptr->next;
- head_ptr = PTR_REC_TO_HEAD(ptr_rec_ptr);
+ ptr_record *ptr_rec_ptr = bin_front_ptr->next;
+ head_ptr = PTR_REC_TO_HEAD(ptr_rec_ptr);
#ifdef AUDIT_FAIL
- AUDIT_BLOCK(head_ptr)
+ AUDIT_BLOCK(head_ptr)
#endif
- U(dll_remove)(ptr_rec_ptr);
- }
- else
- {
- /* There is only one block in the bin, so it has to be removed
- ** from the AVL tree.
- */
+ U(dll_remove)(ptr_rec_ptr);
+ } else {
+ /* There is only one block in the bin, so it has to be removed
+ ** from the AVL tree.
+ */
- head_ptr = PTR_REC_TO_HEAD(bin_front_ptr);
+ head_ptr = PTR_REC_TO_HEAD(bin_front_ptr);
- U(avl_remove)(
- (U(avl_avl) *) &(desc->avl_tree_root), BLOCK_BAUS(head_ptr));
- }
+ U(avl_remove)(
+ (U(avl_avl) *) & (desc->avl_tree_root), BLOCK_BAUS(head_ptr));
+ }
- MARK_BLOCK_ALLOCATED(head_ptr)
+ MARK_BLOCK_ALLOCATED(head_ptr)
- rem_baus = BLOCK_BAUS(head_ptr) - n_baus;
+ rem_baus = BLOCK_BAUS(head_ptr) - n_baus;
- if (rem_baus >= MIN_BLOCK_BAUS)
- {
- /* Since there are enough "extra" BAUs, split them off to form
- ** a new free block.
- */
+ if (rem_baus >= MIN_BLOCK_BAUS) {
+ /* Since there are enough "extra" BAUs, split them off to form
+ ** a new free block.
+ */
- head_record *rem_head_ptr =
- (head_record *) BAUS_FORWARD(head_ptr, n_baus);
+ head_record *rem_head_ptr =
+ (head_record *) BAUS_FORWARD(head_ptr, n_baus);
- /* Change the next block's header to reflect the fact that the
- ** block preceeding it is now smaller.
- */
- SET_PREV_BLOCK_BAUS(
- BAUS_FORWARD(head_ptr, head_ptr->block_size), rem_baus)
+ /* Change the next block's header to reflect the fact that the
+ ** block preceding it is now smaller.
+ */
+ SET_PREV_BLOCK_BAUS(
+ BAUS_FORWARD(head_ptr, head_ptr->block_size), rem_baus)
- head_ptr->block_size = n_baus;
+ head_ptr->block_size = n_baus;
- rem_head_ptr->previous_block_size = n_baus;
- rem_head_ptr->block_size = rem_baus;
+ rem_head_ptr->previous_block_size = n_baus;
+ rem_head_ptr->block_size = rem_baus;
- desc->last_freed = rem_head_ptr;
- }
+ desc->last_freed = rem_head_ptr;
+ }
- return(HEAD_TO_PTR_REC(head_ptr));
+ return(HEAD_TO_PTR_REC(head_ptr));
}
/* Take a block out of the free collection.
*/
void U(out_of_free_collection)(
- /* Descriptor of heap that block is in. */
- U(descriptor) *desc,
- /* Pointer to head of block to take out of free collection. */
- head_record *head_ptr)
-{
- ptr_record *ptr_rec_ptr = HEAD_TO_PTR_REC(head_ptr);
+ /* Descriptor of heap that block is in. */
+ U(descriptor) *desc,
+ /* Pointer to head of block to take out of free collection. */
+ head_record *head_ptr) {
+ ptr_record *ptr_rec_ptr = HEAD_TO_PTR_REC(head_ptr);
- if (ptr_rec_ptr->self == ptr_rec_ptr)
- /* Block is not the front block in its bin, so all we have to
- ** do is take it out of the bin's doubly-linked list. */
- U(dll_remove)(ptr_rec_ptr);
+ if (ptr_rec_ptr->self == ptr_rec_ptr)
+ /* Block is not the front block in its bin, so all we have to
+ ** do is take it out of the bin's doubly-linked list. */
+ U(dll_remove)(ptr_rec_ptr);
+ else {
+ ptr_record *next = ptr_rec_ptr->next;
+
+ if (next)
+ /* Block is the front block in its bin, and there is at least
+ ** one other block in the bin. Substitute the next block for
+ ** the front block. */
+ U(avl_subst)((U(avl_avl) *) & (desc->avl_tree_root), next);
else
- {
- ptr_record *next = ptr_rec_ptr->next;
-
- if (next)
- /* Block is the front block in its bin, and there is at least
- ** one other block in the bin. Substitute the next block for
- ** the front block. */
- U(avl_subst)((U(avl_avl) *) &(desc->avl_tree_root), next);
- else
- /* Block is the front block in its bin, but there is no other
- ** block in the bin. Eliminate the bin. */
- U(avl_remove)(
- (U(avl_avl) *) &(desc->avl_tree_root), BLOCK_BAUS(head_ptr));
- }
+ /* Block is the front block in its bin, but there is no other
+ ** block in the bin. Eliminate the bin. */
+ U(avl_remove)(
+ (U(avl_avl) *) & (desc->avl_tree_root), BLOCK_BAUS(head_ptr));
+ }
}
-void U(free)(U(descriptor) *desc, void *payload_ptr)
-{
- /* Flags if coalesce with adjacent block. */
- int coalesce;
+void U(free)(U(descriptor) *desc, void *payload_ptr) {
+ /* Flags if coalesce with adjacent block. */
+ int coalesce;
- head_record *fwd_head_ptr;
- head_record *free_head_ptr = PTR_REC_TO_HEAD(payload_ptr);
+ head_record *fwd_head_ptr;
+ head_record *free_head_ptr = PTR_REC_TO_HEAD(payload_ptr);
- desc->num_baus_can_shrink = 0;
+ desc->num_baus_can_shrink = 0;
#ifdef HMM_AUDIT_FAIL
- AUDIT_BLOCK(free_head_ptr)
+ AUDIT_BLOCK(free_head_ptr)
- /* Make sure not freeing an already free block. */
- if (!IS_BLOCK_ALLOCATED(free_head_ptr))
- HMM_AUDIT_FAIL
+ /* Make sure not freeing an already free block. */
+ if (!IS_BLOCK_ALLOCATED(free_head_ptr))
+ HMM_AUDIT_FAIL
- if (desc->avl_tree_root)
- /* Audit root block in AVL tree. */
- AUDIT_BLOCK(PTR_REC_TO_HEAD(desc->avl_tree_root))
+ if (desc->avl_tree_root)
+ /* Audit root block in AVL tree. */
+ AUDIT_BLOCK(PTR_REC_TO_HEAD(desc->avl_tree_root))
#endif
- fwd_head_ptr =
- (head_record *) BAUS_FORWARD(free_head_ptr, free_head_ptr->block_size);
+ fwd_head_ptr =
+ (head_record *) BAUS_FORWARD(free_head_ptr, free_head_ptr->block_size);
- if (free_head_ptr->previous_block_size)
- {
- /* Coalesce with backward block if possible. */
+ if (free_head_ptr->previous_block_size) {
+ /* Coalesce with backward block if possible. */
- head_record *bkwd_head_ptr =
- (head_record *) BAUS_BACKWARD(
- free_head_ptr, free_head_ptr->previous_block_size);
+ head_record *bkwd_head_ptr =
+ (head_record *) BAUS_BACKWARD(
+ free_head_ptr, free_head_ptr->previous_block_size);
#ifdef HMM_AUDIT_FAIL
- AUDIT_BLOCK(bkwd_head_ptr)
+ AUDIT_BLOCK(bkwd_head_ptr)
#endif
- if (bkwd_head_ptr == (head_record *)(desc->last_freed))
- {
- desc->last_freed = 0;
- coalesce = 1;
- }
- else if (IS_BLOCK_ALLOCATED(bkwd_head_ptr))
- coalesce = 0;
- else
- {
- U(out_of_free_collection)(desc, bkwd_head_ptr);
- coalesce = 1;
- }
-
- if (coalesce)
- {
- bkwd_head_ptr->block_size += free_head_ptr->block_size;
- SET_PREV_BLOCK_BAUS(fwd_head_ptr, BLOCK_BAUS(bkwd_head_ptr))
- free_head_ptr = bkwd_head_ptr;
- }
+ if (bkwd_head_ptr == (head_record *)(desc->last_freed)) {
+ desc->last_freed = 0;
+ coalesce = 1;
+ } else if (IS_BLOCK_ALLOCATED(bkwd_head_ptr))
+ coalesce = 0;
+ else {
+ U(out_of_free_collection)(desc, bkwd_head_ptr);
+ coalesce = 1;
}
- if (fwd_head_ptr->block_size == 0)
- {
- /* Block to be freed is last block before dummy end-of-chunk block. */
+ if (coalesce) {
+ bkwd_head_ptr->block_size += free_head_ptr->block_size;
+ SET_PREV_BLOCK_BAUS(fwd_head_ptr, BLOCK_BAUS(bkwd_head_ptr))
+ free_head_ptr = bkwd_head_ptr;
+ }
+ }
+
+ if (fwd_head_ptr->block_size == 0) {
+ /* Block to be freed is last block before dummy end-of-chunk block. */
+ desc->end_of_shrinkable_chunk =
+ BAUS_FORWARD(fwd_head_ptr, DUMMY_END_BLOCK_BAUS);
+ desc->num_baus_can_shrink = BLOCK_BAUS(free_head_ptr);
+
+ if (PREV_BLOCK_BAUS(free_head_ptr) == 0)
+ /* Free block is the entire chunk, so shrinking can eliminate
+ ** entire chunk including dummy end block. */
+ desc->num_baus_can_shrink += DUMMY_END_BLOCK_BAUS;
+ } else {
+ /* Coalesce with forward block if possible. */
+
+#ifdef HMM_AUDIT_FAIL
+ AUDIT_BLOCK(fwd_head_ptr)
+#endif
+
+ if (fwd_head_ptr == (head_record *)(desc->last_freed)) {
+ desc->last_freed = 0;
+ coalesce = 1;
+ } else if (IS_BLOCK_ALLOCATED(fwd_head_ptr))
+ coalesce = 0;
+ else {
+ U(out_of_free_collection)(desc, fwd_head_ptr);
+ coalesce = 1;
+ }
+
+ if (coalesce) {
+ free_head_ptr->block_size += fwd_head_ptr->block_size;
+
+ fwd_head_ptr =
+ (head_record *) BAUS_FORWARD(
+ fwd_head_ptr, BLOCK_BAUS(fwd_head_ptr));
+
+ SET_PREV_BLOCK_BAUS(fwd_head_ptr, BLOCK_BAUS(free_head_ptr))
+
+ if (fwd_head_ptr->block_size == 0) {
+ /* Coalesced block to be freed is last block before dummy
+ ** end-of-chunk block. */
desc->end_of_shrinkable_chunk =
- BAUS_FORWARD(fwd_head_ptr, DUMMY_END_BLOCK_BAUS);
+ BAUS_FORWARD(fwd_head_ptr, DUMMY_END_BLOCK_BAUS);
desc->num_baus_can_shrink = BLOCK_BAUS(free_head_ptr);
if (PREV_BLOCK_BAUS(free_head_ptr) == 0)
- /* Free block is the entire chunk, so shrinking can eliminate
- ** entire chunk including dummy end block. */
- desc->num_baus_can_shrink += DUMMY_END_BLOCK_BAUS;
+ /* Free block is the entire chunk, so shrinking can
+ ** eliminate entire chunk including dummy end block. */
+ desc->num_baus_can_shrink += DUMMY_END_BLOCK_BAUS;
+ }
}
- else
- {
- /* Coalesce with forward block if possible. */
+ }
+
+ if (desc->last_freed) {
+ /* There is a last freed block, but it is not adjacent to the
+ ** block being freed by this call to free, so put the last
+ ** freed block into the free collection.
+ */
#ifdef HMM_AUDIT_FAIL
- AUDIT_BLOCK(fwd_head_ptr)
+ AUDIT_BLOCK(desc->last_freed)
#endif
- if (fwd_head_ptr == (head_record *)(desc->last_freed))
- {
- desc->last_freed = 0;
- coalesce = 1;
- }
- else if (IS_BLOCK_ALLOCATED(fwd_head_ptr))
- coalesce = 0;
- else
- {
- U(out_of_free_collection)(desc, fwd_head_ptr);
- coalesce = 1;
- }
+ U(into_free_collection)(desc, (head_record *)(desc->last_freed));
+ }
- if (coalesce)
- {
- free_head_ptr->block_size += fwd_head_ptr->block_size;
-
- fwd_head_ptr =
- (head_record *) BAUS_FORWARD(
- fwd_head_ptr, BLOCK_BAUS(fwd_head_ptr));
-
- SET_PREV_BLOCK_BAUS(fwd_head_ptr, BLOCK_BAUS(free_head_ptr))
-
- if (fwd_head_ptr->block_size == 0)
- {
- /* Coalesced block to be freed is last block before dummy
- ** end-of-chunk block. */
- desc->end_of_shrinkable_chunk =
- BAUS_FORWARD(fwd_head_ptr, DUMMY_END_BLOCK_BAUS);
- desc->num_baus_can_shrink = BLOCK_BAUS(free_head_ptr);
-
- if (PREV_BLOCK_BAUS(free_head_ptr) == 0)
- /* Free block is the entire chunk, so shrinking can
- ** eliminate entire chunk including dummy end block. */
- desc->num_baus_can_shrink += DUMMY_END_BLOCK_BAUS;
- }
- }
- }
-
- if (desc->last_freed)
- {
- /* There is a last freed block, but it is not adjacent to the
- ** block being freed by this call to free, so put the last
- ** freed block into the free collection.
- */
-
-#ifdef HMM_AUDIT_FAIL
- AUDIT_BLOCK(desc->last_freed)
-#endif
-
- U(into_free_collection)(desc, (head_record *)(desc->last_freed));
- }
-
- desc->last_freed = free_head_ptr;
+ desc->last_freed = free_head_ptr;
}
-void U(new_chunk)(U(descriptor) *desc, void *start, U(size_bau) n_baus)
-{
+void U(new_chunk)(U(descriptor) *desc, void *start, U(size_bau) n_baus) {
#ifdef HMM_AUDIT_FAIL
- if (desc->avl_tree_root)
- /* Audit root block in AVL tree. */
- AUDIT_BLOCK(PTR_REC_TO_HEAD(desc->avl_tree_root))
+ if (desc->avl_tree_root)
+ /* Audit root block in AVL tree. */
+ AUDIT_BLOCK(PTR_REC_TO_HEAD(desc->avl_tree_root))
#endif
#undef HEAD_PTR
#define HEAD_PTR ((head_record *) start)
- /* Make the chunk one big free block followed by a dummy end block.
- */
+ /* Make the chunk one big free block followed by a dummy end block.
+ */
- n_baus -= DUMMY_END_BLOCK_BAUS;
+ n_baus -= DUMMY_END_BLOCK_BAUS;
- HEAD_PTR->previous_block_size = 0;
- HEAD_PTR->block_size = n_baus;
+ HEAD_PTR->previous_block_size = 0;
+ HEAD_PTR->block_size = n_baus;
- U(into_free_collection)(desc, HEAD_PTR);
+ U(into_free_collection)(desc, HEAD_PTR);
- /* Set up the dummy end block. */
- start = BAUS_FORWARD(start, n_baus);
- HEAD_PTR->previous_block_size = n_baus;
- HEAD_PTR->block_size = 0;
+ /* Set up the dummy end block. */
+ start = BAUS_FORWARD(start, n_baus);
+ HEAD_PTR->previous_block_size = n_baus;
+ HEAD_PTR->block_size = 0;
#undef HEAD_PTR
}
@@ -345,12 +317,11 @@
/* Function that does audit fail actions defined my preprocessor symbol,
** and returns a dummy integer value.
*/
-int U(audit_block_fail_dummy_return)(void)
-{
- HMM_AUDIT_FAIL
+int U(audit_block_fail_dummy_return)(void) {
+ HMM_AUDIT_FAIL
- /* Dummy return. */
- return(0);
+ /* Dummy return. */
+ return(0);
}
#endif
@@ -372,9 +343,9 @@
*/
#define AVL_GET_LESS(H, ACCESS) \
- (((ACCESS) ? AUDIT_BLOCK_AS_EXPR(PTR_REC_TO_HEAD(H)) : 0), (H)->self)
+ (((ACCESS) ? AUDIT_BLOCK_AS_EXPR(PTR_REC_TO_HEAD(H)) : 0), (H)->self)
#define AVL_GET_GREATER(H, ACCESS) \
- (((ACCESS) ? AUDIT_BLOCK_AS_EXPR(PTR_REC_TO_HEAD(H)) : 0), (H)->prev)
+ (((ACCESS) ? AUDIT_BLOCK_AS_EXPR(PTR_REC_TO_HEAD(H)) : 0), (H)->prev)
#else
@@ -396,39 +367,39 @@
*/
#define AVL_GET_BALANCE_FACTOR(H) \
- ((((head_record *) (PTR_REC_TO_HEAD(H)))->block_size & \
- HIGH_BIT_BAU_SIZE) ? \
- (((head_record *) (PTR_REC_TO_HEAD(H)))->previous_block_size & \
- HIGH_BIT_BAU_SIZE ? 0 : -1) : 1)
+ ((((head_record *) (PTR_REC_TO_HEAD(H)))->block_size & \
+ HIGH_BIT_BAU_SIZE) ? \
+ (((head_record *) (PTR_REC_TO_HEAD(H)))->previous_block_size & \
+ HIGH_BIT_BAU_SIZE ? 0 : -1) : 1)
#define AVL_SET_BALANCE_FACTOR(H, BF) \
- { \
- register head_record *p = \
- (head_record *) PTR_REC_TO_HEAD(H); \
- register int bal_f = (BF); \
- \
- if (bal_f <= 0) \
- p->block_size |= HIGH_BIT_BAU_SIZE; \
- else \
- p->block_size &= ~HIGH_BIT_BAU_SIZE; \
- if (bal_f >= 0) \
- p->previous_block_size |= HIGH_BIT_BAU_SIZE; \
- else \
- p->previous_block_size &= ~HIGH_BIT_BAU_SIZE; \
- }
+ { \
+ register head_record *p = \
+ (head_record *) PTR_REC_TO_HEAD(H); \
+ register int bal_f = (BF); \
+ \
+ if (bal_f <= 0) \
+ p->block_size |= HIGH_BIT_BAU_SIZE; \
+ else \
+ p->block_size &= ~HIGH_BIT_BAU_SIZE; \
+ if (bal_f >= 0) \
+ p->previous_block_size |= HIGH_BIT_BAU_SIZE; \
+ else \
+ p->previous_block_size &= ~HIGH_BIT_BAU_SIZE; \
+ }
#define COMPARE_KEY_KEY(K1, K2) ((K1) == (K2) ? 0 : ((K1) > (K2) ? 1 : -1))
#define AVL_COMPARE_KEY_NODE(K, H) \
- COMPARE_KEY_KEY(K, BLOCK_BAUS(PTR_REC_TO_HEAD(H)))
+ COMPARE_KEY_KEY(K, BLOCK_BAUS(PTR_REC_TO_HEAD(H)))
#define AVL_COMPARE_NODE_NODE(H1, H2) \
- COMPARE_KEY_KEY(BLOCK_BAUS(PTR_REC_TO_HEAD(H1)), \
- BLOCK_BAUS(PTR_REC_TO_HEAD(H2)))
+ COMPARE_KEY_KEY(BLOCK_BAUS(PTR_REC_TO_HEAD(H1)), \
+ BLOCK_BAUS(PTR_REC_TO_HEAD(H2)))
#define AVL_NULL ((ptr_record *) 0)
#define AVL_IMPL_MASK \
- ( AVL_IMPL_INSERT | AVL_IMPL_SEARCH | AVL_IMPL_REMOVE | AVL_IMPL_SUBST )
+ ( AVL_IMPL_INSERT | AVL_IMPL_SEARCH | AVL_IMPL_REMOVE | AVL_IMPL_SUBST )
#include "cavl_impl.h"
diff --git a/vpx_mem/memory_manager/hmm_dflt_abort.c b/vpx_mem/memory_manager/hmm_dflt_abort.c
index d92435c..51c3cc2 100644
--- a/vpx_mem/memory_manager/hmm_dflt_abort.c
+++ b/vpx_mem/memory_manager/hmm_dflt_abort.c
@@ -29,26 +29,25 @@
/* Print abort message, file and line. Terminate execution.
*/
-void hmm_dflt_abort(const char *file, const char *line)
-{
- /* Avoid use of printf(), which is more likely to use heap. */
+void hmm_dflt_abort(const char *file, const char *line) {
+ /* Avoid use of printf(), which is more likely to use heap. */
- if (entered)
+ if (entered)
- /* The standard I/O functions called a heap function and caused
- ** an indirect recursive call to this function. So we'll have
- ** to just exit without printing a message. */
- while (1);
-
- entered = 1;
-
- fputs("\n_abort - Heap corruption\n" "File: ", stderr);
- fputs(file, stderr);
- fputs(" Line: ", stderr);
- fputs(line, stderr);
- fputs("\n\n", stderr);
- fputs("hmm_dflt_abort: while(1)!!!\n", stderr);
- fflush(stderr);
-
+ /* The standard I/O functions called a heap function and caused
+ ** an indirect recursive call to this function. So we'll have
+ ** to just exit without printing a message. */
while (1);
+
+ entered = 1;
+
+ fputs("\n_abort - Heap corruption\n" "File: ", stderr);
+ fputs(file, stderr);
+ fputs(" Line: ", stderr);
+ fputs(line, stderr);
+ fputs("\n\n", stderr);
+ fputs("hmm_dflt_abort: while(1)!!!\n", stderr);
+ fflush(stderr);
+
+ while (1);
}
diff --git a/vpx_mem/memory_manager/hmm_grow.c b/vpx_mem/memory_manager/hmm_grow.c
index 9a4b6e4..0e86373 100644
--- a/vpx_mem/memory_manager/hmm_grow.c
+++ b/vpx_mem/memory_manager/hmm_grow.c
@@ -15,36 +15,35 @@
#include "hmm_intrnl.h"
-void U(grow_chunk)(U(descriptor) *desc, void *end, U(size_bau) n_baus)
-{
+void U(grow_chunk)(U(descriptor) *desc, void *end, U(size_bau) n_baus) {
#undef HEAD_PTR
#define HEAD_PTR ((head_record *) end)
- end = BAUS_BACKWARD(end, DUMMY_END_BLOCK_BAUS);
+ end = BAUS_BACKWARD(end, DUMMY_END_BLOCK_BAUS);
#ifdef HMM_AUDIT_FAIL
- if (HEAD_PTR->block_size != 0)
- /* Chunk does not have valid dummy end block. */
- HMM_AUDIT_FAIL
+ if (HEAD_PTR->block_size != 0)
+ /* Chunk does not have valid dummy end block. */
+ HMM_AUDIT_FAIL
#endif
- /* Create a new block that absorbs the old dummy end block. */
- HEAD_PTR->block_size = n_baus;
+ /* Create a new block that absorbs the old dummy end block. */
+ HEAD_PTR->block_size = n_baus;
- /* Set up the new dummy end block. */
- {
- head_record *dummy = (head_record *) BAUS_FORWARD(end, n_baus);
- dummy->previous_block_size = n_baus;
- dummy->block_size = 0;
- }
+ /* Set up the new dummy end block. */
+ {
+ head_record *dummy = (head_record *) BAUS_FORWARD(end, n_baus);
+ dummy->previous_block_size = n_baus;
+ dummy->block_size = 0;
+ }
- /* Simply free the new block, allowing it to coalesce with any
- ** free block at that was the last block in the chunk prior to
- ** growth.
- */
- U(free)(desc, HEAD_TO_PTR_REC(end));
+ /* Simply free the new block, allowing it to coalesce with any
+ ** free block at that was the last block in the chunk prior to
+ ** growth.
+ */
+ U(free)(desc, HEAD_TO_PTR_REC(end));
#undef HEAD_PTR
}
diff --git a/vpx_mem/memory_manager/hmm_largest.c b/vpx_mem/memory_manager/hmm_largest.c
index c3c6f2c..192758d 100644
--- a/vpx_mem/memory_manager/hmm_largest.c
+++ b/vpx_mem/memory_manager/hmm_largest.c
@@ -15,46 +15,43 @@
#include "hmm_intrnl.h"
-U(size_aau) U(largest_available)(U(descriptor) *desc)
-{
- U(size_bau) largest;
+U(size_aau) U(largest_available)(U(descriptor) *desc) {
+ U(size_bau) largest;
- if (!(desc->avl_tree_root))
- largest = 0;
- else
- {
+ if (!(desc->avl_tree_root))
+ largest = 0;
+ else {
#ifdef HMM_AUDIT_FAIL
- /* Audit root block in AVL tree. */
- AUDIT_BLOCK(PTR_REC_TO_HEAD(desc->avl_tree_root))
+ /* Audit root block in AVL tree. */
+ AUDIT_BLOCK(PTR_REC_TO_HEAD(desc->avl_tree_root))
#endif
- largest =
- BLOCK_BAUS(
- PTR_REC_TO_HEAD(
- U(avl_search)(
- (U(avl_avl) *) & (desc->avl_tree_root),
- (U(size_bau)) ~(U(size_bau)) 0, AVL_LESS)));
- }
+ largest =
+ BLOCK_BAUS(
+ PTR_REC_TO_HEAD(
+ U(avl_search)(
+ (U(avl_avl) *) & (desc->avl_tree_root),
+ (U(size_bau)) ~(U(size_bau)) 0, AVL_LESS)));
+ }
- if (desc->last_freed)
- {
- /* Size of last freed block. */
- register U(size_bau) lf_size;
+ if (desc->last_freed) {
+ /* Size of last freed block. */
+ register U(size_bau) lf_size;
#ifdef HMM_AUDIT_FAIL
- AUDIT_BLOCK(desc->last_freed)
+ AUDIT_BLOCK(desc->last_freed)
#endif
- lf_size = BLOCK_BAUS(desc->last_freed);
+ lf_size = BLOCK_BAUS(desc->last_freed);
- if (lf_size > largest)
- largest = lf_size;
- }
+ if (lf_size > largest)
+ largest = lf_size;
+ }
- /* Convert largest size to AAUs and subract head size leaving payload
- ** size.
- */
- return(largest ?
- ((largest * ((U(size_aau)) HMM_BLOCK_ALIGN_UNIT)) - HEAD_AAUS) :
- 0);
+ /* Convert largest size to AAUs and subract head size leaving payload
+ ** size.
+ */
+ return(largest ?
+ ((largest * ((U(size_aau)) HMM_BLOCK_ALIGN_UNIT)) - HEAD_AAUS) :
+ 0);
}
diff --git a/vpx_mem/memory_manager/hmm_resize.c b/vpx_mem/memory_manager/hmm_resize.c
index f90da96..baa5a8f 100644
--- a/vpx_mem/memory_manager/hmm_resize.c
+++ b/vpx_mem/memory_manager/hmm_resize.c
@@ -15,105 +15,100 @@
#include "hmm_intrnl.h"
-int U(resize)(U(descriptor) *desc, void *mem, U(size_aau) n)
-{
- U(size_aau) i;
- head_record *next_head_ptr;
- head_record *head_ptr = PTR_REC_TO_HEAD(mem);
+int U(resize)(U(descriptor) *desc, void *mem, U(size_aau) n) {
+ U(size_aau) i;
+ head_record *next_head_ptr;
+ head_record *head_ptr = PTR_REC_TO_HEAD(mem);
- /* Flag. */
- int next_block_free;
+ /* Flag. */
+ int next_block_free;
- /* Convert n from desired block size in AAUs to BAUs. */
- n += HEAD_AAUS;
- n = DIV_ROUND_UP(n, HMM_BLOCK_ALIGN_UNIT);
+ /* Convert n from desired block size in AAUs to BAUs. */
+ n += HEAD_AAUS;
+ n = DIV_ROUND_UP(n, HMM_BLOCK_ALIGN_UNIT);
- if (n < MIN_BLOCK_BAUS)
- n = MIN_BLOCK_BAUS;
+ if (n < MIN_BLOCK_BAUS)
+ n = MIN_BLOCK_BAUS;
#ifdef HMM_AUDIT_FAIL
- AUDIT_BLOCK(head_ptr)
+ AUDIT_BLOCK(head_ptr)
- if (!IS_BLOCK_ALLOCATED(head_ptr))
- HMM_AUDIT_FAIL
+ if (!IS_BLOCK_ALLOCATED(head_ptr))
+ HMM_AUDIT_FAIL
- if (desc->avl_tree_root)
- AUDIT_BLOCK(PTR_REC_TO_HEAD(desc->avl_tree_root))
+ if (desc->avl_tree_root)
+ AUDIT_BLOCK(PTR_REC_TO_HEAD(desc->avl_tree_root))
#endif
- i = head_ptr->block_size;
+ i = head_ptr->block_size;
+
+ next_head_ptr =
+ (head_record *) BAUS_FORWARD(head_ptr, head_ptr->block_size);
+
+ next_block_free =
+ (next_head_ptr == desc->last_freed) ||
+ !IS_BLOCK_ALLOCATED(next_head_ptr);
+
+ if (next_block_free)
+ /* Block can expand into next free block. */
+ i += BLOCK_BAUS(next_head_ptr);
+
+ if (n > i)
+ /* Not enough room for block to expand. */
+ return(-1);
+
+ if (next_block_free) {
+#ifdef HMM_AUDIT_FAIL
+ AUDIT_BLOCK(next_head_ptr)
+#endif
+
+ if (next_head_ptr == desc->last_freed)
+ desc->last_freed = 0;
+ else
+ U(out_of_free_collection)(desc, next_head_ptr);
next_head_ptr =
- (head_record *) BAUS_FORWARD(head_ptr, head_ptr->block_size);
+ (head_record *) BAUS_FORWARD(head_ptr, (U(size_bau)) i);
+ }
- next_block_free =
- (next_head_ptr == desc->last_freed) ||
- !IS_BLOCK_ALLOCATED(next_head_ptr);
+ /* Set i to number of "extra" BAUs. */
+ i -= n;
- if (next_block_free)
- /* Block can expand into next free block. */
- i += BLOCK_BAUS(next_head_ptr);
+ if (i < MIN_BLOCK_BAUS)
+ /* Not enough extra BAUs to be a block on their own, so just keep them
+ ** in the block being resized.
+ */
+ {
+ n += i;
+ i = n;
+ } else {
+ /* There are enough "leftover" BAUs in the next block to
+ ** form a remainder block. */
- if (n > i)
- /* Not enough room for block to expand. */
- return(-1);
+ head_record *rem_head_ptr;
- if (next_block_free)
- {
+ rem_head_ptr = (head_record *) BAUS_FORWARD(head_ptr, n);
+
+ rem_head_ptr->previous_block_size = (U(size_bau)) n;
+ rem_head_ptr->block_size = (U(size_bau)) i;
+
+ if (desc->last_freed) {
#ifdef HMM_AUDIT_FAIL
- AUDIT_BLOCK(next_head_ptr)
+ AUDIT_BLOCK(desc->last_freed)
#endif
- if (next_head_ptr == desc->last_freed)
- desc->last_freed = 0;
- else
- U(out_of_free_collection)(desc, next_head_ptr);
+ U(into_free_collection)(desc, (head_record *)(desc->last_freed));
- next_head_ptr =
- (head_record *) BAUS_FORWARD(head_ptr, (U(size_bau)) i);
+ desc->last_freed = 0;
}
- /* Set i to number of "extra" BAUs. */
- i -= n;
+ desc->last_freed = rem_head_ptr;
+ }
- if (i < MIN_BLOCK_BAUS)
- /* Not enough extra BAUs to be a block on their own, so just keep them
- ** in the block being resized.
- */
- {
- n += i;
- i = n;
- }
- else
- {
- /* There are enough "leftover" BAUs in the next block to
- ** form a remainder block. */
+ head_ptr->block_size = (U(size_bau)) n;
+ next_head_ptr->previous_block_size = (U(size_bau)) i;
- head_record *rem_head_ptr;
-
- rem_head_ptr = (head_record *) BAUS_FORWARD(head_ptr, n);
-
- rem_head_ptr->previous_block_size = (U(size_bau)) n;
- rem_head_ptr->block_size = (U(size_bau)) i;
-
- if (desc->last_freed)
- {
-#ifdef HMM_AUDIT_FAIL
- AUDIT_BLOCK(desc->last_freed)
-#endif
-
- U(into_free_collection)(desc, (head_record *)(desc->last_freed));
-
- desc->last_freed = 0;
- }
-
- desc->last_freed = rem_head_ptr;
- }
-
- head_ptr->block_size = (U(size_bau)) n;
- next_head_ptr->previous_block_size = (U(size_bau)) i;
-
- return(0);
+ return(0);
}
diff --git a/vpx_mem/memory_manager/hmm_shrink.c b/vpx_mem/memory_manager/hmm_shrink.c
index 78fe268..f80aeea 100644
--- a/vpx_mem/memory_manager/hmm_shrink.c
+++ b/vpx_mem/memory_manager/hmm_shrink.c
@@ -15,97 +15,89 @@
#include "hmm_intrnl.h"
-void U(shrink_chunk)(U(descriptor) *desc, U(size_bau) n_baus_to_shrink)
-{
- head_record *dummy_end_block = (head_record *)
- BAUS_BACKWARD(desc->end_of_shrinkable_chunk, DUMMY_END_BLOCK_BAUS);
+void U(shrink_chunk)(U(descriptor) *desc, U(size_bau) n_baus_to_shrink) {
+ head_record *dummy_end_block = (head_record *)
+ BAUS_BACKWARD(desc->end_of_shrinkable_chunk, DUMMY_END_BLOCK_BAUS);
#ifdef HMM_AUDIT_FAIL
- if (dummy_end_block->block_size != 0)
- /* Chunk does not have valid dummy end block. */
- HMM_AUDIT_FAIL
+ if (dummy_end_block->block_size != 0)
+ /* Chunk does not have valid dummy end block. */
+ HMM_AUDIT_FAIL
#endif
- if (n_baus_to_shrink)
- {
- head_record *last_block = (head_record *)
- BAUS_BACKWARD(
- dummy_end_block, dummy_end_block->previous_block_size);
+ if (n_baus_to_shrink) {
+ head_record *last_block = (head_record *)
+ BAUS_BACKWARD(
+ dummy_end_block, dummy_end_block->previous_block_size);
#ifdef HMM_AUDIT_FAIL
- AUDIT_BLOCK(last_block)
+ AUDIT_BLOCK(last_block)
#endif
- if (last_block == desc->last_freed)
- {
- U(size_bau) bs = BLOCK_BAUS(last_block);
+ if (last_block == desc->last_freed) {
+ U(size_bau) bs = BLOCK_BAUS(last_block);
- /* Chunk will not be shrunk out of existence if
- ** 1. There is at least one allocated block in the chunk
- ** and the amount to shrink is exactly the size of the
- ** last block, OR
- ** 2. After the last block is shrunk, there will be enough
- ** BAUs left in it to form a minimal size block. */
- int chunk_will_survive =
- (PREV_BLOCK_BAUS(last_block) && (n_baus_to_shrink == bs)) ||
- (n_baus_to_shrink <= (U(size_bau))(bs - MIN_BLOCK_BAUS));
+ /* Chunk will not be shrunk out of existence if
+ ** 1. There is at least one allocated block in the chunk
+ ** and the amount to shrink is exactly the size of the
+ ** last block, OR
+ ** 2. After the last block is shrunk, there will be enough
+ ** BAUs left in it to form a minimal size block. */
+ int chunk_will_survive =
+ (PREV_BLOCK_BAUS(last_block) && (n_baus_to_shrink == bs)) ||
+ (n_baus_to_shrink <= (U(size_bau))(bs - MIN_BLOCK_BAUS));
- if (chunk_will_survive ||
- (!PREV_BLOCK_BAUS(last_block) &&
- (n_baus_to_shrink ==
- (U(size_bau))(bs + DUMMY_END_BLOCK_BAUS))))
- {
- desc->last_freed = 0;
+ if (chunk_will_survive ||
+ (!PREV_BLOCK_BAUS(last_block) &&
+ (n_baus_to_shrink ==
+ (U(size_bau))(bs + DUMMY_END_BLOCK_BAUS)))) {
+ desc->last_freed = 0;
- if (chunk_will_survive)
- {
- bs -= n_baus_to_shrink;
+ if (chunk_will_survive) {
+ bs -= n_baus_to_shrink;
- if (bs)
- {
- /* The last (non-dummy) block was not completely
- ** eliminated by the shrink. */
+ if (bs) {
+ /* The last (non-dummy) block was not completely
+ ** eliminated by the shrink. */
- last_block->block_size = bs;
+ last_block->block_size = bs;
- /* Create new dummy end record.
- */
- dummy_end_block =
- (head_record *) BAUS_FORWARD(last_block, bs);
- dummy_end_block->previous_block_size = bs;
- dummy_end_block->block_size = 0;
+ /* Create new dummy end record.
+ */
+ dummy_end_block =
+ (head_record *) BAUS_FORWARD(last_block, bs);
+ dummy_end_block->previous_block_size = bs;
+ dummy_end_block->block_size = 0;
#ifdef HMM_AUDIT_FAIL
- if (desc->avl_tree_root)
- AUDIT_BLOCK(PTR_REC_TO_HEAD(desc->avl_tree_root))
+ if (desc->avl_tree_root)
+ AUDIT_BLOCK(PTR_REC_TO_HEAD(desc->avl_tree_root))
#endif
- U(into_free_collection)(desc, last_block);
- }
- else
- {
- /* The last (non-dummy) block was completely
- ** eliminated by the shrink. Make its head
- ** the new dummy end block.
- */
- last_block->block_size = 0;
- last_block->previous_block_size &= ~HIGH_BIT_BAU_SIZE;
- }
- }
- }
-
-#ifdef HMM_AUDIT_FAIL
- else
- HMM_AUDIT_FAIL
-#endif
- }
-
-#ifdef HMM_AUDIT_FAIL
- else
- HMM_AUDIT_FAIL
-#endif
+ U(into_free_collection)(desc, last_block);
+ } else {
+ /* The last (non-dummy) block was completely
+ ** eliminated by the shrink. Make its head
+ ** the new dummy end block.
+ */
+ last_block->block_size = 0;
+ last_block->previous_block_size &= ~HIGH_BIT_BAU_SIZE;
}
+ }
+ }
+
+#ifdef HMM_AUDIT_FAIL
+ else
+ HMM_AUDIT_FAIL
+#endif
+ }
+
+#ifdef HMM_AUDIT_FAIL
+ else
+ HMM_AUDIT_FAIL
+#endif
+ }
}
diff --git a/vpx_mem/memory_manager/hmm_true.c b/vpx_mem/memory_manager/hmm_true.c
index 3f7be8f..4428c3e 100644
--- a/vpx_mem/memory_manager/hmm_true.c
+++ b/vpx_mem/memory_manager/hmm_true.c
@@ -15,18 +15,17 @@
#include "hmm_intrnl.h"
-U(size_aau) U(true_size)(void *payload_ptr)
-{
- register head_record *head_ptr = PTR_REC_TO_HEAD(payload_ptr);
+U(size_aau) U(true_size)(void *payload_ptr) {
+ register head_record *head_ptr = PTR_REC_TO_HEAD(payload_ptr);
#ifdef HMM_AUDIT_FAIL
- AUDIT_BLOCK(head_ptr)
+ AUDIT_BLOCK(head_ptr)
#endif
- /* Convert block size from BAUs to AAUs. Subtract head size, leaving
- ** payload size.
- */
- return(
- (BLOCK_BAUS(head_ptr) * ((U(size_aau)) HMM_BLOCK_ALIGN_UNIT)) -
- HEAD_AAUS);
+ /* Convert block size from BAUs to AAUs. Subtract head size, leaving
+ ** payload size.
+ */
+ return(
+ (BLOCK_BAUS(head_ptr) * ((U(size_aau)) HMM_BLOCK_ALIGN_UNIT)) -
+ HEAD_AAUS);
}
diff --git a/vpx_mem/memory_manager/include/cavl_if.h b/vpx_mem/memory_manager/include/cavl_if.h
index 1b2c9b7..ec6e525 100644
--- a/vpx_mem/memory_manager/include/cavl_if.h
+++ b/vpx_mem/memory_manager/include/cavl_if.h
@@ -32,13 +32,12 @@
#ifndef AVL_SEARCH_TYPE_DEFINED_
#define AVL_SEARCH_TYPE_DEFINED_
-typedef enum
-{
- AVL_EQUAL = 1,
- AVL_LESS = 2,
- AVL_GREATER = 4,
- AVL_LESS_EQUAL = AVL_EQUAL | AVL_LESS,
- AVL_GREATER_EQUAL = AVL_EQUAL | AVL_GREATER
+typedef enum {
+ AVL_EQUAL = 1,
+ AVL_LESS = 2,
+ AVL_GREATER = 4,
+ AVL_LESS_EQUAL = AVL_EQUAL | AVL_LESS,
+ AVL_GREATER_EQUAL = AVL_EQUAL | AVL_GREATER
}
avl_search_type;
@@ -75,15 +74,14 @@
#endif
-typedef struct
-{
+typedef struct {
#ifdef AVL_INSIDE_STRUCT
- AVL_INSIDE_STRUCT
+ AVL_INSIDE_STRUCT
#endif
- AVL_HANDLE root;
+ AVL_HANDLE root;
}
L_(avl);
@@ -108,7 +106,7 @@
#ifdef AVL_BUILD_ITER_TYPE
L_SC int L_(build)(
- L_(avl) *tree, AVL_BUILD_ITER_TYPE p, L_SIZE num_nodes);
+ L_(avl) *tree, AVL_BUILD_ITER_TYPE p, L_SIZE num_nodes);
#endif
@@ -153,7 +151,7 @@
/* Maximum depth may be more than number of bits in a long. */
#define L_BIT_ARR_DEFN(NAME) \
- unsigned long NAME[((AVL_MAX_DEPTH) + L_LONG_BIT - 1) / L_LONG_BIT];
+ unsigned long NAME[((AVL_MAX_DEPTH) + L_LONG_BIT - 1) / L_LONG_BIT];
#else
@@ -164,29 +162,28 @@
#endif
/* Iterator structure. */
-typedef struct
-{
- /* Tree being iterated over. */
- L_(avl) *tree_;
+typedef struct {
+ /* Tree being iterated over. */
+ L_(avl) *tree_;
- /* Records a path into the tree. If bit n is true, indicates
- ** take greater branch from the nth node in the path, otherwise
- ** take the less branch. bit 0 gives branch from root, and
- ** so on. */
- L_BIT_ARR_DEFN(branch)
+ /* Records a path into the tree. If bit n is true, indicates
+ ** take greater branch from the nth node in the path, otherwise
+ ** take the less branch. bit 0 gives branch from root, and
+ ** so on. */
+ L_BIT_ARR_DEFN(branch)
- /* Zero-based depth of path into tree. */
- unsigned depth;
+ /* Zero-based depth of path into tree. */
+ unsigned depth;
- /* Handles of nodes in path from root to current node (returned by *). */
- AVL_HANDLE path_h[(AVL_MAX_DEPTH) - 1];
+ /* Handles of nodes in path from root to current node (returned by *). */
+ AVL_HANDLE path_h[(AVL_MAX_DEPTH) - 1];
}
L_(iter);
/* Iterator function prototypes. */
L_SC void L_(start_iter)(
- L_(avl) *tree, L_(iter) *iter, AVL_KEY k, avl_search_type st);
+ L_(avl) *tree, L_(iter) *iter, AVL_KEY k, avl_search_type st);
L_SC void L_(start_iter_least)(L_(avl) *tree, L_(iter) *iter);
diff --git a/vpx_mem/memory_manager/include/cavl_impl.h b/vpx_mem/memory_manager/include/cavl_impl.h
index 5e165dd..cf7deb7 100644
--- a/vpx_mem/memory_manager/include/cavl_impl.h
+++ b/vpx_mem/memory_manager/include/cavl_impl.h
@@ -110,16 +110,16 @@
#define L_BIT_ARR_DEFN(NAME) unsigned long NAME[L_BIT_ARR_LONGS];
#define L_BIT_ARR_VAL(BIT_ARR, BIT_NUM) \
- ((BIT_ARR)[(BIT_NUM) / L_LONG_BIT] & (1L << ((BIT_NUM) % L_LONG_BIT)))
+ ((BIT_ARR)[(BIT_NUM) / L_LONG_BIT] & (1L << ((BIT_NUM) % L_LONG_BIT)))
#define L_BIT_ARR_0(BIT_ARR, BIT_NUM) \
- (BIT_ARR)[(BIT_NUM) / L_LONG_BIT] &= ~(1L << ((BIT_NUM) % L_LONG_BIT));
+ (BIT_ARR)[(BIT_NUM) / L_LONG_BIT] &= ~(1L << ((BIT_NUM) % L_LONG_BIT));
#define L_BIT_ARR_1(BIT_ARR, BIT_NUM) \
- (BIT_ARR)[(BIT_NUM) / L_LONG_BIT] |= 1L << ((BIT_NUM) % L_LONG_BIT);
+ (BIT_ARR)[(BIT_NUM) / L_LONG_BIT] |= 1L << ((BIT_NUM) % L_LONG_BIT);
#define L_BIT_ARR_ALL(BIT_ARR, BIT_VAL) \
- { int i = L_BIT_ARR_LONGS; do (BIT_ARR)[--i] = 0L - (BIT_VAL); while(i); }
+ { int i = L_BIT_ARR_LONGS; do (BIT_ARR)[--i] = 0L - (BIT_VAL); while(i); }
#else /* The bit array can definitely fit in one long */
@@ -138,7 +138,7 @@
#ifdef AVL_READ_ERRORS_HAPPEN
#define L_CHECK_READ_ERROR(ERROR_RETURN) \
- { if (AVL_READ_ERROR) return(ERROR_RETURN); }
+ { if (AVL_READ_ERROR) return(ERROR_RETURN); }
#else
@@ -179,18 +179,16 @@
#if (L_IMPL_MASK & AVL_IMPL_INIT)
-L_SC void L_(init)(L_(avl) *l_tree)
-{
- l_tree->root = AVL_NULL;
+L_SC void L_(init)(L_(avl) *l_tree) {
+ l_tree->root = AVL_NULL;
}
#endif
#if (L_IMPL_MASK & AVL_IMPL_IS_EMPTY)
-L_SC int L_(is_empty)(L_(avl) *l_tree)
-{
- return(l_tree->root == AVL_NULL);
+L_SC int L_(is_empty)(L_(avl) *l_tree) {
+ return(l_tree->root == AVL_NULL);
}
#endif
@@ -201,358 +199,305 @@
/* Balances subtree, returns handle of root node of subtree after balancing.
*/
-L_SC AVL_HANDLE L_(balance)(L_BALANCE_PARAM_DECL_PREFIX AVL_HANDLE bal_h)
-{
- AVL_HANDLE deep_h;
+L_SC AVL_HANDLE L_(balance)(L_BALANCE_PARAM_DECL_PREFIX AVL_HANDLE bal_h) {
+ AVL_HANDLE deep_h;
- /* Either the "greater than" or the "less than" subtree of
- ** this node has to be 2 levels deeper (or else it wouldn't
- ** need balancing).
- */
- if (AVL_GET_BALANCE_FACTOR(bal_h) > 0)
- {
- /* "Greater than" subtree is deeper. */
+ /* Either the "greater than" or the "less than" subtree of
+ ** this node has to be 2 levels deeper (or else it wouldn't
+ ** need balancing).
+ */
+ if (AVL_GET_BALANCE_FACTOR(bal_h) > 0) {
+ /* "Greater than" subtree is deeper. */
- deep_h = AVL_GET_GREATER(bal_h, 1);
+ deep_h = AVL_GET_GREATER(bal_h, 1);
- L_CHECK_READ_ERROR(AVL_NULL)
+ L_CHECK_READ_ERROR(AVL_NULL)
- if (AVL_GET_BALANCE_FACTOR(deep_h) < 0)
- {
- int bf;
+ if (AVL_GET_BALANCE_FACTOR(deep_h) < 0) {
+ int bf;
- AVL_HANDLE old_h = bal_h;
- bal_h = AVL_GET_LESS(deep_h, 1);
- L_CHECK_READ_ERROR(AVL_NULL)
- AVL_SET_GREATER(old_h, AVL_GET_LESS(bal_h, 1))
- AVL_SET_LESS(deep_h, AVL_GET_GREATER(bal_h, 1))
- AVL_SET_LESS(bal_h, old_h)
- AVL_SET_GREATER(bal_h, deep_h)
+ AVL_HANDLE old_h = bal_h;
+ bal_h = AVL_GET_LESS(deep_h, 1);
+ L_CHECK_READ_ERROR(AVL_NULL)
+ AVL_SET_GREATER(old_h, AVL_GET_LESS(bal_h, 1))
+ AVL_SET_LESS(deep_h, AVL_GET_GREATER(bal_h, 1))
+ AVL_SET_LESS(bal_h, old_h)
+ AVL_SET_GREATER(bal_h, deep_h)
- bf = AVL_GET_BALANCE_FACTOR(bal_h);
+ bf = AVL_GET_BALANCE_FACTOR(bal_h);
- if (bf != 0)
- {
- if (bf > 0)
- {
- AVL_SET_BALANCE_FACTOR(old_h, -1)
- AVL_SET_BALANCE_FACTOR(deep_h, 0)
- }
- else
- {
- AVL_SET_BALANCE_FACTOR(deep_h, 1)
- AVL_SET_BALANCE_FACTOR(old_h, 0)
- }
-
- AVL_SET_BALANCE_FACTOR(bal_h, 0)
- }
- else
- {
- AVL_SET_BALANCE_FACTOR(old_h, 0)
- AVL_SET_BALANCE_FACTOR(deep_h, 0)
- }
+ if (bf != 0) {
+ if (bf > 0) {
+ AVL_SET_BALANCE_FACTOR(old_h, -1)
+ AVL_SET_BALANCE_FACTOR(deep_h, 0)
+ } else {
+ AVL_SET_BALANCE_FACTOR(deep_h, 1)
+ AVL_SET_BALANCE_FACTOR(old_h, 0)
}
- else
- {
- AVL_SET_GREATER(bal_h, AVL_GET_LESS(deep_h, 0))
- AVL_SET_LESS(deep_h, bal_h)
- if (AVL_GET_BALANCE_FACTOR(deep_h) == 0)
- {
- AVL_SET_BALANCE_FACTOR(deep_h, -1)
- AVL_SET_BALANCE_FACTOR(bal_h, 1)
- }
- else
- {
- AVL_SET_BALANCE_FACTOR(deep_h, 0)
- AVL_SET_BALANCE_FACTOR(bal_h, 0)
- }
+ AVL_SET_BALANCE_FACTOR(bal_h, 0)
+ } else {
+ AVL_SET_BALANCE_FACTOR(old_h, 0)
+ AVL_SET_BALANCE_FACTOR(deep_h, 0)
+ }
+ } else {
+ AVL_SET_GREATER(bal_h, AVL_GET_LESS(deep_h, 0))
+ AVL_SET_LESS(deep_h, bal_h)
- bal_h = deep_h;
- }
+ if (AVL_GET_BALANCE_FACTOR(deep_h) == 0) {
+ AVL_SET_BALANCE_FACTOR(deep_h, -1)
+ AVL_SET_BALANCE_FACTOR(bal_h, 1)
+ } else {
+ AVL_SET_BALANCE_FACTOR(deep_h, 0)
+ AVL_SET_BALANCE_FACTOR(bal_h, 0)
+ }
+
+ bal_h = deep_h;
}
- else
- {
- /* "Less than" subtree is deeper. */
+ } else {
+ /* "Less than" subtree is deeper. */
- deep_h = AVL_GET_LESS(bal_h, 1);
- L_CHECK_READ_ERROR(AVL_NULL)
+ deep_h = AVL_GET_LESS(bal_h, 1);
+ L_CHECK_READ_ERROR(AVL_NULL)
- if (AVL_GET_BALANCE_FACTOR(deep_h) > 0)
- {
- int bf;
- AVL_HANDLE old_h = bal_h;
- bal_h = AVL_GET_GREATER(deep_h, 1);
- L_CHECK_READ_ERROR(AVL_NULL)
- AVL_SET_LESS(old_h, AVL_GET_GREATER(bal_h, 0))
- AVL_SET_GREATER(deep_h, AVL_GET_LESS(bal_h, 0))
- AVL_SET_GREATER(bal_h, old_h)
- AVL_SET_LESS(bal_h, deep_h)
+ if (AVL_GET_BALANCE_FACTOR(deep_h) > 0) {
+ int bf;
+ AVL_HANDLE old_h = bal_h;
+ bal_h = AVL_GET_GREATER(deep_h, 1);
+ L_CHECK_READ_ERROR(AVL_NULL)
+ AVL_SET_LESS(old_h, AVL_GET_GREATER(bal_h, 0))
+ AVL_SET_GREATER(deep_h, AVL_GET_LESS(bal_h, 0))
+ AVL_SET_GREATER(bal_h, old_h)
+ AVL_SET_LESS(bal_h, deep_h)
- bf = AVL_GET_BALANCE_FACTOR(bal_h);
+ bf = AVL_GET_BALANCE_FACTOR(bal_h);
- if (bf != 0)
- {
- if (bf < 0)
- {
- AVL_SET_BALANCE_FACTOR(old_h, 1)
- AVL_SET_BALANCE_FACTOR(deep_h, 0)
- }
- else
- {
- AVL_SET_BALANCE_FACTOR(deep_h, -1)
- AVL_SET_BALANCE_FACTOR(old_h, 0)
- }
-
- AVL_SET_BALANCE_FACTOR(bal_h, 0)
- }
- else
- {
- AVL_SET_BALANCE_FACTOR(old_h, 0)
- AVL_SET_BALANCE_FACTOR(deep_h, 0)
- }
+ if (bf != 0) {
+ if (bf < 0) {
+ AVL_SET_BALANCE_FACTOR(old_h, 1)
+ AVL_SET_BALANCE_FACTOR(deep_h, 0)
+ } else {
+ AVL_SET_BALANCE_FACTOR(deep_h, -1)
+ AVL_SET_BALANCE_FACTOR(old_h, 0)
}
- else
- {
- AVL_SET_LESS(bal_h, AVL_GET_GREATER(deep_h, 0))
- AVL_SET_GREATER(deep_h, bal_h)
- if (AVL_GET_BALANCE_FACTOR(deep_h) == 0)
- {
- AVL_SET_BALANCE_FACTOR(deep_h, 1)
- AVL_SET_BALANCE_FACTOR(bal_h, -1)
- }
- else
- {
- AVL_SET_BALANCE_FACTOR(deep_h, 0)
- AVL_SET_BALANCE_FACTOR(bal_h, 0)
- }
+ AVL_SET_BALANCE_FACTOR(bal_h, 0)
+ } else {
+ AVL_SET_BALANCE_FACTOR(old_h, 0)
+ AVL_SET_BALANCE_FACTOR(deep_h, 0)
+ }
+ } else {
+ AVL_SET_LESS(bal_h, AVL_GET_GREATER(deep_h, 0))
+ AVL_SET_GREATER(deep_h, bal_h)
- bal_h = deep_h;
- }
+ if (AVL_GET_BALANCE_FACTOR(deep_h) == 0) {
+ AVL_SET_BALANCE_FACTOR(deep_h, 1)
+ AVL_SET_BALANCE_FACTOR(bal_h, -1)
+ } else {
+ AVL_SET_BALANCE_FACTOR(deep_h, 0)
+ AVL_SET_BALANCE_FACTOR(bal_h, 0)
+ }
+
+ bal_h = deep_h;
}
+ }
- return(bal_h);
+ return(bal_h);
}
-L_SC AVL_HANDLE L_(insert)(L_(avl) *l_tree, AVL_HANDLE h)
-{
- AVL_SET_LESS(h, AVL_NULL)
- AVL_SET_GREATER(h, AVL_NULL)
- AVL_SET_BALANCE_FACTOR(h, 0)
+L_SC AVL_HANDLE L_(insert)(L_(avl) *l_tree, AVL_HANDLE h) {
+ AVL_SET_LESS(h, AVL_NULL)
+ AVL_SET_GREATER(h, AVL_NULL)
+ AVL_SET_BALANCE_FACTOR(h, 0)
- if (l_tree->root == AVL_NULL)
- l_tree->root = h;
- else
- {
- /* Last unbalanced node encountered in search for insertion point. */
- AVL_HANDLE unbal = AVL_NULL;
- /* Parent of last unbalanced node. */
- AVL_HANDLE parent_unbal = AVL_NULL;
- /* Balance factor of last unbalanced node. */
- int unbal_bf;
+ if (l_tree->root == AVL_NULL)
+ l_tree->root = h;
+ else {
+ /* Last unbalanced node encountered in search for insertion point. */
+ AVL_HANDLE unbal = AVL_NULL;
+ /* Parent of last unbalanced node. */
+ AVL_HANDLE parent_unbal = AVL_NULL;
+ /* Balance factor of last unbalanced node. */
+ int unbal_bf;
- /* Zero-based depth in tree. */
- unsigned depth = 0, unbal_depth = 0;
+ /* Zero-based depth in tree. */
+ unsigned depth = 0, unbal_depth = 0;
- /* Records a path into the tree. If bit n is true, indicates
- ** take greater branch from the nth node in the path, otherwise
- ** take the less branch. bit 0 gives branch from root, and
- ** so on. */
- L_BIT_ARR_DEFN(branch)
+ /* Records a path into the tree. If bit n is true, indicates
+ ** take greater branch from the nth node in the path, otherwise
+ ** take the less branch. bit 0 gives branch from root, and
+ ** so on. */
+ L_BIT_ARR_DEFN(branch)
- AVL_HANDLE hh = l_tree->root;
- AVL_HANDLE parent = AVL_NULL;
- int cmp;
+ AVL_HANDLE hh = l_tree->root;
+ AVL_HANDLE parent = AVL_NULL;
+ int cmp;
- do
- {
- if (AVL_GET_BALANCE_FACTOR(hh) != 0)
- {
- unbal = hh;
- parent_unbal = parent;
- unbal_depth = depth;
- }
+ do {
+ if (AVL_GET_BALANCE_FACTOR(hh) != 0) {
+ unbal = hh;
+ parent_unbal = parent;
+ unbal_depth = depth;
+ }
- cmp = AVL_COMPARE_NODE_NODE(h, hh);
+ cmp = AVL_COMPARE_NODE_NODE(h, hh);
- if (cmp == 0)
- /* Duplicate key. */
- return(hh);
+ if (cmp == 0)
+ /* Duplicate key. */
+ return(hh);
- parent = hh;
+ parent = hh;
- if (cmp > 0)
- {
- hh = AVL_GET_GREATER(hh, 1);
- L_BIT_ARR_1(branch, depth)
- }
- else
- {
- hh = AVL_GET_LESS(hh, 1);
- L_BIT_ARR_0(branch, depth)
- }
+ if (cmp > 0) {
+ hh = AVL_GET_GREATER(hh, 1);
+ L_BIT_ARR_1(branch, depth)
+ } else {
+ hh = AVL_GET_LESS(hh, 1);
+ L_BIT_ARR_0(branch, depth)
+ }
- L_CHECK_READ_ERROR(AVL_NULL)
- depth++;
- }
- while (hh != AVL_NULL);
+ L_CHECK_READ_ERROR(AVL_NULL)
+ depth++;
+ } while (hh != AVL_NULL);
- /* Add node to insert as leaf of tree. */
- if (cmp < 0)
- AVL_SET_LESS(parent, h)
- else
- AVL_SET_GREATER(parent, h)
+ /* Add node to insert as leaf of tree. */
+ if (cmp < 0)
+ AVL_SET_LESS(parent, h)
+ else
+ AVL_SET_GREATER(parent, h)
- depth = unbal_depth;
+ depth = unbal_depth;
- if (unbal == AVL_NULL)
- hh = l_tree->root;
- else
- {
- cmp = L_BIT_ARR_VAL(branch, depth) ? 1 : -1;
- depth++;
- unbal_bf = AVL_GET_BALANCE_FACTOR(unbal);
+ if (unbal == AVL_NULL)
+ hh = l_tree->root;
+ else {
+ cmp = L_BIT_ARR_VAL(branch, depth) ? 1 : -1;
+ depth++;
+ unbal_bf = AVL_GET_BALANCE_FACTOR(unbal);
- if (cmp < 0)
- unbal_bf--;
- else /* cmp > 0 */
- unbal_bf++;
+ if (cmp < 0)
+ unbal_bf--;
+ else /* cmp > 0 */
+ unbal_bf++;
- hh = cmp < 0 ? AVL_GET_LESS(unbal, 1) : AVL_GET_GREATER(unbal, 1);
- L_CHECK_READ_ERROR(AVL_NULL)
+ hh = cmp < 0 ? AVL_GET_LESS(unbal, 1) : AVL_GET_GREATER(unbal, 1);
+ L_CHECK_READ_ERROR(AVL_NULL)
- if ((unbal_bf != -2) && (unbal_bf != 2))
- {
- /* No rebalancing of tree is necessary. */
- AVL_SET_BALANCE_FACTOR(unbal, unbal_bf)
- unbal = AVL_NULL;
- }
- }
-
- if (hh != AVL_NULL)
- while (h != hh)
- {
- cmp = L_BIT_ARR_VAL(branch, depth) ? 1 : -1;
- depth++;
-
- if (cmp < 0)
- {
- AVL_SET_BALANCE_FACTOR(hh, -1)
- hh = AVL_GET_LESS(hh, 1);
- }
- else /* cmp > 0 */
- {
- AVL_SET_BALANCE_FACTOR(hh, 1)
- hh = AVL_GET_GREATER(hh, 1);
- }
-
- L_CHECK_READ_ERROR(AVL_NULL)
- }
-
- if (unbal != AVL_NULL)
- {
- unbal = L_(balance)(L_BALANCE_PARAM_CALL_PREFIX unbal);
- L_CHECK_READ_ERROR(AVL_NULL)
-
- if (parent_unbal == AVL_NULL)
- l_tree->root = unbal;
- else
- {
- depth = unbal_depth - 1;
- cmp = L_BIT_ARR_VAL(branch, depth) ? 1 : -1;
-
- if (cmp < 0)
- AVL_SET_LESS(parent_unbal, unbal)
- else /* cmp > 0 */
- AVL_SET_GREATER(parent_unbal, unbal)
- }
- }
-
+ if ((unbal_bf != -2) && (unbal_bf != 2)) {
+ /* No rebalancing of tree is necessary. */
+ AVL_SET_BALANCE_FACTOR(unbal, unbal_bf)
+ unbal = AVL_NULL;
+ }
}
- return(h);
+ if (hh != AVL_NULL)
+ while (h != hh) {
+ cmp = L_BIT_ARR_VAL(branch, depth) ? 1 : -1;
+ depth++;
+
+ if (cmp < 0) {
+ AVL_SET_BALANCE_FACTOR(hh, -1)
+ hh = AVL_GET_LESS(hh, 1);
+ } else { /* cmp > 0 */
+ AVL_SET_BALANCE_FACTOR(hh, 1)
+ hh = AVL_GET_GREATER(hh, 1);
+ }
+
+ L_CHECK_READ_ERROR(AVL_NULL)
+ }
+
+ if (unbal != AVL_NULL) {
+ unbal = L_(balance)(L_BALANCE_PARAM_CALL_PREFIX unbal);
+ L_CHECK_READ_ERROR(AVL_NULL)
+
+ if (parent_unbal == AVL_NULL)
+ l_tree->root = unbal;
+ else {
+ depth = unbal_depth - 1;
+ cmp = L_BIT_ARR_VAL(branch, depth) ? 1 : -1;
+
+ if (cmp < 0)
+ AVL_SET_LESS(parent_unbal, unbal)
+ else /* cmp > 0 */
+ AVL_SET_GREATER(parent_unbal, unbal)
+ }
+ }
+
+ }
+
+ return(h);
}
#endif
#if (L_IMPL_MASK & AVL_IMPL_SEARCH)
-L_SC AVL_HANDLE L_(search)(L_(avl) *l_tree, AVL_KEY k, avl_search_type st)
-{
- int cmp, target_cmp;
- AVL_HANDLE match_h = AVL_NULL;
- AVL_HANDLE h = l_tree->root;
+L_SC AVL_HANDLE L_(search)(L_(avl) *l_tree, AVL_KEY k, avl_search_type st) {
+ int cmp, target_cmp;
+ AVL_HANDLE match_h = AVL_NULL;
+ AVL_HANDLE h = l_tree->root;
- if (st & AVL_LESS)
- target_cmp = 1;
- else if (st & AVL_GREATER)
- target_cmp = -1;
- else
- target_cmp = 0;
+ if (st & AVL_LESS)
+ target_cmp = 1;
+ else if (st & AVL_GREATER)
+ target_cmp = -1;
+ else
+ target_cmp = 0;
- while (h != AVL_NULL)
- {
- cmp = AVL_COMPARE_KEY_NODE(k, h);
+ while (h != AVL_NULL) {
+ cmp = AVL_COMPARE_KEY_NODE(k, h);
- if (cmp == 0)
- {
- if (st & AVL_EQUAL)
- {
- match_h = h;
- break;
- }
+ if (cmp == 0) {
+ if (st & AVL_EQUAL) {
+ match_h = h;
+ break;
+ }
- cmp = -target_cmp;
- }
- else if (target_cmp != 0)
- if (!((cmp ^ target_cmp) & L_MASK_HIGH_BIT))
- /* cmp and target_cmp are both positive or both negative. */
- match_h = h;
+ cmp = -target_cmp;
+ } else if (target_cmp != 0)
+ if (!((cmp ^ target_cmp) & L_MASK_HIGH_BIT))
+ /* cmp and target_cmp are both positive or both negative. */
+ match_h = h;
- h = cmp < 0 ? AVL_GET_LESS(h, 1) : AVL_GET_GREATER(h, 1);
- L_CHECK_READ_ERROR(AVL_NULL)
- }
+ h = cmp < 0 ? AVL_GET_LESS(h, 1) : AVL_GET_GREATER(h, 1);
+ L_CHECK_READ_ERROR(AVL_NULL)
+ }
- return(match_h);
+ return(match_h);
}
#endif
#if (L_IMPL_MASK & AVL_IMPL_SEARCH_LEAST)
-L_SC AVL_HANDLE L_(search_least)(L_(avl) *l_tree)
-{
- AVL_HANDLE h = l_tree->root;
- AVL_HANDLE parent = AVL_NULL;
+L_SC AVL_HANDLE L_(search_least)(L_(avl) *l_tree) {
+ AVL_HANDLE h = l_tree->root;
+ AVL_HANDLE parent = AVL_NULL;
- while (h != AVL_NULL)
- {
- parent = h;
- h = AVL_GET_LESS(h, 1);
- L_CHECK_READ_ERROR(AVL_NULL)
- }
+ while (h != AVL_NULL) {
+ parent = h;
+ h = AVL_GET_LESS(h, 1);
+ L_CHECK_READ_ERROR(AVL_NULL)
+ }
- return(parent);
+ return(parent);
}
#endif
#if (L_IMPL_MASK & AVL_IMPL_SEARCH_GREATEST)
-L_SC AVL_HANDLE L_(search_greatest)(L_(avl) *l_tree)
-{
- AVL_HANDLE h = l_tree->root;
- AVL_HANDLE parent = AVL_NULL;
+L_SC AVL_HANDLE L_(search_greatest)(L_(avl) *l_tree) {
+ AVL_HANDLE h = l_tree->root;
+ AVL_HANDLE parent = AVL_NULL;
- while (h != AVL_NULL)
- {
- parent = h;
- h = AVL_GET_GREATER(h, 1);
- L_CHECK_READ_ERROR(AVL_NULL)
- }
+ while (h != AVL_NULL) {
+ parent = h;
+ h = AVL_GET_GREATER(h, 1);
+ L_CHECK_READ_ERROR(AVL_NULL)
+ }
- return(parent);
+ return(parent);
}
#endif
@@ -564,284 +509,253 @@
*/
L_SC AVL_HANDLE L_(balance)(L_BALANCE_PARAM_DECL_PREFIX AVL_HANDLE bal_h);
-L_SC AVL_HANDLE L_(remove)(L_(avl) *l_tree, AVL_KEY k)
-{
- /* Zero-based depth in tree. */
- unsigned depth = 0, rm_depth;
+L_SC AVL_HANDLE L_(remove)(L_(avl) *l_tree, AVL_KEY k) {
+ /* Zero-based depth in tree. */
+ unsigned depth = 0, rm_depth;
- /* Records a path into the tree. If bit n is true, indicates
- ** take greater branch from the nth node in the path, otherwise
- ** take the less branch. bit 0 gives branch from root, and
- ** so on. */
- L_BIT_ARR_DEFN(branch)
+ /* Records a path into the tree. If bit n is true, indicates
+ ** take greater branch from the nth node in the path, otherwise
+ ** take the less branch. bit 0 gives branch from root, and
+ ** so on. */
+ L_BIT_ARR_DEFN(branch)
- AVL_HANDLE h = l_tree->root;
- AVL_HANDLE parent = AVL_NULL;
- AVL_HANDLE child;
- AVL_HANDLE path;
- int cmp, cmp_shortened_sub_with_path;
- int reduced_depth;
- int bf;
- AVL_HANDLE rm;
- AVL_HANDLE parent_rm;
+ AVL_HANDLE h = l_tree->root;
+ AVL_HANDLE parent = AVL_NULL;
+ AVL_HANDLE child;
+ AVL_HANDLE path;
+ int cmp, cmp_shortened_sub_with_path;
+ int reduced_depth;
+ int bf;
+ AVL_HANDLE rm;
+ AVL_HANDLE parent_rm;
- for (; ;)
- {
- if (h == AVL_NULL)
- /* No node in tree with given key. */
- return(AVL_NULL);
+ for (;;) {
+ if (h == AVL_NULL)
+ /* No node in tree with given key. */
+ return(AVL_NULL);
- cmp = AVL_COMPARE_KEY_NODE(k, h);
+ cmp = AVL_COMPARE_KEY_NODE(k, h);
- if (cmp == 0)
- /* Found node to remove. */
- break;
+ if (cmp == 0)
+ /* Found node to remove. */
+ break;
- parent = h;
+ parent = h;
- if (cmp > 0)
- {
- h = AVL_GET_GREATER(h, 1);
- L_BIT_ARR_1(branch, depth)
- }
- else
- {
- h = AVL_GET_LESS(h, 1);
- L_BIT_ARR_0(branch, depth)
- }
-
- L_CHECK_READ_ERROR(AVL_NULL)
- depth++;
- cmp_shortened_sub_with_path = cmp;
- }
-
- rm = h;
- parent_rm = parent;
- rm_depth = depth;
-
- /* If the node to remove is not a leaf node, we need to get a
- ** leaf node, or a node with a single leaf as its child, to put
- ** in the place of the node to remove. We will get the greatest
- ** node in the less subtree (of the node to remove), or the least
- ** node in the greater subtree. We take the leaf node from the
- ** deeper subtree, if there is one. */
-
- if (AVL_GET_BALANCE_FACTOR(h) < 0)
- {
- child = AVL_GET_LESS(h, 1);
- L_BIT_ARR_0(branch, depth)
- cmp = -1;
- }
- else
- {
- child = AVL_GET_GREATER(h, 1);
- L_BIT_ARR_1(branch, depth)
- cmp = 1;
+ if (cmp > 0) {
+ h = AVL_GET_GREATER(h, 1);
+ L_BIT_ARR_1(branch, depth)
+ } else {
+ h = AVL_GET_LESS(h, 1);
+ L_BIT_ARR_0(branch, depth)
}
L_CHECK_READ_ERROR(AVL_NULL)
depth++;
+ cmp_shortened_sub_with_path = cmp;
+ }
- if (child != AVL_NULL)
- {
- cmp = -cmp;
+ rm = h;
+ parent_rm = parent;
+ rm_depth = depth;
- do
- {
- parent = h;
- h = child;
+ /* If the node to remove is not a leaf node, we need to get a
+ ** leaf node, or a node with a single leaf as its child, to put
+ ** in the place of the node to remove. We will get the greatest
+ ** node in the less subtree (of the node to remove), or the least
+ ** node in the greater subtree. We take the leaf node from the
+ ** deeper subtree, if there is one. */
- if (cmp < 0)
- {
- child = AVL_GET_LESS(h, 1);
- L_BIT_ARR_0(branch, depth)
- }
- else
- {
- child = AVL_GET_GREATER(h, 1);
- L_BIT_ARR_1(branch, depth)
- }
+ if (AVL_GET_BALANCE_FACTOR(h) < 0) {
+ child = AVL_GET_LESS(h, 1);
+ L_BIT_ARR_0(branch, depth)
+ cmp = -1;
+ } else {
+ child = AVL_GET_GREATER(h, 1);
+ L_BIT_ARR_1(branch, depth)
+ cmp = 1;
+ }
- L_CHECK_READ_ERROR(AVL_NULL)
- depth++;
- }
- while (child != AVL_NULL);
+ L_CHECK_READ_ERROR(AVL_NULL)
+ depth++;
- if (parent == rm)
- /* Only went through do loop once. Deleted node will be replaced
- ** in the tree structure by one of its immediate children. */
- cmp_shortened_sub_with_path = -cmp;
+ if (child != AVL_NULL) {
+ cmp = -cmp;
+
+ do {
+ parent = h;
+ h = child;
+
+ if (cmp < 0) {
+ child = AVL_GET_LESS(h, 1);
+ L_BIT_ARR_0(branch, depth)
+ } else {
+ child = AVL_GET_GREATER(h, 1);
+ L_BIT_ARR_1(branch, depth)
+ }
+
+ L_CHECK_READ_ERROR(AVL_NULL)
+ depth++;
+ } while (child != AVL_NULL);
+
+ if (parent == rm)
+ /* Only went through do loop once. Deleted node will be replaced
+ ** in the tree structure by one of its immediate children. */
+ cmp_shortened_sub_with_path = -cmp;
+ else
+ cmp_shortened_sub_with_path = cmp;
+
+ /* Get the handle of the opposite child, which may not be null. */
+ child = cmp > 0 ? AVL_GET_LESS(h, 0) : AVL_GET_GREATER(h, 0);
+ }
+
+ if (parent == AVL_NULL)
+ /* There were only 1 or 2 nodes in this tree. */
+ l_tree->root = child;
+ else if (cmp_shortened_sub_with_path < 0)
+ AVL_SET_LESS(parent, child)
+ else
+ AVL_SET_GREATER(parent, child)
+
+ /* "path" is the parent of the subtree being eliminated or reduced
+ ** from a depth of 2 to 1. If "path" is the node to be removed, we
+ ** set path to the node we're about to poke into the position of the
+ ** node to be removed. */
+ path = parent == rm ? h : parent;
+
+ if (h != rm) {
+ /* Poke in the replacement for the node to be removed. */
+ AVL_SET_LESS(h, AVL_GET_LESS(rm, 0))
+ AVL_SET_GREATER(h, AVL_GET_GREATER(rm, 0))
+ AVL_SET_BALANCE_FACTOR(h, AVL_GET_BALANCE_FACTOR(rm))
+
+ if (parent_rm == AVL_NULL)
+ l_tree->root = h;
+ else {
+ depth = rm_depth - 1;
+
+ if (L_BIT_ARR_VAL(branch, depth))
+ AVL_SET_GREATER(parent_rm, h)
else
- cmp_shortened_sub_with_path = cmp;
+ AVL_SET_LESS(parent_rm, h)
+ }
+ }
- /* Get the handle of the opposite child, which may not be null. */
- child = cmp > 0 ? AVL_GET_LESS(h, 0) : AVL_GET_GREATER(h, 0);
+ if (path != AVL_NULL) {
+ /* Create a temporary linked list from the parent of the path node
+ ** to the root node. */
+ h = l_tree->root;
+ parent = AVL_NULL;
+ depth = 0;
+
+ while (h != path) {
+ if (L_BIT_ARR_VAL(branch, depth)) {
+ child = AVL_GET_GREATER(h, 1);
+ AVL_SET_GREATER(h, parent)
+ } else {
+ child = AVL_GET_LESS(h, 1);
+ AVL_SET_LESS(h, parent)
+ }
+
+ L_CHECK_READ_ERROR(AVL_NULL)
+ depth++;
+ parent = h;
+ h = child;
}
- if (parent == AVL_NULL)
- /* There were only 1 or 2 nodes in this tree. */
- l_tree->root = child;
- else if (cmp_shortened_sub_with_path < 0)
- AVL_SET_LESS(parent, child)
- else
- AVL_SET_GREATER(parent, child)
+ /* Climb from the path node to the root node using the linked
+ ** list, restoring the tree structure and rebalancing as necessary.
+ */
+ reduced_depth = 1;
+ cmp = cmp_shortened_sub_with_path;
- /* "path" is the parent of the subtree being eliminated or reduced
- ** from a depth of 2 to 1. If "path" is the node to be removed, we
- ** set path to the node we're about to poke into the position of the
- ** node to be removed. */
- path = parent == rm ? h : parent;
+ for (;;) {
+ if (reduced_depth) {
+ bf = AVL_GET_BALANCE_FACTOR(h);
- if (h != rm)
- {
- /* Poke in the replacement for the node to be removed. */
- AVL_SET_LESS(h, AVL_GET_LESS(rm, 0))
- AVL_SET_GREATER(h, AVL_GET_GREATER(rm, 0))
- AVL_SET_BALANCE_FACTOR(h, AVL_GET_BALANCE_FACTOR(rm))
+ if (cmp < 0)
+ bf++;
+ else /* cmp > 0 */
+ bf--;
- if (parent_rm == AVL_NULL)
- l_tree->root = h;
- else
- {
- depth = rm_depth - 1;
+ if ((bf == -2) || (bf == 2)) {
+ h = L_(balance)(L_BALANCE_PARAM_CALL_PREFIX h);
+ L_CHECK_READ_ERROR(AVL_NULL)
+ bf = AVL_GET_BALANCE_FACTOR(h);
+ } else
+ AVL_SET_BALANCE_FACTOR(h, bf)
+ reduced_depth = (bf == 0);
+ }
- if (L_BIT_ARR_VAL(branch, depth))
- AVL_SET_GREATER(parent_rm, h)
- else
- AVL_SET_LESS(parent_rm, h)
- }
+ if (parent == AVL_NULL)
+ break;
+
+ child = h;
+ h = parent;
+ depth--;
+ cmp = L_BIT_ARR_VAL(branch, depth) ? 1 : -1;
+
+ if (cmp < 0) {
+ parent = AVL_GET_LESS(h, 1);
+ AVL_SET_LESS(h, child)
+ } else {
+ parent = AVL_GET_GREATER(h, 1);
+ AVL_SET_GREATER(h, child)
+ }
+
+ L_CHECK_READ_ERROR(AVL_NULL)
}
- if (path != AVL_NULL)
- {
- /* Create a temporary linked list from the parent of the path node
- ** to the root node. */
- h = l_tree->root;
- parent = AVL_NULL;
- depth = 0;
+ l_tree->root = h;
+ }
- while (h != path)
- {
- if (L_BIT_ARR_VAL(branch, depth))
- {
- child = AVL_GET_GREATER(h, 1);
- AVL_SET_GREATER(h, parent)
- }
- else
- {
- child = AVL_GET_LESS(h, 1);
- AVL_SET_LESS(h, parent)
- }
-
- L_CHECK_READ_ERROR(AVL_NULL)
- depth++;
- parent = h;
- h = child;
- }
-
- /* Climb from the path node to the root node using the linked
- ** list, restoring the tree structure and rebalancing as necessary.
- */
- reduced_depth = 1;
- cmp = cmp_shortened_sub_with_path;
-
- for (; ;)
- {
- if (reduced_depth)
- {
- bf = AVL_GET_BALANCE_FACTOR(h);
-
- if (cmp < 0)
- bf++;
- else /* cmp > 0 */
- bf--;
-
- if ((bf == -2) || (bf == 2))
- {
- h = L_(balance)(L_BALANCE_PARAM_CALL_PREFIX h);
- L_CHECK_READ_ERROR(AVL_NULL)
- bf = AVL_GET_BALANCE_FACTOR(h);
- }
- else
- AVL_SET_BALANCE_FACTOR(h, bf)
- reduced_depth = (bf == 0);
- }
-
- if (parent == AVL_NULL)
- break;
-
- child = h;
- h = parent;
- depth--;
- cmp = L_BIT_ARR_VAL(branch, depth) ? 1 : -1;
-
- if (cmp < 0)
- {
- parent = AVL_GET_LESS(h, 1);
- AVL_SET_LESS(h, child)
- }
- else
- {
- parent = AVL_GET_GREATER(h, 1);
- AVL_SET_GREATER(h, child)
- }
-
- L_CHECK_READ_ERROR(AVL_NULL)
- }
-
- l_tree->root = h;
- }
-
- return(rm);
+ return(rm);
}
#endif
#if (L_IMPL_MASK & AVL_IMPL_SUBST)
-L_SC AVL_HANDLE L_(subst)(L_(avl) *l_tree, AVL_HANDLE new_node)
-{
- AVL_HANDLE h = l_tree->root;
- AVL_HANDLE parent = AVL_NULL;
- int cmp, last_cmp;
+L_SC AVL_HANDLE L_(subst)(L_(avl) *l_tree, AVL_HANDLE new_node) {
+ AVL_HANDLE h = l_tree->root;
+ AVL_HANDLE parent = AVL_NULL;
+ int cmp, last_cmp;
- /* Search for node already in tree with same key. */
- for (; ;)
- {
- if (h == AVL_NULL)
- /* No node in tree with same key as new node. */
- return(AVL_NULL);
+ /* Search for node already in tree with same key. */
+ for (;;) {
+ if (h == AVL_NULL)
+ /* No node in tree with same key as new node. */
+ return(AVL_NULL);
- cmp = AVL_COMPARE_NODE_NODE(new_node, h);
+ cmp = AVL_COMPARE_NODE_NODE(new_node, h);
- if (cmp == 0)
- /* Found the node to substitute new one for. */
- break;
+ if (cmp == 0)
+ /* Found the node to substitute new one for. */
+ break;
- last_cmp = cmp;
- parent = h;
- h = cmp < 0 ? AVL_GET_LESS(h, 1) : AVL_GET_GREATER(h, 1);
- L_CHECK_READ_ERROR(AVL_NULL)
- }
+ last_cmp = cmp;
+ parent = h;
+ h = cmp < 0 ? AVL_GET_LESS(h, 1) : AVL_GET_GREATER(h, 1);
+ L_CHECK_READ_ERROR(AVL_NULL)
+ }
- /* Copy tree housekeeping fields from node in tree to new node. */
- AVL_SET_LESS(new_node, AVL_GET_LESS(h, 0))
- AVL_SET_GREATER(new_node, AVL_GET_GREATER(h, 0))
- AVL_SET_BALANCE_FACTOR(new_node, AVL_GET_BALANCE_FACTOR(h))
+ /* Copy tree housekeeping fields from node in tree to new node. */
+ AVL_SET_LESS(new_node, AVL_GET_LESS(h, 0))
+ AVL_SET_GREATER(new_node, AVL_GET_GREATER(h, 0))
+ AVL_SET_BALANCE_FACTOR(new_node, AVL_GET_BALANCE_FACTOR(h))
- if (parent == AVL_NULL)
- /* New node is also new root. */
- l_tree->root = new_node;
- else
- {
- /* Make parent point to new node. */
- if (last_cmp < 0)
- AVL_SET_LESS(parent, new_node)
- else
- AVL_SET_GREATER(parent, new_node)
- }
+ if (parent == AVL_NULL)
+ /* New node is also new root. */
+ l_tree->root = new_node;
+ else {
+ /* Make parent point to new node. */
+ if (last_cmp < 0)
+ AVL_SET_LESS(parent, new_node)
+ else
+ AVL_SET_GREATER(parent, new_node)
+ }
- return(h);
+ return(h);
}
#endif
@@ -851,144 +765,136 @@
#if (L_IMPL_MASK & AVL_IMPL_BUILD)
L_SC int L_(build)(
- L_(avl) *l_tree, AVL_BUILD_ITER_TYPE p, L_SIZE num_nodes)
-{
- /* Gives path to subtree being built. If bit n is false, branch
- ** less from the node at depth n, if true branch greater. */
- L_BIT_ARR_DEFN(branch)
+ L_(avl) *l_tree, AVL_BUILD_ITER_TYPE p, L_SIZE num_nodes) {
+ /* Gives path to subtree being built. If bit n is false, branch
+ ** less from the node at depth n, if true branch greater. */
+ L_BIT_ARR_DEFN(branch)
- /* If bit n is true, then for the current subtree at depth n, its
- ** greater subtree has one more node than its less subtree. */
- L_BIT_ARR_DEFN(rem)
+ /* If bit n is true, then for the current subtree at depth n, its
+ ** greater subtree has one more node than its less subtree. */
+ L_BIT_ARR_DEFN(rem)
- /* Depth of root node of current subtree. */
- unsigned depth = 0;
+ /* Depth of root node of current subtree. */
+ unsigned depth = 0;
- /* Number of nodes in current subtree. */
- L_SIZE num_sub = num_nodes;
+ /* Number of nodes in current subtree. */
+ L_SIZE num_sub = num_nodes;
- /* The algorithm relies on a stack of nodes whose less subtree has
- ** been built, but whose greater subtree has not yet been built.
- ** The stack is implemented as linked list. The nodes are linked
- ** together by having the "greater" handle of a node set to the
- ** next node in the list. "less_parent" is the handle of the first
- ** node in the list. */
- AVL_HANDLE less_parent = AVL_NULL;
+ /* The algorithm relies on a stack of nodes whose less subtree has
+ ** been built, but whose greater subtree has not yet been built.
+ ** The stack is implemented as linked list. The nodes are linked
+ ** together by having the "greater" handle of a node set to the
+ ** next node in the list. "less_parent" is the handle of the first
+ ** node in the list. */
+ AVL_HANDLE less_parent = AVL_NULL;
- /* h is root of current subtree, child is one of its children. */
- AVL_HANDLE h;
- AVL_HANDLE child;
+ /* h is root of current subtree, child is one of its children. */
+ AVL_HANDLE h;
+ AVL_HANDLE child;
- if (num_nodes == 0)
- {
- l_tree->root = AVL_NULL;
- return(1);
+ if (num_nodes == 0) {
+ l_tree->root = AVL_NULL;
+ return(1);
+ }
+
+ for (;;) {
+ while (num_sub > 2) {
+ /* Subtract one for root of subtree. */
+ num_sub--;
+
+ if (num_sub & 1)
+ L_BIT_ARR_1(rem, depth)
+ else
+ L_BIT_ARR_0(rem, depth)
+ L_BIT_ARR_0(branch, depth)
+ depth++;
+
+ num_sub >>= 1;
}
- for (; ;)
- {
- while (num_sub > 2)
- {
- /* Subtract one for root of subtree. */
- num_sub--;
+ if (num_sub == 2) {
+ /* Build a subtree with two nodes, slanting to greater.
+ ** I arbitrarily chose to always have the extra node in the
+ ** greater subtree when there is an odd number of nodes to
+ ** split between the two subtrees. */
- if (num_sub & 1)
- L_BIT_ARR_1(rem, depth)
- else
- L_BIT_ARR_0(rem, depth)
- L_BIT_ARR_0(branch, depth)
- depth++;
+ h = AVL_BUILD_ITER_VAL(p);
+ L_CHECK_READ_ERROR(0)
+ AVL_BUILD_ITER_INCR(p)
+ child = AVL_BUILD_ITER_VAL(p);
+ L_CHECK_READ_ERROR(0)
+ AVL_BUILD_ITER_INCR(p)
+ AVL_SET_LESS(child, AVL_NULL)
+ AVL_SET_GREATER(child, AVL_NULL)
+ AVL_SET_BALANCE_FACTOR(child, 0)
+ AVL_SET_GREATER(h, child)
+ AVL_SET_LESS(h, AVL_NULL)
+ AVL_SET_BALANCE_FACTOR(h, 1)
+ } else { /* num_sub == 1 */
+ /* Build a subtree with one node. */
- num_sub >>= 1;
+ h = AVL_BUILD_ITER_VAL(p);
+ L_CHECK_READ_ERROR(0)
+ AVL_BUILD_ITER_INCR(p)
+ AVL_SET_LESS(h, AVL_NULL)
+ AVL_SET_GREATER(h, AVL_NULL)
+ AVL_SET_BALANCE_FACTOR(h, 0)
+ }
+
+ while (depth) {
+ depth--;
+
+ if (!L_BIT_ARR_VAL(branch, depth))
+ /* We've completed a less subtree. */
+ break;
+
+ /* We've completed a greater subtree, so attach it to
+ ** its parent (that is less than it). We pop the parent
+ ** off the stack of less parents. */
+ child = h;
+ h = less_parent;
+ less_parent = AVL_GET_GREATER(h, 1);
+ L_CHECK_READ_ERROR(0)
+ AVL_SET_GREATER(h, child)
+ /* num_sub = 2 * (num_sub - rem[depth]) + rem[depth] + 1 */
+ num_sub <<= 1;
+ num_sub += L_BIT_ARR_VAL(rem, depth) ? 0 : 1;
+
+ if (num_sub & (num_sub - 1))
+ /* num_sub is not a power of 2. */
+ AVL_SET_BALANCE_FACTOR(h, 0)
+ else
+ /* num_sub is a power of 2. */
+ AVL_SET_BALANCE_FACTOR(h, 1)
}
- if (num_sub == 2)
- {
- /* Build a subtree with two nodes, slanting to greater.
- ** I arbitrarily chose to always have the extra node in the
- ** greater subtree when there is an odd number of nodes to
- ** split between the two subtrees. */
+ if (num_sub == num_nodes)
+ /* We've completed the full tree. */
+ break;
- h = AVL_BUILD_ITER_VAL(p);
- L_CHECK_READ_ERROR(0)
- AVL_BUILD_ITER_INCR(p)
- child = AVL_BUILD_ITER_VAL(p);
- L_CHECK_READ_ERROR(0)
- AVL_BUILD_ITER_INCR(p)
- AVL_SET_LESS(child, AVL_NULL)
- AVL_SET_GREATER(child, AVL_NULL)
- AVL_SET_BALANCE_FACTOR(child, 0)
- AVL_SET_GREATER(h, child)
- AVL_SET_LESS(h, AVL_NULL)
- AVL_SET_BALANCE_FACTOR(h, 1)
- }
- else /* num_sub == 1 */
- {
- /* Build a subtree with one node. */
+ /* The subtree we've completed is the less subtree of the
+ ** next node in the sequence. */
- h = AVL_BUILD_ITER_VAL(p);
- L_CHECK_READ_ERROR(0)
- AVL_BUILD_ITER_INCR(p)
- AVL_SET_LESS(h, AVL_NULL)
- AVL_SET_GREATER(h, AVL_NULL)
- AVL_SET_BALANCE_FACTOR(h, 0)
- }
+ child = h;
+ h = AVL_BUILD_ITER_VAL(p);
+ L_CHECK_READ_ERROR(0)
+ AVL_BUILD_ITER_INCR(p)
+ AVL_SET_LESS(h, child)
- while (depth)
- {
- depth--;
+ /* Put h into stack of less parents. */
+ AVL_SET_GREATER(h, less_parent)
+ less_parent = h;
- if (!L_BIT_ARR_VAL(branch, depth))
- /* We've completed a less subtree. */
- break;
+ /* Proceed to creating greater than subtree of h. */
+ L_BIT_ARR_1(branch, depth)
+ num_sub += L_BIT_ARR_VAL(rem, depth) ? 1 : 0;
+ depth++;
- /* We've completed a greater subtree, so attach it to
- ** its parent (that is less than it). We pop the parent
- ** off the stack of less parents. */
- child = h;
- h = less_parent;
- less_parent = AVL_GET_GREATER(h, 1);
- L_CHECK_READ_ERROR(0)
- AVL_SET_GREATER(h, child)
- /* num_sub = 2 * (num_sub - rem[depth]) + rem[depth] + 1 */
- num_sub <<= 1;
- num_sub += L_BIT_ARR_VAL(rem, depth) ? 0 : 1;
+ } /* end for (;; ) */
- if (num_sub & (num_sub - 1))
- /* num_sub is not a power of 2. */
- AVL_SET_BALANCE_FACTOR(h, 0)
- else
- /* num_sub is a power of 2. */
- AVL_SET_BALANCE_FACTOR(h, 1)
- }
+ l_tree->root = h;
- if (num_sub == num_nodes)
- /* We've completed the full tree. */
- break;
-
- /* The subtree we've completed is the less subtree of the
- ** next node in the sequence. */
-
- child = h;
- h = AVL_BUILD_ITER_VAL(p);
- L_CHECK_READ_ERROR(0)
- AVL_BUILD_ITER_INCR(p)
- AVL_SET_LESS(h, child)
-
- /* Put h into stack of less parents. */
- AVL_SET_GREATER(h, less_parent)
- less_parent = h;
-
- /* Proceed to creating greater than subtree of h. */
- L_BIT_ARR_1(branch, depth)
- num_sub += L_BIT_ARR_VAL(rem, depth) ? 1 : 0;
- depth++;
-
- } /* end for ( ; ; ) */
-
- l_tree->root = h;
-
- return(1);
+ return(1);
}
#endif
@@ -1001,9 +907,8 @@
** invalid. (Depth is zero-base.) It's not necessary to initialize
** iterators prior to passing them to the "start" function.
*/
-L_SC void L_(init_iter)(L_(iter) *iter)
-{
- iter->depth = ~0;
+L_SC void L_(init_iter)(L_(iter) *iter) {
+ iter->depth = ~0;
}
#endif
@@ -1011,7 +916,7 @@
#ifdef AVL_READ_ERRORS_HAPPEN
#define L_CHECK_READ_ERROR_INV_DEPTH \
- { if (AVL_READ_ERROR) { iter->depth = ~0; return; } }
+ { if (AVL_READ_ERROR) { iter->depth = ~0; return; } }
#else
@@ -1022,174 +927,157 @@
#if (L_IMPL_MASK & AVL_IMPL_START_ITER)
L_SC void L_(start_iter)(
- L_(avl) *l_tree, L_(iter) *iter, AVL_KEY k, avl_search_type st)
-{
- AVL_HANDLE h = l_tree->root;
- unsigned d = 0;
- int cmp, target_cmp;
+ L_(avl) *l_tree, L_(iter) *iter, AVL_KEY k, avl_search_type st) {
+ AVL_HANDLE h = l_tree->root;
+ unsigned d = 0;
+ int cmp, target_cmp;
- /* Save the tree that we're going to iterate through in a
- ** member variable. */
- iter->tree_ = l_tree;
+ /* Save the tree that we're going to iterate through in a
+ ** member variable. */
+ iter->tree_ = l_tree;
- iter->depth = ~0;
+ iter->depth = ~0;
+
+ if (h == AVL_NULL)
+ /* Tree is empty. */
+ return;
+
+ if (st & AVL_LESS)
+ /* Key can be greater than key of starting node. */
+ target_cmp = 1;
+ else if (st & AVL_GREATER)
+ /* Key can be less than key of starting node. */
+ target_cmp = -1;
+ else
+ /* Key must be same as key of starting node. */
+ target_cmp = 0;
+
+ for (;;) {
+ cmp = AVL_COMPARE_KEY_NODE(k, h);
+
+ if (cmp == 0) {
+ if (st & AVL_EQUAL) {
+ /* Equal node was sought and found as starting node. */
+ iter->depth = d;
+ break;
+ }
+
+ cmp = -target_cmp;
+ } else if (target_cmp != 0)
+ if (!((cmp ^ target_cmp) & L_MASK_HIGH_BIT))
+ /* cmp and target_cmp are both negative or both positive. */
+ iter->depth = d;
+
+ h = cmp < 0 ? AVL_GET_LESS(h, 1) : AVL_GET_GREATER(h, 1);
+ L_CHECK_READ_ERROR_INV_DEPTH
if (h == AVL_NULL)
- /* Tree is empty. */
- return;
+ break;
- if (st & AVL_LESS)
- /* Key can be greater than key of starting node. */
- target_cmp = 1;
- else if (st & AVL_GREATER)
- /* Key can be less than key of starting node. */
- target_cmp = -1;
- else
- /* Key must be same as key of starting node. */
- target_cmp = 0;
-
- for (; ;)
- {
- cmp = AVL_COMPARE_KEY_NODE(k, h);
-
- if (cmp == 0)
- {
- if (st & AVL_EQUAL)
- {
- /* Equal node was sought and found as starting node. */
- iter->depth = d;
- break;
- }
-
- cmp = -target_cmp;
- }
- else if (target_cmp != 0)
- if (!((cmp ^ target_cmp) & L_MASK_HIGH_BIT))
- /* cmp and target_cmp are both negative or both positive. */
- iter->depth = d;
-
- h = cmp < 0 ? AVL_GET_LESS(h, 1) : AVL_GET_GREATER(h, 1);
- L_CHECK_READ_ERROR_INV_DEPTH
-
- if (h == AVL_NULL)
- break;
-
- if (cmp > 0)
- L_BIT_ARR_1(iter->branch, d)
- else
- L_BIT_ARR_0(iter->branch, d)
- iter->path_h[d++] = h;
- }
+ if (cmp > 0)
+ L_BIT_ARR_1(iter->branch, d)
+ else
+ L_BIT_ARR_0(iter->branch, d)
+ iter->path_h[d++] = h;
+ }
}
#endif
#if (L_IMPL_MASK & AVL_IMPL_START_ITER_LEAST)
-L_SC void L_(start_iter_least)(L_(avl) *l_tree, L_(iter) *iter)
-{
- AVL_HANDLE h = l_tree->root;
+L_SC void L_(start_iter_least)(L_(avl) *l_tree, L_(iter) *iter) {
+ AVL_HANDLE h = l_tree->root;
- iter->tree_ = l_tree;
+ iter->tree_ = l_tree;
- iter->depth = ~0;
+ iter->depth = ~0;
- L_BIT_ARR_ALL(iter->branch, 0)
+ L_BIT_ARR_ALL(iter->branch, 0)
- while (h != AVL_NULL)
- {
- if (iter->depth != ~0)
- iter->path_h[iter->depth] = h;
+ while (h != AVL_NULL) {
+ if (iter->depth != ~0)
+ iter->path_h[iter->depth] = h;
- iter->depth++;
- h = AVL_GET_LESS(h, 1);
- L_CHECK_READ_ERROR_INV_DEPTH
- }
+ iter->depth++;
+ h = AVL_GET_LESS(h, 1);
+ L_CHECK_READ_ERROR_INV_DEPTH
+ }
}
#endif
#if (L_IMPL_MASK & AVL_IMPL_START_ITER_GREATEST)
-L_SC void L_(start_iter_greatest)(L_(avl) *l_tree, L_(iter) *iter)
-{
- AVL_HANDLE h = l_tree->root;
+L_SC void L_(start_iter_greatest)(L_(avl) *l_tree, L_(iter) *iter) {
+ AVL_HANDLE h = l_tree->root;
- iter->tree_ = l_tree;
+ iter->tree_ = l_tree;
- iter->depth = ~0;
+ iter->depth = ~0;
- L_BIT_ARR_ALL(iter->branch, 1)
+ L_BIT_ARR_ALL(iter->branch, 1)
- while (h != AVL_NULL)
- {
- if (iter->depth != ~0)
- iter->path_h[iter->depth] = h;
+ while (h != AVL_NULL) {
+ if (iter->depth != ~0)
+ iter->path_h[iter->depth] = h;
- iter->depth++;
- h = AVL_GET_GREATER(h, 1);
- L_CHECK_READ_ERROR_INV_DEPTH
- }
+ iter->depth++;
+ h = AVL_GET_GREATER(h, 1);
+ L_CHECK_READ_ERROR_INV_DEPTH
+ }
}
#endif
#if (L_IMPL_MASK & AVL_IMPL_GET_ITER)
-L_SC AVL_HANDLE L_(get_iter)(L_(iter) *iter)
-{
- if (iter->depth == ~0)
- return(AVL_NULL);
+L_SC AVL_HANDLE L_(get_iter)(L_(iter) *iter) {
+ if (iter->depth == ~0)
+ return(AVL_NULL);
- return(iter->depth == 0 ?
- iter->tree_->root : iter->path_h[iter->depth - 1]);
+ return(iter->depth == 0 ?
+ iter->tree_->root : iter->path_h[iter->depth - 1]);
}
#endif
#if (L_IMPL_MASK & AVL_IMPL_INCR_ITER)
-L_SC void L_(incr_iter)(L_(iter) *iter)
-{
+L_SC void L_(incr_iter)(L_(iter) *iter) {
#define l_tree (iter->tree_)
- if (iter->depth != ~0)
- {
- AVL_HANDLE h =
- AVL_GET_GREATER((iter->depth == 0 ?
- iter->tree_->root : iter->path_h[iter->depth - 1]), 1);
+ if (iter->depth != ~0) {
+ AVL_HANDLE h =
+ AVL_GET_GREATER((iter->depth == 0 ?
+ iter->tree_->root : iter->path_h[iter->depth - 1]), 1);
+ L_CHECK_READ_ERROR_INV_DEPTH
+
+ if (h == AVL_NULL)
+ do {
+ if (iter->depth == 0) {
+ iter->depth = ~0;
+ break;
+ }
+
+ iter->depth--;
+ } while (L_BIT_ARR_VAL(iter->branch, iter->depth));
+ else {
+ L_BIT_ARR_1(iter->branch, iter->depth)
+ iter->path_h[iter->depth++] = h;
+
+ for (;;) {
+ h = AVL_GET_LESS(h, 1);
L_CHECK_READ_ERROR_INV_DEPTH
if (h == AVL_NULL)
- do
- {
- if (iter->depth == 0)
- {
- iter->depth = ~0;
- break;
- }
+ break;
- iter->depth--;
- }
- while (L_BIT_ARR_VAL(iter->branch, iter->depth));
- else
- {
- L_BIT_ARR_1(iter->branch, iter->depth)
- iter->path_h[iter->depth++] = h;
-
- for (; ;)
- {
- h = AVL_GET_LESS(h, 1);
- L_CHECK_READ_ERROR_INV_DEPTH
-
- if (h == AVL_NULL)
- break;
-
- L_BIT_ARR_0(iter->branch, iter->depth)
- iter->path_h[iter->depth++] = h;
- }
- }
+ L_BIT_ARR_0(iter->branch, iter->depth)
+ iter->path_h[iter->depth++] = h;
+ }
}
+ }
#undef l_tree
}
@@ -1198,47 +1086,40 @@
#if (L_IMPL_MASK & AVL_IMPL_DECR_ITER)
-L_SC void L_(decr_iter)(L_(iter) *iter)
-{
+L_SC void L_(decr_iter)(L_(iter) *iter) {
#define l_tree (iter->tree_)
- if (iter->depth != ~0)
- {
- AVL_HANDLE h =
- AVL_GET_LESS((iter->depth == 0 ?
- iter->tree_->root : iter->path_h[iter->depth - 1]), 1);
+ if (iter->depth != ~0) {
+ AVL_HANDLE h =
+ AVL_GET_LESS((iter->depth == 0 ?
+ iter->tree_->root : iter->path_h[iter->depth - 1]), 1);
+ L_CHECK_READ_ERROR_INV_DEPTH
+
+ if (h == AVL_NULL)
+ do {
+ if (iter->depth == 0) {
+ iter->depth = ~0;
+ break;
+ }
+
+ iter->depth--;
+ } while (!L_BIT_ARR_VAL(iter->branch, iter->depth));
+ else {
+ L_BIT_ARR_0(iter->branch, iter->depth)
+ iter->path_h[iter->depth++] = h;
+
+ for (;;) {
+ h = AVL_GET_GREATER(h, 1);
L_CHECK_READ_ERROR_INV_DEPTH
if (h == AVL_NULL)
- do
- {
- if (iter->depth == 0)
- {
- iter->depth = ~0;
- break;
- }
+ break;
- iter->depth--;
- }
- while (!L_BIT_ARR_VAL(iter->branch, iter->depth));
- else
- {
- L_BIT_ARR_0(iter->branch, iter->depth)
- iter->path_h[iter->depth++] = h;
-
- for (; ;)
- {
- h = AVL_GET_GREATER(h, 1);
- L_CHECK_READ_ERROR_INV_DEPTH
-
- if (h == AVL_NULL)
- break;
-
- L_BIT_ARR_1(iter->branch, iter->depth)
- iter->path_h[iter->depth++] = h;
- }
- }
+ L_BIT_ARR_1(iter->branch, iter->depth)
+ iter->path_h[iter->depth++] = h;
+ }
}
+ }
#undef l_tree
}
diff --git a/vpx_mem/memory_manager/include/heapmm.h b/vpx_mem/memory_manager/include/heapmm.h
index 33004ca..4934c2d 100644
--- a/vpx_mem/memory_manager/include/heapmm.h
+++ b/vpx_mem/memory_manager/include/heapmm.h
@@ -81,30 +81,29 @@
#include "hmm_cnfg.h"
/* Heap descriptor. */
-typedef struct HMM_UNIQUE(structure)
-{
- /* private: */
+typedef struct HMM_UNIQUE(structure) {
+ /* private: */
- /* Pointer to (payload of) root node in AVL tree. This field should
- ** really be the AVL tree descriptor (type avl_avl). But (in the
- ** instantiation of the AVL tree generic package used in package) the
- ** AVL tree descriptor simply contains a pointer to the root. So,
- ** whenever a pointer to the AVL tree descriptor is needed, I use the
- ** cast:
- **
- ** (avl_avl *) &(heap_desc->avl_tree_root)
- **
- ** (where heap_desc is a pointer to a heap descriptor). This trick
- ** allows me to avoid including cavl_if.h in this external header. */
- void *avl_tree_root;
+ /* Pointer to (payload of) root node in AVL tree. This field should
+ ** really be the AVL tree descriptor (type avl_avl). But (in the
+ ** instantiation of the AVL tree generic package used in package) the
+ ** AVL tree descriptor simply contains a pointer to the root. So,
+ ** whenever a pointer to the AVL tree descriptor is needed, I use the
+ ** cast:
+ **
+ ** (avl_avl *) &(heap_desc->avl_tree_root)
+ **
+ ** (where heap_desc is a pointer to a heap descriptor). This trick
+ ** allows me to avoid including cavl_if.h in this external header. */
+ void *avl_tree_root;
- /* Pointer to first byte of last block freed, after any coalescing. */
- void *last_freed;
+ /* Pointer to first byte of last block freed, after any coalescing. */
+ void *last_freed;
- /* public: */
+ /* public: */
- HMM_UNIQUE(size_bau) num_baus_can_shrink;
- void *end_of_shrinkable_chunk;
+ HMM_UNIQUE(size_bau) num_baus_can_shrink;
+ void *end_of_shrinkable_chunk;
}
HMM_UNIQUE(descriptor);
@@ -113,41 +112,41 @@
void HMM_UNIQUE(init)(HMM_UNIQUE(descriptor) *desc);
void *HMM_UNIQUE(alloc)(
- HMM_UNIQUE(descriptor) *desc, HMM_UNIQUE(size_aau) num_addr_align_units);
+ HMM_UNIQUE(descriptor) *desc, HMM_UNIQUE(size_aau) num_addr_align_units);
/* NOT YET IMPLEMENTED */
void *HMM_UNIQUE(greedy_alloc)(
- HMM_UNIQUE(descriptor) *desc, HMM_UNIQUE(size_aau) needed_addr_align_units,
- HMM_UNIQUE(size_aau) coveted_addr_align_units);
+ HMM_UNIQUE(descriptor) *desc, HMM_UNIQUE(size_aau) needed_addr_align_units,
+ HMM_UNIQUE(size_aau) coveted_addr_align_units);
int HMM_UNIQUE(resize)(
- HMM_UNIQUE(descriptor) *desc, void *mem,
- HMM_UNIQUE(size_aau) num_addr_align_units);
+ HMM_UNIQUE(descriptor) *desc, void *mem,
+ HMM_UNIQUE(size_aau) num_addr_align_units);
/* NOT YET IMPLEMENTED */
int HMM_UNIQUE(greedy_resize)(
- HMM_UNIQUE(descriptor) *desc, void *mem,
- HMM_UNIQUE(size_aau) needed_addr_align_units,
- HMM_UNIQUE(size_aau) coveted_addr_align_units);
+ HMM_UNIQUE(descriptor) *desc, void *mem,
+ HMM_UNIQUE(size_aau) needed_addr_align_units,
+ HMM_UNIQUE(size_aau) coveted_addr_align_units);
void HMM_UNIQUE(free)(HMM_UNIQUE(descriptor) *desc, void *mem);
HMM_UNIQUE(size_aau) HMM_UNIQUE(true_size)(void *mem);
HMM_UNIQUE(size_aau) HMM_UNIQUE(largest_available)(
- HMM_UNIQUE(descriptor) *desc);
+ HMM_UNIQUE(descriptor) *desc);
void HMM_UNIQUE(new_chunk)(
- HMM_UNIQUE(descriptor) *desc, void *start_of_chunk,
- HMM_UNIQUE(size_bau) num_block_align_units);
+ HMM_UNIQUE(descriptor) *desc, void *start_of_chunk,
+ HMM_UNIQUE(size_bau) num_block_align_units);
void HMM_UNIQUE(grow_chunk)(
- HMM_UNIQUE(descriptor) *desc, void *end_of_chunk,
- HMM_UNIQUE(size_bau) num_block_align_units);
+ HMM_UNIQUE(descriptor) *desc, void *end_of_chunk,
+ HMM_UNIQUE(size_bau) num_block_align_units);
/* NOT YET IMPLEMENTED */
void HMM_UNIQUE(shrink_chunk)(
- HMM_UNIQUE(descriptor) *desc,
- HMM_UNIQUE(size_bau) num_block_align_units);
+ HMM_UNIQUE(descriptor) *desc,
+ HMM_UNIQUE(size_bau) num_block_align_units);
#endif /* defined HMM_PROCESS */
diff --git a/vpx_mem/memory_manager/include/hmm_cnfg.h b/vpx_mem/memory_manager/include/hmm_cnfg.h
index 30b9f50..2c3391d 100644
--- a/vpx_mem/memory_manager/include/hmm_cnfg.h
+++ b/vpx_mem/memory_manager/include/hmm_cnfg.h
@@ -45,8 +45,8 @@
#define HMM_UNIQUE(BASE) hmm_ ## BASE
/* Number of bytes in an Address Alignment Unit (AAU). */
-//fwg
-//#define HMM_ADDR_ALIGN_UNIT sizeof(int)
+// fwg
+// #define HMM_ADDR_ALIGN_UNIT sizeof(int)
#define HMM_ADDR_ALIGN_UNIT 32
/* Number of AAUs in a Block Alignment Unit (BAU). */
@@ -65,7 +65,7 @@
** statement. If you remove the definition of this macro, no self-auditing
** will be performed. */
#define HMM_AUDIT_FAIL \
- hmm_dflt_abort(__FILE__, HMM_SYM_TO_STRING(__LINE__));
+ hmm_dflt_abort(__FILE__, HMM_SYM_TO_STRING(__LINE__));
#elif HMM_CNFG_NUM == 0
@@ -90,8 +90,8 @@
extern unsigned HMM_UNIQUE(fail_line);
#define HMM_AUDIT_FAIL \
- { HMM_UNIQUE(fail_file) = __FILE__; HMM_UNIQUE(fail_line) = __LINE__; \
- longjmp(HMM_UNIQUE(jmp_buf), 1); }
+ { HMM_UNIQUE(fail_file) = __FILE__; HMM_UNIQUE(fail_line) = __LINE__; \
+ longjmp(HMM_UNIQUE(jmp_buf), 1); }
#elif HMM_CNFG_NUM == 1
diff --git a/vpx_mem/memory_manager/include/hmm_intrnl.h b/vpx_mem/memory_manager/include/hmm_intrnl.h
index 5d62abc..27cefe4 100644
--- a/vpx_mem/memory_manager/include/hmm_intrnl.h
+++ b/vpx_mem/memory_manager/include/hmm_intrnl.h
@@ -26,34 +26,32 @@
/* Mask of high bit of variable of size_bau type. */
#define HIGH_BIT_BAU_SIZE \
- ((U(size_bau)) ~ (((U(size_bau)) ~ (U(size_bau)) 0) >> 1))
+ ((U(size_bau)) ~ (((U(size_bau)) ~ (U(size_bau)) 0) >> 1))
/* Add a given number of AAUs to pointer. */
#define AAUS_FORWARD(PTR, AAU_OFFSET) \
- (((char *) (PTR)) + ((AAU_OFFSET) * ((U(size_aau)) HMM_ADDR_ALIGN_UNIT)))
+ (((char *) (PTR)) + ((AAU_OFFSET) * ((U(size_aau)) HMM_ADDR_ALIGN_UNIT)))
/* Subtract a given number of AAUs from pointer. */
#define AAUS_BACKWARD(PTR, AAU_OFFSET) \
- (((char *) (PTR)) - ((AAU_OFFSET) * ((U(size_aau)) HMM_ADDR_ALIGN_UNIT)))
+ (((char *) (PTR)) - ((AAU_OFFSET) * ((U(size_aau)) HMM_ADDR_ALIGN_UNIT)))
/* Add a given number of BAUs to a pointer. */
#define BAUS_FORWARD(PTR, BAU_OFFSET) \
- AAUS_FORWARD((PTR), (BAU_OFFSET) * ((U(size_aau)) HMM_BLOCK_ALIGN_UNIT))
+ AAUS_FORWARD((PTR), (BAU_OFFSET) * ((U(size_aau)) HMM_BLOCK_ALIGN_UNIT))
/* Subtract a given number of BAUs to a pointer. */
#define BAUS_BACKWARD(PTR, BAU_OFFSET) \
- AAUS_BACKWARD((PTR), (BAU_OFFSET) * ((U(size_aau)) HMM_BLOCK_ALIGN_UNIT))
+ AAUS_BACKWARD((PTR), (BAU_OFFSET) * ((U(size_aau)) HMM_BLOCK_ALIGN_UNIT))
-typedef struct head_struct
-{
- /* Sizes in Block Alignment Units. */
- HMM_UNIQUE(size_bau) previous_block_size, block_size;
+typedef struct head_struct {
+ /* Sizes in Block Alignment Units. */
+ HMM_UNIQUE(size_bau) previous_block_size, block_size;
}
head_record;
-typedef struct ptr_struct
-{
- struct ptr_struct *self, *prev, *next;
+typedef struct ptr_struct {
+ struct ptr_struct *self, *prev, *next;
}
ptr_record;
@@ -71,50 +69,50 @@
/* Minimum number of BAUs in a block (allowing room for the pointer record. */
#define MIN_BLOCK_BAUS \
- DIV_ROUND_UP(HEAD_AAUS + PTR_RECORD_AAUS, HMM_BLOCK_ALIGN_UNIT)
+ DIV_ROUND_UP(HEAD_AAUS + PTR_RECORD_AAUS, HMM_BLOCK_ALIGN_UNIT)
/* Return number of BAUs in block (masking off high bit containing block
** status). */
#define BLOCK_BAUS(HEAD_PTR) \
- (((head_record *) (HEAD_PTR))->block_size & ~HIGH_BIT_BAU_SIZE)
+ (((head_record *) (HEAD_PTR))->block_size & ~HIGH_BIT_BAU_SIZE)
/* Return number of BAUs in previous block (masking off high bit containing
** block status). */
#define PREV_BLOCK_BAUS(HEAD_PTR) \
- (((head_record *) (HEAD_PTR))->previous_block_size & ~HIGH_BIT_BAU_SIZE)
+ (((head_record *) (HEAD_PTR))->previous_block_size & ~HIGH_BIT_BAU_SIZE)
/* Set number of BAUs in previous block, preserving high bit containing
** block status. */
#define SET_PREV_BLOCK_BAUS(HEAD_PTR, N_BAUS) \
- { register head_record *h_ptr = (head_record *) (HEAD_PTR); \
- h_ptr->previous_block_size &= HIGH_BIT_BAU_SIZE; \
- h_ptr->previous_block_size |= (N_BAUS); }
+ { register head_record *h_ptr = (head_record *) (HEAD_PTR); \
+ h_ptr->previous_block_size &= HIGH_BIT_BAU_SIZE; \
+ h_ptr->previous_block_size |= (N_BAUS); }
/* Convert pointer to pointer record of block to pointer to block's head
** record. */
#define PTR_REC_TO_HEAD(PTR_REC_PTR) \
- ((head_record *) AAUS_BACKWARD(PTR_REC_PTR, HEAD_AAUS))
+ ((head_record *) AAUS_BACKWARD(PTR_REC_PTR, HEAD_AAUS))
/* Convert pointer to block head to pointer to block's pointer record. */
#define HEAD_TO_PTR_REC(HEAD_PTR) \
- ((ptr_record *) AAUS_FORWARD(HEAD_PTR, HEAD_AAUS))
+ ((ptr_record *) AAUS_FORWARD(HEAD_PTR, HEAD_AAUS))
/* Returns non-zero if block is allocated. */
#define IS_BLOCK_ALLOCATED(HEAD_PTR) \
- (((((head_record *) (HEAD_PTR))->block_size | \
- ((head_record *) (HEAD_PTR))->previous_block_size) & \
- HIGH_BIT_BAU_SIZE) == 0)
+ (((((head_record *) (HEAD_PTR))->block_size | \
+ ((head_record *) (HEAD_PTR))->previous_block_size) & \
+ HIGH_BIT_BAU_SIZE) == 0)
#define MARK_BLOCK_ALLOCATED(HEAD_PTR) \
- { register head_record *h_ptr = (head_record *) (HEAD_PTR); \
- h_ptr->block_size &= ~HIGH_BIT_BAU_SIZE; \
- h_ptr->previous_block_size &= ~HIGH_BIT_BAU_SIZE; }
+ { register head_record *h_ptr = (head_record *) (HEAD_PTR); \
+ h_ptr->block_size &= ~HIGH_BIT_BAU_SIZE; \
+ h_ptr->previous_block_size &= ~HIGH_BIT_BAU_SIZE; }
/* Mark a block as free when it is not the first block in a bin (and
** therefore not a node in the AVL tree). */
#define MARK_SUCCESSIVE_BLOCK_IN_FREE_BIN(HEAD_PTR) \
- { register head_record *h_ptr = (head_record *) (HEAD_PTR); \
- h_ptr->block_size |= HIGH_BIT_BAU_SIZE; }
+ { register head_record *h_ptr = (head_record *) (HEAD_PTR); \
+ h_ptr->block_size |= HIGH_BIT_BAU_SIZE; }
/* Prototypes for internal functions implemented in one file and called in
** another.
@@ -125,7 +123,7 @@
void U(out_of_free_collection)(U(descriptor) *desc, head_record *head_ptr);
void *U(alloc_from_bin)(
- U(descriptor) *desc, ptr_record *bin_front_ptr, U(size_bau) n_baus);
+ U(descriptor) *desc, ptr_record *bin_front_ptr, U(size_bau) n_baus);
#ifdef HMM_AUDIT_FAIL
@@ -137,12 +135,12 @@
/* Auditing a block consists of checking that the size in its head
** matches the previous block size in the head of the next block. */
#define AUDIT_BLOCK_AS_EXPR(HEAD_PTR) \
- ((BLOCK_BAUS(HEAD_PTR) == \
- PREV_BLOCK_BAUS(BAUS_FORWARD(HEAD_PTR, BLOCK_BAUS(HEAD_PTR)))) ? \
- 0 : U(audit_block_fail_dummy_return)())
+ ((BLOCK_BAUS(HEAD_PTR) == \
+ PREV_BLOCK_BAUS(BAUS_FORWARD(HEAD_PTR, BLOCK_BAUS(HEAD_PTR)))) ? \
+ 0 : U(audit_block_fail_dummy_return)())
#define AUDIT_BLOCK(HEAD_PTR) \
- { void *h_ptr = (HEAD_PTR); AUDIT_BLOCK_AS_EXPR(h_ptr); }
+ { void *h_ptr = (HEAD_PTR); AUDIT_BLOCK_AS_EXPR(h_ptr); }
#endif
diff --git a/vpx_mem/vpx_mem.c b/vpx_mem/vpx_mem.c
index eade432..059248b 100644
--- a/vpx_mem/vpx_mem.c
+++ b/vpx_mem/vpx_mem.c
@@ -51,15 +51,14 @@
#endif /*CONFIG_MEM_MANAGER*/
#if USE_GLOBAL_FUNCTION_POINTERS
-struct GLOBAL_FUNC_POINTERS
-{
- g_malloc_func g_malloc;
- g_calloc_func g_calloc;
- g_realloc_func g_realloc;
- g_free_func g_free;
- g_memcpy_func g_memcpy;
- g_memset_func g_memset;
- g_memmove_func g_memmove;
+struct GLOBAL_FUNC_POINTERS {
+ g_malloc_func g_malloc;
+ g_calloc_func g_calloc;
+ g_realloc_func g_realloc;
+ g_free_func g_free;
+ g_memcpy_func g_memcpy;
+ g_memset_func g_memset;
+ g_memmove_func g_memmove;
} *g_func = NULL;
# define VPX_MALLOC_L g_func->g_malloc
@@ -77,346 +76,314 @@
# define VPX_MEMMOVE_L memmove
#endif /* USE_GLOBAL_FUNCTION_POINTERS */
-unsigned int vpx_mem_get_version()
-{
- unsigned int ver = ((unsigned int)(unsigned char)VPX_MEM_VERSION_CHIEF << 24 |
- (unsigned int)(unsigned char)VPX_MEM_VERSION_MAJOR << 16 |
- (unsigned int)(unsigned char)VPX_MEM_VERSION_MINOR << 8 |
- (unsigned int)(unsigned char)VPX_MEM_VERSION_PATCH);
- return ver;
+unsigned int vpx_mem_get_version() {
+ unsigned int ver = ((unsigned int)(unsigned char)VPX_MEM_VERSION_CHIEF << 24 |
+ (unsigned int)(unsigned char)VPX_MEM_VERSION_MAJOR << 16 |
+ (unsigned int)(unsigned char)VPX_MEM_VERSION_MINOR << 8 |
+ (unsigned int)(unsigned char)VPX_MEM_VERSION_PATCH);
+ return ver;
}
-int vpx_mem_set_heap_size(size_t size)
-{
- int ret = -1;
+int vpx_mem_set_heap_size(size_t size) {
+ int ret = -1;
#if CONFIG_MEM_MANAGER
#if MM_DYNAMIC_MEMORY
- if (!g_mng_memory_allocated && size)
- {
- g_mm_memory_size = size;
- ret = 0;
- }
- else
- ret = -3;
+ if (!g_mng_memory_allocated && size) {
+ g_mm_memory_size = size;
+ ret = 0;
+ } else
+ ret = -3;
#else
- ret = -2;
+ ret = -2;
#endif
#else
- (void)size;
+ (void)size;
#endif
- return ret;
+ return ret;
}
-void *vpx_memalign(size_t align, size_t size)
-{
- void *addr,
- * x = NULL;
+void *vpx_memalign(size_t align, size_t size) {
+ void *addr,
+ * x = NULL;
#if CONFIG_MEM_MANAGER
- int number_aau;
+ int number_aau;
- if (vpx_mm_create_heap_memory() < 0)
- {
- _P(printf("[vpx][mm] ERROR vpx_memalign() Couldn't create memory for Heap.\n");)
- }
+ if (vpx_mm_create_heap_memory() < 0) {
+ _P(printf("[vpx][mm] ERROR vpx_memalign() Couldn't create memory for Heap.\n");)
+ }
- number_aau = ((size + align - 1 + ADDRESS_STORAGE_SIZE) >>
- SHIFT_HMM_ADDR_ALIGN_UNIT) + 1;
+ number_aau = ((size + align - 1 + ADDRESS_STORAGE_SIZE) >>
+ SHIFT_HMM_ADDR_ALIGN_UNIT) + 1;
- addr = hmm_alloc(&hmm_d, number_aau);
+ addr = hmm_alloc(&hmm_d, number_aau);
#else
- addr = VPX_MALLOC_L(size + align - 1 + ADDRESS_STORAGE_SIZE);
+ addr = VPX_MALLOC_L(size + align - 1 + ADDRESS_STORAGE_SIZE);
#endif /*CONFIG_MEM_MANAGER*/
- if (addr)
- {
- x = align_addr((unsigned char *)addr + ADDRESS_STORAGE_SIZE, (int)align);
- /* save the actual malloc address */
- ((size_t *)x)[-1] = (size_t)addr;
- }
+ if (addr) {
+ x = align_addr((unsigned char *)addr + ADDRESS_STORAGE_SIZE, (int)align);
+ /* save the actual malloc address */
+ ((size_t *)x)[-1] = (size_t)addr;
+ }
- return x;
+ return x;
}
-void *vpx_malloc(size_t size)
-{
- return vpx_memalign(DEFAULT_ALIGNMENT, size);
+void *vpx_malloc(size_t size) {
+ return vpx_memalign(DEFAULT_ALIGNMENT, size);
}
-void *vpx_calloc(size_t num, size_t size)
-{
- void *x;
+void *vpx_calloc(size_t num, size_t size) {
+ void *x;
- x = vpx_memalign(DEFAULT_ALIGNMENT, num * size);
+ x = vpx_memalign(DEFAULT_ALIGNMENT, num * size);
- if (x)
- VPX_MEMSET_L(x, 0, num * size);
+ if (x)
+ VPX_MEMSET_L(x, 0, num * size);
- return x;
+ return x;
}
-void *vpx_realloc(void *memblk, size_t size)
-{
- void *addr,
- * new_addr = NULL;
- int align = DEFAULT_ALIGNMENT;
+void *vpx_realloc(void *memblk, size_t size) {
+ void *addr,
+ * new_addr = NULL;
+ int align = DEFAULT_ALIGNMENT;
- /*
- The realloc() function changes the size of the object pointed to by
- ptr to the size specified by size, and returns a pointer to the
- possibly moved block. The contents are unchanged up to the lesser
- of the new and old sizes. If ptr is null, realloc() behaves like
- malloc() for the specified size. If size is zero (0) and ptr is
- not a null pointer, the object pointed to is freed.
- */
- if (!memblk)
- new_addr = vpx_malloc(size);
- else if (!size)
- vpx_free(memblk);
- else
- {
- addr = (void *)(((size_t *)memblk)[-1]);
- memblk = NULL;
+ /*
+ The realloc() function changes the size of the object pointed to by
+ ptr to the size specified by size, and returns a pointer to the
+ possibly moved block. The contents are unchanged up to the lesser
+ of the new and old sizes. If ptr is null, realloc() behaves like
+ malloc() for the specified size. If size is zero (0) and ptr is
+ not a null pointer, the object pointed to is freed.
+ */
+ if (!memblk)
+ new_addr = vpx_malloc(size);
+ else if (!size)
+ vpx_free(memblk);
+ else {
+ addr = (void *)(((size_t *)memblk)[-1]);
+ memblk = NULL;
#if CONFIG_MEM_MANAGER
- new_addr = vpx_mm_realloc(addr, size + align + ADDRESS_STORAGE_SIZE);
+ new_addr = vpx_mm_realloc(addr, size + align + ADDRESS_STORAGE_SIZE);
#else
- new_addr = VPX_REALLOC_L(addr, size + align + ADDRESS_STORAGE_SIZE);
+ new_addr = VPX_REALLOC_L(addr, size + align + ADDRESS_STORAGE_SIZE);
#endif
- if (new_addr)
- {
- addr = new_addr;
- new_addr = (void *)(((size_t)
- ((unsigned char *)new_addr + ADDRESS_STORAGE_SIZE) + (align - 1)) &
- (size_t) - align);
- /* save the actual malloc address */
- ((size_t *)new_addr)[-1] = (size_t)addr;
- }
+ if (new_addr) {
+ addr = new_addr;
+ new_addr = (void *)(((size_t)
+ ((unsigned char *)new_addr + ADDRESS_STORAGE_SIZE) + (align - 1)) &
+ (size_t) - align);
+ /* save the actual malloc address */
+ ((size_t *)new_addr)[-1] = (size_t)addr;
}
+ }
- return new_addr;
+ return new_addr;
}
-void vpx_free(void *memblk)
-{
- if (memblk)
- {
- void *addr = (void *)(((size_t *)memblk)[-1]);
+void vpx_free(void *memblk) {
+ if (memblk) {
+ void *addr = (void *)(((size_t *)memblk)[-1]);
#if CONFIG_MEM_MANAGER
- hmm_free(&hmm_d, addr);
+ hmm_free(&hmm_d, addr);
#else
- VPX_FREE_L(addr);
+ VPX_FREE_L(addr);
#endif
- }
+ }
}
#if CONFIG_MEM_TRACKER
-void *xvpx_memalign(size_t align, size_t size, char *file, int line)
-{
+void *xvpx_memalign(size_t align, size_t size, char *file, int line) {
#if TRY_BOUNDS_CHECK
- unsigned char *x_bounds;
+ unsigned char *x_bounds;
#endif
- void *x;
+ void *x;
- if (g_alloc_count == 0)
- {
+ if (g_alloc_count == 0) {
#if TRY_BOUNDS_CHECK
- int i_rv = vpx_memory_tracker_init(BOUNDS_CHECK_PAD_SIZE, BOUNDS_CHECK_VALUE);
+ int i_rv = vpx_memory_tracker_init(BOUNDS_CHECK_PAD_SIZE, BOUNDS_CHECK_VALUE);
#else
- int i_rv = vpx_memory_tracker_init(0, 0);
+ int i_rv = vpx_memory_tracker_init(0, 0);
#endif
- if (i_rv < 0)
- {
- _P(printf("ERROR xvpx_malloc MEM_TRACK_USAGE error vpx_memory_tracker_init().\n");)
- }
+ if (i_rv < 0) {
+ _P(printf("ERROR xvpx_malloc MEM_TRACK_USAGE error vpx_memory_tracker_init().\n");)
}
+ }
#if TRY_BOUNDS_CHECK
- {
- int i;
- unsigned int tempme = BOUNDS_CHECK_VALUE;
+ {
+ int i;
+ unsigned int tempme = BOUNDS_CHECK_VALUE;
- x_bounds = vpx_memalign(align, size + (BOUNDS_CHECK_PAD_SIZE * 2));
+ x_bounds = vpx_memalign(align, size + (BOUNDS_CHECK_PAD_SIZE * 2));
- if (x_bounds)
- {
- /*we're aligning the address twice here but to keep things
- consistent we want to have the padding come before the stored
- address so no matter what free function gets called we will
- attempt to free the correct address*/
- x_bounds = (unsigned char *)(((size_t *)x_bounds)[-1]);
- x = align_addr(x_bounds + BOUNDS_CHECK_PAD_SIZE + ADDRESS_STORAGE_SIZE,
- (int)align);
- /* save the actual malloc address */
- ((size_t *)x)[-1] = (size_t)x_bounds;
+ if (x_bounds) {
+ /*we're aligning the address twice here but to keep things
+ consistent we want to have the padding come before the stored
+ address so no matter what free function gets called we will
+ attempt to free the correct address*/
+ x_bounds = (unsigned char *)(((size_t *)x_bounds)[-1]);
+ x = align_addr(x_bounds + BOUNDS_CHECK_PAD_SIZE + ADDRESS_STORAGE_SIZE,
+ (int)align);
+ /* save the actual malloc address */
+ ((size_t *)x)[-1] = (size_t)x_bounds;
- for (i = 0; i < BOUNDS_CHECK_PAD_SIZE; i += sizeof(unsigned int))
- {
- VPX_MEMCPY_L(x_bounds + i, &tempme, sizeof(unsigned int));
- VPX_MEMCPY_L((unsigned char *)x + size + i,
- &tempme, sizeof(unsigned int));
- }
- }
- else
- x = NULL;
- }
+ for (i = 0; i < BOUNDS_CHECK_PAD_SIZE; i += sizeof(unsigned int)) {
+ VPX_MEMCPY_L(x_bounds + i, &tempme, sizeof(unsigned int));
+ VPX_MEMCPY_L((unsigned char *)x + size + i,
+ &tempme, sizeof(unsigned int));
+ }
+ } else
+ x = NULL;
+ }
#else
- x = vpx_memalign(align, size);
+ x = vpx_memalign(align, size);
#endif /*TRY_BOUNDS_CHECK*/
- g_alloc_count++;
+ g_alloc_count++;
- vpx_memory_tracker_add((size_t)x, (unsigned int)size, file, line, 1);
+ vpx_memory_tracker_add((size_t)x, (unsigned int)size, file, line, 1);
- return x;
+ return x;
}
-void *xvpx_malloc(size_t size, char *file, int line)
-{
- return xvpx_memalign(DEFAULT_ALIGNMENT, size, file, line);
+void *xvpx_malloc(size_t size, char *file, int line) {
+ return xvpx_memalign(DEFAULT_ALIGNMENT, size, file, line);
}
-void *xvpx_calloc(size_t num, size_t size, char *file, int line)
-{
- void *x = xvpx_memalign(DEFAULT_ALIGNMENT, num * size, file, line);
+void *xvpx_calloc(size_t num, size_t size, char *file, int line) {
+ void *x = xvpx_memalign(DEFAULT_ALIGNMENT, num * size, file, line);
- if (x)
- VPX_MEMSET_L(x, 0, num * size);
+ if (x)
+ VPX_MEMSET_L(x, 0, num * size);
- return x;
+ return x;
}
-void *xvpx_realloc(void *memblk, size_t size, char *file, int line)
-{
- struct mem_block *p = NULL;
- int orig_size = 0,
- orig_line = 0;
- char *orig_file = NULL;
+void *xvpx_realloc(void *memblk, size_t size, char *file, int line) {
+ struct mem_block *p = NULL;
+ int orig_size = 0,
+ orig_line = 0;
+ char *orig_file = NULL;
#if TRY_BOUNDS_CHECK
- unsigned char *x_bounds = memblk ?
- (unsigned char *)(((size_t *)memblk)[-1]) :
- NULL;
+ unsigned char *x_bounds = memblk ?
+ (unsigned char *)(((size_t *)memblk)[-1]) :
+ NULL;
#endif
- void *x;
+ void *x;
- if (g_alloc_count == 0)
- {
+ if (g_alloc_count == 0) {
#if TRY_BOUNDS_CHECK
- if (!vpx_memory_tracker_init(BOUNDS_CHECK_PAD_SIZE, BOUNDS_CHECK_VALUE))
+ if (!vpx_memory_tracker_init(BOUNDS_CHECK_PAD_SIZE, BOUNDS_CHECK_VALUE))
#else
- if (!vpx_memory_tracker_init(0, 0))
+ if (!vpx_memory_tracker_init(0, 0))
#endif
- {
- _P(printf("ERROR xvpx_malloc MEM_TRACK_USAGE error vpx_memory_tracker_init().\n");)
- }
- }
-
- if ((p = vpx_memory_tracker_find((size_t)memblk)))
{
- orig_size = p->size;
- orig_file = p->file;
- orig_line = p->line;
+ _P(printf("ERROR xvpx_malloc MEM_TRACK_USAGE error vpx_memory_tracker_init().\n");)
}
+ }
+ if ((p = vpx_memory_tracker_find((size_t)memblk))) {
+ orig_size = p->size;
+ orig_file = p->file;
+ orig_line = p->line;
+ }
+
+#if TRY_BOUNDS_CHECK_ON_FREE
+ vpx_memory_tracker_check_integrity(file, line);
+#endif
+
+ /* have to do this regardless of success, because
+ * the memory that does get realloc'd may change
+ * the bounds values of this block
+ */
+ vpx_memory_tracker_remove((size_t)memblk);
+
+#if TRY_BOUNDS_CHECK
+ {
+ int i;
+ unsigned int tempme = BOUNDS_CHECK_VALUE;
+
+ x_bounds = vpx_realloc(memblk, size + (BOUNDS_CHECK_PAD_SIZE * 2));
+
+ if (x_bounds) {
+ x_bounds = (unsigned char *)(((size_t *)x_bounds)[-1]);
+ x = align_addr(x_bounds + BOUNDS_CHECK_PAD_SIZE + ADDRESS_STORAGE_SIZE,
+ (int)DEFAULT_ALIGNMENT);
+ /* save the actual malloc address */
+ ((size_t *)x)[-1] = (size_t)x_bounds;
+
+ for (i = 0; i < BOUNDS_CHECK_PAD_SIZE; i += sizeof(unsigned int)) {
+ VPX_MEMCPY_L(x_bounds + i, &tempme, sizeof(unsigned int));
+ VPX_MEMCPY_L((unsigned char *)x + size + i,
+ &tempme, sizeof(unsigned int));
+ }
+ } else
+ x = NULL;
+ }
+#else
+ x = vpx_realloc(memblk, size);
+#endif /*TRY_BOUNDS_CHECK*/
+
+ if (!memblk) ++g_alloc_count;
+
+ if (x)
+ vpx_memory_tracker_add((size_t)x, (unsigned int)size, file, line, 1);
+ else
+ vpx_memory_tracker_add((size_t)memblk, orig_size, orig_file, orig_line, 1);
+
+ return x;
+}
+
+void xvpx_free(void *p_address, char *file, int line) {
+#if TRY_BOUNDS_CHECK
+ unsigned char *p_bounds_address = (unsigned char *)p_address;
+ /*p_bounds_address -= BOUNDS_CHECK_PAD_SIZE;*/
+#endif
+
+#if !TRY_BOUNDS_CHECK_ON_FREE
+ (void)file;
+ (void)line;
+#endif
+
+ if (p_address) {
#if TRY_BOUNDS_CHECK_ON_FREE
vpx_memory_tracker_check_integrity(file, line);
#endif
- /* have to do this regardless of success, because
- * the memory that does get realloc'd may change
- * the bounds values of this block
+ /* if the addr isn't found in the list, assume it was allocated via
+ * vpx_ calls not xvpx_, therefore it does not contain any padding
*/
- vpx_memory_tracker_remove((size_t)memblk);
+ if (vpx_memory_tracker_remove((size_t)p_address) == -2) {
+ p_bounds_address = p_address;
+ _P(fprintf(stderr, "[vpx_mem][xvpx_free] addr: %p not found in"
+ " list; freed from file:%s"
+ " line:%d\n", p_address, file, line));
+ } else
+ --g_alloc_count;
#if TRY_BOUNDS_CHECK
- {
- int i;
- unsigned int tempme = BOUNDS_CHECK_VALUE;
-
- x_bounds = vpx_realloc(memblk, size + (BOUNDS_CHECK_PAD_SIZE * 2));
-
- if (x_bounds)
- {
- x_bounds = (unsigned char *)(((size_t *)x_bounds)[-1]);
- x = align_addr(x_bounds + BOUNDS_CHECK_PAD_SIZE + ADDRESS_STORAGE_SIZE,
- (int)DEFAULT_ALIGNMENT);
- /* save the actual malloc address */
- ((size_t *)x)[-1] = (size_t)x_bounds;
-
- for (i = 0; i < BOUNDS_CHECK_PAD_SIZE; i += sizeof(unsigned int))
- {
- VPX_MEMCPY_L(x_bounds + i, &tempme, sizeof(unsigned int));
- VPX_MEMCPY_L((unsigned char *)x + size + i,
- &tempme, sizeof(unsigned int));
- }
- }
- else
- x = NULL;
- }
+ vpx_free(p_bounds_address);
#else
- x = vpx_realloc(memblk, size);
-#endif /*TRY_BOUNDS_CHECK*/
-
- if (!memblk) ++g_alloc_count;
-
- if (x)
- vpx_memory_tracker_add((size_t)x, (unsigned int)size, file, line, 1);
- else
- vpx_memory_tracker_add((size_t)memblk, orig_size, orig_file, orig_line, 1);
-
- return x;
-}
-
-void xvpx_free(void *p_address, char *file, int line)
-{
-#if TRY_BOUNDS_CHECK
- unsigned char *p_bounds_address = (unsigned char *)p_address;
- /*p_bounds_address -= BOUNDS_CHECK_PAD_SIZE;*/
+ vpx_free(p_address);
#endif
-#if !TRY_BOUNDS_CHECK_ON_FREE
- (void)file;
- (void)line;
-#endif
-
- if (p_address)
- {
-#if TRY_BOUNDS_CHECK_ON_FREE
- vpx_memory_tracker_check_integrity(file, line);
-#endif
-
- /* if the addr isn't found in the list, assume it was allocated via
- * vpx_ calls not xvpx_, therefore it does not contain any padding
- */
- if (vpx_memory_tracker_remove((size_t)p_address) == -2)
- {
- p_bounds_address = p_address;
- _P(fprintf(stderr, "[vpx_mem][xvpx_free] addr: %p not found in"
- " list; freed from file:%s"
- " line:%d\n", p_address, file, line));
- }
- else
- --g_alloc_count;
-
-#if TRY_BOUNDS_CHECK
- vpx_free(p_bounds_address);
-#else
- vpx_free(p_address);
-#endif
-
- if (!g_alloc_count)
- vpx_memory_tracker_destroy();
- }
+ if (!g_alloc_count)
+ vpx_memory_tracker_destroy();
+ }
}
#endif /*CONFIG_MEM_TRACKER*/
@@ -426,297 +393,265 @@
#include <task_lib.h> /*for task_delay()*/
/* This function is only used to get a stack trace of the player
object so we can se where we are having a problem. */
-static int get_my_tt(int task)
-{
- tt(task);
+static int get_my_tt(int task) {
+ tt(task);
- return 0;
+ return 0;
}
-static void vx_sleep(int msec)
-{
- int ticks_to_sleep = 0;
+static void vx_sleep(int msec) {
+ int ticks_to_sleep = 0;
- if (msec)
- {
- int msec_per_tick = 1000 / sys_clk_rate_get();
+ if (msec) {
+ int msec_per_tick = 1000 / sys_clk_rate_get();
- if (msec < msec_per_tick)
- ticks_to_sleep++;
- else
- ticks_to_sleep = msec / msec_per_tick;
- }
+ if (msec < msec_per_tick)
+ ticks_to_sleep++;
+ else
+ ticks_to_sleep = msec / msec_per_tick;
+ }
- task_delay(ticks_to_sleep);
+ task_delay(ticks_to_sleep);
}
#endif
#endif
-void *vpx_memcpy(void *dest, const void *source, size_t length)
-{
+void *vpx_memcpy(void *dest, const void *source, size_t length) {
#if CONFIG_MEM_CHECKS
- if (((int)dest < 0x4000) || ((int)source < 0x4000))
- {
- _P(printf("WARNING: vpx_memcpy dest:0x%x source:0x%x len:%d\n", (int)dest, (int)source, length);)
+ if (((int)dest < 0x4000) || ((int)source < 0x4000)) {
+ _P(printf("WARNING: vpx_memcpy dest:0x%x source:0x%x len:%d\n", (int)dest, (int)source, length);)
#if defined(VXWORKS)
- sp(get_my_tt, task_id_self(), 0, 0, 0, 0, 0, 0, 0, 0);
+ sp(get_my_tt, task_id_self(), 0, 0, 0, 0, 0, 0, 0, 0);
- vx_sleep(10000);
+ vx_sleep(10000);
#endif
- }
+ }
#endif
- return VPX_MEMCPY_L(dest, source, length);
+ return VPX_MEMCPY_L(dest, source, length);
}
-void *vpx_memset(void *dest, int val, size_t length)
-{
+void *vpx_memset(void *dest, int val, size_t length) {
#if CONFIG_MEM_CHECKS
- if ((int)dest < 0x4000)
- {
- _P(printf("WARNING: vpx_memset dest:0x%x val:%d len:%d\n", (int)dest, val, length);)
+ if ((int)dest < 0x4000) {
+ _P(printf("WARNING: vpx_memset dest:0x%x val:%d len:%d\n", (int)dest, val, length);)
#if defined(VXWORKS)
- sp(get_my_tt, task_id_self(), 0, 0, 0, 0, 0, 0, 0, 0);
+ sp(get_my_tt, task_id_self(), 0, 0, 0, 0, 0, 0, 0, 0);
- vx_sleep(10000);
+ vx_sleep(10000);
#endif
- }
+ }
#endif
- return VPX_MEMSET_L(dest, val, length);
+ return VPX_MEMSET_L(dest, val, length);
}
-void *vpx_memmove(void *dest, const void *src, size_t count)
-{
+void *vpx_memmove(void *dest, const void *src, size_t count) {
#if CONFIG_MEM_CHECKS
- if (((int)dest < 0x4000) || ((int)src < 0x4000))
- {
- _P(printf("WARNING: vpx_memmove dest:0x%x src:0x%x count:%d\n", (int)dest, (int)src, count);)
+ if (((int)dest < 0x4000) || ((int)src < 0x4000)) {
+ _P(printf("WARNING: vpx_memmove dest:0x%x src:0x%x count:%d\n", (int)dest, (int)src, count);)
#if defined(VXWORKS)
- sp(get_my_tt, task_id_self(), 0, 0, 0, 0, 0, 0, 0, 0);
+ sp(get_my_tt, task_id_self(), 0, 0, 0, 0, 0, 0, 0, 0);
- vx_sleep(10000);
+ vx_sleep(10000);
#endif
- }
+ }
#endif
- return VPX_MEMMOVE_L(dest, src, count);
+ return VPX_MEMMOVE_L(dest, src, count);
}
#if CONFIG_MEM_MANAGER
-static int vpx_mm_create_heap_memory()
-{
- int i_rv = 0;
+static int vpx_mm_create_heap_memory() {
+ int i_rv = 0;
- if (!g_mng_memory_allocated)
- {
+ if (!g_mng_memory_allocated) {
#if MM_DYNAMIC_MEMORY
- g_p_mng_memory_raw =
- (unsigned char *)malloc(g_mm_memory_size + HMM_ADDR_ALIGN_UNIT);
+ g_p_mng_memory_raw =
+ (unsigned char *)malloc(g_mm_memory_size + HMM_ADDR_ALIGN_UNIT);
- if (g_p_mng_memory_raw)
- {
- g_p_mng_memory = (unsigned char *)((((unsigned int)g_p_mng_memory_raw) +
- HMM_ADDR_ALIGN_UNIT - 1) &
- -(int)HMM_ADDR_ALIGN_UNIT);
+ if (g_p_mng_memory_raw) {
+ g_p_mng_memory = (unsigned char *)((((unsigned int)g_p_mng_memory_raw) +
+ HMM_ADDR_ALIGN_UNIT - 1) &
+ -(int)HMM_ADDR_ALIGN_UNIT);
- _P(printf("[vpx][mm] total memory size:%d g_p_mng_memory_raw:0x%x g_p_mng_memory:0x%x\n"
- , g_mm_memory_size + HMM_ADDR_ALIGN_UNIT
- , (unsigned int)g_p_mng_memory_raw
- , (unsigned int)g_p_mng_memory);)
- }
- else
- {
- _P(printf("[vpx][mm] Couldn't allocate memory:%d for vpx memory manager.\n"
- , g_mm_memory_size);)
+ _P(printf("[vpx][mm] total memory size:%d g_p_mng_memory_raw:0x%x g_p_mng_memory:0x%x\n"
+, g_mm_memory_size + HMM_ADDR_ALIGN_UNIT
+, (unsigned int)g_p_mng_memory_raw
+, (unsigned int)g_p_mng_memory);)
+ } else {
+ _P(printf("[vpx][mm] Couldn't allocate memory:%d for vpx memory manager.\n"
+, g_mm_memory_size);)
- i_rv = -1;
- }
-
- if (g_p_mng_memory)
-#endif
- {
- int chunk_size = 0;
-
- g_mng_memory_allocated = 1;
-
- hmm_init(&hmm_d);
-
- chunk_size = g_mm_memory_size >> SHIFT_HMM_ADDR_ALIGN_UNIT;
-
- chunk_size -= DUMMY_END_BLOCK_BAUS;
-
- _P(printf("[vpx][mm] memory size:%d for vpx memory manager. g_p_mng_memory:0x%x chunk_size:%d\n"
- , g_mm_memory_size
- , (unsigned int)g_p_mng_memory
- , chunk_size);)
-
- hmm_new_chunk(&hmm_d, (void *)g_p_mng_memory, chunk_size);
- }
-
-#if MM_DYNAMIC_MEMORY
- else
- {
- _P(printf("[vpx][mm] Couldn't allocate memory:%d for vpx memory manager.\n"
- , g_mm_memory_size);)
-
- i_rv = -1;
- }
-
-#endif
+ i_rv = -1;
}
- return i_rv;
+ if (g_p_mng_memory)
+#endif
+ {
+ int chunk_size = 0;
+
+ g_mng_memory_allocated = 1;
+
+ hmm_init(&hmm_d);
+
+ chunk_size = g_mm_memory_size >> SHIFT_HMM_ADDR_ALIGN_UNIT;
+
+ chunk_size -= DUMMY_END_BLOCK_BAUS;
+
+ _P(printf("[vpx][mm] memory size:%d for vpx memory manager. g_p_mng_memory:0x%x chunk_size:%d\n"
+, g_mm_memory_size
+, (unsigned int)g_p_mng_memory
+, chunk_size);)
+
+ hmm_new_chunk(&hmm_d, (void *)g_p_mng_memory, chunk_size);
+ }
+
+#if MM_DYNAMIC_MEMORY
+ else {
+ _P(printf("[vpx][mm] Couldn't allocate memory:%d for vpx memory manager.\n"
+, g_mm_memory_size);)
+
+ i_rv = -1;
+ }
+
+#endif
+ }
+
+ return i_rv;
}
-static void *vpx_mm_realloc(void *memblk, size_t size)
-{
- void *p_ret = NULL;
+static void *vpx_mm_realloc(void *memblk, size_t size) {
+ void *p_ret = NULL;
- if (vpx_mm_create_heap_memory() < 0)
- {
- _P(printf("[vpx][mm] ERROR vpx_mm_realloc() Couldn't create memory for Heap.\n");)
- }
- else
- {
- int i_rv = 0;
- int old_num_aaus;
- int new_num_aaus;
+ if (vpx_mm_create_heap_memory() < 0) {
+ _P(printf("[vpx][mm] ERROR vpx_mm_realloc() Couldn't create memory for Heap.\n");)
+ } else {
+ int i_rv = 0;
+ int old_num_aaus;
+ int new_num_aaus;
- old_num_aaus = hmm_true_size(memblk);
+ old_num_aaus = hmm_true_size(memblk);
+ new_num_aaus = (size >> SHIFT_HMM_ADDR_ALIGN_UNIT) + 1;
+
+ if (old_num_aaus == new_num_aaus) {
+ p_ret = memblk;
+ } else {
+ i_rv = hmm_resize(&hmm_d, memblk, new_num_aaus);
+
+ if (i_rv == 0) {
+ p_ret = memblk;
+ } else {
+ /* Error. Try to malloc and then copy data. */
+ void *p_from_malloc;
+
new_num_aaus = (size >> SHIFT_HMM_ADDR_ALIGN_UNIT) + 1;
+ p_from_malloc = hmm_alloc(&hmm_d, new_num_aaus);
- if (old_num_aaus == new_num_aaus)
- {
- p_ret = memblk;
+ if (p_from_malloc) {
+ vpx_memcpy(p_from_malloc, memblk, size);
+ hmm_free(&hmm_d, memblk);
+
+ p_ret = p_from_malloc;
}
- else
- {
- i_rv = hmm_resize(&hmm_d, memblk, new_num_aaus);
-
- if (i_rv == 0)
- {
- p_ret = memblk;
- }
- else
- {
- /* Error. Try to malloc and then copy data. */
- void *p_from_malloc;
-
- new_num_aaus = (size >> SHIFT_HMM_ADDR_ALIGN_UNIT) + 1;
- p_from_malloc = hmm_alloc(&hmm_d, new_num_aaus);
-
- if (p_from_malloc)
- {
- vpx_memcpy(p_from_malloc, memblk, size);
- hmm_free(&hmm_d, memblk);
-
- p_ret = p_from_malloc;
- }
- }
- }
+ }
}
+ }
- return p_ret;
+ return p_ret;
}
#endif /*CONFIG_MEM_MANAGER*/
#if USE_GLOBAL_FUNCTION_POINTERS
# if CONFIG_MEM_TRACKER
extern int vpx_memory_tracker_set_functions(g_malloc_func g_malloc_l
- , g_calloc_func g_calloc_l
- , g_realloc_func g_realloc_l
- , g_free_func g_free_l
- , g_memcpy_func g_memcpy_l
- , g_memset_func g_memset_l
- , g_memmove_func g_memmove_l);
+, g_calloc_func g_calloc_l
+, g_realloc_func g_realloc_l
+, g_free_func g_free_l
+, g_memcpy_func g_memcpy_l
+, g_memset_func g_memset_l
+, g_memmove_func g_memmove_l);
# endif
#endif /*USE_GLOBAL_FUNCTION_POINTERS*/
int vpx_mem_set_functions(g_malloc_func g_malloc_l
- , g_calloc_func g_calloc_l
- , g_realloc_func g_realloc_l
- , g_free_func g_free_l
- , g_memcpy_func g_memcpy_l
- , g_memset_func g_memset_l
- , g_memmove_func g_memmove_l)
-{
+, g_calloc_func g_calloc_l
+, g_realloc_func g_realloc_l
+, g_free_func g_free_l
+, g_memcpy_func g_memcpy_l
+, g_memset_func g_memset_l
+, g_memmove_func g_memmove_l) {
#if USE_GLOBAL_FUNCTION_POINTERS
- /* If use global functions is turned on then the
- application must set the global functions before
- it does anything else or vpx_mem will have
- unpredictable results. */
- if (!g_func)
- {
- g_func = (struct GLOBAL_FUNC_POINTERS *)
- g_malloc_l(sizeof(struct GLOBAL_FUNC_POINTERS));
+ /* If use global functions is turned on then the
+ application must set the global functions before
+ it does anything else or vpx_mem will have
+ unpredictable results. */
+ if (!g_func) {
+ g_func = (struct GLOBAL_FUNC_POINTERS *)
+ g_malloc_l(sizeof(struct GLOBAL_FUNC_POINTERS));
- if (!g_func)
- {
- return -1;
- }
+ if (!g_func) {
+ return -1;
}
+ }
#if CONFIG_MEM_TRACKER
- {
- int rv = 0;
- rv = vpx_memory_tracker_set_functions(g_malloc_l
- , g_calloc_l
- , g_realloc_l
- , g_free_l
- , g_memcpy_l
- , g_memset_l
- , g_memmove_l);
+ {
+ int rv = 0;
+ rv = vpx_memory_tracker_set_functions(g_malloc_l
+, g_calloc_l
+, g_realloc_l
+, g_free_l
+, g_memcpy_l
+, g_memset_l
+, g_memmove_l);
- if (rv < 0)
- {
- return rv;
- }
+ if (rv < 0) {
+ return rv;
}
+ }
#endif
- g_func->g_malloc = g_malloc_l;
- g_func->g_calloc = g_calloc_l;
- g_func->g_realloc = g_realloc_l;
- g_func->g_free = g_free_l;
- g_func->g_memcpy = g_memcpy_l;
- g_func->g_memset = g_memset_l;
- g_func->g_memmove = g_memmove_l;
+ g_func->g_malloc = g_malloc_l;
+ g_func->g_calloc = g_calloc_l;
+ g_func->g_realloc = g_realloc_l;
+ g_func->g_free = g_free_l;
+ g_func->g_memcpy = g_memcpy_l;
+ g_func->g_memset = g_memset_l;
+ g_func->g_memmove = g_memmove_l;
- return 0;
+ return 0;
#else
- (void)g_malloc_l;
- (void)g_calloc_l;
- (void)g_realloc_l;
- (void)g_free_l;
- (void)g_memcpy_l;
- (void)g_memset_l;
- (void)g_memmove_l;
- return -1;
+ (void)g_malloc_l;
+ (void)g_calloc_l;
+ (void)g_realloc_l;
+ (void)g_free_l;
+ (void)g_memcpy_l;
+ (void)g_memset_l;
+ (void)g_memmove_l;
+ return -1;
#endif
}
-int vpx_mem_unset_functions()
-{
+int vpx_mem_unset_functions() {
#if USE_GLOBAL_FUNCTION_POINTERS
- if (g_func)
- {
- g_free_func temp_free = g_func->g_free;
- temp_free(g_func);
- g_func = NULL;
- }
+ if (g_func) {
+ g_free_func temp_free = g_func->g_free;
+ temp_free(g_func);
+ g_func = NULL;
+ }
#endif
- return 0;
+ return 0;
}
diff --git a/vpx_mem/vpx_mem.h b/vpx_mem/vpx_mem.h
index 749eaa4..3246e92 100644
--- a/vpx_mem/vpx_mem.h
+++ b/vpx_mem/vpx_mem.h
@@ -12,6 +12,7 @@
#ifndef __VPX_MEM_H__
#define __VPX_MEM_H__
+#include "vpx_config.h"
#if defined(__uClinux__)
# include <lddk.h>
#endif
@@ -30,11 +31,11 @@
#endif
#ifndef VPX_CHECK_MEM_FUNCTIONS
# define VPX_CHECK_MEM_FUNCTIONS 0 /* enable basic safety checks in _memcpy,
- _memset, and _memmove */
+_memset, and _memmove */
#endif
#ifndef REPLACE_BUILTIN_FUNCTIONS
# define REPLACE_BUILTIN_FUNCTIONS 0 /* replace builtin functions with their
- vpx_ equivalents */
+vpx_ equivalents */
#endif
#include <stdlib.h>
@@ -44,60 +45,60 @@
extern "C" {
#endif
- /*
- vpx_mem_get_version()
- provided for runtime version checking. Returns an unsigned int of the form
- CHIEF | MAJOR | MINOR | PATCH, where the chief version number is the high
- order byte.
- */
- unsigned int vpx_mem_get_version(void);
+ /*
+ vpx_mem_get_version()
+ provided for runtime version checking. Returns an unsigned int of the form
+ CHIEF | MAJOR | MINOR | PATCH, where the chief version number is the high
+ order byte.
+ */
+ unsigned int vpx_mem_get_version(void);
- /*
- vpx_mem_set_heap_size(size_t size)
- size - size in bytes for the memory manager to allocate for its heap
- Sets the memory manager's initial heap size
- Return:
- 0: on success
- -1: if memory manager calls have not been included in the vpx_mem lib
- -2: if the memory manager has been compiled to use static memory
- -3: if the memory manager has already allocated its heap
- */
- int vpx_mem_set_heap_size(size_t size);
+ /*
+ vpx_mem_set_heap_size(size_t size)
+ size - size in bytes for the memory manager to allocate for its heap
+ Sets the memory manager's initial heap size
+ Return:
+ 0: on success
+ -1: if memory manager calls have not been included in the vpx_mem lib
+ -2: if the memory manager has been compiled to use static memory
+ -3: if the memory manager has already allocated its heap
+ */
+ int vpx_mem_set_heap_size(size_t size);
- void *vpx_memalign(size_t align, size_t size);
- void *vpx_malloc(size_t size);
- void *vpx_calloc(size_t num, size_t size);
- void *vpx_realloc(void *memblk, size_t size);
- void vpx_free(void *memblk);
+ void *vpx_memalign(size_t align, size_t size);
+ void *vpx_malloc(size_t size);
+ void *vpx_calloc(size_t num, size_t size);
+ void *vpx_realloc(void *memblk, size_t size);
+ void vpx_free(void *memblk);
- void *vpx_memcpy(void *dest, const void *src, size_t length);
- void *vpx_memset(void *dest, int val, size_t length);
- void *vpx_memmove(void *dest, const void *src, size_t count);
+ void *vpx_memcpy(void *dest, const void *src, size_t length);
+ void *vpx_memset(void *dest, int val, size_t length);
+ void *vpx_memmove(void *dest, const void *src, size_t count);
- /* special memory functions */
- void *vpx_mem_alloc(int id, size_t size, size_t align);
- void vpx_mem_free(int id, void *mem, size_t size);
+ /* special memory functions */
+ void *vpx_mem_alloc(int id, size_t size, size_t align);
+ void vpx_mem_free(int id, void *mem, size_t size);
- /* Wrappers to standard library functions. */
- typedef void*(* g_malloc_func)(size_t);
- typedef void*(* g_calloc_func)(size_t, size_t);
- typedef void*(* g_realloc_func)(void *, size_t);
- typedef void (* g_free_func)(void *);
- typedef void*(* g_memcpy_func)(void *, const void *, size_t);
- typedef void*(* g_memset_func)(void *, int, size_t);
- typedef void*(* g_memmove_func)(void *, const void *, size_t);
+ /* Wrappers to standard library functions. */
+ typedef void *(* g_malloc_func)(size_t);
+ typedef void *(* g_calloc_func)(size_t, size_t);
+ typedef void *(* g_realloc_func)(void *, size_t);
+ typedef void (* g_free_func)(void *);
+ typedef void *(* g_memcpy_func)(void *, const void *, size_t);
+ typedef void *(* g_memset_func)(void *, int, size_t);
+ typedef void *(* g_memmove_func)(void *, const void *, size_t);
- int vpx_mem_set_functions(g_malloc_func g_malloc_l
- , g_calloc_func g_calloc_l
- , g_realloc_func g_realloc_l
- , g_free_func g_free_l
- , g_memcpy_func g_memcpy_l
- , g_memset_func g_memset_l
- , g_memmove_func g_memmove_l);
- int vpx_mem_unset_functions(void);
+ int vpx_mem_set_functions(g_malloc_func g_malloc_l
+, g_calloc_func g_calloc_l
+, g_realloc_func g_realloc_l
+, g_free_func g_free_l
+, g_memcpy_func g_memcpy_l
+, g_memset_func g_memset_l
+, g_memmove_func g_memmove_l);
+ int vpx_mem_unset_functions(void);
- /* some defines for backward compatibility */
+ /* some defines for backward compatibility */
#define DMEM_GENERAL 0
#define duck_memalign(X,Y,Z) vpx_memalign(X,Y)
@@ -124,13 +125,13 @@
#if CONFIG_MEM_TRACKER
#include <stdarg.h>
- /*from vpx_mem/vpx_mem_tracker.c*/
- extern void vpx_memory_tracker_dump();
- extern void vpx_memory_tracker_check_integrity(char *file, unsigned int line);
- extern int vpx_memory_tracker_set_log_type(int type, char *option);
- extern int vpx_memory_tracker_set_log_func(void *userdata,
- void(*logfunc)(void *userdata,
- const char *fmt, va_list args));
+ /*from vpx_mem/vpx_mem_tracker.c*/
+ extern void vpx_memory_tracker_dump();
+ extern void vpx_memory_tracker_check_integrity(char *file, unsigned int line);
+ extern int vpx_memory_tracker_set_log_type(int type, char *option);
+ extern int vpx_memory_tracker_set_log_func(void *userdata,
+ void(*logfunc)(void *userdata,
+ const char *fmt, va_list args));
# ifndef __VPX_MEM_C__
# define vpx_memalign(align, size) xvpx_memalign((align), (size), __FILE__, __LINE__)
# define vpx_malloc(size) xvpx_malloc((size), __FILE__, __LINE__)
@@ -142,13 +143,13 @@
# define vpx_mem_free(id,mem,size) xvpx_mem_free(id, mem, size, __FILE__, __LINE__)
# endif
- void *xvpx_memalign(size_t align, size_t size, char *file, int line);
- void *xvpx_malloc(size_t size, char *file, int line);
- void *xvpx_calloc(size_t num, size_t size, char *file, int line);
- void *xvpx_realloc(void *memblk, size_t size, char *file, int line);
- void xvpx_free(void *memblk, char *file, int line);
- void *xvpx_mem_alloc(int id, size_t size, size_t align, char *file, int line);
- void xvpx_mem_free(int id, void *mem, size_t size, char *file, int line);
+ void *xvpx_memalign(size_t align, size_t size, char *file, int line);
+ void *xvpx_malloc(size_t size, char *file, int line);
+ void *xvpx_calloc(size_t num, size_t size, char *file, int line);
+ void *xvpx_realloc(void *memblk, size_t size, char *file, int line);
+ void xvpx_free(void *memblk, char *file, int line);
+ void *xvpx_mem_alloc(int id, size_t size, size_t align, char *file, int line);
+ void xvpx_mem_free(int id, void *mem, size_t size, char *file, int line);
#else
# ifndef __VPX_MEM_C__
diff --git a/vpx_mem/vpx_mem_tracker.c b/vpx_mem/vpx_mem_tracker.c
index b37076e..613e8a1 100644
--- a/vpx_mem/vpx_mem_tracker.c
+++ b/vpx_mem/vpx_mem_tracker.c
@@ -22,7 +22,7 @@
in the memory_tracker struct as well as calls to create/destroy/lock/unlock
the mutex in vpx_memory_tracker_init/Destroy and memory_tracker_lock_mutex/unlock_mutex
*/
-#include "vpx_config.h"
+#include "./vpx_config.h"
#if defined(__uClinux__)
# include <lddk.h>
@@ -40,20 +40,20 @@
#include <stdio.h>
#include <stdlib.h>
-#include <string.h> //VXWORKS doesn't have a malloc/memory.h file,
-//this should pull in malloc,free,etc.
+#include <string.h> // VXWORKS doesn't have a malloc/memory.h file,
+// this should pull in malloc,free,etc.
#include <stdarg.h>
#include "include/vpx_mem_tracker.h"
-#undef vpx_malloc //undefine any vpx_mem macros that may affect calls to
-#undef vpx_free //memory functions in this file
+#undef vpx_malloc // undefine any vpx_mem macros that may affect calls to
+#undef vpx_free // memory functions in this file
#undef vpx_memcpy
#undef vpx_memset
#ifndef USE_GLOBAL_FUNCTION_POINTERS
-# define USE_GLOBAL_FUNCTION_POINTERS 0 //use function pointers instead of compiled functions.
+# define USE_GLOBAL_FUNCTION_POINTERS 0 // use function pointers instead of compiled functions.
#endif
#if USE_GLOBAL_FUNCTION_POINTERS
@@ -94,39 +94,37 @@
#endif
#ifndef VPX_NO_GLOBALS
-struct memory_tracker
-{
- struct mem_block *head,
- * tail;
- int len,
- totalsize;
- unsigned int current_allocated,
- max_allocated;
+struct memory_tracker {
+ struct mem_block *head,
+ * tail;
+ int len,
+ totalsize;
+ unsigned int current_allocated,
+ max_allocated;
#if HAVE_PTHREAD_H
- pthread_mutex_t mutex;
+ pthread_mutex_t mutex;
#elif defined(WIN32) || defined(_WIN32_WCE)
- HANDLE mutex;
+ HANDLE mutex;
#elif defined(VXWORKS)
- SEM_ID mutex;
+ SEM_ID mutex;
#elif defined(NO_MUTEX)
#else
#error "No mutex type defined for this platform!"
#endif
- int padding_size,
- pad_value;
+ int padding_size,
+ pad_value;
};
-static struct memory_tracker memtrack; //our global memory allocation list
-static int g_b_mem_tracker_inited = 0; //indicates whether the global list has
-//been initialized (1:yes/0:no)
-static struct
-{
- FILE *file;
- int type;
- void (*func)(void *userdata, const char *fmt, va_list args);
- void *userdata;
+static struct memory_tracker memtrack; // our global memory allocation list
+static int g_b_mem_tracker_inited = 0; // indicates whether the global list has
+// been initialized (1:yes/0:no)
+static struct {
+ FILE *file;
+ int type;
+ void (*func)(void *userdata, const char *fmt, va_list args);
+ void *userdata;
} g_logging = {NULL, 0, NULL, NULL};
#else
# include "vpx_global_handling.h"
@@ -157,60 +155,54 @@
Initializes global memory tracker structure
Allocates the head of the list
*/
-int vpx_memory_tracker_init(int padding_size, int pad_value)
-{
- if (!g_b_mem_tracker_inited)
- {
- if ((memtrack.head = (struct mem_block *)
- MEM_TRACK_MALLOC(sizeof(struct mem_block))))
- {
- int ret;
+int vpx_memory_tracker_init(int padding_size, int pad_value) {
+ if (!g_b_mem_tracker_inited) {
+ if ((memtrack.head = (struct mem_block *)
+ MEM_TRACK_MALLOC(sizeof(struct mem_block)))) {
+ int ret;
- MEM_TRACK_MEMSET(memtrack.head, 0, sizeof(struct mem_block));
+ MEM_TRACK_MEMSET(memtrack.head, 0, sizeof(struct mem_block));
- memtrack.tail = memtrack.head;
+ memtrack.tail = memtrack.head;
- memtrack.current_allocated = 0;
- memtrack.max_allocated = 0;
+ memtrack.current_allocated = 0;
+ memtrack.max_allocated = 0;
- memtrack.padding_size = padding_size;
- memtrack.pad_value = pad_value;
+ memtrack.padding_size = padding_size;
+ memtrack.pad_value = pad_value;
#if HAVE_PTHREAD_H
- ret = pthread_mutex_init(&memtrack.mutex,
- NULL); /*mutex attributes (NULL=default)*/
+ ret = pthread_mutex_init(&memtrack.mutex,
+ NULL); /*mutex attributes (NULL=default)*/
#elif defined(WIN32) || defined(_WIN32_WCE)
- memtrack.mutex = CreateMutex(NULL, /*security attributes*/
- FALSE, /*we don't want initial ownership*/
- NULL); /*mutex name*/
- ret = !memtrack.mutex;
+ memtrack.mutex = CreateMutex(NULL, /*security attributes*/
+ FALSE, /*we don't want initial ownership*/
+ NULL); /*mutex name*/
+ ret = !memtrack.mutex;
#elif defined(VXWORKS)
- memtrack.mutex = sem_bcreate(SEM_Q_FIFO, /*SEM_Q_FIFO non-priority based mutex*/
- SEM_FULL); /*SEM_FULL initial state is unlocked*/
- ret = !memtrack.mutex;
+ memtrack.mutex = sem_bcreate(SEM_Q_FIFO, /*SEM_Q_FIFO non-priority based mutex*/
+ SEM_FULL); /*SEM_FULL initial state is unlocked*/
+ ret = !memtrack.mutex;
#elif defined(NO_MUTEX)
- ret = 0;
+ ret = 0;
#endif
- if (ret)
- {
- memtrack_log("vpx_memory_tracker_init: Error creating mutex!\n");
+ if (ret) {
+ memtrack_log("vpx_memory_tracker_init: Error creating mutex!\n");
- MEM_TRACK_FREE(memtrack.head);
- memtrack.head = NULL;
- }
- else
- {
- memtrack_log("Memory Tracker init'd, v."vpx_mem_tracker_version" pad_size:%d pad_val:0x%x %d\n"
- , padding_size
- , pad_value
- , pad_value);
- g_b_mem_tracker_inited = 1;
- }
- }
+ MEM_TRACK_FREE(memtrack.head);
+ memtrack.head = NULL;
+ } else {
+ memtrack_log("Memory Tracker init'd, v."vpx_mem_tracker_version" pad_size:%d pad_val:0x%x %d\n"
+, padding_size
+, pad_value
+, pad_value);
+ g_b_mem_tracker_inited = 1;
+ }
}
+ }
- return g_b_mem_tracker_inited;
+ return g_b_mem_tracker_inited;
}
/*
@@ -218,39 +210,35 @@
If our global struct was initialized zeros out all its members,
frees memory and destroys it's mutex
*/
-void vpx_memory_tracker_destroy()
-{
- if (!memory_tracker_lock_mutex())
- {
- struct mem_block *p = memtrack.head,
- * p2 = memtrack.head;
+void vpx_memory_tracker_destroy() {
+ if (!memory_tracker_lock_mutex()) {
+ struct mem_block *p = memtrack.head,
+ * p2 = memtrack.head;
- memory_tracker_dump();
+ memory_tracker_dump();
- while (p)
- {
- p2 = p;
- p = p->next;
+ while (p) {
+ p2 = p;
+ p = p->next;
- MEM_TRACK_FREE(p2);
- }
-
- memtrack.head = NULL;
- memtrack.tail = NULL;
- memtrack.len = 0;
- memtrack.current_allocated = 0;
- memtrack.max_allocated = 0;
-
- if (!g_logging.type && g_logging.file && g_logging.file != stderr)
- {
- fclose(g_logging.file);
- g_logging.file = NULL;
- }
-
- memory_tracker_unlock_mutex();
-
- g_b_mem_tracker_inited = 0;
+ MEM_TRACK_FREE(p2);
}
+
+ memtrack.head = NULL;
+ memtrack.tail = NULL;
+ memtrack.len = 0;
+ memtrack.current_allocated = 0;
+ memtrack.max_allocated = 0;
+
+ if (!g_logging.type && g_logging.file && g_logging.file != stderr) {
+ fclose(g_logging.file);
+ g_logging.file = NULL;
+ }
+
+ memory_tracker_unlock_mutex();
+
+ g_b_mem_tracker_inited = 0;
+ }
}
/*
@@ -265,9 +253,8 @@
*/
void vpx_memory_tracker_add(size_t addr, unsigned int size,
char *file, unsigned int line,
- int padded)
-{
- memory_tracker_add(addr, size, file, line, padded);
+ int padded) {
+ memory_tracker_add(addr, size, file, line, padded);
}
/*
@@ -278,9 +265,8 @@
Return:
Same as described for memory_tracker_remove
*/
-int vpx_memory_tracker_remove(size_t addr)
-{
- return memory_tracker_remove(addr);
+int vpx_memory_tracker_remove(size_t addr) {
+ return memory_tracker_remove(addr);
}
/*
@@ -290,17 +276,15 @@
If found, pointer to the memory block that matches addr
NULL otherwise
*/
-struct mem_block *vpx_memory_tracker_find(size_t addr)
-{
- struct mem_block *p = NULL;
+struct mem_block *vpx_memory_tracker_find(size_t addr) {
+ struct mem_block *p = NULL;
- if (!memory_tracker_lock_mutex())
- {
- p = memory_tracker_find(addr);
- memory_tracker_unlock_mutex();
- }
+ if (!memory_tracker_lock_mutex()) {
+ p = memory_tracker_find(addr);
+ memory_tracker_unlock_mutex();
+ }
- return p;
+ return p;
}
/*
@@ -309,13 +293,11 @@
library function to dump the current contents of the
global memory allocation list
*/
-void vpx_memory_tracker_dump()
-{
- if (!memory_tracker_lock_mutex())
- {
- memory_tracker_dump();
- memory_tracker_unlock_mutex();
- }
+void vpx_memory_tracker_dump() {
+ if (!memory_tracker_lock_mutex()) {
+ memory_tracker_dump();
+ memory_tracker_unlock_mutex();
+ }
}
/*
@@ -326,13 +308,11 @@
integrity check function to inspect every address in the global
memory allocation list
*/
-void vpx_memory_tracker_check_integrity(char *file, unsigned int line)
-{
- if (!memory_tracker_lock_mutex())
- {
- memory_tracker_check_integrity(file, line);
- memory_tracker_unlock_mutex();
- }
+void vpx_memory_tracker_check_integrity(char *file, unsigned int line) {
+ if (!memory_tracker_lock_mutex()) {
+ memory_tracker_check_integrity(file, line);
+ memory_tracker_unlock_mutex();
+ }
}
/*
@@ -344,43 +324,38 @@
-1: if the logging type could not be set, because the value was invalid
or because a file could not be opened
*/
-int vpx_memory_tracker_set_log_type(int type, char *option)
-{
- int ret = -1;
+int vpx_memory_tracker_set_log_type(int type, char *option) {
+ int ret = -1;
- switch (type)
- {
+ switch (type) {
case 0:
- g_logging.type = 0;
+ g_logging.type = 0;
- if (!option)
- {
- g_logging.file = stderr;
- ret = 0;
- }
- else
- {
- if ((g_logging.file = fopen((char *)option, "w")))
- ret = 0;
- }
+ if (!option) {
+ g_logging.file = stderr;
+ ret = 0;
+ } else {
+ if ((g_logging.file = fopen((char *)option, "w")))
+ ret = 0;
+ }
- break;
+ break;
#if defined(WIN32) && !defined(_WIN32_WCE)
case 1:
- g_logging.type = type;
- ret = 0;
- break;
+ g_logging.type = type;
+ ret = 0;
+ break;
#endif
default:
- break;
- }
+ break;
+ }
- //output the version to the new logging destination
- if (!ret)
- memtrack_log("Memory Tracker logging initialized, "
- "Memory Tracker v."vpx_mem_tracker_version"\n");
+ // output the version to the new logging destination
+ if (!ret)
+ memtrack_log("Memory Tracker logging initialized, "
+ "Memory Tracker v."vpx_mem_tracker_version"\n");
- return ret;
+ return ret;
}
/*
@@ -392,24 +367,22 @@
*/
int vpx_memory_tracker_set_log_func(void *userdata,
void(*logfunc)(void *userdata,
- const char *fmt, va_list args))
-{
- int ret = -1;
+ const char *fmt, va_list args)) {
+ int ret = -1;
- if (logfunc)
- {
- g_logging.type = -1;
- g_logging.userdata = userdata;
- g_logging.func = logfunc;
- ret = 0;
- }
+ if (logfunc) {
+ g_logging.type = -1;
+ g_logging.userdata = userdata;
+ g_logging.func = logfunc;
+ ret = 0;
+ }
- //output the version to the new logging destination
- if (!ret)
- memtrack_log("Memory Tracker logging initialized, "
- "Memory Tracker v."vpx_mem_tracker_version"\n");
+ // output the version to the new logging destination
+ if (!ret)
+ memtrack_log("Memory Tracker logging initialized, "
+ "Memory Tracker v."vpx_mem_tracker_version"\n");
- return ret;
+ return ret;
}
/*
@@ -425,79 +398,73 @@
*
*/
-static void memtrack_log(const char *fmt, ...)
-{
- va_list list;
+static void memtrack_log(const char *fmt, ...) {
+ va_list list;
- va_start(list, fmt);
+ va_start(list, fmt);
- switch (g_logging.type)
- {
+ switch (g_logging.type) {
case -1:
- if (g_logging.func)
- g_logging.func(g_logging.userdata, fmt, list);
+ if (g_logging.func)
+ g_logging.func(g_logging.userdata, fmt, list);
- break;
+ break;
case 0:
- if (g_logging.file)
- {
- vfprintf(g_logging.file, fmt, list);
- fflush(g_logging.file);
- }
+ if (g_logging.file) {
+ vfprintf(g_logging.file, fmt, list);
+ fflush(g_logging.file);
+ }
- break;
+ break;
#if defined(WIN32) && !defined(_WIN32_WCE)
- case 1:
- {
- char temp[1024];
- _vsnprintf(temp, sizeof(temp) / sizeof(char) - 1, fmt, list);
- OutputDebugString(temp);
+ case 1: {
+ char temp[1024];
+ _vsnprintf(temp, sizeof(temp) / sizeof(char) - 1, fmt, list);
+ OutputDebugString(temp);
}
break;
#endif
default:
- break;
- }
+ break;
+ }
- va_end(list);
+ va_end(list);
}
/*
memory_tracker_dump()
Dumps the current contents of the global memory allocation list
*/
-static void memory_tracker_dump()
-{
- int i = 0;
- struct mem_block *p = (memtrack.head ? memtrack.head->next : NULL);
+static void memory_tracker_dump() {
+ int i = 0;
+ struct mem_block *p = (memtrack.head ? memtrack.head->next : NULL);
- memtrack_log("\n_currently Allocated= %d; Max allocated= %d\n",
- memtrack.current_allocated, memtrack.max_allocated);
+ memtrack_log("\n_currently Allocated= %d; Max allocated= %d\n",
+ memtrack.current_allocated, memtrack.max_allocated);
- while (p)
- {
+ while (p) {
#if defined(WIN32) && !defined(_WIN32_WCE)
- /*when using outputdebugstring, output filenames so they
- can be clicked to be opened in visual studio*/
- if (g_logging.type == 1)
- memtrack_log("memblocks[%d].addr= 0x%.8x, memblocks[%d].size= %d, file:\n"
- " %s(%d):\n", i,
- p->addr, i, p->size,
- p->file, p->line);
- else
+ /*when using outputdebugstring, output filenames so they
+ can be clicked to be opened in visual studio*/
+ if (g_logging.type == 1)
+ memtrack_log("memblocks[%d].addr= 0x%.8x, memblocks[%d].size= %d, file:\n"
+ " %s(%d):\n", i,
+ p->addr, i, p->size,
+ p->file, p->line);
+ else
#endif
- memtrack_log("memblocks[%d].addr= 0x%.8x, memblocks[%d].size= %d, file: %s, line: %d\n", i,
- p->addr, i, p->size,
- p->file, p->line);
+ memtrack_log("memblocks[%d].addr= 0x%.8x, memblocks[%d].size= %d, file: %s, line: %d\n", i,
+ p->addr, i, p->size,
+ p->file, p->line);
- p = p->next;
- ++i;
- }
+ p = p->next;
+ ++i;
+ }
- memtrack_log("\n");
+ memtrack_log("\n");
}
/*
@@ -508,55 +475,49 @@
this function will check ea. addr in the list verifying that
addr-padding_size and addr+padding_size is filled with pad_value
*/
-static void memory_tracker_check_integrity(char *file, unsigned int line)
-{
- if (memtrack.padding_size)
- {
- int i,
- index = 0;
- unsigned char *p_show_me,
- * p_show_me2;
- unsigned int tempme = memtrack.pad_value,
- dead1,
- dead2;
- unsigned char *x_bounds;
- struct mem_block *p = memtrack.head->next;
+static void memory_tracker_check_integrity(char *file, unsigned int line) {
+ if (memtrack.padding_size) {
+ int i,
+ index = 0;
+ unsigned char *p_show_me,
+ * p_show_me2;
+ unsigned int tempme = memtrack.pad_value,
+ dead1,
+ dead2;
+ unsigned char *x_bounds;
+ struct mem_block *p = memtrack.head->next;
- while (p)
- {
- //x_bounds = (unsigned char*)p->addr;
- //back up VPX_BYTE_ALIGNMENT
- //x_bounds -= memtrack.padding_size;
+ while (p) {
+ // x_bounds = (unsigned char*)p->addr;
+ // back up VPX_BYTE_ALIGNMENT
+ // x_bounds -= memtrack.padding_size;
- if (p->padded) // can the bounds be checked?
- {
- /*yes, move to the address that was actually allocated
- by the vpx_* calls*/
- x_bounds = (unsigned char *)(((size_t *)p->addr)[-1]);
+ if (p->padded) { // can the bounds be checked?
+ /*yes, move to the address that was actually allocated
+ by the vpx_* calls*/
+ x_bounds = (unsigned char *)(((size_t *)p->addr)[-1]);
- for (i = 0; i < memtrack.padding_size; i += sizeof(unsigned int))
- {
- p_show_me = (x_bounds + i);
- p_show_me2 = (unsigned char *)(p->addr + p->size + i);
+ for (i = 0; i < memtrack.padding_size; i += sizeof(unsigned int)) {
+ p_show_me = (x_bounds + i);
+ p_show_me2 = (unsigned char *)(p->addr + p->size + i);
- MEM_TRACK_MEMCPY(&dead1, p_show_me, sizeof(unsigned int));
- MEM_TRACK_MEMCPY(&dead2, p_show_me2, sizeof(unsigned int));
+ MEM_TRACK_MEMCPY(&dead1, p_show_me, sizeof(unsigned int));
+ MEM_TRACK_MEMCPY(&dead2, p_show_me2, sizeof(unsigned int));
- if ((dead1 != tempme) || (dead2 != tempme))
- {
- memtrack_log("\n[vpx_mem integrity check failed]:\n"
- " index[%d,%d] {%s:%d} addr=0x%x, size=%d,"
- " file: %s, line: %d c0:0x%x c1:0x%x\n",
- index, i, file, line, p->addr, p->size, p->file,
- p->line, dead1, dead2);
- }
- }
- }
-
- ++index;
- p = p->next;
+ if ((dead1 != tempme) || (dead2 != tempme)) {
+ memtrack_log("\n[vpx_mem integrity check failed]:\n"
+ " index[%d,%d] {%s:%d} addr=0x%x, size=%d,"
+ " file: %s, line: %d c0:0x%x c1:0x%x\n",
+ index, i, file, line, p->addr, p->size, p->file,
+ p->line, dead1, dead2);
+ }
}
+ }
+
+ ++index;
+ p = p->next;
}
+ }
}
/*
@@ -568,43 +529,38 @@
*/
void memory_tracker_add(size_t addr, unsigned int size,
char *file, unsigned int line,
- int padded)
-{
- if (!memory_tracker_lock_mutex())
- {
- struct mem_block *p;
+ int padded) {
+ if (!memory_tracker_lock_mutex()) {
+ struct mem_block *p;
- p = MEM_TRACK_MALLOC(sizeof(struct mem_block));
+ p = MEM_TRACK_MALLOC(sizeof(struct mem_block));
- if (p)
- {
- p->prev = memtrack.tail;
- p->prev->next = p;
- p->addr = addr;
- p->size = size;
- p->line = line;
- p->file = file;
- p->padded = padded;
- p->next = NULL;
+ if (p) {
+ p->prev = memtrack.tail;
+ p->prev->next = p;
+ p->addr = addr;
+ p->size = size;
+ p->line = line;
+ p->file = file;
+ p->padded = padded;
+ p->next = NULL;
- memtrack.tail = p;
+ memtrack.tail = p;
- memtrack.current_allocated += size;
+ memtrack.current_allocated += size;
- if (memtrack.current_allocated > memtrack.max_allocated)
- memtrack.max_allocated = memtrack.current_allocated;
+ if (memtrack.current_allocated > memtrack.max_allocated)
+ memtrack.max_allocated = memtrack.current_allocated;
- //memtrack_log("memory_tracker_add: added addr=0x%.8x\n", addr);
+ // memtrack_log("memory_tracker_add: added addr=0x%.8x\n", addr);
- memory_tracker_unlock_mutex();
- }
- else
- {
- memtrack_log("memory_tracker_add: error allocating memory!\n");
- memory_tracker_unlock_mutex();
- vpx_memory_tracker_destroy();
- }
+ memory_tracker_unlock_mutex();
+ } else {
+ memtrack_log("memory_tracker_add: error allocating memory!\n");
+ memory_tracker_unlock_mutex();
+ vpx_memory_tracker_destroy();
}
+ }
}
/*
@@ -617,41 +573,36 @@
-1: if the mutex could not be locked
-2: if the addr was not found in the list
*/
-int memory_tracker_remove(size_t addr)
-{
- int ret = -1;
+int memory_tracker_remove(size_t addr) {
+ int ret = -1;
- if (!memory_tracker_lock_mutex())
- {
- struct mem_block *p;
+ if (!memory_tracker_lock_mutex()) {
+ struct mem_block *p;
- if ((p = memory_tracker_find(addr)))
- {
- memtrack.current_allocated -= p->size;
+ if ((p = memory_tracker_find(addr))) {
+ memtrack.current_allocated -= p->size;
- p->prev->next = p->next;
+ p->prev->next = p->next;
- if (p->next)
- p->next->prev = p->prev;
- else
- memtrack.tail = p->prev;
+ if (p->next)
+ p->next->prev = p->prev;
+ else
+ memtrack.tail = p->prev;
- ret = 0;
- MEM_TRACK_FREE(p);
- }
- else
- {
- if (addr)
- memtrack_log("memory_tracker_remove(): addr not found in list,"
- " 0x%.8x\n", addr);
+ ret = 0;
+ MEM_TRACK_FREE(p);
+ } else {
+ if (addr)
+ memtrack_log("memory_tracker_remove(): addr not found in list,"
+ " 0x%.8x\n", addr);
- ret = -2;
- }
-
- memory_tracker_unlock_mutex();
+ ret = -2;
}
- return ret;
+ memory_tracker_unlock_mutex();
+ }
+
+ return ret;
}
/*
@@ -662,19 +613,17 @@
the need for repeated locking and unlocking as in Remove
Returns: pointer to the mem block if found, NULL otherwise
*/
-static struct mem_block *memory_tracker_find(size_t addr)
-{
- struct mem_block *p = NULL;
+static struct mem_block *memory_tracker_find(size_t addr) {
+ struct mem_block *p = NULL;
- if (memtrack.head)
- {
- p = memtrack.head->next;
+ if (memtrack.head) {
+ p = memtrack.head->next;
- while (p && (p->addr != addr))
- p = p->next;
- }
+ while (p && (p->addr != addr))
+ p = p->next;
+ }
- return p;
+ return p;
}
@@ -687,28 +636,25 @@
<0: Failure, either the mutex was not initialized
or the call to lock the mutex failed
*/
-static int memory_tracker_lock_mutex()
-{
- int ret = -1;
+static int memory_tracker_lock_mutex() {
+ int ret = -1;
- if (g_b_mem_tracker_inited)
- {
+ if (g_b_mem_tracker_inited) {
#if HAVE_PTHREAD_H
- ret = pthread_mutex_lock(&memtrack.mutex);
+ ret = pthread_mutex_lock(&memtrack.mutex);
#elif defined(WIN32) || defined(_WIN32_WCE)
- ret = WaitForSingleObject(memtrack.mutex, INFINITE);
+ ret = WaitForSingleObject(memtrack.mutex, INFINITE);
#elif defined(VXWORKS)
- ret = sem_take(memtrack.mutex, WAIT_FOREVER);
+ ret = sem_take(memtrack.mutex, WAIT_FOREVER);
#endif
- if (ret)
- {
- memtrack_log("memory_tracker_lock_mutex: mutex lock failed\n");
- }
+ if (ret) {
+ memtrack_log("memory_tracker_lock_mutex: mutex lock failed\n");
}
+ }
- return ret;
+ return ret;
}
/*
@@ -719,28 +665,25 @@
<0: Failure, either the mutex was not initialized
or the call to unlock the mutex failed
*/
-static int memory_tracker_unlock_mutex()
-{
- int ret = -1;
+static int memory_tracker_unlock_mutex() {
+ int ret = -1;
- if (g_b_mem_tracker_inited)
- {
+ if (g_b_mem_tracker_inited) {
#if HAVE_PTHREAD_H
- ret = pthread_mutex_unlock(&memtrack.mutex);
+ ret = pthread_mutex_unlock(&memtrack.mutex);
#elif defined(WIN32) || defined(_WIN32_WCE)
- ret = !ReleaseMutex(memtrack.mutex);
+ ret = !ReleaseMutex(memtrack.mutex);
#elif defined(VXWORKS)
- ret = sem_give(memtrack.mutex);
+ ret = sem_give(memtrack.mutex);
#endif
- if (ret)
- {
- memtrack_log("memory_tracker_unlock_mutex: mutex unlock failed\n");
- }
+ if (ret) {
+ memtrack_log("memory_tracker_unlock_mutex: mutex unlock failed\n");
}
+ }
- return ret;
+ return ret;
}
#endif
@@ -754,45 +697,44 @@
-1: if the use global function pointers is not set.
*/
int vpx_memory_tracker_set_functions(mem_track_malloc_func g_malloc_l
- , mem_track_calloc_func g_calloc_l
- , mem_track_realloc_func g_realloc_l
- , mem_track_free_func g_free_l
- , mem_track_memcpy_func g_memcpy_l
- , mem_track_memset_func g_memset_l
- , mem_track_memmove_func g_memmove_l)
-{
+, mem_track_calloc_func g_calloc_l
+, mem_track_realloc_func g_realloc_l
+, mem_track_free_func g_free_l
+, mem_track_memcpy_func g_memcpy_l
+, mem_track_memset_func g_memset_l
+, mem_track_memmove_func g_memmove_l) {
#if USE_GLOBAL_FUNCTION_POINTERS
- if (g_malloc_l)
- g_malloc = g_malloc_l;
+ if (g_malloc_l)
+ g_malloc = g_malloc_l;
- if (g_calloc_l)
- g_calloc = g_calloc_l;
+ if (g_calloc_l)
+ g_calloc = g_calloc_l;
- if (g_realloc_l)
- g_realloc = g_realloc_l;
+ if (g_realloc_l)
+ g_realloc = g_realloc_l;
- if (g_free_l)
- g_free = g_free_l;
+ if (g_free_l)
+ g_free = g_free_l;
- if (g_memcpy_l)
- g_memcpy = g_memcpy_l;
+ if (g_memcpy_l)
+ g_memcpy = g_memcpy_l;
- if (g_memset_l)
- g_memset = g_memset_l;
+ if (g_memset_l)
+ g_memset = g_memset_l;
- if (g_memmove_l)
- g_memmove = g_memmove_l;
+ if (g_memmove_l)
+ g_memmove = g_memmove_l;
- return 0;
+ return 0;
#else
- (void)g_malloc_l;
- (void)g_calloc_l;
- (void)g_realloc_l;
- (void)g_free_l;
- (void)g_memcpy_l;
- (void)g_memset_l;
- (void)g_memmove_l;
- return -1;
+ (void)g_malloc_l;
+ (void)g_calloc_l;
+ (void)g_realloc_l;
+ (void)g_free_l;
+ (void)g_memcpy_l;
+ (void)g_memset_l;
+ (void)g_memmove_l;
+ return -1;
#endif
}
diff --git a/vpx_ports/arm_cpudetect.c b/vpx_ports/arm_cpudetect.c
index 8ff95a1..3c916f2 100644
--- a/vpx_ports/arm_cpudetect.c
+++ b/vpx_ports/arm_cpudetect.c
@@ -12,208 +12,183 @@
#include <string.h>
#include "arm.h"
-static int arm_cpu_env_flags(int *flags)
-{
- char *env;
- env = getenv("VPX_SIMD_CAPS");
- if (env && *env)
- {
- *flags = (int)strtol(env, NULL, 0);
- return 0;
- }
- *flags = 0;
- return -1;
+static int arm_cpu_env_flags(int *flags) {
+ char *env;
+ env = getenv("VPX_SIMD_CAPS");
+ if (env && *env) {
+ *flags = (int)strtol(env, NULL, 0);
+ return 0;
+ }
+ *flags = 0;
+ return -1;
}
-static int arm_cpu_env_mask(void)
-{
- char *env;
- env = getenv("VPX_SIMD_CAPS_MASK");
- return env && *env ? (int)strtol(env, NULL, 0) : ~0;
+static int arm_cpu_env_mask(void) {
+ char *env;
+ env = getenv("VPX_SIMD_CAPS_MASK");
+ return env && *env ? (int)strtol(env, NULL, 0) : ~0;
}
#if !CONFIG_RUNTIME_CPU_DETECT
-int arm_cpu_caps(void)
-{
+int arm_cpu_caps(void) {
/* This function should actually be a no-op. There is no way to adjust any of
* these because the RTCD tables do not exist: the functions are called
* statically */
- int flags;
- int mask;
- if (!arm_cpu_env_flags(&flags))
- {
- return flags;
- }
- mask = arm_cpu_env_mask();
+ int flags;
+ int mask;
+ if (!arm_cpu_env_flags(&flags)) {
+ return flags;
+ }
+ mask = arm_cpu_env_mask();
#if HAVE_EDSP
- flags |= HAS_EDSP;
+ flags |= HAS_EDSP;
#endif /* HAVE_EDSP */
#if HAVE_MEDIA
- flags |= HAS_MEDIA;
+ flags |= HAS_MEDIA;
#endif /* HAVE_MEDIA */
#if HAVE_NEON
- flags |= HAS_NEON;
+ flags |= HAS_NEON;
#endif /* HAVE_NEON */
- return flags & mask;
+ return flags & mask;
}
#elif defined(_MSC_VER) /* end !CONFIG_RUNTIME_CPU_DETECT */
+
+#elif defined(_MSC_VER) /* end !CONFIG_RUNTIME_CPU_DETECT */
/*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/
#define WIN32_LEAN_AND_MEAN
#define WIN32_EXTRA_LEAN
#include <windows.h>
-int arm_cpu_caps(void)
-{
- int flags;
- int mask;
- if (!arm_cpu_env_flags(&flags))
- {
- return flags;
- }
- mask = arm_cpu_env_mask();
- /* MSVC has no inline __asm support for ARM, but it does let you __emit
- * instructions via their assembled hex code.
- * All of these instructions should be essentially nops.
- */
+int arm_cpu_caps(void) {
+ int flags;
+ int mask;
+ if (!arm_cpu_env_flags(&flags)) {
+ return flags;
+ }
+ mask = arm_cpu_env_mask();
+ /* MSVC has no inline __asm support for ARM, but it does let you __emit
+ * instructions via their assembled hex code.
+ * All of these instructions should be essentially nops.
+ */
#if HAVE_EDSP
- if (mask & HAS_EDSP)
- {
- __try
- {
- /*PLD [r13]*/
- __emit(0xF5DDF000);
- flags |= HAS_EDSP;
- }
- __except(GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION)
- {
- /*Ignore exception.*/
- }
+ if (mask & HAS_EDSP) {
+ __try {
+ /*PLD [r13]*/
+ __emit(0xF5DDF000);
+ flags |= HAS_EDSP;
+ } __except (GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION) {
+ /*Ignore exception.*/
}
+ }
#if HAVE_MEDIA
- if (mask & HAS_MEDIA)
- __try
- {
- /*SHADD8 r3,r3,r3*/
- __emit(0xE6333F93);
- flags |= HAS_MEDIA;
- }
- __except(GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION)
- {
- /*Ignore exception.*/
- }
- }
+ if (mask & HAS_MEDIA)
+ __try {
+ /*SHADD8 r3,r3,r3*/
+ __emit(0xE6333F93);
+ flags |= HAS_MEDIA;
+ } __except (GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION) {
+ /*Ignore exception.*/
+ }
+}
#if HAVE_NEON
- if (mask & HAS_NEON)
- {
- __try
- {
- /*VORR q0,q0,q0*/
- __emit(0xF2200150);
- flags |= HAS_NEON;
- }
- __except(GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION)
- {
- /*Ignore exception.*/
- }
- }
+if (mask &HAS_NEON) {
+ __try {
+ /*VORR q0,q0,q0*/
+ __emit(0xF2200150);
+ flags |= HAS_NEON;
+ } __except (GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION) {
+ /*Ignore exception.*/
+ }
+}
#endif /* HAVE_NEON */
#endif /* HAVE_MEDIA */
#endif /* HAVE_EDSP */
- return flags & mask;
+return flags & mask;
}
#elif defined(__ANDROID__) /* end _MSC_VER */
#include <cpu-features.h>
-int arm_cpu_caps(void)
-{
- int flags;
- int mask;
- uint64_t features;
- if (!arm_cpu_env_flags(&flags))
- {
- return flags;
- }
- mask = arm_cpu_env_mask();
- features = android_getCpuFeatures();
+int arm_cpu_caps(void) {
+ int flags;
+ int mask;
+ uint64_t features;
+ if (!arm_cpu_env_flags(&flags)) {
+ return flags;
+ }
+ mask = arm_cpu_env_mask();
+ features = android_getCpuFeatures();
#if HAVE_EDSP
- flags |= HAS_EDSP;
+ flags |= HAS_EDSP;
#endif /* HAVE_EDSP */
#if HAVE_MEDIA
- flags |= HAS_MEDIA;
+ flags |= HAS_MEDIA;
#endif /* HAVE_MEDIA */
#if HAVE_NEON
- if (features & ANDROID_CPU_ARM_FEATURE_NEON)
- flags |= HAS_NEON;
+ if (features & ANDROID_CPU_ARM_FEATURE_NEON)
+ flags |= HAS_NEON;
#endif /* HAVE_NEON */
- return flags & mask;
+ return flags & mask;
}
#elif defined(__linux__) /* end __ANDROID__ */
+
#include <stdio.h>
-int arm_cpu_caps(void)
-{
- FILE *fin;
- int flags;
- int mask;
- if (!arm_cpu_env_flags(&flags))
- {
- return flags;
- }
- mask = arm_cpu_env_mask();
- /* Reading /proc/self/auxv would be easier, but that doesn't work reliably
- * on Android.
- * This also means that detection will fail in Scratchbox.
+int arm_cpu_caps(void) {
+ FILE *fin;
+ int flags;
+ int mask;
+ if (!arm_cpu_env_flags(&flags)) {
+ return flags;
+ }
+ mask = arm_cpu_env_mask();
+ /* Reading /proc/self/auxv would be easier, but that doesn't work reliably
+ * on Android.
+ * This also means that detection will fail in Scratchbox.
+ */
+ fin = fopen("/proc/cpuinfo", "r");
+ if (fin != NULL) {
+ /* 512 should be enough for anybody (it's even enough for all the flags
+ * that x86 has accumulated... so far).
*/
- fin = fopen("/proc/cpuinfo","r");
- if(fin != NULL)
- {
- /* 512 should be enough for anybody (it's even enough for all the flags
- * that x86 has accumulated... so far).
- */
- char buf[512];
- while (fgets(buf, 511, fin) != NULL)
- {
+ char buf[512];
+ while (fgets(buf, 511, fin) != NULL) {
#if HAVE_EDSP || HAVE_NEON
- if (memcmp(buf, "Features", 8) == 0)
- {
- char *p;
+ if (memcmp(buf, "Features", 8) == 0) {
+ char *p;
#if HAVE_EDSP
- p=strstr(buf, " edsp");
- if (p != NULL && (p[5] == ' ' || p[5] == '\n'))
- {
- flags |= HAS_EDSP;
- }
+ p = strstr(buf, " edsp");
+ if (p != NULL && (p[5] == ' ' || p[5] == '\n')) {
+ flags |= HAS_EDSP;
+ }
#if HAVE_NEON
- p = strstr(buf, " neon");
- if (p != NULL && (p[5] == ' ' || p[5] == '\n'))
- {
- flags |= HAS_NEON;
- }
+ p = strstr(buf, " neon");
+ if (p != NULL && (p[5] == ' ' || p[5] == '\n')) {
+ flags |= HAS_NEON;
+ }
#endif /* HAVE_NEON */
#endif /* HAVE_EDSP */
- }
+ }
#endif /* HAVE_EDSP || HAVE_NEON */
#if HAVE_MEDIA
- if (memcmp(buf, "CPU architecture:",17) == 0){
- int version;
- version = atoi(buf+17);
- if (version >= 6)
- {
- flags |= HAS_MEDIA;
- }
- }
-#endif /* HAVE_MEDIA */
+ if (memcmp(buf, "CPU architecture:", 17) == 0) {
+ int version;
+ version = atoi(buf + 17);
+ if (version >= 6) {
+ flags |= HAS_MEDIA;
}
- fclose(fin);
+ }
+#endif /* HAVE_MEDIA */
}
- return flags & mask;
+ fclose(fin);
+ }
+ return flags & mask;
}
#else /* end __linux__ */
#error "--enable-runtime-cpu-detect selected, but no CPU detection method " \
- "available for your platform. Reconfigure with --disable-runtime-cpu-detect."
+"available for your platform. Reconfigure with --disable-runtime-cpu-detect."
#endif
diff --git a/vpx_ports/asm_offsets.h b/vpx_ports/asm_offsets.h
index 7b6ae4a..d3a3e5a 100644
--- a/vpx_ports/asm_offsets.h
+++ b/vpx_ports/asm_offsets.h
@@ -15,8 +15,8 @@
#include <stddef.h>
#define ct_assert(name,cond) \
- static void assert_##name(void) UNUSED;\
- static void assert_##name(void) {switch(0){case 0:case !!(cond):;}}
+ static void assert_##name(void) UNUSED;\
+ static void assert_##name(void) {switch(0){case 0:case !!(cond):;}}
#if INLINE_ASM
#define DEFINE(sym, val) asm("\n" #sym " EQU %0" : : "i" (val))
diff --git a/vpx_ports/config.h b/vpx_ports/config.h
new file mode 100644
index 0000000..1abe70d
--- /dev/null
+++ b/vpx_ports/config.h
@@ -0,0 +1,10 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "vpx_config.h"
diff --git a/vpx_ports/emmintrin_compat.h b/vpx_ports/emmintrin_compat.h
new file mode 100644
index 0000000..782d603
--- /dev/null
+++ b/vpx_ports/emmintrin_compat.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_PORTS_EMMINTRIN_COMPAT_H
+#define VPX_PORTS_EMMINTRIN_COMPAT_H
+
+#if defined(__GNUC__) && __GNUC__ < 4
+/* From emmintrin.h (gcc 4.5.3) */
+/* Casts between various SP, DP, INT vector types. Note that these do no
+ conversion of values, they just change the type. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castpd_ps(__m128d __A)
+{
+ return (__m128) __A;
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castpd_si128(__m128d __A)
+{
+ return (__m128i) __A;
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castps_pd(__m128 __A)
+{
+ return (__m128d) __A;
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castps_si128(__m128 __A)
+{
+ return (__m128i) __A;
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castsi128_ps(__m128i __A)
+{
+ return (__m128) __A;
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castsi128_pd(__m128i __A)
+{
+ return (__m128d) __A;
+}
+#endif
+
+#endif
diff --git a/vpx_ports/mem.h b/vpx_ports/mem.h
index 29e507f..b130da8 100644
--- a/vpx_ports/mem.h
+++ b/vpx_ports/mem.h
@@ -31,8 +31,8 @@
* within the array.
*/
#define DECLARE_ALIGNED_ARRAY(a,typ,val,n)\
-typ val##_[(n)+(a)/sizeof(typ)+1];\
-typ *val = (typ*)((((intptr_t)val##_)+(a)-1)&((intptr_t)-(a)))
+ typ val##_[(n)+(a)/sizeof(typ)+1];\
+ typ *val = (typ*)((((intptr_t)val##_)+(a)-1)&((intptr_t)-(a)))
/* Indicates that the usage of the specified variable has been audited to assure
diff --git a/vpx_ports/mem_ops.h b/vpx_ports/mem_ops.h
index dec28d5..2d44a3a 100644
--- a/vpx_ports/mem_ops.h
+++ b/vpx_ports/mem_ops.h
@@ -60,88 +60,82 @@
#undef mem_get_be16
#define mem_get_be16 mem_ops_wrap_symbol(mem_get_be16)
-static unsigned MEM_VALUE_T mem_get_be16(const void *vmem)
-{
- unsigned MEM_VALUE_T val;
- const MAU_T *mem = (const MAU_T *)vmem;
+static unsigned MEM_VALUE_T mem_get_be16(const void *vmem) {
+ unsigned MEM_VALUE_T val;
+ const MAU_T *mem = (const MAU_T *)vmem;
- val = mem[0] << 8;
- val |= mem[1];
- return val;
+ val = mem[0] << 8;
+ val |= mem[1];
+ return val;
}
#undef mem_get_be24
#define mem_get_be24 mem_ops_wrap_symbol(mem_get_be24)
-static unsigned MEM_VALUE_T mem_get_be24(const void *vmem)
-{
- unsigned MEM_VALUE_T val;
- const MAU_T *mem = (const MAU_T *)vmem;
+static unsigned MEM_VALUE_T mem_get_be24(const void *vmem) {
+ unsigned MEM_VALUE_T val;
+ const MAU_T *mem = (const MAU_T *)vmem;
- val = mem[0] << 16;
- val |= mem[1] << 8;
- val |= mem[2];
- return val;
+ val = mem[0] << 16;
+ val |= mem[1] << 8;
+ val |= mem[2];
+ return val;
}
#undef mem_get_be32
#define mem_get_be32 mem_ops_wrap_symbol(mem_get_be32)
-static unsigned MEM_VALUE_T mem_get_be32(const void *vmem)
-{
- unsigned MEM_VALUE_T val;
- const MAU_T *mem = (const MAU_T *)vmem;
+static unsigned MEM_VALUE_T mem_get_be32(const void *vmem) {
+ unsigned MEM_VALUE_T val;
+ const MAU_T *mem = (const MAU_T *)vmem;
- val = mem[0] << 24;
- val |= mem[1] << 16;
- val |= mem[2] << 8;
- val |= mem[3];
- return val;
+ val = mem[0] << 24;
+ val |= mem[1] << 16;
+ val |= mem[2] << 8;
+ val |= mem[3];
+ return val;
}
#undef mem_get_le16
#define mem_get_le16 mem_ops_wrap_symbol(mem_get_le16)
-static unsigned MEM_VALUE_T mem_get_le16(const void *vmem)
-{
- unsigned MEM_VALUE_T val;
- const MAU_T *mem = (const MAU_T *)vmem;
+static unsigned MEM_VALUE_T mem_get_le16(const void *vmem) {
+ unsigned MEM_VALUE_T val;
+ const MAU_T *mem = (const MAU_T *)vmem;
- val = mem[1] << 8;
- val |= mem[0];
- return val;
+ val = mem[1] << 8;
+ val |= mem[0];
+ return val;
}
#undef mem_get_le24
#define mem_get_le24 mem_ops_wrap_symbol(mem_get_le24)
-static unsigned MEM_VALUE_T mem_get_le24(const void *vmem)
-{
- unsigned MEM_VALUE_T val;
- const MAU_T *mem = (const MAU_T *)vmem;
+static unsigned MEM_VALUE_T mem_get_le24(const void *vmem) {
+ unsigned MEM_VALUE_T val;
+ const MAU_T *mem = (const MAU_T *)vmem;
- val = mem[2] << 16;
- val |= mem[1] << 8;
- val |= mem[0];
- return val;
+ val = mem[2] << 16;
+ val |= mem[1] << 8;
+ val |= mem[0];
+ return val;
}
#undef mem_get_le32
#define mem_get_le32 mem_ops_wrap_symbol(mem_get_le32)
-static unsigned MEM_VALUE_T mem_get_le32(const void *vmem)
-{
- unsigned MEM_VALUE_T val;
- const MAU_T *mem = (const MAU_T *)vmem;
+static unsigned MEM_VALUE_T mem_get_le32(const void *vmem) {
+ unsigned MEM_VALUE_T val;
+ const MAU_T *mem = (const MAU_T *)vmem;
- val = mem[3] << 24;
- val |= mem[2] << 16;
- val |= mem[1] << 8;
- val |= mem[0];
- return val;
+ val = mem[3] << 24;
+ val |= mem[2] << 16;
+ val |= mem[1] << 8;
+ val |= mem[0];
+ return val;
}
#define mem_get_s_generic(end,sz) \
- static signed MEM_VALUE_T mem_get_s##end##sz(const void *vmem) {\
- const MAU_T *mem = (const MAU_T*)vmem;\
- signed MEM_VALUE_T val = mem_get_##end##sz(mem);\
- return (val << (MEM_VALUE_T_SZ_BITS - sz)) >> (MEM_VALUE_T_SZ_BITS - sz);\
- }
+ static signed MEM_VALUE_T mem_get_s##end##sz(const void *vmem) {\
+ const MAU_T *mem = (const MAU_T*)vmem;\
+ signed MEM_VALUE_T val = mem_get_##end##sz(mem);\
+ return (val << (MEM_VALUE_T_SZ_BITS - sz)) >> (MEM_VALUE_T_SZ_BITS - sz);\
+ }
#undef mem_get_sbe16
#define mem_get_sbe16 mem_ops_wrap_symbol(mem_get_sbe16)
@@ -169,66 +163,60 @@
#undef mem_put_be16
#define mem_put_be16 mem_ops_wrap_symbol(mem_put_be16)
-static void mem_put_be16(void *vmem, MEM_VALUE_T val)
-{
- MAU_T *mem = (MAU_T *)vmem;
+static void mem_put_be16(void *vmem, MEM_VALUE_T val) {
+ MAU_T *mem = (MAU_T *)vmem;
- mem[0] = (val >> 8) & 0xff;
- mem[1] = (val >> 0) & 0xff;
+ mem[0] = (val >> 8) & 0xff;
+ mem[1] = (val >> 0) & 0xff;
}
#undef mem_put_be24
#define mem_put_be24 mem_ops_wrap_symbol(mem_put_be24)
-static void mem_put_be24(void *vmem, MEM_VALUE_T val)
-{
- MAU_T *mem = (MAU_T *)vmem;
+static void mem_put_be24(void *vmem, MEM_VALUE_T val) {
+ MAU_T *mem = (MAU_T *)vmem;
- mem[0] = (val >> 16) & 0xff;
- mem[1] = (val >> 8) & 0xff;
- mem[2] = (val >> 0) & 0xff;
+ mem[0] = (val >> 16) & 0xff;
+ mem[1] = (val >> 8) & 0xff;
+ mem[2] = (val >> 0) & 0xff;
}
#undef mem_put_be32
#define mem_put_be32 mem_ops_wrap_symbol(mem_put_be32)
-static void mem_put_be32(void *vmem, MEM_VALUE_T val)
-{
- MAU_T *mem = (MAU_T *)vmem;
+static void mem_put_be32(void *vmem, MEM_VALUE_T val) {
+ MAU_T *mem = (MAU_T *)vmem;
- mem[0] = (val >> 24) & 0xff;
- mem[1] = (val >> 16) & 0xff;
- mem[2] = (val >> 8) & 0xff;
- mem[3] = (val >> 0) & 0xff;
+ mem[0] = (val >> 24) & 0xff;
+ mem[1] = (val >> 16) & 0xff;
+ mem[2] = (val >> 8) & 0xff;
+ mem[3] = (val >> 0) & 0xff;
}
#undef mem_put_le16
#define mem_put_le16 mem_ops_wrap_symbol(mem_put_le16)
-static void mem_put_le16(void *vmem, MEM_VALUE_T val)
-{
- MAU_T *mem = (MAU_T *)vmem;
+static void mem_put_le16(void *vmem, MEM_VALUE_T val) {
+ MAU_T *mem = (MAU_T *)vmem;
- mem[0] = (val >> 0) & 0xff;
- mem[1] = (val >> 8) & 0xff;
+ mem[0] = (val >> 0) & 0xff;
+ mem[1] = (val >> 8) & 0xff;
}
#undef mem_put_le24
#define mem_put_le24 mem_ops_wrap_symbol(mem_put_le24)
-static void mem_put_le24(void *vmem, MEM_VALUE_T val)
-{
- MAU_T *mem = (MAU_T *)vmem;
+static void mem_put_le24(void *vmem, MEM_VALUE_T val) {
+ MAU_T *mem = (MAU_T *)vmem;
- mem[0] = (val >> 0) & 0xff;
- mem[1] = (val >> 8) & 0xff;
- mem[2] = (val >> 16) & 0xff;
+ mem[0] = (val >> 0) & 0xff;
+ mem[1] = (val >> 8) & 0xff;
+ mem[2] = (val >> 16) & 0xff;
}
#undef mem_put_le32
#define mem_put_le32 mem_ops_wrap_symbol(mem_put_le32)
-static void mem_put_le32(void *vmem, MEM_VALUE_T val)
-{
- MAU_T *mem = (MAU_T *)vmem;
+static void mem_put_le32(void *vmem, MEM_VALUE_T val) {
+ MAU_T *mem = (MAU_T *)vmem;
- mem[0] = (val >> 0) & 0xff;
- mem[1] = (val >> 8) & 0xff;
- mem[2] = (val >> 16) & 0xff;
- mem[3] = (val >> 24) & 0xff;
+ mem[0] = (val >> 0) & 0xff;
+ mem[1] = (val >> 8) & 0xff;
+ mem[2] = (val >> 16) & 0xff;
+ mem[3] = (val >> 24) & 0xff;
}
diff --git a/vpx_ports/mem_ops_aligned.h b/vpx_ports/mem_ops_aligned.h
index fca653a..0100300 100644
--- a/vpx_ports/mem_ops_aligned.h
+++ b/vpx_ports/mem_ops_aligned.h
@@ -24,61 +24,61 @@
* could redefine these macros.
*/
#define swap_endian_16(val,raw) do {\
- val = ((raw>>8) & 0x00ff) \
- | ((raw<<8) & 0xff00);\
- } while(0)
+ val = ((raw>>8) & 0x00ff) \
+ | ((raw<<8) & 0xff00);\
+ } while(0)
#define swap_endian_32(val,raw) do {\
- val = ((raw>>24) & 0x000000ff) \
- | ((raw>>8) & 0x0000ff00) \
- | ((raw<<8) & 0x00ff0000) \
- | ((raw<<24) & 0xff000000); \
- } while(0)
+ val = ((raw>>24) & 0x000000ff) \
+ | ((raw>>8) & 0x0000ff00) \
+ | ((raw<<8) & 0x00ff0000) \
+ | ((raw<<24) & 0xff000000); \
+ } while(0)
#define swap_endian_16_se(val,raw) do {\
- swap_endian_16(val,raw);\
- val = ((val << 16) >> 16);\
- } while(0)
+ swap_endian_16(val,raw);\
+ val = ((val << 16) >> 16);\
+ } while(0)
#define swap_endian_32_se(val,raw) swap_endian_32(val,raw)
#define mem_get_ne_aligned_generic(end,sz) \
- static unsigned MEM_VALUE_T mem_get_##end##sz##_aligned(const void *vmem) {\
- const uint##sz##_t *mem = (const uint##sz##_t *)vmem;\
- return *mem;\
- }
+ static unsigned MEM_VALUE_T mem_get_##end##sz##_aligned(const void *vmem) {\
+ const uint##sz##_t *mem = (const uint##sz##_t *)vmem;\
+ return *mem;\
+ }
#define mem_get_sne_aligned_generic(end,sz) \
- static signed MEM_VALUE_T mem_get_s##end##sz##_aligned(const void *vmem) {\
- const int##sz##_t *mem = (const int##sz##_t *)vmem;\
- return *mem;\
- }
+ static signed MEM_VALUE_T mem_get_s##end##sz##_aligned(const void *vmem) {\
+ const int##sz##_t *mem = (const int##sz##_t *)vmem;\
+ return *mem;\
+ }
#define mem_get_se_aligned_generic(end,sz) \
- static unsigned MEM_VALUE_T mem_get_##end##sz##_aligned(const void *vmem) {\
- const uint##sz##_t *mem = (const uint##sz##_t *)vmem;\
- unsigned MEM_VALUE_T val, raw = *mem;\
- swap_endian_##sz(val,raw);\
- return val;\
- }
+ static unsigned MEM_VALUE_T mem_get_##end##sz##_aligned(const void *vmem) {\
+ const uint##sz##_t *mem = (const uint##sz##_t *)vmem;\
+ unsigned MEM_VALUE_T val, raw = *mem;\
+ swap_endian_##sz(val,raw);\
+ return val;\
+ }
#define mem_get_sse_aligned_generic(end,sz) \
- static signed MEM_VALUE_T mem_get_s##end##sz##_aligned(const void *vmem) {\
- const int##sz##_t *mem = (const int##sz##_t *)vmem;\
- unsigned MEM_VALUE_T val, raw = *mem;\
- swap_endian_##sz##_se(val,raw);\
- return val;\
- }
+ static signed MEM_VALUE_T mem_get_s##end##sz##_aligned(const void *vmem) {\
+ const int##sz##_t *mem = (const int##sz##_t *)vmem;\
+ unsigned MEM_VALUE_T val, raw = *mem;\
+ swap_endian_##sz##_se(val,raw);\
+ return val;\
+ }
#define mem_put_ne_aligned_generic(end,sz) \
- static void mem_put_##end##sz##_aligned(void *vmem, MEM_VALUE_T val) {\
- uint##sz##_t *mem = (uint##sz##_t *)vmem;\
- *mem = (uint##sz##_t)val;\
- }
+ static void mem_put_##end##sz##_aligned(void *vmem, MEM_VALUE_T val) {\
+ uint##sz##_t *mem = (uint##sz##_t *)vmem;\
+ *mem = (uint##sz##_t)val;\
+ }
#define mem_put_se_aligned_generic(end,sz) \
- static void mem_put_##end##sz##_aligned(void *vmem, MEM_VALUE_T val) {\
- uint##sz##_t *mem = (uint##sz##_t *)vmem, raw;\
- swap_endian_##sz(raw,val);\
- *mem = (uint##sz##_t)raw;\
- }
+ static void mem_put_##end##sz##_aligned(void *vmem, MEM_VALUE_T val) {\
+ uint##sz##_t *mem = (uint##sz##_t *)vmem, raw;\
+ swap_endian_##sz(raw,val);\
+ *mem = (uint##sz##_t)raw;\
+ }
#include "vpx_config.h"
#if CONFIG_BIG_ENDIAN
diff --git a/vpx_ports/vpx_once.h b/vpx_ports/vpx_once.h
new file mode 100644
index 0000000..16a735c
--- /dev/null
+++ b/vpx_ports/vpx_once.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "vpx_config.h"
+
+#if CONFIG_MULTITHREAD && defined(_WIN32)
+#include <windows.h>
+#include <stdlib.h>
+static void once(void (*func)(void))
+{
+ static CRITICAL_SECTION *lock;
+ static LONG waiters;
+ static int done;
+ void *lock_ptr = &lock;
+
+ /* If the initialization is complete, return early. This isn't just an
+ * optimization, it prevents races on the destruction of the global
+ * lock.
+ */
+ if(done)
+ return;
+
+ InterlockedIncrement(&waiters);
+
+ /* Get a lock. We create one and try to make it the one-true-lock,
+ * throwing it away if we lost the race.
+ */
+
+ {
+ /* Scope to protect access to new_lock */
+ CRITICAL_SECTION *new_lock = malloc(sizeof(CRITICAL_SECTION));
+ InitializeCriticalSection(new_lock);
+ if (InterlockedCompareExchangePointer(lock_ptr, new_lock, NULL) != NULL)
+ {
+ DeleteCriticalSection(new_lock);
+ free(new_lock);
+ }
+ }
+
+ /* At this point, we have a lock that can be synchronized on. We don't
+ * care which thread actually performed the allocation.
+ */
+
+ EnterCriticalSection(lock);
+
+ if (!done)
+ {
+ func();
+ done = 1;
+ }
+
+ LeaveCriticalSection(lock);
+
+ /* Last one out should free resources. The destructed objects are
+ * protected by checking if(done) above.
+ */
+ if(!InterlockedDecrement(&waiters))
+ {
+ DeleteCriticalSection(lock);
+ free(lock);
+ lock = NULL;
+ }
+}
+
+
+#elif CONFIG_MULTITHREAD && HAVE_PTHREAD_H
+#include <pthread.h>
+static void once(void (*func)(void))
+{
+ static pthread_once_t lock = PTHREAD_ONCE_INIT;
+ pthread_once(&lock, func);
+}
+
+
+#else
+/* No-op version that performs no synchronization. vp8_rtcd() is idempotent,
+ * so as long as your platform provides atomic loads/stores of pointers
+ * no synchronization is strictly necessary.
+ */
+
+static void once(void (*func)(void))
+{
+ static int done;
+
+ if(!done)
+ {
+ func();
+ done = 1;
+ }
+}
+#endif
diff --git a/vpx_ports/vpx_timer.h b/vpx_ports/vpx_timer.h
index d07e086..cdad9ef 100644
--- a/vpx_ports/vpx_timer.h
+++ b/vpx_ports/vpx_timer.h
@@ -32,65 +32,61 @@
/* timersub is not provided by msys at this time. */
#ifndef timersub
#define timersub(a, b, result) \
- do { \
- (result)->tv_sec = (a)->tv_sec - (b)->tv_sec; \
- (result)->tv_usec = (a)->tv_usec - (b)->tv_usec; \
- if ((result)->tv_usec < 0) { \
- --(result)->tv_sec; \
- (result)->tv_usec += 1000000; \
- } \
- } while (0)
+ do { \
+ (result)->tv_sec = (a)->tv_sec - (b)->tv_sec; \
+ (result)->tv_usec = (a)->tv_usec - (b)->tv_usec; \
+ if ((result)->tv_usec < 0) { \
+ --(result)->tv_sec; \
+ (result)->tv_usec += 1000000; \
+ } \
+ } while (0)
#endif
#endif
-struct vpx_usec_timer
-{
+struct vpx_usec_timer {
#if defined(_WIN32)
- LARGE_INTEGER begin, end;
+ LARGE_INTEGER begin, end;
#else
- struct timeval begin, end;
+ struct timeval begin, end;
#endif
};
static void
-vpx_usec_timer_start(struct vpx_usec_timer *t)
-{
+vpx_usec_timer_start(struct vpx_usec_timer *t) {
#if defined(_WIN32)
- QueryPerformanceCounter(&t->begin);
+ QueryPerformanceCounter(&t->begin);
#else
- gettimeofday(&t->begin, NULL);
+ gettimeofday(&t->begin, NULL);
#endif
}
static void
-vpx_usec_timer_mark(struct vpx_usec_timer *t)
-{
+vpx_usec_timer_mark(struct vpx_usec_timer *t) {
#if defined(_WIN32)
- QueryPerformanceCounter(&t->end);
+ QueryPerformanceCounter(&t->end);
#else
- gettimeofday(&t->end, NULL);
+ gettimeofday(&t->end, NULL);
#endif
}
static int64_t
-vpx_usec_timer_elapsed(struct vpx_usec_timer *t)
-{
+vpx_usec_timer_elapsed(struct vpx_usec_timer *t) {
#if defined(_WIN32)
- LARGE_INTEGER freq, diff;
+ LARGE_INTEGER freq, diff;
- diff.QuadPart = t->end.QuadPart - t->begin.QuadPart;
+ diff.QuadPart = t->end.QuadPart - t->begin.QuadPart;
- QueryPerformanceFrequency(&freq);
- return diff.QuadPart * 1000000 / freq.QuadPart;
+ QueryPerformanceFrequency(&freq);
+ return diff.QuadPart * 1000000 / freq.QuadPart;
#else
- struct timeval diff;
+ struct timeval diff;
- timersub(&t->end, &t->begin, &diff);
- return diff.tv_sec * 1000000 + diff.tv_usec;
+ timersub(&t->end, &t->begin, &diff);
+ return diff.tv_sec * 1000000 + diff.tv_usec;
#endif
}
@@ -101,9 +97,8 @@
#define timersub(a, b, result)
#endif
-struct vpx_usec_timer
-{
- void *dummy;
+struct vpx_usec_timer {
+ void *dummy;
};
static void
@@ -113,7 +108,9 @@
vpx_usec_timer_mark(struct vpx_usec_timer *t) { }
static long
-vpx_usec_timer_elapsed(struct vpx_usec_timer *t) { return 0; }
+vpx_usec_timer_elapsed(struct vpx_usec_timer *t) {
+ return 0;
+}
#endif /* CONFIG_OS_SUPPORT */
diff --git a/vpx_ports/vpxtypes.h b/vpx_ports/vpxtypes.h
index f2fb089..4365213 100644
--- a/vpx_ports/vpxtypes.h
+++ b/vpx_ports/vpxtypes.h
@@ -14,7 +14,7 @@
#include "vpx_config.h"
-//#include <sys/types.h>
+// #include <sys/types.h>
#ifdef _MSC_VER
# include <basetsd.h>
typedef SSIZE_T ssize_t;
@@ -107,25 +107,24 @@
/*!\ingroup basetypes
\brief Common return type*/
-typedef enum
-{
- VPX_NOT_FOUND = -404,
- VPX_BUFFER_EMPTY = -202,
- VPX_BUFFER_FULL = -201,
+typedef enum {
+ VPX_NOT_FOUND = -404,
+ VPX_BUFFER_EMPTY = -202,
+ VPX_BUFFER_FULL = -201,
- VPX_CONNREFUSED = -102,
- VPX_TIMEDOUT = -101,
- VPX_WOULDBLOCK = -100,
+ VPX_CONNREFUSED = -102,
+ VPX_TIMEDOUT = -101,
+ VPX_WOULDBLOCK = -100,
- VPX_NET_ERROR = -9,
- VPX_INVALID_VERSION = -8,
- VPX_INPROGRESS = -7,
- VPX_NOT_SUPP = -6,
- VPX_NO_MEM = -3,
- VPX_INVALID_PARAMS = -2,
- VPX_ERROR = -1,
- VPX_OK = 0,
- VPX_DONE = 1
+ VPX_NET_ERROR = -9,
+ VPX_INVALID_VERSION = -8,
+ VPX_INPROGRESS = -7,
+ VPX_NOT_SUPP = -6,
+ VPX_NO_MEM = -3,
+ VPX_INVALID_PARAMS = -2,
+ VPX_ERROR = -1,
+ VPX_OK = 0,
+ VPX_DONE = 1
} vpxsc;
#if defined(WIN32) || defined(_WIN32_WCE)
@@ -135,7 +134,7 @@
#elif defined(LINUX)
# define DLLIMPORT
/*visibility attribute support is available in 3.4 and later.
- see: http://gcc.gnu.org/wiki/Visibility for more info*/
+ see: http://gcc.gnu.org/wiki/Visibility for more info*/
# if defined(__GNUC__) && ((__GNUC__<<16|(__GNUC_MINOR__&0xff)) >= (3<<16|4))
# define GCC_HASCLASSVISIBILITY
# endif /*defined(__GNUC__) && __GNUC_PREREQ(3,4)*/
diff --git a/vpx_ports/x86.h b/vpx_ports/x86.h
index 9dd8c4b..f1cf626 100644
--- a/vpx_ports/x86.h
+++ b/vpx_ports/x86.h
@@ -14,78 +14,77 @@
#include <stdlib.h>
#include "vpx_config.h"
-typedef enum
-{
- VPX_CPU_UNKNOWN = -1,
- VPX_CPU_AMD,
- VPX_CPU_AMD_OLD,
- VPX_CPU_CENTAUR,
- VPX_CPU_CYRIX,
- VPX_CPU_INTEL,
- VPX_CPU_NEXGEN,
- VPX_CPU_NSC,
- VPX_CPU_RISE,
- VPX_CPU_SIS,
- VPX_CPU_TRANSMETA,
- VPX_CPU_TRANSMETA_OLD,
- VPX_CPU_UMC,
- VPX_CPU_VIA,
+typedef enum {
+ VPX_CPU_UNKNOWN = -1,
+ VPX_CPU_AMD,
+ VPX_CPU_AMD_OLD,
+ VPX_CPU_CENTAUR,
+ VPX_CPU_CYRIX,
+ VPX_CPU_INTEL,
+ VPX_CPU_NEXGEN,
+ VPX_CPU_NSC,
+ VPX_CPU_RISE,
+ VPX_CPU_SIS,
+ VPX_CPU_TRANSMETA,
+ VPX_CPU_TRANSMETA_OLD,
+ VPX_CPU_UMC,
+ VPX_CPU_VIA,
- VPX_CPU_LAST
+ VPX_CPU_LAST
} vpx_cpu_t;
#if defined(__GNUC__) && __GNUC__
#if ARCH_X86_64
#define cpuid(func,ax,bx,cx,dx)\
- __asm__ __volatile__ (\
- "cpuid \n\t" \
- : "=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) \
- : "a" (func));
+ __asm__ __volatile__ (\
+ "cpuid \n\t" \
+ : "=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) \
+ : "a" (func));
#else
#define cpuid(func,ax,bx,cx,dx)\
- __asm__ __volatile__ (\
- "mov %%ebx, %%edi \n\t" \
- "cpuid \n\t" \
- "xchg %%edi, %%ebx \n\t" \
- : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \
- : "a" (func));
+ __asm__ __volatile__ (\
+ "mov %%ebx, %%edi \n\t" \
+ "cpuid \n\t" \
+ "xchg %%edi, %%ebx \n\t" \
+ : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \
+ : "a" (func));
#endif
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
#if ARCH_X86_64
#define cpuid(func,ax,bx,cx,dx)\
- asm volatile (\
- "xchg %rsi, %rbx \n\t" \
- "cpuid \n\t" \
- "movl %ebx, %edi \n\t" \
- "xchg %rsi, %rbx \n\t" \
- : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \
- : "a" (func));
+ asm volatile (\
+ "xchg %rsi, %rbx \n\t" \
+ "cpuid \n\t" \
+ "movl %ebx, %edi \n\t" \
+ "xchg %rsi, %rbx \n\t" \
+ : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \
+ : "a" (func));
#else
#define cpuid(func,ax,bx,cx,dx)\
- asm volatile (\
- "pushl %ebx \n\t" \
- "cpuid \n\t" \
- "movl %ebx, %edi \n\t" \
- "popl %ebx \n\t" \
- : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \
- : "a" (func));
+ asm volatile (\
+ "pushl %ebx \n\t" \
+ "cpuid \n\t" \
+ "movl %ebx, %edi \n\t" \
+ "popl %ebx \n\t" \
+ : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \
+ : "a" (func));
#endif
#else
#if ARCH_X86_64
void __cpuid(int CPUInfo[4], int info_type);
#pragma intrinsic(__cpuid)
#define cpuid(func,a,b,c,d) do{\
- int regs[4];\
- __cpuid(regs,func); a=regs[0]; b=regs[1]; c=regs[2]; d=regs[3];\
- } while(0)
+ int regs[4];\
+ __cpuid(regs,func); a=regs[0]; b=regs[1]; c=regs[2]; d=regs[3];\
+ } while(0)
#else
#define cpuid(func,a,b,c,d)\
- __asm mov eax, func\
- __asm cpuid\
- __asm mov a, eax\
- __asm mov b, ebx\
- __asm mov c, ecx\
- __asm mov d, edx
+ __asm mov eax, func\
+ __asm cpuid\
+ __asm mov a, eax\
+ __asm mov b, ebx\
+ __asm mov c, ecx\
+ __asm mov d, edx
#endif
#endif
@@ -100,47 +99,46 @@
#endif
static int
-x86_simd_caps(void)
-{
- unsigned int flags = 0;
- unsigned int mask = ~0;
- unsigned int reg_eax, reg_ebx, reg_ecx, reg_edx;
- char *env;
- (void)reg_ebx;
+x86_simd_caps(void) {
+ unsigned int flags = 0;
+ unsigned int mask = ~0;
+ unsigned int reg_eax, reg_ebx, reg_ecx, reg_edx;
+ char *env;
+ (void)reg_ebx;
- /* See if the CPU capabilities are being overridden by the environment */
- env = getenv("VPX_SIMD_CAPS");
+ /* See if the CPU capabilities are being overridden by the environment */
+ env = getenv("VPX_SIMD_CAPS");
- if (env && *env)
- return (int)strtol(env, NULL, 0);
+ if (env && *env)
+ return (int)strtol(env, NULL, 0);
- env = getenv("VPX_SIMD_CAPS_MASK");
+ env = getenv("VPX_SIMD_CAPS_MASK");
- if (env && *env)
- mask = strtol(env, NULL, 0);
+ if (env && *env)
+ mask = strtol(env, NULL, 0);
- /* Ensure that the CPUID instruction supports extended features */
- cpuid(0, reg_eax, reg_ebx, reg_ecx, reg_edx);
+ /* Ensure that the CPUID instruction supports extended features */
+ cpuid(0, reg_eax, reg_ebx, reg_ecx, reg_edx);
- if (reg_eax < 1)
- return 0;
+ if (reg_eax < 1)
+ return 0;
- /* Get the standard feature flags */
- cpuid(1, reg_eax, reg_ebx, reg_ecx, reg_edx);
+ /* Get the standard feature flags */
+ cpuid(1, reg_eax, reg_ebx, reg_ecx, reg_edx);
- if (reg_edx & BIT(23)) flags |= HAS_MMX;
+ if (reg_edx & BIT(23)) flags |= HAS_MMX;
- if (reg_edx & BIT(25)) flags |= HAS_SSE; /* aka xmm */
+ if (reg_edx & BIT(25)) flags |= HAS_SSE; /* aka xmm */
- if (reg_edx & BIT(26)) flags |= HAS_SSE2; /* aka wmt */
+ if (reg_edx & BIT(26)) flags |= HAS_SSE2; /* aka wmt */
- if (reg_ecx & BIT(0)) flags |= HAS_SSE3;
+ if (reg_ecx & BIT(0)) flags |= HAS_SSE3;
- if (reg_ecx & BIT(9)) flags |= HAS_SSSE3;
+ if (reg_ecx & BIT(9)) flags |= HAS_SSSE3;
- if (reg_ecx & BIT(19)) flags |= HAS_SSE4_1;
+ if (reg_ecx & BIT(19)) flags |= HAS_SSE4_1;
- return flags & mask;
+ return flags & mask;
}
vpx_cpu_t vpx_x86_vendor(void);
@@ -150,21 +148,20 @@
#pragma intrinsic(__rdtsc)
#endif
static unsigned int
-x86_readtsc(void)
-{
+x86_readtsc(void) {
#if defined(__GNUC__) && __GNUC__
- unsigned int tsc;
- __asm__ __volatile__("rdtsc\n\t":"=a"(tsc):);
- return tsc;
+ unsigned int tsc;
+ __asm__ __volatile__("rdtsc\n\t":"=a"(tsc):);
+ return tsc;
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
- unsigned int tsc;
- asm volatile("rdtsc\n\t":"=a"(tsc):);
- return tsc;
+ unsigned int tsc;
+ asm volatile("rdtsc\n\t":"=a"(tsc):);
+ return tsc;
#else
#if ARCH_X86_64
- return (unsigned int)__rdtsc();
+ return (unsigned int)__rdtsc();
#else
- __asm rdtsc;
+ __asm rdtsc;
#endif
#endif
}
@@ -172,31 +169,29 @@
#if defined(__GNUC__) && __GNUC__
#define x86_pause_hint()\
- __asm__ __volatile__ ("pause \n\t")
+ __asm__ __volatile__ ("pause \n\t")
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
#define x86_pause_hint()\
- asm volatile ("pause \n\t")
+ asm volatile ("pause \n\t")
#else
#if ARCH_X86_64
#define x86_pause_hint()\
- _mm_pause();
+ _mm_pause();
#else
#define x86_pause_hint()\
- __asm pause
+ __asm pause
#endif
#endif
#if defined(__GNUC__) && __GNUC__
static void
-x87_set_control_word(unsigned short mode)
-{
- __asm__ __volatile__("fldcw %0" : : "m"(*&mode));
+x87_set_control_word(unsigned short mode) {
+ __asm__ __volatile__("fldcw %0" : : "m"( *&mode));
}
static unsigned short
-x87_get_control_word(void)
-{
- unsigned short mode;
- __asm__ __volatile__("fstcw %0\n\t":"=m"(*&mode):);
+x87_get_control_word(void) {
+ unsigned short mode;
+ __asm__ __volatile__("fstcw %0\n\t":"=m"( *&mode):);
return mode;
}
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
@@ -210,7 +205,18 @@
{
unsigned short mode;
asm volatile("fstcw %0\n\t":"=m"(*&mode):);
- return mode;
+ return mode;
+}
+#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
+static void
+x87_set_control_word(unsigned short mode) {
+ asm volatile("fldcw %0" : : "m"( *&mode));
+}
+static unsigned short
+x87_get_control_word(void) {
+ unsigned short mode;
+ asm volatile("fstcw %0\n\t":"=m"( *&mode):);
+ return mode;
}
#elif ARCH_X86_64
/* No fldcw intrinsics on Windows x64, punt to external asm */
@@ -220,25 +226,22 @@
#define x87_get_control_word vpx_winx64_fstcw
#else
static void
-x87_set_control_word(unsigned short mode)
-{
- __asm { fldcw mode }
+x87_set_control_word(unsigned short mode) {
+ __asm { fldcw mode }
}
static unsigned short
-x87_get_control_word(void)
-{
- unsigned short mode;
- __asm { fstcw mode }
- return mode;
+x87_get_control_word(void) {
+ unsigned short mode;
+ __asm { fstcw mode }
+ return mode;
}
#endif
static unsigned short
-x87_set_double_precision(void)
-{
- unsigned short mode = x87_get_control_word();
- x87_set_control_word((mode&~0x300) | 0x200);
- return mode;
+x87_set_double_precision(void) {
+ unsigned short mode = x87_get_control_word();
+ x87_set_control_word((mode&~0x300) | 0x200);
+ return mode;
}
diff --git a/vpx_ports/x86_cpuid.c b/vpx_ports/x86_cpuid.c
index ce64033..fe86cfc 100644
--- a/vpx_ports/x86_cpuid.c
+++ b/vpx_ports/x86_cpuid.c
@@ -11,43 +11,39 @@
#include <string.h>
#include "x86.h"
-struct cpuid_vendors
-{
- char vendor_string[12];
- vpx_cpu_t vendor_id;
+struct cpuid_vendors {
+ char vendor_string[12];
+ vpx_cpu_t vendor_id;
};
-static struct cpuid_vendors cpuid_vendor_list[VPX_CPU_LAST] =
-{
- { "AuthenticAMD", VPX_CPU_AMD },
- { "AMDisbetter!", VPX_CPU_AMD_OLD },
- { "CentaurHauls", VPX_CPU_CENTAUR },
- { "CyrixInstead", VPX_CPU_CYRIX },
- { "GenuineIntel", VPX_CPU_INTEL },
- { "NexGenDriven", VPX_CPU_NEXGEN },
- { "Geode by NSC", VPX_CPU_NSC },
- { "RiseRiseRise", VPX_CPU_RISE },
- { "SiS SiS SiS ", VPX_CPU_SIS },
- { "GenuineTMx86", VPX_CPU_TRANSMETA },
- { "TransmetaCPU", VPX_CPU_TRANSMETA_OLD },
- { "UMC UMC UMC ", VPX_CPU_UMC },
- { "VIA VIA VIA ", VPX_CPU_VIA },
+static struct cpuid_vendors cpuid_vendor_list[VPX_CPU_LAST] = {
+ { "AuthenticAMD", VPX_CPU_AMD },
+ { "AMDisbetter!", VPX_CPU_AMD_OLD },
+ { "CentaurHauls", VPX_CPU_CENTAUR },
+ { "CyrixInstead", VPX_CPU_CYRIX },
+ { "GenuineIntel", VPX_CPU_INTEL },
+ { "NexGenDriven", VPX_CPU_NEXGEN },
+ { "Geode by NSC", VPX_CPU_NSC },
+ { "RiseRiseRise", VPX_CPU_RISE },
+ { "SiS SiS SiS ", VPX_CPU_SIS },
+ { "GenuineTMx86", VPX_CPU_TRANSMETA },
+ { "TransmetaCPU", VPX_CPU_TRANSMETA_OLD },
+ { "UMC UMC UMC ", VPX_CPU_UMC },
+ { "VIA VIA VIA ", VPX_CPU_VIA },
};
-vpx_cpu_t vpx_x86_vendor(void)
-{
- unsigned int reg_eax;
- unsigned int vs[3];
- int i;
+vpx_cpu_t vpx_x86_vendor(void) {
+ unsigned int reg_eax;
+ unsigned int vs[3];
+ int i;
- /* Get the Vendor String from the CPU */
- cpuid(0, reg_eax, vs[0], vs[2], vs[1]);
+ /* Get the Vendor String from the CPU */
+ cpuid(0, reg_eax, vs[0], vs[2], vs[1]);
- for (i = 0; i < VPX_CPU_LAST; i++)
- {
- if (strncmp ((const char *)vs, cpuid_vendor_list[i].vendor_string, 12) == 0)
- return (cpuid_vendor_list[i].vendor_id);
- }
+ for (i = 0; i < VPX_CPU_LAST; i++) {
+ if (strncmp((const char *)vs, cpuid_vendor_list[i].vendor_string, 12) == 0)
+ return (cpuid_vendor_list[i].vendor_id);
+ }
- return VPX_CPU_UNKNOWN;
+ return VPX_CPU_UNKNOWN;
}
diff --git a/vpx_scale/arm/neon/vp8_vpxyv12_copy_y_neon.asm b/vpx_scale/arm/neon/vp8_vpxyv12_copy_y_neon.asm
index 9189641..cc1789a 100644
--- a/vpx_scale/arm/neon/vp8_vpxyv12_copy_y_neon.asm
+++ b/vpx_scale/arm/neon/vp8_vpxyv12_copy_y_neon.asm
@@ -15,7 +15,7 @@
REQUIRE8
PRESERVE8
- INCLUDE asm_com_offsets.asm
+ INCLUDE vpx_scale_asm_offsets.asm
AREA ||.text||, CODE, READONLY, ALIGN=2
diff --git a/vpx_scale/arm/neon/vp8_vpxyv12_copyframe_func_neon.asm b/vpx_scale/arm/neon/vp8_vpxyv12_copyframe_func_neon.asm
index e55d076..3f17883 100644
--- a/vpx_scale/arm/neon/vp8_vpxyv12_copyframe_func_neon.asm
+++ b/vpx_scale/arm/neon/vp8_vpxyv12_copyframe_func_neon.asm
@@ -14,7 +14,7 @@
REQUIRE8
PRESERVE8
- INCLUDE asm_com_offsets.asm
+ INCLUDE vpx_scale_asm_offsets.asm
AREA ||.text||, CODE, READONLY, ALIGN=2
diff --git a/vpx_scale/arm/neon/vp8_vpxyv12_copysrcframe_func_neon.asm b/vpx_scale/arm/neon/vp8_vpxyv12_copysrcframe_func_neon.asm
index ec64dbc..d452ad2 100644
--- a/vpx_scale/arm/neon/vp8_vpxyv12_copysrcframe_func_neon.asm
+++ b/vpx_scale/arm/neon/vp8_vpxyv12_copysrcframe_func_neon.asm
@@ -14,7 +14,7 @@
REQUIRE8
PRESERVE8
- INCLUDE asm_com_offsets.asm
+ INCLUDE vpx_scale_asm_offsets.asm
AREA ||.text||, CODE, READONLY, ALIGN=2
;Note: This function is used to copy source data in src_buffer[i] at beginning of
diff --git a/vpx_scale/arm/neon/vp8_vpxyv12_extendframeborders_neon.asm b/vpx_scale/arm/neon/vp8_vpxyv12_extendframeborders_neon.asm
index ebc4242..b2eb9eb 100644
--- a/vpx_scale/arm/neon/vp8_vpxyv12_extendframeborders_neon.asm
+++ b/vpx_scale/arm/neon/vp8_vpxyv12_extendframeborders_neon.asm
@@ -14,7 +14,7 @@
REQUIRE8
PRESERVE8
- INCLUDE asm_com_offsets.asm
+ INCLUDE vpx_scale_asm_offsets.asm
AREA ||.text||, CODE, READONLY, ALIGN=2
;void vp8_yv12_extend_frame_borders_neon (YV12_BUFFER_CONFIG *ybf);
diff --git a/vpx_scale/arm/neon/yv12extend_arm.c b/vpx_scale/arm/neon/yv12extend_arm.c
index eabd495..4535b8f 100644
--- a/vpx_scale/arm/neon/yv12extend_arm.c
+++ b/vpx_scale/arm/neon/yv12extend_arm.c
@@ -8,7 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#include "./vpx_rtcd.h"
+#include "./vpx_scale_rtcd.h"
extern void vp8_yv12_copy_frame_func_neon(struct yv12_buffer_config *src_ybc,
struct yv12_buffer_config *dst_ybc);
diff --git a/vpx_scale/generic/bicubic_scaler.c b/vpx_scale/generic/bicubic_scaler.c
deleted file mode 100644
index c116740..0000000
--- a/vpx_scale/generic/bicubic_scaler.c
+++ /dev/null
@@ -1,569 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include <float.h>
-#include <math.h>
-#include <stdio.h>
-#include "vpx_mem/vpx_mem.h"
-#include "vpxscale_arbitrary.h"
-
-#define FIXED_POINT
-
-#define MAX_IN_WIDTH 800
-#define MAX_IN_HEIGHT 600
-#define MAX_OUT_WIDTH 800
-#define MAX_OUT_HEIGHT 600
-#define MAX_OUT_DIMENSION ((MAX_OUT_WIDTH > MAX_OUT_HEIGHT) ? \
- MAX_OUT_WIDTH : MAX_OUT_HEIGHT)
-
-BICUBIC_SCALER_STRUCT g_b_scaler;
-static int g_first_time = 1;
-
-#pragma DATA_SECTION(g_hbuf, "VP6_HEAP")
-#pragma DATA_ALIGN (g_hbuf, 32);
-unsigned char g_hbuf[MAX_OUT_DIMENSION];
-
-#pragma DATA_SECTION(g_hbuf_uv, "VP6_HEAP")
-#pragma DATA_ALIGN (g_hbuf_uv, 32);
-unsigned char g_hbuf_uv[MAX_OUT_DIMENSION];
-
-
-#ifdef FIXED_POINT
-static int a_i = 0.6 * 65536;
-#else
-static float a = -0.6;
-#endif
-
-#ifdef FIXED_POINT
-// 3 2
-// C0 = a*t - a*t
-//
-static short c0_fixed(unsigned int t) {
- // put t in Q16 notation
- unsigned short v1, v2;
-
- // Q16
- v1 = (a_i * t) >> 16;
- v1 = (v1 * t) >> 16;
-
- // Q16
- v2 = (a_i * t) >> 16;
- v2 = (v2 * t) >> 16;
- v2 = (v2 * t) >> 16;
-
- // Q12
- return -((v1 - v2) >> 4);
-}
-
-// 2 3
-// C1 = a*t + (3-2*a)*t - (2-a)*t
-//
-static short c1_fixed(unsigned int t) {
- unsigned short v1, v2, v3;
- unsigned short two, three;
-
- // Q16
- v1 = (a_i * t) >> 16;
-
- // Q13
- two = 2 << 13;
- v2 = two - (a_i >> 3);
- v2 = (v2 * t) >> 16;
- v2 = (v2 * t) >> 16;
- v2 = (v2 * t) >> 16;
-
- // Q13
- three = 3 << 13;
- v3 = three - (2 * (a_i >> 3));
- v3 = (v3 * t) >> 16;
- v3 = (v3 * t) >> 16;
-
- // Q12
- return (((v1 >> 3) - v2 + v3) >> 1);
-
-}
-
-// 2 3
-// C2 = 1 - (3-a)*t + (2-a)*t
-//
-static short c2_fixed(unsigned int t) {
- unsigned short v1, v2, v3;
- unsigned short two, three;
-
- // Q13
- v1 = 1 << 13;
-
- // Q13
- three = 3 << 13;
- v2 = three - (a_i >> 3);
- v2 = (v2 * t) >> 16;
- v2 = (v2 * t) >> 16;
-
- // Q13
- two = 2 << 13;
- v3 = two - (a_i >> 3);
- v3 = (v3 * t) >> 16;
- v3 = (v3 * t) >> 16;
- v3 = (v3 * t) >> 16;
-
- // Q12
- return (v1 - v2 + v3) >> 1;
-}
-
-// 2 3
-// C3 = a*t - 2*a*t + a*t
-//
-static short c3_fixed(unsigned int t) {
- int v1, v2, v3;
-
- // Q16
- v1 = (a_i * t) >> 16;
-
- // Q15
- v2 = 2 * (a_i >> 1);
- v2 = (v2 * t) >> 16;
- v2 = (v2 * t) >> 16;
-
- // Q16
- v3 = (a_i * t) >> 16;
- v3 = (v3 * t) >> 16;
- v3 = (v3 * t) >> 16;
-
- // Q12
- return ((v2 - (v1 >> 1) - (v3 >> 1)) >> 3);
-}
-#else
-// 3 2
-// C0 = -a*t + a*t
-//
-float C0(float t) {
- return -a * t * t * t + a * t * t;
-}
-
-// 2 3
-// C1 = -a*t + (2*a+3)*t - (a+2)*t
-//
-float C1(float t) {
- return -(a + 2.0f) * t * t * t + (2.0f * a + 3.0f) * t * t - a * t;
-}
-
-// 2 3
-// C2 = 1 - (a+3)*t + (a+2)*t
-//
-float C2(float t) {
- return (a + 2.0f) * t * t * t - (a + 3.0f) * t * t + 1.0f;
-}
-
-// 2 3
-// C3 = a*t - 2*a*t + a*t
-//
-float C3(float t) {
- return a * t * t * t - 2.0f * a * t * t + a * t;
-}
-#endif
-
-#if 0
-int compare_real_fixed() {
- int i, errors = 0;
- float mult = 1.0 / 10000.0;
- unsigned int fixed_mult = mult * 4294967296;// 65536;
- unsigned int phase_offset_int;
- float phase_offset_real;
-
- for (i = 0; i < 10000; i++) {
- int fixed0, fixed1, fixed2, fixed3, fixed_total;
- int real0, real1, real2, real3, real_total;
-
- phase_offset_real = (float)i * mult;
- phase_offset_int = (fixed_mult * i) >> 16;
-// phase_offset_int = phase_offset_real * 65536;
-
- fixed0 = c0_fixed(phase_offset_int);
- real0 = C0(phase_offset_real) * 4096.0;
-
- if ((abs(fixed0) > (abs(real0) + 1)) || (abs(fixed0) < (abs(real0) - 1)))
- errors++;
-
- fixed1 = c1_fixed(phase_offset_int);
- real1 = C1(phase_offset_real) * 4096.0;
-
- if ((abs(fixed1) > (abs(real1) + 1)) || (abs(fixed1) < (abs(real1) - 1)))
- errors++;
-
- fixed2 = c2_fixed(phase_offset_int);
- real2 = C2(phase_offset_real) * 4096.0;
-
- if ((abs(fixed2) > (abs(real2) + 1)) || (abs(fixed2) < (abs(real2) - 1)))
- errors++;
-
- fixed3 = c3_fixed(phase_offset_int);
- real3 = C3(phase_offset_real) * 4096.0;
-
- if ((abs(fixed3) > (abs(real3) + 1)) || (abs(fixed3) < (abs(real3) - 1)))
- errors++;
-
- fixed_total = fixed0 + fixed1 + fixed2 + fixed3;
- real_total = real0 + real1 + real2 + real3;
-
- if ((fixed_total > 4097) || (fixed_total < 4094))
- errors++;
-
- if ((real_total > 4097) || (real_total < 4095))
- errors++;
- }
-
- return errors;
-}
-#endif
-
-// Find greatest common denominator between two integers. Method used here is
-// slow compared to Euclid's algorithm, but does not require any division.
-int gcd(int a, int b) {
- // Problem with this algorithm is that if a or b = 0 this function
- // will never exit. Don't want to return 0 because any computation
- // that was based on a common denoninator and tried to reduce by
- // dividing by 0 would fail. Best solution that could be thought of
- // would to be fail by returing a 1;
- if (a <= 0 || b <= 0)
- return 1;
-
- while (a != b) {
- if (b > a)
- b = b - a;
- else {
- int tmp = a;// swap large and
- a = b; // small
- b = tmp;
- }
- }
-
- return b;
-}
-
-void bicubic_coefficient_init() {
- vpx_memset(&g_b_scaler, 0, sizeof(BICUBIC_SCALER_STRUCT));
- g_first_time = 0;
-}
-
-void bicubic_coefficient_destroy() {
- if (!g_first_time) {
- vpx_free(g_b_scaler.l_w);
-
- vpx_free(g_b_scaler.l_h);
-
- vpx_free(g_b_scaler.l_h_uv);
-
- vpx_free(g_b_scaler.c_w);
-
- vpx_free(g_b_scaler.c_h);
-
- vpx_free(g_b_scaler.c_h_uv);
-
- vpx_memset(&g_b_scaler, 0, sizeof(BICUBIC_SCALER_STRUCT));
- }
-}
-
-// Create the coeffients that will be used for the cubic interpolation.
-// Because scaling does not have to be equal in the vertical and horizontal
-// regimes the phase offsets will be different. There are 4 coefficents
-// for each point, two on each side. The layout is that there are the
-// 4 coefficents for each phase in the array and then the next phase.
-int bicubic_coefficient_setup(int in_width, int in_height, int out_width, int out_height) {
- int i;
-#ifdef FIXED_POINT
- int phase_offset_int;
- unsigned int fixed_mult;
- int product_val = 0;
-#else
- float phase_offset;
-#endif
- int gcd_w, gcd_h, gcd_h_uv, d_w, d_h, d_h_uv;
-
- if (g_first_time)
- bicubic_coefficient_init();
-
-
- // check to see if the coefficents have already been set up correctly
- if ((in_width == g_b_scaler.in_width) && (in_height == g_b_scaler.in_height)
- && (out_width == g_b_scaler.out_width) && (out_height == g_b_scaler.out_height))
- return 0;
-
- g_b_scaler.in_width = in_width;
- g_b_scaler.in_height = in_height;
- g_b_scaler.out_width = out_width;
- g_b_scaler.out_height = out_height;
-
- // Don't want to allow crazy scaling, just try and prevent a catastrophic
- // failure here. Want to fail after setting the member functions so if
- // if the scaler is called the member functions will not scale.
- if (out_width <= 0 || out_height <= 0)
- return -1;
-
- // reduce in/out width and height ratios using the gcd
- gcd_w = gcd(out_width, in_width);
- gcd_h = gcd(out_height, in_height);
- gcd_h_uv = gcd(out_height, in_height / 2);
-
- // the numerator width and height are to be saved in
- // globals so they can be used during the scaling process
- // without having to be recalculated.
- g_b_scaler.nw = out_width / gcd_w;
- d_w = in_width / gcd_w;
-
- g_b_scaler.nh = out_height / gcd_h;
- d_h = in_height / gcd_h;
-
- g_b_scaler.nh_uv = out_height / gcd_h_uv;
- d_h_uv = (in_height / 2) / gcd_h_uv;
-
- // allocate memory for the coefficents
- vpx_free(g_b_scaler.l_w);
-
- vpx_free(g_b_scaler.l_h);
-
- vpx_free(g_b_scaler.l_h_uv);
-
- g_b_scaler.l_w = (short *)vpx_memalign(32, out_width * 2);
- g_b_scaler.l_h = (short *)vpx_memalign(32, out_height * 2);
- g_b_scaler.l_h_uv = (short *)vpx_memalign(32, out_height * 2);
-
- vpx_free(g_b_scaler.c_w);
-
- vpx_free(g_b_scaler.c_h);
-
- vpx_free(g_b_scaler.c_h_uv);
-
- g_b_scaler.c_w = (short *)vpx_memalign(32, g_b_scaler.nw * 4 * 2);
- g_b_scaler.c_h = (short *)vpx_memalign(32, g_b_scaler.nh * 4 * 2);
- g_b_scaler.c_h_uv = (short *)vpx_memalign(32, g_b_scaler.nh_uv * 4 * 2);
-
- g_b_scaler.hbuf = g_hbuf;
- g_b_scaler.hbuf_uv = g_hbuf_uv;
-
- // Set up polyphase filter taps. This needs to be done before
- // the scaling because of the floating point math required. The
- // coefficients are multiplied by 2^12 so that fixed point math
- // can be used in the main scaling loop.
-#ifdef FIXED_POINT
- fixed_mult = (1.0 / (float)g_b_scaler.nw) * 4294967296;
-
- product_val = 0;
-
- for (i = 0; i < g_b_scaler.nw; i++) {
- if (product_val > g_b_scaler.nw)
- product_val -= g_b_scaler.nw;
-
- phase_offset_int = (fixed_mult * product_val) >> 16;
-
- g_b_scaler.c_w[i * 4] = c3_fixed(phase_offset_int);
- g_b_scaler.c_w[i * 4 + 1] = c2_fixed(phase_offset_int);
- g_b_scaler.c_w[i * 4 + 2] = c1_fixed(phase_offset_int);
- g_b_scaler.c_w[i * 4 + 3] = c0_fixed(phase_offset_int);
-
- product_val += d_w;
- }
-
-
- fixed_mult = (1.0 / (float)g_b_scaler.nh) * 4294967296;
-
- product_val = 0;
-
- for (i = 0; i < g_b_scaler.nh; i++) {
- if (product_val > g_b_scaler.nh)
- product_val -= g_b_scaler.nh;
-
- phase_offset_int = (fixed_mult * product_val) >> 16;
-
- g_b_scaler.c_h[i * 4] = c0_fixed(phase_offset_int);
- g_b_scaler.c_h[i * 4 + 1] = c1_fixed(phase_offset_int);
- g_b_scaler.c_h[i * 4 + 2] = c2_fixed(phase_offset_int);
- g_b_scaler.c_h[i * 4 + 3] = c3_fixed(phase_offset_int);
-
- product_val += d_h;
- }
-
- fixed_mult = (1.0 / (float)g_b_scaler.nh_uv) * 4294967296;
-
- product_val = 0;
-
- for (i = 0; i < g_b_scaler.nh_uv; i++) {
- if (product_val > g_b_scaler.nh_uv)
- product_val -= g_b_scaler.nh_uv;
-
- phase_offset_int = (fixed_mult * product_val) >> 16;
-
- g_b_scaler.c_h_uv[i * 4] = c0_fixed(phase_offset_int);
- g_b_scaler.c_h_uv[i * 4 + 1] = c1_fixed(phase_offset_int);
- g_b_scaler.c_h_uv[i * 4 + 2] = c2_fixed(phase_offset_int);
- g_b_scaler.c_h_uv[i * 4 + 3] = c3_fixed(phase_offset_int);
-
- product_val += d_h_uv;
- }
-
-#else
-
- for (i = 0; i < g_nw; i++) {
- phase_offset = (float)((i * d_w) % g_nw) / (float)g_nw;
- g_c_w[i * 4] = (C3(phase_offset) * 4096.0);
- g_c_w[i * 4 + 1] = (C2(phase_offset) * 4096.0);
- g_c_w[i * 4 + 2] = (C1(phase_offset) * 4096.0);
- g_c_w[i * 4 + 3] = (C0(phase_offset) * 4096.0);
- }
-
- for (i = 0; i < g_nh; i++) {
- phase_offset = (float)((i * d_h) % g_nh) / (float)g_nh;
- g_c_h[i * 4] = (C0(phase_offset) * 4096.0);
- g_c_h[i * 4 + 1] = (C1(phase_offset) * 4096.0);
- g_c_h[i * 4 + 2] = (C2(phase_offset) * 4096.0);
- g_c_h[i * 4 + 3] = (C3(phase_offset) * 4096.0);
- }
-
- for (i = 0; i < g_nh_uv; i++) {
- phase_offset = (float)((i * d_h_uv) % g_nh_uv) / (float)g_nh_uv;
- g_c_h_uv[i * 4] = (C0(phase_offset) * 4096.0);
- g_c_h_uv[i * 4 + 1] = (C1(phase_offset) * 4096.0);
- g_c_h_uv[i * 4 + 2] = (C2(phase_offset) * 4096.0);
- g_c_h_uv[i * 4 + 3] = (C3(phase_offset) * 4096.0);
- }
-
-#endif
-
- // Create an array that corresponds input lines to output lines.
- // This doesn't require floating point math, but it does require
- // a division and because hardware division is not present that
- // is a call.
- for (i = 0; i < out_width; i++) {
- g_b_scaler.l_w[i] = (i * d_w) / g_b_scaler.nw;
-
- if ((g_b_scaler.l_w[i] + 2) <= in_width)
- g_b_scaler.max_usable_out_width = i;
-
- }
-
- for (i = 0; i < out_height + 1; i++) {
- g_b_scaler.l_h[i] = (i * d_h) / g_b_scaler.nh;
- g_b_scaler.l_h_uv[i] = (i * d_h_uv) / g_b_scaler.nh_uv;
- }
-
- return 0;
-}
-
-int bicubic_scale(int in_width, int in_height, int in_stride,
- int out_width, int out_height, int out_stride,
- unsigned char *input_image, unsigned char *output_image) {
- short *RESTRICT l_w, * RESTRICT l_h;
- short *RESTRICT c_w, * RESTRICT c_h;
- unsigned char *RESTRICT ip, * RESTRICT op;
- unsigned char *RESTRICT hbuf;
- int h, w, lw, lh;
- int temp_sum;
- int phase_offset_w, phase_offset_h;
-
- c_w = g_b_scaler.c_w;
- c_h = g_b_scaler.c_h;
-
- op = output_image;
-
- l_w = g_b_scaler.l_w;
- l_h = g_b_scaler.l_h;
-
- phase_offset_h = 0;
-
- for (h = 0; h < out_height; h++) {
- // select the row to work on
- lh = l_h[h];
- ip = input_image + (in_stride * lh);
-
- // vp8_filter the row vertically into an temporary buffer.
- // If the phase offset == 0 then all the multiplication
- // is going to result in the output equalling the input.
- // So instead point the temporary buffer to the input.
- // Also handle the boundry condition of not being able to
- // filter that last lines.
- if (phase_offset_h && (lh < in_height - 2)) {
- hbuf = g_b_scaler.hbuf;
-
- for (w = 0; w < in_width; w++) {
- temp_sum = c_h[phase_offset_h * 4 + 3] * ip[w - in_stride];
- temp_sum += c_h[phase_offset_h * 4 + 2] * ip[w];
- temp_sum += c_h[phase_offset_h * 4 + 1] * ip[w + in_stride];
- temp_sum += c_h[phase_offset_h * 4] * ip[w + 2 * in_stride];
-
- hbuf[w] = temp_sum >> 12;
- }
- } else
- hbuf = ip;
-
- // increase the phase offset for the next time around.
- if (++phase_offset_h >= g_b_scaler.nh)
- phase_offset_h = 0;
-
- // now filter and expand it horizontally into the final
- // output buffer
- phase_offset_w = 0;
-
- for (w = 0; w < out_width; w++) {
- // get the index to use to expand the image
- lw = l_w[w];
-
- temp_sum = c_w[phase_offset_w * 4] * hbuf[lw - 1];
- temp_sum += c_w[phase_offset_w * 4 + 1] * hbuf[lw];
- temp_sum += c_w[phase_offset_w * 4 + 2] * hbuf[lw + 1];
- temp_sum += c_w[phase_offset_w * 4 + 3] * hbuf[lw + 2];
- temp_sum = temp_sum >> 12;
-
- if (++phase_offset_w >= g_b_scaler.nw)
- phase_offset_w = 0;
-
- // boundry conditions
- if ((lw + 2) >= in_width)
- temp_sum = hbuf[lw];
-
- if (lw == 0)
- temp_sum = hbuf[0];
-
- op[w] = temp_sum;
- }
-
- op += out_stride;
- }
-
- return 0;
-}
-
-void bicubic_scale_frame_reset() {
- g_b_scaler.out_width = 0;
- g_b_scaler.out_height = 0;
-}
-
-void bicubic_scale_frame(YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst,
- int new_width, int new_height) {
-
- dst->y_width = new_width;
- dst->y_height = new_height;
- dst->uv_width = new_width / 2;
- dst->uv_height = new_height / 2;
-
- dst->y_stride = dst->y_width;
- dst->uv_stride = dst->uv_width;
-
- bicubic_scale(src->y_width, src->y_height, src->y_stride,
- new_width, new_height, dst->y_stride,
- src->y_buffer, dst->y_buffer);
-
- bicubic_scale(src->uv_width, src->uv_height, src->uv_stride,
- new_width / 2, new_height / 2, dst->uv_stride,
- src->u_buffer, dst->u_buffer);
-
- bicubic_scale(src->uv_width, src->uv_height, src->uv_stride,
- new_width / 2, new_height / 2, dst->uv_stride,
- src->v_buffer, dst->v_buffer);
-}
diff --git a/vpx_scale/generic/gen_scalers.c b/vpx_scale/generic/gen_scalers.c
index 60c21fb..febe97d 100644
--- a/vpx_scale/generic/gen_scalers.c
+++ b/vpx_scale/generic/gen_scalers.c
@@ -17,688 +17,6 @@
/****************************************************************************
*
- * ROUTINE : vp8_horizontal_line_4_5_scale_c
- *
- * INPUTS : const unsigned char *source : Pointer to source data.
- * unsigned int source_width : Stride of source.
- * unsigned char *dest : Pointer to destination data.
- * unsigned int dest_width : Stride of destination (NOT USED).
- *
- * OUTPUTS : None.
- *
- * RETURNS : void
- *
- * FUNCTION : Copies horizontal line of pixels from source to
- * destination scaling up by 4 to 5.
- *
- * SPECIAL NOTES : None.
- *
- ****************************************************************************/
-void vp8_horizontal_line_4_5_scale_c(const unsigned char *source,
- unsigned int source_width,
- unsigned char *dest,
- unsigned int dest_width) {
- unsigned i;
- unsigned int a, b, c;
- unsigned char *des = dest;
- const unsigned char *src = source;
-
- (void) dest_width;
-
- for (i = 0; i < source_width - 4; i += 4) {
- a = src[0];
- b = src[1];
- des [0] = (unsigned char) a;
- des [1] = (unsigned char)((a * 51 + 205 * b + 128) >> 8);
- c = src[2] * 154;
- a = src[3];
- des [2] = (unsigned char)((b * 102 + c + 128) >> 8);
- des [3] = (unsigned char)((c + 102 * a + 128) >> 8);
- b = src[4];
- des [4] = (unsigned char)((a * 205 + 51 * b + 128) >> 8);
-
- src += 4;
- des += 5;
- }
-
- a = src[0];
- b = src[1];
- des [0] = (unsigned char)(a);
- des [1] = (unsigned char)((a * 51 + 205 * b + 128) >> 8);
- c = src[2] * 154;
- a = src[3];
- des [2] = (unsigned char)((b * 102 + c + 128) >> 8);
- des [3] = (unsigned char)((c + 102 * a + 128) >> 8);
- des [4] = (unsigned char)(a);
-
-}
-
-/****************************************************************************
- *
- * ROUTINE : vp8_vertical_band_4_5_scale_c
- *
- * INPUTS : unsigned char *dest : Pointer to destination data.
- * unsigned int dest_pitch : Stride of destination data.
- * unsigned int dest_width : Width of destination data.
- *
- * OUTPUTS : None.
- *
- * RETURNS : void
- *
- * FUNCTION : Scales vertical band of pixels by scale 4 to 5. The
- * height of the band scaled is 4-pixels.
- *
- * SPECIAL NOTES : The routine uses the first line of the band below
- * the current band.
- *
- ****************************************************************************/
-void vp8_vertical_band_4_5_scale_c(unsigned char *dest,
- unsigned int dest_pitch,
- unsigned int dest_width) {
- unsigned int i;
- unsigned int a, b, c, d;
- unsigned char *des = dest;
-
- for (i = 0; i < dest_width; i++) {
- a = des [0];
- b = des [dest_pitch];
-
- des[dest_pitch] = (unsigned char)((a * 51 + 205 * b + 128) >> 8);
-
- c = des[dest_pitch * 2] * 154;
- d = des[dest_pitch * 3];
-
- des [dest_pitch * 2] = (unsigned char)((b * 102 + c + 128) >> 8);
- des [dest_pitch * 3] = (unsigned char)((c + 102 * d + 128) >> 8);
-
- /* First line in next band */
- a = des [dest_pitch * 5];
- des [dest_pitch * 4] = (unsigned char)((d * 205 + 51 * a + 128) >> 8);
-
- des++;
- }
-}
-
-/****************************************************************************
- *
- * ROUTINE : vp8_last_vertical_band_4_5_scale_c
- *
- * INPUTS : unsigned char *dest : Pointer to destination data.
- * unsigned int dest_pitch : Stride of destination data.
- * unsigned int dest_width : Width of destination data.
- *
- * OUTPUTS : None.
- *
- * RETURNS : void
- *
- * FUNCTION : Scales last vertical band of pixels by scale 4 to 5. The
- * height of the band scaled is 4-pixels.
- *
- * SPECIAL NOTES : The routine does not have available the first line of
- * the band below the current band, since this is the
- * last band.
- *
- ****************************************************************************/
-void vp8_last_vertical_band_4_5_scale_c(unsigned char *dest,
- unsigned int dest_pitch,
- unsigned int dest_width) {
- unsigned int i;
- unsigned int a, b, c, d;
- unsigned char *des = dest;
-
- for (i = 0; i < dest_width; ++i) {
- a = des[0];
- b = des[dest_pitch];
-
- des[dest_pitch] = (unsigned char)((a * 51 + 205 * b + 128) >> 8);
-
- c = des[dest_pitch * 2] * 154;
- d = des[dest_pitch * 3];
-
- des [dest_pitch * 2] = (unsigned char)((b * 102 + c + 128) >> 8);
- des [dest_pitch * 3] = (unsigned char)((c + 102 * d + 128) >> 8);
-
- /* No other line for interplation of this line, so .. */
- des[dest_pitch * 4] = (unsigned char) d;
-
- des++;
- }
-}
-
-/****************************************************************************
- *
- * ROUTINE : vp8_horizontal_line_2_3_scale_c
- *
- * INPUTS : const unsigned char *source : Pointer to source data.
- * unsigned int source_width : Stride of source.
- * unsigned char *dest : Pointer to destination data.
- * unsigned int dest_width : Stride of destination (NOT USED).
- *
- * OUTPUTS : None.
- *
- * RETURNS : void
- *
- * FUNCTION : Copies horizontal line of pixels from source to
- * destination scaling up by 2 to 3.
- *
- * SPECIAL NOTES : None.
- *
- *
- ****************************************************************************/
-void vp8_horizontal_line_2_3_scale_c(const unsigned char *source,
- unsigned int source_width,
- unsigned char *dest,
- unsigned int dest_width) {
- unsigned int i;
- unsigned int a, b, c;
- unsigned char *des = dest;
- const unsigned char *src = source;
-
- (void) dest_width;
-
- for (i = 0; i < source_width - 2; i += 2) {
- a = src[0];
- b = src[1];
- c = src[2];
-
- des [0] = (unsigned char)(a);
- des [1] = (unsigned char)((a * 85 + 171 * b + 128) >> 8);
- des [2] = (unsigned char)((b * 171 + 85 * c + 128) >> 8);
-
- src += 2;
- des += 3;
- }
-
- a = src[0];
- b = src[1];
- des [0] = (unsigned char)(a);
- des [1] = (unsigned char)((a * 85 + 171 * b + 128) >> 8);
- des [2] = (unsigned char)(b);
-}
-
-
-/****************************************************************************
- *
- * ROUTINE : vp8_vertical_band_2_3_scale_c
- *
- * INPUTS : unsigned char *dest : Pointer to destination data.
- * unsigned int dest_pitch : Stride of destination data.
- * unsigned int dest_width : Width of destination data.
- *
- * OUTPUTS : None.
- *
- * RETURNS : void
- *
- * FUNCTION : Scales vertical band of pixels by scale 2 to 3. The
- * height of the band scaled is 2-pixels.
- *
- * SPECIAL NOTES : The routine uses the first line of the band below
- * the current band.
- *
- ****************************************************************************/
-void vp8_vertical_band_2_3_scale_c(unsigned char *dest,
- unsigned int dest_pitch,
- unsigned int dest_width) {
- unsigned int i;
- unsigned int a, b, c;
- unsigned char *des = dest;
-
- for (i = 0; i < dest_width; i++) {
- a = des [0];
- b = des [dest_pitch];
- c = des[dest_pitch * 3];
- des [dest_pitch ] = (unsigned char)((a * 85 + 171 * b + 128) >> 8);
- des [dest_pitch * 2] = (unsigned char)((b * 171 + 85 * c + 128) >> 8);
-
- des++;
- }
-}
-
-/****************************************************************************
- *
- * ROUTINE : vp8_last_vertical_band_2_3_scale_c
- *
- * INPUTS : unsigned char *dest : Pointer to destination data.
- * unsigned int dest_pitch : Stride of destination data.
- * unsigned int dest_width : Width of destination data.
- *
- * OUTPUTS : None.
- *
- * RETURNS : void
- *
- * FUNCTION : Scales last vertical band of pixels by scale 2 to 3. The
- * height of the band scaled is 2-pixels.
- *
- * SPECIAL NOTES : The routine does not have available the first line of
- * the band below the current band, since this is the
- * last band.
- *
- ****************************************************************************/
-void vp8_last_vertical_band_2_3_scale_c(unsigned char *dest,
- unsigned int dest_pitch,
- unsigned int dest_width) {
- unsigned int i;
- unsigned int a, b;
- unsigned char *des = dest;
-
- for (i = 0; i < dest_width; ++i) {
- a = des [0];
- b = des [dest_pitch];
-
- des [dest_pitch ] = (unsigned char)((a * 85 + 171 * b + 128) >> 8);
- des [dest_pitch * 2] = (unsigned char)(b);
- des++;
- }
-}
-
-/****************************************************************************
- *
- * ROUTINE : vp8_horizontal_line_3_5_scale_c
- *
- * INPUTS : const unsigned char *source : Pointer to source data.
- * unsigned int source_width : Stride of source.
- * unsigned char *dest : Pointer to destination data.
- * unsigned int dest_width : Stride of destination (NOT USED).
- *
- * OUTPUTS : None.
- *
- * RETURNS : void
- *
- * FUNCTION : Copies horizontal line of pixels from source to
- * destination scaling up by 3 to 5.
- *
- * SPECIAL NOTES : None.
- *
- *
- ****************************************************************************/
-void vp8_horizontal_line_3_5_scale_c(const unsigned char *source,
- unsigned int source_width,
- unsigned char *dest,
- unsigned int dest_width) {
- unsigned int i;
- unsigned int a, b, c;
- unsigned char *des = dest;
- const unsigned char *src = source;
-
- (void) dest_width;
-
- for (i = 0; i < source_width - 3; i += 3) {
- a = src[0];
- b = src[1];
- des [0] = (unsigned char)(a);
- des [1] = (unsigned char)((a * 102 + 154 * b + 128) >> 8);
-
- c = src[2];
- des [2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8);
- des [3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8);
-
- a = src[3];
- des [4] = (unsigned char)((c * 154 + a * 102 + 128) >> 8);
-
- src += 3;
- des += 5;
- }
-
- a = src[0];
- b = src[1];
- des [0] = (unsigned char)(a);
-
- des [1] = (unsigned char)((a * 102 + 154 * b + 128) >> 8);
- c = src[2];
- des [2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8);
- des [3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8);
-
- des [4] = (unsigned char)(c);
-}
-
-/****************************************************************************
- *
- * ROUTINE : vp8_vertical_band_3_5_scale_c
- *
- * INPUTS : unsigned char *dest : Pointer to destination data.
- * unsigned int dest_pitch : Stride of destination data.
- * unsigned int dest_width : Width of destination data.
- *
- * OUTPUTS : None.
- *
- * RETURNS : void
- *
- * FUNCTION : Scales vertical band of pixels by scale 3 to 5. The
- * height of the band scaled is 3-pixels.
- *
- * SPECIAL NOTES : The routine uses the first line of the band below
- * the current band.
- *
- ****************************************************************************/
-void vp8_vertical_band_3_5_scale_c(unsigned char *dest,
- unsigned int dest_pitch,
- unsigned int dest_width) {
- unsigned int i;
- unsigned int a, b, c;
- unsigned char *des = dest;
-
- for (i = 0; i < dest_width; i++) {
- a = des [0];
- b = des [dest_pitch];
- des [dest_pitch] = (unsigned char)((a * 102 + 154 * b + 128) >> 8);
-
- c = des[dest_pitch * 2];
- des [dest_pitch * 2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8);
- des [dest_pitch * 3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8);
-
- /* First line in next band... */
- a = des [dest_pitch * 5];
- des [dest_pitch * 4] = (unsigned char)((c * 154 + a * 102 + 128) >> 8);
-
- des++;
- }
-}
-
-/****************************************************************************
- *
- * ROUTINE : vp8_last_vertical_band_3_5_scale_c
- *
- * INPUTS : unsigned char *dest : Pointer to destination data.
- * unsigned int dest_pitch : Stride of destination data.
- * unsigned int dest_width : Width of destination data.
- *
- * OUTPUTS : None.
- *
- * RETURNS : void
- *
- * FUNCTION : Scales last vertical band of pixels by scale 3 to 5. The
- * height of the band scaled is 3-pixels.
- *
- * SPECIAL NOTES : The routine does not have available the first line of
- * the band below the current band, since this is the
- * last band.
- *
- ****************************************************************************/
-void vp8_last_vertical_band_3_5_scale_c(unsigned char *dest,
- unsigned int dest_pitch,
- unsigned int dest_width) {
- unsigned int i;
- unsigned int a, b, c;
- unsigned char *des = dest;
-
- for (i = 0; i < dest_width; ++i) {
- a = des [0];
- b = des [dest_pitch];
-
- des [ dest_pitch ] = (unsigned char)((a * 102 + 154 * b + 128) >> 8);
-
- c = des[dest_pitch * 2];
- des [dest_pitch * 2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8);
- des [dest_pitch * 3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8);
-
- /* No other line for interplation of this line, so .. */
- des [ dest_pitch * 4 ] = (unsigned char)(c);
-
- des++;
- }
-}
-
-/****************************************************************************
- *
- * ROUTINE : vp8_horizontal_line_3_4_scale_c
- *
- * INPUTS : const unsigned char *source : Pointer to source data.
- * unsigned int source_width : Stride of source.
- * unsigned char *dest : Pointer to destination data.
- * unsigned int dest_width : Stride of destination (NOT USED).
- *
- * OUTPUTS : None.
- *
- * RETURNS : void
- *
- * FUNCTION : Copies horizontal line of pixels from source to
- * destination scaling up by 3 to 4.
- *
- * SPECIAL NOTES : None.
- *
- *
- ****************************************************************************/
-void vp8_horizontal_line_3_4_scale_c(const unsigned char *source,
- unsigned int source_width,
- unsigned char *dest,
- unsigned int dest_width) {
- unsigned int i;
- unsigned int a, b, c;
- unsigned char *des = dest;
- const unsigned char *src = source;
-
- (void) dest_width;
-
- for (i = 0; i < source_width - 3; i += 3) {
- a = src[0];
- b = src[1];
- des [0] = (unsigned char)(a);
- des [1] = (unsigned char)((a * 64 + b * 192 + 128) >> 8);
-
- c = src[2];
- des [2] = (unsigned char)((b + c + 1) >> 1);
-
- a = src[3];
- des [3] = (unsigned char)((c * 192 + a * 64 + 128) >> 8);
-
- src += 3;
- des += 4;
- }
-
- a = src[0];
- b = src[1];
- des [0] = (unsigned char)(a);
- des [1] = (unsigned char)((a * 64 + b * 192 + 128) >> 8);
-
- c = src[2];
- des [2] = (unsigned char)((b + c + 1) >> 1);
- des [3] = (unsigned char)(c);
-}
-
-/****************************************************************************
- *
- * ROUTINE : vp8_vertical_band_3_4_scale_c
- *
- * INPUTS : unsigned char *dest : Pointer to destination data.
- * unsigned int dest_pitch : Stride of destination data.
- * unsigned int dest_width : Width of destination data.
- *
- * OUTPUTS : None.
- *
- * RETURNS : void
- *
- * FUNCTION : Scales vertical band of pixels by scale 3 to 4. The
- * height of the band scaled is 3-pixels.
- *
- * SPECIAL NOTES : The routine uses the first line of the band below
- * the current band.
- *
- ****************************************************************************/
-void vp8_vertical_band_3_4_scale_c(unsigned char *dest,
- unsigned int dest_pitch,
- unsigned int dest_width) {
- unsigned int i;
- unsigned int a, b, c;
- unsigned char *des = dest;
-
- for (i = 0; i < dest_width; i++) {
- a = des [0];
- b = des [dest_pitch];
- des [dest_pitch] = (unsigned char)((a * 64 + b * 192 + 128) >> 8);
-
- c = des[dest_pitch * 2];
- des [dest_pitch * 2] = (unsigned char)((b + c + 1) >> 1);
-
- /* First line in next band... */
- a = des [dest_pitch * 4];
- des [dest_pitch * 3] = (unsigned char)((c * 192 + a * 64 + 128) >> 8);
-
- des++;
- }
-}
-
-/****************************************************************************
- *
- * ROUTINE : vp8_last_vertical_band_3_4_scale_c
- *
- * INPUTS : unsigned char *dest : Pointer to destination data.
- * unsigned int dest_pitch : Stride of destination data.
- * unsigned int dest_width : Width of destination data.
- *
- * OUTPUTS : None.
- *
- * RETURNS : void
- *
- * FUNCTION : Scales last vertical band of pixels by scale 3 to 4. The
- * height of the band scaled is 3-pixels.
- *
- * SPECIAL NOTES : The routine does not have available the first line of
- * the band below the current band, since this is the
- * last band.
- *
- ****************************************************************************/
-void vp8_last_vertical_band_3_4_scale_c(unsigned char *dest,
- unsigned int dest_pitch,
- unsigned int dest_width) {
- unsigned int i;
- unsigned int a, b, c;
- unsigned char *des = dest;
-
- for (i = 0; i < dest_width; ++i) {
- a = des [0];
- b = des [dest_pitch];
-
- des [dest_pitch] = (unsigned char)((a * 64 + b * 192 + 128) >> 8);
-
- c = des[dest_pitch * 2];
- des [dest_pitch * 2] = (unsigned char)((b + c + 1) >> 1);
-
- /* No other line for interplation of this line, so .. */
- des [dest_pitch * 3] = (unsigned char)(c);
-
- des++;
- }
-}
-
-/****************************************************************************
- *
- * ROUTINE : vp8_horizontal_line_1_2_scale_c
- *
- * INPUTS : const unsigned char *source : Pointer to source data.
- * unsigned int source_width : Stride of source.
- * unsigned char *dest : Pointer to destination data.
- * unsigned int dest_width : Stride of destination (NOT USED).
- *
- * OUTPUTS : None.
- *
- * RETURNS : void
- *
- * FUNCTION : Copies horizontal line of pixels from source to
- * destination scaling up by 1 to 2.
- *
- * SPECIAL NOTES : None.
- *
- ****************************************************************************/
-void vp8_horizontal_line_1_2_scale_c(const unsigned char *source,
- unsigned int source_width,
- unsigned char *dest,
- unsigned int dest_width) {
- unsigned int i;
- unsigned int a, b;
- unsigned char *des = dest;
- const unsigned char *src = source;
-
- (void) dest_width;
-
- for (i = 0; i < source_width - 1; i += 1) {
- a = src[0];
- b = src[1];
- des [0] = (unsigned char)(a);
- des [1] = (unsigned char)((a + b + 1) >> 1);
- src += 1;
- des += 2;
- }
-
- a = src[0];
- des [0] = (unsigned char)(a);
- des [1] = (unsigned char)(a);
-}
-
-/****************************************************************************
- *
- * ROUTINE : vp8_vertical_band_1_2_scale_c
- *
- * INPUTS : unsigned char *dest : Pointer to destination data.
- * unsigned int dest_pitch : Stride of destination data.
- * unsigned int dest_width : Width of destination data.
- *
- * OUTPUTS : None.
- *
- * RETURNS : void
- *
- * FUNCTION : Scales vertical band of pixels by scale 1 to 2. The
- * height of the band scaled is 1-pixel.
- *
- * SPECIAL NOTES : The routine uses the first line of the band below
- * the current band.
- *
- ****************************************************************************/
-void vp8_vertical_band_1_2_scale_c(unsigned char *dest,
- unsigned int dest_pitch,
- unsigned int dest_width) {
- unsigned int i;
- unsigned int a, b;
- unsigned char *des = dest;
-
- for (i = 0; i < dest_width; i++) {
- a = des [0];
- b = des [dest_pitch * 2];
-
- des[dest_pitch] = (unsigned char)((a + b + 1) >> 1);
-
- des++;
- }
-}
-
-/****************************************************************************
- *
- * ROUTINE : vp8_last_vertical_band_1_2_scale_c
- *
- * INPUTS : unsigned char *dest : Pointer to destination data.
- * unsigned int dest_pitch : Stride of destination data.
- * unsigned int dest_width : Width of destination data.
- *
- * OUTPUTS : None.
- *
- * RETURNS : void
- *
- * FUNCTION : Scales last vertical band of pixels by scale 1 to 2. The
- * height of the band scaled is 1-pixel.
- *
- * SPECIAL NOTES : The routine does not have available the first line of
- * the band below the current band, since this is the
- * last band.
- *
- ****************************************************************************/
-void vp8_last_vertical_band_1_2_scale_c(unsigned char *dest,
- unsigned int dest_pitch,
- unsigned int dest_width) {
- unsigned int i;
- unsigned char *des = dest;
-
- for (i = 0; i < dest_width; ++i) {
- des[dest_pitch] = des[0];
- des++;
- }
-}
-
-
-
-
-
-/****************************************************************************
- *
- * ROUTINE : vp8_horizontal_line_4_5_scale_c
*
* INPUTS : const unsigned char *source : Pointer to source data.
* unsigned int source_width : Stride of source.
diff --git a/vpx_scale/generic/vpxscale.c b/vpx_scale/generic/vpxscale.c
index 7de85ca..49cdb7b 100644
--- a/vpx_scale/generic/vpxscale.c
+++ b/vpx_scale/generic/vpxscale.c
@@ -20,10 +20,9 @@
/****************************************************************************
* Header Files
****************************************************************************/
-#include "./vpx_rtcd.h"
+#include "./vpx_scale_rtcd.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_scale/yv12config.h"
-#include "vpx_scale/scale_mode.h"
typedef struct {
int expanded_frame_width;
@@ -41,66 +40,6 @@
/****************************************************************************
*
- * ROUTINE : horizontal_line_copy
- *
- * INPUTS : None
- *
- *
- * OUTPUTS : None.
- *
- * RETURNS : None
- *
- * FUNCTION : 1 to 1 scaling up for a horizontal line of pixles
- *
- * SPECIAL NOTES : None.
- *
- * ERRORS : None.
- *
- ****************************************************************************/
-static
-void horizontal_line_copy(
- const unsigned char *source,
- unsigned int source_width,
- unsigned char *dest,
- unsigned int dest_width
-) {
- (void) dest_width;
-
- duck_memcpy(dest, source, source_width);
-}
-/****************************************************************************
- *
- * ROUTINE : null_scale
- *
- * INPUTS : None
- *
- *
- * OUTPUTS : None.
- *
- * RETURNS : None
- *
- * FUNCTION : 1 to 1 scaling up for a vertical band
- *
- * SPECIAL NOTES : None.
- *
- * ERRORS : None.
- *
- ****************************************************************************/
-static
-void null_scale(
- unsigned char *dest,
- unsigned int dest_pitch,
- unsigned int dest_width
-) {
- (void) dest;
- (void) dest_pitch;
- (void) dest_width;
-
- return;
-}
-
-/****************************************************************************
- *
* ROUTINE : scale1d_2t1_i
*
* INPUTS : const unsigned char *source : Pointer to data to be scaled.
@@ -589,422 +528,3 @@
for (i = dh / 2 - 1; i < (int)dst->y_height / 2; i++)
duck_memcpy(dst->v_buffer + i * dst->uv_stride, dst->v_buffer + (dh / 2 - 2)*dst->uv_stride, dst->uv_width);
}
-/****************************************************************************
- *
- * ROUTINE : any_ratio_2d_scale
- *
- * INPUTS : SCALE_INSTANCE *si : Pointer to post-processor instance (NOT USED).
- * const unsigned char *source : Pointer to source image.
- * unsigned int source_pitch : Stride of source image.
- * unsigned int source_width : Width of source image.
- * unsigned int source_height : Height of source image (NOT USED).
- * unsigned char *dest : Pointer to destination image.
- * unsigned int dest_pitch : Stride of destination image.
- * unsigned int dest_width : Width of destination image.
- * unsigned int dest_height : Height of destination image.
- *
- * OUTPUTS : None.
- *
- * RETURNS : int: 1 if image scaled, 0 if image could not be scaled.
- *
- * FUNCTION : Scale the image with changing apect ratio.
- *
- * SPECIAL NOTES : This scaling is a bi-linear scaling. Need to re-work the
- * whole function for new scaling algorithm.
- *
- ****************************************************************************/
-static
-int any_ratio_2d_scale
-(
- SCALE_VARS *si,
- const unsigned char *source,
- int source_pitch,
- unsigned int source_width,
- unsigned int source_height,
- unsigned char *dest,
- unsigned int dest_pitch,
- unsigned int dest_width,
- unsigned int dest_height
-) {
- unsigned int i, k;
- unsigned int src_band_height = 0;
- unsigned int dest_band_height = 0;
-
- /* suggested scale factors */
- int hs = si->HScale;
- int hr = si->HRatio;
- int vs = si->VScale;
- int vr = si->VRatio;
-
- /* assume the ratios are scalable instead of should be centered */
- int ratio_scalable = 1;
-
- const unsigned char *source_base = ((source_pitch >= 0) ? source : (source + ((source_height - 1) * source_pitch)));
- const unsigned char *line_src;
-
- void (*horiz_line_scale)(const unsigned char *, unsigned int, unsigned char *, unsigned int) = NULL;
- void (*vert_band_scale)(unsigned char *, unsigned int, unsigned int) = NULL;
- void (*last_vert_band_scale)(unsigned char *, unsigned int, unsigned int) = NULL;
-
- (void) si;
-
- /* find out the ratio for each direction */
- switch (hr * 30 / hs) {
- case 24:
- /* 4-5 Scale in Width direction */
- horiz_line_scale = vp8_horizontal_line_4_5_scale;
- break;
- case 22:
- /* 3-4 Scale in Width direction */
- horiz_line_scale = vp8_horizontal_line_3_4_scale;
- break;
-
- case 20:
- /* 4-5 Scale in Width direction */
- horiz_line_scale = vp8_horizontal_line_2_3_scale;
- break;
- case 18:
- /* 3-5 Scale in Width direction */
- horiz_line_scale = vp8_horizontal_line_3_5_scale;
- break;
- case 15:
- /* 1-2 Scale in Width direction */
- horiz_line_scale = vp8_horizontal_line_1_2_scale;
- break;
- case 30:
- /* no scale in Width direction */
- horiz_line_scale = horizontal_line_copy;
- break;
- default:
- /* The ratio is not acceptable now */
- /* throw("The ratio is not acceptable for now!"); */
- ratio_scalable = 0;
- break;
- }
-
- switch (vr * 30 / vs) {
- case 24:
- /* 4-5 Scale in vertical direction */
- vert_band_scale = vp8_vertical_band_4_5_scale;
- last_vert_band_scale = vp8_last_vertical_band_4_5_scale;
- src_band_height = 4;
- dest_band_height = 5;
- break;
- case 22:
- /* 3-4 Scale in vertical direction */
- vert_band_scale = vp8_vertical_band_3_4_scale;
- last_vert_band_scale = vp8_last_vertical_band_3_4_scale;
- src_band_height = 3;
- dest_band_height = 4;
- break;
- case 20:
- /* 2-3 Scale in vertical direction */
- vert_band_scale = vp8_vertical_band_2_3_scale;
- last_vert_band_scale = vp8_last_vertical_band_2_3_scale;
- src_band_height = 2;
- dest_band_height = 3;
- break;
- case 18:
- /* 3-5 Scale in vertical direction */
- vert_band_scale = vp8_vertical_band_3_5_scale;
- last_vert_band_scale = vp8_last_vertical_band_3_5_scale;
- src_band_height = 3;
- dest_band_height = 5;
- break;
- case 15:
- /* 1-2 Scale in vertical direction */
- vert_band_scale = vp8_vertical_band_1_2_scale;
- last_vert_band_scale = vp8_last_vertical_band_1_2_scale;
- src_band_height = 1;
- dest_band_height = 2;
- break;
- case 30:
- /* no scale in Width direction */
- vert_band_scale = null_scale;
- last_vert_band_scale = null_scale;
- src_band_height = 4;
- dest_band_height = 4;
- break;
- default:
- /* The ratio is not acceptable now */
- /* throw("The ratio is not acceptable for now!"); */
- ratio_scalable = 0;
- break;
- }
-
- if (ratio_scalable == 0)
- return ratio_scalable;
-
- horiz_line_scale(source, source_width, dest, dest_width);
-
- /* except last band */
- for (k = 0; k < (dest_height + dest_band_height - 1) / dest_band_height - 1; k++) {
- /* scale one band horizontally */
- for (i = 1; i < src_band_height; i++) {
- /* Trap case where we could read off the base of the source buffer */
- line_src = source + i * source_pitch;
-
- if (line_src < source_base)
- line_src = source_base;
-
- horiz_line_scale(line_src, source_width,
- dest + i * dest_pitch, dest_width);
- }
-
- /* first line of next band */
- /* Trap case where we could read off the base of the source buffer */
- line_src = source + src_band_height * source_pitch;
-
- if (line_src < source_base)
- line_src = source_base;
-
- horiz_line_scale(line_src, source_width,
- dest + dest_band_height * dest_pitch,
- dest_width);
-
- /* Vertical scaling is in place */
- vert_band_scale(dest, dest_pitch, dest_width);
-
- /* Next band... */
- source += src_band_height * source_pitch;
- dest += dest_band_height * dest_pitch;
- }
-
- /* scale one band horizontally */
- for (i = 1; i < src_band_height; i++) {
- /* Trap case where we could read off the base of the source buffer */
- line_src = source + i * source_pitch;
-
- if (line_src < source_base)
- line_src = source_base;
-
- horiz_line_scale(line_src, source_width,
- dest + i * dest_pitch,
- dest_width);
- }
-
- /* Vertical scaling is in place */
- last_vert_band_scale(dest, dest_pitch, dest_width);
-
- return ratio_scalable;
-}
-
-/****************************************************************************
- *
- * ROUTINE : any_ratio_frame_scale
- *
- * INPUTS : SCALE_INSTANCE *si : Pointer to post-processor instance (NOT USED).
- * unsigned char *frame_buffer : Pointer to source image.
- * int YOffset : Offset from start of buffer to Y samples.
- * int UVOffset : Offset from start of buffer to UV samples.
- *
- * OUTPUTS : None.
- *
- * RETURNS : int: 1 if image scaled, 0 if image could not be scaled.
- *
- * FUNCTION : Scale the image with changing apect ratio.
- *
- * SPECIAL NOTES : None.
- *
- ****************************************************************************/
-static
-int any_ratio_frame_scale(SCALE_VARS *scale_vars, int YOffset, int UVOffset) {
- int i;
- int ew;
- int eh;
-
- /* suggested scale factors */
- int hs = scale_vars->HScale;
- int hr = scale_vars->HRatio;
- int vs = scale_vars->VScale;
- int vr = scale_vars->VRatio;
-
- int ratio_scalable = 1;
-
- int sw = (scale_vars->expanded_frame_width * hr + hs - 1) / hs;
- int sh = (scale_vars->expanded_frame_height * vr + vs - 1) / vs;
- int dw = scale_vars->expanded_frame_width;
- int dh = scale_vars->expanded_frame_height;
- YV12_BUFFER_CONFIG *src_yuv_config = scale_vars->src_yuv_config;
- YV12_BUFFER_CONFIG *dst_yuv_config = scale_vars->dst_yuv_config;
-
- if (hr == 3)
- ew = (sw + 2) / 3 * 3 * hs / hr;
- else
- ew = (sw + 7) / 8 * 8 * hs / hr;
-
- if (vr == 3)
- eh = (sh + 2) / 3 * 3 * vs / vr;
- else
- eh = (sh + 7) / 8 * 8 * vs / vr;
-
- ratio_scalable = any_ratio_2d_scale(scale_vars,
- (const unsigned char *)src_yuv_config->y_buffer,
- src_yuv_config->y_stride, sw, sh,
- (unsigned char *) dst_yuv_config->y_buffer + YOffset,
- dst_yuv_config->y_stride, dw, dh);
-
- for (i = 0; i < eh; i++)
- duck_memset(dst_yuv_config->y_buffer + YOffset + i * dst_yuv_config->y_stride + dw, 0, ew - dw);
-
- for (i = dh; i < eh; i++)
- duck_memset(dst_yuv_config->y_buffer + YOffset + i * dst_yuv_config->y_stride, 0, ew);
-
- if (ratio_scalable == 0)
- return ratio_scalable;
-
- sw = (sw + 1) >> 1;
- sh = (sh + 1) >> 1;
- dw = (dw + 1) >> 1;
- dh = (dh + 1) >> 1;
-
- any_ratio_2d_scale(scale_vars,
- (const unsigned char *)src_yuv_config->u_buffer,
- src_yuv_config->y_stride / 2, sw, sh,
- (unsigned char *)dst_yuv_config->u_buffer + UVOffset,
- dst_yuv_config->uv_stride, dw, dh);
-
- any_ratio_2d_scale(scale_vars,
- (const unsigned char *)src_yuv_config->v_buffer,
- src_yuv_config->y_stride / 2, sw, sh,
- (unsigned char *)dst_yuv_config->v_buffer + UVOffset,
- dst_yuv_config->uv_stride, dw, dh);
-
- return ratio_scalable;
-}
-
-/****************************************************************************
- *
- * ROUTINE : center_image
- *
- * INPUTS : SCALE_INSTANCE *si : Pointer to post-processor instance.
- *
- * OUTPUTS : None.
- *
- * RETURNS : void
- *
- * FUNCTION : Centers the image without scaling in the output buffer.
- *
- * SPECIAL NOTES : None.
- *
- ****************************************************************************/
-static void
-center_image(YV12_BUFFER_CONFIG *src_yuv_config, YV12_BUFFER_CONFIG *dst_yuv_config) {
- int i;
- int row_offset, col_offset;
- unsigned char *src_data_pointer;
- unsigned char *dst_data_pointer;
-
- /* center values */
- row_offset = (dst_yuv_config->y_height - src_yuv_config->y_height) / 2;
- col_offset = (dst_yuv_config->y_width - src_yuv_config->y_width) / 2;
-
- /* Y's */
- src_data_pointer = src_yuv_config->y_buffer;
- dst_data_pointer = (unsigned char *)dst_yuv_config->y_buffer + (row_offset * dst_yuv_config->y_stride) + col_offset;
-
- for (i = 0; i < src_yuv_config->y_height; i++) {
- duck_memcpy(dst_data_pointer, src_data_pointer, src_yuv_config->y_width);
- dst_data_pointer += dst_yuv_config->y_stride;
- src_data_pointer += src_yuv_config->y_stride;
- }
-
- row_offset /= 2;
- col_offset /= 2;
-
- /* U's */
- src_data_pointer = src_yuv_config->u_buffer;
- dst_data_pointer = (unsigned char *)dst_yuv_config->u_buffer + (row_offset * dst_yuv_config->uv_stride) + col_offset;
-
- for (i = 0; i < src_yuv_config->uv_height; i++) {
- duck_memcpy(dst_data_pointer, src_data_pointer, src_yuv_config->uv_width);
- dst_data_pointer += dst_yuv_config->uv_stride;
- src_data_pointer += src_yuv_config->uv_stride;
- }
-
- /* V's */
- src_data_pointer = src_yuv_config->v_buffer;
- dst_data_pointer = (unsigned char *)dst_yuv_config->v_buffer + (row_offset * dst_yuv_config->uv_stride) + col_offset;
-
- for (i = 0; i < src_yuv_config->uv_height; i++) {
- duck_memcpy(dst_data_pointer, src_data_pointer, src_yuv_config->uv_width);
- dst_data_pointer += dst_yuv_config->uv_stride;
- src_data_pointer += src_yuv_config->uv_stride;
- }
-}
-
-/****************************************************************************
- *
- * ROUTINE : scale_or_center
- *
- * INPUTS : SCALE_INSTANCE *si : Pointer to post-processor instance.
- *
- *
- *
- * OUTPUTS : None.
- *
- * RETURNS : void
- *
- * FUNCTION : Decides to scale or center image in scale buffer for blit
- *
- * SPECIAL NOTES : None.
- *
- ****************************************************************************/
-void
-vp8_yv12_scale_or_center
-(
- YV12_BUFFER_CONFIG *src_yuv_config,
- YV12_BUFFER_CONFIG *dst_yuv_config,
- int expanded_frame_width,
- int expanded_frame_height,
- int scaling_mode,
- int HScale,
- int HRatio,
- int VScale,
- int VRatio
-) {
- /*if ( ppi->post_processing_level )
- update_umvborder ( ppi, frame_buffer );*/
-
-
- switch (scaling_mode) {
- case SCALE_TO_FIT:
- case MAINTAIN_ASPECT_RATIO: {
- SCALE_VARS scale_vars;
- /* center values */
-#if 1
- int row = (dst_yuv_config->y_height - expanded_frame_height) / 2;
- int col = (dst_yuv_config->y_width - expanded_frame_width) / 2;
- /*int YOffset = row * dst_yuv_config->y_width + col;
- int UVOffset = (row>>1) * dst_yuv_config->uv_width + (col>>1);*/
- int YOffset = row * dst_yuv_config->y_stride + col;
- int UVOffset = (row >> 1) * dst_yuv_config->uv_stride + (col >> 1);
-#else
- int row = (src_yuv_config->y_height - expanded_frame_height) / 2;
- int col = (src_yuv_config->y_width - expanded_frame_width) / 2;
- int YOffset = row * src_yuv_config->y_width + col;
- int UVOffset = (row >> 1) * src_yuv_config->uv_width + (col >> 1);
-#endif
-
- scale_vars.dst_yuv_config = dst_yuv_config;
- scale_vars.src_yuv_config = src_yuv_config;
- scale_vars.HScale = HScale;
- scale_vars.HRatio = HRatio;
- scale_vars.VScale = VScale;
- scale_vars.VRatio = VRatio;
- scale_vars.expanded_frame_width = expanded_frame_width;
- scale_vars.expanded_frame_height = expanded_frame_height;
-
- /* perform center and scale */
- any_ratio_frame_scale(&scale_vars, YOffset, UVOffset);
-
- break;
- }
- case CENTER:
- center_image(src_yuv_config, dst_yuv_config);
- break;
-
- default:
- break;
- }
-}
diff --git a/vpx_scale/generic/yv12extend_generic.h b/vpx_scale/generic/yv12extend_generic.h
deleted file mode 100644
index cc2a554..0000000
--- a/vpx_scale/generic/yv12extend_generic.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef YV12_EXTEND_GENERIC_H
-#define YV12_EXTEND_GENERIC_H
-
-#include "vpx_scale/yv12config.h"
-
- void vp8_yv12_extend_frame_borders(YV12_BUFFER_CONFIG *ybf);
-
- /* Copy Y,U,V buffer data from src to dst, filling border of dst as well. */
- void vp8_yv12_copy_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc);
-
- /* Copy Y buffer data from src_ybc to dst_ybc without filling border data */
- void vp8_yv12_copy_y_c(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc);
-
-#endif /* YV12_EXTEND_GENERIC_H */
diff --git a/vpx_scale/include/generic/vpxscale_arbitrary.h b/vpx_scale/include/generic/vpxscale_arbitrary.h
deleted file mode 100644
index c535252..0000000
--- a/vpx_scale/include/generic/vpxscale_arbitrary.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef __VPX_SCALE_ARBITRARY_H__
-#define __VPX_SCALE_ARBITRARY_H__
-
-#include "vpx_scale/yv12config.h"
-
-typedef struct {
- int in_width;
- int in_height;
-
- int out_width;
- int out_height;
- int max_usable_out_width;
-
- // numerator for the width and height
- int nw;
- int nh;
- int nh_uv;
-
- // output to input correspondance array
- short *l_w;
- short *l_h;
- short *l_h_uv;
-
- // polyphase coefficients
- short *c_w;
- short *c_h;
- short *c_h_uv;
-
- // buffer for horizontal filtering.
- unsigned char *hbuf;
- unsigned char *hbuf_uv;
-} BICUBIC_SCALER_STRUCT;
-
-int bicubic_coefficient_setup(int in_width, int in_height, int out_width, int out_height);
-int bicubic_scale(int in_width, int in_height, int in_stride,
- int out_width, int out_height, int out_stride,
- unsigned char *input_image, unsigned char *output_image);
-void bicubic_scale_frame_reset();
-void bicubic_scale_frame(YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst,
- int new_width, int new_height);
-void bicubic_coefficient_init();
-void bicubic_coefficient_destroy();
-
-#endif /* __VPX_SCALE_ARBITRARY_H__ */
diff --git a/vpx_scale/include/generic/vpxscale_depricated.h b/vpx_scale/include/generic/vpxscale_depricated.h
deleted file mode 100644
index 3f7fe0f..0000000
--- a/vpx_scale/include/generic/vpxscale_depricated.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-/****************************************************************************
-*
-* Module Title : postp.h
-*
-* Description : Post processor interface
-*
-****************************************************************************/
-#ifndef VPXSCALE_H
-#define VPXSCALE_H
-
-extern void (*vp8_vertical_band_4_5_scale)(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-extern void (*vp8_last_vertical_band_4_5_scale)(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-extern void (*vp8_vertical_band_3_5_scale)(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-extern void (*vp8_last_vertical_band_3_5_scale)(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-extern void (*vp8_horizontal_line_1_2_scale)(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
-extern void (*vp8_horizontal_line_3_5_scale)(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
-extern void (*vp8_horizontal_line_4_5_scale)(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
-extern void (*vp8_vertical_band_1_2_scale)(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-extern void (*vp8_last_vertical_band_1_2_scale)(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-
-extern void dmachine_specific_config(int mmx_enabled, int xmm_enabled, int wmt_enabled);
-
-#endif
diff --git a/vpx_scale/scale_mode.h b/vpx_scale/scale_mode.h
deleted file mode 100644
index 5581385..0000000
--- a/vpx_scale/scale_mode.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-/****************************************************************************
-*
-*****************************************************************************
-*/
-
-#ifndef SCALE_MODE_H
-#define SCALE_MODE_H
-
-typedef enum {
- MAINTAIN_ASPECT_RATIO = 0x0,
- SCALE_TO_FIT = 0x1,
- CENTER = 0x2,
- OTHER = 0x3
-} SCALE_MODE;
-
-
-#endif
diff --git a/vpx_scale/vpx_scale.mk b/vpx_scale/vpx_scale.mk
index dc89478..864af55 100644
--- a/vpx_scale/vpx_scale.mk
+++ b/vpx_scale/vpx_scale.mk
@@ -1,12 +1,13 @@
SCALE_SRCS-yes += vpx_scale.mk
-SCALE_SRCS-yes += scale_mode.h
SCALE_SRCS-yes += yv12config.h
SCALE_SRCS-yes += vpxscale.h
SCALE_SRCS-yes += generic/vpxscale.c
SCALE_SRCS-yes += generic/yv12config.c
SCALE_SRCS-yes += generic/yv12extend.c
-SCALE_SRCS-yes += generic/yv12extend_generic.h
SCALE_SRCS-$(CONFIG_SPATIAL_RESAMPLING) += generic/gen_scalers.c
+SCALE_SRCS-yes += vpx_scale_asm_offsets.c
+SCALE_SRCS-yes += vpx_scale_rtcd.c
+SCALE_SRCS-yes += vpx_scale_rtcd.sh
#neon
SCALE_SRCS-$(HAVE_NEON) += arm/neon/vp8_vpxyv12_copyframe_func_neon$(ASM)
@@ -16,3 +17,8 @@
SCALE_SRCS-$(HAVE_NEON) += arm/neon/yv12extend_arm.c
SCALE_SRCS-no += $(SCALE_SRCS_REMOVE-yes)
+
+$(eval $(call asm_offsets_template,\
+ vpx_scale_asm_offsets.asm, vpx_scale/vpx_scale_asm_offsets.c))
+
+$(eval $(call rtcd_h_template,vpx_scale_rtcd,vpx_scale/vpx_scale_rtcd.sh))
diff --git a/vpx_scale/vpx_scale_asm_offsets.c b/vpx_scale/vpx_scale_asm_offsets.c
new file mode 100644
index 0000000..caa9e80
--- /dev/null
+++ b/vpx_scale/vpx_scale_asm_offsets.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "./vpx_config.h"
+#include "vpx/vpx_codec.h"
+#include "vpx_ports/asm_offsets.h"
+#include "vpx_scale/yv12config.h"
+
+BEGIN
+
+/* vpx_scale */
+DEFINE(yv12_buffer_config_y_width, offsetof(YV12_BUFFER_CONFIG, y_width));
+DEFINE(yv12_buffer_config_y_height, offsetof(YV12_BUFFER_CONFIG, y_height));
+DEFINE(yv12_buffer_config_y_stride, offsetof(YV12_BUFFER_CONFIG, y_stride));
+DEFINE(yv12_buffer_config_uv_width, offsetof(YV12_BUFFER_CONFIG, uv_width));
+DEFINE(yv12_buffer_config_uv_height, offsetof(YV12_BUFFER_CONFIG, uv_height));
+DEFINE(yv12_buffer_config_uv_stride, offsetof(YV12_BUFFER_CONFIG, uv_stride));
+DEFINE(yv12_buffer_config_y_buffer, offsetof(YV12_BUFFER_CONFIG, y_buffer));
+DEFINE(yv12_buffer_config_u_buffer, offsetof(YV12_BUFFER_CONFIG, u_buffer));
+DEFINE(yv12_buffer_config_v_buffer, offsetof(YV12_BUFFER_CONFIG, v_buffer));
+DEFINE(yv12_buffer_config_border, offsetof(YV12_BUFFER_CONFIG, border));
+DEFINE(VP8BORDERINPIXELS_VAL, VP8BORDERINPIXELS);
+
+END
+
+/* add asserts for any offset that is not supported by assembly code */
+/* add asserts for any size that is not supported by assembly code */
+
+#if HAVE_NEON
+/* vp8_yv12_extend_frame_borders_neon makes several assumptions based on this */
+ct_assert(VP8BORDERINPIXELS_VAL, VP8BORDERINPIXELS == 32)
+#endif
diff --git a/vpx_scale/vpx_scale_rtcd.c b/vpx_scale/vpx_scale_rtcd.c
new file mode 100644
index 0000000..656a22f
--- /dev/null
+++ b/vpx_scale/vpx_scale_rtcd.c
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "vpx_config.h"
+#define RTCD_C
+#include "vpx_scale_rtcd.h"
+#include "vpx_ports/vpx_once.h"
+
+void vpx_scale_rtcd(void)  /* public entry point: set up vpx_scale RTCD; (void) makes this a real prototype */
+{
+ once(setup_rtcd_internal);  /* hand setup_rtcd_internal to once() from vpx_ports/vpx_once.h */
+}
diff --git a/vpx_scale/vpx_scale_rtcd.sh b/vpx_scale/vpx_scale_rtcd.sh
new file mode 100644
index 0000000..e2bade0
--- /dev/null
+++ b/vpx_scale/vpx_scale_rtcd.sh
@@ -0,0 +1,26 @@
+vpx_scale_forward_decls() {
+cat <<EOF
+struct yv12_buffer_config;
+EOF
+}
+forward_decls vpx_scale_forward_decls
+
+# Scaler functions: declared only when CONFIG_SPATIAL_RESAMPLING is enabled
+if [ "$CONFIG_SPATIAL_RESAMPLING" = "yes" ]; then
+ prototype void vp8_horizontal_line_5_4_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"
+ prototype void vp8_vertical_band_5_4_scale "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
+ prototype void vp8_horizontal_line_5_3_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"
+ prototype void vp8_vertical_band_5_3_scale "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
+ prototype void vp8_horizontal_line_2_1_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"
+ prototype void vp8_vertical_band_2_1_scale "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
+ prototype void vp8_vertical_band_2_1_scale_i "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
+fi
+
+prototype void vp8_yv12_extend_frame_borders "struct yv12_buffer_config *ybf"
+specialize vp8_yv12_extend_frame_borders neon
+
+prototype void vp8_yv12_copy_frame "struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc"
+specialize vp8_yv12_copy_frame neon
+
+prototype void vp8_yv12_copy_y "struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc"
+specialize vp8_yv12_copy_y neon
diff --git a/vpx_scale/vpxscale.h b/vpx_scale/vpxscale.h
index 3c2194d..308b6aa 100644
--- a/vpx_scale/vpxscale.h
+++ b/vpx_scale/vpxscale.h
@@ -14,16 +14,6 @@
#include "vpx_scale/yv12config.h"
-extern void vp8_yv12_scale_or_center(YV12_BUFFER_CONFIG *src_yuv_config,
- YV12_BUFFER_CONFIG *dst_yuv_config,
- int expanded_frame_width,
- int expanded_frame_height,
- int scaling_mode,
- int HScale,
- int HRatio,
- int VScale,
- int VRatio);
-
extern void vp8_scale_frame(YV12_BUFFER_CONFIG *src,
YV12_BUFFER_CONFIG *dst,
unsigned char *temp_area,
diff --git a/vpx_scale/win32/scaleopt.c b/vpx_scale/win32/scaleopt.c
index 2d96cc7..54f9ac0 100644
--- a/vpx_scale/win32/scaleopt.c
+++ b/vpx_scale/win32/scaleopt.c
@@ -18,1184 +18,14 @@
****************************************************************************/
#include "pragmas.h"
-
-
/****************************************************************************
* Module Statics
****************************************************************************/
-__declspec(align(16)) const static unsigned short one_fifth[] = { 51, 51, 51, 51 };
-__declspec(align(16)) const static unsigned short two_fifths[] = { 102, 102, 102, 102 };
-__declspec(align(16)) const static unsigned short three_fifths[] = { 154, 154, 154, 154 };
-__declspec(align(16)) const static unsigned short four_fifths[] = { 205, 205, 205, 205 };
__declspec(align(16)) const static unsigned short round_values[] = { 128, 128, 128, 128 };
-__declspec(align(16)) const static unsigned short four_ones[] = { 1, 1, 1, 1};
-__declspec(align(16)) const static unsigned short const45_2[] = {205, 154, 102, 51 };
-__declspec(align(16)) const static unsigned short const45_1[] = { 51, 102, 154, 205 };
-__declspec(align(16)) const static unsigned char mask45[] = { 0, 0, 0, 0, 0, 0, 255, 0};
-__declspec(align(16)) const static unsigned short const35_2[] = { 154, 51, 205, 102 };
-__declspec(align(16)) const static unsigned short const35_1[] = { 102, 205, 51, 154 };
-
-
#include "vpx_scale/vpxscale.h"
#include "vpx_mem/vpx_mem.h"
-/****************************************************************************
- *
- * ROUTINE : horizontal_line_3_5_scale_mmx
- *
- * INPUTS : const unsigned char *source :
- * unsigned int source_width :
- * unsigned char *dest :
- * unsigned int dest_width :
- *
- * OUTPUTS : None.
- *
- * RETURNS : void
- *
- * FUNCTION : 3 to 5 up-scaling of a horizontal line of pixels.
- *
- * SPECIAL NOTES : None.
- *
- ****************************************************************************/
-static
-void horizontal_line_3_5_scale_mmx
-(
- const unsigned char *source,
- unsigned int source_width,
- unsigned char *dest,
- unsigned int dest_width
-) {
- (void) dest_width;
-
- __asm {
-
- push ebx
-
- mov esi, source
- mov edi, dest
-
- mov ecx, source_width
- lea edx, [esi+ecx-3];
-
- movq mm5, const35_1 // mm5 = 66 xx cd xx 33 xx 9a xx
- movq mm6, const35_2 // mm6 = 9a xx 33 xx cd xx 66 xx
-
- movq mm4, round_values // mm4 = 80 xx 80 xx 80 xx 80 xx
- pxor mm7, mm7 // clear mm7
-
- horiz_line_3_5_loop:
-
- mov eax, DWORD PTR [esi] // eax = 00 01 02 03
- mov ebx, eax
-
- and ebx, 0xffff00 // ebx = xx 01 02 xx
- mov ecx, eax // ecx = 00 01 02 03
-
- and eax, 0xffff0000 // eax = xx xx 02 03
- xor ecx, eax // ecx = 00 01 xx xx
-
- shr ebx, 8 // ebx = 01 02 xx xx
- or eax, ebx // eax = 01 02 02 03
-
- shl ebx, 16 // ebx = xx xx 01 02
- movd mm1, eax // mm1 = 01 02 02 03 xx xx xx xx
-
- or ebx, ecx // ebx = 00 01 01 02
- punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 02 xx 03 xx
-
- movd mm0, ebx // mm0 = 00 01 01 02
- pmullw mm1, mm6 //
-
- punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 01 xx 02 xx
- pmullw mm0, mm5 //
-
- mov [edi], ebx // writeoutput 00 xx xx xx
- add esi, 3
-
- add edi, 5
- paddw mm0, mm1
-
- paddw mm0, mm4
- psrlw mm0, 8
-
- cmp esi, edx
- packuswb mm0, mm7
-
- movd DWORD Ptr [edi-4], mm0
- jl horiz_line_3_5_loop
-
-// Exit:
- mov eax, DWORD PTR [esi] // eax = 00 01 02 03
- mov ebx, eax
-
- and ebx, 0xffff00 // ebx = xx 01 02 xx
- mov ecx, eax // ecx = 00 01 02 03
-
- and eax, 0xffff0000 // eax = xx xx 02 03
- xor ecx, eax // ecx = 00 01 xx xx
-
- shr ebx, 8 // ebx = 01 02 xx xx
- or eax, ebx // eax = 01 02 02 03
-
- shl eax, 8 // eax = xx 01 02 02
- and eax, 0xffff0000 // eax = xx xx 02 02
-
- or eax, ebx // eax = 01 02 02 02
-
- shl ebx, 16 // ebx = xx xx 01 02
- movd mm1, eax // mm1 = 01 02 02 02 xx xx xx xx
-
- or ebx, ecx // ebx = 00 01 01 02
- punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 02 xx 02 xx
-
- movd mm0, ebx // mm0 = 00 01 01 02
- pmullw mm1, mm6 //
-
- punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 01 xx 02 xx
- pmullw mm0, mm5 //
-
- mov [edi], ebx // writeoutput 00 xx xx xx
- paddw mm0, mm1
-
- paddw mm0, mm4
- psrlw mm0, 8
-
- packuswb mm0, mm7
- movd DWORD Ptr [edi+1], mm0
-
- pop ebx
-
- }
-
-}
-
-
-/****************************************************************************
- *
- * ROUTINE : horizontal_line_4_5_scale_mmx
- *
- * INPUTS : const unsigned char *source :
- * unsigned int source_width :
- * unsigned char *dest :
- * unsigned int dest_width :
- *
- * OUTPUTS : None.
- *
- * RETURNS : void
- *
- * FUNCTION : 4 to 5 up-scaling of a horizontal line of pixels.
- *
- * SPECIAL NOTES : None.
- *
- ****************************************************************************/
-static
-void horizontal_line_4_5_scale_mmx
-(
- const unsigned char *source,
- unsigned int source_width,
- unsigned char *dest,
- unsigned int dest_width
-) {
- (void)dest_width;
-
- __asm {
-
- mov esi, source
- mov edi, dest
-
- mov ecx, source_width
- lea edx, [esi+ecx-8];
-
- movq mm5, const45_1 // mm5 = 33 xx 66 xx 9a xx cd xx
- movq mm6, const45_2 // mm6 = cd xx 9a xx 66 xx 33 xx
-
- movq mm4, round_values // mm4 = 80 xx 80 xx 80 xx 80 xx
- pxor mm7, mm7 // clear mm7
-
- horiz_line_4_5_loop:
-
- movq mm0, QWORD PTR [esi] // mm0 = 00 01 02 03 04 05 06 07
- movq mm1, QWORD PTR [esi+1]; // mm1 = 01 02 03 04 05 06 07 08
-
- movq mm2, mm0 // mm2 = 00 01 02 03 04 05 06 07
- movq mm3, mm1 // mm3 = 01 02 03 04 05 06 07 08
-
- movd DWORD PTR [edi], mm0 // write output 00 xx xx xx
- punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 02 xx 03 xx
-
- punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 03 xx 04 xx
- pmullw mm0, mm5 // 00* 51 01*102 02*154 03*205
-
- pmullw mm1, mm6 // 01*205 02*154 03*102 04* 51
- punpckhbw mm2, mm7 // mm2 = 04 xx 05 xx 06 xx 07 xx
-
- movd DWORD PTR [edi+5], mm2 // write ouput 05 xx xx xx
- pmullw mm2, mm5 // 04* 51 05*102 06*154 07*205
-
- punpckhbw mm3, mm7 // mm3 = 05 xx 06 xx 07 xx 08 xx
- pmullw mm3, mm6 // 05*205 06*154 07*102 08* 51
-
- paddw mm0, mm1 // added round values
- paddw mm0, mm4
-
- psrlw mm0, 8 // output: 01 xx 02 xx 03 xx 04 xx
- packuswb mm0, mm7
-
- movd DWORD PTR [edi+1], mm0 // write output 01 02 03 04
- add edi, 10
-
- add esi, 8
- paddw mm2, mm3 //
-
- paddw mm2, mm4 // added round values
- cmp esi, edx
-
- psrlw mm2, 8
- packuswb mm2, mm7
-
- movd DWORD PTR [edi-4], mm2 // writeoutput 06 07 08 09
- jl horiz_line_4_5_loop
-
-// Exit:
- movq mm0, [esi] // mm0 = 00 01 02 03 04 05 06 07
- movq mm1, mm0 // mm1 = 00 01 02 03 04 05 06 07
-
- movq mm2, mm0 // mm2 = 00 01 02 03 04 05 06 07
- psrlq mm1, 8 // mm1 = 01 02 03 04 05 06 07 00
-
- movq mm3, mask45 // mm3 = 00 00 00 00 00 00 ff 00
- pand mm3, mm1 // mm3 = 00 00 00 00 00 00 07 00
-
- psllq mm3, 8 // mm3 = 00 00 00 00 00 00 00 07
- por mm1, mm3 // mm1 = 01 02 03 04 05 06 07 07
-
- movq mm3, mm1
-
- movd DWORD PTR [edi], mm0 // write output 00 xx xx xx
- punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 02 xx 03 xx
-
- punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 03 xx 04 xx
- pmullw mm0, mm5 // 00* 51 01*102 02*154 03*205
-
- pmullw mm1, mm6 // 01*205 02*154 03*102 04* 51
- punpckhbw mm2, mm7 // mm2 = 04 xx 05 xx 06 xx 07 xx
-
- movd DWORD PTR [edi+5], mm2 // write ouput 05 xx xx xx
- pmullw mm2, mm5 // 04* 51 05*102 06*154 07*205
-
- punpckhbw mm3, mm7 // mm3 = 05 xx 06 xx 07 xx 08 xx
- pmullw mm3, mm6 // 05*205 06*154 07*102 07* 51
-
- paddw mm0, mm1 // added round values
- paddw mm0, mm4
-
- psrlw mm0, 8 // output: 01 xx 02 xx 03 xx 04 xx
- packuswb mm0, mm7 // 01 02 03 04 xx xx xx xx
-
- movd DWORD PTR [edi+1], mm0 // write output 01 02 03 04
- paddw mm2, mm3 //
-
- paddw mm2, mm4 // added round values
- psrlw mm2, 8
-
- packuswb mm2, mm7
- movd DWORD PTR [edi+6], mm2 // writeoutput 06 07 08 09
-
-
- }
-}
-
-/****************************************************************************
- *
- * ROUTINE : vertical_band_4_5_scale_mmx
- *
- * INPUTS : unsigned char *dest :
- * unsigned int dest_pitch :
- * unsigned int dest_width :
- *
- * OUTPUTS : None.
- *
- * RETURNS : void
- *
- * FUNCTION : 4 to 5 up-scaling of a 4 pixel high band of pixels.
- *
- * SPECIAL NOTES : The routine uses the first line of the band below
- * the current band. The function also has a "C" only
- * version.
- *
- ****************************************************************************/
-static
-void vertical_band_4_5_scale_mmx
-(
- unsigned char *dest,
- unsigned int dest_pitch,
- unsigned int dest_width
-) {
- __asm {
-
- mov esi, dest // Get the source and destination pointer
- mov ecx, dest_pitch // Get the pitch size
-
- lea edi, [esi+ecx*2] // tow lines below
- add edi, ecx // three lines below
-
- pxor mm7, mm7 // clear out mm7
- mov edx, dest_width // Loop counter
-
- vs_4_5_loop:
-
- movq mm0, QWORD ptr [esi] // src[0];
- movq mm1, QWORD ptr [esi+ecx] // src[1];
-
- movq mm2, mm0 // Make a copy
- punpcklbw mm0, mm7 // unpack low to word
-
- movq mm5, one_fifth
- punpckhbw mm2, mm7 // unpack high to word
-
- pmullw mm0, mm5 // a * 1/5
-
- movq mm3, mm1 // make a copy
- punpcklbw mm1, mm7 // unpack low to word
-
- pmullw mm2, mm5 // a * 1/5
- movq mm6, four_fifths // constan
-
- movq mm4, mm1 // copy of low b
- pmullw mm4, mm6 // b * 4/5
-
- punpckhbw mm3, mm7 // unpack high to word
- movq mm5, mm3 // copy of high b
-
- pmullw mm5, mm6 // b * 4/5
- paddw mm0, mm4 // a * 1/5 + b * 4/5
-
- paddw mm2, mm5 // a * 1/5 + b * 4/5
- paddw mm0, round_values // + 128
-
- paddw mm2, round_values // + 128
- psrlw mm0, 8
-
- psrlw mm2, 8
- packuswb mm0, mm2 // des [1]
-
- movq QWORD ptr [esi+ecx], mm0 // write des[1]
- movq mm0, [esi+ecx*2] // mm0 = src[2]
-
- // mm1, mm3 --- Src[1]
- // mm0 --- Src[2]
- // mm7 for unpacking
-
- movq mm5, two_fifths
- movq mm2, mm0 // make a copy
-
- pmullw mm1, mm5 // b * 2/5
- movq mm6, three_fifths
-
-
- punpcklbw mm0, mm7 // unpack low to word
- pmullw mm3, mm5 // b * 2/5
-
- movq mm4, mm0 // make copy of c
- punpckhbw mm2, mm7 // unpack high to word
-
- pmullw mm4, mm6 // c * 3/5
- movq mm5, mm2
-
- pmullw mm5, mm6 // c * 3/5
- paddw mm1, mm4 // b * 2/5 + c * 3/5
-
- paddw mm3, mm5 // b * 2/5 + c * 3/5
- paddw mm1, round_values // + 128
-
- paddw mm3, round_values // + 128
- psrlw mm1, 8
-
- psrlw mm3, 8
- packuswb mm1, mm3 // des[2]
-
- movq QWORD ptr [esi+ecx*2], mm1 // write des[2]
- movq mm1, [edi] // mm1=Src[3];
-
- // mm0, mm2 --- Src[2]
- // mm1 --- Src[3]
- // mm6 --- 3/5
- // mm7 for unpacking
-
- pmullw mm0, mm6 // c * 3/5
- movq mm5, two_fifths // mm5 = 2/5
-
- movq mm3, mm1 // make a copy
- pmullw mm2, mm6 // c * 3/5
-
- punpcklbw mm1, mm7 // unpack low
- movq mm4, mm1 // make a copy
-
- punpckhbw mm3, mm7 // unpack high
- pmullw mm4, mm5 // d * 2/5
-
- movq mm6, mm3 // make a copy
- pmullw mm6, mm5 // d * 2/5
-
- paddw mm0, mm4 // c * 3/5 + d * 2/5
- paddw mm2, mm6 // c * 3/5 + d * 2/5
-
- paddw mm0, round_values // + 128
- paddw mm2, round_values // + 128
-
- psrlw mm0, 8
- psrlw mm2, 8
-
- packuswb mm0, mm2 // des[3]
- movq QWORD ptr [edi], mm0 // write des[3]
-
- // mm1, mm3 --- Src[3]
- // mm7 -- cleared for unpacking
-
- movq mm0, [edi+ecx*2] // mm0, Src[0] of the next group
-
- movq mm5, four_fifths // mm5 = 4/5
- pmullw mm1, mm5 // d * 4/5
-
- movq mm6, one_fifth // mm6 = 1/5
- movq mm2, mm0 // make a copy
-
- pmullw mm3, mm5 // d * 4/5
- punpcklbw mm0, mm7 // unpack low
-
- pmullw mm0, mm6 // an * 1/5
- punpckhbw mm2, mm7 // unpack high
-
- paddw mm1, mm0 // d * 4/5 + an * 1/5
- pmullw mm2, mm6 // an * 1/5
-
- paddw mm3, mm2 // d * 4/5 + an * 1/5
- paddw mm1, round_values // + 128
-
- paddw mm3, round_values // + 128
- psrlw mm1, 8
-
- psrlw mm3, 8
- packuswb mm1, mm3 // des[4]
-
- movq QWORD ptr [edi+ecx], mm1 // write des[4]
-
- add edi, 8
- add esi, 8
-
- sub edx, 8
- jg vs_4_5_loop
- }
-}
-
-/****************************************************************************
- *
- * ROUTINE : last_vertical_band_4_5_scale_mmx
- *
- * INPUTS : unsigned char *dest :
- * unsigned int dest_pitch :
- * unsigned int dest_width :
- *
- * OUTPUTS : None.
- *
- * RETURNS : None
- *
- * FUNCTION : 4 to 5 up-scaling of the last 4-pixel high band in an image.
- *
- * SPECIAL NOTES : The routine uses the first line of the band below
- * the current band. The function also has an "C" only
- * version.
- *
- ****************************************************************************/
-static
-void last_vertical_band_4_5_scale_mmx
-(
- unsigned char *dest,
- unsigned int dest_pitch,
- unsigned int dest_width
-) {
- __asm {
- mov esi, dest // Get the source and destination pointer
- mov ecx, dest_pitch // Get the pitch size
-
- lea edi, [esi+ecx*2] // tow lines below
- add edi, ecx // three lines below
-
- pxor mm7, mm7 // clear out mm7
- mov edx, dest_width // Loop counter
-
- last_vs_4_5_loop:
-
- movq mm0, QWORD ptr [esi] // src[0];
- movq mm1, QWORD ptr [esi+ecx] // src[1];
-
- movq mm2, mm0 // Make a copy
- punpcklbw mm0, mm7 // unpack low to word
-
- movq mm5, one_fifth
- punpckhbw mm2, mm7 // unpack high to word
-
- pmullw mm0, mm5 // a * 1/5
-
- movq mm3, mm1 // make a copy
- punpcklbw mm1, mm7 // unpack low to word
-
- pmullw mm2, mm5 // a * 1/5
- movq mm6, four_fifths // constan
-
- movq mm4, mm1 // copy of low b
- pmullw mm4, mm6 // b * 4/5
-
- punpckhbw mm3, mm7 // unpack high to word
- movq mm5, mm3 // copy of high b
-
- pmullw mm5, mm6 // b * 4/5
- paddw mm0, mm4 // a * 1/5 + b * 4/5
-
- paddw mm2, mm5 // a * 1/5 + b * 4/5
- paddw mm0, round_values // + 128
-
- paddw mm2, round_values // + 128
- psrlw mm0, 8
-
- psrlw mm2, 8
- packuswb mm0, mm2 // des [1]
-
- movq QWORD ptr [esi+ecx], mm0 // write des[1]
- movq mm0, [esi+ecx*2] // mm0 = src[2]
-
- // mm1, mm3 --- Src[1]
- // mm0 --- Src[2]
- // mm7 for unpacking
-
- movq mm5, two_fifths
- movq mm2, mm0 // make a copy
-
- pmullw mm1, mm5 // b * 2/5
- movq mm6, three_fifths
-
-
- punpcklbw mm0, mm7 // unpack low to word
- pmullw mm3, mm5 // b * 2/5
-
- movq mm4, mm0 // make copy of c
- punpckhbw mm2, mm7 // unpack high to word
-
- pmullw mm4, mm6 // c * 3/5
- movq mm5, mm2
-
- pmullw mm5, mm6 // c * 3/5
- paddw mm1, mm4 // b * 2/5 + c * 3/5
-
- paddw mm3, mm5 // b * 2/5 + c * 3/5
- paddw mm1, round_values // + 128
-
- paddw mm3, round_values // + 128
- psrlw mm1, 8
-
- psrlw mm3, 8
- packuswb mm1, mm3 // des[2]
-
- movq QWORD ptr [esi+ecx*2], mm1 // write des[2]
- movq mm1, [edi] // mm1=Src[3];
-
- movq QWORD ptr [edi+ecx], mm1 // write des[4];
-
- // mm0, mm2 --- Src[2]
- // mm1 --- Src[3]
- // mm6 --- 3/5
- // mm7 for unpacking
-
- pmullw mm0, mm6 // c * 3/5
- movq mm5, two_fifths // mm5 = 2/5
-
- movq mm3, mm1 // make a copy
- pmullw mm2, mm6 // c * 3/5
-
- punpcklbw mm1, mm7 // unpack low
- movq mm4, mm1 // make a copy
-
- punpckhbw mm3, mm7 // unpack high
- pmullw mm4, mm5 // d * 2/5
-
- movq mm6, mm3 // make a copy
- pmullw mm6, mm5 // d * 2/5
-
- paddw mm0, mm4 // c * 3/5 + d * 2/5
- paddw mm2, mm6 // c * 3/5 + d * 2/5
-
- paddw mm0, round_values // + 128
- paddw mm2, round_values // + 128
-
- psrlw mm0, 8
- psrlw mm2, 8
-
- packuswb mm0, mm2 // des[3]
- movq QWORD ptr [edi], mm0 // write des[3]
-
- // mm1, mm3 --- Src[3]
- // mm7 -- cleared for unpacking
- add edi, 8
- add esi, 8
-
- sub edx, 8
- jg last_vs_4_5_loop
- }
-}
-
-/****************************************************************************
- *
- * ROUTINE : vertical_band_3_5_scale_mmx
- *
- * INPUTS : unsigned char *dest :
- * unsigned int dest_pitch :
- * unsigned int dest_width :
- *
- * OUTPUTS : None.
- *
- * RETURNS : void
- *
- * FUNCTION : 3 to 5 up-scaling of a 3-pixel high band of pixels.
- *
- * SPECIAL NOTES : The routine uses the first line of the band below
- * the current band. The function also has an "C" only
- * version.
- *
- ****************************************************************************/
-static
-void vertical_band_3_5_scale_mmx
-(
- unsigned char *dest,
- unsigned int dest_pitch,
- unsigned int dest_width
-) {
- __asm {
- mov esi, dest // Get the source and destination pointer
- mov ecx, dest_pitch // Get the pitch size
-
- lea edi, [esi+ecx*2] // tow lines below
- add edi, ecx // three lines below
-
- pxor mm7, mm7 // clear out mm7
- mov edx, dest_width // Loop counter
-
- vs_3_5_loop:
-
- movq mm0, QWORD ptr [esi] // src[0];
- movq mm1, QWORD ptr [esi+ecx] // src[1];
-
- movq mm2, mm0 // Make a copy
- punpcklbw mm0, mm7 // unpack low to word
-
- movq mm5, two_fifths // mm5 = 2/5
- punpckhbw mm2, mm7 // unpack high to word
-
- pmullw mm0, mm5 // a * 2/5
-
- movq mm3, mm1 // make a copy
- punpcklbw mm1, mm7 // unpack low to word
-
- pmullw mm2, mm5 // a * 2/5
- movq mm6, three_fifths // mm6 = 3/5
-
- movq mm4, mm1 // copy of low b
- pmullw mm4, mm6 // b * 3/5
-
- punpckhbw mm3, mm7 // unpack high to word
- movq mm5, mm3 // copy of high b
-
- pmullw mm5, mm6 // b * 3/5
- paddw mm0, mm4 // a * 2/5 + b * 3/5
-
- paddw mm2, mm5 // a * 2/5 + b * 3/5
- paddw mm0, round_values // + 128
-
- paddw mm2, round_values // + 128
- psrlw mm0, 8
-
- psrlw mm2, 8
- packuswb mm0, mm2 // des [1]
-
- movq QWORD ptr [esi+ecx], mm0 // write des[1]
- movq mm0, [esi+ecx*2] // mm0 = src[2]
-
- // mm1, mm3 --- Src[1]
- // mm0 --- Src[2]
- // mm7 for unpacking
-
- movq mm4, mm1 // b low
- pmullw mm1, four_fifths // b * 4/5 low
-
- movq mm5, mm3 // b high
- pmullw mm3, four_fifths // b * 4/5 high
-
- movq mm2, mm0 // c
- pmullw mm4, one_fifth // b * 1/5
-
- punpcklbw mm0, mm7 // c low
- pmullw mm5, one_fifth // b * 1/5
-
- movq mm6, mm0 // make copy of c low
- punpckhbw mm2, mm7 // c high
-
- pmullw mm6, one_fifth // c * 1/5 low
- movq mm7, mm2 // make copy of c high
-
- pmullw mm7, one_fifth // c * 1/5 high
- paddw mm1, mm6 // b * 4/5 + c * 1/5 low
-
- paddw mm3, mm7 // b * 4/5 + c * 1/5 high
- movq mm6, mm0 // make copy of c low
-
- pmullw mm6, four_fifths // c * 4/5 low
- movq mm7, mm2 // make copy of c high
-
- pmullw mm7, four_fifths // c * 4/5 high
-
- paddw mm4, mm6 // b * 1/5 + c * 4/5 low
- paddw mm5, mm7 // b * 1/5 + c * 4/5 high
-
- paddw mm1, round_values // + 128
- paddw mm3, round_values // + 128
-
- psrlw mm1, 8
- psrlw mm3, 8
-
- packuswb mm1, mm3 // des[2]
- movq QWORD ptr [esi+ecx*2], mm1 // write des[2]
-
- paddw mm4, round_values // + 128
- paddw mm5, round_values // + 128
-
- psrlw mm4, 8
- psrlw mm5, 8
-
- packuswb mm4, mm5 // des[3]
- movq QWORD ptr [edi], mm4 // write des[3]
-
- // mm0, mm2 --- Src[3]
-
- pxor mm7, mm7 // clear mm7 for unpacking
- movq mm1, [edi+ecx*2] // mm1 = Src[0] of the next group
-
- movq mm5, three_fifths // mm5 = 3/5
- pmullw mm0, mm5 // d * 3/5
-
- movq mm6, two_fifths // mm6 = 2/5
- movq mm3, mm1 // make a copy
-
- pmullw mm2, mm5 // d * 3/5
- punpcklbw mm1, mm7 // unpack low
-
- pmullw mm1, mm6 // an * 2/5
- punpckhbw mm3, mm7 // unpack high
-
- paddw mm0, mm1 // d * 3/5 + an * 2/5
- pmullw mm3, mm6 // an * 2/5
-
- paddw mm2, mm3 // d * 3/5 + an * 2/5
- paddw mm0, round_values // + 128
-
- paddw mm2, round_values // + 128
- psrlw mm0, 8
-
- psrlw mm2, 8
- packuswb mm0, mm2 // des[4]
-
- movq QWORD ptr [edi+ecx], mm0 // write des[4]
-
- add edi, 8
- add esi, 8
-
- sub edx, 8
- jg vs_3_5_loop
- }
-}
-
-/****************************************************************************
- *
- * ROUTINE : last_vertical_band_3_5_scale_mmx
- *
- * INPUTS : unsigned char *dest :
- * unsigned int dest_pitch :
- * unsigned int dest_width :
- *
- * OUTPUTS : None.
- *
- * RETURNS : void
- *
- * FUNCTION : 3 to 5 up-scaling of a 3-pixel high band of pixels.
- *
- * SPECIAL NOTES : The routine uses the first line of the band below
- * the current band. The function also has an "C" only
- * version.
- *
- ****************************************************************************/
-static
-void last_vertical_band_3_5_scale_mmx
-(
- unsigned char *dest,
- unsigned int dest_pitch,
- unsigned int dest_width
-) {
- __asm {
- mov esi, dest // Get the source and destination pointer
- mov ecx, dest_pitch // Get the pitch size
-
- lea edi, [esi+ecx*2] // tow lines below
- add edi, ecx // three lines below
-
- pxor mm7, mm7 // clear out mm7
- mov edx, dest_width // Loop counter
-
-
- last_vs_3_5_loop:
-
- movq mm0, QWORD ptr [esi] // src[0];
- movq mm1, QWORD ptr [esi+ecx] // src[1];
-
- movq mm2, mm0 // Make a copy
- punpcklbw mm0, mm7 // unpack low to word
-
- movq mm5, two_fifths // mm5 = 2/5
- punpckhbw mm2, mm7 // unpack high to word
-
- pmullw mm0, mm5 // a * 2/5
-
- movq mm3, mm1 // make a copy
- punpcklbw mm1, mm7 // unpack low to word
-
- pmullw mm2, mm5 // a * 2/5
- movq mm6, three_fifths // mm6 = 3/5
-
- movq mm4, mm1 // copy of low b
- pmullw mm4, mm6 // b * 3/5
-
- punpckhbw mm3, mm7 // unpack high to word
- movq mm5, mm3 // copy of high b
-
- pmullw mm5, mm6 // b * 3/5
- paddw mm0, mm4 // a * 2/5 + b * 3/5
-
- paddw mm2, mm5 // a * 2/5 + b * 3/5
- paddw mm0, round_values // + 128
-
- paddw mm2, round_values // + 128
- psrlw mm0, 8
-
- psrlw mm2, 8
- packuswb mm0, mm2 // des [1]
-
- movq QWORD ptr [esi+ecx], mm0 // write des[1]
- movq mm0, [esi+ecx*2] // mm0 = src[2]
-
-
-
- // mm1, mm3 --- Src[1]
- // mm0 --- Src[2]
- // mm7 for unpacking
-
- movq mm4, mm1 // b low
- pmullw mm1, four_fifths // b * 4/5 low
-
- movq QWORD ptr [edi+ecx], mm0 // write des[4]
-
- movq mm5, mm3 // b high
- pmullw mm3, four_fifths // b * 4/5 high
-
- movq mm2, mm0 // c
- pmullw mm4, one_fifth // b * 1/5
-
- punpcklbw mm0, mm7 // c low
- pmullw mm5, one_fifth // b * 1/5
-
- movq mm6, mm0 // make copy of c low
- punpckhbw mm2, mm7 // c high
-
- pmullw mm6, one_fifth // c * 1/5 low
- movq mm7, mm2 // make copy of c high
-
- pmullw mm7, one_fifth // c * 1/5 high
- paddw mm1, mm6 // b * 4/5 + c * 1/5 low
-
- paddw mm3, mm7 // b * 4/5 + c * 1/5 high
- movq mm6, mm0 // make copy of c low
-
- pmullw mm6, four_fifths // c * 4/5 low
- movq mm7, mm2 // make copy of c high
-
- pmullw mm7, four_fifths // c * 4/5 high
-
- paddw mm4, mm6 // b * 1/5 + c * 4/5 low
- paddw mm5, mm7 // b * 1/5 + c * 4/5 high
-
- paddw mm1, round_values // + 128
- paddw mm3, round_values // + 128
-
- psrlw mm1, 8
- psrlw mm3, 8
-
- packuswb mm1, mm3 // des[2]
- movq QWORD ptr [esi+ecx*2], mm1 // write des[2]
-
- paddw mm4, round_values // + 128
- paddw mm5, round_values // + 128
-
- psrlw mm4, 8
- psrlw mm5, 8
-
- packuswb mm4, mm5 // des[3]
- movq QWORD ptr [edi], mm4 // write des[3]
-
- // mm0, mm2 --- Src[3]
-
- add edi, 8
- add esi, 8
-
- sub edx, 8
- jg last_vs_3_5_loop
- }
-}
-
-/****************************************************************************
- *
- * ROUTINE : vertical_band_1_2_scale_mmx
- *
- * INPUTS : unsigned char *dest :
- * unsigned int dest_pitch :
- * unsigned int dest_width :
- *
- * OUTPUTS : None.
- *
- * RETURNS : void
- *
- * FUNCTION : 1 to 2 up-scaling of a band of pixels.
- *
- * SPECIAL NOTES : The routine uses the first line of the band below
- * the current band. The function also has an "C" only
- * version.
- *
- ****************************************************************************/
-static
-void vertical_band_1_2_scale_mmx
-(
- unsigned char *dest,
- unsigned int dest_pitch,
- unsigned int dest_width
-) {
- __asm {
-
- mov esi, dest // Get the source and destination pointer
- mov ecx, dest_pitch // Get the pitch size
-
- pxor mm7, mm7 // clear out mm7
- mov edx, dest_width // Loop counter
-
- vs_1_2_loop:
-
- movq mm0, [esi] // get Src[0]
- movq mm1, [esi + ecx * 2] // get Src[1]
-
- movq mm2, mm0 // make copy before unpack
- movq mm3, mm1 // make copy before unpack
-
- punpcklbw mm0, mm7 // low Src[0]
- movq mm6, four_ones // mm6= 1, 1, 1, 1
-
- punpcklbw mm1, mm7 // low Src[1]
- paddw mm0, mm1 // low (a + b)
-
- punpckhbw mm2, mm7 // high Src[0]
- paddw mm0, mm6 // low (a + b + 1)
-
- punpckhbw mm3, mm7
- paddw mm2, mm3 // high (a + b )
-
- psraw mm0, 1 // low (a + b +1 )/2
- paddw mm2, mm6 // high (a + b + 1)
-
- psraw mm2, 1 // high (a + b + 1)/2
- packuswb mm0, mm2 // pack results
-
- movq [esi+ecx], mm0 // write out eight bytes
- add esi, 8
-
- sub edx, 8
- jg vs_1_2_loop
- }
-
-}
-
-/****************************************************************************
- *
- * ROUTINE : last_vertical_band_1_2_scale_mmx
- *
- * INPUTS : unsigned char *dest :
- * unsigned int dest_pitch :
- * unsigned int dest_width :
- *
- * OUTPUTS : None.
- *
- * RETURNS : void
- *
- * FUNCTION : 1 to 2 up-scaling of band of pixels.
- *
- * SPECIAL NOTES : The routine uses the first line of the band below
- * the current band. The function also has an "C" only
- * version.
- *
- ****************************************************************************/
-static
-void last_vertical_band_1_2_scale_mmx
-(
- unsigned char *dest,
- unsigned int dest_pitch,
- unsigned int dest_width
-) {
- __asm {
- mov esi, dest // Get the source and destination pointer
- mov ecx, dest_pitch // Get the pitch size
-
- mov edx, dest_width // Loop counter
-
- last_vs_1_2_loop:
-
- movq mm0, [esi] // get Src[0]
- movq [esi+ecx], mm0 // write out eight bytes
-
- add esi, 8
- sub edx, 8
-
- jg last_vs_1_2_loop
- }
-}
-
-/****************************************************************************
- *
- * ROUTINE : horizontal_line_1_2_scale
- *
- * INPUTS : const unsigned char *source :
- * unsigned int source_width :
- * unsigned char *dest :
- * unsigned int dest_width :
- *
- * OUTPUTS : None.
- *
- * RETURNS : void
- *
- * FUNCTION : 1 to 2 up-scaling of a horizontal line of pixels.
- *
- * SPECIAL NOTES : None.
- *
- ****************************************************************************/
-static
-void horizontal_line_1_2_scale_mmx
-(
- const unsigned char *source,
- unsigned int source_width,
- unsigned char *dest,
- unsigned int dest_width
-) {
- (void) dest_width;
-
- __asm {
- mov esi, source
- mov edi, dest
-
- pxor mm7, mm7
- movq mm6, four_ones
-
- mov ecx, source_width
-
- hs_1_2_loop:
-
- movq mm0, [esi]
- movq mm1, [esi+1]
-
- movq mm2, mm0
- movq mm3, mm1
-
- movq mm4, mm0
- punpcklbw mm0, mm7
-
- punpcklbw mm1, mm7
- paddw mm0, mm1
-
- paddw mm0, mm6
- punpckhbw mm2, mm7
-
- punpckhbw mm3, mm7
- paddw mm2, mm3
-
- paddw mm2, mm6
- psraw mm0, 1
-
- psraw mm2, 1
- packuswb mm0, mm2
-
- movq mm2, mm4
- punpcklbw mm2, mm0
-
- movq [edi], mm2
- punpckhbw mm4, mm0
-
- movq [edi+8], mm4
- add esi, 8
-
- add edi, 16
- sub ecx, 8
-
- cmp ecx, 8
- jg hs_1_2_loop
-
-// last eight pixel
-
- movq mm0, [esi]
- movq mm1, mm0
-
- movq mm2, mm0
- movq mm3, mm1
-
- psrlq mm1, 8
- psrlq mm3, 56
-
- psllq mm3, 56
- por mm1, mm3
-
- movq mm3, mm1
- movq mm4, mm0
-
- punpcklbw mm0, mm7
- punpcklbw mm1, mm7
-
- paddw mm0, mm1
- paddw mm0, mm6
-
- punpckhbw mm2, mm7
- punpckhbw mm3, mm7
-
- paddw mm2, mm3
- paddw mm2, mm6
-
- psraw mm0, 1
- psraw mm2, 1
-
- packuswb mm0, mm2
- movq mm2, mm4
-
- punpcklbw mm2, mm0
- movq [edi], mm2
-
- punpckhbw mm4, mm0
- movq [edi+8], mm4
- }
-}
-
-
-
-
-
__declspec(align(16)) const static unsigned short const54_2[] = { 0, 64, 128, 192 };
__declspec(align(16)) const static unsigned short const54_1[] = {256, 192, 128, 64 };
@@ -1685,25 +515,6 @@
void
register_mmxscalers(void) {
- vp8_horizontal_line_1_2_scale = horizontal_line_1_2_scale_mmx;
- vp8_vertical_band_1_2_scale = vertical_band_1_2_scale_mmx;
- vp8_last_vertical_band_1_2_scale = last_vertical_band_1_2_scale_mmx;
- vp8_horizontal_line_3_5_scale = horizontal_line_3_5_scale_mmx;
- vp8_vertical_band_3_5_scale = vertical_band_3_5_scale_mmx;
- vp8_last_vertical_band_3_5_scale = last_vertical_band_3_5_scale_mmx;
- vp8_horizontal_line_4_5_scale = horizontal_line_4_5_scale_mmx;
- vp8_vertical_band_4_5_scale = vertical_band_4_5_scale_mmx;
- vp8_last_vertical_band_4_5_scale = last_vertical_band_4_5_scale_mmx;
-
- vp8_horizontal_line_3_4_scale = vp8cx_horizontal_line_3_4_scale_c;
- vp8_vertical_band_3_4_scale = vp8cx_vertical_band_3_4_scale_c;
- vp8_last_vertical_band_3_4_scale = vp8cx_last_vertical_band_3_4_scale_c;
- vp8_horizontal_line_2_3_scale = vp8cx_horizontal_line_2_3_scale_c;
- vp8_vertical_band_2_3_scale = vp8cx_vertical_band_2_3_scale_c;
- vp8_last_vertical_band_2_3_scale = vp8cx_last_vertical_band_2_3_scale_c;
-
-
-
vp8_vertical_band_5_4_scale = vertical_band_5_4_scale_mmx;
vp8_vertical_band_5_3_scale = vertical_band_5_3_scale_mmx;
vp8_vertical_band_2_1_scale = vertical_band_2_1_scale_mmx;
@@ -1711,8 +522,4 @@
vp8_horizontal_line_2_1_scale = horizontal_line_2_1_scale_mmx;
vp8_horizontal_line_5_3_scale = horizontal_line_5_3_scale_mmx;
vp8_horizontal_line_5_4_scale = horizontal_line_5_4_scale_mmx;
-
-
-
-
}
diff --git a/vpx_scale/win32/scalesystemdependent.c b/vpx_scale/win32/scalesystemdependent.c
deleted file mode 100644
index 98913d1..0000000
--- a/vpx_scale/win32/scalesystemdependent.c
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-/****************************************************************************
-*
-* Module Title : system_dependent.c
-*
-* Description : Miscellaneous system dependent functions
-*
-****************************************************************************/
-
-/****************************************************************************
-* Header Files
-****************************************************************************/
-#include "vpx_scale/vpxscale.h"
-#include "cpuidlib.h"
-
-/****************************************************************************
-* Imports
-*****************************************************************************/
-extern void register_generic_scalers(void);
-extern void register_mmxscalers(void);
-
-/****************************************************************************
- *
- * ROUTINE : post_proc_machine_specific_config
- *
- * INPUTS : UINT32 Version : Codec version number.
- *
- * OUTPUTS : None.
- *
- * RETURNS : void
- *
- * FUNCTION : Checks for machine specifc features such as MMX support
- * sets appropriate flags and function pointers.
- *
- * SPECIAL NOTES : None.
- *
- ****************************************************************************/
-void
-vp8_scale_machine_specific_config(void) {
- // If MMX supported then set to use MMX versions of functions else
- // use original 'C' versions.
- int mmx_enabled;
- int xmm_enabled;
- int wmt_enabled;
-
- vpx_get_processor_flags(&mmx_enabled, &xmm_enabled, &wmt_enabled);
-
- if (mmx_enabled || xmm_enabled || wmt_enabled) {
- register_mmxscalers();
- } else {
- vp8_horizontal_line_1_2_scale = vp8cx_horizontal_line_1_2_scale_c;
- vp8_vertical_band_1_2_scale = vp8cx_vertical_band_1_2_scale_c;
- vp8_last_vertical_band_1_2_scale = vp8cx_last_vertical_band_1_2_scale_c;
- vp8_horizontal_line_3_5_scale = vp8cx_horizontal_line_3_5_scale_c;
- vp8_vertical_band_3_5_scale = vp8cx_vertical_band_3_5_scale_c;
- vp8_last_vertical_band_3_5_scale = vp8cx_last_vertical_band_3_5_scale_c;
- vp8_horizontal_line_3_4_scale = vp8cx_horizontal_line_3_4_scale_c;
- vp8_vertical_band_3_4_scale = vp8cx_vertical_band_3_4_scale_c;
- vp8_last_vertical_band_3_4_scale = vp8cx_last_vertical_band_3_4_scale_c;
- vp8_horizontal_line_2_3_scale = vp8cx_horizontal_line_2_3_scale_c;
- vp8_vertical_band_2_3_scale = vp8cx_vertical_band_2_3_scale_c;
- vp8_last_vertical_band_2_3_scale = vp8cx_last_vertical_band_2_3_scale_c;
- vp8_horizontal_line_4_5_scale = vp8cx_horizontal_line_4_5_scale_c;
- vp8_vertical_band_4_5_scale = vp8cx_vertical_band_4_5_scale_c;
- vp8_last_vertical_band_4_5_scale = vp8cx_last_vertical_band_4_5_scale_c;
-
-
- vp8_vertical_band_5_4_scale = vp8cx_vertical_band_5_4_scale_c;
- vp8_vertical_band_5_3_scale = vp8cx_vertical_band_5_3_scale_c;
- vp8_vertical_band_2_1_scale = vp8cx_vertical_band_2_1_scale_c;
- vp8_vertical_band_2_1_scale_i = vp8cx_vertical_band_2_1_scale_i_c;
- vp8_horizontal_line_2_1_scale = vp8cx_horizontal_line_2_1_scale_c;
- vp8_horizontal_line_5_3_scale = vp8cx_horizontal_line_5_3_scale_c;
- vp8_horizontal_line_5_4_scale = vp8cx_horizontal_line_5_4_scale_c;
-
- }
-}
diff --git a/vpxdec.c b/vpxdec.c
index 9b728bf..f7281a4 100644
--- a/vpxdec.c
+++ b/vpxdec.c
@@ -22,7 +22,7 @@
#include "vpx_config.h"
#include "vpx/vpx_decoder.h"
#include "vpx_ports/vpx_timer.h"
-#if CONFIG_VP8_DECODER
+#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER
#include "vpx/vp8dx.h"
#endif
#if CONFIG_MD5
@@ -49,175 +49,171 @@
static const char *exec_name;
#define VP8_FOURCC (0x00385056)
-static const struct
-{
- char const *name;
- vpx_codec_iface_t *iface;
- unsigned int fourcc;
- unsigned int fourcc_mask;
-} ifaces[] =
-{
+#define VP9_FOURCC (0x00395056)
+static const struct {
+ char const *name;
+ const vpx_codec_iface_t *(*iface)(void);
+ unsigned int fourcc;
+ unsigned int fourcc_mask;
+} ifaces[] = {
#if CONFIG_VP8_DECODER
- {"vp8", &vpx_codec_vp8_dx_algo, VP8_FOURCC, 0x00FFFFFF},
+ {"vp8", vpx_codec_vp8_dx, VP8_FOURCC, 0x00FFFFFF},
+#endif
+#if CONFIG_VP9_DECODER
+ {"vp9", vpx_codec_vp9_dx, VP9_FOURCC, 0x00FFFFFF},
#endif
};
#include "args.h"
static const arg_def_t codecarg = ARG_DEF(NULL, "codec", 1,
- "Codec to use");
+ "Codec to use");
static const arg_def_t use_yv12 = ARG_DEF(NULL, "yv12", 0,
- "Output raw YV12 frames");
+ "Output raw YV12 frames");
static const arg_def_t use_i420 = ARG_DEF(NULL, "i420", 0,
- "Output raw I420 frames");
+ "Output raw I420 frames");
static const arg_def_t flipuvarg = ARG_DEF(NULL, "flipuv", 0,
- "Flip the chroma planes in the output");
+ "Flip the chroma planes in the output");
static const arg_def_t noblitarg = ARG_DEF(NULL, "noblit", 0,
- "Don't process the decoded frames");
+ "Don't process the decoded frames");
static const arg_def_t progressarg = ARG_DEF(NULL, "progress", 0,
- "Show progress after each frame decodes");
+ "Show progress after each frame decodes");
static const arg_def_t limitarg = ARG_DEF(NULL, "limit", 1,
- "Stop decoding after n frames");
+ "Stop decoding after n frames");
+static const arg_def_t skiparg = ARG_DEF(NULL, "skip", 1,
+ "Skip the first n input frames");
static const arg_def_t postprocarg = ARG_DEF(NULL, "postproc", 0,
- "Postprocess decoded frames");
+ "Postprocess decoded frames");
static const arg_def_t summaryarg = ARG_DEF(NULL, "summary", 0,
- "Show timing summary");
+ "Show timing summary");
static const arg_def_t outputfile = ARG_DEF("o", "output", 1,
- "Output file name pattern (see below)");
+ "Output file name pattern (see below)");
static const arg_def_t threadsarg = ARG_DEF("t", "threads", 1,
- "Max threads to use");
+ "Max threads to use");
static const arg_def_t verbosearg = ARG_DEF("v", "verbose", 0,
- "Show version string");
+ "Show version string");
static const arg_def_t error_concealment = ARG_DEF(NULL, "error-concealment", 0,
- "Enable decoder error-concealment");
+ "Enable decoder error-concealment");
#if CONFIG_MD5
static const arg_def_t md5arg = ARG_DEF(NULL, "md5", 0,
"Compute the MD5 sum of the decoded frame");
#endif
-static const arg_def_t *all_args[] =
-{
- &codecarg, &use_yv12, &use_i420, &flipuvarg, &noblitarg,
- &progressarg, &limitarg, &postprocarg, &summaryarg, &outputfile,
- &threadsarg, &verbosearg,
+static const arg_def_t *all_args[] = {
+ &codecarg, &use_yv12, &use_i420, &flipuvarg, &noblitarg,
+ &progressarg, &limitarg, &skiparg, &postprocarg, &summaryarg, &outputfile,
+ &threadsarg, &verbosearg,
#if CONFIG_MD5
- &md5arg,
+ &md5arg,
#endif
- &error_concealment,
- NULL
+ &error_concealment,
+ NULL
};
#if CONFIG_VP8_DECODER
static const arg_def_t addnoise_level = ARG_DEF(NULL, "noise-level", 1,
- "Enable VP8 postproc add noise");
+ "Enable VP8 postproc add noise");
static const arg_def_t deblock = ARG_DEF(NULL, "deblock", 0,
- "Enable VP8 deblocking");
+ "Enable VP8 deblocking");
static const arg_def_t demacroblock_level = ARG_DEF(NULL, "demacroblock-level", 1,
- "Enable VP8 demacroblocking, w/ level");
+ "Enable VP8 demacroblocking, w/ level");
static const arg_def_t pp_debug_info = ARG_DEF(NULL, "pp-debug-info", 1,
- "Enable VP8 visible debug info");
+ "Enable VP8 visible debug info");
static const arg_def_t pp_disp_ref_frame = ARG_DEF(NULL, "pp-dbg-ref-frame", 1,
- "Display only selected reference frame per macro block");
+ "Display only selected reference frame per macro block");
static const arg_def_t pp_disp_mb_modes = ARG_DEF(NULL, "pp-dbg-mb-modes", 1,
- "Display only selected macro block modes");
+ "Display only selected macro block modes");
static const arg_def_t pp_disp_b_modes = ARG_DEF(NULL, "pp-dbg-b-modes", 1,
- "Display only selected block modes");
+ "Display only selected block modes");
static const arg_def_t pp_disp_mvs = ARG_DEF(NULL, "pp-dbg-mvs", 1,
- "Draw only selected motion vectors");
+ "Draw only selected motion vectors");
static const arg_def_t mfqe = ARG_DEF(NULL, "mfqe", 0,
- "Enable multiframe quality enhancement");
+ "Enable multiframe quality enhancement");
-static const arg_def_t *vp8_pp_args[] =
-{
- &addnoise_level, &deblock, &demacroblock_level, &pp_debug_info,
- &pp_disp_ref_frame, &pp_disp_mb_modes, &pp_disp_b_modes, &pp_disp_mvs, &mfqe,
- NULL
+static const arg_def_t *vp8_pp_args[] = {
+ &addnoise_level, &deblock, &demacroblock_level, &pp_debug_info,
+ &pp_disp_ref_frame, &pp_disp_mb_modes, &pp_disp_b_modes, &pp_disp_mvs, &mfqe,
+ NULL
};
#endif
-static void usage_exit()
-{
- int i;
+static void usage_exit() {
+ int i;
- fprintf(stderr, "Usage: %s <options> filename\n\n"
- "Options:\n", exec_name);
- arg_show_usage(stderr, all_args);
+ fprintf(stderr, "Usage: %s <options> filename\n\n"
+ "Options:\n", exec_name);
+ arg_show_usage(stderr, all_args);
#if CONFIG_VP8_DECODER
- fprintf(stderr, "\nVP8 Postprocessing Options:\n");
- arg_show_usage(stderr, vp8_pp_args);
+ fprintf(stderr, "\nVP8 Postprocessing Options:\n");
+ arg_show_usage(stderr, vp8_pp_args);
#endif
- fprintf(stderr,
- "\nOutput File Patterns:\n\n"
- " The -o argument specifies the name of the file(s) to "
- "write to. If the\n argument does not include any escape "
- "characters, the output will be\n written to a single file. "
- "Otherwise, the filename will be calculated by\n expanding "
- "the following escape characters:\n");
- fprintf(stderr,
- "\n\t%%w - Frame width"
- "\n\t%%h - Frame height"
- "\n\t%%<n> - Frame number, zero padded to <n> places (1..9)"
- "\n\n Pattern arguments are only supported in conjunction "
- "with the --yv12 and\n --i420 options. If the -o option is "
- "not specified, the output will be\n directed to stdout.\n"
- );
- fprintf(stderr, "\nIncluded decoders:\n\n");
+ fprintf(stderr,
+ "\nOutput File Patterns:\n\n"
+ " The -o argument specifies the name of the file(s) to "
+ "write to. If the\n argument does not include any escape "
+ "characters, the output will be\n written to a single file. "
+ "Otherwise, the filename will be calculated by\n expanding "
+ "the following escape characters:\n");
+ fprintf(stderr,
+ "\n\t%%w - Frame width"
+ "\n\t%%h - Frame height"
+ "\n\t%%<n> - Frame number, zero padded to <n> places (1..9)"
+ "\n\n Pattern arguments are only supported in conjunction "
+ "with the --yv12 and\n --i420 options. If the -o option is "
+ "not specified, the output will be\n directed to stdout.\n"
+ );
+ fprintf(stderr, "\nIncluded decoders:\n\n");
- for (i = 0; i < sizeof(ifaces) / sizeof(ifaces[0]); i++)
- fprintf(stderr, " %-6s - %s\n",
- ifaces[i].name,
- vpx_codec_iface_name(ifaces[i].iface));
+ for (i = 0; i < sizeof(ifaces) / sizeof(ifaces[0]); i++)
+ fprintf(stderr, " %-6s - %s\n",
+ ifaces[i].name,
+ vpx_codec_iface_name(ifaces[i].iface()));
- exit(EXIT_FAILURE);
+ exit(EXIT_FAILURE);
}
-void die(const char *fmt, ...)
-{
- va_list ap;
- va_start(ap, fmt);
- vfprintf(stderr, fmt, ap);
- fprintf(stderr, "\n");
- usage_exit();
+void die(const char *fmt, ...) {
+ va_list ap;
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ fprintf(stderr, "\n");
+ usage_exit();
}
-static unsigned int mem_get_le16(const void *vmem)
-{
- unsigned int val;
- const unsigned char *mem = (const unsigned char *)vmem;
+static unsigned int mem_get_le16(const void *vmem) {
+ unsigned int val;
+ const unsigned char *mem = (const unsigned char *)vmem;
- val = mem[1] << 8;
- val |= mem[0];
- return val;
+ val = mem[1] << 8;
+ val |= mem[0];
+ return val;
}
-static unsigned int mem_get_le32(const void *vmem)
-{
- unsigned int val;
- const unsigned char *mem = (const unsigned char *)vmem;
+static unsigned int mem_get_le32(const void *vmem) {
+ unsigned int val;
+ const unsigned char *mem = (const unsigned char *)vmem;
- val = mem[3] << 24;
- val |= mem[2] << 16;
- val |= mem[1] << 8;
- val |= mem[0];
- return val;
+ val = mem[3] << 24;
+ val |= mem[2] << 16;
+ val |= mem[1] << 8;
+ val |= mem[0];
+ return val;
}
-enum file_kind
-{
- RAW_FILE,
- IVF_FILE,
- WEBM_FILE
+enum file_kind {
+ RAW_FILE,
+ IVF_FILE,
+ WEBM_FILE
};
-struct input_ctx
-{
- enum file_kind kind;
- FILE *infile;
- nestegg *nestegg_ctx;
- nestegg_packet *pkt;
- unsigned int chunk;
- unsigned int chunks;
- unsigned int video_track;
+struct input_ctx {
+ enum file_kind kind;
+ FILE *infile;
+ nestegg *nestegg_ctx;
+ nestegg_packet *pkt;
+ unsigned int chunk;
+ unsigned int chunks;
+ unsigned int video_track;
};
#define IVF_FRAME_HDR_SZ (sizeof(uint32_t) + sizeof(uint64_t))
@@ -225,163 +221,136 @@
static int read_frame(struct input_ctx *input,
uint8_t **buf,
size_t *buf_sz,
- size_t *buf_alloc_sz)
-{
- char raw_hdr[IVF_FRAME_HDR_SZ];
- size_t new_buf_sz;
- FILE *infile = input->infile;
- enum file_kind kind = input->kind;
- if(kind == WEBM_FILE)
- {
- if(input->chunk >= input->chunks)
- {
- unsigned int track;
+ size_t *buf_alloc_sz) {
+ char raw_hdr[IVF_FRAME_HDR_SZ];
+ size_t new_buf_sz;
+ FILE *infile = input->infile;
+ enum file_kind kind = input->kind;
+ if (kind == WEBM_FILE) {
+ if (input->chunk >= input->chunks) {
+ unsigned int track;
- do
- {
- /* End of this packet, get another. */
- if(input->pkt)
- nestegg_free_packet(input->pkt);
+ do {
+ /* End of this packet, get another. */
+ if (input->pkt)
+ nestegg_free_packet(input->pkt);
- if(nestegg_read_packet(input->nestegg_ctx, &input->pkt) <= 0
- || nestegg_packet_track(input->pkt, &track))
- return 1;
+ if (nestegg_read_packet(input->nestegg_ctx, &input->pkt) <= 0
+ || nestegg_packet_track(input->pkt, &track))
+ return 1;
- } while(track != input->video_track);
+ } while (track != input->video_track);
- if(nestegg_packet_count(input->pkt, &input->chunks))
- return 1;
- input->chunk = 0;
- }
-
- if(nestegg_packet_data(input->pkt, input->chunk, buf, buf_sz))
- return 1;
- input->chunk++;
-
- return 0;
- }
- /* For both the raw and ivf formats, the frame size is the first 4 bytes
- * of the frame header. We just need to special case on the header
- * size.
- */
- else if (fread(raw_hdr, kind==IVF_FILE
- ? IVF_FRAME_HDR_SZ : RAW_FRAME_HDR_SZ, 1, infile) != 1)
- {
- if (!feof(infile))
- fprintf(stderr, "Failed to read frame size\n");
-
- new_buf_sz = 0;
- }
- else
- {
- new_buf_sz = mem_get_le32(raw_hdr);
-
- if (new_buf_sz > 256 * 1024 * 1024)
- {
- fprintf(stderr, "Error: Read invalid frame size (%u)\n",
- (unsigned int)new_buf_sz);
- new_buf_sz = 0;
- }
-
- if (kind == RAW_FILE && new_buf_sz > 256 * 1024)
- fprintf(stderr, "Warning: Read invalid frame size (%u)"
- " - not a raw file?\n", (unsigned int)new_buf_sz);
-
- if (new_buf_sz > *buf_alloc_sz)
- {
- uint8_t *new_buf = realloc(*buf, 2 * new_buf_sz);
-
- if (new_buf)
- {
- *buf = new_buf;
- *buf_alloc_sz = 2 * new_buf_sz;
- }
- else
- {
- fprintf(stderr, "Failed to allocate compressed data buffer\n");
- new_buf_sz = 0;
- }
- }
+ if (nestegg_packet_count(input->pkt, &input->chunks))
+ return 1;
+ input->chunk = 0;
}
- *buf_sz = new_buf_sz;
+ if (nestegg_packet_data(input->pkt, input->chunk, buf, buf_sz))
+ return 1;
+ input->chunk++;
+ return 0;
+ }
+ /* For both the raw and ivf formats, the frame size is the first 4 bytes
+ * of the frame header. We just need to special case on the header
+ * size.
+ */
+ else if (fread(raw_hdr, kind == IVF_FILE
+ ? IVF_FRAME_HDR_SZ : RAW_FRAME_HDR_SZ, 1, infile) != 1) {
if (!feof(infile))
- {
- if (fread(*buf, 1, *buf_sz, infile) != *buf_sz)
- {
- fprintf(stderr, "Failed to read full frame\n");
- return 1;
- }
+ fprintf(stderr, "Failed to read frame size\n");
- return 0;
+ new_buf_sz = 0;
+ } else {
+ new_buf_sz = mem_get_le32(raw_hdr);
+
+ if (new_buf_sz > 256 * 1024 * 1024) {
+ fprintf(stderr, "Error: Read invalid frame size (%u)\n",
+ (unsigned int)new_buf_sz);
+ new_buf_sz = 0;
}
- return 1;
+ if (kind == RAW_FILE && new_buf_sz > 256 * 1024)
+ fprintf(stderr, "Warning: Read invalid frame size (%u)"
+ " - not a raw file?\n", (unsigned int)new_buf_sz);
+
+ if (new_buf_sz > *buf_alloc_sz) {
+ uint8_t *new_buf = realloc(*buf, 2 * new_buf_sz);
+
+ if (new_buf) {
+ *buf = new_buf;
+ *buf_alloc_sz = 2 * new_buf_sz;
+ } else {
+ fprintf(stderr, "Failed to allocate compressed data buffer\n");
+ new_buf_sz = 0;
+ }
+ }
+ }
+
+ *buf_sz = new_buf_sz;
+
+ if (!feof(infile)) {
+ if (fread(*buf, 1, *buf_sz, infile) != *buf_sz) {
+ fprintf(stderr, "Failed to read full frame\n");
+ return 1;
+ }
+
+ return 0;
+ }
+
+ return 1;
}
-void *out_open(const char *out_fn, int do_md5)
-{
- void *out = NULL;
+void *out_open(const char *out_fn, int do_md5) {
+ void *out = NULL;
- if (do_md5)
- {
+ if (do_md5) {
#if CONFIG_MD5
- MD5Context *md5_ctx = out = malloc(sizeof(MD5Context));
- (void)out_fn;
- MD5Init(md5_ctx);
+ MD5Context *md5_ctx = out = malloc(sizeof(MD5Context));
+ (void)out_fn;
+ MD5Init(md5_ctx);
#endif
- }
- else
- {
- FILE *outfile = out = strcmp("-", out_fn) ? fopen(out_fn, "wb")
- : set_binary_mode(stdout);
+ } else {
+ FILE *outfile = out = strcmp("-", out_fn) ? fopen(out_fn, "wb")
+ : set_binary_mode(stdout);
- if (!outfile)
- {
- fprintf(stderr, "Failed to output file");
- exit(EXIT_FAILURE);
- }
+ if (!outfile) {
+ fprintf(stderr, "Failed to output file");
+ exit(EXIT_FAILURE);
}
+ }
- return out;
+ return out;
}
-void out_put(void *out, const uint8_t *buf, unsigned int len, int do_md5)
-{
- if (do_md5)
- {
+void out_put(void *out, const uint8_t *buf, unsigned int len, int do_md5) {
+ if (do_md5) {
#if CONFIG_MD5
- MD5Update(out, buf, len);
+ MD5Update(out, buf, len);
#endif
- }
- else
- {
- (void) fwrite(buf, 1, len, out);
- }
+ } else {
+ (void) fwrite(buf, 1, len, out);
+ }
}
-void out_close(void *out, const char *out_fn, int do_md5)
-{
- if (do_md5)
- {
+void out_close(void *out, const char *out_fn, int do_md5) {
+ if (do_md5) {
#if CONFIG_MD5
- uint8_t md5[16];
- int i;
+ uint8_t md5[16];
+ int i;
- MD5Final(md5, out);
- free(out);
+ MD5Final(md5, out);
+ free(out);
- for (i = 0; i < 16; i++)
- printf("%02x", md5[i]);
+ for (i = 0; i < 16; i++)
+ printf("%02x", md5[i]);
- printf(" %s\n", out_fn);
+ printf(" %s\n", out_fn);
#endif
- }
- else
- {
- fclose(out);
- }
+ } else {
+ fclose(out);
+ }
}
unsigned int file_is_ivf(FILE *infile,
@@ -389,56 +358,50 @@
unsigned int *width,
unsigned int *height,
unsigned int *fps_den,
- unsigned int *fps_num)
-{
- char raw_hdr[32];
- int is_ivf = 0;
+ unsigned int *fps_num) {
+ char raw_hdr[32];
+ int is_ivf = 0;
- if (fread(raw_hdr, 1, 32, infile) == 32)
- {
- if (raw_hdr[0] == 'D' && raw_hdr[1] == 'K'
- && raw_hdr[2] == 'I' && raw_hdr[3] == 'F')
- {
- is_ivf = 1;
+ if (fread(raw_hdr, 1, 32, infile) == 32) {
+ if (raw_hdr[0] == 'D' && raw_hdr[1] == 'K'
+ && raw_hdr[2] == 'I' && raw_hdr[3] == 'F') {
+ is_ivf = 1;
- if (mem_get_le16(raw_hdr + 4) != 0)
- fprintf(stderr, "Error: Unrecognized IVF version! This file may not"
- " decode properly.");
+ if (mem_get_le16(raw_hdr + 4) != 0)
+ fprintf(stderr, "Error: Unrecognized IVF version! This file may not"
+ " decode properly.");
- *fourcc = mem_get_le32(raw_hdr + 8);
- *width = mem_get_le16(raw_hdr + 12);
- *height = mem_get_le16(raw_hdr + 14);
- *fps_num = mem_get_le32(raw_hdr + 16);
- *fps_den = mem_get_le32(raw_hdr + 20);
+ *fourcc = mem_get_le32(raw_hdr + 8);
+ *width = mem_get_le16(raw_hdr + 12);
+ *height = mem_get_le16(raw_hdr + 14);
+ *fps_num = mem_get_le32(raw_hdr + 16);
+ *fps_den = mem_get_le32(raw_hdr + 20);
- /* Some versions of vpxenc used 1/(2*fps) for the timebase, so
- * we can guess the framerate using only the timebase in this
- * case. Other files would require reading ahead to guess the
- * timebase, like we do for webm.
- */
- if(*fps_num < 1000)
- {
- /* Correct for the factor of 2 applied to the timebase in the
- * encoder.
- */
- if(*fps_num&1)*fps_den<<=1;
- else *fps_num>>=1;
- }
- else
- {
- /* Don't know FPS for sure, and don't have readahead code
- * (yet?), so just default to 30fps.
- */
- *fps_num = 30;
- *fps_den = 1;
- }
- }
+ /* Some versions of vpxenc used 1/(2*fps) for the timebase, so
+ * we can guess the framerate using only the timebase in this
+ * case. Other files would require reading ahead to guess the
+ * timebase, like we do for webm.
+ */
+ if (*fps_num < 1000) {
+ /* Correct for the factor of 2 applied to the timebase in the
+ * encoder.
+ */
+ if (*fps_num & 1)*fps_den <<= 1;
+ else *fps_num >>= 1;
+ } else {
+ /* Don't know FPS for sure, and don't have readahead code
+ * (yet?), so just default to 30fps.
+ */
+ *fps_num = 30;
+ *fps_den = 1;
+ }
}
+ }
- if (!is_ivf)
- rewind(infile);
+ if (!is_ivf)
+ rewind(infile);
- return is_ivf;
+ return is_ivf;
}
@@ -447,126 +410,121 @@
unsigned int *width,
unsigned int *height,
unsigned int *fps_den,
- unsigned int *fps_num)
-{
- unsigned char buf[32];
- int is_raw = 0;
- vpx_codec_stream_info_t si;
+ unsigned int *fps_num) {
+ unsigned char buf[32];
+ int is_raw = 0;
+ vpx_codec_stream_info_t si;
- si.sz = sizeof(si);
+ si.sz = sizeof(si);
- if (fread(buf, 1, 32, infile) == 32)
- {
- int i;
+ if (fread(buf, 1, 32, infile) == 32) {
+ int i;
- if(mem_get_le32(buf) < 256 * 1024 * 1024)
- for (i = 0; i < sizeof(ifaces) / sizeof(ifaces[0]); i++)
- if(!vpx_codec_peek_stream_info(ifaces[i].iface,
- buf + 4, 32 - 4, &si))
- {
- is_raw = 1;
- *fourcc = ifaces[i].fourcc;
- *width = si.w;
- *height = si.h;
- *fps_num = 30;
- *fps_den = 1;
- break;
- }
- }
+ if (mem_get_le32(buf) < 256 * 1024 * 1024)
+ for (i = 0; i < sizeof(ifaces) / sizeof(ifaces[0]); i++)
+ if (!vpx_codec_peek_stream_info(ifaces[i].iface(),
+ buf + 4, 32 - 4, &si)) {
+ is_raw = 1;
+ *fourcc = ifaces[i].fourcc;
+ *width = si.w;
+ *height = si.h;
+ *fps_num = 30;
+ *fps_den = 1;
+ break;
+ }
+ }
- rewind(infile);
- return is_raw;
+ rewind(infile);
+ return is_raw;
}
static int
-nestegg_read_cb(void *buffer, size_t length, void *userdata)
-{
- FILE *f = userdata;
+nestegg_read_cb(void *buffer, size_t length, void *userdata) {
+ FILE *f = userdata;
- if(fread(buffer, 1, length, f) < length)
- {
- if (ferror(f))
- return -1;
- if (feof(f))
- return 0;
- }
- return 1;
+ if (fread(buffer, 1, length, f) < length) {
+ if (ferror(f))
+ return -1;
+ if (feof(f))
+ return 0;
+ }
+ return 1;
}
static int
-nestegg_seek_cb(int64_t offset, int whence, void * userdata)
-{
- switch(whence) {
- case NESTEGG_SEEK_SET: whence = SEEK_SET; break;
- case NESTEGG_SEEK_CUR: whence = SEEK_CUR; break;
- case NESTEGG_SEEK_END: whence = SEEK_END; break;
- };
- return fseek(userdata, (long)offset, whence)? -1 : 0;
+nestegg_seek_cb(int64_t offset, int whence, void *userdata) {
+ switch (whence) {
+ case NESTEGG_SEEK_SET:
+ whence = SEEK_SET;
+ break;
+ case NESTEGG_SEEK_CUR:
+ whence = SEEK_CUR;
+ break;
+ case NESTEGG_SEEK_END:
+ whence = SEEK_END;
+ break;
+ };
+ return fseek(userdata, (long)offset, whence) ? -1 : 0;
}
static int64_t
-nestegg_tell_cb(void * userdata)
-{
- return ftell(userdata);
+nestegg_tell_cb(void *userdata) {
+ return ftell(userdata);
}
static void
-nestegg_log_cb(nestegg * context, unsigned int severity, char const * format,
- ...)
-{
- va_list ap;
+nestegg_log_cb(nestegg *context, unsigned int severity, char const *format,
+ ...) {
+ va_list ap;
- va_start(ap, format);
- vfprintf(stderr, format, ap);
- fprintf(stderr, "\n");
- va_end(ap);
+ va_start(ap, format);
+ vfprintf(stderr, format, ap);
+ fprintf(stderr, "\n");
+ va_end(ap);
}
static int
webm_guess_framerate(struct input_ctx *input,
unsigned int *fps_den,
- unsigned int *fps_num)
-{
- unsigned int i;
- uint64_t tstamp=0;
+ unsigned int *fps_num) {
+ unsigned int i;
+ uint64_t tstamp = 0;
- /* Guess the framerate. Read up to 1 second, or 50 video packets,
- * whichever comes first.
- */
- for(i=0; tstamp < 1000000000 && i < 50;)
- {
- nestegg_packet * pkt;
- unsigned int track;
+ /* Guess the framerate. Read up to 1 second, or 50 video packets,
+ * whichever comes first.
+ */
+ for (i = 0; tstamp < 1000000000 && i < 50;) {
+ nestegg_packet *pkt;
+ unsigned int track;
- if(nestegg_read_packet(input->nestegg_ctx, &pkt) <= 0)
- break;
+ if (nestegg_read_packet(input->nestegg_ctx, &pkt) <= 0)
+ break;
- nestegg_packet_track(pkt, &track);
- if(track == input->video_track)
- {
- nestegg_packet_tstamp(pkt, &tstamp);
- i++;
- }
-
- nestegg_free_packet(pkt);
+ nestegg_packet_track(pkt, &track);
+ if (track == input->video_track) {
+ nestegg_packet_tstamp(pkt, &tstamp);
+ i++;
}
- if(nestegg_track_seek(input->nestegg_ctx, input->video_track, 0))
- goto fail;
+ nestegg_free_packet(pkt);
+ }
- *fps_num = (i - 1) * 1000000;
- *fps_den = (unsigned int)(tstamp / 1000);
- return 0;
+ if (nestegg_track_seek(input->nestegg_ctx, input->video_track, 0))
+ goto fail;
+
+ *fps_num = (i - 1) * 1000000;
+ *fps_den = (unsigned int)(tstamp / 1000);
+ return 0;
fail:
- nestegg_destroy(input->nestegg_ctx);
- input->nestegg_ctx = NULL;
- rewind(input->infile);
- return 1;
+ nestegg_destroy(input->nestegg_ctx);
+ input->nestegg_ctx = NULL;
+ rewind(input->infile);
+ return 1;
}
@@ -576,586 +534,556 @@
unsigned int *width,
unsigned int *height,
unsigned int *fps_den,
- unsigned int *fps_num)
-{
- unsigned int i, n;
- int track_type = -1;
+ unsigned int *fps_num) {
+ unsigned int i, n;
+ int track_type = -1;
+ int codec_id;
- nestegg_io io = {nestegg_read_cb, nestegg_seek_cb, nestegg_tell_cb, 0};
- nestegg_video_params params;
+ nestegg_io io = {nestegg_read_cb, nestegg_seek_cb, nestegg_tell_cb, 0};
+ nestegg_video_params params;
- io.userdata = input->infile;
- if(nestegg_init(&input->nestegg_ctx, io, NULL))
- goto fail;
+ io.userdata = input->infile;
+ if (nestegg_init(&input->nestegg_ctx, io, NULL))
+ goto fail;
- if(nestegg_track_count(input->nestegg_ctx, &n))
- goto fail;
+ if (nestegg_track_count(input->nestegg_ctx, &n))
+ goto fail;
- for(i=0; i<n; i++)
- {
- track_type = nestegg_track_type(input->nestegg_ctx, i);
+ for (i = 0; i < n; i++) {
+ track_type = nestegg_track_type(input->nestegg_ctx, i);
- if(track_type == NESTEGG_TRACK_VIDEO)
- break;
- else if(track_type < 0)
- goto fail;
- }
+ if (track_type == NESTEGG_TRACK_VIDEO)
+ break;
+ else if (track_type < 0)
+ goto fail;
+ }
- if(nestegg_track_codec_id(input->nestegg_ctx, i) != NESTEGG_CODEC_VP8)
- {
- fprintf(stderr, "Not VP8 video, quitting.\n");
- exit(1);
- }
-
- input->video_track = i;
-
- if(nestegg_track_video_params(input->nestegg_ctx, i, ¶ms))
- goto fail;
-
- *fps_den = 0;
- *fps_num = 0;
+ codec_id = nestegg_track_codec_id(input->nestegg_ctx, i);
+ if (codec_id == NESTEGG_CODEC_VP8) {
*fourcc = VP8_FOURCC;
- *width = params.width;
- *height = params.height;
- return 1;
+ } else if (codec_id == NESTEGG_CODEC_VP9) {
+ *fourcc = VP9_FOURCC;
+ } else {
+ fprintf(stderr, "Not VPx video, quitting.\n");
+ exit(1);
+ }
+
+ input->video_track = i;
+
+ if (nestegg_track_video_params(input->nestegg_ctx, i, ¶ms))
+ goto fail;
+
+ *fps_den = 0;
+ *fps_num = 0;
+ *width = params.width;
+ *height = params.height;
+ return 1;
fail:
- input->nestegg_ctx = NULL;
- rewind(input->infile);
- return 0;
+ input->nestegg_ctx = NULL;
+ rewind(input->infile);
+ return 0;
}
-void show_progress(int frame_in, int frame_out, unsigned long dx_time)
-{
- fprintf(stderr, "%d decoded frames/%d showed frames in %lu us (%.2f fps)\r",
- frame_in, frame_out, dx_time,
- (float)frame_out * 1000000.0 / (float)dx_time);
+void show_progress(int frame_in, int frame_out, unsigned long dx_time) {
+ fprintf(stderr, "%d decoded frames/%d showed frames in %lu us (%.2f fps)\r",
+ frame_in, frame_out, dx_time,
+ (float)frame_out * 1000000.0 / (float)dx_time);
}
void generate_filename(const char *pattern, char *out, size_t q_len,
unsigned int d_w, unsigned int d_h,
- unsigned int frame_in)
-{
- const char *p = pattern;
- char *q = out;
+ unsigned int frame_in) {
+ const char *p = pattern;
+ char *q = out;
- do
- {
- char *next_pat = strchr(p, '%');
+ do {
+ char *next_pat = strchr(p, '%');
- if(p == next_pat)
- {
- size_t pat_len;
+ if (p == next_pat) {
+ size_t pat_len;
- /* parse the pattern */
- q[q_len - 1] = '\0';
- switch(p[1])
- {
- case 'w': snprintf(q, q_len - 1, "%d", d_w); break;
- case 'h': snprintf(q, q_len - 1, "%d", d_h); break;
- case '1': snprintf(q, q_len - 1, "%d", frame_in); break;
- case '2': snprintf(q, q_len - 1, "%02d", frame_in); break;
- case '3': snprintf(q, q_len - 1, "%03d", frame_in); break;
- case '4': snprintf(q, q_len - 1, "%04d", frame_in); break;
- case '5': snprintf(q, q_len - 1, "%05d", frame_in); break;
- case '6': snprintf(q, q_len - 1, "%06d", frame_in); break;
- case '7': snprintf(q, q_len - 1, "%07d", frame_in); break;
- case '8': snprintf(q, q_len - 1, "%08d", frame_in); break;
- case '9': snprintf(q, q_len - 1, "%09d", frame_in); break;
- default:
- die("Unrecognized pattern %%%c\n", p[1]);
- }
+ /* parse the pattern */
+ q[q_len - 1] = '\0';
+ switch (p[1]) {
+ case 'w':
+ snprintf(q, q_len - 1, "%d", d_w);
+ break;
+ case 'h':
+ snprintf(q, q_len - 1, "%d", d_h);
+ break;
+ case '1':
+ snprintf(q, q_len - 1, "%d", frame_in);
+ break;
+ case '2':
+ snprintf(q, q_len - 1, "%02d", frame_in);
+ break;
+ case '3':
+ snprintf(q, q_len - 1, "%03d", frame_in);
+ break;
+ case '4':
+ snprintf(q, q_len - 1, "%04d", frame_in);
+ break;
+ case '5':
+ snprintf(q, q_len - 1, "%05d", frame_in);
+ break;
+ case '6':
+ snprintf(q, q_len - 1, "%06d", frame_in);
+ break;
+ case '7':
+ snprintf(q, q_len - 1, "%07d", frame_in);
+ break;
+ case '8':
+ snprintf(q, q_len - 1, "%08d", frame_in);
+ break;
+ case '9':
+ snprintf(q, q_len - 1, "%09d", frame_in);
+ break;
+ default:
+ die("Unrecognized pattern %%%c\n", p[1]);
+ }
- pat_len = strlen(q);
- if(pat_len >= q_len - 1)
- die("Output filename too long.\n");
- q += pat_len;
- p += 2;
- q_len -= pat_len;
- }
- else
- {
- size_t copy_len;
+ pat_len = strlen(q);
+ if (pat_len >= q_len - 1)
+ die("Output filename too long.\n");
+ q += pat_len;
+ p += 2;
+ q_len -= pat_len;
+ } else {
+ size_t copy_len;
- /* copy the next segment */
- if(!next_pat)
- copy_len = strlen(p);
- else
- copy_len = next_pat - p;
+ /* copy the next segment */
+ if (!next_pat)
+ copy_len = strlen(p);
+ else
+ copy_len = next_pat - p;
- if(copy_len >= q_len - 1)
- die("Output filename too long.\n");
+ if (copy_len >= q_len - 1)
+ die("Output filename too long.\n");
- memcpy(q, p, copy_len);
- q[copy_len] = '\0';
- q += copy_len;
- p += copy_len;
- q_len -= copy_len;
- }
- } while(*p);
+ memcpy(q, p, copy_len);
+ q[copy_len] = '\0';
+ q += copy_len;
+ p += copy_len;
+ q_len -= copy_len;
+ }
+ } while (*p);
}
-int main(int argc, const char **argv_)
-{
- vpx_codec_ctx_t decoder;
- char *fn = NULL;
- int i;
- uint8_t *buf = NULL;
- size_t buf_sz = 0, buf_alloc_sz = 0;
- FILE *infile;
- int frame_in = 0, frame_out = 0, flipuv = 0, noblit = 0, do_md5 = 0, progress = 0;
- int stop_after = 0, postproc = 0, summary = 0, quiet = 1;
- int ec_enabled = 0;
- vpx_codec_iface_t *iface = NULL;
- unsigned int fourcc;
- unsigned long dx_time = 0;
- struct arg arg;
- char **argv, **argi, **argj;
- const char *outfile_pattern = 0;
- char outfile[PATH_MAX];
- int single_file;
- int use_y4m = 1;
- unsigned int width;
- unsigned int height;
- unsigned int fps_den;
- unsigned int fps_num;
- void *out = NULL;
- vpx_codec_dec_cfg_t cfg = {0};
+int main(int argc, const char **argv_) {
+ vpx_codec_ctx_t decoder;
+ char *fn = NULL;
+ int i;
+ uint8_t *buf = NULL;
+ size_t buf_sz = 0, buf_alloc_sz = 0;
+ FILE *infile;
+ int frame_in = 0, frame_out = 0, flipuv = 0, noblit = 0, do_md5 = 0, progress = 0;
+ int stop_after = 0, postproc = 0, summary = 0, quiet = 1;
+ int arg_skip = 0;
+ int ec_enabled = 0;
+ vpx_codec_iface_t *iface = NULL;
+ unsigned int fourcc;
+ unsigned long dx_time = 0;
+ struct arg arg;
+ char **argv, **argi, **argj;
+ const char *outfile_pattern = 0;
+ char outfile[PATH_MAX];
+ int single_file;
+ int use_y4m = 1;
+ unsigned int width;
+ unsigned int height;
+ unsigned int fps_den;
+ unsigned int fps_num;
+ void *out = NULL;
+ vpx_codec_dec_cfg_t cfg = {0};
#if CONFIG_VP8_DECODER
- vp8_postproc_cfg_t vp8_pp_cfg = {0};
- int vp8_dbg_color_ref_frame = 0;
- int vp8_dbg_color_mb_modes = 0;
- int vp8_dbg_color_b_modes = 0;
- int vp8_dbg_display_mv = 0;
+ vp8_postproc_cfg_t vp8_pp_cfg = {0};
+ int vp8_dbg_color_ref_frame = 0;
+ int vp8_dbg_color_mb_modes = 0;
+ int vp8_dbg_color_b_modes = 0;
+ int vp8_dbg_display_mv = 0;
#endif
- struct input_ctx input = {0};
- int frames_corrupted = 0;
- int dec_flags = 0;
+ struct input_ctx input = {0};
+ int frames_corrupted = 0;
+ int dec_flags = 0;
- /* Parse command line */
- exec_name = argv_[0];
- argv = argv_dup(argc - 1, argv_ + 1);
+ /* Parse command line */
+ exec_name = argv_[0];
+ argv = argv_dup(argc - 1, argv_ + 1);
- for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step)
- {
- memset(&arg, 0, sizeof(arg));
- arg.argv_step = 1;
+ for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
+ memset(&arg, 0, sizeof(arg));
+ arg.argv_step = 1;
- if (arg_match(&arg, &codecarg, argi))
- {
- int j, k = -1;
+ if (arg_match(&arg, &codecarg, argi)) {
+ int j, k = -1;
- for (j = 0; j < sizeof(ifaces) / sizeof(ifaces[0]); j++)
- if (!strcmp(ifaces[j].name, arg.val))
- k = j;
+ for (j = 0; j < sizeof(ifaces) / sizeof(ifaces[0]); j++)
+ if (!strcmp(ifaces[j].name, arg.val))
+ k = j;
- if (k >= 0)
- iface = ifaces[k].iface;
- else
- die("Error: Unrecognized argument (%s) to --codec\n",
- arg.val);
- }
- else if (arg_match(&arg, &outputfile, argi))
- outfile_pattern = arg.val;
- else if (arg_match(&arg, &use_yv12, argi))
- {
- use_y4m = 0;
- flipuv = 1;
- }
- else if (arg_match(&arg, &use_i420, argi))
- {
- use_y4m = 0;
- flipuv = 0;
- }
- else if (arg_match(&arg, &flipuvarg, argi))
- flipuv = 1;
- else if (arg_match(&arg, &noblitarg, argi))
- noblit = 1;
- else if (arg_match(&arg, &progressarg, argi))
- progress = 1;
- else if (arg_match(&arg, &limitarg, argi))
- stop_after = arg_parse_uint(&arg);
- else if (arg_match(&arg, &postprocarg, argi))
- postproc = 1;
- else if (arg_match(&arg, &md5arg, argi))
- do_md5 = 1;
- else if (arg_match(&arg, &summaryarg, argi))
- summary = 1;
- else if (arg_match(&arg, &threadsarg, argi))
- cfg.threads = arg_parse_uint(&arg);
- else if (arg_match(&arg, &verbosearg, argi))
- quiet = 0;
+ if (k >= 0)
+ iface = ifaces[k].iface();
+ else
+ die("Error: Unrecognized argument (%s) to --codec\n",
+ arg.val);
+ } else if (arg_match(&arg, &outputfile, argi))
+ outfile_pattern = arg.val;
+ else if (arg_match(&arg, &use_yv12, argi)) {
+ use_y4m = 0;
+ flipuv = 1;
+ } else if (arg_match(&arg, &use_i420, argi)) {
+ use_y4m = 0;
+ flipuv = 0;
+ } else if (arg_match(&arg, &flipuvarg, argi))
+ flipuv = 1;
+ else if (arg_match(&arg, &noblitarg, argi))
+ noblit = 1;
+ else if (arg_match(&arg, &progressarg, argi))
+ progress = 1;
+ else if (arg_match(&arg, &limitarg, argi))
+ stop_after = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &skiparg, argi))
+ arg_skip = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &postprocarg, argi))
+ postproc = 1;
+ else if (arg_match(&arg, &md5arg, argi))
+ do_md5 = 1;
+ else if (arg_match(&arg, &summaryarg, argi))
+ summary = 1;
+ else if (arg_match(&arg, &threadsarg, argi))
+ cfg.threads = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &verbosearg, argi))
+ quiet = 0;
#if CONFIG_VP8_DECODER
- else if (arg_match(&arg, &addnoise_level, argi))
- {
- postproc = 1;
- vp8_pp_cfg.post_proc_flag |= VP8_ADDNOISE;
- vp8_pp_cfg.noise_level = arg_parse_uint(&arg);
- }
- else if (arg_match(&arg, &demacroblock_level, argi))
- {
- postproc = 1;
- vp8_pp_cfg.post_proc_flag |= VP8_DEMACROBLOCK;
- vp8_pp_cfg.deblocking_level = arg_parse_uint(&arg);
- }
- else if (arg_match(&arg, &deblock, argi))
- {
- postproc = 1;
- vp8_pp_cfg.post_proc_flag |= VP8_DEBLOCK;
- }
- else if (arg_match(&arg, &mfqe, argi))
- {
- postproc = 1;
- vp8_pp_cfg.post_proc_flag |= VP8_MFQE;
- }
- else if (arg_match(&arg, &pp_debug_info, argi))
- {
- unsigned int level = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &addnoise_level, argi)) {
+ postproc = 1;
+ vp8_pp_cfg.post_proc_flag |= VP8_ADDNOISE;
+ vp8_pp_cfg.noise_level = arg_parse_uint(&arg);
+ } else if (arg_match(&arg, &demacroblock_level, argi)) {
+ postproc = 1;
+ vp8_pp_cfg.post_proc_flag |= VP8_DEMACROBLOCK;
+ vp8_pp_cfg.deblocking_level = arg_parse_uint(&arg);
+ } else if (arg_match(&arg, &deblock, argi)) {
+ postproc = 1;
+ vp8_pp_cfg.post_proc_flag |= VP8_DEBLOCK;
+ } else if (arg_match(&arg, &mfqe, argi)) {
+ postproc = 1;
+ vp8_pp_cfg.post_proc_flag |= VP8_MFQE;
+ } else if (arg_match(&arg, &pp_debug_info, argi)) {
+ unsigned int level = arg_parse_uint(&arg);
- postproc = 1;
- vp8_pp_cfg.post_proc_flag &= ~0x7;
+ postproc = 1;
+ vp8_pp_cfg.post_proc_flag &= ~0x7;
- if (level)
- vp8_pp_cfg.post_proc_flag |= level;
- }
- else if (arg_match(&arg, &pp_disp_ref_frame, argi))
- {
- unsigned int flags = arg_parse_int(&arg);
- if (flags)
- {
- postproc = 1;
- vp8_dbg_color_ref_frame = flags;
- }
- }
- else if (arg_match(&arg, &pp_disp_mb_modes, argi))
- {
- unsigned int flags = arg_parse_int(&arg);
- if (flags)
- {
- postproc = 1;
- vp8_dbg_color_mb_modes = flags;
- }
- }
- else if (arg_match(&arg, &pp_disp_b_modes, argi))
- {
- unsigned int flags = arg_parse_int(&arg);
- if (flags)
- {
- postproc = 1;
- vp8_dbg_color_b_modes = flags;
- }
- }
- else if (arg_match(&arg, &pp_disp_mvs, argi))
- {
- unsigned int flags = arg_parse_int(&arg);
- if (flags)
- {
- postproc = 1;
- vp8_dbg_display_mv = flags;
- }
- }
- else if (arg_match(&arg, &error_concealment, argi))
- {
- ec_enabled = 1;
- }
+ if (level)
+ vp8_pp_cfg.post_proc_flag |= level;
+ } else if (arg_match(&arg, &pp_disp_ref_frame, argi)) {
+ unsigned int flags = arg_parse_int(&arg);
+ if (flags) {
+ postproc = 1;
+ vp8_dbg_color_ref_frame = flags;
+ }
+ } else if (arg_match(&arg, &pp_disp_mb_modes, argi)) {
+ unsigned int flags = arg_parse_int(&arg);
+ if (flags) {
+ postproc = 1;
+ vp8_dbg_color_mb_modes = flags;
+ }
+ } else if (arg_match(&arg, &pp_disp_b_modes, argi)) {
+ unsigned int flags = arg_parse_int(&arg);
+ if (flags) {
+ postproc = 1;
+ vp8_dbg_color_b_modes = flags;
+ }
+ } else if (arg_match(&arg, &pp_disp_mvs, argi)) {
+ unsigned int flags = arg_parse_int(&arg);
+ if (flags) {
+ postproc = 1;
+ vp8_dbg_display_mv = flags;
+ }
+ } else if (arg_match(&arg, &error_concealment, argi)) {
+ ec_enabled = 1;
+ }
#endif
- else
- argj++;
- }
-
- /* Check for unrecognized options */
- for (argi = argv; *argi; argi++)
- if (argi[0][0] == '-' && strlen(argi[0]) > 1)
- die("Error: Unrecognized option %s\n", *argi);
-
- /* Handle non-option arguments */
- fn = argv[0];
-
- if (!fn)
- usage_exit();
-
- /* Open file */
- infile = strcmp(fn, "-") ? fopen(fn, "rb") : set_binary_mode(stdin);
-
- if (!infile)
- {
- fprintf(stderr, "Failed to open file '%s'",
- strcmp(fn, "-") ? fn : "stdin");
- return EXIT_FAILURE;
- }
-#if CONFIG_OS_SUPPORT
- /* Make sure we don't dump to the terminal, unless forced to with -o - */
- if(!outfile_pattern && isatty(fileno(stdout)) && !do_md5 && !noblit)
- {
- fprintf(stderr,
- "Not dumping raw video to your terminal. Use '-o -' to "
- "override.\n");
- return EXIT_FAILURE;
- }
-#endif
- input.infile = infile;
- if(file_is_ivf(infile, &fourcc, &width, &height, &fps_den,
- &fps_num))
- input.kind = IVF_FILE;
- else if(file_is_webm(&input, &fourcc, &width, &height, &fps_den, &fps_num))
- input.kind = WEBM_FILE;
- else if(file_is_raw(infile, &fourcc, &width, &height, &fps_den, &fps_num))
- input.kind = RAW_FILE;
else
- {
- fprintf(stderr, "Unrecognized input file type.\n");
+ argj++;
+ }
+
+ /* Check for unrecognized options */
+ for (argi = argv; *argi; argi++)
+ if (argi[0][0] == '-' && strlen(argi[0]) > 1)
+ die("Error: Unrecognized option %s\n", *argi);
+
+ /* Handle non-option arguments */
+ fn = argv[0];
+
+ if (!fn)
+ usage_exit();
+
+ /* Open file */
+ infile = strcmp(fn, "-") ? fopen(fn, "rb") : set_binary_mode(stdin);
+
+ if (!infile) {
+ fprintf(stderr, "Failed to open file '%s'",
+ strcmp(fn, "-") ? fn : "stdin");
+ return EXIT_FAILURE;
+ }
+#if CONFIG_OS_SUPPORT
+ /* Make sure we don't dump to the terminal, unless forced to with -o - */
+ if (!outfile_pattern && isatty(fileno(stdout)) && !do_md5 && !noblit) {
+ fprintf(stderr,
+ "Not dumping raw video to your terminal. Use '-o -' to "
+ "override.\n");
+ return EXIT_FAILURE;
+ }
+#endif
+ input.infile = infile;
+ if (file_is_ivf(infile, &fourcc, &width, &height, &fps_den,
+ &fps_num))
+ input.kind = IVF_FILE;
+ else if (file_is_webm(&input, &fourcc, &width, &height, &fps_den, &fps_num))
+ input.kind = WEBM_FILE;
+ else if (file_is_raw(infile, &fourcc, &width, &height, &fps_den, &fps_num))
+ input.kind = RAW_FILE;
+ else {
+ fprintf(stderr, "Unrecognized input file type.\n");
+ return EXIT_FAILURE;
+ }
+
+ /* If the output file is not set or doesn't have a sequence number in
+ * it, then we only open it once.
+ */
+ outfile_pattern = outfile_pattern ? outfile_pattern : "-";
+ single_file = 1;
+ {
+ const char *p = outfile_pattern;
+ do {
+ p = strchr(p, '%');
+ if (p && p[1] >= '1' && p[1] <= '9') {
+ /* pattern contains sequence number, so it's not unique. */
+ single_file = 0;
+ break;
+ }
+ if (p)
+ p++;
+ } while (p);
+ }
+
+ if (single_file && !noblit) {
+ generate_filename(outfile_pattern, outfile, sizeof(outfile) - 1,
+ width, height, 0);
+ out = out_open(outfile, do_md5);
+ }
+
+ if (use_y4m && !noblit) {
+ char buffer[128];
+ if (!single_file) {
+ fprintf(stderr, "YUV4MPEG2 not supported with output patterns,"
+ " try --i420 or --yv12.\n");
+ return EXIT_FAILURE;
+ }
+
+ if (input.kind == WEBM_FILE)
+ if (webm_guess_framerate(&input, &fps_den, &fps_num)) {
+ fprintf(stderr, "Failed to guess framerate -- error parsing "
+ "webm file?\n");
return EXIT_FAILURE;
+ }
+
+
+ /*Note: We can't output an aspect ratio here because IVF doesn't
+ store one, and neither does VP8.
+ That will have to wait until these tools support WebM natively.*/
+ sprintf(buffer, "YUV4MPEG2 C%s W%u H%u F%u:%u I%c\n",
+ "420jpeg", width, height, fps_num, fps_den, 'p');
+ out_put(out, (unsigned char *)buffer,
+ (unsigned int)strlen(buffer), do_md5);
+ }
+
+ /* Try to determine the codec from the fourcc. */
+ for (i = 0; i < sizeof(ifaces) / sizeof(ifaces[0]); i++)
+ if ((fourcc & ifaces[i].fourcc_mask) == ifaces[i].fourcc) {
+ vpx_codec_iface_t *ivf_iface = ifaces[i].iface();
+
+ if (iface && iface != ivf_iface)
+ fprintf(stderr, "Notice -- IVF header indicates codec: %s\n",
+ ifaces[i].name);
+ else
+ iface = ivf_iface;
+
+ break;
}
- /* If the output file is not set or doesn't have a sequence number in
- * it, then we only open it once.
- */
- outfile_pattern = outfile_pattern ? outfile_pattern : "-";
- single_file = 1;
- {
- const char *p = outfile_pattern;
- do
- {
- p = strchr(p, '%');
- if(p && p[1] >= '1' && p[1] <= '9')
- {
- /* pattern contains sequence number, so it's not unique. */
- single_file = 0;
- break;
- }
- if(p)
- p++;
- } while(p);
- }
+ dec_flags = (postproc ? VPX_CODEC_USE_POSTPROC : 0) |
+ (ec_enabled ? VPX_CODEC_USE_ERROR_CONCEALMENT : 0);
+ if (vpx_codec_dec_init(&decoder, iface ? iface : ifaces[0].iface(), &cfg,
+ dec_flags)) {
+ fprintf(stderr, "Failed to initialize decoder: %s\n", vpx_codec_error(&decoder));
+ return EXIT_FAILURE;
+ }
- if(single_file && !noblit)
- {
- generate_filename(outfile_pattern, outfile, sizeof(outfile)-1,
- width, height, 0);
- out = out_open(outfile, do_md5);
- }
-
- if (use_y4m && !noblit)
- {
- char buffer[128];
- if (!single_file)
- {
- fprintf(stderr, "YUV4MPEG2 not supported with output patterns,"
- " try --i420 or --yv12.\n");
- return EXIT_FAILURE;
- }
-
- if(input.kind == WEBM_FILE)
- if(webm_guess_framerate(&input, &fps_den, &fps_num))
- {
- fprintf(stderr, "Failed to guess framerate -- error parsing "
- "webm file?\n");
- return EXIT_FAILURE;
- }
-
-
- /*Note: We can't output an aspect ratio here because IVF doesn't
- store one, and neither does VP8.
- That will have to wait until these tools support WebM natively.*/
- sprintf(buffer, "YUV4MPEG2 C%s W%u H%u F%u:%u I%c\n",
- "420jpeg", width, height, fps_num, fps_den, 'p');
- out_put(out, (unsigned char *)buffer,
- (unsigned int)strlen(buffer), do_md5);
- }
-
- /* Try to determine the codec from the fourcc. */
- for (i = 0; i < sizeof(ifaces) / sizeof(ifaces[0]); i++)
- if ((fourcc & ifaces[i].fourcc_mask) == ifaces[i].fourcc)
- {
- vpx_codec_iface_t *ivf_iface = ifaces[i].iface;
-
- if (iface && iface != ivf_iface)
- fprintf(stderr, "Notice -- IVF header indicates codec: %s\n",
- ifaces[i].name);
- else
- iface = ivf_iface;
-
- break;
- }
-
- dec_flags = (postproc ? VPX_CODEC_USE_POSTPROC : 0) |
- (ec_enabled ? VPX_CODEC_USE_ERROR_CONCEALMENT : 0);
- if (vpx_codec_dec_init(&decoder, iface ? iface : ifaces[0].iface, &cfg,
- dec_flags))
- {
- fprintf(stderr, "Failed to initialize decoder: %s\n", vpx_codec_error(&decoder));
- return EXIT_FAILURE;
- }
-
- if (!quiet)
- fprintf(stderr, "%s\n", decoder.name);
+ if (!quiet)
+ fprintf(stderr, "%s\n", decoder.name);
#if CONFIG_VP8_DECODER
- if (vp8_pp_cfg.post_proc_flag
- && vpx_codec_control(&decoder, VP8_SET_POSTPROC, &vp8_pp_cfg))
- {
- fprintf(stderr, "Failed to configure postproc: %s\n", vpx_codec_error(&decoder));
- return EXIT_FAILURE;
- }
+ if (vp8_pp_cfg.post_proc_flag
+ && vpx_codec_control(&decoder, VP8_SET_POSTPROC, &vp8_pp_cfg)) {
+ fprintf(stderr, "Failed to configure postproc: %s\n", vpx_codec_error(&decoder));
+ return EXIT_FAILURE;
+ }
- if (vp8_dbg_color_ref_frame
- && vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_REF_FRAME, vp8_dbg_color_ref_frame))
- {
- fprintf(stderr, "Failed to configure reference block visualizer: %s\n", vpx_codec_error(&decoder));
- return EXIT_FAILURE;
- }
+ if (vp8_dbg_color_ref_frame
+ && vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_REF_FRAME, vp8_dbg_color_ref_frame)) {
+ fprintf(stderr, "Failed to configure reference block visualizer: %s\n", vpx_codec_error(&decoder));
+ return EXIT_FAILURE;
+ }
- if (vp8_dbg_color_mb_modes
- && vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_MB_MODES, vp8_dbg_color_mb_modes))
- {
- fprintf(stderr, "Failed to configure macro block visualizer: %s\n", vpx_codec_error(&decoder));
- return EXIT_FAILURE;
- }
+ if (vp8_dbg_color_mb_modes
+ && vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_MB_MODES, vp8_dbg_color_mb_modes)) {
+ fprintf(stderr, "Failed to configure macro block visualizer: %s\n", vpx_codec_error(&decoder));
+ return EXIT_FAILURE;
+ }
- if (vp8_dbg_color_b_modes
- && vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_B_MODES, vp8_dbg_color_b_modes))
- {
- fprintf(stderr, "Failed to configure block visualizer: %s\n", vpx_codec_error(&decoder));
- return EXIT_FAILURE;
- }
+ if (vp8_dbg_color_b_modes
+ && vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_B_MODES, vp8_dbg_color_b_modes)) {
+ fprintf(stderr, "Failed to configure block visualizer: %s\n", vpx_codec_error(&decoder));
+ return EXIT_FAILURE;
+ }
- if (vp8_dbg_display_mv
- && vpx_codec_control(&decoder, VP8_SET_DBG_DISPLAY_MV, vp8_dbg_display_mv))
- {
- fprintf(stderr, "Failed to configure motion vector visualizer: %s\n", vpx_codec_error(&decoder));
- return EXIT_FAILURE;
- }
+ if (vp8_dbg_display_mv
+ && vpx_codec_control(&decoder, VP8_SET_DBG_DISPLAY_MV, vp8_dbg_display_mv)) {
+ fprintf(stderr, "Failed to configure motion vector visualizer: %s\n", vpx_codec_error(&decoder));
+ return EXIT_FAILURE;
+ }
#endif
- /* Decode file */
- while (!read_frame(&input, &buf, &buf_sz, &buf_alloc_sz))
- {
- vpx_codec_iter_t iter = NULL;
- vpx_image_t *img;
- struct vpx_usec_timer timer;
- int corrupted;
- vpx_usec_timer_start(&timer);
+ if(arg_skip)
+ fprintf(stderr, "Skiping first %d frames.\n", arg_skip);
+ while (arg_skip) {
+ if (read_frame(&input, &buf, &buf_sz, &buf_alloc_sz))
+ break;
+ arg_skip--;
+ }
- if (vpx_codec_decode(&decoder, buf, (unsigned int)buf_sz, NULL, 0))
- {
- const char *detail = vpx_codec_error_detail(&decoder);
- fprintf(stderr, "Failed to decode frame: %s\n", vpx_codec_error(&decoder));
+ /* Decode file */
+ while (!read_frame(&input, &buf, &buf_sz, &buf_alloc_sz)) {
+ vpx_codec_iter_t iter = NULL;
+ vpx_image_t *img;
+ struct vpx_usec_timer timer;
+ int corrupted;
- if (detail)
- fprintf(stderr, " Additional information: %s\n", detail);
+ vpx_usec_timer_start(&timer);
- goto fail;
- }
+ if (vpx_codec_decode(&decoder, buf, (unsigned int)buf_sz, NULL, 0)) {
+ const char *detail = vpx_codec_error_detail(&decoder);
+ fprintf(stderr, "Failed to decode frame: %s\n", vpx_codec_error(&decoder));
- vpx_usec_timer_mark(&timer);
- dx_time += (unsigned int)vpx_usec_timer_elapsed(&timer);
+ if (detail)
+ fprintf(stderr, " Additional information: %s\n", detail);
- ++frame_in;
-
- if (vpx_codec_control(&decoder, VP8D_GET_FRAME_CORRUPTED, &corrupted))
- {
- fprintf(stderr, "Failed VP8_GET_FRAME_CORRUPTED: %s\n",
- vpx_codec_error(&decoder));
- goto fail;
- }
- frames_corrupted += corrupted;
-
- vpx_usec_timer_start(&timer);
-
- if ((img = vpx_codec_get_frame(&decoder, &iter)))
- ++frame_out;
-
- vpx_usec_timer_mark(&timer);
- dx_time += (unsigned int)vpx_usec_timer_elapsed(&timer);
-
- if (progress)
- show_progress(frame_in, frame_out, dx_time);
-
- if (!noblit)
- {
- if (img)
- {
- unsigned int y;
- char out_fn[PATH_MAX];
- uint8_t *buf;
-
- if (!single_file)
- {
- size_t len = sizeof(out_fn)-1;
-
- out_fn[len] = '\0';
- generate_filename(outfile_pattern, out_fn, len-1,
- img->d_w, img->d_h, frame_in);
- out = out_open(out_fn, do_md5);
- }
- else if(use_y4m)
- out_put(out, (unsigned char *)"FRAME\n", 6, do_md5);
-
- buf = img->planes[VPX_PLANE_Y];
-
- for (y = 0; y < img->d_h; y++)
- {
- out_put(out, buf, img->d_w, do_md5);
- buf += img->stride[VPX_PLANE_Y];
- }
-
- buf = img->planes[flipuv?VPX_PLANE_V:VPX_PLANE_U];
-
- for (y = 0; y < (1 + img->d_h) / 2; y++)
- {
- out_put(out, buf, (1 + img->d_w) / 2, do_md5);
- buf += img->stride[VPX_PLANE_U];
- }
-
- buf = img->planes[flipuv?VPX_PLANE_U:VPX_PLANE_V];
-
- for (y = 0; y < (1 + img->d_h) / 2; y++)
- {
- out_put(out, buf, (1 + img->d_w) / 2, do_md5);
- buf += img->stride[VPX_PLANE_V];
- }
-
- if (!single_file)
- out_close(out, out_fn, do_md5);
- }
- }
-
- if (stop_after && frame_in >= stop_after)
- break;
+ goto fail;
}
- if (summary || progress)
- {
- show_progress(frame_in, frame_out, dx_time);
- fprintf(stderr, "\n");
+ vpx_usec_timer_mark(&timer);
+ dx_time += (unsigned int)vpx_usec_timer_elapsed(&timer);
+
+ ++frame_in;
+
+ if (vpx_codec_control(&decoder, VP8D_GET_FRAME_CORRUPTED, &corrupted)) {
+ fprintf(stderr, "Failed VP8_GET_FRAME_CORRUPTED: %s\n",
+ vpx_codec_error(&decoder));
+ goto fail;
+ }
+ frames_corrupted += corrupted;
+
+ vpx_usec_timer_start(&timer);
+
+ if ((img = vpx_codec_get_frame(&decoder, &iter)))
+ ++frame_out;
+
+ vpx_usec_timer_mark(&timer);
+ dx_time += (unsigned int)vpx_usec_timer_elapsed(&timer);
+
+ if (progress)
+ show_progress(frame_in, frame_out, dx_time);
+
+ if (!noblit) {
+ if (img) {
+ unsigned int y;
+ char out_fn[PATH_MAX];
+ uint8_t *buf;
+
+ if (!single_file) {
+ size_t len = sizeof(out_fn) - 1;
+
+ out_fn[len] = '\0';
+ generate_filename(outfile_pattern, out_fn, len - 1,
+ img->d_w, img->d_h, frame_in);
+ out = out_open(out_fn, do_md5);
+ } else if (use_y4m)
+ out_put(out, (unsigned char *)"FRAME\n", 6, do_md5);
+
+ buf = img->planes[VPX_PLANE_Y];
+
+ for (y = 0; y < img->d_h; y++) {
+ out_put(out, buf, img->d_w, do_md5);
+ buf += img->stride[VPX_PLANE_Y];
+ }
+
+ buf = img->planes[flipuv ? VPX_PLANE_V : VPX_PLANE_U];
+
+ for (y = 0; y < (1 + img->d_h) / 2; y++) {
+ out_put(out, buf, (1 + img->d_w) / 2, do_md5);
+ buf += img->stride[VPX_PLANE_U];
+ }
+
+ buf = img->planes[flipuv ? VPX_PLANE_U : VPX_PLANE_V];
+
+ for (y = 0; y < (1 + img->d_h) / 2; y++) {
+ out_put(out, buf, (1 + img->d_w) / 2, do_md5);
+ buf += img->stride[VPX_PLANE_V];
+ }
+
+ if (!single_file)
+ out_close(out, out_fn, do_md5);
+ }
}
- if (frames_corrupted)
- fprintf(stderr, "WARNING: %d frames corrupted.\n",frames_corrupted);
+ if (stop_after && frame_in >= stop_after)
+ break;
+ }
+
+ if (summary || progress) {
+ show_progress(frame_in, frame_out, dx_time);
+ fprintf(stderr, "\n");
+ }
+
+ if (frames_corrupted)
+ fprintf(stderr, "WARNING: %d frames corrupted.\n", frames_corrupted);
fail:
- if (vpx_codec_destroy(&decoder))
- {
- fprintf(stderr, "Failed to destroy decoder: %s\n", vpx_codec_error(&decoder));
- return EXIT_FAILURE;
- }
+ if (vpx_codec_destroy(&decoder)) {
+ fprintf(stderr, "Failed to destroy decoder: %s\n", vpx_codec_error(&decoder));
+ return EXIT_FAILURE;
+ }
- if (single_file && !noblit)
- out_close(out, outfile, do_md5);
+ if (single_file && !noblit)
+ out_close(out, outfile, do_md5);
- if(input.nestegg_ctx)
- nestegg_destroy(input.nestegg_ctx);
- if(input.kind != WEBM_FILE)
- free(buf);
- fclose(infile);
- free(argv);
+ if (input.nestegg_ctx)
+ nestegg_destroy(input.nestegg_ctx);
+ if (input.kind != WEBM_FILE)
+ free(buf);
+ fclose(infile);
+ free(argv);
- return frames_corrupted ? EXIT_FAILURE : EXIT_SUCCESS;
+ return frames_corrupted ? EXIT_FAILURE : EXIT_SUCCESS;
}
diff --git a/vpxenc.c b/vpxenc.c
index c9547ea..3fc8da1 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -8,10 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include "vpx_config.h"
-/* This is a simple program that encodes YV12 files and generates ivf
- * files using the new interface.
- */
#if defined(_WIN32) || !CONFIG_OS_SUPPORT
#define USE_POSIX_MMAP 0
#else
@@ -25,6 +23,9 @@
#include <limits.h>
#include <assert.h>
#include "vpx/vpx_encoder.h"
+#if CONFIG_DECODERS
+#include "vpx/vpx_decoder.h"
+#endif
#if USE_POSIX_MMAP
#include <sys/types.h>
#include <sys/stat.h>
@@ -32,7 +33,14 @@
#include <fcntl.h>
#include <unistd.h>
#endif
+
+#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER
#include "vpx/vp8cx.h"
+#endif
+#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER
+#include "vpx/vp8dx.h"
+#endif
+
#include "vpx_ports/mem_ops.h"
#include "vpx_ports/vpx_timer.h"
#include "tools_common.h"
@@ -66,643 +74,592 @@
/* Swallow warnings about unused results of fread/fwrite */
static size_t wrap_fread(void *ptr, size_t size, size_t nmemb,
- FILE *stream)
-{
- return fread(ptr, size, nmemb, stream);
+ FILE *stream) {
+ return fread(ptr, size, nmemb, stream);
}
#define fread wrap_fread
static size_t wrap_fwrite(const void *ptr, size_t size, size_t nmemb,
- FILE *stream)
-{
- return fwrite(ptr, size, nmemb, stream);
+ FILE *stream) {
+ return fwrite(ptr, size, nmemb, stream);
}
#define fwrite wrap_fwrite
static const char *exec_name;
-static const struct codec_item
-{
- char const *name;
- vpx_codec_iface_t *iface;
- unsigned int fourcc;
-} codecs[] =
-{
-#if CONFIG_VP8_ENCODER
- {"vp8", &vpx_codec_vp8_cx_algo, 0x30385056},
+#define VP8_FOURCC (0x00385056)
+#define VP9_FOURCC (0x00395056)
+static const struct codec_item {
+ char const *name;
+ const vpx_codec_iface_t *(*iface)(void);
+ const vpx_codec_iface_t *(*dx_iface)(void);
+ unsigned int fourcc;
+} codecs[] = {
+#if CONFIG_VP8_ENCODER && CONFIG_VP8_DECODER
+ {"vp8", &vpx_codec_vp8_cx, &vpx_codec_vp8_dx, VP8_FOURCC},
+#elif CONFIG_VP8_ENCODER && !CONFIG_VP8_DECODER
+ {"vp8", &vpx_codec_vp8_cx, NULL, VP8_FOURCC},
+#endif
+#if CONFIG_VP9_ENCODER && CONFIG_VP9_DECODER
+ {"vp9", &vpx_codec_vp9_cx, &vpx_codec_vp9_dx, VP9_FOURCC},
+#elif CONFIG_VP9_ENCODER && !CONFIG_VP9_DECODER
+ {"vp9", &vpx_codec_vp9_cx, NULL, VP9_FOURCC},
#endif
};
static void usage_exit();
#define LOG_ERROR(label) do \
-{\
+ {\
const char *l=label;\
va_list ap;\
va_start(ap, fmt);\
if(l)\
- fprintf(stderr, "%s: ", l);\
+ fprintf(stderr, "%s: ", l);\
vfprintf(stderr, fmt, ap);\
fprintf(stderr, "\n");\
va_end(ap);\
-} while(0)
+ } while(0)
-void die(const char *fmt, ...)
-{
- LOG_ERROR(NULL);
- usage_exit();
+void die(const char *fmt, ...) {
+ LOG_ERROR(NULL);
+ usage_exit();
}
-void fatal(const char *fmt, ...)
-{
- LOG_ERROR("Fatal");
+void fatal(const char *fmt, ...) {
+ LOG_ERROR("Fatal");
+ exit(EXIT_FAILURE);
+}
+
+
+void warn(const char *fmt, ...) {
+ LOG_ERROR("Warning");
+}
+
+
+static void ctx_exit_on_error(vpx_codec_ctx_t *ctx, const char *s, ...) {
+ va_list ap;
+
+ va_start(ap, s);
+ if (ctx->err) {
+ const char *detail = vpx_codec_error_detail(ctx);
+
+ vfprintf(stderr, s, ap);
+ fprintf(stderr, ": %s\n", vpx_codec_error(ctx));
+
+ if (detail)
+ fprintf(stderr, " %s\n", detail);
+
exit(EXIT_FAILURE);
-}
-
-
-void warn(const char *fmt, ...)
-{
- LOG_ERROR("Warning");
-}
-
-
-static void ctx_exit_on_error(vpx_codec_ctx_t *ctx, const char *s, ...)
-{
- va_list ap;
-
- va_start(ap, s);
- if (ctx->err)
- {
- const char *detail = vpx_codec_error_detail(ctx);
-
- vfprintf(stderr, s, ap);
- fprintf(stderr, ": %s\n", vpx_codec_error(ctx));
-
- if (detail)
- fprintf(stderr, " %s\n", detail);
-
- exit(EXIT_FAILURE);
- }
+ }
}
/* This structure is used to abstract the different ways of handling
* first pass statistics.
*/
-typedef struct
-{
- vpx_fixed_buf_t buf;
- int pass;
- FILE *file;
- char *buf_ptr;
- size_t buf_alloc_sz;
+typedef struct {
+ vpx_fixed_buf_t buf;
+ int pass;
+ FILE *file;
+ char *buf_ptr;
+ size_t buf_alloc_sz;
} stats_io_t;
-int stats_open_file(stats_io_t *stats, const char *fpf, int pass)
-{
- int res;
+int stats_open_file(stats_io_t *stats, const char *fpf, int pass) {
+ int res;
- stats->pass = pass;
+ stats->pass = pass;
- if (pass == 0)
- {
- stats->file = fopen(fpf, "wb");
- stats->buf.sz = 0;
- stats->buf.buf = NULL,
- res = (stats->file != NULL);
- }
- else
- {
+ if (pass == 0) {
+ stats->file = fopen(fpf, "wb");
+ stats->buf.sz = 0;
+ stats->buf.buf = NULL,
+ res = (stats->file != NULL);
+ } else {
#if 0
#elif USE_POSIX_MMAP
- struct stat stat_buf;
- int fd;
+ struct stat stat_buf;
+ int fd;
- fd = open(fpf, O_RDONLY);
- stats->file = fdopen(fd, "rb");
- fstat(fd, &stat_buf);
- stats->buf.sz = stat_buf.st_size;
- stats->buf.buf = mmap(NULL, stats->buf.sz, PROT_READ, MAP_PRIVATE,
- fd, 0);
- res = (stats->buf.buf != NULL);
-#else
- size_t nbytes;
-
- stats->file = fopen(fpf, "rb");
-
- if (fseek(stats->file, 0, SEEK_END))
- fatal("First-pass stats file must be seekable!");
-
- stats->buf.sz = stats->buf_alloc_sz = ftell(stats->file);
- rewind(stats->file);
-
- stats->buf.buf = malloc(stats->buf_alloc_sz);
-
- if (!stats->buf.buf)
- fatal("Failed to allocate first-pass stats buffer (%lu bytes)",
- (unsigned long)stats->buf_alloc_sz);
-
- nbytes = fread(stats->buf.buf, 1, stats->buf.sz, stats->file);
- res = (nbytes == stats->buf.sz);
-#endif
- }
-
- return res;
-}
-
-int stats_open_mem(stats_io_t *stats, int pass)
-{
- int res;
- stats->pass = pass;
-
- if (!pass)
- {
- stats->buf.sz = 0;
- stats->buf_alloc_sz = 64 * 1024;
- stats->buf.buf = malloc(stats->buf_alloc_sz);
- }
-
- stats->buf_ptr = stats->buf.buf;
+ fd = open(fpf, O_RDONLY);
+ stats->file = fdopen(fd, "rb");
+ fstat(fd, &stat_buf);
+ stats->buf.sz = stat_buf.st_size;
+ stats->buf.buf = mmap(NULL, stats->buf.sz, PROT_READ, MAP_PRIVATE,
+ fd, 0);
res = (stats->buf.buf != NULL);
- return res;
+#else
+ size_t nbytes;
+
+ stats->file = fopen(fpf, "rb");
+
+ if (fseek(stats->file, 0, SEEK_END))
+ fatal("First-pass stats file must be seekable!");
+
+ stats->buf.sz = stats->buf_alloc_sz = ftell(stats->file);
+ rewind(stats->file);
+
+ stats->buf.buf = malloc(stats->buf_alloc_sz);
+
+ if (!stats->buf.buf)
+ fatal("Failed to allocate first-pass stats buffer (%lu bytes)",
+ (unsigned long)stats->buf_alloc_sz);
+
+ nbytes = fread(stats->buf.buf, 1, stats->buf.sz, stats->file);
+ res = (nbytes == stats->buf.sz);
+#endif
+ }
+
+ return res;
+}
+
+int stats_open_mem(stats_io_t *stats, int pass) {
+ int res;
+ stats->pass = pass;
+
+ if (!pass) {
+ stats->buf.sz = 0;
+ stats->buf_alloc_sz = 64 * 1024;
+ stats->buf.buf = malloc(stats->buf_alloc_sz);
+ }
+
+ stats->buf_ptr = stats->buf.buf;
+ res = (stats->buf.buf != NULL);
+ return res;
}
-void stats_close(stats_io_t *stats, int last_pass)
-{
- if (stats->file)
- {
- if (stats->pass == last_pass)
- {
+void stats_close(stats_io_t *stats, int last_pass) {
+ if (stats->file) {
+ if (stats->pass == last_pass) {
#if 0
#elif USE_POSIX_MMAP
- munmap(stats->buf.buf, stats->buf.sz);
+ munmap(stats->buf.buf, stats->buf.sz);
#else
- free(stats->buf.buf);
+ free(stats->buf.buf);
#endif
- }
+ }
- fclose(stats->file);
- stats->file = NULL;
- }
- else
- {
- if (stats->pass == last_pass)
- free(stats->buf.buf);
- }
+ fclose(stats->file);
+ stats->file = NULL;
+ } else {
+ if (stats->pass == last_pass)
+ free(stats->buf.buf);
+ }
}
-void stats_write(stats_io_t *stats, const void *pkt, size_t len)
-{
- if (stats->file)
- {
- (void) fwrite(pkt, 1, len, stats->file);
- }
- else
- {
- if (stats->buf.sz + len > stats->buf_alloc_sz)
- {
- size_t new_sz = stats->buf_alloc_sz + 64 * 1024;
- char *new_ptr = realloc(stats->buf.buf, new_sz);
+void stats_write(stats_io_t *stats, const void *pkt, size_t len) {
+ if (stats->file) {
+ (void) fwrite(pkt, 1, len, stats->file);
+ } else {
+ if (stats->buf.sz + len > stats->buf_alloc_sz) {
+ size_t new_sz = stats->buf_alloc_sz + 64 * 1024;
+ char *new_ptr = realloc(stats->buf.buf, new_sz);
- if (new_ptr)
- {
- stats->buf_ptr = new_ptr + (stats->buf_ptr - (char *)stats->buf.buf);
- stats->buf.buf = new_ptr;
- stats->buf_alloc_sz = new_sz;
- }
- else
- fatal("Failed to realloc firstpass stats buffer.");
- }
-
- memcpy(stats->buf_ptr, pkt, len);
- stats->buf.sz += len;
- stats->buf_ptr += len;
+ if (new_ptr) {
+ stats->buf_ptr = new_ptr + (stats->buf_ptr - (char *)stats->buf.buf);
+ stats->buf.buf = new_ptr;
+ stats->buf_alloc_sz = new_sz;
+ } else
+ fatal("Failed to realloc firstpass stats buffer.");
}
+
+ memcpy(stats->buf_ptr, pkt, len);
+ stats->buf.sz += len;
+ stats->buf_ptr += len;
+ }
}
-vpx_fixed_buf_t stats_get(stats_io_t *stats)
-{
- return stats->buf;
+vpx_fixed_buf_t stats_get(stats_io_t *stats) {
+ return stats->buf;
}
/* Stereo 3D packed frame format */
-typedef enum stereo_format
-{
- STEREO_FORMAT_MONO = 0,
- STEREO_FORMAT_LEFT_RIGHT = 1,
- STEREO_FORMAT_BOTTOM_TOP = 2,
- STEREO_FORMAT_TOP_BOTTOM = 3,
- STEREO_FORMAT_RIGHT_LEFT = 11
+typedef enum stereo_format {
+ STEREO_FORMAT_MONO = 0,
+ STEREO_FORMAT_LEFT_RIGHT = 1,
+ STEREO_FORMAT_BOTTOM_TOP = 2,
+ STEREO_FORMAT_TOP_BOTTOM = 3,
+ STEREO_FORMAT_RIGHT_LEFT = 11
} stereo_format_t;
-enum video_file_type
-{
- FILE_TYPE_RAW,
- FILE_TYPE_IVF,
- FILE_TYPE_Y4M
+enum video_file_type {
+ FILE_TYPE_RAW,
+ FILE_TYPE_IVF,
+ FILE_TYPE_Y4M
};
struct detect_buffer {
- char buf[4];
- size_t buf_read;
- size_t position;
+ char buf[4];
+ size_t buf_read;
+ size_t position;
};
-struct input_state
-{
- char *fn;
- FILE *file;
- y4m_input y4m;
- struct detect_buffer detect;
- enum video_file_type file_type;
- unsigned int w;
- unsigned int h;
- struct vpx_rational framerate;
- int use_i420;
+struct input_state {
+ char *fn;
+ FILE *file;
+ y4m_input y4m;
+ struct detect_buffer detect;
+ enum video_file_type file_type;
+ unsigned int w;
+ unsigned int h;
+ struct vpx_rational framerate;
+ int use_i420;
};
#define IVF_FRAME_HDR_SZ (4+8) /* 4 byte size + 8 byte timestamp */
-static int read_frame(struct input_state *input, vpx_image_t *img)
-{
- FILE *f = input->file;
- enum video_file_type file_type = input->file_type;
- y4m_input *y4m = &input->y4m;
- struct detect_buffer *detect = &input->detect;
- int plane = 0;
- int shortread = 0;
+static int read_frame(struct input_state *input, vpx_image_t *img) {
+ FILE *f = input->file;
+ enum video_file_type file_type = input->file_type;
+ y4m_input *y4m = &input->y4m;
+ struct detect_buffer *detect = &input->detect;
+ int plane = 0;
+ int shortread = 0;
- if (file_type == FILE_TYPE_Y4M)
- {
- if (y4m_input_fetch_frame(y4m, f, img) < 1)
- return 0;
- }
- else
- {
- if (file_type == FILE_TYPE_IVF)
- {
- char junk[IVF_FRAME_HDR_SZ];
+ if (file_type == FILE_TYPE_Y4M) {
+ if (y4m_input_fetch_frame(y4m, f, img) < 1)
+ return 0;
+ } else {
+ if (file_type == FILE_TYPE_IVF) {
+ char junk[IVF_FRAME_HDR_SZ];
- /* Skip the frame header. We know how big the frame should be. See
- * write_ivf_frame_header() for documentation on the frame header
- * layout.
- */
- (void) fread(junk, 1, IVF_FRAME_HDR_SZ, f);
- }
-
- for (plane = 0; plane < 3; plane++)
- {
- unsigned char *ptr;
- int w = (plane ? (1 + img->d_w) / 2 : img->d_w);
- int h = (plane ? (1 + img->d_h) / 2 : img->d_h);
- int r;
-
- /* Determine the correct plane based on the image format. The for-loop
- * always counts in Y,U,V order, but this may not match the order of
- * the data on disk.
- */
- switch (plane)
- {
- case 1:
- ptr = img->planes[img->fmt==VPX_IMG_FMT_YV12? VPX_PLANE_V : VPX_PLANE_U];
- break;
- case 2:
- ptr = img->planes[img->fmt==VPX_IMG_FMT_YV12?VPX_PLANE_U : VPX_PLANE_V];
- break;
- default:
- ptr = img->planes[plane];
- }
-
- for (r = 0; r < h; r++)
- {
- size_t needed = w;
- size_t buf_position = 0;
- const size_t left = detect->buf_read - detect->position;
- if (left > 0)
- {
- const size_t more = (left < needed) ? left : needed;
- memcpy(ptr, detect->buf + detect->position, more);
- buf_position = more;
- needed -= more;
- detect->position += more;
- }
- if (needed > 0)
- {
- shortread |= (fread(ptr + buf_position, 1, needed, f) < needed);
- }
-
- ptr += img->stride[plane];
- }
- }
+ /* Skip the frame header. We know how big the frame should be. See
+ * write_ivf_frame_header() for documentation on the frame header
+ * layout.
+ */
+ (void) fread(junk, 1, IVF_FRAME_HDR_SZ, f);
}
- return !shortread;
+ for (plane = 0; plane < 3; plane++) {
+ unsigned char *ptr;
+ int w = (plane ? (1 + img->d_w) / 2 : img->d_w);
+ int h = (plane ? (1 + img->d_h) / 2 : img->d_h);
+ int r;
+
+ /* Determine the correct plane based on the image format. The for-loop
+ * always counts in Y,U,V order, but this may not match the order of
+ * the data on disk.
+ */
+ switch (plane) {
+ case 1:
+ ptr = img->planes[img->fmt == VPX_IMG_FMT_YV12 ? VPX_PLANE_V : VPX_PLANE_U];
+ break;
+ case 2:
+ ptr = img->planes[img->fmt == VPX_IMG_FMT_YV12 ? VPX_PLANE_U : VPX_PLANE_V];
+ break;
+ default:
+ ptr = img->planes[plane];
+ }
+
+ for (r = 0; r < h; r++) {
+ size_t needed = w;
+ size_t buf_position = 0;
+ const size_t left = detect->buf_read - detect->position;
+ if (left > 0) {
+ const size_t more = (left < needed) ? left : needed;
+ memcpy(ptr, detect->buf + detect->position, more);
+ buf_position = more;
+ needed -= more;
+ detect->position += more;
+ }
+ if (needed > 0) {
+ shortread |= (fread(ptr + buf_position, 1, needed, f) < needed);
+ }
+
+ ptr += img->stride[plane];
+ }
+ }
+ }
+
+ return !shortread;
}
unsigned int file_is_y4m(FILE *infile,
y4m_input *y4m,
- char detect[4])
-{
- if(memcmp(detect, "YUV4", 4) == 0)
- {
- return 1;
- }
- return 0;
+ char detect[4]) {
+ if (memcmp(detect, "YUV4", 4) == 0) {
+ return 1;
+ }
+ return 0;
}
#define IVF_FILE_HDR_SZ (32)
unsigned int file_is_ivf(struct input_state *input,
- unsigned int *fourcc)
-{
- char raw_hdr[IVF_FILE_HDR_SZ];
- int is_ivf = 0;
- FILE *infile = input->file;
- unsigned int *width = &input->w;
- unsigned int *height = &input->h;
- struct detect_buffer *detect = &input->detect;
+ unsigned int *fourcc) {
+ char raw_hdr[IVF_FILE_HDR_SZ];
+ int is_ivf = 0;
+ FILE *infile = input->file;
+ unsigned int *width = &input->w;
+ unsigned int *height = &input->h;
+ struct detect_buffer *detect = &input->detect;
- if(memcmp(detect->buf, "DKIF", 4) != 0)
- return 0;
+ if (memcmp(detect->buf, "DKIF", 4) != 0)
+ return 0;
- /* See write_ivf_file_header() for more documentation on the file header
- * layout.
- */
- if (fread(raw_hdr + 4, 1, IVF_FILE_HDR_SZ - 4, infile)
- == IVF_FILE_HDR_SZ - 4)
+ /* See write_ivf_file_header() for more documentation on the file header
+ * layout.
+ */
+ if (fread(raw_hdr + 4, 1, IVF_FILE_HDR_SZ - 4, infile)
+ == IVF_FILE_HDR_SZ - 4) {
{
- {
- is_ivf = 1;
+ is_ivf = 1;
- if (mem_get_le16(raw_hdr + 4) != 0)
- warn("Unrecognized IVF version! This file may not decode "
- "properly.");
+ if (mem_get_le16(raw_hdr + 4) != 0)
+ warn("Unrecognized IVF version! This file may not decode "
+ "properly.");
- *fourcc = mem_get_le32(raw_hdr + 8);
- }
+ *fourcc = mem_get_le32(raw_hdr + 8);
}
+ }
- if (is_ivf)
- {
- *width = mem_get_le16(raw_hdr + 12);
- *height = mem_get_le16(raw_hdr + 14);
- detect->position = 4;
- }
+ if (is_ivf) {
+ *width = mem_get_le16(raw_hdr + 12);
+ *height = mem_get_le16(raw_hdr + 14);
+ detect->position = 4;
+ }
- return is_ivf;
+ return is_ivf;
}
static void write_ivf_file_header(FILE *outfile,
const vpx_codec_enc_cfg_t *cfg,
unsigned int fourcc,
- int frame_cnt)
-{
- char header[32];
+ int frame_cnt) {
+ char header[32];
- if (cfg->g_pass != VPX_RC_ONE_PASS && cfg->g_pass != VPX_RC_LAST_PASS)
- return;
+ if (cfg->g_pass != VPX_RC_ONE_PASS && cfg->g_pass != VPX_RC_LAST_PASS)
+ return;
- header[0] = 'D';
- header[1] = 'K';
- header[2] = 'I';
- header[3] = 'F';
- mem_put_le16(header + 4, 0); /* version */
- mem_put_le16(header + 6, 32); /* headersize */
- mem_put_le32(header + 8, fourcc); /* headersize */
- mem_put_le16(header + 12, cfg->g_w); /* width */
- mem_put_le16(header + 14, cfg->g_h); /* height */
- mem_put_le32(header + 16, cfg->g_timebase.den); /* rate */
- mem_put_le32(header + 20, cfg->g_timebase.num); /* scale */
- mem_put_le32(header + 24, frame_cnt); /* length */
- mem_put_le32(header + 28, 0); /* unused */
+ header[0] = 'D';
+ header[1] = 'K';
+ header[2] = 'I';
+ header[3] = 'F';
+ mem_put_le16(header + 4, 0); /* version */
+ mem_put_le16(header + 6, 32); /* headersize */
+ mem_put_le32(header + 8, fourcc); /* fourcc */
+ mem_put_le16(header + 12, cfg->g_w); /* width */
+ mem_put_le16(header + 14, cfg->g_h); /* height */
+ mem_put_le32(header + 16, cfg->g_timebase.den); /* rate */
+ mem_put_le32(header + 20, cfg->g_timebase.num); /* scale */
+ mem_put_le32(header + 24, frame_cnt); /* length */
+ mem_put_le32(header + 28, 0); /* unused */
- (void) fwrite(header, 1, 32, outfile);
+ (void) fwrite(header, 1, 32, outfile);
}
static void write_ivf_frame_header(FILE *outfile,
- const vpx_codec_cx_pkt_t *pkt)
-{
- char header[12];
- vpx_codec_pts_t pts;
+ const vpx_codec_cx_pkt_t *pkt) {
+ char header[12];
+ vpx_codec_pts_t pts;
- if (pkt->kind != VPX_CODEC_CX_FRAME_PKT)
- return;
+ if (pkt->kind != VPX_CODEC_CX_FRAME_PKT)
+ return;
- pts = pkt->data.frame.pts;
- mem_put_le32(header, (int)pkt->data.frame.sz);
- mem_put_le32(header + 4, pts & 0xFFFFFFFF);
- mem_put_le32(header + 8, pts >> 32);
+ pts = pkt->data.frame.pts;
+ mem_put_le32(header, (int)pkt->data.frame.sz);
+ mem_put_le32(header + 4, pts & 0xFFFFFFFF);
+ mem_put_le32(header + 8, pts >> 32);
- (void) fwrite(header, 1, 12, outfile);
+ (void) fwrite(header, 1, 12, outfile);
}
-static void write_ivf_frame_size(FILE *outfile, size_t size)
-{
- char header[4];
- mem_put_le32(header, (int)size);
- (void) fwrite(header, 1, 4, outfile);
+static void write_ivf_frame_size(FILE *outfile, size_t size) {
+ char header[4];
+ mem_put_le32(header, (int)size);
+ (void) fwrite(header, 1, 4, outfile);
}
typedef off_t EbmlLoc;
-struct cue_entry
-{
- unsigned int time;
- uint64_t loc;
+struct cue_entry {
+ unsigned int time;
+ uint64_t loc;
};
-struct EbmlGlobal
-{
- int debug;
+struct EbmlGlobal {
+ int debug;
- FILE *stream;
- int64_t last_pts_ms;
- vpx_rational_t framerate;
+ FILE *stream;
+ int64_t last_pts_ms;
+ vpx_rational_t framerate;
- /* These pointers are to the start of an element */
- off_t position_reference;
- off_t seek_info_pos;
- off_t segment_info_pos;
- off_t track_pos;
- off_t cue_pos;
- off_t cluster_pos;
+ /* These pointers are to the start of an element */
+ off_t position_reference;
+ off_t seek_info_pos;
+ off_t segment_info_pos;
+ off_t track_pos;
+ off_t cue_pos;
+ off_t cluster_pos;
- /* This pointer is to a specific element to be serialized */
- off_t track_id_pos;
+ /* This pointer is to a specific element to be serialized */
+ off_t track_id_pos;
- /* These pointers are to the size field of the element */
- EbmlLoc startSegment;
- EbmlLoc startCluster;
+ /* These pointers are to the size field of the element */
+ EbmlLoc startSegment;
+ EbmlLoc startCluster;
- uint32_t cluster_timecode;
- int cluster_open;
+ uint32_t cluster_timecode;
+ int cluster_open;
- struct cue_entry *cue_list;
- unsigned int cues;
+ struct cue_entry *cue_list;
+ unsigned int cues;
};
-void Ebml_Write(EbmlGlobal *glob, const void *buffer_in, unsigned long len)
-{
- (void) fwrite(buffer_in, 1, len, glob->stream);
+void Ebml_Write(EbmlGlobal *glob, const void *buffer_in, unsigned long len) {
+ (void) fwrite(buffer_in, 1, len, glob->stream);
}
#define WRITE_BUFFER(s) \
-for(i = len-1; i>=0; i--)\
-{ \
+ for(i = len-1; i>=0; i--)\
+ { \
x = (char)(*(const s *)buffer_in >> (i * CHAR_BIT)); \
Ebml_Write(glob, &x, 1); \
-}
-void Ebml_Serialize(EbmlGlobal *glob, const void *buffer_in, int buffer_size, unsigned long len)
-{
- char x;
- int i;
+ }
+void Ebml_Serialize(EbmlGlobal *glob, const void *buffer_in, int buffer_size, unsigned long len) {
+ char x;
+ int i;
- /* buffer_size:
- * 1 - int8_t;
- * 2 - int16_t;
- * 3 - int32_t;
- * 4 - int64_t;
- */
- switch (buffer_size)
- {
- case 1:
- WRITE_BUFFER(int8_t)
- break;
- case 2:
- WRITE_BUFFER(int16_t)
- break;
- case 4:
- WRITE_BUFFER(int32_t)
- break;
- case 8:
- WRITE_BUFFER(int64_t)
- break;
- default:
- break;
- }
+ /* buffer_size:
+ * 1 - int8_t;
+ * 2 - int16_t;
+ * 4 - int32_t;
+ * 8 - int64_t;
+ */
+ switch (buffer_size) {
+ case 1:
+ WRITE_BUFFER(int8_t)
+ break;
+ case 2:
+ WRITE_BUFFER(int16_t)
+ break;
+ case 4:
+ WRITE_BUFFER(int32_t)
+ break;
+ case 8:
+ WRITE_BUFFER(int64_t)
+ break;
+ default:
+ break;
+ }
}
#undef WRITE_BUFFER
/* Need a fixed size serializer for the track ID. libmkv provides a 64 bit
* one, but not a 32 bit one.
*/
-static void Ebml_SerializeUnsigned32(EbmlGlobal *glob, unsigned long class_id, uint64_t ui)
-{
- unsigned char sizeSerialized = 4 | 0x80;
- Ebml_WriteID(glob, class_id);
- Ebml_Serialize(glob, &sizeSerialized, sizeof(sizeSerialized), 1);
- Ebml_Serialize(glob, &ui, sizeof(ui), 4);
+static void Ebml_SerializeUnsigned32(EbmlGlobal *glob, unsigned long class_id, uint64_t ui) {
+ unsigned char sizeSerialized = 4 | 0x80;
+ Ebml_WriteID(glob, class_id);
+ Ebml_Serialize(glob, &sizeSerialized, sizeof(sizeSerialized), 1);
+ Ebml_Serialize(glob, &ui, sizeof(ui), 4);
}
static void
Ebml_StartSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc,
- unsigned long class_id)
-{
- /* todo this is always taking 8 bytes, this may need later optimization */
- /* this is a key that says length unknown */
- uint64_t unknownLen = LITERALU64(0x01FFFFFF, 0xFFFFFFFF);
+ unsigned long class_id) {
+ /* todo this is always taking 8 bytes, this may need later optimization */
+ /* this is a key that says length unknown */
+ uint64_t unknownLen = LITERALU64(0x01FFFFFF, 0xFFFFFFFF);
- Ebml_WriteID(glob, class_id);
- *ebmlLoc = ftello(glob->stream);
- Ebml_Serialize(glob, &unknownLen, sizeof(unknownLen), 8);
+ Ebml_WriteID(glob, class_id);
+ *ebmlLoc = ftello(glob->stream);
+ Ebml_Serialize(glob, &unknownLen, sizeof(unknownLen), 8);
}
static void
-Ebml_EndSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc)
-{
- off_t pos;
- uint64_t size;
+Ebml_EndSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc) {
+ off_t pos;
+ uint64_t size;
- /* Save the current stream pointer */
- pos = ftello(glob->stream);
+ /* Save the current stream pointer */
+ pos = ftello(glob->stream);
- /* Calculate the size of this element */
- size = pos - *ebmlLoc - 8;
- size |= LITERALU64(0x01000000,0x00000000);
+ /* Calculate the size of this element */
+ size = pos - *ebmlLoc - 8;
+ size |= LITERALU64(0x01000000, 0x00000000);
- /* Seek back to the beginning of the element and write the new size */
- fseeko(glob->stream, *ebmlLoc, SEEK_SET);
- Ebml_Serialize(glob, &size, sizeof(size), 8);
+ /* Seek back to the beginning of the element and write the new size */
+ fseeko(glob->stream, *ebmlLoc, SEEK_SET);
+ Ebml_Serialize(glob, &size, sizeof(size), 8);
- /* Reset the stream pointer */
- fseeko(glob->stream, pos, SEEK_SET);
+ /* Reset the stream pointer */
+ fseeko(glob->stream, pos, SEEK_SET);
}
static void
-write_webm_seek_element(EbmlGlobal *ebml, unsigned long id, off_t pos)
-{
- uint64_t offset = pos - ebml->position_reference;
+write_webm_seek_element(EbmlGlobal *ebml, unsigned long id, off_t pos) {
+ uint64_t offset = pos - ebml->position_reference;
+ EbmlLoc start;
+ Ebml_StartSubElement(ebml, &start, Seek);
+ Ebml_SerializeBinary(ebml, SeekID, id);
+ Ebml_SerializeUnsigned64(ebml, SeekPosition, offset);
+ Ebml_EndSubElement(ebml, &start);
+}
+
+
+static void
+write_webm_seek_info(EbmlGlobal *ebml) {
+
+ off_t pos;
+
+ /* Save the current stream pointer */
+ pos = ftello(ebml->stream);
+
+ if (ebml->seek_info_pos)
+ fseeko(ebml->stream, ebml->seek_info_pos, SEEK_SET);
+ else
+ ebml->seek_info_pos = pos;
+
+ {
EbmlLoc start;
- Ebml_StartSubElement(ebml, &start, Seek);
- Ebml_SerializeBinary(ebml, SeekID, id);
- Ebml_SerializeUnsigned64(ebml, SeekPosition, offset);
+
+ Ebml_StartSubElement(ebml, &start, SeekHead);
+ write_webm_seek_element(ebml, Tracks, ebml->track_pos);
+ write_webm_seek_element(ebml, Cues, ebml->cue_pos);
+ write_webm_seek_element(ebml, Info, ebml->segment_info_pos);
Ebml_EndSubElement(ebml, &start);
-}
+ }
+ {
+ /* segment info */
+ EbmlLoc startInfo;
+ uint64_t frame_time;
+ char version_string[64];
-
-static void
-write_webm_seek_info(EbmlGlobal *ebml)
-{
-
- off_t pos;
-
- /* Save the current stream pointer */
- pos = ftello(ebml->stream);
-
- if(ebml->seek_info_pos)
- fseeko(ebml->stream, ebml->seek_info_pos, SEEK_SET);
- else
- ebml->seek_info_pos = pos;
-
- {
- EbmlLoc start;
-
- Ebml_StartSubElement(ebml, &start, SeekHead);
- write_webm_seek_element(ebml, Tracks, ebml->track_pos);
- write_webm_seek_element(ebml, Cues, ebml->cue_pos);
- write_webm_seek_element(ebml, Info, ebml->segment_info_pos);
- Ebml_EndSubElement(ebml, &start);
+ /* Assemble version string */
+ if (ebml->debug)
+ strcpy(version_string, "vpxenc");
+ else {
+ strcpy(version_string, "vpxenc ");
+ strncat(version_string,
+ vpx_codec_version_str(),
+ sizeof(version_string) - 1 - strlen(version_string));
}
- {
- /* segment info */
- EbmlLoc startInfo;
- uint64_t frame_time;
- char version_string[64];
- /* Assemble version string */
- if(ebml->debug)
- strcpy(version_string, "vpxenc");
- else
- {
- strcpy(version_string, "vpxenc ");
- strncat(version_string,
- vpx_codec_version_str(),
- sizeof(version_string) - 1 - strlen(version_string));
- }
-
- frame_time = (uint64_t)1000 * ebml->framerate.den
- / ebml->framerate.num;
- ebml->segment_info_pos = ftello(ebml->stream);
- Ebml_StartSubElement(ebml, &startInfo, Info);
- Ebml_SerializeUnsigned(ebml, TimecodeScale, 1000000);
- Ebml_SerializeFloat(ebml, Segment_Duration,
- (double)(ebml->last_pts_ms + frame_time));
- Ebml_SerializeString(ebml, 0x4D80, version_string);
- Ebml_SerializeString(ebml, 0x5741, version_string);
- Ebml_EndSubElement(ebml, &startInfo);
- }
+ frame_time = (uint64_t)1000 * ebml->framerate.den
+ / ebml->framerate.num;
+ ebml->segment_info_pos = ftello(ebml->stream);
+ Ebml_StartSubElement(ebml, &startInfo, Info);
+ Ebml_SerializeUnsigned(ebml, TimecodeScale, 1000000);
+ Ebml_SerializeFloat(ebml, Segment_Duration,
+ (double)(ebml->last_pts_ms + frame_time));
+ Ebml_SerializeString(ebml, 0x4D80, version_string);
+ Ebml_SerializeString(ebml, 0x5741, version_string);
+ Ebml_EndSubElement(ebml, &startInfo);
+ }
}
@@ -710,1859 +667,1944 @@
write_webm_file_header(EbmlGlobal *glob,
const vpx_codec_enc_cfg_t *cfg,
const struct vpx_rational *fps,
- stereo_format_t stereo_fmt)
-{
+ stereo_format_t stereo_fmt,
+ unsigned int fourcc) {
+ {
+ EbmlLoc start;
+ Ebml_StartSubElement(glob, &start, EBML);
+ Ebml_SerializeUnsigned(glob, EBMLVersion, 1);
+ Ebml_SerializeUnsigned(glob, EBMLReadVersion, 1);
+ Ebml_SerializeUnsigned(glob, EBMLMaxIDLength, 4);
+ Ebml_SerializeUnsigned(glob, EBMLMaxSizeLength, 8);
+ Ebml_SerializeString(glob, DocType, "webm");
+ Ebml_SerializeUnsigned(glob, DocTypeVersion, 2);
+ Ebml_SerializeUnsigned(glob, DocTypeReadVersion, 2);
+ Ebml_EndSubElement(glob, &start);
+ }
+ {
+ Ebml_StartSubElement(glob, &glob->startSegment, Segment);
+ glob->position_reference = ftello(glob->stream);
+ glob->framerate = *fps;
+ write_webm_seek_info(glob);
+
{
+ EbmlLoc trackStart;
+ glob->track_pos = ftello(glob->stream);
+ Ebml_StartSubElement(glob, &trackStart, Tracks);
+ {
+ unsigned int trackNumber = 1;
+ uint64_t trackID = 0;
+
EbmlLoc start;
- Ebml_StartSubElement(glob, &start, EBML);
- Ebml_SerializeUnsigned(glob, EBMLVersion, 1);
- Ebml_SerializeUnsigned(glob, EBMLReadVersion, 1);
- Ebml_SerializeUnsigned(glob, EBMLMaxIDLength, 4);
- Ebml_SerializeUnsigned(glob, EBMLMaxSizeLength, 8);
- Ebml_SerializeString(glob, DocType, "webm");
- Ebml_SerializeUnsigned(glob, DocTypeVersion, 2);
- Ebml_SerializeUnsigned(glob, DocTypeReadVersion, 2);
- Ebml_EndSubElement(glob, &start);
- }
- {
- Ebml_StartSubElement(glob, &glob->startSegment, Segment);
- glob->position_reference = ftello(glob->stream);
- glob->framerate = *fps;
- write_webm_seek_info(glob);
-
+ Ebml_StartSubElement(glob, &start, TrackEntry);
+ Ebml_SerializeUnsigned(glob, TrackNumber, trackNumber);
+ glob->track_id_pos = ftello(glob->stream);
+ Ebml_SerializeUnsigned32(glob, TrackUID, trackID);
+ Ebml_SerializeUnsigned(glob, TrackType, 1);
+ Ebml_SerializeString(glob, CodecID,
+ fourcc == VP8_FOURCC ? "V_VP8" : "V_VP9");
{
- EbmlLoc trackStart;
- glob->track_pos = ftello(glob->stream);
- Ebml_StartSubElement(glob, &trackStart, Tracks);
- {
- unsigned int trackNumber = 1;
- uint64_t trackID = 0;
+ unsigned int pixelWidth = cfg->g_w;
+ unsigned int pixelHeight = cfg->g_h;
+ float frameRate = (float)fps->num / (float)fps->den;
- EbmlLoc start;
- Ebml_StartSubElement(glob, &start, TrackEntry);
- Ebml_SerializeUnsigned(glob, TrackNumber, trackNumber);
- glob->track_id_pos = ftello(glob->stream);
- Ebml_SerializeUnsigned32(glob, TrackUID, trackID);
- Ebml_SerializeUnsigned(glob, TrackType, 1);
- Ebml_SerializeString(glob, CodecID, "V_VP8");
- {
- unsigned int pixelWidth = cfg->g_w;
- unsigned int pixelHeight = cfg->g_h;
- float frameRate = (float)fps->num/(float)fps->den;
-
- EbmlLoc videoStart;
- Ebml_StartSubElement(glob, &videoStart, Video);
- Ebml_SerializeUnsigned(glob, PixelWidth, pixelWidth);
- Ebml_SerializeUnsigned(glob, PixelHeight, pixelHeight);
- Ebml_SerializeUnsigned(glob, StereoMode, stereo_fmt);
- Ebml_SerializeFloat(glob, FrameRate, frameRate);
- Ebml_EndSubElement(glob, &videoStart);
- }
- Ebml_EndSubElement(glob, &start); /* Track Entry */
- }
- Ebml_EndSubElement(glob, &trackStart);
+ EbmlLoc videoStart;
+ Ebml_StartSubElement(glob, &videoStart, Video);
+ Ebml_SerializeUnsigned(glob, PixelWidth, pixelWidth);
+ Ebml_SerializeUnsigned(glob, PixelHeight, pixelHeight);
+ Ebml_SerializeUnsigned(glob, StereoMode, stereo_fmt);
+ Ebml_SerializeFloat(glob, FrameRate, frameRate);
+ Ebml_EndSubElement(glob, &videoStart);
}
- /* segment element is open */
+ Ebml_EndSubElement(glob, &start); /* Track Entry */
+ }
+ Ebml_EndSubElement(glob, &trackStart);
}
+ /* segment element is open */
+ }
}
static void
write_webm_block(EbmlGlobal *glob,
const vpx_codec_enc_cfg_t *cfg,
- const vpx_codec_cx_pkt_t *pkt)
-{
- unsigned long block_length;
- unsigned char track_number;
- unsigned short block_timecode = 0;
- unsigned char flags;
- int64_t pts_ms;
- int start_cluster = 0, is_keyframe;
+ const vpx_codec_cx_pkt_t *pkt) {
+ unsigned long block_length;
+ unsigned char track_number;
+ unsigned short block_timecode = 0;
+ unsigned char flags;
+ int64_t pts_ms;
+ int start_cluster = 0, is_keyframe;
- /* Calculate the PTS of this frame in milliseconds */
- pts_ms = pkt->data.frame.pts * 1000
- * (uint64_t)cfg->g_timebase.num / (uint64_t)cfg->g_timebase.den;
- if(pts_ms <= glob->last_pts_ms)
- pts_ms = glob->last_pts_ms + 1;
- glob->last_pts_ms = pts_ms;
+ /* Calculate the PTS of this frame in milliseconds */
+ pts_ms = pkt->data.frame.pts * 1000
+ * (uint64_t)cfg->g_timebase.num / (uint64_t)cfg->g_timebase.den;
+ if (pts_ms <= glob->last_pts_ms)
+ pts_ms = glob->last_pts_ms + 1;
+ glob->last_pts_ms = pts_ms;
- /* Calculate the relative time of this block */
- if(pts_ms - glob->cluster_timecode > SHRT_MAX)
- start_cluster = 1;
- else
- block_timecode = (unsigned short)pts_ms - glob->cluster_timecode;
+ /* Calculate the relative time of this block */
+ if (pts_ms - glob->cluster_timecode > SHRT_MAX)
+ start_cluster = 1;
+ else
+ block_timecode = (unsigned short)pts_ms - glob->cluster_timecode;
- is_keyframe = (pkt->data.frame.flags & VPX_FRAME_IS_KEY);
- if(start_cluster || is_keyframe)
- {
- if(glob->cluster_open)
- Ebml_EndSubElement(glob, &glob->startCluster);
+ is_keyframe = (pkt->data.frame.flags & VPX_FRAME_IS_KEY);
+ if (start_cluster || is_keyframe) {
+ if (glob->cluster_open)
+ Ebml_EndSubElement(glob, &glob->startCluster);
- /* Open the new cluster */
- block_timecode = 0;
- glob->cluster_open = 1;
- glob->cluster_timecode = (uint32_t)pts_ms;
- glob->cluster_pos = ftello(glob->stream);
- Ebml_StartSubElement(glob, &glob->startCluster, Cluster); /* cluster */
- Ebml_SerializeUnsigned(glob, Timecode, glob->cluster_timecode);
+ /* Open the new cluster */
+ block_timecode = 0;
+ glob->cluster_open = 1;
+ glob->cluster_timecode = (uint32_t)pts_ms;
+ glob->cluster_pos = ftello(glob->stream);
+ Ebml_StartSubElement(glob, &glob->startCluster, Cluster); /* cluster */
+ Ebml_SerializeUnsigned(glob, Timecode, glob->cluster_timecode);
- /* Save a cue point if this is a keyframe. */
- if(is_keyframe)
- {
- struct cue_entry *cue, *new_cue_list;
+ /* Save a cue point if this is a keyframe. */
+ if (is_keyframe) {
+ struct cue_entry *cue, *new_cue_list;
- new_cue_list = realloc(glob->cue_list,
- (glob->cues+1) * sizeof(struct cue_entry));
- if(new_cue_list)
- glob->cue_list = new_cue_list;
- else
- fatal("Failed to realloc cue list.");
+ new_cue_list = realloc(glob->cue_list,
+ (glob->cues + 1) * sizeof(struct cue_entry));
+ if (new_cue_list)
+ glob->cue_list = new_cue_list;
+ else
+ fatal("Failed to realloc cue list.");
- cue = &glob->cue_list[glob->cues];
- cue->time = glob->cluster_timecode;
- cue->loc = glob->cluster_pos;
- glob->cues++;
- }
+ cue = &glob->cue_list[glob->cues];
+ cue->time = glob->cluster_timecode;
+ cue->loc = glob->cluster_pos;
+ glob->cues++;
}
+ }
- /* Write the Simple Block */
- Ebml_WriteID(glob, SimpleBlock);
+ /* Write the Simple Block */
+ Ebml_WriteID(glob, SimpleBlock);
- block_length = (unsigned long)pkt->data.frame.sz + 4;
- block_length |= 0x10000000;
- Ebml_Serialize(glob, &block_length, sizeof(block_length), 4);
+ block_length = (unsigned long)pkt->data.frame.sz + 4;
+ block_length |= 0x10000000;
+ Ebml_Serialize(glob, &block_length, sizeof(block_length), 4);
- track_number = 1;
- track_number |= 0x80;
- Ebml_Write(glob, &track_number, 1);
+ track_number = 1;
+ track_number |= 0x80;
+ Ebml_Write(glob, &track_number, 1);
- Ebml_Serialize(glob, &block_timecode, sizeof(block_timecode), 2);
+ Ebml_Serialize(glob, &block_timecode, sizeof(block_timecode), 2);
- flags = 0;
- if(is_keyframe)
- flags |= 0x80;
- if(pkt->data.frame.flags & VPX_FRAME_IS_INVISIBLE)
- flags |= 0x08;
- Ebml_Write(glob, &flags, 1);
+ flags = 0;
+ if (is_keyframe)
+ flags |= 0x80;
+ if (pkt->data.frame.flags & VPX_FRAME_IS_INVISIBLE)
+ flags |= 0x08;
+ Ebml_Write(glob, &flags, 1);
- Ebml_Write(glob, pkt->data.frame.buf, (unsigned long)pkt->data.frame.sz);
+ Ebml_Write(glob, pkt->data.frame.buf, (unsigned long)pkt->data.frame.sz);
}
static void
-write_webm_file_footer(EbmlGlobal *glob, long hash)
-{
+write_webm_file_footer(EbmlGlobal *glob, long hash) {
- if(glob->cluster_open)
- Ebml_EndSubElement(glob, &glob->startCluster);
+ if (glob->cluster_open)
+ Ebml_EndSubElement(glob, &glob->startCluster);
- {
+ {
+ EbmlLoc start;
+ unsigned int i;
+
+ glob->cue_pos = ftello(glob->stream);
+ Ebml_StartSubElement(glob, &start, Cues);
+ for (i = 0; i < glob->cues; i++) {
+ struct cue_entry *cue = &glob->cue_list[i];
+ EbmlLoc start;
+
+ Ebml_StartSubElement(glob, &start, CuePoint);
+ {
EbmlLoc start;
- unsigned int i;
- glob->cue_pos = ftello(glob->stream);
- Ebml_StartSubElement(glob, &start, Cues);
- for(i=0; i<glob->cues; i++)
- {
- struct cue_entry *cue = &glob->cue_list[i];
- EbmlLoc start;
+ Ebml_SerializeUnsigned(glob, CueTime, cue->time);
- Ebml_StartSubElement(glob, &start, CuePoint);
- {
- EbmlLoc start;
-
- Ebml_SerializeUnsigned(glob, CueTime, cue->time);
-
- Ebml_StartSubElement(glob, &start, CueTrackPositions);
- Ebml_SerializeUnsigned(glob, CueTrack, 1);
- Ebml_SerializeUnsigned64(glob, CueClusterPosition,
- cue->loc - glob->position_reference);
- Ebml_EndSubElement(glob, &start);
- }
- Ebml_EndSubElement(glob, &start);
- }
+ Ebml_StartSubElement(glob, &start, CueTrackPositions);
+ Ebml_SerializeUnsigned(glob, CueTrack, 1);
+ Ebml_SerializeUnsigned64(glob, CueClusterPosition,
+ cue->loc - glob->position_reference);
Ebml_EndSubElement(glob, &start);
+ }
+ Ebml_EndSubElement(glob, &start);
}
+ Ebml_EndSubElement(glob, &start);
+ }
- Ebml_EndSubElement(glob, &glob->startSegment);
+ Ebml_EndSubElement(glob, &glob->startSegment);
- /* Patch up the seek info block */
- write_webm_seek_info(glob);
+ /* Patch up the seek info block */
+ write_webm_seek_info(glob);
- /* Patch up the track id */
- fseeko(glob->stream, glob->track_id_pos, SEEK_SET);
- Ebml_SerializeUnsigned32(glob, TrackUID, glob->debug ? 0xDEADBEEF : hash);
+ /* Patch up the track id */
+ fseeko(glob->stream, glob->track_id_pos, SEEK_SET);
+ Ebml_SerializeUnsigned32(glob, TrackUID, glob->debug ? 0xDEADBEEF : hash);
- fseeko(glob->stream, 0, SEEK_END);
+ fseeko(glob->stream, 0, SEEK_END);
}
/* Murmur hash derived from public domain reference implementation at
- * http://sites.google.com/site/murmurhash/
+ * http://sites.google.com/site/murmurhash/
*/
-static unsigned int murmur ( const void * key, int len, unsigned int seed )
-{
- const unsigned int m = 0x5bd1e995;
- const int r = 24;
+static unsigned int murmur(const void *key, int len, unsigned int seed) {
+ const unsigned int m = 0x5bd1e995;
+ const int r = 24;
- unsigned int h = seed ^ len;
+ unsigned int h = seed ^ len;
- const unsigned char * data = (const unsigned char *)key;
+ const unsigned char *data = (const unsigned char *)key;
- while(len >= 4)
- {
- unsigned int k;
+ while (len >= 4) {
+ unsigned int k;
- k = data[0];
- k |= data[1] << 8;
- k |= data[2] << 16;
- k |= data[3] << 24;
+ k = data[0];
+ k |= data[1] << 8;
+ k |= data[2] << 16;
+ k |= data[3] << 24;
- k *= m;
- k ^= k >> r;
- k *= m;
+ k *= m;
+ k ^= k >> r;
+ k *= m;
- h *= m;
- h ^= k;
-
- data += 4;
- len -= 4;
- }
-
- switch(len)
- {
- case 3: h ^= data[2] << 16;
- case 2: h ^= data[1] << 8;
- case 1: h ^= data[0];
- h *= m;
- };
-
- h ^= h >> 13;
h *= m;
- h ^= h >> 15;
+ h ^= k;
- return h;
+ data += 4;
+ len -= 4;
+ }
+
+ switch (len) {
+ case 3:
+ h ^= data[2] << 16;
+ case 2:
+ h ^= data[1] << 8;
+ case 1:
+ h ^= data[0];
+ h *= m;
+ };
+
+ h ^= h >> 13;
+ h *= m;
+ h ^= h >> 15;
+
+ return h;
}
#include "math.h"
+#define MAX_PSNR 100
+static double vp8_mse2psnr(double Samples, double Peak, double Mse) {
+ double psnr;
-static double vp8_mse2psnr(double Samples, double Peak, double Mse)
-{
- double psnr;
+ if ((double)Mse > 0.0)
+ psnr = 10.0 * log10(Peak * Peak * Samples / Mse);
+ else
+ psnr = MAX_PSNR; /* Limit to prevent / 0 */
- if ((double)Mse > 0.0)
- psnr = 10.0 * log10(Peak * Peak * Samples / Mse);
- else
- psnr = 60; /* Limit to prevent / 0 */
+ if (psnr > MAX_PSNR)
+ psnr = MAX_PSNR;
- if (psnr > 60)
- psnr = 60;
-
- return psnr;
+ return psnr;
}
#include "args.h"
static const arg_def_t debugmode = ARG_DEF("D", "debug", 0,
- "Debug mode (makes output deterministic)");
+ "Debug mode (makes output deterministic)");
static const arg_def_t outputfile = ARG_DEF("o", "output", 1,
- "Output filename");
+ "Output filename");
static const arg_def_t use_yv12 = ARG_DEF(NULL, "yv12", 0,
- "Input file is YV12 ");
+ "Input file is YV12 ");
static const arg_def_t use_i420 = ARG_DEF(NULL, "i420", 0,
- "Input file is I420 (default)");
+ "Input file is I420 (default)");
static const arg_def_t codecarg = ARG_DEF(NULL, "codec", 1,
- "Codec to use");
+ "Codec to use");
static const arg_def_t passes = ARG_DEF("p", "passes", 1,
- "Number of passes (1/2)");
+ "Number of passes (1/2)");
static const arg_def_t pass_arg = ARG_DEF(NULL, "pass", 1,
- "Pass to execute (1/2)");
+ "Pass to execute (1/2)");
static const arg_def_t fpf_name = ARG_DEF(NULL, "fpf", 1,
- "First pass statistics file name");
+ "First pass statistics file name");
static const arg_def_t limit = ARG_DEF(NULL, "limit", 1,
"Stop encoding after n input frames");
+static const arg_def_t skip = ARG_DEF(NULL, "skip", 1,
+ "Skip the first n input frames");
static const arg_def_t deadline = ARG_DEF("d", "deadline", 1,
- "Deadline per frame (usec)");
+ "Deadline per frame (usec)");
static const arg_def_t best_dl = ARG_DEF(NULL, "best", 0,
- "Use Best Quality Deadline");
+ "Use Best Quality Deadline");
static const arg_def_t good_dl = ARG_DEF(NULL, "good", 0,
- "Use Good Quality Deadline");
+ "Use Good Quality Deadline");
static const arg_def_t rt_dl = ARG_DEF(NULL, "rt", 0,
- "Use Realtime Quality Deadline");
+ "Use Realtime Quality Deadline");
static const arg_def_t quietarg = ARG_DEF("q", "quiet", 0,
- "Do not print encode progress");
+ "Do not print encode progress");
static const arg_def_t verbosearg = ARG_DEF("v", "verbose", 0,
- "Show encoder parameters");
+ "Show encoder parameters");
static const arg_def_t psnrarg = ARG_DEF(NULL, "psnr", 0,
- "Show PSNR in status line");
+ "Show PSNR in status line");
+static const arg_def_t recontest = ARG_DEF(NULL, "test-decode", 0,
+ "Test encode/decode mismatch");
static const arg_def_t framerate = ARG_DEF(NULL, "fps", 1,
- "Stream frame rate (rate/scale)");
+ "Stream frame rate (rate/scale)");
static const arg_def_t use_ivf = ARG_DEF(NULL, "ivf", 0,
- "Output IVF (default is WebM)");
+ "Output IVF (default is WebM)");
static const arg_def_t out_part = ARG_DEF("P", "output-partitions", 0,
- "Makes encoder output partitions. Requires IVF output!");
+ "Makes encoder output partitions. Requires IVF output!");
static const arg_def_t q_hist_n = ARG_DEF(NULL, "q-hist", 1,
- "Show quantizer histogram (n-buckets)");
+ "Show quantizer histogram (n-buckets)");
static const arg_def_t rate_hist_n = ARG_DEF(NULL, "rate-hist", 1,
- "Show rate histogram (n-buckets)");
-static const arg_def_t *main_args[] =
-{
- &debugmode,
- &outputfile, &codecarg, &passes, &pass_arg, &fpf_name, &limit, &deadline,
- &best_dl, &good_dl, &rt_dl,
- &quietarg, &verbosearg, &psnrarg, &use_ivf, &out_part, &q_hist_n, &rate_hist_n,
- NULL
+ "Show rate histogram (n-buckets)");
+static const arg_def_t *main_args[] = {
+ &debugmode,
+ &outputfile, &codecarg, &passes, &pass_arg, &fpf_name, &limit, &skip,
+ &deadline, &best_dl, &good_dl, &rt_dl,
+ &quietarg, &verbosearg, &psnrarg, &use_ivf, &out_part, &q_hist_n, &rate_hist_n,
+ NULL
};
static const arg_def_t usage = ARG_DEF("u", "usage", 1,
- "Usage profile number to use");
+ "Usage profile number to use");
static const arg_def_t threads = ARG_DEF("t", "threads", 1,
- "Max number of threads to use");
+ "Max number of threads to use");
static const arg_def_t profile = ARG_DEF(NULL, "profile", 1,
- "Bitstream profile number to use");
+ "Bitstream profile number to use");
static const arg_def_t width = ARG_DEF("w", "width", 1,
- "Frame width");
+ "Frame width");
static const arg_def_t height = ARG_DEF("h", "height", 1,
- "Frame height");
+ "Frame height");
static const struct arg_enum_list stereo_mode_enum[] = {
- {"mono" , STEREO_FORMAT_MONO},
- {"left-right", STEREO_FORMAT_LEFT_RIGHT},
- {"bottom-top", STEREO_FORMAT_BOTTOM_TOP},
- {"top-bottom", STEREO_FORMAT_TOP_BOTTOM},
- {"right-left", STEREO_FORMAT_RIGHT_LEFT},
- {NULL, 0}
+ {"mono", STEREO_FORMAT_MONO},
+ {"left-right", STEREO_FORMAT_LEFT_RIGHT},
+ {"bottom-top", STEREO_FORMAT_BOTTOM_TOP},
+ {"top-bottom", STEREO_FORMAT_TOP_BOTTOM},
+ {"right-left", STEREO_FORMAT_RIGHT_LEFT},
+ {NULL, 0}
};
static const arg_def_t stereo_mode = ARG_DEF_ENUM(NULL, "stereo-mode", 1,
- "Stereo 3D video format", stereo_mode_enum);
+ "Stereo 3D video format", stereo_mode_enum);
static const arg_def_t timebase = ARG_DEF(NULL, "timebase", 1,
- "Output timestamp precision (fractional seconds)");
+ "Output timestamp precision (fractional seconds)");
static const arg_def_t error_resilient = ARG_DEF(NULL, "error-resilient", 1,
- "Enable error resiliency features");
+ "Enable error resiliency features");
static const arg_def_t lag_in_frames = ARG_DEF(NULL, "lag-in-frames", 1,
- "Max number of frames to lag");
+ "Max number of frames to lag");
-static const arg_def_t *global_args[] =
-{
- &use_yv12, &use_i420, &usage, &threads, &profile,
- &width, &height, &stereo_mode, &timebase, &framerate, &error_resilient,
- &lag_in_frames, NULL
+static const arg_def_t *global_args[] = {
+ &use_yv12, &use_i420, &usage, &threads, &profile,
+ &width, &height, &stereo_mode, &timebase, &framerate, &error_resilient,
+ &lag_in_frames, NULL
};
static const arg_def_t dropframe_thresh = ARG_DEF(NULL, "drop-frame", 1,
- "Temporal resampling threshold (buf %)");
+ "Temporal resampling threshold (buf %)");
static const arg_def_t resize_allowed = ARG_DEF(NULL, "resize-allowed", 1,
- "Spatial resampling enabled (bool)");
+ "Spatial resampling enabled (bool)");
static const arg_def_t resize_up_thresh = ARG_DEF(NULL, "resize-up", 1,
- "Upscale threshold (buf %)");
+ "Upscale threshold (buf %)");
static const arg_def_t resize_down_thresh = ARG_DEF(NULL, "resize-down", 1,
- "Downscale threshold (buf %)");
+ "Downscale threshold (buf %)");
static const struct arg_enum_list end_usage_enum[] = {
- {"vbr", VPX_VBR},
- {"cbr", VPX_CBR},
- {"cq", VPX_CQ},
- {NULL, 0}
+ {"vbr", VPX_VBR},
+ {"cbr", VPX_CBR},
+ {"cq", VPX_CQ},
+ {NULL, 0}
};
static const arg_def_t end_usage = ARG_DEF_ENUM(NULL, "end-usage", 1,
- "Rate control mode", end_usage_enum);
+ "Rate control mode", end_usage_enum);
static const arg_def_t target_bitrate = ARG_DEF(NULL, "target-bitrate", 1,
- "Bitrate (kbps)");
+ "Bitrate (kbps)");
static const arg_def_t min_quantizer = ARG_DEF(NULL, "min-q", 1,
- "Minimum (best) quantizer");
+ "Minimum (best) quantizer");
static const arg_def_t max_quantizer = ARG_DEF(NULL, "max-q", 1,
- "Maximum (worst) quantizer");
+ "Maximum (worst) quantizer");
static const arg_def_t undershoot_pct = ARG_DEF(NULL, "undershoot-pct", 1,
- "Datarate undershoot (min) target (%)");
+ "Datarate undershoot (min) target (%)");
static const arg_def_t overshoot_pct = ARG_DEF(NULL, "overshoot-pct", 1,
- "Datarate overshoot (max) target (%)");
+ "Datarate overshoot (max) target (%)");
static const arg_def_t buf_sz = ARG_DEF(NULL, "buf-sz", 1,
- "Client buffer size (ms)");
+ "Client buffer size (ms)");
static const arg_def_t buf_initial_sz = ARG_DEF(NULL, "buf-initial-sz", 1,
- "Client initial buffer size (ms)");
+ "Client initial buffer size (ms)");
static const arg_def_t buf_optimal_sz = ARG_DEF(NULL, "buf-optimal-sz", 1,
- "Client optimal buffer size (ms)");
-static const arg_def_t *rc_args[] =
-{
- &dropframe_thresh, &resize_allowed, &resize_up_thresh, &resize_down_thresh,
- &end_usage, &target_bitrate, &min_quantizer, &max_quantizer,
- &undershoot_pct, &overshoot_pct, &buf_sz, &buf_initial_sz, &buf_optimal_sz,
- NULL
+ "Client optimal buffer size (ms)");
+static const arg_def_t *rc_args[] = {
+ &dropframe_thresh, &resize_allowed, &resize_up_thresh, &resize_down_thresh,
+ &end_usage, &target_bitrate, &min_quantizer, &max_quantizer,
+ &undershoot_pct, &overshoot_pct, &buf_sz, &buf_initial_sz, &buf_optimal_sz,
+ NULL
};
static const arg_def_t bias_pct = ARG_DEF(NULL, "bias-pct", 1,
- "CBR/VBR bias (0=CBR, 100=VBR)");
+ "CBR/VBR bias (0=CBR, 100=VBR)");
static const arg_def_t minsection_pct = ARG_DEF(NULL, "minsection-pct", 1,
- "GOP min bitrate (% of target)");
+ "GOP min bitrate (% of target)");
static const arg_def_t maxsection_pct = ARG_DEF(NULL, "maxsection-pct", 1,
- "GOP max bitrate (% of target)");
-static const arg_def_t *rc_twopass_args[] =
-{
- &bias_pct, &minsection_pct, &maxsection_pct, NULL
+ "GOP max bitrate (% of target)");
+static const arg_def_t *rc_twopass_args[] = {
+ &bias_pct, &minsection_pct, &maxsection_pct, NULL
};
static const arg_def_t kf_min_dist = ARG_DEF(NULL, "kf-min-dist", 1,
- "Minimum keyframe interval (frames)");
+ "Minimum keyframe interval (frames)");
static const arg_def_t kf_max_dist = ARG_DEF(NULL, "kf-max-dist", 1,
- "Maximum keyframe interval (frames)");
+ "Maximum keyframe interval (frames)");
static const arg_def_t kf_disabled = ARG_DEF(NULL, "disable-kf", 0,
- "Disable keyframe placement");
-static const arg_def_t *kf_args[] =
-{
- &kf_min_dist, &kf_max_dist, &kf_disabled, NULL
+ "Disable keyframe placement");
+static const arg_def_t *kf_args[] = {
+ &kf_min_dist, &kf_max_dist, &kf_disabled, NULL
};
-#if CONFIG_VP8_ENCODER
static const arg_def_t noise_sens = ARG_DEF(NULL, "noise-sensitivity", 1,
- "Noise sensitivity (frames to blur)");
+ "Noise sensitivity (frames to blur)");
static const arg_def_t sharpness = ARG_DEF(NULL, "sharpness", 1,
- "Filter sharpness (0-7)");
+ "Filter sharpness (0-7)");
static const arg_def_t static_thresh = ARG_DEF(NULL, "static-thresh", 1,
- "Motion detection threshold");
-#endif
-
-#if CONFIG_VP8_ENCODER
+ "Motion detection threshold");
static const arg_def_t cpu_used = ARG_DEF(NULL, "cpu-used", 1,
- "CPU Used (-16..16)");
-#endif
-
-
-#if CONFIG_VP8_ENCODER
+ "CPU Used (-16..16)");
static const arg_def_t token_parts = ARG_DEF(NULL, "token-parts", 1,
- "Number of token partitions to use, log2");
+ "Number of token partitions to use, log2");
static const arg_def_t auto_altref = ARG_DEF(NULL, "auto-alt-ref", 1,
- "Enable automatic alt reference frames");
+ "Enable automatic alt reference frames");
static const arg_def_t arnr_maxframes = ARG_DEF(NULL, "arnr-maxframes", 1,
- "AltRef Max Frames");
+ "AltRef Max Frames");
static const arg_def_t arnr_strength = ARG_DEF(NULL, "arnr-strength", 1,
- "AltRef Strength");
+ "AltRef Strength");
static const arg_def_t arnr_type = ARG_DEF(NULL, "arnr-type", 1,
- "AltRef Type");
+ "AltRef Type");
static const struct arg_enum_list tuning_enum[] = {
- {"psnr", VP8_TUNE_PSNR},
- {"ssim", VP8_TUNE_SSIM},
- {NULL, 0}
+ {"psnr", VP8_TUNE_PSNR},
+ {"ssim", VP8_TUNE_SSIM},
+ {NULL, 0}
};
static const arg_def_t tune_ssim = ARG_DEF_ENUM(NULL, "tune", 1,
- "Material to favor", tuning_enum);
+ "Material to favor", tuning_enum);
static const arg_def_t cq_level = ARG_DEF(NULL, "cq-level", 1,
- "Constrained Quality Level");
+ "Constrained Quality Level");
static const arg_def_t max_intra_rate_pct = ARG_DEF(NULL, "max-intra-rate", 1,
- "Max I-frame bitrate (pct)");
+ "Max I-frame bitrate (pct)");
+#if CONFIG_LOSSLESS
+static const arg_def_t lossless = ARG_DEF(NULL, "lossless", 1, "Lossless mode");
+#endif
-static const arg_def_t *vp8_args[] =
-{
- &cpu_used, &auto_altref, &noise_sens, &sharpness, &static_thresh,
- &token_parts, &arnr_maxframes, &arnr_strength, &arnr_type,
- &tune_ssim, &cq_level, &max_intra_rate_pct, NULL
+#if CONFIG_VP8_ENCODER
+static const arg_def_t *vp8_args[] = {
+ &cpu_used, &auto_altref, &noise_sens, &sharpness, &static_thresh,
+ &token_parts, &arnr_maxframes, &arnr_strength, &arnr_type,
+ &tune_ssim, &cq_level, &max_intra_rate_pct,
+ NULL
};
-static const int vp8_arg_ctrl_map[] =
-{
- VP8E_SET_CPUUSED, VP8E_SET_ENABLEAUTOALTREF,
- VP8E_SET_NOISE_SENSITIVITY, VP8E_SET_SHARPNESS, VP8E_SET_STATIC_THRESHOLD,
- VP8E_SET_TOKEN_PARTITIONS,
- VP8E_SET_ARNR_MAXFRAMES, VP8E_SET_ARNR_STRENGTH , VP8E_SET_ARNR_TYPE,
- VP8E_SET_TUNING, VP8E_SET_CQ_LEVEL, VP8E_SET_MAX_INTRA_BITRATE_PCT, 0
+static const int vp8_arg_ctrl_map[] = {
+ VP8E_SET_CPUUSED, VP8E_SET_ENABLEAUTOALTREF,
+ VP8E_SET_NOISE_SENSITIVITY, VP8E_SET_SHARPNESS, VP8E_SET_STATIC_THRESHOLD,
+ VP8E_SET_TOKEN_PARTITIONS,
+ VP8E_SET_ARNR_MAXFRAMES, VP8E_SET_ARNR_STRENGTH, VP8E_SET_ARNR_TYPE,
+ VP8E_SET_TUNING, VP8E_SET_CQ_LEVEL, VP8E_SET_MAX_INTRA_BITRATE_PCT,
+ 0
+};
+#endif
+
+#if CONFIG_VP9_ENCODER
+static const arg_def_t *vp9_args[] = {
+ &cpu_used, &auto_altref, &noise_sens, &sharpness, &static_thresh,
+ &token_parts, &arnr_maxframes, &arnr_strength, &arnr_type,
+ &tune_ssim, &cq_level, &max_intra_rate_pct,
+#if CONFIG_LOSSLESS
+ &lossless,
+#endif
+ NULL
+};
+static const int vp9_arg_ctrl_map[] = {
+ VP8E_SET_CPUUSED, VP8E_SET_ENABLEAUTOALTREF,
+ VP8E_SET_NOISE_SENSITIVITY, VP8E_SET_SHARPNESS, VP8E_SET_STATIC_THRESHOLD,
+ VP8E_SET_TOKEN_PARTITIONS,
+ VP8E_SET_ARNR_MAXFRAMES, VP8E_SET_ARNR_STRENGTH, VP8E_SET_ARNR_TYPE,
+ VP8E_SET_TUNING, VP8E_SET_CQ_LEVEL, VP8E_SET_MAX_INTRA_BITRATE_PCT,
+#if CONFIG_LOSSLESS
+ VP9E_SET_LOSSLESS,
+#endif
+ 0
};
#endif
static const arg_def_t *no_args[] = { NULL };
-static void usage_exit()
-{
- int i;
+static void usage_exit() {
+ int i;
- fprintf(stderr, "Usage: %s <options> -o dst_filename src_filename \n",
- exec_name);
+ fprintf(stderr, "Usage: %s <options> -o dst_filename src_filename \n",
+ exec_name);
- fprintf(stderr, "\nOptions:\n");
- arg_show_usage(stdout, main_args);
- fprintf(stderr, "\nEncoder Global Options:\n");
- arg_show_usage(stdout, global_args);
- fprintf(stderr, "\nRate Control Options:\n");
- arg_show_usage(stdout, rc_args);
- fprintf(stderr, "\nTwopass Rate Control Options:\n");
- arg_show_usage(stdout, rc_twopass_args);
- fprintf(stderr, "\nKeyframe Placement Options:\n");
- arg_show_usage(stdout, kf_args);
+ fprintf(stderr, "\nOptions:\n");
+ arg_show_usage(stdout, main_args);
+ fprintf(stderr, "\nEncoder Global Options:\n");
+ arg_show_usage(stdout, global_args);
+ fprintf(stderr, "\nRate Control Options:\n");
+ arg_show_usage(stdout, rc_args);
+ fprintf(stderr, "\nTwopass Rate Control Options:\n");
+ arg_show_usage(stdout, rc_twopass_args);
+ fprintf(stderr, "\nKeyframe Placement Options:\n");
+ arg_show_usage(stdout, kf_args);
#if CONFIG_VP8_ENCODER
- fprintf(stderr, "\nVP8 Specific Options:\n");
- arg_show_usage(stdout, vp8_args);
+ fprintf(stderr, "\nVP8 Specific Options:\n");
+ arg_show_usage(stdout, vp8_args);
#endif
- fprintf(stderr, "\nStream timebase (--timebase):\n"
- " The desired precision of timestamps in the output, expressed\n"
- " in fractional seconds. Default is 1/1000.\n");
- fprintf(stderr, "\n"
- "Included encoders:\n"
- "\n");
+#if CONFIG_VP9_ENCODER
+ fprintf(stderr, "\nVP9 Specific Options:\n");
+ arg_show_usage(stdout, vp9_args);
+#endif
+ fprintf(stderr, "\nStream timebase (--timebase):\n"
+ " The desired precision of timestamps in the output, expressed\n"
+ " in fractional seconds. Default is 1/1000.\n");
+ fprintf(stderr, "\n"
+ "Included encoders:\n"
+ "\n");
- for (i = 0; i < sizeof(codecs) / sizeof(codecs[0]); i++)
- fprintf(stderr, " %-6s - %s\n",
- codecs[i].name,
- vpx_codec_iface_name(codecs[i].iface));
+ for (i = 0; i < sizeof(codecs) / sizeof(codecs[0]); i++)
+ fprintf(stderr, " %-6s - %s\n",
+ codecs[i].name,
+ vpx_codec_iface_name(codecs[i].iface()));
- exit(EXIT_FAILURE);
+ exit(EXIT_FAILURE);
}
#define HIST_BAR_MAX 40
-struct hist_bucket
-{
- int low, high, count;
+struct hist_bucket {
+ int low, high, count;
};
static int merge_hist_buckets(struct hist_bucket *bucket,
int *buckets_,
- int max_buckets)
-{
- int small_bucket = 0, merge_bucket = INT_MAX, big_bucket=0;
- int buckets = *buckets_;
- int i;
+ int max_buckets) {
+ int small_bucket = 0, merge_bucket = INT_MAX, big_bucket = 0;
+ int buckets = *buckets_;
+ int i;
- /* Find the extrema for this list of buckets */
- big_bucket = small_bucket = 0;
- for(i=0; i < buckets; i++)
- {
- if(bucket[i].count < bucket[small_bucket].count)
- small_bucket = i;
- if(bucket[i].count > bucket[big_bucket].count)
- big_bucket = i;
+ /* Find the extrema for this list of buckets */
+ big_bucket = small_bucket = 0;
+ for (i = 0; i < buckets; i++) {
+ if (bucket[i].count < bucket[small_bucket].count)
+ small_bucket = i;
+ if (bucket[i].count > bucket[big_bucket].count)
+ big_bucket = i;
+ }
+
+ /* If we have too many buckets, merge the smallest with an adjacent
+ * bucket.
+ */
+ while (buckets > max_buckets) {
+ int last_bucket = buckets - 1;
+
+ /* merge the small bucket with an adjacent one. */
+ if (small_bucket == 0)
+ merge_bucket = 1;
+ else if (small_bucket == last_bucket)
+ merge_bucket = last_bucket - 1;
+ else if (bucket[small_bucket - 1].count < bucket[small_bucket + 1].count)
+ merge_bucket = small_bucket - 1;
+ else
+ merge_bucket = small_bucket + 1;
+
+ assert(abs(merge_bucket - small_bucket) <= 1);
+ assert(small_bucket < buckets);
+ assert(big_bucket < buckets);
+ assert(merge_bucket < buckets);
+
+ if (merge_bucket < small_bucket) {
+ bucket[merge_bucket].high = bucket[small_bucket].high;
+ bucket[merge_bucket].count += bucket[small_bucket].count;
+ } else {
+ bucket[small_bucket].high = bucket[merge_bucket].high;
+ bucket[small_bucket].count += bucket[merge_bucket].count;
+ merge_bucket = small_bucket;
}
- /* If we have too many buckets, merge the smallest with an adjacent
- * bucket.
+ assert(bucket[merge_bucket].low != bucket[merge_bucket].high);
+
+ buckets--;
+
+ /* Remove the merge_bucket from the list, and find the new small
+ * and big buckets while we're at it
*/
- while(buckets > max_buckets)
- {
- int last_bucket = buckets - 1;
+ big_bucket = small_bucket = 0;
+ for (i = 0; i < buckets; i++) {
+ if (i > merge_bucket)
+ bucket[i] = bucket[i + 1];
- /* merge the small bucket with an adjacent one. */
- if(small_bucket == 0)
- merge_bucket = 1;
- else if(small_bucket == last_bucket)
- merge_bucket = last_bucket - 1;
- else if(bucket[small_bucket - 1].count < bucket[small_bucket + 1].count)
- merge_bucket = small_bucket - 1;
- else
- merge_bucket = small_bucket + 1;
-
- assert(abs(merge_bucket - small_bucket) <= 1);
- assert(small_bucket < buckets);
- assert(big_bucket < buckets);
- assert(merge_bucket < buckets);
-
- if(merge_bucket < small_bucket)
- {
- bucket[merge_bucket].high = bucket[small_bucket].high;
- bucket[merge_bucket].count += bucket[small_bucket].count;
- }
- else
- {
- bucket[small_bucket].high = bucket[merge_bucket].high;
- bucket[small_bucket].count += bucket[merge_bucket].count;
- merge_bucket = small_bucket;
- }
-
- assert(bucket[merge_bucket].low != bucket[merge_bucket].high);
-
- buckets--;
-
- /* Remove the merge_bucket from the list, and find the new small
- * and big buckets while we're at it
- */
- big_bucket = small_bucket = 0;
- for(i=0; i < buckets; i++)
- {
- if(i > merge_bucket)
- bucket[i] = bucket[i+1];
-
- if(bucket[i].count < bucket[small_bucket].count)
- small_bucket = i;
- if(bucket[i].count > bucket[big_bucket].count)
- big_bucket = i;
- }
-
+ if (bucket[i].count < bucket[small_bucket].count)
+ small_bucket = i;
+ if (bucket[i].count > bucket[big_bucket].count)
+ big_bucket = i;
}
- *buckets_ = buckets;
- return bucket[big_bucket].count;
+ }
+
+ *buckets_ = buckets;
+ return bucket[big_bucket].count;
}
static void show_histogram(const struct hist_bucket *bucket,
int buckets,
int total,
- int scale)
-{
- const char *pat1, *pat2;
- int i;
+ int scale) {
+ const char *pat1, *pat2;
+ int i;
- switch((int)(log(bucket[buckets-1].high)/log(10))+1)
- {
- case 1:
- case 2:
- pat1 = "%4d %2s: ";
- pat2 = "%4d-%2d: ";
- break;
- case 3:
- pat1 = "%5d %3s: ";
- pat2 = "%5d-%3d: ";
- break;
- case 4:
- pat1 = "%6d %4s: ";
- pat2 = "%6d-%4d: ";
- break;
- case 5:
- pat1 = "%7d %5s: ";
- pat2 = "%7d-%5d: ";
- break;
- case 6:
- pat1 = "%8d %6s: ";
- pat2 = "%8d-%6d: ";
- break;
- case 7:
- pat1 = "%9d %7s: ";
- pat2 = "%9d-%7d: ";
- break;
- default:
- pat1 = "%12d %10s: ";
- pat2 = "%12d-%10d: ";
- break;
- }
+ switch ((int)(log(bucket[buckets - 1].high) / log(10)) + 1) {
+ case 1:
+ case 2:
+ pat1 = "%4d %2s: ";
+ pat2 = "%4d-%2d: ";
+ break;
+ case 3:
+ pat1 = "%5d %3s: ";
+ pat2 = "%5d-%3d: ";
+ break;
+ case 4:
+ pat1 = "%6d %4s: ";
+ pat2 = "%6d-%4d: ";
+ break;
+ case 5:
+ pat1 = "%7d %5s: ";
+ pat2 = "%7d-%5d: ";
+ break;
+ case 6:
+ pat1 = "%8d %6s: ";
+ pat2 = "%8d-%6d: ";
+ break;
+ case 7:
+ pat1 = "%9d %7s: ";
+ pat2 = "%9d-%7d: ";
+ break;
+ default:
+ pat1 = "%12d %10s: ";
+ pat2 = "%12d-%10d: ";
+ break;
+ }
- for(i=0; i<buckets; i++)
- {
- int len;
- int j;
- float pct;
+ for (i = 0; i < buckets; i++) {
+ int len;
+ int j;
+ float pct;
- pct = (float)(100.0 * bucket[i].count / total);
- len = HIST_BAR_MAX * bucket[i].count / scale;
- if(len < 1)
- len = 1;
- assert(len <= HIST_BAR_MAX);
+ pct = (float)(100.0 * bucket[i].count / total);
+ len = HIST_BAR_MAX * bucket[i].count / scale;
+ if (len < 1)
+ len = 1;
+ assert(len <= HIST_BAR_MAX);
- if(bucket[i].low == bucket[i].high)
- fprintf(stderr, pat1, bucket[i].low, "");
- else
- fprintf(stderr, pat2, bucket[i].low, bucket[i].high);
+ if (bucket[i].low == bucket[i].high)
+ fprintf(stderr, pat1, bucket[i].low, "");
+ else
+ fprintf(stderr, pat2, bucket[i].low, bucket[i].high);
- for(j=0; j<HIST_BAR_MAX; j++)
- fprintf(stderr, j<len?"=":" ");
- fprintf(stderr, "\t%5d (%6.2f%%)\n",bucket[i].count,pct);
- }
+ for (j = 0; j < HIST_BAR_MAX; j++)
+ fprintf(stderr, j < len ? "=" : " ");
+ fprintf(stderr, "\t%5d (%6.2f%%)\n", bucket[i].count, pct);
+ }
}
-static void show_q_histogram(const int counts[64], int max_buckets)
-{
- struct hist_bucket bucket[64];
- int buckets = 0;
- int total = 0;
- int scale;
- int i;
+static void show_q_histogram(const int counts[64], int max_buckets) {
+ struct hist_bucket bucket[64];
+ int buckets = 0;
+ int total = 0;
+ int scale;
+ int i;
- for(i=0; i<64; i++)
- {
- if(counts[i])
- {
- bucket[buckets].low = bucket[buckets].high = i;
- bucket[buckets].count = counts[i];
- buckets++;
- total += counts[i];
- }
+ for (i = 0; i < 64; i++) {
+ if (counts[i]) {
+ bucket[buckets].low = bucket[buckets].high = i;
+ bucket[buckets].count = counts[i];
+ buckets++;
+ total += counts[i];
}
+ }
- fprintf(stderr, "\nQuantizer Selection:\n");
- scale = merge_hist_buckets(bucket, &buckets, max_buckets);
- show_histogram(bucket, buckets, total, scale);
+ fprintf(stderr, "\nQuantizer Selection:\n");
+ scale = merge_hist_buckets(bucket, &buckets, max_buckets);
+ show_histogram(bucket, buckets, total, scale);
}
#define RATE_BINS (100)
-struct rate_hist
-{
- int64_t *pts;
- int *sz;
- int samples;
- int frames;
- struct hist_bucket bucket[RATE_BINS];
- int total;
+struct rate_hist {
+ int64_t *pts;
+ int *sz;
+ int samples;
+ int frames;
+ struct hist_bucket bucket[RATE_BINS];
+ int total;
};
static void init_rate_histogram(struct rate_hist *hist,
const vpx_codec_enc_cfg_t *cfg,
- const vpx_rational_t *fps)
-{
- int i;
+ const vpx_rational_t *fps) {
+ int i;
- /* Determine the number of samples in the buffer. Use the file's framerate
- * to determine the number of frames in rc_buf_sz milliseconds, with an
- * adjustment (5/4) to account for alt-refs
- */
- hist->samples = cfg->rc_buf_sz * 5 / 4 * fps->num / fps->den / 1000;
+ /* Determine the number of samples in the buffer. Use the file's framerate
+ * to determine the number of frames in rc_buf_sz milliseconds, with an
+ * adjustment (5/4) to account for alt-refs
+ */
+ hist->samples = cfg->rc_buf_sz * 5 / 4 * fps->num / fps->den / 1000;
- /* prevent division by zero */
- if (hist->samples == 0)
- hist->samples=1;
+ /* prevent division by zero */
+ if (hist->samples == 0)
+ hist->samples = 1;
- hist->pts = calloc(hist->samples, sizeof(*hist->pts));
- hist->sz = calloc(hist->samples, sizeof(*hist->sz));
- for(i=0; i<RATE_BINS; i++)
- {
- hist->bucket[i].low = INT_MAX;
- hist->bucket[i].high = 0;
- hist->bucket[i].count = 0;
- }
+ hist->pts = calloc(hist->samples, sizeof(*hist->pts));
+ hist->sz = calloc(hist->samples, sizeof(*hist->sz));
+ for (i = 0; i < RATE_BINS; i++) {
+ hist->bucket[i].low = INT_MAX;
+ hist->bucket[i].high = 0;
+ hist->bucket[i].count = 0;
+ }
}
-static void destroy_rate_histogram(struct rate_hist *hist)
-{
- free(hist->pts);
- free(hist->sz);
+static void destroy_rate_histogram(struct rate_hist *hist) {
+ free(hist->pts);
+ free(hist->sz);
}
static void update_rate_histogram(struct rate_hist *hist,
const vpx_codec_enc_cfg_t *cfg,
- const vpx_codec_cx_pkt_t *pkt)
-{
- int i, idx;
- int64_t now, then, sum_sz = 0, avg_bitrate;
+ const vpx_codec_cx_pkt_t *pkt) {
+ int i, idx;
+ int64_t now, then, sum_sz = 0, avg_bitrate;
- now = pkt->data.frame.pts * 1000
- * (uint64_t)cfg->g_timebase.num / (uint64_t)cfg->g_timebase.den;
+ now = pkt->data.frame.pts * 1000
+ * (uint64_t)cfg->g_timebase.num / (uint64_t)cfg->g_timebase.den;
- idx = hist->frames++ % hist->samples;
- hist->pts[idx] = now;
- hist->sz[idx] = (int)pkt->data.frame.sz;
+ idx = hist->frames++ % hist->samples;
+ hist->pts[idx] = now;
+ hist->sz[idx] = (int)pkt->data.frame.sz;
- if(now < cfg->rc_buf_initial_sz)
- return;
+ if (now < cfg->rc_buf_initial_sz)
+ return;
- then = now;
+ then = now;
- /* Sum the size over the past rc_buf_sz ms */
- for(i = hist->frames; i > 0 && hist->frames - i < hist->samples; i--)
- {
- int i_idx = (i-1) % hist->samples;
+ /* Sum the size over the past rc_buf_sz ms */
+ for (i = hist->frames; i > 0 && hist->frames - i < hist->samples; i--) {
+ int i_idx = (i - 1) % hist->samples;
- then = hist->pts[i_idx];
- if(now - then > cfg->rc_buf_sz)
- break;
- sum_sz += hist->sz[i_idx];
- }
+ then = hist->pts[i_idx];
+ if (now - then > cfg->rc_buf_sz)
+ break;
+ sum_sz += hist->sz[i_idx];
+ }
- if (now == then)
- return;
+ if (now == then)
+ return;
- avg_bitrate = sum_sz * 8 * 1000 / (now - then);
- idx = (int)(avg_bitrate * (RATE_BINS/2) / (cfg->rc_target_bitrate * 1000));
- if(idx < 0)
- idx = 0;
- if(idx > RATE_BINS-1)
- idx = RATE_BINS-1;
- if(hist->bucket[idx].low > avg_bitrate)
- hist->bucket[idx].low = (int)avg_bitrate;
- if(hist->bucket[idx].high < avg_bitrate)
- hist->bucket[idx].high = (int)avg_bitrate;
- hist->bucket[idx].count++;
- hist->total++;
+ avg_bitrate = sum_sz * 8 * 1000 / (now - then);
+ idx = (int)(avg_bitrate * (RATE_BINS / 2) / (cfg->rc_target_bitrate * 1000));
+ if (idx < 0)
+ idx = 0;
+ if (idx > RATE_BINS - 1)
+ idx = RATE_BINS - 1;
+ if (hist->bucket[idx].low > avg_bitrate)
+ hist->bucket[idx].low = (int)avg_bitrate;
+ if (hist->bucket[idx].high < avg_bitrate)
+ hist->bucket[idx].high = (int)avg_bitrate;
+ hist->bucket[idx].count++;
+ hist->total++;
}
static void show_rate_histogram(struct rate_hist *hist,
const vpx_codec_enc_cfg_t *cfg,
- int max_buckets)
-{
- int i, scale;
- int buckets = 0;
+ int max_buckets) {
+ int i, scale;
+ int buckets = 0;
- for(i = 0; i < RATE_BINS; i++)
- {
- if(hist->bucket[i].low == INT_MAX)
- continue;
- hist->bucket[buckets++] = hist->bucket[i];
- }
+ for (i = 0; i < RATE_BINS; i++) {
+ if (hist->bucket[i].low == INT_MAX)
+ continue;
+ hist->bucket[buckets++] = hist->bucket[i];
+ }
- fprintf(stderr, "\nRate (over %dms window):\n", cfg->rc_buf_sz);
- scale = merge_hist_buckets(hist->bucket, &buckets, max_buckets);
- show_histogram(hist->bucket, buckets, hist->total, scale);
+ fprintf(stderr, "\nRate (over %dms window):\n", cfg->rc_buf_sz);
+ scale = merge_hist_buckets(hist->bucket, &buckets, max_buckets);
+ show_histogram(hist->bucket, buckets, hist->total, scale);
}
-#define NELEMENTS(x) (sizeof(x)/sizeof(x[0]))
-#define ARG_CTRL_CNT_MAX NELEMENTS(vp8_arg_ctrl_map)
+#define mmin(a, b) ((a) < (b) ? (a) : (b))
+/* Locates the first sample at which two images differ in one plane.
+ *
+ * The w x h area of `plane` is walked in bsize x bsize tiles (tiles left to
+ * right, then top to bottom; samples row by row inside a tile). On the first
+ * difference loc[0] receives the row and loc[1] the column; both stay -1
+ * when the whole area matches.
+ */
+static void find_plane_mismatch(vpx_image_t *img1, vpx_image_t *img2,
+                                int plane, int w, int h, int bsize,
+                                int loc[2]) {
+  int i, j, k, l;
+
+  loc[0] = loc[1] = -1;
+  for (i = 0; i < h; i += bsize) {
+    for (j = 0; j < w; j += bsize) {
+      /* Clip the tile against the bottom/right edge of the scanned area. */
+      const int si = mmin(i + bsize, h) - i;
+      const int sj = mmin(j + bsize, w) - j;
+
+      for (k = 0; k < si; k++) {
+        for (l = 0; l < sj; l++) {
+          if (*(img1->planes[plane] +
+                (i + k) * img1->stride[plane] + j + l) !=
+              *(img2->planes[plane] +
+                (i + k) * img2->stride[plane] + j + l)) {
+            loc[0] = i + k;
+            loc[1] = j + l;
+            return;
+          }
+        }
+      }
+    }
+  }
+}
+
+/* Reports where img1 and img2 first differ, separately for each plane.
+ *
+ * yloc, uloc and vloc each receive {row, column} of the first mismatching
+ * sample in the Y, U and V plane, or {-1, -1} if that plane matches. Y is
+ * scanned over d_w x d_h in 32x32 tiles; U and V over
+ * ((d_w + 1) / 2) x ((d_h + 1) / 2) in 16x16 tiles. Dimensions are taken
+ * from img1.
+ *
+ * All three planes share one scan routine so that the chroma planes are
+ * searched across their full width: a column-loop condition of the form
+ * `j < match && width` stops after the first 16-column strip and misses
+ * mismatches further to the right.
+ */
+static void find_mismatch(vpx_image_t *img1, vpx_image_t *img2,
+                          int yloc[2], int uloc[2], int vloc[2]) {
+  const int c_w = (img1->d_w + 1) / 2;
+  const int c_h = (img1->d_h + 1) / 2;
+
+  find_plane_mismatch(img1, img2, VPX_PLANE_Y, img1->d_w, img1->d_h, 32, yloc);
+  find_plane_mismatch(img1, img2, VPX_PLANE_U, c_w, c_h, 16, uloc);
+  find_plane_mismatch(img1, img2, VPX_PLANE_V, c_w, c_h, 16, vloc);
+}
+/* Tests whether two images are identical.
+ *
+ * Returns 1 when fmt, w and h agree and the Y, U and V planes hold the same
+ * samples, 0 otherwise. Y is compared over d_w x d_h and U/V over
+ * ((d_w + 1) / 2) x ((d_h + 1) / 2), using img1's dimensions and each
+ * image's own stride for the plane being compared.
+ */
+static int compare_img(vpx_image_t *img1, vpx_image_t *img2) {
+  /* Round up so the last chroma row/column of odd-sized images is checked,
+   * matching the area find_mismatch() scans.
+   */
+  const int c_w = (img1->d_w + 1) / 2;
+  const int c_h = (img1->d_h + 1) / 2;
+  int match = 1;
+  int i;
+
+  match &= (img1->fmt == img2->fmt);
+  match &= (img1->w == img2->w);
+  match &= (img1->h == img2->h);
+
+  /* The result is already 0; skip reading sample data from two images
+   * whose format or size disagree.
+   */
+  if (!match)
+    return 0;
+
+  for (i = 0; i < img1->d_h; i++)
+    match &= (memcmp(img1->planes[VPX_PLANE_Y] + i * img1->stride[VPX_PLANE_Y],
+                     img2->planes[VPX_PLANE_Y] + i * img2->stride[VPX_PLANE_Y],
+                     img1->d_w) == 0);
+
+  for (i = 0; i < c_h; i++)
+    match &= (memcmp(img1->planes[VPX_PLANE_U] + i * img1->stride[VPX_PLANE_U],
+                     img2->planes[VPX_PLANE_U] + i * img2->stride[VPX_PLANE_U],
+                     c_w) == 0);
+
+  /* Rows of the V plane are addressed with the V stride, not the U stride. */
+  for (i = 0; i < c_h; i++)
+    match &= (memcmp(img1->planes[VPX_PLANE_V] + i * img1->stride[VPX_PLANE_V],
+                     img2->planes[VPX_PLANE_V] + i * img2->stride[VPX_PLANE_V],
+                     c_w) == 0);
+
+  return match;
+}
+
+
+#define NELEMENTS(x) (sizeof(x)/sizeof(x[0]))
+#define MAX(x,y) ((x)>(y)?(x):(y))
+#if CONFIG_VP8_ENCODER && !CONFIG_VP9_ENCODER
+#define ARG_CTRL_CNT_MAX NELEMENTS(vp8_arg_ctrl_map)
+#elif !CONFIG_VP8_ENCODER && CONFIG_VP9_ENCODER
+#define ARG_CTRL_CNT_MAX NELEMENTS(vp9_arg_ctrl_map)
+#else
+#define ARG_CTRL_CNT_MAX MAX(NELEMENTS(vp8_arg_ctrl_map), \
+ NELEMENTS(vp9_arg_ctrl_map))
+#endif
/* Configuration elements common to all streams */
-struct global_config
-{
- const struct codec_item *codec;
- int passes;
- int pass;
- int usage;
- int deadline;
- int use_i420;
- int quiet;
- int verbose;
- int limit;
- int show_psnr;
- int have_framerate;
- struct vpx_rational framerate;
- int out_part;
- int debug;
- int show_q_hist_buckets;
- int show_rate_hist_buckets;
+struct global_config {
+ const struct codec_item *codec;
+ int passes;
+ int pass;
+ int usage;
+ int deadline;
+ int use_i420;
+ int quiet;
+ int verbose;
+ int limit;
+ int skip_frames;
+ int show_psnr;
+ int test_decode;
+ int have_framerate;
+ struct vpx_rational framerate;
+ int out_part;
+ int debug;
+ int show_q_hist_buckets;
+ int show_rate_hist_buckets;
};
/* Per-stream configuration */
-struct stream_config
-{
- struct vpx_codec_enc_cfg cfg;
- const char *out_fn;
- const char *stats_fn;
- stereo_format_t stereo_fmt;
- int arg_ctrls[ARG_CTRL_CNT_MAX][2];
- int arg_ctrl_cnt;
- int write_webm;
- int have_kf_max_dist;
+struct stream_config {
+ struct vpx_codec_enc_cfg cfg;
+ const char *out_fn;
+ const char *stats_fn;
+ stereo_format_t stereo_fmt;
+ int arg_ctrls[ARG_CTRL_CNT_MAX][2];
+ int arg_ctrl_cnt;
+ int write_webm;
+ int have_kf_max_dist;
};
-struct stream_state
-{
- int index;
- struct stream_state *next;
- struct stream_config config;
- FILE *file;
- struct rate_hist rate_hist;
- EbmlGlobal ebml;
- uint32_t hash;
- uint64_t psnr_sse_total;
- uint64_t psnr_samples_total;
- double psnr_totals[4];
- int psnr_count;
- int counts[64];
- vpx_codec_ctx_t encoder;
- unsigned int frames_out;
- uint64_t cx_time;
- size_t nbytes;
- stats_io_t stats;
+struct stream_state {
+ int index;
+ struct stream_state *next;
+ struct stream_config config;
+ FILE *file;
+ struct rate_hist rate_hist;
+ EbmlGlobal ebml;
+ uint32_t hash;
+ uint64_t psnr_sse_total;
+ uint64_t psnr_samples_total;
+ double psnr_totals[4];
+ int psnr_count;
+ int counts[64];
+ vpx_codec_ctx_t encoder;
+ unsigned int frames_out;
+ uint64_t cx_time;
+ size_t nbytes;
+ stats_io_t stats;
+ vpx_codec_ctx_t decoder;
+ vpx_ref_frame_t ref_enc;
+ vpx_ref_frame_t ref_dec;
+ int mismatch_seen;
};
void validate_positive_rational(const char *msg,
- struct vpx_rational *rat)
-{
- if (rat->den < 0)
- {
- rat->num *= -1;
- rat->den *= -1;
- }
+ struct vpx_rational *rat) {
+ if (rat->den < 0) {
+ rat->num *= -1;
+ rat->den *= -1;
+ }
- if (rat->num < 0)
- die("Error: %s must be positive\n", msg);
+ if (rat->num < 0)
+ die("Error: %s must be positive\n", msg);
- if (!rat->den)
- die("Error: %s has zero denominator\n", msg);
+ if (!rat->den)
+ die("Error: %s has zero denominator\n", msg);
}
-static void parse_global_config(struct global_config *global, char **argv)
-{
- char **argi, **argj;
- struct arg arg;
+static void parse_global_config(struct global_config *global, char **argv) {
+ char **argi, **argj;
+ struct arg arg;
- /* Initialize default parameters */
- memset(global, 0, sizeof(*global));
- global->codec = codecs;
- global->passes = 1;
- global->use_i420 = 1;
+ /* Initialize default parameters */
+ memset(global, 0, sizeof(*global));
+ global->codec = codecs;
+ global->passes = 1;
+ global->use_i420 = 1;
- for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step)
- {
- arg.argv_step = 1;
+ for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
+ arg.argv_step = 1;
- if (arg_match(&arg, &codecarg, argi))
- {
- int j, k = -1;
+ if (arg_match(&arg, &codecarg, argi)) {
+ int j, k = -1;
- for (j = 0; j < sizeof(codecs) / sizeof(codecs[0]); j++)
- if (!strcmp(codecs[j].name, arg.val))
- k = j;
+ for (j = 0; j < sizeof(codecs) / sizeof(codecs[0]); j++)
+ if (!strcmp(codecs[j].name, arg.val))
+ k = j;
- if (k >= 0)
- global->codec = codecs + k;
- else
- die("Error: Unrecognized argument (%s) to --codec\n",
- arg.val);
+ if (k >= 0)
+ global->codec = codecs + k;
+ else
+ die("Error: Unrecognized argument (%s) to --codec\n",
+ arg.val);
- }
- else if (arg_match(&arg, &passes, argi))
- {
- global->passes = arg_parse_uint(&arg);
+ } else if (arg_match(&arg, &passes, argi)) {
+ global->passes = arg_parse_uint(&arg);
- if (global->passes < 1 || global->passes > 2)
- die("Error: Invalid number of passes (%d)\n", global->passes);
- }
- else if (arg_match(&arg, &pass_arg, argi))
- {
- global->pass = arg_parse_uint(&arg);
+ if (global->passes < 1 || global->passes > 2)
+ die("Error: Invalid number of passes (%d)\n", global->passes);
+ } else if (arg_match(&arg, &pass_arg, argi)) {
+ global->pass = arg_parse_uint(&arg);
- if (global->pass < 1 || global->pass > 2)
- die("Error: Invalid pass selected (%d)\n",
- global->pass);
- }
- else if (arg_match(&arg, &usage, argi))
- global->usage = arg_parse_uint(&arg);
- else if (arg_match(&arg, &deadline, argi))
- global->deadline = arg_parse_uint(&arg);
- else if (arg_match(&arg, &best_dl, argi))
- global->deadline = VPX_DL_BEST_QUALITY;
- else if (arg_match(&arg, &good_dl, argi))
- global->deadline = VPX_DL_GOOD_QUALITY;
- else if (arg_match(&arg, &rt_dl, argi))
- global->deadline = VPX_DL_REALTIME;
- else if (arg_match(&arg, &use_yv12, argi))
- global->use_i420 = 0;
- else if (arg_match(&arg, &use_i420, argi))
- global->use_i420 = 1;
- else if (arg_match(&arg, &quietarg, argi))
- global->quiet = 1;
- else if (arg_match(&arg, &verbosearg, argi))
- global->verbose = 1;
- else if (arg_match(&arg, &limit, argi))
- global->limit = arg_parse_uint(&arg);
- else if (arg_match(&arg, &psnrarg, argi))
- global->show_psnr = 1;
- else if (arg_match(&arg, &framerate, argi))
- {
- global->framerate = arg_parse_rational(&arg);
- validate_positive_rational(arg.name, &global->framerate);
- global->have_framerate = 1;
- }
- else if (arg_match(&arg,&out_part, argi))
- global->out_part = 1;
- else if (arg_match(&arg, &debugmode, argi))
- global->debug = 1;
- else if (arg_match(&arg, &q_hist_n, argi))
- global->show_q_hist_buckets = arg_parse_uint(&arg);
- else if (arg_match(&arg, &rate_hist_n, argi))
- global->show_rate_hist_buckets = arg_parse_uint(&arg);
- else
- argj++;
- }
-
- /* Validate global config */
-
- if (global->pass)
- {
- /* DWIM: Assume the user meant passes=2 if pass=2 is specified */
- if (global->pass > global->passes)
- {
- warn("Assuming --pass=%d implies --passes=%d\n",
- global->pass, global->pass);
- global->passes = global->pass;
- }
- }
-}
-
-
-void open_input_file(struct input_state *input)
-{
- unsigned int fourcc;
-
- /* Parse certain options from the input file, if possible */
- input->file = strcmp(input->fn, "-") ? fopen(input->fn, "rb")
- : set_binary_mode(stdin);
-
- if (!input->file)
- fatal("Failed to open input file");
-
- /* For RAW input sources, these bytes will applied on the first frame
- * in read_frame().
- */
- input->detect.buf_read = fread(input->detect.buf, 1, 4, input->file);
- input->detect.position = 0;
-
- if (input->detect.buf_read == 4
- && file_is_y4m(input->file, &input->y4m, input->detect.buf))
- {
- if (y4m_input_open(&input->y4m, input->file, input->detect.buf, 4) >= 0)
- {
- input->file_type = FILE_TYPE_Y4M;
- input->w = input->y4m.pic_w;
- input->h = input->y4m.pic_h;
- input->framerate.num = input->y4m.fps_n;
- input->framerate.den = input->y4m.fps_d;
- input->use_i420 = 0;
- }
- else
- fatal("Unsupported Y4M stream.");
- }
- else if (input->detect.buf_read == 4 && file_is_ivf(input, &fourcc))
- {
- input->file_type = FILE_TYPE_IVF;
- switch (fourcc)
- {
- case 0x32315659:
- input->use_i420 = 0;
- break;
- case 0x30323449:
- input->use_i420 = 1;
- break;
- default:
- fatal("Unsupported fourcc (%08x) in IVF", fourcc);
- }
- }
+ if (global->pass < 1 || global->pass > 2)
+ die("Error: Invalid pass selected (%d)\n",
+ global->pass);
+ } else if (arg_match(&arg, &usage, argi))
+ global->usage = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &deadline, argi))
+ global->deadline = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &best_dl, argi))
+ global->deadline = VPX_DL_BEST_QUALITY;
+ else if (arg_match(&arg, &good_dl, argi))
+ global->deadline = VPX_DL_GOOD_QUALITY;
+ else if (arg_match(&arg, &rt_dl, argi))
+ global->deadline = VPX_DL_REALTIME;
+ else if (arg_match(&arg, &use_yv12, argi))
+ global->use_i420 = 0;
+ else if (arg_match(&arg, &use_i420, argi))
+ global->use_i420 = 1;
+ else if (arg_match(&arg, &quietarg, argi))
+ global->quiet = 1;
+ else if (arg_match(&arg, &verbosearg, argi))
+ global->verbose = 1;
+ else if (arg_match(&arg, &limit, argi))
+ global->limit = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &skip, argi))
+ global->skip_frames = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &psnrarg, argi))
+ global->show_psnr = 1;
+ else if (arg_match(&arg, &recontest, argi))
+ global->test_decode = 1;
+ else if (arg_match(&arg, &framerate, argi)) {
+ global->framerate = arg_parse_rational(&arg);
+ validate_positive_rational(arg.name, &global->framerate);
+ global->have_framerate = 1;
+ } else if (arg_match(&arg, &out_part, argi))
+ global->out_part = 1;
+ else if (arg_match(&arg, &debugmode, argi))
+ global->debug = 1;
+ else if (arg_match(&arg, &q_hist_n, argi))
+ global->show_q_hist_buckets = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &rate_hist_n, argi))
+ global->show_rate_hist_buckets = arg_parse_uint(&arg);
else
- {
- input->file_type = FILE_TYPE_RAW;
+ argj++;
+ }
+
+ /* Validate global config */
+
+ if (global->pass) {
+ /* DWIM: Assume the user meant passes=2 if pass=2 is specified */
+ if (global->pass > global->passes) {
+ warn("Assuming --pass=%d implies --passes=%d\n",
+ global->pass, global->pass);
+ global->passes = global->pass;
}
+ }
}
-static void close_input_file(struct input_state *input)
-{
- fclose(input->file);
- if (input->file_type == FILE_TYPE_Y4M)
- y4m_input_close(&input->y4m);
+void open_input_file(struct input_state *input) {
+ unsigned int fourcc;
+
+ /* Parse certain options from the input file, if possible */
+ input->file = strcmp(input->fn, "-") ? fopen(input->fn, "rb")
+ : set_binary_mode(stdin);
+
+ if (!input->file)
+ fatal("Failed to open input file");
+
+ /* For RAW input sources, these bytes will be applied on the first frame
+ * in read_frame().
+ */
+ input->detect.buf_read = fread(input->detect.buf, 1, 4, input->file);
+ input->detect.position = 0;
+
+ if (input->detect.buf_read == 4
+ && file_is_y4m(input->file, &input->y4m, input->detect.buf)) {
+ if (y4m_input_open(&input->y4m, input->file, input->detect.buf, 4) >= 0) {
+ input->file_type = FILE_TYPE_Y4M;
+ input->w = input->y4m.pic_w;
+ input->h = input->y4m.pic_h;
+ input->framerate.num = input->y4m.fps_n;
+ input->framerate.den = input->y4m.fps_d;
+ input->use_i420 = 0;
+ } else
+ fatal("Unsupported Y4M stream.");
+ } else if (input->detect.buf_read == 4 && file_is_ivf(input, &fourcc)) {
+ input->file_type = FILE_TYPE_IVF;
+ switch (fourcc) {
+ case 0x32315659:
+ input->use_i420 = 0;
+ break;
+ case 0x30323449:
+ input->use_i420 = 1;
+ break;
+ default:
+ fatal("Unsupported fourcc (%08x) in IVF", fourcc);
+ }
+ } else {
+ input->file_type = FILE_TYPE_RAW;
+ }
+}
+
+
+static void close_input_file(struct input_state *input) {
+ fclose(input->file);
+ if (input->file_type == FILE_TYPE_Y4M)
+ y4m_input_close(&input->y4m);
}
static struct stream_state *new_stream(struct global_config *global,
- struct stream_state *prev)
-{
- struct stream_state *stream;
+ struct stream_state *prev) {
+ struct stream_state *stream;
- stream = calloc(1, sizeof(*stream));
- if(!stream)
- fatal("Failed to allocate new stream.");
- if(prev)
- {
- memcpy(stream, prev, sizeof(*stream));
- stream->index++;
- prev->next = stream;
- }
- else
- {
- vpx_codec_err_t res;
+ stream = calloc(1, sizeof(*stream));
+ if (!stream)
+ fatal("Failed to allocate new stream.");
+ if (prev) {
+ memcpy(stream, prev, sizeof(*stream));
+ stream->index++;
+ prev->next = stream;
+ } else {
+ vpx_codec_err_t res;
- /* Populate encoder configuration */
- res = vpx_codec_enc_config_default(global->codec->iface,
- &stream->config.cfg,
- global->usage);
- if (res)
- fatal("Failed to get config: %s\n", vpx_codec_err_to_string(res));
+ /* Populate encoder configuration */
+ res = vpx_codec_enc_config_default(global->codec->iface(),
+ &stream->config.cfg,
+ global->usage);
+ if (res)
+ fatal("Failed to get config: %s\n", vpx_codec_err_to_string(res));
- /* Change the default timebase to a high enough value so that the
- * encoder will always create strictly increasing timestamps.
- */
- stream->config.cfg.g_timebase.den = 1000;
+ /* Change the default timebase to a high enough value so that the
+ * encoder will always create strictly increasing timestamps.
+ */
+ stream->config.cfg.g_timebase.den = 1000;
- /* Never use the library's default resolution, require it be parsed
- * from the file or set on the command line.
- */
- stream->config.cfg.g_w = 0;
- stream->config.cfg.g_h = 0;
+ /* Never use the library's default resolution, require it be parsed
+ * from the file or set on the command line.
+ */
+ stream->config.cfg.g_w = 0;
+ stream->config.cfg.g_h = 0;
- /* Initialize remaining stream parameters */
- stream->config.stereo_fmt = STEREO_FORMAT_MONO;
- stream->config.write_webm = 1;
- stream->ebml.last_pts_ms = -1;
+ /* Initialize remaining stream parameters */
+ stream->config.stereo_fmt = STEREO_FORMAT_MONO;
+ stream->config.write_webm = 1;
+ stream->ebml.last_pts_ms = -1;
- /* Allows removal of the application version from the EBML tags */
- stream->ebml.debug = global->debug;
- }
+ /* Allows removal of the application version from the EBML tags */
+ stream->ebml.debug = global->debug;
+ }
- /* Output files must be specified for each stream */
- stream->config.out_fn = NULL;
+ /* Output files must be specified for each stream */
+ stream->config.out_fn = NULL;
- stream->next = NULL;
- return stream;
+ stream->next = NULL;
+ return stream;
}
static int parse_stream_params(struct global_config *global,
struct stream_state *stream,
- char **argv)
-{
- char **argi, **argj;
- struct arg arg;
- static const arg_def_t **ctrl_args = no_args;
- static const int *ctrl_args_map = NULL;
- struct stream_config *config = &stream->config;
- int eos_mark_found = 0;
+ char **argv) {
+ char **argi, **argj;
+ struct arg arg;
+ static const arg_def_t **ctrl_args = no_args;
+ static const int *ctrl_args_map = NULL;
+ struct stream_config *config = &stream->config;
+ int eos_mark_found = 0;
- /* Handle codec specific options */
- if (global->codec->iface == &vpx_codec_vp8_cx_algo)
- {
- ctrl_args = vp8_args;
- ctrl_args_map = vp8_arg_ctrl_map;
+ /* Handle codec specific options */
+ if (0) {
+#if CONFIG_VP8_ENCODER
+ } else if (global->codec->iface == vpx_codec_vp8_cx) {
+ ctrl_args = vp8_args;
+ ctrl_args_map = vp8_arg_ctrl_map;
+#endif
+#if CONFIG_VP9_ENCODER
+ } else if (global->codec->iface == vpx_codec_vp9_cx) {
+ ctrl_args = vp9_args;
+ ctrl_args_map = vp9_arg_ctrl_map;
+#endif
+ }
+
+ for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
+ arg.argv_step = 1;
+
+ /* Once we've found an end-of-stream marker (--) we want to continue
+ * shifting arguments but not consuming them.
+ */
+ if (eos_mark_found) {
+ argj++;
+ continue;
+ } else if (!strcmp(*argj, "--")) {
+ eos_mark_found = 1;
+ continue;
}
- for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step)
- {
- arg.argv_step = 1;
+ if (0);
+ else if (arg_match(&arg, &outputfile, argi))
+ config->out_fn = arg.val;
+ else if (arg_match(&arg, &fpf_name, argi))
+ config->stats_fn = arg.val;
+ else if (arg_match(&arg, &use_ivf, argi))
+ config->write_webm = 0;
+ else if (arg_match(&arg, &threads, argi))
+ config->cfg.g_threads = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &profile, argi))
+ config->cfg.g_profile = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &width, argi))
+ config->cfg.g_w = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &height, argi))
+ config->cfg.g_h = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &stereo_mode, argi))
+ config->stereo_fmt = arg_parse_enum_or_int(&arg);
+ else if (arg_match(&arg, &timebase, argi)) {
+ config->cfg.g_timebase = arg_parse_rational(&arg);
+ validate_positive_rational(arg.name, &config->cfg.g_timebase);
+ } else if (arg_match(&arg, &error_resilient, argi))
+ config->cfg.g_error_resilient = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &lag_in_frames, argi))
+ config->cfg.g_lag_in_frames = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &dropframe_thresh, argi))
+ config->cfg.rc_dropframe_thresh = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &resize_allowed, argi))
+ config->cfg.rc_resize_allowed = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &resize_up_thresh, argi))
+ config->cfg.rc_resize_up_thresh = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &resize_down_thresh, argi))
+ config->cfg.rc_resize_down_thresh = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &end_usage, argi))
+ config->cfg.rc_end_usage = arg_parse_enum_or_int(&arg);
+ else if (arg_match(&arg, &target_bitrate, argi))
+ config->cfg.rc_target_bitrate = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &min_quantizer, argi))
+ config->cfg.rc_min_quantizer = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &max_quantizer, argi))
+ config->cfg.rc_max_quantizer = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &undershoot_pct, argi))
+ config->cfg.rc_undershoot_pct = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &overshoot_pct, argi))
+ config->cfg.rc_overshoot_pct = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &buf_sz, argi))
+ config->cfg.rc_buf_sz = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &buf_initial_sz, argi))
+ config->cfg.rc_buf_initial_sz = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &buf_optimal_sz, argi))
+ config->cfg.rc_buf_optimal_sz = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &bias_pct, argi)) {
+ config->cfg.rc_2pass_vbr_bias_pct = arg_parse_uint(&arg);
- /* Once we've found an end-of-stream marker (--) we want to continue
- * shifting arguments but not consuming them.
- */
- if (eos_mark_found)
- {
- argj++;
- continue;
+ if (global->passes < 2)
+ warn("option %s ignored in one-pass mode.\n", arg.name);
+ } else if (arg_match(&arg, &minsection_pct, argi)) {
+ config->cfg.rc_2pass_vbr_minsection_pct = arg_parse_uint(&arg);
+
+ if (global->passes < 2)
+ warn("option %s ignored in one-pass mode.\n", arg.name);
+ } else if (arg_match(&arg, &maxsection_pct, argi)) {
+ config->cfg.rc_2pass_vbr_maxsection_pct = arg_parse_uint(&arg);
+
+ if (global->passes < 2)
+ warn("option %s ignored in one-pass mode.\n", arg.name);
+ } else if (arg_match(&arg, &kf_min_dist, argi))
+ config->cfg.kf_min_dist = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &kf_max_dist, argi)) {
+ config->cfg.kf_max_dist = arg_parse_uint(&arg);
+ config->have_kf_max_dist = 1;
+ } else if (arg_match(&arg, &kf_disabled, argi))
+ config->cfg.kf_mode = VPX_KF_DISABLED;
+ else {
+ int i, match = 0;
+
+ for (i = 0; ctrl_args[i]; i++) {
+ if (arg_match(&arg, ctrl_args[i], argi)) {
+ int j;
+ match = 1;
+
+ /* Point either to the next free element or the first
+ * instance of this control.
+ */
+ for (j = 0; j < config->arg_ctrl_cnt; j++)
+ if (config->arg_ctrls[j][0] == ctrl_args_map[i])
+ break;
+
+ /* Update/insert */
+ assert(j < ARG_CTRL_CNT_MAX);
+ if (j < ARG_CTRL_CNT_MAX) {
+ config->arg_ctrls[j][0] = ctrl_args_map[i];
+ config->arg_ctrls[j][1] = arg_parse_enum_or_int(&arg);
+ if (j == config->arg_ctrl_cnt)
+ config->arg_ctrl_cnt++;
+ }
+
}
- else if (!strcmp(*argj, "--"))
- {
- eos_mark_found = 1;
- continue;
- }
+ }
- if (0);
- else if (arg_match(&arg, &outputfile, argi))
- config->out_fn = arg.val;
- else if (arg_match(&arg, &fpf_name, argi))
- config->stats_fn = arg.val;
- else if (arg_match(&arg, &use_ivf, argi))
- config->write_webm = 0;
- else if (arg_match(&arg, &threads, argi))
- config->cfg.g_threads = arg_parse_uint(&arg);
- else if (arg_match(&arg, &profile, argi))
- config->cfg.g_profile = arg_parse_uint(&arg);
- else if (arg_match(&arg, &width, argi))
- config->cfg.g_w = arg_parse_uint(&arg);
- else if (arg_match(&arg, &height, argi))
- config->cfg.g_h = arg_parse_uint(&arg);
- else if (arg_match(&arg, &stereo_mode, argi))
- config->stereo_fmt = arg_parse_enum_or_int(&arg);
- else if (arg_match(&arg, &timebase, argi))
- {
- config->cfg.g_timebase = arg_parse_rational(&arg);
- validate_positive_rational(arg.name, &config->cfg.g_timebase);
- }
- else if (arg_match(&arg, &error_resilient, argi))
- config->cfg.g_error_resilient = arg_parse_uint(&arg);
- else if (arg_match(&arg, &lag_in_frames, argi))
- config->cfg.g_lag_in_frames = arg_parse_uint(&arg);
- else if (arg_match(&arg, &dropframe_thresh, argi))
- config->cfg.rc_dropframe_thresh = arg_parse_uint(&arg);
- else if (arg_match(&arg, &resize_allowed, argi))
- config->cfg.rc_resize_allowed = arg_parse_uint(&arg);
- else if (arg_match(&arg, &resize_up_thresh, argi))
- config->cfg.rc_resize_up_thresh = arg_parse_uint(&arg);
- else if (arg_match(&arg, &resize_down_thresh, argi))
- config->cfg.rc_resize_down_thresh = arg_parse_uint(&arg);
- else if (arg_match(&arg, &end_usage, argi))
- config->cfg.rc_end_usage = arg_parse_enum_or_int(&arg);
- else if (arg_match(&arg, &target_bitrate, argi))
- config->cfg.rc_target_bitrate = arg_parse_uint(&arg);
- else if (arg_match(&arg, &min_quantizer, argi))
- config->cfg.rc_min_quantizer = arg_parse_uint(&arg);
- else if (arg_match(&arg, &max_quantizer, argi))
- config->cfg.rc_max_quantizer = arg_parse_uint(&arg);
- else if (arg_match(&arg, &undershoot_pct, argi))
- config->cfg.rc_undershoot_pct = arg_parse_uint(&arg);
- else if (arg_match(&arg, &overshoot_pct, argi))
- config->cfg.rc_overshoot_pct = arg_parse_uint(&arg);
- else if (arg_match(&arg, &buf_sz, argi))
- config->cfg.rc_buf_sz = arg_parse_uint(&arg);
- else if (arg_match(&arg, &buf_initial_sz, argi))
- config->cfg.rc_buf_initial_sz = arg_parse_uint(&arg);
- else if (arg_match(&arg, &buf_optimal_sz, argi))
- config->cfg.rc_buf_optimal_sz = arg_parse_uint(&arg);
- else if (arg_match(&arg, &bias_pct, argi))
- {
- config->cfg.rc_2pass_vbr_bias_pct = arg_parse_uint(&arg);
-
- if (global->passes < 2)
- warn("option %s ignored in one-pass mode.\n", arg.name);
- }
- else if (arg_match(&arg, &minsection_pct, argi))
- {
- config->cfg.rc_2pass_vbr_minsection_pct = arg_parse_uint(&arg);
-
- if (global->passes < 2)
- warn("option %s ignored in one-pass mode.\n", arg.name);
- }
- else if (arg_match(&arg, &maxsection_pct, argi))
- {
- config->cfg.rc_2pass_vbr_maxsection_pct = arg_parse_uint(&arg);
-
- if (global->passes < 2)
- warn("option %s ignored in one-pass mode.\n", arg.name);
- }
- else if (arg_match(&arg, &kf_min_dist, argi))
- config->cfg.kf_min_dist = arg_parse_uint(&arg);
- else if (arg_match(&arg, &kf_max_dist, argi))
- {
- config->cfg.kf_max_dist = arg_parse_uint(&arg);
- config->have_kf_max_dist = 1;
- }
- else if (arg_match(&arg, &kf_disabled, argi))
- config->cfg.kf_mode = VPX_KF_DISABLED;
- else
- {
- int i, match = 0;
-
- for (i = 0; ctrl_args[i]; i++)
- {
- if (arg_match(&arg, ctrl_args[i], argi))
- {
- int j;
- match = 1;
-
- /* Point either to the next free element or the first
- * instance of this control.
- */
- for(j=0; j<config->arg_ctrl_cnt; j++)
- if(config->arg_ctrls[j][0] == ctrl_args_map[i])
- break;
-
- /* Update/insert */
- assert(j < ARG_CTRL_CNT_MAX);
- if (j < ARG_CTRL_CNT_MAX)
- {
- config->arg_ctrls[j][0] = ctrl_args_map[i];
- config->arg_ctrls[j][1] = arg_parse_enum_or_int(&arg);
- if(j == config->arg_ctrl_cnt)
- config->arg_ctrl_cnt++;
- }
-
- }
- }
-
- if (!match)
- argj++;
- }
+ if (!match)
+ argj++;
}
+ }
- return eos_mark_found;
+ return eos_mark_found;
}
#define FOREACH_STREAM(func)\
-do\
-{\
+ do\
+ {\
struct stream_state *stream;\
-\
+ \
for(stream = streams; stream; stream = stream->next)\
- func;\
-}while(0)
+ func;\
+ }while(0)
-static void validate_stream_config(struct stream_state *stream)
-{
- struct stream_state *streami;
+static void validate_stream_config(struct stream_state *stream) {
+ struct stream_state *streami;
- if(!stream->config.cfg.g_w || !stream->config.cfg.g_h)
- fatal("Stream %d: Specify stream dimensions with --width (-w) "
- " and --height (-h)", stream->index);
+ if (!stream->config.cfg.g_w || !stream->config.cfg.g_h)
+ fatal("Stream %d: Specify stream dimensions with --width (-w) "
+ " and --height (-h)", stream->index);
- for(streami = stream; streami; streami = streami->next)
- {
- /* All streams require output files */
- if(!streami->config.out_fn)
- fatal("Stream %d: Output file is required (specify with -o)",
- streami->index);
+ for (streami = stream; streami; streami = streami->next) {
+ /* All streams require output files */
+ if (!streami->config.out_fn)
+ fatal("Stream %d: Output file is required (specify with -o)",
+ streami->index);
- /* Check for two streams outputting to the same file */
- if(streami != stream)
- {
- const char *a = stream->config.out_fn;
- const char *b = streami->config.out_fn;
- if(!strcmp(a,b) && strcmp(a, "/dev/null") && strcmp(a, ":nul"))
- fatal("Stream %d: duplicate output file (from stream %d)",
- streami->index, stream->index);
- }
-
- /* Check for two streams sharing a stats file. */
- if(streami != stream)
- {
- const char *a = stream->config.stats_fn;
- const char *b = streami->config.stats_fn;
- if(a && b && !strcmp(a,b))
- fatal("Stream %d: duplicate stats file (from stream %d)",
- streami->index, stream->index);
- }
+ /* Check for two streams outputting to the same file */
+ if (streami != stream) {
+ const char *a = stream->config.out_fn;
+ const char *b = streami->config.out_fn;
+ if (!strcmp(a, b) && strcmp(a, "/dev/null") && strcmp(a, ":nul"))
+ fatal("Stream %d: duplicate output file (from stream %d)",
+ streami->index, stream->index);
}
+
+ /* Check for two streams sharing a stats file. */
+ if (streami != stream) {
+ const char *a = stream->config.stats_fn;
+ const char *b = streami->config.stats_fn;
+ if (a && b && !strcmp(a, b))
+ fatal("Stream %d: duplicate stats file (from stream %d)",
+ streami->index, stream->index);
+ }
+ }
}
static void set_stream_dimensions(struct stream_state *stream,
unsigned int w,
- unsigned int h)
-{
- if ((stream->config.cfg.g_w && stream->config.cfg.g_w != w)
- ||(stream->config.cfg.g_h && stream->config.cfg.g_h != h))
- fatal("Stream %d: Resizing not yet supported", stream->index);
- stream->config.cfg.g_w = w;
- stream->config.cfg.g_h = h;
+ unsigned int h) {
+ if ((stream->config.cfg.g_w && stream->config.cfg.g_w != w)
+ || (stream->config.cfg.g_h && stream->config.cfg.g_h != h))
+ fatal("Stream %d: Resizing not yet supported", stream->index);
+ stream->config.cfg.g_w = w;
+ stream->config.cfg.g_h = h;
}
static void set_default_kf_interval(struct stream_state *stream,
- struct global_config *global)
-{
- /* Use a max keyframe interval of 5 seconds, if none was
- * specified on the command line.
- */
- if (!stream->config.have_kf_max_dist)
- {
- double framerate = (double)global->framerate.num/global->framerate.den;
- if (framerate > 0.0)
- stream->config.cfg.kf_max_dist = (unsigned int)(5.0*framerate);
- }
+ struct global_config *global) {
+ /* Use a max keyframe interval of 5 seconds, if none was
+ * specified on the command line.
+ */
+ if (!stream->config.have_kf_max_dist) {
+ double framerate = (double)global->framerate.num / global->framerate.den;
+ if (framerate > 0.0)
+ stream->config.cfg.kf_max_dist = (unsigned int)(5.0 * framerate);
+ }
}
static void show_stream_config(struct stream_state *stream,
struct global_config *global,
- struct input_state *input)
-{
+ struct input_state *input) {
#define SHOW(field) \
- fprintf(stderr, " %-28s = %d\n", #field, stream->config.cfg.field)
+ fprintf(stderr, " %-28s = %d\n", #field, stream->config.cfg.field)
- if(stream->index == 0)
- {
- fprintf(stderr, "Codec: %s\n",
- vpx_codec_iface_name(global->codec->iface));
- fprintf(stderr, "Source file: %s Format: %s\n", input->fn,
- input->use_i420 ? "I420" : "YV12");
- }
- if(stream->next || stream->index)
- fprintf(stderr, "\nStream Index: %d\n", stream->index);
- fprintf(stderr, "Destination file: %s\n", stream->config.out_fn);
- fprintf(stderr, "Encoder parameters:\n");
+ if (stream->index == 0) {
+ fprintf(stderr, "Codec: %s\n",
+ vpx_codec_iface_name(global->codec->iface()));
+ fprintf(stderr, "Source file: %s Format: %s\n", input->fn,
+ input->use_i420 ? "I420" : "YV12");
+ }
+ if (stream->next || stream->index)
+ fprintf(stderr, "\nStream Index: %d\n", stream->index);
+ fprintf(stderr, "Destination file: %s\n", stream->config.out_fn);
+ fprintf(stderr, "Encoder parameters:\n");
- SHOW(g_usage);
- SHOW(g_threads);
- SHOW(g_profile);
- SHOW(g_w);
- SHOW(g_h);
- SHOW(g_timebase.num);
- SHOW(g_timebase.den);
- SHOW(g_error_resilient);
- SHOW(g_pass);
- SHOW(g_lag_in_frames);
- SHOW(rc_dropframe_thresh);
- SHOW(rc_resize_allowed);
- SHOW(rc_resize_up_thresh);
- SHOW(rc_resize_down_thresh);
- SHOW(rc_end_usage);
- SHOW(rc_target_bitrate);
- SHOW(rc_min_quantizer);
- SHOW(rc_max_quantizer);
- SHOW(rc_undershoot_pct);
- SHOW(rc_overshoot_pct);
- SHOW(rc_buf_sz);
- SHOW(rc_buf_initial_sz);
- SHOW(rc_buf_optimal_sz);
- SHOW(rc_2pass_vbr_bias_pct);
- SHOW(rc_2pass_vbr_minsection_pct);
- SHOW(rc_2pass_vbr_maxsection_pct);
- SHOW(kf_mode);
- SHOW(kf_min_dist);
- SHOW(kf_max_dist);
+ SHOW(g_usage);
+ SHOW(g_threads);
+ SHOW(g_profile);
+ SHOW(g_w);
+ SHOW(g_h);
+ SHOW(g_timebase.num);
+ SHOW(g_timebase.den);
+ SHOW(g_error_resilient);
+ SHOW(g_pass);
+ SHOW(g_lag_in_frames);
+ SHOW(rc_dropframe_thresh);
+ SHOW(rc_resize_allowed);
+ SHOW(rc_resize_up_thresh);
+ SHOW(rc_resize_down_thresh);
+ SHOW(rc_end_usage);
+ SHOW(rc_target_bitrate);
+ SHOW(rc_min_quantizer);
+ SHOW(rc_max_quantizer);
+ SHOW(rc_undershoot_pct);
+ SHOW(rc_overshoot_pct);
+ SHOW(rc_buf_sz);
+ SHOW(rc_buf_initial_sz);
+ SHOW(rc_buf_optimal_sz);
+ SHOW(rc_2pass_vbr_bias_pct);
+ SHOW(rc_2pass_vbr_minsection_pct);
+ SHOW(rc_2pass_vbr_maxsection_pct);
+ SHOW(kf_mode);
+ SHOW(kf_min_dist);
+ SHOW(kf_max_dist);
}
static void open_output_file(struct stream_state *stream,
- struct global_config *global)
-{
- const char *fn = stream->config.out_fn;
+ struct global_config *global) {
+ const char *fn = stream->config.out_fn;
- stream->file = strcmp(fn, "-") ? fopen(fn, "wb") : set_binary_mode(stdout);
+ stream->file = strcmp(fn, "-") ? fopen(fn, "wb") : set_binary_mode(stdout);
- if (!stream->file)
- fatal("Failed to open output file");
+ if (!stream->file)
+ fatal("Failed to open output file");
- if(stream->config.write_webm && fseek(stream->file, 0, SEEK_CUR))
- fatal("WebM output to pipes not supported.");
+ if (stream->config.write_webm && fseek(stream->file, 0, SEEK_CUR))
+ fatal("WebM output to pipes not supported.");
- if(stream->config.write_webm)
- {
- stream->ebml.stream = stream->file;
- write_webm_file_header(&stream->ebml, &stream->config.cfg,
- &global->framerate,
- stream->config.stereo_fmt);
- }
- else
- write_ivf_file_header(stream->file, &stream->config.cfg,
- global->codec->fourcc, 0);
+ if (stream->config.write_webm) {
+ stream->ebml.stream = stream->file;
+ write_webm_file_header(&stream->ebml, &stream->config.cfg,
+ &global->framerate,
+ stream->config.stereo_fmt,
+ global->codec->fourcc);
+ } else
+ write_ivf_file_header(stream->file, &stream->config.cfg,
+ global->codec->fourcc, 0);
}
static void close_output_file(struct stream_state *stream,
- unsigned int fourcc)
-{
- if(stream->config.write_webm)
- {
- write_webm_file_footer(&stream->ebml, stream->hash);
- free(stream->ebml.cue_list);
- stream->ebml.cue_list = NULL;
- }
- else
- {
- if (!fseek(stream->file, 0, SEEK_SET))
- write_ivf_file_header(stream->file, &stream->config.cfg,
- fourcc,
- stream->frames_out);
- }
+ unsigned int fourcc) {
+ if (stream->config.write_webm) {
+ write_webm_file_footer(&stream->ebml, stream->hash);
+ free(stream->ebml.cue_list);
+ stream->ebml.cue_list = NULL;
+ } else {
+ if (!fseek(stream->file, 0, SEEK_SET))
+ write_ivf_file_header(stream->file, &stream->config.cfg,
+ fourcc,
+ stream->frames_out);
+ }
- fclose(stream->file);
+ fclose(stream->file);
}
static void setup_pass(struct stream_state *stream,
struct global_config *global,
- int pass)
-{
- if (stream->config.stats_fn)
- {
- if (!stats_open_file(&stream->stats, stream->config.stats_fn,
- pass))
- fatal("Failed to open statistics store");
- }
- else
- {
- if (!stats_open_mem(&stream->stats, pass))
- fatal("Failed to open statistics store");
- }
+ int pass) {
+ if (stream->config.stats_fn) {
+ if (!stats_open_file(&stream->stats, stream->config.stats_fn,
+ pass))
+ fatal("Failed to open statistics store");
+ } else {
+ if (!stats_open_mem(&stream->stats, pass))
+ fatal("Failed to open statistics store");
+ }
- stream->config.cfg.g_pass = global->passes == 2
- ? pass ? VPX_RC_LAST_PASS : VPX_RC_FIRST_PASS
- : VPX_RC_ONE_PASS;
- if (pass)
- stream->config.cfg.rc_twopass_stats_in = stats_get(&stream->stats);
+ stream->config.cfg.g_pass = global->passes == 2
+ ? pass ? VPX_RC_LAST_PASS : VPX_RC_FIRST_PASS
+ : VPX_RC_ONE_PASS;
+ if (pass)
+ stream->config.cfg.rc_twopass_stats_in = stats_get(&stream->stats);
- stream->cx_time = 0;
- stream->nbytes = 0;
- stream->frames_out = 0;
+ stream->cx_time = 0;
+ stream->nbytes = 0;
+ stream->frames_out = 0;
}
static void initialize_encoder(struct stream_state *stream,
- struct global_config *global)
-{
- int i;
- int flags = 0;
+ struct global_config *global) {
+ int i;
+ int flags = 0;
- flags |= global->show_psnr ? VPX_CODEC_USE_PSNR : 0;
- flags |= global->out_part ? VPX_CODEC_USE_OUTPUT_PARTITION : 0;
+ flags |= global->show_psnr ? VPX_CODEC_USE_PSNR : 0;
+ flags |= global->out_part ? VPX_CODEC_USE_OUTPUT_PARTITION : 0;
- /* Construct Encoder Context */
- vpx_codec_enc_init(&stream->encoder, global->codec->iface,
- &stream->config.cfg, flags);
- ctx_exit_on_error(&stream->encoder, "Failed to initialize encoder");
+ /* Construct Encoder Context */
+ vpx_codec_enc_init(&stream->encoder, global->codec->iface(),
+ &stream->config.cfg, flags);
+ ctx_exit_on_error(&stream->encoder, "Failed to initialize encoder");
- /* Note that we bypass the vpx_codec_control wrapper macro because
- * we're being clever to store the control IDs in an array. Real
- * applications will want to make use of the enumerations directly
- */
- for (i = 0; i < stream->config.arg_ctrl_cnt; i++)
- {
- int ctrl = stream->config.arg_ctrls[i][0];
- int value = stream->config.arg_ctrls[i][1];
- if (vpx_codec_control_(&stream->encoder, ctrl, value))
- fprintf(stderr, "Error: Tried to set control %d = %d\n",
- ctrl, value);
+ /* Note that we bypass the vpx_codec_control wrapper macro because
+ * we're being clever to store the control IDs in an array. Real
+ * applications will want to make use of the enumerations directly
+ */
+ for (i = 0; i < stream->config.arg_ctrl_cnt; i++) {
+ int ctrl = stream->config.arg_ctrls[i][0];
+ int value = stream->config.arg_ctrls[i][1];
+ if (vpx_codec_control_(&stream->encoder, ctrl, value))
+ fprintf(stderr, "Error: Tried to set control %d = %d\n",
+ ctrl, value);
- ctx_exit_on_error(&stream->encoder, "Failed to control codec");
- }
+ ctx_exit_on_error(&stream->encoder, "Failed to control codec");
+ }
+
+#if CONFIG_DECODERS
+ if (global->test_decode) {
+ int width, height;
+
+ vpx_codec_dec_init(&stream->decoder, global->codec->dx_iface(), NULL, 0);
+
+ width = (stream->config.cfg.g_w + 15) & ~15;
+ height = (stream->config.cfg.g_h + 15) & ~15;
+ vpx_img_alloc(&stream->ref_enc.img, VPX_IMG_FMT_I420, width, height, 1);
+ vpx_img_alloc(&stream->ref_dec.img, VPX_IMG_FMT_I420, width, height, 1);
+ stream->ref_enc.frame_type = VP8_LAST_FRAME;
+ stream->ref_dec.frame_type = VP8_LAST_FRAME;
+ }
+#endif
}
static void encode_frame(struct stream_state *stream,
struct global_config *global,
struct vpx_image *img,
- unsigned int frames_in)
-{
- vpx_codec_pts_t frame_start, next_frame_start;
- struct vpx_codec_enc_cfg *cfg = &stream->config.cfg;
- struct vpx_usec_timer timer;
+ unsigned int frames_in) {
+ vpx_codec_pts_t frame_start, next_frame_start;
+ struct vpx_codec_enc_cfg *cfg = &stream->config.cfg;
+ struct vpx_usec_timer timer;
- frame_start = (cfg->g_timebase.den * (int64_t)(frames_in - 1)
- * global->framerate.den)
- / cfg->g_timebase.num / global->framerate.num;
- next_frame_start = (cfg->g_timebase.den * (int64_t)(frames_in)
- * global->framerate.den)
- / cfg->g_timebase.num / global->framerate.num;
- vpx_usec_timer_start(&timer);
- vpx_codec_encode(&stream->encoder, img, frame_start,
- (unsigned long)(next_frame_start - frame_start),
- 0, global->deadline);
- vpx_usec_timer_mark(&timer);
- stream->cx_time += vpx_usec_timer_elapsed(&timer);
- ctx_exit_on_error(&stream->encoder, "Stream %d: Failed to encode frame",
- stream->index);
+ frame_start = (cfg->g_timebase.den * (int64_t)(frames_in - 1)
+ * global->framerate.den)
+ / cfg->g_timebase.num / global->framerate.num;
+ next_frame_start = (cfg->g_timebase.den * (int64_t)(frames_in)
+ * global->framerate.den)
+ / cfg->g_timebase.num / global->framerate.num;
+ vpx_usec_timer_start(&timer);
+ vpx_codec_encode(&stream->encoder, img, frame_start,
+ (unsigned long)(next_frame_start - frame_start),
+ 0, global->deadline);
+ vpx_usec_timer_mark(&timer);
+ stream->cx_time += vpx_usec_timer_elapsed(&timer);
+ ctx_exit_on_error(&stream->encoder, "Stream %d: Failed to encode frame",
+ stream->index);
}
-static void update_quantizer_histogram(struct stream_state *stream)
-{
- if(stream->config.cfg.g_pass != VPX_RC_FIRST_PASS)
- {
- int q;
+static void update_quantizer_histogram(struct stream_state *stream) {
+ if (stream->config.cfg.g_pass != VPX_RC_FIRST_PASS) {
+ int q;
- vpx_codec_control(&stream->encoder, VP8E_GET_LAST_QUANTIZER_64, &q);
- ctx_exit_on_error(&stream->encoder, "Failed to read quantizer");
- stream->counts[q]++;
- }
+ vpx_codec_control(&stream->encoder, VP8E_GET_LAST_QUANTIZER_64, &q);
+ ctx_exit_on_error(&stream->encoder, "Failed to read quantizer");
+ stream->counts[q]++;
+ }
}
static void get_cx_data(struct stream_state *stream,
struct global_config *global,
- int *got_data)
-{
- const vpx_codec_cx_pkt_t *pkt;
- const struct vpx_codec_enc_cfg *cfg = &stream->config.cfg;
- vpx_codec_iter_t iter = NULL;
+ int *got_data) {
+ const vpx_codec_cx_pkt_t *pkt;
+ const struct vpx_codec_enc_cfg *cfg = &stream->config.cfg;
+ vpx_codec_iter_t iter = NULL;
- while ((pkt = vpx_codec_get_cx_data(&stream->encoder, &iter)))
- {
- static size_t fsize = 0;
- static off_t ivf_header_pos = 0;
+ *got_data = 0;
+ while ((pkt = vpx_codec_get_cx_data(&stream->encoder, &iter))) {
+ static size_t fsize = 0;
+ static off_t ivf_header_pos = 0;
+
+ switch (pkt->kind) {
+ case VPX_CODEC_CX_FRAME_PKT:
+ if (!(pkt->data.frame.flags & VPX_FRAME_IS_FRAGMENT)) {
+ stream->frames_out++;
+ }
+ if (!global->quiet)
+ fprintf(stderr, " %6luF",
+ (unsigned long)pkt->data.frame.sz);
+
+ update_rate_histogram(&stream->rate_hist, cfg, pkt);
+ if (stream->config.write_webm) {
+ /* Update the hash */
+ if (!stream->ebml.debug)
+ stream->hash = murmur(pkt->data.frame.buf,
+ (int)pkt->data.frame.sz,
+ stream->hash);
+
+ write_webm_block(&stream->ebml, cfg, pkt);
+ } else {
+ if (pkt->data.frame.partition_id <= 0) {
+ ivf_header_pos = ftello(stream->file);
+ fsize = pkt->data.frame.sz;
+
+ write_ivf_frame_header(stream->file, pkt);
+ } else {
+ fsize += pkt->data.frame.sz;
+
+ if (!(pkt->data.frame.flags & VPX_FRAME_IS_FRAGMENT)) {
+ off_t currpos = ftello(stream->file);
+ fseeko(stream->file, ivf_header_pos, SEEK_SET);
+ write_ivf_frame_size(stream->file, fsize);
+ fseeko(stream->file, currpos, SEEK_SET);
+ }
+ }
+
+ (void) fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
+ stream->file);
+ }
+ stream->nbytes += pkt->data.raw.sz;
*got_data = 1;
-
- switch (pkt->kind)
- {
- case VPX_CODEC_CX_FRAME_PKT:
- if (!(pkt->data.frame.flags & VPX_FRAME_IS_FRAGMENT))
- {
- stream->frames_out++;
- }
- if (!global->quiet)
- fprintf(stderr, " %6luF",
- (unsigned long)pkt->data.frame.sz);
-
- update_rate_histogram(&stream->rate_hist, cfg, pkt);
- if(stream->config.write_webm)
- {
- /* Update the hash */
- if(!stream->ebml.debug)
- stream->hash = murmur(pkt->data.frame.buf,
- (int)pkt->data.frame.sz,
- stream->hash);
-
- write_webm_block(&stream->ebml, cfg, pkt);
- }
- else
- {
- if (pkt->data.frame.partition_id <= 0)
- {
- ivf_header_pos = ftello(stream->file);
- fsize = pkt->data.frame.sz;
-
- write_ivf_frame_header(stream->file, pkt);
- }
- else
- {
- fsize += pkt->data.frame.sz;
-
- if (!(pkt->data.frame.flags & VPX_FRAME_IS_FRAGMENT))
- {
- off_t currpos = ftello(stream->file);
- fseeko(stream->file, ivf_header_pos, SEEK_SET);
- write_ivf_frame_size(stream->file, fsize);
- fseeko(stream->file, currpos, SEEK_SET);
- }
- }
-
- (void) fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
- stream->file);
- }
- stream->nbytes += pkt->data.raw.sz;
- break;
- case VPX_CODEC_STATS_PKT:
- stream->frames_out++;
- if (!global->quiet)
- fprintf(stderr, " %6luS",
- (unsigned long)pkt->data.twopass_stats.sz);
- stats_write(&stream->stats,
- pkt->data.twopass_stats.buf,
- pkt->data.twopass_stats.sz);
- stream->nbytes += pkt->data.raw.sz;
- break;
- case VPX_CODEC_PSNR_PKT:
-
- if (global->show_psnr)
- {
- int i;
-
- stream->psnr_sse_total += pkt->data.psnr.sse[0];
- stream->psnr_samples_total += pkt->data.psnr.samples[0];
- for (i = 0; i < 4; i++)
- {
- if (!global->quiet)
- fprintf(stderr, "%.3f ", pkt->data.psnr.psnr[i]);
- stream->psnr_totals[i] += pkt->data.psnr.psnr[i];
- }
- stream->psnr_count++;
- }
-
- break;
- default:
- break;
+#if CONFIG_DECODERS
+ if (global->test_decode) {
+ vpx_codec_decode(&stream->decoder, pkt->data.frame.buf,
+ pkt->data.frame.sz, NULL, 0);
+ ctx_exit_on_error(&stream->decoder, "Failed to decode frame");
}
+#endif
+ break;
+ case VPX_CODEC_STATS_PKT:
+ stream->frames_out++;
+ if (!global->quiet)
+ fprintf(stderr, " %6luS",
+ (unsigned long)pkt->data.twopass_stats.sz);
+ stats_write(&stream->stats,
+ pkt->data.twopass_stats.buf,
+ pkt->data.twopass_stats.sz);
+ stream->nbytes += pkt->data.raw.sz;
+ break;
+ case VPX_CODEC_PSNR_PKT:
+
+ if (global->show_psnr) {
+ int i;
+
+ stream->psnr_sse_total += pkt->data.psnr.sse[0];
+ stream->psnr_samples_total += pkt->data.psnr.samples[0];
+ for (i = 0; i < 4; i++) {
+ if (!global->quiet)
+ fprintf(stderr, "%.3f ", pkt->data.psnr.psnr[i]);
+ stream->psnr_totals[i] += pkt->data.psnr.psnr[i];
+ }
+ stream->psnr_count++;
+ }
+
+ break;
+ default:
+ break;
}
+ }
}
-static void show_psnr(struct stream_state *stream)
-{
- int i;
- double ovpsnr;
+static void show_psnr(struct stream_state *stream) {
+ int i;
+ double ovpsnr;
- if (!stream->psnr_count)
- return;
+ if (!stream->psnr_count)
+ return;
- fprintf(stderr, "Stream %d PSNR (Overall/Avg/Y/U/V)", stream->index);
- ovpsnr = vp8_mse2psnr((double)stream->psnr_samples_total, 255.0,
- (double)stream->psnr_sse_total);
- fprintf(stderr, " %.3f", ovpsnr);
+ fprintf(stderr, "Stream %d PSNR (Overall/Avg/Y/U/V)", stream->index);
+ ovpsnr = vp8_mse2psnr((double)stream->psnr_samples_total, 255.0,
+ (double)stream->psnr_sse_total);
+ fprintf(stderr, " %.3f", ovpsnr);
- for (i = 0; i < 4; i++)
- {
- fprintf(stderr, " %.3f", stream->psnr_totals[i]/stream->psnr_count);
- }
- fprintf(stderr, "\n");
+ for (i = 0; i < 4; i++) {
+ fprintf(stderr, " %.3f", stream->psnr_totals[i] / stream->psnr_count);
+ }
+ fprintf(stderr, "\n");
}
-float usec_to_fps(uint64_t usec, unsigned int frames)
-{
- return (float)(usec > 0 ? frames * 1000000.0 / (float)usec : 0);
+float usec_to_fps(uint64_t usec, unsigned int frames) {
+ return (float)(usec > 0 ? frames * 1000000.0 / (float)usec : 0);
}
-int main(int argc, const char **argv_)
-{
- int pass;
- vpx_image_t raw;
- int frame_avail, got_data;
+static void test_decode(struct stream_state *stream) {
+ vpx_codec_control(&stream->encoder, VP8_COPY_REFERENCE, &stream->ref_enc);
+ ctx_exit_on_error(&stream->encoder, "Failed to get encoder reference frame");
+ vpx_codec_control(&stream->decoder, VP8_COPY_REFERENCE, &stream->ref_dec);
+ ctx_exit_on_error(&stream->decoder, "Failed to get decoder reference frame");
- struct input_state input = {0};
- struct global_config global;
- struct stream_state *streams = NULL;
- char **argv, **argi;
- unsigned long cx_time = 0;
- int stream_cnt = 0;
+ if (!stream->mismatch_seen
+ && !compare_img(&stream->ref_enc.img, &stream->ref_dec.img)) {
+ /* TODO(jkoleszar): make fatal. */
+ int y[2], u[2], v[2];
+ find_mismatch(&stream->ref_enc.img, &stream->ref_dec.img,
+ y, u, v);
+ warn("Stream %d: Encode/decode mismatch on frame %d"
+ " at Y[%d, %d], U[%d, %d], V[%d, %d]",
+ stream->index, stream->frames_out,
+ y[0], y[1], u[0], u[1], v[0], v[1]);
+ stream->mismatch_seen = stream->frames_out;
+ }
+}
- exec_name = argv_[0];
+int main(int argc, const char **argv_) {
+ int pass;
+ vpx_image_t raw;
+ int frame_avail, got_data;
- if (argc < 3)
- usage_exit();
+ struct input_state input = {0};
+ struct global_config global;
+ struct stream_state *streams = NULL;
+ char **argv, **argi;
+ unsigned long cx_time = 0;
+ int stream_cnt = 0;
- /* Setup default input stream settings */
- input.framerate.num = 30;
- input.framerate.den = 1;
- input.use_i420 = 1;
+ exec_name = argv_[0];
- /* First parse the global configuration values, because we want to apply
- * other parameters on top of the default configuration provided by the
- * codec.
+ if (argc < 3)
+ usage_exit();
+
+ /* Setup default input stream settings */
+ input.framerate.num = 30;
+ input.framerate.den = 1;
+ input.use_i420 = 1;
+
+ /* First parse the global configuration values, because we want to apply
+ * other parameters on top of the default configuration provided by the
+ * codec.
+ */
+ argv = argv_dup(argc - 1, argv_ + 1);
+ parse_global_config(&global, argv);
+
+ {
+ /* Now parse each stream's parameters. Using a local scope here
+ * due to the use of 'stream' as loop variable in FOREACH_STREAM
+ * loops
*/
- argv = argv_dup(argc - 1, argv_ + 1);
- parse_global_config(&global, argv);
+ struct stream_state *stream = NULL;
- {
- /* Now parse each stream's parameters. Using a local scope here
- * due to the use of 'stream' as loop variable in FOREACH_STREAM
- * loops
- */
- struct stream_state *stream = NULL;
+ do {
+ stream = new_stream(&global, stream);
+ stream_cnt++;
+ if (!streams)
+ streams = stream;
+ } while (parse_stream_params(&global, stream, argv));
+ }
- do
- {
- stream = new_stream(&global, stream);
- stream_cnt++;
- if(!streams)
- streams = stream;
- } while(parse_stream_params(&global, stream, argv));
+ /* Check for unrecognized options */
+ for (argi = argv; *argi; argi++)
+ if (argi[0][0] == '-' && argi[0][1])
+ die("Error: Unrecognized option %s\n", *argi);
+
+ /* Handle non-option arguments */
+ input.fn = argv[0];
+
+ if (!input.fn)
+ usage_exit();
+
+ for (pass = global.pass ? global.pass - 1 : 0; pass < global.passes; pass++) {
+ int frames_in = 0;
+
+ open_input_file(&input);
+
+ /* If the input file doesn't specify its w/h (raw files), try to get
+ * the data from the first stream's configuration.
+ */
+ if (!input.w || !input.h)
+ FOREACH_STREAM( {
+ if (stream->config.cfg.g_w && stream->config.cfg.g_h) {
+ input.w = stream->config.cfg.g_w;
+ input.h = stream->config.cfg.g_h;
+ break;
+ }
+ });
+
+ /* Update stream configurations from the input file's parameters */
+ FOREACH_STREAM(set_stream_dimensions(stream, input.w, input.h));
+ FOREACH_STREAM(validate_stream_config(stream));
+
+ /* Ensure that --passes and --pass are consistent. If --pass is set and
+ * --passes=2, ensure --fpf was set.
+ */
+ if (global.pass && global.passes == 2)
+ FOREACH_STREAM( {
+ if (!stream->config.stats_fn)
+ die("Stream %d: Must specify --fpf when --pass=%d"
+ " and --passes=2\n", stream->index, global.pass);
+ });
+
+
+ /* Use the frame rate from the file only if none was specified
+ * on the command-line.
+ */
+ if (!global.have_framerate)
+ global.framerate = input.framerate;
+
+ FOREACH_STREAM(set_default_kf_interval(stream, &global));
+
+ /* Show configuration */
+ if (global.verbose && pass == 0)
+ FOREACH_STREAM(show_stream_config(stream, &global, &input));
+
+ if (pass == (global.pass ? global.pass - 1 : 0)) {
+ if (input.file_type == FILE_TYPE_Y4M)
+ /*The Y4M reader does its own allocation.
+ Just initialize this here to avoid problems if we never read any
+ frames.*/
+ memset(&raw, 0, sizeof(raw));
+ else
+ vpx_img_alloc(&raw,
+ input.use_i420 ? VPX_IMG_FMT_I420
+ : VPX_IMG_FMT_YV12,
+ input.w, input.h, 32);
+
+ FOREACH_STREAM(init_rate_histogram(&stream->rate_hist,
+ &stream->config.cfg,
+ &global.framerate));
}
- /* Check for unrecognized options */
- for (argi = argv; *argi; argi++)
- if (argi[0][0] == '-' && argi[0][1])
- die("Error: Unrecognized option %s\n", *argi);
+ FOREACH_STREAM(open_output_file(stream, &global));
+ FOREACH_STREAM(setup_pass(stream, &global, pass));
+ FOREACH_STREAM(initialize_encoder(stream, &global));
- /* Handle non-option arguments */
- input.fn = argv[0];
+ frame_avail = 1;
+ got_data = 0;
- if (!input.fn)
- usage_exit();
+ while (frame_avail || got_data) {
+ struct vpx_usec_timer timer;
- for (pass = global.pass ? global.pass - 1 : 0; pass < global.passes; pass++)
- {
- int frames_in = 0;
+ if (!global.limit || frames_in < global.limit) {
+ frame_avail = read_frame(&input, &raw);
- open_input_file(&input);
+ if (frame_avail)
+ frames_in++;
- /* If the input file doesn't specify its w/h (raw files), try to get
- * the data from the first stream's configuration.
- */
- if(!input.w || !input.h)
- FOREACH_STREAM({
- if(stream->config.cfg.g_w && stream->config.cfg.g_h)
- {
- input.w = stream->config.cfg.g_w;
- input.h = stream->config.cfg.g_h;
- break;
- }
- });
-
- /* Update stream configurations from the input file's parameters */
- FOREACH_STREAM(set_stream_dimensions(stream, input.w, input.h));
- FOREACH_STREAM(validate_stream_config(stream));
-
- /* Ensure that --passes and --pass are consistent. If --pass is set and
- * --passes=2, ensure --fpf was set.
- */
- if (global.pass && global.passes == 2)
- FOREACH_STREAM({
- if(!stream->config.stats_fn)
- die("Stream %d: Must specify --fpf when --pass=%d"
- " and --passes=2\n", stream->index, global.pass);
- });
-
-
- /* Use the frame rate from the file only if none was specified
- * on the command-line.
- */
- if (!global.have_framerate)
- global.framerate = input.framerate;
-
- FOREACH_STREAM(set_default_kf_interval(stream, &global));
-
- /* Show configuration */
- if (global.verbose && pass == 0)
- FOREACH_STREAM(show_stream_config(stream, &global, &input));
-
- if(pass == (global.pass ? global.pass - 1 : 0)) {
- if (input.file_type == FILE_TYPE_Y4M)
- /*The Y4M reader does its own allocation.
- Just initialize this here to avoid problems if we never read any
- frames.*/
- memset(&raw, 0, sizeof(raw));
- else
- vpx_img_alloc(&raw,
- input.use_i420 ? VPX_IMG_FMT_I420
- : VPX_IMG_FMT_YV12,
- input.w, input.h, 32);
-
- FOREACH_STREAM(init_rate_histogram(&stream->rate_hist,
- &stream->config.cfg,
- &global.framerate));
+ if (!global.quiet) {
+ if (stream_cnt == 1)
+ fprintf(stderr,
+ "\rPass %d/%d frame %4d/%-4d %7"PRId64"B \033[K",
+ pass + 1, global.passes, frames_in,
+ streams->frames_out, (int64_t)streams->nbytes);
+ else
+ fprintf(stderr,
+ "\rPass %d/%d frame %4d %7lu %s (%.2f fps)\033[K",
+ pass + 1, global.passes, frames_in,
+ cx_time > 9999999 ? cx_time / 1000 : cx_time,
+ cx_time > 9999999 ? "ms" : "us",
+ usec_to_fps(cx_time, frames_in));
}
- FOREACH_STREAM(open_output_file(stream, &global));
- FOREACH_STREAM(setup_pass(stream, &global, pass));
- FOREACH_STREAM(initialize_encoder(stream, &global));
+ } else
+ frame_avail = 0;
- frame_avail = 1;
+ if (frames_in > global.skip_frames) {
+ vpx_usec_timer_start(&timer);
+ FOREACH_STREAM(encode_frame(stream, &global,
+ frame_avail ? &raw : NULL,
+ frames_in));
+ vpx_usec_timer_mark(&timer);
+ cx_time += (unsigned long)vpx_usec_timer_elapsed(&timer);
+
+ FOREACH_STREAM(update_quantizer_histogram(stream));
+
got_data = 0;
+ FOREACH_STREAM(get_cx_data(stream, &global, &got_data));
- while (frame_avail || got_data)
- {
- struct vpx_usec_timer timer;
+ if (got_data && global.test_decode)
+ FOREACH_STREAM(test_decode(stream));
+ }
- if (!global.limit || frames_in < global.limit)
- {
- frame_avail = read_frame(&input, &raw);
-
- if (frame_avail)
- frames_in++;
-
- if (!global.quiet)
- {
- if(stream_cnt == 1)
- fprintf(stderr,
- "\rPass %d/%d frame %4d/%-4d %7"PRId64"B \033[K",
- pass + 1, global.passes, frames_in,
- streams->frames_out, (int64_t)streams->nbytes);
- else
- fprintf(stderr,
- "\rPass %d/%d frame %4d %7lu %s (%.2f fps)\033[K",
- pass + 1, global.passes, frames_in,
- cx_time > 9999999 ? cx_time / 1000 : cx_time,
- cx_time > 9999999 ? "ms" : "us",
- usec_to_fps(cx_time, frames_in));
- }
-
- }
- else
- frame_avail = 0;
-
- vpx_usec_timer_start(&timer);
- FOREACH_STREAM(encode_frame(stream, &global,
- frame_avail ? &raw : NULL,
- frames_in));
- vpx_usec_timer_mark(&timer);
- cx_time += (unsigned long)vpx_usec_timer_elapsed(&timer);
-
- FOREACH_STREAM(update_quantizer_histogram(stream));
-
- got_data = 0;
- FOREACH_STREAM(get_cx_data(stream, &global, &got_data));
-
- fflush(stdout);
- }
-
- if(stream_cnt > 1)
- fprintf(stderr, "\n");
-
- if (!global.quiet)
- FOREACH_STREAM(fprintf(
- stderr,
- "\rPass %d/%d frame %4d/%-4d %7"PRId64"B %7lub/f %7"PRId64"b/s"
- " %7"PRId64" %s (%.2f fps)\033[K\n", pass + 1,
- global.passes, frames_in, stream->frames_out, (int64_t)stream->nbytes,
- frames_in ? (unsigned long)(stream->nbytes * 8 / frames_in) : 0,
- frames_in ? (int64_t)stream->nbytes * 8
- * (int64_t)global.framerate.num / global.framerate.den
- / frames_in
- : 0,
- stream->cx_time > 9999999 ? stream->cx_time / 1000 : stream->cx_time,
- stream->cx_time > 9999999 ? "ms" : "us",
- usec_to_fps(stream->cx_time, frames_in));
- );
-
- if (global.show_psnr)
- FOREACH_STREAM(show_psnr(stream));
-
- FOREACH_STREAM(vpx_codec_destroy(&stream->encoder));
-
- close_input_file(&input);
-
- FOREACH_STREAM(close_output_file(stream, global.codec->fourcc));
-
- FOREACH_STREAM(stats_close(&stream->stats, global.passes-1));
-
- if (global.pass)
- break;
+ fflush(stdout);
}
- if (global.show_q_hist_buckets)
- FOREACH_STREAM(show_q_histogram(stream->counts,
- global.show_q_hist_buckets));
+ if (stream_cnt > 1)
+ fprintf(stderr, "\n");
- if (global.show_rate_hist_buckets)
- FOREACH_STREAM(show_rate_histogram(&stream->rate_hist,
- &stream->config.cfg,
- global.show_rate_hist_buckets));
- FOREACH_STREAM(destroy_rate_histogram(&stream->rate_hist));
+ if (!global.quiet)
+ FOREACH_STREAM(fprintf(
+ stderr,
+ "\rPass %d/%d frame %4d/%-4d %7"PRId64"B %7lub/f %7"PRId64"b/s"
+ " %7"PRId64" %s (%.2f fps)\033[K\n", pass + 1,
+ global.passes, frames_in, stream->frames_out, (int64_t)stream->nbytes,
+ frames_in ? (unsigned long)(stream->nbytes * 8 / frames_in) : 0,
+ frames_in ? (int64_t)stream->nbytes * 8
+ * (int64_t)global.framerate.num / global.framerate.den
+ / frames_in
+ : 0,
+ stream->cx_time > 9999999 ? stream->cx_time / 1000 : stream->cx_time,
+ stream->cx_time > 9999999 ? "ms" : "us",
+ usec_to_fps(stream->cx_time, frames_in));
+ );
- vpx_img_free(&raw);
- free(argv);
- free(streams);
- return EXIT_SUCCESS;
+ if (global.show_psnr)
+ FOREACH_STREAM(show_psnr(stream));
+
+ FOREACH_STREAM(vpx_codec_destroy(&stream->encoder));
+
+ if (global.test_decode) {
+ FOREACH_STREAM(vpx_codec_destroy(&stream->decoder));
+ FOREACH_STREAM(vpx_img_free(&stream->ref_enc.img));
+ FOREACH_STREAM(vpx_img_free(&stream->ref_dec.img));
+ }
+
+ close_input_file(&input);
+
+ FOREACH_STREAM(close_output_file(stream, global.codec->fourcc));
+
+ FOREACH_STREAM(stats_close(&stream->stats, global.passes - 1));
+
+ if (global.pass)
+ break;
+ }
+
+ if (global.show_q_hist_buckets)
+ FOREACH_STREAM(show_q_histogram(stream->counts,
+ global.show_q_hist_buckets));
+
+ if (global.show_rate_hist_buckets)
+ FOREACH_STREAM(show_rate_histogram(&stream->rate_hist,
+ &stream->config.cfg,
+ global.show_rate_hist_buckets));
+ FOREACH_STREAM(destroy_rate_histogram(&stream->rate_hist));
+
+#if CONFIG_INTERNAL_STATS
+ /* TODO(jkoleszar): This doesn't belong in this executable. Do it for now,
+ * to match some existing utilities.
+ */
+ FOREACH_STREAM({
+ FILE *f = fopen("opsnr.stt", "a");
+ if (stream->mismatch_seen) {
+ fprintf(f, "First mismatch occurred in frame %d\n",
+ stream->mismatch_seen);
+ } else {
+ fprintf(f, "No mismatch detected in recon buffers\n");
+ }
+ fclose(f);
+ });
+#endif
+
+ vpx_img_free(&raw);
+ free(argv);
+ free(streams);
+ return EXIT_SUCCESS;
}
diff --git a/y4minput.c b/y4minput.c
index ff9ffbc..24f0c15 100644
--- a/y4minput.c
+++ b/y4minput.c
@@ -14,7 +14,7 @@
#include <string.h>
#include "y4minput.h"
-static int y4m_parse_tags(y4m_input *_y4m,char *_tags){
+static int y4m_parse_tags(y4m_input *_y4m, char *_tags) {
int got_w;
int got_h;
int got_fps;
@@ -23,55 +23,61 @@
int got_chroma;
char *p;
char *q;
- got_w=got_h=got_fps=got_interlace=got_par=got_chroma=0;
- for(p=_tags;;p=q){
+ got_w = got_h = got_fps = got_interlace = got_par = got_chroma = 0;
+ for (p = _tags;; p = q) {
/*Skip any leading spaces.*/
- while(*p==' ')p++;
+ while (*p == ' ')p++;
/*If that's all we have, stop.*/
- if(p[0]=='\0')break;
+ if (p[0] == '\0')break;
/*Find the end of this tag.*/
- for(q=p+1;*q!='\0'&&*q!=' ';q++);
+ for (q = p + 1; *q != '\0' && *q != ' '; q++);
/*Process the tag.*/
- switch(p[0]){
- case 'W':{
- if(sscanf(p+1,"%d",&_y4m->pic_w)!=1)return -1;
- got_w=1;
- }break;
- case 'H':{
- if(sscanf(p+1,"%d",&_y4m->pic_h)!=1)return -1;
- got_h=1;
- }break;
- case 'F':{
- if(sscanf(p+1,"%d:%d",&_y4m->fps_n,&_y4m->fps_d)!=2){
+ switch (p[0]) {
+ case 'W': {
+ if (sscanf(p + 1, "%d", &_y4m->pic_w) != 1)return -1;
+ got_w = 1;
+ }
+ break;
+ case 'H': {
+ if (sscanf(p + 1, "%d", &_y4m->pic_h) != 1)return -1;
+ got_h = 1;
+ }
+ break;
+ case 'F': {
+ if (sscanf(p + 1, "%d:%d", &_y4m->fps_n, &_y4m->fps_d) != 2) {
return -1;
}
- got_fps=1;
- }break;
- case 'I':{
- _y4m->interlace=p[1];
- got_interlace=1;
- }break;
- case 'A':{
- if(sscanf(p+1,"%d:%d",&_y4m->par_n,&_y4m->par_d)!=2){
+ got_fps = 1;
+ }
+ break;
+ case 'I': {
+ _y4m->interlace = p[1];
+ got_interlace = 1;
+ }
+ break;
+ case 'A': {
+ if (sscanf(p + 1, "%d:%d", &_y4m->par_n, &_y4m->par_d) != 2) {
return -1;
}
- got_par=1;
- }break;
- case 'C':{
- if(q-p>16)return -1;
- memcpy(_y4m->chroma_type,p+1,q-p-1);
- _y4m->chroma_type[q-p-1]='\0';
- got_chroma=1;
- }break;
+ got_par = 1;
+ }
+ break;
+ case 'C': {
+ if (q - p > 16)return -1;
+ memcpy(_y4m->chroma_type, p + 1, q - p - 1);
+ _y4m->chroma_type[q - p - 1] = '\0';
+ got_chroma = 1;
+ }
+ break;
/*Ignore unknown tags.*/
}
}
- if(!got_w||!got_h||!got_fps)return -1;
- if(!got_interlace)_y4m->interlace='?';
- if(!got_par)_y4m->par_n=_y4m->par_d=0;
+ if (!got_w || !got_h || !got_fps)return -1;
+ if (!got_interlace)_y4m->interlace = '?';
+ if (!got_par)_y4m->par_n = _y4m->par_d = 0;
/*Chroma-type is not specified in older files, e.g., those generated by
mplayer.*/
- if(!got_chroma)strcpy(_y4m->chroma_type,"420");
+ if (!got_chroma)strcpy(_y4m->chroma_type, "420");
return 0;
}
@@ -145,48 +151,48 @@
lines, and they are vertically co-sited with the luma samples in both the
mpeg2 and jpeg cases (thus requiring no vertical resampling).*/
static void y4m_42xmpeg2_42xjpeg_helper(unsigned char *_dst,
- const unsigned char *_src,int _c_w,int _c_h){
+ const unsigned char *_src, int _c_w, int _c_h) {
int y;
int x;
- for(y=0;y<_c_h;y++){
+ for (y = 0; y < _c_h; y++) {
/*Filter: [4 -17 114 35 -9 1]/128, derived from a 6-tap Lanczos
window.*/
- for(x=0;x<OC_MINI(_c_w,2);x++){
- _dst[x]=(unsigned char)OC_CLAMPI(0,(4*_src[0]-17*_src[OC_MAXI(x-1,0)]+
- 114*_src[x]+35*_src[OC_MINI(x+1,_c_w-1)]-9*_src[OC_MINI(x+2,_c_w-1)]+
- _src[OC_MINI(x+3,_c_w-1)]+64)>>7,255);
+ for (x = 0; x < OC_MINI(_c_w, 2); x++) {
+ _dst[x] = (unsigned char)OC_CLAMPI(0, (4 * _src[0] - 17 * _src[OC_MAXI(x - 1, 0)] +
+ 114 * _src[x] + 35 * _src[OC_MINI(x + 1, _c_w - 1)] - 9 * _src[OC_MINI(x + 2, _c_w - 1)] +
+ _src[OC_MINI(x + 3, _c_w - 1)] + 64) >> 7, 255);
}
- for(;x<_c_w-3;x++){
- _dst[x]=(unsigned char)OC_CLAMPI(0,(4*_src[x-2]-17*_src[x-1]+
- 114*_src[x]+35*_src[x+1]-9*_src[x+2]+_src[x+3]+64)>>7,255);
+ for (; x < _c_w - 3; x++) {
+ _dst[x] = (unsigned char)OC_CLAMPI(0, (4 * _src[x - 2] - 17 * _src[x - 1] +
+ 114 * _src[x] + 35 * _src[x + 1] - 9 * _src[x + 2] + _src[x + 3] + 64) >> 7, 255);
}
- for(;x<_c_w;x++){
- _dst[x]=(unsigned char)OC_CLAMPI(0,(4*_src[x-2]-17*_src[x-1]+
- 114*_src[x]+35*_src[OC_MINI(x+1,_c_w-1)]-9*_src[OC_MINI(x+2,_c_w-1)]+
- _src[_c_w-1]+64)>>7,255);
+ for (; x < _c_w; x++) {
+ _dst[x] = (unsigned char)OC_CLAMPI(0, (4 * _src[x - 2] - 17 * _src[x - 1] +
+ 114 * _src[x] + 35 * _src[OC_MINI(x + 1, _c_w - 1)] - 9 * _src[OC_MINI(x + 2, _c_w - 1)] +
+ _src[_c_w - 1] + 64) >> 7, 255);
}
- _dst+=_c_w;
- _src+=_c_w;
+ _dst += _c_w;
+ _src += _c_w;
}
}
/*Handles both 422 and 420mpeg2 to 422jpeg and 420jpeg, respectively.*/
-static void y4m_convert_42xmpeg2_42xjpeg(y4m_input *_y4m,unsigned char *_dst,
- unsigned char *_aux){
+static void y4m_convert_42xmpeg2_42xjpeg(y4m_input *_y4m, unsigned char *_dst,
+ unsigned char *_aux) {
int c_w;
int c_h;
int c_sz;
int pli;
/*Skip past the luma data.*/
- _dst+=_y4m->pic_w*_y4m->pic_h;
+ _dst += _y4m->pic_w * _y4m->pic_h;
/*Compute the size of each chroma plane.*/
- c_w=(_y4m->pic_w+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h;
- c_h=(_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v;
- c_sz=c_w*c_h;
- for(pli=1;pli<3;pli++){
- y4m_42xmpeg2_42xjpeg_helper(_dst,_aux,c_w,c_h);
- _dst+=c_sz;
- _aux+=c_sz;
+ c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
+ c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
+ c_sz = c_w * c_h;
+ for (pli = 1; pli < 3; pli++) {
+ y4m_42xmpeg2_42xjpeg_helper(_dst, _aux, c_w, c_h);
+ _dst += c_sz;
+ _aux += c_sz;
}
}
@@ -233,8 +239,8 @@
the chroma plane's resolution) to the right.
Then we use another filter to move the C_r location down one quarter pixel,
and the C_b location up one quarter pixel.*/
-static void y4m_convert_42xpaldv_42xjpeg(y4m_input *_y4m,unsigned char *_dst,
- unsigned char *_aux){
+static void y4m_convert_42xpaldv_42xjpeg(y4m_input *_y4m, unsigned char *_dst,
+ unsigned char *_aux) {
unsigned char *tmp;
int c_w;
int c_h;
@@ -243,69 +249,71 @@
int y;
int x;
/*Skip past the luma data.*/
- _dst+=_y4m->pic_w*_y4m->pic_h;
+ _dst += _y4m->pic_w * _y4m->pic_h;
/*Compute the size of each chroma plane.*/
- c_w=(_y4m->pic_w+1)/2;
- c_h=(_y4m->pic_h+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h;
- c_sz=c_w*c_h;
- tmp=_aux+2*c_sz;
- for(pli=1;pli<3;pli++){
+ c_w = (_y4m->pic_w + 1) / 2;
+ c_h = (_y4m->pic_h + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
+ c_sz = c_w * c_h;
+ tmp = _aux + 2 * c_sz;
+ for (pli = 1; pli < 3; pli++) {
/*First do the horizontal re-sampling.
This is the same as the mpeg2 case, except that after the horizontal
case, we need to apply a second vertical filter.*/
- y4m_42xmpeg2_42xjpeg_helper(tmp,_aux,c_w,c_h);
- _aux+=c_sz;
- switch(pli){
- case 1:{
+ y4m_42xmpeg2_42xjpeg_helper(tmp, _aux, c_w, c_h);
+ _aux += c_sz;
+ switch (pli) {
+ case 1: {
/*Slide C_b up a quarter-pel.
This is the same filter used above, but in the other order.*/
- for(x=0;x<c_w;x++){
- for(y=0;y<OC_MINI(c_h,3);y++){
- _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,(tmp[0]
- -9*tmp[OC_MAXI(y-2,0)*c_w]+35*tmp[OC_MAXI(y-1,0)*c_w]
- +114*tmp[y*c_w]-17*tmp[OC_MINI(y+1,c_h-1)*c_w]
- +4*tmp[OC_MINI(y+2,c_h-1)*c_w]+64)>>7,255);
+ for (x = 0; x < c_w; x++) {
+ for (y = 0; y < OC_MINI(c_h, 3); y++) {
+ _dst[y * c_w] = (unsigned char)OC_CLAMPI(0, (tmp[0]
+ - 9 * tmp[OC_MAXI(y - 2, 0) * c_w] + 35 * tmp[OC_MAXI(y - 1, 0) * c_w]
+ + 114 * tmp[y * c_w] - 17 * tmp[OC_MINI(y + 1, c_h - 1) * c_w]
+ + 4 * tmp[OC_MINI(y + 2, c_h - 1) * c_w] + 64) >> 7, 255);
}
- for(;y<c_h-2;y++){
- _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,(tmp[(y-3)*c_w]
- -9*tmp[(y-2)*c_w]+35*tmp[(y-1)*c_w]+114*tmp[y*c_w]
- -17*tmp[(y+1)*c_w]+4*tmp[(y+2)*c_w]+64)>>7,255);
+ for (; y < c_h - 2; y++) {
+ _dst[y * c_w] = (unsigned char)OC_CLAMPI(0, (tmp[(y - 3) * c_w]
+ - 9 * tmp[(y - 2) * c_w] + 35 * tmp[(y - 1) * c_w] + 114 * tmp[y * c_w]
+ - 17 * tmp[(y + 1) * c_w] + 4 * tmp[(y + 2) * c_w] + 64) >> 7, 255);
}
- for(;y<c_h;y++){
- _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,(tmp[(y-3)*c_w]
- -9*tmp[(y-2)*c_w]+35*tmp[(y-1)*c_w]+114*tmp[y*c_w]
- -17*tmp[OC_MINI(y+1,c_h-1)*c_w]+4*tmp[(c_h-1)*c_w]+64)>>7,255);
+ for (; y < c_h; y++) {
+ _dst[y * c_w] = (unsigned char)OC_CLAMPI(0, (tmp[(y - 3) * c_w]
+ - 9 * tmp[(y - 2) * c_w] + 35 * tmp[(y - 1) * c_w] + 114 * tmp[y * c_w]
+ - 17 * tmp[OC_MINI(y + 1, c_h - 1) * c_w] + 4 * tmp[(c_h - 1) * c_w] + 64) >> 7, 255);
}
_dst++;
tmp++;
}
- _dst+=c_sz-c_w;
- tmp-=c_w;
- }break;
- case 2:{
+ _dst += c_sz - c_w;
+ tmp -= c_w;
+ }
+ break;
+ case 2: {
/*Slide C_r down a quarter-pel.
This is the same as the horizontal filter.*/
- for(x=0;x<c_w;x++){
- for(y=0;y<OC_MINI(c_h,2);y++){
- _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,(4*tmp[0]
- -17*tmp[OC_MAXI(y-1,0)*c_w]+114*tmp[y*c_w]
- +35*tmp[OC_MINI(y+1,c_h-1)*c_w]-9*tmp[OC_MINI(y+2,c_h-1)*c_w]
- +tmp[OC_MINI(y+3,c_h-1)*c_w]+64)>>7,255);
+ for (x = 0; x < c_w; x++) {
+ for (y = 0; y < OC_MINI(c_h, 2); y++) {
+ _dst[y * c_w] = (unsigned char)OC_CLAMPI(0, (4 * tmp[0]
+ - 17 * tmp[OC_MAXI(y - 1, 0) * c_w] + 114 * tmp[y * c_w]
+ + 35 * tmp[OC_MINI(y + 1, c_h - 1) * c_w] - 9 * tmp[OC_MINI(y + 2, c_h - 1) * c_w]
+ + tmp[OC_MINI(y + 3, c_h - 1) * c_w] + 64) >> 7, 255);
}
- for(;y<c_h-3;y++){
- _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,(4*tmp[(y-2)*c_w]
- -17*tmp[(y-1)*c_w]+114*tmp[y*c_w]+35*tmp[(y+1)*c_w]
- -9*tmp[(y+2)*c_w]+tmp[(y+3)*c_w]+64)>>7,255);
+ for (; y < c_h - 3; y++) {
+ _dst[y * c_w] = (unsigned char)OC_CLAMPI(0, (4 * tmp[(y - 2) * c_w]
+ - 17 * tmp[(y - 1) * c_w] + 114 * tmp[y * c_w] + 35 * tmp[(y + 1) * c_w]
+ - 9 * tmp[(y + 2) * c_w] + tmp[(y + 3) * c_w] + 64) >> 7, 255);
}
- for(;y<c_h;y++){
- _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,(4*tmp[(y-2)*c_w]
- -17*tmp[(y-1)*c_w]+114*tmp[y*c_w]+35*tmp[OC_MINI(y+1,c_h-1)*c_w]
- -9*tmp[OC_MINI(y+2,c_h-1)*c_w]+tmp[(c_h-1)*c_w]+64)>>7,255);
+ for (; y < c_h; y++) {
+ _dst[y * c_w] = (unsigned char)OC_CLAMPI(0, (4 * tmp[(y - 2) * c_w]
+ - 17 * tmp[(y - 1) * c_w] + 114 * tmp[y * c_w] + 35 * tmp[OC_MINI(y + 1, c_h - 1) * c_w]
+ - 9 * tmp[OC_MINI(y + 2, c_h - 1) * c_w] + tmp[(c_h - 1) * c_w] + 64) >> 7, 255);
}
_dst++;
tmp++;
}
- }break;
+ }
+ break;
}
/*For actual interlaced material, this would have to be done separately on
each field, and the shift amounts would be different.
@@ -320,27 +328,27 @@
/*Perform vertical filtering to reduce a single plane from 4:2:2 to 4:2:0.
This is used as a helper by several converation routines.*/
static void y4m_422jpeg_420jpeg_helper(unsigned char *_dst,
- const unsigned char *_src,int _c_w,int _c_h){
+ const unsigned char *_src, int _c_w, int _c_h) {
int y;
int x;
/*Filter: [3 -17 78 78 -17 3]/128, derived from a 6-tap Lanczos window.*/
- for(x=0;x<_c_w;x++){
- for(y=0;y<OC_MINI(_c_h,2);y+=2){
- _dst[(y>>1)*_c_w]=OC_CLAMPI(0,(64*_src[0]
- +78*_src[OC_MINI(1,_c_h-1)*_c_w]
- -17*_src[OC_MINI(2,_c_h-1)*_c_w]
- +3*_src[OC_MINI(3,_c_h-1)*_c_w]+64)>>7,255);
+ for (x = 0; x < _c_w; x++) {
+ for (y = 0; y < OC_MINI(_c_h, 2); y += 2) {
+ _dst[(y >> 1)*_c_w] = OC_CLAMPI(0, (64 * _src[0]
+ + 78 * _src[OC_MINI(1, _c_h - 1) * _c_w]
+ - 17 * _src[OC_MINI(2, _c_h - 1) * _c_w]
+ + 3 * _src[OC_MINI(3, _c_h - 1) * _c_w] + 64) >> 7, 255);
}
- for(;y<_c_h-3;y+=2){
- _dst[(y>>1)*_c_w]=OC_CLAMPI(0,(3*(_src[(y-2)*_c_w]+_src[(y+3)*_c_w])
- -17*(_src[(y-1)*_c_w]+_src[(y+2)*_c_w])
- +78*(_src[y*_c_w]+_src[(y+1)*_c_w])+64)>>7,255);
+ for (; y < _c_h - 3; y += 2) {
+ _dst[(y >> 1)*_c_w] = OC_CLAMPI(0, (3 * (_src[(y - 2) * _c_w] + _src[(y + 3) * _c_w])
+ - 17 * (_src[(y - 1) * _c_w] + _src[(y + 2) * _c_w])
+ + 78 * (_src[y * _c_w] + _src[(y + 1) * _c_w]) + 64) >> 7, 255);
}
- for(;y<_c_h;y+=2){
- _dst[(y>>1)*_c_w]=OC_CLAMPI(0,(3*(_src[(y-2)*_c_w]
- +_src[(_c_h-1)*_c_w])-17*(_src[(y-1)*_c_w]
- +_src[OC_MINI(y+2,_c_h-1)*_c_w])
- +78*(_src[y*_c_w]+_src[OC_MINI(y+1,_c_h-1)*_c_w])+64)>>7,255);
+ for (; y < _c_h; y += 2) {
+ _dst[(y >> 1)*_c_w] = OC_CLAMPI(0, (3 * (_src[(y - 2) * _c_w]
+ + _src[(_c_h - 1) * _c_w]) - 17 * (_src[(y - 1) * _c_w]
+ + _src[OC_MINI(y + 2, _c_h - 1) * _c_w])
+ + 78 * (_src[y * _c_w] + _src[OC_MINI(y + 1, _c_h - 1) * _c_w]) + 64) >> 7, 255);
}
_src++;
_dst++;
@@ -385,8 +393,8 @@
We use a resampling filter to decimate the chroma planes by two in the
vertical direction.*/
-static void y4m_convert_422jpeg_420jpeg(y4m_input *_y4m,unsigned char *_dst,
- unsigned char *_aux){
+static void y4m_convert_422jpeg_420jpeg(y4m_input *_y4m, unsigned char *_dst,
+ unsigned char *_aux) {
int c_w;
int c_h;
int c_sz;
@@ -395,18 +403,18 @@
int dst_c_sz;
int pli;
/*Skip past the luma data.*/
- _dst+=_y4m->pic_w*_y4m->pic_h;
+ _dst += _y4m->pic_w * _y4m->pic_h;
/*Compute the size of each chroma plane.*/
- c_w=(_y4m->pic_w+_y4m->src_c_dec_h-1)/_y4m->src_c_dec_h;
- c_h=_y4m->pic_h;
- dst_c_w=(_y4m->pic_w+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h;
- dst_c_h=(_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v;
- c_sz=c_w*c_h;
- dst_c_sz=dst_c_w*dst_c_h;
- for(pli=1;pli<3;pli++){
- y4m_422jpeg_420jpeg_helper(_dst,_aux,c_w,c_h);
- _aux+=c_sz;
- _dst+=dst_c_sz;
+ c_w = (_y4m->pic_w + _y4m->src_c_dec_h - 1) / _y4m->src_c_dec_h;
+ c_h = _y4m->pic_h;
+ dst_c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
+ dst_c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
+ c_sz = c_w * c_h;
+ dst_c_sz = dst_c_w * dst_c_h;
+ for (pli = 1; pli < 3; pli++) {
+ y4m_422jpeg_420jpeg_helper(_dst, _aux, c_w, c_h);
+ _aux += c_sz;
+ _dst += dst_c_sz;
}
}
@@ -450,8 +458,8 @@
pixel (at the original chroma resolution) to the right.
Then we use a second resampling filter to decimate the chroma planes by two
in the vertical direction.*/
-static void y4m_convert_422_420jpeg(y4m_input *_y4m,unsigned char *_dst,
- unsigned char *_aux){
+static void y4m_convert_422_420jpeg(y4m_input *_y4m, unsigned char *_dst,
+ unsigned char *_aux) {
unsigned char *tmp;
int c_w;
int c_h;
@@ -460,24 +468,24 @@
int dst_c_sz;
int pli;
/*Skip past the luma data.*/
- _dst+=_y4m->pic_w*_y4m->pic_h;
+ _dst += _y4m->pic_w * _y4m->pic_h;
/*Compute the size of each chroma plane.*/
- c_w=(_y4m->pic_w+_y4m->src_c_dec_h-1)/_y4m->src_c_dec_h;
- c_h=_y4m->pic_h;
- dst_c_h=(_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v;
- c_sz=c_w*c_h;
- dst_c_sz=c_w*dst_c_h;
- tmp=_aux+2*c_sz;
- for(pli=1;pli<3;pli++){
+ c_w = (_y4m->pic_w + _y4m->src_c_dec_h - 1) / _y4m->src_c_dec_h;
+ c_h = _y4m->pic_h;
+ dst_c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
+ c_sz = c_w * c_h;
+ dst_c_sz = c_w * dst_c_h;
+ tmp = _aux + 2 * c_sz;
+ for (pli = 1; pli < 3; pli++) {
/*In reality, the horizontal and vertical steps could be pipelined, for
less memory consumption and better cache performance, but we do them
separately for simplicity.*/
/*First do horizontal filtering (convert to 422jpeg)*/
- y4m_42xmpeg2_42xjpeg_helper(tmp,_aux,c_w,c_h);
+ y4m_42xmpeg2_42xjpeg_helper(tmp, _aux, c_w, c_h);
/*Now do the vertical filtering.*/
- y4m_422jpeg_420jpeg_helper(_dst,tmp,c_w,c_h);
- _aux+=c_sz;
- _dst+=dst_c_sz;
+ y4m_422jpeg_420jpeg_helper(_dst, tmp, c_w, c_h);
+ _aux += c_sz;
+ _dst += dst_c_sz;
}
}
@@ -522,8 +530,8 @@
right.
Then we use another filter to decimate the planes by 2 in the vertical
direction.*/
-static void y4m_convert_411_420jpeg(y4m_input *_y4m,unsigned char *_dst,
- unsigned char *_aux){
+static void y4m_convert_411_420jpeg(y4m_input *_y4m, unsigned char *_dst,
+ unsigned char *_aux) {
unsigned char *tmp;
int c_w;
int c_h;
@@ -536,57 +544,57 @@
int y;
int x;
/*Skip past the luma data.*/
- _dst+=_y4m->pic_w*_y4m->pic_h;
+ _dst += _y4m->pic_w * _y4m->pic_h;
/*Compute the size of each chroma plane.*/
- c_w=(_y4m->pic_w+_y4m->src_c_dec_h-1)/_y4m->src_c_dec_h;
- c_h=_y4m->pic_h;
- dst_c_w=(_y4m->pic_w+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h;
- dst_c_h=(_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v;
- c_sz=c_w*c_h;
- dst_c_sz=dst_c_w*dst_c_h;
- tmp_sz=dst_c_w*c_h;
- tmp=_aux+2*c_sz;
- for(pli=1;pli<3;pli++){
+ c_w = (_y4m->pic_w + _y4m->src_c_dec_h - 1) / _y4m->src_c_dec_h;
+ c_h = _y4m->pic_h;
+ dst_c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
+ dst_c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
+ c_sz = c_w * c_h;
+ dst_c_sz = dst_c_w * dst_c_h;
+ tmp_sz = dst_c_w * c_h;
+ tmp = _aux + 2 * c_sz;
+ for (pli = 1; pli < 3; pli++) {
/*In reality, the horizontal and vertical steps could be pipelined, for
less memory consumption and better cache performance, but we do them
separately for simplicity.*/
/*First do horizontal filtering (convert to 422jpeg)*/
- for(y=0;y<c_h;y++){
+ for (y = 0; y < c_h; y++) {
/*Filters: [1 110 18 -1]/128 and [-3 50 86 -5]/128, both derived from a
4-tap Mitchell window.*/
- for(x=0;x<OC_MINI(c_w,1);x++){
- tmp[x<<1]=(unsigned char)OC_CLAMPI(0,(111*_aux[0]
- +18*_aux[OC_MINI(1,c_w-1)]-_aux[OC_MINI(2,c_w-1)]+64)>>7,255);
- tmp[x<<1|1]=(unsigned char)OC_CLAMPI(0,(47*_aux[0]
- +86*_aux[OC_MINI(1,c_w-1)]-5*_aux[OC_MINI(2,c_w-1)]+64)>>7,255);
+ for (x = 0; x < OC_MINI(c_w, 1); x++) {
+ tmp[x << 1] = (unsigned char)OC_CLAMPI(0, (111 * _aux[0]
+ + 18 * _aux[OC_MINI(1, c_w - 1)] - _aux[OC_MINI(2, c_w - 1)] + 64) >> 7, 255);
+ tmp[x << 1 | 1] = (unsigned char)OC_CLAMPI(0, (47 * _aux[0]
+ + 86 * _aux[OC_MINI(1, c_w - 1)] - 5 * _aux[OC_MINI(2, c_w - 1)] + 64) >> 7, 255);
}
- for(;x<c_w-2;x++){
- tmp[x<<1]=(unsigned char)OC_CLAMPI(0,(_aux[x-1]+110*_aux[x]
- +18*_aux[x+1]-_aux[x+2]+64)>>7,255);
- tmp[x<<1|1]=(unsigned char)OC_CLAMPI(0,(-3*_aux[x-1]+50*_aux[x]
- +86*_aux[x+1]-5*_aux[x+2]+64)>>7,255);
+ for (; x < c_w - 2; x++) {
+ tmp[x << 1] = (unsigned char)OC_CLAMPI(0, (_aux[x - 1] + 110 * _aux[x]
+ + 18 * _aux[x + 1] - _aux[x + 2] + 64) >> 7, 255);
+ tmp[x << 1 | 1] = (unsigned char)OC_CLAMPI(0, (-3 * _aux[x - 1] + 50 * _aux[x]
+ + 86 * _aux[x + 1] - 5 * _aux[x + 2] + 64) >> 7, 255);
}
- for(;x<c_w;x++){
- tmp[x<<1]=(unsigned char)OC_CLAMPI(0,(_aux[x-1]+110*_aux[x]
- +18*_aux[OC_MINI(x+1,c_w-1)]-_aux[c_w-1]+64)>>7,255);
- if((x<<1|1)<dst_c_w){
- tmp[x<<1|1]=(unsigned char)OC_CLAMPI(0,(-3*_aux[x-1]+50*_aux[x]
- +86*_aux[OC_MINI(x+1,c_w-1)]-5*_aux[c_w-1]+64)>>7,255);
+ for (; x < c_w; x++) {
+ tmp[x << 1] = (unsigned char)OC_CLAMPI(0, (_aux[x - 1] + 110 * _aux[x]
+ + 18 * _aux[OC_MINI(x + 1, c_w - 1)] - _aux[c_w - 1] + 64) >> 7, 255);
+ if ((x << 1 | 1) < dst_c_w) {
+ tmp[x << 1 | 1] = (unsigned char)OC_CLAMPI(0, (-3 * _aux[x - 1] + 50 * _aux[x]
+ + 86 * _aux[OC_MINI(x + 1, c_w - 1)] - 5 * _aux[c_w - 1] + 64) >> 7, 255);
}
}
- tmp+=dst_c_w;
- _aux+=c_w;
+ tmp += dst_c_w;
+ _aux += c_w;
}
- tmp-=tmp_sz;
+ tmp -= tmp_sz;
/*Now do the vertical filtering.*/
- y4m_422jpeg_420jpeg_helper(_dst,tmp,dst_c_w,c_h);
- _dst+=dst_c_sz;
+ y4m_422jpeg_420jpeg_helper(_dst, tmp, dst_c_w, c_h);
+ _dst += dst_c_sz;
}
}
/*Convert 444 to 420jpeg.*/
-static void y4m_convert_444_420jpeg(y4m_input *_y4m,unsigned char *_dst,
- unsigned char *_aux){
+static void y4m_convert_444_420jpeg(y4m_input *_y4m, unsigned char *_dst,
+ unsigned char *_aux) {
unsigned char *tmp;
int c_w;
int c_h;
@@ -599,218 +607,207 @@
int y;
int x;
/*Skip past the luma data.*/
- _dst+=_y4m->pic_w*_y4m->pic_h;
+ _dst += _y4m->pic_w * _y4m->pic_h;
/*Compute the size of each chroma plane.*/
- c_w=(_y4m->pic_w+_y4m->src_c_dec_h-1)/_y4m->src_c_dec_h;
- c_h=_y4m->pic_h;
- dst_c_w=(_y4m->pic_w+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h;
- dst_c_h=(_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v;
- c_sz=c_w*c_h;
- dst_c_sz=dst_c_w*dst_c_h;
- tmp_sz=dst_c_w*c_h;
- tmp=_aux+2*c_sz;
- for(pli=1;pli<3;pli++){
+ c_w = (_y4m->pic_w + _y4m->src_c_dec_h - 1) / _y4m->src_c_dec_h;
+ c_h = _y4m->pic_h;
+ dst_c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
+ dst_c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
+ c_sz = c_w * c_h;
+ dst_c_sz = dst_c_w * dst_c_h;
+ tmp_sz = dst_c_w * c_h;
+ tmp = _aux + 2 * c_sz;
+ for (pli = 1; pli < 3; pli++) {
/*Filter: [3 -17 78 78 -17 3]/128, derived from a 6-tap Lanczos window.*/
- for(y=0;y<c_h;y++){
- for(x=0;x<OC_MINI(c_w,2);x+=2){
- tmp[x>>1]=OC_CLAMPI(0,(64*_aux[0]+78*_aux[OC_MINI(1,c_w-1)]
- -17*_aux[OC_MINI(2,c_w-1)]
- +3*_aux[OC_MINI(3,c_w-1)]+64)>>7,255);
+ for (y = 0; y < c_h; y++) {
+ for (x = 0; x < OC_MINI(c_w, 2); x += 2) {
+ tmp[x >> 1] = OC_CLAMPI(0, (64 * _aux[0] + 78 * _aux[OC_MINI(1, c_w - 1)]
+ - 17 * _aux[OC_MINI(2, c_w - 1)]
+ + 3 * _aux[OC_MINI(3, c_w - 1)] + 64) >> 7, 255);
}
- for(;x<c_w-3;x+=2){
- tmp[x>>1]=OC_CLAMPI(0,(3*(_aux[x-2]+_aux[x+3])
- -17*(_aux[x-1]+_aux[x+2])+78*(_aux[x]+_aux[x+1])+64)>>7,255);
+ for (; x < c_w - 3; x += 2) {
+ tmp[x >> 1] = OC_CLAMPI(0, (3 * (_aux[x - 2] + _aux[x + 3])
+ - 17 * (_aux[x - 1] + _aux[x + 2]) + 78 * (_aux[x] + _aux[x + 1]) + 64) >> 7, 255);
}
- for(;x<c_w;x+=2){
- tmp[x>>1]=OC_CLAMPI(0,(3*(_aux[x-2]+_aux[c_w-1])-
- 17*(_aux[x-1]+_aux[OC_MINI(x+2,c_w-1)])+
- 78*(_aux[x]+_aux[OC_MINI(x+1,c_w-1)])+64)>>7,255);
+ for (; x < c_w; x += 2) {
+ tmp[x >> 1] = OC_CLAMPI(0, (3 * (_aux[x - 2] + _aux[c_w - 1]) -
+ 17 * (_aux[x - 1] + _aux[OC_MINI(x + 2, c_w - 1)]) +
+ 78 * (_aux[x] + _aux[OC_MINI(x + 1, c_w - 1)]) + 64) >> 7, 255);
}
- tmp+=dst_c_w;
- _aux+=c_w;
+ tmp += dst_c_w;
+ _aux += c_w;
}
- tmp-=tmp_sz;
+ tmp -= tmp_sz;
/*Now do the vertical filtering.*/
- y4m_422jpeg_420jpeg_helper(_dst,tmp,dst_c_w,c_h);
- _dst+=dst_c_sz;
+ y4m_422jpeg_420jpeg_helper(_dst, tmp, dst_c_w, c_h);
+ _dst += dst_c_sz;
}
}
/*The image is padded with empty chroma components at 4:2:0.*/
-static void y4m_convert_mono_420jpeg(y4m_input *_y4m,unsigned char *_dst,
- unsigned char *_aux){
+static void y4m_convert_mono_420jpeg(y4m_input *_y4m, unsigned char *_dst,
+ unsigned char *_aux) {
int c_sz;
- _dst+=_y4m->pic_w*_y4m->pic_h;
- c_sz=((_y4m->pic_w+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h)*
- ((_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v);
- memset(_dst,128,c_sz*2);
+ _dst += _y4m->pic_w * _y4m->pic_h;
+ c_sz = ((_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h) *
+ ((_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v);
+ memset(_dst, 128, c_sz * 2);
}
/*No conversion function needed.*/
-static void y4m_convert_null(y4m_input *_y4m,unsigned char *_dst,
- unsigned char *_aux){
+static void y4m_convert_null(y4m_input *_y4m, unsigned char *_dst,
+ unsigned char *_aux) {
}
-int y4m_input_open(y4m_input *_y4m,FILE *_fin,char *_skip,int _nskip){
+int y4m_input_open(y4m_input *_y4m, FILE *_fin, char *_skip, int _nskip) {
char buffer[80];
int ret;
int i;
/*Read until newline, or 80 cols, whichever happens first.*/
- for(i=0;i<79;i++){
- if(_nskip>0){
- buffer[i]=*_skip++;
+ for (i = 0; i < 79; i++) {
+ if (_nskip > 0) {
+ buffer[i] = *_skip++;
_nskip--;
+ } else {
+ ret = (int)fread(buffer + i, 1, 1, _fin);
+ if (ret < 1)return -1;
}
- else{
- ret=(int)fread(buffer+i,1,1,_fin);
- if(ret<1)return -1;
- }
- if(buffer[i]=='\n')break;
+ if (buffer[i] == '\n')break;
}
/*We skipped too much header data.*/
- if(_nskip>0)return -1;
- if(i==79){
- fprintf(stderr,"Error parsing header; not a YUV2MPEG2 file?\n");
+ if (_nskip > 0)return -1;
+ if (i == 79) {
+ fprintf(stderr, "Error parsing header; not a YUV2MPEG2 file?\n");
return -1;
}
- buffer[i]='\0';
- if(memcmp(buffer,"YUV4MPEG",8)){
- fprintf(stderr,"Incomplete magic for YUV4MPEG file.\n");
+ buffer[i] = '\0';
+ if (memcmp(buffer, "YUV4MPEG", 8)) {
+ fprintf(stderr, "Incomplete magic for YUV4MPEG file.\n");
return -1;
}
- if(buffer[8]!='2'){
- fprintf(stderr,"Incorrect YUV input file version; YUV4MPEG2 required.\n");
+ if (buffer[8] != '2') {
+ fprintf(stderr, "Incorrect YUV input file version; YUV4MPEG2 required.\n");
}
- ret=y4m_parse_tags(_y4m,buffer+5);
- if(ret<0){
- fprintf(stderr,"Error parsing YUV4MPEG2 header.\n");
+ ret = y4m_parse_tags(_y4m, buffer + 5);
+ if (ret < 0) {
+ fprintf(stderr, "Error parsing YUV4MPEG2 header.\n");
return ret;
}
- if(_y4m->interlace=='?'){
- fprintf(stderr,"Warning: Input video interlacing format unknown; "
- "assuming progressive scan.\n");
- }
- else if(_y4m->interlace!='p'){
- fprintf(stderr,"Input video is interlaced; "
- "Only progressive scan handled.\n");
+ if (_y4m->interlace == '?') {
+ fprintf(stderr, "Warning: Input video interlacing format unknown; "
+ "assuming progressive scan.\n");
+ } else if (_y4m->interlace != 'p') {
+ fprintf(stderr, "Input video is interlaced; "
+ "Only progressive scan handled.\n");
return -1;
}
- if(strcmp(_y4m->chroma_type,"420")==0||
- strcmp(_y4m->chroma_type,"420jpeg")==0){
- _y4m->src_c_dec_h=_y4m->dst_c_dec_h=_y4m->src_c_dec_v=_y4m->dst_c_dec_v=2;
- _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h
- +2*((_y4m->pic_w+1)/2)*((_y4m->pic_h+1)/2);
+ if (strcmp(_y4m->chroma_type, "420") == 0 ||
+ strcmp(_y4m->chroma_type, "420jpeg") == 0) {
+ _y4m->src_c_dec_h = _y4m->dst_c_dec_h = _y4m->src_c_dec_v = _y4m->dst_c_dec_v = 2;
+ _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h
+ + 2 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2);
/*Natively supported: no conversion required.*/
- _y4m->aux_buf_sz=_y4m->aux_buf_read_sz=0;
- _y4m->convert=y4m_convert_null;
- }
- else if(strcmp(_y4m->chroma_type,"420mpeg2")==0){
- _y4m->src_c_dec_h=_y4m->dst_c_dec_h=_y4m->src_c_dec_v=_y4m->dst_c_dec_v=2;
- _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h;
+ _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
+ _y4m->convert = y4m_convert_null;
+ } else if (strcmp(_y4m->chroma_type, "420mpeg2") == 0) {
+ _y4m->src_c_dec_h = _y4m->dst_c_dec_h = _y4m->src_c_dec_v = _y4m->dst_c_dec_v = 2;
+ _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
/*Chroma filter required: read into the aux buf first.*/
- _y4m->aux_buf_sz=_y4m->aux_buf_read_sz=
- 2*((_y4m->pic_w+1)/2)*((_y4m->pic_h+1)/2);
- _y4m->convert=y4m_convert_42xmpeg2_42xjpeg;
- }
- else if(strcmp(_y4m->chroma_type,"420paldv")==0){
- _y4m->src_c_dec_h=_y4m->dst_c_dec_h=_y4m->src_c_dec_v=_y4m->dst_c_dec_v=2;
- _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h;
+ _y4m->aux_buf_sz = _y4m->aux_buf_read_sz =
+ 2 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2);
+ _y4m->convert = y4m_convert_42xmpeg2_42xjpeg;
+ } else if (strcmp(_y4m->chroma_type, "420paldv") == 0) {
+ _y4m->src_c_dec_h = _y4m->dst_c_dec_h = _y4m->src_c_dec_v = _y4m->dst_c_dec_v = 2;
+ _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
/*Chroma filter required: read into the aux buf first.
We need to make two filter passes, so we need some extra space in the
aux buffer.*/
- _y4m->aux_buf_sz=3*((_y4m->pic_w+1)/2)*((_y4m->pic_h+1)/2);
- _y4m->aux_buf_read_sz=2*((_y4m->pic_w+1)/2)*((_y4m->pic_h+1)/2);
- _y4m->convert=y4m_convert_42xpaldv_42xjpeg;
- }
- else if(strcmp(_y4m->chroma_type,"422jpeg")==0){
- _y4m->src_c_dec_h=_y4m->dst_c_dec_h=2;
- _y4m->src_c_dec_v=1;
- _y4m->dst_c_dec_v=2;
- _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h;
+ _y4m->aux_buf_sz = 3 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2);
+ _y4m->aux_buf_read_sz = 2 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2);
+ _y4m->convert = y4m_convert_42xpaldv_42xjpeg;
+ } else if (strcmp(_y4m->chroma_type, "422jpeg") == 0) {
+ _y4m->src_c_dec_h = _y4m->dst_c_dec_h = 2;
+ _y4m->src_c_dec_v = 1;
+ _y4m->dst_c_dec_v = 2;
+ _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
/*Chroma filter required: read into the aux buf first.*/
- _y4m->aux_buf_sz=_y4m->aux_buf_read_sz=2*((_y4m->pic_w+1)/2)*_y4m->pic_h;
- _y4m->convert=y4m_convert_422jpeg_420jpeg;
- }
- else if(strcmp(_y4m->chroma_type,"422")==0){
- _y4m->src_c_dec_h=_y4m->dst_c_dec_h=2;
- _y4m->src_c_dec_v=1;
- _y4m->dst_c_dec_v=2;
- _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h;
+ _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 2 * ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
+ _y4m->convert = y4m_convert_422jpeg_420jpeg;
+ } else if (strcmp(_y4m->chroma_type, "422") == 0) {
+ _y4m->src_c_dec_h = _y4m->dst_c_dec_h = 2;
+ _y4m->src_c_dec_v = 1;
+ _y4m->dst_c_dec_v = 2;
+ _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
/*Chroma filter required: read into the aux buf first.
We need to make two filter passes, so we need some extra space in the
aux buffer.*/
- _y4m->aux_buf_read_sz=2*((_y4m->pic_w+1)/2)*_y4m->pic_h;
- _y4m->aux_buf_sz=_y4m->aux_buf_read_sz+((_y4m->pic_w+1)/2)*_y4m->pic_h;
- _y4m->convert=y4m_convert_422_420jpeg;
- }
- else if(strcmp(_y4m->chroma_type,"411")==0){
- _y4m->src_c_dec_h=4;
- _y4m->dst_c_dec_h=2;
- _y4m->src_c_dec_v=1;
- _y4m->dst_c_dec_v=2;
- _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h;
+ _y4m->aux_buf_read_sz = 2 * ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
+ _y4m->aux_buf_sz = _y4m->aux_buf_read_sz + ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
+ _y4m->convert = y4m_convert_422_420jpeg;
+ } else if (strcmp(_y4m->chroma_type, "411") == 0) {
+ _y4m->src_c_dec_h = 4;
+ _y4m->dst_c_dec_h = 2;
+ _y4m->src_c_dec_v = 1;
+ _y4m->dst_c_dec_v = 2;
+ _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
/*Chroma filter required: read into the aux buf first.
We need to make two filter passes, so we need some extra space in the
aux buffer.*/
- _y4m->aux_buf_read_sz=2*((_y4m->pic_w+3)/4)*_y4m->pic_h;
- _y4m->aux_buf_sz=_y4m->aux_buf_read_sz+((_y4m->pic_w+1)/2)*_y4m->pic_h;
- _y4m->convert=y4m_convert_411_420jpeg;
- }
- else if(strcmp(_y4m->chroma_type,"444")==0){
- _y4m->src_c_dec_h=1;
- _y4m->dst_c_dec_h=2;
- _y4m->src_c_dec_v=1;
- _y4m->dst_c_dec_v=2;
- _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h;
+ _y4m->aux_buf_read_sz = 2 * ((_y4m->pic_w + 3) / 4) * _y4m->pic_h;
+ _y4m->aux_buf_sz = _y4m->aux_buf_read_sz + ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
+ _y4m->convert = y4m_convert_411_420jpeg;
+ } else if (strcmp(_y4m->chroma_type, "444") == 0) {
+ _y4m->src_c_dec_h = 1;
+ _y4m->dst_c_dec_h = 2;
+ _y4m->src_c_dec_v = 1;
+ _y4m->dst_c_dec_v = 2;
+ _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
/*Chroma filter required: read into the aux buf first.
We need to make two filter passes, so we need some extra space in the
aux buffer.*/
- _y4m->aux_buf_read_sz=2*_y4m->pic_w*_y4m->pic_h;
- _y4m->aux_buf_sz=_y4m->aux_buf_read_sz+((_y4m->pic_w+1)/2)*_y4m->pic_h;
- _y4m->convert=y4m_convert_444_420jpeg;
- }
- else if(strcmp(_y4m->chroma_type,"444alpha")==0){
- _y4m->src_c_dec_h=1;
- _y4m->dst_c_dec_h=2;
- _y4m->src_c_dec_v=1;
- _y4m->dst_c_dec_v=2;
- _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h;
+ _y4m->aux_buf_read_sz = 2 * _y4m->pic_w * _y4m->pic_h;
+ _y4m->aux_buf_sz = _y4m->aux_buf_read_sz + ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
+ _y4m->convert = y4m_convert_444_420jpeg;
+ } else if (strcmp(_y4m->chroma_type, "444alpha") == 0) {
+ _y4m->src_c_dec_h = 1;
+ _y4m->dst_c_dec_h = 2;
+ _y4m->src_c_dec_v = 1;
+ _y4m->dst_c_dec_v = 2;
+ _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
/*Chroma filter required: read into the aux buf first.
We need to make two filter passes, so we need some extra space in the
aux buffer.
The extra plane also gets read into the aux buf.
It will be discarded.*/
- _y4m->aux_buf_sz=_y4m->aux_buf_read_sz=3*_y4m->pic_w*_y4m->pic_h;
- _y4m->convert=y4m_convert_444_420jpeg;
- }
- else if(strcmp(_y4m->chroma_type,"mono")==0){
- _y4m->src_c_dec_h=_y4m->src_c_dec_v=0;
- _y4m->dst_c_dec_h=_y4m->dst_c_dec_v=2;
- _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h;
+ _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 3 * _y4m->pic_w * _y4m->pic_h;
+ _y4m->convert = y4m_convert_444_420jpeg;
+ } else if (strcmp(_y4m->chroma_type, "mono") == 0) {
+ _y4m->src_c_dec_h = _y4m->src_c_dec_v = 0;
+ _y4m->dst_c_dec_h = _y4m->dst_c_dec_v = 2;
+ _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
/*No extra space required, but we need to clear the chroma planes.*/
- _y4m->aux_buf_sz=_y4m->aux_buf_read_sz=0;
- _y4m->convert=y4m_convert_mono_420jpeg;
- }
- else{
- fprintf(stderr,"Unknown chroma sampling type: %s\n",_y4m->chroma_type);
+ _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
+ _y4m->convert = y4m_convert_mono_420jpeg;
+ } else {
+ fprintf(stderr, "Unknown chroma sampling type: %s\n", _y4m->chroma_type);
return -1;
}
/*The size of the final frame buffers is always computed from the
destination chroma decimation type.*/
- _y4m->dst_buf_sz=_y4m->pic_w*_y4m->pic_h
- +2*((_y4m->pic_w+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h)*
- ((_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v);
- _y4m->dst_buf=(unsigned char *)malloc(_y4m->dst_buf_sz);
- _y4m->aux_buf=(unsigned char *)malloc(_y4m->aux_buf_sz);
+ _y4m->dst_buf_sz = _y4m->pic_w * _y4m->pic_h
+ + 2 * ((_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h) *
+ ((_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v);
+ _y4m->dst_buf = (unsigned char *)malloc(_y4m->dst_buf_sz);
+ _y4m->aux_buf = (unsigned char *)malloc(_y4m->aux_buf_sz);
return 0;
}
-void y4m_input_close(y4m_input *_y4m){
+void y4m_input_close(y4m_input *_y4m) {
free(_y4m->dst_buf);
free(_y4m->aux_buf);
}
-int y4m_input_fetch_frame(y4m_input *_y4m,FILE *_fin,vpx_image_t *_img){
+int y4m_input_fetch_frame(y4m_input *_y4m, FILE *_fin, vpx_image_t *_img) {
char frame[6];
int pic_sz;
int c_w;
@@ -818,54 +815,54 @@
int c_sz;
int ret;
/*Read and skip the frame header.*/
- ret=(int)fread(frame,1,6,_fin);
- if(ret<6)return 0;
- if(memcmp(frame,"FRAME",5)){
- fprintf(stderr,"Loss of framing in Y4M input data\n");
+ ret = (int)fread(frame, 1, 6, _fin);
+ if (ret < 6)return 0;
+ if (memcmp(frame, "FRAME", 5)) {
+ fprintf(stderr, "Loss of framing in Y4M input data\n");
return -1;
}
- if(frame[5]!='\n'){
+ if (frame[5] != '\n') {
char c;
int j;
- for(j=0;j<79&&fread(&c,1,1,_fin)&&c!='\n';j++);
- if(j==79){
- fprintf(stderr,"Error parsing Y4M frame header\n");
+ for (j = 0; j < 79 && fread(&c, 1, 1, _fin) && c != '\n'; j++);
+ if (j == 79) {
+ fprintf(stderr, "Error parsing Y4M frame header\n");
return -1;
}
}
/*Read the frame data that needs no conversion.*/
- if(fread(_y4m->dst_buf,1,_y4m->dst_buf_read_sz,_fin)!=_y4m->dst_buf_read_sz){
- fprintf(stderr,"Error reading Y4M frame data.\n");
+ if (fread(_y4m->dst_buf, 1, _y4m->dst_buf_read_sz, _fin) != _y4m->dst_buf_read_sz) {
+ fprintf(stderr, "Error reading Y4M frame data.\n");
return -1;
}
/*Read the frame data that does need conversion.*/
- if(fread(_y4m->aux_buf,1,_y4m->aux_buf_read_sz,_fin)!=_y4m->aux_buf_read_sz){
- fprintf(stderr,"Error reading Y4M frame data.\n");
+ if (fread(_y4m->aux_buf, 1, _y4m->aux_buf_read_sz, _fin) != _y4m->aux_buf_read_sz) {
+ fprintf(stderr, "Error reading Y4M frame data.\n");
return -1;
}
/*Now convert the just read frame.*/
- (*_y4m->convert)(_y4m,_y4m->dst_buf,_y4m->aux_buf);
+ (*_y4m->convert)(_y4m, _y4m->dst_buf, _y4m->aux_buf);
/*Fill in the frame buffer pointers.
We don't use vpx_img_wrap() because it forces padding for odd picture
sizes, which would require a separate fread call for every row.*/
- memset(_img,0,sizeof(*_img));
+ memset(_img, 0, sizeof(*_img));
/*Y4M has the planes in Y'CbCr order, which libvpx calls Y, U, and V.*/
- _img->fmt=IMG_FMT_I420;
- _img->w=_img->d_w=_y4m->pic_w;
- _img->h=_img->d_h=_y4m->pic_h;
+ _img->fmt = IMG_FMT_I420;
+ _img->w = _img->d_w = _y4m->pic_w;
+ _img->h = _img->d_h = _y4m->pic_h;
/*This is hard-coded to 4:2:0 for now, as that's all VP8 supports.*/
- _img->x_chroma_shift=1;
- _img->y_chroma_shift=1;
- _img->bps=12;
+ _img->x_chroma_shift = 1;
+ _img->y_chroma_shift = 1;
+ _img->bps = 12;
/*Set up the buffer pointers.*/
- pic_sz=_y4m->pic_w*_y4m->pic_h;
- c_w=(_y4m->pic_w+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h;
- c_h=(_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v;
- c_sz=c_w*c_h;
- _img->stride[PLANE_Y]=_y4m->pic_w;
- _img->stride[PLANE_U]=_img->stride[PLANE_V]=c_w;
- _img->planes[PLANE_Y]=_y4m->dst_buf;
- _img->planes[PLANE_U]=_y4m->dst_buf+pic_sz;
- _img->planes[PLANE_V]=_y4m->dst_buf+pic_sz+c_sz;
+ pic_sz = _y4m->pic_w * _y4m->pic_h;
+ c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
+ c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
+ c_sz = c_w * c_h;
+ _img->stride[PLANE_Y] = _y4m->pic_w;
+ _img->stride[PLANE_U] = _img->stride[PLANE_V] = c_w;
+ _img->planes[PLANE_Y] = _y4m->dst_buf;
+ _img->planes[PLANE_U] = _y4m->dst_buf + pic_sz;
+ _img->planes[PLANE_V] = _y4m->dst_buf + pic_sz + c_sz;
return 1;
}
diff --git a/y4minput.h b/y4minput.h
index 1a01bcd..2fa3767 100644
--- a/y4minput.h
+++ b/y4minput.h
@@ -23,11 +23,11 @@
/*The function used to perform chroma conversion.*/
typedef void (*y4m_convert_func)(y4m_input *_y4m,
- unsigned char *_dst,unsigned char *_src);
+ unsigned char *_dst, unsigned char *_src);
-struct y4m_input{
+struct y4m_input {
int pic_w;
int pic_h;
int fps_n;
@@ -53,8 +53,8 @@
unsigned char *aux_buf;
};
-int y4m_input_open(y4m_input *_y4m,FILE *_fin,char *_skip,int _nskip);
+int y4m_input_open(y4m_input *_y4m, FILE *_fin, char *_skip, int _nskip);
void y4m_input_close(y4m_input *_y4m);
-int y4m_input_fetch_frame(y4m_input *_y4m,FILE *_fin,vpx_image_t *img);
+int y4m_input_fetch_frame(y4m_input *_y4m, FILE *_fin, vpx_image_t *img);
#endif