Merge "vp8 - compatibility warning added to changelog"
diff --git a/tools/ftfy.sh b/tools/ftfy.sh
new file mode 100755
index 0000000..de0f0ed
--- /dev/null
+++ b/tools/ftfy.sh
@@ -0,0 +1,153 @@
+#!/bin/sh
+self="$0"
+dirname_self=$(CDPATH= cd "$(dirname "$self")" && pwd)  # absolute: we cd to the repo root later
+# Print the help text to stderr, remove the temporary files and exit with status 1.
+usage() {
+ cat <<EOF >&2
+Usage: $self [option]
+
+This script applies a whitespace transformation to the commit at HEAD. If no
+options are given, then the modified files are left in the working tree.
+
+Options:
+ -h, --help Shows this message
+ -n, --dry-run Shows a diff of the changes to be made.
+ --amend Squashes the changes into the commit at HEAD
+ This option will also reformat the commit message.
+ --commit Creates a new commit containing only the whitespace changes
+ --msg-only Reformat the commit message only, ignore the patch itself.
+
+EOF
+ rm -f ${CLEAN_FILES}
+ exit 1
+}
+
+# Write the arguments to stderr as one line, prefixed with this script's basename.
+log() {
+    echo "${self##*/}: $*" >&2
+}
+
+# Run astyle with the project's option set on the given files, then use sed -i to delete whitespace that precedes a comma.
+vpx_style() {
+ astyle --style=bsd --min-conditional-indent=0 --break-blocks \
+ --pad-oper --pad-header --unpad-paren \
+ --align-pointer=name \
+ --indent-preprocessor --convert-tabs --indent-labels \
+ --suffix=none --quiet "$@"
+ sed -i 's/[[:space:]]\{1,\},/,/g' "$@"
+}
+
+# Apply the patch file $1 with patch -p1, but only when INTERSECT_RESULT is non-zero.
+apply() {
+ [ $INTERSECT_RESULT -ne 0 ] && patch -p1 < "$1"
+}
+
+# Commit the applied whitespace fixes as a new commit whose Change-Id is
+# derived from HEAD's. Exits with status 1 if HEAD has no Change-Id.
+commit() {
+    # Read the Change-Id from the message only; "git show" also prints the patch.
+    LAST_CHANGEID=$(git log -1 --format=%B | awk '/^Change-Id:/{id=$2} END{print id}')
+    if [ -z "$LAST_CHANGEID" ]; then
+        log "HEAD doesn't have a Change-Id, unable to generate a new commit"
+        exit 1
+    fi
+    # Build a deterministic Change-Id from the parent's
+    NEW_CHANGEID=${LAST_CHANGEID}-styled
+    NEW_CHANGEID=I$(echo $NEW_CHANGEID | git hash-object --stdin)
+    # Commit, preserving authorship from the parent commit.
+    git commit -a -C HEAD > /dev/null
+    git commit --amend -F- << EOF
+Cosmetic: Fix whitespace in change $(printf '%.9s' "$LAST_CHANGEID")
+
+Change-Id: ${NEW_CHANGEID}
+EOF
+}
+
+# When DIFF_MSG_RESULT is non-zero, log a notice and print diff -u of the old and new message, skipping diff's first two output lines.
+show_commit_msg_diff() {
+ if [ $DIFF_MSG_RESULT -ne 0 ]; then
+ log "Modified commit message:"
+ diff -u "$ORIG_COMMIT_MSG" "$NEW_COMMIT_MSG" | tail -n +3
+ fi
+}
+
+# Show the commit-message diff, then amend HEAD with NEW_COMMIT_MSG if either DIFF_MSG_RESULT or INTERSECT_RESULT is non-zero.
+amend() {
+ show_commit_msg_diff
+ if [ $DIFF_MSG_RESULT -ne 0 ] || [ $INTERSECT_RESULT -ne 0 ]; then
+ git commit -a --amend -F "$NEW_COMMIT_MSG"
+ fi
+}
+
+# Save HEAD's message, write a copy filtered through wrap-commit-msg.py, and store the exit status of cmp -s on the two in DIFF_MSG_RESULT.
+diff_msg() {
+ git log -1 --format=%B > "$ORIG_COMMIT_MSG"
+ "${dirname_self}"/wrap-commit-msg.py \
+ < "$ORIG_COMMIT_MSG" > "$NEW_COMMIT_MSG"
+ cmp -s "$ORIG_COMMIT_MSG" "$NEW_COMMIT_MSG"
+ DIFF_MSG_RESULT=$?
+}
+
+
+# Temporary files (relative names, suffixed with this shell's PID)
+ORIG_DIFF=orig.diff.$$
+MODIFIED_DIFF=modified.diff.$$
+FINAL_DIFF=final.diff.$$
+ORIG_COMMIT_MSG=orig.commit-msg.$$
+NEW_COMMIT_MSG=new.commit-msg.$$
+CLEAN_FILES="${ORIG_DIFF} ${MODIFIED_DIFF} ${FINAL_DIFF}"
+CLEAN_FILES="${CLEAN_FILES} ${ORIG_COMMIT_MSG} ${NEW_COMMIT_MSG}"
+
+# Preconditions: at most one argument, and no differences from HEAD
+[ $# -lt 2 ] || usage
+
+if ! git diff --quiet HEAD; then
+ log "Working tree is dirty, commit your changes first"
+ exit 1
+fi
+
+# Need to be in the root
+cd "$(git rev-parse --show-toplevel)"
+
+# Collect the original diff (the output of git show for HEAD)
+git show > "${ORIG_DIFF}"
+
+# Apply the style guide on the modified files and collect its diff.
+# --diff-filter=ACMR leaves out files the commit deleted, which no longer exist.
+for f in $(git diff HEAD^ --name-only --diff-filter=ACMR | grep '\.[ch]$'); do
+    case "$f" in
+        third_party/*|nestegg/*) continue;;
+    esac
+    vpx_style "$f"
+done
+git diff --no-color --no-ext-diff > "${MODIFIED_DIFF}"
+
+# Intersect the two diffs; the helper's exit status is kept in INTERSECT_RESULT
+"${dirname_self}"/intersect-diffs.py \
+ "${ORIG_DIFF}" "${MODIFIED_DIFF}" > "${FINAL_DIFF}"
+INTERSECT_RESULT=$?
+git reset --hard >/dev/null  # undo the in-tree restyle; only FINAL_DIFF may be re-applied
+
+# Fixup the commit message (sets DIFF_MSG_RESULT)
+diff_msg
+
+# Handle options (with no option, apply the fixes and leave them uncommitted)
+if [ -n "$1" ]; then
+ case "$1" in
+ -h|--help) usage;;
+ -n|--dry-run) cat "${FINAL_DIFF}"; show_commit_msg_diff;;
+ --commit) apply "${FINAL_DIFF}"; commit;;
+ --amend) apply "${FINAL_DIFF}"; amend;;
+ --msg-only) amend;;
+ *) usage;;
+ esac
+else
+ apply "${FINAL_DIFF}"
+ if ! git diff --quiet; then
+ log "Formatting changes applied, verify and commit."
+ log "See also: http://www.webmproject.org/code/contribute/conventions/"
+ git diff --stat
+ fi
+fi
+
+rm -f ${CLEAN_FILES}  # remove the temporary files
diff --git a/tools/intersect-diffs.py b/tools/intersect-diffs.py
new file mode 100755
index 0000000..be9dea5
--- /dev/null
+++ b/tools/intersect-diffs.py
@@ -0,0 +1,188 @@
+#!/usr/bin/env python
+## Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+##
+## Use of this source code is governed by a BSD-style license
+## that can be found in the LICENSE file in the root of the source
+## tree. An additional intellectual property rights grant can be found
+## in the file PATENTS. All contributing project authors may
+## be found in the AUTHORS file in the root of the source tree.
+##
+"""Calculates the "intersection" of two unified diffs.
+
+Given two diffs, A and B, it finds all hunks in B that had non-context lines
+in A and prints them to stdout. This is useful to determine the hunks in B that
+are relevant to A. The resulting file can be applied with patch(1) on top of A.
+"""
+
+__author__ = "jkoleszar@google.com"
+
+import re
+import sys
+
+
+class DiffLines(object):
+  """Lines of one side of a diff hunk, plus the line numbers that changed."""
+
+  def __init__(self, filename, offset, length):
+    self.filename = filename
+    self.offset = offset
+    self.length = length
+    self.lines = []
+    self.delta_line_nums = []
+
+  def Append(self, line):
+    index = len(self.lines)
+    if line[0] != " ":
+      self.delta_line_nums.append(self.offset + index)
+    self.lines.append(line[1:])
+    assert index + 1 <= self.length
+
+  def Complete(self):
+    return self.length == len(self.lines)
+
+  def __contains__(self, item):
+    return self.offset <= item <= self.offset + self.length - 1
+
+
+class DiffHunk(object):
+  """One hunk of a diff, split into a left and a right DiffLines."""
+
+  def __init__(self, header, file_a, file_b, start_a, len_a, start_b, len_b):
+    self.header = header
+    self.left = DiffLines(file_a, start_a, len_a)
+    self.right = DiffLines(file_b, start_b, len_b)
+    self.lines = []
+
+  def Append(self, line):
+    """Stores a raw hunk line and forwards it to the side(s) it belongs to."""
+    marker = line[0]
+    # "-" lines go to the left side only, "+" lines to the right side only,
+    # and " " (context) lines to both.
+    if marker in (" ", "-"):
+      self.left.Append(line)
+    if marker in (" ", "+"):
+      self.right.Append(line)
+    assert marker in (" ", "-", "+"), (
+        "Unrecognized character at start of diff line " "%r" % line[0])
+    self.lines.append(line)
+
+  def Complete(self):
+    # Done once both sides hold as many lines as their declared length.
+    return self.left.Complete() and self.right.Complete()
+
+  def __repr__(self):
+    return "DiffHunk(%s, %s, len %d)" % (
+        self.left.filename, self.right.filename,
+        max(self.left.length, self.right.length))
+
+
+def ParseDiffHunks(stream):
+  """Walk a file-like object, yielding DiffHunks as they're parsed.
+
+  Only stream.readline() is used. Lines outside a hunk that are neither a
+  "---"/"+++" file line nor an "@@" range line are skipped, and a hunk that
+  is still incomplete at end of input trips an assertion.
+
+  A range written without a length (e.g. "@@ -5 +5 @@") is a one-line range.
+  """
+
+  file_regex = re.compile(r"(\+\+\+|---) (\S+)")
+  range_regex = re.compile(r"@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))?")
+  hunk = None
+  while True:
+    line = stream.readline()
+    if not line:
+      break
+
+    if hunk is None:
+      # Parse file names
+      diff_file = file_regex.match(line)
+      if diff_file:
+        if line.startswith("---"):
+          a_line = line
+          a = diff_file.group(2)
+          continue
+        if line.startswith("+++"):
+          b_line = line
+          b = diff_file.group(2)
+          continue
+
+      # Parse offset/lengths. In the unified format "@@ -S,L +S,L @@", a
+      # side written without ",L" is a one-line range starting at S, so the
+      # lone number is the start line and the length defaults to 1.
+      diffrange = range_regex.match(line)
+      if diffrange:
+        start_a = int(diffrange.group(1))
+        len_a = int(diffrange.group(3)) if diffrange.group(2) else 1
+        start_b = int(diffrange.group(4))
+        len_b = int(diffrange.group(6)) if diffrange.group(5) else 1
+
+        header = [a_line, b_line, line]
+        hunk = DiffHunk(header, a, b, start_a, len_a, start_b, len_b)
+    else:
+      # Add the current line to the hunk
+      hunk.Append(line)
+
+      # See if the whole hunk has been parsed. If so, yield it and prepare
+      # for the next hunk.
+      if hunk.Complete():
+        yield hunk
+        hunk = None
+
+  # Partial hunks are a parse error
+  assert hunk is None
+
+
+def FormatDiffHunks(hunks):
+  """Joins DiffHunks back into diff text, plus one trailing newline."""
+  pieces = []
+  previous_files = None
+  for hunk in hunks:
+    file_lines, range_line = hunk.header[0:2], hunk.header[2]
+    if file_lines != previous_files:
+      # File lines differ from the previous hunk's: emit them again.
+      pieces.extend(file_lines)
+      previous_files = file_lines
+    pieces.append(range_line)
+    pieces.extend(hunk.lines)
+  pieces.append("\n")
+  return "".join(pieces)
+
+
+def ZipHunks(rhs_hunks, lhs_hunks):
+  """Yields (rhs_hunk, lhs_hunk) pairs whose file names match.
+
+  Names are compared after dropping their first "/"-separated component.
+  """
+  for rhs_hunk in rhs_hunks:
+    wanted = rhs_hunk.right.filename.split("/")[1:]
+    for lhs_hunk in lhs_hunks:
+      if lhs_hunk.left.filename.split("/")[1:] == wanted:
+        yield (rhs_hunk, lhs_hunk)
+
+
+def main():
+  """Prints the hunks of diff argv[2] that touch lines changed by diff argv[1].
+
+  Exits with status 1 after printing at least one hunk; otherwise returns.
+  """
+  with open(sys.argv[1], "r") as old_diff:
+    old_hunks = list(ParseDiffHunks(old_diff))
+  with open(sys.argv[2], "r") as new_diff:
+    new_hunks = list(ParseDiffHunks(new_diff))
+  out_hunks = []
+  # Join the right hand side of the older diff with the left hand side of the
+  # newer diff.
+  for old_hunk, new_hunk in ZipHunks(old_hunks, new_hunks):
+    if new_hunk in out_hunks:
+      continue
+    # Keep the hunk if it overlaps any non-context line from the other
+    if any(i in new_hunk.left for i in old_hunk.right.delta_line_nums):
+      out_hunks.append(new_hunk)
+
+  if out_hunks:
+    print(FormatDiffHunks(out_hunks))  # one-argument form works on Python 2 and 3
+    sys.exit(1)
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/wrap-commit-msg.py b/tools/wrap-commit-msg.py
new file mode 100755
index 0000000..d5b4b04
--- /dev/null
+++ b/tools/wrap-commit-msg.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python
+## Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+##
+## Use of this source code is governed by a BSD-style license
+## that can be found in the LICENSE file in the root of the source
+## tree. An additional intellectual property rights grant can be found
+## in the file PATENTS. All contributing project authors may
+## be found in the AUTHORS file in the root of the source tree.
+##
+"""Wraps paragraphs of text, preserving manual formatting
+
+This is like fold(1), but has the special convention of not modifying lines
+that start with whitespace. This allows you to intersperse blocks with
+special formatting, like code blocks, with written prose. The prose will
+be wordwrapped, and the manual formatting will be preserved.
+
+ * This won't handle the case of a bulleted (or ordered) list specially, so
+ manual wrapping must be done.
+
+Occasionally it's useful to put something with explicit formatting that
+doesn't look at all like a block of text inline.
+
+ indicator = has_leading_whitespace(line);
+ if (indicator)
+ preserve_formatting(line);
+
+The intent is that this docstring would make it through the transform
+and still be legible and presented as it is in the source. If additional
+cases are handled, update this doc to describe the effect.
+"""
+
+__author__ = "jkoleszar@google.com"
+import textwrap
+import sys
+
+def wrap(text):
+  """Returns "" for empty text, else textwrap.fill(text) plus a newline."""
+  # break_long_words=False is passed so over-long words are not split.
+  return textwrap.fill(text, break_long_words=False) + '\n' if text else ""
+
+
+def main(fileobj):
+  """Re-wraps prose from fileobj, keeping whitespace-led and blank lines as-is."""
+  paragraph = []
+  chunks = []
+  for line in iter(fileobj.readline, ""):
+    if line == line.lstrip():
+      # Unindented line: prose to be wrapped with the rest of its paragraph.
+      paragraph.append(line)
+      continue
+    # Formatted (or blank) line: flush the pending prose, then pass it through.
+    chunks.append(wrap("".join(paragraph)))
+    paragraph = []
+    chunks.append(line)
+  chunks.append(wrap("".join(paragraph)))
+  output = "".join(chunks)
+
+  # Replace the file or write to stdout.
+  if fileobj == sys.stdin:
+    sys.stdout.write(output)
+  else:
+    fileobj.seek(0)
+    fileobj.truncate(0)
+    fileobj.write(output)
+
+if __name__ == "__main__":
+ if len(sys.argv) > 1:
+ main(open(sys.argv[1], "r+"))  # rewrite the named file in place
+ else:
+ main(sys.stdin)  # filter mode: stdin to stdout
diff --git a/vp8/common/reconintra.c b/vp8/common/reconintra.c
index 4b13777..4067a68 100644
--- a/vp8/common/reconintra.c
+++ b/vp8/common/reconintra.c
@@ -14,140 +14,17 @@
#include "vpx_mem/vpx_mem.h"
#include "blockd.h"
-/* For skip_recon_mb(), add vp8_build_intra_predictors_mby_s(MACROBLOCKD *x) and
- * vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x).
- */
-
-void vp8_build_intra_predictors_mby_c(MACROBLOCKD *x)
-{
-
- unsigned char *yabove_row = x->dst.y_buffer - x->dst.y_stride;
- unsigned char yleft_col[16];
- unsigned char ytop_left = yabove_row[-1];
- unsigned char *ypred_ptr = x->predictor;
- int r, c, i;
-
- for (i = 0; i < 16; i++)
- {
- yleft_col[i] = x->dst.y_buffer [i* x->dst.y_stride -1];
- }
-
- /* for Y */
- switch (x->mode_info_context->mbmi.mode)
- {
- case DC_PRED:
- {
- int expected_dc;
- int i;
- int shift;
- int average = 0;
-
-
- if (x->up_available || x->left_available)
- {
- if (x->up_available)
- {
- for (i = 0; i < 16; i++)
- {
- average += yabove_row[i];
- }
- }
-
- if (x->left_available)
- {
-
- for (i = 0; i < 16; i++)
- {
- average += yleft_col[i];
- }
-
- }
-
-
-
- shift = 3 + x->up_available + x->left_available;
- expected_dc = (average + (1 << (shift - 1))) >> shift;
- }
- else
- {
- expected_dc = 128;
- }
-
- vpx_memset(ypred_ptr, expected_dc, 256);
- }
- break;
- case V_PRED:
- {
-
- for (r = 0; r < 16; r++)
- {
-
- ((int *)ypred_ptr)[0] = ((int *)yabove_row)[0];
- ((int *)ypred_ptr)[1] = ((int *)yabove_row)[1];
- ((int *)ypred_ptr)[2] = ((int *)yabove_row)[2];
- ((int *)ypred_ptr)[3] = ((int *)yabove_row)[3];
- ypred_ptr += 16;
- }
- }
- break;
- case H_PRED:
- {
-
- for (r = 0; r < 16; r++)
- {
-
- vpx_memset(ypred_ptr, yleft_col[r], 16);
- ypred_ptr += 16;
- }
-
- }
- break;
- case TM_PRED:
- {
-
- for (r = 0; r < 16; r++)
- {
- for (c = 0; c < 16; c++)
- {
- int pred = yleft_col[r] + yabove_row[ c] - ytop_left;
-
- if (pred < 0)
- pred = 0;
-
- if (pred > 255)
- pred = 255;
-
- ypred_ptr[c] = pred;
- }
-
- ypred_ptr += 16;
- }
-
- }
- break;
- case B_PRED:
- case NEARESTMV:
- case NEARMV:
- case ZEROMV:
- case NEWMV:
- case SPLITMV:
- case MB_MODE_COUNT:
- break;
- }
-}
-
-void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x,
+void vp8_build_intra_predictors_mby_s_c(MACROBLOCKD *x,
unsigned char * yabove_row,
unsigned char * yleft,
int left_stride,
- unsigned char * ypred_ptr)
+ unsigned char * ypred_ptr,
+ int y_stride)
{
unsigned char yleft_col[16];
unsigned char ytop_left = yabove_row[-1];
int r, c, i;
- int y_stride = x->dst.y_stride;
-
for (i = 0; i < 16; i++)
{
yleft_col[i] = yleft[i* left_stride];
@@ -198,7 +75,7 @@
for (r = 0; r < 16; r++)
{
vpx_memset(ypred_ptr, expected_dc, 16);
- ypred_ptr += y_stride; /*16;*/
+ ypred_ptr += y_stride;
}
}
break;
@@ -212,7 +89,7 @@
((int *)ypred_ptr)[1] = ((int *)yabove_row)[1];
((int *)ypred_ptr)[2] = ((int *)yabove_row)[2];
((int *)ypred_ptr)[3] = ((int *)yabove_row)[3];
- ypred_ptr += y_stride; /*16;*/
+ ypred_ptr += y_stride;
}
}
break;
@@ -223,7 +100,7 @@
{
vpx_memset(ypred_ptr, yleft_col[r], 16);
- ypred_ptr += y_stride; /*16;*/
+ ypred_ptr += y_stride;
}
}
@@ -246,145 +123,7 @@
ypred_ptr[c] = pred;
}
- ypred_ptr += y_stride; /*16;*/
- }
-
- }
- break;
- case B_PRED:
- case NEARESTMV:
- case NEARMV:
- case ZEROMV:
- case NEWMV:
- case SPLITMV:
- case MB_MODE_COUNT:
- break;
- }
-}
-
-void vp8_build_intra_predictors_mbuv_c(MACROBLOCKD *x)
-{
- unsigned char *uabove_row = x->dst.u_buffer - x->dst.uv_stride;
- unsigned char uleft_col[16];
- unsigned char utop_left = uabove_row[-1];
- unsigned char *vabove_row = x->dst.v_buffer - x->dst.uv_stride;
- unsigned char vleft_col[20];
- unsigned char vtop_left = vabove_row[-1];
- unsigned char *upred_ptr = &x->predictor[256];
- unsigned char *vpred_ptr = &x->predictor[320];
- int i, j;
-
- for (i = 0; i < 8; i++)
- {
- uleft_col[i] = x->dst.u_buffer [i* x->dst.uv_stride -1];
- vleft_col[i] = x->dst.v_buffer [i* x->dst.uv_stride -1];
- }
-
- switch (x->mode_info_context->mbmi.uv_mode)
- {
- case DC_PRED:
- {
- int expected_udc;
- int expected_vdc;
- int i;
- int shift;
- int Uaverage = 0;
- int Vaverage = 0;
-
- if (x->up_available)
- {
- for (i = 0; i < 8; i++)
- {
- Uaverage += uabove_row[i];
- Vaverage += vabove_row[i];
- }
- }
-
- if (x->left_available)
- {
- for (i = 0; i < 8; i++)
- {
- Uaverage += uleft_col[i];
- Vaverage += vleft_col[i];
- }
- }
-
- if (!x->up_available && !x->left_available)
- {
- expected_udc = 128;
- expected_vdc = 128;
- }
- else
- {
- shift = 2 + x->up_available + x->left_available;
- expected_udc = (Uaverage + (1 << (shift - 1))) >> shift;
- expected_vdc = (Vaverage + (1 << (shift - 1))) >> shift;
- }
-
-
- vpx_memset(upred_ptr, expected_udc, 64);
- vpx_memset(vpred_ptr, expected_vdc, 64);
-
-
- }
- break;
- case V_PRED:
- {
- int i;
-
- for (i = 0; i < 8; i++)
- {
- vpx_memcpy(upred_ptr, uabove_row, 8);
- vpx_memcpy(vpred_ptr, vabove_row, 8);
- upred_ptr += 8;
- vpred_ptr += 8;
- }
-
- }
- break;
- case H_PRED:
- {
- int i;
-
- for (i = 0; i < 8; i++)
- {
- vpx_memset(upred_ptr, uleft_col[i], 8);
- vpx_memset(vpred_ptr, vleft_col[i], 8);
- upred_ptr += 8;
- vpred_ptr += 8;
- }
- }
-
- break;
- case TM_PRED:
- {
- int i;
-
- for (i = 0; i < 8; i++)
- {
- for (j = 0; j < 8; j++)
- {
- int predu = uleft_col[i] + uabove_row[j] - utop_left;
- int predv = vleft_col[i] + vabove_row[j] - vtop_left;
-
- if (predu < 0)
- predu = 0;
-
- if (predu > 255)
- predu = 255;
-
- if (predv < 0)
- predv = 0;
-
- if (predv > 255)
- predv = 255;
-
- upred_ptr[j] = predu;
- vpred_ptr[j] = predv;
- }
-
- upred_ptr += 8;
- vpred_ptr += 8;
+ ypred_ptr += y_stride;
}
}
@@ -407,13 +146,13 @@
unsigned char * vleft,
int left_stride,
unsigned char * upred_ptr,
- unsigned char * vpred_ptr)
+ unsigned char * vpred_ptr,
+ int pred_stride)
{
unsigned char uleft_col[8];
unsigned char utop_left = uabove_row[-1];
unsigned char vleft_col[8];
unsigned char vtop_left = vabove_row[-1];
- int uv_stride = x->dst.uv_stride;
int i, j;
@@ -471,8 +210,8 @@
{
vpx_memset(upred_ptr, expected_udc, 8);
vpx_memset(vpred_ptr, expected_vdc, 8);
- upred_ptr += uv_stride; /*8;*/
- vpred_ptr += uv_stride; /*8;*/
+ upred_ptr += pred_stride;
+ vpred_ptr += pred_stride;
}
}
break;
@@ -484,8 +223,8 @@
{
vpx_memcpy(upred_ptr, uabove_row, 8);
vpx_memcpy(vpred_ptr, vabove_row, 8);
- upred_ptr += uv_stride; /*8;*/
- vpred_ptr += uv_stride; /*8;*/
+ upred_ptr += pred_stride;
+ vpred_ptr += pred_stride;
}
}
@@ -498,8 +237,8 @@
{
vpx_memset(upred_ptr, uleft_col[i], 8);
vpx_memset(vpred_ptr, vleft_col[i], 8);
- upred_ptr += uv_stride; /*8;*/
- vpred_ptr += uv_stride; /*8;*/
+ upred_ptr += pred_stride;
+ vpred_ptr += pred_stride;
}
}
@@ -531,8 +270,8 @@
vpred_ptr[j] = predv;
}
- upred_ptr += uv_stride; /*8;*/
- vpred_ptr += uv_stride; /*8;*/
+ upred_ptr += pred_stride;
+ vpred_ptr += pred_stride;
}
}
diff --git a/vp8/common/rtcd_defs.sh b/vp8/common/rtcd_defs.sh
index 0fdb4fa..ab99515 100644
--- a/vp8/common/rtcd_defs.sh
+++ b/vp8/common/rtcd_defs.sh
@@ -122,17 +122,12 @@
specialize vp8_copy_mem8x4 mmx media neon
vp8_copy_mem8x4_media=vp8_copy_mem8x4_v6
-prototype void vp8_build_intra_predictors_mby "struct macroblockd *x"
-specialize vp8_build_intra_predictors_mby sse2 ssse3 neon
+prototype void vp8_build_intra_predictors_mby_s "struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride"
+specialize vp8_build_intra_predictors_mby_s sse2 ssse3
+#TODO: fix assembly for neon
-prototype void vp8_build_intra_predictors_mby_s "struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr"
-#TODO: fix assembly --- specialize vp8_build_intra_predictors_mby_s sse2 ssse3 neon
-
-prototype void vp8_build_intra_predictors_mbuv "struct macroblockd *x"
-specialize vp8_build_intra_predictors_mbuv sse2 ssse3
-
-prototype void vp8_build_intra_predictors_mbuv_s "struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr"
-#TODO: fix assembly --- specialize vp8_build_intra_predictors_mbuv_s sse2 ssse3
+prototype void vp8_build_intra_predictors_mbuv_s "struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride"
+specialize vp8_build_intra_predictors_mbuv_s sse2 ssse3
prototype void vp8_intra4x4_predict "unsigned char *src, int src_stride, int b_mode, unsigned char *dst, int dst_stride"
specialize vp8_intra4x4_predict media
diff --git a/vp8/common/x86/recon_sse2.asm b/vp8/common/x86/recon_sse2.asm
index 4b68ef5..7b6e3cf 100644
--- a/vp8/common/x86/recon_sse2.asm
+++ b/vp8/common/x86/recon_sse2.asm
@@ -119,35 +119,37 @@
;void vp8_intra_pred_uv_dc_mmx2(
; unsigned char *dst,
; int dst_stride
-; unsigned char *src,
-; int src_stride,
+; unsigned char *above,
+; unsigned char *left,
+; int left_stride,
; )
global sym(vp8_intra_pred_uv_dc_mmx2)
sym(vp8_intra_pred_uv_dc_mmx2):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
+ SHADOW_ARGS_TO_STACK 5
push rsi
push rdi
; end prolog
; from top
- mov rsi, arg(2) ;src;
- movsxd rax, dword ptr arg(3) ;src_stride;
- sub rsi, rax
+ mov rdi, arg(2) ;above;
+ mov rsi, arg(3) ;left;
+ movsxd rax, dword ptr arg(4) ;left_stride;
pxor mm0, mm0
- movq mm1, [rsi]
- psadbw mm1, mm0
-
- ; from left
- dec rsi
+ movq mm1, [rdi]
lea rdi, [rax*3]
- movzx ecx, byte [rsi+rax]
+ psadbw mm1, mm0
+ ; from left
+ movzx ecx, byte [rsi]
+ movzx edx, byte [rsi+rax*1]
+ add ecx, edx
movzx edx, byte [rsi+rax*2]
add ecx, edx
+
movzx edx, byte [rsi+rdi]
- add ecx, edx
lea rsi, [rsi+rax*4]
+ add ecx, edx
movzx edx, byte [rsi]
add ecx, edx
movzx edx, byte [rsi+rax]
@@ -156,31 +158,29 @@
add ecx, edx
movzx edx, byte [rsi+rdi]
add ecx, edx
- movzx edx, byte [rsi+rax*4]
- add ecx, edx
; add up
pextrw edx, mm1, 0x0
lea edx, [edx+ecx+8]
sar edx, 4
movd mm1, edx
+ movsxd rcx, dword ptr arg(1) ;dst_stride
pshufw mm1, mm1, 0x0
+ mov rdi, arg(0) ;dst;
packuswb mm1, mm1
; write out
- mov rdi, arg(0) ;dst;
- movsxd rcx, dword ptr arg(1) ;dst_stride
lea rax, [rcx*3]
+ lea rdx, [rdi+rcx*4]
movq [rdi ], mm1
movq [rdi+rcx ], mm1
movq [rdi+rcx*2], mm1
movq [rdi+rax ], mm1
- lea rdi, [rdi+rcx*4]
- movq [rdi ], mm1
- movq [rdi+rcx ], mm1
- movq [rdi+rcx*2], mm1
- movq [rdi+rax ], mm1
+ movq [rdx ], mm1
+ movq [rdx+rcx ], mm1
+ movq [rdx+rcx*2], mm1
+ movq [rdx+rax ], mm1
; begin epilog
pop rdi
@@ -192,23 +192,24 @@
;void vp8_intra_pred_uv_dctop_mmx2(
; unsigned char *dst,
; int dst_stride
-; unsigned char *src,
-; int src_stride,
+; unsigned char *above,
+; unsigned char *left,
+; int left_stride,
; )
global sym(vp8_intra_pred_uv_dctop_mmx2)
sym(vp8_intra_pred_uv_dctop_mmx2):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
+ SHADOW_ARGS_TO_STACK 5
GET_GOT rbx
push rsi
push rdi
; end prolog
+ ;arg(3), arg(4) not used
+
; from top
- mov rsi, arg(2) ;src;
- movsxd rax, dword ptr arg(3) ;src_stride;
- sub rsi, rax
+ mov rsi, arg(2) ;above;
pxor mm0, mm0
movq mm1, [rsi]
psadbw mm1, mm0
@@ -245,22 +246,24 @@
;void vp8_intra_pred_uv_dcleft_mmx2(
; unsigned char *dst,
; int dst_stride
-; unsigned char *src,
-; int src_stride,
+; unsigned char *above,
+; unsigned char *left,
+; int left_stride,
; )
global sym(vp8_intra_pred_uv_dcleft_mmx2)
sym(vp8_intra_pred_uv_dcleft_mmx2):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
+ SHADOW_ARGS_TO_STACK 5
push rsi
push rdi
; end prolog
+ ;arg(2) not used
+
; from left
- mov rsi, arg(2) ;src;
- movsxd rax, dword ptr arg(3) ;src_stride;
- dec rsi
+ mov rsi, arg(3) ;left;
+ movsxd rax, dword ptr arg(4) ;left_stride;
lea rdi, [rax*3]
movzx ecx, byte [rsi]
movzx edx, byte [rsi+rax]
@@ -310,17 +313,20 @@
;void vp8_intra_pred_uv_dc128_mmx(
; unsigned char *dst,
; int dst_stride
-; unsigned char *src,
-; int src_stride,
+; unsigned char *above,
+; unsigned char *left,
+; int left_stride,
; )
global sym(vp8_intra_pred_uv_dc128_mmx)
sym(vp8_intra_pred_uv_dc128_mmx):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
+ SHADOW_ARGS_TO_STACK 5
GET_GOT rbx
; end prolog
+ ;arg(2), arg(3), arg(4) not used
+
; write out
movq mm1, [GLOBAL(dc_128)]
mov rax, arg(0) ;dst;
@@ -346,15 +352,16 @@
;void vp8_intra_pred_uv_tm_sse2(
; unsigned char *dst,
; int dst_stride
-; unsigned char *src,
-; int src_stride,
+; unsigned char *above,
+; unsigned char *left,
+; int left_stride,
; )
%macro vp8_intra_pred_uv_tm 1
global sym(vp8_intra_pred_uv_tm_%1)
sym(vp8_intra_pred_uv_tm_%1):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
+ SHADOW_ARGS_TO_STACK 5
GET_GOT rbx
push rsi
push rdi
@@ -362,9 +369,8 @@
; read top row
mov edx, 4
- mov rsi, arg(2) ;src;
- movsxd rax, dword ptr arg(3) ;src_stride;
- sub rsi, rax
+ mov rsi, arg(2) ;above
+ movsxd rax, dword ptr arg(4) ;left_stride;
pxor xmm0, xmm0
%ifidn %1, ssse3
movdqa xmm2, [GLOBAL(dc_1024)]
@@ -374,7 +380,7 @@
; set up left ptrs ans subtract topleft
movd xmm3, [rsi-1]
- lea rsi, [rsi+rax-1]
+ mov rsi, arg(3) ;left;
%ifidn %1, sse2
punpcklbw xmm3, xmm0
pshuflw xmm3, xmm3, 0x0
@@ -427,20 +433,22 @@
;void vp8_intra_pred_uv_ve_mmx(
; unsigned char *dst,
; int dst_stride
-; unsigned char *src,
-; int src_stride,
+; unsigned char *above,
+; unsigned char *left,
+; int left_stride,
; )
global sym(vp8_intra_pred_uv_ve_mmx)
sym(vp8_intra_pred_uv_ve_mmx):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
+ SHADOW_ARGS_TO_STACK 5
; end prolog
+ ; arg(3), arg(4) not used
+
; read from top
mov rax, arg(2) ;src;
- movsxd rdx, dword ptr arg(3) ;src_stride;
- sub rax, rdx
+
movq mm1, [rax]
; write out
@@ -466,15 +474,16 @@
;void vp8_intra_pred_uv_ho_mmx2(
; unsigned char *dst,
; int dst_stride
-; unsigned char *src,
-; int src_stride,
+; unsigned char *above,
+; unsigned char *left,
+; int left_stride
; )
%macro vp8_intra_pred_uv_ho 1
global sym(vp8_intra_pred_uv_ho_%1)
sym(vp8_intra_pred_uv_ho_%1):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
+ SHADOW_ARGS_TO_STACK 5
push rsi
push rdi
%ifidn %1, ssse3
@@ -485,12 +494,14 @@
%endif
; end prolog
+ ;arg(2) not used
+
; read from left and write out
%ifidn %1, mmx2
mov edx, 4
%endif
- mov rsi, arg(2) ;src;
- movsxd rax, dword ptr arg(3) ;src_stride;
+ mov rsi, arg(3) ;left
+ movsxd rax, dword ptr arg(4) ;left_stride;
mov rdi, arg(0) ;dst;
movsxd rcx, dword ptr arg(1) ;dst_stride
%ifidn %1, ssse3
@@ -498,7 +509,7 @@
movdqa xmm2, [GLOBAL(dc_00001111)]
lea rbx, [rax*3]
%endif
- dec rsi
+
%ifidn %1, mmx2
.vp8_intra_pred_uv_ho_%1_loop:
movd mm0, [rsi]
@@ -562,38 +573,43 @@
;void vp8_intra_pred_y_dc_sse2(
; unsigned char *dst,
; int dst_stride
-; unsigned char *src,
-; int src_stride,
+; unsigned char *above,
+; unsigned char *left,
+; int left_stride
; )
global sym(vp8_intra_pred_y_dc_sse2)
sym(vp8_intra_pred_y_dc_sse2):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
+ SHADOW_ARGS_TO_STACK 5
push rsi
push rdi
; end prolog
; from top
- mov rsi, arg(2) ;src;
- movsxd rax, dword ptr arg(3) ;src_stride;
- sub rsi, rax
+ mov rdi, arg(2) ;above
+ mov rsi, arg(3) ;left
+ movsxd rax, dword ptr arg(4) ;left_stride;
+
pxor xmm0, xmm0
- movdqa xmm1, [rsi]
+ movdqa xmm1, [rdi]
psadbw xmm1, xmm0
movq xmm2, xmm1
punpckhqdq xmm1, xmm1
paddw xmm1, xmm2
; from left
- dec rsi
lea rdi, [rax*3]
- movzx ecx, byte [rsi+rax]
+
+ movzx ecx, byte [rsi]
+ movzx edx, byte [rsi+rax]
+ add ecx, edx
movzx edx, byte [rsi+rax*2]
add ecx, edx
movzx edx, byte [rsi+rdi]
add ecx, edx
lea rsi, [rsi+rax*4]
+
movzx edx, byte [rsi]
add ecx, edx
movzx edx, byte [rsi+rax]
@@ -603,6 +619,7 @@
movzx edx, byte [rsi+rdi]
add ecx, edx
lea rsi, [rsi+rax*4]
+
movzx edx, byte [rsi]
add ecx, edx
movzx edx, byte [rsi+rax]
@@ -612,6 +629,7 @@
movzx edx, byte [rsi+rdi]
add ecx, edx
lea rsi, [rsi+rax*4]
+
movzx edx, byte [rsi]
add ecx, edx
movzx edx, byte [rsi+rax]
@@ -620,8 +638,6 @@
add ecx, edx
movzx edx, byte [rsi+rdi]
add ecx, edx
- movzx edx, byte [rsi+rax*4]
- add ecx, edx
; add up
pextrw edx, xmm1, 0x0
@@ -663,22 +679,23 @@
;void vp8_intra_pred_y_dctop_sse2(
; unsigned char *dst,
; int dst_stride
-; unsigned char *src,
-; int src_stride,
+; unsigned char *above,
+; unsigned char *left,
+; int left_stride
; )
global sym(vp8_intra_pred_y_dctop_sse2)
sym(vp8_intra_pred_y_dctop_sse2):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
+ SHADOW_ARGS_TO_STACK 5
push rsi
GET_GOT rbx
; end prolog
+ ;arg(3), arg(4) not used
+
; from top
- mov rcx, arg(2) ;src;
- movsxd rax, dword ptr arg(3) ;src_stride;
- sub rcx, rax
+ mov rcx, arg(2) ;above;
pxor xmm0, xmm0
movdqa xmm1, [rcx]
psadbw xmm1, xmm0
@@ -724,22 +741,25 @@
;void vp8_intra_pred_y_dcleft_sse2(
; unsigned char *dst,
; int dst_stride
-; unsigned char *src,
-; int src_stride,
+; unsigned char *above,
+; unsigned char *left,
+; int left_stride
; )
global sym(vp8_intra_pred_y_dcleft_sse2)
sym(vp8_intra_pred_y_dcleft_sse2):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
+ SHADOW_ARGS_TO_STACK 5
push rsi
push rdi
; end prolog
+ ;arg(2) not used
+
; from left
- mov rsi, arg(2) ;src;
- movsxd rax, dword ptr arg(3) ;src_stride;
- dec rsi
+ mov rsi, arg(3) ;left;
+ movsxd rax, dword ptr arg(4) ;left_stride;
+
lea rdi, [rax*3]
movzx ecx, byte [rsi]
movzx edx, byte [rsi+rax]
@@ -814,18 +834,21 @@
;void vp8_intra_pred_y_dc128_sse2(
; unsigned char *dst,
; int dst_stride
-; unsigned char *src,
-; int src_stride,
+; unsigned char *above,
+; unsigned char *left,
+; int left_stride
; )
global sym(vp8_intra_pred_y_dc128_sse2)
sym(vp8_intra_pred_y_dc128_sse2):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
+ SHADOW_ARGS_TO_STACK 5
push rsi
GET_GOT rbx
; end prolog
+ ;arg(2), arg(3), arg(4) not used
+
; write out
mov rsi, 2
movdqa xmm1, [GLOBAL(dc_128)]
@@ -857,15 +880,16 @@
;void vp8_intra_pred_y_tm_sse2(
; unsigned char *dst,
; int dst_stride
-; unsigned char *src,
-; int src_stride,
+; unsigned char *above,
+; unsigned char *left,
+; int left_stride
; )
%macro vp8_intra_pred_y_tm 1
global sym(vp8_intra_pred_y_tm_%1)
sym(vp8_intra_pred_y_tm_%1):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
+ SHADOW_ARGS_TO_STACK 5
push rsi
push rdi
GET_GOT rbx
@@ -873,9 +897,8 @@
; read top row
mov edx, 8
- mov rsi, arg(2) ;src;
- movsxd rax, dword ptr arg(3) ;src_stride;
- sub rsi, rax
+ mov rsi, arg(2) ;above
+ movsxd rax, dword ptr arg(4) ;left_stride;
pxor xmm0, xmm0
%ifidn %1, ssse3
movdqa xmm3, [GLOBAL(dc_1024)]
@@ -887,7 +910,7 @@
; set up left ptrs ans subtract topleft
movd xmm4, [rsi-1]
- lea rsi, [rsi+rax-1]
+ mov rsi, arg(3) ;left
%ifidn %1, sse2
punpcklbw xmm4, xmm0
pshuflw xmm4, xmm4, 0x0
@@ -945,27 +968,29 @@
;void vp8_intra_pred_y_ve_sse2(
; unsigned char *dst,
; int dst_stride
-; unsigned char *src,
-; int src_stride,
+; unsigned char *above,
+; unsigned char *left,
+; int left_stride
; )
global sym(vp8_intra_pred_y_ve_sse2)
sym(vp8_intra_pred_y_ve_sse2):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
+ SHADOW_ARGS_TO_STACK 5
push rsi
; end prolog
+ ;arg(3), arg(4) not used
+
+ mov rax, arg(2) ;above;
+ mov rsi, 2
+ movsxd rdx, dword ptr arg(1) ;dst_stride
+
; read from top
- mov rax, arg(2) ;src;
- movsxd rdx, dword ptr arg(3) ;src_stride;
- sub rax, rdx
movdqa xmm1, [rax]
; write out
- mov rsi, 2
mov rax, arg(0) ;dst;
- movsxd rdx, dword ptr arg(1) ;dst_stride
lea rcx, [rdx*3]
.label
@@ -991,25 +1016,27 @@
;void vp8_intra_pred_y_ho_sse2(
; unsigned char *dst,
; int dst_stride
-; unsigned char *src,
-; int src_stride,
+; unsigned char *above,
+; unsigned char *left,
+; int left_stride,
; )
global sym(vp8_intra_pred_y_ho_sse2)
sym(vp8_intra_pred_y_ho_sse2):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
+ SHADOW_ARGS_TO_STACK 5
push rsi
push rdi
; end prolog
+ ;arg(2) not used
+
; read from left and write out
mov edx, 8
- mov rsi, arg(2) ;src;
- movsxd rax, dword ptr arg(3) ;src_stride;
+ mov rsi, arg(3) ;left;
+ movsxd rax, dword ptr arg(4) ;left_stride;
mov rdi, arg(0) ;dst;
movsxd rcx, dword ptr arg(1) ;dst_stride
- dec rsi
vp8_intra_pred_y_ho_sse2_loop:
movd xmm0, [rsi]
diff --git a/vp8/common/x86/recon_wrapper_sse2.c b/vp8/common/x86/recon_wrapper_sse2.c
index cb9ab80..b482faa 100644
--- a/vp8/common/x86/recon_wrapper_sse2.c
+++ b/vp8/common/x86/recon_wrapper_sse2.c
@@ -15,7 +15,8 @@
#define build_intra_predictors_mbuv_prototype(sym) \
void sym(unsigned char *dst, int dst_stride, \
- const unsigned char *src, int src_stride)
+ const unsigned char *above, \
+ const unsigned char *left, int left_stride)
typedef build_intra_predictors_mbuv_prototype((*build_intra_predictors_mbuv_fn_t));
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_dc_mmx2);
@@ -29,15 +30,19 @@
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_tm_ssse3);
static void vp8_build_intra_predictors_mbuv_x86(MACROBLOCKD *x,
+ unsigned char * uabove_row,
+ unsigned char * vabove_row,
unsigned char *dst_u,
unsigned char *dst_v,
int dst_stride,
+ unsigned char * uleft,
+ unsigned char * vleft,
+ int left_stride,
build_intra_predictors_mbuv_fn_t tm_func,
build_intra_predictors_mbuv_fn_t ho_func)
{
int mode = x->mode_info_context->mbmi.uv_mode;
build_intra_predictors_mbuv_fn_t fn;
- int src_stride = x->dst.uv_stride;
switch (mode) {
case V_PRED: fn = vp8_intra_pred_uv_ve_mmx; break;
@@ -59,59 +64,78 @@
default: return;
}
- fn(dst_u, dst_stride, x->dst.u_buffer, src_stride);
- fn(dst_v, dst_stride, x->dst.v_buffer, src_stride);
+ fn(dst_u, dst_stride, uabove_row, uleft, left_stride);
+ fn(dst_v, dst_stride, vabove_row, vleft, left_stride);
}
-void vp8_build_intra_predictors_mbuv_sse2(MACROBLOCKD *x)
+void vp8_build_intra_predictors_mbuv_s_sse2(MACROBLOCKD *x,
+ unsigned char * uabove_row,
+ unsigned char * vabove_row,
+ unsigned char * uleft,
+ unsigned char * vleft,
+ int left_stride,
+ unsigned char * upred_ptr,
+ unsigned char * vpred_ptr,
+ int pred_stride)
{
- vp8_build_intra_predictors_mbuv_x86(x, &x->predictor[256],
- &x->predictor[320], 8,
+ vp8_build_intra_predictors_mbuv_x86(x,
+ uabove_row, vabove_row,
+ upred_ptr,
+ vpred_ptr, pred_stride,
+ uleft,
+ vleft,
+ left_stride,
vp8_intra_pred_uv_tm_sse2,
vp8_intra_pred_uv_ho_mmx2);
}
-void vp8_build_intra_predictors_mbuv_ssse3(MACROBLOCKD *x)
+void vp8_build_intra_predictors_mbuv_s_ssse3(MACROBLOCKD *x,
+ unsigned char * uabove_row,
+ unsigned char * vabove_row,
+ unsigned char * uleft,
+ unsigned char * vleft,
+ int left_stride,
+ unsigned char * upred_ptr,
+ unsigned char * vpred_ptr,
+ int pred_stride)
{
- vp8_build_intra_predictors_mbuv_x86(x, &x->predictor[256],
- &x->predictor[320], 8,
+ vp8_build_intra_predictors_mbuv_x86(x,
+ uabove_row, vabove_row,
+ upred_ptr,
+ vpred_ptr, pred_stride,
+ uleft,
+ vleft,
+ left_stride,
vp8_intra_pred_uv_tm_ssse3,
vp8_intra_pred_uv_ho_ssse3);
}
-void vp8_build_intra_predictors_mbuv_s_sse2(MACROBLOCKD *x)
-{
- vp8_build_intra_predictors_mbuv_x86(x, x->dst.u_buffer,
- x->dst.v_buffer, x->dst.uv_stride,
- vp8_intra_pred_uv_tm_sse2,
- vp8_intra_pred_uv_ho_mmx2);
-}
+#define build_intra_predictors_mby_prototype(sym) \
+ void sym(unsigned char *dst, int dst_stride, \
+ const unsigned char *above, \
+ const unsigned char *left, int left_stride)
+typedef build_intra_predictors_mby_prototype((*build_intra_predictors_mby_fn_t));
-void vp8_build_intra_predictors_mbuv_s_ssse3(MACROBLOCKD *x)
-{
- vp8_build_intra_predictors_mbuv_x86(x, x->dst.u_buffer,
- x->dst.v_buffer, x->dst.uv_stride,
- vp8_intra_pred_uv_tm_ssse3,
- vp8_intra_pred_uv_ho_ssse3);
-}
-
-extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_dc_sse2);
-extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_dctop_sse2);
-extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_dcleft_sse2);
-extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_dc128_sse2);
-extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_ho_sse2);
-extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_ve_sse2);
-extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_tm_sse2);
-extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_tm_ssse3);
+extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_dc_sse2);
+extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_dctop_sse2);
+extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_dcleft_sse2);
+extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_dc128_sse2);
+extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_ho_sse2);
+extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_ve_sse2);
+extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_tm_sse2);
+extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_tm_ssse3);
static void vp8_build_intra_predictors_mby_x86(MACROBLOCKD *x,
+ unsigned char * yabove_row,
unsigned char *dst_y,
int dst_stride,
- build_intra_predictors_mbuv_fn_t tm_func)
+ unsigned char * yleft,
+ int left_stride,
+ build_intra_predictors_mby_fn_t tm_func)
{
int mode = x->mode_info_context->mbmi.mode;
build_intra_predictors_mbuv_fn_t fn;
- int src_stride = x->dst.y_stride;
+
switch (mode) {
case V_PRED: fn = vp8_intra_pred_y_ve_sse2; break;
case H_PRED: fn = vp8_intra_pred_y_ho_sse2; break;
@@ -132,31 +156,31 @@
default: return;
}
- fn(dst_y, dst_stride, x->dst.y_buffer, src_stride);
+ fn(dst_y, dst_stride, yabove_row, yleft, left_stride);
return;
}
-void vp8_build_intra_predictors_mby_sse2(MACROBLOCKD *x)
+void vp8_build_intra_predictors_mby_s_sse2(MACROBLOCKD *x,
+ unsigned char * yabove_row,
+ unsigned char * yleft,
+ int left_stride,
+ unsigned char * ypred_ptr,
+ int y_stride)
{
- vp8_build_intra_predictors_mby_x86(x, x->predictor, 16,
+ vp8_build_intra_predictors_mby_x86(x, yabove_row, ypred_ptr,
+ y_stride, yleft, left_stride,
vp8_intra_pred_y_tm_sse2);
}
-void vp8_build_intra_predictors_mby_ssse3(MACROBLOCKD *x)
+void vp8_build_intra_predictors_mby_s_ssse3(MACROBLOCKD *x,
+ unsigned char * yabove_row,
+ unsigned char * yleft,
+ int left_stride,
+ unsigned char * ypred_ptr,
+ int y_stride)
{
- vp8_build_intra_predictors_mby_x86(x, x->predictor, 16,
- vp8_intra_pred_y_tm_ssse3);
-}
-
-void vp8_build_intra_predictors_mby_s_sse2(MACROBLOCKD *x)
-{
- vp8_build_intra_predictors_mby_x86(x, x->dst.y_buffer, x->dst.y_stride,
- vp8_intra_pred_y_tm_sse2);
-}
-
-void vp8_build_intra_predictors_mby_s_ssse3(MACROBLOCKD *x)
-{
- vp8_build_intra_predictors_mby_x86(x, x->dst.y_buffer, x->dst.y_stride,
+ vp8_build_intra_predictors_mby_x86(x, yabove_row, ypred_ptr,
+ y_stride, yleft, left_stride,
vp8_intra_pred_y_tm_ssse3);
}
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index 08a0c4b..f75e8ee 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -162,7 +162,8 @@
xd->recon_left[1],
xd->recon_left[2],
xd->recon_left_stride[1],
- xd->dst.u_buffer, xd->dst.v_buffer);
+ xd->dst.u_buffer, xd->dst.v_buffer,
+ xd->dst.uv_stride);
if (mode != B_PRED)
{
@@ -170,7 +171,8 @@
xd->recon_above[0],
xd->recon_left[0],
xd->recon_left_stride[0],
- xd->dst.y_buffer);
+ xd->dst.y_buffer,
+ xd->dst.y_stride);
}
else
{
diff --git a/vp8/decoder/detokenize.c b/vp8/decoder/detokenize.c
index ba94c58..c5752ee 100644
--- a/vp8/decoder/detokenize.c
+++ b/vp8/decoder/detokenize.c
@@ -15,58 +15,6 @@
#include "vpx_ports/mem.h"
#include "detokenize.h"
-#define BOOL_DATA unsigned char
-
-#define OCB_X PREV_COEF_CONTEXTS * ENTROPY_NODES
-DECLARE_ALIGNED(16, static const unsigned char, coef_bands_x[16]) =
-{
- 0 * OCB_X, 1 * OCB_X, 2 * OCB_X, 3 * OCB_X,
- 6 * OCB_X, 4 * OCB_X, 5 * OCB_X, 6 * OCB_X,
- 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X,
- 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 7 * OCB_X
-};
-#define EOB_CONTEXT_NODE 0
-#define ZERO_CONTEXT_NODE 1
-#define ONE_CONTEXT_NODE 2
-#define LOW_VAL_CONTEXT_NODE 3
-#define TWO_CONTEXT_NODE 4
-#define THREE_CONTEXT_NODE 5
-#define HIGH_LOW_CONTEXT_NODE 6
-#define CAT_ONE_CONTEXT_NODE 7
-#define CAT_THREEFOUR_CONTEXT_NODE 8
-#define CAT_THREE_CONTEXT_NODE 9
-#define CAT_FIVE_CONTEXT_NODE 10
-
-#define CAT1_MIN_VAL 5
-#define CAT2_MIN_VAL 7
-#define CAT3_MIN_VAL 11
-#define CAT4_MIN_VAL 19
-#define CAT5_MIN_VAL 35
-#define CAT6_MIN_VAL 67
-
-#define CAT1_PROB0 159
-#define CAT2_PROB0 145
-#define CAT2_PROB1 165
-
-#define CAT3_PROB0 140
-#define CAT3_PROB1 148
-#define CAT3_PROB2 173
-
-#define CAT4_PROB0 135
-#define CAT4_PROB1 140
-#define CAT4_PROB2 155
-#define CAT4_PROB3 176
-
-#define CAT5_PROB0 130
-#define CAT5_PROB1 134
-#define CAT5_PROB2 141
-#define CAT5_PROB3 157
-#define CAT5_PROB4 180
-
-static const unsigned char cat6_prob[12] =
-{ 129, 130, 133, 140, 153, 177, 196, 230, 243, 254, 254, 0 };
-
-
void vp8_reset_mb_tokens_context(MACROBLOCKD *x)
{
/* Clear entropy contexts for Y2 blocks */
@@ -83,302 +31,216 @@
}
}
-DECLARE_ALIGNED(16, extern const unsigned char, vp8_norm[256]);
-#define FILL \
- if(count < 0) \
- VP8DX_BOOL_DECODER_FILL(count, value, bufptr, bufend);
+/*
+ ------------------------------------------------------------------------------
+ Residual decoding (Paragraph 13.2 / 13.3)
+*/
+static const uint8_t kBands[16 + 1] = {
+ 0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7,
+ 0 /* extra entry as sentinel */
+};
-#define NORMALIZE \
- /*if(range < 0x80)*/ \
- { \
- shift = vp8_norm[range]; \
- range <<= shift; \
- value <<= shift; \
- count -= shift; \
+static const uint8_t kCat3[] = { 173, 148, 140, 0 };
+static const uint8_t kCat4[] = { 176, 155, 140, 135, 0 };
+static const uint8_t kCat5[] = { 180, 157, 141, 134, 130, 0 };
+static const uint8_t kCat6[] =
+ { 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0 };
+static const uint8_t* const kCat3456[] = { kCat3, kCat4, kCat5, kCat6 };
+static const uint8_t kZigzag[16] = {
+ 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
+};
+
+#define VP8GetBit vp8dx_decode_bool
+#define NUM_PROBAS 11
+#define NUM_CTX 3
+
+typedef const uint8_t (*ProbaArray)[NUM_CTX][NUM_PROBAS]; /* for const-casting */
+
+static int GetSigned(BOOL_DECODER *br, int value_to_sign)
+{
+ int split = (br->range + 1) >> 1;
+ VP8_BD_VALUE bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8);
+ int v;
+
+ if(br->count < 0)
+ vp8dx_bool_decoder_fill(br);
+
+ if ( br->value < bigsplit )
+ {
+ br->range = split;
+ v= value_to_sign;
}
-
-#define DECODE_AND_APPLYSIGN(value_to_sign) \
- split = (range + 1) >> 1; \
- bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \
- FILL \
- if ( value < bigsplit ) \
- { \
- range = split; \
- v= value_to_sign; \
- } \
- else \
- { \
- range = range-split; \
- value = value-bigsplit; \
- v = -value_to_sign; \
- } \
- range +=range; \
- value +=value; \
- count--;
-
-#define DECODE_AND_BRANCH_IF_ZERO(probability,branch) \
- { \
- split = 1 + ((( probability*(range-1) ) )>> 8); \
- bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \
- FILL \
- if ( value < bigsplit ) \
- { \
- range = split; \
- NORMALIZE \
- goto branch; \
- } \
- value -= bigsplit; \
- range = range - split; \
- NORMALIZE \
+ else
+ {
+ br->range = br->range-split;
+ br->value = br->value-bigsplit;
+ v = -value_to_sign;
}
+ br->range +=br->range;
+ br->value +=br->value;
+ br->count--;
-#define DECODE_AND_LOOP_IF_ZERO(probability,branch) \
- { \
- split = 1 + ((( probability*(range-1) ) ) >> 8); \
- bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \
- FILL \
- if ( value < bigsplit ) \
- { \
- range = split; \
- NORMALIZE \
- Prob = coef_probs; \
- if(c<15) {\
- ++c; \
- Prob += coef_bands_x[c]; \
- goto branch; \
- } goto BLOCK_FINISHED; /*for malformed input */\
- } \
- value -= bigsplit; \
- range = range - split; \
- NORMALIZE \
+ return v;
+}
+/*
+ Returns the position of the last non-zero coeff plus one
+ (and 0 if there's no coeff at all)
+*/
+static int GetCoeffs(BOOL_DECODER *br, ProbaArray prob,
+ int ctx, int n, int16_t* out)
+{
+ const uint8_t* p = prob[n][ctx];
+ if (!VP8GetBit(br, p[0]))
+ { /* first EOB is more a 'CBP' bit. */
+ return 0;
}
+ while (1)
+ {
+ ++n;
+ if (!VP8GetBit(br, p[1]))
+ {
+ p = prob[kBands[n]][0];
+ }
+ else
+ { /* non zero coeff */
+ int v, j;
+ if (!VP8GetBit(br, p[2]))
+ {
+ p = prob[kBands[n]][1];
+ v = 1;
+ }
+ else
+ {
+ if (!VP8GetBit(br, p[3]))
+ {
+ if (!VP8GetBit(br, p[4]))
+ {
+ v = 2;
+ }
+ else
+ {
+ v = 3 + VP8GetBit(br, p[5]);
+ }
+ }
+ else
+ {
+ if (!VP8GetBit(br, p[6]))
+ {
+ if (!VP8GetBit(br, p[7]))
+ {
+ v = 5 + VP8GetBit(br, 159);
+ } else
+ {
+ v = 7 + 2 * VP8GetBit(br, 165);
+ v += VP8GetBit(br, 145);
+ }
+ }
+ else
+ {
+ const uint8_t* tab;
+ const int bit1 = VP8GetBit(br, p[8]);
+ const int bit0 = VP8GetBit(br, p[9 + bit1]);
+ const int cat = 2 * bit1 + bit0;
+ v = 0;
+ for (tab = kCat3456[cat]; *tab; ++tab)
+ {
+ v += v + VP8GetBit(br, *tab);
+ }
+ v += 3 + (8 << cat);
+ }
+ }
+ p = prob[kBands[n]][2];
+ }
+ j = kZigzag[n - 1];
-#define DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val) \
- DECODE_AND_APPLYSIGN(val) \
- Prob = coef_probs + (ENTROPY_NODES*2); \
- if(c < 15){\
- qcoeff_ptr [ scan[c] ] = (int16_t) v; \
- ++c; \
- goto DO_WHILE; }\
- qcoeff_ptr [ 15 ] = (int16_t) v; \
- goto BLOCK_FINISHED;
+ out[j] = GetSigned(br, v);
-
-#define DECODE_EXTRABIT_AND_ADJUST_VAL(prob, bits_count)\
- split = 1 + (((range-1) * prob) >> 8); \
- bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \
- FILL \
- if(value >= bigsplit)\
- {\
- range = range-split;\
- value = value-bigsplit;\
- val += ((uint16_t)1<<bits_count);\
- }\
- else\
- {\
- range = split;\
- }\
- NORMALIZE
+ if (n == 16 || !VP8GetBit(br, p[0]))
+ { /* EOB */
+ return n;
+ }
+ }
+ if (n == 16)
+ {
+ return 16;
+ }
+ }
+}
int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
{
- ENTROPY_CONTEXT *A = (ENTROPY_CONTEXT *)x->above_context;
- ENTROPY_CONTEXT *L = (ENTROPY_CONTEXT *)x->left_context;
- const FRAME_CONTEXT * const fc = &dx->common.fc;
-
BOOL_DECODER *bc = x->current_bc;
-
+ const FRAME_CONTEXT * const fc = &dx->common.fc;
char *eobs = x->eobs;
- ENTROPY_CONTEXT *a;
- ENTROPY_CONTEXT *l;
int i;
-
+ int nonzeros;
int eobtotal = 0;
- register int count;
-
- const BOOL_DATA *bufptr;
- const BOOL_DATA *bufend;
- register unsigned int range;
- VP8_BD_VALUE value;
- const int *scan;
- register unsigned int shift;
- unsigned int split;
- VP8_BD_VALUE bigsplit;
short *qcoeff_ptr;
+ ProbaArray coef_probs;
+ ENTROPY_CONTEXT *a_ctx = ((ENTROPY_CONTEXT *)x->above_context);
+ ENTROPY_CONTEXT *l_ctx = ((ENTROPY_CONTEXT *)x->left_context);
+ ENTROPY_CONTEXT *a;
+ ENTROPY_CONTEXT *l;
+ int skip_dc = 0;
- const vp8_prob *coef_probs;
- int stop;
- int val, bits_count;
- int c;
- int v;
- const vp8_prob *Prob;
- int start_coeff;
-
-
- i = 0;
- stop = 16;
-
- scan = vp8_default_zig_zag1d;
qcoeff_ptr = &x->qcoeff[0];
- coef_probs = fc->coef_probs [3] [ 0 ] [0];
if (x->mode_info_context->mbmi.mode != B_PRED &&
x->mode_info_context->mbmi.mode != SPLITMV)
{
- i = 24;
- stop = 24;
- qcoeff_ptr += 24*16;
- eobtotal -= 16;
- coef_probs = fc->coef_probs [1] [ 0 ] [0];
+ a = a_ctx + 8;
+ l = l_ctx + 8;
+
+ coef_probs = fc->coef_probs [1];
+
+ nonzeros = GetCoeffs(bc, coef_probs, (*a + *l), 0, qcoeff_ptr + 24 * 16);
+ *a = *l = (nonzeros > 0);
+
+ eobs[24] = nonzeros;
+ eobtotal += nonzeros - 16;
+
+ coef_probs = fc->coef_probs [0];
+ skip_dc = 1;
}
-
- bufend = bc->user_buffer_end;
- bufptr = bc->user_buffer;
- value = bc->value;
- count = bc->count;
- range = bc->range;
-
- start_coeff = 0;
-
-BLOCK_LOOP:
- a = A + vp8_block2above[i];
- l = L + vp8_block2left[i];
-
- c = start_coeff;
-
- VP8_COMBINEENTROPYCONTEXTS(v, *a, *l);
-
- Prob = coef_probs;
- Prob += v * ENTROPY_NODES;
- *a = *l = 0;
-
-DO_WHILE:
- Prob += coef_bands_x[c];
- DECODE_AND_BRANCH_IF_ZERO(Prob[EOB_CONTEXT_NODE], BLOCK_FINISHED);
- *a = *l = 1;
-
-CHECK_0_:
- DECODE_AND_LOOP_IF_ZERO(Prob[ZERO_CONTEXT_NODE], CHECK_0_);
- DECODE_AND_BRANCH_IF_ZERO(Prob[ONE_CONTEXT_NODE], ONE_CONTEXT_NODE_0_);
- DECODE_AND_BRANCH_IF_ZERO(Prob[LOW_VAL_CONTEXT_NODE],
- LOW_VAL_CONTEXT_NODE_0_);
- DECODE_AND_BRANCH_IF_ZERO(Prob[HIGH_LOW_CONTEXT_NODE],
- HIGH_LOW_CONTEXT_NODE_0_);
- DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_THREEFOUR_CONTEXT_NODE],
- CAT_THREEFOUR_CONTEXT_NODE_0_);
- DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_FIVE_CONTEXT_NODE],
- CAT_FIVE_CONTEXT_NODE_0_);
-
- val = CAT6_MIN_VAL;
- bits_count = 10;
-
- do
+ else
{
- DECODE_EXTRABIT_AND_ADJUST_VAL(cat6_prob[bits_count], bits_count);
- bits_count -- ;
+ coef_probs = fc->coef_probs [3];
+ skip_dc = 0;
}
- while (bits_count >= 0);
- DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-CAT_FIVE_CONTEXT_NODE_0_:
- val = CAT5_MIN_VAL;
- DECODE_EXTRABIT_AND_ADJUST_VAL(CAT5_PROB4, 4);
- DECODE_EXTRABIT_AND_ADJUST_VAL(CAT5_PROB3, 3);
- DECODE_EXTRABIT_AND_ADJUST_VAL(CAT5_PROB2, 2);
- DECODE_EXTRABIT_AND_ADJUST_VAL(CAT5_PROB1, 1);
- DECODE_EXTRABIT_AND_ADJUST_VAL(CAT5_PROB0, 0);
- DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-CAT_THREEFOUR_CONTEXT_NODE_0_:
- DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_THREE_CONTEXT_NODE],
- CAT_THREE_CONTEXT_NODE_0_);
- val = CAT4_MIN_VAL;
- DECODE_EXTRABIT_AND_ADJUST_VAL(CAT4_PROB3, 3);
- DECODE_EXTRABIT_AND_ADJUST_VAL(CAT4_PROB2, 2);
- DECODE_EXTRABIT_AND_ADJUST_VAL(CAT4_PROB1, 1);
- DECODE_EXTRABIT_AND_ADJUST_VAL(CAT4_PROB0, 0);
- DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-CAT_THREE_CONTEXT_NODE_0_:
- val = CAT3_MIN_VAL;
- DECODE_EXTRABIT_AND_ADJUST_VAL(CAT3_PROB2, 2);
- DECODE_EXTRABIT_AND_ADJUST_VAL(CAT3_PROB1, 1);
- DECODE_EXTRABIT_AND_ADJUST_VAL(CAT3_PROB0, 0);
- DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-HIGH_LOW_CONTEXT_NODE_0_:
- DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_ONE_CONTEXT_NODE],
- CAT_ONE_CONTEXT_NODE_0_);
-
- val = CAT2_MIN_VAL;
- DECODE_EXTRABIT_AND_ADJUST_VAL(CAT2_PROB1, 1);
- DECODE_EXTRABIT_AND_ADJUST_VAL(CAT2_PROB0, 0);
- DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-CAT_ONE_CONTEXT_NODE_0_:
- val = CAT1_MIN_VAL;
- DECODE_EXTRABIT_AND_ADJUST_VAL(CAT1_PROB0, 0);
- DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-LOW_VAL_CONTEXT_NODE_0_:
- DECODE_AND_BRANCH_IF_ZERO(Prob[TWO_CONTEXT_NODE], TWO_CONTEXT_NODE_0_);
- DECODE_AND_BRANCH_IF_ZERO(Prob[THREE_CONTEXT_NODE], THREE_CONTEXT_NODE_0_);
- DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(4);
-
-THREE_CONTEXT_NODE_0_:
- DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(3);
-
-TWO_CONTEXT_NODE_0_:
- DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(2);
-
-ONE_CONTEXT_NODE_0_:
- DECODE_AND_APPLYSIGN(1);
- Prob = coef_probs + ENTROPY_NODES;
-
- if (c < 15)
+ for (i = 0; i < 16; ++i)
{
- qcoeff_ptr [ scan[c] ] = (int16_t) v;
- ++c;
- goto DO_WHILE;
+ a = a_ctx + (i&3);
+ l = l_ctx + ((i&0xc)>>2);
+
+ nonzeros = GetCoeffs(bc, coef_probs, (*a + *l), skip_dc, qcoeff_ptr);
+ *a = *l = (nonzeros > 0);
+
+ nonzeros += skip_dc;
+ eobs[i] = nonzeros;
+ eobtotal += nonzeros;
+ qcoeff_ptr += 16;
}
- qcoeff_ptr [ 15 ] = (int16_t) v;
-BLOCK_FINISHED:
- eobs[i] = c;
- eobtotal += c;
- qcoeff_ptr += 16;
+ coef_probs = fc->coef_probs [2];
- i++;
-
- if (i < stop)
- goto BLOCK_LOOP;
-
- if (i == 25)
+ a_ctx += 4;
+ l_ctx += 4;
+ for (i = 16; i < 24; ++i)
{
- start_coeff = 1;
- i = 0;
- stop = 16;
- coef_probs = fc->coef_probs [0] [ 0 ] [0];
- qcoeff_ptr -= (24*16 + 16);
- goto BLOCK_LOOP;
+ a = a_ctx + ((i > 19)<<1) + (i&1);
+ l = l_ctx + ((i > 19)<<1) + ((i&3)>1);
+
+ nonzeros = GetCoeffs(bc, coef_probs, (*a + *l), 0, qcoeff_ptr);
+ *a = *l = (nonzeros > 0);
+
+ eobs[i] = nonzeros;
+ eobtotal += nonzeros;
+ qcoeff_ptr += 16;
}
- if (i == 16)
- {
- start_coeff = 0;
- coef_probs = fc->coef_probs [2] [ 0 ] [0];
- stop = 24;
- goto BLOCK_LOOP;
- }
-
- FILL
- bc->user_buffer = bufptr;
- bc->value = value;
- bc->count = count;
- bc->range = range;
return eobtotal;
-
}
+
diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c
index bc4450d..845228b 100644
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@@ -150,7 +150,8 @@
xd->recon_left[1],
xd->recon_left[2],
xd->recon_left_stride[1],
- xd->dst.u_buffer, xd->dst.v_buffer);
+ xd->dst.u_buffer, xd->dst.v_buffer,
+ xd->dst.uv_stride);
if (mode != B_PRED)
{
@@ -158,7 +159,8 @@
xd->recon_above[0],
xd->recon_left[0],
xd->recon_left_stride[0],
- xd->dst.y_buffer);
+ xd->dst.y_buffer,
+ xd->dst.y_stride);
}
else
{
@@ -813,15 +815,15 @@
/* Allocate memory for above_row buffers. */
CHECK_MEM_ERROR(pbi->mt_yabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
for (i=0; i< pc->mb_rows; i++)
- CHECK_MEM_ERROR(pbi->mt_yabove_row[i], vpx_calloc(sizeof(unsigned char) * (width + (VP8BORDERINPIXELS<<1)), 1));
+ CHECK_MEM_ERROR(pbi->mt_yabove_row[i], vpx_memalign(16,sizeof(unsigned char) * (width + (VP8BORDERINPIXELS<<1))));
CHECK_MEM_ERROR(pbi->mt_uabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
for (i=0; i< pc->mb_rows; i++)
- CHECK_MEM_ERROR(pbi->mt_uabove_row[i], vpx_calloc(sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS), 1));
+ CHECK_MEM_ERROR(pbi->mt_uabove_row[i], vpx_memalign(16,sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS)));
CHECK_MEM_ERROR(pbi->mt_vabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
for (i=0; i< pc->mb_rows; i++)
- CHECK_MEM_ERROR(pbi->mt_vabove_row[i], vpx_calloc(sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS), 1));
+ CHECK_MEM_ERROR(pbi->mt_vabove_row[i], vpx_memalign(16,sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS)));
/* Allocate memory for left_col buffers. */
CHECK_MEM_ERROR(pbi->mt_yleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
diff --git a/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm b/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm
index 5b7e8f6..a644a00 100644
--- a/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm
+++ b/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm
@@ -47,7 +47,6 @@
mvn r2, #23
str r12, [r0, #vp8_writer_lowvalue]
str r3, [r0, #vp8_writer_range]
- str r12, [r0, #vp8_writer_value]
str r2, [r0, #vp8_writer_count]
str r12, [r0, #vp8_writer_pos]
str r1, [r0, #vp8_writer_buffer]
diff --git a/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm
index 3a183aa..90a98fe 100644
--- a/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm
+++ b/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm
@@ -90,7 +90,6 @@
mov r5, #255 ; vp8_writer_range
mvn r3, #23 ; vp8_writer_count
- str r2, [r0, #vp8_writer_value]
str r2, [r0, #vp8_writer_pos]
str r10, [r0, #vp8_writer_buffer]
diff --git a/vp8/encoder/asm_enc_offsets.c b/vp8/encoder/asm_enc_offsets.c
index 09ee6fb..a4169b3 100644
--- a/vp8/encoder/asm_enc_offsets.c
+++ b/vp8/encoder/asm_enc_offsets.c
@@ -45,7 +45,6 @@
/* pack tokens */
DEFINE(vp8_writer_lowvalue, offsetof(vp8_writer, lowvalue));
DEFINE(vp8_writer_range, offsetof(vp8_writer, range));
-DEFINE(vp8_writer_value, offsetof(vp8_writer, value));
DEFINE(vp8_writer_count, offsetof(vp8_writer, count));
DEFINE(vp8_writer_pos, offsetof(vp8_writer, pos));
DEFINE(vp8_writer_buffer, offsetof(vp8_writer, buffer));
diff --git a/vp8/encoder/boolhuff.c b/vp8/encoder/boolhuff.c
index d8ff5f9..74770a2 100644
--- a/vp8/encoder/boolhuff.c
+++ b/vp8/encoder/boolhuff.c
@@ -45,7 +45,6 @@
br->lowvalue = 0;
br->range = 255;
- br->value = 0;
br->count = -24;
br->buffer = source;
br->buffer_end = source_end;
diff --git a/vp8/encoder/boolhuff.h b/vp8/encoder/boolhuff.h
index 569b779..fb6cbaf 100644
--- a/vp8/encoder/boolhuff.h
+++ b/vp8/encoder/boolhuff.h
@@ -26,7 +26,6 @@
{
unsigned int lowvalue;
unsigned int range;
- unsigned int value;
int count;
unsigned int pos;
unsigned char *buffer;
diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c
index f73bcc5..1f445b7 100644
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -99,7 +99,8 @@
xd->dst.y_buffer - xd->dst.y_stride,
xd->dst.y_buffer - 1,
xd->dst.y_stride,
- xd->dst.y_buffer);
+ xd->dst.y_buffer,
+ xd->dst.y_stride);
vp8_subtract_mby(x->src_diff, *(b->base_src),
b->src_stride, xd->dst.y_buffer, xd->dst.y_stride);
@@ -121,7 +122,8 @@
xd->dst.u_buffer - 1,
xd->dst.v_buffer - 1,
xd->dst.uv_stride,
- xd->dst.u_buffer, xd->dst.v_buffer);
+ xd->dst.u_buffer, xd->dst.v_buffer,
+ xd->dst.uv_stride);
vp8_subtract_mbuv(x->src_diff, x->src.u_buffer,
x->src.v_buffer, x->src.uv_stride, xd->dst.u_buffer,
diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c
index 24e041f..dafb645 100644
--- a/vp8/encoder/pickinter.c
+++ b/vp8/encoder/pickinter.c
@@ -735,8 +735,12 @@
case V_PRED:
case H_PRED:
case TM_PRED:
- vp8_build_intra_predictors_mby
- (&x->e_mbd);
+ vp8_build_intra_predictors_mby_s(xd,
+ xd->dst.y_buffer - xd->dst.y_stride,
+ xd->dst.y_buffer - 1,
+ xd->dst.y_stride,
+ xd->predictor,
+ 16);
distortion2 = vp8_variance16x16
(*(b->base_src), b->src_stride,
x->e_mbd.predictor, 16, &sse);
@@ -1130,19 +1134,24 @@
int this_rd;
unsigned int sse;
BLOCK *b = &x->block[0];
+ MACROBLOCKD *xd = &x->e_mbd;
- x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
+ xd->mode_info_context->mbmi.ref_frame = INTRA_FRAME;
pick_intra_mbuv_mode(x);
for (mode = DC_PRED; mode <= TM_PRED; mode ++)
{
- x->e_mbd.mode_info_context->mbmi.mode = mode;
- vp8_build_intra_predictors_mby
- (&x->e_mbd);
+ xd->mode_info_context->mbmi.mode = mode;
+ vp8_build_intra_predictors_mby_s(xd,
+ xd->dst.y_buffer - xd->dst.y_stride,
+ xd->dst.y_buffer - 1,
+ xd->dst.y_stride,
+ xd->predictor,
+ 16);
distortion = vp8_variance16x16
- (*(b->base_src), b->src_stride, x->e_mbd.predictor, 16, &sse);
- rate = x->mbmode_cost[x->e_mbd.frame_type][mode];
+ (*(b->base_src), b->src_stride, xd->predictor, 16, &sse);
+ rate = x->mbmode_cost[xd->frame_type][mode];
this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
if (error16x16 > this_rd)
@@ -1153,13 +1162,13 @@
best_rate = rate;
}
}
- x->e_mbd.mode_info_context->mbmi.mode = best_mode;
+ xd->mode_info_context->mbmi.mode = best_mode;
error4x4 = pick_intra4x4mby_modes(x, &rate,
&best_sse);
if (error4x4 < error16x16)
{
- x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
+ xd->mode_info_context->mbmi.mode = B_PRED;
best_rate = rate;
}
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index 8f575e4..2b706ba 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -782,18 +782,23 @@
int distortion;
int best_rd = INT_MAX;
int this_rd;
+ MACROBLOCKD *xd = &x->e_mbd;
//Y Search for 16x16 intra prediction mode
for (mode = DC_PRED; mode <= TM_PRED; mode++)
{
- x->e_mbd.mode_info_context->mbmi.mode = mode;
+ xd->mode_info_context->mbmi.mode = mode;
- vp8_build_intra_predictors_mby
- (&x->e_mbd);
+ vp8_build_intra_predictors_mby_s(xd,
+ xd->dst.y_buffer - xd->dst.y_stride,
+ xd->dst.y_buffer - 1,
+ xd->dst.y_stride,
+ xd->predictor,
+ 16);
macro_block_yrd(x, &ratey, &distortion);
- rate = ratey + x->mbmode_cost[x->e_mbd.frame_type]
- [x->e_mbd.mode_info_context->mbmi.mode];
+ rate = ratey + x->mbmode_cost[xd->frame_type]
+ [xd->mode_info_context->mbmi.mode];
this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
@@ -807,7 +812,7 @@
}
}
- x->e_mbd.mode_info_context->mbmi.mode = mode_selected;
+ xd->mode_info_context->mbmi.mode = mode_selected;
return best_rd;
}
@@ -875,6 +880,7 @@
int best_rd = INT_MAX;
int UNINITIALIZED_IS_SAFE(d), UNINITIALIZED_IS_SAFE(r);
int rate_to;
+ MACROBLOCKD *xd = &x->e_mbd;
for (mode = DC_PRED; mode <= TM_PRED; mode++)
{
@@ -882,17 +888,26 @@
int distortion;
int this_rd;
- x->e_mbd.mode_info_context->mbmi.uv_mode = mode;
- vp8_build_intra_predictors_mbuv
- (&x->e_mbd);
+ xd->mode_info_context->mbmi.uv_mode = mode;
+
+ vp8_build_intra_predictors_mbuv_s(xd,
+ xd->dst.u_buffer - xd->dst.uv_stride,
+ xd->dst.v_buffer - xd->dst.uv_stride,
+ xd->dst.u_buffer - 1,
+ xd->dst.v_buffer - 1,
+ xd->dst.uv_stride,
+ &xd->predictor[256], &xd->predictor[320],
+ 8);
+
+
vp8_subtract_mbuv(x->src_diff,
x->src.u_buffer, x->src.v_buffer, x->src.uv_stride,
- &x->e_mbd.predictor[256], &x->e_mbd.predictor[320], 8);
+ &xd->predictor[256], &xd->predictor[320], 8);
vp8_transform_mbuv(x);
vp8_quantize_mbuv(x);
rate_to = rd_cost_mbuv(x);
- rate = rate_to + x->intra_uv_mode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.uv_mode];
+ rate = rate_to + x->intra_uv_mode_cost[xd->frame_type][xd->mode_info_context->mbmi.uv_mode];
distortion = vp8_mbuverror(x) / 4;
@@ -911,7 +926,7 @@
*rate = r;
*distortion = d;
- x->e_mbd.mode_info_context->mbmi.uv_mode = mode_selected;
+ xd->mode_info_context->mbmi.uv_mode = mode_selected;
}
int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4])
@@ -2157,8 +2172,13 @@
{
int distortion;
x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
- vp8_build_intra_predictors_mby
- (&x->e_mbd);
+
+ vp8_build_intra_predictors_mby_s(xd,
+ xd->dst.y_buffer - xd->dst.y_stride,
+ xd->dst.y_buffer - 1,
+ xd->dst.y_stride,
+ xd->predictor,
+ 16);
macro_block_yrd(x, &rd.rate_y, &distortion) ;
rd.rate2 += rd.rate_y;
rd.distortion2 += distortion;
diff --git a/vpxenc.c b/vpxenc.c
index e8b8261..d89c075 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -2496,5 +2496,6 @@
vpx_img_free(&raw);
free(argv);
+ free(streams);
return EXIT_SUCCESS;
}