Merge "Modified the inverse walsh to output directly"
diff --git a/build/make/ads2gas.pl b/build/make/ads2gas.pl
index 388133a..cea967f 100755
--- a/build/make/ads2gas.pl
+++ b/build/make/ads2gas.pl
@@ -126,15 +126,14 @@
# ALIGN directive
s/ALIGN/.balign/g;
- # Strip ARM
- s/\sARM/@ ARM/g;
+ # ARM code
+ s/\sARM/.arm/g;
- # Strip REQUIRE8
- #s/\sREQUIRE8/@ REQUIRE8/g;
- s/\sREQUIRE8/@ /g; #EQU cause problem
+ # REQUIRE8 Stack is required to be 8-byte aligned
+ s/\sREQUIRE8/.eabi_attribute Tag_ABI_align_needed, 1/g;
- # Strip PRESERVE8
- s/\sPRESERVE8/@ PRESERVE8/g;
+ # PRESERVE8 Stack 8-byte align is preserved
+ s/\sPRESERVE8/.eabi_attribute Tag_ABI_align_preserved, 1/g;
# Use PROC and ENDP to give the symbols a .size directive.
# This makes them show up properly in debugging tools like gdb and valgrind.
diff --git a/configure b/configure
index e6fcc87..cca94a2 100755
--- a/configure
+++ b/configure
@@ -121,6 +121,7 @@
all_platforms="${all_platforms} x86_64-linux-gcc"
all_platforms="${all_platforms} x86_64-linux-icc"
all_platforms="${all_platforms} x86_64-solaris-gcc"
+all_platforms="${all_platforms} x86_64-win64-gcc"
all_platforms="${all_platforms} x86_64-win64-vs8"
all_platforms="${all_platforms} x86_64-win64-vs9"
all_platforms="${all_platforms} universal-darwin8-gcc"
diff --git a/vp8/common/onyx.h b/vp8/common/onyx.h
index 28cbaed..3f04dab 100644
--- a/vp8/common/onyx.h
+++ b/vp8/common/onyx.h
@@ -104,7 +104,7 @@
int Version; // 4 versions of bitstream defined 0 best quality/slowest decode, 3 lowest quality/fastest decode
int Width; // width of data passed to the compressor
int Height; // height of data passed to the compressor
- double frame_rate; // set to passed in framerate
+ struct vpx_rational timebase;
int target_bandwidth; // bandwidth to be used in kilobits per second
int noise_sensitivity; // parameter used for applying pre processing blur: recommendation 0
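
The config now carries the caller's timebase (a vpx_rational) instead of a pre-computed frame rate; the encoder derives the actual rate from observed timestamps. A minimal sketch of how a wrapper might fill the new field, assuming the public vpx_codec_enc_cfg_t config and its g_timebase member (the helper name copy_timebase is made up for illustration):

    #include "vpx/vpx_encoder.h"   /* vpx_codec_enc_cfg_t, struct vpx_rational */

    /* VP8_CONFIG is the configuration struct shown above (vp8/common/onyx.h) */
    static void copy_timebase(VP8_CONFIG *oxcf, const vpx_codec_enc_cfg_t *cfg)
    {
        /* e.g. {num = 1, den = 30}: one timestamp tick is 1/30 of a second */
        oxcf->timebase = cfg->g_timebase;
    }
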
diff --git a/vp8/common/x86/filter_x86.c b/vp8/common/x86/filter_x86.c
new file mode 100644
index 0000000..ebab814
--- /dev/null
+++ b/vp8/common/x86/filter_x86.c
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vpx_ports/mem.h"
+
+DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_4[8][8]) =
+{
+ { 128, 128, 128, 128, 0, 0, 0, 0 },
+ { 112, 112, 112, 112, 16, 16, 16, 16 },
+ { 96, 96, 96, 96, 32, 32, 32, 32 },
+ { 80, 80, 80, 80, 48, 48, 48, 48 },
+ { 64, 64, 64, 64, 64, 64, 64, 64 },
+ { 48, 48, 48, 48, 80, 80, 80, 80 },
+ { 32, 32, 32, 32, 96, 96, 96, 96 },
+ { 16, 16, 16, 16, 112, 112, 112, 112 }
+};
+
+DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_8[8][16]) =
+{
+ { 128, 128, 128, 128, 128, 128, 128, 128, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { 112, 112, 112, 112, 112, 112, 112, 112, 16, 16, 16, 16, 16, 16, 16, 16 },
+ { 96, 96, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32 },
+ { 80, 80, 80, 80, 80, 80, 80, 80, 48, 48, 48, 48, 48, 48, 48, 48 },
+ { 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 },
+ { 48, 48, 48, 48, 48, 48, 48, 48, 80, 80, 80, 80, 80, 80, 80, 80 },
+ { 32, 32, 32, 32, 32, 32, 32, 32, 96, 96, 96, 96, 96, 96, 96, 96 },
+ { 16, 16, 16, 16, 16, 16, 16, 16, 112, 112, 112, 112, 112, 112, 112, 112 }
+};
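
These tables are the scalar 2-tap bilinear filters with each coefficient replicated, so a single aligned load fills an MMX/SSE2 register with one tap. A throwaway sketch that rebuilds the 8-wide table from the usual {128 - 16*i, 16*i} pairs and spot-checks one row (it assumes the scalar table is that arithmetic progression, which the values above reflect):

    #include <stdio.h>

    int main(void)
    {
        short x86_8[8][16];
        int i, j;
        for (i = 0; i < 8; i++) {
            short a = (short)(128 - 16 * i), b = (short)(16 * i);
            for (j = 0; j < 8; j++) {
                x86_8[i][j]     = a;   /* first 8 lanes: first tap  */
                x86_8[i][j + 8] = b;   /* last 8 lanes: second tap  */
            }
        }
        printf("%d %d\n", x86_8[3][0], x86_8[3][8]);   /* expect: 80 48 */
        return 0;
    }
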
diff --git a/vp8/common/x86/filter_x86.h b/vp8/common/x86/filter_x86.h
new file mode 100644
index 0000000..efcc4dc
--- /dev/null
+++ b/vp8/common/x86/filter_x86.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef FILTER_X86_H
+#define FILTER_X86_H
+
+/* x86 assembly specific copy of vp8/common/filter.c:vp8_bilinear_filters with
+ * duplicated values */
+extern const short vp8_bilinear_filters_x86_4[8][8]; /* duplicated 4x */
+extern const short vp8_bilinear_filters_x86_8[8][16]; /* duplicated 8x */
+
+#endif /* FILTER_X86_H */
diff --git a/vp8/common/x86/subpixel_mmx.asm b/vp8/common/x86/subpixel_mmx.asm
index e68d950..5528fd0 100644
--- a/vp8/common/x86/subpixel_mmx.asm
+++ b/vp8/common/x86/subpixel_mmx.asm
@@ -10,6 +10,7 @@
%include "vpx_ports/x86_abi_support.asm"
+extern sym(vp8_bilinear_filters_x86_8)
%define BLOCK_HEIGHT_WIDTH 4
@@ -222,14 +223,14 @@
push rdi
; end prolog
- ;const short *HFilter = bilinear_filters_mmx[xoffset];
- ;const short *VFilter = bilinear_filters_mmx[yoffset];
+ ;const short *HFilter = vp8_bilinear_filters_x86_8[xoffset];
+ ;const short *VFilter = vp8_bilinear_filters_x86_8[yoffset];
movsxd rax, dword ptr arg(2) ;xoffset
mov rdi, arg(4) ;dst_ptr ;
shl rax, 5 ; offset * 32
- lea rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))]
+ lea rcx, [GLOBAL(sym(vp8_bilinear_filters_x86_8))]
add rax, rcx ; HFilter
mov rsi, arg(0) ;src_ptr ;
@@ -379,13 +380,13 @@
push rdi
; end prolog
- ;const short *HFilter = bilinear_filters_mmx[xoffset];
- ;const short *VFilter = bilinear_filters_mmx[yoffset];
+ ;const short *HFilter = vp8_bilinear_filters_x86_8[xoffset];
+ ;const short *VFilter = vp8_bilinear_filters_x86_8[yoffset];
movsxd rax, dword ptr arg(2) ;xoffset
mov rdi, arg(4) ;dst_ptr ;
- lea rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))]
+ lea rcx, [GLOBAL(sym(vp8_bilinear_filters_x86_8))]
shl rax, 5
mov rsi, arg(0) ;src_ptr ;
@@ -534,13 +535,13 @@
push rdi
; end prolog
- ;const short *HFilter = bilinear_filters_mmx[xoffset];
- ;const short *VFilter = bilinear_filters_mmx[yoffset];
+ ;const short *HFilter = vp8_bilinear_filters_x86_8[xoffset];
+ ;const short *VFilter = vp8_bilinear_filters_x86_8[yoffset];
movsxd rax, dword ptr arg(2) ;xoffset
mov rdi, arg(4) ;dst_ptr ;
- lea rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))]
+ lea rcx, [GLOBAL(sym(vp8_bilinear_filters_x86_8))]
shl rax, 5
add rax, rcx ; HFilter
@@ -699,29 +700,3 @@
times 8 dw 0
-align 16
-global HIDDEN_DATA(sym(vp8_bilinear_filters_mmx))
-sym(vp8_bilinear_filters_mmx):
- times 8 dw 128
- times 8 dw 0
-
- times 8 dw 112
- times 8 dw 16
-
- times 8 dw 96
- times 8 dw 32
-
- times 8 dw 80
- times 8 dw 48
-
- times 8 dw 64
- times 8 dw 64
-
- times 8 dw 48
- times 8 dw 80
-
- times 8 dw 32
- times 8 dw 96
-
- times 8 dw 16
- times 8 dw 112
diff --git a/vp8/common/x86/subpixel_sse2.asm b/vp8/common/x86/subpixel_sse2.asm
index b62b5c6..cb550af 100644
--- a/vp8/common/x86/subpixel_sse2.asm
+++ b/vp8/common/x86/subpixel_sse2.asm
@@ -10,6 +10,7 @@
%include "vpx_ports/x86_abi_support.asm"
+extern sym(vp8_bilinear_filters_x86_8)
%define BLOCK_HEIGHT_WIDTH 4
%define VP8_FILTER_WEIGHT 128
@@ -961,7 +962,7 @@
; unsigned char *dst_ptr,
; int dst_pitch
;)
-extern sym(vp8_bilinear_filters_mmx)
+extern sym(vp8_bilinear_filters_x86_8)
global sym(vp8_bilinear_predict16x16_sse2)
sym(vp8_bilinear_predict16x16_sse2):
push rbp
@@ -973,10 +974,10 @@
push rdi
; end prolog
- ;const short *HFilter = bilinear_filters_mmx[xoffset]
- ;const short *VFilter = bilinear_filters_mmx[yoffset]
+ ;const short *HFilter = vp8_bilinear_filters_x86_8[xoffset]
+ ;const short *VFilter = vp8_bilinear_filters_x86_8[yoffset]
- lea rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))]
+ lea rcx, [GLOBAL(sym(vp8_bilinear_filters_x86_8))]
movsxd rax, dword ptr arg(2) ;xoffset
cmp rax, 0 ;skip first_pass filter if xoffset=0
@@ -1230,7 +1231,6 @@
; unsigned char *dst_ptr,
; int dst_pitch
;)
-extern sym(vp8_bilinear_filters_mmx)
global sym(vp8_bilinear_predict8x8_sse2)
sym(vp8_bilinear_predict8x8_sse2):
push rbp
@@ -1245,9 +1245,9 @@
ALIGN_STACK 16, rax
sub rsp, 144 ; reserve 144 bytes
- ;const short *HFilter = bilinear_filters_mmx[xoffset]
- ;const short *VFilter = bilinear_filters_mmx[yoffset]
- lea rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))]
+ ;const short *HFilter = vp8_bilinear_filters_x86_8[xoffset]
+ ;const short *VFilter = vp8_bilinear_filters_x86_8[yoffset]
+ lea rcx, [GLOBAL(sym(vp8_bilinear_filters_x86_8))]
mov rsi, arg(0) ;src_ptr
movsxd rdx, dword ptr arg(1) ;src_pixels_per_line
diff --git a/vp8/common/x86/subpixel_x86.h b/vp8/common/x86/subpixel_x86.h
index 75991cc..01ec9e2 100644
--- a/vp8/common/x86/subpixel_x86.h
+++ b/vp8/common/x86/subpixel_x86.h
@@ -12,6 +12,8 @@
#ifndef SUBPIXEL_X86_H
#define SUBPIXEL_X86_H
+#include "filter_x86.h"
+
/* Note:
*
* This platform is commonly built for runtime CPU detection. If you modify
diff --git a/vp8/common/x86/vp8_asm_stubs.c b/vp8/common/x86/vp8_asm_stubs.c
index bce7bc3..a623c69 100644
--- a/vp8/common/x86/vp8_asm_stubs.c
+++ b/vp8/common/x86/vp8_asm_stubs.c
@@ -12,9 +12,9 @@
#include "vpx_config.h"
#include "vpx_ports/mem.h"
#include "vp8/common/subpixel.h"
+#include "filter_x86.h"
extern const short vp8_six_tap_mmx[8][6*8];
-extern const short vp8_bilinear_filters_mmx[8][2*8];
extern void vp8_filter_block1d_h6_mmx
(
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index 1133efb..31eafcf 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -942,16 +942,38 @@
if (!pc->refresh_golden_frame)
pc->copy_buffer_to_gf = vp8_read_literal(bc, 2);
+#if CONFIG_ERROR_CONCEALMENT
+ /* Assume we shouldn't copy to the golden if the bit is missing */
+ xd->corrupted |= vp8dx_bool_error(bc);
+ if (pbi->ec_active && xd->corrupted)
+ pc->copy_buffer_to_gf = 0;
+#endif
+
pc->copy_buffer_to_arf = 0;
if (!pc->refresh_alt_ref_frame)
pc->copy_buffer_to_arf = vp8_read_literal(bc, 2);
+#if CONFIG_ERROR_CONCEALMENT
+ /* Assume we shouldn't copy to the alt-ref if the bit is missing */
+ xd->corrupted |= vp8dx_bool_error(bc);
+ if (pbi->ec_active && xd->corrupted)
+ pc->copy_buffer_to_arf = 0;
+#endif
+
+
pc->ref_frame_sign_bias[GOLDEN_FRAME] = vp8_read_bit(bc);
pc->ref_frame_sign_bias[ALTREF_FRAME] = vp8_read_bit(bc);
}
pc->refresh_entropy_probs = vp8_read_bit(bc);
+#if CONFIG_ERROR_CONCEALMENT
+ /* Assume we shouldn't refresh the probabilities if the bit is
+ * missing */
+ xd->corrupted |= vp8dx_bool_error(bc);
+ if (pbi->ec_active && xd->corrupted)
+ pc->refresh_entropy_probs = 0;
+#endif
if (pc->refresh_entropy_probs == 0)
{
vpx_memcpy(&pc->lfc, &pc->fc, sizeof(pc->fc));
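
Each of these additions follows the same pattern: decode the field, check whether the bool decoder has already run off the end of the data, and if error concealment is active fall back to a conservative default rather than trusting the decoded value. A hypothetical helper expressing that pattern (read_literal_or_default does not exist in the codebase; it only restates the logic added above):

    /* bits are read normally; if the stream turned out to be truncated and
     * error concealment is on, return the safe default instead. */
    static int read_literal_or_default(vp8_reader *bc, int bits, int def,
                                       int ec_active, int *corrupted)
    {
        int v = vp8_read_literal(bc, bits);
        *corrupted |= vp8dx_bool_error(bc);
        return (ec_active && *corrupted) ? def : v;
    }
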
diff --git a/vp8/decoder/error_concealment.c b/vp8/decoder/error_concealment.c
index 86fa191..b77d743 100644
--- a/vp8/decoder/error_concealment.c
+++ b/vp8/decoder/error_concealment.c
@@ -491,33 +491,6 @@
assert(i == 20);
}
-/* Calculates which reference frame type is dominating among the neighbors */
-static MV_REFERENCE_FRAME dominant_ref_frame(EC_BLOCK *neighbors)
-{
- /* Default to referring to "skip" */
- MV_REFERENCE_FRAME dom_ref_frame = LAST_FRAME;
- int max_ref_frame_cnt = 0;
- int ref_frame_cnt[MAX_REF_FRAMES] = {0};
- int i;
- /* Count neighboring reference frames */
- for (i = 0; i < NUM_NEIGHBORS; ++i)
- {
- if (neighbors[i].ref_frame < MAX_REF_FRAMES &&
- neighbors[i].ref_frame != INTRA_FRAME)
- ++ref_frame_cnt[neighbors[i].ref_frame];
- }
- /* Find maximum */
- for (i = 0; i < MAX_REF_FRAMES; ++i)
- {
- if (ref_frame_cnt[i] > max_ref_frame_cnt)
- {
- dom_ref_frame = i;
- max_ref_frame_cnt = ref_frame_cnt[i];
- }
- }
- return dom_ref_frame;
-}
-
/* Interpolates all motion vectors for a macroblock from the neighboring blocks'
* motion vectors.
*/
@@ -591,7 +564,6 @@
{
/* Find relevant neighboring blocks */
EC_BLOCK neighbors[NUM_NEIGHBORS];
- MV_REFERENCE_FRAME dom_ref_frame;
int i;
/* Initialize the array. MAX_REF_FRAMES is interpreted as "doesn't exist" */
for (i = 0; i < NUM_NEIGHBORS; ++i)
@@ -604,13 +576,11 @@
mb_row, mb_col,
mb_rows, mb_cols,
mb->mode_info_stride);
- /* Determine the dominant block type */
- dom_ref_frame = dominant_ref_frame(neighbors);
- /* Interpolate MVs for the missing blocks
- * from the dominating MVs */
- interpolate_mvs(mb, neighbors, dom_ref_frame);
+ /* Interpolate MVs for the missing blocks from the surrounding
+ * blocks which refer to the last frame. */
+ interpolate_mvs(mb, neighbors, LAST_FRAME);
- mb->mode_info_context->mbmi.ref_frame = dom_ref_frame;
+ mb->mode_info_context->mbmi.ref_frame = LAST_FRAME;
mb->mode_info_context->mbmi.mode = SPLITMV;
mb->mode_info_context->mbmi.uv_mode = DC_PRED;
mb->mode_info_context->mbmi.partitioning = 3;
diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c
index 0779549..cf525f4 100644
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@@ -359,28 +359,38 @@
pbi->fragment_sizes[0] = 0;
}
- if (pbi->num_fragments <= 1 && pbi->fragment_sizes[0] == 0)
+ if (!pbi->ec_active &&
+ pbi->num_fragments <= 1 && pbi->fragment_sizes[0] == 0)
{
- /* This is used to signal that we are missing frames.
- * We do not know if the missing frame(s) was supposed to update
- * any of the reference buffers, but we act conservative and
- * mark only the last buffer as corrupted.
- */
- cm->yv12_fb[cm->lst_fb_idx].corrupted = 1;
-
/* If error concealment is disabled we won't signal missing frames
* to the decoder.
*/
- if (!pbi->ec_active)
+ if (cm->fb_idx_ref_cnt[cm->lst_fb_idx] > 1)
{
- /* Signal that we have no frame to show. */
- cm->show_frame = 0;
-
- pbi->num_fragments = 0;
-
- /* Nothing more to do. */
- return 0;
+ /* The last reference shares buffer with another reference
+ * buffer. Move it to its own buffer before setting it as
+ * corrupt, otherwise we will make multiple buffers corrupt.
+ */
+ const int prev_idx = cm->lst_fb_idx;
+ cm->fb_idx_ref_cnt[prev_idx]--;
+ cm->lst_fb_idx = get_free_fb(cm);
+ vp8_yv12_copy_frame_ptr(&cm->yv12_fb[prev_idx],
+ &cm->yv12_fb[cm->lst_fb_idx]);
}
+ /* This is used to signal that we are missing frames.
+ * We do not know if the missing frame(s) was supposed to update
+ * any of the reference buffers, but we act conservative and
+ * mark only the last buffer as corrupted.
+ */
+ cm->yv12_fb[cm->lst_fb_idx].corrupted = 1;
+
+ /* Signal that we have no frame to show. */
+ cm->show_frame = 0;
+
+ pbi->num_fragments = 0;
+
+ /* Nothing more to do. */
+ return 0;
}
#if HAVE_ARMV7
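
The recovery path above first gives the last reference its own buffer via get_free_fb() so that marking it corrupt cannot taint another reference sharing the same storage. A sketch of what such an allocator amounts to, assuming fb_idx_ref_cnt[] holds one reference count per yv12_fb[] entry (the real helper is defined elsewhere in the decoder and may differ in detail):

    static int get_free_fb_sketch(VP8_COMMON *cm)
    {
        int i;

        /* find a frame buffer that nothing currently references */
        for (i = 0; i < NUM_YV12_BUFFERS; i++)
            if (cm->fb_idx_ref_cnt[i] == 0)
                break;

        cm->fb_idx_ref_cnt[i] = 1;   /* the caller now holds the only reference */
        return i;
    }
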
diff --git a/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm b/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm
index 30513f91..5b7e8f6 100644
--- a/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm
+++ b/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm
@@ -13,6 +13,7 @@
EXPORT |vp8_encode_bool|
EXPORT |vp8_stop_encode|
EXPORT |vp8_encode_value|
+ IMPORT |vp8_validate_buffer_arm|
INCLUDE asm_enc_offsets.asm
@@ -22,6 +23,20 @@
AREA |.text|, CODE, READONLY
+ ; macro for validating write buffer position
+ ; needs vp8_writer in r0
+ ; start shall not be in r1
+ MACRO
+ VALIDATE_POS $start, $pos
+ push {r0-r3, r12, lr} ; rest of regs are preserved by subroutine call
+ ldr r2, [r0, #vp8_writer_buffer_end]
+ ldr r3, [r0, #vp8_writer_error]
+ mov r1, $pos
+ mov r0, $start
+ bl vp8_validate_buffer_arm
+ pop {r0-r3, r12, lr}
+ MEND
+
; r0 BOOL_CODER *br
; r1 unsigned char *source
; r2 unsigned char *source_end
@@ -43,7 +58,7 @@
; r1 int bit
; r2 int probability
|vp8_encode_bool| PROC
- push {r4-r9, lr}
+ push {r4-r10, lr}
mov r4, r2
@@ -106,6 +121,9 @@
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
str r1, [r0, #vp8_writer_pos]
sub r3, r3, #8 ; count -= 8
+
+ VALIDATE_POS r9, r1 ; validate_buffer at pos
+
strb r7, [r9, r4] ; w->buffer[w->pos++]
token_count_lt_zero
@@ -114,7 +132,7 @@
str r2, [r0, #vp8_writer_lowvalue]
str r5, [r0, #vp8_writer_range]
str r3, [r0, #vp8_writer_count]
- pop {r4-r9, pc}
+ pop {r4-r10, pc}
ENDP
; r0 BOOL_CODER *br
@@ -179,6 +197,9 @@
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
str r1, [r0, #vp8_writer_pos]
sub r3, r3, #8 ; count -= 8
+
+ VALIDATE_POS r9, r1 ; validate_buffer at pos
+
strb r7, [r9, r4] ; w->buffer[w->pos++]
token_count_lt_zero_se
@@ -198,7 +219,7 @@
; r1 int data
; r2 int bits
|vp8_encode_value| PROC
- push {r4-r11, lr}
+ push {r4-r12, lr}
mov r10, r2
@@ -270,6 +291,9 @@
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
str r11, [r0, #vp8_writer_pos]
sub r3, r3, #8 ; count -= 8
+
+ VALIDATE_POS r9, r11 ; validate_buffer at pos
+
strb r7, [r9, r4] ; w->buffer[w->pos++]
token_count_lt_zero_ev
@@ -281,7 +305,7 @@
str r2, [r0, #vp8_writer_lowvalue]
str r5, [r0, #vp8_writer_range]
str r3, [r0, #vp8_writer_count]
- pop {r4-r11, pc}
+ pop {r4-r12, pc}
ENDP
END
diff --git a/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm
index 933717c..a1cd467 100644
--- a/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm
+++ b/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm
@@ -10,6 +10,7 @@
EXPORT |vp8cx_pack_tokens_armv5|
+ IMPORT |vp8_validate_buffer_arm|
INCLUDE asm_enc_offsets.asm
@@ -19,6 +20,22 @@
AREA |.text|, CODE, READONLY
+
+ ; macro for validating write buffer position
+ ; needs vp8_writer in r0
+ ; start shall not be in r1
+ MACRO
+ VALIDATE_POS $start, $pos
+ push {r0-r3, r12, lr} ; rest of regs are preserved by subroutine call
+ ldr r2, [r0, #vp8_writer_buffer_end]
+ ldr r3, [r0, #vp8_writer_error]
+ mov r1, $pos
+ mov r0, $start
+ bl vp8_validate_buffer_arm
+ pop {r0-r3, r12, lr}
+ MEND
+
+
; r0 vp8_writer *w
; r1 const TOKENEXTRA *p
; r2 int xcount
@@ -26,11 +43,11 @@
; s0 vp8_extra_bits
; s1 vp8_coef_tree
|vp8cx_pack_tokens_armv5| PROC
- push {r4-r11, lr}
+ push {r4-r12, lr}
+ sub sp, sp, #16
; Add size of xcount * sizeof (TOKENEXTRA) to get stop
; sizeof (TOKENEXTRA) is 8
- sub sp, sp, #12
add r2, r1, r2, lsl #3 ; stop = p + xcount*sizeof(TOKENEXTRA)
str r2, [sp, #0]
str r3, [sp, #8] ; save vp8_coef_encodings
@@ -57,7 +74,7 @@
subne r8, r8, #1 ; --n
rsb r4, r8, #32 ; 32-n
- ldr r10, [sp, #52] ; vp8_coef_tree
+ ldr r10, [sp, #60] ; vp8_coef_tree
; v is kept in r12 during the token pack loop
lsl r12, r6, r4 ; r12 = v << 32 - n
@@ -128,12 +145,15 @@
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
str r11, [r0, #vp8_writer_pos]
sub r3, r3, #8 ; count -= 8
+
+ VALIDATE_POS r10, r11 ; validate_buffer at pos
+
strb r7, [r10, r4] ; w->buffer[w->pos++]
; r10 is used earlier in the loop, but r10 is used as
; temp variable here. So after r10 is used, reload
; vp8_coef_tree_dcd into r10
- ldr r10, [sp, #52] ; vp8_coef_tree
+ ldr r10, [sp, #60] ; vp8_coef_tree
token_count_lt_zero
lsl r2, r2, r6 ; lowvalue <<= shift
@@ -142,7 +162,7 @@
bne token_loop
ldrb r6, [r1, #tokenextra_token] ; t
- ldr r7, [sp, #48] ; vp8_extra_bits
+ ldr r7, [sp, #56] ; vp8_extra_bits
; Add t * sizeof (vp8_extra_bit_struct) to get the desired
; element. Here vp8_extra_bit_struct == 16
add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t
@@ -223,6 +243,9 @@
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
str r11, [r0, #vp8_writer_pos]
sub r3, r3, #8 ; count -= 8
+
+ VALIDATE_POS r10, r11 ; validate_buffer at pos
+
strb r7, [r10, r4] ; w->buffer[w->pos++]=(lowvalue >> (24-offset))
ldr r10, [sp, #4] ; b->tree
extra_count_lt_zero
@@ -271,7 +294,10 @@
lsr r6, r2, #24 ; lowvalue >> 24
add r12, r4, #1 ; w->pos++
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
- str r12, [r0, #0x10]
+ str r12, [r0, #vp8_writer_pos]
+
+ VALIDATE_POS r7, r12 ; validate_buffer at pos
+
strb r6, [r7, r4]
end_count_zero
skip_extra_bits
@@ -284,8 +310,8 @@
str r2, [r0, #vp8_writer_lowvalue]
str r5, [r0, #vp8_writer_range]
str r3, [r0, #vp8_writer_count]
- add sp, sp, #12
- pop {r4-r11, pc}
+ add sp, sp, #16
+ pop {r4-r12, pc}
ENDP
END
diff --git a/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm
index 82bf71f..1fa5e6c 100644
--- a/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm
+++ b/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm
@@ -10,6 +10,7 @@
EXPORT |vp8cx_pack_mb_row_tokens_armv5|
+ IMPORT |vp8_validate_buffer_arm|
INCLUDE asm_enc_offsets.asm
@@ -19,6 +20,21 @@
AREA |.text|, CODE, READONLY
+
+ ; macro for validating write buffer position
+ ; needs vp8_writer in r0
+ ; start shall not be in r1
+ MACRO
+ VALIDATE_POS $start, $pos
+ push {r0-r3, r12, lr} ; rest of regs are preserved by subroutine call
+ ldr r2, [r0, #vp8_writer_buffer_end]
+ ldr r3, [r0, #vp8_writer_error]
+ mov r1, $pos
+ mov r0, $start
+ bl vp8_validate_buffer_arm
+ pop {r0-r3, r12, lr}
+ MEND
+
; r0 VP8_COMP *cpi
; r1 vp8_writer *w
; r2 vp8_coef_encodings
@@ -26,7 +42,7 @@
; s0 vp8_coef_tree
|vp8cx_pack_mb_row_tokens_armv5| PROC
- push {r4-r11, lr}
+ push {r4-r12, lr}
sub sp, sp, #24
; Compute address of cpi->common.mb_rows
@@ -79,7 +95,7 @@
subne r8, r8, #1 ; --n
rsb r4, r8, #32 ; 32-n
- ldr r10, [sp, #60] ; vp8_coef_tree
+ ldr r10, [sp, #64] ; vp8_coef_tree
; v is kept in r12 during the token pack loop
lsl r12, r6, r4 ; r12 = v << 32 - n
@@ -150,12 +166,15 @@
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
str r11, [r0, #vp8_writer_pos]
sub r3, r3, #8 ; count -= 8
+
+ VALIDATE_POS r10, r11 ; validate_buffer at pos
+
strb r7, [r10, r4] ; w->buffer[w->pos++]
; r10 is used earlier in the loop, but r10 is used as
; temp variable here. So after r10 is used, reload
; vp8_coef_tree_dcd into r10
- ldr r10, [sp, #60] ; vp8_coef_tree
+ ldr r10, [sp, #64] ; vp8_coef_tree
token_count_lt_zero
lsl r2, r2, r6 ; lowvalue <<= shift
@@ -245,6 +264,9 @@
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
str r11, [r0, #vp8_writer_pos]
sub r3, r3, #8 ; count -= 8
+
+ VALIDATE_POS r10, r11 ; validate_buffer at pos
+
strb r7, [r10, r4] ; w->buffer[w->pos++]=(lowvalue >> (24-offset))
ldr r10, [sp, #4] ; b->tree
extra_count_lt_zero
@@ -293,7 +315,10 @@
lsr r6, r2, #24 ; lowvalue >> 24
add r12, r4, #1 ; w->pos++
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
- str r12, [r0, #0x10]
+ str r12, [r0, #vp8_writer_pos]
+
+ VALIDATE_POS r7, r12 ; validate_buffer at pos
+
strb r6, [r7, r4]
end_count_zero
skip_extra_bits
@@ -314,7 +339,7 @@
str r5, [r0, #vp8_writer_range]
str r3, [r0, #vp8_writer_count]
add sp, sp, #24
- pop {r4-r11, pc}
+ pop {r4-r12, pc}
ENDP
_VP8_COMP_common_
diff --git a/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm
index c061b2f..3a183aa 100644
--- a/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm
+++ b/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm
@@ -10,6 +10,7 @@
EXPORT |vp8cx_pack_tokens_into_partitions_armv5|
+ IMPORT |vp8_validate_buffer_arm|
INCLUDE asm_enc_offsets.asm
@@ -19,17 +20,31 @@
AREA |.text|, CODE, READONLY
+ ; macro for validating write buffer position
+ ; needs vp8_writer in r0
+ ; start shall not be in r1
+ MACRO
+ VALIDATE_POS $start, $pos
+ push {r0-r3, r12, lr} ; rest of regs are preserved by subroutine call
+ ldr r2, [r0, #vp8_writer_buffer_end]
+ ldr r3, [r0, #vp8_writer_error]
+ mov r1, $pos
+ mov r0, $start
+ bl vp8_validate_buffer_arm
+ pop {r0-r3, r12, lr}
+ MEND
+
; r0 VP8_COMP *cpi
; r1 unsigned char *cx_data
-; r2 int num_part
-; r3 *size
+; r2 const unsigned char *cx_data_end
+; r3 int num_part
; s0 vp8_coef_encodings
; s1 vp8_extra_bits,
-; s2 const vp8_tree_index *,
+; s2 const vp8_tree_index *
|vp8cx_pack_tokens_into_partitions_armv5| PROC
- push {r4-r11, lr}
- sub sp, sp, #44
+ push {r4-r12, lr}
+ sub sp, sp, #40
; Compute address of cpi->common.mb_rows
ldr r4, _VP8_COMP_common_
@@ -39,31 +54,26 @@
ldr r5, [r4, r6] ; load up mb_rows
str r5, [sp, #36] ; save mb_rows
- str r1, [sp, #24] ; save cx_data
- str r2, [sp, #20] ; save num_part
- str r3, [sp, #8] ; save *size
-
- ; *size = 3*(num_part -1 );
- sub r2, r2, #1 ; num_part - 1
- add r2, r2, r2, lsl #1 ; 3*(num_part - 1)
- str r2, [r3]
-
- add r2, r2, r1 ; cx_data + *size
- str r2, [sp, #40] ; ptr
+ str r1, [sp, #24] ; save ptr = cx_data
+ str r3, [sp, #20] ; save num_part
+ str r2, [sp, #8] ; save cx_data_end
ldr r4, _VP8_COMP_tplist_
add r4, r0, r4
ldr r7, [r4, #0] ; dereference cpi->tp_list
str r7, [sp, #32] ; store start of cpi->tp_list
- ldr r11, _VP8_COMP_bc2_ ; load up vp8_writer out of cpi
+ ldr r11, _VP8_COMP_bc_ ; load up vp8_writer out of cpi
add r0, r0, r11
mov r11, #0
str r11, [sp, #28] ; i
numparts_loop
- ldr r10, [sp, #40] ; ptr
+ ldr r2, _vp8_writer_sz_ ; load up sizeof(vp8_writer)
+ add r0, r2 ; bc[i + 1]
+
+ ldr r10, [sp, #24] ; ptr
ldr r5, [sp, #36] ; move mb_rows to the counting section
subs r5, r5, r11 ; move start point with each partition
; mb_rows starts at i
@@ -72,6 +82,10 @@
; Reset all of the VP8 Writer data for each partition that
; is processed.
; start_encode
+
+ ldr r3, [sp, #8]
+ str r3, [r0, #vp8_writer_buffer_end]
+
mov r2, #0 ; vp8_writer_lowvalue
mov r5, #255 ; vp8_writer_range
mvn r3, #23 ; vp8_writer_count
@@ -182,6 +196,9 @@
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
str r11, [r0, #vp8_writer_pos]
sub r3, r3, #8 ; count -= 8
+
+ VALIDATE_POS r10, r11 ; validate_buffer at pos
+
strb r7, [r10, r4] ; w->buffer[w->pos++]
; r10 is used earlier in the loop, but r10 is used as
@@ -277,6 +294,9 @@
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
str r11, [r0, #vp8_writer_pos]
sub r3, r3, #8 ; count -= 8
+
+ VALIDATE_POS r10, r11 ; validate_buffer at pos
+
strb r7, [r10, r4] ; w->buffer[w->pos++]=(lowvalue >> (24-offset))
ldr r10, [sp, #4] ; b->tree
extra_count_lt_zero
@@ -320,12 +340,15 @@
bne end_count_zero
ldr r4, [r0, #vp8_writer_pos]
- mvn r3, #7
+ mvn r3, #7 ; count = -8
ldr r7, [r0, #vp8_writer_buffer]
lsr r6, r2, #24 ; lowvalue >> 24
add r12, r4, #1 ; w->pos++
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
- str r12, [r0, #0x10]
+ str r12, [r0, #vp8_writer_pos]
+
+ VALIDATE_POS r7, r12 ; validate_buffer at pos
+
strb r6, [r7, r4]
end_count_zero
skip_extra_bits
@@ -401,6 +424,9 @@
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
str r11, [r0, #vp8_writer_pos]
sub r3, r3, #8 ; count -= 8
+
+ VALIDATE_POS r10, r11 ; validate_buffer at pos
+
strb r7, [r10, r4] ; w->buffer[w->pos++]
token_count_lt_zero_se
@@ -409,33 +435,10 @@
subs r12, r12, #1
bne stop_encode_loop
- ldr r10, [sp, #8] ; *size
- ldr r11, [r10]
ldr r4, [r0, #vp8_writer_pos] ; w->pos
- add r11, r11, r4 ; *size += w->pos
- str r11, [r10]
-
- ldr r9, [sp, #20] ; num_parts
- sub r9, r9, #1
- ldr r10, [sp, #28] ; i
- cmp r10, r9 ; if(i<(num_part - 1))
- bge skip_write_partition
-
- ldr r12, [sp, #40] ; ptr
+ ldr r12, [sp, #24] ; ptr
add r12, r12, r4 ; ptr += w->pos
- str r12, [sp, #40]
-
- ldr r9, [sp, #24] ; cx_data
- mov r8, r4, asr #8
- strb r4, [r9, #0]
- strb r8, [r9, #1]
- mov r4, r4, asr #16
- strb r4, [r9, #2]
-
- add r9, r9, #3 ; cx_data += 3
- str r9, [sp, #24]
-
-skip_write_partition
+ str r12, [sp, #24]
ldr r11, [sp, #28] ; i
ldr r10, [sp, #20] ; num_parts
@@ -451,9 +454,8 @@
cmp r10, r11
bgt numparts_loop
-
- add sp, sp, #44
- pop {r4-r11, pc}
+ add sp, sp, #40
+ pop {r4-r12, pc}
ENDP
_VP8_COMP_common_
@@ -462,7 +464,9 @@
DCD vp8_common_mb_rows
_VP8_COMP_tplist_
DCD vp8_comp_tplist
-_VP8_COMP_bc2_
- DCD vp8_comp_bc2
+_VP8_COMP_bc_
+ DCD vp8_comp_bc
+_vp8_writer_sz_
+ DCD vp8_writer_sz
END
diff --git a/vp8/encoder/arm/armv6/vp8_subtract_armv6.asm b/vp8/encoder/arm/armv6/vp8_subtract_armv6.asm
index 0ca7438..f329f8f 100644
--- a/vp8/encoder/arm/armv6/vp8_subtract_armv6.asm
+++ b/vp8/encoder/arm/armv6/vp8_subtract_armv6.asm
@@ -72,22 +72,23 @@
; r0 short *diff
; r1 unsigned char *usrc
; r2 unsigned char *vsrc
-; r3 unsigned char *pred
-; stack int stride
+; r3 int src_stride
+; sp unsigned char *upred
+; sp unsigned char *vpred
+; sp int pred_stride
|vp8_subtract_mbuv_armv6| PROC
- stmfd sp!, {r4-r12, lr}
+ stmfd sp!, {r4-r11}
add r0, r0, #512 ; set *diff point to Cb
- add r3, r3, #256 ; set *pred point to Cb
-
mov r4, #8 ; loop count
- ldr r5, [sp, #40] ; stride
+ ldr r5, [sp, #32] ; upred
+ ldr r12, [sp, #40] ; pred_stride
; Subtract U block
loop_u
- ldr r6, [r1] ; src (A)
- ldr r7, [r3], #4 ; pred (A)
+ ldr r6, [r1] ; usrc (A)
+ ldr r7, [r5] ; upred (A)
uxtb16 r8, r6 ; [s2 | s0] (A)
uxtb16 r9, r7 ; [p2 | p0] (A)
@@ -97,8 +98,8 @@
usub16 r6, r8, r9 ; [d2 | d0] (A)
usub16 r7, r10, r11 ; [d3 | d1] (A)
- ldr r10, [r1, #4] ; src (B)
- ldr r11, [r3], #4 ; pred (B)
+ ldr r10, [r1, #4] ; usrc (B)
+ ldr r11, [r5, #4] ; upred (B)
pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A)
pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (A)
@@ -114,7 +115,8 @@
usub16 r6, r8, r9 ; [d2 | d0] (B)
usub16 r7, r10, r11 ; [d3 | d1] (B)
- add r1, r1, r5 ; update usrc pointer
+ add r1, r1, r3 ; update usrc pointer
+ add r5, r5, r12 ; update upred pointer
pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B)
pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (B)
@@ -125,12 +127,13 @@
bne loop_u
+ ldr r5, [sp, #36] ; vpred
mov r4, #8 ; loop count
; Subtract V block
loop_v
- ldr r6, [r2] ; src (A)
- ldr r7, [r3], #4 ; pred (A)
+ ldr r6, [r2] ; vsrc (A)
+ ldr r7, [r5] ; vpred (A)
uxtb16 r8, r6 ; [s2 | s0] (A)
uxtb16 r9, r7 ; [p2 | p0] (A)
@@ -140,8 +143,8 @@
usub16 r6, r8, r9 ; [d2 | d0] (A)
usub16 r7, r10, r11 ; [d3 | d1] (A)
- ldr r10, [r2, #4] ; src (B)
- ldr r11, [r3], #4 ; pred (B)
+ ldr r10, [r2, #4] ; vsrc (B)
+ ldr r11, [r5, #4] ; vpred (B)
pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A)
pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (A)
@@ -157,7 +160,8 @@
usub16 r6, r8, r9 ; [d2 | d0] (B)
usub16 r7, r10, r11 ; [d3 | d1] (B)
- add r2, r2, r5 ; update vsrc pointer
+ add r2, r2, r3 ; update vsrc pointer
+ add r5, r5, r12 ; update vpred pointer
pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B)
pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (B)
@@ -168,23 +172,25 @@
bne loop_v
- ldmfd sp!, {r4-r12, pc}
+ ldmfd sp!, {r4-r11}
+ bx lr
ENDP
; r0 short *diff
; r1 unsigned char *src
-; r2 unsigned char *pred
-; r3 int stride
+; r2 int src_stride
+; r3 unsigned char *pred
+; sp int pred_stride
|vp8_subtract_mby_armv6| PROC
stmfd sp!, {r4-r11}
-
+ ldr r12, [sp, #32] ; pred_stride
mov r4, #16
loop
ldr r6, [r1] ; src (A)
- ldr r7, [r2], #4 ; pred (A)
+ ldr r7, [r3] ; pred (A)
uxtb16 r8, r6 ; [s2 | s0] (A)
uxtb16 r9, r7 ; [p2 | p0] (A)
@@ -195,7 +201,7 @@
usub16 r7, r10, r11 ; [d3 | d1] (A)
ldr r10, [r1, #4] ; src (B)
- ldr r11, [r2], #4 ; pred (B)
+ ldr r11, [r3, #4] ; pred (B)
pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A)
pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (A)
@@ -212,7 +218,7 @@
usub16 r7, r10, r11 ; [d3 | d1] (B)
ldr r10, [r1, #8] ; src (C)
- ldr r11, [r2], #4 ; pred (C)
+ ldr r11, [r3, #8] ; pred (C)
pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B)
pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (B)
@@ -229,10 +235,10 @@
usub16 r7, r10, r11 ; [d3 | d1] (C)
ldr r10, [r1, #12] ; src (D)
- ldr r11, [r2], #4 ; pred (D)
+ ldr r11, [r3, #12] ; pred (D)
- pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (C)
- pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (C)
+ pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (C)
+ pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (C)
str r8, [r0], #4 ; diff (C)
uxtb16 r8, r10 ; [s2 | s0] (D)
@@ -245,7 +251,8 @@
usub16 r6, r8, r9 ; [d2 | d0] (D)
usub16 r7, r10, r11 ; [d3 | d1] (D)
- add r1, r1, r3 ; update src pointer
+ add r1, r1, r2 ; update src pointer
+ add r3, r3, r12 ; update pred pointer
pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (D)
pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (D)
@@ -257,7 +264,7 @@
bne loop
ldmfd sp!, {r4-r11}
- mov pc, lr
+ bx lr
ENDP
diff --git a/vp8/encoder/arm/boolhuff_arm.c b/vp8/encoder/arm/boolhuff_arm.c
index 9089663..17a941b 100644
--- a/vp8/encoder/arm/boolhuff_arm.c
+++ b/vp8/encoder/arm/boolhuff_arm.c
@@ -10,7 +10,7 @@
#include "vp8/encoder/boolhuff.h"
-#include "vp8/common/blockd.h"
+#include "vpx/internal/vpx_codec_internal.h"
const unsigned int vp8_prob_cost[256] =
{
@@ -32,3 +32,10 @@
22, 21, 19, 18, 16, 15, 13, 12, 10, 9, 7, 6, 4, 3, 1, 1
};
+int vp8_validate_buffer_arm(const unsigned char *start,
+ size_t len,
+ const unsigned char *end,
+ struct vpx_internal_error_info *error)
+{
+ return validate_buffer(start, len, end, error);
+}
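
The ARM boolcoder paths call back into C through this wrapper so that every buffered byte store is bounds-checked. A sketch of the kind of check validate_buffer() implies, assuming vpx_internal_error() records the failure in the vpx_internal_error_info and unwinds via its setjmp target (the actual validate_buffer lives in the boolhuff header and may differ):

    #include <stddef.h>
    #include "vpx/internal/vpx_codec_internal.h"

    static int validate_buffer_sketch(const unsigned char *start, size_t len,
                                      const unsigned char *end,
                                      struct vpx_internal_error_info *error)
    {
        /* a write of len bytes starting at start must not run past end */
        if (start + len > start && start + len <= end)
            return 1;

        vpx_internal_error(error, VPX_CODEC_CORRUPT_FRAME,
                           "Truncated packet or corrupt partition");
        return 0;
    }
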
diff --git a/vp8/encoder/arm/neon/subtract_neon.asm b/vp8/encoder/arm/neon/subtract_neon.asm
index 68c2950..91a328c 100644
--- a/vp8/encoder/arm/neon/subtract_neon.asm
+++ b/vp8/encoder/arm/neon/subtract_neon.asm
@@ -61,19 +61,24 @@
;==========================================
-;void vp8_subtract_mby_neon(short *diff, unsigned char *src, unsigned char *pred, int stride)
+;void vp8_subtract_mby_neon(short *diff, unsigned char *src, int src_stride,
+; unsigned char *pred, int pred_stride)
|vp8_subtract_mby_neon| PROC
+ push {r4-r7}
mov r12, #4
+ ldr r4, [sp, #16] ; pred_stride
+ mov r6, #32 ; "diff" stride x2
+ add r5, r0, #16 ; second diff pointer
subtract_mby_loop
- vld1.8 {q0}, [r1], r3 ;load src
- vld1.8 {q1}, [r2]! ;load pred
- vld1.8 {q2}, [r1], r3
- vld1.8 {q3}, [r2]!
- vld1.8 {q4}, [r1], r3
- vld1.8 {q5}, [r2]!
- vld1.8 {q6}, [r1], r3
- vld1.8 {q7}, [r2]!
+ vld1.8 {q0}, [r1], r2 ;load src
+ vld1.8 {q1}, [r3], r4 ;load pred
+ vld1.8 {q2}, [r1], r2
+ vld1.8 {q3}, [r3], r4
+ vld1.8 {q4}, [r1], r2
+ vld1.8 {q5}, [r3], r4
+ vld1.8 {q6}, [r1], r2
+ vld1.8 {q7}, [r3], r4
vsubl.u8 q8, d0, d2
vsubl.u8 q9, d1, d3
@@ -84,46 +89,53 @@
vsubl.u8 q14, d12, d14
vsubl.u8 q15, d13, d15
- vst1.16 {q8}, [r0]! ;store diff
- vst1.16 {q9}, [r0]!
- vst1.16 {q10}, [r0]!
- vst1.16 {q11}, [r0]!
- vst1.16 {q12}, [r0]!
- vst1.16 {q13}, [r0]!
- vst1.16 {q14}, [r0]!
- vst1.16 {q15}, [r0]!
+ vst1.16 {q8}, [r0], r6 ;store diff
+ vst1.16 {q9}, [r5], r6
+ vst1.16 {q10}, [r0], r6
+ vst1.16 {q11}, [r5], r6
+ vst1.16 {q12}, [r0], r6
+ vst1.16 {q13}, [r5], r6
+ vst1.16 {q14}, [r0], r6
+ vst1.16 {q15}, [r5], r6
subs r12, r12, #1
bne subtract_mby_loop
+ pop {r4-r7}
bx lr
ENDP
;=================================
-;void vp8_subtract_mbuv_neon(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride)
+;void vp8_subtract_mbuv_neon(short *diff, unsigned char *usrc, unsigned char *vsrc,
+; int src_stride, unsigned char *upred,
+; unsigned char *vpred, int pred_stride)
+
|vp8_subtract_mbuv_neon| PROC
- ldr r12, [sp]
+ push {r4-r7}
+ ldr r4, [sp, #16] ; upred
+ ldr r5, [sp, #20] ; vpred
+ ldr r6, [sp, #24] ; pred_stride
+ add r0, r0, #512 ; short *udiff = diff + 256;
+ mov r12, #32 ; "diff" stride x2
+ add r7, r0, #16 ; second diff pointer
;u
- add r0, r0, #512 ; short *udiff = diff + 256;
- add r3, r3, #256 ; unsigned char *upred = pred + 256;
-
- vld1.8 {d0}, [r1], r12 ;load src
- vld1.8 {d1}, [r3]! ;load pred
- vld1.8 {d2}, [r1], r12
- vld1.8 {d3}, [r3]!
- vld1.8 {d4}, [r1], r12
- vld1.8 {d5}, [r3]!
- vld1.8 {d6}, [r1], r12
- vld1.8 {d7}, [r3]!
- vld1.8 {d8}, [r1], r12
- vld1.8 {d9}, [r3]!
- vld1.8 {d10}, [r1], r12
- vld1.8 {d11}, [r3]!
- vld1.8 {d12}, [r1], r12
- vld1.8 {d13}, [r3]!
- vld1.8 {d14}, [r1], r12
- vld1.8 {d15}, [r3]!
+ vld1.8 {d0}, [r1], r3 ;load usrc
+ vld1.8 {d1}, [r4], r6 ;load upred
+ vld1.8 {d2}, [r1], r3
+ vld1.8 {d3}, [r4], r6
+ vld1.8 {d4}, [r1], r3
+ vld1.8 {d5}, [r4], r6
+ vld1.8 {d6}, [r1], r3
+ vld1.8 {d7}, [r4], r6
+ vld1.8 {d8}, [r1], r3
+ vld1.8 {d9}, [r4], r6
+ vld1.8 {d10}, [r1], r3
+ vld1.8 {d11}, [r4], r6
+ vld1.8 {d12}, [r1], r3
+ vld1.8 {d13}, [r4], r6
+ vld1.8 {d14}, [r1], r3
+ vld1.8 {d15}, [r4], r6
vsubl.u8 q8, d0, d1
vsubl.u8 q9, d2, d3
@@ -134,32 +146,32 @@
vsubl.u8 q14, d12, d13
vsubl.u8 q15, d14, d15
- vst1.16 {q8}, [r0]! ;store diff
- vst1.16 {q9}, [r0]!
- vst1.16 {q10}, [r0]!
- vst1.16 {q11}, [r0]!
- vst1.16 {q12}, [r0]!
- vst1.16 {q13}, [r0]!
- vst1.16 {q14}, [r0]!
- vst1.16 {q15}, [r0]!
+ vst1.16 {q8}, [r0], r12 ;store diff
+ vst1.16 {q9}, [r7], r12
+ vst1.16 {q10}, [r0], r12
+ vst1.16 {q11}, [r7], r12
+ vst1.16 {q12}, [r0], r12
+ vst1.16 {q13}, [r7], r12
+ vst1.16 {q14}, [r0], r12
+ vst1.16 {q15}, [r7], r12
;v
- vld1.8 {d0}, [r2], r12 ;load src
- vld1.8 {d1}, [r3]! ;load pred
- vld1.8 {d2}, [r2], r12
- vld1.8 {d3}, [r3]!
- vld1.8 {d4}, [r2], r12
- vld1.8 {d5}, [r3]!
- vld1.8 {d6}, [r2], r12
- vld1.8 {d7}, [r3]!
- vld1.8 {d8}, [r2], r12
- vld1.8 {d9}, [r3]!
- vld1.8 {d10}, [r2], r12
- vld1.8 {d11}, [r3]!
- vld1.8 {d12}, [r2], r12
- vld1.8 {d13}, [r3]!
- vld1.8 {d14}, [r2], r12
- vld1.8 {d15}, [r3]!
+ vld1.8 {d0}, [r2], r3 ;load vsrc
+ vld1.8 {d1}, [r5], r6 ;load vpred
+ vld1.8 {d2}, [r2], r3
+ vld1.8 {d3}, [r5], r6
+ vld1.8 {d4}, [r2], r3
+ vld1.8 {d5}, [r5], r6
+ vld1.8 {d6}, [r2], r3
+ vld1.8 {d7}, [r5], r6
+ vld1.8 {d8}, [r2], r3
+ vld1.8 {d9}, [r5], r6
+ vld1.8 {d10}, [r2], r3
+ vld1.8 {d11}, [r5], r6
+ vld1.8 {d12}, [r2], r3
+ vld1.8 {d13}, [r5], r6
+ vld1.8 {d14}, [r2], r3
+ vld1.8 {d15}, [r5], r6
vsubl.u8 q8, d0, d1
vsubl.u8 q9, d2, d3
@@ -170,16 +182,18 @@
vsubl.u8 q14, d12, d13
vsubl.u8 q15, d14, d15
- vst1.16 {q8}, [r0]! ;store diff
- vst1.16 {q9}, [r0]!
- vst1.16 {q10}, [r0]!
- vst1.16 {q11}, [r0]!
- vst1.16 {q12}, [r0]!
- vst1.16 {q13}, [r0]!
- vst1.16 {q14}, [r0]!
- vst1.16 {q15}, [r0]!
+ vst1.16 {q8}, [r0], r12 ;store diff
+ vst1.16 {q9}, [r7], r12
+ vst1.16 {q10}, [r0], r12
+ vst1.16 {q11}, [r7], r12
+ vst1.16 {q12}, [r0], r12
+ vst1.16 {q13}, [r7], r12
+ vst1.16 {q14}, [r0], r12
+ vst1.16 {q15}, [r7], r12
+ pop {r4-r7}
bx lr
+
ENDP
END
diff --git a/vp8/encoder/asm_enc_offsets.c b/vp8/encoder/asm_enc_offsets.c
index d05dab4..2e9ca72 100644
--- a/vp8/encoder/asm_enc_offsets.c
+++ b/vp8/encoder/asm_enc_offsets.c
@@ -50,6 +50,7 @@
DEFINE(vp8_writer_pos, offsetof(vp8_writer, pos));
DEFINE(vp8_writer_buffer, offsetof(vp8_writer, buffer));
DEFINE(vp8_writer_buffer_end, offsetof(vp8_writer, buffer_end));
+DEFINE(vp8_writer_error, offsetof(vp8_writer, error));
DEFINE(tokenextra_token, offsetof(TOKENEXTRA, Token));
DEFINE(tokenextra_extra, offsetof(TOKENEXTRA, Extra));
@@ -69,7 +70,8 @@
DEFINE(vp8_comp_tplist, offsetof(VP8_COMP, tplist));
DEFINE(vp8_comp_common, offsetof(VP8_COMP, common));
-DEFINE(vp8_comp_bc2, offsetof(VP8_COMP, bc2));
+DEFINE(vp8_comp_bc , offsetof(VP8_COMP, bc));
+DEFINE(vp8_writer_sz , sizeof(vp8_writer));
DEFINE(tokenlist_start, offsetof(TOKENLIST, start));
DEFINE(tokenlist_stop, offsetof(TOKENLIST, stop));
diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c
index 748b607..5eea39c 100644
--- a/vp8/encoder/bitstream.c
+++ b/vp8/encoder/bitstream.c
@@ -109,7 +109,7 @@
{
VP8_COMMON *const x = & cpi->common;
- vp8_writer *const w = & cpi->bc;
+ vp8_writer *const w = cpi->bc;
{
vp8_prob Pnew [VP8_YMODES-1];
@@ -221,6 +221,11 @@
w->buffer[x] += 1;
}
+ validate_buffer(w->buffer + w->pos,
+ 1,
+ w->buffer_end,
+ w->error);
+
w->buffer[w->pos++] = (lowvalue >> (24 - offset));
lowvalue <<= offset;
shift = count;
@@ -281,6 +286,11 @@
w->buffer[x] += 1;
}
+ validate_buffer(w->buffer + w->pos,
+ 1,
+ w->buffer_end,
+ w->error);
+
w->buffer[w->pos++] = (lowvalue >> (24 - offset));
lowvalue <<= offset;
shift = count;
@@ -329,6 +339,12 @@
if (!++count)
{
count = -8;
+
+ validate_buffer(w->buffer + w->pos,
+ 1,
+ w->buffer_end,
+ w->error);
+
w->buffer[w->pos++] = (lowvalue >> 24);
lowvalue &= 0xffffff;
}
@@ -358,20 +374,21 @@
}
-static void pack_tokens_into_partitions_c(VP8_COMP *cpi, unsigned char *cx_data, unsigned char * cx_data_end, int num_part, int *size)
+static void pack_tokens_into_partitions_c(VP8_COMP *cpi, unsigned char *cx_data,
+ unsigned char * cx_data_end,
+ int num_part)
{
int i;
unsigned char *ptr = cx_data;
unsigned char *ptr_end = cx_data_end;
unsigned int shift;
- vp8_writer *w = &cpi->bc2;
- *size = 3 * (num_part - 1);
- cpi->partition_sz[0] += *size;
- ptr = cx_data + (*size);
+ vp8_writer *w;
+ ptr = cx_data;
for (i = 0; i < num_part; i++)
{
+ w = cpi->bc + i + 1;
vp8_start_encode(w, ptr, ptr_end);
{
unsigned int split;
@@ -581,17 +598,7 @@
}
vp8_stop_encode(w);
- *size += w->pos;
-
- /* The first partition size is set earlier */
- cpi->partition_sz[i + 1] = w->pos;
-
- if (i < (num_part - 1))
- {
- write_partition_size(cx_data, w->pos);
- cx_data += 3;
- ptr += w->pos;
- }
+ ptr += w->pos;
}
}
@@ -664,6 +671,11 @@
w->buffer[x] += 1;
}
+ validate_buffer(w->buffer + w->pos,
+ 1,
+ w->buffer_end,
+ w->error);
+
w->buffer[w->pos++] = (lowvalue >> (24 - offset));
lowvalue <<= offset;
shift = count;
@@ -724,6 +736,11 @@
w->buffer[x] += 1;
}
+ validate_buffer(w->buffer + w->pos,
+ 1,
+ w->buffer_end,
+ w->error);
+
w->buffer[w->pos++] = (lowvalue >> (24 - offset));
lowvalue <<= offset;
shift = count;
@@ -770,6 +787,12 @@
if (!++count)
{
count = -8;
+
+ validate_buffer(w->buffer + w->pos,
+ 1,
+ w->buffer_end,
+ w->error);
+
w->buffer[w->pos++] = (lowvalue >> 24);
lowvalue &= 0xffffff;
}
@@ -860,7 +883,7 @@
static void pack_inter_mode_mvs(VP8_COMP *const cpi)
{
VP8_COMMON *const pc = & cpi->common;
- vp8_writer *const w = & cpi->bc;
+ vp8_writer *const w = cpi->bc;
const MV_CONTEXT *mvc = pc->fc.mvc;
const int *const rfct = cpi->count_mb_ref_frame_usage;
@@ -1075,7 +1098,7 @@
static void write_kfmodes(VP8_COMP *cpi)
{
- vp8_writer *const bc = & cpi->bc;
+ vp8_writer *const bc = cpi->bc;
const VP8_COMMON *const c = & cpi->common;
/* const */
MODE_INFO *m = c->mi;
@@ -1405,7 +1428,7 @@
static void update_coef_probs(VP8_COMP *cpi)
{
int i = 0;
- vp8_writer *const w = & cpi->bc;
+ vp8_writer *const w = cpi->bc;
int savings = 0;
vp8_clear_system_state(); //__asm emms;
@@ -1551,7 +1574,7 @@
int i, j;
VP8_HEADER oh;
VP8_COMMON *const pc = & cpi->common;
- vp8_writer *const bc = & cpi->bc;
+ vp8_writer *const bc = cpi->bc;
MACROBLOCKD *const xd = & cpi->mb.e_mbd;
int extra_bytes_packed = 0;
@@ -1566,6 +1589,8 @@
mb_feature_data_bits = vp8_mb_feature_data_bits;
+ bc[0].error = &pc->error;
+
validate_buffer(cx_data, 3, cx_data_end, &cpi->common.error);
cx_data += 3;
@@ -1844,7 +1869,9 @@
vp8_stop_encode(bc);
- oh.first_partition_length_in_bytes = cpi->bc.pos;
+ cx_data += bc->pos;
+
+ oh.first_partition_length_in_bytes = cpi->bc->pos;
/* update frame tag */
{
@@ -1858,34 +1885,58 @@
dest[2] = v >> 16;
}
- *size = VP8_HEADER_SIZE + extra_bytes_packed + cpi->bc.pos;
+ *size = VP8_HEADER_SIZE + extra_bytes_packed + cpi->bc->pos;
+
cpi->partition_sz[0] = *size;
if (pc->multi_token_partition != ONE_PARTITION)
{
- int num_part;
- int asize;
- num_part = 1 << pc->multi_token_partition;
+ int num_part = 1 << pc->multi_token_partition;
- pack_tokens_into_partitions(cpi, cx_data + bc->pos, cx_data_end, num_part, &asize);
+ /* partition size table at the end of first partition */
+ cpi->partition_sz[0] += 3 * (num_part - 1);
+ *size += 3 * (num_part - 1);
- *size += asize;
+ validate_buffer(cx_data, 3 * (num_part - 1), cx_data_end,
+ &pc->error);
+
+ for(i = 1; i < num_part + 1; i++)
+ {
+ cpi->bc[i].error = &pc->error;
+ }
+
+ pack_tokens_into_partitions(cpi, cx_data + 3 * (num_part - 1),
+ cx_data_end, num_part);
+
+ for(i = 1; i < num_part; i++)
+ {
+ cpi->partition_sz[i] = cpi->bc[i].pos;
+ write_partition_size(cx_data, cpi->partition_sz[i]);
+ cx_data += 3;
+ *size += cpi->partition_sz[i]; /* add to total */
+ }
+
+ /* add last partition to total size */
+ cpi->partition_sz[i] = cpi->bc[i].pos;
+ *size += cpi->partition_sz[i];
}
else
{
- vp8_start_encode(&cpi->bc2, cx_data + bc->pos, cx_data_end);
+ bc[1].error = &pc->error;
+
+ vp8_start_encode(&cpi->bc[1], cx_data, cx_data_end);
#if CONFIG_MULTITHREAD
if (cpi->b_multi_threaded)
- pack_mb_row_tokens(cpi, &cpi->bc2);
+ pack_mb_row_tokens(cpi, &cpi->bc[1]);
else
#endif
- pack_tokens(&cpi->bc2, cpi->tok, cpi->tok_count);
+ pack_tokens(&cpi->bc[1], cpi->tok, cpi->tok_count);
- vp8_stop_encode(&cpi->bc2);
+ vp8_stop_encode(&cpi->bc[1]);
- *size += cpi->bc2.pos;
- cpi->partition_sz[1] = cpi->bc2.pos;
+ *size += cpi->bc[1].pos;
+ cpi->partition_sz[1] = cpi->bc[1].pos;
}
}
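
For reference, the 3-byte entries that the loop above emits through write_partition_size() are little-endian partition lengths, stored back to back at the end of the first partition (one entry per token partition except the last). A sketch of what that helper amounts to; the real definition lives elsewhere in bitstream.c:

    static void write_partition_size_sketch(unsigned char *cx_data,
                                            unsigned int size)
    {
        cx_data[0] = (unsigned char)(size & 0xff);           /* low byte    */
        cx_data[1] = (unsigned char)((size >> 8) & 0xff);    /* middle byte */
        cx_data[2] = (unsigned char)((size >> 16) & 0xff);   /* high byte   */
    }
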
diff --git a/vp8/encoder/bitstream.h b/vp8/encoder/bitstream.h
index 8a875a5..9007ced 100644
--- a/vp8/encoder/bitstream.h
+++ b/vp8/encoder/bitstream.h
@@ -17,23 +17,27 @@
vp8_token *,
vp8_extra_bit_struct *,
const vp8_tree_index *);
-void vp8cx_pack_tokens_into_partitions_armv5(VP8_COMP *, unsigned char *, int , int *,
- vp8_token *,
- vp8_extra_bit_struct *,
- const vp8_tree_index *);
+void vp8cx_pack_tokens_into_partitions_armv5(VP8_COMP *,
+ unsigned char * cx_data,
+ const unsigned char *cx_data_end,
+ int num_parts,
+ vp8_token *,
+ vp8_extra_bit_struct *,
+ const vp8_tree_index *);
void vp8cx_pack_mb_row_tokens_armv5(VP8_COMP *cpi, vp8_writer *w,
vp8_token *,
vp8_extra_bit_struct *,
const vp8_tree_index *);
# define pack_tokens(a,b,c) \
vp8cx_pack_tokens_armv5(a,b,c,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
-# define pack_tokens_into_partitions(a,b,unused,c,d) \
+# define pack_tokens_into_partitions(a,b,c,d) \
vp8cx_pack_tokens_into_partitions_armv5(a,b,c,d,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
# define pack_mb_row_tokens(a,b) \
vp8cx_pack_mb_row_tokens_armv5(a,b,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
#else
-# define pack_tokens(a,b,c) pack_tokens_c(a,b,c)
-# define pack_tokens_into_partitions(a,b,c,d,e) pack_tokens_into_partitions_c(a,b,c,d,e)
+# define pack_tokens(a,b,c) pack_tokens_c(a,b,c)
+# define pack_tokens_into_partitions(a,b,c,d) pack_tokens_into_partitions_c(a,b,c,d)
# define pack_mb_row_tokens(a,b) pack_mb_row_tokens_c(a,b)
#endif
+
#endif
diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c
index d89d74e..a3b800a 100644
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -100,7 +100,7 @@
RECON_INVOKE(&rtcd->common->recon, build_intra_predictors_mby)(&x->e_mbd);
ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, *(b->base_src),
- x->e_mbd.predictor, b->src_stride);
+ b->src_stride, x->e_mbd.predictor, 16);
vp8_transform_intra_mby(x);
@@ -115,7 +115,9 @@
{
RECON_INVOKE(&rtcd->common->recon, build_intra_predictors_mbuv)(&x->e_mbd);
- ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride);
+ ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer,
+ x->src.v_buffer, x->src.uv_stride, &x->e_mbd.predictor[256],
+ &x->e_mbd.predictor[320], 8);
vp8_transform_mbuv(x);
diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c
index 80c32df..e9042e1 100644
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -48,12 +48,12 @@
}
}
-void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride)
+void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc,
+ int src_stride, unsigned char *upred,
+ unsigned char *vpred, int pred_stride)
{
short *udiff = diff + 256;
short *vdiff = diff + 320;
- unsigned char *upred = pred + 256;
- unsigned char *vpred = pred + 320;
int r, c;
@@ -65,8 +65,8 @@
}
udiff += 8;
- upred += 8;
- usrc += stride;
+ upred += pred_stride;
+ usrc += src_stride;
}
for (r = 0; r < 8; r++)
@@ -77,12 +77,13 @@
}
vdiff += 8;
- vpred += 8;
- vsrc += stride;
+ vpred += pred_stride;
+ vsrc += src_stride;
}
}
-void vp8_subtract_mby_c(short *diff, unsigned char *src, unsigned char *pred, int stride)
+void vp8_subtract_mby_c(short *diff, unsigned char *src, int src_stride,
+ unsigned char *pred, int pred_stride)
{
int r, c;
@@ -94,8 +95,8 @@
}
diff += 16;
- pred += 16;
- src += stride;
+ pred += pred_stride;
+ src += src_stride;
}
}
@@ -103,8 +104,11 @@
{
BLOCK *b = &x->block[0];
- ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, *(b->base_src), x->e_mbd.predictor, b->src_stride);
- ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride);
+ ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, *(b->base_src),
+ b->src_stride, x->e_mbd.predictor, 16);
+ ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer,
+ x->src.v_buffer, x->src.uv_stride, &x->e_mbd.predictor[256],
+ &x->e_mbd.predictor[320], 8);
}
static void build_dcblock(MACROBLOCK *x)
@@ -641,7 +645,8 @@
vp8_build_inter16x16_predictors_mby(&x->e_mbd);
- ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, *(b->base_src), x->e_mbd.predictor, b->src_stride);
+ ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, *(b->base_src),
+ b->src_stride, x->e_mbd.predictor, 16);
transform_mby(x);
diff --git a/vp8/encoder/encodemb.h b/vp8/encoder/encodemb.h
index 8fa457a..597a57b 100644
--- a/vp8/encoder/encodemb.h
+++ b/vp8/encoder/encodemb.h
@@ -28,11 +28,13 @@
void (sym)(BLOCK *be,BLOCKD *bd, int pitch)
#define prototype_submby(sym) \
- void (sym)(short *diff, unsigned char *src, unsigned char *pred, int stride)
+ void (sym)(short *diff, unsigned char *src, int src_stride, \
+ unsigned char *pred, int pred_stride)
#define prototype_submbuv(sym) \
void (sym)(short *diff, unsigned char *usrc, unsigned char *vsrc,\
- unsigned char *pred, int stride)
+ int src_stride, unsigned char *upred, unsigned char *vpred,\
+ int pred_stride)
#if ARCH_X86 || ARCH_X86_64
#include "x86/encodemb_x86.h"
diff --git a/vp8/encoder/encodemv.c b/vp8/encoder/encodemv.c
index a4849c6..c122d03 100644
--- a/vp8/encoder/encodemv.c
+++ b/vp8/encoder/encodemv.c
@@ -395,7 +395,7 @@
void vp8_write_mvprobs(VP8_COMP *cpi)
{
- vp8_writer *const w = & cpi->bc;
+ vp8_writer *const w = cpi->bc;
MV_CONTEXT *mvc = cpi->common.fc.mvc;
int flags[2] = {0, 0};
#ifdef ENTROPY_STATS
diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index 23e3050..8d19d1e 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -1276,7 +1276,7 @@
// pass.
vp8_new_frame_rate(cpi, 10000000.0 * cpi->twopass.total_stats->count / cpi->twopass.total_stats->duration);
- cpi->output_frame_rate = cpi->oxcf.frame_rate;
+ cpi->output_frame_rate = cpi->frame_rate;
cpi->twopass.bits_left = (int64_t)(cpi->twopass.total_stats->duration * cpi->oxcf.target_bandwidth / 10000000.0) ;
cpi->twopass.bits_left -= (int64_t)(cpi->twopass.total_stats->duration * two_pass_min_rate / 10000000.0);
@@ -3061,7 +3061,7 @@
// Calculate Average bits per frame.
//av_bits_per_frame = cpi->twopass.bits_left/(double)(cpi->twopass.total_stats->count - cpi->common.current_video_frame);
- av_bits_per_frame = cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.frame_rate);
+ av_bits_per_frame = cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->frame_rate);
//if ( av_bits_per_frame < 0.0 )
// av_bits_per_frame = 0.0
@@ -3123,7 +3123,7 @@
}
else
{
- int64_t clip_bits = (int64_t)(cpi->twopass.total_stats->count * cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.frame_rate));
+ int64_t clip_bits = (int64_t)(cpi->twopass.total_stats->count * cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->frame_rate));
int64_t over_spend = cpi->oxcf.starting_buffer_level - cpi->buffer_level;
if ((last_kf_resampled && (kf_q > cpi->worst_quality)) || // If triggered last time the threshold for triggering again is reduced
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 1d00e67..4df2a78 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -1436,7 +1436,7 @@
cpi->mt_sync_range = 16;
#endif
- vpx_free(cpi->tplist);
+ vpx_free(cpi->tplist);
CHECK_MEM_ERROR(cpi->tplist, vpx_malloc(sizeof(TOKENLIST) * cpi->common.mb_rows));
}
@@ -1470,8 +1470,8 @@
if(framerate < .1)
framerate = 30;
- cpi->oxcf.frame_rate = framerate;
- cpi->output_frame_rate = cpi->oxcf.frame_rate;
+ cpi->frame_rate = framerate;
+ cpi->output_frame_rate = framerate;
cpi->per_frame_bandwidth = (int)(cpi->oxcf.target_bandwidth /
cpi->output_frame_rate);
cpi->av_per_frame_bandwidth = cpi->per_frame_bandwidth;
@@ -1527,6 +1527,18 @@
cm->version = oxcf->Version;
vp8_setup_version(cm);
+ /* frame rate is not available on the first frame, as it's derived from
+ * the observed timestamps. The actual value used here doesn't matter
+ * too much, as it will adapt quickly. If the reciprocal of the timebase
+ * seems like a reasonable framerate, then use that as a guess, otherwise
+ * use 30.
+ */
+ cpi->frame_rate = (double)(oxcf->timebase.den) /
+ (double)(oxcf->timebase.num);
+
+ if (cpi->frame_rate > 180)
+ cpi->frame_rate = 30;
+
// change includes all joint functionality
vp8_change_config(ptr, oxcf);
@@ -1787,7 +1799,7 @@
cpi->oxcf.target_bandwidth, 1000);
// Set up frame rate and related parameters rate control values.
- vp8_new_frame_rate(cpi, cpi->oxcf.frame_rate);
+ vp8_new_frame_rate(cpi, cpi->frame_rate);
// Set absolute upper and lower quality limits
cpi->worst_quality = cpi->oxcf.worst_allowed_q;
@@ -2408,7 +2420,7 @@
{
extern int count_mb_seg[4];
FILE *f = fopen("modes.stt", "a");
- double dr = (double)cpi->oxcf.frame_rate * (double)bytes * (double)8 / (double)count / (double)1000 ;
+ double dr = (double)cpi->frame_rate * (double)bytes * (double)8 / (double)count / (double)1000 ;
fprintf(f, "intra_mode in Intra Frames:\n");
fprintf(f, "Y: %8d, %8d, %8d, %8d, %8d\n", y_modes[0], y_modes[1], y_modes[2], y_modes[3], y_modes[4]);
fprintf(f, "UV:%8d, %8d, %8d, %8d\n", uv_modes[0], uv_modes[1], uv_modes[2], uv_modes[3]);
@@ -4856,7 +4868,7 @@
{
double two_pass_min_rate = (double)(cpi->oxcf.target_bandwidth
*cpi->oxcf.two_pass_vbrmin_section / 100);
- cpi->twopass.bits_left += (int64_t)(two_pass_min_rate / cpi->oxcf.frame_rate);
+ cpi->twopass.bits_left += (int64_t)(two_pass_min_rate / cpi->frame_rate);
}
}
#endif
@@ -4928,7 +4940,7 @@
int64_t store_reg[8];
#endif
VP8_COMP *cpi = (VP8_COMP *) ptr;
- VP8_COMMON *cm = &cpi->common;
+ VP8_COMMON *cm;
struct vpx_usec_timer tsctimer;
struct vpx_usec_timer ticktimer;
struct vpx_usec_timer cmptimer;
@@ -4937,12 +4949,14 @@
if (!cpi)
return -1;
- if (setjmp(cpi->common.error.jmp)){
+ cm = &cpi->common;
+
+ if (setjmp(cpi->common.error.jmp))
+ {
cpi->common.error.setjmp = 0;
return VPX_CODEC_CORRUPT_FRAME;
}
- cpi->bc.error = &cpi->common.error;
cpi->common.error.setjmp = 1;
#if HAVE_ARMV7
@@ -5092,7 +5106,7 @@
if(interval > 10000000.0)
interval = 10000000;
- avg_duration = 10000000.0 / cpi->oxcf.frame_rate;
+ avg_duration = 10000000.0 / cpi->frame_rate;
avg_duration *= (interval - avg_duration + this_duration);
avg_duration /= interval;
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index a0828a4..7382de4 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -314,8 +314,7 @@
MACROBLOCK mb;
VP8_COMMON common;
- vp8_writer bc, bc2;
- // bool_writer *bc2;
+ vp8_writer bc[9]; // one boolcoder for each partition
VP8_CONFIG oxcf;
@@ -418,6 +417,7 @@
int buffered_mode;
+ double frame_rate;
int64_t buffer_level;
int bits_off_target;
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index e8abf84..77e4553 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -296,7 +296,7 @@
void vp8_auto_select_speed(VP8_COMP *cpi)
{
- int milliseconds_for_compress = (int)(1000000 / cpi->oxcf.frame_rate);
+ int milliseconds_for_compress = (int)(1000000 / cpi->frame_rate);
milliseconds_for_compress = milliseconds_for_compress * (16 - cpi->oxcf.cpu_used) / 16;
@@ -552,7 +552,7 @@
int d;
ENCODEMB_INVOKE(rtcd, submby)( mb->src_diff, *(mb->block[0].base_src),
- mb->e_mbd.predictor, mb->block[0].src_stride );
+ mb->block[0].src_stride, mb->e_mbd.predictor, 16);
// Fdct and building the 2nd order block
for (beptr = mb->block; beptr < mb->block + 16; beptr += 2)
@@ -800,7 +800,8 @@
{
vp8_build_inter16x16_predictors_mbuv(&x->e_mbd);
ENCODEMB_INVOKE(IF_RTCD(&cpi->rtcd.encodemb), submbuv)(x->src_diff,
- x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride);
+ x->src.u_buffer, x->src.v_buffer, x->src.uv_stride,
+ &x->e_mbd.predictor[256], &x->e_mbd.predictor[320], 8);
vp8_transform_mbuv(x);
vp8_quantize_mbuv(x);
@@ -816,7 +817,8 @@
{
vp8_build_inter4x4_predictors_mbuv(&x->e_mbd);
ENCODEMB_INVOKE(IF_RTCD(&cpi->rtcd.encodemb), submbuv)(x->src_diff,
- x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride);
+ x->src.u_buffer, x->src.v_buffer, x->src.uv_stride,
+ &x->e_mbd.predictor[256], &x->e_mbd.predictor[320], 8);
vp8_transform_mbuv(x);
vp8_quantize_mbuv(x);
@@ -845,8 +847,8 @@
RECON_INVOKE(&cpi->rtcd.common->recon, build_intra_predictors_mbuv)
(&x->e_mbd);
ENCODEMB_INVOKE(IF_RTCD(&cpi->rtcd.encodemb), submbuv)(x->src_diff,
- x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor,
- x->src.uv_stride);
+ x->src.u_buffer, x->src.v_buffer, x->src.uv_stride,
+ &x->e_mbd.predictor[256], &x->e_mbd.predictor[320], 8);
vp8_transform_mbuv(x);
vp8_quantize_mbuv(x);
diff --git a/vp8/encoder/x86/subtract_mmx.asm b/vp8/encoder/x86/subtract_mmx.asm
index 4ce16ce..75e8aa3c 100644
--- a/vp8/encoder/x86/subtract_mmx.asm
+++ b/vp8/encoder/x86/subtract_mmx.asm
@@ -73,74 +73,71 @@
pop rbp
ret
-;void vp8_subtract_mby_mmx(short *diff, unsigned char *src, unsigned char *pred, int stride)
+;void vp8_subtract_mby_mmx(short *diff, unsigned char *src, int src_stride,
+;unsigned char *pred, int pred_stride)
global sym(vp8_subtract_mby_mmx)
sym(vp8_subtract_mby_mmx):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
+ SHADOW_ARGS_TO_STACK 5
push rsi
push rdi
; end prolog
+ mov rdi, arg(0) ;diff
+ mov rsi, arg(1) ;src
+ movsxd rdx, dword ptr arg(2);src_stride
+ mov rax, arg(3) ;pred
+ push rbx
+ movsxd rbx, dword ptr arg(4);pred_stride
- mov rsi, arg(1) ;src
- mov rdi, arg(0) ;diff
+ pxor mm0, mm0
+ mov rcx, 16
- mov rax, arg(2) ;pred
- movsxd rdx, dword ptr arg(3) ;stride
-
- mov rcx, 16
- pxor mm0, mm0
.submby_loop:
+ movq mm1, [rsi]
+ movq mm3, [rax]
- movq mm1, [rsi]
- movq mm3, [rax]
+ movq mm2, mm1
+ movq mm4, mm3
- movq mm2, mm1
- movq mm4, mm3
+ punpcklbw mm1, mm0
+ punpcklbw mm3, mm0
- punpcklbw mm1, mm0
- punpcklbw mm3, mm0
+ punpckhbw mm2, mm0
+ punpckhbw mm4, mm0
- punpckhbw mm2, mm0
- punpckhbw mm4, mm0
+ psubw mm1, mm3
+ psubw mm2, mm4
- psubw mm1, mm3
- psubw mm2, mm4
+ movq [rdi], mm1
+ movq [rdi+8], mm2
- movq [rdi], mm1
- movq [rdi+8], mm2
+ movq mm1, [rsi+8]
+ movq mm3, [rax+8]
+ movq mm2, mm1
+ movq mm4, mm3
- movq mm1, [rsi+8]
- movq mm3, [rax+8]
+ punpcklbw mm1, mm0
+ punpcklbw mm3, mm0
- movq mm2, mm1
- movq mm4, mm3
+ punpckhbw mm2, mm0
+ punpckhbw mm4, mm0
- punpcklbw mm1, mm0
- punpcklbw mm3, mm0
+ psubw mm1, mm3
+ psubw mm2, mm4
- punpckhbw mm2, mm0
- punpckhbw mm4, mm0
+ movq [rdi+16], mm1
+ movq [rdi+24], mm2
+ add rdi, 32
+ lea rax, [rax+rbx]
+ lea rsi, [rsi+rdx]
+ dec rcx
+ jnz .submby_loop
- psubw mm1, mm3
- psubw mm2, mm4
-
- movq [rdi+16], mm1
- movq [rdi+24], mm2
-
-
- add rdi, 32
- add rax, 16
-
- lea rsi, [rsi+rdx]
-
- sub rcx, 1
- jnz .submby_loop
-
+ pop rbx
pop rdi
pop rsi
; begin epilog
@@ -149,281 +146,75 @@
ret
-;void vp8_subtract_mbuv_mmx(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride)
+;void vp8_subtract_mbuv_mmx(short *diff, unsigned char *usrc, unsigned char *vsrc,
+; int src_stride, unsigned char *upred,
+; unsigned char *vpred, int pred_stride)
+
global sym(vp8_subtract_mbuv_mmx)
sym(vp8_subtract_mbuv_mmx):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
+ SHADOW_ARGS_TO_STACK 7
push rsi
push rdi
; end prolog
- ;short *udiff = diff + 256;
- ;short *vdiff = diff + 320;
- ;unsigned char *upred = pred + 256;
- ;unsigned char *vpred = pred + 320;
+ mov rdi, arg(0) ;diff
+ mov rsi, arg(1) ;usrc
+ movsxd rdx, dword ptr arg(3);src_stride;
+ mov rax, arg(4) ;upred
+ add rdi, 256*2 ;diff = diff + 256 (shorts)
+ mov rcx, 8
+ push rbx
+ movsxd rbx, dword ptr arg(6);pred_stride
- ;unsigned char *z = usrc;
- ;unsigned short *diff = udiff;
- ;unsigned char *Predictor= upred;
+ pxor mm7, mm7
- mov rdi, arg(0) ;diff
- mov rax, arg(3) ;pred
- mov rsi, arg(1) ;z = usrc
- add rdi, 256*2 ;diff = diff + 256 (shorts)
- add rax, 256 ;Predictor = pred + 256
- movsxd rdx, dword ptr arg(4) ;stride;
- pxor mm7, mm7
+.submbu_loop:
+ movq mm0, [rsi]
+ movq mm1, [rax]
+ movq mm3, mm0
+ movq mm4, mm1
+ punpcklbw mm0, mm7
+ punpcklbw mm1, mm7
+ punpckhbw mm3, mm7
+ punpckhbw mm4, mm7
+ psubw mm0, mm1
+ psubw mm3, mm4
+ movq [rdi], mm0
+ movq [rdi+8], mm3
+ add rdi, 16
+ add rsi, rdx
+ add rax, rbx
- movq mm0, [rsi]
- movq mm1, [rax]
- movq mm3, mm0
- movq mm4, mm1
- punpcklbw mm0, mm7
- punpcklbw mm1, mm7
- punpckhbw mm3, mm7
- punpckhbw mm4, mm7
- psubw mm0, mm1
- psubw mm3, mm4
- movq [rdi], mm0
- movq [rdi+8], mm3
+ dec rcx
+ jnz .submbu_loop
+ mov rsi, arg(2) ;vsrc
+ mov rax, arg(5) ;vpred
+ mov rcx, 8
- movq mm0, [rsi+rdx]
- movq mm1, [rax+8]
- movq mm3, mm0
- movq mm4, mm1
- punpcklbw mm0, mm7
- punpcklbw mm1, mm7
- punpckhbw mm3, mm7
- punpckhbw mm4, mm7
- psubw mm0, mm1
- psubw mm3, mm4
- movq [rdi+16], mm0
- movq [rdi+24], mm3
+.submbv_loop:
+ movq mm0, [rsi]
+ movq mm1, [rax]
+ movq mm3, mm0
+ movq mm4, mm1
+ punpcklbw mm0, mm7
+ punpcklbw mm1, mm7
+ punpckhbw mm3, mm7
+ punpckhbw mm4, mm7
+ psubw mm0, mm1
+ psubw mm3, mm4
+ movq [rdi], mm0
+ movq [rdi+8], mm3
+ add rdi, 16
+ add rsi, rdx
+ add rax, rbx
- movq mm0, [rsi+rdx*2]
- movq mm1, [rax+16]
- movq mm3, mm0
- movq mm4, mm1
- punpcklbw mm0, mm7
- punpcklbw mm1, mm7
- punpckhbw mm3, mm7
- punpckhbw mm4, mm7
- psubw mm0, mm1
- psubw mm3, mm4
- movq [rdi+32], mm0
- movq [rdi+40], mm3
- lea rsi, [rsi+rdx*2]
+ dec rcx
+ jnz .submbv_loop
-
- movq mm0, [rsi+rdx]
- movq mm1, [rax+24]
- movq mm3, mm0
- movq mm4, mm1
- punpcklbw mm0, mm7
- punpcklbw mm1, mm7
- punpckhbw mm3, mm7
- punpckhbw mm4, mm7
- psubw mm0, mm1
- psubw mm3, mm4
-
- movq [rdi+48], mm0
- movq [rdi+56], mm3
-
-
- add rdi, 64
- add rax, 32
- lea rsi, [rsi+rdx*2]
-
-
- movq mm0, [rsi]
- movq mm1, [rax]
- movq mm3, mm0
- movq mm4, mm1
- punpcklbw mm0, mm7
- punpcklbw mm1, mm7
- punpckhbw mm3, mm7
- punpckhbw mm4, mm7
- psubw mm0, mm1
- psubw mm3, mm4
- movq [rdi], mm0
- movq [rdi+8], mm3
-
-
- movq mm0, [rsi+rdx]
- movq mm1, [rax+8]
- movq mm3, mm0
- movq mm4, mm1
- punpcklbw mm0, mm7
- punpcklbw mm1, mm7
- punpckhbw mm3, mm7
- punpckhbw mm4, mm7
- psubw mm0, mm1
- psubw mm3, mm4
- movq [rdi+16], mm0
- movq [rdi+24], mm3
-
- movq mm0, [rsi+rdx*2]
- movq mm1, [rax+16]
- movq mm3, mm0
- movq mm4, mm1
- punpcklbw mm0, mm7
- punpcklbw mm1, mm7
- punpckhbw mm3, mm7
- punpckhbw mm4, mm7
- psubw mm0, mm1
- psubw mm3, mm4
- movq [rdi+32], mm0
- movq [rdi+40], mm3
- lea rsi, [rsi+rdx*2]
-
-
- movq mm0, [rsi+rdx]
- movq mm1, [rax+24]
- movq mm3, mm0
- movq mm4, mm1
- punpcklbw mm0, mm7
- punpcklbw mm1, mm7
- punpckhbw mm3, mm7
- punpckhbw mm4, mm7
- psubw mm0, mm1
- psubw mm3, mm4
-
- movq [rdi+48], mm0
- movq [rdi+56], mm3
-
- ;unsigned char *z = vsrc;
- ;unsigned short *diff = vdiff;
- ;unsigned char *Predictor= vpred;
-
- mov rdi, arg(0) ;diff
- mov rax, arg(3) ;pred
- mov rsi, arg(2) ;z = usrc
- add rdi, 320*2 ;diff = diff + 320 (shorts)
- add rax, 320 ;Predictor = pred + 320
- movsxd rdx, dword ptr arg(4) ;stride;
- pxor mm7, mm7
-
- movq mm0, [rsi]
- movq mm1, [rax]
- movq mm3, mm0
- movq mm4, mm1
- punpcklbw mm0, mm7
- punpcklbw mm1, mm7
- punpckhbw mm3, mm7
- punpckhbw mm4, mm7
- psubw mm0, mm1
- psubw mm3, mm4
- movq [rdi], mm0
- movq [rdi+8], mm3
-
-
- movq mm0, [rsi+rdx]
- movq mm1, [rax+8]
- movq mm3, mm0
- movq mm4, mm1
- punpcklbw mm0, mm7
- punpcklbw mm1, mm7
- punpckhbw mm3, mm7
- punpckhbw mm4, mm7
- psubw mm0, mm1
- psubw mm3, mm4
- movq [rdi+16], mm0
- movq [rdi+24], mm3
-
- movq mm0, [rsi+rdx*2]
- movq mm1, [rax+16]
- movq mm3, mm0
- movq mm4, mm1
- punpcklbw mm0, mm7
- punpcklbw mm1, mm7
- punpckhbw mm3, mm7
- punpckhbw mm4, mm7
- psubw mm0, mm1
- psubw mm3, mm4
- movq [rdi+32], mm0
- movq [rdi+40], mm3
- lea rsi, [rsi+rdx*2]
-
-
- movq mm0, [rsi+rdx]
- movq mm1, [rax+24]
- movq mm3, mm0
- movq mm4, mm1
- punpcklbw mm0, mm7
- punpcklbw mm1, mm7
- punpckhbw mm3, mm7
- punpckhbw mm4, mm7
- psubw mm0, mm1
- psubw mm3, mm4
-
- movq [rdi+48], mm0
- movq [rdi+56], mm3
-
-
- add rdi, 64
- add rax, 32
- lea rsi, [rsi+rdx*2]
-
-
- movq mm0, [rsi]
- movq mm1, [rax]
- movq mm3, mm0
- movq mm4, mm1
- punpcklbw mm0, mm7
- punpcklbw mm1, mm7
- punpckhbw mm3, mm7
- punpckhbw mm4, mm7
- psubw mm0, mm1
- psubw mm3, mm4
- movq [rdi], mm0
- movq [rdi+8], mm3
-
-
- movq mm0, [rsi+rdx]
- movq mm1, [rax+8]
- movq mm3, mm0
- movq mm4, mm1
- punpcklbw mm0, mm7
- punpcklbw mm1, mm7
- punpckhbw mm3, mm7
- punpckhbw mm4, mm7
- psubw mm0, mm1
- psubw mm3, mm4
- movq [rdi+16], mm0
- movq [rdi+24], mm3
-
- movq mm0, [rsi+rdx*2]
- movq mm1, [rax+16]
- movq mm3, mm0
- movq mm4, mm1
- punpcklbw mm0, mm7
- punpcklbw mm1, mm7
- punpckhbw mm3, mm7
- punpckhbw mm4, mm7
- psubw mm0, mm1
- psubw mm3, mm4
- movq [rdi+32], mm0
- movq [rdi+40], mm3
- lea rsi, [rsi+rdx*2]
-
-
- movq mm0, [rsi+rdx]
- movq mm1, [rax+24]
- movq mm3, mm0
- movq mm4, mm1
- punpcklbw mm0, mm7
- punpcklbw mm1, mm7
- punpckhbw mm3, mm7
- punpckhbw mm4, mm7
- psubw mm0, mm1
- psubw mm3, mm4
-
- movq [rdi+48], mm0
- movq [rdi+56], mm3
-
+ pop rbx
; begin epilog
pop rdi
pop rsi
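
The rewritten MMX routines above replace the fully unrolled code with short loops (16 rows for the Y plane, 8 rows per chroma plane) and keep rbx for the new pred_stride argument. The per-row work is the classic widen-then-subtract idiom: bytes are zero-extended to words against a cleared register and subtracted as words. A scalar sketch of that idiom for one group of eight pixels (hypothetical helper, not part of the patch):

    #include <stdint.h>

    static void widen_and_subtract_8(int16_t *diff, const uint8_t *src,
                                     const uint8_t *pred)
    {
        int i;

        for (i = 0; i < 8; i++)
        {
            int16_t s = src[i];            /* punpcklbw/punpckhbw vs. mm0: zero-extend bytes to words */
            int16_t p = pred[i];

            diff[i] = (int16_t)(s - p);    /* psubw */
        }
    }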
diff --git a/vp8/encoder/x86/subtract_sse2.asm b/vp8/encoder/x86/subtract_sse2.asm
index 3bd1ff6..008e9c7 100644
--- a/vp8/encoder/x86/subtract_sse2.asm
+++ b/vp8/encoder/x86/subtract_sse2.asm
@@ -71,277 +71,166 @@
ret
-;void vp8_subtract_mby_sse2(short *diff, unsigned char *src, unsigned char *pred, int stride)
+;void vp8_subtract_mby_sse2(short *diff, unsigned char *src, int src_stride,
+;unsigned char *pred, int pred_stride)
global sym(vp8_subtract_mby_sse2)
sym(vp8_subtract_mby_sse2):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- mov rsi, arg(1) ;src
- mov rdi, arg(0) ;diff
-
- mov rax, arg(2) ;pred
- movsxd rdx, dword ptr arg(3) ;stride
-
- mov rcx, 8 ; do two lines at one time
-
-.submby_loop:
- movdqa xmm0, XMMWORD PTR [rsi] ; src
- movdqa xmm1, XMMWORD PTR [rax] ; pred
-
- movdqa xmm2, xmm0
- psubb xmm0, xmm1
-
- pxor xmm1, [GLOBAL(t80)] ;convert to signed values
- pxor xmm2, [GLOBAL(t80)]
- pcmpgtb xmm1, xmm2 ; obtain sign information
-
- movdqa xmm2, xmm0
- movdqa xmm3, xmm1
- punpcklbw xmm0, xmm1 ; put sign back to subtraction
- punpckhbw xmm2, xmm3 ; put sign back to subtraction
-
- movdqa XMMWORD PTR [rdi], xmm0
- movdqa XMMWORD PTR [rdi +16], xmm2
-
- movdqa xmm4, XMMWORD PTR [rsi + rdx]
- movdqa xmm5, XMMWORD PTR [rax + 16]
-
- movdqa xmm6, xmm4
- psubb xmm4, xmm5
-
- pxor xmm5, [GLOBAL(t80)] ;convert to signed values
- pxor xmm6, [GLOBAL(t80)]
- pcmpgtb xmm5, xmm6 ; obtain sign information
-
- movdqa xmm6, xmm4
- movdqa xmm7, xmm5
- punpcklbw xmm4, xmm5 ; put sign back to subtraction
- punpckhbw xmm6, xmm7 ; put sign back to subtraction
-
- movdqa XMMWORD PTR [rdi +32], xmm4
- movdqa XMMWORD PTR [rdi +48], xmm6
-
- add rdi, 64
- add rax, 32
- lea rsi, [rsi+rdx*2]
-
- sub rcx, 1
- jnz .submby_loop
-
- pop rdi
- pop rsi
- ; begin epilog
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-;void vp8_subtract_mbuv_sse2(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride)
-global sym(vp8_subtract_mbuv_sse2)
-sym(vp8_subtract_mbuv_sse2):
- push rbp
- mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
GET_GOT rbx
push rsi
push rdi
; end prolog
- mov rdi, arg(0) ;diff
- mov rax, arg(3) ;pred
- mov rsi, arg(1) ;z = usrc
- add rdi, 256*2 ;diff = diff + 256 (shorts)
- add rax, 256 ;Predictor = pred + 256
- movsxd rdx, dword ptr arg(4) ;stride;
- lea rcx, [rdx + rdx*2]
+ mov rdi, arg(0) ;diff
+ mov rsi, arg(1) ;src
+ movsxd rdx, dword ptr arg(2);src_stride
+ mov rax, arg(3) ;pred
+ movdqa xmm4, [GLOBAL(t80)]
+ push rbx
+ mov rcx, 8 ; do two lines at one time
+ movsxd rbx, dword ptr arg(4);pred_stride
- ;u
- ;line 0 1
- movq xmm0, MMWORD PTR [rsi] ; src
- movq xmm2, MMWORD PTR [rsi+rdx]
- movdqa xmm1, XMMWORD PTR [rax] ; pred
- punpcklqdq xmm0, xmm2
+.submby_loop:
+ movdqa xmm0, [rsi] ; src
+ movdqa xmm1, [rax] ; pred
- movdqa xmm2, xmm0
- psubb xmm0, xmm1 ; subtraction with sign missed
+ movdqa xmm2, xmm0
+ psubb xmm0, xmm1
- pxor xmm1, [GLOBAL(t80)] ;convert to signed values
- pxor xmm2, [GLOBAL(t80)]
- pcmpgtb xmm1, xmm2 ; obtain sign information
+ pxor xmm1, xmm4 ;convert to signed values
+ pxor xmm2, xmm4
+ pcmpgtb xmm1, xmm2 ; obtain sign information
- movdqa xmm2, xmm0
- movdqa xmm3, xmm1
- punpcklbw xmm0, xmm1 ; put sign back to subtraction
- punpckhbw xmm2, xmm3 ; put sign back to subtraction
+ movdqa xmm2, xmm0
+ punpcklbw xmm0, xmm1 ; put sign back to subtraction
+ punpckhbw xmm2, xmm1 ; put sign back to subtraction
- movdqa XMMWORD PTR [rdi], xmm0
- movdqa XMMWORD PTR [rdi +16], xmm2
+ movdqa xmm3, [rsi + rdx]
+ movdqa xmm5, [rax + rbx]
- ;line 2 3
- movq xmm0, MMWORD PTR [rsi+rdx*2] ; src
- movq xmm2, MMWORD PTR [rsi+rcx]
- movdqa xmm1, XMMWORD PTR [rax+16] ; pred
- punpcklqdq xmm0, xmm2
+ lea rsi, [rsi+rdx*2]
+ lea rax, [rax+rbx*2]
- movdqa xmm2, xmm0
- psubb xmm0, xmm1 ; subtraction with sign missed
+ movdqa [rdi], xmm0
+ movdqa [rdi +16], xmm2
- pxor xmm1, [GLOBAL(t80)] ;convert to signed values
- pxor xmm2, [GLOBAL(t80)]
- pcmpgtb xmm1, xmm2 ; obtain sign information
+ movdqa xmm1, xmm3
+ psubb xmm3, xmm5
- movdqa xmm2, xmm0
- movdqa xmm3, xmm1
- punpcklbw xmm0, xmm1 ; put sign back to subtraction
- punpckhbw xmm2, xmm3 ; put sign back to subtraction
+ pxor xmm5, xmm4 ;convert to signed values
+ pxor xmm1, xmm4
+ pcmpgtb xmm5, xmm1 ; obtain sign information
- movdqa XMMWORD PTR [rdi + 32], xmm0
- movdqa XMMWORD PTR [rdi + 48], xmm2
+ movdqa xmm1, xmm3
+ punpcklbw xmm3, xmm5 ; put sign back to subtraction
+ punpckhbw xmm1, xmm5 ; put sign back to subtraction
- ;line 4 5
- lea rsi, [rsi + rdx*4]
+ movdqa [rdi +32], xmm3
+ movdqa [rdi +48], xmm1
- movq xmm0, MMWORD PTR [rsi] ; src
- movq xmm2, MMWORD PTR [rsi+rdx]
- movdqa xmm1, XMMWORD PTR [rax + 32] ; pred
- punpcklqdq xmm0, xmm2
+ add rdi, 64
+ dec rcx
+ jnz .submby_loop
- movdqa xmm2, xmm0
- psubb xmm0, xmm1 ; subtraction with sign missed
+ pop rbx
+ pop rdi
+ pop rsi
+ ; begin epilog
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
- pxor xmm1, [GLOBAL(t80)] ;convert to signed values
- pxor xmm2, [GLOBAL(t80)]
- pcmpgtb xmm1, xmm2 ; obtain sign information
+;void vp8_subtract_mbuv_sse2(short *diff, unsigned char *usrc, unsigned char *vsrc,
+; int src_stride, unsigned char *upred,
+; unsigned char *vpred, int pred_stride)
+global sym(vp8_subtract_mbuv_sse2)
+sym(vp8_subtract_mbuv_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
- movdqa xmm2, xmm0
- movdqa xmm3, xmm1
- punpcklbw xmm0, xmm1 ; put sign back to subtraction
- punpckhbw xmm2, xmm3 ; put sign back to subtraction
+ movdqa xmm4, [GLOBAL(t80)]
+ mov rdi, arg(0) ;diff
+ mov rsi, arg(1) ;usrc
+ movsxd rdx, dword ptr arg(3);src_stride;
+ mov rax, arg(4) ;upred
+ add rdi, 256*2 ;diff = diff + 256 (shorts)
+ mov rcx, 4
+ push rbx
+ movsxd rbx, dword ptr arg(6);pred_stride
- movdqa XMMWORD PTR [rdi + 64], xmm0
- movdqa XMMWORD PTR [rdi + 80], xmm2
+ ;u
+.submbu_loop:
+ movq xmm0, [rsi] ; src
+ movq xmm2, [rsi+rdx] ; src -- next line
+ movq xmm1, [rax] ; pred
+ movq xmm3, [rax+rbx] ; pred -- next line
+ lea rsi, [rsi + rdx*2]
+ lea rax, [rax + rbx*2]
- ;line 6 7
- movq xmm0, MMWORD PTR [rsi+rdx*2] ; src
- movq xmm2, MMWORD PTR [rsi+rcx]
- movdqa xmm1, XMMWORD PTR [rax+ 48] ; pred
- punpcklqdq xmm0, xmm2
+ punpcklqdq xmm0, xmm2
+ punpcklqdq xmm1, xmm3
- movdqa xmm2, xmm0
- psubb xmm0, xmm1 ; subtraction with sign missed
+ movdqa xmm2, xmm0
+ psubb xmm0, xmm1 ; subtraction with sign missed
- pxor xmm1, [GLOBAL(t80)] ;convert to signed values
- pxor xmm2, [GLOBAL(t80)]
- pcmpgtb xmm1, xmm2 ; obtain sign information
+ pxor xmm1, xmm4 ;convert to signed values
+ pxor xmm2, xmm4
+ pcmpgtb xmm1, xmm2 ; obtain sign information
- movdqa xmm2, xmm0
- movdqa xmm3, xmm1
- punpcklbw xmm0, xmm1 ; put sign back to subtraction
- punpckhbw xmm2, xmm3 ; put sign back to subtraction
+ movdqa xmm2, xmm0
+ movdqa xmm3, xmm1
+ punpcklbw xmm0, xmm1 ; put sign back to subtraction
+ punpckhbw xmm2, xmm3 ; put sign back to subtraction
- movdqa XMMWORD PTR [rdi + 96], xmm0
- movdqa XMMWORD PTR [rdi + 112], xmm2
+ movdqa [rdi], xmm0 ; store difference
+ movdqa [rdi +16], xmm2 ; store difference
+ add rdi, 32
+ sub rcx, 1
+ jnz .submbu_loop
- ;v
- mov rsi, arg(2) ;z = vsrc
- add rdi, 64*2 ;diff = diff + 320 (shorts)
- add rax, 64 ;Predictor = pred + 320
+ mov rsi, arg(2) ;vsrc
+ mov rax, arg(5) ;vpred
+ mov rcx, 4
- ;line 0 1
- movq xmm0, MMWORD PTR [rsi] ; src
- movq xmm2, MMWORD PTR [rsi+rdx]
- movdqa xmm1, XMMWORD PTR [rax] ; pred
- punpcklqdq xmm0, xmm2
+ ;v
+.submbv_loop:
+ movq xmm0, [rsi] ; src
+ movq xmm2, [rsi+rdx] ; src -- next line
+ movq xmm1, [rax] ; pred
+ movq xmm3, [rax+rbx] ; pred -- next line
+ lea rsi, [rsi + rdx*2]
+ lea rax, [rax + rbx*2]
- movdqa xmm2, xmm0
- psubb xmm0, xmm1 ; subtraction with sign missed
+ punpcklqdq xmm0, xmm2
+ punpcklqdq xmm1, xmm3
- pxor xmm1, [GLOBAL(t80)] ;convert to signed values
- pxor xmm2, [GLOBAL(t80)]
- pcmpgtb xmm1, xmm2 ; obtain sign information
+ movdqa xmm2, xmm0
+ psubb xmm0, xmm1 ; subtraction with sign missed
- movdqa xmm2, xmm0
- movdqa xmm3, xmm1
- punpcklbw xmm0, xmm1 ; put sign back to subtraction
- punpckhbw xmm2, xmm3 ; put sign back to subtraction
+ pxor xmm1, xmm4 ;convert to signed values
+ pxor xmm2, xmm4
+ pcmpgtb xmm1, xmm2 ; obtain sign information
- movdqa XMMWORD PTR [rdi], xmm0
- movdqa XMMWORD PTR [rdi +16], xmm2
+ movdqa xmm2, xmm0
+ movdqa xmm3, xmm1
+ punpcklbw xmm0, xmm1 ; put sign back to subtraction
+ punpckhbw xmm2, xmm3 ; put sign back to subtraction
- ;line 2 3
- movq xmm0, MMWORD PTR [rsi+rdx*2] ; src
- movq xmm2, MMWORD PTR [rsi+rcx]
- movdqa xmm1, XMMWORD PTR [rax+16] ; pred
- punpcklqdq xmm0, xmm2
+ movdqa [rdi], xmm0 ; store difference
+ movdqa [rdi +16], xmm2 ; store difference
+ add rdi, 32
+ sub rcx, 1
+ jnz .submbv_loop
- movdqa xmm2, xmm0
- psubb xmm0, xmm1 ; subtraction with sign missed
-
- pxor xmm1, [GLOBAL(t80)] ;convert to signed values
- pxor xmm2, [GLOBAL(t80)]
- pcmpgtb xmm1, xmm2 ; obtain sign information
-
- movdqa xmm2, xmm0
- movdqa xmm3, xmm1
- punpcklbw xmm0, xmm1 ; put sign back to subtraction
- punpckhbw xmm2, xmm3 ; put sign back to subtraction
-
- movdqa XMMWORD PTR [rdi + 32], xmm0
- movdqa XMMWORD PTR [rdi + 48], xmm2
-
- ;line 4 5
- lea rsi, [rsi + rdx*4]
-
- movq xmm0, MMWORD PTR [rsi] ; src
- movq xmm2, MMWORD PTR [rsi+rdx]
- movdqa xmm1, XMMWORD PTR [rax + 32] ; pred
- punpcklqdq xmm0, xmm2
-
- movdqa xmm2, xmm0
- psubb xmm0, xmm1 ; subtraction with sign missed
-
- pxor xmm1, [GLOBAL(t80)] ;convert to signed values
- pxor xmm2, [GLOBAL(t80)]
- pcmpgtb xmm1, xmm2 ; obtain sign information
-
- movdqa xmm2, xmm0
- movdqa xmm3, xmm1
- punpcklbw xmm0, xmm1 ; put sign back to subtraction
- punpckhbw xmm2, xmm3 ; put sign back to subtraction
-
- movdqa XMMWORD PTR [rdi + 64], xmm0
- movdqa XMMWORD PTR [rdi + 80], xmm2
-
- ;line 6 7
- movq xmm0, MMWORD PTR [rsi+rdx*2] ; src
- movq xmm2, MMWORD PTR [rsi+rcx]
- movdqa xmm1, XMMWORD PTR [rax+ 48] ; pred
- punpcklqdq xmm0, xmm2
-
- movdqa xmm2, xmm0
- psubb xmm0, xmm1 ; subtraction with sign missed
-
- pxor xmm1, [GLOBAL(t80)] ;convert to signed values
- pxor xmm2, [GLOBAL(t80)]
- pcmpgtb xmm1, xmm2 ; obtain sign information
-
- movdqa xmm2, xmm0
- movdqa xmm3, xmm1
- punpcklbw xmm0, xmm1 ; put sign back to subtraction
- punpckhbw xmm2, xmm3 ; put sign back to subtraction
-
- movdqa XMMWORD PTR [rdi + 96], xmm0
- movdqa XMMWORD PTR [rdi + 112], xmm2
-
+ pop rbx
; begin epilog
pop rdi
pop rsi
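
Unlike the MMX version, the SSE2 routines above subtract the bytes directly with psubb (which wraps modulo 256) and then rebuild the missing sign: the operands are biased with 0x80, pcmpgtb turns the signed compare of the biased values into an unsigned "pred > src" mask, and punpcklbw/punpckhbw interleave that mask in as the high byte of each 16-bit difference. A scalar sketch of the identity for a single pair of bytes (illustrative only; assumes two's-complement shorts):

    #include <stdint.h>

    static int16_t subtract_bytes(uint8_t s, uint8_t p)
    {
        uint8_t d8   = (uint8_t)(s - p);          /* psubb: wraps modulo 256          */
        uint8_t mask = (p > s) ? 0xFF : 0x00;     /* pxor t80 + pcmpgtb: unsigned p>s */

        /* punpcklbw pairs the wrapped difference with the sign mask, so the
           combined 16-bit value equals s - p for every s, p in 0..255 */
        return (int16_t)(uint16_t)(d8 | (mask << 8));
    }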
diff --git a/vp8/encoder/x86/variance_mmx.c b/vp8/encoder/x86/variance_mmx.c
index 92b695f..e2524b4 100644
--- a/vp8/encoder/x86/variance_mmx.c
+++ b/vp8/encoder/x86/variance_mmx.c
@@ -12,6 +12,7 @@
#include "vp8/encoder/variance.h"
#include "vp8/common/pragmas.h"
#include "vpx_ports/mem.h"
+#include "vp8/common/x86/filter_x86.h"
extern void filter_block1d_h6_mmx
(
@@ -21,7 +22,7 @@
unsigned int pixel_step,
unsigned int output_height,
unsigned int output_width,
- short *vp7_filter
+ short *filter
);
extern void filter_block1d_v6_mmx
(
@@ -31,7 +32,7 @@
unsigned int pixel_step,
unsigned int output_height,
unsigned int output_width,
- short *vp7_filter
+ short *filter
);
extern unsigned int vp8_get_mb_ss_mmx(const short *src_ptr);
@@ -198,24 +199,6 @@
}
-
-
-///////////////////////////////////////////////////////////////////////////
-// the mmx function that does the bilinear filtering and var calculation //
-// int one pass //
-///////////////////////////////////////////////////////////////////////////
-DECLARE_ALIGNED(16, const short, vp8_vp7_bilinear_filters_mmx[8][8]) =
-{
- { 128, 128, 128, 128, 0, 0, 0, 0 },
- { 112, 112, 112, 112, 16, 16, 16, 16 },
- { 96, 96, 96, 96, 32, 32, 32, 32 },
- { 80, 80, 80, 80, 48, 48, 48, 48 },
- { 64, 64, 64, 64, 64, 64, 64, 64 },
- { 48, 48, 48, 48, 80, 80, 80, 80 },
- { 32, 32, 32, 32, 96, 96, 96, 96 },
- { 16, 16, 16, 16, 112, 112, 112, 112 }
-};
-
unsigned int vp8_sub_pixel_variance4x4_mmx
(
const unsigned char *src_ptr,
@@ -232,7 +215,7 @@
vp8_filter_block2d_bil4x4_var_mmx(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line,
- vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
+ vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
&xsum, &xxsum
);
*sse = xxsum;
@@ -257,7 +240,7 @@
vp8_filter_block2d_bil_var_mmx(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 8,
- vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
+ vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
&xsum, &xxsum
);
*sse = xxsum;
@@ -283,7 +266,7 @@
vp8_filter_block2d_bil_var_mmx(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 16,
- vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
+ vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
&xsum0, &xxsum0
);
@@ -291,7 +274,7 @@
vp8_filter_block2d_bil_var_mmx(
src_ptr + 8, src_pixels_per_line,
dst_ptr + 8, dst_pixels_per_line, 16,
- vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
+ vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
&xsum1, &xxsum1
);
@@ -336,7 +319,7 @@
vp8_filter_block2d_bil_var_mmx(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 8,
- vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
+ vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
&xsum0, &xxsum0
);
@@ -344,7 +327,7 @@
vp8_filter_block2d_bil_var_mmx(
src_ptr + 8, src_pixels_per_line,
dst_ptr + 8, dst_pixels_per_line, 8,
- vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
+ vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
&xsum1, &xxsum1
);
@@ -371,7 +354,7 @@
vp8_filter_block2d_bil_var_mmx(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 16,
- vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
+ vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
&xsum, &xxsum
);
*sse = xxsum;
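
The per-file vp8_vp7_bilinear_filters_mmx table removed above is replaced by the shared vp8_bilinear_filters_x86_4 table declared in vp8/common/x86/filter_x86.h, so the sub-pixel variance kernels now index one common set of coefficients. A hedged sketch of how one 8-entry row of that table is applied, assuming the usual VP8 7-bit filter scale with round-to-nearest (bilinear_tap is a hypothetical helper; the real work happens inside the MMX/SSE2 kernels):

    static unsigned char bilinear_tap(unsigned char a, unsigned char b,
                                      const short *filter)
    {
        /* filter points at one row of vp8_bilinear_filters_x86_4:
           entries 0..3 repeat the first tap, 4..7 the second; taps sum to 128 */
        return (unsigned char)((a * filter[0] + b * filter[4] + 64) >> 7);
    }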
diff --git a/vp8/encoder/x86/variance_sse2.c b/vp8/encoder/x86/variance_sse2.c
index 24062eb..39213b0 100644
--- a/vp8/encoder/x86/variance_sse2.c
+++ b/vp8/encoder/x86/variance_sse2.c
@@ -12,11 +12,12 @@
#include "vp8/encoder/variance.h"
#include "vp8/common/pragmas.h"
#include "vpx_ports/mem.h"
+#include "vp8/common/x86/filter_x86.h"
-extern void filter_block1d_h6_mmx(const unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter);
-extern void filter_block1d_v6_mmx(const short *src_ptr, unsigned char *output_ptr, unsigned int pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter);
-extern void filter_block1d8_h6_sse2(const unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter);
-extern void filter_block1d8_v6_sse2(const short *src_ptr, unsigned char *output_ptr, unsigned int pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter);
+extern void filter_block1d_h6_mmx(const unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *filter);
+extern void filter_block1d_v6_mmx(const short *src_ptr, unsigned char *output_ptr, unsigned int pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *filter);
+extern void filter_block1d8_h6_sse2(const unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *filter);
+extern void filter_block1d8_v6_sse2(const short *src_ptr, unsigned char *output_ptr, unsigned int pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *filter);
extern void vp8_filter_block2d_bil4x4_var_mmx
(
@@ -135,8 +136,6 @@
unsigned int *sumsquared
);
-DECLARE_ALIGNED(16, extern short, vp8_vp7_bilinear_filters_mmx[8][8]);
-
unsigned int vp8_variance4x4_wmt(
const unsigned char *src_ptr,
int source_stride,
@@ -262,7 +261,7 @@
vp8_filter_block2d_bil4x4_var_mmx(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line,
- vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
+ vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
&xsum, &xxsum
);
*sse = xxsum;
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk
index 5c15a3e..683af34 100644
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -72,6 +72,8 @@
VP8_COMMON_SRCS-$(CONFIG_POSTPROC_VISUALIZER) += common/textblit.c
VP8_COMMON_SRCS-yes += common/treecoder.c
+VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/filter_x86.c
+VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/filter_x86.h
VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/idct_x86.h
VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/subpixel_x86.h
VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/recon_x86.h
diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index 7260e94..4f21e14 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -271,14 +271,7 @@
oxcf->Width = cfg.g_w;
oxcf->Height = cfg.g_h;
- /* guess a frame rate if out of whack, use 30 */
- oxcf->frame_rate = (double)(cfg.g_timebase.den) /
- (double)(cfg.g_timebase.num);
-
- if (oxcf->frame_rate > 180)
- {
- oxcf->frame_rate = 30;
- }
+ oxcf->timebase = cfg.g_timebase;
oxcf->error_resilient_mode = cfg.g_error_resilient;
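
With this change the encoder configuration carries the raw timebase instead of a pre-computed frame rate, and frame_rate itself moves into VP8_COMP (see the onyx_int.h hunk above), so the conversion removed here presumably happens inside the encoder. A sketch of that derivation against the new field, mirroring the removed logic including its clamp (frame_rate_from_timebase is a hypothetical name; struct vpx_rational is assumed to come from vpx/vpx_encoder.h):

    #include "vpx/vpx_encoder.h"    /* struct vpx_rational: num/den */

    static double frame_rate_from_timebase(const struct vpx_rational *tb)
    {
        double frame_rate = (double)tb->den / (double)tb->num;

        if (frame_rate > 180)       /* guess a frame rate if out of whack, use 30 */
            frame_rate = 30;

        return frame_rate;
    }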